{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999244142101285, "eval_steps": 200, "global_step": 661, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0015117157974300832, "fcm_dpo/beta": 0.10617753118276596, "fcm_dpo/delta": 0.2997117042541504, "fcm_dpo/margin": -0.0013532638549804688, "fcm_dpo/q_t": 0.5000401735305786, "grad_norm": 29.968503952026367, "learning_rate": 0.0, "logits/chosen": 0.13337239623069763, "logits/rejected": 0.12492949515581131, "logps/chosen": -64.5841293334961, "logps/ref_chosen": -64.61280822753906, "logps/ref_rejected": -64.17195129394531, "logps/rejected": -64.14192199707031, "loss": 1.3866, "margin_dpo/margin_mean": -0.0013527870178222656, "margin_dpo/margin_std": 0.2561596930027008, "step": 1 }, { "epoch": 0.0030234315948601664, "fcm_dpo/beta": 0.10940517485141754, "fcm_dpo/delta": 0.2951034903526306, "fcm_dpo/margin": 0.037450045347213745, "fcm_dpo/q_t": 0.49900609254837036, "grad_norm": 30.472522735595703, "learning_rate": 7.462686567164179e-09, "logits/chosen": 0.09414851665496826, "logits/rejected": 0.07363267242908478, "logps/chosen": -56.101890563964844, "logps/ref_chosen": -56.0989990234375, "logps/ref_rejected": -66.59971618652344, "logps/rejected": -66.64006042480469, "loss": 1.3823, "margin_dpo/margin_mean": 0.03744968771934509, "margin_dpo/margin_std": 0.27811938524246216, "step": 2 }, { "epoch": 0.0045351473922902496, "fcm_dpo/beta": 0.1160772293806076, "fcm_dpo/delta": 0.2968211770057678, "fcm_dpo/margin": -0.018296539783477783, "fcm_dpo/q_t": 0.5005147457122803, "grad_norm": 36.36097717285156, "learning_rate": 1.4925373134328357e-08, "logits/chosen": 0.0993613749742508, "logits/rejected": 0.061305850744247437, "logps/chosen": -65.44808959960938, "logps/ref_chosen": -65.45726013183594, "logps/ref_rejected": -90.82853698730469, "logps/rejected": -90.80107116699219, "loss": 1.3885, "margin_dpo/margin_mean": -0.01829671859741211, "margin_dpo/margin_std": 0.29925334453582764, "step": 3 }, { "epoch": 0.006046863189720333, "fcm_dpo/beta": 0.12316329777240753, "fcm_dpo/delta": 0.2957630753517151, "fcm_dpo/margin": 0.005419373512268066, "fcm_dpo/q_t": 0.49983808398246765, "grad_norm": 42.36802673339844, "learning_rate": 2.2388059701492534e-08, "logits/chosen": 0.10537564754486084, "logits/rejected": 0.08931504189968109, "logps/chosen": -76.84124755859375, "logps/ref_chosen": -76.86018371582031, "logps/ref_rejected": -79.91523742675781, "logps/rejected": -79.90172576904297, "loss": 1.3859, "margin_dpo/margin_mean": 0.005418956279754639, "margin_dpo/margin_std": 0.36413294076919556, "step": 4 }, { "epoch": 0.007558578987150416, "fcm_dpo/beta": 0.13464157283306122, "fcm_dpo/delta": 0.2998310327529907, "fcm_dpo/margin": -0.019110530614852905, "fcm_dpo/q_t": 0.5006451606750488, "grad_norm": 39.96297836303711, "learning_rate": 2.9850746268656714e-08, "logits/chosen": 0.08226083219051361, "logits/rejected": 0.04336044192314148, "logps/chosen": -63.03343963623047, "logps/ref_chosen": -62.97134017944336, "logps/ref_rejected": -79.9192123413086, "logps/rejected": -79.96220397949219, "loss": 1.3894, "margin_dpo/margin_mean": -0.019109666347503662, "margin_dpo/margin_std": 0.33363407850265503, "step": 5 }, { "epoch": 0.009070294784580499, "fcm_dpo/beta": 0.14293737709522247, "fcm_dpo/delta": 0.29895198345184326, "fcm_dpo/margin": -0.03577873110771179, "fcm_dpo/q_t": 0.501295268535614, "grad_norm": 42.36745834350586, "learning_rate": 3.731343283582089e-08, "logits/chosen": 0.1481696516275406, "logits/rejected": 0.10847893357276917, "logps/chosen": -51.35020065307617, "logps/ref_chosen": -51.30736541748047, "logps/ref_rejected": -82.77239227294922, "logps/rejected": -82.77944946289062, "loss": 1.3922, "margin_dpo/margin_mean": -0.035778701305389404, "margin_dpo/margin_std": 0.3935219645500183, "step": 6 }, { "epoch": 0.010582010582010581, "fcm_dpo/beta": 0.1473204642534256, "fcm_dpo/delta": 0.2976083755493164, "fcm_dpo/margin": 0.007407635450363159, "fcm_dpo/q_t": 0.4997352957725525, "grad_norm": 40.037208557128906, "learning_rate": 4.477611940298507e-08, "logits/chosen": 0.01973375305533409, "logits/rejected": -0.024250730872154236, "logps/chosen": -51.42656707763672, "logps/ref_chosen": -51.45941162109375, "logps/ref_rejected": -66.3828125, "logps/rejected": -66.35736846923828, "loss": 1.3854, "margin_dpo/margin_mean": 0.007407456636428833, "margin_dpo/margin_std": 0.24686771631240845, "step": 7 }, { "epoch": 0.012093726379440665, "fcm_dpo/beta": 0.16093291342258453, "fcm_dpo/delta": 0.29529640078544617, "fcm_dpo/margin": 0.01261255145072937, "fcm_dpo/q_t": 0.4995649755001068, "grad_norm": 46.08513259887695, "learning_rate": 5.223880597014925e-08, "logits/chosen": 0.10268443822860718, "logits/rejected": 0.0798833817243576, "logps/chosen": -62.18996810913086, "logps/ref_chosen": -62.197547912597656, "logps/ref_rejected": -74.66180419921875, "logps/rejected": -74.66683959960938, "loss": 1.385, "margin_dpo/margin_mean": 0.012611836194992065, "margin_dpo/margin_std": 0.323344886302948, "step": 8 }, { "epoch": 0.013605442176870748, "fcm_dpo/beta": 0.1707511693239212, "fcm_dpo/delta": 0.29609861969947815, "fcm_dpo/margin": 0.01630309224128723, "fcm_dpo/q_t": 0.49936288595199585, "grad_norm": 53.85386657714844, "learning_rate": 5.970149253731343e-08, "logits/chosen": 0.175694078207016, "logits/rejected": 0.11593098938465118, "logps/chosen": -55.649192810058594, "logps/ref_chosen": -55.629722595214844, "logps/ref_rejected": -86.21221923828125, "logps/rejected": -86.24800109863281, "loss": 1.3845, "margin_dpo/margin_mean": 0.016303330659866333, "margin_dpo/margin_std": 0.35587644577026367, "step": 9 }, { "epoch": 0.015117157974300832, "fcm_dpo/beta": 0.18108926713466644, "fcm_dpo/delta": 0.2939136326313019, "fcm_dpo/margin": -0.010755598545074463, "fcm_dpo/q_t": 0.5005801916122437, "grad_norm": 53.989009857177734, "learning_rate": 6.71641791044776e-08, "logits/chosen": 0.14876845479011536, "logits/rejected": 0.11683596670627594, "logps/chosen": -62.660911560058594, "logps/ref_chosen": -62.69060134887695, "logps/ref_rejected": -90.610107421875, "logps/rejected": -90.56967163085938, "loss": 1.3898, "margin_dpo/margin_mean": -0.010755836963653564, "margin_dpo/margin_std": 0.42465806007385254, "step": 10 }, { "epoch": 0.016628873771730914, "fcm_dpo/beta": 0.19215525686740875, "fcm_dpo/delta": 0.29656803607940674, "fcm_dpo/margin": -0.036903828382492065, "fcm_dpo/q_t": 0.5018194913864136, "grad_norm": 56.99840545654297, "learning_rate": 7.462686567164178e-08, "logits/chosen": 0.0916949212551117, "logits/rejected": 0.0850258320569992, "logps/chosen": -65.7918701171875, "logps/ref_chosen": -65.76712036132812, "logps/ref_rejected": -72.4764633178711, "logps/rejected": -72.46430969238281, "loss": 1.3942, "margin_dpo/margin_mean": -0.036903709173202515, "margin_dpo/margin_std": 0.27903497219085693, "step": 11 }, { "epoch": 0.018140589569160998, "fcm_dpo/beta": 0.19215525686740875, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.06204596161842346, "fcm_dpo/q_t": 0.5029789209365845, "grad_norm": 54.74998474121094, "learning_rate": 8.208955223880596e-08, "logits/chosen": -0.0007961541414260864, "logits/rejected": -0.016335628926753998, "logps/chosen": -60.76300048828125, "logps/ref_chosen": -60.704891204833984, "logps/ref_rejected": -69.41564178466797, "logps/rejected": -69.41170501708984, "loss": 1.399, "margin_dpo/margin_mean": -0.06204575300216675, "margin_dpo/margin_std": 0.28618353605270386, "step": 12 }, { "epoch": 0.019652305366591082, "fcm_dpo/beta": 0.19215525686740875, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.06563130021095276, "fcm_dpo/q_t": 0.5031505823135376, "grad_norm": 56.53636932373047, "learning_rate": 8.955223880597014e-08, "logits/chosen": 0.13603587448596954, "logits/rejected": 0.07090998440980911, "logps/chosen": -49.936485290527344, "logps/ref_chosen": -49.90925598144531, "logps/ref_rejected": -92.37818145751953, "logps/rejected": -92.33977508544922, "loss": 1.3996, "margin_dpo/margin_mean": -0.06563141942024231, "margin_dpo/margin_std": 0.2707768678665161, "step": 13 }, { "epoch": 0.021164021164021163, "fcm_dpo/beta": 0.2097225785255432, "fcm_dpo/delta": 0.5815231800079346, "fcm_dpo/margin": 0.09396466612815857, "fcm_dpo/q_t": 0.4953842759132385, "grad_norm": 61.18081283569336, "learning_rate": 9.701492537313432e-08, "logits/chosen": 0.08565383404493332, "logits/rejected": 0.06797105073928833, "logps/chosen": -60.578643798828125, "logps/ref_chosen": -60.61879348754883, "logps/ref_rejected": -71.79306030273438, "logps/rejected": -71.84687042236328, "loss": 1.3676, "margin_dpo/margin_mean": 0.09396436810493469, "margin_dpo/margin_std": 0.2676827907562256, "step": 14 }, { "epoch": 0.022675736961451247, "fcm_dpo/beta": 0.21585531532764435, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.01250794529914856, "fcm_dpo/q_t": 0.5006709098815918, "grad_norm": 72.49380493164062, "learning_rate": 1.044776119402985e-07, "logits/chosen": 0.07883284986019135, "logits/rejected": 0.03507314249873161, "logps/chosen": -63.48413848876953, "logps/ref_chosen": -63.46953582763672, "logps/ref_rejected": -88.88951110839844, "logps/rejected": -88.89160919189453, "loss": 1.3902, "margin_dpo/margin_mean": -0.01250794529914856, "margin_dpo/margin_std": 0.3276939392089844, "step": 15 }, { "epoch": 0.02418745275888133, "fcm_dpo/beta": 0.22911374270915985, "fcm_dpo/delta": 0.29805102944374084, "fcm_dpo/margin": -0.0001682192087173462, "fcm_dpo/q_t": 0.5000435709953308, "grad_norm": 61.602413177490234, "learning_rate": 1.1194029850746268e-07, "logits/chosen": 0.10727139562368393, "logits/rejected": 0.07074932754039764, "logps/chosen": -46.52628707885742, "logps/ref_chosen": -46.53229904174805, "logps/ref_rejected": -74.27533721923828, "logps/rejected": -74.2691650390625, "loss": 1.3874, "margin_dpo/margin_mean": -0.00016827881336212158, "margin_dpo/margin_std": 0.2899588346481323, "step": 16 }, { "epoch": 0.025699168556311415, "fcm_dpo/beta": 0.23615270853042603, "fcm_dpo/delta": 0.2981564998626709, "fcm_dpo/margin": -0.013454735279083252, "fcm_dpo/q_t": 0.5007691979408264, "grad_norm": 77.29975128173828, "learning_rate": 1.1940298507462686e-07, "logits/chosen": 0.05410391837358475, "logits/rejected": 0.03542998805642128, "logps/chosen": -64.09270477294922, "logps/ref_chosen": -64.07783508300781, "logps/ref_rejected": -86.40876770019531, "logps/rejected": -86.41018676757812, "loss": 1.3906, "margin_dpo/margin_mean": -0.013455450534820557, "margin_dpo/margin_std": 0.2995184659957886, "step": 17 }, { "epoch": 0.027210884353741496, "fcm_dpo/beta": 0.26530909538269043, "fcm_dpo/delta": 0.5765583515167236, "fcm_dpo/margin": 0.0933290421962738, "fcm_dpo/q_t": 0.49414387345314026, "grad_norm": 73.40933227539062, "learning_rate": 1.2686567164179106e-07, "logits/chosen": 0.07863786071538925, "logits/rejected": 0.03418922424316406, "logps/chosen": -44.8294563293457, "logps/ref_chosen": -44.87433624267578, "logps/ref_rejected": -70.97604370117188, "logps/rejected": -71.02449798583984, "loss": 1.363, "margin_dpo/margin_mean": 0.09332945942878723, "margin_dpo/margin_std": 0.28065070509910583, "step": 18 }, { "epoch": 0.02872260015117158, "fcm_dpo/beta": 0.2983761131763458, "fcm_dpo/delta": 0.5883051156997681, "fcm_dpo/margin": 0.04047667980194092, "fcm_dpo/q_t": 0.49707770347595215, "grad_norm": 92.44178009033203, "learning_rate": 1.343283582089552e-07, "logits/chosen": 0.06618274748325348, "logits/rejected": 0.05268959701061249, "logps/chosen": -68.09764099121094, "logps/ref_chosen": -68.1598129272461, "logps/ref_rejected": -81.17138671875, "logps/rejected": -81.14969635009766, "loss": 1.3764, "margin_dpo/margin_mean": 0.04047694802284241, "margin_dpo/margin_std": 0.33094900846481323, "step": 19 }, { "epoch": 0.030234315948601664, "fcm_dpo/beta": 0.32533735036849976, "fcm_dpo/delta": 0.29020002484321594, "fcm_dpo/margin": 0.029502317309379578, "fcm_dpo/q_t": 0.4977492094039917, "grad_norm": 94.25809478759766, "learning_rate": 1.4179104477611938e-07, "logits/chosen": 0.16051048040390015, "logits/rejected": 0.1364545077085495, "logps/chosen": -53.649314880371094, "logps/ref_chosen": -53.67856216430664, "logps/ref_rejected": -74.16911315917969, "logps/rejected": -74.16937255859375, "loss": 1.3783, "margin_dpo/margin_mean": 0.029502198100090027, "margin_dpo/margin_std": 0.23968058824539185, "step": 20 }, { "epoch": 0.031746031746031744, "fcm_dpo/beta": 0.33534562587738037, "fcm_dpo/delta": 0.2985358238220215, "fcm_dpo/margin": -0.01896098256111145, "fcm_dpo/q_t": 0.501542866230011, "grad_norm": 98.49360656738281, "learning_rate": 1.4925373134328355e-07, "logits/chosen": 0.0989365428686142, "logits/rejected": 0.07379420101642609, "logps/chosen": -64.69507598876953, "logps/ref_chosen": -64.70155334472656, "logps/ref_rejected": -81.02095031738281, "logps/rejected": -80.9955062866211, "loss": 1.395, "margin_dpo/margin_mean": -0.018961191177368164, "margin_dpo/margin_std": 0.303374707698822, "step": 21 }, { "epoch": 0.03325774754346183, "fcm_dpo/beta": 0.37750545144081116, "fcm_dpo/delta": 0.589049756526947, "fcm_dpo/margin": 0.03040817379951477, "fcm_dpo/q_t": 0.49726974964141846, "grad_norm": 108.48323059082031, "learning_rate": 1.5671641791044775e-07, "logits/chosen": 0.0004040743806399405, "logits/rejected": -0.020364250987768173, "logps/chosen": -58.048194885253906, "logps/ref_chosen": -58.03599166870117, "logps/ref_rejected": -80.72721862792969, "logps/rejected": -80.76982116699219, "loss": 1.3781, "margin_dpo/margin_mean": 0.030408114194869995, "margin_dpo/margin_std": 0.3076469302177429, "step": 22 }, { "epoch": 0.03476946334089191, "fcm_dpo/beta": 0.42479178309440613, "fcm_dpo/delta": 0.5902184844017029, "fcm_dpo/margin": 0.024062812328338623, "fcm_dpo/q_t": 0.49758607149124146, "grad_norm": 138.24923706054688, "learning_rate": 1.6417910447761193e-07, "logits/chosen": 0.1478240191936493, "logits/rejected": 0.1219228208065033, "logps/chosen": -66.36636352539062, "logps/ref_chosen": -66.35608673095703, "logps/ref_rejected": -93.02769470214844, "logps/rejected": -93.06202697753906, "loss": 1.3801, "margin_dpo/margin_mean": 0.024062126874923706, "margin_dpo/margin_std": 0.30408918857574463, "step": 23 }, { "epoch": 0.036281179138321996, "fcm_dpo/beta": 0.43721428513526917, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.05492676794528961, "fcm_dpo/q_t": 0.5059708952903748, "grad_norm": 117.01378631591797, "learning_rate": 1.716417910447761e-07, "logits/chosen": 0.13261155784130096, "logits/rejected": 0.09979216754436493, "logps/chosen": -54.479827880859375, "logps/ref_chosen": -54.461238861083984, "logps/ref_rejected": -68.33817291259766, "logps/rejected": -68.30183410644531, "loss": 1.4136, "margin_dpo/margin_mean": -0.054926902055740356, "margin_dpo/margin_std": 0.2555236518383026, "step": 24 }, { "epoch": 0.03779289493575208, "fcm_dpo/beta": 0.47826117277145386, "fcm_dpo/delta": 0.5947252511978149, "fcm_dpo/margin": 0.011602401733398438, "fcm_dpo/q_t": 0.4986857771873474, "grad_norm": 141.31944274902344, "learning_rate": 1.7910447761194027e-07, "logits/chosen": 0.13031867146492004, "logits/rejected": 0.07699558138847351, "logps/chosen": -60.027915954589844, "logps/ref_chosen": -60.00420379638672, "logps/ref_rejected": -90.47376251220703, "logps/rejected": -90.50907897949219, "loss": 1.3869, "margin_dpo/margin_mean": 0.011602312326431274, "margin_dpo/margin_std": 0.3307107388973236, "step": 25 }, { "epoch": 0.039304610733182165, "fcm_dpo/beta": 0.522553026676178, "fcm_dpo/delta": 0.2967897057533264, "fcm_dpo/margin": -0.020512163639068604, "fcm_dpo/q_t": 0.5027469396591187, "grad_norm": 156.2456512451172, "learning_rate": 1.8656716417910447e-07, "logits/chosen": 0.10858827084302902, "logits/rejected": 0.09025729447603226, "logps/chosen": -56.84931945800781, "logps/ref_chosen": -56.81915283203125, "logps/ref_rejected": -77.84333038330078, "logps/rejected": -77.85298156738281, "loss": 1.4085, "margin_dpo/margin_mean": -0.020512670278549194, "margin_dpo/margin_std": 0.4012720584869385, "step": 26 }, { "epoch": 0.04081632653061224, "fcm_dpo/beta": 0.522553026676178, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.018406003713607788, "fcm_dpo/q_t": 0.5024095177650452, "grad_norm": 150.67922973632812, "learning_rate": 1.9402985074626865e-07, "logits/chosen": 0.0803753212094307, "logits/rejected": 0.056419409811496735, "logps/chosen": -62.89083480834961, "logps/ref_chosen": -62.87702560424805, "logps/ref_rejected": -71.34437561035156, "logps/rejected": -71.33978271484375, "loss": 1.4023, "margin_dpo/margin_mean": -0.018405765295028687, "margin_dpo/margin_std": 0.3038307726383209, "step": 27 }, { "epoch": 0.042328042328042326, "fcm_dpo/beta": 0.5533031821250916, "fcm_dpo/delta": 0.2858981192111969, "fcm_dpo/margin": 0.021839946508407593, "fcm_dpo/q_t": 0.4972037672996521, "grad_norm": 153.09165954589844, "learning_rate": 2.0149253731343282e-07, "logits/chosen": 0.053669240325689316, "logits/rejected": 0.045026686042547226, "logps/chosen": -59.809776306152344, "logps/ref_chosen": -59.8333740234375, "logps/ref_rejected": -70.39804077148438, "logps/rejected": -70.39627838134766, "loss": 1.3808, "margin_dpo/margin_mean": 0.021840453147888184, "margin_dpo/margin_std": 0.2902475595474243, "step": 28 }, { "epoch": 0.04383975812547241, "fcm_dpo/beta": 0.5869318842887878, "fcm_dpo/delta": 0.2950134873390198, "fcm_dpo/margin": -0.0369829386472702, "fcm_dpo/q_t": 0.5054592490196228, "grad_norm": 198.2787628173828, "learning_rate": 2.08955223880597e-07, "logits/chosen": 0.14541292190551758, "logits/rejected": 0.12754370272159576, "logps/chosen": -74.16682434082031, "logps/ref_chosen": -74.12020111083984, "logps/ref_rejected": -83.33099365234375, "logps/rejected": -83.34062957763672, "loss": 1.4175, "margin_dpo/margin_mean": -0.03698325157165527, "margin_dpo/margin_std": 0.3256310224533081, "step": 29 }, { "epoch": 0.045351473922902494, "fcm_dpo/beta": 0.603141188621521, "fcm_dpo/delta": 0.2688133418560028, "fcm_dpo/margin": 0.046625733375549316, "fcm_dpo/q_t": 0.4932544231414795, "grad_norm": 177.89276123046875, "learning_rate": 2.1641791044776117e-07, "logits/chosen": 0.1190497875213623, "logits/rejected": 0.06499116122722626, "logps/chosen": -50.79059600830078, "logps/ref_chosen": -50.75128936767578, "logps/ref_rejected": -89.29063415527344, "logps/rejected": -89.3765640258789, "loss": 1.3655, "margin_dpo/margin_mean": 0.04662585258483887, "margin_dpo/margin_std": 0.29222309589385986, "step": 30 }, { "epoch": 0.04686318972033258, "fcm_dpo/beta": 0.6370232105255127, "fcm_dpo/delta": 0.27749747037887573, "fcm_dpo/margin": 0.0308951735496521, "fcm_dpo/q_t": 0.4952976703643799, "grad_norm": 219.1363983154297, "learning_rate": 2.2388059701492537e-07, "logits/chosen": 0.11047565937042236, "logits/rejected": 0.06402811408042908, "logps/chosen": -65.35533905029297, "logps/ref_chosen": -65.33675384521484, "logps/ref_rejected": -100.76666259765625, "logps/rejected": -100.8161392211914, "loss": 1.3758, "margin_dpo/margin_mean": 0.03089618682861328, "margin_dpo/margin_std": 0.30930471420288086, "step": 31 }, { "epoch": 0.04837490551776266, "fcm_dpo/beta": 0.6727170944213867, "fcm_dpo/delta": 0.26795029640197754, "fcm_dpo/margin": 0.04653581976890564, "fcm_dpo/q_t": 0.49249711632728577, "grad_norm": 198.70797729492188, "learning_rate": 2.3134328358208954e-07, "logits/chosen": 0.08043690025806427, "logits/rejected": 0.07259618490934372, "logps/chosen": -67.17460632324219, "logps/ref_chosen": -67.18333435058594, "logps/ref_rejected": -82.80763244628906, "logps/rejected": -82.84544372558594, "loss": 1.3664, "margin_dpo/margin_mean": 0.04653611779212952, "margin_dpo/margin_std": 0.3230898380279541, "step": 32 }, { "epoch": 0.049886621315192746, "fcm_dpo/beta": 0.7546996474266052, "fcm_dpo/delta": 0.5825432538986206, "fcm_dpo/margin": 0.023887306451797485, "fcm_dpo/q_t": 0.4956985116004944, "grad_norm": 240.79859924316406, "learning_rate": 2.388059701492537e-07, "logits/chosen": 0.028976380825042725, "logits/rejected": 0.0030624661594629288, "logps/chosen": -64.09410095214844, "logps/ref_chosen": -64.03948211669922, "logps/ref_rejected": -75.68357849121094, "logps/rejected": -75.76209259033203, "loss": 1.3862, "margin_dpo/margin_mean": 0.023888081312179565, "margin_dpo/margin_std": 0.3596636652946472, "step": 33 }, { "epoch": 0.05139833711262283, "fcm_dpo/beta": 0.8192043900489807, "fcm_dpo/delta": 0.27031949162483215, "fcm_dpo/margin": 0.035017967224121094, "fcm_dpo/q_t": 0.49334418773651123, "grad_norm": 234.72634887695312, "learning_rate": 2.4626865671641786e-07, "logits/chosen": 0.09738805890083313, "logits/rejected": 0.067594513297081, "logps/chosen": -53.69878387451172, "logps/ref_chosen": -53.6642951965332, "logps/ref_rejected": -65.77989959716797, "logps/rejected": -65.84939575195312, "loss": 1.3704, "margin_dpo/margin_mean": 0.03501778841018677, "margin_dpo/margin_std": 0.27197498083114624, "step": 34 }, { "epoch": 0.05291005291005291, "fcm_dpo/beta": 0.8430140018463135, "fcm_dpo/delta": 0.2825099527835846, "fcm_dpo/margin": 0.009096980094909668, "fcm_dpo/q_t": 0.498089462518692, "grad_norm": 236.1203155517578, "learning_rate": 2.537313432835821e-07, "logits/chosen": 0.05875828117132187, "logits/rejected": 0.03618273511528969, "logps/chosen": -61.09056854248047, "logps/ref_chosen": -61.01686096191406, "logps/ref_rejected": -72.78598022460938, "logps/rejected": -72.86878967285156, "loss": 1.3968, "margin_dpo/margin_mean": 0.009096503257751465, "margin_dpo/margin_std": 0.3263735771179199, "step": 35 }, { "epoch": 0.05442176870748299, "fcm_dpo/beta": 0.8931436538696289, "fcm_dpo/delta": 0.2947741150856018, "fcm_dpo/margin": 0.0023790299892425537, "fcm_dpo/q_t": 0.49950528144836426, "grad_norm": 255.0270538330078, "learning_rate": 2.611940298507462e-07, "logits/chosen": 0.10544905811548233, "logits/rejected": 0.05225067213177681, "logps/chosen": -50.6300048828125, "logps/ref_chosen": -50.53736114501953, "logps/ref_rejected": -78.11678314208984, "logps/rejected": -78.21180725097656, "loss": 1.4039, "margin_dpo/margin_mean": 0.002379119396209717, "margin_dpo/margin_std": 0.3160307705402374, "step": 36 }, { "epoch": 0.055933484504913075, "fcm_dpo/beta": 0.9926947951316833, "fcm_dpo/delta": 0.5041809678077698, "fcm_dpo/margin": 0.10119250416755676, "fcm_dpo/q_t": 0.47944971919059753, "grad_norm": 356.8680725097656, "learning_rate": 2.686567164179104e-07, "logits/chosen": 0.08121801912784576, "logits/rejected": 0.0041434187442064285, "logps/chosen": -59.60346221923828, "logps/ref_chosen": -59.55394744873047, "logps/ref_rejected": -108.27702331542969, "logps/rejected": -108.427734375, "loss": 1.3356, "margin_dpo/margin_mean": 0.10119268298149109, "margin_dpo/margin_std": 0.44900059700012207, "step": 37 }, { "epoch": 0.05744520030234316, "fcm_dpo/beta": 1.0603744983673096, "fcm_dpo/delta": 0.20875374972820282, "fcm_dpo/margin": 0.05771473050117493, "fcm_dpo/q_t": 0.4865277409553528, "grad_norm": 308.198974609375, "learning_rate": 2.761194029850746e-07, "logits/chosen": 0.07032038271427155, "logits/rejected": 0.0563180111348629, "logps/chosen": -65.86856079101562, "logps/ref_chosen": -65.78836059570312, "logps/ref_rejected": -76.1619873046875, "logps/rejected": -76.29991149902344, "loss": 1.3607, "margin_dpo/margin_mean": 0.05771511793136597, "margin_dpo/margin_std": 0.3312731385231018, "step": 38 }, { "epoch": 0.05895691609977324, "fcm_dpo/beta": 1.1480540037155151, "fcm_dpo/delta": 0.5104781985282898, "fcm_dpo/margin": 0.08076709508895874, "fcm_dpo/q_t": 0.47740280628204346, "grad_norm": 329.3815612792969, "learning_rate": 2.8358208955223876e-07, "logits/chosen": 0.1415819525718689, "logits/rejected": 0.11533431708812714, "logps/chosen": -57.262725830078125, "logps/ref_chosen": -57.17681121826172, "logps/ref_rejected": -79.486328125, "logps/rejected": -79.65301513671875, "loss": 1.3441, "margin_dpo/margin_mean": 0.08076697587966919, "margin_dpo/margin_std": 0.3901514708995819, "step": 39 }, { "epoch": 0.06046863189720333, "fcm_dpo/beta": 1.2407445907592773, "fcm_dpo/delta": 0.2749688923358917, "fcm_dpo/margin": 0.0019149184226989746, "fcm_dpo/q_t": 0.49921295046806335, "grad_norm": 422.4781188964844, "learning_rate": 2.9104477611940296e-07, "logits/chosen": 0.1148887425661087, "logits/rejected": 0.06507512927055359, "logps/chosen": -61.413368225097656, "logps/ref_chosen": -61.33416748046875, "logps/ref_rejected": -79.10697174072266, "logps/rejected": -79.1880874633789, "loss": 1.4165, "margin_dpo/margin_mean": 0.0019148588180541992, "margin_dpo/margin_std": 0.2892666459083557, "step": 40 }, { "epoch": 0.06198034769463341, "fcm_dpo/beta": 1.3399341106414795, "fcm_dpo/delta": 0.49436837434768677, "fcm_dpo/margin": 0.08180907368659973, "fcm_dpo/q_t": 0.4749258756637573, "grad_norm": 407.1304626464844, "learning_rate": 2.985074626865671e-07, "logits/chosen": 0.05015815421938896, "logits/rejected": 0.03012828528881073, "logps/chosen": -67.65704345703125, "logps/ref_chosen": -67.5467300415039, "logps/ref_rejected": -83.87788391113281, "logps/rejected": -84.07000732421875, "loss": 1.344, "margin_dpo/margin_mean": 0.08180946111679077, "margin_dpo/margin_std": 0.386926531791687, "step": 41 }, { "epoch": 0.06349206349206349, "fcm_dpo/beta": 1.4246625900268555, "fcm_dpo/delta": 0.19674822688102722, "fcm_dpo/margin": 0.06773808598518372, "fcm_dpo/q_t": 0.47795066237449646, "grad_norm": 400.0076599121094, "learning_rate": 3.059701492537313e-07, "logits/chosen": 0.0690753310918808, "logits/rejected": 0.046986717730760574, "logps/chosen": -61.381797790527344, "logps/ref_chosen": -61.26485824584961, "logps/ref_rejected": -76.3629150390625, "logps/rejected": -76.54759216308594, "loss": 1.3368, "margin_dpo/margin_mean": 0.067737877368927, "margin_dpo/margin_std": 0.28877195715904236, "step": 42 }, { "epoch": 0.06500377928949358, "fcm_dpo/beta": 1.5338796377182007, "fcm_dpo/delta": 0.48754990100860596, "fcm_dpo/margin": 0.07686775922775269, "fcm_dpo/q_t": 0.4750903844833374, "grad_norm": 511.4953308105469, "learning_rate": 3.134328358208955e-07, "logits/chosen": 0.08270905166864395, "logits/rejected": 0.07184214890003204, "logps/chosen": -71.89559936523438, "logps/ref_chosen": -71.80902862548828, "logps/ref_rejected": -81.12464141845703, "logps/rejected": -81.28807830810547, "loss": 1.3775, "margin_dpo/margin_mean": 0.07686793804168701, "margin_dpo/margin_std": 0.4142921566963196, "step": 43 }, { "epoch": 0.06651549508692366, "fcm_dpo/beta": 1.6183886528015137, "fcm_dpo/delta": 0.29549404978752136, "fcm_dpo/margin": -0.0225009024143219, "fcm_dpo/q_t": 0.5110141038894653, "grad_norm": 553.7533569335938, "learning_rate": 3.2089552238805965e-07, "logits/chosen": 0.05938286706805229, "logits/rejected": 0.028474589809775352, "logps/chosen": -66.68466186523438, "logps/ref_chosen": -66.55043029785156, "logps/ref_rejected": -85.06198120117188, "logps/rejected": -85.1737060546875, "loss": 1.4972, "margin_dpo/margin_mean": -0.022500991821289062, "margin_dpo/margin_std": 0.346932590007782, "step": 44 }, { "epoch": 0.06802721088435375, "fcm_dpo/beta": 1.7693145275115967, "fcm_dpo/delta": 0.4276418685913086, "fcm_dpo/margin": 0.102606400847435, "fcm_dpo/q_t": 0.45728254318237305, "grad_norm": 532.6951293945312, "learning_rate": 3.2835820895522385e-07, "logits/chosen": 0.1108519434928894, "logits/rejected": 0.058622151613235474, "logps/chosen": -62.343910217285156, "logps/ref_chosen": -62.24385452270508, "logps/ref_rejected": -92.96665954589844, "logps/rejected": -93.16932678222656, "loss": 1.3183, "margin_dpo/margin_mean": 0.10260695219039917, "margin_dpo/margin_std": 0.3721820116043091, "step": 45 }, { "epoch": 0.06953892668178382, "fcm_dpo/beta": 1.9173414707183838, "fcm_dpo/delta": 0.3439553380012512, "fcm_dpo/margin": 0.13755828142166138, "fcm_dpo/q_t": 0.4469168782234192, "grad_norm": 541.6246337890625, "learning_rate": 3.3582089552238805e-07, "logits/chosen": 0.11820603907108307, "logits/rejected": 0.07246644049882889, "logps/chosen": -61.56231689453125, "logps/ref_chosen": -61.498905181884766, "logps/ref_rejected": -78.91172790527344, "logps/rejected": -79.11270141601562, "loss": 1.2947, "margin_dpo/margin_mean": 0.13755744695663452, "margin_dpo/margin_std": 0.42911720275878906, "step": 46 }, { "epoch": 0.0710506424792139, "fcm_dpo/beta": 1.999995231628418, "fcm_dpo/delta": 0.20687083899974823, "fcm_dpo/margin": 0.20091423392295837, "fcm_dpo/q_t": 0.410871297121048, "grad_norm": 481.5821533203125, "learning_rate": 3.432835820895522e-07, "logits/chosen": 0.04726361110806465, "logits/rejected": 0.0036482480354607105, "logps/chosen": -51.672603607177734, "logps/ref_chosen": -51.578346252441406, "logps/ref_rejected": -68.2215576171875, "logps/rejected": -68.51673889160156, "loss": 1.1136, "margin_dpo/margin_mean": 0.200914204120636, "margin_dpo/margin_std": 0.30879902839660645, "step": 47 }, { "epoch": 0.07256235827664399, "fcm_dpo/beta": 2.1475226879119873, "fcm_dpo/delta": 0.29038742184638977, "fcm_dpo/margin": 0.0035857856273651123, "fcm_dpo/q_t": 0.5014243125915527, "grad_norm": 611.6590576171875, "learning_rate": 3.507462686567164e-07, "logits/chosen": 0.14877364039421082, "logits/rejected": 0.11894898861646652, "logps/chosen": -51.974037170410156, "logps/ref_chosen": -51.79365158081055, "logps/ref_rejected": -64.22503662109375, "logps/rejected": -64.40901184082031, "loss": 1.5324, "margin_dpo/margin_mean": 0.0035860538482666016, "margin_dpo/margin_std": 0.3811490833759308, "step": 48 }, { "epoch": 0.07407407407407407, "fcm_dpo/beta": 2.2708346843719482, "fcm_dpo/delta": 0.3342975378036499, "fcm_dpo/margin": 0.11987686157226562, "fcm_dpo/q_t": 0.4392061233520508, "grad_norm": 582.0693969726562, "learning_rate": 3.5820895522388055e-07, "logits/chosen": 0.02266620472073555, "logits/rejected": 0.0014079846441745758, "logps/chosen": -58.300750732421875, "logps/ref_chosen": -58.13460159301758, "logps/ref_rejected": -64.63206481933594, "logps/rejected": -64.9180908203125, "loss": 1.3162, "margin_dpo/margin_mean": 0.11987724900245667, "margin_dpo/margin_std": 0.40768203139305115, "step": 49 }, { "epoch": 0.07558578987150416, "fcm_dpo/beta": 2.3943064212799072, "fcm_dpo/delta": 0.2095961570739746, "fcm_dpo/margin": 0.03892210125923157, "fcm_dpo/q_t": 0.48075801134109497, "grad_norm": 678.2787475585938, "learning_rate": 3.6567164179104475e-07, "logits/chosen": 0.11922754347324371, "logits/rejected": 0.08905205130577087, "logps/chosen": -53.13764190673828, "logps/ref_chosen": -52.85643768310547, "logps/ref_rejected": -72.17460632324219, "logps/rejected": -72.49472045898438, "loss": 1.4695, "margin_dpo/margin_mean": 0.03892248868942261, "margin_dpo/margin_std": 0.3610963225364685, "step": 50 }, { "epoch": 0.07709750566893424, "fcm_dpo/beta": 2.45587158203125, "fcm_dpo/delta": 0.1764744520187378, "fcm_dpo/margin": 0.17561078071594238, "fcm_dpo/q_t": 0.4224376082420349, "grad_norm": 632.7899780273438, "learning_rate": 3.7313432835820895e-07, "logits/chosen": 0.06935389339923859, "logits/rejected": 0.042039111256599426, "logps/chosen": -63.871707916259766, "logps/ref_chosen": -63.65644073486328, "logps/ref_rejected": -86.13229370117188, "logps/rejected": -86.52317810058594, "loss": 1.303, "margin_dpo/margin_mean": 0.17561128735542297, "margin_dpo/margin_std": 0.49183645844459534, "step": 51 }, { "epoch": 0.07860922146636433, "fcm_dpo/beta": 2.520862102508545, "fcm_dpo/delta": 0.10279199481010437, "fcm_dpo/margin": 0.19924332201480865, "fcm_dpo/q_t": 0.40319594740867615, "grad_norm": 748.8369140625, "learning_rate": 3.805970149253731e-07, "logits/chosen": 0.0923246219754219, "logits/rejected": 0.04198576882481575, "logps/chosen": -68.06822204589844, "logps/ref_chosen": -67.8402099609375, "logps/ref_rejected": -96.97090911865234, "logps/rejected": -97.39816284179688, "loss": 1.2025, "margin_dpo/margin_mean": 0.19924335181713104, "margin_dpo/margin_std": 0.4342193007469177, "step": 52 }, { "epoch": 0.0801209372637944, "fcm_dpo/beta": 2.6138997077941895, "fcm_dpo/delta": 0.2746652364730835, "fcm_dpo/margin": 0.12827840447425842, "fcm_dpo/q_t": 0.43137750029563904, "grad_norm": 688.0023193359375, "learning_rate": 3.880597014925373e-07, "logits/chosen": 0.07982613146305084, "logits/rejected": 0.06915253400802612, "logps/chosen": -57.14837646484375, "logps/ref_chosen": -56.87813949584961, "logps/ref_rejected": -60.75569152832031, "logps/rejected": -61.15420913696289, "loss": 1.3153, "margin_dpo/margin_mean": 0.12827906012535095, "margin_dpo/margin_std": 0.39129942655563354, "step": 53 }, { "epoch": 0.08163265306122448, "fcm_dpo/beta": 2.705203056335449, "fcm_dpo/delta": 0.18513087928295135, "fcm_dpo/margin": 0.15532580018043518, "fcm_dpo/q_t": 0.41879549622535706, "grad_norm": 747.0259399414062, "learning_rate": 3.9552238805970144e-07, "logits/chosen": 0.059670425951480865, "logits/rejected": 0.04430554807186127, "logps/chosen": -47.553924560546875, "logps/ref_chosen": -47.26692199707031, "logps/ref_rejected": -62.19426727294922, "logps/rejected": -62.63658905029297, "loss": 1.3013, "margin_dpo/margin_mean": 0.15532565116882324, "margin_dpo/margin_std": 0.4225253760814667, "step": 54 }, { "epoch": 0.08314436885865457, "fcm_dpo/beta": 2.7942681312561035, "fcm_dpo/delta": -0.0019312426447868347, "fcm_dpo/margin": 0.21504637598991394, "fcm_dpo/q_t": 0.39572954177856445, "grad_norm": 801.02685546875, "learning_rate": 4.0298507462686564e-07, "logits/chosen": 0.005894917994737625, "logits/rejected": -0.06754656881093979, "logps/chosen": -50.632896423339844, "logps/ref_chosen": -50.32619094848633, "logps/ref_rejected": -92.44389343261719, "logps/rejected": -92.96563720703125, "loss": 1.2136, "margin_dpo/margin_mean": 0.2150469720363617, "margin_dpo/margin_std": 0.46592026948928833, "step": 55 }, { "epoch": 0.08465608465608465, "fcm_dpo/beta": 2.8418025970458984, "fcm_dpo/delta": 0.15264582633972168, "fcm_dpo/margin": 0.1597883105278015, "fcm_dpo/q_t": 0.4092499613761902, "grad_norm": 765.663818359375, "learning_rate": 4.1044776119402984e-07, "logits/chosen": 0.12413370609283447, "logits/rejected": 0.10176189243793488, "logps/chosen": -57.053218841552734, "logps/ref_chosen": -56.766971588134766, "logps/ref_rejected": -66.30504608154297, "logps/rejected": -66.7510757446289, "loss": 1.2304, "margin_dpo/margin_mean": 0.15978825092315674, "margin_dpo/margin_std": 0.38756871223449707, "step": 56 }, { "epoch": 0.08616780045351474, "fcm_dpo/beta": 2.760141134262085, "fcm_dpo/delta": -0.1479550153017044, "fcm_dpo/margin": 0.265031099319458, "fcm_dpo/q_t": 0.367247074842453, "grad_norm": 623.2364501953125, "learning_rate": 4.17910447761194e-07, "logits/chosen": 0.09955069422721863, "logits/rejected": 0.03469717875123024, "logps/chosen": -58.13420104980469, "logps/ref_chosen": -57.76774597167969, "logps/ref_rejected": -82.75698852539062, "logps/rejected": -83.38848114013672, "loss": 1.1, "margin_dpo/margin_mean": 0.26503095030784607, "margin_dpo/margin_std": 0.4654003381729126, "step": 57 }, { "epoch": 0.08767951625094482, "fcm_dpo/beta": 2.6086440086364746, "fcm_dpo/delta": -0.313137412071228, "fcm_dpo/margin": 0.21258825063705444, "fcm_dpo/q_t": 0.42052599787712097, "grad_norm": 794.4469604492188, "learning_rate": 4.253731343283582e-07, "logits/chosen": 0.06267602741718292, "logits/rejected": 0.04661073535680771, "logps/chosen": -73.14319610595703, "logps/ref_chosen": -72.76408386230469, "logps/ref_rejected": -84.49275207519531, "logps/rejected": -85.08444213867188, "loss": 1.3993, "margin_dpo/margin_mean": 0.21258807182312012, "margin_dpo/margin_std": 0.5894132852554321, "step": 58 }, { "epoch": 0.08919123204837491, "fcm_dpo/beta": 2.5581846237182617, "fcm_dpo/delta": -0.19727279245853424, "fcm_dpo/margin": 0.1872905194759369, "fcm_dpo/q_t": 0.424525648355484, "grad_norm": 646.26611328125, "learning_rate": 4.3283582089552234e-07, "logits/chosen": 0.13470959663391113, "logits/rejected": 0.06717785447835922, "logps/chosen": -50.17283630371094, "logps/ref_chosen": -49.820777893066406, "logps/ref_rejected": -77.14368438720703, "logps/rejected": -77.68303680419922, "loss": 1.3031, "margin_dpo/margin_mean": 0.187290757894516, "margin_dpo/margin_std": 0.4808598756790161, "step": 59 }, { "epoch": 0.09070294784580499, "fcm_dpo/beta": 2.6267943382263184, "fcm_dpo/delta": 0.35662251710891724, "fcm_dpo/margin": 0.09666135907173157, "fcm_dpo/q_t": 0.44885069131851196, "grad_norm": 917.6198120117188, "learning_rate": 4.4029850746268654e-07, "logits/chosen": 0.10107119381427765, "logits/rejected": 0.09958083927631378, "logps/chosen": -63.605628967285156, "logps/ref_chosen": -63.22477340698242, "logps/ref_rejected": -61.360477447509766, "logps/rejected": -61.83799743652344, "loss": 1.4647, "margin_dpo/margin_mean": 0.09666106104850769, "margin_dpo/margin_std": 0.4546535611152649, "step": 60 }, { "epoch": 0.09221466364323508, "fcm_dpo/beta": 2.7735347747802734, "fcm_dpo/delta": 0.24584190547466278, "fcm_dpo/margin": 0.1311599463224411, "fcm_dpo/q_t": 0.43129193782806396, "grad_norm": 845.3485107421875, "learning_rate": 4.4776119402985074e-07, "logits/chosen": 0.12099134922027588, "logits/rejected": 0.08859476447105408, "logps/chosen": -49.42567443847656, "logps/ref_chosen": -49.01679992675781, "logps/ref_rejected": -74.90817260742188, "logps/rejected": -75.44821166992188, "loss": 1.4356, "margin_dpo/margin_mean": 0.13115930557250977, "margin_dpo/margin_std": 0.48122483491897583, "step": 61 }, { "epoch": 0.09372637944066516, "fcm_dpo/beta": 2.867135524749756, "fcm_dpo/delta": -0.01591368019580841, "fcm_dpo/margin": 0.21290084719657898, "fcm_dpo/q_t": 0.3772019147872925, "grad_norm": 801.1388549804688, "learning_rate": 4.552238805970149e-07, "logits/chosen": 0.11106555908918381, "logits/rejected": 0.07138316333293915, "logps/chosen": -63.18655014038086, "logps/ref_chosen": -62.751869201660156, "logps/ref_rejected": -78.93360900878906, "logps/rejected": -79.58119201660156, "loss": 1.3206, "margin_dpo/margin_mean": 0.21290069818496704, "margin_dpo/margin_std": 0.5628150701522827, "step": 62 }, { "epoch": 0.09523809523809523, "fcm_dpo/beta": 2.718989372253418, "fcm_dpo/delta": -0.3079483211040497, "fcm_dpo/margin": 0.3243086040019989, "fcm_dpo/q_t": 0.3317871689796448, "grad_norm": 665.6367797851562, "learning_rate": 4.626865671641791e-07, "logits/chosen": 0.16446399688720703, "logits/rejected": 0.13985374569892883, "logps/chosen": -60.89350509643555, "logps/ref_chosen": -60.51525115966797, "logps/ref_rejected": -85.11021423339844, "logps/rejected": -85.81277465820312, "loss": 0.9762, "margin_dpo/margin_mean": 0.32430848479270935, "margin_dpo/margin_std": 0.4499557614326477, "step": 63 }, { "epoch": 0.09674981103552532, "fcm_dpo/beta": 2.7576589584350586, "fcm_dpo/delta": 0.3326021730899811, "fcm_dpo/margin": 0.10074976086616516, "fcm_dpo/q_t": 0.4503590166568756, "grad_norm": 842.7352294921875, "learning_rate": 4.701492537313433e-07, "logits/chosen": 0.07350215315818787, "logits/rejected": 0.04859868437051773, "logps/chosen": -51.69322204589844, "logps/ref_chosen": -51.20684814453125, "logps/ref_rejected": -66.93081665039062, "logps/rejected": -67.5179443359375, "loss": 1.4735, "margin_dpo/margin_mean": 0.10075005888938904, "margin_dpo/margin_std": 0.4669637978076935, "step": 64 }, { "epoch": 0.0982615268329554, "fcm_dpo/beta": 2.659918785095215, "fcm_dpo/delta": -0.42128875851631165, "fcm_dpo/margin": 0.36822575330734253, "fcm_dpo/q_t": 0.32521775364875793, "grad_norm": 680.5181884765625, "learning_rate": 4.776119402985074e-07, "logits/chosen": 0.16169877350330353, "logits/rejected": 0.1325380802154541, "logps/chosen": -67.74839782714844, "logps/ref_chosen": -67.2886962890625, "logps/ref_rejected": -74.44281005859375, "logps/rejected": -75.27073669433594, "loss": 1.0552, "margin_dpo/margin_mean": 0.3682248294353485, "margin_dpo/margin_std": 0.5791603326797485, "step": 65 }, { "epoch": 0.09977324263038549, "fcm_dpo/beta": 2.555934429168701, "fcm_dpo/delta": -0.0033800601959228516, "fcm_dpo/margin": 0.23333951830863953, "fcm_dpo/q_t": 0.3993811309337616, "grad_norm": 727.94482421875, "learning_rate": 4.850746268656717e-07, "logits/chosen": 0.09812623262405396, "logits/rejected": 0.07371249049901962, "logps/chosen": -71.21185302734375, "logps/ref_chosen": -70.743408203125, "logps/ref_rejected": -77.26499938964844, "logps/rejected": -77.96678161621094, "loss": 1.2259, "margin_dpo/margin_mean": 0.23333919048309326, "margin_dpo/margin_std": 0.53450608253479, "step": 66 }, { "epoch": 0.10128495842781557, "fcm_dpo/beta": 2.613276958465576, "fcm_dpo/delta": 0.020865630358457565, "fcm_dpo/margin": 0.22206446528434753, "fcm_dpo/q_t": 0.41789835691452026, "grad_norm": 757.7228393554688, "learning_rate": 4.925373134328357e-07, "logits/chosen": 0.08542338758707047, "logits/rejected": 0.028661586344242096, "logps/chosen": -60.98101043701172, "logps/ref_chosen": -60.60260009765625, "logps/ref_rejected": -75.22235870361328, "logps/rejected": -75.82283020019531, "loss": 1.2948, "margin_dpo/margin_mean": 0.22206458449363708, "margin_dpo/margin_std": 0.5843450427055359, "step": 67 }, { "epoch": 0.10279667422524566, "fcm_dpo/beta": 2.542264461517334, "fcm_dpo/delta": -0.09804828464984894, "fcm_dpo/margin": 0.2701076567173004, "fcm_dpo/q_t": 0.3731071352958679, "grad_norm": 731.7794189453125, "learning_rate": 5e-07, "logits/chosen": 0.023803437128663063, "logits/rejected": -0.00498470664024353, "logps/chosen": -78.01327514648438, "logps/ref_chosen": -77.52836608886719, "logps/ref_rejected": -93.17778015136719, "logps/rejected": -93.93280029296875, "loss": 1.2249, "margin_dpo/margin_mean": 0.27010834217071533, "margin_dpo/margin_std": 0.5684667825698853, "step": 68 }, { "epoch": 0.10430839002267574, "fcm_dpo/beta": 2.6366710662841797, "fcm_dpo/delta": 0.053127557039260864, "fcm_dpo/margin": 0.20509648323059082, "fcm_dpo/q_t": 0.3979250490665436, "grad_norm": 705.78271484375, "learning_rate": 4.999965034812934e-07, "logits/chosen": 0.09332907199859619, "logits/rejected": 0.04950705170631409, "logps/chosen": -66.47467041015625, "logps/ref_chosen": -65.94305419921875, "logps/ref_rejected": -89.7735595703125, "logps/rejected": -90.51026916503906, "loss": 1.3071, "margin_dpo/margin_mean": 0.20509576797485352, "margin_dpo/margin_std": 0.48671072721481323, "step": 69 }, { "epoch": 0.10582010582010581, "fcm_dpo/beta": 2.5765388011932373, "fcm_dpo/delta": 0.06844906508922577, "fcm_dpo/margin": 0.2062298059463501, "fcm_dpo/q_t": 0.40513622760772705, "grad_norm": 689.6810302734375, "learning_rate": 4.999860140229787e-07, "logits/chosen": 0.11499057710170746, "logits/rejected": 0.09173914790153503, "logps/chosen": -62.437904357910156, "logps/ref_chosen": -61.95791244506836, "logps/ref_rejected": -75.80945587158203, "logps/rejected": -76.49568176269531, "loss": 1.3418, "margin_dpo/margin_mean": 0.2062298059463501, "margin_dpo/margin_std": 0.5412454605102539, "step": 70 }, { "epoch": 0.1073318216175359, "fcm_dpo/beta": 2.740046977996826, "fcm_dpo/delta": 0.2646937966346741, "fcm_dpo/margin": 0.12536178529262543, "fcm_dpo/q_t": 0.4393612742424011, "grad_norm": 829.3287963867188, "learning_rate": 4.999685319184688e-07, "logits/chosen": 0.04790864139795303, "logits/rejected": 0.032932039350271225, "logps/chosen": -63.92980194091797, "logps/ref_chosen": -63.34757995605469, "logps/ref_rejected": -67.49658203125, "logps/rejected": -68.20416259765625, "loss": 1.4527, "margin_dpo/margin_mean": 0.1253620833158493, "margin_dpo/margin_std": 0.501061737537384, "step": 71 }, { "epoch": 0.10884353741496598, "fcm_dpo/beta": 2.6920130252838135, "fcm_dpo/delta": -0.16975723206996918, "fcm_dpo/margin": 0.2809232175350189, "fcm_dpo/q_t": 0.3771663010120392, "grad_norm": 761.12744140625, "learning_rate": 4.999440576567755e-07, "logits/chosen": 0.1025976687669754, "logits/rejected": 0.038601793348789215, "logps/chosen": -56.350685119628906, "logps/ref_chosen": -55.85929870605469, "logps/ref_rejected": -68.45423889160156, "logps/rejected": -69.22655487060547, "loss": 1.228, "margin_dpo/margin_mean": 0.2809233069419861, "margin_dpo/margin_std": 0.5827616453170776, "step": 72 }, { "epoch": 0.11035525321239607, "fcm_dpo/beta": 2.8283796310424805, "fcm_dpo/delta": 0.3381425738334656, "fcm_dpo/margin": 0.0951181948184967, "fcm_dpo/q_t": 0.45442691445350647, "grad_norm": 973.8218994140625, "learning_rate": 4.999125919224965e-07, "logits/chosen": 0.06888525933027267, "logits/rejected": 0.05457981303334236, "logps/chosen": -69.80500793457031, "logps/ref_chosen": -69.13880920410156, "logps/ref_rejected": -79.04586791992188, "logps/rejected": -79.80718231201172, "loss": 1.6541, "margin_dpo/margin_mean": 0.09511902928352356, "margin_dpo/margin_std": 0.5735561847686768, "step": 73 }, { "epoch": 0.11186696900982615, "fcm_dpo/beta": 2.7798233032226562, "fcm_dpo/delta": -0.34147214889526367, "fcm_dpo/margin": 0.32702162861824036, "fcm_dpo/q_t": 0.35246068239212036, "grad_norm": 655.92626953125, "learning_rate": 4.998741355957963e-07, "logits/chosen": 0.11409755051136017, "logits/rejected": 0.061128612607717514, "logps/chosen": -50.37104034423828, "logps/ref_chosen": -49.923736572265625, "logps/ref_rejected": -81.73213958740234, "logps/rejected": -82.5064697265625, "loss": 1.135, "margin_dpo/margin_mean": 0.32702159881591797, "margin_dpo/margin_std": 0.5811691284179688, "step": 74 }, { "epoch": 0.11337868480725624, "fcm_dpo/beta": 2.5214171409606934, "fcm_dpo/delta": -0.28205248713493347, "fcm_dpo/margin": 0.33626946806907654, "fcm_dpo/q_t": 0.35245347023010254, "grad_norm": 540.9854736328125, "learning_rate": 4.998286897523808e-07, "logits/chosen": 0.0784030556678772, "logits/rejected": 0.04736529663205147, "logps/chosen": -46.60426330566406, "logps/ref_chosen": -46.06875228881836, "logps/ref_rejected": -66.1181411743164, "logps/rejected": -66.98992156982422, "loss": 1.1406, "margin_dpo/margin_mean": 0.33626896142959595, "margin_dpo/margin_std": 0.5945340991020203, "step": 75 }, { "epoch": 0.11489040060468632, "fcm_dpo/beta": 2.5335192680358887, "fcm_dpo/delta": 0.030991412699222565, "fcm_dpo/margin": 0.22527188062667847, "fcm_dpo/q_t": 0.4082220196723938, "grad_norm": 678.3681030273438, "learning_rate": 4.997762556634679e-07, "logits/chosen": 0.06428511440753937, "logits/rejected": 0.023087866604328156, "logps/chosen": -54.62993240356445, "logps/ref_chosen": -54.06275177001953, "logps/ref_rejected": -74.87464141845703, "logps/rejected": -75.66709899902344, "loss": 1.3191, "margin_dpo/margin_mean": 0.22527173161506653, "margin_dpo/margin_std": 0.5910253524780273, "step": 76 }, { "epoch": 0.1164021164021164, "fcm_dpo/beta": 2.543339490890503, "fcm_dpo/delta": 0.14452725648880005, "fcm_dpo/margin": 0.17910584807395935, "fcm_dpo/q_t": 0.41442954540252686, "grad_norm": 735.1400146484375, "learning_rate": 4.99716834795752e-07, "logits/chosen": 0.09905283898115158, "logits/rejected": 0.059680838137865067, "logps/chosen": -53.726112365722656, "logps/ref_chosen": -53.07609176635742, "logps/ref_rejected": -74.45601654052734, "logps/rejected": -75.28514099121094, "loss": 1.4008, "margin_dpo/margin_mean": 0.1791057288646698, "margin_dpo/margin_std": 0.5253371000289917, "step": 77 }, { "epoch": 0.11791383219954649, "fcm_dpo/beta": 2.617399215698242, "fcm_dpo/delta": 0.07255343347787857, "fcm_dpo/margin": 0.20233109593391418, "fcm_dpo/q_t": 0.4078395366668701, "grad_norm": 876.5040893554688, "learning_rate": 4.996504288113623e-07, "logits/chosen": 0.07440010458230972, "logits/rejected": 0.05441536009311676, "logps/chosen": -68.28507995605469, "logps/ref_chosen": -67.72541809082031, "logps/ref_rejected": -79.03926849365234, "logps/rejected": -79.80126953125, "loss": 1.4932, "margin_dpo/margin_mean": 0.20233124494552612, "margin_dpo/margin_std": 0.6498842239379883, "step": 78 }, { "epoch": 0.11942554799697656, "fcm_dpo/beta": 2.5738704204559326, "fcm_dpo/delta": -0.23911376297473907, "fcm_dpo/margin": 0.3186035454273224, "fcm_dpo/q_t": 0.3460078537464142, "grad_norm": 575.8170776367188, "learning_rate": 4.995770395678171e-07, "logits/chosen": 0.1345619410276413, "logits/rejected": 0.07574545592069626, "logps/chosen": -52.80107879638672, "logps/ref_chosen": -52.16064453125, "logps/ref_rejected": -83.31062316894531, "logps/rejected": -84.2696533203125, "loss": 1.0213, "margin_dpo/margin_mean": 0.3186036944389343, "margin_dpo/margin_std": 0.49217498302459717, "step": 79 }, { "epoch": 0.12093726379440665, "fcm_dpo/beta": 2.5109777450561523, "fcm_dpo/delta": -0.09966981410980225, "fcm_dpo/margin": 0.2758336067199707, "fcm_dpo/q_t": 0.3807419538497925, "grad_norm": 784.2391967773438, "learning_rate": 4.994966691179711e-07, "logits/chosen": 0.09654100239276886, "logits/rejected": 0.03951960429549217, "logps/chosen": -62.05357360839844, "logps/ref_chosen": -61.410560607910156, "logps/ref_rejected": -78.66004943847656, "logps/rejected": -79.57888793945312, "loss": 1.2567, "margin_dpo/margin_mean": 0.2758333683013916, "margin_dpo/margin_std": 0.6135913133621216, "step": 80 }, { "epoch": 0.12244897959183673, "fcm_dpo/beta": 2.3567330837249756, "fcm_dpo/delta": -0.29948320984840393, "fcm_dpo/margin": 0.36920058727264404, "fcm_dpo/q_t": 0.35193294286727905, "grad_norm": 582.5974731445312, "learning_rate": 4.994093197099587e-07, "logits/chosen": 0.08831678330898285, "logits/rejected": 0.05518771708011627, "logps/chosen": -64.47744750976562, "logps/ref_chosen": -63.80437088012695, "logps/ref_rejected": -79.3484115600586, "logps/rejected": -80.39068603515625, "loss": 1.0674, "margin_dpo/margin_mean": 0.36920061707496643, "margin_dpo/margin_std": 0.6044883728027344, "step": 81 }, { "epoch": 0.12396069538926682, "fcm_dpo/beta": 2.2348222732543945, "fcm_dpo/delta": -0.3301578760147095, "fcm_dpo/margin": 0.4032464921474457, "fcm_dpo/q_t": 0.31583303213119507, "grad_norm": 494.9556884765625, "learning_rate": 4.993149937871306e-07, "logits/chosen": 0.07512053102254868, "logits/rejected": 0.012894164770841599, "logps/chosen": -49.45317840576172, "logps/ref_chosen": -48.817893981933594, "logps/ref_rejected": -70.31497955322266, "logps/rejected": -71.353515625, "loss": 0.9006, "margin_dpo/margin_mean": 0.4032464325428009, "margin_dpo/margin_std": 0.4665597677230835, "step": 82 }, { "epoch": 0.1254724111866969, "fcm_dpo/beta": 2.105210781097412, "fcm_dpo/delta": -0.2685595750808716, "fcm_dpo/margin": 0.401851087808609, "fcm_dpo/q_t": 0.3413703143596649, "grad_norm": 505.2091979980469, "learning_rate": 4.992136939879856e-07, "logits/chosen": 0.13643789291381836, "logits/rejected": 0.08787815272808075, "logps/chosen": -57.84598159790039, "logps/ref_chosen": -57.15077209472656, "logps/ref_rejected": -75.1710205078125, "logps/rejected": -76.26808166503906, "loss": 1.0579, "margin_dpo/margin_mean": 0.40185046195983887, "margin_dpo/margin_std": 0.6407204866409302, "step": 83 }, { "epoch": 0.12698412698412698, "fcm_dpo/beta": 2.1374802589416504, "fcm_dpo/delta": 0.2308960258960724, "fcm_dpo/margin": 0.17705368995666504, "fcm_dpo/q_t": 0.4283042848110199, "grad_norm": 767.9581909179688, "learning_rate": 4.991054231460969e-07, "logits/chosen": 0.12467605620622635, "logits/rejected": 0.08387472480535507, "logps/chosen": -65.62751007080078, "logps/ref_chosen": -64.77729797363281, "logps/ref_rejected": -84.71949768066406, "logps/rejected": -85.74675750732422, "loss": 1.3964, "margin_dpo/margin_mean": 0.17705348134040833, "margin_dpo/margin_std": 0.594845175743103, "step": 84 }, { "epoch": 0.12849584278155707, "fcm_dpo/beta": 2.0624887943267822, "fcm_dpo/delta": -0.3910365104675293, "fcm_dpo/margin": 0.4630565047264099, "fcm_dpo/q_t": 0.33032843470573425, "grad_norm": 518.486083984375, "learning_rate": 4.989901842900325e-07, "logits/chosen": 0.11759524047374725, "logits/rejected": 0.07492982596158981, "logps/chosen": -50.919776916503906, "logps/ref_chosen": -50.25169372558594, "logps/ref_rejected": -66.55439758300781, "logps/rejected": -67.68553161621094, "loss": 1.0213, "margin_dpo/margin_mean": 0.46305617690086365, "margin_dpo/margin_std": 0.6838178038597107, "step": 85 }, { "epoch": 0.13000755857898716, "fcm_dpo/beta": 1.9690525531768799, "fcm_dpo/delta": -0.09890329092741013, "fcm_dpo/margin": 0.3509269654750824, "fcm_dpo/q_t": 0.37694597244262695, "grad_norm": 459.40576171875, "learning_rate": 4.988679806432711e-07, "logits/chosen": 0.14702852070331573, "logits/rejected": 0.12872420251369476, "logps/chosen": -61.52762985229492, "logps/ref_chosen": -60.72917938232422, "logps/ref_rejected": -72.30961608886719, "logps/rejected": -73.45899200439453, "loss": 1.1121, "margin_dpo/margin_mean": 0.35092705488204956, "margin_dpo/margin_std": 0.6370328664779663, "step": 86 }, { "epoch": 0.13151927437641722, "fcm_dpo/beta": 1.907198190689087, "fcm_dpo/delta": -0.10468879342079163, "fcm_dpo/margin": 0.3619868755340576, "fcm_dpo/q_t": 0.3715543746948242, "grad_norm": 590.1043701171875, "learning_rate": 4.987388156241114e-07, "logits/chosen": 0.0830545723438263, "logits/rejected": 0.02916475385427475, "logps/chosen": -66.58967590332031, "logps/ref_chosen": -65.75796508789062, "logps/ref_rejected": -84.81159973144531, "logps/rejected": -86.00529479980469, "loss": 1.2636, "margin_dpo/margin_mean": 0.3619861900806427, "margin_dpo/margin_std": 0.7822234630584717, "step": 87 }, { "epoch": 0.1330309901738473, "fcm_dpo/beta": 1.892395257949829, "fcm_dpo/delta": -0.09083393216133118, "fcm_dpo/margin": 0.361200749874115, "fcm_dpo/q_t": 0.37534597516059875, "grad_norm": 558.025146484375, "learning_rate": 4.986026928455767e-07, "logits/chosen": 0.15574653446674347, "logits/rejected": 0.13011637330055237, "logps/chosen": -63.60407257080078, "logps/ref_chosen": -62.82402801513672, "logps/ref_rejected": -74.9607162475586, "logps/rejected": -76.10195922851562, "loss": 1.2043, "margin_dpo/margin_mean": 0.36120113730430603, "margin_dpo/margin_std": 0.7272647023200989, "step": 88 }, { "epoch": 0.1345427059712774, "fcm_dpo/beta": 1.8832402229309082, "fcm_dpo/delta": -0.11566749215126038, "fcm_dpo/margin": 0.3750349283218384, "fcm_dpo/q_t": 0.3602682650089264, "grad_norm": 456.3847351074219, "learning_rate": 4.984596161153135e-07, "logits/chosen": 0.20242905616760254, "logits/rejected": 0.12148602306842804, "logps/chosen": -41.91245651245117, "logps/ref_chosen": -41.191436767578125, "logps/ref_rejected": -85.44769287109375, "logps/rejected": -86.54374694824219, "loss": 1.1854, "margin_dpo/margin_mean": 0.37503573298454285, "margin_dpo/margin_std": 0.7346115708351135, "step": 89 }, { "epoch": 0.1360544217687075, "fcm_dpo/beta": 1.8486794233322144, "fcm_dpo/delta": 0.013222428038716316, "fcm_dpo/margin": 0.31774118542671204, "fcm_dpo/q_t": 0.39341413974761963, "grad_norm": 499.37640380859375, "learning_rate": 4.983095894354857e-07, "logits/chosen": 0.07887136936187744, "logits/rejected": 0.027213769033551216, "logps/chosen": -57.39919662475586, "logps/ref_chosen": -56.58390808105469, "logps/ref_rejected": -86.86978149414062, "logps/rejected": -88.00281524658203, "loss": 1.2172, "margin_dpo/margin_mean": 0.317741334438324, "margin_dpo/margin_std": 0.7089375853538513, "step": 90 }, { "epoch": 0.13756613756613756, "fcm_dpo/beta": 1.7621798515319824, "fcm_dpo/delta": -0.3605840802192688, "fcm_dpo/margin": 0.5263523459434509, "fcm_dpo/q_t": 0.3355669379234314, "grad_norm": 379.0741271972656, "learning_rate": 4.98152617002662e-07, "logits/chosen": 0.0857606828212738, "logits/rejected": 0.044405438005924225, "logps/chosen": -53.12702560424805, "logps/ref_chosen": -52.38234329223633, "logps/ref_rejected": -72.17642211914062, "logps/rejected": -73.44746398925781, "loss": 0.9948, "margin_dpo/margin_mean": 0.5263521671295166, "margin_dpo/margin_std": 0.7791818380355835, "step": 91 }, { "epoch": 0.13907785336356765, "fcm_dpo/beta": 1.7615249156951904, "fcm_dpo/delta": -0.010726943612098694, "fcm_dpo/margin": 0.3414357304573059, "fcm_dpo/q_t": 0.38870492577552795, "grad_norm": 441.7849426269531, "learning_rate": 4.979887032076988e-07, "logits/chosen": 0.16990810632705688, "logits/rejected": 0.1302517056465149, "logps/chosen": -53.923187255859375, "logps/ref_chosen": -53.00870132446289, "logps/ref_rejected": -79.77812957763672, "logps/rejected": -81.0340576171875, "loss": 1.2497, "margin_dpo/margin_mean": 0.34143590927124023, "margin_dpo/margin_std": 0.7262367010116577, "step": 92 }, { "epoch": 0.14058956916099774, "fcm_dpo/beta": 1.7267677783966064, "fcm_dpo/delta": -0.014829907566308975, "fcm_dpo/margin": 0.35534825921058655, "fcm_dpo/q_t": 0.38280731439590454, "grad_norm": 380.222412109375, "learning_rate": 4.978178526356172e-07, "logits/chosen": 0.13078534603118896, "logits/rejected": 0.10297398269176483, "logps/chosen": -45.80891799926758, "logps/ref_chosen": -44.90705108642578, "logps/ref_rejected": -58.7879524230957, "logps/rejected": -60.045162200927734, "loss": 1.1901, "margin_dpo/margin_mean": 0.35534799098968506, "margin_dpo/margin_std": 0.7640775442123413, "step": 93 }, { "epoch": 0.1421012849584278, "fcm_dpo/beta": 1.669801115989685, "fcm_dpo/delta": -0.09243573993444443, "fcm_dpo/margin": 0.40765514969825745, "fcm_dpo/q_t": 0.37269920110702515, "grad_norm": 421.4466552734375, "learning_rate": 4.976400700654751e-07, "logits/chosen": 0.1810636818408966, "logits/rejected": 0.1434386819601059, "logps/chosen": -60.6273193359375, "logps/ref_chosen": -59.93777084350586, "logps/ref_rejected": -79.3138427734375, "logps/rejected": -80.41104125976562, "loss": 1.217, "margin_dpo/margin_mean": 0.4076548218727112, "margin_dpo/margin_std": 0.9024351835250854, "step": 94 }, { "epoch": 0.1436130007558579, "fcm_dpo/beta": 1.5891482830047607, "fcm_dpo/delta": -0.3270946741104126, "fcm_dpo/margin": 0.5614709854125977, "fcm_dpo/q_t": 0.32878196239471436, "grad_norm": 361.7147216796875, "learning_rate": 4.974553604702332e-07, "logits/chosen": 0.07957080006599426, "logits/rejected": 0.01844627410173416, "logps/chosen": -61.05595397949219, "logps/ref_chosen": -60.168487548828125, "logps/ref_rejected": -90.73665618896484, "logps/rejected": -92.18559265136719, "loss": 0.9685, "margin_dpo/margin_mean": 0.5614703297615051, "margin_dpo/margin_std": 0.7650089859962463, "step": 95 }, { "epoch": 0.14512471655328799, "fcm_dpo/beta": 1.5399678945541382, "fcm_dpo/delta": -0.2155621349811554, "fcm_dpo/margin": 0.5187313556671143, "fcm_dpo/q_t": 0.3439177870750427, "grad_norm": 334.061279296875, "learning_rate": 4.972637290166157e-07, "logits/chosen": 0.11804474890232086, "logits/rejected": 0.0765266865491867, "logps/chosen": -61.5324821472168, "logps/ref_chosen": -60.66877746582031, "logps/ref_rejected": -88.30673217773438, "logps/rejected": -89.68916320800781, "loss": 1.0112, "margin_dpo/margin_mean": 0.518730878829956, "margin_dpo/margin_std": 0.7388438582420349, "step": 96 }, { "epoch": 0.14663643235071808, "fcm_dpo/beta": 1.4972609281539917, "fcm_dpo/delta": 0.018384963274002075, "fcm_dpo/margin": 0.3870452344417572, "fcm_dpo/q_t": 0.40638962388038635, "grad_norm": 447.0497741699219, "learning_rate": 4.970651810649666e-07, "logits/chosen": 0.06303801387548447, "logits/rejected": 0.02052931673824787, "logps/chosen": -66.08944702148438, "logps/ref_chosen": -65.04412078857422, "logps/ref_rejected": -78.42092895507812, "logps/rejected": -79.85330200195312, "loss": 1.1872, "margin_dpo/margin_mean": 0.38704511523246765, "margin_dpo/margin_std": 0.8554011583328247, "step": 97 }, { "epoch": 0.14814814814814814, "fcm_dpo/beta": 1.5304303169250488, "fcm_dpo/delta": 0.13418416678905487, "fcm_dpo/margin": 0.3075883686542511, "fcm_dpo/q_t": 0.4082970917224884, "grad_norm": 418.0280456542969, "learning_rate": 4.968597221690985e-07, "logits/chosen": 0.1497489959001541, "logits/rejected": 0.12327147275209427, "logps/chosen": -56.33129119873047, "logps/ref_chosen": -55.503231048583984, "logps/ref_rejected": -72.81553649902344, "logps/rejected": -73.9511947631836, "loss": 1.2315, "margin_dpo/margin_mean": 0.3075885772705078, "margin_dpo/margin_std": 0.695486307144165, "step": 98 }, { "epoch": 0.14965986394557823, "fcm_dpo/beta": 1.5460871458053589, "fcm_dpo/delta": -0.0771804004907608, "fcm_dpo/margin": 0.4346781373023987, "fcm_dpo/q_t": 0.3763027489185333, "grad_norm": 433.2595520019531, "learning_rate": 4.966473580761389e-07, "logits/chosen": 0.15021108090877533, "logits/rejected": 0.11408122628927231, "logps/chosen": -59.42683410644531, "logps/ref_chosen": -58.57563781738281, "logps/ref_rejected": -78.693603515625, "logps/rejected": -79.97947692871094, "loss": 1.1167, "margin_dpo/margin_mean": 0.43467745184898376, "margin_dpo/margin_std": 0.7960972189903259, "step": 99 }, { "epoch": 0.15117157974300832, "fcm_dpo/beta": 1.5719187259674072, "fcm_dpo/delta": 0.07484941184520721, "fcm_dpo/margin": 0.3342527747154236, "fcm_dpo/q_t": 0.41647306084632874, "grad_norm": 518.3218383789062, "learning_rate": 4.964280947263676e-07, "logits/chosen": 0.12237241864204407, "logits/rejected": 0.11543639004230499, "logps/chosen": -80.54015350341797, "logps/ref_chosen": -79.58343505859375, "logps/ref_rejected": -92.152587890625, "logps/rejected": -93.44355773925781, "loss": 1.3578, "margin_dpo/margin_mean": 0.3342531621456146, "margin_dpo/margin_std": 0.9492334127426147, "step": 100 }, { "epoch": 0.15268329554043839, "fcm_dpo/beta": 1.5015451908111572, "fcm_dpo/delta": -0.34811440110206604, "fcm_dpo/margin": 0.610725998878479, "fcm_dpo/q_t": 0.3262676000595093, "grad_norm": 315.0995788574219, "learning_rate": 4.96201938253052e-07, "logits/chosen": 0.14972041547298431, "logits/rejected": 0.1133972704410553, "logps/chosen": -53.138023376464844, "logps/ref_chosen": -52.332786560058594, "logps/ref_rejected": -69.55589294433594, "logps/rejected": -70.97186279296875, "loss": 0.9147, "margin_dpo/margin_mean": 0.6107259392738342, "margin_dpo/margin_std": 0.741790771484375, "step": 101 }, { "epoch": 0.15419501133786848, "fcm_dpo/beta": 1.4856407642364502, "fcm_dpo/delta": 0.10125008225440979, "fcm_dpo/margin": 0.33820411562919617, "fcm_dpo/q_t": 0.40724170207977295, "grad_norm": 422.3230895996094, "learning_rate": 4.959688949822748e-07, "logits/chosen": 0.07880719006061554, "logits/rejected": 0.0402679443359375, "logps/chosen": -65.69737243652344, "logps/ref_chosen": -64.74348449707031, "logps/ref_rejected": -69.06132507324219, "logps/rejected": -70.35342407226562, "loss": 1.2587, "margin_dpo/margin_mean": 0.33820363879203796, "margin_dpo/margin_std": 0.8350169062614441, "step": 102 }, { "epoch": 0.15570672713529857, "fcm_dpo/beta": 1.474139928817749, "fcm_dpo/delta": -0.0702066570520401, "fcm_dpo/margin": 0.4514557421207428, "fcm_dpo/q_t": 0.38118183612823486, "grad_norm": 412.3791198730469, "learning_rate": 4.957289714327572e-07, "logits/chosen": 0.13948319852352142, "logits/rejected": 0.1087922602891922, "logps/chosen": -64.72733306884766, "logps/ref_chosen": -63.83664321899414, "logps/ref_rejected": -79.32362365722656, "logps/rejected": -80.665771484375, "loss": 1.1328, "margin_dpo/margin_mean": 0.45145532488822937, "margin_dpo/margin_std": 0.8555996417999268, "step": 103 }, { "epoch": 0.15721844293272866, "fcm_dpo/beta": 1.4898967742919922, "fcm_dpo/delta": 0.06615878641605377, "fcm_dpo/margin": 0.35946568846702576, "fcm_dpo/q_t": 0.407916396856308, "grad_norm": 478.3046569824219, "learning_rate": 4.954821743156767e-07, "logits/chosen": 0.16390858590602875, "logits/rejected": 0.08152991533279419, "logps/chosen": -61.924827575683594, "logps/ref_chosen": -60.99920654296875, "logps/ref_rejected": -98.84645080566406, "logps/rejected": -100.13154602050781, "loss": 1.2451, "margin_dpo/margin_mean": 0.3594653904438019, "margin_dpo/margin_std": 0.8471232652664185, "step": 104 }, { "epoch": 0.15873015873015872, "fcm_dpo/beta": 1.4996893405914307, "fcm_dpo/delta": 0.011385314166545868, "fcm_dpo/margin": 0.3916703760623932, "fcm_dpo/q_t": 0.39221182465553284, "grad_norm": 480.3543395996094, "learning_rate": 4.952285105344791e-07, "logits/chosen": 0.10116258263587952, "logits/rejected": 0.04981735721230507, "logps/chosen": -71.88473510742188, "logps/ref_chosen": -70.95027160644531, "logps/ref_rejected": -87.88340759277344, "logps/rejected": -89.20955657958984, "loss": 1.2319, "margin_dpo/margin_mean": 0.39167043566703796, "margin_dpo/margin_std": 0.8842525482177734, "step": 105 }, { "epoch": 0.1602418745275888, "fcm_dpo/beta": 1.4681777954101562, "fcm_dpo/delta": -0.06567725539207458, "fcm_dpo/margin": 0.4502679407596588, "fcm_dpo/q_t": 0.37183088064193726, "grad_norm": 399.4701232910156, "learning_rate": 4.949679871846857e-07, "logits/chosen": 0.1115492507815361, "logits/rejected": 0.0991833508014679, "logps/chosen": -63.30242919921875, "logps/ref_chosen": -62.45933151245117, "logps/ref_rejected": -67.00595092773438, "logps/rejected": -68.29930877685547, "loss": 1.1228, "margin_dpo/margin_mean": 0.450268030166626, "margin_dpo/margin_std": 0.8252195119857788, "step": 106 }, { "epoch": 0.1617535903250189, "fcm_dpo/beta": 1.491356372833252, "fcm_dpo/delta": 0.19463634490966797, "fcm_dpo/margin": 0.27668291330337524, "fcm_dpo/q_t": 0.428573340177536, "grad_norm": 520.7399291992188, "learning_rate": 4.947006115536947e-07, "logits/chosen": 0.07313170284032822, "logits/rejected": 0.053381193429231644, "logps/chosen": -76.79938507080078, "logps/ref_chosen": -75.83796691894531, "logps/ref_rejected": -87.74038696289062, "logps/rejected": -88.97848510742188, "loss": 1.3955, "margin_dpo/margin_mean": 0.2766827940940857, "margin_dpo/margin_std": 0.8988056778907776, "step": 107 }, { "epoch": 0.16326530612244897, "fcm_dpo/beta": 1.5026856660842896, "fcm_dpo/delta": -0.1375723034143448, "fcm_dpo/margin": 0.48424917459487915, "fcm_dpo/q_t": 0.367892861366272, "grad_norm": 377.7269287109375, "learning_rate": 4.944263911205772e-07, "logits/chosen": 0.09277549386024475, "logits/rejected": 0.0655142068862915, "logps/chosen": -69.25541687011719, "logps/ref_chosen": -68.39323425292969, "logps/ref_rejected": -83.24267578125, "logps/rejected": -84.58909606933594, "loss": 1.0992, "margin_dpo/margin_mean": 0.4842491149902344, "margin_dpo/margin_std": 0.8379406332969666, "step": 108 }, { "epoch": 0.16477702191987906, "fcm_dpo/beta": 1.4416108131408691, "fcm_dpo/delta": -0.14801928400993347, "fcm_dpo/margin": 0.5100686550140381, "fcm_dpo/q_t": 0.3660755157470703, "grad_norm": 357.8731994628906, "learning_rate": 4.941453335558681e-07, "logits/chosen": 0.08502168953418732, "logits/rejected": 0.03738076239824295, "logps/chosen": -56.36671829223633, "logps/ref_chosen": -55.52748107910156, "logps/ref_rejected": -83.55218505859375, "logps/rejected": -84.9014892578125, "loss": 1.0253, "margin_dpo/margin_mean": 0.510068416595459, "margin_dpo/margin_std": 0.8034826517105103, "step": 109 }, { "epoch": 0.16628873771730915, "fcm_dpo/beta": 1.4788618087768555, "fcm_dpo/delta": 0.11002928763628006, "fcm_dpo/margin": 0.33293917775154114, "fcm_dpo/q_t": 0.4101300537586212, "grad_norm": 451.916015625, "learning_rate": 4.938574467213517e-07, "logits/chosen": 0.07888751477003098, "logits/rejected": 0.08660773187875748, "logps/chosen": -82.09772491455078, "logps/ref_chosen": -81.15874481201172, "logps/ref_rejected": -72.56021118164062, "logps/rejected": -73.83213806152344, "loss": 1.246, "margin_dpo/margin_mean": 0.3329389691352844, "margin_dpo/margin_std": 0.8031635284423828, "step": 110 }, { "epoch": 0.16780045351473924, "fcm_dpo/beta": 1.4763123989105225, "fcm_dpo/delta": -0.048699431121349335, "fcm_dpo/margin": 0.43631669878959656, "fcm_dpo/q_t": 0.38030728697776794, "grad_norm": 397.7984313964844, "learning_rate": 4.935627386698418e-07, "logits/chosen": 0.17604178190231323, "logits/rejected": 0.1414678990840912, "logps/chosen": -53.380836486816406, "logps/ref_chosen": -52.358985900878906, "logps/ref_rejected": -77.06150817871094, "logps/rejected": -78.51966857910156, "loss": 1.2185, "margin_dpo/margin_mean": 0.43631690740585327, "margin_dpo/margin_std": 0.8934418559074402, "step": 111 }, { "epoch": 0.1693121693121693, "fcm_dpo/beta": 1.439178228378296, "fcm_dpo/delta": -0.10804037004709244, "fcm_dpo/margin": 0.4868055582046509, "fcm_dpo/q_t": 0.3607805073261261, "grad_norm": 399.50689697265625, "learning_rate": 4.932612176449559e-07, "logits/chosen": 0.061367664486169815, "logits/rejected": 0.0060233473777771, "logps/chosen": -63.8758544921875, "logps/ref_chosen": -63.02006530761719, "logps/ref_rejected": -111.36941528320312, "logps/rejected": -112.71200561523438, "loss": 1.1135, "margin_dpo/margin_mean": 0.48680511116981506, "margin_dpo/margin_std": 0.843925952911377, "step": 112 }, { "epoch": 0.1708238851095994, "fcm_dpo/beta": 1.4334089756011963, "fcm_dpo/delta": -0.07620993256568909, "fcm_dpo/margin": 0.4656754732131958, "fcm_dpo/q_t": 0.3700527548789978, "grad_norm": 418.9619445800781, "learning_rate": 4.929528920808854e-07, "logits/chosen": 0.07376405596733093, "logits/rejected": 0.04032863304018974, "logps/chosen": -56.809410095214844, "logps/ref_chosen": -55.80766296386719, "logps/ref_rejected": -69.84014129638672, "logps/rejected": -71.30755615234375, "loss": 1.2208, "margin_dpo/margin_mean": 0.46567538380622864, "margin_dpo/margin_std": 0.9440125226974487, "step": 113 }, { "epoch": 0.17233560090702948, "fcm_dpo/beta": 1.3131306171417236, "fcm_dpo/delta": -0.38322171568870544, "fcm_dpo/margin": 0.7164374589920044, "fcm_dpo/q_t": 0.32581445574760437, "grad_norm": 272.3829040527344, "learning_rate": 4.92637770602159e-07, "logits/chosen": 0.15654101967811584, "logits/rejected": 0.0994241014122963, "logps/chosen": -67.11263275146484, "logps/ref_chosen": -66.33277130126953, "logps/ref_rejected": -71.61489868164062, "logps/rejected": -73.1112060546875, "loss": 0.9336, "margin_dpo/margin_mean": 0.716437578201294, "margin_dpo/margin_std": 0.9500221610069275, "step": 114 }, { "epoch": 0.17384731670445955, "fcm_dpo/beta": 1.2982274293899536, "fcm_dpo/delta": -0.07483263313770294, "fcm_dpo/margin": 0.5153573751449585, "fcm_dpo/q_t": 0.3759816586971283, "grad_norm": 347.9417724609375, "learning_rate": 4.923158620234019e-07, "logits/chosen": 0.11214175820350647, "logits/rejected": 0.05748974159359932, "logps/chosen": -56.71136474609375, "logps/ref_chosen": -55.74903869628906, "logps/ref_rejected": -79.59849548339844, "logps/rejected": -81.07618713378906, "loss": 1.0579, "margin_dpo/margin_mean": 0.5153576135635376, "margin_dpo/margin_std": 0.8605426549911499, "step": 115 }, { "epoch": 0.17535903250188964, "fcm_dpo/beta": 1.2686808109283447, "fcm_dpo/delta": -0.09204667806625366, "fcm_dpo/margin": 0.5405768752098083, "fcm_dpo/q_t": 0.3639325499534607, "grad_norm": 282.1678771972656, "learning_rate": 4.91987175349089e-07, "logits/chosen": 0.11463910341262817, "logits/rejected": 0.054369859397411346, "logps/chosen": -50.27439880371094, "logps/ref_chosen": -49.36516571044922, "logps/ref_rejected": -72.84671020507812, "logps/rejected": -74.2965087890625, "loss": 1.0219, "margin_dpo/margin_mean": 0.5405769348144531, "margin_dpo/margin_std": 0.7944033145904541, "step": 116 }, { "epoch": 0.17687074829931973, "fcm_dpo/beta": 1.2446372509002686, "fcm_dpo/delta": -0.007452197372913361, "fcm_dpo/margin": 0.4859652817249298, "fcm_dpo/q_t": 0.3739723563194275, "grad_norm": 297.24615478515625, "learning_rate": 4.916517197732933e-07, "logits/chosen": 0.13651910424232483, "logits/rejected": 0.10319022834300995, "logps/chosen": -58.53379821777344, "logps/ref_chosen": -57.710899353027344, "logps/ref_rejected": -69.77253723144531, "logps/rejected": -71.08141326904297, "loss": 1.117, "margin_dpo/margin_mean": 0.48596563935279846, "margin_dpo/margin_std": 0.8542994260787964, "step": 117 }, { "epoch": 0.17838246409674982, "fcm_dpo/beta": 1.234614610671997, "fcm_dpo/delta": -0.08241216838359833, "fcm_dpo/margin": 0.5473380088806152, "fcm_dpo/q_t": 0.36874717473983765, "grad_norm": 312.6552734375, "learning_rate": 4.913095046794281e-07, "logits/chosen": 0.2066066563129425, "logits/rejected": 0.16842034459114075, "logps/chosen": -53.291236877441406, "logps/ref_chosen": -52.479896545410156, "logps/ref_rejected": -81.359130859375, "logps/rejected": -82.71780395507812, "loss": 1.0609, "margin_dpo/margin_mean": 0.547337532043457, "margin_dpo/margin_std": 0.8634383678436279, "step": 118 }, { "epoch": 0.17989417989417988, "fcm_dpo/beta": 1.2622461318969727, "fcm_dpo/delta": 0.11250065267086029, "fcm_dpo/margin": 0.3900427222251892, "fcm_dpo/q_t": 0.40436500310897827, "grad_norm": 346.1368408203125, "learning_rate": 4.909605396399855e-07, "logits/chosen": 0.08708840608596802, "logits/rejected": 0.053689248859882355, "logps/chosen": -62.44677734375, "logps/ref_chosen": -61.35767364501953, "logps/ref_rejected": -75.71510314941406, "logps/rejected": -77.19424438476562, "loss": 1.2627, "margin_dpo/margin_mean": 0.3900427222251892, "margin_dpo/margin_std": 0.9560626745223999, "step": 119 }, { "epoch": 0.18140589569160998, "fcm_dpo/beta": 1.2444019317626953, "fcm_dpo/delta": -0.11569002270698547, "fcm_dpo/margin": 0.5686198472976685, "fcm_dpo/q_t": 0.3610965609550476, "grad_norm": 287.4088439941406, "learning_rate": 4.906048344162676e-07, "logits/chosen": 0.10341674834489822, "logits/rejected": 0.04983753338456154, "logps/chosen": -60.8148078918457, "logps/ref_chosen": -59.907569885253906, "logps/ref_rejected": -79.6910629272461, "logps/rejected": -81.16691589355469, "loss": 0.9954, "margin_dpo/margin_mean": 0.5686200261116028, "margin_dpo/margin_std": 0.7998465299606323, "step": 120 }, { "epoch": 0.18291761148904007, "fcm_dpo/beta": 1.2363691329956055, "fcm_dpo/delta": 0.008508594706654549, "fcm_dpo/margin": 0.4788138270378113, "fcm_dpo/q_t": 0.38364630937576294, "grad_norm": 282.2060546875, "learning_rate": 4.902423989581143e-07, "logits/chosen": 0.21674856543540955, "logits/rejected": 0.13925334811210632, "logps/chosen": -56.681575775146484, "logps/ref_chosen": -55.66604232788086, "logps/ref_rejected": -101.56233978271484, "logps/rejected": -103.05668640136719, "loss": 1.0875, "margin_dpo/margin_mean": 0.47881391644477844, "margin_dpo/margin_std": 0.8130632638931274, "step": 121 }, { "epoch": 0.18442932728647016, "fcm_dpo/beta": 1.1877247095108032, "fcm_dpo/delta": -0.3032481074333191, "fcm_dpo/margin": 0.7386814951896667, "fcm_dpo/q_t": 0.33100205659866333, "grad_norm": 291.67059326171875, "learning_rate": 4.898732434036243e-07, "logits/chosen": 0.11649955809116364, "logits/rejected": 0.08512992411851883, "logps/chosen": -64.32618713378906, "logps/ref_chosen": -63.334373474121094, "logps/ref_rejected": -73.67523193359375, "logps/rejected": -75.40573120117188, "loss": 0.9224, "margin_dpo/margin_mean": 0.7386811971664429, "margin_dpo/margin_std": 0.961184024810791, "step": 122 }, { "epoch": 0.18594104308390022, "fcm_dpo/beta": 1.1708195209503174, "fcm_dpo/delta": -0.07181403040885925, "fcm_dpo/margin": 0.5666613578796387, "fcm_dpo/q_t": 0.3692570924758911, "grad_norm": 300.7272033691406, "learning_rate": 4.894973780788722e-07, "logits/chosen": 0.10572931170463562, "logits/rejected": 0.06912669539451599, "logps/chosen": -57.7982177734375, "logps/ref_chosen": -56.89874267578125, "logps/ref_rejected": -78.97028350830078, "logps/rejected": -80.4364242553711, "loss": 1.0918, "margin_dpo/margin_mean": 0.5666618347167969, "margin_dpo/margin_std": 0.9293673038482666, "step": 123 }, { "epoch": 0.1874527588813303, "fcm_dpo/beta": 1.1005598306655884, "fcm_dpo/delta": -0.28181761503219604, "fcm_dpo/margin": 0.7790488004684448, "fcm_dpo/q_t": 0.32957297563552856, "grad_norm": 234.5536346435547, "learning_rate": 4.89114813497619e-07, "logits/chosen": 0.15644077956676483, "logits/rejected": 0.10393092036247253, "logps/chosen": -58.090946197509766, "logps/ref_chosen": -57.116085052490234, "logps/ref_rejected": -87.93074035644531, "logps/rejected": -89.68465423583984, "loss": 0.9107, "margin_dpo/margin_mean": 0.7790486812591553, "margin_dpo/margin_std": 0.9213578701019287, "step": 124 }, { "epoch": 0.1889644746787604, "fcm_dpo/beta": 1.066893458366394, "fcm_dpo/delta": -0.0766465961933136, "fcm_dpo/margin": 0.628391683101654, "fcm_dpo/q_t": 0.36512354016304016, "grad_norm": 266.8402099609375, "learning_rate": 4.887255603610184e-07, "logits/chosen": 0.16830167174339294, "logits/rejected": 0.11665754020214081, "logps/chosen": -66.76890563964844, "logps/ref_chosen": -65.7061767578125, "logps/ref_rejected": -91.72711944580078, "logps/rejected": -93.41824340820312, "loss": 1.0499, "margin_dpo/margin_mean": 0.6283919215202332, "margin_dpo/margin_std": 0.96758633852005, "step": 125 }, { "epoch": 0.19047619047619047, "fcm_dpo/beta": 1.0625847578048706, "fcm_dpo/delta": -0.014553094282746315, "fcm_dpo/margin": 0.5772075653076172, "fcm_dpo/q_t": 0.390198290348053, "grad_norm": 249.5762176513672, "learning_rate": 4.883296295573176e-07, "logits/chosen": 0.007961070165038109, "logits/rejected": 0.001765979453921318, "logps/chosen": -69.1175308227539, "logps/ref_chosen": -68.17608642578125, "logps/ref_rejected": -65.1175537109375, "logps/rejected": -66.63619995117188, "loss": 1.1388, "margin_dpo/margin_mean": 0.577207624912262, "margin_dpo/margin_std": 1.1864867210388184, "step": 126 }, { "epoch": 0.19198790627362056, "fcm_dpo/beta": 1.034506916999817, "fcm_dpo/delta": -0.14043903350830078, "fcm_dpo/margin": 0.703222393989563, "fcm_dpo/q_t": 0.3558955490589142, "grad_norm": 248.7368621826172, "learning_rate": 4.87927032161552e-07, "logits/chosen": 0.11731210350990295, "logits/rejected": 0.08830760419368744, "logps/chosen": -62.924774169921875, "logps/ref_chosen": -61.88023376464844, "logps/ref_rejected": -68.46012878417969, "logps/rejected": -70.20789337158203, "loss": 0.9918, "margin_dpo/margin_mean": 0.7032221555709839, "margin_dpo/margin_std": 0.9526394605636597, "step": 127 }, { "epoch": 0.19349962207105065, "fcm_dpo/beta": 1.027363896369934, "fcm_dpo/delta": -0.016578957438468933, "fcm_dpo/margin": 0.5983456373214722, "fcm_dpo/q_t": 0.37779900431632996, "grad_norm": 250.40814208984375, "learning_rate": 4.875177794352363e-07, "logits/chosen": 0.14589917659759521, "logits/rejected": 0.09506042301654816, "logps/chosen": -67.86463165283203, "logps/ref_chosen": -66.708984375, "logps/ref_rejected": -94.97969055175781, "logps/rejected": -96.73368835449219, "loss": 1.1327, "margin_dpo/margin_mean": 0.5983456373214722, "margin_dpo/margin_std": 1.1081733703613281, "step": 128 }, { "epoch": 0.19501133786848074, "fcm_dpo/beta": 1.0448354482650757, "fcm_dpo/delta": 0.09077582508325577, "fcm_dpo/margin": 0.4919281601905823, "fcm_dpo/q_t": 0.3963082730770111, "grad_norm": 280.36907958984375, "learning_rate": 4.871018828260491e-07, "logits/chosen": 0.11568663269281387, "logits/rejected": 0.10815045237541199, "logps/chosen": -66.55685424804688, "logps/ref_chosen": -65.33882904052734, "logps/ref_rejected": -68.06109619140625, "logps/rejected": -69.77104187011719, "loss": 1.1198, "margin_dpo/margin_mean": 0.4919281303882599, "margin_dpo/margin_std": 0.8836801052093506, "step": 129 }, { "epoch": 0.1965230536659108, "fcm_dpo/beta": 1.020601749420166, "fcm_dpo/delta": -0.08440172672271729, "fcm_dpo/margin": 0.6577671766281128, "fcm_dpo/q_t": 0.36100125312805176, "grad_norm": 262.4644470214844, "learning_rate": 4.866793539675126e-07, "logits/chosen": 0.0954977497458458, "logits/rejected": 0.051679350435733795, "logps/chosen": -59.74810791015625, "logps/ref_chosen": -58.660743713378906, "logps/ref_rejected": -79.24510192871094, "logps/rejected": -80.990234375, "loss": 0.9852, "margin_dpo/margin_mean": 0.6577669382095337, "margin_dpo/margin_std": 0.7911086082458496, "step": 130 }, { "epoch": 0.1980347694633409, "fcm_dpo/beta": 1.0078396797180176, "fcm_dpo/delta": -0.15889500081539154, "fcm_dpo/margin": 0.7407888770103455, "fcm_dpo/q_t": 0.3538159728050232, "grad_norm": 228.83726501464844, "learning_rate": 4.86250204678667e-07, "logits/chosen": 0.09853261709213257, "logits/rejected": 0.04450761526823044, "logps/chosen": -53.55117416381836, "logps/ref_chosen": -52.51453399658203, "logps/ref_rejected": -85.18299865722656, "logps/rejected": -86.96041870117188, "loss": 1.0013, "margin_dpo/margin_mean": 0.7407891750335693, "margin_dpo/margin_std": 1.04507577419281, "step": 131 }, { "epoch": 0.19954648526077098, "fcm_dpo/beta": 1.000880479812622, "fcm_dpo/delta": -0.013521028682589531, "fcm_dpo/margin": 0.612145185470581, "fcm_dpo/q_t": 0.37782806158065796, "grad_norm": 272.0556640625, "learning_rate": 4.858144469637408e-07, "logits/chosen": 0.17530867457389832, "logits/rejected": 0.14589731395244598, "logps/chosen": -66.89759826660156, "logps/ref_chosen": -65.68513488769531, "logps/ref_rejected": -69.54120635986328, "logps/rejected": -71.36582946777344, "loss": 1.1305, "margin_dpo/margin_mean": 0.6121453046798706, "margin_dpo/margin_std": 1.148574709892273, "step": 132 }, { "epoch": 0.20105820105820105, "fcm_dpo/beta": 1.0055501461029053, "fcm_dpo/delta": 0.04156200587749481, "fcm_dpo/margin": 0.557662844657898, "fcm_dpo/q_t": 0.38376274704933167, "grad_norm": 264.5892639160156, "learning_rate": 4.853720930118138e-07, "logits/chosen": 0.1060660257935524, "logits/rejected": 0.09657873213291168, "logps/chosen": -64.78939819335938, "logps/ref_chosen": -63.598114013671875, "logps/ref_rejected": -73.72798156738281, "logps/rejected": -75.4769287109375, "loss": 1.108, "margin_dpo/margin_mean": 0.5576624870300293, "margin_dpo/margin_std": 0.9732006192207336, "step": 133 }, { "epoch": 0.20256991685563114, "fcm_dpo/beta": 0.9682743549346924, "fcm_dpo/delta": -0.23356372117996216, "fcm_dpo/margin": 0.8395987153053284, "fcm_dpo/q_t": 0.3344127833843231, "grad_norm": 208.65017700195312, "learning_rate": 4.849231551964771e-07, "logits/chosen": 0.20197008550167084, "logits/rejected": 0.15169410407543182, "logps/chosen": -54.892337799072266, "logps/ref_chosen": -53.79457092285156, "logps/ref_rejected": -74.16741943359375, "logps/rejected": -76.10478973388672, "loss": 0.9149, "margin_dpo/margin_mean": 0.8395991325378418, "margin_dpo/margin_std": 0.9888654351234436, "step": 134 }, { "epoch": 0.20408163265306123, "fcm_dpo/beta": 0.9734876751899719, "fcm_dpo/delta": 0.10381930321455002, "fcm_dpo/margin": 0.5151646137237549, "fcm_dpo/q_t": 0.4011284112930298, "grad_norm": 222.495849609375, "learning_rate": 4.844676460754862e-07, "logits/chosen": 0.13704870641231537, "logits/rejected": 0.10613438487052917, "logps/chosen": -50.54332733154297, "logps/ref_chosen": -49.441078186035156, "logps/ref_rejected": -65.96878051757812, "logps/rejected": -67.58619689941406, "loss": 1.1536, "margin_dpo/margin_mean": 0.5151640176773071, "margin_dpo/margin_std": 1.0082441568374634, "step": 135 }, { "epoch": 0.20559334845049132, "fcm_dpo/beta": 0.9727784395217896, "fcm_dpo/delta": -0.06393898278474808, "fcm_dpo/margin": 0.6782030463218689, "fcm_dpo/q_t": 0.37919890880584717, "grad_norm": 289.11376953125, "learning_rate": 4.840055783904106e-07, "logits/chosen": 0.1222413182258606, "logits/rejected": 0.06046907976269722, "logps/chosen": -68.07365417480469, "logps/ref_chosen": -66.75926208496094, "logps/ref_rejected": -94.61787414550781, "logps/rejected": -96.61046600341797, "loss": 1.178, "margin_dpo/margin_mean": 0.6782038807868958, "margin_dpo/margin_std": 1.3909220695495605, "step": 136 }, { "epoch": 0.20710506424792138, "fcm_dpo/beta": 0.9643727540969849, "fcm_dpo/delta": -0.053648076951503754, "fcm_dpo/margin": 0.6741567850112915, "fcm_dpo/q_t": 0.37186992168426514, "grad_norm": 218.29730224609375, "learning_rate": 4.835369650662767e-07, "logits/chosen": 0.14991703629493713, "logits/rejected": 0.12426199018955231, "logps/chosen": -57.97966766357422, "logps/ref_chosen": -56.78379821777344, "logps/ref_rejected": -69.89952087402344, "logps/rejected": -71.76954650878906, "loss": 1.1013, "margin_dpo/margin_mean": 0.6741572618484497, "margin_dpo/margin_std": 1.1703739166259766, "step": 137 }, { "epoch": 0.20861678004535147, "fcm_dpo/beta": 0.978662371635437, "fcm_dpo/delta": 0.12415525317192078, "fcm_dpo/margin": 0.49200883507728577, "fcm_dpo/q_t": 0.4015718102455139, "grad_norm": 254.64759826660156, "learning_rate": 4.830618192112065e-07, "logits/chosen": 0.1492946743965149, "logits/rejected": 0.11607992649078369, "logps/chosen": -60.26990509033203, "logps/ref_chosen": -58.766014099121094, "logps/ref_rejected": -68.12371826171875, "logps/rejected": -70.11962890625, "loss": 1.2273, "margin_dpo/margin_mean": 0.49200862646102905, "margin_dpo/margin_std": 1.141247034072876, "step": 138 }, { "epoch": 0.21012849584278157, "fcm_dpo/beta": 0.9795228242874146, "fcm_dpo/delta": -0.11077337712049484, "fcm_dpo/margin": 0.7158379554748535, "fcm_dpo/q_t": 0.3632628917694092, "grad_norm": 245.56246948242188, "learning_rate": 4.825801541160509e-07, "logits/chosen": 0.10423211008310318, "logits/rejected": 0.07835687696933746, "logps/chosen": -72.62336730957031, "logps/ref_chosen": -71.2255859375, "logps/ref_rejected": -82.1834716796875, "logps/rejected": -84.29708862304688, "loss": 1.0295, "margin_dpo/margin_mean": 0.7158380746841431, "margin_dpo/margin_std": 1.026993751525879, "step": 139 }, { "epoch": 0.21164021164021163, "fcm_dpo/beta": 0.9235955476760864, "fcm_dpo/delta": -0.28677302598953247, "fcm_dpo/margin": 0.9338580369949341, "fcm_dpo/q_t": 0.3302295506000519, "grad_norm": 248.67660522460938, "learning_rate": 4.820919832540181e-07, "logits/chosen": 0.11341448128223419, "logits/rejected": 0.07324690371751785, "logps/chosen": -64.4937973022461, "logps/ref_chosen": -63.27766418457031, "logps/ref_rejected": -83.30647277832031, "logps/rejected": -85.45646667480469, "loss": 1.0038, "margin_dpo/margin_mean": 0.9338581562042236, "margin_dpo/margin_std": 1.3346967697143555, "step": 140 }, { "epoch": 0.21315192743764172, "fcm_dpo/beta": 0.8918517827987671, "fcm_dpo/delta": -0.07686886191368103, "fcm_dpo/margin": 0.7517862319946289, "fcm_dpo/q_t": 0.3738592863082886, "grad_norm": 238.80625915527344, "learning_rate": 4.815973202802966e-07, "logits/chosen": 0.14074575901031494, "logits/rejected": 0.10288909077644348, "logps/chosen": -63.11772155761719, "logps/ref_chosen": -61.76676940917969, "logps/ref_rejected": -88.60601806640625, "logps/rejected": -90.7087631225586, "loss": 1.0866, "margin_dpo/margin_mean": 0.7517873048782349, "margin_dpo/margin_std": 1.2767117023468018, "step": 141 }, { "epoch": 0.2146636432350718, "fcm_dpo/beta": 0.9092315435409546, "fcm_dpo/delta": 0.09595802426338196, "fcm_dpo/margin": 0.5593085885047913, "fcm_dpo/q_t": 0.39656299352645874, "grad_norm": 221.80491638183594, "learning_rate": 4.810961790316729e-07, "logits/chosen": 0.11350887268781662, "logits/rejected": 0.09118533134460449, "logps/chosen": -66.62858581542969, "logps/ref_chosen": -65.2747802734375, "logps/ref_rejected": -81.1378173828125, "logps/rejected": -83.05094146728516, "loss": 1.1396, "margin_dpo/margin_mean": 0.5593085289001465, "margin_dpo/margin_std": 1.056554913520813, "step": 142 }, { "epoch": 0.2161753590325019, "fcm_dpo/beta": 0.9147884845733643, "fcm_dpo/delta": 0.02730133756995201, "fcm_dpo/margin": 0.6277639865875244, "fcm_dpo/q_t": 0.3815556764602661, "grad_norm": 275.406982421875, "learning_rate": 4.805885735261454e-07, "logits/chosen": 0.13471481204032898, "logits/rejected": 0.11962398886680603, "logps/chosen": -63.97998046875, "logps/ref_chosen": -62.617828369140625, "logps/ref_rejected": -70.39239501953125, "logps/rejected": -72.38230895996094, "loss": 1.135, "margin_dpo/margin_mean": 0.6277639865875244, "margin_dpo/margin_std": 1.1966910362243652, "step": 143 }, { "epoch": 0.21768707482993196, "fcm_dpo/beta": 0.9090726375579834, "fcm_dpo/delta": -0.06992662698030472, "fcm_dpo/margin": 0.7318644523620605, "fcm_dpo/q_t": 0.37165123224258423, "grad_norm": 247.63697814941406, "learning_rate": 4.800745179625307e-07, "logits/chosen": 0.11681709438562393, "logits/rejected": 0.09179212152957916, "logps/chosen": -62.25020980834961, "logps/ref_chosen": -60.80268859863281, "logps/ref_rejected": -79.07284545898438, "logps/rejected": -81.25222778320312, "loss": 1.1048, "margin_dpo/margin_mean": 0.7318645715713501, "margin_dpo/margin_std": 1.285665512084961, "step": 144 }, { "epoch": 0.21919879062736206, "fcm_dpo/beta": 0.8981227874755859, "fcm_dpo/delta": -0.06589463353157043, "fcm_dpo/margin": 0.736532986164093, "fcm_dpo/q_t": 0.3731478452682495, "grad_norm": 267.699951171875, "learning_rate": 4.795540267200686e-07, "logits/chosen": 0.06849057227373123, "logits/rejected": 0.08533404767513275, "logps/chosen": -75.94109344482422, "logps/ref_chosen": -74.61146545410156, "logps/ref_rejected": -83.24461364746094, "logps/rejected": -85.31077575683594, "loss": 1.1106, "margin_dpo/margin_mean": 0.73653244972229, "margin_dpo/margin_std": 1.3149917125701904, "step": 145 }, { "epoch": 0.22071050642479215, "fcm_dpo/beta": 0.8851872086524963, "fcm_dpo/delta": -0.06594666093587875, "fcm_dpo/margin": 0.7474347352981567, "fcm_dpo/q_t": 0.370151549577713, "grad_norm": 210.81593322753906, "learning_rate": 4.790271143580173e-07, "logits/chosen": 0.06502532958984375, "logits/rejected": 0.049605365842580795, "logps/chosen": -59.10708236694336, "logps/ref_chosen": -57.84098434448242, "logps/ref_rejected": -67.47422790527344, "logps/rejected": -69.48776245117188, "loss": 1.0465, "margin_dpo/margin_mean": 0.7474343776702881, "margin_dpo/margin_std": 1.1844416856765747, "step": 146 }, { "epoch": 0.2222222222222222, "fcm_dpo/beta": 0.8997819423675537, "fcm_dpo/delta": 0.11252903193235397, "fcm_dpo/margin": 0.5466242432594299, "fcm_dpo/q_t": 0.39630264043807983, "grad_norm": 281.1480407714844, "learning_rate": 4.784937956152489e-07, "logits/chosen": 0.10493030399084091, "logits/rejected": 0.06713957339525223, "logps/chosen": -68.31733703613281, "logps/ref_chosen": -66.81346893310547, "logps/ref_rejected": -81.1796875, "logps/rejected": -83.23017883300781, "loss": 1.2139, "margin_dpo/margin_mean": 0.546623945236206, "margin_dpo/margin_std": 1.2387210130691528, "step": 147 }, { "epoch": 0.2237339380196523, "fcm_dpo/beta": 0.8874396085739136, "fcm_dpo/delta": -0.14694717526435852, "fcm_dpo/margin": 0.8294271230697632, "fcm_dpo/q_t": 0.35729408264160156, "grad_norm": 183.421875, "learning_rate": 4.779540854098347e-07, "logits/chosen": 0.21212086081504822, "logits/rejected": 0.1479133516550064, "logps/chosen": -50.136993408203125, "logps/ref_chosen": -48.6877555847168, "logps/ref_rejected": -67.50503540039062, "logps/rejected": -69.78369903564453, "loss": 1.0178, "margin_dpo/margin_mean": 0.8294271230697632, "margin_dpo/margin_std": 1.2071778774261475, "step": 148 }, { "epoch": 0.2252456538170824, "fcm_dpo/beta": 0.8515768051147461, "fcm_dpo/delta": -0.18191197514533997, "fcm_dpo/margin": 0.9019421935081482, "fcm_dpo/q_t": 0.35212624073028564, "grad_norm": 195.56973266601562, "learning_rate": 4.774079988386296e-07, "logits/chosen": 0.05525077506899834, "logits/rejected": 0.012519324198365211, "logps/chosen": -56.84036636352539, "logps/ref_chosen": -55.143775939941406, "logps/ref_rejected": -64.79888916015625, "logps/rejected": -67.39742279052734, "loss": 0.9851, "margin_dpo/margin_mean": 0.9019420742988586, "margin_dpo/margin_std": 1.2582323551177979, "step": 149 }, { "epoch": 0.22675736961451248, "fcm_dpo/beta": 0.8074795007705688, "fcm_dpo/delta": -0.29154300689697266, "fcm_dpo/margin": 1.072885274887085, "fcm_dpo/q_t": 0.3229233920574188, "grad_norm": 183.36480712890625, "learning_rate": 4.768555511768486e-07, "logits/chosen": 0.10887187719345093, "logits/rejected": 0.07173850387334824, "logps/chosen": -68.60828399658203, "logps/ref_chosen": -67.47074890136719, "logps/ref_rejected": -89.21170806884766, "logps/rejected": -91.422119140625, "loss": 0.9129, "margin_dpo/margin_mean": 1.072885274887085, "margin_dpo/margin_std": 1.268955945968628, "step": 150 }, { "epoch": 0.22826908541194255, "fcm_dpo/beta": 0.7697363495826721, "fcm_dpo/delta": -0.23718157410621643, "fcm_dpo/margin": 1.0629806518554688, "fcm_dpo/q_t": 0.33049389719963074, "grad_norm": 152.50540161132812, "learning_rate": 4.762967578776406e-07, "logits/chosen": 0.1117258220911026, "logits/rejected": 0.06622982025146484, "logps/chosen": -53.716087341308594, "logps/ref_chosen": -52.45954132080078, "logps/ref_rejected": -79.0630111694336, "logps/rejected": -81.38253784179688, "loss": 0.8937, "margin_dpo/margin_mean": 1.0629799365997314, "margin_dpo/margin_std": 1.1620628833770752, "step": 151 }, { "epoch": 0.22978080120937264, "fcm_dpo/beta": 0.7504318356513977, "fcm_dpo/delta": -0.11411969363689423, "fcm_dpo/margin": 0.9407040476799011, "fcm_dpo/q_t": 0.35981130599975586, "grad_norm": 173.67433166503906, "learning_rate": 4.757316345716553e-07, "logits/chosen": 0.18621572852134705, "logits/rejected": 0.1421864628791809, "logps/chosen": -58.064674377441406, "logps/ref_chosen": -56.5538330078125, "logps/ref_rejected": -76.55074310302734, "logps/rejected": -79.00228881835938, "loss": 1.0133, "margin_dpo/margin_mean": 0.9407035112380981, "margin_dpo/margin_std": 1.3374671936035156, "step": 152 }, { "epoch": 0.23129251700680273, "fcm_dpo/beta": 0.7341737151145935, "fcm_dpo/delta": -0.054202862083911896, "fcm_dpo/margin": 0.8861300945281982, "fcm_dpo/q_t": 0.36215510964393616, "grad_norm": 165.9080810546875, "learning_rate": 4.751601970666064e-07, "logits/chosen": 0.09495476633310318, "logits/rejected": 0.060364123433828354, "logps/chosen": -69.46710205078125, "logps/ref_chosen": -68.00689697265625, "logps/ref_rejected": -74.83482360839844, "logps/rejected": -77.18115997314453, "loss": 0.9848, "margin_dpo/margin_mean": 0.8861297369003296, "margin_dpo/margin_std": 1.1169935464859009, "step": 153 }, { "epoch": 0.2328042328042328, "fcm_dpo/beta": 0.741860032081604, "fcm_dpo/delta": 0.07353915274143219, "fcm_dpo/margin": 0.7147952914237976, "fcm_dpo/q_t": 0.39196473360061646, "grad_norm": 181.0109405517578, "learning_rate": 4.745824613468292e-07, "logits/chosen": 0.15605072677135468, "logits/rejected": 0.1525098979473114, "logps/chosen": -60.95063400268555, "logps/ref_chosen": -59.222537994384766, "logps/ref_rejected": -64.19131469726562, "logps/rejected": -66.63421630859375, "loss": 1.1976, "margin_dpo/margin_mean": 0.7147954702377319, "margin_dpo/margin_std": 1.5419955253601074, "step": 154 }, { "epoch": 0.23431594860166288, "fcm_dpo/beta": 0.7455950975418091, "fcm_dpo/delta": 0.033561088144779205, "fcm_dpo/margin": 0.7621853351593018, "fcm_dpo/q_t": 0.3880825638771057, "grad_norm": 202.052490234375, "learning_rate": 4.7399844357283393e-07, "logits/chosen": 0.17025849223136902, "logits/rejected": 0.15225285291671753, "logps/chosen": -70.11236572265625, "logps/ref_chosen": -68.45469665527344, "logps/ref_rejected": -77.91763305664062, "logps/rejected": -80.33748626708984, "loss": 1.1799, "margin_dpo/margin_mean": 0.7621854543685913, "margin_dpo/margin_std": 1.5514647960662842, "step": 155 }, { "epoch": 0.23582766439909297, "fcm_dpo/beta": 0.7278070449829102, "fcm_dpo/delta": -0.19898098707199097, "fcm_dpo/margin": 1.0765371322631836, "fcm_dpo/q_t": 0.3450552225112915, "grad_norm": 177.1094970703125, "learning_rate": 4.7340816008085305e-07, "logits/chosen": 0.12135711312294006, "logits/rejected": 0.08175168931484222, "logps/chosen": -68.89403533935547, "logps/ref_chosen": -67.26959991455078, "logps/ref_rejected": -86.95914459228516, "logps/rejected": -89.66011047363281, "loss": 0.9381, "margin_dpo/margin_mean": 1.0765368938446045, "margin_dpo/margin_std": 1.3415881395339966, "step": 156 }, { "epoch": 0.23733938019652306, "fcm_dpo/beta": 0.7118189930915833, "fcm_dpo/delta": -0.0019494742155075073, "fcm_dpo/margin": 0.8422449827194214, "fcm_dpo/q_t": 0.37625086307525635, "grad_norm": 170.37062072753906, "learning_rate": 4.728116273823847e-07, "logits/chosen": 0.10696057975292206, "logits/rejected": 0.08737646043300629, "logps/chosen": -56.2518424987793, "logps/ref_chosen": -54.77287292480469, "logps/ref_rejected": -63.87866973876953, "logps/rejected": -66.19988250732422, "loss": 1.068, "margin_dpo/margin_mean": 0.8422449827194214, "margin_dpo/margin_std": 1.3212616443634033, "step": 157 }, { "epoch": 0.23885109599395313, "fcm_dpo/beta": 0.7147585153579712, "fcm_dpo/delta": -0.01963052526116371, "fcm_dpo/margin": 0.8647520542144775, "fcm_dpo/q_t": 0.3729850649833679, "grad_norm": 183.34059143066406, "learning_rate": 4.7220886216373085e-07, "logits/chosen": 0.12363462150096893, "logits/rejected": 0.09393608570098877, "logps/chosen": -66.52328491210938, "logps/ref_chosen": -64.92271423339844, "logps/ref_rejected": -82.23789978027344, "logps/rejected": -84.70321655273438, "loss": 1.0499, "margin_dpo/margin_mean": 0.8647524118423462, "margin_dpo/margin_std": 1.3220181465148926, "step": 158 }, { "epoch": 0.24036281179138322, "fcm_dpo/beta": 0.721579909324646, "fcm_dpo/delta": -0.027753673493862152, "fcm_dpo/margin": 0.864780068397522, "fcm_dpo/q_t": 0.36861127614974976, "grad_norm": 200.0271453857422, "learning_rate": 4.715998812855304e-07, "logits/chosen": 0.15346220135688782, "logits/rejected": 0.12215955555438995, "logps/chosen": -58.74644470214844, "logps/ref_chosen": -57.046993255615234, "logps/ref_rejected": -73.32441711425781, "logps/rejected": -75.88864135742188, "loss": 1.0532, "margin_dpo/margin_mean": 0.8647797107696533, "margin_dpo/margin_std": 1.29610013961792, "step": 159 }, { "epoch": 0.2418745275888133, "fcm_dpo/beta": 0.7163674831390381, "fcm_dpo/delta": 0.023492824286222458, "fcm_dpo/margin": 0.8065335154533386, "fcm_dpo/q_t": 0.38599950075149536, "grad_norm": 164.5467071533203, "learning_rate": 4.7098470178228755e-07, "logits/chosen": 0.02466520667076111, "logits/rejected": -0.009153635241091251, "logps/chosen": -51.687294006347656, "logps/ref_chosen": -49.806915283203125, "logps/ref_rejected": -68.3370132446289, "logps/rejected": -71.02392578125, "loss": 1.0989, "margin_dpo/margin_mean": 0.806533694267273, "margin_dpo/margin_std": 1.4155818223953247, "step": 160 }, { "epoch": 0.24338624338624337, "fcm_dpo/beta": 0.7120651006698608, "fcm_dpo/delta": -0.07392075657844543, "fcm_dpo/margin": 0.9392312169075012, "fcm_dpo/q_t": 0.37427765130996704, "grad_norm": 163.46099853515625, "learning_rate": 4.703633408618955e-07, "logits/chosen": 0.13712120056152344, "logits/rejected": 0.1043066680431366, "logps/chosen": -54.344390869140625, "logps/ref_chosen": -52.50048828125, "logps/ref_rejected": -66.04540252685547, "logps/rejected": -68.82853698730469, "loss": 1.066, "margin_dpo/margin_mean": 0.9392315149307251, "margin_dpo/margin_std": 1.528224229812622, "step": 161 }, { "epoch": 0.24489795918367346, "fcm_dpo/beta": 0.674642026424408, "fcm_dpo/delta": -0.27861732244491577, "fcm_dpo/margin": 1.2653286457061768, "fcm_dpo/q_t": 0.3272291421890259, "grad_norm": 152.6450653076172, "learning_rate": 4.697358159051549e-07, "logits/chosen": 0.18720999360084534, "logits/rejected": 0.14557072520256042, "logps/chosen": -71.4220962524414, "logps/ref_chosen": -69.46919250488281, "logps/ref_rejected": -92.00952911376953, "logps/rejected": -95.22775268554688, "loss": 0.9046, "margin_dpo/margin_mean": 1.2653292417526245, "margin_dpo/margin_std": 1.4768035411834717, "step": 162 }, { "epoch": 0.24640967498110355, "fcm_dpo/beta": 0.6542046070098877, "fcm_dpo/delta": -0.20735059678554535, "fcm_dpo/margin": 1.2088186740875244, "fcm_dpo/q_t": 0.34692829847335815, "grad_norm": 157.87530517578125, "learning_rate": 4.691021444652876e-07, "logits/chosen": 0.12066847085952759, "logits/rejected": 0.08132156729698181, "logps/chosen": -52.40902328491211, "logps/ref_chosen": -50.613834381103516, "logps/ref_rejected": -74.62033081054688, "logps/rejected": -77.62433624267578, "loss": 0.9751, "margin_dpo/margin_mean": 1.2088189125061035, "margin_dpo/margin_std": 1.6171760559082031, "step": 163 }, { "epoch": 0.24792139077853365, "fcm_dpo/beta": 0.6249392032623291, "fcm_dpo/delta": -0.18022370338439941, "fcm_dpo/margin": 1.226841926574707, "fcm_dpo/q_t": 0.3479166030883789, "grad_norm": 136.3046112060547, "learning_rate": 4.6846234426744624e-07, "logits/chosen": 0.11843805015087128, "logits/rejected": 0.0651586502790451, "logps/chosen": -56.87559509277344, "logps/ref_chosen": -54.848114013671875, "logps/ref_rejected": -79.0630111694336, "logps/rejected": -82.31733703613281, "loss": 0.9642, "margin_dpo/margin_mean": 1.226841926574707, "margin_dpo/margin_std": 1.567777395248413, "step": 164 }, { "epoch": 0.2494331065759637, "fcm_dpo/beta": 0.6082509160041809, "fcm_dpo/delta": -0.1315651535987854, "fcm_dpo/margin": 1.187391757965088, "fcm_dpo/q_t": 0.34501856565475464, "grad_norm": 124.84252166748047, "learning_rate": 4.678164332082175e-07, "logits/chosen": 0.15890663862228394, "logits/rejected": 0.11084471642971039, "logps/chosen": -53.137123107910156, "logps/ref_chosen": -51.089210510253906, "logps/ref_rejected": -71.23370361328125, "logps/rejected": -74.46900177001953, "loss": 0.9169, "margin_dpo/margin_mean": 1.1873915195465088, "margin_dpo/margin_std": 1.251267433166504, "step": 165 }, { "epoch": 0.2509448223733938, "fcm_dpo/beta": 0.610527753829956, "fcm_dpo/delta": 0.0991782620549202, "fcm_dpo/margin": 0.8285017609596252, "fcm_dpo/q_t": 0.3985018730163574, "grad_norm": 163.606689453125, "learning_rate": 4.6716442935512214e-07, "logits/chosen": 0.12126700580120087, "logits/rejected": 0.05116554722189903, "logps/chosen": -65.15718841552734, "logps/ref_chosen": -63.19081115722656, "logps/ref_rejected": -93.8402099609375, "logps/rejected": -96.63508605957031, "loss": 1.1193, "margin_dpo/margin_mean": 0.8285011053085327, "margin_dpo/margin_std": 1.486143946647644, "step": 166 }, { "epoch": 0.25245653817082386, "fcm_dpo/beta": 0.5978639721870422, "fcm_dpo/delta": -0.133498877286911, "fcm_dpo/margin": 1.2082691192626953, "fcm_dpo/q_t": 0.3483770489692688, "grad_norm": 123.5165023803711, "learning_rate": 4.6650635094610966e-07, "logits/chosen": 0.07217014580965042, "logits/rejected": 0.041656773537397385, "logps/chosen": -60.747589111328125, "logps/ref_chosen": -58.92427062988281, "logps/ref_rejected": -72.97377014160156, "logps/rejected": -76.00535583496094, "loss": 0.9345, "margin_dpo/margin_mean": 1.208269476890564, "margin_dpo/margin_std": 1.390377163887024, "step": 167 }, { "epoch": 0.25396825396825395, "fcm_dpo/beta": 0.6161779165267944, "fcm_dpo/delta": 0.20701685547828674, "fcm_dpo/margin": 0.6512250900268555, "fcm_dpo/q_t": 0.4141233265399933, "grad_norm": 171.8645477294922, "learning_rate": 4.6584221638904767e-07, "logits/chosen": 0.1256096065044403, "logits/rejected": 0.09992053359746933, "logps/chosen": -67.95152282714844, "logps/ref_chosen": -65.65138244628906, "logps/ref_rejected": -79.71418762207031, "logps/rejected": -82.66555786132812, "loss": 1.1715, "margin_dpo/margin_mean": 0.651225209236145, "margin_dpo/margin_std": 1.301137089729309, "step": 168 }, { "epoch": 0.25547996976568405, "fcm_dpo/beta": 0.6163840889930725, "fcm_dpo/delta": -0.07705948501825333, "fcm_dpo/margin": 1.090137004852295, "fcm_dpo/q_t": 0.372364342212677, "grad_norm": 154.63226318359375, "learning_rate": 4.651720442612075e-07, "logits/chosen": 0.17598003149032593, "logits/rejected": 0.1469280868768692, "logps/chosen": -63.419395446777344, "logps/ref_chosen": -61.425865173339844, "logps/ref_rejected": -76.09590148925781, "logps/rejected": -79.1795654296875, "loss": 1.0473, "margin_dpo/margin_mean": 1.090137243270874, "margin_dpo/margin_std": 1.7698707580566406, "step": 169 }, { "epoch": 0.25699168556311414, "fcm_dpo/beta": 0.6154909133911133, "fcm_dpo/delta": 0.023734763264656067, "fcm_dpo/margin": 0.9384795427322388, "fcm_dpo/q_t": 0.3792095482349396, "grad_norm": 138.38641357421875, "learning_rate": 4.6449585330874425e-07, "logits/chosen": 0.11248569190502167, "logits/rejected": 0.11067883670330048, "logps/chosen": -58.78153610229492, "logps/ref_chosen": -56.65319061279297, "logps/ref_rejected": -63.45965576171875, "logps/rejected": -66.52647399902344, "loss": 1.0942, "margin_dpo/margin_mean": 0.9384795427322388, "margin_dpo/margin_std": 1.5680842399597168, "step": 170 }, { "epoch": 0.2585034013605442, "fcm_dpo/beta": 0.601055383682251, "fcm_dpo/delta": -0.09785895049571991, "fcm_dpo/margin": 1.1450403928756714, "fcm_dpo/q_t": 0.36030152440071106, "grad_norm": 157.53836059570312, "learning_rate": 4.6381366244617224e-07, "logits/chosen": 0.16954882442951202, "logits/rejected": 0.12765035033226013, "logps/chosen": -65.85299682617188, "logps/ref_chosen": -63.73476028442383, "logps/ref_rejected": -78.50328063964844, "logps/rejected": -81.76654815673828, "loss": 1.0409, "margin_dpo/margin_mean": 1.1450397968292236, "margin_dpo/margin_std": 1.7091573476791382, "step": 171 }, { "epoch": 0.2600151171579743, "fcm_dpo/beta": 0.5899873971939087, "fcm_dpo/delta": -0.2224036306142807, "fcm_dpo/margin": 1.3640680313110352, "fcm_dpo/q_t": 0.3370872139930725, "grad_norm": 118.71820068359375, "learning_rate": 4.631254907558365e-07, "logits/chosen": 0.20709475874900818, "logits/rejected": 0.1604321449995041, "logps/chosen": -54.41551971435547, "logps/ref_chosen": -52.201759338378906, "logps/ref_rejected": -82.85285949707031, "logps/rejected": -86.43069458007812, "loss": 0.9441, "margin_dpo/margin_mean": 1.3640680313110352, "margin_dpo/margin_std": 1.687800407409668, "step": 172 }, { "epoch": 0.2615268329554044, "fcm_dpo/beta": 0.5607134103775024, "fcm_dpo/delta": -0.10873574763536453, "fcm_dpo/margin": 1.2412900924682617, "fcm_dpo/q_t": 0.36867547035217285, "grad_norm": 128.23153686523438, "learning_rate": 4.624313574873786e-07, "logits/chosen": 0.18149101734161377, "logits/rejected": 0.10862697660923004, "logps/chosen": -57.77250671386719, "logps/ref_chosen": -55.434722900390625, "logps/ref_rejected": -77.81967163085938, "logps/rejected": -81.39875793457031, "loss": 1.0666, "margin_dpo/margin_mean": 1.2412903308868408, "margin_dpo/margin_std": 1.9681799411773682, "step": 173 }, { "epoch": 0.26303854875283444, "fcm_dpo/beta": 0.5515092015266418, "fcm_dpo/delta": -0.16861163079738617, "fcm_dpo/margin": 1.3705697059631348, "fcm_dpo/q_t": 0.3501899838447571, "grad_norm": 139.6669158935547, "learning_rate": 4.61731282057198e-07, "logits/chosen": 0.17310944199562073, "logits/rejected": 0.11688442528247833, "logps/chosen": -59.5833740234375, "logps/ref_chosen": -57.17195129394531, "logps/ref_rejected": -85.47578430175781, "logps/rejected": -89.25776672363281, "loss": 1.0024, "margin_dpo/margin_mean": 1.3705697059631348, "margin_dpo/margin_std": 1.9443564414978027, "step": 174 }, { "epoch": 0.26455026455026454, "fcm_dpo/beta": 0.529313862323761, "fcm_dpo/delta": -0.20316217839717865, "fcm_dpo/margin": 1.4862399101257324, "fcm_dpo/q_t": 0.3484209477901459, "grad_norm": 134.76806640625, "learning_rate": 4.6102528404790965e-07, "logits/chosen": 0.18298515677452087, "logits/rejected": 0.15775969624519348, "logps/chosen": -70.1815185546875, "logps/ref_chosen": -67.6656265258789, "logps/ref_rejected": -84.36766815185547, "logps/rejected": -88.36979675292969, "loss": 1.0048, "margin_dpo/margin_mean": 1.4862403869628906, "margin_dpo/margin_std": 2.140425682067871, "step": 175 }, { "epoch": 0.2660619803476946, "fcm_dpo/beta": 0.522968053817749, "fcm_dpo/delta": 0.002730097621679306, "fcm_dpo/margin": 1.1418085098266602, "fcm_dpo/q_t": 0.3858110308647156, "grad_norm": 153.61614990234375, "learning_rate": 4.603133832077953e-07, "logits/chosen": 0.11803459376096725, "logits/rejected": 0.09647019952535629, "logps/chosen": -80.58767700195312, "logps/ref_chosen": -77.8587646484375, "logps/ref_rejected": -81.08732604980469, "logps/rejected": -84.95805358886719, "loss": 1.1419, "margin_dpo/margin_mean": 1.141808271408081, "margin_dpo/margin_std": 2.209990978240967, "step": 176 }, { "epoch": 0.2675736961451247, "fcm_dpo/beta": 0.49255213141441345, "fcm_dpo/delta": -0.38714170455932617, "fcm_dpo/margin": 1.9227688312530518, "fcm_dpo/q_t": 0.30820608139038086, "grad_norm": 130.02647399902344, "learning_rate": 4.5959559945025183e-07, "logits/chosen": 0.26098543405532837, "logits/rejected": 0.17869731783866882, "logps/chosen": -57.80853271484375, "logps/ref_chosen": -55.22039794921875, "logps/ref_rejected": -92.54973602294922, "logps/rejected": -97.0606460571289, "loss": 0.8372, "margin_dpo/margin_mean": 1.9227689504623413, "margin_dpo/margin_std": 2.0203702449798584, "step": 177 }, { "epoch": 0.2690854119425548, "fcm_dpo/beta": 0.484801709651947, "fcm_dpo/delta": 0.052113160490989685, "fcm_dpo/margin": 1.1327934265136719, "fcm_dpo/q_t": 0.3832756578922272, "grad_norm": 118.76311492919922, "learning_rate": 4.588719528532341e-07, "logits/chosen": 0.14650282263755798, "logits/rejected": 0.10431109368801117, "logps/chosen": -63.50492858886719, "logps/ref_chosen": -60.81049346923828, "logps/ref_rejected": -81.12973022460938, "logps/rejected": -84.95695495605469, "loss": 1.0522, "margin_dpo/margin_mean": 1.1327934265136719, "margin_dpo/margin_std": 1.5966503620147705, "step": 178 }, { "epoch": 0.2705971277399849, "fcm_dpo/beta": 0.49064087867736816, "fcm_dpo/delta": -0.036107227206230164, "fcm_dpo/margin": 1.2913639545440674, "fcm_dpo/q_t": 0.3769165277481079, "grad_norm": 124.46369171142578, "learning_rate": 4.581424636586928e-07, "logits/chosen": 0.2028494030237198, "logits/rejected": 0.18809382617473602, "logps/chosen": -68.69247436523438, "logps/ref_chosen": -65.67171478271484, "logps/ref_rejected": -75.32586669921875, "logps/rejected": -79.63799285888672, "loss": 1.0848, "margin_dpo/margin_mean": 1.2913641929626465, "margin_dpo/margin_std": 2.173964500427246, "step": 179 }, { "epoch": 0.272108843537415, "fcm_dpo/beta": 0.4949728846549988, "fcm_dpo/delta": 0.06321872025728226, "fcm_dpo/margin": 1.0894110202789307, "fcm_dpo/q_t": 0.39300286769866943, "grad_norm": 128.481201171875, "learning_rate": 4.5740715227200897e-07, "logits/chosen": 0.03600196912884712, "logits/rejected": 0.019377566874027252, "logps/chosen": -59.216148376464844, "logps/ref_chosen": -56.68280792236328, "logps/ref_rejected": -64.94414520263672, "logps/rejected": -68.56689453125, "loss": 1.1358, "margin_dpo/margin_mean": 1.0894112586975098, "margin_dpo/margin_std": 2.019700765609741, "step": 180 }, { "epoch": 0.273620559334845, "fcm_dpo/beta": 0.4756387174129486, "fcm_dpo/delta": -0.2628810703754425, "fcm_dpo/margin": 1.768264651298523, "fcm_dpo/q_t": 0.32634565234184265, "grad_norm": 107.11132049560547, "learning_rate": 4.566660392614228e-07, "logits/chosen": 0.1837688833475113, "logits/rejected": 0.15156866610050201, "logps/chosen": -63.249237060546875, "logps/ref_chosen": -60.77604675292969, "logps/ref_rejected": -83.98361206054688, "logps/rejected": -88.22506713867188, "loss": 0.8683, "margin_dpo/margin_mean": 1.768264651298523, "margin_dpo/margin_std": 1.8696491718292236, "step": 181 }, { "epoch": 0.2751322751322751, "fcm_dpo/beta": 0.45705896615982056, "fcm_dpo/delta": -0.14843697845935822, "fcm_dpo/margin": 1.6124354600906372, "fcm_dpo/q_t": 0.3526133894920349, "grad_norm": 119.8134765625, "learning_rate": 4.5591914535745817e-07, "logits/chosen": 0.1729632019996643, "logits/rejected": 0.10781346261501312, "logps/chosen": -63.12035369873047, "logps/ref_chosen": -60.2537841796875, "logps/ref_rejected": -89.7706298828125, "logps/rejected": -94.2496337890625, "loss": 1.0022, "margin_dpo/margin_mean": 1.6124353408813477, "margin_dpo/margin_std": 2.2613399028778076, "step": 182 }, { "epoch": 0.2766439909297052, "fcm_dpo/beta": 0.4692537188529968, "fcm_dpo/delta": 0.23354226350784302, "fcm_dpo/margin": 0.8008232712745667, "fcm_dpo/q_t": 0.41931334137916565, "grad_norm": 124.27472686767578, "learning_rate": 4.551664914523433e-07, "logits/chosen": 0.17078173160552979, "logits/rejected": 0.15395784378051758, "logps/chosen": -65.33309173583984, "logps/ref_chosen": -61.76142120361328, "logps/ref_rejected": -72.54627990722656, "logps/rejected": -76.91877746582031, "loss": 1.2418, "margin_dpo/margin_mean": 0.8008227348327637, "margin_dpo/margin_std": 1.9933383464813232, "step": 183 }, { "epoch": 0.2781557067271353, "fcm_dpo/beta": 0.46749451756477356, "fcm_dpo/delta": -0.054233402013778687, "fcm_dpo/margin": 1.3855884075164795, "fcm_dpo/q_t": 0.36680716276168823, "grad_norm": 91.9602279663086, "learning_rate": 4.544080985994258e-07, "logits/chosen": 0.2527492642402649, "logits/rejected": 0.1999446451663971, "logps/chosen": -49.69728469848633, "logps/ref_chosen": -46.840721130371094, "logps/ref_rejected": -69.3609390258789, "logps/rejected": -73.60308837890625, "loss": 0.983, "margin_dpo/margin_mean": 1.3855886459350586, "margin_dpo/margin_std": 1.710758924484253, "step": 184 }, { "epoch": 0.2796674225245654, "fcm_dpo/beta": 0.4638601243495941, "fcm_dpo/delta": -0.064957395195961, "fcm_dpo/margin": 1.4216983318328857, "fcm_dpo/q_t": 0.37293511629104614, "grad_norm": 105.73377990722656, "learning_rate": 4.5364398801258394e-07, "logits/chosen": 0.17072643339633942, "logits/rejected": 0.1306096911430359, "logps/chosen": -55.55766677856445, "logps/ref_chosen": -52.32114028930664, "logps/ref_rejected": -68.3885726928711, "logps/rejected": -73.04679870605469, "loss": 1.1099, "margin_dpo/margin_mean": 1.4216983318328857, "margin_dpo/margin_std": 2.5109610557556152, "step": 185 }, { "epoch": 0.2811791383219955, "fcm_dpo/beta": 0.46233463287353516, "fcm_dpo/delta": -0.024930700659751892, "fcm_dpo/margin": 1.3478795289993286, "fcm_dpo/q_t": 0.37602150440216064, "grad_norm": 126.88275146484375, "learning_rate": 4.5287418106563354e-07, "logits/chosen": 0.12501423060894012, "logits/rejected": 0.09016162157058716, "logps/chosen": -70.59927368164062, "logps/ref_chosen": -67.42012786865234, "logps/ref_rejected": -82.50968933105469, "logps/rejected": -87.03671264648438, "loss": 1.0736, "margin_dpo/margin_mean": 1.3478801250457764, "margin_dpo/margin_std": 2.204350233078003, "step": 186 }, { "epoch": 0.28269085411942557, "fcm_dpo/beta": 0.4492034614086151, "fcm_dpo/delta": -0.14501912891864777, "fcm_dpo/margin": 1.629001259803772, "fcm_dpo/q_t": 0.3524951934814453, "grad_norm": 132.88339233398438, "learning_rate": 4.520986992917297e-07, "logits/chosen": 0.14765876531600952, "logits/rejected": 0.09966124594211578, "logps/chosen": -78.94313049316406, "logps/ref_chosen": -75.52549743652344, "logps/ref_rejected": -94.76289367675781, "logps/rejected": -99.80952453613281, "loss": 1.0305, "margin_dpo/margin_mean": 1.6290010213851929, "margin_dpo/margin_std": 2.3944525718688965, "step": 187 }, { "epoch": 0.2842025699168556, "fcm_dpo/beta": 0.4454974830150604, "fcm_dpo/delta": -0.04850031062960625, "fcm_dpo/margin": 1.4481823444366455, "fcm_dpo/q_t": 0.36388927698135376, "grad_norm": 128.6500701904297, "learning_rate": 4.5131756438276466e-07, "logits/chosen": 0.17862358689308167, "logits/rejected": 0.14158707857131958, "logps/chosen": -74.60177612304688, "logps/ref_chosen": -71.52333068847656, "logps/ref_rejected": -78.29949951171875, "logps/rejected": -82.82612609863281, "loss": 1.0798, "margin_dpo/margin_mean": 1.4481827020645142, "margin_dpo/margin_std": 2.430912733078003, "step": 188 }, { "epoch": 0.2857142857142857, "fcm_dpo/beta": 0.4300735890865326, "fcm_dpo/delta": -0.1480191946029663, "fcm_dpo/margin": 1.7021592855453491, "fcm_dpo/q_t": 0.349149227142334, "grad_norm": 105.27059936523438, "learning_rate": 4.5053079818876096e-07, "logits/chosen": 0.23444759845733643, "logits/rejected": 0.2442852109670639, "logps/chosen": -75.18251037597656, "logps/ref_chosen": -72.17626953125, "logps/ref_rejected": -75.26313781738281, "logps/rejected": -79.97154235839844, "loss": 0.9592, "margin_dpo/margin_mean": 1.7021600008010864, "margin_dpo/margin_std": 2.0677247047424316, "step": 189 }, { "epoch": 0.2872260015117158, "fcm_dpo/beta": 0.41945913434028625, "fcm_dpo/delta": -0.2064790427684784, "fcm_dpo/margin": 1.8837922811508179, "fcm_dpo/q_t": 0.3407885730266571, "grad_norm": 102.42926025390625, "learning_rate": 4.4973842271726024e-07, "logits/chosen": 0.23205827176570892, "logits/rejected": 0.11382026970386505, "logps/chosen": -57.74479293823242, "logps/ref_chosen": -54.624271392822266, "logps/ref_rejected": -101.47068786621094, "logps/rejected": -106.47500610351562, "loss": 0.9484, "margin_dpo/margin_mean": 1.8837926387786865, "margin_dpo/margin_std": 2.3639721870422363, "step": 190 }, { "epoch": 0.2887377173091459, "fcm_dpo/beta": 0.4082399904727936, "fcm_dpo/delta": -0.1050073504447937, "fcm_dpo/margin": 1.7088083028793335, "fcm_dpo/q_t": 0.35841071605682373, "grad_norm": 117.19302368164062, "learning_rate": 4.48940460132708e-07, "logits/chosen": 0.24298545718193054, "logits/rejected": 0.22123336791992188, "logps/chosen": -76.58650207519531, "logps/ref_chosen": -72.93251037597656, "logps/ref_rejected": -89.95103454589844, "logps/rejected": -95.31382751464844, "loss": 1.0019, "margin_dpo/margin_mean": 1.708808422088623, "margin_dpo/margin_std": 2.35361909866333, "step": 191 }, { "epoch": 0.29024943310657597, "fcm_dpo/beta": 0.41070353984832764, "fcm_dpo/delta": 0.06258545815944672, "fcm_dpo/margin": 1.31644868850708, "fcm_dpo/q_t": 0.3867712914943695, "grad_norm": 88.0949935913086, "learning_rate": 4.481369327558329e-07, "logits/chosen": 0.23867926001548767, "logits/rejected": 0.21680811047554016, "logps/chosen": -57.586605072021484, "logps/ref_chosen": -54.001121520996094, "logps/ref_rejected": -63.531551361083984, "logps/rejected": -68.43347930908203, "loss": 1.0851, "margin_dpo/margin_mean": 1.31644868850708, "margin_dpo/margin_std": 2.15932559967041, "step": 192 }, { "epoch": 0.29176114890400606, "fcm_dpo/beta": 0.4014725089073181, "fcm_dpo/delta": -0.1198146715760231, "fcm_dpo/margin": 1.7677130699157715, "fcm_dpo/q_t": 0.35724347829818726, "grad_norm": 87.17979431152344, "learning_rate": 4.47327863063023e-07, "logits/chosen": 0.14901183545589447, "logits/rejected": 0.12912751734256744, "logps/chosen": -60.38057327270508, "logps/ref_chosen": -56.74927520751953, "logps/ref_rejected": -58.80629348754883, "logps/rejected": -64.20530700683594, "loss": 0.9639, "margin_dpo/margin_mean": 1.767713189125061, "margin_dpo/margin_std": 2.299346446990967, "step": 193 }, { "epoch": 0.29327286470143615, "fcm_dpo/beta": 0.3976183533668518, "fcm_dpo/delta": -0.013770565390586853, "fcm_dpo/margin": 1.537994146347046, "fcm_dpo/q_t": 0.3772721290588379, "grad_norm": 91.97148132324219, "learning_rate": 4.4651327368569684e-07, "logits/chosen": 0.21334470808506012, "logits/rejected": 0.18826696276664734, "logps/chosen": -60.034690856933594, "logps/ref_chosen": -56.64944076538086, "logps/ref_rejected": -69.98954772949219, "logps/rejected": -74.91279602050781, "loss": 1.0845, "margin_dpo/margin_mean": 1.537994623184204, "margin_dpo/margin_std": 2.5277225971221924, "step": 194 }, { "epoch": 0.2947845804988662, "fcm_dpo/beta": 0.393227219581604, "fcm_dpo/delta": -0.05794385075569153, "fcm_dpo/margin": 1.6573469638824463, "fcm_dpo/q_t": 0.3646981716156006, "grad_norm": 98.00434112548828, "learning_rate": 4.4569318740967043e-07, "logits/chosen": 0.13874436914920807, "logits/rejected": 0.13722333312034607, "logps/chosen": -74.5829849243164, "logps/ref_chosen": -70.40977478027344, "logps/ref_rejected": -74.39448547363281, "logps/rejected": -80.22503662109375, "loss": 1.0265, "margin_dpo/margin_mean": 1.6573466062545776, "margin_dpo/margin_std": 2.350985050201416, "step": 195 }, { "epoch": 0.2962962962962963, "fcm_dpo/beta": 0.39594680070877075, "fcm_dpo/delta": -0.05085838586091995, "fcm_dpo/margin": 1.6336736679077148, "fcm_dpo/q_t": 0.36142659187316895, "grad_norm": 82.5360107421875, "learning_rate": 4.448676271745197e-07, "logits/chosen": 0.22305762767791748, "logits/rejected": 0.18741697072982788, "logps/chosen": -62.90294647216797, "logps/ref_chosen": -59.227577209472656, "logps/ref_rejected": -83.54757690429688, "logps/rejected": -88.85661315917969, "loss": 1.029, "margin_dpo/margin_mean": 1.6336736679077148, "margin_dpo/margin_std": 2.302030563354492, "step": 196 }, { "epoch": 0.29780801209372637, "fcm_dpo/beta": 0.3870530128479004, "fcm_dpo/delta": -0.17038501799106598, "fcm_dpo/margin": 1.954482078552246, "fcm_dpo/q_t": 0.3541423976421356, "grad_norm": 97.38890075683594, "learning_rate": 4.440366160729392e-07, "logits/chosen": 0.2841119170188904, "logits/rejected": 0.2431061565876007, "logps/chosen": -55.41398620605469, "logps/ref_chosen": -51.52912902832031, "logps/ref_rejected": -73.70631408691406, "logps/rejected": -79.545654296875, "loss": 1.0967, "margin_dpo/margin_mean": 1.9544826745986938, "margin_dpo/margin_std": 3.2290852069854736, "step": 197 }, { "epoch": 0.29931972789115646, "fcm_dpo/beta": 0.3700205683708191, "fcm_dpo/delta": -0.21359995007514954, "fcm_dpo/margin": 2.1537539958953857, "fcm_dpo/q_t": 0.3357986807823181, "grad_norm": 87.0055923461914, "learning_rate": 4.432001773500957e-07, "logits/chosen": 0.24669349193572998, "logits/rejected": 0.2129189372062683, "logps/chosen": -63.674686431884766, "logps/ref_chosen": -59.78268051147461, "logps/ref_rejected": -72.24533081054688, "logps/rejected": -78.29109191894531, "loss": 0.9107, "margin_dpo/margin_mean": 2.1537539958953857, "margin_dpo/margin_std": 2.4757540225982666, "step": 198 }, { "epoch": 0.30083144368858655, "fcm_dpo/beta": 0.3673195540904999, "fcm_dpo/delta": 0.0151963010430336, "fcm_dpo/margin": 1.5892671346664429, "fcm_dpo/q_t": 0.3766128718852997, "grad_norm": 93.22285461425781, "learning_rate": 4.4235833440297856e-07, "logits/chosen": 0.19999980926513672, "logits/rejected": 0.12446457147598267, "logps/chosen": -60.5451545715332, "logps/ref_chosen": -56.38677215576172, "logps/ref_rejected": -74.56779479980469, "logps/rejected": -80.31544494628906, "loss": 1.1037, "margin_dpo/margin_mean": 1.5892664194107056, "margin_dpo/margin_std": 2.644639492034912, "step": 199 }, { "epoch": 0.30234315948601664, "fcm_dpo/beta": 0.3473024368286133, "fcm_dpo/delta": -0.2598249912261963, "fcm_dpo/margin": 2.40299654006958, "fcm_dpo/q_t": 0.3380799889564514, "grad_norm": 84.88159942626953, "learning_rate": 4.415111107797445e-07, "logits/chosen": 0.2574974298477173, "logits/rejected": 0.19607925415039062, "logps/chosen": -61.37678909301758, "logps/ref_chosen": -57.82432556152344, "logps/ref_rejected": -89.28246307373047, "logps/rejected": -95.23792266845703, "loss": 0.9546, "margin_dpo/margin_mean": 2.40299654006958, "margin_dpo/margin_std": 3.1777901649475098, "step": 200 }, { "epoch": 0.30234315948601664, "eval_fcm_dpo/beta": 0.34564897418022156, "eval_logits/chosen": 0.21787600219249725, "eval_logits/rejected": 0.1796640008687973, "eval_logps/chosen": -78.88257598876953, "eval_logps/ref_chosen": -74.85946655273438, "eval_logps/ref_rejected": -79.54898834228516, "eval_logps/rejected": -85.223876953125, "eval_loss": 0.5604943037033081, "eval_margin_dpo/margin_mean": 1.6517823934555054, "eval_margin_dpo/margin_std": 2.969238042831421, "eval_runtime": 38.0736, "eval_samples_per_second": 60.488, "eval_steps_per_second": 1.891, "step": 200 }, { "epoch": 0.30385487528344673, "fcm_dpo/beta": 0.34147608280181885, "fcm_dpo/delta": -0.05875827372074127, "fcm_dpo/margin": 1.9166682958602905, "fcm_dpo/q_t": 0.3675483465194702, "grad_norm": 89.931884765625, "learning_rate": 4.4065853017905953e-07, "logits/chosen": 0.26193559169769287, "logits/rejected": 0.22425445914268494, "logps/chosen": -63.281890869140625, "logps/ref_chosen": -58.999759674072266, "logps/ref_rejected": -84.67575073242188, "logps/rejected": -90.87454223632812, "loss": 1.0029, "margin_dpo/margin_mean": 1.916668176651001, "margin_dpo/margin_std": 2.625683307647705, "step": 201 }, { "epoch": 0.30536659108087677, "fcm_dpo/beta": 0.33684271574020386, "fcm_dpo/delta": -0.1786990910768509, "fcm_dpo/margin": 2.268734931945801, "fcm_dpo/q_t": 0.34659096598625183, "grad_norm": 71.41585540771484, "learning_rate": 4.3980061644943575e-07, "logits/chosen": 0.16486480832099915, "logits/rejected": 0.10804621875286102, "logps/chosen": -51.65721893310547, "logps/ref_chosen": -47.660648345947266, "logps/ref_rejected": -73.63249969482422, "logps/rejected": -79.89779663085938, "loss": 0.9641, "margin_dpo/margin_mean": 2.268734931945801, "margin_dpo/margin_std": 2.9032340049743652, "step": 202 }, { "epoch": 0.30687830687830686, "fcm_dpo/beta": 0.33270418643951416, "fcm_dpo/delta": 0.032331258058547974, "fcm_dpo/margin": 1.710151195526123, "fcm_dpo/q_t": 0.3836914598941803, "grad_norm": 91.3531265258789, "learning_rate": 4.3893739358856455e-07, "logits/chosen": 0.26537784934043884, "logits/rejected": 0.20603656768798828, "logps/chosen": -66.40350341796875, "logps/ref_chosen": -62.32553482055664, "logps/ref_rejected": -99.37226104736328, "logps/rejected": -105.16038513183594, "loss": 1.0735, "margin_dpo/margin_mean": 1.7101507186889648, "margin_dpo/margin_std": 2.758167266845703, "step": 203 }, { "epoch": 0.30839002267573695, "fcm_dpo/beta": 0.3235759139060974, "fcm_dpo/delta": -0.05550873279571533, "fcm_dpo/margin": 1.9951179027557373, "fcm_dpo/q_t": 0.37603023648262024, "grad_norm": 76.90956115722656, "learning_rate": 4.380688857426449e-07, "logits/chosen": 0.18045659363269806, "logits/rejected": 0.12303562462329865, "logps/chosen": -55.126976013183594, "logps/ref_chosen": -50.62931823730469, "logps/ref_rejected": -66.60475158691406, "logps/rejected": -73.0975341796875, "loss": 1.0797, "margin_dpo/margin_mean": 1.995117425918579, "margin_dpo/margin_std": 3.2013866901397705, "step": 204 }, { "epoch": 0.30990173847316704, "fcm_dpo/beta": 0.3267316222190857, "fcm_dpo/delta": 0.013398218899965286, "fcm_dpo/margin": 1.7957442998886108, "fcm_dpo/q_t": 0.3864005208015442, "grad_norm": 108.05115509033203, "learning_rate": 4.3719511720570814e-07, "logits/chosen": 0.26472169160842896, "logits/rejected": 0.21426978707313538, "logps/chosen": -75.12602233886719, "logps/ref_chosen": -70.3561782836914, "logps/ref_rejected": -93.39848327636719, "logps/rejected": -99.96406555175781, "loss": 1.1463, "margin_dpo/margin_mean": 1.7957442998886108, "margin_dpo/margin_std": 3.4385950565338135, "step": 205 }, { "epoch": 0.31141345427059713, "fcm_dpo/beta": 0.33613136410713196, "fcm_dpo/delta": 0.07342677563428879, "fcm_dpo/margin": 1.56940758228302, "fcm_dpo/q_t": 0.39384615421295166, "grad_norm": 90.17076110839844, "learning_rate": 4.363161124189387e-07, "logits/chosen": 0.25817301869392395, "logits/rejected": 0.2432713508605957, "logps/chosen": -72.54843139648438, "logps/ref_chosen": -67.64547729492188, "logps/ref_rejected": -79.89584350585938, "logps/rejected": -86.36820220947266, "loss": 1.1686, "margin_dpo/margin_mean": 1.5694081783294678, "margin_dpo/margin_std": 3.069244623184204, "step": 206 }, { "epoch": 0.3129251700680272, "fcm_dpo/beta": 0.32442450523376465, "fcm_dpo/delta": -0.12306281924247742, "fcm_dpo/margin": 2.1917169094085693, "fcm_dpo/q_t": 0.36162668466567993, "grad_norm": 76.4348373413086, "learning_rate": 4.3543189596998986e-07, "logits/chosen": 0.17872723937034607, "logits/rejected": 0.12270534038543701, "logps/chosen": -72.61674499511719, "logps/ref_chosen": -67.66419219970703, "logps/ref_rejected": -85.10249328613281, "logps/rejected": -92.24675750732422, "loss": 1.0183, "margin_dpo/margin_mean": 2.1917169094085693, "margin_dpo/margin_std": 3.2143168449401855, "step": 207 }, { "epoch": 0.3144368858654573, "fcm_dpo/beta": 0.3316575884819031, "fcm_dpo/delta": 0.10988043993711472, "fcm_dpo/margin": 1.4934909343719482, "fcm_dpo/q_t": 0.40299874544143677, "grad_norm": 84.07500457763672, "learning_rate": 4.3454249259229664e-07, "logits/chosen": 0.2049015909433365, "logits/rejected": 0.1825580596923828, "logps/chosen": -62.14056396484375, "logps/ref_chosen": -57.731712341308594, "logps/ref_rejected": -74.19276428222656, "logps/rejected": -80.09510803222656, "loss": 1.1711, "margin_dpo/margin_mean": 1.493491291999817, "margin_dpo/margin_std": 3.0397095680236816, "step": 208 }, { "epoch": 0.31594860166288735, "fcm_dpo/beta": 0.3277115523815155, "fcm_dpo/delta": -0.19000059366226196, "fcm_dpo/margin": 2.3637375831604004, "fcm_dpo/q_t": 0.35109424591064453, "grad_norm": 93.42904663085938, "learning_rate": 4.336479271643833e-07, "logits/chosen": 0.204912006855011, "logits/rejected": 0.1559399664402008, "logps/chosen": -73.3060073852539, "logps/ref_chosen": -68.55007934570312, "logps/ref_rejected": -87.90541076660156, "logps/rejected": -95.02507781982422, "loss": 1.0241, "margin_dpo/margin_mean": 2.363738536834717, "margin_dpo/margin_std": 3.465147018432617, "step": 209 }, { "epoch": 0.31746031746031744, "fcm_dpo/beta": 0.31141048669815063, "fcm_dpo/delta": -0.2101190686225891, "fcm_dpo/margin": 2.548922538757324, "fcm_dpo/q_t": 0.34733301401138306, "grad_norm": 71.60018157958984, "learning_rate": 4.327482247091679e-07, "logits/chosen": 0.28091442584991455, "logits/rejected": 0.20208045840263367, "logps/chosen": -62.09604263305664, "logps/ref_chosen": -57.268272399902344, "logps/ref_rejected": -85.72807312011719, "logps/rejected": -93.10476684570312, "loss": 0.9873, "margin_dpo/margin_mean": 2.548922061920166, "margin_dpo/margin_std": 3.552750587463379, "step": 210 }, { "epoch": 0.31897203325774753, "fcm_dpo/beta": 0.30577632784843445, "fcm_dpo/delta": -0.07666610926389694, "fcm_dpo/margin": 2.1951539516448975, "fcm_dpo/q_t": 0.3637697696685791, "grad_norm": 83.09772491455078, "learning_rate": 4.3184341039326217e-07, "logits/chosen": 0.2706531882286072, "logits/rejected": 0.19553887844085693, "logps/chosen": -58.080902099609375, "logps/ref_chosen": -53.640708923339844, "logps/ref_rejected": -93.0387954711914, "logps/rejected": -99.67414855957031, "loss": 0.9942, "margin_dpo/margin_mean": 2.1951539516448975, "margin_dpo/margin_std": 2.999103546142578, "step": 211 }, { "epoch": 0.3204837490551776, "fcm_dpo/beta": 0.2980360984802246, "fcm_dpo/delta": -0.10260573029518127, "fcm_dpo/margin": 2.3334219455718994, "fcm_dpo/q_t": 0.36129042506217957, "grad_norm": 74.72057342529297, "learning_rate": 4.309335095262675e-07, "logits/chosen": 0.25780636072158813, "logits/rejected": 0.19710323214530945, "logps/chosen": -62.407325744628906, "logps/ref_chosen": -57.36674499511719, "logps/ref_rejected": -79.89643096923828, "logps/rejected": -87.27043151855469, "loss": 1.036, "margin_dpo/margin_mean": 2.333421230316162, "margin_dpo/margin_std": 3.5129289627075195, "step": 212 }, { "epoch": 0.3219954648526077, "fcm_dpo/beta": 0.28785935044288635, "fcm_dpo/delta": -0.16928018629550934, "fcm_dpo/margin": 2.624055862426758, "fcm_dpo/q_t": 0.35196423530578613, "grad_norm": 74.51473236083984, "learning_rate": 4.3001854756006724e-07, "logits/chosen": 0.2479405403137207, "logits/rejected": 0.2258269488811493, "logps/chosen": -69.40491485595703, "logps/ref_chosen": -65.22111511230469, "logps/ref_rejected": -80.1810302734375, "logps/rejected": -86.98887634277344, "loss": 1.0117, "margin_dpo/margin_mean": 2.624055862426758, "margin_dpo/margin_std": 3.8042173385620117, "step": 213 }, { "epoch": 0.3235071806500378, "fcm_dpo/beta": 0.28325408697128296, "fcm_dpo/delta": -0.1327255666255951, "fcm_dpo/margin": 2.5523083209991455, "fcm_dpo/q_t": 0.35900163650512695, "grad_norm": 74.05313873291016, "learning_rate": 4.290985500881143e-07, "logits/chosen": 0.14122872054576874, "logits/rejected": 0.11947432160377502, "logps/chosen": -66.09378814697266, "logps/ref_chosen": -61.292327880859375, "logps/ref_rejected": -67.69841003417969, "logps/rejected": -75.05216979980469, "loss": 0.9921, "margin_dpo/margin_mean": 2.552309036254883, "margin_dpo/margin_std": 3.5551910400390625, "step": 214 }, { "epoch": 0.3250188964474679, "fcm_dpo/beta": 0.2725156843662262, "fcm_dpo/delta": -0.2077074944972992, "fcm_dpo/margin": 2.905134677886963, "fcm_dpo/q_t": 0.3412568271160126, "grad_norm": 67.4879150390625, "learning_rate": 4.281735428447157e-07, "logits/chosen": 0.15792277455329895, "logits/rejected": 0.073989138007164, "logps/chosen": -68.87016296386719, "logps/ref_chosen": -63.869136810302734, "logps/ref_rejected": -98.7657241821289, "logps/rejected": -106.67188262939453, "loss": 0.9368, "margin_dpo/margin_mean": 2.9051353931427, "margin_dpo/margin_std": 3.5075745582580566, "step": 215 }, { "epoch": 0.32653061224489793, "fcm_dpo/beta": 0.2604866623878479, "fcm_dpo/delta": -0.22343488037586212, "fcm_dpo/margin": 3.094121217727661, "fcm_dpo/q_t": 0.3563622832298279, "grad_norm": 61.407466888427734, "learning_rate": 4.2724355170431247e-07, "logits/chosen": 0.2697373032569885, "logits/rejected": 0.1980409026145935, "logps/chosen": -73.16397094726562, "logps/ref_chosen": -67.824951171875, "logps/ref_rejected": -96.40231323242188, "logps/rejected": -104.83544921875, "loss": 0.993, "margin_dpo/margin_mean": 3.094120740890503, "margin_dpo/margin_std": 4.95653772354126, "step": 216 }, { "epoch": 0.328042328042328, "fcm_dpo/beta": 0.24557505548000336, "fcm_dpo/delta": -0.2807563543319702, "fcm_dpo/margin": 3.488009214401245, "fcm_dpo/q_t": 0.32953548431396484, "grad_norm": 58.03193664550781, "learning_rate": 4.26308602680756e-07, "logits/chosen": 0.2073604166507721, "logits/rejected": 0.11603499948978424, "logps/chosen": -66.12384796142578, "logps/ref_chosen": -60.5049934387207, "logps/ref_rejected": -84.26618194580078, "logps/rejected": -93.373046875, "loss": 0.8786, "margin_dpo/margin_mean": 3.488009452819824, "margin_dpo/margin_std": 3.95634126663208, "step": 217 }, { "epoch": 0.3295540438397581, "fcm_dpo/beta": 0.2504267394542694, "fcm_dpo/delta": 0.1592218428850174, "fcm_dpo/margin": 1.7742626667022705, "fcm_dpo/q_t": 0.4074835777282715, "grad_norm": 69.1903076171875, "learning_rate": 4.253687219265803e-07, "logits/chosen": 0.11688175797462463, "logits/rejected": 0.11001837253570557, "logps/chosen": -76.50672912597656, "logps/ref_chosen": -70.59431457519531, "logps/ref_rejected": -73.89038848876953, "logps/rejected": -81.57705688476562, "loss": 1.2216, "margin_dpo/margin_mean": 1.7742631435394287, "margin_dpo/margin_std": 4.000538349151611, "step": 218 }, { "epoch": 0.3310657596371882, "fcm_dpo/beta": 0.2547418475151062, "fcm_dpo/delta": 0.14973081648349762, "fcm_dpo/margin": 1.7954492568969727, "fcm_dpo/q_t": 0.40601488947868347, "grad_norm": 68.71503448486328, "learning_rate": 4.2442393573227043e-07, "logits/chosen": 0.1661921888589859, "logits/rejected": 0.1311841607093811, "logps/chosen": -65.78245544433594, "logps/ref_chosen": -60.490943908691406, "logps/ref_rejected": -75.85001373291016, "logps/rejected": -82.93697357177734, "loss": 1.1538, "margin_dpo/margin_mean": 1.7954490184783936, "margin_dpo/margin_std": 3.453105926513672, "step": 219 }, { "epoch": 0.3325774754346183, "fcm_dpo/beta": 0.25497251749038696, "fcm_dpo/delta": -0.04306017607450485, "fcm_dpo/margin": 2.509706974029541, "fcm_dpo/q_t": 0.3725942373275757, "grad_norm": 52.4035530090332, "learning_rate": 4.234742705255272e-07, "logits/chosen": 0.25568246841430664, "logits/rejected": 0.20475764572620392, "logps/chosen": -50.56587219238281, "logps/ref_chosen": -45.013397216796875, "logps/ref_rejected": -70.49369812011719, "logps/rejected": -78.55587768554688, "loss": 1.0567, "margin_dpo/margin_mean": 2.509706497192383, "margin_dpo/margin_std": 3.935103416442871, "step": 220 }, { "epoch": 0.3340891912320484, "fcm_dpo/beta": 0.2531549334526062, "fcm_dpo/delta": -0.1000896617770195, "fcm_dpo/margin": 2.737396717071533, "fcm_dpo/q_t": 0.3685762882232666, "grad_norm": 61.76700210571289, "learning_rate": 4.22519752870528e-07, "logits/chosen": 0.23615026473999023, "logits/rejected": 0.18113240599632263, "logps/chosen": -64.33263397216797, "logps/ref_chosen": -59.09584045410156, "logps/ref_rejected": -88.64388275146484, "logps/rejected": -96.61807250976562, "loss": 1.0304, "margin_dpo/margin_mean": 2.737396717071533, "margin_dpo/margin_std": 4.217761993408203, "step": 221 }, { "epoch": 0.3356009070294785, "fcm_dpo/beta": 0.2420484572649002, "fcm_dpo/delta": -0.23359233140945435, "fcm_dpo/margin": 3.366135597229004, "fcm_dpo/q_t": 0.3340812921524048, "grad_norm": 63.084312438964844, "learning_rate": 4.2156040946718343e-07, "logits/chosen": 0.30527517199516296, "logits/rejected": 0.23241345584392548, "logps/chosen": -61.709754943847656, "logps/ref_chosen": -55.9976921081543, "logps/ref_rejected": -111.94727325439453, "logps/rejected": -121.02547454833984, "loss": 0.9189, "margin_dpo/margin_mean": 3.3661351203918457, "margin_dpo/margin_std": 3.9613122940063477, "step": 222 }, { "epoch": 0.3371126228269085, "fcm_dpo/beta": 0.22976723313331604, "fcm_dpo/delta": -0.21587347984313965, "fcm_dpo/margin": 3.46626877784729, "fcm_dpo/q_t": 0.3348970413208008, "grad_norm": 50.928836822509766, "learning_rate": 4.2059626715039065e-07, "logits/chosen": 0.2393488734960556, "logits/rejected": 0.19234737753868103, "logps/chosen": -65.4186019897461, "logps/ref_chosen": -59.891422271728516, "logps/ref_rejected": -86.28954315185547, "logps/rejected": -95.28298950195312, "loss": 0.8931, "margin_dpo/margin_mean": 3.4662694931030273, "margin_dpo/margin_std": 3.742668628692627, "step": 223 }, { "epoch": 0.3386243386243386, "fcm_dpo/beta": 0.2317299097776413, "fcm_dpo/delta": 0.10583598166704178, "fcm_dpo/margin": 2.155870199203491, "fcm_dpo/q_t": 0.39710187911987305, "grad_norm": 62.04116439819336, "learning_rate": 4.1962735288928304e-07, "logits/chosen": 0.2909647822380066, "logits/rejected": 0.2737959623336792, "logps/chosen": -69.97515869140625, "logps/ref_chosen": -64.04463195800781, "logps/ref_rejected": -75.05450439453125, "logps/rejected": -83.14089965820312, "loss": 1.0852, "margin_dpo/margin_mean": 2.1558704376220703, "margin_dpo/margin_std": 3.457667589187622, "step": 224 }, { "epoch": 0.3401360544217687, "fcm_dpo/beta": 0.22542855143547058, "fcm_dpo/delta": -0.18165551126003265, "fcm_dpo/margin": 3.39239764213562, "fcm_dpo/q_t": 0.34687235951423645, "grad_norm": 69.0022201538086, "learning_rate": 4.186536937864752e-07, "logits/chosen": 0.2634621262550354, "logits/rejected": 0.1664544939994812, "logps/chosen": -72.20433044433594, "logps/ref_chosen": -66.0958251953125, "logps/ref_rejected": -97.68675231933594, "logps/rejected": -107.18766784667969, "loss": 0.955, "margin_dpo/margin_mean": 3.392397403717041, "margin_dpo/margin_std": 4.276772499084473, "step": 225 }, { "epoch": 0.3416477702191988, "fcm_dpo/beta": 0.22262665629386902, "fcm_dpo/delta": -0.07381819188594818, "fcm_dpo/margin": 3.0040860176086426, "fcm_dpo/q_t": 0.3633517026901245, "grad_norm": 46.30059051513672, "learning_rate": 4.176753170773052e-07, "logits/chosen": 0.2735338509082794, "logits/rejected": 0.23284432291984558, "logps/chosen": -57.213523864746094, "logps/ref_chosen": -51.4168701171875, "logps/ref_rejected": -66.30068969726562, "logps/rejected": -75.10142517089844, "loss": 1.0239, "margin_dpo/margin_mean": 3.0040855407714844, "margin_dpo/margin_std": 4.355281829833984, "step": 226 }, { "epoch": 0.3431594860166289, "fcm_dpo/beta": 0.21982993185520172, "fcm_dpo/delta": -0.030905138701200485, "fcm_dpo/margin": 2.857992172241211, "fcm_dpo/q_t": 0.37706050276756287, "grad_norm": 60.83171081542969, "learning_rate": 4.166922501290729e-07, "logits/chosen": 0.31351593136787415, "logits/rejected": 0.2783881425857544, "logps/chosen": -64.13533020019531, "logps/ref_chosen": -57.989776611328125, "logps/ref_rejected": -75.05464172363281, "logps/rejected": -84.05818939208984, "loss": 1.0752, "margin_dpo/margin_mean": 2.857992172241211, "margin_dpo/margin_std": 4.642690658569336, "step": 227 }, { "epoch": 0.34467120181405897, "fcm_dpo/beta": 0.21965107321739197, "fcm_dpo/delta": -0.05902150273323059, "fcm_dpo/margin": 2.9821901321411133, "fcm_dpo/q_t": 0.370593786239624, "grad_norm": 51.929935455322266, "learning_rate": 4.1570452044027405e-07, "logits/chosen": 0.2829626798629761, "logits/rejected": 0.21694326400756836, "logps/chosen": -62.26646423339844, "logps/ref_chosen": -55.55936813354492, "logps/ref_rejected": -77.02364349365234, "logps/rejected": -86.71292877197266, "loss": 1.0521, "margin_dpo/margin_mean": 2.9821906089782715, "margin_dpo/margin_std": 4.6442036628723145, "step": 228 }, { "epoch": 0.34618291761148906, "fcm_dpo/beta": 0.21824893355369568, "fcm_dpo/delta": 0.01088004931807518, "fcm_dpo/margin": 2.7022318840026855, "fcm_dpo/q_t": 0.3750082552433014, "grad_norm": 150.70008850097656, "learning_rate": 4.147121556398312e-07, "logits/chosen": 0.34077906608581543, "logits/rejected": 0.29125073552131653, "logps/chosen": -56.72576904296875, "logps/ref_chosen": -50.79466247558594, "logps/ref_rejected": -78.4474105834961, "logps/rejected": -87.08074951171875, "loss": 1.0911, "margin_dpo/margin_mean": 2.7022314071655273, "margin_dpo/margin_std": 4.491253852844238, "step": 229 }, { "epoch": 0.3476946334089191, "fcm_dpo/beta": 0.21833573281764984, "fcm_dpo/delta": -0.08132877200841904, "fcm_dpo/margin": 3.0884265899658203, "fcm_dpo/q_t": 0.36123067140579224, "grad_norm": 57.547664642333984, "learning_rate": 4.137151834863213e-07, "logits/chosen": 0.26574379205703735, "logits/rejected": 0.2626535892486572, "logps/chosen": -63.243568420410156, "logps/ref_chosen": -56.729225158691406, "logps/ref_rejected": -62.99180603027344, "logps/rejected": -72.59457397460938, "loss": 1.0252, "margin_dpo/margin_mean": 3.0884273052215576, "margin_dpo/margin_std": 4.373671531677246, "step": 230 }, { "epoch": 0.3492063492063492, "fcm_dpo/beta": 0.2022572159767151, "fcm_dpo/delta": -0.35897156596183777, "fcm_dpo/margin": 4.558291435241699, "fcm_dpo/q_t": 0.30946117639541626, "grad_norm": 49.22685623168945, "learning_rate": 4.1271363186719835e-07, "logits/chosen": 0.20000478625297546, "logits/rejected": 0.18240705132484436, "logps/chosen": -79.33709716796875, "logps/ref_chosen": -72.59709930419922, "logps/ref_rejected": -86.2322998046875, "logps/rejected": -97.53059387207031, "loss": 0.8372, "margin_dpo/margin_mean": 4.558291435241699, "margin_dpo/margin_std": 4.532693386077881, "step": 231 }, { "epoch": 0.3507180650037793, "fcm_dpo/beta": 0.19782654941082, "fcm_dpo/delta": -0.05669552460312843, "fcm_dpo/margin": 3.2991318702697754, "fcm_dpo/q_t": 0.37337297201156616, "grad_norm": 53.600704193115234, "learning_rate": 4.1170752879801436e-07, "logits/chosen": 0.23124045133590698, "logits/rejected": 0.2049122452735901, "logps/chosen": -74.73058319091797, "logps/ref_chosen": -68.1185302734375, "logps/ref_rejected": -83.79415893554688, "logps/rejected": -93.7053451538086, "loss": 1.0636, "margin_dpo/margin_mean": 3.2991318702697754, "margin_dpo/margin_std": 5.3659348487854, "step": 232 }, { "epoch": 0.35222978080120937, "fcm_dpo/beta": 0.19616064429283142, "fcm_dpo/delta": 0.051648423075675964, "fcm_dpo/margin": 2.7871627807617188, "fcm_dpo/q_t": 0.3915916085243225, "grad_norm": 49.19864273071289, "learning_rate": 4.106969024216348e-07, "logits/chosen": 0.23559258878231049, "logits/rejected": 0.18777360022068024, "logps/chosen": -63.05714416503906, "logps/ref_chosen": -55.070152282714844, "logps/ref_rejected": -66.61845397949219, "logps/rejected": -77.39260864257812, "loss": 1.1095, "margin_dpo/margin_mean": 2.787163257598877, "margin_dpo/margin_std": 4.586450576782227, "step": 233 }, { "epoch": 0.35374149659863946, "fcm_dpo/beta": 0.20507487654685974, "fcm_dpo/delta": 0.10537093132734299, "fcm_dpo/margin": 2.423401355743408, "fcm_dpo/q_t": 0.40045344829559326, "grad_norm": 52.23374938964844, "learning_rate": 4.09681781007452e-07, "logits/chosen": 0.2050936222076416, "logits/rejected": 0.19188997149467468, "logps/chosen": -63.265869140625, "logps/ref_chosen": -55.92589569091797, "logps/ref_rejected": -51.11608123779297, "logps/rejected": -60.87945556640625, "loss": 1.1624, "margin_dpo/margin_mean": 2.423401355743408, "margin_dpo/margin_std": 4.720711708068848, "step": 234 }, { "epoch": 0.35525321239606955, "fcm_dpo/beta": 0.19567573070526123, "fcm_dpo/delta": -0.2706778347492218, "fcm_dpo/margin": 4.328647613525391, "fcm_dpo/q_t": 0.31999263167381287, "grad_norm": 47.88417053222656, "learning_rate": 4.08662192950594e-07, "logits/chosen": 0.30758044123649597, "logits/rejected": 0.28944361209869385, "logps/chosen": -71.12437438964844, "logps/ref_chosen": -64.53972625732422, "logps/ref_rejected": -77.69151306152344, "logps/rejected": -88.60479736328125, "loss": 0.8412, "margin_dpo/margin_mean": 4.328647613525391, "margin_dpo/margin_std": 4.075915336608887, "step": 235 }, { "epoch": 0.35676492819349964, "fcm_dpo/beta": 0.19021353125572205, "fcm_dpo/delta": -0.08151474595069885, "fcm_dpo/margin": 3.550055980682373, "fcm_dpo/q_t": 0.36800432205200195, "grad_norm": 50.881134033203125, "learning_rate": 4.076381667711306e-07, "logits/chosen": 0.22476598620414734, "logits/rejected": 0.2125941663980484, "logps/chosen": -79.9710464477539, "logps/ref_chosen": -71.15473937988281, "logps/ref_rejected": -84.88541412353516, "logps/rejected": -97.25178527832031, "loss": 1.0551, "margin_dpo/margin_mean": 3.550055980682373, "margin_dpo/margin_std": 5.55735969543457, "step": 236 }, { "epoch": 0.35827664399092973, "fcm_dpo/beta": 0.18786309659481049, "fcm_dpo/delta": -0.06414446234703064, "fcm_dpo/margin": 3.5106282234191895, "fcm_dpo/q_t": 0.36550983786582947, "grad_norm": 52.91632843017578, "learning_rate": 4.066097311132753e-07, "logits/chosen": 0.2820720672607422, "logits/rejected": 0.2691257894039154, "logps/chosen": -83.85193634033203, "logps/ref_chosen": -76.14201354980469, "logps/ref_rejected": -80.88479614257812, "logps/rejected": -92.1053466796875, "loss": 1.0547, "margin_dpo/margin_mean": 3.5106277465820312, "margin_dpo/margin_std": 5.33742618560791, "step": 237 }, { "epoch": 0.35978835978835977, "fcm_dpo/beta": 0.18471507728099823, "fcm_dpo/delta": -0.10358825325965881, "fcm_dpo/margin": 3.7690069675445557, "fcm_dpo/q_t": 0.3609452247619629, "grad_norm": 65.1036148071289, "learning_rate": 4.0557691474458414e-07, "logits/chosen": 0.2169681191444397, "logits/rejected": 0.19963425397872925, "logps/chosen": -76.23017120361328, "logps/ref_chosen": -68.88484954833984, "logps/ref_rejected": -75.8946304321289, "logps/rejected": -87.00895690917969, "loss": 1.0214, "margin_dpo/margin_mean": 3.7690064907073975, "margin_dpo/margin_std": 5.457906723022461, "step": 238 }, { "epoch": 0.36130007558578986, "fcm_dpo/beta": 0.18199658393859863, "fcm_dpo/delta": -0.13259382545948029, "fcm_dpo/margin": 3.9706599712371826, "fcm_dpo/q_t": 0.3562832474708557, "grad_norm": 49.44264602661133, "learning_rate": 4.045397465551513e-07, "logits/chosen": 0.3672158718109131, "logits/rejected": 0.26534122228622437, "logps/chosen": -65.59939575195312, "logps/ref_chosen": -56.771827697753906, "logps/ref_rejected": -116.23050689697266, "logps/rejected": -129.0287322998047, "loss": 1.0188, "margin_dpo/margin_mean": 3.9706602096557617, "margin_dpo/margin_std": 5.715615749359131, "step": 239 }, { "epoch": 0.36281179138321995, "fcm_dpo/beta": 0.17385989427566528, "fcm_dpo/delta": -0.24004262685775757, "fcm_dpo/margin": 4.721988677978516, "fcm_dpo/q_t": 0.3290281593799591, "grad_norm": 37.56927490234375, "learning_rate": 4.0349825555680045e-07, "logits/chosen": 0.26035207509994507, "logits/rejected": 0.18253850936889648, "logps/chosen": -62.00343322753906, "logps/ref_chosen": -53.35411071777344, "logps/ref_rejected": -80.12019348144531, "logps/rejected": -93.49150085449219, "loss": 0.9114, "margin_dpo/margin_mean": 4.721988677978516, "margin_dpo/margin_std": 5.341388702392578, "step": 240 }, { "epoch": 0.36432350718065004, "fcm_dpo/beta": 0.17665785551071167, "fcm_dpo/delta": 0.19322022795677185, "fcm_dpo/margin": 2.3397579193115234, "fcm_dpo/q_t": 0.4169955849647522, "grad_norm": 53.57221984863281, "learning_rate": 4.0245247088227377e-07, "logits/chosen": 0.23470798134803772, "logits/rejected": 0.20348861813545227, "logps/chosen": -80.33531951904297, "logps/ref_chosen": -71.89541625976562, "logps/ref_rejected": -83.03492736816406, "logps/rejected": -93.81459045410156, "loss": 1.1753, "margin_dpo/margin_mean": 2.3397579193115234, "margin_dpo/margin_std": 4.811473846435547, "step": 241 }, { "epoch": 0.36583522297808013, "fcm_dpo/beta": 0.16935241222381592, "fcm_dpo/delta": -0.23963555693626404, "fcm_dpo/margin": 4.812247276306152, "fcm_dpo/q_t": 0.33974403142929077, "grad_norm": 37.257667541503906, "learning_rate": 4.0140242178441665e-07, "logits/chosen": 0.24610793590545654, "logits/rejected": 0.2245916724205017, "logps/chosen": -65.82247924804688, "logps/ref_chosen": -57.927433013916016, "logps/ref_rejected": -67.838623046875, "logps/rejected": -80.54591369628906, "loss": 0.9602, "margin_dpo/margin_mean": 4.812246799468994, "margin_dpo/margin_std": 6.34306526184082, "step": 242 }, { "epoch": 0.3673469387755102, "fcm_dpo/beta": 0.1678386628627777, "fcm_dpo/delta": -0.04528873786330223, "fcm_dpo/margin": 3.8267951011657715, "fcm_dpo/q_t": 0.3693522810935974, "grad_norm": 46.92771530151367, "learning_rate": 4.003481376353596e-07, "logits/chosen": 0.2732234597206116, "logits/rejected": 0.2697628438472748, "logps/chosen": -82.59149169921875, "logps/ref_chosen": -74.27667236328125, "logps/ref_rejected": -73.24340057373047, "logps/rejected": -85.38501739501953, "loss": 1.032, "margin_dpo/margin_mean": 3.8267946243286133, "margin_dpo/margin_std": 5.578545093536377, "step": 243 }, { "epoch": 0.3688586545729403, "fcm_dpo/beta": 0.16238996386528015, "fcm_dpo/delta": -0.23858490586280823, "fcm_dpo/margin": 5.046672821044922, "fcm_dpo/q_t": 0.3252617120742798, "grad_norm": 34.678497314453125, "learning_rate": 3.9928964792569654e-07, "logits/chosen": 0.2962725758552551, "logits/rejected": 0.2212146818637848, "logps/chosen": -62.00054931640625, "logps/ref_chosen": -53.36390686035156, "logps/ref_rejected": -71.10276794433594, "logps/rejected": -84.78608703613281, "loss": 0.8479, "margin_dpo/margin_mean": 5.0466718673706055, "margin_dpo/margin_std": 4.714447498321533, "step": 244 }, { "epoch": 0.37037037037037035, "fcm_dpo/beta": 0.1527130901813507, "fcm_dpo/delta": -0.3079785108566284, "fcm_dpo/margin": 5.767277240753174, "fcm_dpo/q_t": 0.3175172805786133, "grad_norm": 56.39506530761719, "learning_rate": 3.982269822636601e-07, "logits/chosen": 0.3219825029373169, "logits/rejected": 0.2969723641872406, "logps/chosen": -79.81932067871094, "logps/ref_chosen": -71.19510650634766, "logps/ref_rejected": -80.76235961914062, "logps/rejected": -95.15385437011719, "loss": 0.8586, "margin_dpo/margin_mean": 5.767277717590332, "margin_dpo/margin_std": 6.025437355041504, "step": 245 }, { "epoch": 0.37188208616780044, "fcm_dpo/beta": 0.14736124873161316, "fcm_dpo/delta": -0.14660793542861938, "fcm_dpo/margin": 4.994536399841309, "fcm_dpo/q_t": 0.35354191064834595, "grad_norm": 42.80738067626953, "learning_rate": 3.971601703742932e-07, "logits/chosen": 0.321519672870636, "logits/rejected": 0.268222451210022, "logps/chosen": -81.36954498291016, "logps/ref_chosen": -71.62104797363281, "logps/ref_rejected": -94.03392028808594, "logps/rejected": -108.77696228027344, "loss": 0.9924, "margin_dpo/margin_mean": 4.994536399841309, "margin_dpo/margin_std": 6.899087905883789, "step": 246 }, { "epoch": 0.37339380196523053, "fcm_dpo/beta": 0.15197286009788513, "fcm_dpo/delta": 0.19533714652061462, "fcm_dpo/margin": 2.685239315032959, "fcm_dpo/q_t": 0.41240644454956055, "grad_norm": 48.20946502685547, "learning_rate": 3.960892420986177e-07, "logits/chosen": 0.3213634490966797, "logits/rejected": 0.31023019552230835, "logps/chosen": -90.37286376953125, "logps/ref_chosen": -80.02254486083984, "logps/ref_rejected": -89.22705841064453, "logps/rejected": -102.26261138916016, "loss": 1.2183, "margin_dpo/margin_mean": 2.685239315032959, "margin_dpo/margin_std": 6.087411880493164, "step": 247 }, { "epoch": 0.3749055177626606, "fcm_dpo/beta": 0.1494050920009613, "fcm_dpo/delta": -0.13823390007019043, "fcm_dpo/margin": 4.874991416931152, "fcm_dpo/q_t": 0.36219164729118347, "grad_norm": 41.61267852783203, "learning_rate": 3.9501422739279953e-07, "logits/chosen": 0.26923084259033203, "logits/rejected": 0.29580309987068176, "logps/chosen": -74.59526062011719, "logps/ref_chosen": -65.37796020507812, "logps/ref_rejected": -61.365787506103516, "logps/rejected": -75.45808410644531, "loss": 1.0236, "margin_dpo/margin_mean": 4.8749918937683105, "margin_dpo/margin_std": 7.288352012634277, "step": 248 }, { "epoch": 0.3764172335600907, "fcm_dpo/beta": 0.1551496684551239, "fcm_dpo/delta": 0.28272420167922974, "fcm_dpo/margin": 2.0840535163879395, "fcm_dpo/q_t": 0.42946404218673706, "grad_norm": 52.263946533203125, "learning_rate": 3.9393515632731094e-07, "logits/chosen": 0.2614513039588928, "logits/rejected": 0.2892727851867676, "logps/chosen": -85.43158721923828, "logps/ref_chosen": -74.60145568847656, "logps/ref_rejected": -63.79338455200195, "logps/rejected": -76.70757293701172, "loss": 1.3308, "margin_dpo/margin_mean": 2.0840537548065186, "margin_dpo/margin_std": 6.5972580909729, "step": 249 }, { "epoch": 0.3779289493575208, "fcm_dpo/beta": 0.151943176984787, "fcm_dpo/delta": -0.22958946228027344, "fcm_dpo/margin": 5.341045379638672, "fcm_dpo/q_t": 0.3368905484676361, "grad_norm": 38.571678161621094, "learning_rate": 3.9285205908608934e-07, "logits/chosen": 0.3983957767486572, "logits/rejected": 0.3594222962856293, "logps/chosen": -71.35509490966797, "logps/ref_chosen": -61.938209533691406, "logps/ref_rejected": -72.21602630615234, "logps/rejected": -86.97395324707031, "loss": 0.9672, "margin_dpo/margin_mean": 5.341045379638672, "margin_dpo/margin_std": 7.081840515136719, "step": 250 }, { "epoch": 0.3794406651549509, "fcm_dpo/beta": 0.150055930018425, "fcm_dpo/delta": 0.051377397030591965, "fcm_dpo/margin": 3.6751105785369873, "fcm_dpo/q_t": 0.3900037407875061, "grad_norm": 44.067535400390625, "learning_rate": 3.9176496596569265e-07, "logits/chosen": 0.35311421751976013, "logits/rejected": 0.31758221983909607, "logps/chosen": -76.70925903320312, "logps/ref_chosen": -66.85694885253906, "logps/ref_rejected": -84.83396911621094, "logps/rejected": -98.36139678955078, "loss": 1.1125, "margin_dpo/margin_mean": 3.675110340118408, "margin_dpo/margin_std": 6.56778621673584, "step": 251 }, { "epoch": 0.38095238095238093, "fcm_dpo/beta": 0.154201477766037, "fcm_dpo/delta": 0.03998170793056488, "fcm_dpo/margin": 3.6078786849975586, "fcm_dpo/q_t": 0.3854004144668579, "grad_norm": 43.117156982421875, "learning_rate": 3.9067390737445254e-07, "logits/chosen": 0.26551708579063416, "logits/rejected": 0.21662738919258118, "logps/chosen": -65.75033569335938, "logps/ref_chosen": -56.22393035888672, "logps/ref_rejected": -77.1136245727539, "logps/rejected": -90.24790954589844, "loss": 1.1662, "margin_dpo/margin_mean": 3.6078789234161377, "margin_dpo/margin_std": 6.923517227172852, "step": 252 }, { "epoch": 0.382464096749811, "fcm_dpo/beta": 0.15114212036132812, "fcm_dpo/delta": 0.043711431324481964, "fcm_dpo/margin": 3.6810967922210693, "fcm_dpo/q_t": 0.3912495970726013, "grad_norm": 39.50165939331055, "learning_rate": 3.8957891383162304e-07, "logits/chosen": 0.35707587003707886, "logits/rejected": 0.3203071355819702, "logps/chosen": -62.26034164428711, "logps/ref_chosen": -52.21001434326172, "logps/ref_rejected": -58.75764846801758, "logps/rejected": -72.48906707763672, "loss": 1.1144, "margin_dpo/margin_mean": 3.6810965538024902, "margin_dpo/margin_std": 6.311221122741699, "step": 253 }, { "epoch": 0.3839758125472411, "fcm_dpo/beta": 0.15054289996623993, "fcm_dpo/delta": -0.09489809721708298, "fcm_dpo/margin": 4.567934513092041, "fcm_dpo/q_t": 0.36535903811454773, "grad_norm": 43.543678283691406, "learning_rate": 3.884800159665276e-07, "logits/chosen": 0.2740534543991089, "logits/rejected": 0.22902369499206543, "logps/chosen": -75.52339935302734, "logps/ref_chosen": -65.63632202148438, "logps/ref_rejected": -82.34425354003906, "logps/rejected": -96.79925537109375, "loss": 1.03, "margin_dpo/margin_mean": 4.567934036254883, "margin_dpo/margin_std": 6.8034586906433105, "step": 254 }, { "epoch": 0.3854875283446712, "fcm_dpo/beta": 0.14535465836524963, "fcm_dpo/delta": -0.19926312565803528, "fcm_dpo/margin": 5.382228374481201, "fcm_dpo/q_t": 0.34879204630851746, "grad_norm": 36.42246627807617, "learning_rate": 3.873772445177015e-07, "logits/chosen": 0.30461984872817993, "logits/rejected": 0.27335721254348755, "logps/chosen": -76.67054748535156, "logps/ref_chosen": -67.91108703613281, "logps/ref_rejected": -83.89114379882812, "logps/rejected": -98.03282928466797, "loss": 0.9773, "margin_dpo/margin_mean": 5.382227897644043, "margin_dpo/margin_std": 7.389373779296875, "step": 255 }, { "epoch": 0.3869992441421013, "fcm_dpo/beta": 0.14256221055984497, "fcm_dpo/delta": -0.06361023336648941, "fcm_dpo/margin": 4.620975971221924, "fcm_dpo/q_t": 0.3729037642478943, "grad_norm": 42.76344680786133, "learning_rate": 3.862706303320329e-07, "logits/chosen": 0.2616586685180664, "logits/rejected": 0.21148879826068878, "logps/chosen": -73.64082336425781, "logps/ref_chosen": -63.49998474121094, "logps/ref_rejected": -90.77104187011719, "logps/rejected": -105.53286743164062, "loss": 1.0775, "margin_dpo/margin_mean": 4.620975494384766, "margin_dpo/margin_std": 7.655024528503418, "step": 256 }, { "epoch": 0.3885109599395314, "fcm_dpo/beta": 0.1366242617368698, "fcm_dpo/delta": -0.27341482043266296, "fcm_dpo/margin": 6.215221881866455, "fcm_dpo/q_t": 0.3420974016189575, "grad_norm": 43.8852424621582, "learning_rate": 3.851602043638994e-07, "logits/chosen": 0.2920815050601959, "logits/rejected": 0.2364548146724701, "logps/chosen": -81.00942993164062, "logps/ref_chosen": -70.60064697265625, "logps/ref_rejected": -108.58313751220703, "logps/rejected": -125.20713806152344, "loss": 0.9761, "margin_dpo/margin_mean": 6.215222358703613, "margin_dpo/margin_std": 8.710794448852539, "step": 257 }, { "epoch": 0.3900226757369615, "fcm_dpo/beta": 0.13516320288181305, "fcm_dpo/delta": -0.05724785476922989, "fcm_dpo/margin": 4.827666282653809, "fcm_dpo/q_t": 0.3620738387107849, "grad_norm": 38.29178237915039, "learning_rate": 3.840459976743023e-07, "logits/chosen": 0.304485023021698, "logits/rejected": 0.2620296776294708, "logps/chosen": -70.34207153320312, "logps/ref_chosen": -59.25416564941406, "logps/ref_rejected": -85.58709716796875, "logps/rejected": -101.50267028808594, "loss": 0.9813, "margin_dpo/margin_mean": 4.827666282653809, "margin_dpo/margin_std": 5.940984725952148, "step": 258 }, { "epoch": 0.3915343915343915, "fcm_dpo/beta": 0.12516869604587555, "fcm_dpo/delta": -0.39691078662872314, "fcm_dpo/margin": 7.635554313659668, "fcm_dpo/q_t": 0.305417001247406, "grad_norm": 31.369762420654297, "learning_rate": 3.8292804142999796e-07, "logits/chosen": 0.2632516622543335, "logits/rejected": 0.1743551343679428, "logps/chosen": -74.25553131103516, "logps/ref_chosen": -65.43487548828125, "logps/ref_rejected": -95.41731262207031, "logps/rejected": -111.87351989746094, "loss": 0.839, "margin_dpo/margin_mean": 7.635554790496826, "margin_dpo/margin_std": 7.796221733093262, "step": 259 }, { "epoch": 0.3930461073318216, "fcm_dpo/beta": 0.12094450742006302, "fcm_dpo/delta": -0.1034887433052063, "fcm_dpo/margin": 5.749767303466797, "fcm_dpo/q_t": 0.3611637055873871, "grad_norm": 31.525815963745117, "learning_rate": 3.818063669026256e-07, "logits/chosen": 0.26589736342430115, "logits/rejected": 0.19382989406585693, "logps/chosen": -59.13935089111328, "logps/ref_chosen": -49.08958435058594, "logps/ref_rejected": -79.01708221435547, "logps/rejected": -94.81661987304688, "loss": 1.0165, "margin_dpo/margin_mean": 5.749767303466797, "margin_dpo/margin_std": 8.208473205566406, "step": 260 }, { "epoch": 0.3945578231292517, "fcm_dpo/beta": 0.12129713594913483, "fcm_dpo/delta": 0.0323747955262661, "fcm_dpo/margin": 4.69488525390625, "fcm_dpo/q_t": 0.3866156339645386, "grad_norm": 39.33859634399414, "learning_rate": 3.806810054678331e-07, "logits/chosen": 0.17265966534614563, "logits/rejected": 0.18686804175376892, "logps/chosen": -80.5276870727539, "logps/ref_chosen": -70.87239074707031, "logps/ref_rejected": -65.01522064208984, "logps/rejected": -79.36540222167969, "loss": 1.0891, "margin_dpo/margin_mean": 4.69488525390625, "margin_dpo/margin_std": 7.86189079284668, "step": 261 }, { "epoch": 0.3960695389266818, "fcm_dpo/beta": 0.12047137320041656, "fcm_dpo/delta": -0.08758753538131714, "fcm_dpo/margin": 5.658448219299316, "fcm_dpo/q_t": 0.3600999712944031, "grad_norm": 33.62800598144531, "learning_rate": 3.7955198860439887e-07, "logits/chosen": 0.3352086544036865, "logits/rejected": 0.28318509459495544, "logps/chosen": -78.01824951171875, "logps/ref_chosen": -67.8706283569336, "logps/ref_rejected": -88.7205810546875, "logps/rejected": -104.52664947509766, "loss": 0.9836, "margin_dpo/margin_mean": 5.658448696136475, "margin_dpo/margin_std": 7.331066131591797, "step": 262 }, { "epoch": 0.3975812547241119, "fcm_dpo/beta": 0.12068259716033936, "fcm_dpo/delta": 0.03326220065355301, "fcm_dpo/margin": 4.707237720489502, "fcm_dpo/q_t": 0.38229966163635254, "grad_norm": 32.8046989440918, "learning_rate": 3.784193478933516e-07, "logits/chosen": 0.28113555908203125, "logits/rejected": 0.1925010085105896, "logps/chosen": -65.09586334228516, "logps/ref_chosen": -55.194583892822266, "logps/ref_rejected": -80.54048156738281, "logps/rejected": -95.14900207519531, "loss": 1.0815, "margin_dpo/margin_mean": 4.707237243652344, "margin_dpo/margin_std": 7.578971862792969, "step": 263 }, { "epoch": 0.39909297052154197, "fcm_dpo/beta": 0.11911486089229584, "fcm_dpo/delta": -0.07203864306211472, "fcm_dpo/margin": 5.601800918579102, "fcm_dpo/q_t": 0.36986517906188965, "grad_norm": 37.93541717529297, "learning_rate": 3.7728311501708674e-07, "logits/chosen": 0.22223809361457825, "logits/rejected": 0.1795908510684967, "logps/chosen": -93.02720642089844, "logps/ref_chosen": -83.17068481445312, "logps/ref_rejected": -88.33625793457031, "logps/rejected": -103.79458618164062, "loss": 1.0562, "margin_dpo/margin_mean": 5.601801872253418, "margin_dpo/margin_std": 8.86518669128418, "step": 264 }, { "epoch": 0.40060468631897206, "fcm_dpo/beta": 0.11703728884458542, "fcm_dpo/delta": -0.13774295151233673, "fcm_dpo/margin": 6.217557430267334, "fcm_dpo/q_t": 0.3545699119567871, "grad_norm": 36.12660598754883, "learning_rate": 3.7614332175848027e-07, "logits/chosen": 0.361573725938797, "logits/rejected": 0.3007473647594452, "logps/chosen": -61.666404724121094, "logps/ref_chosen": -51.66284942626953, "logps/ref_rejected": -67.1720962524414, "logps/rejected": -83.3932113647461, "loss": 1.0502, "margin_dpo/margin_mean": 6.217557907104492, "margin_dpo/margin_std": 9.483875274658203, "step": 265 }, { "epoch": 0.4021164021164021, "fcm_dpo/beta": 0.11421965062618256, "fcm_dpo/delta": -0.060123834758996964, "fcm_dpo/margin": 5.743941783905029, "fcm_dpo/q_t": 0.3692956864833832, "grad_norm": 33.072265625, "learning_rate": 3.75e-07, "logits/chosen": 0.3172535300254822, "logits/rejected": 0.2529382109642029, "logps/chosen": -66.79499816894531, "logps/ref_chosen": -57.45049285888672, "logps/ref_rejected": -77.60826110839844, "logps/rejected": -92.69670104980469, "loss": 1.0264, "margin_dpo/margin_mean": 5.7439422607421875, "margin_dpo/margin_std": 8.409375190734863, "step": 266 }, { "epoch": 0.4036281179138322, "fcm_dpo/beta": 0.11638803780078888, "fcm_dpo/delta": 0.06048017740249634, "fcm_dpo/margin": 4.635496616363525, "fcm_dpo/q_t": 0.3898630738258362, "grad_norm": 29.81191062927246, "learning_rate": 3.738531817228131e-07, "logits/chosen": 0.325040340423584, "logits/rejected": 0.30857351422309875, "logps/chosen": -63.52793884277344, "logps/ref_chosen": -55.03535079956055, "logps/ref_rejected": -66.0953369140625, "logps/rejected": -79.22342681884766, "loss": 1.1486, "margin_dpo/margin_mean": 4.635497093200684, "margin_dpo/margin_std": 8.722969055175781, "step": 267 }, { "epoch": 0.4051398337112623, "fcm_dpo/beta": 0.1157803162932396, "fcm_dpo/delta": -0.022753987461328506, "fcm_dpo/margin": 5.360468864440918, "fcm_dpo/q_t": 0.37341806292533875, "grad_norm": 30.24585723876953, "learning_rate": 3.7270289900589204e-07, "logits/chosen": 0.2242826521396637, "logits/rejected": 0.20411059260368347, "logps/chosen": -73.67584228515625, "logps/ref_chosen": -65.07174682617188, "logps/ref_rejected": -71.42485809326172, "logps/rejected": -85.38943481445312, "loss": 1.0239, "margin_dpo/margin_mean": 5.360469341278076, "margin_dpo/margin_std": 7.504269123077393, "step": 268 }, { "epoch": 0.40665154950869237, "fcm_dpo/beta": 0.11419200897216797, "fcm_dpo/delta": -0.13315117359161377, "fcm_dpo/margin": 6.319992542266846, "fcm_dpo/q_t": 0.35147666931152344, "grad_norm": 30.362510681152344, "learning_rate": 3.7154918402511714e-07, "logits/chosen": 0.38946646451950073, "logits/rejected": 0.346447229385376, "logps/chosen": -76.6282730102539, "logps/ref_chosen": -67.1362075805664, "logps/ref_rejected": -82.55778503417969, "logps/rejected": -98.36984252929688, "loss": 0.9771, "margin_dpo/margin_mean": 6.3199920654296875, "margin_dpo/margin_std": 7.847947120666504, "step": 269 }, { "epoch": 0.40816326530612246, "fcm_dpo/beta": 0.11265287548303604, "fcm_dpo/delta": 0.07302689552307129, "fcm_dpo/margin": 4.712079048156738, "fcm_dpo/q_t": 0.3870832324028015, "grad_norm": 35.79549026489258, "learning_rate": 3.7039206905237656e-07, "logits/chosen": 0.3457132577896118, "logits/rejected": 0.28162434697151184, "logps/chosen": -75.90498352050781, "logps/ref_chosen": -66.6886978149414, "logps/ref_rejected": -85.16129302978516, "logps/rejected": -99.08966064453125, "loss": 1.1083, "margin_dpo/margin_mean": 4.712078094482422, "margin_dpo/margin_std": 8.200397491455078, "step": 270 }, { "epoch": 0.40967498110355255, "fcm_dpo/beta": 0.1142662763595581, "fcm_dpo/delta": 0.08464036136865616, "fcm_dpo/margin": 4.546772480010986, "fcm_dpo/q_t": 0.408853679895401, "grad_norm": 37.240455627441406, "learning_rate": 3.692315864546635e-07, "logits/chosen": 0.33662497997283936, "logits/rejected": 0.2839156985282898, "logps/chosen": -81.24642181396484, "logps/ref_chosen": -72.40754699707031, "logps/ref_rejected": -92.06311798095703, "logps/rejected": -105.44876861572266, "loss": 1.2112, "margin_dpo/margin_mean": 4.546772480010986, "margin_dpo/margin_std": 10.11610221862793, "step": 271 }, { "epoch": 0.41118669690098264, "fcm_dpo/beta": 0.11065268516540527, "fcm_dpo/delta": -0.3351590037345886, "fcm_dpo/margin": 8.187671661376953, "fcm_dpo/q_t": 0.3182171881198883, "grad_norm": 29.123409271240234, "learning_rate": 3.6806776869317067e-07, "logits/chosen": 0.3442276120185852, "logits/rejected": 0.3472369909286499, "logps/chosen": -73.9910888671875, "logps/ref_chosen": -66.60140228271484, "logps/ref_rejected": -67.74340057373047, "logps/rejected": -83.32075500488281, "loss": 0.849, "margin_dpo/margin_mean": 8.187671661376953, "margin_dpo/margin_std": 8.43535041809082, "step": 272 }, { "epoch": 0.4126984126984127, "fcm_dpo/beta": 0.10678541660308838, "fcm_dpo/delta": -0.08693182468414307, "fcm_dpo/margin": 6.37816858291626, "fcm_dpo/q_t": 0.3652857840061188, "grad_norm": 32.0718879699707, "learning_rate": 3.669006483223828e-07, "logits/chosen": 0.3392181396484375, "logits/rejected": 0.2840285897254944, "logps/chosen": -67.47334289550781, "logps/ref_chosen": -57.35487747192383, "logps/ref_rejected": -84.17168426513672, "logps/rejected": -100.66831970214844, "loss": 1.0624, "margin_dpo/margin_mean": 6.378169059753418, "margin_dpo/margin_std": 10.178689002990723, "step": 273 }, { "epoch": 0.41421012849584277, "fcm_dpo/beta": 0.10466930270195007, "fcm_dpo/delta": -0.0918121486902237, "fcm_dpo/margin": 6.54940128326416, "fcm_dpo/q_t": 0.36318063735961914, "grad_norm": 26.941736221313477, "learning_rate": 3.657302579891656e-07, "logits/chosen": 0.21288266777992249, "logits/rejected": 0.19043758511543274, "logps/chosen": -69.32371520996094, "logps/ref_chosen": -59.64149475097656, "logps/ref_rejected": -68.29348754882812, "logps/rejected": -84.52510070800781, "loss": 1.0363, "margin_dpo/margin_mean": 6.54940128326416, "margin_dpo/margin_std": 9.826854705810547, "step": 274 }, { "epoch": 0.41572184429327286, "fcm_dpo/beta": 0.10242324322462082, "fcm_dpo/delta": -0.13851945102214813, "fcm_dpo/margin": 7.114006042480469, "fcm_dpo/q_t": 0.3521403670310974, "grad_norm": 27.108287811279297, "learning_rate": 3.645566304318526e-07, "logits/chosen": 0.33497947454452515, "logits/rejected": 0.2582448720932007, "logps/chosen": -62.62168884277344, "logps/ref_chosen": -53.26664352416992, "logps/ref_rejected": -73.84062194824219, "logps/rejected": -90.3096694946289, "loss": 0.9787, "margin_dpo/margin_mean": 7.1140055656433105, "margin_dpo/margin_std": 9.458660125732422, "step": 275 }, { "epoch": 0.41723356009070295, "fcm_dpo/beta": 0.09872230142354965, "fcm_dpo/delta": -0.14825645089149475, "fcm_dpo/margin": 7.460000038146973, "fcm_dpo/q_t": 0.3443659543991089, "grad_norm": 26.00741195678711, "learning_rate": 3.633797984793294e-07, "logits/chosen": 0.2792910039424896, "logits/rejected": 0.2462400197982788, "logps/chosen": -61.21537399291992, "logps/ref_chosen": -53.02079772949219, "logps/ref_rejected": -61.56678771972656, "logps/rejected": -77.22136688232422, "loss": 0.9353, "margin_dpo/margin_mean": 7.460000038146973, "margin_dpo/margin_std": 8.583602905273438, "step": 276 }, { "epoch": 0.41874527588813304, "fcm_dpo/beta": 0.1010965034365654, "fcm_dpo/delta": 0.15551161766052246, "fcm_dpo/margin": 4.456169128417969, "fcm_dpo/q_t": 0.40854841470718384, "grad_norm": 30.41317367553711, "learning_rate": 3.6219979505011555e-07, "logits/chosen": 0.3963392674922943, "logits/rejected": 0.4135650098323822, "logps/chosen": -81.75311279296875, "logps/ref_chosen": -71.43299102783203, "logps/ref_rejected": -67.65852355957031, "logps/rejected": -82.434814453125, "loss": 1.1704, "margin_dpo/margin_mean": 4.4561686515808105, "margin_dpo/margin_std": 9.094401359558105, "step": 277 }, { "epoch": 0.42025699168556313, "fcm_dpo/beta": 0.10135230422019958, "fcm_dpo/delta": -0.08824028819799423, "fcm_dpo/margin": 6.714378356933594, "fcm_dpo/q_t": 0.3628871738910675, "grad_norm": 34.11618423461914, "learning_rate": 3.6101665315144353e-07, "logits/chosen": 0.2849266827106476, "logits/rejected": 0.24051225185394287, "logps/chosen": -77.12722778320312, "logps/ref_chosen": -67.11076354980469, "logps/ref_rejected": -88.74851989746094, "logps/rejected": -105.4793701171875, "loss": 1.031, "margin_dpo/margin_mean": 6.714378356933594, "margin_dpo/margin_std": 9.721296310424805, "step": 278 }, { "epoch": 0.4217687074829932, "fcm_dpo/beta": 0.09616866707801819, "fcm_dpo/delta": -0.2708485722541809, "fcm_dpo/margin": 8.823453903198242, "fcm_dpo/q_t": 0.3255208432674408, "grad_norm": 24.725061416625977, "learning_rate": 3.5983040587833563e-07, "logits/chosen": 0.31211304664611816, "logits/rejected": 0.27127406001091003, "logps/chosen": -61.78339385986328, "logps/ref_chosen": -54.49748611450195, "logps/ref_rejected": -70.42373657226562, "logps/rejected": -86.53308868408203, "loss": 0.8828, "margin_dpo/margin_mean": 8.823453903198242, "margin_dpo/margin_std": 9.46353816986084, "step": 279 }, { "epoch": 0.42328042328042326, "fcm_dpo/beta": 0.09034930169582367, "fcm_dpo/delta": -0.2888822555541992, "fcm_dpo/margin": 9.556791305541992, "fcm_dpo/q_t": 0.31915369629859924, "grad_norm": 22.01110076904297, "learning_rate": 3.586410864126781e-07, "logits/chosen": 0.3331267237663269, "logits/rejected": 0.2932952046394348, "logps/chosen": -68.28158569335938, "logps/ref_chosen": -60.43281173706055, "logps/ref_rejected": -78.39051818847656, "logps/rejected": -95.79608154296875, "loss": 0.8307, "margin_dpo/margin_mean": 9.556791305541992, "margin_dpo/margin_std": 9.003580093383789, "step": 280 }, { "epoch": 0.42479213907785335, "fcm_dpo/beta": 0.08677025139331818, "fcm_dpo/delta": -0.1635596752166748, "fcm_dpo/margin": 8.651966094970703, "fcm_dpo/q_t": 0.34282469749450684, "grad_norm": 21.93960952758789, "learning_rate": 3.574487280222929e-07, "logits/chosen": 0.3163455128669739, "logits/rejected": 0.3208748400211334, "logps/chosen": -69.17593383789062, "logps/ref_chosen": -60.2820930480957, "logps/ref_rejected": -62.04009246826172, "logps/rejected": -79.58589935302734, "loss": 0.9387, "margin_dpo/margin_mean": 8.651966094970703, "margin_dpo/margin_std": 10.228010177612305, "step": 281 }, { "epoch": 0.42630385487528344, "fcm_dpo/beta": 0.08634951710700989, "fcm_dpo/delta": -0.08936205506324768, "fcm_dpo/margin": 7.882460117340088, "fcm_dpo/q_t": 0.3642592430114746, "grad_norm": 26.94883155822754, "learning_rate": 3.562533640600075e-07, "logits/chosen": 0.2749297618865967, "logits/rejected": 0.2267666459083557, "logps/chosen": -71.17241668701172, "logps/ref_chosen": -60.623924255371094, "logps/ref_rejected": -68.67400360107422, "logps/rejected": -87.10496520996094, "loss": 1.0174, "margin_dpo/margin_mean": 7.88245964050293, "margin_dpo/margin_std": 10.839717864990234, "step": 282 }, { "epoch": 0.42781557067271353, "fcm_dpo/beta": 0.08574334532022476, "fcm_dpo/delta": 0.031460996717214584, "fcm_dpo/margin": 6.637720584869385, "fcm_dpo/q_t": 0.38565975427627563, "grad_norm": 31.932655334472656, "learning_rate": 3.550550279627215e-07, "logits/chosen": 0.3168482780456543, "logits/rejected": 0.2327718585729599, "logps/chosen": -78.9610824584961, "logps/ref_chosen": -67.64775085449219, "logps/ref_rejected": -99.96835327148438, "logps/rejected": -117.91941833496094, "loss": 1.0944, "margin_dpo/margin_mean": 6.637721061706543, "margin_dpo/margin_std": 11.038079261779785, "step": 283 }, { "epoch": 0.4293272864701436, "fcm_dpo/beta": 0.08396576344966888, "fcm_dpo/delta": -0.08251890540122986, "fcm_dpo/margin": 8.056709289550781, "fcm_dpo/q_t": 0.3591376543045044, "grad_norm": 22.93587875366211, "learning_rate": 3.5385375325047163e-07, "logits/chosen": 0.4004897475242615, "logits/rejected": 0.33910277485847473, "logps/chosen": -67.47752380371094, "logps/ref_chosen": -56.96742630004883, "logps/ref_rejected": -86.36236572265625, "logps/rejected": -104.92916870117188, "loss": 0.9775, "margin_dpo/margin_mean": 8.056710243225098, "margin_dpo/margin_std": 10.448408126831055, "step": 284 }, { "epoch": 0.4308390022675737, "fcm_dpo/beta": 0.08457425236701965, "fcm_dpo/delta": 0.02803485468029976, "fcm_dpo/margin": 6.776731014251709, "fcm_dpo/q_t": 0.38514000177383423, "grad_norm": 28.730764389038086, "learning_rate": 3.5264957352549375e-07, "logits/chosen": 0.3962095379829407, "logits/rejected": 0.36934328079223633, "logps/chosen": -85.16078186035156, "logps/ref_chosen": -71.65611267089844, "logps/ref_rejected": -81.63829803466797, "logps/rejected": -101.91970825195312, "loss": 1.0692, "margin_dpo/margin_mean": 6.776730537414551, "margin_dpo/margin_std": 10.675975799560547, "step": 285 }, { "epoch": 0.4323507180650038, "fcm_dpo/beta": 0.08037659525871277, "fcm_dpo/delta": -0.30072513222694397, "fcm_dpo/margin": 10.86276626586914, "fcm_dpo/q_t": 0.31989628076553345, "grad_norm": 23.31649398803711, "learning_rate": 3.514425224712835e-07, "logits/chosen": 0.2933613657951355, "logits/rejected": 0.2070850282907486, "logps/chosen": -73.17205047607422, "logps/ref_chosen": -61.07952117919922, "logps/ref_rejected": -91.28128051757812, "logps/rejected": -114.236572265625, "loss": 0.8592, "margin_dpo/margin_mean": 10.862764358520508, "margin_dpo/margin_std": 11.261336326599121, "step": 286 }, { "epoch": 0.43386243386243384, "fcm_dpo/beta": 0.07688654214143753, "fcm_dpo/delta": -0.24263165891170502, "fcm_dpo/margin": 10.706443786621094, "fcm_dpo/q_t": 0.33241310715675354, "grad_norm": 21.12100601196289, "learning_rate": 3.502326338516534e-07, "logits/chosen": 0.34049081802368164, "logits/rejected": 0.30252766609191895, "logps/chosen": -56.355995178222656, "logps/ref_chosen": -46.035789489746094, "logps/ref_rejected": -59.95293426513672, "logps/rejected": -80.97958374023438, "loss": 0.8952, "margin_dpo/margin_mean": 10.706443786621094, "margin_dpo/margin_std": 11.937873840332031, "step": 287 }, { "epoch": 0.43537414965986393, "fcm_dpo/beta": 0.0763639435172081, "fcm_dpo/delta": 0.05032455921173096, "fcm_dpo/margin": 7.232652187347412, "fcm_dpo/q_t": 0.38707786798477173, "grad_norm": 27.177230834960938, "learning_rate": 3.490199415097892e-07, "logits/chosen": 0.24442994594573975, "logits/rejected": 0.19374153017997742, "logps/chosen": -79.61978912353516, "logps/ref_chosen": -65.3908462524414, "logps/ref_rejected": -88.53607940673828, "logps/rejected": -109.99766540527344, "loss": 1.0832, "margin_dpo/margin_mean": 7.2326507568359375, "margin_dpo/margin_std": 11.782697677612305, "step": 288 }, { "epoch": 0.436885865457294, "fcm_dpo/beta": 0.07672218978404999, "fcm_dpo/delta": -0.0018288381397724152, "fcm_dpo/margin": 7.838685989379883, "fcm_dpo/q_t": 0.3824378252029419, "grad_norm": 24.673717498779297, "learning_rate": 3.4780447936730247e-07, "logits/chosen": 0.4334472417831421, "logits/rejected": 0.3991202414035797, "logps/chosen": -68.89795684814453, "logps/ref_chosen": -54.5936279296875, "logps/ref_rejected": -67.20855712890625, "logps/rejected": -89.35157012939453, "loss": 1.0778, "margin_dpo/margin_mean": 7.838686943054199, "margin_dpo/margin_std": 12.600842475891113, "step": 289 }, { "epoch": 0.4383975812547241, "fcm_dpo/beta": 0.07465630769729614, "fcm_dpo/delta": -0.12069503962993622, "fcm_dpo/margin": 9.523682594299316, "fcm_dpo/q_t": 0.3567933142185211, "grad_norm": 27.99603271484375, "learning_rate": 3.465862814232821e-07, "logits/chosen": 0.444963276386261, "logits/rejected": 0.3783135414123535, "logps/chosen": -78.06217193603516, "logps/ref_chosen": -61.38457489013672, "logps/ref_rejected": -91.92778015136719, "logps/rejected": -118.12904357910156, "loss": 0.9965, "margin_dpo/margin_mean": 9.523681640625, "margin_dpo/margin_std": 13.034603118896484, "step": 290 }, { "epoch": 0.4399092970521542, "fcm_dpo/beta": 0.07467788457870483, "fcm_dpo/delta": -0.09996376186609268, "fcm_dpo/margin": 9.23512077331543, "fcm_dpo/q_t": 0.3609282970428467, "grad_norm": 27.58074378967285, "learning_rate": 3.4536538175334343e-07, "logits/chosen": 0.5009055733680725, "logits/rejected": 0.44027698040008545, "logps/chosen": -66.63168334960938, "logps/ref_chosen": -50.863037109375, "logps/ref_rejected": -82.20868682861328, "logps/rejected": -107.21245574951172, "loss": 1.006, "margin_dpo/margin_mean": 9.23512077331543, "margin_dpo/margin_std": 12.100658416748047, "step": 291 }, { "epoch": 0.4414210128495843, "fcm_dpo/beta": 0.07281124591827393, "fcm_dpo/delta": -0.03628935664892197, "fcm_dpo/margin": 8.707426071166992, "fcm_dpo/q_t": 0.3691728711128235, "grad_norm": 28.966171264648438, "learning_rate": 3.4414181450867465e-07, "logits/chosen": 0.40035903453826904, "logits/rejected": 0.35217487812042236, "logps/chosen": -79.63127136230469, "logps/ref_chosen": -64.34888458251953, "logps/ref_rejected": -72.86434173583984, "logps/rejected": -96.85415649414062, "loss": 1.0421, "margin_dpo/margin_mean": 8.707426071166992, "margin_dpo/margin_std": 12.978470802307129, "step": 292 }, { "epoch": 0.4429327286470144, "fcm_dpo/beta": 0.07096080482006073, "fcm_dpo/delta": -0.2058958262205124, "fcm_dpo/margin": 11.13209342956543, "fcm_dpo/q_t": 0.34235402941703796, "grad_norm": 19.71794891357422, "learning_rate": 3.4291561391508185e-07, "logits/chosen": 0.4608747661113739, "logits/rejected": 0.3821406662464142, "logps/chosen": -71.60606384277344, "logps/ref_chosen": -54.869468688964844, "logps/ref_rejected": -81.858642578125, "logps/rejected": -109.72734069824219, "loss": 0.9815, "margin_dpo/margin_mean": 11.132092475891113, "margin_dpo/margin_std": 14.750845909118652, "step": 293 }, { "epoch": 0.4444444444444444, "fcm_dpo/beta": 0.06872005015611649, "fcm_dpo/delta": -0.03578688204288483, "fcm_dpo/margin": 9.196503639221191, "fcm_dpo/q_t": 0.37264391779899597, "grad_norm": 20.966272354125977, "learning_rate": 3.4168681427203153e-07, "logits/chosen": 0.41916030645370483, "logits/rejected": 0.3763779401779175, "logps/chosen": -72.47305297851562, "logps/ref_chosen": -56.670902252197266, "logps/ref_rejected": -70.32819366455078, "logps/rejected": -95.32685089111328, "loss": 1.02, "margin_dpo/margin_mean": 9.196502685546875, "margin_dpo/margin_std": 12.933424949645996, "step": 294 }, { "epoch": 0.4459561602418745, "fcm_dpo/beta": 0.06950195878744125, "fcm_dpo/delta": 0.04927371069788933, "fcm_dpo/margin": 7.9632248878479, "fcm_dpo/q_t": 0.39062416553497314, "grad_norm": 26.211135864257812, "learning_rate": 3.4045544995169125e-07, "logits/chosen": 0.41103291511535645, "logits/rejected": 0.32525143027305603, "logps/chosen": -67.91084289550781, "logps/ref_chosen": -50.40088653564453, "logps/ref_rejected": -83.43521881103516, "logps/rejected": -108.90840148925781, "loss": 1.0986, "margin_dpo/margin_mean": 7.963224411010742, "margin_dpo/margin_std": 13.621759414672852, "step": 295 }, { "epoch": 0.4474678760393046, "fcm_dpo/beta": 0.06849856674671173, "fcm_dpo/delta": -0.0997304916381836, "fcm_dpo/margin": 10.102575302124023, "fcm_dpo/q_t": 0.3605468273162842, "grad_norm": 23.900463104248047, "learning_rate": 3.392215553979679e-07, "logits/chosen": 0.36223775148391724, "logits/rejected": 0.32354265451431274, "logps/chosen": -86.39460754394531, "logps/ref_chosen": -69.15034484863281, "logps/ref_rejected": -89.60166931152344, "logps/rejected": -116.94850158691406, "loss": 0.9945, "margin_dpo/margin_mean": 10.102575302124023, "margin_dpo/margin_std": 13.747122764587402, "step": 296 }, { "epoch": 0.4489795918367347, "fcm_dpo/beta": 0.06773459911346436, "fcm_dpo/delta": -0.12187168002128601, "fcm_dpo/margin": 10.525908470153809, "fcm_dpo/q_t": 0.35121363401412964, "grad_norm": 20.95337677001953, "learning_rate": 3.3798516512554485e-07, "logits/chosen": 0.4012449383735657, "logits/rejected": 0.34180164337158203, "logps/chosen": -76.49490356445312, "logps/ref_chosen": -58.01630401611328, "logps/ref_rejected": -69.95780944824219, "logps/rejected": -98.96231079101562, "loss": 0.9485, "margin_dpo/margin_mean": 10.525908470153809, "margin_dpo/margin_std": 12.319080352783203, "step": 297 }, { "epoch": 0.4504913076341648, "fcm_dpo/beta": 0.06668893992900848, "fcm_dpo/delta": -0.0053915292955935, "fcm_dpo/margin": 9.072604179382324, "fcm_dpo/q_t": 0.3817402124404907, "grad_norm": 23.481307983398438, "learning_rate": 3.367463137189156e-07, "logits/chosen": 0.5120540857315063, "logits/rejected": 0.45104530453681946, "logps/chosen": -73.73829650878906, "logps/ref_chosen": -56.1693115234375, "logps/ref_rejected": -68.55052185058594, "logps/rejected": -95.19210815429688, "loss": 1.1053, "margin_dpo/margin_mean": 9.072603225708008, "margin_dpo/margin_std": 15.78736686706543, "step": 298 }, { "epoch": 0.4520030234315949, "fcm_dpo/beta": 0.0676107257604599, "fcm_dpo/delta": 0.11564286053180695, "fcm_dpo/margin": 7.249474048614502, "fcm_dpo/q_t": 0.40451353788375854, "grad_norm": 25.117929458618164, "learning_rate": 3.355050358314172e-07, "logits/chosen": 0.33155155181884766, "logits/rejected": 0.30397695302963257, "logps/chosen": -79.69677734375, "logps/ref_chosen": -62.31780242919922, "logps/ref_rejected": -72.60028839111328, "logps/rejected": -97.22874450683594, "loss": 1.2026, "margin_dpo/margin_mean": 7.24947452545166, "margin_dpo/margin_std": 15.536123275756836, "step": 299 }, { "epoch": 0.45351473922902497, "fcm_dpo/beta": 0.0678647980093956, "fcm_dpo/delta": -0.053635694086551666, "fcm_dpo/margin": 9.58050537109375, "fcm_dpo/q_t": 0.3681472837924957, "grad_norm": 23.66339874267578, "learning_rate": 3.3426136618426043e-07, "logits/chosen": 0.4309826195240021, "logits/rejected": 0.37077367305755615, "logps/chosen": -77.88261413574219, "logps/ref_chosen": -60.38157653808594, "logps/ref_rejected": -75.45442199707031, "logps/rejected": -102.53596496582031, "loss": 1.0712, "margin_dpo/margin_mean": 9.58050537109375, "margin_dpo/margin_std": 15.316072463989258, "step": 300 }, { "epoch": 0.455026455026455, "fcm_dpo/beta": 0.06702058017253876, "fcm_dpo/delta": -0.002799011766910553, "fcm_dpo/margin": 8.974074363708496, "fcm_dpo/q_t": 0.38000398874282837, "grad_norm": 20.609079360961914, "learning_rate": 3.3301533956555885e-07, "logits/chosen": 0.4343421459197998, "logits/rejected": 0.4045522212982178, "logps/chosen": -69.66383361816406, "logps/ref_chosen": -52.85089111328125, "logps/ref_rejected": -69.97584533691406, "logps/rejected": -95.76286315917969, "loss": 1.0996, "margin_dpo/margin_mean": 8.974075317382812, "margin_dpo/margin_std": 15.309377670288086, "step": 301 }, { "epoch": 0.4565381708238851, "fcm_dpo/beta": 0.0691293403506279, "fcm_dpo/delta": 0.16866973042488098, "fcm_dpo/margin": 6.349787712097168, "fcm_dpo/q_t": 0.4120634198188782, "grad_norm": 28.044296264648438, "learning_rate": 3.317669908293554e-07, "logits/chosen": 0.30643230676651, "logits/rejected": 0.2559657692909241, "logps/chosen": -85.57098388671875, "logps/ref_chosen": -66.96650695800781, "logps/ref_rejected": -88.09510803222656, "logps/rejected": -113.04937744140625, "loss": 1.2034, "margin_dpo/margin_mean": 6.349788188934326, "margin_dpo/margin_std": 14.239937782287598, "step": 302 }, { "epoch": 0.4580498866213152, "fcm_dpo/beta": 0.06739898025989532, "fcm_dpo/delta": -0.25346097350120544, "fcm_dpo/margin": 12.354694366455078, "fcm_dpo/q_t": 0.33981257677078247, "grad_norm": 21.23597526550293, "learning_rate": 3.3051635489464793e-07, "logits/chosen": 0.4204411506652832, "logits/rejected": 0.35760125517845154, "logps/chosen": -77.63226318359375, "logps/ref_chosen": -62.12152862548828, "logps/ref_rejected": -90.31204223632812, "logps/rejected": -118.17748260498047, "loss": 0.9833, "margin_dpo/margin_mean": 12.354693412780762, "margin_dpo/margin_std": 16.887344360351562, "step": 303 }, { "epoch": 0.4595616024187453, "fcm_dpo/beta": 0.0645943209528923, "fcm_dpo/delta": -0.1871630847454071, "fcm_dpo/margin": 11.962800979614258, "fcm_dpo/q_t": 0.336465060710907, "grad_norm": 20.540264129638672, "learning_rate": 3.292634667444117e-07, "logits/chosen": 0.4185236394405365, "logits/rejected": 0.3664189577102661, "logps/chosen": -74.27428436279297, "logps/ref_chosen": -60.695091247558594, "logps/ref_rejected": -78.2525405883789, "logps/rejected": -103.79454040527344, "loss": 0.9063, "margin_dpo/margin_mean": 11.962800979614258, "margin_dpo/margin_std": 12.955540657043457, "step": 304 }, { "epoch": 0.46107331821617537, "fcm_dpo/beta": 0.06333063542842865, "fcm_dpo/delta": -0.008959665894508362, "fcm_dpo/margin": 9.583664894104004, "fcm_dpo/q_t": 0.37659746408462524, "grad_norm": 24.14905548095703, "learning_rate": 3.280083614246217e-07, "logits/chosen": 0.356367290019989, "logits/rejected": 0.37138211727142334, "logps/chosen": -89.701416015625, "logps/ref_chosen": -72.69914245605469, "logps/ref_rejected": -65.65670776367188, "logps/rejected": -92.24266052246094, "loss": 1.0968, "margin_dpo/margin_mean": 9.58366584777832, "margin_dpo/margin_std": 16.054397583007812, "step": 305 }, { "epoch": 0.46258503401360546, "fcm_dpo/beta": 0.06377913057804108, "fcm_dpo/delta": 0.05049442499876022, "fcm_dpo/margin": 8.643620491027832, "fcm_dpo/q_t": 0.38526010513305664, "grad_norm": 20.86713981628418, "learning_rate": 3.267510740432719e-07, "logits/chosen": 0.4515618681907654, "logits/rejected": 0.3459406793117523, "logps/chosen": -69.68206787109375, "logps/ref_chosen": -53.97052764892578, "logps/ref_rejected": -71.02423095703125, "logps/rejected": -95.37939453125, "loss": 1.0697, "margin_dpo/margin_mean": 8.643620491027832, "margin_dpo/margin_std": 13.185958862304688, "step": 306 }, { "epoch": 0.46409674981103555, "fcm_dpo/beta": 0.06696438044309616, "fcm_dpo/delta": 0.20386965572834015, "fcm_dpo/margin": 6.0009446144104, "fcm_dpo/q_t": 0.42609280347824097, "grad_norm": 28.50688934326172, "learning_rate": 3.2549163976939285e-07, "logits/chosen": 0.4598926901817322, "logits/rejected": 0.41593605279922485, "logps/chosen": -70.69493865966797, "logps/ref_chosen": -57.413108825683594, "logps/ref_rejected": -68.68010711669922, "logps/rejected": -87.96287536621094, "loss": 1.2877, "margin_dpo/margin_mean": 6.000943183898926, "margin_dpo/margin_std": 16.271331787109375, "step": 307 }, { "epoch": 0.4656084656084656, "fcm_dpo/beta": 0.06626871228218079, "fcm_dpo/delta": -0.051046308130025864, "fcm_dpo/margin": 9.75979232788086, "fcm_dpo/q_t": 0.36723071336746216, "grad_norm": 20.633642196655273, "learning_rate": 3.2423009383206874e-07, "logits/chosen": 0.41328901052474976, "logits/rejected": 0.3950084447860718, "logps/chosen": -80.57633972167969, "logps/ref_chosen": -66.59879302978516, "logps/ref_rejected": -74.337158203125, "logps/rejected": -98.07449340820312, "loss": 1.0541, "margin_dpo/margin_mean": 9.75979232788086, "margin_dpo/margin_std": 14.91418170928955, "step": 308 }, { "epoch": 0.4671201814058957, "fcm_dpo/beta": 0.06552757322788239, "fcm_dpo/delta": -0.05448343604803085, "fcm_dpo/margin": 9.916336059570312, "fcm_dpo/q_t": 0.36798107624053955, "grad_norm": 24.828540802001953, "learning_rate": 3.229664715194511e-07, "logits/chosen": 0.47302621603012085, "logits/rejected": 0.4183216691017151, "logps/chosen": -81.97295379638672, "logps/ref_chosen": -65.39474487304688, "logps/ref_rejected": -75.70930480957031, "logps/rejected": -102.20384979248047, "loss": 1.0241, "margin_dpo/margin_mean": 9.916337966918945, "margin_dpo/margin_std": 14.151466369628906, "step": 309 }, { "epoch": 0.46863189720332576, "fcm_dpo/beta": 0.0674559623003006, "fcm_dpo/delta": 0.19796836376190186, "fcm_dpo/margin": 6.087133407592773, "fcm_dpo/q_t": 0.4217107594013214, "grad_norm": 27.96128273010254, "learning_rate": 3.2170080817777257e-07, "logits/chosen": 0.45399510860443115, "logits/rejected": 0.4443548321723938, "logps/chosen": -91.49375915527344, "logps/ref_chosen": -74.66827392578125, "logps/ref_rejected": -80.5689697265625, "logps/rejected": -103.48159790039062, "loss": 1.2421, "margin_dpo/margin_mean": 6.087133407592773, "margin_dpo/margin_std": 14.875473976135254, "step": 310 }, { "epoch": 0.47014361300075586, "fcm_dpo/beta": 0.06830902397632599, "fcm_dpo/delta": 0.02322382479906082, "fcm_dpo/margin": 8.457318305969238, "fcm_dpo/q_t": 0.38491734862327576, "grad_norm": 25.18285369873047, "learning_rate": 3.204331392103574e-07, "logits/chosen": 0.3568248450756073, "logits/rejected": 0.24125118553638458, "logps/chosen": -72.52886962890625, "logps/ref_chosen": -59.738033294677734, "logps/ref_rejected": -93.60757446289062, "logps/rejected": -114.85572814941406, "loss": 1.0963, "margin_dpo/margin_mean": 8.457318305969238, "margin_dpo/margin_std": 14.571065902709961, "step": 311 }, { "epoch": 0.47165532879818595, "fcm_dpo/beta": 0.06694841384887695, "fcm_dpo/delta": -0.21142852306365967, "fcm_dpo/margin": 11.876443862915039, "fcm_dpo/q_t": 0.3356952965259552, "grad_norm": 20.244081497192383, "learning_rate": 3.1916350007663176e-07, "logits/chosen": 0.43080633878707886, "logits/rejected": 0.3392553925514221, "logps/chosen": -67.07718658447266, "logps/ref_chosen": -53.816436767578125, "logps/ref_rejected": -68.6575698852539, "logps/rejected": -93.79476928710938, "loss": 0.9214, "margin_dpo/margin_mean": 11.876442909240723, "margin_dpo/margin_std": 13.817426681518555, "step": 312 }, { "epoch": 0.47316704459561604, "fcm_dpo/beta": 0.06668175011873245, "fcm_dpo/delta": 0.09295180439949036, "fcm_dpo/margin": 7.676837921142578, "fcm_dpo/q_t": 0.4016116261482239, "grad_norm": 23.693931579589844, "learning_rate": 3.178919262911314e-07, "logits/chosen": 0.4855688810348511, "logits/rejected": 0.4634004533290863, "logps/chosen": -72.90442657470703, "logps/ref_chosen": -59.957359313964844, "logps/ref_rejected": -69.31729888916016, "logps/rejected": -89.94120788574219, "loss": 1.173, "margin_dpo/margin_mean": 7.676837921142578, "margin_dpo/margin_std": 15.700519561767578, "step": 313 }, { "epoch": 0.47467876039304613, "fcm_dpo/beta": 0.06443378329277039, "fcm_dpo/delta": -0.2552639842033386, "fcm_dpo/margin": 12.927923202514648, "fcm_dpo/q_t": 0.33266928791999817, "grad_norm": 20.972042083740234, "learning_rate": 3.166184534225087e-07, "logits/chosen": 0.42626792192459106, "logits/rejected": 0.44356074929237366, "logps/chosen": -82.15957641601562, "logps/ref_chosen": -70.26815795898438, "logps/ref_rejected": -69.23971557617188, "logps/rejected": -94.05905151367188, "loss": 0.8906, "margin_dpo/margin_mean": 12.927923202514648, "margin_dpo/margin_std": 14.38834285736084, "step": 314 }, { "epoch": 0.47619047619047616, "fcm_dpo/beta": 0.0640818327665329, "fcm_dpo/delta": -0.038807280361652374, "fcm_dpo/margin": 9.904523849487305, "fcm_dpo/q_t": 0.36713922023773193, "grad_norm": 20.750200271606445, "learning_rate": 3.1534311709253723e-07, "logits/chosen": 0.3727494478225708, "logits/rejected": 0.3308834433555603, "logps/chosen": -81.04085540771484, "logps/ref_chosen": -67.79469299316406, "logps/ref_rejected": -74.55148315429688, "logps/rejected": -97.70216369628906, "loss": 1.0285, "margin_dpo/margin_mean": 9.904522895812988, "margin_dpo/margin_std": 13.835953712463379, "step": 315 }, { "epoch": 0.47770219198790626, "fcm_dpo/beta": 0.062294527888298035, "fcm_dpo/delta": -0.20774495601654053, "fcm_dpo/margin": 12.683357238769531, "fcm_dpo/q_t": 0.33479374647140503, "grad_norm": 22.39960289001465, "learning_rate": 3.1406595297511564e-07, "logits/chosen": 0.34574979543685913, "logits/rejected": 0.23477637767791748, "logps/chosen": -67.19189453125, "logps/ref_chosen": -55.288482666015625, "logps/ref_rejected": -96.15723419189453, "logps/rejected": -120.74400329589844, "loss": 0.9071, "margin_dpo/margin_mean": 12.683357238769531, "margin_dpo/margin_std": 13.058753967285156, "step": 316 }, { "epoch": 0.47921390778533635, "fcm_dpo/beta": 0.058482684195041656, "fcm_dpo/delta": -0.22425857186317444, "fcm_dpo/margin": 13.763182640075684, "fcm_dpo/q_t": 0.33089691400527954, "grad_norm": 21.47138786315918, "learning_rate": 3.1278699679526975e-07, "logits/chosen": 0.46589696407318115, "logits/rejected": 0.4175897240638733, "logps/chosen": -65.06788635253906, "logps/ref_chosen": -54.58137512207031, "logps/ref_rejected": -72.77232360839844, "logps/rejected": -97.02201843261719, "loss": 0.8863, "margin_dpo/margin_mean": 13.763182640075684, "margin_dpo/margin_std": 14.472084045410156, "step": 317 }, { "epoch": 0.48072562358276644, "fcm_dpo/beta": 0.05881628394126892, "fcm_dpo/delta": 0.09629607200622559, "fcm_dpo/margin": 8.648832321166992, "fcm_dpo/q_t": 0.4013023376464844, "grad_norm": 22.732677459716797, "learning_rate": 3.1150628432815336e-07, "logits/chosen": 0.4817764163017273, "logits/rejected": 0.420946329832077, "logps/chosen": -66.19764709472656, "logps/ref_chosen": -52.88822937011719, "logps/ref_rejected": -80.63988494873047, "logps/rejected": -102.59813690185547, "loss": 1.2008, "margin_dpo/margin_mean": 8.648832321166992, "margin_dpo/margin_std": 18.586837768554688, "step": 318 }, { "epoch": 0.48223733938019653, "fcm_dpo/beta": 0.058495644479990005, "fcm_dpo/delta": -0.06954564899206161, "fcm_dpo/margin": 11.364679336547852, "fcm_dpo/q_t": 0.3667501211166382, "grad_norm": 20.766460418701172, "learning_rate": 3.1022385139804707e-07, "logits/chosen": 0.4081874191761017, "logits/rejected": 0.3880736231803894, "logps/chosen": -75.96013641357422, "logps/ref_chosen": -64.36333465576172, "logps/ref_rejected": -79.47296142578125, "logps/rejected": -102.43443298339844, "loss": 1.0272, "margin_dpo/margin_mean": 11.364680290222168, "margin_dpo/margin_std": 16.68465805053711, "step": 319 }, { "epoch": 0.4837490551776266, "fcm_dpo/beta": 0.05962809920310974, "fcm_dpo/delta": 0.043348222970962524, "fcm_dpo/margin": 9.29432201385498, "fcm_dpo/q_t": 0.3930048942565918, "grad_norm": 21.635061264038086, "learning_rate": 3.0893973387735683e-07, "logits/chosen": 0.32784324884414673, "logits/rejected": 0.2866894602775574, "logps/chosen": -60.18971633911133, "logps/ref_chosen": -49.558746337890625, "logps/ref_rejected": -71.23444366455078, "logps/rejected": -91.15972900390625, "loss": 1.1116, "margin_dpo/margin_mean": 9.29432201385498, "margin_dpo/margin_std": 15.848209381103516, "step": 320 }, { "epoch": 0.4852607709750567, "fcm_dpo/beta": 0.05789444223046303, "fcm_dpo/delta": -0.04901197552680969, "fcm_dpo/margin": 11.1171236038208, "fcm_dpo/q_t": 0.3659276068210602, "grad_norm": 22.60474967956543, "learning_rate": 3.0765396768561004e-07, "logits/chosen": 0.40224313735961914, "logits/rejected": 0.3855370283126831, "logps/chosen": -64.09539794921875, "logps/ref_chosen": -52.08526611328125, "logps/ref_rejected": -55.58674621582031, "logps/rejected": -78.71399688720703, "loss": 1.0382, "margin_dpo/margin_mean": 11.1171236038208, "margin_dpo/margin_std": 15.96502685546875, "step": 321 }, { "epoch": 0.48677248677248675, "fcm_dpo/beta": 0.056709811091423035, "fcm_dpo/delta": -0.2197633981704712, "fcm_dpo/margin": 14.153626441955566, "fcm_dpo/q_t": 0.33018940687179565, "grad_norm": 18.229646682739258, "learning_rate": 3.063665887884511e-07, "logits/chosen": 0.466824471950531, "logits/rejected": 0.39176371693611145, "logps/chosen": -61.10657501220703, "logps/ref_chosen": -47.404109954833984, "logps/ref_rejected": -73.4260025024414, "logps/rejected": -101.28208923339844, "loss": 0.888, "margin_dpo/margin_mean": 14.153627395629883, "margin_dpo/margin_std": 14.78386116027832, "step": 322 }, { "epoch": 0.48828420256991684, "fcm_dpo/beta": 0.05662210285663605, "fcm_dpo/delta": 0.04281177371740341, "fcm_dpo/margin": 9.853229522705078, "fcm_dpo/q_t": 0.3951577842235565, "grad_norm": 21.729673385620117, "learning_rate": 3.0507763319663517e-07, "logits/chosen": 0.36065399646759033, "logits/rejected": 0.29074978828430176, "logps/chosen": -84.03905487060547, "logps/ref_chosen": -70.00630187988281, "logps/ref_rejected": -86.96690368652344, "logps/rejected": -110.8528823852539, "loss": 1.1531, "margin_dpo/margin_mean": 9.853229522705078, "margin_dpo/margin_std": 19.110153198242188, "step": 323 }, { "epoch": 0.4897959183673469, "fcm_dpo/beta": 0.055441729724407196, "fcm_dpo/delta": -0.06376861035823822, "fcm_dpo/margin": 11.880046844482422, "fcm_dpo/q_t": 0.3605431914329529, "grad_norm": 22.298242568969727, "learning_rate": 3.0378713696502097e-07, "logits/chosen": 0.48270517587661743, "logits/rejected": 0.4262162446975708, "logps/chosen": -67.59870147705078, "logps/ref_chosen": -55.88882064819336, "logps/ref_rejected": -75.23088073730469, "logps/rejected": -98.82080078125, "loss": 0.9783, "margin_dpo/margin_mean": 11.880046844482422, "margin_dpo/margin_std": 14.770977973937988, "step": 324 }, { "epoch": 0.491307634164777, "fcm_dpo/beta": 0.05457047373056412, "fcm_dpo/delta": -0.04917249083518982, "fcm_dpo/margin": 11.787601470947266, "fcm_dpo/q_t": 0.3657293915748596, "grad_norm": 19.67741584777832, "learning_rate": 3.0249513619156206e-07, "logits/chosen": 0.43900197744369507, "logits/rejected": 0.37955278158187866, "logps/chosen": -79.83528137207031, "logps/ref_chosen": -64.14701843261719, "logps/ref_rejected": -79.91143798828125, "logps/rejected": -107.38729858398438, "loss": 1.0173, "margin_dpo/margin_mean": 11.787601470947266, "margin_dpo/margin_std": 16.2464542388916, "step": 325 }, { "epoch": 0.4928193499622071, "fcm_dpo/beta": 0.056850794702768326, "fcm_dpo/delta": 0.23083430528640747, "fcm_dpo/margin": 6.6530256271362305, "fcm_dpo/q_t": 0.42433783411979675, "grad_norm": 23.759443283081055, "learning_rate": 3.012016670162977e-07, "logits/chosen": 0.40460771322250366, "logits/rejected": 0.40543869137763977, "logps/chosen": -95.39392852783203, "logps/ref_chosen": -75.53131103515625, "logps/ref_rejected": -76.5898666381836, "logps/rejected": -103.10550689697266, "loss": 1.2501, "margin_dpo/margin_mean": 6.6530256271362305, "margin_dpo/margin_std": 16.712099075317383, "step": 326 }, { "epoch": 0.4943310657596372, "fcm_dpo/beta": 0.05837913602590561, "fcm_dpo/delta": 0.055630847811698914, "fcm_dpo/margin": 9.364798545837402, "fcm_dpo/q_t": 0.39527618885040283, "grad_norm": 25.109912872314453, "learning_rate": 2.99906765620341e-07, "logits/chosen": 0.35513800382614136, "logits/rejected": 0.31463754177093506, "logps/chosen": -87.85820770263672, "logps/ref_chosen": -69.33717346191406, "logps/ref_rejected": -73.37751770019531, "logps/rejected": -101.26336669921875, "loss": 1.1821, "margin_dpo/margin_mean": 9.364799499511719, "margin_dpo/margin_std": 19.344242095947266, "step": 327 }, { "epoch": 0.4958427815570673, "fcm_dpo/beta": 0.05786886066198349, "fcm_dpo/delta": -0.04748005419969559, "fcm_dpo/margin": 11.136017799377441, "fcm_dpo/q_t": 0.37086835503578186, "grad_norm": 21.607135772705078, "learning_rate": 2.9861046822486766e-07, "logits/chosen": 0.4023295044898987, "logits/rejected": 0.37287038564682007, "logps/chosen": -76.78651428222656, "logps/ref_chosen": -61.70623016357422, "logps/ref_rejected": -83.73808288574219, "logps/rejected": -109.95437622070312, "loss": 1.0223, "margin_dpo/margin_mean": 11.136016845703125, "margin_dpo/margin_std": 16.010103225708008, "step": 328 }, { "epoch": 0.4973544973544973, "fcm_dpo/beta": 0.056619107723236084, "fcm_dpo/delta": -0.11470720171928406, "fcm_dpo/margin": 12.474262237548828, "fcm_dpo/q_t": 0.35940489172935486, "grad_norm": 22.656681060791016, "learning_rate": 2.9731281109010253e-07, "logits/chosen": 0.49048396944999695, "logits/rejected": 0.4374903738498688, "logps/chosen": -82.60576629638672, "logps/ref_chosen": -64.4984130859375, "logps/ref_rejected": -83.6591796875, "logps/rejected": -114.24079132080078, "loss": 0.9853, "margin_dpo/margin_mean": 12.474262237548828, "margin_dpo/margin_std": 16.834518432617188, "step": 329 }, { "epoch": 0.4988662131519274, "fcm_dpo/beta": 0.05554806441068649, "fcm_dpo/delta": -0.13988427817821503, "fcm_dpo/margin": 13.138544082641602, "fcm_dpo/q_t": 0.3539305031299591, "grad_norm": 19.861940383911133, "learning_rate": 2.9601383051430505e-07, "logits/chosen": 0.4434637427330017, "logits/rejected": 0.37669873237609863, "logps/chosen": -69.35079193115234, "logps/ref_chosen": -54.80464172363281, "logps/ref_rejected": -75.3194351196289, "logps/rejected": -103.00413513183594, "loss": 1.0449, "margin_dpo/margin_mean": 13.138545036315918, "margin_dpo/margin_std": 19.897079467773438, "step": 330 }, { "epoch": 0.5003779289493575, "fcm_dpo/beta": 0.05216227471828461, "fcm_dpo/delta": -0.3423612713813782, "fcm_dpo/margin": 17.468751907348633, "fcm_dpo/q_t": 0.31727951765060425, "grad_norm": 18.028335571289062, "learning_rate": 2.947135628327544e-07, "logits/chosen": 0.5425066947937012, "logits/rejected": 0.5136238932609558, "logps/chosen": -75.56310272216797, "logps/ref_chosen": -59.242584228515625, "logps/ref_rejected": -69.87483215332031, "logps/rejected": -103.66410827636719, "loss": 0.8929, "margin_dpo/margin_mean": 17.468753814697266, "margin_dpo/margin_std": 20.137441635131836, "step": 331 }, { "epoch": 0.5018896447467877, "fcm_dpo/beta": 0.050989434123039246, "fcm_dpo/delta": -0.1064976155757904, "fcm_dpo/margin": 13.684735298156738, "fcm_dpo/q_t": 0.3593630790710449, "grad_norm": 18.97589111328125, "learning_rate": 2.934120444167326e-07, "logits/chosen": 0.4010659456253052, "logits/rejected": 0.3557261824607849, "logps/chosen": -85.42317199707031, "logps/ref_chosen": -67.10975646972656, "logps/ref_rejected": -77.11839294433594, "logps/rejected": -109.11653900146484, "loss": 0.9728, "margin_dpo/margin_mean": 13.684735298156738, "margin_dpo/margin_std": 17.230125427246094, "step": 332 }, { "epoch": 0.5034013605442177, "fcm_dpo/beta": 0.04889250546693802, "fcm_dpo/delta": -0.1948154717683792, "fcm_dpo/margin": 15.953669548034668, "fcm_dpo/q_t": 0.3400271236896515, "grad_norm": 16.75796890258789, "learning_rate": 2.921093116725076e-07, "logits/chosen": 0.47107183933258057, "logits/rejected": 0.40521296858787537, "logps/chosen": -77.97359466552734, "logps/ref_chosen": -58.381134033203125, "logps/ref_rejected": -85.02839660644531, "logps/rejected": -120.57453155517578, "loss": 0.9096, "margin_dpo/margin_mean": 15.953670501708984, "margin_dpo/margin_std": 17.924297332763672, "step": 333 }, { "epoch": 0.5049130763416477, "fcm_dpo/beta": 0.04890444874763489, "fcm_dpo/delta": 0.07669065147638321, "fcm_dpo/margin": 10.776724815368652, "fcm_dpo/q_t": 0.3956913352012634, "grad_norm": 21.935901641845703, "learning_rate": 2.9080540104031484e-07, "logits/chosen": 0.47279083728790283, "logits/rejected": 0.4287077784538269, "logps/chosen": -87.2547378540039, "logps/ref_chosen": -66.89199829101562, "logps/ref_rejected": -91.83695220947266, "logps/rejected": -122.97640991210938, "loss": 1.1689, "margin_dpo/margin_mean": 10.776723861694336, "margin_dpo/margin_std": 21.717811584472656, "step": 334 }, { "epoch": 0.5064247921390779, "fcm_dpo/beta": 0.048929620534181595, "fcm_dpo/delta": -0.038483135402202606, "fcm_dpo/margin": 12.993217468261719, "fcm_dpo/q_t": 0.37484511733055115, "grad_norm": 22.736284255981445, "learning_rate": 2.895003489933375e-07, "logits/chosen": 0.4713771939277649, "logits/rejected": 0.4355486035346985, "logps/chosen": -81.71247100830078, "logps/ref_chosen": -61.51445770263672, "logps/ref_rejected": -75.68916320800781, "logps/rejected": -108.8803939819336, "loss": 1.0595, "margin_dpo/margin_mean": 12.993217468261719, "margin_dpo/margin_std": 20.287212371826172, "step": 335 }, { "epoch": 0.5079365079365079, "fcm_dpo/beta": 0.04836486279964447, "fcm_dpo/delta": 0.02678491175174713, "fcm_dpo/margin": 11.85739517211914, "fcm_dpo/q_t": 0.38848936557769775, "grad_norm": 20.710521697998047, "learning_rate": 2.8819419203668675e-07, "logits/chosen": 0.40805622935295105, "logits/rejected": 0.3888956606388092, "logps/chosen": -94.53564453125, "logps/ref_chosen": -68.85006713867188, "logps/ref_rejected": -92.99603271484375, "logps/rejected": -130.53900146484375, "loss": 1.0894, "margin_dpo/margin_mean": 11.857397079467773, "margin_dpo/margin_std": 19.84499740600586, "step": 336 }, { "epoch": 0.509448223733938, "fcm_dpo/beta": 0.049236997961997986, "fcm_dpo/delta": 0.05038725584745407, "fcm_dpo/margin": 11.218805313110352, "fcm_dpo/q_t": 0.3871212899684906, "grad_norm": 19.411130905151367, "learning_rate": 2.8688696670638053e-07, "logits/chosen": 0.3875211775302887, "logits/rejected": 0.35309669375419617, "logps/chosen": -97.92208862304688, "logps/ref_chosen": -73.18783569335938, "logps/ref_rejected": -86.89118957519531, "logps/rejected": -122.84425354003906, "loss": 1.0883, "margin_dpo/margin_mean": 11.218805313110352, "margin_dpo/margin_std": 18.6793212890625, "step": 337 }, { "epoch": 0.5109599395313681, "fcm_dpo/beta": 0.04963863641023636, "fcm_dpo/delta": 0.037454307079315186, "fcm_dpo/margin": 11.375207901000977, "fcm_dpo/q_t": 0.38713350892066956, "grad_norm": 20.63473129272461, "learning_rate": 2.8557870956832133e-07, "logits/chosen": 0.42949801683425903, "logits/rejected": 0.4031613767147064, "logps/chosen": -88.67749786376953, "logps/ref_chosen": -63.939613342285156, "logps/ref_rejected": -75.34243774414062, "logps/rejected": -111.45553588867188, "loss": 1.1082, "margin_dpo/margin_mean": 11.37520694732666, "margin_dpo/margin_std": 20.042879104614258, "step": 338 }, { "epoch": 0.5124716553287982, "fcm_dpo/beta": 0.04877196252346039, "fcm_dpo/delta": -0.09286697953939438, "fcm_dpo/margin": 14.053956031799316, "fcm_dpo/q_t": 0.36065682768821716, "grad_norm": 19.03911781311035, "learning_rate": 2.842694572172736e-07, "logits/chosen": 0.5791463255882263, "logits/rejected": 0.4974604845046997, "logps/chosen": -65.61044311523438, "logps/ref_chosen": -45.54913330078125, "logps/ref_rejected": -67.0482177734375, "logps/rejected": -101.16348266601562, "loss": 0.9885, "margin_dpo/margin_mean": 14.053956985473633, "margin_dpo/margin_std": 18.47199058532715, "step": 339 }, { "epoch": 0.5139833711262283, "fcm_dpo/beta": 0.048321135342121124, "fcm_dpo/delta": -0.12813809514045715, "fcm_dpo/margin": 14.868640899658203, "fcm_dpo/q_t": 0.36593562364578247, "grad_norm": 19.047794342041016, "learning_rate": 2.8295924627584004e-07, "logits/chosen": 0.4682982563972473, "logits/rejected": 0.44503962993621826, "logps/chosen": -78.10293579101562, "logps/ref_chosen": -54.00564956665039, "logps/ref_rejected": -61.314430236816406, "logps/rejected": -100.28036499023438, "loss": 1.065, "margin_dpo/margin_mean": 14.868640899658203, "margin_dpo/margin_std": 23.708301544189453, "step": 340 }, { "epoch": 0.5154950869236583, "fcm_dpo/beta": 0.04579398036003113, "fcm_dpo/delta": -0.11630547046661377, "fcm_dpo/margin": 15.230375289916992, "fcm_dpo/q_t": 0.3636672794818878, "grad_norm": 19.403221130371094, "learning_rate": 2.816481133934373e-07, "logits/chosen": 0.48653745651245117, "logits/rejected": 0.4402218759059906, "logps/chosen": -86.47044372558594, "logps/ref_chosen": -63.39509582519531, "logps/ref_rejected": -76.20973205566406, "logps/rejected": -114.51545715332031, "loss": 1.0637, "margin_dpo/margin_mean": 15.230375289916992, "margin_dpo/margin_std": 22.663076400756836, "step": 341 }, { "epoch": 0.5170068027210885, "fcm_dpo/beta": 0.04497259855270386, "fcm_dpo/delta": -0.1905035674571991, "fcm_dpo/margin": 17.22130012512207, "fcm_dpo/q_t": 0.348552405834198, "grad_norm": 16.44388198852539, "learning_rate": 2.8033609524527046e-07, "logits/chosen": 0.5286259055137634, "logits/rejected": 0.48626136779785156, "logps/chosen": -76.10055541992188, "logps/ref_chosen": -53.047813415527344, "logps/ref_rejected": -68.2854232788086, "logps/rejected": -108.55946350097656, "loss": 0.954, "margin_dpo/margin_mean": 17.22130012512207, "margin_dpo/margin_std": 21.990345001220703, "step": 342 }, { "epoch": 0.5185185185185185, "fcm_dpo/beta": 0.04516913741827011, "fcm_dpo/delta": 0.15312719345092773, "fcm_dpo/margin": 10.011658668518066, "fcm_dpo/q_t": 0.40422749519348145, "grad_norm": 18.471750259399414, "learning_rate": 2.7902322853130753e-07, "logits/chosen": 0.4091396927833557, "logits/rejected": 0.40280401706695557, "logps/chosen": -92.39411926269531, "logps/ref_chosen": -70.57852935791016, "logps/ref_rejected": -84.73873901367188, "logps/rejected": -116.56597900390625, "loss": 1.1685, "margin_dpo/margin_mean": 10.011658668518066, "margin_dpo/margin_std": 19.0828800201416, "step": 343 }, { "epoch": 0.5200302343159486, "fcm_dpo/beta": 0.04532856121659279, "fcm_dpo/delta": -0.08428593724966049, "fcm_dpo/margin": 14.950438499450684, "fcm_dpo/q_t": 0.3651253283023834, "grad_norm": 19.586933135986328, "learning_rate": 2.7770954997525274e-07, "logits/chosen": 0.5108197927474976, "logits/rejected": 0.4525598883628845, "logps/chosen": -82.04597473144531, "logps/ref_chosen": -55.811004638671875, "logps/ref_rejected": -84.77637481689453, "logps/rejected": -125.96177673339844, "loss": 1.0386, "margin_dpo/margin_mean": 14.950438499450684, "margin_dpo/margin_std": 22.50165557861328, "step": 344 }, { "epoch": 0.5215419501133787, "fcm_dpo/beta": 0.04496072232723236, "fcm_dpo/delta": -0.06477053463459015, "fcm_dpo/margin": 14.691195487976074, "fcm_dpo/q_t": 0.3674304783344269, "grad_norm": 18.69040870666504, "learning_rate": 2.7639509632351927e-07, "logits/chosen": 0.5651530623435974, "logits/rejected": 0.5218570232391357, "logps/chosen": -76.76480102539062, "logps/ref_chosen": -57.78609848022461, "logps/ref_rejected": -78.91847229003906, "logps/rejected": -112.58836364746094, "loss": 1.038, "margin_dpo/margin_mean": 14.691194534301758, "margin_dpo/margin_std": 22.197650909423828, "step": 345 }, { "epoch": 0.5230536659108088, "fcm_dpo/beta": 0.04380536079406738, "fcm_dpo/delta": -0.18842804431915283, "fcm_dpo/margin": 17.673648834228516, "fcm_dpo/q_t": 0.3476044535636902, "grad_norm": 19.525056838989258, "learning_rate": 2.7507990434420123e-07, "logits/chosen": 0.5473288297653198, "logits/rejected": 0.47578173875808716, "logps/chosen": -77.24856567382812, "logps/ref_chosen": -56.285125732421875, "logps/ref_rejected": -91.15303039550781, "logps/rejected": -129.7901153564453, "loss": 0.9867, "margin_dpo/margin_mean": 17.673648834228516, "margin_dpo/margin_std": 23.69552993774414, "step": 346 }, { "epoch": 0.5245653817082389, "fcm_dpo/beta": 0.04321366548538208, "fcm_dpo/delta": 0.0015529077500104904, "fcm_dpo/margin": 13.848976135253906, "fcm_dpo/q_t": 0.385122686624527, "grad_norm": 19.80738639831543, "learning_rate": 2.737640108260456e-07, "logits/chosen": 0.6206649541854858, "logits/rejected": 0.5723800659179688, "logps/chosen": -78.5164566040039, "logps/ref_chosen": -53.499542236328125, "logps/ref_rejected": -72.52565002441406, "logps/rejected": -111.39154815673828, "loss": 1.0826, "margin_dpo/margin_mean": 13.848976135253906, "margin_dpo/margin_std": 23.184226989746094, "step": 347 }, { "epoch": 0.5260770975056689, "fcm_dpo/beta": 0.04169192165136337, "fcm_dpo/delta": -0.18153540790081024, "fcm_dpo/margin": 18.360071182250977, "fcm_dpo/q_t": 0.35915493965148926, "grad_norm": 17.797361373901367, "learning_rate": 2.724474525774229e-07, "logits/chosen": 0.6300745606422424, "logits/rejected": 0.600340723991394, "logps/chosen": -72.2784194946289, "logps/ref_chosen": -50.78684997558594, "logps/ref_rejected": -68.63732147216797, "logps/rejected": -108.48896026611328, "loss": 1.005, "margin_dpo/margin_mean": 18.360071182250977, "margin_dpo/margin_std": 26.658245086669922, "step": 348 }, { "epoch": 0.527588813303099, "fcm_dpo/beta": 0.041053976863622665, "fcm_dpo/delta": -0.12168022990226746, "fcm_dpo/margin": 17.36975860595703, "fcm_dpo/q_t": 0.35872676968574524, "grad_norm": 18.761396408081055, "learning_rate": 2.711302664252973e-07, "logits/chosen": 0.5437895655632019, "logits/rejected": 0.4607025980949402, "logps/chosen": -75.03057861328125, "logps/ref_chosen": -53.325008392333984, "logps/ref_rejected": -83.21236419677734, "logps/rejected": -122.28768157958984, "loss": 1.002, "margin_dpo/margin_mean": 17.36975860595703, "margin_dpo/margin_std": 24.134292602539062, "step": 349 }, { "epoch": 0.5291005291005291, "fcm_dpo/beta": 0.03879928961396217, "fcm_dpo/delta": -0.27879321575164795, "fcm_dpo/margin": 22.000972747802734, "fcm_dpo/q_t": 0.32647570967674255, "grad_norm": 18.498384475708008, "learning_rate": 2.698124892141971e-07, "logits/chosen": 0.5079820156097412, "logits/rejected": 0.4346851408481598, "logps/chosen": -85.52665710449219, "logps/ref_chosen": -61.625770568847656, "logps/ref_rejected": -87.63627624511719, "logps/rejected": -133.53811645507812, "loss": 0.8827, "margin_dpo/margin_mean": 22.000972747802734, "margin_dpo/margin_std": 24.152040481567383, "step": 350 }, { "epoch": 0.5306122448979592, "fcm_dpo/beta": 0.037720829248428345, "fcm_dpo/delta": -0.10465708374977112, "fcm_dpo/margin": 18.47935676574707, "fcm_dpo/q_t": 0.3536849617958069, "grad_norm": 15.722846031188965, "learning_rate": 2.6849415780518357e-07, "logits/chosen": 0.4817102551460266, "logits/rejected": 0.4085959196090698, "logps/chosen": -79.06599426269531, "logps/ref_chosen": -56.2563362121582, "logps/ref_rejected": -79.11589813232422, "logps/rejected": -120.4049072265625, "loss": 1.0295, "margin_dpo/margin_mean": 18.47935676574707, "margin_dpo/margin_std": 26.676305770874023, "step": 351 }, { "epoch": 0.5321239606953893, "fcm_dpo/beta": 0.03718000277876854, "fcm_dpo/delta": -0.042777154594659805, "fcm_dpo/margin": 17.197158813476562, "fcm_dpo/q_t": 0.3716466426849365, "grad_norm": 17.15549087524414, "learning_rate": 2.6717530907482024e-07, "logits/chosen": 0.5181227922439575, "logits/rejected": 0.46758154034614563, "logps/chosen": -85.7076644897461, "logps/ref_chosen": -63.05195236206055, "logps/ref_rejected": -85.52035522460938, "logps/rejected": -125.37321472167969, "loss": 1.0158, "margin_dpo/margin_mean": 17.197158813476562, "margin_dpo/margin_std": 24.20104217529297, "step": 352 }, { "epoch": 0.5336356764928194, "fcm_dpo/beta": 0.03698758780956268, "fcm_dpo/delta": -0.08900751918554306, "fcm_dpo/margin": 18.460941314697266, "fcm_dpo/q_t": 0.3600703477859497, "grad_norm": 17.056467056274414, "learning_rate": 2.658559799141411e-07, "logits/chosen": 0.5669195055961609, "logits/rejected": 0.5683079957962036, "logps/chosen": -90.16744232177734, "logps/ref_chosen": -69.00918579101562, "logps/ref_rejected": -72.65840148925781, "logps/rejected": -112.27760314941406, "loss": 1.0175, "margin_dpo/margin_mean": 18.460941314697266, "margin_dpo/margin_std": 26.078454971313477, "step": 353 }, { "epoch": 0.5351473922902494, "fcm_dpo/beta": 0.03558646887540817, "fcm_dpo/delta": -0.14204002916812897, "fcm_dpo/margin": 20.501020431518555, "fcm_dpo/q_t": 0.35099613666534424, "grad_norm": 18.90435028076172, "learning_rate": 2.6453620722761895e-07, "logits/chosen": 0.5953085422515869, "logits/rejected": 0.4663833975791931, "logps/chosen": -61.88752365112305, "logps/ref_chosen": -39.78833770751953, "logps/ref_rejected": -69.56885528564453, "logps/rejected": -112.1690673828125, "loss": 0.9858, "margin_dpo/margin_mean": 20.501022338867188, "margin_dpo/margin_std": 27.029680252075195, "step": 354 }, { "epoch": 0.5366591080876795, "fcm_dpo/beta": 0.03496289253234863, "fcm_dpo/delta": -0.1490025818347931, "fcm_dpo/margin": 21.115114212036133, "fcm_dpo/q_t": 0.34892308712005615, "grad_norm": 16.861616134643555, "learning_rate": 2.632160279321328e-07, "logits/chosen": 0.5601600408554077, "logits/rejected": 0.43914663791656494, "logps/chosen": -71.95262145996094, "logps/ref_chosen": -46.25537872314453, "logps/ref_rejected": -78.20236206054688, "logps/rejected": -125.01471710205078, "loss": 0.9849, "margin_dpo/margin_mean": 21.115114212036133, "margin_dpo/margin_std": 27.885860443115234, "step": 355 }, { "epoch": 0.5381708238851096, "fcm_dpo/beta": 0.03437604755163193, "fcm_dpo/delta": 0.0072648786008358, "fcm_dpo/margin": 17.235088348388672, "fcm_dpo/q_t": 0.3816342353820801, "grad_norm": 17.55934715270996, "learning_rate": 2.618954789559356e-07, "logits/chosen": 0.5410733222961426, "logits/rejected": 0.4691797196865082, "logps/chosen": -71.19071197509766, "logps/ref_chosen": -47.906158447265625, "logps/ref_rejected": -74.29397583007812, "logps/rejected": -114.81361389160156, "loss": 1.1678, "margin_dpo/margin_mean": 17.235088348388672, "margin_dpo/margin_std": 33.58668899536133, "step": 356 }, { "epoch": 0.5396825396825397, "fcm_dpo/beta": 0.03437814116477966, "fcm_dpo/delta": 0.050616808235645294, "fcm_dpo/margin": 15.968550682067871, "fcm_dpo/q_t": 0.38629966974258423, "grad_norm": 17.052623748779297, "learning_rate": 2.6057459723762076e-07, "logits/chosen": 0.5412014722824097, "logits/rejected": 0.5130825042724609, "logps/chosen": -92.02487182617188, "logps/ref_chosen": -62.63500213623047, "logps/ref_rejected": -65.11399841308594, "logps/rejected": -110.47242736816406, "loss": 1.0998, "margin_dpo/margin_mean": 15.968551635742188, "margin_dpo/margin_std": 25.727272033691406, "step": 357 }, { "epoch": 0.5411942554799698, "fcm_dpo/beta": 0.034367240965366364, "fcm_dpo/delta": -0.15965090692043304, "fcm_dpo/margin": 21.760364532470703, "fcm_dpo/q_t": 0.3511369824409485, "grad_norm": 21.150100708007812, "learning_rate": 2.5925341972508954e-07, "logits/chosen": 0.4965895712375641, "logits/rejected": 0.5016806125640869, "logps/chosen": -94.08679962158203, "logps/ref_chosen": -67.20960998535156, "logps/ref_rejected": -69.34715270996094, "logps/rejected": -117.98471069335938, "loss": 0.978, "margin_dpo/margin_mean": 21.760364532470703, "margin_dpo/margin_std": 28.61833953857422, "step": 358 }, { "epoch": 0.5427059712773998, "fcm_dpo/beta": 0.035116057842969894, "fcm_dpo/delta": 0.17041327059268951, "fcm_dpo/margin": 12.320904731750488, "fcm_dpo/q_t": 0.4136947691440582, "grad_norm": 18.667757034301758, "learning_rate": 2.579319833745169e-07, "logits/chosen": 0.5113773941993713, "logits/rejected": 0.4831411838531494, "logps/chosen": -92.3634033203125, "logps/ref_chosen": -62.52578353881836, "logps/ref_rejected": -76.63114929199219, "logps/rejected": -118.7896728515625, "loss": 1.1787, "margin_dpo/margin_mean": 12.320904731750488, "margin_dpo/margin_std": 24.88362693786621, "step": 359 }, { "epoch": 0.54421768707483, "fcm_dpo/beta": 0.03468718379735947, "fcm_dpo/delta": -0.04342162236571312, "fcm_dpo/margin": 18.45904541015625, "fcm_dpo/q_t": 0.37331700325012207, "grad_norm": 17.929542541503906, "learning_rate": 2.5661032514931834e-07, "logits/chosen": 0.45557552576065063, "logits/rejected": 0.3662692904472351, "logps/chosen": -93.12303161621094, "logps/ref_chosen": -63.48772048950195, "logps/ref_rejected": -90.6891098022461, "logps/rejected": -138.78347778320312, "loss": 1.0288, "margin_dpo/margin_mean": 18.45904541015625, "margin_dpo/margin_std": 27.04231834411621, "step": 360 }, { "epoch": 0.54572940287226, "fcm_dpo/beta": 0.03404659405350685, "fcm_dpo/delta": -0.1389157474040985, "fcm_dpo/margin": 21.408405303955078, "fcm_dpo/q_t": 0.3507522940635681, "grad_norm": 20.36210823059082, "learning_rate": 2.552884820191154e-07, "logits/chosen": 0.6088442206382751, "logits/rejected": 0.5571874380111694, "logps/chosen": -86.91483306884766, "logps/ref_chosen": -57.917144775390625, "logps/ref_rejected": -72.39089965820312, "logps/rejected": -122.7969970703125, "loss": 0.973, "margin_dpo/margin_mean": 21.408405303955078, "margin_dpo/margin_std": 27.88947296142578, "step": 361 }, { "epoch": 0.54724111866969, "fcm_dpo/beta": 0.033488351851701736, "fcm_dpo/delta": -0.12048141658306122, "fcm_dpo/margin": 21.240074157714844, "fcm_dpo/q_t": 0.3562188744544983, "grad_norm": 17.9212589263916, "learning_rate": 2.53966490958702e-07, "logits/chosen": 0.6075149774551392, "logits/rejected": 0.5085099935531616, "logps/chosen": -91.7391357421875, "logps/ref_chosen": -63.4434700012207, "logps/ref_rejected": -103.45516967773438, "logps/rejected": -152.99090576171875, "loss": 1.004, "margin_dpo/margin_mean": 21.240074157714844, "margin_dpo/margin_std": 29.03826141357422, "step": 362 }, { "epoch": 0.5487528344671202, "fcm_dpo/beta": 0.032427407801151276, "fcm_dpo/delta": -0.09851216524839401, "fcm_dpo/margin": 21.322847366333008, "fcm_dpo/q_t": 0.3581845164299011, "grad_norm": 19.257793426513672, "learning_rate": 2.526443889470099e-07, "logits/chosen": 0.6183967590332031, "logits/rejected": 0.49492183327674866, "logps/chosen": -81.70713806152344, "logps/ref_chosen": -48.65182876586914, "logps/ref_rejected": -88.65904235839844, "logps/rejected": -143.03720092773438, "loss": 0.9931, "margin_dpo/margin_mean": 21.322847366333008, "margin_dpo/margin_std": 28.826107025146484, "step": 363 }, { "epoch": 0.5502645502645502, "fcm_dpo/beta": 0.03153586387634277, "fcm_dpo/delta": -0.14443664252758026, "fcm_dpo/margin": 23.249065399169922, "fcm_dpo/q_t": 0.35703152418136597, "grad_norm": 15.726482391357422, "learning_rate": 2.513222129660744e-07, "logits/chosen": 0.45400315523147583, "logits/rejected": 0.36352038383483887, "logps/chosen": -86.7956314086914, "logps/ref_chosen": -57.87107467651367, "logps/ref_rejected": -80.95503234863281, "logps/rejected": -133.12864685058594, "loss": 1.0311, "margin_dpo/margin_mean": 23.249065399169922, "margin_dpo/margin_std": 34.755950927734375, "step": 364 }, { "epoch": 0.5517762660619804, "fcm_dpo/beta": 0.031077580526471138, "fcm_dpo/delta": -0.06097899004817009, "fcm_dpo/margin": 21.13581085205078, "fcm_dpo/q_t": 0.36084866523742676, "grad_norm": 14.798433303833008, "learning_rate": 2.5e-07, "logits/chosen": 0.5402770042419434, "logits/rejected": 0.5327832102775574, "logps/chosen": -89.29142761230469, "logps/ref_chosen": -64.94217681884766, "logps/ref_rejected": -74.8599853515625, "logps/rejected": -120.34504699707031, "loss": 0.977, "margin_dpo/margin_mean": 21.135812759399414, "margin_dpo/margin_std": 25.899639129638672, "step": 365 }, { "epoch": 0.5532879818594104, "fcm_dpo/beta": 0.031172068789601326, "fcm_dpo/delta": -0.010473225265741348, "fcm_dpo/margin": 19.541061401367188, "fcm_dpo/q_t": 0.3788078725337982, "grad_norm": 16.791519165039062, "learning_rate": 2.486777870339255e-07, "logits/chosen": 0.4921402931213379, "logits/rejected": 0.4748532474040985, "logps/chosen": -78.04362487792969, "logps/ref_chosen": -55.16598129272461, "logps/ref_rejected": -65.26121520996094, "logps/rejected": -107.67991638183594, "loss": 1.0843, "margin_dpo/margin_mean": 19.541061401367188, "margin_dpo/margin_std": 31.972339630126953, "step": 366 }, { "epoch": 0.5547996976568406, "fcm_dpo/beta": 0.030978696420788765, "fcm_dpo/delta": 0.030442271381616592, "fcm_dpo/margin": 18.435543060302734, "fcm_dpo/q_t": 0.3786276578903198, "grad_norm": 17.63971710205078, "learning_rate": 2.4735561105299014e-07, "logits/chosen": 0.5248080492019653, "logits/rejected": 0.4257400929927826, "logps/chosen": -84.86387634277344, "logps/ref_chosen": -56.01046371459961, "logps/ref_rejected": -77.31010437011719, "logps/rejected": -124.59906768798828, "loss": 1.0777, "margin_dpo/margin_mean": 18.435543060302734, "margin_dpo/margin_std": 29.5982666015625, "step": 367 }, { "epoch": 0.5563114134542706, "fcm_dpo/beta": 0.031203612685203552, "fcm_dpo/delta": 0.0020916834473609924, "fcm_dpo/margin": 19.164134979248047, "fcm_dpo/q_t": 0.3759109377861023, "grad_norm": 17.352895736694336, "learning_rate": 2.46033509041298e-07, "logits/chosen": 0.40486133098602295, "logits/rejected": 0.39880359172821045, "logps/chosen": -106.20714569091797, "logps/ref_chosen": -74.82927703857422, "logps/ref_rejected": -76.11680603027344, "logps/rejected": -126.65880584716797, "loss": 1.066, "margin_dpo/margin_mean": 19.164134979248047, "margin_dpo/margin_std": 29.87588882446289, "step": 368 }, { "epoch": 0.5578231292517006, "fcm_dpo/beta": 0.031594499945640564, "fcm_dpo/delta": 0.048815835267305374, "fcm_dpo/margin": 17.495262145996094, "fcm_dpo/q_t": 0.3873006999492645, "grad_norm": 17.4019832611084, "learning_rate": 2.447115179808846e-07, "logits/chosen": 0.5595611929893494, "logits/rejected": 0.5114997029304504, "logps/chosen": -87.9697494506836, "logps/ref_chosen": -58.32621765136719, "logps/ref_rejected": -80.92183685302734, "logps/rejected": -128.0606231689453, "loss": 1.094, "margin_dpo/margin_mean": 17.495262145996094, "margin_dpo/margin_std": 29.117238998413086, "step": 369 }, { "epoch": 0.5593348450491308, "fcm_dpo/beta": 0.030709534883499146, "fcm_dpo/delta": -0.15342025458812714, "fcm_dpo/margin": 24.153587341308594, "fcm_dpo/q_t": 0.35305267572402954, "grad_norm": 16.30162811279297, "learning_rate": 2.4338967485068164e-07, "logits/chosen": 0.6652737259864807, "logits/rejected": 0.5999115705490112, "logps/chosen": -82.10697937011719, "logps/ref_chosen": -52.88372039794922, "logps/ref_rejected": -79.43692016601562, "logps/rejected": -132.8137664794922, "loss": 1.0301, "margin_dpo/margin_mean": 24.153587341308594, "margin_dpo/margin_std": 36.119441986083984, "step": 370 }, { "epoch": 0.5608465608465608, "fcm_dpo/beta": 0.030408132821321487, "fcm_dpo/delta": -0.07384546846151352, "fcm_dpo/margin": 21.980266571044922, "fcm_dpo/q_t": 0.3661743402481079, "grad_norm": 17.487380981445312, "learning_rate": 2.420680166254831e-07, "logits/chosen": 0.6762181520462036, "logits/rejected": 0.6435130834579468, "logps/chosen": -78.06260681152344, "logps/ref_chosen": -49.224212646484375, "logps/ref_rejected": -63.348472595214844, "logps/rejected": -114.16712951660156, "loss": 1.0248, "margin_dpo/margin_mean": 21.980266571044922, "margin_dpo/margin_std": 31.435279846191406, "step": 371 }, { "epoch": 0.562358276643991, "fcm_dpo/beta": 0.031202610582113266, "fcm_dpo/delta": 0.17676496505737305, "fcm_dpo/margin": 13.67655086517334, "fcm_dpo/q_t": 0.41596049070358276, "grad_norm": 21.666107177734375, "learning_rate": 2.4074658027491044e-07, "logits/chosen": 0.5750303268432617, "logits/rejected": 0.4893125593662262, "logps/chosen": -84.21045684814453, "logps/ref_chosen": -52.269554138183594, "logps/ref_rejected": -72.99522399902344, "logps/rejected": -118.61267852783203, "loss": 1.3036, "margin_dpo/margin_mean": 13.676551818847656, "margin_dpo/margin_std": 37.41542053222656, "step": 372 }, { "epoch": 0.563869992441421, "fcm_dpo/beta": 0.03156846761703491, "fcm_dpo/delta": 0.07378996908664703, "fcm_dpo/margin": 16.784202575683594, "fcm_dpo/q_t": 0.4008631110191345, "grad_norm": 24.01235008239746, "learning_rate": 2.394254027623792e-07, "logits/chosen": 0.5762922763824463, "logits/rejected": 0.5083121061325073, "logps/chosen": -97.91470336914062, "logps/ref_chosen": -61.112998962402344, "logps/ref_rejected": -76.24851989746094, "logps/rejected": -129.8344268798828, "loss": 1.2322, "margin_dpo/margin_mean": 16.784204483032227, "margin_dpo/margin_std": 38.091888427734375, "step": 373 }, { "epoch": 0.5653817082388511, "fcm_dpo/beta": 0.02992125041782856, "fcm_dpo/delta": -0.3461046814918518, "fcm_dpo/margin": 30.510740280151367, "fcm_dpo/q_t": 0.31146040558815, "grad_norm": 19.470178604125977, "learning_rate": 2.381045210440644e-07, "logits/chosen": 0.47531217336654663, "logits/rejected": 0.46916258335113525, "logps/chosen": -101.13727569580078, "logps/ref_chosen": -72.66920471191406, "logps/ref_rejected": -76.83158874511719, "logps/rejected": -135.81039428710938, "loss": 0.8386, "margin_dpo/margin_mean": 30.510740280151367, "margin_dpo/margin_std": 30.510784149169922, "step": 374 }, { "epoch": 0.5668934240362812, "fcm_dpo/beta": 0.029693206772208214, "fcm_dpo/delta": 0.06657587736845016, "fcm_dpo/margin": 18.084556579589844, "fcm_dpo/q_t": 0.3913113474845886, "grad_norm": 20.931381225585938, "learning_rate": 2.3678397206786715e-07, "logits/chosen": 0.5903283953666687, "logits/rejected": 0.537385106086731, "logps/chosen": -86.04524993896484, "logps/ref_chosen": -57.68330383300781, "logps/ref_rejected": -79.34097290039062, "logps/rejected": -125.78746795654297, "loss": 1.1199, "margin_dpo/margin_mean": 18.08455467224121, "margin_dpo/margin_std": 32.31146240234375, "step": 375 }, { "epoch": 0.5684051398337112, "fcm_dpo/beta": 0.02892487682402134, "fcm_dpo/delta": -0.2050780951976776, "fcm_dpo/margin": 27.252056121826172, "fcm_dpo/q_t": 0.34515899419784546, "grad_norm": 19.573123931884766, "learning_rate": 2.3546379277238103e-07, "logits/chosen": 0.6316548585891724, "logits/rejected": 0.5691288709640503, "logps/chosen": -83.20816040039062, "logps/ref_chosen": -51.674072265625, "logps/ref_rejected": -75.69713592529297, "logps/rejected": -134.4832763671875, "loss": 0.9825, "margin_dpo/margin_mean": 27.252056121826172, "margin_dpo/margin_std": 36.99662780761719, "step": 376 }, { "epoch": 0.5699168556311414, "fcm_dpo/beta": 0.02893291413784027, "fcm_dpo/delta": 0.02121102437376976, "fcm_dpo/margin": 20.025833129882812, "fcm_dpo/q_t": 0.3795938193798065, "grad_norm": 17.584617614746094, "learning_rate": 2.3414402008585886e-07, "logits/chosen": 0.5613811016082764, "logits/rejected": 0.5406124591827393, "logps/chosen": -79.32124328613281, "logps/ref_chosen": -46.17853546142578, "logps/ref_rejected": -57.756500244140625, "logps/rejected": -110.92504119873047, "loss": 1.0804, "margin_dpo/margin_mean": 20.025833129882812, "margin_dpo/margin_std": 32.1743278503418, "step": 377 }, { "epoch": 0.5714285714285714, "fcm_dpo/beta": 0.029261738061904907, "fcm_dpo/delta": 0.07294605672359467, "fcm_dpo/margin": 18.119808197021484, "fcm_dpo/q_t": 0.39253151416778564, "grad_norm": 17.491439819335938, "learning_rate": 2.3282469092517977e-07, "logits/chosen": 0.6229523420333862, "logits/rejected": 0.5752782225608826, "logps/chosen": -90.11911010742188, "logps/ref_chosen": -59.21887969970703, "logps/ref_rejected": -71.24818420410156, "logps/rejected": -120.26823425292969, "loss": 1.118, "margin_dpo/margin_mean": 18.119808197021484, "margin_dpo/margin_std": 31.92162322998047, "step": 378 }, { "epoch": 0.5729402872260015, "fcm_dpo/beta": 0.028815243393182755, "fcm_dpo/delta": -0.10706745088100433, "fcm_dpo/margin": 24.281211853027344, "fcm_dpo/q_t": 0.36030155420303345, "grad_norm": 18.351364135742188, "learning_rate": 2.3150584219481643e-07, "logits/chosen": 0.600252628326416, "logits/rejected": 0.535982072353363, "logps/chosen": -104.5278091430664, "logps/ref_chosen": -76.31658935546875, "logps/ref_rejected": -104.26200103759766, "logps/rejected": -156.75442504882812, "loss": 0.9933, "margin_dpo/margin_mean": 24.281211853027344, "margin_dpo/margin_std": 33.16697692871094, "step": 379 }, { "epoch": 0.5744520030234316, "fcm_dpo/beta": 0.027855779975652695, "fcm_dpo/delta": -0.18078401684761047, "fcm_dpo/margin": 27.52959442138672, "fcm_dpo/q_t": 0.3419705629348755, "grad_norm": 18.710771560668945, "learning_rate": 2.3018751078580283e-07, "logits/chosen": 0.5741807818412781, "logits/rejected": 0.5386440753936768, "logps/chosen": -86.78734588623047, "logps/ref_chosen": -61.283164978027344, "logps/ref_rejected": -72.38892364501953, "logps/rejected": -125.42269897460938, "loss": 0.9754, "margin_dpo/margin_mean": 27.52959442138672, "margin_dpo/margin_std": 35.956398010253906, "step": 380 }, { "epoch": 0.5759637188208617, "fcm_dpo/beta": 0.029031910002231598, "fcm_dpo/delta": 0.2659192681312561, "fcm_dpo/margin": 11.643007278442383, "fcm_dpo/q_t": 0.4340417981147766, "grad_norm": 19.243091583251953, "learning_rate": 2.288697335747027e-07, "logits/chosen": 0.5794563293457031, "logits/rejected": 0.5545228123664856, "logps/chosen": -92.53129577636719, "logps/ref_chosen": -58.2139892578125, "logps/ref_rejected": -60.78669357299805, "logps/rejected": -106.74700927734375, "loss": 1.2855, "margin_dpo/margin_mean": 11.6430082321167, "margin_dpo/margin_std": 32.82004928588867, "step": 381 }, { "epoch": 0.5774754346182918, "fcm_dpo/beta": 0.02958078868687153, "fcm_dpo/delta": 0.04316580295562744, "fcm_dpo/margin": 18.832475662231445, "fcm_dpo/q_t": 0.38638246059417725, "grad_norm": 19.289762496948242, "learning_rate": 2.2755254742257706e-07, "logits/chosen": 0.6157612800598145, "logits/rejected": 0.5689198970794678, "logps/chosen": -97.4847412109375, "logps/ref_chosen": -61.82532501220703, "logps/ref_rejected": -83.0452880859375, "logps/rejected": -137.53717041015625, "loss": 1.0714, "margin_dpo/margin_mean": 18.832475662231445, "margin_dpo/margin_std": 29.27760887145996, "step": 382 }, { "epoch": 0.5789871504157218, "fcm_dpo/beta": 0.028668655082583427, "fcm_dpo/delta": -0.11247755587100983, "fcm_dpo/margin": 24.513856887817383, "fcm_dpo/q_t": 0.363398015499115, "grad_norm": 19.029403686523438, "learning_rate": 2.2623598917395436e-07, "logits/chosen": 0.48890191316604614, "logits/rejected": 0.5110622644424438, "logps/chosen": -112.27488708496094, "logps/ref_chosen": -80.56326293945312, "logps/ref_rejected": -74.62922668457031, "logps/rejected": -130.85470581054688, "loss": 1.0314, "margin_dpo/margin_mean": 24.513858795166016, "margin_dpo/margin_std": 36.4691162109375, "step": 383 }, { "epoch": 0.5804988662131519, "fcm_dpo/beta": 0.028615575283765793, "fcm_dpo/delta": -0.03850318491458893, "fcm_dpo/margin": 22.226734161376953, "fcm_dpo/q_t": 0.3695475459098816, "grad_norm": 17.54273796081543, "learning_rate": 2.2492009565579875e-07, "logits/chosen": 0.6078575849533081, "logits/rejected": 0.5643937587738037, "logps/chosen": -99.3906478881836, "logps/ref_chosen": -65.47514343261719, "logps/ref_rejected": -79.67378234863281, "logps/rejected": -135.81600952148438, "loss": 1.0225, "margin_dpo/margin_mean": 22.22673225402832, "margin_dpo/margin_std": 31.673675537109375, "step": 384 }, { "epoch": 0.582010582010582, "fcm_dpo/beta": 0.02794378250837326, "fcm_dpo/delta": -0.16195213794708252, "fcm_dpo/margin": 26.842784881591797, "fcm_dpo/q_t": 0.34499049186706543, "grad_norm": 16.783960342407227, "learning_rate": 2.2360490367648084e-07, "logits/chosen": 0.5352010726928711, "logits/rejected": 0.4990323483943939, "logps/chosen": -98.1661605834961, "logps/ref_chosen": -66.0565185546875, "logps/ref_rejected": -86.68023681640625, "logps/rejected": -145.63265991210938, "loss": 0.9344, "margin_dpo/margin_mean": 26.842784881591797, "margin_dpo/margin_std": 31.331087112426758, "step": 385 }, { "epoch": 0.5835222978080121, "fcm_dpo/beta": 0.02810431644320488, "fcm_dpo/delta": 0.14293362200260162, "fcm_dpo/margin": 16.507038116455078, "fcm_dpo/q_t": 0.4019835591316223, "grad_norm": 18.913597106933594, "learning_rate": 2.2229045002474724e-07, "logits/chosen": 0.5284489393234253, "logits/rejected": 0.47032076120376587, "logps/chosen": -113.90336608886719, "logps/ref_chosen": -75.6236572265625, "logps/ref_rejected": -92.62330627441406, "logps/rejected": -147.4100341796875, "loss": 1.1448, "margin_dpo/margin_mean": 16.507038116455078, "margin_dpo/margin_std": 31.028078079223633, "step": 386 }, { "epoch": 0.5850340136054422, "fcm_dpo/beta": 0.02792777121067047, "fcm_dpo/delta": -0.11489059031009674, "fcm_dpo/margin": 25.30979347229004, "fcm_dpo/q_t": 0.35611510276794434, "grad_norm": 17.214059829711914, "learning_rate": 2.209767714686924e-07, "logits/chosen": 0.5747230052947998, "logits/rejected": 0.4819261431694031, "logps/chosen": -81.60426330566406, "logps/ref_chosen": -47.22170639038086, "logps/ref_rejected": -87.338134765625, "logps/rejected": -147.03048706054688, "loss": 0.9573, "margin_dpo/margin_mean": 25.30979347229004, "margin_dpo/margin_std": 31.456417083740234, "step": 387 }, { "epoch": 0.5865457294028723, "fcm_dpo/beta": 0.02807321399450302, "fcm_dpo/delta": 0.12129095196723938, "fcm_dpo/margin": 17.26071548461914, "fcm_dpo/q_t": 0.4033401608467102, "grad_norm": 18.226442337036133, "learning_rate": 2.1966390475472954e-07, "logits/chosen": 0.5805580615997314, "logits/rejected": 0.5685257911682129, "logps/chosen": -108.80488586425781, "logps/ref_chosen": -74.5794677734375, "logps/ref_rejected": -79.92558288574219, "logps/rejected": -131.41171264648438, "loss": 1.1656, "margin_dpo/margin_mean": 17.26071548461914, "margin_dpo/margin_std": 34.313987731933594, "step": 388 }, { "epoch": 0.5880574452003023, "fcm_dpo/beta": 0.027882620692253113, "fcm_dpo/delta": -0.1335650533437729, "fcm_dpo/margin": 25.965024948120117, "fcm_dpo/q_t": 0.3534383177757263, "grad_norm": 32.512664794921875, "learning_rate": 2.1835188660656265e-07, "logits/chosen": 0.5873498916625977, "logits/rejected": 0.551581621170044, "logps/chosen": -95.86332702636719, "logps/ref_chosen": -61.624366760253906, "logps/ref_rejected": -76.50978088378906, "logps/rejected": -136.71377563476562, "loss": 1.0076, "margin_dpo/margin_mean": 25.965023040771484, "margin_dpo/margin_std": 36.41337585449219, "step": 389 }, { "epoch": 0.5895691609977324, "fcm_dpo/beta": 0.027584530413150787, "fcm_dpo/delta": -0.008268344216048717, "fcm_dpo/margin": 22.029890060424805, "fcm_dpo/q_t": 0.3749474883079529, "grad_norm": 15.995476722717285, "learning_rate": 2.170407537241599e-07, "logits/chosen": 0.6494970917701721, "logits/rejected": 0.5868979096412659, "logps/chosen": -77.04705047607422, "logps/ref_chosen": -45.871864318847656, "logps/ref_rejected": -61.305999755859375, "logps/rejected": -114.51107788085938, "loss": 1.0388, "margin_dpo/margin_mean": 22.029890060424805, "margin_dpo/margin_std": 32.316524505615234, "step": 390 }, { "epoch": 0.5910808767951625, "fcm_dpo/beta": 0.027057552710175514, "fcm_dpo/delta": -0.10849063843488693, "fcm_dpo/margin": 25.87117576599121, "fcm_dpo/q_t": 0.3591760993003845, "grad_norm": 17.0324764251709, "learning_rate": 2.1573054278272636e-07, "logits/chosen": 0.5981206297874451, "logits/rejected": 0.5333592891693115, "logps/chosen": -90.99639892578125, "logps/ref_chosen": -58.18701171875, "logps/ref_rejected": -83.63442993164062, "logps/rejected": -142.31500244140625, "loss": 1.0371, "margin_dpo/margin_mean": 25.871173858642578, "margin_dpo/margin_std": 38.77320861816406, "step": 391 }, { "epoch": 0.5925925925925926, "fcm_dpo/beta": 0.02655864879488945, "fcm_dpo/delta": -0.17987582087516785, "fcm_dpo/margin": 28.844318389892578, "fcm_dpo/q_t": 0.34854570031166077, "grad_norm": 15.680495262145996, "learning_rate": 2.1442129043167873e-07, "logits/chosen": 0.676337718963623, "logits/rejected": 0.6182987093925476, "logps/chosen": -96.6463623046875, "logps/ref_chosen": -69.7445297241211, "logps/ref_rejected": -94.05877685546875, "logps/rejected": -149.804931640625, "loss": 0.9825, "margin_dpo/margin_mean": 28.844318389892578, "margin_dpo/margin_std": 38.47731018066406, "step": 392 }, { "epoch": 0.5941043083900227, "fcm_dpo/beta": 0.025468815118074417, "fcm_dpo/delta": -0.12191242724657059, "fcm_dpo/margin": 27.956777572631836, "fcm_dpo/q_t": 0.3524329364299774, "grad_norm": 15.502283096313477, "learning_rate": 2.131130332936195e-07, "logits/chosen": 0.6358774900436401, "logits/rejected": 0.5986746549606323, "logps/chosen": -87.28994750976562, "logps/ref_chosen": -52.33489990234375, "logps/ref_rejected": -74.33809661865234, "logps/rejected": -137.24993896484375, "loss": 0.9476, "margin_dpo/margin_mean": 27.956775665283203, "margin_dpo/margin_std": 33.050567626953125, "step": 393 }, { "epoch": 0.5956160241874527, "fcm_dpo/beta": 0.025639474391937256, "fcm_dpo/delta": 0.014459993690252304, "fcm_dpo/margin": 22.846038818359375, "fcm_dpo/q_t": 0.37462514638900757, "grad_norm": 19.941524505615234, "learning_rate": 2.1180580796331323e-07, "logits/chosen": 0.671512246131897, "logits/rejected": 0.6399627327919006, "logps/chosen": -92.83706665039062, "logps/ref_chosen": -60.6761360168457, "logps/ref_rejected": -71.36074829101562, "logps/rejected": -126.36771392822266, "loss": 1.0159, "margin_dpo/margin_mean": 22.846038818359375, "margin_dpo/margin_std": 30.038795471191406, "step": 394 }, { "epoch": 0.5971277399848829, "fcm_dpo/beta": 0.026083827018737793, "fcm_dpo/delta": 0.10105286538600922, "fcm_dpo/margin": 19.264225006103516, "fcm_dpo/q_t": 0.39714348316192627, "grad_norm": 21.03672218322754, "learning_rate": 2.104996510066625e-07, "logits/chosen": 0.6039600968360901, "logits/rejected": 0.5188575983047485, "logps/chosen": -85.5516357421875, "logps/ref_chosen": -50.60432434082031, "logps/ref_rejected": -77.08731079101562, "logps/rejected": -131.29885864257812, "loss": 1.107, "margin_dpo/margin_mean": 19.264225006103516, "margin_dpo/margin_std": 32.470375061035156, "step": 395 }, { "epoch": 0.5986394557823129, "fcm_dpo/beta": 0.025592200458049774, "fcm_dpo/delta": -0.030677609145641327, "fcm_dpo/margin": 24.426685333251953, "fcm_dpo/q_t": 0.369695782661438, "grad_norm": 17.182994842529297, "learning_rate": 2.0919459895968517e-07, "logits/chosen": 0.6177434921264648, "logits/rejected": 0.5232092142105103, "logps/chosen": -84.13562774658203, "logps/ref_chosen": -51.35961151123047, "logps/ref_rejected": -79.89360046386719, "logps/rejected": -137.09629821777344, "loss": 1.007, "margin_dpo/margin_mean": 24.42668342590332, "margin_dpo/margin_std": 31.296443939208984, "step": 396 }, { "epoch": 0.600151171579743, "fcm_dpo/beta": 0.0270930714905262, "fcm_dpo/delta": 0.2718348205089569, "fcm_dpo/margin": 12.380789756774902, "fcm_dpo/q_t": 0.4283815622329712, "grad_norm": 23.26795768737793, "learning_rate": 2.078906883274924e-07, "logits/chosen": 0.5592355728149414, "logits/rejected": 0.5140686631202698, "logps/chosen": -105.91973114013672, "logps/ref_chosen": -66.45622253417969, "logps/ref_rejected": -85.74736785888672, "logps/rejected": -137.59164428710938, "loss": 1.3212, "margin_dpo/margin_mean": 12.380790710449219, "margin_dpo/margin_std": 38.163108825683594, "step": 397 }, { "epoch": 0.6016628873771731, "fcm_dpo/beta": 0.02662910521030426, "fcm_dpo/delta": -0.1545412540435791, "fcm_dpo/margin": 27.880821228027344, "fcm_dpo/q_t": 0.34979376196861267, "grad_norm": 16.451763153076172, "learning_rate": 2.065879555832674e-07, "logits/chosen": 0.5903204679489136, "logits/rejected": 0.5277484059333801, "logps/chosen": -82.19943237304688, "logps/ref_chosen": -49.244239807128906, "logps/ref_rejected": -75.18949127197266, "logps/rejected": -136.0255126953125, "loss": 0.9379, "margin_dpo/margin_mean": 27.88081932067871, "margin_dpo/margin_std": 33.525672912597656, "step": 398 }, { "epoch": 0.6031746031746031, "fcm_dpo/beta": 0.025618739426136017, "fcm_dpo/delta": -0.21381694078445435, "fcm_dpo/margin": 31.081968307495117, "fcm_dpo/q_t": 0.3390474319458008, "grad_norm": 17.575740814208984, "learning_rate": 2.052864371672457e-07, "logits/chosen": 0.533795952796936, "logits/rejected": 0.4038241505622864, "logps/chosen": -106.9549560546875, "logps/ref_chosen": -68.30679321289062, "logps/ref_rejected": -113.2708511352539, "logps/rejected": -183.0009765625, "loss": 0.9195, "margin_dpo/margin_mean": 31.081968307495117, "margin_dpo/margin_std": 36.69319152832031, "step": 399 }, { "epoch": 0.6046863189720333, "fcm_dpo/beta": 0.025582678616046906, "fcm_dpo/delta": 0.1144244372844696, "fcm_dpo/margin": 19.171369552612305, "fcm_dpo/q_t": 0.4018818438053131, "grad_norm": 26.281423568725586, "learning_rate": 2.0398616948569493e-07, "logits/chosen": 0.5914314985275269, "logits/rejected": 0.5376293659210205, "logps/chosen": -116.35350036621094, "logps/ref_chosen": -71.62649536132812, "logps/ref_rejected": -90.98765563964844, "logps/rejected": -154.8860321044922, "loss": 1.1411, "margin_dpo/margin_mean": 19.171369552612305, "margin_dpo/margin_std": 35.359981536865234, "step": 400 }, { "epoch": 0.6046863189720333, "eval_fcm_dpo/beta": 0.025976210832595825, "eval_logits/chosen": 0.6214596033096313, "eval_logits/rejected": 0.5730489492416382, "eval_logps/chosen": -110.48468017578125, "eval_logps/ref_chosen": -74.85946655273438, "eval_logps/ref_rejected": -79.54898834228516, "eval_logps/rejected": -137.4872283935547, "eval_loss": 0.5378274917602539, "eval_margin_dpo/margin_mean": 22.313037872314453, "eval_margin_dpo/margin_std": 35.38352584838867, "eval_runtime": 38.0499, "eval_samples_per_second": 60.526, "eval_steps_per_second": 1.892, "step": 400 }, { "epoch": 0.6061980347694633, "fcm_dpo/beta": 0.02523546852171421, "fcm_dpo/delta": -0.1849718540906906, "fcm_dpo/margin": 30.52855682373047, "fcm_dpo/q_t": 0.3418422341346741, "grad_norm": 12.788175582885742, "learning_rate": 2.0268718890989752e-07, "logits/chosen": 0.6369705200195312, "logits/rejected": 0.5418835878372192, "logps/chosen": -85.19168090820312, "logps/ref_chosen": -53.72495651245117, "logps/ref_rejected": -75.06304931640625, "logps/rejected": -137.05831909179688, "loss": 0.907, "margin_dpo/margin_mean": 30.528560638427734, "margin_dpo/margin_std": 33.80531311035156, "step": 401 }, { "epoch": 0.6077097505668935, "fcm_dpo/beta": 0.02487211488187313, "fcm_dpo/delta": 0.009320281445980072, "fcm_dpo/margin": 23.725364685058594, "fcm_dpo/q_t": 0.3787681460380554, "grad_norm": 16.4634952545166, "learning_rate": 2.013895317751323e-07, "logits/chosen": 0.6205127239227295, "logits/rejected": 0.5858608484268188, "logps/chosen": -98.28569793701172, "logps/ref_chosen": -61.873931884765625, "logps/ref_rejected": -66.15198516845703, "logps/rejected": -126.28912353515625, "loss": 1.0598, "margin_dpo/margin_mean": 23.725364685058594, "margin_dpo/margin_std": 35.9758415222168, "step": 402 }, { "epoch": 0.6092214663643235, "fcm_dpo/beta": 0.024643344804644585, "fcm_dpo/delta": -0.15608033537864685, "fcm_dpo/margin": 30.217971801757812, "fcm_dpo/q_t": 0.3511902689933777, "grad_norm": 17.57577896118164, "learning_rate": 2.0009323437965898e-07, "logits/chosen": 0.7137749195098877, "logits/rejected": 0.6363557577133179, "logps/chosen": -90.44751739501953, "logps/ref_chosen": -51.321502685546875, "logps/ref_rejected": -86.54010772705078, "logps/rejected": -155.88409423828125, "loss": 0.9707, "margin_dpo/margin_mean": 30.217971801757812, "margin_dpo/margin_std": 38.760894775390625, "step": 403 }, { "epoch": 0.6107331821617535, "fcm_dpo/beta": 0.023660175502300262, "fcm_dpo/delta": -0.11365822702646255, "fcm_dpo/margin": 29.694786071777344, "fcm_dpo/q_t": 0.3559054732322693, "grad_norm": 17.716331481933594, "learning_rate": 1.9879833298370237e-07, "logits/chosen": 0.5810792446136475, "logits/rejected": 0.4951407313346863, "logps/chosen": -97.16690063476562, "logps/ref_chosen": -62.26288604736328, "logps/ref_rejected": -95.19029998779297, "logps/rejected": -159.78909301757812, "loss": 0.9881, "margin_dpo/margin_mean": 29.694786071777344, "margin_dpo/margin_std": 38.58500671386719, "step": 404 }, { "epoch": 0.6122448979591837, "fcm_dpo/beta": 0.023709017783403397, "fcm_dpo/delta": 0.059486567974090576, "fcm_dpo/margin": 22.857555389404297, "fcm_dpo/q_t": 0.3891262412071228, "grad_norm": 15.247061729431152, "learning_rate": 1.975048638084379e-07, "logits/chosen": 0.6707639694213867, "logits/rejected": 0.6255220770835876, "logps/chosen": -89.48225402832031, "logps/ref_chosen": -50.5843391418457, "logps/ref_rejected": -65.43156433105469, "logps/rejected": -127.1870346069336, "loss": 1.076, "margin_dpo/margin_mean": 22.857553482055664, "margin_dpo/margin_std": 34.472572326660156, "step": 405 }, { "epoch": 0.6137566137566137, "fcm_dpo/beta": 0.023622972890734673, "fcm_dpo/delta": -0.10287454724311829, "fcm_dpo/margin": 29.435489654541016, "fcm_dpo/q_t": 0.35592135787010193, "grad_norm": 16.751718521118164, "learning_rate": 1.9621286303497914e-07, "logits/chosen": 0.645000159740448, "logits/rejected": 0.49971795082092285, "logps/chosen": -86.35372924804688, "logps/ref_chosen": -48.99560546875, "logps/ref_rejected": -92.47774505615234, "logps/rejected": -159.2713623046875, "loss": 0.998, "margin_dpo/margin_mean": 29.435489654541016, "margin_dpo/margin_std": 39.57469940185547, "step": 406 }, { "epoch": 0.6152683295540439, "fcm_dpo/beta": 0.023810304701328278, "fcm_dpo/delta": 0.06428097188472748, "fcm_dpo/margin": 22.641036987304688, "fcm_dpo/q_t": 0.3884776830673218, "grad_norm": 17.64628791809082, "learning_rate": 1.9492236680336483e-07, "logits/chosen": 0.5463271141052246, "logits/rejected": 0.47302818298339844, "logps/chosen": -136.9144287109375, "logps/ref_chosen": -89.40056610107422, "logps/ref_rejected": -99.28775024414062, "logps/rejected": -169.44264221191406, "loss": 1.0804, "margin_dpo/margin_mean": 22.641036987304688, "margin_dpo/margin_std": 36.08110046386719, "step": 407 }, { "epoch": 0.6167800453514739, "fcm_dpo/beta": 0.023345306515693665, "fcm_dpo/delta": -0.2195199728012085, "fcm_dpo/margin": 34.35175323486328, "fcm_dpo/q_t": 0.3320169746875763, "grad_norm": 13.968607902526855, "learning_rate": 1.9363341121154895e-07, "logits/chosen": 0.6046391129493713, "logits/rejected": 0.5297787189483643, "logps/chosen": -89.42475128173828, "logps/ref_chosen": -54.70391845703125, "logps/ref_rejected": -73.98648834228516, "logps/rejected": -143.05908203125, "loss": 0.9009, "margin_dpo/margin_mean": 34.35175323486328, "margin_dpo/margin_std": 36.27836227416992, "step": 408 }, { "epoch": 0.618291761148904, "fcm_dpo/beta": 0.023423273116350174, "fcm_dpo/delta": 0.1822575032711029, "fcm_dpo/margin": 18.17670440673828, "fcm_dpo/q_t": 0.41613560914993286, "grad_norm": 17.823652267456055, "learning_rate": 1.9234603231438994e-07, "logits/chosen": 0.6338742971420288, "logits/rejected": 0.6387898325920105, "logps/chosen": -106.66569519042969, "logps/ref_chosen": -62.11822509765625, "logps/ref_rejected": -61.933509826660156, "logps/rejected": -124.65768432617188, "loss": 1.1785, "margin_dpo/margin_mean": 18.17670440673828, "margin_dpo/margin_std": 37.99756622314453, "step": 409 }, { "epoch": 0.6198034769463341, "fcm_dpo/beta": 0.02331661805510521, "fcm_dpo/delta": -0.0475025437772274, "fcm_dpo/margin": 27.560718536376953, "fcm_dpo/q_t": 0.36133018136024475, "grad_norm": 16.006240844726562, "learning_rate": 1.9106026612264315e-07, "logits/chosen": 0.6429124474525452, "logits/rejected": 0.619254469871521, "logps/chosen": -102.84481048583984, "logps/ref_chosen": -61.80266189575195, "logps/ref_rejected": -76.60002136230469, "logps/rejected": -145.202880859375, "loss": 0.9526, "margin_dpo/margin_mean": 27.560718536376953, "margin_dpo/margin_std": 29.782230377197266, "step": 410 }, { "epoch": 0.6213151927437641, "fcm_dpo/beta": 0.02313273400068283, "fcm_dpo/delta": -0.08924700319766998, "fcm_dpo/margin": 29.52808380126953, "fcm_dpo/q_t": 0.36143290996551514, "grad_norm": 16.007495880126953, "learning_rate": 1.8977614860195296e-07, "logits/chosen": 0.6322345733642578, "logits/rejected": 0.5753542184829712, "logps/chosen": -100.47373962402344, "logps/ref_chosen": -54.44539260864258, "logps/ref_rejected": -74.5650863647461, "logps/rejected": -150.12149047851562, "loss": 1.0053, "margin_dpo/margin_mean": 29.52808380126953, "margin_dpo/margin_std": 41.01704406738281, "step": 411 }, { "epoch": 0.6228269085411943, "fcm_dpo/beta": 0.022871272638440132, "fcm_dpo/delta": -0.019356705248355865, "fcm_dpo/margin": 27.005107879638672, "fcm_dpo/q_t": 0.37393760681152344, "grad_norm": 16.466289520263672, "learning_rate": 1.8849371567184662e-07, "logits/chosen": 0.6046679019927979, "logits/rejected": 0.5413898229598999, "logps/chosen": -105.70858764648438, "logps/ref_chosen": -55.248085021972656, "logps/ref_rejected": -68.96623229980469, "logps/rejected": -146.43185424804688, "loss": 1.0133, "margin_dpo/margin_mean": 27.005109786987305, "margin_dpo/margin_std": 36.81440734863281, "step": 412 }, { "epoch": 0.6243386243386243, "fcm_dpo/beta": 0.023207662627100945, "fcm_dpo/delta": 0.0680059865117073, "fcm_dpo/margin": 23.077539443969727, "fcm_dpo/q_t": 0.3903021216392517, "grad_norm": 20.254179000854492, "learning_rate": 1.872130032047302e-07, "logits/chosen": 0.49796950817108154, "logits/rejected": 0.46667468547821045, "logps/chosen": -122.41281127929688, "logps/ref_chosen": -68.72074890136719, "logps/ref_rejected": -78.76539611816406, "logps/rejected": -155.53500366210938, "loss": 1.1366, "margin_dpo/margin_mean": 23.077537536621094, "margin_dpo/margin_std": 42.77297592163086, "step": 413 }, { "epoch": 0.6258503401360545, "fcm_dpo/beta": 0.02289092168211937, "fcm_dpo/delta": -0.10468479245901108, "fcm_dpo/margin": 30.4606990814209, "fcm_dpo/q_t": 0.35490280389785767, "grad_norm": 15.836713790893555, "learning_rate": 1.8593404702488436e-07, "logits/chosen": 0.6493165493011475, "logits/rejected": 0.5884075164794922, "logps/chosen": -101.4513168334961, "logps/ref_chosen": -54.138214111328125, "logps/ref_rejected": -74.65741729736328, "logps/rejected": -152.43121337890625, "loss": 0.973, "margin_dpo/margin_mean": 30.4606990814209, "margin_dpo/margin_std": 38.70647048950195, "step": 414 }, { "epoch": 0.6273620559334845, "fcm_dpo/beta": 0.022673513740301132, "fcm_dpo/delta": -0.018438737839460373, "fcm_dpo/margin": 27.21916961669922, "fcm_dpo/q_t": 0.3717983365058899, "grad_norm": 17.9824275970459, "learning_rate": 1.846568829074628e-07, "logits/chosen": 0.6430912017822266, "logits/rejected": 0.6277749538421631, "logps/chosen": -104.0219955444336, "logps/ref_chosen": -55.91856002807617, "logps/ref_rejected": -61.747703552246094, "logps/rejected": -137.0703125, "loss": 1.0492, "margin_dpo/margin_mean": 27.21916961669922, "margin_dpo/margin_std": 41.05143737792969, "step": 415 }, { "epoch": 0.6288737717309146, "fcm_dpo/beta": 0.023443318903446198, "fcm_dpo/delta": 0.08808435499668121, "fcm_dpo/margin": 21.66620445251465, "fcm_dpo/q_t": 0.3991958498954773, "grad_norm": 20.130155563354492, "learning_rate": 1.8338154657749128e-07, "logits/chosen": 0.6014896631240845, "logits/rejected": 0.5532357096672058, "logps/chosen": -106.42463684082031, "logps/ref_chosen": -54.72308349609375, "logps/ref_rejected": -69.17388916015625, "logps/rejected": -142.54165649414062, "loss": 1.1462, "margin_dpo/margin_mean": 21.66620445251465, "margin_dpo/margin_std": 38.5516357421875, "step": 416 }, { "epoch": 0.6303854875283447, "fcm_dpo/beta": 0.02283429354429245, "fcm_dpo/delta": -0.12874314188957214, "fcm_dpo/margin": 31.50384521484375, "fcm_dpo/q_t": 0.34769943356513977, "grad_norm": 16.118452072143555, "learning_rate": 1.8210807370886849e-07, "logits/chosen": 0.7364401817321777, "logits/rejected": 0.6695940494537354, "logps/chosen": -110.97312927246094, "logps/ref_chosen": -56.791259765625, "logps/ref_rejected": -68.7791748046875, "logps/rejected": -154.4648895263672, "loss": 0.9872, "margin_dpo/margin_mean": 31.503847122192383, "margin_dpo/margin_std": 41.09132385253906, "step": 417 }, { "epoch": 0.6318972033257747, "fcm_dpo/beta": 0.022661913186311722, "fcm_dpo/delta": 0.10957328975200653, "fcm_dpo/margin": 21.837736129760742, "fcm_dpo/q_t": 0.40230679512023926, "grad_norm": 19.2397518157959, "learning_rate": 1.8083649992336825e-07, "logits/chosen": 0.668450117111206, "logits/rejected": 0.6779706478118896, "logps/chosen": -128.31222534179688, "logps/ref_chosen": -69.10798645019531, "logps/ref_rejected": -75.09132385253906, "logps/rejected": -156.13330078125, "loss": 1.1355, "margin_dpo/margin_mean": 21.837738037109375, "margin_dpo/margin_std": 39.48761749267578, "step": 418 }, { "epoch": 0.6334089191232048, "fcm_dpo/beta": 0.022264618426561356, "fcm_dpo/delta": -0.1917235106229782, "fcm_dpo/margin": 34.83051300048828, "fcm_dpo/q_t": 0.34440430998802185, "grad_norm": 16.647216796875, "learning_rate": 1.7956686078964255e-07, "logits/chosen": 0.5454314947128296, "logits/rejected": 0.4921835660934448, "logps/chosen": -102.27273559570312, "logps/ref_chosen": -58.1717643737793, "logps/ref_rejected": -71.67066955566406, "logps/rejected": -150.60215759277344, "loss": 0.9429, "margin_dpo/margin_mean": 34.83051300048828, "margin_dpo/margin_std": 42.898826599121094, "step": 419 }, { "epoch": 0.6349206349206349, "fcm_dpo/beta": 0.022696789354085922, "fcm_dpo/delta": 0.21054378151893616, "fcm_dpo/margin": 17.547386169433594, "fcm_dpo/q_t": 0.42180436849594116, "grad_norm": 18.340421676635742, "learning_rate": 1.782991918222275e-07, "logits/chosen": 0.6378327012062073, "logits/rejected": 0.5907524228096008, "logps/chosen": -117.64787292480469, "logps/ref_chosen": -57.05351257324219, "logps/ref_rejected": -62.670982360839844, "logps/rejected": -140.81272888183594, "loss": 1.2553, "margin_dpo/margin_mean": 17.547386169433594, "margin_dpo/margin_std": 44.069618225097656, "step": 420 }, { "epoch": 0.636432350718065, "fcm_dpo/beta": 0.02310902252793312, "fcm_dpo/delta": 0.035230088979005814, "fcm_dpo/margin": 24.51070213317871, "fcm_dpo/q_t": 0.38860011100769043, "grad_norm": 18.31899070739746, "learning_rate": 1.7703352848054887e-07, "logits/chosen": 0.6146277785301208, "logits/rejected": 0.556212306022644, "logps/chosen": -114.83110046386719, "logps/ref_chosen": -57.32324981689453, "logps/ref_rejected": -75.33782958984375, "logps/rejected": -157.35638427734375, "loss": 1.1604, "margin_dpo/margin_mean": 24.510700225830078, "margin_dpo/margin_std": 47.4954719543457, "step": 421 }, { "epoch": 0.6379440665154951, "fcm_dpo/beta": 0.022881243377923965, "fcm_dpo/delta": -0.12513823807239532, "fcm_dpo/margin": 31.305706024169922, "fcm_dpo/q_t": 0.3527275621891022, "grad_norm": 19.757509231567383, "learning_rate": 1.7576990616793137e-07, "logits/chosen": 0.6293450593948364, "logits/rejected": 0.602583110332489, "logps/chosen": -115.69070434570312, "logps/ref_chosen": -67.05757141113281, "logps/ref_rejected": -72.12803649902344, "logps/rejected": -152.06686401367188, "loss": 0.9678, "margin_dpo/margin_mean": 31.305706024169922, "margin_dpo/margin_std": 39.57312774658203, "step": 422 }, { "epoch": 0.6394557823129252, "fcm_dpo/beta": 0.02209470421075821, "fcm_dpo/delta": -0.1590176820755005, "fcm_dpo/margin": 33.78448486328125, "fcm_dpo/q_t": 0.3498442769050598, "grad_norm": 15.129724502563477, "learning_rate": 1.745083602306071e-07, "logits/chosen": 0.6799654364585876, "logits/rejected": 0.613652765750885, "logps/chosen": -105.32487487792969, "logps/ref_chosen": -54.06167221069336, "logps/ref_rejected": -76.64092254638672, "logps/rejected": -161.68861389160156, "loss": 0.955, "margin_dpo/margin_mean": 33.78448486328125, "margin_dpo/margin_std": 42.586509704589844, "step": 423 }, { "epoch": 0.6409674981103552, "fcm_dpo/beta": 0.021351175382733345, "fcm_dpo/delta": -0.14496149122714996, "fcm_dpo/margin": 34.29045867919922, "fcm_dpo/q_t": 0.350393682718277, "grad_norm": 17.672733306884766, "learning_rate": 1.7324892595672804e-07, "logits/chosen": 0.5625635385513306, "logits/rejected": 0.5283594131469727, "logps/chosen": -107.4459228515625, "logps/ref_chosen": -53.60887145996094, "logps/ref_rejected": -79.2139892578125, "logps/rejected": -167.34152221679688, "loss": 0.9617, "margin_dpo/margin_mean": 34.29045867919922, "margin_dpo/margin_std": 42.79728698730469, "step": 424 }, { "epoch": 0.6424792139077853, "fcm_dpo/beta": 0.02123691514134407, "fcm_dpo/delta": -0.02055555209517479, "fcm_dpo/margin": 29.152355194091797, "fcm_dpo/q_t": 0.3736664652824402, "grad_norm": 17.606735229492188, "learning_rate": 1.7199163857537824e-07, "logits/chosen": 0.677306592464447, "logits/rejected": 0.6497554779052734, "logps/chosen": -110.3130111694336, "logps/ref_chosen": -58.41468048095703, "logps/ref_rejected": -66.59054565429688, "logps/rejected": -147.6412353515625, "loss": 1.0286, "margin_dpo/margin_mean": 29.15235137939453, "margin_dpo/margin_std": 41.597991943359375, "step": 425 }, { "epoch": 0.6439909297052154, "fcm_dpo/beta": 0.02198956534266472, "fcm_dpo/delta": 0.22994250059127808, "fcm_dpo/margin": 17.23443031311035, "fcm_dpo/q_t": 0.42192342877388, "grad_norm": 20.545143127441406, "learning_rate": 1.7073653325558828e-07, "logits/chosen": 0.6122332811355591, "logits/rejected": 0.6184878945350647, "logps/chosen": -137.40863037109375, "logps/ref_chosen": -71.70822143554688, "logps/ref_rejected": -73.57725524902344, "logps/rejected": -156.51211547851562, "loss": 1.2701, "margin_dpo/margin_mean": 17.234432220458984, "margin_dpo/margin_std": 46.23017120361328, "step": 426 }, { "epoch": 0.6455026455026455, "fcm_dpo/beta": 0.022307440638542175, "fcm_dpo/delta": 0.015383723191916943, "fcm_dpo/margin": 26.24768829345703, "fcm_dpo/q_t": 0.38463151454925537, "grad_norm": 19.993911743164062, "learning_rate": 1.6948364510535218e-07, "logits/chosen": 0.6968556642532349, "logits/rejected": 0.6430996656417847, "logps/chosen": -120.65845489501953, "logps/ref_chosen": -58.64276885986328, "logps/ref_rejected": -86.25437927246094, "logps/rejected": -174.51776123046875, "loss": 1.1157, "margin_dpo/margin_mean": 26.24768829345703, "margin_dpo/margin_std": 46.92239761352539, "step": 427 }, { "epoch": 0.6470143613000756, "fcm_dpo/beta": 0.022043775767087936, "fcm_dpo/delta": -0.12670910358428955, "fcm_dpo/margin": 32.557342529296875, "fcm_dpo/q_t": 0.3596833646297455, "grad_norm": 16.594730377197266, "learning_rate": 1.6823300917064458e-07, "logits/chosen": 0.5824407339096069, "logits/rejected": 0.5360796451568604, "logps/chosen": -123.86228942871094, "logps/ref_chosen": -66.5960464477539, "logps/ref_rejected": -82.3941650390625, "logps/rejected": -172.21774291992188, "loss": 1.0117, "margin_dpo/margin_mean": 32.557342529296875, "margin_dpo/margin_std": 46.57630157470703, "step": 428 }, { "epoch": 0.6485260770975056, "fcm_dpo/beta": 0.022087689489126205, "fcm_dpo/delta": 0.04340841621160507, "fcm_dpo/margin": 25.24394989013672, "fcm_dpo/q_t": 0.3880462646484375, "grad_norm": 19.481197357177734, "learning_rate": 1.669846604344412e-07, "logits/chosen": 0.605602502822876, "logits/rejected": 0.6182312965393066, "logps/chosen": -118.7700424194336, "logps/ref_chosen": -57.00970458984375, "logps/ref_rejected": -59.86549377441406, "logps/rejected": -146.86978149414062, "loss": 1.1297, "margin_dpo/margin_mean": 25.24394989013672, "margin_dpo/margin_std": 45.29219436645508, "step": 429 }, { "epoch": 0.6500377928949358, "fcm_dpo/beta": 0.021354306489229202, "fcm_dpo/delta": -0.1940208375453949, "fcm_dpo/margin": 36.47758483886719, "fcm_dpo/q_t": 0.34010353684425354, "grad_norm": 16.44814109802246, "learning_rate": 1.6573863381573954e-07, "logits/chosen": 0.5480879545211792, "logits/rejected": 0.5461628437042236, "logps/chosen": -114.69402313232422, "logps/ref_chosen": -59.563194274902344, "logps/ref_rejected": -70.52289581298828, "logps/rejected": -162.1313018798828, "loss": 0.9438, "margin_dpo/margin_mean": 36.47758483886719, "margin_dpo/margin_std": 44.4223747253418, "step": 430 }, { "epoch": 0.6515495086923658, "fcm_dpo/beta": 0.021211152896285057, "fcm_dpo/delta": 0.0317416712641716, "fcm_dpo/margin": 26.876333236694336, "fcm_dpo/q_t": 0.38250017166137695, "grad_norm": 16.88580322265625, "learning_rate": 1.6449496416858282e-07, "logits/chosen": 0.6321650743484497, "logits/rejected": 0.58774334192276, "logps/chosen": -100.53054809570312, "logps/ref_chosen": -50.20032501220703, "logps/ref_rejected": -77.81680297851562, "logps/rejected": -155.0233612060547, "loss": 1.0702, "margin_dpo/margin_mean": 26.876333236694336, "margin_dpo/margin_std": 42.4122314453125, "step": 431 }, { "epoch": 0.6530612244897959, "fcm_dpo/beta": 0.021274283528327942, "fcm_dpo/delta": 0.0018111169338226318, "fcm_dpo/margin": 28.123023986816406, "fcm_dpo/q_t": 0.3795931339263916, "grad_norm": 16.432628631591797, "learning_rate": 1.632536862810844e-07, "logits/chosen": 0.6638392210006714, "logits/rejected": 0.6173241138458252, "logps/chosen": -115.13069152832031, "logps/ref_chosen": -61.662757873535156, "logps/ref_rejected": -83.94496154785156, "logps/rejected": -165.53591918945312, "loss": 1.0911, "margin_dpo/margin_mean": 28.123023986816406, "margin_dpo/margin_std": 47.277862548828125, "step": 432 }, { "epoch": 0.654572940287226, "fcm_dpo/beta": 0.02076822891831398, "fcm_dpo/delta": -0.19751861691474915, "fcm_dpo/margin": 37.677452087402344, "fcm_dpo/q_t": 0.33977317810058594, "grad_norm": 16.52134132385254, "learning_rate": 1.6201483487445515e-07, "logits/chosen": 0.7488293051719666, "logits/rejected": 0.7424620389938354, "logps/chosen": -116.32231140136719, "logps/ref_chosen": -63.72917938232422, "logps/ref_rejected": -65.8391342163086, "logps/rejected": -156.10971069335938, "loss": 0.9462, "margin_dpo/margin_mean": 37.677452087402344, "margin_dpo/margin_std": 45.816856384277344, "step": 433 }, { "epoch": 0.656084656084656, "fcm_dpo/beta": 0.019872212782502174, "fcm_dpo/delta": -0.13274145126342773, "fcm_dpo/margin": 36.2554817199707, "fcm_dpo/q_t": 0.34939053654670715, "grad_norm": 14.539586067199707, "learning_rate": 1.6077844460203204e-07, "logits/chosen": 0.7579059600830078, "logits/rejected": 0.6943522691726685, "logps/chosen": -94.58082580566406, "logps/ref_chosen": -47.97331619262695, "logps/ref_rejected": -72.51132202148438, "logps/rejected": -155.37432861328125, "loss": 1.0036, "margin_dpo/margin_mean": 36.25547790527344, "margin_dpo/margin_std": 49.25444030761719, "step": 434 }, { "epoch": 0.6575963718820862, "fcm_dpo/beta": 0.019995521754026413, "fcm_dpo/delta": -0.024610616266727448, "fcm_dpo/margin": 31.11350440979004, "fcm_dpo/q_t": 0.3718177378177643, "grad_norm": 17.21233558654785, "learning_rate": 1.5954455004830878e-07, "logits/chosen": 0.7389936447143555, "logits/rejected": 0.7017002105712891, "logps/chosen": -113.01530456542969, "logps/ref_chosen": -57.06024932861328, "logps/ref_rejected": -71.69146728515625, "logps/rejected": -158.76004028320312, "loss": 1.0395, "margin_dpo/margin_mean": 31.11350440979004, "margin_dpo/margin_std": 45.02946090698242, "step": 435 }, { "epoch": 0.6591080876795162, "fcm_dpo/beta": 0.020023200660943985, "fcm_dpo/delta": 0.0525226816534996, "fcm_dpo/margin": 27.47412872314453, "fcm_dpo/q_t": 0.3874613046646118, "grad_norm": 15.634448051452637, "learning_rate": 1.5831318572796847e-07, "logits/chosen": 0.6600745320320129, "logits/rejected": 0.6036931872367859, "logps/chosen": -110.47145080566406, "logps/ref_chosen": -56.158050537109375, "logps/ref_rejected": -67.63787841796875, "logps/rejected": -149.4254150390625, "loss": 1.0997, "margin_dpo/margin_mean": 27.47412872314453, "margin_dpo/margin_std": 46.383445739746094, "step": 436 }, { "epoch": 0.6606198034769464, "fcm_dpo/beta": 0.019956011325120926, "fcm_dpo/delta": 0.05999675393104553, "fcm_dpo/margin": 27.10342788696289, "fcm_dpo/q_t": 0.38999560475349426, "grad_norm": 17.91028594970703, "learning_rate": 1.5708438608491815e-07, "logits/chosen": 0.6346490383148193, "logits/rejected": 0.5062617659568787, "logps/chosen": -116.76597595214844, "logps/ref_chosen": -56.98578643798828, "logps/ref_rejected": -85.61524963378906, "logps/rejected": -172.49887084960938, "loss": 1.1374, "margin_dpo/margin_mean": 27.10342788696289, "margin_dpo/margin_std": 48.52922439575195, "step": 437 }, { "epoch": 0.6621315192743764, "fcm_dpo/beta": 0.019836513325572014, "fcm_dpo/delta": -0.17882977426052094, "fcm_dpo/margin": 38.5899772644043, "fcm_dpo/q_t": 0.34486445784568787, "grad_norm": 16.361831665039062, "learning_rate": 1.558581854913253e-07, "logits/chosen": 0.7000746130943298, "logits/rejected": 0.6368303894996643, "logps/chosen": -92.46006774902344, "logps/ref_chosen": -41.27777862548828, "logps/ref_rejected": -65.33840942382812, "logps/rejected": -155.1106719970703, "loss": 0.9389, "margin_dpo/margin_mean": 38.58998107910156, "margin_dpo/margin_std": 46.060386657714844, "step": 438 }, { "epoch": 0.6636432350718064, "fcm_dpo/beta": 0.01960304006934166, "fcm_dpo/delta": -0.03375595808029175, "fcm_dpo/margin": 32.17954635620117, "fcm_dpo/q_t": 0.3675764799118042, "grad_norm": 15.4530611038208, "learning_rate": 1.5463461824665658e-07, "logits/chosen": 0.5799434781074524, "logits/rejected": 0.5474724769592285, "logps/chosen": -136.7943572998047, "logps/ref_chosen": -81.41764831542969, "logps/ref_rejected": -94.72309875488281, "logps/rejected": -182.27935791015625, "loss": 1.0031, "margin_dpo/margin_mean": 32.17954635620117, "margin_dpo/margin_std": 42.14781951904297, "step": 439 }, { "epoch": 0.6651549508692366, "fcm_dpo/beta": 0.019320860505104065, "fcm_dpo/delta": -0.08684191107749939, "fcm_dpo/margin": 35.21943283081055, "fcm_dpo/q_t": 0.36134228110313416, "grad_norm": 21.23478889465332, "learning_rate": 1.534137185767178e-07, "logits/chosen": 0.6408597230911255, "logits/rejected": 0.545317530632019, "logps/chosen": -93.93386840820312, "logps/ref_chosen": -42.538185119628906, "logps/ref_rejected": -69.78813934326172, "logps/rejected": -156.40325927734375, "loss": 1.0121, "margin_dpo/margin_mean": 35.21943283081055, "margin_dpo/margin_std": 48.64961242675781, "step": 440 }, { "epoch": 0.6666666666666666, "fcm_dpo/beta": 0.018760252743959427, "fcm_dpo/delta": -0.07676707208156586, "fcm_dpo/margin": 35.738895416259766, "fcm_dpo/q_t": 0.3565472364425659, "grad_norm": 14.383801460266113, "learning_rate": 1.521955206326976e-07, "logits/chosen": 0.6243678331375122, "logits/rejected": 0.5356660485267639, "logps/chosen": -106.02540588378906, "logps/ref_chosen": -57.593223571777344, "logps/ref_rejected": -84.82878875732422, "logps/rejected": -168.99986267089844, "loss": 0.9456, "margin_dpo/margin_mean": 35.738895416259766, "margin_dpo/margin_std": 39.57575988769531, "step": 441 }, { "epoch": 0.6681783824640968, "fcm_dpo/beta": 0.01864861510694027, "fcm_dpo/delta": -0.04125872999429703, "fcm_dpo/margin": 34.23713684082031, "fcm_dpo/q_t": 0.3659091889858246, "grad_norm": 16.47283363342285, "learning_rate": 1.5098005849021078e-07, "logits/chosen": 0.6572163701057434, "logits/rejected": 0.610891580581665, "logps/chosen": -128.51307678222656, "logps/ref_chosen": -67.46121978759766, "logps/ref_rejected": -89.0693588256836, "logps/rejected": -184.3583526611328, "loss": 1.0119, "margin_dpo/margin_mean": 34.23713684082031, "margin_dpo/margin_std": 46.631752014160156, "step": 442 }, { "epoch": 0.6696900982615268, "fcm_dpo/beta": 0.018264703452587128, "fcm_dpo/delta": -0.15920081734657288, "fcm_dpo/margin": 40.92947006225586, "fcm_dpo/q_t": 0.3452005982398987, "grad_norm": 16.483232498168945, "learning_rate": 1.4976736614834662e-07, "logits/chosen": 0.6689678430557251, "logits/rejected": 0.5985565185546875, "logps/chosen": -105.98631286621094, "logps/ref_chosen": -54.79610061645508, "logps/ref_rejected": -77.80781555175781, "logps/rejected": -169.92750549316406, "loss": 0.9651, "margin_dpo/margin_mean": 40.929473876953125, "margin_dpo/margin_std": 51.907249450683594, "step": 443 }, { "epoch": 0.671201814058957, "fcm_dpo/beta": 0.01889648288488388, "fcm_dpo/delta": 0.3607651889324188, "fcm_dpo/margin": 13.21998405456543, "fcm_dpo/q_t": 0.4478858411312103, "grad_norm": 19.747303009033203, "learning_rate": 1.4855747752871654e-07, "logits/chosen": 0.6540986895561218, "logits/rejected": 0.5701756477355957, "logps/chosen": -124.91129302978516, "logps/ref_chosen": -58.749061584472656, "logps/ref_rejected": -86.87396240234375, "logps/rejected": -166.2561798095703, "loss": 1.327, "margin_dpo/margin_mean": 13.219983100891113, "margin_dpo/margin_std": 44.31674575805664, "step": 444 }, { "epoch": 0.672713529856387, "fcm_dpo/beta": 0.019091159105300903, "fcm_dpo/delta": -0.12670674920082092, "fcm_dpo/margin": 37.597110748291016, "fcm_dpo/q_t": 0.35341084003448486, "grad_norm": 16.564191818237305, "learning_rate": 1.473504264745062e-07, "logits/chosen": 0.6639256477355957, "logits/rejected": 0.6510766744613647, "logps/chosen": -123.94331359863281, "logps/ref_chosen": -60.91743850708008, "logps/ref_rejected": -71.5637435913086, "logps/rejected": -172.18673706054688, "loss": 0.9766, "margin_dpo/margin_mean": 37.597110748291016, "margin_dpo/margin_std": 48.70800018310547, "step": 445 }, { "epoch": 0.674225245653817, "fcm_dpo/beta": 0.018637076020240784, "fcm_dpo/delta": -0.20336636900901794, "fcm_dpo/margin": 42.1799201965332, "fcm_dpo/q_t": 0.33526235818862915, "grad_norm": 13.191262245178223, "learning_rate": 1.461462467495284e-07, "logits/chosen": 0.7034658193588257, "logits/rejected": 0.6283047199249268, "logps/chosen": -103.102783203125, "logps/ref_chosen": -48.79924774169922, "logps/ref_rejected": -71.8719482421875, "logps/rejected": -168.35540771484375, "loss": 0.8875, "margin_dpo/margin_mean": 42.17991638183594, "margin_dpo/margin_std": 40.7318229675293, "step": 446 }, { "epoch": 0.6757369614512472, "fcm_dpo/beta": 0.01757156476378441, "fcm_dpo/delta": -0.19900262355804443, "fcm_dpo/margin": 44.52255630493164, "fcm_dpo/q_t": 0.33368557691574097, "grad_norm": 17.807926177978516, "learning_rate": 1.4494497203727843e-07, "logits/chosen": 0.6144707202911377, "logits/rejected": 0.5171197056770325, "logps/chosen": -104.90130615234375, "logps/ref_chosen": -53.682716369628906, "logps/ref_rejected": -88.17315673828125, "logps/rejected": -183.914306640625, "loss": 0.9309, "margin_dpo/margin_mean": 44.522560119628906, "margin_dpo/margin_std": 51.547584533691406, "step": 447 }, { "epoch": 0.6772486772486772, "fcm_dpo/beta": 0.017467539757490158, "fcm_dpo/delta": 0.0035621817223727703, "fcm_dpo/margin": 34.157630920410156, "fcm_dpo/q_t": 0.37248873710632324, "grad_norm": 15.845419883728027, "learning_rate": 1.4374663593999256e-07, "logits/chosen": 0.6645723581314087, "logits/rejected": 0.6151407957077026, "logps/chosen": -111.9876708984375, "logps/ref_chosen": -53.75125503540039, "logps/ref_rejected": -77.17623901367188, "logps/rejected": -169.57028198242188, "loss": 1.0297, "margin_dpo/margin_mean": 34.157630920410156, "margin_dpo/margin_std": 47.70314025878906, "step": 448 }, { "epoch": 0.6787603930461074, "fcm_dpo/beta": 0.018148906528949738, "fcm_dpo/delta": 0.284239262342453, "fcm_dpo/margin": 17.95632553100586, "fcm_dpo/q_t": 0.43146008253097534, "grad_norm": 23.75647735595703, "learning_rate": 1.4255127197770707e-07, "logits/chosen": 0.5378659963607788, "logits/rejected": 0.528540849685669, "logps/chosen": -145.61447143554688, "logps/ref_chosen": -75.82737731933594, "logps/ref_rejected": -82.20687866210938, "logps/rejected": -169.95028686523438, "loss": 1.213, "margin_dpo/margin_mean": 17.95632553100586, "margin_dpo/margin_std": 40.200843811035156, "step": 449 }, { "epoch": 0.6802721088435374, "fcm_dpo/beta": 0.018794924020767212, "fcm_dpo/delta": 0.1283785104751587, "fcm_dpo/margin": 25.42965316772461, "fcm_dpo/q_t": 0.4035590589046478, "grad_norm": 18.1055850982666, "learning_rate": 1.4135891358732205e-07, "logits/chosen": 0.754492461681366, "logits/rejected": 0.6441506147384644, "logps/chosen": -103.79361724853516, "logps/ref_chosen": -47.11572265625, "logps/ref_rejected": -78.7546615600586, "logps/rejected": -160.86221313476562, "loss": 1.1364, "margin_dpo/margin_mean": 25.42965316772461, "margin_dpo/margin_std": 46.92584991455078, "step": 450 }, { "epoch": 0.6817838246409675, "fcm_dpo/beta": 0.01930905692279339, "fcm_dpo/delta": 0.09509407728910446, "fcm_dpo/margin": 26.37521743774414, "fcm_dpo/q_t": 0.3959817886352539, "grad_norm": 18.775625228881836, "learning_rate": 1.4016959412166437e-07, "logits/chosen": 0.6403440237045288, "logits/rejected": 0.5913487672805786, "logps/chosen": -120.57457733154297, "logps/ref_chosen": -63.350440979003906, "logps/ref_rejected": -76.28530883789062, "logps/rejected": -159.88465881347656, "loss": 1.1225, "margin_dpo/margin_mean": 26.37521743774414, "margin_dpo/margin_std": 47.211151123046875, "step": 451 }, { "epoch": 0.6832955404383976, "fcm_dpo/beta": 0.019437428563833237, "fcm_dpo/delta": 0.025507230311632156, "fcm_dpo/margin": 29.629127502441406, "fcm_dpo/q_t": 0.37918704748153687, "grad_norm": 17.062150955200195, "learning_rate": 1.3898334684855645e-07, "logits/chosen": 0.6154012680053711, "logits/rejected": 0.5401051044464111, "logps/chosen": -112.09454345703125, "logps/ref_chosen": -55.58583450317383, "logps/ref_rejected": -77.68738555908203, "logps/rejected": -163.82521057128906, "loss": 1.1002, "margin_dpo/margin_mean": 29.629127502441406, "margin_dpo/margin_std": 49.8187255859375, "step": 452 }, { "epoch": 0.6848072562358276, "fcm_dpo/beta": 0.01923062652349472, "fcm_dpo/delta": -0.060018863528966904, "fcm_dpo/margin": 34.10637664794922, "fcm_dpo/q_t": 0.37033775448799133, "grad_norm": 20.816280364990234, "learning_rate": 1.3780020494988445e-07, "logits/chosen": 0.616425633430481, "logits/rejected": 0.5868883728981018, "logps/chosen": -115.68905639648438, "logps/ref_chosen": -61.778202056884766, "logps/ref_rejected": -71.51403045654297, "logps/rejected": -159.53126525878906, "loss": 1.047, "margin_dpo/margin_mean": 34.10637664794922, "margin_dpo/margin_std": 51.63662338256836, "step": 453 }, { "epoch": 0.6863189720332578, "fcm_dpo/beta": 0.019015073776245117, "fcm_dpo/delta": -0.06713598966598511, "fcm_dpo/margin": 34.84968566894531, "fcm_dpo/q_t": 0.3608110845088959, "grad_norm": 14.855917930603027, "learning_rate": 1.366202015206706e-07, "logits/chosen": 0.6720168590545654, "logits/rejected": 0.631004810333252, "logps/chosen": -101.51078796386719, "logps/ref_chosen": -51.59515380859375, "logps/ref_rejected": -63.96732711791992, "logps/rejected": -148.73263549804688, "loss": 1.0185, "margin_dpo/margin_mean": 34.84968566894531, "margin_dpo/margin_std": 49.115013122558594, "step": 454 }, { "epoch": 0.6878306878306878, "fcm_dpo/beta": 0.01869376376271248, "fcm_dpo/delta": -0.07124396413564682, "fcm_dpo/margin": 35.63090896606445, "fcm_dpo/q_t": 0.3637927770614624, "grad_norm": 16.458694458007812, "learning_rate": 1.354433695681474e-07, "logits/chosen": 0.5351288318634033, "logits/rejected": 0.5000825524330139, "logps/chosen": -129.70745849609375, "logps/ref_chosen": -70.65170288085938, "logps/ref_rejected": -77.44276428222656, "logps/rejected": -172.12942504882812, "loss": 0.9883, "margin_dpo/margin_mean": 35.63090515136719, "margin_dpo/margin_std": 46.63299560546875, "step": 455 }, { "epoch": 0.6893424036281179, "fcm_dpo/beta": 0.018715500831604004, "fcm_dpo/delta": 0.02745303139090538, "fcm_dpo/margin": 30.67329978942871, "fcm_dpo/q_t": 0.38108596205711365, "grad_norm": 19.61562156677246, "learning_rate": 1.3426974201083439e-07, "logits/chosen": 0.5628154277801514, "logits/rejected": 0.5071948170661926, "logps/chosen": -114.44416046142578, "logps/ref_chosen": -56.398284912109375, "logps/ref_rejected": -82.61642456054688, "logps/rejected": -171.33560180664062, "loss": 1.0582, "margin_dpo/margin_mean": 30.673297882080078, "margin_dpo/margin_std": 46.49111557006836, "step": 456 }, { "epoch": 0.690854119425548, "fcm_dpo/beta": 0.018704025074839592, "fcm_dpo/delta": 0.019063390791416168, "fcm_dpo/margin": 31.069124221801758, "fcm_dpo/q_t": 0.3758009970188141, "grad_norm": 14.574370384216309, "learning_rate": 1.3309935167761717e-07, "logits/chosen": 0.7333712577819824, "logits/rejected": 0.6618935465812683, "logps/chosen": -103.5023193359375, "logps/ref_chosen": -44.72057342529297, "logps/ref_rejected": -68.1158676147461, "logps/rejected": -157.96673583984375, "loss": 1.0142, "margin_dpo/margin_mean": 31.069124221801758, "margin_dpo/margin_std": 39.806251525878906, "step": 457 }, { "epoch": 0.6923658352229781, "fcm_dpo/beta": 0.018755577504634857, "fcm_dpo/delta": -0.030353626236319542, "fcm_dpo/margin": 33.504337310791016, "fcm_dpo/q_t": 0.3689650893211365, "grad_norm": 14.937610626220703, "learning_rate": 1.3193223130682936e-07, "logits/chosen": 0.6596442461013794, "logits/rejected": 0.5467248558998108, "logps/chosen": -103.88152313232422, "logps/ref_chosen": -50.00569152832031, "logps/ref_rejected": -87.50015258789062, "logps/rejected": -174.88031005859375, "loss": 1.0325, "margin_dpo/margin_mean": 33.504337310791016, "margin_dpo/margin_std": 48.71820831298828, "step": 458 }, { "epoch": 0.6938775510204082, "fcm_dpo/beta": 0.018601369112730026, "fcm_dpo/delta": -0.1576491743326187, "fcm_dpo/margin": 39.97838592529297, "fcm_dpo/q_t": 0.34621572494506836, "grad_norm": 17.257484436035156, "learning_rate": 1.3076841354533658e-07, "logits/chosen": 0.6838634014129639, "logits/rejected": 0.6482840180397034, "logps/chosen": -117.18104553222656, "logps/ref_chosen": -65.37794494628906, "logps/ref_rejected": -88.19244384765625, "logps/rejected": -179.9739227294922, "loss": 0.9548, "margin_dpo/margin_mean": 39.97838592529297, "margin_dpo/margin_std": 46.098365783691406, "step": 459 }, { "epoch": 0.6953892668178382, "fcm_dpo/beta": 0.01769839972257614, "fcm_dpo/delta": -0.13911324739456177, "fcm_dpo/margin": 41.11860656738281, "fcm_dpo/q_t": 0.3479554057121277, "grad_norm": 14.071867942810059, "learning_rate": 1.2960793094762345e-07, "logits/chosen": 0.7055972218513489, "logits/rejected": 0.5804663896560669, "logps/chosen": -123.29414367675781, "logps/ref_chosen": -64.5616683959961, "logps/ref_rejected": -88.67890167236328, "logps/rejected": -188.5299835205078, "loss": 0.9342, "margin_dpo/margin_mean": 41.11861038208008, "margin_dpo/margin_std": 46.839012145996094, "step": 460 }, { "epoch": 0.6969009826152683, "fcm_dpo/beta": 0.01716051995754242, "fcm_dpo/delta": -0.11889907717704773, "fcm_dpo/margin": 41.18246078491211, "fcm_dpo/q_t": 0.3509414792060852, "grad_norm": 14.316862106323242, "learning_rate": 1.2845081597488286e-07, "logits/chosen": 0.7764365673065186, "logits/rejected": 0.687043309211731, "logps/chosen": -96.71049499511719, "logps/ref_chosen": -49.4779167175293, "logps/ref_rejected": -72.65262603759766, "logps/rejected": -161.06765747070312, "loss": 0.9482, "margin_dpo/margin_mean": 41.18246078491211, "margin_dpo/margin_std": 46.42418670654297, "step": 461 }, { "epoch": 0.6984126984126984, "fcm_dpo/beta": 0.016697831451892853, "fcm_dpo/delta": -0.17689791321754456, "fcm_dpo/margin": 45.61006164550781, "fcm_dpo/q_t": 0.33651816844940186, "grad_norm": 13.249038696289062, "learning_rate": 1.27297100994108e-07, "logits/chosen": 0.6552219390869141, "logits/rejected": 0.6001119613647461, "logps/chosen": -114.67098999023438, "logps/ref_chosen": -60.4951171875, "logps/ref_rejected": -74.82136535644531, "logps/rejected": -174.60731506347656, "loss": 0.9112, "margin_dpo/margin_mean": 45.61006164550781, "margin_dpo/margin_std": 48.99812316894531, "step": 462 }, { "epoch": 0.6999244142101285, "fcm_dpo/beta": 0.016872413456439972, "fcm_dpo/delta": 0.11362668126821518, "fcm_dpo/margin": 29.165672302246094, "fcm_dpo/q_t": 0.39225536584854126, "grad_norm": 17.52629852294922, "learning_rate": 1.2614681827718695e-07, "logits/chosen": 0.6610893607139587, "logits/rejected": 0.6551651954650879, "logps/chosen": -129.19302368164062, "logps/ref_chosen": -67.68511962890625, "logps/ref_rejected": -71.32196044921875, "logps/rejected": -161.9955291748047, "loss": 1.0752, "margin_dpo/margin_mean": 29.165672302246094, "margin_dpo/margin_std": 42.47758483886719, "step": 463 }, { "epoch": 0.7014361300075586, "fcm_dpo/beta": 0.01695796474814415, "fcm_dpo/delta": -0.06330247223377228, "fcm_dpo/margin": 38.858402252197266, "fcm_dpo/q_t": 0.36157849431037903, "grad_norm": 16.41646385192871, "learning_rate": 1.2500000000000005e-07, "logits/chosen": 0.6483955979347229, "logits/rejected": 0.6234833598136902, "logps/chosen": -118.78719329833984, "logps/ref_chosen": -59.16564178466797, "logps/ref_rejected": -69.56146240234375, "logps/rejected": -168.04141235351562, "loss": 1.0087, "margin_dpo/margin_mean": 38.8583984375, "margin_dpo/margin_std": 51.726806640625, "step": 464 }, { "epoch": 0.7029478458049887, "fcm_dpo/beta": 0.016867714002728462, "fcm_dpo/delta": 0.03191475197672844, "fcm_dpo/margin": 33.78591537475586, "fcm_dpo/q_t": 0.382260799407959, "grad_norm": 19.196998596191406, "learning_rate": 1.238566782415197e-07, "logits/chosen": 0.7347100973129272, "logits/rejected": 0.6733226776123047, "logps/chosen": -122.17825317382812, "logps/ref_chosen": -58.513671875, "logps/ref_rejected": -84.31745910644531, "logps/rejected": -181.76795959472656, "loss": 1.0671, "margin_dpo/margin_mean": 33.785911560058594, "margin_dpo/margin_std": 52.060218811035156, "step": 465 }, { "epoch": 0.7044595616024187, "fcm_dpo/beta": 0.017417848110198975, "fcm_dpo/delta": 0.21429502964019775, "fcm_dpo/margin": 22.65721321105957, "fcm_dpo/q_t": 0.4179609715938568, "grad_norm": 22.925125122070312, "learning_rate": 1.2271688498291334e-07, "logits/chosen": 0.6901623010635376, "logits/rejected": 0.6949342489242554, "logps/chosen": -140.54638671875, "logps/ref_chosen": -73.26580810546875, "logps/ref_rejected": -74.83621215820312, "logps/rejected": -164.77401733398438, "loss": 1.1498, "margin_dpo/margin_mean": 22.657211303710938, "margin_dpo/margin_std": 41.78242492675781, "step": 466 }, { "epoch": 0.7059712773998488, "fcm_dpo/beta": 0.01785450614988804, "fcm_dpo/delta": 0.07046917825937271, "fcm_dpo/margin": 29.86865997314453, "fcm_dpo/q_t": 0.3858645260334015, "grad_norm": 15.465002059936523, "learning_rate": 1.2158065210664848e-07, "logits/chosen": 0.7200223803520203, "logits/rejected": 0.5697600841522217, "logps/chosen": -109.60458374023438, "logps/ref_chosen": -47.57947540283203, "logps/ref_rejected": -78.68522644042969, "logps/rejected": -170.57899475097656, "loss": 1.0483, "margin_dpo/margin_mean": 29.86865997314453, "margin_dpo/margin_std": 42.51439666748047, "step": 467 }, { "epoch": 0.7074829931972789, "fcm_dpo/beta": 0.017471440136432648, "fcm_dpo/delta": -0.19157786667346954, "fcm_dpo/margin": 44.475189208984375, "fcm_dpo/q_t": 0.3382449746131897, "grad_norm": 16.933467864990234, "learning_rate": 1.204480113956011e-07, "logits/chosen": 0.6419360637664795, "logits/rejected": 0.6371017694473267, "logps/chosen": -118.92312622070312, "logps/ref_chosen": -63.92778778076172, "logps/ref_rejected": -76.51626586914062, "logps/rejected": -175.98680114746094, "loss": 0.9168, "margin_dpo/margin_mean": 44.47518539428711, "margin_dpo/margin_std": 50.01071548461914, "step": 468 }, { "epoch": 0.708994708994709, "fcm_dpo/beta": 0.0168905146420002, "fcm_dpo/delta": -0.07182085514068604, "fcm_dpo/margin": 39.333194732666016, "fcm_dpo/q_t": 0.35752636194229126, "grad_norm": 16.476343154907227, "learning_rate": 1.1931899453216697e-07, "logits/chosen": 0.7270421981811523, "logits/rejected": 0.716751217842102, "logps/chosen": -116.4421157836914, "logps/ref_chosen": -59.05818176269531, "logps/ref_rejected": -75.67672729492188, "logps/rejected": -172.39385986328125, "loss": 0.955, "margin_dpo/margin_mean": 39.333194732666016, "margin_dpo/margin_std": 43.63664245605469, "step": 469 }, { "epoch": 0.7105064247921391, "fcm_dpo/beta": 0.01703723520040512, "fcm_dpo/delta": -0.01729283481836319, "fcm_dpo/margin": 36.148399353027344, "fcm_dpo/q_t": 0.3683815896511078, "grad_norm": 14.40355110168457, "learning_rate": 1.1819363309737438e-07, "logits/chosen": 0.6840169429779053, "logits/rejected": 0.6180592179298401, "logps/chosen": -105.3543701171875, "logps/ref_chosen": -47.86743927001953, "logps/ref_rejected": -65.96859741210938, "logps/rejected": -159.60391235351562, "loss": 1.0213, "margin_dpo/margin_mean": 36.148399353027344, "margin_dpo/margin_std": 49.47236633300781, "step": 470 }, { "epoch": 0.7120181405895691, "fcm_dpo/beta": 0.01653927005827427, "fcm_dpo/delta": -0.16447000205516815, "fcm_dpo/margin": 45.47620391845703, "fcm_dpo/q_t": 0.3416385054588318, "grad_norm": 14.304550170898438, "learning_rate": 1.1707195857000215e-07, "logits/chosen": 0.6405422687530518, "logits/rejected": 0.5829768180847168, "logps/chosen": -109.19689178466797, "logps/ref_chosen": -57.777854919433594, "logps/ref_rejected": -73.81172180175781, "logps/rejected": -170.70697021484375, "loss": 0.9287, "margin_dpo/margin_mean": 45.47620391845703, "margin_dpo/margin_std": 51.99640655517578, "step": 471 }, { "epoch": 0.7135298563869993, "fcm_dpo/beta": 0.01654931902885437, "fcm_dpo/delta": 0.019277174025774002, "fcm_dpo/margin": 35.099578857421875, "fcm_dpo/q_t": 0.3808854818344116, "grad_norm": 17.019304275512695, "learning_rate": 1.1595400232569768e-07, "logits/chosen": 0.6810761094093323, "logits/rejected": 0.634406328201294, "logps/chosen": -110.3800048828125, "logps/ref_chosen": -55.908668518066406, "logps/ref_rejected": -74.70294189453125, "logps/rejected": -164.27386474609375, "loss": 1.0809, "margin_dpo/margin_mean": 35.099578857421875, "margin_dpo/margin_std": 56.11036682128906, "step": 472 }, { "epoch": 0.7150415721844293, "fcm_dpo/beta": 0.016645582392811775, "fcm_dpo/delta": 0.07657499611377716, "fcm_dpo/margin": 31.687740325927734, "fcm_dpo/q_t": 0.3922686278820038, "grad_norm": 19.925630569458008, "learning_rate": 1.1483979563610069e-07, "logits/chosen": 0.7695510387420654, "logits/rejected": 0.6590477228164673, "logps/chosen": -109.32481384277344, "logps/ref_chosen": -54.16088104248047, "logps/ref_rejected": -92.76789855957031, "logps/rejected": -179.61956787109375, "loss": 1.1266, "margin_dpo/margin_mean": 31.687740325927734, "margin_dpo/margin_std": 57.10271453857422, "step": 473 }, { "epoch": 0.7165532879818595, "fcm_dpo/beta": 0.01691177673637867, "fcm_dpo/delta": 0.08062369376420975, "fcm_dpo/margin": 30.960586547851562, "fcm_dpo/q_t": 0.3930462598800659, "grad_norm": 21.025999069213867, "learning_rate": 1.1372936966796709e-07, "logits/chosen": 0.7726494669914246, "logits/rejected": 0.6976436376571655, "logps/chosen": -108.3525390625, "logps/ref_chosen": -46.685707092285156, "logps/ref_rejected": -71.44731903076172, "logps/rejected": -164.07473754882812, "loss": 1.1183, "margin_dpo/margin_mean": 30.960586547851562, "margin_dpo/margin_std": 54.630615234375, "step": 474 }, { "epoch": 0.7180650037792895, "fcm_dpo/beta": 0.01635439321398735, "fcm_dpo/delta": -0.20893925428390503, "fcm_dpo/margin": 48.320587158203125, "fcm_dpo/q_t": 0.3328055143356323, "grad_norm": 12.721051216125488, "learning_rate": 1.126227554822985e-07, "logits/chosen": 0.6676898002624512, "logits/rejected": 0.6170685887336731, "logps/chosen": -115.66683959960938, "logps/ref_chosen": -58.4873046875, "logps/ref_rejected": -87.00187683105469, "logps/rejected": -192.5019989013672, "loss": 0.8783, "margin_dpo/margin_mean": 48.320594787597656, "margin_dpo/margin_std": 48.11223602294922, "step": 475 }, { "epoch": 0.7195767195767195, "fcm_dpo/beta": 0.01653391122817993, "fcm_dpo/delta": 0.06583425402641296, "fcm_dpo/margin": 32.468589782714844, "fcm_dpo/q_t": 0.3886245787143707, "grad_norm": 16.372636795043945, "learning_rate": 1.1151998403347243e-07, "logits/chosen": 0.6212706565856934, "logits/rejected": 0.6143991947174072, "logps/chosen": -143.81753540039062, "logps/ref_chosen": -75.38162231445312, "logps/ref_rejected": -76.99822235107422, "logps/rejected": -177.90272521972656, "loss": 1.0843, "margin_dpo/margin_mean": 32.468589782714844, "margin_dpo/margin_std": 51.891624450683594, "step": 476 }, { "epoch": 0.7210884353741497, "fcm_dpo/beta": 0.016756640747189522, "fcm_dpo/delta": 0.10726511478424072, "fcm_dpo/margin": 29.727947235107422, "fcm_dpo/q_t": 0.3972741961479187, "grad_norm": 19.15401840209961, "learning_rate": 1.1042108616837692e-07, "logits/chosen": 0.6817283630371094, "logits/rejected": 0.636535108089447, "logps/chosen": -133.14248657226562, "logps/ref_chosen": -61.073387145996094, "logps/ref_rejected": -81.34375, "logps/rejected": -183.14080810546875, "loss": 1.178, "margin_dpo/margin_mean": 29.727947235107422, "margin_dpo/margin_std": 60.472991943359375, "step": 477 }, { "epoch": 0.7226001511715797, "fcm_dpo/beta": 0.017096243798732758, "fcm_dpo/delta": 0.0992036759853363, "fcm_dpo/margin": 29.590330123901367, "fcm_dpo/q_t": 0.3972923159599304, "grad_norm": 19.976335525512695, "learning_rate": 1.0932609262554746e-07, "logits/chosen": 0.6263631582260132, "logits/rejected": 0.6319549679756165, "logps/chosen": -118.4478530883789, "logps/ref_chosen": -57.16731643676758, "logps/ref_rejected": -53.30917739868164, "logps/rejected": -144.1800537109375, "loss": 1.1515, "margin_dpo/margin_mean": 29.59033203125, "margin_dpo/margin_std": 56.895023345947266, "step": 478 }, { "epoch": 0.7241118669690099, "fcm_dpo/beta": 0.017538445070385933, "fcm_dpo/delta": 0.16506525874137878, "fcm_dpo/margin": 25.232295989990234, "fcm_dpo/q_t": 0.4069156050682068, "grad_norm": 20.780662536621094, "learning_rate": 1.0823503403430734e-07, "logits/chosen": 0.610386848449707, "logits/rejected": 0.5595101118087769, "logps/chosen": -124.1976547241211, "logps/ref_chosen": -58.91331481933594, "logps/ref_rejected": -63.7403450012207, "logps/rejected": -154.25698852539062, "loss": 1.1754, "margin_dpo/margin_mean": 25.2322940826416, "margin_dpo/margin_std": 51.18651580810547, "step": 479 }, { "epoch": 0.7256235827664399, "fcm_dpo/beta": 0.017718058079481125, "fcm_dpo/delta": -0.08875527232885361, "fcm_dpo/margin": 38.47267150878906, "fcm_dpo/q_t": 0.357020765542984, "grad_norm": 20.216873168945312, "learning_rate": 1.0714794091391072e-07, "logits/chosen": 0.6035921573638916, "logits/rejected": 0.5966066122055054, "logps/chosen": -125.77318572998047, "logps/ref_chosen": -62.80061340332031, "logps/ref_rejected": -67.58859252929688, "logps/rejected": -169.03384399414062, "loss": 1.0383, "margin_dpo/margin_mean": 38.47267532348633, "margin_dpo/margin_std": 55.48522186279297, "step": 480 }, { "epoch": 0.72713529856387, "fcm_dpo/beta": 0.017298948019742966, "fcm_dpo/delta": -0.05451624467968941, "fcm_dpo/margin": 37.62444305419922, "fcm_dpo/q_t": 0.3680918216705322, "grad_norm": 17.83654022216797, "learning_rate": 1.0606484367268906e-07, "logits/chosen": 0.6130036115646362, "logits/rejected": 0.6114796996116638, "logps/chosen": -126.92695617675781, "logps/ref_chosen": -65.28649139404297, "logps/ref_rejected": -70.78668212890625, "logps/rejected": -170.0515899658203, "loss": 1.0328, "margin_dpo/margin_mean": 37.62444305419922, "margin_dpo/margin_std": 55.061363220214844, "step": 481 }, { "epoch": 0.7286470143613001, "fcm_dpo/beta": 0.017456453293561935, "fcm_dpo/delta": 0.03557737171649933, "fcm_dpo/margin": 32.40789794921875, "fcm_dpo/q_t": 0.3861989974975586, "grad_norm": 21.065109252929688, "learning_rate": 1.0498577260720048e-07, "logits/chosen": 0.6204051375389099, "logits/rejected": 0.47864723205566406, "logps/chosen": -131.6238555908203, "logps/ref_chosen": -60.906185150146484, "logps/ref_rejected": -103.44656372070312, "logps/rejected": -206.57212829589844, "loss": 1.1402, "margin_dpo/margin_mean": 32.40789794921875, "margin_dpo/margin_std": 60.17729949951172, "step": 482 }, { "epoch": 0.7301587301587301, "fcm_dpo/beta": 0.01712999865412712, "fcm_dpo/delta": -0.11436626315116882, "fcm_dpo/margin": 41.23766326904297, "fcm_dpo/q_t": 0.3495997190475464, "grad_norm": 16.394908905029297, "learning_rate": 1.0391075790138232e-07, "logits/chosen": 0.7566468715667725, "logits/rejected": 0.6558492183685303, "logps/chosen": -112.64329528808594, "logps/ref_chosen": -53.192012786865234, "logps/ref_rejected": -81.83927154541016, "logps/rejected": -182.52822875976562, "loss": 0.9599, "margin_dpo/margin_mean": 41.23766326904297, "margin_dpo/margin_std": 50.17535400390625, "step": 483 }, { "epoch": 0.7316704459561603, "fcm_dpo/beta": 0.017164533957839012, "fcm_dpo/delta": 0.027778685092926025, "fcm_dpo/margin": 33.390480041503906, "fcm_dpo/q_t": 0.3766096830368042, "grad_norm": 18.586793899536133, "learning_rate": 1.0283982962570681e-07, "logits/chosen": 0.7305570840835571, "logits/rejected": 0.6929386854171753, "logps/chosen": -117.24603271484375, "logps/ref_chosen": -57.76945877075195, "logps/ref_rejected": -71.6829833984375, "logps/rejected": -164.550048828125, "loss": 1.0136, "margin_dpo/margin_mean": 33.390480041503906, "margin_dpo/margin_std": 42.75909423828125, "step": 484 }, { "epoch": 0.7331821617535903, "fcm_dpo/beta": 0.016995690762996674, "fcm_dpo/delta": 0.030734747648239136, "fcm_dpo/margin": 33.485816955566406, "fcm_dpo/q_t": 0.38060659170150757, "grad_norm": 16.272403717041016, "learning_rate": 1.0177301773633992e-07, "logits/chosen": 0.6844367980957031, "logits/rejected": 0.6609545946121216, "logps/chosen": -118.37860107421875, "logps/ref_chosen": -56.63584899902344, "logps/ref_rejected": -70.85614013671875, "logps/rejected": -166.08470153808594, "loss": 1.0547, "margin_dpo/margin_mean": 33.485816955566406, "margin_dpo/margin_std": 48.70667266845703, "step": 485 }, { "epoch": 0.7346938775510204, "fcm_dpo/beta": 0.017520343884825706, "fcm_dpo/delta": 0.11381173133850098, "fcm_dpo/margin": 28.05940055847168, "fcm_dpo/q_t": 0.4005647599697113, "grad_norm": 17.28680419921875, "learning_rate": 1.007103520743035e-07, "logits/chosen": 0.7284814715385437, "logits/rejected": 0.6097695827484131, "logps/chosen": -131.38426208496094, "logps/ref_chosen": -56.347023010253906, "logps/ref_rejected": -85.97221374511719, "logps/rejected": -189.06886291503906, "loss": 1.1411, "margin_dpo/margin_mean": 28.059402465820312, "margin_dpo/margin_std": 53.089866638183594, "step": 486 }, { "epoch": 0.7362055933484505, "fcm_dpo/beta": 0.017645984888076782, "fcm_dpo/delta": 0.031026359647512436, "fcm_dpo/margin": 32.34307098388672, "fcm_dpo/q_t": 0.3809584975242615, "grad_norm": 22.65226173400879, "learning_rate": 9.965186236464046e-08, "logits/chosen": 0.7624588012695312, "logits/rejected": 0.7029759883880615, "logps/chosen": -127.072998046875, "logps/ref_chosen": -60.617218017578125, "logps/ref_rejected": -82.50975036621094, "logps/rejected": -181.30859375, "loss": 1.0653, "margin_dpo/margin_mean": 32.34307098388672, "margin_dpo/margin_std": 50.19340515136719, "step": 487 }, { "epoch": 0.7377173091458806, "fcm_dpo/beta": 0.017560675740242004, "fcm_dpo/delta": -0.09279748052358627, "fcm_dpo/margin": 39.08435821533203, "fcm_dpo/q_t": 0.35840192437171936, "grad_norm": 21.256258010864258, "learning_rate": 9.859757821558337e-08, "logits/chosen": 0.6977435946464539, "logits/rejected": 0.6269375085830688, "logps/chosen": -123.77168273925781, "logps/ref_chosen": -63.10905075073242, "logps/ref_rejected": -82.49348449707031, "logps/rejected": -182.24046325683594, "loss": 0.9753, "margin_dpo/margin_mean": 39.08435821533203, "margin_dpo/margin_std": 49.42118835449219, "step": 488 }, { "epoch": 0.7392290249433107, "fcm_dpo/beta": 0.017897412180900574, "fcm_dpo/delta": 0.2119128406047821, "fcm_dpo/margin": 22.175716400146484, "fcm_dpo/q_t": 0.4207577109336853, "grad_norm": 20.755985260009766, "learning_rate": 9.754752911772615e-08, "logits/chosen": 0.6768500208854675, "logits/rejected": 0.6314944624900818, "logps/chosen": -135.53665161132812, "logps/ref_chosen": -64.98896026611328, "logps/ref_rejected": -84.39607238769531, "logps/rejected": -177.11947631835938, "loss": 1.2272, "margin_dpo/margin_mean": 22.175716400146484, "margin_dpo/margin_std": 52.71125793457031, "step": 489 }, { "epoch": 0.7407407407407407, "fcm_dpo/beta": 0.01822846755385399, "fcm_dpo/delta": 0.059169452637434006, "fcm_dpo/margin": 29.84263038635254, "fcm_dpo/q_t": 0.39093446731567383, "grad_norm": 17.13382339477539, "learning_rate": 9.650174444319956e-08, "logits/chosen": 0.7506576776504517, "logits/rejected": 0.7263511419296265, "logps/chosen": -123.81832885742188, "logps/ref_chosen": -61.90874481201172, "logps/ref_rejected": -70.58566284179688, "logps/rejected": -162.33787536621094, "loss": 1.188, "margin_dpo/margin_mean": 29.84263038635254, "margin_dpo/margin_std": 61.338348388671875, "step": 490 }, { "epoch": 0.7422524565381708, "fcm_dpo/beta": 0.018151359632611275, "fcm_dpo/delta": -0.016892343759536743, "fcm_dpo/margin": 33.84369659423828, "fcm_dpo/q_t": 0.3726619780063629, "grad_norm": 16.95362091064453, "learning_rate": 9.546025344484868e-08, "logits/chosen": 0.6099548935890198, "logits/rejected": 0.5560557246208191, "logps/chosen": -116.60749053955078, "logps/ref_chosen": -55.47570037841797, "logps/ref_rejected": -78.70318603515625, "logps/rejected": -173.6786651611328, "loss": 1.0401, "margin_dpo/margin_mean": 33.84369659423828, "margin_dpo/margin_std": 48.538536071777344, "step": 491 }, { "epoch": 0.7437641723356009, "fcm_dpo/beta": 0.018728461116552353, "fcm_dpo/delta": 0.10250745713710785, "fcm_dpo/margin": 26.719730377197266, "fcm_dpo/q_t": 0.400844931602478, "grad_norm": 21.921876907348633, "learning_rate": 9.442308525541589e-08, "logits/chosen": 0.6604113578796387, "logits/rejected": 0.586262583732605, "logps/chosen": -143.04898071289062, "logps/ref_chosen": -67.28638458251953, "logps/ref_rejected": -82.78628540039062, "logps/rejected": -185.26861572265625, "loss": 1.17, "margin_dpo/margin_mean": 26.719728469848633, "margin_dpo/margin_std": 53.01930236816406, "step": 492 }, { "epoch": 0.745275888133031, "fcm_dpo/beta": 0.018443478271365166, "fcm_dpo/delta": -0.12488085776567459, "fcm_dpo/margin": 38.8165283203125, "fcm_dpo/q_t": 0.35293227434158325, "grad_norm": 17.643783569335938, "learning_rate": 9.339026888672468e-08, "logits/chosen": 0.6158891916275024, "logits/rejected": 0.5381425619125366, "logps/chosen": -115.87471008300781, "logps/ref_chosen": -55.92750549316406, "logps/ref_rejected": -79.12149810791016, "logps/rejected": -177.88523864746094, "loss": 1.0033, "margin_dpo/margin_mean": 38.8165283203125, "margin_dpo/margin_std": 53.64328384399414, "step": 493 }, { "epoch": 0.7467876039304611, "fcm_dpo/beta": 0.018356945365667343, "fcm_dpo/delta": 0.07535573840141296, "fcm_dpo/margin": 28.798974990844727, "fcm_dpo/q_t": 0.3923889994621277, "grad_norm": 21.99494743347168, "learning_rate": 9.236183322886945e-08, "logits/chosen": 0.5371869802474976, "logits/rejected": 0.4848790168762207, "logps/chosen": -130.42276000976562, "logps/ref_chosen": -67.95410919189453, "logps/ref_rejected": -90.50865173339844, "logps/rejected": -181.7762908935547, "loss": 1.1839, "margin_dpo/margin_mean": 28.798974990844727, "margin_dpo/margin_std": 59.69166564941406, "step": 494 }, { "epoch": 0.7482993197278912, "fcm_dpo/beta": 0.01888749748468399, "fcm_dpo/delta": 0.10935745388269424, "fcm_dpo/margin": 26.206092834472656, "fcm_dpo/q_t": 0.4023835062980652, "grad_norm": 23.057941436767578, "learning_rate": 9.133780704940594e-08, "logits/chosen": 0.7392085790634155, "logits/rejected": 0.6697524785995483, "logps/chosen": -114.02333068847656, "logps/ref_chosen": -52.62546157836914, "logps/ref_rejected": -72.06781005859375, "logps/rejected": -159.6717529296875, "loss": 1.1555, "margin_dpo/margin_mean": 26.206092834472656, "margin_dpo/margin_std": 51.413429260253906, "step": 495 }, { "epoch": 0.7498110355253212, "fcm_dpo/beta": 0.01871339976787567, "fcm_dpo/delta": -0.004845082759857178, "fcm_dpo/margin": 32.221229553222656, "fcm_dpo/q_t": 0.3820890784263611, "grad_norm": 18.55131721496582, "learning_rate": 9.031821899254797e-08, "logits/chosen": 0.7021945118904114, "logits/rejected": 0.5876121520996094, "logps/chosen": -126.39350891113281, "logps/ref_chosen": -57.597320556640625, "logps/ref_rejected": -94.36127471923828, "logps/rejected": -195.37869262695312, "loss": 1.0993, "margin_dpo/margin_mean": 32.22122573852539, "margin_dpo/margin_std": 54.520545959472656, "step": 496 }, { "epoch": 0.7513227513227513, "fcm_dpo/beta": 0.018337702378630638, "fcm_dpo/delta": -0.17800766229629517, "fcm_dpo/margin": 41.654964447021484, "fcm_dpo/q_t": 0.3428105115890503, "grad_norm": 16.170692443847656, "learning_rate": 8.930309757836516e-08, "logits/chosen": 0.7111356258392334, "logits/rejected": 0.678577721118927, "logps/chosen": -141.17437744140625, "logps/ref_chosen": -72.78994750976562, "logps/ref_rejected": -89.48483276367188, "logps/rejected": -199.5242156982422, "loss": 0.9558, "margin_dpo/margin_mean": 41.654964447021484, "margin_dpo/margin_std": 52.02744674682617, "step": 497 }, { "epoch": 0.7528344671201814, "fcm_dpo/beta": 0.018001677468419075, "fcm_dpo/delta": -0.08155323565006256, "fcm_dpo/margin": 37.55533981323242, "fcm_dpo/q_t": 0.3615556061267853, "grad_norm": 23.24351692199707, "learning_rate": 8.829247120198563e-08, "logits/chosen": 0.695479154586792, "logits/rejected": 0.6608887910842896, "logps/chosen": -129.8267364501953, "logps/ref_chosen": -68.36572265625, "logps/ref_rejected": -71.28846740722656, "logps/rejected": -170.30484008789062, "loss": 0.9793, "margin_dpo/margin_mean": 37.55533981323242, "margin_dpo/margin_std": 48.39889144897461, "step": 498 }, { "epoch": 0.7543461829176115, "fcm_dpo/beta": 0.017900779843330383, "fcm_dpo/delta": -0.03278336673974991, "fcm_dpo/margin": 35.225341796875, "fcm_dpo/q_t": 0.3726680278778076, "grad_norm": 19.511375427246094, "learning_rate": 8.728636813280163e-08, "logits/chosen": 0.6967817544937134, "logits/rejected": 0.6287128925323486, "logps/chosen": -120.04055786132812, "logps/ref_chosen": -61.90882873535156, "logps/ref_rejected": -91.9411392211914, "logps/rejected": -185.29820251464844, "loss": 1.0937, "margin_dpo/margin_mean": 35.225341796875, "margin_dpo/margin_std": 58.685157775878906, "step": 499 }, { "epoch": 0.7558578987150416, "fcm_dpo/beta": 0.017738111317157745, "fcm_dpo/delta": -0.015023061074316502, "fcm_dpo/margin": 34.61691665649414, "fcm_dpo/q_t": 0.37329190969467163, "grad_norm": 23.707897186279297, "learning_rate": 8.628481651367875e-08, "logits/chosen": 0.6063783168792725, "logits/rejected": 0.6012179851531982, "logps/chosen": -134.2138671875, "logps/ref_chosen": -70.225830078125, "logps/ref_rejected": -71.72203063964844, "logps/rejected": -170.32699584960938, "loss": 1.078, "margin_dpo/margin_mean": 34.61691665649414, "margin_dpo/margin_std": 55.942474365234375, "step": 500 }, { "epoch": 0.7573696145124716, "fcm_dpo/beta": 0.018007531762123108, "fcm_dpo/delta": 0.06094428896903992, "fcm_dpo/margin": 30.07050323486328, "fcm_dpo/q_t": 0.3839370608329773, "grad_norm": 13.959453582763672, "learning_rate": 8.528784436016878e-08, "logits/chosen": 0.6482034921646118, "logits/rejected": 0.6431034803390503, "logps/chosen": -127.50128173828125, "logps/ref_chosen": -64.59880828857422, "logps/ref_rejected": -70.59329223632812, "logps/rejected": -163.5662841796875, "loss": 1.0386, "margin_dpo/margin_mean": 30.07050323486328, "margin_dpo/margin_std": 40.626930236816406, "step": 501 }, { "epoch": 0.7588813303099018, "fcm_dpo/beta": 0.01812717691063881, "fcm_dpo/delta": 0.0442255400121212, "fcm_dpo/margin": 30.785253524780273, "fcm_dpo/q_t": 0.3837216794490814, "grad_norm": 19.163633346557617, "learning_rate": 8.4295479559726e-08, "logits/chosen": 0.6712077856063843, "logits/rejected": 0.6248115301132202, "logps/chosen": -126.57681274414062, "logps/ref_chosen": -65.46662902832031, "logps/ref_rejected": -90.22233581542969, "logps/rejected": -182.11776733398438, "loss": 1.0425, "margin_dpo/margin_mean": 30.78525161743164, "margin_dpo/margin_std": 44.158851623535156, "step": 502 }, { "epoch": 0.7603930461073318, "fcm_dpo/beta": 0.017972594127058983, "fcm_dpo/delta": -0.040539998561143875, "fcm_dpo/margin": 35.48341369628906, "fcm_dpo/q_t": 0.3686982989311218, "grad_norm": 16.06653594970703, "learning_rate": 8.330774987092712e-08, "logits/chosen": 0.6875156164169312, "logits/rejected": 0.6794095039367676, "logps/chosen": -108.67987060546875, "logps/ref_chosen": -51.83476257324219, "logps/ref_rejected": -57.62522506713867, "logps/rejected": -149.9537353515625, "loss": 1.0504, "margin_dpo/margin_mean": 35.4834098815918, "margin_dpo/margin_std": 53.49339294433594, "step": 503 }, { "epoch": 0.7619047619047619, "fcm_dpo/beta": 0.017518125474452972, "fcm_dpo/delta": -0.2152797430753708, "fcm_dpo/margin": 45.586830139160156, "fcm_dpo/q_t": 0.3326077163219452, "grad_norm": 21.208383560180664, "learning_rate": 8.232468292269479e-08, "logits/chosen": 0.6347143650054932, "logits/rejected": 0.6134928464889526, "logps/chosen": -126.3889389038086, "logps/ref_chosen": -68.65119934082031, "logps/ref_rejected": -77.91394805908203, "logps/rejected": -181.238525390625, "loss": 0.887, "margin_dpo/margin_mean": 45.586830139160156, "margin_dpo/margin_std": 47.29257583618164, "step": 504 }, { "epoch": 0.763416477702192, "fcm_dpo/beta": 0.01733359880745411, "fcm_dpo/delta": 0.10931959003210068, "fcm_dpo/margin": 28.567665100097656, "fcm_dpo/q_t": 0.4036433696746826, "grad_norm": 19.609338760375977, "learning_rate": 8.134630621352483e-08, "logits/chosen": 0.6816761493682861, "logits/rejected": 0.6424638032913208, "logps/chosen": -120.88430786132812, "logps/ref_chosen": -59.99884796142578, "logps/ref_rejected": -76.88048553466797, "logps/rejected": -166.33358764648438, "loss": 1.1797, "margin_dpo/margin_mean": 28.567665100097656, "margin_dpo/margin_std": 57.868568420410156, "step": 505 }, { "epoch": 0.764928193499622, "fcm_dpo/beta": 0.017731061205267906, "fcm_dpo/delta": 0.0775858610868454, "fcm_dpo/margin": 29.690635681152344, "fcm_dpo/q_t": 0.3898463249206543, "grad_norm": 19.283348083496094, "learning_rate": 8.037264711071698e-08, "logits/chosen": 0.6876957416534424, "logits/rejected": 0.6667051315307617, "logps/chosen": -128.15101623535156, "logps/ref_chosen": -70.07130432128906, "logps/ref_rejected": -82.03775024414062, "logps/rejected": -169.80810546875, "loss": 1.1416, "margin_dpo/margin_mean": 29.690635681152344, "margin_dpo/margin_std": 55.41517639160156, "step": 506 }, { "epoch": 0.7664399092970522, "fcm_dpo/beta": 0.01782643049955368, "fcm_dpo/delta": -0.006546961143612862, "fcm_dpo/margin": 33.99785614013672, "fcm_dpo/q_t": 0.3816778361797333, "grad_norm": 19.276065826416016, "learning_rate": 7.940373284960933e-08, "logits/chosen": 0.6648691892623901, "logits/rejected": 0.6145754456520081, "logps/chosen": -137.01760864257812, "logps/ref_chosen": -72.00703430175781, "logps/ref_rejected": -93.94987487792969, "logps/rejected": -192.9582977294922, "loss": 1.0757, "margin_dpo/margin_mean": 33.99785614013672, "margin_dpo/margin_std": 55.21949005126953, "step": 507 }, { "epoch": 0.7679516250944822, "fcm_dpo/beta": 0.017841465771198273, "fcm_dpo/delta": -0.05562988296151161, "fcm_dpo/margin": 36.50687789916992, "fcm_dpo/q_t": 0.3688430190086365, "grad_norm": 19.247188568115234, "learning_rate": 7.843959053281663e-08, "logits/chosen": 0.6344867944717407, "logits/rejected": 0.5096554160118103, "logps/chosen": -118.91818237304688, "logps/ref_chosen": -60.21992492675781, "logps/ref_rejected": -95.9200668334961, "logps/rejected": -191.1251983642578, "loss": 1.0282, "margin_dpo/margin_mean": 36.50688171386719, "margin_dpo/margin_std": 52.37531280517578, "step": 508 }, { "epoch": 0.7694633408919124, "fcm_dpo/beta": 0.017571065574884415, "fcm_dpo/delta": -0.003388401120901108, "fcm_dpo/margin": 34.30541229248047, "fcm_dpo/q_t": 0.37258362770080566, "grad_norm": 18.990793228149414, "learning_rate": 7.748024712947204e-08, "logits/chosen": 0.6367828249931335, "logits/rejected": 0.6140943765640259, "logps/chosen": -127.34040069580078, "logps/ref_chosen": -66.27017211914062, "logps/ref_rejected": -71.73065185546875, "logps/rejected": -167.10629272460938, "loss": 1.029, "margin_dpo/margin_mean": 34.30541229248047, "margin_dpo/margin_std": 47.9427604675293, "step": 509 }, { "epoch": 0.7709750566893424, "fcm_dpo/beta": 0.01737632229924202, "fcm_dpo/delta": -0.06894849985837936, "fcm_dpo/margin": 38.18497085571289, "fcm_dpo/q_t": 0.3653348386287689, "grad_norm": 17.3432674407959, "learning_rate": 7.652572947447272e-08, "logits/chosen": 0.7944908142089844, "logits/rejected": 0.6980875730514526, "logps/chosen": -113.6026840209961, "logps/ref_chosen": -53.54487609863281, "logps/ref_rejected": -91.36648559570312, "logps/rejected": -189.6092529296875, "loss": 1.0255, "margin_dpo/margin_mean": 38.184967041015625, "margin_dpo/margin_std": 54.98851013183594, "step": 510 }, { "epoch": 0.7724867724867724, "fcm_dpo/beta": 0.017056778073310852, "fcm_dpo/delta": -0.15736544132232666, "fcm_dpo/margin": 43.72998809814453, "fcm_dpo/q_t": 0.3473592698574066, "grad_norm": 18.05554962158203, "learning_rate": 7.557606426772961e-08, "logits/chosen": 0.685520589351654, "logits/rejected": 0.6331069469451904, "logps/chosen": -116.88287353515625, "logps/ref_chosen": -55.844383239746094, "logps/ref_rejected": -86.49819946289062, "logps/rejected": -191.2666778564453, "loss": 0.9905, "margin_dpo/margin_mean": 43.72998809814453, "margin_dpo/margin_std": 58.842472076416016, "step": 511 }, { "epoch": 0.7739984882842026, "fcm_dpo/beta": 0.01700318232178688, "fcm_dpo/delta": 0.05129774659872055, "fcm_dpo/margin": 32.43523406982422, "fcm_dpo/q_t": 0.388268381357193, "grad_norm": 20.476276397705078, "learning_rate": 7.463127807341966e-08, "logits/chosen": 0.5704078674316406, "logits/rejected": 0.5559252500534058, "logps/chosen": -120.79446411132812, "logps/ref_chosen": -61.653038024902344, "logps/ref_rejected": -72.83148193359375, "logps/rejected": -164.40814208984375, "loss": 1.111, "margin_dpo/margin_mean": 32.43523406982422, "margin_dpo/margin_std": 56.39421081542969, "step": 512 }, { "epoch": 0.7755102040816326, "fcm_dpo/beta": 0.01689431443810463, "fcm_dpo/delta": -0.01903168112039566, "fcm_dpo/margin": 36.52797317504883, "fcm_dpo/q_t": 0.37010854482650757, "grad_norm": 15.648270606994629, "learning_rate": 7.369139731924401e-08, "logits/chosen": 0.7794561386108398, "logits/rejected": 0.7309125065803528, "logps/chosen": -104.30062866210938, "logps/ref_chosen": -50.85256576538086, "logps/ref_rejected": -69.21754455566406, "logps/rejected": -159.19357299804688, "loss": 0.9905, "margin_dpo/margin_mean": 36.527976989746094, "margin_dpo/margin_std": 45.804161071777344, "step": 513 }, { "epoch": 0.7770219198790628, "fcm_dpo/beta": 0.016719449311494827, "fcm_dpo/delta": -0.12295001745223999, "fcm_dpo/margin": 42.724876403808594, "fcm_dpo/q_t": 0.34755611419677734, "grad_norm": 15.384542465209961, "learning_rate": 7.275644829568747e-08, "logits/chosen": 0.7162846326828003, "logits/rejected": 0.6777454614639282, "logps/chosen": -129.981201171875, "logps/ref_chosen": -69.38493347167969, "logps/ref_rejected": -83.32447814941406, "logps/rejected": -186.64561462402344, "loss": 0.9488, "margin_dpo/margin_mean": 42.724876403808594, "margin_dpo/margin_std": 50.2280387878418, "step": 514 }, { "epoch": 0.7785336356764928, "fcm_dpo/beta": 0.016596363857388496, "fcm_dpo/delta": 0.032611072063446045, "fcm_dpo/margin": 34.295352935791016, "fcm_dpo/q_t": 0.38189035654067993, "grad_norm": 19.570980072021484, "learning_rate": 7.182645715528435e-08, "logits/chosen": 0.7053956985473633, "logits/rejected": 0.6231397390365601, "logps/chosen": -123.07106018066406, "logps/ref_chosen": -53.687034606933594, "logps/ref_rejected": -83.59614562988281, "logps/rejected": -187.27552795410156, "loss": 1.0657, "margin_dpo/margin_mean": 34.295352935791016, "margin_dpo/margin_std": 52.851593017578125, "step": 515 }, { "epoch": 0.780045351473923, "fcm_dpo/beta": 0.01694701611995697, "fcm_dpo/delta": 0.09288481622934341, "fcm_dpo/margin": 30.182415008544922, "fcm_dpo/q_t": 0.3905951678752899, "grad_norm": 21.40145492553711, "learning_rate": 7.090144991188568e-08, "logits/chosen": 0.6563661098480225, "logits/rejected": 0.6072605848312378, "logps/chosen": -114.33444213867188, "logps/ref_chosen": -56.9017219543457, "logps/ref_rejected": -67.83477783203125, "logps/rejected": -155.44992065429688, "loss": 1.1149, "margin_dpo/margin_mean": 30.182415008544922, "margin_dpo/margin_std": 52.542152404785156, "step": 516 }, { "epoch": 0.781557067271353, "fcm_dpo/beta": 0.017495661973953247, "fcm_dpo/delta": 0.16309672594070435, "fcm_dpo/margin": 25.303401947021484, "fcm_dpo/q_t": 0.4092411398887634, "grad_norm": 20.3939208984375, "learning_rate": 6.998145243993284e-08, "logits/chosen": 0.7242580652236938, "logits/rejected": 0.7175397872924805, "logps/chosen": -131.94554138183594, "logps/ref_chosen": -61.775142669677734, "logps/ref_rejected": -62.88270950317383, "logps/rejected": -158.35650634765625, "loss": 1.1588, "margin_dpo/margin_mean": 25.303401947021484, "margin_dpo/margin_std": 49.52540969848633, "step": 517 }, { "epoch": 0.783068783068783, "fcm_dpo/beta": 0.01770472340285778, "fcm_dpo/delta": 0.0501800999045372, "fcm_dpo/margin": 31.204975128173828, "fcm_dpo/q_t": 0.38718700408935547, "grad_norm": 16.25372886657715, "learning_rate": 6.906649047373245e-08, "logits/chosen": 0.6553936004638672, "logits/rejected": 0.6047691106796265, "logps/chosen": -118.64441680908203, "logps/ref_chosen": -62.02523422241211, "logps/ref_rejected": -79.06085205078125, "logps/rejected": -166.885009765625, "loss": 1.0764, "margin_dpo/margin_mean": 31.204975128173828, "margin_dpo/margin_std": 49.72199249267578, "step": 518 }, { "epoch": 0.7845804988662132, "fcm_dpo/beta": 0.018285606056451797, "fcm_dpo/delta": 0.1760295182466507, "fcm_dpo/margin": 23.527294158935547, "fcm_dpo/q_t": 0.41504037380218506, "grad_norm": 29.17710304260254, "learning_rate": 6.815658960673781e-08, "logits/chosen": 0.7163988351821899, "logits/rejected": 0.6581120491027832, "logps/chosen": -132.91104125976562, "logps/ref_chosen": -61.60636901855469, "logps/ref_rejected": -74.50727844238281, "logps/rejected": -169.33924865722656, "loss": 1.2797, "margin_dpo/margin_mean": 23.527294158935547, "margin_dpo/margin_std": 61.222747802734375, "step": 519 }, { "epoch": 0.7860922146636432, "fcm_dpo/beta": 0.018460825085639954, "fcm_dpo/delta": 0.09022289514541626, "fcm_dpo/margin": 27.83536148071289, "fcm_dpo/q_t": 0.39378347992897034, "grad_norm": 21.448122024536133, "learning_rate": 6.725177529083209e-08, "logits/chosen": 0.7613925337791443, "logits/rejected": 0.7015221118927002, "logps/chosen": -126.41165161132812, "logps/ref_chosen": -62.87343215942383, "logps/ref_rejected": -76.505615234375, "logps/rejected": -167.8791961669922, "loss": 1.1113, "margin_dpo/margin_mean": 27.83536148071289, "margin_dpo/margin_std": 47.354896545410156, "step": 520 }, { "epoch": 0.7876039304610734, "fcm_dpo/beta": 0.018552154302597046, "fcm_dpo/delta": -0.07973211258649826, "fcm_dpo/margin": 36.347511291503906, "fcm_dpo/q_t": 0.3598254919052124, "grad_norm": 21.132991790771484, "learning_rate": 6.63520728356167e-08, "logits/chosen": 0.5479804277420044, "logits/rejected": 0.47508206963539124, "logps/chosen": -129.48300170898438, "logps/ref_chosen": -64.20668029785156, "logps/ref_rejected": -92.28083038330078, "logps/rejected": -193.9046630859375, "loss": 1.0013, "margin_dpo/margin_mean": 36.347511291503906, "margin_dpo/margin_std": 49.076534271240234, "step": 521 }, { "epoch": 0.7891156462585034, "fcm_dpo/beta": 0.018455319106578827, "fcm_dpo/delta": 0.04709668457508087, "fcm_dpo/margin": 30.094348907470703, "fcm_dpo/q_t": 0.39099377393722534, "grad_norm": 21.805530548095703, "learning_rate": 6.545750740770336e-08, "logits/chosen": 0.6754323244094849, "logits/rejected": 0.6666183471679688, "logps/chosen": -121.40290832519531, "logps/ref_chosen": -58.369720458984375, "logps/ref_rejected": -68.79248046875, "logps/rejected": -161.92001342773438, "loss": 1.1803, "margin_dpo/margin_mean": 30.09434700012207, "margin_dpo/margin_std": 61.12078857421875, "step": 522 }, { "epoch": 0.7906273620559335, "fcm_dpo/beta": 0.018495675176382065, "fcm_dpo/delta": 0.007439367473125458, "fcm_dpo/margin": 32.03522491455078, "fcm_dpo/q_t": 0.37477177381515503, "grad_norm": 25.171443939208984, "learning_rate": 6.456810403001012e-08, "logits/chosen": 0.6811109781265259, "logits/rejected": 0.5619876980781555, "logps/chosen": -133.40420532226562, "logps/ref_chosen": -65.71324157714844, "logps/ref_rejected": -91.98896789550781, "logps/rejected": -191.71514892578125, "loss": 1.1145, "margin_dpo/margin_mean": 32.03522491455078, "margin_dpo/margin_std": 55.60651779174805, "step": 523 }, { "epoch": 0.7921390778533636, "fcm_dpo/beta": 0.018733873963356018, "fcm_dpo/delta": 0.011134952306747437, "fcm_dpo/margin": 31.432987213134766, "fcm_dpo/q_t": 0.37468042969703674, "grad_norm": 18.025999069213867, "learning_rate": 6.368388758106134e-08, "logits/chosen": 0.6386536359786987, "logits/rejected": 0.6183046102523804, "logps/chosen": -132.13656616210938, "logps/ref_chosen": -76.35124969482422, "logps/ref_rejected": -89.96072387695312, "logps/rejected": -177.1790313720703, "loss": 1.0466, "margin_dpo/margin_mean": 31.432987213134766, "margin_dpo/margin_std": 45.8270378112793, "step": 524 }, { "epoch": 0.7936507936507936, "fcm_dpo/beta": 0.018994202837347984, "fcm_dpo/delta": 0.12279119342565536, "fcm_dpo/margin": 25.432735443115234, "fcm_dpo/q_t": 0.40213990211486816, "grad_norm": 23.485536575317383, "learning_rate": 6.280488279429185e-08, "logits/chosen": 0.5125986337661743, "logits/rejected": 0.5042914748191833, "logps/chosen": -138.84378051757812, "logps/ref_chosen": -75.49578857421875, "logps/ref_rejected": -84.04852294921875, "logps/rejected": -172.82925415039062, "loss": 1.1386, "margin_dpo/margin_mean": 25.432735443115234, "margin_dpo/margin_std": 47.67351531982422, "step": 525 }, { "epoch": 0.7951625094482238, "fcm_dpo/beta": 0.01910669542849064, "fcm_dpo/delta": 0.09421326220035553, "fcm_dpo/margin": 26.58679962158203, "fcm_dpo/q_t": 0.39384567737579346, "grad_norm": 20.159889221191406, "learning_rate": 6.193111425735515e-08, "logits/chosen": 0.6956998109817505, "logits/rejected": 0.6291480660438538, "logps/chosen": -128.66256713867188, "logps/ref_chosen": -61.29241943359375, "logps/ref_rejected": -82.47763061523438, "logps/rejected": -176.4345703125, "loss": 1.1305, "margin_dpo/margin_mean": 26.58679962158203, "margin_dpo/margin_std": 45.9437255859375, "step": 526 }, { "epoch": 0.7966742252456538, "fcm_dpo/beta": 0.020175091922283173, "fcm_dpo/delta": 0.2082231193780899, "fcm_dpo/margin": 19.762725830078125, "fcm_dpo/q_t": 0.42157065868377686, "grad_norm": 24.948040008544922, "learning_rate": 6.106260641143546e-08, "logits/chosen": 0.7550373077392578, "logits/rejected": 0.6782054901123047, "logps/chosen": -134.86746215820312, "logps/ref_chosen": -61.472625732421875, "logps/ref_rejected": -90.52831268310547, "logps/rejected": -183.68588256835938, "loss": 1.2637, "margin_dpo/margin_mean": 19.762725830078125, "margin_dpo/margin_std": 51.479583740234375, "step": 527 }, { "epoch": 0.7981859410430839, "fcm_dpo/beta": 0.020768921822309494, "fcm_dpo/delta": 0.18035244941711426, "fcm_dpo/margin": 20.595016479492188, "fcm_dpo/q_t": 0.4134521782398224, "grad_norm": 26.238304138183594, "learning_rate": 6.019938355056422e-08, "logits/chosen": 0.5912600159645081, "logits/rejected": 0.5084381103515625, "logps/chosen": -124.909912109375, "logps/ref_chosen": -58.792015075683594, "logps/ref_rejected": -71.82516479492188, "logps/rejected": -158.5380859375, "loss": 1.2727, "margin_dpo/margin_mean": 20.59501838684082, "margin_dpo/margin_std": 53.321876525878906, "step": 528 }, { "epoch": 0.799697656840514, "fcm_dpo/beta": 0.01942495070397854, "fcm_dpo/delta": -0.47512802481651306, "fcm_dpo/margin": 52.53473663330078, "fcm_dpo/q_t": 0.29438188672065735, "grad_norm": 17.863595962524414, "learning_rate": 5.934146982094049e-08, "logits/chosen": 0.602439284324646, "logits/rejected": 0.5486577153205872, "logps/chosen": -114.20620727539062, "logps/ref_chosen": -55.070960998535156, "logps/ref_rejected": -75.44007873535156, "logps/rejected": -187.1100616455078, "loss": 0.8219, "margin_dpo/margin_mean": 52.53473663330078, "margin_dpo/margin_std": 52.704856872558594, "step": 529 }, { "epoch": 0.8012093726379441, "fcm_dpo/beta": 0.019187599420547485, "fcm_dpo/delta": 0.0037046317011117935, "fcm_dpo/margin": 31.082916259765625, "fcm_dpo/q_t": 0.37992024421691895, "grad_norm": 22.0632266998291, "learning_rate": 5.848888922025552e-08, "logits/chosen": 0.6941254734992981, "logits/rejected": 0.654128909111023, "logps/chosen": -118.39797973632812, "logps/ref_chosen": -56.743812561035156, "logps/ref_rejected": -76.6692123413086, "logps/rejected": -169.40631103515625, "loss": 1.0404, "margin_dpo/margin_mean": 31.082918167114258, "margin_dpo/margin_std": 45.24073028564453, "step": 530 }, { "epoch": 0.8027210884353742, "fcm_dpo/beta": 0.019356567412614822, "fcm_dpo/delta": -0.005651660263538361, "fcm_dpo/margin": 31.139101028442383, "fcm_dpo/q_t": 0.37849652767181396, "grad_norm": 21.71436309814453, "learning_rate": 5.7641665597021435e-08, "logits/chosen": 0.6697413921356201, "logits/rejected": 0.6052749156951904, "logps/chosen": -115.15210723876953, "logps/ref_chosen": -51.116455078125, "logps/ref_rejected": -79.52884674072266, "logps/rejected": -174.70359802246094, "loss": 1.0816, "margin_dpo/margin_mean": 31.139101028442383, "margin_dpo/margin_std": 49.85601806640625, "step": 531 }, { "epoch": 0.8042328042328042, "fcm_dpo/beta": 0.018879860639572144, "fcm_dpo/delta": -0.07339326292276382, "fcm_dpo/margin": 35.395713806152344, "fcm_dpo/q_t": 0.36597567796707153, "grad_norm": 19.15670394897461, "learning_rate": 5.679982264990424e-08, "logits/chosen": 0.6101032495498657, "logits/rejected": 0.559300422668457, "logps/chosen": -132.02627563476562, "logps/ref_chosen": -58.279945373535156, "logps/ref_rejected": -78.05426788330078, "logps/rejected": -187.19631958007812, "loss": 1.0516, "margin_dpo/margin_mean": 35.395713806152344, "margin_dpo/margin_std": 54.57707977294922, "step": 532 }, { "epoch": 0.8057445200302343, "fcm_dpo/beta": 0.01890089176595211, "fcm_dpo/delta": 0.00393468514084816, "fcm_dpo/margin": 31.543041229248047, "fcm_dpo/q_t": 0.37516263127326965, "grad_norm": 16.823314666748047, "learning_rate": 5.596338392706076e-08, "logits/chosen": 0.7527922987937927, "logits/rejected": 0.692599356174469, "logps/chosen": -107.6111831665039, "logps/ref_chosen": -56.41801071166992, "logps/ref_rejected": -73.89324951171875, "logps/rejected": -156.62945556640625, "loss": 1.0561, "margin_dpo/margin_mean": 31.543039321899414, "margin_dpo/margin_std": 48.05528259277344, "step": 533 }, { "epoch": 0.8072562358276644, "fcm_dpo/beta": 0.018499422818422318, "fcm_dpo/delta": -0.0869862511754036, "fcm_dpo/margin": 36.743194580078125, "fcm_dpo/q_t": 0.36393046379089355, "grad_norm": 17.313426971435547, "learning_rate": 5.513237282548033e-08, "logits/chosen": 0.677596926689148, "logits/rejected": 0.6374635100364685, "logps/chosen": -120.32328033447266, "logps/ref_chosen": -60.748687744140625, "logps/ref_rejected": -73.8623046875, "logps/rejected": -170.18008422851562, "loss": 1.0367, "margin_dpo/margin_mean": 36.743194580078125, "margin_dpo/margin_std": 54.76251983642578, "step": 534 }, { "epoch": 0.8087679516250945, "fcm_dpo/beta": 0.018779411911964417, "fcm_dpo/delta": 0.06701276451349258, "fcm_dpo/margin": 28.552780151367188, "fcm_dpo/q_t": 0.39095014333724976, "grad_norm": 20.778942108154297, "learning_rate": 5.430681259032957e-08, "logits/chosen": 0.5685528516769409, "logits/rejected": 0.5054241418838501, "logps/chosen": -132.29876708984375, "logps/ref_chosen": -61.637413024902344, "logps/ref_rejected": -80.93138885498047, "logps/rejected": -180.1455078125, "loss": 1.1322, "margin_dpo/margin_mean": 28.552780151367188, "margin_dpo/margin_std": 52.61593246459961, "step": 535 }, { "epoch": 0.8102796674225246, "fcm_dpo/beta": 0.01826276257634163, "fcm_dpo/delta": -0.18799223005771637, "fcm_dpo/margin": 42.34637451171875, "fcm_dpo/q_t": 0.33896517753601074, "grad_norm": 15.240792274475098, "learning_rate": 5.3486726314303175e-08, "logits/chosen": 0.7089515924453735, "logits/rejected": 0.6250416040420532, "logps/chosen": -112.83555603027344, "logps/ref_chosen": -51.88897705078125, "logps/ref_rejected": -73.34864044189453, "logps/rejected": -176.6416015625, "loss": 0.891, "margin_dpo/margin_mean": 42.34637451171875, "margin_dpo/margin_std": 44.537410736083984, "step": 536 }, { "epoch": 0.8117913832199547, "fcm_dpo/beta": 0.018009407445788383, "fcm_dpo/delta": 0.005187440663576126, "fcm_dpo/margin": 33.01459503173828, "fcm_dpo/q_t": 0.38105130195617676, "grad_norm": 16.58041763305664, "learning_rate": 5.267213693697695e-08, "logits/chosen": 0.7628644704818726, "logits/rejected": 0.6717967987060547, "logps/chosen": -124.62332916259766, "logps/ref_chosen": -54.248619079589844, "logps/ref_rejected": -94.94343566894531, "logps/rejected": -198.33274841308594, "loss": 1.0657, "margin_dpo/margin_mean": 33.01459884643555, "margin_dpo/margin_std": 51.61365509033203, "step": 537 }, { "epoch": 0.8133030990173847, "fcm_dpo/beta": 0.018021808937191963, "fcm_dpo/delta": -0.08093195408582687, "fcm_dpo/margin": 37.45811462402344, "fcm_dpo/q_t": 0.3603672385215759, "grad_norm": 18.360652923583984, "learning_rate": 5.1863067244167144e-08, "logits/chosen": 0.6641885042190552, "logits/rejected": 0.6308572292327881, "logps/chosen": -136.5439453125, "logps/ref_chosen": -70.09353637695312, "logps/ref_rejected": -79.49833679199219, "logps/rejected": -183.40684509277344, "loss": 0.999, "margin_dpo/margin_mean": 37.45811462402344, "margin_dpo/margin_std": 50.08565139770508, "step": 538 }, { "epoch": 0.8148148148148148, "fcm_dpo/beta": 0.01791461370885372, "fcm_dpo/delta": 0.06643770635128021, "fcm_dpo/margin": 29.97534942626953, "fcm_dpo/q_t": 0.38791027665138245, "grad_norm": 18.469646453857422, "learning_rate": 5.105953986729195e-08, "logits/chosen": 0.6261240243911743, "logits/rejected": 0.5512057542800903, "logps/chosen": -132.72520446777344, "logps/ref_chosen": -61.93169403076172, "logps/ref_rejected": -84.08946228027344, "logps/rejected": -184.8583221435547, "loss": 1.0637, "margin_dpo/margin_mean": 29.97534942626953, "margin_dpo/margin_std": 45.154361724853516, "step": 539 }, { "epoch": 0.8163265306122449, "fcm_dpo/beta": 0.017926346510648727, "fcm_dpo/delta": -0.12926867604255676, "fcm_dpo/margin": 40.096641540527344, "fcm_dpo/q_t": 0.3533179759979248, "grad_norm": 17.8641414642334, "learning_rate": 5.026157728273966e-08, "logits/chosen": 0.6939501762390137, "logits/rejected": 0.6017695069313049, "logps/chosen": -126.96577453613281, "logps/ref_chosen": -62.704254150390625, "logps/ref_rejected": -95.63597106933594, "logps/rejected": -199.994140625, "loss": 0.9672, "margin_dpo/margin_mean": 40.096641540527344, "margin_dpo/margin_std": 49.30036926269531, "step": 540 }, { "epoch": 0.817838246409675, "fcm_dpo/beta": 0.017087796702980995, "fcm_dpo/delta": -0.13908647000789642, "fcm_dpo/margin": 42.50551986694336, "fcm_dpo/q_t": 0.3478375971317291, "grad_norm": 15.025036811828613, "learning_rate": 4.9469201811239035e-08, "logits/chosen": 0.6994584202766418, "logits/rejected": 0.7143793106079102, "logps/chosen": -123.77883911132812, "logps/ref_chosen": -62.48084259033203, "logps/ref_rejected": -57.55541229248047, "logps/rejected": -161.3589324951172, "loss": 0.9436, "margin_dpo/margin_mean": 42.50551986694336, "margin_dpo/margin_std": 49.231876373291016, "step": 541 }, { "epoch": 0.8193499622071051, "fcm_dpo/beta": 0.01672377809882164, "fcm_dpo/delta": -0.1440107524394989, "fcm_dpo/margin": 43.837928771972656, "fcm_dpo/q_t": 0.34867095947265625, "grad_norm": 15.291959762573242, "learning_rate": 4.868243561723534e-08, "logits/chosen": 0.7241077423095703, "logits/rejected": 0.6722509860992432, "logps/chosen": -102.95439910888672, "logps/ref_chosen": -49.454891204833984, "logps/ref_rejected": -65.33275604248047, "logps/rejected": -162.67019653320312, "loss": 0.968, "margin_dpo/margin_mean": 43.837928771972656, "margin_dpo/margin_std": 55.83599853515625, "step": 542 }, { "epoch": 0.8208616780045351, "fcm_dpo/beta": 0.016603415831923485, "fcm_dpo/delta": -0.038653384894132614, "fcm_dpo/margin": 38.306365966796875, "fcm_dpo/q_t": 0.3619435131549835, "grad_norm": 12.996235847473145, "learning_rate": 4.790130070827028e-08, "logits/chosen": 0.6842800378799438, "logits/rejected": 0.5959610939025879, "logps/chosen": -111.56507873535156, "logps/ref_chosen": -51.100860595703125, "logps/ref_rejected": -76.06130981445312, "logps/rejected": -174.83189392089844, "loss": 1.0011, "margin_dpo/margin_mean": 38.306365966796875, "margin_dpo/margin_std": 49.799415588378906, "step": 543 }, { "epoch": 0.8223733938019653, "fcm_dpo/beta": 0.0162653811275959, "fcm_dpo/delta": -0.1092371717095375, "fcm_dpo/margin": 43.14006805419922, "fcm_dpo/q_t": 0.35268715023994446, "grad_norm": 14.857081413269043, "learning_rate": 4.7125818934366454e-08, "logits/chosen": 0.672330379486084, "logits/rejected": 0.5996150374412537, "logps/chosen": -124.62445068359375, "logps/ref_chosen": -60.2772331237793, "logps/ref_rejected": -88.40553283691406, "logps/rejected": -195.892822265625, "loss": 0.9689, "margin_dpo/margin_mean": 43.14006805419922, "margin_dpo/margin_std": 53.813167572021484, "step": 544 }, { "epoch": 0.8238851095993953, "fcm_dpo/beta": 0.01642528548836708, "fcm_dpo/delta": 0.12011280655860901, "fcm_dpo/margin": 29.575916290283203, "fcm_dpo/q_t": 0.39873120188713074, "grad_norm": 17.39080047607422, "learning_rate": 4.635601198741607e-08, "logits/chosen": 0.6350932121276855, "logits/rejected": 0.5763211846351624, "logps/chosen": -130.05453491210938, "logps/ref_chosen": -61.61524963378906, "logps/ref_rejected": -78.71266174316406, "logps/rejected": -176.72787475585938, "loss": 1.1295, "margin_dpo/margin_mean": 29.575916290283203, "margin_dpo/margin_std": 53.032081604003906, "step": 545 }, { "epoch": 0.8253968253968254, "fcm_dpo/beta": 0.016498159617185593, "fcm_dpo/delta": 0.004416411742568016, "fcm_dpo/margin": 36.10563659667969, "fcm_dpo/q_t": 0.37277141213417053, "grad_norm": 19.63165283203125, "learning_rate": 4.559190140057428e-08, "logits/chosen": 0.7387676239013672, "logits/rejected": 0.7273943424224854, "logps/chosen": -122.2755126953125, "logps/ref_chosen": -59.313262939453125, "logps/ref_rejected": -64.73631286621094, "logps/rejected": -163.80419921875, "loss": 1.0279, "margin_dpo/margin_mean": 36.10563659667969, "margin_dpo/margin_std": 49.768165588378906, "step": 546 }, { "epoch": 0.8269085411942555, "fcm_dpo/beta": 0.016392838209867477, "fcm_dpo/delta": -0.05539948120713234, "fcm_dpo/margin": 39.75334930419922, "fcm_dpo/q_t": 0.3588758111000061, "grad_norm": 18.761903762817383, "learning_rate": 4.483350854765672e-08, "logits/chosen": 0.632850170135498, "logits/rejected": 0.56563401222229, "logps/chosen": -113.59066772460938, "logps/ref_chosen": -54.97674560546875, "logps/ref_rejected": -75.35922241210938, "logps/rejected": -173.7264862060547, "loss": 0.9989, "margin_dpo/margin_mean": 39.75334930419922, "margin_dpo/margin_std": 51.920860290527344, "step": 547 }, { "epoch": 0.8284202569916855, "fcm_dpo/beta": 0.016840200871229172, "fcm_dpo/delta": 0.14554663002490997, "fcm_dpo/margin": 27.277130126953125, "fcm_dpo/q_t": 0.4067327380180359, "grad_norm": 18.979921340942383, "learning_rate": 4.4080854642541826e-08, "logits/chosen": 0.5610533952713013, "logits/rejected": 0.49250972270965576, "logps/chosen": -131.07614135742188, "logps/ref_chosen": -63.21067428588867, "logps/ref_rejected": -81.23347473144531, "logps/rejected": -176.37606811523438, "loss": 1.1475, "margin_dpo/margin_mean": 27.277130126953125, "margin_dpo/margin_std": 51.35624694824219, "step": 548 }, { "epoch": 0.8299319727891157, "fcm_dpo/beta": 0.017113741487264633, "fcm_dpo/delta": 0.1011405736207962, "fcm_dpo/margin": 29.445924758911133, "fcm_dpo/q_t": 0.394071489572525, "grad_norm": 17.566940307617188, "learning_rate": 4.333396073857723e-08, "logits/chosen": 0.7469007968902588, "logits/rejected": 0.6796466112136841, "logps/chosen": -129.56695556640625, "logps/ref_chosen": -64.27351379394531, "logps/ref_rejected": -92.31663513183594, "logps/rejected": -187.05599975585938, "loss": 1.1245, "margin_dpo/margin_mean": 29.4459228515625, "margin_dpo/margin_std": 52.37643051147461, "step": 549 }, { "epoch": 0.8314436885865457, "fcm_dpo/beta": 0.017646487802267075, "fcm_dpo/delta": 0.21381746232509613, "fcm_dpo/margin": 22.392440795898438, "fcm_dpo/q_t": 0.41807249188423157, "grad_norm": 22.020166397094727, "learning_rate": 4.259284772799099e-08, "logits/chosen": 0.6913200616836548, "logits/rejected": 0.6597945690155029, "logps/chosen": -130.39352416992188, "logps/ref_chosen": -56.230438232421875, "logps/ref_rejected": -62.59788513183594, "logps/rejected": -159.15341186523438, "loss": 1.1887, "margin_dpo/margin_mean": 22.392440795898438, "margin_dpo/margin_std": 47.07420349121094, "step": 550 }, { "epoch": 0.8329554043839759, "fcm_dpo/beta": 0.01831832528114319, "fcm_dpo/delta": 0.10014162957668304, "fcm_dpo/margin": 27.50286865234375, "fcm_dpo/q_t": 0.3956736922264099, "grad_norm": 19.295076370239258, "learning_rate": 4.1857536341307176e-08, "logits/chosen": 0.7169756889343262, "logits/rejected": 0.6815935373306274, "logps/chosen": -137.21009826660156, "logps/ref_chosen": -67.74720764160156, "logps/ref_rejected": -87.04285430908203, "logps/rejected": -184.00860595703125, "loss": 1.0868, "margin_dpo/margin_mean": 27.502866744995117, "margin_dpo/margin_std": 44.02717590332031, "step": 551 }, { "epoch": 0.8344671201814059, "fcm_dpo/beta": 0.018308354541659355, "fcm_dpo/delta": -0.051000580191612244, "fcm_dpo/margin": 35.34024429321289, "fcm_dpo/q_t": 0.3574460744857788, "grad_norm": 22.781795501708984, "learning_rate": 4.112804714676593e-08, "logits/chosen": 0.659945011138916, "logits/rejected": 0.6055397391319275, "logps/chosen": -127.8887939453125, "logps/ref_chosen": -62.92625427246094, "logps/ref_rejected": -82.98365783691406, "logps/rejected": -183.2864227294922, "loss": 1.0082, "margin_dpo/margin_mean": 35.34024429321289, "margin_dpo/margin_std": 46.828224182128906, "step": 552 }, { "epoch": 0.8359788359788359, "fcm_dpo/beta": 0.01820247806608677, "fcm_dpo/delta": 0.038443662226200104, "fcm_dpo/margin": 30.970130920410156, "fcm_dpo/q_t": 0.3889210820198059, "grad_norm": 23.225704193115234, "learning_rate": 4.0404400549748144e-08, "logits/chosen": 0.6440668106079102, "logits/rejected": 0.5367642045021057, "logps/chosen": -128.24612426757812, "logps/ref_chosen": -56.038490295410156, "logps/ref_rejected": -84.48454284667969, "logps/rejected": -187.6623077392578, "loss": 1.1264, "margin_dpo/margin_mean": 30.97012710571289, "margin_dpo/margin_std": 56.776390075683594, "step": 553 }, { "epoch": 0.8374905517762661, "fcm_dpo/beta": 0.018095003440976143, "fcm_dpo/delta": -0.06582193076610565, "fcm_dpo/margin": 36.556236267089844, "fcm_dpo/q_t": 0.3641040623188019, "grad_norm": 19.767202377319336, "learning_rate": 3.968661679220467e-08, "logits/chosen": 0.6238239407539368, "logits/rejected": 0.6004227995872498, "logps/chosen": -129.68283081054688, "logps/ref_chosen": -64.53059387207031, "logps/ref_rejected": -71.2155990600586, "logps/rejected": -172.924072265625, "loss": 1.0152, "margin_dpo/margin_mean": 36.55623245239258, "margin_dpo/margin_std": 51.607032775878906, "step": 554 }, { "epoch": 0.8390022675736961, "fcm_dpo/beta": 0.018217993900179863, "fcm_dpo/delta": -0.023561611771583557, "fcm_dpo/margin": 33.95972442626953, "fcm_dpo/q_t": 0.37040045857429504, "grad_norm": 20.464981079101562, "learning_rate": 3.89747159520904e-08, "logits/chosen": 0.6768993139266968, "logits/rejected": 0.6472059488296509, "logps/chosen": -139.9117431640625, "logps/ref_chosen": -66.65191650390625, "logps/ref_rejected": -68.6667251586914, "logps/rejected": -175.88629150390625, "loss": 1.0948, "margin_dpo/margin_mean": 33.95972442626953, "margin_dpo/margin_std": 54.30375671386719, "step": 555 }, { "epoch": 0.8405139833711263, "fcm_dpo/beta": 0.018018240109086037, "fcm_dpo/delta": 0.0479285754263401, "fcm_dpo/margin": 30.78514289855957, "fcm_dpo/q_t": 0.3913532793521881, "grad_norm": 17.889385223388672, "learning_rate": 3.826871794280192e-08, "logits/chosen": 0.7341389656066895, "logits/rejected": 0.6801250576972961, "logps/chosen": -126.58598327636719, "logps/ref_chosen": -52.832366943359375, "logps/ref_rejected": -64.49044036865234, "logps/rejected": -169.02919006347656, "loss": 1.1322, "margin_dpo/margin_mean": 30.78514289855957, "margin_dpo/margin_std": 56.485740661621094, "step": 556 }, { "epoch": 0.8420256991685563, "fcm_dpo/beta": 0.017613768577575684, "fcm_dpo/delta": -0.1427953541278839, "fcm_dpo/margin": 41.473854064941406, "fcm_dpo/q_t": 0.3491743505001068, "grad_norm": 18.236114501953125, "learning_rate": 3.756864251262143e-08, "logits/chosen": 0.7962363362312317, "logits/rejected": 0.7152137160301208, "logps/chosen": -127.98336029052734, "logps/ref_chosen": -55.03598403930664, "logps/ref_rejected": -75.80644989013672, "logps/rejected": -190.22769165039062, "loss": 0.9416, "margin_dpo/margin_mean": 41.473854064941406, "margin_dpo/margin_std": 48.5777587890625, "step": 557 }, { "epoch": 0.8435374149659864, "fcm_dpo/beta": 0.01709606498479843, "fcm_dpo/delta": -0.16726849973201752, "fcm_dpo/margin": 44.07666778564453, "fcm_dpo/q_t": 0.34456583857536316, "grad_norm": 14.090262413024902, "learning_rate": 3.687450924416341e-08, "logits/chosen": 0.7165727615356445, "logits/rejected": 0.6611793637275696, "logps/chosen": -125.71041107177734, "logps/ref_chosen": -63.226348876953125, "logps/ref_rejected": -91.46881866455078, "logps/rejected": -198.02957153320312, "loss": 0.9276, "margin_dpo/margin_mean": 44.07666778564453, "margin_dpo/margin_std": 50.671913146972656, "step": 558 }, { "epoch": 0.8450491307634165, "fcm_dpo/beta": 0.016778361052274704, "fcm_dpo/delta": -0.028636924922466278, "fcm_dpo/margin": 37.217071533203125, "fcm_dpo/q_t": 0.3728501796722412, "grad_norm": 15.830818176269531, "learning_rate": 3.6186337553827743e-08, "logits/chosen": 0.6524174213409424, "logits/rejected": 0.5823867321014404, "logps/chosen": -130.78497314453125, "logps/ref_chosen": -61.521644592285156, "logps/ref_rejected": -82.83859252929688, "logps/rejected": -189.31900024414062, "loss": 1.0403, "margin_dpo/margin_mean": 37.217071533203125, "margin_dpo/margin_std": 53.64512634277344, "step": 559 }, { "epoch": 0.8465608465608465, "fcm_dpo/beta": 0.01705295592546463, "fcm_dpo/delta": -0.007127054035663605, "fcm_dpo/margin": 35.50212097167969, "fcm_dpo/q_t": 0.374332070350647, "grad_norm": 16.82029914855957, "learning_rate": 3.550414669125573e-08, "logits/chosen": 0.6676818132400513, "logits/rejected": 0.6309837102890015, "logps/chosen": -132.1116943359375, "logps/ref_chosen": -60.64122009277344, "logps/ref_rejected": -78.75474548339844, "logps/rejected": -185.72735595703125, "loss": 1.0186, "margin_dpo/margin_mean": 35.50212097167969, "margin_dpo/margin_std": 47.463401794433594, "step": 560 }, { "epoch": 0.8480725623582767, "fcm_dpo/beta": 0.016799356788396835, "fcm_dpo/delta": -0.03773471340537071, "fcm_dpo/margin": 37.817176818847656, "fcm_dpo/q_t": 0.37105944752693176, "grad_norm": 15.695465087890625, "learning_rate": 3.482795573879241e-08, "logits/chosen": 0.680199921131134, "logits/rejected": 0.6491532325744629, "logps/chosen": -126.62320709228516, "logps/ref_chosen": -62.49859619140625, "logps/ref_rejected": -78.72064208984375, "logps/rejected": -180.66241455078125, "loss": 1.026, "margin_dpo/margin_mean": 37.817176818847656, "margin_dpo/margin_std": 53.56718444824219, "step": 561 }, { "epoch": 0.8495842781557067, "fcm_dpo/beta": 0.016474956646561623, "fcm_dpo/delta": -0.0877566933631897, "fcm_dpo/margin": 41.32499313354492, "fcm_dpo/q_t": 0.3588718771934509, "grad_norm": 19.099084854125977, "learning_rate": 3.415778361095226e-08, "logits/chosen": 0.6784518957138062, "logits/rejected": 0.6398619413375854, "logps/chosen": -143.8610382080078, "logps/ref_chosen": -74.78173828125, "logps/ref_rejected": -92.63499450683594, "logps/rejected": -203.03929138183594, "loss": 0.9801, "margin_dpo/margin_mean": 41.324989318847656, "margin_dpo/margin_std": 52.4063606262207, "step": 562 }, { "epoch": 0.8510959939531368, "fcm_dpo/beta": 0.01647357828915119, "fcm_dpo/delta": -0.01924915984272957, "fcm_dpo/margin": 37.51539993286133, "fcm_dpo/q_t": 0.3716714382171631, "grad_norm": 21.878982543945312, "learning_rate": 3.349364905389032e-08, "logits/chosen": 0.7664488554000854, "logits/rejected": 0.7137491106987, "logps/chosen": -111.7672348022461, "logps/ref_chosen": -50.19850158691406, "logps/ref_rejected": -66.76687622070312, "logps/rejected": -165.8509979248047, "loss": 1.0639, "margin_dpo/margin_mean": 37.51539611816406, "margin_dpo/margin_std": 58.186012268066406, "step": 563 }, { "epoch": 0.8526077097505669, "fcm_dpo/beta": 0.016082683578133583, "fcm_dpo/delta": -0.14546218514442444, "fcm_dpo/margin": 45.69166946411133, "fcm_dpo/q_t": 0.3453413248062134, "grad_norm": 14.259784698486328, "learning_rate": 3.283557064487785e-08, "logits/chosen": 0.6534693241119385, "logits/rejected": 0.6237531304359436, "logps/chosen": -116.24386596679688, "logps/ref_chosen": -55.7408447265625, "logps/ref_rejected": -74.82323455810547, "logps/rejected": -181.01791381835938, "loss": 0.9615, "margin_dpo/margin_mean": 45.69166564941406, "margin_dpo/margin_std": 56.90264892578125, "step": 564 }, { "epoch": 0.854119425547997, "fcm_dpo/beta": 0.016341106966137886, "fcm_dpo/delta": 0.12114652246236801, "fcm_dpo/margin": 29.57483673095703, "fcm_dpo/q_t": 0.3988476097583771, "grad_norm": 17.22108268737793, "learning_rate": 3.218356679178252e-08, "logits/chosen": 0.7081042528152466, "logits/rejected": 0.6530278921127319, "logps/chosen": -135.77761840820312, "logps/ref_chosen": -58.33738327026367, "logps/ref_rejected": -78.31776428222656, "logps/rejected": -185.33282470703125, "loss": 1.1058, "margin_dpo/margin_mean": 29.57483673095703, "margin_dpo/margin_std": 49.10877990722656, "step": 565 }, { "epoch": 0.8556311413454271, "fcm_dpo/beta": 0.016569407656788826, "fcm_dpo/delta": 0.06318099051713943, "fcm_dpo/margin": 32.56593704223633, "fcm_dpo/q_t": 0.3885696232318878, "grad_norm": 22.122758865356445, "learning_rate": 3.1537655732553764e-08, "logits/chosen": 0.6910306215286255, "logits/rejected": 0.671326756477356, "logps/chosen": -139.0428009033203, "logps/ref_chosen": -71.22373962402344, "logps/ref_rejected": -71.11601257324219, "logps/rejected": -171.50100708007812, "loss": 1.1378, "margin_dpo/margin_mean": 32.56593322753906, "margin_dpo/margin_std": 60.11833190917969, "step": 566 }, { "epoch": 0.8571428571428571, "fcm_dpo/beta": 0.01638803631067276, "fcm_dpo/delta": -0.00884208083152771, "fcm_dpo/margin": 37.02666473388672, "fcm_dpo/q_t": 0.3724210262298584, "grad_norm": 14.405105590820312, "learning_rate": 3.089785553471233e-08, "logits/chosen": 0.6986731886863708, "logits/rejected": 0.6037529706954956, "logps/chosen": -119.60701751708984, "logps/ref_chosen": -52.669273376464844, "logps/ref_rejected": -74.34785461425781, "logps/rejected": -178.312255859375, "loss": 1.0172, "margin_dpo/margin_mean": 37.02666473388672, "margin_dpo/margin_std": 49.046302795410156, "step": 567 }, { "epoch": 0.8586545729402872, "fcm_dpo/beta": 0.016353819519281387, "fcm_dpo/delta": -0.10809920728206635, "fcm_dpo/margin": 42.83213806152344, "fcm_dpo/q_t": 0.3527492880821228, "grad_norm": 15.531497955322266, "learning_rate": 3.026418409484513e-08, "logits/chosen": 0.6940714120864868, "logits/rejected": 0.6082979440689087, "logps/chosen": -114.17964172363281, "logps/ref_chosen": -52.178001403808594, "logps/ref_rejected": -85.8277587890625, "logps/rejected": -190.6615447998047, "loss": 0.9465, "margin_dpo/margin_mean": 42.83213806152344, "margin_dpo/margin_std": 48.710716247558594, "step": 568 }, { "epoch": 0.8601662887377173, "fcm_dpo/beta": 0.016410736367106438, "fcm_dpo/delta": 0.16210441291332245, "fcm_dpo/margin": 27.106124877929688, "fcm_dpo/q_t": 0.4084968566894531, "grad_norm": 17.497173309326172, "learning_rate": 2.963665913810451e-08, "logits/chosen": 0.605643630027771, "logits/rejected": 0.5765886306762695, "logps/chosen": -133.26766967773438, "logps/ref_chosen": -62.649261474609375, "logps/ref_rejected": -75.4298324584961, "logps/rejected": -173.15435791015625, "loss": 1.1579, "margin_dpo/margin_mean": 27.106124877929688, "margin_dpo/margin_std": 51.35100173950195, "step": 569 }, { "epoch": 0.8616780045351474, "fcm_dpo/beta": 0.016275301575660706, "fcm_dpo/delta": -0.22408686578273773, "fcm_dpo/margin": 49.55414581298828, "fcm_dpo/q_t": 0.3300275206565857, "grad_norm": 14.673301696777344, "learning_rate": 2.9015298217712453e-08, "logits/chosen": 0.6282495260238647, "logits/rejected": 0.5449954271316528, "logps/chosen": -112.49989318847656, "logps/ref_chosen": -50.04179382324219, "logps/ref_rejected": -78.27146911621094, "logps/rejected": -190.28370666503906, "loss": 0.8892, "margin_dpo/margin_mean": 49.55415344238281, "margin_dpo/margin_std": 51.157962799072266, "step": 570 }, { "epoch": 0.8631897203325775, "fcm_dpo/beta": 0.016408953815698624, "fcm_dpo/delta": 0.1358412206172943, "fcm_dpo/margin": 28.562528610229492, "fcm_dpo/q_t": 0.3967515230178833, "grad_norm": 16.442455291748047, "learning_rate": 2.840011871446962e-08, "logits/chosen": 0.6660867929458618, "logits/rejected": 0.6316110491752625, "logps/chosen": -123.6998291015625, "logps/ref_chosen": -53.65681457519531, "logps/ref_rejected": -66.13298034667969, "logps/rejected": -164.738525390625, "loss": 1.1327, "margin_dpo/margin_mean": 28.562530517578125, "margin_dpo/margin_std": 49.88398742675781, "step": 571 }, { "epoch": 0.8647014361300076, "fcm_dpo/beta": 0.01645139418542385, "fcm_dpo/delta": 0.0013835076242685318, "fcm_dpo/margin": 36.385189056396484, "fcm_dpo/q_t": 0.37128400802612305, "grad_norm": 17.820674896240234, "learning_rate": 2.7791137836269158e-08, "logits/chosen": 0.68881756067276, "logits/rejected": 0.7318768501281738, "logps/chosen": -141.16531372070312, "logps/ref_chosen": -74.81792449951172, "logps/ref_rejected": -65.88681030273438, "logps/rejected": -168.619384765625, "loss": 0.9925, "margin_dpo/margin_mean": 36.385189056396484, "margin_dpo/margin_std": 44.444793701171875, "step": 572 }, { "epoch": 0.8662131519274376, "fcm_dpo/beta": 0.016566410660743713, "fcm_dpo/delta": 0.08530230820178986, "fcm_dpo/margin": 31.338947296142578, "fcm_dpo/q_t": 0.393259197473526, "grad_norm": 20.416149139404297, "learning_rate": 2.718837261761528e-08, "logits/chosen": 0.6766190528869629, "logits/rejected": 0.6312921643257141, "logps/chosen": -143.67018127441406, "logps/ref_chosen": -68.72564697265625, "logps/ref_rejected": -88.16201782226562, "logps/rejected": -194.44549560546875, "loss": 1.1457, "margin_dpo/margin_mean": 31.338947296142578, "margin_dpo/margin_std": 59.51763916015625, "step": 573 }, { "epoch": 0.8677248677248677, "fcm_dpo/beta": 0.016609128564596176, "fcm_dpo/delta": -0.07822871953248978, "fcm_dpo/margin": 40.49908447265625, "fcm_dpo/q_t": 0.3569110631942749, "grad_norm": 14.132019996643066, "learning_rate": 2.659183991914696e-08, "logits/chosen": 0.7488802671432495, "logits/rejected": 0.6764457821846008, "logps/chosen": -123.04143524169922, "logps/ref_chosen": -56.31340026855469, "logps/ref_rejected": -83.91553497314453, "logps/rejected": -191.14266967773438, "loss": 0.951, "margin_dpo/margin_mean": 40.49908447265625, "margin_dpo/margin_std": 45.910301208496094, "step": 574 }, { "epoch": 0.8692365835222978, "fcm_dpo/beta": 0.016653649508953094, "fcm_dpo/delta": 0.15081343054771423, "fcm_dpo/margin": 27.361234664916992, "fcm_dpo/q_t": 0.4088207185268402, "grad_norm": 18.94614601135254, "learning_rate": 2.600155642716606e-08, "logits/chosen": 0.7127535343170166, "logits/rejected": 0.632762610912323, "logps/chosen": -132.92755126953125, "logps/ref_chosen": -64.5841293334961, "logps/ref_rejected": -93.47034454345703, "logps/rejected": -189.1750030517578, "loss": 1.1885, "margin_dpo/margin_mean": 27.361234664916992, "margin_dpo/margin_std": 56.532806396484375, "step": 575 }, { "epoch": 0.8707482993197279, "fcm_dpo/beta": 0.01663918048143387, "fcm_dpo/delta": -0.0805702805519104, "fcm_dpo/margin": 40.50676727294922, "fcm_dpo/q_t": 0.3621788024902344, "grad_norm": 15.60090446472168, "learning_rate": 2.5417538653170754e-08, "logits/chosen": 0.702485203742981, "logits/rejected": 0.597660481929779, "logps/chosen": -113.17729187011719, "logps/ref_chosen": -53.28052520751953, "logps/ref_rejected": -84.2000503540039, "logps/rejected": -184.60357666015625, "loss": 1.0073, "margin_dpo/margin_mean": 40.50676727294922, "margin_dpo/margin_std": 55.57630920410156, "step": 576 }, { "epoch": 0.872260015117158, "fcm_dpo/beta": 0.0168609656393528, "fcm_dpo/delta": 0.091962069272995, "fcm_dpo/margin": 30.416515350341797, "fcm_dpo/q_t": 0.39214855432510376, "grad_norm": 16.009185791015625, "learning_rate": 2.4839802933393607e-08, "logits/chosen": 0.6752563714981079, "logits/rejected": 0.6577494144439697, "logps/chosen": -128.01956176757812, "logps/ref_chosen": -62.32468795776367, "logps/ref_rejected": -67.300537109375, "logps/rejected": -163.41192626953125, "loss": 1.1286, "margin_dpo/margin_mean": 30.416515350341797, "margin_dpo/margin_std": 54.648590087890625, "step": 577 }, { "epoch": 0.873771730914588, "fcm_dpo/beta": 0.01736040972173214, "fcm_dpo/delta": 0.17633679509162903, "fcm_dpo/margin": 24.863304138183594, "fcm_dpo/q_t": 0.4127289652824402, "grad_norm": 18.371273040771484, "learning_rate": 2.4268365428344733e-08, "logits/chosen": 0.7377026081085205, "logits/rejected": 0.7115747928619385, "logps/chosen": -122.48710632324219, "logps/ref_chosen": -56.65557861328125, "logps/ref_rejected": -68.21835327148438, "logps/rejected": -158.91317749023438, "loss": 1.1781, "margin_dpo/margin_mean": 24.863304138183594, "margin_dpo/margin_std": 50.95249938964844, "step": 578 }, { "epoch": 0.8752834467120182, "fcm_dpo/beta": 0.017089959233999252, "fcm_dpo/delta": -0.18611499667167664, "fcm_dpo/margin": 45.14439392089844, "fcm_dpo/q_t": 0.3376918435096741, "grad_norm": 14.985367774963379, "learning_rate": 2.3703242122359357e-08, "logits/chosen": 0.6345067024230957, "logits/rejected": 0.6059737205505371, "logps/chosen": -125.25282287597656, "logps/ref_chosen": -56.809661865234375, "logps/ref_rejected": -68.09613037109375, "logps/rejected": -181.68368530273438, "loss": 0.9086, "margin_dpo/margin_mean": 45.14439392089844, "margin_dpo/margin_std": 49.639869689941406, "step": 579 }, { "epoch": 0.8767951625094482, "fcm_dpo/beta": 0.01717275008559227, "fcm_dpo/delta": 0.05046956241130829, "fcm_dpo/margin": 32.10778045654297, "fcm_dpo/q_t": 0.3875921666622162, "grad_norm": 18.05984878540039, "learning_rate": 2.3144448823151392e-08, "logits/chosen": 0.650100827217102, "logits/rejected": 0.5966248512268066, "logps/chosen": -124.11175537109375, "logps/ref_chosen": -57.70011520385742, "logps/ref_rejected": -77.90664672851562, "logps/rejected": -176.42605590820312, "loss": 1.1186, "margin_dpo/margin_mean": 32.1077766418457, "margin_dpo/margin_std": 56.66058349609375, "step": 580 }, { "epoch": 0.8783068783068783, "fcm_dpo/beta": 0.017328936606645584, "fcm_dpo/delta": 0.1224561482667923, "fcm_dpo/margin": 27.898914337158203, "fcm_dpo/q_t": 0.40121370553970337, "grad_norm": 20.932573318481445, "learning_rate": 2.259200116137039e-08, "logits/chosen": 0.7106508612632751, "logits/rejected": 0.6442577838897705, "logps/chosen": -135.62893676757812, "logps/ref_chosen": -59.332359313964844, "logps/ref_rejected": -83.64482116699219, "logps/rejected": -187.84033203125, "loss": 1.1397, "margin_dpo/margin_mean": 27.898914337158203, "margin_dpo/margin_std": 51.49224853515625, "step": 581 }, { "epoch": 0.8798185941043084, "fcm_dpo/beta": 0.017463190481066704, "fcm_dpo/delta": -0.039771128445863724, "fcm_dpo/margin": 36.49109649658203, "fcm_dpo/q_t": 0.367986798286438, "grad_norm": 18.59633445739746, "learning_rate": 2.204591459016525e-08, "logits/chosen": 0.7187392711639404, "logits/rejected": 0.7509971857070923, "logps/chosen": -130.5426025390625, "logps/ref_chosen": -64.16285705566406, "logps/ref_rejected": -58.632896423339844, "logps/rejected": -161.50375366210938, "loss": 1.0385, "margin_dpo/margin_mean": 36.49109649658203, "margin_dpo/margin_std": 53.77030944824219, "step": 582 }, { "epoch": 0.8813303099017384, "fcm_dpo/beta": 0.017327800393104553, "fcm_dpo/delta": -0.0331776961684227, "fcm_dpo/margin": 36.420982360839844, "fcm_dpo/q_t": 0.3738207221031189, "grad_norm": 21.699419021606445, "learning_rate": 2.1506204384751064e-08, "logits/chosen": 0.792787492275238, "logits/rejected": 0.6811857223510742, "logps/chosen": -119.08982849121094, "logps/ref_chosen": -51.87239456176758, "logps/ref_rejected": -83.86331176757812, "logps/rejected": -187.50172424316406, "loss": 1.094, "margin_dpo/margin_mean": 36.42098617553711, "margin_dpo/margin_std": 61.2724609375, "step": 583 }, { "epoch": 0.8828420256991686, "fcm_dpo/beta": 0.017169862985610962, "fcm_dpo/delta": 0.004221245646476746, "fcm_dpo/margin": 34.651824951171875, "fcm_dpo/q_t": 0.3810550570487976, "grad_norm": 18.9495849609375, "learning_rate": 2.09728856419826e-08, "logits/chosen": 0.7909083366394043, "logits/rejected": 0.6847249865531921, "logps/chosen": -106.61726379394531, "logps/ref_chosen": -46.571388244628906, "logps/ref_rejected": -80.67969512939453, "logps/rejected": -175.3773956298828, "loss": 1.113, "margin_dpo/margin_mean": 34.65182876586914, "margin_dpo/margin_std": 60.08186340332031, "step": 584 }, { "epoch": 0.8843537414965986, "fcm_dpo/beta": 0.017794519662857056, "fcm_dpo/delta": 0.14609597623348236, "fcm_dpo/margin": 25.79193878173828, "fcm_dpo/q_t": 0.4023086428642273, "grad_norm": 16.919166564941406, "learning_rate": 2.044597327993153e-08, "logits/chosen": 0.6519303321838379, "logits/rejected": 0.6100852489471436, "logps/chosen": -127.17724609375, "logps/ref_chosen": -58.124534606933594, "logps/ref_rejected": -79.00538635253906, "logps/rejected": -173.85003662109375, "loss": 1.1695, "margin_dpo/margin_mean": 25.79193687438965, "margin_dpo/margin_std": 51.46417236328125, "step": 585 }, { "epoch": 0.8858654572940288, "fcm_dpo/beta": 0.017533529549837112, "fcm_dpo/delta": -0.07861245423555374, "fcm_dpo/margin": 38.363807678222656, "fcm_dpo/q_t": 0.35704219341278076, "grad_norm": 19.74669647216797, "learning_rate": 1.9925482037469187e-08, "logits/chosen": 0.7134085297584534, "logits/rejected": 0.6633602380752563, "logps/chosen": -121.17387390136719, "logps/ref_chosen": -54.10163879394531, "logps/ref_rejected": -63.72113037109375, "logps/rejected": -169.15716552734375, "loss": 0.9595, "margin_dpo/margin_mean": 38.363807678222656, "margin_dpo/margin_std": 45.4287223815918, "step": 586 }, { "epoch": 0.8873771730914588, "fcm_dpo/beta": 0.017398815602064133, "fcm_dpo/delta": -0.05770276114344597, "fcm_dpo/margin": 37.58617401123047, "fcm_dpo/q_t": 0.36453670263290405, "grad_norm": 16.288728713989258, "learning_rate": 1.9411426473854687e-08, "logits/chosen": 0.7234928011894226, "logits/rejected": 0.7105797529220581, "logps/chosen": -127.94309997558594, "logps/ref_chosen": -63.41719436645508, "logps/ref_rejected": -63.47003936767578, "logps/rejected": -165.58212280273438, "loss": 1.0827, "margin_dpo/margin_mean": 37.58617401123047, "margin_dpo/margin_std": 61.84636306762695, "step": 587 }, { "epoch": 0.8888888888888888, "fcm_dpo/beta": 0.017139676958322525, "fcm_dpo/delta": -0.0592581145465374, "fcm_dpo/margin": 38.221168518066406, "fcm_dpo/q_t": 0.3644402027130127, "grad_norm": 18.18668556213379, "learning_rate": 1.890382096832699e-08, "logits/chosen": 0.726331353187561, "logits/rejected": 0.6786773800849915, "logps/chosen": -131.14239501953125, "logps/ref_chosen": -62.20103454589844, "logps/ref_rejected": -82.10249328613281, "logps/rejected": -189.26502990722656, "loss": 1.0204, "margin_dpo/margin_mean": 38.221168518066406, "margin_dpo/margin_std": 54.02344512939453, "step": 588 }, { "epoch": 0.890400604686319, "fcm_dpo/beta": 0.01690484955906868, "fcm_dpo/delta": -0.09004709124565125, "fcm_dpo/margin": 40.455116271972656, "fcm_dpo/q_t": 0.35561931133270264, "grad_norm": 18.06881332397461, "learning_rate": 1.840267971970344e-08, "logits/chosen": 0.6733403205871582, "logits/rejected": 0.644467830657959, "logps/chosen": -118.9821548461914, "logps/ref_chosen": -56.71361541748047, "logps/ref_rejected": -76.7366943359375, "logps/rejected": -179.46034240722656, "loss": 0.9454, "margin_dpo/margin_mean": 40.455116271972656, "margin_dpo/margin_std": 46.39854431152344, "step": 589 }, { "epoch": 0.891912320483749, "fcm_dpo/beta": 0.01654157042503357, "fcm_dpo/delta": -0.12827324867248535, "fcm_dpo/margin": 43.478607177734375, "fcm_dpo/q_t": 0.3495979905128479, "grad_norm": 16.176586151123047, "learning_rate": 1.7908016745981856e-08, "logits/chosen": 0.6159425973892212, "logits/rejected": 0.589844286441803, "logps/chosen": -136.01022338867188, "logps/ref_chosen": -66.5138168334961, "logps/ref_rejected": -85.70820617675781, "logps/rejected": -198.6832275390625, "loss": 0.9602, "margin_dpo/margin_mean": 43.478607177734375, "margin_dpo/margin_std": 53.51036834716797, "step": 590 }, { "epoch": 0.8934240362811792, "fcm_dpo/beta": 0.015954457223415375, "fcm_dpo/delta": -0.12046321481466293, "fcm_dpo/margin": 44.440185546875, "fcm_dpo/q_t": 0.3531830310821533, "grad_norm": 19.646526336669922, "learning_rate": 1.7419845883949098e-08, "logits/chosen": 0.798710823059082, "logits/rejected": 0.7333512306213379, "logps/chosen": -120.38575744628906, "logps/ref_chosen": -60.697181701660156, "logps/ref_rejected": -86.12278747558594, "logps/rejected": -190.25155639648438, "loss": 1.0121, "margin_dpo/margin_mean": 44.440185546875, "margin_dpo/margin_std": 60.961891174316406, "step": 591 }, { "epoch": 0.8949357520786092, "fcm_dpo/beta": 0.015935592353343964, "fcm_dpo/delta": 0.02253812551498413, "fcm_dpo/margin": 36.251068115234375, "fcm_dpo/q_t": 0.3809935748577118, "grad_norm": 17.447284698486328, "learning_rate": 1.6938180788793556e-08, "logits/chosen": 0.7249664664268494, "logits/rejected": 0.6130805611610413, "logps/chosen": -113.61046600341797, "logps/ref_chosen": -51.237327575683594, "logps/ref_rejected": -81.60242462158203, "logps/rejected": -180.22662353515625, "loss": 1.0401, "margin_dpo/margin_mean": 36.25107192993164, "margin_dpo/margin_std": 51.01573944091797, "step": 592 }, { "epoch": 0.8964474678760394, "fcm_dpo/beta": 0.016153138130903244, "fcm_dpo/delta": 0.04338935390114784, "fcm_dpo/margin": 34.608680725097656, "fcm_dpo/q_t": 0.3850771486759186, "grad_norm": 19.05673599243164, "learning_rate": 1.6463034933723336e-08, "logits/chosen": 0.6903887987136841, "logits/rejected": 0.5974088311195374, "logps/chosen": -99.88896179199219, "logps/ref_chosen": -42.08000183105469, "logps/ref_rejected": -68.47499084472656, "logps/rejected": -160.89260864257812, "loss": 1.1043, "margin_dpo/margin_mean": 34.608680725097656, "margin_dpo/margin_std": 58.95058822631836, "step": 593 }, { "epoch": 0.8979591836734694, "fcm_dpo/beta": 0.01636182889342308, "fcm_dpo/delta": 0.08321181684732437, "fcm_dpo/margin": 31.851009368896484, "fcm_dpo/q_t": 0.38907164335250854, "grad_norm": 16.617149353027344, "learning_rate": 1.5994421609589385e-08, "logits/chosen": 0.6165971755981445, "logits/rejected": 0.5995768308639526, "logps/chosen": -135.38876342773438, "logps/ref_chosen": -63.658668518066406, "logps/ref_rejected": -70.35597229003906, "logps/rejected": -173.9370880126953, "loss": 1.0775, "margin_dpo/margin_mean": 31.851009368896484, "margin_dpo/margin_std": 49.175872802734375, "step": 594 }, { "epoch": 0.8994708994708994, "fcm_dpo/beta": 0.016220130026340485, "fcm_dpo/delta": -0.13953274488449097, "fcm_dpo/margin": 44.979042053222656, "fcm_dpo/q_t": 0.3480144143104553, "grad_norm": 15.34145450592041, "learning_rate": 1.553235392451377e-08, "logits/chosen": 0.7449917793273926, "logits/rejected": 0.6512782573699951, "logps/chosen": -119.75555419921875, "logps/ref_chosen": -56.21875762939453, "logps/ref_rejected": -83.95773315429688, "logps/rejected": -192.47357177734375, "loss": 0.9818, "margin_dpo/margin_mean": 44.979042053222656, "margin_dpo/margin_std": 58.99134063720703, "step": 595 }, { "epoch": 0.9009826152683296, "fcm_dpo/beta": 0.016492169350385666, "fcm_dpo/delta": 0.2100110501050949, "fcm_dpo/margin": 24.183456420898438, "fcm_dpo/q_t": 0.42047858238220215, "grad_norm": 16.09357261657715, "learning_rate": 1.507684480352292e-08, "logits/chosen": 0.6181765794754028, "logits/rejected": 0.6303185224533081, "logps/chosen": -140.47659301757812, "logps/ref_chosen": -68.48088073730469, "logps/ref_rejected": -61.732967376708984, "logps/rejected": -157.91213989257812, "loss": 1.1953, "margin_dpo/margin_mean": 24.183456420898438, "margin_dpo/margin_std": 52.747528076171875, "step": 596 }, { "epoch": 0.9024943310657596, "fcm_dpo/beta": 0.016784945502877235, "fcm_dpo/delta": 0.02770313434302807, "fcm_dpo/margin": 34.191078186035156, "fcm_dpo/q_t": 0.38534241914749146, "grad_norm": 16.53261947631836, "learning_rate": 1.4627906988186111e-08, "logits/chosen": 0.6633099317550659, "logits/rejected": 0.6418108940124512, "logps/chosen": -106.30998229980469, "logps/ref_chosen": -48.85750961303711, "logps/ref_rejected": -55.068084716796875, "logps/rejected": -146.71163940429688, "loss": 1.0651, "margin_dpo/margin_mean": 34.191078186035156, "margin_dpo/margin_std": 53.32984161376953, "step": 597 }, { "epoch": 0.9040060468631897, "fcm_dpo/beta": 0.017436373978853226, "fcm_dpo/delta": 0.21474510431289673, "fcm_dpo/margin": 22.515594482421875, "fcm_dpo/q_t": 0.42177683115005493, "grad_norm": 26.964988708496094, "learning_rate": 1.4185553036259095e-08, "logits/chosen": 0.7077724933624268, "logits/rejected": 0.6297961473464966, "logps/chosen": -135.16842651367188, "logps/ref_chosen": -58.88715362548828, "logps/ref_rejected": -81.43145751953125, "logps/rejected": -180.22833251953125, "loss": 1.2212, "margin_dpo/margin_mean": 22.515594482421875, "margin_dpo/margin_std": 52.95011520385742, "step": 598 }, { "epoch": 0.9055177626606198, "fcm_dpo/beta": 0.017993086948990822, "fcm_dpo/delta": 0.1332523375749588, "fcm_dpo/margin": 26.235389709472656, "fcm_dpo/q_t": 0.4060378074645996, "grad_norm": 20.685775756835938, "learning_rate": 1.3749795321332885e-08, "logits/chosen": 0.774587869644165, "logits/rejected": 0.7272125482559204, "logps/chosen": -135.90377807617188, "logps/ref_chosen": -57.60719299316406, "logps/ref_rejected": -71.80469512939453, "logps/rejected": -176.336669921875, "loss": 1.1719, "margin_dpo/margin_mean": 26.235389709472656, "margin_dpo/margin_std": 53.39300537109375, "step": 599 }, { "epoch": 0.9070294784580499, "fcm_dpo/beta": 0.018428601324558258, "fcm_dpo/delta": 0.06073428690433502, "fcm_dpo/margin": 29.233776092529297, "fcm_dpo/q_t": 0.3916233777999878, "grad_norm": 21.98250961303711, "learning_rate": 1.3320646032487393e-08, "logits/chosen": 0.758099377155304, "logits/rejected": 0.7081928849220276, "logps/chosen": -129.58924865722656, "logps/ref_chosen": -58.44231414794922, "logps/ref_rejected": -83.64639282226562, "logps/rejected": -184.027099609375, "loss": 1.1307, "margin_dpo/margin_mean": 29.23377799987793, "margin_dpo/margin_std": 52.01209259033203, "step": 600 }, { "epoch": 0.9070294784580499, "eval_fcm_dpo/beta": 0.018254384398460388, "eval_logits/chosen": 0.6983587741851807, "eval_logits/rejected": 0.6510134935379028, "eval_logps/chosen": -139.107177734375, "eval_logps/ref_chosen": -74.85946655273438, "eval_logps/ref_rejected": -79.54898834228516, "eval_logps/rejected": -178.12290954589844, "eval_loss": 0.5317867994308472, "eval_margin_dpo/margin_mean": 34.32622146606445, "eval_margin_dpo/margin_std": 53.363624572753906, "eval_runtime": 38.0129, "eval_samples_per_second": 60.585, "eval_steps_per_second": 1.894, "step": 600 }, { "epoch": 0.90854119425548, "fcm_dpo/beta": 0.01776060089468956, "fcm_dpo/delta": -0.14013047516345978, "fcm_dpo/margin": 41.001068115234375, "fcm_dpo/q_t": 0.3542864918708801, "grad_norm": 16.561017990112305, "learning_rate": 1.2898117173950868e-08, "logits/chosen": 0.689449667930603, "logits/rejected": 0.6153823137283325, "logps/chosen": -114.82102966308594, "logps/ref_chosen": -55.59432601928711, "logps/ref_rejected": -83.68630981445312, "logps/rejected": -183.91409301757812, "loss": 1.0035, "margin_dpo/margin_mean": 41.001068115234375, "margin_dpo/margin_std": 56.815673828125, "step": 601 }, { "epoch": 0.91005291005291, "fcm_dpo/beta": 0.017588762566447258, "fcm_dpo/delta": -0.08487124741077423, "fcm_dpo/margin": 38.61279296875, "fcm_dpo/q_t": 0.3599042296409607, "grad_norm": 16.240304946899414, "learning_rate": 1.2482220564763667e-08, "logits/chosen": 0.68162602186203, "logits/rejected": 0.6481719017028809, "logps/chosen": -110.06434631347656, "logps/ref_chosen": -56.349185943603516, "logps/ref_rejected": -71.9959716796875, "logps/rejected": -164.3239288330078, "loss": 0.9813, "margin_dpo/margin_mean": 38.61279296875, "margin_dpo/margin_std": 49.46638107299805, "step": 602 }, { "epoch": 0.9115646258503401, "fcm_dpo/beta": 0.017239883542060852, "fcm_dpo/delta": -0.0702548399567604, "fcm_dpo/margin": 38.59679412841797, "fcm_dpo/q_t": 0.36295539140701294, "grad_norm": 19.132186889648438, "learning_rate": 1.2072967838448051e-08, "logits/chosen": 0.6549055576324463, "logits/rejected": 0.5953928232192993, "logps/chosen": -114.13343048095703, "logps/ref_chosen": -53.16838836669922, "logps/ref_rejected": -73.8604736328125, "logps/rejected": -173.4223175048828, "loss": 1.0085, "margin_dpo/margin_mean": 38.59679412841797, "margin_dpo/margin_std": 53.11018371582031, "step": 603 }, { "epoch": 0.9130763416477702, "fcm_dpo/beta": 0.017200354486703873, "fcm_dpo/delta": -0.011806067079305649, "fcm_dpo/margin": 35.52631378173828, "fcm_dpo/q_t": 0.377369225025177, "grad_norm": 19.996925354003906, "learning_rate": 1.1670370442682459e-08, "logits/chosen": 0.6451849937438965, "logits/rejected": 0.6497629880905151, "logps/chosen": -131.07199096679688, "logps/ref_chosen": -72.64942169189453, "logps/ref_rejected": -69.8792724609375, "logps/rejected": -163.82815551757812, "loss": 1.0794, "margin_dpo/margin_mean": 35.52631378173828, "margin_dpo/margin_std": 58.30902099609375, "step": 604 }, { "epoch": 0.9145880574452003, "fcm_dpo/beta": 0.017121510580182076, "fcm_dpo/delta": 0.015186280012130737, "fcm_dpo/margin": 34.185150146484375, "fcm_dpo/q_t": 0.37800726294517517, "grad_norm": 18.581581115722656, "learning_rate": 1.1274439638981532e-08, "logits/chosen": 0.7332407236099243, "logits/rejected": 0.6724931001663208, "logps/chosen": -131.06732177734375, "logps/ref_chosen": -61.61284637451172, "logps/ref_rejected": -79.34398651123047, "logps/rejected": -182.98361206054688, "loss": 1.0838, "margin_dpo/margin_mean": 34.18514633178711, "margin_dpo/margin_std": 55.458526611328125, "step": 605 }, { "epoch": 0.9160997732426304, "fcm_dpo/beta": 0.017126478254795074, "fcm_dpo/delta": -0.07169067859649658, "fcm_dpo/margin": 38.930240631103516, "fcm_dpo/q_t": 0.36326584219932556, "grad_norm": 17.81355094909668, "learning_rate": 1.0885186502381016e-08, "logits/chosen": 0.697436511516571, "logits/rejected": 0.6320433616638184, "logps/chosen": -114.82478332519531, "logps/ref_chosen": -54.46424102783203, "logps/ref_rejected": -79.62708282470703, "logps/rejected": -178.91786193847656, "loss": 0.9987, "margin_dpo/margin_mean": 38.930240631103516, "margin_dpo/margin_std": 51.813507080078125, "step": 606 }, { "epoch": 0.9176114890400605, "fcm_dpo/beta": 0.016687501221895218, "fcm_dpo/delta": -0.03947858512401581, "fcm_dpo/margin": 37.99995803833008, "fcm_dpo/q_t": 0.3684396743774414, "grad_norm": 17.625709533691406, "learning_rate": 1.0502621921127774e-08, "logits/chosen": 0.6601325273513794, "logits/rejected": 0.6314581632614136, "logps/chosen": -131.482666015625, "logps/ref_chosen": -62.86086654663086, "logps/ref_rejected": -72.5501937866211, "logps/rejected": -179.17196655273438, "loss": 1.0299, "margin_dpo/margin_mean": 37.99995422363281, "margin_dpo/margin_std": 52.8006706237793, "step": 607 }, { "epoch": 0.9191232048374905, "fcm_dpo/beta": 0.017145490273833275, "fcm_dpo/delta": 0.1086527556180954, "fcm_dpo/margin": 28.957908630371094, "fcm_dpo/q_t": 0.3978094458580017, "grad_norm": 18.91431999206543, "learning_rate": 1.0126756596375685e-08, "logits/chosen": 0.6649261116981506, "logits/rejected": 0.5938790440559387, "logps/chosen": -134.17208862304688, "logps/ref_chosen": -63.18071746826172, "logps/ref_rejected": -99.15888214111328, "logps/rejected": -199.10816955566406, "loss": 1.1162, "margin_dpo/margin_mean": 28.957908630371094, "margin_dpo/margin_std": 50.635528564453125, "step": 608 }, { "epoch": 0.9206349206349206, "fcm_dpo/beta": 0.016784831881523132, "fcm_dpo/delta": -0.12502390146255493, "fcm_dpo/margin": 42.56886291503906, "fcm_dpo/q_t": 0.344668447971344, "grad_norm": 13.991439819335938, "learning_rate": 9.757601041885694e-09, "logits/chosen": 0.7602401971817017, "logits/rejected": 0.7267245650291443, "logps/chosen": -108.38632202148438, "logps/ref_chosen": -48.62322235107422, "logps/ref_rejected": -68.28271484375, "logps/rejected": -170.61468505859375, "loss": 0.9301, "margin_dpo/margin_mean": 42.56886291503906, "margin_dpo/margin_std": 46.08442687988281, "step": 609 }, { "epoch": 0.9221466364323507, "fcm_dpo/beta": 0.016544140875339508, "fcm_dpo/delta": -0.06457509100437164, "fcm_dpo/margin": 39.86622619628906, "fcm_dpo/q_t": 0.3651999235153198, "grad_norm": 16.56065559387207, "learning_rate": 9.395165583732379e-09, "logits/chosen": 0.6606322526931763, "logits/rejected": 0.6659835577011108, "logps/chosen": -139.08177185058594, "logps/ref_chosen": -72.66513061523438, "logps/ref_rejected": -87.15310668945312, "logps/rejected": -193.43597412109375, "loss": 0.9975, "margin_dpo/margin_mean": 39.86622619628906, "margin_dpo/margin_std": 53.14252471923828, "step": 610 }, { "epoch": 0.9236583522297808, "fcm_dpo/beta": 0.016713187098503113, "fcm_dpo/delta": 0.09823843091726303, "fcm_dpo/margin": 30.310468673706055, "fcm_dpo/q_t": 0.39233702421188354, "grad_norm": 18.44264030456543, "learning_rate": 9.03946036001449e-09, "logits/chosen": 0.7202863097190857, "logits/rejected": 0.6715569496154785, "logps/chosen": -110.45277404785156, "logps/ref_chosen": -48.30857849121094, "logps/ref_rejected": -70.6141128540039, "logps/rejected": -163.06878662109375, "loss": 1.0841, "margin_dpo/margin_mean": 30.310466766357422, "margin_dpo/margin_std": 46.933807373046875, "step": 611 }, { "epoch": 0.9251700680272109, "fcm_dpo/beta": 0.01676209270954132, "fcm_dpo/delta": -0.055530332028865814, "fcm_dpo/margin": 38.89222717285156, "fcm_dpo/q_t": 0.36272430419921875, "grad_norm": 17.055858612060547, "learning_rate": 8.690495320571839e-09, "logits/chosen": 0.6043254137039185, "logits/rejected": 0.5377599000930786, "logps/chosen": -130.5507354736328, "logps/ref_chosen": -61.23155975341797, "logps/ref_rejected": -94.37979888916016, "logps/rejected": -202.5911865234375, "loss": 1.0225, "margin_dpo/margin_mean": 38.89222717285156, "margin_dpo/margin_std": 55.477169036865234, "step": 612 }, { "epoch": 0.926681783824641, "fcm_dpo/beta": 0.016338884830474854, "fcm_dpo/delta": -0.18059828877449036, "fcm_dpo/margin": 46.94964599609375, "fcm_dpo/q_t": 0.3390156626701355, "grad_norm": 14.02810287475586, "learning_rate": 8.348280226706722e-09, "logits/chosen": 0.6071598529815674, "logits/rejected": 0.604604959487915, "logps/chosen": -110.54042053222656, "logps/ref_chosen": -53.98310852050781, "logps/ref_rejected": -58.32208251953125, "logps/rejected": -161.82904052734375, "loss": 0.9147, "margin_dpo/margin_mean": 46.94964599609375, "margin_dpo/margin_std": 52.04528045654297, "step": 613 }, { "epoch": 0.9281934996220711, "fcm_dpo/beta": 0.01610434241592884, "fcm_dpo/delta": -0.02669554390013218, "fcm_dpo/margin": 38.80487060546875, "fcm_dpo/q_t": 0.36256033182144165, "grad_norm": 17.52783203125, "learning_rate": 8.012824650910937e-09, "logits/chosen": 0.7269790172576904, "logits/rejected": 0.7166494727134705, "logps/chosen": -127.43746185302734, "logps/ref_chosen": -60.24303436279297, "logps/ref_rejected": -72.26258850097656, "logps/rejected": -178.2618865966797, "loss": 0.9902, "margin_dpo/margin_mean": 38.80487060546875, "margin_dpo/margin_std": 47.703529357910156, "step": 614 }, { "epoch": 0.9297052154195011, "fcm_dpo/beta": 0.015918750315904617, "fcm_dpo/delta": -0.01292453333735466, "fcm_dpo/margin": 38.415618896484375, "fcm_dpo/q_t": 0.3736609220504761, "grad_norm": 16.462543487548828, "learning_rate": 7.684137976598088e-09, "logits/chosen": 0.6522448062896729, "logits/rejected": 0.6058327555656433, "logps/chosen": -141.9201202392578, "logps/ref_chosen": -72.09467315673828, "logps/ref_rejected": -104.02980041503906, "logps/rejected": -212.2708740234375, "loss": 1.0683, "margin_dpo/margin_mean": 38.415618896484375, "margin_dpo/margin_std": 60.30884552001953, "step": 615 }, { "epoch": 0.9312169312169312, "fcm_dpo/beta": 0.016069892793893814, "fcm_dpo/delta": 0.0499076284468174, "fcm_dpo/margin": 34.402427673339844, "fcm_dpo/q_t": 0.38505834341049194, "grad_norm": 15.858017921447754, "learning_rate": 7.36222939784098e-09, "logits/chosen": 0.7267533540725708, "logits/rejected": 0.6444547176361084, "logps/chosen": -125.30108642578125, "logps/ref_chosen": -58.530723571777344, "logps/ref_rejected": -75.48025512695312, "logps/rejected": -176.65304565429688, "loss": 1.0629, "margin_dpo/margin_mean": 34.402427673339844, "margin_dpo/margin_std": 52.15081024169922, "step": 616 }, { "epoch": 0.9327286470143613, "fcm_dpo/beta": 0.016335247084498405, "fcm_dpo/delta": 0.1371011584997177, "fcm_dpo/margin": 28.701946258544922, "fcm_dpo/q_t": 0.4002954661846161, "grad_norm": 19.695823669433594, "learning_rate": 7.047107919114586e-09, "logits/chosen": 0.692958414554596, "logits/rejected": 0.6451589465141296, "logps/chosen": -133.67486572265625, "logps/ref_chosen": -57.608673095703125, "logps/ref_rejected": -81.22109985351562, "logps/rejected": -185.98922729492188, "loss": 1.1238, "margin_dpo/margin_mean": 28.701946258544922, "margin_dpo/margin_std": 48.95549392700195, "step": 617 }, { "epoch": 0.9342403628117913, "fcm_dpo/beta": 0.016573436558246613, "fcm_dpo/delta": 0.03523973748087883, "fcm_dpo/margin": 34.153472900390625, "fcm_dpo/q_t": 0.38183510303497314, "grad_norm": 19.02802085876465, "learning_rate": 6.738782355044048e-09, "logits/chosen": 0.664625346660614, "logits/rejected": 0.5642604827880859, "logps/chosen": -120.1915512084961, "logps/ref_chosen": -56.69594192504883, "logps/ref_rejected": -85.92362976074219, "logps/rejected": -183.57272338867188, "loss": 1.0365, "margin_dpo/margin_mean": 34.153472900390625, "margin_dpo/margin_std": 47.633235931396484, "step": 618 }, { "epoch": 0.9357520786092215, "fcm_dpo/beta": 0.016661301255226135, "fcm_dpo/delta": -0.003810018301010132, "fcm_dpo/margin": 36.216026306152344, "fcm_dpo/q_t": 0.3725661635398865, "grad_norm": 16.40776252746582, "learning_rate": 6.437261330158206e-09, "logits/chosen": 0.8066527843475342, "logits/rejected": 0.7323254346847534, "logps/chosen": -116.28726196289062, "logps/ref_chosen": -54.05841827392578, "logps/ref_rejected": -83.55493927001953, "logps/rejected": -181.99981689453125, "loss": 1.0208, "margin_dpo/margin_mean": 36.216026306152344, "margin_dpo/margin_std": 49.7095947265625, "step": 619 }, { "epoch": 0.9372637944066515, "fcm_dpo/beta": 0.017014428973197937, "fcm_dpo/delta": 0.02767297625541687, "fcm_dpo/margin": 33.48244857788086, "fcm_dpo/q_t": 0.3834364712238312, "grad_norm": 20.08639907836914, "learning_rate": 6.142553278648238e-09, "logits/chosen": 0.6961303353309631, "logits/rejected": 0.6932598352432251, "logps/chosen": -124.36492919921875, "logps/ref_chosen": -63.36971664428711, "logps/ref_rejected": -65.68269348144531, "logps/rejected": -160.16033935546875, "loss": 1.0699, "margin_dpo/margin_mean": 33.48244857788086, "margin_dpo/margin_std": 49.82050323486328, "step": 620 }, { "epoch": 0.9387755102040817, "fcm_dpo/beta": 0.017159054055809975, "fcm_dpo/delta": 0.12095116078853607, "fcm_dpo/margin": 28.226398468017578, "fcm_dpo/q_t": 0.39944252371788025, "grad_norm": 17.94400978088379, "learning_rate": 5.854666444131934e-09, "logits/chosen": 0.7288126945495605, "logits/rejected": 0.6246720552444458, "logps/chosen": -117.13513946533203, "logps/ref_chosen": -52.321224212646484, "logps/ref_rejected": -88.09001159667969, "logps/rejected": -181.1303253173828, "loss": 1.1334, "margin_dpo/margin_mean": 28.226398468017578, "margin_dpo/margin_std": 51.35742950439453, "step": 621 }, { "epoch": 0.9402872260015117, "fcm_dpo/beta": 0.017424535006284714, "fcm_dpo/delta": 0.060253530740737915, "fcm_dpo/margin": 31.139747619628906, "fcm_dpo/q_t": 0.38587862253189087, "grad_norm": 20.17845916748047, "learning_rate": 5.573608879422875e-09, "logits/chosen": 0.6671361923217773, "logits/rejected": 0.6277763843536377, "logps/chosen": -130.28573608398438, "logps/ref_chosen": -59.86545944213867, "logps/ref_rejected": -81.86668395996094, "logps/rejected": -183.42669677734375, "loss": 1.0714, "margin_dpo/margin_mean": 31.139747619628906, "margin_dpo/margin_std": 47.877464294433594, "step": 622 }, { "epoch": 0.9417989417989417, "fcm_dpo/beta": 0.01731981709599495, "fcm_dpo/delta": -0.02505187690258026, "fcm_dpo/margin": 35.991477966308594, "fcm_dpo/q_t": 0.36854812502861023, "grad_norm": 16.328716278076172, "learning_rate": 5.299388446305342e-09, "logits/chosen": 0.66575688123703, "logits/rejected": 0.5999845266342163, "logps/chosen": -142.4012451171875, "logps/ref_chosen": -67.36846160888672, "logps/ref_rejected": -82.02733612060547, "logps/rejected": -193.05160522460938, "loss": 1.0023, "margin_dpo/margin_mean": 35.991477966308594, "margin_dpo/margin_std": 46.96021270751953, "step": 623 }, { "epoch": 0.9433106575963719, "fcm_dpo/beta": 0.017001666128635406, "fcm_dpo/delta": -0.10930600017309189, "fcm_dpo/margin": 41.240196228027344, "fcm_dpo/q_t": 0.35751086473464966, "grad_norm": 18.303504943847656, "learning_rate": 5.03201281531429e-09, "logits/chosen": 0.6909003853797913, "logits/rejected": 0.592226505279541, "logps/chosen": -112.12557983398438, "logps/ref_chosen": -51.02655029296875, "logps/ref_rejected": -76.49203491210938, "logps/rejected": -178.83126831054688, "loss": 0.9887, "margin_dpo/margin_mean": 41.240196228027344, "margin_dpo/margin_std": 54.28327941894531, "step": 624 }, { "epoch": 0.9448223733938019, "fcm_dpo/beta": 0.017218362540006638, "fcm_dpo/delta": 0.09852743148803711, "fcm_dpo/margin": 29.410263061523438, "fcm_dpo/q_t": 0.39616650342941284, "grad_norm": 18.753204345703125, "learning_rate": 4.7714894655209174e-09, "logits/chosen": 0.7545461654663086, "logits/rejected": 0.6679770350456238, "logps/chosen": -119.49295043945312, "logps/ref_chosen": -54.20761489868164, "logps/ref_rejected": -84.93669128417969, "logps/rejected": -179.63229370117188, "loss": 1.1324, "margin_dpo/margin_mean": 29.410261154174805, "margin_dpo/margin_std": 54.021522521972656, "step": 625 }, { "epoch": 0.9463340891912321, "fcm_dpo/beta": 0.017001252621412277, "fcm_dpo/delta": -0.09621863812208176, "fcm_dpo/margin": 40.53666687011719, "fcm_dpo/q_t": 0.3652860224246979, "grad_norm": 17.47981834411621, "learning_rate": 4.517825684323323e-09, "logits/chosen": 0.783034086227417, "logits/rejected": 0.6518374681472778, "logps/chosen": -106.47137451171875, "logps/ref_chosen": -45.06201934814453, "logps/ref_rejected": -89.66368103027344, "logps/rejected": -191.60971069335938, "loss": 1.0556, "margin_dpo/margin_mean": 40.53666687011719, "margin_dpo/margin_std": 62.77349853515625, "step": 626 }, { "epoch": 0.9478458049886621, "fcm_dpo/beta": 0.016569461673498154, "fcm_dpo/delta": -0.13161586225032806, "fcm_dpo/margin": 43.53169250488281, "fcm_dpo/q_t": 0.35016459226608276, "grad_norm": 17.202301025390625, "learning_rate": 4.271028567242818e-09, "logits/chosen": 0.6269781589508057, "logits/rejected": 0.5125735402107239, "logps/chosen": -126.37629699707031, "logps/ref_chosen": -58.791053771972656, "logps/ref_rejected": -94.90802001953125, "logps/rejected": -206.02496337890625, "loss": 0.979, "margin_dpo/margin_mean": 43.53169250488281, "margin_dpo/margin_std": 56.438446044921875, "step": 627 }, { "epoch": 0.9493575207860923, "fcm_dpo/beta": 0.016308607533574104, "fcm_dpo/delta": -0.19555062055587769, "fcm_dpo/margin": 47.73688507080078, "fcm_dpo/q_t": 0.3357737958431244, "grad_norm": 17.734283447265625, "learning_rate": 4.0311050177251895e-09, "logits/chosen": 0.7015246152877808, "logits/rejected": 0.6676697731018066, "logps/chosen": -111.4091796875, "logps/ref_chosen": -52.80357360839844, "logps/ref_rejected": -76.49468994140625, "logps/rejected": -182.83717346191406, "loss": 0.9789, "margin_dpo/margin_mean": 47.73688507080078, "margin_dpo/margin_std": 57.750762939453125, "step": 628 }, { "epoch": 0.9508692365835223, "fcm_dpo/beta": 0.016032151877880096, "fcm_dpo/delta": 0.03456023707985878, "fcm_dpo/margin": 35.36682891845703, "fcm_dpo/q_t": 0.3803756535053253, "grad_norm": 15.833183288574219, "learning_rate": 3.798061746947995e-09, "logits/chosen": 0.69905686378479, "logits/rejected": 0.6941945552825928, "logps/chosen": -134.60055541992188, "logps/ref_chosen": -70.71749877929688, "logps/ref_rejected": -78.96273803710938, "logps/rejected": -178.21261596679688, "loss": 1.0171, "margin_dpo/margin_mean": 35.36682891845703, "margin_dpo/margin_std": 45.6728515625, "step": 629 }, { "epoch": 0.9523809523809523, "fcm_dpo/beta": 0.015975255519151688, "fcm_dpo/delta": -0.04418795555830002, "fcm_dpo/margin": 40.13007354736328, "fcm_dpo/q_t": 0.3665482997894287, "grad_norm": 13.421211242675781, "learning_rate": 3.5719052736323806e-09, "logits/chosen": 0.6364885568618774, "logits/rejected": 0.5930874347686768, "logps/chosen": -120.07940673828125, "logps/ref_chosen": -56.201412200927734, "logps/ref_rejected": -74.69807434082031, "logps/rejected": -178.70614624023438, "loss": 0.9952, "margin_dpo/margin_mean": 40.13007354736328, "margin_dpo/margin_std": 51.83021545410156, "step": 630 }, { "epoch": 0.9538926681783825, "fcm_dpo/beta": 0.015368154272437096, "fcm_dpo/delta": -0.12270902097225189, "fcm_dpo/margin": 46.13782501220703, "fcm_dpo/q_t": 0.35464248061180115, "grad_norm": 16.841995239257812, "learning_rate": 3.352641923861144e-09, "logits/chosen": 0.7928265929222107, "logits/rejected": 0.6835281252861023, "logps/chosen": -118.16952514648438, "logps/ref_chosen": -58.82059860229492, "logps/ref_rejected": -96.51437377929688, "logps/rejected": -202.0011444091797, "loss": 0.9862, "margin_dpo/margin_mean": 46.1378288269043, "margin_dpo/margin_std": 58.701908111572266, "step": 631 }, { "epoch": 0.9554043839758125, "fcm_dpo/beta": 0.015180578455328941, "fcm_dpo/delta": -0.13757643103599548, "fcm_dpo/margin": 47.93510437011719, "fcm_dpo/q_t": 0.34288290143013, "grad_norm": 14.054166793823242, "learning_rate": 3.140277830901428e-09, "logits/chosen": 0.7194072008132935, "logits/rejected": 0.6981650590896606, "logps/chosen": -120.36946105957031, "logps/ref_chosen": -58.786048889160156, "logps/ref_rejected": -67.21923828125, "logps/rejected": -176.73776245117188, "loss": 0.9196, "margin_dpo/margin_mean": 47.93510437011719, "margin_dpo/margin_std": 51.463253021240234, "step": 632 }, { "epoch": 0.9569160997732427, "fcm_dpo/beta": 0.015230704098939896, "fcm_dpo/delta": 0.06359954923391342, "fcm_dpo/margin": 35.429588317871094, "fcm_dpo/q_t": 0.3875540494918823, "grad_norm": 16.164337158203125, "learning_rate": 2.9348189350335007e-09, "logits/chosen": 0.6756185293197632, "logits/rejected": 0.615243136882782, "logps/chosen": -108.6497802734375, "logps/ref_chosen": -52.13019561767578, "logps/ref_rejected": -67.23016357421875, "logps/rejected": -159.1793212890625, "loss": 1.067, "margin_dpo/margin_mean": 35.429588317871094, "margin_dpo/margin_std": 53.79712677001953, "step": 633 }, { "epoch": 0.9584278155706727, "fcm_dpo/beta": 0.01600709743797779, "fcm_dpo/delta": 0.30318742990493774, "fcm_dpo/margin": 19.04725456237793, "fcm_dpo/q_t": 0.4390316605567932, "grad_norm": 21.8569393157959, "learning_rate": 2.736270983384276e-09, "logits/chosen": 0.7564660310745239, "logits/rejected": 0.7673421502113342, "logps/chosen": -129.17764282226562, "logps/ref_chosen": -60.97979736328125, "logps/ref_rejected": -58.50825119018555, "logps/rejected": -145.75335693359375, "loss": 1.2675, "margin_dpo/margin_mean": 19.047256469726562, "margin_dpo/margin_std": 52.853660583496094, "step": 634 }, { "epoch": 0.9599395313681028, "fcm_dpo/beta": 0.016620900481939316, "fcm_dpo/delta": 0.15847182273864746, "fcm_dpo/margin": 26.969745635986328, "fcm_dpo/q_t": 0.4112244248390198, "grad_norm": 16.682920455932617, "learning_rate": 2.5446395297668287e-09, "logits/chosen": 0.5778566002845764, "logits/rejected": 0.5155054330825806, "logps/chosen": -147.23043823242188, "logps/ref_chosen": -65.9730224609375, "logps/ref_rejected": -85.61317443847656, "logps/rejected": -193.84033203125, "loss": 1.2101, "margin_dpo/margin_mean": 26.969745635986328, "margin_dpo/margin_std": 60.308692932128906, "step": 635 }, { "epoch": 0.9614512471655329, "fcm_dpo/beta": 0.016455478966236115, "fcm_dpo/delta": -0.07432037591934204, "fcm_dpo/margin": 40.624237060546875, "fcm_dpo/q_t": 0.3579748272895813, "grad_norm": 14.709686279296875, "learning_rate": 2.359929934524829e-09, "logits/chosen": 0.6613017916679382, "logits/rejected": 0.5728839635848999, "logps/chosen": -111.96475219726562, "logps/ref_chosen": -49.140167236328125, "logps/ref_rejected": -81.26971435546875, "logps/rejected": -184.71853637695312, "loss": 0.9803, "margin_dpo/margin_mean": 40.624237060546875, "margin_dpo/margin_std": 51.248046875, "step": 636 }, { "epoch": 0.9629629629629629, "fcm_dpo/beta": 0.01680588349699974, "fcm_dpo/delta": 0.1063363254070282, "fcm_dpo/margin": 29.647350311279297, "fcm_dpo/q_t": 0.40189188718795776, "grad_norm": 18.443958282470703, "learning_rate": 2.1821473643827137e-09, "logits/chosen": 0.6527888774871826, "logits/rejected": 0.5813695192337036, "logps/chosen": -155.8303680419922, "logps/ref_chosen": -73.69658660888672, "logps/ref_rejected": -83.01487731933594, "logps/rejected": -194.79600524902344, "loss": 1.155, "margin_dpo/margin_mean": 29.647350311279297, "margin_dpo/margin_std": 57.743892669677734, "step": 637 }, { "epoch": 0.9644746787603931, "fcm_dpo/beta": 0.01687886193394661, "fcm_dpo/delta": -0.0013024341315031052, "fcm_dpo/margin": 35.60945129394531, "fcm_dpo/q_t": 0.3767626881599426, "grad_norm": 18.627864837646484, "learning_rate": 2.0112967923011646e-09, "logits/chosen": 0.6956614255905151, "logits/rejected": 0.6454405188560486, "logps/chosen": -135.54721069335938, "logps/ref_chosen": -62.78158187866211, "logps/ref_rejected": -85.40478515625, "logps/rejected": -193.7798614501953, "loss": 1.0436, "margin_dpo/margin_mean": 35.60945129394531, "margin_dpo/margin_std": 52.48899459838867, "step": 638 }, { "epoch": 0.9659863945578231, "fcm_dpo/beta": 0.016702190041542053, "fcm_dpo/delta": -0.07177025079727173, "fcm_dpo/margin": 39.934791564941406, "fcm_dpo/q_t": 0.3631782829761505, "grad_norm": 19.758222579956055, "learning_rate": 1.847382997337943e-09, "logits/chosen": 0.6821843385696411, "logits/rejected": 0.5689026117324829, "logps/chosen": -119.10699462890625, "logps/ref_chosen": -53.76658630371094, "logps/ref_rejected": -72.30009460449219, "logps/rejected": -177.57528686523438, "loss": 0.9971, "margin_dpo/margin_mean": 39.934791564941406, "margin_dpo/margin_std": 53.109901428222656, "step": 639 }, { "epoch": 0.9674981103552532, "fcm_dpo/beta": 0.01664363034069538, "fcm_dpo/delta": 0.033666323870420456, "fcm_dpo/margin": 34.142127990722656, "fcm_dpo/q_t": 0.3770233392715454, "grad_norm": 16.493188858032227, "learning_rate": 1.690410564514244e-09, "logits/chosen": 0.7594385147094727, "logits/rejected": 0.6996129751205444, "logps/chosen": -119.43183135986328, "logps/ref_chosen": -51.41777801513672, "logps/ref_rejected": -77.27879333496094, "logps/rejected": -179.43496704101562, "loss": 1.0664, "margin_dpo/margin_mean": 34.142127990722656, "margin_dpo/margin_std": 52.741458892822266, "step": 640 }, { "epoch": 0.9690098261526833, "fcm_dpo/beta": 0.01675679162144661, "fcm_dpo/delta": 0.039283640682697296, "fcm_dpo/margin": 33.59276580810547, "fcm_dpo/q_t": 0.3780820965766907, "grad_norm": 15.796224594116211, "learning_rate": 1.5403838846864692e-09, "logits/chosen": 0.6780893802642822, "logits/rejected": 0.6513991355895996, "logps/chosen": -141.1656951904297, "logps/ref_chosen": -71.0546646118164, "logps/ref_rejected": -82.2440185546875, "logps/rejected": -185.94781494140625, "loss": 1.0314, "margin_dpo/margin_mean": 33.59276580810547, "margin_dpo/margin_std": 46.151939392089844, "step": 641 }, { "epoch": 0.9705215419501134, "fcm_dpo/beta": 0.017033934593200684, "fcm_dpo/delta": 0.1573614478111267, "fcm_dpo/margin": 26.348485946655273, "fcm_dpo/q_t": 0.40937286615371704, "grad_norm": 21.933429718017578, "learning_rate": 1.3973071544233218e-09, "logits/chosen": 0.6313962936401367, "logits/rejected": 0.6405247449874878, "logps/chosen": -144.66749572753906, "logps/ref_chosen": -68.92927551269531, "logps/ref_rejected": -70.85682678222656, "logps/rejected": -172.9435272216797, "loss": 1.1935, "margin_dpo/margin_mean": 26.348485946655273, "margin_dpo/margin_std": 54.467716217041016, "step": 642 }, { "epoch": 0.9720332577475435, "fcm_dpo/beta": 0.017401862889528275, "fcm_dpo/delta": 0.02936522290110588, "fcm_dpo/margin": 32.88540267944336, "fcm_dpo/q_t": 0.3840622305870056, "grad_norm": 27.94525718688965, "learning_rate": 1.261184375888541e-09, "logits/chosen": 0.6284259557723999, "logits/rejected": 0.5389350056648254, "logps/chosen": -135.20681762695312, "logps/ref_chosen": -65.30903625488281, "logps/ref_rejected": -83.61613464355469, "logps/rejected": -186.39932250976562, "loss": 1.0915, "margin_dpo/margin_mean": 32.885398864746094, "margin_dpo/margin_std": 54.64806365966797, "step": 643 }, { "epoch": 0.9735449735449735, "fcm_dpo/beta": 0.017747625708580017, "fcm_dpo/delta": 0.05810273438692093, "fcm_dpo/margin": 30.634418487548828, "fcm_dpo/q_t": 0.3914340138435364, "grad_norm": 16.880868911743164, "learning_rate": 1.1320193567288527e-09, "logits/chosen": 0.7576817274093628, "logits/rejected": 0.7244468927383423, "logps/chosen": -113.37348937988281, "logps/ref_chosen": -51.002601623535156, "logps/ref_rejected": -64.46372985839844, "logps/rejected": -157.4690399169922, "loss": 1.1632, "margin_dpo/margin_mean": 30.634418487548828, "margin_dpo/margin_std": 60.088741302490234, "step": 644 }, { "epoch": 0.9750566893424036, "fcm_dpo/beta": 0.017687616869807243, "fcm_dpo/delta": 0.030620308592915535, "fcm_dpo/margin": 32.27980041503906, "fcm_dpo/q_t": 0.382048100233078, "grad_norm": 19.38361930847168, "learning_rate": 1.0098157099674987e-09, "logits/chosen": 0.6390470266342163, "logits/rejected": 0.6177409291267395, "logps/chosen": -129.932861328125, "logps/ref_chosen": -60.963409423828125, "logps/ref_rejected": -69.73353576660156, "logps/rejected": -170.9827880859375, "loss": 1.0597, "margin_dpo/margin_mean": 32.27980041503906, "margin_dpo/margin_std": 49.32701873779297, "step": 645 }, { "epoch": 0.9765684051398337, "fcm_dpo/beta": 0.01789352297782898, "fcm_dpo/delta": 0.039331234991550446, "fcm_dpo/margin": 31.456527709960938, "fcm_dpo/q_t": 0.3862152695655823, "grad_norm": 18.05316925048828, "learning_rate": 8.945768539031783e-10, "logits/chosen": 0.7234746813774109, "logits/rejected": 0.6710031032562256, "logps/chosen": -135.71275329589844, "logps/ref_chosen": -62.290069580078125, "logps/ref_rejected": -85.54812622070312, "logps/rejected": -190.42733764648438, "loss": 1.1176, "margin_dpo/margin_mean": 31.456527709960938, "margin_dpo/margin_std": 56.255977630615234, "step": 646 }, { "epoch": 0.9780801209372638, "fcm_dpo/beta": 0.017382677644491196, "fcm_dpo/delta": -0.22605100274085999, "fcm_dpo/margin": 46.494606018066406, "fcm_dpo/q_t": 0.334111750125885, "grad_norm": 18.57387924194336, "learning_rate": 7.863060120144316e-10, "logits/chosen": 0.7320365905761719, "logits/rejected": 0.6377066373825073, "logps/chosen": -139.47146606445312, "logps/ref_chosen": -67.515869140625, "logps/ref_rejected": -101.50871276855469, "logps/rejected": -219.95889282226562, "loss": 0.905, "margin_dpo/margin_mean": 46.494606018066406, "margin_dpo/margin_std": 52.26645278930664, "step": 647 }, { "epoch": 0.9795918367346939, "fcm_dpo/beta": 0.017017535865306854, "fcm_dpo/delta": -0.011982899159193039, "fcm_dpo/margin": 35.888240814208984, "fcm_dpo/q_t": 0.3698846697807312, "grad_norm": 15.682772636413574, "learning_rate": 6.850062128694045e-10, "logits/chosen": 0.6553179025650024, "logits/rejected": 0.5904830694198608, "logps/chosen": -135.27606201171875, "logps/ref_chosen": -64.59593963623047, "logps/ref_rejected": -83.384033203125, "logps/rejected": -189.952392578125, "loss": 1.0545, "margin_dpo/margin_mean": 35.88824462890625, "margin_dpo/margin_std": 53.6502571105957, "step": 648 }, { "epoch": 0.981103552532124, "fcm_dpo/beta": 0.016928700730204582, "fcm_dpo/delta": -0.009675152599811554, "fcm_dpo/margin": 35.89997100830078, "fcm_dpo/q_t": 0.3757920265197754, "grad_norm": 24.2320556640625, "learning_rate": 5.906802900412788e-10, "logits/chosen": 0.7032018899917603, "logits/rejected": 0.6470180749893188, "logps/chosen": -117.7570571899414, "logps/ref_chosen": -49.30964660644531, "logps/ref_rejected": -73.73710632324219, "logps/rejected": -178.08450317382812, "loss": 1.0723, "margin_dpo/margin_mean": 35.89997100830078, "margin_dpo/margin_std": 56.242855072021484, "step": 649 }, { "epoch": 0.982615268329554, "fcm_dpo/beta": 0.017035439610481262, "fcm_dpo/delta": 0.0025629187002778053, "fcm_dpo/margin": 35.0762825012207, "fcm_dpo/q_t": 0.38023632764816284, "grad_norm": 19.767776489257812, "learning_rate": 5.033308820289184e-10, "logits/chosen": 0.7739812135696411, "logits/rejected": 0.710270881652832, "logps/chosen": -119.4194107055664, "logps/ref_chosen": -55.06325912475586, "logps/ref_rejected": -77.39610290527344, "logps/rejected": -176.8285369873047, "loss": 1.1056, "margin_dpo/margin_mean": 35.07628631591797, "margin_dpo/margin_std": 59.95784378051758, "step": 650 }, { "epoch": 0.9841269841269841, "fcm_dpo/beta": 0.017338156700134277, "fcm_dpo/delta": 0.06786399334669113, "fcm_dpo/margin": 30.842973709106445, "fcm_dpo/q_t": 0.39106687903404236, "grad_norm": 18.881311416625977, "learning_rate": 4.2296043218295606e-10, "logits/chosen": 0.7450631856918335, "logits/rejected": 0.6721943616867065, "logps/chosen": -118.5746841430664, "logps/ref_chosen": -54.065162658691406, "logps/ref_rejected": -77.79080200195312, "logps/rejected": -173.143310546875, "loss": 1.0911, "margin_dpo/margin_mean": 30.842973709106445, "margin_dpo/margin_std": 50.0612678527832, "step": 651 }, { "epoch": 0.9856386999244142, "fcm_dpo/beta": 0.017395323142409325, "fcm_dpo/delta": 0.06672574579715729, "fcm_dpo/margin": 30.856266021728516, "fcm_dpo/q_t": 0.39265865087509155, "grad_norm": 19.28176498413086, "learning_rate": 3.4957118863768176e-10, "logits/chosen": 0.695549726486206, "logits/rejected": 0.6393716931343079, "logps/chosen": -140.04200744628906, "logps/ref_chosen": -63.64030456542969, "logps/ref_rejected": -78.86882019042969, "logps/rejected": -186.12680053710938, "loss": 1.1184, "margin_dpo/margin_mean": 30.856264114379883, "margin_dpo/margin_std": 54.582725524902344, "step": 652 }, { "epoch": 0.9871504157218443, "fcm_dpo/beta": 0.017352044582366943, "fcm_dpo/delta": -0.08883976191282272, "fcm_dpo/margin": 39.352874755859375, "fcm_dpo/q_t": 0.360774964094162, "grad_norm": 20.68842315673828, "learning_rate": 2.831652042480093e-10, "logits/chosen": 0.6648276448249817, "logits/rejected": 0.6195430159568787, "logps/chosen": -129.05657958984375, "logps/ref_chosen": -61.668373107910156, "logps/ref_rejected": -73.83012390136719, "logps/rejected": -180.57122802734375, "loss": 1.022, "margin_dpo/margin_mean": 39.352874755859375, "margin_dpo/margin_std": 56.605133056640625, "step": 653 }, { "epoch": 0.9886621315192744, "fcm_dpo/beta": 0.017307717353105545, "fcm_dpo/delta": 0.11917576938867569, "fcm_dpo/margin": 28.009092330932617, "fcm_dpo/q_t": 0.4049610197544098, "grad_norm": 19.33361053466797, "learning_rate": 2.2374433653205016e-10, "logits/chosen": 0.6821545362472534, "logits/rejected": 0.5850452184677124, "logps/chosen": -127.31944274902344, "logps/ref_chosen": -57.568267822265625, "logps/ref_rejected": -87.74789428710938, "logps/rejected": -185.50816345214844, "loss": 1.1431, "margin_dpo/margin_mean": 28.009090423583984, "margin_dpo/margin_std": 50.39651107788086, "step": 654 }, { "epoch": 0.9901738473167044, "fcm_dpo/beta": 0.016991257667541504, "fcm_dpo/delta": -0.1648244857788086, "fcm_dpo/margin": 43.98023986816406, "fcm_dpo/q_t": 0.33990585803985596, "grad_norm": 14.38284969329834, "learning_rate": 1.7131024761923852e-10, "logits/chosen": 0.6561794281005859, "logits/rejected": 0.5625892877578735, "logps/chosen": -108.23675537109375, "logps/ref_chosen": -52.14714813232422, "logps/ref_rejected": -80.85014343261719, "logps/rejected": -180.91998291015625, "loss": 0.9022, "margin_dpo/margin_mean": 43.98023986816406, "margin_dpo/margin_std": 43.58360290527344, "step": 655 }, { "epoch": 0.9916855631141346, "fcm_dpo/beta": 0.016844086349010468, "fcm_dpo/delta": -0.08287950605154037, "fcm_dpo/margin": 40.203338623046875, "fcm_dpo/q_t": 0.3588668704032898, "grad_norm": 15.275532722473145, "learning_rate": 1.2586440420372934e-10, "logits/chosen": 0.627634584903717, "logits/rejected": 0.5770636796951294, "logps/chosen": -143.4762725830078, "logps/ref_chosen": -73.25672912597656, "logps/ref_rejected": -85.35127258300781, "logps/rejected": -195.77413940429688, "loss": 0.9826, "margin_dpo/margin_mean": 40.203338623046875, "margin_dpo/margin_std": 51.91832733154297, "step": 656 }, { "epoch": 0.9931972789115646, "fcm_dpo/beta": 0.016424383968114853, "fcm_dpo/delta": -0.12891708314418793, "fcm_dpo/margin": 43.787635803222656, "fcm_dpo/q_t": 0.3502225875854492, "grad_norm": 15.052490234375, "learning_rate": 8.740807750345913e-11, "logits/chosen": 0.8135035037994385, "logits/rejected": 0.7229472398757935, "logps/chosen": -114.978271484375, "logps/ref_chosen": -49.72339630126953, "logps/ref_rejected": -75.1568603515625, "logps/rejected": -184.19937133789062, "loss": 0.9865, "margin_dpo/margin_mean": 43.78763961791992, "margin_dpo/margin_std": 57.974388122558594, "step": 657 }, { "epoch": 0.9947089947089947, "fcm_dpo/beta": 0.016662094742059708, "fcm_dpo/delta": 0.07724005728960037, "fcm_dpo/margin": 31.531997680664062, "fcm_dpo/q_t": 0.39289391040802, "grad_norm": 19.596899032592773, "learning_rate": 5.594234322453539e-11, "logits/chosen": 0.7062351703643799, "logits/rejected": 0.6689351797103882, "logps/chosen": -133.1918487548828, "logps/ref_chosen": -63.04634094238281, "logps/ref_rejected": -83.44963073730469, "logps/rejected": -185.12713623046875, "loss": 1.1634, "margin_dpo/margin_mean": 31.531997680664062, "margin_dpo/margin_std": 61.76860809326172, "step": 658 }, { "epoch": 0.9962207105064248, "fcm_dpo/beta": 0.016693908721208572, "fcm_dpo/delta": 0.10468055307865143, "fcm_dpo/margin": 29.910579681396484, "fcm_dpo/q_t": 0.39701542258262634, "grad_norm": 18.149425506591797, "learning_rate": 3.146808153123293e-11, "logits/chosen": 0.7528729438781738, "logits/rejected": 0.6886565685272217, "logps/chosen": -125.83087921142578, "logps/ref_chosen": -55.0802001953125, "logps/ref_rejected": -71.91049194335938, "logps/rejected": -172.57176208496094, "loss": 1.1432, "margin_dpo/margin_mean": 29.910579681396484, "margin_dpo/margin_std": 54.502227783203125, "step": 659 }, { "epoch": 0.9977324263038548, "fcm_dpo/beta": 0.016900621354579926, "fcm_dpo/delta": -0.035610876977443695, "fcm_dpo/margin": 37.475677490234375, "fcm_dpo/q_t": 0.3650882840156555, "grad_norm": 17.15520668029785, "learning_rate": 1.3985977021235829e-11, "logits/chosen": 0.7790225744247437, "logits/rejected": 0.7075534462928772, "logps/chosen": -124.61892700195312, "logps/ref_chosen": -54.525917053222656, "logps/ref_rejected": -81.23604583740234, "logps/rejected": -188.8047332763672, "loss": 0.988, "margin_dpo/margin_mean": 37.475677490234375, "margin_dpo/margin_std": 47.050201416015625, "step": 660 }, { "epoch": 0.999244142101285, "fcm_dpo/beta": 0.017178639769554138, "fcm_dpo/delta": 0.10657184571027756, "fcm_dpo/margin": 28.993637084960938, "fcm_dpo/q_t": 0.3975844383239746, "grad_norm": 20.30781364440918, "learning_rate": 3.4965187065971735e-12, "logits/chosen": 0.6451204419136047, "logits/rejected": 0.562833845615387, "logps/chosen": -139.31686401367188, "logps/ref_chosen": -60.37263870239258, "logps/ref_rejected": -77.42874145507812, "logps/rejected": -185.3666229248047, "loss": 1.153, "margin_dpo/margin_mean": 28.99363899230957, "margin_dpo/margin_std": 56.09125518798828, "step": 661 }, { "epoch": 0.999244142101285, "step": 661, "total_flos": 0.0, "train_loss": 1.0886367675756723, "train_runtime": 1755.0349, "train_samples_per_second": 24.123, "train_steps_per_second": 0.377 } ], "logging_steps": 1, "max_steps": 661, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }