{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999244142101285, "eval_steps": 200, "global_step": 661, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0015117157974300832, "fcm_dpo/beta": 0.10886543244123459, "fcm_dpo/delta": 0.4247117042541504, "fcm_dpo/margin": -0.0013532638549804688, "fcm_dpo/q_t": 0.5000430345535278, "grad_norm": 30.727170944213867, "learning_rate": 0.0, "logits/chosen": 0.13337239623069763, "logits/rejected": 0.12492949515581131, "logps/chosen": -64.5841293334961, "logps/ref_chosen": -64.61280822753906, "logps/ref_rejected": -64.17195129394531, "logps/rejected": -64.14192199707031, "loss": 1.3866, "margin_dpo/margin_mean": -0.0013527870178222656, "margin_dpo/margin_std": 0.2561596930027008, "step": 1 }, { "epoch": 0.0030234315948601664, "fcm_dpo/beta": 0.11363507062196732, "fcm_dpo/delta": 0.4199795424938202, "fcm_dpo/margin": 0.037450045347213745, "fcm_dpo/q_t": 0.49898096919059753, "grad_norm": 31.676591873168945, "learning_rate": 7.462686567164179e-09, "logits/chosen": 0.09414851665496826, "logits/rejected": 0.07363267242908478, "logps/chosen": -56.101890563964844, "logps/ref_chosen": -56.0989990234375, "logps/ref_rejected": -66.59971618652344, "logps/rejected": -66.64006042480469, "loss": 1.3821, "margin_dpo/margin_mean": 0.03744968771934509, "margin_dpo/margin_std": 0.27811938524246216, "step": 2 }, { "epoch": 0.0045351473922902496, "fcm_dpo/beta": 0.12360024452209473, "fcm_dpo/delta": 0.42059752345085144, "fcm_dpo/margin": 0.004606842994689941, "fcm_dpo/q_t": 0.4998636245727539, "grad_norm": 38.59760665893555, "learning_rate": 1.4925373134328357e-08, "logits/chosen": 0.09945081174373627, "logits/rejected": 0.06145160272717476, "logps/chosen": -65.43189239501953, "logps/ref_chosen": -65.45726013183594, "logps/ref_rejected": -90.82853698730469, "logps/rejected": -90.80776977539062, "loss": 1.3857, "margin_dpo/margin_mean": 0.004606842994689941, "margin_dpo/margin_std": 0.2735193371772766, "step": 3 }, { "epoch": 0.006046863189720333, "fcm_dpo/beta": 0.14635387063026428, "fcm_dpo/delta": 0.8448625206947327, "fcm_dpo/margin": 0.037091463804244995, "fcm_dpo/q_t": 0.49872055649757385, "grad_norm": 49.67146682739258, "learning_rate": 2.2388059701492534e-08, "logits/chosen": 0.10524652898311615, "logits/rejected": 0.08913983404636383, "logps/chosen": -76.82958984375, "logps/ref_chosen": -76.86018371582031, "logps/ref_rejected": -79.91523742675781, "logps/rejected": -79.92173767089844, "loss": 1.3815, "margin_dpo/margin_mean": 0.03709092736244202, "margin_dpo/margin_std": 0.3865681290626526, "step": 4 }, { "epoch": 0.007558578987150416, "fcm_dpo/beta": 0.1591610610485077, "fcm_dpo/delta": 0.41841045022010803, "fcm_dpo/margin": 0.01996675133705139, "fcm_dpo/q_t": 0.49924030900001526, "grad_norm": 47.10337829589844, "learning_rate": 2.9850746268656714e-08, "logits/chosen": 0.08479103446006775, "logits/rejected": 0.04581887274980545, "logps/chosen": -62.968536376953125, "logps/ref_chosen": -62.97134017944336, "logps/ref_rejected": -79.9192123413086, "logps/rejected": -79.93637084960938, "loss": 1.3833, "margin_dpo/margin_mean": 0.01996755599975586, "margin_dpo/margin_std": 0.2942441701889038, "step": 5 }, { "epoch": 0.009070294784580499, "fcm_dpo/beta": 0.16581664979457855, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.02515178918838501, "fcm_dpo/q_t": 0.5010402798652649, "grad_norm": 49.417415618896484, "learning_rate": 3.731343283582089e-08, "logits/chosen": 0.15653175115585327, "logits/rejected": 0.11625839024782181, "logps/chosen": -51.34194564819336, "logps/ref_chosen": -51.30736541748047, "logps/ref_rejected": -82.77239227294922, "logps/rejected": -82.78182220458984, "loss": 1.3911, "margin_dpo/margin_mean": -0.025151371955871582, "margin_dpo/margin_std": 0.30363306403160095, "step": 6 }, { "epoch": 0.010582010582010581, "fcm_dpo/beta": 0.18848590552806854, "fcm_dpo/delta": 0.8485120534896851, "fcm_dpo/margin": 0.008793026208877563, "fcm_dpo/q_t": 0.49962806701660156, "grad_norm": 51.45716857910156, "learning_rate": 4.477611940298507e-08, "logits/chosen": 0.02227121591567993, "logits/rejected": -0.021784139797091484, "logps/chosen": -51.428985595703125, "logps/ref_chosen": -51.45941162109375, "logps/ref_rejected": -66.3828125, "logps/rejected": -66.36117553710938, "loss": 1.3852, "margin_dpo/margin_mean": 0.008793264627456665, "margin_dpo/margin_std": 0.2343991994857788, "step": 7 }, { "epoch": 0.012093726379440665, "fcm_dpo/beta": 0.2231920063495636, "fcm_dpo/delta": 0.8420383930206299, "fcm_dpo/margin": 0.03777146339416504, "fcm_dpo/q_t": 0.49801886081695557, "grad_norm": 63.04829025268555, "learning_rate": 5.223880597014925e-08, "logits/chosen": 0.07406742870807648, "logits/rejected": 0.05182163789868355, "logps/chosen": -62.19073486328125, "logps/ref_chosen": -62.197547912597656, "logps/ref_rejected": -74.66180419921875, "logps/rejected": -74.69276428222656, "loss": 1.3792, "margin_dpo/margin_mean": 0.03777092695236206, "margin_dpo/margin_std": 0.34941548109054565, "step": 8 }, { "epoch": 0.013605442176870748, "fcm_dpo/beta": 0.25312358140945435, "fcm_dpo/delta": 0.42442524433135986, "fcm_dpo/margin": -0.004266202449798584, "fcm_dpo/q_t": 0.5002864599227905, "grad_norm": 80.34166717529297, "learning_rate": 5.970149253731343e-08, "logits/chosen": 0.17168600857257843, "logits/rejected": 0.11221244931221008, "logps/chosen": -55.64410400390625, "logps/ref_chosen": -55.629722595214844, "logps/ref_rejected": -86.21221923828125, "logps/rejected": -86.22233581542969, "loss": 1.3894, "margin_dpo/margin_mean": -0.004266202449798584, "margin_dpo/margin_std": 0.35202211141586304, "step": 9 }, { "epoch": 0.015117157974300832, "fcm_dpo/beta": 0.27516475319862366, "fcm_dpo/delta": 0.4174611568450928, "fcm_dpo/margin": 0.015347898006439209, "fcm_dpo/q_t": 0.4991258382797241, "grad_norm": 81.2122802734375, "learning_rate": 6.71641791044776e-08, "logits/chosen": 0.1367185264825821, "logits/rejected": 0.10551740229129791, "logps/chosen": -62.67530059814453, "logps/ref_chosen": -62.69060134887695, "logps/ref_rejected": -90.610107421875, "logps/rejected": -90.61016845703125, "loss": 1.3847, "margin_dpo/margin_mean": 0.015347808599472046, "margin_dpo/margin_std": 0.37078261375427246, "step": 10 }, { "epoch": 0.016628873771730914, "fcm_dpo/beta": 0.2990139424800873, "fcm_dpo/delta": 0.41560107469558716, "fcm_dpo/margin": 0.029833942651748657, "fcm_dpo/q_t": 0.49797219038009644, "grad_norm": 87.37360382080078, "learning_rate": 7.462686567164178e-08, "logits/chosen": 0.1075279489159584, "logits/rejected": 0.10058905184268951, "logps/chosen": -65.76591491699219, "logps/ref_chosen": -65.76712036132812, "logps/ref_rejected": -72.4764633178711, "logps/rejected": -72.50508880615234, "loss": 1.3795, "margin_dpo/margin_mean": 0.029834330081939697, "margin_dpo/margin_std": 0.30201759934425354, "step": 11 }, { "epoch": 0.018140589569160998, "fcm_dpo/beta": 0.3249116837978363, "fcm_dpo/delta": 0.415316104888916, "fcm_dpo/margin": 0.021361559629440308, "fcm_dpo/q_t": 0.49845293164253235, "grad_norm": 92.45681762695312, "learning_rate": 8.208955223880596e-08, "logits/chosen": 0.010168695822358131, "logits/rejected": -0.005617397837340832, "logps/chosen": -60.72811508178711, "logps/ref_chosen": -60.704891204833984, "logps/ref_rejected": -69.41564178466797, "logps/rejected": -69.4602279663086, "loss": 1.3823, "margin_dpo/margin_mean": 0.021361559629440308, "margin_dpo/margin_std": 0.3284778594970703, "step": 12 }, { "epoch": 0.019652305366591082, "fcm_dpo/beta": 0.33930647373199463, "fcm_dpo/delta": 0.42449629306793213, "fcm_dpo/margin": -0.04561507701873779, "fcm_dpo/q_t": 0.5036793947219849, "grad_norm": 99.87137603759766, "learning_rate": 8.955223880597014e-08, "logits/chosen": 0.10898313671350479, "logits/rejected": 0.046505216509103775, "logps/chosen": -49.909000396728516, "logps/ref_chosen": -49.90925598144531, "logps/ref_rejected": -92.37818145751953, "logps/rejected": -92.33231353759766, "loss": 1.4034, "margin_dpo/margin_mean": -0.04561561346054077, "margin_dpo/margin_std": 0.2739795744419098, "step": 13 }, { "epoch": 0.021164021164021163, "fcm_dpo/beta": 0.4011165499687195, "fcm_dpo/delta": 0.8268953561782837, "fcm_dpo/margin": 0.060496360063552856, "fcm_dpo/q_t": 0.49426159262657166, "grad_norm": 115.7387466430664, "learning_rate": 9.701492537313432e-08, "logits/chosen": 0.10329781472682953, "logits/rejected": 0.08507229387760162, "logps/chosen": -60.59901428222656, "logps/ref_chosen": -60.61879348754883, "logps/ref_rejected": -71.79306030273438, "logps/rejected": -71.83377838134766, "loss": 1.3648, "margin_dpo/margin_mean": 0.06049656867980957, "margin_dpo/margin_std": 0.2852107584476471, "step": 14 }, { "epoch": 0.022675736961451247, "fcm_dpo/beta": 0.4355963468551636, "fcm_dpo/delta": 0.42002391815185547, "fcm_dpo/margin": -0.011380374431610107, "fcm_dpo/q_t": 0.5011443495750427, "grad_norm": 146.9692840576172, "learning_rate": 1.044776119402985e-07, "logits/chosen": 0.07006427645683289, "logits/rejected": 0.02684413641691208, "logps/chosen": -63.50836944580078, "logps/ref_chosen": -63.46953582763672, "logps/ref_rejected": -88.88951110839844, "logps/rejected": -88.91697692871094, "loss": 1.3975, "margin_dpo/margin_mean": -0.011380106210708618, "margin_dpo/margin_std": 0.38208454847335815, "step": 15 }, { "epoch": 0.02418745275888133, "fcm_dpo/beta": 0.49392595887184143, "fcm_dpo/delta": 0.42274531722068787, "fcm_dpo/margin": 0.0010766535997390747, "fcm_dpo/q_t": 0.49993181228637695, "grad_norm": 134.13528442382812, "learning_rate": 1.1194029850746268e-07, "logits/chosen": 0.12334809452295303, "logits/rejected": 0.08568301796913147, "logps/chosen": -46.53916549682617, "logps/ref_chosen": -46.53229904174805, "logps/ref_rejected": -74.27533721923828, "logps/rejected": -74.28327941894531, "loss": 1.3916, "margin_dpo/margin_mean": 0.0010768026113510132, "margin_dpo/margin_std": 0.3080522418022156, "step": 16 }, { "epoch": 0.025699168556311415, "fcm_dpo/beta": 0.49392595887184143, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.009143054485321045, "fcm_dpo/q_t": 0.5011139512062073, "grad_norm": 168.68765258789062, "learning_rate": 1.1940298507462686e-07, "logits/chosen": 0.03793691098690033, "logits/rejected": 0.019878219813108444, "logps/chosen": -64.09962463378906, "logps/ref_chosen": -64.07783508300781, "logps/ref_rejected": -86.40876770019531, "logps/rejected": -86.42141723632812, "loss": 1.3977, "margin_dpo/margin_mean": -0.009143710136413574, "margin_dpo/margin_std": 0.32370686531066895, "step": 17 }, { "epoch": 0.027210884353741496, "fcm_dpo/beta": 0.5146567821502686, "fcm_dpo/delta": 0.40302640199661255, "fcm_dpo/margin": 0.03949823975563049, "fcm_dpo/q_t": 0.49513158202171326, "grad_norm": 141.66015625, "learning_rate": 1.2686567164179106e-07, "logits/chosen": 0.10013440996408463, "logits/rejected": 0.05400983989238739, "logps/chosen": -44.85398864746094, "logps/ref_chosen": -44.87433624267578, "logps/ref_rejected": -70.97604370117188, "logps/rejected": -70.99519348144531, "loss": 1.3699, "margin_dpo/margin_mean": 0.03949823975563049, "margin_dpo/margin_std": 0.26558351516723633, "step": 18 }, { "epoch": 0.02872260015117158, "fcm_dpo/beta": 0.6078168153762817, "fcm_dpo/delta": 0.8412047624588013, "fcm_dpo/margin": 0.016053587198257446, "fcm_dpo/q_t": 0.4977712035179138, "grad_norm": 190.8359832763672, "learning_rate": 1.343283582089552e-07, "logits/chosen": 0.07114684581756592, "logits/rejected": 0.057568684220314026, "logps/chosen": -68.10987854003906, "logps/ref_chosen": -68.1598129272461, "logps/ref_rejected": -81.17138671875, "logps/rejected": -81.13750457763672, "loss": 1.3852, "margin_dpo/margin_mean": 0.016053855419158936, "margin_dpo/margin_std": 0.3032751679420471, "step": 19 }, { "epoch": 0.030234315948601664, "fcm_dpo/beta": 0.7179453372955322, "fcm_dpo/delta": 0.8326936960220337, "fcm_dpo/margin": 0.027033761143684387, "fcm_dpo/q_t": 0.49571579694747925, "grad_norm": 209.52816772460938, "learning_rate": 1.4179104477611938e-07, "logits/chosen": 0.15309768915176392, "logits/rejected": 0.12928786873817444, "logps/chosen": -53.66864013671875, "logps/ref_chosen": -53.67856216430664, "logps/ref_rejected": -74.16911315917969, "logps/rejected": -74.18623352050781, "loss": 1.3828, "margin_dpo/margin_mean": 0.027033761143684387, "margin_dpo/margin_std": 0.33954381942749023, "step": 20 }, { "epoch": 0.031746031746031744, "fcm_dpo/beta": 0.8144156336784363, "fcm_dpo/delta": 0.4235011041164398, "fcm_dpo/margin": -0.015689924359321594, "fcm_dpo/q_t": 0.5032085180282593, "grad_norm": 245.65411376953125, "learning_rate": 1.4925373134328355e-07, "logits/chosen": 0.09726514667272568, "logits/rejected": 0.07214757055044174, "logps/chosen": -64.7015151977539, "logps/ref_chosen": -64.70155334472656, "logps/ref_rejected": -81.02095031738281, "logps/rejected": -81.00521087646484, "loss": 1.4162, "margin_dpo/margin_mean": -0.01569044589996338, "margin_dpo/margin_std": 0.32213884592056274, "step": 21 }, { "epoch": 0.03325774754346183, "fcm_dpo/beta": 0.9216822385787964, "fcm_dpo/delta": 0.8162908554077148, "fcm_dpo/margin": 0.03929051756858826, "fcm_dpo/q_t": 0.49158748984336853, "grad_norm": 258.1221923828125, "learning_rate": 1.5671641791044775e-07, "logits/chosen": 0.0003290371969342232, "logits/rejected": -0.02038179337978363, "logps/chosen": -58.05890655517578, "logps/ref_chosen": -58.03599166870117, "logps/ref_rejected": -80.72721862792969, "logps/rejected": -80.78941345214844, "loss": 1.3677, "margin_dpo/margin_mean": 0.03929010033607483, "margin_dpo/margin_std": 0.2883184552192688, "step": 22 }, { "epoch": 0.03476946334089191, "fcm_dpo/beta": 0.9588446021080017, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.004442840814590454, "fcm_dpo/q_t": 0.5012105703353882, "grad_norm": 318.4825439453125, "learning_rate": 1.6417910447761193e-07, "logits/chosen": 0.15388762950897217, "logits/rejected": 0.12773939967155457, "logps/chosen": -66.37007141113281, "logps/ref_chosen": -66.35608673095703, "logps/ref_rejected": -93.02769470214844, "logps/rejected": -93.0372314453125, "loss": 1.4114, "margin_dpo/margin_mean": -0.004443138837814331, "margin_dpo/margin_std": 0.3012203574180603, "step": 23 }, { "epoch": 0.036281179138321996, "fcm_dpo/beta": 0.9588446021080017, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.04989251494407654, "fcm_dpo/q_t": 0.511908233165741, "grad_norm": 262.23992919921875, "learning_rate": 1.716417910447761e-07, "logits/chosen": 0.1356058567762375, "logits/rejected": 0.10264482349157333, "logps/chosen": -54.50740051269531, "logps/ref_chosen": -54.461238861083984, "logps/ref_rejected": -68.33817291259766, "logps/rejected": -68.33444213867188, "loss": 1.4523, "margin_dpo/margin_mean": -0.04989221692085266, "margin_dpo/margin_std": 0.270521342754364, "step": 24 }, { "epoch": 0.03779289493575208, "fcm_dpo/beta": 1.0845671892166138, "fcm_dpo/delta": 0.8133487701416016, "fcm_dpo/margin": 0.03639337420463562, "fcm_dpo/q_t": 0.4910878539085388, "grad_norm": 317.3623352050781, "learning_rate": 1.7910447761194027e-07, "logits/chosen": 0.10586078464984894, "logits/rejected": 0.05431347340345383, "logps/chosen": -60.022918701171875, "logps/ref_chosen": -60.00420379638672, "logps/ref_rejected": -90.47376251220703, "logps/rejected": -90.52886962890625, "loss": 1.3718, "margin_dpo/margin_mean": 0.036393433809280396, "margin_dpo/margin_std": 0.2930064797401428, "step": 25 }, { "epoch": 0.039304610733182165, "fcm_dpo/beta": 1.2240636348724365, "fcm_dpo/delta": 0.4076637029647827, "fcm_dpo/margin": 0.009161576628684998, "fcm_dpo/q_t": 0.49825724959373474, "grad_norm": 367.88232421875, "learning_rate": 1.8656716417910447e-07, "logits/chosen": 0.11543253064155579, "logits/rejected": 0.09682787954807281, "logps/chosen": -56.85016632080078, "logps/ref_chosen": -56.81915283203125, "logps/ref_rejected": -77.84333038330078, "logps/rejected": -77.88349914550781, "loss": 1.4092, "margin_dpo/margin_mean": 0.009161293506622314, "margin_dpo/margin_std": 0.3032963275909424, "step": 26 }, { "epoch": 0.04081632653061224, "fcm_dpo/beta": 1.2240636348724365, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.011037558317184448, "fcm_dpo/q_t": 0.5028091669082642, "grad_norm": 365.7059326171875, "learning_rate": 1.9402985074626865e-07, "logits/chosen": 0.11122125387191772, "logits/rejected": 0.0861082598567009, "logps/chosen": -62.909698486328125, "logps/ref_chosen": -62.87702560424805, "logps/ref_rejected": -71.34437561035156, "logps/rejected": -71.36601257324219, "loss": 1.4414, "margin_dpo/margin_mean": -0.011037617921829224, "margin_dpo/margin_std": 0.33684635162353516, "step": 27 }, { "epoch": 0.042328042328042326, "fcm_dpo/beta": 1.3243428468704224, "fcm_dpo/delta": 0.3937011957168579, "fcm_dpo/margin": -0.004308909177780151, "fcm_dpo/q_t": 0.5018905401229858, "grad_norm": 379.9216613769531, "learning_rate": 2.0149253731343282e-07, "logits/chosen": 0.05917387455701828, "logits/rejected": 0.050440460443496704, "logps/chosen": -59.853607177734375, "logps/ref_chosen": -59.8333740234375, "logps/ref_rejected": -70.39804077148438, "logps/rejected": -70.4139633178711, "loss": 1.4359, "margin_dpo/margin_mean": -0.004308879375457764, "margin_dpo/margin_std": 0.31428390741348267, "step": 28 }, { "epoch": 0.04383975812547241, "fcm_dpo/beta": 1.4270368814468384, "fcm_dpo/delta": 0.3734189569950104, "fcm_dpo/margin": 0.02456316351890564, "fcm_dpo/q_t": 0.4919978380203247, "grad_norm": 459.2485656738281, "learning_rate": 2.08955223880597e-07, "logits/chosen": 0.11985379457473755, "logits/rejected": 0.10274408757686615, "logps/chosen": -74.13998413085938, "logps/ref_chosen": -74.12020111083984, "logps/ref_rejected": -83.33099365234375, "logps/rejected": -83.37533569335938, "loss": 1.4074, "margin_dpo/margin_mean": 0.02456343173980713, "margin_dpo/margin_std": 0.327664315700531, "step": 29 }, { "epoch": 0.045351473922902494, "fcm_dpo/beta": 1.536737322807312, "fcm_dpo/delta": 0.3703068792819977, "fcm_dpo/margin": 0.007722645998001099, "fcm_dpo/q_t": 0.49483001232147217, "grad_norm": 477.6687316894531, "learning_rate": 2.1641791044776117e-07, "logits/chosen": 0.11647738516330719, "logits/rejected": 0.0625992864370346, "logps/chosen": -50.76539611816406, "logps/ref_chosen": -50.75128936767578, "logps/ref_rejected": -89.29063415527344, "logps/rejected": -89.31246948242188, "loss": 1.4322, "margin_dpo/margin_mean": 0.007722735404968262, "margin_dpo/margin_std": 0.31650030612945557, "step": 30 }, { "epoch": 0.04686318972033258, "fcm_dpo/beta": 1.5832395553588867, "fcm_dpo/delta": 0.29380002617836, "fcm_dpo/margin": 0.08262354135513306, "fcm_dpo/q_t": 0.4720662236213684, "grad_norm": 524.7825927734375, "learning_rate": 2.2388059701492537e-07, "logits/chosen": 0.10439669340848923, "logits/rejected": 0.05832277983427048, "logps/chosen": -65.34173583984375, "logps/ref_chosen": -65.33675384521484, "logps/ref_rejected": -100.76666259765625, "logps/rejected": -100.85426330566406, "loss": 1.326, "margin_dpo/margin_mean": 0.08262395858764648, "margin_dpo/margin_std": 0.3305758833885193, "step": 31 }, { "epoch": 0.04837490551776266, "fcm_dpo/beta": 1.8284441232681274, "fcm_dpo/delta": 0.7290701866149902, "fcm_dpo/margin": 0.06868910789489746, "fcm_dpo/q_t": 0.4716106355190277, "grad_norm": 563.244873046875, "learning_rate": 2.3134328358208954e-07, "logits/chosen": 0.08951601386070251, "logits/rejected": 0.08166046440601349, "logps/chosen": -67.16220092773438, "logps/ref_chosen": -67.18333435058594, "logps/ref_rejected": -82.80763244628906, "logps/rejected": -82.85519409179688, "loss": 1.3435, "margin_dpo/margin_mean": 0.06868937611579895, "margin_dpo/margin_std": 0.31365060806274414, "step": 32 }, { "epoch": 0.049886621315192746, "fcm_dpo/beta": 2.125655174255371, "fcm_dpo/delta": 0.7907329201698303, "fcm_dpo/margin": 0.030030831694602966, "fcm_dpo/q_t": 0.4842595160007477, "grad_norm": 734.3580322265625, "learning_rate": 2.388059701492537e-07, "logits/chosen": 0.028011824935674667, "logits/rejected": 0.0021050497889518738, "logps/chosen": -64.11384582519531, "logps/ref_chosen": -64.03948211669922, "logps/ref_rejected": -75.68357849121094, "logps/rejected": -75.7879867553711, "loss": 1.4391, "margin_dpo/margin_mean": 0.030031487345695496, "margin_dpo/margin_std": 0.3220931887626648, "step": 33 }, { "epoch": 0.05139833711262283, "fcm_dpo/beta": 2.4897372722625732, "fcm_dpo/delta": 0.8030319809913635, "fcm_dpo/margin": 0.02098938822746277, "fcm_dpo/q_t": 0.48873692750930786, "grad_norm": 721.6285400390625, "learning_rate": 2.4626865671641786e-07, "logits/chosen": 0.08669179677963257, "logits/rejected": 0.05742088705301285, "logps/chosen": -53.719154357910156, "logps/ref_chosen": -53.6642951965332, "logps/ref_rejected": -65.77989959716797, "logps/rejected": -65.85574340820312, "loss": 1.465, "margin_dpo/margin_mean": 0.020989298820495605, "margin_dpo/margin_std": 0.29796358942985535, "step": 34 }, { "epoch": 0.05291005291005291, "fcm_dpo/beta": 2.5934488773345947, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.02340644598007202, "fcm_dpo/q_t": 0.5208801031112671, "grad_norm": 830.2484741210938, "learning_rate": 2.537313432835821e-07, "logits/chosen": 0.06822899729013443, "logits/rejected": 0.045317377895116806, "logps/chosen": -61.09272766113281, "logps/ref_chosen": -61.01686096191406, "logps/ref_rejected": -72.78598022460938, "logps/rejected": -72.83843994140625, "loss": 1.6113, "margin_dpo/margin_mean": -0.023406386375427246, "margin_dpo/margin_std": 0.3278309106826782, "step": 35 }, { "epoch": 0.05442176870748299, "fcm_dpo/beta": 2.6975440979003906, "fcm_dpo/delta": 0.38608044385910034, "fcm_dpo/margin": 0.0097598135471344, "fcm_dpo/q_t": 0.491857647895813, "grad_norm": 901.6752319335938, "learning_rate": 2.611940298507462e-07, "logits/chosen": 0.09897307306528091, "logits/rejected": 0.04630749300122261, "logps/chosen": -50.61589050292969, "logps/ref_chosen": -50.53736114501953, "logps/ref_rejected": -78.11678314208984, "logps/rejected": -78.20507049560547, "loss": 1.5817, "margin_dpo/margin_mean": 0.009759783744812012, "margin_dpo/margin_std": 0.3618467152118683, "step": 36 }, { "epoch": 0.055933484504913075, "fcm_dpo/beta": 3.038989543914795, "fcm_dpo/delta": 0.5818780660629272, "fcm_dpo/margin": 0.09463274478912354, "fcm_dpo/q_t": 0.4537751078605652, "grad_norm": 1168.1463623046875, "learning_rate": 2.686567164179104e-07, "logits/chosen": 0.10465328395366669, "logits/rejected": 0.024744877591729164, "logps/chosen": -59.59927749633789, "logps/ref_chosen": -59.55394744873047, "logps/ref_rejected": -108.27702331542969, "logps/rejected": -108.4169921875, "loss": 1.4775, "margin_dpo/margin_mean": 0.09463286399841309, "margin_dpo/margin_std": 0.4426842927932739, "step": 37 }, { "epoch": 0.05744520030234316, "fcm_dpo/beta": 3.270552158355713, "fcm_dpo/delta": 0.19189269840717316, "fcm_dpo/margin": 0.04368185997009277, "fcm_dpo/q_t": 0.47869473695755005, "grad_norm": 995.4248046875, "learning_rate": 2.761194029850746e-07, "logits/chosen": 0.048389457166194916, "logits/rejected": 0.03467674180865288, "logps/chosen": -65.86851501464844, "logps/ref_chosen": -65.78836059570312, "logps/ref_rejected": -76.1619873046875, "logps/rejected": -76.28582763671875, "loss": 1.6706, "margin_dpo/margin_mean": 0.043681979179382324, "margin_dpo/margin_std": 0.4209554195404053, "step": 38 }, { "epoch": 0.05895691609977324, "fcm_dpo/beta": 3.536396026611328, "fcm_dpo/delta": 0.4801386296749115, "fcm_dpo/margin": 0.108737051486969, "fcm_dpo/q_t": 0.4242640733718872, "grad_norm": 939.8898315429688, "learning_rate": 2.8358208955223876e-07, "logits/chosen": 0.15147145092487335, "logits/rejected": 0.12492187321186066, "logps/chosen": -57.238365173339844, "logps/ref_chosen": -57.17681121826172, "logps/ref_rejected": -79.486328125, "logps/rejected": -79.65663146972656, "loss": 1.2786, "margin_dpo/margin_mean": 0.10873657464981079, "margin_dpo/margin_std": 0.2975834012031555, "step": 39 }, { "epoch": 0.06046863189720333, "fcm_dpo/beta": 3.880685567855835, "fcm_dpo/delta": 0.3751264214515686, "fcm_dpo/margin": -0.05382639169692993, "fcm_dpo/q_t": 0.5360496044158936, "grad_norm": 1590.185302734375, "learning_rate": 2.9104477611940296e-07, "logits/chosen": 0.10350456833839417, "logits/rejected": 0.05420894920825958, "logps/chosen": -61.45352554321289, "logps/ref_chosen": -61.33416748046875, "logps/ref_rejected": -79.10697174072266, "logps/rejected": -79.1725082397461, "loss": 1.9531, "margin_dpo/margin_mean": -0.05382627248764038, "margin_dpo/margin_std": 0.31318140029907227, "step": 40 }, { "epoch": 0.06198034769463341, "fcm_dpo/beta": 4.317322254180908, "fcm_dpo/delta": 0.6840596199035645, "fcm_dpo/margin": 0.04054167866706848, "fcm_dpo/q_t": 0.4672660231590271, "grad_norm": 1532.7891845703125, "learning_rate": 2.985074626865671e-07, "logits/chosen": 0.03596208989620209, "logits/rejected": 0.01636538654565811, "logps/chosen": -67.6545639038086, "logps/ref_chosen": -67.5467300415039, "logps/ref_rejected": -83.87788391113281, "logps/rejected": -84.02627563476562, "loss": 1.8037, "margin_dpo/margin_mean": 0.04054197669029236, "margin_dpo/margin_std": 0.402584969997406, "step": 41 }, { "epoch": 0.06349206349206349, "fcm_dpo/beta": 4.645079612731934, "fcm_dpo/delta": 0.2149239331483841, "fcm_dpo/margin": 0.013764455914497375, "fcm_dpo/q_t": 0.47997668385505676, "grad_norm": 1505.7274169921875, "learning_rate": 3.059701492537313e-07, "logits/chosen": 0.051899224519729614, "logits/rejected": 0.03025246225297451, "logps/chosen": -61.4012336730957, "logps/ref_chosen": -61.26485824584961, "logps/ref_rejected": -76.3629150390625, "logps/rejected": -76.51305389404297, "loss": 1.871, "margin_dpo/margin_mean": 0.013764426112174988, "margin_dpo/margin_std": 0.35858240723609924, "step": 42 }, { "epoch": 0.06500377928949358, "fcm_dpo/beta": 4.940211296081543, "fcm_dpo/delta": 0.4239178001880646, "fcm_dpo/margin": 0.09020450711250305, "fcm_dpo/q_t": 0.4286651015281677, "grad_norm": 1634.933349609375, "learning_rate": 3.134328358208955e-07, "logits/chosen": 0.08877776563167572, "logits/rejected": 0.07763132452964783, "logps/chosen": -71.8860092163086, "logps/ref_chosen": -71.80902862548828, "logps/ref_rejected": -81.12464141845703, "logps/rejected": -81.29181671142578, "loss": 1.7431, "margin_dpo/margin_mean": 0.09020435810089111, "margin_dpo/margin_std": 0.4161643981933594, "step": 43 }, { "epoch": 0.06651549508692366, "fcm_dpo/beta": 5.494063854217529, "fcm_dpo/delta": 0.41538116335868835, "fcm_dpo/margin": -0.00924178957939148, "fcm_dpo/q_t": 0.520038366317749, "grad_norm": 2336.511474609375, "learning_rate": 3.2089552238805965e-07, "logits/chosen": 0.05336465686559677, "logits/rejected": 0.022505655884742737, "logps/chosen": -66.67141723632812, "logps/ref_chosen": -66.55043029785156, "logps/ref_rejected": -85.06198120117188, "logps/rejected": -85.17372131347656, "loss": 2.2627, "margin_dpo/margin_mean": -0.009241342544555664, "margin_dpo/margin_std": 0.39431923627853394, "step": 44 }, { "epoch": 0.06802721088435375, "fcm_dpo/beta": 5.540068626403809, "fcm_dpo/delta": 0.08240819722414017, "fcm_dpo/margin": 0.1397048979997635, "fcm_dpo/q_t": 0.37695854902267456, "grad_norm": 1859.1026611328125, "learning_rate": 3.2835820895522385e-07, "logits/chosen": 0.11681336164474487, "logits/rejected": 0.06394165754318237, "logps/chosen": -62.34455108642578, "logps/ref_chosen": -62.24385452270508, "logps/ref_rejected": -92.96665954589844, "logps/rejected": -93.20706176757812, "loss": 1.5415, "margin_dpo/margin_mean": 0.13970479369163513, "margin_dpo/margin_std": 0.38627296686172485, "step": 45 }, { "epoch": 0.06953892668178382, "fcm_dpo/beta": 5.465640068054199, "fcm_dpo/delta": -0.18290278315544128, "fcm_dpo/margin": 0.1856483817100525, "fcm_dpo/q_t": 0.3596438765525818, "grad_norm": 1365.26123046875, "learning_rate": 3.3582089552238805e-07, "logits/chosen": 0.10824910551309586, "logits/rejected": 0.06305693089962006, "logps/chosen": -61.60289001464844, "logps/ref_chosen": -61.498905181884766, "logps/ref_rejected": -78.91172790527344, "logps/rejected": -79.20137023925781, "loss": 1.3262, "margin_dpo/margin_mean": 0.1856483519077301, "margin_dpo/margin_std": 0.3790084421634674, "step": 46 }, { "epoch": 0.0710506424792139, "fcm_dpo/beta": 5.482945919036865, "fcm_dpo/delta": 0.12942156195640564, "fcm_dpo/margin": 0.1331653594970703, "fcm_dpo/q_t": 0.38719505071640015, "grad_norm": 1422.8770751953125, "learning_rate": 3.432835820895522e-07, "logits/chosen": 0.02270699478685856, "logits/rejected": -0.01980304904282093, "logps/chosen": -51.67852020263672, "logps/ref_chosen": -51.578346252441406, "logps/ref_rejected": -68.2215576171875, "logps/rejected": -68.45490264892578, "loss": 1.3589, "margin_dpo/margin_mean": 0.13316544890403748, "margin_dpo/margin_std": 0.31922364234924316, "step": 47 }, { "epoch": 0.07256235827664399, "fcm_dpo/beta": 5.691621780395508, "fcm_dpo/delta": 0.29117757081985474, "fcm_dpo/margin": 0.017589092254638672, "fcm_dpo/q_t": 0.4899485409259796, "grad_norm": 1831.3077392578125, "learning_rate": 3.507462686567164e-07, "logits/chosen": 0.16300594806671143, "logits/rejected": 0.13274288177490234, "logps/chosen": -51.943580627441406, "logps/ref_chosen": -51.79365158081055, "logps/ref_rejected": -64.22503662109375, "logps/rejected": -64.39256286621094, "loss": 2.1944, "margin_dpo/margin_mean": 0.017589718103408813, "margin_dpo/margin_std": 0.3933258056640625, "step": 48 }, { "epoch": 0.07407407407407407, "fcm_dpo/beta": 5.941600799560547, "fcm_dpo/delta": 0.14188799262046814, "fcm_dpo/margin": 0.01929350197315216, "fcm_dpo/q_t": 0.4635244905948639, "grad_norm": 1869.5223388671875, "learning_rate": 3.5820895522388055e-07, "logits/chosen": 0.012968342751264572, "logits/rejected": -0.00816606730222702, "logps/chosen": -58.31328582763672, "logps/ref_chosen": -58.13460159301758, "logps/ref_rejected": -64.63206481933594, "logps/rejected": -64.83004760742188, "loss": 2.1107, "margin_dpo/margin_mean": 0.019293993711471558, "margin_dpo/margin_std": 0.3666878342628479, "step": 49 }, { "epoch": 0.07558578987150416, "fcm_dpo/beta": 6.342537879943848, "fcm_dpo/delta": 0.38531041145324707, "fcm_dpo/margin": 0.07645577192306519, "fcm_dpo/q_t": 0.4381590187549591, "grad_norm": 2065.01953125, "learning_rate": 3.6567164179104475e-07, "logits/chosen": 0.11765280365943909, "logits/rejected": 0.08736774325370789, "logps/chosen": -53.062259674072266, "logps/ref_chosen": -52.85643768310547, "logps/ref_rejected": -72.17460632324219, "logps/rejected": -72.45687866210938, "loss": 1.8018, "margin_dpo/margin_mean": 0.0764555037021637, "margin_dpo/margin_std": 0.35330671072006226, "step": 50 }, { "epoch": 0.07709750566893424, "fcm_dpo/beta": 6.480748653411865, "fcm_dpo/delta": -0.008223239332437515, "fcm_dpo/margin": 0.1322861611843109, "fcm_dpo/q_t": 0.42159244418144226, "grad_norm": 1923.1666259765625, "learning_rate": 3.7313432835820895e-07, "logits/chosen": 0.09921061992645264, "logits/rejected": 0.07079657912254333, "logps/chosen": -63.85832977294922, "logps/ref_chosen": -63.65644073486328, "logps/ref_rejected": -86.13229370117188, "logps/rejected": -86.46647644042969, "loss": 1.7411, "margin_dpo/margin_mean": 0.1322861909866333, "margin_dpo/margin_std": 0.4351498484611511, "step": 51 }, { "epoch": 0.07860922146636433, "fcm_dpo/beta": 6.4637298583984375, "fcm_dpo/delta": 0.048552006483078, "fcm_dpo/margin": 0.12421192228794098, "fcm_dpo/q_t": 0.41275399923324585, "grad_norm": 2170.326416015625, "learning_rate": 3.805970149253731e-07, "logits/chosen": 0.05486099421977997, "logits/rejected": 0.007401124574244022, "logps/chosen": -68.0733642578125, "logps/ref_chosen": -67.8402099609375, "logps/ref_rejected": -96.97090911865234, "logps/rejected": -97.32827758789062, "loss": 1.6966, "margin_dpo/margin_mean": 0.12421198189258575, "margin_dpo/margin_std": 0.39311325550079346, "step": 52 }, { "epoch": 0.0801209372637944, "fcm_dpo/beta": 6.674091339111328, "fcm_dpo/delta": 0.08519239723682404, "fcm_dpo/margin": 0.0567784458398819, "fcm_dpo/q_t": 0.4542519748210907, "grad_norm": 2142.70947265625, "learning_rate": 3.880597014925373e-07, "logits/chosen": 0.08207565546035767, "logits/rejected": 0.07128915190696716, "logps/chosen": -57.14149856567383, "logps/ref_chosen": -56.87813949584961, "logps/ref_rejected": -60.75569152832031, "logps/rejected": -61.075828552246094, "loss": 2.0249, "margin_dpo/margin_mean": 0.05677822232246399, "margin_dpo/margin_std": 0.35405731201171875, "step": 53 }, { "epoch": 0.08163265306122448, "fcm_dpo/beta": 6.724396705627441, "fcm_dpo/delta": 0.20413607358932495, "fcm_dpo/margin": 0.09712421894073486, "fcm_dpo/q_t": 0.40143126249313354, "grad_norm": 2092.410400390625, "learning_rate": 3.9552238805970144e-07, "logits/chosen": 0.07327298074960709, "logits/rejected": 0.05757633596658707, "logps/chosen": -47.52605056762695, "logps/ref_chosen": -47.26692199707031, "logps/ref_rejected": -62.19426727294922, "logps/rejected": -62.55051803588867, "loss": 1.7674, "margin_dpo/margin_mean": 0.09712427854537964, "margin_dpo/margin_std": 0.3280726373195648, "step": 54 }, { "epoch": 0.08314436885865457, "fcm_dpo/beta": 6.342741012573242, "fcm_dpo/delta": -0.6529929637908936, "fcm_dpo/margin": 0.2226666957139969, "fcm_dpo/q_t": 0.35867583751678467, "grad_norm": 1971.2823486328125, "learning_rate": 4.0298507462686564e-07, "logits/chosen": 0.009333456866443157, "logits/rejected": -0.06498602777719498, "logps/chosen": -50.54954528808594, "logps/ref_chosen": -50.32619094848633, "logps/ref_rejected": -92.44389343261719, "logps/rejected": -92.88990783691406, "loss": 1.5563, "margin_dpo/margin_mean": 0.22266672551631927, "margin_dpo/margin_std": 0.49195396900177, "step": 55 }, { "epoch": 0.08465608465608465, "fcm_dpo/beta": 6.315101146697998, "fcm_dpo/delta": 0.1984485387802124, "fcm_dpo/margin": 0.1050320565700531, "fcm_dpo/q_t": 0.4071798324584961, "grad_norm": 1768.2681884765625, "learning_rate": 4.1044776119402984e-07, "logits/chosen": 0.13452336192131042, "logits/rejected": 0.11181502044200897, "logps/chosen": -56.96580505371094, "logps/ref_chosen": -56.766971588134766, "logps/ref_rejected": -66.30504608154297, "logps/rejected": -66.60890197753906, "loss": 1.6331, "margin_dpo/margin_mean": 0.10503232479095459, "margin_dpo/margin_std": 0.3666898310184479, "step": 56 }, { "epoch": 0.08616780045351474, "fcm_dpo/beta": 5.578975677490234, "fcm_dpo/delta": -0.6684575080871582, "fcm_dpo/margin": 0.2490333914756775, "fcm_dpo/q_t": 0.32324889302253723, "grad_norm": 1445.676025390625, "learning_rate": 4.17910447761194e-07, "logits/chosen": 0.10891781747341156, "logits/rejected": 0.043134208768606186, "logps/chosen": -58.001564025878906, "logps/ref_chosen": -57.76774597167969, "logps/ref_rejected": -82.75698852539062, "logps/rejected": -83.23983764648438, "loss": 1.278, "margin_dpo/margin_mean": 0.24903348088264465, "margin_dpo/margin_std": 0.43858030438423157, "step": 57 }, { "epoch": 0.08767951625094482, "fcm_dpo/beta": 5.610134124755859, "fcm_dpo/delta": 0.21757441759109497, "fcm_dpo/margin": 0.11425483226776123, "fcm_dpo/q_t": 0.4058857560157776, "grad_norm": 1669.9715576171875, "learning_rate": 4.253731343283582e-07, "logits/chosen": 0.03284445032477379, "logits/rejected": 0.01788032241165638, "logps/chosen": -73.05523681640625, "logps/ref_chosen": -72.76408386230469, "logps/ref_rejected": -84.49275207519531, "logps/rejected": -84.89814758300781, "loss": 1.7105, "margin_dpo/margin_mean": 0.11425450444221497, "margin_dpo/margin_std": 0.38692593574523926, "step": 58 }, { "epoch": 0.08919123204837491, "fcm_dpo/beta": 5.876424789428711, "fcm_dpo/delta": -0.19149622321128845, "fcm_dpo/margin": 0.16999658942222595, "fcm_dpo/q_t": 0.36740055680274963, "grad_norm": 1536.5157470703125, "learning_rate": 4.3283582089552234e-07, "logits/chosen": 0.11274999380111694, "logits/rejected": 0.047084975987672806, "logps/chosen": -50.087608337402344, "logps/ref_chosen": -49.820777893066406, "logps/ref_rejected": -77.14368438720703, "logps/rejected": -77.58052062988281, "loss": 1.4267, "margin_dpo/margin_mean": 0.16999676823616028, "margin_dpo/margin_std": 0.3600447475910187, "step": 59 }, { "epoch": 0.09070294784580499, "fcm_dpo/beta": 6.065890312194824, "fcm_dpo/delta": 0.5198989510536194, "fcm_dpo/margin": 0.05685025453567505, "fcm_dpo/q_t": 0.4424615502357483, "grad_norm": 2018.1671142578125, "learning_rate": 4.4029850746268654e-07, "logits/chosen": 0.10006190836429596, "logits/rejected": 0.09853567183017731, "logps/chosen": -63.53302001953125, "logps/ref_chosen": -63.22477340698242, "logps/ref_rejected": -61.360477447509766, "logps/rejected": -61.7255744934082, "loss": 2.1575, "margin_dpo/margin_mean": 0.05685010552406311, "margin_dpo/margin_std": 0.4581069350242615, "step": 60 }, { "epoch": 0.09221466364323508, "fcm_dpo/beta": 6.438871383666992, "fcm_dpo/delta": 0.36666175723075867, "fcm_dpo/margin": 0.07799457013607025, "fcm_dpo/q_t": 0.4422876238822937, "grad_norm": 2275.224609375, "learning_rate": 4.4776119402985074e-07, "logits/chosen": 0.13344313204288483, "logits/rejected": 0.10039305686950684, "logps/chosen": -49.34346008300781, "logps/ref_chosen": -49.01679992675781, "logps/ref_rejected": -74.90817260742188, "logps/rejected": -75.31282806396484, "loss": 2.3946, "margin_dpo/margin_mean": 0.07799449563026428, "margin_dpo/margin_std": 0.47035545110702515, "step": 61 }, { "epoch": 0.09372637944066516, "fcm_dpo/beta": 6.52736759185791, "fcm_dpo/delta": -0.3168880343437195, "fcm_dpo/margin": 0.17259901762008667, "fcm_dpo/q_t": 0.3768240511417389, "grad_norm": 2118.017578125, "learning_rate": 4.552238805970149e-07, "logits/chosen": 0.09595489501953125, "logits/rejected": 0.05686543136835098, "logps/chosen": -63.08671188354492, "logps/ref_chosen": -62.751869201660156, "logps/ref_rejected": -78.93360900878906, "logps/rejected": -79.44105529785156, "loss": 1.764, "margin_dpo/margin_mean": 0.1725986897945404, "margin_dpo/margin_std": 0.43187639117240906, "step": 62 }, { "epoch": 0.09523809523809523, "fcm_dpo/beta": 5.918633460998535, "fcm_dpo/delta": -0.517593264579773, "fcm_dpo/margin": 0.2199816107749939, "fcm_dpo/q_t": 0.31167125701904297, "grad_norm": 1738.617431640625, "learning_rate": 4.626865671641791e-07, "logits/chosen": 0.18449003994464874, "logits/rejected": 0.15899410843849182, "logps/chosen": -60.80646514892578, "logps/ref_chosen": -60.51525115966797, "logps/ref_rejected": -85.11021423339844, "logps/rejected": -85.62141418457031, "loss": 1.3985, "margin_dpo/margin_mean": 0.2199820578098297, "margin_dpo/margin_std": 0.4385373294353485, "step": 63 }, { "epoch": 0.09674981103552532, "fcm_dpo/beta": 5.581248760223389, "fcm_dpo/delta": -0.05339386314153671, "fcm_dpo/margin": 0.06814375519752502, "fcm_dpo/q_t": 0.43303388357162476, "grad_norm": 1775.4036865234375, "learning_rate": 4.701492537313433e-07, "logits/chosen": 0.08545556664466858, "logits/rejected": 0.06017608195543289, "logps/chosen": -51.58530044555664, "logps/ref_chosen": -51.20684814453125, "logps/ref_rejected": -66.93081665039062, "logps/rejected": -67.3774185180664, "loss": 1.9515, "margin_dpo/margin_mean": 0.06814375519752502, "margin_dpo/margin_std": 0.4061310291290283, "step": 64 }, { "epoch": 0.0982615268329554, "fcm_dpo/beta": 4.899092674255371, "fcm_dpo/delta": -0.8839624524116516, "fcm_dpo/margin": 0.3248189091682434, "fcm_dpo/q_t": 0.2600834369659424, "grad_norm": 1128.9351806640625, "learning_rate": 4.776119402985074e-07, "logits/chosen": 0.1839187741279602, "logits/rejected": 0.15427696704864502, "logps/chosen": -67.62045288085938, "logps/ref_chosen": -67.2886962890625, "logps/ref_rejected": -74.44281005859375, "logps/rejected": -75.09938049316406, "loss": 1.0723, "margin_dpo/margin_mean": 0.32481849193573, "margin_dpo/margin_std": 0.4664004445075989, "step": 65 }, { "epoch": 0.09977324263038549, "fcm_dpo/beta": 4.846595764160156, "fcm_dpo/delta": 0.3371145725250244, "fcm_dpo/margin": 0.10960313677787781, "fcm_dpo/q_t": 0.40099650621414185, "grad_norm": 1611.027587890625, "learning_rate": 4.850746268656717e-07, "logits/chosen": 0.07052364945411682, "logits/rejected": 0.04705191031098366, "logps/chosen": -71.12271118164062, "logps/ref_chosen": -70.743408203125, "logps/ref_rejected": -77.26499938964844, "logps/rejected": -77.75389862060547, "loss": 1.7288, "margin_dpo/margin_mean": 0.10960283875465393, "margin_dpo/margin_std": 0.41340774297714233, "step": 66 }, { "epoch": 0.10128495842781557, "fcm_dpo/beta": 4.897915840148926, "fcm_dpo/delta": -0.19840705394744873, "fcm_dpo/margin": 0.20993411540985107, "fcm_dpo/q_t": 0.3695130944252014, "grad_norm": 1324.92138671875, "learning_rate": 4.925373134328357e-07, "logits/chosen": 0.06483873724937439, "logits/rejected": 0.009191485121846199, "logps/chosen": -60.90003204345703, "logps/ref_chosen": -60.60260009765625, "logps/ref_rejected": -75.22235870361328, "logps/rejected": -75.72972106933594, "loss": 1.3653, "margin_dpo/margin_mean": 0.20993369817733765, "margin_dpo/margin_std": 0.46069464087486267, "step": 67 }, { "epoch": 0.10279667422524566, "fcm_dpo/beta": 4.430768966674805, "fcm_dpo/delta": -0.389474093914032, "fcm_dpo/margin": 0.2649560272693634, "fcm_dpo/q_t": 0.3379044234752655, "grad_norm": 1240.4334716796875, "learning_rate": 5e-07, "logits/chosen": 0.0564710795879364, "logits/rejected": 0.0265452042222023, "logps/chosen": -77.92970275878906, "logps/ref_chosen": -77.52836608886719, "logps/ref_rejected": -93.17778015136719, "logps/rejected": -93.84407043457031, "loss": 1.2815, "margin_dpo/margin_mean": 0.2649560570716858, "margin_dpo/margin_std": 0.5076867938041687, "step": 68 }, { "epoch": 0.10430839002267574, "fcm_dpo/beta": 4.559112548828125, "fcm_dpo/delta": 0.041590481996536255, "fcm_dpo/margin": 0.17647495865821838, "fcm_dpo/q_t": 0.3657025992870331, "grad_norm": 1222.0938720703125, "learning_rate": 4.999965034812934e-07, "logits/chosen": 0.07601971179246902, "logits/rejected": 0.03346855193376541, "logps/chosen": -66.33596801757812, "logps/ref_chosen": -65.94305419921875, "logps/ref_rejected": -89.7735595703125, "logps/rejected": -90.34294891357422, "loss": 1.3248, "margin_dpo/margin_mean": 0.17647448182106018, "margin_dpo/margin_std": 0.39660531282424927, "step": 69 }, { "epoch": 0.10582010582010581, "fcm_dpo/beta": 4.578952312469482, "fcm_dpo/delta": 0.2021067887544632, "fcm_dpo/margin": 0.14442333579063416, "fcm_dpo/q_t": 0.3830464482307434, "grad_norm": 1205.255615234375, "learning_rate": 4.999860140229787e-07, "logits/chosen": 0.10305196791887283, "logits/rejected": 0.08014979958534241, "logps/chosen": -62.320560455322266, "logps/ref_chosen": -61.95791244506836, "logps/ref_rejected": -75.80945587158203, "logps/rejected": -76.3165283203125, "loss": 1.483, "margin_dpo/margin_mean": 0.14442339539527893, "margin_dpo/margin_std": 0.4010279178619385, "step": 70 }, { "epoch": 0.1073318216175359, "fcm_dpo/beta": 4.783401012420654, "fcm_dpo/delta": 0.1197274923324585, "fcm_dpo/margin": 0.04047618806362152, "fcm_dpo/q_t": 0.4602009356021881, "grad_norm": 1565.259033203125, "learning_rate": 4.999685319184688e-07, "logits/chosen": 0.06028672307729721, "logits/rejected": 0.04451918601989746, "logps/chosen": -63.822017669677734, "logps/ref_chosen": -63.34757995605469, "logps/ref_rejected": -67.49658203125, "logps/rejected": -68.0114974975586, "loss": 2.0922, "margin_dpo/margin_mean": 0.04047642648220062, "margin_dpo/margin_std": 0.4636165499687195, "step": 71 }, { "epoch": 0.10884353741496598, "fcm_dpo/beta": 4.545166969299316, "fcm_dpo/delta": -0.38348883390426636, "fcm_dpo/margin": 0.26161858439445496, "fcm_dpo/q_t": 0.34238147735595703, "grad_norm": 1248.3271484375, "learning_rate": 4.999440576567755e-07, "logits/chosen": 0.06008986011147499, "logits/rejected": -0.0004155375063419342, "logps/chosen": -56.19200134277344, "logps/ref_chosen": -55.85929870605469, "logps/ref_rejected": -68.45423889160156, "logps/rejected": -69.04856872558594, "loss": 1.317, "margin_dpo/margin_mean": 0.26161882281303406, "margin_dpo/margin_std": 0.5102354288101196, "step": 72 }, { "epoch": 0.11035525321239607, "fcm_dpo/beta": 4.637323379516602, "fcm_dpo/delta": 0.24215392768383026, "fcm_dpo/margin": 0.1334337592124939, "fcm_dpo/q_t": 0.41474786400794983, "grad_norm": 1419.067138671875, "learning_rate": 4.999125919224965e-07, "logits/chosen": 0.04662982001900673, "logits/rejected": 0.032734230160713196, "logps/chosen": -69.61697387695312, "logps/ref_chosen": -69.13880920410156, "logps/ref_rejected": -79.04586791992188, "logps/rejected": -79.657470703125, "loss": 1.743, "margin_dpo/margin_mean": 0.1334337592124939, "margin_dpo/margin_std": 0.49519866704940796, "step": 73 }, { "epoch": 0.11186696900982615, "fcm_dpo/beta": 4.333841323852539, "fcm_dpo/delta": -0.5068634152412415, "fcm_dpo/margin": 0.2982664108276367, "fcm_dpo/q_t": 0.32818102836608887, "grad_norm": 919.8814697265625, "learning_rate": 4.998741355957963e-07, "logits/chosen": 0.09711477905511856, "logits/rejected": 0.046173036098480225, "logps/chosen": -50.251930236816406, "logps/ref_chosen": -49.923736572265625, "logps/ref_rejected": -81.73213958740234, "logps/rejected": -82.35859680175781, "loss": 1.0557, "margin_dpo/margin_mean": 0.2982656955718994, "margin_dpo/margin_std": 0.4928857088088989, "step": 74 }, { "epoch": 0.11337868480725624, "fcm_dpo/beta": 4.022831916809082, "fcm_dpo/delta": -0.23631714284420013, "fcm_dpo/margin": 0.263003945350647, "fcm_dpo/q_t": 0.35024577379226685, "grad_norm": 891.9666748046875, "learning_rate": 4.998286897523808e-07, "logits/chosen": 0.07056191563606262, "logits/rejected": 0.04056151211261749, "logps/chosen": -46.45494842529297, "logps/ref_chosen": -46.06875228881836, "logps/ref_rejected": -66.1181411743164, "logps/rejected": -66.76734161376953, "loss": 1.1421, "margin_dpo/margin_mean": 0.26300370693206787, "margin_dpo/margin_std": 0.46238186955451965, "step": 75 }, { "epoch": 0.11489040060468632, "fcm_dpo/beta": 4.063389301300049, "fcm_dpo/delta": 0.1831236034631729, "fcm_dpo/margin": 0.16686102747917175, "fcm_dpo/q_t": 0.3965134620666504, "grad_norm": 1074.15380859375, "learning_rate": 4.997762556634679e-07, "logits/chosen": 0.065489761531353, "logits/rejected": 0.02454444393515587, "logps/chosen": -54.44904708862305, "logps/ref_chosen": -54.06275177001953, "logps/ref_rejected": -74.87464141845703, "logps/rejected": -75.42780303955078, "loss": 1.5124, "margin_dpo/margin_mean": 0.16686102747917175, "margin_dpo/margin_std": 0.4800097346305847, "step": 76 }, { "epoch": 0.1164021164021164, "fcm_dpo/beta": 4.121022701263428, "fcm_dpo/delta": 0.1284758448600769, "fcm_dpo/margin": 0.17436596751213074, "fcm_dpo/q_t": 0.38447582721710205, "grad_norm": 1231.5897216796875, "learning_rate": 4.99716834795752e-07, "logits/chosen": 0.0951685756444931, "logits/rejected": 0.05633886903524399, "logps/chosen": -53.52678680419922, "logps/ref_chosen": -53.07609176635742, "logps/ref_rejected": -74.45601654052734, "logps/rejected": -75.0810775756836, "loss": 1.4287, "margin_dpo/margin_mean": 0.17436623573303223, "margin_dpo/margin_std": 0.42879635095596313, "step": 77 }, { "epoch": 0.11791383219954649, "fcm_dpo/beta": 4.575469970703125, "fcm_dpo/delta": 0.39146047830581665, "fcm_dpo/margin": 0.1026681512594223, "fcm_dpo/q_t": 0.40832120180130005, "grad_norm": 1497.64501953125, "learning_rate": 4.996504288113623e-07, "logits/chosen": 0.06529897451400757, "logits/rejected": 0.04548865556716919, "logps/chosen": -68.17215728759766, "logps/ref_chosen": -67.72541809082031, "logps/ref_rejected": -79.03926849365234, "logps/rejected": -79.58867645263672, "loss": 1.7305, "margin_dpo/margin_mean": 0.10266757011413574, "margin_dpo/margin_std": 0.4406697750091553, "step": 78 }, { "epoch": 0.11942554799697656, "fcm_dpo/beta": 4.646597385406494, "fcm_dpo/delta": -0.01867286115884781, "fcm_dpo/margin": 0.1862274706363678, "fcm_dpo/q_t": 0.36434149742126465, "grad_norm": 1128.8731689453125, "learning_rate": 4.995770395678171e-07, "logits/chosen": 0.10144677013158798, "logits/rejected": 0.04583786800503731, "logps/chosen": -52.6339111328125, "logps/ref_chosen": -52.16064453125, "logps/ref_rejected": -83.31062316894531, "logps/rejected": -83.97010803222656, "loss": 1.2794, "margin_dpo/margin_mean": 0.1862274706363678, "margin_dpo/margin_std": 0.41245660185813904, "step": 79 }, { "epoch": 0.12093726379440665, "fcm_dpo/beta": 4.5290632247924805, "fcm_dpo/delta": -0.1917770802974701, "fcm_dpo/margin": 0.2254573255777359, "fcm_dpo/q_t": 0.3562784790992737, "grad_norm": 1285.3790283203125, "learning_rate": 4.994966691179711e-07, "logits/chosen": 0.12082693725824356, "logits/rejected": 0.06224146857857704, "logps/chosen": -61.87617492675781, "logps/ref_chosen": -61.410560607910156, "logps/ref_rejected": -78.66004943847656, "logps/rejected": -79.35111999511719, "loss": 1.3237, "margin_dpo/margin_mean": 0.22545722126960754, "margin_dpo/margin_std": 0.46818071603775024, "step": 80 }, { "epoch": 0.12244897959183673, "fcm_dpo/beta": 4.244363307952881, "fcm_dpo/delta": -0.19199597835540771, "fcm_dpo/margin": 0.2396332323551178, "fcm_dpo/q_t": 0.35482287406921387, "grad_norm": 1211.70751953125, "learning_rate": 4.994093197099587e-07, "logits/chosen": 0.0907532274723053, "logits/rejected": 0.05751120299100876, "logps/chosen": -64.23452758789062, "logps/ref_chosen": -63.80437088012695, "logps/ref_rejected": -79.3484115600586, "logps/rejected": -80.01820373535156, "loss": 1.3572, "margin_dpo/margin_mean": 0.23963311314582825, "margin_dpo/margin_std": 0.5050238966941833, "step": 81 }, { "epoch": 0.12396069538926682, "fcm_dpo/beta": 4.048993110656738, "fcm_dpo/delta": -0.4337691068649292, "fcm_dpo/margin": 0.30429962277412415, "fcm_dpo/q_t": 0.3015890121459961, "grad_norm": 1029.6053466796875, "learning_rate": 4.993149937871306e-07, "logits/chosen": 0.05798634514212608, "logits/rejected": -0.002718113362789154, "logps/chosen": -49.222694396972656, "logps/ref_chosen": -48.817893981933594, "logps/ref_rejected": -70.31497955322266, "logps/rejected": -71.02407836914062, "loss": 1.0236, "margin_dpo/margin_mean": 0.3042997121810913, "margin_dpo/margin_std": 0.42897915840148926, "step": 82 }, { "epoch": 0.1254724111866969, "fcm_dpo/beta": 3.6137092113494873, "fcm_dpo/delta": -0.46512115001678467, "fcm_dpo/margin": 0.34642505645751953, "fcm_dpo/q_t": 0.2966233491897583, "grad_norm": 804.3886108398438, "learning_rate": 4.992136939879856e-07, "logits/chosen": 0.14557309448719025, "logits/rejected": 0.09665486216545105, "logps/chosen": -57.57659149169922, "logps/ref_chosen": -57.15077209472656, "logps/ref_rejected": -75.1710205078125, "logps/rejected": -75.9432601928711, "loss": 1.0428, "margin_dpo/margin_mean": 0.34642475843429565, "margin_dpo/margin_std": 0.505172610282898, "step": 83 }, { "epoch": 0.12698412698412698, "fcm_dpo/beta": 3.7466650009155273, "fcm_dpo/delta": 0.3175293207168579, "fcm_dpo/margin": 0.14622744917869568, "fcm_dpo/q_t": 0.4235062599182129, "grad_norm": 1326.18408203125, "learning_rate": 4.991054231460969e-07, "logits/chosen": 0.11884990334510803, "logits/rejected": 0.078602634370327, "logps/chosen": -65.2996826171875, "logps/ref_chosen": -64.77729797363281, "logps/ref_rejected": -84.71949768066406, "logps/rejected": -85.38810729980469, "loss": 1.6192, "margin_dpo/margin_mean": 0.14622774720191956, "margin_dpo/margin_std": 0.5217863321304321, "step": 84 }, { "epoch": 0.12849584278155707, "fcm_dpo/beta": 3.6050148010253906, "fcm_dpo/delta": -0.3528403639793396, "fcm_dpo/margin": 0.32242467999458313, "fcm_dpo/q_t": 0.3231235146522522, "grad_norm": 1045.094482421875, "learning_rate": 4.989901842900325e-07, "logits/chosen": 0.09119876474142075, "logits/rejected": 0.04982073977589607, "logps/chosen": -50.68433380126953, "logps/ref_chosen": -50.25169372558594, "logps/ref_rejected": -66.55439758300781, "logps/rejected": -67.30945587158203, "loss": 1.1877, "margin_dpo/margin_mean": 0.3224252462387085, "margin_dpo/margin_std": 0.5532187223434448, "step": 85 }, { "epoch": 0.13000755857898716, "fcm_dpo/beta": 3.499697685241699, "fcm_dpo/delta": -0.12498529255390167, "fcm_dpo/margin": 0.27497416734695435, "fcm_dpo/q_t": 0.366787314414978, "grad_norm": 1007.2182006835938, "learning_rate": 4.988679806432711e-07, "logits/chosen": 0.15556927025318146, "logits/rejected": 0.13702501356601715, "logps/chosen": -61.228721618652344, "logps/ref_chosen": -60.72917938232422, "logps/ref_rejected": -72.30961608886719, "logps/rejected": -73.0841293334961, "loss": 1.2257, "margin_dpo/margin_mean": 0.2749743163585663, "margin_dpo/margin_std": 0.5419769883155823, "step": 86 }, { "epoch": 0.13151927437641722, "fcm_dpo/beta": 3.2829439640045166, "fcm_dpo/delta": -0.21701934933662415, "fcm_dpo/margin": 0.31577974557876587, "fcm_dpo/q_t": 0.33885273337364197, "grad_norm": 955.0740966796875, "learning_rate": 4.987388156241114e-07, "logits/chosen": 0.0836351215839386, "logits/rejected": 0.030040550976991653, "logps/chosen": -66.27688598632812, "logps/ref_chosen": -65.75796508789062, "logps/ref_rejected": -84.81159973144531, "logps/rejected": -85.64628601074219, "loss": 1.2115, "margin_dpo/margin_mean": 0.31578001379966736, "margin_dpo/margin_std": 0.5747581720352173, "step": 87 }, { "epoch": 0.1330309901738473, "fcm_dpo/beta": 3.314173460006714, "fcm_dpo/delta": 0.09749428927898407, "fcm_dpo/margin": 0.2290271520614624, "fcm_dpo/q_t": 0.38487881422042847, "grad_norm": 1090.8194580078125, "learning_rate": 4.986026928455767e-07, "logits/chosen": 0.14258967339992523, "logits/rejected": 0.11725766956806183, "logps/chosen": -63.33268356323242, "logps/ref_chosen": -62.82402801513672, "logps/ref_rejected": -74.9607162475586, "logps/rejected": -75.69840240478516, "loss": 1.496, "margin_dpo/margin_mean": 0.22902727127075195, "margin_dpo/margin_std": 0.6007837653160095, "step": 88 }, { "epoch": 0.1345427059712774, "fcm_dpo/beta": 3.4935643672943115, "fcm_dpo/delta": 0.11871908605098724, "fcm_dpo/margin": 0.20880961418151855, "fcm_dpo/q_t": 0.3681311309337616, "grad_norm": 984.82470703125, "learning_rate": 4.984596161153135e-07, "logits/chosen": 0.1835154891014099, "logits/rejected": 0.10450133681297302, "logps/chosen": -41.65589141845703, "logps/ref_chosen": -41.191436767578125, "logps/ref_rejected": -85.44769287109375, "logps/rejected": -86.12095642089844, "loss": 1.611, "margin_dpo/margin_mean": 0.2088102400302887, "margin_dpo/margin_std": 0.6353697776794434, "step": 89 }, { "epoch": 0.1360544217687075, "fcm_dpo/beta": 3.4933032989501953, "fcm_dpo/delta": 0.05548207834362984, "fcm_dpo/margin": 0.22840037941932678, "fcm_dpo/q_t": 0.37616848945617676, "grad_norm": 1013.5514526367188, "learning_rate": 4.983095894354857e-07, "logits/chosen": 0.08436588197946548, "logits/rejected": 0.032395198941230774, "logps/chosen": -57.09331512451172, "logps/ref_chosen": -56.58390808105469, "logps/ref_rejected": -86.86978149414062, "logps/rejected": -87.60758972167969, "loss": 1.5349, "margin_dpo/margin_mean": 0.22840029001235962, "margin_dpo/margin_std": 0.6292995810508728, "step": 90 }, { "epoch": 0.13756613756613756, "fcm_dpo/beta": 3.3264732360839844, "fcm_dpo/delta": -0.30509454011917114, "fcm_dpo/margin": 0.3368951082229614, "fcm_dpo/q_t": 0.33719223737716675, "grad_norm": 858.5227661132812, "learning_rate": 4.98152617002662e-07, "logits/chosen": 0.09481631219387054, "logits/rejected": 0.05292369797825813, "logps/chosen": -52.90049362182617, "logps/ref_chosen": -52.38234329223633, "logps/ref_rejected": -72.17642211914062, "logps/rejected": -73.0314712524414, "loss": 1.3205, "margin_dpo/margin_mean": 0.33689484000205994, "margin_dpo/margin_std": 0.6805263757705688, "step": 91 }, { "epoch": 0.13907785336356765, "fcm_dpo/beta": 3.2963027954101562, "fcm_dpo/delta": -0.1682032346725464, "fcm_dpo/margin": 0.2999283969402313, "fcm_dpo/q_t": 0.3624107241630554, "grad_norm": 807.3601684570312, "learning_rate": 4.979887032076988e-07, "logits/chosen": 0.13207153975963593, "logits/rejected": 0.09528068453073502, "logps/chosen": -53.570884704589844, "logps/ref_chosen": -53.00870132446289, "logps/ref_rejected": -79.77812957763672, "logps/rejected": -80.64024353027344, "loss": 1.2978, "margin_dpo/margin_mean": 0.2999285161495209, "margin_dpo/margin_std": 0.5701849460601807, "step": 92 }, { "epoch": 0.14058956916099774, "fcm_dpo/beta": 3.103659152984619, "fcm_dpo/delta": -0.10753681510686874, "fcm_dpo/margin": 0.3049851357936859, "fcm_dpo/q_t": 0.3586532771587372, "grad_norm": 682.6175537109375, "learning_rate": 4.978178526356172e-07, "logits/chosen": 0.08406171947717667, "logits/rejected": 0.058728571981191635, "logps/chosen": -45.46405792236328, "logps/ref_chosen": -44.90705108642578, "logps/ref_rejected": -58.7879524230957, "logps/rejected": -59.649932861328125, "loss": 1.3186, "margin_dpo/margin_mean": 0.3049851953983307, "margin_dpo/margin_std": 0.6701629161834717, "step": 93 }, { "epoch": 0.1421012849584278, "fcm_dpo/beta": 2.9392552375793457, "fcm_dpo/delta": -0.2067282795906067, "fcm_dpo/margin": 0.34755954146385193, "fcm_dpo/q_t": 0.33474797010421753, "grad_norm": 712.4808349609375, "learning_rate": 4.976400700654751e-07, "logits/chosen": 0.14686943590641022, "logits/rejected": 0.11152657866477966, "logps/chosen": -60.35730743408203, "logps/ref_chosen": -59.93777084350586, "logps/ref_rejected": -79.3138427734375, "logps/rejected": -80.0809326171875, "loss": 1.2872, "margin_dpo/margin_mean": 0.3475595712661743, "margin_dpo/margin_std": 0.7137982845306396, "step": 94 }, { "epoch": 0.1436130007558579, "fcm_dpo/beta": 2.8255391120910645, "fcm_dpo/delta": -0.2533861994743347, "fcm_dpo/margin": 0.3784918189048767, "fcm_dpo/q_t": 0.32923561334609985, "grad_norm": 712.9932861328125, "learning_rate": 4.974553604702332e-07, "logits/chosen": 0.08593058586120605, "logits/rejected": 0.02429114282131195, "logps/chosen": -60.73108673095703, "logps/ref_chosen": -60.168487548828125, "logps/ref_rejected": -90.73665618896484, "logps/rejected": -91.67774963378906, "loss": 1.0566, "margin_dpo/margin_mean": 0.3784918189048767, "margin_dpo/margin_std": 0.5973398089408875, "step": 95 }, { "epoch": 0.14512471655328799, "fcm_dpo/beta": 2.735480546951294, "fcm_dpo/delta": -0.29815971851348877, "fcm_dpo/margin": 0.40782594680786133, "fcm_dpo/q_t": 0.3135584592819214, "grad_norm": 572.3253784179688, "learning_rate": 4.972637290166157e-07, "logits/chosen": 0.10200202465057373, "logits/rejected": 0.06153492629528046, "logps/chosen": -61.17717361450195, "logps/ref_chosen": -60.66877746582031, "logps/ref_rejected": -88.30673217773438, "logps/rejected": -89.22294616699219, "loss": 1.0682, "margin_dpo/margin_mean": 0.4078254997730255, "margin_dpo/margin_std": 0.6238170862197876, "step": 96 }, { "epoch": 0.14663643235071808, "fcm_dpo/beta": 2.6156375408172607, "fcm_dpo/delta": -0.02265828847885132, "fcm_dpo/margin": 0.3311424255371094, "fcm_dpo/q_t": 0.373293936252594, "grad_norm": 762.564697265625, "learning_rate": 4.970651810649666e-07, "logits/chosen": 0.049608707427978516, "logits/rejected": 0.007526304107159376, "logps/chosen": -65.6923828125, "logps/ref_chosen": -65.04412078857422, "logps/ref_rejected": -78.42092895507812, "logps/rejected": -79.40032958984375, "loss": 1.1484, "margin_dpo/margin_mean": 0.33114248514175415, "margin_dpo/margin_std": 0.6242318153381348, "step": 97 }, { "epoch": 0.14814814814814814, "fcm_dpo/beta": 2.640901565551758, "fcm_dpo/delta": 0.013076554983854294, "fcm_dpo/margin": 0.3170929551124573, "fcm_dpo/q_t": 0.36007198691368103, "grad_norm": 630.3800659179688, "learning_rate": 4.968597221690985e-07, "logits/chosen": 0.1261807680130005, "logits/rejected": 0.10040568560361862, "logps/chosen": -55.91543197631836, "logps/ref_chosen": -55.503231048583984, "logps/ref_rejected": -72.81553649902344, "logps/rejected": -73.54483032226562, "loss": 1.0684, "margin_dpo/margin_mean": 0.3170931935310364, "margin_dpo/margin_std": 0.5252500772476196, "step": 98 }, { "epoch": 0.14965986394557823, "fcm_dpo/beta": 2.610173463821411, "fcm_dpo/delta": -0.0660782903432846, "fcm_dpo/margin": 0.34793078899383545, "fcm_dpo/q_t": 0.35216856002807617, "grad_norm": 699.6543579101562, "learning_rate": 4.966473580761389e-07, "logits/chosen": 0.14399348199367523, "logits/rejected": 0.10823240876197815, "logps/chosen": -59.083717346191406, "logps/ref_chosen": -58.57563781738281, "logps/ref_rejected": -78.693603515625, "logps/rejected": -79.54962158203125, "loss": 1.0688, "margin_dpo/margin_mean": 0.3479306697845459, "margin_dpo/margin_std": 0.5678527355194092, "step": 99 }, { "epoch": 0.15117157974300832, "fcm_dpo/beta": 2.6637752056121826, "fcm_dpo/delta": -0.013185635209083557, "fcm_dpo/margin": 0.3216173052787781, "fcm_dpo/q_t": 0.37674546241760254, "grad_norm": 795.3727416992188, "learning_rate": 4.964280947263676e-07, "logits/chosen": 0.12704704701900482, "logits/rejected": 0.11997953057289124, "logps/chosen": -80.09172058105469, "logps/ref_chosen": -79.58343505859375, "logps/ref_rejected": -92.152587890625, "logps/rejected": -92.98249053955078, "loss": 1.3194, "margin_dpo/margin_mean": 0.3216173052787781, "margin_dpo/margin_std": 0.7164607048034668, "step": 100 }, { "epoch": 0.15268329554043839, "fcm_dpo/beta": 2.503209114074707, "fcm_dpo/delta": -0.42283251881599426, "fcm_dpo/margin": 0.48820391297340393, "fcm_dpo/q_t": 0.2904645800590515, "grad_norm": 486.78692626953125, "learning_rate": 4.96201938253052e-07, "logits/chosen": 0.12482018768787384, "logits/rejected": 0.08969442546367645, "logps/chosen": -52.763427734375, "logps/ref_chosen": -52.332786560058594, "logps/ref_rejected": -69.55589294433594, "logps/rejected": -70.47473907470703, "loss": 0.8776, "margin_dpo/margin_mean": 0.48820409178733826, "margin_dpo/margin_std": 0.5912094116210938, "step": 101 }, { "epoch": 0.15419501133786848, "fcm_dpo/beta": 2.454827308654785, "fcm_dpo/delta": 0.0955345630645752, "fcm_dpo/margin": 0.3097192943096161, "fcm_dpo/q_t": 0.37257254123687744, "grad_norm": 702.9950561523438, "learning_rate": 4.959688949822748e-07, "logits/chosen": 0.03808064013719559, "logits/rejected": 0.0005772793665528297, "logps/chosen": -65.2833251953125, "logps/ref_chosen": -64.74348449707031, "logps/ref_rejected": -69.06132507324219, "logps/rejected": -69.910888671875, "loss": 1.2787, "margin_dpo/margin_mean": 0.3097189664840698, "margin_dpo/margin_std": 0.6924293041229248, "step": 102 }, { "epoch": 0.15570672713529857, "fcm_dpo/beta": 2.502030372619629, "fcm_dpo/delta": 0.10205619037151337, "fcm_dpo/margin": 0.3015629053115845, "fcm_dpo/q_t": 0.3854670822620392, "grad_norm": 735.919677734375, "learning_rate": 4.957289714327572e-07, "logits/chosen": 0.14897724986076355, "logits/rejected": 0.11798413842916489, "logps/chosen": -64.3989028930664, "logps/ref_chosen": -63.83664321899414, "logps/ref_rejected": -79.32362365722656, "logps/rejected": -80.18745422363281, "loss": 1.2513, "margin_dpo/margin_mean": 0.30156272649765015, "margin_dpo/margin_std": 0.6556486487388611, "step": 103 }, { "epoch": 0.15721844293272866, "fcm_dpo/beta": 2.617619514465332, "fcm_dpo/delta": 0.2569226324558258, "fcm_dpo/margin": 0.2314532995223999, "fcm_dpo/q_t": 0.4117388129234314, "grad_norm": 881.36279296875, "learning_rate": 4.954821743156767e-07, "logits/chosen": 0.12788525223731995, "logits/rejected": 0.04946213215589523, "logps/chosen": -61.58454895019531, "logps/ref_chosen": -60.99920654296875, "logps/ref_rejected": -98.84645080566406, "logps/rejected": -99.66325378417969, "loss": 1.3872, "margin_dpo/margin_mean": 0.23145365715026855, "margin_dpo/margin_std": 0.6401793956756592, "step": 104 }, { "epoch": 0.15873015873015872, "fcm_dpo/beta": 2.609062671661377, "fcm_dpo/delta": -0.13659755885601044, "fcm_dpo/margin": 0.37271663546562195, "fcm_dpo/q_t": 0.3249310255050659, "grad_norm": 777.34033203125, "learning_rate": 4.952285105344791e-07, "logits/chosen": 0.08643177151679993, "logits/rejected": 0.03597265109419823, "logps/chosen": -71.42201232910156, "logps/ref_chosen": -70.95027160644531, "logps/ref_rejected": -87.88340759277344, "logps/rejected": -88.72787475585938, "loss": 1.1343, "margin_dpo/margin_mean": 0.3727165460586548, "margin_dpo/margin_std": 0.6506966352462769, "step": 105 }, { "epoch": 0.1602418745275888, "fcm_dpo/beta": 2.6030874252319336, "fcm_dpo/delta": 0.14328259229660034, "fcm_dpo/margin": 0.2754959166049957, "fcm_dpo/q_t": 0.37446996569633484, "grad_norm": 718.373779296875, "learning_rate": 4.949679871846857e-07, "logits/chosen": 0.10392872244119644, "logits/rejected": 0.0915747880935669, "logps/chosen": -62.947120666503906, "logps/ref_chosen": -62.45933151245117, "logps/ref_rejected": -67.00595092773438, "logps/rejected": -67.76923370361328, "loss": 1.2383, "margin_dpo/margin_mean": 0.27549615502357483, "margin_dpo/margin_std": 0.5983477234840393, "step": 106 }, { "epoch": 0.1617535903250189, "fcm_dpo/beta": 2.7499284744262695, "fcm_dpo/delta": 0.3244553506374359, "fcm_dpo/margin": 0.19795957207679749, "fcm_dpo/q_t": 0.42030882835388184, "grad_norm": 988.1804809570312, "learning_rate": 4.947006115536947e-07, "logits/chosen": 0.043431270867586136, "logits/rejected": 0.025126943364739418, "logps/chosen": -76.38347625732422, "logps/ref_chosen": -75.83796691894531, "logps/ref_rejected": -87.74038696289062, "logps/rejected": -88.48384094238281, "loss": 1.5542, "margin_dpo/margin_mean": 0.19795984029769897, "margin_dpo/margin_std": 0.7145728468894958, "step": 107 }, { "epoch": 0.16326530612244897, "fcm_dpo/beta": 2.7320618629455566, "fcm_dpo/delta": -0.21486234664916992, "fcm_dpo/margin": 0.3816843628883362, "fcm_dpo/q_t": 0.35164210200309753, "grad_norm": 659.1549682617188, "learning_rate": 4.944263911205772e-07, "logits/chosen": 0.0848315954208374, "logits/rejected": 0.05806386470794678, "logps/chosen": -68.84207153320312, "logps/ref_chosen": -68.39323425292969, "logps/ref_rejected": -83.24267578125, "logps/rejected": -84.07319641113281, "loss": 1.1571, "margin_dpo/margin_mean": 0.38168424367904663, "margin_dpo/margin_std": 0.6761659383773804, "step": 108 }, { "epoch": 0.16477702191987906, "fcm_dpo/beta": 2.7161760330200195, "fcm_dpo/delta": 0.05944516137242317, "fcm_dpo/margin": 0.29280829429626465, "fcm_dpo/q_t": 0.375564306974411, "grad_norm": 748.4414672851562, "learning_rate": 4.941453335558681e-07, "logits/chosen": 0.11102043837308884, "logits/rejected": 0.06125715374946594, "logps/chosen": -56.02159881591797, "logps/ref_chosen": -55.52748107910156, "logps/ref_rejected": -83.55218505859375, "logps/rejected": -84.339111328125, "loss": 1.2473, "margin_dpo/margin_mean": 0.292807400226593, "margin_dpo/margin_std": 0.6247273683547974, "step": 109 }, { "epoch": 0.16628873771730915, "fcm_dpo/beta": 2.8052263259887695, "fcm_dpo/delta": 0.17110225558280945, "fcm_dpo/margin": 0.24601304531097412, "fcm_dpo/q_t": 0.39738592505455017, "grad_norm": 919.9741821289062, "learning_rate": 4.938574467213517e-07, "logits/chosen": 0.04998182877898216, "logits/rejected": 0.056924428790807724, "logps/chosen": -81.71244049072266, "logps/ref_chosen": -81.15874481201172, "logps/ref_rejected": -72.56021118164062, "logps/rejected": -73.35992431640625, "loss": 1.426, "margin_dpo/margin_mean": 0.24601292610168457, "margin_dpo/margin_std": 0.6903856992721558, "step": 110 }, { "epoch": 0.16780045351473924, "fcm_dpo/beta": 2.9170947074890137, "fcm_dpo/delta": 0.15463948249816895, "fcm_dpo/margin": 0.2409285306930542, "fcm_dpo/q_t": 0.37675726413726807, "grad_norm": 812.91015625, "learning_rate": 4.935627386698418e-07, "logits/chosen": 0.15344518423080444, "logits/rejected": 0.12011007964611053, "logps/chosen": -53.007442474365234, "logps/ref_chosen": -52.358985900878906, "logps/ref_rejected": -77.06150817871094, "logps/rejected": -77.95088195800781, "loss": 1.359, "margin_dpo/margin_mean": 0.2409285306930542, "margin_dpo/margin_std": 0.5762333869934082, "step": 111 }, { "epoch": 0.1693121693121693, "fcm_dpo/beta": 2.845439910888672, "fcm_dpo/delta": -0.06481163203716278, "fcm_dpo/margin": 0.31798386573791504, "fcm_dpo/q_t": 0.36621609330177307, "grad_norm": 860.8700561523438, "learning_rate": 4.932612176449559e-07, "logits/chosen": 0.061526067554950714, "logits/rejected": 0.006518724840134382, "logps/chosen": -63.515830993652344, "logps/ref_chosen": -63.02006530761719, "logps/ref_rejected": -111.36941528320312, "logps/rejected": -112.18316650390625, "loss": 1.3607, "margin_dpo/margin_mean": 0.3179827332496643, "margin_dpo/margin_std": 0.6988146305084229, "step": 112 }, { "epoch": 0.1708238851095994, "fcm_dpo/beta": 2.9742894172668457, "fcm_dpo/delta": 0.08943277597427368, "fcm_dpo/margin": 0.25452733039855957, "fcm_dpo/q_t": 0.38922226428985596, "grad_norm": 920.0910034179688, "learning_rate": 4.929528920808854e-07, "logits/chosen": 0.0860273689031601, "logits/rejected": 0.05197536200284958, "logps/chosen": -56.43023681640625, "logps/ref_chosen": -55.80766296386719, "logps/ref_rejected": -69.84014129638672, "logps/rejected": -70.71724700927734, "loss": 1.4264, "margin_dpo/margin_mean": 0.2545267343521118, "margin_dpo/margin_std": 0.635810911655426, "step": 113 }, { "epoch": 0.17233560090702948, "fcm_dpo/beta": 2.616295337677002, "fcm_dpo/delta": -0.5849612951278687, "fcm_dpo/margin": 0.5088604688644409, "fcm_dpo/q_t": 0.2969014048576355, "grad_norm": 562.796630859375, "learning_rate": 4.92637770602159e-07, "logits/chosen": 0.1057385504245758, "logits/rejected": 0.050528474152088165, "logps/chosen": -66.74571228027344, "logps/ref_chosen": -66.33277130126953, "logps/ref_rejected": -71.61489868164062, "logps/rejected": -72.53669738769531, "loss": 0.9587, "margin_dpo/margin_mean": 0.5088605880737305, "margin_dpo/margin_std": 0.6802812814712524, "step": 114 }, { "epoch": 0.17384731670445955, "fcm_dpo/beta": 2.7045106887817383, "fcm_dpo/delta": 0.11715377867221832, "fcm_dpo/margin": 0.27014443278312683, "fcm_dpo/q_t": 0.3813377618789673, "grad_norm": 830.4659423828125, "learning_rate": 4.923158620234019e-07, "logits/chosen": 0.11370372772216797, "logits/rejected": 0.059039607644081116, "logps/chosen": -56.367103576660156, "logps/ref_chosen": -55.74903869628906, "logps/ref_rejected": -79.59849548339844, "logps/rejected": -80.48670959472656, "loss": 1.2694, "margin_dpo/margin_mean": 0.27014434337615967, "margin_dpo/margin_std": 0.6018053889274597, "step": 115 }, { "epoch": 0.17535903250188964, "fcm_dpo/beta": 2.6384530067443848, "fcm_dpo/delta": -0.15713399648666382, "fcm_dpo/margin": 0.37526413798332214, "fcm_dpo/q_t": 0.3505271077156067, "grad_norm": 637.3699340820312, "learning_rate": 4.91987175349089e-07, "logits/chosen": 0.09473671019077301, "logits/rejected": 0.03647337108850479, "logps/chosen": -49.91804504394531, "logps/ref_chosen": -49.36516571044922, "logps/ref_rejected": -72.84671020507812, "logps/rejected": -73.77484893798828, "loss": 1.1559, "margin_dpo/margin_mean": 0.37526440620422363, "margin_dpo/margin_std": 0.665389895439148, "step": 116 }, { "epoch": 0.17687074829931973, "fcm_dpo/beta": 2.590177297592163, "fcm_dpo/delta": 0.11993909627199173, "fcm_dpo/margin": 0.2842903733253479, "fcm_dpo/q_t": 0.37484538555145264, "grad_norm": 673.7462768554688, "learning_rate": 4.916517197732933e-07, "logits/chosen": 0.11810576915740967, "logits/rejected": 0.08560114353895187, "logps/chosen": -58.2153205871582, "logps/ref_chosen": -57.710899353027344, "logps/ref_rejected": -69.77253723144531, "logps/rejected": -70.5612564086914, "loss": 1.2845, "margin_dpo/margin_mean": 0.28429079055786133, "margin_dpo/margin_std": 0.6136384010314941, "step": 117 }, { "epoch": 0.17838246409674982, "fcm_dpo/beta": 2.5296597480773926, "fcm_dpo/delta": -0.21083010733127594, "fcm_dpo/margin": 0.40842828154563904, "fcm_dpo/q_t": 0.32514363527297974, "grad_norm": 606.0830078125, "learning_rate": 4.913095046794281e-07, "logits/chosen": 0.16361942887306213, "logits/rejected": 0.1275358349084854, "logps/chosen": -52.91107940673828, "logps/ref_chosen": -52.479896545410156, "logps/ref_rejected": -81.359130859375, "logps/rejected": -82.19873809814453, "loss": 0.9701, "margin_dpo/margin_mean": 0.4084276854991913, "margin_dpo/margin_std": 0.54984050989151, "step": 118 }, { "epoch": 0.17989417989417988, "fcm_dpo/beta": 2.512711763381958, "fcm_dpo/delta": -0.045835524797439575, "fcm_dpo/margin": 0.354824960231781, "fcm_dpo/q_t": 0.36521679162979126, "grad_norm": 660.1326904296875, "learning_rate": 4.909605396399855e-07, "logits/chosen": 0.09545660018920898, "logits/rejected": 0.06180203706026077, "logps/chosen": -61.986427307128906, "logps/ref_chosen": -61.35767364501953, "logps/ref_rejected": -75.71510314941406, "logps/rejected": -76.69867706298828, "loss": 1.2155, "margin_dpo/margin_mean": 0.35482484102249146, "margin_dpo/margin_std": 0.6957262754440308, "step": 119 }, { "epoch": 0.18140589569160998, "fcm_dpo/beta": 2.400790214538574, "fcm_dpo/delta": -0.25915199518203735, "fcm_dpo/margin": 0.4492540657520294, "fcm_dpo/q_t": 0.3342965245246887, "grad_norm": 572.9517822265625, "learning_rate": 4.906048344162676e-07, "logits/chosen": 0.08783574402332306, "logits/rejected": 0.035401877015829086, "logps/chosen": -60.40111541748047, "logps/ref_chosen": -59.907569885253906, "logps/ref_rejected": -79.6910629272461, "logps/rejected": -80.63386535644531, "loss": 1.018, "margin_dpo/margin_mean": 0.44925418496131897, "margin_dpo/margin_std": 0.7025552988052368, "step": 120 }, { "epoch": 0.18291761148904007, "fcm_dpo/beta": 2.3966712951660156, "fcm_dpo/delta": 0.10127197206020355, "fcm_dpo/margin": 0.31514492630958557, "fcm_dpo/q_t": 0.3769919276237488, "grad_norm": 587.5, "learning_rate": 4.902423989581143e-07, "logits/chosen": 0.16948141157627106, "logits/rejected": 0.09744793176651001, "logps/chosen": -56.259464263916016, "logps/ref_chosen": -55.66604232788086, "logps/ref_rejected": -101.56233978271484, "logps/rejected": -102.47091674804688, "loss": 1.2044, "margin_dpo/margin_mean": 0.3151443600654602, "margin_dpo/margin_std": 0.6316248178482056, "step": 121 }, { "epoch": 0.18442932728647016, "fcm_dpo/beta": 2.2983148097991943, "fcm_dpo/delta": -0.4261060357093811, "fcm_dpo/margin": 0.5331162214279175, "fcm_dpo/q_t": 0.29028403759002686, "grad_norm": 550.7630615234375, "learning_rate": 4.898732434036243e-07, "logits/chosen": 0.11293643712997437, "logits/rejected": 0.0820358544588089, "logps/chosen": -63.88978958129883, "logps/ref_chosen": -63.334373474121094, "logps/ref_rejected": -73.67523193359375, "logps/rejected": -74.7637710571289, "loss": 0.9413, "margin_dpo/margin_mean": 0.5331156849861145, "margin_dpo/margin_std": 0.7090832591056824, "step": 122 }, { "epoch": 0.18594104308390022, "fcm_dpo/beta": 2.3069674968719482, "fcm_dpo/delta": 0.09509618580341339, "fcm_dpo/margin": 0.32712411880493164, "fcm_dpo/q_t": 0.3730233907699585, "grad_norm": 685.9718627929688, "learning_rate": 4.894973780788722e-07, "logits/chosen": 0.13213904201984406, "logits/rejected": 0.094410739839077, "logps/chosen": -57.433197021484375, "logps/ref_chosen": -56.89874267578125, "logps/ref_rejected": -78.97028350830078, "logps/rejected": -79.83187103271484, "loss": 1.3738, "margin_dpo/margin_mean": 0.3271239697933197, "margin_dpo/margin_std": 0.788016676902771, "step": 123 }, { "epoch": 0.1874527588813303, "fcm_dpo/beta": 2.1601970195770264, "fcm_dpo/delta": -0.31936076283454895, "fcm_dpo/margin": 0.5234503746032715, "fcm_dpo/q_t": 0.3022237718105316, "grad_norm": 433.8125, "learning_rate": 4.89114813497619e-07, "logits/chosen": 0.1536797285079956, "logits/rejected": 0.10203144699335098, "logps/chosen": -57.66457748413086, "logps/ref_chosen": -57.116085052490234, "logps/ref_rejected": -87.93074035644531, "logps/rejected": -89.002685546875, "loss": 0.8572, "margin_dpo/margin_mean": 0.5234500169754028, "margin_dpo/margin_std": 0.6252888441085815, "step": 124 }, { "epoch": 0.1889644746787604, "fcm_dpo/beta": 2.037970781326294, "fcm_dpo/delta": -0.2241692841053009, "fcm_dpo/margin": 0.5122190117835999, "fcm_dpo/q_t": 0.32587265968322754, "grad_norm": 482.951171875, "learning_rate": 4.887255603610184e-07, "logits/chosen": 0.1680688112974167, "logits/rejected": 0.11711547523736954, "logps/chosen": -66.33033752441406, "logps/ref_chosen": -65.7061767578125, "logps/ref_rejected": -91.72711944580078, "logps/rejected": -92.86349487304688, "loss": 0.9948, "margin_dpo/margin_mean": 0.5122197270393372, "margin_dpo/margin_std": 0.7382586002349854, "step": 125 }, { "epoch": 0.19047619047619047, "fcm_dpo/beta": 1.9791793823242188, "fcm_dpo/delta": -0.211552694439888, "fcm_dpo/margin": 0.5251641273498535, "fcm_dpo/q_t": 0.3407570719718933, "grad_norm": 396.5646057128906, "learning_rate": 4.883296295573176e-07, "logits/chosen": -0.02226438745856285, "logits/rejected": -0.028386151418089867, "logps/chosen": -68.68487548828125, "logps/ref_chosen": -68.17608642578125, "logps/ref_rejected": -65.1175537109375, "logps/rejected": -66.15150451660156, "loss": 1.0228, "margin_dpo/margin_mean": 0.5251647233963013, "margin_dpo/margin_std": 0.8751634359359741, "step": 126 }, { "epoch": 0.19198790627362056, "fcm_dpo/beta": 1.9311566352844238, "fcm_dpo/delta": -0.020116418600082397, "fcm_dpo/margin": 0.4483014643192291, "fcm_dpo/q_t": 0.3441677689552307, "grad_norm": 473.1387023925781, "learning_rate": 4.87927032161552e-07, "logits/chosen": 0.08045360445976257, "logits/rejected": 0.05260235071182251, "logps/chosen": -62.5809326171875, "logps/ref_chosen": -61.88023376464844, "logps/ref_rejected": -68.46012878417969, "logps/rejected": -69.609130859375, "loss": 1.0264, "margin_dpo/margin_mean": 0.4483017027378082, "margin_dpo/margin_std": 0.6659648418426514, "step": 127 }, { "epoch": 0.19349962207105065, "fcm_dpo/beta": 1.9168998003005981, "fcm_dpo/delta": -0.033443547785282135, "fcm_dpo/margin": 0.45753252506256104, "fcm_dpo/q_t": 0.35872209072113037, "grad_norm": 473.319091796875, "learning_rate": 4.875177794352363e-07, "logits/chosen": 0.11974930018186569, "logits/rejected": 0.07149255275726318, "logps/chosen": -67.38662719726562, "logps/ref_chosen": -66.708984375, "logps/ref_rejected": -94.97969055175781, "logps/rejected": -96.1148681640625, "loss": 1.1219, "margin_dpo/margin_mean": 0.45753246545791626, "margin_dpo/margin_std": 0.8322083950042725, "step": 128 }, { "epoch": 0.19501133786848074, "fcm_dpo/beta": 2.0064656734466553, "fcm_dpo/delta": 0.22321073710918427, "fcm_dpo/margin": 0.31931766867637634, "fcm_dpo/q_t": 0.38313671946525574, "grad_norm": 575.3306884765625, "learning_rate": 4.871018828260491e-07, "logits/chosen": 0.1001749187707901, "logits/rejected": 0.09252482652664185, "logps/chosen": -66.09547424316406, "logps/ref_chosen": -65.33882904052734, "logps/ref_rejected": -68.06109619140625, "logps/rejected": -69.13705444335938, "loss": 1.2288, "margin_dpo/margin_mean": 0.3193177878856659, "margin_dpo/margin_std": 0.6939840316772461, "step": 129 }, { "epoch": 0.1965230536659108, "fcm_dpo/beta": 2.0064070224761963, "fcm_dpo/delta": 0.050023213028907776, "fcm_dpo/margin": 0.39817652106285095, "fcm_dpo/q_t": 0.3601888418197632, "grad_norm": 567.6577758789062, "learning_rate": 4.866793539675126e-07, "logits/chosen": 0.07646722346544266, "logits/rejected": 0.03454095870256424, "logps/chosen": -59.36882019042969, "logps/ref_chosen": -58.660743713378906, "logps/ref_rejected": -79.24510192871094, "logps/rejected": -80.35134887695312, "loss": 1.0687, "margin_dpo/margin_mean": 0.39817655086517334, "margin_dpo/margin_std": 0.6305863857269287, "step": 130 }, { "epoch": 0.1980347694633409, "fcm_dpo/beta": 2.0193214416503906, "fcm_dpo/delta": -0.10755333304405212, "fcm_dpo/margin": 0.4692103862762451, "fcm_dpo/q_t": 0.3443056643009186, "grad_norm": 473.69549560546875, "learning_rate": 4.86250204678667e-07, "logits/chosen": 0.05275092273950577, "logits/rejected": 0.004051988013088703, "logps/chosen": -53.13858413696289, "logps/ref_chosen": -52.51453399658203, "logps/ref_rejected": -85.18299865722656, "logps/rejected": -86.27625274658203, "loss": 1.0233, "margin_dpo/margin_mean": 0.4692104160785675, "margin_dpo/margin_std": 0.719977855682373, "step": 131 }, { "epoch": 0.19954648526077098, "fcm_dpo/beta": 2.014409303665161, "fcm_dpo/delta": 0.0671209990978241, "fcm_dpo/margin": 0.3912177085876465, "fcm_dpo/q_t": 0.3556290864944458, "grad_norm": 584.9998168945312, "learning_rate": 4.858144469637408e-07, "logits/chosen": 0.1664508730173111, "logits/rejected": 0.1374509632587433, "logps/chosen": -66.4674301147461, "logps/ref_chosen": -65.68513488769531, "logps/ref_rejected": -69.54120635986328, "logps/rejected": -70.7147216796875, "loss": 1.2363, "margin_dpo/margin_mean": 0.3912178874015808, "margin_dpo/margin_std": 0.8271607160568237, "step": 132 }, { "epoch": 0.20105820105820105, "fcm_dpo/beta": 2.066967248916626, "fcm_dpo/delta": 0.10779528319835663, "fcm_dpo/margin": 0.36285972595214844, "fcm_dpo/q_t": 0.3746221363544464, "grad_norm": 585.4243774414062, "learning_rate": 4.853720930118138e-07, "logits/chosen": 0.08279750496149063, "logits/rejected": 0.07346326112747192, "logps/chosen": -64.35284423828125, "logps/ref_chosen": -63.598114013671875, "logps/ref_rejected": -73.72798156738281, "logps/rejected": -74.84557342529297, "loss": 1.254, "margin_dpo/margin_mean": 0.36285945773124695, "margin_dpo/margin_std": 0.7753314971923828, "step": 133 }, { "epoch": 0.20256991685563114, "fcm_dpo/beta": 1.9597253799438477, "fcm_dpo/delta": -0.31700825691223145, "fcm_dpo/margin": 0.5747710466384888, "fcm_dpo/q_t": 0.301810622215271, "grad_norm": 413.8312683105469, "learning_rate": 4.849231551964771e-07, "logits/chosen": 0.1624392867088318, "logits/rejected": 0.1149115115404129, "logps/chosen": -54.434722900390625, "logps/ref_chosen": -53.79457092285156, "logps/ref_rejected": -74.16741943359375, "logps/rejected": -75.38233947753906, "loss": 0.8856, "margin_dpo/margin_mean": 0.5747714042663574, "margin_dpo/margin_std": 0.7277013063430786, "step": 134 }, { "epoch": 0.20408163265306123, "fcm_dpo/beta": 2.0122458934783936, "fcm_dpo/delta": 0.2390393614768982, "fcm_dpo/margin": 0.3112886846065521, "fcm_dpo/q_t": 0.39257875084877014, "grad_norm": 493.87615966796875, "learning_rate": 4.844676460754862e-07, "logits/chosen": 0.10040457546710968, "logits/rejected": 0.07089127600193024, "logps/chosen": -50.159515380859375, "logps/ref_chosen": -49.441078186035156, "logps/ref_rejected": -65.96878051757812, "logps/rejected": -66.99850463867188, "loss": 1.2206, "margin_dpo/margin_mean": 0.31128865480422974, "margin_dpo/margin_std": 0.6809731721878052, "step": 135 }, { "epoch": 0.20559334845049132, "fcm_dpo/beta": 2.0793657302856445, "fcm_dpo/delta": 0.1040540561079979, "fcm_dpo/margin": 0.3623303472995758, "fcm_dpo/q_t": 0.39911192655563354, "grad_norm": 749.82666015625, "learning_rate": 4.840055783904106e-07, "logits/chosen": 0.1170252338051796, "logits/rejected": 0.05652901157736778, "logps/chosen": -67.56098937988281, "logps/ref_chosen": -66.75926208496094, "logps/ref_rejected": -94.61787414550781, "logps/rejected": -95.78192901611328, "loss": 1.4101, "margin_dpo/margin_mean": 0.3623313307762146, "margin_dpo/margin_std": 0.962636411190033, "step": 136 }, { "epoch": 0.20710506424792138, "fcm_dpo/beta": 2.0693423748016357, "fcm_dpo/delta": -0.1056227907538414, "fcm_dpo/margin": 0.4567766785621643, "fcm_dpo/q_t": 0.35195136070251465, "grad_norm": 478.63922119140625, "learning_rate": 4.835369650662767e-07, "logits/chosen": 0.12457015365362167, "logits/rejected": 0.10022950917482376, "logps/chosen": -57.523067474365234, "logps/ref_chosen": -56.78379821777344, "logps/ref_rejected": -69.89952087402344, "logps/rejected": -71.09556579589844, "loss": 1.1134, "margin_dpo/margin_mean": 0.45677661895751953, "margin_dpo/margin_std": 0.781032383441925, "step": 137 }, { "epoch": 0.20861678004535147, "fcm_dpo/beta": 2.088082790374756, "fcm_dpo/delta": 0.1478821486234665, "fcm_dpo/margin": 0.34126073122024536, "fcm_dpo/q_t": 0.3721213936805725, "grad_norm": 539.020751953125, "learning_rate": 4.830618192112065e-07, "logits/chosen": 0.10970278084278107, "logits/rejected": 0.07803289592266083, "logps/chosen": -59.64773178100586, "logps/ref_chosen": -58.766014099121094, "logps/ref_rejected": -68.12371826171875, "logps/rejected": -69.3467025756836, "loss": 1.2453, "margin_dpo/margin_mean": 0.3412603735923767, "margin_dpo/margin_std": 0.7408077716827393, "step": 138 }, { "epoch": 0.21012849584278157, "fcm_dpo/beta": 2.0522103309631348, "fcm_dpo/delta": -0.23259088397026062, "fcm_dpo/margin": 0.5156592130661011, "fcm_dpo/q_t": 0.33579808473587036, "grad_norm": 497.1232604980469, "learning_rate": 4.825801541160509e-07, "logits/chosen": 0.10866852849721909, "logits/rejected": 0.08270702511072159, "logps/chosen": -72.01712036132812, "logps/ref_chosen": -71.2255859375, "logps/ref_rejected": -82.1834716796875, "logps/rejected": -83.49066162109375, "loss": 1.0294, "margin_dpo/margin_mean": 0.515658974647522, "margin_dpo/margin_std": 0.769908607006073, "step": 139 }, { "epoch": 0.21164021164021163, "fcm_dpo/beta": 1.8860867023468018, "fcm_dpo/delta": -0.42799749970436096, "fcm_dpo/margin": 0.6498202085494995, "fcm_dpo/q_t": 0.2918680012226105, "grad_norm": 530.7315063476562, "learning_rate": 4.820919832540181e-07, "logits/chosen": 0.08455317467451096, "logits/rejected": 0.04641052335500717, "logps/chosen": -64.02261352539062, "logps/ref_chosen": -63.27766418457031, "logps/ref_rejected": -83.30647277832031, "logps/rejected": -84.70124816894531, "loss": 1.0826, "margin_dpo/margin_mean": 0.649819552898407, "margin_dpo/margin_std": 0.9729900360107422, "step": 140 }, { "epoch": 0.21315192743764172, "fcm_dpo/beta": 1.8111655712127686, "fcm_dpo/delta": -0.05867675691843033, "fcm_dpo/margin": 0.4977453052997589, "fcm_dpo/q_t": 0.36327552795410156, "grad_norm": 493.1156005859375, "learning_rate": 4.815973202802966e-07, "logits/chosen": 0.12604521214962006, "logits/rejected": 0.08942769467830658, "logps/chosen": -62.58486557006836, "logps/ref_chosen": -61.76676940917969, "logps/ref_rejected": -88.60601806640625, "logps/rejected": -89.92186737060547, "loss": 1.1357, "margin_dpo/margin_mean": 0.4977456331253052, "margin_dpo/margin_std": 0.8883162140846252, "step": 141 }, { "epoch": 0.2146636432350718, "fcm_dpo/beta": 1.8200013637542725, "fcm_dpo/delta": 0.021424515172839165, "fcm_dpo/margin": 0.4561222195625305, "fcm_dpo/q_t": 0.3619512617588043, "grad_norm": 431.1319274902344, "learning_rate": 4.810961790316729e-07, "logits/chosen": 0.11921191215515137, "logits/rejected": 0.09684738516807556, "logps/chosen": -65.99165344238281, "logps/ref_chosen": -65.2747802734375, "logps/ref_rejected": -81.1378173828125, "logps/rejected": -82.31082153320312, "loss": 1.0878, "margin_dpo/margin_mean": 0.45612233877182007, "margin_dpo/margin_std": 0.8028172254562378, "step": 142 }, { "epoch": 0.2161753590325019, "fcm_dpo/beta": 1.8570456504821777, "fcm_dpo/delta": 0.14970946311950684, "fcm_dpo/margin": 0.3828372359275818, "fcm_dpo/q_t": 0.37365925312042236, "grad_norm": 583.10400390625, "learning_rate": 4.805885735261454e-07, "logits/chosen": 0.09592782706022263, "logits/rejected": 0.08162565529346466, "logps/chosen": -63.41217041015625, "logps/ref_chosen": -62.617828369140625, "logps/ref_rejected": -70.39239501953125, "logps/rejected": -71.56956481933594, "loss": 1.2184, "margin_dpo/margin_mean": 0.38283705711364746, "margin_dpo/margin_std": 0.7997678518295288, "step": 143 }, { "epoch": 0.21768707482993196, "fcm_dpo/beta": 1.8605518341064453, "fcm_dpo/delta": -0.11310499161481857, "fcm_dpo/margin": 0.5119171142578125, "fcm_dpo/q_t": 0.3569183945655823, "grad_norm": 530.5613403320312, "learning_rate": 4.800745179625307e-07, "logits/chosen": 0.08935252577066422, "logits/rejected": 0.06564676761627197, "logps/chosen": -61.614990234375, "logps/ref_chosen": -60.80268859863281, "logps/ref_rejected": -79.07284545898438, "logps/rejected": -80.39706420898438, "loss": 1.1037, "margin_dpo/margin_mean": 0.5119173526763916, "margin_dpo/margin_std": 0.8959058523178101, "step": 144 }, { "epoch": 0.21919879062736206, "fcm_dpo/beta": 1.8628008365631104, "fcm_dpo/delta": 0.05122518166899681, "fcm_dpo/margin": 0.4308074712753296, "fcm_dpo/q_t": 0.37797456979751587, "grad_norm": 631.3311157226562, "learning_rate": 4.795540267200686e-07, "logits/chosen": 0.06172323599457741, "logits/rejected": 0.07850059121847153, "logps/chosen": -75.40628051757812, "logps/ref_chosen": -74.61146545410156, "logps/ref_rejected": -83.24461364746094, "logps/rejected": -84.4702377319336, "loss": 1.2266, "margin_dpo/margin_mean": 0.4308076798915863, "margin_dpo/margin_std": 0.8929077386856079, "step": 145 }, { "epoch": 0.22071050642479215, "fcm_dpo/beta": 1.8260502815246582, "fcm_dpo/delta": -0.1331080198287964, "fcm_dpo/margin": 0.5313305258750916, "fcm_dpo/q_t": 0.33441442251205444, "grad_norm": 422.6907958984375, "learning_rate": 4.790271143580173e-07, "logits/chosen": 0.0609685517847538, "logits/rejected": 0.04538050293922424, "logps/chosen": -58.57465744018555, "logps/ref_chosen": -57.84098434448242, "logps/ref_rejected": -67.47422790527344, "logps/rejected": -68.73922729492188, "loss": 1.0638, "margin_dpo/margin_mean": 0.5313305854797363, "margin_dpo/margin_std": 0.8781388998031616, "step": 146 }, { "epoch": 0.2222222222222222, "fcm_dpo/beta": 1.8587777614593506, "fcm_dpo/delta": 0.1607896387577057, "fcm_dpo/margin": 0.3767205476760864, "fcm_dpo/q_t": 0.37250229716300964, "grad_norm": 581.6641235351562, "learning_rate": 4.784937956152489e-07, "logits/chosen": 0.0543268620967865, "logits/rejected": 0.01894828863441944, "logps/chosen": -67.6400375366211, "logps/ref_chosen": -66.81346893310547, "logps/ref_rejected": -81.1796875, "logps/rejected": -82.38298034667969, "loss": 1.1887, "margin_dpo/margin_mean": 0.37672001123428345, "margin_dpo/margin_std": 0.7602115869522095, "step": 147 }, { "epoch": 0.2237339380196523, "fcm_dpo/beta": 1.8126616477966309, "fcm_dpo/delta": -0.24772456288337708, "fcm_dpo/margin": 0.5912690162658691, "fcm_dpo/q_t": 0.3363417088985443, "grad_norm": 373.9577331542969, "learning_rate": 4.779540854098347e-07, "logits/chosen": 0.1572439968585968, "logits/rejected": 0.09718590974807739, "logps/chosen": -49.48698806762695, "logps/ref_chosen": -48.6877555847168, "logps/ref_rejected": -67.50503540039062, "logps/rejected": -68.89553833007812, "loss": 1.0406, "margin_dpo/margin_mean": 0.5912688970565796, "margin_dpo/margin_std": 0.9395405054092407, "step": 148 }, { "epoch": 0.2252456538170824, "fcm_dpo/beta": 1.7451000213623047, "fcm_dpo/delta": -0.16907699406147003, "fcm_dpo/margin": 0.5744267702102661, "fcm_dpo/q_t": 0.3332281708717346, "grad_norm": 422.275634765625, "learning_rate": 4.774079988386296e-07, "logits/chosen": 0.06443419307470322, "logits/rejected": 0.021416954696178436, "logps/chosen": -56.10673904418945, "logps/ref_chosen": -55.143775939941406, "logps/ref_rejected": -64.79888916015625, "logps/rejected": -66.33627319335938, "loss": 1.0112, "margin_dpo/margin_mean": 0.5744273662567139, "margin_dpo/margin_std": 0.8520516157150269, "step": 149 }, { "epoch": 0.22675736961451248, "fcm_dpo/beta": 1.6926888227462769, "fcm_dpo/delta": -0.16446954011917114, "fcm_dpo/margin": 0.5896965265274048, "fcm_dpo/q_t": 0.31847673654556274, "grad_norm": 410.8077697753906, "learning_rate": 4.768555511768486e-07, "logits/chosen": 0.10813453793525696, "logits/rejected": 0.07115641236305237, "logps/chosen": -68.07341003417969, "logps/ref_chosen": -67.47074890136719, "logps/ref_rejected": -89.21170806884766, "logps/rejected": -90.40406799316406, "loss": 0.9558, "margin_dpo/margin_mean": 0.5896967649459839, "margin_dpo/margin_std": 0.7787685394287109, "step": 150 }, { "epoch": 0.22826908541194255, "fcm_dpo/beta": 1.6143739223480225, "fcm_dpo/delta": -0.25309672951698303, "fcm_dpo/margin": 0.6668341159820557, "fcm_dpo/q_t": 0.3007487654685974, "grad_norm": 306.0788269042969, "learning_rate": 4.762967578776406e-07, "logits/chosen": 0.0946289449930191, "logits/rejected": 0.05068827420473099, "logps/chosen": -53.11603546142578, "logps/ref_chosen": -52.45954132080078, "logps/ref_rejected": -79.0630111694336, "logps/rejected": -80.38633728027344, "loss": 0.8835, "margin_dpo/margin_mean": 0.6668342351913452, "margin_dpo/margin_std": 0.7877708077430725, "step": 151 }, { "epoch": 0.22978080120937264, "fcm_dpo/beta": 1.5444799661636353, "fcm_dpo/delta": -0.18640229105949402, "fcm_dpo/margin": 0.6589242219924927, "fcm_dpo/q_t": 0.3180433511734009, "grad_norm": 338.899658203125, "learning_rate": 4.757316345716553e-07, "logits/chosen": 0.14255166053771973, "logits/rejected": 0.10172566026449203, "logps/chosen": -57.358360290527344, "logps/ref_chosen": -56.5538330078125, "logps/ref_rejected": -76.55074310302734, "logps/rejected": -78.01419067382812, "loss": 0.9339, "margin_dpo/margin_mean": 0.6589239835739136, "margin_dpo/margin_std": 0.8599318265914917, "step": 152 }, { "epoch": 0.23129251700680273, "fcm_dpo/beta": 1.5402113199234009, "fcm_dpo/delta": 0.06967150419950485, "fcm_dpo/margin": 0.5102236270904541, "fcm_dpo/q_t": 0.35215288400650024, "grad_norm": 371.8020935058594, "learning_rate": 4.751601970666064e-07, "logits/chosen": 0.06787143647670746, "logits/rejected": 0.03406914696097374, "logps/chosen": -68.78588104248047, "logps/ref_chosen": -68.00689697265625, "logps/ref_rejected": -74.83482360839844, "logps/rejected": -76.12403869628906, "loss": 1.0502, "margin_dpo/margin_mean": 0.5102236270904541, "margin_dpo/margin_std": 0.7997216582298279, "step": 153 }, { "epoch": 0.2328042328042328, "fcm_dpo/beta": 1.5851788520812988, "fcm_dpo/delta": 0.15434226393699646, "fcm_dpo/margin": 0.4456132650375366, "fcm_dpo/q_t": 0.37568652629852295, "grad_norm": 404.9493713378906, "learning_rate": 4.745824613468292e-07, "logits/chosen": 0.13575251400470734, "logits/rejected": 0.13199105858802795, "logps/chosen": -60.18925476074219, "logps/ref_chosen": -59.222537994384766, "logps/ref_rejected": -64.19131469726562, "logps/rejected": -65.60365295410156, "loss": 1.2275, "margin_dpo/margin_mean": 0.4456136226654053, "margin_dpo/margin_std": 0.9545999765396118, "step": 154 }, { "epoch": 0.23431594860166288, "fcm_dpo/beta": 1.6388814449310303, "fcm_dpo/delta": 0.1953224092721939, "fcm_dpo/margin": 0.40763676166534424, "fcm_dpo/q_t": 0.3838460147380829, "grad_norm": 457.6610107421875, "learning_rate": 4.7399844357283393e-07, "logits/chosen": 0.12242255359888077, "logits/rejected": 0.10534698516130447, "logps/chosen": -69.38691711425781, "logps/ref_chosen": -68.45469665527344, "logps/ref_rejected": -77.91763305664062, "logps/rejected": -79.25749206542969, "loss": 1.2961, "margin_dpo/margin_mean": 0.40763652324676514, "margin_dpo/margin_std": 0.9401887655258179, "step": 155 }, { "epoch": 0.23582766439909297, "fcm_dpo/beta": 1.6212600469589233, "fcm_dpo/delta": -0.16775542497634888, "fcm_dpo/margin": 0.6174951791763306, "fcm_dpo/q_t": 0.33825477957725525, "grad_norm": 420.23248291015625, "learning_rate": 4.7340816008085305e-07, "logits/chosen": 0.09560365974903107, "logits/rejected": 0.05767889693379402, "logps/chosen": -68.15042114257812, "logps/ref_chosen": -67.26959991455078, "logps/ref_rejected": -86.95914459228516, "logps/rejected": -88.45746612548828, "loss": 0.9711, "margin_dpo/margin_mean": 0.6174949407577515, "margin_dpo/margin_std": 0.9183490872383118, "step": 156 }, { "epoch": 0.23733938019652306, "fcm_dpo/beta": 1.5758273601531982, "fcm_dpo/delta": -0.013841405510902405, "fcm_dpo/margin": 0.5440424680709839, "fcm_dpo/q_t": 0.334421306848526, "grad_norm": 375.8836669921875, "learning_rate": 4.728116273823847e-07, "logits/chosen": 0.11276492476463318, "logits/rejected": 0.09284964948892593, "logps/chosen": -55.550567626953125, "logps/ref_chosen": -54.77287292480469, "logps/ref_rejected": -63.87866973876953, "logps/rejected": -65.20040893554688, "loss": 1.0336, "margin_dpo/margin_mean": 0.5440424084663391, "margin_dpo/margin_std": 0.816327691078186, "step": 157 }, { "epoch": 0.23885109599395313, "fcm_dpo/beta": 1.5792837142944336, "fcm_dpo/delta": -0.07293359935283661, "fcm_dpo/margin": 0.5799823999404907, "fcm_dpo/q_t": 0.3303491473197937, "grad_norm": 387.7180480957031, "learning_rate": 4.7220886216373085e-07, "logits/chosen": 0.10120804607868195, "logits/rejected": 0.0727510154247284, "logps/chosen": -65.7298583984375, "logps/ref_chosen": -64.92271423339844, "logps/ref_rejected": -82.23789978027344, "logps/rejected": -83.62501525878906, "loss": 0.9862, "margin_dpo/margin_mean": 0.5799820423126221, "margin_dpo/margin_std": 0.8221684098243713, "step": 158 }, { "epoch": 0.24036281179138322, "fcm_dpo/beta": 1.6407793760299683, "fcm_dpo/delta": 0.1321692019701004, "fcm_dpo/margin": 0.43719008564949036, "fcm_dpo/q_t": 0.3659708499908447, "grad_norm": 493.34710693359375, "learning_rate": 4.715998812855304e-07, "logits/chosen": 0.1393783539533615, "logits/rejected": 0.1092163473367691, "logps/chosen": -57.99412536621094, "logps/ref_chosen": -57.046993255615234, "logps/ref_rejected": -73.32441711425781, "logps/rejected": -74.708740234375, "loss": 1.228, "margin_dpo/margin_mean": 0.43719035387039185, "margin_dpo/margin_std": 0.8903641700744629, "step": 159 }, { "epoch": 0.2418745275888133, "fcm_dpo/beta": 1.640321969985962, "fcm_dpo/delta": 0.06273063272237778, "fcm_dpo/margin": 0.48271435499191284, "fcm_dpo/q_t": 0.3727272152900696, "grad_norm": 414.4522399902344, "learning_rate": 4.7098470178228755e-07, "logits/chosen": -0.017423782497644424, "logits/rejected": -0.04727357625961304, "logps/chosen": -50.78994369506836, "logps/ref_chosen": -49.806915283203125, "logps/ref_rejected": -68.3370132446289, "logps/rejected": -69.8027572631836, "loss": 1.1378, "margin_dpo/margin_mean": 0.48271453380584717, "margin_dpo/margin_std": 0.9058699607849121, "step": 160 }, { "epoch": 0.24338624338624337, "fcm_dpo/beta": 1.6497435569763184, "fcm_dpo/delta": -0.025253944098949432, "fcm_dpo/margin": 0.5282729864120483, "fcm_dpo/q_t": 0.36747777462005615, "grad_norm": 402.39227294921875, "learning_rate": 4.703633408618955e-07, "logits/chosen": 0.11677715182304382, "logits/rejected": 0.0853380635380745, "logps/chosen": -53.445526123046875, "logps/ref_chosen": -52.50048828125, "logps/ref_rejected": -66.04540252685547, "logps/rejected": -67.51871490478516, "loss": 1.1383, "margin_dpo/margin_mean": 0.5282737016677856, "margin_dpo/margin_std": 0.960330069065094, "step": 161 }, { "epoch": 0.24489795918367346, "fcm_dpo/beta": 1.5421152114868164, "fcm_dpo/delta": -0.3480435609817505, "fcm_dpo/margin": 0.7493581175804138, "fcm_dpo/q_t": 0.28938403725624084, "grad_norm": 365.9737854003906, "learning_rate": 4.697358159051549e-07, "logits/chosen": 0.1516774296760559, "logits/rejected": 0.1125471442937851, "logps/chosen": -70.50188446044922, "logps/ref_chosen": -69.46919250488281, "logps/ref_rejected": -92.00952911376953, "logps/rejected": -93.79158020019531, "loss": 0.9088, "margin_dpo/margin_mean": 0.7493584156036377, "margin_dpo/margin_std": 0.9313629865646362, "step": 162 }, { "epoch": 0.24640967498110355, "fcm_dpo/beta": 1.480248212814331, "fcm_dpo/delta": -0.2779368758201599, "fcm_dpo/margin": 0.7414557933807373, "fcm_dpo/q_t": 0.3119097352027893, "grad_norm": 371.7340087890625, "learning_rate": 4.691021444652876e-07, "logits/chosen": 0.10076501965522766, "logits/rejected": 0.06344390660524368, "logps/chosen": -51.57759094238281, "logps/ref_chosen": -50.613834381103516, "logps/ref_rejected": -74.62033081054688, "logps/rejected": -76.32554626464844, "loss": 0.9404, "margin_dpo/margin_mean": 0.7414567470550537, "margin_dpo/margin_std": 0.9699376225471497, "step": 163 }, { "epoch": 0.24792139077853365, "fcm_dpo/beta": 1.425843358039856, "fcm_dpo/delta": -0.0862051472067833, "fcm_dpo/margin": 0.6510294675827026, "fcm_dpo/q_t": 0.3333462178707123, "grad_norm": 325.4397888183594, "learning_rate": 4.6846234426744624e-07, "logits/chosen": 0.0917167067527771, "logits/rejected": 0.041261181235313416, "logps/chosen": -55.971466064453125, "logps/ref_chosen": -54.848114013671875, "logps/ref_rejected": -79.0630111694336, "logps/rejected": -80.83738708496094, "loss": 0.9959, "margin_dpo/margin_mean": 0.6510298252105713, "margin_dpo/margin_std": 0.9245976805686951, "step": 164 }, { "epoch": 0.2494331065759637, "fcm_dpo/beta": 1.4056333303451538, "fcm_dpo/delta": -0.08572079241275787, "fcm_dpo/margin": 0.6598940491676331, "fcm_dpo/q_t": 0.32658183574676514, "grad_norm": 278.6569519042969, "learning_rate": 4.678164332082175e-07, "logits/chosen": 0.15755686163902283, "logits/rejected": 0.11023740470409393, "logps/chosen": -52.1827392578125, "logps/ref_chosen": -51.089210510253906, "logps/ref_rejected": -71.23370361328125, "logps/rejected": -72.98712158203125, "loss": 0.944, "margin_dpo/margin_mean": 0.6598936319351196, "margin_dpo/margin_std": 0.8561975359916687, "step": 165 }, { "epoch": 0.2509448223733938, "fcm_dpo/beta": 1.4288179874420166, "fcm_dpo/delta": 0.1548466831445694, "fcm_dpo/margin": 0.4935830235481262, "fcm_dpo/q_t": 0.378057599067688, "grad_norm": 363.7931213378906, "learning_rate": 4.6716442935512214e-07, "logits/chosen": 0.11771443486213684, "logits/rejected": 0.05061034858226776, "logps/chosen": -64.18374633789062, "logps/ref_chosen": -63.19081115722656, "logps/ref_rejected": -93.8402099609375, "logps/rejected": -95.32672119140625, "loss": 1.1495, "margin_dpo/margin_mean": 0.4935823976993561, "margin_dpo/margin_std": 0.9294769167900085, "step": 166 }, { "epoch": 0.25245653817082386, "fcm_dpo/beta": 1.3979008197784424, "fcm_dpo/delta": -0.15741577744483948, "fcm_dpo/margin": 0.7090965509414673, "fcm_dpo/q_t": 0.3177732825279236, "grad_norm": 280.4785461425781, "learning_rate": 4.6650635094610966e-07, "logits/chosen": 0.059697844088077545, "logits/rejected": 0.029954345896840096, "logps/chosen": -59.82019805908203, "logps/ref_chosen": -58.92427062988281, "logps/ref_rejected": -72.97377014160156, "logps/rejected": -74.57879638671875, "loss": 0.9047, "margin_dpo/margin_mean": 0.7090966701507568, "margin_dpo/margin_std": 0.8868396282196045, "step": 167 }, { "epoch": 0.25396825396825395, "fcm_dpo/beta": 1.4516682624816895, "fcm_dpo/delta": 0.32116368412971497, "fcm_dpo/margin": 0.3770996034145355, "fcm_dpo/q_t": 0.39416801929473877, "grad_norm": 410.0304870605469, "learning_rate": 4.6584221638904767e-07, "logits/chosen": 0.10317844152450562, "logits/rejected": 0.079832062125206, "logps/chosen": -66.82249450683594, "logps/ref_chosen": -65.65138244628906, "logps/ref_rejected": -79.71418762207031, "logps/rejected": -81.26239776611328, "loss": 1.181, "margin_dpo/margin_mean": 0.37709951400756836, "margin_dpo/margin_std": 0.7641937732696533, "step": 168 }, { "epoch": 0.25547996976568405, "fcm_dpo/beta": 1.438971996307373, "fcm_dpo/delta": -0.19553548097610474, "fcm_dpo/margin": 0.7125035524368286, "fcm_dpo/q_t": 0.32765817642211914, "grad_norm": 361.0661926269531, "learning_rate": 4.651720442612075e-07, "logits/chosen": 0.16506020724773407, "logits/rejected": 0.13683012127876282, "logps/chosen": -62.39191818237305, "logps/ref_chosen": -61.425865173339844, "logps/ref_rejected": -76.09590148925781, "logps/rejected": -77.77445220947266, "loss": 0.9547, "margin_dpo/margin_mean": 0.7125036716461182, "margin_dpo/margin_std": 1.0379252433776855, "step": 169 }, { "epoch": 0.25699168556311414, "fcm_dpo/beta": 1.4439091682434082, "fcm_dpo/delta": 0.06322521716356277, "fcm_dpo/margin": 0.5480768084526062, "fcm_dpo/q_t": 0.35138139128685, "grad_norm": 313.36566162109375, "learning_rate": 4.6449585330874425e-07, "logits/chosen": 0.08458521962165833, "logits/rejected": 0.08335210382938385, "logps/chosen": -57.703369140625, "logps/ref_chosen": -56.65319061279297, "logps/ref_rejected": -63.45965576171875, "logps/rejected": -65.05790710449219, "loss": 1.0725, "margin_dpo/margin_mean": 0.5480765104293823, "margin_dpo/margin_std": 0.8856065273284912, "step": 170 }, { "epoch": 0.2585034013605442, "fcm_dpo/beta": 1.3877865076065063, "fcm_dpo/delta": -0.18020044267177582, "fcm_dpo/margin": 0.7254009246826172, "fcm_dpo/q_t": 0.3164675533771515, "grad_norm": 300.18817138671875, "learning_rate": 4.6381366244617224e-07, "logits/chosen": 0.16546288132667542, "logits/rejected": 0.12482231855392456, "logps/chosen": -64.7464599609375, "logps/ref_chosen": -63.73476028442383, "logps/ref_rejected": -78.50328063964844, "logps/rejected": -80.24037170410156, "loss": 0.9604, "margin_dpo/margin_mean": 0.7254012823104858, "margin_dpo/margin_std": 0.968841552734375, "step": 171 }, { "epoch": 0.2600151171579743, "fcm_dpo/beta": 1.3836549520492554, "fcm_dpo/delta": -0.12257562577724457, "fcm_dpo/margin": 0.6935369372367859, "fcm_dpo/q_t": 0.3231150507926941, "grad_norm": 285.284423828125, "learning_rate": 4.631254907558365e-07, "logits/chosen": 0.15227068960666656, "logits/rejected": 0.1104995459318161, "logps/chosen": -53.32598114013672, "logps/ref_chosen": -52.201759338378906, "logps/ref_rejected": -82.85285949707031, "logps/rejected": -84.67062377929688, "loss": 0.9731, "margin_dpo/margin_mean": 0.6935364007949829, "margin_dpo/margin_std": 0.9233601689338684, "step": 172 }, { "epoch": 0.2615268329554044, "fcm_dpo/beta": 1.3000613451004028, "fcm_dpo/delta": -0.1376366764307022, "fcm_dpo/margin": 0.7386313080787659, "fcm_dpo/q_t": 0.34407109022140503, "grad_norm": 271.6639709472656, "learning_rate": 4.624313574873786e-07, "logits/chosen": 0.1513877511024475, "logits/rejected": 0.08314318209886551, "logps/chosen": -56.60365676879883, "logps/ref_chosen": -55.434722900390625, "logps/ref_rejected": -77.81967163085938, "logps/rejected": -79.72723388671875, "loss": 1.0046, "margin_dpo/margin_mean": 0.7386313676834106, "margin_dpo/margin_std": 1.1225433349609375, "step": 173 }, { "epoch": 0.26303854875283444, "fcm_dpo/beta": 1.2976107597351074, "fcm_dpo/delta": -0.1577700823545456, "fcm_dpo/margin": 0.764767050743103, "fcm_dpo/q_t": 0.329203724861145, "grad_norm": 344.06982421875, "learning_rate": 4.61731282057198e-07, "logits/chosen": 0.14828677475452423, "logits/rejected": 0.09550824761390686, "logps/chosen": -58.302215576171875, "logps/ref_chosen": -57.17195129394531, "logps/ref_rejected": -85.47578430175781, "logps/rejected": -87.37081146240234, "loss": 0.9964, "margin_dpo/margin_mean": 0.7647665143013, "margin_dpo/margin_std": 1.099808931350708, "step": 174 }, { "epoch": 0.26455026455026454, "fcm_dpo/beta": 1.2494964599609375, "fcm_dpo/delta": -0.13457192480564117, "fcm_dpo/margin": 0.776630163192749, "fcm_dpo/q_t": 0.33105403184890747, "grad_norm": 340.8416748046875, "learning_rate": 4.6102528404790965e-07, "logits/chosen": 0.1482573300600052, "logits/rejected": 0.12473028898239136, "logps/chosen": -68.90888977050781, "logps/ref_chosen": -67.6656265258789, "logps/ref_rejected": -84.36766815185547, "logps/rejected": -86.38755798339844, "loss": 1.1129, "margin_dpo/margin_mean": 0.776630163192749, "margin_dpo/margin_std": 1.2866604328155518, "step": 175 }, { "epoch": 0.2660619803476946, "fcm_dpo/beta": 1.2561092376708984, "fcm_dpo/delta": 0.07927154749631882, "fcm_dpo/margin": 0.618436336517334, "fcm_dpo/q_t": 0.37566858530044556, "grad_norm": 377.3238525390625, "learning_rate": 4.603133832077953e-07, "logits/chosen": 0.12545280158519745, "logits/rejected": 0.10419806838035583, "logps/chosen": -79.16165924072266, "logps/ref_chosen": -77.8587646484375, "logps/ref_rejected": -81.08732604980469, "logps/rejected": -83.00865936279297, "loss": 1.2024, "margin_dpo/margin_mean": 0.6184365153312683, "margin_dpo/margin_std": 1.2843239307403564, "step": 176 }, { "epoch": 0.2675736961451247, "fcm_dpo/beta": 1.175858497619629, "fcm_dpo/delta": -0.4110918641090393, "fcm_dpo/margin": 1.0227813720703125, "fcm_dpo/q_t": 0.28089749813079834, "grad_norm": 325.0892639160156, "learning_rate": 4.5959559945025183e-07, "logits/chosen": 0.2447664737701416, "logits/rejected": 0.16549718379974365, "logps/chosen": -56.427490234375, "logps/ref_chosen": -55.22039794921875, "logps/ref_rejected": -92.54973602294922, "logps/rejected": -94.77960968017578, "loss": 0.8547, "margin_dpo/margin_mean": 1.0227817296981812, "margin_dpo/margin_std": 1.196207880973816, "step": 177 }, { "epoch": 0.2690854119425548, "fcm_dpo/beta": 1.1856530904769897, "fcm_dpo/delta": 0.15816958248615265, "fcm_dpo/margin": 0.5928707122802734, "fcm_dpo/q_t": 0.37204986810684204, "grad_norm": 311.888916015625, "learning_rate": 4.588719528532341e-07, "logits/chosen": 0.1036371961236, "logits/rejected": 0.06448065489530563, "logps/chosen": -62.1276741027832, "logps/ref_chosen": -60.81049346923828, "logps/ref_rejected": -81.12973022460938, "logps/rejected": -83.03977966308594, "loss": 1.0769, "margin_dpo/margin_mean": 0.592870831489563, "margin_dpo/margin_std": 0.9916863441467285, "step": 178 }, { "epoch": 0.2705971277399849, "fcm_dpo/beta": 1.214045524597168, "fcm_dpo/delta": 0.029299605637788773, "fcm_dpo/margin": 0.6777085661888123, "fcm_dpo/q_t": 0.3637648820877075, "grad_norm": 311.5542297363281, "learning_rate": 4.581424636586928e-07, "logits/chosen": 0.16729748249053955, "logits/rejected": 0.15362539887428284, "logps/chosen": -67.09163665771484, "logps/ref_chosen": -65.67171478271484, "logps/ref_rejected": -75.32586669921875, "logps/rejected": -77.42349243164062, "loss": 1.0894, "margin_dpo/margin_mean": 0.6777083873748779, "margin_dpo/margin_std": 1.1790449619293213, "step": 179 }, { "epoch": 0.272108843537415, "fcm_dpo/beta": 1.2290477752685547, "fcm_dpo/delta": 0.06452183425426483, "fcm_dpo/margin": 0.6426513195037842, "fcm_dpo/q_t": 0.35979628562927246, "grad_norm": 315.0635986328125, "learning_rate": 4.5740715227200897e-07, "logits/chosen": 0.018470853567123413, "logits/rejected": 0.002668549306690693, "logps/chosen": -57.85248565673828, "logps/ref_chosen": -56.68280792236328, "logps/ref_rejected": -64.94414520263672, "logps/rejected": -66.75647735595703, "loss": 1.0941, "margin_dpo/margin_mean": 0.6426514387130737, "margin_dpo/margin_std": 1.0852210521697998, "step": 180 }, { "epoch": 0.273620559334845, "fcm_dpo/beta": 1.1799449920654297, "fcm_dpo/delta": -0.3260830044746399, "fcm_dpo/margin": 0.9661595821380615, "fcm_dpo/q_t": 0.2936689853668213, "grad_norm": 262.0937194824219, "learning_rate": 4.566660392614228e-07, "logits/chosen": 0.17798230051994324, "logits/rejected": 0.1470857560634613, "logps/chosen": -61.9011344909668, "logps/ref_chosen": -60.77604675292969, "logps/ref_rejected": -83.98361206054688, "logps/rejected": -86.07485961914062, "loss": 0.8082, "margin_dpo/margin_mean": 0.9661591053009033, "margin_dpo/margin_std": 1.0348703861236572, "step": 181 }, { "epoch": 0.2751322751322751, "fcm_dpo/beta": 1.1442968845367432, "fcm_dpo/delta": -0.008135635405778885, "fcm_dpo/margin": 0.7490738034248352, "fcm_dpo/q_t": 0.3455432653427124, "grad_norm": 313.6883850097656, "learning_rate": 4.5591914535745817e-07, "logits/chosen": 0.11941174417734146, "logits/rejected": 0.060549668967723846, "logps/chosen": -61.594669342041016, "logps/ref_chosen": -60.2537841796875, "logps/ref_rejected": -89.7706298828125, "logps/rejected": -91.86058807373047, "loss": 1.1166, "margin_dpo/margin_mean": 0.7490732669830322, "margin_dpo/margin_std": 1.2752363681793213, "step": 182 }, { "epoch": 0.2766439909297052, "fcm_dpo/beta": 1.2075517177581787, "fcm_dpo/delta": 0.3678004741668701, "fcm_dpo/margin": 0.41555270552635193, "fcm_dpo/q_t": 0.40780603885650635, "grad_norm": 330.7014465332031, "learning_rate": 4.551664914523433e-07, "logits/chosen": 0.12853467464447021, "logits/rejected": 0.1137867271900177, "logps/chosen": -63.4423713684082, "logps/ref_chosen": -61.76142120361328, "logps/ref_rejected": -72.54627990722656, "logps/rejected": -74.64278411865234, "loss": 1.2964, "margin_dpo/margin_mean": 0.41555219888687134, "margin_dpo/margin_std": 1.0669963359832764, "step": 183 }, { "epoch": 0.2781557067271353, "fcm_dpo/beta": 1.2260205745697021, "fcm_dpo/delta": 0.00919228047132492, "fcm_dpo/margin": 0.6851847171783447, "fcm_dpo/q_t": 0.34627607464790344, "grad_norm": 230.51353454589844, "learning_rate": 4.544080985994258e-07, "logits/chosen": 0.20697131752967834, "logits/rejected": 0.1586826741695404, "logps/chosen": -48.165016174316406, "logps/ref_chosen": -46.840721130371094, "logps/ref_rejected": -69.3609390258789, "logps/rejected": -71.37042236328125, "loss": 0.9445, "margin_dpo/margin_mean": 0.6851844191551208, "margin_dpo/margin_std": 0.8955023288726807, "step": 184 }, { "epoch": 0.2796674225245654, "fcm_dpo/beta": 1.229468822479248, "fcm_dpo/delta": -0.02585327997803688, "fcm_dpo/margin": 0.7103534936904907, "fcm_dpo/q_t": 0.357565701007843, "grad_norm": 296.47052001953125, "learning_rate": 4.5364398801258394e-07, "logits/chosen": 0.13178446888923645, "logits/rejected": 0.09441807866096497, "logps/chosen": -53.799964904785156, "logps/ref_chosen": -52.32114028930664, "logps/ref_rejected": -68.3885726928711, "logps/rejected": -70.57774353027344, "loss": 1.1773, "margin_dpo/margin_mean": 0.7103538513183594, "margin_dpo/margin_std": 1.335863471031189, "step": 185 }, { "epoch": 0.2811791383219955, "fcm_dpo/beta": 1.225243091583252, "fcm_dpo/delta": 0.017454147338867188, "fcm_dpo/margin": 0.6798607110977173, "fcm_dpo/q_t": 0.3525392413139343, "grad_norm": 322.0429382324219, "learning_rate": 4.5287418106563354e-07, "logits/chosen": 0.10579089820384979, "logits/rejected": 0.07291128486394882, "logps/chosen": -68.80215454101562, "logps/ref_chosen": -67.42012786865234, "logps/ref_rejected": -82.50968933105469, "logps/rejected": -84.57157897949219, "loss": 1.0273, "margin_dpo/margin_mean": 0.6798614263534546, "margin_dpo/margin_std": 1.0495535135269165, "step": 186 }, { "epoch": 0.28269085411942557, "fcm_dpo/beta": 1.190890908241272, "fcm_dpo/delta": -0.14130036532878876, "fcm_dpo/margin": 0.8149707317352295, "fcm_dpo/q_t": 0.32857638597488403, "grad_norm": 374.1227111816406, "learning_rate": 4.520986992917297e-07, "logits/chosen": 0.10602662712335587, "logits/rejected": 0.06065261363983154, "logps/chosen": -77.01930236816406, "logps/ref_chosen": -75.52549743652344, "logps/ref_rejected": -94.76289367675781, "logps/rejected": -97.0716781616211, "loss": 1.1516, "margin_dpo/margin_mean": 0.8149705529212952, "margin_dpo/margin_std": 1.4823930263519287, "step": 187 }, { "epoch": 0.2842025699168556, "fcm_dpo/beta": 1.1827126741409302, "fcm_dpo/delta": -0.07674290239810944, "fcm_dpo/margin": 0.7770618200302124, "fcm_dpo/q_t": 0.32536211609840393, "grad_norm": 329.20703125, "learning_rate": 4.5131756438276466e-07, "logits/chosen": 0.146919846534729, "logits/rejected": 0.11189775168895721, "logps/chosen": -72.79115295410156, "logps/ref_chosen": -71.52333068847656, "logps/ref_rejected": -78.29949951171875, "logps/rejected": -80.34439086914062, "loss": 1.0643, "margin_dpo/margin_mean": 0.7770620584487915, "margin_dpo/margin_std": 1.269317388534546, "step": 188 }, { "epoch": 0.2857142857142857, "fcm_dpo/beta": 1.1473429203033447, "fcm_dpo/delta": -0.11207123845815659, "fcm_dpo/margin": 0.8247168064117432, "fcm_dpo/q_t": 0.325833261013031, "grad_norm": 281.21337890625, "learning_rate": 4.5053079818876096e-07, "logits/chosen": 0.133220374584198, "logits/rejected": 0.14191579818725586, "logps/chosen": -73.43196105957031, "logps/ref_chosen": -72.17626953125, "logps/ref_rejected": -75.26313781738281, "logps/rejected": -77.34353637695312, "loss": 0.9465, "margin_dpo/margin_mean": 0.8247175216674805, "margin_dpo/margin_std": 1.0672475099563599, "step": 189 }, { "epoch": 0.2872260015117158, "fcm_dpo/beta": 1.1403197050094604, "fcm_dpo/delta": -0.09567119181156158, "fcm_dpo/margin": 0.8214250802993774, "fcm_dpo/q_t": 0.3279981315135956, "grad_norm": 274.4941101074219, "learning_rate": 4.4973842271726024e-07, "logits/chosen": 0.2039899379014969, "logits/rejected": 0.0918896347284317, "logps/chosen": -55.97621154785156, "logps/ref_chosen": -54.624271392822266, "logps/ref_rejected": -101.47068786621094, "logps/rejected": -103.64405059814453, "loss": 0.9625, "margin_dpo/margin_mean": 0.821425199508667, "margin_dpo/margin_std": 1.1282612085342407, "step": 190 }, { "epoch": 0.2887377173091459, "fcm_dpo/beta": 1.1277894973754883, "fcm_dpo/delta": -0.048582788556814194, "fcm_dpo/margin": 0.7929339408874512, "fcm_dpo/q_t": 0.33727309107780457, "grad_norm": 331.9569396972656, "learning_rate": 4.48940460132708e-07, "logits/chosen": 0.19591175019741058, "logits/rejected": 0.17645688354969025, "logps/chosen": -74.50765991210938, "logps/ref_chosen": -72.93251037597656, "logps/ref_rejected": -89.95103454589844, "logps/rejected": -92.31910705566406, "loss": 1.0172, "margin_dpo/margin_mean": 0.7929338216781616, "margin_dpo/margin_std": 1.1808544397354126, "step": 191 }, { "epoch": 0.29024943310657597, "fcm_dpo/beta": 1.1447663307189941, "fcm_dpo/delta": 0.11726969480514526, "fcm_dpo/margin": 0.6475502252578735, "fcm_dpo/q_t": 0.3605687618255615, "grad_norm": 227.45828247070312, "learning_rate": 4.481369327558329e-07, "logits/chosen": 0.1682664304971695, "logits/rejected": 0.14873062074184418, "logps/chosen": -55.458255767822266, "logps/ref_chosen": -54.001121520996094, "logps/ref_rejected": -63.531551361083984, "logps/rejected": -65.63623809814453, "loss": 1.0626, "margin_dpo/margin_mean": 0.6475502848625183, "margin_dpo/margin_std": 1.047489047050476, "step": 192 }, { "epoch": 0.29176114890400606, "fcm_dpo/beta": 1.115247368812561, "fcm_dpo/delta": -0.15359792113304138, "fcm_dpo/margin": 0.8840051293373108, "fcm_dpo/q_t": 0.32005080580711365, "grad_norm": 219.44058227539062, "learning_rate": 4.47327863063023e-07, "logits/chosen": 0.10667343437671661, "logits/rejected": 0.08797129988670349, "logps/chosen": -58.189697265625, "logps/ref_chosen": -56.74927520751953, "logps/ref_rejected": -58.80629348754883, "logps/rejected": -61.13072204589844, "loss": 0.8919, "margin_dpo/margin_mean": 0.8840053081512451, "margin_dpo/margin_std": 1.11568284034729, "step": 193 }, { "epoch": 0.29327286470143615, "fcm_dpo/beta": 1.1258536577224731, "fcm_dpo/delta": 0.11767183244228363, "fcm_dpo/margin": 0.6577843427658081, "fcm_dpo/q_t": 0.36665472388267517, "grad_norm": 271.59124755859375, "learning_rate": 4.4651327368569684e-07, "logits/chosen": 0.18203461170196533, "logits/rejected": 0.15865445137023926, "logps/chosen": -58.03636932373047, "logps/ref_chosen": -56.64944076538086, "logps/ref_rejected": -69.98954772949219, "logps/rejected": -72.03425598144531, "loss": 1.1466, "margin_dpo/margin_mean": 0.6577843427658081, "margin_dpo/margin_std": 1.216817021369934, "step": 194 }, { "epoch": 0.2947845804988662, "fcm_dpo/beta": 1.1474632024765015, "fcm_dpo/delta": 0.10708002001047134, "fcm_dpo/margin": 0.6532057523727417, "fcm_dpo/q_t": 0.3579810559749603, "grad_norm": 295.77105712890625, "learning_rate": 4.4569318740967043e-07, "logits/chosen": 0.07060997188091278, "logits/rejected": 0.06883937120437622, "logps/chosen": -72.13310241699219, "logps/ref_chosen": -70.40977478027344, "logps/ref_rejected": -74.39448547363281, "logps/rejected": -76.77101135253906, "loss": 1.1179, "margin_dpo/margin_mean": 0.6532056331634521, "margin_dpo/margin_std": 1.1192982196807861, "step": 195 }, { "epoch": 0.2962962962962963, "fcm_dpo/beta": 1.1742520332336426, "fcm_dpo/delta": -0.010713696479797363, "fcm_dpo/margin": 0.7313066720962524, "fcm_dpo/q_t": 0.34498167037963867, "grad_norm": 252.9849395751953, "learning_rate": 4.448676271745197e-07, "logits/chosen": 0.16027367115020752, "logits/rejected": 0.12814804911613464, "logps/chosen": -60.709869384765625, "logps/ref_chosen": -59.227577209472656, "logps/ref_rejected": -83.54757690429688, "logps/rejected": -85.76117706298828, "loss": 1.0713, "margin_dpo/margin_mean": 0.7313063144683838, "margin_dpo/margin_std": 1.170758605003357, "step": 196 }, { "epoch": 0.29780801209372637, "fcm_dpo/beta": 1.1702077388763428, "fcm_dpo/delta": -0.154099702835083, "fcm_dpo/margin": 0.8365795016288757, "fcm_dpo/q_t": 0.341280460357666, "grad_norm": 327.5577697753906, "learning_rate": 4.440366160729392e-07, "logits/chosen": 0.22983162105083466, "logits/rejected": 0.19321538507938385, "logps/chosen": -53.02672576904297, "logps/ref_chosen": -51.52912902832031, "logps/ref_rejected": -73.70631408691406, "logps/rejected": -76.04048156738281, "loss": 1.2086, "margin_dpo/margin_mean": 0.836578905582428, "margin_dpo/margin_std": 1.5111768245697021, "step": 197 }, { "epoch": 0.29931972789115646, "fcm_dpo/beta": 1.096937894821167, "fcm_dpo/delta": -0.2913700044155121, "fcm_dpo/margin": 1.0111193656921387, "fcm_dpo/q_t": 0.30249547958374023, "grad_norm": 260.2440490722656, "learning_rate": 4.432001773500957e-07, "logits/chosen": 0.19865413010120392, "logits/rejected": 0.1672060191631317, "logps/chosen": -61.236427307128906, "logps/ref_chosen": -59.78268051147461, "logps/ref_rejected": -72.24533081054688, "logps/rejected": -74.71019744873047, "loss": 0.8902, "margin_dpo/margin_mean": 1.0111192464828491, "margin_dpo/margin_std": 1.2340037822723389, "step": 198 }, { "epoch": 0.30083144368858655, "fcm_dpo/beta": 1.0855709314346313, "fcm_dpo/delta": 0.07997329533100128, "fcm_dpo/margin": 0.7136896848678589, "fcm_dpo/q_t": 0.3555886745452881, "grad_norm": 296.0491943359375, "learning_rate": 4.4235833440297856e-07, "logits/chosen": 0.15217125415802002, "logits/rejected": 0.08081059157848358, "logps/chosen": -57.951934814453125, "logps/ref_chosen": -56.38677215576172, "logps/ref_rejected": -74.56779479980469, "logps/rejected": -76.8466567993164, "loss": 1.0663, "margin_dpo/margin_mean": 0.7136895656585693, "margin_dpo/margin_std": 1.1397019624710083, "step": 199 }, { "epoch": 0.30234315948601664, "fcm_dpo/beta": 1.0411416292190552, "fcm_dpo/delta": -0.19544623792171478, "fcm_dpo/margin": 0.9813714027404785, "fcm_dpo/q_t": 0.3251346945762634, "grad_norm": 261.3872985839844, "learning_rate": 4.415111107797445e-07, "logits/chosen": 0.1957240104675293, "logits/rejected": 0.13964782655239105, "logps/chosen": -59.139198303222656, "logps/ref_chosen": -57.82432556152344, "logps/ref_rejected": -89.28246307373047, "logps/rejected": -91.57870483398438, "loss": 0.9719, "margin_dpo/margin_mean": 0.9813716411590576, "margin_dpo/margin_std": 1.381603479385376, "step": 200 }, { "epoch": 0.30234315948601664, "eval_fcm_dpo/beta": 1.0409806966781616, "eval_logits/chosen": 0.1611482799053192, "eval_logits/rejected": 0.12632358074188232, "eval_logps/chosen": -76.3137435913086, "eval_logps/ref_chosen": -74.85946655273438, "eval_logps/ref_rejected": -79.54898834228516, "eval_logps/rejected": -81.69268035888672, "eval_loss": 0.576444149017334, "eval_margin_dpo/margin_mean": 0.6894133687019348, "eval_margin_dpo/margin_std": 1.306614875793457, "eval_runtime": 38.062, "eval_samples_per_second": 60.506, "eval_steps_per_second": 1.892, "step": 200 }, { "epoch": 0.30385487528344673, "fcm_dpo/beta": 1.035691499710083, "fcm_dpo/delta": -0.036999065428972244, "fcm_dpo/margin": 0.8532841205596924, "fcm_dpo/q_t": 0.33598366379737854, "grad_norm": 237.65528869628906, "learning_rate": 4.4065853017905953e-07, "logits/chosen": 0.20192654430866241, "logits/rejected": 0.16781750321388245, "logps/chosen": -60.54228210449219, "logps/ref_chosen": -58.999759674072266, "logps/ref_rejected": -84.67575073242188, "logps/rejected": -87.07154846191406, "loss": 0.9362, "margin_dpo/margin_mean": 0.8532842397689819, "margin_dpo/margin_std": 1.0954551696777344, "step": 201 }, { "epoch": 0.30536659108087677, "fcm_dpo/beta": 1.0213491916656494, "fcm_dpo/delta": -0.18040968477725983, "fcm_dpo/margin": 0.9893622398376465, "fcm_dpo/q_t": 0.31719034910202026, "grad_norm": 201.83595275878906, "learning_rate": 4.3980061644943575e-07, "logits/chosen": 0.11923044919967651, "logits/rejected": 0.06677506864070892, "logps/chosen": -49.09148406982422, "logps/ref_chosen": -47.660648345947266, "logps/ref_rejected": -73.63249969482422, "logps/rejected": -76.05269622802734, "loss": 0.8957, "margin_dpo/margin_mean": 0.9893627166748047, "margin_dpo/margin_std": 1.2042524814605713, "step": 202 }, { "epoch": 0.30687830687830686, "fcm_dpo/beta": 1.0198414325714111, "fcm_dpo/delta": 0.10756812244653702, "fcm_dpo/margin": 0.73426353931427, "fcm_dpo/q_t": 0.36804383993148804, "grad_norm": 288.4985046386719, "learning_rate": 4.3893739358856455e-07, "logits/chosen": 0.20233526825904846, "logits/rejected": 0.15018996596336365, "logps/chosen": -63.7635498046875, "logps/ref_chosen": -62.32553482055664, "logps/ref_rejected": -99.37226104736328, "logps/rejected": -101.54454040527344, "loss": 1.1316, "margin_dpo/margin_mean": 0.7342634797096252, "margin_dpo/margin_std": 1.341469407081604, "step": 203 }, { "epoch": 0.30839002267573695, "fcm_dpo/beta": 1.0091928243637085, "fcm_dpo/delta": 0.06542906165122986, "fcm_dpo/margin": 0.7767828702926636, "fcm_dpo/q_t": 0.3685183525085449, "grad_norm": 244.5875244140625, "learning_rate": 4.380688857426449e-07, "logits/chosen": 0.1188371554017067, "logits/rejected": 0.0663602203130722, "logps/chosen": -52.25553894042969, "logps/ref_chosen": -50.62931823730469, "logps/ref_rejected": -66.60475158691406, "logps/rejected": -69.00775909423828, "loss": 1.1436, "margin_dpo/margin_mean": 0.7767831087112427, "margin_dpo/margin_std": 1.4177677631378174, "step": 204 }, { "epoch": 0.30990173847316704, "fcm_dpo/beta": 1.0608057975769043, "fcm_dpo/delta": 0.2543222904205322, "fcm_dpo/margin": 0.57657790184021, "fcm_dpo/q_t": 0.3946090340614319, "grad_norm": 368.377197265625, "learning_rate": 4.3719511720570814e-07, "logits/chosen": 0.18162226676940918, "logits/rejected": 0.1382521241903305, "logps/chosen": -72.12284851074219, "logps/ref_chosen": -70.3561782836914, "logps/ref_rejected": -93.39848327636719, "logps/rejected": -95.7417221069336, "loss": 1.3038, "margin_dpo/margin_mean": 0.5765775442123413, "margin_dpo/margin_std": 1.4098232984542847, "step": 205 }, { "epoch": 0.31141345427059713, "fcm_dpo/beta": 1.1130549907684326, "fcm_dpo/delta": 0.04998175799846649, "fcm_dpo/margin": 0.7163587212562561, "fcm_dpo/q_t": 0.36462822556495667, "grad_norm": 297.85198974609375, "learning_rate": 4.363161124189387e-07, "logits/chosen": 0.17337989807128906, "logits/rejected": 0.16002798080444336, "logps/chosen": -69.37913513183594, "logps/ref_chosen": -67.64547729492188, "logps/ref_rejected": -79.89584350585938, "logps/rejected": -82.34585571289062, "loss": 1.1734, "margin_dpo/margin_mean": 0.7163584232330322, "margin_dpo/margin_std": 1.320636510848999, "step": 206 }, { "epoch": 0.3129251700680272, "fcm_dpo/beta": 1.0816932916641235, "fcm_dpo/delta": 0.017237961292266846, "fcm_dpo/margin": 0.7667139768600464, "fcm_dpo/q_t": 0.3583172559738159, "grad_norm": 258.2594299316406, "learning_rate": 4.3543189596998986e-07, "logits/chosen": 0.1258050948381424, "logits/rejected": 0.07419107109308243, "logps/chosen": -69.36555480957031, "logps/ref_chosen": -67.66419219970703, "logps/ref_rejected": -85.10249328613281, "logps/rejected": -87.57057189941406, "loss": 1.0501, "margin_dpo/margin_mean": 0.7667145729064941, "margin_dpo/margin_std": 1.2524394989013672, "step": 207 }, { "epoch": 0.3144368858654573, "fcm_dpo/beta": 1.1183842420578003, "fcm_dpo/delta": 0.107530876994133, "fcm_dpo/margin": 0.6710580587387085, "fcm_dpo/q_t": 0.37669068574905396, "grad_norm": 279.748291015625, "learning_rate": 4.3454249259229664e-07, "logits/chosen": 0.1450473964214325, "logits/rejected": 0.12448123842477798, "logps/chosen": -59.174312591552734, "logps/ref_chosen": -57.731712341308594, "logps/ref_rejected": -74.19276428222656, "logps/rejected": -76.30642700195312, "loss": 1.1878, "margin_dpo/margin_mean": 0.6710573434829712, "margin_dpo/margin_std": 1.319955825805664, "step": 208 }, { "epoch": 0.31594860166288735, "fcm_dpo/beta": 1.116701364517212, "fcm_dpo/delta": -0.21553626656532288, "fcm_dpo/margin": 0.928307294845581, "fcm_dpo/q_t": 0.33162564039230347, "grad_norm": 317.8649597167969, "learning_rate": 4.336479271643833e-07, "logits/chosen": 0.1369079351425171, "logits/rejected": 0.09211073815822601, "logps/chosen": -70.18434143066406, "logps/ref_chosen": -68.55007934570312, "logps/ref_rejected": -87.90541076660156, "logps/rejected": -90.46798706054688, "loss": 1.0438, "margin_dpo/margin_mean": 0.9283081293106079, "margin_dpo/margin_std": 1.3755784034729004, "step": 209 }, { "epoch": 0.31746031746031744, "fcm_dpo/beta": 1.0468344688415527, "fcm_dpo/delta": -0.23611387610435486, "fcm_dpo/margin": 1.014129877090454, "fcm_dpo/q_t": 0.32915377616882324, "grad_norm": 255.683349609375, "learning_rate": 4.327482247091679e-07, "logits/chosen": 0.17080672085285187, "logits/rejected": 0.10357716679573059, "logps/chosen": -58.91559600830078, "logps/ref_chosen": -57.268272399902344, "logps/ref_rejected": -85.72807312011719, "logps/rejected": -88.38953399658203, "loss": 0.98, "margin_dpo/margin_mean": 1.0141295194625854, "margin_dpo/margin_std": 1.424011468887329, "step": 210 }, { "epoch": 0.31897203325774753, "fcm_dpo/beta": 1.031850814819336, "fcm_dpo/delta": 0.012154560536146164, "fcm_dpo/margin": 0.8127963542938232, "fcm_dpo/q_t": 0.34257203340530396, "grad_norm": 256.3596496582031, "learning_rate": 4.3184341039326217e-07, "logits/chosen": 0.20410758256912231, "logits/rejected": 0.1363191306591034, "logps/chosen": -55.072418212890625, "logps/ref_chosen": -53.640708923339844, "logps/ref_rejected": -93.0387954711914, "logps/rejected": -95.28330993652344, "loss": 0.9539, "margin_dpo/margin_mean": 0.8127955198287964, "margin_dpo/margin_std": 1.0826678276062012, "step": 211 }, { "epoch": 0.3204837490551776, "fcm_dpo/beta": 1.0302294492721558, "fcm_dpo/delta": -0.022576171904802322, "fcm_dpo/margin": 0.8448600172996521, "fcm_dpo/q_t": 0.34546002745628357, "grad_norm": 255.3373565673828, "learning_rate": 4.309335095262675e-07, "logits/chosen": 0.1806902289390564, "logits/rejected": 0.12561562657356262, "logps/chosen": -59.00420379638672, "logps/ref_chosen": -57.36674499511719, "logps/ref_rejected": -79.89643096923828, "logps/rejected": -82.37875366210938, "loss": 1.012, "margin_dpo/margin_mean": 0.8448594212532043, "margin_dpo/margin_std": 1.2606749534606934, "step": 212 }, { "epoch": 0.3219954648526077, "fcm_dpo/beta": 1.004958152770996, "fcm_dpo/delta": -0.09056046605110168, "fcm_dpo/margin": 0.9262406826019287, "fcm_dpo/q_t": 0.3364323675632477, "grad_norm": 259.2325134277344, "learning_rate": 4.3001854756006724e-07, "logits/chosen": 0.19759291410446167, "logits/rejected": 0.1763480305671692, "logps/chosen": -66.59672546386719, "logps/ref_chosen": -65.22111511230469, "logps/ref_rejected": -80.1810302734375, "logps/rejected": -82.48287963867188, "loss": 1.0256, "margin_dpo/margin_mean": 0.9262403249740601, "margin_dpo/margin_std": 1.395569086074829, "step": 213 }, { "epoch": 0.3235071806500378, "fcm_dpo/beta": 1.0033698081970215, "fcm_dpo/delta": -0.0978882685303688, "fcm_dpo/margin": 0.9341999292373657, "fcm_dpo/q_t": 0.33533668518066406, "grad_norm": 262.6483154296875, "learning_rate": 4.290985500881143e-07, "logits/chosen": 0.0739990621805191, "logits/rejected": 0.0537070631980896, "logps/chosen": -62.82872772216797, "logps/ref_chosen": -61.292327880859375, "logps/ref_rejected": -67.69841003417969, "logps/rejected": -70.16900634765625, "loss": 0.983, "margin_dpo/margin_mean": 0.9342003464698792, "margin_dpo/margin_std": 1.3554797172546387, "step": 214 }, { "epoch": 0.3250188964474679, "fcm_dpo/beta": 0.9648710489273071, "fcm_dpo/delta": -0.2038130909204483, "fcm_dpo/margin": 1.0707131624221802, "fcm_dpo/q_t": 0.3213546872138977, "grad_norm": 233.17303466796875, "learning_rate": 4.281735428447157e-07, "logits/chosen": 0.08917293697595596, "logits/rejected": 0.014121760614216328, "logps/chosen": -65.3584213256836, "logps/ref_chosen": -63.869136810302734, "logps/ref_rejected": -98.7657241821289, "logps/rejected": -101.32572174072266, "loss": 0.9073, "margin_dpo/margin_mean": 1.0707132816314697, "margin_dpo/margin_std": 1.3552508354187012, "step": 215 }, { "epoch": 0.32653061224489793, "fcm_dpo/beta": 0.9256983995437622, "fcm_dpo/delta": -0.2675190567970276, "fcm_dpo/margin": 1.174886703491211, "fcm_dpo/q_t": 0.3485989570617676, "grad_norm": 239.40997314453125, "learning_rate": 4.2724355170431247e-07, "logits/chosen": 0.2045373022556305, "logits/rejected": 0.13870683312416077, "logps/chosen": -69.51454162597656, "logps/ref_chosen": -67.824951171875, "logps/ref_rejected": -96.40231323242188, "logps/rejected": -99.26679229736328, "loss": 1.083, "margin_dpo/margin_mean": 1.174886703491211, "margin_dpo/margin_std": 2.2862184047698975, "step": 216 }, { "epoch": 0.328042328042328, "fcm_dpo/beta": 0.8413063883781433, "fcm_dpo/delta": -0.3993903398513794, "fcm_dpo/margin": 1.4241771697998047, "fcm_dpo/q_t": 0.2878588140010834, "grad_norm": 174.88531494140625, "learning_rate": 4.26308602680756e-07, "logits/chosen": 0.14676231145858765, "logits/rejected": 0.06052399426698685, "logps/chosen": -62.13254165649414, "logps/ref_chosen": -60.5049934387207, "logps/ref_rejected": -84.26618194580078, "logps/rejected": -87.31790924072266, "loss": 0.7768, "margin_dpo/margin_mean": 1.4241769313812256, "margin_dpo/margin_std": 1.5880484580993652, "step": 217 }, { "epoch": 0.3295540438397581, "fcm_dpo/beta": 0.8780190944671631, "fcm_dpo/delta": 0.30152568221092224, "fcm_dpo/margin": 0.6365560293197632, "fcm_dpo/q_t": 0.3970625102519989, "grad_norm": 239.61326599121094, "learning_rate": 4.253687219265803e-07, "logits/chosen": 0.03338109701871872, "logits/rejected": 0.026734884828329086, "logps/chosen": -72.37188720703125, "logps/ref_chosen": -70.59431457519531, "logps/ref_rejected": -73.89038848876953, "logps/rejected": -76.30451965332031, "loss": 1.2469, "margin_dpo/margin_mean": 0.6365566253662109, "margin_dpo/margin_std": 1.4416258335113525, "step": 218 }, { "epoch": 0.3310657596371882, "fcm_dpo/beta": 0.9143224954605103, "fcm_dpo/delta": 0.2528460621833801, "fcm_dpo/margin": 0.670140266418457, "fcm_dpo/q_t": 0.380919486284256, "grad_norm": 221.69932556152344, "learning_rate": 4.2442393573227043e-07, "logits/chosen": 0.10697716474533081, "logits/rejected": 0.07464110851287842, "logps/chosen": -62.023380279541016, "logps/ref_chosen": -60.490943908691406, "logps/ref_rejected": -75.85001373291016, "logps/rejected": -78.0525894165039, "loss": 1.0959, "margin_dpo/margin_mean": 0.6701398491859436, "margin_dpo/margin_std": 1.143141269683838, "step": 219 }, { "epoch": 0.3325774754346183, "fcm_dpo/beta": 0.9308043718338013, "fcm_dpo/delta": 0.06596644967794418, "fcm_dpo/margin": 0.8476877212524414, "fcm_dpo/q_t": 0.3605768382549286, "grad_norm": 188.4651641845703, "learning_rate": 4.234742705255272e-07, "logits/chosen": 0.18505269289016724, "logits/rejected": 0.13934630155563354, "logps/chosen": -46.70742416381836, "logps/ref_chosen": -45.013397216796875, "logps/ref_rejected": -70.49369812011719, "logps/rejected": -73.03541564941406, "loss": 1.0733, "margin_dpo/margin_mean": 0.8476879596710205, "margin_dpo/margin_std": 1.3994786739349365, "step": 220 }, { "epoch": 0.3340891912320484, "fcm_dpo/beta": 0.9400441646575928, "fcm_dpo/delta": -0.04620972275733948, "fcm_dpo/margin": 0.948039174079895, "fcm_dpo/q_t": 0.3494917154312134, "grad_norm": 225.3359832763672, "learning_rate": 4.22519752870528e-07, "logits/chosen": 0.1869061291217804, "logits/rejected": 0.13732600212097168, "logps/chosen": -60.603050231933594, "logps/ref_chosen": -59.09584045410156, "logps/ref_rejected": -88.64388275146484, "logps/rejected": -91.09913635253906, "loss": 1.0056, "margin_dpo/margin_mean": 0.948039174079895, "margin_dpo/margin_std": 1.4668171405792236, "step": 221 }, { "epoch": 0.3356009070294785, "fcm_dpo/beta": 0.9103920459747314, "fcm_dpo/delta": -0.15236330032348633, "fcm_dpo/margin": 1.084613561630249, "fcm_dpo/q_t": 0.31708869338035583, "grad_norm": 218.53720092773438, "learning_rate": 4.2156040946718343e-07, "logits/chosen": 0.20063142478466034, "logits/rejected": 0.1383713036775589, "logps/chosen": -57.69280242919922, "logps/ref_chosen": -55.9976921081543, "logps/ref_rejected": -111.94727325439453, "logps/rejected": -114.72698974609375, "loss": 0.9187, "margin_dpo/margin_mean": 1.0846132040023804, "margin_dpo/margin_std": 1.366696834564209, "step": 222 }, { "epoch": 0.3371126228269085, "fcm_dpo/beta": 0.8663169741630554, "fcm_dpo/delta": -0.21477438509464264, "fcm_dpo/margin": 1.199501633644104, "fcm_dpo/q_t": 0.30882522463798523, "grad_norm": 179.276123046875, "learning_rate": 4.2059626715039065e-07, "logits/chosen": 0.18869906663894653, "logits/rejected": 0.14491060376167297, "logps/chosen": -61.44518280029297, "logps/ref_chosen": -59.891422271728516, "logps/ref_rejected": -86.28954315185547, "logps/rejected": -89.04280090332031, "loss": 0.8567, "margin_dpo/margin_mean": 1.1995023488998413, "margin_dpo/margin_std": 1.3853929042816162, "step": 223 }, { "epoch": 0.3386243386243386, "fcm_dpo/beta": 0.8999690413475037, "fcm_dpo/delta": 0.2813396155834198, "fcm_dpo/margin": 0.6505717635154724, "fcm_dpo/q_t": 0.38824737071990967, "grad_norm": 239.49497985839844, "learning_rate": 4.1962735288928304e-07, "logits/chosen": 0.19951647520065308, "logits/rejected": 0.1848064661026001, "logps/chosen": -65.77809143066406, "logps/ref_chosen": -64.04463195800781, "logps/ref_rejected": -75.05450439453125, "logps/rejected": -77.43852233886719, "loss": 1.0938, "margin_dpo/margin_mean": 0.6505719423294067, "margin_dpo/margin_std": 1.119499683380127, "step": 224 }, { "epoch": 0.3401360544217687, "fcm_dpo/beta": 0.8856638669967651, "fcm_dpo/delta": -0.1330086588859558, "fcm_dpo/margin": 1.0904693603515625, "fcm_dpo/q_t": 0.33212995529174805, "grad_norm": 278.0043640136719, "learning_rate": 4.186536937864752e-07, "logits/chosen": 0.1784878522157669, "logits/rejected": 0.09040804207324982, "logps/chosen": -67.76234436035156, "logps/ref_chosen": -66.0958251953125, "logps/ref_rejected": -97.68675231933594, "logps/rejected": -100.4437255859375, "loss": 0.9711, "margin_dpo/margin_mean": 1.0904691219329834, "margin_dpo/margin_std": 1.5158851146697998, "step": 225 }, { "epoch": 0.3416477702191988, "fcm_dpo/beta": 0.888001561164856, "fcm_dpo/delta": 0.01624445617198944, "fcm_dpo/margin": 0.939960777759552, "fcm_dpo/q_t": 0.35010144114494324, "grad_norm": 175.6317138671875, "learning_rate": 4.176753170773052e-07, "logits/chosen": 0.1665615737438202, "logits/rejected": 0.1313961148262024, "logps/chosen": -53.05473327636719, "logps/ref_chosen": -51.4168701171875, "logps/ref_rejected": -66.30068969726562, "logps/rejected": -68.87850952148438, "loss": 1.02, "margin_dpo/margin_mean": 0.9399595260620117, "margin_dpo/margin_std": 1.428729772567749, "step": 226 }, { "epoch": 0.3431594860166289, "fcm_dpo/beta": 0.9011565446853638, "fcm_dpo/delta": 0.07179167866706848, "fcm_dpo/margin": 0.8698738217353821, "fcm_dpo/q_t": 0.3623507022857666, "grad_norm": 259.9079895019531, "learning_rate": 4.166922501290729e-07, "logits/chosen": 0.20261543989181519, "logits/rejected": 0.1713910549879074, "logps/chosen": -59.633522033691406, "logps/ref_chosen": -57.989776611328125, "logps/ref_rejected": -75.05464172363281, "logps/rejected": -77.5682601928711, "loss": 1.1078, "margin_dpo/margin_mean": 0.8698740005493164, "margin_dpo/margin_std": 1.5209307670593262, "step": 227 }, { "epoch": 0.34467120181405897, "fcm_dpo/beta": 0.905102014541626, "fcm_dpo/delta": -0.021704578772187233, "fcm_dpo/margin": 0.9609760046005249, "fcm_dpo/q_t": 0.34984922409057617, "grad_norm": 212.20846557617188, "learning_rate": 4.1570452044027405e-07, "logits/chosen": 0.202199786901474, "logits/rejected": 0.14202827215194702, "logps/chosen": -57.361549377441406, "logps/ref_chosen": -55.55936813354492, "logps/ref_rejected": -77.02364349365234, "logps/rejected": -79.78680419921875, "loss": 1.0938, "margin_dpo/margin_mean": 0.9609757661819458, "margin_dpo/margin_std": 1.6258370876312256, "step": 228 }, { "epoch": 0.34618291761148906, "fcm_dpo/beta": 0.9085903167724609, "fcm_dpo/delta": 0.06304832547903061, "fcm_dpo/margin": 0.8716516494750977, "fcm_dpo/q_t": 0.35091277956962585, "grad_norm": 457.2859802246094, "learning_rate": 4.147121556398312e-07, "logits/chosen": 0.2513842284679413, "logits/rejected": 0.20943355560302734, "logps/chosen": -52.35901641845703, "logps/ref_chosen": -50.79466247558594, "logps/ref_rejected": -78.4474105834961, "logps/rejected": -80.88341522216797, "loss": 1.0559, "margin_dpo/margin_mean": 0.8716517090797424, "margin_dpo/margin_std": 1.377872109413147, "step": 229 }, { "epoch": 0.3476946334089191, "fcm_dpo/beta": 0.9284258484840393, "fcm_dpo/delta": -0.008764654397964478, "fcm_dpo/margin": 0.9182890057563782, "fcm_dpo/q_t": 0.3512055575847626, "grad_norm": 235.49261474609375, "learning_rate": 4.137151834863213e-07, "logits/chosen": 0.13936060667037964, "logits/rejected": 0.1368224024772644, "logps/chosen": -58.469757080078125, "logps/ref_chosen": -56.729225158691406, "logps/ref_rejected": -62.99180603027344, "logps/rejected": -65.65062713623047, "loss": 1.095, "margin_dpo/margin_mean": 0.9182896614074707, "margin_dpo/margin_std": 1.5704759359359741, "step": 230 }, { "epoch": 0.3492063492063492, "fcm_dpo/beta": 0.8553475141525269, "fcm_dpo/delta": -0.3350568413734436, "fcm_dpo/margin": 1.3306918144226074, "fcm_dpo/q_t": 0.2868633270263672, "grad_norm": 190.81130981445312, "learning_rate": 4.1271363186719835e-07, "logits/chosen": 0.12404580414295197, "logits/rejected": 0.10656890273094177, "logps/chosen": -74.32974243164062, "logps/ref_chosen": -72.59709930419922, "logps/ref_rejected": -86.2322998046875, "logps/rejected": -89.29563903808594, "loss": 0.7918, "margin_dpo/margin_mean": 1.3306909799575806, "margin_dpo/margin_std": 1.3364955186843872, "step": 231 }, { "epoch": 0.3507180650037793, "fcm_dpo/beta": 0.8403864502906799, "fcm_dpo/delta": -0.048685502260923386, "fcm_dpo/margin": 1.0624288320541382, "fcm_dpo/q_t": 0.3467335104942322, "grad_norm": 216.3763427734375, "learning_rate": 4.1170752879801436e-07, "logits/chosen": 0.1653335839509964, "logits/rejected": 0.14129364490509033, "logps/chosen": -69.74419403076172, "logps/ref_chosen": -68.1185302734375, "logps/ref_rejected": -83.79415893554688, "logps/rejected": -86.48225402832031, "loss": 1.0269, "margin_dpo/margin_mean": 1.0624287128448486, "margin_dpo/margin_std": 1.6378672122955322, "step": 232 }, { "epoch": 0.35222978080120937, "fcm_dpo/beta": 0.8553488254547119, "fcm_dpo/delta": 0.21577247977256775, "fcm_dpo/margin": 0.7533851265907288, "fcm_dpo/q_t": 0.3966567814350128, "grad_norm": 225.2250213623047, "learning_rate": 4.106969024216348e-07, "logits/chosen": 0.1627584844827652, "logits/rejected": 0.11888204514980316, "logps/chosen": -57.22935485839844, "logps/ref_chosen": -55.070152282714844, "logps/ref_rejected": -66.61845397949219, "logps/rejected": -69.53103637695312, "loss": 1.1841, "margin_dpo/margin_mean": 0.7533849477767944, "margin_dpo/margin_std": 1.5512669086456299, "step": 233 }, { "epoch": 0.35374149659863946, "fcm_dpo/beta": 0.9342302083969116, "fcm_dpo/delta": 0.32500237226486206, "fcm_dpo/margin": 0.5758814811706543, "fcm_dpo/q_t": 0.4090895652770996, "grad_norm": 245.4609375, "learning_rate": 4.09681781007452e-07, "logits/chosen": 0.09976236522197723, "logits/rejected": 0.08703955262899399, "logps/chosen": -57.818267822265625, "logps/ref_chosen": -55.92589569091797, "logps/ref_rejected": -51.11608123779297, "logps/rejected": -53.584327697753906, "loss": 1.3011, "margin_dpo/margin_mean": 0.575881838798523, "margin_dpo/margin_std": 1.4566171169281006, "step": 234 }, { "epoch": 0.35525321239606955, "fcm_dpo/beta": 0.889824628829956, "fcm_dpo/delta": -0.34530460834503174, "fcm_dpo/margin": 1.2960200309753418, "fcm_dpo/q_t": 0.27998316287994385, "grad_norm": 195.18954467773438, "learning_rate": 4.08662192950594e-07, "logits/chosen": 0.17580223083496094, "logits/rejected": 0.15976448357105255, "logps/chosen": -66.0561752319336, "logps/ref_chosen": -64.53972625732422, "logps/ref_rejected": -77.69151306152344, "logps/rejected": -80.50398254394531, "loss": 0.7405, "margin_dpo/margin_mean": 1.2960199117660522, "margin_dpo/margin_std": 1.2017241716384888, "step": 235 }, { "epoch": 0.35676492819349964, "fcm_dpo/beta": 0.8656524419784546, "fcm_dpo/delta": -0.07761366665363312, "fcm_dpo/margin": 1.0628429651260376, "fcm_dpo/q_t": 0.34958380460739136, "grad_norm": 225.35562133789062, "learning_rate": 4.076381667711306e-07, "logits/chosen": 0.11892463266849518, "logits/rejected": 0.1088462769985199, "logps/chosen": -73.25985717773438, "logps/ref_chosen": -71.15473937988281, "logps/ref_rejected": -84.88541412353516, "logps/rejected": -88.05337524414062, "loss": 1.0714, "margin_dpo/margin_mean": 1.062842607498169, "margin_dpo/margin_std": 1.7734475135803223, "step": 236 }, { "epoch": 0.35827664399092973, "fcm_dpo/beta": 0.858435869216919, "fcm_dpo/delta": 0.0019276365637779236, "fcm_dpo/margin": 0.986770510673523, "fcm_dpo/q_t": 0.3497011661529541, "grad_norm": 243.37867736816406, "learning_rate": 4.066097311132753e-07, "logits/chosen": 0.18724274635314941, "logits/rejected": 0.17509755492210388, "logps/chosen": -77.92250061035156, "logps/ref_chosen": -76.14201354980469, "logps/ref_rejected": -80.88479614257812, "logps/rejected": -83.65205383300781, "loss": 1.0883, "margin_dpo/margin_mean": 0.9867702722549438, "margin_dpo/margin_std": 1.6136043071746826, "step": 237 }, { "epoch": 0.35978835978835977, "fcm_dpo/beta": 0.8781789541244507, "fcm_dpo/delta": 0.06022172421216965, "fcm_dpo/margin": 0.9036314487457275, "fcm_dpo/q_t": 0.3595254719257355, "grad_norm": 921.947021484375, "learning_rate": 4.0557691474458414e-07, "logits/chosen": 0.15353929996490479, "logits/rejected": 0.13579578697681427, "logps/chosen": -70.63078308105469, "logps/ref_chosen": -68.88484954833984, "logps/ref_rejected": -75.8946304321289, "logps/rejected": -78.54420471191406, "loss": 1.0725, "margin_dpo/margin_mean": 0.903631329536438, "margin_dpo/margin_std": 1.4686760902404785, "step": 238 }, { "epoch": 0.36130007558578986, "fcm_dpo/beta": 0.8707563877105713, "fcm_dpo/delta": -0.0933084636926651, "fcm_dpo/margin": 1.0726871490478516, "fcm_dpo/q_t": 0.33421483635902405, "grad_norm": 247.51217651367188, "learning_rate": 4.045397465551513e-07, "logits/chosen": 0.24642007052898407, "logits/rejected": 0.16015931963920593, "logps/chosen": -58.81086730957031, "logps/ref_chosen": -56.771827697753906, "logps/ref_rejected": -116.23050689697266, "logps/rejected": -119.34222412109375, "loss": 1.0295, "margin_dpo/margin_mean": 1.072688341140747, "margin_dpo/margin_std": 1.598010540008545, "step": 239 }, { "epoch": 0.36281179138321995, "fcm_dpo/beta": 0.8458698987960815, "fcm_dpo/delta": -0.2129267305135727, "fcm_dpo/margin": 1.2287921905517578, "fcm_dpo/q_t": 0.31920289993286133, "grad_norm": 178.1742401123047, "learning_rate": 4.0349825555680045e-07, "logits/chosen": 0.14184461534023285, "logits/rejected": 0.0741148293018341, "logps/chosen": -55.44900894165039, "logps/ref_chosen": -53.35411071777344, "logps/ref_rejected": -80.12019348144531, "logps/rejected": -83.44389343261719, "loss": 0.9645, "margin_dpo/margin_mean": 1.2287919521331787, "margin_dpo/margin_std": 1.6595501899719238, "step": 240 }, { "epoch": 0.36432350718065004, "fcm_dpo/beta": 0.8800208568572998, "fcm_dpo/delta": 0.3664669394493103, "fcm_dpo/margin": 0.5624747276306152, "fcm_dpo/q_t": 0.4117809534072876, "grad_norm": 292.58184814453125, "learning_rate": 4.0245247088227377e-07, "logits/chosen": 0.12403697520494461, "logits/rejected": 0.09658272564411163, "logps/chosen": -73.85459899902344, "logps/ref_chosen": -71.89541625976562, "logps/ref_rejected": -83.03492736816406, "logps/rejected": -85.55657958984375, "loss": 1.2604, "margin_dpo/margin_mean": 0.5624747276306152, "margin_dpo/margin_std": 1.3676249980926514, "step": 241 }, { "epoch": 0.36583522297808013, "fcm_dpo/beta": 0.8379240036010742, "fcm_dpo/delta": -0.24741961061954498, "fcm_dpo/margin": 1.2643775939941406, "fcm_dpo/q_t": 0.31461301445961, "grad_norm": 174.297119140625, "learning_rate": 4.0140242178441665e-07, "logits/chosen": 0.12286406755447388, "logits/rejected": 0.10437546670436859, "logps/chosen": -59.590065002441406, "logps/ref_chosen": -57.927433013916016, "logps/ref_rejected": -67.838623046875, "logps/rejected": -70.765625, "loss": 0.8866, "margin_dpo/margin_mean": 1.2643779516220093, "margin_dpo/margin_std": 1.5288593769073486, "step": 242 }, { "epoch": 0.3673469387755102, "fcm_dpo/beta": 0.8361135125160217, "fcm_dpo/delta": -0.02479562908411026, "fcm_dpo/margin": 1.042922854423523, "fcm_dpo/q_t": 0.3457787334918976, "grad_norm": 210.47018432617188, "learning_rate": 4.003481376353596e-07, "logits/chosen": 0.1957082599401474, "logits/rejected": 0.19097524881362915, "logps/chosen": -76.08808898925781, "logps/ref_chosen": -74.27667236328125, "logps/ref_rejected": -73.24340057373047, "logps/rejected": -76.09774017333984, "loss": 0.9948, "margin_dpo/margin_mean": 1.0429233312606812, "margin_dpo/margin_std": 1.5180702209472656, "step": 243 }, { "epoch": 0.3688586545729403, "fcm_dpo/beta": 0.8155351877212524, "fcm_dpo/delta": -0.20803546905517578, "fcm_dpo/margin": 1.2712355852127075, "fcm_dpo/q_t": 0.3006499111652374, "grad_norm": 164.11488342285156, "learning_rate": 3.9928964792569654e-07, "logits/chosen": 0.18668177723884583, "logits/rejected": 0.11918094754219055, "logps/chosen": -55.324073791503906, "logps/ref_chosen": -53.36390686035156, "logps/ref_rejected": -71.10276794433594, "logps/rejected": -74.33417510986328, "loss": 0.8009, "margin_dpo/margin_mean": 1.2712348699569702, "margin_dpo/margin_std": 1.3225237131118774, "step": 244 }, { "epoch": 0.37037037037037035, "fcm_dpo/beta": 0.7821507453918457, "fcm_dpo/delta": -0.22865894436836243, "fcm_dpo/margin": 1.3489384651184082, "fcm_dpo/q_t": 0.3079974055290222, "grad_norm": 259.376220703125, "learning_rate": 3.982269822636601e-07, "logits/chosen": 0.20032089948654175, "logits/rejected": 0.17909392714500427, "logps/chosen": -73.11966705322266, "logps/ref_chosen": -71.19510650634766, "logps/ref_rejected": -80.76235961914062, "logps/rejected": -84.03585815429688, "loss": 0.912, "margin_dpo/margin_mean": 1.3489389419555664, "margin_dpo/margin_std": 1.7213486433029175, "step": 245 }, { "epoch": 0.37188208616780044, "fcm_dpo/beta": 0.7464067935943604, "fcm_dpo/delta": -0.1680677980184555, "fcm_dpo/margin": 1.3396670818328857, "fcm_dpo/q_t": 0.32120266556739807, "grad_norm": 182.52078247070312, "learning_rate": 3.971601703742932e-07, "logits/chosen": 0.19234727323055267, "logits/rejected": 0.14638212323188782, "logps/chosen": -73.82595825195312, "logps/ref_chosen": -71.62104797363281, "logps/ref_rejected": -94.03392028808594, "logps/rejected": -97.5784912109375, "loss": 0.9033, "margin_dpo/margin_mean": 1.3396670818328857, "margin_dpo/margin_std": 1.6984140872955322, "step": 246 }, { "epoch": 0.37339380196523053, "fcm_dpo/beta": 0.7954495549201965, "fcm_dpo/delta": 0.36253783106803894, "fcm_dpo/margin": 0.6254063844680786, "fcm_dpo/q_t": 0.40435123443603516, "grad_norm": 241.6053924560547, "learning_rate": 3.960892420986177e-07, "logits/chosen": 0.1911957561969757, "logits/rejected": 0.1811983436346054, "logps/chosen": -82.40559387207031, "logps/ref_chosen": -80.02254486083984, "logps/ref_rejected": -89.22705841064453, "logps/rejected": -92.23551940917969, "loss": 1.273, "margin_dpo/margin_mean": 0.6254061460494995, "margin_dpo/margin_std": 1.573524832725525, "step": 247 }, { "epoch": 0.3749055177626606, "fcm_dpo/beta": 0.8019500374794006, "fcm_dpo/delta": -0.030305165797472, "fcm_dpo/margin": 1.0933787822723389, "fcm_dpo/q_t": 0.3430135250091553, "grad_norm": 200.2981719970703, "learning_rate": 3.9501422739279953e-07, "logits/chosen": 0.1514793336391449, "logits/rejected": 0.17173510789871216, "logps/chosen": -67.46146392822266, "logps/ref_chosen": -65.37796020507812, "logps/ref_rejected": -61.365787506103516, "logps/rejected": -64.54267120361328, "loss": 0.9833, "margin_dpo/margin_mean": 1.0933786630630493, "margin_dpo/margin_std": 1.5513389110565186, "step": 248 }, { "epoch": 0.3764172335600907, "fcm_dpo/beta": 0.8616625070571899, "fcm_dpo/delta": 0.4642504155635834, "fcm_dpo/margin": 0.4600977599620819, "fcm_dpo/q_t": 0.41762542724609375, "grad_norm": 264.34185791015625, "learning_rate": 3.9393515632731094e-07, "logits/chosen": 0.12900127470493317, "logits/rejected": 0.15251630544662476, "logps/chosen": -77.03469848632812, "logps/ref_chosen": -74.60145568847656, "logps/ref_rejected": -63.79338455200195, "logps/rejected": -66.68672180175781, "loss": 1.4297, "margin_dpo/margin_mean": 0.4600982069969177, "margin_dpo/margin_std": 1.5953214168548584, "step": 249 }, { "epoch": 0.3779289493575208, "fcm_dpo/beta": 0.8493759632110596, "fcm_dpo/delta": -0.23597529530525208, "fcm_dpo/margin": 1.2496278285980225, "fcm_dpo/q_t": 0.3237505853176117, "grad_norm": 190.65785217285156, "learning_rate": 3.9285205908608934e-07, "logits/chosen": 0.24538511037826538, "logits/rejected": 0.21211454272270203, "logps/chosen": -64.02789306640625, "logps/ref_chosen": -61.938209533691406, "logps/ref_rejected": -72.21602630615234, "logps/rejected": -75.55533599853516, "loss": 0.9779, "margin_dpo/margin_mean": 1.2496273517608643, "margin_dpo/margin_std": 1.8356934785842896, "step": 250 }, { "epoch": 0.3794406651549509, "fcm_dpo/beta": 0.8356133103370667, "fcm_dpo/delta": 0.06252037733793259, "fcm_dpo/margin": 0.9481085538864136, "fcm_dpo/q_t": 0.37249547243118286, "grad_norm": 214.85369873046875, "learning_rate": 3.9176496596569265e-07, "logits/chosen": 0.22139692306518555, "logits/rejected": 0.19079577922821045, "logps/chosen": -69.10639953613281, "logps/ref_chosen": -66.85694885253906, "logps/ref_rejected": -84.83396911621094, "logps/rejected": -88.03153991699219, "loss": 1.0963, "margin_dpo/margin_mean": 0.9481081962585449, "margin_dpo/margin_std": 1.7044211626052856, "step": 251 }, { "epoch": 0.38095238095238093, "fcm_dpo/beta": 0.861457109451294, "fcm_dpo/delta": -0.012143999338150024, "fcm_dpo/margin": 0.9872934222221375, "fcm_dpo/q_t": 0.337787389755249, "grad_norm": 205.43638610839844, "learning_rate": 3.9067390737445254e-07, "logits/chosen": 0.1312238872051239, "logits/rejected": 0.08839388191699982, "logps/chosen": -58.39625549316406, "logps/ref_chosen": -56.22393035888672, "logps/ref_rejected": -77.1136245727539, "logps/rejected": -80.27323913574219, "loss": 1.1382, "margin_dpo/margin_mean": 0.98729407787323, "margin_dpo/margin_std": 1.6986348628997803, "step": 252 }, { "epoch": 0.382464096749811, "fcm_dpo/beta": 0.8236818909645081, "fcm_dpo/delta": -0.03074963390827179, "fcm_dpo/margin": 1.0598118305206299, "fcm_dpo/q_t": 0.34884679317474365, "grad_norm": 161.52169799804688, "learning_rate": 3.8957891383162304e-07, "logits/chosen": 0.19195407629013062, "logits/rejected": 0.1622268557548523, "logps/chosen": -54.37004089355469, "logps/ref_chosen": -52.21001434326172, "logps/ref_rejected": -58.75764846801758, "logps/rejected": -61.97748565673828, "loss": 0.9898, "margin_dpo/margin_mean": 1.0598115921020508, "margin_dpo/margin_std": 1.498337745666504, "step": 253 }, { "epoch": 0.3839758125472411, "fcm_dpo/beta": 0.815079927444458, "fcm_dpo/delta": -0.1290552169084549, "fcm_dpo/margin": 1.1844369173049927, "fcm_dpo/q_t": 0.3433462381362915, "grad_norm": 219.98338317871094, "learning_rate": 3.884800159665276e-07, "logits/chosen": 0.1558561623096466, "logits/rejected": 0.11703409254550934, "logps/chosen": -67.93498229980469, "logps/ref_chosen": -65.63632202148438, "logps/ref_rejected": -82.34425354003906, "logps/rejected": -85.82734680175781, "loss": 1.0388, "margin_dpo/margin_mean": 1.184436559677124, "margin_dpo/margin_std": 1.8613649606704712, "step": 254 }, { "epoch": 0.3854875283446712, "fcm_dpo/beta": 0.809474527835846, "fcm_dpo/delta": 0.004410445690155029, "fcm_dpo/margin": 1.0441983938217163, "fcm_dpo/q_t": 0.3516331613063812, "grad_norm": 200.7998046875, "learning_rate": 3.873772445177015e-07, "logits/chosen": 0.1561092585325241, "logits/rejected": 0.12892277538776398, "logps/chosen": -69.9217758178711, "logps/ref_chosen": -67.91108703613281, "logps/ref_rejected": -83.89114379882812, "logps/rejected": -86.9460220336914, "loss": 1.0398, "margin_dpo/margin_mean": 1.0441988706588745, "margin_dpo/margin_std": 1.6480119228363037, "step": 255 }, { "epoch": 0.3869992441421013, "fcm_dpo/beta": 0.804902195930481, "fcm_dpo/delta": -0.09969654679298401, "fcm_dpo/margin": 1.1683741807937622, "fcm_dpo/q_t": 0.33634892106056213, "grad_norm": 218.11962890625, "learning_rate": 3.862706303320329e-07, "logits/chosen": 0.12878583371639252, "logits/rejected": 0.08810890465974808, "logps/chosen": -65.82264709472656, "logps/ref_chosen": -63.49998474121094, "logps/ref_rejected": -90.77104187011719, "logps/rejected": -94.2620849609375, "loss": 1.0063, "margin_dpo/margin_mean": 1.1683729887008667, "margin_dpo/margin_std": 1.776692271232605, "step": 256 }, { "epoch": 0.3885109599395314, "fcm_dpo/beta": 0.7456899285316467, "fcm_dpo/delta": -0.395789235830307, "fcm_dpo/margin": 1.5996819734573364, "fcm_dpo/q_t": 0.3006623685359955, "grad_norm": 192.77134704589844, "learning_rate": 3.851602043638994e-07, "logits/chosen": 0.16418027877807617, "logits/rejected": 0.11701178550720215, "logps/chosen": -72.88190460205078, "logps/ref_chosen": -70.60064697265625, "logps/ref_rejected": -108.58313751220703, "logps/rejected": -112.46407318115234, "loss": 0.9113, "margin_dpo/margin_mean": 1.5996819734573364, "margin_dpo/margin_std": 2.1490330696105957, "step": 257 }, { "epoch": 0.3900226757369615, "fcm_dpo/beta": 0.7447835206985474, "fcm_dpo/delta": -0.006751693785190582, "fcm_dpo/margin": 1.1468995809555054, "fcm_dpo/q_t": 0.3328554034233093, "grad_norm": 184.07823181152344, "learning_rate": 3.840459976743023e-07, "logits/chosen": 0.20480632781982422, "logits/rejected": 0.16838130354881287, "logps/chosen": -61.762428283691406, "logps/ref_chosen": -59.25416564941406, "logps/ref_rejected": -85.58709716796875, "logps/rejected": -89.24226379394531, "loss": 0.9089, "margin_dpo/margin_mean": 1.1468994617462158, "margin_dpo/margin_std": 1.3625071048736572, "step": 258 }, { "epoch": 0.3915343915343915, "fcm_dpo/beta": 0.6809320449829102, "fcm_dpo/delta": -0.46764737367630005, "fcm_dpo/margin": 1.8398231267929077, "fcm_dpo/q_t": 0.2804723381996155, "grad_norm": 148.42945861816406, "learning_rate": 3.8292804142999796e-07, "logits/chosen": 0.12444441020488739, "logits/rejected": 0.049020834267139435, "logps/chosen": -67.50753021240234, "logps/ref_chosen": -65.43487548828125, "logps/ref_rejected": -95.41731262207031, "logps/rejected": -99.32978820800781, "loss": 0.7919, "margin_dpo/margin_mean": 1.8398233652114868, "margin_dpo/margin_std": 1.9219727516174316, "step": 259 }, { "epoch": 0.3930461073318216, "fcm_dpo/beta": 0.6663204431533813, "fcm_dpo/delta": -0.012575246393680573, "fcm_dpo/margin": 1.2920453548431396, "fcm_dpo/q_t": 0.3447534441947937, "grad_norm": 153.9876251220703, "learning_rate": 3.818063669026256e-07, "logits/chosen": 0.16772714257240295, "logits/rejected": 0.10460503399372101, "logps/chosen": -51.540916442871094, "logps/ref_chosen": -49.08958435058594, "logps/ref_rejected": -79.01708221435547, "logps/rejected": -82.76045989990234, "loss": 0.9871, "margin_dpo/margin_mean": 1.292044997215271, "margin_dpo/margin_std": 1.8242688179016113, "step": 260 }, { "epoch": 0.3945578231292517, "fcm_dpo/beta": 0.6728366613388062, "fcm_dpo/delta": 0.06468392163515091, "fcm_dpo/margin": 1.1746070384979248, "fcm_dpo/q_t": 0.3549262285232544, "grad_norm": 185.98231506347656, "learning_rate": 3.806810054678331e-07, "logits/chosen": 0.08221499621868134, "logits/rejected": 0.09243050217628479, "logps/chosen": -73.12066650390625, "logps/ref_chosen": -70.87239074707031, "logps/ref_rejected": -65.01522064208984, "logps/rejected": -68.43810272216797, "loss": 1.0405, "margin_dpo/margin_mean": 1.174607753753662, "margin_dpo/margin_std": 1.8360176086425781, "step": 261 }, { "epoch": 0.3960695389266818, "fcm_dpo/beta": 0.6801202297210693, "fcm_dpo/delta": -0.019086986780166626, "fcm_dpo/margin": 1.2746036052703857, "fcm_dpo/q_t": 0.34386593103408813, "grad_norm": 170.8404083251953, "learning_rate": 3.7955198860439887e-07, "logits/chosen": 0.20572137832641602, "logits/rejected": 0.16200119256973267, "logps/chosen": -70.40789794921875, "logps/ref_chosen": -67.8706283569336, "logps/ref_rejected": -88.7205810546875, "logps/rejected": -92.53245544433594, "loss": 0.9448, "margin_dpo/margin_mean": 1.274604320526123, "margin_dpo/margin_std": 1.7153469324111938, "step": 262 }, { "epoch": 0.3975812547241119, "fcm_dpo/beta": 0.6981139779090881, "fcm_dpo/delta": 0.134951651096344, "fcm_dpo/margin": 1.0312126874923706, "fcm_dpo/q_t": 0.36829593777656555, "grad_norm": 167.5323486328125, "learning_rate": 3.784193478933516e-07, "logits/chosen": 0.1324343979358673, "logits/rejected": 0.058942101895809174, "logps/chosen": -57.610633850097656, "logps/ref_chosen": -55.194583892822266, "logps/ref_rejected": -80.54048156738281, "logps/rejected": -83.98774719238281, "loss": 1.0934, "margin_dpo/margin_mean": 1.031212329864502, "margin_dpo/margin_std": 1.735314130783081, "step": 263 }, { "epoch": 0.39909297052154197, "fcm_dpo/beta": 0.6857741475105286, "fcm_dpo/delta": -0.11159483343362808, "fcm_dpo/margin": 1.386823058128357, "fcm_dpo/q_t": 0.34189584851264954, "grad_norm": 195.39012145996094, "learning_rate": 3.7728311501708674e-07, "logits/chosen": 0.10226079821586609, "logits/rejected": 0.06431174278259277, "logps/chosen": -85.52044677734375, "logps/ref_chosen": -83.17068481445312, "logps/ref_rejected": -88.33625793457031, "logps/rejected": -92.07284545898438, "loss": 1.0629, "margin_dpo/margin_mean": 1.3868227005004883, "margin_dpo/margin_std": 2.2063112258911133, "step": 264 }, { "epoch": 0.40060468631897206, "fcm_dpo/beta": 0.6698117256164551, "fcm_dpo/delta": -0.11223047971725464, "fcm_dpo/margin": 1.4208091497421265, "fcm_dpo/q_t": 0.3248485028743744, "grad_norm": 194.76893615722656, "learning_rate": 3.7614332175848027e-07, "logits/chosen": 0.22791503369808197, "logits/rejected": 0.17402033507823944, "logps/chosen": -54.32725524902344, "logps/ref_chosen": -51.66284942626953, "logps/ref_rejected": -67.1720962524414, "logps/rejected": -71.25730895996094, "loss": 0.9819, "margin_dpo/margin_mean": 1.4208089113235474, "margin_dpo/margin_std": 1.9814412593841553, "step": 265 }, { "epoch": 0.4021164021164021, "fcm_dpo/beta": 0.6639435291290283, "fcm_dpo/delta": -0.014321202412247658, "fcm_dpo/margin": 1.299785852432251, "fcm_dpo/q_t": 0.3548884093761444, "grad_norm": 184.37330627441406, "learning_rate": 3.75e-07, "logits/chosen": 0.15636898577213287, "logits/rejected": 0.1033758819103241, "logps/chosen": -59.907691955566406, "logps/ref_chosen": -57.45049285888672, "logps/ref_rejected": -77.60826110839844, "logps/rejected": -81.36524200439453, "loss": 1.0529, "margin_dpo/margin_mean": 1.299785852432251, "margin_dpo/margin_std": 2.1626858711242676, "step": 266 }, { "epoch": 0.4036281179138322, "fcm_dpo/beta": 0.6879450082778931, "fcm_dpo/delta": 0.1648833006620407, "fcm_dpo/margin": 1.0033197402954102, "fcm_dpo/q_t": 0.3694334626197815, "grad_norm": 146.89279174804688, "learning_rate": 3.738531817228131e-07, "logits/chosen": 0.1628885567188263, "logits/rejected": 0.15053339302539825, "logps/chosen": -57.460472106933594, "logps/ref_chosen": -55.03535079956055, "logps/ref_rejected": -66.0953369140625, "logps/rejected": -69.5237808227539, "loss": 1.1668, "margin_dpo/margin_mean": 1.0033204555511475, "margin_dpo/margin_std": 1.9306485652923584, "step": 267 }, { "epoch": 0.4051398337112623, "fcm_dpo/beta": 0.6884621381759644, "fcm_dpo/delta": 0.005110621452331543, "fcm_dpo/margin": 1.2268198728561401, "fcm_dpo/q_t": 0.35186707973480225, "grad_norm": 164.82493591308594, "learning_rate": 3.7270289900589204e-07, "logits/chosen": 0.08974149078130722, "logits/rejected": 0.07196816802024841, "logps/chosen": -67.441162109375, "logps/ref_chosen": -65.07174682617188, "logps/ref_rejected": -71.42485809326172, "logps/rejected": -75.0210952758789, "loss": 0.9971, "margin_dpo/margin_mean": 1.2268199920654297, "margin_dpo/margin_std": 1.8398901224136353, "step": 268 }, { "epoch": 0.40665154950869237, "fcm_dpo/beta": 0.6847081184387207, "fcm_dpo/delta": -0.12639540433883667, "fcm_dpo/margin": 1.4018278121948242, "fcm_dpo/q_t": 0.3242768943309784, "grad_norm": 167.1279754638672, "learning_rate": 3.7154918402511714e-07, "logits/chosen": 0.233078271150589, "logits/rejected": 0.1958373486995697, "logps/chosen": -69.78608703613281, "logps/ref_chosen": -67.1362075805664, "logps/ref_rejected": -82.55778503417969, "logps/rejected": -86.6094970703125, "loss": 0.9398, "margin_dpo/margin_mean": 1.4018274545669556, "margin_dpo/margin_std": 1.7281426191329956, "step": 269 }, { "epoch": 0.40816326530612246, "fcm_dpo/beta": 0.6847708821296692, "fcm_dpo/delta": 0.15696696937084198, "fcm_dpo/margin": 1.0275520086288452, "fcm_dpo/q_t": 0.36070716381073, "grad_norm": 188.62623596191406, "learning_rate": 3.7039206905237656e-07, "logits/chosen": 0.20225092768669128, "logits/rejected": 0.15019787847995758, "logps/chosen": -69.22811889648438, "logps/ref_chosen": -66.6886978149414, "logps/ref_rejected": -85.16129302978516, "logps/rejected": -88.728271484375, "loss": 1.0767, "margin_dpo/margin_mean": 1.0275520086288452, "margin_dpo/margin_std": 1.7189823389053345, "step": 270 }, { "epoch": 0.40967498110355255, "fcm_dpo/beta": 0.7114520072937012, "fcm_dpo/delta": 0.24464687705039978, "fcm_dpo/margin": 0.8730853199958801, "fcm_dpo/q_t": 0.40264761447906494, "grad_norm": 210.4980926513672, "learning_rate": 3.692315864546635e-07, "logits/chosen": 0.20817291736602783, "logits/rejected": 0.16300782561302185, "logps/chosen": -75.00304412841797, "logps/ref_chosen": -72.40754699707031, "logps/ref_rejected": -92.06311798095703, "logps/rejected": -95.53170776367188, "loss": 1.3161, "margin_dpo/margin_mean": 0.8730854392051697, "margin_dpo/margin_std": 2.194467544555664, "step": 271 }, { "epoch": 0.41118669690098264, "fcm_dpo/beta": 0.6955171823501587, "fcm_dpo/delta": -0.3055380582809448, "fcm_dpo/margin": 1.6134432554244995, "fcm_dpo/q_t": 0.30224862694740295, "grad_norm": 148.1903839111328, "learning_rate": 3.6806776869317067e-07, "logits/chosen": 0.1571994125843048, "logits/rejected": 0.15734095871448517, "logps/chosen": -68.978759765625, "logps/ref_chosen": -66.60140228271484, "logps/ref_rejected": -67.74340057373047, "logps/rejected": -71.73419952392578, "loss": 0.8169, "margin_dpo/margin_mean": 1.613443374633789, "margin_dpo/margin_std": 1.8132987022399902, "step": 272 }, { "epoch": 0.4126984126984127, "fcm_dpo/beta": 0.673996090888977, "fcm_dpo/delta": -0.1313486248254776, "fcm_dpo/margin": 1.43618905544281, "fcm_dpo/q_t": 0.3279706537723541, "grad_norm": 155.58108520507812, "learning_rate": 3.669006483223828e-07, "logits/chosen": 0.2045176476240158, "logits/rejected": 0.1588110476732254, "logps/chosen": -60.12089538574219, "logps/ref_chosen": -57.35487747192383, "logps/ref_rejected": -84.17168426513672, "logps/rejected": -88.37388610839844, "loss": 0.9412, "margin_dpo/margin_mean": 1.4361895322799683, "margin_dpo/margin_std": 1.9495878219604492, "step": 273 }, { "epoch": 0.41421012849584277, "fcm_dpo/beta": 0.6581387519836426, "fcm_dpo/delta": -0.054179951548576355, "fcm_dpo/margin": 1.366485595703125, "fcm_dpo/q_t": 0.3364384174346924, "grad_norm": 147.8866424560547, "learning_rate": 3.657302579891656e-07, "logits/chosen": 0.09074236452579498, "logits/rejected": 0.07023008912801743, "logps/chosen": -62.19242858886719, "logps/ref_chosen": -59.64149475097656, "logps/ref_rejected": -68.29348754882812, "logps/rejected": -72.21089935302734, "loss": 1.0003, "margin_dpo/margin_mean": 1.3664849996566772, "margin_dpo/margin_std": 1.996286392211914, "step": 274 }, { "epoch": 0.41572184429327286, "fcm_dpo/beta": 0.6430532336235046, "fcm_dpo/delta": -0.14487166702747345, "fcm_dpo/margin": 1.5252916812896729, "fcm_dpo/q_t": 0.33012163639068604, "grad_norm": 146.9214324951172, "learning_rate": 3.645566304318526e-07, "logits/chosen": 0.16096563637256622, "logits/rejected": 0.09621478617191315, "logps/chosen": -55.98505783081055, "logps/ref_chosen": -53.26664352416992, "logps/ref_rejected": -73.84062194824219, "logps/rejected": -78.08432006835938, "loss": 0.9415, "margin_dpo/margin_mean": 1.5252916812896729, "margin_dpo/margin_std": 2.0830984115600586, "step": 275 }, { "epoch": 0.41723356009070295, "fcm_dpo/beta": 0.6220403909683228, "fcm_dpo/delta": -0.1508302390575409, "fcm_dpo/margin": 1.5846750736236572, "fcm_dpo/q_t": 0.31243956089019775, "grad_norm": 129.06207275390625, "learning_rate": 3.633797984793294e-07, "logits/chosen": 0.11567900329828262, "logits/rejected": 0.0872059017419815, "logps/chosen": -55.34302520751953, "logps/ref_chosen": -53.02079772949219, "logps/ref_rejected": -61.56678771972656, "logps/rejected": -65.47369384765625, "loss": 0.8598, "margin_dpo/margin_mean": 1.5846753120422363, "margin_dpo/margin_std": 1.8111482858657837, "step": 276 }, { "epoch": 0.41874527588813304, "fcm_dpo/beta": 0.6342289447784424, "fcm_dpo/delta": 0.08050861954689026, "fcm_dpo/margin": 1.2154932022094727, "fcm_dpo/q_t": 0.3719508647918701, "grad_norm": 160.6136474609375, "learning_rate": 3.6219979505011555e-07, "logits/chosen": 0.20484420657157898, "logits/rejected": 0.21808558702468872, "logps/chosen": -74.26129913330078, "logps/ref_chosen": -71.43299102783203, "logps/ref_rejected": -67.65852355957031, "logps/rejected": -71.70232391357422, "loss": 1.0903, "margin_dpo/margin_mean": 1.2154929637908936, "margin_dpo/margin_std": 2.1663973331451416, "step": 277 }, { "epoch": 0.42025699168556313, "fcm_dpo/beta": 0.6268476247787476, "fcm_dpo/delta": -0.13949307799339294, "fcm_dpo/margin": 1.5503504276275635, "fcm_dpo/q_t": 0.31966453790664673, "grad_norm": 143.23236083984375, "learning_rate": 3.6101665315144353e-07, "logits/chosen": 0.10400999337434769, "logits/rejected": 0.07079809904098511, "logps/chosen": -69.66726684570312, "logps/ref_chosen": -67.11076354980469, "logps/ref_rejected": -88.74851989746094, "logps/rejected": -92.8553695678711, "loss": 0.9289, "margin_dpo/margin_mean": 1.5503500699996948, "margin_dpo/margin_std": 1.8908162117004395, "step": 278 }, { "epoch": 0.4217687074829932, "fcm_dpo/beta": 0.5942553877830505, "fcm_dpo/delta": -0.2251128852367401, "fcm_dpo/margin": 1.7702274322509766, "fcm_dpo/q_t": 0.2947637438774109, "grad_norm": 111.46631622314453, "learning_rate": 3.5983040587833563e-07, "logits/chosen": 0.1307368278503418, "logits/rejected": 0.09648337960243225, "logps/chosen": -56.71268081665039, "logps/ref_chosen": -54.49748611450195, "logps/ref_rejected": -70.42373657226562, "logps/rejected": -74.4091567993164, "loss": 0.7973, "margin_dpo/margin_mean": 1.7702279090881348, "margin_dpo/margin_std": 1.7679616212844849, "step": 279 }, { "epoch": 0.42328042328042326, "fcm_dpo/beta": 0.5645568370819092, "fcm_dpo/delta": -0.19094182550907135, "fcm_dpo/margin": 1.8066459894180298, "fcm_dpo/q_t": 0.29915091395378113, "grad_norm": 107.831298828125, "learning_rate": 3.586410864126781e-07, "logits/chosen": 0.19378116726875305, "logits/rejected": 0.15992864966392517, "logps/chosen": -62.757484436035156, "logps/ref_chosen": -60.43281173706055, "logps/ref_rejected": -78.39051818847656, "logps/rejected": -82.52183532714844, "loss": 0.784, "margin_dpo/margin_mean": 1.806646704673767, "margin_dpo/margin_std": 1.7414586544036865, "step": 280 }, { "epoch": 0.42479213907785335, "fcm_dpo/beta": 0.5530017614364624, "fcm_dpo/delta": -0.0612470768392086, "fcm_dpo/margin": 1.6353685855865479, "fcm_dpo/q_t": 0.3299909234046936, "grad_norm": 115.87896728515625, "learning_rate": 3.574487280222929e-07, "logits/chosen": 0.15672294795513153, "logits/rejected": 0.15733087062835693, "logps/chosen": -62.75849914550781, "logps/ref_chosen": -60.2820930480957, "logps/ref_rejected": -62.04009246826172, "logps/rejected": -66.15187072753906, "loss": 0.9472, "margin_dpo/margin_mean": 1.6353683471679688, "margin_dpo/margin_std": 2.125490188598633, "step": 281 }, { "epoch": 0.42630385487528344, "fcm_dpo/beta": 0.5638306140899658, "fcm_dpo/delta": -0.040468767285346985, "fcm_dpo/margin": 1.559096097946167, "fcm_dpo/q_t": 0.3394070267677307, "grad_norm": 134.23129272460938, "learning_rate": 3.562533640600075e-07, "logits/chosen": 0.11245854943990707, "logits/rejected": 0.0724257230758667, "logps/chosen": -63.39463806152344, "logps/ref_chosen": -60.623924255371094, "logps/ref_rejected": -68.67400360107422, "logps/rejected": -73.00382995605469, "loss": 0.9555, "margin_dpo/margin_mean": 1.5590956211090088, "margin_dpo/margin_std": 1.9817804098129272, "step": 282 }, { "epoch": 0.42781557067271353, "fcm_dpo/beta": 0.5626036524772644, "fcm_dpo/delta": 0.08880946785211563, "fcm_dpo/margin": 1.361499309539795, "fcm_dpo/q_t": 0.3661307096481323, "grad_norm": 173.3498992919922, "learning_rate": 3.550550279627215e-07, "logits/chosen": 0.12812848389148712, "logits/rejected": 0.06249154359102249, "logps/chosen": -70.5411376953125, "logps/ref_chosen": -67.64775085449219, "logps/ref_rejected": -99.96835327148438, "logps/rejected": -104.2232437133789, "loss": 1.0685, "margin_dpo/margin_mean": 1.3614987134933472, "margin_dpo/margin_std": 2.2333407402038574, "step": 283 }, { "epoch": 0.4293272864701436, "fcm_dpo/beta": 0.5531774163246155, "fcm_dpo/delta": -0.07725730538368225, "fcm_dpo/margin": 1.6630501747131348, "fcm_dpo/q_t": 0.3319721221923828, "grad_norm": 116.7216567993164, "learning_rate": 3.5385375325047163e-07, "logits/chosen": 0.1991874873638153, "logits/rejected": 0.14788982272148132, "logps/chosen": -59.54648971557617, "logps/ref_chosen": -56.96742630004883, "logps/ref_rejected": -86.36236572265625, "logps/rejected": -90.60448455810547, "loss": 0.9141, "margin_dpo/margin_mean": 1.663050651550293, "margin_dpo/margin_std": 2.190598487854004, "step": 284 }, { "epoch": 0.4308390022675737, "fcm_dpo/beta": 0.565268337726593, "fcm_dpo/delta": 0.11996881663799286, "fcm_dpo/margin": 1.304656982421875, "fcm_dpo/q_t": 0.36226344108581543, "grad_norm": 138.88722229003906, "learning_rate": 3.5264957352549375e-07, "logits/chosen": 0.2119036614894867, "logits/rejected": 0.19066905975341797, "logps/chosen": -74.73896789550781, "logps/ref_chosen": -71.65611267089844, "logps/ref_rejected": -81.63829803466797, "logps/rejected": -86.02581787109375, "loss": 1.0085, "margin_dpo/margin_mean": 1.3046571016311646, "margin_dpo/margin_std": 1.9224095344543457, "step": 285 }, { "epoch": 0.4323507180650038, "fcm_dpo/beta": 0.5349950194358826, "fcm_dpo/delta": -0.34029078483581543, "fcm_dpo/margin": 2.147865056991577, "fcm_dpo/q_t": 0.28494542837142944, "grad_norm": 113.43247985839844, "learning_rate": 3.514425224712835e-07, "logits/chosen": 0.12417186796665192, "logits/rejected": 0.05310012400150299, "logps/chosen": -63.82947540283203, "logps/ref_chosen": -61.07952117919922, "logps/ref_rejected": -91.28128051757812, "logps/rejected": -96.17909240722656, "loss": 0.822, "margin_dpo/margin_mean": 2.14786434173584, "margin_dpo/margin_std": 2.381056308746338, "step": 286 }, { "epoch": 0.43386243386243384, "fcm_dpo/beta": 0.5180379152297974, "fcm_dpo/delta": -0.15914088487625122, "fcm_dpo/margin": 1.9180022478103638, "fcm_dpo/q_t": 0.30951881408691406, "grad_norm": 104.51337432861328, "learning_rate": 3.502326338516534e-07, "logits/chosen": 0.15763196349143982, "logits/rejected": 0.12553146481513977, "logps/chosen": -48.694236755371094, "logps/ref_chosen": -46.035789489746094, "logps/ref_rejected": -59.95293426513672, "logps/rejected": -64.52938079833984, "loss": 0.8438, "margin_dpo/margin_mean": 1.9180022478103638, "margin_dpo/margin_std": 2.110712766647339, "step": 287 }, { "epoch": 0.43537414965986393, "fcm_dpo/beta": 0.5219178795814514, "fcm_dpo/delta": 0.1593482941389084, "fcm_dpo/margin": 1.34527587890625, "fcm_dpo/q_t": 0.37496429681777954, "grad_norm": 154.528076171875, "learning_rate": 3.490199415097892e-07, "logits/chosen": 0.07364333420991898, "logits/rejected": 0.03298754245042801, "logps/chosen": -68.56511688232422, "logps/ref_chosen": -65.3908462524414, "logps/ref_rejected": -88.53607940673828, "logps/rejected": -93.05561828613281, "loss": 1.0938, "margin_dpo/margin_mean": 1.345275640487671, "margin_dpo/margin_std": 2.366204261779785, "step": 288 }, { "epoch": 0.436885865457294, "fcm_dpo/beta": 0.5325125455856323, "fcm_dpo/delta": 0.054238371551036835, "fcm_dpo/margin": 1.5024845600128174, "fcm_dpo/q_t": 0.36477959156036377, "grad_norm": 118.85664367675781, "learning_rate": 3.4780447936730247e-07, "logits/chosen": 0.22808194160461426, "logits/rejected": 0.20086199045181274, "logps/chosen": -57.79352569580078, "logps/ref_chosen": -54.5936279296875, "logps/ref_rejected": -67.20855712890625, "logps/rejected": -71.91093444824219, "loss": 1.0486, "margin_dpo/margin_mean": 1.5024845600128174, "margin_dpo/margin_std": 2.4532387256622314, "step": 289 }, { "epoch": 0.4383975812547241, "fcm_dpo/beta": 0.5275927782058716, "fcm_dpo/delta": -0.057161666452884674, "fcm_dpo/margin": 1.7088825702667236, "fcm_dpo/q_t": 0.3398800194263458, "grad_norm": 145.15016174316406, "learning_rate": 3.465862814232821e-07, "logits/chosen": 0.2324827015399933, "logits/rejected": 0.1766033172607422, "logps/chosen": -64.75177001953125, "logps/ref_chosen": -61.38457489013672, "logps/ref_rejected": -91.92778015136719, "logps/rejected": -97.00385284423828, "loss": 0.9761, "margin_dpo/margin_mean": 1.7088818550109863, "margin_dpo/margin_std": 2.4079670906066895, "step": 290 }, { "epoch": 0.4399092970521542, "fcm_dpo/beta": 0.5278656482696533, "fcm_dpo/delta": -0.10052298754453659, "fcm_dpo/margin": 1.7776007652282715, "fcm_dpo/q_t": 0.32905471324920654, "grad_norm": 116.88301086425781, "learning_rate": 3.4536538175334343e-07, "logits/chosen": 0.3069065809249878, "logits/rejected": 0.25807487964630127, "logps/chosen": -53.91313171386719, "logps/ref_chosen": -50.863037109375, "logps/ref_rejected": -82.20868682861328, "logps/rejected": -87.03638458251953, "loss": 0.9582, "margin_dpo/margin_mean": 1.7776010036468506, "margin_dpo/margin_std": 2.3519887924194336, "step": 291 }, { "epoch": 0.4414210128495843, "fcm_dpo/beta": 0.5298629403114319, "fcm_dpo/delta": 0.10786361247301102, "fcm_dpo/margin": 1.4120714664459229, "fcm_dpo/q_t": 0.3546355962753296, "grad_norm": 154.5143280029297, "learning_rate": 3.4414181450867465e-07, "logits/chosen": 0.22030502557754517, "logits/rejected": 0.17777778208255768, "logps/chosen": -67.32970428466797, "logps/ref_chosen": -64.34888458251953, "logps/ref_rejected": -72.86434173583984, "logps/rejected": -77.25723266601562, "loss": 1.0112, "margin_dpo/margin_mean": 1.4120711088180542, "margin_dpo/margin_std": 2.0962743759155273, "step": 292 }, { "epoch": 0.4429327286470144, "fcm_dpo/beta": 0.515381932258606, "fcm_dpo/delta": -0.20761936902999878, "fcm_dpo/margin": 2.0112385749816895, "fcm_dpo/q_t": 0.30622002482414246, "grad_norm": 96.73804473876953, "learning_rate": 3.4291561391508185e-07, "logits/chosen": 0.24665296077728271, "logits/rejected": 0.18240293860435486, "logps/chosen": -58.349090576171875, "logps/ref_chosen": -54.869468688964844, "logps/ref_rejected": -81.858642578125, "logps/rejected": -87.34951782226562, "loss": 0.8824, "margin_dpo/margin_mean": 2.0112390518188477, "margin_dpo/margin_std": 2.4112563133239746, "step": 293 }, { "epoch": 0.4444444444444444, "fcm_dpo/beta": 0.49291643500328064, "fcm_dpo/delta": -0.11736033111810684, "fcm_dpo/margin": 1.9326649904251099, "fcm_dpo/q_t": 0.3255431652069092, "grad_norm": 89.73761749267578, "learning_rate": 3.4168681427203153e-07, "logits/chosen": 0.1944410502910614, "logits/rejected": 0.16065473854541779, "logps/chosen": -59.7293701171875, "logps/ref_chosen": -56.670902252197266, "logps/ref_rejected": -70.32819366455078, "logps/rejected": -75.31932067871094, "loss": 0.8748, "margin_dpo/margin_mean": 1.932664155960083, "margin_dpo/margin_std": 2.3227932453155518, "step": 294 }, { "epoch": 0.4459561602418745, "fcm_dpo/beta": 0.5035637617111206, "fcm_dpo/delta": 0.06118401512503624, "fcm_dpo/margin": 1.5732038021087646, "fcm_dpo/q_t": 0.35997217893600464, "grad_norm": 111.81068420410156, "learning_rate": 3.4045544995169125e-07, "logits/chosen": 0.21110177040100098, "logits/rejected": 0.14388032257556915, "logps/chosen": -53.704856872558594, "logps/ref_chosen": -50.40088653564453, "logps/ref_rejected": -83.43521881103516, "logps/rejected": -88.31239318847656, "loss": 1.0204, "margin_dpo/margin_mean": 1.5732042789459229, "margin_dpo/margin_std": 2.3596243858337402, "step": 295 }, { "epoch": 0.4474678760393046, "fcm_dpo/beta": 0.47933870553970337, "fcm_dpo/delta": -0.27881085872650146, "fcm_dpo/margin": 2.288297653198242, "fcm_dpo/q_t": 0.2990821599960327, "grad_norm": 114.7384033203125, "learning_rate": 3.392215553979679e-07, "logits/chosen": 0.1592310667037964, "logits/rejected": 0.12974442541599274, "logps/chosen": -72.52301025390625, "logps/ref_chosen": -69.15034484863281, "logps/ref_rejected": -89.60166931152344, "logps/rejected": -95.26262664794922, "loss": 0.8132, "margin_dpo/margin_mean": 2.288297653198242, "margin_dpo/margin_std": 2.4353115558624268, "step": 296 }, { "epoch": 0.4489795918367347, "fcm_dpo/beta": 0.4688694477081299, "fcm_dpo/delta": -0.11125542968511581, "fcm_dpo/margin": 2.027672529220581, "fcm_dpo/q_t": 0.3116467297077179, "grad_norm": 99.90240478515625, "learning_rate": 3.3798516512554485e-07, "logits/chosen": 0.19764375686645508, "logits/rejected": 0.14518359303474426, "logps/chosen": -61.69432830810547, "logps/ref_chosen": -58.01630401611328, "logps/ref_rejected": -69.95780944824219, "logps/rejected": -75.66350555419922, "loss": 0.8257, "margin_dpo/margin_mean": 2.027672290802002, "margin_dpo/margin_std": 2.0139260292053223, "step": 297 }, { "epoch": 0.4504913076341648, "fcm_dpo/beta": 0.4668663740158081, "fcm_dpo/delta": 0.017975449562072754, "fcm_dpo/margin": 1.7846050262451172, "fcm_dpo/q_t": 0.34727245569229126, "grad_norm": 103.42831420898438, "learning_rate": 3.367463137189156e-07, "logits/chosen": 0.2629234790802002, "logits/rejected": 0.21024516224861145, "logps/chosen": -59.81189727783203, "logps/ref_chosen": -56.1693115234375, "logps/ref_rejected": -68.55052185058594, "logps/rejected": -73.97770690917969, "loss": 1.0074, "margin_dpo/margin_mean": 1.7846052646636963, "margin_dpo/margin_std": 2.6018660068511963, "step": 298 }, { "epoch": 0.4520030234315949, "fcm_dpo/beta": 0.47573158144950867, "fcm_dpo/delta": 0.13766300678253174, "fcm_dpo/margin": 1.5178146362304688, "fcm_dpo/q_t": 0.3709160387516022, "grad_norm": 113.95995330810547, "learning_rate": 3.355050358314172e-07, "logits/chosen": 0.14425645768642426, "logits/rejected": 0.12183210253715515, "logps/chosen": -65.68190002441406, "logps/ref_chosen": -62.31780242919922, "logps/ref_rejected": -72.60028839111328, "logps/rejected": -77.48220825195312, "loss": 1.0751, "margin_dpo/margin_mean": 1.517815351486206, "margin_dpo/margin_std": 2.508615016937256, "step": 299 }, { "epoch": 0.45351473922902497, "fcm_dpo/beta": 0.47874048352241516, "fcm_dpo/delta": -0.004431587643921375, "fcm_dpo/margin": 1.7839136123657227, "fcm_dpo/q_t": 0.3367232084274292, "grad_norm": 123.27234649658203, "learning_rate": 3.3426136618426043e-07, "logits/chosen": 0.22687631845474243, "logits/rejected": 0.1784324198961258, "logps/chosen": -64.30814361572266, "logps/ref_chosen": -60.38157653808594, "logps/ref_rejected": -75.45442199707031, "logps/rejected": -81.16490173339844, "loss": 1.0008, "margin_dpo/margin_mean": 1.7839126586914062, "margin_dpo/margin_std": 2.55767822265625, "step": 300 }, { "epoch": 0.455026455026455, "fcm_dpo/beta": 0.4758816361427307, "fcm_dpo/delta": 0.04150884598493576, "fcm_dpo/margin": 1.7021515369415283, "fcm_dpo/q_t": 0.36344343423843384, "grad_norm": 114.94867706298828, "learning_rate": 3.3301533956555885e-07, "logits/chosen": 0.26481878757476807, "logits/rejected": 0.23754526674747467, "logps/chosen": -56.669219970703125, "logps/ref_chosen": -52.85089111328125, "logps/ref_rejected": -69.97584533691406, "logps/rejected": -75.49632263183594, "loss": 1.0754, "margin_dpo/margin_mean": 1.7021512985229492, "margin_dpo/margin_std": 2.8137896060943604, "step": 301 }, { "epoch": 0.4565381708238851, "fcm_dpo/beta": 0.5032765865325928, "fcm_dpo/delta": 0.2927466034889221, "fcm_dpo/margin": 1.140923023223877, "fcm_dpo/q_t": 0.3921450972557068, "grad_norm": 140.91168212890625, "learning_rate": 3.317669908293554e-07, "logits/chosen": 0.1072927862405777, "logits/rejected": 0.06553728133440018, "logps/chosen": -70.84619140625, "logps/ref_chosen": -66.96650695800781, "logps/ref_rejected": -88.09510803222656, "logps/rejected": -93.11572265625, "loss": 1.1807, "margin_dpo/margin_mean": 1.140923023223877, "margin_dpo/margin_std": 2.358726978302002, "step": 302 }, { "epoch": 0.4580498866213152, "fcm_dpo/beta": 0.48211240768432617, "fcm_dpo/delta": -0.4027268886566162, "fcm_dpo/margin": 2.498141288757324, "fcm_dpo/q_t": 0.2989780902862549, "grad_norm": 100.8934097290039, "learning_rate": 3.3051635489464793e-07, "logits/chosen": 0.2042883336544037, "logits/rejected": 0.1521485447883606, "logps/chosen": -65.78520202636719, "logps/ref_chosen": -62.12152862548828, "logps/ref_rejected": -90.31204223632812, "logps/rejected": -96.4738540649414, "loss": 0.8664, "margin_dpo/margin_mean": 2.498141050338745, "margin_dpo/margin_std": 3.0501856803894043, "step": 303 }, { "epoch": 0.4595616024187453, "fcm_dpo/beta": 0.45722144842147827, "fcm_dpo/delta": -0.24640443921089172, "fcm_dpo/margin": 2.341660976409912, "fcm_dpo/q_t": 0.29080310463905334, "grad_norm": 92.65155029296875, "learning_rate": 3.292634667444117e-07, "logits/chosen": 0.21486225724220276, "logits/rejected": 0.17082390189170837, "logps/chosen": -64.18489837646484, "logps/ref_chosen": -60.695091247558594, "logps/ref_rejected": -78.2525405883789, "logps/rejected": -84.08401489257812, "loss": 0.7711, "margin_dpo/margin_mean": 2.3416614532470703, "margin_dpo/margin_std": 2.206408739089966, "step": 304 }, { "epoch": 0.46107331821617537, "fcm_dpo/beta": 0.4463159441947937, "fcm_dpo/delta": 0.014554038643836975, "fcm_dpo/margin": 1.8721232414245605, "fcm_dpo/q_t": 0.3510298430919647, "grad_norm": 109.76885223388672, "learning_rate": 3.280083614246217e-07, "logits/chosen": 0.1453256607055664, "logits/rejected": 0.152765691280365, "logps/chosen": -77.118408203125, "logps/ref_chosen": -72.69914245605469, "logps/ref_rejected": -65.65670776367188, "logps/rejected": -71.9480972290039, "loss": 1.0397, "margin_dpo/margin_mean": 1.872122883796692, "margin_dpo/margin_std": 2.9213171005249023, "step": 305 }, { "epoch": 0.46258503401360546, "fcm_dpo/beta": 0.44983065128326416, "fcm_dpo/delta": 0.10561927407979965, "fcm_dpo/margin": 1.6640582084655762, "fcm_dpo/q_t": 0.3665274977684021, "grad_norm": 105.87984466552734, "learning_rate": 3.267510740432719e-07, "logits/chosen": 0.22533267736434937, "logits/rejected": 0.13813963532447815, "logps/chosen": -58.389102935791016, "logps/ref_chosen": -53.97052764892578, "logps/ref_rejected": -71.02423095703125, "logps/rejected": -77.10685729980469, "loss": 1.0711, "margin_dpo/margin_mean": 1.6640576124191284, "margin_dpo/margin_std": 2.727498769760132, "step": 306 }, { "epoch": 0.46409674981103555, "fcm_dpo/beta": 0.47032594680786133, "fcm_dpo/delta": 0.10532738268375397, "fcm_dpo/margin": 1.5966615676879883, "fcm_dpo/q_t": 0.37514978647232056, "grad_norm": 117.74783325195312, "learning_rate": 3.2549163976939285e-07, "logits/chosen": 0.22091859579086304, "logits/rejected": 0.18501907587051392, "logps/chosen": -61.177223205566406, "logps/ref_chosen": -57.413108825683594, "logps/ref_rejected": -68.68010711669922, "logps/rejected": -74.04087829589844, "loss": 1.1527, "margin_dpo/margin_mean": 1.59666109085083, "margin_dpo/margin_std": 2.9838297367095947, "step": 307 }, { "epoch": 0.4656084656084656, "fcm_dpo/beta": 0.45703125, "fcm_dpo/delta": -0.17830657958984375, "fcm_dpo/margin": 2.210214376449585, "fcm_dpo/q_t": 0.31531810760498047, "grad_norm": 98.66069793701172, "learning_rate": 3.2423009383206874e-07, "logits/chosen": 0.20415475964546204, "logits/rejected": 0.1843133568763733, "logps/chosen": -70.50775909423828, "logps/ref_chosen": -66.59879302978516, "logps/ref_rejected": -74.337158203125, "logps/rejected": -80.45633697509766, "loss": 0.8886, "margin_dpo/margin_mean": 2.210214614868164, "margin_dpo/margin_std": 2.7284340858459473, "step": 308 }, { "epoch": 0.4671201814058957, "fcm_dpo/beta": 0.4430055022239685, "fcm_dpo/delta": -0.08050990849733353, "fcm_dpo/margin": 2.0769472122192383, "fcm_dpo/q_t": 0.33381199836730957, "grad_norm": 133.50289916992188, "learning_rate": 3.229664715194511e-07, "logits/chosen": 0.23776155710220337, "logits/rejected": 0.19149421155452728, "logps/chosen": -69.8235092163086, "logps/ref_chosen": -65.39474487304688, "logps/ref_rejected": -75.70930480957031, "logps/rejected": -82.21501159667969, "loss": 0.9315, "margin_dpo/margin_mean": 2.0769472122192383, "margin_dpo/margin_std": 2.7262511253356934, "step": 309 }, { "epoch": 0.46863189720332576, "fcm_dpo/beta": 0.46543052792549133, "fcm_dpo/delta": 0.28398555517196655, "fcm_dpo/margin": 1.2512989044189453, "fcm_dpo/q_t": 0.4003729820251465, "grad_norm": 143.03170776367188, "learning_rate": 3.2170080817777257e-07, "logits/chosen": 0.2502453923225403, "logits/rejected": 0.24358004331588745, "logps/chosen": -79.23374938964844, "logps/ref_chosen": -74.66827392578125, "logps/ref_rejected": -80.5689697265625, "logps/rejected": -86.3857421875, "loss": 1.1998, "margin_dpo/margin_mean": 1.2512991428375244, "margin_dpo/margin_std": 2.6639318466186523, "step": 310 }, { "epoch": 0.47014361300075586, "fcm_dpo/beta": 0.4706069827079773, "fcm_dpo/delta": 0.0076281167566776276, "fcm_dpo/margin": 1.7907335758209229, "fcm_dpo/q_t": 0.35300174355506897, "grad_norm": 112.3255844116211, "learning_rate": 3.204331392103574e-07, "logits/chosen": 0.15300993621349335, "logits/rejected": 0.06236676499247551, "logps/chosen": -63.637657165527344, "logps/ref_chosen": -59.738033294677734, "logps/ref_rejected": -93.60757446289062, "logps/rejected": -99.29792785644531, "loss": 1.0718, "margin_dpo/margin_mean": 1.7907336950302124, "margin_dpo/margin_std": 3.0270423889160156, "step": 311 }, { "epoch": 0.47165532879818595, "fcm_dpo/beta": 0.46410685777664185, "fcm_dpo/delta": -0.15572930872440338, "fcm_dpo/margin": 2.134312868118286, "fcm_dpo/q_t": 0.31550097465515137, "grad_norm": 114.26778411865234, "learning_rate": 3.1916350007663176e-07, "logits/chosen": 0.2003878653049469, "logits/rejected": 0.12340877950191498, "logps/chosen": -57.946800231933594, "logps/ref_chosen": -53.816436767578125, "logps/ref_rejected": -68.6575698852539, "logps/rejected": -74.92224884033203, "loss": 0.935, "margin_dpo/margin_mean": 2.134312629699707, "margin_dpo/margin_std": 2.824443817138672, "step": 312 }, { "epoch": 0.47316704459561604, "fcm_dpo/beta": 0.4569798707962036, "fcm_dpo/delta": 0.017772406339645386, "fcm_dpo/margin": 1.823397159576416, "fcm_dpo/q_t": 0.36278456449508667, "grad_norm": 106.69249725341797, "learning_rate": 3.178919262911314e-07, "logits/chosen": 0.2684420347213745, "logits/rejected": 0.24755266308784485, "logps/chosen": -63.84406280517578, "logps/ref_chosen": -59.957359313964844, "logps/ref_rejected": -69.31729888916016, "logps/rejected": -75.02740478515625, "loss": 1.0815, "margin_dpo/margin_mean": 1.8233965635299683, "margin_dpo/margin_std": 3.1479573249816895, "step": 313 }, { "epoch": 0.47467876039304613, "fcm_dpo/beta": 0.4399215579032898, "fcm_dpo/delta": -0.27059251070022583, "fcm_dpo/margin": 2.476907253265381, "fcm_dpo/q_t": 0.30071818828582764, "grad_norm": 92.56130981445312, "learning_rate": 3.166184534225087e-07, "logits/chosen": 0.20626097917556763, "logits/rejected": 0.21696346998214722, "logps/chosen": -74.36647033691406, "logps/ref_chosen": -70.26815795898438, "logps/ref_rejected": -69.23971557617188, "logps/rejected": -75.81494140625, "loss": 0.81, "margin_dpo/margin_mean": 2.476907253265381, "margin_dpo/margin_std": 2.6668601036071777, "step": 314 }, { "epoch": 0.47619047619047616, "fcm_dpo/beta": 0.43925973773002625, "fcm_dpo/delta": -0.023149289190769196, "fcm_dpo/margin": 1.9778019189834595, "fcm_dpo/q_t": 0.33359387516975403, "grad_norm": 96.88661193847656, "learning_rate": 3.1534311709253723e-07, "logits/chosen": 0.14388734102249146, "logits/rejected": 0.10625620931386948, "logps/chosen": -72.1413803100586, "logps/ref_chosen": -67.79469299316406, "logps/ref_rejected": -74.55148315429688, "logps/rejected": -80.87596893310547, "loss": 0.9586, "margin_dpo/margin_mean": 1.9778021574020386, "margin_dpo/margin_std": 2.5699832439422607, "step": 315 }, { "epoch": 0.47770219198790626, "fcm_dpo/beta": 0.4275297522544861, "fcm_dpo/delta": -0.23587097227573395, "fcm_dpo/margin": 2.4735541343688965, "fcm_dpo/q_t": 0.3129580616950989, "grad_norm": 92.06230163574219, "learning_rate": 3.1406595297511564e-07, "logits/chosen": 0.14784720540046692, "logits/rejected": 0.0601130872964859, "logps/chosen": -59.30602264404297, "logps/ref_chosen": -55.288482666015625, "logps/ref_rejected": -96.15723419189453, "logps/rejected": -102.64833068847656, "loss": 0.8803, "margin_dpo/margin_mean": 2.473552942276001, "margin_dpo/margin_std": 2.830709934234619, "step": 316 }, { "epoch": 0.47921390778533635, "fcm_dpo/beta": 0.39385396242141724, "fcm_dpo/delta": -0.29807132482528687, "fcm_dpo/margin": 2.8259806632995605, "fcm_dpo/q_t": 0.29416024684906006, "grad_norm": 85.33952331542969, "learning_rate": 3.1278699679526975e-07, "logits/chosen": 0.24099749326705933, "logits/rejected": 0.19853255152702332, "logps/chosen": -58.54376220703125, "logps/ref_chosen": -54.58137512207031, "logps/ref_rejected": -72.77232360839844, "logps/rejected": -79.56068420410156, "loss": 0.7843, "margin_dpo/margin_mean": 2.8259806632995605, "margin_dpo/margin_std": 2.902916431427002, "step": 317 }, { "epoch": 0.48072562358276644, "fcm_dpo/beta": 0.3996548354625702, "fcm_dpo/delta": 0.19046162068843842, "fcm_dpo/margin": 1.683230996131897, "fcm_dpo/q_t": 0.3898201286792755, "grad_norm": 105.31067657470703, "learning_rate": 3.1150628432815336e-07, "logits/chosen": 0.25582095980644226, "logits/rejected": 0.2070683240890503, "logps/chosen": -57.41715621948242, "logps/ref_chosen": -52.88822937011719, "logps/ref_rejected": -80.63988494873047, "logps/rejected": -86.85203552246094, "loss": 1.2347, "margin_dpo/margin_mean": 1.6832314729690552, "margin_dpo/margin_std": 3.642604351043701, "step": 318 }, { "epoch": 0.48223733938019653, "fcm_dpo/beta": 0.3988262116909027, "fcm_dpo/delta": -0.07955436408519745, "fcm_dpo/margin": 2.311298370361328, "fcm_dpo/q_t": 0.334533154964447, "grad_norm": 92.12313079833984, "learning_rate": 3.1022385139804707e-07, "logits/chosen": 0.18351227045059204, "logits/rejected": 0.1661396622657776, "logps/chosen": -68.296630859375, "logps/ref_chosen": -64.36333465576172, "logps/ref_rejected": -79.47296142578125, "logps/rejected": -85.7175521850586, "loss": 0.9816, "margin_dpo/margin_mean": 2.3112986087799072, "margin_dpo/margin_std": 3.327394723892212, "step": 319 }, { "epoch": 0.4837490551776266, "fcm_dpo/beta": 0.41219210624694824, "fcm_dpo/delta": 0.10497826337814331, "fcm_dpo/margin": 1.8063325881958008, "fcm_dpo/q_t": 0.3807426691055298, "grad_norm": 100.4907455444336, "learning_rate": 3.0893973387735683e-07, "logits/chosen": 0.1191617101430893, "logits/rejected": 0.08381935209035873, "logps/chosen": -53.169158935546875, "logps/ref_chosen": -49.558746337890625, "logps/ref_rejected": -71.23444366455078, "logps/rejected": -76.65119171142578, "loss": 1.1398, "margin_dpo/margin_mean": 1.8063322305679321, "margin_dpo/margin_std": 3.381862163543701, "step": 320 }, { "epoch": 0.4852607709750567, "fcm_dpo/beta": 0.4042707085609436, "fcm_dpo/delta": 0.016887515783309937, "fcm_dpo/margin": 2.0608034133911133, "fcm_dpo/q_t": 0.3445979356765747, "grad_norm": 84.57140350341797, "learning_rate": 3.0765396768561004e-07, "logits/chosen": 0.154787078499794, "logits/rejected": 0.14245569705963135, "logps/chosen": -56.562965393066406, "logps/ref_chosen": -52.08526611328125, "logps/ref_rejected": -55.58674621582031, "logps/rejected": -62.125244140625, "loss": 1.0386, "margin_dpo/margin_mean": 2.0608034133911133, "margin_dpo/margin_std": 3.0765933990478516, "step": 321 }, { "epoch": 0.48677248677248675, "fcm_dpo/beta": 0.3972187638282776, "fcm_dpo/delta": -0.2467936873435974, "fcm_dpo/margin": 2.695878267288208, "fcm_dpo/q_t": 0.29130974411964417, "grad_norm": 79.39057159423828, "learning_rate": 3.063665887884511e-07, "logits/chosen": 0.22061362862586975, "logits/rejected": 0.15948614478111267, "logps/chosen": -51.72674560546875, "logps/ref_chosen": -47.404109954833984, "logps/ref_rejected": -73.4260025024414, "logps/rejected": -80.44451141357422, "loss": 0.7937, "margin_dpo/margin_mean": 2.695878267288208, "margin_dpo/margin_std": 2.6556789875030518, "step": 322 }, { "epoch": 0.48828420256991684, "fcm_dpo/beta": 0.39542824029922485, "fcm_dpo/delta": 0.02666623145341873, "fcm_dpo/margin": 2.0780739784240723, "fcm_dpo/q_t": 0.37355026602745056, "grad_norm": 110.35505676269531, "learning_rate": 3.0507763319663517e-07, "logits/chosen": 0.17272543907165527, "logits/rejected": 0.11414434015750885, "logps/chosen": -74.46739196777344, "logps/ref_chosen": -70.00630187988281, "logps/ref_rejected": -86.96690368652344, "logps/rejected": -93.50607299804688, "loss": 1.1522, "margin_dpo/margin_mean": 2.078073263168335, "margin_dpo/margin_std": 3.899219512939453, "step": 323 }, { "epoch": 0.4897959183673469, "fcm_dpo/beta": 0.38537365198135376, "fcm_dpo/delta": -0.061207324266433716, "fcm_dpo/margin": 2.3483946323394775, "fcm_dpo/q_t": 0.32360321283340454, "grad_norm": 79.66868591308594, "learning_rate": 3.0378713696502097e-07, "logits/chosen": 0.2204309105873108, "logits/rejected": 0.17378325760364532, "logps/chosen": -59.973960876464844, "logps/ref_chosen": -55.88882064819336, "logps/ref_rejected": -75.23088073730469, "logps/rejected": -81.66442108154297, "loss": 0.8655, "margin_dpo/margin_mean": 2.3483948707580566, "margin_dpo/margin_std": 2.62096905708313, "step": 324 }, { "epoch": 0.491307634164777, "fcm_dpo/beta": 0.38281673192977905, "fcm_dpo/delta": 0.013121634721755981, "fcm_dpo/margin": 2.1842405796051025, "fcm_dpo/q_t": 0.3410230278968811, "grad_norm": 88.95374298095703, "learning_rate": 3.0249513619156206e-07, "logits/chosen": 0.1963476836681366, "logits/rejected": 0.14803680777549744, "logps/chosen": -68.87274932861328, "logps/ref_chosen": -64.14701843261719, "logps/ref_rejected": -79.91143798828125, "logps/rejected": -86.82140350341797, "loss": 0.9619, "margin_dpo/margin_mean": 2.1842405796051025, "margin_dpo/margin_std": 2.953347682952881, "step": 325 }, { "epoch": 0.4928193499622071, "fcm_dpo/beta": 0.40765079855918884, "fcm_dpo/delta": 0.35959964990615845, "fcm_dpo/margin": 1.2490381002426147, "fcm_dpo/q_t": 0.407970130443573, "grad_norm": 122.61116790771484, "learning_rate": 3.012016670162977e-07, "logits/chosen": 0.16117537021636963, "logits/rejected": 0.1591501086950302, "logps/chosen": -80.88282775878906, "logps/ref_chosen": -75.53131103515625, "logps/ref_rejected": -76.5898666381836, "logps/rejected": -83.19041442871094, "loss": 1.2647, "margin_dpo/margin_mean": 1.249037742614746, "margin_dpo/margin_std": 3.0318169593811035, "step": 326 }, { "epoch": 0.4943310657596372, "fcm_dpo/beta": 0.41696861386299133, "fcm_dpo/delta": -0.012611184269189835, "fcm_dpo/margin": 2.0652732849121094, "fcm_dpo/q_t": 0.35170531272888184, "grad_norm": 102.3149185180664, "learning_rate": 2.99906765620341e-07, "logits/chosen": 0.13333944976329803, "logits/rejected": 0.09463554620742798, "logps/chosen": -73.87966918945312, "logps/ref_chosen": -69.33717346191406, "logps/ref_rejected": -73.37751770019531, "logps/rejected": -79.98530578613281, "loss": 1.0782, "margin_dpo/margin_mean": 2.0652735233306885, "margin_dpo/margin_std": 3.41621732711792, "step": 327 }, { "epoch": 0.4958427815570673, "fcm_dpo/beta": 0.4161731004714966, "fcm_dpo/delta": 0.029472189024090767, "fcm_dpo/margin": 1.9774518013000488, "fcm_dpo/q_t": 0.3513525724411011, "grad_norm": 101.79065704345703, "learning_rate": 2.9861046822486766e-07, "logits/chosen": 0.16545960307121277, "logits/rejected": 0.14433646202087402, "logps/chosen": -65.78724670410156, "logps/ref_chosen": -61.70623016357422, "logps/ref_rejected": -83.73808288574219, "logps/rejected": -89.79654693603516, "loss": 1.0012, "margin_dpo/margin_mean": 1.9774516820907593, "margin_dpo/margin_std": 2.9047513008117676, "step": 328 }, { "epoch": 0.4973544973544973, "fcm_dpo/beta": 0.4103434085845947, "fcm_dpo/delta": -0.03602056950330734, "fcm_dpo/margin": 2.1446993350982666, "fcm_dpo/q_t": 0.3538691997528076, "grad_norm": 116.51476287841797, "learning_rate": 2.9731281109010253e-07, "logits/chosen": 0.2215312123298645, "logits/rejected": 0.17835497856140137, "logps/chosen": -69.37106323242188, "logps/ref_chosen": -64.4984130859375, "logps/ref_rejected": -83.6591796875, "logps/rejected": -90.676513671875, "loss": 1.0608, "margin_dpo/margin_mean": 2.1446990966796875, "margin_dpo/margin_std": 3.5509347915649414, "step": 329 }, { "epoch": 0.4988662131519274, "fcm_dpo/beta": 0.4071670174598694, "fcm_dpo/delta": -0.150421604514122, "fcm_dpo/margin": 2.4211106300354004, "fcm_dpo/q_t": 0.32498639822006226, "grad_norm": 86.76107025146484, "learning_rate": 2.9601383051430505e-07, "logits/chosen": 0.22971756756305695, "logits/rejected": 0.17051860690116882, "logps/chosen": -58.83673095703125, "logps/ref_chosen": -54.80464172363281, "logps/ref_rejected": -75.3194351196289, "logps/rejected": -81.77262878417969, "loss": 1.0026, "margin_dpo/margin_mean": 2.421110153198242, "margin_dpo/margin_std": 3.4969735145568848, "step": 330 }, { "epoch": 0.5003779289493575, "fcm_dpo/beta": 0.38293445110321045, "fcm_dpo/delta": -0.2958265542984009, "fcm_dpo/margin": 2.9018197059631348, "fcm_dpo/q_t": 0.3032812178134918, "grad_norm": 87.00528717041016, "learning_rate": 2.947135628327544e-07, "logits/chosen": 0.3037651777267456, "logits/rejected": 0.2769252061843872, "logps/chosen": -63.68206024169922, "logps/ref_chosen": -59.242584228515625, "logps/ref_rejected": -69.87483215332031, "logps/rejected": -77.21614074707031, "loss": 0.8903, "margin_dpo/margin_mean": 2.901819944381714, "margin_dpo/margin_std": 3.6702699661254883, "step": 331 }, { "epoch": 0.5018896447467877, "fcm_dpo/beta": 0.37952011823654175, "fcm_dpo/delta": -0.07606175541877747, "fcm_dpo/margin": 2.4156653881073, "fcm_dpo/q_t": 0.3336995542049408, "grad_norm": 90.73616027832031, "learning_rate": 2.934120444167326e-07, "logits/chosen": 0.15260085463523865, "logits/rejected": 0.11272098869085312, "logps/chosen": -71.80547332763672, "logps/ref_chosen": -67.10975646972656, "logps/ref_rejected": -77.11839294433594, "logps/rejected": -84.22976684570312, "loss": 0.9245, "margin_dpo/margin_mean": 2.4156653881073, "margin_dpo/margin_std": 3.1381473541259766, "step": 332 }, { "epoch": 0.5034013605442177, "fcm_dpo/beta": 0.35634881258010864, "fcm_dpo/delta": -0.3166324496269226, "fcm_dpo/margin": 3.1737165451049805, "fcm_dpo/q_t": 0.2953706979751587, "grad_norm": 72.91551208496094, "learning_rate": 2.921093116725076e-07, "logits/chosen": 0.2314390391111374, "logits/rejected": 0.17708361148834229, "logps/chosen": -62.782108306884766, "logps/ref_chosen": -58.381134033203125, "logps/ref_rejected": -85.02839660644531, "logps/rejected": -92.60308837890625, "loss": 0.7954, "margin_dpo/margin_mean": 3.1737163066864014, "margin_dpo/margin_std": 3.3868823051452637, "step": 333 }, { "epoch": 0.5049130763416477, "fcm_dpo/beta": 0.35540711879730225, "fcm_dpo/delta": 0.12635207176208496, "fcm_dpo/margin": 2.0626180171966553, "fcm_dpo/q_t": 0.36454758048057556, "grad_norm": 94.0213623046875, "learning_rate": 2.9080540104031484e-07, "logits/chosen": 0.2191026508808136, "logits/rejected": 0.18316911160945892, "logps/chosen": -71.8309326171875, "logps/ref_chosen": -66.89199829101562, "logps/ref_rejected": -91.83695220947266, "logps/rejected": -98.8385009765625, "loss": 1.1093, "margin_dpo/margin_mean": 2.062617778778076, "margin_dpo/margin_std": 3.5854196548461914, "step": 334 }, { "epoch": 0.5064247921390779, "fcm_dpo/beta": 0.3609894812107086, "fcm_dpo/delta": -0.032357558608055115, "fcm_dpo/margin": 2.4321510791778564, "fcm_dpo/q_t": 0.34385281801223755, "grad_norm": 80.12577056884766, "learning_rate": 2.895003489933375e-07, "logits/chosen": 0.2101183980703354, "logits/rejected": 0.17928308248519897, "logps/chosen": -66.1629638671875, "logps/ref_chosen": -61.51445770263672, "logps/ref_rejected": -75.68916320800781, "logps/rejected": -82.76982879638672, "loss": 1.0087, "margin_dpo/margin_mean": 2.4321508407592773, "margin_dpo/margin_std": 3.508328914642334, "step": 335 }, { "epoch": 0.5079365079365079, "fcm_dpo/beta": 0.35370177030563354, "fcm_dpo/delta": 0.04779374599456787, "fcm_dpo/margin": 2.2667744159698486, "fcm_dpo/q_t": 0.36775410175323486, "grad_norm": 97.24869537353516, "learning_rate": 2.8819419203668675e-07, "logits/chosen": 0.1473027765750885, "logits/rejected": 0.13241709768772125, "logps/chosen": -74.49370574951172, "logps/ref_chosen": -68.85006713867188, "logps/ref_rejected": -92.99603271484375, "logps/rejected": -100.90644836425781, "loss": 1.0564, "margin_dpo/margin_mean": 2.2667746543884277, "margin_dpo/margin_std": 3.6942431926727295, "step": 336 }, { "epoch": 0.509448223733938, "fcm_dpo/beta": 0.3663819432258606, "fcm_dpo/delta": 0.12714561820030212, "fcm_dpo/margin": 1.9988073110580444, "fcm_dpo/q_t": 0.36406582593917847, "grad_norm": 92.78190612792969, "learning_rate": 2.8688696670638053e-07, "logits/chosen": 0.11123036593198776, "logits/rejected": 0.08163227140903473, "logps/chosen": -78.41031646728516, "logps/ref_chosen": -73.18783569335938, "logps/ref_rejected": -86.89118957519531, "logps/rejected": -94.11248779296875, "loss": 1.0628, "margin_dpo/margin_mean": 1.9988073110580444, "margin_dpo/margin_std": 3.276918649673462, "step": 337 }, { "epoch": 0.5109599395313681, "fcm_dpo/beta": 0.3737809658050537, "fcm_dpo/delta": 0.052867673337459564, "fcm_dpo/margin": 2.1428439617156982, "fcm_dpo/q_t": 0.3521310091018677, "grad_norm": 91.3255386352539, "learning_rate": 2.8557870956832133e-07, "logits/chosen": 0.16858114302158356, "logits/rejected": 0.1449778825044632, "logps/chosen": -69.2572021484375, "logps/ref_chosen": -63.939613342285156, "logps/ref_rejected": -75.34243774414062, "logps/rejected": -82.80287170410156, "loss": 1.0087, "margin_dpo/margin_mean": 2.14284348487854, "margin_dpo/margin_std": 3.1413938999176025, "step": 338 }, { "epoch": 0.5124716553287982, "fcm_dpo/beta": 0.368240624666214, "fcm_dpo/delta": -0.04275989532470703, "fcm_dpo/margin": 2.410059690475464, "fcm_dpo/q_t": 0.3387882709503174, "grad_norm": 81.14483642578125, "learning_rate": 2.842694572172736e-07, "logits/chosen": 0.2619893550872803, "logits/rejected": 0.19453716278076172, "logps/chosen": -50.87765884399414, "logps/ref_chosen": -45.54913330078125, "logps/ref_rejected": -67.0482177734375, "logps/rejected": -74.78680419921875, "loss": 0.9471, "margin_dpo/margin_mean": 2.4100594520568848, "margin_dpo/margin_std": 3.20064115524292, "step": 339 }, { "epoch": 0.5139833711262283, "fcm_dpo/beta": 0.3670642673969269, "fcm_dpo/delta": -0.1399812549352646, "fcm_dpo/margin": 2.6568050384521484, "fcm_dpo/q_t": 0.3358362317085266, "grad_norm": 76.66029357910156, "learning_rate": 2.8295924627584004e-07, "logits/chosen": 0.18538600206375122, "logits/rejected": 0.16327080130577087, "logps/chosen": -59.5405158996582, "logps/ref_chosen": -54.00564956665039, "logps/ref_rejected": -61.314430236816406, "logps/rejected": -69.506103515625, "loss": 1.0199, "margin_dpo/margin_mean": 2.6568050384521484, "margin_dpo/margin_std": 3.9218854904174805, "step": 340 }, { "epoch": 0.5154950869236583, "fcm_dpo/beta": 0.34759050607681274, "fcm_dpo/delta": -0.07815767824649811, "fcm_dpo/margin": 2.6133947372436523, "fcm_dpo/q_t": 0.33973705768585205, "grad_norm": 102.14833068847656, "learning_rate": 2.816481133934373e-07, "logits/chosen": 0.23524977266788483, "logits/rejected": 0.19626188278198242, "logps/chosen": -68.50048828125, "logps/ref_chosen": -63.39509582519531, "logps/ref_rejected": -76.20973205566406, "logps/rejected": -83.92851257324219, "loss": 1.0228, "margin_dpo/margin_mean": 2.6133944988250732, "margin_dpo/margin_std": 3.6573469638824463, "step": 341 }, { "epoch": 0.5170068027210885, "fcm_dpo/beta": 0.3411981463432312, "fcm_dpo/delta": -0.22494906187057495, "fcm_dpo/margin": 3.076798677444458, "fcm_dpo/q_t": 0.3198407292366028, "grad_norm": 65.66190338134766, "learning_rate": 2.8033609524527046e-07, "logits/chosen": 0.20193436741828918, "logits/rejected": 0.16605061292648315, "logps/chosen": -58.59202575683594, "logps/ref_chosen": -53.047813415527344, "logps/ref_rejected": -68.2854232788086, "logps/rejected": -76.90643310546875, "loss": 0.8953, "margin_dpo/margin_mean": 3.076798915863037, "margin_dpo/margin_std": 3.8478684425354004, "step": 342 }, { "epoch": 0.5185185185185185, "fcm_dpo/beta": 0.3504355549812317, "fcm_dpo/delta": 0.31661123037338257, "fcm_dpo/margin": 1.5729039907455444, "fcm_dpo/q_t": 0.3953471779823303, "grad_norm": 93.7979965209961, "learning_rate": 2.7902322853130753e-07, "logits/chosen": 0.14726698398590088, "logits/rejected": 0.14337411522865295, "logps/chosen": -75.63270568847656, "logps/ref_chosen": -70.57852935791016, "logps/ref_rejected": -84.73873901367188, "logps/rejected": -91.36581420898438, "loss": 1.2176, "margin_dpo/margin_mean": 1.5729038715362549, "margin_dpo/margin_std": 3.362330198287964, "step": 343 }, { "epoch": 0.5200302343159486, "fcm_dpo/beta": 0.35818302631378174, "fcm_dpo/delta": -0.029689103364944458, "fcm_dpo/margin": 2.4477405548095703, "fcm_dpo/q_t": 0.33762115240097046, "grad_norm": 91.70993041992188, "learning_rate": 2.7770954997525274e-07, "logits/chosen": 0.201224684715271, "logits/rejected": 0.1562662124633789, "logps/chosen": -61.6827392578125, "logps/ref_chosen": -55.811004638671875, "logps/ref_rejected": -84.77637481689453, "logps/rejected": -93.0958480834961, "loss": 0.9849, "margin_dpo/margin_mean": 2.447740077972412, "margin_dpo/margin_std": 3.4348011016845703, "step": 344 }, { "epoch": 0.5215419501133787, "fcm_dpo/beta": 0.3570266366004944, "fcm_dpo/delta": -0.005030490458011627, "fcm_dpo/margin": 2.3924896717071533, "fcm_dpo/q_t": 0.3465713858604431, "grad_norm": 74.15127563476562, "learning_rate": 2.7639509632351927e-07, "logits/chosen": 0.2622082233428955, "logits/rejected": 0.22717997431755066, "logps/chosen": -62.51710510253906, "logps/ref_chosen": -57.78609848022461, "logps/ref_rejected": -78.91847229003906, "logps/rejected": -86.0419692993164, "loss": 0.9996, "margin_dpo/margin_mean": 2.3924896717071533, "margin_dpo/margin_std": 3.526125431060791, "step": 345 }, { "epoch": 0.5230536659108088, "fcm_dpo/beta": 0.3490391969680786, "fcm_dpo/delta": -0.2172488123178482, "fcm_dpo/margin": 2.994063377380371, "fcm_dpo/q_t": 0.315748393535614, "grad_norm": 82.33354187011719, "learning_rate": 2.7507990434420123e-07, "logits/chosen": 0.27292877435684204, "logits/rejected": 0.2155582308769226, "logps/chosen": -61.15895080566406, "logps/ref_chosen": -56.285125732421875, "logps/ref_rejected": -91.15303039550781, "logps/rejected": -99.02091979980469, "loss": 0.9019, "margin_dpo/margin_mean": 2.994063377380371, "margin_dpo/margin_std": 3.723098039627075, "step": 346 }, { "epoch": 0.5245653817082389, "fcm_dpo/beta": 0.343899130821228, "fcm_dpo/delta": 0.03331548720598221, "fcm_dpo/margin": 2.382420539855957, "fcm_dpo/q_t": 0.3580579161643982, "grad_norm": 79.35133361816406, "learning_rate": 2.737640108260456e-07, "logits/chosen": 0.3136757016181946, "logits/rejected": 0.2722731828689575, "logps/chosen": -59.121246337890625, "logps/ref_chosen": -53.499542236328125, "logps/ref_rejected": -72.52565002441406, "logps/rejected": -80.52978515625, "loss": 1.0411, "margin_dpo/margin_mean": 2.382420539855957, "margin_dpo/margin_std": 3.8586859703063965, "step": 347 }, { "epoch": 0.5260770975056689, "fcm_dpo/beta": 0.32882171869277954, "fcm_dpo/delta": -0.2895653247833252, "fcm_dpo/margin": 3.359653949737549, "fcm_dpo/q_t": 0.3357928991317749, "grad_norm": 68.9252700805664, "learning_rate": 2.724474525774229e-07, "logits/chosen": 0.31114572286605835, "logits/rejected": 0.28537189960479736, "logps/chosen": -55.660499572753906, "logps/ref_chosen": -50.78684997558594, "logps/ref_rejected": -68.63732147216797, "logps/rejected": -76.87062072753906, "loss": 0.9576, "margin_dpo/margin_mean": 3.3596534729003906, "margin_dpo/margin_std": 4.968472480773926, "step": 348 }, { "epoch": 0.527588813303099, "fcm_dpo/beta": 0.32099393010139465, "fcm_dpo/delta": -0.10274302214384079, "fcm_dpo/margin": 2.9376416206359863, "fcm_dpo/q_t": 0.3241846561431885, "grad_norm": 76.00582122802734, "learning_rate": 2.711302664252973e-07, "logits/chosen": 0.2401653528213501, "logits/rejected": 0.17262977361679077, "logps/chosen": -58.41884231567383, "logps/ref_chosen": -53.325008392333984, "logps/ref_rejected": -83.21236419677734, "logps/rejected": -91.24383544921875, "loss": 0.9257, "margin_dpo/margin_mean": 2.9376418590545654, "margin_dpo/margin_std": 3.7340314388275146, "step": 349 }, { "epoch": 0.5291005291005291, "fcm_dpo/beta": 0.3053371012210846, "fcm_dpo/delta": -0.2579427659511566, "fcm_dpo/margin": 3.528461217880249, "fcm_dpo/q_t": 0.307132363319397, "grad_norm": 78.71974182128906, "learning_rate": 2.698124892141971e-07, "logits/chosen": 0.18329796195030212, "logits/rejected": 0.1252821534872055, "logps/chosen": -67.06321716308594, "logps/ref_chosen": -61.625770568847656, "logps/ref_rejected": -87.63627624511719, "logps/rejected": -96.6021728515625, "loss": 0.851, "margin_dpo/margin_mean": 3.5284602642059326, "margin_dpo/margin_std": 4.088956832885742, "step": 350 }, { "epoch": 0.5306122448979592, "fcm_dpo/beta": 0.304359495639801, "fcm_dpo/delta": 0.031957440078258514, "fcm_dpo/margin": 2.6963400840759277, "fcm_dpo/q_t": 0.3338220417499542, "grad_norm": 68.61235046386719, "learning_rate": 2.6849415780518357e-07, "logits/chosen": 0.15642112493515015, "logits/rejected": 0.09655676037073135, "logps/chosen": -61.657840728759766, "logps/ref_chosen": -56.2563362121582, "logps/ref_rejected": -79.11589813232422, "logps/rejected": -87.2137451171875, "loss": 1.0152, "margin_dpo/margin_mean": 2.696340322494507, "margin_dpo/margin_std": 3.8406622409820557, "step": 351 }, { "epoch": 0.5321239606953893, "fcm_dpo/beta": 0.3055855333805084, "fcm_dpo/delta": 0.049372974783182144, "fcm_dpo/margin": 2.6310970783233643, "fcm_dpo/q_t": 0.3505841791629791, "grad_norm": 74.63536071777344, "learning_rate": 2.6717530907482024e-07, "logits/chosen": 0.2353535145521164, "logits/rejected": 0.19251593947410583, "logps/chosen": -68.26545715332031, "logps/ref_chosen": -63.05195236206055, "logps/ref_rejected": -85.52035522460938, "logps/rejected": -93.36494445800781, "loss": 0.9607, "margin_dpo/margin_mean": 2.6310970783233643, "margin_dpo/margin_std": 3.5701792240142822, "step": 352 }, { "epoch": 0.5336356764928194, "fcm_dpo/beta": 0.31123608350753784, "fcm_dpo/delta": 0.0005646422505378723, "fcm_dpo/margin": 2.724490165710449, "fcm_dpo/q_t": 0.34112924337387085, "grad_norm": 71.16338348388672, "learning_rate": 2.658559799141411e-07, "logits/chosen": 0.2434076964855194, "logits/rejected": 0.24078680574893951, "logps/chosen": -74.14156341552734, "logps/ref_chosen": -69.00918579101562, "logps/ref_rejected": -72.65840148925781, "logps/rejected": -80.51527404785156, "loss": 0.9975, "margin_dpo/margin_mean": 2.724489450454712, "margin_dpo/margin_std": 3.953610420227051, "step": 353 }, { "epoch": 0.5351473922902494, "fcm_dpo/beta": 0.301596999168396, "fcm_dpo/delta": -0.08267806470394135, "fcm_dpo/margin": 3.0578184127807617, "fcm_dpo/q_t": 0.32855403423309326, "grad_norm": 85.67142486572266, "learning_rate": 2.6453620722761895e-07, "logits/chosen": 0.26027190685272217, "logits/rejected": 0.15137949585914612, "logps/chosen": -45.0567626953125, "logps/ref_chosen": -39.78833770751953, "logps/ref_rejected": -69.56885528564453, "logps/rejected": -77.89509582519531, "loss": 0.9408, "margin_dpo/margin_mean": 3.0578184127807617, "margin_dpo/margin_std": 3.9154303073883057, "step": 354 }, { "epoch": 0.5366591080876795, "fcm_dpo/beta": 0.3026568293571472, "fcm_dpo/delta": -0.0332593210041523, "fcm_dpo/margin": 2.908555030822754, "fcm_dpo/q_t": 0.3385379910469055, "grad_norm": 74.2393569946289, "learning_rate": 2.632160279321328e-07, "logits/chosen": 0.21670889854431152, "logits/rejected": 0.12240596115589142, "logps/chosen": -51.702720642089844, "logps/ref_chosen": -46.25537872314453, "logps/ref_rejected": -78.20236206054688, "logps/rejected": -86.55825805664062, "loss": 0.9828, "margin_dpo/margin_mean": 2.908555507659912, "margin_dpo/margin_std": 4.00840950012207, "step": 355 }, { "epoch": 0.5381708238851096, "fcm_dpo/beta": 0.3031303584575653, "fcm_dpo/delta": 0.06947789341211319, "fcm_dpo/margin": 2.592515230178833, "fcm_dpo/q_t": 0.35653093457221985, "grad_norm": 73.01097106933594, "learning_rate": 2.618954789559356e-07, "logits/chosen": 0.21377143263816833, "logits/rejected": 0.15734095871448517, "logps/chosen": -53.23834228515625, "logps/ref_chosen": -47.906158447265625, "logps/ref_rejected": -74.29397583007812, "logps/rejected": -82.21867370605469, "loss": 1.1381, "margin_dpo/margin_mean": 2.592514991760254, "margin_dpo/margin_std": 4.66711950302124, "step": 356 }, { "epoch": 0.5396825396825397, "fcm_dpo/beta": 0.3081361651420593, "fcm_dpo/delta": 0.1387558877468109, "fcm_dpo/margin": 2.3360090255737305, "fcm_dpo/q_t": 0.3646143972873688, "grad_norm": 81.77513885498047, "learning_rate": 2.6057459723762076e-07, "logits/chosen": 0.23374928534030914, "logits/rejected": 0.20612743496894836, "logps/chosen": -68.37667083740234, "logps/ref_chosen": -62.63500213623047, "logps/ref_rejected": -65.11399841308594, "logps/rejected": -73.19168853759766, "loss": 1.087, "margin_dpo/margin_mean": 2.3360087871551514, "margin_dpo/margin_std": 3.8651585578918457, "step": 357 }, { "epoch": 0.5411942554799698, "fcm_dpo/beta": 0.30569684505462646, "fcm_dpo/delta": -0.21745963394641876, "fcm_dpo/margin": 3.4191436767578125, "fcm_dpo/q_t": 0.30736494064331055, "grad_norm": 61.0597038269043, "learning_rate": 2.5925341972508954e-07, "logits/chosen": 0.17847847938537598, "logits/rejected": 0.17569740116596222, "logps/chosen": -72.6213150024414, "logps/ref_chosen": -67.20960998535156, "logps/ref_rejected": -69.34715270996094, "logps/rejected": -78.17800903320312, "loss": 0.852, "margin_dpo/margin_mean": 3.41914439201355, "margin_dpo/margin_std": 3.8466320037841797, "step": 358 }, { "epoch": 0.5427059712773998, "fcm_dpo/beta": 0.3135707676410675, "fcm_dpo/delta": 0.16817688941955566, "fcm_dpo/margin": 2.1862258911132812, "fcm_dpo/q_t": 0.37526875734329224, "grad_norm": 81.7992172241211, "learning_rate": 2.579319833745169e-07, "logits/chosen": 0.2080976814031601, "logits/rejected": 0.18186524510383606, "logps/chosen": -68.27560424804688, "logps/ref_chosen": -62.52578353881836, "logps/ref_rejected": -76.63114929199219, "logps/rejected": -84.56719970703125, "loss": 1.1152, "margin_dpo/margin_mean": 2.186225652694702, "margin_dpo/margin_std": 3.738008975982666, "step": 359 }, { "epoch": 0.54421768707483, "fcm_dpo/beta": 0.3083285391330719, "fcm_dpo/delta": -0.04855549335479736, "fcm_dpo/margin": 2.899702310562134, "fcm_dpo/q_t": 0.3426200747489929, "grad_norm": 78.34294891357422, "learning_rate": 2.5661032514931834e-07, "logits/chosen": 0.16897408664226532, "logits/rejected": 0.09226278215646744, "logps/chosen": -69.31376647949219, "logps/ref_chosen": -63.48772048950195, "logps/ref_rejected": -90.6891098022461, "logps/rejected": -99.41486358642578, "loss": 0.9654, "margin_dpo/margin_mean": 2.899702548980713, "margin_dpo/margin_std": 4.131417274475098, "step": 360 }, { "epoch": 0.54572940287226, "fcm_dpo/beta": 0.3021819591522217, "fcm_dpo/delta": -0.10609018802642822, "fcm_dpo/margin": 3.1286063194274902, "fcm_dpo/q_t": 0.3255624771118164, "grad_norm": 71.85318756103516, "learning_rate": 2.552884820191154e-07, "logits/chosen": 0.2620081901550293, "logits/rejected": 0.21690014004707336, "logps/chosen": -63.33432388305664, "logps/ref_chosen": -57.917144775390625, "logps/ref_rejected": -72.39089965820312, "logps/rejected": -80.93669128417969, "loss": 0.929, "margin_dpo/margin_mean": 3.1286067962646484, "margin_dpo/margin_std": 4.0841383934021, "step": 361 }, { "epoch": 0.54724111866969, "fcm_dpo/beta": 0.30271005630493164, "fcm_dpo/delta": -0.09775380790233612, "fcm_dpo/margin": 3.0863213539123535, "fcm_dpo/q_t": 0.32780078053474426, "grad_norm": 71.3369369506836, "learning_rate": 2.53966490958702e-07, "logits/chosen": 0.27897951006889343, "logits/rejected": 0.2024056762456894, "logps/chosen": -68.86507415771484, "logps/ref_chosen": -63.4434700012207, "logps/ref_rejected": -103.45516967773438, "logps/rejected": -111.96309661865234, "loss": 0.9063, "margin_dpo/margin_mean": 3.0863213539123535, "margin_dpo/margin_std": 3.5979232788085938, "step": 362 }, { "epoch": 0.5487528344671202, "fcm_dpo/beta": 0.29156380891799927, "fcm_dpo/delta": -0.09345076978206635, "fcm_dpo/margin": 3.205629587173462, "fcm_dpo/q_t": 0.3229590952396393, "grad_norm": 65.02751922607422, "learning_rate": 2.526443889470099e-07, "logits/chosen": 0.25476565957069397, "logits/rejected": 0.1603575050830841, "logps/chosen": -55.03227996826172, "logps/ref_chosen": -48.65182876586914, "logps/ref_rejected": -88.65904235839844, "logps/rejected": -98.24512481689453, "loss": 0.9196, "margin_dpo/margin_mean": 3.205629348754883, "margin_dpo/margin_std": 4.075560569763184, "step": 363 }, { "epoch": 0.5502645502645502, "fcm_dpo/beta": 0.285260945558548, "fcm_dpo/delta": -0.11832509934902191, "fcm_dpo/margin": 3.354739189147949, "fcm_dpo/q_t": 0.33564698696136475, "grad_norm": 63.04609680175781, "learning_rate": 2.513222129660744e-07, "logits/chosen": 0.1435980498790741, "logits/rejected": 0.06505458801984787, "logps/chosen": -63.30625915527344, "logps/ref_chosen": -57.87107467651367, "logps/ref_rejected": -80.95503234863281, "logps/rejected": -89.74494934082031, "loss": 1.0008, "margin_dpo/margin_mean": 3.3547396659851074, "margin_dpo/margin_std": 4.903078079223633, "step": 364 }, { "epoch": 0.5517762660619804, "fcm_dpo/beta": 0.2818984389305115, "fcm_dpo/delta": -0.03541882336139679, "fcm_dpo/margin": 3.1297309398651123, "fcm_dpo/q_t": 0.3227863907814026, "grad_norm": 58.35200119018555, "learning_rate": 2.5e-07, "logits/chosen": 0.26480984687805176, "logits/rejected": 0.2539828419685364, "logps/chosen": -69.91170501708984, "logps/ref_chosen": -64.94217681884766, "logps/ref_rejected": -74.8599853515625, "logps/rejected": -82.95924377441406, "loss": 0.8809, "margin_dpo/margin_mean": 3.1297316551208496, "margin_dpo/margin_std": 3.41985821723938, "step": 365 }, { "epoch": 0.5532879818594104, "fcm_dpo/beta": 0.28988415002822876, "fcm_dpo/delta": 0.10946245491504669, "fcm_dpo/margin": 2.567075729370117, "fcm_dpo/q_t": 0.36410263180732727, "grad_norm": 70.50802612304688, "learning_rate": 2.486777870339255e-07, "logits/chosen": 0.1697695553302765, "logits/rejected": 0.150035560131073, "logps/chosen": -59.808685302734375, "logps/ref_chosen": -55.16598129272461, "logps/ref_rejected": -65.26121520996094, "logps/rejected": -72.47099304199219, "loss": 1.1025, "margin_dpo/margin_mean": 2.5670762062072754, "margin_dpo/margin_std": 4.370789527893066, "step": 366 }, { "epoch": 0.5547996976568406, "fcm_dpo/beta": 0.29406261444091797, "fcm_dpo/delta": 0.1747613400220871, "fcm_dpo/margin": 2.338266372680664, "fcm_dpo/q_t": 0.35922878980636597, "grad_norm": 66.86473083496094, "learning_rate": 2.4735561105299014e-07, "logits/chosen": 0.2019767463207245, "logits/rejected": 0.1173165887594223, "logps/chosen": -61.685611724853516, "logps/ref_chosen": -56.01046371459961, "logps/ref_rejected": -77.31010437011719, "logps/rejected": -85.32351684570312, "loss": 1.0202, "margin_dpo/margin_mean": 2.338266611099243, "margin_dpo/margin_std": 3.347780227661133, "step": 367 }, { "epoch": 0.5563114134542706, "fcm_dpo/beta": 0.30230095982551575, "fcm_dpo/delta": 0.11584046483039856, "fcm_dpo/margin": 2.457531452178955, "fcm_dpo/q_t": 0.36515921354293823, "grad_norm": 76.8958969116211, "learning_rate": 2.46033509041298e-07, "logits/chosen": 0.10607978701591492, "logits/rejected": 0.09705992043018341, "logps/chosen": -80.48188781738281, "logps/ref_chosen": -74.82927703857422, "logps/ref_rejected": -76.11680603027344, "logps/rejected": -84.2269515991211, "loss": 1.0747, "margin_dpo/margin_mean": 2.4575319290161133, "margin_dpo/margin_std": 4.010162830352783, "step": 368 }, { "epoch": 0.5578231292517006, "fcm_dpo/beta": 0.31162023544311523, "fcm_dpo/delta": 0.09726077318191528, "fcm_dpo/margin": 2.434133529663086, "fcm_dpo/q_t": 0.35368454456329346, "grad_norm": 68.05358123779297, "learning_rate": 2.447115179808846e-07, "logits/chosen": 0.19629350304603577, "logits/rejected": 0.15964928269386292, "logps/chosen": -63.93141174316406, "logps/ref_chosen": -58.32621765136719, "logps/ref_rejected": -80.92183685302734, "logps/rejected": -88.96116638183594, "loss": 1.0539, "margin_dpo/margin_mean": 2.4341330528259277, "margin_dpo/margin_std": 3.771906852722168, "step": 369 }, { "epoch": 0.5593348450491308, "fcm_dpo/beta": 0.30336689949035645, "fcm_dpo/delta": -0.14237123727798462, "fcm_dpo/margin": 3.222742795944214, "fcm_dpo/q_t": 0.3289080858230591, "grad_norm": 71.9991226196289, "learning_rate": 2.4338967485068164e-07, "logits/chosen": 0.2874883711338043, "logits/rejected": 0.23224374651908875, "logps/chosen": -58.521183013916016, "logps/ref_chosen": -52.88372039794922, "logps/ref_rejected": -79.43692016601562, "logps/rejected": -88.297119140625, "loss": 1.0348, "margin_dpo/margin_mean": 3.222743034362793, "margin_dpo/margin_std": 4.919537544250488, "step": 370 }, { "epoch": 0.5608465608465608, "fcm_dpo/beta": 0.30383872985839844, "fcm_dpo/delta": -0.03940815478563309, "fcm_dpo/margin": 2.9095592498779297, "fcm_dpo/q_t": 0.34773844480514526, "grad_norm": 68.0054702758789, "learning_rate": 2.420680166254831e-07, "logits/chosen": 0.31215453147888184, "logits/rejected": 0.28234991431236267, "logps/chosen": -54.938941955566406, "logps/ref_chosen": -49.224212646484375, "logps/ref_rejected": -63.348472595214844, "logps/rejected": -71.97276306152344, "loss": 1.06, "margin_dpo/margin_mean": 2.909559488296509, "margin_dpo/margin_std": 4.57585334777832, "step": 371 }, { "epoch": 0.562358276643991, "fcm_dpo/beta": 0.31304311752319336, "fcm_dpo/delta": 0.1990874856710434, "fcm_dpo/margin": 2.10213565826416, "fcm_dpo/q_t": 0.3859516382217407, "grad_norm": 79.47772216796875, "learning_rate": 2.4074658027491044e-07, "logits/chosen": 0.24154727160930634, "logits/rejected": 0.17342635989189148, "logps/chosen": -58.218475341796875, "logps/ref_chosen": -52.269554138183594, "logps/ref_rejected": -72.99522399902344, "logps/rejected": -81.0462875366211, "loss": 1.2735, "margin_dpo/margin_mean": 2.102135419845581, "margin_dpo/margin_std": 4.756669998168945, "step": 372 }, { "epoch": 0.563869992441421, "fcm_dpo/beta": 0.3137848973274231, "fcm_dpo/delta": 0.0420655757188797, "fcm_dpo/margin": 2.5857388973236084, "fcm_dpo/q_t": 0.36145395040512085, "grad_norm": 99.41732788085938, "learning_rate": 2.394254027623792e-07, "logits/chosen": 0.2519097924232483, "logits/rejected": 0.1964668333530426, "logps/chosen": -67.56515502929688, "logps/ref_chosen": -61.112998962402344, "logps/ref_rejected": -76.24851989746094, "logps/rejected": -85.28641510009766, "loss": 1.1351, "margin_dpo/margin_mean": 2.5857386589050293, "margin_dpo/margin_std": 4.668540000915527, "step": 373 }, { "epoch": 0.5653817082388511, "fcm_dpo/beta": 0.2995087504386902, "fcm_dpo/delta": -0.29449182748794556, "fcm_dpo/margin": 3.7042269706726074, "fcm_dpo/q_t": 0.2900841534137726, "grad_norm": 73.12067413330078, "learning_rate": 2.381045210440644e-07, "logits/chosen": 0.15426763892173767, "logits/rejected": 0.1400105059146881, "logps/chosen": -78.16365051269531, "logps/ref_chosen": -72.66920471191406, "logps/ref_rejected": -76.83158874511719, "logps/rejected": -86.03025817871094, "loss": 0.7882, "margin_dpo/margin_mean": 3.7042269706726074, "margin_dpo/margin_std": 3.675267457962036, "step": 374 }, { "epoch": 0.5668934240362812, "fcm_dpo/beta": 0.30636027455329895, "fcm_dpo/delta": 0.21007570624351501, "fcm_dpo/margin": 2.1330573558807373, "fcm_dpo/q_t": 0.38012027740478516, "grad_norm": 88.26660919189453, "learning_rate": 2.3678397206786715e-07, "logits/chosen": 0.2568998336791992, "logits/rejected": 0.21344329416751862, "logps/chosen": -63.1327018737793, "logps/ref_chosen": -57.68330383300781, "logps/ref_rejected": -79.34097290039062, "logps/rejected": -86.92343139648438, "loss": 1.1583, "margin_dpo/margin_mean": 2.133056402206421, "margin_dpo/margin_std": 4.111923694610596, "step": 375 }, { "epoch": 0.5684051398337112, "fcm_dpo/beta": 0.29211366176605225, "fcm_dpo/delta": -0.32011279463768005, "fcm_dpo/margin": 3.864445924758911, "fcm_dpo/q_t": 0.3048480749130249, "grad_norm": 64.11585998535156, "learning_rate": 2.3546379277238103e-07, "logits/chosen": 0.2729426622390747, "logits/rejected": 0.2218175083398819, "logps/chosen": -57.24680709838867, "logps/ref_chosen": -51.674072265625, "logps/ref_rejected": -75.69713592529297, "logps/rejected": -85.13431549072266, "loss": 0.891, "margin_dpo/margin_mean": 3.8644466400146484, "margin_dpo/margin_std": 4.771137237548828, "step": 376 }, { "epoch": 0.5699168556311414, "fcm_dpo/beta": 0.2949063181877136, "fcm_dpo/delta": 0.06371963024139404, "fcm_dpo/margin": 2.6792593002319336, "fcm_dpo/q_t": 0.3503820598125458, "grad_norm": 61.93028259277344, "learning_rate": 2.3414402008585886e-07, "logits/chosen": 0.1965014487504959, "logits/rejected": 0.17393234372138977, "logps/chosen": -52.405757904052734, "logps/ref_chosen": -46.17853546142578, "logps/ref_rejected": -57.756500244140625, "logps/rejected": -66.66297912597656, "loss": 1.0073, "margin_dpo/margin_mean": 2.6792590618133545, "margin_dpo/margin_std": 3.86149263381958, "step": 377 }, { "epoch": 0.5714285714285714, "fcm_dpo/beta": 0.30152004957199097, "fcm_dpo/delta": 0.09856449067592621, "fcm_dpo/margin": 2.506265163421631, "fcm_dpo/q_t": 0.3678427040576935, "grad_norm": 75.0130386352539, "learning_rate": 2.3282469092517977e-07, "logits/chosen": 0.2718163728713989, "logits/rejected": 0.2303917407989502, "logps/chosen": -65.19330596923828, "logps/ref_chosen": -59.21887969970703, "logps/ref_rejected": -71.24818420410156, "logps/rejected": -79.7288818359375, "loss": 1.0841, "margin_dpo/margin_mean": 2.5062649250030518, "margin_dpo/margin_std": 4.203706741333008, "step": 378 }, { "epoch": 0.5729402872260015, "fcm_dpo/beta": 0.29446160793304443, "fcm_dpo/delta": -0.12026385217905045, "fcm_dpo/margin": 3.2557544708251953, "fcm_dpo/q_t": 0.3261454403400421, "grad_norm": 81.48579406738281, "learning_rate": 2.3150584219481643e-07, "logits/chosen": 0.27448570728302, "logits/rejected": 0.22502100467681885, "logps/chosen": -81.78964233398438, "logps/ref_chosen": -76.31658935546875, "logps/ref_rejected": -104.26200103759766, "logps/rejected": -112.99081420898438, "loss": 0.9215, "margin_dpo/margin_mean": 3.2557549476623535, "margin_dpo/margin_std": 4.196225166320801, "step": 379 }, { "epoch": 0.5744520030234316, "fcm_dpo/beta": 0.28408801555633545, "fcm_dpo/delta": -0.21791377663612366, "fcm_dpo/margin": 3.680061101913452, "fcm_dpo/q_t": 0.3053869307041168, "grad_norm": 64.25228118896484, "learning_rate": 2.3018751078580283e-07, "logits/chosen": 0.2684813141822815, "logits/rejected": 0.23553693294525146, "logps/chosen": -66.39458465576172, "logps/ref_chosen": -61.283164978027344, "logps/ref_rejected": -72.38892364501953, "logps/rejected": -81.18040466308594, "loss": 0.9043, "margin_dpo/margin_mean": 3.6800613403320312, "margin_dpo/margin_std": 4.509557723999023, "step": 380 }, { "epoch": 0.5759637188208617, "fcm_dpo/beta": 0.30122846364974976, "fcm_dpo/delta": 0.3828117847442627, "fcm_dpo/margin": 1.577541470527649, "fcm_dpo/q_t": 0.4115419387817383, "grad_norm": 81.65371704101562, "learning_rate": 2.288697335747027e-07, "logits/chosen": 0.20629438757896423, "logits/rejected": 0.18379296362400055, "logps/chosen": -64.43083190917969, "logps/ref_chosen": -58.2139892578125, "logps/ref_rejected": -60.78669357299805, "logps/rejected": -68.5810775756836, "loss": 1.2574, "margin_dpo/margin_mean": 1.5775421857833862, "margin_dpo/margin_std": 3.8531484603881836, "step": 381 }, { "epoch": 0.5774754346182918, "fcm_dpo/beta": 0.3108755052089691, "fcm_dpo/delta": 0.0599745512008667, "fcm_dpo/margin": 2.53261137008667, "fcm_dpo/q_t": 0.3535318076610565, "grad_norm": 71.12667083740234, "learning_rate": 2.2755254742257706e-07, "logits/chosen": 0.2725491523742676, "logits/rejected": 0.23602010309696198, "logps/chosen": -68.12150573730469, "logps/ref_chosen": -61.82532501220703, "logps/ref_rejected": -83.0452880859375, "logps/rejected": -91.87407684326172, "loss": 1.0033, "margin_dpo/margin_mean": 2.53261137008667, "margin_dpo/margin_std": 3.539917230606079, "step": 382 }, { "epoch": 0.5789871504157218, "fcm_dpo/beta": 0.29766133427619934, "fcm_dpo/delta": -0.1338372379541397, "fcm_dpo/margin": 3.2536959648132324, "fcm_dpo/q_t": 0.33455702662467957, "grad_norm": 88.27291107177734, "learning_rate": 2.2623598917395436e-07, "logits/chosen": 0.17788708209991455, "logits/rejected": 0.1857486367225647, "logps/chosen": -86.0854721069336, "logps/ref_chosen": -80.56326293945312, "logps/ref_rejected": -74.62922668457031, "logps/rejected": -83.4051284790039, "loss": 0.9802, "margin_dpo/margin_mean": 3.2536959648132324, "margin_dpo/margin_std": 4.626866340637207, "step": 383 }, { "epoch": 0.5804988662131519, "fcm_dpo/beta": 0.2986605763435364, "fcm_dpo/delta": -0.017689041793346405, "fcm_dpo/margin": 2.899064540863037, "fcm_dpo/q_t": 0.3348064124584198, "grad_norm": 76.15990447998047, "learning_rate": 2.2492009565579875e-07, "logits/chosen": 0.24556401371955872, "logits/rejected": 0.20763534307479858, "logps/chosen": -71.83543395996094, "logps/ref_chosen": -65.47514343261719, "logps/ref_rejected": -79.67378234863281, "logps/rejected": -88.93313598632812, "loss": 0.9493, "margin_dpo/margin_mean": 2.899064064025879, "margin_dpo/margin_std": 3.847461462020874, "step": 384 }, { "epoch": 0.582010582010582, "fcm_dpo/beta": 0.29407477378845215, "fcm_dpo/delta": -0.0921003669500351, "fcm_dpo/margin": 3.174560070037842, "fcm_dpo/q_t": 0.3196459710597992, "grad_norm": 72.75206756591797, "learning_rate": 2.2360490367648084e-07, "logits/chosen": 0.18978914618492126, "logits/rejected": 0.15941289067268372, "logps/chosen": -71.92022705078125, "logps/ref_chosen": -66.0565185546875, "logps/ref_rejected": -86.68023681640625, "logps/rejected": -95.718505859375, "loss": 0.8738, "margin_dpo/margin_mean": 3.1745595932006836, "margin_dpo/margin_std": 3.579244375228882, "step": 385 }, { "epoch": 0.5835222978080121, "fcm_dpo/beta": 0.2947796583175659, "fcm_dpo/delta": 0.09643702208995819, "fcm_dpo/margin": 2.581514835357666, "fcm_dpo/q_t": 0.35556066036224365, "grad_norm": 74.4271011352539, "learning_rate": 2.2229045002474724e-07, "logits/chosen": 0.18471872806549072, "logits/rejected": 0.13904157280921936, "logps/chosen": -81.9725341796875, "logps/ref_chosen": -75.6236572265625, "logps/ref_rejected": -92.62330627441406, "logps/rejected": -101.55369567871094, "loss": 1.0201, "margin_dpo/margin_mean": 2.5815157890319824, "margin_dpo/margin_std": 3.8327016830444336, "step": 386 }, { "epoch": 0.5850340136054422, "fcm_dpo/beta": 0.28877580165863037, "fcm_dpo/delta": -0.19857263565063477, "fcm_dpo/margin": 3.560486316680908, "fcm_dpo/q_t": 0.29805874824523926, "grad_norm": 58.27238082885742, "learning_rate": 2.209767714686924e-07, "logits/chosen": 0.25704652070999146, "logits/rejected": 0.18072374165058136, "logps/chosen": -52.98728942871094, "logps/ref_chosen": -47.22170639038086, "logps/ref_rejected": -87.338134765625, "logps/rejected": -96.6642074584961, "loss": 0.7939, "margin_dpo/margin_mean": 3.56048583984375, "margin_dpo/margin_std": 3.5000014305114746, "step": 387 }, { "epoch": 0.5865457294028723, "fcm_dpo/beta": 0.2872125208377838, "fcm_dpo/delta": 0.10189881175756454, "fcm_dpo/margin": 2.6280932426452637, "fcm_dpo/q_t": 0.3697548806667328, "grad_norm": 70.79524993896484, "learning_rate": 2.1966390475472954e-07, "logits/chosen": 0.23693621158599854, "logits/rejected": 0.22322086989879608, "logps/chosen": -80.55561828613281, "logps/ref_chosen": -74.5794677734375, "logps/ref_rejected": -79.92558288574219, "logps/rejected": -88.52982330322266, "loss": 1.0817, "margin_dpo/margin_mean": 2.6280932426452637, "margin_dpo/margin_std": 4.3980913162231445, "step": 388 }, { "epoch": 0.5880574452003023, "fcm_dpo/beta": 0.28803473711013794, "fcm_dpo/delta": -0.08635501563549042, "fcm_dpo/margin": 3.223116874694824, "fcm_dpo/q_t": 0.32394009828567505, "grad_norm": 64.27388000488281, "learning_rate": 2.1835188660656265e-07, "logits/chosen": 0.25568056106567383, "logits/rejected": 0.22184878587722778, "logps/chosen": -67.70536041259766, "logps/ref_chosen": -61.624366760253906, "logps/ref_rejected": -76.50978088378906, "logps/rejected": -85.81388854980469, "loss": 0.9533, "margin_dpo/margin_mean": 3.223116874694824, "margin_dpo/margin_std": 4.228720664978027, "step": 389 }, { "epoch": 0.5895691609977324, "fcm_dpo/beta": 0.2855742573738098, "fcm_dpo/delta": -0.026870589703321457, "fcm_dpo/margin": 3.0623464584350586, "fcm_dpo/q_t": 0.3377484977245331, "grad_norm": 57.22404479980469, "learning_rate": 2.170407537241599e-07, "logits/chosen": 0.2583312392234802, "logits/rejected": 0.20761854946613312, "logps/chosen": -51.456993103027344, "logps/ref_chosen": -45.871864318847656, "logps/ref_rejected": -61.305999755859375, "logps/rejected": -69.95347595214844, "loss": 0.9365, "margin_dpo/margin_mean": 3.0623462200164795, "margin_dpo/margin_std": 3.994518995285034, "step": 390 }, { "epoch": 0.5910808767951625, "fcm_dpo/beta": 0.2780035734176636, "fcm_dpo/delta": -0.16346335411071777, "fcm_dpo/margin": 3.586273193359375, "fcm_dpo/q_t": 0.3249879479408264, "grad_norm": 62.95237731933594, "learning_rate": 2.1573054278272636e-07, "logits/chosen": 0.25140058994293213, "logits/rejected": 0.1961970329284668, "logps/chosen": -64.07249450683594, "logps/ref_chosen": -58.18701171875, "logps/ref_rejected": -83.63442993164062, "logps/rejected": -93.10619354248047, "loss": 1.0028, "margin_dpo/margin_mean": 3.586272954940796, "margin_dpo/margin_std": 5.170510768890381, "step": 391 }, { "epoch": 0.5925925925925926, "fcm_dpo/beta": 0.26895231008529663, "fcm_dpo/delta": -0.2476973831653595, "fcm_dpo/margin": 3.9834837913513184, "fcm_dpo/q_t": 0.31376171112060547, "grad_norm": 75.80889892578125, "learning_rate": 2.1442129043167873e-07, "logits/chosen": 0.3188512623310089, "logits/rejected": 0.2672809958457947, "logps/chosen": -74.83085632324219, "logps/ref_chosen": -69.7445297241211, "logps/ref_rejected": -94.05877685546875, "logps/rejected": -103.12858581542969, "loss": 0.9064, "margin_dpo/margin_mean": 3.9834845066070557, "margin_dpo/margin_std": 5.051880836486816, "step": 392 }, { "epoch": 0.5941043083900227, "fcm_dpo/beta": 0.25690969824790955, "fcm_dpo/delta": -0.06800644844770432, "fcm_dpo/margin": 3.5361289978027344, "fcm_dpo/q_t": 0.32431554794311523, "grad_norm": 57.294376373291016, "learning_rate": 2.131130332936195e-07, "logits/chosen": 0.2550227642059326, "logits/rejected": 0.22178462147712708, "logps/chosen": -58.557674407958984, "logps/ref_chosen": -52.33489990234375, "logps/ref_rejected": -74.33809661865234, "logps/rejected": -84.09700012207031, "loss": 0.8692, "margin_dpo/margin_mean": 3.5361287593841553, "margin_dpo/margin_std": 3.8214592933654785, "step": 393 }, { "epoch": 0.5956160241874527, "fcm_dpo/beta": 0.2616024911403656, "fcm_dpo/delta": 0.02560308948159218, "fcm_dpo/margin": 3.156092643737793, "fcm_dpo/q_t": 0.33914124965667725, "grad_norm": 60.21733856201172, "learning_rate": 2.1180580796331323e-07, "logits/chosen": 0.2926919460296631, "logits/rejected": 0.2641337513923645, "logps/chosen": -66.64264678955078, "logps/ref_chosen": -60.6761360168457, "logps/ref_rejected": -71.36074829101562, "logps/rejected": -80.48336029052734, "loss": 0.9327, "margin_dpo/margin_mean": 3.1560916900634766, "margin_dpo/margin_std": 3.9220170974731445, "step": 394 }, { "epoch": 0.5971277399848829, "fcm_dpo/beta": 0.2669936418533325, "fcm_dpo/delta": 0.11683942377567291, "fcm_dpo/margin": 2.7706220149993896, "fcm_dpo/q_t": 0.35929474234580994, "grad_norm": 66.62834167480469, "learning_rate": 2.104996510066625e-07, "logits/chosen": 0.20315015316009521, "logits/rejected": 0.13946810364723206, "logps/chosen": -56.223976135253906, "logps/ref_chosen": -50.60432434082031, "logps/ref_rejected": -77.08731079101562, "logps/rejected": -85.47758483886719, "loss": 1.0212, "margin_dpo/margin_mean": 2.7706220149993896, "margin_dpo/margin_std": 4.05787992477417, "step": 395 }, { "epoch": 0.5986394557823129, "fcm_dpo/beta": 0.2582937180995941, "fcm_dpo/delta": -0.08063206076622009, "fcm_dpo/margin": 3.540877103805542, "fcm_dpo/q_t": 0.32276594638824463, "grad_norm": 57.03807830810547, "learning_rate": 2.0919459895968517e-07, "logits/chosen": 0.23377332091331482, "logits/rejected": 0.15629377961158752, "logps/chosen": -56.608436584472656, "logps/ref_chosen": -51.35961151123047, "logps/ref_rejected": -79.89360046386719, "logps/rejected": -88.68330383300781, "loss": 0.873, "margin_dpo/margin_mean": 3.540876865386963, "margin_dpo/margin_std": 3.7023258209228516, "step": 396 }, { "epoch": 0.600151171579743, "fcm_dpo/beta": 0.27533477544784546, "fcm_dpo/delta": 0.29201841354370117, "fcm_dpo/margin": 2.0799665451049805, "fcm_dpo/q_t": 0.38529521226882935, "grad_norm": 87.17749786376953, "learning_rate": 2.078906883274924e-07, "logits/chosen": 0.19908317923545837, "logits/rejected": 0.1614154875278473, "logps/chosen": -72.62335205078125, "logps/ref_chosen": -66.45622253417969, "logps/ref_rejected": -85.74736785888672, "logps/rejected": -93.99446105957031, "loss": 1.2512, "margin_dpo/margin_mean": 2.0799667835235596, "margin_dpo/margin_std": 4.818110942840576, "step": 397 }, { "epoch": 0.6016628873771731, "fcm_dpo/beta": 0.2720872461795807, "fcm_dpo/delta": -0.113968625664711, "fcm_dpo/margin": 3.4996390342712402, "fcm_dpo/q_t": 0.32238367199897766, "grad_norm": 56.072662353515625, "learning_rate": 2.065879555832674e-07, "logits/chosen": 0.22736221551895142, "logits/rejected": 0.1738460659980774, "logps/chosen": -54.861907958984375, "logps/ref_chosen": -49.244239807128906, "logps/ref_rejected": -75.18949127197266, "logps/rejected": -84.30679321289062, "loss": 0.8728, "margin_dpo/margin_mean": 3.4996397495269775, "margin_dpo/margin_std": 4.072452068328857, "step": 398 }, { "epoch": 0.6031746031746031, "fcm_dpo/beta": 0.26396697759628296, "fcm_dpo/delta": -0.1899876594543457, "fcm_dpo/margin": 3.865616798400879, "fcm_dpo/q_t": 0.32054808735847473, "grad_norm": 63.06287384033203, "learning_rate": 2.052864371672457e-07, "logits/chosen": 0.18875831365585327, "logits/rejected": 0.08430158346891403, "logps/chosen": -74.19822692871094, "logps/ref_chosen": -68.30679321289062, "logps/ref_rejected": -113.2708511352539, "logps/rejected": -123.02790069580078, "loss": 0.896, "margin_dpo/margin_mean": 3.8656165599823, "margin_dpo/margin_std": 4.880072116851807, "step": 399 }, { "epoch": 0.6046863189720333, "fcm_dpo/beta": 0.263761043548584, "fcm_dpo/delta": 0.13915222883224487, "fcm_dpo/margin": 2.7275257110595703, "fcm_dpo/q_t": 0.368988037109375, "grad_norm": 70.60572052001953, "learning_rate": 2.0398616948569493e-07, "logits/chosen": 0.2591952085494995, "logits/rejected": 0.2135709524154663, "logps/chosen": -78.38478088378906, "logps/ref_chosen": -71.62649536132812, "logps/ref_rejected": -90.98765563964844, "logps/rejected": -100.47346496582031, "loss": 1.0301, "margin_dpo/margin_mean": 2.727525472640991, "margin_dpo/margin_std": 4.12076997756958, "step": 400 }, { "epoch": 0.6046863189720333, "eval_fcm_dpo/beta": 0.2689932584762573, "eval_logits/chosen": 0.2485727071762085, "eval_logits/rejected": 0.20795086026191711, "eval_logps/chosen": -80.81339263916016, "eval_logps/ref_chosen": -74.85946655273438, "eval_logps/ref_rejected": -79.54898834228516, "eval_logps/rejected": -88.20954895019531, "eval_loss": 0.5461485385894775, "eval_margin_dpo/margin_mean": 2.706634283065796, "eval_margin_dpo/margin_std": 4.596866130828857, "eval_runtime": 37.9957, "eval_samples_per_second": 60.612, "eval_steps_per_second": 1.895, "step": 400 }, { "epoch": 0.6061980347694633, "fcm_dpo/beta": 0.2562599778175354, "fcm_dpo/delta": -0.3326801657676697, "fcm_dpo/margin": 4.466838836669922, "fcm_dpo/q_t": 0.29139143228530884, "grad_norm": 48.446571350097656, "learning_rate": 2.0268718890989752e-07, "logits/chosen": 0.2457781285047531, "logits/rejected": 0.1672484576702118, "logps/chosen": -59.1573486328125, "logps/ref_chosen": -53.72495651245117, "logps/ref_rejected": -75.06304931640625, "logps/rejected": -84.9622802734375, "loss": 0.7902, "margin_dpo/margin_mean": 4.466838836669922, "margin_dpo/margin_std": 4.662143707275391, "step": 401 }, { "epoch": 0.6077097505668935, "fcm_dpo/beta": 0.2497277706861496, "fcm_dpo/delta": -0.013817459344863892, "fcm_dpo/margin": 3.4514431953430176, "fcm_dpo/q_t": 0.33873581886291504, "grad_norm": 60.394657135009766, "learning_rate": 2.013895317751323e-07, "logits/chosen": 0.2631894648075104, "logits/rejected": 0.22899389266967773, "logps/chosen": -67.79601287841797, "logps/ref_chosen": -61.873931884765625, "logps/ref_rejected": -66.15198516845703, "logps/rejected": -75.5255126953125, "loss": 0.9592, "margin_dpo/margin_mean": 3.4514427185058594, "margin_dpo/margin_std": 4.586933135986328, "step": 402 }, { "epoch": 0.6092214663643235, "fcm_dpo/beta": 0.24135196208953857, "fcm_dpo/delta": -0.28697896003723145, "fcm_dpo/margin": 4.581813812255859, "fcm_dpo/q_t": 0.2987240254878998, "grad_norm": 49.79636764526367, "learning_rate": 2.0009323437965898e-07, "logits/chosen": 0.3315698504447937, "logits/rejected": 0.2662360370159149, "logps/chosen": -57.209228515625, "logps/ref_chosen": -51.321502685546875, "logps/ref_rejected": -86.54010772705078, "logps/rejected": -97.0096435546875, "loss": 0.8399, "margin_dpo/margin_mean": 4.581814289093018, "margin_dpo/margin_std": 5.112116813659668, "step": 403 }, { "epoch": 0.6107331821617535, "fcm_dpo/beta": 0.2342020571231842, "fcm_dpo/delta": -0.006323143839836121, "fcm_dpo/margin": 3.6440062522888184, "fcm_dpo/q_t": 0.33462953567504883, "grad_norm": 57.89472198486328, "learning_rate": 1.9879833298370237e-07, "logits/chosen": 0.20489439368247986, "logits/rejected": 0.1360418051481247, "logps/chosen": -67.75517272949219, "logps/ref_chosen": -62.26288604736328, "logps/ref_rejected": -95.19029998779297, "logps/rejected": -104.32658386230469, "loss": 0.9366, "margin_dpo/margin_mean": 3.6440062522888184, "margin_dpo/margin_std": 4.503121852874756, "step": 404 }, { "epoch": 0.6122448979591837, "fcm_dpo/beta": 0.23300763964653015, "fcm_dpo/delta": 0.038883745670318604, "fcm_dpo/margin": 3.459103584289551, "fcm_dpo/q_t": 0.3483220934867859, "grad_norm": 47.8633918762207, "learning_rate": 1.975048638084379e-07, "logits/chosen": 0.29452911019325256, "logits/rejected": 0.2554025650024414, "logps/chosen": -56.493064880371094, "logps/ref_chosen": -50.5843391418457, "logps/ref_rejected": -65.43156433105469, "logps/rejected": -74.79939270019531, "loss": 0.9609, "margin_dpo/margin_mean": 3.459103584289551, "margin_dpo/margin_std": 4.2689619064331055, "step": 405 }, { "epoch": 0.6137566137566137, "fcm_dpo/beta": 0.23268848657608032, "fcm_dpo/delta": -0.11276289820671082, "fcm_dpo/margin": 4.082479000091553, "fcm_dpo/q_t": 0.31986764073371887, "grad_norm": 47.064353942871094, "learning_rate": 1.9621286303497914e-07, "logits/chosen": 0.2542474865913391, "logits/rejected": 0.1390293538570404, "logps/chosen": -54.7130126953125, "logps/ref_chosen": -48.99560546875, "logps/ref_rejected": -92.47774505615234, "logps/rejected": -102.27763366699219, "loss": 0.8894, "margin_dpo/margin_mean": 4.082479476928711, "margin_dpo/margin_std": 4.736398220062256, "step": 406 }, { "epoch": 0.6152683295540439, "fcm_dpo/beta": 0.2396630048751831, "fcm_dpo/delta": 0.17042268812656403, "fcm_dpo/margin": 2.883038282394409, "fcm_dpo/q_t": 0.36978358030319214, "grad_norm": 79.12983703613281, "learning_rate": 1.9492236680336483e-07, "logits/chosen": 0.18423596024513245, "logits/rejected": 0.12922564148902893, "logps/chosen": -96.18956756591797, "logps/ref_chosen": -89.40056610107422, "logps/ref_rejected": -99.28775024414062, "logps/rejected": -108.95979309082031, "loss": 1.0471, "margin_dpo/margin_mean": 2.883037567138672, "margin_dpo/margin_std": 4.590210914611816, "step": 407 }, { "epoch": 0.6167800453514739, "fcm_dpo/beta": 0.23743906617164612, "fcm_dpo/delta": -0.19000959396362305, "fcm_dpo/margin": 4.296039581298828, "fcm_dpo/q_t": 0.3071790337562561, "grad_norm": 45.11009216308594, "learning_rate": 1.9363341121154895e-07, "logits/chosen": 0.24663150310516357, "logits/rejected": 0.18223854899406433, "logps/chosen": -60.074920654296875, "logps/ref_chosen": -54.70391845703125, "logps/ref_rejected": -73.98648834228516, "logps/rejected": -83.65353393554688, "loss": 0.8398, "margin_dpo/margin_mean": 4.296039581298828, "margin_dpo/margin_std": 4.550353527069092, "step": 408 }, { "epoch": 0.618291761148904, "fcm_dpo/beta": 0.24273526668548584, "fcm_dpo/delta": 0.29451048374176025, "fcm_dpo/margin": 2.3577988147735596, "fcm_dpo/q_t": 0.3989053964614868, "grad_norm": 62.7466926574707, "learning_rate": 1.9234603231438994e-07, "logits/chosen": 0.24126318097114563, "logits/rejected": 0.24055859446525574, "logps/chosen": -68.67960357666016, "logps/ref_chosen": -62.11822509765625, "logps/ref_rejected": -61.933509826660156, "logps/rejected": -70.85269165039062, "loss": 1.1796, "margin_dpo/margin_mean": 2.3577983379364014, "margin_dpo/margin_std": 4.950777053833008, "step": 409 }, { "epoch": 0.6198034769463341, "fcm_dpo/beta": 0.24500462412834167, "fcm_dpo/delta": 0.04858040064573288, "fcm_dpo/margin": 3.2761478424072266, "fcm_dpo/q_t": 0.3398808240890503, "grad_norm": 59.190223693847656, "learning_rate": 1.9106026612264315e-07, "logits/chosen": 0.2868541479110718, "logits/rejected": 0.2624509632587433, "logps/chosen": -67.55817413330078, "logps/ref_chosen": -61.80266189575195, "logps/ref_rejected": -76.60002136230469, "logps/rejected": -85.63168334960938, "loss": 0.9129, "margin_dpo/margin_mean": 3.2761478424072266, "margin_dpo/margin_std": 3.6564488410949707, "step": 410 }, { "epoch": 0.6213151927437641, "fcm_dpo/beta": 0.2463398426771164, "fcm_dpo/delta": -0.03521712124347687, "fcm_dpo/margin": 3.5785136222839355, "fcm_dpo/q_t": 0.34663787484169006, "grad_norm": 66.22770690917969, "learning_rate": 1.8977614860195296e-07, "logits/chosen": 0.2386634349822998, "logits/rejected": 0.18876875936985016, "logps/chosen": -61.16634750366211, "logps/ref_chosen": -54.44539260864258, "logps/ref_rejected": -74.5650863647461, "logps/rejected": -84.86454772949219, "loss": 1.0363, "margin_dpo/margin_mean": 3.5785140991210938, "margin_dpo/margin_std": 5.528660774230957, "step": 411 }, { "epoch": 0.6228269085411943, "fcm_dpo/beta": 0.2440623939037323, "fcm_dpo/delta": -0.02338419109582901, "fcm_dpo/margin": 3.56341814994812, "fcm_dpo/q_t": 0.3446499705314636, "grad_norm": 57.37812042236328, "learning_rate": 1.8849371567184662e-07, "logits/chosen": 0.2372305691242218, "logits/rejected": 0.1809903085231781, "logps/chosen": -62.51192092895508, "logps/ref_chosen": -55.248085021972656, "logps/ref_rejected": -68.96623229980469, "logps/rejected": -79.7934799194336, "loss": 0.9592, "margin_dpo/margin_mean": 3.56341814994812, "margin_dpo/margin_std": 4.865736961364746, "step": 412 }, { "epoch": 0.6243386243386243, "fcm_dpo/beta": 0.24663466215133667, "fcm_dpo/delta": 0.031184524297714233, "fcm_dpo/margin": 3.33031964302063, "fcm_dpo/q_t": 0.3551603853702545, "grad_norm": 66.90776062011719, "learning_rate": 1.872130032047302e-07, "logits/chosen": 0.14157438278198242, "logits/rejected": 0.11581124365329742, "logps/chosen": -75.62931060791016, "logps/ref_chosen": -68.72074890136719, "logps/ref_rejected": -78.76539611816406, "logps/rejected": -89.00428771972656, "loss": 1.0651, "margin_dpo/margin_mean": 3.330319404602051, "margin_dpo/margin_std": 5.480165958404541, "step": 413 }, { "epoch": 0.6258503401360545, "fcm_dpo/beta": 0.2433382272720337, "fcm_dpo/delta": -0.0841752216219902, "fcm_dpo/margin": 3.804185152053833, "fcm_dpo/q_t": 0.33140766620635986, "grad_norm": 54.20486068725586, "learning_rate": 1.8593404702488436e-07, "logits/chosen": 0.24406284093856812, "logits/rejected": 0.1906662881374359, "logps/chosen": -60.66529083251953, "logps/ref_chosen": -54.138214111328125, "logps/ref_rejected": -74.65741729736328, "logps/rejected": -84.98867797851562, "loss": 0.9494, "margin_dpo/margin_mean": 3.8041844367980957, "margin_dpo/margin_std": 5.061384201049805, "step": 414 }, { "epoch": 0.6273620559334845, "fcm_dpo/beta": 0.24203211069107056, "fcm_dpo/delta": -0.014722846448421478, "fcm_dpo/margin": 3.5665831565856934, "fcm_dpo/q_t": 0.3441469073295593, "grad_norm": 58.89690017700195, "learning_rate": 1.846568829074628e-07, "logits/chosen": 0.22098597884178162, "logits/rejected": 0.20324364304542542, "logps/chosen": -62.7205810546875, "logps/ref_chosen": -55.91856002807617, "logps/ref_rejected": -61.747703552246094, "logps/rejected": -72.1163101196289, "loss": 1.026, "margin_dpo/margin_mean": 3.5665831565856934, "margin_dpo/margin_std": 5.469634056091309, "step": 415 }, { "epoch": 0.6288737717309146, "fcm_dpo/beta": 0.25225508213043213, "fcm_dpo/delta": 0.11526554822921753, "fcm_dpo/margin": 2.9003496170043945, "fcm_dpo/q_t": 0.3673805892467499, "grad_norm": 69.7919921875, "learning_rate": 1.8338154657749128e-07, "logits/chosen": 0.22921934723854065, "logits/rejected": 0.18885570764541626, "logps/chosen": -61.164649963378906, "logps/ref_chosen": -54.72308349609375, "logps/ref_rejected": -69.17388916015625, "logps/rejected": -78.51580810546875, "loss": 1.102, "margin_dpo/margin_mean": 2.9003496170043945, "margin_dpo/margin_std": 4.846092224121094, "step": 416 }, { "epoch": 0.6303854875283447, "fcm_dpo/beta": 0.24670086801052094, "fcm_dpo/delta": -0.15245218575000763, "fcm_dpo/margin": 3.9919228553771973, "fcm_dpo/q_t": 0.31675058603286743, "grad_norm": 66.77470397949219, "learning_rate": 1.8210807370886849e-07, "logits/chosen": 0.30952125787734985, "logits/rejected": 0.2565500736236572, "logps/chosen": -64.06097412109375, "logps/ref_chosen": -56.791259765625, "logps/ref_rejected": -68.7791748046875, "logps/rejected": -80.04081726074219, "loss": 0.936, "margin_dpo/margin_mean": 3.9919233322143555, "margin_dpo/margin_std": 5.018634796142578, "step": 417 }, { "epoch": 0.6318972033257747, "fcm_dpo/beta": 0.2470259666442871, "fcm_dpo/delta": 0.2453382909297943, "fcm_dpo/margin": 2.509967803955078, "fcm_dpo/q_t": 0.39756616950035095, "grad_norm": 81.32603454589844, "learning_rate": 1.8083649992336825e-07, "logits/chosen": 0.2638748586177826, "logits/rejected": 0.2686142921447754, "logps/chosen": -77.12152099609375, "logps/ref_chosen": -69.10798645019531, "logps/ref_rejected": -75.09132385253906, "logps/rejected": -85.61483764648438, "loss": 1.2226, "margin_dpo/margin_mean": 2.5099682807922363, "margin_dpo/margin_std": 5.600542068481445, "step": 418 }, { "epoch": 0.6334089191232048, "fcm_dpo/beta": 0.24269002676010132, "fcm_dpo/delta": -0.21217991411685944, "fcm_dpo/margin": 4.270475387573242, "fcm_dpo/q_t": 0.31270739436149597, "grad_norm": 52.399044036865234, "learning_rate": 1.7956686078964255e-07, "logits/chosen": 0.15831753611564636, "logits/rejected": 0.11195243149995804, "logps/chosen": -64.01142883300781, "logps/ref_chosen": -58.1717643737793, "logps/ref_rejected": -71.67066955566406, "logps/rejected": -81.78080749511719, "loss": 0.875, "margin_dpo/margin_mean": 4.270475387573242, "margin_dpo/margin_std": 5.095606803894043, "step": 419 }, { "epoch": 0.6349206349206349, "fcm_dpo/beta": 0.24959485232830048, "fcm_dpo/delta": 0.24828967452049255, "fcm_dpo/margin": 2.474740505218506, "fcm_dpo/q_t": 0.3962467908859253, "grad_norm": 71.6791763305664, "learning_rate": 1.782991918222275e-07, "logits/chosen": 0.21745803952217102, "logits/rejected": 0.1744053065776825, "logps/chosen": -65.115966796875, "logps/ref_chosen": -57.05351257324219, "logps/ref_rejected": -62.670982360839844, "logps/rejected": -73.20817565917969, "loss": 1.2861, "margin_dpo/margin_mean": 2.4747402667999268, "margin_dpo/margin_std": 5.943141937255859, "step": 420 }, { "epoch": 0.636432350718065, "fcm_dpo/beta": 0.2592123746871948, "fcm_dpo/delta": 0.14652492105960846, "fcm_dpo/margin": 2.7551355361938477, "fcm_dpo/q_t": 0.38477665185928345, "grad_norm": 71.33401489257812, "learning_rate": 1.7703352848054887e-07, "logits/chosen": 0.2027522772550583, "logits/rejected": 0.15381047129631042, "logps/chosen": -65.26667022705078, "logps/ref_chosen": -57.32324981689453, "logps/ref_rejected": -75.33782958984375, "logps/rejected": -86.03638458251953, "loss": 1.2791, "margin_dpo/margin_mean": 2.7551350593566895, "margin_dpo/margin_std": 6.1498517990112305, "step": 421 }, { "epoch": 0.6379440665154951, "fcm_dpo/beta": 0.2613186538219452, "fcm_dpo/delta": -0.06008271127939224, "fcm_dpo/margin": 3.4610629081726074, "fcm_dpo/q_t": 0.34318864345550537, "grad_norm": 69.64408874511719, "learning_rate": 1.7576990616793137e-07, "logits/chosen": 0.24464266002178192, "logits/rejected": 0.212172269821167, "logps/chosen": -73.77204895019531, "logps/ref_chosen": -67.05757141113281, "logps/ref_rejected": -72.12803649902344, "logps/rejected": -82.3035659790039, "loss": 1.0071, "margin_dpo/margin_mean": 3.4610633850097656, "margin_dpo/margin_std": 5.229635715484619, "step": 422 }, { "epoch": 0.6394557823129252, "fcm_dpo/beta": 0.250480979681015, "fcm_dpo/delta": -0.19729526340961456, "fcm_dpo/margin": 4.0947794914245605, "fcm_dpo/q_t": 0.3230004906654358, "grad_norm": 55.7142448425293, "learning_rate": 1.745083602306071e-07, "logits/chosen": 0.27640843391418457, "logits/rejected": 0.2188403606414795, "logps/chosen": -60.870277404785156, "logps/ref_chosen": -54.06167221069336, "logps/ref_rejected": -76.64092254638672, "logps/rejected": -87.5443115234375, "loss": 0.93, "margin_dpo/margin_mean": 4.0947794914245605, "margin_dpo/margin_std": 5.4799394607543945, "step": 423 }, { "epoch": 0.6409674981103552, "fcm_dpo/beta": 0.2431943267583847, "fcm_dpo/delta": -0.15173768997192383, "fcm_dpo/margin": 4.056219100952148, "fcm_dpo/q_t": 0.32653218507766724, "grad_norm": 60.763397216796875, "learning_rate": 1.7324892595672804e-07, "logits/chosen": 0.17878620326519012, "logits/rejected": 0.14686840772628784, "logps/chosen": -60.15678024291992, "logps/ref_chosen": -53.60887145996094, "logps/ref_rejected": -79.2139892578125, "logps/rejected": -89.81812286376953, "loss": 0.9293, "margin_dpo/margin_mean": 4.056219577789307, "margin_dpo/margin_std": 5.32462739944458, "step": 424 }, { "epoch": 0.6424792139077853, "fcm_dpo/beta": 0.24080964922904968, "fcm_dpo/delta": -0.02124994806945324, "fcm_dpo/margin": 3.610257863998413, "fcm_dpo/q_t": 0.34036120772361755, "grad_norm": 53.22382736206055, "learning_rate": 1.7199163857537824e-07, "logits/chosen": 0.2400674670934677, "logits/rejected": 0.21408450603485107, "logps/chosen": -65.2353515625, "logps/ref_chosen": -58.41468048095703, "logps/ref_rejected": -66.59054565429688, "logps/rejected": -77.02146911621094, "loss": 0.9689, "margin_dpo/margin_mean": 3.610257625579834, "margin_dpo/margin_std": 4.985018730163574, "step": 425 }, { "epoch": 0.6439909297052154, "fcm_dpo/beta": 0.2545412480831146, "fcm_dpo/delta": 0.3679015040397644, "fcm_dpo/margin": 1.9646282196044922, "fcm_dpo/q_t": 0.40837377309799194, "grad_norm": 78.42141723632812, "learning_rate": 1.7073653325558828e-07, "logits/chosen": 0.1992775797843933, "logits/rejected": 0.20033419132232666, "logps/chosen": -79.82450866699219, "logps/ref_chosen": -71.70822143554688, "logps/ref_rejected": -73.57725524902344, "logps/rejected": -83.6581802368164, "loss": 1.2953, "margin_dpo/margin_mean": 1.964627981185913, "margin_dpo/margin_std": 5.136331558227539, "step": 426 }, { "epoch": 0.6455026455026455, "fcm_dpo/beta": 0.2586546838283539, "fcm_dpo/delta": -0.023448972031474113, "fcm_dpo/margin": 3.368882656097412, "fcm_dpo/q_t": 0.3508281707763672, "grad_norm": 60.90048599243164, "learning_rate": 1.6948364510535218e-07, "logits/chosen": 0.29382023215293884, "logits/rejected": 0.24823549389839172, "logps/chosen": -66.44595336914062, "logps/ref_chosen": -58.64276885986328, "logps/ref_rejected": -86.25437927246094, "logps/rejected": -97.42644500732422, "loss": 1.0582, "margin_dpo/margin_mean": 3.368882894515991, "margin_dpo/margin_std": 5.518677234649658, "step": 427 }, { "epoch": 0.6470143613000756, "fcm_dpo/beta": 0.25078386068344116, "fcm_dpo/delta": -0.22688086330890656, "fcm_dpo/margin": 4.200922966003418, "fcm_dpo/q_t": 0.3197481632232666, "grad_norm": 63.953575134277344, "learning_rate": 1.6823300917064458e-07, "logits/chosen": 0.22337879240512848, "logits/rejected": 0.17726576328277588, "logps/chosen": -73.97862243652344, "logps/ref_chosen": -66.5960464477539, "logps/ref_rejected": -82.3941650390625, "logps/rejected": -93.9776611328125, "loss": 0.9094, "margin_dpo/margin_mean": 4.200922966003418, "margin_dpo/margin_std": 5.368341445922852, "step": 428 }, { "epoch": 0.6485260770975056, "fcm_dpo/beta": 0.2456437349319458, "fcm_dpo/delta": -0.059685856103897095, "fcm_dpo/margin": 3.6800642013549805, "fcm_dpo/q_t": 0.33456557989120483, "grad_norm": 61.284217834472656, "learning_rate": 1.669846604344412e-07, "logits/chosen": 0.1801915019750595, "logits/rejected": 0.1813092678785324, "logps/chosen": -64.23383331298828, "logps/ref_chosen": -57.00970458984375, "logps/ref_rejected": -59.86549377441406, "logps/rejected": -70.76969909667969, "loss": 0.9927, "margin_dpo/margin_mean": 3.6800642013549805, "margin_dpo/margin_std": 5.228022575378418, "step": 429 }, { "epoch": 0.6500377928949358, "fcm_dpo/beta": 0.23953106999397278, "fcm_dpo/delta": -0.12144466489553452, "fcm_dpo/margin": 4.007453918457031, "fcm_dpo/q_t": 0.3295876979827881, "grad_norm": 57.089744567871094, "learning_rate": 1.6573863381573954e-07, "logits/chosen": 0.15598759055137634, "logits/rejected": 0.14352598786354065, "logps/chosen": -66.40084838867188, "logps/ref_chosen": -59.563194274902344, "logps/ref_rejected": -70.52289581298828, "logps/rejected": -81.36799621582031, "loss": 0.9546, "margin_dpo/margin_mean": 4.007453918457031, "margin_dpo/margin_std": 5.4395599365234375, "step": 430 }, { "epoch": 0.6515495086923658, "fcm_dpo/beta": 0.23662912845611572, "fcm_dpo/delta": 0.004378672689199448, "fcm_dpo/margin": 3.5732975006103516, "fcm_dpo/q_t": 0.35064366459846497, "grad_norm": 56.98310089111328, "learning_rate": 1.6449496416858282e-07, "logits/chosen": 0.22091971337795258, "logits/rejected": 0.18462924659252167, "logps/chosen": -56.664363861083984, "logps/ref_chosen": -50.20032501220703, "logps/ref_rejected": -77.81680297851562, "logps/rejected": -87.85414123535156, "loss": 0.9811, "margin_dpo/margin_mean": 3.5732975006103516, "margin_dpo/margin_std": 5.193387031555176, "step": 431 }, { "epoch": 0.6530612244897959, "fcm_dpo/beta": 0.23365934193134308, "fcm_dpo/delta": -0.056146346032619476, "fcm_dpo/margin": 3.847005844116211, "fcm_dpo/q_t": 0.3386353850364685, "grad_norm": 58.32561111450195, "learning_rate": 1.632536862810844e-07, "logits/chosen": 0.24721886217594147, "logits/rejected": 0.2073393613100052, "logps/chosen": -68.7791976928711, "logps/ref_chosen": -61.662757873535156, "logps/ref_rejected": -83.94496154785156, "logps/rejected": -94.90840911865234, "loss": 0.9881, "margin_dpo/margin_mean": 3.8470053672790527, "margin_dpo/margin_std": 5.408696174621582, "step": 432 }, { "epoch": 0.654572940287226, "fcm_dpo/beta": 0.22784093022346497, "fcm_dpo/delta": -0.2313033938407898, "fcm_dpo/margin": 4.640135288238525, "fcm_dpo/q_t": 0.3186371326446533, "grad_norm": 50.4363899230957, "learning_rate": 1.6201483487445515e-07, "logits/chosen": 0.3478536605834961, "logits/rejected": 0.33294785022735596, "logps/chosen": -71.23434448242188, "logps/ref_chosen": -63.72917938232422, "logps/ref_rejected": -65.8391342163086, "logps/rejected": -77.98443603515625, "loss": 0.9176, "margin_dpo/margin_mean": 4.640135288238525, "margin_dpo/margin_std": 5.971785545349121, "step": 433 }, { "epoch": 0.656084656084656, "fcm_dpo/beta": 0.21730631589889526, "fcm_dpo/delta": -0.13528761267662048, "fcm_dpo/margin": 4.452031135559082, "fcm_dpo/q_t": 0.3225432336330414, "grad_norm": 46.248233795166016, "learning_rate": 1.6077844460203204e-07, "logits/chosen": 0.30574116110801697, "logits/rejected": 0.2512272894382477, "logps/chosen": -54.50779724121094, "logps/ref_chosen": -47.97331619262695, "logps/ref_rejected": -72.51132202148438, "logps/rejected": -83.49783325195312, "loss": 0.9787, "margin_dpo/margin_mean": 4.452031135559082, "margin_dpo/margin_std": 6.084223747253418, "step": 434 }, { "epoch": 0.6575963718820862, "fcm_dpo/beta": 0.22146287560462952, "fcm_dpo/delta": 0.0298960879445076, "fcm_dpo/margin": 3.7065372467041016, "fcm_dpo/q_t": 0.3446485996246338, "grad_norm": 52.24251937866211, "learning_rate": 1.5954455004830878e-07, "logits/chosen": 0.3137568235397339, "logits/rejected": 0.2790324091911316, "logps/chosen": -64.93702697753906, "logps/ref_chosen": -57.06024932861328, "logps/ref_rejected": -71.69146728515625, "logps/rejected": -83.2747802734375, "loss": 1.0002, "margin_dpo/margin_mean": 3.7065377235412598, "margin_dpo/margin_std": 5.282249927520752, "step": 435 }, { "epoch": 0.6591080876795162, "fcm_dpo/beta": 0.2214878499507904, "fcm_dpo/delta": 0.02604525163769722, "fcm_dpo/margin": 3.728400707244873, "fcm_dpo/q_t": 0.34906071424484253, "grad_norm": 52.046791076660156, "learning_rate": 1.5831318572796847e-07, "logits/chosen": 0.2411668300628662, "logits/rejected": 0.19079329073429108, "logps/chosen": -63.56175231933594, "logps/ref_chosen": -56.158050537109375, "logps/ref_rejected": -67.63787841796875, "logps/rejected": -78.76997375488281, "loss": 1.0039, "margin_dpo/margin_mean": 3.728400945663452, "margin_dpo/margin_std": 5.423241138458252, "step": 436 }, { "epoch": 0.6606198034769464, "fcm_dpo/beta": 0.2188284695148468, "fcm_dpo/delta": 0.0661238431930542, "fcm_dpo/margin": 3.576071262359619, "fcm_dpo/q_t": 0.3653622269630432, "grad_norm": 58.89640426635742, "learning_rate": 1.5708438608491815e-07, "logits/chosen": 0.23456737399101257, "logits/rejected": 0.12770959734916687, "logps/chosen": -65.12676239013672, "logps/ref_chosen": -56.98578643798828, "logps/ref_rejected": -85.61524963378906, "logps/rejected": -97.33230590820312, "loss": 1.1308, "margin_dpo/margin_mean": 3.5760717391967773, "margin_dpo/margin_std": 6.206454277038574, "step": 437 }, { "epoch": 0.6621315192743764, "fcm_dpo/beta": 0.22138898074626923, "fcm_dpo/delta": -0.13801656663417816, "fcm_dpo/margin": 4.400524139404297, "fcm_dpo/q_t": 0.32329899072647095, "grad_norm": 40.44050979614258, "learning_rate": 1.558581854913253e-07, "logits/chosen": 0.2807733416557312, "logits/rejected": 0.2254466414451599, "logps/chosen": -48.16633987426758, "logps/ref_chosen": -41.27777862548828, "logps/ref_rejected": -65.33840942382812, "logps/rejected": -76.62749481201172, "loss": 0.923, "margin_dpo/margin_mean": 4.400524616241455, "margin_dpo/margin_std": 5.516866683959961, "step": 438 }, { "epoch": 0.6636432350718064, "fcm_dpo/beta": 0.2201937735080719, "fcm_dpo/delta": -0.016198471188545227, "fcm_dpo/margin": 3.9138498306274414, "fcm_dpo/q_t": 0.335410475730896, "grad_norm": 62.81761169433594, "learning_rate": 1.5463461824665658e-07, "logits/chosen": 0.20101311802864075, "logits/rejected": 0.1731702983379364, "logps/chosen": -88.3592529296875, "logps/ref_chosen": -81.41764831542969, "logps/ref_rejected": -94.72309875488281, "logps/rejected": -105.57853698730469, "loss": 0.9611, "margin_dpo/margin_mean": 3.913849353790283, "margin_dpo/margin_std": 5.098721504211426, "step": 439 }, { "epoch": 0.6651549508692366, "fcm_dpo/beta": 0.21266132593154907, "fcm_dpo/delta": -0.17807799577713013, "fcm_dpo/margin": 4.751244068145752, "fcm_dpo/q_t": 0.3173283338546753, "grad_norm": 46.422149658203125, "learning_rate": 1.534137185767178e-07, "logits/chosen": 0.19558237493038177, "logits/rejected": 0.11220754683017731, "logps/chosen": -49.08099365234375, "logps/ref_chosen": -42.538185119628906, "logps/ref_rejected": -69.78813934326172, "logps/rejected": -81.08218383789062, "loss": 0.8729, "margin_dpo/margin_mean": 4.751243591308594, "margin_dpo/margin_std": 5.614121437072754, "step": 440 }, { "epoch": 0.6666666666666666, "fcm_dpo/beta": 0.20615889132022858, "fcm_dpo/delta": -0.07547109574079514, "fcm_dpo/margin": 4.451215744018555, "fcm_dpo/q_t": 0.32043367624282837, "grad_norm": 47.443084716796875, "learning_rate": 1.521955206326976e-07, "logits/chosen": 0.2026526778936386, "logits/rejected": 0.1278238445520401, "logps/chosen": -64.02963256835938, "logps/ref_chosen": -57.593223571777344, "logps/ref_rejected": -84.82878875732422, "logps/rejected": -95.71641540527344, "loss": 0.8561, "margin_dpo/margin_mean": 4.451216220855713, "margin_dpo/margin_std": 4.816239833831787, "step": 441 }, { "epoch": 0.6681783824640968, "fcm_dpo/beta": 0.20843441784381866, "fcm_dpo/delta": 0.0724664106965065, "fcm_dpo/margin": 3.7578186988830566, "fcm_dpo/q_t": 0.35375648736953735, "grad_norm": 60.085304260253906, "learning_rate": 1.5098005849021078e-07, "logits/chosen": 0.29126518964767456, "logits/rejected": 0.25034332275390625, "logps/chosen": -75.44874572753906, "logps/ref_chosen": -67.46121978759766, "logps/ref_rejected": -89.0693588256836, "logps/rejected": -100.81471252441406, "loss": 1.0015, "margin_dpo/margin_mean": 3.7578182220458984, "margin_dpo/margin_std": 5.468325614929199, "step": 442 }, { "epoch": 0.6696900982615268, "fcm_dpo/beta": 0.20462161302566528, "fcm_dpo/delta": -0.17430701851844788, "fcm_dpo/margin": 4.92136287689209, "fcm_dpo/q_t": 0.31620925664901733, "grad_norm": 45.6391716003418, "learning_rate": 1.4976736614834662e-07, "logits/chosen": 0.2725718021392822, "logits/rejected": 0.2120482325553894, "logps/chosen": -61.66060256958008, "logps/ref_chosen": -54.79610061645508, "logps/ref_rejected": -77.80781555175781, "logps/rejected": -89.59368896484375, "loss": 0.9136, "margin_dpo/margin_mean": 4.921362400054932, "margin_dpo/margin_std": 6.193216323852539, "step": 443 }, { "epoch": 0.671201814058957, "fcm_dpo/beta": 0.21466189622879028, "fcm_dpo/delta": 0.45215946435928345, "fcm_dpo/margin": 1.9520701169967651, "fcm_dpo/q_t": 0.42362260818481445, "grad_norm": 64.94635009765625, "learning_rate": 1.4855747752871654e-07, "logits/chosen": 0.2781602740287781, "logits/rejected": 0.2134133130311966, "logps/chosen": -67.09490966796875, "logps/ref_chosen": -58.749061584472656, "logps/ref_rejected": -86.87396240234375, "logps/rejected": -97.17189025878906, "loss": 1.3118, "margin_dpo/margin_mean": 1.9520692825317383, "margin_dpo/margin_std": 5.343048572540283, "step": 444 }, { "epoch": 0.672713529856387, "fcm_dpo/beta": 0.21728411316871643, "fcm_dpo/delta": -0.12314336001873016, "fcm_dpo/margin": 4.423813819885254, "fcm_dpo/q_t": 0.32274460792541504, "grad_norm": 56.279396057128906, "learning_rate": 1.473504264745062e-07, "logits/chosen": 0.2420450747013092, "logits/rejected": 0.22509875893592834, "logps/chosen": -69.55735778808594, "logps/ref_chosen": -60.91743850708008, "logps/ref_rejected": -71.5637435913086, "logps/rejected": -84.62747192382812, "loss": 0.9196, "margin_dpo/margin_mean": 4.423814296722412, "margin_dpo/margin_std": 5.549365520477295, "step": 445 }, { "epoch": 0.674225245653817, "fcm_dpo/beta": 0.20891538262367249, "fcm_dpo/delta": -0.36004990339279175, "fcm_dpo/margin": 5.58787202835083, "fcm_dpo/q_t": 0.2773993909358978, "grad_norm": 38.038795471191406, "learning_rate": 1.461462467495284e-07, "logits/chosen": 0.300902783870697, "logits/rejected": 0.23340097069740295, "logps/chosen": -56.217681884765625, "logps/ref_chosen": -48.79924774169922, "logps/ref_rejected": -71.8719482421875, "logps/rejected": -84.87826538085938, "loss": 0.7393, "margin_dpo/margin_mean": 5.587871551513672, "margin_dpo/margin_std": 4.869062900543213, "step": 446 }, { "epoch": 0.6757369614512472, "fcm_dpo/beta": 0.19416730105876923, "fcm_dpo/delta": -0.2389349639415741, "fcm_dpo/margin": 5.477807521820068, "fcm_dpo/q_t": 0.29294759035110474, "grad_norm": 41.903839111328125, "learning_rate": 1.4494497203727843e-07, "logits/chosen": 0.23307769000530243, "logits/rejected": 0.14838972687721252, "logps/chosen": -60.058258056640625, "logps/ref_chosen": -53.682716369628906, "logps/ref_rejected": -88.17315673828125, "logps/rejected": -100.02650451660156, "loss": 0.8425, "margin_dpo/margin_mean": 5.47780704498291, "margin_dpo/margin_std": 6.009408950805664, "step": 447 }, { "epoch": 0.6772486772486772, "fcm_dpo/beta": 0.19167715311050415, "fcm_dpo/delta": 0.008233718574047089, "fcm_dpo/margin": 4.395143508911133, "fcm_dpo/q_t": 0.3358060121536255, "grad_norm": 41.25887680053711, "learning_rate": 1.4374663593999256e-07, "logits/chosen": 0.2761896848678589, "logits/rejected": 0.23135310411453247, "logps/chosen": -61.26433563232422, "logps/ref_chosen": -53.75125503540039, "logps/ref_rejected": -77.17623901367188, "logps/rejected": -89.08446502685547, "loss": 0.939, "margin_dpo/margin_mean": 4.395143508911133, "margin_dpo/margin_std": 5.651719093322754, "step": 448 }, { "epoch": 0.6787603930461074, "fcm_dpo/beta": 0.2022373080253601, "fcm_dpo/delta": 0.3825373649597168, "fcm_dpo/margin": 2.4102249145507812, "fcm_dpo/q_t": 0.4050193727016449, "grad_norm": 57.15595245361328, "learning_rate": 1.4255127197770707e-07, "logits/chosen": 0.15068916976451874, "logits/rejected": 0.13591475784778595, "logps/chosen": -84.59050750732422, "logps/ref_chosen": -75.82737731933594, "logps/ref_rejected": -82.20687866210938, "logps/rejected": -93.3802261352539, "loss": 1.1481, "margin_dpo/margin_mean": 2.4102249145507812, "margin_dpo/margin_std": 4.6423020362854, "step": 449 }, { "epoch": 0.6802721088435374, "fcm_dpo/beta": 0.21165935695171356, "fcm_dpo/delta": 0.12616947293281555, "fcm_dpo/margin": 3.4614691734313965, "fcm_dpo/q_t": 0.3656800091266632, "grad_norm": 56.690269470214844, "learning_rate": 1.4135891358732205e-07, "logits/chosen": 0.311156302690506, "logits/rejected": 0.22413130104541779, "logps/chosen": -54.62956237792969, "logps/ref_chosen": -47.11572265625, "logps/ref_rejected": -78.7546615600586, "logps/rejected": -89.72996520996094, "loss": 1.0545, "margin_dpo/margin_mean": 3.461467981338501, "margin_dpo/margin_std": 5.579974174499512, "step": 450 }, { "epoch": 0.6817838246409675, "fcm_dpo/beta": 0.2162429690361023, "fcm_dpo/delta": 0.1287505030632019, "fcm_dpo/margin": 3.379551887512207, "fcm_dpo/q_t": 0.36499056220054626, "grad_norm": 59.05227279663086, "learning_rate": 1.4016959412166437e-07, "logits/chosen": 0.24645878374576569, "logits/rejected": 0.20367026329040527, "logps/chosen": -71.26134490966797, "logps/ref_chosen": -63.350440979003906, "logps/ref_rejected": -76.28530883789062, "logps/rejected": -87.57575988769531, "loss": 1.0718, "margin_dpo/margin_mean": 3.3795528411865234, "margin_dpo/margin_std": 5.619390487670898, "step": 451 }, { "epoch": 0.6832955404383976, "fcm_dpo/beta": 0.21667127311229706, "fcm_dpo/delta": -0.04757612198591232, "fcm_dpo/margin": 4.123049736022949, "fcm_dpo/q_t": 0.3350658118724823, "grad_norm": 57.07819747924805, "learning_rate": 1.3898334684855645e-07, "logits/chosen": 0.21967121958732605, "logits/rejected": 0.15877500176429749, "logps/chosen": -63.10958480834961, "logps/ref_chosen": -55.58583450317383, "logps/ref_rejected": -77.68738555908203, "logps/rejected": -89.33418273925781, "loss": 0.9949, "margin_dpo/margin_mean": 4.123049736022949, "margin_dpo/margin_std": 5.820711135864258, "step": 452 }, { "epoch": 0.6848072562358276, "fcm_dpo/beta": 0.21547909080982208, "fcm_dpo/delta": -0.022098319604992867, "fcm_dpo/margin": 4.038339614868164, "fcm_dpo/q_t": 0.3498329520225525, "grad_norm": 50.18575668334961, "learning_rate": 1.3780020494988445e-07, "logits/chosen": 0.22223559021949768, "logits/rejected": 0.19204358756542206, "logps/chosen": -68.8441162109375, "logps/ref_chosen": -61.778202056884766, "logps/ref_rejected": -71.51403045654297, "logps/rejected": -82.6182861328125, "loss": 1.028, "margin_dpo/margin_mean": 4.0383405685424805, "margin_dpo/margin_std": 6.134515285491943, "step": 453 }, { "epoch": 0.6863189720332578, "fcm_dpo/beta": 0.21051616966724396, "fcm_dpo/delta": -0.13284781575202942, "fcm_dpo/margin": 4.6063690185546875, "fcm_dpo/q_t": 0.32341745495796204, "grad_norm": 43.10291290283203, "learning_rate": 1.366202015206706e-07, "logits/chosen": 0.28594014048576355, "logits/rejected": 0.24734768271446228, "logps/chosen": -58.167015075683594, "logps/ref_chosen": -51.59515380859375, "logps/ref_rejected": -63.96732711791992, "logps/rejected": -75.14555358886719, "loss": 0.9569, "margin_dpo/margin_mean": 4.6063690185546875, "margin_dpo/margin_std": 6.16324520111084, "step": 454 }, { "epoch": 0.6878306878306878, "fcm_dpo/beta": 0.20485688745975494, "fcm_dpo/delta": -0.10616355389356613, "fcm_dpo/margin": 4.611947059631348, "fcm_dpo/q_t": 0.3254557251930237, "grad_norm": 50.521175384521484, "learning_rate": 1.354433695681474e-07, "logits/chosen": 0.13476872444152832, "logits/rejected": 0.09828665852546692, "logps/chosen": -78.38386535644531, "logps/ref_chosen": -70.65170288085938, "logps/ref_rejected": -77.44276428222656, "logps/rejected": -89.786865234375, "loss": 0.9001, "margin_dpo/margin_mean": 4.611947059631348, "margin_dpo/margin_std": 5.726899147033691, "step": 455 }, { "epoch": 0.6893424036281179, "fcm_dpo/beta": 0.20512458682060242, "fcm_dpo/delta": 0.0033988687209784985, "fcm_dpo/margin": 4.128653049468994, "fcm_dpo/q_t": 0.338392049074173, "grad_norm": 50.77932357788086, "learning_rate": 1.3426974201083439e-07, "logits/chosen": 0.16939248144626617, "logits/rejected": 0.12530234456062317, "logps/chosen": -64.22583770751953, "logps/ref_chosen": -56.398284912109375, "logps/ref_rejected": -82.61642456054688, "logps/rejected": -94.5726318359375, "loss": 0.9574, "margin_dpo/margin_mean": 4.128653526306152, "margin_dpo/margin_std": 5.482672691345215, "step": 456 }, { "epoch": 0.690854119425548, "fcm_dpo/beta": 0.2063646912574768, "fcm_dpo/delta": 0.10697145760059357, "fcm_dpo/margin": 3.6349315643310547, "fcm_dpo/q_t": 0.35435450077056885, "grad_norm": 46.9370231628418, "learning_rate": 1.3309935167761717e-07, "logits/chosen": 0.3234666883945465, "logits/rejected": 0.2596738934516907, "logps/chosen": -52.51266860961914, "logps/ref_chosen": -44.72057342529297, "logps/ref_rejected": -68.1158676147461, "logps/rejected": -79.54289245605469, "loss": 0.98, "margin_dpo/margin_mean": 3.6349313259124756, "margin_dpo/margin_std": 4.778542518615723, "step": 457 }, { "epoch": 0.6923658352229781, "fcm_dpo/beta": 0.20488256216049194, "fcm_dpo/delta": -0.11901578307151794, "fcm_dpo/margin": 4.668992042541504, "fcm_dpo/q_t": 0.31962084770202637, "grad_norm": 49.14594650268555, "learning_rate": 1.3193223130682936e-07, "logits/chosen": 0.24556541442871094, "logits/rejected": 0.151944100856781, "logps/chosen": -56.84272003173828, "logps/ref_chosen": -50.00569152832031, "logps/ref_rejected": -87.50015258789062, "logps/rejected": -99.00617218017578, "loss": 0.9301, "margin_dpo/margin_mean": 4.668992519378662, "margin_dpo/margin_std": 5.956631660461426, "step": 458 }, { "epoch": 0.6938775510204082, "fcm_dpo/beta": 0.2047109305858612, "fcm_dpo/delta": -0.14493052661418915, "fcm_dpo/margin": 4.762661933898926, "fcm_dpo/q_t": 0.31784969568252563, "grad_norm": 50.155799865722656, "learning_rate": 1.3076841354533658e-07, "logits/chosen": 0.28778326511383057, "logits/rejected": 0.25329944491386414, "logps/chosen": -72.57955932617188, "logps/ref_chosen": -65.37794494628906, "logps/ref_rejected": -88.19244384765625, "logps/rejected": -100.1567153930664, "loss": 0.8925, "margin_dpo/margin_mean": 4.762661933898926, "margin_dpo/margin_std": 5.247749328613281, "step": 459 }, { "epoch": 0.6953892668178382, "fcm_dpo/beta": 0.1921759843826294, "fcm_dpo/delta": -0.2016555219888687, "fcm_dpo/margin": 5.358639717102051, "fcm_dpo/q_t": 0.31141430139541626, "grad_norm": 48.96949005126953, "learning_rate": 1.2960793094762345e-07, "logits/chosen": 0.2766995429992676, "logits/rejected": 0.17160335183143616, "logps/chosen": -72.19833374023438, "logps/ref_chosen": -64.5616683959961, "logps/ref_rejected": -88.67890167236328, "logps/rejected": -101.6741943359375, "loss": 0.8422, "margin_dpo/margin_mean": 5.358639717102051, "margin_dpo/margin_std": 5.980681419372559, "step": 460 }, { "epoch": 0.6969009826152683, "fcm_dpo/beta": 0.18721193075180054, "fcm_dpo/delta": -0.04403623193502426, "fcm_dpo/margin": 4.7339019775390625, "fcm_dpo/q_t": 0.3311063051223755, "grad_norm": 42.88606262207031, "learning_rate": 1.2845081597488286e-07, "logits/chosen": 0.3637089729309082, "logits/rejected": 0.28602486848831177, "logps/chosen": -56.37803649902344, "logps/ref_chosen": -49.4779167175293, "logps/ref_rejected": -72.65262603759766, "logps/rejected": -84.28665161132812, "loss": 0.9256, "margin_dpo/margin_mean": 4.7339019775390625, "margin_dpo/margin_std": 5.710514068603516, "step": 461 }, { "epoch": 0.6984126984126984, "fcm_dpo/beta": 0.18369469046592712, "fcm_dpo/delta": -0.15608486533164978, "fcm_dpo/margin": 5.382818222045898, "fcm_dpo/q_t": 0.30420053005218506, "grad_norm": 40.72878646850586, "learning_rate": 1.27297100994108e-07, "logits/chosen": 0.23772986233234406, "logits/rejected": 0.18360589444637299, "logps/chosen": -67.96499633789062, "logps/ref_chosen": -60.4951171875, "logps/ref_rejected": -74.82136535644531, "logps/rejected": -87.674072265625, "loss": 0.8356, "margin_dpo/margin_mean": 5.382818222045898, "margin_dpo/margin_std": 5.639156341552734, "step": 462 }, { "epoch": 0.6999244142101285, "fcm_dpo/beta": 0.185808002948761, "fcm_dpo/delta": 0.10821240395307541, "fcm_dpo/margin": 4.03656005859375, "fcm_dpo/q_t": 0.3506065607070923, "grad_norm": 43.807926177978516, "learning_rate": 1.2614681827718695e-07, "logits/chosen": 0.28047433495521545, "logits/rejected": 0.2664549648761749, "logps/chosen": -75.05906677246094, "logps/ref_chosen": -67.68511962890625, "logps/ref_rejected": -71.32196044921875, "logps/rejected": -82.73246765136719, "loss": 0.9477, "margin_dpo/margin_mean": 4.036560535430908, "margin_dpo/margin_std": 4.958671569824219, "step": 463 }, { "epoch": 0.7014361300075586, "fcm_dpo/beta": 0.19172126054763794, "fcm_dpo/delta": 0.0832245945930481, "fcm_dpo/margin": 4.018474578857422, "fcm_dpo/q_t": 0.3552596867084503, "grad_norm": 53.8079833984375, "learning_rate": 1.2500000000000005e-07, "logits/chosen": 0.24116164445877075, "logits/rejected": 0.21551595628261566, "logps/chosen": -67.01548767089844, "logps/ref_chosen": -59.16564178466797, "logps/ref_rejected": -69.56146240234375, "logps/rejected": -81.42977905273438, "loss": 1.0547, "margin_dpo/margin_mean": 4.018474102020264, "margin_dpo/margin_std": 6.148181915283203, "step": 464 }, { "epoch": 0.7029478458049887, "fcm_dpo/beta": 0.190629780292511, "fcm_dpo/delta": 0.02115422859787941, "fcm_dpo/margin": 4.356546401977539, "fcm_dpo/q_t": 0.3453529477119446, "grad_norm": 47.5476188659668, "learning_rate": 1.238566782415197e-07, "logits/chosen": 0.33090129494667053, "logits/rejected": 0.2758718729019165, "logps/chosen": -66.55125427246094, "logps/ref_chosen": -58.513671875, "logps/ref_rejected": -84.31745910644531, "logps/rejected": -96.71158599853516, "loss": 0.985, "margin_dpo/margin_mean": 4.356546401977539, "margin_dpo/margin_std": 6.026215553283691, "step": 465 }, { "epoch": 0.7044595616024187, "fcm_dpo/beta": 0.198988139629364, "fcm_dpo/delta": 0.28733205795288086, "fcm_dpo/margin": 2.915306329727173, "fcm_dpo/q_t": 0.38667044043540955, "grad_norm": 60.51933670043945, "learning_rate": 1.2271688498291334e-07, "logits/chosen": 0.2653227150440216, "logits/rejected": 0.2612670958042145, "logps/chosen": -82.42771911621094, "logps/ref_chosen": -73.26580810546875, "logps/ref_rejected": -74.83621215820312, "logps/rejected": -86.91342163085938, "loss": 1.0816, "margin_dpo/margin_mean": 2.915306568145752, "margin_dpo/margin_std": 4.817732810974121, "step": 466 }, { "epoch": 0.7059712773998488, "fcm_dpo/beta": 0.20076759159564972, "fcm_dpo/delta": -0.06342404335737228, "fcm_dpo/margin": 4.520856857299805, "fcm_dpo/q_t": 0.3295537531375885, "grad_norm": 41.36443328857422, "learning_rate": 1.2158065210664848e-07, "logits/chosen": 0.26830387115478516, "logits/rejected": 0.14777283370494843, "logps/chosen": -55.57886505126953, "logps/ref_chosen": -47.57947540283203, "logps/ref_rejected": -78.68522644042969, "logps/rejected": -91.20547485351562, "loss": 0.9024, "margin_dpo/margin_mean": 4.520857334136963, "margin_dpo/margin_std": 5.519647598266602, "step": 467 }, { "epoch": 0.7074829931972789, "fcm_dpo/beta": 0.19205418229103088, "fcm_dpo/delta": -0.3020942509174347, "fcm_dpo/margin": 5.825957298278809, "fcm_dpo/q_t": 0.2959809899330139, "grad_norm": 44.799903869628906, "learning_rate": 1.204480113956011e-07, "logits/chosen": 0.2383967787027359, "logits/rejected": 0.22732359170913696, "logps/chosen": -70.88501739501953, "logps/ref_chosen": -63.92778778076172, "logps/ref_rejected": -76.51626586914062, "logps/rejected": -89.29945373535156, "loss": 0.8096, "margin_dpo/margin_mean": 5.825956344604492, "margin_dpo/margin_std": 6.250823020935059, "step": 468 }, { "epoch": 0.708994708994709, "fcm_dpo/beta": 0.1848841905593872, "fcm_dpo/delta": -0.024960562586784363, "fcm_dpo/margin": 4.694052696228027, "fcm_dpo/q_t": 0.33774009346961975, "grad_norm": 41.76699447631836, "learning_rate": 1.1931899453216697e-07, "logits/chosen": 0.32318294048309326, "logits/rejected": 0.30566686391830444, "logps/chosen": -66.29656982421875, "logps/ref_chosen": -59.05818176269531, "logps/ref_rejected": -75.67672729492188, "logps/rejected": -87.60916137695312, "loss": 0.9215, "margin_dpo/margin_mean": 4.694052696228027, "margin_dpo/margin_std": 5.674604892730713, "step": 469 }, { "epoch": 0.7105064247921391, "fcm_dpo/beta": 0.18895672261714935, "fcm_dpo/delta": 0.010860327631235123, "fcm_dpo/margin": 4.441650867462158, "fcm_dpo/q_t": 0.33179694414138794, "grad_norm": 39.528404235839844, "learning_rate": 1.1819363309737438e-07, "logits/chosen": 0.25164568424224854, "logits/rejected": 0.19546663761138916, "logps/chosen": -55.655845642089844, "logps/ref_chosen": -47.86743927001953, "logps/ref_rejected": -65.96859741210938, "logps/rejected": -78.19864654541016, "loss": 0.9389, "margin_dpo/margin_mean": 4.441650867462158, "margin_dpo/margin_std": 5.509934425354004, "step": 470 }, { "epoch": 0.7120181405895691, "fcm_dpo/beta": 0.183636873960495, "fcm_dpo/delta": -0.16288867592811584, "fcm_dpo/margin": 5.427962303161621, "fcm_dpo/q_t": 0.3086378872394562, "grad_norm": 44.055545806884766, "learning_rate": 1.1707195857000215e-07, "logits/chosen": 0.2511060833930969, "logits/rejected": 0.19487126171588898, "logps/chosen": -64.90446472167969, "logps/ref_chosen": -57.777854919433594, "logps/ref_rejected": -73.81172180175781, "logps/rejected": -86.36629486083984, "loss": 0.8795, "margin_dpo/margin_mean": 5.427962303161621, "margin_dpo/margin_std": 6.277071952819824, "step": 471 }, { "epoch": 0.7135298563869993, "fcm_dpo/beta": 0.1824641078710556, "fcm_dpo/delta": -0.035785011947155, "fcm_dpo/margin": 4.831416130065918, "fcm_dpo/q_t": 0.3352402150630951, "grad_norm": 45.14156723022461, "learning_rate": 1.1595400232569768e-07, "logits/chosen": 0.28769558668136597, "logits/rejected": 0.24208904802799225, "logps/chosen": -62.80769348144531, "logps/ref_chosen": -55.908668518066406, "logps/ref_rejected": -74.70294189453125, "logps/rejected": -86.43338775634766, "loss": 0.967, "margin_dpo/margin_mean": 4.831416130065918, "margin_dpo/margin_std": 6.460024833679199, "step": 472 }, { "epoch": 0.7150415721844293, "fcm_dpo/beta": 0.179289311170578, "fcm_dpo/delta": -0.016662299633026123, "fcm_dpo/margin": 4.8229851722717285, "fcm_dpo/q_t": 0.3476918935775757, "grad_norm": 42.09556198120117, "learning_rate": 1.1483979563610069e-07, "logits/chosen": 0.3349047303199768, "logits/rejected": 0.24838170409202576, "logps/chosen": -61.18727493286133, "logps/ref_chosen": -54.16088104248047, "logps/ref_rejected": -92.76789855957031, "logps/rejected": -104.61727142333984, "loss": 1.0116, "margin_dpo/margin_mean": 4.822985649108887, "margin_dpo/margin_std": 7.087156772613525, "step": 473 }, { "epoch": 0.7165532879818595, "fcm_dpo/beta": 0.182792067527771, "fcm_dpo/delta": 0.10003271698951721, "fcm_dpo/margin": 4.1439433097839355, "fcm_dpo/q_t": 0.36064907908439636, "grad_norm": 47.92180252075195, "learning_rate": 1.1372936966796709e-07, "logits/chosen": 0.3086499273777008, "logits/rejected": 0.24886074662208557, "logps/chosen": -55.18489074707031, "logps/ref_chosen": -46.685707092285156, "logps/ref_rejected": -71.44731903076172, "logps/rejected": -84.09043884277344, "loss": 1.0493, "margin_dpo/margin_mean": 4.1439433097839355, "margin_dpo/margin_std": 6.442416191101074, "step": 474 }, { "epoch": 0.7180650037792895, "fcm_dpo/beta": 0.17439153790473938, "fcm_dpo/delta": -0.2883111834526062, "fcm_dpo/margin": 6.32227897644043, "fcm_dpo/q_t": 0.28803032636642456, "grad_norm": 37.30523681640625, "learning_rate": 1.126227554822985e-07, "logits/chosen": 0.24227741360664368, "logits/rejected": 0.19501101970672607, "logps/chosen": -66.50431060791016, "logps/ref_chosen": -58.4873046875, "logps/ref_rejected": -87.00187683105469, "logps/rejected": -101.3411636352539, "loss": 0.7646, "margin_dpo/margin_mean": 6.32227897644043, "margin_dpo/margin_std": 5.994036674499512, "step": 475 }, { "epoch": 0.7195767195767195, "fcm_dpo/beta": 0.17916938662528992, "fcm_dpo/delta": 0.17899703979492188, "fcm_dpo/margin": 3.8024401664733887, "fcm_dpo/q_t": 0.3761757016181946, "grad_norm": 57.621246337890625, "learning_rate": 1.1151998403347243e-07, "logits/chosen": 0.18236970901489258, "logits/rejected": 0.1682473123073578, "logps/chosen": -84.62786865234375, "logps/ref_chosen": -75.38162231445312, "logps/ref_rejected": -76.99822235107422, "logps/rejected": -90.04690551757812, "loss": 1.1027, "margin_dpo/margin_mean": 3.802440881729126, "margin_dpo/margin_std": 6.650286674499512, "step": 476 }, { "epoch": 0.7210884353741497, "fcm_dpo/beta": 0.18340060114860535, "fcm_dpo/delta": 0.13215288519859314, "fcm_dpo/margin": 3.9657039642333984, "fcm_dpo/q_t": 0.36404669284820557, "grad_norm": 57.48204803466797, "learning_rate": 1.1042108616837692e-07, "logits/chosen": 0.27557092905044556, "logits/rejected": 0.23873813450336456, "logps/chosen": -70.09407043457031, "logps/ref_chosen": -61.073387145996094, "logps/ref_rejected": -81.34375, "logps/rejected": -94.33013916015625, "loss": 1.1397, "margin_dpo/margin_mean": 3.9657044410705566, "margin_dpo/margin_std": 7.145218849182129, "step": 477 }, { "epoch": 0.7226001511715797, "fcm_dpo/beta": 0.18710201978683472, "fcm_dpo/delta": 0.10514857620000839, "fcm_dpo/margin": 4.023831367492676, "fcm_dpo/q_t": 0.3657424747943878, "grad_norm": 49.14711380004883, "learning_rate": 1.0932609262554746e-07, "logits/chosen": 0.21086078882217407, "logits/rejected": 0.20800764858722687, "logps/chosen": -65.02413940429688, "logps/ref_chosen": -57.16731643676758, "logps/ref_rejected": -53.30917739868164, "logps/rejected": -65.18983459472656, "loss": 1.0618, "margin_dpo/margin_mean": 4.023830890655518, "margin_dpo/margin_std": 6.53040885925293, "step": 478 }, { "epoch": 0.7241118669690099, "fcm_dpo/beta": 0.19023653864860535, "fcm_dpo/delta": 0.12472105771303177, "fcm_dpo/margin": 3.858086347579956, "fcm_dpo/q_t": 0.36114710569381714, "grad_norm": 47.78655242919922, "learning_rate": 1.0823503403430734e-07, "logits/chosen": 0.1616727113723755, "logits/rejected": 0.11323510110378265, "logps/chosen": -67.915283203125, "logps/ref_chosen": -58.91331481933594, "logps/ref_rejected": -63.7403450012207, "logps/rejected": -76.60040283203125, "loss": 1.1059, "margin_dpo/margin_mean": 3.858086585998535, "margin_dpo/margin_std": 6.676839828491211, "step": 479 }, { "epoch": 0.7256235827664399, "fcm_dpo/beta": 0.19588303565979004, "fcm_dpo/delta": -0.023159652948379517, "fcm_dpo/margin": 4.419933795928955, "fcm_dpo/q_t": 0.3366478979587555, "grad_norm": 56.999000549316406, "learning_rate": 1.0714794091391072e-07, "logits/chosen": 0.2097318321466446, "logits/rejected": 0.1971430778503418, "logps/chosen": -71.11746978759766, "logps/ref_chosen": -62.80061340332031, "logps/ref_rejected": -67.58859252929688, "logps/rejected": -80.32538604736328, "loss": 1.0673, "margin_dpo/margin_mean": 4.419934272766113, "margin_dpo/margin_std": 6.8448004722595215, "step": 480 }, { "epoch": 0.72713529856387, "fcm_dpo/beta": 0.18832021951675415, "fcm_dpo/delta": -0.12109113484621048, "fcm_dpo/margin": 5.092473983764648, "fcm_dpo/q_t": 0.33122923970222473, "grad_norm": 46.02914047241211, "learning_rate": 1.0606484367268906e-07, "logits/chosen": 0.2160688042640686, "logits/rejected": 0.20522311329841614, "logps/chosen": -72.78104400634766, "logps/ref_chosen": -65.28649139404297, "logps/ref_rejected": -70.78668212890625, "logps/rejected": -83.37371063232422, "loss": 0.952, "margin_dpo/margin_mean": 5.092473983764648, "margin_dpo/margin_std": 6.993575572967529, "step": 481 }, { "epoch": 0.7286470143613001, "fcm_dpo/beta": 0.1910247802734375, "fcm_dpo/delta": 0.08871780335903168, "fcm_dpo/margin": 4.016700267791748, "fcm_dpo/q_t": 0.36484792828559875, "grad_norm": 61.503326416015625, "learning_rate": 1.0498577260720048e-07, "logits/chosen": 0.21902191638946533, "logits/rejected": 0.10311245173215866, "logps/chosen": -69.74750518798828, "logps/ref_chosen": -60.906185150146484, "logps/ref_rejected": -103.44656372070312, "logps/rejected": -116.30458068847656, "loss": 1.1322, "margin_dpo/margin_mean": 4.016700267791748, "margin_dpo/margin_std": 7.26608943939209, "step": 482 }, { "epoch": 0.7301587301587301, "fcm_dpo/beta": 0.1862109899520874, "fcm_dpo/delta": -0.14671653509140015, "fcm_dpo/margin": 5.271034240722656, "fcm_dpo/q_t": 0.32034099102020264, "grad_norm": 44.32430648803711, "learning_rate": 1.0391075790138232e-07, "logits/chosen": 0.3017137050628662, "logits/rejected": 0.21860943734645844, "logps/chosen": -61.299842834472656, "logps/ref_chosen": -53.192012786865234, "logps/ref_rejected": -81.83927154541016, "logps/rejected": -95.21813201904297, "loss": 0.9343, "margin_dpo/margin_mean": 5.271034240722656, "margin_dpo/margin_std": 6.870976448059082, "step": 483 }, { "epoch": 0.7316704459561603, "fcm_dpo/beta": 0.1890522539615631, "fcm_dpo/delta": 0.09609914571046829, "fcm_dpo/margin": 4.021857738494873, "fcm_dpo/q_t": 0.3523946702480316, "grad_norm": 50.47954177856445, "learning_rate": 1.0283982962570681e-07, "logits/chosen": 0.30475252866744995, "logits/rejected": 0.26562267541885376, "logps/chosen": -65.94442749023438, "logps/ref_chosen": -57.76945877075195, "logps/ref_rejected": -71.6829833984375, "logps/rejected": -83.87980651855469, "loss": 0.9651, "margin_dpo/margin_mean": 4.021857261657715, "margin_dpo/margin_std": 5.328012466430664, "step": 484 }, { "epoch": 0.7331821617535903, "fcm_dpo/beta": 0.1838168501853943, "fcm_dpo/delta": -0.04106524586677551, "fcm_dpo/margin": 4.771086692810059, "fcm_dpo/q_t": 0.335861474275589, "grad_norm": 45.630088806152344, "learning_rate": 1.0177301773633992e-07, "logits/chosen": 0.296680212020874, "logits/rejected": 0.26908427476882935, "logps/chosen": -64.26364135742188, "logps/ref_chosen": -56.63584899902344, "logps/ref_rejected": -70.85614013671875, "logps/rejected": -83.25502014160156, "loss": 0.9562, "margin_dpo/margin_mean": 4.771087646484375, "margin_dpo/margin_std": 6.045078277587891, "step": 485 }, { "epoch": 0.7346938775510204, "fcm_dpo/beta": 0.18868184089660645, "fcm_dpo/delta": 0.03267286345362663, "fcm_dpo/margin": 4.345629692077637, "fcm_dpo/q_t": 0.3601461946964264, "grad_norm": 48.578330993652344, "learning_rate": 1.007103520743035e-07, "logits/chosen": 0.2949785590171814, "logits/rejected": 0.20217090845108032, "logps/chosen": -65.90437316894531, "logps/ref_chosen": -56.347023010253906, "logps/ref_rejected": -85.97221374511719, "logps/rejected": -99.87519836425781, "loss": 1.0612, "margin_dpo/margin_mean": 4.345630168914795, "margin_dpo/margin_std": 7.313272476196289, "step": 486 }, { "epoch": 0.7362055933484505, "fcm_dpo/beta": 0.1893056333065033, "fcm_dpo/delta": 0.00844599213451147, "fcm_dpo/margin": 4.448924541473389, "fcm_dpo/q_t": 0.34452641010284424, "grad_norm": 50.25308609008789, "learning_rate": 9.965186236464046e-08, "logits/chosen": 0.34107547998428345, "logits/rejected": 0.29102659225463867, "logps/chosen": -68.70523834228516, "logps/ref_chosen": -60.617218017578125, "logps/ref_rejected": -82.50975036621094, "logps/rejected": -95.04669189453125, "loss": 0.9623, "margin_dpo/margin_mean": 4.448925018310547, "margin_dpo/margin_std": 6.057121276855469, "step": 487 }, { "epoch": 0.7377173091458806, "fcm_dpo/beta": 0.18650321662425995, "fcm_dpo/delta": -0.1942426562309265, "fcm_dpo/margin": 5.484685897827148, "fcm_dpo/q_t": 0.30992603302001953, "grad_norm": 43.414710998535156, "learning_rate": 9.859757821558337e-08, "logits/chosen": 0.2581988275051117, "logits/rejected": 0.1931534707546234, "logps/chosen": -70.77301025390625, "logps/ref_chosen": -63.10905075073242, "logps/ref_rejected": -82.49348449707031, "logps/rejected": -95.64212036132812, "loss": 0.8646, "margin_dpo/margin_mean": 5.484686374664307, "margin_dpo/margin_std": 6.123306751251221, "step": 488 }, { "epoch": 0.7392290249433107, "fcm_dpo/beta": 0.1910599172115326, "fcm_dpo/delta": 0.34434235095977783, "fcm_dpo/margin": 2.7443325519561768, "fcm_dpo/q_t": 0.40401116013526917, "grad_norm": 54.903987884521484, "learning_rate": 9.754752911772615e-08, "logits/chosen": 0.29846933484077454, "logits/rejected": 0.2590899169445038, "logps/chosen": -73.30157470703125, "logps/ref_chosen": -64.98896026611328, "logps/ref_rejected": -84.39607238769531, "logps/rejected": -95.45301818847656, "loss": 1.2114, "margin_dpo/margin_mean": 2.744333267211914, "margin_dpo/margin_std": 6.05691385269165, "step": 489 }, { "epoch": 0.7407407407407407, "fcm_dpo/beta": 0.199541836977005, "fcm_dpo/delta": 0.13197389245033264, "fcm_dpo/margin": 3.642141580581665, "fcm_dpo/q_t": 0.3773440718650818, "grad_norm": 61.38715744018555, "learning_rate": 9.650174444319956e-08, "logits/chosen": 0.326549768447876, "logits/rejected": 0.30207884311676025, "logps/chosen": -70.25138854980469, "logps/ref_chosen": -61.90874481201172, "logps/ref_rejected": -70.58566284179688, "logps/rejected": -82.57044982910156, "loss": 1.2245, "margin_dpo/margin_mean": 3.642141342163086, "margin_dpo/margin_std": 7.560006141662598, "step": 490 }, { "epoch": 0.7422524565381708, "fcm_dpo/beta": 0.19735094904899597, "fcm_dpo/delta": -0.0074592530727386475, "fcm_dpo/margin": 4.32567024230957, "fcm_dpo/q_t": 0.3363799452781677, "grad_norm": 47.401344299316406, "learning_rate": 9.546025344484868e-08, "logits/chosen": 0.18645425140857697, "logits/rejected": 0.13546575605869293, "logps/chosen": -63.35127258300781, "logps/ref_chosen": -55.47570037841797, "logps/ref_rejected": -78.70318603515625, "logps/rejected": -90.90443420410156, "loss": 0.9645, "margin_dpo/margin_mean": 4.3256707191467285, "margin_dpo/margin_std": 5.561920166015625, "step": 491 }, { "epoch": 0.7437641723356009, "fcm_dpo/beta": 0.20590060949325562, "fcm_dpo/delta": 0.10966280847787857, "fcm_dpo/margin": 3.6100869178771973, "fcm_dpo/q_t": 0.37008100748062134, "grad_norm": 61.88254928588867, "learning_rate": 9.442308525541589e-08, "logits/chosen": 0.24195045232772827, "logits/rejected": 0.17962196469306946, "logps/chosen": -77.16026306152344, "logps/ref_chosen": -67.28638458251953, "logps/ref_rejected": -82.78628540039062, "logps/rejected": -96.27024841308594, "loss": 1.1601, "margin_dpo/margin_mean": 3.610086441040039, "margin_dpo/margin_std": 6.680771827697754, "step": 492 }, { "epoch": 0.745275888133031, "fcm_dpo/beta": 0.1993444263935089, "fcm_dpo/delta": -0.19864656031131744, "fcm_dpo/margin": 5.160065650939941, "fcm_dpo/q_t": 0.3086199164390564, "grad_norm": 48.454776763916016, "learning_rate": 9.339026888672468e-08, "logits/chosen": 0.2442186325788498, "logits/rejected": 0.1783977746963501, "logps/chosen": -63.58871078491211, "logps/ref_chosen": -55.92750549316406, "logps/ref_rejected": -79.12149810791016, "logps/rejected": -91.9427719116211, "loss": 0.902, "margin_dpo/margin_mean": 5.160066604614258, "margin_dpo/margin_std": 6.3702802658081055, "step": 493 }, { "epoch": 0.7467876039304611, "fcm_dpo/beta": 0.19964680075645447, "fcm_dpo/delta": 0.14756183326244354, "fcm_dpo/margin": 3.572193145751953, "fcm_dpo/q_t": 0.37176692485809326, "grad_norm": 62.97100830078125, "learning_rate": 9.236183322886945e-08, "logits/chosen": 0.16220593452453613, "logits/rejected": 0.1173371970653534, "logps/chosen": -75.95932006835938, "logps/ref_chosen": -67.95410919189453, "logps/ref_rejected": -90.50865173339844, "logps/rejected": -102.0860595703125, "loss": 1.178, "margin_dpo/margin_mean": 3.572193145751953, "margin_dpo/margin_std": 7.063486576080322, "step": 494 }, { "epoch": 0.7482993197278912, "fcm_dpo/beta": 0.20778684318065643, "fcm_dpo/delta": 0.14525321125984192, "fcm_dpo/margin": 3.4341237545013428, "fcm_dpo/q_t": 0.37397438287734985, "grad_norm": 52.764556884765625, "learning_rate": 9.133780704940594e-08, "logits/chosen": 0.33206620812416077, "logits/rejected": 0.27231916785240173, "logps/chosen": -60.14783477783203, "logps/ref_chosen": -52.62546157836914, "logps/ref_rejected": -72.06781005859375, "logps/rejected": -83.02430725097656, "loss": 1.0904, "margin_dpo/margin_mean": 3.4341230392456055, "margin_dpo/margin_std": 5.9770073890686035, "step": 495 }, { "epoch": 0.7498110355253212, "fcm_dpo/beta": 0.2041645646095276, "fcm_dpo/delta": -0.07183012366294861, "fcm_dpo/margin": 4.473697185516357, "fcm_dpo/q_t": 0.3525882959365845, "grad_norm": 54.563907623291016, "learning_rate": 9.031821899254797e-08, "logits/chosen": 0.2877293825149536, "logits/rejected": 0.1940247118473053, "logps/chosen": -65.89447021484375, "logps/ref_chosen": -57.597320556640625, "logps/ref_rejected": -94.36127471923828, "logps/rejected": -107.13212585449219, "loss": 1.0568, "margin_dpo/margin_mean": 4.473696708679199, "margin_dpo/margin_std": 7.258790969848633, "step": 496 }, { "epoch": 0.7513227513227513, "fcm_dpo/beta": 0.1935914307832718, "fcm_dpo/delta": -0.3445747196674347, "fcm_dpo/margin": 5.949126243591309, "fcm_dpo/q_t": 0.29749977588653564, "grad_norm": 48.13323211669922, "learning_rate": 8.930309757836516e-08, "logits/chosen": 0.2928311824798584, "logits/rejected": 0.2597949504852295, "logps/chosen": -81.34271240234375, "logps/ref_chosen": -72.78994750976562, "logps/ref_rejected": -89.48483276367188, "logps/rejected": -103.98672485351562, "loss": 0.8528, "margin_dpo/margin_mean": 5.949126243591309, "margin_dpo/margin_std": 7.026630401611328, "step": 497 }, { "epoch": 0.7528344671201814, "fcm_dpo/beta": 0.188248872756958, "fcm_dpo/delta": -0.13193053007125854, "fcm_dpo/margin": 5.149151802062988, "fcm_dpo/q_t": 0.32633334398269653, "grad_norm": 52.20969009399414, "learning_rate": 8.829247120198563e-08, "logits/chosen": 0.279436856508255, "logits/rejected": 0.24694347381591797, "logps/chosen": -75.69930267333984, "logps/ref_chosen": -68.36572265625, "logps/ref_rejected": -71.28846740722656, "logps/rejected": -83.77120971679688, "loss": 0.8913, "margin_dpo/margin_mean": 5.149151802062988, "margin_dpo/margin_std": 6.356961250305176, "step": 498 }, { "epoch": 0.7543461829176115, "fcm_dpo/beta": 0.1871645599603653, "fcm_dpo/delta": 0.03554215282201767, "fcm_dpo/margin": 4.36710786819458, "fcm_dpo/q_t": 0.361484557390213, "grad_norm": 50.920005798339844, "learning_rate": 8.728636813280163e-08, "logits/chosen": 0.257360577583313, "logits/rejected": 0.19850978255271912, "logps/chosen": -69.13787841796875, "logps/ref_chosen": -61.90882873535156, "logps/ref_rejected": -91.9411392211914, "logps/rejected": -103.53729248046875, "loss": 1.1192, "margin_dpo/margin_mean": 4.367107391357422, "margin_dpo/margin_std": 7.621745586395264, "step": 499 }, { "epoch": 0.7558578987150416, "fcm_dpo/beta": 0.18969736993312836, "fcm_dpo/delta": 0.06616081297397614, "fcm_dpo/margin": 4.159477710723877, "fcm_dpo/q_t": 0.3585333228111267, "grad_norm": 54.2974853515625, "learning_rate": 8.628481651367875e-08, "logits/chosen": 0.20467233657836914, "logits/rejected": 0.19438844919204712, "logps/chosen": -78.15548706054688, "logps/ref_chosen": -70.225830078125, "logps/ref_rejected": -71.72203063964844, "logps/rejected": -83.8111572265625, "loss": 1.0841, "margin_dpo/margin_mean": 4.159477233886719, "margin_dpo/margin_std": 6.929226875305176, "step": 500 }, { "epoch": 0.7573696145124716, "fcm_dpo/beta": 0.1939505636692047, "fcm_dpo/delta": 0.0667010024189949, "fcm_dpo/margin": 4.054576396942139, "fcm_dpo/q_t": 0.3484607934951782, "grad_norm": 45.469051361083984, "learning_rate": 8.528784436016878e-08, "logits/chosen": 0.2413794994354248, "logits/rejected": 0.22745752334594727, "logps/chosen": -72.44284057617188, "logps/ref_chosen": -64.59880828857422, "logps/ref_rejected": -70.59329223632812, "logps/rejected": -82.49189758300781, "loss": 0.9391, "margin_dpo/margin_mean": 4.054576873779297, "margin_dpo/margin_std": 5.029389381408691, "step": 501 }, { "epoch": 0.7588813303099018, "fcm_dpo/beta": 0.1954634189605713, "fcm_dpo/delta": 0.08000719547271729, "fcm_dpo/margin": 3.9702367782592773, "fcm_dpo/q_t": 0.35143595933914185, "grad_norm": 54.6177978515625, "learning_rate": 8.4295479559726e-08, "logits/chosen": 0.2817111015319824, "logits/rejected": 0.24115899205207825, "logps/chosen": -72.94097900390625, "logps/ref_chosen": -65.46662902832031, "logps/ref_rejected": -90.22233581542969, "logps/rejected": -101.66691589355469, "loss": 0.971, "margin_dpo/margin_mean": 3.9702374935150146, "margin_dpo/margin_std": 5.515082836151123, "step": 502 }, { "epoch": 0.7603930461073318, "fcm_dpo/beta": 0.19681644439697266, "fcm_dpo/delta": 0.032539092004299164, "fcm_dpo/margin": 4.167027473449707, "fcm_dpo/q_t": 0.3450298309326172, "grad_norm": 50.10343933105469, "learning_rate": 8.330774987092712e-08, "logits/chosen": 0.26110100746154785, "logits/rejected": 0.241864413022995, "logps/chosen": -58.82035446166992, "logps/ref_chosen": -51.83476257324219, "logps/ref_rejected": -57.62522506713867, "logps/rejected": -68.77783966064453, "loss": 1.0317, "margin_dpo/margin_mean": 4.167027473449707, "margin_dpo/margin_std": 6.2261762619018555, "step": 503 }, { "epoch": 0.7619047619047619, "fcm_dpo/beta": 0.1906535029411316, "fcm_dpo/delta": -0.28664782643318176, "fcm_dpo/margin": 5.799999713897705, "fcm_dpo/q_t": 0.2936969995498657, "grad_norm": 47.68788146972656, "learning_rate": 8.232468292269479e-08, "logits/chosen": 0.21891510486602783, "logits/rejected": 0.19424328207969666, "logps/chosen": -75.66064453125, "logps/ref_chosen": -68.65119934082031, "logps/ref_rejected": -77.91394805908203, "logps/rejected": -90.72340393066406, "loss": 0.7799, "margin_dpo/margin_mean": 5.800000190734863, "margin_dpo/margin_std": 5.795510768890381, "step": 504 }, { "epoch": 0.763416477702192, "fcm_dpo/beta": 0.18748188018798828, "fcm_dpo/delta": 0.11835876107215881, "fcm_dpo/margin": 3.9429283142089844, "fcm_dpo/q_t": 0.3705042004585266, "grad_norm": 56.70164108276367, "learning_rate": 8.134630621352483e-08, "logits/chosen": 0.2908661365509033, "logits/rejected": 0.25435012578964233, "logps/chosen": -67.60441589355469, "logps/ref_chosen": -59.99884796142578, "logps/ref_rejected": -76.88048553466797, "logps/rejected": -88.42898559570312, "loss": 1.1116, "margin_dpo/margin_mean": 3.942927598953247, "margin_dpo/margin_std": 6.963860988616943, "step": 505 }, { "epoch": 0.764928193499622, "fcm_dpo/beta": 0.19286711513996124, "fcm_dpo/delta": 0.10626673698425293, "fcm_dpo/margin": 3.8978567123413086, "fcm_dpo/q_t": 0.3608360290527344, "grad_norm": 59.737823486328125, "learning_rate": 8.037264711071698e-08, "logits/chosen": 0.27555349469184875, "logits/rejected": 0.2518009543418884, "logps/chosen": -77.1668472290039, "logps/ref_chosen": -70.07130432128906, "logps/ref_rejected": -82.03775024414062, "logps/rejected": -93.03116607666016, "loss": 1.0974, "margin_dpo/margin_mean": 3.8978567123413086, "margin_dpo/margin_std": 6.750155925750732, "step": 506 }, { "epoch": 0.7664399092970522, "fcm_dpo/beta": 0.19243893027305603, "fcm_dpo/delta": -0.06309761106967926, "fcm_dpo/margin": 4.713381767272949, "fcm_dpo/q_t": 0.3451889157295227, "grad_norm": 51.14573669433594, "learning_rate": 7.940373284960933e-08, "logits/chosen": 0.2574292719364166, "logits/rejected": 0.21393823623657227, "logps/chosen": -79.86482238769531, "logps/ref_chosen": -72.00703430175781, "logps/ref_rejected": -93.94987487792969, "logps/rejected": -106.52104187011719, "loss": 1.0122, "margin_dpo/margin_mean": 4.713381767272949, "margin_dpo/margin_std": 7.080389976501465, "step": 507 }, { "epoch": 0.7679516250944822, "fcm_dpo/beta": 0.19176054000854492, "fcm_dpo/delta": -0.08605434000492096, "fcm_dpo/margin": 4.834778308868408, "fcm_dpo/q_t": 0.33275818824768066, "grad_norm": 50.69294357299805, "learning_rate": 7.843959053281663e-08, "logits/chosen": 0.24106700718402863, "logits/rejected": 0.13942265510559082, "logps/chosen": -67.20732116699219, "logps/ref_chosen": -60.21992492675781, "logps/ref_rejected": -95.9200668334961, "logps/rejected": -107.74224853515625, "loss": 0.9514, "margin_dpo/margin_mean": 4.834778308868408, "margin_dpo/margin_std": 6.46806526184082, "step": 508 }, { "epoch": 0.7694633408919124, "fcm_dpo/beta": 0.18921613693237305, "fcm_dpo/delta": 0.007762765511870384, "fcm_dpo/margin": 4.454644203186035, "fcm_dpo/q_t": 0.3404228389263153, "grad_norm": 50.47156524658203, "learning_rate": 7.748024712947204e-08, "logits/chosen": 0.21267962455749512, "logits/rejected": 0.18627651035785675, "logps/chosen": -73.61685180664062, "logps/ref_chosen": -66.27017211914062, "logps/ref_rejected": -71.73065185546875, "logps/rejected": -83.53197479248047, "loss": 0.9905, "margin_dpo/margin_mean": 4.454644680023193, "margin_dpo/margin_std": 6.248918533325195, "step": 509 }, { "epoch": 0.7709750566893424, "fcm_dpo/beta": 0.18685214221477509, "fcm_dpo/delta": -0.05087687447667122, "fcm_dpo/margin": 4.791868209838867, "fcm_dpo/q_t": 0.33913111686706543, "grad_norm": 53.60639953613281, "learning_rate": 7.652572947447272e-08, "logits/chosen": 0.3427318334579468, "logits/rejected": 0.2648513913154602, "logps/chosen": -61.08159637451172, "logps/ref_chosen": -53.54487609863281, "logps/ref_rejected": -91.36648559570312, "logps/rejected": -103.69507598876953, "loss": 0.9909, "margin_dpo/margin_mean": 4.791868686676025, "margin_dpo/margin_std": 6.868839263916016, "step": 510 }, { "epoch": 0.7724867724867724, "fcm_dpo/beta": 0.180000901222229, "fcm_dpo/delta": -0.2913375198841095, "fcm_dpo/margin": 6.163692951202393, "fcm_dpo/q_t": 0.29723721742630005, "grad_norm": 44.948909759521484, "learning_rate": 7.557606426772961e-08, "logits/chosen": 0.2703331410884857, "logits/rejected": 0.2265818864107132, "logps/chosen": -63.129844665527344, "logps/ref_chosen": -55.844383239746094, "logps/ref_rejected": -86.49819946289062, "logps/rejected": -99.94735717773438, "loss": 0.8221, "margin_dpo/margin_mean": 6.163693428039551, "margin_dpo/margin_std": 6.658810615539551, "step": 511 }, { "epoch": 0.7739984882842026, "fcm_dpo/beta": 0.17689445614814758, "fcm_dpo/delta": -0.06903138756752014, "fcm_dpo/margin": 5.151975631713867, "fcm_dpo/q_t": 0.3688731789588928, "grad_norm": 48.45236587524414, "learning_rate": 7.463127807341966e-08, "logits/chosen": 0.15684255957603455, "logits/rejected": 0.13879981637001038, "logps/chosen": -69.19769287109375, "logps/ref_chosen": -61.653038024902344, "logps/ref_rejected": -72.83148193359375, "logps/rejected": -85.52812194824219, "loss": 1.0905, "margin_dpo/margin_mean": 5.151974201202393, "margin_dpo/margin_std": 11.439408302307129, "step": 512 }, { "epoch": 0.7755102040816326, "fcm_dpo/beta": 0.1718558818101883, "fcm_dpo/delta": -0.029932759702205658, "fcm_dpo/margin": 5.091045379638672, "fcm_dpo/q_t": 0.3317672610282898, "grad_norm": 35.039642333984375, "learning_rate": 7.369139731924401e-08, "logits/chosen": 0.3575049042701721, "logits/rejected": 0.31288132071495056, "logps/chosen": -57.67890930175781, "logps/ref_chosen": -50.85256576538086, "logps/ref_rejected": -69.21754455566406, "logps/rejected": -81.13493347167969, "loss": 0.8854, "margin_dpo/margin_mean": 5.091045379638672, "margin_dpo/margin_std": 5.839650630950928, "step": 513 }, { "epoch": 0.7770219198790628, "fcm_dpo/beta": 0.17328277230262756, "fcm_dpo/delta": -0.023398784920573235, "fcm_dpo/margin": 5.028005599975586, "fcm_dpo/q_t": 0.3316155970096588, "grad_norm": 45.34275817871094, "learning_rate": 7.275644829568747e-08, "logits/chosen": 0.296786367893219, "logits/rejected": 0.2570733428001404, "logps/chosen": -77.50872039794922, "logps/ref_chosen": -69.38493347167969, "logps/ref_rejected": -83.32447814941406, "logps/rejected": -96.47626495361328, "loss": 0.9483, "margin_dpo/margin_mean": 5.028005599975586, "margin_dpo/margin_std": 6.503722667694092, "step": 514 }, { "epoch": 0.7785336356764928, "fcm_dpo/beta": 0.1716073453426361, "fcm_dpo/delta": -0.023606671020388603, "fcm_dpo/margin": 5.077951908111572, "fcm_dpo/q_t": 0.33353549242019653, "grad_norm": 41.92245864868164, "learning_rate": 7.182645715528435e-08, "logits/chosen": 0.28858014941215515, "logits/rejected": 0.215793639421463, "logps/chosen": -62.36624526977539, "logps/ref_chosen": -53.687034606933594, "logps/ref_rejected": -83.59614562988281, "logps/rejected": -97.35330200195312, "loss": 0.9296, "margin_dpo/margin_mean": 5.077951431274414, "margin_dpo/margin_std": 6.358786582946777, "step": 515 }, { "epoch": 0.780045351473923, "fcm_dpo/beta": 0.17564967274665833, "fcm_dpo/delta": 0.12671038508415222, "fcm_dpo/margin": 4.166781902313232, "fcm_dpo/q_t": 0.35390928387641907, "grad_norm": 43.83501052856445, "learning_rate": 7.090144991188568e-08, "logits/chosen": 0.2420632243156433, "logits/rejected": 0.19160601496696472, "logps/chosen": -63.76110076904297, "logps/ref_chosen": -56.9017219543457, "logps/ref_rejected": -67.83477783203125, "logps/rejected": -78.8609390258789, "loss": 1.0135, "margin_dpo/margin_mean": 4.166782379150391, "margin_dpo/margin_std": 6.096147537231445, "step": 516 }, { "epoch": 0.781557067271353, "fcm_dpo/beta": 0.17860382795333862, "fcm_dpo/delta": 0.08228413015604019, "fcm_dpo/margin": 4.333194255828857, "fcm_dpo/q_t": 0.35983067750930786, "grad_norm": 43.23679733276367, "learning_rate": 6.998145243993284e-08, "logits/chosen": 0.28628867864608765, "logits/rejected": 0.27577298879623413, "logps/chosen": -70.22264099121094, "logps/ref_chosen": -61.775142669677734, "logps/ref_rejected": -62.88270950317383, "logps/rejected": -75.66339874267578, "loss": 1.0162, "margin_dpo/margin_mean": 4.333193778991699, "margin_dpo/margin_std": 6.631152629852295, "step": 517 }, { "epoch": 0.783068783068783, "fcm_dpo/beta": 0.18542221188545227, "fcm_dpo/delta": 0.19371888041496277, "fcm_dpo/margin": 3.5995750427246094, "fcm_dpo/q_t": 0.38120606541633606, "grad_norm": 45.127986907958984, "learning_rate": 6.906649047373245e-08, "logits/chosen": 0.253539502620697, "logits/rejected": 0.20896165072917938, "logps/chosen": -69.38099670410156, "logps/ref_chosen": -62.02523422241211, "logps/ref_rejected": -79.06085205078125, "logps/rejected": -90.01618957519531, "loss": 1.1021, "margin_dpo/margin_mean": 3.5995755195617676, "margin_dpo/margin_std": 6.429238796234131, "step": 518 }, { "epoch": 0.7845804988662132, "fcm_dpo/beta": 0.19594839215278625, "fcm_dpo/delta": 0.30930453538894653, "fcm_dpo/margin": 2.8367679119110107, "fcm_dpo/q_t": 0.4056819677352905, "grad_norm": 65.59712982177734, "learning_rate": 6.815658960673781e-08, "logits/chosen": 0.28866493701934814, "logits/rejected": 0.23657400906085968, "logps/chosen": -70.72344207763672, "logps/ref_chosen": -61.60636901855469, "logps/ref_rejected": -74.50727844238281, "logps/rejected": -86.46111297607422, "loss": 1.3519, "margin_dpo/margin_mean": 2.8367679119110107, "margin_dpo/margin_std": 7.668980598449707, "step": 519 }, { "epoch": 0.7860922146636432, "fcm_dpo/beta": 0.1965680867433548, "fcm_dpo/delta": 0.009395897388458252, "fcm_dpo/margin": 4.272980690002441, "fcm_dpo/q_t": 0.33951810002326965, "grad_norm": 48.875953674316406, "learning_rate": 6.725177529083209e-08, "logits/chosen": 0.32016807794570923, "logits/rejected": 0.2701001763343811, "logps/chosen": -70.08049774169922, "logps/ref_chosen": -62.87343215942383, "logps/ref_rejected": -76.505615234375, "logps/rejected": -87.98565673828125, "loss": 0.9418, "margin_dpo/margin_mean": 4.272979736328125, "margin_dpo/margin_std": 5.404156684875488, "step": 520 }, { "epoch": 0.7876039304610734, "fcm_dpo/beta": 0.19937585294246674, "fcm_dpo/delta": 0.0007315799593925476, "fcm_dpo/margin": 4.257848262786865, "fcm_dpo/q_t": 0.3392133116722107, "grad_norm": 54.9530143737793, "learning_rate": 6.63520728356167e-08, "logits/chosen": 0.1798115372657776, "logits/rejected": 0.11677288264036179, "logps/chosen": -71.92523193359375, "logps/ref_chosen": -64.20668029785156, "logps/ref_rejected": -92.28083038330078, "logps/rejected": -104.25723266601562, "loss": 0.9967, "margin_dpo/margin_mean": 4.257848262786865, "margin_dpo/margin_std": 6.102939128875732, "step": 521 }, { "epoch": 0.7891156462585034, "fcm_dpo/beta": 0.19949057698249817, "fcm_dpo/delta": 0.02661752700805664, "fcm_dpo/margin": 4.138130187988281, "fcm_dpo/q_t": 0.3503352105617523, "grad_norm": 52.36747741699219, "learning_rate": 6.545750740770336e-08, "logits/chosen": 0.24446165561676025, "logits/rejected": 0.22852011024951935, "logps/chosen": -65.54415130615234, "logps/ref_chosen": -58.369720458984375, "logps/ref_rejected": -68.79248046875, "logps/rejected": -80.10504150390625, "loss": 1.124, "margin_dpo/margin_mean": 4.138129234313965, "margin_dpo/margin_std": 7.349329948425293, "step": 522 }, { "epoch": 0.7906273620559335, "fcm_dpo/beta": 0.19474643468856812, "fcm_dpo/delta": -0.12762956321239471, "fcm_dpo/margin": 4.950833320617676, "fcm_dpo/q_t": 0.3200003504753113, "grad_norm": 57.38889694213867, "learning_rate": 6.456810403001012e-08, "logits/chosen": 0.28123581409454346, "logits/rejected": 0.18239277601242065, "logps/chosen": -74.2017822265625, "logps/ref_chosen": -65.71324157714844, "logps/ref_rejected": -91.98896789550781, "logps/rejected": -105.4283447265625, "loss": 0.99, "margin_dpo/margin_mean": 4.950833320617676, "margin_dpo/margin_std": 6.844690322875977, "step": 523 }, { "epoch": 0.7921390778533636, "fcm_dpo/beta": 0.19928528368473053, "fcm_dpo/delta": 0.14152082800865173, "fcm_dpo/margin": 3.602719306945801, "fcm_dpo/q_t": 0.36274781823158264, "grad_norm": 56.699684143066406, "learning_rate": 6.368388758106134e-08, "logits/chosen": 0.25369152426719666, "logits/rejected": 0.23207354545593262, "logps/chosen": -82.81105041503906, "logps/ref_chosen": -76.35124969482422, "logps/ref_rejected": -89.96072387695312, "logps/rejected": -100.02325439453125, "loss": 1.0615, "margin_dpo/margin_mean": 3.602719783782959, "margin_dpo/margin_std": 5.887624740600586, "step": 524 }, { "epoch": 0.7936507936507936, "fcm_dpo/beta": 0.2056574821472168, "fcm_dpo/delta": 0.18491694331169128, "fcm_dpo/margin": 3.2947845458984375, "fcm_dpo/q_t": 0.37222665548324585, "grad_norm": 59.12623596191406, "learning_rate": 6.280488279429185e-08, "logits/chosen": 0.1436086744070053, "logits/rejected": 0.12916171550750732, "logps/chosen": -82.95500183105469, "logps/ref_chosen": -75.49578857421875, "logps/ref_rejected": -84.04852294921875, "logps/rejected": -94.80252075195312, "loss": 1.0611, "margin_dpo/margin_mean": 3.2947843074798584, "margin_dpo/margin_std": 5.342177391052246, "step": 525 }, { "epoch": 0.7951625094482238, "fcm_dpo/beta": 0.20773226022720337, "fcm_dpo/delta": 0.12321735173463821, "fcm_dpo/margin": 3.5220367908477783, "fcm_dpo/q_t": 0.3576337397098541, "grad_norm": 53.73698806762695, "learning_rate": 6.193111425735515e-08, "logits/chosen": 0.2691265046596527, "logits/rejected": 0.2151428908109665, "logps/chosen": -69.07579040527344, "logps/ref_chosen": -61.29241943359375, "logps/ref_rejected": -82.47763061523438, "logps/rejected": -93.78303527832031, "loss": 1.0304, "margin_dpo/margin_mean": 3.5220372676849365, "margin_dpo/margin_std": 4.995627403259277, "step": 526 }, { "epoch": 0.7966742252456538, "fcm_dpo/beta": 0.2198951095342636, "fcm_dpo/delta": 0.2048795521259308, "fcm_dpo/margin": 2.992419719696045, "fcm_dpo/q_t": 0.3876710534095764, "grad_norm": 77.22991943359375, "learning_rate": 6.106260641143546e-08, "logits/chosen": 0.34561246633529663, "logits/rejected": 0.2859325706958771, "logps/chosen": -69.87942504882812, "logps/ref_chosen": -61.472625732421875, "logps/ref_rejected": -90.52831268310547, "logps/rejected": -101.92753601074219, "loss": 1.2208, "margin_dpo/margin_mean": 2.9924192428588867, "margin_dpo/margin_std": 6.500236511230469, "step": 527 }, { "epoch": 0.7981859410430839, "fcm_dpo/beta": 0.22476676106452942, "fcm_dpo/delta": 0.16451933979988098, "fcm_dpo/margin": 3.0980749130249023, "fcm_dpo/q_t": 0.3729293942451477, "grad_norm": 56.562042236328125, "learning_rate": 6.019938355056422e-08, "logits/chosen": 0.15835247933864594, "logits/rejected": 0.09338517487049103, "logps/chosen": -66.64041900634766, "logps/ref_chosen": -58.792015075683594, "logps/ref_rejected": -71.82516479492188, "logps/rejected": -82.77163696289062, "loss": 1.2032, "margin_dpo/margin_mean": 3.0980749130249023, "margin_dpo/margin_std": 6.16510534286499, "step": 528 }, { "epoch": 0.799697656840514, "fcm_dpo/beta": 0.2031538188457489, "fcm_dpo/delta": -0.6506019234657288, "fcm_dpo/margin": 6.816672325134277, "fcm_dpo/q_t": 0.2616058588027954, "grad_norm": 40.82773208618164, "learning_rate": 5.934146982094049e-08, "logits/chosen": 0.18278737366199493, "logits/rejected": 0.12872397899627686, "logps/chosen": -61.67981719970703, "logps/ref_chosen": -55.070960998535156, "logps/ref_rejected": -75.44007873535156, "logps/rejected": -88.8656005859375, "loss": 0.759, "margin_dpo/margin_mean": 6.816672325134277, "margin_dpo/margin_std": 6.74721622467041, "step": 529 }, { "epoch": 0.8012093726379441, "fcm_dpo/beta": 0.20093229413032532, "fcm_dpo/delta": -0.02702292613685131, "fcm_dpo/margin": 4.352562427520752, "fcm_dpo/q_t": 0.3375556468963623, "grad_norm": 45.43187713623047, "learning_rate": 5.848888922025552e-08, "logits/chosen": 0.28208237886428833, "logits/rejected": 0.24441692233085632, "logps/chosen": -64.21714782714844, "logps/ref_chosen": -56.743812561035156, "logps/ref_rejected": -76.6692123413086, "logps/rejected": -88.49510192871094, "loss": 0.935, "margin_dpo/margin_mean": 4.35256290435791, "margin_dpo/margin_std": 5.480373382568359, "step": 530 }, { "epoch": 0.8027210884353742, "fcm_dpo/beta": 0.20748470723628998, "fcm_dpo/delta": 0.13889390230178833, "fcm_dpo/margin": 3.442054033279419, "fcm_dpo/q_t": 0.37123921513557434, "grad_norm": 58.889068603515625, "learning_rate": 5.7641665597021435e-08, "logits/chosen": 0.23493140935897827, "logits/rejected": 0.18107816576957703, "logps/chosen": -58.95187759399414, "logps/ref_chosen": -51.116455078125, "logps/ref_rejected": -79.52884674072266, "logps/rejected": -90.80632019042969, "loss": 1.0841, "margin_dpo/margin_mean": 3.44205379486084, "margin_dpo/margin_std": 5.732078552246094, "step": 531 }, { "epoch": 0.8042328042328042, "fcm_dpo/beta": 0.20114368200302124, "fcm_dpo/delta": -0.1241353303194046, "fcm_dpo/margin": 4.7797698974609375, "fcm_dpo/q_t": 0.332572877407074, "grad_norm": 49.33393096923828, "learning_rate": 5.679982264990424e-08, "logits/chosen": 0.19763997197151184, "logits/rejected": 0.15084370970726013, "logps/chosen": -66.58512115478516, "logps/ref_chosen": -58.279945373535156, "logps/ref_rejected": -78.05426788330078, "logps/rejected": -91.13921356201172, "loss": 0.969, "margin_dpo/margin_mean": 4.779770374298096, "margin_dpo/margin_std": 6.663928031921387, "step": 532 }, { "epoch": 0.8057445200302343, "fcm_dpo/beta": 0.1999870240688324, "fcm_dpo/delta": -0.04222950339317322, "fcm_dpo/margin": 4.442439079284668, "fcm_dpo/q_t": 0.33722180128097534, "grad_norm": 48.564510345458984, "learning_rate": 5.596338392706076e-08, "logits/chosen": 0.3552475869655609, "logits/rejected": 0.3035711944103241, "logps/chosen": -62.87090301513672, "logps/ref_chosen": -56.41801071166992, "logps/ref_rejected": -73.89324951171875, "logps/rejected": -84.78857421875, "loss": 0.991, "margin_dpo/margin_mean": 4.442439079284668, "margin_dpo/margin_std": 6.396747589111328, "step": 533 }, { "epoch": 0.8072562358276644, "fcm_dpo/beta": 0.19827596843242645, "fcm_dpo/delta": 0.030380956828594208, "fcm_dpo/margin": 4.141201019287109, "fcm_dpo/q_t": 0.3470977544784546, "grad_norm": 49.71409225463867, "learning_rate": 5.513237282548033e-08, "logits/chosen": 0.25465166568756104, "logits/rejected": 0.2110665887594223, "logps/chosen": -67.78778076171875, "logps/ref_chosen": -60.748687744140625, "logps/ref_rejected": -73.8623046875, "logps/rejected": -85.04259490966797, "loss": 1.0232, "margin_dpo/margin_mean": 4.141200065612793, "margin_dpo/margin_std": 6.159672737121582, "step": 534 }, { "epoch": 0.8087679516250945, "fcm_dpo/beta": 0.20083844661712646, "fcm_dpo/delta": -0.018803313374519348, "fcm_dpo/margin": 4.314824104309082, "fcm_dpo/q_t": 0.3484407663345337, "grad_norm": 49.287715911865234, "learning_rate": 5.430681259032957e-08, "logits/chosen": 0.16682550311088562, "logits/rejected": 0.10979054868221283, "logps/chosen": -69.68934631347656, "logps/ref_chosen": -61.637413024902344, "logps/ref_rejected": -80.93138885498047, "logps/rejected": -93.29814147949219, "loss": 1.0629, "margin_dpo/margin_mean": 4.314825057983398, "margin_dpo/margin_std": 6.968698501586914, "step": 535 }, { "epoch": 0.8102796674225246, "fcm_dpo/beta": 0.19315822422504425, "fcm_dpo/delta": -0.16967397928237915, "fcm_dpo/margin": 5.180801868438721, "fcm_dpo/q_t": 0.31073105335235596, "grad_norm": 40.0985107421875, "learning_rate": 5.3486726314303175e-08, "logits/chosen": 0.2999964952468872, "logits/rejected": 0.22906377911567688, "logps/chosen": -59.02777862548828, "logps/ref_chosen": -51.88897705078125, "logps/ref_rejected": -73.34864044189453, "logps/rejected": -85.66825103759766, "loss": 0.8271, "margin_dpo/margin_mean": 5.1808013916015625, "margin_dpo/margin_std": 5.521551132202148, "step": 536 }, { "epoch": 0.8117913832199547, "fcm_dpo/beta": 0.19068770110607147, "fcm_dpo/delta": -0.007951788604259491, "fcm_dpo/margin": 4.485739707946777, "fcm_dpo/q_t": 0.3408752679824829, "grad_norm": 45.67889404296875, "learning_rate": 5.267213693697695e-08, "logits/chosen": 0.3263600468635559, "logits/rejected": 0.25088340044021606, "logps/chosen": -62.604698181152344, "logps/ref_chosen": -54.248619079589844, "logps/ref_rejected": -94.94343566894531, "logps/rejected": -107.78524780273438, "loss": 1.0353, "margin_dpo/margin_mean": 4.485739707946777, "margin_dpo/margin_std": 6.675798416137695, "step": 537 }, { "epoch": 0.8133030990173847, "fcm_dpo/beta": 0.18905366957187653, "fcm_dpo/delta": -0.18150761723518372, "fcm_dpo/margin": 5.358842849731445, "fcm_dpo/q_t": 0.30919575691223145, "grad_norm": 49.55366516113281, "learning_rate": 5.1863067244167144e-08, "logits/chosen": 0.25182631611824036, "logits/rejected": 0.2184920608997345, "logps/chosen": -78.08734130859375, "logps/ref_chosen": -70.09353637695312, "logps/ref_rejected": -79.49833679199219, "logps/rejected": -92.85098266601562, "loss": 0.8536, "margin_dpo/margin_mean": 5.358841896057129, "margin_dpo/margin_std": 6.0890302658081055, "step": 538 }, { "epoch": 0.8148148148148148, "fcm_dpo/beta": 0.18460465967655182, "fcm_dpo/delta": -0.013019578531384468, "fcm_dpo/margin": 4.667385101318359, "fcm_dpo/q_t": 0.33970072865486145, "grad_norm": 46.4735107421875, "learning_rate": 5.105953986729195e-08, "logits/chosen": 0.2540702223777771, "logits/rejected": 0.19249705970287323, "logps/chosen": -69.79113006591797, "logps/ref_chosen": -61.93169403076172, "logps/ref_rejected": -84.08946228027344, "logps/rejected": -96.61628723144531, "loss": 0.9249, "margin_dpo/margin_mean": 4.667386054992676, "margin_dpo/margin_std": 5.918379783630371, "step": 539 }, { "epoch": 0.8163265306122449, "fcm_dpo/beta": 0.18176347017288208, "fcm_dpo/delta": -0.2018444836139679, "fcm_dpo/margin": 5.670566558837891, "fcm_dpo/q_t": 0.31293582916259766, "grad_norm": 48.642704010009766, "learning_rate": 5.026157728273966e-08, "logits/chosen": 0.28550148010253906, "logits/rejected": 0.21018415689468384, "logps/chosen": -70.13665771484375, "logps/ref_chosen": -62.704254150390625, "logps/ref_rejected": -95.63597106933594, "logps/rejected": -108.73894500732422, "loss": 0.8836, "margin_dpo/margin_mean": 5.670566558837891, "margin_dpo/margin_std": 6.66909122467041, "step": 540 }, { "epoch": 0.817838246409675, "fcm_dpo/beta": 0.17354460060596466, "fcm_dpo/delta": -0.10751892626285553, "fcm_dpo/margin": 5.446465969085693, "fcm_dpo/q_t": 0.325096070766449, "grad_norm": 43.86935806274414, "learning_rate": 4.9469201811239035e-08, "logits/chosen": 0.26334431767463684, "logits/rejected": 0.2641550600528717, "logps/chosen": -69.57955932617188, "logps/ref_chosen": -62.48084259033203, "logps/ref_rejected": -57.55541229248047, "logps/rejected": -70.10059356689453, "loss": 0.9464, "margin_dpo/margin_mean": 5.446465492248535, "margin_dpo/margin_std": 7.033263683319092, "step": 541 }, { "epoch": 0.8193499622071051, "fcm_dpo/beta": 0.1704559624195099, "fcm_dpo/delta": -0.12397484481334686, "fcm_dpo/margin": 5.641759872436523, "fcm_dpo/q_t": 0.3238675892353058, "grad_norm": 35.97234344482422, "learning_rate": 4.868243561723534e-08, "logits/chosen": 0.29136067628860474, "logits/rejected": 0.240421861410141, "logps/chosen": -55.7423095703125, "logps/ref_chosen": -49.454891204833984, "logps/ref_rejected": -65.33275604248047, "logps/rejected": -77.26193237304688, "loss": 0.9075, "margin_dpo/margin_mean": 5.641759872436523, "margin_dpo/margin_std": 6.985322952270508, "step": 542 }, { "epoch": 0.8208616780045351, "fcm_dpo/beta": 0.1713915467262268, "fcm_dpo/delta": 0.03326155245304108, "fcm_dpo/margin": 4.7779693603515625, "fcm_dpo/q_t": 0.3359708786010742, "grad_norm": 35.59272384643555, "learning_rate": 4.790130070827028e-08, "logits/chosen": 0.2787627577781677, "logits/rejected": 0.20168402791023254, "logps/chosen": -58.36046600341797, "logps/ref_chosen": -51.100860595703125, "logps/ref_rejected": -76.06130981445312, "logps/rejected": -88.09889221191406, "loss": 0.9651, "margin_dpo/margin_mean": 4.7779693603515625, "margin_dpo/margin_std": 6.366022109985352, "step": 543 }, { "epoch": 0.8223733938019653, "fcm_dpo/beta": 0.16629686951637268, "fcm_dpo/delta": -0.18447908759117126, "fcm_dpo/margin": 6.10788631439209, "fcm_dpo/q_t": 0.3162338137626648, "grad_norm": 39.026893615722656, "learning_rate": 4.7125818934366454e-08, "logits/chosen": 0.2566012740135193, "logits/rejected": 0.19304610788822174, "logps/chosen": -67.98334503173828, "logps/ref_chosen": -60.2772331237793, "logps/ref_rejected": -88.40553283691406, "logps/rejected": -102.21954345703125, "loss": 0.9123, "margin_dpo/margin_mean": 6.10788631439209, "margin_dpo/margin_std": 7.644372940063477, "step": 544 }, { "epoch": 0.8238851095993953, "fcm_dpo/beta": 0.16722407937049866, "fcm_dpo/delta": 0.11144264042377472, "fcm_dpo/margin": 4.466965675354004, "fcm_dpo/q_t": 0.35912227630615234, "grad_norm": 46.097412109375, "learning_rate": 4.635601198741607e-08, "logits/chosen": 0.2190103530883789, "logits/rejected": 0.16595765948295593, "logps/chosen": -69.51997375488281, "logps/ref_chosen": -61.61524963378906, "logps/ref_rejected": -78.71266174316406, "logps/rejected": -91.0843505859375, "loss": 1.0322, "margin_dpo/margin_mean": 4.466965198516846, "margin_dpo/margin_std": 6.752434730529785, "step": 545 }, { "epoch": 0.8253968253968254, "fcm_dpo/beta": 0.1708962321281433, "fcm_dpo/delta": 0.09486165642738342, "fcm_dpo/margin": 4.460666656494141, "fcm_dpo/q_t": 0.349132239818573, "grad_norm": 47.134735107421875, "learning_rate": 4.559190140057428e-08, "logits/chosen": 0.26342087984085083, "logits/rejected": 0.24543830752372742, "logps/chosen": -66.58544921875, "logps/ref_chosen": -59.313262939453125, "logps/ref_rejected": -64.73631286621094, "logps/rejected": -76.46916198730469, "loss": 1.0067, "margin_dpo/margin_mean": 4.460666656494141, "margin_dpo/margin_std": 6.285987377166748, "step": 546 }, { "epoch": 0.8269085411942555, "fcm_dpo/beta": 0.16892951726913452, "fcm_dpo/delta": -0.09182556718587875, "fcm_dpo/margin": 5.522754192352295, "fcm_dpo/q_t": 0.31475168466567993, "grad_norm": 37.49753189086914, "learning_rate": 4.483350854765672e-08, "logits/chosen": 0.2093636393547058, "logits/rejected": 0.14936429262161255, "logps/chosen": -61.5643310546875, "logps/ref_chosen": -54.97674560546875, "logps/ref_rejected": -75.35922241210938, "logps/rejected": -87.46955871582031, "loss": 0.8833, "margin_dpo/margin_mean": 5.522754192352295, "margin_dpo/margin_std": 6.282604217529297, "step": 547 }, { "epoch": 0.8284202569916855, "fcm_dpo/beta": 0.17445912957191467, "fcm_dpo/delta": 0.1631871610879898, "fcm_dpo/margin": 3.9804792404174805, "fcm_dpo/q_t": 0.37146803736686707, "grad_norm": 47.63283920288086, "learning_rate": 4.4080854642541826e-08, "logits/chosen": 0.18536588549613953, "logits/rejected": 0.12500083446502686, "logps/chosen": -71.25007629394531, "logps/ref_chosen": -63.21067428588867, "logps/ref_rejected": -81.23347473144531, "logps/rejected": -93.25334930419922, "loss": 1.0408, "margin_dpo/margin_mean": 3.9804794788360596, "margin_dpo/margin_std": 6.179323673248291, "step": 548 }, { "epoch": 0.8299319727891157, "fcm_dpo/beta": 0.17808356881141663, "fcm_dpo/delta": 0.12966041266918182, "fcm_dpo/margin": 4.094516754150391, "fcm_dpo/q_t": 0.36078569293022156, "grad_norm": 52.33575439453125, "learning_rate": 4.333396073857723e-08, "logits/chosen": 0.3678380846977234, "logits/rejected": 0.3106314539909363, "logps/chosen": -71.68885803222656, "logps/ref_chosen": -64.27351379394531, "logps/ref_rejected": -92.31663513183594, "logps/rejected": -103.82649230957031, "loss": 1.0571, "margin_dpo/margin_mean": 4.094517707824707, "margin_dpo/margin_std": 6.406397819519043, "step": 549 }, { "epoch": 0.8314436885865457, "fcm_dpo/beta": 0.18214035034179688, "fcm_dpo/delta": 0.19023308157920837, "fcm_dpo/margin": 3.6925172805786133, "fcm_dpo/q_t": 0.37335193157196045, "grad_norm": 42.97513961791992, "learning_rate": 4.259284772799099e-08, "logits/chosen": 0.25656658411026, "logits/rejected": 0.22733622789382935, "logps/chosen": -64.19790649414062, "logps/ref_chosen": -56.230438232421875, "logps/ref_rejected": -62.59788513183594, "logps/rejected": -74.25787353515625, "loss": 1.0665, "margin_dpo/margin_mean": 3.692517042160034, "margin_dpo/margin_std": 5.977490425109863, "step": 550 }, { "epoch": 0.8329554043839759, "fcm_dpo/beta": 0.18962660431861877, "fcm_dpo/delta": 0.09512817859649658, "fcm_dpo/margin": 4.010119438171387, "fcm_dpo/q_t": 0.35618987679481506, "grad_norm": 47.10663986206055, "learning_rate": 4.1857536341307176e-08, "logits/chosen": 0.30963271856307983, "logits/rejected": 0.2778348922729492, "logps/chosen": -75.92347717285156, "logps/ref_chosen": -67.74720764160156, "logps/ref_rejected": -87.04285430908203, "logps/rejected": -99.229248046875, "loss": 0.993, "margin_dpo/margin_mean": 4.0101189613342285, "margin_dpo/margin_std": 5.625774383544922, "step": 551 }, { "epoch": 0.8344671201814059, "fcm_dpo/beta": 0.18955287337303162, "fcm_dpo/delta": -0.06805290281772614, "fcm_dpo/margin": 4.80015230178833, "fcm_dpo/q_t": 0.31782644987106323, "grad_norm": 48.02134704589844, "learning_rate": 4.112804714676593e-08, "logits/chosen": 0.2556304633617401, "logits/rejected": 0.20716384053230286, "logps/chosen": -70.34280395507812, "logps/ref_chosen": -62.92625427246094, "logps/ref_rejected": -82.98365783691406, "logps/rejected": -95.20034790039062, "loss": 0.9436, "margin_dpo/margin_mean": 4.80015230178833, "margin_dpo/margin_std": 6.0169830322265625, "step": 552 }, { "epoch": 0.8359788359788359, "fcm_dpo/beta": 0.1859016716480255, "fcm_dpo/delta": 0.005417622625827789, "fcm_dpo/margin": 4.543527126312256, "fcm_dpo/q_t": 0.36400720477104187, "grad_norm": 50.315792083740234, "learning_rate": 4.0404400549748144e-08, "logits/chosen": 0.219588965177536, "logits/rejected": 0.13211965560913086, "logps/chosen": -64.96172332763672, "logps/ref_chosen": -56.038490295410156, "logps/ref_rejected": -84.48454284667969, "logps/rejected": -97.9512939453125, "loss": 1.1175, "margin_dpo/margin_mean": 4.543527603149414, "margin_dpo/margin_std": 8.079211235046387, "step": 553 }, { "epoch": 0.8374905517762661, "fcm_dpo/beta": 0.1871982216835022, "fcm_dpo/delta": -0.0026739854365587234, "fcm_dpo/margin": 4.553138732910156, "fcm_dpo/q_t": 0.35031062364578247, "grad_norm": 50.560482025146484, "learning_rate": 3.968661679220467e-08, "logits/chosen": 0.24321147799491882, "logits/rejected": 0.21679717302322388, "logps/chosen": -72.2421875, "logps/ref_chosen": -64.53059387207031, "logps/ref_rejected": -71.2155990600586, "logps/rejected": -83.48033905029297, "loss": 1.0192, "margin_dpo/margin_mean": 4.553138732910156, "margin_dpo/margin_std": 7.070949554443359, "step": 554 }, { "epoch": 0.8390022675736961, "fcm_dpo/beta": 0.19130748510360718, "fcm_dpo/delta": 0.047974929213523865, "fcm_dpo/margin": 4.179170608520508, "fcm_dpo/q_t": 0.34389621019363403, "grad_norm": 54.529205322265625, "learning_rate": 3.89747159520904e-08, "logits/chosen": 0.23633408546447754, "logits/rejected": 0.20477698743343353, "logps/chosen": -75.52586364746094, "logps/ref_chosen": -66.65191650390625, "logps/ref_rejected": -68.6667251586914, "logps/rejected": -81.71983337402344, "loss": 1.0644, "margin_dpo/margin_mean": 4.179170608520508, "margin_dpo/margin_std": 6.368129253387451, "step": 555 }, { "epoch": 0.8405139833711263, "fcm_dpo/beta": 0.18900999426841736, "fcm_dpo/delta": 0.013541316613554955, "fcm_dpo/margin": 4.431397438049316, "fcm_dpo/q_t": 0.3580917716026306, "grad_norm": 47.6467170715332, "learning_rate": 3.826871794280192e-08, "logits/chosen": 0.27349787950515747, "logits/rejected": 0.22483891248703003, "logps/chosen": -61.78699493408203, "logps/ref_chosen": -52.832366943359375, "logps/ref_rejected": -64.49044036865234, "logps/rejected": -77.87646484375, "loss": 1.0529, "margin_dpo/margin_mean": 4.431397914886475, "margin_dpo/margin_std": 7.119260311126709, "step": 556 }, { "epoch": 0.8420256991685563, "fcm_dpo/beta": 0.1805565357208252, "fcm_dpo/delta": -0.22024190425872803, "fcm_dpo/margin": 5.766865253448486, "fcm_dpo/q_t": 0.3106374740600586, "grad_norm": 45.826904296875, "learning_rate": 3.756864251262143e-08, "logits/chosen": 0.31960952281951904, "logits/rejected": 0.2534366548061371, "logps/chosen": -63.537750244140625, "logps/ref_chosen": -55.03598403930664, "logps/ref_rejected": -75.80644989013672, "logps/rejected": -90.07508087158203, "loss": 0.8573, "margin_dpo/margin_mean": 5.766864776611328, "margin_dpo/margin_std": 6.593277454376221, "step": 557 }, { "epoch": 0.8435374149659864, "fcm_dpo/beta": 0.17705780267715454, "fcm_dpo/delta": -0.10933573544025421, "fcm_dpo/margin": 5.353769779205322, "fcm_dpo/q_t": 0.327720582485199, "grad_norm": 47.34086990356445, "learning_rate": 3.687450924416341e-08, "logits/chosen": 0.32476723194122314, "logits/rejected": 0.2724132239818573, "logps/chosen": -70.65055847167969, "logps/ref_chosen": -63.226348876953125, "logps/ref_rejected": -91.46881866455078, "logps/rejected": -104.24679565429688, "loss": 0.9053, "margin_dpo/margin_mean": 5.353769302368164, "margin_dpo/margin_std": 6.638795852661133, "step": 558 }, { "epoch": 0.8450491307634165, "fcm_dpo/beta": 0.17447656393051147, "fcm_dpo/delta": -0.02185794711112976, "fcm_dpo/margin": 4.971553802490234, "fcm_dpo/q_t": 0.34655678272247314, "grad_norm": 44.358978271484375, "learning_rate": 3.6186337553827743e-08, "logits/chosen": 0.23880869150161743, "logits/rejected": 0.17617136240005493, "logps/chosen": -69.51097869873047, "logps/ref_chosen": -61.521644592285156, "logps/ref_rejected": -82.83859252929688, "logps/rejected": -95.79948425292969, "loss": 1.0178, "margin_dpo/margin_mean": 4.971553802490234, "margin_dpo/margin_std": 7.330011367797852, "step": 559 }, { "epoch": 0.8465608465608465, "fcm_dpo/beta": 0.17926636338233948, "fcm_dpo/delta": 0.02828364074230194, "fcm_dpo/margin": 4.576733589172363, "fcm_dpo/q_t": 0.3509957492351532, "grad_norm": 46.233280181884766, "learning_rate": 3.550414669125573e-08, "logits/chosen": 0.2628178298473358, "logits/rejected": 0.22722047567367554, "logps/chosen": -68.85298919677734, "logps/ref_chosen": -60.64122009277344, "logps/ref_rejected": -78.75474548339844, "logps/rejected": -91.54324340820312, "loss": 1.0091, "margin_dpo/margin_mean": 4.576733589172363, "margin_dpo/margin_std": 6.540881633758545, "step": 560 }, { "epoch": 0.8480725623582767, "fcm_dpo/beta": 0.17519283294677734, "fcm_dpo/delta": -0.06392641365528107, "fcm_dpo/margin": 5.182496547698975, "fcm_dpo/q_t": 0.3386669456958771, "grad_norm": 41.15628433227539, "learning_rate": 3.482795573879241e-08, "logits/chosen": 0.2851717174053192, "logits/rejected": 0.25381070375442505, "logps/chosen": -69.367919921875, "logps/ref_chosen": -62.49859619140625, "logps/ref_rejected": -78.72064208984375, "logps/rejected": -90.77245330810547, "loss": 0.9509, "margin_dpo/margin_mean": 5.182496547698975, "margin_dpo/margin_std": 7.013888359069824, "step": 561 }, { "epoch": 0.8495842781557067, "fcm_dpo/beta": 0.17091301083564758, "fcm_dpo/delta": -0.11639774590730667, "fcm_dpo/margin": 5.581327438354492, "fcm_dpo/q_t": 0.3209976553916931, "grad_norm": 40.8580322265625, "learning_rate": 3.415778361095226e-08, "logits/chosen": 0.2667577266693115, "logits/rejected": 0.23144987225532532, "logps/chosen": -82.92703247070312, "logps/ref_chosen": -74.78173828125, "logps/ref_rejected": -92.63499450683594, "logps/rejected": -106.36161804199219, "loss": 0.8912, "margin_dpo/margin_mean": 5.58132791519165, "margin_dpo/margin_std": 6.571352005004883, "step": 562 }, { "epoch": 0.8510959939531368, "fcm_dpo/beta": 0.17142033576965332, "fcm_dpo/delta": 0.007226529531180859, "fcm_dpo/margin": 4.919520378112793, "fcm_dpo/q_t": 0.3452809453010559, "grad_norm": 44.207237243652344, "learning_rate": 3.349364905389032e-08, "logits/chosen": 0.3307056427001953, "logits/rejected": 0.28086477518081665, "logps/chosen": -57.743751525878906, "logps/ref_chosen": -50.19850158691406, "logps/ref_rejected": -66.76687622070312, "logps/rejected": -79.23164367675781, "loss": 1.0449, "margin_dpo/margin_mean": 4.919520378112793, "margin_dpo/margin_std": 7.567540168762207, "step": 563 }, { "epoch": 0.8526077097505669, "fcm_dpo/beta": 0.17031943798065186, "fcm_dpo/delta": -0.06808815151453018, "fcm_dpo/margin": 5.352931976318359, "fcm_dpo/q_t": 0.3269733786582947, "grad_norm": 41.774166107177734, "learning_rate": 3.283557064487785e-08, "logits/chosen": 0.23458395898342133, "logits/rejected": 0.2039821743965149, "logps/chosen": -62.68193054199219, "logps/ref_chosen": -55.7408447265625, "logps/ref_rejected": -74.82323455810547, "logps/rejected": -87.11726379394531, "loss": 0.9261, "margin_dpo/margin_mean": 5.352931022644043, "margin_dpo/margin_std": 6.691219806671143, "step": 564 }, { "epoch": 0.854119425547997, "fcm_dpo/beta": 0.17381291091442108, "fcm_dpo/delta": 0.14001916348934174, "fcm_dpo/margin": 4.128961086273193, "fcm_dpo/q_t": 0.36346155405044556, "grad_norm": 47.45510482788086, "learning_rate": 3.218356679178252e-08, "logits/chosen": 0.304351806640625, "logits/rejected": 0.25713908672332764, "logps/chosen": -67.73616027832031, "logps/ref_chosen": -58.33738327026367, "logps/ref_rejected": -78.31776428222656, "logps/rejected": -91.84550476074219, "loss": 1.0411, "margin_dpo/margin_mean": 4.128960609436035, "margin_dpo/margin_std": 6.340240001678467, "step": 565 }, { "epoch": 0.8556311413454271, "fcm_dpo/beta": 0.17765888571739197, "fcm_dpo/delta": 0.09049337357282639, "fcm_dpo/margin": 4.29995584487915, "fcm_dpo/q_t": 0.36482471227645874, "grad_norm": 55.42988586425781, "learning_rate": 3.1537655732553764e-08, "logits/chosen": 0.3093351721763611, "logits/rejected": 0.28633812069892883, "logps/chosen": -79.33909606933594, "logps/ref_chosen": -71.22373962402344, "logps/ref_rejected": -71.11601257324219, "logps/rejected": -83.53132629394531, "loss": 1.1324, "margin_dpo/margin_mean": 4.299956321716309, "margin_dpo/margin_std": 7.633435249328613, "step": 566 }, { "epoch": 0.8571428571428571, "fcm_dpo/beta": 0.17416879534721375, "fcm_dpo/delta": -0.03981255739927292, "fcm_dpo/margin": 5.07735538482666, "fcm_dpo/q_t": 0.33635973930358887, "grad_norm": 41.349308013916016, "learning_rate": 3.089785553471233e-08, "logits/chosen": 0.3112683892250061, "logits/rejected": 0.22656577825546265, "logps/chosen": -60.94340133666992, "logps/ref_chosen": -52.669273376464844, "logps/ref_rejected": -74.34785461425781, "logps/rejected": -87.6993408203125, "loss": 0.9308, "margin_dpo/margin_mean": 5.077354907989502, "margin_dpo/margin_std": 6.490789413452148, "step": 567 }, { "epoch": 0.8586545729402872, "fcm_dpo/beta": 0.17367665469646454, "fcm_dpo/delta": -0.14223557710647583, "fcm_dpo/margin": 5.628249168395996, "fcm_dpo/q_t": 0.31329670548439026, "grad_norm": 38.60588836669922, "learning_rate": 3.026418409484513e-08, "logits/chosen": 0.27716493606567383, "logits/rejected": 0.2054300308227539, "logps/chosen": -59.362396240234375, "logps/ref_chosen": -52.178001403808594, "logps/ref_rejected": -85.8277587890625, "logps/rejected": -98.64041137695312, "loss": 0.8378, "margin_dpo/margin_mean": 5.6282477378845215, "margin_dpo/margin_std": 5.813695907592773, "step": 568 }, { "epoch": 0.8601662887377173, "fcm_dpo/beta": 0.17136166989803314, "fcm_dpo/delta": 0.12116237729787827, "fcm_dpo/margin": 4.294561862945557, "fcm_dpo/q_t": 0.3553329408168793, "grad_norm": 44.1339111328125, "learning_rate": 2.963665913810451e-08, "logits/chosen": 0.18196213245391846, "logits/rejected": 0.1568015217781067, "logps/chosen": -70.65408325195312, "logps/ref_chosen": -62.649261474609375, "logps/ref_rejected": -75.4298324584961, "logps/rejected": -87.72921752929688, "loss": 1.0417, "margin_dpo/margin_mean": 4.294561386108398, "margin_dpo/margin_std": 6.439499378204346, "step": 569 }, { "epoch": 0.8616780045351474, "fcm_dpo/beta": 0.1683310866355896, "fcm_dpo/delta": -0.32472285628318787, "fcm_dpo/margin": 6.764734268188477, "fcm_dpo/q_t": 0.2851110100746155, "grad_norm": 35.5472526550293, "learning_rate": 2.9015298217712453e-08, "logits/chosen": 0.2262299358844757, "logits/rejected": 0.14589998126029968, "logps/chosen": -56.79124450683594, "logps/ref_chosen": -50.04179382324219, "logps/ref_rejected": -78.27146911621094, "logps/rejected": -91.78564453125, "loss": 0.7689, "margin_dpo/margin_mean": 6.764734268188477, "margin_dpo/margin_std": 6.428610324859619, "step": 570 }, { "epoch": 0.8631897203325775, "fcm_dpo/beta": 0.16944804787635803, "fcm_dpo/delta": 0.19956818222999573, "fcm_dpo/margin": 3.9083523750305176, "fcm_dpo/q_t": 0.3683198392391205, "grad_norm": 42.308650970458984, "learning_rate": 2.840011871446962e-08, "logits/chosen": 0.2377762794494629, "logits/rejected": 0.2050282061100006, "logps/chosen": -61.697296142578125, "logps/ref_chosen": -53.65681457519531, "logps/ref_rejected": -66.13298034667969, "logps/rejected": -78.0818099975586, "loss": 1.0888, "margin_dpo/margin_mean": 3.908352851867676, "margin_dpo/margin_std": 6.5086469650268555, "step": 571 }, { "epoch": 0.8647014361300076, "fcm_dpo/beta": 0.17394644021987915, "fcm_dpo/delta": 0.11392390727996826, "fcm_dpo/margin": 4.276225566864014, "fcm_dpo/q_t": 0.3549836277961731, "grad_norm": 49.87938690185547, "learning_rate": 2.7791137836269158e-08, "logits/chosen": 0.29352182149887085, "logits/rejected": 0.3179316520690918, "logps/chosen": -82.94725036621094, "logps/ref_chosen": -74.81792449951172, "logps/ref_rejected": -65.88681030273438, "logps/rejected": -78.29235076904297, "loss": 0.9979, "margin_dpo/margin_mean": 4.276226043701172, "margin_dpo/margin_std": 6.016283988952637, "step": 572 }, { "epoch": 0.8662131519274376, "fcm_dpo/beta": 0.17347398400306702, "fcm_dpo/delta": 0.006579414010047913, "fcm_dpo/margin": 4.861777305603027, "fcm_dpo/q_t": 0.3557564616203308, "grad_norm": 50.46380615234375, "learning_rate": 2.718837261761528e-08, "logits/chosen": 0.2632012963294983, "logits/rejected": 0.2227526158094406, "logps/chosen": -77.41082763671875, "logps/ref_chosen": -68.72564697265625, "logps/ref_rejected": -88.16201782226562, "logps/rejected": -101.70896911621094, "loss": 1.0797, "margin_dpo/margin_mean": 4.8617777824401855, "margin_dpo/margin_std": 8.154335021972656, "step": 573 }, { "epoch": 0.8677248677248677, "fcm_dpo/beta": 0.17341530323028564, "fcm_dpo/delta": -0.11930923908948898, "fcm_dpo/margin": 5.5170817375183105, "fcm_dpo/q_t": 0.31562137603759766, "grad_norm": 38.88788986206055, "learning_rate": 2.659183991914696e-08, "logits/chosen": 0.3164798617362976, "logits/rejected": 0.25295859575271606, "logps/chosen": -64.34851837158203, "logps/ref_chosen": -56.31340026855469, "logps/ref_rejected": -83.91553497314453, "logps/rejected": -97.46773529052734, "loss": 0.8352, "margin_dpo/margin_mean": 5.517082214355469, "margin_dpo/margin_std": 5.712655067443848, "step": 574 }, { "epoch": 0.8692365835222978, "fcm_dpo/beta": 0.17326758801937103, "fcm_dpo/delta": 0.19507335126399994, "fcm_dpo/margin": 3.850635051727295, "fcm_dpo/q_t": 0.3820122182369232, "grad_norm": 47.62089920043945, "learning_rate": 2.600155642716606e-08, "logits/chosen": 0.32219284772872925, "logits/rejected": 0.2561734616756439, "logps/chosen": -72.64906311035156, "logps/ref_chosen": -64.5841293334961, "logps/ref_rejected": -93.47034454345703, "logps/rejected": -105.38591766357422, "loss": 1.147, "margin_dpo/margin_mean": 3.850634813308716, "margin_dpo/margin_std": 7.171681880950928, "step": 575 }, { "epoch": 0.8707482993197279, "fcm_dpo/beta": 0.17552167177200317, "fcm_dpo/delta": -0.030258819460868835, "fcm_dpo/margin": 4.996522903442383, "fcm_dpo/q_t": 0.3442118763923645, "grad_norm": 52.461891174316406, "learning_rate": 2.5417538653170754e-08, "logits/chosen": 0.28439998626708984, "logits/rejected": 0.19962754845619202, "logps/chosen": -60.18579864501953, "logps/ref_chosen": -53.28052520751953, "logps/ref_rejected": -84.2000503540039, "logps/rejected": -96.10183715820312, "loss": 0.9895, "margin_dpo/margin_mean": 4.996521949768066, "margin_dpo/margin_std": 7.055662631988525, "step": 576 }, { "epoch": 0.872260015117158, "fcm_dpo/beta": 0.18019238114356995, "fcm_dpo/delta": 0.18000566959381104, "fcm_dpo/margin": 3.7884044647216797, "fcm_dpo/q_t": 0.3773168623447418, "grad_norm": 47.84729766845703, "learning_rate": 2.4839802933393607e-08, "logits/chosen": 0.2548506557941437, "logits/rejected": 0.23487058281898499, "logps/chosen": -69.96644592285156, "logps/ref_chosen": -62.32468795776367, "logps/ref_rejected": -67.300537109375, "logps/rejected": -78.73069763183594, "loss": 1.1324, "margin_dpo/margin_mean": 3.788404941558838, "margin_dpo/margin_std": 7.017814636230469, "step": 577 }, { "epoch": 0.873771730914588, "fcm_dpo/beta": 0.1872500777244568, "fcm_dpo/delta": 0.2222413420677185, "fcm_dpo/margin": 3.4311113357543945, "fcm_dpo/q_t": 0.38888439536094666, "grad_norm": 50.03500747680664, "learning_rate": 2.4268365428344733e-08, "logits/chosen": 0.310182124376297, "logits/rejected": 0.2835952341556549, "logps/chosen": -64.78092956542969, "logps/ref_chosen": -56.65557861328125, "logps/ref_rejected": -68.21835327148438, "logps/rejected": -79.77481079101562, "loss": 1.1301, "margin_dpo/margin_mean": 3.4311115741729736, "margin_dpo/margin_std": 6.33961296081543, "step": 578 }, { "epoch": 0.8752834467120182, "fcm_dpo/beta": 0.18103978037834167, "fcm_dpo/delta": -0.32594001293182373, "fcm_dpo/margin": 6.280138969421387, "fcm_dpo/q_t": 0.29298943281173706, "grad_norm": 42.93381118774414, "learning_rate": 2.3703242122359357e-08, "logits/chosen": 0.2113402634859085, "logits/rejected": 0.17864689230918884, "logps/chosen": -64.75434875488281, "logps/ref_chosen": -56.809661865234375, "logps/ref_rejected": -68.09613037109375, "logps/rejected": -82.32095336914062, "loss": 0.805, "margin_dpo/margin_mean": 6.280138969421387, "margin_dpo/margin_std": 6.796194076538086, "step": 579 }, { "epoch": 0.8767951625094482, "fcm_dpo/beta": 0.18160466849803925, "fcm_dpo/delta": 0.059685517102479935, "fcm_dpo/margin": 4.3713226318359375, "fcm_dpo/q_t": 0.35958653688430786, "grad_norm": 43.264957427978516, "learning_rate": 2.3144448823151392e-08, "logits/chosen": 0.21349485218524933, "logits/rejected": 0.17105573415756226, "logps/chosen": -65.28569030761719, "logps/ref_chosen": -57.70011520385742, "logps/ref_rejected": -77.90664672851562, "logps/rejected": -89.86354064941406, "loss": 1.0838, "margin_dpo/margin_mean": 4.371322154998779, "margin_dpo/margin_std": 7.185041427612305, "step": 580 }, { "epoch": 0.8783068783068783, "fcm_dpo/beta": 0.18188832700252533, "fcm_dpo/delta": 0.08677008748054504, "fcm_dpo/margin": 4.2297773361206055, "fcm_dpo/q_t": 0.3590962886810303, "grad_norm": 51.48876190185547, "learning_rate": 2.259200116137039e-08, "logits/chosen": 0.3077224791049957, "logits/rejected": 0.2525234818458557, "logps/chosen": -67.98102569580078, "logps/ref_chosen": -59.332359313964844, "logps/ref_rejected": -83.64482116699219, "logps/rejected": -96.52326965332031, "loss": 1.0355, "margin_dpo/margin_mean": 4.2297773361206055, "margin_dpo/margin_std": 6.5604119300842285, "step": 581 }, { "epoch": 0.8798185941043084, "fcm_dpo/beta": 0.18250367045402527, "fcm_dpo/delta": -0.04962211474776268, "fcm_dpo/margin": 4.9043426513671875, "fcm_dpo/q_t": 0.33205729722976685, "grad_norm": 45.71406555175781, "learning_rate": 2.204591459016525e-08, "logits/chosen": 0.2833797335624695, "logits/rejected": 0.30160412192344666, "logps/chosen": -71.7133560180664, "logps/ref_chosen": -64.16285705566406, "logps/ref_rejected": -58.632896423339844, "logps/rejected": -71.08773803710938, "loss": 0.9396, "margin_dpo/margin_mean": 4.9043426513671875, "margin_dpo/margin_std": 6.365811347961426, "step": 582 }, { "epoch": 0.8813303099017384, "fcm_dpo/beta": 0.18371570110321045, "fcm_dpo/delta": 0.05710229277610779, "fcm_dpo/margin": 4.340847015380859, "fcm_dpo/q_t": 0.36037206649780273, "grad_norm": 49.90797424316406, "learning_rate": 2.1506204384751064e-08, "logits/chosen": 0.35961782932281494, "logits/rejected": 0.27381908893585205, "logps/chosen": -59.777198791503906, "logps/ref_chosen": -51.87239456176758, "logps/ref_rejected": -83.86331176757812, "logps/rejected": -96.10896301269531, "loss": 1.0971, "margin_dpo/margin_mean": 4.340846061706543, "margin_dpo/margin_std": 7.52435827255249, "step": 583 }, { "epoch": 0.8828420256991686, "fcm_dpo/beta": 0.18236804008483887, "fcm_dpo/delta": -0.02732960134744644, "fcm_dpo/margin": 4.791356563568115, "fcm_dpo/q_t": 0.34965622425079346, "grad_norm": 45.7866325378418, "learning_rate": 2.09728856419826e-08, "logits/chosen": 0.3540344834327698, "logits/rejected": 0.2653522789478302, "logps/chosen": -53.58039855957031, "logps/ref_chosen": -46.571388244628906, "logps/ref_rejected": -80.67969512939453, "logps/rejected": -92.48005676269531, "loss": 1.0547, "margin_dpo/margin_mean": 4.791356563568115, "margin_dpo/margin_std": 7.583406448364258, "step": 584 }, { "epoch": 0.8843537414965986, "fcm_dpo/beta": 0.18653377890586853, "fcm_dpo/delta": 0.10668753832578659, "fcm_dpo/margin": 4.0271315574646, "fcm_dpo/q_t": 0.35192471742630005, "grad_norm": 47.550262451171875, "learning_rate": 2.044597327993153e-08, "logits/chosen": 0.23220500349998474, "logits/rejected": 0.19631624221801758, "logps/chosen": -65.75765228271484, "logps/ref_chosen": -58.124534606933594, "logps/ref_rejected": -79.00538635253906, "logps/rejected": -90.66563415527344, "loss": 1.0511, "margin_dpo/margin_mean": 4.027131080627441, "margin_dpo/margin_std": 6.313577175140381, "step": 585 }, { "epoch": 0.8858654572940288, "fcm_dpo/beta": 0.18284255266189575, "fcm_dpo/delta": -0.12782521545886993, "fcm_dpo/margin": 5.273736000061035, "fcm_dpo/q_t": 0.3119266927242279, "grad_norm": 40.83317184448242, "learning_rate": 1.9925482037469187e-08, "logits/chosen": 0.2742339074611664, "logits/rejected": 0.22679969668388367, "logps/chosen": -62.187355041503906, "logps/ref_chosen": -54.10163879394531, "logps/ref_rejected": -63.72113037109375, "logps/rejected": -77.08058166503906, "loss": 0.8626, "margin_dpo/margin_mean": 5.273736000061035, "margin_dpo/margin_std": 5.873756408691406, "step": 586 }, { "epoch": 0.8873771730914588, "fcm_dpo/beta": 0.18256214261054993, "fcm_dpo/delta": -0.001638067769818008, "fcm_dpo/margin": 4.664154052734375, "fcm_dpo/q_t": 0.3415156304836273, "grad_norm": 54.03948974609375, "learning_rate": 1.9411426473854687e-08, "logits/chosen": 0.2829993665218353, "logits/rejected": 0.2623155117034912, "logps/chosen": -70.61738586425781, "logps/ref_chosen": -63.41719436645508, "logps/ref_rejected": -63.47003936767578, "logps/rejected": -75.3343734741211, "loss": 1.0717, "margin_dpo/margin_mean": 4.664153575897217, "margin_dpo/margin_std": 7.594302654266357, "step": 587 }, { "epoch": 0.8888888888888888, "fcm_dpo/beta": 0.1814488172531128, "fcm_dpo/delta": -0.027305468916893005, "fcm_dpo/margin": 4.821053504943848, "fcm_dpo/q_t": 0.3352981507778168, "grad_norm": 48.0519905090332, "learning_rate": 1.890382096832699e-08, "logits/chosen": 0.3110392093658447, "logits/rejected": 0.26618409156799316, "logps/chosen": -70.01336669921875, "logps/ref_chosen": -62.20103454589844, "logps/ref_rejected": -82.10249328613281, "logps/rejected": -94.73588562011719, "loss": 0.9675, "margin_dpo/margin_mean": 4.821053504943848, "margin_dpo/margin_std": 6.605319499969482, "step": 588 }, { "epoch": 0.890400604686319, "fcm_dpo/beta": 0.1763853132724762, "fcm_dpo/delta": -0.18737711012363434, "fcm_dpo/margin": 5.771547317504883, "fcm_dpo/q_t": 0.30814313888549805, "grad_norm": 43.660152435302734, "learning_rate": 1.840267971970344e-08, "logits/chosen": 0.25168222188949585, "logits/rejected": 0.22019024193286896, "logps/chosen": -63.57328414916992, "logps/ref_chosen": -56.71361541748047, "logps/ref_rejected": -76.7366943359375, "logps/rejected": -89.36790466308594, "loss": 0.8337, "margin_dpo/margin_mean": 5.771548271179199, "margin_dpo/margin_std": 6.418168067932129, "step": 589 }, { "epoch": 0.891912320483749, "fcm_dpo/beta": 0.1695275604724884, "fcm_dpo/delta": -0.2477089762687683, "fcm_dpo/margin": 6.322257041931152, "fcm_dpo/q_t": 0.3001672029495239, "grad_norm": 39.7830810546875, "learning_rate": 1.7908016745981856e-08, "logits/chosen": 0.2025907039642334, "logits/rejected": 0.17383888363838196, "logps/chosen": -74.46649169921875, "logps/ref_chosen": -66.5138168334961, "logps/ref_rejected": -85.70820617675781, "logps/rejected": -99.98313903808594, "loss": 0.8505, "margin_dpo/margin_mean": 6.322257041931152, "margin_dpo/margin_std": 7.0092453956604, "step": 590 }, { "epoch": 0.8934240362811792, "fcm_dpo/beta": 0.16398374736309052, "fcm_dpo/delta": -0.02152249962091446, "fcm_dpo/margin": 5.285676002502441, "fcm_dpo/q_t": 0.3420785665512085, "grad_norm": 41.17580795288086, "learning_rate": 1.7419845883949098e-08, "logits/chosen": 0.3504617214202881, "logits/rejected": 0.2953767776489258, "logps/chosen": -67.76081848144531, "logps/ref_chosen": -60.697181701660156, "logps/ref_rejected": -86.12278747558594, "logps/rejected": -98.47210693359375, "loss": 0.9932, "margin_dpo/margin_mean": 5.285675525665283, "margin_dpo/margin_std": 7.387969970703125, "step": 591 }, { "epoch": 0.8949357520786092, "fcm_dpo/beta": 0.16582736372947693, "fcm_dpo/delta": 0.05810259282588959, "fcm_dpo/margin": 4.798344135284424, "fcm_dpo/q_t": 0.35599485039711, "grad_norm": 43.80263137817383, "learning_rate": 1.6938180788793556e-08, "logits/chosen": 0.2896095812320709, "logits/rejected": 0.19301211833953857, "logps/chosen": -58.819942474365234, "logps/ref_chosen": -51.237327575683594, "logps/ref_rejected": -81.60242462158203, "logps/rejected": -93.98338317871094, "loss": 0.9877, "margin_dpo/margin_mean": 4.798343658447266, "margin_dpo/margin_std": 6.754156112670898, "step": 592 }, { "epoch": 0.8964474678760394, "fcm_dpo/beta": 0.16896365582942963, "fcm_dpo/delta": 0.05192846059799194, "fcm_dpo/margin": 4.747987747192383, "fcm_dpo/q_t": 0.3541349172592163, "grad_norm": 43.26925277709961, "learning_rate": 1.6463034933723336e-08, "logits/chosen": 0.26381367444992065, "logits/rejected": 0.18189498782157898, "logps/chosen": -48.68609619140625, "logps/ref_chosen": -42.08000183105469, "logps/ref_rejected": -68.47499084472656, "logps/rejected": -79.82907104492188, "loss": 1.088, "margin_dpo/margin_mean": 4.747987747192383, "margin_dpo/margin_std": 7.88329553604126, "step": 593 }, { "epoch": 0.8979591836734694, "fcm_dpo/beta": 0.17408084869384766, "fcm_dpo/delta": 0.21379601955413818, "fcm_dpo/margin": 3.7374889850616455, "fcm_dpo/q_t": 0.3792218267917633, "grad_norm": 43.85451889038086, "learning_rate": 1.5994421609589385e-08, "logits/chosen": 0.19765910506248474, "logits/rejected": 0.17842236161231995, "logps/chosen": -71.97224426269531, "logps/ref_chosen": -63.658668518066406, "logps/ref_rejected": -70.35597229003906, "logps/rejected": -82.40703582763672, "loss": 1.1051, "margin_dpo/margin_mean": 3.7374887466430664, "margin_dpo/margin_std": 6.5507001876831055, "step": 594 }, { "epoch": 0.8994708994708994, "fcm_dpo/beta": 0.17081937193870544, "fcm_dpo/delta": -0.2570795714855194, "fcm_dpo/margin": 6.321621417999268, "fcm_dpo/q_t": 0.3001200556755066, "grad_norm": 40.53380584716797, "learning_rate": 1.553235392451377e-08, "logits/chosen": 0.3341759443283081, "logits/rejected": 0.24922290444374084, "logps/chosen": -63.90322494506836, "logps/ref_chosen": -56.21875762939453, "logps/ref_rejected": -83.95773315429688, "logps/rejected": -97.96382141113281, "loss": 0.8964, "margin_dpo/margin_mean": 6.321621894836426, "margin_dpo/margin_std": 7.671756744384766, "step": 595 }, { "epoch": 0.9009826152683296, "fcm_dpo/beta": 0.17557457089424133, "fcm_dpo/delta": 0.30167368054389954, "fcm_dpo/margin": 3.221146583557129, "fcm_dpo/q_t": 0.403555303812027, "grad_norm": 46.12143325805664, "learning_rate": 1.507684480352292e-08, "logits/chosen": 0.2137114405632019, "logits/rejected": 0.21473908424377441, "logps/chosen": -76.90093994140625, "logps/ref_chosen": -68.48088073730469, "logps/ref_rejected": -61.732967376708984, "logps/rejected": -73.3741683959961, "loss": 1.1891, "margin_dpo/margin_mean": 3.221146583557129, "margin_dpo/margin_std": 6.960387229919434, "step": 596 }, { "epoch": 0.9024943310657596, "fcm_dpo/beta": 0.17845208942890167, "fcm_dpo/delta": 0.019212447106838226, "fcm_dpo/margin": 4.664018154144287, "fcm_dpo/q_t": 0.3509853184223175, "grad_norm": 38.720130920410156, "learning_rate": 1.4627906988186111e-08, "logits/chosen": 0.2167622148990631, "logits/rejected": 0.18858037889003754, "logps/chosen": -55.6777458190918, "logps/ref_chosen": -48.85750961303711, "logps/ref_rejected": -55.068084716796875, "logps/rejected": -66.55233764648438, "loss": 0.9812, "margin_dpo/margin_mean": 4.664018154144287, "margin_dpo/margin_std": 6.593279838562012, "step": 597 }, { "epoch": 0.9040060468631897, "fcm_dpo/beta": 0.1865067183971405, "fcm_dpo/delta": 0.26280879974365234, "fcm_dpo/margin": 3.2310941219329834, "fcm_dpo/q_t": 0.39431557059288025, "grad_norm": 50.766639709472656, "learning_rate": 1.4185553036259095e-08, "logits/chosen": 0.2725675702095032, "logits/rejected": 0.20792771875858307, "logps/chosen": -67.86152648925781, "logps/ref_chosen": -58.88715362548828, "logps/ref_rejected": -81.43145751953125, "logps/rejected": -93.63692474365234, "loss": 1.1975, "margin_dpo/margin_mean": 3.2310941219329834, "margin_dpo/margin_std": 6.851696014404297, "step": 598 }, { "epoch": 0.9055177626606198, "fcm_dpo/beta": 0.19415175914764404, "fcm_dpo/delta": 0.1273910403251648, "fcm_dpo/margin": 3.755466938018799, "fcm_dpo/q_t": 0.37717127799987793, "grad_norm": 55.01444625854492, "learning_rate": 1.3749795321332885e-08, "logits/chosen": 0.38753992319107056, "logits/rejected": 0.3443824052810669, "logps/chosen": -66.8466796875, "logps/ref_chosen": -57.60719299316406, "logps/ref_rejected": -71.80469512939453, "logps/rejected": -84.79964447021484, "loss": 1.1353, "margin_dpo/margin_mean": 3.755467414855957, "margin_dpo/margin_std": 6.981395721435547, "step": 599 }, { "epoch": 0.9070294784580499, "fcm_dpo/beta": 0.20105046033859253, "fcm_dpo/delta": 0.1314106285572052, "fcm_dpo/margin": 3.578876495361328, "fcm_dpo/q_t": 0.37118592858314514, "grad_norm": 54.210601806640625, "learning_rate": 1.3320646032487393e-08, "logits/chosen": 0.3219030499458313, "logits/rejected": 0.28190159797668457, "logps/chosen": -67.0551528930664, "logps/ref_chosen": -58.44231414794922, "logps/ref_rejected": -83.64639282226562, "logps/rejected": -95.83810424804688, "loss": 1.1541, "margin_dpo/margin_mean": 3.57887601852417, "margin_dpo/margin_std": 6.589799880981445, "step": 600 }, { "epoch": 0.9070294784580499, "eval_fcm_dpo/beta": 0.19878557324409485, "eval_logits/chosen": 0.29669952392578125, "eval_logits/rejected": 0.25336459279060364, "eval_logps/chosen": -82.59681701660156, "eval_logps/ref_chosen": -74.85946655273438, "eval_logps/ref_rejected": -79.54898834228516, "eval_logps/rejected": -91.48360443115234, "eval_loss": 0.5392169952392578, "eval_margin_dpo/margin_mean": 4.197268009185791, "eval_margin_dpo/margin_std": 6.897202491760254, "eval_runtime": 38.0373, "eval_samples_per_second": 60.546, "eval_steps_per_second": 1.893, "step": 600 }, { "epoch": 0.90854119425548, "fcm_dpo/beta": 0.19258500635623932, "fcm_dpo/delta": -0.149948388338089, "fcm_dpo/margin": 5.099976539611816, "fcm_dpo/q_t": 0.33194196224212646, "grad_norm": 48.01551818847656, "learning_rate": 1.2898117173950868e-08, "logits/chosen": 0.28927916288375854, "logits/rejected": 0.22528886795043945, "logps/chosen": -62.39053726196289, "logps/ref_chosen": -55.59432601928711, "logps/ref_rejected": -83.68630981445312, "logps/rejected": -95.58250427246094, "loss": 0.9898, "margin_dpo/margin_mean": 5.099976539611816, "margin_dpo/margin_std": 7.26269006729126, "step": 601 }, { "epoch": 0.91005291005291, "fcm_dpo/beta": 0.19018596410751343, "fcm_dpo/delta": -0.12933437526226044, "fcm_dpo/margin": 5.084250450134277, "fcm_dpo/q_t": 0.3237204849720001, "grad_norm": 44.3646240234375, "learning_rate": 1.2482220564763667e-08, "logits/chosen": 0.2777378559112549, "logits/rejected": 0.24087250232696533, "logps/chosen": -62.39347839355469, "logps/ref_chosen": -56.349185943603516, "logps/ref_rejected": -71.9959716796875, "logps/rejected": -83.12451171875, "loss": 0.8914, "margin_dpo/margin_mean": 5.0842509269714355, "margin_dpo/margin_std": 6.108532905578613, "step": 602 }, { "epoch": 0.9115646258503401, "fcm_dpo/beta": 0.18402621150016785, "fcm_dpo/delta": -0.10828899592161179, "fcm_dpo/margin": 5.145867824554443, "fcm_dpo/q_t": 0.3228384852409363, "grad_norm": 39.86903762817383, "learning_rate": 1.2072967838448051e-08, "logits/chosen": 0.21399766206741333, "logits/rejected": 0.16194592416286469, "logps/chosen": -59.277183532714844, "logps/ref_chosen": -53.16838836669922, "logps/ref_rejected": -73.8604736328125, "logps/rejected": -85.11514282226562, "loss": 0.8905, "margin_dpo/margin_mean": 5.145867824554443, "margin_dpo/margin_std": 6.194065093994141, "step": 603 }, { "epoch": 0.9130763416477702, "fcm_dpo/beta": 0.18175940215587616, "fcm_dpo/delta": -0.0712120309472084, "fcm_dpo/margin": 5.031689643859863, "fcm_dpo/q_t": 0.3407028615474701, "grad_norm": 46.584659576416016, "learning_rate": 1.1670370442682459e-08, "logits/chosen": 0.2513910233974457, "logits/rejected": 0.24693363904953003, "logps/chosen": -79.33577728271484, "logps/ref_chosen": -72.64942169189453, "logps/ref_rejected": -69.8792724609375, "logps/rejected": -81.5973129272461, "loss": 1.0237, "margin_dpo/margin_mean": 5.03169059753418, "margin_dpo/margin_std": 7.756058692932129, "step": 604 }, { "epoch": 0.9145880574452003, "fcm_dpo/beta": 0.18186496198177338, "fcm_dpo/delta": 0.04116290062665939, "fcm_dpo/margin": 4.465373992919922, "fcm_dpo/q_t": 0.3500198721885681, "grad_norm": 47.683250427246094, "learning_rate": 1.1274439638981532e-08, "logits/chosen": 0.3524032235145569, "logits/rejected": 0.2950265407562256, "logps/chosen": -69.99392700195312, "logps/ref_chosen": -61.61284637451172, "logps/ref_rejected": -79.34398651123047, "logps/rejected": -92.19044494628906, "loss": 1.0198, "margin_dpo/margin_mean": 4.465373992919922, "margin_dpo/margin_std": 6.671446800231934, "step": 605 }, { "epoch": 0.9160997732426304, "fcm_dpo/beta": 0.18503758311271667, "fcm_dpo/delta": 0.008188098669052124, "fcm_dpo/margin": 4.541077613830566, "fcm_dpo/q_t": 0.34726542234420776, "grad_norm": 46.52579116821289, "learning_rate": 1.0885186502381016e-08, "logits/chosen": 0.26749229431152344, "logits/rejected": 0.21147756278514862, "logps/chosen": -61.70413589477539, "logps/ref_chosen": -54.46424102783203, "logps/ref_rejected": -79.62708282470703, "logps/rejected": -91.40805053710938, "loss": 0.9797, "margin_dpo/margin_mean": 4.541077613830566, "margin_dpo/margin_std": 6.255065441131592, "step": 606 }, { "epoch": 0.9176114890400605, "fcm_dpo/beta": 0.17840437591075897, "fcm_dpo/delta": -0.06762778759002686, "fcm_dpo/margin": 5.074832916259766, "fcm_dpo/q_t": 0.3349849283695221, "grad_norm": 46.92865753173828, "learning_rate": 1.0502621921127774e-08, "logits/chosen": 0.2795192301273346, "logits/rejected": 0.24547183513641357, "logps/chosen": -70.56070709228516, "logps/ref_chosen": -62.86086654663086, "logps/ref_rejected": -72.5501937866211, "logps/rejected": -85.32487487792969, "loss": 0.9709, "margin_dpo/margin_mean": 5.074833869934082, "margin_dpo/margin_std": 6.887617588043213, "step": 607 }, { "epoch": 0.9191232048374905, "fcm_dpo/beta": 0.18580615520477295, "fcm_dpo/delta": 0.18324339389801025, "fcm_dpo/margin": 3.6547422409057617, "fcm_dpo/q_t": 0.375454306602478, "grad_norm": 59.2383918762207, "learning_rate": 1.0126756596375685e-08, "logits/chosen": 0.2840557098388672, "logits/rejected": 0.2288847118616104, "logps/chosen": -71.39105224609375, "logps/ref_chosen": -63.18071746826172, "logps/ref_rejected": -99.15888214111328, "logps/rejected": -111.02395629882812, "loss": 1.0933, "margin_dpo/margin_mean": 3.6547417640686035, "margin_dpo/margin_std": 6.374434471130371, "step": 608 }, { "epoch": 0.9206349206349206, "fcm_dpo/beta": 0.1796351969242096, "fcm_dpo/delta": -0.18832767009735107, "fcm_dpo/margin": 5.636974811553955, "fcm_dpo/q_t": 0.3059696555137634, "grad_norm": 34.60963821411133, "learning_rate": 9.757601041885694e-09, "logits/chosen": 0.30717933177948, "logits/rejected": 0.2738397717475891, "logps/chosen": -55.23808288574219, "logps/ref_chosen": -48.62322235107422, "logps/ref_rejected": -68.28271484375, "logps/rejected": -80.53456115722656, "loss": 0.8554, "margin_dpo/margin_mean": 5.636974334716797, "margin_dpo/margin_std": 6.186499118804932, "step": 609 }, { "epoch": 0.9221466364323507, "fcm_dpo/beta": 0.17905402183532715, "fcm_dpo/delta": -0.06748346984386444, "fcm_dpo/margin": 5.089980125427246, "fcm_dpo/q_t": 0.3470401167869568, "grad_norm": 56.96098327636719, "learning_rate": 9.395165583732379e-09, "logits/chosen": 0.24939410388469696, "logits/rejected": 0.24990352988243103, "logps/chosen": -80.74764251708984, "logps/ref_chosen": -72.66513061523438, "logps/ref_rejected": -87.15310668945312, "logps/rejected": -100.32559967041016, "loss": 1.0303, "margin_dpo/margin_mean": 5.089980125427246, "margin_dpo/margin_std": 7.849494934082031, "step": 610 }, { "epoch": 0.9236583522297808, "fcm_dpo/beta": 0.18114158511161804, "fcm_dpo/delta": 0.2056940793991089, "fcm_dpo/margin": 3.6257357597351074, "fcm_dpo/q_t": 0.3758603036403656, "grad_norm": 45.56681823730469, "learning_rate": 9.03946036001449e-09, "logits/chosen": 0.3103299140930176, "logits/rejected": 0.26671937108039856, "logps/chosen": -55.76822280883789, "logps/ref_chosen": -48.30857849121094, "logps/ref_rejected": -70.6141128540039, "logps/rejected": -81.69949340820312, "loss": 1.0748, "margin_dpo/margin_mean": 3.62573504447937, "margin_dpo/margin_std": 5.797907829284668, "step": 611 }, { "epoch": 0.9251700680272109, "fcm_dpo/beta": 0.1818360984325409, "fcm_dpo/delta": -0.15190520882606506, "fcm_dpo/margin": 5.42856502532959, "fcm_dpo/q_t": 0.32411032915115356, "grad_norm": 46.31725311279297, "learning_rate": 8.690495320571839e-09, "logits/chosen": 0.22141186892986298, "logits/rejected": 0.1634463667869568, "logps/chosen": -68.8099365234375, "logps/ref_chosen": -61.23155975341797, "logps/ref_rejected": -94.37979888916016, "logps/rejected": -107.38673400878906, "loss": 0.9778, "margin_dpo/margin_mean": 5.428564071655273, "margin_dpo/margin_std": 7.684731483459473, "step": 612 }, { "epoch": 0.926681783824641, "fcm_dpo/beta": 0.17303548753261566, "fcm_dpo/delta": -0.30681759119033813, "fcm_dpo/margin": 6.491569519042969, "fcm_dpo/q_t": 0.29310303926467896, "grad_norm": 38.10737991333008, "learning_rate": 8.348280226706722e-09, "logits/chosen": 0.2111150324344635, "logits/rejected": 0.19616663455963135, "logps/chosen": -60.43955612182617, "logps/ref_chosen": -53.98310852050781, "logps/ref_rejected": -58.32208251953125, "logps/rejected": -71.27009582519531, "loss": 0.8138, "margin_dpo/margin_mean": 6.491570472717285, "margin_dpo/margin_std": 6.981858253479004, "step": 613 }, { "epoch": 0.9281934996220711, "fcm_dpo/beta": 0.17160148918628693, "fcm_dpo/delta": 0.05498047545552254, "fcm_dpo/margin": 4.653756141662598, "fcm_dpo/q_t": 0.3367941379547119, "grad_norm": 41.467777252197266, "learning_rate": 8.012824650910937e-09, "logits/chosen": 0.28697025775909424, "logits/rejected": 0.27061259746551514, "logps/chosen": -68.2115249633789, "logps/ref_chosen": -60.24303436279297, "logps/ref_rejected": -72.26258850097656, "logps/rejected": -84.88483428955078, "loss": 0.9136, "margin_dpo/margin_mean": 4.653756141662598, "margin_dpo/margin_std": 5.355663299560547, "step": 614 }, { "epoch": 0.9297052154195011, "fcm_dpo/beta": 0.1685989797115326, "fcm_dpo/delta": -0.06165684387087822, "fcm_dpo/margin": 5.367961883544922, "fcm_dpo/q_t": 0.33193883299827576, "grad_norm": 50.48664474487305, "learning_rate": 7.684137976598088e-09, "logits/chosen": 0.28863102197647095, "logits/rejected": 0.24735435843467712, "logps/chosen": -80.24966430664062, "logps/ref_chosen": -72.09467315673828, "logps/ref_rejected": -104.02980041503906, "logps/rejected": -117.55276489257812, "loss": 0.9757, "margin_dpo/margin_mean": 5.367961883544922, "margin_dpo/margin_std": 7.349823951721191, "step": 615 }, { "epoch": 0.9312169312169312, "fcm_dpo/beta": 0.17112982273101807, "fcm_dpo/delta": 0.10860873758792877, "fcm_dpo/margin": 4.380434036254883, "fcm_dpo/q_t": 0.362366259098053, "grad_norm": 41.709930419921875, "learning_rate": 7.36222939784098e-09, "logits/chosen": 0.3024919033050537, "logits/rejected": 0.23271536827087402, "logps/chosen": -66.6510009765625, "logps/ref_chosen": -58.530723571777344, "logps/ref_rejected": -75.48025512695312, "logps/rejected": -87.98096466064453, "loss": 1.0149, "margin_dpo/margin_mean": 4.380434513092041, "margin_dpo/margin_std": 6.544014930725098, "step": 616 }, { "epoch": 0.9327286470143613, "fcm_dpo/beta": 0.17270605266094208, "fcm_dpo/delta": 0.0973830297589302, "fcm_dpo/margin": 4.382899284362793, "fcm_dpo/q_t": 0.35784029960632324, "grad_norm": 44.091949462890625, "learning_rate": 7.047107919114586e-09, "logits/chosen": 0.25396719574928284, "logits/rejected": 0.21397271752357483, "logps/chosen": -66.15753936767578, "logps/ref_chosen": -57.608673095703125, "logps/ref_rejected": -81.22109985351562, "logps/rejected": -94.15286254882812, "loss": 1.0028, "margin_dpo/margin_mean": 4.382899284362793, "margin_dpo/margin_std": 6.131780624389648, "step": 617 }, { "epoch": 0.9342403628117913, "fcm_dpo/beta": 0.17426848411560059, "fcm_dpo/delta": 0.008210502564907074, "fcm_dpo/margin": 4.818388938903809, "fcm_dpo/q_t": 0.34145089983940125, "grad_norm": 44.20525360107422, "learning_rate": 6.738782355044048e-09, "logits/chosen": 0.2514890134334564, "logits/rejected": 0.1663253903388977, "logps/chosen": -63.71135711669922, "logps/ref_chosen": -56.69594192504883, "logps/ref_rejected": -85.92362976074219, "logps/rejected": -97.75743865966797, "loss": 0.9383, "margin_dpo/margin_mean": 4.818388938903809, "margin_dpo/margin_std": 6.156173229217529, "step": 618 }, { "epoch": 0.9357520786092215, "fcm_dpo/beta": 0.1756330132484436, "fcm_dpo/delta": 0.005198441445827484, "fcm_dpo/margin": 4.809282302856445, "fcm_dpo/q_t": 0.3440103828907013, "grad_norm": 44.29880142211914, "learning_rate": 6.437261330158206e-09, "logits/chosen": 0.343215674161911, "logits/rejected": 0.2847067713737488, "logps/chosen": -61.114315032958984, "logps/ref_chosen": -54.05841827392578, "logps/ref_rejected": -83.55493927001953, "logps/rejected": -95.42012023925781, "loss": 0.9784, "margin_dpo/margin_mean": 4.809283256530762, "margin_dpo/margin_std": 6.668694496154785, "step": 619 }, { "epoch": 0.9372637944066515, "fcm_dpo/beta": 0.18186062574386597, "fcm_dpo/delta": 0.08738522231578827, "fcm_dpo/margin": 4.195881366729736, "fcm_dpo/q_t": 0.36279648542404175, "grad_norm": 51.88892364501953, "learning_rate": 6.142553278648238e-09, "logits/chosen": 0.29026031494140625, "logits/rejected": 0.27886366844177246, "logps/chosen": -70.6176986694336, "logps/ref_chosen": -63.36971664428711, "logps/ref_rejected": -65.68269348144531, "logps/rejected": -77.12655639648438, "loss": 1.034, "margin_dpo/margin_mean": 4.1958818435668945, "margin_dpo/margin_std": 6.316009521484375, "step": 620 }, { "epoch": 0.9387755102040817, "fcm_dpo/beta": 0.18603846430778503, "fcm_dpo/delta": 0.1751212179660797, "fcm_dpo/margin": 3.6776561737060547, "fcm_dpo/q_t": 0.37486201524734497, "grad_norm": 51.965782165527344, "learning_rate": 5.854666444131934e-09, "logits/chosen": 0.3206130862236023, "logits/rejected": 0.2369777262210846, "logps/chosen": -60.00037384033203, "logps/ref_chosen": -52.321224212646484, "logps/ref_rejected": -88.09001159667969, "logps/rejected": -99.44680786132812, "loss": 1.0999, "margin_dpo/margin_mean": 3.6776556968688965, "margin_dpo/margin_std": 6.303377151489258, "step": 621 }, { "epoch": 0.9402872260015117, "fcm_dpo/beta": 0.18810513615608215, "fcm_dpo/delta": 0.0432133674621582, "fcm_dpo/margin": 4.305281639099121, "fcm_dpo/q_t": 0.35074567794799805, "grad_norm": 47.06365966796875, "learning_rate": 5.573608879422875e-09, "logits/chosen": 0.24988241493701935, "logits/rejected": 0.2150622308254242, "logps/chosen": -67.39698028564453, "logps/ref_chosen": -59.86545944213867, "logps/ref_rejected": -81.86668395996094, "logps/rejected": -93.7034912109375, "loss": 0.9729, "margin_dpo/margin_mean": 4.305282115936279, "margin_dpo/margin_std": 5.978768825531006, "step": 622 }, { "epoch": 0.9417989417989417, "fcm_dpo/beta": 0.18544979393482208, "fcm_dpo/delta": -0.08368836343288422, "fcm_dpo/margin": 4.992826461791992, "fcm_dpo/q_t": 0.32211193442344666, "grad_norm": 41.866485595703125, "learning_rate": 5.299388446305342e-09, "logits/chosen": 0.2434006631374359, "logits/rejected": 0.1846180558204651, "logps/chosen": -75.88226318359375, "logps/ref_chosen": -67.36846160888672, "logps/ref_rejected": -82.02733612060547, "logps/rejected": -95.53396606445312, "loss": 0.8903, "margin_dpo/margin_mean": 4.992826461791992, "margin_dpo/margin_std": 5.761674880981445, "step": 623 }, { "epoch": 0.9433106575963719, "fcm_dpo/beta": 0.18060320615768433, "fcm_dpo/delta": -0.112208291888237, "fcm_dpo/margin": 5.261911392211914, "fcm_dpo/q_t": 0.33041465282440186, "grad_norm": 42.327178955078125, "learning_rate": 5.03201281531429e-09, "logits/chosen": 0.25088438391685486, "logits/rejected": 0.16487614810466766, "logps/chosen": -58.05131530761719, "logps/ref_chosen": -51.02655029296875, "logps/ref_rejected": -76.49203491210938, "logps/rejected": -88.77871704101562, "loss": 0.9333, "margin_dpo/margin_mean": 5.261911392211914, "margin_dpo/margin_std": 6.842192649841309, "step": 624 }, { "epoch": 0.9448223733938019, "fcm_dpo/beta": 0.1855674684047699, "fcm_dpo/delta": 0.14608462154865265, "fcm_dpo/margin": 3.8421430587768555, "fcm_dpo/q_t": 0.37177574634552, "grad_norm": 48.58590316772461, "learning_rate": 4.7714894655209174e-09, "logits/chosen": 0.33973124623298645, "logits/rejected": 0.2644117474555969, "logps/chosen": -62.346839904785156, "logps/ref_chosen": -54.20761489868164, "logps/ref_rejected": -84.93669128417969, "logps/rejected": -96.91806030273438, "loss": 1.0799, "margin_dpo/margin_mean": 3.8421425819396973, "margin_dpo/margin_std": 6.479580879211426, "step": 625 }, { "epoch": 0.9463340891912321, "fcm_dpo/beta": 0.180339515209198, "fcm_dpo/delta": -0.19849389791488647, "fcm_dpo/margin": 5.699306011199951, "fcm_dpo/q_t": 0.32750552892684937, "grad_norm": 42.651458740234375, "learning_rate": 4.517825684323323e-09, "logits/chosen": 0.34960970282554626, "logits/rejected": 0.24316346645355225, "logps/chosen": -51.93178176879883, "logps/ref_chosen": -45.06201934814453, "logps/ref_rejected": -89.66368103027344, "logps/rejected": -102.23274230957031, "loss": 0.9744, "margin_dpo/margin_mean": 5.699305534362793, "margin_dpo/margin_std": 8.007684707641602, "step": 626 }, { "epoch": 0.9478458049886621, "fcm_dpo/beta": 0.1721944957971573, "fcm_dpo/delta": -0.2246345579624176, "fcm_dpo/margin": 6.09731388092041, "fcm_dpo/q_t": 0.31280094385147095, "grad_norm": 46.61338424682617, "learning_rate": 4.271028567242818e-09, "logits/chosen": 0.22155846655368805, "logits/rejected": 0.12805700302124023, "logps/chosen": -66.43733215332031, "logps/ref_chosen": -58.791053771972656, "logps/ref_rejected": -94.90802001953125, "logps/rejected": -108.651611328125, "loss": 0.893, "margin_dpo/margin_mean": 6.097313404083252, "margin_dpo/margin_std": 7.516200065612793, "step": 627 }, { "epoch": 0.9493575207860923, "fcm_dpo/beta": 0.16774258017539978, "fcm_dpo/delta": -0.2266627550125122, "fcm_dpo/margin": 6.269933700561523, "fcm_dpo/q_t": 0.3056507706642151, "grad_norm": 40.3947868347168, "learning_rate": 4.0311050177251895e-09, "logits/chosen": 0.2967239022254944, "logits/rejected": 0.26311272382736206, "logps/chosen": -59.4424934387207, "logps/ref_chosen": -52.80357360839844, "logps/ref_rejected": -76.49468994140625, "logps/rejected": -89.4035415649414, "loss": 0.927, "margin_dpo/margin_mean": 6.269933700561523, "margin_dpo/margin_std": 7.717181205749512, "step": 628 }, { "epoch": 0.9508692365835223, "fcm_dpo/beta": 0.1646927446126938, "fcm_dpo/delta": 0.023073244839906693, "fcm_dpo/margin": 5.028815746307373, "fcm_dpo/q_t": 0.3410007059574127, "grad_norm": 38.630470275878906, "learning_rate": 3.798061746947995e-09, "logits/chosen": 0.2684643864631653, "logits/rejected": 0.2583003640174866, "logps/chosen": -77.83274841308594, "logps/ref_chosen": -70.71749877929688, "logps/ref_rejected": -78.96273803710938, "logps/rejected": -91.1068115234375, "loss": 0.907, "margin_dpo/margin_mean": 5.028815746307373, "margin_dpo/margin_std": 5.933478832244873, "step": 629 }, { "epoch": 0.9523809523809523, "fcm_dpo/beta": 0.16420958936214447, "fcm_dpo/delta": -0.05578102171421051, "fcm_dpo/margin": 5.478397846221924, "fcm_dpo/q_t": 0.3335376977920532, "grad_norm": 35.65196228027344, "learning_rate": 3.5719052736323806e-09, "logits/chosen": 0.2280709147453308, "logits/rejected": 0.18566131591796875, "logps/chosen": -63.271915435791016, "logps/ref_chosen": -56.201412200927734, "logps/ref_rejected": -74.69807434082031, "logps/rejected": -87.24697875976562, "loss": 0.9099, "margin_dpo/margin_mean": 5.478397846221924, "margin_dpo/margin_std": 6.630637168884277, "step": 630 }, { "epoch": 0.9538926681783825, "fcm_dpo/beta": 0.156265988945961, "fcm_dpo/delta": -0.1330309510231018, "fcm_dpo/margin": 6.156580924987793, "fcm_dpo/q_t": 0.32488080859184265, "grad_norm": 43.58830261230469, "learning_rate": 3.352641923861144e-09, "logits/chosen": 0.36850327253341675, "logits/rejected": 0.2770962715148926, "logps/chosen": -65.9747314453125, "logps/ref_chosen": -58.82059860229492, "logps/ref_rejected": -96.51437377929688, "logps/rejected": -109.82508850097656, "loss": 0.9339, "margin_dpo/margin_mean": 6.156581878662109, "margin_dpo/margin_std": 7.784612655639648, "step": 631 }, { "epoch": 0.9554043839758125, "fcm_dpo/beta": 0.15440338850021362, "fcm_dpo/delta": -0.1414494812488556, "fcm_dpo/margin": 6.329607963562012, "fcm_dpo/q_t": 0.306906521320343, "grad_norm": 34.58030700683594, "learning_rate": 3.140277830901428e-09, "logits/chosen": 0.33119070529937744, "logits/rejected": 0.3036194443702698, "logps/chosen": -65.92765808105469, "logps/ref_chosen": -58.786048889160156, "logps/ref_rejected": -67.21923828125, "logps/rejected": -80.69046020507812, "loss": 0.8424, "margin_dpo/margin_mean": 6.3296074867248535, "margin_dpo/margin_std": 6.613114356994629, "step": 632 }, { "epoch": 0.9569160997732427, "fcm_dpo/beta": 0.1560203731060028, "fcm_dpo/delta": 0.08354716002941132, "fcm_dpo/margin": 4.950506210327148, "fcm_dpo/q_t": 0.3561435043811798, "grad_norm": 34.561824798583984, "learning_rate": 2.9348189350335007e-09, "logits/chosen": 0.2669551968574524, "logits/rejected": 0.2126702070236206, "logps/chosen": -58.36284255981445, "logps/ref_chosen": -52.13019561767578, "logps/ref_rejected": -67.23016357421875, "logps/rejected": -78.41331481933594, "loss": 0.9923, "margin_dpo/margin_mean": 4.950506210327148, "margin_dpo/margin_std": 7.004974365234375, "step": 633 }, { "epoch": 0.9584278155706727, "fcm_dpo/beta": 0.16562215983867645, "fcm_dpo/delta": 0.3642282485961914, "fcm_dpo/margin": 3.030942440032959, "fcm_dpo/q_t": 0.4143024682998657, "grad_norm": 51.9135856628418, "learning_rate": 2.736270983384276e-09, "logits/chosen": 0.32782524824142456, "logits/rejected": 0.3300801217556, "logps/chosen": -68.82040405273438, "logps/ref_chosen": -60.97979736328125, "logps/ref_rejected": -58.50825119018555, "logps/rejected": -69.37980651855469, "loss": 1.2159, "margin_dpo/margin_mean": 3.03094220161438, "margin_dpo/margin_std": 6.932095527648926, "step": 634 }, { "epoch": 0.9599395313681028, "fcm_dpo/beta": 0.172117680311203, "fcm_dpo/delta": 0.15006113052368164, "fcm_dpo/margin": 4.126021862030029, "fcm_dpo/q_t": 0.3806537687778473, "grad_norm": 51.643699645996094, "learning_rate": 2.5446395297668287e-09, "logits/chosen": 0.200457364320755, "logits/rejected": 0.14902645349502563, "logps/chosen": -75.00791931152344, "logps/ref_chosen": -65.9730224609375, "logps/ref_rejected": -85.61317443847656, "logps/rejected": -98.77409362792969, "loss": 1.1453, "margin_dpo/margin_mean": 4.126021385192871, "margin_dpo/margin_std": 7.76697301864624, "step": 635 }, { "epoch": 0.9614512471655329, "fcm_dpo/beta": 0.16995760798454285, "fcm_dpo/delta": -0.10415925085544586, "fcm_dpo/margin": 5.553957462310791, "fcm_dpo/q_t": 0.31619903445243835, "grad_norm": 38.7966423034668, "learning_rate": 2.359929934524829e-09, "logits/chosen": 0.22739680111408234, "logits/rejected": 0.1509087234735489, "logps/chosen": -56.36328125, "logps/ref_chosen": -49.140167236328125, "logps/ref_rejected": -81.26971435546875, "logps/rejected": -94.04678344726562, "loss": 0.8522, "margin_dpo/margin_mean": 5.553957939147949, "margin_dpo/margin_std": 6.102416515350342, "step": 636 }, { "epoch": 0.9629629629629629, "fcm_dpo/beta": 0.17249064147472382, "fcm_dpo/delta": 0.06623414158821106, "fcm_dpo/margin": 4.564568519592285, "fcm_dpo/q_t": 0.3609241545200348, "grad_norm": 49.76155471801758, "learning_rate": 2.1821473643827137e-09, "logits/chosen": 0.25316452980041504, "logits/rejected": 0.18915875256061554, "logps/chosen": -82.8052978515625, "logps/ref_chosen": -73.69658660888672, "logps/ref_rejected": -83.01487731933594, "logps/rejected": -96.68817138671875, "loss": 1.0562, "margin_dpo/margin_mean": 4.564568519592285, "margin_dpo/margin_std": 7.476813316345215, "step": 637 }, { "epoch": 0.9644746787603931, "fcm_dpo/beta": 0.16861847043037415, "fcm_dpo/delta": -0.1186608374118805, "fcm_dpo/margin": 5.676681041717529, "fcm_dpo/q_t": 0.3233264684677124, "grad_norm": 43.48808670043945, "learning_rate": 2.0112967923011646e-09, "logits/chosen": 0.23133961856365204, "logits/rejected": 0.18495085835456848, "logps/chosen": -71.15939331054688, "logps/ref_chosen": -62.78158187866211, "logps/ref_rejected": -85.40478515625, "logps/rejected": -99.45928955078125, "loss": 0.8993, "margin_dpo/margin_mean": 5.6766815185546875, "margin_dpo/margin_std": 6.878664970397949, "step": 638 }, { "epoch": 0.9659863945578231, "fcm_dpo/beta": 0.16743648052215576, "fcm_dpo/delta": -0.024798255413770676, "fcm_dpo/margin": 5.211551666259766, "fcm_dpo/q_t": 0.33889099955558777, "grad_norm": 41.27330780029297, "learning_rate": 1.847382997337943e-09, "logits/chosen": 0.2327461540699005, "logits/rejected": 0.12944665551185608, "logps/chosen": -61.39842987060547, "logps/ref_chosen": -53.76658630371094, "logps/ref_rejected": -72.30009460449219, "logps/rejected": -85.14349365234375, "loss": 0.941, "margin_dpo/margin_mean": 5.211551189422607, "margin_dpo/margin_std": 6.853907108306885, "step": 639 }, { "epoch": 0.9674981103552532, "fcm_dpo/beta": 0.170379638671875, "fcm_dpo/delta": 0.15754404664039612, "fcm_dpo/margin": 4.130763053894043, "fcm_dpo/q_t": 0.3569382131099701, "grad_norm": 44.13318634033203, "learning_rate": 1.690410564514244e-09, "logits/chosen": 0.34077322483062744, "logits/rejected": 0.29080483317375183, "logps/chosen": -59.03297805786133, "logps/ref_chosen": -51.41777801513672, "logps/ref_rejected": -77.27879333496094, "logps/rejected": -89.0247573852539, "loss": 1.0821, "margin_dpo/margin_mean": 4.130763053894043, "margin_dpo/margin_std": 6.8070220947265625, "step": 640 }, { "epoch": 0.9690098261526833, "fcm_dpo/beta": 0.17606830596923828, "fcm_dpo/delta": 0.12295868247747421, "fcm_dpo/margin": 4.176577568054199, "fcm_dpo/q_t": 0.35564327239990234, "grad_norm": 62.344032287597656, "learning_rate": 1.5403838846864692e-09, "logits/chosen": 0.2501823902130127, "logits/rejected": 0.22024014592170715, "logps/chosen": -79.21989440917969, "logps/ref_chosen": -71.0546646118164, "logps/ref_rejected": -82.2440185546875, "logps/rejected": -94.5858154296875, "loss": 1.0164, "margin_dpo/margin_mean": 4.176577091217041, "margin_dpo/margin_std": 6.268715858459473, "step": 641 }, { "epoch": 0.9705215419501134, "fcm_dpo/beta": 0.18004068732261658, "fcm_dpo/delta": 0.20534493029117584, "fcm_dpo/margin": 3.6544651985168457, "fcm_dpo/q_t": 0.3828880786895752, "grad_norm": 56.02330780029297, "learning_rate": 1.3973071544233218e-09, "logits/chosen": 0.2151576280593872, "logits/rejected": 0.21160337328910828, "logps/chosen": -77.8289566040039, "logps/ref_chosen": -68.92927551269531, "logps/ref_rejected": -70.85682678222656, "logps/rejected": -83.41098022460938, "loss": 1.1699, "margin_dpo/margin_mean": 3.6544651985168457, "margin_dpo/margin_std": 7.091136455535889, "step": 642 }, { "epoch": 0.9720332577475435, "fcm_dpo/beta": 0.18671706318855286, "fcm_dpo/delta": 0.19122450053691864, "fcm_dpo/margin": 3.5908524990081787, "fcm_dpo/q_t": 0.3652816712856293, "grad_norm": 699.5745849609375, "learning_rate": 1.261184375888541e-09, "logits/chosen": 0.19385960698127747, "logits/rejected": 0.12275560200214386, "logps/chosen": -73.89380645751953, "logps/ref_chosen": -65.30903625488281, "logps/ref_rejected": -83.61613464355469, "logps/rejected": -95.791748046875, "loss": 1.2635, "margin_dpo/margin_mean": 3.590852737426758, "margin_dpo/margin_std": 8.115373611450195, "step": 643 }, { "epoch": 0.9735449735449735, "fcm_dpo/beta": 0.1939758062362671, "fcm_dpo/delta": 0.07629616558551788, "fcm_dpo/margin": 4.0165934562683105, "fcm_dpo/q_t": 0.36271920800209045, "grad_norm": 54.096920013427734, "learning_rate": 1.1320193567288527e-09, "logits/chosen": 0.3378884792327881, "logits/rejected": 0.30602043867111206, "logps/chosen": -58.58208084106445, "logps/ref_chosen": -51.002601623535156, "logps/ref_rejected": -64.46372985839844, "logps/rejected": -76.05979919433594, "loss": 1.1655, "margin_dpo/margin_mean": 4.0165934562683105, "margin_dpo/margin_std": 7.581402778625488, "step": 644 }, { "epoch": 0.9750566893424036, "fcm_dpo/beta": 0.19180044531822205, "fcm_dpo/delta": -0.06086551398038864, "fcm_dpo/margin": 4.7189226150512695, "fcm_dpo/q_t": 0.3323266804218292, "grad_norm": 49.45684051513672, "learning_rate": 1.0098157099674987e-09, "logits/chosen": 0.23067082464694977, "logits/rejected": 0.2046801596879959, "logps/chosen": -69.1575698852539, "logps/ref_chosen": -60.963409423828125, "logps/ref_rejected": -69.73353576660156, "logps/rejected": -82.64661407470703, "loss": 0.9268, "margin_dpo/margin_mean": 4.7189226150512695, "margin_dpo/margin_std": 6.106879234313965, "step": 645 }, { "epoch": 0.9765684051398337, "fcm_dpo/beta": 0.18813073635101318, "fcm_dpo/delta": -0.12552106380462646, "fcm_dpo/margin": 5.120940208435059, "fcm_dpo/q_t": 0.32650938630104065, "grad_norm": 43.06781005859375, "learning_rate": 8.945768539031783e-10, "logits/chosen": 0.33291056752204895, "logits/rejected": 0.28602612018585205, "logps/chosen": -71.06059265136719, "logps/ref_chosen": -62.290069580078125, "logps/ref_rejected": -85.54812622070312, "logps/rejected": -99.43959045410156, "loss": 0.9471, "margin_dpo/margin_mean": 5.120940208435059, "margin_dpo/margin_std": 6.938012599945068, "step": 646 }, { "epoch": 0.9780801209372638, "fcm_dpo/beta": 0.17964306473731995, "fcm_dpo/delta": -0.3153572380542755, "fcm_dpo/margin": 6.294256210327148, "fcm_dpo/q_t": 0.29699230194091797, "grad_norm": 52.842838287353516, "learning_rate": 7.863060120144316e-10, "logits/chosen": 0.3208690881729126, "logits/rejected": 0.23945260047912598, "logps/chosen": -75.46033477783203, "logps/ref_chosen": -67.515869140625, "logps/ref_rejected": -101.50871276855469, "logps/rejected": -115.74742126464844, "loss": 0.8364, "margin_dpo/margin_mean": 6.294255256652832, "margin_dpo/margin_std": 7.129534721374512, "step": 647 }, { "epoch": 0.9795918367346939, "fcm_dpo/beta": 0.17466625571250916, "fcm_dpo/delta": 0.014513436704874039, "fcm_dpo/margin": 4.785521507263184, "fcm_dpo/q_t": 0.33822280168533325, "grad_norm": 44.56800842285156, "learning_rate": 6.850062128694045e-10, "logits/chosen": 0.24528364837169647, "logits/rejected": 0.18555161356925964, "logps/chosen": -73.12615203857422, "logps/ref_chosen": -64.59593963623047, "logps/ref_rejected": -83.384033203125, "logps/rejected": -96.69976806640625, "loss": 0.9935, "margin_dpo/margin_mean": 4.785521507263184, "margin_dpo/margin_std": 6.645514488220215, "step": 648 }, { "epoch": 0.981103552532124, "fcm_dpo/beta": 0.17323724925518036, "fcm_dpo/delta": -0.04737182706594467, "fcm_dpo/margin": 5.144677639007568, "fcm_dpo/q_t": 0.3352319300174713, "grad_norm": 47.017181396484375, "learning_rate": 5.906802900412788e-10, "logits/chosen": 0.2713914215564728, "logits/rejected": 0.21891635656356812, "logps/chosen": -56.810150146484375, "logps/ref_chosen": -49.30964660644531, "logps/ref_rejected": -73.73710632324219, "logps/rejected": -86.38228607177734, "loss": 0.9812, "margin_dpo/margin_mean": 5.14467716217041, "margin_dpo/margin_std": 7.023676872253418, "step": 649 }, { "epoch": 0.982615268329554, "fcm_dpo/beta": 0.17615830898284912, "fcm_dpo/delta": 0.04753459244966507, "fcm_dpo/margin": 4.5751237869262695, "fcm_dpo/q_t": 0.3590129613876343, "grad_norm": 56.19305419921875, "learning_rate": 5.033308820289184e-10, "logits/chosen": 0.3633171021938324, "logits/rejected": 0.3100350499153137, "logps/chosen": -62.552730560302734, "logps/ref_chosen": -55.06325912475586, "logps/ref_rejected": -77.39610290527344, "logps/rejected": -89.460693359375, "loss": 1.0783, "margin_dpo/margin_mean": 4.575124740600586, "margin_dpo/margin_std": 7.5110650062561035, "step": 650 }, { "epoch": 0.9841269841269841, "fcm_dpo/beta": 0.17935200035572052, "fcm_dpo/delta": 0.041574351489543915, "fcm_dpo/margin": 4.506626129150391, "fcm_dpo/q_t": 0.3520933985710144, "grad_norm": 50.82936477661133, "learning_rate": 4.2296043218295606e-10, "logits/chosen": 0.35629215836524963, "logits/rejected": 0.2943439483642578, "logps/chosen": -61.30165481567383, "logps/ref_chosen": -54.065162658691406, "logps/ref_rejected": -77.79080200195312, "logps/rejected": -89.53392791748047, "loss": 0.9943, "margin_dpo/margin_mean": 4.506626129150391, "margin_dpo/margin_std": 6.297160625457764, "step": 651 }, { "epoch": 0.9856386999244142, "fcm_dpo/beta": 0.17824706435203552, "fcm_dpo/delta": 0.06212994083762169, "fcm_dpo/margin": 4.44667911529541, "fcm_dpo/q_t": 0.3550780117511749, "grad_norm": 47.756290435791016, "learning_rate": 3.4957118863768176e-10, "logits/chosen": 0.30222636461257935, "logits/rejected": 0.24972417950630188, "logps/chosen": -72.32354736328125, "logps/ref_chosen": -63.64030456542969, "logps/ref_rejected": -78.86882019042969, "logps/rejected": -91.99874114990234, "loss": 1.0128, "margin_dpo/margin_mean": 4.446678161621094, "margin_dpo/margin_std": 6.5444793701171875, "step": 652 }, { "epoch": 0.9871504157218443, "fcm_dpo/beta": 0.1795337200164795, "fcm_dpo/delta": -0.0007160389795899391, "fcm_dpo/margin": 4.738056182861328, "fcm_dpo/q_t": 0.34943753480911255, "grad_norm": 47.07265090942383, "learning_rate": 2.831652042480093e-10, "logits/chosen": 0.24882294237613678, "logits/rejected": 0.20429188013076782, "logps/chosen": -69.21574401855469, "logps/ref_chosen": -61.668373107910156, "logps/ref_rejected": -73.83012390136719, "logps/rejected": -86.11554718017578, "loss": 0.9995, "margin_dpo/margin_mean": 4.73805570602417, "margin_dpo/margin_std": 6.993717193603516, "step": 653 }, { "epoch": 0.9886621315192744, "fcm_dpo/beta": 0.1786322295665741, "fcm_dpo/delta": 0.027579210698604584, "fcm_dpo/margin": 4.607399940490723, "fcm_dpo/q_t": 0.3561086058616638, "grad_norm": 51.774452209472656, "learning_rate": 2.2374433653205016e-10, "logits/chosen": 0.26551055908203125, "logits/rejected": 0.18796709179878235, "logps/chosen": -65.25658416748047, "logps/ref_chosen": -57.568267822265625, "logps/ref_rejected": -87.74789428710938, "logps/rejected": -100.04360961914062, "loss": 1.0041, "margin_dpo/margin_mean": 4.6073994636535645, "margin_dpo/margin_std": 6.91198205947876, "step": 654 }, { "epoch": 0.9901738473167044, "fcm_dpo/beta": 0.17491678893566132, "fcm_dpo/delta": -0.10167790949344635, "fcm_dpo/margin": 5.344045639038086, "fcm_dpo/q_t": 0.3169988989830017, "grad_norm": 36.369998931884766, "learning_rate": 1.7131024761923852e-10, "logits/chosen": 0.2785993218421936, "logits/rejected": 0.1956745982170105, "logps/chosen": -58.907257080078125, "logps/ref_chosen": -52.14714813232422, "logps/ref_rejected": -80.85014343261719, "logps/rejected": -92.95428466796875, "loss": 0.8513, "margin_dpo/margin_mean": 5.344045639038086, "margin_dpo/margin_std": 5.551706314086914, "step": 655 }, { "epoch": 0.9916855631141346, "fcm_dpo/beta": 0.17215146124362946, "fcm_dpo/delta": -0.15092015266418457, "fcm_dpo/margin": 5.719700813293457, "fcm_dpo/q_t": 0.3209837079048157, "grad_norm": 42.81301498413086, "learning_rate": 1.2586440420372934e-10, "logits/chosen": 0.256664901971817, "logits/rejected": 0.20875748991966248, "logps/chosen": -81.50572204589844, "logps/ref_chosen": -73.25672912597656, "logps/ref_rejected": -85.35127258300781, "logps/rejected": -99.3199691772461, "loss": 0.889, "margin_dpo/margin_mean": 5.719700336456299, "margin_dpo/margin_std": 6.941084861755371, "step": 656 }, { "epoch": 0.9931972789115646, "fcm_dpo/beta": 0.16662049293518066, "fcm_dpo/delta": -0.16033346951007843, "fcm_dpo/margin": 5.956956386566162, "fcm_dpo/q_t": 0.3180977702140808, "grad_norm": 39.3922004699707, "learning_rate": 8.740807750345913e-11, "logits/chosen": 0.38615646958351135, "logits/rejected": 0.30962836742401123, "logps/chosen": -57.52777862548828, "logps/ref_chosen": -49.72339630126953, "logps/ref_rejected": -75.1568603515625, "logps/rejected": -88.91819763183594, "loss": 0.9139, "margin_dpo/margin_mean": 5.956956386566162, "margin_dpo/margin_std": 7.429527282714844, "step": 657 }, { "epoch": 0.9947089947089947, "fcm_dpo/beta": 0.17064854502677917, "fcm_dpo/delta": 0.09364507347345352, "fcm_dpo/margin": 4.452823638916016, "fcm_dpo/q_t": 0.36766210198402405, "grad_norm": 46.99626922607422, "learning_rate": 5.594234322453539e-11, "logits/chosen": 0.3046637773513794, "logits/rejected": 0.2689476013183594, "logps/chosen": -71.40650939941406, "logps/ref_chosen": -63.04634094238281, "logps/ref_rejected": -83.44963073730469, "logps/rejected": -96.26261901855469, "loss": 1.124, "margin_dpo/margin_mean": 4.452824115753174, "margin_dpo/margin_std": 7.831037521362305, "step": 658 }, { "epoch": 0.9962207105064248, "fcm_dpo/beta": 0.17275840044021606, "fcm_dpo/delta": 0.1908574402332306, "fcm_dpo/margin": 3.889401435852051, "fcm_dpo/q_t": 0.3744069039821625, "grad_norm": 44.81747817993164, "learning_rate": 3.146808153123293e-11, "logits/chosen": 0.3521784543991089, "logits/rejected": 0.29007095098495483, "logps/chosen": -63.78364562988281, "logps/ref_chosen": -55.0802001953125, "logps/ref_rejected": -71.91049194335938, "logps/rejected": -84.50334167480469, "loss": 1.1285, "margin_dpo/margin_mean": 3.8894009590148926, "margin_dpo/margin_std": 7.039710998535156, "step": 659 }, { "epoch": 0.9977324263038548, "fcm_dpo/beta": 0.17371876537799835, "fcm_dpo/delta": -0.0941925197839737, "fcm_dpo/margin": 5.3846049308776855, "fcm_dpo/q_t": 0.32256758213043213, "grad_norm": 42.97410202026367, "learning_rate": 1.3985977021235829e-11, "logits/chosen": 0.3720157742500305, "logits/rejected": 0.3104935884475708, "logps/chosen": -62.22294616699219, "logps/ref_chosen": -54.525917053222656, "logps/ref_rejected": -81.23604583740234, "logps/rejected": -94.31768798828125, "loss": 0.8823, "margin_dpo/margin_mean": 5.384605407714844, "margin_dpo/margin_std": 6.330369472503662, "step": 660 }, { "epoch": 0.999244142101285, "fcm_dpo/beta": 0.17523688077926636, "fcm_dpo/delta": 0.0759362056851387, "fcm_dpo/margin": 4.449112892150879, "fcm_dpo/q_t": 0.3557879328727722, "grad_norm": 45.270999908447266, "learning_rate": 3.4965187065971735e-12, "logits/chosen": 0.24594025313854218, "logits/rejected": 0.17398732900619507, "logps/chosen": -69.74138641357422, "logps/ref_chosen": -60.37263870239258, "logps/ref_rejected": -77.42874145507812, "logps/rejected": -91.24661254882812, "loss": 1.0872, "margin_dpo/margin_mean": 4.449113845825195, "margin_dpo/margin_std": 7.379522323608398, "step": 661 }, { "epoch": 0.999244142101285, "step": 661, "total_flos": 0.0, "train_loss": 1.0861937751748378, "train_runtime": 1754.5635, "train_samples_per_second": 24.129, "train_steps_per_second": 0.377 } ], "logging_steps": 1, "max_steps": 661, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }