{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999244142101285, "eval_steps": 200, "global_step": 661, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0015117157974300832, "fcm_dpo/beta": 0.10407507419586182, "fcm_dpo/delta": 0.19971171021461487, "fcm_dpo/margin": -0.0013532638549804688, "fcm_dpo/q_t": 0.5000380277633667, "grad_norm": 29.367589950561523, "learning_rate": 0.0, "logits/chosen": 0.13337239623069763, "logits/rejected": 0.12492949515581131, "logps/chosen": -64.5841293334961, "logps/ref_chosen": -64.61280822753906, "logps/ref_rejected": -64.17195129394531, "logps/rejected": -64.14192199707031, "loss": 1.3866, "margin_dpo/margin_mean": -0.0013527870178222656, "margin_dpo/margin_std": 0.2561596930027008, "step": 1 }, { "epoch": 0.0030234315948601664, "fcm_dpo/beta": 0.10614679753780365, "fcm_dpo/delta": 0.19520045816898346, "fcm_dpo/margin": 0.037450045347213745, "fcm_dpo/q_t": 0.49902579188346863, "grad_norm": 29.559593200683594, "learning_rate": 7.462686567164179e-09, "logits/chosen": 0.09414851665496826, "logits/rejected": 0.07363267242908478, "logps/chosen": -56.101890563964844, "logps/ref_chosen": -56.0989990234375, "logps/ref_rejected": -66.59971618652344, "logps/rejected": -66.64006042480469, "loss": 1.3824, "margin_dpo/margin_mean": 0.03744968771934509, "margin_dpo/margin_std": 0.27811938524246216, "step": 2 }, { "epoch": 0.0045351473922902496, "fcm_dpo/beta": 0.11039507389068604, "fcm_dpo/delta": 0.19718493521213531, "fcm_dpo/margin": -0.027670353651046753, "fcm_dpo/q_t": 0.5007483959197998, "grad_norm": 34.550472259521484, "learning_rate": 1.4925373134328357e-08, "logits/chosen": 0.09949980676174164, "logits/rejected": 0.0614531971514225, "logps/chosen": -65.45438385009766, "logps/ref_chosen": -65.45726013183594, "logps/ref_rejected": -90.82853698730469, "logps/rejected": -90.7979736328125, "loss": 1.3895, "margin_dpo/margin_mean": -0.027670294046401978, "margin_dpo/margin_std": 0.3105807602405548, "step": 3 }, { "epoch": 0.006046863189720333, "fcm_dpo/beta": 0.11483684927225113, "fcm_dpo/delta": 0.1972825527191162, "fcm_dpo/margin": 0.023561865091323853, "fcm_dpo/q_t": 0.4993370473384857, "grad_norm": 39.347110748291016, "learning_rate": 2.2388059701492534e-08, "logits/chosen": 0.11346002668142319, "logits/rejected": 0.09720361232757568, "logps/chosen": -76.83723449707031, "logps/ref_chosen": -76.86018371582031, "logps/ref_rejected": -79.91523742675781, "logps/rejected": -79.91584777832031, "loss": 1.3838, "margin_dpo/margin_mean": 0.023561745882034302, "margin_dpo/margin_std": 0.2997610569000244, "step": 4 }, { "epoch": 0.007558578987150416, "fcm_dpo/beta": 0.11710208654403687, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.03837460279464722, "fcm_dpo/q_t": 0.5011225938796997, "grad_norm": 34.73713302612305, "learning_rate": 2.9850746268656714e-08, "logits/chosen": 0.08234861493110657, "logits/rejected": 0.04350630193948746, "logps/chosen": -63.00522232055664, "logps/ref_chosen": -62.97134017944336, "logps/ref_rejected": -79.9192123413086, "logps/rejected": -79.91471862792969, "loss": 1.3911, "margin_dpo/margin_mean": -0.03837430477142334, "margin_dpo/margin_std": 0.31006568670272827, "step": 5 }, { "epoch": 0.009070294784580499, "fcm_dpo/beta": 0.11710208654403687, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.0550386905670166, "fcm_dpo/q_t": 0.5016094446182251, "grad_norm": 34.79764938354492, "learning_rate": 3.731343283582089e-08, "logits/chosen": 0.1415054202079773, "logits/rejected": 0.10215698182582855, "logps/chosen": -51.34320831298828, "logps/ref_chosen": -51.30736541748047, "logps/ref_rejected": -82.77239227294922, "logps/rejected": -82.7531967163086, "loss": 1.3933, "margin_dpo/margin_mean": -0.055038899183273315, "margin_dpo/margin_std": 0.40159815549850464, "step": 6 }, { "epoch": 0.010582010582010581, "fcm_dpo/beta": 0.12184364348649979, "fcm_dpo/delta": 0.19846273958683014, "fcm_dpo/margin": -0.017774909734725952, "fcm_dpo/q_t": 0.5005569458007812, "grad_norm": 33.15961456298828, "learning_rate": 4.477611940298507e-08, "logits/chosen": 0.0027350555174052715, "logits/rejected": -0.04020844027400017, "logps/chosen": -51.44348907470703, "logps/ref_chosen": -51.45941162109375, "logps/ref_rejected": -66.3828125, "logps/rejected": -66.34911346435547, "loss": 1.3886, "margin_dpo/margin_mean": -0.017774999141693115, "margin_dpo/margin_std": 0.21953274309635162, "step": 7 }, { "epoch": 0.012093726379440665, "fcm_dpo/beta": 0.12922216951847076, "fcm_dpo/delta": 0.3909910321235657, "fcm_dpo/margin": 0.07266899943351746, "fcm_dpo/q_t": 0.49774932861328125, "grad_norm": 36.538394927978516, "learning_rate": 5.223880597014925e-08, "logits/chosen": 0.07437925040721893, "logits/rejected": 0.05213908106088638, "logps/chosen": -62.17906951904297, "logps/ref_chosen": -62.197547912597656, "logps/ref_rejected": -74.66180419921875, "logps/rejected": -74.71600341796875, "loss": 1.3774, "margin_dpo/margin_mean": 0.07266855239868164, "margin_dpo/margin_std": 0.328883558511734, "step": 8 }, { "epoch": 0.013605442176870748, "fcm_dpo/beta": 0.13690130412578583, "fcm_dpo/delta": 0.19161710143089294, "fcm_dpo/margin": 0.046944648027420044, "fcm_dpo/q_t": 0.49847450852394104, "grad_norm": 43.08509826660156, "learning_rate": 5.970149253731343e-08, "logits/chosen": 0.1650906503200531, "logits/rejected": 0.10613168776035309, "logps/chosen": -55.64226150512695, "logps/ref_chosen": -55.629722595214844, "logps/ref_rejected": -86.21221923828125, "logps/rejected": -86.2717056274414, "loss": 1.3804, "margin_dpo/margin_mean": 0.04694512486457825, "margin_dpo/margin_std": 0.31391388177871704, "step": 9 }, { "epoch": 0.015117157974300832, "fcm_dpo/beta": 0.14234879612922668, "fcm_dpo/delta": 0.1951005458831787, "fcm_dpo/margin": 0.015252351760864258, "fcm_dpo/q_t": 0.4995075762271881, "grad_norm": 42.305152893066406, "learning_rate": 6.71641791044776e-08, "logits/chosen": 0.14489957690238953, "logits/rejected": 0.11318053305149078, "logps/chosen": -62.666648864746094, "logps/ref_chosen": -62.69060134887695, "logps/ref_rejected": -90.610107421875, "logps/rejected": -90.6014175415039, "loss": 1.3848, "margin_dpo/margin_mean": 0.01525232195854187, "margin_dpo/margin_std": 0.36224645376205444, "step": 10 }, { "epoch": 0.016628873771730914, "fcm_dpo/beta": 0.1479165107011795, "fcm_dpo/delta": 0.1918383240699768, "fcm_dpo/margin": 0.036658138036727905, "fcm_dpo/q_t": 0.49872156977653503, "grad_norm": 43.193355560302734, "learning_rate": 7.462686567164178e-08, "logits/chosen": 0.11544118076562881, "logits/rejected": 0.1084853783249855, "logps/chosen": -65.735595703125, "logps/ref_chosen": -65.76712036132812, "logps/ref_rejected": -72.4764633178711, "logps/rejected": -72.4815902709961, "loss": 1.3816, "margin_dpo/margin_mean": 0.03665819764137268, "margin_dpo/margin_std": 0.338836669921875, "step": 11 }, { "epoch": 0.018140589569160998, "fcm_dpo/beta": 0.15691301226615906, "fcm_dpo/delta": 0.39368370175361633, "fcm_dpo/margin": 0.04242005944252014, "fcm_dpo/q_t": 0.4984257221221924, "grad_norm": 43.97606658935547, "learning_rate": 8.208955223880596e-08, "logits/chosen": 0.03171448037028313, "logits/rejected": 0.015399420633912086, "logps/chosen": -60.683692932128906, "logps/ref_chosen": -60.704891204833984, "logps/ref_rejected": -69.41564178466797, "logps/rejected": -69.43685913085938, "loss": 1.3805, "margin_dpo/margin_mean": 0.04242032766342163, "margin_dpo/margin_std": 0.3394607901573181, "step": 12 }, { "epoch": 0.019652305366591082, "fcm_dpo/beta": 0.1632937341928482, "fcm_dpo/delta": 0.1996660977602005, "fcm_dpo/margin": -0.03977265954017639, "fcm_dpo/q_t": 0.5015895366668701, "grad_norm": 47.461734771728516, "learning_rate": 8.955223880597014e-08, "logits/chosen": 0.10258764028549194, "logits/rejected": 0.040754396468400955, "logps/chosen": -49.935394287109375, "logps/ref_chosen": -49.90925598144531, "logps/ref_rejected": -92.37818145751953, "logps/rejected": -92.36454772949219, "loss": 1.3933, "margin_dpo/margin_mean": -0.03977331519126892, "margin_dpo/margin_std": 0.29797568917274475, "step": 13 }, { "epoch": 0.021164021164021163, "fcm_dpo/beta": 0.16982074081897736, "fcm_dpo/delta": 0.19240428507328033, "fcm_dpo/margin": 0.03764998912811279, "fcm_dpo/q_t": 0.49843764305114746, "grad_norm": 49.910789489746094, "learning_rate": 9.701492537313432e-08, "logits/chosen": 0.05405683070421219, "logits/rejected": 0.03726121038198471, "logps/chosen": -60.57716369628906, "logps/ref_chosen": -60.61879348754883, "logps/ref_rejected": -71.79306030273438, "logps/rejected": -71.78907775878906, "loss": 1.3804, "margin_dpo/margin_mean": 0.03765037655830383, "margin_dpo/margin_std": 0.2961253225803375, "step": 14 }, { "epoch": 0.022675736961451247, "fcm_dpo/beta": 0.17645195126533508, "fcm_dpo/delta": 0.1906808763742447, "fcm_dpo/margin": -0.017833799123764038, "fcm_dpo/q_t": 0.5007701516151428, "grad_norm": 58.39421463012695, "learning_rate": 1.044776119402985e-07, "logits/chosen": 0.09454727172851562, "logits/rejected": 0.04970131069421768, "logps/chosen": -63.49525451660156, "logps/ref_chosen": -63.46953582763672, "logps/ref_rejected": -88.88951110839844, "logps/rejected": -88.89739990234375, "loss": 1.3901, "margin_dpo/margin_mean": -0.01783338189125061, "margin_dpo/margin_std": 0.3462127149105072, "step": 15 }, { "epoch": 0.02418745275888133, "fcm_dpo/beta": 0.1834275722503662, "fcm_dpo/delta": 0.1969115436077118, "fcm_dpo/margin": 0.003320828080177307, "fcm_dpo/q_t": 0.49985557794570923, "grad_norm": 49.16959762573242, "learning_rate": 1.1194029850746268e-07, "logits/chosen": 0.11556181311607361, "logits/rejected": 0.07850321382284164, "logps/chosen": -46.563758850097656, "logps/ref_chosen": -46.53229904174805, "logps/ref_rejected": -74.27533721923828, "logps/rejected": -74.31011962890625, "loss": 1.3865, "margin_dpo/margin_mean": 0.003320828080177307, "margin_dpo/margin_std": 0.333503782749176, "step": 16 }, { "epoch": 0.025699168556311415, "fcm_dpo/beta": 0.19445914030075073, "fcm_dpo/delta": 0.1945243775844574, "fcm_dpo/margin": 0.008714765310287476, "fcm_dpo/q_t": 0.49963706731796265, "grad_norm": 64.02886199951172, "learning_rate": 1.1940298507462686e-07, "logits/chosen": 0.05395728349685669, "logits/rejected": 0.035220514982938766, "logps/chosen": -64.09066772460938, "logps/ref_chosen": -64.07783508300781, "logps/ref_rejected": -86.40876770019531, "logps/rejected": -86.4303207397461, "loss": 1.3858, "margin_dpo/margin_mean": 0.00871431827545166, "margin_dpo/margin_std": 0.33132392168045044, "step": 17 }, { "epoch": 0.027210884353741496, "fcm_dpo/beta": 0.1984160840511322, "fcm_dpo/delta": 0.19945251941680908, "fcm_dpo/margin": -0.00810861587524414, "fcm_dpo/q_t": 0.500395655632019, "grad_norm": 55.707515716552734, "learning_rate": 1.2686567164179106e-07, "logits/chosen": 0.10771282017230988, "logits/rejected": 0.06092551350593567, "logps/chosen": -44.87413024902344, "logps/ref_chosen": -44.87433624267578, "logps/ref_rejected": -70.97604370117188, "logps/rejected": -70.96773529052734, "loss": 1.3886, "margin_dpo/margin_mean": -0.008108556270599365, "margin_dpo/margin_std": 0.27668923139572144, "step": 18 }, { "epoch": 0.02872260015117158, "fcm_dpo/beta": 0.21469247341156006, "fcm_dpo/delta": 0.3905554413795471, "fcm_dpo/margin": 0.04513262212276459, "fcm_dpo/q_t": 0.49763813614845276, "grad_norm": 66.41383361816406, "learning_rate": 1.343283582089552e-07, "logits/chosen": 0.0882333517074585, "logits/rejected": 0.07449622452259064, "logps/chosen": -68.14503479003906, "logps/ref_chosen": -68.1598129272461, "logps/ref_rejected": -81.17138671875, "logps/rejected": -81.20173645019531, "loss": 1.3777, "margin_dpo/margin_mean": 0.04513297975063324, "margin_dpo/margin_std": 0.31693926453590393, "step": 19 }, { "epoch": 0.030234315948601664, "fcm_dpo/beta": 0.22325360774993896, "fcm_dpo/delta": 0.19887003302574158, "fcm_dpo/margin": -0.009545460343360901, "fcm_dpo/q_t": 0.5005234479904175, "grad_norm": 65.36317443847656, "learning_rate": 1.4179104477611938e-07, "logits/chosen": 0.14998410642147064, "logits/rejected": 0.12636204063892365, "logps/chosen": -53.66815185546875, "logps/ref_chosen": -53.67856216430664, "logps/ref_rejected": -74.16911315917969, "logps/rejected": -74.14915466308594, "loss": 1.3893, "margin_dpo/margin_mean": -0.009545668959617615, "margin_dpo/margin_std": 0.2792781591415405, "step": 20 }, { "epoch": 0.031746031746031744, "fcm_dpo/beta": 0.23690250515937805, "fcm_dpo/delta": 0.19825556874275208, "fcm_dpo/margin": 0.004699960350990295, "fcm_dpo/q_t": 0.4997465908527374, "grad_norm": 69.45832061767578, "learning_rate": 1.4925373134328355e-07, "logits/chosen": 0.11373256146907806, "logits/rejected": 0.0878261923789978, "logps/chosen": -64.69596862792969, "logps/ref_chosen": -64.70155334472656, "logps/ref_rejected": -81.02095031738281, "logps/rejected": -81.02006530761719, "loss": 1.3871, "margin_dpo/margin_mean": 0.004698842763900757, "margin_dpo/margin_std": 0.36591023206710815, "step": 21 }, { "epoch": 0.03325774754346183, "fcm_dpo/beta": 0.25136297941207886, "fcm_dpo/delta": 0.39239007234573364, "fcm_dpo/margin": 0.031166553497314453, "fcm_dpo/q_t": 0.4980974495410919, "grad_norm": 72.23171997070312, "learning_rate": 1.5671641791044775e-07, "logits/chosen": 0.0061013903468847275, "logits/rejected": -0.014899881556630135, "logps/chosen": -58.05042266845703, "logps/ref_chosen": -58.03599166870117, "logps/ref_rejected": -80.72721862792969, "logps/rejected": -80.77281188964844, "loss": 1.3797, "margin_dpo/margin_mean": 0.031166434288024902, "margin_dpo/margin_std": 0.28403687477111816, "step": 22 }, { "epoch": 0.03476946334089191, "fcm_dpo/beta": 0.2609216570854187, "fcm_dpo/delta": 0.1793270856142044, "fcm_dpo/margin": 0.06387266516685486, "fcm_dpo/q_t": 0.49592792987823486, "grad_norm": 86.23075866699219, "learning_rate": 1.6417910447761193e-07, "logits/chosen": 0.13783769309520721, "logits/rejected": 0.11253305524587631, "logps/chosen": -66.32743835449219, "logps/ref_chosen": -66.35608673095703, "logps/ref_rejected": -93.02769470214844, "logps/rejected": -93.06291198730469, "loss": 1.3715, "margin_dpo/margin_mean": 0.06387221813201904, "margin_dpo/margin_std": 0.3480584919452667, "step": 23 }, { "epoch": 0.036281179138321996, "fcm_dpo/beta": 0.26560020446777344, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.0906066745519638, "fcm_dpo/q_t": 0.5060100555419922, "grad_norm": 70.8504638671875, "learning_rate": 1.716417910447761e-07, "logits/chosen": 0.13605038821697235, "logits/rejected": 0.10310132801532745, "logps/chosen": -54.53034210205078, "logps/ref_chosen": -54.461238861083984, "logps/ref_rejected": -68.33817291259766, "logps/rejected": -68.3166732788086, "loss": 1.4118, "margin_dpo/margin_mean": -0.09060648083686829, "margin_dpo/margin_std": 0.2676948308944702, "step": 24 }, { "epoch": 0.03779289493575208, "fcm_dpo/beta": 0.2819034457206726, "fcm_dpo/delta": 0.396295428276062, "fcm_dpo/margin": 0.013779282569885254, "fcm_dpo/q_t": 0.499076247215271, "grad_norm": 83.40914154052734, "learning_rate": 1.7910447761194027e-07, "logits/chosen": 0.10568390786647797, "logits/rejected": 0.05422993749380112, "logps/chosen": -60.02375793457031, "logps/ref_chosen": -60.00420379638672, "logps/ref_rejected": -90.47376251220703, "logps/rejected": -90.50709533691406, "loss": 1.3841, "margin_dpo/margin_mean": 0.013779401779174805, "margin_dpo/margin_std": 0.2889242172241211, "step": 25 }, { "epoch": 0.039304610733182165, "fcm_dpo/beta": 0.3045765161514282, "fcm_dpo/delta": 0.38625574111938477, "fcm_dpo/margin": 0.04751601815223694, "fcm_dpo/q_t": 0.49659329652786255, "grad_norm": 88.86567687988281, "learning_rate": 1.8656716417910447e-07, "logits/chosen": 0.12892276048660278, "logits/rejected": 0.10985440760850906, "logps/chosen": -56.801429748535156, "logps/ref_chosen": -56.81915283203125, "logps/ref_rejected": -77.84333038330078, "logps/rejected": -77.87312316894531, "loss": 1.3747, "margin_dpo/margin_mean": 0.047515541315078735, "margin_dpo/margin_std": 0.3302631378173828, "step": 26 }, { "epoch": 0.04081632653061224, "fcm_dpo/beta": 0.3105989396572113, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.012165576219558716, "fcm_dpo/q_t": 0.5009288787841797, "grad_norm": 90.4833984375, "learning_rate": 1.9402985074626865e-07, "logits/chosen": 0.1274949312210083, "logits/rejected": 0.10163411498069763, "logps/chosen": -62.892974853515625, "logps/ref_chosen": -62.87702560424805, "logps/ref_rejected": -71.34437561035156, "logps/rejected": -71.3481674194336, "loss": 1.3925, "margin_dpo/margin_mean": -0.012165874242782593, "margin_dpo/margin_std": 0.31988954544067383, "step": 27 }, { "epoch": 0.042328042328042326, "fcm_dpo/beta": 0.3105989396572113, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.051617324352264404, "fcm_dpo/q_t": 0.5040013194084167, "grad_norm": 86.85352325439453, "learning_rate": 2.0149253731343282e-07, "logits/chosen": 0.021893545985221863, "logits/rejected": 0.01391864474862814, "logps/chosen": -59.873966217041016, "logps/ref_chosen": -59.8333740234375, "logps/ref_rejected": -70.39804077148438, "logps/rejected": -70.38700866699219, "loss": 1.4041, "margin_dpo/margin_mean": -0.051616936922073364, "margin_dpo/margin_std": 0.2616669535636902, "step": 28 }, { "epoch": 0.04383975812547241, "fcm_dpo/beta": 0.3168638348579407, "fcm_dpo/delta": 0.1977420449256897, "fcm_dpo/margin": 0.002045929431915283, "fcm_dpo/q_t": 0.49984467029571533, "grad_norm": 103.7999038696289, "learning_rate": 2.08955223880597e-07, "logits/chosen": 0.16094645857810974, "logits/rejected": 0.142642542719841, "logps/chosen": -74.13179016113281, "logps/ref_chosen": -74.12020111083984, "logps/ref_rejected": -83.33099365234375, "logps/rejected": -83.34461975097656, "loss": 1.3876, "margin_dpo/margin_mean": 0.002046048641204834, "margin_dpo/margin_std": 0.28636813163757324, "step": 29 }, { "epoch": 0.045351473922902494, "fcm_dpo/beta": 0.33617156744003296, "fcm_dpo/delta": 0.19785372912883759, "fcm_dpo/margin": -0.010556548833847046, "fcm_dpo/q_t": 0.5009069442749023, "grad_norm": 101.58157348632812, "learning_rate": 2.1641791044776117e-07, "logits/chosen": 0.13398107886314392, "logits/rejected": 0.07874200493097305, "logps/chosen": -50.80282211303711, "logps/ref_chosen": -50.75128936767578, "logps/ref_rejected": -89.29063415527344, "logps/rejected": -89.33160400390625, "loss": 1.393, "margin_dpo/margin_mean": -0.010556995868682861, "margin_dpo/margin_std": 0.331157386302948, "step": 30 }, { "epoch": 0.04686318972033258, "fcm_dpo/beta": 0.34257882833480835, "fcm_dpo/delta": 0.18705223500728607, "fcm_dpo/margin": -0.021575331687927246, "fcm_dpo/q_t": 0.5018072128295898, "grad_norm": 119.50495147705078, "learning_rate": 2.2388059701492537e-07, "logits/chosen": 0.0952620878815651, "logits/rejected": 0.0497773140668869, "logps/chosen": -65.39213562011719, "logps/ref_chosen": -65.33675384521484, "logps/ref_rejected": -100.76666259765625, "logps/rejected": -100.80046081542969, "loss": 1.3965, "margin_dpo/margin_mean": -0.021574944257736206, "margin_dpo/margin_std": 0.3273775577545166, "step": 31 }, { "epoch": 0.04837490551776266, "fcm_dpo/beta": 0.3551083207130432, "fcm_dpo/delta": 0.1724216490983963, "fcm_dpo/margin": 0.03195449709892273, "fcm_dpo/q_t": 0.49723586440086365, "grad_norm": 106.27752685546875, "learning_rate": 2.3134328358208954e-07, "logits/chosen": 0.0901811346411705, "logits/rejected": 0.08230896294116974, "logps/chosen": -67.19805145263672, "logps/ref_chosen": -67.18333435058594, "logps/ref_rejected": -82.80763244628906, "logps/rejected": -82.85430908203125, "loss": 1.3781, "margin_dpo/margin_mean": 0.031954437494277954, "margin_dpo/margin_std": 0.3250340223312378, "step": 32 }, { "epoch": 0.049886621315192746, "fcm_dpo/beta": 0.36843010783195496, "fcm_dpo/delta": 0.19543671607971191, "fcm_dpo/margin": 0.005410343408584595, "fcm_dpo/q_t": 0.4994819760322571, "grad_norm": 116.07134246826172, "learning_rate": 2.388059701492537e-07, "logits/chosen": 0.041345153003931046, "logits/rejected": 0.014864582568407059, "logps/chosen": -64.06546783447266, "logps/ref_chosen": -64.03948211669922, "logps/ref_rejected": -75.68357849121094, "logps/rejected": -75.71498107910156, "loss": 1.3883, "margin_dpo/margin_mean": 0.005411058664321899, "margin_dpo/margin_std": 0.3478584289550781, "step": 33 }, { "epoch": 0.05139833711262283, "fcm_dpo/beta": 0.3973255157470703, "fcm_dpo/delta": 0.37884321808815, "fcm_dpo/margin": 0.05614650249481201, "fcm_dpo/q_t": 0.49473071098327637, "grad_norm": 112.28120422363281, "learning_rate": 2.4626865671641786e-07, "logits/chosen": 0.09276697039604187, "logits/rejected": 0.06323037296533585, "logps/chosen": -53.700801849365234, "logps/ref_chosen": -53.6642951965332, "logps/ref_rejected": -65.77989959716797, "logps/rejected": -65.87255096435547, "loss": 1.3682, "margin_dpo/margin_mean": 0.05614641308784485, "margin_dpo/margin_std": 0.3014362156391144, "step": 34 }, { "epoch": 0.05291005291005291, "fcm_dpo/beta": 0.40519657731056213, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.009445160627365112, "fcm_dpo/q_t": 0.5009890794754028, "grad_norm": 113.60429382324219, "learning_rate": 2.537313432835821e-07, "logits/chosen": 0.06334627419710159, "logits/rejected": 0.040607914328575134, "logps/chosen": -61.111026763916016, "logps/ref_chosen": -61.01686096191406, "logps/ref_rejected": -72.78598022460938, "logps/rejected": -72.8707046508789, "loss": 1.395, "margin_dpo/margin_mean": -0.009445279836654663, "margin_dpo/margin_std": 0.3441219925880432, "step": 35 }, { "epoch": 0.05442176870748299, "fcm_dpo/beta": 0.42913883924484253, "fcm_dpo/delta": 0.3846970200538635, "fcm_dpo/margin": 0.03751775622367859, "fcm_dpo/q_t": 0.4962383508682251, "grad_norm": 122.54588317871094, "learning_rate": 2.611940298507462e-07, "logits/chosen": 0.10409566760063171, "logits/rejected": 0.051027558743953705, "logps/chosen": -50.6123046875, "logps/ref_chosen": -50.53736114501953, "logps/ref_rejected": -78.11678314208984, "logps/rejected": -78.229248046875, "loss": 1.3762, "margin_dpo/margin_mean": 0.03751787543296814, "margin_dpo/margin_std": 0.35155099630355835, "step": 36 }, { "epoch": 0.055933484504913075, "fcm_dpo/beta": 0.4613209366798401, "fcm_dpo/delta": 0.34767961502075195, "fcm_dpo/margin": 0.11707112193107605, "fcm_dpo/q_t": 0.48699861764907837, "grad_norm": 174.01670837402344, "learning_rate": 2.686567164179104e-07, "logits/chosen": 0.09628035128116608, "logits/rejected": 0.01733151637017727, "logps/chosen": -59.590187072753906, "logps/ref_chosen": -59.55394744873047, "logps/ref_rejected": -108.27702331542969, "logps/rejected": -108.43034362792969, "loss": 1.3434, "margin_dpo/margin_mean": 0.11707085371017456, "margin_dpo/margin_std": 0.4468412399291992, "step": 37 }, { "epoch": 0.05744520030234316, "fcm_dpo/beta": 0.4848484396934509, "fcm_dpo/delta": 0.1649438589811325, "fcm_dpo/margin": 0.03932034969329834, "fcm_dpo/q_t": 0.49558743834495544, "grad_norm": 144.6350860595703, "learning_rate": 2.761194029850746e-07, "logits/chosen": 0.04511827975511551, "logits/rejected": 0.03162279352545738, "logps/chosen": -65.85999298095703, "logps/ref_chosen": -65.78836059570312, "logps/ref_rejected": -76.1619873046875, "logps/rejected": -76.27294921875, "loss": 1.3769, "margin_dpo/margin_mean": 0.03931984305381775, "margin_dpo/margin_std": 0.3753390312194824, "step": 38 }, { "epoch": 0.05895691609977324, "fcm_dpo/beta": 0.5116080045700073, "fcm_dpo/delta": 0.35159415006637573, "fcm_dpo/margin": 0.09740224480628967, "fcm_dpo/q_t": 0.48791423439979553, "grad_norm": 146.56613159179688, "learning_rate": 2.8358208955223876e-07, "logits/chosen": 0.1656300127506256, "logits/rejected": 0.13862335681915283, "logps/chosen": -57.26053237915039, "logps/ref_chosen": -57.17681121826172, "logps/ref_rejected": -79.486328125, "logps/rejected": -79.66746520996094, "loss": 1.3449, "margin_dpo/margin_mean": 0.09740233421325684, "margin_dpo/margin_std": 0.3506489396095276, "step": 39 }, { "epoch": 0.06046863189720333, "fcm_dpo/beta": 0.5398132801055908, "fcm_dpo/delta": 0.18534015119075775, "fcm_dpo/margin": -0.042171984910964966, "fcm_dpo/q_t": 0.5057384371757507, "grad_norm": 176.7834014892578, "learning_rate": 2.9104477611940296e-07, "logits/chosen": 0.09759977459907532, "logits/rejected": 0.04874386638402939, "logps/chosen": -61.46282958984375, "logps/ref_chosen": -61.33416748046875, "logps/ref_rejected": -79.10697174072266, "logps/rejected": -79.19346618652344, "loss": 1.4197, "margin_dpo/margin_mean": -0.04217180609703064, "margin_dpo/margin_std": 0.36746376752853394, "step": 40 }, { "epoch": 0.06198034769463341, "fcm_dpo/beta": 0.5703096389770508, "fcm_dpo/delta": 0.36740079522132874, "fcm_dpo/margin": 0.05953556299209595, "fcm_dpo/q_t": 0.4920777380466461, "grad_norm": 169.90399169921875, "learning_rate": 2.985074626865671e-07, "logits/chosen": 0.03998805582523346, "logits/rejected": 0.020248761400580406, "logps/chosen": -67.65834045410156, "logps/ref_chosen": -67.5467300415039, "logps/ref_rejected": -83.87788391113281, "logps/rejected": -84.04903411865234, "loss": 1.3651, "margin_dpo/margin_mean": 0.05953595042228699, "margin_dpo/margin_std": 0.3872567415237427, "step": 41 }, { "epoch": 0.06349206349206349, "fcm_dpo/beta": 0.612642765045166, "fcm_dpo/delta": 0.36006850004196167, "fcm_dpo/margin": 0.06822788715362549, "fcm_dpo/q_t": 0.49012911319732666, "grad_norm": 176.0435028076172, "learning_rate": 3.059701492537313e-07, "logits/chosen": 0.05589213967323303, "logits/rejected": 0.034156039357185364, "logps/chosen": -61.35820770263672, "logps/ref_chosen": -61.26485824584961, "logps/ref_rejected": -76.3629150390625, "logps/rejected": -76.52449035644531, "loss": 1.3608, "margin_dpo/margin_mean": 0.06822726130485535, "margin_dpo/margin_std": 0.40432050824165344, "step": 42 }, { "epoch": 0.06500377928949358, "fcm_dpo/beta": 0.6587069034576416, "fcm_dpo/delta": 0.3486868739128113, "fcm_dpo/margin": 0.08003175258636475, "fcm_dpo/q_t": 0.4871513843536377, "grad_norm": 227.68112182617188, "learning_rate": 3.134328358208955e-07, "logits/chosen": 0.08429280668497086, "logits/rejected": 0.07338319718837738, "logps/chosen": -71.92668151855469, "logps/ref_chosen": -71.80902862548828, "logps/ref_rejected": -81.12464141845703, "logps/rejected": -81.32231140136719, "loss": 1.3553, "margin_dpo/margin_mean": 0.08003199100494385, "margin_dpo/margin_std": 0.44389355182647705, "step": 43 }, { "epoch": 0.06651549508692366, "fcm_dpo/beta": 0.6816689968109131, "fcm_dpo/delta": 0.17928901314735413, "fcm_dpo/margin": 0.0221157968044281, "fcm_dpo/q_t": 0.4966672658920288, "grad_norm": 220.933349609375, "learning_rate": 3.2089552238805965e-07, "logits/chosen": 0.040969911962747574, "logits/rejected": 0.010756943374872208, "logps/chosen": -66.72364807128906, "logps/ref_chosen": -66.55043029785156, "logps/ref_rejected": -85.06198120117188, "logps/rejected": -85.25730895996094, "loss": 1.3932, "margin_dpo/margin_mean": 0.022116124629974365, "margin_dpo/margin_std": 0.4371548295021057, "step": 44 }, { "epoch": 0.06802721088435375, "fcm_dpo/beta": 0.7307697534561157, "fcm_dpo/delta": 0.3396417498588562, "fcm_dpo/margin": 0.08511926233768463, "fcm_dpo/q_t": 0.48502203822135925, "grad_norm": 232.3180694580078, "learning_rate": 3.2835820895522385e-07, "logits/chosen": 0.12735822796821594, "logits/rejected": 0.07406317442655563, "logps/chosen": -62.372169494628906, "logps/ref_chosen": -62.24385452270508, "logps/ref_rejected": -92.96665954589844, "logps/rejected": -93.18009948730469, "loss": 1.3537, "margin_dpo/margin_mean": 0.08512008190155029, "margin_dpo/margin_std": 0.4685259461402893, "step": 45 }, { "epoch": 0.06953892668178382, "fcm_dpo/beta": 0.7730103731155396, "fcm_dpo/delta": 0.2614397406578064, "fcm_dpo/margin": 0.18374797701835632, "fcm_dpo/q_t": 0.46673786640167236, "grad_norm": 223.03953552246094, "learning_rate": 3.3582089552238805e-07, "logits/chosen": 0.09179161489009857, "logits/rejected": 0.04750993847846985, "logps/chosen": -61.60199737548828, "logps/ref_chosen": -61.498905181884766, "logps/ref_rejected": -78.91172790527344, "logps/rejected": -79.19857025146484, "loss": 1.2771, "margin_dpo/margin_mean": 0.18374782800674438, "margin_dpo/margin_std": 0.4380492866039276, "step": 46 }, { "epoch": 0.0710506424792139, "fcm_dpo/beta": 0.8166114687919617, "fcm_dpo/delta": 0.27844953536987305, "fcm_dpo/margin": 0.15279075503349304, "fcm_dpo/q_t": 0.4699801206588745, "grad_norm": 221.4151611328125, "learning_rate": 3.432835820895522e-07, "logits/chosen": 0.029810963198542595, "logits/rejected": -0.012774014845490456, "logps/chosen": -51.69743347167969, "logps/ref_chosen": -51.578346252441406, "logps/ref_rejected": -68.2215576171875, "logps/rejected": -68.49343872070312, "loss": 1.2897, "margin_dpo/margin_mean": 0.15279105305671692, "margin_dpo/margin_std": 0.3854082226753235, "step": 47 }, { "epoch": 0.07256235827664399, "fcm_dpo/beta": 0.8717821836471558, "fcm_dpo/delta": 0.34792637825012207, "fcm_dpo/margin": 0.06187397241592407, "fcm_dpo/q_t": 0.4878446161746979, "grad_norm": 236.5067138671875, "learning_rate": 3.507462686567164e-07, "logits/chosen": 0.16963548958301544, "logits/rejected": 0.13918644189834595, "logps/chosen": -51.98871994018555, "logps/ref_chosen": -51.79365158081055, "logps/ref_rejected": -64.22503662109375, "logps/rejected": -64.48198699951172, "loss": 1.3667, "margin_dpo/margin_mean": 0.061874061822891235, "margin_dpo/margin_std": 0.4206083416938782, "step": 48 }, { "epoch": 0.07407407407407407, "fcm_dpo/beta": 0.9262492060661316, "fcm_dpo/delta": 0.288057416677475, "fcm_dpo/margin": 0.12440468370914459, "fcm_dpo/q_t": 0.4729665517807007, "grad_norm": 237.94810485839844, "learning_rate": 3.5820895522388055e-07, "logits/chosen": 0.02041550911962986, "logits/rejected": -0.0007341206073760986, "logps/chosen": -58.339622497558594, "logps/ref_chosen": -58.13460159301758, "logps/ref_rejected": -64.63206481933594, "logps/rejected": -64.96148681640625, "loss": 1.3232, "margin_dpo/margin_mean": 0.12440502643585205, "margin_dpo/margin_std": 0.4858497977256775, "step": 49 }, { "epoch": 0.07558578987150416, "fcm_dpo/beta": 0.9749960899353027, "fcm_dpo/delta": 0.25156453251838684, "fcm_dpo/margin": 0.15634498000144958, "fcm_dpo/q_t": 0.4646122455596924, "grad_norm": 252.39637756347656, "learning_rate": 3.6567164179104475e-07, "logits/chosen": 0.11577291041612625, "logits/rejected": 0.08579862117767334, "logps/chosen": -53.112850189208984, "logps/ref_chosen": -52.85643768310547, "logps/ref_rejected": -72.17460632324219, "logps/rejected": -72.58735656738281, "loss": 1.2742, "margin_dpo/margin_mean": 0.15634474158287048, "margin_dpo/margin_std": 0.38461118936538696, "step": 50 }, { "epoch": 0.07709750566893424, "fcm_dpo/beta": 1.0202209949493408, "fcm_dpo/delta": 0.2239471673965454, "fcm_dpo/margin": 0.1768064796924591, "fcm_dpo/q_t": 0.45897069573402405, "grad_norm": 271.5158996582031, "learning_rate": 3.7313432835820895e-07, "logits/chosen": 0.0644986554980278, "logits/rejected": 0.03733114153146744, "logps/chosen": -63.9501838684082, "logps/ref_chosen": -63.65644073486328, "logps/ref_rejected": -86.13229370117188, "logps/rejected": -86.60284423828125, "loss": 1.2691, "margin_dpo/margin_mean": 0.17680680751800537, "margin_dpo/margin_std": 0.4650823473930359, "step": 51 }, { "epoch": 0.07860922146636433, "fcm_dpo/beta": 1.0686278343200684, "fcm_dpo/delta": 0.21884143352508545, "fcm_dpo/margin": 0.17313197255134583, "fcm_dpo/q_t": 0.45887812972068787, "grad_norm": 316.1194152832031, "learning_rate": 3.805970149253731e-07, "logits/chosen": 0.09268851578235626, "logits/rejected": 0.04252258688211441, "logps/chosen": -68.11876678466797, "logps/ref_chosen": -67.8402099609375, "logps/ref_rejected": -96.97090911865234, "logps/rejected": -97.42259216308594, "loss": 1.2702, "margin_dpo/margin_mean": 0.1731322705745697, "margin_dpo/margin_std": 0.47208118438720703, "step": 52 }, { "epoch": 0.0801209372637944, "fcm_dpo/beta": 1.1181976795196533, "fcm_dpo/delta": 0.2672102749347687, "fcm_dpo/margin": 0.1227661669254303, "fcm_dpo/q_t": 0.46797460317611694, "grad_norm": 288.12396240234375, "learning_rate": 3.880597014925373e-07, "logits/chosen": 0.10207226872444153, "logits/rejected": 0.0910034328699112, "logps/chosen": -57.202964782714844, "logps/ref_chosen": -56.87813949584961, "logps/ref_rejected": -60.75569152832031, "logps/rejected": -61.203285217285156, "loss": 1.2989, "margin_dpo/margin_mean": 0.12276646494865417, "margin_dpo/margin_std": 0.3650910258293152, "step": 53 }, { "epoch": 0.08163265306122448, "fcm_dpo/beta": 1.1756856441497803, "fcm_dpo/delta": 0.22768601775169373, "fcm_dpo/margin": 0.1503320038318634, "fcm_dpo/q_t": 0.45959293842315674, "grad_norm": 297.62738037109375, "learning_rate": 3.9552238805970144e-07, "logits/chosen": 0.05468449741601944, "logits/rejected": 0.03934643790125847, "logps/chosen": -47.59065246582031, "logps/ref_chosen": -47.26692199707031, "logps/ref_rejected": -62.19426727294922, "logps/rejected": -62.66832733154297, "loss": 1.2773, "margin_dpo/margin_mean": 0.15033209323883057, "margin_dpo/margin_std": 0.42221611738204956, "step": 54 }, { "epoch": 0.08314436885865457, "fcm_dpo/beta": 1.2158875465393066, "fcm_dpo/delta": 0.06430363655090332, "fcm_dpo/margin": 0.2756027579307556, "fcm_dpo/q_t": 0.43045541644096375, "grad_norm": 339.1961669921875, "learning_rate": 4.0298507462686564e-07, "logits/chosen": 0.0669720247387886, "logits/rejected": -0.013603119179606438, "logps/chosen": -50.63751983642578, "logps/ref_chosen": -50.32619094848633, "logps/ref_rejected": -92.44389343261719, "logps/rejected": -93.03082275390625, "loss": 1.2078, "margin_dpo/margin_mean": 0.2756025791168213, "margin_dpo/margin_std": 0.5928495526313782, "step": 55 }, { "epoch": 0.08465608465608465, "fcm_dpo/beta": 1.251664161682129, "fcm_dpo/delta": 0.23274877667427063, "fcm_dpo/margin": 0.13655099272727966, "fcm_dpo/q_t": 0.4612714946269989, "grad_norm": 343.06378173828125, "learning_rate": 4.1044776119402984e-07, "logits/chosen": 0.10051405429840088, "logits/rejected": 0.07845334708690643, "logps/chosen": -57.09839630126953, "logps/ref_chosen": -56.766971588134766, "logps/ref_rejected": -66.30504608154297, "logps/rejected": -66.77301788330078, "loss": 1.3046, "margin_dpo/margin_mean": 0.13655099272727966, "margin_dpo/margin_std": 0.466732382774353, "step": 56 }, { "epoch": 0.08616780045351474, "fcm_dpo/beta": 1.2687859535217285, "fcm_dpo/delta": 0.05950481444597244, "fcm_dpo/margin": 0.26971304416656494, "fcm_dpo/q_t": 0.42512619495391846, "grad_norm": 329.37237548828125, "learning_rate": 4.17910447761194e-07, "logits/chosen": 0.11962918192148209, "logits/rejected": 0.05380728468298912, "logps/chosen": -58.167022705078125, "logps/ref_chosen": -57.76774597167969, "logps/ref_rejected": -82.75698852539062, "logps/rejected": -83.42597961425781, "loss": 1.1973, "margin_dpo/margin_mean": 0.2697131037712097, "margin_dpo/margin_std": 0.5702307224273682, "step": 57 }, { "epoch": 0.08767951625094482, "fcm_dpo/beta": 1.2718815803527832, "fcm_dpo/delta": -0.031620148569345474, "fcm_dpo/margin": 0.17981407046318054, "fcm_dpo/q_t": 0.4582921266555786, "grad_norm": 405.1985778808594, "learning_rate": 4.253731343283582e-07, "logits/chosen": 0.056253060698509216, "logits/rejected": 0.04066895321011543, "logps/chosen": -73.22657775878906, "logps/ref_chosen": -72.76408386230469, "logps/ref_rejected": -84.49275207519531, "logps/rejected": -85.13505554199219, "loss": 1.3311, "margin_dpo/margin_mean": 0.17981407046318054, "margin_dpo/margin_std": 0.654416561126709, "step": 58 }, { "epoch": 0.08919123204837491, "fcm_dpo/beta": 1.2566231489181519, "fcm_dpo/delta": -0.12143002450466156, "fcm_dpo/margin": 0.24419176578521729, "fcm_dpo/q_t": 0.4358407258987427, "grad_norm": 294.4437561035156, "learning_rate": 4.3283582089552234e-07, "logits/chosen": 0.12476523220539093, "logits/rejected": 0.05869518965482712, "logps/chosen": -50.228233337402344, "logps/ref_chosen": -49.820777893066406, "logps/ref_rejected": -77.14368438720703, "logps/rejected": -77.79533386230469, "loss": 1.2168, "margin_dpo/margin_mean": 0.24419182538986206, "margin_dpo/margin_std": 0.48622995615005493, "step": 59 }, { "epoch": 0.09070294784580499, "fcm_dpo/beta": 1.2652430534362793, "fcm_dpo/delta": 0.09526422619819641, "fcm_dpo/margin": 0.06482309103012085, "fcm_dpo/q_t": 0.4771226644515991, "grad_norm": 414.355224609375, "learning_rate": 4.4029850746268654e-07, "logits/chosen": 0.11259119212627411, "logits/rejected": 0.11113601922988892, "logps/chosen": -63.673728942871094, "logps/ref_chosen": -63.22477340698242, "logps/ref_rejected": -61.360477447509766, "logps/rejected": -61.8742561340332, "loss": 1.4422, "margin_dpo/margin_mean": 0.0648232102394104, "margin_dpo/margin_std": 0.5844757556915283, "step": 60 }, { "epoch": 0.09221466364323508, "fcm_dpo/beta": 1.3022596836090088, "fcm_dpo/delta": 0.2242211550474167, "fcm_dpo/margin": 0.1385476291179657, "fcm_dpo/q_t": 0.4630514979362488, "grad_norm": 383.376220703125, "learning_rate": 4.4776119402985074e-07, "logits/chosen": 0.14720313251018524, "logits/rejected": 0.11374804377555847, "logps/chosen": -49.5374755859375, "logps/ref_chosen": -49.01679992675781, "logps/ref_rejected": -74.90817260742188, "logps/rejected": -75.56739044189453, "loss": 1.3838, "margin_dpo/margin_mean": 0.13854748010635376, "margin_dpo/margin_std": 0.6521978974342346, "step": 61 }, { "epoch": 0.09372637944066516, "fcm_dpo/beta": 1.355870246887207, "fcm_dpo/delta": 0.14111031591892242, "fcm_dpo/margin": 0.19318178296089172, "fcm_dpo/q_t": 0.44203782081604004, "grad_norm": 394.4239501953125, "learning_rate": 4.552238805970149e-07, "logits/chosen": 0.1043829694390297, "logits/rejected": 0.06520397216081619, "logps/chosen": -63.30534362792969, "logps/ref_chosen": -62.751869201660156, "logps/ref_rejected": -78.93360900878906, "logps/rejected": -79.68026733398438, "loss": 1.2919, "margin_dpo/margin_mean": 0.1931813657283783, "margin_dpo/margin_std": 0.5964335799217224, "step": 62 }, { "epoch": 0.09523809523809523, "fcm_dpo/beta": 1.320521354675293, "fcm_dpo/delta": -0.21281126141548157, "fcm_dpo/margin": 0.4545568525791168, "fcm_dpo/q_t": 0.360975980758667, "grad_norm": 294.78167724609375, "learning_rate": 4.626865671641791e-07, "logits/chosen": 0.17442727088928223, "logits/rejected": 0.1496298611164093, "logps/chosen": -60.93528366088867, "logps/ref_chosen": -60.51525115966797, "logps/ref_rejected": -85.11021423339844, "logps/rejected": -85.98480224609375, "loss": 0.9608, "margin_dpo/margin_mean": 0.454556941986084, "margin_dpo/margin_std": 0.46920114755630493, "step": 63 }, { "epoch": 0.09674981103552532, "fcm_dpo/beta": 1.3571248054504395, "fcm_dpo/delta": 0.29117465019226074, "fcm_dpo/margin": 0.0832415223121643, "fcm_dpo/q_t": 0.47412389516830444, "grad_norm": 399.5368347167969, "learning_rate": 4.701492537313433e-07, "logits/chosen": 0.07589007169008255, "logits/rejected": 0.051104120910167694, "logps/chosen": -51.82196044921875, "logps/ref_chosen": -51.20684814453125, "logps/ref_rejected": -66.93081665039062, "logps/rejected": -67.62918090820312, "loss": 1.41, "margin_dpo/margin_mean": 0.08324190974235535, "margin_dpo/margin_std": 0.5599809885025024, "step": 64 }, { "epoch": 0.0982615268329554, "fcm_dpo/beta": 1.3357598781585693, "fcm_dpo/delta": -0.2229897379875183, "fcm_dpo/margin": 0.4559674859046936, "fcm_dpo/q_t": 0.371783971786499, "grad_norm": 337.01251220703125, "learning_rate": 4.776119402985074e-07, "logits/chosen": 0.1916767954826355, "logits/rejected": 0.1618405431509018, "logps/chosen": -67.87342834472656, "logps/ref_chosen": -67.2886962890625, "logps/ref_rejected": -74.44281005859375, "logps/rejected": -75.48350524902344, "loss": 1.0623, "margin_dpo/margin_mean": 0.4559671878814697, "margin_dpo/margin_std": 0.7171410918235779, "step": 65 }, { "epoch": 0.09977324263038549, "fcm_dpo/beta": 1.3253322839736938, "fcm_dpo/delta": 0.06414327025413513, "fcm_dpo/margin": 0.25458577275276184, "fcm_dpo/q_t": 0.4319732189178467, "grad_norm": 368.9222412109375, "learning_rate": 4.850746268656717e-07, "logits/chosen": 0.1052892655134201, "logits/rejected": 0.08073309063911438, "logps/chosen": -71.32987976074219, "logps/ref_chosen": -70.743408203125, "logps/ref_rejected": -77.26499938964844, "logps/rejected": -78.10604858398438, "loss": 1.2347, "margin_dpo/margin_mean": 0.2545853555202484, "margin_dpo/margin_std": 0.6333310604095459, "step": 66 }, { "epoch": 0.10128495842781557, "fcm_dpo/beta": 1.3538342714309692, "fcm_dpo/delta": 0.04710801690816879, "fcm_dpo/margin": 0.26157301664352417, "fcm_dpo/q_t": 0.431488960981369, "grad_norm": 357.73748779296875, "learning_rate": 4.925373134328357e-07, "logits/chosen": 0.047591157257556915, "logits/rejected": -0.006785998586565256, "logps/chosen": -61.096351623535156, "logps/ref_chosen": -60.60260009765625, "logps/ref_rejected": -75.22235870361328, "logps/rejected": -75.97767639160156, "loss": 1.2217, "margin_dpo/margin_mean": 0.2615726888179779, "margin_dpo/margin_std": 0.6404443979263306, "step": 67 }, { "epoch": 0.10279667422524566, "fcm_dpo/beta": 1.3258135318756104, "fcm_dpo/delta": -0.06805318593978882, "fcm_dpo/margin": 0.34885549545288086, "fcm_dpo/q_t": 0.40520694851875305, "grad_norm": 383.10980224609375, "learning_rate": 5e-07, "logits/chosen": 0.028347402811050415, "logits/rejected": -0.000688064843416214, "logps/chosen": -78.18305206298828, "logps/ref_chosen": -77.52836608886719, "logps/ref_rejected": -93.17778015136719, "logps/rejected": -94.18131256103516, "loss": 1.1489, "margin_dpo/margin_mean": 0.3488554060459137, "margin_dpo/margin_std": 0.6785191893577576, "step": 68 }, { "epoch": 0.10430839002267574, "fcm_dpo/beta": 1.3233981132507324, "fcm_dpo/delta": -0.10464634746313095, "fcm_dpo/margin": 0.37709471583366394, "fcm_dpo/q_t": 0.39530229568481445, "grad_norm": 314.11279296875, "learning_rate": 4.999965034812934e-07, "logits/chosen": 0.09186286479234695, "logits/rejected": 0.048370733857154846, "logps/chosen": -66.55419921875, "logps/ref_chosen": -65.94305419921875, "logps/ref_rejected": -89.7735595703125, "logps/rejected": -90.76179504394531, "loss": 1.0926, "margin_dpo/margin_mean": 0.3770950138568878, "margin_dpo/margin_std": 0.5968654751777649, "step": 69 }, { "epoch": 0.10582010582010581, "fcm_dpo/beta": 1.305304765701294, "fcm_dpo/delta": 0.03656423091888428, "fcm_dpo/margin": 0.2790451943874359, "fcm_dpo/q_t": 0.42012423276901245, "grad_norm": 358.5043029785156, "learning_rate": 4.999860140229787e-07, "logits/chosen": 0.11005310714244843, "logits/rejected": 0.08715207874774933, "logps/chosen": -62.6015625, "logps/ref_chosen": -61.95791244506836, "logps/ref_rejected": -75.80945587158203, "logps/rejected": -76.7321548461914, "loss": 1.215, "margin_dpo/margin_mean": 0.2790454030036926, "margin_dpo/margin_std": 0.6277328729629517, "step": 70 }, { "epoch": 0.1073318216175359, "fcm_dpo/beta": 1.3658053874969482, "fcm_dpo/delta": 0.24697905778884888, "fcm_dpo/margin": 0.1148194968700409, "fcm_dpo/q_t": 0.4629502594470978, "grad_norm": 417.89801025390625, "learning_rate": 4.999685319184688e-07, "logits/chosen": 0.05348202586174011, "logits/rejected": 0.03862350434064865, "logps/chosen": -64.16213989257812, "logps/ref_chosen": -63.34757995605469, "logps/ref_rejected": -67.49658203125, "logps/rejected": -68.42596435546875, "loss": 1.3647, "margin_dpo/margin_mean": 0.11482015252113342, "margin_dpo/margin_std": 0.5449914932250977, "step": 71 }, { "epoch": 0.10884353741496598, "fcm_dpo/beta": 1.3521233797073364, "fcm_dpo/delta": -0.13006776571273804, "fcm_dpo/margin": 0.38662588596343994, "fcm_dpo/q_t": 0.38986825942993164, "grad_norm": 355.95806884765625, "learning_rate": 4.999440576567755e-07, "logits/chosen": 0.11371571570634842, "logits/rejected": 0.04910843446850777, "logps/chosen": -56.53982925415039, "logps/ref_chosen": -55.85929870605469, "logps/ref_rejected": -68.45423889160156, "logps/rejected": -69.52140045166016, "loss": 1.1028, "margin_dpo/margin_mean": 0.38662609457969666, "margin_dpo/margin_std": 0.6601760387420654, "step": 72 }, { "epoch": 0.11035525321239607, "fcm_dpo/beta": 1.39102303981781, "fcm_dpo/delta": 0.16966001689434052, "fcm_dpo/margin": 0.16693082451820374, "fcm_dpo/q_t": 0.4621427059173584, "grad_norm": 460.1529846191406, "learning_rate": 4.999125919224965e-07, "logits/chosen": 0.07310564070940018, "logits/rejected": 0.05886080861091614, "logps/chosen": -70.09381103515625, "logps/ref_chosen": -69.13880920410156, "logps/ref_rejected": -79.04586791992188, "logps/rejected": -80.16780090332031, "loss": 1.4425, "margin_dpo/margin_mean": 0.16693082451820374, "margin_dpo/margin_std": 0.8031052350997925, "step": 73 }, { "epoch": 0.11186696900982615, "fcm_dpo/beta": 1.3699589967727661, "fcm_dpo/delta": -0.1809014230966568, "fcm_dpo/margin": 0.4163665473461151, "fcm_dpo/q_t": 0.38876447081565857, "grad_norm": 305.5662536621094, "learning_rate": 4.998741355957963e-07, "logits/chosen": 0.10880277305841446, "logits/rejected": 0.056502409279346466, "logps/chosen": -50.64822769165039, "logps/ref_chosen": -49.923736572265625, "logps/ref_rejected": -81.73213958740234, "logps/rejected": -82.87300109863281, "loss": 1.1053, "margin_dpo/margin_mean": 0.4163666069507599, "margin_dpo/margin_std": 0.7277534604072571, "step": 74 }, { "epoch": 0.11337868480725624, "fcm_dpo/beta": 1.3012433052062988, "fcm_dpo/delta": -0.18539077043533325, "fcm_dpo/margin": 0.44073373079299927, "fcm_dpo/q_t": 0.3843110501766205, "grad_norm": 269.8968811035156, "learning_rate": 4.998286897523808e-07, "logits/chosen": 0.08823660016059875, "logits/rejected": 0.05635009706020355, "logps/chosen": -46.86772537231445, "logps/ref_chosen": -46.06875228881836, "logps/ref_rejected": -66.1181411743164, "logps/rejected": -67.35784912109375, "loss": 1.0864, "margin_dpo/margin_mean": 0.44073382019996643, "margin_dpo/margin_std": 0.7411842942237854, "step": 75 }, { "epoch": 0.11489040060468632, "fcm_dpo/beta": 1.3104901313781738, "fcm_dpo/delta": 0.09375004470348358, "fcm_dpo/margin": 0.2359827756881714, "fcm_dpo/q_t": 0.437259316444397, "grad_norm": 354.0776672363281, "learning_rate": 4.997762556634679e-07, "logits/chosen": 0.07581540942192078, "logits/rejected": 0.03382248058915138, "logps/chosen": -54.947086334228516, "logps/ref_chosen": -54.06275177001953, "logps/ref_rejected": -74.87464141845703, "logps/rejected": -75.99496459960938, "loss": 1.2838, "margin_dpo/margin_mean": 0.23598253726959229, "margin_dpo/margin_std": 0.6900110840797424, "step": 76 }, { "epoch": 0.1164021164021164, "fcm_dpo/beta": 1.2992005348205566, "fcm_dpo/delta": -0.001370757818222046, "fcm_dpo/margin": 0.3053017258644104, "fcm_dpo/q_t": 0.4183969497680664, "grad_norm": 362.9595031738281, "learning_rate": 4.99716834795752e-07, "logits/chosen": 0.09972919523715973, "logits/rejected": 0.06024125590920448, "logps/chosen": -54.065406799316406, "logps/ref_chosen": -53.07609176635742, "logps/ref_rejected": -74.45601654052734, "logps/rejected": -75.7506332397461, "loss": 1.229, "margin_dpo/margin_mean": 0.3053016662597656, "margin_dpo/margin_std": 0.6845893859863281, "step": 77 }, { "epoch": 0.11791383219954649, "fcm_dpo/beta": 1.3032406568527222, "fcm_dpo/delta": -0.07537812739610672, "fcm_dpo/margin": 0.36197635531425476, "fcm_dpo/q_t": 0.3986741900444031, "grad_norm": 356.3768615722656, "learning_rate": 4.996504288113623e-07, "logits/chosen": 0.07950174808502197, "logits/rejected": 0.05943600460886955, "logps/chosen": -68.604736328125, "logps/ref_chosen": -67.72541809082031, "logps/ref_rejected": -79.03926849365234, "logps/rejected": -80.28056335449219, "loss": 1.134, "margin_dpo/margin_mean": 0.3619759976863861, "margin_dpo/margin_std": 0.6469442844390869, "step": 78 }, { "epoch": 0.11942554799697656, "fcm_dpo/beta": 1.27435302734375, "fcm_dpo/delta": -0.15202443301677704, "fcm_dpo/margin": 0.4268932640552521, "fcm_dpo/q_t": 0.38484495878219604, "grad_norm": 283.2723083496094, "learning_rate": 4.995770395678171e-07, "logits/chosen": 0.11961696296930313, "logits/rejected": 0.061562664806842804, "logps/chosen": -53.108978271484375, "logps/ref_chosen": -52.16064453125, "logps/ref_rejected": -83.31062316894531, "logps/rejected": -84.68585205078125, "loss": 1.0591, "margin_dpo/margin_mean": 0.4268933832645416, "margin_dpo/margin_std": 0.6421747207641602, "step": 79 }, { "epoch": 0.12093726379440665, "fcm_dpo/beta": 1.251448631286621, "fcm_dpo/delta": -0.08930756151676178, "fcm_dpo/margin": 0.38737350702285767, "fcm_dpo/q_t": 0.400845468044281, "grad_norm": 356.0257263183594, "learning_rate": 4.994966691179711e-07, "logits/chosen": 0.10540800541639328, "logits/rejected": 0.04691263288259506, "logps/chosen": -62.39448928833008, "logps/ref_chosen": -61.410560607910156, "logps/ref_rejected": -78.66004943847656, "logps/rejected": -80.0313491821289, "loss": 1.1444, "margin_dpo/margin_mean": 0.38737374544143677, "margin_dpo/margin_std": 0.7291325330734253, "step": 80 }, { "epoch": 0.12244897959183673, "fcm_dpo/beta": 1.2081029415130615, "fcm_dpo/delta": -0.13699756562709808, "fcm_dpo/margin": 0.4377623200416565, "fcm_dpo/q_t": 0.3887847363948822, "grad_norm": 295.5246276855469, "learning_rate": 4.994093197099587e-07, "logits/chosen": 0.07170180976390839, "logits/rejected": 0.039106931537389755, "logps/chosen": -64.8592300415039, "logps/ref_chosen": -63.80437088012695, "logps/ref_rejected": -79.3484115600586, "logps/rejected": -80.84103393554688, "loss": 1.0711, "margin_dpo/margin_mean": 0.43776261806488037, "margin_dpo/margin_std": 0.6732680797576904, "step": 81 }, { "epoch": 0.12396069538926682, "fcm_dpo/beta": 1.1594877243041992, "fcm_dpo/delta": -0.2622827887535095, "fcm_dpo/margin": 0.5566993951797485, "fcm_dpo/q_t": 0.3570610582828522, "grad_norm": 258.639404296875, "learning_rate": 4.993149937871306e-07, "logits/chosen": 0.05478543043136597, "logits/rejected": -0.006189014762639999, "logps/chosen": -49.740875244140625, "logps/ref_chosen": -48.817893981933594, "logps/ref_rejected": -70.31497955322266, "logps/rejected": -71.79466247558594, "loss": 0.9629, "margin_dpo/margin_mean": 0.5566992163658142, "margin_dpo/margin_std": 0.6383960843086243, "step": 82 }, { "epoch": 0.1254724111866969, "fcm_dpo/beta": 1.1256608963012695, "fcm_dpo/delta": -0.09757444262504578, "fcm_dpo/margin": 0.43767139315605164, "fcm_dpo/q_t": 0.39699286222457886, "grad_norm": 282.66607666015625, "learning_rate": 4.992136939879856e-07, "logits/chosen": 0.1710186004638672, "logits/rejected": 0.12015236914157867, "logps/chosen": -58.20113754272461, "logps/ref_chosen": -57.15077209472656, "logps/ref_rejected": -75.1710205078125, "logps/rejected": -76.6590576171875, "loss": 1.1497, "margin_dpo/margin_mean": 0.43767082691192627, "margin_dpo/margin_std": 0.8551985025405884, "step": 83 }, { "epoch": 0.12698412698412698, "fcm_dpo/beta": 1.1269830465316772, "fcm_dpo/delta": 0.03831970691680908, "fcm_dpo/margin": 0.3221731185913086, "fcm_dpo/q_t": 0.4186519980430603, "grad_norm": 351.0410461425781, "learning_rate": 4.991054231460969e-07, "logits/chosen": 0.13431841135025024, "logits/rejected": 0.09221207350492477, "logps/chosen": -65.97208404541016, "logps/ref_chosen": -64.77729797363281, "logps/ref_rejected": -84.71949768066406, "logps/rejected": -86.2364501953125, "loss": 1.2095, "margin_dpo/margin_mean": 0.322173148393631, "margin_dpo/margin_std": 0.7237873673439026, "step": 84 }, { "epoch": 0.12849584278155707, "fcm_dpo/beta": 1.082049012184143, "fcm_dpo/delta": -0.3127109408378601, "fcm_dpo/margin": 0.6393401622772217, "fcm_dpo/q_t": 0.3549611568450928, "grad_norm": 254.55479431152344, "learning_rate": 4.989901842900325e-07, "logits/chosen": 0.12381778657436371, "logits/rejected": 0.08032877743244171, "logps/chosen": -51.256591796875, "logps/ref_chosen": -50.25169372558594, "logps/ref_rejected": -66.55439758300781, "logps/rejected": -68.19862365722656, "loss": 0.9901, "margin_dpo/margin_mean": 0.6393401622772217, "margin_dpo/margin_std": 0.8425341844558716, "step": 85 }, { "epoch": 0.13000755857898716, "fcm_dpo/beta": 1.0500352382659912, "fcm_dpo/delta": -0.04895002394914627, "fcm_dpo/margin": 0.4251331388950348, "fcm_dpo/q_t": 0.40430694818496704, "grad_norm": 238.89324951171875, "learning_rate": 4.988679806432711e-07, "logits/chosen": 0.1550569087266922, "logits/rejected": 0.13627271354198456, "logps/chosen": -61.91556167602539, "logps/ref_chosen": -60.72917938232422, "logps/ref_rejected": -72.30961608886719, "logps/rejected": -73.92112731933594, "loss": 1.1148, "margin_dpo/margin_mean": 0.42513370513916016, "margin_dpo/margin_std": 0.7163010835647583, "step": 86 }, { "epoch": 0.13151927437641722, "fcm_dpo/beta": 1.0135871171951294, "fcm_dpo/delta": -0.1784614622592926, "fcm_dpo/margin": 0.5574131011962891, "fcm_dpo/q_t": 0.37792396545410156, "grad_norm": 287.37872314453125, "learning_rate": 4.987388156241114e-07, "logits/chosen": 0.06490974128246307, "logits/rejected": 0.01204625889658928, "logps/chosen": -66.92779541015625, "logps/ref_chosen": -65.75796508789062, "logps/ref_rejected": -84.81159973144531, "logps/rejected": -86.53883361816406, "loss": 1.1033, "margin_dpo/margin_mean": 0.5574125647544861, "margin_dpo/margin_std": 0.9363458156585693, "step": 87 }, { "epoch": 0.1330309901738473, "fcm_dpo/beta": 0.9974713325500488, "fcm_dpo/delta": -0.057315874844789505, "fcm_dpo/margin": 0.4542155861854553, "fcm_dpo/q_t": 0.4059000611305237, "grad_norm": 284.4267272949219, "learning_rate": 4.986026928455767e-07, "logits/chosen": 0.15725532174110413, "logits/rejected": 0.13133074343204498, "logps/chosen": -64.01789855957031, "logps/ref_chosen": -62.82402801513672, "logps/ref_rejected": -74.9607162475586, "logps/rejected": -76.60880279541016, "loss": 1.1881, "margin_dpo/margin_mean": 0.4542158246040344, "margin_dpo/margin_std": 0.9578008651733398, "step": 88 }, { "epoch": 0.1345427059712774, "fcm_dpo/beta": 1.00105881690979, "fcm_dpo/delta": -0.06055300682783127, "fcm_dpo/margin": 0.45661479234695435, "fcm_dpo/q_t": 0.40275073051452637, "grad_norm": 266.41510009765625, "learning_rate": 4.984596161153135e-07, "logits/chosen": 0.20890012383460999, "logits/rejected": 0.1264243870973587, "logps/chosen": -42.28339767456055, "logps/ref_chosen": -41.191436767578125, "logps/ref_rejected": -85.44769287109375, "logps/rejected": -86.99627685546875, "loss": 1.1581, "margin_dpo/margin_mean": 0.45661553740501404, "margin_dpo/margin_std": 0.8792251348495483, "step": 89 }, { "epoch": 0.1360544217687075, "fcm_dpo/beta": 0.9870717525482178, "fcm_dpo/delta": -0.043573714792728424, "fcm_dpo/margin": 0.4473706781864166, "fcm_dpo/q_t": 0.40594804286956787, "grad_norm": 262.72222900390625, "learning_rate": 4.983095894354857e-07, "logits/chosen": 0.10524974763393402, "logits/rejected": 0.05074058473110199, "logps/chosen": -57.77943420410156, "logps/ref_chosen": -56.58390808105469, "logps/ref_rejected": -86.86978149414062, "logps/rejected": -88.51268005371094, "loss": 1.1531, "margin_dpo/margin_mean": 0.4473702609539032, "margin_dpo/margin_std": 0.8626862168312073, "step": 90 }, { "epoch": 0.13756613756613756, "fcm_dpo/beta": 0.9517369270324707, "fcm_dpo/delta": -0.20988881587982178, "fcm_dpo/margin": 0.6276508569717407, "fcm_dpo/q_t": 0.37632960081100464, "grad_norm": 212.9899139404297, "learning_rate": 4.98152617002662e-07, "logits/chosen": 0.10068871080875397, "logits/rejected": 0.057977497577667236, "logps/chosen": -53.59182357788086, "logps/ref_chosen": -52.38234329223633, "logps/ref_rejected": -72.17642211914062, "logps/rejected": -74.0135498046875, "loss": 1.0585, "margin_dpo/margin_mean": 0.6276512145996094, "margin_dpo/margin_std": 0.9779685735702515, "step": 91 }, { "epoch": 0.13907785336356765, "fcm_dpo/beta": 0.9485722780227661, "fcm_dpo/delta": -0.03629232943058014, "fcm_dpo/margin": 0.4553447365760803, "fcm_dpo/q_t": 0.40844064950942993, "grad_norm": 226.4413604736328, "learning_rate": 4.979887032076988e-07, "logits/chosen": 0.13536790013313293, "logits/rejected": 0.09770982712507248, "logps/chosen": -54.35186004638672, "logps/ref_chosen": -53.00870132446289, "logps/ref_rejected": -79.77812957763672, "logps/rejected": -81.5766372680664, "loss": 1.1892, "margin_dpo/margin_mean": 0.4553444981575012, "margin_dpo/margin_std": 0.9228836297988892, "step": 92 }, { "epoch": 0.14058956916099774, "fcm_dpo/beta": 0.9315764904022217, "fcm_dpo/delta": -0.021424148231744766, "fcm_dpo/margin": 0.45140916109085083, "fcm_dpo/q_t": 0.41404592990875244, "grad_norm": 205.6354217529297, "learning_rate": 4.978178526356172e-07, "logits/chosen": 0.144273042678833, "logits/rejected": 0.11550083011388779, "logps/chosen": -46.28402328491211, "logps/ref_chosen": -44.90705108642578, "logps/ref_rejected": -58.7879524230957, "logps/rejected": -60.61632537841797, "loss": 1.1919, "margin_dpo/margin_mean": 0.45140883326530457, "margin_dpo/margin_std": 0.9891193509101868, "step": 93 }, { "epoch": 0.1421012849584278, "fcm_dpo/beta": 0.8985931873321533, "fcm_dpo/delta": -0.160437673330307, "fcm_dpo/margin": 0.6093255281448364, "fcm_dpo/q_t": 0.3826707899570465, "grad_norm": 215.9398193359375, "learning_rate": 4.976400700654751e-07, "logits/chosen": 0.16830044984817505, "logits/rejected": 0.13087698817253113, "logps/chosen": -61.00678253173828, "logps/ref_chosen": -59.93777084350586, "logps/ref_rejected": -79.3138427734375, "logps/rejected": -80.99217224121094, "loss": 1.1328, "margin_dpo/margin_mean": 0.6093254685401917, "margin_dpo/margin_std": 1.1317553520202637, "step": 94 }, { "epoch": 0.1436130007558579, "fcm_dpo/beta": 0.8708415627479553, "fcm_dpo/delta": -0.18497896194458008, "fcm_dpo/margin": 0.657183825969696, "fcm_dpo/q_t": 0.37511885166168213, "grad_norm": 209.54396057128906, "learning_rate": 4.974553604702332e-07, "logits/chosen": 0.07259067893028259, "logits/rejected": 0.011487288400530815, "logps/chosen": -61.56159210205078, "logps/ref_chosen": -60.168487548828125, "logps/ref_rejected": -90.73665618896484, "logps/rejected": -92.78694152832031, "loss": 1.0422, "margin_dpo/margin_mean": 0.6571837067604065, "margin_dpo/margin_std": 0.9313837289810181, "step": 95 }, { "epoch": 0.14512471655328799, "fcm_dpo/beta": 0.8509422540664673, "fcm_dpo/delta": -0.15672199428081512, "fcm_dpo/margin": 0.644451379776001, "fcm_dpo/q_t": 0.3799927532672882, "grad_norm": 196.17584228515625, "learning_rate": 4.972637290166157e-07, "logits/chosen": 0.11201013624668121, "logits/rejected": 0.07059841603040695, "logps/chosen": -62.019317626953125, "logps/ref_chosen": -60.66877746582031, "logps/ref_rejected": -88.30673217773438, "logps/rejected": -90.30171966552734, "loss": 1.0662, "margin_dpo/margin_mean": 0.6444511413574219, "margin_dpo/margin_std": 0.9644219875335693, "step": 96 }, { "epoch": 0.14663643235071808, "fcm_dpo/beta": 0.8328443765640259, "fcm_dpo/delta": -0.009154386818408966, "fcm_dpo/margin": 0.48872342705726624, "fcm_dpo/q_t": 0.420562207698822, "grad_norm": 232.0682830810547, "learning_rate": 4.970651810649666e-07, "logits/chosen": 0.06511442363262177, "logits/rejected": 0.02223094552755356, "logps/chosen": -66.63578033447266, "logps/ref_chosen": -65.04412078857422, "logps/ref_rejected": -78.42092895507812, "logps/rejected": -80.50130462646484, "loss": 1.1966, "margin_dpo/margin_mean": 0.48872342705726624, "margin_dpo/margin_std": 1.0647801160812378, "step": 97 }, { "epoch": 0.14814814814814814, "fcm_dpo/beta": 0.8404428958892822, "fcm_dpo/delta": 0.041608165949583054, "fcm_dpo/margin": 0.4276841878890991, "fcm_dpo/q_t": 0.4225125312805176, "grad_norm": 219.1876678466797, "learning_rate": 4.968597221690985e-07, "logits/chosen": 0.13621120154857635, "logits/rejected": 0.11001783609390259, "logps/chosen": -56.84211730957031, "logps/ref_chosen": -55.503231048583984, "logps/ref_rejected": -72.81553649902344, "logps/rejected": -74.58211517333984, "loss": 1.1941, "margin_dpo/margin_mean": 0.4276837110519409, "margin_dpo/margin_std": 0.8988784551620483, "step": 98 }, { "epoch": 0.14965986394557823, "fcm_dpo/beta": 0.82810378074646, "fcm_dpo/delta": -0.16143038868904114, "fcm_dpo/margin": 0.6673827171325684, "fcm_dpo/q_t": 0.38527190685272217, "grad_norm": 217.5617218017578, "learning_rate": 4.966473580761389e-07, "logits/chosen": 0.14945261180400848, "logits/rejected": 0.11294733732938766, "logps/chosen": -59.96638107299805, "logps/ref_chosen": -58.57563781738281, "logps/ref_rejected": -78.693603515625, "logps/rejected": -80.7517318725586, "loss": 1.0649, "margin_dpo/margin_mean": 0.6673829555511475, "margin_dpo/margin_std": 1.0417191982269287, "step": 99 }, { "epoch": 0.15117157974300832, "fcm_dpo/beta": 0.8276715874671936, "fcm_dpo/delta": -0.04269018769264221, "fcm_dpo/margin": 0.5278904438018799, "fcm_dpo/q_t": 0.4166935086250305, "grad_norm": 255.79344177246094, "learning_rate": 4.964280947263676e-07, "logits/chosen": 0.1620662957429886, "logits/rejected": 0.15444490313529968, "logps/chosen": -81.0914306640625, "logps/ref_chosen": -79.58343505859375, "logps/ref_rejected": -92.152587890625, "logps/rejected": -94.18846893310547, "loss": 1.2587, "margin_dpo/margin_mean": 0.527890682220459, "margin_dpo/margin_std": 1.3204901218414307, "step": 100 }, { "epoch": 0.15268329554043839, "fcm_dpo/beta": 0.7907246947288513, "fcm_dpo/delta": -0.26945069432258606, "fcm_dpo/margin": 0.824831485748291, "fcm_dpo/q_t": 0.3596486747264862, "grad_norm": 161.67445373535156, "learning_rate": 4.96201938253052e-07, "logits/chosen": 0.11919434368610382, "logits/rejected": 0.08386727422475815, "logps/chosen": -53.63953399658203, "logps/ref_chosen": -52.332786560058594, "logps/ref_rejected": -69.55589294433594, "logps/rejected": -71.6874771118164, "loss": 0.9761, "margin_dpo/margin_mean": 0.8248312473297119, "margin_dpo/margin_std": 0.9689401388168335, "step": 101 }, { "epoch": 0.15419501133786848, "fcm_dpo/beta": 0.7701091170310974, "fcm_dpo/delta": -0.039370447397232056, "fcm_dpo/margin": 0.5678646564483643, "fcm_dpo/q_t": 0.40815913677215576, "grad_norm": 202.9934539794922, "learning_rate": 4.959688949822748e-07, "logits/chosen": 0.0649181455373764, "logits/rejected": 0.026860184967517853, "logps/chosen": -66.30096435546875, "logps/ref_chosen": -64.74348449707031, "logps/ref_rejected": -69.06132507324219, "logps/rejected": -71.18667602539062, "loss": 1.1748, "margin_dpo/margin_mean": 0.5678646564483643, "margin_dpo/margin_std": 1.1583635807037354, "step": 102 }, { "epoch": 0.15570672713529857, "fcm_dpo/beta": 0.7587429285049438, "fcm_dpo/delta": -0.07340075820684433, "fcm_dpo/margin": 0.6194088459014893, "fcm_dpo/q_t": 0.4005558490753174, "grad_norm": 210.62454223632812, "learning_rate": 4.957289714327572e-07, "logits/chosen": 0.14911600947380066, "logits/rejected": 0.1179221123456955, "logps/chosen": -65.3800277709961, "logps/ref_chosen": -63.83664321899414, "logps/ref_rejected": -79.32362365722656, "logps/rejected": -81.48641967773438, "loss": 1.1314, "margin_dpo/margin_mean": 0.6194085478782654, "margin_dpo/margin_std": 1.123617172241211, "step": 103 }, { "epoch": 0.15721844293272866, "fcm_dpo/beta": 0.7468178272247314, "fcm_dpo/delta": -0.08313950151205063, "fcm_dpo/margin": 0.6416445970535278, "fcm_dpo/q_t": 0.40650674700737, "grad_norm": 213.34971618652344, "learning_rate": 4.954821743156767e-07, "logits/chosen": 0.19349342584609985, "logits/rejected": 0.1075948029756546, "logps/chosen": -62.497222900390625, "logps/ref_chosen": -60.99920654296875, "logps/ref_rejected": -98.84645080566406, "logps/rejected": -100.98611450195312, "loss": 1.1443, "margin_dpo/margin_mean": 0.6416438817977905, "margin_dpo/margin_std": 1.2357177734375, "step": 104 }, { "epoch": 0.15873015873015872, "fcm_dpo/beta": 0.7460736632347107, "fcm_dpo/delta": 0.003730788826942444, "fcm_dpo/margin": 0.530280590057373, "fcm_dpo/q_t": 0.4173508286476135, "grad_norm": 230.8499298095703, "learning_rate": 4.952285105344791e-07, "logits/chosen": 0.11480271816253662, "logits/rejected": 0.06232992187142372, "logps/chosen": -72.46751403808594, "logps/ref_chosen": -70.95027160644531, "logps/ref_rejected": -87.88340759277344, "logps/rejected": -89.93093872070312, "loss": 1.2081, "margin_dpo/margin_mean": 0.5302802920341492, "margin_dpo/margin_std": 1.1925766468048096, "step": 105 }, { "epoch": 0.1602418745275888, "fcm_dpo/beta": 0.7372579574584961, "fcm_dpo/delta": -0.013026438653469086, "fcm_dpo/margin": 0.5590275526046753, "fcm_dpo/q_t": 0.4098079204559326, "grad_norm": 204.2117919921875, "learning_rate": 4.949679871846857e-07, "logits/chosen": 0.15588057041168213, "logits/rejected": 0.14248088002204895, "logps/chosen": -63.92448806762695, "logps/ref_chosen": -62.45933151245117, "logps/ref_rejected": -67.00595092773438, "logps/rejected": -69.03013610839844, "loss": 1.174, "margin_dpo/margin_mean": 0.5590271949768066, "margin_dpo/margin_std": 1.1391665935516357, "step": 106 }, { "epoch": 0.1617535903250189, "fcm_dpo/beta": 0.7442134618759155, "fcm_dpo/delta": 0.10780694335699081, "fcm_dpo/margin": 0.3956339359283447, "fcm_dpo/q_t": 0.4418404996395111, "grad_norm": 259.65618896484375, "learning_rate": 4.947006115536947e-07, "logits/chosen": 0.09667301923036575, "logits/rejected": 0.07568878680467606, "logps/chosen": -77.47236633300781, "logps/ref_chosen": -75.83796691894531, "logps/ref_rejected": -87.74038696289062, "logps/rejected": -89.7704086303711, "loss": 1.3412, "margin_dpo/margin_mean": 0.39563363790512085, "margin_dpo/margin_std": 1.3363198041915894, "step": 107 }, { "epoch": 0.16326530612244897, "fcm_dpo/beta": 0.7422864437103271, "fcm_dpo/delta": -0.15637360513210297, "fcm_dpo/margin": 0.7383235692977905, "fcm_dpo/q_t": 0.38797321915626526, "grad_norm": 188.6773223876953, "learning_rate": 4.944263911205772e-07, "logits/chosen": 0.0897904634475708, "logits/rejected": 0.06220732629299164, "logps/chosen": -69.81796264648438, "logps/ref_chosen": -68.39323425292969, "logps/ref_rejected": -83.24267578125, "logps/rejected": -85.40571594238281, "loss": 1.1084, "margin_dpo/margin_mean": 0.738323450088501, "margin_dpo/margin_std": 1.277268886566162, "step": 108 }, { "epoch": 0.16477702191987906, "fcm_dpo/beta": 0.7077078819274902, "fcm_dpo/delta": -0.24241167306900024, "fcm_dpo/margin": 0.8866573572158813, "fcm_dpo/q_t": 0.371415376663208, "grad_norm": 157.52822875976562, "learning_rate": 4.941453335558681e-07, "logits/chosen": 0.10934356600046158, "logits/rejected": 0.059307027608156204, "logps/chosen": -56.932743072509766, "logps/ref_chosen": -55.52748107910156, "logps/ref_rejected": -83.55218505859375, "logps/rejected": -85.84410858154297, "loss": 1.0002, "margin_dpo/margin_mean": 0.8866567611694336, "margin_dpo/margin_std": 1.2174742221832275, "step": 109 }, { "epoch": 0.16628873771730915, "fcm_dpo/beta": 0.7073754072189331, "fcm_dpo/delta": 0.05803888291120529, "fcm_dpo/margin": 0.48516157269477844, "fcm_dpo/q_t": 0.428050696849823, "grad_norm": 221.89695739746094, "learning_rate": 4.938574467213517e-07, "logits/chosen": 0.08555251359939575, "logits/rejected": 0.09368757903575897, "logps/chosen": -82.78941345214844, "logps/ref_chosen": -81.15874481201172, "logps/ref_rejected": -72.56021118164062, "logps/rejected": -74.67604064941406, "loss": 1.2345, "margin_dpo/margin_mean": 0.4851612150669098, "margin_dpo/margin_std": 1.202465295791626, "step": 110 }, { "epoch": 0.16780045351473924, "fcm_dpo/beta": 0.707313597202301, "fcm_dpo/delta": -0.029390130192041397, "fcm_dpo/margin": 0.6044574975967407, "fcm_dpo/q_t": 0.4066217243671417, "grad_norm": 188.60508728027344, "learning_rate": 4.935627386698418e-07, "logits/chosen": 0.20676471292972565, "logits/rejected": 0.17049609124660492, "logps/chosen": -54.14882278442383, "logps/ref_chosen": -52.358985900878906, "logps/ref_rejected": -77.06150817871094, "logps/rejected": -79.45579528808594, "loss": 1.2042, "margin_dpo/margin_mean": 0.6044571399688721, "margin_dpo/margin_std": 1.3020250797271729, "step": 111 }, { "epoch": 0.1693121693121693, "fcm_dpo/beta": 0.6883267164230347, "fcm_dpo/delta": -0.13100209832191467, "fcm_dpo/margin": 0.7614709734916687, "fcm_dpo/q_t": 0.3860092759132385, "grad_norm": 189.34796142578125, "learning_rate": 4.932612176449559e-07, "logits/chosen": 0.10900241881608963, "logits/rejected": 0.048736996948719025, "logps/chosen": -64.50396728515625, "logps/ref_chosen": -63.02006530761719, "logps/ref_rejected": -111.36941528320312, "logps/rejected": -113.61478424072266, "loss": 1.1028, "margin_dpo/margin_mean": 0.7614700198173523, "margin_dpo/margin_std": 1.2644274234771729, "step": 112 }, { "epoch": 0.1708238851095994, "fcm_dpo/beta": 0.6883626580238342, "fcm_dpo/delta": -0.01845688372850418, "fcm_dpo/margin": 0.6044442057609558, "fcm_dpo/q_t": 0.4099445939064026, "grad_norm": 201.9014434814453, "learning_rate": 4.929528920808854e-07, "logits/chosen": 0.13628755509853363, "logits/rejected": 0.0992526188492775, "logps/chosen": -57.55316925048828, "logps/ref_chosen": -55.80766296386719, "logps/ref_rejected": -69.84014129638672, "logps/rejected": -72.19009399414062, "loss": 1.2038, "margin_dpo/margin_mean": 0.6044440269470215, "margin_dpo/margin_std": 1.3071109056472778, "step": 113 }, { "epoch": 0.17233560090702948, "fcm_dpo/beta": 0.6465242505073547, "fcm_dpo/delta": -0.31181034445762634, "fcm_dpo/margin": 1.0640312433242798, "fcm_dpo/q_t": 0.3557575047016144, "grad_norm": 138.01580810546875, "learning_rate": 4.92637770602159e-07, "logits/chosen": 0.14081577956676483, "logits/rejected": 0.08423489332199097, "logps/chosen": -67.75685119628906, "logps/ref_chosen": -66.33277130126953, "logps/ref_rejected": -71.61489868164062, "logps/rejected": -74.1030044555664, "loss": 0.9686, "margin_dpo/margin_mean": 1.0640311241149902, "margin_dpo/margin_std": 1.3256173133850098, "step": 114 }, { "epoch": 0.17384731670445955, "fcm_dpo/beta": 0.6344318985939026, "fcm_dpo/delta": -0.0754128247499466, "fcm_dpo/margin": 0.7438329458236694, "fcm_dpo/q_t": 0.4032962918281555, "grad_norm": 169.448974609375, "learning_rate": 4.923158620234019e-07, "logits/chosen": 0.14370641112327576, "logits/rejected": 0.08639145642518997, "logps/chosen": -57.44829559326172, "logps/ref_chosen": -55.74903869628906, "logps/ref_rejected": -79.59849548339844, "logps/rejected": -82.04158782958984, "loss": 1.1079, "margin_dpo/margin_mean": 0.7438331842422485, "margin_dpo/margin_std": 1.2644445896148682, "step": 115 }, { "epoch": 0.17535903250188964, "fcm_dpo/beta": 0.6176035404205322, "fcm_dpo/delta": -0.15088674426078796, "fcm_dpo/margin": 0.8790351152420044, "fcm_dpo/q_t": 0.38145536184310913, "grad_norm": 134.537841796875, "learning_rate": 4.91987175349089e-07, "logits/chosen": 0.15729355812072754, "logits/rejected": 0.09324823319911957, "logps/chosen": -50.96516418457031, "logps/ref_chosen": -49.36516571044922, "logps/ref_rejected": -72.84671020507812, "logps/rejected": -75.32572937011719, "loss": 1.0332, "margin_dpo/margin_mean": 0.879035472869873, "margin_dpo/margin_std": 1.1753203868865967, "step": 116 }, { "epoch": 0.17687074829931973, "fcm_dpo/beta": 0.5997291803359985, "fcm_dpo/delta": -0.06563954800367355, "fcm_dpo/margin": 0.7671282291412354, "fcm_dpo/q_t": 0.3957594633102417, "grad_norm": 144.81961059570312, "learning_rate": 4.916517197732933e-07, "logits/chosen": 0.13732296228408813, "logits/rejected": 0.10388919711112976, "logps/chosen": -59.18053436279297, "logps/ref_chosen": -57.710899353027344, "logps/ref_rejected": -69.77253723144531, "logps/rejected": -72.00930786132812, "loss": 1.1199, "margin_dpo/margin_mean": 0.767128586769104, "margin_dpo/margin_std": 1.2783540487289429, "step": 117 }, { "epoch": 0.17838246409674982, "fcm_dpo/beta": 0.5893198847770691, "fcm_dpo/delta": -0.13144376873970032, "fcm_dpo/margin": 0.8884799480438232, "fcm_dpo/q_t": 0.3849080204963684, "grad_norm": 144.99359130859375, "learning_rate": 4.913095046794281e-07, "logits/chosen": 0.20904436707496643, "logits/rejected": 0.1704784482717514, "logps/chosen": -53.963443756103516, "logps/ref_chosen": -52.479896545410156, "logps/ref_rejected": -81.359130859375, "logps/rejected": -83.73114776611328, "loss": 1.0523, "margin_dpo/margin_mean": 0.8884795904159546, "margin_dpo/margin_std": 1.2354551553726196, "step": 118 }, { "epoch": 0.17989417989417988, "fcm_dpo/beta": 0.5922322869300842, "fcm_dpo/delta": 0.035253897309303284, "fcm_dpo/margin": 0.6178330183029175, "fcm_dpo/q_t": 0.4204210042953491, "grad_norm": 160.6442413330078, "learning_rate": 4.909605396399855e-07, "logits/chosen": 0.12867693603038788, "logits/rejected": 0.09313994646072388, "logps/chosen": -63.268272399902344, "logps/ref_chosen": -61.35767364501953, "logps/ref_rejected": -75.71510314941406, "logps/rejected": -78.2435302734375, "loss": 1.2274, "margin_dpo/margin_mean": 0.6178329586982727, "margin_dpo/margin_std": 1.4659829139709473, "step": 119 }, { "epoch": 0.18140589569160998, "fcm_dpo/beta": 0.5799944400787354, "fcm_dpo/delta": -0.13340742886066437, "fcm_dpo/margin": 0.9073317050933838, "fcm_dpo/q_t": 0.38455823063850403, "grad_norm": 136.73265075683594, "learning_rate": 4.906048344162676e-07, "logits/chosen": 0.1341107040643692, "logits/rejected": 0.07828693091869354, "logps/chosen": -61.49884796142578, "logps/ref_chosen": -59.907569885253906, "logps/ref_rejected": -79.6910629272461, "logps/rejected": -82.1896743774414, "loss": 1.0331, "margin_dpo/margin_mean": 0.9073318243026733, "margin_dpo/margin_std": 1.1959114074707031, "step": 120 }, { "epoch": 0.18291761148904007, "fcm_dpo/beta": 0.5721194744110107, "fcm_dpo/delta": -0.04547984525561333, "fcm_dpo/margin": 0.7750140428543091, "fcm_dpo/q_t": 0.40316978096961975, "grad_norm": 131.84844970703125, "learning_rate": 4.902423989581143e-07, "logits/chosen": 0.23460043966770172, "logits/rejected": 0.15458913147449493, "logps/chosen": -57.46360778808594, "logps/ref_chosen": -55.66604232788086, "logps/ref_rejected": -101.56233978271484, "logps/rejected": -104.13491821289062, "loss": 1.0973, "margin_dpo/margin_mean": 0.7750145792961121, "margin_dpo/margin_std": 1.2166812419891357, "step": 121 }, { "epoch": 0.18442932728647016, "fcm_dpo/beta": 0.5550453066825867, "fcm_dpo/delta": -0.2271803468465805, "fcm_dpo/margin": 1.1058701276779175, "fcm_dpo/q_t": 0.3700582981109619, "grad_norm": 148.10528564453125, "learning_rate": 4.898732434036243e-07, "logits/chosen": 0.14437180757522583, "logits/rejected": 0.11171398311853409, "logps/chosen": -65.11761474609375, "logps/ref_chosen": -63.334373474121094, "logps/ref_rejected": -73.67523193359375, "logps/rejected": -76.5643539428711, "loss": 1.0191, "margin_dpo/margin_mean": 1.1058697700500488, "margin_dpo/margin_std": 1.5541871786117554, "step": 122 }, { "epoch": 0.18594104308390022, "fcm_dpo/beta": 0.5463725328445435, "fcm_dpo/delta": -0.061007022857666016, "fcm_dpo/margin": 0.836315929889679, "fcm_dpo/q_t": 0.39866113662719727, "grad_norm": 136.9763946533203, "learning_rate": 4.894973780788722e-07, "logits/chosen": 0.1658335030078888, "logits/rejected": 0.12649542093276978, "logps/chosen": -58.5877799987793, "logps/ref_chosen": -56.89874267578125, "logps/ref_rejected": -78.97028350830078, "logps/rejected": -81.49563598632812, "loss": 1.1202, "margin_dpo/margin_mean": 0.8363161087036133, "margin_dpo/margin_std": 1.388123631477356, "step": 123 }, { "epoch": 0.1874527588813303, "fcm_dpo/beta": 0.5198322534561157, "fcm_dpo/delta": -0.2163441777229309, "fcm_dpo/margin": 1.1578905582427979, "fcm_dpo/q_t": 0.3664587140083313, "grad_norm": 113.58149719238281, "learning_rate": 4.89114813497619e-07, "logits/chosen": 0.18245506286621094, "logits/rejected": 0.12739111483097076, "logps/chosen": -58.94036102294922, "logps/ref_chosen": -57.116085052490234, "logps/ref_rejected": -87.93074035644531, "logps/rejected": -90.91291046142578, "loss": 0.9841, "margin_dpo/margin_mean": 1.15788996219635, "margin_dpo/margin_std": 1.3284329175949097, "step": 124 }, { "epoch": 0.1889644746787604, "fcm_dpo/beta": 0.5057904720306396, "fcm_dpo/delta": -0.11596319824457169, "fcm_dpo/margin": 1.007077693939209, "fcm_dpo/q_t": 0.3886137008666992, "grad_norm": 125.64469909667969, "learning_rate": 4.887255603610184e-07, "logits/chosen": 0.1723092794418335, "logits/rejected": 0.11974655091762543, "logps/chosen": -67.64098358154297, "logps/ref_chosen": -65.7061767578125, "logps/ref_rejected": -91.72711944580078, "logps/rejected": -94.66900634765625, "loss": 1.0564, "margin_dpo/margin_mean": 1.0070770978927612, "margin_dpo/margin_std": 1.4230128526687622, "step": 125 }, { "epoch": 0.19047619047619047, "fcm_dpo/beta": 0.5007922649383545, "fcm_dpo/delta": -0.04253540188074112, "fcm_dpo/margin": 0.8797393441200256, "fcm_dpo/q_t": 0.41190239787101746, "grad_norm": 117.30374908447266, "learning_rate": 4.883296295573176e-07, "logits/chosen": 0.020032621920108795, "logits/rejected": 0.01394350454211235, "logps/chosen": -69.92984771728516, "logps/ref_chosen": -68.17608642578125, "logps/ref_rejected": -65.1175537109375, "logps/rejected": -67.75105285644531, "loss": 1.1541, "margin_dpo/margin_mean": 0.8797396421432495, "margin_dpo/margin_std": 1.8252670764923096, "step": 126 }, { "epoch": 0.19198790627362056, "fcm_dpo/beta": 0.48407071828842163, "fcm_dpo/delta": -0.1724184900522232, "fcm_dpo/margin": 1.157707691192627, "fcm_dpo/q_t": 0.3734918236732483, "grad_norm": 111.56476593017578, "learning_rate": 4.87927032161552e-07, "logits/chosen": 0.10429647564888, "logits/rejected": 0.07567700743675232, "logps/chosen": -63.79847717285156, "logps/ref_chosen": -61.88023376464844, "logps/ref_rejected": -68.46012878417969, "logps/rejected": -71.53607177734375, "loss": 0.9958, "margin_dpo/margin_mean": 1.1577074527740479, "margin_dpo/margin_std": 1.2988379001617432, "step": 127 }, { "epoch": 0.19349962207105065, "fcm_dpo/beta": 0.4786633849143982, "fcm_dpo/delta": -0.03551746904850006, "fcm_dpo/margin": 0.9054340124130249, "fcm_dpo/q_t": 0.4062952995300293, "grad_norm": 124.49673461914062, "learning_rate": 4.875177794352363e-07, "logits/chosen": 0.12086290121078491, "logits/rejected": 0.07124130427837372, "logps/chosen": -68.7914810180664, "logps/ref_chosen": -66.708984375, "logps/ref_rejected": -94.97969055175781, "logps/rejected": -97.9676284790039, "loss": 1.1534, "margin_dpo/margin_mean": 0.9054335355758667, "margin_dpo/margin_std": 1.7206192016601562, "step": 128 }, { "epoch": 0.19501133786848074, "fcm_dpo/beta": 0.4809413254261017, "fcm_dpo/delta": 0.016113094985485077, "fcm_dpo/margin": 0.7994933128356934, "fcm_dpo/q_t": 0.4160780906677246, "grad_norm": 132.9231414794922, "learning_rate": 4.871018828260491e-07, "logits/chosen": 0.1366616189479828, "logits/rejected": 0.12947385013103485, "logps/chosen": -67.60844421386719, "logps/ref_chosen": -65.33882904052734, "logps/ref_rejected": -68.06109619140625, "logps/rejected": -71.13020324707031, "loss": 1.156, "margin_dpo/margin_mean": 0.7994937896728516, "margin_dpo/margin_std": 1.5029575824737549, "step": 129 }, { "epoch": 0.1965230536659108, "fcm_dpo/beta": 0.47079285979270935, "fcm_dpo/delta": -0.08745460212230682, "fcm_dpo/margin": 1.020914912223816, "fcm_dpo/q_t": 0.3913637697696686, "grad_norm": 128.69020080566406, "learning_rate": 4.866793539675126e-07, "logits/chosen": 0.08387620002031326, "logits/rejected": 0.04010556638240814, "logps/chosen": -60.71220016479492, "logps/ref_chosen": -58.660743713378906, "logps/ref_rejected": -79.24510192871094, "logps/rejected": -82.31746673583984, "loss": 1.0497, "margin_dpo/margin_mean": 1.020914912223816, "margin_dpo/margin_std": 1.254804253578186, "step": 130 }, { "epoch": 0.1980347694633409, "fcm_dpo/beta": 0.4654075503349304, "fcm_dpo/delta": -0.12034067511558533, "fcm_dpo/margin": 1.1049726009368896, "fcm_dpo/q_t": 0.3882097601890564, "grad_norm": 114.07759094238281, "learning_rate": 4.86250204678667e-07, "logits/chosen": 0.12750566005706787, "logits/rejected": 0.06975096464157104, "logps/chosen": -54.52845764160156, "logps/ref_chosen": -52.51453399658203, "logps/ref_rejected": -85.18299865722656, "logps/rejected": -88.30189514160156, "loss": 1.0891, "margin_dpo/margin_mean": 1.1049723625183105, "margin_dpo/margin_std": 1.7650415897369385, "step": 131 }, { "epoch": 0.19954648526077098, "fcm_dpo/beta": 0.4587087631225586, "fcm_dpo/delta": -0.04479576647281647, "fcm_dpo/margin": 0.9653003215789795, "fcm_dpo/q_t": 0.4012880325317383, "grad_norm": 122.89605712890625, "learning_rate": 4.858144469637408e-07, "logits/chosen": 0.21066978573799133, "logits/rejected": 0.1797289103269577, "logps/chosen": -67.94572448730469, "logps/ref_chosen": -65.68513488769531, "logps/ref_rejected": -69.54120635986328, "logps/rejected": -72.76710510253906, "loss": 1.1302, "margin_dpo/margin_mean": 0.9653001427650452, "margin_dpo/margin_std": 1.7105207443237305, "step": 132 }, { "epoch": 0.20105820105820105, "fcm_dpo/beta": 0.4549636244773865, "fcm_dpo/delta": -0.036287300288677216, "fcm_dpo/margin": 0.9553782939910889, "fcm_dpo/q_t": 0.4041925072669983, "grad_norm": 119.6462173461914, "learning_rate": 4.853720930118138e-07, "logits/chosen": 0.1289050132036209, "logits/rejected": 0.11935198307037354, "logps/chosen": -65.79443359375, "logps/ref_chosen": -63.598114013671875, "logps/ref_rejected": -73.72798156738281, "logps/rejected": -76.87968444824219, "loss": 1.1168, "margin_dpo/margin_mean": 0.955377995967865, "margin_dpo/margin_std": 1.588864803314209, "step": 133 }, { "epoch": 0.20256991685563114, "fcm_dpo/beta": 0.4380050301551819, "fcm_dpo/delta": -0.21895548701286316, "fcm_dpo/margin": 1.3798502683639526, "fcm_dpo/q_t": 0.3665582835674286, "grad_norm": 100.31354522705078, "learning_rate": 4.849231551964771e-07, "logits/chosen": 0.2302895486354828, "logits/rejected": 0.17770959436893463, "logps/chosen": -55.90137481689453, "logps/ref_chosen": -53.79457092285156, "logps/ref_rejected": -74.16741943359375, "logps/rejected": -77.65406799316406, "loss": 0.9906, "margin_dpo/margin_mean": 1.3798508644104004, "margin_dpo/margin_std": 1.6743557453155518, "step": 134 }, { "epoch": 0.20408163265306123, "fcm_dpo/beta": 0.436504989862442, "fcm_dpo/delta": 0.015902848914265633, "fcm_dpo/margin": 0.8812652230262756, "fcm_dpo/q_t": 0.41663700342178345, "grad_norm": 100.42858123779297, "learning_rate": 4.844676460754862e-07, "logits/chosen": 0.16259633004665375, "logits/rejected": 0.13083291053771973, "logps/chosen": -51.5870475769043, "logps/ref_chosen": -49.441078186035156, "logps/ref_rejected": -65.96878051757812, "logps/rejected": -68.99601745605469, "loss": 1.1571, "margin_dpo/margin_mean": 0.8812651634216309, "margin_dpo/margin_std": 1.672384262084961, "step": 135 }, { "epoch": 0.20559334845049132, "fcm_dpo/beta": 0.4303373098373413, "fcm_dpo/delta": -0.09148843586444855, "fcm_dpo/margin": 1.1316370964050293, "fcm_dpo/q_t": 0.40487587451934814, "grad_norm": 126.06951141357422, "learning_rate": 4.840055783904106e-07, "logits/chosen": 0.12648090720176697, "logits/rejected": 0.06315163522958755, "logps/chosen": -69.26954650878906, "logps/ref_chosen": -66.75926208496094, "logps/ref_rejected": -94.61787414550781, "logps/rejected": -98.25979614257812, "loss": 1.1621, "margin_dpo/margin_mean": 1.1316382884979248, "margin_dpo/margin_std": 2.3219375610351562, "step": 136 }, { "epoch": 0.20710506424792138, "fcm_dpo/beta": 0.4227127432823181, "fcm_dpo/delta": -0.10718655586242676, "fcm_dpo/margin": 1.1874181032180786, "fcm_dpo/q_t": 0.3915935456752777, "grad_norm": 96.57810974121094, "learning_rate": 4.835369650662767e-07, "logits/chosen": 0.1805136352777481, "logits/rejected": 0.15341074764728546, "logps/chosen": -59.14149856567383, "logps/ref_chosen": -56.78379821777344, "logps/ref_rejected": -69.89952087402344, "logps/rejected": -73.44464111328125, "loss": 1.0832, "margin_dpo/margin_mean": 1.1874182224273682, "margin_dpo/margin_std": 1.862781047821045, "step": 137 }, { "epoch": 0.20861678004535147, "fcm_dpo/beta": 0.4196794033050537, "fcm_dpo/delta": 0.003834410570561886, "fcm_dpo/margin": 0.9443340301513672, "fcm_dpo/q_t": 0.41209620237350464, "grad_norm": 105.36014556884766, "learning_rate": 4.830618192112065e-07, "logits/chosen": 0.15982270240783691, "logits/rejected": 0.12580767273902893, "logps/chosen": -61.54125213623047, "logps/ref_chosen": -58.766014099121094, "logps/ref_rejected": -68.12371826171875, "logps/rejected": -71.84329223632812, "loss": 1.1579, "margin_dpo/margin_mean": 0.9443341493606567, "margin_dpo/margin_std": 1.7914925813674927, "step": 138 }, { "epoch": 0.21012849584278157, "fcm_dpo/beta": 0.4146318733692169, "fcm_dpo/delta": -0.1286012828350067, "fcm_dpo/margin": 1.2585363388061523, "fcm_dpo/q_t": 0.3855533003807068, "grad_norm": 105.36235809326172, "learning_rate": 4.825801541160509e-07, "logits/chosen": 0.12006682902574539, "logits/rejected": 0.0935576856136322, "logps/chosen": -73.93391418457031, "logps/ref_chosen": -71.2255859375, "logps/ref_rejected": -82.1834716796875, "logps/rejected": -86.15032958984375, "loss": 1.0427, "margin_dpo/margin_mean": 1.2585363388061523, "margin_dpo/margin_std": 1.6681911945343018, "step": 139 }, { "epoch": 0.21164021164021163, "fcm_dpo/beta": 0.39565181732177734, "fcm_dpo/delta": -0.22462356090545654, "fcm_dpo/margin": 1.5439527034759521, "fcm_dpo/q_t": 0.36816489696502686, "grad_norm": 110.28506469726562, "learning_rate": 4.820919832540181e-07, "logits/chosen": 0.13060888648033142, "logits/rejected": 0.08857063204050064, "logps/chosen": -65.80934143066406, "logps/ref_chosen": -63.27766418457031, "logps/ref_rejected": -83.30647277832031, "logps/rejected": -87.3821029663086, "loss": 1.059, "margin_dpo/margin_mean": 1.5439517498016357, "margin_dpo/margin_std": 2.388321876525879, "step": 140 }, { "epoch": 0.21315192743764172, "fcm_dpo/beta": 0.3803204894065857, "fcm_dpo/delta": -0.143003448843956, "fcm_dpo/margin": 1.4029268026351929, "fcm_dpo/q_t": 0.3864386975765228, "grad_norm": 98.1742935180664, "learning_rate": 4.815973202802966e-07, "logits/chosen": 0.17304059863090515, "logits/rejected": 0.13255923986434937, "logps/chosen": -64.45763397216797, "logps/ref_chosen": -61.76676940917969, "logps/ref_rejected": -88.60601806640625, "logps/rejected": -92.69981384277344, "loss": 1.0631, "margin_dpo/margin_mean": 1.4029275178909302, "margin_dpo/margin_std": 2.0814921855926514, "step": 141 }, { "epoch": 0.2146636432350718, "fcm_dpo/beta": 0.3807763159275055, "fcm_dpo/delta": 0.0015191948041319847, "fcm_dpo/margin": 1.0465271472930908, "fcm_dpo/q_t": 0.41167542338371277, "grad_norm": 96.54689025878906, "learning_rate": 4.810961790316729e-07, "logits/chosen": 0.15406419336795807, "logits/rejected": 0.13056586682796478, "logps/chosen": -67.98941802978516, "logps/ref_chosen": -65.2747802734375, "logps/ref_rejected": -81.1378173828125, "logps/rejected": -84.89898681640625, "loss": 1.1335, "margin_dpo/margin_mean": 1.0465269088745117, "margin_dpo/margin_std": 1.7925764322280884, "step": 142 }, { "epoch": 0.2161753590325019, "fcm_dpo/beta": 0.37847280502319336, "fcm_dpo/delta": -0.016717037186026573, "fcm_dpo/margin": 1.0988901853561401, "fcm_dpo/q_t": 0.4074873626232147, "grad_norm": 112.29006958007812, "learning_rate": 4.805885735261454e-07, "logits/chosen": 0.16635353863239288, "logits/rejected": 0.15071120858192444, "logps/chosen": -65.32980346679688, "logps/ref_chosen": -62.617828369140625, "logps/ref_rejected": -70.39239501953125, "logps/rejected": -74.20326232910156, "loss": 1.1593, "margin_dpo/margin_mean": 1.0988903045654297, "margin_dpo/margin_std": 2.125783681869507, "step": 143 }, { "epoch": 0.21768707482993196, "fcm_dpo/beta": 0.3751413822174072, "fcm_dpo/delta": -0.06344564259052277, "fcm_dpo/margin": 1.227471113204956, "fcm_dpo/q_t": 0.3995649814605713, "grad_norm": 101.40666198730469, "learning_rate": 4.800745179625307e-07, "logits/chosen": 0.15766915678977966, "logits/rejected": 0.13041952252388, "logps/chosen": -63.764076232910156, "logps/ref_chosen": -60.80268859863281, "logps/ref_rejected": -79.07284545898438, "logps/rejected": -83.26170349121094, "loss": 1.1134, "margin_dpo/margin_mean": 1.2274713516235352, "margin_dpo/margin_std": 2.0769271850585938, "step": 144 }, { "epoch": 0.21919879062736206, "fcm_dpo/beta": 0.3703409433364868, "fcm_dpo/delta": -0.06774787604808807, "fcm_dpo/margin": 1.2544803619384766, "fcm_dpo/q_t": 0.4007543623447418, "grad_norm": 111.68207550048828, "learning_rate": 4.795540267200686e-07, "logits/chosen": 0.11246728897094727, "logits/rejected": 0.12925654649734497, "logps/chosen": -77.45028686523438, "logps/ref_chosen": -74.61146545410156, "logps/ref_rejected": -83.24461364746094, "logps/rejected": -87.3379135131836, "loss": 1.1278, "margin_dpo/margin_mean": 1.2544798851013184, "margin_dpo/margin_std": 2.258026599884033, "step": 145 }, { "epoch": 0.22071050642479215, "fcm_dpo/beta": 0.3645484447479248, "fcm_dpo/delta": -0.09541298449039459, "fcm_dpo/margin": 1.3463966846466064, "fcm_dpo/q_t": 0.3941432237625122, "grad_norm": 92.99526977539062, "learning_rate": 4.790271143580173e-07, "logits/chosen": 0.1032009869813919, "logits/rejected": 0.0877787172794342, "logps/chosen": -60.48515701293945, "logps/ref_chosen": -57.84098434448242, "logps/ref_rejected": -67.47422790527344, "logps/rejected": -71.46479034423828, "loss": 1.082, "margin_dpo/margin_mean": 1.3463963270187378, "margin_dpo/margin_std": 2.091710090637207, "step": 146 }, { "epoch": 0.2222222222222222, "fcm_dpo/beta": 0.36428195238113403, "fcm_dpo/delta": 0.020590590313076973, "fcm_dpo/margin": 1.0431098937988281, "fcm_dpo/q_t": 0.41619163751602173, "grad_norm": 116.23778533935547, "learning_rate": 4.784937956152489e-07, "logits/chosen": 0.13601523637771606, "logits/rejected": 0.09667371958494186, "logps/chosen": -69.94081115722656, "logps/ref_chosen": -66.81346893310547, "logps/ref_rejected": -81.1796875, "logps/rejected": -85.35014343261719, "loss": 1.1814, "margin_dpo/margin_mean": 1.0431100130081177, "margin_dpo/margin_std": 2.160703659057617, "step": 147 }, { "epoch": 0.2237339380196523, "fcm_dpo/beta": 0.3580806255340576, "fcm_dpo/delta": -0.1336955428123474, "fcm_dpo/margin": 1.4712722301483154, "fcm_dpo/q_t": 0.3866221308708191, "grad_norm": 77.85800170898438, "learning_rate": 4.779540854098347e-07, "logits/chosen": 0.23744139075279236, "logits/rejected": 0.1707296371459961, "logps/chosen": -51.66542053222656, "logps/ref_chosen": -48.6877555847168, "logps/ref_rejected": -67.50503540039062, "logps/rejected": -71.95396423339844, "loss": 1.07, "margin_dpo/margin_mean": 1.4712722301483154, "margin_dpo/margin_std": 2.22235369682312, "step": 148 }, { "epoch": 0.2252456538170824, "fcm_dpo/beta": 0.3439862132072449, "fcm_dpo/delta": -0.19360411167144775, "fcm_dpo/margin": 1.6928032636642456, "fcm_dpo/q_t": 0.3755077123641968, "grad_norm": 79.94820404052734, "learning_rate": 4.774079988386296e-07, "logits/chosen": 0.12722453474998474, "logits/rejected": 0.08169707655906677, "logps/chosen": -58.75447082519531, "logps/ref_chosen": -55.143775939941406, "logps/ref_rejected": -64.79888916015625, "logps/rejected": -70.10238647460938, "loss": 1.0219, "margin_dpo/margin_mean": 1.6928033828735352, "margin_dpo/margin_std": 2.312622547149658, "step": 149 }, { "epoch": 0.22675736961451248, "fcm_dpo/beta": 0.32747161388397217, "fcm_dpo/delta": -0.2582157552242279, "fcm_dpo/margin": 1.9585295915603638, "fcm_dpo/q_t": 0.35701072216033936, "grad_norm": 76.83651733398438, "learning_rate": 4.768555511768486e-07, "logits/chosen": 0.15965455770492554, "logits/rejected": 0.11961585283279419, "logps/chosen": -70.1795425415039, "logps/ref_chosen": -67.47074890136719, "logps/ref_rejected": -89.21170806884766, "logps/rejected": -93.8790283203125, "loss": 0.9533, "margin_dpo/margin_mean": 1.9585298299789429, "margin_dpo/margin_std": 2.1194467544555664, "step": 150 }, { "epoch": 0.22826908541194255, "fcm_dpo/beta": 0.3103789687156677, "fcm_dpo/delta": -0.2853265702724457, "fcm_dpo/margin": 2.147468328475952, "fcm_dpo/q_t": 0.3529084324836731, "grad_norm": 64.65057373046875, "learning_rate": 4.762967578776406e-07, "logits/chosen": 0.16163957118988037, "logits/rejected": 0.11220911145210266, "logps/chosen": -55.12104034423828, "logps/ref_chosen": -52.45954132080078, "logps/ref_rejected": -79.0630111694336, "logps/rejected": -83.87197875976562, "loss": 0.9339, "margin_dpo/margin_mean": 2.147468328475952, "margin_dpo/margin_std": 2.266123056411743, "step": 151 }, { "epoch": 0.22978080120937264, "fcm_dpo/beta": 0.30170968174934387, "fcm_dpo/delta": -0.11586311459541321, "fcm_dpo/margin": 1.69052255153656, "fcm_dpo/q_t": 0.3897024095058441, "grad_norm": 74.69120025634766, "learning_rate": 4.757316345716553e-07, "logits/chosen": 0.20322634279727936, "logits/rejected": 0.1573318988084793, "logps/chosen": -59.99116134643555, "logps/ref_chosen": -56.5538330078125, "logps/ref_rejected": -76.55074310302734, "logps/rejected": -81.6785888671875, "loss": 1.0747, "margin_dpo/margin_mean": 1.6905221939086914, "margin_dpo/margin_std": 2.551938533782959, "step": 152 }, { "epoch": 0.23129251700680273, "fcm_dpo/beta": 0.29183411598205566, "fcm_dpo/delta": -0.13964568078517914, "fcm_dpo/margin": 1.8225746154785156, "fcm_dpo/q_t": 0.37922757863998413, "grad_norm": 68.27378845214844, "learning_rate": 4.751601970666064e-07, "logits/chosen": 0.1238468810915947, "logits/rejected": 0.08851639926433563, "logps/chosen": -71.23893737792969, "logps/ref_chosen": -68.00689697265625, "logps/ref_rejected": -74.83482360839844, "logps/rejected": -79.88944244384766, "loss": 1.01, "margin_dpo/margin_mean": 1.8225750923156738, "margin_dpo/margin_std": 2.1173226833343506, "step": 153 }, { "epoch": 0.2328042328042328, "fcm_dpo/beta": 0.29103392362594604, "fcm_dpo/delta": 0.008851571008563042, "fcm_dpo/margin": 1.3450570106506348, "fcm_dpo/q_t": 0.4131838083267212, "grad_norm": 72.07164001464844, "learning_rate": 4.745824613468292e-07, "logits/chosen": 0.22070343792438507, "logits/rejected": 0.21719685196876526, "logps/chosen": -63.0831413269043, "logps/ref_chosen": -59.222537994384766, "logps/ref_rejected": -64.19131469726562, "logps/rejected": -69.39697265625, "loss": 1.178, "margin_dpo/margin_mean": 1.3450572490692139, "margin_dpo/margin_std": 2.7515459060668945, "step": 154 }, { "epoch": 0.23431594860166288, "fcm_dpo/beta": 0.2854927182197571, "fcm_dpo/delta": -0.09956204891204834, "fcm_dpo/margin": 1.7304502725601196, "fcm_dpo/q_t": 0.3943309783935547, "grad_norm": 74.5228500366211, "learning_rate": 4.7399844357283393e-07, "logits/chosen": 0.20225617289543152, "logits/rejected": 0.18362796306610107, "logps/chosen": -72.20230102539062, "logps/ref_chosen": -68.45469665527344, "logps/ref_rejected": -77.91763305664062, "logps/rejected": -83.39569091796875, "loss": 1.1098, "margin_dpo/margin_mean": 1.73045015335083, "margin_dpo/margin_std": 2.9399399757385254, "step": 155 }, { "epoch": 0.23582766439909297, "fcm_dpo/beta": 0.27768462896347046, "fcm_dpo/delta": -0.1894133985042572, "fcm_dpo/margin": 2.0840401649475098, "fcm_dpo/q_t": 0.3746188282966614, "grad_norm": 72.98139953613281, "learning_rate": 4.7340816008085305e-07, "logits/chosen": 0.14099450409412384, "logits/rejected": 0.10012276470661163, "logps/chosen": -71.08142852783203, "logps/ref_chosen": -67.26959991455078, "logps/ref_rejected": -86.95914459228516, "logps/rejected": -92.85501098632812, "loss": 1.0051, "margin_dpo/margin_mean": 2.084041118621826, "margin_dpo/margin_std": 2.6175365447998047, "step": 156 }, { "epoch": 0.23733938019652306, "fcm_dpo/beta": 0.2693888545036316, "fcm_dpo/delta": -0.06476490199565887, "fcm_dpo/margin": 1.7060657739639282, "fcm_dpo/q_t": 0.39782166481018066, "grad_norm": 64.5011215209961, "learning_rate": 4.728116273823847e-07, "logits/chosen": 0.1516086757183075, "logits/rejected": 0.13197970390319824, "logps/chosen": -58.35748291015625, "logps/ref_chosen": -54.77287292480469, "logps/ref_rejected": -63.87866973876953, "logps/rejected": -69.16934204101562, "loss": 1.0873, "margin_dpo/margin_mean": 1.7060656547546387, "margin_dpo/margin_std": 2.5072410106658936, "step": 157 }, { "epoch": 0.23885109599395313, "fcm_dpo/beta": 0.2680598497390747, "fcm_dpo/delta": -0.061768539249897, "fcm_dpo/margin": 1.7115424871444702, "fcm_dpo/q_t": 0.397410124540329, "grad_norm": 68.72582244873047, "learning_rate": 4.7220886216373085e-07, "logits/chosen": 0.19212478399276733, "logits/rejected": 0.1591942310333252, "logps/chosen": -68.83773040771484, "logps/ref_chosen": -64.92271423339844, "logps/ref_rejected": -82.23789978027344, "logps/rejected": -87.86445617675781, "loss": 1.0808, "margin_dpo/margin_mean": 1.7115428447723389, "margin_dpo/margin_std": 2.4922690391540527, "step": 158 }, { "epoch": 0.24036281179138322, "fcm_dpo/beta": 0.26532524824142456, "fcm_dpo/delta": -0.10670725256204605, "fcm_dpo/margin": 1.888586401939392, "fcm_dpo/q_t": 0.38828045129776, "grad_norm": 75.0799560546875, "learning_rate": 4.715998812855304e-07, "logits/chosen": 0.19417089223861694, "logits/rejected": 0.1603868454694748, "logps/chosen": -61.159812927246094, "logps/ref_chosen": -57.046993255615234, "logps/ref_rejected": -73.32441711425781, "logps/rejected": -79.32582092285156, "loss": 1.0816, "margin_dpo/margin_mean": 1.8885865211486816, "margin_dpo/margin_std": 2.8942010402679443, "step": 159 }, { "epoch": 0.2418745275888133, "fcm_dpo/beta": 0.25893402099609375, "fcm_dpo/delta": -0.09484230726957321, "fcm_dpo/margin": 1.8935188055038452, "fcm_dpo/q_t": 0.39680206775665283, "grad_norm": 60.400577545166016, "learning_rate": 4.7098470178228755e-07, "logits/chosen": 0.07180037349462509, "logits/rejected": 0.03388737514615059, "logps/chosen": -54.395286560058594, "logps/ref_chosen": -49.806915283203125, "logps/ref_rejected": -68.3370132446289, "logps/rejected": -74.81890869140625, "loss": 1.1, "margin_dpo/margin_mean": 1.8935189247131348, "margin_dpo/margin_std": 3.1547999382019043, "step": 160 }, { "epoch": 0.24338624338624337, "fcm_dpo/beta": 0.2536158561706543, "fcm_dpo/delta": -0.11104996502399445, "fcm_dpo/margin": 1.9934735298156738, "fcm_dpo/q_t": 0.3908805251121521, "grad_norm": 59.25609588623047, "learning_rate": 4.703633408618955e-07, "logits/chosen": 0.19538050889968872, "logits/rejected": 0.15980537235736847, "logps/chosen": -57.10566329956055, "logps/ref_chosen": -52.50048828125, "logps/ref_rejected": -66.04540252685547, "logps/rejected": -72.64404296875, "loss": 1.0799, "margin_dpo/margin_mean": 1.9934736490249634, "margin_dpo/margin_std": 3.0931880474090576, "step": 161 }, { "epoch": 0.24489795918367346, "fcm_dpo/beta": 0.2403220385313034, "fcm_dpo/delta": -0.2895709276199341, "fcm_dpo/margin": 2.785764217376709, "fcm_dpo/q_t": 0.35252922773361206, "grad_norm": 58.1529426574707, "learning_rate": 4.697358159051549e-07, "logits/chosen": 0.21992871165275574, "logits/rejected": 0.1757911592721939, "logps/chosen": -74.54412841796875, "logps/ref_chosen": -69.46919250488281, "logps/ref_rejected": -92.00952911376953, "logps/rejected": -99.87022399902344, "loss": 0.9441, "margin_dpo/margin_mean": 2.7857649326324463, "margin_dpo/margin_std": 3.090634822845459, "step": 162 }, { "epoch": 0.24640967498110355, "fcm_dpo/beta": 0.23066341876983643, "fcm_dpo/delta": -0.25370314717292786, "fcm_dpo/margin": 2.7662220001220703, "fcm_dpo/q_t": 0.3620498776435852, "grad_norm": 55.00440216064453, "learning_rate": 4.691021444652876e-07, "logits/chosen": 0.1866772174835205, "logits/rejected": 0.1429206132888794, "logps/chosen": -55.19199752807617, "logps/ref_chosen": -50.613834381103516, "logps/ref_rejected": -74.62033081054688, "logps/rejected": -81.96471405029297, "loss": 0.9916, "margin_dpo/margin_mean": 2.7662222385406494, "margin_dpo/margin_std": 3.4099013805389404, "step": 163 }, { "epoch": 0.24792139077853365, "fcm_dpo/beta": 0.2187848836183548, "fcm_dpo/delta": -0.20979532599449158, "fcm_dpo/margin": 2.730527400970459, "fcm_dpo/q_t": 0.3718702793121338, "grad_norm": 51.946624755859375, "learning_rate": 4.6846234426744624e-07, "logits/chosen": 0.16851532459259033, "logits/rejected": 0.11055172979831696, "logps/chosen": -60.02857971191406, "logps/ref_chosen": -54.848114013671875, "logps/ref_rejected": -79.0630111694336, "logps/rejected": -86.9739990234375, "loss": 1.0185, "margin_dpo/margin_mean": 2.730526924133301, "margin_dpo/margin_std": 3.5926055908203125, "step": 164 }, { "epoch": 0.2494331065759637, "fcm_dpo/beta": 0.21285484731197357, "fcm_dpo/delta": -0.14022870361804962, "fcm_dpo/margin": 2.503763437271118, "fcm_dpo/q_t": 0.38059890270233154, "grad_norm": 50.90439987182617, "learning_rate": 4.678164332082175e-07, "logits/chosen": 0.25604766607284546, "logits/rejected": 0.2025667428970337, "logps/chosen": -56.66874694824219, "logps/ref_chosen": -51.089210510253906, "logps/ref_rejected": -71.23370361328125, "logps/rejected": -79.31700134277344, "loss": 1.0262, "margin_dpo/margin_mean": 2.503763198852539, "margin_dpo/margin_std": 3.0885088443756104, "step": 165 }, { "epoch": 0.2509448223733938, "fcm_dpo/beta": 0.2094953954219818, "fcm_dpo/delta": -0.01251722127199173, "fcm_dpo/margin": 1.966209053993225, "fcm_dpo/q_t": 0.40979406237602234, "grad_norm": 57.34663391113281, "learning_rate": 4.6716442935512214e-07, "logits/chosen": 0.193076953291893, "logits/rejected": 0.11284859478473663, "logps/chosen": -68.62454223632812, "logps/ref_chosen": -63.19081115722656, "logps/ref_rejected": -93.8402099609375, "logps/rejected": -101.24014282226562, "loss": 1.1205, "margin_dpo/margin_mean": 1.9662084579467773, "margin_dpo/margin_std": 3.232795476913452, "step": 166 }, { "epoch": 0.25245653817082386, "fcm_dpo/beta": 0.20221662521362305, "fcm_dpo/delta": -0.20763254165649414, "fcm_dpo/margin": 2.9351630210876465, "fcm_dpo/q_t": 0.36785978078842163, "grad_norm": 45.256717681884766, "learning_rate": 4.6650635094610966e-07, "logits/chosen": 0.15891437232494354, "logits/rejected": 0.12511295080184937, "logps/chosen": -64.05963897705078, "logps/ref_chosen": -58.92427062988281, "logps/ref_rejected": -72.97377014160156, "logps/rejected": -81.04429626464844, "loss": 0.9871, "margin_dpo/margin_mean": 2.9351630210876465, "margin_dpo/margin_std": 3.427379846572876, "step": 167 }, { "epoch": 0.25396825396825395, "fcm_dpo/beta": 0.2014435976743698, "fcm_dpo/delta": 0.011637402698397636, "fcm_dpo/margin": 1.930140733718872, "fcm_dpo/q_t": 0.41260749101638794, "grad_norm": 55.73114776611328, "learning_rate": 4.6584221638904767e-07, "logits/chosen": 0.16870692372322083, "logits/rejected": 0.1389564573764801, "logps/chosen": -71.93734741210938, "logps/ref_chosen": -65.65138244628906, "logps/ref_rejected": -79.71418762207031, "logps/rejected": -87.9302978515625, "loss": 1.122, "margin_dpo/margin_mean": 1.930140495300293, "margin_dpo/margin_std": 3.016066074371338, "step": 168 }, { "epoch": 0.25547996976568405, "fcm_dpo/beta": 0.19683048129081726, "fcm_dpo/delta": -0.16538755595684052, "fcm_dpo/margin": 2.8258461952209473, "fcm_dpo/q_t": 0.3836873173713684, "grad_norm": 51.384742736816406, "learning_rate": 4.651720442612075e-07, "logits/chosen": 0.22757534682750702, "logits/rejected": 0.19626004993915558, "logps/chosen": -67.05967712402344, "logps/ref_chosen": -61.425865173339844, "logps/ref_rejected": -76.09590148925781, "logps/rejected": -84.55555725097656, "loss": 1.0461, "margin_dpo/margin_mean": 2.825845956802368, "margin_dpo/margin_std": 4.196510314941406, "step": 169 }, { "epoch": 0.25699168556311414, "fcm_dpo/beta": 0.1929258108139038, "fcm_dpo/delta": -0.09032995253801346, "fcm_dpo/margin": 2.5192790031433105, "fcm_dpo/q_t": 0.3924364447593689, "grad_norm": 43.95397186279297, "learning_rate": 4.6449585330874425e-07, "logits/chosen": 0.1608143299818039, "logits/rejected": 0.15858882665634155, "logps/chosen": -62.605133056640625, "logps/ref_chosen": -56.65319061279297, "logps/ref_rejected": -63.45965576171875, "logps/rejected": -71.93087768554688, "loss": 1.0914, "margin_dpo/margin_mean": 2.5192790031433105, "margin_dpo/margin_std": 3.963756561279297, "step": 170 }, { "epoch": 0.2585034013605442, "fcm_dpo/beta": 0.18473538756370544, "fcm_dpo/delta": -0.18784065544605255, "fcm_dpo/margin": 3.1072592735290527, "fcm_dpo/q_t": 0.37474268674850464, "grad_norm": 49.094505310058594, "learning_rate": 4.6381366244617224e-07, "logits/chosen": 0.25203442573547363, "logits/rejected": 0.2046370506286621, "logps/chosen": -70.01921844482422, "logps/ref_chosen": -63.73476028442383, "logps/ref_rejected": -78.50328063964844, "logps/rejected": -87.89498901367188, "loss": 1.0471, "margin_dpo/margin_mean": 3.107259750366211, "margin_dpo/margin_std": 4.45649528503418, "step": 171 }, { "epoch": 0.2600151171579743, "fcm_dpo/beta": 0.18111056089401245, "fcm_dpo/delta": -0.18078723549842834, "fcm_dpo/margin": 3.1506669521331787, "fcm_dpo/q_t": 0.3735198378562927, "grad_norm": 42.36780548095703, "learning_rate": 4.631254907558365e-07, "logits/chosen": 0.2554565668106079, "logits/rejected": 0.2034720778465271, "logps/chosen": -59.11717224121094, "logps/ref_chosen": -52.201759338378906, "logps/ref_rejected": -82.85285949707031, "logps/rejected": -92.91893768310547, "loss": 1.0257, "margin_dpo/margin_mean": 3.1506664752960205, "margin_dpo/margin_std": 4.125787258148193, "step": 172 }, { "epoch": 0.2615268329554044, "fcm_dpo/beta": 0.1714455783367157, "fcm_dpo/delta": -0.1695910543203354, "fcm_dpo/margin": 3.238369941711426, "fcm_dpo/q_t": 0.38560813665390015, "grad_norm": 41.579505920410156, "learning_rate": 4.624313574873786e-07, "logits/chosen": 0.24041128158569336, "logits/rejected": 0.15953364968299866, "logps/chosen": -62.48090362548828, "logps/ref_chosen": -55.434722900390625, "logps/ref_rejected": -77.81967163085938, "logps/rejected": -88.1042251586914, "loss": 1.1006, "margin_dpo/margin_mean": 3.238370180130005, "margin_dpo/margin_std": 5.3449625968933105, "step": 173 }, { "epoch": 0.26303854875283444, "fcm_dpo/beta": 0.16659438610076904, "fcm_dpo/delta": -0.20445415377616882, "fcm_dpo/margin": 3.552804708480835, "fcm_dpo/q_t": 0.37425845861434937, "grad_norm": 44.64958953857422, "learning_rate": 4.61731282057198e-07, "logits/chosen": 0.2262486070394516, "logits/rejected": 0.16458037495613098, "logps/chosen": -64.90945434570312, "logps/ref_chosen": -57.17195129394531, "logps/ref_rejected": -85.47578430175781, "logps/rejected": -96.76608276367188, "loss": 1.0443, "margin_dpo/margin_mean": 3.5528039932250977, "margin_dpo/margin_std": 5.244265556335449, "step": 174 }, { "epoch": 0.26455026455026454, "fcm_dpo/beta": 0.16002216935157776, "fcm_dpo/delta": -0.2240179479122162, "fcm_dpo/margin": 3.8157334327697754, "fcm_dpo/q_t": 0.373027503490448, "grad_norm": 42.5731086730957, "learning_rate": 4.6102528404790965e-07, "logits/chosen": 0.28858986496925354, "logits/rejected": 0.25886330008506775, "logps/chosen": -75.49578857421875, "logps/ref_chosen": -67.6656265258789, "logps/ref_rejected": -84.36766815185547, "logps/rejected": -96.01356506347656, "loss": 1.0347, "margin_dpo/margin_mean": 3.8157334327697754, "margin_dpo/margin_std": 5.4957990646362305, "step": 175 }, { "epoch": 0.2660619803476946, "fcm_dpo/beta": 0.15674875676631927, "fcm_dpo/delta": -0.012624900788068771, "fcm_dpo/margin": 2.624541997909546, "fcm_dpo/q_t": 0.4133697748184204, "grad_norm": 50.7176513671875, "learning_rate": 4.603133832077953e-07, "logits/chosen": 0.2168986052274704, "logits/rejected": 0.19173604249954224, "logps/chosen": -86.61616516113281, "logps/ref_chosen": -77.8587646484375, "logps/ref_rejected": -81.08732604980469, "logps/rejected": -92.46926879882812, "loss": 1.1886, "margin_dpo/margin_mean": 2.624541759490967, "margin_dpo/margin_std": 5.590775012969971, "step": 176 }, { "epoch": 0.2675736961451247, "fcm_dpo/beta": 0.14755460619926453, "fcm_dpo/delta": -0.40747910737991333, "fcm_dpo/margin": 5.241545677185059, "fcm_dpo/q_t": 0.32991600036621094, "grad_norm": 42.354209899902344, "learning_rate": 4.5959559945025183e-07, "logits/chosen": 0.3358641266822815, "logits/rejected": 0.2448875606060028, "logps/chosen": -63.21653747558594, "logps/ref_chosen": -55.22039794921875, "logps/ref_rejected": -92.54973602294922, "logps/rejected": -105.78742980957031, "loss": 0.8743, "margin_dpo/margin_mean": 5.241544723510742, "margin_dpo/margin_std": 5.077418327331543, "step": 177 }, { "epoch": 0.2690854119425548, "fcm_dpo/beta": 0.14272984862327576, "fcm_dpo/delta": -0.0517147034406662, "fcm_dpo/margin": 3.132704734802246, "fcm_dpo/q_t": 0.4003003239631653, "grad_norm": 39.6386833190918, "learning_rate": 4.588719528532341e-07, "logits/chosen": 0.20977315306663513, "logits/rejected": 0.1633778065443039, "logps/chosen": -69.47108459472656, "logps/ref_chosen": -60.81049346923828, "logps/ref_rejected": -81.12973022460938, "logps/rejected": -92.92301940917969, "loss": 1.0947, "margin_dpo/margin_mean": 3.1327052116394043, "margin_dpo/margin_std": 4.599012851715088, "step": 178 }, { "epoch": 0.2705971277399849, "fcm_dpo/beta": 0.14164280891418457, "fcm_dpo/delta": -0.10549677163362503, "fcm_dpo/margin": 3.53216290473938, "fcm_dpo/q_t": 0.39201849699020386, "grad_norm": 38.2100944519043, "learning_rate": 4.581424636586928e-07, "logits/chosen": 0.2727736234664917, "logits/rejected": 0.25624555349349976, "logps/chosen": -75.33070373535156, "logps/ref_chosen": -65.67171478271484, "logps/ref_rejected": -75.32586669921875, "logps/rejected": -88.51702880859375, "loss": 1.0975, "margin_dpo/margin_mean": 3.532163143157959, "margin_dpo/margin_std": 5.8223490715026855, "step": 179 }, { "epoch": 0.272108843537415, "fcm_dpo/beta": 0.14003294706344604, "fcm_dpo/delta": -0.05574531853199005, "fcm_dpo/margin": 3.2362074851989746, "fcm_dpo/q_t": 0.4050852060317993, "grad_norm": 37.95149612426758, "learning_rate": 4.5740715227200897e-07, "logits/chosen": 0.10121668875217438, "logits/rejected": 0.08257490396499634, "logps/chosen": -64.88932037353516, "logps/ref_chosen": -56.68280792236328, "logps/ref_rejected": -64.94414520263672, "logps/rejected": -76.3868637084961, "loss": 1.1465, "margin_dpo/margin_mean": 3.2362074851989746, "margin_dpo/margin_std": 6.117829322814941, "step": 180 }, { "epoch": 0.273620559334845, "fcm_dpo/beta": 0.13333183526992798, "fcm_dpo/delta": -0.28197789192199707, "fcm_dpo/margin": 4.9724249839782715, "fcm_dpo/q_t": 0.352914035320282, "grad_norm": 33.529659271240234, "learning_rate": 4.566660392614228e-07, "logits/chosen": 0.27004173398017883, "logits/rejected": 0.2327122837305069, "logps/chosen": -68.66935729980469, "logps/ref_chosen": -60.77604675292969, "logps/ref_rejected": -83.98361206054688, "logps/rejected": -96.84934997558594, "loss": 0.9329, "margin_dpo/margin_mean": 4.972424507141113, "margin_dpo/margin_std": 5.188924789428711, "step": 181 }, { "epoch": 0.2751322751322751, "fcm_dpo/beta": 0.12604235112667084, "fcm_dpo/delta": -0.2631838917732239, "fcm_dpo/margin": 5.117816925048828, "fcm_dpo/q_t": 0.36235833168029785, "grad_norm": 34.590484619140625, "learning_rate": 4.5591914535745817e-07, "logits/chosen": 0.2485291063785553, "logits/rejected": 0.17378370463848114, "logps/chosen": -69.56755065917969, "logps/ref_chosen": -60.2537841796875, "logps/ref_rejected": -89.7706298828125, "logps/rejected": -104.20220184326172, "loss": 1.0064, "margin_dpo/margin_mean": 5.117816925048828, "margin_dpo/margin_std": 6.874538421630859, "step": 182 }, { "epoch": 0.2766439909297052, "fcm_dpo/beta": 0.12698180973529816, "fcm_dpo/delta": 0.13285091519355774, "fcm_dpo/margin": 2.133739471435547, "fcm_dpo/q_t": 0.4392937421798706, "grad_norm": 36.80066680908203, "learning_rate": 4.551664914523433e-07, "logits/chosen": 0.2542421221733093, "logits/rejected": 0.23342236876487732, "logps/chosen": -72.93169403076172, "logps/ref_chosen": -61.76142120361328, "logps/ref_rejected": -72.54627990722656, "logps/rejected": -85.85029602050781, "loss": 1.253, "margin_dpo/margin_mean": 2.133739471435547, "margin_dpo/margin_std": 5.610200881958008, "step": 183 }, { "epoch": 0.2781557067271353, "fcm_dpo/beta": 0.12451111525297165, "fcm_dpo/delta": -0.13866505026817322, "fcm_dpo/margin": 4.248322486877441, "fcm_dpo/q_t": 0.3827287554740906, "grad_norm": 28.370405197143555, "learning_rate": 4.544080985994258e-07, "logits/chosen": 0.35382214188575745, "logits/rejected": 0.29234981536865234, "logps/chosen": -55.696563720703125, "logps/ref_chosen": -46.840721130371094, "logps/ref_rejected": -69.3609390258789, "logps/rejected": -82.46510314941406, "loss": 1.0227, "margin_dpo/margin_mean": 4.248322486877441, "margin_dpo/margin_std": 5.141722679138184, "step": 184 }, { "epoch": 0.2796674225245654, "fcm_dpo/beta": 0.12130877375602722, "fcm_dpo/delta": -0.1432112157344818, "fcm_dpo/margin": 4.402965545654297, "fcm_dpo/q_t": 0.38836491107940674, "grad_norm": 30.318683624267578, "learning_rate": 4.5364398801258394e-07, "logits/chosen": 0.27195507287979126, "logits/rejected": 0.22637689113616943, "logps/chosen": -62.45649719238281, "logps/ref_chosen": -52.32114028930664, "logps/ref_rejected": -68.3885726928711, "logps/rejected": -82.92689514160156, "loss": 1.1076, "margin_dpo/margin_mean": 4.402966022491455, "margin_dpo/margin_std": 7.5162458419799805, "step": 185 }, { "epoch": 0.2811791383219955, "fcm_dpo/beta": 0.11787950992584229, "fcm_dpo/delta": -0.1722513735294342, "fcm_dpo/margin": 4.769825458526611, "fcm_dpo/q_t": 0.3829618990421295, "grad_norm": 36.4186897277832, "learning_rate": 4.5287418106563354e-07, "logits/chosen": 0.19488704204559326, "logits/rejected": 0.15547996759414673, "logps/chosen": -77.1296157836914, "logps/ref_chosen": -67.42012786865234, "logps/ref_rejected": -82.50968933105469, "logps/rejected": -96.98900604248047, "loss": 1.0768, "margin_dpo/margin_mean": 4.7698259353637695, "margin_dpo/margin_std": 7.652778625488281, "step": 186 }, { "epoch": 0.28269085411942557, "fcm_dpo/beta": 0.1145966649055481, "fcm_dpo/delta": -0.1198846772313118, "fcm_dpo/margin": 4.478250503540039, "fcm_dpo/q_t": 0.3871074914932251, "grad_norm": 36.75972366333008, "learning_rate": 4.520986992917297e-07, "logits/chosen": 0.27034419775009155, "logits/rejected": 0.21536602079868317, "logps/chosen": -86.54496002197266, "logps/ref_chosen": -75.52549743652344, "logps/ref_rejected": -94.76289367675781, "logps/rejected": -110.26060485839844, "loss": 1.0871, "margin_dpo/margin_mean": 4.478249549865723, "margin_dpo/margin_std": 7.051586151123047, "step": 187 }, { "epoch": 0.2842025699168556, "fcm_dpo/beta": 0.11200448125600815, "fcm_dpo/delta": -0.12840519845485687, "fcm_dpo/margin": 4.6570024490356445, "fcm_dpo/q_t": 0.3865613341331482, "grad_norm": 34.73870849609375, "learning_rate": 4.5131756438276466e-07, "logits/chosen": 0.2812601923942566, "logits/rejected": 0.2386472225189209, "logps/chosen": -81.70372009277344, "logps/ref_chosen": -71.52333068847656, "logps/ref_rejected": -78.29949951171875, "logps/rejected": -93.13690185546875, "loss": 1.0811, "margin_dpo/margin_mean": 4.6570024490356445, "margin_dpo/margin_std": 7.432827949523926, "step": 188 }, { "epoch": 0.2857142857142857, "fcm_dpo/beta": 0.10828001797199249, "fcm_dpo/delta": -0.11131785809993744, "fcm_dpo/margin": 4.635937690734863, "fcm_dpo/q_t": 0.38790684938430786, "grad_norm": 32.68986129760742, "learning_rate": 4.5053079818876096e-07, "logits/chosen": 0.2685256004333496, "logits/rejected": 0.2797519862651825, "logps/chosen": -81.99581909179688, "logps/ref_chosen": -72.17626953125, "logps/ref_rejected": -75.26313781738281, "logps/rejected": -89.71861267089844, "loss": 1.0783, "margin_dpo/margin_mean": 4.635939121246338, "margin_dpo/margin_std": 6.736225605010986, "step": 189 }, { "epoch": 0.2872260015117158, "fcm_dpo/beta": 0.1046074777841568, "fcm_dpo/delta": -0.27176159620285034, "fcm_dpo/margin": 6.251853942871094, "fcm_dpo/q_t": 0.3575611710548401, "grad_norm": 31.16099739074707, "learning_rate": 4.4973842271726024e-07, "logits/chosen": 0.32384777069091797, "logits/rejected": 0.18938855826854706, "logps/chosen": -64.28665924072266, "logps/ref_chosen": -54.624271392822266, "logps/ref_rejected": -101.47068786621094, "logps/rejected": -117.38493347167969, "loss": 0.964, "margin_dpo/margin_mean": 6.251853942871094, "margin_dpo/margin_std": 7.330543518066406, "step": 190 }, { "epoch": 0.2887377173091459, "fcm_dpo/beta": 0.1018860712647438, "fcm_dpo/delta": -0.08884115517139435, "fcm_dpo/margin": 4.756298065185547, "fcm_dpo/q_t": 0.3920493721961975, "grad_norm": 33.24656295776367, "learning_rate": 4.48940460132708e-07, "logits/chosen": 0.32119327783584595, "logits/rejected": 0.29560232162475586, "logps/chosen": -84.64088439941406, "logps/ref_chosen": -72.93251037597656, "logps/ref_rejected": -89.95103454589844, "logps/rejected": -106.41569519042969, "loss": 1.0988, "margin_dpo/margin_mean": 4.756298065185547, "margin_dpo/margin_std": 7.644356727600098, "step": 191 }, { "epoch": 0.29024943310657597, "fcm_dpo/beta": 0.10229361802339554, "fcm_dpo/delta": 0.04606345295906067, "fcm_dpo/margin": 3.473459243774414, "fcm_dpo/q_t": 0.42033612728118896, "grad_norm": 26.436538696289062, "learning_rate": 4.481369327558329e-07, "logits/chosen": 0.31390440464019775, "logits/rejected": 0.28907686471939087, "logps/chosen": -65.87787628173828, "logps/ref_chosen": -54.001121520996094, "logps/ref_rejected": -63.531551361083984, "logps/rejected": -78.88176727294922, "loss": 1.1719, "margin_dpo/margin_mean": 3.473459005355835, "margin_dpo/margin_std": 6.743564128875732, "step": 192 }, { "epoch": 0.29176114890400606, "fcm_dpo/beta": 0.09879890084266663, "fcm_dpo/delta": -0.19991721212863922, "fcm_dpo/margin": 5.939870834350586, "fcm_dpo/q_t": 0.37099704146385193, "grad_norm": 25.317665100097656, "learning_rate": 4.47327863063023e-07, "logits/chosen": 0.25306424498558044, "logits/rejected": 0.23036319017410278, "logps/chosen": -67.60955047607422, "logps/ref_chosen": -56.74927520751953, "logps/ref_rejected": -58.80629348754883, "logps/rejected": -75.60643768310547, "loss": 0.9987, "margin_dpo/margin_mean": 5.939870357513428, "margin_dpo/margin_std": 7.335954666137695, "step": 193 }, { "epoch": 0.29327286470143615, "fcm_dpo/beta": 0.09653833508491516, "fcm_dpo/delta": -0.07798396050930023, "fcm_dpo/margin": 4.901164531707764, "fcm_dpo/q_t": 0.39555221796035767, "grad_norm": 25.787086486816406, "learning_rate": 4.4651327368569684e-07, "logits/chosen": 0.322595477104187, "logits/rejected": 0.29371175169944763, "logps/chosen": -67.38978576660156, "logps/ref_chosen": -56.64944076538086, "logps/ref_rejected": -69.98954772949219, "logps/rejected": -85.63105010986328, "loss": 1.135, "margin_dpo/margin_mean": 4.901164531707764, "margin_dpo/margin_std": 8.785233497619629, "step": 194 }, { "epoch": 0.2947845804988662, "fcm_dpo/beta": 0.09371854364871979, "fcm_dpo/delta": -0.17399150133132935, "fcm_dpo/margin": 6.003086090087891, "fcm_dpo/q_t": 0.37736234068870544, "grad_norm": 26.80616569519043, "learning_rate": 4.4569318740967043e-07, "logits/chosen": 0.2243194878101349, "logits/rejected": 0.22502626478672028, "logps/chosen": -82.69122314453125, "logps/ref_chosen": -70.40977478027344, "logps/ref_rejected": -74.39448547363281, "logps/rejected": -92.67901611328125, "loss": 1.0281, "margin_dpo/margin_mean": 6.003086090087891, "margin_dpo/margin_std": 8.03487777709961, "step": 195 }, { "epoch": 0.2962962962962963, "fcm_dpo/beta": 0.0926411971449852, "fcm_dpo/delta": -0.09142302721738815, "fcm_dpo/margin": 5.255779266357422, "fcm_dpo/q_t": 0.3900688588619232, "grad_norm": 24.710390090942383, "learning_rate": 4.448676271745197e-07, "logits/chosen": 0.3130764961242676, "logits/rejected": 0.27285757660865784, "logps/chosen": -70.67607116699219, "logps/ref_chosen": -59.227577209472656, "logps/ref_rejected": -83.54757690429688, "logps/rejected": -100.25184631347656, "loss": 1.0765, "margin_dpo/margin_mean": 5.255779266357422, "margin_dpo/margin_std": 7.652653694152832, "step": 196 }, { "epoch": 0.29780801209372637, "fcm_dpo/beta": 0.08979904651641846, "fcm_dpo/delta": -0.18953318893909454, "fcm_dpo/margin": 6.446434020996094, "fcm_dpo/q_t": 0.37620919942855835, "grad_norm": 25.047359466552734, "learning_rate": 4.440366160729392e-07, "logits/chosen": 0.3811100721359253, "logits/rejected": 0.3337089717388153, "logps/chosen": -62.05528259277344, "logps/ref_chosen": -51.52912902832031, "logps/ref_rejected": -73.70631408691406, "logps/rejected": -90.67889404296875, "loss": 1.0846, "margin_dpo/margin_mean": 6.4464335441589355, "margin_dpo/margin_std": 10.239412307739258, "step": 197 }, { "epoch": 0.29931972789115646, "fcm_dpo/beta": 0.08605066686868668, "fcm_dpo/delta": -0.22043052315711975, "fcm_dpo/margin": 7.0605268478393555, "fcm_dpo/q_t": 0.3634013235569, "grad_norm": 24.03299903869629, "learning_rate": 4.432001773500957e-07, "logits/chosen": 0.35537493228912354, "logits/rejected": 0.3165588080883026, "logps/chosen": -70.80035400390625, "logps/ref_chosen": -59.78268051147461, "logps/ref_rejected": -72.24533081054688, "logps/rejected": -90.32351684570312, "loss": 0.9705, "margin_dpo/margin_mean": 7.0605268478393555, "margin_dpo/margin_std": 7.6906585693359375, "step": 198 }, { "epoch": 0.30083144368858655, "fcm_dpo/beta": 0.08475294709205627, "fcm_dpo/delta": -0.05305337905883789, "fcm_dpo/margin": 5.297489166259766, "fcm_dpo/q_t": 0.4017520546913147, "grad_norm": 26.102365493774414, "learning_rate": 4.4235833440297856e-07, "logits/chosen": 0.30094629526138306, "logits/rejected": 0.2166268527507782, "logps/chosen": -68.91012573242188, "logps/ref_chosen": -56.38677215576172, "logps/ref_rejected": -74.56779479980469, "logps/rejected": -92.38864135742188, "loss": 1.1633, "margin_dpo/margin_mean": 5.297488212585449, "margin_dpo/margin_std": 10.02812385559082, "step": 199 }, { "epoch": 0.30234315948601664, "fcm_dpo/beta": 0.08017782866954803, "fcm_dpo/delta": -0.24141071736812592, "fcm_dpo/margin": 7.785175323486328, "fcm_dpo/q_t": 0.3658748269081116, "grad_norm": 23.437423706054688, "learning_rate": 4.415111107797445e-07, "logits/chosen": 0.3566874861717224, "logits/rejected": 0.2875964939594269, "logps/chosen": -67.94142150878906, "logps/ref_chosen": -57.82432556152344, "logps/ref_rejected": -89.28246307373047, "logps/rejected": -107.18473815917969, "loss": 1.0189, "margin_dpo/margin_mean": 7.785175323486328, "margin_dpo/margin_std": 10.709969520568848, "step": 200 }, { "epoch": 0.30234315948601664, "eval_fcm_dpo/beta": 0.07967542111873627, "eval_logits/chosen": 0.31474569439888, "eval_logits/rejected": 0.2707725763320923, "eval_logps/chosen": -86.38968658447266, "eval_logps/ref_chosen": -74.85946655273438, "eval_logps/ref_rejected": -79.54898834228516, "eval_logps/rejected": -97.07010650634766, "eval_loss": 0.5583317875862122, "eval_margin_dpo/margin_mean": 5.990893840789795, "eval_margin_dpo/margin_std": 10.197680473327637, "eval_runtime": 38.0389, "eval_samples_per_second": 60.543, "eval_steps_per_second": 1.893, "step": 200 }, { "epoch": 0.30385487528344673, "fcm_dpo/beta": 0.07807569205760956, "fcm_dpo/delta": -0.11501055210828781, "fcm_dpo/margin": 6.5143938064575195, "fcm_dpo/q_t": 0.3902972340583801, "grad_norm": 25.34015464782715, "learning_rate": 4.4065853017905953e-07, "logits/chosen": 0.358869731426239, "logits/rejected": 0.3160993456840515, "logps/chosen": -71.9423828125, "logps/ref_chosen": -58.999759674072266, "logps/ref_rejected": -84.67575073242188, "logps/rejected": -104.13275146484375, "loss": 1.0787, "margin_dpo/margin_mean": 6.514393329620361, "margin_dpo/margin_std": 10.039584159851074, "step": 201 }, { "epoch": 0.30536659108087677, "fcm_dpo/beta": 0.07607395201921463, "fcm_dpo/delta": -0.18573462963104248, "fcm_dpo/margin": 7.564221382141113, "fcm_dpo/q_t": 0.37164774537086487, "grad_norm": 21.76279067993164, "learning_rate": 4.3980061644943575e-07, "logits/chosen": 0.2765665650367737, "logits/rejected": 0.20993581414222717, "logps/chosen": -58.819705963134766, "logps/ref_chosen": -47.660648345947266, "logps/ref_rejected": -73.63249969482422, "logps/rejected": -92.35577392578125, "loss": 1.0235, "margin_dpo/margin_mean": 7.5642218589782715, "margin_dpo/margin_std": 9.893567085266113, "step": 202 }, { "epoch": 0.30687830687830686, "fcm_dpo/beta": 0.07392242550849915, "fcm_dpo/delta": -0.11798103153705597, "fcm_dpo/margin": 6.927526473999023, "fcm_dpo/q_t": 0.3890300989151001, "grad_norm": 24.907970428466797, "learning_rate": 4.3893739358856455e-07, "logits/chosen": 0.35103338956832886, "logits/rejected": 0.2826537489891052, "logps/chosen": -74.85539245605469, "logps/ref_chosen": -62.32553482055664, "logps/ref_rejected": -99.37226104736328, "logps/rejected": -118.82963562011719, "loss": 1.0657, "margin_dpo/margin_mean": 6.927526473999023, "margin_dpo/margin_std": 10.243463516235352, "step": 203 }, { "epoch": 0.30839002267573695, "fcm_dpo/beta": 0.0711907222867012, "fcm_dpo/delta": -0.11254880577325821, "fcm_dpo/margin": 7.061636447906494, "fcm_dpo/q_t": 0.3894692659378052, "grad_norm": 21.693220138549805, "learning_rate": 4.380688857426449e-07, "logits/chosen": 0.2948107421398163, "logits/rejected": 0.22981415688991547, "logps/chosen": -63.0117073059082, "logps/ref_chosen": -50.62931823730469, "logps/ref_rejected": -66.60475158691406, "logps/rejected": -86.04878234863281, "loss": 1.0766, "margin_dpo/margin_mean": 7.061635971069336, "margin_dpo/margin_std": 10.273975372314453, "step": 204 }, { "epoch": 0.30990173847316704, "fcm_dpo/beta": 0.07016897201538086, "fcm_dpo/delta": -0.11553419381380081, "fcm_dpo/margin": 7.253925323486328, "fcm_dpo/q_t": 0.38996151089668274, "grad_norm": 26.938138961791992, "learning_rate": 4.3719511720570814e-07, "logits/chosen": 0.34917423129081726, "logits/rejected": 0.2908037006855011, "logps/chosen": -83.69819641113281, "logps/ref_chosen": -70.3561782836914, "logps/ref_rejected": -93.39848327636719, "logps/rejected": -113.99442291259766, "loss": 1.0946, "margin_dpo/margin_mean": 7.2539262771606445, "margin_dpo/margin_std": 11.814342498779297, "step": 205 }, { "epoch": 0.31141345427059713, "fcm_dpo/beta": 0.07059814780950546, "fcm_dpo/delta": 0.02109716460108757, "fcm_dpo/margin": 5.3691887855529785, "fcm_dpo/q_t": 0.4200194478034973, "grad_norm": 24.054405212402344, "learning_rate": 4.363161124189387e-07, "logits/chosen": 0.38039857149124146, "logits/rejected": 0.3642415404319763, "logps/chosen": -81.48750305175781, "logps/ref_chosen": -67.64547729492188, "logps/ref_rejected": -79.89584350585938, "logps/rejected": -99.1070556640625, "loss": 1.2172, "margin_dpo/margin_mean": 5.3691887855529785, "margin_dpo/margin_std": 12.316067695617676, "step": 206 }, { "epoch": 0.3129251700680272, "fcm_dpo/beta": 0.0685083270072937, "fcm_dpo/delta": -0.15592733025550842, "fcm_dpo/margin": 7.982370376586914, "fcm_dpo/q_t": 0.3835386633872986, "grad_norm": 21.380037307739258, "learning_rate": 4.3543189596998986e-07, "logits/chosen": 0.3081602454185486, "logits/rejected": 0.2436012327671051, "logps/chosen": -82.89225769042969, "logps/ref_chosen": -67.66419219970703, "logps/ref_rejected": -85.10249328613281, "logps/rejected": -108.31292724609375, "loss": 1.0441, "margin_dpo/margin_mean": 7.982370376586914, "margin_dpo/margin_std": 11.379196166992188, "step": 207 }, { "epoch": 0.3144368858654573, "fcm_dpo/beta": 0.069021075963974, "fcm_dpo/delta": 0.08469095081090927, "fcm_dpo/margin": 4.60715389251709, "fcm_dpo/q_t": 0.4303174912929535, "grad_norm": 24.077442169189453, "learning_rate": 4.3454249259229664e-07, "logits/chosen": 0.3450078070163727, "logits/rejected": 0.31959545612335205, "logps/chosen": -69.91799926757812, "logps/ref_chosen": -57.731712341308594, "logps/ref_rejected": -74.19276428222656, "logps/rejected": -90.9862060546875, "loss": 1.2285, "margin_dpo/margin_mean": 4.607153415679932, "margin_dpo/margin_std": 10.970314025878906, "step": 208 }, { "epoch": 0.31594860166288735, "fcm_dpo/beta": 0.06699629127979279, "fcm_dpo/delta": -0.24109607934951782, "fcm_dpo/margin": 9.348739624023438, "fcm_dpo/q_t": 0.36549073457717896, "grad_norm": 23.534868240356445, "learning_rate": 4.336479271643833e-07, "logits/chosen": 0.29840895533561707, "logits/rejected": 0.24482461810112, "logps/chosen": -80.83154296875, "logps/ref_chosen": -68.55007934570312, "logps/ref_rejected": -87.90541076660156, "logps/rejected": -109.53561401367188, "loss": 1.0307, "margin_dpo/margin_mean": 9.348740577697754, "margin_dpo/margin_std": 13.397310256958008, "step": 209 }, { "epoch": 0.31746031746031744, "fcm_dpo/beta": 0.06439946591854095, "fcm_dpo/delta": -0.19043028354644775, "fcm_dpo/margin": 9.002416610717773, "fcm_dpo/q_t": 0.37747329473495483, "grad_norm": 20.80362319946289, "learning_rate": 4.327482247091679e-07, "logits/chosen": 0.3852647542953491, "logits/rejected": 0.2949644923210144, "logps/chosen": -69.62586975097656, "logps/ref_chosen": -57.268272399902344, "logps/ref_rejected": -85.72807312011719, "logps/rejected": -107.08808898925781, "loss": 1.0512, "margin_dpo/margin_mean": 9.002415657043457, "margin_dpo/margin_std": 13.310730934143066, "step": 210 }, { "epoch": 0.31897203325774753, "fcm_dpo/beta": 0.0625411719083786, "fcm_dpo/delta": -0.14104999601840973, "fcm_dpo/margin": 8.534067153930664, "fcm_dpo/q_t": 0.3859778642654419, "grad_norm": 23.124126434326172, "learning_rate": 4.3184341039326217e-07, "logits/chosen": 0.3993861675262451, "logits/rejected": 0.3120034337043762, "logps/chosen": -64.48066711425781, "logps/ref_chosen": -53.640708923339844, "logps/ref_rejected": -93.0387954711914, "logps/rejected": -112.41282653808594, "loss": 1.0564, "margin_dpo/margin_mean": 8.534066200256348, "margin_dpo/margin_std": 12.409013748168945, "step": 211 }, { "epoch": 0.3204837490551776, "fcm_dpo/beta": 0.0600648857653141, "fcm_dpo/delta": -0.18490049242973328, "fcm_dpo/margin": 9.557143211364746, "fcm_dpo/q_t": 0.37409496307373047, "grad_norm": 18.95738410949707, "learning_rate": 4.309335095262675e-07, "logits/chosen": 0.3936005234718323, "logits/rejected": 0.3249232769012451, "logps/chosen": -70.1107406616211, "logps/ref_chosen": -57.36674499511719, "logps/ref_rejected": -79.89643096923828, "logps/rejected": -102.19757080078125, "loss": 1.0381, "margin_dpo/margin_mean": 9.557143211364746, "margin_dpo/margin_std": 13.448509216308594, "step": 212 }, { "epoch": 0.3219954648526077, "fcm_dpo/beta": 0.057153455913066864, "fcm_dpo/delta": -0.24546325206756592, "fcm_dpo/margin": 10.99544620513916, "fcm_dpo/q_t": 0.3635343313217163, "grad_norm": 17.224510192871094, "learning_rate": 4.3001854756006724e-07, "logits/chosen": 0.38934314250946045, "logits/rejected": 0.3657595217227936, "logps/chosen": -74.25251007080078, "logps/ref_chosen": -65.22111511230469, "logps/ref_rejected": -80.1810302734375, "logps/rejected": -100.20787048339844, "loss": 1.0026, "margin_dpo/margin_mean": 10.995447158813477, "margin_dpo/margin_std": 14.42126178741455, "step": 213 }, { "epoch": 0.3235071806500378, "fcm_dpo/beta": 0.05538104474544525, "fcm_dpo/delta": -0.20099294185638428, "fcm_dpo/margin": 10.645904541015625, "fcm_dpo/q_t": 0.3733557462692261, "grad_norm": 21.00215721130371, "learning_rate": 4.290985500881143e-07, "logits/chosen": 0.2703975439071655, "logits/rejected": 0.24784952402114868, "logps/chosen": -72.12303161621094, "logps/ref_chosen": -61.292327880859375, "logps/ref_rejected": -67.69841003417969, "logps/rejected": -89.17500305175781, "loss": 1.0258, "margin_dpo/margin_mean": 10.645904541015625, "margin_dpo/margin_std": 14.420230865478516, "step": 214 }, { "epoch": 0.3250188964474679, "fcm_dpo/beta": 0.053312748670578, "fcm_dpo/delta": -0.1950150430202484, "fcm_dpo/margin": 10.953173637390137, "fcm_dpo/q_t": 0.37544140219688416, "grad_norm": 19.009496688842773, "learning_rate": 4.281735428447157e-07, "logits/chosen": 0.2992058992385864, "logits/rejected": 0.1988016963005066, "logps/chosen": -77.40756225585938, "logps/ref_chosen": -63.869136810302734, "logps/ref_rejected": -98.7657241821289, "logps/rejected": -123.25733184814453, "loss": 1.0374, "margin_dpo/margin_mean": 10.953174591064453, "margin_dpo/margin_std": 15.171961784362793, "step": 215 }, { "epoch": 0.32653061224489793, "fcm_dpo/beta": 0.05087217688560486, "fcm_dpo/delta": -0.20583069324493408, "fcm_dpo/margin": 11.67214584350586, "fcm_dpo/q_t": 0.37170571088790894, "grad_norm": 20.9128360748291, "learning_rate": 4.2724355170431247e-07, "logits/chosen": 0.418628454208374, "logits/rejected": 0.33564436435699463, "logps/chosen": -80.5323257446289, "logps/ref_chosen": -67.824951171875, "logps/ref_rejected": -96.40231323242188, "logps/rejected": -120.7818374633789, "loss": 1.0116, "margin_dpo/margin_mean": 11.672143936157227, "margin_dpo/margin_std": 15.552055358886719, "step": 216 }, { "epoch": 0.328042328042328, "fcm_dpo/beta": 0.04872403293848038, "fcm_dpo/delta": -0.20223326981067657, "fcm_dpo/margin": 12.108686447143555, "fcm_dpo/q_t": 0.3712342381477356, "grad_norm": 17.21115493774414, "learning_rate": 4.26308602680756e-07, "logits/chosen": 0.3738940358161926, "logits/rejected": 0.27082303166389465, "logps/chosen": -74.92461395263672, "logps/ref_chosen": -60.5049934387207, "logps/ref_rejected": -84.26618194580078, "logps/rejected": -110.79448699951172, "loss": 1.003, "margin_dpo/margin_mean": 12.108685493469238, "margin_dpo/margin_std": 15.33790397644043, "step": 217 }, { "epoch": 0.3295540438397581, "fcm_dpo/beta": 0.04887588322162628, "fcm_dpo/delta": 0.020069099962711334, "fcm_dpo/margin": 7.752167701721191, "fcm_dpo/q_t": 0.41752538084983826, "grad_norm": 19.853303909301758, "learning_rate": 4.253687219265803e-07, "logits/chosen": 0.26485973596572876, "logits/rejected": 0.25896111130714417, "logps/chosen": -85.6038818359375, "logps/ref_chosen": -70.59431457519531, "logps/ref_rejected": -73.89038848876953, "logps/rejected": -96.6521224975586, "loss": 1.2034, "margin_dpo/margin_mean": 7.752167701721191, "margin_dpo/margin_std": 16.52850341796875, "step": 218 }, { "epoch": 0.3310657596371882, "fcm_dpo/beta": 0.04829990863800049, "fcm_dpo/delta": 0.0020888671278953552, "fcm_dpo/margin": 8.23349666595459, "fcm_dpo/q_t": 0.41240739822387695, "grad_norm": 18.878578186035156, "learning_rate": 4.2442393573227043e-07, "logits/chosen": 0.3172757625579834, "logits/rejected": 0.2777259349822998, "logps/chosen": -74.34814453125, "logps/ref_chosen": -60.490943908691406, "logps/ref_rejected": -75.85001373291016, "logps/rejected": -97.94070434570312, "loss": 1.1424, "margin_dpo/margin_mean": 8.23349666595459, "margin_dpo/margin_std": 14.514315605163574, "step": 219 }, { "epoch": 0.3325774754346183, "fcm_dpo/beta": 0.047665540128946304, "fcm_dpo/delta": -0.07977527379989624, "fcm_dpo/margin": 9.964658737182617, "fcm_dpo/q_t": 0.3987919092178345, "grad_norm": 16.081050872802734, "learning_rate": 4.234742705255272e-07, "logits/chosen": 0.4192200005054474, "logits/rejected": 0.35927826166152954, "logps/chosen": -57.308311462402344, "logps/ref_chosen": -45.013397216796875, "logps/ref_rejected": -70.49369812011719, "logps/rejected": -92.7532730102539, "loss": 1.1145, "margin_dpo/margin_mean": 9.964658737182617, "margin_dpo/margin_std": 16.84489631652832, "step": 220 }, { "epoch": 0.3340891912320484, "fcm_dpo/beta": 0.04696973040699959, "fcm_dpo/delta": -0.1160588264465332, "fcm_dpo/margin": 10.86303997039795, "fcm_dpo/q_t": 0.39082762598991394, "grad_norm": 17.7648868560791, "learning_rate": 4.22519752870528e-07, "logits/chosen": 0.41853708028793335, "logits/rejected": 0.34903794527053833, "logps/chosen": -71.09147644042969, "logps/ref_chosen": -59.09584045410156, "logps/ref_rejected": -88.64388275146484, "logps/rejected": -111.50254821777344, "loss": 1.0799, "margin_dpo/margin_mean": 10.86303997039795, "margin_dpo/margin_std": 16.972129821777344, "step": 221 }, { "epoch": 0.3356009070294785, "fcm_dpo/beta": 0.04493716359138489, "fcm_dpo/delta": -0.24270300567150116, "fcm_dpo/margin": 13.956724166870117, "fcm_dpo/q_t": 0.3622177243232727, "grad_norm": 17.92151641845703, "learning_rate": 4.2156040946718343e-07, "logits/chosen": 0.47252702713012695, "logits/rejected": 0.3871607780456543, "logps/chosen": -68.96340942382812, "logps/ref_chosen": -55.9976921081543, "logps/ref_rejected": -111.94727325439453, "logps/rejected": -138.8697052001953, "loss": 0.9952, "margin_dpo/margin_mean": 13.956724166870117, "margin_dpo/margin_std": 17.575956344604492, "step": 222 }, { "epoch": 0.3371126228269085, "fcm_dpo/beta": 0.043041862547397614, "fcm_dpo/delta": -0.17856287956237793, "fcm_dpo/margin": 13.174043655395508, "fcm_dpo/q_t": 0.37481993436813354, "grad_norm": 16.398582458496094, "learning_rate": 4.2059626715039065e-07, "logits/chosen": 0.43900156021118164, "logits/rejected": 0.3837320804595947, "logps/chosen": -74.68038940429688, "logps/ref_chosen": -59.891422271728516, "logps/ref_rejected": -86.28954315185547, "logps/rejected": -114.25254821777344, "loss": 1.0015, "margin_dpo/margin_mean": 13.174043655395508, "margin_dpo/margin_std": 15.676023483276367, "step": 223 }, { "epoch": 0.3386243386243386, "fcm_dpo/beta": 0.04326090216636658, "fcm_dpo/delta": 0.07465298473834991, "fcm_dpo/margin": 7.578658580780029, "fcm_dpo/q_t": 0.42947906255722046, "grad_norm": 21.63603973388672, "learning_rate": 4.1962735288928304e-07, "logits/chosen": 0.4613693058490753, "logits/rejected": 0.4411858022212982, "logps/chosen": -81.04054260253906, "logps/ref_chosen": -64.04463195800781, "logps/ref_rejected": -75.05450439453125, "logps/rejected": -99.62906646728516, "loss": 1.1997, "margin_dpo/margin_mean": 7.578658580780029, "margin_dpo/margin_std": 16.304107666015625, "step": 224 }, { "epoch": 0.3401360544217687, "fcm_dpo/beta": 0.04221531003713608, "fcm_dpo/delta": -0.17840632796287537, "fcm_dpo/margin": 13.439470291137695, "fcm_dpo/q_t": 0.3767085671424866, "grad_norm": 17.833824157714844, "learning_rate": 4.186536937864752e-07, "logits/chosen": 0.44540882110595703, "logits/rejected": 0.33315980434417725, "logps/chosen": -81.73786926269531, "logps/ref_chosen": -66.0958251953125, "logps/ref_rejected": -97.68675231933594, "logps/rejected": -126.76826477050781, "loss": 1.021, "margin_dpo/margin_mean": 13.439470291137695, "margin_dpo/margin_std": 17.625469207763672, "step": 225 }, { "epoch": 0.3416477702191988, "fcm_dpo/beta": 0.041798561811447144, "fcm_dpo/delta": -0.06101213023066521, "fcm_dpo/margin": 10.95936107635498, "fcm_dpo/q_t": 0.4006775915622711, "grad_norm": 15.791013717651367, "learning_rate": 4.176753170773052e-07, "logits/chosen": 0.4840475916862488, "logits/rejected": 0.436703622341156, "logps/chosen": -66.21839141845703, "logps/ref_chosen": -51.4168701171875, "logps/ref_rejected": -66.30068969726562, "logps/rejected": -92.06156921386719, "loss": 1.1384, "margin_dpo/margin_mean": 10.95936107635498, "margin_dpo/margin_std": 19.834434509277344, "step": 226 }, { "epoch": 0.3431594860166289, "fcm_dpo/beta": 0.0407416932284832, "fcm_dpo/delta": -0.0931011512875557, "fcm_dpo/margin": 11.974498748779297, "fcm_dpo/q_t": 0.3970397412776947, "grad_norm": 16.93015480041504, "learning_rate": 4.166922501290729e-07, "logits/chosen": 0.5116022825241089, "logits/rejected": 0.4724254906177521, "logps/chosen": -73.94245147705078, "logps/ref_chosen": -57.989776611328125, "logps/ref_rejected": -75.05464172363281, "logps/rejected": -102.98181915283203, "loss": 1.1164, "margin_dpo/margin_mean": 11.97449779510498, "margin_dpo/margin_std": 20.62078857421875, "step": 227 }, { "epoch": 0.34467120181405897, "fcm_dpo/beta": 0.040374692529439926, "fcm_dpo/delta": -0.07935923337936401, "fcm_dpo/margin": 11.78009033203125, "fcm_dpo/q_t": 0.39624863862991333, "grad_norm": 16.199413299560547, "learning_rate": 4.1570452044027405e-07, "logits/chosen": 0.4898710548877716, "logits/rejected": 0.4122768044471741, "logps/chosen": -73.80580139160156, "logps/ref_chosen": -55.55936813354492, "logps/ref_rejected": -77.02364349365234, "logps/rejected": -107.0501708984375, "loss": 1.0911, "margin_dpo/margin_mean": 11.78009033203125, "margin_dpo/margin_std": 18.271432876586914, "step": 228 }, { "epoch": 0.34618291761148906, "fcm_dpo/beta": 0.03962048888206482, "fcm_dpo/delta": -0.06741949170827866, "fcm_dpo/margin": 11.715319633483887, "fcm_dpo/q_t": 0.3975101113319397, "grad_norm": 32.47303009033203, "learning_rate": 4.147121556398312e-07, "logits/chosen": 0.5499299764633179, "logits/rejected": 0.48605582118034363, "logps/chosen": -64.823974609375, "logps/ref_chosen": -50.79466247558594, "logps/ref_rejected": -78.4474105834961, "logps/rejected": -104.1920394897461, "loss": 1.1338, "margin_dpo/margin_mean": 11.715319633483887, "margin_dpo/margin_std": 21.123619079589844, "step": 229 }, { "epoch": 0.3476946334089191, "fcm_dpo/beta": 0.039171136915683746, "fcm_dpo/delta": -0.12602832913398743, "fcm_dpo/margin": 13.247392654418945, "fcm_dpo/q_t": 0.38648897409439087, "grad_norm": 16.753572463989258, "learning_rate": 4.137151834863213e-07, "logits/chosen": 0.44350725412368774, "logits/rejected": 0.44191017746925354, "logps/chosen": -73.05119323730469, "logps/ref_chosen": -56.729225158691406, "logps/ref_rejected": -62.99180603027344, "logps/rejected": -92.56117248535156, "loss": 1.0576, "margin_dpo/margin_mean": 13.247393608093262, "margin_dpo/margin_std": 18.401546478271484, "step": 230 }, { "epoch": 0.3492063492063492, "fcm_dpo/beta": 0.03659620136022568, "fcm_dpo/delta": -0.3143787980079651, "fcm_dpo/margin": 18.866680145263672, "fcm_dpo/q_t": 0.34471791982650757, "grad_norm": 17.6395263671875, "learning_rate": 4.1271363186719835e-07, "logits/chosen": 0.40046536922454834, "logits/rejected": 0.3871019780635834, "logps/chosen": -92.10427856445312, "logps/ref_chosen": -72.59709930419922, "logps/ref_rejected": -86.2322998046875, "logps/rejected": -124.60616302490234, "loss": 0.9328, "margin_dpo/margin_mean": 18.866680145263672, "margin_dpo/margin_std": 19.989635467529297, "step": 231 }, { "epoch": 0.3507180650037793, "fcm_dpo/beta": 0.03585398569703102, "fcm_dpo/delta": -0.06070077791810036, "fcm_dpo/margin": 12.77048110961914, "fcm_dpo/q_t": 0.4032752513885498, "grad_norm": 17.137638092041016, "learning_rate": 4.1170752879801436e-07, "logits/chosen": 0.4340604245662689, "logits/rejected": 0.4048748016357422, "logps/chosen": -87.20597839355469, "logps/ref_chosen": -68.1185302734375, "logps/ref_rejected": -83.79415893554688, "logps/rejected": -115.652099609375, "loss": 1.1293, "margin_dpo/margin_mean": 12.77048110961914, "margin_dpo/margin_std": 22.848384857177734, "step": 232 }, { "epoch": 0.35222978080120937, "fcm_dpo/beta": 0.035481683909893036, "fcm_dpo/delta": 0.021351546049118042, "fcm_dpo/margin": 10.630614280700684, "fcm_dpo/q_t": 0.4180358946323395, "grad_norm": 16.118860244750977, "learning_rate": 4.106969024216348e-07, "logits/chosen": 0.48623842000961304, "logits/rejected": 0.42944014072418213, "logps/chosen": -77.33733367919922, "logps/ref_chosen": -55.070152282714844, "logps/ref_rejected": -66.61845397949219, "logps/rejected": -99.5162353515625, "loss": 1.1666, "margin_dpo/margin_mean": 10.630615234375, "margin_dpo/margin_std": 19.464500427246094, "step": 233 }, { "epoch": 0.35374149659863946, "fcm_dpo/beta": 0.03642081841826439, "fcm_dpo/delta": 0.003797471523284912, "fcm_dpo/margin": 10.787586212158203, "fcm_dpo/q_t": 0.41657352447509766, "grad_norm": 18.763879776000977, "learning_rate": 4.09681781007452e-07, "logits/chosen": 0.39658474922180176, "logits/rejected": 0.38530460000038147, "logps/chosen": -76.36502075195312, "logps/ref_chosen": -55.92589569091797, "logps/ref_rejected": -51.11608123779297, "logps/rejected": -82.3427963256836, "loss": 1.193, "margin_dpo/margin_mean": 10.787586212158203, "margin_dpo/margin_std": 21.66558074951172, "step": 234 }, { "epoch": 0.35525321239606955, "fcm_dpo/beta": 0.03478018939495087, "fcm_dpo/delta": -0.23790514469146729, "fcm_dpo/margin": 17.92596435546875, "fcm_dpo/q_t": 0.35996830463409424, "grad_norm": 16.978939056396484, "learning_rate": 4.08662192950594e-07, "logits/chosen": 0.510696291923523, "logits/rejected": 0.49337536096572876, "logps/chosen": -80.82229614257812, "logps/ref_chosen": -64.53972625732422, "logps/ref_rejected": -77.69151306152344, "logps/rejected": -111.9000473022461, "loss": 0.9688, "margin_dpo/margin_mean": 17.92596435546875, "margin_dpo/margin_std": 20.20526123046875, "step": 235 }, { "epoch": 0.35676492819349964, "fcm_dpo/beta": 0.03382885456085205, "fcm_dpo/delta": -0.0744684636592865, "fcm_dpo/margin": 13.915045738220215, "fcm_dpo/q_t": 0.3973722457885742, "grad_norm": 15.759313583374023, "learning_rate": 4.076381667711306e-07, "logits/chosen": 0.4519270658493042, "logits/rejected": 0.4385327100753784, "logps/chosen": -97.88923645019531, "logps/ref_chosen": -71.15473937988281, "logps/ref_rejected": -84.88541412353516, "logps/rejected": -125.53496551513672, "loss": 1.1185, "margin_dpo/margin_mean": 13.915045738220215, "margin_dpo/margin_std": 23.799827575683594, "step": 236 }, { "epoch": 0.35827664399092973, "fcm_dpo/beta": 0.033125244081020355, "fcm_dpo/delta": -0.13403069972991943, "fcm_dpo/margin": 15.910423278808594, "fcm_dpo/q_t": 0.38460463285446167, "grad_norm": 18.07618522644043, "learning_rate": 4.066097311132753e-07, "logits/chosen": 0.5179036259651184, "logits/rejected": 0.5056600570678711, "logps/chosen": -99.39932250976562, "logps/ref_chosen": -76.14201354980469, "logps/ref_rejected": -80.88479614257812, "logps/rejected": -120.05252838134766, "loss": 1.0734, "margin_dpo/margin_mean": 15.910423278808594, "margin_dpo/margin_std": 23.904003143310547, "step": 237 }, { "epoch": 0.35978835978835977, "fcm_dpo/beta": 0.03213762491941452, "fcm_dpo/delta": -0.1272638738155365, "fcm_dpo/margin": 16.177248001098633, "fcm_dpo/q_t": 0.3859345018863678, "grad_norm": 25.455785751342773, "learning_rate": 4.0557691474458414e-07, "logits/chosen": 0.4519047737121582, "logits/rejected": 0.43929409980773926, "logps/chosen": -89.9517593383789, "logps/ref_chosen": -68.88484954833984, "logps/ref_rejected": -75.8946304321289, "logps/rejected": -113.13878631591797, "loss": 1.0625, "margin_dpo/margin_mean": 16.177248001098633, "margin_dpo/margin_std": 23.426652908325195, "step": 238 }, { "epoch": 0.36130007558578986, "fcm_dpo/beta": 0.03170093148946762, "fcm_dpo/delta": -0.12815909087657928, "fcm_dpo/margin": 16.441516876220703, "fcm_dpo/q_t": 0.38694924116134644, "grad_norm": 17.88521385192871, "learning_rate": 4.045397465551513e-07, "logits/chosen": 0.5872991681098938, "logits/rejected": 0.46438801288604736, "logps/chosen": -82.54875183105469, "logps/ref_chosen": -56.771827697753906, "logps/ref_rejected": -116.23050689697266, "logps/rejected": -158.44894409179688, "loss": 1.0754, "margin_dpo/margin_mean": 16.441518783569336, "margin_dpo/margin_std": 24.606647491455078, "step": 239 }, { "epoch": 0.36281179138321995, "fcm_dpo/beta": 0.03009945899248123, "fcm_dpo/delta": -0.2575973570346832, "fcm_dpo/margin": 21.3055419921875, "fcm_dpo/q_t": 0.35840314626693726, "grad_norm": 13.624947547912598, "learning_rate": 4.0349825555680045e-07, "logits/chosen": 0.5199910998344421, "logits/rejected": 0.4287125766277313, "logps/chosen": -78.27786254882812, "logps/ref_chosen": -53.35411071777344, "logps/ref_rejected": -80.12019348144531, "logps/rejected": -126.34949493408203, "loss": 0.9669, "margin_dpo/margin_mean": 21.3055419921875, "margin_dpo/margin_std": 24.555896759033203, "step": 240 }, { "epoch": 0.36432350718065004, "fcm_dpo/beta": 0.02971363626420498, "fcm_dpo/delta": -0.013462748378515244, "fcm_dpo/margin": 13.889959335327148, "fcm_dpo/q_t": 0.41003936529159546, "grad_norm": 17.35210418701172, "learning_rate": 4.0245247088227377e-07, "logits/chosen": 0.48497307300567627, "logits/rejected": 0.44994428753852844, "logps/chosen": -97.29027557373047, "logps/ref_chosen": -71.89541625976562, "logps/ref_rejected": -83.03492736816406, "logps/rejected": -122.31974792480469, "loss": 1.1403, "margin_dpo/margin_mean": 13.889958381652832, "margin_dpo/margin_std": 24.697525024414062, "step": 241 }, { "epoch": 0.36583522297808013, "fcm_dpo/beta": 0.028537161648273468, "fcm_dpo/delta": -0.18422536551952362, "fcm_dpo/margin": 19.995790481567383, "fcm_dpo/q_t": 0.37522459030151367, "grad_norm": 13.32011890411377, "learning_rate": 4.0140242178441665e-07, "logits/chosen": 0.4771093726158142, "logits/rejected": 0.4567781090736389, "logps/chosen": -84.04574584960938, "logps/ref_chosen": -57.927433013916016, "logps/ref_rejected": -67.838623046875, "logps/rejected": -113.95272827148438, "loss": 1.0272, "margin_dpo/margin_mean": 19.99578857421875, "margin_dpo/margin_std": 26.637714385986328, "step": 242 }, { "epoch": 0.3673469387755102, "fcm_dpo/beta": 0.028129760175943375, "fcm_dpo/delta": -0.09357127547264099, "fcm_dpo/margin": 17.38387107849121, "fcm_dpo/q_t": 0.39220672845840454, "grad_norm": 17.161989212036133, "learning_rate": 4.003481376353596e-07, "logits/chosen": 0.49265414476394653, "logits/rejected": 0.4954020380973816, "logps/chosen": -100.76765441894531, "logps/ref_chosen": -74.27667236328125, "logps/ref_rejected": -73.24340057373047, "logps/rejected": -117.11825561523438, "loss": 1.0782, "margin_dpo/margin_mean": 17.38387107849121, "margin_dpo/margin_std": 26.074810028076172, "step": 243 }, { "epoch": 0.3688586545729403, "fcm_dpo/beta": 0.027078591287136078, "fcm_dpo/delta": -0.2479928731918335, "fcm_dpo/margin": 23.371715545654297, "fcm_dpo/q_t": 0.35928937792778015, "grad_norm": 15.457412719726562, "learning_rate": 3.9928964792569654e-07, "logits/chosen": 0.5174931883811951, "logits/rejected": 0.43636053800582886, "logps/chosen": -80.19678497314453, "logps/ref_chosen": -53.36390686035156, "logps/ref_rejected": -71.10276794433594, "logps/rejected": -121.307373046875, "loss": 0.9595, "margin_dpo/margin_mean": 23.371713638305664, "margin_dpo/margin_std": 25.55000114440918, "step": 244 }, { "epoch": 0.37037037037037035, "fcm_dpo/beta": 0.02543710544705391, "fcm_dpo/delta": -0.30314117670059204, "fcm_dpo/margin": 26.81406021118164, "fcm_dpo/q_t": 0.34784555435180664, "grad_norm": 18.74749755859375, "learning_rate": 3.982269822636601e-07, "logits/chosen": 0.5679333209991455, "logits/rejected": 0.5423753261566162, "logps/chosen": -100.21349334716797, "logps/ref_chosen": -71.19510650634766, "logps/ref_rejected": -80.76235961914062, "logps/rejected": -136.59481811523438, "loss": 0.9243, "margin_dpo/margin_mean": 26.81406021118164, "margin_dpo/margin_std": 27.46959686279297, "step": 245 }, { "epoch": 0.37188208616780044, "fcm_dpo/beta": 0.024606363847851753, "fcm_dpo/delta": -0.16271455585956573, "fcm_dpo/margin": 22.51306915283203, "fcm_dpo/q_t": 0.38002920150756836, "grad_norm": 17.689266204833984, "learning_rate": 3.971601703742932e-07, "logits/chosen": 0.5599805116653442, "logits/rejected": 0.5014970302581787, "logps/chosen": -106.98703002929688, "logps/ref_chosen": -71.62104797363281, "logps/ref_rejected": -94.03392028808594, "logps/rejected": -151.9129638671875, "loss": 1.0631, "margin_dpo/margin_mean": 22.5130672454834, "margin_dpo/margin_std": 33.42694854736328, "step": 246 }, { "epoch": 0.37339380196523053, "fcm_dpo/beta": 0.0247543603181839, "fcm_dpo/delta": 0.10083875060081482, "fcm_dpo/margin": 12.170625686645508, "fcm_dpo/q_t": 0.4334149956703186, "grad_norm": 18.4669132232666, "learning_rate": 3.960892420986177e-07, "logits/chosen": 0.5527133941650391, "logits/rejected": 0.5427131056785583, "logps/chosen": -120.41569519042969, "logps/ref_chosen": -80.02254486083984, "logps/ref_rejected": -89.22705841064453, "logps/rejected": -141.79083251953125, "loss": 1.2315, "margin_dpo/margin_mean": 12.170624732971191, "margin_dpo/margin_std": 29.19440460205078, "step": 247 }, { "epoch": 0.3749055177626606, "fcm_dpo/beta": 0.024207081645727158, "fcm_dpo/delta": -0.18463271856307983, "fcm_dpo/margin": 23.729291915893555, "fcm_dpo/q_t": 0.3793613910675049, "grad_norm": 15.274466514587402, "learning_rate": 3.9501422739279953e-07, "logits/chosen": 0.5400315523147583, "logits/rejected": 0.5853956937789917, "logps/chosen": -99.80722045898438, "logps/ref_chosen": -65.37796020507812, "logps/ref_rejected": -61.365787506103516, "logps/rejected": -119.52434539794922, "loss": 1.0573, "margin_dpo/margin_mean": 23.729293823242188, "margin_dpo/margin_std": 35.37153625488281, "step": 248 }, { "epoch": 0.3764172335600907, "fcm_dpo/beta": 0.024627620354294777, "fcm_dpo/delta": 0.16489864885807037, "fcm_dpo/margin": 9.622716903686523, "fcm_dpo/q_t": 0.4474208354949951, "grad_norm": 21.90114974975586, "learning_rate": 3.9393515632731094e-07, "logits/chosen": 0.5731900930404663, "logits/rejected": 0.6100406646728516, "logps/chosen": -120.05014038085938, "logps/ref_chosen": -74.60145568847656, "logps/ref_rejected": -63.79338455200195, "logps/rejected": -118.86479187011719, "loss": 1.3203, "margin_dpo/margin_mean": 9.622716903686523, "margin_dpo/margin_std": 32.57998275756836, "step": 249 }, { "epoch": 0.3779289493575208, "fcm_dpo/beta": 0.024052519351243973, "fcm_dpo/delta": -0.18917813897132874, "fcm_dpo/margin": 24.05508041381836, "fcm_dpo/q_t": 0.37193530797958374, "grad_norm": 16.015880584716797, "learning_rate": 3.9285205908608934e-07, "logits/chosen": 0.6647765636444092, "logits/rejected": 0.6217361092567444, "logps/chosen": -101.03977966308594, "logps/ref_chosen": -61.938209533691406, "logps/ref_rejected": -72.21602630615234, "logps/rejected": -135.3726806640625, "loss": 1.0291, "margin_dpo/margin_mean": 24.05508041381836, "margin_dpo/margin_std": 32.849769592285156, "step": 250 }, { "epoch": 0.3794406651549509, "fcm_dpo/beta": 0.023839669302105904, "fcm_dpo/delta": 0.03268512338399887, "fcm_dpo/margin": 15.458717346191406, "fcm_dpo/q_t": 0.4200361967086792, "grad_norm": 23.502283096313477, "learning_rate": 3.9176496596569265e-07, "logits/chosen": 0.6419062614440918, "logits/rejected": 0.6014778017997742, "logps/chosen": -111.71145629882812, "logps/ref_chosen": -66.85694885253906, "logps/ref_rejected": -84.83396911621094, "logps/rejected": -145.14718627929688, "loss": 1.2018, "margin_dpo/margin_mean": 15.458715438842773, "margin_dpo/margin_std": 33.98907470703125, "step": 251 }, { "epoch": 0.38095238095238093, "fcm_dpo/beta": 0.024377018213272095, "fcm_dpo/delta": 0.03264795243740082, "fcm_dpo/margin": 14.927447319030762, "fcm_dpo/q_t": 0.4222317039966583, "grad_norm": 27.788288116455078, "learning_rate": 3.9067390737445254e-07, "logits/chosen": 0.5485316514968872, "logits/rejected": 0.49520280957221985, "logps/chosen": -97.87835693359375, "logps/ref_chosen": -56.22393035888672, "logps/ref_rejected": -77.1136245727539, "logps/rejected": -133.69549560546875, "loss": 1.2416, "margin_dpo/margin_mean": 14.927447319030762, "margin_dpo/margin_std": 35.03778076171875, "step": 252 }, { "epoch": 0.382464096749811, "fcm_dpo/beta": 0.023830143734812737, "fcm_dpo/delta": -0.001905880868434906, "fcm_dpo/margin": 16.788637161254883, "fcm_dpo/q_t": 0.41427648067474365, "grad_norm": 20.045818328857422, "learning_rate": 3.8957891383162304e-07, "logits/chosen": 0.63853919506073, "logits/rejected": 0.5978012084960938, "logps/chosen": -95.88780212402344, "logps/ref_chosen": -52.21001434326172, "logps/ref_rejected": -58.75764846801758, "logps/rejected": -119.22407531738281, "loss": 1.1678, "margin_dpo/margin_mean": 16.788637161254883, "margin_dpo/margin_std": 32.0192985534668, "step": 253 }, { "epoch": 0.3839758125472411, "fcm_dpo/beta": 0.02359645627439022, "fcm_dpo/delta": -0.09121982753276825, "fcm_dpo/margin": 20.583770751953125, "fcm_dpo/q_t": 0.3972465395927429, "grad_norm": 16.444860458374023, "learning_rate": 3.884800159665276e-07, "logits/chosen": 0.5922572016716003, "logits/rejected": 0.5414531230926514, "logps/chosen": -111.32112121582031, "logps/ref_chosen": -65.63632202148438, "logps/ref_rejected": -82.34425354003906, "logps/rejected": -148.61280822753906, "loss": 1.1023, "margin_dpo/margin_mean": 20.583770751953125, "margin_dpo/margin_std": 33.7872314453125, "step": 254 }, { "epoch": 0.3854875283446712, "fcm_dpo/beta": 0.023110289126634598, "fcm_dpo/delta": -0.13200685381889343, "fcm_dpo/margin": 22.705629348754883, "fcm_dpo/q_t": 0.38903963565826416, "grad_norm": 23.491275787353516, "learning_rate": 3.873772445177015e-07, "logits/chosen": 0.5693901777267456, "logits/rejected": 0.5399304032325745, "logps/chosen": -112.29985046386719, "logps/ref_chosen": -67.91108703613281, "logps/ref_rejected": -83.89114379882812, "logps/rejected": -150.98553466796875, "loss": 1.0989, "margin_dpo/margin_mean": 22.705629348754883, "margin_dpo/margin_std": 38.15801239013672, "step": 255 }, { "epoch": 0.3869992441421013, "fcm_dpo/beta": 0.022572454065084457, "fcm_dpo/delta": -0.10266627371311188, "fcm_dpo/margin": 22.016427993774414, "fcm_dpo/q_t": 0.3943637013435364, "grad_norm": 19.359926223754883, "learning_rate": 3.862706303320329e-07, "logits/chosen": 0.5768101215362549, "logits/rejected": 0.516845703125, "logps/chosen": -114.48297119140625, "logps/ref_chosen": -63.49998474121094, "logps/ref_rejected": -90.77104187011719, "logps/rejected": -163.7704620361328, "loss": 1.1265, "margin_dpo/margin_mean": 22.016427993774414, "margin_dpo/margin_std": 39.41869354248047, "step": 256 }, { "epoch": 0.3885109599395314, "fcm_dpo/beta": 0.022011350840330124, "fcm_dpo/delta": -0.14289262890815735, "fcm_dpo/margin": 24.298580169677734, "fcm_dpo/q_t": 0.3878602981567383, "grad_norm": 16.987897872924805, "learning_rate": 3.851602043638994e-07, "logits/chosen": 0.5744519233703613, "logits/rejected": 0.5112959742546082, "logps/chosen": -121.80198669433594, "logps/ref_chosen": -70.60064697265625, "logps/ref_rejected": -108.58313751220703, "logps/rejected": -184.0830535888672, "loss": 1.0894, "margin_dpo/margin_mean": 24.298580169677734, "margin_dpo/margin_std": 39.85075378417969, "step": 257 }, { "epoch": 0.3900226757369615, "fcm_dpo/beta": 0.02168167755007744, "fcm_dpo/delta": -0.07299736142158508, "fcm_dpo/margin": 21.659503936767578, "fcm_dpo/q_t": 0.39374053478240967, "grad_norm": 16.47481918334961, "learning_rate": 3.840459976743023e-07, "logits/chosen": 0.6185115575790405, "logits/rejected": 0.5693656206130981, "logps/chosen": -109.0198974609375, "logps/ref_chosen": -59.25416564941406, "logps/ref_rejected": -85.58709716796875, "logps/rejected": -157.0123291015625, "loss": 1.0654, "margin_dpo/margin_mean": 21.659503936767578, "margin_dpo/margin_std": 29.831501007080078, "step": 258 }, { "epoch": 0.3915343915343915, "fcm_dpo/beta": 0.02049821801483631, "fcm_dpo/delta": -0.3130166530609131, "fcm_dpo/margin": 33.63237762451172, "fcm_dpo/q_t": 0.3474535346031189, "grad_norm": 14.128410339355469, "learning_rate": 3.8292804142999796e-07, "logits/chosen": 0.5682635307312012, "logits/rejected": 0.4675600528717041, "logps/chosen": -106.17240142822266, "logps/ref_chosen": -65.43487548828125, "logps/ref_rejected": -95.41731262207031, "logps/rejected": -169.78720092773438, "loss": 0.9589, "margin_dpo/margin_mean": 33.63237762451172, "margin_dpo/margin_std": 39.22385787963867, "step": 259 }, { "epoch": 0.3930461073318216, "fcm_dpo/beta": 0.01985359564423561, "fcm_dpo/delta": -0.10949759185314178, "fcm_dpo/margin": 25.3404541015625, "fcm_dpo/q_t": 0.3917066156864166, "grad_norm": 16.248342514038086, "learning_rate": 3.818063669026256e-07, "logits/chosen": 0.5878227949142456, "logits/rejected": 0.5052345991134644, "logps/chosen": -91.03826141357422, "logps/ref_chosen": -49.08958435058594, "logps/ref_rejected": -79.01708221435547, "logps/rejected": -146.30621337890625, "loss": 1.0994, "margin_dpo/margin_mean": 25.3404541015625, "margin_dpo/margin_std": 41.39429473876953, "step": 260 }, { "epoch": 0.3945578231292517, "fcm_dpo/beta": 0.01975172758102417, "fcm_dpo/delta": -0.02371894381940365, "fcm_dpo/margin": 21.40045928955078, "fcm_dpo/q_t": 0.40781357884407043, "grad_norm": 17.527528762817383, "learning_rate": 3.806810054678331e-07, "logits/chosen": 0.4745985269546509, "logits/rejected": 0.5027123689651489, "logps/chosen": -113.92041778564453, "logps/ref_chosen": -70.87239074707031, "logps/ref_rejected": -65.01522064208984, "logps/rejected": -129.4636993408203, "loss": 1.125, "margin_dpo/margin_mean": 21.40045928955078, "margin_dpo/margin_std": 36.23526382446289, "step": 261 }, { "epoch": 0.3960695389266818, "fcm_dpo/beta": 0.019550006836652756, "fcm_dpo/delta": -0.07667741179466248, "fcm_dpo/margin": 24.20038604736328, "fcm_dpo/q_t": 0.39495140314102173, "grad_norm": 14.670170783996582, "learning_rate": 3.7955198860439887e-07, "logits/chosen": 0.6521082520484924, "logits/rejected": 0.5909339189529419, "logps/chosen": -110.21141052246094, "logps/ref_chosen": -67.8706283569336, "logps/ref_rejected": -88.7205810546875, "logps/rejected": -155.26174926757812, "loss": 1.0755, "margin_dpo/margin_mean": 24.20038604736328, "margin_dpo/margin_std": 34.9122428894043, "step": 262 }, { "epoch": 0.3975812547241119, "fcm_dpo/beta": 0.019483327865600586, "fcm_dpo/delta": -0.007016682997345924, "fcm_dpo/margin": 20.865982055664062, "fcm_dpo/q_t": 0.4093659520149231, "grad_norm": 15.283248901367188, "learning_rate": 3.784193478933516e-07, "logits/chosen": 0.5626658797264099, "logits/rejected": 0.45943546295166016, "logps/chosen": -96.49991607666016, "logps/ref_chosen": -55.194583892822266, "logps/ref_rejected": -80.54048156738281, "logps/rejected": -142.7117919921875, "loss": 1.1333, "margin_dpo/margin_mean": 20.865982055664062, "margin_dpo/margin_std": 35.687191009521484, "step": 263 }, { "epoch": 0.39909297052154197, "fcm_dpo/beta": 0.019220834597945213, "fcm_dpo/delta": -0.060912348330020905, "fcm_dpo/margin": 23.83148193359375, "fcm_dpo/q_t": 0.3992775082588196, "grad_norm": 15.10071086883545, "learning_rate": 3.7728311501708674e-07, "logits/chosen": 0.48142877221107483, "logits/rejected": 0.4368743300437927, "logps/chosen": -126.1361312866211, "logps/ref_chosen": -83.17068481445312, "logps/ref_rejected": -88.33625793457031, "logps/rejected": -155.1331787109375, "loss": 1.1012, "margin_dpo/margin_mean": 23.83148193359375, "margin_dpo/margin_std": 37.955535888671875, "step": 264 }, { "epoch": 0.40060468631897206, "fcm_dpo/beta": 0.01891726814210415, "fcm_dpo/delta": -0.1391075700521469, "fcm_dpo/margin": 28.10588836669922, "fcm_dpo/q_t": 0.3853192925453186, "grad_norm": 14.972970008850098, "learning_rate": 3.7614332175848027e-07, "logits/chosen": 0.6752257347106934, "logits/rejected": 0.6097507476806641, "logps/chosen": -91.71566772460938, "logps/ref_chosen": -51.66284942626953, "logps/ref_rejected": -67.1720962524414, "logps/rejected": -135.330810546875, "loss": 1.0868, "margin_dpo/margin_mean": 28.105884552001953, "margin_dpo/margin_std": 44.18812942504883, "step": 265 }, { "epoch": 0.4021164021164021, "fcm_dpo/beta": 0.018389977514743805, "fcm_dpo/delta": -0.07914341986179352, "fcm_dpo/margin": 25.835094451904297, "fcm_dpo/q_t": 0.39529335498809814, "grad_norm": 15.214395523071289, "learning_rate": 3.75e-07, "logits/chosen": 0.5970737934112549, "logits/rejected": 0.5253136157989502, "logps/chosen": -95.74099731445312, "logps/ref_chosen": -57.45049285888672, "logps/ref_rejected": -77.60826110839844, "logps/rejected": -141.73385620117188, "loss": 1.0845, "margin_dpo/margin_mean": 25.835094451904297, "margin_dpo/margin_std": 39.158912658691406, "step": 266 }, { "epoch": 0.4036281179138322, "fcm_dpo/beta": 0.018644969910383224, "fcm_dpo/delta": 0.04022669047117233, "fcm_dpo/margin": 19.258594512939453, "fcm_dpo/q_t": 0.4202066957950592, "grad_norm": 15.600641250610352, "learning_rate": 3.738531817228131e-07, "logits/chosen": 0.6086355447769165, "logits/rejected": 0.5916974544525146, "logps/chosen": -88.30766296386719, "logps/ref_chosen": -55.03535079956055, "logps/ref_rejected": -66.0953369140625, "logps/rejected": -118.62623596191406, "loss": 1.1904, "margin_dpo/margin_mean": 19.25859260559082, "margin_dpo/margin_std": 38.7248420715332, "step": 267 }, { "epoch": 0.4051398337112623, "fcm_dpo/beta": 0.018376577645540237, "fcm_dpo/delta": -0.06248517334461212, "fcm_dpo/margin": 25.012699127197266, "fcm_dpo/q_t": 0.39753156900405884, "grad_norm": 13.161920547485352, "learning_rate": 3.7270289900589204e-07, "logits/chosen": 0.4666864275932312, "logits/rejected": 0.4518453776836395, "logps/chosen": -98.39913940429688, "logps/ref_chosen": -65.07174682617188, "logps/ref_rejected": -71.42485809326172, "logps/rejected": -129.76495361328125, "loss": 1.0667, "margin_dpo/margin_mean": 25.012699127197266, "margin_dpo/margin_std": 33.614540100097656, "step": 268 }, { "epoch": 0.40665154950869237, "fcm_dpo/beta": 0.018133126199245453, "fcm_dpo/delta": -0.10752344131469727, "fcm_dpo/margin": 27.66704559326172, "fcm_dpo/q_t": 0.38884609937667847, "grad_norm": 14.423748016357422, "learning_rate": 3.7154918402511714e-07, "logits/chosen": 0.7032470703125, "logits/rejected": 0.6554895639419556, "logps/chosen": -105.76145935058594, "logps/ref_chosen": -67.1362075805664, "logps/ref_rejected": -82.55778503417969, "logps/rejected": -148.85006713867188, "loss": 1.0541, "margin_dpo/margin_mean": 27.66704559326172, "margin_dpo/margin_std": 36.86224365234375, "step": 269 }, { "epoch": 0.40816326530612246, "fcm_dpo/beta": 0.017861198633909225, "fcm_dpo/delta": 0.01838039606809616, "fcm_dpo/margin": 21.38981819152832, "fcm_dpo/q_t": 0.4144758880138397, "grad_norm": 14.856551170349121, "learning_rate": 3.7039206905237656e-07, "logits/chosen": 0.6661429405212402, "logits/rejected": 0.5874545574188232, "logps/chosen": -105.91307067871094, "logps/ref_chosen": -66.6886978149414, "logps/ref_rejected": -85.16129302978516, "logps/rejected": -145.77548217773438, "loss": 1.1569, "margin_dpo/margin_mean": 21.38981819152832, "margin_dpo/margin_std": 39.64323806762695, "step": 270 }, { "epoch": 0.40967498110355255, "fcm_dpo/beta": 0.018035490065813065, "fcm_dpo/delta": 0.05983828008174896, "fcm_dpo/margin": 18.96731185913086, "fcm_dpo/q_t": 0.4302310645580292, "grad_norm": 16.447065353393555, "learning_rate": 3.692315864546635e-07, "logits/chosen": 0.6576748490333557, "logits/rejected": 0.5956451892852783, "logps/chosen": -110.25750732421875, "logps/ref_chosen": -72.40754699707031, "logps/ref_rejected": -92.06311798095703, "logps/rejected": -148.88038635253906, "loss": 1.2239, "margin_dpo/margin_mean": 18.96731185913086, "margin_dpo/margin_std": 45.06271743774414, "step": 271 }, { "epoch": 0.41118669690098264, "fcm_dpo/beta": 0.017494186758995056, "fcm_dpo/delta": -0.27478447556495667, "fcm_dpo/margin": 37.57149124145508, "fcm_dpo/q_t": 0.351974219083786, "grad_norm": 16.66138458251953, "learning_rate": 3.6806776869317067e-07, "logits/chosen": 0.6409205198287964, "logits/rejected": 0.657590389251709, "logps/chosen": -97.38768768310547, "logps/ref_chosen": -66.60140228271484, "logps/ref_rejected": -67.74340057373047, "logps/rejected": -136.10118103027344, "loss": 0.9218, "margin_dpo/margin_mean": 37.57149124145508, "margin_dpo/margin_std": 35.830421447753906, "step": 272 }, { "epoch": 0.4126984126984127, "fcm_dpo/beta": 0.01705990359187126, "fcm_dpo/delta": -0.038921333849430084, "fcm_dpo/margin": 25.61779022216797, "fcm_dpo/q_t": 0.4043377637863159, "grad_norm": 17.23778533935547, "learning_rate": 3.669006483223828e-07, "logits/chosen": 0.6578247547149658, "logits/rejected": 0.5909750461578369, "logps/chosen": -102.54705810546875, "logps/ref_chosen": -57.35487747192383, "logps/ref_rejected": -84.17168426513672, "logps/rejected": -154.98165893554688, "loss": 1.165, "margin_dpo/margin_mean": 25.617786407470703, "margin_dpo/margin_std": 50.37907791137695, "step": 273 }, { "epoch": 0.41421012849584277, "fcm_dpo/beta": 0.016863549128174782, "fcm_dpo/delta": -0.0922776535153389, "fcm_dpo/margin": 28.930538177490234, "fcm_dpo/q_t": 0.39474034309387207, "grad_norm": 14.447813034057617, "learning_rate": 3.657302579891656e-07, "logits/chosen": 0.4977789521217346, "logits/rejected": 0.48191457986831665, "logps/chosen": -104.21945190429688, "logps/ref_chosen": -59.64149475097656, "logps/ref_rejected": -68.29348754882812, "logps/rejected": -141.80197143554688, "loss": 1.1082, "margin_dpo/margin_mean": 28.930538177490234, "margin_dpo/margin_std": 48.47543716430664, "step": 274 }, { "epoch": 0.41572184429327286, "fcm_dpo/beta": 0.01651889830827713, "fcm_dpo/delta": -0.11709671467542648, "fcm_dpo/margin": 30.950244903564453, "fcm_dpo/q_t": 0.385990709066391, "grad_norm": 14.784261703491211, "learning_rate": 3.645566304318526e-07, "logits/chosen": 0.6382570266723633, "logits/rejected": 0.5556176900863647, "logps/chosen": -96.62824249267578, "logps/ref_chosen": -53.26664352416992, "logps/ref_rejected": -73.84062194824219, "logps/rejected": -148.1524658203125, "loss": 1.0538, "margin_dpo/margin_mean": 30.950244903564453, "margin_dpo/margin_std": 42.797706604003906, "step": 275 }, { "epoch": 0.41723356009070295, "fcm_dpo/beta": 0.016219474375247955, "fcm_dpo/delta": -0.05831969529390335, "fcm_dpo/margin": 28.092674255371094, "fcm_dpo/q_t": 0.398104727268219, "grad_norm": 16.444677352905273, "learning_rate": 3.633797984793294e-07, "logits/chosen": 0.5698527097702026, "logits/rejected": 0.5378223657608032, "logps/chosen": -94.54618072509766, "logps/ref_chosen": -53.02079772949219, "logps/ref_rejected": -61.56678771972656, "logps/rejected": -131.18484497070312, "loss": 1.0929, "margin_dpo/margin_mean": 28.092670440673828, "margin_dpo/margin_std": 42.767860412597656, "step": 276 }, { "epoch": 0.41874527588813304, "fcm_dpo/beta": 0.01643621176481247, "fcm_dpo/delta": 0.09840987622737885, "fcm_dpo/margin": 18.521482467651367, "fcm_dpo/q_t": 0.43302056193351746, "grad_norm": 18.792949676513672, "learning_rate": 3.6219979505011555e-07, "logits/chosen": 0.6819844245910645, "logits/rejected": 0.7085480690002441, "logps/chosen": -117.11235046386719, "logps/ref_chosen": -71.43299102783203, "logps/ref_rejected": -67.65852355957031, "logps/rejected": -131.85935974121094, "loss": 1.2177, "margin_dpo/margin_mean": 18.521484375, "margin_dpo/margin_std": 42.374908447265625, "step": 277 }, { "epoch": 0.42025699168556313, "fcm_dpo/beta": 0.01640717126429081, "fcm_dpo/delta": -0.07881483435630798, "fcm_dpo/margin": 28.937284469604492, "fcm_dpo/q_t": 0.3956737518310547, "grad_norm": 23.435312271118164, "learning_rate": 3.6101665315144353e-07, "logits/chosen": 0.5685777068138123, "logits/rejected": 0.5195118188858032, "logps/chosen": -118.54839324951172, "logps/ref_chosen": -67.11076354980469, "logps/ref_rejected": -88.74851989746094, "logps/rejected": -169.12344360351562, "loss": 1.1033, "margin_dpo/margin_mean": 28.93728256225586, "margin_dpo/margin_std": 46.60326385498047, "step": 278 }, { "epoch": 0.4217687074829932, "fcm_dpo/beta": 0.01570543460547924, "fcm_dpo/delta": -0.23970243334770203, "fcm_dpo/margin": 39.80183410644531, "fcm_dpo/q_t": 0.3597272038459778, "grad_norm": 19.260663986206055, "learning_rate": 3.5983040587833563e-07, "logits/chosen": 0.587124228477478, "logits/rejected": 0.5513536334037781, "logps/chosen": -91.71465301513672, "logps/ref_chosen": -54.49748611450195, "logps/ref_rejected": -70.42373657226562, "logps/rejected": -147.44273376464844, "loss": 0.9529, "margin_dpo/margin_mean": 39.80183029174805, "margin_dpo/margin_std": 41.225624084472656, "step": 279 }, { "epoch": 0.42328042328042326, "fcm_dpo/beta": 0.014930122531950474, "fcm_dpo/delta": -0.24701552093029022, "fcm_dpo/margin": 42.298980712890625, "fcm_dpo/q_t": 0.35863977670669556, "grad_norm": 11.754053115844727, "learning_rate": 3.586410864126781e-07, "logits/chosen": 0.6465336084365845, "logits/rejected": 0.6081722378730774, "logps/chosen": -101.64505004882812, "logps/ref_chosen": -60.43281173706055, "logps/ref_rejected": -78.39051818847656, "logps/rejected": -161.9017333984375, "loss": 0.9399, "margin_dpo/margin_mean": 42.298980712890625, "margin_dpo/margin_std": 42.43488311767578, "step": 280 }, { "epoch": 0.42479213907785335, "fcm_dpo/beta": 0.014409145340323448, "fcm_dpo/delta": -0.15759529173374176, "fcm_dpo/margin": 38.107269287109375, "fcm_dpo/q_t": 0.37785613536834717, "grad_norm": 12.64098072052002, "learning_rate": 3.574487280222929e-07, "logits/chosen": 0.6242285966873169, "logits/rejected": 0.6461759805679321, "logps/chosen": -106.43411254882812, "logps/ref_chosen": -60.2820930480957, "logps/ref_rejected": -62.04009246826172, "logps/rejected": -146.29937744140625, "loss": 1.0218, "margin_dpo/margin_mean": 38.107269287109375, "margin_dpo/margin_std": 48.40364074707031, "step": 281 }, { "epoch": 0.42630385487528344, "fcm_dpo/beta": 0.014241490513086319, "fcm_dpo/delta": -0.07853139936923981, "fcm_dpo/margin": 33.2713737487793, "fcm_dpo/q_t": 0.39666303992271423, "grad_norm": 18.873950958251953, "learning_rate": 3.562533640600075e-07, "logits/chosen": 0.5803976058959961, "logits/rejected": 0.5337048172950745, "logps/chosen": -113.38694763183594, "logps/ref_chosen": -60.623924255371094, "logps/ref_rejected": -68.67400360107422, "logps/rejected": -154.70840454101562, "loss": 1.1035, "margin_dpo/margin_mean": 33.2713737487793, "margin_dpo/margin_std": 52.381553649902344, "step": 282 }, { "epoch": 0.42781557067271353, "fcm_dpo/beta": 0.014045214280486107, "fcm_dpo/delta": -0.02820678800344467, "fcm_dpo/margin": 30.39036750793457, "fcm_dpo/q_t": 0.4052136540412903, "grad_norm": 15.759411811828613, "learning_rate": 3.550550279627215e-07, "logits/chosen": 0.5947866439819336, "logits/rejected": 0.4988853335380554, "logps/chosen": -121.33143615722656, "logps/ref_chosen": -67.64775085449219, "logps/ref_rejected": -99.96835327148438, "logps/rejected": -184.04241943359375, "loss": 1.121, "margin_dpo/margin_mean": 30.390365600585938, "margin_dpo/margin_std": 50.31166076660156, "step": 283 }, { "epoch": 0.4293272864701436, "fcm_dpo/beta": 0.013790317811071873, "fcm_dpo/delta": -0.05972848832607269, "fcm_dpo/margin": 33.095237731933594, "fcm_dpo/q_t": 0.396755188703537, "grad_norm": 12.709166526794434, "learning_rate": 3.5385375325047163e-07, "logits/chosen": 0.6604640483856201, "logits/rejected": 0.5998015403747559, "logps/chosen": -108.848388671875, "logps/ref_chosen": -56.96742630004883, "logps/ref_rejected": -86.36236572265625, "logps/rejected": -171.33856201171875, "loss": 1.0758, "margin_dpo/margin_mean": 33.095237731933594, "margin_dpo/margin_std": 46.18756866455078, "step": 284 }, { "epoch": 0.4308390022675737, "fcm_dpo/beta": 0.013867860659956932, "fcm_dpo/delta": 0.004719622433185577, "fcm_dpo/margin": 28.496816635131836, "fcm_dpo/q_t": 0.4120190143585205, "grad_norm": 18.109210968017578, "learning_rate": 3.5264957352549375e-07, "logits/chosen": 0.6804319620132446, "logits/rejected": 0.654731035232544, "logps/chosen": -136.22836303710938, "logps/ref_chosen": -71.65611267089844, "logps/ref_rejected": -81.63829803466797, "logps/rejected": -174.70738220214844, "loss": 1.1343, "margin_dpo/margin_mean": 28.496816635131836, "margin_dpo/margin_std": 48.14155578613281, "step": 285 }, { "epoch": 0.4323507180650038, "fcm_dpo/beta": 0.013354543596506119, "fcm_dpo/delta": -0.21594460308551788, "fcm_dpo/margin": 45.1060676574707, "fcm_dpo/q_t": 0.36516904830932617, "grad_norm": 14.074089050292969, "learning_rate": 3.514425224712835e-07, "logits/chosen": 0.5963453054428101, "logits/rejected": 0.5053284168243408, "logps/chosen": -119.87309265136719, "logps/ref_chosen": -61.07952117919922, "logps/ref_rejected": -91.28128051757812, "logps/rejected": -195.180908203125, "loss": 0.9671, "margin_dpo/margin_mean": 45.1060676574707, "margin_dpo/margin_std": 48.90450668334961, "step": 286 }, { "epoch": 0.43386243386243384, "fcm_dpo/beta": 0.012879462912678719, "fcm_dpo/delta": -0.19090059399604797, "fcm_dpo/margin": 45.031578063964844, "fcm_dpo/q_t": 0.36986613273620605, "grad_norm": 12.359694480895996, "learning_rate": 3.502326338516534e-07, "logits/chosen": 0.6899577379226685, "logits/rejected": 0.6542201042175293, "logps/chosen": -94.44883728027344, "logps/ref_chosen": -46.035789489746094, "logps/ref_rejected": -59.95293426513672, "logps/rejected": -153.39755249023438, "loss": 0.9868, "margin_dpo/margin_mean": 45.03157424926758, "margin_dpo/margin_std": 51.3031005859375, "step": 287 }, { "epoch": 0.43537414965986393, "fcm_dpo/beta": 0.012728270143270493, "fcm_dpo/delta": -0.017540642991662025, "fcm_dpo/margin": 32.744781494140625, "fcm_dpo/q_t": 0.40644484758377075, "grad_norm": 14.533074378967285, "learning_rate": 3.490199415097892e-07, "logits/chosen": 0.5365294218063354, "logits/rejected": 0.4841228723526001, "logps/chosen": -129.072021484375, "logps/ref_chosen": -65.3908462524414, "logps/ref_rejected": -88.53607940673828, "logps/rejected": -184.96202087402344, "loss": 1.1158, "margin_dpo/margin_mean": 32.744781494140625, "margin_dpo/margin_std": 52.35576629638672, "step": 288 }, { "epoch": 0.436885865457294, "fcm_dpo/beta": 0.012797607108950615, "fcm_dpo/delta": 0.02153756096959114, "fcm_dpo/margin": 29.594276428222656, "fcm_dpo/q_t": 0.4176866412162781, "grad_norm": 18.0996036529541, "learning_rate": 3.4780447936730247e-07, "logits/chosen": 0.7580336928367615, "logits/rejected": 0.7205421924591064, "logps/chosen": -118.97895050048828, "logps/ref_chosen": -54.5936279296875, "logps/ref_rejected": -67.20855712890625, "logps/rejected": -161.1881561279297, "loss": 1.1574, "margin_dpo/margin_mean": 29.59427833557129, "margin_dpo/margin_std": 54.214630126953125, "step": 289 }, { "epoch": 0.4383975812547241, "fcm_dpo/beta": 0.012573182582855225, "fcm_dpo/delta": -0.06713174283504486, "fcm_dpo/margin": 36.85968780517578, "fcm_dpo/q_t": 0.3965286612510681, "grad_norm": 16.59284019470215, "learning_rate": 3.465862814232821e-07, "logits/chosen": 0.7414308786392212, "logits/rejected": 0.6713223457336426, "logps/chosen": -135.12689208984375, "logps/ref_chosen": -61.38457489013672, "logps/ref_rejected": -91.92778015136719, "logps/rejected": -202.52978515625, "loss": 1.0945, "margin_dpo/margin_mean": 36.85968780517578, "margin_dpo/margin_std": 56.981605529785156, "step": 290 }, { "epoch": 0.4399092970521542, "fcm_dpo/beta": 0.012540910392999649, "fcm_dpo/delta": -0.08436623215675354, "fcm_dpo/margin": 38.22565841674805, "fcm_dpo/q_t": 0.3934711813926697, "grad_norm": 16.229829788208008, "learning_rate": 3.4536538175334343e-07, "logits/chosen": 0.8189216256141663, "logits/rejected": 0.7507155537605286, "logps/chosen": -121.83723449707031, "logps/ref_chosen": -50.863037109375, "logps/ref_rejected": -82.20868682861328, "logps/rejected": -191.40853881835938, "loss": 1.0669, "margin_dpo/margin_mean": 38.22565841674805, "margin_dpo/margin_std": 51.421878814697266, "step": 291 }, { "epoch": 0.4414210128495843, "fcm_dpo/beta": 0.012391122058033943, "fcm_dpo/delta": 0.010787010192871094, "fcm_dpo/margin": 31.44447898864746, "fcm_dpo/q_t": 0.41242778301239014, "grad_norm": 15.73810863494873, "learning_rate": 3.4414181450867465e-07, "logits/chosen": 0.6897194385528564, "logits/rejected": 0.6420219540596008, "logps/chosen": -134.3984375, "logps/ref_chosen": -64.34888458251953, "logps/ref_rejected": -72.86434173583984, "logps/rejected": -174.35838317871094, "loss": 1.1483, "margin_dpo/margin_mean": 31.444477081298828, "margin_dpo/margin_std": 56.412925720214844, "step": 292 }, { "epoch": 0.4429327286470144, "fcm_dpo/beta": 0.012167178094387054, "fcm_dpo/delta": -0.15894976258277893, "fcm_dpo/margin": 45.242408752441406, "fcm_dpo/q_t": 0.37786537408828735, "grad_norm": 11.61641788482666, "learning_rate": 3.4291561391508185e-07, "logits/chosen": 0.7814351916313171, "logits/rejected": 0.6955462694168091, "logps/chosen": -124.15631103515625, "logps/ref_chosen": -54.869468688964844, "logps/ref_rejected": -81.858642578125, "logps/rejected": -196.3878936767578, "loss": 1.0418, "margin_dpo/margin_mean": 45.242408752441406, "margin_dpo/margin_std": 61.57768249511719, "step": 293 }, { "epoch": 0.4444444444444444, "fcm_dpo/beta": 0.012029530480504036, "fcm_dpo/delta": 0.03731568530201912, "fcm_dpo/margin": 30.24942970275879, "fcm_dpo/q_t": 0.4194733500480652, "grad_norm": 12.362147331237793, "learning_rate": 3.4168681427203153e-07, "logits/chosen": 0.7301532030105591, "logits/rejected": 0.6848835945129395, "logps/chosen": -128.99664306640625, "logps/ref_chosen": -56.670902252197266, "logps/ref_rejected": -70.32819366455078, "logps/rejected": -172.9033660888672, "loss": 1.1386, "margin_dpo/margin_mean": 30.24942970275879, "margin_dpo/margin_std": 49.71393585205078, "step": 294 }, { "epoch": 0.4459561602418745, "fcm_dpo/beta": 0.01216411404311657, "fcm_dpo/delta": 0.051958102732896805, "fcm_dpo/margin": 28.76223373413086, "fcm_dpo/q_t": 0.42246508598327637, "grad_norm": 19.552597045898438, "learning_rate": 3.4045544995169125e-07, "logits/chosen": 0.6953170299530029, "logits/rejected": 0.5967302918434143, "logps/chosen": -128.7552032470703, "logps/ref_chosen": -50.40088653564453, "logps/ref_rejected": -83.43521881103516, "logps/rejected": -190.55178833007812, "loss": 1.1691, "margin_dpo/margin_mean": 28.762237548828125, "margin_dpo/margin_std": 54.42176055908203, "step": 295 }, { "epoch": 0.4474678760393046, "fcm_dpo/beta": 0.01211509294807911, "fcm_dpo/delta": -0.029373712837696075, "fcm_dpo/margin": 35.291038513183594, "fcm_dpo/q_t": 0.4047588109970093, "grad_norm": 13.090824127197266, "learning_rate": 3.392215553979679e-07, "logits/chosen": 0.6591260433197021, "logits/rejected": 0.6170543432235718, "logps/chosen": -146.8544921875, "logps/ref_chosen": -69.15034484863281, "logps/ref_rejected": -89.60166931152344, "logps/rejected": -202.59686279296875, "loss": 1.1128, "margin_dpo/margin_mean": 35.29104232788086, "margin_dpo/margin_std": 56.198814392089844, "step": 296 }, { "epoch": 0.4489795918367347, "fcm_dpo/beta": 0.012072188779711723, "fcm_dpo/delta": -0.08097002655267715, "fcm_dpo/margin": 39.51006317138672, "fcm_dpo/q_t": 0.3917747139930725, "grad_norm": 13.36133861541748, "learning_rate": 3.3798516512554485e-07, "logits/chosen": 0.6727302074432373, "logits/rejected": 0.6169841289520264, "logps/chosen": -139.768798828125, "logps/ref_chosen": -58.01630401611328, "logps/ref_rejected": -69.95780944824219, "logps/rejected": -191.22036743164062, "loss": 1.05, "margin_dpo/margin_mean": 39.51006317138672, "margin_dpo/margin_std": 49.360862731933594, "step": 297 }, { "epoch": 0.4504913076341648, "fcm_dpo/beta": 0.012033342383801937, "fcm_dpo/delta": 0.04077546298503876, "fcm_dpo/margin": 29.974117279052734, "fcm_dpo/q_t": 0.42090824246406555, "grad_norm": 13.734335899353027, "learning_rate": 3.367463137189156e-07, "logits/chosen": 0.8173831701278687, "logits/rejected": 0.7571094036102295, "logps/chosen": -135.49334716796875, "logps/ref_chosen": -56.1693115234375, "logps/ref_rejected": -68.55052185058594, "logps/rejected": -177.84866333007812, "loss": 1.1813, "margin_dpo/margin_mean": 29.9741153717041, "margin_dpo/margin_std": 60.25572204589844, "step": 298 }, { "epoch": 0.4520030234315949, "fcm_dpo/beta": 0.012157764285802841, "fcm_dpo/delta": 0.08485768735408783, "fcm_dpo/margin": 26.14179229736328, "fcm_dpo/q_t": 0.42982053756713867, "grad_norm": 18.463979721069336, "learning_rate": 3.355050358314172e-07, "logits/chosen": 0.5963407754898071, "logits/rejected": 0.5696459412574768, "logps/chosen": -140.74490356445312, "logps/ref_chosen": -62.31780242919922, "logps/ref_rejected": -72.60028839111328, "logps/rejected": -177.169189453125, "loss": 1.2232, "margin_dpo/margin_mean": 26.14179229736328, "margin_dpo/margin_std": 59.85368347167969, "step": 299 }, { "epoch": 0.45351473922902497, "fcm_dpo/beta": 0.012246577069163322, "fcm_dpo/delta": -0.016002114862203598, "fcm_dpo/margin": 33.91412353515625, "fcm_dpo/q_t": 0.407207727432251, "grad_norm": 13.91763973236084, "learning_rate": 3.3426136618426043e-07, "logits/chosen": 0.700993537902832, "logits/rejected": 0.6361641883850098, "logps/chosen": -139.4217529296875, "logps/ref_chosen": -60.38157653808594, "logps/ref_rejected": -75.45442199707031, "logps/rejected": -188.40872192382812, "loss": 1.1333, "margin_dpo/margin_mean": 33.914119720458984, "margin_dpo/margin_std": 58.409393310546875, "step": 300 }, { "epoch": 0.455026455026455, "fcm_dpo/beta": 0.01223750039935112, "fcm_dpo/delta": 0.027313653379678726, "fcm_dpo/margin": 30.530223846435547, "fcm_dpo/q_t": 0.41623467206954956, "grad_norm": 15.060598373413086, "learning_rate": 3.3301533956555885e-07, "logits/chosen": 0.7448295950889587, "logits/rejected": 0.7172563076019287, "logps/chosen": -129.93959045410156, "logps/ref_chosen": -52.85089111328125, "logps/ref_rejected": -69.97584533691406, "logps/rejected": -177.59475708007812, "loss": 1.155, "margin_dpo/margin_mean": 30.530223846435547, "margin_dpo/margin_std": 55.4693603515625, "step": 301 }, { "epoch": 0.4565381708238851, "fcm_dpo/beta": 0.012510240077972412, "fcm_dpo/delta": 0.12387596070766449, "fcm_dpo/margin": 22.355518341064453, "fcm_dpo/q_t": 0.437045693397522, "grad_norm": 18.828718185424805, "learning_rate": 3.317669908293554e-07, "logits/chosen": 0.5679184198379517, "logits/rejected": 0.5149757862091064, "logps/chosen": -148.1495819091797, "logps/ref_chosen": -66.96650695800781, "logps/ref_rejected": -88.09510803222656, "logps/rejected": -191.63369750976562, "loss": 1.2288, "margin_dpo/margin_mean": 22.355518341064453, "margin_dpo/margin_std": 53.01789474487305, "step": 302 }, { "epoch": 0.4580498866213152, "fcm_dpo/beta": 0.012377789244055748, "fcm_dpo/delta": -0.12394848465919495, "fcm_dpo/margin": 41.81725311279297, "fcm_dpo/q_t": 0.38504043221473694, "grad_norm": 12.159745216369629, "learning_rate": 3.3051635489464793e-07, "logits/chosen": 0.6651922464370728, "logits/rejected": 0.6016709804534912, "logps/chosen": -133.10816955566406, "logps/ref_chosen": -62.12152862548828, "logps/ref_rejected": -90.31204223632812, "logps/rejected": -203.11593627929688, "loss": 1.0636, "margin_dpo/margin_mean": 41.81725311279297, "margin_dpo/margin_std": 60.54005432128906, "step": 303 }, { "epoch": 0.4595616024187453, "fcm_dpo/beta": 0.012083902955055237, "fcm_dpo/delta": -0.10522289574146271, "fcm_dpo/margin": 41.36079406738281, "fcm_dpo/q_t": 0.3844369053840637, "grad_norm": 13.707260131835938, "learning_rate": 3.292634667444117e-07, "logits/chosen": 0.6590306758880615, "logits/rejected": 0.6075633764266968, "logps/chosen": -122.18032836914062, "logps/ref_chosen": -60.695091247558594, "logps/ref_rejected": -78.2525405883789, "logps/rejected": -181.09857177734375, "loss": 1.0229, "margin_dpo/margin_mean": 41.36079406738281, "margin_dpo/margin_std": 46.75843048095703, "step": 304 }, { "epoch": 0.46107331821617537, "fcm_dpo/beta": 0.012013021856546402, "fcm_dpo/delta": 0.019373510032892227, "fcm_dpo/margin": 31.71729278564453, "fcm_dpo/q_t": 0.4142247438430786, "grad_norm": 13.076567649841309, "learning_rate": 3.280083614246217e-07, "logits/chosen": 0.6364206671714783, "logits/rejected": 0.6650691032409668, "logps/chosen": -144.49835205078125, "logps/ref_chosen": -72.69914245605469, "logps/ref_rejected": -65.65670776367188, "logps/rejected": -169.1732177734375, "loss": 1.1557, "margin_dpo/margin_mean": 31.71729278564453, "margin_dpo/margin_std": 57.52043151855469, "step": 305 }, { "epoch": 0.46258503401360546, "fcm_dpo/beta": 0.012034446001052856, "fcm_dpo/delta": 0.01424664631485939, "fcm_dpo/margin": 32.047447204589844, "fcm_dpo/q_t": 0.41186851263046265, "grad_norm": 13.938604354858398, "learning_rate": 3.267510740432719e-07, "logits/chosen": 0.723272442817688, "logits/rejected": 0.611417293548584, "logps/chosen": -121.28661346435547, "logps/ref_chosen": -53.97052764892578, "logps/ref_rejected": -71.02423095703125, "logps/rejected": -170.3877716064453, "loss": 1.1107, "margin_dpo/margin_mean": 32.047447204589844, "margin_dpo/margin_std": 45.39753723144531, "step": 306 }, { "epoch": 0.46409674981103555, "fcm_dpo/beta": 0.012543787248432636, "fcm_dpo/delta": 0.20474562048912048, "fcm_dpo/margin": 15.797273635864258, "fcm_dpo/q_t": 0.45768019556999207, "grad_norm": 17.678245544433594, "learning_rate": 3.2549163976939285e-07, "logits/chosen": 0.7229036092758179, "logits/rejected": 0.6749166250228882, "logps/chosen": -118.67990112304688, "logps/ref_chosen": -57.413108825683594, "logps/ref_rejected": -68.68010711669922, "logps/rejected": -145.74417114257812, "loss": 1.3206, "margin_dpo/margin_mean": 15.79727554321289, "margin_dpo/margin_std": 57.190589904785156, "step": 307 }, { "epoch": 0.4656084656084656, "fcm_dpo/beta": 0.012601923197507858, "fcm_dpo/delta": 0.013587992638349533, "fcm_dpo/margin": 30.687833786010742, "fcm_dpo/q_t": 0.41257303953170776, "grad_norm": 11.740730285644531, "learning_rate": 3.2423009383206874e-07, "logits/chosen": 0.6801770925521851, "logits/rejected": 0.6689407825469971, "logps/chosen": -126.86598205566406, "logps/ref_chosen": -66.59879302978516, "logps/ref_rejected": -74.337158203125, "logps/rejected": -165.29217529296875, "loss": 1.1438, "margin_dpo/margin_mean": 30.68783187866211, "margin_dpo/margin_std": 53.468421936035156, "step": 308 }, { "epoch": 0.4671201814058957, "fcm_dpo/beta": 0.01260319072753191, "fcm_dpo/delta": -0.002130165696144104, "fcm_dpo/margin": 31.87583351135254, "fcm_dpo/q_t": 0.40878164768218994, "grad_norm": 12.210992813110352, "learning_rate": 3.229664715194511e-07, "logits/chosen": 0.7181951999664307, "logits/rejected": 0.6613823175430298, "logps/chosen": -134.88624572753906, "logps/ref_chosen": -65.39474487304688, "logps/ref_rejected": -75.70930480957031, "logps/rejected": -177.07664489746094, "loss": 1.1086, "margin_dpo/margin_mean": 31.875831604003906, "margin_dpo/margin_std": 47.10810089111328, "step": 309 }, { "epoch": 0.46863189720332576, "fcm_dpo/beta": 0.012920012697577477, "fcm_dpo/delta": 0.15046632289886475, "fcm_dpo/margin": 19.62242889404297, "fcm_dpo/q_t": 0.44542592763900757, "grad_norm": 14.362406730651855, "learning_rate": 3.2170080817777257e-07, "logits/chosen": 0.6988915205001831, "logits/rejected": 0.6873372793197632, "logps/chosen": -144.29745483398438, "logps/ref_chosen": -74.66827392578125, "logps/ref_rejected": -80.5689697265625, "logps/rejected": -169.82058715820312, "loss": 1.2524, "margin_dpo/margin_mean": 19.62242889404297, "margin_dpo/margin_std": 51.30465316772461, "step": 310 }, { "epoch": 0.47014361300075586, "fcm_dpo/beta": 0.012975428253412247, "fcm_dpo/delta": 0.00755208358168602, "fcm_dpo/margin": 30.218578338623047, "fcm_dpo/q_t": 0.4122556149959564, "grad_norm": 15.364665985107422, "learning_rate": 3.204331392103574e-07, "logits/chosen": 0.6145889163017273, "logits/rejected": 0.4769431948661804, "logps/chosen": -116.09998321533203, "logps/ref_chosen": -59.738033294677734, "logps/ref_rejected": -93.60757446289062, "logps/rejected": -180.1881103515625, "loss": 1.126, "margin_dpo/margin_mean": 30.21858024597168, "margin_dpo/margin_std": 48.436500549316406, "step": 311 }, { "epoch": 0.47165532879818595, "fcm_dpo/beta": 0.012864358723163605, "fcm_dpo/delta": -0.1185460314154625, "fcm_dpo/margin": 39.848915100097656, "fcm_dpo/q_t": 0.38267982006073, "grad_norm": 13.564282417297363, "learning_rate": 3.1916350007663176e-07, "logits/chosen": 0.6986439228057861, "logits/rejected": 0.6026472449302673, "logps/chosen": -112.68070983886719, "logps/ref_chosen": -53.816436767578125, "logps/ref_rejected": -68.6575698852539, "logps/rejected": -167.37075805664062, "loss": 1.0189, "margin_dpo/margin_mean": 39.848915100097656, "margin_dpo/margin_std": 45.942588806152344, "step": 312 }, { "epoch": 0.47316704459561604, "fcm_dpo/beta": 0.012997419573366642, "fcm_dpo/delta": 0.1283135563135147, "fcm_dpo/margin": 21.175430297851562, "fcm_dpo/q_t": 0.44016578793525696, "grad_norm": 12.463242530822754, "learning_rate": 3.178919262911314e-07, "logits/chosen": 0.7597650289535522, "logits/rejected": 0.738747775554657, "logps/chosen": -119.58285522460938, "logps/ref_chosen": -59.957359313964844, "logps/ref_rejected": -69.31729888916016, "logps/rejected": -150.11822509765625, "loss": 1.2264, "margin_dpo/margin_mean": 21.175430297851562, "margin_dpo/margin_std": 49.33220291137695, "step": 313 }, { "epoch": 0.47467876039304613, "fcm_dpo/beta": 0.012764360755681992, "fcm_dpo/delta": -0.14646798372268677, "fcm_dpo/margin": 42.150630950927734, "fcm_dpo/q_t": 0.38041582703590393, "grad_norm": 12.823559761047363, "learning_rate": 3.166184534225087e-07, "logits/chosen": 0.6917102336883545, "logits/rejected": 0.7238273024559021, "logps/chosen": -127.27233123779297, "logps/ref_chosen": -70.26815795898438, "logps/ref_rejected": -69.23971557617188, "logps/rejected": -168.39451599121094, "loss": 1.02, "margin_dpo/margin_mean": 42.15062713623047, "margin_dpo/margin_std": 51.92638397216797, "step": 314 }, { "epoch": 0.47619047619047616, "fcm_dpo/beta": 0.012742714956402779, "fcm_dpo/delta": -0.01833246648311615, "fcm_dpo/margin": 32.727691650390625, "fcm_dpo/q_t": 0.4048681855201721, "grad_norm": 12.803494453430176, "learning_rate": 3.1534311709253723e-07, "logits/chosen": 0.6166698932647705, "logits/rejected": 0.5790517926216125, "logps/chosen": -129.92495727539062, "logps/ref_chosen": -67.79469299316406, "logps/ref_rejected": -74.55148315429688, "logps/rejected": -169.409423828125, "loss": 1.1, "margin_dpo/margin_mean": 32.72768783569336, "margin_dpo/margin_std": 47.19148635864258, "step": 315 }, { "epoch": 0.47770219198790626, "fcm_dpo/beta": 0.012548735365271568, "fcm_dpo/delta": -0.12661194801330566, "fcm_dpo/margin": 41.3825569152832, "fcm_dpo/q_t": 0.38153761625289917, "grad_norm": 13.01544189453125, "learning_rate": 3.1406595297511564e-07, "logits/chosen": 0.5967949628829956, "logits/rejected": 0.4676669239997864, "logps/chosen": -113.19938659667969, "logps/ref_chosen": -55.288482666015625, "logps/ref_rejected": -96.15723419189453, "logps/rejected": -195.45069885253906, "loss": 1.0151, "margin_dpo/margin_mean": 41.38255310058594, "margin_dpo/margin_std": 44.43248748779297, "step": 316 }, { "epoch": 0.47921390778533635, "fcm_dpo/beta": 0.012046756222844124, "fcm_dpo/delta": -0.13969969749450684, "fcm_dpo/margin": 44.12655258178711, "fcm_dpo/q_t": 0.3790159225463867, "grad_norm": 17.531597137451172, "learning_rate": 3.1278699679526975e-07, "logits/chosen": 0.7275354862213135, "logits/rejected": 0.6814507246017456, "logps/chosen": -109.85079956054688, "logps/ref_chosen": -54.58137512207031, "logps/ref_rejected": -72.77232360839844, "logps/rejected": -172.16830444335938, "loss": 1.0104, "margin_dpo/margin_mean": 44.126548767089844, "margin_dpo/margin_std": 50.9593505859375, "step": 317 }, { "epoch": 0.48072562358276644, "fcm_dpo/beta": 0.012052427977323532, "fcm_dpo/delta": 0.03859718143939972, "fcm_dpo/margin": 30.102909088134766, "fcm_dpo/q_t": 0.4214034080505371, "grad_norm": 13.89206600189209, "learning_rate": 3.1150628432815336e-07, "logits/chosen": 0.7386119365692139, "logits/rejected": 0.6694012880325317, "logps/chosen": -116.91323852539062, "logps/ref_chosen": -52.88822937011719, "logps/ref_rejected": -80.63988494873047, "logps/rejected": -174.76779174804688, "loss": 1.1896, "margin_dpo/margin_mean": 30.102909088134766, "margin_dpo/margin_std": 63.038963317871094, "step": 318 }, { "epoch": 0.48223733938019653, "fcm_dpo/beta": 0.011894501745700836, "fcm_dpo/delta": -0.11431370675563812, "fcm_dpo/margin": 42.753143310546875, "fcm_dpo/q_t": 0.38717061281204224, "grad_norm": 14.289010047912598, "learning_rate": 3.1022385139804707e-07, "logits/chosen": 0.6626858115196228, "logits/rejected": 0.647432804107666, "logps/chosen": -124.62028503417969, "logps/ref_chosen": -64.36333465576172, "logps/ref_rejected": -79.47296142578125, "logps/rejected": -182.48306274414062, "loss": 1.0501, "margin_dpo/margin_mean": 42.753143310546875, "margin_dpo/margin_std": 58.25758361816406, "step": 319 }, { "epoch": 0.4837490551776266, "fcm_dpo/beta": 0.011949008330702782, "fcm_dpo/delta": -0.012487806379795074, "fcm_dpo/margin": 34.2347526550293, "fcm_dpo/q_t": 0.4102562665939331, "grad_norm": 15.246618270874023, "learning_rate": 3.0893973387735683e-07, "logits/chosen": 0.5578382015228271, "logits/rejected": 0.5187278389930725, "logps/chosen": -108.69810485839844, "logps/ref_chosen": -49.558746337890625, "logps/ref_rejected": -71.23444366455078, "logps/rejected": -164.60855102539062, "loss": 1.1317, "margin_dpo/margin_mean": 34.2347526550293, "margin_dpo/margin_std": 54.77796173095703, "step": 320 }, { "epoch": 0.4852607709750567, "fcm_dpo/beta": 0.011591393500566483, "fcm_dpo/delta": -0.059746138751506805, "fcm_dpo/margin": 39.27128601074219, "fcm_dpo/q_t": 0.3966837525367737, "grad_norm": 19.282135009765625, "learning_rate": 3.0765396768561004e-07, "logits/chosen": 0.673438549041748, "logits/rejected": 0.6589595675468445, "logps/chosen": -115.71333312988281, "logps/ref_chosen": -52.08526611328125, "logps/ref_rejected": -55.58674621582031, "logps/rejected": -158.486083984375, "loss": 1.0935, "margin_dpo/margin_mean": 39.27128219604492, "margin_dpo/margin_std": 57.47880554199219, "step": 321 }, { "epoch": 0.48677248677248675, "fcm_dpo/beta": 0.011424287222325802, "fcm_dpo/delta": -0.14884579181671143, "fcm_dpo/margin": 47.356597900390625, "fcm_dpo/q_t": 0.3772934675216675, "grad_norm": 12.958308219909668, "learning_rate": 3.063665887884511e-07, "logits/chosen": 0.7031540870666504, "logits/rejected": 0.6229244470596313, "logps/chosen": -121.94970703125, "logps/ref_chosen": -47.404109954833984, "logps/ref_rejected": -73.4260025024414, "logps/rejected": -195.3282012939453, "loss": 1.0104, "margin_dpo/margin_mean": 47.356597900390625, "margin_dpo/margin_std": 55.74169921875, "step": 322 }, { "epoch": 0.48828420256991684, "fcm_dpo/beta": 0.011438079178333282, "fcm_dpo/delta": 0.051193639636039734, "fcm_dpo/margin": 30.61416244506836, "fcm_dpo/q_t": 0.42455974221229553, "grad_norm": 14.519761085510254, "learning_rate": 3.0507763319663517e-07, "logits/chosen": 0.6318315267562866, "logits/rejected": 0.5518868565559387, "logps/chosen": -146.78964233398438, "logps/ref_chosen": -70.00630187988281, "logps/ref_rejected": -86.96690368652344, "logps/rejected": -194.36441040039062, "loss": 1.2003, "margin_dpo/margin_mean": 30.61416244506836, "margin_dpo/margin_std": 66.47247314453125, "step": 323 }, { "epoch": 0.4897959183673469, "fcm_dpo/beta": 0.011217910796403885, "fcm_dpo/delta": -0.10228747129440308, "fcm_dpo/margin": 44.276397705078125, "fcm_dpo/q_t": 0.38728103041648865, "grad_norm": 18.26462173461914, "learning_rate": 3.0378713696502097e-07, "logits/chosen": 0.7387030124664307, "logits/rejected": 0.6826291680335999, "logps/chosen": -121.74503326416016, "logps/ref_chosen": -55.88882064819336, "logps/ref_rejected": -75.23088073730469, "logps/rejected": -185.36349487304688, "loss": 1.0356, "margin_dpo/margin_mean": 44.276397705078125, "margin_dpo/margin_std": 54.363380432128906, "step": 324 }, { "epoch": 0.491307634164777, "fcm_dpo/beta": 0.011081516742706299, "fcm_dpo/delta": -0.03910160809755325, "fcm_dpo/margin": 39.42148208618164, "fcm_dpo/q_t": 0.4006548821926117, "grad_norm": 16.26625633239746, "learning_rate": 3.0249513619156206e-07, "logits/chosen": 0.6771467924118042, "logits/rejected": 0.61439049243927, "logps/chosen": -147.13021850585938, "logps/ref_chosen": -64.14701843261719, "logps/ref_rejected": -79.91143798828125, "logps/rejected": -202.31610107421875, "loss": 1.1056, "margin_dpo/margin_mean": 39.421478271484375, "margin_dpo/margin_std": 61.576148986816406, "step": 325 }, { "epoch": 0.4928193499622071, "fcm_dpo/beta": 0.011371839791536331, "fcm_dpo/delta": 0.17452731728553772, "fcm_dpo/margin": 20.207664489746094, "fcm_dpo/q_t": 0.4494830369949341, "grad_norm": 14.90293025970459, "learning_rate": 3.012016670162977e-07, "logits/chosen": 0.6311044096946716, "logits/rejected": 0.6375623345375061, "logps/chosen": -172.97830200195312, "logps/ref_chosen": -75.53131103515625, "logps/ref_rejected": -76.5898666381836, "logps/rejected": -194.24452209472656, "loss": 1.2857, "margin_dpo/margin_mean": 20.207664489746094, "margin_dpo/margin_std": 60.42800521850586, "step": 326 }, { "epoch": 0.4943310657596372, "fcm_dpo/beta": 0.011601308360695839, "fcm_dpo/delta": 0.0348714217543602, "fcm_dpo/margin": 31.55461883544922, "fcm_dpo/q_t": 0.4197550117969513, "grad_norm": 17.706899642944336, "learning_rate": 2.99906765620341e-07, "logits/chosen": 0.6011782288551331, "logits/rejected": 0.5678955316543579, "logps/chosen": -162.18263244628906, "logps/ref_chosen": -69.33717346191406, "logps/ref_rejected": -73.37751770019531, "logps/rejected": -197.77761840820312, "loss": 1.1775, "margin_dpo/margin_mean": 31.55462074279785, "margin_dpo/margin_std": 62.52714157104492, "step": 327 }, { "epoch": 0.4958427815570673, "fcm_dpo/beta": 0.011501701548695564, "fcm_dpo/delta": -0.05493466556072235, "fcm_dpo/margin": 39.34053421020508, "fcm_dpo/q_t": 0.39944905042648315, "grad_norm": 13.670370101928711, "learning_rate": 2.9861046822486766e-07, "logits/chosen": 0.6204825639724731, "logits/rejected": 0.5904245376586914, "logps/chosen": -141.41159057617188, "logps/ref_chosen": -61.70623016357422, "logps/ref_rejected": -83.73808288574219, "logps/rejected": -202.78399658203125, "loss": 1.0846, "margin_dpo/margin_mean": 39.34053421020508, "margin_dpo/margin_std": 57.092227935791016, "step": 328 }, { "epoch": 0.4973544973544973, "fcm_dpo/beta": 0.011362850666046143, "fcm_dpo/delta": -0.056787166744470596, "fcm_dpo/margin": 39.974037170410156, "fcm_dpo/q_t": 0.3996536433696747, "grad_norm": 17.699430465698242, "learning_rate": 2.9731281109010253e-07, "logits/chosen": 0.7306606769561768, "logits/rejected": 0.6753987073898315, "logps/chosen": -154.018310546875, "logps/ref_chosen": -64.4984130859375, "logps/ref_rejected": -83.6591796875, "logps/rejected": -213.15310668945312, "loss": 1.0851, "margin_dpo/margin_mean": 39.974037170410156, "margin_dpo/margin_std": 58.887046813964844, "step": 329 }, { "epoch": 0.4988662131519274, "fcm_dpo/beta": 0.011202252469956875, "fcm_dpo/delta": -0.10586293041706085, "fcm_dpo/margin": 44.688682556152344, "fcm_dpo/q_t": 0.3895889222621918, "grad_norm": 13.673934936523438, "learning_rate": 2.9601383051430505e-07, "logits/chosen": 0.6697182059288025, "logits/rejected": 0.6038362979888916, "logps/chosen": -131.23902893066406, "logps/ref_chosen": -54.80464172363281, "logps/ref_rejected": -75.3194351196289, "logps/rejected": -196.4425048828125, "loss": 1.097, "margin_dpo/margin_mean": 44.688682556152344, "margin_dpo/margin_std": 71.45393371582031, "step": 330 }, { "epoch": 0.5003779289493575, "fcm_dpo/beta": 0.010804209858179092, "fcm_dpo/delta": -0.1956481635570526, "fcm_dpo/margin": 54.106658935546875, "fcm_dpo/q_t": 0.3688202500343323, "grad_norm": 13.366751670837402, "learning_rate": 2.947135628327544e-07, "logits/chosen": 0.8018543720245361, "logits/rejected": 0.7764079570770264, "logps/chosen": -142.2760009765625, "logps/ref_chosen": -59.242584228515625, "logps/ref_rejected": -69.87483215332031, "logps/rejected": -207.01490783691406, "loss": 1.0069, "margin_dpo/margin_mean": 54.106658935546875, "margin_dpo/margin_std": 67.39854431152344, "step": 331 }, { "epoch": 0.5018896447467877, "fcm_dpo/beta": 0.010633476078510284, "fcm_dpo/delta": -0.07682856917381287, "fcm_dpo/margin": 44.427947998046875, "fcm_dpo/q_t": 0.39411279559135437, "grad_norm": 14.242164611816406, "learning_rate": 2.934120444167326e-07, "logits/chosen": 0.6497039198875427, "logits/rejected": 0.6062981486320496, "logps/chosen": -151.68014526367188, "logps/ref_chosen": -67.10975646972656, "logps/ref_rejected": -77.11839294433594, "logps/rejected": -206.11672973632812, "loss": 1.0636, "margin_dpo/margin_mean": 44.427947998046875, "margin_dpo/margin_std": 58.21818542480469, "step": 332 }, { "epoch": 0.5034013605442177, "fcm_dpo/beta": 0.010376621037721634, "fcm_dpo/delta": -0.09302500635385513, "fcm_dpo/margin": 47.08435821533203, "fcm_dpo/q_t": 0.38988494873046875, "grad_norm": 13.075955390930176, "learning_rate": 2.921093116725076e-07, "logits/chosen": 0.6993060111999512, "logits/rejected": 0.6273704171180725, "logps/chosen": -148.47674560546875, "logps/ref_chosen": -58.381134033203125, "logps/ref_rejected": -85.02839660644531, "logps/rejected": -222.20835876464844, "loss": 1.0433, "margin_dpo/margin_mean": 47.08435821533203, "margin_dpo/margin_std": 59.184940338134766, "step": 333 }, { "epoch": 0.5049130763416477, "fcm_dpo/beta": 0.010399187915027142, "fcm_dpo/delta": 0.047791529446840286, "fcm_dpo/margin": 34.018856048583984, "fcm_dpo/q_t": 0.4216611981391907, "grad_norm": 13.354299545288086, "learning_rate": 2.9080540104031484e-07, "logits/chosen": 0.6879534721374512, "logits/rejected": 0.6454561948776245, "logps/chosen": -152.24517822265625, "logps/ref_chosen": -66.89199829101562, "logps/ref_rejected": -91.83695220947266, "logps/rejected": -211.208984375, "loss": 1.1831, "margin_dpo/margin_mean": 34.018856048583984, "margin_dpo/margin_std": 68.88700103759766, "step": 334 }, { "epoch": 0.5064247921390779, "fcm_dpo/beta": 0.010456325486302376, "fcm_dpo/delta": -0.0019730515778064728, "fcm_dpo/margin": 38.389739990234375, "fcm_dpo/q_t": 0.4113653004169464, "grad_norm": 18.998254776000977, "learning_rate": 2.895003489933375e-07, "logits/chosen": 0.7184223532676697, "logits/rejected": 0.6815344095230103, "logps/chosen": -148.72567749023438, "logps/ref_chosen": -61.51445770263672, "logps/ref_rejected": -75.68916320800781, "logps/rejected": -201.29013061523438, "loss": 1.1365, "margin_dpo/margin_mean": 38.389739990234375, "margin_dpo/margin_std": 65.81454467773438, "step": 335 }, { "epoch": 0.5079365079365079, "fcm_dpo/beta": 0.010317089036107063, "fcm_dpo/delta": -0.008530773222446442, "fcm_dpo/margin": 39.463714599609375, "fcm_dpo/q_t": 0.4097328782081604, "grad_norm": 12.17556095123291, "learning_rate": 2.8819419203668675e-07, "logits/chosen": 0.6476879119873047, "logits/rejected": 0.6272980570793152, "logps/chosen": -165.22909545898438, "logps/ref_chosen": -68.85006713867188, "logps/ref_rejected": -92.99603271484375, "logps/rejected": -228.8387908935547, "loss": 1.1198, "margin_dpo/margin_mean": 39.463714599609375, "margin_dpo/margin_std": 62.35620880126953, "step": 336 }, { "epoch": 0.509448223733938, "fcm_dpo/beta": 0.01051211729645729, "fcm_dpo/delta": 0.07588024437427521, "fcm_dpo/margin": 31.061817169189453, "fcm_dpo/q_t": 0.42595547437667847, "grad_norm": 12.565362930297852, "learning_rate": 2.8688696670638053e-07, "logits/chosen": 0.5867069959640503, "logits/rejected": 0.5546263456344604, "logps/chosen": -172.4549560546875, "logps/ref_chosen": -73.18783569335938, "logps/ref_rejected": -86.89118957519531, "logps/rejected": -217.22012329101562, "loss": 1.1796, "margin_dpo/margin_mean": 31.06181526184082, "margin_dpo/margin_std": 60.04143142700195, "step": 337 }, { "epoch": 0.5109599395313681, "fcm_dpo/beta": 0.010581170208752155, "fcm_dpo/delta": 0.03785444423556328, "fcm_dpo/margin": 34.35435104370117, "fcm_dpo/q_t": 0.41939669847488403, "grad_norm": 13.616081237792969, "learning_rate": 2.8557870956832133e-07, "logits/chosen": 0.663311243057251, "logits/rejected": 0.6381123661994934, "logps/chosen": -158.74578857421875, "logps/ref_chosen": -63.939613342285156, "logps/ref_rejected": -75.34243774414062, "logps/rejected": -204.50296020507812, "loss": 1.1652, "margin_dpo/margin_mean": 34.35435104370117, "margin_dpo/margin_std": 64.8512954711914, "step": 338 }, { "epoch": 0.5124716553287982, "fcm_dpo/beta": 0.010615767911076546, "fcm_dpo/delta": 0.007883191108703613, "fcm_dpo/margin": 36.96049499511719, "fcm_dpo/q_t": 0.4113204777240753, "grad_norm": 12.145761489868164, "learning_rate": 2.842694572172736e-07, "logits/chosen": 0.8048558235168457, "logits/rejected": 0.7194130420684814, "logps/chosen": -124.07569122314453, "logps/ref_chosen": -45.54913330078125, "logps/ref_rejected": -67.0482177734375, "logps/rejected": -182.53526306152344, "loss": 1.1217, "margin_dpo/margin_mean": 36.96049880981445, "margin_dpo/margin_std": 57.94923400878906, "step": 339 }, { "epoch": 0.5139833711262283, "fcm_dpo/beta": 0.010650699026882648, "fcm_dpo/delta": -0.02553240954875946, "fcm_dpo/margin": 39.822174072265625, "fcm_dpo/q_t": 0.4078987240791321, "grad_norm": 12.790955543518066, "learning_rate": 2.8295924627584004e-07, "logits/chosen": 0.6951748728752136, "logits/rejected": 0.6753948926925659, "logps/chosen": -142.3338623046875, "logps/ref_chosen": -54.00564956665039, "logps/ref_rejected": -61.314430236816406, "logps/rejected": -189.46482849121094, "loss": 1.1379, "margin_dpo/margin_mean": 39.822174072265625, "margin_dpo/margin_std": 70.49546813964844, "step": 340 }, { "epoch": 0.5154950869236583, "fcm_dpo/beta": 0.010352972894906998, "fcm_dpo/delta": -0.06754864007234573, "fcm_dpo/margin": 44.4967041015625, "fcm_dpo/q_t": 0.39656323194503784, "grad_norm": 13.45057201385498, "learning_rate": 2.816481133934373e-07, "logits/chosen": 0.6896207332611084, "logits/rejected": 0.6430627107620239, "logps/chosen": -149.3645477294922, "logps/ref_chosen": -63.39509582519531, "logps/ref_rejected": -76.20973205566406, "logps/rejected": -206.67588806152344, "loss": 1.0901, "margin_dpo/margin_mean": 44.496700286865234, "margin_dpo/margin_std": 63.01578140258789, "step": 341 }, { "epoch": 0.5170068027210885, "fcm_dpo/beta": 0.010273242369294167, "fcm_dpo/delta": -0.08363170921802521, "fcm_dpo/margin": 46.62270736694336, "fcm_dpo/q_t": 0.3937643766403198, "grad_norm": 11.712442398071289, "learning_rate": 2.8033609524527046e-07, "logits/chosen": 0.7326318025588989, "logits/rejected": 0.6941955089569092, "logps/chosen": -137.80276489257812, "logps/ref_chosen": -53.047813415527344, "logps/ref_rejected": -68.2854232788086, "logps/rejected": -199.6630859375, "loss": 1.0632, "margin_dpo/margin_mean": 46.62270736694336, "margin_dpo/margin_std": 63.597843170166016, "step": 342 }, { "epoch": 0.5185185185185185, "fcm_dpo/beta": 0.01034234743565321, "fcm_dpo/delta": 0.10083356499671936, "fcm_dpo/margin": 29.168373107910156, "fcm_dpo/q_t": 0.4309525191783905, "grad_norm": 12.435088157653809, "learning_rate": 2.7902322853130753e-07, "logits/chosen": 0.5851594805717468, "logits/rejected": 0.5792618989944458, "logps/chosen": -151.103515625, "logps/ref_chosen": -70.57852935791016, "logps/ref_rejected": -84.73873901367188, "logps/rejected": -194.43209838867188, "loss": 1.1982, "margin_dpo/margin_mean": 29.168371200561523, "margin_dpo/margin_std": 57.57359313964844, "step": 343 }, { "epoch": 0.5200302343159486, "fcm_dpo/beta": 0.010332523845136166, "fcm_dpo/delta": -0.07479459047317505, "fcm_dpo/margin": 45.5706787109375, "fcm_dpo/q_t": 0.3941543698310852, "grad_norm": 14.585039138793945, "learning_rate": 2.7770954997525274e-07, "logits/chosen": 0.703796923160553, "logits/rejected": 0.6407324075698853, "logps/chosen": -143.69720458984375, "logps/ref_chosen": -55.811004638671875, "logps/ref_rejected": -84.77637481689453, "logps/rejected": -218.23324584960938, "loss": 1.0615, "margin_dpo/margin_mean": 45.5706787109375, "margin_dpo/margin_std": 60.98072052001953, "step": 344 }, { "epoch": 0.5215419501133787, "fcm_dpo/beta": 0.010342312976717949, "fcm_dpo/delta": -0.0065320320427417755, "fcm_dpo/margin": 39.27054214477539, "fcm_dpo/q_t": 0.40868985652923584, "grad_norm": 13.813055992126465, "learning_rate": 2.7639509632351927e-07, "logits/chosen": 0.769192099571228, "logits/rejected": 0.7235137224197388, "logps/chosen": -126.54728698730469, "logps/ref_chosen": -57.78609848022461, "logps/ref_rejected": -78.91847229003906, "logps/rejected": -186.9501953125, "loss": 1.1169, "margin_dpo/margin_mean": 39.27054214477539, "margin_dpo/margin_std": 62.098785400390625, "step": 345 }, { "epoch": 0.5230536659108088, "fcm_dpo/beta": 0.010258370079100132, "fcm_dpo/delta": -0.06688511371612549, "fcm_dpo/margin": 45.19707489013672, "fcm_dpo/q_t": 0.3958582282066345, "grad_norm": 14.238128662109375, "learning_rate": 2.7507990434420123e-07, "logits/chosen": 0.6935669183731079, "logits/rejected": 0.6146125197410583, "logps/chosen": -133.35507202148438, "logps/ref_chosen": -56.285125732421875, "logps/ref_rejected": -91.15303039550781, "logps/rejected": -213.4200439453125, "loss": 1.0827, "margin_dpo/margin_mean": 45.197078704833984, "margin_dpo/margin_std": 64.67538452148438, "step": 346 }, { "epoch": 0.5245653817082389, "fcm_dpo/beta": 0.010258248075842857, "fcm_dpo/delta": 0.05244377255439758, "fcm_dpo/margin": 34.057273864746094, "fcm_dpo/q_t": 0.4229571223258972, "grad_norm": 16.91437339782715, "learning_rate": 2.737640108260456e-07, "logits/chosen": 0.8284379839897156, "logits/rejected": 0.7780033349990845, "logps/chosen": -137.3411865234375, "logps/ref_chosen": -53.499542236328125, "logps/ref_rejected": -72.52565002441406, "logps/rejected": -190.42459106445312, "loss": 1.1617, "margin_dpo/margin_mean": 34.057273864746094, "margin_dpo/margin_std": 62.23371887207031, "step": 347 }, { "epoch": 0.5260770975056689, "fcm_dpo/beta": 0.010174311697483063, "fcm_dpo/delta": -0.05163482576608658, "fcm_dpo/margin": 44.132564544677734, "fcm_dpo/q_t": 0.4025237560272217, "grad_norm": 12.754920959472656, "learning_rate": 2.724474525774229e-07, "logits/chosen": 0.8104952573776245, "logits/rejected": 0.781765341758728, "logps/chosen": -125.65589141845703, "logps/ref_chosen": -50.78684997558594, "logps/ref_rejected": -68.63732147216797, "logps/rejected": -187.638916015625, "loss": 1.1016, "margin_dpo/margin_mean": 44.132564544677734, "margin_dpo/margin_std": 69.71031188964844, "step": 348 }, { "epoch": 0.527588813303099, "fcm_dpo/beta": 0.01015070267021656, "fcm_dpo/delta": -0.04512634873390198, "fcm_dpo/margin": 43.6390266418457, "fcm_dpo/q_t": 0.40171459317207336, "grad_norm": 13.942571640014648, "learning_rate": 2.711302664252973e-07, "logits/chosen": 0.7045985460281372, "logits/rejected": 0.6161255836486816, "logps/chosen": -127.07453155517578, "logps/ref_chosen": -53.325008392333984, "logps/ref_rejected": -83.21236419677734, "logps/rejected": -200.6009063720703, "loss": 1.0896, "margin_dpo/margin_mean": 43.6390266418457, "margin_dpo/margin_std": 63.60553741455078, "step": 349 }, { "epoch": 0.5291005291005291, "fcm_dpo/beta": 0.009878698736429214, "fcm_dpo/delta": -0.1288827359676361, "fcm_dpo/margin": 52.83354187011719, "fcm_dpo/q_t": 0.3816789984703064, "grad_norm": 15.5607328414917, "learning_rate": 2.698124892141971e-07, "logits/chosen": 0.7101126909255981, "logits/rejected": 0.6280903816223145, "logps/chosen": -141.38832092285156, "logps/ref_chosen": -61.625770568847656, "logps/ref_rejected": -87.63627624511719, "logps/rejected": -220.23236083984375, "loss": 1.0247, "margin_dpo/margin_mean": 52.83354187011719, "margin_dpo/margin_std": 64.74072265625, "step": 350 }, { "epoch": 0.5306122448979592, "fcm_dpo/beta": 0.009750676341354847, "fcm_dpo/delta": -0.030142836272716522, "fcm_dpo/margin": 43.94773864746094, "fcm_dpo/q_t": 0.4014700651168823, "grad_norm": 13.296806335449219, "learning_rate": 2.6849415780518357e-07, "logits/chosen": 0.6415879726409912, "logits/rejected": 0.5668590068817139, "logps/chosen": -131.79360961914062, "logps/ref_chosen": -56.2563362121582, "logps/ref_rejected": -79.11589813232422, "logps/rejected": -198.6009063720703, "loss": 1.1164, "margin_dpo/margin_mean": 43.94773864746094, "margin_dpo/margin_std": 70.56474304199219, "step": 351 }, { "epoch": 0.5321239606953893, "fcm_dpo/beta": 0.009682442992925644, "fcm_dpo/delta": -0.046238820999860764, "fcm_dpo/margin": 45.857688903808594, "fcm_dpo/q_t": 0.3999043107032776, "grad_norm": 12.848146438598633, "learning_rate": 2.6717530907482024e-07, "logits/chosen": 0.7140268087387085, "logits/rejected": 0.661480188369751, "logps/chosen": -137.10739135742188, "logps/ref_chosen": -63.05195236206055, "logps/ref_rejected": -85.52035522460938, "logps/rejected": -205.4334716796875, "loss": 1.0801, "margin_dpo/margin_mean": 45.857688903808594, "margin_dpo/margin_std": 64.35737609863281, "step": 352 }, { "epoch": 0.5336356764928194, "fcm_dpo/beta": 0.009640311822295189, "fcm_dpo/delta": -0.04901779443025589, "fcm_dpo/margin": 46.344337463378906, "fcm_dpo/q_t": 0.39885491132736206, "grad_norm": 11.270014762878418, "learning_rate": 2.658559799141411e-07, "logits/chosen": 0.7019220590591431, "logits/rejected": 0.7082865238189697, "logps/chosen": -140.48910522460938, "logps/ref_chosen": -69.00918579101562, "logps/ref_rejected": -72.65840148925781, "logps/rejected": -190.482666015625, "loss": 1.0834, "margin_dpo/margin_mean": 46.344337463378906, "margin_dpo/margin_std": 65.52245330810547, "step": 353 }, { "epoch": 0.5351473922902494, "fcm_dpo/beta": 0.009430557489395142, "fcm_dpo/delta": -0.07606241106987, "fcm_dpo/margin": 50.012725830078125, "fcm_dpo/q_t": 0.39343374967575073, "grad_norm": 13.003927230834961, "learning_rate": 2.6453620722761895e-07, "logits/chosen": 0.7931294441223145, "logits/rejected": 0.6584126949310303, "logps/chosen": -115.51165008544922, "logps/ref_chosen": -39.78833770751953, "logps/ref_rejected": -69.56885528564453, "logps/rejected": -195.30490112304688, "loss": 1.0744, "margin_dpo/margin_mean": 50.012725830078125, "margin_dpo/margin_std": 70.41667938232422, "step": 354 }, { "epoch": 0.5366591080876795, "fcm_dpo/beta": 0.009351427666842937, "fcm_dpo/delta": -0.08544706553220749, "fcm_dpo/margin": 51.47795867919922, "fcm_dpo/q_t": 0.39146727323532104, "grad_norm": 15.754137992858887, "learning_rate": 2.632160279321328e-07, "logits/chosen": 0.7644649744033813, "logits/rejected": 0.629076361656189, "logps/chosen": -126.27880859375, "logps/ref_chosen": -46.25537872314453, "logps/ref_rejected": -78.20236206054688, "logps/rejected": -209.70376586914062, "loss": 1.0715, "margin_dpo/margin_mean": 51.477962493896484, "margin_dpo/margin_std": 72.61712646484375, "step": 355 }, { "epoch": 0.5381708238851096, "fcm_dpo/beta": 0.009251654148101807, "fcm_dpo/delta": 0.009645845741033554, "fcm_dpo/margin": 42.194644927978516, "fcm_dpo/q_t": 0.4131093919277191, "grad_norm": 12.248784065246582, "learning_rate": 2.618954789559356e-07, "logits/chosen": 0.7211343050003052, "logits/rejected": 0.6421887874603271, "logps/chosen": -122.72053527832031, "logps/ref_chosen": -47.906158447265625, "logps/ref_rejected": -74.29397583007812, "logps/rejected": -191.30299377441406, "loss": 1.1683, "margin_dpo/margin_mean": 42.19464111328125, "margin_dpo/margin_std": 81.54269409179688, "step": 356 }, { "epoch": 0.5396825396825397, "fcm_dpo/beta": 0.009272318333387375, "fcm_dpo/delta": 0.04974482208490372, "fcm_dpo/margin": 37.79161834716797, "fcm_dpo/q_t": 0.4197618365287781, "grad_norm": 12.6592435836792, "learning_rate": 2.6057459723762076e-07, "logits/chosen": 0.7132511138916016, "logits/rejected": 0.6882957220077515, "logps/chosen": -152.98204040527344, "logps/ref_chosen": -62.63500213623047, "logps/ref_rejected": -65.11399841308594, "logps/rejected": -193.25267028808594, "loss": 1.1598, "margin_dpo/margin_mean": 37.79161834716797, "margin_dpo/margin_std": 63.681026458740234, "step": 357 }, { "epoch": 0.5411942554799698, "fcm_dpo/beta": 0.009308705106377602, "fcm_dpo/delta": -0.0927896648645401, "fcm_dpo/margin": 52.447574615478516, "fcm_dpo/q_t": 0.39033764600753784, "grad_norm": 15.408126831054688, "learning_rate": 2.5925341972508954e-07, "logits/chosen": 0.6472632884979248, "logits/rejected": 0.663436770439148, "logps/chosen": -151.31112670898438, "logps/ref_chosen": -67.20960998535156, "logps/ref_rejected": -69.34715270996094, "logps/rejected": -205.896240234375, "loss": 1.0529, "margin_dpo/margin_mean": 52.44757843017578, "margin_dpo/margin_std": 68.63077545166016, "step": 358 }, { "epoch": 0.5427059712773998, "fcm_dpo/beta": 0.00950109213590622, "fcm_dpo/delta": 0.15490363538265228, "fcm_dpo/margin": 25.983016967773438, "fcm_dpo/q_t": 0.44459402561187744, "grad_norm": 13.879263877868652, "learning_rate": 2.579319833745169e-07, "logits/chosen": 0.6614922285079956, "logits/rejected": 0.6340516805648804, "logps/chosen": -157.9315185546875, "logps/ref_chosen": -62.52578353881836, "logps/ref_rejected": -76.63114929199219, "logps/rejected": -198.0198974609375, "loss": 1.2399, "margin_dpo/margin_mean": 25.983016967773438, "margin_dpo/margin_std": 61.045677185058594, "step": 359 }, { "epoch": 0.54421768707483, "fcm_dpo/beta": 0.009465381503105164, "fcm_dpo/delta": -0.017956897616386414, "fcm_dpo/margin": 44.06662368774414, "fcm_dpo/q_t": 0.40685826539993286, "grad_norm": 12.662845611572266, "learning_rate": 2.5661032514931834e-07, "logits/chosen": 0.636221170425415, "logits/rejected": 0.5421815514564514, "logps/chosen": -157.51663208007812, "logps/ref_chosen": -63.48772048950195, "logps/ref_rejected": -90.6891098022461, "logps/rejected": -228.78463745117188, "loss": 1.1035, "margin_dpo/margin_mean": 44.06662368774414, "margin_dpo/margin_std": 66.23414611816406, "step": 360 }, { "epoch": 0.54572940287226, "fcm_dpo/beta": 0.009376795962452888, "fcm_dpo/delta": -0.09013990312814713, "fcm_dpo/margin": 51.81315994262695, "fcm_dpo/q_t": 0.3891136050224304, "grad_norm": 12.409852027893066, "learning_rate": 2.552884820191154e-07, "logits/chosen": 0.7601670026779175, "logits/rejected": 0.7106729745864868, "logps/chosen": -150.19419860839844, "logps/ref_chosen": -57.917144775390625, "logps/ref_rejected": -72.39089965820312, "logps/rejected": -216.48110961914062, "loss": 1.0435, "margin_dpo/margin_mean": 51.81315994262695, "margin_dpo/margin_std": 64.16586303710938, "step": 361 }, { "epoch": 0.54724111866969, "fcm_dpo/beta": 0.00923209823668003, "fcm_dpo/delta": -0.08657985925674438, "fcm_dpo/margin": 52.24518585205078, "fcm_dpo/q_t": 0.3927406668663025, "grad_norm": 14.153077125549316, "learning_rate": 2.53966490958702e-07, "logits/chosen": 0.7800583839416504, "logits/rejected": 0.6689051389694214, "logps/chosen": -157.0105438232422, "logps/ref_chosen": -63.4434700012207, "logps/ref_rejected": -103.45516967773438, "logps/rejected": -249.26744079589844, "loss": 1.0766, "margin_dpo/margin_mean": 52.24518585205078, "margin_dpo/margin_std": 76.14582061767578, "step": 362 }, { "epoch": 0.5487528344671202, "fcm_dpo/beta": 0.00903936568647623, "fcm_dpo/delta": -0.06485149264335632, "fcm_dpo/margin": 51.07155990600586, "fcm_dpo/q_t": 0.3948080539703369, "grad_norm": 15.533196449279785, "learning_rate": 2.526443889470099e-07, "logits/chosen": 0.7673148512840271, "logits/rejected": 0.6327505707740784, "logps/chosen": -147.92532348632812, "logps/ref_chosen": -48.65182876586914, "logps/ref_rejected": -88.65904235839844, "logps/rejected": -239.00411987304688, "loss": 1.0656, "margin_dpo/margin_mean": 51.071563720703125, "margin_dpo/margin_std": 68.49053192138672, "step": 363 }, { "epoch": 0.5502645502645502, "fcm_dpo/beta": 0.008878370746970177, "fcm_dpo/delta": -0.1015341728925705, "fcm_dpo/margin": 55.899478912353516, "fcm_dpo/q_t": 0.3897857964038849, "grad_norm": 11.952712059020996, "learning_rate": 2.513222129660744e-07, "logits/chosen": 0.6561600565910339, "logits/rejected": 0.5624690055847168, "logps/chosen": -147.87716674804688, "logps/ref_chosen": -57.87107467651367, "logps/ref_rejected": -80.95503234863281, "logps/rejected": -226.860595703125, "loss": 1.0774, "margin_dpo/margin_mean": 55.899478912353516, "margin_dpo/margin_std": 83.72024536132812, "step": 364 }, { "epoch": 0.5517762660619804, "fcm_dpo/beta": 0.008753440342843533, "fcm_dpo/delta": -0.06477545946836472, "fcm_dpo/margin": 52.74662780761719, "fcm_dpo/q_t": 0.39343178272247314, "grad_norm": 11.89730453491211, "learning_rate": 2.5e-07, "logits/chosen": 0.7345987558364868, "logits/rejected": 0.7310192584991455, "logps/chosen": -145.48480224609375, "logps/ref_chosen": -64.94217681884766, "logps/ref_rejected": -74.8599853515625, "logps/rejected": -208.14923095703125, "loss": 1.0438, "margin_dpo/margin_mean": 52.74662780761719, "margin_dpo/margin_std": 60.88469314575195, "step": 365 }, { "epoch": 0.5532879818594104, "fcm_dpo/beta": 0.008768351748585701, "fcm_dpo/delta": 0.0030683092772960663, "fcm_dpo/margin": 45.25122833251953, "fcm_dpo/q_t": 0.41162019968032837, "grad_norm": 13.978940963745117, "learning_rate": 2.486777870339255e-07, "logits/chosen": 0.6601021885871887, "logits/rejected": 0.6458828449249268, "logps/chosen": -136.3466796875, "logps/ref_chosen": -55.16598129272461, "logps/ref_rejected": -65.26121520996094, "logps/rejected": -191.69314575195312, "loss": 1.1443, "margin_dpo/margin_mean": 45.25122833251953, "margin_dpo/margin_std": 79.96685791015625, "step": 366 }, { "epoch": 0.5547996976568406, "fcm_dpo/beta": 0.008722890168428421, "fcm_dpo/delta": 0.005503779277205467, "fcm_dpo/margin": 45.237735748291016, "fcm_dpo/q_t": 0.40820807218551636, "grad_norm": 12.16567325592041, "learning_rate": 2.4735561105299014e-07, "logits/chosen": 0.7035742998123169, "logits/rejected": 0.5987119078636169, "logps/chosen": -147.3154754638672, "logps/ref_chosen": -56.01046371459961, "logps/ref_rejected": -77.31010437011719, "logps/rejected": -213.8528594970703, "loss": 1.118, "margin_dpo/margin_mean": 45.23773956298828, "margin_dpo/margin_std": 69.86012268066406, "step": 367 }, { "epoch": 0.5563114134542706, "fcm_dpo/beta": 0.008784948848187923, "fcm_dpo/delta": 0.027075402438640594, "fcm_dpo/margin": 42.564666748046875, "fcm_dpo/q_t": 0.4141819477081299, "grad_norm": 12.818552017211914, "learning_rate": 2.46033509041298e-07, "logits/chosen": 0.5555020570755005, "logits/rejected": 0.5551966428756714, "logps/chosen": -175.42108154296875, "logps/ref_chosen": -74.82927703857422, "logps/ref_rejected": -76.11680603027344, "logps/rejected": -219.27328491210938, "loss": 1.1343, "margin_dpo/margin_mean": 42.564666748046875, "margin_dpo/margin_std": 69.22799682617188, "step": 368 }, { "epoch": 0.5578231292517006, "fcm_dpo/beta": 0.008928908035159111, "fcm_dpo/delta": 0.07122094929218292, "fcm_dpo/margin": 37.005409240722656, "fcm_dpo/q_t": 0.42534273862838745, "grad_norm": 12.85647964477539, "learning_rate": 2.447115179808846e-07, "logits/chosen": 0.6955777406692505, "logits/rejected": 0.6436460614204407, "logps/chosen": -153.3529052734375, "logps/ref_chosen": -58.32621765136719, "logps/ref_rejected": -80.92183685302734, "logps/rejected": -212.9539337158203, "loss": 1.1719, "margin_dpo/margin_mean": 37.005409240722656, "margin_dpo/margin_std": 68.3039779663086, "step": 369 }, { "epoch": 0.5593348450491308, "fcm_dpo/beta": 0.00882653333246708, "fcm_dpo/delta": -0.06157629191875458, "fcm_dpo/margin": 51.95973205566406, "fcm_dpo/q_t": 0.39605778455734253, "grad_norm": 13.36386775970459, "learning_rate": 2.4338967485068164e-07, "logits/chosen": 0.7886132597923279, "logits/rejected": 0.7210862636566162, "logps/chosen": -143.81800842285156, "logps/ref_chosen": -52.88372039794922, "logps/ref_rejected": -79.43692016601562, "logps/rejected": -222.3309326171875, "loss": 1.0883, "margin_dpo/margin_mean": 51.95973205566406, "margin_dpo/margin_std": 77.87043762207031, "step": 370 }, { "epoch": 0.5608465608465608, "fcm_dpo/beta": 0.00884594488888979, "fcm_dpo/delta": -0.012843847274780273, "fcm_dpo/margin": 46.544769287109375, "fcm_dpo/q_t": 0.40730273723602295, "grad_norm": 14.952667236328125, "learning_rate": 2.420680166254831e-07, "logits/chosen": 0.7922030687332153, "logits/rejected": 0.7605820894241333, "logps/chosen": -139.9717254638672, "logps/ref_chosen": -49.224212646484375, "logps/ref_rejected": -63.348472595214844, "logps/rejected": -200.6407470703125, "loss": 1.1097, "margin_dpo/margin_mean": 46.54476547241211, "margin_dpo/margin_std": 70.16578674316406, "step": 371 }, { "epoch": 0.562358276643991, "fcm_dpo/beta": 0.009037522599101067, "fcm_dpo/delta": 0.14259661734104156, "fcm_dpo/margin": 28.694950103759766, "fcm_dpo/q_t": 0.44280239939689636, "grad_norm": 16.078189849853516, "learning_rate": 2.4074658027491044e-07, "logits/chosen": 0.7318013310432434, "logits/rejected": 0.6354872584342957, "logps/chosen": -149.69754028320312, "logps/ref_chosen": -52.269554138183594, "logps/ref_rejected": -72.99522399902344, "logps/rejected": -199.1181640625, "loss": 1.2793, "margin_dpo/margin_mean": 28.69495391845703, "margin_dpo/margin_std": 82.97291564941406, "step": 372 }, { "epoch": 0.563869992441421, "fcm_dpo/beta": 0.009121359325945377, "fcm_dpo/delta": 0.048252545297145844, "fcm_dpo/margin": 38.738624572753906, "fcm_dpo/q_t": 0.4220026731491089, "grad_norm": 14.351899147033691, "learning_rate": 2.394254027623792e-07, "logits/chosen": 0.7239790558815002, "logits/rejected": 0.653123140335083, "logps/chosen": -167.8028564453125, "logps/ref_chosen": -61.112998962402344, "logps/ref_rejected": -76.24851989746094, "logps/rejected": -221.677001953125, "loss": 1.2036, "margin_dpo/margin_mean": 38.738624572753906, "margin_dpo/margin_std": 84.81132507324219, "step": 373 }, { "epoch": 0.5653817082388511, "fcm_dpo/beta": 0.008857542648911476, "fcm_dpo/delta": -0.1921839565038681, "fcm_dpo/margin": 65.550048828125, "fcm_dpo/q_t": 0.3672952651977539, "grad_norm": 14.499312400817871, "learning_rate": 2.381045210440644e-07, "logits/chosen": 0.6369169354438782, "logits/rejected": 0.6387360095977783, "logps/chosen": -161.60235595703125, "logps/ref_chosen": -72.66920471191406, "logps/ref_rejected": -76.83158874511719, "logps/rejected": -231.31480407714844, "loss": 0.9808, "margin_dpo/margin_mean": 65.550048828125, "margin_dpo/margin_std": 72.03311920166016, "step": 374 }, { "epoch": 0.5668934240362812, "fcm_dpo/beta": 0.00878390483558178, "fcm_dpo/delta": 0.018049051985144615, "fcm_dpo/margin": 43.547298431396484, "fcm_dpo/q_t": 0.4139014482498169, "grad_norm": 14.884501457214355, "learning_rate": 2.3678397206786715e-07, "logits/chosen": 0.748401403427124, "logits/rejected": 0.6893227696418762, "logps/chosen": -145.9182891845703, "logps/ref_chosen": -57.68330383300781, "logps/ref_rejected": -79.34097290039062, "logps/rejected": -211.12326049804688, "loss": 1.1448, "margin_dpo/margin_mean": 43.54730224609375, "margin_dpo/margin_std": 75.84925842285156, "step": 375 }, { "epoch": 0.5684051398337112, "fcm_dpo/beta": 0.008706707507371902, "fcm_dpo/delta": -0.07801564037799835, "fcm_dpo/margin": 54.46429443359375, "fcm_dpo/q_t": 0.39430510997772217, "grad_norm": 14.08863639831543, "learning_rate": 2.3546379277238103e-07, "logits/chosen": 0.7859851717948914, "logits/rejected": 0.7144784927368164, "logps/chosen": -147.56790161132812, "logps/ref_chosen": -51.674072265625, "logps/ref_rejected": -75.69713592529297, "logps/rejected": -226.0552520751953, "loss": 1.0795, "margin_dpo/margin_mean": 54.46429443359375, "margin_dpo/margin_std": 80.19218444824219, "step": 376 }, { "epoch": 0.5699168556311414, "fcm_dpo/beta": 0.008786122314631939, "fcm_dpo/delta": 0.049597274512052536, "fcm_dpo/margin": 40.009239196777344, "fcm_dpo/q_t": 0.420106440782547, "grad_norm": 14.204717636108398, "learning_rate": 2.3414402008585886e-07, "logits/chosen": 0.7056032419204712, "logits/rejected": 0.6848410367965698, "logps/chosen": -146.47552490234375, "logps/ref_chosen": -46.17853546142578, "logps/ref_rejected": -57.756500244140625, "logps/rejected": -198.062744140625, "loss": 1.1646, "margin_dpo/margin_mean": 40.009239196777344, "margin_dpo/margin_std": 73.26371002197266, "step": 377 }, { "epoch": 0.5714285714285714, "fcm_dpo/beta": 0.008892524987459183, "fcm_dpo/delta": 0.06320677697658539, "fcm_dpo/margin": 38.03527069091797, "fcm_dpo/q_t": 0.423417866230011, "grad_norm": 13.075918197631836, "learning_rate": 2.3282469092517977e-07, "logits/chosen": 0.7477153539657593, "logits/rejected": 0.6995026469230652, "logps/chosen": -155.286376953125, "logps/ref_chosen": -59.21887969970703, "logps/ref_rejected": -71.24818420410156, "logps/rejected": -205.3509521484375, "loss": 1.1712, "margin_dpo/margin_mean": 38.03527069091797, "margin_dpo/margin_std": 70.07217407226562, "step": 378 }, { "epoch": 0.5729402872260015, "fcm_dpo/beta": 0.008829142898321152, "fcm_dpo/delta": -0.03952915593981743, "fcm_dpo/margin": 49.586158752441406, "fcm_dpo/q_t": 0.402193546295166, "grad_norm": 14.709274291992188, "learning_rate": 2.3150584219481643e-07, "logits/chosen": 0.740286111831665, "logits/rejected": 0.6656442880630493, "logps/chosen": -167.2689666748047, "logps/ref_chosen": -76.31658935546875, "logps/ref_rejected": -104.26200103759766, "logps/rejected": -244.800537109375, "loss": 1.0925, "margin_dpo/margin_mean": 49.586158752441406, "margin_dpo/margin_std": 73.70040893554688, "step": 379 }, { "epoch": 0.5744520030234316, "fcm_dpo/beta": 0.008623160421848297, "fcm_dpo/delta": -0.1510622799396515, "fcm_dpo/margin": 62.967567443847656, "fcm_dpo/q_t": 0.3760165572166443, "grad_norm": 13.50650691986084, "learning_rate": 2.3018751078580283e-07, "logits/chosen": 0.7178818583488464, "logits/rejected": 0.6813254952430725, "logps/chosen": -142.29251098632812, "logps/ref_chosen": -61.283164978027344, "logps/ref_rejected": -72.38892364501953, "logps/rejected": -216.3658447265625, "loss": 1.0215, "margin_dpo/margin_mean": 62.967567443847656, "margin_dpo/margin_std": 78.15447998046875, "step": 380 }, { "epoch": 0.5759637188208617, "fcm_dpo/beta": 0.008834085427224636, "fcm_dpo/delta": 0.18309441208839417, "fcm_dpo/margin": 24.82586669921875, "fcm_dpo/q_t": 0.4514288604259491, "grad_norm": 12.987579345703125, "learning_rate": 2.288697335747027e-07, "logits/chosen": 0.6932476162910461, "logits/rejected": 0.6694196462631226, "logps/chosen": -157.75567626953125, "logps/ref_chosen": -58.2139892578125, "logps/ref_rejected": -60.78669357299805, "logps/rejected": -185.15423583984375, "loss": 1.2867, "margin_dpo/margin_mean": 24.82586669921875, "margin_dpo/margin_std": 75.64501953125, "step": 381 }, { "epoch": 0.5774754346182918, "fcm_dpo/beta": 0.008934552781283855, "fcm_dpo/delta": 0.020180724561214447, "fcm_dpo/margin": 42.540550231933594, "fcm_dpo/q_t": 0.4133971333503723, "grad_norm": 13.391695976257324, "learning_rate": 2.2755254742257706e-07, "logits/chosen": 0.7400511503219604, "logits/rejected": 0.6866692304611206, "logps/chosen": -162.21678161621094, "logps/ref_chosen": -61.82532501220703, "logps/ref_rejected": -83.0452880859375, "logps/rejected": -225.977294921875, "loss": 1.1196, "margin_dpo/margin_mean": 42.540550231933594, "margin_dpo/margin_std": 63.50366973876953, "step": 382 }, { "epoch": 0.5789871504157218, "fcm_dpo/beta": 0.008840801194310188, "fcm_dpo/delta": -0.014270953834056854, "fcm_dpo/margin": 46.73270797729492, "fcm_dpo/q_t": 0.40885794162750244, "grad_norm": 14.537291526794434, "learning_rate": 2.2623598917395436e-07, "logits/chosen": 0.6014754176139832, "logits/rejected": 0.6322965621948242, "logps/chosen": -177.6047821044922, "logps/ref_chosen": -80.56326293945312, "logps/ref_rejected": -74.62922668457031, "logps/rejected": -218.40345764160156, "loss": 1.1355, "margin_dpo/margin_mean": 46.73271179199219, "margin_dpo/margin_std": 80.7920150756836, "step": 383 }, { "epoch": 0.5804988662131519, "fcm_dpo/beta": 0.008877087384462357, "fcm_dpo/delta": 0.0022729591000825167, "fcm_dpo/margin": 44.81377029418945, "fcm_dpo/q_t": 0.40858256816864014, "grad_norm": 14.741730690002441, "learning_rate": 2.2492009565579875e-07, "logits/chosen": 0.7499580979347229, "logits/rejected": 0.7032359838485718, "logps/chosen": -162.7613525390625, "logps/ref_chosen": -65.47514343261719, "logps/ref_rejected": -79.67378234863281, "logps/rejected": -221.77377319335938, "loss": 1.116, "margin_dpo/margin_mean": 44.81377029418945, "margin_dpo/margin_std": 69.56358337402344, "step": 384 }, { "epoch": 0.582010582010582, "fcm_dpo/beta": 0.008751116693019867, "fcm_dpo/delta": -0.12058362364768982, "fcm_dpo/margin": 58.79779052734375, "fcm_dpo/q_t": 0.3835332989692688, "grad_norm": 12.566021919250488, "learning_rate": 2.2360490367648084e-07, "logits/chosen": 0.6230024695396423, "logits/rejected": 0.5862922072410583, "logps/chosen": -160.81768798828125, "logps/ref_chosen": -66.0565185546875, "logps/ref_rejected": -86.68023681640625, "logps/rejected": -240.23919677734375, "loss": 1.0237, "margin_dpo/margin_mean": 58.797786712646484, "margin_dpo/margin_std": 69.73304748535156, "step": 385 }, { "epoch": 0.5835222978080121, "fcm_dpo/beta": 0.008755723014473915, "fcm_dpo/delta": 0.07520150393247604, "fcm_dpo/margin": 37.383819580078125, "fcm_dpo/q_t": 0.42415207624435425, "grad_norm": 13.969977378845215, "learning_rate": 2.2229045002474724e-07, "logits/chosen": 0.637274980545044, "logits/rejected": 0.5775246024131775, "logps/chosen": -184.84173583984375, "logps/ref_chosen": -75.6236572265625, "logps/ref_rejected": -92.62330627441406, "logps/rejected": -239.22520446777344, "loss": 1.1701, "margin_dpo/margin_mean": 37.383819580078125, "margin_dpo/margin_std": 68.13032531738281, "step": 386 }, { "epoch": 0.5850340136054422, "fcm_dpo/beta": 0.00868870597332716, "fcm_dpo/delta": -0.08571085333824158, "fcm_dpo/margin": 55.425376892089844, "fcm_dpo/q_t": 0.3905186653137207, "grad_norm": 12.999944686889648, "learning_rate": 2.209767714686924e-07, "logits/chosen": 0.7348219156265259, "logits/rejected": 0.628671407699585, "logps/chosen": -144.72238159179688, "logps/ref_chosen": -47.22170639038086, "logps/ref_rejected": -87.338134765625, "logps/rejected": -240.26419067382812, "loss": 1.0382, "margin_dpo/margin_mean": 55.425376892089844, "margin_dpo/margin_std": 66.4821548461914, "step": 387 }, { "epoch": 0.5865457294028723, "fcm_dpo/beta": 0.008731149137020111, "fcm_dpo/delta": 0.09577102214097977, "fcm_dpo/margin": 35.175655364990234, "fcm_dpo/q_t": 0.43218091130256653, "grad_norm": 12.877540588378906, "learning_rate": 2.1966390475472954e-07, "logits/chosen": 0.7196862697601318, "logits/rejected": 0.711571455001831, "logps/chosen": -172.91677856445312, "logps/ref_chosen": -74.5794677734375, "logps/ref_rejected": -79.92558288574219, "logps/rejected": -213.4385528564453, "loss": 1.2121, "margin_dpo/margin_mean": 35.175655364990234, "margin_dpo/margin_std": 77.24336242675781, "step": 388 }, { "epoch": 0.5880574452003023, "fcm_dpo/beta": 0.008700037375092506, "fcm_dpo/delta": -0.09904014319181442, "fcm_dpo/margin": 56.8076057434082, "fcm_dpo/q_t": 0.38827866315841675, "grad_norm": 28.18358612060547, "learning_rate": 2.1835188660656265e-07, "logits/chosen": 0.7416616678237915, "logits/rejected": 0.7048197984695435, "logps/chosen": -156.3480682373047, "logps/ref_chosen": -61.624366760253906, "logps/ref_rejected": -76.50978088378906, "logps/rejected": -228.0410919189453, "loss": 1.0495, "margin_dpo/margin_mean": 56.80760955810547, "margin_dpo/margin_std": 74.74456787109375, "step": 389 }, { "epoch": 0.5895691609977324, "fcm_dpo/beta": 0.008662872016429901, "fcm_dpo/delta": 0.023952744901180267, "fcm_dpo/margin": 43.51176071166992, "fcm_dpo/q_t": 0.41386693716049194, "grad_norm": 10.690574645996094, "learning_rate": 2.170407537241599e-07, "logits/chosen": 0.8148065805435181, "logits/rejected": 0.7424544095993042, "logps/chosen": -133.6822509765625, "logps/ref_chosen": -45.871864318847656, "logps/ref_rejected": -61.305999755859375, "logps/rejected": -192.62814331054688, "loss": 1.1241, "margin_dpo/margin_mean": 43.51176452636719, "margin_dpo/margin_std": 67.04666137695312, "step": 390 }, { "epoch": 0.5910808767951625, "fcm_dpo/beta": 0.008582616224884987, "fcm_dpo/delta": -0.05733926221728325, "fcm_dpo/margin": 52.93098068237305, "fcm_dpo/q_t": 0.3971294164657593, "grad_norm": 13.169042587280273, "learning_rate": 2.1573054278272636e-07, "logits/chosen": 0.7265839576721191, "logits/rejected": 0.6576735377311707, "logps/chosen": -153.22894287109375, "logps/ref_chosen": -58.18701171875, "logps/ref_rejected": -83.63442993164062, "logps/rejected": -231.60736083984375, "loss": 1.097, "margin_dpo/margin_mean": 52.93098449707031, "margin_dpo/margin_std": 81.30315399169922, "step": 391 }, { "epoch": 0.5925925925925926, "fcm_dpo/beta": 0.00850940402597189, "fcm_dpo/delta": -0.1043597012758255, "fcm_dpo/margin": 58.642520904541016, "fcm_dpo/q_t": 0.3879520297050476, "grad_norm": 11.178428649902344, "learning_rate": 2.1442129043167873e-07, "logits/chosen": 0.7724939584732056, "logits/rejected": 0.7124058604240417, "logps/chosen": -151.29806518554688, "logps/ref_chosen": -69.7445297241211, "logps/ref_rejected": -94.05877685546875, "logps/rejected": -234.25485229492188, "loss": 1.0558, "margin_dpo/margin_mean": 58.64252471923828, "margin_dpo/margin_std": 78.97216796875, "step": 392 }, { "epoch": 0.5941043083900227, "fcm_dpo/beta": 0.00827580876648426, "fcm_dpo/delta": -0.08926917612552643, "fcm_dpo/margin": 58.547996520996094, "fcm_dpo/q_t": 0.3895009756088257, "grad_norm": 11.780495643615723, "learning_rate": 2.131130332936195e-07, "logits/chosen": 0.7572908997535706, "logits/rejected": 0.7185690402984619, "logps/chosen": -148.23239135742188, "logps/ref_chosen": -52.33489990234375, "logps/ref_rejected": -74.33809661865234, "logps/rejected": -228.78358459472656, "loss": 1.0396, "margin_dpo/margin_mean": 58.54798889160156, "margin_dpo/margin_std": 70.91494750976562, "step": 393 }, { "epoch": 0.5956160241874527, "fcm_dpo/beta": 0.008259693160653114, "fcm_dpo/delta": -0.021054361015558243, "fcm_dpo/margin": 50.85765838623047, "fcm_dpo/q_t": 0.40173596143722534, "grad_norm": 12.078781127929688, "learning_rate": 2.1180580796331323e-07, "logits/chosen": 0.771820604801178, "logits/rejected": 0.7414647936820984, "logps/chosen": -151.42906188964844, "logps/ref_chosen": -60.6761360168457, "logps/ref_rejected": -71.36074829101562, "logps/rejected": -212.97134399414062, "loss": 1.0744, "margin_dpo/margin_mean": 50.85765838623047, "margin_dpo/margin_std": 62.24065399169922, "step": 394 }, { "epoch": 0.5971277399848829, "fcm_dpo/beta": 0.00834040530025959, "fcm_dpo/delta": 0.06285583227872849, "fcm_dpo/margin": 40.60387420654297, "fcm_dpo/q_t": 0.42380064725875854, "grad_norm": 15.293757438659668, "learning_rate": 2.104996510066625e-07, "logits/chosen": 0.7607164978981018, "logits/rejected": 0.6606634259223938, "logps/chosen": -147.010986328125, "logps/ref_chosen": -50.60432434082031, "logps/ref_rejected": -77.08731079101562, "logps/rejected": -214.0978546142578, "loss": 1.1548, "margin_dpo/margin_mean": 40.60387420654297, "margin_dpo/margin_std": 68.5374755859375, "step": 395 }, { "epoch": 0.5986394557823129, "fcm_dpo/beta": 0.00826001912355423, "fcm_dpo/delta": -0.008021347224712372, "fcm_dpo/margin": 49.21974182128906, "fcm_dpo/q_t": 0.40675118565559387, "grad_norm": 11.849122047424316, "learning_rate": 2.0919459895968517e-07, "logits/chosen": 0.7603906989097595, "logits/rejected": 0.6580515503883362, "logps/chosen": -141.91624450683594, "logps/ref_chosen": -51.35961151123047, "logps/ref_rejected": -79.89360046386719, "logps/rejected": -219.66998291015625, "loss": 1.0896, "margin_dpo/margin_mean": 49.219749450683594, "margin_dpo/margin_std": 63.23406219482422, "step": 396 }, { "epoch": 0.600151171579743, "fcm_dpo/beta": 0.008545951917767525, "fcm_dpo/delta": 0.1694183051586151, "fcm_dpo/margin": 27.406164169311523, "fcm_dpo/q_t": 0.4465975761413574, "grad_norm": 13.423833847045898, "learning_rate": 2.078906883274924e-07, "logits/chosen": 0.6522234082221985, "logits/rejected": 0.6044944524765015, "logps/chosen": -170.49685668945312, "logps/ref_chosen": -66.45622253417969, "logps/ref_rejected": -85.74736785888672, "logps/rejected": -217.1941680908203, "loss": 1.281, "margin_dpo/margin_mean": 27.40616226196289, "margin_dpo/margin_std": 81.10401916503906, "step": 397 }, { "epoch": 0.6016628873771731, "fcm_dpo/beta": 0.008423902094364166, "fcm_dpo/delta": -0.12061990797519684, "fcm_dpo/margin": 60.99681854248047, "fcm_dpo/q_t": 0.38334938883781433, "grad_norm": 11.325228691101074, "learning_rate": 2.065879555832674e-07, "logits/chosen": 0.7046458721160889, "logits/rejected": 0.6370331048965454, "logps/chosen": -138.74560546875, "logps/ref_chosen": -49.244239807128906, "logps/ref_rejected": -75.18949127197266, "logps/rejected": -225.68765258789062, "loss": 1.0144, "margin_dpo/margin_mean": 60.99681854248047, "margin_dpo/margin_std": 69.0614013671875, "step": 398 }, { "epoch": 0.6031746031746031, "fcm_dpo/beta": 0.008185310289263725, "fcm_dpo/delta": -0.15051433444023132, "fcm_dpo/margin": 66.17098999023438, "fcm_dpo/q_t": 0.37749582529067993, "grad_norm": 13.418987274169922, "learning_rate": 2.052864371672457e-07, "logits/chosen": 0.6756268739700317, "logits/rejected": 0.5277635455131531, "logps/chosen": -172.79800415039062, "logps/ref_chosen": -68.30679321289062, "logps/ref_rejected": -113.2708511352539, "logps/rejected": -283.93304443359375, "loss": 1.006, "margin_dpo/margin_mean": 66.17098999023438, "margin_dpo/margin_std": 76.67375183105469, "step": 399 }, { "epoch": 0.6046863189720333, "fcm_dpo/beta": 0.008176662027835846, "fcm_dpo/delta": 0.07041217386722565, "fcm_dpo/margin": 40.524681091308594, "fcm_dpo/q_t": 0.4246814250946045, "grad_norm": 18.240394592285156, "learning_rate": 2.0398616948569493e-07, "logits/chosen": 0.7267959117889404, "logits/rejected": 0.6661494970321655, "logps/chosen": -187.3385467529297, "logps/ref_chosen": -71.62649536132812, "logps/ref_rejected": -90.98765563964844, "logps/rejected": -247.22439575195312, "loss": 1.163, "margin_dpo/margin_mean": 40.524681091308594, "margin_dpo/margin_std": 68.88404846191406, "step": 400 }, { "epoch": 0.6046863189720333, "eval_fcm_dpo/beta": 0.008266105316579342, "eval_logits/chosen": 0.7402104139328003, "eval_logits/rejected": 0.6880174279212952, "eval_logps/chosen": -175.3135528564453, "eval_logps/ref_chosen": -74.85946655273438, "eval_logps/ref_rejected": -79.54898834228516, "eval_logps/rejected": -225.52288818359375, "eval_loss": 0.5694928169250488, "eval_margin_dpo/margin_mean": 45.51979446411133, "eval_margin_dpo/margin_std": 75.34153747558594, "eval_runtime": 37.9897, "eval_samples_per_second": 60.622, "eval_steps_per_second": 1.895, "step": 400 }, { "epoch": 0.6061980347694633, "fcm_dpo/beta": 0.008165856823325157, "fcm_dpo/delta": -0.06495825201272964, "fcm_dpo/margin": 56.537445068359375, "fcm_dpo/q_t": 0.3948812484741211, "grad_norm": 10.236934661865234, "learning_rate": 2.0268718890989752e-07, "logits/chosen": 0.7750402092933655, "logits/rejected": 0.6722196340560913, "logps/chosen": -142.9902801513672, "logps/ref_chosen": -53.72495651245117, "logps/ref_rejected": -75.06304931640625, "logps/rejected": -220.86581420898438, "loss": 1.0501, "margin_dpo/margin_mean": 56.537445068359375, "margin_dpo/margin_std": 68.74324035644531, "step": 401 }, { "epoch": 0.6077097505668935, "fcm_dpo/beta": 0.00811966322362423, "fcm_dpo/delta": 0.01042521744966507, "fcm_dpo/margin": 47.93630599975586, "fcm_dpo/q_t": 0.41185569763183594, "grad_norm": 13.079541206359863, "learning_rate": 2.013895317751323e-07, "logits/chosen": 0.7229193449020386, "logits/rejected": 0.6913242340087891, "logps/chosen": -158.04225158691406, "logps/ref_chosen": -61.873931884765625, "logps/ref_rejected": -66.15198516845703, "logps/rejected": -210.25660705566406, "loss": 1.1303, "margin_dpo/margin_mean": 47.936309814453125, "margin_dpo/margin_std": 76.8143310546875, "step": 402 }, { "epoch": 0.6092214663643235, "fcm_dpo/beta": 0.008106638677418232, "fcm_dpo/delta": -0.06947094202041626, "fcm_dpo/margin": 57.51924133300781, "fcm_dpo/q_t": 0.39517760276794434, "grad_norm": 11.307629585266113, "learning_rate": 2.0009323437965898e-07, "logits/chosen": 0.8434353470802307, "logits/rejected": 0.7568535804748535, "logps/chosen": -155.8819580078125, "logps/ref_chosen": -51.321502685546875, "logps/ref_rejected": -86.54010772705078, "logps/rejected": -248.61981201171875, "loss": 1.0715, "margin_dpo/margin_mean": 57.51924133300781, "margin_dpo/margin_std": 79.66917419433594, "step": 403 }, { "epoch": 0.6107331821617535, "fcm_dpo/beta": 0.00791841372847557, "fcm_dpo/delta": -0.07457688450813293, "fcm_dpo/margin": 59.30389404296875, "fcm_dpo/q_t": 0.39402520656585693, "grad_norm": 14.147499084472656, "learning_rate": 1.9879833298370237e-07, "logits/chosen": 0.6907485723495483, "logits/rejected": 0.5947024822235107, "logps/chosen": -158.80311584472656, "logps/ref_chosen": -62.26288604736328, "logps/ref_rejected": -95.19029998779297, "logps/rejected": -251.034423828125, "loss": 1.0664, "margin_dpo/margin_mean": 59.30389404296875, "margin_dpo/margin_std": 78.74125671386719, "step": 404 }, { "epoch": 0.6122448979591837, "fcm_dpo/beta": 0.00792413204908371, "fcm_dpo/delta": 0.03028678148984909, "fcm_dpo/margin": 46.69974136352539, "fcm_dpo/q_t": 0.4163452088832855, "grad_norm": 11.294724464416504, "learning_rate": 1.975048638084379e-07, "logits/chosen": 0.7476557493209839, "logits/rejected": 0.7014354467391968, "logps/chosen": -151.2752685546875, "logps/ref_chosen": -50.5843391418457, "logps/ref_rejected": -65.43156433105469, "logps/rejected": -212.82223510742188, "loss": 1.13, "margin_dpo/margin_mean": 46.69974136352539, "margin_dpo/margin_std": 70.747314453125, "step": 405 }, { "epoch": 0.6137566137566137, "fcm_dpo/beta": 0.007905669510364532, "fcm_dpo/delta": -0.07834555953741074, "fcm_dpo/margin": 60.04176712036133, "fcm_dpo/q_t": 0.3914737105369568, "grad_norm": 14.173489570617676, "learning_rate": 1.9621286303497914e-07, "logits/chosen": 0.7967276573181152, "logits/rejected": 0.628233015537262, "logps/chosen": -147.6551513671875, "logps/ref_chosen": -48.99560546875, "logps/ref_rejected": -92.47774505615234, "logps/rejected": -251.17904663085938, "loss": 1.0637, "margin_dpo/margin_mean": 60.04176330566406, "margin_dpo/margin_std": 80.88154602050781, "step": 406 }, { "epoch": 0.6152683295540439, "fcm_dpo/beta": 0.007924167439341545, "fcm_dpo/delta": 0.02935061603784561, "fcm_dpo/margin": 46.884368896484375, "fcm_dpo/q_t": 0.41602736711502075, "grad_norm": 14.014954566955566, "learning_rate": 1.9492236680336483e-07, "logits/chosen": 0.6189517974853516, "logits/rejected": 0.5417089462280273, "logps/chosen": -208.76776123046875, "logps/ref_chosen": -89.40056610107422, "logps/ref_rejected": -99.28775024414062, "logps/rejected": -265.539306640625, "loss": 1.14, "margin_dpo/margin_mean": 46.884368896484375, "margin_dpo/margin_std": 78.12733459472656, "step": 407 }, { "epoch": 0.6167800453514739, "fcm_dpo/beta": 0.007776426617056131, "fcm_dpo/delta": -0.16104529798030853, "fcm_dpo/margin": 71.02813720703125, "fcm_dpo/q_t": 0.37310662865638733, "grad_norm": 10.806535720825195, "learning_rate": 1.9363341121154895e-07, "logits/chosen": 0.7250916957855225, "logits/rejected": 0.6428213119506836, "logps/chosen": -145.7263641357422, "logps/ref_chosen": -54.70391845703125, "logps/ref_rejected": -73.98648834228516, "logps/rejected": -236.03707885742188, "loss": 0.9905, "margin_dpo/margin_mean": 71.02813720703125, "margin_dpo/margin_std": 74.32557678222656, "step": 408 }, { "epoch": 0.618291761148904, "fcm_dpo/beta": 0.007799787446856499, "fcm_dpo/delta": 0.12649981677532196, "fcm_dpo/margin": 35.53091049194336, "fcm_dpo/q_t": 0.4371756315231323, "grad_norm": 13.238290786743164, "learning_rate": 1.9234603231438994e-07, "logits/chosen": 0.734856367111206, "logits/rejected": 0.7435188293457031, "logps/chosen": -175.16085815429688, "logps/ref_chosen": -62.11822509765625, "logps/ref_rejected": -61.933509826660156, "logps/rejected": -210.50704956054688, "loss": 1.2054, "margin_dpo/margin_mean": 35.53091049194336, "margin_dpo/margin_std": 72.68028259277344, "step": 409 }, { "epoch": 0.6198034769463341, "fcm_dpo/beta": 0.007764261215925217, "fcm_dpo/delta": -0.044183533638715744, "fcm_dpo/margin": 56.84495544433594, "fcm_dpo/q_t": 0.39720937609672546, "grad_norm": 11.548017501831055, "learning_rate": 1.9106026612264315e-07, "logits/chosen": 0.7385885119438171, "logits/rejected": 0.7137470245361328, "logps/chosen": -167.29537963867188, "logps/ref_chosen": -61.80266189575195, "logps/ref_rejected": -76.60002136230469, "logps/rejected": -238.93768310546875, "loss": 1.0508, "margin_dpo/margin_mean": 56.84495544433594, "margin_dpo/margin_std": 62.45063781738281, "step": 410 }, { "epoch": 0.6213151927437641, "fcm_dpo/beta": 0.007759403437376022, "fcm_dpo/delta": -0.033648937940597534, "fcm_dpo/margin": 55.70112609863281, "fcm_dpo/q_t": 0.401795893907547, "grad_norm": 10.255197525024414, "learning_rate": 1.8977614860195296e-07, "logits/chosen": 0.7373828887939453, "logits/rejected": 0.6756826639175415, "logps/chosen": -165.4742431640625, "logps/ref_chosen": -54.44539260864258, "logps/ref_rejected": -74.5650863647461, "logps/rejected": -241.2950439453125, "loss": 1.0894, "margin_dpo/margin_mean": 55.70112609863281, "margin_dpo/margin_std": 79.74048614501953, "step": 411 }, { "epoch": 0.6228269085411943, "fcm_dpo/beta": 0.007705829571932554, "fcm_dpo/delta": -0.003621477633714676, "fcm_dpo/margin": 52.322906494140625, "fcm_dpo/q_t": 0.4078013300895691, "grad_norm": 12.566106796264648, "learning_rate": 1.8849371567184662e-07, "logits/chosen": 0.7539393901824951, "logits/rejected": 0.6827704906463623, "logps/chosen": -171.85638427734375, "logps/ref_chosen": -55.248085021972656, "logps/ref_rejected": -68.96623229980469, "logps/rejected": -237.89743041992188, "loss": 1.0955, "margin_dpo/margin_mean": 52.322906494140625, "margin_dpo/margin_std": 71.428466796875, "step": 412 }, { "epoch": 0.6243386243386243, "fcm_dpo/beta": 0.007800276391208172, "fcm_dpo/delta": 0.05510157719254494, "fcm_dpo/margin": 44.45771789550781, "fcm_dpo/q_t": 0.42176562547683716, "grad_norm": 14.439383506774902, "learning_rate": 1.872130032047302e-07, "logits/chosen": 0.5603929758071899, "logits/rejected": 0.5244190692901611, "logps/chosen": -195.08648681640625, "logps/ref_chosen": -68.72074890136719, "logps/ref_rejected": -78.76539611816406, "logps/rejected": -249.5888671875, "loss": 1.1824, "margin_dpo/margin_mean": 44.45771408081055, "margin_dpo/margin_std": 88.00609588623047, "step": 413 }, { "epoch": 0.6258503401360545, "fcm_dpo/beta": 0.0077771758660674095, "fcm_dpo/delta": -0.044307127594947815, "fcm_dpo/margin": 56.880470275878906, "fcm_dpo/q_t": 0.39816516637802124, "grad_norm": 12.960653305053711, "learning_rate": 1.8593404702488436e-07, "logits/chosen": 0.7278245687484741, "logits/rejected": 0.6642704606056213, "logps/chosen": -165.54698181152344, "logps/ref_chosen": -54.138214111328125, "logps/ref_rejected": -74.65741729736328, "logps/rejected": -242.9466552734375, "loss": 1.0688, "margin_dpo/margin_mean": 56.880470275878906, "margin_dpo/margin_std": 73.23246765136719, "step": 414 }, { "epoch": 0.6273620559334845, "fcm_dpo/beta": 0.00775496382266283, "fcm_dpo/delta": 0.01751146838068962, "fcm_dpo/margin": 49.402931213378906, "fcm_dpo/q_t": 0.4125995337963104, "grad_norm": 12.199499130249023, "learning_rate": 1.846568829074628e-07, "logits/chosen": 0.7460592985153198, "logits/rejected": 0.7304663062095642, "logps/chosen": -167.877685546875, "logps/ref_chosen": -55.91856002807617, "logps/ref_rejected": -61.747703552246094, "logps/rejected": -223.10975646972656, "loss": 1.1287, "margin_dpo/margin_mean": 49.402931213378906, "margin_dpo/margin_std": 79.15948486328125, "step": 415 }, { "epoch": 0.6288737717309146, "fcm_dpo/beta": 0.007957161404192448, "fcm_dpo/delta": 0.06318769603967667, "fcm_dpo/margin": 42.04512023925781, "fcm_dpo/q_t": 0.4247048497200012, "grad_norm": 14.358795166015625, "learning_rate": 1.8338154657749128e-07, "logits/chosen": 0.7043267488479614, "logits/rejected": 0.6525400876998901, "logps/chosen": -173.0494842529297, "logps/ref_chosen": -54.72308349609375, "logps/ref_rejected": -69.17388916015625, "logps/rejected": -229.54541015625, "loss": 1.1811, "margin_dpo/margin_mean": 42.04512023925781, "margin_dpo/margin_std": 75.42815399169922, "step": 416 }, { "epoch": 0.6303854875283447, "fcm_dpo/beta": 0.007852243259549141, "fcm_dpo/delta": -0.04769314080476761, "fcm_dpo/margin": 56.72645568847656, "fcm_dpo/q_t": 0.3974757194519043, "grad_norm": 12.778168678283691, "learning_rate": 1.8210807370886849e-07, "logits/chosen": 0.834701418876648, "logits/rejected": 0.7620327472686768, "logps/chosen": -178.24203491210938, "logps/ref_chosen": -56.791259765625, "logps/ref_rejected": -68.7791748046875, "logps/rejected": -246.95639038085938, "loss": 1.0928, "margin_dpo/margin_mean": 56.72645568847656, "margin_dpo/margin_std": 83.89405822753906, "step": 417 }, { "epoch": 0.6318972033257747, "fcm_dpo/beta": 0.007806393783539534, "fcm_dpo/delta": 0.05898113176226616, "fcm_dpo/margin": 43.8342399597168, "fcm_dpo/q_t": 0.4236130118370056, "grad_norm": 13.988064765930176, "learning_rate": 1.8083649992336825e-07, "logits/chosen": 0.7765390872955322, "logits/rejected": 0.784039318561554, "logps/chosen": -198.92283630371094, "logps/ref_chosen": -69.10798645019531, "logps/ref_rejected": -75.09132385253906, "logps/rejected": -248.74041748046875, "loss": 1.1617, "margin_dpo/margin_mean": 43.83423614501953, "margin_dpo/margin_std": 75.42573547363281, "step": 418 }, { "epoch": 0.6334089191232048, "fcm_dpo/beta": 0.007763129658997059, "fcm_dpo/delta": -0.0961204543709755, "fcm_dpo/margin": 63.256935119628906, "fcm_dpo/q_t": 0.3896148204803467, "grad_norm": 12.546382904052734, "learning_rate": 1.7956686078964255e-07, "logits/chosen": 0.6601693630218506, "logits/rejected": 0.6037542819976807, "logps/chosen": -162.12216186523438, "logps/ref_chosen": -58.1717643737793, "logps/ref_rejected": -71.67066955566406, "logps/rejected": -238.87799072265625, "loss": 1.0484, "margin_dpo/margin_mean": 63.256935119628906, "margin_dpo/margin_std": 82.42765045166016, "step": 419 }, { "epoch": 0.6349206349206349, "fcm_dpo/beta": 0.007878805510699749, "fcm_dpo/delta": 0.13835400342941284, "fcm_dpo/margin": 33.70429992675781, "fcm_dpo/q_t": 0.4415522813796997, "grad_norm": 13.372779846191406, "learning_rate": 1.782991918222275e-07, "logits/chosen": 0.7393275499343872, "logits/rejected": 0.6908121109008789, "logps/chosen": -187.9400634765625, "logps/ref_chosen": -57.05351257324219, "logps/ref_rejected": -62.670982360839844, "logps/rejected": -227.2618408203125, "loss": 1.2477, "margin_dpo/margin_mean": 33.70430374145508, "margin_dpo/margin_std": 85.482421875, "step": 420 }, { "epoch": 0.636432350718065, "fcm_dpo/beta": 0.007947009056806564, "fcm_dpo/delta": 0.020302031189203262, "fcm_dpo/margin": 47.831687927246094, "fcm_dpo/q_t": 0.4156237840652466, "grad_norm": 14.011063575744629, "learning_rate": 1.7703352848054887e-07, "logits/chosen": 0.7062793970108032, "logits/rejected": 0.6440611481666565, "logps/chosen": -180.4288330078125, "logps/ref_chosen": -57.32324981689453, "logps/ref_rejected": -75.33782958984375, "logps/rejected": -246.27511596679688, "loss": 1.1732, "margin_dpo/margin_mean": 47.831687927246094, "margin_dpo/margin_std": 93.26679992675781, "step": 421 }, { "epoch": 0.6379440665154951, "fcm_dpo/beta": 0.00788248609751463, "fcm_dpo/delta": -0.10939822345972061, "fcm_dpo/margin": 63.941802978515625, "fcm_dpo/q_t": 0.3854847550392151, "grad_norm": 14.771190643310547, "learning_rate": 1.7576990616793137e-07, "logits/chosen": 0.7229829430580139, "logits/rejected": 0.7074044942855835, "logps/chosen": -173.61679077148438, "logps/ref_chosen": -67.05757141113281, "logps/ref_rejected": -72.12803649902344, "logps/rejected": -242.62905883789062, "loss": 1.0308, "margin_dpo/margin_mean": 63.941802978515625, "margin_dpo/margin_std": 77.09107971191406, "step": 422 }, { "epoch": 0.6394557823129252, "fcm_dpo/beta": 0.0076961456798017025, "fcm_dpo/delta": -0.0938025414943695, "fcm_dpo/margin": 63.5325927734375, "fcm_dpo/q_t": 0.3895634412765503, "grad_norm": 11.972503662109375, "learning_rate": 1.745083602306071e-07, "logits/chosen": 0.7690365314483643, "logits/rejected": 0.6954725980758667, "logps/chosen": -164.59158325195312, "logps/ref_chosen": -54.06167221069336, "logps/ref_rejected": -76.64092254638672, "logps/rejected": -250.70343017578125, "loss": 1.0441, "margin_dpo/margin_mean": 63.5325927734375, "margin_dpo/margin_std": 80.39971923828125, "step": 423 }, { "epoch": 0.6409674981103552, "fcm_dpo/beta": 0.007550341077148914, "fcm_dpo/delta": -0.06942353397607803, "fcm_dpo/margin": 61.598793029785156, "fcm_dpo/q_t": 0.39464449882507324, "grad_norm": 16.436132431030273, "learning_rate": 1.7324892595672804e-07, "logits/chosen": 0.6422996520996094, "logits/rejected": 0.6022803783416748, "logps/chosen": -172.88888549804688, "logps/ref_chosen": -53.60887145996094, "logps/ref_rejected": -79.2139892578125, "logps/rejected": -260.0928039550781, "loss": 1.0666, "margin_dpo/margin_mean": 61.598793029785156, "margin_dpo/margin_std": 82.29408264160156, "step": 424 }, { "epoch": 0.6424792139077853, "fcm_dpo/beta": 0.007582271471619606, "fcm_dpo/delta": 0.01660202071070671, "fcm_dpo/margin": 50.64934158325195, "fcm_dpo/q_t": 0.4132668375968933, "grad_norm": 13.83991527557373, "learning_rate": 1.7199163857537824e-07, "logits/chosen": 0.7531682252883911, "logits/rejected": 0.724544882774353, "logps/chosen": -174.68898010253906, "logps/ref_chosen": -58.41468048095703, "logps/ref_rejected": -66.59054565429688, "logps/rejected": -233.51419067382812, "loss": 1.1281, "margin_dpo/margin_mean": 50.64934158325195, "margin_dpo/margin_std": 81.37447357177734, "step": 425 }, { "epoch": 0.6439909297052154, "fcm_dpo/beta": 0.007783133536577225, "fcm_dpo/delta": 0.16612949967384338, "fcm_dpo/margin": 30.56137466430664, "fcm_dpo/q_t": 0.4456656873226166, "grad_norm": 15.910229682922363, "learning_rate": 1.7073653325558828e-07, "logits/chosen": 0.6517899036407471, "logits/rejected": 0.6605311036109924, "logps/chosen": -211.61373901367188, "logps/ref_chosen": -71.70822143554688, "logps/ref_rejected": -73.57725524902344, "logps/rejected": -244.04415893554688, "loss": 1.2789, "margin_dpo/margin_mean": 30.561378479003906, "margin_dpo/margin_std": 90.49774169921875, "step": 426 }, { "epoch": 0.6455026455026455, "fcm_dpo/beta": 0.007862042635679245, "fcm_dpo/delta": 0.02033122256398201, "fcm_dpo/margin": 48.38745880126953, "fcm_dpo/q_t": 0.41554105281829834, "grad_norm": 14.462299346923828, "learning_rate": 1.6948364510535218e-07, "logits/chosen": 0.7601388692855835, "logits/rejected": 0.698128342628479, "logps/chosen": -189.68942260742188, "logps/ref_chosen": -58.64276885986328, "logps/ref_rejected": -86.25437927246094, "logps/rejected": -265.6884765625, "loss": 1.1533, "margin_dpo/margin_mean": 48.38745880126953, "margin_dpo/margin_std": 88.23645782470703, "step": 427 }, { "epoch": 0.6470143613000756, "fcm_dpo/beta": 0.007850416004657745, "fcm_dpo/delta": -0.05777687579393387, "fcm_dpo/margin": 57.95905685424805, "fcm_dpo/q_t": 0.39873766899108887, "grad_norm": 12.50790023803711, "learning_rate": 1.6823300917064458e-07, "logits/chosen": 0.6862516403198242, "logits/rejected": 0.639532208442688, "logps/chosen": -189.7267608642578, "logps/ref_chosen": -66.5960464477539, "logps/ref_rejected": -82.3941650390625, "logps/rejected": -263.48394775390625, "loss": 1.0891, "margin_dpo/margin_mean": 57.95905303955078, "margin_dpo/margin_std": 86.09635162353516, "step": 428 }, { "epoch": 0.6485260770975056, "fcm_dpo/beta": 0.007888168096542358, "fcm_dpo/delta": 0.048963554203510284, "fcm_dpo/margin": 44.629215240478516, "fcm_dpo/q_t": 0.4203850328922272, "grad_norm": 15.114370346069336, "learning_rate": 1.669846604344412e-07, "logits/chosen": 0.6785605549812317, "logits/rejected": 0.696630597114563, "logps/chosen": -184.79486083984375, "logps/ref_chosen": -57.00970458984375, "logps/ref_rejected": -59.86549377441406, "logps/rejected": -232.2798614501953, "loss": 1.1686, "margin_dpo/margin_mean": 44.62921142578125, "margin_dpo/margin_std": 82.51943969726562, "step": 429 }, { "epoch": 0.6500377928949358, "fcm_dpo/beta": 0.007686239667236805, "fcm_dpo/delta": -0.1573198437690735, "fcm_dpo/margin": 71.38347625732422, "fcm_dpo/q_t": 0.37464433908462524, "grad_norm": 12.829866409301758, "learning_rate": 1.6573863381573954e-07, "logits/chosen": 0.6116656064987183, "logits/rejected": 0.6137654781341553, "logps/chosen": -174.70074462890625, "logps/ref_chosen": -59.563194274902344, "logps/ref_rejected": -70.52289581298828, "logps/rejected": -257.0439147949219, "loss": 1.0126, "margin_dpo/margin_mean": 71.38348388671875, "margin_dpo/margin_std": 85.50177001953125, "step": 430 }, { "epoch": 0.6515495086923658, "fcm_dpo/beta": 0.007644302677363157, "fcm_dpo/delta": 0.021943753585219383, "fcm_dpo/margin": 49.56535339355469, "fcm_dpo/q_t": 0.4139266610145569, "grad_norm": 12.444840431213379, "learning_rate": 1.6449496416858282e-07, "logits/chosen": 0.7115011215209961, "logits/rejected": 0.6581912040710449, "logps/chosen": -162.03805541992188, "logps/ref_chosen": -50.20032501220703, "logps/ref_rejected": -77.81680297851562, "logps/rejected": -239.21987915039062, "loss": 1.133, "margin_dpo/margin_mean": 49.56535339355469, "margin_dpo/margin_std": 80.80972290039062, "step": 431 }, { "epoch": 0.6530612244897959, "fcm_dpo/beta": 0.007644776254892349, "fcm_dpo/delta": -0.008293664082884789, "fcm_dpo/margin": 53.36254119873047, "fcm_dpo/q_t": 0.4074263572692871, "grad_norm": 12.83692455291748, "learning_rate": 1.632536862810844e-07, "logits/chosen": 0.7517593502998352, "logits/rejected": 0.6997089982032776, "logps/chosen": -178.72860717773438, "logps/ref_chosen": -61.662757873535156, "logps/ref_rejected": -83.94496154785156, "logps/rejected": -254.3733673095703, "loss": 1.1177, "margin_dpo/margin_mean": 53.36254119873047, "margin_dpo/margin_std": 84.9062728881836, "step": 432 }, { "epoch": 0.654572940287226, "fcm_dpo/beta": 0.007542489096522331, "fcm_dpo/delta": -0.11253903806209564, "fcm_dpo/margin": 67.21623229980469, "fcm_dpo/q_t": 0.38378891348838806, "grad_norm": 13.461320877075195, "learning_rate": 1.6201483487445515e-07, "logits/chosen": 0.8195096254348755, "logits/rejected": 0.8181531429290771, "logps/chosen": -179.8470458984375, "logps/ref_chosen": -63.72917938232422, "logps/ref_rejected": -65.8391342163086, "logps/rejected": -249.1732177734375, "loss": 1.0334, "margin_dpo/margin_mean": 67.21623229980469, "margin_dpo/margin_std": 82.00685119628906, "step": 433 }, { "epoch": 0.656084656084656, "fcm_dpo/beta": 0.007294449955224991, "fcm_dpo/delta": -0.1217169463634491, "fcm_dpo/margin": 70.45217895507812, "fcm_dpo/q_t": 0.38249802589416504, "grad_norm": 12.421359062194824, "learning_rate": 1.6077844460203204e-07, "logits/chosen": 0.8312444090843201, "logits/rejected": 0.7658089399337769, "logps/chosen": -146.02633666992188, "logps/ref_chosen": -47.97331619262695, "logps/ref_rejected": -72.51132202148438, "logps/rejected": -241.01651000976562, "loss": 1.0482, "margin_dpo/margin_mean": 70.4521713256836, "margin_dpo/margin_std": 93.53169250488281, "step": 434 }, { "epoch": 0.6575963718820862, "fcm_dpo/beta": 0.007317832671105862, "fcm_dpo/delta": -0.012319110333919525, "fcm_dpo/margin": 56.22063446044922, "fcm_dpo/q_t": 0.4067924916744232, "grad_norm": 13.529410362243652, "learning_rate": 1.5954455004830878e-07, "logits/chosen": 0.834632158279419, "logits/rejected": 0.7947177886962891, "logps/chosen": -177.00856018066406, "logps/ref_chosen": -57.06024932861328, "logps/ref_rejected": -71.69146728515625, "logps/rejected": -247.86041259765625, "loss": 1.1107, "margin_dpo/margin_mean": 56.22063446044922, "margin_dpo/margin_std": 85.4617919921875, "step": 435 }, { "epoch": 0.6591080876795162, "fcm_dpo/beta": 0.0073493365198373795, "fcm_dpo/delta": 0.0642806738615036, "fcm_dpo/margin": 45.97419738769531, "fcm_dpo/q_t": 0.4236651360988617, "grad_norm": 14.502552032470703, "learning_rate": 1.5831318572796847e-07, "logits/chosen": 0.7367502450942993, "logits/rejected": 0.6787878274917603, "logps/chosen": -174.20272827148438, "logps/ref_chosen": -56.158050537109375, "logps/ref_rejected": -67.63787841796875, "logps/rejected": -231.65673828125, "loss": 1.1805, "margin_dpo/margin_mean": 45.97420120239258, "margin_dpo/margin_std": 90.24182891845703, "step": 436 }, { "epoch": 0.6606198034769464, "fcm_dpo/beta": 0.007334660738706589, "fcm_dpo/delta": 0.02720458060503006, "fcm_dpo/margin": 50.770957946777344, "fcm_dpo/q_t": 0.4157326817512512, "grad_norm": 16.514482498168945, "learning_rate": 1.5708438608491815e-07, "logits/chosen": 0.7151650786399841, "logits/rejected": 0.5829428434371948, "logps/chosen": -182.744140625, "logps/ref_chosen": -56.98578643798828, "logps/ref_rejected": -85.61524963378906, "logps/rejected": -262.14459228515625, "loss": 1.1681, "margin_dpo/margin_mean": 50.77096176147461, "margin_dpo/margin_std": 93.88005065917969, "step": 437 }, { "epoch": 0.6621315192743764, "fcm_dpo/beta": 0.007298264652490616, "fcm_dpo/delta": -0.12693609297275543, "fcm_dpo/margin": 71.32022094726562, "fcm_dpo/q_t": 0.38320598006248474, "grad_norm": 12.304542541503906, "learning_rate": 1.558581854913253e-07, "logits/chosen": 0.8022891283035278, "logits/rejected": 0.7335547208786011, "logps/chosen": -148.60336303710938, "logps/ref_chosen": -41.27777862548828, "logps/ref_rejected": -65.33840942382812, "logps/rejected": -243.9842071533203, "loss": 1.0229, "margin_dpo/margin_mean": 71.32022094726562, "margin_dpo/margin_std": 85.57806396484375, "step": 438 }, { "epoch": 0.6636432350718064, "fcm_dpo/beta": 0.00723626371473074, "fcm_dpo/delta": -0.027069322764873505, "fcm_dpo/margin": 58.800289154052734, "fcm_dpo/q_t": 0.40320390462875366, "grad_norm": 12.547380447387695, "learning_rate": 1.5463461824665658e-07, "logits/chosen": 0.6307194232940674, "logits/rejected": 0.5939302444458008, "logps/chosen": -202.54116821289062, "logps/ref_chosen": -81.41764831542969, "logps/ref_rejected": -94.72309875488281, "logps/rejected": -274.64691162109375, "loss": 1.0892, "margin_dpo/margin_mean": 58.800289154052734, "margin_dpo/margin_std": 81.8350830078125, "step": 439 }, { "epoch": 0.6651549508692366, "fcm_dpo/beta": 0.007164452224969864, "fcm_dpo/delta": -0.05680684745311737, "fcm_dpo/margin": 63.365570068359375, "fcm_dpo/q_t": 0.3969983756542206, "grad_norm": 21.674142837524414, "learning_rate": 1.534137185767178e-07, "logits/chosen": 0.6887756586074829, "logits/rejected": 0.5909817218780518, "logps/chosen": -150.68502807617188, "logps/ref_chosen": -42.538185119628906, "logps/ref_rejected": -69.78813934326172, "logps/rejected": -241.30055236816406, "loss": 1.0817, "margin_dpo/margin_mean": 63.365570068359375, "margin_dpo/margin_std": 89.2110595703125, "step": 440 }, { "epoch": 0.6666666666666666, "fcm_dpo/beta": 0.007003414444625378, "fcm_dpo/delta": -0.06929505616426468, "fcm_dpo/margin": 66.45780181884766, "fcm_dpo/q_t": 0.3919275403022766, "grad_norm": 13.894318580627441, "learning_rate": 1.521955206326976e-07, "logits/chosen": 0.690746009349823, "logits/rejected": 0.5955066084861755, "logps/chosen": -163.53961181640625, "logps/ref_chosen": -57.593223571777344, "logps/ref_rejected": -84.82878875732422, "logps/rejected": -257.2330017089844, "loss": 1.0359, "margin_dpo/margin_mean": 66.45780181884766, "margin_dpo/margin_std": 72.28242492675781, "step": 441 }, { "epoch": 0.6681783824640968, "fcm_dpo/beta": 0.006948241498321295, "fcm_dpo/delta": -0.04673624783754349, "fcm_dpo/margin": 63.97719955444336, "fcm_dpo/q_t": 0.39758235216140747, "grad_norm": 14.932785987854004, "learning_rate": 1.5098005849021078e-07, "logits/chosen": 0.6913585662841797, "logits/rejected": 0.6380952000617981, "logps/chosen": -194.80154418945312, "logps/ref_chosen": -67.46121978759766, "logps/ref_rejected": -89.0693588256836, "logps/rejected": -280.3868713378906, "loss": 1.0664, "margin_dpo/margin_mean": 63.97719955444336, "margin_dpo/margin_std": 81.89796447753906, "step": 442 }, { "epoch": 0.6696900982615268, "fcm_dpo/beta": 0.006789367645978928, "fcm_dpo/delta": -0.15468865633010864, "fcm_dpo/margin": 80.46561431884766, "fcm_dpo/q_t": 0.3747613728046417, "grad_norm": 13.57395076751709, "learning_rate": 1.4976736614834662e-07, "logits/chosen": 0.7441185712814331, "logits/rejected": 0.6723178029060364, "logps/chosen": -160.47637939453125, "logps/ref_chosen": -54.79610061645508, "logps/ref_rejected": -77.80781555175781, "logps/rejected": -263.9537048339844, "loss": 1.0045, "margin_dpo/margin_mean": 80.46562194824219, "margin_dpo/margin_std": 92.77104949951172, "step": 443 }, { "epoch": 0.671201814058957, "fcm_dpo/beta": 0.006895772181451321, "fcm_dpo/delta": 0.19947421550750732, "fcm_dpo/margin": 29.74011993408203, "fcm_dpo/q_t": 0.4542357921600342, "grad_norm": 17.251333236694336, "learning_rate": 1.4855747752871654e-07, "logits/chosen": 0.7567894458770752, "logits/rejected": 0.6579380035400391, "logps/chosen": -191.23162841796875, "logps/ref_chosen": -58.749061584472656, "logps/ref_rejected": -86.87396240234375, "logps/rejected": -249.09664916992188, "loss": 1.2867, "margin_dpo/margin_mean": 29.7401180267334, "margin_dpo/margin_std": 88.69583129882812, "step": 444 }, { "epoch": 0.672713529856387, "fcm_dpo/beta": 0.006922256201505661, "fcm_dpo/delta": -0.08680850267410278, "fcm_dpo/margin": 69.72836303710938, "fcm_dpo/q_t": 0.3895564079284668, "grad_norm": 15.33248233795166, "learning_rate": 1.473504264745062e-07, "logits/chosen": 0.6761714220046997, "logits/rejected": 0.6643944978713989, "logps/chosen": -184.99374389648438, "logps/ref_chosen": -60.91743850708008, "logps/ref_rejected": -71.5637435913086, "logps/rejected": -265.368408203125, "loss": 1.0445, "margin_dpo/margin_mean": 69.72836303710938, "margin_dpo/margin_std": 86.42784118652344, "step": 445 }, { "epoch": 0.674225245653817, "fcm_dpo/beta": 0.006854848936200142, "fcm_dpo/delta": -0.0958673357963562, "fcm_dpo/margin": 71.51223754882812, "fcm_dpo/q_t": 0.3870481252670288, "grad_norm": 11.376906394958496, "learning_rate": 1.461462467495284e-07, "logits/chosen": 0.7793463468551636, "logits/rejected": 0.695709228515625, "logps/chosen": -161.16827392578125, "logps/ref_chosen": -48.79924774169922, "logps/ref_rejected": -71.8719482421875, "logps/rejected": -255.75323486328125, "loss": 1.0255, "margin_dpo/margin_mean": 71.51223754882812, "margin_dpo/margin_std": 74.22274780273438, "step": 446 }, { "epoch": 0.6757369614512472, "fcm_dpo/beta": 0.006591130513697863, "fcm_dpo/delta": -0.13952668011188507, "fcm_dpo/margin": 80.61551666259766, "fcm_dpo/q_t": 0.3766905665397644, "grad_norm": 16.281320571899414, "learning_rate": 1.4494497203727843e-07, "logits/chosen": 0.6757616996765137, "logits/rejected": 0.5708225965499878, "logps/chosen": -164.0203399658203, "logps/ref_chosen": -53.682716369628906, "logps/ref_rejected": -88.17315673828125, "logps/rejected": -279.12628173828125, "loss": 1.0128, "margin_dpo/margin_mean": 80.61550903320312, "margin_dpo/margin_std": 92.21812438964844, "step": 447 }, { "epoch": 0.6772486772486772, "fcm_dpo/beta": 0.006543307099491358, "fcm_dpo/delta": -0.024397023022174835, "fcm_dpo/margin": 64.70095825195312, "fcm_dpo/q_t": 0.4019678235054016, "grad_norm": 11.577764511108398, "learning_rate": 1.4374663593999256e-07, "logits/chosen": 0.7559643983840942, "logits/rejected": 0.7006471753120422, "logps/chosen": -171.75296020507812, "logps/ref_chosen": -53.75125503540039, "logps/ref_rejected": -77.17623901367188, "logps/rejected": -259.8788757324219, "loss": 1.0809, "margin_dpo/margin_mean": 64.70095825195312, "margin_dpo/margin_std": 85.6202392578125, "step": 448 }, { "epoch": 0.6787603930461074, "fcm_dpo/beta": 0.006716116331517696, "fcm_dpo/delta": 0.19444824755191803, "fcm_dpo/margin": 31.2590274810791, "fcm_dpo/q_t": 0.4524422883987427, "grad_norm": 18.466140747070312, "learning_rate": 1.4255127197770707e-07, "logits/chosen": 0.5716577768325806, "logits/rejected": 0.5704319477081299, "logps/chosen": -215.08062744140625, "logps/ref_chosen": -75.82737731933594, "logps/ref_rejected": -82.20687866210938, "logps/rejected": -252.71914672851562, "loss": 1.2551, "margin_dpo/margin_mean": 31.2590274810791, "margin_dpo/margin_std": 78.21199035644531, "step": 449 }, { "epoch": 0.6802721088435374, "fcm_dpo/beta": 0.006843068636953831, "fcm_dpo/delta": 0.06173437833786011, "fcm_dpo/margin": 49.743858337402344, "fcm_dpo/q_t": 0.4245959520339966, "grad_norm": 13.465462684631348, "learning_rate": 1.4135891358732205e-07, "logits/chosen": 0.8062694072723389, "logits/rejected": 0.6891622543334961, "logps/chosen": -165.41134643554688, "logps/ref_chosen": -47.11572265625, "logps/ref_rejected": -78.7546615600586, "logps/rejected": -246.79415893554688, "loss": 1.1677, "margin_dpo/margin_mean": 49.743858337402344, "margin_dpo/margin_std": 91.77166748046875, "step": 450 }, { "epoch": 0.6817838246409675, "fcm_dpo/beta": 0.006978826597332954, "fcm_dpo/delta": 0.08206881582736969, "fcm_dpo/margin": 45.89945602416992, "fcm_dpo/q_t": 0.4271845817565918, "grad_norm": 13.454634666442871, "learning_rate": 1.4016959412166437e-07, "logits/chosen": 0.6572551727294922, "logits/rejected": 0.6040632128715515, "logps/chosen": -181.79440307617188, "logps/ref_chosen": -63.350440979003906, "logps/ref_rejected": -76.28530883789062, "logps/rejected": -240.62872314453125, "loss": 1.178, "margin_dpo/margin_mean": 45.89945602416992, "margin_dpo/margin_std": 86.73423767089844, "step": 451 }, { "epoch": 0.6832955404383976, "fcm_dpo/beta": 0.007023798301815987, "fcm_dpo/delta": 0.019556403160095215, "fcm_dpo/margin": 54.26256561279297, "fcm_dpo/q_t": 0.41338497400283813, "grad_norm": 14.76894760131836, "learning_rate": 1.3898334684855645e-07, "logits/chosen": 0.6688199043273926, "logits/rejected": 0.5848639607429504, "logps/chosen": -171.7777557373047, "logps/ref_chosen": -55.58583450317383, "logps/ref_rejected": -77.68738555908203, "logps/rejected": -248.141845703125, "loss": 1.144, "margin_dpo/margin_mean": 54.26256561279297, "margin_dpo/margin_std": 93.56272888183594, "step": 452 }, { "epoch": 0.6848072562358276, "fcm_dpo/beta": 0.007006727624684572, "fcm_dpo/delta": -0.005369680933654308, "fcm_dpo/margin": 57.817604064941406, "fcm_dpo/q_t": 0.4089837074279785, "grad_norm": 15.279993057250977, "learning_rate": 1.3780020494988445e-07, "logits/chosen": 0.6729337573051453, "logits/rejected": 0.643782377243042, "logps/chosen": -174.33375549316406, "logps/ref_chosen": -61.778202056884766, "logps/ref_rejected": -71.51403045654297, "logps/rejected": -241.88719177246094, "loss": 1.119, "margin_dpo/margin_mean": 57.81760787963867, "margin_dpo/margin_std": 91.65188598632812, "step": 453 }, { "epoch": 0.6863189720332578, "fcm_dpo/beta": 0.0069506047293543816, "fcm_dpo/delta": -0.052232228219509125, "fcm_dpo/margin": 64.71170043945312, "fcm_dpo/q_t": 0.3960624039173126, "grad_norm": 13.125537872314453, "learning_rate": 1.366202015206706e-07, "logits/chosen": 0.6993863582611084, "logits/rejected": 0.658939778804779, "logps/chosen": -157.39630126953125, "logps/ref_chosen": -51.59515380859375, "logps/ref_rejected": -63.96732711791992, "logps/rejected": -234.4801788330078, "loss": 1.0737, "margin_dpo/margin_mean": 64.71170043945312, "margin_dpo/margin_std": 88.06663513183594, "step": 454 }, { "epoch": 0.6878306878306878, "fcm_dpo/beta": 0.006900169886648655, "fcm_dpo/delta": -0.027319904416799545, "fcm_dpo/margin": 61.74366760253906, "fcm_dpo/q_t": 0.40355247259140015, "grad_norm": 12.596735954284668, "learning_rate": 1.354433695681474e-07, "logits/chosen": 0.6100882291793823, "logits/rejected": 0.5753176808357239, "logps/chosen": -192.84078979492188, "logps/ref_chosen": -70.65170288085938, "logps/ref_rejected": -77.44276428222656, "logps/rejected": -261.37554931640625, "loss": 1.0855, "margin_dpo/margin_mean": 61.74366760253906, "margin_dpo/margin_std": 85.42503356933594, "step": 455 }, { "epoch": 0.6893424036281179, "fcm_dpo/beta": 0.00692489929497242, "fcm_dpo/delta": 0.03051423281431198, "fcm_dpo/margin": 53.519752502441406, "fcm_dpo/q_t": 0.41640913486480713, "grad_norm": 15.563993453979492, "learning_rate": 1.3426974201083439e-07, "logits/chosen": 0.6147767901420593, "logits/rejected": 0.5489587783813477, "logps/chosen": -179.21768188476562, "logps/ref_chosen": -56.398284912109375, "logps/ref_rejected": -82.61642456054688, "logps/rejected": -258.95556640625, "loss": 1.1384, "margin_dpo/margin_mean": 53.519752502441406, "margin_dpo/margin_std": 88.47608184814453, "step": 456 }, { "epoch": 0.690854119425548, "fcm_dpo/beta": 0.006909678690135479, "fcm_dpo/delta": -0.013150712475180626, "fcm_dpo/margin": 59.67945861816406, "fcm_dpo/q_t": 0.40437084436416626, "grad_norm": 12.112608909606934, "learning_rate": 1.3309935167761717e-07, "logits/chosen": 0.8191350698471069, "logits/rejected": 0.7398391366004944, "logps/chosen": -164.4642791748047, "logps/ref_chosen": -44.72057342529297, "logps/ref_rejected": -68.1158676147461, "logps/rejected": -247.5390167236328, "loss": 1.0835, "margin_dpo/margin_mean": 59.67945098876953, "margin_dpo/margin_std": 76.94285583496094, "step": 457 }, { "epoch": 0.6923658352229781, "fcm_dpo/beta": 0.006901285611093044, "fcm_dpo/delta": -0.02610252983868122, "fcm_dpo/margin": 61.582176208496094, "fcm_dpo/q_t": 0.4029829204082489, "grad_norm": 13.647724151611328, "learning_rate": 1.3193223130682936e-07, "logits/chosen": 0.7404319643974304, "logits/rejected": 0.616897463798523, "logps/chosen": -165.63185119628906, "logps/ref_chosen": -50.00569152832031, "logps/ref_rejected": -87.50015258789062, "logps/rejected": -264.70849609375, "loss": 1.1008, "margin_dpo/margin_mean": 61.582176208496094, "margin_dpo/margin_std": 92.60580444335938, "step": 458 }, { "epoch": 0.6938775510204082, "fcm_dpo/beta": 0.0068240780383348465, "fcm_dpo/delta": -0.12303808331489563, "fcm_dpo/margin": 75.65729522705078, "fcm_dpo/q_t": 0.38151851296424866, "grad_norm": 12.57613754272461, "learning_rate": 1.3076841354533658e-07, "logits/chosen": 0.7350834608078003, "logits/rejected": 0.6995427012443542, "logps/chosen": -173.1988525390625, "logps/ref_chosen": -65.37794494628906, "logps/ref_rejected": -88.19244384765625, "logps/rejected": -271.670654296875, "loss": 1.0222, "margin_dpo/margin_mean": 75.65729522705078, "margin_dpo/margin_std": 85.63201904296875, "step": 459 }, { "epoch": 0.6953892668178382, "fcm_dpo/beta": 0.006613044999539852, "fcm_dpo/delta": -0.08804592490196228, "fcm_dpo/margin": 73.07688903808594, "fcm_dpo/q_t": 0.38998186588287354, "grad_norm": 12.085392951965332, "learning_rate": 1.2960793094762345e-07, "logits/chosen": 0.7788273692131042, "logits/rejected": 0.6462200880050659, "logps/chosen": -186.39027404785156, "logps/ref_chosen": -64.5616683959961, "logps/ref_rejected": -88.67890167236328, "logps/rejected": -283.58441162109375, "loss": 1.0389, "margin_dpo/margin_mean": 73.07688903808594, "margin_dpo/margin_std": 87.7376708984375, "step": 460 }, { "epoch": 0.6969009826152683, "fcm_dpo/beta": 0.006458759307861328, "fcm_dpo/delta": -0.09030409157276154, "fcm_dpo/margin": 74.86151885986328, "fcm_dpo/q_t": 0.38834458589553833, "grad_norm": 12.709549903869629, "learning_rate": 1.2845081597488286e-07, "logits/chosen": 0.8639695048332214, "logits/rejected": 0.7700966596603394, "logps/chosen": -149.4770965576172, "logps/ref_chosen": -49.4779167175293, "logps/ref_rejected": -72.65262603759766, "logps/rejected": -247.51333618164062, "loss": 1.0373, "margin_dpo/margin_mean": 74.86151885986328, "margin_dpo/margin_std": 84.41058349609375, "step": 461 }, { "epoch": 0.6984126984126984, "fcm_dpo/beta": 0.00636675488203764, "fcm_dpo/delta": -0.10163407772779465, "fcm_dpo/margin": 77.89765930175781, "fcm_dpo/q_t": 0.3845703601837158, "grad_norm": 11.933963775634766, "learning_rate": 1.27297100994108e-07, "logits/chosen": 0.7207037210464478, "logits/rejected": 0.6627846956253052, "logps/chosen": -172.74496459960938, "logps/ref_chosen": -60.4951171875, "logps/ref_rejected": -74.82136535644531, "logps/rejected": -264.9688720703125, "loss": 1.0264, "margin_dpo/margin_mean": 77.89765930175781, "margin_dpo/margin_std": 88.62437438964844, "step": 462 }, { "epoch": 0.6999244142101285, "fcm_dpo/beta": 0.006417134776711464, "fcm_dpo/delta": 0.09052874892950058, "fcm_dpo/margin": 48.669517517089844, "fcm_dpo/q_t": 0.4276365637779236, "grad_norm": 15.57586669921875, "learning_rate": 1.2614681827718695e-07, "logits/chosen": 0.7261140942573547, "logits/rejected": 0.7274137139320374, "logps/chosen": -199.56387329101562, "logps/ref_chosen": -67.68511962890625, "logps/ref_rejected": -71.32196044921875, "logps/rejected": -251.8702392578125, "loss": 1.1664, "margin_dpo/margin_mean": 48.669517517089844, "margin_dpo/margin_std": 81.40229034423828, "step": 463 }, { "epoch": 0.7014361300075586, "fcm_dpo/beta": 0.0064452332444489, "fcm_dpo/delta": -0.05545463413000107, "fcm_dpo/margin": 70.26266479492188, "fcm_dpo/q_t": 0.39617645740509033, "grad_norm": 11.537524223327637, "learning_rate": 1.2500000000000005e-07, "logits/chosen": 0.7131586074829102, "logits/rejected": 0.6843728423118591, "logps/chosen": -181.480712890625, "logps/ref_chosen": -59.16564178466797, "logps/ref_rejected": -69.56146240234375, "logps/rejected": -262.13922119140625, "loss": 1.0764, "margin_dpo/margin_mean": 70.26266479492188, "margin_dpo/margin_std": 95.04841613769531, "step": 464 }, { "epoch": 0.7029478458049887, "fcm_dpo/beta": 0.006403525359928608, "fcm_dpo/delta": 0.013384605757892132, "fcm_dpo/margin": 60.45001220703125, "fcm_dpo/q_t": 0.41165050864219666, "grad_norm": 13.156111717224121, "learning_rate": 1.238566782415197e-07, "logits/chosen": 0.7792471647262573, "logits/rejected": 0.7154022455215454, "logps/chosen": -186.728759765625, "logps/ref_chosen": -58.513671875, "logps/ref_rejected": -84.31745910644531, "logps/rejected": -272.9825744628906, "loss": 1.1211, "margin_dpo/margin_mean": 60.450008392333984, "margin_dpo/margin_std": 93.35221862792969, "step": 465 }, { "epoch": 0.7044595616024187, "fcm_dpo/beta": 0.006560437381267548, "fcm_dpo/delta": 0.15931177139282227, "fcm_dpo/margin": 37.31903839111328, "fcm_dpo/q_t": 0.444455087184906, "grad_norm": 17.772686004638672, "learning_rate": 1.2271688498291334e-07, "logits/chosen": 0.7378557920455933, "logits/rejected": 0.745481014251709, "logps/chosen": -212.20864868164062, "logps/ref_chosen": -73.26580810546875, "logps/ref_rejected": -74.83621215820312, "logps/rejected": -251.0980682373047, "loss": 1.2267, "margin_dpo/margin_mean": 37.319034576416016, "margin_dpo/margin_std": 82.18032836914062, "step": 466 }, { "epoch": 0.7059712773998488, "fcm_dpo/beta": 0.006677757948637009, "fcm_dpo/delta": 0.049721457064151764, "fcm_dpo/margin": 52.71526336669922, "fcm_dpo/q_t": 0.4197397828102112, "grad_norm": 12.660947799682617, "learning_rate": 1.2158065210664848e-07, "logits/chosen": 0.7742260694503784, "logits/rejected": 0.6145238280296326, "logps/chosen": -172.53135681152344, "logps/ref_chosen": -47.57947540283203, "logps/ref_rejected": -78.68522644042969, "logps/rejected": -256.35235595703125, "loss": 1.1333, "margin_dpo/margin_mean": 52.71526336669922, "margin_dpo/margin_std": 81.25802612304688, "step": 467 }, { "epoch": 0.7074829931972789, "fcm_dpo/beta": 0.006609264761209488, "fcm_dpo/delta": -0.09665481746196747, "fcm_dpo/margin": 74.44082641601562, "fcm_dpo/q_t": 0.38663211464881897, "grad_norm": 15.095276832580566, "learning_rate": 1.204480113956011e-07, "logits/chosen": 0.6950595378875732, "logits/rejected": 0.6864985823631287, "logps/chosen": -180.8209991455078, "logps/ref_chosen": -63.92778778076172, "logps/ref_rejected": -76.51626586914062, "logps/rejected": -267.8503112792969, "loss": 1.0416, "margin_dpo/margin_mean": 74.44082641601562, "margin_dpo/margin_std": 92.40238952636719, "step": 468 }, { "epoch": 0.708994708994709, "fcm_dpo/beta": 0.006470114924013615, "fcm_dpo/delta": -0.05656132102012634, "fcm_dpo/margin": 70.00104522705078, "fcm_dpo/q_t": 0.39446496963500977, "grad_norm": 12.5325345993042, "learning_rate": 1.1931899453216697e-07, "logits/chosen": 0.7946516871452332, "logits/rejected": 0.7845124006271362, "logps/chosen": -177.92825317382812, "logps/ref_chosen": -59.05818176269531, "logps/ref_rejected": -75.67672729492188, "logps/rejected": -264.5478515625, "loss": 1.0448, "margin_dpo/margin_mean": 70.00105285644531, "margin_dpo/margin_std": 76.44458770751953, "step": 469 }, { "epoch": 0.7105064247921391, "fcm_dpo/beta": 0.0064891641959548, "fcm_dpo/delta": -0.021754732355475426, "fcm_dpo/margin": 64.8369369506836, "fcm_dpo/q_t": 0.40303927659988403, "grad_norm": 12.209174156188965, "learning_rate": 1.1819363309737438e-07, "logits/chosen": 0.7553179860115051, "logits/rejected": 0.6802823543548584, "logps/chosen": -163.2248077392578, "logps/ref_chosen": -47.86743927001953, "logps/ref_rejected": -65.96859741210938, "logps/rejected": -246.1628875732422, "loss": 1.0883, "margin_dpo/margin_mean": 64.83692932128906, "margin_dpo/margin_std": 88.71844482421875, "step": 470 }, { "epoch": 0.7120181405895691, "fcm_dpo/beta": 0.0063733188435435295, "fcm_dpo/delta": -0.08390133082866669, "fcm_dpo/margin": 75.26802062988281, "fcm_dpo/q_t": 0.3887266218662262, "grad_norm": 12.127059936523438, "learning_rate": 1.1707195857000215e-07, "logits/chosen": 0.7076698541641235, "logits/rejected": 0.6516261100769043, "logps/chosen": -167.81793212890625, "logps/ref_chosen": -57.777854919433594, "logps/ref_rejected": -73.81172180175781, "logps/rejected": -259.11981201171875, "loss": 1.0401, "margin_dpo/margin_mean": 75.26802062988281, "margin_dpo/margin_std": 89.51625061035156, "step": 471 }, { "epoch": 0.7135298563869993, "fcm_dpo/beta": 0.006402644794434309, "fcm_dpo/delta": 0.027136290445923805, "fcm_dpo/margin": 58.34724426269531, "fcm_dpo/q_t": 0.4152415990829468, "grad_norm": 12.967268943786621, "learning_rate": 1.1595400232569768e-07, "logits/chosen": 0.722518801689148, "logits/rejected": 0.6758139729499817, "logps/chosen": -168.4672393798828, "logps/ref_chosen": -55.908668518066406, "logps/ref_rejected": -74.70294189453125, "logps/rejected": -245.6087646484375, "loss": 1.1481, "margin_dpo/margin_mean": 58.34724426269531, "margin_dpo/margin_std": 101.94171142578125, "step": 472 }, { "epoch": 0.7150415721844293, "fcm_dpo/beta": 0.006403686944395304, "fcm_dpo/delta": 0.015270838513970375, "fcm_dpo/margin": 60.171531677246094, "fcm_dpo/q_t": 0.41325926780700684, "grad_norm": 13.97789192199707, "learning_rate": 1.1483979563610069e-07, "logits/chosen": 0.7790465354919434, "logits/rejected": 0.6616805195808411, "logps/chosen": -169.07672119140625, "logps/ref_chosen": -54.16088104248047, "logps/ref_rejected": -92.76789855957031, "logps/rejected": -267.8552551269531, "loss": 1.1431, "margin_dpo/margin_mean": 60.171531677246094, "margin_dpo/margin_std": 104.51329040527344, "step": 473 }, { "epoch": 0.7165532879818595, "fcm_dpo/beta": 0.006446688901633024, "fcm_dpo/delta": 0.03435041010379791, "fcm_dpo/margin": 56.910247802734375, "fcm_dpo/q_t": 0.4173174500465393, "grad_norm": 16.706056594848633, "learning_rate": 1.1372936966796709e-07, "logits/chosen": 0.775465190410614, "logits/rejected": 0.6970380544662476, "logps/chosen": -166.1460723876953, "logps/ref_chosen": -46.685707092285156, "logps/ref_rejected": -71.44731903076172, "logps/rejected": -247.81793212890625, "loss": 1.1507, "margin_dpo/margin_mean": 56.910247802734375, "margin_dpo/margin_std": 100.28426361083984, "step": 474 }, { "epoch": 0.7180650037792895, "fcm_dpo/beta": 0.006298656575381756, "fcm_dpo/delta": -0.13723576068878174, "fcm_dpo/margin": 84.00972747802734, "fcm_dpo/q_t": 0.37886470556259155, "grad_norm": 9.853565216064453, "learning_rate": 1.126227554822985e-07, "logits/chosen": 0.7011395692825317, "logits/rejected": 0.6511736512184143, "logps/chosen": -170.49496459960938, "logps/ref_chosen": -58.4873046875, "logps/ref_rejected": -87.00187683105469, "logps/rejected": -283.019287109375, "loss": 1.0024, "margin_dpo/margin_mean": 84.00973510742188, "margin_dpo/margin_std": 91.04681396484375, "step": 475 }, { "epoch": 0.7195767195767195, "fcm_dpo/beta": 0.00633836118504405, "fcm_dpo/delta": 0.03369593247771263, "fcm_dpo/margin": 57.90646743774414, "fcm_dpo/q_t": 0.4163343906402588, "grad_norm": 13.150678634643555, "learning_rate": 1.1151998403347243e-07, "logits/chosen": 0.6292476654052734, "logits/rejected": 0.6282519698143005, "logps/chosen": -210.76119995117188, "logps/ref_chosen": -75.38162231445312, "logps/ref_rejected": -76.99822235107422, "logps/rejected": -270.2842712402344, "loss": 1.1359, "margin_dpo/margin_mean": 57.90646743774414, "margin_dpo/margin_std": 92.76966857910156, "step": 476 }, { "epoch": 0.7210884353741497, "fcm_dpo/beta": 0.006370065733790398, "fcm_dpo/delta": 0.055561892688274384, "fcm_dpo/margin": 54.3778076171875, "fcm_dpo/q_t": 0.42135778069496155, "grad_norm": 15.231217384338379, "learning_rate": 1.1042108616837692e-07, "logits/chosen": 0.7329668998718262, "logits/rejected": 0.681520938873291, "logps/chosen": -201.80587768554688, "logps/ref_chosen": -61.073387145996094, "logps/ref_rejected": -81.34375, "logps/rejected": -276.45404052734375, "loss": 1.19, "margin_dpo/margin_mean": 54.3778076171875, "margin_dpo/margin_std": 111.97293853759766, "step": 477 }, { "epoch": 0.7226001511715797, "fcm_dpo/beta": 0.0064564854837954044, "fcm_dpo/delta": 0.08435309678316116, "fcm_dpo/margin": 49.316490173339844, "fcm_dpo/q_t": 0.42754754424095154, "grad_norm": 15.356504440307617, "learning_rate": 1.0932609262554746e-07, "logits/chosen": 0.6630824208259583, "logits/rejected": 0.6739969253540039, "logps/chosen": -180.30245971679688, "logps/ref_chosen": -57.16731643676758, "logps/ref_rejected": -53.30917739868164, "logps/rejected": -225.76080322265625, "loss": 1.188, "margin_dpo/margin_mean": 49.316490173339844, "margin_dpo/margin_std": 97.46630859375, "step": 478 }, { "epoch": 0.7241118669690099, "fcm_dpo/beta": 0.006586470641195774, "fcm_dpo/delta": 0.11177529394626617, "fcm_dpo/margin": 44.27500915527344, "fcm_dpo/q_t": 0.433511346578598, "grad_norm": 14.682652473449707, "learning_rate": 1.0823503403430734e-07, "logits/chosen": 0.6922081708908081, "logits/rejected": 0.6425020098686218, "logps/chosen": -187.39501953125, "logps/ref_chosen": -58.91331481933594, "logps/ref_rejected": -63.7403450012207, "logps/rejected": -236.4970703125, "loss": 1.2032, "margin_dpo/margin_mean": 44.27500915527344, "margin_dpo/margin_std": 91.15420532226562, "step": 479 }, { "epoch": 0.7256235827664399, "fcm_dpo/beta": 0.0066421665251255035, "fcm_dpo/delta": -0.0520443469285965, "fcm_dpo/margin": 67.61965942382812, "fcm_dpo/q_t": 0.3969612419605255, "grad_norm": 15.13430118560791, "learning_rate": 1.0714794091391072e-07, "logits/chosen": 0.6692843437194824, "logits/rejected": 0.6604632139205933, "logps/chosen": -191.74949645996094, "logps/ref_chosen": -62.80061340332031, "logps/ref_rejected": -67.58859252929688, "logps/rejected": -264.1571350097656, "loss": 1.0838, "margin_dpo/margin_mean": 67.61965942382812, "margin_dpo/margin_std": 94.1576156616211, "step": 480 }, { "epoch": 0.72713529856387, "fcm_dpo/beta": 0.006578594446182251, "fcm_dpo/delta": 0.008969607762992382, "fcm_dpo/margin": 59.488807678222656, "fcm_dpo/q_t": 0.4112991988658905, "grad_norm": 14.179603576660156, "learning_rate": 1.0606484367268906e-07, "logits/chosen": 0.640337347984314, "logits/rejected": 0.6388770341873169, "logps/chosen": -191.6460418701172, "logps/ref_chosen": -65.28649139404297, "logps/ref_rejected": -70.78668212890625, "logps/rejected": -256.6350402832031, "loss": 1.1274, "margin_dpo/margin_mean": 59.48881149291992, "margin_dpo/margin_std": 96.6456527709961, "step": 481 }, { "epoch": 0.7286470143613001, "fcm_dpo/beta": 0.00661865808069706, "fcm_dpo/delta": 0.01288910023868084, "fcm_dpo/margin": 58.54414749145508, "fcm_dpo/q_t": 0.41362571716308594, "grad_norm": 14.701621055603027, "learning_rate": 1.0498577260720048e-07, "logits/chosen": 0.6496937274932861, "logits/rejected": 0.49469494819641113, "logps/chosen": -203.09133911132812, "logps/ref_chosen": -60.906185150146484, "logps/ref_rejected": -103.44656372070312, "logps/rejected": -304.1758728027344, "loss": 1.1531, "margin_dpo/margin_mean": 58.54414749145508, "margin_dpo/margin_std": 107.09422302246094, "step": 482 }, { "epoch": 0.7301587301587301, "fcm_dpo/beta": 0.0065412987023591995, "fcm_dpo/delta": -0.07768933475017548, "fcm_dpo/margin": 72.47356414794922, "fcm_dpo/q_t": 0.3910548985004425, "grad_norm": 12.262384414672852, "learning_rate": 1.0391075790138232e-07, "logits/chosen": 0.7782041430473328, "logits/rejected": 0.6690878868103027, "logps/chosen": -174.45535278320312, "logps/ref_chosen": -53.192012786865234, "logps/ref_rejected": -81.83927154541016, "logps/rejected": -275.576171875, "loss": 1.0493, "margin_dpo/margin_mean": 72.47357177734375, "margin_dpo/margin_std": 90.563232421875, "step": 483 }, { "epoch": 0.7316704459561603, "fcm_dpo/beta": 0.006601003929972649, "fcm_dpo/delta": 0.06470303982496262, "fcm_dpo/margin": 51.01061248779297, "fcm_dpo/q_t": 0.42150557041168213, "grad_norm": 17.135540008544922, "learning_rate": 1.0283982962570681e-07, "logits/chosen": 0.8079191446304321, "logits/rejected": 0.770884096622467, "logps/chosen": -181.22967529296875, "logps/ref_chosen": -57.76945877075195, "logps/ref_rejected": -71.6829833984375, "logps/rejected": -246.15380859375, "loss": 1.1319, "margin_dpo/margin_mean": 51.01061248779297, "margin_dpo/margin_std": 71.01333618164062, "step": 484 }, { "epoch": 0.7331821617535903, "fcm_dpo/beta": 0.006590794771909714, "fcm_dpo/delta": 0.04942598566412926, "fcm_dpo/margin": 53.343475341796875, "fcm_dpo/q_t": 0.4193718433380127, "grad_norm": 14.788070678710938, "learning_rate": 1.0177301773633992e-07, "logits/chosen": 0.7287328243255615, "logits/rejected": 0.7055760622024536, "logps/chosen": -186.085693359375, "logps/ref_chosen": -56.63584899902344, "logps/ref_rejected": -70.85614013671875, "logps/rejected": -253.64944458007812, "loss": 1.137, "margin_dpo/margin_mean": 53.343475341796875, "margin_dpo/margin_std": 80.21830749511719, "step": 485 }, { "epoch": 0.7346938775510204, "fcm_dpo/beta": 0.006727076135575771, "fcm_dpo/delta": 0.0707259476184845, "fcm_dpo/margin": 49.296295166015625, "fcm_dpo/q_t": 0.42624154686927795, "grad_norm": 13.047743797302246, "learning_rate": 1.007103520743035e-07, "logits/chosen": 0.772795557975769, "logits/rejected": 0.6431682705879211, "logps/chosen": -204.21878051757812, "logps/ref_chosen": -56.347023010253906, "logps/ref_rejected": -85.97221374511719, "logps/rejected": -283.1402587890625, "loss": 1.1863, "margin_dpo/margin_mean": 49.296295166015625, "margin_dpo/margin_std": 99.67465209960938, "step": 486 }, { "epoch": 0.7362055933484505, "fcm_dpo/beta": 0.006734380032867193, "fcm_dpo/delta": -0.011707952246069908, "fcm_dpo/margin": 61.060760498046875, "fcm_dpo/q_t": 0.40709030628204346, "grad_norm": 16.7856502532959, "learning_rate": 9.965186236464046e-08, "logits/chosen": 0.8297072649002075, "logits/rejected": 0.7643457651138306, "logps/chosen": -192.57928466796875, "logps/ref_chosen": -60.617218017578125, "logps/ref_rejected": -82.50975036621094, "logps/rejected": -275.5325927734375, "loss": 1.105, "margin_dpo/margin_mean": 61.060760498046875, "margin_dpo/margin_std": 92.03384399414062, "step": 487 }, { "epoch": 0.7377173091458806, "fcm_dpo/beta": 0.006715429946780205, "fcm_dpo/delta": -0.04261378198862076, "fcm_dpo/margin": 65.61810302734375, "fcm_dpo/q_t": 0.40011560916900635, "grad_norm": 17.23809051513672, "learning_rate": 9.859757821558337e-08, "logits/chosen": 0.7520276308059692, "logits/rejected": 0.6840384602546692, "logps/chosen": -188.2955322265625, "logps/ref_chosen": -63.10905075073242, "logps/ref_rejected": -82.49348449707031, "logps/rejected": -273.2980651855469, "loss": 1.0788, "margin_dpo/margin_mean": 65.61810302734375, "margin_dpo/margin_std": 89.72518920898438, "step": 488 }, { "epoch": 0.7392290249433107, "fcm_dpo/beta": 0.006793505512177944, "fcm_dpo/delta": 0.12338872253894806, "fcm_dpo/margin": 41.24451446533203, "fcm_dpo/q_t": 0.4370517432689667, "grad_norm": 12.945178985595703, "learning_rate": 9.754752911772615e-08, "logits/chosen": 0.687408447265625, "logits/rejected": 0.6387894153594971, "logps/chosen": -209.18453979492188, "logps/ref_chosen": -64.98896026611328, "logps/ref_rejected": -84.39607238769531, "logps/rejected": -269.8361511230469, "loss": 1.2308, "margin_dpo/margin_mean": 41.244510650634766, "margin_dpo/margin_std": 98.086181640625, "step": 489 }, { "epoch": 0.7407407407407407, "fcm_dpo/beta": 0.006871424615383148, "fcm_dpo/delta": 0.05978693813085556, "fcm_dpo/margin": 49.7767333984375, "fcm_dpo/q_t": 0.42369672656059265, "grad_norm": 12.462081909179688, "learning_rate": 9.650174444319956e-08, "logits/chosen": 0.7716453075408936, "logits/rejected": 0.7463403344154358, "logps/chosen": -190.76043701171875, "logps/ref_chosen": -61.90874481201172, "logps/ref_rejected": -70.58566284179688, "logps/rejected": -249.21409606933594, "loss": 1.2123, "margin_dpo/margin_mean": 49.7767333984375, "margin_dpo/margin_std": 111.2440414428711, "step": 490 }, { "epoch": 0.7422524565381708, "fcm_dpo/beta": 0.006887979805469513, "fcm_dpo/delta": 0.004202168434858322, "fcm_dpo/margin": 57.374183654785156, "fcm_dpo/q_t": 0.41012823581695557, "grad_norm": 13.691835403442383, "learning_rate": 9.546025344484868e-08, "logits/chosen": 0.6863425970077515, "logits/rejected": 0.6274754405021667, "logps/chosen": -181.0035858154297, "logps/ref_chosen": -55.47570037841797, "logps/ref_rejected": -78.70318603515625, "logps/rejected": -261.6052551269531, "loss": 1.1145, "margin_dpo/margin_mean": 57.374183654785156, "margin_dpo/margin_std": 85.52940368652344, "step": 491 }, { "epoch": 0.7437641723356009, "fcm_dpo/beta": 0.0070613836869597435, "fcm_dpo/delta": 0.09045213460922241, "fcm_dpo/margin": 44.112030029296875, "fcm_dpo/q_t": 0.4296990633010864, "grad_norm": 16.22068977355957, "learning_rate": 9.442308525541589e-08, "logits/chosen": 0.6796330809593201, "logits/rejected": 0.6053134202957153, "logps/chosen": -216.349365234375, "logps/ref_chosen": -67.28638458251953, "logps/ref_rejected": -82.78628540039062, "logps/rejected": -275.9613037109375, "loss": 1.2128, "margin_dpo/margin_mean": 44.112030029296875, "margin_dpo/margin_std": 96.07239532470703, "step": 492 }, { "epoch": 0.745275888133031, "fcm_dpo/beta": 0.007019840180873871, "fcm_dpo/delta": -0.08257926255464554, "fcm_dpo/margin": 68.13697052001953, "fcm_dpo/q_t": 0.39166688919067383, "grad_norm": 13.662456512451172, "learning_rate": 9.339026888672468e-08, "logits/chosen": 0.6610127687454224, "logits/rejected": 0.5783201456069946, "logps/chosen": -177.93154907226562, "logps/ref_chosen": -55.92750549316406, "logps/ref_rejected": -79.12149810791016, "logps/rejected": -269.26251220703125, "loss": 1.0692, "margin_dpo/margin_mean": 68.13697052001953, "margin_dpo/margin_std": 93.68153381347656, "step": 493 }, { "epoch": 0.7467876039304611, "fcm_dpo/beta": 0.0069389790296554565, "fcm_dpo/delta": 0.0046972595155239105, "fcm_dpo/margin": 56.990055084228516, "fcm_dpo/q_t": 0.41134145855903625, "grad_norm": 15.932513236999512, "learning_rate": 9.236183322886945e-08, "logits/chosen": 0.6411583423614502, "logits/rejected": 0.5835065245628357, "logps/chosen": -195.45303344726562, "logps/ref_chosen": -67.95410919189453, "logps/ref_rejected": -90.50865173339844, "logps/rejected": -274.9976501464844, "loss": 1.154, "margin_dpo/margin_mean": 56.99005889892578, "margin_dpo/margin_std": 105.76473999023438, "step": 494 }, { "epoch": 0.7482993197278912, "fcm_dpo/beta": 0.0070509654469788074, "fcm_dpo/delta": 0.07117826491594315, "fcm_dpo/margin": 46.89491271972656, "fcm_dpo/q_t": 0.42860695719718933, "grad_norm": 17.861509323120117, "learning_rate": 9.133780704940594e-08, "logits/chosen": 0.7724558711051941, "logits/rejected": 0.7018958330154419, "logps/chosen": -178.06219482421875, "logps/ref_chosen": -52.62546157836914, "logps/ref_rejected": -72.06781005859375, "logps/rejected": -244.3994598388672, "loss": 1.1992, "margin_dpo/margin_mean": 46.89491271972656, "margin_dpo/margin_std": 100.6576156616211, "step": 495 }, { "epoch": 0.7498110355253212, "fcm_dpo/beta": 0.007005490828305483, "fcm_dpo/delta": 0.0009736791253089905, "fcm_dpo/margin": 56.84469985961914, "fcm_dpo/q_t": 0.4145466089248657, "grad_norm": 14.480406761169434, "learning_rate": 9.031821899254797e-08, "logits/chosen": 0.7183775901794434, "logits/rejected": 0.5997449159622192, "logps/chosen": -197.369384765625, "logps/ref_chosen": -57.597320556640625, "logps/ref_rejected": -94.36127471923828, "logps/rejected": -290.97802734375, "loss": 1.1545, "margin_dpo/margin_mean": 56.844696044921875, "margin_dpo/margin_std": 105.66326141357422, "step": 496 }, { "epoch": 0.7513227513227513, "fcm_dpo/beta": 0.006960996426641941, "fcm_dpo/delta": -0.07592535018920898, "fcm_dpo/margin": 67.83001708984375, "fcm_dpo/q_t": 0.3918842673301697, "grad_norm": 11.594161033630371, "learning_rate": 8.930309757836516e-08, "logits/chosen": 0.7433227300643921, "logits/rejected": 0.7107380628585815, "logps/chosen": -210.66940307617188, "logps/ref_chosen": -72.78994750976562, "logps/ref_rejected": -89.48483276367188, "logps/rejected": -295.1943054199219, "loss": 1.068, "margin_dpo/margin_mean": 67.83000946044922, "margin_dpo/margin_std": 93.42587280273438, "step": 497 }, { "epoch": 0.7528344671201814, "fcm_dpo/beta": 0.0068884193897247314, "fcm_dpo/delta": -0.05250941216945648, "fcm_dpo/margin": 65.34432983398438, "fcm_dpo/q_t": 0.3966979384422302, "grad_norm": 16.511926651000977, "learning_rate": 8.829247120198563e-08, "logits/chosen": 0.7198547720909119, "logits/rejected": 0.6872572898864746, "logps/chosen": -193.16600036621094, "logps/ref_chosen": -68.36572265625, "logps/ref_rejected": -71.28846740722656, "logps/rejected": -261.43310546875, "loss": 1.068, "margin_dpo/margin_mean": 65.34432983398438, "margin_dpo/margin_std": 86.45211029052734, "step": 498 }, { "epoch": 0.7543461829176115, "fcm_dpo/beta": 0.006849166005849838, "fcm_dpo/delta": -0.03738480433821678, "fcm_dpo/margin": 63.62023162841797, "fcm_dpo/q_t": 0.40244078636169434, "grad_norm": 14.907723426818848, "learning_rate": 8.728636813280163e-08, "logits/chosen": 0.7367246747016907, "logits/rejected": 0.6651813983917236, "logps/chosen": -184.7228546142578, "logps/ref_chosen": -61.90882873535156, "logps/ref_rejected": -91.9411392211914, "logps/rejected": -278.3753967285156, "loss": 1.1302, "margin_dpo/margin_mean": 63.62023162841797, "margin_dpo/margin_std": 109.40308380126953, "step": 499 }, { "epoch": 0.7558578987150416, "fcm_dpo/beta": 0.0068002426996827126, "fcm_dpo/delta": -0.0016520768404006958, "fcm_dpo/margin": 59.02922821044922, "fcm_dpo/q_t": 0.40788906812667847, "grad_norm": 16.023651123046875, "learning_rate": 8.628481651367875e-08, "logits/chosen": 0.6595408916473389, "logits/rejected": 0.656460165977478, "logps/chosen": -200.04827880859375, "logps/ref_chosen": -70.225830078125, "logps/ref_rejected": -71.72203063964844, "logps/rejected": -260.57373046875, "loss": 1.1502, "margin_dpo/margin_mean": 59.02923583984375, "margin_dpo/margin_std": 107.1384506225586, "step": 500 }, { "epoch": 0.7573696145124716, "fcm_dpo/beta": 0.00688003096729517, "fcm_dpo/delta": 0.03766298666596413, "fcm_dpo/margin": 52.81761169433594, "fcm_dpo/q_t": 0.4161261022090912, "grad_norm": 13.691091537475586, "learning_rate": 8.528784436016878e-08, "logits/chosen": 0.6945023536682129, "logits/rejected": 0.695048987865448, "logps/chosen": -194.70086669921875, "logps/ref_chosen": -64.59880828857422, "logps/ref_rejected": -70.59329223632812, "logps/rejected": -253.51296997070312, "loss": 1.1158, "margin_dpo/margin_mean": 52.81761169433594, "margin_dpo/margin_std": 72.4344711303711, "step": 501 }, { "epoch": 0.7588813303099018, "fcm_dpo/beta": 0.006907638628035784, "fcm_dpo/delta": 0.019454587250947952, "fcm_dpo/margin": 55.175048828125, "fcm_dpo/q_t": 0.41293010115623474, "grad_norm": 13.987884521484375, "learning_rate": 8.4295479559726e-08, "logits/chosen": 0.6924780607223511, "logits/rejected": 0.6435062885284424, "logps/chosen": -193.63800048828125, "logps/ref_chosen": -65.46662902832031, "logps/ref_rejected": -90.22233581542969, "logps/rejected": -273.5687561035156, "loss": 1.1205, "margin_dpo/margin_mean": 55.175048828125, "margin_dpo/margin_std": 83.9233627319336, "step": 502 }, { "epoch": 0.7603930461073318, "fcm_dpo/beta": 0.006876880303025246, "fcm_dpo/delta": -0.010101448744535446, "fcm_dpo/margin": 59.565589904785156, "fcm_dpo/q_t": 0.40677881240844727, "grad_norm": 12.284053802490234, "learning_rate": 8.330774987092712e-08, "logits/chosen": 0.7192538976669312, "logits/rejected": 0.7191355228424072, "logps/chosen": -171.00331115722656, "logps/ref_chosen": -51.83476257324219, "logps/ref_rejected": -57.62522506713867, "logps/rejected": -236.35935974121094, "loss": 1.1189, "margin_dpo/margin_mean": 59.56558609008789, "margin_dpo/margin_std": 94.57598114013672, "step": 503 }, { "epoch": 0.7619047619047619, "fcm_dpo/beta": 0.006783470045775175, "fcm_dpo/delta": -0.1334783434867859, "fcm_dpo/margin": 77.62858581542969, "fcm_dpo/q_t": 0.37845832109451294, "grad_norm": 17.458255767822266, "learning_rate": 8.232468292269479e-08, "logits/chosen": 0.6963123083114624, "logits/rejected": 0.6729052066802979, "logps/chosen": -189.20529174804688, "logps/ref_chosen": -68.65119934082031, "logps/ref_rejected": -77.91394805908203, "logps/rejected": -276.09661865234375, "loss": 1.0045, "margin_dpo/margin_mean": 77.62858581542969, "margin_dpo/margin_std": 83.20510864257812, "step": 504 }, { "epoch": 0.763416477702192, "fcm_dpo/beta": 0.006723982747644186, "fcm_dpo/delta": 0.06746991723775864, "fcm_dpo/margin": 49.71613693237305, "fcm_dpo/q_t": 0.4255116581916809, "grad_norm": 13.61350154876709, "learning_rate": 8.134630621352483e-08, "logits/chosen": 0.7165220975875854, "logits/rejected": 0.6773439049720764, "logps/chosen": -186.99365234375, "logps/ref_chosen": -59.99884796142578, "logps/ref_rejected": -76.88048553466797, "logps/rejected": -253.5914306640625, "loss": 1.195, "margin_dpo/margin_mean": 49.71613693237305, "margin_dpo/margin_std": 101.73808288574219, "step": 505 }, { "epoch": 0.764928193499622, "fcm_dpo/beta": 0.006802085321396589, "fcm_dpo/delta": 0.026249084621667862, "fcm_dpo/margin": 55.080204010009766, "fcm_dpo/q_t": 0.4139713644981384, "grad_norm": 15.93222713470459, "learning_rate": 8.037264711071698e-08, "logits/chosen": 0.7465803623199463, "logits/rejected": 0.727637767791748, "logps/chosen": -192.9581298828125, "logps/ref_chosen": -70.07130432128906, "logps/ref_rejected": -82.03775024414062, "logps/rejected": -260.0047607421875, "loss": 1.1574, "margin_dpo/margin_mean": 55.080204010009766, "margin_dpo/margin_std": 100.28858947753906, "step": 506 }, { "epoch": 0.7664399092970522, "fcm_dpo/beta": 0.006808855105191469, "fcm_dpo/delta": 0.0011053308844566345, "fcm_dpo/margin": 58.56227111816406, "fcm_dpo/q_t": 0.4128578305244446, "grad_norm": 13.58639144897461, "learning_rate": 7.940373284960933e-08, "logits/chosen": 0.7121202945709229, "logits/rejected": 0.6578322649002075, "logps/chosen": -208.14859008789062, "logps/ref_chosen": -72.00703430175781, "logps/ref_rejected": -93.94987487792969, "logps/rejected": -288.6536865234375, "loss": 1.1411, "margin_dpo/margin_mean": 58.5622673034668, "margin_dpo/margin_std": 102.10011291503906, "step": 507 }, { "epoch": 0.7679516250944822, "fcm_dpo/beta": 0.006827831733971834, "fcm_dpo/delta": -0.04231097176671028, "fcm_dpo/margin": 64.44580078125, "fcm_dpo/q_t": 0.40085405111312866, "grad_norm": 15.900528907775879, "learning_rate": 7.843959053281663e-08, "logits/chosen": 0.6829763054847717, "logits/rejected": 0.5482306480407715, "logps/chosen": -184.63729858398438, "logps/ref_chosen": -60.21992492675781, "logps/ref_rejected": -95.9200668334961, "logps/rejected": -284.78326416015625, "loss": 1.0987, "margin_dpo/margin_mean": 64.44580841064453, "margin_dpo/margin_std": 96.61326599121094, "step": 508 }, { "epoch": 0.7694633408919124, "fcm_dpo/beta": 0.00677447160705924, "fcm_dpo/delta": 0.0181202981621027, "fcm_dpo/margin": 56.46039962768555, "fcm_dpo/q_t": 0.4128088355064392, "grad_norm": 16.664478302001953, "learning_rate": 7.748024712947204e-08, "logits/chosen": 0.6257327198982239, "logits/rejected": 0.6041054129600525, "logps/chosen": -195.684326171875, "logps/ref_chosen": -66.27017211914062, "logps/ref_rejected": -71.73065185546875, "logps/rejected": -257.605224609375, "loss": 1.131, "margin_dpo/margin_mean": 56.46039962768555, "margin_dpo/margin_std": 91.35394287109375, "step": 509 }, { "epoch": 0.7709750566893424, "fcm_dpo/beta": 0.00671444833278656, "fcm_dpo/delta": -0.05051817744970322, "fcm_dpo/margin": 66.65663146972656, "fcm_dpo/q_t": 0.40036720037460327, "grad_norm": 15.058761596679688, "learning_rate": 7.652572947447272e-08, "logits/chosen": 0.7969958782196045, "logits/rejected": 0.6925790905952454, "logps/chosen": -182.7880859375, "logps/ref_chosen": -53.54487609863281, "logps/ref_rejected": -91.36648559570312, "logps/rejected": -287.26629638671875, "loss": 1.1068, "margin_dpo/margin_mean": 66.65662384033203, "margin_dpo/margin_std": 106.01554870605469, "step": 510 }, { "epoch": 0.7724867724867724, "fcm_dpo/beta": 0.006622654385864735, "fcm_dpo/delta": -0.12184499204158783, "fcm_dpo/margin": 77.87154388427734, "fcm_dpo/q_t": 0.38256335258483887, "grad_norm": 17.00933265686035, "learning_rate": 7.557606426772961e-08, "logits/chosen": 0.7343342304229736, "logits/rejected": 0.6737358570098877, "logps/chosen": -179.90391540527344, "logps/ref_chosen": -55.844383239746094, "logps/ref_rejected": -86.49819946289062, "logps/rejected": -288.4292907714844, "loss": 1.0325, "margin_dpo/margin_mean": 77.87153625488281, "margin_dpo/margin_std": 96.47059631347656, "step": 511 }, { "epoch": 0.7739984882842026, "fcm_dpo/beta": 0.006611551158130169, "fcm_dpo/delta": 0.053356267511844635, "fcm_dpo/margin": 52.716392517089844, "fcm_dpo/q_t": 0.42018306255340576, "grad_norm": 18.820058822631836, "learning_rate": 7.463127807341966e-08, "logits/chosen": 0.6046255826950073, "logits/rejected": 0.5958194732666016, "logps/chosen": -186.82579040527344, "logps/ref_chosen": -61.653038024902344, "logps/ref_rejected": -72.83148193359375, "logps/rejected": -250.7206268310547, "loss": 1.1685, "margin_dpo/margin_mean": 52.716392517089844, "margin_dpo/margin_std": 98.232666015625, "step": 512 }, { "epoch": 0.7755102040816326, "fcm_dpo/beta": 0.006585326977074146, "fcm_dpo/delta": -0.02963019162416458, "fcm_dpo/margin": 64.99451446533203, "fcm_dpo/q_t": 0.4018802046775818, "grad_norm": 12.53990650177002, "learning_rate": 7.369139731924401e-08, "logits/chosen": 0.8336935043334961, "logits/rejected": 0.7769891023635864, "logps/chosen": -163.33245849609375, "logps/ref_chosen": -50.85256576538086, "logps/ref_rejected": -69.21754455566406, "logps/rejected": -246.6919403076172, "loss": 1.0753, "margin_dpo/margin_mean": 64.99451446533203, "margin_dpo/margin_std": 83.66407775878906, "step": 513 }, { "epoch": 0.7770219198790628, "fcm_dpo/beta": 0.006546557880938053, "fcm_dpo/delta": -0.07992081344127655, "fcm_dpo/margin": 72.72770690917969, "fcm_dpo/q_t": 0.3906702697277069, "grad_norm": 14.33784008026123, "learning_rate": 7.275644829568747e-08, "logits/chosen": 0.7393954396247864, "logits/rejected": 0.7025594711303711, "logps/chosen": -196.56289672851562, "logps/ref_chosen": -69.38493347167969, "logps/ref_rejected": -83.32447814941406, "logps/rejected": -283.2301330566406, "loss": 1.061, "margin_dpo/margin_mean": 72.72770690917969, "margin_dpo/margin_std": 96.85029602050781, "step": 514 }, { "epoch": 0.7785336356764928, "fcm_dpo/beta": 0.0065173981711268425, "fcm_dpo/delta": 0.027946949005126953, "fcm_dpo/margin": 57.247520446777344, "fcm_dpo/q_t": 0.41493576765060425, "grad_norm": 17.1118221282959, "learning_rate": 7.182645715528435e-08, "logits/chosen": 0.7443599700927734, "logits/rejected": 0.6602603793144226, "logps/chosen": -189.67813110351562, "logps/ref_chosen": -53.687034606933594, "logps/ref_rejected": -83.59614562988281, "logps/rejected": -276.83477783203125, "loss": 1.1414, "margin_dpo/margin_mean": 57.24752426147461, "margin_dpo/margin_std": 96.7652359008789, "step": 515 }, { "epoch": 0.780045351473923, "fcm_dpo/beta": 0.006573040038347244, "fcm_dpo/delta": 0.03187675401568413, "fcm_dpo/margin": 56.167274475097656, "fcm_dpo/q_t": 0.4146859049797058, "grad_norm": 18.41140365600586, "learning_rate": 7.090144991188568e-08, "logits/chosen": 0.7212256193161011, "logits/rejected": 0.6795363426208496, "logps/chosen": -173.27099609375, "logps/ref_chosen": -56.9017219543457, "logps/ref_rejected": -67.83477783203125, "logps/rejected": -240.371337890625, "loss": 1.1466, "margin_dpo/margin_mean": 56.167274475097656, "margin_dpo/margin_std": 96.67643737792969, "step": 516 }, { "epoch": 0.781557067271353, "fcm_dpo/beta": 0.006739528849720955, "fcm_dpo/delta": 0.13554570078849792, "fcm_dpo/margin": 39.611785888671875, "fcm_dpo/q_t": 0.440265029668808, "grad_norm": 17.20258903503418, "learning_rate": 6.998145243993284e-08, "logits/chosen": 0.7755421996116638, "logits/rejected": 0.7712531089782715, "logps/chosen": -202.08642578125, "logps/ref_chosen": -61.775142669677734, "logps/ref_rejected": -62.88270950317383, "logps/rejected": -242.8057861328125, "loss": 1.2254, "margin_dpo/margin_mean": 39.611785888671875, "margin_dpo/margin_std": 89.51103973388672, "step": 517 }, { "epoch": 0.783068783068783, "fcm_dpo/beta": 0.006766768172383308, "fcm_dpo/delta": 0.013192320242524147, "fcm_dpo/margin": 57.238216400146484, "fcm_dpo/q_t": 0.41345030069351196, "grad_norm": 13.554084777832031, "learning_rate": 6.906649047373245e-08, "logits/chosen": 0.7449650764465332, "logits/rejected": 0.6929754018783569, "logps/chosen": -179.30035400390625, "logps/ref_chosen": -62.02523422241211, "logps/ref_rejected": -79.06085205078125, "logps/rejected": -253.57418823242188, "loss": 1.1253, "margin_dpo/margin_mean": 57.238216400146484, "margin_dpo/margin_std": 91.79560089111328, "step": 518 }, { "epoch": 0.7845804988662132, "fcm_dpo/beta": 0.006957560312002897, "fcm_dpo/delta": 0.16204290091991425, "fcm_dpo/margin": 34.67028045654297, "fcm_dpo/q_t": 0.4466700553894043, "grad_norm": 23.354780197143555, "learning_rate": 6.815658960673781e-08, "logits/chosen": 0.7701222896575928, "logits/rejected": 0.7111513614654541, "logps/chosen": -206.356689453125, "logps/ref_chosen": -61.60636901855469, "logps/ref_rejected": -74.50727844238281, "logps/rejected": -253.92787170410156, "loss": 1.3135, "margin_dpo/margin_mean": 34.67028045654297, "margin_dpo/margin_std": 115.71553039550781, "step": 519 }, { "epoch": 0.7860922146636432, "fcm_dpo/beta": 0.007012324873358011, "fcm_dpo/delta": 0.05751825124025345, "fcm_dpo/margin": 49.06908416748047, "fcm_dpo/q_t": 0.422906756401062, "grad_norm": 16.057010650634766, "learning_rate": 6.725177529083209e-08, "logits/chosen": 0.8252368569374084, "logits/rejected": 0.7620112895965576, "logps/chosen": -194.60711669921875, "logps/ref_chosen": -62.87343215942383, "logps/ref_rejected": -76.505615234375, "logps/rejected": -257.3083801269531, "loss": 1.1649, "margin_dpo/margin_mean": 49.06908416748047, "margin_dpo/margin_std": 88.33916473388672, "step": 520 }, { "epoch": 0.7876039304610734, "fcm_dpo/beta": 0.00697598559781909, "fcm_dpo/delta": -0.10865991562604904, "fcm_dpo/margin": 72.15048217773438, "fcm_dpo/q_t": 0.3853059411048889, "grad_norm": 13.737951278686523, "learning_rate": 6.63520728356167e-08, "logits/chosen": 0.5883495211601257, "logits/rejected": 0.5072116255760193, "logps/chosen": -194.3409423828125, "logps/ref_chosen": -64.20668029785156, "logps/ref_rejected": -92.28083038330078, "logps/rejected": -294.5655822753906, "loss": 1.0398, "margin_dpo/margin_mean": 72.15048217773438, "margin_dpo/margin_std": 91.8095474243164, "step": 521 }, { "epoch": 0.7891156462585034, "fcm_dpo/beta": 0.006977587938308716, "fcm_dpo/delta": 0.0699794664978981, "fcm_dpo/margin": 47.636898040771484, "fcm_dpo/q_t": 0.4260995388031006, "grad_norm": 16.24595069885254, "learning_rate": 6.545750740770336e-08, "logits/chosen": 0.6884851455688477, "logits/rejected": 0.6817054152488708, "logps/chosen": -188.324462890625, "logps/ref_chosen": -58.369720458984375, "logps/ref_rejected": -68.79248046875, "logps/rejected": -246.38412475585938, "loss": 1.2182, "margin_dpo/margin_mean": 47.63689422607422, "margin_dpo/margin_std": 109.36659240722656, "step": 522 }, { "epoch": 0.7906273620559335, "fcm_dpo/beta": 0.007021876983344555, "fcm_dpo/delta": 0.017026737332344055, "fcm_dpo/margin": 54.623382568359375, "fcm_dpo/q_t": 0.41094690561294556, "grad_norm": 20.093515396118164, "learning_rate": 6.456810403001012e-08, "logits/chosen": 0.7484044432640076, "logits/rejected": 0.6152558326721191, "logps/chosen": -199.60435485839844, "logps/ref_chosen": -65.71324157714844, "logps/ref_rejected": -91.98896789550781, "logps/rejected": -280.50347900390625, "loss": 1.1596, "margin_dpo/margin_mean": 54.623382568359375, "margin_dpo/margin_std": 101.72914123535156, "step": 523 }, { "epoch": 0.7921390778533636, "fcm_dpo/beta": 0.007063503377139568, "fcm_dpo/delta": -0.008377037942409515, "fcm_dpo/margin": 57.727630615234375, "fcm_dpo/q_t": 0.4065864086151123, "grad_norm": 16.176006317138672, "learning_rate": 6.368388758106134e-08, "logits/chosen": 0.661557674407959, "logits/rejected": 0.637451171875, "logps/chosen": -190.521728515625, "logps/ref_chosen": -76.35124969482422, "logps/ref_rejected": -89.96072387695312, "logps/rejected": -261.85882568359375, "loss": 1.1107, "margin_dpo/margin_mean": 57.727630615234375, "margin_dpo/margin_std": 87.59829711914062, "step": 524 }, { "epoch": 0.7936507936507936, "fcm_dpo/beta": 0.007091479375958443, "fcm_dpo/delta": 0.05275290459394455, "fcm_dpo/margin": 49.22231674194336, "fcm_dpo/q_t": 0.42219555377960205, "grad_norm": 18.684139251708984, "learning_rate": 6.280488279429185e-08, "logits/chosen": 0.5486440658569336, "logits/rejected": 0.5439519882202148, "logps/chosen": -203.2303924560547, "logps/ref_chosen": -75.49578857421875, "logps/ref_rejected": -84.04852294921875, "logps/rejected": -261.00543212890625, "loss": 1.1759, "margin_dpo/margin_mean": 49.22231674194336, "margin_dpo/margin_std": 96.31336975097656, "step": 525 }, { "epoch": 0.7951625094482238, "fcm_dpo/beta": 0.007162164896726608, "fcm_dpo/delta": 0.1063753068447113, "fcm_dpo/margin": 41.36842346191406, "fcm_dpo/q_t": 0.43258005380630493, "grad_norm": 16.012828826904297, "learning_rate": 6.193111425735515e-08, "logits/chosen": 0.7274391055107117, "logits/rejected": 0.65253746509552, "logps/chosen": -199.98507690429688, "logps/ref_chosen": -61.29241943359375, "logps/ref_rejected": -82.47763061523438, "logps/rejected": -262.5387268066406, "loss": 1.2155, "margin_dpo/margin_mean": 41.36842346191406, "margin_dpo/margin_std": 88.6600341796875, "step": 526 }, { "epoch": 0.7966742252456538, "fcm_dpo/beta": 0.007441909518092871, "fcm_dpo/delta": 0.13420581817626953, "fcm_dpo/margin": 36.05781555175781, "fcm_dpo/q_t": 0.44124549627304077, "grad_norm": 16.688430786132812, "learning_rate": 6.106260641143546e-08, "logits/chosen": 0.8245516419410706, "logits/rejected": 0.7362295389175415, "logps/chosen": -209.2223358154297, "logps/ref_chosen": -61.472625732421875, "logps/ref_rejected": -90.52831268310547, "logps/rejected": -274.3358154296875, "loss": 1.2613, "margin_dpo/margin_mean": 36.05781173706055, "margin_dpo/margin_std": 97.74179077148438, "step": 527 }, { "epoch": 0.7981859410430839, "fcm_dpo/beta": 0.0075470441952347755, "fcm_dpo/delta": 0.09618767350912094, "fcm_dpo/margin": 40.661521911621094, "fcm_dpo/q_t": 0.4321001172065735, "grad_norm": 18.79235076904297, "learning_rate": 6.019938355056422e-08, "logits/chosen": 0.6475770473480225, "logits/rejected": 0.5618788003921509, "logps/chosen": -189.43951416015625, "logps/ref_chosen": -58.792015075683594, "logps/ref_rejected": -71.82516479492188, "logps/rejected": -243.13417053222656, "loss": 1.2484, "margin_dpo/margin_mean": 40.661521911621094, "margin_dpo/margin_std": 104.12982177734375, "step": 528 }, { "epoch": 0.799697656840514, "fcm_dpo/beta": 0.00729703065007925, "fcm_dpo/delta": -0.2618618607521057, "fcm_dpo/margin": 88.30415344238281, "fcm_dpo/q_t": 0.3532322645187378, "grad_norm": 16.628124237060547, "learning_rate": 5.934146982094049e-08, "logits/chosen": 0.6590827703475952, "logits/rejected": 0.602139949798584, "logps/chosen": -175.66940307617188, "logps/ref_chosen": -55.070960998535156, "logps/ref_rejected": -75.44007873535156, "logps/rejected": -284.3426513671875, "loss": 0.942, "margin_dpo/margin_mean": 88.30415344238281, "margin_dpo/margin_std": 90.58662414550781, "step": 529 }, { "epoch": 0.8012093726379441, "fcm_dpo/beta": 0.007245873101055622, "fcm_dpo/delta": 0.02474869042634964, "fcm_dpo/margin": 51.91332244873047, "fcm_dpo/q_t": 0.41642701625823975, "grad_norm": 19.019323348999023, "learning_rate": 5.848888922025552e-08, "logits/chosen": 0.7544151544570923, "logits/rejected": 0.706871747970581, "logps/chosen": -182.08175659179688, "logps/ref_chosen": -56.743812561035156, "logps/ref_rejected": -76.6692123413086, "logps/rejected": -253.9204864501953, "loss": 1.1443, "margin_dpo/margin_mean": 51.91332244873047, "margin_dpo/margin_std": 89.03276062011719, "step": 530 }, { "epoch": 0.8027210884353742, "fcm_dpo/beta": 0.007274698466062546, "fcm_dpo/delta": -0.005907643586397171, "fcm_dpo/margin": 55.718894958496094, "fcm_dpo/q_t": 0.4090062081813812, "grad_norm": 15.70089054107666, "learning_rate": 5.7641665597021435e-08, "logits/chosen": 0.6855486631393433, "logits/rejected": 0.6101270318031311, "logps/chosen": -177.32748413085938, "logps/ref_chosen": -51.116455078125, "logps/ref_rejected": -79.52884674072266, "logps/rejected": -261.458740234375, "loss": 1.1205, "margin_dpo/margin_mean": 55.718894958496094, "margin_dpo/margin_std": 88.86299133300781, "step": 531 }, { "epoch": 0.8042328042328042, "fcm_dpo/beta": 0.007200066931545734, "fcm_dpo/delta": -0.03519837558269501, "fcm_dpo/margin": 60.228580474853516, "fcm_dpo/q_t": 0.4023039937019348, "grad_norm": 16.101490020751953, "learning_rate": 5.679982264990424e-08, "logits/chosen": 0.6560695171356201, "logits/rejected": 0.6018407344818115, "logps/chosen": -204.2732696533203, "logps/ref_chosen": -58.279945373535156, "logps/ref_rejected": -78.05426788330078, "logps/rejected": -284.27618408203125, "loss": 1.1038, "margin_dpo/margin_mean": 60.228580474853516, "margin_dpo/margin_std": 93.1765365600586, "step": 532 }, { "epoch": 0.8057445200302343, "fcm_dpo/beta": 0.00716027244925499, "fcm_dpo/delta": -0.03689378499984741, "fcm_dpo/margin": 60.79398727416992, "fcm_dpo/q_t": 0.40077680349349976, "grad_norm": 15.326909065246582, "learning_rate": 5.596338392706076e-08, "logits/chosen": 0.8385459184646606, "logits/rejected": 0.7652316093444824, "logps/chosen": -158.7193145751953, "logps/ref_chosen": -56.41801071166992, "logps/ref_rejected": -73.89324951171875, "logps/rejected": -236.98855590820312, "loss": 1.0913, "margin_dpo/margin_mean": 60.793983459472656, "margin_dpo/margin_std": 87.72501373291016, "step": 533 }, { "epoch": 0.8072562358276644, "fcm_dpo/beta": 0.007086427416652441, "fcm_dpo/delta": -0.02248242497444153, "fcm_dpo/margin": 59.43117141723633, "fcm_dpo/q_t": 0.4066773056983948, "grad_norm": 13.382204055786133, "learning_rate": 5.513237282548033e-08, "logits/chosen": 0.7272214293479919, "logits/rejected": 0.6883162260055542, "logps/chosen": -187.7013397216797, "logps/ref_chosen": -60.748687744140625, "logps/ref_rejected": -73.8623046875, "logps/rejected": -260.2461242675781, "loss": 1.1293, "margin_dpo/margin_mean": 59.431175231933594, "margin_dpo/margin_std": 101.31953430175781, "step": 534 }, { "epoch": 0.8087679516250945, "fcm_dpo/beta": 0.007163902744650841, "fcm_dpo/delta": 0.04707575589418411, "fcm_dpo/margin": 49.48677444458008, "fcm_dpo/q_t": 0.4207179546356201, "grad_norm": 17.681066513061523, "learning_rate": 5.430681259032957e-08, "logits/chosen": 0.6349679827690125, "logits/rejected": 0.5681853294372559, "logps/chosen": -200.6911163330078, "logps/ref_chosen": -61.637413024902344, "logps/ref_rejected": -80.93138885498047, "logps/rejected": -269.47186279296875, "loss": 1.1707, "margin_dpo/margin_mean": 49.486778259277344, "margin_dpo/margin_std": 94.28369140625, "step": 535 }, { "epoch": 0.8102796674225246, "fcm_dpo/beta": 0.006997551769018173, "fcm_dpo/delta": -0.16011017560958862, "fcm_dpo/margin": 78.75794982910156, "fcm_dpo/q_t": 0.37516194581985474, "grad_norm": 11.368307113647461, "learning_rate": 5.3486726314303175e-08, "logits/chosen": 0.7681195735931396, "logits/rejected": 0.6752403378486633, "logps/chosen": -171.7135009765625, "logps/ref_chosen": -51.88897705078125, "logps/ref_rejected": -73.34864044189453, "logps/rejected": -271.93115234375, "loss": 0.9889, "margin_dpo/margin_mean": 78.75794982910156, "margin_dpo/margin_std": 84.82420349121094, "step": 536 }, { "epoch": 0.8117913832199547, "fcm_dpo/beta": 0.006904451176524162, "fcm_dpo/delta": -0.0123976431787014, "fcm_dpo/margin": 59.59520721435547, "fcm_dpo/q_t": 0.40800943970680237, "grad_norm": 14.604050636291504, "learning_rate": 5.267213693697695e-08, "logits/chosen": 0.8269777297973633, "logits/rejected": 0.7264485359191895, "logps/chosen": -191.2030792236328, "logps/ref_chosen": -54.248619079589844, "logps/ref_rejected": -94.94343566894531, "logps/rejected": -291.49310302734375, "loss": 1.1189, "margin_dpo/margin_mean": 59.59520721435547, "margin_dpo/margin_std": 95.32566833496094, "step": 537 }, { "epoch": 0.8133030990173847, "fcm_dpo/beta": 0.006886166054755449, "fcm_dpo/delta": -0.06406421959400177, "fcm_dpo/margin": 66.96125793457031, "fcm_dpo/q_t": 0.39491915702819824, "grad_norm": 13.988241195678711, "learning_rate": 5.1863067244167144e-08, "logits/chosen": 0.7210502624511719, "logits/rejected": 0.6904141306877136, "logps/chosen": -201.9376983642578, "logps/ref_chosen": -70.09353637695312, "logps/ref_rejected": -79.49833679199219, "logps/rejected": -278.3037414550781, "loss": 1.0629, "margin_dpo/margin_mean": 66.96125793457031, "margin_dpo/margin_std": 87.8351058959961, "step": 538 }, { "epoch": 0.8148148148148148, "fcm_dpo/beta": 0.006873176898807287, "fcm_dpo/delta": 0.052093926817178726, "fcm_dpo/margin": 50.87729263305664, "fcm_dpo/q_t": 0.4218023419380188, "grad_norm": 14.85510540008545, "learning_rate": 5.105953986729195e-08, "logits/chosen": 0.6665756702423096, "logits/rejected": 0.5819079875946045, "logps/chosen": -204.06600952148438, "logps/ref_chosen": -61.93169403076172, "logps/ref_rejected": -84.08946228027344, "logps/rejected": -277.10107421875, "loss": 1.1559, "margin_dpo/margin_mean": 50.877288818359375, "margin_dpo/margin_std": 89.65788269042969, "step": 539 }, { "epoch": 0.8163265306122449, "fcm_dpo/beta": 0.00684034638106823, "fcm_dpo/delta": -0.1083156019449234, "fcm_dpo/margin": 73.51258850097656, "fcm_dpo/q_t": 0.38540440797805786, "grad_norm": 13.257943153381348, "learning_rate": 5.026157728273966e-08, "logits/chosen": 0.7699329853057861, "logits/rejected": 0.6641333103179932, "logps/chosen": -191.4811553955078, "logps/ref_chosen": -62.704254150390625, "logps/ref_rejected": -95.63597106933594, "logps/rejected": -297.92547607421875, "loss": 1.0315, "margin_dpo/margin_mean": 73.51258850097656, "margin_dpo/margin_std": 87.4825439453125, "step": 540 }, { "epoch": 0.817838246409675, "fcm_dpo/beta": 0.006668367423117161, "fcm_dpo/delta": -0.05727549269795418, "fcm_dpo/margin": 68.03440856933594, "fcm_dpo/q_t": 0.3954416513442993, "grad_norm": 14.147591590881348, "learning_rate": 4.9469201811239035e-08, "logits/chosen": 0.7798404693603516, "logits/rejected": 0.804283857345581, "logps/chosen": -189.17298889160156, "logps/ref_chosen": -62.48084259033203, "logps/ref_rejected": -57.55541229248047, "logps/rejected": -252.281982421875, "loss": 1.0639, "margin_dpo/margin_mean": 68.03440856933594, "margin_dpo/margin_std": 86.24921417236328, "step": 541 }, { "epoch": 0.8193499622071051, "fcm_dpo/beta": 0.0065713440999388695, "fcm_dpo/delta": -0.10843698680400848, "fcm_dpo/margin": 76.51225280761719, "fcm_dpo/q_t": 0.3853691816329956, "grad_norm": 16.98206329345703, "learning_rate": 4.868243561723534e-08, "logits/chosen": 0.7922050952911377, "logits/rejected": 0.7388157248497009, "logps/chosen": -158.83099365234375, "logps/ref_chosen": -49.454891204833984, "logps/ref_rejected": -65.33275604248047, "logps/rejected": -251.22113037109375, "loss": 1.0495, "margin_dpo/margin_mean": 76.51225280761719, "margin_dpo/margin_std": 101.84271240234375, "step": 542 }, { "epoch": 0.8208616780045351, "fcm_dpo/beta": 0.006503199227154255, "fcm_dpo/delta": -0.06481535732746124, "fcm_dpo/margin": 71.01345825195312, "fcm_dpo/q_t": 0.3929465413093567, "grad_norm": 11.35152530670166, "learning_rate": 4.790130070827028e-08, "logits/chosen": 0.731881856918335, "logits/rejected": 0.6383688449859619, "logps/chosen": -168.16604614257812, "logps/ref_chosen": -51.100860595703125, "logps/ref_rejected": -76.06130981445312, "logps/rejected": -264.13995361328125, "loss": 1.0598, "margin_dpo/margin_mean": 71.01346588134766, "margin_dpo/margin_std": 90.14659118652344, "step": 543 }, { "epoch": 0.8223733938019653, "fcm_dpo/beta": 0.006377712823450565, "fcm_dpo/delta": -0.09231595695018768, "fcm_dpo/margin": 76.49650573730469, "fcm_dpo/q_t": 0.38890010118484497, "grad_norm": 14.788056373596191, "learning_rate": 4.7125818934366454e-08, "logits/chosen": 0.7259522676467896, "logits/rejected": 0.6440372467041016, "logps/chosen": -189.947265625, "logps/ref_chosen": -60.2772331237793, "logps/ref_rejected": -88.40553283691406, "logps/rejected": -294.5720520019531, "loss": 1.0524, "margin_dpo/margin_mean": 76.49650573730469, "margin_dpo/margin_std": 100.47244262695312, "step": 544 }, { "epoch": 0.8238851095993953, "fcm_dpo/beta": 0.006433564238250256, "fcm_dpo/delta": 0.09561902284622192, "fcm_dpo/margin": 47.767616271972656, "fcm_dpo/q_t": 0.4303530156612396, "grad_norm": 13.540328025817871, "learning_rate": 4.635601198741607e-08, "logits/chosen": 0.6927204728126526, "logits/rejected": 0.6323798894882202, "logps/chosen": -199.80316162109375, "logps/ref_chosen": -61.61524963378906, "logps/ref_rejected": -78.71266174316406, "logps/rejected": -264.668212890625, "loss": 1.1927, "margin_dpo/margin_mean": 47.767616271972656, "margin_dpo/margin_std": 95.30821990966797, "step": 545 }, { "epoch": 0.8253968253968254, "fcm_dpo/beta": 0.006493829190731049, "fcm_dpo/delta": 0.04412386938929558, "fcm_dpo/margin": 55.045677185058594, "fcm_dpo/q_t": 0.4179641008377075, "grad_norm": 15.096487998962402, "learning_rate": 4.559190140057428e-08, "logits/chosen": 0.8139510154724121, "logits/rejected": 0.8047194480895996, "logps/chosen": -190.3719482421875, "logps/ref_chosen": -59.313262939453125, "logps/ref_rejected": -64.73631286621094, "logps/rejected": -250.84066772460938, "loss": 1.1597, "margin_dpo/margin_mean": 55.045677185058594, "margin_dpo/margin_std": 99.31507873535156, "step": 546 }, { "epoch": 0.8269085411942555, "fcm_dpo/beta": 0.006442173384130001, "fcm_dpo/delta": -0.06737470626831055, "fcm_dpo/margin": 72.02572631835938, "fcm_dpo/q_t": 0.3931256830692291, "grad_norm": 13.689138412475586, "learning_rate": 4.483350854765672e-08, "logits/chosen": 0.6713754534721375, "logits/rejected": 0.6014930009841919, "logps/chosen": -171.71279907226562, "logps/ref_chosen": -54.97674560546875, "logps/ref_rejected": -75.35922241210938, "logps/rejected": -264.1210021972656, "loss": 1.0656, "margin_dpo/margin_mean": 72.02572631835938, "margin_dpo/margin_std": 96.70033264160156, "step": 547 }, { "epoch": 0.8284202569916855, "fcm_dpo/beta": 0.006535450927913189, "fcm_dpo/delta": 0.08367334306240082, "fcm_dpo/margin": 48.743003845214844, "fcm_dpo/q_t": 0.4280347526073456, "grad_norm": 16.57050895690918, "learning_rate": 4.4080854642541826e-08, "logits/chosen": 0.6213726997375488, "logits/rejected": 0.5548287630081177, "logps/chosen": -197.66107177734375, "logps/ref_chosen": -63.21067428588867, "logps/ref_rejected": -81.23347473144531, "logps/rejected": -264.4268798828125, "loss": 1.1778, "margin_dpo/margin_mean": 48.743003845214844, "margin_dpo/margin_std": 91.22822570800781, "step": 548 }, { "epoch": 0.8299319727891157, "fcm_dpo/beta": 0.006569857243448496, "fcm_dpo/delta": 0.03660057857632637, "fcm_dpo/margin": 55.518028259277344, "fcm_dpo/q_t": 0.41681382060050964, "grad_norm": 15.688474655151367, "learning_rate": 4.333396073857723e-08, "logits/chosen": 0.8061296343803406, "logits/rejected": 0.7307944297790527, "logps/chosen": -195.18118286132812, "logps/ref_chosen": -64.27351379394531, "logps/ref_rejected": -92.31663513183594, "logps/rejected": -278.7423095703125, "loss": 1.1524, "margin_dpo/margin_mean": 55.518028259277344, "margin_dpo/margin_std": 97.83292388916016, "step": 549 }, { "epoch": 0.8314436885865457, "fcm_dpo/beta": 0.006715863943099976, "fcm_dpo/delta": 0.15693125128746033, "fcm_dpo/margin": 36.81257629394531, "fcm_dpo/q_t": 0.44385606050491333, "grad_norm": 17.09474754333496, "learning_rate": 4.259284772799099e-08, "logits/chosen": 0.7606515884399414, "logits/rejected": 0.7276763916015625, "logps/chosen": -199.18109130859375, "logps/ref_chosen": -56.230438232421875, "logps/ref_rejected": -62.59788513183594, "logps/rejected": -242.3611297607422, "loss": 1.2432, "margin_dpo/margin_mean": 36.81257629394531, "margin_dpo/margin_std": 89.00492858886719, "step": 550 }, { "epoch": 0.8329554043839759, "fcm_dpo/beta": 0.0069015612825751305, "fcm_dpo/delta": 0.07776844501495361, "fcm_dpo/margin": 46.99622344970703, "fcm_dpo/q_t": 0.42739948630332947, "grad_norm": 14.707651138305664, "learning_rate": 4.1857536341307176e-08, "logits/chosen": 0.7485238313674927, "logits/rejected": 0.7114731073379517, "logps/chosen": -208.91763305664062, "logps/ref_chosen": -67.74720764160156, "logps/ref_rejected": -87.04285430908203, "logps/rejected": -275.2095031738281, "loss": 1.1647, "margin_dpo/margin_mean": 46.996219635009766, "margin_dpo/margin_std": 83.65496826171875, "step": 551 }, { "epoch": 0.8344671201814059, "fcm_dpo/beta": 0.006933193188160658, "fcm_dpo/delta": -0.00469888374209404, "fcm_dpo/margin": 58.293643951416016, "fcm_dpo/q_t": 0.404817670583725, "grad_norm": 14.66103458404541, "learning_rate": 4.112804714676593e-08, "logits/chosen": 0.7346080541610718, "logits/rejected": 0.676051139831543, "logps/chosen": -194.23452758789062, "logps/ref_chosen": -62.92625427246094, "logps/ref_rejected": -82.98365783691406, "logps/rejected": -272.5855712890625, "loss": 1.1033, "margin_dpo/margin_mean": 58.29364776611328, "margin_dpo/margin_std": 83.52306365966797, "step": 552 }, { "epoch": 0.8359788359788359, "fcm_dpo/beta": 0.00692669115960598, "fcm_dpo/delta": 0.021221814677119255, "fcm_dpo/margin": 54.79808807373047, "fcm_dpo/q_t": 0.4154573976993561, "grad_norm": 17.43486785888672, "learning_rate": 4.0404400549748144e-08, "logits/chosen": 0.6985148191452026, "logits/rejected": 0.5853888988494873, "logps/chosen": -198.54403686523438, "logps/ref_chosen": -56.038490295410156, "logps/ref_rejected": -84.48454284667969, "logps/rejected": -281.78814697265625, "loss": 1.1534, "margin_dpo/margin_mean": 54.7980842590332, "margin_dpo/margin_std": 100.00516510009766, "step": 553 }, { "epoch": 0.8374905517762661, "fcm_dpo/beta": 0.006878286134451628, "fcm_dpo/delta": -0.05534950643777847, "fcm_dpo/margin": 65.84024810791016, "fcm_dpo/q_t": 0.3967292606830597, "grad_norm": 14.698594093322754, "learning_rate": 3.968661679220467e-08, "logits/chosen": 0.69556725025177, "logits/rejected": 0.6746841669082642, "logps/chosen": -192.9752197265625, "logps/ref_chosen": -64.53059387207031, "logps/ref_rejected": -71.2155990600586, "logps/rejected": -265.50048828125, "loss": 1.0812, "margin_dpo/margin_mean": 65.84025573730469, "margin_dpo/margin_std": 93.83136749267578, "step": 554 }, { "epoch": 0.8390022675736961, "fcm_dpo/beta": 0.00692109577357769, "fcm_dpo/delta": 0.010154381394386292, "fcm_dpo/margin": 56.18605422973633, "fcm_dpo/q_t": 0.41057413816452026, "grad_norm": 14.941075325012207, "learning_rate": 3.89747159520904e-08, "logits/chosen": 0.7340927720069885, "logits/rejected": 0.7052745819091797, "logps/chosen": -206.43673706054688, "logps/ref_chosen": -66.65191650390625, "logps/ref_rejected": -68.6667251586914, "logps/rejected": -264.6376037597656, "loss": 1.1581, "margin_dpo/margin_mean": 56.18605422973633, "margin_dpo/margin_std": 100.85908508300781, "step": 555 }, { "epoch": 0.8405139833711263, "fcm_dpo/beta": 0.006918167695403099, "fcm_dpo/delta": 0.06461147218942642, "fcm_dpo/margin": 48.79975128173828, "fcm_dpo/q_t": 0.42574968934059143, "grad_norm": 13.405830383300781, "learning_rate": 3.826871794280192e-08, "logits/chosen": 0.7548919916152954, "logits/rejected": 0.700238823890686, "logps/chosen": -194.24932861328125, "logps/ref_chosen": -52.832366943359375, "logps/ref_rejected": -64.49044036865234, "logps/rejected": -254.70716857910156, "loss": 1.198, "margin_dpo/margin_mean": 48.79975128173828, "margin_dpo/margin_std": 103.18643951416016, "step": 556 }, { "epoch": 0.8420256991685563, "fcm_dpo/beta": 0.00680879969149828, "fcm_dpo/delta": -0.1172577515244484, "fcm_dpo/margin": 74.97007751464844, "fcm_dpo/q_t": 0.3834077715873718, "grad_norm": 12.292925834655762, "learning_rate": 3.756864251262143e-08, "logits/chosen": 0.8256399631500244, "logits/rejected": 0.7433536052703857, "logps/chosen": -193.7919158935547, "logps/ref_chosen": -55.03598403930664, "logps/ref_rejected": -75.80644989013672, "logps/rejected": -289.532470703125, "loss": 1.0183, "margin_dpo/margin_mean": 74.97007751464844, "margin_dpo/margin_std": 84.72743225097656, "step": 557 }, { "epoch": 0.8435374149659864, "fcm_dpo/beta": 0.0066352728754282, "fcm_dpo/delta": -0.13472692668437958, "fcm_dpo/margin": 79.395263671875, "fcm_dpo/q_t": 0.3804672360420227, "grad_norm": 12.516437530517578, "learning_rate": 3.687450924416341e-08, "logits/chosen": 0.777481198310852, "logits/rejected": 0.7188490629196167, "logps/chosen": -188.71958923339844, "logps/ref_chosen": -63.226348876953125, "logps/ref_rejected": -91.46881866455078, "logps/rejected": -296.3573303222656, "loss": 1.0151, "margin_dpo/margin_mean": 79.395263671875, "margin_dpo/margin_std": 92.50755310058594, "step": 558 }, { "epoch": 0.8450491307634165, "fcm_dpo/beta": 0.006523288786411285, "fcm_dpo/delta": -0.04582027345895767, "fcm_dpo/margin": 67.82418823242188, "fcm_dpo/q_t": 0.40194329619407654, "grad_norm": 12.600460052490234, "learning_rate": 3.6186337553827743e-08, "logits/chosen": 0.7076990604400635, "logits/rejected": 0.6349881887435913, "logps/chosen": -192.7144317626953, "logps/ref_chosen": -61.521644592285156, "logps/ref_rejected": -82.83859252929688, "logps/rejected": -281.8555908203125, "loss": 1.0989, "margin_dpo/margin_mean": 67.82418823242188, "margin_dpo/margin_std": 102.31001281738281, "step": 559 }, { "epoch": 0.8465608465608465, "fcm_dpo/beta": 0.006590306758880615, "fcm_dpo/delta": 0.002619616687297821, "fcm_dpo/margin": 60.240455627441406, "fcm_dpo/q_t": 0.409095823764801, "grad_norm": 14.637415885925293, "learning_rate": 3.550414669125573e-08, "logits/chosen": 0.738136351108551, "logits/rejected": 0.6958550214767456, "logps/chosen": -203.272216796875, "logps/ref_chosen": -60.64122009277344, "logps/ref_rejected": -78.75474548339844, "logps/rejected": -281.626220703125, "loss": 1.1043, "margin_dpo/margin_mean": 60.24045181274414, "margin_dpo/margin_std": 84.55506896972656, "step": 560 }, { "epoch": 0.8480725623582767, "fcm_dpo/beta": 0.006551677361130714, "fcm_dpo/delta": 0.00024553295224905014, "fcm_dpo/margin": 61.01319122314453, "fcm_dpo/q_t": 0.41009342670440674, "grad_norm": 13.768065452575684, "learning_rate": 3.482795573879241e-08, "logits/chosen": 0.6982570290565491, "logits/rejected": 0.6633864641189575, "logps/chosen": -192.55197143554688, "logps/ref_chosen": -62.49859619140625, "logps/ref_rejected": -78.72064208984375, "logps/rejected": -269.78717041015625, "loss": 1.1158, "margin_dpo/margin_mean": 61.01319122314453, "margin_dpo/margin_std": 94.03324890136719, "step": 561 }, { "epoch": 0.8495842781557067, "fcm_dpo/beta": 0.006441822275519371, "fcm_dpo/delta": -0.08493717759847641, "fcm_dpo/margin": 74.48919677734375, "fcm_dpo/q_t": 0.3915877342224121, "grad_norm": 14.380753517150879, "learning_rate": 3.415778361095226e-08, "logits/chosen": 0.7225862741470337, "logits/rejected": 0.6788659691810608, "logps/chosen": -211.61782836914062, "logps/ref_chosen": -74.78173828125, "logps/ref_rejected": -92.63499450683594, "logps/rejected": -303.96026611328125, "loss": 1.0511, "margin_dpo/margin_mean": 74.48919677734375, "margin_dpo/margin_std": 94.54464721679688, "step": 562 }, { "epoch": 0.8510959939531368, "fcm_dpo/beta": 0.006419507786631584, "fcm_dpo/delta": -0.03327463939785957, "fcm_dpo/margin": 67.2710952758789, "fcm_dpo/q_t": 0.40121811628341675, "grad_norm": 17.5394287109375, "learning_rate": 3.349364905389032e-08, "logits/chosen": 0.809956431388855, "logits/rejected": 0.7581348419189453, "logps/chosen": -167.83493041992188, "logps/ref_chosen": -50.19850158691406, "logps/ref_rejected": -66.76687622070312, "logps/rejected": -251.67440795898438, "loss": 1.1022, "margin_dpo/margin_mean": 67.27110290527344, "margin_dpo/margin_std": 102.41062927246094, "step": 563 }, { "epoch": 0.8526077097505669, "fcm_dpo/beta": 0.006282067857682705, "fcm_dpo/delta": -0.12858623266220093, "fcm_dpo/margin": 83.0648193359375, "fcm_dpo/q_t": 0.37959784269332886, "grad_norm": 13.681713104248047, "learning_rate": 3.283557064487785e-08, "logits/chosen": 0.6968977451324463, "logits/rejected": 0.6630585789680481, "logps/chosen": -175.38339233398438, "logps/ref_chosen": -55.7408447265625, "logps/ref_rejected": -74.82323455810547, "logps/rejected": -277.5306091308594, "loss": 1.0289, "margin_dpo/margin_mean": 83.06480407714844, "margin_dpo/margin_std": 102.96722412109375, "step": 564 }, { "epoch": 0.854119425547997, "fcm_dpo/beta": 0.0063241226598620415, "fcm_dpo/delta": 0.04935676231980324, "fcm_dpo/margin": 55.58483123779297, "fcm_dpo/q_t": 0.4191403090953827, "grad_norm": 15.041940689086914, "learning_rate": 3.218356679178252e-08, "logits/chosen": 0.7640155553817749, "logits/rejected": 0.7049951553344727, "logps/chosen": -205.72286987304688, "logps/ref_chosen": -58.33738327026367, "logps/ref_rejected": -78.31776428222656, "logps/rejected": -281.2880859375, "loss": 1.14, "margin_dpo/margin_mean": 55.58483123779297, "margin_dpo/margin_std": 87.38355255126953, "step": 565 }, { "epoch": 0.8556311413454271, "fcm_dpo/beta": 0.006384224630892277, "fcm_dpo/delta": 0.05589155852794647, "fcm_dpo/margin": 54.115760803222656, "fcm_dpo/q_t": 0.4228624105453491, "grad_norm": 16.651105880737305, "learning_rate": 3.1537655732553764e-08, "logits/chosen": 0.7325712442398071, "logits/rejected": 0.7150111794471741, "logps/chosen": -205.9342041015625, "logps/ref_chosen": -71.22373962402344, "logps/ref_rejected": -71.11601257324219, "logps/rejected": -259.9422302246094, "loss": 1.1896, "margin_dpo/margin_mean": 54.11576461791992, "margin_dpo/margin_std": 111.10244750976562, "step": 566 }, { "epoch": 0.8571428571428571, "fcm_dpo/beta": 0.006314173806458712, "fcm_dpo/delta": -0.026270300149917603, "fcm_dpo/margin": 67.20326232910156, "fcm_dpo/q_t": 0.40221065282821655, "grad_norm": 11.8645601272583, "learning_rate": 3.089785553471233e-08, "logits/chosen": 0.7259865999221802, "logits/rejected": 0.6274834871292114, "logps/chosen": -183.31649780273438, "logps/ref_chosen": -52.669273376464844, "logps/ref_rejected": -74.34785461425781, "logps/rejected": -272.1983337402344, "loss": 1.0897, "margin_dpo/margin_mean": 67.20326232910156, "margin_dpo/margin_std": 92.31834411621094, "step": 567 }, { "epoch": 0.8586545729402872, "fcm_dpo/beta": 0.006262045819312334, "fcm_dpo/delta": -0.1232818141579628, "fcm_dpo/margin": 82.55564880371094, "fcm_dpo/q_t": 0.3823985457420349, "grad_norm": 14.214730262756348, "learning_rate": 3.026418409484513e-08, "logits/chosen": 0.7861194610595703, "logits/rejected": 0.6945356130599976, "logps/chosen": -171.9617919921875, "logps/ref_chosen": -52.178001403808594, "logps/ref_rejected": -85.8277587890625, "logps/rejected": -288.1672058105469, "loss": 1.0136, "margin_dpo/margin_mean": 82.55564880371094, "margin_dpo/margin_std": 90.54248046875, "step": 568 }, { "epoch": 0.8601662887377173, "fcm_dpo/beta": 0.00627292413264513, "fcm_dpo/delta": 0.13700155913829803, "fcm_dpo/margin": 42.50354766845703, "fcm_dpo/q_t": 0.43937602639198303, "grad_norm": 14.89389419555664, "learning_rate": 2.963665913810451e-08, "logits/chosen": 0.6582707166671753, "logits/rejected": 0.6227110624313354, "logps/chosen": -201.08621215820312, "logps/ref_chosen": -62.649261474609375, "logps/ref_rejected": -75.4298324584961, "logps/rejected": -256.370361328125, "loss": 1.2184, "margin_dpo/margin_mean": 42.503543853759766, "margin_dpo/margin_std": 89.05029296875, "step": 569 }, { "epoch": 0.8616780045351474, "fcm_dpo/beta": 0.006269059143960476, "fcm_dpo/delta": -0.12943394482135773, "fcm_dpo/margin": 83.39543151855469, "fcm_dpo/q_t": 0.37981927394866943, "grad_norm": 12.89783000946045, "learning_rate": 2.9015298217712453e-08, "logits/chosen": 0.7100155353546143, "logits/rejected": 0.6229262948036194, "logps/chosen": -173.34877014160156, "logps/ref_chosen": -50.04179382324219, "logps/ref_rejected": -78.27146911621094, "logps/rejected": -284.973876953125, "loss": 1.0119, "margin_dpo/margin_mean": 83.39543151855469, "margin_dpo/margin_std": 93.0318603515625, "step": 570 }, { "epoch": 0.8631897203325775, "fcm_dpo/beta": 0.006321952678263187, "fcm_dpo/delta": 0.10337033122777939, "fcm_dpo/margin": 47.27796936035156, "fcm_dpo/q_t": 0.43055155873298645, "grad_norm": 13.160158157348633, "learning_rate": 2.840011871446962e-08, "logits/chosen": 0.73212730884552, "logits/rejected": 0.6987918019294739, "logps/chosen": -189.09153747558594, "logps/ref_chosen": -53.65681457519531, "logps/ref_rejected": -66.13298034667969, "logps/rejected": -248.84567260742188, "loss": 1.2004, "margin_dpo/margin_mean": 47.27796936035156, "margin_dpo/margin_std": 95.1507568359375, "step": 571 }, { "epoch": 0.8647014361300076, "fcm_dpo/beta": 0.006384381093084812, "fcm_dpo/delta": 0.03750025853514671, "fcm_dpo/margin": 56.937442779541016, "fcm_dpo/q_t": 0.41591769456863403, "grad_norm": 13.933595657348633, "learning_rate": 2.7791137836269158e-08, "logits/chosen": 0.7308815121650696, "logits/rejected": 0.7777682542800903, "logps/chosen": -205.65716552734375, "logps/ref_chosen": -74.81792449951172, "logps/ref_rejected": -65.88681030273438, "logps/rejected": -253.66348266601562, "loss": 1.1253, "margin_dpo/margin_mean": 56.937442779541016, "margin_dpo/margin_std": 84.29963684082031, "step": 572 }, { "epoch": 0.8662131519274376, "fcm_dpo/beta": 0.0064005982130765915, "fcm_dpo/delta": 0.03014349937438965, "fcm_dpo/margin": 57.9608154296875, "fcm_dpo/q_t": 0.41660982370376587, "grad_norm": 14.19927978515625, "learning_rate": 2.718837261761528e-08, "logits/chosen": 0.7378900051116943, "logits/rejected": 0.6873558759689331, "logps/chosen": -214.39682006835938, "logps/ref_chosen": -68.72564697265625, "logps/ref_rejected": -88.16201782226562, "logps/rejected": -291.79400634765625, "loss": 1.1599, "margin_dpo/margin_mean": 57.960819244384766, "margin_dpo/margin_std": 107.89491271972656, "step": 573 }, { "epoch": 0.8677248677248677, "fcm_dpo/beta": 0.006377051584422588, "fcm_dpo/delta": -0.07006673514842987, "fcm_dpo/margin": 73.18067169189453, "fcm_dpo/q_t": 0.3916003108024597, "grad_norm": 11.854135513305664, "learning_rate": 2.659183991914696e-08, "logits/chosen": 0.803756833076477, "logits/rejected": 0.7289662957191467, "logps/chosen": -186.39210510253906, "logps/ref_chosen": -56.31340026855469, "logps/ref_rejected": -83.91553497314453, "logps/rejected": -287.1749267578125, "loss": 1.0321, "margin_dpo/margin_mean": 73.18067169189453, "margin_dpo/margin_std": 77.18203735351562, "step": 574 }, { "epoch": 0.8692365835222978, "fcm_dpo/beta": 0.006343858316540718, "fcm_dpo/delta": 0.06961024552583694, "fcm_dpo/margin": 52.35631561279297, "fcm_dpo/q_t": 0.4264383614063263, "grad_norm": 14.030909538269043, "learning_rate": 2.600155642716606e-08, "logits/chosen": 0.7899061441421509, "logits/rejected": 0.6997029781341553, "logps/chosen": -198.152587890625, "logps/ref_chosen": -64.5841293334961, "logps/ref_rejected": -93.47034454345703, "logps/rejected": -279.3951416015625, "loss": 1.1954, "margin_dpo/margin_mean": 52.356319427490234, "margin_dpo/margin_std": 106.40255737304688, "step": 575 }, { "epoch": 0.8707482993197279, "fcm_dpo/beta": 0.006311601027846336, "fcm_dpo/delta": -0.06749401986598969, "fcm_dpo/margin": 73.40779113769531, "fcm_dpo/q_t": 0.394220232963562, "grad_norm": 13.683633804321289, "learning_rate": 2.5417538653170754e-08, "logits/chosen": 0.731153130531311, "logits/rejected": 0.6163959503173828, "logps/chosen": -171.4643096923828, "logps/ref_chosen": -53.28052520751953, "logps/ref_rejected": -84.2000503540039, "logps/rejected": -275.7916259765625, "loss": 1.0627, "margin_dpo/margin_mean": 73.40778350830078, "margin_dpo/margin_std": 95.41714477539062, "step": 576 }, { "epoch": 0.872260015117158, "fcm_dpo/beta": 0.006382349878549576, "fcm_dpo/delta": 0.06438060849905014, "fcm_dpo/margin": 52.93243408203125, "fcm_dpo/q_t": 0.42242977023124695, "grad_norm": 14.356147766113281, "learning_rate": 2.4839802933393607e-08, "logits/chosen": 0.7241270542144775, "logits/rejected": 0.7086690664291382, "logps/chosen": -194.91119384765625, "logps/ref_chosen": -62.32468795776367, "logps/ref_rejected": -67.300537109375, "logps/rejected": -252.81947326660156, "loss": 1.1692, "margin_dpo/margin_mean": 52.93243408203125, "margin_dpo/margin_std": 96.21859741210938, "step": 577 }, { "epoch": 0.873771730914588, "fcm_dpo/beta": 0.0064790756441652775, "fcm_dpo/delta": 0.08017978072166443, "fcm_dpo/margin": 49.77034378051758, "fcm_dpo/q_t": 0.4279744327068329, "grad_norm": 16.847196578979492, "learning_rate": 2.4268365428344733e-08, "logits/chosen": 0.8302306532859802, "logits/rejected": 0.8066465258598328, "logps/chosen": -183.35203552246094, "logps/ref_chosen": -56.65557861328125, "logps/ref_rejected": -68.21835327148438, "logps/rejected": -244.68516540527344, "loss": 1.1898, "margin_dpo/margin_mean": 49.77034378051758, "margin_dpo/margin_std": 100.29454040527344, "step": 578 }, { "epoch": 0.8752834467120182, "fcm_dpo/beta": 0.00643126480281353, "fcm_dpo/delta": -0.07598722726106644, "fcm_dpo/margin": 73.43510437011719, "fcm_dpo/q_t": 0.39019495248794556, "grad_norm": 13.77873706817627, "learning_rate": 2.3703242122359357e-08, "logits/chosen": 0.685715913772583, "logits/rejected": 0.6591500043869019, "logps/chosen": -189.93231201171875, "logps/ref_chosen": -56.809661865234375, "logps/ref_rejected": -68.09613037109375, "logps/rejected": -274.65386962890625, "loss": 1.0395, "margin_dpo/margin_mean": 73.43510437011719, "margin_dpo/margin_std": 85.38418579101562, "step": 579 }, { "epoch": 0.8767951625094482, "fcm_dpo/beta": 0.0064583588391542435, "fcm_dpo/delta": 0.020921528339385986, "fcm_dpo/margin": 58.764244079589844, "fcm_dpo/q_t": 0.41470351815223694, "grad_norm": 13.418577194213867, "learning_rate": 2.3144448823151392e-08, "logits/chosen": 0.7243668437004089, "logits/rejected": 0.6619343161582947, "logps/chosen": -190.41073608398438, "logps/ref_chosen": -57.70011520385742, "logps/ref_rejected": -77.90664672851562, "logps/rejected": -269.3815002441406, "loss": 1.1513, "margin_dpo/margin_mean": 58.764244079589844, "margin_dpo/margin_std": 104.921630859375, "step": 580 }, { "epoch": 0.8783068783068783, "fcm_dpo/beta": 0.006431188900023699, "fcm_dpo/delta": 0.012560145929455757, "fcm_dpo/margin": 60.2906494140625, "fcm_dpo/q_t": 0.4119495153427124, "grad_norm": 14.395081520080566, "learning_rate": 2.259200116137039e-08, "logits/chosen": 0.7669543623924255, "logits/rejected": 0.6934635639190674, "logps/chosen": -203.38047790527344, "logps/ref_chosen": -59.332359313964844, "logps/ref_rejected": -83.64482116699219, "logps/rejected": -287.98358154296875, "loss": 1.1236, "margin_dpo/margin_mean": 60.2906494140625, "margin_dpo/margin_std": 94.43912506103516, "step": 581 }, { "epoch": 0.8798185941043084, "fcm_dpo/beta": 0.0064775762148201466, "fcm_dpo/delta": 0.008546445518732071, "fcm_dpo/margin": 60.47076416015625, "fcm_dpo/q_t": 0.41076183319091797, "grad_norm": 11.891162872314453, "learning_rate": 2.204591459016525e-08, "logits/chosen": 0.7505415678024292, "logits/rejected": 0.7810231447219849, "logps/chosen": -193.03822326660156, "logps/ref_chosen": -64.16285705566406, "logps/ref_rejected": -58.632896423339844, "logps/rejected": -247.97903442382812, "loss": 1.1218, "margin_dpo/margin_mean": 60.47076416015625, "margin_dpo/margin_std": 94.79828643798828, "step": 582 }, { "epoch": 0.8813303099017384, "fcm_dpo/beta": 0.006431900896131992, "fcm_dpo/delta": -0.03729263320565224, "fcm_dpo/margin": 67.7349853515625, "fcm_dpo/q_t": 0.40107226371765137, "grad_norm": 15.881672859191895, "learning_rate": 2.1506204384751064e-08, "logits/chosen": 0.862554132938385, "logits/rejected": 0.7417807579040527, "logps/chosen": -182.47802734375, "logps/ref_chosen": -51.87239456176758, "logps/ref_rejected": -83.86331176757812, "logps/rejected": -282.20391845703125, "loss": 1.1078, "margin_dpo/margin_mean": 67.7349853515625, "margin_dpo/margin_std": 106.33586120605469, "step": 583 }, { "epoch": 0.8828420256991686, "fcm_dpo/beta": 0.006398425903171301, "fcm_dpo/delta": 0.0037146955728530884, "fcm_dpo/margin": 61.912025451660156, "fcm_dpo/q_t": 0.41191157698631287, "grad_norm": 12.762717247009277, "learning_rate": 2.09728856419826e-08, "logits/chosen": 0.8829727172851562, "logits/rejected": 0.7666534185409546, "logps/chosen": -165.6560516357422, "logps/ref_chosen": -46.571388244628906, "logps/ref_rejected": -80.67969512939453, "logps/rejected": -261.6763916015625, "loss": 1.144, "margin_dpo/margin_mean": 61.912025451660156, "margin_dpo/margin_std": 109.02485656738281, "step": 584 }, { "epoch": 0.8843537414965986, "fcm_dpo/beta": 0.006562906317412853, "fcm_dpo/delta": 0.11703015118837357, "fcm_dpo/margin": 43.517791748046875, "fcm_dpo/q_t": 0.4340656101703644, "grad_norm": 13.247313499450684, "learning_rate": 2.044597327993153e-08, "logits/chosen": 0.701311469078064, "logits/rejected": 0.6506215333938599, "logps/chosen": -196.23910522460938, "logps/ref_chosen": -58.124534606933594, "logps/ref_rejected": -79.00538635253906, "logps/rejected": -260.63775634765625, "loss": 1.2099, "margin_dpo/margin_mean": 43.517784118652344, "margin_dpo/margin_std": 92.49024963378906, "step": 585 }, { "epoch": 0.8858654572940288, "fcm_dpo/beta": 0.006531290709972382, "fcm_dpo/delta": -0.0264582596719265, "fcm_dpo/margin": 65.08265686035156, "fcm_dpo/q_t": 0.4013257324695587, "grad_norm": 18.042287826538086, "learning_rate": 1.9925482037469187e-08, "logits/chosen": 0.781760573387146, "logits/rejected": 0.7310307621955872, "logps/chosen": -183.73556518554688, "logps/ref_chosen": -54.10163879394531, "logps/ref_rejected": -63.72113037109375, "logps/rejected": -258.4377136230469, "loss": 1.0613, "margin_dpo/margin_mean": 65.08265686035156, "margin_dpo/margin_std": 74.19046783447266, "step": 586 }, { "epoch": 0.8873771730914588, "fcm_dpo/beta": 0.006523734889924526, "fcm_dpo/delta": -0.01788686215877533, "fcm_dpo/margin": 63.94139099121094, "fcm_dpo/q_t": 0.403054416179657, "grad_norm": 14.469686508178711, "learning_rate": 1.9411426473854687e-08, "logits/chosen": 0.7586396932601929, "logits/rejected": 0.7525993585586548, "logps/chosen": -195.1334686279297, "logps/ref_chosen": -63.41719436645508, "logps/ref_rejected": -63.47003936767578, "logps/rejected": -259.127685546875, "loss": 1.1416, "margin_dpo/margin_mean": 63.9413948059082, "margin_dpo/margin_std": 115.5007553100586, "step": 587 }, { "epoch": 0.8888888888888888, "fcm_dpo/beta": 0.00644359365105629, "fcm_dpo/delta": -0.06252604722976685, "fcm_dpo/margin": 71.29679870605469, "fcm_dpo/q_t": 0.39673376083374023, "grad_norm": 15.000171661376953, "learning_rate": 1.890382096832699e-08, "logits/chosen": 0.7840080261230469, "logits/rejected": 0.7370002269744873, "logps/chosen": -195.41769409179688, "logps/ref_chosen": -62.20103454589844, "logps/ref_rejected": -82.10249328613281, "logps/rejected": -286.615966796875, "loss": 1.0822, "margin_dpo/margin_mean": 71.29679870605469, "margin_dpo/margin_std": 104.02099609375, "step": 588 }, { "epoch": 0.890400604686319, "fcm_dpo/beta": 0.006399224046617746, "fcm_dpo/delta": -0.047763481736183167, "fcm_dpo/margin": 69.64301300048828, "fcm_dpo/q_t": 0.3962135314941406, "grad_norm": 11.454410552978516, "learning_rate": 1.840267971970344e-08, "logits/chosen": 0.7322001457214355, "logits/rejected": 0.7018231153488159, "logps/chosen": -183.03843688964844, "logps/ref_chosen": -56.71361541748047, "logps/ref_rejected": -76.7366943359375, "logps/rejected": -272.70452880859375, "loss": 1.0478, "margin_dpo/margin_mean": 69.64301300048828, "margin_dpo/margin_std": 77.81524658203125, "step": 589 }, { "epoch": 0.891912320483749, "fcm_dpo/beta": 0.006368682254105806, "fcm_dpo/delta": -0.02491743117570877, "fcm_dpo/margin": 66.54501342773438, "fcm_dpo/q_t": 0.4025854766368866, "grad_norm": 14.082845687866211, "learning_rate": 1.7908016745981856e-08, "logits/chosen": 0.6987200975418091, "logits/rejected": 0.6662635803222656, "logps/chosen": -207.60150146484375, "logps/ref_chosen": -66.5138168334961, "logps/ref_rejected": -85.70820617675781, "logps/rejected": -293.34088134765625, "loss": 1.0858, "margin_dpo/margin_mean": 66.54501342773438, "margin_dpo/margin_std": 90.5670166015625, "step": 590 }, { "epoch": 0.8934240362811792, "fcm_dpo/beta": 0.0061980183236300945, "fcm_dpo/delta": -0.10651122033596039, "fcm_dpo/margin": 80.5565185546875, "fcm_dpo/q_t": 0.38733240962028503, "grad_norm": 15.332803726196289, "learning_rate": 1.7419845883949098e-08, "logits/chosen": 0.8256734609603882, "logits/rejected": 0.7557948231697083, "logps/chosen": -180.6138916015625, "logps/ref_chosen": -60.697181701660156, "logps/ref_rejected": -86.12278747558594, "logps/rejected": -286.59600830078125, "loss": 1.0646, "margin_dpo/margin_mean": 80.5565185546875, "margin_dpo/margin_std": 112.17295837402344, "step": 591 }, { "epoch": 0.8949357520786092, "fcm_dpo/beta": 0.006209563929587603, "fcm_dpo/delta": 0.030359894037246704, "fcm_dpo/margin": 59.65741729736328, "fcm_dpo/q_t": 0.41657984256744385, "grad_norm": 15.251811027526855, "learning_rate": 1.6938180788793556e-08, "logits/chosen": 0.7907428741455078, "logits/rejected": 0.6714023947715759, "logps/chosen": -182.21139526367188, "logps/ref_chosen": -51.237327575683594, "logps/ref_rejected": -81.60242462158203, "logps/rejected": -272.23388671875, "loss": 1.1292, "margin_dpo/margin_mean": 59.65741729736328, "margin_dpo/margin_std": 91.72056579589844, "step": 592 }, { "epoch": 0.8964474678760394, "fcm_dpo/beta": 0.00623913761228323, "fcm_dpo/delta": 0.0039339009672403336, "fcm_dpo/margin": 63.491966247558594, "fcm_dpo/q_t": 0.40932101011276245, "grad_norm": 15.088692665100098, "learning_rate": 1.6463034933723336e-08, "logits/chosen": 0.7265362739562988, "logits/rejected": 0.6285638809204102, "logps/chosen": -152.95541381835938, "logps/ref_chosen": -42.08000183105469, "logps/ref_rejected": -68.47499084472656, "logps/rejected": -242.8423614501953, "loss": 1.1179, "margin_dpo/margin_mean": 63.49196243286133, "margin_dpo/margin_std": 98.1021728515625, "step": 593 }, { "epoch": 0.8979591836734694, "fcm_dpo/beta": 0.006280633620917797, "fcm_dpo/delta": 0.03118686005473137, "fcm_dpo/margin": 58.9074592590332, "fcm_dpo/q_t": 0.4138728380203247, "grad_norm": 13.728915214538574, "learning_rate": 1.5994421609589385e-08, "logits/chosen": 0.6779206991195679, "logits/rejected": 0.6631582975387573, "logps/chosen": -202.61465454101562, "logps/ref_chosen": -63.658668518066406, "logps/ref_rejected": -70.35597229003906, "logps/rejected": -268.21942138671875, "loss": 1.1204, "margin_dpo/margin_mean": 58.9074592590332, "margin_dpo/margin_std": 86.3900146484375, "step": 594 }, { "epoch": 0.8994708994708994, "fcm_dpo/beta": 0.006229420658200979, "fcm_dpo/delta": -0.08674081414937973, "fcm_dpo/margin": 77.4767837524414, "fcm_dpo/q_t": 0.3901749551296234, "grad_norm": 11.558106422424316, "learning_rate": 1.553235392451377e-08, "logits/chosen": 0.8139692544937134, "logits/rejected": 0.7172808647155762, "logps/chosen": -181.95928955078125, "logps/ref_chosen": -56.21875762939453, "logps/ref_rejected": -83.95773315429688, "logps/rejected": -287.175048828125, "loss": 1.0663, "margin_dpo/margin_mean": 77.4767837524414, "margin_dpo/margin_std": 107.94747924804688, "step": 595 }, { "epoch": 0.9009826152683296, "fcm_dpo/beta": 0.006332032848149538, "fcm_dpo/delta": 0.17451868951320648, "fcm_dpo/margin": 36.29243850708008, "fcm_dpo/q_t": 0.44889748096466064, "grad_norm": 12.626412391662598, "learning_rate": 1.507684480352292e-08, "logits/chosen": 0.6528673768043518, "logits/rejected": 0.6745901107788086, "logps/chosen": -213.66482543945312, "logps/ref_chosen": -68.48088073730469, "logps/ref_rejected": -61.732967376708984, "logps/rejected": -243.2093505859375, "loss": 1.2489, "margin_dpo/margin_mean": 36.292442321777344, "margin_dpo/margin_std": 90.35986328125, "step": 596 }, { "epoch": 0.9024943310657596, "fcm_dpo/beta": 0.006430739536881447, "fcm_dpo/delta": 0.020158810541033745, "fcm_dpo/margin": 59.17926025390625, "fcm_dpo/q_t": 0.41390174627304077, "grad_norm": 11.819912910461426, "learning_rate": 1.4627906988186111e-08, "logits/chosen": 0.7465409636497498, "logits/rejected": 0.7289406657218933, "logps/chosen": -163.42794799804688, "logps/ref_chosen": -48.85750961303711, "logps/ref_rejected": -55.068084716796875, "logps/rejected": -228.81777954101562, "loss": 1.1278, "margin_dpo/margin_mean": 59.17926025390625, "margin_dpo/margin_std": 94.12771606445312, "step": 597 }, { "epoch": 0.9040060468631897, "fcm_dpo/beta": 0.00660196878015995, "fcm_dpo/delta": 0.15759900212287903, "fcm_dpo/margin": 37.216529846191406, "fcm_dpo/q_t": 0.44498932361602783, "grad_norm": 14.393204689025879, "learning_rate": 1.4185553036259095e-08, "logits/chosen": 0.7748836874961853, "logits/rejected": 0.688491702079773, "logps/chosen": -207.56390380859375, "logps/ref_chosen": -58.88715362548828, "logps/ref_rejected": -81.43145751953125, "logps/rejected": -267.32476806640625, "loss": 1.2458, "margin_dpo/margin_mean": 37.216529846191406, "margin_dpo/margin_std": 93.11833190917969, "step": 598 }, { "epoch": 0.9055177626606198, "fcm_dpo/beta": 0.006748649291694164, "fcm_dpo/delta": 0.09477294981479645, "fcm_dpo/margin": 45.604652404785156, "fcm_dpo/q_t": 0.4309813380241394, "grad_norm": 16.184436798095703, "learning_rate": 1.3749795321332885e-08, "logits/chosen": 0.807050883769989, "logits/rejected": 0.7597008943557739, "logps/chosen": -207.95523071289062, "logps/ref_chosen": -57.60719299316406, "logps/ref_rejected": -71.80469512939453, "logps/rejected": -267.75738525390625, "loss": 1.1988, "margin_dpo/margin_mean": 45.604652404785156, "margin_dpo/margin_std": 94.80332946777344, "step": 599 }, { "epoch": 0.9070294784580499, "fcm_dpo/beta": 0.006845717318356037, "fcm_dpo/delta": 0.03495318070054054, "fcm_dpo/margin": 53.3426628112793, "fcm_dpo/q_t": 0.41796183586120605, "grad_norm": 15.72593879699707, "learning_rate": 1.3320646032487393e-08, "logits/chosen": 0.8194034099578857, "logits/rejected": 0.7605953216552734, "logps/chosen": -195.68597412109375, "logps/ref_chosen": -58.44231414794922, "logps/ref_rejected": -83.64639282226562, "logps/rejected": -274.23272705078125, "loss": 1.1521, "margin_dpo/margin_mean": 53.34266662597656, "margin_dpo/margin_std": 92.48655700683594, "step": 600 }, { "epoch": 0.9070294784580499, "eval_fcm_dpo/beta": 0.006810956634581089, "eval_logits/chosen": 0.7065654397010803, "eval_logits/rejected": 0.6591749787330627, "eval_logps/chosen": -207.26998901367188, "eval_logps/ref_chosen": -74.85946655273438, "eval_logps/ref_rejected": -79.54898834228516, "eval_logps/rejected": -269.10394287109375, "eval_loss": 0.570812463760376, "eval_margin_dpo/margin_mean": 57.144439697265625, "eval_margin_dpo/margin_std": 97.93953704833984, "eval_runtime": 38.0483, "eval_samples_per_second": 60.528, "eval_steps_per_second": 1.892, "step": 600 }, { "epoch": 0.90854119425548, "fcm_dpo/beta": 0.006693072617053986, "fcm_dpo/delta": -0.09248337894678116, "fcm_dpo/margin": 72.8189697265625, "fcm_dpo/q_t": 0.39083534479141235, "grad_norm": 12.468385696411133, "learning_rate": 1.2898117173950868e-08, "logits/chosen": 0.7074885368347168, "logits/rejected": 0.6287850141525269, "logps/chosen": -176.33799743652344, "logps/ref_chosen": -55.59432601928711, "logps/ref_rejected": -83.68630981445312, "logps/rejected": -277.24896240234375, "loss": 1.0715, "margin_dpo/margin_mean": 72.8189697265625, "margin_dpo/margin_std": 104.85636138916016, "step": 601 }, { "epoch": 0.91005291005291, "fcm_dpo/beta": 0.0066335154697299, "fcm_dpo/delta": -0.0724048912525177, "fcm_dpo/margin": 70.70862579345703, "fcm_dpo/q_t": 0.3926694989204407, "grad_norm": 15.210699081420898, "learning_rate": 1.2482220564763667e-08, "logits/chosen": 0.7195205688476562, "logits/rejected": 0.6898149251937866, "logps/chosen": -165.505615234375, "logps/ref_chosen": -56.349185943603516, "logps/ref_rejected": -71.9959716796875, "logps/rejected": -251.86102294921875, "loss": 1.054, "margin_dpo/margin_mean": 70.70862579345703, "margin_dpo/margin_std": 89.39802551269531, "step": 602 }, { "epoch": 0.9115646258503401, "fcm_dpo/beta": 0.006544841453433037, "fcm_dpo/delta": -0.0342128649353981, "fcm_dpo/margin": 66.09971618652344, "fcm_dpo/q_t": 0.40155255794525146, "grad_norm": 15.755361557006836, "learning_rate": 1.2072967838448051e-08, "logits/chosen": 0.7011324167251587, "logits/rejected": 0.6412418484687805, "logps/chosen": -176.6502685546875, "logps/ref_chosen": -53.16838836669922, "logps/ref_rejected": -73.8604736328125, "logps/rejected": -263.44207763671875, "loss": 1.0945, "margin_dpo/margin_mean": 66.09971618652344, "margin_dpo/margin_std": 96.66177368164062, "step": 603 }, { "epoch": 0.9130763416477702, "fcm_dpo/beta": 0.006565750576555729, "fcm_dpo/delta": 0.011286220513284206, "fcm_dpo/margin": 59.26122283935547, "fcm_dpo/q_t": 0.4123130440711975, "grad_norm": 14.889829635620117, "learning_rate": 1.1670370442682459e-08, "logits/chosen": 0.6894519329071045, "logits/rejected": 0.6947340965270996, "logps/chosen": -192.02676391601562, "logps/ref_chosen": -72.64942169189453, "logps/ref_rejected": -69.8792724609375, "logps/rejected": -248.51783752441406, "loss": 1.1337, "margin_dpo/margin_mean": 59.26122283935547, "margin_dpo/margin_std": 99.123291015625, "step": 604 }, { "epoch": 0.9145880574452003, "fcm_dpo/beta": 0.006560338661074638, "fcm_dpo/delta": 0.01623372733592987, "fcm_dpo/margin": 58.58209228515625, "fcm_dpo/q_t": 0.4119049906730652, "grad_norm": 14.635528564453125, "learning_rate": 1.1274439638981532e-08, "logits/chosen": 0.7958291172981262, "logits/rejected": 0.7365133166313171, "logps/chosen": -199.8400421142578, "logps/ref_chosen": -61.61284637451172, "logps/ref_rejected": -79.34398651123047, "logps/rejected": -276.1532897949219, "loss": 1.1393, "margin_dpo/margin_mean": 58.58209228515625, "margin_dpo/margin_std": 99.20114135742188, "step": 605 }, { "epoch": 0.9160997732426304, "fcm_dpo/beta": 0.0065470244735479355, "fcm_dpo/delta": -0.061599597334861755, "fcm_dpo/margin": 70.06275939941406, "fcm_dpo/q_t": 0.3971368670463562, "grad_norm": 16.981082916259766, "learning_rate": 1.0885186502381016e-08, "logits/chosen": 0.7151072025299072, "logits/rejected": 0.6450438499450684, "logps/chosen": -175.30453491210938, "logps/ref_chosen": -54.46424102783203, "logps/ref_rejected": -79.62708282470703, "logps/rejected": -270.5301513671875, "loss": 1.0781, "margin_dpo/margin_mean": 70.06275939941406, "margin_dpo/margin_std": 98.99264526367188, "step": 606 }, { "epoch": 0.9176114890400605, "fcm_dpo/beta": 0.0064436523243784904, "fcm_dpo/delta": -0.0057749077677726746, "fcm_dpo/margin": 62.736549377441406, "fcm_dpo/q_t": 0.4076859951019287, "grad_norm": 14.168997764587402, "learning_rate": 1.0502621921127774e-08, "logits/chosen": 0.725983202457428, "logits/rejected": 0.6987679600715637, "logps/chosen": -198.48764038085938, "logps/ref_chosen": -62.86086654663086, "logps/ref_rejected": -72.5501937866211, "logps/rejected": -270.91351318359375, "loss": 1.1196, "margin_dpo/margin_mean": 62.736549377441406, "margin_dpo/margin_std": 96.45252990722656, "step": 607 }, { "epoch": 0.9191232048374905, "fcm_dpo/beta": 0.006499715615063906, "fcm_dpo/delta": 0.00026232190430164337, "fcm_dpo/margin": 61.499488830566406, "fcm_dpo/q_t": 0.408882200717926, "grad_norm": 14.22420883178711, "learning_rate": 1.0126756596375685e-08, "logits/chosen": 0.7125911116600037, "logits/rejected": 0.6294840574264526, "logps/chosen": -200.25701904296875, "logps/ref_chosen": -63.18071746826172, "logps/ref_rejected": -99.15888214111328, "logps/rejected": -297.73468017578125, "loss": 1.1046, "margin_dpo/margin_mean": 61.499488830566406, "margin_dpo/margin_std": 88.79376220703125, "step": 608 }, { "epoch": 0.9206349206349206, "fcm_dpo/beta": 0.006406780332326889, "fcm_dpo/delta": -0.0627242773771286, "fcm_dpo/margin": 71.68486785888672, "fcm_dpo/q_t": 0.3930833339691162, "grad_norm": 12.658600807189941, "learning_rate": 9.757601041885694e-09, "logits/chosen": 0.822446346282959, "logits/rejected": 0.7840192914009094, "logps/chosen": -170.25494384765625, "logps/ref_chosen": -48.62322235107422, "logps/ref_rejected": -68.28271484375, "logps/rejected": -261.59930419921875, "loss": 1.0534, "margin_dpo/margin_mean": 71.68486785888672, "margin_dpo/margin_std": 85.14846801757812, "step": 609 }, { "epoch": 0.9221466364323507, "fcm_dpo/beta": 0.006349500268697739, "fcm_dpo/delta": -0.041650526225566864, "fcm_dpo/margin": 69.20389556884766, "fcm_dpo/q_t": 0.4011300802230835, "grad_norm": 14.117827415466309, "learning_rate": 9.395165583732379e-09, "logits/chosen": 0.7303283214569092, "logits/rejected": 0.7262221574783325, "logps/chosen": -203.7093505859375, "logps/ref_chosen": -72.66513061523438, "logps/ref_rejected": -87.15310668945312, "logps/rejected": -287.4012145996094, "loss": 1.086, "margin_dpo/margin_mean": 69.20388793945312, "margin_dpo/margin_std": 98.40621185302734, "step": 610 }, { "epoch": 0.9236583522297808, "fcm_dpo/beta": 0.006394756026566029, "fcm_dpo/delta": 0.05369244143366814, "fcm_dpo/margin": 54.4486083984375, "fcm_dpo/q_t": 0.4193510413169861, "grad_norm": 14.468083381652832, "learning_rate": 9.03946036001449e-09, "logits/chosen": 0.7536121606826782, "logits/rejected": 0.7031623125076294, "logps/chosen": -171.63223266601562, "logps/ref_chosen": -48.30857849121094, "logps/ref_rejected": -70.6141128540039, "logps/rejected": -248.38636779785156, "loss": 1.1383, "margin_dpo/margin_mean": 54.448604583740234, "margin_dpo/margin_std": 84.60821533203125, "step": 611 }, { "epoch": 0.9251700680272109, "fcm_dpo/beta": 0.006336958147585392, "fcm_dpo/delta": -0.09963831305503845, "fcm_dpo/margin": 78.07444763183594, "fcm_dpo/q_t": 0.38563889265060425, "grad_norm": 12.297904014587402, "learning_rate": 8.690495320571839e-09, "logits/chosen": 0.6890215277671814, "logits/rejected": 0.6157269477844238, "logps/chosen": -194.6959686279297, "logps/ref_chosen": -61.23155975341797, "logps/ref_rejected": -94.37979888916016, "logps/rejected": -305.91864013671875, "loss": 1.0354, "margin_dpo/margin_mean": 78.07444763183594, "margin_dpo/margin_std": 94.86837768554688, "step": 612 }, { "epoch": 0.926681783824641, "fcm_dpo/beta": 0.006252289284020662, "fcm_dpo/delta": -0.07290597259998322, "fcm_dpo/margin": 75.09466552734375, "fcm_dpo/q_t": 0.39151662588119507, "grad_norm": 11.459220886230469, "learning_rate": 8.348280226706722e-09, "logits/chosen": 0.6596091985702515, "logits/rejected": 0.6567984819412231, "logps/chosen": -167.78414916992188, "logps/ref_chosen": -53.98310852050781, "logps/ref_rejected": -58.32208251953125, "logps/rejected": -247.21780395507812, "loss": 1.0501, "margin_dpo/margin_mean": 75.09466552734375, "margin_dpo/margin_std": 92.46086120605469, "step": 613 }, { "epoch": 0.9281934996220711, "fcm_dpo/beta": 0.006214224733412266, "fcm_dpo/delta": -0.024648673832416534, "fcm_dpo/margin": 68.12400817871094, "fcm_dpo/q_t": 0.4012283682823181, "grad_norm": 14.5516939163208, "learning_rate": 8.012824650910937e-09, "logits/chosen": 0.7856525182723999, "logits/rejected": 0.7776677012443542, "logps/chosen": -194.23440551757812, "logps/ref_chosen": -60.24303436279297, "logps/ref_rejected": -72.26258850097656, "logps/rejected": -274.3779602050781, "loss": 1.0817, "margin_dpo/margin_mean": 68.12400817871094, "margin_dpo/margin_std": 88.94013977050781, "step": 614 }, { "epoch": 0.9297052154195011, "fcm_dpo/beta": 0.006137081887573004, "fcm_dpo/delta": -0.009469401091337204, "fcm_dpo/margin": 66.5538558959961, "fcm_dpo/q_t": 0.4069540798664093, "grad_norm": 13.770292282104492, "learning_rate": 7.684137976598088e-09, "logits/chosen": 0.6927610039710999, "logits/rejected": 0.6372318863868713, "logps/chosen": -212.01260375976562, "logps/ref_chosen": -72.09467315673828, "logps/ref_rejected": -104.02980041503906, "logps/rejected": -310.5015869140625, "loss": 1.1241, "margin_dpo/margin_mean": 66.5538558959961, "margin_dpo/margin_std": 107.617431640625, "step": 615 }, { "epoch": 0.9312169312169312, "fcm_dpo/beta": 0.0061771986074745655, "fcm_dpo/delta": 0.023163840174674988, "fcm_dpo/margin": 61.14124298095703, "fcm_dpo/q_t": 0.41386234760284424, "grad_norm": 11.939630508422852, "learning_rate": 7.36222939784098e-09, "logits/chosen": 0.789786696434021, "logits/rejected": 0.7031727433204651, "logps/chosen": -190.14306640625, "logps/ref_chosen": -58.530723571777344, "logps/ref_rejected": -75.48025512695312, "logps/rejected": -268.23382568359375, "loss": 1.1217, "margin_dpo/margin_mean": 61.14124298095703, "margin_dpo/margin_std": 92.75981140136719, "step": 616 }, { "epoch": 0.9327286470143613, "fcm_dpo/beta": 0.006244382821023464, "fcm_dpo/delta": 0.1002284437417984, "fcm_dpo/margin": 48.437278747558594, "fcm_dpo/q_t": 0.4307482838630676, "grad_norm": 15.972477912902832, "learning_rate": 7.047107919114586e-09, "logits/chosen": 0.7590749263763428, "logits/rejected": 0.7044551372528076, "logps/chosen": -203.44656372070312, "logps/ref_chosen": -57.608673095703125, "logps/ref_rejected": -81.22109985351562, "logps/rejected": -275.49627685546875, "loss": 1.1846, "margin_dpo/margin_mean": 48.437278747558594, "margin_dpo/margin_std": 87.9116439819336, "step": 617 }, { "epoch": 0.9342403628117913, "fcm_dpo/beta": 0.006315155886113644, "fcm_dpo/delta": 0.010729154571890831, "fcm_dpo/margin": 61.689369201660156, "fcm_dpo/q_t": 0.4114975333213806, "grad_norm": 15.93805980682373, "learning_rate": 6.738782355044048e-09, "logits/chosen": 0.7244783639907837, "logits/rejected": 0.6130063533782959, "logps/chosen": -183.0014190673828, "logps/ref_chosen": -56.69594192504883, "logps/ref_rejected": -85.92362976074219, "logps/rejected": -273.9184875488281, "loss": 1.1092, "margin_dpo/margin_mean": 61.689361572265625, "margin_dpo/margin_std": 89.48055267333984, "step": 618 }, { "epoch": 0.9357520786092215, "fcm_dpo/beta": 0.006306151859462261, "fcm_dpo/delta": -0.02410227060317993, "fcm_dpo/margin": 67.08335876464844, "fcm_dpo/q_t": 0.4025493562221527, "grad_norm": 13.588555335998535, "learning_rate": 6.437261330158206e-09, "logits/chosen": 0.8057536482810974, "logits/rejected": 0.7263665795326233, "logps/chosen": -178.52919006347656, "logps/ref_chosen": -54.05841827392578, "logps/ref_rejected": -83.55493927001953, "logps/rejected": -275.10906982421875, "loss": 1.0933, "margin_dpo/margin_mean": 67.08335876464844, "margin_dpo/margin_std": 96.20336151123047, "step": 619 }, { "epoch": 0.9372637944066515, "fcm_dpo/beta": 0.006400998681783676, "fcm_dpo/delta": 0.03517021983861923, "fcm_dpo/margin": 56.845909118652344, "fcm_dpo/q_t": 0.4166967272758484, "grad_norm": 14.824642181396484, "learning_rate": 6.142553278648238e-09, "logits/chosen": 0.7471519708633423, "logits/rejected": 0.7479252815246582, "logps/chosen": -186.22177124023438, "logps/ref_chosen": -63.36971664428711, "logps/ref_rejected": -65.68269348144531, "logps/rejected": -245.38064575195312, "loss": 1.1397, "margin_dpo/margin_mean": 56.845909118652344, "margin_dpo/margin_std": 87.86839294433594, "step": 620 }, { "epoch": 0.9387755102040817, "fcm_dpo/beta": 0.006424080580472946, "fcm_dpo/delta": 0.06066777557134628, "fcm_dpo/margin": 53.09856033325195, "fcm_dpo/q_t": 0.4227798581123352, "grad_norm": 16.096923828125, "learning_rate": 5.854666444131934e-09, "logits/chosen": 0.7947447896003723, "logits/rejected": 0.6759539842605591, "logps/chosen": -179.39161682128906, "logps/ref_chosen": -52.321224212646484, "logps/ref_rejected": -88.09001159667969, "logps/rejected": -268.25897216796875, "loss": 1.1665, "margin_dpo/margin_mean": 53.09856033325195, "margin_dpo/margin_std": 97.253173828125, "step": 621 }, { "epoch": 0.9402872260015117, "fcm_dpo/beta": 0.006452606059610844, "fcm_dpo/delta": 0.012101054191589355, "fcm_dpo/margin": 60.169471740722656, "fcm_dpo/q_t": 0.41136711835861206, "grad_norm": 15.709192276000977, "learning_rate": 5.573608879422875e-09, "logits/chosen": 0.6908876299858093, "logits/rejected": 0.6467409729957581, "logps/chosen": -197.26239013671875, "logps/ref_chosen": -59.86545944213867, "logps/ref_rejected": -81.86668395996094, "logps/rejected": -279.43310546875, "loss": 1.1211, "margin_dpo/margin_mean": 60.169471740722656, "margin_dpo/margin_std": 93.21742248535156, "step": 622 }, { "epoch": 0.9417989417989417, "fcm_dpo/beta": 0.006413621827960014, "fcm_dpo/delta": -0.007789114490151405, "fcm_dpo/margin": 63.49436950683594, "fcm_dpo/q_t": 0.4069734215736389, "grad_norm": 13.204484939575195, "learning_rate": 5.299388446305342e-09, "logits/chosen": 0.7018610239028931, "logits/rejected": 0.6416829228401184, "logps/chosen": -213.017822265625, "logps/ref_chosen": -67.36846160888672, "logps/ref_rejected": -82.02733612060547, "logps/rejected": -291.17108154296875, "loss": 1.1068, "margin_dpo/margin_mean": 63.49437713623047, "margin_dpo/margin_std": 94.36400604248047, "step": 623 }, { "epoch": 0.9433106575963719, "fcm_dpo/beta": 0.006365837063640356, "fcm_dpo/delta": -0.053610917180776596, "fcm_dpo/margin": 70.83954620361328, "fcm_dpo/q_t": 0.3987389802932739, "grad_norm": 15.614096641540527, "learning_rate": 5.03201281531429e-09, "logits/chosen": 0.7449072599411011, "logits/rejected": 0.6436171531677246, "logps/chosen": -173.16964721679688, "logps/ref_chosen": -51.02655029296875, "logps/ref_rejected": -76.49203491210938, "logps/rejected": -269.47467041015625, "loss": 1.0817, "margin_dpo/margin_mean": 70.83954620361328, "margin_dpo/margin_std": 100.73272705078125, "step": 624 }, { "epoch": 0.9448223733938019, "fcm_dpo/beta": 0.0064194174483418465, "fcm_dpo/delta": 0.058818817138671875, "fcm_dpo/margin": 53.46350860595703, "fcm_dpo/q_t": 0.4236186742782593, "grad_norm": 14.774746894836426, "learning_rate": 4.7714894655209174e-09, "logits/chosen": 0.7884582281112671, "logits/rejected": 0.6976134181022644, "logps/chosen": -180.1649932861328, "logps/ref_chosen": -54.20761489868164, "logps/ref_rejected": -84.93669128417969, "logps/rejected": -264.35760498046875, "loss": 1.1744, "margin_dpo/margin_mean": 53.46350860595703, "margin_dpo/margin_std": 102.96124267578125, "step": 625 }, { "epoch": 0.9463340891912321, "fcm_dpo/beta": 0.006372136529535055, "fcm_dpo/delta": -0.05755068361759186, "fcm_dpo/margin": 71.36672973632812, "fcm_dpo/q_t": 0.39941319823265076, "grad_norm": 13.7017183303833, "learning_rate": 4.517825684323323e-09, "logits/chosen": 0.8437789082527161, "logits/rejected": 0.7019960880279541, "logps/chosen": -164.62301635742188, "logps/ref_chosen": -45.06201934814453, "logps/ref_rejected": -89.66368103027344, "logps/rejected": -280.5914306640625, "loss": 1.0918, "margin_dpo/margin_mean": 71.36672973632812, "margin_dpo/margin_std": 108.4215316772461, "step": 626 }, { "epoch": 0.9478458049886621, "fcm_dpo/beta": 0.0062685152515769005, "fcm_dpo/delta": -0.08397074788808823, "fcm_dpo/margin": 76.50228881835938, "fcm_dpo/q_t": 0.38953036069869995, "grad_norm": 13.963146209716797, "learning_rate": 4.271028567242818e-09, "logits/chosen": 0.6753963232040405, "logits/rejected": 0.5504001975059509, "logps/chosen": -194.23684692382812, "logps/ref_chosen": -58.791053771972656, "logps/ref_rejected": -94.90802001953125, "logps/rejected": -306.8561096191406, "loss": 1.0565, "margin_dpo/margin_mean": 76.50228881835938, "margin_dpo/margin_std": 100.38463592529297, "step": 627 }, { "epoch": 0.9493575207860923, "fcm_dpo/beta": 0.0062351408414542675, "fcm_dpo/delta": -0.08525798469781876, "fcm_dpo/margin": 77.04563903808594, "fcm_dpo/q_t": 0.3889637589454651, "grad_norm": 17.903270721435547, "learning_rate": 4.0311050177251895e-09, "logits/chosen": 0.7402326464653015, "logits/rejected": 0.7005974650382996, "logps/chosen": -173.79000854492188, "logps/ref_chosen": -52.80357360839844, "logps/ref_rejected": -76.49468994140625, "logps/rejected": -274.5267639160156, "loss": 1.0697, "margin_dpo/margin_mean": 77.04563903808594, "margin_dpo/margin_std": 100.9229736328125, "step": 628 }, { "epoch": 0.9508692365835223, "fcm_dpo/beta": 0.006222175434231758, "fcm_dpo/delta": 0.054710421711206436, "fcm_dpo/margin": 55.74674606323242, "fcm_dpo/q_t": 0.42095980048179626, "grad_norm": 11.830018997192383, "learning_rate": 3.798061746947995e-09, "logits/chosen": 0.7364556789398193, "logits/rejected": 0.7304829359054565, "logps/chosen": -201.89068603515625, "logps/ref_chosen": -70.71749877929688, "logps/ref_rejected": -78.96273803710938, "logps/rejected": -265.8826599121094, "loss": 1.1372, "margin_dpo/margin_mean": 55.74674987792969, "margin_dpo/margin_std": 85.56368255615234, "step": 629 }, { "epoch": 0.9523809523809523, "fcm_dpo/beta": 0.006174253765493631, "fcm_dpo/delta": -0.06831058114767075, "fcm_dpo/margin": 75.34062194824219, "fcm_dpo/q_t": 0.39373886585235596, "grad_norm": 10.842805862426758, "learning_rate": 3.5719052736323806e-09, "logits/chosen": 0.6691682934761047, "logits/rejected": 0.6251407861709595, "logps/chosen": -180.2628173828125, "logps/ref_chosen": -56.201412200927734, "logps/ref_rejected": -74.69807434082031, "logps/rejected": -274.10009765625, "loss": 1.0568, "margin_dpo/margin_mean": 75.34062194824219, "margin_dpo/margin_std": 95.94551086425781, "step": 630 }, { "epoch": 0.9538926681783825, "fcm_dpo/beta": 0.005993704777210951, "fcm_dpo/delta": -0.10610571503639221, "fcm_dpo/margin": 83.22488403320312, "fcm_dpo/q_t": 0.3868658244609833, "grad_norm": 13.053339958190918, "learning_rate": 3.352641923861144e-09, "logits/chosen": 0.8554993867874146, "logits/rejected": 0.735321044921875, "logps/chosen": -176.91673278808594, "logps/ref_chosen": -58.82059860229492, "logps/ref_rejected": -96.51437377929688, "logps/rejected": -297.8354187011719, "loss": 1.0444, "margin_dpo/margin_mean": 83.22488403320312, "margin_dpo/margin_std": 104.36235046386719, "step": 631 }, { "epoch": 0.9554043839758125, "fcm_dpo/beta": 0.005943012423813343, "fcm_dpo/delta": -0.08964134752750397, "fcm_dpo/margin": 81.67279052734375, "fcm_dpo/q_t": 0.3873947858810425, "grad_norm": 12.306257247924805, "learning_rate": 3.140277830901428e-09, "logits/chosen": 0.7849439382553101, "logits/rejected": 0.7638136744499207, "logps/chosen": -179.699951171875, "logps/ref_chosen": -58.786048889160156, "logps/ref_rejected": -67.21923828125, "logps/rejected": -269.8059387207031, "loss": 1.033, "margin_dpo/margin_mean": 81.67279052734375, "margin_dpo/margin_std": 94.06131744384766, "step": 632 }, { "epoch": 0.9569160997732427, "fcm_dpo/beta": 0.005942155607044697, "fcm_dpo/delta": 0.029256466776132584, "fcm_dpo/margin": 62.54181671142578, "fcm_dpo/q_t": 0.4152376651763916, "grad_norm": 12.558691024780273, "learning_rate": 2.9348189350335007e-09, "logits/chosen": 0.729263424873352, "logits/rejected": 0.6651930809020996, "logps/chosen": -166.05075073242188, "logps/ref_chosen": -52.13019561767578, "logps/ref_rejected": -67.23016357421875, "logps/rejected": -243.69253540039062, "loss": 1.1228, "margin_dpo/margin_mean": 62.54180908203125, "margin_dpo/margin_std": 93.7554931640625, "step": 633 }, { "epoch": 0.9584278155706727, "fcm_dpo/beta": 0.006169452797621489, "fcm_dpo/delta": 0.2458600401878357, "fcm_dpo/margin": 25.533851623535156, "fcm_dpo/q_t": 0.4652412533760071, "grad_norm": 15.92563533782959, "learning_rate": 2.736270983384276e-09, "logits/chosen": 0.8017531633377075, "logits/rejected": 0.8143984079360962, "logps/chosen": -203.40805053710938, "logps/ref_chosen": -60.97979736328125, "logps/ref_rejected": -58.50825119018555, "logps/rejected": -226.47035217285156, "loss": 1.3195, "margin_dpo/margin_mean": 25.533855438232422, "margin_dpo/margin_std": 96.38629150390625, "step": 634 }, { "epoch": 0.9599395313681028, "fcm_dpo/beta": 0.006332189776003361, "fcm_dpo/delta": 0.08927176892757416, "fcm_dpo/margin": 49.46977233886719, "fcm_dpo/q_t": 0.4298204183578491, "grad_norm": 14.447734832763672, "learning_rate": 2.5446395297668287e-09, "logits/chosen": 0.6402159929275513, "logits/rejected": 0.574030339717865, "logps/chosen": -220.11471557617188, "logps/ref_chosen": -65.9730224609375, "logps/ref_rejected": -85.61317443847656, "logps/rejected": -289.2246398925781, "loss": 1.2003, "margin_dpo/margin_mean": 49.46977233886719, "margin_dpo/margin_std": 103.6912612915039, "step": 635 }, { "epoch": 0.9614512471655329, "fcm_dpo/beta": 0.00630118977278471, "fcm_dpo/delta": -0.030526097863912582, "fcm_dpo/margin": 68.08009338378906, "fcm_dpo/q_t": 0.4005919098854065, "grad_norm": 11.525025367736816, "learning_rate": 2.359929934524829e-09, "logits/chosen": 0.7165452837944031, "logits/rejected": 0.6174004077911377, "logps/chosen": -170.86050415039062, "logps/ref_chosen": -49.140167236328125, "logps/ref_rejected": -81.26971435546875, "logps/rejected": -271.07012939453125, "loss": 1.0798, "margin_dpo/margin_mean": 68.08008575439453, "margin_dpo/margin_std": 90.331787109375, "step": 636 }, { "epoch": 0.9629629629629629, "fcm_dpo/beta": 0.006400472484529018, "fcm_dpo/delta": 0.07462954521179199, "fcm_dpo/margin": 51.14664840698242, "fcm_dpo/q_t": 0.42703452706336975, "grad_norm": 15.60743236541748, "learning_rate": 2.1821473643827137e-09, "logits/chosen": 0.6757951974868774, "logits/rejected": 0.6063965559005737, "logps/chosen": -232.64633178710938, "logps/ref_chosen": -73.69658660888672, "logps/ref_rejected": -83.01487731933594, "logps/rejected": -293.11126708984375, "loss": 1.189, "margin_dpo/margin_mean": 51.14665222167969, "margin_dpo/margin_std": 103.88670349121094, "step": 637 }, { "epoch": 0.9644746787603931, "fcm_dpo/beta": 0.006420266814529896, "fcm_dpo/delta": 0.009079055860638618, "fcm_dpo/margin": 60.937583923339844, "fcm_dpo/q_t": 0.4115605056285858, "grad_norm": 14.104433059692383, "learning_rate": 2.0112967923011646e-09, "logits/chosen": 0.7370562553405762, "logits/rejected": 0.6876152753829956, "logps/chosen": -203.04010009765625, "logps/ref_chosen": -62.78158187866211, "logps/ref_rejected": -85.40478515625, "logps/rejected": -286.60089111328125, "loss": 1.1168, "margin_dpo/margin_mean": 60.937583923339844, "margin_dpo/margin_std": 93.2264633178711, "step": 638 }, { "epoch": 0.9659863945578231, "fcm_dpo/beta": 0.0063707176595926285, "fcm_dpo/delta": -0.05552205443382263, "fcm_dpo/margin": 71.11326599121094, "fcm_dpo/q_t": 0.3982432782649994, "grad_norm": 14.116220474243164, "learning_rate": 1.847382997337943e-09, "logits/chosen": 0.7373175024986267, "logits/rejected": 0.6314177513122559, "logps/chosen": -179.0370330810547, "logps/ref_chosen": -53.76658630371094, "logps/ref_rejected": -72.30009460449219, "logps/rejected": -268.68377685546875, "loss": 1.0759, "margin_dpo/margin_mean": 71.11326599121094, "margin_dpo/margin_std": 98.96980285644531, "step": 639 }, { "epoch": 0.9674981103552532, "fcm_dpo/beta": 0.0063106524758040905, "fcm_dpo/delta": -0.018060026690363884, "fcm_dpo/margin": 66.09814453125, "fcm_dpo/q_t": 0.4042467474937439, "grad_norm": 13.222526550292969, "learning_rate": 1.690410564514244e-09, "logits/chosen": 0.8086303472518921, "logits/rejected": 0.7425358295440674, "logps/chosen": -181.33102416992188, "logps/ref_chosen": -51.41777801513672, "logps/ref_rejected": -77.27879333496094, "logps/rejected": -273.2901611328125, "loss": 1.0978, "margin_dpo/margin_mean": 66.09814453125, "margin_dpo/margin_std": 95.61439514160156, "step": 640 }, { "epoch": 0.9690098261526833, "fcm_dpo/beta": 0.006334484554827213, "fcm_dpo/delta": 0.02717267908155918, "fcm_dpo/margin": 59.01282501220703, "fcm_dpo/q_t": 0.41298502683639526, "grad_norm": 13.800342559814453, "learning_rate": 1.5403838846864692e-09, "logits/chosen": 0.7327412366867065, "logits/rejected": 0.711341381072998, "logps/chosen": -208.6962127685547, "logps/ref_chosen": -71.0546646118164, "logps/ref_rejected": -82.2440185546875, "logps/rejected": -278.89837646484375, "loss": 1.1079, "margin_dpo/margin_mean": 59.0128288269043, "margin_dpo/margin_std": 80.039794921875, "step": 641 }, { "epoch": 0.9705215419501134, "fcm_dpo/beta": 0.006432985886931419, "fcm_dpo/delta": 0.12851060926914215, "fcm_dpo/margin": 42.731483459472656, "fcm_dpo/q_t": 0.43809741735458374, "grad_norm": 16.78900146484375, "learning_rate": 1.3973071544233218e-09, "logits/chosen": 0.6944186687469482, "logits/rejected": 0.712386965751648, "logps/chosen": -216.907470703125, "logps/ref_chosen": -68.92927551269531, "logps/ref_rejected": -70.85682678222656, "logps/rejected": -261.5665283203125, "loss": 1.2293, "margin_dpo/margin_mean": 42.731483459472656, "margin_dpo/margin_std": 96.30165100097656, "step": 642 }, { "epoch": 0.9720332577475435, "fcm_dpo/beta": 0.006475288886576891, "fcm_dpo/delta": -0.036847636103630066, "fcm_dpo/margin": 67.19929504394531, "fcm_dpo/q_t": 0.4012775421142578, "grad_norm": 20.203689575195312, "learning_rate": 1.261184375888541e-09, "logits/chosen": 0.68418288230896, "logits/rejected": 0.5926668047904968, "logps/chosen": -196.42494201660156, "logps/ref_chosen": -65.30903625488281, "logps/ref_rejected": -83.61613464355469, "logps/rejected": -281.93133544921875, "loss": 1.097, "margin_dpo/margin_mean": 67.19929504394531, "margin_dpo/margin_std": 99.99290466308594, "step": 643 }, { "epoch": 0.9735449735449735, "fcm_dpo/beta": 0.0065782819874584675, "fcm_dpo/delta": 0.06409407407045364, "fcm_dpo/margin": 51.20503616333008, "fcm_dpo/q_t": 0.42324745655059814, "grad_norm": 13.637568473815918, "learning_rate": 1.1320193567288527e-09, "logits/chosen": 0.8207970261573792, "logits/rejected": 0.7883522510528564, "logps/chosen": -173.14559936523438, "logps/ref_chosen": -51.002601623535156, "logps/ref_rejected": -64.46372985839844, "logps/rejected": -237.811767578125, "loss": 1.1863, "margin_dpo/margin_mean": 51.20503616333008, "margin_dpo/margin_std": 101.434326171875, "step": 644 }, { "epoch": 0.9750566893424036, "fcm_dpo/beta": 0.0065458714962005615, "fcm_dpo/delta": -0.0054698544554412365, "fcm_dpo/margin": 61.90576934814453, "fcm_dpo/q_t": 0.4064505100250244, "grad_norm": 15.224577903747559, "learning_rate": 1.0098157099674987e-09, "logits/chosen": 0.7137904763221741, "logits/rejected": 0.695641279220581, "logps/chosen": -193.05609130859375, "logps/ref_chosen": -60.963409423828125, "logps/ref_rejected": -69.73353576660156, "logps/rejected": -263.73199462890625, "loss": 1.0942, "margin_dpo/margin_mean": 61.90576934814453, "margin_dpo/margin_std": 84.93324279785156, "step": 645 }, { "epoch": 0.9765684051398337, "fcm_dpo/beta": 0.006599565502256155, "fcm_dpo/delta": 0.04752783104777336, "fcm_dpo/margin": 53.65968322753906, "fcm_dpo/q_t": 0.42021092772483826, "grad_norm": 13.733308792114258, "learning_rate": 8.945768539031783e-10, "logits/chosen": 0.7736262679100037, "logits/rejected": 0.7145426869392395, "logps/chosen": -207.87789916992188, "logps/ref_chosen": -62.290069580078125, "logps/ref_rejected": -85.54812622070312, "logps/rejected": -284.795654296875, "loss": 1.1632, "margin_dpo/margin_mean": 53.659690856933594, "margin_dpo/margin_std": 99.115478515625, "step": 646 }, { "epoch": 0.9780801209372638, "fcm_dpo/beta": 0.006466761231422424, "fcm_dpo/delta": -0.16596609354019165, "fcm_dpo/margin": 86.12970733642578, "fcm_dpo/q_t": 0.37213167548179626, "grad_norm": 14.168996810913086, "learning_rate": 7.863060120144316e-10, "logits/chosen": 0.7759414315223694, "logits/rejected": 0.6749926805496216, "logps/chosen": -209.0861358642578, "logps/ref_chosen": -67.515869140625, "logps/ref_rejected": -101.50871276855469, "logps/rejected": -329.20867919921875, "loss": 0.9844, "margin_dpo/margin_mean": 86.12970733642578, "margin_dpo/margin_std": 90.1838150024414, "step": 647 }, { "epoch": 0.9795918367346939, "fcm_dpo/beta": 0.0063859750516712666, "fcm_dpo/delta": 0.01350357010960579, "fcm_dpo/margin": 60.570072174072266, "fcm_dpo/q_t": 0.4120126962661743, "grad_norm": 14.232726097106934, "learning_rate": 6.850062128694045e-10, "logits/chosen": 0.686457097530365, "logits/rejected": 0.6205060482025146, "logps/chosen": -205.75772094726562, "logps/ref_chosen": -64.59593963623047, "logps/ref_rejected": -83.384033203125, "logps/rejected": -285.11590576171875, "loss": 1.1388, "margin_dpo/margin_mean": 60.570072174072266, "margin_dpo/margin_std": 102.00686645507812, "step": 648 }, { "epoch": 0.981103552532124, "fcm_dpo/beta": 0.006387336179614067, "fcm_dpo/delta": 0.005479734390974045, "fcm_dpo/margin": 61.72749328613281, "fcm_dpo/q_t": 0.410408616065979, "grad_norm": 18.840017318725586, "learning_rate": 5.906802900412788e-10, "logits/chosen": 0.7889381647109985, "logits/rejected": 0.7258505821228027, "logps/chosen": -181.8843994140625, "logps/ref_chosen": -49.30964660644531, "logps/ref_rejected": -73.73710632324219, "logps/rejected": -268.03936767578125, "loss": 1.1341, "margin_dpo/margin_mean": 61.72749328613281, "margin_dpo/margin_std": 102.57989501953125, "step": 649 }, { "epoch": 0.982615268329554, "fcm_dpo/beta": 0.00640866719186306, "fcm_dpo/delta": 0.0005240924656391144, "fcm_dpo/margin": 62.324851989746094, "fcm_dpo/q_t": 0.40963131189346313, "grad_norm": 13.567985534667969, "learning_rate": 5.033308820289184e-10, "logits/chosen": 0.8344835042953491, "logits/rejected": 0.7642044425010681, "logps/chosen": -180.97103881835938, "logps/ref_chosen": -55.06325912475586, "logps/ref_rejected": -77.39610290527344, "logps/rejected": -265.62872314453125, "loss": 1.1328, "margin_dpo/margin_mean": 62.32485580444336, "margin_dpo/margin_std": 104.03192138671875, "step": 650 }, { "epoch": 0.9841269841269841, "fcm_dpo/beta": 0.006482687778770924, "fcm_dpo/delta": 0.05127622187137604, "fcm_dpo/margin": 54.04645919799805, "fcm_dpo/q_t": 0.421572208404541, "grad_norm": 12.98111343383789, "learning_rate": 4.2296043218295606e-10, "logits/chosen": 0.8331591486930847, "logits/rejected": 0.7519968152046204, "logps/chosen": -180.65142822265625, "logps/ref_chosen": -54.065162658691406, "logps/ref_rejected": -77.79080200195312, "logps/rejected": -258.42352294921875, "loss": 1.15, "margin_dpo/margin_mean": 54.04645919799805, "margin_dpo/margin_std": 91.8290023803711, "step": 651 }, { "epoch": 0.9856386999244142, "fcm_dpo/beta": 0.006511835381388664, "fcm_dpo/delta": 0.05867896229028702, "fcm_dpo/margin": 52.69062423706055, "fcm_dpo/q_t": 0.42380571365356445, "grad_norm": 15.500580787658691, "learning_rate": 3.4957118863768176e-10, "logits/chosen": 0.7394665479660034, "logits/rejected": 0.6864569783210754, "logps/chosen": -212.748779296875, "logps/ref_chosen": -63.64030456542969, "logps/ref_rejected": -78.86882019042969, "logps/rejected": -280.66790771484375, "loss": 1.1808, "margin_dpo/margin_mean": 52.69062423706055, "margin_dpo/margin_std": 102.77951049804688, "step": 652 }, { "epoch": 0.9871504157218443, "fcm_dpo/beta": 0.0065590618178248405, "fcm_dpo/delta": -0.02155652642250061, "fcm_dpo/margin": 64.1231918334961, "fcm_dpo/q_t": 0.4042917490005493, "grad_norm": 14.692912101745605, "learning_rate": 2.831652042480093e-10, "logits/chosen": 0.7130542993545532, "logits/rejected": 0.6688984632492065, "logps/chosen": -195.92251586914062, "logps/ref_chosen": -61.668373107910156, "logps/ref_rejected": -73.83012390136719, "logps/rejected": -272.20745849609375, "loss": 1.1112, "margin_dpo/margin_mean": 64.1231918334961, "margin_dpo/margin_std": 100.28158569335938, "step": 653 }, { "epoch": 0.9886621315192744, "fcm_dpo/beta": 0.006530907936394215, "fcm_dpo/delta": 0.05786158889532089, "fcm_dpo/margin": 52.50005340576172, "fcm_dpo/q_t": 0.4228893518447876, "grad_norm": 13.715435981750488, "learning_rate": 2.2374433653205016e-10, "logits/chosen": 0.7051883935928345, "logits/rejected": 0.5993505120277405, "logps/chosen": -196.5059051513672, "logps/ref_chosen": -57.568267822265625, "logps/ref_rejected": -87.74789428710938, "logps/rejected": -279.1855773925781, "loss": 1.1622, "margin_dpo/margin_mean": 52.500057220458984, "margin_dpo/margin_std": 88.70083618164062, "step": 654 }, { "epoch": 0.9901738473167044, "fcm_dpo/beta": 0.006401837803423405, "fcm_dpo/delta": -0.15566277503967285, "fcm_dpo/margin": 85.00445556640625, "fcm_dpo/q_t": 0.37384313344955444, "grad_norm": 12.682045936584473, "learning_rate": 1.7131024761923852e-10, "logits/chosen": 0.7240867018699646, "logits/rejected": 0.6303001642227173, "logps/chosen": -161.2081756591797, "logps/ref_chosen": -52.14714813232422, "logps/ref_rejected": -80.85014343261719, "logps/rejected": -274.9156494140625, "loss": 0.9841, "margin_dpo/margin_mean": 85.00445556640625, "margin_dpo/margin_std": 81.77285766601562, "step": 655 }, { "epoch": 0.9916855631141346, "fcm_dpo/beta": 0.006386594846844673, "fcm_dpo/delta": -0.021982401609420776, "fcm_dpo/margin": 65.92683410644531, "fcm_dpo/q_t": 0.4033903479576111, "grad_norm": 10.721435546875, "learning_rate": 1.2586440420372934e-10, "logits/chosen": 0.6641607284545898, "logits/rejected": 0.613824188709259, "logps/chosen": -213.263427734375, "logps/ref_chosen": -73.25672912597656, "logps/ref_rejected": -85.35127258300781, "logps/rejected": -291.2847900390625, "loss": 1.0946, "margin_dpo/margin_mean": 65.92683410644531, "margin_dpo/margin_std": 94.80165100097656, "step": 656 }, { "epoch": 0.9931972789115646, "fcm_dpo/beta": 0.0063078515231609344, "fcm_dpo/delta": -0.07679837942123413, "fcm_dpo/margin": 75.00975036621094, "fcm_dpo/q_t": 0.3922984004020691, "grad_norm": 11.450084686279297, "learning_rate": 8.740807750345913e-11, "logits/chosen": 0.8300960063934326, "logits/rejected": 0.7342487573623657, "logps/chosen": -177.7998046875, "logps/ref_chosen": -49.72339630126953, "logps/ref_rejected": -75.1568603515625, "logps/rejected": -278.2430419921875, "loss": 1.0705, "margin_dpo/margin_mean": 75.00975036621094, "margin_dpo/margin_std": 104.88255310058594, "step": 657 }, { "epoch": 0.9947089947089947, "fcm_dpo/beta": 0.006343472748994827, "fcm_dpo/delta": 0.022385437041521072, "fcm_dpo/margin": 59.52809143066406, "fcm_dpo/q_t": 0.41501033306121826, "grad_norm": 12.572582244873047, "learning_rate": 5.594234322453539e-11, "logits/chosen": 0.7570410966873169, "logits/rejected": 0.7112739682197571, "logps/chosen": -197.8555908203125, "logps/ref_chosen": -63.04634094238281, "logps/ref_rejected": -83.44963073730469, "logps/rejected": -277.7869567871094, "loss": 1.1611, "margin_dpo/margin_mean": 59.52809143066406, "margin_dpo/margin_std": 110.2429428100586, "step": 658 }, { "epoch": 0.9962207105064248, "fcm_dpo/beta": 0.006361355073750019, "fcm_dpo/delta": 0.09799276292324066, "fcm_dpo/margin": 47.89827346801758, "fcm_dpo/q_t": 0.43068748712539673, "grad_norm": 17.234956741333008, "learning_rate": 3.146808153123293e-11, "logits/chosen": 0.8032434582710266, "logits/rejected": 0.7370003461837769, "logps/chosen": -194.7205810546875, "logps/ref_chosen": -55.0802001953125, "logps/ref_rejected": -71.91049194335938, "logps/rejected": -259.44915771484375, "loss": 1.2072, "margin_dpo/margin_mean": 47.89827346801758, "margin_dpo/margin_std": 100.34069061279297, "step": 659 }, { "epoch": 0.9977324263038548, "fcm_dpo/beta": 0.0063782427459955215, "fcm_dpo/delta": -0.0683525800704956, "fcm_dpo/margin": 72.93805694580078, "fcm_dpo/q_t": 0.3942536413669586, "grad_norm": 13.592533111572266, "learning_rate": 1.3985977021235829e-11, "logits/chosen": 0.8316740989685059, "logits/rejected": 0.7547441720962524, "logps/chosen": -186.29197692871094, "logps/ref_chosen": -54.525917053222656, "logps/ref_rejected": -81.23604583740234, "logps/rejected": -285.940185546875, "loss": 1.0561, "margin_dpo/margin_mean": 72.93806457519531, "margin_dpo/margin_std": 93.95339965820312, "step": 660 }, { "epoch": 0.999244142101285, "fcm_dpo/beta": 0.0064483098685741425, "fcm_dpo/delta": 0.08764594793319702, "fcm_dpo/margin": 48.82136535644531, "fcm_dpo/q_t": 0.42920881509780884, "grad_norm": 14.784270286560059, "learning_rate": 3.4965187065971735e-12, "logits/chosen": 0.6959401965141296, "logits/rejected": 0.6122831106185913, "logps/chosen": -211.3416748046875, "logps/ref_chosen": -60.37263870239258, "logps/ref_rejected": -77.42874145507812, "logps/rejected": -277.2191162109375, "loss": 1.2096, "margin_dpo/margin_mean": 48.82136535644531, "margin_dpo/margin_std": 107.91928100585938, "step": 661 }, { "epoch": 0.999244142101285, "step": 661, "total_flos": 0.0, "train_loss": 1.1320684536863204, "train_runtime": 1756.8176, "train_samples_per_second": 24.098, "train_steps_per_second": 0.376 } ], "logging_steps": 1, "max_steps": 661, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }