Files
llama-3-8b-base-new-dpo-hh-…/trainer_state.json
ModelHub XC 73b3eb1f2d 初始化项目,由ModelHub XC社区提供模型
Model: jackf857/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.4
Source: Original Platform
2026-05-14 07:01:43 +08:00

12654 lines
465 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.999244142101285,
"eval_steps": 200,
"global_step": 661,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0015117157974300832,
"fcm_dpo/beta": 0.10407507419586182,
"fcm_dpo/delta": 0.19971171021461487,
"fcm_dpo/margin": -0.0013532638549804688,
"fcm_dpo/q_t": 0.5000380277633667,
"grad_norm": 29.367589950561523,
"learning_rate": 0.0,
"logits/chosen": 0.13337239623069763,
"logits/rejected": 0.12492949515581131,
"logps/chosen": -64.5841293334961,
"logps/ref_chosen": -64.61280822753906,
"logps/ref_rejected": -64.17195129394531,
"logps/rejected": -64.14192199707031,
"loss": 1.3866,
"margin_dpo/margin_mean": -0.0013527870178222656,
"margin_dpo/margin_std": 0.2561596930027008,
"step": 1
},
{
"epoch": 0.0030234315948601664,
"fcm_dpo/beta": 0.10614679753780365,
"fcm_dpo/delta": 0.19520045816898346,
"fcm_dpo/margin": 0.037450045347213745,
"fcm_dpo/q_t": 0.49902579188346863,
"grad_norm": 29.559593200683594,
"learning_rate": 7.462686567164179e-09,
"logits/chosen": 0.09414851665496826,
"logits/rejected": 0.07363267242908478,
"logps/chosen": -56.101890563964844,
"logps/ref_chosen": -56.0989990234375,
"logps/ref_rejected": -66.59971618652344,
"logps/rejected": -66.64006042480469,
"loss": 1.3824,
"margin_dpo/margin_mean": 0.03744968771934509,
"margin_dpo/margin_std": 0.27811938524246216,
"step": 2
},
{
"epoch": 0.0045351473922902496,
"fcm_dpo/beta": 0.11039507389068604,
"fcm_dpo/delta": 0.19718493521213531,
"fcm_dpo/margin": -0.027670353651046753,
"fcm_dpo/q_t": 0.5007483959197998,
"grad_norm": 34.550472259521484,
"learning_rate": 1.4925373134328357e-08,
"logits/chosen": 0.09949980676174164,
"logits/rejected": 0.0614531971514225,
"logps/chosen": -65.45438385009766,
"logps/ref_chosen": -65.45726013183594,
"logps/ref_rejected": -90.82853698730469,
"logps/rejected": -90.7979736328125,
"loss": 1.3895,
"margin_dpo/margin_mean": -0.027670294046401978,
"margin_dpo/margin_std": 0.3105807602405548,
"step": 3
},
{
"epoch": 0.006046863189720333,
"fcm_dpo/beta": 0.11483684927225113,
"fcm_dpo/delta": 0.1972825527191162,
"fcm_dpo/margin": 0.023561865091323853,
"fcm_dpo/q_t": 0.4993370473384857,
"grad_norm": 39.347110748291016,
"learning_rate": 2.2388059701492534e-08,
"logits/chosen": 0.11346002668142319,
"logits/rejected": 0.09720361232757568,
"logps/chosen": -76.83723449707031,
"logps/ref_chosen": -76.86018371582031,
"logps/ref_rejected": -79.91523742675781,
"logps/rejected": -79.91584777832031,
"loss": 1.3838,
"margin_dpo/margin_mean": 0.023561745882034302,
"margin_dpo/margin_std": 0.2997610569000244,
"step": 4
},
{
"epoch": 0.007558578987150416,
"fcm_dpo/beta": 0.11710208654403687,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.03837460279464722,
"fcm_dpo/q_t": 0.5011225938796997,
"grad_norm": 34.73713302612305,
"learning_rate": 2.9850746268656714e-08,
"logits/chosen": 0.08234861493110657,
"logits/rejected": 0.04350630193948746,
"logps/chosen": -63.00522232055664,
"logps/ref_chosen": -62.97134017944336,
"logps/ref_rejected": -79.9192123413086,
"logps/rejected": -79.91471862792969,
"loss": 1.3911,
"margin_dpo/margin_mean": -0.03837430477142334,
"margin_dpo/margin_std": 0.31006568670272827,
"step": 5
},
{
"epoch": 0.009070294784580499,
"fcm_dpo/beta": 0.11710208654403687,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.0550386905670166,
"fcm_dpo/q_t": 0.5016094446182251,
"grad_norm": 34.79764938354492,
"learning_rate": 3.731343283582089e-08,
"logits/chosen": 0.1415054202079773,
"logits/rejected": 0.10215698182582855,
"logps/chosen": -51.34320831298828,
"logps/ref_chosen": -51.30736541748047,
"logps/ref_rejected": -82.77239227294922,
"logps/rejected": -82.7531967163086,
"loss": 1.3933,
"margin_dpo/margin_mean": -0.055038899183273315,
"margin_dpo/margin_std": 0.40159815549850464,
"step": 6
},
{
"epoch": 0.010582010582010581,
"fcm_dpo/beta": 0.12184364348649979,
"fcm_dpo/delta": 0.19846273958683014,
"fcm_dpo/margin": -0.017774909734725952,
"fcm_dpo/q_t": 0.5005569458007812,
"grad_norm": 33.15961456298828,
"learning_rate": 4.477611940298507e-08,
"logits/chosen": 0.0027350555174052715,
"logits/rejected": -0.04020844027400017,
"logps/chosen": -51.44348907470703,
"logps/ref_chosen": -51.45941162109375,
"logps/ref_rejected": -66.3828125,
"logps/rejected": -66.34911346435547,
"loss": 1.3886,
"margin_dpo/margin_mean": -0.017774999141693115,
"margin_dpo/margin_std": 0.21953274309635162,
"step": 7
},
{
"epoch": 0.012093726379440665,
"fcm_dpo/beta": 0.12922216951847076,
"fcm_dpo/delta": 0.3909910321235657,
"fcm_dpo/margin": 0.07266899943351746,
"fcm_dpo/q_t": 0.49774932861328125,
"grad_norm": 36.538394927978516,
"learning_rate": 5.223880597014925e-08,
"logits/chosen": 0.07437925040721893,
"logits/rejected": 0.05213908106088638,
"logps/chosen": -62.17906951904297,
"logps/ref_chosen": -62.197547912597656,
"logps/ref_rejected": -74.66180419921875,
"logps/rejected": -74.71600341796875,
"loss": 1.3774,
"margin_dpo/margin_mean": 0.07266855239868164,
"margin_dpo/margin_std": 0.328883558511734,
"step": 8
},
{
"epoch": 0.013605442176870748,
"fcm_dpo/beta": 0.13690130412578583,
"fcm_dpo/delta": 0.19161710143089294,
"fcm_dpo/margin": 0.046944648027420044,
"fcm_dpo/q_t": 0.49847450852394104,
"grad_norm": 43.08509826660156,
"learning_rate": 5.970149253731343e-08,
"logits/chosen": 0.1650906503200531,
"logits/rejected": 0.10613168776035309,
"logps/chosen": -55.64226150512695,
"logps/ref_chosen": -55.629722595214844,
"logps/ref_rejected": -86.21221923828125,
"logps/rejected": -86.2717056274414,
"loss": 1.3804,
"margin_dpo/margin_mean": 0.04694512486457825,
"margin_dpo/margin_std": 0.31391388177871704,
"step": 9
},
{
"epoch": 0.015117157974300832,
"fcm_dpo/beta": 0.14234879612922668,
"fcm_dpo/delta": 0.1951005458831787,
"fcm_dpo/margin": 0.015252351760864258,
"fcm_dpo/q_t": 0.4995075762271881,
"grad_norm": 42.305152893066406,
"learning_rate": 6.71641791044776e-08,
"logits/chosen": 0.14489957690238953,
"logits/rejected": 0.11318053305149078,
"logps/chosen": -62.666648864746094,
"logps/ref_chosen": -62.69060134887695,
"logps/ref_rejected": -90.610107421875,
"logps/rejected": -90.6014175415039,
"loss": 1.3848,
"margin_dpo/margin_mean": 0.01525232195854187,
"margin_dpo/margin_std": 0.36224645376205444,
"step": 10
},
{
"epoch": 0.016628873771730914,
"fcm_dpo/beta": 0.1479165107011795,
"fcm_dpo/delta": 0.1918383240699768,
"fcm_dpo/margin": 0.036658138036727905,
"fcm_dpo/q_t": 0.49872156977653503,
"grad_norm": 43.193355560302734,
"learning_rate": 7.462686567164178e-08,
"logits/chosen": 0.11544118076562881,
"logits/rejected": 0.1084853783249855,
"logps/chosen": -65.735595703125,
"logps/ref_chosen": -65.76712036132812,
"logps/ref_rejected": -72.4764633178711,
"logps/rejected": -72.4815902709961,
"loss": 1.3816,
"margin_dpo/margin_mean": 0.03665819764137268,
"margin_dpo/margin_std": 0.338836669921875,
"step": 11
},
{
"epoch": 0.018140589569160998,
"fcm_dpo/beta": 0.15691301226615906,
"fcm_dpo/delta": 0.39368370175361633,
"fcm_dpo/margin": 0.04242005944252014,
"fcm_dpo/q_t": 0.4984257221221924,
"grad_norm": 43.97606658935547,
"learning_rate": 8.208955223880596e-08,
"logits/chosen": 0.03171448037028313,
"logits/rejected": 0.015399420633912086,
"logps/chosen": -60.683692932128906,
"logps/ref_chosen": -60.704891204833984,
"logps/ref_rejected": -69.41564178466797,
"logps/rejected": -69.43685913085938,
"loss": 1.3805,
"margin_dpo/margin_mean": 0.04242032766342163,
"margin_dpo/margin_std": 0.3394607901573181,
"step": 12
},
{
"epoch": 0.019652305366591082,
"fcm_dpo/beta": 0.1632937341928482,
"fcm_dpo/delta": 0.1996660977602005,
"fcm_dpo/margin": -0.03977265954017639,
"fcm_dpo/q_t": 0.5015895366668701,
"grad_norm": 47.461734771728516,
"learning_rate": 8.955223880597014e-08,
"logits/chosen": 0.10258764028549194,
"logits/rejected": 0.040754396468400955,
"logps/chosen": -49.935394287109375,
"logps/ref_chosen": -49.90925598144531,
"logps/ref_rejected": -92.37818145751953,
"logps/rejected": -92.36454772949219,
"loss": 1.3933,
"margin_dpo/margin_mean": -0.03977331519126892,
"margin_dpo/margin_std": 0.29797568917274475,
"step": 13
},
{
"epoch": 0.021164021164021163,
"fcm_dpo/beta": 0.16982074081897736,
"fcm_dpo/delta": 0.19240428507328033,
"fcm_dpo/margin": 0.03764998912811279,
"fcm_dpo/q_t": 0.49843764305114746,
"grad_norm": 49.910789489746094,
"learning_rate": 9.701492537313432e-08,
"logits/chosen": 0.05405683070421219,
"logits/rejected": 0.03726121038198471,
"logps/chosen": -60.57716369628906,
"logps/ref_chosen": -60.61879348754883,
"logps/ref_rejected": -71.79306030273438,
"logps/rejected": -71.78907775878906,
"loss": 1.3804,
"margin_dpo/margin_mean": 0.03765037655830383,
"margin_dpo/margin_std": 0.2961253225803375,
"step": 14
},
{
"epoch": 0.022675736961451247,
"fcm_dpo/beta": 0.17645195126533508,
"fcm_dpo/delta": 0.1906808763742447,
"fcm_dpo/margin": -0.017833799123764038,
"fcm_dpo/q_t": 0.5007701516151428,
"grad_norm": 58.39421463012695,
"learning_rate": 1.044776119402985e-07,
"logits/chosen": 0.09454727172851562,
"logits/rejected": 0.04970131069421768,
"logps/chosen": -63.49525451660156,
"logps/ref_chosen": -63.46953582763672,
"logps/ref_rejected": -88.88951110839844,
"logps/rejected": -88.89739990234375,
"loss": 1.3901,
"margin_dpo/margin_mean": -0.01783338189125061,
"margin_dpo/margin_std": 0.3462127149105072,
"step": 15
},
{
"epoch": 0.02418745275888133,
"fcm_dpo/beta": 0.1834275722503662,
"fcm_dpo/delta": 0.1969115436077118,
"fcm_dpo/margin": 0.003320828080177307,
"fcm_dpo/q_t": 0.49985557794570923,
"grad_norm": 49.16959762573242,
"learning_rate": 1.1194029850746268e-07,
"logits/chosen": 0.11556181311607361,
"logits/rejected": 0.07850321382284164,
"logps/chosen": -46.563758850097656,
"logps/ref_chosen": -46.53229904174805,
"logps/ref_rejected": -74.27533721923828,
"logps/rejected": -74.31011962890625,
"loss": 1.3865,
"margin_dpo/margin_mean": 0.003320828080177307,
"margin_dpo/margin_std": 0.333503782749176,
"step": 16
},
{
"epoch": 0.025699168556311415,
"fcm_dpo/beta": 0.19445914030075073,
"fcm_dpo/delta": 0.1945243775844574,
"fcm_dpo/margin": 0.008714765310287476,
"fcm_dpo/q_t": 0.49963706731796265,
"grad_norm": 64.02886199951172,
"learning_rate": 1.1940298507462686e-07,
"logits/chosen": 0.05395728349685669,
"logits/rejected": 0.035220514982938766,
"logps/chosen": -64.09066772460938,
"logps/ref_chosen": -64.07783508300781,
"logps/ref_rejected": -86.40876770019531,
"logps/rejected": -86.4303207397461,
"loss": 1.3858,
"margin_dpo/margin_mean": 0.00871431827545166,
"margin_dpo/margin_std": 0.33132392168045044,
"step": 17
},
{
"epoch": 0.027210884353741496,
"fcm_dpo/beta": 0.1984160840511322,
"fcm_dpo/delta": 0.19945251941680908,
"fcm_dpo/margin": -0.00810861587524414,
"fcm_dpo/q_t": 0.500395655632019,
"grad_norm": 55.707515716552734,
"learning_rate": 1.2686567164179106e-07,
"logits/chosen": 0.10771282017230988,
"logits/rejected": 0.06092551350593567,
"logps/chosen": -44.87413024902344,
"logps/ref_chosen": -44.87433624267578,
"logps/ref_rejected": -70.97604370117188,
"logps/rejected": -70.96773529052734,
"loss": 1.3886,
"margin_dpo/margin_mean": -0.008108556270599365,
"margin_dpo/margin_std": 0.27668923139572144,
"step": 18
},
{
"epoch": 0.02872260015117158,
"fcm_dpo/beta": 0.21469247341156006,
"fcm_dpo/delta": 0.3905554413795471,
"fcm_dpo/margin": 0.04513262212276459,
"fcm_dpo/q_t": 0.49763813614845276,
"grad_norm": 66.41383361816406,
"learning_rate": 1.343283582089552e-07,
"logits/chosen": 0.0882333517074585,
"logits/rejected": 0.07449622452259064,
"logps/chosen": -68.14503479003906,
"logps/ref_chosen": -68.1598129272461,
"logps/ref_rejected": -81.17138671875,
"logps/rejected": -81.20173645019531,
"loss": 1.3777,
"margin_dpo/margin_mean": 0.04513297975063324,
"margin_dpo/margin_std": 0.31693926453590393,
"step": 19
},
{
"epoch": 0.030234315948601664,
"fcm_dpo/beta": 0.22325360774993896,
"fcm_dpo/delta": 0.19887003302574158,
"fcm_dpo/margin": -0.009545460343360901,
"fcm_dpo/q_t": 0.5005234479904175,
"grad_norm": 65.36317443847656,
"learning_rate": 1.4179104477611938e-07,
"logits/chosen": 0.14998410642147064,
"logits/rejected": 0.12636204063892365,
"logps/chosen": -53.66815185546875,
"logps/ref_chosen": -53.67856216430664,
"logps/ref_rejected": -74.16911315917969,
"logps/rejected": -74.14915466308594,
"loss": 1.3893,
"margin_dpo/margin_mean": -0.009545668959617615,
"margin_dpo/margin_std": 0.2792781591415405,
"step": 20
},
{
"epoch": 0.031746031746031744,
"fcm_dpo/beta": 0.23690250515937805,
"fcm_dpo/delta": 0.19825556874275208,
"fcm_dpo/margin": 0.004699960350990295,
"fcm_dpo/q_t": 0.4997465908527374,
"grad_norm": 69.45832061767578,
"learning_rate": 1.4925373134328355e-07,
"logits/chosen": 0.11373256146907806,
"logits/rejected": 0.0878261923789978,
"logps/chosen": -64.69596862792969,
"logps/ref_chosen": -64.70155334472656,
"logps/ref_rejected": -81.02095031738281,
"logps/rejected": -81.02006530761719,
"loss": 1.3871,
"margin_dpo/margin_mean": 0.004698842763900757,
"margin_dpo/margin_std": 0.36591023206710815,
"step": 21
},
{
"epoch": 0.03325774754346183,
"fcm_dpo/beta": 0.25136297941207886,
"fcm_dpo/delta": 0.39239007234573364,
"fcm_dpo/margin": 0.031166553497314453,
"fcm_dpo/q_t": 0.4980974495410919,
"grad_norm": 72.23171997070312,
"learning_rate": 1.5671641791044775e-07,
"logits/chosen": 0.0061013903468847275,
"logits/rejected": -0.014899881556630135,
"logps/chosen": -58.05042266845703,
"logps/ref_chosen": -58.03599166870117,
"logps/ref_rejected": -80.72721862792969,
"logps/rejected": -80.77281188964844,
"loss": 1.3797,
"margin_dpo/margin_mean": 0.031166434288024902,
"margin_dpo/margin_std": 0.28403687477111816,
"step": 22
},
{
"epoch": 0.03476946334089191,
"fcm_dpo/beta": 0.2609216570854187,
"fcm_dpo/delta": 0.1793270856142044,
"fcm_dpo/margin": 0.06387266516685486,
"fcm_dpo/q_t": 0.49592792987823486,
"grad_norm": 86.23075866699219,
"learning_rate": 1.6417910447761193e-07,
"logits/chosen": 0.13783769309520721,
"logits/rejected": 0.11253305524587631,
"logps/chosen": -66.32743835449219,
"logps/ref_chosen": -66.35608673095703,
"logps/ref_rejected": -93.02769470214844,
"logps/rejected": -93.06291198730469,
"loss": 1.3715,
"margin_dpo/margin_mean": 0.06387221813201904,
"margin_dpo/margin_std": 0.3480584919452667,
"step": 23
},
{
"epoch": 0.036281179138321996,
"fcm_dpo/beta": 0.26560020446777344,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.0906066745519638,
"fcm_dpo/q_t": 0.5060100555419922,
"grad_norm": 70.8504638671875,
"learning_rate": 1.716417910447761e-07,
"logits/chosen": 0.13605038821697235,
"logits/rejected": 0.10310132801532745,
"logps/chosen": -54.53034210205078,
"logps/ref_chosen": -54.461238861083984,
"logps/ref_rejected": -68.33817291259766,
"logps/rejected": -68.3166732788086,
"loss": 1.4118,
"margin_dpo/margin_mean": -0.09060648083686829,
"margin_dpo/margin_std": 0.2676948308944702,
"step": 24
},
{
"epoch": 0.03779289493575208,
"fcm_dpo/beta": 0.2819034457206726,
"fcm_dpo/delta": 0.396295428276062,
"fcm_dpo/margin": 0.013779282569885254,
"fcm_dpo/q_t": 0.499076247215271,
"grad_norm": 83.40914154052734,
"learning_rate": 1.7910447761194027e-07,
"logits/chosen": 0.10568390786647797,
"logits/rejected": 0.05422993749380112,
"logps/chosen": -60.02375793457031,
"logps/ref_chosen": -60.00420379638672,
"logps/ref_rejected": -90.47376251220703,
"logps/rejected": -90.50709533691406,
"loss": 1.3841,
"margin_dpo/margin_mean": 0.013779401779174805,
"margin_dpo/margin_std": 0.2889242172241211,
"step": 25
},
{
"epoch": 0.039304610733182165,
"fcm_dpo/beta": 0.3045765161514282,
"fcm_dpo/delta": 0.38625574111938477,
"fcm_dpo/margin": 0.04751601815223694,
"fcm_dpo/q_t": 0.49659329652786255,
"grad_norm": 88.86567687988281,
"learning_rate": 1.8656716417910447e-07,
"logits/chosen": 0.12892276048660278,
"logits/rejected": 0.10985440760850906,
"logps/chosen": -56.801429748535156,
"logps/ref_chosen": -56.81915283203125,
"logps/ref_rejected": -77.84333038330078,
"logps/rejected": -77.87312316894531,
"loss": 1.3747,
"margin_dpo/margin_mean": 0.047515541315078735,
"margin_dpo/margin_std": 0.3302631378173828,
"step": 26
},
{
"epoch": 0.04081632653061224,
"fcm_dpo/beta": 0.3105989396572113,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.012165576219558716,
"fcm_dpo/q_t": 0.5009288787841797,
"grad_norm": 90.4833984375,
"learning_rate": 1.9402985074626865e-07,
"logits/chosen": 0.1274949312210083,
"logits/rejected": 0.10163411498069763,
"logps/chosen": -62.892974853515625,
"logps/ref_chosen": -62.87702560424805,
"logps/ref_rejected": -71.34437561035156,
"logps/rejected": -71.3481674194336,
"loss": 1.3925,
"margin_dpo/margin_mean": -0.012165874242782593,
"margin_dpo/margin_std": 0.31988954544067383,
"step": 27
},
{
"epoch": 0.042328042328042326,
"fcm_dpo/beta": 0.3105989396572113,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.051617324352264404,
"fcm_dpo/q_t": 0.5040013194084167,
"grad_norm": 86.85352325439453,
"learning_rate": 2.0149253731343282e-07,
"logits/chosen": 0.021893545985221863,
"logits/rejected": 0.01391864474862814,
"logps/chosen": -59.873966217041016,
"logps/ref_chosen": -59.8333740234375,
"logps/ref_rejected": -70.39804077148438,
"logps/rejected": -70.38700866699219,
"loss": 1.4041,
"margin_dpo/margin_mean": -0.051616936922073364,
"margin_dpo/margin_std": 0.2616669535636902,
"step": 28
},
{
"epoch": 0.04383975812547241,
"fcm_dpo/beta": 0.3168638348579407,
"fcm_dpo/delta": 0.1977420449256897,
"fcm_dpo/margin": 0.002045929431915283,
"fcm_dpo/q_t": 0.49984467029571533,
"grad_norm": 103.7999038696289,
"learning_rate": 2.08955223880597e-07,
"logits/chosen": 0.16094645857810974,
"logits/rejected": 0.142642542719841,
"logps/chosen": -74.13179016113281,
"logps/ref_chosen": -74.12020111083984,
"logps/ref_rejected": -83.33099365234375,
"logps/rejected": -83.34461975097656,
"loss": 1.3876,
"margin_dpo/margin_mean": 0.002046048641204834,
"margin_dpo/margin_std": 0.28636813163757324,
"step": 29
},
{
"epoch": 0.045351473922902494,
"fcm_dpo/beta": 0.33617156744003296,
"fcm_dpo/delta": 0.19785372912883759,
"fcm_dpo/margin": -0.010556548833847046,
"fcm_dpo/q_t": 0.5009069442749023,
"grad_norm": 101.58157348632812,
"learning_rate": 2.1641791044776117e-07,
"logits/chosen": 0.13398107886314392,
"logits/rejected": 0.07874200493097305,
"logps/chosen": -50.80282211303711,
"logps/ref_chosen": -50.75128936767578,
"logps/ref_rejected": -89.29063415527344,
"logps/rejected": -89.33160400390625,
"loss": 1.393,
"margin_dpo/margin_mean": -0.010556995868682861,
"margin_dpo/margin_std": 0.331157386302948,
"step": 30
},
{
"epoch": 0.04686318972033258,
"fcm_dpo/beta": 0.34257882833480835,
"fcm_dpo/delta": 0.18705223500728607,
"fcm_dpo/margin": -0.021575331687927246,
"fcm_dpo/q_t": 0.5018072128295898,
"grad_norm": 119.50495147705078,
"learning_rate": 2.2388059701492537e-07,
"logits/chosen": 0.0952620878815651,
"logits/rejected": 0.0497773140668869,
"logps/chosen": -65.39213562011719,
"logps/ref_chosen": -65.33675384521484,
"logps/ref_rejected": -100.76666259765625,
"logps/rejected": -100.80046081542969,
"loss": 1.3965,
"margin_dpo/margin_mean": -0.021574944257736206,
"margin_dpo/margin_std": 0.3273775577545166,
"step": 31
},
{
"epoch": 0.04837490551776266,
"fcm_dpo/beta": 0.3551083207130432,
"fcm_dpo/delta": 0.1724216490983963,
"fcm_dpo/margin": 0.03195449709892273,
"fcm_dpo/q_t": 0.49723586440086365,
"grad_norm": 106.27752685546875,
"learning_rate": 2.3134328358208954e-07,
"logits/chosen": 0.0901811346411705,
"logits/rejected": 0.08230896294116974,
"logps/chosen": -67.19805145263672,
"logps/ref_chosen": -67.18333435058594,
"logps/ref_rejected": -82.80763244628906,
"logps/rejected": -82.85430908203125,
"loss": 1.3781,
"margin_dpo/margin_mean": 0.031954437494277954,
"margin_dpo/margin_std": 0.3250340223312378,
"step": 32
},
{
"epoch": 0.049886621315192746,
"fcm_dpo/beta": 0.36843010783195496,
"fcm_dpo/delta": 0.19543671607971191,
"fcm_dpo/margin": 0.005410343408584595,
"fcm_dpo/q_t": 0.4994819760322571,
"grad_norm": 116.07134246826172,
"learning_rate": 2.388059701492537e-07,
"logits/chosen": 0.041345153003931046,
"logits/rejected": 0.014864582568407059,
"logps/chosen": -64.06546783447266,
"logps/ref_chosen": -64.03948211669922,
"logps/ref_rejected": -75.68357849121094,
"logps/rejected": -75.71498107910156,
"loss": 1.3883,
"margin_dpo/margin_mean": 0.005411058664321899,
"margin_dpo/margin_std": 0.3478584289550781,
"step": 33
},
{
"epoch": 0.05139833711262283,
"fcm_dpo/beta": 0.3973255157470703,
"fcm_dpo/delta": 0.37884321808815,
"fcm_dpo/margin": 0.05614650249481201,
"fcm_dpo/q_t": 0.49473071098327637,
"grad_norm": 112.28120422363281,
"learning_rate": 2.4626865671641786e-07,
"logits/chosen": 0.09276697039604187,
"logits/rejected": 0.06323037296533585,
"logps/chosen": -53.700801849365234,
"logps/ref_chosen": -53.6642951965332,
"logps/ref_rejected": -65.77989959716797,
"logps/rejected": -65.87255096435547,
"loss": 1.3682,
"margin_dpo/margin_mean": 0.05614641308784485,
"margin_dpo/margin_std": 0.3014362156391144,
"step": 34
},
{
"epoch": 0.05291005291005291,
"fcm_dpo/beta": 0.40519657731056213,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.009445160627365112,
"fcm_dpo/q_t": 0.5009890794754028,
"grad_norm": 113.60429382324219,
"learning_rate": 2.537313432835821e-07,
"logits/chosen": 0.06334627419710159,
"logits/rejected": 0.040607914328575134,
"logps/chosen": -61.111026763916016,
"logps/ref_chosen": -61.01686096191406,
"logps/ref_rejected": -72.78598022460938,
"logps/rejected": -72.8707046508789,
"loss": 1.395,
"margin_dpo/margin_mean": -0.009445279836654663,
"margin_dpo/margin_std": 0.3441219925880432,
"step": 35
},
{
"epoch": 0.05442176870748299,
"fcm_dpo/beta": 0.42913883924484253,
"fcm_dpo/delta": 0.3846970200538635,
"fcm_dpo/margin": 0.03751775622367859,
"fcm_dpo/q_t": 0.4962383508682251,
"grad_norm": 122.54588317871094,
"learning_rate": 2.611940298507462e-07,
"logits/chosen": 0.10409566760063171,
"logits/rejected": 0.051027558743953705,
"logps/chosen": -50.6123046875,
"logps/ref_chosen": -50.53736114501953,
"logps/ref_rejected": -78.11678314208984,
"logps/rejected": -78.229248046875,
"loss": 1.3762,
"margin_dpo/margin_mean": 0.03751787543296814,
"margin_dpo/margin_std": 0.35155099630355835,
"step": 36
},
{
"epoch": 0.055933484504913075,
"fcm_dpo/beta": 0.4613209366798401,
"fcm_dpo/delta": 0.34767961502075195,
"fcm_dpo/margin": 0.11707112193107605,
"fcm_dpo/q_t": 0.48699861764907837,
"grad_norm": 174.01670837402344,
"learning_rate": 2.686567164179104e-07,
"logits/chosen": 0.09628035128116608,
"logits/rejected": 0.01733151637017727,
"logps/chosen": -59.590187072753906,
"logps/ref_chosen": -59.55394744873047,
"logps/ref_rejected": -108.27702331542969,
"logps/rejected": -108.43034362792969,
"loss": 1.3434,
"margin_dpo/margin_mean": 0.11707085371017456,
"margin_dpo/margin_std": 0.4468412399291992,
"step": 37
},
{
"epoch": 0.05744520030234316,
"fcm_dpo/beta": 0.4848484396934509,
"fcm_dpo/delta": 0.1649438589811325,
"fcm_dpo/margin": 0.03932034969329834,
"fcm_dpo/q_t": 0.49558743834495544,
"grad_norm": 144.6350860595703,
"learning_rate": 2.761194029850746e-07,
"logits/chosen": 0.04511827975511551,
"logits/rejected": 0.03162279352545738,
"logps/chosen": -65.85999298095703,
"logps/ref_chosen": -65.78836059570312,
"logps/ref_rejected": -76.1619873046875,
"logps/rejected": -76.27294921875,
"loss": 1.3769,
"margin_dpo/margin_mean": 0.03931984305381775,
"margin_dpo/margin_std": 0.3753390312194824,
"step": 38
},
{
"epoch": 0.05895691609977324,
"fcm_dpo/beta": 0.5116080045700073,
"fcm_dpo/delta": 0.35159415006637573,
"fcm_dpo/margin": 0.09740224480628967,
"fcm_dpo/q_t": 0.48791423439979553,
"grad_norm": 146.56613159179688,
"learning_rate": 2.8358208955223876e-07,
"logits/chosen": 0.1656300127506256,
"logits/rejected": 0.13862335681915283,
"logps/chosen": -57.26053237915039,
"logps/ref_chosen": -57.17681121826172,
"logps/ref_rejected": -79.486328125,
"logps/rejected": -79.66746520996094,
"loss": 1.3449,
"margin_dpo/margin_mean": 0.09740233421325684,
"margin_dpo/margin_std": 0.3506489396095276,
"step": 39
},
{
"epoch": 0.06046863189720333,
"fcm_dpo/beta": 0.5398132801055908,
"fcm_dpo/delta": 0.18534015119075775,
"fcm_dpo/margin": -0.042171984910964966,
"fcm_dpo/q_t": 0.5057384371757507,
"grad_norm": 176.7834014892578,
"learning_rate": 2.9104477611940296e-07,
"logits/chosen": 0.09759977459907532,
"logits/rejected": 0.04874386638402939,
"logps/chosen": -61.46282958984375,
"logps/ref_chosen": -61.33416748046875,
"logps/ref_rejected": -79.10697174072266,
"logps/rejected": -79.19346618652344,
"loss": 1.4197,
"margin_dpo/margin_mean": -0.04217180609703064,
"margin_dpo/margin_std": 0.36746376752853394,
"step": 40
},
{
"epoch": 0.06198034769463341,
"fcm_dpo/beta": 0.5703096389770508,
"fcm_dpo/delta": 0.36740079522132874,
"fcm_dpo/margin": 0.05953556299209595,
"fcm_dpo/q_t": 0.4920777380466461,
"grad_norm": 169.90399169921875,
"learning_rate": 2.985074626865671e-07,
"logits/chosen": 0.03998805582523346,
"logits/rejected": 0.020248761400580406,
"logps/chosen": -67.65834045410156,
"logps/ref_chosen": -67.5467300415039,
"logps/ref_rejected": -83.87788391113281,
"logps/rejected": -84.04903411865234,
"loss": 1.3651,
"margin_dpo/margin_mean": 0.05953595042228699,
"margin_dpo/margin_std": 0.3872567415237427,
"step": 41
},
{
"epoch": 0.06349206349206349,
"fcm_dpo/beta": 0.612642765045166,
"fcm_dpo/delta": 0.36006850004196167,
"fcm_dpo/margin": 0.06822788715362549,
"fcm_dpo/q_t": 0.49012911319732666,
"grad_norm": 176.0435028076172,
"learning_rate": 3.059701492537313e-07,
"logits/chosen": 0.05589213967323303,
"logits/rejected": 0.034156039357185364,
"logps/chosen": -61.35820770263672,
"logps/ref_chosen": -61.26485824584961,
"logps/ref_rejected": -76.3629150390625,
"logps/rejected": -76.52449035644531,
"loss": 1.3608,
"margin_dpo/margin_mean": 0.06822726130485535,
"margin_dpo/margin_std": 0.40432050824165344,
"step": 42
},
{
"epoch": 0.06500377928949358,
"fcm_dpo/beta": 0.6587069034576416,
"fcm_dpo/delta": 0.3486868739128113,
"fcm_dpo/margin": 0.08003175258636475,
"fcm_dpo/q_t": 0.4871513843536377,
"grad_norm": 227.68112182617188,
"learning_rate": 3.134328358208955e-07,
"logits/chosen": 0.08429280668497086,
"logits/rejected": 0.07338319718837738,
"logps/chosen": -71.92668151855469,
"logps/ref_chosen": -71.80902862548828,
"logps/ref_rejected": -81.12464141845703,
"logps/rejected": -81.32231140136719,
"loss": 1.3553,
"margin_dpo/margin_mean": 0.08003199100494385,
"margin_dpo/margin_std": 0.44389355182647705,
"step": 43
},
{
"epoch": 0.06651549508692366,
"fcm_dpo/beta": 0.6816689968109131,
"fcm_dpo/delta": 0.17928901314735413,
"fcm_dpo/margin": 0.0221157968044281,
"fcm_dpo/q_t": 0.4966672658920288,
"grad_norm": 220.933349609375,
"learning_rate": 3.2089552238805965e-07,
"logits/chosen": 0.040969911962747574,
"logits/rejected": 0.010756943374872208,
"logps/chosen": -66.72364807128906,
"logps/ref_chosen": -66.55043029785156,
"logps/ref_rejected": -85.06198120117188,
"logps/rejected": -85.25730895996094,
"loss": 1.3932,
"margin_dpo/margin_mean": 0.022116124629974365,
"margin_dpo/margin_std": 0.4371548295021057,
"step": 44
},
{
"epoch": 0.06802721088435375,
"fcm_dpo/beta": 0.7307697534561157,
"fcm_dpo/delta": 0.3396417498588562,
"fcm_dpo/margin": 0.08511926233768463,
"fcm_dpo/q_t": 0.48502203822135925,
"grad_norm": 232.3180694580078,
"learning_rate": 3.2835820895522385e-07,
"logits/chosen": 0.12735822796821594,
"logits/rejected": 0.07406317442655563,
"logps/chosen": -62.372169494628906,
"logps/ref_chosen": -62.24385452270508,
"logps/ref_rejected": -92.96665954589844,
"logps/rejected": -93.18009948730469,
"loss": 1.3537,
"margin_dpo/margin_mean": 0.08512008190155029,
"margin_dpo/margin_std": 0.4685259461402893,
"step": 45
},
{
"epoch": 0.06953892668178382,
"fcm_dpo/beta": 0.7730103731155396,
"fcm_dpo/delta": 0.2614397406578064,
"fcm_dpo/margin": 0.18374797701835632,
"fcm_dpo/q_t": 0.46673786640167236,
"grad_norm": 223.03953552246094,
"learning_rate": 3.3582089552238805e-07,
"logits/chosen": 0.09179161489009857,
"logits/rejected": 0.04750993847846985,
"logps/chosen": -61.60199737548828,
"logps/ref_chosen": -61.498905181884766,
"logps/ref_rejected": -78.91172790527344,
"logps/rejected": -79.19857025146484,
"loss": 1.2771,
"margin_dpo/margin_mean": 0.18374782800674438,
"margin_dpo/margin_std": 0.4380492866039276,
"step": 46
},
{
"epoch": 0.0710506424792139,
"fcm_dpo/beta": 0.8166114687919617,
"fcm_dpo/delta": 0.27844953536987305,
"fcm_dpo/margin": 0.15279075503349304,
"fcm_dpo/q_t": 0.4699801206588745,
"grad_norm": 221.4151611328125,
"learning_rate": 3.432835820895522e-07,
"logits/chosen": 0.029810963198542595,
"logits/rejected": -0.012774014845490456,
"logps/chosen": -51.69743347167969,
"logps/ref_chosen": -51.578346252441406,
"logps/ref_rejected": -68.2215576171875,
"logps/rejected": -68.49343872070312,
"loss": 1.2897,
"margin_dpo/margin_mean": 0.15279105305671692,
"margin_dpo/margin_std": 0.3854082226753235,
"step": 47
},
{
"epoch": 0.07256235827664399,
"fcm_dpo/beta": 0.8717821836471558,
"fcm_dpo/delta": 0.34792637825012207,
"fcm_dpo/margin": 0.06187397241592407,
"fcm_dpo/q_t": 0.4878446161746979,
"grad_norm": 236.5067138671875,
"learning_rate": 3.507462686567164e-07,
"logits/chosen": 0.16963548958301544,
"logits/rejected": 0.13918644189834595,
"logps/chosen": -51.98871994018555,
"logps/ref_chosen": -51.79365158081055,
"logps/ref_rejected": -64.22503662109375,
"logps/rejected": -64.48198699951172,
"loss": 1.3667,
"margin_dpo/margin_mean": 0.061874061822891235,
"margin_dpo/margin_std": 0.4206083416938782,
"step": 48
},
{
"epoch": 0.07407407407407407,
"fcm_dpo/beta": 0.9262492060661316,
"fcm_dpo/delta": 0.288057416677475,
"fcm_dpo/margin": 0.12440468370914459,
"fcm_dpo/q_t": 0.4729665517807007,
"grad_norm": 237.94810485839844,
"learning_rate": 3.5820895522388055e-07,
"logits/chosen": 0.02041550911962986,
"logits/rejected": -0.0007341206073760986,
"logps/chosen": -58.339622497558594,
"logps/ref_chosen": -58.13460159301758,
"logps/ref_rejected": -64.63206481933594,
"logps/rejected": -64.96148681640625,
"loss": 1.3232,
"margin_dpo/margin_mean": 0.12440502643585205,
"margin_dpo/margin_std": 0.4858497977256775,
"step": 49
},
{
"epoch": 0.07558578987150416,
"fcm_dpo/beta": 0.9749960899353027,
"fcm_dpo/delta": 0.25156453251838684,
"fcm_dpo/margin": 0.15634498000144958,
"fcm_dpo/q_t": 0.4646122455596924,
"grad_norm": 252.39637756347656,
"learning_rate": 3.6567164179104475e-07,
"logits/chosen": 0.11577291041612625,
"logits/rejected": 0.08579862117767334,
"logps/chosen": -53.112850189208984,
"logps/ref_chosen": -52.85643768310547,
"logps/ref_rejected": -72.17460632324219,
"logps/rejected": -72.58735656738281,
"loss": 1.2742,
"margin_dpo/margin_mean": 0.15634474158287048,
"margin_dpo/margin_std": 0.38461118936538696,
"step": 50
},
{
"epoch": 0.07709750566893424,
"fcm_dpo/beta": 1.0202209949493408,
"fcm_dpo/delta": 0.2239471673965454,
"fcm_dpo/margin": 0.1768064796924591,
"fcm_dpo/q_t": 0.45897069573402405,
"grad_norm": 271.5158996582031,
"learning_rate": 3.7313432835820895e-07,
"logits/chosen": 0.0644986554980278,
"logits/rejected": 0.03733114153146744,
"logps/chosen": -63.9501838684082,
"logps/ref_chosen": -63.65644073486328,
"logps/ref_rejected": -86.13229370117188,
"logps/rejected": -86.60284423828125,
"loss": 1.2691,
"margin_dpo/margin_mean": 0.17680680751800537,
"margin_dpo/margin_std": 0.4650823473930359,
"step": 51
},
{
"epoch": 0.07860922146636433,
"fcm_dpo/beta": 1.0686278343200684,
"fcm_dpo/delta": 0.21884143352508545,
"fcm_dpo/margin": 0.17313197255134583,
"fcm_dpo/q_t": 0.45887812972068787,
"grad_norm": 316.1194152832031,
"learning_rate": 3.805970149253731e-07,
"logits/chosen": 0.09268851578235626,
"logits/rejected": 0.04252258688211441,
"logps/chosen": -68.11876678466797,
"logps/ref_chosen": -67.8402099609375,
"logps/ref_rejected": -96.97090911865234,
"logps/rejected": -97.42259216308594,
"loss": 1.2702,
"margin_dpo/margin_mean": 0.1731322705745697,
"margin_dpo/margin_std": 0.47208118438720703,
"step": 52
},
{
"epoch": 0.0801209372637944,
"fcm_dpo/beta": 1.1181976795196533,
"fcm_dpo/delta": 0.2672102749347687,
"fcm_dpo/margin": 0.1227661669254303,
"fcm_dpo/q_t": 0.46797460317611694,
"grad_norm": 288.12396240234375,
"learning_rate": 3.880597014925373e-07,
"logits/chosen": 0.10207226872444153,
"logits/rejected": 0.0910034328699112,
"logps/chosen": -57.202964782714844,
"logps/ref_chosen": -56.87813949584961,
"logps/ref_rejected": -60.75569152832031,
"logps/rejected": -61.203285217285156,
"loss": 1.2989,
"margin_dpo/margin_mean": 0.12276646494865417,
"margin_dpo/margin_std": 0.3650910258293152,
"step": 53
},
{
"epoch": 0.08163265306122448,
"fcm_dpo/beta": 1.1756856441497803,
"fcm_dpo/delta": 0.22768601775169373,
"fcm_dpo/margin": 0.1503320038318634,
"fcm_dpo/q_t": 0.45959293842315674,
"grad_norm": 297.62738037109375,
"learning_rate": 3.9552238805970144e-07,
"logits/chosen": 0.05468449741601944,
"logits/rejected": 0.03934643790125847,
"logps/chosen": -47.59065246582031,
"logps/ref_chosen": -47.26692199707031,
"logps/ref_rejected": -62.19426727294922,
"logps/rejected": -62.66832733154297,
"loss": 1.2773,
"margin_dpo/margin_mean": 0.15033209323883057,
"margin_dpo/margin_std": 0.42221611738204956,
"step": 54
},
{
"epoch": 0.08314436885865457,
"fcm_dpo/beta": 1.2158875465393066,
"fcm_dpo/delta": 0.06430363655090332,
"fcm_dpo/margin": 0.2756027579307556,
"fcm_dpo/q_t": 0.43045541644096375,
"grad_norm": 339.1961669921875,
"learning_rate": 4.0298507462686564e-07,
"logits/chosen": 0.0669720247387886,
"logits/rejected": -0.013603119179606438,
"logps/chosen": -50.63751983642578,
"logps/ref_chosen": -50.32619094848633,
"logps/ref_rejected": -92.44389343261719,
"logps/rejected": -93.03082275390625,
"loss": 1.2078,
"margin_dpo/margin_mean": 0.2756025791168213,
"margin_dpo/margin_std": 0.5928495526313782,
"step": 55
},
{
"epoch": 0.08465608465608465,
"fcm_dpo/beta": 1.251664161682129,
"fcm_dpo/delta": 0.23274877667427063,
"fcm_dpo/margin": 0.13655099272727966,
"fcm_dpo/q_t": 0.4612714946269989,
"grad_norm": 343.06378173828125,
"learning_rate": 4.1044776119402984e-07,
"logits/chosen": 0.10051405429840088,
"logits/rejected": 0.07845334708690643,
"logps/chosen": -57.09839630126953,
"logps/ref_chosen": -56.766971588134766,
"logps/ref_rejected": -66.30504608154297,
"logps/rejected": -66.77301788330078,
"loss": 1.3046,
"margin_dpo/margin_mean": 0.13655099272727966,
"margin_dpo/margin_std": 0.466732382774353,
"step": 56
},
{
"epoch": 0.08616780045351474,
"fcm_dpo/beta": 1.2687859535217285,
"fcm_dpo/delta": 0.05950481444597244,
"fcm_dpo/margin": 0.26971304416656494,
"fcm_dpo/q_t": 0.42512619495391846,
"grad_norm": 329.37237548828125,
"learning_rate": 4.17910447761194e-07,
"logits/chosen": 0.11962918192148209,
"logits/rejected": 0.05380728468298912,
"logps/chosen": -58.167022705078125,
"logps/ref_chosen": -57.76774597167969,
"logps/ref_rejected": -82.75698852539062,
"logps/rejected": -83.42597961425781,
"loss": 1.1973,
"margin_dpo/margin_mean": 0.2697131037712097,
"margin_dpo/margin_std": 0.5702307224273682,
"step": 57
},
{
"epoch": 0.08767951625094482,
"fcm_dpo/beta": 1.2718815803527832,
"fcm_dpo/delta": -0.031620148569345474,
"fcm_dpo/margin": 0.17981407046318054,
"fcm_dpo/q_t": 0.4582921266555786,
"grad_norm": 405.1985778808594,
"learning_rate": 4.253731343283582e-07,
"logits/chosen": 0.056253060698509216,
"logits/rejected": 0.04066895321011543,
"logps/chosen": -73.22657775878906,
"logps/ref_chosen": -72.76408386230469,
"logps/ref_rejected": -84.49275207519531,
"logps/rejected": -85.13505554199219,
"loss": 1.3311,
"margin_dpo/margin_mean": 0.17981407046318054,
"margin_dpo/margin_std": 0.654416561126709,
"step": 58
},
{
"epoch": 0.08919123204837491,
"fcm_dpo/beta": 1.2566231489181519,
"fcm_dpo/delta": -0.12143002450466156,
"fcm_dpo/margin": 0.24419176578521729,
"fcm_dpo/q_t": 0.4358407258987427,
"grad_norm": 294.4437561035156,
"learning_rate": 4.3283582089552234e-07,
"logits/chosen": 0.12476523220539093,
"logits/rejected": 0.05869518965482712,
"logps/chosen": -50.228233337402344,
"logps/ref_chosen": -49.820777893066406,
"logps/ref_rejected": -77.14368438720703,
"logps/rejected": -77.79533386230469,
"loss": 1.2168,
"margin_dpo/margin_mean": 0.24419182538986206,
"margin_dpo/margin_std": 0.48622995615005493,
"step": 59
},
{
"epoch": 0.09070294784580499,
"fcm_dpo/beta": 1.2652430534362793,
"fcm_dpo/delta": 0.09526422619819641,
"fcm_dpo/margin": 0.06482309103012085,
"fcm_dpo/q_t": 0.4771226644515991,
"grad_norm": 414.355224609375,
"learning_rate": 4.4029850746268654e-07,
"logits/chosen": 0.11259119212627411,
"logits/rejected": 0.11113601922988892,
"logps/chosen": -63.673728942871094,
"logps/ref_chosen": -63.22477340698242,
"logps/ref_rejected": -61.360477447509766,
"logps/rejected": -61.8742561340332,
"loss": 1.4422,
"margin_dpo/margin_mean": 0.0648232102394104,
"margin_dpo/margin_std": 0.5844757556915283,
"step": 60
},
{
"epoch": 0.09221466364323508,
"fcm_dpo/beta": 1.3022596836090088,
"fcm_dpo/delta": 0.2242211550474167,
"fcm_dpo/margin": 0.1385476291179657,
"fcm_dpo/q_t": 0.4630514979362488,
"grad_norm": 383.376220703125,
"learning_rate": 4.4776119402985074e-07,
"logits/chosen": 0.14720313251018524,
"logits/rejected": 0.11374804377555847,
"logps/chosen": -49.5374755859375,
"logps/ref_chosen": -49.01679992675781,
"logps/ref_rejected": -74.90817260742188,
"logps/rejected": -75.56739044189453,
"loss": 1.3838,
"margin_dpo/margin_mean": 0.13854748010635376,
"margin_dpo/margin_std": 0.6521978974342346,
"step": 61
},
{
"epoch": 0.09372637944066516,
"fcm_dpo/beta": 1.355870246887207,
"fcm_dpo/delta": 0.14111031591892242,
"fcm_dpo/margin": 0.19318178296089172,
"fcm_dpo/q_t": 0.44203782081604004,
"grad_norm": 394.4239501953125,
"learning_rate": 4.552238805970149e-07,
"logits/chosen": 0.1043829694390297,
"logits/rejected": 0.06520397216081619,
"logps/chosen": -63.30534362792969,
"logps/ref_chosen": -62.751869201660156,
"logps/ref_rejected": -78.93360900878906,
"logps/rejected": -79.68026733398438,
"loss": 1.2919,
"margin_dpo/margin_mean": 0.1931813657283783,
"margin_dpo/margin_std": 0.5964335799217224,
"step": 62
},
{
"epoch": 0.09523809523809523,
"fcm_dpo/beta": 1.320521354675293,
"fcm_dpo/delta": -0.21281126141548157,
"fcm_dpo/margin": 0.4545568525791168,
"fcm_dpo/q_t": 0.360975980758667,
"grad_norm": 294.78167724609375,
"learning_rate": 4.626865671641791e-07,
"logits/chosen": 0.17442727088928223,
"logits/rejected": 0.1496298611164093,
"logps/chosen": -60.93528366088867,
"logps/ref_chosen": -60.51525115966797,
"logps/ref_rejected": -85.11021423339844,
"logps/rejected": -85.98480224609375,
"loss": 0.9608,
"margin_dpo/margin_mean": 0.454556941986084,
"margin_dpo/margin_std": 0.46920114755630493,
"step": 63
},
{
"epoch": 0.09674981103552532,
"fcm_dpo/beta": 1.3571248054504395,
"fcm_dpo/delta": 0.29117465019226074,
"fcm_dpo/margin": 0.0832415223121643,
"fcm_dpo/q_t": 0.47412389516830444,
"grad_norm": 399.5368347167969,
"learning_rate": 4.701492537313433e-07,
"logits/chosen": 0.07589007169008255,
"logits/rejected": 0.051104120910167694,
"logps/chosen": -51.82196044921875,
"logps/ref_chosen": -51.20684814453125,
"logps/ref_rejected": -66.93081665039062,
"logps/rejected": -67.62918090820312,
"loss": 1.41,
"margin_dpo/margin_mean": 0.08324190974235535,
"margin_dpo/margin_std": 0.5599809885025024,
"step": 64
},
{
"epoch": 0.0982615268329554,
"fcm_dpo/beta": 1.3357598781585693,
"fcm_dpo/delta": -0.2229897379875183,
"fcm_dpo/margin": 0.4559674859046936,
"fcm_dpo/q_t": 0.371783971786499,
"grad_norm": 337.01251220703125,
"learning_rate": 4.776119402985074e-07,
"logits/chosen": 0.1916767954826355,
"logits/rejected": 0.1618405431509018,
"logps/chosen": -67.87342834472656,
"logps/ref_chosen": -67.2886962890625,
"logps/ref_rejected": -74.44281005859375,
"logps/rejected": -75.48350524902344,
"loss": 1.0623,
"margin_dpo/margin_mean": 0.4559671878814697,
"margin_dpo/margin_std": 0.7171410918235779,
"step": 65
},
{
"epoch": 0.09977324263038549,
"fcm_dpo/beta": 1.3253322839736938,
"fcm_dpo/delta": 0.06414327025413513,
"fcm_dpo/margin": 0.25458577275276184,
"fcm_dpo/q_t": 0.4319732189178467,
"grad_norm": 368.9222412109375,
"learning_rate": 4.850746268656717e-07,
"logits/chosen": 0.1052892655134201,
"logits/rejected": 0.08073309063911438,
"logps/chosen": -71.32987976074219,
"logps/ref_chosen": -70.743408203125,
"logps/ref_rejected": -77.26499938964844,
"logps/rejected": -78.10604858398438,
"loss": 1.2347,
"margin_dpo/margin_mean": 0.2545853555202484,
"margin_dpo/margin_std": 0.6333310604095459,
"step": 66
},
{
"epoch": 0.10128495842781557,
"fcm_dpo/beta": 1.3538342714309692,
"fcm_dpo/delta": 0.04710801690816879,
"fcm_dpo/margin": 0.26157301664352417,
"fcm_dpo/q_t": 0.431488960981369,
"grad_norm": 357.73748779296875,
"learning_rate": 4.925373134328357e-07,
"logits/chosen": 0.047591157257556915,
"logits/rejected": -0.006785998586565256,
"logps/chosen": -61.096351623535156,
"logps/ref_chosen": -60.60260009765625,
"logps/ref_rejected": -75.22235870361328,
"logps/rejected": -75.97767639160156,
"loss": 1.2217,
"margin_dpo/margin_mean": 0.2615726888179779,
"margin_dpo/margin_std": 0.6404443979263306,
"step": 67
},
{
"epoch": 0.10279667422524566,
"fcm_dpo/beta": 1.3258135318756104,
"fcm_dpo/delta": -0.06805318593978882,
"fcm_dpo/margin": 0.34885549545288086,
"fcm_dpo/q_t": 0.40520694851875305,
"grad_norm": 383.10980224609375,
"learning_rate": 5e-07,
"logits/chosen": 0.028347402811050415,
"logits/rejected": -0.000688064843416214,
"logps/chosen": -78.18305206298828,
"logps/ref_chosen": -77.52836608886719,
"logps/ref_rejected": -93.17778015136719,
"logps/rejected": -94.18131256103516,
"loss": 1.1489,
"margin_dpo/margin_mean": 0.3488554060459137,
"margin_dpo/margin_std": 0.6785191893577576,
"step": 68
},
{
"epoch": 0.10430839002267574,
"fcm_dpo/beta": 1.3233981132507324,
"fcm_dpo/delta": -0.10464634746313095,
"fcm_dpo/margin": 0.37709471583366394,
"fcm_dpo/q_t": 0.39530229568481445,
"grad_norm": 314.11279296875,
"learning_rate": 4.999965034812934e-07,
"logits/chosen": 0.09186286479234695,
"logits/rejected": 0.048370733857154846,
"logps/chosen": -66.55419921875,
"logps/ref_chosen": -65.94305419921875,
"logps/ref_rejected": -89.7735595703125,
"logps/rejected": -90.76179504394531,
"loss": 1.0926,
"margin_dpo/margin_mean": 0.3770950138568878,
"margin_dpo/margin_std": 0.5968654751777649,
"step": 69
},
{
"epoch": 0.10582010582010581,
"fcm_dpo/beta": 1.305304765701294,
"fcm_dpo/delta": 0.03656423091888428,
"fcm_dpo/margin": 0.2790451943874359,
"fcm_dpo/q_t": 0.42012423276901245,
"grad_norm": 358.5043029785156,
"learning_rate": 4.999860140229787e-07,
"logits/chosen": 0.11005310714244843,
"logits/rejected": 0.08715207874774933,
"logps/chosen": -62.6015625,
"logps/ref_chosen": -61.95791244506836,
"logps/ref_rejected": -75.80945587158203,
"logps/rejected": -76.7321548461914,
"loss": 1.215,
"margin_dpo/margin_mean": 0.2790454030036926,
"margin_dpo/margin_std": 0.6277328729629517,
"step": 70
},
{
"epoch": 0.1073318216175359,
"fcm_dpo/beta": 1.3658053874969482,
"fcm_dpo/delta": 0.24697905778884888,
"fcm_dpo/margin": 0.1148194968700409,
"fcm_dpo/q_t": 0.4629502594470978,
"grad_norm": 417.89801025390625,
"learning_rate": 4.999685319184688e-07,
"logits/chosen": 0.05348202586174011,
"logits/rejected": 0.03862350434064865,
"logps/chosen": -64.16213989257812,
"logps/ref_chosen": -63.34757995605469,
"logps/ref_rejected": -67.49658203125,
"logps/rejected": -68.42596435546875,
"loss": 1.3647,
"margin_dpo/margin_mean": 0.11482015252113342,
"margin_dpo/margin_std": 0.5449914932250977,
"step": 71
},
{
"epoch": 0.10884353741496598,
"fcm_dpo/beta": 1.3521233797073364,
"fcm_dpo/delta": -0.13006776571273804,
"fcm_dpo/margin": 0.38662588596343994,
"fcm_dpo/q_t": 0.38986825942993164,
"grad_norm": 355.95806884765625,
"learning_rate": 4.999440576567755e-07,
"logits/chosen": 0.11371571570634842,
"logits/rejected": 0.04910843446850777,
"logps/chosen": -56.53982925415039,
"logps/ref_chosen": -55.85929870605469,
"logps/ref_rejected": -68.45423889160156,
"logps/rejected": -69.52140045166016,
"loss": 1.1028,
"margin_dpo/margin_mean": 0.38662609457969666,
"margin_dpo/margin_std": 0.6601760387420654,
"step": 72
},
{
"epoch": 0.11035525321239607,
"fcm_dpo/beta": 1.39102303981781,
"fcm_dpo/delta": 0.16966001689434052,
"fcm_dpo/margin": 0.16693082451820374,
"fcm_dpo/q_t": 0.4621427059173584,
"grad_norm": 460.1529846191406,
"learning_rate": 4.999125919224965e-07,
"logits/chosen": 0.07310564070940018,
"logits/rejected": 0.05886080861091614,
"logps/chosen": -70.09381103515625,
"logps/ref_chosen": -69.13880920410156,
"logps/ref_rejected": -79.04586791992188,
"logps/rejected": -80.16780090332031,
"loss": 1.4425,
"margin_dpo/margin_mean": 0.16693082451820374,
"margin_dpo/margin_std": 0.8031052350997925,
"step": 73
},
{
"epoch": 0.11186696900982615,
"fcm_dpo/beta": 1.3699589967727661,
"fcm_dpo/delta": -0.1809014230966568,
"fcm_dpo/margin": 0.4163665473461151,
"fcm_dpo/q_t": 0.38876447081565857,
"grad_norm": 305.5662536621094,
"learning_rate": 4.998741355957963e-07,
"logits/chosen": 0.10880277305841446,
"logits/rejected": 0.056502409279346466,
"logps/chosen": -50.64822769165039,
"logps/ref_chosen": -49.923736572265625,
"logps/ref_rejected": -81.73213958740234,
"logps/rejected": -82.87300109863281,
"loss": 1.1053,
"margin_dpo/margin_mean": 0.4163666069507599,
"margin_dpo/margin_std": 0.7277534604072571,
"step": 74
},
{
"epoch": 0.11337868480725624,
"fcm_dpo/beta": 1.3012433052062988,
"fcm_dpo/delta": -0.18539077043533325,
"fcm_dpo/margin": 0.44073373079299927,
"fcm_dpo/q_t": 0.3843110501766205,
"grad_norm": 269.8968811035156,
"learning_rate": 4.998286897523808e-07,
"logits/chosen": 0.08823660016059875,
"logits/rejected": 0.05635009706020355,
"logps/chosen": -46.86772537231445,
"logps/ref_chosen": -46.06875228881836,
"logps/ref_rejected": -66.1181411743164,
"logps/rejected": -67.35784912109375,
"loss": 1.0864,
"margin_dpo/margin_mean": 0.44073382019996643,
"margin_dpo/margin_std": 0.7411842942237854,
"step": 75
},
{
"epoch": 0.11489040060468632,
"fcm_dpo/beta": 1.3104901313781738,
"fcm_dpo/delta": 0.09375004470348358,
"fcm_dpo/margin": 0.2359827756881714,
"fcm_dpo/q_t": 0.437259316444397,
"grad_norm": 354.0776672363281,
"learning_rate": 4.997762556634679e-07,
"logits/chosen": 0.07581540942192078,
"logits/rejected": 0.03382248058915138,
"logps/chosen": -54.947086334228516,
"logps/ref_chosen": -54.06275177001953,
"logps/ref_rejected": -74.87464141845703,
"logps/rejected": -75.99496459960938,
"loss": 1.2838,
"margin_dpo/margin_mean": 0.23598253726959229,
"margin_dpo/margin_std": 0.6900110840797424,
"step": 76
},
{
"epoch": 0.1164021164021164,
"fcm_dpo/beta": 1.2992005348205566,
"fcm_dpo/delta": -0.001370757818222046,
"fcm_dpo/margin": 0.3053017258644104,
"fcm_dpo/q_t": 0.4183969497680664,
"grad_norm": 362.9595031738281,
"learning_rate": 4.99716834795752e-07,
"logits/chosen": 0.09972919523715973,
"logits/rejected": 0.06024125590920448,
"logps/chosen": -54.065406799316406,
"logps/ref_chosen": -53.07609176635742,
"logps/ref_rejected": -74.45601654052734,
"logps/rejected": -75.7506332397461,
"loss": 1.229,
"margin_dpo/margin_mean": 0.3053016662597656,
"margin_dpo/margin_std": 0.6845893859863281,
"step": 77
},
{
"epoch": 0.11791383219954649,
"fcm_dpo/beta": 1.3032406568527222,
"fcm_dpo/delta": -0.07537812739610672,
"fcm_dpo/margin": 0.36197635531425476,
"fcm_dpo/q_t": 0.3986741900444031,
"grad_norm": 356.3768615722656,
"learning_rate": 4.996504288113623e-07,
"logits/chosen": 0.07950174808502197,
"logits/rejected": 0.05943600460886955,
"logps/chosen": -68.604736328125,
"logps/ref_chosen": -67.72541809082031,
"logps/ref_rejected": -79.03926849365234,
"logps/rejected": -80.28056335449219,
"loss": 1.134,
"margin_dpo/margin_mean": 0.3619759976863861,
"margin_dpo/margin_std": 0.6469442844390869,
"step": 78
},
{
"epoch": 0.11942554799697656,
"fcm_dpo/beta": 1.27435302734375,
"fcm_dpo/delta": -0.15202443301677704,
"fcm_dpo/margin": 0.4268932640552521,
"fcm_dpo/q_t": 0.38484495878219604,
"grad_norm": 283.2723083496094,
"learning_rate": 4.995770395678171e-07,
"logits/chosen": 0.11961696296930313,
"logits/rejected": 0.061562664806842804,
"logps/chosen": -53.108978271484375,
"logps/ref_chosen": -52.16064453125,
"logps/ref_rejected": -83.31062316894531,
"logps/rejected": -84.68585205078125,
"loss": 1.0591,
"margin_dpo/margin_mean": 0.4268933832645416,
"margin_dpo/margin_std": 0.6421747207641602,
"step": 79
},
{
"epoch": 0.12093726379440665,
"fcm_dpo/beta": 1.251448631286621,
"fcm_dpo/delta": -0.08930756151676178,
"fcm_dpo/margin": 0.38737350702285767,
"fcm_dpo/q_t": 0.400845468044281,
"grad_norm": 356.0257263183594,
"learning_rate": 4.994966691179711e-07,
"logits/chosen": 0.10540800541639328,
"logits/rejected": 0.04691263288259506,
"logps/chosen": -62.39448928833008,
"logps/ref_chosen": -61.410560607910156,
"logps/ref_rejected": -78.66004943847656,
"logps/rejected": -80.0313491821289,
"loss": 1.1444,
"margin_dpo/margin_mean": 0.38737374544143677,
"margin_dpo/margin_std": 0.7291325330734253,
"step": 80
},
{
"epoch": 0.12244897959183673,
"fcm_dpo/beta": 1.2081029415130615,
"fcm_dpo/delta": -0.13699756562709808,
"fcm_dpo/margin": 0.4377623200416565,
"fcm_dpo/q_t": 0.3887847363948822,
"grad_norm": 295.5246276855469,
"learning_rate": 4.994093197099587e-07,
"logits/chosen": 0.07170180976390839,
"logits/rejected": 0.039106931537389755,
"logps/chosen": -64.8592300415039,
"logps/ref_chosen": -63.80437088012695,
"logps/ref_rejected": -79.3484115600586,
"logps/rejected": -80.84103393554688,
"loss": 1.0711,
"margin_dpo/margin_mean": 0.43776261806488037,
"margin_dpo/margin_std": 0.6732680797576904,
"step": 81
},
{
"epoch": 0.12396069538926682,
"fcm_dpo/beta": 1.1594877243041992,
"fcm_dpo/delta": -0.2622827887535095,
"fcm_dpo/margin": 0.5566993951797485,
"fcm_dpo/q_t": 0.3570610582828522,
"grad_norm": 258.639404296875,
"learning_rate": 4.993149937871306e-07,
"logits/chosen": 0.05478543043136597,
"logits/rejected": -0.006189014762639999,
"logps/chosen": -49.740875244140625,
"logps/ref_chosen": -48.817893981933594,
"logps/ref_rejected": -70.31497955322266,
"logps/rejected": -71.79466247558594,
"loss": 0.9629,
"margin_dpo/margin_mean": 0.5566992163658142,
"margin_dpo/margin_std": 0.6383960843086243,
"step": 82
},
{
"epoch": 0.1254724111866969,
"fcm_dpo/beta": 1.1256608963012695,
"fcm_dpo/delta": -0.09757444262504578,
"fcm_dpo/margin": 0.43767139315605164,
"fcm_dpo/q_t": 0.39699286222457886,
"grad_norm": 282.66607666015625,
"learning_rate": 4.992136939879856e-07,
"logits/chosen": 0.1710186004638672,
"logits/rejected": 0.12015236914157867,
"logps/chosen": -58.20113754272461,
"logps/ref_chosen": -57.15077209472656,
"logps/ref_rejected": -75.1710205078125,
"logps/rejected": -76.6590576171875,
"loss": 1.1497,
"margin_dpo/margin_mean": 0.43767082691192627,
"margin_dpo/margin_std": 0.8551985025405884,
"step": 83
},
{
"epoch": 0.12698412698412698,
"fcm_dpo/beta": 1.1269830465316772,
"fcm_dpo/delta": 0.03831970691680908,
"fcm_dpo/margin": 0.3221731185913086,
"fcm_dpo/q_t": 0.4186519980430603,
"grad_norm": 351.0410461425781,
"learning_rate": 4.991054231460969e-07,
"logits/chosen": 0.13431841135025024,
"logits/rejected": 0.09221207350492477,
"logps/chosen": -65.97208404541016,
"logps/ref_chosen": -64.77729797363281,
"logps/ref_rejected": -84.71949768066406,
"logps/rejected": -86.2364501953125,
"loss": 1.2095,
"margin_dpo/margin_mean": 0.322173148393631,
"margin_dpo/margin_std": 0.7237873673439026,
"step": 84
},
{
"epoch": 0.12849584278155707,
"fcm_dpo/beta": 1.082049012184143,
"fcm_dpo/delta": -0.3127109408378601,
"fcm_dpo/margin": 0.6393401622772217,
"fcm_dpo/q_t": 0.3549611568450928,
"grad_norm": 254.55479431152344,
"learning_rate": 4.989901842900325e-07,
"logits/chosen": 0.12381778657436371,
"logits/rejected": 0.08032877743244171,
"logps/chosen": -51.256591796875,
"logps/ref_chosen": -50.25169372558594,
"logps/ref_rejected": -66.55439758300781,
"logps/rejected": -68.19862365722656,
"loss": 0.9901,
"margin_dpo/margin_mean": 0.6393401622772217,
"margin_dpo/margin_std": 0.8425341844558716,
"step": 85
},
{
"epoch": 0.13000755857898716,
"fcm_dpo/beta": 1.0500352382659912,
"fcm_dpo/delta": -0.04895002394914627,
"fcm_dpo/margin": 0.4251331388950348,
"fcm_dpo/q_t": 0.40430694818496704,
"grad_norm": 238.89324951171875,
"learning_rate": 4.988679806432711e-07,
"logits/chosen": 0.1550569087266922,
"logits/rejected": 0.13627271354198456,
"logps/chosen": -61.91556167602539,
"logps/ref_chosen": -60.72917938232422,
"logps/ref_rejected": -72.30961608886719,
"logps/rejected": -73.92112731933594,
"loss": 1.1148,
"margin_dpo/margin_mean": 0.42513370513916016,
"margin_dpo/margin_std": 0.7163010835647583,
"step": 86
},
{
"epoch": 0.13151927437641722,
"fcm_dpo/beta": 1.0135871171951294,
"fcm_dpo/delta": -0.1784614622592926,
"fcm_dpo/margin": 0.5574131011962891,
"fcm_dpo/q_t": 0.37792396545410156,
"grad_norm": 287.37872314453125,
"learning_rate": 4.987388156241114e-07,
"logits/chosen": 0.06490974128246307,
"logits/rejected": 0.01204625889658928,
"logps/chosen": -66.92779541015625,
"logps/ref_chosen": -65.75796508789062,
"logps/ref_rejected": -84.81159973144531,
"logps/rejected": -86.53883361816406,
"loss": 1.1033,
"margin_dpo/margin_mean": 0.5574125647544861,
"margin_dpo/margin_std": 0.9363458156585693,
"step": 87
},
{
"epoch": 0.1330309901738473,
"fcm_dpo/beta": 0.9974713325500488,
"fcm_dpo/delta": -0.057315874844789505,
"fcm_dpo/margin": 0.4542155861854553,
"fcm_dpo/q_t": 0.4059000611305237,
"grad_norm": 284.4267272949219,
"learning_rate": 4.986026928455767e-07,
"logits/chosen": 0.15725532174110413,
"logits/rejected": 0.13133074343204498,
"logps/chosen": -64.01789855957031,
"logps/ref_chosen": -62.82402801513672,
"logps/ref_rejected": -74.9607162475586,
"logps/rejected": -76.60880279541016,
"loss": 1.1881,
"margin_dpo/margin_mean": 0.4542158246040344,
"margin_dpo/margin_std": 0.9578008651733398,
"step": 88
},
{
"epoch": 0.1345427059712774,
"fcm_dpo/beta": 1.00105881690979,
"fcm_dpo/delta": -0.06055300682783127,
"fcm_dpo/margin": 0.45661479234695435,
"fcm_dpo/q_t": 0.40275073051452637,
"grad_norm": 266.41510009765625,
"learning_rate": 4.984596161153135e-07,
"logits/chosen": 0.20890012383460999,
"logits/rejected": 0.1264243870973587,
"logps/chosen": -42.28339767456055,
"logps/ref_chosen": -41.191436767578125,
"logps/ref_rejected": -85.44769287109375,
"logps/rejected": -86.99627685546875,
"loss": 1.1581,
"margin_dpo/margin_mean": 0.45661553740501404,
"margin_dpo/margin_std": 0.8792251348495483,
"step": 89
},
{
"epoch": 0.1360544217687075,
"fcm_dpo/beta": 0.9870717525482178,
"fcm_dpo/delta": -0.043573714792728424,
"fcm_dpo/margin": 0.4473706781864166,
"fcm_dpo/q_t": 0.40594804286956787,
"grad_norm": 262.72222900390625,
"learning_rate": 4.983095894354857e-07,
"logits/chosen": 0.10524974763393402,
"logits/rejected": 0.05074058473110199,
"logps/chosen": -57.77943420410156,
"logps/ref_chosen": -56.58390808105469,
"logps/ref_rejected": -86.86978149414062,
"logps/rejected": -88.51268005371094,
"loss": 1.1531,
"margin_dpo/margin_mean": 0.4473702609539032,
"margin_dpo/margin_std": 0.8626862168312073,
"step": 90
},
{
"epoch": 0.13756613756613756,
"fcm_dpo/beta": 0.9517369270324707,
"fcm_dpo/delta": -0.20988881587982178,
"fcm_dpo/margin": 0.6276508569717407,
"fcm_dpo/q_t": 0.37632960081100464,
"grad_norm": 212.9899139404297,
"learning_rate": 4.98152617002662e-07,
"logits/chosen": 0.10068871080875397,
"logits/rejected": 0.057977497577667236,
"logps/chosen": -53.59182357788086,
"logps/ref_chosen": -52.38234329223633,
"logps/ref_rejected": -72.17642211914062,
"logps/rejected": -74.0135498046875,
"loss": 1.0585,
"margin_dpo/margin_mean": 0.6276512145996094,
"margin_dpo/margin_std": 0.9779685735702515,
"step": 91
},
{
"epoch": 0.13907785336356765,
"fcm_dpo/beta": 0.9485722780227661,
"fcm_dpo/delta": -0.03629232943058014,
"fcm_dpo/margin": 0.4553447365760803,
"fcm_dpo/q_t": 0.40844064950942993,
"grad_norm": 226.4413604736328,
"learning_rate": 4.979887032076988e-07,
"logits/chosen": 0.13536790013313293,
"logits/rejected": 0.09770982712507248,
"logps/chosen": -54.35186004638672,
"logps/ref_chosen": -53.00870132446289,
"logps/ref_rejected": -79.77812957763672,
"logps/rejected": -81.5766372680664,
"loss": 1.1892,
"margin_dpo/margin_mean": 0.4553444981575012,
"margin_dpo/margin_std": 0.9228836297988892,
"step": 92
},
{
"epoch": 0.14058956916099774,
"fcm_dpo/beta": 0.9315764904022217,
"fcm_dpo/delta": -0.021424148231744766,
"fcm_dpo/margin": 0.45140916109085083,
"fcm_dpo/q_t": 0.41404592990875244,
"grad_norm": 205.6354217529297,
"learning_rate": 4.978178526356172e-07,
"logits/chosen": 0.144273042678833,
"logits/rejected": 0.11550083011388779,
"logps/chosen": -46.28402328491211,
"logps/ref_chosen": -44.90705108642578,
"logps/ref_rejected": -58.7879524230957,
"logps/rejected": -60.61632537841797,
"loss": 1.1919,
"margin_dpo/margin_mean": 0.45140883326530457,
"margin_dpo/margin_std": 0.9891193509101868,
"step": 93
},
{
"epoch": 0.1421012849584278,
"fcm_dpo/beta": 0.8985931873321533,
"fcm_dpo/delta": -0.160437673330307,
"fcm_dpo/margin": 0.6093255281448364,
"fcm_dpo/q_t": 0.3826707899570465,
"grad_norm": 215.9398193359375,
"learning_rate": 4.976400700654751e-07,
"logits/chosen": 0.16830044984817505,
"logits/rejected": 0.13087698817253113,
"logps/chosen": -61.00678253173828,
"logps/ref_chosen": -59.93777084350586,
"logps/ref_rejected": -79.3138427734375,
"logps/rejected": -80.99217224121094,
"loss": 1.1328,
"margin_dpo/margin_mean": 0.6093254685401917,
"margin_dpo/margin_std": 1.1317553520202637,
"step": 94
},
{
"epoch": 0.1436130007558579,
"fcm_dpo/beta": 0.8708415627479553,
"fcm_dpo/delta": -0.18497896194458008,
"fcm_dpo/margin": 0.657183825969696,
"fcm_dpo/q_t": 0.37511885166168213,
"grad_norm": 209.54396057128906,
"learning_rate": 4.974553604702332e-07,
"logits/chosen": 0.07259067893028259,
"logits/rejected": 0.011487288400530815,
"logps/chosen": -61.56159210205078,
"logps/ref_chosen": -60.168487548828125,
"logps/ref_rejected": -90.73665618896484,
"logps/rejected": -92.78694152832031,
"loss": 1.0422,
"margin_dpo/margin_mean": 0.6571837067604065,
"margin_dpo/margin_std": 0.9313837289810181,
"step": 95
},
{
"epoch": 0.14512471655328799,
"fcm_dpo/beta": 0.8509422540664673,
"fcm_dpo/delta": -0.15672199428081512,
"fcm_dpo/margin": 0.644451379776001,
"fcm_dpo/q_t": 0.3799927532672882,
"grad_norm": 196.17584228515625,
"learning_rate": 4.972637290166157e-07,
"logits/chosen": 0.11201013624668121,
"logits/rejected": 0.07059841603040695,
"logps/chosen": -62.019317626953125,
"logps/ref_chosen": -60.66877746582031,
"logps/ref_rejected": -88.30673217773438,
"logps/rejected": -90.30171966552734,
"loss": 1.0662,
"margin_dpo/margin_mean": 0.6444511413574219,
"margin_dpo/margin_std": 0.9644219875335693,
"step": 96
},
{
"epoch": 0.14663643235071808,
"fcm_dpo/beta": 0.8328443765640259,
"fcm_dpo/delta": -0.009154386818408966,
"fcm_dpo/margin": 0.48872342705726624,
"fcm_dpo/q_t": 0.420562207698822,
"grad_norm": 232.0682830810547,
"learning_rate": 4.970651810649666e-07,
"logits/chosen": 0.06511442363262177,
"logits/rejected": 0.02223094552755356,
"logps/chosen": -66.63578033447266,
"logps/ref_chosen": -65.04412078857422,
"logps/ref_rejected": -78.42092895507812,
"logps/rejected": -80.50130462646484,
"loss": 1.1966,
"margin_dpo/margin_mean": 0.48872342705726624,
"margin_dpo/margin_std": 1.0647801160812378,
"step": 97
},
{
"epoch": 0.14814814814814814,
"fcm_dpo/beta": 0.8404428958892822,
"fcm_dpo/delta": 0.041608165949583054,
"fcm_dpo/margin": 0.4276841878890991,
"fcm_dpo/q_t": 0.4225125312805176,
"grad_norm": 219.1876678466797,
"learning_rate": 4.968597221690985e-07,
"logits/chosen": 0.13621120154857635,
"logits/rejected": 0.11001783609390259,
"logps/chosen": -56.84211730957031,
"logps/ref_chosen": -55.503231048583984,
"logps/ref_rejected": -72.81553649902344,
"logps/rejected": -74.58211517333984,
"loss": 1.1941,
"margin_dpo/margin_mean": 0.4276837110519409,
"margin_dpo/margin_std": 0.8988784551620483,
"step": 98
},
{
"epoch": 0.14965986394557823,
"fcm_dpo/beta": 0.82810378074646,
"fcm_dpo/delta": -0.16143038868904114,
"fcm_dpo/margin": 0.6673827171325684,
"fcm_dpo/q_t": 0.38527190685272217,
"grad_norm": 217.5617218017578,
"learning_rate": 4.966473580761389e-07,
"logits/chosen": 0.14945261180400848,
"logits/rejected": 0.11294733732938766,
"logps/chosen": -59.96638107299805,
"logps/ref_chosen": -58.57563781738281,
"logps/ref_rejected": -78.693603515625,
"logps/rejected": -80.7517318725586,
"loss": 1.0649,
"margin_dpo/margin_mean": 0.6673829555511475,
"margin_dpo/margin_std": 1.0417191982269287,
"step": 99
},
{
"epoch": 0.15117157974300832,
"fcm_dpo/beta": 0.8276715874671936,
"fcm_dpo/delta": -0.04269018769264221,
"fcm_dpo/margin": 0.5278904438018799,
"fcm_dpo/q_t": 0.4166935086250305,
"grad_norm": 255.79344177246094,
"learning_rate": 4.964280947263676e-07,
"logits/chosen": 0.1620662957429886,
"logits/rejected": 0.15444490313529968,
"logps/chosen": -81.0914306640625,
"logps/ref_chosen": -79.58343505859375,
"logps/ref_rejected": -92.152587890625,
"logps/rejected": -94.18846893310547,
"loss": 1.2587,
"margin_dpo/margin_mean": 0.527890682220459,
"margin_dpo/margin_std": 1.3204901218414307,
"step": 100
},
{
"epoch": 0.15268329554043839,
"fcm_dpo/beta": 0.7907246947288513,
"fcm_dpo/delta": -0.26945069432258606,
"fcm_dpo/margin": 0.824831485748291,
"fcm_dpo/q_t": 0.3596486747264862,
"grad_norm": 161.67445373535156,
"learning_rate": 4.96201938253052e-07,
"logits/chosen": 0.11919434368610382,
"logits/rejected": 0.08386727422475815,
"logps/chosen": -53.63953399658203,
"logps/ref_chosen": -52.332786560058594,
"logps/ref_rejected": -69.55589294433594,
"logps/rejected": -71.6874771118164,
"loss": 0.9761,
"margin_dpo/margin_mean": 0.8248312473297119,
"margin_dpo/margin_std": 0.9689401388168335,
"step": 101
},
{
"epoch": 0.15419501133786848,
"fcm_dpo/beta": 0.7701091170310974,
"fcm_dpo/delta": -0.039370447397232056,
"fcm_dpo/margin": 0.5678646564483643,
"fcm_dpo/q_t": 0.40815913677215576,
"grad_norm": 202.9934539794922,
"learning_rate": 4.959688949822748e-07,
"logits/chosen": 0.0649181455373764,
"logits/rejected": 0.026860184967517853,
"logps/chosen": -66.30096435546875,
"logps/ref_chosen": -64.74348449707031,
"logps/ref_rejected": -69.06132507324219,
"logps/rejected": -71.18667602539062,
"loss": 1.1748,
"margin_dpo/margin_mean": 0.5678646564483643,
"margin_dpo/margin_std": 1.1583635807037354,
"step": 102
},
{
"epoch": 0.15570672713529857,
"fcm_dpo/beta": 0.7587429285049438,
"fcm_dpo/delta": -0.07340075820684433,
"fcm_dpo/margin": 0.6194088459014893,
"fcm_dpo/q_t": 0.4005558490753174,
"grad_norm": 210.62454223632812,
"learning_rate": 4.957289714327572e-07,
"logits/chosen": 0.14911600947380066,
"logits/rejected": 0.1179221123456955,
"logps/chosen": -65.3800277709961,
"logps/ref_chosen": -63.83664321899414,
"logps/ref_rejected": -79.32362365722656,
"logps/rejected": -81.48641967773438,
"loss": 1.1314,
"margin_dpo/margin_mean": 0.6194085478782654,
"margin_dpo/margin_std": 1.123617172241211,
"step": 103
},
{
"epoch": 0.15721844293272866,
"fcm_dpo/beta": 0.7468178272247314,
"fcm_dpo/delta": -0.08313950151205063,
"fcm_dpo/margin": 0.6416445970535278,
"fcm_dpo/q_t": 0.40650674700737,
"grad_norm": 213.34971618652344,
"learning_rate": 4.954821743156767e-07,
"logits/chosen": 0.19349342584609985,
"logits/rejected": 0.1075948029756546,
"logps/chosen": -62.497222900390625,
"logps/ref_chosen": -60.99920654296875,
"logps/ref_rejected": -98.84645080566406,
"logps/rejected": -100.98611450195312,
"loss": 1.1443,
"margin_dpo/margin_mean": 0.6416438817977905,
"margin_dpo/margin_std": 1.2357177734375,
"step": 104
},
{
"epoch": 0.15873015873015872,
"fcm_dpo/beta": 0.7460736632347107,
"fcm_dpo/delta": 0.003730788826942444,
"fcm_dpo/margin": 0.530280590057373,
"fcm_dpo/q_t": 0.4173508286476135,
"grad_norm": 230.8499298095703,
"learning_rate": 4.952285105344791e-07,
"logits/chosen": 0.11480271816253662,
"logits/rejected": 0.06232992187142372,
"logps/chosen": -72.46751403808594,
"logps/ref_chosen": -70.95027160644531,
"logps/ref_rejected": -87.88340759277344,
"logps/rejected": -89.93093872070312,
"loss": 1.2081,
"margin_dpo/margin_mean": 0.5302802920341492,
"margin_dpo/margin_std": 1.1925766468048096,
"step": 105
},
{
"epoch": 0.1602418745275888,
"fcm_dpo/beta": 0.7372579574584961,
"fcm_dpo/delta": -0.013026438653469086,
"fcm_dpo/margin": 0.5590275526046753,
"fcm_dpo/q_t": 0.4098079204559326,
"grad_norm": 204.2117919921875,
"learning_rate": 4.949679871846857e-07,
"logits/chosen": 0.15588057041168213,
"logits/rejected": 0.14248088002204895,
"logps/chosen": -63.92448806762695,
"logps/ref_chosen": -62.45933151245117,
"logps/ref_rejected": -67.00595092773438,
"logps/rejected": -69.03013610839844,
"loss": 1.174,
"margin_dpo/margin_mean": 0.5590271949768066,
"margin_dpo/margin_std": 1.1391665935516357,
"step": 106
},
{
"epoch": 0.1617535903250189,
"fcm_dpo/beta": 0.7442134618759155,
"fcm_dpo/delta": 0.10780694335699081,
"fcm_dpo/margin": 0.3956339359283447,
"fcm_dpo/q_t": 0.4418404996395111,
"grad_norm": 259.65618896484375,
"learning_rate": 4.947006115536947e-07,
"logits/chosen": 0.09667301923036575,
"logits/rejected": 0.07568878680467606,
"logps/chosen": -77.47236633300781,
"logps/ref_chosen": -75.83796691894531,
"logps/ref_rejected": -87.74038696289062,
"logps/rejected": -89.7704086303711,
"loss": 1.3412,
"margin_dpo/margin_mean": 0.39563363790512085,
"margin_dpo/margin_std": 1.3363198041915894,
"step": 107
},
{
"epoch": 0.16326530612244897,
"fcm_dpo/beta": 0.7422864437103271,
"fcm_dpo/delta": -0.15637360513210297,
"fcm_dpo/margin": 0.7383235692977905,
"fcm_dpo/q_t": 0.38797321915626526,
"grad_norm": 188.6773223876953,
"learning_rate": 4.944263911205772e-07,
"logits/chosen": 0.0897904634475708,
"logits/rejected": 0.06220732629299164,
"logps/chosen": -69.81796264648438,
"logps/ref_chosen": -68.39323425292969,
"logps/ref_rejected": -83.24267578125,
"logps/rejected": -85.40571594238281,
"loss": 1.1084,
"margin_dpo/margin_mean": 0.738323450088501,
"margin_dpo/margin_std": 1.277268886566162,
"step": 108
},
{
"epoch": 0.16477702191987906,
"fcm_dpo/beta": 0.7077078819274902,
"fcm_dpo/delta": -0.24241167306900024,
"fcm_dpo/margin": 0.8866573572158813,
"fcm_dpo/q_t": 0.371415376663208,
"grad_norm": 157.52822875976562,
"learning_rate": 4.941453335558681e-07,
"logits/chosen": 0.10934356600046158,
"logits/rejected": 0.059307027608156204,
"logps/chosen": -56.932743072509766,
"logps/ref_chosen": -55.52748107910156,
"logps/ref_rejected": -83.55218505859375,
"logps/rejected": -85.84410858154297,
"loss": 1.0002,
"margin_dpo/margin_mean": 0.8866567611694336,
"margin_dpo/margin_std": 1.2174742221832275,
"step": 109
},
{
"epoch": 0.16628873771730915,
"fcm_dpo/beta": 0.7073754072189331,
"fcm_dpo/delta": 0.05803888291120529,
"fcm_dpo/margin": 0.48516157269477844,
"fcm_dpo/q_t": 0.428050696849823,
"grad_norm": 221.89695739746094,
"learning_rate": 4.938574467213517e-07,
"logits/chosen": 0.08555251359939575,
"logits/rejected": 0.09368757903575897,
"logps/chosen": -82.78941345214844,
"logps/ref_chosen": -81.15874481201172,
"logps/ref_rejected": -72.56021118164062,
"logps/rejected": -74.67604064941406,
"loss": 1.2345,
"margin_dpo/margin_mean": 0.4851612150669098,
"margin_dpo/margin_std": 1.202465295791626,
"step": 110
},
{
"epoch": 0.16780045351473924,
"fcm_dpo/beta": 0.707313597202301,
"fcm_dpo/delta": -0.029390130192041397,
"fcm_dpo/margin": 0.6044574975967407,
"fcm_dpo/q_t": 0.4066217243671417,
"grad_norm": 188.60508728027344,
"learning_rate": 4.935627386698418e-07,
"logits/chosen": 0.20676471292972565,
"logits/rejected": 0.17049609124660492,
"logps/chosen": -54.14882278442383,
"logps/ref_chosen": -52.358985900878906,
"logps/ref_rejected": -77.06150817871094,
"logps/rejected": -79.45579528808594,
"loss": 1.2042,
"margin_dpo/margin_mean": 0.6044571399688721,
"margin_dpo/margin_std": 1.3020250797271729,
"step": 111
},
{
"epoch": 0.1693121693121693,
"fcm_dpo/beta": 0.6883267164230347,
"fcm_dpo/delta": -0.13100209832191467,
"fcm_dpo/margin": 0.7614709734916687,
"fcm_dpo/q_t": 0.3860092759132385,
"grad_norm": 189.34796142578125,
"learning_rate": 4.932612176449559e-07,
"logits/chosen": 0.10900241881608963,
"logits/rejected": 0.048736996948719025,
"logps/chosen": -64.50396728515625,
"logps/ref_chosen": -63.02006530761719,
"logps/ref_rejected": -111.36941528320312,
"logps/rejected": -113.61478424072266,
"loss": 1.1028,
"margin_dpo/margin_mean": 0.7614700198173523,
"margin_dpo/margin_std": 1.2644274234771729,
"step": 112
},
{
"epoch": 0.1708238851095994,
"fcm_dpo/beta": 0.6883626580238342,
"fcm_dpo/delta": -0.01845688372850418,
"fcm_dpo/margin": 0.6044442057609558,
"fcm_dpo/q_t": 0.4099445939064026,
"grad_norm": 201.9014434814453,
"learning_rate": 4.929528920808854e-07,
"logits/chosen": 0.13628755509853363,
"logits/rejected": 0.0992526188492775,
"logps/chosen": -57.55316925048828,
"logps/ref_chosen": -55.80766296386719,
"logps/ref_rejected": -69.84014129638672,
"logps/rejected": -72.19009399414062,
"loss": 1.2038,
"margin_dpo/margin_mean": 0.6044440269470215,
"margin_dpo/margin_std": 1.3071109056472778,
"step": 113
},
{
"epoch": 0.17233560090702948,
"fcm_dpo/beta": 0.6465242505073547,
"fcm_dpo/delta": -0.31181034445762634,
"fcm_dpo/margin": 1.0640312433242798,
"fcm_dpo/q_t": 0.3557575047016144,
"grad_norm": 138.01580810546875,
"learning_rate": 4.92637770602159e-07,
"logits/chosen": 0.14081577956676483,
"logits/rejected": 0.08423489332199097,
"logps/chosen": -67.75685119628906,
"logps/ref_chosen": -66.33277130126953,
"logps/ref_rejected": -71.61489868164062,
"logps/rejected": -74.1030044555664,
"loss": 0.9686,
"margin_dpo/margin_mean": 1.0640311241149902,
"margin_dpo/margin_std": 1.3256173133850098,
"step": 114
},
{
"epoch": 0.17384731670445955,
"fcm_dpo/beta": 0.6344318985939026,
"fcm_dpo/delta": -0.0754128247499466,
"fcm_dpo/margin": 0.7438329458236694,
"fcm_dpo/q_t": 0.4032962918281555,
"grad_norm": 169.448974609375,
"learning_rate": 4.923158620234019e-07,
"logits/chosen": 0.14370641112327576,
"logits/rejected": 0.08639145642518997,
"logps/chosen": -57.44829559326172,
"logps/ref_chosen": -55.74903869628906,
"logps/ref_rejected": -79.59849548339844,
"logps/rejected": -82.04158782958984,
"loss": 1.1079,
"margin_dpo/margin_mean": 0.7438331842422485,
"margin_dpo/margin_std": 1.2644445896148682,
"step": 115
},
{
"epoch": 0.17535903250188964,
"fcm_dpo/beta": 0.6176035404205322,
"fcm_dpo/delta": -0.15088674426078796,
"fcm_dpo/margin": 0.8790351152420044,
"fcm_dpo/q_t": 0.38145536184310913,
"grad_norm": 134.537841796875,
"learning_rate": 4.91987175349089e-07,
"logits/chosen": 0.15729355812072754,
"logits/rejected": 0.09324823319911957,
"logps/chosen": -50.96516418457031,
"logps/ref_chosen": -49.36516571044922,
"logps/ref_rejected": -72.84671020507812,
"logps/rejected": -75.32572937011719,
"loss": 1.0332,
"margin_dpo/margin_mean": 0.879035472869873,
"margin_dpo/margin_std": 1.1753203868865967,
"step": 116
},
{
"epoch": 0.17687074829931973,
"fcm_dpo/beta": 0.5997291803359985,
"fcm_dpo/delta": -0.06563954800367355,
"fcm_dpo/margin": 0.7671282291412354,
"fcm_dpo/q_t": 0.3957594633102417,
"grad_norm": 144.81961059570312,
"learning_rate": 4.916517197732933e-07,
"logits/chosen": 0.13732296228408813,
"logits/rejected": 0.10388919711112976,
"logps/chosen": -59.18053436279297,
"logps/ref_chosen": -57.710899353027344,
"logps/ref_rejected": -69.77253723144531,
"logps/rejected": -72.00930786132812,
"loss": 1.1199,
"margin_dpo/margin_mean": 0.767128586769104,
"margin_dpo/margin_std": 1.2783540487289429,
"step": 117
},
{
"epoch": 0.17838246409674982,
"fcm_dpo/beta": 0.5893198847770691,
"fcm_dpo/delta": -0.13144376873970032,
"fcm_dpo/margin": 0.8884799480438232,
"fcm_dpo/q_t": 0.3849080204963684,
"grad_norm": 144.99359130859375,
"learning_rate": 4.913095046794281e-07,
"logits/chosen": 0.20904436707496643,
"logits/rejected": 0.1704784482717514,
"logps/chosen": -53.963443756103516,
"logps/ref_chosen": -52.479896545410156,
"logps/ref_rejected": -81.359130859375,
"logps/rejected": -83.73114776611328,
"loss": 1.0523,
"margin_dpo/margin_mean": 0.8884795904159546,
"margin_dpo/margin_std": 1.2354551553726196,
"step": 118
},
{
"epoch": 0.17989417989417988,
"fcm_dpo/beta": 0.5922322869300842,
"fcm_dpo/delta": 0.035253897309303284,
"fcm_dpo/margin": 0.6178330183029175,
"fcm_dpo/q_t": 0.4204210042953491,
"grad_norm": 160.6442413330078,
"learning_rate": 4.909605396399855e-07,
"logits/chosen": 0.12867693603038788,
"logits/rejected": 0.09313994646072388,
"logps/chosen": -63.268272399902344,
"logps/ref_chosen": -61.35767364501953,
"logps/ref_rejected": -75.71510314941406,
"logps/rejected": -78.2435302734375,
"loss": 1.2274,
"margin_dpo/margin_mean": 0.6178329586982727,
"margin_dpo/margin_std": 1.4659829139709473,
"step": 119
},
{
"epoch": 0.18140589569160998,
"fcm_dpo/beta": 0.5799944400787354,
"fcm_dpo/delta": -0.13340742886066437,
"fcm_dpo/margin": 0.9073317050933838,
"fcm_dpo/q_t": 0.38455823063850403,
"grad_norm": 136.73265075683594,
"learning_rate": 4.906048344162676e-07,
"logits/chosen": 0.1341107040643692,
"logits/rejected": 0.07828693091869354,
"logps/chosen": -61.49884796142578,
"logps/ref_chosen": -59.907569885253906,
"logps/ref_rejected": -79.6910629272461,
"logps/rejected": -82.1896743774414,
"loss": 1.0331,
"margin_dpo/margin_mean": 0.9073318243026733,
"margin_dpo/margin_std": 1.1959114074707031,
"step": 120
},
{
"epoch": 0.18291761148904007,
"fcm_dpo/beta": 0.5721194744110107,
"fcm_dpo/delta": -0.04547984525561333,
"fcm_dpo/margin": 0.7750140428543091,
"fcm_dpo/q_t": 0.40316978096961975,
"grad_norm": 131.84844970703125,
"learning_rate": 4.902423989581143e-07,
"logits/chosen": 0.23460043966770172,
"logits/rejected": 0.15458913147449493,
"logps/chosen": -57.46360778808594,
"logps/ref_chosen": -55.66604232788086,
"logps/ref_rejected": -101.56233978271484,
"logps/rejected": -104.13491821289062,
"loss": 1.0973,
"margin_dpo/margin_mean": 0.7750145792961121,
"margin_dpo/margin_std": 1.2166812419891357,
"step": 121
},
{
"epoch": 0.18442932728647016,
"fcm_dpo/beta": 0.5550453066825867,
"fcm_dpo/delta": -0.2271803468465805,
"fcm_dpo/margin": 1.1058701276779175,
"fcm_dpo/q_t": 0.3700582981109619,
"grad_norm": 148.10528564453125,
"learning_rate": 4.898732434036243e-07,
"logits/chosen": 0.14437180757522583,
"logits/rejected": 0.11171398311853409,
"logps/chosen": -65.11761474609375,
"logps/ref_chosen": -63.334373474121094,
"logps/ref_rejected": -73.67523193359375,
"logps/rejected": -76.5643539428711,
"loss": 1.0191,
"margin_dpo/margin_mean": 1.1058697700500488,
"margin_dpo/margin_std": 1.5541871786117554,
"step": 122
},
{
"epoch": 0.18594104308390022,
"fcm_dpo/beta": 0.5463725328445435,
"fcm_dpo/delta": -0.061007022857666016,
"fcm_dpo/margin": 0.836315929889679,
"fcm_dpo/q_t": 0.39866113662719727,
"grad_norm": 136.9763946533203,
"learning_rate": 4.894973780788722e-07,
"logits/chosen": 0.1658335030078888,
"logits/rejected": 0.12649542093276978,
"logps/chosen": -58.5877799987793,
"logps/ref_chosen": -56.89874267578125,
"logps/ref_rejected": -78.97028350830078,
"logps/rejected": -81.49563598632812,
"loss": 1.1202,
"margin_dpo/margin_mean": 0.8363161087036133,
"margin_dpo/margin_std": 1.388123631477356,
"step": 123
},
{
"epoch": 0.1874527588813303,
"fcm_dpo/beta": 0.5198322534561157,
"fcm_dpo/delta": -0.2163441777229309,
"fcm_dpo/margin": 1.1578905582427979,
"fcm_dpo/q_t": 0.3664587140083313,
"grad_norm": 113.58149719238281,
"learning_rate": 4.89114813497619e-07,
"logits/chosen": 0.18245506286621094,
"logits/rejected": 0.12739111483097076,
"logps/chosen": -58.94036102294922,
"logps/ref_chosen": -57.116085052490234,
"logps/ref_rejected": -87.93074035644531,
"logps/rejected": -90.91291046142578,
"loss": 0.9841,
"margin_dpo/margin_mean": 1.15788996219635,
"margin_dpo/margin_std": 1.3284329175949097,
"step": 124
},
{
"epoch": 0.1889644746787604,
"fcm_dpo/beta": 0.5057904720306396,
"fcm_dpo/delta": -0.11596319824457169,
"fcm_dpo/margin": 1.007077693939209,
"fcm_dpo/q_t": 0.3886137008666992,
"grad_norm": 125.64469909667969,
"learning_rate": 4.887255603610184e-07,
"logits/chosen": 0.1723092794418335,
"logits/rejected": 0.11974655091762543,
"logps/chosen": -67.64098358154297,
"logps/ref_chosen": -65.7061767578125,
"logps/ref_rejected": -91.72711944580078,
"logps/rejected": -94.66900634765625,
"loss": 1.0564,
"margin_dpo/margin_mean": 1.0070770978927612,
"margin_dpo/margin_std": 1.4230128526687622,
"step": 125
},
{
"epoch": 0.19047619047619047,
"fcm_dpo/beta": 0.5007922649383545,
"fcm_dpo/delta": -0.04253540188074112,
"fcm_dpo/margin": 0.8797393441200256,
"fcm_dpo/q_t": 0.41190239787101746,
"grad_norm": 117.30374908447266,
"learning_rate": 4.883296295573176e-07,
"logits/chosen": 0.020032621920108795,
"logits/rejected": 0.01394350454211235,
"logps/chosen": -69.92984771728516,
"logps/ref_chosen": -68.17608642578125,
"logps/ref_rejected": -65.1175537109375,
"logps/rejected": -67.75105285644531,
"loss": 1.1541,
"margin_dpo/margin_mean": 0.8797396421432495,
"margin_dpo/margin_std": 1.8252670764923096,
"step": 126
},
{
"epoch": 0.19198790627362056,
"fcm_dpo/beta": 0.48407071828842163,
"fcm_dpo/delta": -0.1724184900522232,
"fcm_dpo/margin": 1.157707691192627,
"fcm_dpo/q_t": 0.3734918236732483,
"grad_norm": 111.56476593017578,
"learning_rate": 4.87927032161552e-07,
"logits/chosen": 0.10429647564888,
"logits/rejected": 0.07567700743675232,
"logps/chosen": -63.79847717285156,
"logps/ref_chosen": -61.88023376464844,
"logps/ref_rejected": -68.46012878417969,
"logps/rejected": -71.53607177734375,
"loss": 0.9958,
"margin_dpo/margin_mean": 1.1577074527740479,
"margin_dpo/margin_std": 1.2988379001617432,
"step": 127
},
{
"epoch": 0.19349962207105065,
"fcm_dpo/beta": 0.4786633849143982,
"fcm_dpo/delta": -0.03551746904850006,
"fcm_dpo/margin": 0.9054340124130249,
"fcm_dpo/q_t": 0.4062952995300293,
"grad_norm": 124.49673461914062,
"learning_rate": 4.875177794352363e-07,
"logits/chosen": 0.12086290121078491,
"logits/rejected": 0.07124130427837372,
"logps/chosen": -68.7914810180664,
"logps/ref_chosen": -66.708984375,
"logps/ref_rejected": -94.97969055175781,
"logps/rejected": -97.9676284790039,
"loss": 1.1534,
"margin_dpo/margin_mean": 0.9054335355758667,
"margin_dpo/margin_std": 1.7206192016601562,
"step": 128
},
{
"epoch": 0.19501133786848074,
"fcm_dpo/beta": 0.4809413254261017,
"fcm_dpo/delta": 0.016113094985485077,
"fcm_dpo/margin": 0.7994933128356934,
"fcm_dpo/q_t": 0.4160780906677246,
"grad_norm": 132.9231414794922,
"learning_rate": 4.871018828260491e-07,
"logits/chosen": 0.1366616189479828,
"logits/rejected": 0.12947385013103485,
"logps/chosen": -67.60844421386719,
"logps/ref_chosen": -65.33882904052734,
"logps/ref_rejected": -68.06109619140625,
"logps/rejected": -71.13020324707031,
"loss": 1.156,
"margin_dpo/margin_mean": 0.7994937896728516,
"margin_dpo/margin_std": 1.5029575824737549,
"step": 129
},
{
"epoch": 0.1965230536659108,
"fcm_dpo/beta": 0.47079285979270935,
"fcm_dpo/delta": -0.08745460212230682,
"fcm_dpo/margin": 1.020914912223816,
"fcm_dpo/q_t": 0.3913637697696686,
"grad_norm": 128.69020080566406,
"learning_rate": 4.866793539675126e-07,
"logits/chosen": 0.08387620002031326,
"logits/rejected": 0.04010556638240814,
"logps/chosen": -60.71220016479492,
"logps/ref_chosen": -58.660743713378906,
"logps/ref_rejected": -79.24510192871094,
"logps/rejected": -82.31746673583984,
"loss": 1.0497,
"margin_dpo/margin_mean": 1.020914912223816,
"margin_dpo/margin_std": 1.254804253578186,
"step": 130
},
{
"epoch": 0.1980347694633409,
"fcm_dpo/beta": 0.4654075503349304,
"fcm_dpo/delta": -0.12034067511558533,
"fcm_dpo/margin": 1.1049726009368896,
"fcm_dpo/q_t": 0.3882097601890564,
"grad_norm": 114.07759094238281,
"learning_rate": 4.86250204678667e-07,
"logits/chosen": 0.12750566005706787,
"logits/rejected": 0.06975096464157104,
"logps/chosen": -54.52845764160156,
"logps/ref_chosen": -52.51453399658203,
"logps/ref_rejected": -85.18299865722656,
"logps/rejected": -88.30189514160156,
"loss": 1.0891,
"margin_dpo/margin_mean": 1.1049723625183105,
"margin_dpo/margin_std": 1.7650415897369385,
"step": 131
},
{
"epoch": 0.19954648526077098,
"fcm_dpo/beta": 0.4587087631225586,
"fcm_dpo/delta": -0.04479576647281647,
"fcm_dpo/margin": 0.9653003215789795,
"fcm_dpo/q_t": 0.4012880325317383,
"grad_norm": 122.89605712890625,
"learning_rate": 4.858144469637408e-07,
"logits/chosen": 0.21066978573799133,
"logits/rejected": 0.1797289103269577,
"logps/chosen": -67.94572448730469,
"logps/ref_chosen": -65.68513488769531,
"logps/ref_rejected": -69.54120635986328,
"logps/rejected": -72.76710510253906,
"loss": 1.1302,
"margin_dpo/margin_mean": 0.9653001427650452,
"margin_dpo/margin_std": 1.7105207443237305,
"step": 132
},
{
"epoch": 0.20105820105820105,
"fcm_dpo/beta": 0.4549636244773865,
"fcm_dpo/delta": -0.036287300288677216,
"fcm_dpo/margin": 0.9553782939910889,
"fcm_dpo/q_t": 0.4041925072669983,
"grad_norm": 119.6462173461914,
"learning_rate": 4.853720930118138e-07,
"logits/chosen": 0.1289050132036209,
"logits/rejected": 0.11935198307037354,
"logps/chosen": -65.79443359375,
"logps/ref_chosen": -63.598114013671875,
"logps/ref_rejected": -73.72798156738281,
"logps/rejected": -76.87968444824219,
"loss": 1.1168,
"margin_dpo/margin_mean": 0.955377995967865,
"margin_dpo/margin_std": 1.588864803314209,
"step": 133
},
{
"epoch": 0.20256991685563114,
"fcm_dpo/beta": 0.4380050301551819,
"fcm_dpo/delta": -0.21895548701286316,
"fcm_dpo/margin": 1.3798502683639526,
"fcm_dpo/q_t": 0.3665582835674286,
"grad_norm": 100.31354522705078,
"learning_rate": 4.849231551964771e-07,
"logits/chosen": 0.2302895486354828,
"logits/rejected": 0.17770959436893463,
"logps/chosen": -55.90137481689453,
"logps/ref_chosen": -53.79457092285156,
"logps/ref_rejected": -74.16741943359375,
"logps/rejected": -77.65406799316406,
"loss": 0.9906,
"margin_dpo/margin_mean": 1.3798508644104004,
"margin_dpo/margin_std": 1.6743557453155518,
"step": 134
},
{
"epoch": 0.20408163265306123,
"fcm_dpo/beta": 0.436504989862442,
"fcm_dpo/delta": 0.015902848914265633,
"fcm_dpo/margin": 0.8812652230262756,
"fcm_dpo/q_t": 0.41663700342178345,
"grad_norm": 100.42858123779297,
"learning_rate": 4.844676460754862e-07,
"logits/chosen": 0.16259633004665375,
"logits/rejected": 0.13083291053771973,
"logps/chosen": -51.5870475769043,
"logps/ref_chosen": -49.441078186035156,
"logps/ref_rejected": -65.96878051757812,
"logps/rejected": -68.99601745605469,
"loss": 1.1571,
"margin_dpo/margin_mean": 0.8812651634216309,
"margin_dpo/margin_std": 1.672384262084961,
"step": 135
},
{
"epoch": 0.20559334845049132,
"fcm_dpo/beta": 0.4303373098373413,
"fcm_dpo/delta": -0.09148843586444855,
"fcm_dpo/margin": 1.1316370964050293,
"fcm_dpo/q_t": 0.40487587451934814,
"grad_norm": 126.06951141357422,
"learning_rate": 4.840055783904106e-07,
"logits/chosen": 0.12648090720176697,
"logits/rejected": 0.06315163522958755,
"logps/chosen": -69.26954650878906,
"logps/ref_chosen": -66.75926208496094,
"logps/ref_rejected": -94.61787414550781,
"logps/rejected": -98.25979614257812,
"loss": 1.1621,
"margin_dpo/margin_mean": 1.1316382884979248,
"margin_dpo/margin_std": 2.3219375610351562,
"step": 136
},
{
"epoch": 0.20710506424792138,
"fcm_dpo/beta": 0.4227127432823181,
"fcm_dpo/delta": -0.10718655586242676,
"fcm_dpo/margin": 1.1874181032180786,
"fcm_dpo/q_t": 0.3915935456752777,
"grad_norm": 96.57810974121094,
"learning_rate": 4.835369650662767e-07,
"logits/chosen": 0.1805136352777481,
"logits/rejected": 0.15341074764728546,
"logps/chosen": -59.14149856567383,
"logps/ref_chosen": -56.78379821777344,
"logps/ref_rejected": -69.89952087402344,
"logps/rejected": -73.44464111328125,
"loss": 1.0832,
"margin_dpo/margin_mean": 1.1874182224273682,
"margin_dpo/margin_std": 1.862781047821045,
"step": 137
},
{
"epoch": 0.20861678004535147,
"fcm_dpo/beta": 0.4196794033050537,
"fcm_dpo/delta": 0.003834410570561886,
"fcm_dpo/margin": 0.9443340301513672,
"fcm_dpo/q_t": 0.41209620237350464,
"grad_norm": 105.36014556884766,
"learning_rate": 4.830618192112065e-07,
"logits/chosen": 0.15982270240783691,
"logits/rejected": 0.12580767273902893,
"logps/chosen": -61.54125213623047,
"logps/ref_chosen": -58.766014099121094,
"logps/ref_rejected": -68.12371826171875,
"logps/rejected": -71.84329223632812,
"loss": 1.1579,
"margin_dpo/margin_mean": 0.9443341493606567,
"margin_dpo/margin_std": 1.7914925813674927,
"step": 138
},
{
"epoch": 0.21012849584278157,
"fcm_dpo/beta": 0.4146318733692169,
"fcm_dpo/delta": -0.1286012828350067,
"fcm_dpo/margin": 1.2585363388061523,
"fcm_dpo/q_t": 0.3855533003807068,
"grad_norm": 105.36235809326172,
"learning_rate": 4.825801541160509e-07,
"logits/chosen": 0.12006682902574539,
"logits/rejected": 0.0935576856136322,
"logps/chosen": -73.93391418457031,
"logps/ref_chosen": -71.2255859375,
"logps/ref_rejected": -82.1834716796875,
"logps/rejected": -86.15032958984375,
"loss": 1.0427,
"margin_dpo/margin_mean": 1.2585363388061523,
"margin_dpo/margin_std": 1.6681911945343018,
"step": 139
},
{
"epoch": 0.21164021164021163,
"fcm_dpo/beta": 0.39565181732177734,
"fcm_dpo/delta": -0.22462356090545654,
"fcm_dpo/margin": 1.5439527034759521,
"fcm_dpo/q_t": 0.36816489696502686,
"grad_norm": 110.28506469726562,
"learning_rate": 4.820919832540181e-07,
"logits/chosen": 0.13060888648033142,
"logits/rejected": 0.08857063204050064,
"logps/chosen": -65.80934143066406,
"logps/ref_chosen": -63.27766418457031,
"logps/ref_rejected": -83.30647277832031,
"logps/rejected": -87.3821029663086,
"loss": 1.059,
"margin_dpo/margin_mean": 1.5439517498016357,
"margin_dpo/margin_std": 2.388321876525879,
"step": 140
},
{
"epoch": 0.21315192743764172,
"fcm_dpo/beta": 0.3803204894065857,
"fcm_dpo/delta": -0.143003448843956,
"fcm_dpo/margin": 1.4029268026351929,
"fcm_dpo/q_t": 0.3864386975765228,
"grad_norm": 98.1742935180664,
"learning_rate": 4.815973202802966e-07,
"logits/chosen": 0.17304059863090515,
"logits/rejected": 0.13255923986434937,
"logps/chosen": -64.45763397216797,
"logps/ref_chosen": -61.76676940917969,
"logps/ref_rejected": -88.60601806640625,
"logps/rejected": -92.69981384277344,
"loss": 1.0631,
"margin_dpo/margin_mean": 1.4029275178909302,
"margin_dpo/margin_std": 2.0814921855926514,
"step": 141
},
{
"epoch": 0.2146636432350718,
"fcm_dpo/beta": 0.3807763159275055,
"fcm_dpo/delta": 0.0015191948041319847,
"fcm_dpo/margin": 1.0465271472930908,
"fcm_dpo/q_t": 0.41167542338371277,
"grad_norm": 96.54689025878906,
"learning_rate": 4.810961790316729e-07,
"logits/chosen": 0.15406419336795807,
"logits/rejected": 0.13056586682796478,
"logps/chosen": -67.98941802978516,
"logps/ref_chosen": -65.2747802734375,
"logps/ref_rejected": -81.1378173828125,
"logps/rejected": -84.89898681640625,
"loss": 1.1335,
"margin_dpo/margin_mean": 1.0465269088745117,
"margin_dpo/margin_std": 1.7925764322280884,
"step": 142
},
{
"epoch": 0.2161753590325019,
"fcm_dpo/beta": 0.37847280502319336,
"fcm_dpo/delta": -0.016717037186026573,
"fcm_dpo/margin": 1.0988901853561401,
"fcm_dpo/q_t": 0.4074873626232147,
"grad_norm": 112.29006958007812,
"learning_rate": 4.805885735261454e-07,
"logits/chosen": 0.16635353863239288,
"logits/rejected": 0.15071120858192444,
"logps/chosen": -65.32980346679688,
"logps/ref_chosen": -62.617828369140625,
"logps/ref_rejected": -70.39239501953125,
"logps/rejected": -74.20326232910156,
"loss": 1.1593,
"margin_dpo/margin_mean": 1.0988903045654297,
"margin_dpo/margin_std": 2.125783681869507,
"step": 143
},
{
"epoch": 0.21768707482993196,
"fcm_dpo/beta": 0.3751413822174072,
"fcm_dpo/delta": -0.06344564259052277,
"fcm_dpo/margin": 1.227471113204956,
"fcm_dpo/q_t": 0.3995649814605713,
"grad_norm": 101.40666198730469,
"learning_rate": 4.800745179625307e-07,
"logits/chosen": 0.15766915678977966,
"logits/rejected": 0.13041952252388,
"logps/chosen": -63.764076232910156,
"logps/ref_chosen": -60.80268859863281,
"logps/ref_rejected": -79.07284545898438,
"logps/rejected": -83.26170349121094,
"loss": 1.1134,
"margin_dpo/margin_mean": 1.2274713516235352,
"margin_dpo/margin_std": 2.0769271850585938,
"step": 144
},
{
"epoch": 0.21919879062736206,
"fcm_dpo/beta": 0.3703409433364868,
"fcm_dpo/delta": -0.06774787604808807,
"fcm_dpo/margin": 1.2544803619384766,
"fcm_dpo/q_t": 0.4007543623447418,
"grad_norm": 111.68207550048828,
"learning_rate": 4.795540267200686e-07,
"logits/chosen": 0.11246728897094727,
"logits/rejected": 0.12925654649734497,
"logps/chosen": -77.45028686523438,
"logps/ref_chosen": -74.61146545410156,
"logps/ref_rejected": -83.24461364746094,
"logps/rejected": -87.3379135131836,
"loss": 1.1278,
"margin_dpo/margin_mean": 1.2544798851013184,
"margin_dpo/margin_std": 2.258026599884033,
"step": 145
},
{
"epoch": 0.22071050642479215,
"fcm_dpo/beta": 0.3645484447479248,
"fcm_dpo/delta": -0.09541298449039459,
"fcm_dpo/margin": 1.3463966846466064,
"fcm_dpo/q_t": 0.3941432237625122,
"grad_norm": 92.99526977539062,
"learning_rate": 4.790271143580173e-07,
"logits/chosen": 0.1032009869813919,
"logits/rejected": 0.0877787172794342,
"logps/chosen": -60.48515701293945,
"logps/ref_chosen": -57.84098434448242,
"logps/ref_rejected": -67.47422790527344,
"logps/rejected": -71.46479034423828,
"loss": 1.082,
"margin_dpo/margin_mean": 1.3463963270187378,
"margin_dpo/margin_std": 2.091710090637207,
"step": 146
},
{
"epoch": 0.2222222222222222,
"fcm_dpo/beta": 0.36428195238113403,
"fcm_dpo/delta": 0.020590590313076973,
"fcm_dpo/margin": 1.0431098937988281,
"fcm_dpo/q_t": 0.41619163751602173,
"grad_norm": 116.23778533935547,
"learning_rate": 4.784937956152489e-07,
"logits/chosen": 0.13601523637771606,
"logits/rejected": 0.09667371958494186,
"logps/chosen": -69.94081115722656,
"logps/ref_chosen": -66.81346893310547,
"logps/ref_rejected": -81.1796875,
"logps/rejected": -85.35014343261719,
"loss": 1.1814,
"margin_dpo/margin_mean": 1.0431100130081177,
"margin_dpo/margin_std": 2.160703659057617,
"step": 147
},
{
"epoch": 0.2237339380196523,
"fcm_dpo/beta": 0.3580806255340576,
"fcm_dpo/delta": -0.1336955428123474,
"fcm_dpo/margin": 1.4712722301483154,
"fcm_dpo/q_t": 0.3866221308708191,
"grad_norm": 77.85800170898438,
"learning_rate": 4.779540854098347e-07,
"logits/chosen": 0.23744139075279236,
"logits/rejected": 0.1707296371459961,
"logps/chosen": -51.66542053222656,
"logps/ref_chosen": -48.6877555847168,
"logps/ref_rejected": -67.50503540039062,
"logps/rejected": -71.95396423339844,
"loss": 1.07,
"margin_dpo/margin_mean": 1.4712722301483154,
"margin_dpo/margin_std": 2.22235369682312,
"step": 148
},
{
"epoch": 0.2252456538170824,
"fcm_dpo/beta": 0.3439862132072449,
"fcm_dpo/delta": -0.19360411167144775,
"fcm_dpo/margin": 1.6928032636642456,
"fcm_dpo/q_t": 0.3755077123641968,
"grad_norm": 79.94820404052734,
"learning_rate": 4.774079988386296e-07,
"logits/chosen": 0.12722453474998474,
"logits/rejected": 0.08169707655906677,
"logps/chosen": -58.75447082519531,
"logps/ref_chosen": -55.143775939941406,
"logps/ref_rejected": -64.79888916015625,
"logps/rejected": -70.10238647460938,
"loss": 1.0219,
"margin_dpo/margin_mean": 1.6928033828735352,
"margin_dpo/margin_std": 2.312622547149658,
"step": 149
},
{
"epoch": 0.22675736961451248,
"fcm_dpo/beta": 0.32747161388397217,
"fcm_dpo/delta": -0.2582157552242279,
"fcm_dpo/margin": 1.9585295915603638,
"fcm_dpo/q_t": 0.35701072216033936,
"grad_norm": 76.83651733398438,
"learning_rate": 4.768555511768486e-07,
"logits/chosen": 0.15965455770492554,
"logits/rejected": 0.11961585283279419,
"logps/chosen": -70.1795425415039,
"logps/ref_chosen": -67.47074890136719,
"logps/ref_rejected": -89.21170806884766,
"logps/rejected": -93.8790283203125,
"loss": 0.9533,
"margin_dpo/margin_mean": 1.9585298299789429,
"margin_dpo/margin_std": 2.1194467544555664,
"step": 150
},
{
"epoch": 0.22826908541194255,
"fcm_dpo/beta": 0.3103789687156677,
"fcm_dpo/delta": -0.2853265702724457,
"fcm_dpo/margin": 2.147468328475952,
"fcm_dpo/q_t": 0.3529084324836731,
"grad_norm": 64.65057373046875,
"learning_rate": 4.762967578776406e-07,
"logits/chosen": 0.16163957118988037,
"logits/rejected": 0.11220911145210266,
"logps/chosen": -55.12104034423828,
"logps/ref_chosen": -52.45954132080078,
"logps/ref_rejected": -79.0630111694336,
"logps/rejected": -83.87197875976562,
"loss": 0.9339,
"margin_dpo/margin_mean": 2.147468328475952,
"margin_dpo/margin_std": 2.266123056411743,
"step": 151
},
{
"epoch": 0.22978080120937264,
"fcm_dpo/beta": 0.30170968174934387,
"fcm_dpo/delta": -0.11586311459541321,
"fcm_dpo/margin": 1.69052255153656,
"fcm_dpo/q_t": 0.3897024095058441,
"grad_norm": 74.69120025634766,
"learning_rate": 4.757316345716553e-07,
"logits/chosen": 0.20322634279727936,
"logits/rejected": 0.1573318988084793,
"logps/chosen": -59.99116134643555,
"logps/ref_chosen": -56.5538330078125,
"logps/ref_rejected": -76.55074310302734,
"logps/rejected": -81.6785888671875,
"loss": 1.0747,
"margin_dpo/margin_mean": 1.6905221939086914,
"margin_dpo/margin_std": 2.551938533782959,
"step": 152
},
{
"epoch": 0.23129251700680273,
"fcm_dpo/beta": 0.29183411598205566,
"fcm_dpo/delta": -0.13964568078517914,
"fcm_dpo/margin": 1.8225746154785156,
"fcm_dpo/q_t": 0.37922757863998413,
"grad_norm": 68.27378845214844,
"learning_rate": 4.751601970666064e-07,
"logits/chosen": 0.1238468810915947,
"logits/rejected": 0.08851639926433563,
"logps/chosen": -71.23893737792969,
"logps/ref_chosen": -68.00689697265625,
"logps/ref_rejected": -74.83482360839844,
"logps/rejected": -79.88944244384766,
"loss": 1.01,
"margin_dpo/margin_mean": 1.8225750923156738,
"margin_dpo/margin_std": 2.1173226833343506,
"step": 153
},
{
"epoch": 0.2328042328042328,
"fcm_dpo/beta": 0.29103392362594604,
"fcm_dpo/delta": 0.008851571008563042,
"fcm_dpo/margin": 1.3450570106506348,
"fcm_dpo/q_t": 0.4131838083267212,
"grad_norm": 72.07164001464844,
"learning_rate": 4.745824613468292e-07,
"logits/chosen": 0.22070343792438507,
"logits/rejected": 0.21719685196876526,
"logps/chosen": -63.0831413269043,
"logps/ref_chosen": -59.222537994384766,
"logps/ref_rejected": -64.19131469726562,
"logps/rejected": -69.39697265625,
"loss": 1.178,
"margin_dpo/margin_mean": 1.3450572490692139,
"margin_dpo/margin_std": 2.7515459060668945,
"step": 154
},
{
"epoch": 0.23431594860166288,
"fcm_dpo/beta": 0.2854927182197571,
"fcm_dpo/delta": -0.09956204891204834,
"fcm_dpo/margin": 1.7304502725601196,
"fcm_dpo/q_t": 0.3943309783935547,
"grad_norm": 74.5228500366211,
"learning_rate": 4.7399844357283393e-07,
"logits/chosen": 0.20225617289543152,
"logits/rejected": 0.18362796306610107,
"logps/chosen": -72.20230102539062,
"logps/ref_chosen": -68.45469665527344,
"logps/ref_rejected": -77.91763305664062,
"logps/rejected": -83.39569091796875,
"loss": 1.1098,
"margin_dpo/margin_mean": 1.73045015335083,
"margin_dpo/margin_std": 2.9399399757385254,
"step": 155
},
{
"epoch": 0.23582766439909297,
"fcm_dpo/beta": 0.27768462896347046,
"fcm_dpo/delta": -0.1894133985042572,
"fcm_dpo/margin": 2.0840401649475098,
"fcm_dpo/q_t": 0.3746188282966614,
"grad_norm": 72.98139953613281,
"learning_rate": 4.7340816008085305e-07,
"logits/chosen": 0.14099450409412384,
"logits/rejected": 0.10012276470661163,
"logps/chosen": -71.08142852783203,
"logps/ref_chosen": -67.26959991455078,
"logps/ref_rejected": -86.95914459228516,
"logps/rejected": -92.85501098632812,
"loss": 1.0051,
"margin_dpo/margin_mean": 2.084041118621826,
"margin_dpo/margin_std": 2.6175365447998047,
"step": 156
},
{
"epoch": 0.23733938019652306,
"fcm_dpo/beta": 0.2693888545036316,
"fcm_dpo/delta": -0.06476490199565887,
"fcm_dpo/margin": 1.7060657739639282,
"fcm_dpo/q_t": 0.39782166481018066,
"grad_norm": 64.5011215209961,
"learning_rate": 4.728116273823847e-07,
"logits/chosen": 0.1516086757183075,
"logits/rejected": 0.13197970390319824,
"logps/chosen": -58.35748291015625,
"logps/ref_chosen": -54.77287292480469,
"logps/ref_rejected": -63.87866973876953,
"logps/rejected": -69.16934204101562,
"loss": 1.0873,
"margin_dpo/margin_mean": 1.7060656547546387,
"margin_dpo/margin_std": 2.5072410106658936,
"step": 157
},
{
"epoch": 0.23885109599395313,
"fcm_dpo/beta": 0.2680598497390747,
"fcm_dpo/delta": -0.061768539249897,
"fcm_dpo/margin": 1.7115424871444702,
"fcm_dpo/q_t": 0.397410124540329,
"grad_norm": 68.72582244873047,
"learning_rate": 4.7220886216373085e-07,
"logits/chosen": 0.19212478399276733,
"logits/rejected": 0.1591942310333252,
"logps/chosen": -68.83773040771484,
"logps/ref_chosen": -64.92271423339844,
"logps/ref_rejected": -82.23789978027344,
"logps/rejected": -87.86445617675781,
"loss": 1.0808,
"margin_dpo/margin_mean": 1.7115428447723389,
"margin_dpo/margin_std": 2.4922690391540527,
"step": 158
},
{
"epoch": 0.24036281179138322,
"fcm_dpo/beta": 0.26532524824142456,
"fcm_dpo/delta": -0.10670725256204605,
"fcm_dpo/margin": 1.888586401939392,
"fcm_dpo/q_t": 0.38828045129776,
"grad_norm": 75.0799560546875,
"learning_rate": 4.715998812855304e-07,
"logits/chosen": 0.19417089223861694,
"logits/rejected": 0.1603868454694748,
"logps/chosen": -61.159812927246094,
"logps/ref_chosen": -57.046993255615234,
"logps/ref_rejected": -73.32441711425781,
"logps/rejected": -79.32582092285156,
"loss": 1.0816,
"margin_dpo/margin_mean": 1.8885865211486816,
"margin_dpo/margin_std": 2.8942010402679443,
"step": 159
},
{
"epoch": 0.2418745275888133,
"fcm_dpo/beta": 0.25893402099609375,
"fcm_dpo/delta": -0.09484230726957321,
"fcm_dpo/margin": 1.8935188055038452,
"fcm_dpo/q_t": 0.39680206775665283,
"grad_norm": 60.400577545166016,
"learning_rate": 4.7098470178228755e-07,
"logits/chosen": 0.07180037349462509,
"logits/rejected": 0.03388737514615059,
"logps/chosen": -54.395286560058594,
"logps/ref_chosen": -49.806915283203125,
"logps/ref_rejected": -68.3370132446289,
"logps/rejected": -74.81890869140625,
"loss": 1.1,
"margin_dpo/margin_mean": 1.8935189247131348,
"margin_dpo/margin_std": 3.1547999382019043,
"step": 160
},
{
"epoch": 0.24338624338624337,
"fcm_dpo/beta": 0.2536158561706543,
"fcm_dpo/delta": -0.11104996502399445,
"fcm_dpo/margin": 1.9934735298156738,
"fcm_dpo/q_t": 0.3908805251121521,
"grad_norm": 59.25609588623047,
"learning_rate": 4.703633408618955e-07,
"logits/chosen": 0.19538050889968872,
"logits/rejected": 0.15980537235736847,
"logps/chosen": -57.10566329956055,
"logps/ref_chosen": -52.50048828125,
"logps/ref_rejected": -66.04540252685547,
"logps/rejected": -72.64404296875,
"loss": 1.0799,
"margin_dpo/margin_mean": 1.9934736490249634,
"margin_dpo/margin_std": 3.0931880474090576,
"step": 161
},
{
"epoch": 0.24489795918367346,
"fcm_dpo/beta": 0.2403220385313034,
"fcm_dpo/delta": -0.2895709276199341,
"fcm_dpo/margin": 2.785764217376709,
"fcm_dpo/q_t": 0.35252922773361206,
"grad_norm": 58.1529426574707,
"learning_rate": 4.697358159051549e-07,
"logits/chosen": 0.21992871165275574,
"logits/rejected": 0.1757911592721939,
"logps/chosen": -74.54412841796875,
"logps/ref_chosen": -69.46919250488281,
"logps/ref_rejected": -92.00952911376953,
"logps/rejected": -99.87022399902344,
"loss": 0.9441,
"margin_dpo/margin_mean": 2.7857649326324463,
"margin_dpo/margin_std": 3.090634822845459,
"step": 162
},
{
"epoch": 0.24640967498110355,
"fcm_dpo/beta": 0.23066341876983643,
"fcm_dpo/delta": -0.25370314717292786,
"fcm_dpo/margin": 2.7662220001220703,
"fcm_dpo/q_t": 0.3620498776435852,
"grad_norm": 55.00440216064453,
"learning_rate": 4.691021444652876e-07,
"logits/chosen": 0.1866772174835205,
"logits/rejected": 0.1429206132888794,
"logps/chosen": -55.19199752807617,
"logps/ref_chosen": -50.613834381103516,
"logps/ref_rejected": -74.62033081054688,
"logps/rejected": -81.96471405029297,
"loss": 0.9916,
"margin_dpo/margin_mean": 2.7662222385406494,
"margin_dpo/margin_std": 3.4099013805389404,
"step": 163
},
{
"epoch": 0.24792139077853365,
"fcm_dpo/beta": 0.2187848836183548,
"fcm_dpo/delta": -0.20979532599449158,
"fcm_dpo/margin": 2.730527400970459,
"fcm_dpo/q_t": 0.3718702793121338,
"grad_norm": 51.946624755859375,
"learning_rate": 4.6846234426744624e-07,
"logits/chosen": 0.16851532459259033,
"logits/rejected": 0.11055172979831696,
"logps/chosen": -60.02857971191406,
"logps/ref_chosen": -54.848114013671875,
"logps/ref_rejected": -79.0630111694336,
"logps/rejected": -86.9739990234375,
"loss": 1.0185,
"margin_dpo/margin_mean": 2.730526924133301,
"margin_dpo/margin_std": 3.5926055908203125,
"step": 164
},
{
"epoch": 0.2494331065759637,
"fcm_dpo/beta": 0.21285484731197357,
"fcm_dpo/delta": -0.14022870361804962,
"fcm_dpo/margin": 2.503763437271118,
"fcm_dpo/q_t": 0.38059890270233154,
"grad_norm": 50.90439987182617,
"learning_rate": 4.678164332082175e-07,
"logits/chosen": 0.25604766607284546,
"logits/rejected": 0.2025667428970337,
"logps/chosen": -56.66874694824219,
"logps/ref_chosen": -51.089210510253906,
"logps/ref_rejected": -71.23370361328125,
"logps/rejected": -79.31700134277344,
"loss": 1.0262,
"margin_dpo/margin_mean": 2.503763198852539,
"margin_dpo/margin_std": 3.0885088443756104,
"step": 165
},
{
"epoch": 0.2509448223733938,
"fcm_dpo/beta": 0.2094953954219818,
"fcm_dpo/delta": -0.01251722127199173,
"fcm_dpo/margin": 1.966209053993225,
"fcm_dpo/q_t": 0.40979406237602234,
"grad_norm": 57.34663391113281,
"learning_rate": 4.6716442935512214e-07,
"logits/chosen": 0.193076953291893,
"logits/rejected": 0.11284859478473663,
"logps/chosen": -68.62454223632812,
"logps/ref_chosen": -63.19081115722656,
"logps/ref_rejected": -93.8402099609375,
"logps/rejected": -101.24014282226562,
"loss": 1.1205,
"margin_dpo/margin_mean": 1.9662084579467773,
"margin_dpo/margin_std": 3.232795476913452,
"step": 166
},
{
"epoch": 0.25245653817082386,
"fcm_dpo/beta": 0.20221662521362305,
"fcm_dpo/delta": -0.20763254165649414,
"fcm_dpo/margin": 2.9351630210876465,
"fcm_dpo/q_t": 0.36785978078842163,
"grad_norm": 45.256717681884766,
"learning_rate": 4.6650635094610966e-07,
"logits/chosen": 0.15891437232494354,
"logits/rejected": 0.12511295080184937,
"logps/chosen": -64.05963897705078,
"logps/ref_chosen": -58.92427062988281,
"logps/ref_rejected": -72.97377014160156,
"logps/rejected": -81.04429626464844,
"loss": 0.9871,
"margin_dpo/margin_mean": 2.9351630210876465,
"margin_dpo/margin_std": 3.427379846572876,
"step": 167
},
{
"epoch": 0.25396825396825395,
"fcm_dpo/beta": 0.2014435976743698,
"fcm_dpo/delta": 0.011637402698397636,
"fcm_dpo/margin": 1.930140733718872,
"fcm_dpo/q_t": 0.41260749101638794,
"grad_norm": 55.73114776611328,
"learning_rate": 4.6584221638904767e-07,
"logits/chosen": 0.16870692372322083,
"logits/rejected": 0.1389564573764801,
"logps/chosen": -71.93734741210938,
"logps/ref_chosen": -65.65138244628906,
"logps/ref_rejected": -79.71418762207031,
"logps/rejected": -87.9302978515625,
"loss": 1.122,
"margin_dpo/margin_mean": 1.930140495300293,
"margin_dpo/margin_std": 3.016066074371338,
"step": 168
},
{
"epoch": 0.25547996976568405,
"fcm_dpo/beta": 0.19683048129081726,
"fcm_dpo/delta": -0.16538755595684052,
"fcm_dpo/margin": 2.8258461952209473,
"fcm_dpo/q_t": 0.3836873173713684,
"grad_norm": 51.384742736816406,
"learning_rate": 4.651720442612075e-07,
"logits/chosen": 0.22757534682750702,
"logits/rejected": 0.19626004993915558,
"logps/chosen": -67.05967712402344,
"logps/ref_chosen": -61.425865173339844,
"logps/ref_rejected": -76.09590148925781,
"logps/rejected": -84.55555725097656,
"loss": 1.0461,
"margin_dpo/margin_mean": 2.825845956802368,
"margin_dpo/margin_std": 4.196510314941406,
"step": 169
},
{
"epoch": 0.25699168556311414,
"fcm_dpo/beta": 0.1929258108139038,
"fcm_dpo/delta": -0.09032995253801346,
"fcm_dpo/margin": 2.5192790031433105,
"fcm_dpo/q_t": 0.3924364447593689,
"grad_norm": 43.95397186279297,
"learning_rate": 4.6449585330874425e-07,
"logits/chosen": 0.1608143299818039,
"logits/rejected": 0.15858882665634155,
"logps/chosen": -62.605133056640625,
"logps/ref_chosen": -56.65319061279297,
"logps/ref_rejected": -63.45965576171875,
"logps/rejected": -71.93087768554688,
"loss": 1.0914,
"margin_dpo/margin_mean": 2.5192790031433105,
"margin_dpo/margin_std": 3.963756561279297,
"step": 170
},
{
"epoch": 0.2585034013605442,
"fcm_dpo/beta": 0.18473538756370544,
"fcm_dpo/delta": -0.18784065544605255,
"fcm_dpo/margin": 3.1072592735290527,
"fcm_dpo/q_t": 0.37474268674850464,
"grad_norm": 49.094505310058594,
"learning_rate": 4.6381366244617224e-07,
"logits/chosen": 0.25203442573547363,
"logits/rejected": 0.2046370506286621,
"logps/chosen": -70.01921844482422,
"logps/ref_chosen": -63.73476028442383,
"logps/ref_rejected": -78.50328063964844,
"logps/rejected": -87.89498901367188,
"loss": 1.0471,
"margin_dpo/margin_mean": 3.107259750366211,
"margin_dpo/margin_std": 4.45649528503418,
"step": 171
},
{
"epoch": 0.2600151171579743,
"fcm_dpo/beta": 0.18111056089401245,
"fcm_dpo/delta": -0.18078723549842834,
"fcm_dpo/margin": 3.1506669521331787,
"fcm_dpo/q_t": 0.3735198378562927,
"grad_norm": 42.36780548095703,
"learning_rate": 4.631254907558365e-07,
"logits/chosen": 0.2554565668106079,
"logits/rejected": 0.2034720778465271,
"logps/chosen": -59.11717224121094,
"logps/ref_chosen": -52.201759338378906,
"logps/ref_rejected": -82.85285949707031,
"logps/rejected": -92.91893768310547,
"loss": 1.0257,
"margin_dpo/margin_mean": 3.1506664752960205,
"margin_dpo/margin_std": 4.125787258148193,
"step": 172
},
{
"epoch": 0.2615268329554044,
"fcm_dpo/beta": 0.1714455783367157,
"fcm_dpo/delta": -0.1695910543203354,
"fcm_dpo/margin": 3.238369941711426,
"fcm_dpo/q_t": 0.38560813665390015,
"grad_norm": 41.579505920410156,
"learning_rate": 4.624313574873786e-07,
"logits/chosen": 0.24041128158569336,
"logits/rejected": 0.15953364968299866,
"logps/chosen": -62.48090362548828,
"logps/ref_chosen": -55.434722900390625,
"logps/ref_rejected": -77.81967163085938,
"logps/rejected": -88.1042251586914,
"loss": 1.1006,
"margin_dpo/margin_mean": 3.238370180130005,
"margin_dpo/margin_std": 5.3449625968933105,
"step": 173
},
{
"epoch": 0.26303854875283444,
"fcm_dpo/beta": 0.16659438610076904,
"fcm_dpo/delta": -0.20445415377616882,
"fcm_dpo/margin": 3.552804708480835,
"fcm_dpo/q_t": 0.37425845861434937,
"grad_norm": 44.64958953857422,
"learning_rate": 4.61731282057198e-07,
"logits/chosen": 0.2262486070394516,
"logits/rejected": 0.16458037495613098,
"logps/chosen": -64.90945434570312,
"logps/ref_chosen": -57.17195129394531,
"logps/ref_rejected": -85.47578430175781,
"logps/rejected": -96.76608276367188,
"loss": 1.0443,
"margin_dpo/margin_mean": 3.5528039932250977,
"margin_dpo/margin_std": 5.244265556335449,
"step": 174
},
{
"epoch": 0.26455026455026454,
"fcm_dpo/beta": 0.16002216935157776,
"fcm_dpo/delta": -0.2240179479122162,
"fcm_dpo/margin": 3.8157334327697754,
"fcm_dpo/q_t": 0.373027503490448,
"grad_norm": 42.5731086730957,
"learning_rate": 4.6102528404790965e-07,
"logits/chosen": 0.28858986496925354,
"logits/rejected": 0.25886330008506775,
"logps/chosen": -75.49578857421875,
"logps/ref_chosen": -67.6656265258789,
"logps/ref_rejected": -84.36766815185547,
"logps/rejected": -96.01356506347656,
"loss": 1.0347,
"margin_dpo/margin_mean": 3.8157334327697754,
"margin_dpo/margin_std": 5.4957990646362305,
"step": 175
},
{
"epoch": 0.2660619803476946,
"fcm_dpo/beta": 0.15674875676631927,
"fcm_dpo/delta": -0.012624900788068771,
"fcm_dpo/margin": 2.624541997909546,
"fcm_dpo/q_t": 0.4133697748184204,
"grad_norm": 50.7176513671875,
"learning_rate": 4.603133832077953e-07,
"logits/chosen": 0.2168986052274704,
"logits/rejected": 0.19173604249954224,
"logps/chosen": -86.61616516113281,
"logps/ref_chosen": -77.8587646484375,
"logps/ref_rejected": -81.08732604980469,
"logps/rejected": -92.46926879882812,
"loss": 1.1886,
"margin_dpo/margin_mean": 2.624541759490967,
"margin_dpo/margin_std": 5.590775012969971,
"step": 176
},
{
"epoch": 0.2675736961451247,
"fcm_dpo/beta": 0.14755460619926453,
"fcm_dpo/delta": -0.40747910737991333,
"fcm_dpo/margin": 5.241545677185059,
"fcm_dpo/q_t": 0.32991600036621094,
"grad_norm": 42.354209899902344,
"learning_rate": 4.5959559945025183e-07,
"logits/chosen": 0.3358641266822815,
"logits/rejected": 0.2448875606060028,
"logps/chosen": -63.21653747558594,
"logps/ref_chosen": -55.22039794921875,
"logps/ref_rejected": -92.54973602294922,
"logps/rejected": -105.78742980957031,
"loss": 0.8743,
"margin_dpo/margin_mean": 5.241544723510742,
"margin_dpo/margin_std": 5.077418327331543,
"step": 177
},
{
"epoch": 0.2690854119425548,
"fcm_dpo/beta": 0.14272984862327576,
"fcm_dpo/delta": -0.0517147034406662,
"fcm_dpo/margin": 3.132704734802246,
"fcm_dpo/q_t": 0.4003003239631653,
"grad_norm": 39.6386833190918,
"learning_rate": 4.588719528532341e-07,
"logits/chosen": 0.20977315306663513,
"logits/rejected": 0.1633778065443039,
"logps/chosen": -69.47108459472656,
"logps/ref_chosen": -60.81049346923828,
"logps/ref_rejected": -81.12973022460938,
"logps/rejected": -92.92301940917969,
"loss": 1.0947,
"margin_dpo/margin_mean": 3.1327052116394043,
"margin_dpo/margin_std": 4.599012851715088,
"step": 178
},
{
"epoch": 0.2705971277399849,
"fcm_dpo/beta": 0.14164280891418457,
"fcm_dpo/delta": -0.10549677163362503,
"fcm_dpo/margin": 3.53216290473938,
"fcm_dpo/q_t": 0.39201849699020386,
"grad_norm": 38.2100944519043,
"learning_rate": 4.581424636586928e-07,
"logits/chosen": 0.2727736234664917,
"logits/rejected": 0.25624555349349976,
"logps/chosen": -75.33070373535156,
"logps/ref_chosen": -65.67171478271484,
"logps/ref_rejected": -75.32586669921875,
"logps/rejected": -88.51702880859375,
"loss": 1.0975,
"margin_dpo/margin_mean": 3.532163143157959,
"margin_dpo/margin_std": 5.8223490715026855,
"step": 179
},
{
"epoch": 0.272108843537415,
"fcm_dpo/beta": 0.14003294706344604,
"fcm_dpo/delta": -0.05574531853199005,
"fcm_dpo/margin": 3.2362074851989746,
"fcm_dpo/q_t": 0.4050852060317993,
"grad_norm": 37.95149612426758,
"learning_rate": 4.5740715227200897e-07,
"logits/chosen": 0.10121668875217438,
"logits/rejected": 0.08257490396499634,
"logps/chosen": -64.88932037353516,
"logps/ref_chosen": -56.68280792236328,
"logps/ref_rejected": -64.94414520263672,
"logps/rejected": -76.3868637084961,
"loss": 1.1465,
"margin_dpo/margin_mean": 3.2362074851989746,
"margin_dpo/margin_std": 6.117829322814941,
"step": 180
},
{
"epoch": 0.273620559334845,
"fcm_dpo/beta": 0.13333183526992798,
"fcm_dpo/delta": -0.28197789192199707,
"fcm_dpo/margin": 4.9724249839782715,
"fcm_dpo/q_t": 0.352914035320282,
"grad_norm": 33.529659271240234,
"learning_rate": 4.566660392614228e-07,
"logits/chosen": 0.27004173398017883,
"logits/rejected": 0.2327122837305069,
"logps/chosen": -68.66935729980469,
"logps/ref_chosen": -60.77604675292969,
"logps/ref_rejected": -83.98361206054688,
"logps/rejected": -96.84934997558594,
"loss": 0.9329,
"margin_dpo/margin_mean": 4.972424507141113,
"margin_dpo/margin_std": 5.188924789428711,
"step": 181
},
{
"epoch": 0.2751322751322751,
"fcm_dpo/beta": 0.12604235112667084,
"fcm_dpo/delta": -0.2631838917732239,
"fcm_dpo/margin": 5.117816925048828,
"fcm_dpo/q_t": 0.36235833168029785,
"grad_norm": 34.590484619140625,
"learning_rate": 4.5591914535745817e-07,
"logits/chosen": 0.2485291063785553,
"logits/rejected": 0.17378370463848114,
"logps/chosen": -69.56755065917969,
"logps/ref_chosen": -60.2537841796875,
"logps/ref_rejected": -89.7706298828125,
"logps/rejected": -104.20220184326172,
"loss": 1.0064,
"margin_dpo/margin_mean": 5.117816925048828,
"margin_dpo/margin_std": 6.874538421630859,
"step": 182
},
{
"epoch": 0.2766439909297052,
"fcm_dpo/beta": 0.12698180973529816,
"fcm_dpo/delta": 0.13285091519355774,
"fcm_dpo/margin": 2.133739471435547,
"fcm_dpo/q_t": 0.4392937421798706,
"grad_norm": 36.80066680908203,
"learning_rate": 4.551664914523433e-07,
"logits/chosen": 0.2542421221733093,
"logits/rejected": 0.23342236876487732,
"logps/chosen": -72.93169403076172,
"logps/ref_chosen": -61.76142120361328,
"logps/ref_rejected": -72.54627990722656,
"logps/rejected": -85.85029602050781,
"loss": 1.253,
"margin_dpo/margin_mean": 2.133739471435547,
"margin_dpo/margin_std": 5.610200881958008,
"step": 183
},
{
"epoch": 0.2781557067271353,
"fcm_dpo/beta": 0.12451111525297165,
"fcm_dpo/delta": -0.13866505026817322,
"fcm_dpo/margin": 4.248322486877441,
"fcm_dpo/q_t": 0.3827287554740906,
"grad_norm": 28.370405197143555,
"learning_rate": 4.544080985994258e-07,
"logits/chosen": 0.35382214188575745,
"logits/rejected": 0.29234981536865234,
"logps/chosen": -55.696563720703125,
"logps/ref_chosen": -46.840721130371094,
"logps/ref_rejected": -69.3609390258789,
"logps/rejected": -82.46510314941406,
"loss": 1.0227,
"margin_dpo/margin_mean": 4.248322486877441,
"margin_dpo/margin_std": 5.141722679138184,
"step": 184
},
{
"epoch": 0.2796674225245654,
"fcm_dpo/beta": 0.12130877375602722,
"fcm_dpo/delta": -0.1432112157344818,
"fcm_dpo/margin": 4.402965545654297,
"fcm_dpo/q_t": 0.38836491107940674,
"grad_norm": 30.318683624267578,
"learning_rate": 4.5364398801258394e-07,
"logits/chosen": 0.27195507287979126,
"logits/rejected": 0.22637689113616943,
"logps/chosen": -62.45649719238281,
"logps/ref_chosen": -52.32114028930664,
"logps/ref_rejected": -68.3885726928711,
"logps/rejected": -82.92689514160156,
"loss": 1.1076,
"margin_dpo/margin_mean": 4.402966022491455,
"margin_dpo/margin_std": 7.5162458419799805,
"step": 185
},
{
"epoch": 0.2811791383219955,
"fcm_dpo/beta": 0.11787950992584229,
"fcm_dpo/delta": -0.1722513735294342,
"fcm_dpo/margin": 4.769825458526611,
"fcm_dpo/q_t": 0.3829618990421295,
"grad_norm": 36.4186897277832,
"learning_rate": 4.5287418106563354e-07,
"logits/chosen": 0.19488704204559326,
"logits/rejected": 0.15547996759414673,
"logps/chosen": -77.1296157836914,
"logps/ref_chosen": -67.42012786865234,
"logps/ref_rejected": -82.50968933105469,
"logps/rejected": -96.98900604248047,
"loss": 1.0768,
"margin_dpo/margin_mean": 4.7698259353637695,
"margin_dpo/margin_std": 7.652778625488281,
"step": 186
},
{
"epoch": 0.28269085411942557,
"fcm_dpo/beta": 0.1145966649055481,
"fcm_dpo/delta": -0.1198846772313118,
"fcm_dpo/margin": 4.478250503540039,
"fcm_dpo/q_t": 0.3871074914932251,
"grad_norm": 36.75972366333008,
"learning_rate": 4.520986992917297e-07,
"logits/chosen": 0.27034419775009155,
"logits/rejected": 0.21536602079868317,
"logps/chosen": -86.54496002197266,
"logps/ref_chosen": -75.52549743652344,
"logps/ref_rejected": -94.76289367675781,
"logps/rejected": -110.26060485839844,
"loss": 1.0871,
"margin_dpo/margin_mean": 4.478249549865723,
"margin_dpo/margin_std": 7.051586151123047,
"step": 187
},
{
"epoch": 0.2842025699168556,
"fcm_dpo/beta": 0.11200448125600815,
"fcm_dpo/delta": -0.12840519845485687,
"fcm_dpo/margin": 4.6570024490356445,
"fcm_dpo/q_t": 0.3865613341331482,
"grad_norm": 34.73870849609375,
"learning_rate": 4.5131756438276466e-07,
"logits/chosen": 0.2812601923942566,
"logits/rejected": 0.2386472225189209,
"logps/chosen": -81.70372009277344,
"logps/ref_chosen": -71.52333068847656,
"logps/ref_rejected": -78.29949951171875,
"logps/rejected": -93.13690185546875,
"loss": 1.0811,
"margin_dpo/margin_mean": 4.6570024490356445,
"margin_dpo/margin_std": 7.432827949523926,
"step": 188
},
{
"epoch": 0.2857142857142857,
"fcm_dpo/beta": 0.10828001797199249,
"fcm_dpo/delta": -0.11131785809993744,
"fcm_dpo/margin": 4.635937690734863,
"fcm_dpo/q_t": 0.38790684938430786,
"grad_norm": 32.68986129760742,
"learning_rate": 4.5053079818876096e-07,
"logits/chosen": 0.2685256004333496,
"logits/rejected": 0.2797519862651825,
"logps/chosen": -81.99581909179688,
"logps/ref_chosen": -72.17626953125,
"logps/ref_rejected": -75.26313781738281,
"logps/rejected": -89.71861267089844,
"loss": 1.0783,
"margin_dpo/margin_mean": 4.635939121246338,
"margin_dpo/margin_std": 6.736225605010986,
"step": 189
},
{
"epoch": 0.2872260015117158,
"fcm_dpo/beta": 0.1046074777841568,
"fcm_dpo/delta": -0.27176159620285034,
"fcm_dpo/margin": 6.251853942871094,
"fcm_dpo/q_t": 0.3575611710548401,
"grad_norm": 31.16099739074707,
"learning_rate": 4.4973842271726024e-07,
"logits/chosen": 0.32384777069091797,
"logits/rejected": 0.18938855826854706,
"logps/chosen": -64.28665924072266,
"logps/ref_chosen": -54.624271392822266,
"logps/ref_rejected": -101.47068786621094,
"logps/rejected": -117.38493347167969,
"loss": 0.964,
"margin_dpo/margin_mean": 6.251853942871094,
"margin_dpo/margin_std": 7.330543518066406,
"step": 190
},
{
"epoch": 0.2887377173091459,
"fcm_dpo/beta": 0.1018860712647438,
"fcm_dpo/delta": -0.08884115517139435,
"fcm_dpo/margin": 4.756298065185547,
"fcm_dpo/q_t": 0.3920493721961975,
"grad_norm": 33.24656295776367,
"learning_rate": 4.48940460132708e-07,
"logits/chosen": 0.32119327783584595,
"logits/rejected": 0.29560232162475586,
"logps/chosen": -84.64088439941406,
"logps/ref_chosen": -72.93251037597656,
"logps/ref_rejected": -89.95103454589844,
"logps/rejected": -106.41569519042969,
"loss": 1.0988,
"margin_dpo/margin_mean": 4.756298065185547,
"margin_dpo/margin_std": 7.644356727600098,
"step": 191
},
{
"epoch": 0.29024943310657597,
"fcm_dpo/beta": 0.10229361802339554,
"fcm_dpo/delta": 0.04606345295906067,
"fcm_dpo/margin": 3.473459243774414,
"fcm_dpo/q_t": 0.42033612728118896,
"grad_norm": 26.436538696289062,
"learning_rate": 4.481369327558329e-07,
"logits/chosen": 0.31390440464019775,
"logits/rejected": 0.28907686471939087,
"logps/chosen": -65.87787628173828,
"logps/ref_chosen": -54.001121520996094,
"logps/ref_rejected": -63.531551361083984,
"logps/rejected": -78.88176727294922,
"loss": 1.1719,
"margin_dpo/margin_mean": 3.473459005355835,
"margin_dpo/margin_std": 6.743564128875732,
"step": 192
},
{
"epoch": 0.29176114890400606,
"fcm_dpo/beta": 0.09879890084266663,
"fcm_dpo/delta": -0.19991721212863922,
"fcm_dpo/margin": 5.939870834350586,
"fcm_dpo/q_t": 0.37099704146385193,
"grad_norm": 25.317665100097656,
"learning_rate": 4.47327863063023e-07,
"logits/chosen": 0.25306424498558044,
"logits/rejected": 0.23036319017410278,
"logps/chosen": -67.60955047607422,
"logps/ref_chosen": -56.74927520751953,
"logps/ref_rejected": -58.80629348754883,
"logps/rejected": -75.60643768310547,
"loss": 0.9987,
"margin_dpo/margin_mean": 5.939870357513428,
"margin_dpo/margin_std": 7.335954666137695,
"step": 193
},
{
"epoch": 0.29327286470143615,
"fcm_dpo/beta": 0.09653833508491516,
"fcm_dpo/delta": -0.07798396050930023,
"fcm_dpo/margin": 4.901164531707764,
"fcm_dpo/q_t": 0.39555221796035767,
"grad_norm": 25.787086486816406,
"learning_rate": 4.4651327368569684e-07,
"logits/chosen": 0.322595477104187,
"logits/rejected": 0.29371175169944763,
"logps/chosen": -67.38978576660156,
"logps/ref_chosen": -56.64944076538086,
"logps/ref_rejected": -69.98954772949219,
"logps/rejected": -85.63105010986328,
"loss": 1.135,
"margin_dpo/margin_mean": 4.901164531707764,
"margin_dpo/margin_std": 8.785233497619629,
"step": 194
},
{
"epoch": 0.2947845804988662,
"fcm_dpo/beta": 0.09371854364871979,
"fcm_dpo/delta": -0.17399150133132935,
"fcm_dpo/margin": 6.003086090087891,
"fcm_dpo/q_t": 0.37736234068870544,
"grad_norm": 26.80616569519043,
"learning_rate": 4.4569318740967043e-07,
"logits/chosen": 0.2243194878101349,
"logits/rejected": 0.22502626478672028,
"logps/chosen": -82.69122314453125,
"logps/ref_chosen": -70.40977478027344,
"logps/ref_rejected": -74.39448547363281,
"logps/rejected": -92.67901611328125,
"loss": 1.0281,
"margin_dpo/margin_mean": 6.003086090087891,
"margin_dpo/margin_std": 8.03487777709961,
"step": 195
},
{
"epoch": 0.2962962962962963,
"fcm_dpo/beta": 0.0926411971449852,
"fcm_dpo/delta": -0.09142302721738815,
"fcm_dpo/margin": 5.255779266357422,
"fcm_dpo/q_t": 0.3900688588619232,
"grad_norm": 24.710390090942383,
"learning_rate": 4.448676271745197e-07,
"logits/chosen": 0.3130764961242676,
"logits/rejected": 0.27285757660865784,
"logps/chosen": -70.67607116699219,
"logps/ref_chosen": -59.227577209472656,
"logps/ref_rejected": -83.54757690429688,
"logps/rejected": -100.25184631347656,
"loss": 1.0765,
"margin_dpo/margin_mean": 5.255779266357422,
"margin_dpo/margin_std": 7.652653694152832,
"step": 196
},
{
"epoch": 0.29780801209372637,
"fcm_dpo/beta": 0.08979904651641846,
"fcm_dpo/delta": -0.18953318893909454,
"fcm_dpo/margin": 6.446434020996094,
"fcm_dpo/q_t": 0.37620919942855835,
"grad_norm": 25.047359466552734,
"learning_rate": 4.440366160729392e-07,
"logits/chosen": 0.3811100721359253,
"logits/rejected": 0.3337089717388153,
"logps/chosen": -62.05528259277344,
"logps/ref_chosen": -51.52912902832031,
"logps/ref_rejected": -73.70631408691406,
"logps/rejected": -90.67889404296875,
"loss": 1.0846,
"margin_dpo/margin_mean": 6.4464335441589355,
"margin_dpo/margin_std": 10.239412307739258,
"step": 197
},
{
"epoch": 0.29931972789115646,
"fcm_dpo/beta": 0.08605066686868668,
"fcm_dpo/delta": -0.22043052315711975,
"fcm_dpo/margin": 7.0605268478393555,
"fcm_dpo/q_t": 0.3634013235569,
"grad_norm": 24.03299903869629,
"learning_rate": 4.432001773500957e-07,
"logits/chosen": 0.35537493228912354,
"logits/rejected": 0.3165588080883026,
"logps/chosen": -70.80035400390625,
"logps/ref_chosen": -59.78268051147461,
"logps/ref_rejected": -72.24533081054688,
"logps/rejected": -90.32351684570312,
"loss": 0.9705,
"margin_dpo/margin_mean": 7.0605268478393555,
"margin_dpo/margin_std": 7.6906585693359375,
"step": 198
},
{
"epoch": 0.30083144368858655,
"fcm_dpo/beta": 0.08475294709205627,
"fcm_dpo/delta": -0.05305337905883789,
"fcm_dpo/margin": 5.297489166259766,
"fcm_dpo/q_t": 0.4017520546913147,
"grad_norm": 26.102365493774414,
"learning_rate": 4.4235833440297856e-07,
"logits/chosen": 0.30094629526138306,
"logits/rejected": 0.2166268527507782,
"logps/chosen": -68.91012573242188,
"logps/ref_chosen": -56.38677215576172,
"logps/ref_rejected": -74.56779479980469,
"logps/rejected": -92.38864135742188,
"loss": 1.1633,
"margin_dpo/margin_mean": 5.297488212585449,
"margin_dpo/margin_std": 10.02812385559082,
"step": 199
},
{
"epoch": 0.30234315948601664,
"fcm_dpo/beta": 0.08017782866954803,
"fcm_dpo/delta": -0.24141071736812592,
"fcm_dpo/margin": 7.785175323486328,
"fcm_dpo/q_t": 0.3658748269081116,
"grad_norm": 23.437423706054688,
"learning_rate": 4.415111107797445e-07,
"logits/chosen": 0.3566874861717224,
"logits/rejected": 0.2875964939594269,
"logps/chosen": -67.94142150878906,
"logps/ref_chosen": -57.82432556152344,
"logps/ref_rejected": -89.28246307373047,
"logps/rejected": -107.18473815917969,
"loss": 1.0189,
"margin_dpo/margin_mean": 7.785175323486328,
"margin_dpo/margin_std": 10.709969520568848,
"step": 200
},
{
"epoch": 0.30234315948601664,
"eval_fcm_dpo/beta": 0.07967542111873627,
"eval_logits/chosen": 0.31474569439888,
"eval_logits/rejected": 0.2707725763320923,
"eval_logps/chosen": -86.38968658447266,
"eval_logps/ref_chosen": -74.85946655273438,
"eval_logps/ref_rejected": -79.54898834228516,
"eval_logps/rejected": -97.07010650634766,
"eval_loss": 0.5583317875862122,
"eval_margin_dpo/margin_mean": 5.990893840789795,
"eval_margin_dpo/margin_std": 10.197680473327637,
"eval_runtime": 38.0389,
"eval_samples_per_second": 60.543,
"eval_steps_per_second": 1.893,
"step": 200
},
{
"epoch": 0.30385487528344673,
"fcm_dpo/beta": 0.07807569205760956,
"fcm_dpo/delta": -0.11501055210828781,
"fcm_dpo/margin": 6.5143938064575195,
"fcm_dpo/q_t": 0.3902972340583801,
"grad_norm": 25.34015464782715,
"learning_rate": 4.4065853017905953e-07,
"logits/chosen": 0.358869731426239,
"logits/rejected": 0.3160993456840515,
"logps/chosen": -71.9423828125,
"logps/ref_chosen": -58.999759674072266,
"logps/ref_rejected": -84.67575073242188,
"logps/rejected": -104.13275146484375,
"loss": 1.0787,
"margin_dpo/margin_mean": 6.514393329620361,
"margin_dpo/margin_std": 10.039584159851074,
"step": 201
},
{
"epoch": 0.30536659108087677,
"fcm_dpo/beta": 0.07607395201921463,
"fcm_dpo/delta": -0.18573462963104248,
"fcm_dpo/margin": 7.564221382141113,
"fcm_dpo/q_t": 0.37164774537086487,
"grad_norm": 21.76279067993164,
"learning_rate": 4.3980061644943575e-07,
"logits/chosen": 0.2765665650367737,
"logits/rejected": 0.20993581414222717,
"logps/chosen": -58.819705963134766,
"logps/ref_chosen": -47.660648345947266,
"logps/ref_rejected": -73.63249969482422,
"logps/rejected": -92.35577392578125,
"loss": 1.0235,
"margin_dpo/margin_mean": 7.5642218589782715,
"margin_dpo/margin_std": 9.893567085266113,
"step": 202
},
{
"epoch": 0.30687830687830686,
"fcm_dpo/beta": 0.07392242550849915,
"fcm_dpo/delta": -0.11798103153705597,
"fcm_dpo/margin": 6.927526473999023,
"fcm_dpo/q_t": 0.3890300989151001,
"grad_norm": 24.907970428466797,
"learning_rate": 4.3893739358856455e-07,
"logits/chosen": 0.35103338956832886,
"logits/rejected": 0.2826537489891052,
"logps/chosen": -74.85539245605469,
"logps/ref_chosen": -62.32553482055664,
"logps/ref_rejected": -99.37226104736328,
"logps/rejected": -118.82963562011719,
"loss": 1.0657,
"margin_dpo/margin_mean": 6.927526473999023,
"margin_dpo/margin_std": 10.243463516235352,
"step": 203
},
{
"epoch": 0.30839002267573695,
"fcm_dpo/beta": 0.0711907222867012,
"fcm_dpo/delta": -0.11254880577325821,
"fcm_dpo/margin": 7.061636447906494,
"fcm_dpo/q_t": 0.3894692659378052,
"grad_norm": 21.693220138549805,
"learning_rate": 4.380688857426449e-07,
"logits/chosen": 0.2948107421398163,
"logits/rejected": 0.22981415688991547,
"logps/chosen": -63.0117073059082,
"logps/ref_chosen": -50.62931823730469,
"logps/ref_rejected": -66.60475158691406,
"logps/rejected": -86.04878234863281,
"loss": 1.0766,
"margin_dpo/margin_mean": 7.061635971069336,
"margin_dpo/margin_std": 10.273975372314453,
"step": 204
},
{
"epoch": 0.30990173847316704,
"fcm_dpo/beta": 0.07016897201538086,
"fcm_dpo/delta": -0.11553419381380081,
"fcm_dpo/margin": 7.253925323486328,
"fcm_dpo/q_t": 0.38996151089668274,
"grad_norm": 26.938138961791992,
"learning_rate": 4.3719511720570814e-07,
"logits/chosen": 0.34917423129081726,
"logits/rejected": 0.2908037006855011,
"logps/chosen": -83.69819641113281,
"logps/ref_chosen": -70.3561782836914,
"logps/ref_rejected": -93.39848327636719,
"logps/rejected": -113.99442291259766,
"loss": 1.0946,
"margin_dpo/margin_mean": 7.2539262771606445,
"margin_dpo/margin_std": 11.814342498779297,
"step": 205
},
{
"epoch": 0.31141345427059713,
"fcm_dpo/beta": 0.07059814780950546,
"fcm_dpo/delta": 0.02109716460108757,
"fcm_dpo/margin": 5.3691887855529785,
"fcm_dpo/q_t": 0.4200194478034973,
"grad_norm": 24.054405212402344,
"learning_rate": 4.363161124189387e-07,
"logits/chosen": 0.38039857149124146,
"logits/rejected": 0.3642415404319763,
"logps/chosen": -81.48750305175781,
"logps/ref_chosen": -67.64547729492188,
"logps/ref_rejected": -79.89584350585938,
"logps/rejected": -99.1070556640625,
"loss": 1.2172,
"margin_dpo/margin_mean": 5.3691887855529785,
"margin_dpo/margin_std": 12.316067695617676,
"step": 206
},
{
"epoch": 0.3129251700680272,
"fcm_dpo/beta": 0.0685083270072937,
"fcm_dpo/delta": -0.15592733025550842,
"fcm_dpo/margin": 7.982370376586914,
"fcm_dpo/q_t": 0.3835386633872986,
"grad_norm": 21.380037307739258,
"learning_rate": 4.3543189596998986e-07,
"logits/chosen": 0.3081602454185486,
"logits/rejected": 0.2436012327671051,
"logps/chosen": -82.89225769042969,
"logps/ref_chosen": -67.66419219970703,
"logps/ref_rejected": -85.10249328613281,
"logps/rejected": -108.31292724609375,
"loss": 1.0441,
"margin_dpo/margin_mean": 7.982370376586914,
"margin_dpo/margin_std": 11.379196166992188,
"step": 207
},
{
"epoch": 0.3144368858654573,
"fcm_dpo/beta": 0.069021075963974,
"fcm_dpo/delta": 0.08469095081090927,
"fcm_dpo/margin": 4.60715389251709,
"fcm_dpo/q_t": 0.4303174912929535,
"grad_norm": 24.077442169189453,
"learning_rate": 4.3454249259229664e-07,
"logits/chosen": 0.3450078070163727,
"logits/rejected": 0.31959545612335205,
"logps/chosen": -69.91799926757812,
"logps/ref_chosen": -57.731712341308594,
"logps/ref_rejected": -74.19276428222656,
"logps/rejected": -90.9862060546875,
"loss": 1.2285,
"margin_dpo/margin_mean": 4.607153415679932,
"margin_dpo/margin_std": 10.970314025878906,
"step": 208
},
{
"epoch": 0.31594860166288735,
"fcm_dpo/beta": 0.06699629127979279,
"fcm_dpo/delta": -0.24109607934951782,
"fcm_dpo/margin": 9.348739624023438,
"fcm_dpo/q_t": 0.36549073457717896,
"grad_norm": 23.534868240356445,
"learning_rate": 4.336479271643833e-07,
"logits/chosen": 0.29840895533561707,
"logits/rejected": 0.24482461810112,
"logps/chosen": -80.83154296875,
"logps/ref_chosen": -68.55007934570312,
"logps/ref_rejected": -87.90541076660156,
"logps/rejected": -109.53561401367188,
"loss": 1.0307,
"margin_dpo/margin_mean": 9.348740577697754,
"margin_dpo/margin_std": 13.397310256958008,
"step": 209
},
{
"epoch": 0.31746031746031744,
"fcm_dpo/beta": 0.06439946591854095,
"fcm_dpo/delta": -0.19043028354644775,
"fcm_dpo/margin": 9.002416610717773,
"fcm_dpo/q_t": 0.37747329473495483,
"grad_norm": 20.80362319946289,
"learning_rate": 4.327482247091679e-07,
"logits/chosen": 0.3852647542953491,
"logits/rejected": 0.2949644923210144,
"logps/chosen": -69.62586975097656,
"logps/ref_chosen": -57.268272399902344,
"logps/ref_rejected": -85.72807312011719,
"logps/rejected": -107.08808898925781,
"loss": 1.0512,
"margin_dpo/margin_mean": 9.002415657043457,
"margin_dpo/margin_std": 13.310730934143066,
"step": 210
},
{
"epoch": 0.31897203325774753,
"fcm_dpo/beta": 0.0625411719083786,
"fcm_dpo/delta": -0.14104999601840973,
"fcm_dpo/margin": 8.534067153930664,
"fcm_dpo/q_t": 0.3859778642654419,
"grad_norm": 23.124126434326172,
"learning_rate": 4.3184341039326217e-07,
"logits/chosen": 0.3993861675262451,
"logits/rejected": 0.3120034337043762,
"logps/chosen": -64.48066711425781,
"logps/ref_chosen": -53.640708923339844,
"logps/ref_rejected": -93.0387954711914,
"logps/rejected": -112.41282653808594,
"loss": 1.0564,
"margin_dpo/margin_mean": 8.534066200256348,
"margin_dpo/margin_std": 12.409013748168945,
"step": 211
},
{
"epoch": 0.3204837490551776,
"fcm_dpo/beta": 0.0600648857653141,
"fcm_dpo/delta": -0.18490049242973328,
"fcm_dpo/margin": 9.557143211364746,
"fcm_dpo/q_t": 0.37409496307373047,
"grad_norm": 18.95738410949707,
"learning_rate": 4.309335095262675e-07,
"logits/chosen": 0.3936005234718323,
"logits/rejected": 0.3249232769012451,
"logps/chosen": -70.1107406616211,
"logps/ref_chosen": -57.36674499511719,
"logps/ref_rejected": -79.89643096923828,
"logps/rejected": -102.19757080078125,
"loss": 1.0381,
"margin_dpo/margin_mean": 9.557143211364746,
"margin_dpo/margin_std": 13.448509216308594,
"step": 212
},
{
"epoch": 0.3219954648526077,
"fcm_dpo/beta": 0.057153455913066864,
"fcm_dpo/delta": -0.24546325206756592,
"fcm_dpo/margin": 10.99544620513916,
"fcm_dpo/q_t": 0.3635343313217163,
"grad_norm": 17.224510192871094,
"learning_rate": 4.3001854756006724e-07,
"logits/chosen": 0.38934314250946045,
"logits/rejected": 0.3657595217227936,
"logps/chosen": -74.25251007080078,
"logps/ref_chosen": -65.22111511230469,
"logps/ref_rejected": -80.1810302734375,
"logps/rejected": -100.20787048339844,
"loss": 1.0026,
"margin_dpo/margin_mean": 10.995447158813477,
"margin_dpo/margin_std": 14.42126178741455,
"step": 213
},
{
"epoch": 0.3235071806500378,
"fcm_dpo/beta": 0.05538104474544525,
"fcm_dpo/delta": -0.20099294185638428,
"fcm_dpo/margin": 10.645904541015625,
"fcm_dpo/q_t": 0.3733557462692261,
"grad_norm": 21.00215721130371,
"learning_rate": 4.290985500881143e-07,
"logits/chosen": 0.2703975439071655,
"logits/rejected": 0.24784952402114868,
"logps/chosen": -72.12303161621094,
"logps/ref_chosen": -61.292327880859375,
"logps/ref_rejected": -67.69841003417969,
"logps/rejected": -89.17500305175781,
"loss": 1.0258,
"margin_dpo/margin_mean": 10.645904541015625,
"margin_dpo/margin_std": 14.420230865478516,
"step": 214
},
{
"epoch": 0.3250188964474679,
"fcm_dpo/beta": 0.053312748670578,
"fcm_dpo/delta": -0.1950150430202484,
"fcm_dpo/margin": 10.953173637390137,
"fcm_dpo/q_t": 0.37544140219688416,
"grad_norm": 19.009496688842773,
"learning_rate": 4.281735428447157e-07,
"logits/chosen": 0.2992058992385864,
"logits/rejected": 0.1988016963005066,
"logps/chosen": -77.40756225585938,
"logps/ref_chosen": -63.869136810302734,
"logps/ref_rejected": -98.7657241821289,
"logps/rejected": -123.25733184814453,
"loss": 1.0374,
"margin_dpo/margin_mean": 10.953174591064453,
"margin_dpo/margin_std": 15.171961784362793,
"step": 215
},
{
"epoch": 0.32653061224489793,
"fcm_dpo/beta": 0.05087217688560486,
"fcm_dpo/delta": -0.20583069324493408,
"fcm_dpo/margin": 11.67214584350586,
"fcm_dpo/q_t": 0.37170571088790894,
"grad_norm": 20.9128360748291,
"learning_rate": 4.2724355170431247e-07,
"logits/chosen": 0.418628454208374,
"logits/rejected": 0.33564436435699463,
"logps/chosen": -80.5323257446289,
"logps/ref_chosen": -67.824951171875,
"logps/ref_rejected": -96.40231323242188,
"logps/rejected": -120.7818374633789,
"loss": 1.0116,
"margin_dpo/margin_mean": 11.672143936157227,
"margin_dpo/margin_std": 15.552055358886719,
"step": 216
},
{
"epoch": 0.328042328042328,
"fcm_dpo/beta": 0.04872403293848038,
"fcm_dpo/delta": -0.20223326981067657,
"fcm_dpo/margin": 12.108686447143555,
"fcm_dpo/q_t": 0.3712342381477356,
"grad_norm": 17.21115493774414,
"learning_rate": 4.26308602680756e-07,
"logits/chosen": 0.3738940358161926,
"logits/rejected": 0.27082303166389465,
"logps/chosen": -74.92461395263672,
"logps/ref_chosen": -60.5049934387207,
"logps/ref_rejected": -84.26618194580078,
"logps/rejected": -110.79448699951172,
"loss": 1.003,
"margin_dpo/margin_mean": 12.108685493469238,
"margin_dpo/margin_std": 15.33790397644043,
"step": 217
},
{
"epoch": 0.3295540438397581,
"fcm_dpo/beta": 0.04887588322162628,
"fcm_dpo/delta": 0.020069099962711334,
"fcm_dpo/margin": 7.752167701721191,
"fcm_dpo/q_t": 0.41752538084983826,
"grad_norm": 19.853303909301758,
"learning_rate": 4.253687219265803e-07,
"logits/chosen": 0.26485973596572876,
"logits/rejected": 0.25896111130714417,
"logps/chosen": -85.6038818359375,
"logps/ref_chosen": -70.59431457519531,
"logps/ref_rejected": -73.89038848876953,
"logps/rejected": -96.6521224975586,
"loss": 1.2034,
"margin_dpo/margin_mean": 7.752167701721191,
"margin_dpo/margin_std": 16.52850341796875,
"step": 218
},
{
"epoch": 0.3310657596371882,
"fcm_dpo/beta": 0.04829990863800049,
"fcm_dpo/delta": 0.0020888671278953552,
"fcm_dpo/margin": 8.23349666595459,
"fcm_dpo/q_t": 0.41240739822387695,
"grad_norm": 18.878578186035156,
"learning_rate": 4.2442393573227043e-07,
"logits/chosen": 0.3172757625579834,
"logits/rejected": 0.2777259349822998,
"logps/chosen": -74.34814453125,
"logps/ref_chosen": -60.490943908691406,
"logps/ref_rejected": -75.85001373291016,
"logps/rejected": -97.94070434570312,
"loss": 1.1424,
"margin_dpo/margin_mean": 8.23349666595459,
"margin_dpo/margin_std": 14.514315605163574,
"step": 219
},
{
"epoch": 0.3325774754346183,
"fcm_dpo/beta": 0.047665540128946304,
"fcm_dpo/delta": -0.07977527379989624,
"fcm_dpo/margin": 9.964658737182617,
"fcm_dpo/q_t": 0.3987919092178345,
"grad_norm": 16.081050872802734,
"learning_rate": 4.234742705255272e-07,
"logits/chosen": 0.4192200005054474,
"logits/rejected": 0.35927826166152954,
"logps/chosen": -57.308311462402344,
"logps/ref_chosen": -45.013397216796875,
"logps/ref_rejected": -70.49369812011719,
"logps/rejected": -92.7532730102539,
"loss": 1.1145,
"margin_dpo/margin_mean": 9.964658737182617,
"margin_dpo/margin_std": 16.84489631652832,
"step": 220
},
{
"epoch": 0.3340891912320484,
"fcm_dpo/beta": 0.04696973040699959,
"fcm_dpo/delta": -0.1160588264465332,
"fcm_dpo/margin": 10.86303997039795,
"fcm_dpo/q_t": 0.39082762598991394,
"grad_norm": 17.7648868560791,
"learning_rate": 4.22519752870528e-07,
"logits/chosen": 0.41853708028793335,
"logits/rejected": 0.34903794527053833,
"logps/chosen": -71.09147644042969,
"logps/ref_chosen": -59.09584045410156,
"logps/ref_rejected": -88.64388275146484,
"logps/rejected": -111.50254821777344,
"loss": 1.0799,
"margin_dpo/margin_mean": 10.86303997039795,
"margin_dpo/margin_std": 16.972129821777344,
"step": 221
},
{
"epoch": 0.3356009070294785,
"fcm_dpo/beta": 0.04493716359138489,
"fcm_dpo/delta": -0.24270300567150116,
"fcm_dpo/margin": 13.956724166870117,
"fcm_dpo/q_t": 0.3622177243232727,
"grad_norm": 17.92151641845703,
"learning_rate": 4.2156040946718343e-07,
"logits/chosen": 0.47252702713012695,
"logits/rejected": 0.3871607780456543,
"logps/chosen": -68.96340942382812,
"logps/ref_chosen": -55.9976921081543,
"logps/ref_rejected": -111.94727325439453,
"logps/rejected": -138.8697052001953,
"loss": 0.9952,
"margin_dpo/margin_mean": 13.956724166870117,
"margin_dpo/margin_std": 17.575956344604492,
"step": 222
},
{
"epoch": 0.3371126228269085,
"fcm_dpo/beta": 0.043041862547397614,
"fcm_dpo/delta": -0.17856287956237793,
"fcm_dpo/margin": 13.174043655395508,
"fcm_dpo/q_t": 0.37481993436813354,
"grad_norm": 16.398582458496094,
"learning_rate": 4.2059626715039065e-07,
"logits/chosen": 0.43900156021118164,
"logits/rejected": 0.3837320804595947,
"logps/chosen": -74.68038940429688,
"logps/ref_chosen": -59.891422271728516,
"logps/ref_rejected": -86.28954315185547,
"logps/rejected": -114.25254821777344,
"loss": 1.0015,
"margin_dpo/margin_mean": 13.174043655395508,
"margin_dpo/margin_std": 15.676023483276367,
"step": 223
},
{
"epoch": 0.3386243386243386,
"fcm_dpo/beta": 0.04326090216636658,
"fcm_dpo/delta": 0.07465298473834991,
"fcm_dpo/margin": 7.578658580780029,
"fcm_dpo/q_t": 0.42947906255722046,
"grad_norm": 21.63603973388672,
"learning_rate": 4.1962735288928304e-07,
"logits/chosen": 0.4613693058490753,
"logits/rejected": 0.4411858022212982,
"logps/chosen": -81.04054260253906,
"logps/ref_chosen": -64.04463195800781,
"logps/ref_rejected": -75.05450439453125,
"logps/rejected": -99.62906646728516,
"loss": 1.1997,
"margin_dpo/margin_mean": 7.578658580780029,
"margin_dpo/margin_std": 16.304107666015625,
"step": 224
},
{
"epoch": 0.3401360544217687,
"fcm_dpo/beta": 0.04221531003713608,
"fcm_dpo/delta": -0.17840632796287537,
"fcm_dpo/margin": 13.439470291137695,
"fcm_dpo/q_t": 0.3767085671424866,
"grad_norm": 17.833824157714844,
"learning_rate": 4.186536937864752e-07,
"logits/chosen": 0.44540882110595703,
"logits/rejected": 0.33315980434417725,
"logps/chosen": -81.73786926269531,
"logps/ref_chosen": -66.0958251953125,
"logps/ref_rejected": -97.68675231933594,
"logps/rejected": -126.76826477050781,
"loss": 1.021,
"margin_dpo/margin_mean": 13.439470291137695,
"margin_dpo/margin_std": 17.625469207763672,
"step": 225
},
{
"epoch": 0.3416477702191988,
"fcm_dpo/beta": 0.041798561811447144,
"fcm_dpo/delta": -0.06101213023066521,
"fcm_dpo/margin": 10.95936107635498,
"fcm_dpo/q_t": 0.4006775915622711,
"grad_norm": 15.791013717651367,
"learning_rate": 4.176753170773052e-07,
"logits/chosen": 0.4840475916862488,
"logits/rejected": 0.436703622341156,
"logps/chosen": -66.21839141845703,
"logps/ref_chosen": -51.4168701171875,
"logps/ref_rejected": -66.30068969726562,
"logps/rejected": -92.06156921386719,
"loss": 1.1384,
"margin_dpo/margin_mean": 10.95936107635498,
"margin_dpo/margin_std": 19.834434509277344,
"step": 226
},
{
"epoch": 0.3431594860166289,
"fcm_dpo/beta": 0.0407416932284832,
"fcm_dpo/delta": -0.0931011512875557,
"fcm_dpo/margin": 11.974498748779297,
"fcm_dpo/q_t": 0.3970397412776947,
"grad_norm": 16.93015480041504,
"learning_rate": 4.166922501290729e-07,
"logits/chosen": 0.5116022825241089,
"logits/rejected": 0.4724254906177521,
"logps/chosen": -73.94245147705078,
"logps/ref_chosen": -57.989776611328125,
"logps/ref_rejected": -75.05464172363281,
"logps/rejected": -102.98181915283203,
"loss": 1.1164,
"margin_dpo/margin_mean": 11.97449779510498,
"margin_dpo/margin_std": 20.62078857421875,
"step": 227
},
{
"epoch": 0.34467120181405897,
"fcm_dpo/beta": 0.040374692529439926,
"fcm_dpo/delta": -0.07935923337936401,
"fcm_dpo/margin": 11.78009033203125,
"fcm_dpo/q_t": 0.39624863862991333,
"grad_norm": 16.199413299560547,
"learning_rate": 4.1570452044027405e-07,
"logits/chosen": 0.4898710548877716,
"logits/rejected": 0.4122768044471741,
"logps/chosen": -73.80580139160156,
"logps/ref_chosen": -55.55936813354492,
"logps/ref_rejected": -77.02364349365234,
"logps/rejected": -107.0501708984375,
"loss": 1.0911,
"margin_dpo/margin_mean": 11.78009033203125,
"margin_dpo/margin_std": 18.271432876586914,
"step": 228
},
{
"epoch": 0.34618291761148906,
"fcm_dpo/beta": 0.03962048888206482,
"fcm_dpo/delta": -0.06741949170827866,
"fcm_dpo/margin": 11.715319633483887,
"fcm_dpo/q_t": 0.3975101113319397,
"grad_norm": 32.47303009033203,
"learning_rate": 4.147121556398312e-07,
"logits/chosen": 0.5499299764633179,
"logits/rejected": 0.48605582118034363,
"logps/chosen": -64.823974609375,
"logps/ref_chosen": -50.79466247558594,
"logps/ref_rejected": -78.4474105834961,
"logps/rejected": -104.1920394897461,
"loss": 1.1338,
"margin_dpo/margin_mean": 11.715319633483887,
"margin_dpo/margin_std": 21.123619079589844,
"step": 229
},
{
"epoch": 0.3476946334089191,
"fcm_dpo/beta": 0.039171136915683746,
"fcm_dpo/delta": -0.12602832913398743,
"fcm_dpo/margin": 13.247392654418945,
"fcm_dpo/q_t": 0.38648897409439087,
"grad_norm": 16.753572463989258,
"learning_rate": 4.137151834863213e-07,
"logits/chosen": 0.44350725412368774,
"logits/rejected": 0.44191017746925354,
"logps/chosen": -73.05119323730469,
"logps/ref_chosen": -56.729225158691406,
"logps/ref_rejected": -62.99180603027344,
"logps/rejected": -92.56117248535156,
"loss": 1.0576,
"margin_dpo/margin_mean": 13.247393608093262,
"margin_dpo/margin_std": 18.401546478271484,
"step": 230
},
{
"epoch": 0.3492063492063492,
"fcm_dpo/beta": 0.03659620136022568,
"fcm_dpo/delta": -0.3143787980079651,
"fcm_dpo/margin": 18.866680145263672,
"fcm_dpo/q_t": 0.34471791982650757,
"grad_norm": 17.6395263671875,
"learning_rate": 4.1271363186719835e-07,
"logits/chosen": 0.40046536922454834,
"logits/rejected": 0.3871019780635834,
"logps/chosen": -92.10427856445312,
"logps/ref_chosen": -72.59709930419922,
"logps/ref_rejected": -86.2322998046875,
"logps/rejected": -124.60616302490234,
"loss": 0.9328,
"margin_dpo/margin_mean": 18.866680145263672,
"margin_dpo/margin_std": 19.989635467529297,
"step": 231
},
{
"epoch": 0.3507180650037793,
"fcm_dpo/beta": 0.03585398569703102,
"fcm_dpo/delta": -0.06070077791810036,
"fcm_dpo/margin": 12.77048110961914,
"fcm_dpo/q_t": 0.4032752513885498,
"grad_norm": 17.137638092041016,
"learning_rate": 4.1170752879801436e-07,
"logits/chosen": 0.4340604245662689,
"logits/rejected": 0.4048748016357422,
"logps/chosen": -87.20597839355469,
"logps/ref_chosen": -68.1185302734375,
"logps/ref_rejected": -83.79415893554688,
"logps/rejected": -115.652099609375,
"loss": 1.1293,
"margin_dpo/margin_mean": 12.77048110961914,
"margin_dpo/margin_std": 22.848384857177734,
"step": 232
},
{
"epoch": 0.35222978080120937,
"fcm_dpo/beta": 0.035481683909893036,
"fcm_dpo/delta": 0.021351546049118042,
"fcm_dpo/margin": 10.630614280700684,
"fcm_dpo/q_t": 0.4180358946323395,
"grad_norm": 16.118860244750977,
"learning_rate": 4.106969024216348e-07,
"logits/chosen": 0.48623842000961304,
"logits/rejected": 0.42944014072418213,
"logps/chosen": -77.33733367919922,
"logps/ref_chosen": -55.070152282714844,
"logps/ref_rejected": -66.61845397949219,
"logps/rejected": -99.5162353515625,
"loss": 1.1666,
"margin_dpo/margin_mean": 10.630615234375,
"margin_dpo/margin_std": 19.464500427246094,
"step": 233
},
{
"epoch": 0.35374149659863946,
"fcm_dpo/beta": 0.03642081841826439,
"fcm_dpo/delta": 0.003797471523284912,
"fcm_dpo/margin": 10.787586212158203,
"fcm_dpo/q_t": 0.41657352447509766,
"grad_norm": 18.763879776000977,
"learning_rate": 4.09681781007452e-07,
"logits/chosen": 0.39658474922180176,
"logits/rejected": 0.38530460000038147,
"logps/chosen": -76.36502075195312,
"logps/ref_chosen": -55.92589569091797,
"logps/ref_rejected": -51.11608123779297,
"logps/rejected": -82.3427963256836,
"loss": 1.193,
"margin_dpo/margin_mean": 10.787586212158203,
"margin_dpo/margin_std": 21.66558074951172,
"step": 234
},
{
"epoch": 0.35525321239606955,
"fcm_dpo/beta": 0.03478018939495087,
"fcm_dpo/delta": -0.23790514469146729,
"fcm_dpo/margin": 17.92596435546875,
"fcm_dpo/q_t": 0.35996830463409424,
"grad_norm": 16.978939056396484,
"learning_rate": 4.08662192950594e-07,
"logits/chosen": 0.510696291923523,
"logits/rejected": 0.49337536096572876,
"logps/chosen": -80.82229614257812,
"logps/ref_chosen": -64.53972625732422,
"logps/ref_rejected": -77.69151306152344,
"logps/rejected": -111.9000473022461,
"loss": 0.9688,
"margin_dpo/margin_mean": 17.92596435546875,
"margin_dpo/margin_std": 20.20526123046875,
"step": 235
},
{
"epoch": 0.35676492819349964,
"fcm_dpo/beta": 0.03382885456085205,
"fcm_dpo/delta": -0.0744684636592865,
"fcm_dpo/margin": 13.915045738220215,
"fcm_dpo/q_t": 0.3973722457885742,
"grad_norm": 15.759313583374023,
"learning_rate": 4.076381667711306e-07,
"logits/chosen": 0.4519270658493042,
"logits/rejected": 0.4385327100753784,
"logps/chosen": -97.88923645019531,
"logps/ref_chosen": -71.15473937988281,
"logps/ref_rejected": -84.88541412353516,
"logps/rejected": -125.53496551513672,
"loss": 1.1185,
"margin_dpo/margin_mean": 13.915045738220215,
"margin_dpo/margin_std": 23.799827575683594,
"step": 236
},
{
"epoch": 0.35827664399092973,
"fcm_dpo/beta": 0.033125244081020355,
"fcm_dpo/delta": -0.13403069972991943,
"fcm_dpo/margin": 15.910423278808594,
"fcm_dpo/q_t": 0.38460463285446167,
"grad_norm": 18.07618522644043,
"learning_rate": 4.066097311132753e-07,
"logits/chosen": 0.5179036259651184,
"logits/rejected": 0.5056600570678711,
"logps/chosen": -99.39932250976562,
"logps/ref_chosen": -76.14201354980469,
"logps/ref_rejected": -80.88479614257812,
"logps/rejected": -120.05252838134766,
"loss": 1.0734,
"margin_dpo/margin_mean": 15.910423278808594,
"margin_dpo/margin_std": 23.904003143310547,
"step": 237
},
{
"epoch": 0.35978835978835977,
"fcm_dpo/beta": 0.03213762491941452,
"fcm_dpo/delta": -0.1272638738155365,
"fcm_dpo/margin": 16.177248001098633,
"fcm_dpo/q_t": 0.3859345018863678,
"grad_norm": 25.455785751342773,
"learning_rate": 4.0557691474458414e-07,
"logits/chosen": 0.4519047737121582,
"logits/rejected": 0.43929409980773926,
"logps/chosen": -89.9517593383789,
"logps/ref_chosen": -68.88484954833984,
"logps/ref_rejected": -75.8946304321289,
"logps/rejected": -113.13878631591797,
"loss": 1.0625,
"margin_dpo/margin_mean": 16.177248001098633,
"margin_dpo/margin_std": 23.426652908325195,
"step": 238
},
{
"epoch": 0.36130007558578986,
"fcm_dpo/beta": 0.03170093148946762,
"fcm_dpo/delta": -0.12815909087657928,
"fcm_dpo/margin": 16.441516876220703,
"fcm_dpo/q_t": 0.38694924116134644,
"grad_norm": 17.88521385192871,
"learning_rate": 4.045397465551513e-07,
"logits/chosen": 0.5872991681098938,
"logits/rejected": 0.46438801288604736,
"logps/chosen": -82.54875183105469,
"logps/ref_chosen": -56.771827697753906,
"logps/ref_rejected": -116.23050689697266,
"logps/rejected": -158.44894409179688,
"loss": 1.0754,
"margin_dpo/margin_mean": 16.441518783569336,
"margin_dpo/margin_std": 24.606647491455078,
"step": 239
},
{
"epoch": 0.36281179138321995,
"fcm_dpo/beta": 0.03009945899248123,
"fcm_dpo/delta": -0.2575973570346832,
"fcm_dpo/margin": 21.3055419921875,
"fcm_dpo/q_t": 0.35840314626693726,
"grad_norm": 13.624947547912598,
"learning_rate": 4.0349825555680045e-07,
"logits/chosen": 0.5199910998344421,
"logits/rejected": 0.4287125766277313,
"logps/chosen": -78.27786254882812,
"logps/ref_chosen": -53.35411071777344,
"logps/ref_rejected": -80.12019348144531,
"logps/rejected": -126.34949493408203,
"loss": 0.9669,
"margin_dpo/margin_mean": 21.3055419921875,
"margin_dpo/margin_std": 24.555896759033203,
"step": 240
},
{
"epoch": 0.36432350718065004,
"fcm_dpo/beta": 0.02971363626420498,
"fcm_dpo/delta": -0.013462748378515244,
"fcm_dpo/margin": 13.889959335327148,
"fcm_dpo/q_t": 0.41003936529159546,
"grad_norm": 17.35210418701172,
"learning_rate": 4.0245247088227377e-07,
"logits/chosen": 0.48497307300567627,
"logits/rejected": 0.44994428753852844,
"logps/chosen": -97.29027557373047,
"logps/ref_chosen": -71.89541625976562,
"logps/ref_rejected": -83.03492736816406,
"logps/rejected": -122.31974792480469,
"loss": 1.1403,
"margin_dpo/margin_mean": 13.889958381652832,
"margin_dpo/margin_std": 24.697525024414062,
"step": 241
},
{
"epoch": 0.36583522297808013,
"fcm_dpo/beta": 0.028537161648273468,
"fcm_dpo/delta": -0.18422536551952362,
"fcm_dpo/margin": 19.995790481567383,
"fcm_dpo/q_t": 0.37522459030151367,
"grad_norm": 13.32011890411377,
"learning_rate": 4.0140242178441665e-07,
"logits/chosen": 0.4771093726158142,
"logits/rejected": 0.4567781090736389,
"logps/chosen": -84.04574584960938,
"logps/ref_chosen": -57.927433013916016,
"logps/ref_rejected": -67.838623046875,
"logps/rejected": -113.95272827148438,
"loss": 1.0272,
"margin_dpo/margin_mean": 19.99578857421875,
"margin_dpo/margin_std": 26.637714385986328,
"step": 242
},
{
"epoch": 0.3673469387755102,
"fcm_dpo/beta": 0.028129760175943375,
"fcm_dpo/delta": -0.09357127547264099,
"fcm_dpo/margin": 17.38387107849121,
"fcm_dpo/q_t": 0.39220672845840454,
"grad_norm": 17.161989212036133,
"learning_rate": 4.003481376353596e-07,
"logits/chosen": 0.49265414476394653,
"logits/rejected": 0.4954020380973816,
"logps/chosen": -100.76765441894531,
"logps/ref_chosen": -74.27667236328125,
"logps/ref_rejected": -73.24340057373047,
"logps/rejected": -117.11825561523438,
"loss": 1.0782,
"margin_dpo/margin_mean": 17.38387107849121,
"margin_dpo/margin_std": 26.074810028076172,
"step": 243
},
{
"epoch": 0.3688586545729403,
"fcm_dpo/beta": 0.027078591287136078,
"fcm_dpo/delta": -0.2479928731918335,
"fcm_dpo/margin": 23.371715545654297,
"fcm_dpo/q_t": 0.35928937792778015,
"grad_norm": 15.457412719726562,
"learning_rate": 3.9928964792569654e-07,
"logits/chosen": 0.5174931883811951,
"logits/rejected": 0.43636053800582886,
"logps/chosen": -80.19678497314453,
"logps/ref_chosen": -53.36390686035156,
"logps/ref_rejected": -71.10276794433594,
"logps/rejected": -121.307373046875,
"loss": 0.9595,
"margin_dpo/margin_mean": 23.371713638305664,
"margin_dpo/margin_std": 25.55000114440918,
"step": 244
},
{
"epoch": 0.37037037037037035,
"fcm_dpo/beta": 0.02543710544705391,
"fcm_dpo/delta": -0.30314117670059204,
"fcm_dpo/margin": 26.81406021118164,
"fcm_dpo/q_t": 0.34784555435180664,
"grad_norm": 18.74749755859375,
"learning_rate": 3.982269822636601e-07,
"logits/chosen": 0.5679333209991455,
"logits/rejected": 0.5423753261566162,
"logps/chosen": -100.21349334716797,
"logps/ref_chosen": -71.19510650634766,
"logps/ref_rejected": -80.76235961914062,
"logps/rejected": -136.59481811523438,
"loss": 0.9243,
"margin_dpo/margin_mean": 26.81406021118164,
"margin_dpo/margin_std": 27.46959686279297,
"step": 245
},
{
"epoch": 0.37188208616780044,
"fcm_dpo/beta": 0.024606363847851753,
"fcm_dpo/delta": -0.16271455585956573,
"fcm_dpo/margin": 22.51306915283203,
"fcm_dpo/q_t": 0.38002920150756836,
"grad_norm": 17.689266204833984,
"learning_rate": 3.971601703742932e-07,
"logits/chosen": 0.5599805116653442,
"logits/rejected": 0.5014970302581787,
"logps/chosen": -106.98703002929688,
"logps/ref_chosen": -71.62104797363281,
"logps/ref_rejected": -94.03392028808594,
"logps/rejected": -151.9129638671875,
"loss": 1.0631,
"margin_dpo/margin_mean": 22.5130672454834,
"margin_dpo/margin_std": 33.42694854736328,
"step": 246
},
{
"epoch": 0.37339380196523053,
"fcm_dpo/beta": 0.0247543603181839,
"fcm_dpo/delta": 0.10083875060081482,
"fcm_dpo/margin": 12.170625686645508,
"fcm_dpo/q_t": 0.4334149956703186,
"grad_norm": 18.4669132232666,
"learning_rate": 3.960892420986177e-07,
"logits/chosen": 0.5527133941650391,
"logits/rejected": 0.5427131056785583,
"logps/chosen": -120.41569519042969,
"logps/ref_chosen": -80.02254486083984,
"logps/ref_rejected": -89.22705841064453,
"logps/rejected": -141.79083251953125,
"loss": 1.2315,
"margin_dpo/margin_mean": 12.170624732971191,
"margin_dpo/margin_std": 29.19440460205078,
"step": 247
},
{
"epoch": 0.3749055177626606,
"fcm_dpo/beta": 0.024207081645727158,
"fcm_dpo/delta": -0.18463271856307983,
"fcm_dpo/margin": 23.729291915893555,
"fcm_dpo/q_t": 0.3793613910675049,
"grad_norm": 15.274466514587402,
"learning_rate": 3.9501422739279953e-07,
"logits/chosen": 0.5400315523147583,
"logits/rejected": 0.5853956937789917,
"logps/chosen": -99.80722045898438,
"logps/ref_chosen": -65.37796020507812,
"logps/ref_rejected": -61.365787506103516,
"logps/rejected": -119.52434539794922,
"loss": 1.0573,
"margin_dpo/margin_mean": 23.729293823242188,
"margin_dpo/margin_std": 35.37153625488281,
"step": 248
},
{
"epoch": 0.3764172335600907,
"fcm_dpo/beta": 0.024627620354294777,
"fcm_dpo/delta": 0.16489864885807037,
"fcm_dpo/margin": 9.622716903686523,
"fcm_dpo/q_t": 0.4474208354949951,
"grad_norm": 21.90114974975586,
"learning_rate": 3.9393515632731094e-07,
"logits/chosen": 0.5731900930404663,
"logits/rejected": 0.6100406646728516,
"logps/chosen": -120.05014038085938,
"logps/ref_chosen": -74.60145568847656,
"logps/ref_rejected": -63.79338455200195,
"logps/rejected": -118.86479187011719,
"loss": 1.3203,
"margin_dpo/margin_mean": 9.622716903686523,
"margin_dpo/margin_std": 32.57998275756836,
"step": 249
},
{
"epoch": 0.3779289493575208,
"fcm_dpo/beta": 0.024052519351243973,
"fcm_dpo/delta": -0.18917813897132874,
"fcm_dpo/margin": 24.05508041381836,
"fcm_dpo/q_t": 0.37193530797958374,
"grad_norm": 16.015880584716797,
"learning_rate": 3.9285205908608934e-07,
"logits/chosen": 0.6647765636444092,
"logits/rejected": 0.6217361092567444,
"logps/chosen": -101.03977966308594,
"logps/ref_chosen": -61.938209533691406,
"logps/ref_rejected": -72.21602630615234,
"logps/rejected": -135.3726806640625,
"loss": 1.0291,
"margin_dpo/margin_mean": 24.05508041381836,
"margin_dpo/margin_std": 32.849769592285156,
"step": 250
},
{
"epoch": 0.3794406651549509,
"fcm_dpo/beta": 0.023839669302105904,
"fcm_dpo/delta": 0.03268512338399887,
"fcm_dpo/margin": 15.458717346191406,
"fcm_dpo/q_t": 0.4200361967086792,
"grad_norm": 23.502283096313477,
"learning_rate": 3.9176496596569265e-07,
"logits/chosen": 0.6419062614440918,
"logits/rejected": 0.6014778017997742,
"logps/chosen": -111.71145629882812,
"logps/ref_chosen": -66.85694885253906,
"logps/ref_rejected": -84.83396911621094,
"logps/rejected": -145.14718627929688,
"loss": 1.2018,
"margin_dpo/margin_mean": 15.458715438842773,
"margin_dpo/margin_std": 33.98907470703125,
"step": 251
},
{
"epoch": 0.38095238095238093,
"fcm_dpo/beta": 0.024377018213272095,
"fcm_dpo/delta": 0.03264795243740082,
"fcm_dpo/margin": 14.927447319030762,
"fcm_dpo/q_t": 0.4222317039966583,
"grad_norm": 27.788288116455078,
"learning_rate": 3.9067390737445254e-07,
"logits/chosen": 0.5485316514968872,
"logits/rejected": 0.49520280957221985,
"logps/chosen": -97.87835693359375,
"logps/ref_chosen": -56.22393035888672,
"logps/ref_rejected": -77.1136245727539,
"logps/rejected": -133.69549560546875,
"loss": 1.2416,
"margin_dpo/margin_mean": 14.927447319030762,
"margin_dpo/margin_std": 35.03778076171875,
"step": 252
},
{
"epoch": 0.382464096749811,
"fcm_dpo/beta": 0.023830143734812737,
"fcm_dpo/delta": -0.001905880868434906,
"fcm_dpo/margin": 16.788637161254883,
"fcm_dpo/q_t": 0.41427648067474365,
"grad_norm": 20.045818328857422,
"learning_rate": 3.8957891383162304e-07,
"logits/chosen": 0.63853919506073,
"logits/rejected": 0.5978012084960938,
"logps/chosen": -95.88780212402344,
"logps/ref_chosen": -52.21001434326172,
"logps/ref_rejected": -58.75764846801758,
"logps/rejected": -119.22407531738281,
"loss": 1.1678,
"margin_dpo/margin_mean": 16.788637161254883,
"margin_dpo/margin_std": 32.0192985534668,
"step": 253
},
{
"epoch": 0.3839758125472411,
"fcm_dpo/beta": 0.02359645627439022,
"fcm_dpo/delta": -0.09121982753276825,
"fcm_dpo/margin": 20.583770751953125,
"fcm_dpo/q_t": 0.3972465395927429,
"grad_norm": 16.444860458374023,
"learning_rate": 3.884800159665276e-07,
"logits/chosen": 0.5922572016716003,
"logits/rejected": 0.5414531230926514,
"logps/chosen": -111.32112121582031,
"logps/ref_chosen": -65.63632202148438,
"logps/ref_rejected": -82.34425354003906,
"logps/rejected": -148.61280822753906,
"loss": 1.1023,
"margin_dpo/margin_mean": 20.583770751953125,
"margin_dpo/margin_std": 33.7872314453125,
"step": 254
},
{
"epoch": 0.3854875283446712,
"fcm_dpo/beta": 0.023110289126634598,
"fcm_dpo/delta": -0.13200685381889343,
"fcm_dpo/margin": 22.705629348754883,
"fcm_dpo/q_t": 0.38903963565826416,
"grad_norm": 23.491275787353516,
"learning_rate": 3.873772445177015e-07,
"logits/chosen": 0.5693901777267456,
"logits/rejected": 0.5399304032325745,
"logps/chosen": -112.29985046386719,
"logps/ref_chosen": -67.91108703613281,
"logps/ref_rejected": -83.89114379882812,
"logps/rejected": -150.98553466796875,
"loss": 1.0989,
"margin_dpo/margin_mean": 22.705629348754883,
"margin_dpo/margin_std": 38.15801239013672,
"step": 255
},
{
"epoch": 0.3869992441421013,
"fcm_dpo/beta": 0.022572454065084457,
"fcm_dpo/delta": -0.10266627371311188,
"fcm_dpo/margin": 22.016427993774414,
"fcm_dpo/q_t": 0.3943637013435364,
"grad_norm": 19.359926223754883,
"learning_rate": 3.862706303320329e-07,
"logits/chosen": 0.5768101215362549,
"logits/rejected": 0.516845703125,
"logps/chosen": -114.48297119140625,
"logps/ref_chosen": -63.49998474121094,
"logps/ref_rejected": -90.77104187011719,
"logps/rejected": -163.7704620361328,
"loss": 1.1265,
"margin_dpo/margin_mean": 22.016427993774414,
"margin_dpo/margin_std": 39.41869354248047,
"step": 256
},
{
"epoch": 0.3885109599395314,
"fcm_dpo/beta": 0.022011350840330124,
"fcm_dpo/delta": -0.14289262890815735,
"fcm_dpo/margin": 24.298580169677734,
"fcm_dpo/q_t": 0.3878602981567383,
"grad_norm": 16.987897872924805,
"learning_rate": 3.851602043638994e-07,
"logits/chosen": 0.5744519233703613,
"logits/rejected": 0.5112959742546082,
"logps/chosen": -121.80198669433594,
"logps/ref_chosen": -70.60064697265625,
"logps/ref_rejected": -108.58313751220703,
"logps/rejected": -184.0830535888672,
"loss": 1.0894,
"margin_dpo/margin_mean": 24.298580169677734,
"margin_dpo/margin_std": 39.85075378417969,
"step": 257
},
{
"epoch": 0.3900226757369615,
"fcm_dpo/beta": 0.02168167755007744,
"fcm_dpo/delta": -0.07299736142158508,
"fcm_dpo/margin": 21.659503936767578,
"fcm_dpo/q_t": 0.39374053478240967,
"grad_norm": 16.47481918334961,
"learning_rate": 3.840459976743023e-07,
"logits/chosen": 0.6185115575790405,
"logits/rejected": 0.5693656206130981,
"logps/chosen": -109.0198974609375,
"logps/ref_chosen": -59.25416564941406,
"logps/ref_rejected": -85.58709716796875,
"logps/rejected": -157.0123291015625,
"loss": 1.0654,
"margin_dpo/margin_mean": 21.659503936767578,
"margin_dpo/margin_std": 29.831501007080078,
"step": 258
},
{
"epoch": 0.3915343915343915,
"fcm_dpo/beta": 0.02049821801483631,
"fcm_dpo/delta": -0.3130166530609131,
"fcm_dpo/margin": 33.63237762451172,
"fcm_dpo/q_t": 0.3474535346031189,
"grad_norm": 14.128410339355469,
"learning_rate": 3.8292804142999796e-07,
"logits/chosen": 0.5682635307312012,
"logits/rejected": 0.4675600528717041,
"logps/chosen": -106.17240142822266,
"logps/ref_chosen": -65.43487548828125,
"logps/ref_rejected": -95.41731262207031,
"logps/rejected": -169.78720092773438,
"loss": 0.9589,
"margin_dpo/margin_mean": 33.63237762451172,
"margin_dpo/margin_std": 39.22385787963867,
"step": 259
},
{
"epoch": 0.3930461073318216,
"fcm_dpo/beta": 0.01985359564423561,
"fcm_dpo/delta": -0.10949759185314178,
"fcm_dpo/margin": 25.3404541015625,
"fcm_dpo/q_t": 0.3917066156864166,
"grad_norm": 16.248342514038086,
"learning_rate": 3.818063669026256e-07,
"logits/chosen": 0.5878227949142456,
"logits/rejected": 0.5052345991134644,
"logps/chosen": -91.03826141357422,
"logps/ref_chosen": -49.08958435058594,
"logps/ref_rejected": -79.01708221435547,
"logps/rejected": -146.30621337890625,
"loss": 1.0994,
"margin_dpo/margin_mean": 25.3404541015625,
"margin_dpo/margin_std": 41.39429473876953,
"step": 260
},
{
"epoch": 0.3945578231292517,
"fcm_dpo/beta": 0.01975172758102417,
"fcm_dpo/delta": -0.02371894381940365,
"fcm_dpo/margin": 21.40045928955078,
"fcm_dpo/q_t": 0.40781357884407043,
"grad_norm": 17.527528762817383,
"learning_rate": 3.806810054678331e-07,
"logits/chosen": 0.4745985269546509,
"logits/rejected": 0.5027123689651489,
"logps/chosen": -113.92041778564453,
"logps/ref_chosen": -70.87239074707031,
"logps/ref_rejected": -65.01522064208984,
"logps/rejected": -129.4636993408203,
"loss": 1.125,
"margin_dpo/margin_mean": 21.40045928955078,
"margin_dpo/margin_std": 36.23526382446289,
"step": 261
},
{
"epoch": 0.3960695389266818,
"fcm_dpo/beta": 0.019550006836652756,
"fcm_dpo/delta": -0.07667741179466248,
"fcm_dpo/margin": 24.20038604736328,
"fcm_dpo/q_t": 0.39495140314102173,
"grad_norm": 14.670170783996582,
"learning_rate": 3.7955198860439887e-07,
"logits/chosen": 0.6521082520484924,
"logits/rejected": 0.5909339189529419,
"logps/chosen": -110.21141052246094,
"logps/ref_chosen": -67.8706283569336,
"logps/ref_rejected": -88.7205810546875,
"logps/rejected": -155.26174926757812,
"loss": 1.0755,
"margin_dpo/margin_mean": 24.20038604736328,
"margin_dpo/margin_std": 34.9122428894043,
"step": 262
},
{
"epoch": 0.3975812547241119,
"fcm_dpo/beta": 0.019483327865600586,
"fcm_dpo/delta": -0.007016682997345924,
"fcm_dpo/margin": 20.865982055664062,
"fcm_dpo/q_t": 0.4093659520149231,
"grad_norm": 15.283248901367188,
"learning_rate": 3.784193478933516e-07,
"logits/chosen": 0.5626658797264099,
"logits/rejected": 0.45943546295166016,
"logps/chosen": -96.49991607666016,
"logps/ref_chosen": -55.194583892822266,
"logps/ref_rejected": -80.54048156738281,
"logps/rejected": -142.7117919921875,
"loss": 1.1333,
"margin_dpo/margin_mean": 20.865982055664062,
"margin_dpo/margin_std": 35.687191009521484,
"step": 263
},
{
"epoch": 0.39909297052154197,
"fcm_dpo/beta": 0.019220834597945213,
"fcm_dpo/delta": -0.060912348330020905,
"fcm_dpo/margin": 23.83148193359375,
"fcm_dpo/q_t": 0.3992775082588196,
"grad_norm": 15.10071086883545,
"learning_rate": 3.7728311501708674e-07,
"logits/chosen": 0.48142877221107483,
"logits/rejected": 0.4368743300437927,
"logps/chosen": -126.1361312866211,
"logps/ref_chosen": -83.17068481445312,
"logps/ref_rejected": -88.33625793457031,
"logps/rejected": -155.1331787109375,
"loss": 1.1012,
"margin_dpo/margin_mean": 23.83148193359375,
"margin_dpo/margin_std": 37.955535888671875,
"step": 264
},
{
"epoch": 0.40060468631897206,
"fcm_dpo/beta": 0.01891726814210415,
"fcm_dpo/delta": -0.1391075700521469,
"fcm_dpo/margin": 28.10588836669922,
"fcm_dpo/q_t": 0.3853192925453186,
"grad_norm": 14.972970008850098,
"learning_rate": 3.7614332175848027e-07,
"logits/chosen": 0.6752257347106934,
"logits/rejected": 0.6097507476806641,
"logps/chosen": -91.71566772460938,
"logps/ref_chosen": -51.66284942626953,
"logps/ref_rejected": -67.1720962524414,
"logps/rejected": -135.330810546875,
"loss": 1.0868,
"margin_dpo/margin_mean": 28.105884552001953,
"margin_dpo/margin_std": 44.18812942504883,
"step": 265
},
{
"epoch": 0.4021164021164021,
"fcm_dpo/beta": 0.018389977514743805,
"fcm_dpo/delta": -0.07914341986179352,
"fcm_dpo/margin": 25.835094451904297,
"fcm_dpo/q_t": 0.39529335498809814,
"grad_norm": 15.214395523071289,
"learning_rate": 3.75e-07,
"logits/chosen": 0.5970737934112549,
"logits/rejected": 0.5253136157989502,
"logps/chosen": -95.74099731445312,
"logps/ref_chosen": -57.45049285888672,
"logps/ref_rejected": -77.60826110839844,
"logps/rejected": -141.73385620117188,
"loss": 1.0845,
"margin_dpo/margin_mean": 25.835094451904297,
"margin_dpo/margin_std": 39.158912658691406,
"step": 266
},
{
"epoch": 0.4036281179138322,
"fcm_dpo/beta": 0.018644969910383224,
"fcm_dpo/delta": 0.04022669047117233,
"fcm_dpo/margin": 19.258594512939453,
"fcm_dpo/q_t": 0.4202066957950592,
"grad_norm": 15.600641250610352,
"learning_rate": 3.738531817228131e-07,
"logits/chosen": 0.6086355447769165,
"logits/rejected": 0.5916974544525146,
"logps/chosen": -88.30766296386719,
"logps/ref_chosen": -55.03535079956055,
"logps/ref_rejected": -66.0953369140625,
"logps/rejected": -118.62623596191406,
"loss": 1.1904,
"margin_dpo/margin_mean": 19.25859260559082,
"margin_dpo/margin_std": 38.7248420715332,
"step": 267
},
{
"epoch": 0.4051398337112623,
"fcm_dpo/beta": 0.018376577645540237,
"fcm_dpo/delta": -0.06248517334461212,
"fcm_dpo/margin": 25.012699127197266,
"fcm_dpo/q_t": 0.39753156900405884,
"grad_norm": 13.161920547485352,
"learning_rate": 3.7270289900589204e-07,
"logits/chosen": 0.4666864275932312,
"logits/rejected": 0.4518453776836395,
"logps/chosen": -98.39913940429688,
"logps/ref_chosen": -65.07174682617188,
"logps/ref_rejected": -71.42485809326172,
"logps/rejected": -129.76495361328125,
"loss": 1.0667,
"margin_dpo/margin_mean": 25.012699127197266,
"margin_dpo/margin_std": 33.614540100097656,
"step": 268
},
{
"epoch": 0.40665154950869237,
"fcm_dpo/beta": 0.018133126199245453,
"fcm_dpo/delta": -0.10752344131469727,
"fcm_dpo/margin": 27.66704559326172,
"fcm_dpo/q_t": 0.38884609937667847,
"grad_norm": 14.423748016357422,
"learning_rate": 3.7154918402511714e-07,
"logits/chosen": 0.7032470703125,
"logits/rejected": 0.6554895639419556,
"logps/chosen": -105.76145935058594,
"logps/ref_chosen": -67.1362075805664,
"logps/ref_rejected": -82.55778503417969,
"logps/rejected": -148.85006713867188,
"loss": 1.0541,
"margin_dpo/margin_mean": 27.66704559326172,
"margin_dpo/margin_std": 36.86224365234375,
"step": 269
},
{
"epoch": 0.40816326530612246,
"fcm_dpo/beta": 0.017861198633909225,
"fcm_dpo/delta": 0.01838039606809616,
"fcm_dpo/margin": 21.38981819152832,
"fcm_dpo/q_t": 0.4144758880138397,
"grad_norm": 14.856551170349121,
"learning_rate": 3.7039206905237656e-07,
"logits/chosen": 0.6661429405212402,
"logits/rejected": 0.5874545574188232,
"logps/chosen": -105.91307067871094,
"logps/ref_chosen": -66.6886978149414,
"logps/ref_rejected": -85.16129302978516,
"logps/rejected": -145.77548217773438,
"loss": 1.1569,
"margin_dpo/margin_mean": 21.38981819152832,
"margin_dpo/margin_std": 39.64323806762695,
"step": 270
},
{
"epoch": 0.40967498110355255,
"fcm_dpo/beta": 0.018035490065813065,
"fcm_dpo/delta": 0.05983828008174896,
"fcm_dpo/margin": 18.96731185913086,
"fcm_dpo/q_t": 0.4302310645580292,
"grad_norm": 16.447065353393555,
"learning_rate": 3.692315864546635e-07,
"logits/chosen": 0.6576748490333557,
"logits/rejected": 0.5956451892852783,
"logps/chosen": -110.25750732421875,
"logps/ref_chosen": -72.40754699707031,
"logps/ref_rejected": -92.06311798095703,
"logps/rejected": -148.88038635253906,
"loss": 1.2239,
"margin_dpo/margin_mean": 18.96731185913086,
"margin_dpo/margin_std": 45.06271743774414,
"step": 271
},
{
"epoch": 0.41118669690098264,
"fcm_dpo/beta": 0.017494186758995056,
"fcm_dpo/delta": -0.27478447556495667,
"fcm_dpo/margin": 37.57149124145508,
"fcm_dpo/q_t": 0.351974219083786,
"grad_norm": 16.66138458251953,
"learning_rate": 3.6806776869317067e-07,
"logits/chosen": 0.6409205198287964,
"logits/rejected": 0.657590389251709,
"logps/chosen": -97.38768768310547,
"logps/ref_chosen": -66.60140228271484,
"logps/ref_rejected": -67.74340057373047,
"logps/rejected": -136.10118103027344,
"loss": 0.9218,
"margin_dpo/margin_mean": 37.57149124145508,
"margin_dpo/margin_std": 35.830421447753906,
"step": 272
},
{
"epoch": 0.4126984126984127,
"fcm_dpo/beta": 0.01705990359187126,
"fcm_dpo/delta": -0.038921333849430084,
"fcm_dpo/margin": 25.61779022216797,
"fcm_dpo/q_t": 0.4043377637863159,
"grad_norm": 17.23778533935547,
"learning_rate": 3.669006483223828e-07,
"logits/chosen": 0.6578247547149658,
"logits/rejected": 0.5909750461578369,
"logps/chosen": -102.54705810546875,
"logps/ref_chosen": -57.35487747192383,
"logps/ref_rejected": -84.17168426513672,
"logps/rejected": -154.98165893554688,
"loss": 1.165,
"margin_dpo/margin_mean": 25.617786407470703,
"margin_dpo/margin_std": 50.37907791137695,
"step": 273
},
{
"epoch": 0.41421012849584277,
"fcm_dpo/beta": 0.016863549128174782,
"fcm_dpo/delta": -0.0922776535153389,
"fcm_dpo/margin": 28.930538177490234,
"fcm_dpo/q_t": 0.39474034309387207,
"grad_norm": 14.447813034057617,
"learning_rate": 3.657302579891656e-07,
"logits/chosen": 0.4977789521217346,
"logits/rejected": 0.48191457986831665,
"logps/chosen": -104.21945190429688,
"logps/ref_chosen": -59.64149475097656,
"logps/ref_rejected": -68.29348754882812,
"logps/rejected": -141.80197143554688,
"loss": 1.1082,
"margin_dpo/margin_mean": 28.930538177490234,
"margin_dpo/margin_std": 48.47543716430664,
"step": 274
},
{
"epoch": 0.41572184429327286,
"fcm_dpo/beta": 0.01651889830827713,
"fcm_dpo/delta": -0.11709671467542648,
"fcm_dpo/margin": 30.950244903564453,
"fcm_dpo/q_t": 0.385990709066391,
"grad_norm": 14.784261703491211,
"learning_rate": 3.645566304318526e-07,
"logits/chosen": 0.6382570266723633,
"logits/rejected": 0.5556176900863647,
"logps/chosen": -96.62824249267578,
"logps/ref_chosen": -53.26664352416992,
"logps/ref_rejected": -73.84062194824219,
"logps/rejected": -148.1524658203125,
"loss": 1.0538,
"margin_dpo/margin_mean": 30.950244903564453,
"margin_dpo/margin_std": 42.797706604003906,
"step": 275
},
{
"epoch": 0.41723356009070295,
"fcm_dpo/beta": 0.016219474375247955,
"fcm_dpo/delta": -0.05831969529390335,
"fcm_dpo/margin": 28.092674255371094,
"fcm_dpo/q_t": 0.398104727268219,
"grad_norm": 16.444677352905273,
"learning_rate": 3.633797984793294e-07,
"logits/chosen": 0.5698527097702026,
"logits/rejected": 0.5378223657608032,
"logps/chosen": -94.54618072509766,
"logps/ref_chosen": -53.02079772949219,
"logps/ref_rejected": -61.56678771972656,
"logps/rejected": -131.18484497070312,
"loss": 1.0929,
"margin_dpo/margin_mean": 28.092670440673828,
"margin_dpo/margin_std": 42.767860412597656,
"step": 276
},
{
"epoch": 0.41874527588813304,
"fcm_dpo/beta": 0.01643621176481247,
"fcm_dpo/delta": 0.09840987622737885,
"fcm_dpo/margin": 18.521482467651367,
"fcm_dpo/q_t": 0.43302056193351746,
"grad_norm": 18.792949676513672,
"learning_rate": 3.6219979505011555e-07,
"logits/chosen": 0.6819844245910645,
"logits/rejected": 0.7085480690002441,
"logps/chosen": -117.11235046386719,
"logps/ref_chosen": -71.43299102783203,
"logps/ref_rejected": -67.65852355957031,
"logps/rejected": -131.85935974121094,
"loss": 1.2177,
"margin_dpo/margin_mean": 18.521484375,
"margin_dpo/margin_std": 42.374908447265625,
"step": 277
},
{
"epoch": 0.42025699168556313,
"fcm_dpo/beta": 0.01640717126429081,
"fcm_dpo/delta": -0.07881483435630798,
"fcm_dpo/margin": 28.937284469604492,
"fcm_dpo/q_t": 0.3956737518310547,
"grad_norm": 23.435312271118164,
"learning_rate": 3.6101665315144353e-07,
"logits/chosen": 0.5685777068138123,
"logits/rejected": 0.5195118188858032,
"logps/chosen": -118.54839324951172,
"logps/ref_chosen": -67.11076354980469,
"logps/ref_rejected": -88.74851989746094,
"logps/rejected": -169.12344360351562,
"loss": 1.1033,
"margin_dpo/margin_mean": 28.93728256225586,
"margin_dpo/margin_std": 46.60326385498047,
"step": 278
},
{
"epoch": 0.4217687074829932,
"fcm_dpo/beta": 0.01570543460547924,
"fcm_dpo/delta": -0.23970243334770203,
"fcm_dpo/margin": 39.80183410644531,
"fcm_dpo/q_t": 0.3597272038459778,
"grad_norm": 19.260663986206055,
"learning_rate": 3.5983040587833563e-07,
"logits/chosen": 0.587124228477478,
"logits/rejected": 0.5513536334037781,
"logps/chosen": -91.71465301513672,
"logps/ref_chosen": -54.49748611450195,
"logps/ref_rejected": -70.42373657226562,
"logps/rejected": -147.44273376464844,
"loss": 0.9529,
"margin_dpo/margin_mean": 39.80183029174805,
"margin_dpo/margin_std": 41.225624084472656,
"step": 279
},
{
"epoch": 0.42328042328042326,
"fcm_dpo/beta": 0.014930122531950474,
"fcm_dpo/delta": -0.24701552093029022,
"fcm_dpo/margin": 42.298980712890625,
"fcm_dpo/q_t": 0.35863977670669556,
"grad_norm": 11.754053115844727,
"learning_rate": 3.586410864126781e-07,
"logits/chosen": 0.6465336084365845,
"logits/rejected": 0.6081722378730774,
"logps/chosen": -101.64505004882812,
"logps/ref_chosen": -60.43281173706055,
"logps/ref_rejected": -78.39051818847656,
"logps/rejected": -161.9017333984375,
"loss": 0.9399,
"margin_dpo/margin_mean": 42.298980712890625,
"margin_dpo/margin_std": 42.43488311767578,
"step": 280
},
{
"epoch": 0.42479213907785335,
"fcm_dpo/beta": 0.014409145340323448,
"fcm_dpo/delta": -0.15759529173374176,
"fcm_dpo/margin": 38.107269287109375,
"fcm_dpo/q_t": 0.37785613536834717,
"grad_norm": 12.64098072052002,
"learning_rate": 3.574487280222929e-07,
"logits/chosen": 0.6242285966873169,
"logits/rejected": 0.6461759805679321,
"logps/chosen": -106.43411254882812,
"logps/ref_chosen": -60.2820930480957,
"logps/ref_rejected": -62.04009246826172,
"logps/rejected": -146.29937744140625,
"loss": 1.0218,
"margin_dpo/margin_mean": 38.107269287109375,
"margin_dpo/margin_std": 48.40364074707031,
"step": 281
},
{
"epoch": 0.42630385487528344,
"fcm_dpo/beta": 0.014241490513086319,
"fcm_dpo/delta": -0.07853139936923981,
"fcm_dpo/margin": 33.2713737487793,
"fcm_dpo/q_t": 0.39666303992271423,
"grad_norm": 18.873950958251953,
"learning_rate": 3.562533640600075e-07,
"logits/chosen": 0.5803976058959961,
"logits/rejected": 0.5337048172950745,
"logps/chosen": -113.38694763183594,
"logps/ref_chosen": -60.623924255371094,
"logps/ref_rejected": -68.67400360107422,
"logps/rejected": -154.70840454101562,
"loss": 1.1035,
"margin_dpo/margin_mean": 33.2713737487793,
"margin_dpo/margin_std": 52.381553649902344,
"step": 282
},
{
"epoch": 0.42781557067271353,
"fcm_dpo/beta": 0.014045214280486107,
"fcm_dpo/delta": -0.02820678800344467,
"fcm_dpo/margin": 30.39036750793457,
"fcm_dpo/q_t": 0.4052136540412903,
"grad_norm": 15.759411811828613,
"learning_rate": 3.550550279627215e-07,
"logits/chosen": 0.5947866439819336,
"logits/rejected": 0.4988853335380554,
"logps/chosen": -121.33143615722656,
"logps/ref_chosen": -67.64775085449219,
"logps/ref_rejected": -99.96835327148438,
"logps/rejected": -184.04241943359375,
"loss": 1.121,
"margin_dpo/margin_mean": 30.390365600585938,
"margin_dpo/margin_std": 50.31166076660156,
"step": 283
},
{
"epoch": 0.4293272864701436,
"fcm_dpo/beta": 0.013790317811071873,
"fcm_dpo/delta": -0.05972848832607269,
"fcm_dpo/margin": 33.095237731933594,
"fcm_dpo/q_t": 0.396755188703537,
"grad_norm": 12.709166526794434,
"learning_rate": 3.5385375325047163e-07,
"logits/chosen": 0.6604640483856201,
"logits/rejected": 0.5998015403747559,
"logps/chosen": -108.848388671875,
"logps/ref_chosen": -56.96742630004883,
"logps/ref_rejected": -86.36236572265625,
"logps/rejected": -171.33856201171875,
"loss": 1.0758,
"margin_dpo/margin_mean": 33.095237731933594,
"margin_dpo/margin_std": 46.18756866455078,
"step": 284
},
{
"epoch": 0.4308390022675737,
"fcm_dpo/beta": 0.013867860659956932,
"fcm_dpo/delta": 0.004719622433185577,
"fcm_dpo/margin": 28.496816635131836,
"fcm_dpo/q_t": 0.4120190143585205,
"grad_norm": 18.109210968017578,
"learning_rate": 3.5264957352549375e-07,
"logits/chosen": 0.6804319620132446,
"logits/rejected": 0.654731035232544,
"logps/chosen": -136.22836303710938,
"logps/ref_chosen": -71.65611267089844,
"logps/ref_rejected": -81.63829803466797,
"logps/rejected": -174.70738220214844,
"loss": 1.1343,
"margin_dpo/margin_mean": 28.496816635131836,
"margin_dpo/margin_std": 48.14155578613281,
"step": 285
},
{
"epoch": 0.4323507180650038,
"fcm_dpo/beta": 0.013354543596506119,
"fcm_dpo/delta": -0.21594460308551788,
"fcm_dpo/margin": 45.1060676574707,
"fcm_dpo/q_t": 0.36516904830932617,
"grad_norm": 14.074089050292969,
"learning_rate": 3.514425224712835e-07,
"logits/chosen": 0.5963453054428101,
"logits/rejected": 0.5053284168243408,
"logps/chosen": -119.87309265136719,
"logps/ref_chosen": -61.07952117919922,
"logps/ref_rejected": -91.28128051757812,
"logps/rejected": -195.180908203125,
"loss": 0.9671,
"margin_dpo/margin_mean": 45.1060676574707,
"margin_dpo/margin_std": 48.90450668334961,
"step": 286
},
{
"epoch": 0.43386243386243384,
"fcm_dpo/beta": 0.012879462912678719,
"fcm_dpo/delta": -0.19090059399604797,
"fcm_dpo/margin": 45.031578063964844,
"fcm_dpo/q_t": 0.36986613273620605,
"grad_norm": 12.359694480895996,
"learning_rate": 3.502326338516534e-07,
"logits/chosen": 0.6899577379226685,
"logits/rejected": 0.6542201042175293,
"logps/chosen": -94.44883728027344,
"logps/ref_chosen": -46.035789489746094,
"logps/ref_rejected": -59.95293426513672,
"logps/rejected": -153.39755249023438,
"loss": 0.9868,
"margin_dpo/margin_mean": 45.03157424926758,
"margin_dpo/margin_std": 51.3031005859375,
"step": 287
},
{
"epoch": 0.43537414965986393,
"fcm_dpo/beta": 0.012728270143270493,
"fcm_dpo/delta": -0.017540642991662025,
"fcm_dpo/margin": 32.744781494140625,
"fcm_dpo/q_t": 0.40644484758377075,
"grad_norm": 14.533074378967285,
"learning_rate": 3.490199415097892e-07,
"logits/chosen": 0.5365294218063354,
"logits/rejected": 0.4841228723526001,
"logps/chosen": -129.072021484375,
"logps/ref_chosen": -65.3908462524414,
"logps/ref_rejected": -88.53607940673828,
"logps/rejected": -184.96202087402344,
"loss": 1.1158,
"margin_dpo/margin_mean": 32.744781494140625,
"margin_dpo/margin_std": 52.35576629638672,
"step": 288
},
{
"epoch": 0.436885865457294,
"fcm_dpo/beta": 0.012797607108950615,
"fcm_dpo/delta": 0.02153756096959114,
"fcm_dpo/margin": 29.594276428222656,
"fcm_dpo/q_t": 0.4176866412162781,
"grad_norm": 18.0996036529541,
"learning_rate": 3.4780447936730247e-07,
"logits/chosen": 0.7580336928367615,
"logits/rejected": 0.7205421924591064,
"logps/chosen": -118.97895050048828,
"logps/ref_chosen": -54.5936279296875,
"logps/ref_rejected": -67.20855712890625,
"logps/rejected": -161.1881561279297,
"loss": 1.1574,
"margin_dpo/margin_mean": 29.59427833557129,
"margin_dpo/margin_std": 54.214630126953125,
"step": 289
},
{
"epoch": 0.4383975812547241,
"fcm_dpo/beta": 0.012573182582855225,
"fcm_dpo/delta": -0.06713174283504486,
"fcm_dpo/margin": 36.85968780517578,
"fcm_dpo/q_t": 0.3965286612510681,
"grad_norm": 16.59284019470215,
"learning_rate": 3.465862814232821e-07,
"logits/chosen": 0.7414308786392212,
"logits/rejected": 0.6713223457336426,
"logps/chosen": -135.12689208984375,
"logps/ref_chosen": -61.38457489013672,
"logps/ref_rejected": -91.92778015136719,
"logps/rejected": -202.52978515625,
"loss": 1.0945,
"margin_dpo/margin_mean": 36.85968780517578,
"margin_dpo/margin_std": 56.981605529785156,
"step": 290
},
{
"epoch": 0.4399092970521542,
"fcm_dpo/beta": 0.012540910392999649,
"fcm_dpo/delta": -0.08436623215675354,
"fcm_dpo/margin": 38.22565841674805,
"fcm_dpo/q_t": 0.3934711813926697,
"grad_norm": 16.229829788208008,
"learning_rate": 3.4536538175334343e-07,
"logits/chosen": 0.8189216256141663,
"logits/rejected": 0.7507155537605286,
"logps/chosen": -121.83723449707031,
"logps/ref_chosen": -50.863037109375,
"logps/ref_rejected": -82.20868682861328,
"logps/rejected": -191.40853881835938,
"loss": 1.0669,
"margin_dpo/margin_mean": 38.22565841674805,
"margin_dpo/margin_std": 51.421878814697266,
"step": 291
},
{
"epoch": 0.4414210128495843,
"fcm_dpo/beta": 0.012391122058033943,
"fcm_dpo/delta": 0.010787010192871094,
"fcm_dpo/margin": 31.44447898864746,
"fcm_dpo/q_t": 0.41242778301239014,
"grad_norm": 15.73810863494873,
"learning_rate": 3.4414181450867465e-07,
"logits/chosen": 0.6897194385528564,
"logits/rejected": 0.6420219540596008,
"logps/chosen": -134.3984375,
"logps/ref_chosen": -64.34888458251953,
"logps/ref_rejected": -72.86434173583984,
"logps/rejected": -174.35838317871094,
"loss": 1.1483,
"margin_dpo/margin_mean": 31.444477081298828,
"margin_dpo/margin_std": 56.412925720214844,
"step": 292
},
{
"epoch": 0.4429327286470144,
"fcm_dpo/beta": 0.012167178094387054,
"fcm_dpo/delta": -0.15894976258277893,
"fcm_dpo/margin": 45.242408752441406,
"fcm_dpo/q_t": 0.37786537408828735,
"grad_norm": 11.61641788482666,
"learning_rate": 3.4291561391508185e-07,
"logits/chosen": 0.7814351916313171,
"logits/rejected": 0.6955462694168091,
"logps/chosen": -124.15631103515625,
"logps/ref_chosen": -54.869468688964844,
"logps/ref_rejected": -81.858642578125,
"logps/rejected": -196.3878936767578,
"loss": 1.0418,
"margin_dpo/margin_mean": 45.242408752441406,
"margin_dpo/margin_std": 61.57768249511719,
"step": 293
},
{
"epoch": 0.4444444444444444,
"fcm_dpo/beta": 0.012029530480504036,
"fcm_dpo/delta": 0.03731568530201912,
"fcm_dpo/margin": 30.24942970275879,
"fcm_dpo/q_t": 0.4194733500480652,
"grad_norm": 12.362147331237793,
"learning_rate": 3.4168681427203153e-07,
"logits/chosen": 0.7301532030105591,
"logits/rejected": 0.6848835945129395,
"logps/chosen": -128.99664306640625,
"logps/ref_chosen": -56.670902252197266,
"logps/ref_rejected": -70.32819366455078,
"logps/rejected": -172.9033660888672,
"loss": 1.1386,
"margin_dpo/margin_mean": 30.24942970275879,
"margin_dpo/margin_std": 49.71393585205078,
"step": 294
},
{
"epoch": 0.4459561602418745,
"fcm_dpo/beta": 0.01216411404311657,
"fcm_dpo/delta": 0.051958102732896805,
"fcm_dpo/margin": 28.76223373413086,
"fcm_dpo/q_t": 0.42246508598327637,
"grad_norm": 19.552597045898438,
"learning_rate": 3.4045544995169125e-07,
"logits/chosen": 0.6953170299530029,
"logits/rejected": 0.5967302918434143,
"logps/chosen": -128.7552032470703,
"logps/ref_chosen": -50.40088653564453,
"logps/ref_rejected": -83.43521881103516,
"logps/rejected": -190.55178833007812,
"loss": 1.1691,
"margin_dpo/margin_mean": 28.762237548828125,
"margin_dpo/margin_std": 54.42176055908203,
"step": 295
},
{
"epoch": 0.4474678760393046,
"fcm_dpo/beta": 0.01211509294807911,
"fcm_dpo/delta": -0.029373712837696075,
"fcm_dpo/margin": 35.291038513183594,
"fcm_dpo/q_t": 0.4047588109970093,
"grad_norm": 13.090824127197266,
"learning_rate": 3.392215553979679e-07,
"logits/chosen": 0.6591260433197021,
"logits/rejected": 0.6170543432235718,
"logps/chosen": -146.8544921875,
"logps/ref_chosen": -69.15034484863281,
"logps/ref_rejected": -89.60166931152344,
"logps/rejected": -202.59686279296875,
"loss": 1.1128,
"margin_dpo/margin_mean": 35.29104232788086,
"margin_dpo/margin_std": 56.198814392089844,
"step": 296
},
{
"epoch": 0.4489795918367347,
"fcm_dpo/beta": 0.012072188779711723,
"fcm_dpo/delta": -0.08097002655267715,
"fcm_dpo/margin": 39.51006317138672,
"fcm_dpo/q_t": 0.3917747139930725,
"grad_norm": 13.36133861541748,
"learning_rate": 3.3798516512554485e-07,
"logits/chosen": 0.6727302074432373,
"logits/rejected": 0.6169841289520264,
"logps/chosen": -139.768798828125,
"logps/ref_chosen": -58.01630401611328,
"logps/ref_rejected": -69.95780944824219,
"logps/rejected": -191.22036743164062,
"loss": 1.05,
"margin_dpo/margin_mean": 39.51006317138672,
"margin_dpo/margin_std": 49.360862731933594,
"step": 297
},
{
"epoch": 0.4504913076341648,
"fcm_dpo/beta": 0.012033342383801937,
"fcm_dpo/delta": 0.04077546298503876,
"fcm_dpo/margin": 29.974117279052734,
"fcm_dpo/q_t": 0.42090824246406555,
"grad_norm": 13.734335899353027,
"learning_rate": 3.367463137189156e-07,
"logits/chosen": 0.8173831701278687,
"logits/rejected": 0.7571094036102295,
"logps/chosen": -135.49334716796875,
"logps/ref_chosen": -56.1693115234375,
"logps/ref_rejected": -68.55052185058594,
"logps/rejected": -177.84866333007812,
"loss": 1.1813,
"margin_dpo/margin_mean": 29.9741153717041,
"margin_dpo/margin_std": 60.25572204589844,
"step": 298
},
{
"epoch": 0.4520030234315949,
"fcm_dpo/beta": 0.012157764285802841,
"fcm_dpo/delta": 0.08485768735408783,
"fcm_dpo/margin": 26.14179229736328,
"fcm_dpo/q_t": 0.42982053756713867,
"grad_norm": 18.463979721069336,
"learning_rate": 3.355050358314172e-07,
"logits/chosen": 0.5963407754898071,
"logits/rejected": 0.5696459412574768,
"logps/chosen": -140.74490356445312,
"logps/ref_chosen": -62.31780242919922,
"logps/ref_rejected": -72.60028839111328,
"logps/rejected": -177.169189453125,
"loss": 1.2232,
"margin_dpo/margin_mean": 26.14179229736328,
"margin_dpo/margin_std": 59.85368347167969,
"step": 299
},
{
"epoch": 0.45351473922902497,
"fcm_dpo/beta": 0.012246577069163322,
"fcm_dpo/delta": -0.016002114862203598,
"fcm_dpo/margin": 33.91412353515625,
"fcm_dpo/q_t": 0.407207727432251,
"grad_norm": 13.91763973236084,
"learning_rate": 3.3426136618426043e-07,
"logits/chosen": 0.700993537902832,
"logits/rejected": 0.6361641883850098,
"logps/chosen": -139.4217529296875,
"logps/ref_chosen": -60.38157653808594,
"logps/ref_rejected": -75.45442199707031,
"logps/rejected": -188.40872192382812,
"loss": 1.1333,
"margin_dpo/margin_mean": 33.914119720458984,
"margin_dpo/margin_std": 58.409393310546875,
"step": 300
},
{
"epoch": 0.455026455026455,
"fcm_dpo/beta": 0.01223750039935112,
"fcm_dpo/delta": 0.027313653379678726,
"fcm_dpo/margin": 30.530223846435547,
"fcm_dpo/q_t": 0.41623467206954956,
"grad_norm": 15.060598373413086,
"learning_rate": 3.3301533956555885e-07,
"logits/chosen": 0.7448295950889587,
"logits/rejected": 0.7172563076019287,
"logps/chosen": -129.93959045410156,
"logps/ref_chosen": -52.85089111328125,
"logps/ref_rejected": -69.97584533691406,
"logps/rejected": -177.59475708007812,
"loss": 1.155,
"margin_dpo/margin_mean": 30.530223846435547,
"margin_dpo/margin_std": 55.4693603515625,
"step": 301
},
{
"epoch": 0.4565381708238851,
"fcm_dpo/beta": 0.012510240077972412,
"fcm_dpo/delta": 0.12387596070766449,
"fcm_dpo/margin": 22.355518341064453,
"fcm_dpo/q_t": 0.437045693397522,
"grad_norm": 18.828718185424805,
"learning_rate": 3.317669908293554e-07,
"logits/chosen": 0.5679184198379517,
"logits/rejected": 0.5149757862091064,
"logps/chosen": -148.1495819091797,
"logps/ref_chosen": -66.96650695800781,
"logps/ref_rejected": -88.09510803222656,
"logps/rejected": -191.63369750976562,
"loss": 1.2288,
"margin_dpo/margin_mean": 22.355518341064453,
"margin_dpo/margin_std": 53.01789474487305,
"step": 302
},
{
"epoch": 0.4580498866213152,
"fcm_dpo/beta": 0.012377789244055748,
"fcm_dpo/delta": -0.12394848465919495,
"fcm_dpo/margin": 41.81725311279297,
"fcm_dpo/q_t": 0.38504043221473694,
"grad_norm": 12.159745216369629,
"learning_rate": 3.3051635489464793e-07,
"logits/chosen": 0.6651922464370728,
"logits/rejected": 0.6016709804534912,
"logps/chosen": -133.10816955566406,
"logps/ref_chosen": -62.12152862548828,
"logps/ref_rejected": -90.31204223632812,
"logps/rejected": -203.11593627929688,
"loss": 1.0636,
"margin_dpo/margin_mean": 41.81725311279297,
"margin_dpo/margin_std": 60.54005432128906,
"step": 303
},
{
"epoch": 0.4595616024187453,
"fcm_dpo/beta": 0.012083902955055237,
"fcm_dpo/delta": -0.10522289574146271,
"fcm_dpo/margin": 41.36079406738281,
"fcm_dpo/q_t": 0.3844369053840637,
"grad_norm": 13.707260131835938,
"learning_rate": 3.292634667444117e-07,
"logits/chosen": 0.6590306758880615,
"logits/rejected": 0.6075633764266968,
"logps/chosen": -122.18032836914062,
"logps/ref_chosen": -60.695091247558594,
"logps/ref_rejected": -78.2525405883789,
"logps/rejected": -181.09857177734375,
"loss": 1.0229,
"margin_dpo/margin_mean": 41.36079406738281,
"margin_dpo/margin_std": 46.75843048095703,
"step": 304
},
{
"epoch": 0.46107331821617537,
"fcm_dpo/beta": 0.012013021856546402,
"fcm_dpo/delta": 0.019373510032892227,
"fcm_dpo/margin": 31.71729278564453,
"fcm_dpo/q_t": 0.4142247438430786,
"grad_norm": 13.076567649841309,
"learning_rate": 3.280083614246217e-07,
"logits/chosen": 0.6364206671714783,
"logits/rejected": 0.6650691032409668,
"logps/chosen": -144.49835205078125,
"logps/ref_chosen": -72.69914245605469,
"logps/ref_rejected": -65.65670776367188,
"logps/rejected": -169.1732177734375,
"loss": 1.1557,
"margin_dpo/margin_mean": 31.71729278564453,
"margin_dpo/margin_std": 57.52043151855469,
"step": 305
},
{
"epoch": 0.46258503401360546,
"fcm_dpo/beta": 0.012034446001052856,
"fcm_dpo/delta": 0.01424664631485939,
"fcm_dpo/margin": 32.047447204589844,
"fcm_dpo/q_t": 0.41186851263046265,
"grad_norm": 13.938604354858398,
"learning_rate": 3.267510740432719e-07,
"logits/chosen": 0.723272442817688,
"logits/rejected": 0.611417293548584,
"logps/chosen": -121.28661346435547,
"logps/ref_chosen": -53.97052764892578,
"logps/ref_rejected": -71.02423095703125,
"logps/rejected": -170.3877716064453,
"loss": 1.1107,
"margin_dpo/margin_mean": 32.047447204589844,
"margin_dpo/margin_std": 45.39753723144531,
"step": 306
},
{
"epoch": 0.46409674981103555,
"fcm_dpo/beta": 0.012543787248432636,
"fcm_dpo/delta": 0.20474562048912048,
"fcm_dpo/margin": 15.797273635864258,
"fcm_dpo/q_t": 0.45768019556999207,
"grad_norm": 17.678245544433594,
"learning_rate": 3.2549163976939285e-07,
"logits/chosen": 0.7229036092758179,
"logits/rejected": 0.6749166250228882,
"logps/chosen": -118.67990112304688,
"logps/ref_chosen": -57.413108825683594,
"logps/ref_rejected": -68.68010711669922,
"logps/rejected": -145.74417114257812,
"loss": 1.3206,
"margin_dpo/margin_mean": 15.79727554321289,
"margin_dpo/margin_std": 57.190589904785156,
"step": 307
},
{
"epoch": 0.4656084656084656,
"fcm_dpo/beta": 0.012601923197507858,
"fcm_dpo/delta": 0.013587992638349533,
"fcm_dpo/margin": 30.687833786010742,
"fcm_dpo/q_t": 0.41257303953170776,
"grad_norm": 11.740730285644531,
"learning_rate": 3.2423009383206874e-07,
"logits/chosen": 0.6801770925521851,
"logits/rejected": 0.6689407825469971,
"logps/chosen": -126.86598205566406,
"logps/ref_chosen": -66.59879302978516,
"logps/ref_rejected": -74.337158203125,
"logps/rejected": -165.29217529296875,
"loss": 1.1438,
"margin_dpo/margin_mean": 30.68783187866211,
"margin_dpo/margin_std": 53.468421936035156,
"step": 308
},
{
"epoch": 0.4671201814058957,
"fcm_dpo/beta": 0.01260319072753191,
"fcm_dpo/delta": -0.002130165696144104,
"fcm_dpo/margin": 31.87583351135254,
"fcm_dpo/q_t": 0.40878164768218994,
"grad_norm": 12.210992813110352,
"learning_rate": 3.229664715194511e-07,
"logits/chosen": 0.7181951999664307,
"logits/rejected": 0.6613823175430298,
"logps/chosen": -134.88624572753906,
"logps/ref_chosen": -65.39474487304688,
"logps/ref_rejected": -75.70930480957031,
"logps/rejected": -177.07664489746094,
"loss": 1.1086,
"margin_dpo/margin_mean": 31.875831604003906,
"margin_dpo/margin_std": 47.10810089111328,
"step": 309
},
{
"epoch": 0.46863189720332576,
"fcm_dpo/beta": 0.012920012697577477,
"fcm_dpo/delta": 0.15046632289886475,
"fcm_dpo/margin": 19.62242889404297,
"fcm_dpo/q_t": 0.44542592763900757,
"grad_norm": 14.362406730651855,
"learning_rate": 3.2170080817777257e-07,
"logits/chosen": 0.6988915205001831,
"logits/rejected": 0.6873372793197632,
"logps/chosen": -144.29745483398438,
"logps/ref_chosen": -74.66827392578125,
"logps/ref_rejected": -80.5689697265625,
"logps/rejected": -169.82058715820312,
"loss": 1.2524,
"margin_dpo/margin_mean": 19.62242889404297,
"margin_dpo/margin_std": 51.30465316772461,
"step": 310
},
{
"epoch": 0.47014361300075586,
"fcm_dpo/beta": 0.012975428253412247,
"fcm_dpo/delta": 0.00755208358168602,
"fcm_dpo/margin": 30.218578338623047,
"fcm_dpo/q_t": 0.4122556149959564,
"grad_norm": 15.364665985107422,
"learning_rate": 3.204331392103574e-07,
"logits/chosen": 0.6145889163017273,
"logits/rejected": 0.4769431948661804,
"logps/chosen": -116.09998321533203,
"logps/ref_chosen": -59.738033294677734,
"logps/ref_rejected": -93.60757446289062,
"logps/rejected": -180.1881103515625,
"loss": 1.126,
"margin_dpo/margin_mean": 30.21858024597168,
"margin_dpo/margin_std": 48.436500549316406,
"step": 311
},
{
"epoch": 0.47165532879818595,
"fcm_dpo/beta": 0.012864358723163605,
"fcm_dpo/delta": -0.1185460314154625,
"fcm_dpo/margin": 39.848915100097656,
"fcm_dpo/q_t": 0.38267982006073,
"grad_norm": 13.564282417297363,
"learning_rate": 3.1916350007663176e-07,
"logits/chosen": 0.6986439228057861,
"logits/rejected": 0.6026472449302673,
"logps/chosen": -112.68070983886719,
"logps/ref_chosen": -53.816436767578125,
"logps/ref_rejected": -68.6575698852539,
"logps/rejected": -167.37075805664062,
"loss": 1.0189,
"margin_dpo/margin_mean": 39.848915100097656,
"margin_dpo/margin_std": 45.942588806152344,
"step": 312
},
{
"epoch": 0.47316704459561604,
"fcm_dpo/beta": 0.012997419573366642,
"fcm_dpo/delta": 0.1283135563135147,
"fcm_dpo/margin": 21.175430297851562,
"fcm_dpo/q_t": 0.44016578793525696,
"grad_norm": 12.463242530822754,
"learning_rate": 3.178919262911314e-07,
"logits/chosen": 0.7597650289535522,
"logits/rejected": 0.738747775554657,
"logps/chosen": -119.58285522460938,
"logps/ref_chosen": -59.957359313964844,
"logps/ref_rejected": -69.31729888916016,
"logps/rejected": -150.11822509765625,
"loss": 1.2264,
"margin_dpo/margin_mean": 21.175430297851562,
"margin_dpo/margin_std": 49.33220291137695,
"step": 313
},
{
"epoch": 0.47467876039304613,
"fcm_dpo/beta": 0.012764360755681992,
"fcm_dpo/delta": -0.14646798372268677,
"fcm_dpo/margin": 42.150630950927734,
"fcm_dpo/q_t": 0.38041582703590393,
"grad_norm": 12.823559761047363,
"learning_rate": 3.166184534225087e-07,
"logits/chosen": 0.6917102336883545,
"logits/rejected": 0.7238273024559021,
"logps/chosen": -127.27233123779297,
"logps/ref_chosen": -70.26815795898438,
"logps/ref_rejected": -69.23971557617188,
"logps/rejected": -168.39451599121094,
"loss": 1.02,
"margin_dpo/margin_mean": 42.15062713623047,
"margin_dpo/margin_std": 51.92638397216797,
"step": 314
},
{
"epoch": 0.47619047619047616,
"fcm_dpo/beta": 0.012742714956402779,
"fcm_dpo/delta": -0.01833246648311615,
"fcm_dpo/margin": 32.727691650390625,
"fcm_dpo/q_t": 0.4048681855201721,
"grad_norm": 12.803494453430176,
"learning_rate": 3.1534311709253723e-07,
"logits/chosen": 0.6166698932647705,
"logits/rejected": 0.5790517926216125,
"logps/chosen": -129.92495727539062,
"logps/ref_chosen": -67.79469299316406,
"logps/ref_rejected": -74.55148315429688,
"logps/rejected": -169.409423828125,
"loss": 1.1,
"margin_dpo/margin_mean": 32.72768783569336,
"margin_dpo/margin_std": 47.19148635864258,
"step": 315
},
{
"epoch": 0.47770219198790626,
"fcm_dpo/beta": 0.012548735365271568,
"fcm_dpo/delta": -0.12661194801330566,
"fcm_dpo/margin": 41.3825569152832,
"fcm_dpo/q_t": 0.38153761625289917,
"grad_norm": 13.01544189453125,
"learning_rate": 3.1406595297511564e-07,
"logits/chosen": 0.5967949628829956,
"logits/rejected": 0.4676669239997864,
"logps/chosen": -113.19938659667969,
"logps/ref_chosen": -55.288482666015625,
"logps/ref_rejected": -96.15723419189453,
"logps/rejected": -195.45069885253906,
"loss": 1.0151,
"margin_dpo/margin_mean": 41.38255310058594,
"margin_dpo/margin_std": 44.43248748779297,
"step": 316
},
{
"epoch": 0.47921390778533635,
"fcm_dpo/beta": 0.012046756222844124,
"fcm_dpo/delta": -0.13969969749450684,
"fcm_dpo/margin": 44.12655258178711,
"fcm_dpo/q_t": 0.3790159225463867,
"grad_norm": 17.531597137451172,
"learning_rate": 3.1278699679526975e-07,
"logits/chosen": 0.7275354862213135,
"logits/rejected": 0.6814507246017456,
"logps/chosen": -109.85079956054688,
"logps/ref_chosen": -54.58137512207031,
"logps/ref_rejected": -72.77232360839844,
"logps/rejected": -172.16830444335938,
"loss": 1.0104,
"margin_dpo/margin_mean": 44.126548767089844,
"margin_dpo/margin_std": 50.9593505859375,
"step": 317
},
{
"epoch": 0.48072562358276644,
"fcm_dpo/beta": 0.012052427977323532,
"fcm_dpo/delta": 0.03859718143939972,
"fcm_dpo/margin": 30.102909088134766,
"fcm_dpo/q_t": 0.4214034080505371,
"grad_norm": 13.89206600189209,
"learning_rate": 3.1150628432815336e-07,
"logits/chosen": 0.7386119365692139,
"logits/rejected": 0.6694012880325317,
"logps/chosen": -116.91323852539062,
"logps/ref_chosen": -52.88822937011719,
"logps/ref_rejected": -80.63988494873047,
"logps/rejected": -174.76779174804688,
"loss": 1.1896,
"margin_dpo/margin_mean": 30.102909088134766,
"margin_dpo/margin_std": 63.038963317871094,
"step": 318
},
{
"epoch": 0.48223733938019653,
"fcm_dpo/beta": 0.011894501745700836,
"fcm_dpo/delta": -0.11431370675563812,
"fcm_dpo/margin": 42.753143310546875,
"fcm_dpo/q_t": 0.38717061281204224,
"grad_norm": 14.289010047912598,
"learning_rate": 3.1022385139804707e-07,
"logits/chosen": 0.6626858115196228,
"logits/rejected": 0.647432804107666,
"logps/chosen": -124.62028503417969,
"logps/ref_chosen": -64.36333465576172,
"logps/ref_rejected": -79.47296142578125,
"logps/rejected": -182.48306274414062,
"loss": 1.0501,
"margin_dpo/margin_mean": 42.753143310546875,
"margin_dpo/margin_std": 58.25758361816406,
"step": 319
},
{
"epoch": 0.4837490551776266,
"fcm_dpo/beta": 0.011949008330702782,
"fcm_dpo/delta": -0.012487806379795074,
"fcm_dpo/margin": 34.2347526550293,
"fcm_dpo/q_t": 0.4102562665939331,
"grad_norm": 15.246618270874023,
"learning_rate": 3.0893973387735683e-07,
"logits/chosen": 0.5578382015228271,
"logits/rejected": 0.5187278389930725,
"logps/chosen": -108.69810485839844,
"logps/ref_chosen": -49.558746337890625,
"logps/ref_rejected": -71.23444366455078,
"logps/rejected": -164.60855102539062,
"loss": 1.1317,
"margin_dpo/margin_mean": 34.2347526550293,
"margin_dpo/margin_std": 54.77796173095703,
"step": 320
},
{
"epoch": 0.4852607709750567,
"fcm_dpo/beta": 0.011591393500566483,
"fcm_dpo/delta": -0.059746138751506805,
"fcm_dpo/margin": 39.27128601074219,
"fcm_dpo/q_t": 0.3966837525367737,
"grad_norm": 19.282135009765625,
"learning_rate": 3.0765396768561004e-07,
"logits/chosen": 0.673438549041748,
"logits/rejected": 0.6589595675468445,
"logps/chosen": -115.71333312988281,
"logps/ref_chosen": -52.08526611328125,
"logps/ref_rejected": -55.58674621582031,
"logps/rejected": -158.486083984375,
"loss": 1.0935,
"margin_dpo/margin_mean": 39.27128219604492,
"margin_dpo/margin_std": 57.47880554199219,
"step": 321
},
{
"epoch": 0.48677248677248675,
"fcm_dpo/beta": 0.011424287222325802,
"fcm_dpo/delta": -0.14884579181671143,
"fcm_dpo/margin": 47.356597900390625,
"fcm_dpo/q_t": 0.3772934675216675,
"grad_norm": 12.958308219909668,
"learning_rate": 3.063665887884511e-07,
"logits/chosen": 0.7031540870666504,
"logits/rejected": 0.6229244470596313,
"logps/chosen": -121.94970703125,
"logps/ref_chosen": -47.404109954833984,
"logps/ref_rejected": -73.4260025024414,
"logps/rejected": -195.3282012939453,
"loss": 1.0104,
"margin_dpo/margin_mean": 47.356597900390625,
"margin_dpo/margin_std": 55.74169921875,
"step": 322
},
{
"epoch": 0.48828420256991684,
"fcm_dpo/beta": 0.011438079178333282,
"fcm_dpo/delta": 0.051193639636039734,
"fcm_dpo/margin": 30.61416244506836,
"fcm_dpo/q_t": 0.42455974221229553,
"grad_norm": 14.519761085510254,
"learning_rate": 3.0507763319663517e-07,
"logits/chosen": 0.6318315267562866,
"logits/rejected": 0.5518868565559387,
"logps/chosen": -146.78964233398438,
"logps/ref_chosen": -70.00630187988281,
"logps/ref_rejected": -86.96690368652344,
"logps/rejected": -194.36441040039062,
"loss": 1.2003,
"margin_dpo/margin_mean": 30.61416244506836,
"margin_dpo/margin_std": 66.47247314453125,
"step": 323
},
{
"epoch": 0.4897959183673469,
"fcm_dpo/beta": 0.011217910796403885,
"fcm_dpo/delta": -0.10228747129440308,
"fcm_dpo/margin": 44.276397705078125,
"fcm_dpo/q_t": 0.38728103041648865,
"grad_norm": 18.26462173461914,
"learning_rate": 3.0378713696502097e-07,
"logits/chosen": 0.7387030124664307,
"logits/rejected": 0.6826291680335999,
"logps/chosen": -121.74503326416016,
"logps/ref_chosen": -55.88882064819336,
"logps/ref_rejected": -75.23088073730469,
"logps/rejected": -185.36349487304688,
"loss": 1.0356,
"margin_dpo/margin_mean": 44.276397705078125,
"margin_dpo/margin_std": 54.363380432128906,
"step": 324
},
{
"epoch": 0.491307634164777,
"fcm_dpo/beta": 0.011081516742706299,
"fcm_dpo/delta": -0.03910160809755325,
"fcm_dpo/margin": 39.42148208618164,
"fcm_dpo/q_t": 0.4006548821926117,
"grad_norm": 16.26625633239746,
"learning_rate": 3.0249513619156206e-07,
"logits/chosen": 0.6771467924118042,
"logits/rejected": 0.61439049243927,
"logps/chosen": -147.13021850585938,
"logps/ref_chosen": -64.14701843261719,
"logps/ref_rejected": -79.91143798828125,
"logps/rejected": -202.31610107421875,
"loss": 1.1056,
"margin_dpo/margin_mean": 39.421478271484375,
"margin_dpo/margin_std": 61.576148986816406,
"step": 325
},
{
"epoch": 0.4928193499622071,
"fcm_dpo/beta": 0.011371839791536331,
"fcm_dpo/delta": 0.17452731728553772,
"fcm_dpo/margin": 20.207664489746094,
"fcm_dpo/q_t": 0.4494830369949341,
"grad_norm": 14.90293025970459,
"learning_rate": 3.012016670162977e-07,
"logits/chosen": 0.6311044096946716,
"logits/rejected": 0.6375623345375061,
"logps/chosen": -172.97830200195312,
"logps/ref_chosen": -75.53131103515625,
"logps/ref_rejected": -76.5898666381836,
"logps/rejected": -194.24452209472656,
"loss": 1.2857,
"margin_dpo/margin_mean": 20.207664489746094,
"margin_dpo/margin_std": 60.42800521850586,
"step": 326
},
{
"epoch": 0.4943310657596372,
"fcm_dpo/beta": 0.011601308360695839,
"fcm_dpo/delta": 0.0348714217543602,
"fcm_dpo/margin": 31.55461883544922,
"fcm_dpo/q_t": 0.4197550117969513,
"grad_norm": 17.706899642944336,
"learning_rate": 2.99906765620341e-07,
"logits/chosen": 0.6011782288551331,
"logits/rejected": 0.5678955316543579,
"logps/chosen": -162.18263244628906,
"logps/ref_chosen": -69.33717346191406,
"logps/ref_rejected": -73.37751770019531,
"logps/rejected": -197.77761840820312,
"loss": 1.1775,
"margin_dpo/margin_mean": 31.55462074279785,
"margin_dpo/margin_std": 62.52714157104492,
"step": 327
},
{
"epoch": 0.4958427815570673,
"fcm_dpo/beta": 0.011501701548695564,
"fcm_dpo/delta": -0.05493466556072235,
"fcm_dpo/margin": 39.34053421020508,
"fcm_dpo/q_t": 0.39944905042648315,
"grad_norm": 13.670370101928711,
"learning_rate": 2.9861046822486766e-07,
"logits/chosen": 0.6204825639724731,
"logits/rejected": 0.5904245376586914,
"logps/chosen": -141.41159057617188,
"logps/ref_chosen": -61.70623016357422,
"logps/ref_rejected": -83.73808288574219,
"logps/rejected": -202.78399658203125,
"loss": 1.0846,
"margin_dpo/margin_mean": 39.34053421020508,
"margin_dpo/margin_std": 57.092227935791016,
"step": 328
},
{
"epoch": 0.4973544973544973,
"fcm_dpo/beta": 0.011362850666046143,
"fcm_dpo/delta": -0.056787166744470596,
"fcm_dpo/margin": 39.974037170410156,
"fcm_dpo/q_t": 0.3996536433696747,
"grad_norm": 17.699430465698242,
"learning_rate": 2.9731281109010253e-07,
"logits/chosen": 0.7306606769561768,
"logits/rejected": 0.6753987073898315,
"logps/chosen": -154.018310546875,
"logps/ref_chosen": -64.4984130859375,
"logps/ref_rejected": -83.6591796875,
"logps/rejected": -213.15310668945312,
"loss": 1.0851,
"margin_dpo/margin_mean": 39.974037170410156,
"margin_dpo/margin_std": 58.887046813964844,
"step": 329
},
{
"epoch": 0.4988662131519274,
"fcm_dpo/beta": 0.011202252469956875,
"fcm_dpo/delta": -0.10586293041706085,
"fcm_dpo/margin": 44.688682556152344,
"fcm_dpo/q_t": 0.3895889222621918,
"grad_norm": 13.673934936523438,
"learning_rate": 2.9601383051430505e-07,
"logits/chosen": 0.6697182059288025,
"logits/rejected": 0.6038362979888916,
"logps/chosen": -131.23902893066406,
"logps/ref_chosen": -54.80464172363281,
"logps/ref_rejected": -75.3194351196289,
"logps/rejected": -196.4425048828125,
"loss": 1.097,
"margin_dpo/margin_mean": 44.688682556152344,
"margin_dpo/margin_std": 71.45393371582031,
"step": 330
},
{
"epoch": 0.5003779289493575,
"fcm_dpo/beta": 0.010804209858179092,
"fcm_dpo/delta": -0.1956481635570526,
"fcm_dpo/margin": 54.106658935546875,
"fcm_dpo/q_t": 0.3688202500343323,
"grad_norm": 13.366751670837402,
"learning_rate": 2.947135628327544e-07,
"logits/chosen": 0.8018543720245361,
"logits/rejected": 0.7764079570770264,
"logps/chosen": -142.2760009765625,
"logps/ref_chosen": -59.242584228515625,
"logps/ref_rejected": -69.87483215332031,
"logps/rejected": -207.01490783691406,
"loss": 1.0069,
"margin_dpo/margin_mean": 54.106658935546875,
"margin_dpo/margin_std": 67.39854431152344,
"step": 331
},
{
"epoch": 0.5018896447467877,
"fcm_dpo/beta": 0.010633476078510284,
"fcm_dpo/delta": -0.07682856917381287,
"fcm_dpo/margin": 44.427947998046875,
"fcm_dpo/q_t": 0.39411279559135437,
"grad_norm": 14.242164611816406,
"learning_rate": 2.934120444167326e-07,
"logits/chosen": 0.6497039198875427,
"logits/rejected": 0.6062981486320496,
"logps/chosen": -151.68014526367188,
"logps/ref_chosen": -67.10975646972656,
"logps/ref_rejected": -77.11839294433594,
"logps/rejected": -206.11672973632812,
"loss": 1.0636,
"margin_dpo/margin_mean": 44.427947998046875,
"margin_dpo/margin_std": 58.21818542480469,
"step": 332
},
{
"epoch": 0.5034013605442177,
"fcm_dpo/beta": 0.010376621037721634,
"fcm_dpo/delta": -0.09302500635385513,
"fcm_dpo/margin": 47.08435821533203,
"fcm_dpo/q_t": 0.38988494873046875,
"grad_norm": 13.075955390930176,
"learning_rate": 2.921093116725076e-07,
"logits/chosen": 0.6993060111999512,
"logits/rejected": 0.6273704171180725,
"logps/chosen": -148.47674560546875,
"logps/ref_chosen": -58.381134033203125,
"logps/ref_rejected": -85.02839660644531,
"logps/rejected": -222.20835876464844,
"loss": 1.0433,
"margin_dpo/margin_mean": 47.08435821533203,
"margin_dpo/margin_std": 59.184940338134766,
"step": 333
},
{
"epoch": 0.5049130763416477,
"fcm_dpo/beta": 0.010399187915027142,
"fcm_dpo/delta": 0.047791529446840286,
"fcm_dpo/margin": 34.018856048583984,
"fcm_dpo/q_t": 0.4216611981391907,
"grad_norm": 13.354299545288086,
"learning_rate": 2.9080540104031484e-07,
"logits/chosen": 0.6879534721374512,
"logits/rejected": 0.6454561948776245,
"logps/chosen": -152.24517822265625,
"logps/ref_chosen": -66.89199829101562,
"logps/ref_rejected": -91.83695220947266,
"logps/rejected": -211.208984375,
"loss": 1.1831,
"margin_dpo/margin_mean": 34.018856048583984,
"margin_dpo/margin_std": 68.88700103759766,
"step": 334
},
{
"epoch": 0.5064247921390779,
"fcm_dpo/beta": 0.010456325486302376,
"fcm_dpo/delta": -0.0019730515778064728,
"fcm_dpo/margin": 38.389739990234375,
"fcm_dpo/q_t": 0.4113653004169464,
"grad_norm": 18.998254776000977,
"learning_rate": 2.895003489933375e-07,
"logits/chosen": 0.7184223532676697,
"logits/rejected": 0.6815344095230103,
"logps/chosen": -148.72567749023438,
"logps/ref_chosen": -61.51445770263672,
"logps/ref_rejected": -75.68916320800781,
"logps/rejected": -201.29013061523438,
"loss": 1.1365,
"margin_dpo/margin_mean": 38.389739990234375,
"margin_dpo/margin_std": 65.81454467773438,
"step": 335
},
{
"epoch": 0.5079365079365079,
"fcm_dpo/beta": 0.010317089036107063,
"fcm_dpo/delta": -0.008530773222446442,
"fcm_dpo/margin": 39.463714599609375,
"fcm_dpo/q_t": 0.4097328782081604,
"grad_norm": 12.17556095123291,
"learning_rate": 2.8819419203668675e-07,
"logits/chosen": 0.6476879119873047,
"logits/rejected": 0.6272980570793152,
"logps/chosen": -165.22909545898438,
"logps/ref_chosen": -68.85006713867188,
"logps/ref_rejected": -92.99603271484375,
"logps/rejected": -228.8387908935547,
"loss": 1.1198,
"margin_dpo/margin_mean": 39.463714599609375,
"margin_dpo/margin_std": 62.35620880126953,
"step": 336
},
{
"epoch": 0.509448223733938,
"fcm_dpo/beta": 0.01051211729645729,
"fcm_dpo/delta": 0.07588024437427521,
"fcm_dpo/margin": 31.061817169189453,
"fcm_dpo/q_t": 0.42595547437667847,
"grad_norm": 12.565362930297852,
"learning_rate": 2.8688696670638053e-07,
"logits/chosen": 0.5867069959640503,
"logits/rejected": 0.5546263456344604,
"logps/chosen": -172.4549560546875,
"logps/ref_chosen": -73.18783569335938,
"logps/ref_rejected": -86.89118957519531,
"logps/rejected": -217.22012329101562,
"loss": 1.1796,
"margin_dpo/margin_mean": 31.06181526184082,
"margin_dpo/margin_std": 60.04143142700195,
"step": 337
},
{
"epoch": 0.5109599395313681,
"fcm_dpo/beta": 0.010581170208752155,
"fcm_dpo/delta": 0.03785444423556328,
"fcm_dpo/margin": 34.35435104370117,
"fcm_dpo/q_t": 0.41939669847488403,
"grad_norm": 13.616081237792969,
"learning_rate": 2.8557870956832133e-07,
"logits/chosen": 0.663311243057251,
"logits/rejected": 0.6381123661994934,
"logps/chosen": -158.74578857421875,
"logps/ref_chosen": -63.939613342285156,
"logps/ref_rejected": -75.34243774414062,
"logps/rejected": -204.50296020507812,
"loss": 1.1652,
"margin_dpo/margin_mean": 34.35435104370117,
"margin_dpo/margin_std": 64.8512954711914,
"step": 338
},
{
"epoch": 0.5124716553287982,
"fcm_dpo/beta": 0.010615767911076546,
"fcm_dpo/delta": 0.007883191108703613,
"fcm_dpo/margin": 36.96049499511719,
"fcm_dpo/q_t": 0.4113204777240753,
"grad_norm": 12.145761489868164,
"learning_rate": 2.842694572172736e-07,
"logits/chosen": 0.8048558235168457,
"logits/rejected": 0.7194130420684814,
"logps/chosen": -124.07569122314453,
"logps/ref_chosen": -45.54913330078125,
"logps/ref_rejected": -67.0482177734375,
"logps/rejected": -182.53526306152344,
"loss": 1.1217,
"margin_dpo/margin_mean": 36.96049880981445,
"margin_dpo/margin_std": 57.94923400878906,
"step": 339
},
{
"epoch": 0.5139833711262283,
"fcm_dpo/beta": 0.010650699026882648,
"fcm_dpo/delta": -0.02553240954875946,
"fcm_dpo/margin": 39.822174072265625,
"fcm_dpo/q_t": 0.4078987240791321,
"grad_norm": 12.790955543518066,
"learning_rate": 2.8295924627584004e-07,
"logits/chosen": 0.6951748728752136,
"logits/rejected": 0.6753948926925659,
"logps/chosen": -142.3338623046875,
"logps/ref_chosen": -54.00564956665039,
"logps/ref_rejected": -61.314430236816406,
"logps/rejected": -189.46482849121094,
"loss": 1.1379,
"margin_dpo/margin_mean": 39.822174072265625,
"margin_dpo/margin_std": 70.49546813964844,
"step": 340
},
{
"epoch": 0.5154950869236583,
"fcm_dpo/beta": 0.010352972894906998,
"fcm_dpo/delta": -0.06754864007234573,
"fcm_dpo/margin": 44.4967041015625,
"fcm_dpo/q_t": 0.39656323194503784,
"grad_norm": 13.45057201385498,
"learning_rate": 2.816481133934373e-07,
"logits/chosen": 0.6896207332611084,
"logits/rejected": 0.6430627107620239,
"logps/chosen": -149.3645477294922,
"logps/ref_chosen": -63.39509582519531,
"logps/ref_rejected": -76.20973205566406,
"logps/rejected": -206.67588806152344,
"loss": 1.0901,
"margin_dpo/margin_mean": 44.496700286865234,
"margin_dpo/margin_std": 63.01578140258789,
"step": 341
},
{
"epoch": 0.5170068027210885,
"fcm_dpo/beta": 0.010273242369294167,
"fcm_dpo/delta": -0.08363170921802521,
"fcm_dpo/margin": 46.62270736694336,
"fcm_dpo/q_t": 0.3937643766403198,
"grad_norm": 11.712442398071289,
"learning_rate": 2.8033609524527046e-07,
"logits/chosen": 0.7326318025588989,
"logits/rejected": 0.6941955089569092,
"logps/chosen": -137.80276489257812,
"logps/ref_chosen": -53.047813415527344,
"logps/ref_rejected": -68.2854232788086,
"logps/rejected": -199.6630859375,
"loss": 1.0632,
"margin_dpo/margin_mean": 46.62270736694336,
"margin_dpo/margin_std": 63.597843170166016,
"step": 342
},
{
"epoch": 0.5185185185185185,
"fcm_dpo/beta": 0.01034234743565321,
"fcm_dpo/delta": 0.10083356499671936,
"fcm_dpo/margin": 29.168373107910156,
"fcm_dpo/q_t": 0.4309525191783905,
"grad_norm": 12.435088157653809,
"learning_rate": 2.7902322853130753e-07,
"logits/chosen": 0.5851594805717468,
"logits/rejected": 0.5792618989944458,
"logps/chosen": -151.103515625,
"logps/ref_chosen": -70.57852935791016,
"logps/ref_rejected": -84.73873901367188,
"logps/rejected": -194.43209838867188,
"loss": 1.1982,
"margin_dpo/margin_mean": 29.168371200561523,
"margin_dpo/margin_std": 57.57359313964844,
"step": 343
},
{
"epoch": 0.5200302343159486,
"fcm_dpo/beta": 0.010332523845136166,
"fcm_dpo/delta": -0.07479459047317505,
"fcm_dpo/margin": 45.5706787109375,
"fcm_dpo/q_t": 0.3941543698310852,
"grad_norm": 14.585039138793945,
"learning_rate": 2.7770954997525274e-07,
"logits/chosen": 0.703796923160553,
"logits/rejected": 0.6407324075698853,
"logps/chosen": -143.69720458984375,
"logps/ref_chosen": -55.811004638671875,
"logps/ref_rejected": -84.77637481689453,
"logps/rejected": -218.23324584960938,
"loss": 1.0615,
"margin_dpo/margin_mean": 45.5706787109375,
"margin_dpo/margin_std": 60.98072052001953,
"step": 344
},
{
"epoch": 0.5215419501133787,
"fcm_dpo/beta": 0.010342312976717949,
"fcm_dpo/delta": -0.0065320320427417755,
"fcm_dpo/margin": 39.27054214477539,
"fcm_dpo/q_t": 0.40868985652923584,
"grad_norm": 13.813055992126465,
"learning_rate": 2.7639509632351927e-07,
"logits/chosen": 0.769192099571228,
"logits/rejected": 0.7235137224197388,
"logps/chosen": -126.54728698730469,
"logps/ref_chosen": -57.78609848022461,
"logps/ref_rejected": -78.91847229003906,
"logps/rejected": -186.9501953125,
"loss": 1.1169,
"margin_dpo/margin_mean": 39.27054214477539,
"margin_dpo/margin_std": 62.098785400390625,
"step": 345
},
{
"epoch": 0.5230536659108088,
"fcm_dpo/beta": 0.010258370079100132,
"fcm_dpo/delta": -0.06688511371612549,
"fcm_dpo/margin": 45.19707489013672,
"fcm_dpo/q_t": 0.3958582282066345,
"grad_norm": 14.238128662109375,
"learning_rate": 2.7507990434420123e-07,
"logits/chosen": 0.6935669183731079,
"logits/rejected": 0.6146125197410583,
"logps/chosen": -133.35507202148438,
"logps/ref_chosen": -56.285125732421875,
"logps/ref_rejected": -91.15303039550781,
"logps/rejected": -213.4200439453125,
"loss": 1.0827,
"margin_dpo/margin_mean": 45.197078704833984,
"margin_dpo/margin_std": 64.67538452148438,
"step": 346
},
{
"epoch": 0.5245653817082389,
"fcm_dpo/beta": 0.010258248075842857,
"fcm_dpo/delta": 0.05244377255439758,
"fcm_dpo/margin": 34.057273864746094,
"fcm_dpo/q_t": 0.4229571223258972,
"grad_norm": 16.91437339782715,
"learning_rate": 2.737640108260456e-07,
"logits/chosen": 0.8284379839897156,
"logits/rejected": 0.7780033349990845,
"logps/chosen": -137.3411865234375,
"logps/ref_chosen": -53.499542236328125,
"logps/ref_rejected": -72.52565002441406,
"logps/rejected": -190.42459106445312,
"loss": 1.1617,
"margin_dpo/margin_mean": 34.057273864746094,
"margin_dpo/margin_std": 62.23371887207031,
"step": 347
},
{
"epoch": 0.5260770975056689,
"fcm_dpo/beta": 0.010174311697483063,
"fcm_dpo/delta": -0.05163482576608658,
"fcm_dpo/margin": 44.132564544677734,
"fcm_dpo/q_t": 0.4025237560272217,
"grad_norm": 12.754920959472656,
"learning_rate": 2.724474525774229e-07,
"logits/chosen": 0.8104952573776245,
"logits/rejected": 0.781765341758728,
"logps/chosen": -125.65589141845703,
"logps/ref_chosen": -50.78684997558594,
"logps/ref_rejected": -68.63732147216797,
"logps/rejected": -187.638916015625,
"loss": 1.1016,
"margin_dpo/margin_mean": 44.132564544677734,
"margin_dpo/margin_std": 69.71031188964844,
"step": 348
},
{
"epoch": 0.527588813303099,
"fcm_dpo/beta": 0.01015070267021656,
"fcm_dpo/delta": -0.04512634873390198,
"fcm_dpo/margin": 43.6390266418457,
"fcm_dpo/q_t": 0.40171459317207336,
"grad_norm": 13.942571640014648,
"learning_rate": 2.711302664252973e-07,
"logits/chosen": 0.7045985460281372,
"logits/rejected": 0.6161255836486816,
"logps/chosen": -127.07453155517578,
"logps/ref_chosen": -53.325008392333984,
"logps/ref_rejected": -83.21236419677734,
"logps/rejected": -200.6009063720703,
"loss": 1.0896,
"margin_dpo/margin_mean": 43.6390266418457,
"margin_dpo/margin_std": 63.60553741455078,
"step": 349
},
{
"epoch": 0.5291005291005291,
"fcm_dpo/beta": 0.009878698736429214,
"fcm_dpo/delta": -0.1288827359676361,
"fcm_dpo/margin": 52.83354187011719,
"fcm_dpo/q_t": 0.3816789984703064,
"grad_norm": 15.5607328414917,
"learning_rate": 2.698124892141971e-07,
"logits/chosen": 0.7101126909255981,
"logits/rejected": 0.6280903816223145,
"logps/chosen": -141.38832092285156,
"logps/ref_chosen": -61.625770568847656,
"logps/ref_rejected": -87.63627624511719,
"logps/rejected": -220.23236083984375,
"loss": 1.0247,
"margin_dpo/margin_mean": 52.83354187011719,
"margin_dpo/margin_std": 64.74072265625,
"step": 350
},
{
"epoch": 0.5306122448979592,
"fcm_dpo/beta": 0.009750676341354847,
"fcm_dpo/delta": -0.030142836272716522,
"fcm_dpo/margin": 43.94773864746094,
"fcm_dpo/q_t": 0.4014700651168823,
"grad_norm": 13.296806335449219,
"learning_rate": 2.6849415780518357e-07,
"logits/chosen": 0.6415879726409912,
"logits/rejected": 0.5668590068817139,
"logps/chosen": -131.79360961914062,
"logps/ref_chosen": -56.2563362121582,
"logps/ref_rejected": -79.11589813232422,
"logps/rejected": -198.6009063720703,
"loss": 1.1164,
"margin_dpo/margin_mean": 43.94773864746094,
"margin_dpo/margin_std": 70.56474304199219,
"step": 351
},
{
"epoch": 0.5321239606953893,
"fcm_dpo/beta": 0.009682442992925644,
"fcm_dpo/delta": -0.046238820999860764,
"fcm_dpo/margin": 45.857688903808594,
"fcm_dpo/q_t": 0.3999043107032776,
"grad_norm": 12.848146438598633,
"learning_rate": 2.6717530907482024e-07,
"logits/chosen": 0.7140268087387085,
"logits/rejected": 0.661480188369751,
"logps/chosen": -137.10739135742188,
"logps/ref_chosen": -63.05195236206055,
"logps/ref_rejected": -85.52035522460938,
"logps/rejected": -205.4334716796875,
"loss": 1.0801,
"margin_dpo/margin_mean": 45.857688903808594,
"margin_dpo/margin_std": 64.35737609863281,
"step": 352
},
{
"epoch": 0.5336356764928194,
"fcm_dpo/beta": 0.009640311822295189,
"fcm_dpo/delta": -0.04901779443025589,
"fcm_dpo/margin": 46.344337463378906,
"fcm_dpo/q_t": 0.39885491132736206,
"grad_norm": 11.270014762878418,
"learning_rate": 2.658559799141411e-07,
"logits/chosen": 0.7019220590591431,
"logits/rejected": 0.7082865238189697,
"logps/chosen": -140.48910522460938,
"logps/ref_chosen": -69.00918579101562,
"logps/ref_rejected": -72.65840148925781,
"logps/rejected": -190.482666015625,
"loss": 1.0834,
"margin_dpo/margin_mean": 46.344337463378906,
"margin_dpo/margin_std": 65.52245330810547,
"step": 353
},
{
"epoch": 0.5351473922902494,
"fcm_dpo/beta": 0.009430557489395142,
"fcm_dpo/delta": -0.07606241106987,
"fcm_dpo/margin": 50.012725830078125,
"fcm_dpo/q_t": 0.39343374967575073,
"grad_norm": 13.003927230834961,
"learning_rate": 2.6453620722761895e-07,
"logits/chosen": 0.7931294441223145,
"logits/rejected": 0.6584126949310303,
"logps/chosen": -115.51165008544922,
"logps/ref_chosen": -39.78833770751953,
"logps/ref_rejected": -69.56885528564453,
"logps/rejected": -195.30490112304688,
"loss": 1.0744,
"margin_dpo/margin_mean": 50.012725830078125,
"margin_dpo/margin_std": 70.41667938232422,
"step": 354
},
{
"epoch": 0.5366591080876795,
"fcm_dpo/beta": 0.009351427666842937,
"fcm_dpo/delta": -0.08544706553220749,
"fcm_dpo/margin": 51.47795867919922,
"fcm_dpo/q_t": 0.39146727323532104,
"grad_norm": 15.754137992858887,
"learning_rate": 2.632160279321328e-07,
"logits/chosen": 0.7644649744033813,
"logits/rejected": 0.629076361656189,
"logps/chosen": -126.27880859375,
"logps/ref_chosen": -46.25537872314453,
"logps/ref_rejected": -78.20236206054688,
"logps/rejected": -209.70376586914062,
"loss": 1.0715,
"margin_dpo/margin_mean": 51.477962493896484,
"margin_dpo/margin_std": 72.61712646484375,
"step": 355
},
{
"epoch": 0.5381708238851096,
"fcm_dpo/beta": 0.009251654148101807,
"fcm_dpo/delta": 0.009645845741033554,
"fcm_dpo/margin": 42.194644927978516,
"fcm_dpo/q_t": 0.4131093919277191,
"grad_norm": 12.248784065246582,
"learning_rate": 2.618954789559356e-07,
"logits/chosen": 0.7211343050003052,
"logits/rejected": 0.6421887874603271,
"logps/chosen": -122.72053527832031,
"logps/ref_chosen": -47.906158447265625,
"logps/ref_rejected": -74.29397583007812,
"logps/rejected": -191.30299377441406,
"loss": 1.1683,
"margin_dpo/margin_mean": 42.19464111328125,
"margin_dpo/margin_std": 81.54269409179688,
"step": 356
},
{
"epoch": 0.5396825396825397,
"fcm_dpo/beta": 0.009272318333387375,
"fcm_dpo/delta": 0.04974482208490372,
"fcm_dpo/margin": 37.79161834716797,
"fcm_dpo/q_t": 0.4197618365287781,
"grad_norm": 12.6592435836792,
"learning_rate": 2.6057459723762076e-07,
"logits/chosen": 0.7132511138916016,
"logits/rejected": 0.6882957220077515,
"logps/chosen": -152.98204040527344,
"logps/ref_chosen": -62.63500213623047,
"logps/ref_rejected": -65.11399841308594,
"logps/rejected": -193.25267028808594,
"loss": 1.1598,
"margin_dpo/margin_mean": 37.79161834716797,
"margin_dpo/margin_std": 63.681026458740234,
"step": 357
},
{
"epoch": 0.5411942554799698,
"fcm_dpo/beta": 0.009308705106377602,
"fcm_dpo/delta": -0.0927896648645401,
"fcm_dpo/margin": 52.447574615478516,
"fcm_dpo/q_t": 0.39033764600753784,
"grad_norm": 15.408126831054688,
"learning_rate": 2.5925341972508954e-07,
"logits/chosen": 0.6472632884979248,
"logits/rejected": 0.663436770439148,
"logps/chosen": -151.31112670898438,
"logps/ref_chosen": -67.20960998535156,
"logps/ref_rejected": -69.34715270996094,
"logps/rejected": -205.896240234375,
"loss": 1.0529,
"margin_dpo/margin_mean": 52.44757843017578,
"margin_dpo/margin_std": 68.63077545166016,
"step": 358
},
{
"epoch": 0.5427059712773998,
"fcm_dpo/beta": 0.00950109213590622,
"fcm_dpo/delta": 0.15490363538265228,
"fcm_dpo/margin": 25.983016967773438,
"fcm_dpo/q_t": 0.44459402561187744,
"grad_norm": 13.879263877868652,
"learning_rate": 2.579319833745169e-07,
"logits/chosen": 0.6614922285079956,
"logits/rejected": 0.6340516805648804,
"logps/chosen": -157.9315185546875,
"logps/ref_chosen": -62.52578353881836,
"logps/ref_rejected": -76.63114929199219,
"logps/rejected": -198.0198974609375,
"loss": 1.2399,
"margin_dpo/margin_mean": 25.983016967773438,
"margin_dpo/margin_std": 61.045677185058594,
"step": 359
},
{
"epoch": 0.54421768707483,
"fcm_dpo/beta": 0.009465381503105164,
"fcm_dpo/delta": -0.017956897616386414,
"fcm_dpo/margin": 44.06662368774414,
"fcm_dpo/q_t": 0.40685826539993286,
"grad_norm": 12.662845611572266,
"learning_rate": 2.5661032514931834e-07,
"logits/chosen": 0.636221170425415,
"logits/rejected": 0.5421815514564514,
"logps/chosen": -157.51663208007812,
"logps/ref_chosen": -63.48772048950195,
"logps/ref_rejected": -90.6891098022461,
"logps/rejected": -228.78463745117188,
"loss": 1.1035,
"margin_dpo/margin_mean": 44.06662368774414,
"margin_dpo/margin_std": 66.23414611816406,
"step": 360
},
{
"epoch": 0.54572940287226,
"fcm_dpo/beta": 0.009376795962452888,
"fcm_dpo/delta": -0.09013990312814713,
"fcm_dpo/margin": 51.81315994262695,
"fcm_dpo/q_t": 0.3891136050224304,
"grad_norm": 12.409852027893066,
"learning_rate": 2.552884820191154e-07,
"logits/chosen": 0.7601670026779175,
"logits/rejected": 0.7106729745864868,
"logps/chosen": -150.19419860839844,
"logps/ref_chosen": -57.917144775390625,
"logps/ref_rejected": -72.39089965820312,
"logps/rejected": -216.48110961914062,
"loss": 1.0435,
"margin_dpo/margin_mean": 51.81315994262695,
"margin_dpo/margin_std": 64.16586303710938,
"step": 361
},
{
"epoch": 0.54724111866969,
"fcm_dpo/beta": 0.00923209823668003,
"fcm_dpo/delta": -0.08657985925674438,
"fcm_dpo/margin": 52.24518585205078,
"fcm_dpo/q_t": 0.3927406668663025,
"grad_norm": 14.153077125549316,
"learning_rate": 2.53966490958702e-07,
"logits/chosen": 0.7800583839416504,
"logits/rejected": 0.6689051389694214,
"logps/chosen": -157.0105438232422,
"logps/ref_chosen": -63.4434700012207,
"logps/ref_rejected": -103.45516967773438,
"logps/rejected": -249.26744079589844,
"loss": 1.0766,
"margin_dpo/margin_mean": 52.24518585205078,
"margin_dpo/margin_std": 76.14582061767578,
"step": 362
},
{
"epoch": 0.5487528344671202,
"fcm_dpo/beta": 0.00903936568647623,
"fcm_dpo/delta": -0.06485149264335632,
"fcm_dpo/margin": 51.07155990600586,
"fcm_dpo/q_t": 0.3948080539703369,
"grad_norm": 15.533196449279785,
"learning_rate": 2.526443889470099e-07,
"logits/chosen": 0.7673148512840271,
"logits/rejected": 0.6327505707740784,
"logps/chosen": -147.92532348632812,
"logps/ref_chosen": -48.65182876586914,
"logps/ref_rejected": -88.65904235839844,
"logps/rejected": -239.00411987304688,
"loss": 1.0656,
"margin_dpo/margin_mean": 51.071563720703125,
"margin_dpo/margin_std": 68.49053192138672,
"step": 363
},
{
"epoch": 0.5502645502645502,
"fcm_dpo/beta": 0.008878370746970177,
"fcm_dpo/delta": -0.1015341728925705,
"fcm_dpo/margin": 55.899478912353516,
"fcm_dpo/q_t": 0.3897857964038849,
"grad_norm": 11.952712059020996,
"learning_rate": 2.513222129660744e-07,
"logits/chosen": 0.6561600565910339,
"logits/rejected": 0.5624690055847168,
"logps/chosen": -147.87716674804688,
"logps/ref_chosen": -57.87107467651367,
"logps/ref_rejected": -80.95503234863281,
"logps/rejected": -226.860595703125,
"loss": 1.0774,
"margin_dpo/margin_mean": 55.899478912353516,
"margin_dpo/margin_std": 83.72024536132812,
"step": 364
},
{
"epoch": 0.5517762660619804,
"fcm_dpo/beta": 0.008753440342843533,
"fcm_dpo/delta": -0.06477545946836472,
"fcm_dpo/margin": 52.74662780761719,
"fcm_dpo/q_t": 0.39343178272247314,
"grad_norm": 11.89730453491211,
"learning_rate": 2.5e-07,
"logits/chosen": 0.7345987558364868,
"logits/rejected": 0.7310192584991455,
"logps/chosen": -145.48480224609375,
"logps/ref_chosen": -64.94217681884766,
"logps/ref_rejected": -74.8599853515625,
"logps/rejected": -208.14923095703125,
"loss": 1.0438,
"margin_dpo/margin_mean": 52.74662780761719,
"margin_dpo/margin_std": 60.88469314575195,
"step": 365
},
{
"epoch": 0.5532879818594104,
"fcm_dpo/beta": 0.008768351748585701,
"fcm_dpo/delta": 0.0030683092772960663,
"fcm_dpo/margin": 45.25122833251953,
"fcm_dpo/q_t": 0.41162019968032837,
"grad_norm": 13.978940963745117,
"learning_rate": 2.486777870339255e-07,
"logits/chosen": 0.6601021885871887,
"logits/rejected": 0.6458828449249268,
"logps/chosen": -136.3466796875,
"logps/ref_chosen": -55.16598129272461,
"logps/ref_rejected": -65.26121520996094,
"logps/rejected": -191.69314575195312,
"loss": 1.1443,
"margin_dpo/margin_mean": 45.25122833251953,
"margin_dpo/margin_std": 79.96685791015625,
"step": 366
},
{
"epoch": 0.5547996976568406,
"fcm_dpo/beta": 0.008722890168428421,
"fcm_dpo/delta": 0.005503779277205467,
"fcm_dpo/margin": 45.237735748291016,
"fcm_dpo/q_t": 0.40820807218551636,
"grad_norm": 12.16567325592041,
"learning_rate": 2.4735561105299014e-07,
"logits/chosen": 0.7035742998123169,
"logits/rejected": 0.5987119078636169,
"logps/chosen": -147.3154754638672,
"logps/ref_chosen": -56.01046371459961,
"logps/ref_rejected": -77.31010437011719,
"logps/rejected": -213.8528594970703,
"loss": 1.118,
"margin_dpo/margin_mean": 45.23773956298828,
"margin_dpo/margin_std": 69.86012268066406,
"step": 367
},
{
"epoch": 0.5563114134542706,
"fcm_dpo/beta": 0.008784948848187923,
"fcm_dpo/delta": 0.027075402438640594,
"fcm_dpo/margin": 42.564666748046875,
"fcm_dpo/q_t": 0.4141819477081299,
"grad_norm": 12.818552017211914,
"learning_rate": 2.46033509041298e-07,
"logits/chosen": 0.5555020570755005,
"logits/rejected": 0.5551966428756714,
"logps/chosen": -175.42108154296875,
"logps/ref_chosen": -74.82927703857422,
"logps/ref_rejected": -76.11680603027344,
"logps/rejected": -219.27328491210938,
"loss": 1.1343,
"margin_dpo/margin_mean": 42.564666748046875,
"margin_dpo/margin_std": 69.22799682617188,
"step": 368
},
{
"epoch": 0.5578231292517006,
"fcm_dpo/beta": 0.008928908035159111,
"fcm_dpo/delta": 0.07122094929218292,
"fcm_dpo/margin": 37.005409240722656,
"fcm_dpo/q_t": 0.42534273862838745,
"grad_norm": 12.85647964477539,
"learning_rate": 2.447115179808846e-07,
"logits/chosen": 0.6955777406692505,
"logits/rejected": 0.6436460614204407,
"logps/chosen": -153.3529052734375,
"logps/ref_chosen": -58.32621765136719,
"logps/ref_rejected": -80.92183685302734,
"logps/rejected": -212.9539337158203,
"loss": 1.1719,
"margin_dpo/margin_mean": 37.005409240722656,
"margin_dpo/margin_std": 68.3039779663086,
"step": 369
},
{
"epoch": 0.5593348450491308,
"fcm_dpo/beta": 0.00882653333246708,
"fcm_dpo/delta": -0.06157629191875458,
"fcm_dpo/margin": 51.95973205566406,
"fcm_dpo/q_t": 0.39605778455734253,
"grad_norm": 13.36386775970459,
"learning_rate": 2.4338967485068164e-07,
"logits/chosen": 0.7886132597923279,
"logits/rejected": 0.7210862636566162,
"logps/chosen": -143.81800842285156,
"logps/ref_chosen": -52.88372039794922,
"logps/ref_rejected": -79.43692016601562,
"logps/rejected": -222.3309326171875,
"loss": 1.0883,
"margin_dpo/margin_mean": 51.95973205566406,
"margin_dpo/margin_std": 77.87043762207031,
"step": 370
},
{
"epoch": 0.5608465608465608,
"fcm_dpo/beta": 0.00884594488888979,
"fcm_dpo/delta": -0.012843847274780273,
"fcm_dpo/margin": 46.544769287109375,
"fcm_dpo/q_t": 0.40730273723602295,
"grad_norm": 14.952667236328125,
"learning_rate": 2.420680166254831e-07,
"logits/chosen": 0.7922030687332153,
"logits/rejected": 0.7605820894241333,
"logps/chosen": -139.9717254638672,
"logps/ref_chosen": -49.224212646484375,
"logps/ref_rejected": -63.348472595214844,
"logps/rejected": -200.6407470703125,
"loss": 1.1097,
"margin_dpo/margin_mean": 46.54476547241211,
"margin_dpo/margin_std": 70.16578674316406,
"step": 371
},
{
"epoch": 0.562358276643991,
"fcm_dpo/beta": 0.009037522599101067,
"fcm_dpo/delta": 0.14259661734104156,
"fcm_dpo/margin": 28.694950103759766,
"fcm_dpo/q_t": 0.44280239939689636,
"grad_norm": 16.078189849853516,
"learning_rate": 2.4074658027491044e-07,
"logits/chosen": 0.7318013310432434,
"logits/rejected": 0.6354872584342957,
"logps/chosen": -149.69754028320312,
"logps/ref_chosen": -52.269554138183594,
"logps/ref_rejected": -72.99522399902344,
"logps/rejected": -199.1181640625,
"loss": 1.2793,
"margin_dpo/margin_mean": 28.69495391845703,
"margin_dpo/margin_std": 82.97291564941406,
"step": 372
},
{
"epoch": 0.563869992441421,
"fcm_dpo/beta": 0.009121359325945377,
"fcm_dpo/delta": 0.048252545297145844,
"fcm_dpo/margin": 38.738624572753906,
"fcm_dpo/q_t": 0.4220026731491089,
"grad_norm": 14.351899147033691,
"learning_rate": 2.394254027623792e-07,
"logits/chosen": 0.7239790558815002,
"logits/rejected": 0.653123140335083,
"logps/chosen": -167.8028564453125,
"logps/ref_chosen": -61.112998962402344,
"logps/ref_rejected": -76.24851989746094,
"logps/rejected": -221.677001953125,
"loss": 1.2036,
"margin_dpo/margin_mean": 38.738624572753906,
"margin_dpo/margin_std": 84.81132507324219,
"step": 373
},
{
"epoch": 0.5653817082388511,
"fcm_dpo/beta": 0.008857542648911476,
"fcm_dpo/delta": -0.1921839565038681,
"fcm_dpo/margin": 65.550048828125,
"fcm_dpo/q_t": 0.3672952651977539,
"grad_norm": 14.499312400817871,
"learning_rate": 2.381045210440644e-07,
"logits/chosen": 0.6369169354438782,
"logits/rejected": 0.6387360095977783,
"logps/chosen": -161.60235595703125,
"logps/ref_chosen": -72.66920471191406,
"logps/ref_rejected": -76.83158874511719,
"logps/rejected": -231.31480407714844,
"loss": 0.9808,
"margin_dpo/margin_mean": 65.550048828125,
"margin_dpo/margin_std": 72.03311920166016,
"step": 374
},
{
"epoch": 0.5668934240362812,
"fcm_dpo/beta": 0.00878390483558178,
"fcm_dpo/delta": 0.018049051985144615,
"fcm_dpo/margin": 43.547298431396484,
"fcm_dpo/q_t": 0.4139014482498169,
"grad_norm": 14.884501457214355,
"learning_rate": 2.3678397206786715e-07,
"logits/chosen": 0.748401403427124,
"logits/rejected": 0.6893227696418762,
"logps/chosen": -145.9182891845703,
"logps/ref_chosen": -57.68330383300781,
"logps/ref_rejected": -79.34097290039062,
"logps/rejected": -211.12326049804688,
"loss": 1.1448,
"margin_dpo/margin_mean": 43.54730224609375,
"margin_dpo/margin_std": 75.84925842285156,
"step": 375
},
{
"epoch": 0.5684051398337112,
"fcm_dpo/beta": 0.008706707507371902,
"fcm_dpo/delta": -0.07801564037799835,
"fcm_dpo/margin": 54.46429443359375,
"fcm_dpo/q_t": 0.39430510997772217,
"grad_norm": 14.08863639831543,
"learning_rate": 2.3546379277238103e-07,
"logits/chosen": 0.7859851717948914,
"logits/rejected": 0.7144784927368164,
"logps/chosen": -147.56790161132812,
"logps/ref_chosen": -51.674072265625,
"logps/ref_rejected": -75.69713592529297,
"logps/rejected": -226.0552520751953,
"loss": 1.0795,
"margin_dpo/margin_mean": 54.46429443359375,
"margin_dpo/margin_std": 80.19218444824219,
"step": 376
},
{
"epoch": 0.5699168556311414,
"fcm_dpo/beta": 0.008786122314631939,
"fcm_dpo/delta": 0.049597274512052536,
"fcm_dpo/margin": 40.009239196777344,
"fcm_dpo/q_t": 0.420106440782547,
"grad_norm": 14.204717636108398,
"learning_rate": 2.3414402008585886e-07,
"logits/chosen": 0.7056032419204712,
"logits/rejected": 0.6848410367965698,
"logps/chosen": -146.47552490234375,
"logps/ref_chosen": -46.17853546142578,
"logps/ref_rejected": -57.756500244140625,
"logps/rejected": -198.062744140625,
"loss": 1.1646,
"margin_dpo/margin_mean": 40.009239196777344,
"margin_dpo/margin_std": 73.26371002197266,
"step": 377
},
{
"epoch": 0.5714285714285714,
"fcm_dpo/beta": 0.008892524987459183,
"fcm_dpo/delta": 0.06320677697658539,
"fcm_dpo/margin": 38.03527069091797,
"fcm_dpo/q_t": 0.423417866230011,
"grad_norm": 13.075918197631836,
"learning_rate": 2.3282469092517977e-07,
"logits/chosen": 0.7477153539657593,
"logits/rejected": 0.6995026469230652,
"logps/chosen": -155.286376953125,
"logps/ref_chosen": -59.21887969970703,
"logps/ref_rejected": -71.24818420410156,
"logps/rejected": -205.3509521484375,
"loss": 1.1712,
"margin_dpo/margin_mean": 38.03527069091797,
"margin_dpo/margin_std": 70.07217407226562,
"step": 378
},
{
"epoch": 0.5729402872260015,
"fcm_dpo/beta": 0.008829142898321152,
"fcm_dpo/delta": -0.03952915593981743,
"fcm_dpo/margin": 49.586158752441406,
"fcm_dpo/q_t": 0.402193546295166,
"grad_norm": 14.709274291992188,
"learning_rate": 2.3150584219481643e-07,
"logits/chosen": 0.740286111831665,
"logits/rejected": 0.6656442880630493,
"logps/chosen": -167.2689666748047,
"logps/ref_chosen": -76.31658935546875,
"logps/ref_rejected": -104.26200103759766,
"logps/rejected": -244.800537109375,
"loss": 1.0925,
"margin_dpo/margin_mean": 49.586158752441406,
"margin_dpo/margin_std": 73.70040893554688,
"step": 379
},
{
"epoch": 0.5744520030234316,
"fcm_dpo/beta": 0.008623160421848297,
"fcm_dpo/delta": -0.1510622799396515,
"fcm_dpo/margin": 62.967567443847656,
"fcm_dpo/q_t": 0.3760165572166443,
"grad_norm": 13.50650691986084,
"learning_rate": 2.3018751078580283e-07,
"logits/chosen": 0.7178818583488464,
"logits/rejected": 0.6813254952430725,
"logps/chosen": -142.29251098632812,
"logps/ref_chosen": -61.283164978027344,
"logps/ref_rejected": -72.38892364501953,
"logps/rejected": -216.3658447265625,
"loss": 1.0215,
"margin_dpo/margin_mean": 62.967567443847656,
"margin_dpo/margin_std": 78.15447998046875,
"step": 380
},
{
"epoch": 0.5759637188208617,
"fcm_dpo/beta": 0.008834085427224636,
"fcm_dpo/delta": 0.18309441208839417,
"fcm_dpo/margin": 24.82586669921875,
"fcm_dpo/q_t": 0.4514288604259491,
"grad_norm": 12.987579345703125,
"learning_rate": 2.288697335747027e-07,
"logits/chosen": 0.6932476162910461,
"logits/rejected": 0.6694196462631226,
"logps/chosen": -157.75567626953125,
"logps/ref_chosen": -58.2139892578125,
"logps/ref_rejected": -60.78669357299805,
"logps/rejected": -185.15423583984375,
"loss": 1.2867,
"margin_dpo/margin_mean": 24.82586669921875,
"margin_dpo/margin_std": 75.64501953125,
"step": 381
},
{
"epoch": 0.5774754346182918,
"fcm_dpo/beta": 0.008934552781283855,
"fcm_dpo/delta": 0.020180724561214447,
"fcm_dpo/margin": 42.540550231933594,
"fcm_dpo/q_t": 0.4133971333503723,
"grad_norm": 13.391695976257324,
"learning_rate": 2.2755254742257706e-07,
"logits/chosen": 0.7400511503219604,
"logits/rejected": 0.6866692304611206,
"logps/chosen": -162.21678161621094,
"logps/ref_chosen": -61.82532501220703,
"logps/ref_rejected": -83.0452880859375,
"logps/rejected": -225.977294921875,
"loss": 1.1196,
"margin_dpo/margin_mean": 42.540550231933594,
"margin_dpo/margin_std": 63.50366973876953,
"step": 382
},
{
"epoch": 0.5789871504157218,
"fcm_dpo/beta": 0.008840801194310188,
"fcm_dpo/delta": -0.014270953834056854,
"fcm_dpo/margin": 46.73270797729492,
"fcm_dpo/q_t": 0.40885794162750244,
"grad_norm": 14.537291526794434,
"learning_rate": 2.2623598917395436e-07,
"logits/chosen": 0.6014754176139832,
"logits/rejected": 0.6322965621948242,
"logps/chosen": -177.6047821044922,
"logps/ref_chosen": -80.56326293945312,
"logps/ref_rejected": -74.62922668457031,
"logps/rejected": -218.40345764160156,
"loss": 1.1355,
"margin_dpo/margin_mean": 46.73271179199219,
"margin_dpo/margin_std": 80.7920150756836,
"step": 383
},
{
"epoch": 0.5804988662131519,
"fcm_dpo/beta": 0.008877087384462357,
"fcm_dpo/delta": 0.0022729591000825167,
"fcm_dpo/margin": 44.81377029418945,
"fcm_dpo/q_t": 0.40858256816864014,
"grad_norm": 14.741730690002441,
"learning_rate": 2.2492009565579875e-07,
"logits/chosen": 0.7499580979347229,
"logits/rejected": 0.7032359838485718,
"logps/chosen": -162.7613525390625,
"logps/ref_chosen": -65.47514343261719,
"logps/ref_rejected": -79.67378234863281,
"logps/rejected": -221.77377319335938,
"loss": 1.116,
"margin_dpo/margin_mean": 44.81377029418945,
"margin_dpo/margin_std": 69.56358337402344,
"step": 384
},
{
"epoch": 0.582010582010582,
"fcm_dpo/beta": 0.008751116693019867,
"fcm_dpo/delta": -0.12058362364768982,
"fcm_dpo/margin": 58.79779052734375,
"fcm_dpo/q_t": 0.3835332989692688,
"grad_norm": 12.566021919250488,
"learning_rate": 2.2360490367648084e-07,
"logits/chosen": 0.6230024695396423,
"logits/rejected": 0.5862922072410583,
"logps/chosen": -160.81768798828125,
"logps/ref_chosen": -66.0565185546875,
"logps/ref_rejected": -86.68023681640625,
"logps/rejected": -240.23919677734375,
"loss": 1.0237,
"margin_dpo/margin_mean": 58.797786712646484,
"margin_dpo/margin_std": 69.73304748535156,
"step": 385
},
{
"epoch": 0.5835222978080121,
"fcm_dpo/beta": 0.008755723014473915,
"fcm_dpo/delta": 0.07520150393247604,
"fcm_dpo/margin": 37.383819580078125,
"fcm_dpo/q_t": 0.42415207624435425,
"grad_norm": 13.969977378845215,
"learning_rate": 2.2229045002474724e-07,
"logits/chosen": 0.637274980545044,
"logits/rejected": 0.5775246024131775,
"logps/chosen": -184.84173583984375,
"logps/ref_chosen": -75.6236572265625,
"logps/ref_rejected": -92.62330627441406,
"logps/rejected": -239.22520446777344,
"loss": 1.1701,
"margin_dpo/margin_mean": 37.383819580078125,
"margin_dpo/margin_std": 68.13032531738281,
"step": 386
},
{
"epoch": 0.5850340136054422,
"fcm_dpo/beta": 0.00868870597332716,
"fcm_dpo/delta": -0.08571085333824158,
"fcm_dpo/margin": 55.425376892089844,
"fcm_dpo/q_t": 0.3905186653137207,
"grad_norm": 12.999944686889648,
"learning_rate": 2.209767714686924e-07,
"logits/chosen": 0.7348219156265259,
"logits/rejected": 0.628671407699585,
"logps/chosen": -144.72238159179688,
"logps/ref_chosen": -47.22170639038086,
"logps/ref_rejected": -87.338134765625,
"logps/rejected": -240.26419067382812,
"loss": 1.0382,
"margin_dpo/margin_mean": 55.425376892089844,
"margin_dpo/margin_std": 66.4821548461914,
"step": 387
},
{
"epoch": 0.5865457294028723,
"fcm_dpo/beta": 0.008731149137020111,
"fcm_dpo/delta": 0.09577102214097977,
"fcm_dpo/margin": 35.175655364990234,
"fcm_dpo/q_t": 0.43218091130256653,
"grad_norm": 12.877540588378906,
"learning_rate": 2.1966390475472954e-07,
"logits/chosen": 0.7196862697601318,
"logits/rejected": 0.711571455001831,
"logps/chosen": -172.91677856445312,
"logps/ref_chosen": -74.5794677734375,
"logps/ref_rejected": -79.92558288574219,
"logps/rejected": -213.4385528564453,
"loss": 1.2121,
"margin_dpo/margin_mean": 35.175655364990234,
"margin_dpo/margin_std": 77.24336242675781,
"step": 388
},
{
"epoch": 0.5880574452003023,
"fcm_dpo/beta": 0.008700037375092506,
"fcm_dpo/delta": -0.09904014319181442,
"fcm_dpo/margin": 56.8076057434082,
"fcm_dpo/q_t": 0.38827866315841675,
"grad_norm": 28.18358612060547,
"learning_rate": 2.1835188660656265e-07,
"logits/chosen": 0.7416616678237915,
"logits/rejected": 0.7048197984695435,
"logps/chosen": -156.3480682373047,
"logps/ref_chosen": -61.624366760253906,
"logps/ref_rejected": -76.50978088378906,
"logps/rejected": -228.0410919189453,
"loss": 1.0495,
"margin_dpo/margin_mean": 56.80760955810547,
"margin_dpo/margin_std": 74.74456787109375,
"step": 389
},
{
"epoch": 0.5895691609977324,
"fcm_dpo/beta": 0.008662872016429901,
"fcm_dpo/delta": 0.023952744901180267,
"fcm_dpo/margin": 43.51176071166992,
"fcm_dpo/q_t": 0.41386693716049194,
"grad_norm": 10.690574645996094,
"learning_rate": 2.170407537241599e-07,
"logits/chosen": 0.8148065805435181,
"logits/rejected": 0.7424544095993042,
"logps/chosen": -133.6822509765625,
"logps/ref_chosen": -45.871864318847656,
"logps/ref_rejected": -61.305999755859375,
"logps/rejected": -192.62814331054688,
"loss": 1.1241,
"margin_dpo/margin_mean": 43.51176452636719,
"margin_dpo/margin_std": 67.04666137695312,
"step": 390
},
{
"epoch": 0.5910808767951625,
"fcm_dpo/beta": 0.008582616224884987,
"fcm_dpo/delta": -0.05733926221728325,
"fcm_dpo/margin": 52.93098068237305,
"fcm_dpo/q_t": 0.3971294164657593,
"grad_norm": 13.169042587280273,
"learning_rate": 2.1573054278272636e-07,
"logits/chosen": 0.7265839576721191,
"logits/rejected": 0.6576735377311707,
"logps/chosen": -153.22894287109375,
"logps/ref_chosen": -58.18701171875,
"logps/ref_rejected": -83.63442993164062,
"logps/rejected": -231.60736083984375,
"loss": 1.097,
"margin_dpo/margin_mean": 52.93098449707031,
"margin_dpo/margin_std": 81.30315399169922,
"step": 391
},
{
"epoch": 0.5925925925925926,
"fcm_dpo/beta": 0.00850940402597189,
"fcm_dpo/delta": -0.1043597012758255,
"fcm_dpo/margin": 58.642520904541016,
"fcm_dpo/q_t": 0.3879520297050476,
"grad_norm": 11.178428649902344,
"learning_rate": 2.1442129043167873e-07,
"logits/chosen": 0.7724939584732056,
"logits/rejected": 0.7124058604240417,
"logps/chosen": -151.29806518554688,
"logps/ref_chosen": -69.7445297241211,
"logps/ref_rejected": -94.05877685546875,
"logps/rejected": -234.25485229492188,
"loss": 1.0558,
"margin_dpo/margin_mean": 58.64252471923828,
"margin_dpo/margin_std": 78.97216796875,
"step": 392
},
{
"epoch": 0.5941043083900227,
"fcm_dpo/beta": 0.00827580876648426,
"fcm_dpo/delta": -0.08926917612552643,
"fcm_dpo/margin": 58.547996520996094,
"fcm_dpo/q_t": 0.3895009756088257,
"grad_norm": 11.780495643615723,
"learning_rate": 2.131130332936195e-07,
"logits/chosen": 0.7572908997535706,
"logits/rejected": 0.7185690402984619,
"logps/chosen": -148.23239135742188,
"logps/ref_chosen": -52.33489990234375,
"logps/ref_rejected": -74.33809661865234,
"logps/rejected": -228.78358459472656,
"loss": 1.0396,
"margin_dpo/margin_mean": 58.54798889160156,
"margin_dpo/margin_std": 70.91494750976562,
"step": 393
},
{
"epoch": 0.5956160241874527,
"fcm_dpo/beta": 0.008259693160653114,
"fcm_dpo/delta": -0.021054361015558243,
"fcm_dpo/margin": 50.85765838623047,
"fcm_dpo/q_t": 0.40173596143722534,
"grad_norm": 12.078781127929688,
"learning_rate": 2.1180580796331323e-07,
"logits/chosen": 0.771820604801178,
"logits/rejected": 0.7414647936820984,
"logps/chosen": -151.42906188964844,
"logps/ref_chosen": -60.6761360168457,
"logps/ref_rejected": -71.36074829101562,
"logps/rejected": -212.97134399414062,
"loss": 1.0744,
"margin_dpo/margin_mean": 50.85765838623047,
"margin_dpo/margin_std": 62.24065399169922,
"step": 394
},
{
"epoch": 0.5971277399848829,
"fcm_dpo/beta": 0.00834040530025959,
"fcm_dpo/delta": 0.06285583227872849,
"fcm_dpo/margin": 40.60387420654297,
"fcm_dpo/q_t": 0.42380064725875854,
"grad_norm": 15.293757438659668,
"learning_rate": 2.104996510066625e-07,
"logits/chosen": 0.7607164978981018,
"logits/rejected": 0.6606634259223938,
"logps/chosen": -147.010986328125,
"logps/ref_chosen": -50.60432434082031,
"logps/ref_rejected": -77.08731079101562,
"logps/rejected": -214.0978546142578,
"loss": 1.1548,
"margin_dpo/margin_mean": 40.60387420654297,
"margin_dpo/margin_std": 68.5374755859375,
"step": 395
},
{
"epoch": 0.5986394557823129,
"fcm_dpo/beta": 0.00826001912355423,
"fcm_dpo/delta": -0.008021347224712372,
"fcm_dpo/margin": 49.21974182128906,
"fcm_dpo/q_t": 0.40675118565559387,
"grad_norm": 11.849122047424316,
"learning_rate": 2.0919459895968517e-07,
"logits/chosen": 0.7603906989097595,
"logits/rejected": 0.6580515503883362,
"logps/chosen": -141.91624450683594,
"logps/ref_chosen": -51.35961151123047,
"logps/ref_rejected": -79.89360046386719,
"logps/rejected": -219.66998291015625,
"loss": 1.0896,
"margin_dpo/margin_mean": 49.219749450683594,
"margin_dpo/margin_std": 63.23406219482422,
"step": 396
},
{
"epoch": 0.600151171579743,
"fcm_dpo/beta": 0.008545951917767525,
"fcm_dpo/delta": 0.1694183051586151,
"fcm_dpo/margin": 27.406164169311523,
"fcm_dpo/q_t": 0.4465975761413574,
"grad_norm": 13.423833847045898,
"learning_rate": 2.078906883274924e-07,
"logits/chosen": 0.6522234082221985,
"logits/rejected": 0.6044944524765015,
"logps/chosen": -170.49685668945312,
"logps/ref_chosen": -66.45622253417969,
"logps/ref_rejected": -85.74736785888672,
"logps/rejected": -217.1941680908203,
"loss": 1.281,
"margin_dpo/margin_mean": 27.40616226196289,
"margin_dpo/margin_std": 81.10401916503906,
"step": 397
},
{
"epoch": 0.6016628873771731,
"fcm_dpo/beta": 0.008423902094364166,
"fcm_dpo/delta": -0.12061990797519684,
"fcm_dpo/margin": 60.99681854248047,
"fcm_dpo/q_t": 0.38334938883781433,
"grad_norm": 11.325228691101074,
"learning_rate": 2.065879555832674e-07,
"logits/chosen": 0.7046458721160889,
"logits/rejected": 0.6370331048965454,
"logps/chosen": -138.74560546875,
"logps/ref_chosen": -49.244239807128906,
"logps/ref_rejected": -75.18949127197266,
"logps/rejected": -225.68765258789062,
"loss": 1.0144,
"margin_dpo/margin_mean": 60.99681854248047,
"margin_dpo/margin_std": 69.0614013671875,
"step": 398
},
{
"epoch": 0.6031746031746031,
"fcm_dpo/beta": 0.008185310289263725,
"fcm_dpo/delta": -0.15051433444023132,
"fcm_dpo/margin": 66.17098999023438,
"fcm_dpo/q_t": 0.37749582529067993,
"grad_norm": 13.418987274169922,
"learning_rate": 2.052864371672457e-07,
"logits/chosen": 0.6756268739700317,
"logits/rejected": 0.5277635455131531,
"logps/chosen": -172.79800415039062,
"logps/ref_chosen": -68.30679321289062,
"logps/ref_rejected": -113.2708511352539,
"logps/rejected": -283.93304443359375,
"loss": 1.006,
"margin_dpo/margin_mean": 66.17098999023438,
"margin_dpo/margin_std": 76.67375183105469,
"step": 399
},
{
"epoch": 0.6046863189720333,
"fcm_dpo/beta": 0.008176662027835846,
"fcm_dpo/delta": 0.07041217386722565,
"fcm_dpo/margin": 40.524681091308594,
"fcm_dpo/q_t": 0.4246814250946045,
"grad_norm": 18.240394592285156,
"learning_rate": 2.0398616948569493e-07,
"logits/chosen": 0.7267959117889404,
"logits/rejected": 0.6661494970321655,
"logps/chosen": -187.3385467529297,
"logps/ref_chosen": -71.62649536132812,
"logps/ref_rejected": -90.98765563964844,
"logps/rejected": -247.22439575195312,
"loss": 1.163,
"margin_dpo/margin_mean": 40.524681091308594,
"margin_dpo/margin_std": 68.88404846191406,
"step": 400
},
{
"epoch": 0.6046863189720333,
"eval_fcm_dpo/beta": 0.008266105316579342,
"eval_logits/chosen": 0.7402104139328003,
"eval_logits/rejected": 0.6880174279212952,
"eval_logps/chosen": -175.3135528564453,
"eval_logps/ref_chosen": -74.85946655273438,
"eval_logps/ref_rejected": -79.54898834228516,
"eval_logps/rejected": -225.52288818359375,
"eval_loss": 0.5694928169250488,
"eval_margin_dpo/margin_mean": 45.51979446411133,
"eval_margin_dpo/margin_std": 75.34153747558594,
"eval_runtime": 37.9897,
"eval_samples_per_second": 60.622,
"eval_steps_per_second": 1.895,
"step": 400
},
{
"epoch": 0.6061980347694633,
"fcm_dpo/beta": 0.008165856823325157,
"fcm_dpo/delta": -0.06495825201272964,
"fcm_dpo/margin": 56.537445068359375,
"fcm_dpo/q_t": 0.3948812484741211,
"grad_norm": 10.236934661865234,
"learning_rate": 2.0268718890989752e-07,
"logits/chosen": 0.7750402092933655,
"logits/rejected": 0.6722196340560913,
"logps/chosen": -142.9902801513672,
"logps/ref_chosen": -53.72495651245117,
"logps/ref_rejected": -75.06304931640625,
"logps/rejected": -220.86581420898438,
"loss": 1.0501,
"margin_dpo/margin_mean": 56.537445068359375,
"margin_dpo/margin_std": 68.74324035644531,
"step": 401
},
{
"epoch": 0.6077097505668935,
"fcm_dpo/beta": 0.00811966322362423,
"fcm_dpo/delta": 0.01042521744966507,
"fcm_dpo/margin": 47.93630599975586,
"fcm_dpo/q_t": 0.41185569763183594,
"grad_norm": 13.079541206359863,
"learning_rate": 2.013895317751323e-07,
"logits/chosen": 0.7229193449020386,
"logits/rejected": 0.6913242340087891,
"logps/chosen": -158.04225158691406,
"logps/ref_chosen": -61.873931884765625,
"logps/ref_rejected": -66.15198516845703,
"logps/rejected": -210.25660705566406,
"loss": 1.1303,
"margin_dpo/margin_mean": 47.936309814453125,
"margin_dpo/margin_std": 76.8143310546875,
"step": 402
},
{
"epoch": 0.6092214663643235,
"fcm_dpo/beta": 0.008106638677418232,
"fcm_dpo/delta": -0.06947094202041626,
"fcm_dpo/margin": 57.51924133300781,
"fcm_dpo/q_t": 0.39517760276794434,
"grad_norm": 11.307629585266113,
"learning_rate": 2.0009323437965898e-07,
"logits/chosen": 0.8434353470802307,
"logits/rejected": 0.7568535804748535,
"logps/chosen": -155.8819580078125,
"logps/ref_chosen": -51.321502685546875,
"logps/ref_rejected": -86.54010772705078,
"logps/rejected": -248.61981201171875,
"loss": 1.0715,
"margin_dpo/margin_mean": 57.51924133300781,
"margin_dpo/margin_std": 79.66917419433594,
"step": 403
},
{
"epoch": 0.6107331821617535,
"fcm_dpo/beta": 0.00791841372847557,
"fcm_dpo/delta": -0.07457688450813293,
"fcm_dpo/margin": 59.30389404296875,
"fcm_dpo/q_t": 0.39402520656585693,
"grad_norm": 14.147499084472656,
"learning_rate": 1.9879833298370237e-07,
"logits/chosen": 0.6907485723495483,
"logits/rejected": 0.5947024822235107,
"logps/chosen": -158.80311584472656,
"logps/ref_chosen": -62.26288604736328,
"logps/ref_rejected": -95.19029998779297,
"logps/rejected": -251.034423828125,
"loss": 1.0664,
"margin_dpo/margin_mean": 59.30389404296875,
"margin_dpo/margin_std": 78.74125671386719,
"step": 404
},
{
"epoch": 0.6122448979591837,
"fcm_dpo/beta": 0.00792413204908371,
"fcm_dpo/delta": 0.03028678148984909,
"fcm_dpo/margin": 46.69974136352539,
"fcm_dpo/q_t": 0.4163452088832855,
"grad_norm": 11.294724464416504,
"learning_rate": 1.975048638084379e-07,
"logits/chosen": 0.7476557493209839,
"logits/rejected": 0.7014354467391968,
"logps/chosen": -151.2752685546875,
"logps/ref_chosen": -50.5843391418457,
"logps/ref_rejected": -65.43156433105469,
"logps/rejected": -212.82223510742188,
"loss": 1.13,
"margin_dpo/margin_mean": 46.69974136352539,
"margin_dpo/margin_std": 70.747314453125,
"step": 405
},
{
"epoch": 0.6137566137566137,
"fcm_dpo/beta": 0.007905669510364532,
"fcm_dpo/delta": -0.07834555953741074,
"fcm_dpo/margin": 60.04176712036133,
"fcm_dpo/q_t": 0.3914737105369568,
"grad_norm": 14.173489570617676,
"learning_rate": 1.9621286303497914e-07,
"logits/chosen": 0.7967276573181152,
"logits/rejected": 0.628233015537262,
"logps/chosen": -147.6551513671875,
"logps/ref_chosen": -48.99560546875,
"logps/ref_rejected": -92.47774505615234,
"logps/rejected": -251.17904663085938,
"loss": 1.0637,
"margin_dpo/margin_mean": 60.04176330566406,
"margin_dpo/margin_std": 80.88154602050781,
"step": 406
},
{
"epoch": 0.6152683295540439,
"fcm_dpo/beta": 0.007924167439341545,
"fcm_dpo/delta": 0.02935061603784561,
"fcm_dpo/margin": 46.884368896484375,
"fcm_dpo/q_t": 0.41602736711502075,
"grad_norm": 14.014954566955566,
"learning_rate": 1.9492236680336483e-07,
"logits/chosen": 0.6189517974853516,
"logits/rejected": 0.5417089462280273,
"logps/chosen": -208.76776123046875,
"logps/ref_chosen": -89.40056610107422,
"logps/ref_rejected": -99.28775024414062,
"logps/rejected": -265.539306640625,
"loss": 1.14,
"margin_dpo/margin_mean": 46.884368896484375,
"margin_dpo/margin_std": 78.12733459472656,
"step": 407
},
{
"epoch": 0.6167800453514739,
"fcm_dpo/beta": 0.007776426617056131,
"fcm_dpo/delta": -0.16104529798030853,
"fcm_dpo/margin": 71.02813720703125,
"fcm_dpo/q_t": 0.37310662865638733,
"grad_norm": 10.806535720825195,
"learning_rate": 1.9363341121154895e-07,
"logits/chosen": 0.7250916957855225,
"logits/rejected": 0.6428213119506836,
"logps/chosen": -145.7263641357422,
"logps/ref_chosen": -54.70391845703125,
"logps/ref_rejected": -73.98648834228516,
"logps/rejected": -236.03707885742188,
"loss": 0.9905,
"margin_dpo/margin_mean": 71.02813720703125,
"margin_dpo/margin_std": 74.32557678222656,
"step": 408
},
{
"epoch": 0.618291761148904,
"fcm_dpo/beta": 0.007799787446856499,
"fcm_dpo/delta": 0.12649981677532196,
"fcm_dpo/margin": 35.53091049194336,
"fcm_dpo/q_t": 0.4371756315231323,
"grad_norm": 13.238290786743164,
"learning_rate": 1.9234603231438994e-07,
"logits/chosen": 0.734856367111206,
"logits/rejected": 0.7435188293457031,
"logps/chosen": -175.16085815429688,
"logps/ref_chosen": -62.11822509765625,
"logps/ref_rejected": -61.933509826660156,
"logps/rejected": -210.50704956054688,
"loss": 1.2054,
"margin_dpo/margin_mean": 35.53091049194336,
"margin_dpo/margin_std": 72.68028259277344,
"step": 409
},
{
"epoch": 0.6198034769463341,
"fcm_dpo/beta": 0.007764261215925217,
"fcm_dpo/delta": -0.044183533638715744,
"fcm_dpo/margin": 56.84495544433594,
"fcm_dpo/q_t": 0.39720937609672546,
"grad_norm": 11.548017501831055,
"learning_rate": 1.9106026612264315e-07,
"logits/chosen": 0.7385885119438171,
"logits/rejected": 0.7137470245361328,
"logps/chosen": -167.29537963867188,
"logps/ref_chosen": -61.80266189575195,
"logps/ref_rejected": -76.60002136230469,
"logps/rejected": -238.93768310546875,
"loss": 1.0508,
"margin_dpo/margin_mean": 56.84495544433594,
"margin_dpo/margin_std": 62.45063781738281,
"step": 410
},
{
"epoch": 0.6213151927437641,
"fcm_dpo/beta": 0.007759403437376022,
"fcm_dpo/delta": -0.033648937940597534,
"fcm_dpo/margin": 55.70112609863281,
"fcm_dpo/q_t": 0.401795893907547,
"grad_norm": 10.255197525024414,
"learning_rate": 1.8977614860195296e-07,
"logits/chosen": 0.7373828887939453,
"logits/rejected": 0.6756826639175415,
"logps/chosen": -165.4742431640625,
"logps/ref_chosen": -54.44539260864258,
"logps/ref_rejected": -74.5650863647461,
"logps/rejected": -241.2950439453125,
"loss": 1.0894,
"margin_dpo/margin_mean": 55.70112609863281,
"margin_dpo/margin_std": 79.74048614501953,
"step": 411
},
{
"epoch": 0.6228269085411943,
"fcm_dpo/beta": 0.007705829571932554,
"fcm_dpo/delta": -0.003621477633714676,
"fcm_dpo/margin": 52.322906494140625,
"fcm_dpo/q_t": 0.4078013300895691,
"grad_norm": 12.566106796264648,
"learning_rate": 1.8849371567184662e-07,
"logits/chosen": 0.7539393901824951,
"logits/rejected": 0.6827704906463623,
"logps/chosen": -171.85638427734375,
"logps/ref_chosen": -55.248085021972656,
"logps/ref_rejected": -68.96623229980469,
"logps/rejected": -237.89743041992188,
"loss": 1.0955,
"margin_dpo/margin_mean": 52.322906494140625,
"margin_dpo/margin_std": 71.428466796875,
"step": 412
},
{
"epoch": 0.6243386243386243,
"fcm_dpo/beta": 0.007800276391208172,
"fcm_dpo/delta": 0.05510157719254494,
"fcm_dpo/margin": 44.45771789550781,
"fcm_dpo/q_t": 0.42176562547683716,
"grad_norm": 14.439383506774902,
"learning_rate": 1.872130032047302e-07,
"logits/chosen": 0.5603929758071899,
"logits/rejected": 0.5244190692901611,
"logps/chosen": -195.08648681640625,
"logps/ref_chosen": -68.72074890136719,
"logps/ref_rejected": -78.76539611816406,
"logps/rejected": -249.5888671875,
"loss": 1.1824,
"margin_dpo/margin_mean": 44.45771408081055,
"margin_dpo/margin_std": 88.00609588623047,
"step": 413
},
{
"epoch": 0.6258503401360545,
"fcm_dpo/beta": 0.0077771758660674095,
"fcm_dpo/delta": -0.044307127594947815,
"fcm_dpo/margin": 56.880470275878906,
"fcm_dpo/q_t": 0.39816516637802124,
"grad_norm": 12.960653305053711,
"learning_rate": 1.8593404702488436e-07,
"logits/chosen": 0.7278245687484741,
"logits/rejected": 0.6642704606056213,
"logps/chosen": -165.54698181152344,
"logps/ref_chosen": -54.138214111328125,
"logps/ref_rejected": -74.65741729736328,
"logps/rejected": -242.9466552734375,
"loss": 1.0688,
"margin_dpo/margin_mean": 56.880470275878906,
"margin_dpo/margin_std": 73.23246765136719,
"step": 414
},
{
"epoch": 0.6273620559334845,
"fcm_dpo/beta": 0.00775496382266283,
"fcm_dpo/delta": 0.01751146838068962,
"fcm_dpo/margin": 49.402931213378906,
"fcm_dpo/q_t": 0.4125995337963104,
"grad_norm": 12.199499130249023,
"learning_rate": 1.846568829074628e-07,
"logits/chosen": 0.7460592985153198,
"logits/rejected": 0.7304663062095642,
"logps/chosen": -167.877685546875,
"logps/ref_chosen": -55.91856002807617,
"logps/ref_rejected": -61.747703552246094,
"logps/rejected": -223.10975646972656,
"loss": 1.1287,
"margin_dpo/margin_mean": 49.402931213378906,
"margin_dpo/margin_std": 79.15948486328125,
"step": 415
},
{
"epoch": 0.6288737717309146,
"fcm_dpo/beta": 0.007957161404192448,
"fcm_dpo/delta": 0.06318769603967667,
"fcm_dpo/margin": 42.04512023925781,
"fcm_dpo/q_t": 0.4247048497200012,
"grad_norm": 14.358795166015625,
"learning_rate": 1.8338154657749128e-07,
"logits/chosen": 0.7043267488479614,
"logits/rejected": 0.6525400876998901,
"logps/chosen": -173.0494842529297,
"logps/ref_chosen": -54.72308349609375,
"logps/ref_rejected": -69.17388916015625,
"logps/rejected": -229.54541015625,
"loss": 1.1811,
"margin_dpo/margin_mean": 42.04512023925781,
"margin_dpo/margin_std": 75.42815399169922,
"step": 416
},
{
"epoch": 0.6303854875283447,
"fcm_dpo/beta": 0.007852243259549141,
"fcm_dpo/delta": -0.04769314080476761,
"fcm_dpo/margin": 56.72645568847656,
"fcm_dpo/q_t": 0.3974757194519043,
"grad_norm": 12.778168678283691,
"learning_rate": 1.8210807370886849e-07,
"logits/chosen": 0.834701418876648,
"logits/rejected": 0.7620327472686768,
"logps/chosen": -178.24203491210938,
"logps/ref_chosen": -56.791259765625,
"logps/ref_rejected": -68.7791748046875,
"logps/rejected": -246.95639038085938,
"loss": 1.0928,
"margin_dpo/margin_mean": 56.72645568847656,
"margin_dpo/margin_std": 83.89405822753906,
"step": 417
},
{
"epoch": 0.6318972033257747,
"fcm_dpo/beta": 0.007806393783539534,
"fcm_dpo/delta": 0.05898113176226616,
"fcm_dpo/margin": 43.8342399597168,
"fcm_dpo/q_t": 0.4236130118370056,
"grad_norm": 13.988064765930176,
"learning_rate": 1.8083649992336825e-07,
"logits/chosen": 0.7765390872955322,
"logits/rejected": 0.784039318561554,
"logps/chosen": -198.92283630371094,
"logps/ref_chosen": -69.10798645019531,
"logps/ref_rejected": -75.09132385253906,
"logps/rejected": -248.74041748046875,
"loss": 1.1617,
"margin_dpo/margin_mean": 43.83423614501953,
"margin_dpo/margin_std": 75.42573547363281,
"step": 418
},
{
"epoch": 0.6334089191232048,
"fcm_dpo/beta": 0.007763129658997059,
"fcm_dpo/delta": -0.0961204543709755,
"fcm_dpo/margin": 63.256935119628906,
"fcm_dpo/q_t": 0.3896148204803467,
"grad_norm": 12.546382904052734,
"learning_rate": 1.7956686078964255e-07,
"logits/chosen": 0.6601693630218506,
"logits/rejected": 0.6037542819976807,
"logps/chosen": -162.12216186523438,
"logps/ref_chosen": -58.1717643737793,
"logps/ref_rejected": -71.67066955566406,
"logps/rejected": -238.87799072265625,
"loss": 1.0484,
"margin_dpo/margin_mean": 63.256935119628906,
"margin_dpo/margin_std": 82.42765045166016,
"step": 419
},
{
"epoch": 0.6349206349206349,
"fcm_dpo/beta": 0.007878805510699749,
"fcm_dpo/delta": 0.13835400342941284,
"fcm_dpo/margin": 33.70429992675781,
"fcm_dpo/q_t": 0.4415522813796997,
"grad_norm": 13.372779846191406,
"learning_rate": 1.782991918222275e-07,
"logits/chosen": 0.7393275499343872,
"logits/rejected": 0.6908121109008789,
"logps/chosen": -187.9400634765625,
"logps/ref_chosen": -57.05351257324219,
"logps/ref_rejected": -62.670982360839844,
"logps/rejected": -227.2618408203125,
"loss": 1.2477,
"margin_dpo/margin_mean": 33.70430374145508,
"margin_dpo/margin_std": 85.482421875,
"step": 420
},
{
"epoch": 0.636432350718065,
"fcm_dpo/beta": 0.007947009056806564,
"fcm_dpo/delta": 0.020302031189203262,
"fcm_dpo/margin": 47.831687927246094,
"fcm_dpo/q_t": 0.4156237840652466,
"grad_norm": 14.011063575744629,
"learning_rate": 1.7703352848054887e-07,
"logits/chosen": 0.7062793970108032,
"logits/rejected": 0.6440611481666565,
"logps/chosen": -180.4288330078125,
"logps/ref_chosen": -57.32324981689453,
"logps/ref_rejected": -75.33782958984375,
"logps/rejected": -246.27511596679688,
"loss": 1.1732,
"margin_dpo/margin_mean": 47.831687927246094,
"margin_dpo/margin_std": 93.26679992675781,
"step": 421
},
{
"epoch": 0.6379440665154951,
"fcm_dpo/beta": 0.00788248609751463,
"fcm_dpo/delta": -0.10939822345972061,
"fcm_dpo/margin": 63.941802978515625,
"fcm_dpo/q_t": 0.3854847550392151,
"grad_norm": 14.771190643310547,
"learning_rate": 1.7576990616793137e-07,
"logits/chosen": 0.7229829430580139,
"logits/rejected": 0.7074044942855835,
"logps/chosen": -173.61679077148438,
"logps/ref_chosen": -67.05757141113281,
"logps/ref_rejected": -72.12803649902344,
"logps/rejected": -242.62905883789062,
"loss": 1.0308,
"margin_dpo/margin_mean": 63.941802978515625,
"margin_dpo/margin_std": 77.09107971191406,
"step": 422
},
{
"epoch": 0.6394557823129252,
"fcm_dpo/beta": 0.0076961456798017025,
"fcm_dpo/delta": -0.0938025414943695,
"fcm_dpo/margin": 63.5325927734375,
"fcm_dpo/q_t": 0.3895634412765503,
"grad_norm": 11.972503662109375,
"learning_rate": 1.745083602306071e-07,
"logits/chosen": 0.7690365314483643,
"logits/rejected": 0.6954725980758667,
"logps/chosen": -164.59158325195312,
"logps/ref_chosen": -54.06167221069336,
"logps/ref_rejected": -76.64092254638672,
"logps/rejected": -250.70343017578125,
"loss": 1.0441,
"margin_dpo/margin_mean": 63.5325927734375,
"margin_dpo/margin_std": 80.39971923828125,
"step": 423
},
{
"epoch": 0.6409674981103552,
"fcm_dpo/beta": 0.007550341077148914,
"fcm_dpo/delta": -0.06942353397607803,
"fcm_dpo/margin": 61.598793029785156,
"fcm_dpo/q_t": 0.39464449882507324,
"grad_norm": 16.436132431030273,
"learning_rate": 1.7324892595672804e-07,
"logits/chosen": 0.6422996520996094,
"logits/rejected": 0.6022803783416748,
"logps/chosen": -172.88888549804688,
"logps/ref_chosen": -53.60887145996094,
"logps/ref_rejected": -79.2139892578125,
"logps/rejected": -260.0928039550781,
"loss": 1.0666,
"margin_dpo/margin_mean": 61.598793029785156,
"margin_dpo/margin_std": 82.29408264160156,
"step": 424
},
{
"epoch": 0.6424792139077853,
"fcm_dpo/beta": 0.007582271471619606,
"fcm_dpo/delta": 0.01660202071070671,
"fcm_dpo/margin": 50.64934158325195,
"fcm_dpo/q_t": 0.4132668375968933,
"grad_norm": 13.83991527557373,
"learning_rate": 1.7199163857537824e-07,
"logits/chosen": 0.7531682252883911,
"logits/rejected": 0.724544882774353,
"logps/chosen": -174.68898010253906,
"logps/ref_chosen": -58.41468048095703,
"logps/ref_rejected": -66.59054565429688,
"logps/rejected": -233.51419067382812,
"loss": 1.1281,
"margin_dpo/margin_mean": 50.64934158325195,
"margin_dpo/margin_std": 81.37447357177734,
"step": 425
},
{
"epoch": 0.6439909297052154,
"fcm_dpo/beta": 0.007783133536577225,
"fcm_dpo/delta": 0.16612949967384338,
"fcm_dpo/margin": 30.56137466430664,
"fcm_dpo/q_t": 0.4456656873226166,
"grad_norm": 15.910229682922363,
"learning_rate": 1.7073653325558828e-07,
"logits/chosen": 0.6517899036407471,
"logits/rejected": 0.6605311036109924,
"logps/chosen": -211.61373901367188,
"logps/ref_chosen": -71.70822143554688,
"logps/ref_rejected": -73.57725524902344,
"logps/rejected": -244.04415893554688,
"loss": 1.2789,
"margin_dpo/margin_mean": 30.561378479003906,
"margin_dpo/margin_std": 90.49774169921875,
"step": 426
},
{
"epoch": 0.6455026455026455,
"fcm_dpo/beta": 0.007862042635679245,
"fcm_dpo/delta": 0.02033122256398201,
"fcm_dpo/margin": 48.38745880126953,
"fcm_dpo/q_t": 0.41554105281829834,
"grad_norm": 14.462299346923828,
"learning_rate": 1.6948364510535218e-07,
"logits/chosen": 0.7601388692855835,
"logits/rejected": 0.698128342628479,
"logps/chosen": -189.68942260742188,
"logps/ref_chosen": -58.64276885986328,
"logps/ref_rejected": -86.25437927246094,
"logps/rejected": -265.6884765625,
"loss": 1.1533,
"margin_dpo/margin_mean": 48.38745880126953,
"margin_dpo/margin_std": 88.23645782470703,
"step": 427
},
{
"epoch": 0.6470143613000756,
"fcm_dpo/beta": 0.007850416004657745,
"fcm_dpo/delta": -0.05777687579393387,
"fcm_dpo/margin": 57.95905685424805,
"fcm_dpo/q_t": 0.39873766899108887,
"grad_norm": 12.50790023803711,
"learning_rate": 1.6823300917064458e-07,
"logits/chosen": 0.6862516403198242,
"logits/rejected": 0.639532208442688,
"logps/chosen": -189.7267608642578,
"logps/ref_chosen": -66.5960464477539,
"logps/ref_rejected": -82.3941650390625,
"logps/rejected": -263.48394775390625,
"loss": 1.0891,
"margin_dpo/margin_mean": 57.95905303955078,
"margin_dpo/margin_std": 86.09635162353516,
"step": 428
},
{
"epoch": 0.6485260770975056,
"fcm_dpo/beta": 0.007888168096542358,
"fcm_dpo/delta": 0.048963554203510284,
"fcm_dpo/margin": 44.629215240478516,
"fcm_dpo/q_t": 0.4203850328922272,
"grad_norm": 15.114370346069336,
"learning_rate": 1.669846604344412e-07,
"logits/chosen": 0.6785605549812317,
"logits/rejected": 0.696630597114563,
"logps/chosen": -184.79486083984375,
"logps/ref_chosen": -57.00970458984375,
"logps/ref_rejected": -59.86549377441406,
"logps/rejected": -232.2798614501953,
"loss": 1.1686,
"margin_dpo/margin_mean": 44.62921142578125,
"margin_dpo/margin_std": 82.51943969726562,
"step": 429
},
{
"epoch": 0.6500377928949358,
"fcm_dpo/beta": 0.007686239667236805,
"fcm_dpo/delta": -0.1573198437690735,
"fcm_dpo/margin": 71.38347625732422,
"fcm_dpo/q_t": 0.37464433908462524,
"grad_norm": 12.829866409301758,
"learning_rate": 1.6573863381573954e-07,
"logits/chosen": 0.6116656064987183,
"logits/rejected": 0.6137654781341553,
"logps/chosen": -174.70074462890625,
"logps/ref_chosen": -59.563194274902344,
"logps/ref_rejected": -70.52289581298828,
"logps/rejected": -257.0439147949219,
"loss": 1.0126,
"margin_dpo/margin_mean": 71.38348388671875,
"margin_dpo/margin_std": 85.50177001953125,
"step": 430
},
{
"epoch": 0.6515495086923658,
"fcm_dpo/beta": 0.007644302677363157,
"fcm_dpo/delta": 0.021943753585219383,
"fcm_dpo/margin": 49.56535339355469,
"fcm_dpo/q_t": 0.4139266610145569,
"grad_norm": 12.444840431213379,
"learning_rate": 1.6449496416858282e-07,
"logits/chosen": 0.7115011215209961,
"logits/rejected": 0.6581912040710449,
"logps/chosen": -162.03805541992188,
"logps/ref_chosen": -50.20032501220703,
"logps/ref_rejected": -77.81680297851562,
"logps/rejected": -239.21987915039062,
"loss": 1.133,
"margin_dpo/margin_mean": 49.56535339355469,
"margin_dpo/margin_std": 80.80972290039062,
"step": 431
},
{
"epoch": 0.6530612244897959,
"fcm_dpo/beta": 0.007644776254892349,
"fcm_dpo/delta": -0.008293664082884789,
"fcm_dpo/margin": 53.36254119873047,
"fcm_dpo/q_t": 0.4074263572692871,
"grad_norm": 12.83692455291748,
"learning_rate": 1.632536862810844e-07,
"logits/chosen": 0.7517593502998352,
"logits/rejected": 0.6997089982032776,
"logps/chosen": -178.72860717773438,
"logps/ref_chosen": -61.662757873535156,
"logps/ref_rejected": -83.94496154785156,
"logps/rejected": -254.3733673095703,
"loss": 1.1177,
"margin_dpo/margin_mean": 53.36254119873047,
"margin_dpo/margin_std": 84.9062728881836,
"step": 432
},
{
"epoch": 0.654572940287226,
"fcm_dpo/beta": 0.007542489096522331,
"fcm_dpo/delta": -0.11253903806209564,
"fcm_dpo/margin": 67.21623229980469,
"fcm_dpo/q_t": 0.38378891348838806,
"grad_norm": 13.461320877075195,
"learning_rate": 1.6201483487445515e-07,
"logits/chosen": 0.8195096254348755,
"logits/rejected": 0.8181531429290771,
"logps/chosen": -179.8470458984375,
"logps/ref_chosen": -63.72917938232422,
"logps/ref_rejected": -65.8391342163086,
"logps/rejected": -249.1732177734375,
"loss": 1.0334,
"margin_dpo/margin_mean": 67.21623229980469,
"margin_dpo/margin_std": 82.00685119628906,
"step": 433
},
{
"epoch": 0.656084656084656,
"fcm_dpo/beta": 0.007294449955224991,
"fcm_dpo/delta": -0.1217169463634491,
"fcm_dpo/margin": 70.45217895507812,
"fcm_dpo/q_t": 0.38249802589416504,
"grad_norm": 12.421359062194824,
"learning_rate": 1.6077844460203204e-07,
"logits/chosen": 0.8312444090843201,
"logits/rejected": 0.7658089399337769,
"logps/chosen": -146.02633666992188,
"logps/ref_chosen": -47.97331619262695,
"logps/ref_rejected": -72.51132202148438,
"logps/rejected": -241.01651000976562,
"loss": 1.0482,
"margin_dpo/margin_mean": 70.4521713256836,
"margin_dpo/margin_std": 93.53169250488281,
"step": 434
},
{
"epoch": 0.6575963718820862,
"fcm_dpo/beta": 0.007317832671105862,
"fcm_dpo/delta": -0.012319110333919525,
"fcm_dpo/margin": 56.22063446044922,
"fcm_dpo/q_t": 0.4067924916744232,
"grad_norm": 13.529410362243652,
"learning_rate": 1.5954455004830878e-07,
"logits/chosen": 0.834632158279419,
"logits/rejected": 0.7947177886962891,
"logps/chosen": -177.00856018066406,
"logps/ref_chosen": -57.06024932861328,
"logps/ref_rejected": -71.69146728515625,
"logps/rejected": -247.86041259765625,
"loss": 1.1107,
"margin_dpo/margin_mean": 56.22063446044922,
"margin_dpo/margin_std": 85.4617919921875,
"step": 435
},
{
"epoch": 0.6591080876795162,
"fcm_dpo/beta": 0.0073493365198373795,
"fcm_dpo/delta": 0.0642806738615036,
"fcm_dpo/margin": 45.97419738769531,
"fcm_dpo/q_t": 0.4236651360988617,
"grad_norm": 14.502552032470703,
"learning_rate": 1.5831318572796847e-07,
"logits/chosen": 0.7367502450942993,
"logits/rejected": 0.6787878274917603,
"logps/chosen": -174.20272827148438,
"logps/ref_chosen": -56.158050537109375,
"logps/ref_rejected": -67.63787841796875,
"logps/rejected": -231.65673828125,
"loss": 1.1805,
"margin_dpo/margin_mean": 45.97420120239258,
"margin_dpo/margin_std": 90.24182891845703,
"step": 436
},
{
"epoch": 0.6606198034769464,
"fcm_dpo/beta": 0.007334660738706589,
"fcm_dpo/delta": 0.02720458060503006,
"fcm_dpo/margin": 50.770957946777344,
"fcm_dpo/q_t": 0.4157326817512512,
"grad_norm": 16.514482498168945,
"learning_rate": 1.5708438608491815e-07,
"logits/chosen": 0.7151650786399841,
"logits/rejected": 0.5829428434371948,
"logps/chosen": -182.744140625,
"logps/ref_chosen": -56.98578643798828,
"logps/ref_rejected": -85.61524963378906,
"logps/rejected": -262.14459228515625,
"loss": 1.1681,
"margin_dpo/margin_mean": 50.77096176147461,
"margin_dpo/margin_std": 93.88005065917969,
"step": 437
},
{
"epoch": 0.6621315192743764,
"fcm_dpo/beta": 0.007298264652490616,
"fcm_dpo/delta": -0.12693609297275543,
"fcm_dpo/margin": 71.32022094726562,
"fcm_dpo/q_t": 0.38320598006248474,
"grad_norm": 12.304542541503906,
"learning_rate": 1.558581854913253e-07,
"logits/chosen": 0.8022891283035278,
"logits/rejected": 0.7335547208786011,
"logps/chosen": -148.60336303710938,
"logps/ref_chosen": -41.27777862548828,
"logps/ref_rejected": -65.33840942382812,
"logps/rejected": -243.9842071533203,
"loss": 1.0229,
"margin_dpo/margin_mean": 71.32022094726562,
"margin_dpo/margin_std": 85.57806396484375,
"step": 438
},
{
"epoch": 0.6636432350718064,
"fcm_dpo/beta": 0.00723626371473074,
"fcm_dpo/delta": -0.027069322764873505,
"fcm_dpo/margin": 58.800289154052734,
"fcm_dpo/q_t": 0.40320390462875366,
"grad_norm": 12.547380447387695,
"learning_rate": 1.5463461824665658e-07,
"logits/chosen": 0.6307194232940674,
"logits/rejected": 0.5939302444458008,
"logps/chosen": -202.54116821289062,
"logps/ref_chosen": -81.41764831542969,
"logps/ref_rejected": -94.72309875488281,
"logps/rejected": -274.64691162109375,
"loss": 1.0892,
"margin_dpo/margin_mean": 58.800289154052734,
"margin_dpo/margin_std": 81.8350830078125,
"step": 439
},
{
"epoch": 0.6651549508692366,
"fcm_dpo/beta": 0.007164452224969864,
"fcm_dpo/delta": -0.05680684745311737,
"fcm_dpo/margin": 63.365570068359375,
"fcm_dpo/q_t": 0.3969983756542206,
"grad_norm": 21.674142837524414,
"learning_rate": 1.534137185767178e-07,
"logits/chosen": 0.6887756586074829,
"logits/rejected": 0.5909817218780518,
"logps/chosen": -150.68502807617188,
"logps/ref_chosen": -42.538185119628906,
"logps/ref_rejected": -69.78813934326172,
"logps/rejected": -241.30055236816406,
"loss": 1.0817,
"margin_dpo/margin_mean": 63.365570068359375,
"margin_dpo/margin_std": 89.2110595703125,
"step": 440
},
{
"epoch": 0.6666666666666666,
"fcm_dpo/beta": 0.007003414444625378,
"fcm_dpo/delta": -0.06929505616426468,
"fcm_dpo/margin": 66.45780181884766,
"fcm_dpo/q_t": 0.3919275403022766,
"grad_norm": 13.894318580627441,
"learning_rate": 1.521955206326976e-07,
"logits/chosen": 0.690746009349823,
"logits/rejected": 0.5955066084861755,
"logps/chosen": -163.53961181640625,
"logps/ref_chosen": -57.593223571777344,
"logps/ref_rejected": -84.82878875732422,
"logps/rejected": -257.2330017089844,
"loss": 1.0359,
"margin_dpo/margin_mean": 66.45780181884766,
"margin_dpo/margin_std": 72.28242492675781,
"step": 441
},
{
"epoch": 0.6681783824640968,
"fcm_dpo/beta": 0.006948241498321295,
"fcm_dpo/delta": -0.04673624783754349,
"fcm_dpo/margin": 63.97719955444336,
"fcm_dpo/q_t": 0.39758235216140747,
"grad_norm": 14.932785987854004,
"learning_rate": 1.5098005849021078e-07,
"logits/chosen": 0.6913585662841797,
"logits/rejected": 0.6380952000617981,
"logps/chosen": -194.80154418945312,
"logps/ref_chosen": -67.46121978759766,
"logps/ref_rejected": -89.0693588256836,
"logps/rejected": -280.3868713378906,
"loss": 1.0664,
"margin_dpo/margin_mean": 63.97719955444336,
"margin_dpo/margin_std": 81.89796447753906,
"step": 442
},
{
"epoch": 0.6696900982615268,
"fcm_dpo/beta": 0.006789367645978928,
"fcm_dpo/delta": -0.15468865633010864,
"fcm_dpo/margin": 80.46561431884766,
"fcm_dpo/q_t": 0.3747613728046417,
"grad_norm": 13.57395076751709,
"learning_rate": 1.4976736614834662e-07,
"logits/chosen": 0.7441185712814331,
"logits/rejected": 0.6723178029060364,
"logps/chosen": -160.47637939453125,
"logps/ref_chosen": -54.79610061645508,
"logps/ref_rejected": -77.80781555175781,
"logps/rejected": -263.9537048339844,
"loss": 1.0045,
"margin_dpo/margin_mean": 80.46562194824219,
"margin_dpo/margin_std": 92.77104949951172,
"step": 443
},
{
"epoch": 0.671201814058957,
"fcm_dpo/beta": 0.006895772181451321,
"fcm_dpo/delta": 0.19947421550750732,
"fcm_dpo/margin": 29.74011993408203,
"fcm_dpo/q_t": 0.4542357921600342,
"grad_norm": 17.251333236694336,
"learning_rate": 1.4855747752871654e-07,
"logits/chosen": 0.7567894458770752,
"logits/rejected": 0.6579380035400391,
"logps/chosen": -191.23162841796875,
"logps/ref_chosen": -58.749061584472656,
"logps/ref_rejected": -86.87396240234375,
"logps/rejected": -249.09664916992188,
"loss": 1.2867,
"margin_dpo/margin_mean": 29.7401180267334,
"margin_dpo/margin_std": 88.69583129882812,
"step": 444
},
{
"epoch": 0.672713529856387,
"fcm_dpo/beta": 0.006922256201505661,
"fcm_dpo/delta": -0.08680850267410278,
"fcm_dpo/margin": 69.72836303710938,
"fcm_dpo/q_t": 0.3895564079284668,
"grad_norm": 15.33248233795166,
"learning_rate": 1.473504264745062e-07,
"logits/chosen": 0.6761714220046997,
"logits/rejected": 0.6643944978713989,
"logps/chosen": -184.99374389648438,
"logps/ref_chosen": -60.91743850708008,
"logps/ref_rejected": -71.5637435913086,
"logps/rejected": -265.368408203125,
"loss": 1.0445,
"margin_dpo/margin_mean": 69.72836303710938,
"margin_dpo/margin_std": 86.42784118652344,
"step": 445
},
{
"epoch": 0.674225245653817,
"fcm_dpo/beta": 0.006854848936200142,
"fcm_dpo/delta": -0.0958673357963562,
"fcm_dpo/margin": 71.51223754882812,
"fcm_dpo/q_t": 0.3870481252670288,
"grad_norm": 11.376906394958496,
"learning_rate": 1.461462467495284e-07,
"logits/chosen": 0.7793463468551636,
"logits/rejected": 0.695709228515625,
"logps/chosen": -161.16827392578125,
"logps/ref_chosen": -48.79924774169922,
"logps/ref_rejected": -71.8719482421875,
"logps/rejected": -255.75323486328125,
"loss": 1.0255,
"margin_dpo/margin_mean": 71.51223754882812,
"margin_dpo/margin_std": 74.22274780273438,
"step": 446
},
{
"epoch": 0.6757369614512472,
"fcm_dpo/beta": 0.006591130513697863,
"fcm_dpo/delta": -0.13952668011188507,
"fcm_dpo/margin": 80.61551666259766,
"fcm_dpo/q_t": 0.3766905665397644,
"grad_norm": 16.281320571899414,
"learning_rate": 1.4494497203727843e-07,
"logits/chosen": 0.6757616996765137,
"logits/rejected": 0.5708225965499878,
"logps/chosen": -164.0203399658203,
"logps/ref_chosen": -53.682716369628906,
"logps/ref_rejected": -88.17315673828125,
"logps/rejected": -279.12628173828125,
"loss": 1.0128,
"margin_dpo/margin_mean": 80.61550903320312,
"margin_dpo/margin_std": 92.21812438964844,
"step": 447
},
{
"epoch": 0.6772486772486772,
"fcm_dpo/beta": 0.006543307099491358,
"fcm_dpo/delta": -0.024397023022174835,
"fcm_dpo/margin": 64.70095825195312,
"fcm_dpo/q_t": 0.4019678235054016,
"grad_norm": 11.577764511108398,
"learning_rate": 1.4374663593999256e-07,
"logits/chosen": 0.7559643983840942,
"logits/rejected": 0.7006471753120422,
"logps/chosen": -171.75296020507812,
"logps/ref_chosen": -53.75125503540039,
"logps/ref_rejected": -77.17623901367188,
"logps/rejected": -259.8788757324219,
"loss": 1.0809,
"margin_dpo/margin_mean": 64.70095825195312,
"margin_dpo/margin_std": 85.6202392578125,
"step": 448
},
{
"epoch": 0.6787603930461074,
"fcm_dpo/beta": 0.006716116331517696,
"fcm_dpo/delta": 0.19444824755191803,
"fcm_dpo/margin": 31.2590274810791,
"fcm_dpo/q_t": 0.4524422883987427,
"grad_norm": 18.466140747070312,
"learning_rate": 1.4255127197770707e-07,
"logits/chosen": 0.5716577768325806,
"logits/rejected": 0.5704319477081299,
"logps/chosen": -215.08062744140625,
"logps/ref_chosen": -75.82737731933594,
"logps/ref_rejected": -82.20687866210938,
"logps/rejected": -252.71914672851562,
"loss": 1.2551,
"margin_dpo/margin_mean": 31.2590274810791,
"margin_dpo/margin_std": 78.21199035644531,
"step": 449
},
{
"epoch": 0.6802721088435374,
"fcm_dpo/beta": 0.006843068636953831,
"fcm_dpo/delta": 0.06173437833786011,
"fcm_dpo/margin": 49.743858337402344,
"fcm_dpo/q_t": 0.4245959520339966,
"grad_norm": 13.465462684631348,
"learning_rate": 1.4135891358732205e-07,
"logits/chosen": 0.8062694072723389,
"logits/rejected": 0.6891622543334961,
"logps/chosen": -165.41134643554688,
"logps/ref_chosen": -47.11572265625,
"logps/ref_rejected": -78.7546615600586,
"logps/rejected": -246.79415893554688,
"loss": 1.1677,
"margin_dpo/margin_mean": 49.743858337402344,
"margin_dpo/margin_std": 91.77166748046875,
"step": 450
},
{
"epoch": 0.6817838246409675,
"fcm_dpo/beta": 0.006978826597332954,
"fcm_dpo/delta": 0.08206881582736969,
"fcm_dpo/margin": 45.89945602416992,
"fcm_dpo/q_t": 0.4271845817565918,
"grad_norm": 13.454634666442871,
"learning_rate": 1.4016959412166437e-07,
"logits/chosen": 0.6572551727294922,
"logits/rejected": 0.6040632128715515,
"logps/chosen": -181.79440307617188,
"logps/ref_chosen": -63.350440979003906,
"logps/ref_rejected": -76.28530883789062,
"logps/rejected": -240.62872314453125,
"loss": 1.178,
"margin_dpo/margin_mean": 45.89945602416992,
"margin_dpo/margin_std": 86.73423767089844,
"step": 451
},
{
"epoch": 0.6832955404383976,
"fcm_dpo/beta": 0.007023798301815987,
"fcm_dpo/delta": 0.019556403160095215,
"fcm_dpo/margin": 54.26256561279297,
"fcm_dpo/q_t": 0.41338497400283813,
"grad_norm": 14.76894760131836,
"learning_rate": 1.3898334684855645e-07,
"logits/chosen": 0.6688199043273926,
"logits/rejected": 0.5848639607429504,
"logps/chosen": -171.7777557373047,
"logps/ref_chosen": -55.58583450317383,
"logps/ref_rejected": -77.68738555908203,
"logps/rejected": -248.141845703125,
"loss": 1.144,
"margin_dpo/margin_mean": 54.26256561279297,
"margin_dpo/margin_std": 93.56272888183594,
"step": 452
},
{
"epoch": 0.6848072562358276,
"fcm_dpo/beta": 0.007006727624684572,
"fcm_dpo/delta": -0.005369680933654308,
"fcm_dpo/margin": 57.817604064941406,
"fcm_dpo/q_t": 0.4089837074279785,
"grad_norm": 15.279993057250977,
"learning_rate": 1.3780020494988445e-07,
"logits/chosen": 0.6729337573051453,
"logits/rejected": 0.643782377243042,
"logps/chosen": -174.33375549316406,
"logps/ref_chosen": -61.778202056884766,
"logps/ref_rejected": -71.51403045654297,
"logps/rejected": -241.88719177246094,
"loss": 1.119,
"margin_dpo/margin_mean": 57.81760787963867,
"margin_dpo/margin_std": 91.65188598632812,
"step": 453
},
{
"epoch": 0.6863189720332578,
"fcm_dpo/beta": 0.0069506047293543816,
"fcm_dpo/delta": -0.052232228219509125,
"fcm_dpo/margin": 64.71170043945312,
"fcm_dpo/q_t": 0.3960624039173126,
"grad_norm": 13.125537872314453,
"learning_rate": 1.366202015206706e-07,
"logits/chosen": 0.6993863582611084,
"logits/rejected": 0.658939778804779,
"logps/chosen": -157.39630126953125,
"logps/ref_chosen": -51.59515380859375,
"logps/ref_rejected": -63.96732711791992,
"logps/rejected": -234.4801788330078,
"loss": 1.0737,
"margin_dpo/margin_mean": 64.71170043945312,
"margin_dpo/margin_std": 88.06663513183594,
"step": 454
},
{
"epoch": 0.6878306878306878,
"fcm_dpo/beta": 0.006900169886648655,
"fcm_dpo/delta": -0.027319904416799545,
"fcm_dpo/margin": 61.74366760253906,
"fcm_dpo/q_t": 0.40355247259140015,
"grad_norm": 12.596735954284668,
"learning_rate": 1.354433695681474e-07,
"logits/chosen": 0.6100882291793823,
"logits/rejected": 0.5753176808357239,
"logps/chosen": -192.84078979492188,
"logps/ref_chosen": -70.65170288085938,
"logps/ref_rejected": -77.44276428222656,
"logps/rejected": -261.37554931640625,
"loss": 1.0855,
"margin_dpo/margin_mean": 61.74366760253906,
"margin_dpo/margin_std": 85.42503356933594,
"step": 455
},
{
"epoch": 0.6893424036281179,
"fcm_dpo/beta": 0.00692489929497242,
"fcm_dpo/delta": 0.03051423281431198,
"fcm_dpo/margin": 53.519752502441406,
"fcm_dpo/q_t": 0.41640913486480713,
"grad_norm": 15.563993453979492,
"learning_rate": 1.3426974201083439e-07,
"logits/chosen": 0.6147767901420593,
"logits/rejected": 0.5489587783813477,
"logps/chosen": -179.21768188476562,
"logps/ref_chosen": -56.398284912109375,
"logps/ref_rejected": -82.61642456054688,
"logps/rejected": -258.95556640625,
"loss": 1.1384,
"margin_dpo/margin_mean": 53.519752502441406,
"margin_dpo/margin_std": 88.47608184814453,
"step": 456
},
{
"epoch": 0.690854119425548,
"fcm_dpo/beta": 0.006909678690135479,
"fcm_dpo/delta": -0.013150712475180626,
"fcm_dpo/margin": 59.67945861816406,
"fcm_dpo/q_t": 0.40437084436416626,
"grad_norm": 12.112608909606934,
"learning_rate": 1.3309935167761717e-07,
"logits/chosen": 0.8191350698471069,
"logits/rejected": 0.7398391366004944,
"logps/chosen": -164.4642791748047,
"logps/ref_chosen": -44.72057342529297,
"logps/ref_rejected": -68.1158676147461,
"logps/rejected": -247.5390167236328,
"loss": 1.0835,
"margin_dpo/margin_mean": 59.67945098876953,
"margin_dpo/margin_std": 76.94285583496094,
"step": 457
},
{
"epoch": 0.6923658352229781,
"fcm_dpo/beta": 0.006901285611093044,
"fcm_dpo/delta": -0.02610252983868122,
"fcm_dpo/margin": 61.582176208496094,
"fcm_dpo/q_t": 0.4029829204082489,
"grad_norm": 13.647724151611328,
"learning_rate": 1.3193223130682936e-07,
"logits/chosen": 0.7404319643974304,
"logits/rejected": 0.616897463798523,
"logps/chosen": -165.63185119628906,
"logps/ref_chosen": -50.00569152832031,
"logps/ref_rejected": -87.50015258789062,
"logps/rejected": -264.70849609375,
"loss": 1.1008,
"margin_dpo/margin_mean": 61.582176208496094,
"margin_dpo/margin_std": 92.60580444335938,
"step": 458
},
{
"epoch": 0.6938775510204082,
"fcm_dpo/beta": 0.0068240780383348465,
"fcm_dpo/delta": -0.12303808331489563,
"fcm_dpo/margin": 75.65729522705078,
"fcm_dpo/q_t": 0.38151851296424866,
"grad_norm": 12.57613754272461,
"learning_rate": 1.3076841354533658e-07,
"logits/chosen": 0.7350834608078003,
"logits/rejected": 0.6995427012443542,
"logps/chosen": -173.1988525390625,
"logps/ref_chosen": -65.37794494628906,
"logps/ref_rejected": -88.19244384765625,
"logps/rejected": -271.670654296875,
"loss": 1.0222,
"margin_dpo/margin_mean": 75.65729522705078,
"margin_dpo/margin_std": 85.63201904296875,
"step": 459
},
{
"epoch": 0.6953892668178382,
"fcm_dpo/beta": 0.006613044999539852,
"fcm_dpo/delta": -0.08804592490196228,
"fcm_dpo/margin": 73.07688903808594,
"fcm_dpo/q_t": 0.38998186588287354,
"grad_norm": 12.085392951965332,
"learning_rate": 1.2960793094762345e-07,
"logits/chosen": 0.7788273692131042,
"logits/rejected": 0.6462200880050659,
"logps/chosen": -186.39027404785156,
"logps/ref_chosen": -64.5616683959961,
"logps/ref_rejected": -88.67890167236328,
"logps/rejected": -283.58441162109375,
"loss": 1.0389,
"margin_dpo/margin_mean": 73.07688903808594,
"margin_dpo/margin_std": 87.7376708984375,
"step": 460
},
{
"epoch": 0.6969009826152683,
"fcm_dpo/beta": 0.006458759307861328,
"fcm_dpo/delta": -0.09030409157276154,
"fcm_dpo/margin": 74.86151885986328,
"fcm_dpo/q_t": 0.38834458589553833,
"grad_norm": 12.709549903869629,
"learning_rate": 1.2845081597488286e-07,
"logits/chosen": 0.8639695048332214,
"logits/rejected": 0.7700966596603394,
"logps/chosen": -149.4770965576172,
"logps/ref_chosen": -49.4779167175293,
"logps/ref_rejected": -72.65262603759766,
"logps/rejected": -247.51333618164062,
"loss": 1.0373,
"margin_dpo/margin_mean": 74.86151885986328,
"margin_dpo/margin_std": 84.41058349609375,
"step": 461
},
{
"epoch": 0.6984126984126984,
"fcm_dpo/beta": 0.00636675488203764,
"fcm_dpo/delta": -0.10163407772779465,
"fcm_dpo/margin": 77.89765930175781,
"fcm_dpo/q_t": 0.3845703601837158,
"grad_norm": 11.933963775634766,
"learning_rate": 1.27297100994108e-07,
"logits/chosen": 0.7207037210464478,
"logits/rejected": 0.6627846956253052,
"logps/chosen": -172.74496459960938,
"logps/ref_chosen": -60.4951171875,
"logps/ref_rejected": -74.82136535644531,
"logps/rejected": -264.9688720703125,
"loss": 1.0264,
"margin_dpo/margin_mean": 77.89765930175781,
"margin_dpo/margin_std": 88.62437438964844,
"step": 462
},
{
"epoch": 0.6999244142101285,
"fcm_dpo/beta": 0.006417134776711464,
"fcm_dpo/delta": 0.09052874892950058,
"fcm_dpo/margin": 48.669517517089844,
"fcm_dpo/q_t": 0.4276365637779236,
"grad_norm": 15.57586669921875,
"learning_rate": 1.2614681827718695e-07,
"logits/chosen": 0.7261140942573547,
"logits/rejected": 0.7274137139320374,
"logps/chosen": -199.56387329101562,
"logps/ref_chosen": -67.68511962890625,
"logps/ref_rejected": -71.32196044921875,
"logps/rejected": -251.8702392578125,
"loss": 1.1664,
"margin_dpo/margin_mean": 48.669517517089844,
"margin_dpo/margin_std": 81.40229034423828,
"step": 463
},
{
"epoch": 0.7014361300075586,
"fcm_dpo/beta": 0.0064452332444489,
"fcm_dpo/delta": -0.05545463413000107,
"fcm_dpo/margin": 70.26266479492188,
"fcm_dpo/q_t": 0.39617645740509033,
"grad_norm": 11.537524223327637,
"learning_rate": 1.2500000000000005e-07,
"logits/chosen": 0.7131586074829102,
"logits/rejected": 0.6843728423118591,
"logps/chosen": -181.480712890625,
"logps/ref_chosen": -59.16564178466797,
"logps/ref_rejected": -69.56146240234375,
"logps/rejected": -262.13922119140625,
"loss": 1.0764,
"margin_dpo/margin_mean": 70.26266479492188,
"margin_dpo/margin_std": 95.04841613769531,
"step": 464
},
{
"epoch": 0.7029478458049887,
"fcm_dpo/beta": 0.006403525359928608,
"fcm_dpo/delta": 0.013384605757892132,
"fcm_dpo/margin": 60.45001220703125,
"fcm_dpo/q_t": 0.41165050864219666,
"grad_norm": 13.156111717224121,
"learning_rate": 1.238566782415197e-07,
"logits/chosen": 0.7792471647262573,
"logits/rejected": 0.7154022455215454,
"logps/chosen": -186.728759765625,
"logps/ref_chosen": -58.513671875,
"logps/ref_rejected": -84.31745910644531,
"logps/rejected": -272.9825744628906,
"loss": 1.1211,
"margin_dpo/margin_mean": 60.450008392333984,
"margin_dpo/margin_std": 93.35221862792969,
"step": 465
},
{
"epoch": 0.7044595616024187,
"fcm_dpo/beta": 0.006560437381267548,
"fcm_dpo/delta": 0.15931177139282227,
"fcm_dpo/margin": 37.31903839111328,
"fcm_dpo/q_t": 0.444455087184906,
"grad_norm": 17.772686004638672,
"learning_rate": 1.2271688498291334e-07,
"logits/chosen": 0.7378557920455933,
"logits/rejected": 0.745481014251709,
"logps/chosen": -212.20864868164062,
"logps/ref_chosen": -73.26580810546875,
"logps/ref_rejected": -74.83621215820312,
"logps/rejected": -251.0980682373047,
"loss": 1.2267,
"margin_dpo/margin_mean": 37.319034576416016,
"margin_dpo/margin_std": 82.18032836914062,
"step": 466
},
{
"epoch": 0.7059712773998488,
"fcm_dpo/beta": 0.006677757948637009,
"fcm_dpo/delta": 0.049721457064151764,
"fcm_dpo/margin": 52.71526336669922,
"fcm_dpo/q_t": 0.4197397828102112,
"grad_norm": 12.660947799682617,
"learning_rate": 1.2158065210664848e-07,
"logits/chosen": 0.7742260694503784,
"logits/rejected": 0.6145238280296326,
"logps/chosen": -172.53135681152344,
"logps/ref_chosen": -47.57947540283203,
"logps/ref_rejected": -78.68522644042969,
"logps/rejected": -256.35235595703125,
"loss": 1.1333,
"margin_dpo/margin_mean": 52.71526336669922,
"margin_dpo/margin_std": 81.25802612304688,
"step": 467
},
{
"epoch": 0.7074829931972789,
"fcm_dpo/beta": 0.006609264761209488,
"fcm_dpo/delta": -0.09665481746196747,
"fcm_dpo/margin": 74.44082641601562,
"fcm_dpo/q_t": 0.38663211464881897,
"grad_norm": 15.095276832580566,
"learning_rate": 1.204480113956011e-07,
"logits/chosen": 0.6950595378875732,
"logits/rejected": 0.6864985823631287,
"logps/chosen": -180.8209991455078,
"logps/ref_chosen": -63.92778778076172,
"logps/ref_rejected": -76.51626586914062,
"logps/rejected": -267.8503112792969,
"loss": 1.0416,
"margin_dpo/margin_mean": 74.44082641601562,
"margin_dpo/margin_std": 92.40238952636719,
"step": 468
},
{
"epoch": 0.708994708994709,
"fcm_dpo/beta": 0.006470114924013615,
"fcm_dpo/delta": -0.05656132102012634,
"fcm_dpo/margin": 70.00104522705078,
"fcm_dpo/q_t": 0.39446496963500977,
"grad_norm": 12.5325345993042,
"learning_rate": 1.1931899453216697e-07,
"logits/chosen": 0.7946516871452332,
"logits/rejected": 0.7845124006271362,
"logps/chosen": -177.92825317382812,
"logps/ref_chosen": -59.05818176269531,
"logps/ref_rejected": -75.67672729492188,
"logps/rejected": -264.5478515625,
"loss": 1.0448,
"margin_dpo/margin_mean": 70.00105285644531,
"margin_dpo/margin_std": 76.44458770751953,
"step": 469
},
{
"epoch": 0.7105064247921391,
"fcm_dpo/beta": 0.0064891641959548,
"fcm_dpo/delta": -0.021754732355475426,
"fcm_dpo/margin": 64.8369369506836,
"fcm_dpo/q_t": 0.40303927659988403,
"grad_norm": 12.209174156188965,
"learning_rate": 1.1819363309737438e-07,
"logits/chosen": 0.7553179860115051,
"logits/rejected": 0.6802823543548584,
"logps/chosen": -163.2248077392578,
"logps/ref_chosen": -47.86743927001953,
"logps/ref_rejected": -65.96859741210938,
"logps/rejected": -246.1628875732422,
"loss": 1.0883,
"margin_dpo/margin_mean": 64.83692932128906,
"margin_dpo/margin_std": 88.71844482421875,
"step": 470
},
{
"epoch": 0.7120181405895691,
"fcm_dpo/beta": 0.0063733188435435295,
"fcm_dpo/delta": -0.08390133082866669,
"fcm_dpo/margin": 75.26802062988281,
"fcm_dpo/q_t": 0.3887266218662262,
"grad_norm": 12.127059936523438,
"learning_rate": 1.1707195857000215e-07,
"logits/chosen": 0.7076698541641235,
"logits/rejected": 0.6516261100769043,
"logps/chosen": -167.81793212890625,
"logps/ref_chosen": -57.777854919433594,
"logps/ref_rejected": -73.81172180175781,
"logps/rejected": -259.11981201171875,
"loss": 1.0401,
"margin_dpo/margin_mean": 75.26802062988281,
"margin_dpo/margin_std": 89.51625061035156,
"step": 471
},
{
"epoch": 0.7135298563869993,
"fcm_dpo/beta": 0.006402644794434309,
"fcm_dpo/delta": 0.027136290445923805,
"fcm_dpo/margin": 58.34724426269531,
"fcm_dpo/q_t": 0.4152415990829468,
"grad_norm": 12.967268943786621,
"learning_rate": 1.1595400232569768e-07,
"logits/chosen": 0.722518801689148,
"logits/rejected": 0.6758139729499817,
"logps/chosen": -168.4672393798828,
"logps/ref_chosen": -55.908668518066406,
"logps/ref_rejected": -74.70294189453125,
"logps/rejected": -245.6087646484375,
"loss": 1.1481,
"margin_dpo/margin_mean": 58.34724426269531,
"margin_dpo/margin_std": 101.94171142578125,
"step": 472
},
{
"epoch": 0.7150415721844293,
"fcm_dpo/beta": 0.006403686944395304,
"fcm_dpo/delta": 0.015270838513970375,
"fcm_dpo/margin": 60.171531677246094,
"fcm_dpo/q_t": 0.41325926780700684,
"grad_norm": 13.97789192199707,
"learning_rate": 1.1483979563610069e-07,
"logits/chosen": 0.7790465354919434,
"logits/rejected": 0.6616805195808411,
"logps/chosen": -169.07672119140625,
"logps/ref_chosen": -54.16088104248047,
"logps/ref_rejected": -92.76789855957031,
"logps/rejected": -267.8552551269531,
"loss": 1.1431,
"margin_dpo/margin_mean": 60.171531677246094,
"margin_dpo/margin_std": 104.51329040527344,
"step": 473
},
{
"epoch": 0.7165532879818595,
"fcm_dpo/beta": 0.006446688901633024,
"fcm_dpo/delta": 0.03435041010379791,
"fcm_dpo/margin": 56.910247802734375,
"fcm_dpo/q_t": 0.4173174500465393,
"grad_norm": 16.706056594848633,
"learning_rate": 1.1372936966796709e-07,
"logits/chosen": 0.775465190410614,
"logits/rejected": 0.6970380544662476,
"logps/chosen": -166.1460723876953,
"logps/ref_chosen": -46.685707092285156,
"logps/ref_rejected": -71.44731903076172,
"logps/rejected": -247.81793212890625,
"loss": 1.1507,
"margin_dpo/margin_mean": 56.910247802734375,
"margin_dpo/margin_std": 100.28426361083984,
"step": 474
},
{
"epoch": 0.7180650037792895,
"fcm_dpo/beta": 0.006298656575381756,
"fcm_dpo/delta": -0.13723576068878174,
"fcm_dpo/margin": 84.00972747802734,
"fcm_dpo/q_t": 0.37886470556259155,
"grad_norm": 9.853565216064453,
"learning_rate": 1.126227554822985e-07,
"logits/chosen": 0.7011395692825317,
"logits/rejected": 0.6511736512184143,
"logps/chosen": -170.49496459960938,
"logps/ref_chosen": -58.4873046875,
"logps/ref_rejected": -87.00187683105469,
"logps/rejected": -283.019287109375,
"loss": 1.0024,
"margin_dpo/margin_mean": 84.00973510742188,
"margin_dpo/margin_std": 91.04681396484375,
"step": 475
},
{
"epoch": 0.7195767195767195,
"fcm_dpo/beta": 0.00633836118504405,
"fcm_dpo/delta": 0.03369593247771263,
"fcm_dpo/margin": 57.90646743774414,
"fcm_dpo/q_t": 0.4163343906402588,
"grad_norm": 13.150678634643555,
"learning_rate": 1.1151998403347243e-07,
"logits/chosen": 0.6292476654052734,
"logits/rejected": 0.6282519698143005,
"logps/chosen": -210.76119995117188,
"logps/ref_chosen": -75.38162231445312,
"logps/ref_rejected": -76.99822235107422,
"logps/rejected": -270.2842712402344,
"loss": 1.1359,
"margin_dpo/margin_mean": 57.90646743774414,
"margin_dpo/margin_std": 92.76966857910156,
"step": 476
},
{
"epoch": 0.7210884353741497,
"fcm_dpo/beta": 0.006370065733790398,
"fcm_dpo/delta": 0.055561892688274384,
"fcm_dpo/margin": 54.3778076171875,
"fcm_dpo/q_t": 0.42135778069496155,
"grad_norm": 15.231217384338379,
"learning_rate": 1.1042108616837692e-07,
"logits/chosen": 0.7329668998718262,
"logits/rejected": 0.681520938873291,
"logps/chosen": -201.80587768554688,
"logps/ref_chosen": -61.073387145996094,
"logps/ref_rejected": -81.34375,
"logps/rejected": -276.45404052734375,
"loss": 1.19,
"margin_dpo/margin_mean": 54.3778076171875,
"margin_dpo/margin_std": 111.97293853759766,
"step": 477
},
{
"epoch": 0.7226001511715797,
"fcm_dpo/beta": 0.0064564854837954044,
"fcm_dpo/delta": 0.08435309678316116,
"fcm_dpo/margin": 49.316490173339844,
"fcm_dpo/q_t": 0.42754754424095154,
"grad_norm": 15.356504440307617,
"learning_rate": 1.0932609262554746e-07,
"logits/chosen": 0.6630824208259583,
"logits/rejected": 0.6739969253540039,
"logps/chosen": -180.30245971679688,
"logps/ref_chosen": -57.16731643676758,
"logps/ref_rejected": -53.30917739868164,
"logps/rejected": -225.76080322265625,
"loss": 1.188,
"margin_dpo/margin_mean": 49.316490173339844,
"margin_dpo/margin_std": 97.46630859375,
"step": 478
},
{
"epoch": 0.7241118669690099,
"fcm_dpo/beta": 0.006586470641195774,
"fcm_dpo/delta": 0.11177529394626617,
"fcm_dpo/margin": 44.27500915527344,
"fcm_dpo/q_t": 0.433511346578598,
"grad_norm": 14.682652473449707,
"learning_rate": 1.0823503403430734e-07,
"logits/chosen": 0.6922081708908081,
"logits/rejected": 0.6425020098686218,
"logps/chosen": -187.39501953125,
"logps/ref_chosen": -58.91331481933594,
"logps/ref_rejected": -63.7403450012207,
"logps/rejected": -236.4970703125,
"loss": 1.2032,
"margin_dpo/margin_mean": 44.27500915527344,
"margin_dpo/margin_std": 91.15420532226562,
"step": 479
},
{
"epoch": 0.7256235827664399,
"fcm_dpo/beta": 0.0066421665251255035,
"fcm_dpo/delta": -0.0520443469285965,
"fcm_dpo/margin": 67.61965942382812,
"fcm_dpo/q_t": 0.3969612419605255,
"grad_norm": 15.13430118560791,
"learning_rate": 1.0714794091391072e-07,
"logits/chosen": 0.6692843437194824,
"logits/rejected": 0.6604632139205933,
"logps/chosen": -191.74949645996094,
"logps/ref_chosen": -62.80061340332031,
"logps/ref_rejected": -67.58859252929688,
"logps/rejected": -264.1571350097656,
"loss": 1.0838,
"margin_dpo/margin_mean": 67.61965942382812,
"margin_dpo/margin_std": 94.1576156616211,
"step": 480
},
{
"epoch": 0.72713529856387,
"fcm_dpo/beta": 0.006578594446182251,
"fcm_dpo/delta": 0.008969607762992382,
"fcm_dpo/margin": 59.488807678222656,
"fcm_dpo/q_t": 0.4112991988658905,
"grad_norm": 14.179603576660156,
"learning_rate": 1.0606484367268906e-07,
"logits/chosen": 0.640337347984314,
"logits/rejected": 0.6388770341873169,
"logps/chosen": -191.6460418701172,
"logps/ref_chosen": -65.28649139404297,
"logps/ref_rejected": -70.78668212890625,
"logps/rejected": -256.6350402832031,
"loss": 1.1274,
"margin_dpo/margin_mean": 59.48881149291992,
"margin_dpo/margin_std": 96.6456527709961,
"step": 481
},
{
"epoch": 0.7286470143613001,
"fcm_dpo/beta": 0.00661865808069706,
"fcm_dpo/delta": 0.01288910023868084,
"fcm_dpo/margin": 58.54414749145508,
"fcm_dpo/q_t": 0.41362571716308594,
"grad_norm": 14.701621055603027,
"learning_rate": 1.0498577260720048e-07,
"logits/chosen": 0.6496937274932861,
"logits/rejected": 0.49469494819641113,
"logps/chosen": -203.09133911132812,
"logps/ref_chosen": -60.906185150146484,
"logps/ref_rejected": -103.44656372070312,
"logps/rejected": -304.1758728027344,
"loss": 1.1531,
"margin_dpo/margin_mean": 58.54414749145508,
"margin_dpo/margin_std": 107.09422302246094,
"step": 482
},
{
"epoch": 0.7301587301587301,
"fcm_dpo/beta": 0.0065412987023591995,
"fcm_dpo/delta": -0.07768933475017548,
"fcm_dpo/margin": 72.47356414794922,
"fcm_dpo/q_t": 0.3910548985004425,
"grad_norm": 12.262384414672852,
"learning_rate": 1.0391075790138232e-07,
"logits/chosen": 0.7782041430473328,
"logits/rejected": 0.6690878868103027,
"logps/chosen": -174.45535278320312,
"logps/ref_chosen": -53.192012786865234,
"logps/ref_rejected": -81.83927154541016,
"logps/rejected": -275.576171875,
"loss": 1.0493,
"margin_dpo/margin_mean": 72.47357177734375,
"margin_dpo/margin_std": 90.563232421875,
"step": 483
},
{
"epoch": 0.7316704459561603,
"fcm_dpo/beta": 0.006601003929972649,
"fcm_dpo/delta": 0.06470303982496262,
"fcm_dpo/margin": 51.01061248779297,
"fcm_dpo/q_t": 0.42150557041168213,
"grad_norm": 17.135540008544922,
"learning_rate": 1.0283982962570681e-07,
"logits/chosen": 0.8079191446304321,
"logits/rejected": 0.770884096622467,
"logps/chosen": -181.22967529296875,
"logps/ref_chosen": -57.76945877075195,
"logps/ref_rejected": -71.6829833984375,
"logps/rejected": -246.15380859375,
"loss": 1.1319,
"margin_dpo/margin_mean": 51.01061248779297,
"margin_dpo/margin_std": 71.01333618164062,
"step": 484
},
{
"epoch": 0.7331821617535903,
"fcm_dpo/beta": 0.006590794771909714,
"fcm_dpo/delta": 0.04942598566412926,
"fcm_dpo/margin": 53.343475341796875,
"fcm_dpo/q_t": 0.4193718433380127,
"grad_norm": 14.788070678710938,
"learning_rate": 1.0177301773633992e-07,
"logits/chosen": 0.7287328243255615,
"logits/rejected": 0.7055760622024536,
"logps/chosen": -186.085693359375,
"logps/ref_chosen": -56.63584899902344,
"logps/ref_rejected": -70.85614013671875,
"logps/rejected": -253.64944458007812,
"loss": 1.137,
"margin_dpo/margin_mean": 53.343475341796875,
"margin_dpo/margin_std": 80.21830749511719,
"step": 485
},
{
"epoch": 0.7346938775510204,
"fcm_dpo/beta": 0.006727076135575771,
"fcm_dpo/delta": 0.0707259476184845,
"fcm_dpo/margin": 49.296295166015625,
"fcm_dpo/q_t": 0.42624154686927795,
"grad_norm": 13.047743797302246,
"learning_rate": 1.007103520743035e-07,
"logits/chosen": 0.772795557975769,
"logits/rejected": 0.6431682705879211,
"logps/chosen": -204.21878051757812,
"logps/ref_chosen": -56.347023010253906,
"logps/ref_rejected": -85.97221374511719,
"logps/rejected": -283.1402587890625,
"loss": 1.1863,
"margin_dpo/margin_mean": 49.296295166015625,
"margin_dpo/margin_std": 99.67465209960938,
"step": 486
},
{
"epoch": 0.7362055933484505,
"fcm_dpo/beta": 0.006734380032867193,
"fcm_dpo/delta": -0.011707952246069908,
"fcm_dpo/margin": 61.060760498046875,
"fcm_dpo/q_t": 0.40709030628204346,
"grad_norm": 16.7856502532959,
"learning_rate": 9.965186236464046e-08,
"logits/chosen": 0.8297072649002075,
"logits/rejected": 0.7643457651138306,
"logps/chosen": -192.57928466796875,
"logps/ref_chosen": -60.617218017578125,
"logps/ref_rejected": -82.50975036621094,
"logps/rejected": -275.5325927734375,
"loss": 1.105,
"margin_dpo/margin_mean": 61.060760498046875,
"margin_dpo/margin_std": 92.03384399414062,
"step": 487
},
{
"epoch": 0.7377173091458806,
"fcm_dpo/beta": 0.006715429946780205,
"fcm_dpo/delta": -0.04261378198862076,
"fcm_dpo/margin": 65.61810302734375,
"fcm_dpo/q_t": 0.40011560916900635,
"grad_norm": 17.23809051513672,
"learning_rate": 9.859757821558337e-08,
"logits/chosen": 0.7520276308059692,
"logits/rejected": 0.6840384602546692,
"logps/chosen": -188.2955322265625,
"logps/ref_chosen": -63.10905075073242,
"logps/ref_rejected": -82.49348449707031,
"logps/rejected": -273.2980651855469,
"loss": 1.0788,
"margin_dpo/margin_mean": 65.61810302734375,
"margin_dpo/margin_std": 89.72518920898438,
"step": 488
},
{
"epoch": 0.7392290249433107,
"fcm_dpo/beta": 0.006793505512177944,
"fcm_dpo/delta": 0.12338872253894806,
"fcm_dpo/margin": 41.24451446533203,
"fcm_dpo/q_t": 0.4370517432689667,
"grad_norm": 12.945178985595703,
"learning_rate": 9.754752911772615e-08,
"logits/chosen": 0.687408447265625,
"logits/rejected": 0.6387894153594971,
"logps/chosen": -209.18453979492188,
"logps/ref_chosen": -64.98896026611328,
"logps/ref_rejected": -84.39607238769531,
"logps/rejected": -269.8361511230469,
"loss": 1.2308,
"margin_dpo/margin_mean": 41.244510650634766,
"margin_dpo/margin_std": 98.086181640625,
"step": 489
},
{
"epoch": 0.7407407407407407,
"fcm_dpo/beta": 0.006871424615383148,
"fcm_dpo/delta": 0.05978693813085556,
"fcm_dpo/margin": 49.7767333984375,
"fcm_dpo/q_t": 0.42369672656059265,
"grad_norm": 12.462081909179688,
"learning_rate": 9.650174444319956e-08,
"logits/chosen": 0.7716453075408936,
"logits/rejected": 0.7463403344154358,
"logps/chosen": -190.76043701171875,
"logps/ref_chosen": -61.90874481201172,
"logps/ref_rejected": -70.58566284179688,
"logps/rejected": -249.21409606933594,
"loss": 1.2123,
"margin_dpo/margin_mean": 49.7767333984375,
"margin_dpo/margin_std": 111.2440414428711,
"step": 490
},
{
"epoch": 0.7422524565381708,
"fcm_dpo/beta": 0.006887979805469513,
"fcm_dpo/delta": 0.004202168434858322,
"fcm_dpo/margin": 57.374183654785156,
"fcm_dpo/q_t": 0.41012823581695557,
"grad_norm": 13.691835403442383,
"learning_rate": 9.546025344484868e-08,
"logits/chosen": 0.6863425970077515,
"logits/rejected": 0.6274754405021667,
"logps/chosen": -181.0035858154297,
"logps/ref_chosen": -55.47570037841797,
"logps/ref_rejected": -78.70318603515625,
"logps/rejected": -261.6052551269531,
"loss": 1.1145,
"margin_dpo/margin_mean": 57.374183654785156,
"margin_dpo/margin_std": 85.52940368652344,
"step": 491
},
{
"epoch": 0.7437641723356009,
"fcm_dpo/beta": 0.0070613836869597435,
"fcm_dpo/delta": 0.09045213460922241,
"fcm_dpo/margin": 44.112030029296875,
"fcm_dpo/q_t": 0.4296990633010864,
"grad_norm": 16.22068977355957,
"learning_rate": 9.442308525541589e-08,
"logits/chosen": 0.6796330809593201,
"logits/rejected": 0.6053134202957153,
"logps/chosen": -216.349365234375,
"logps/ref_chosen": -67.28638458251953,
"logps/ref_rejected": -82.78628540039062,
"logps/rejected": -275.9613037109375,
"loss": 1.2128,
"margin_dpo/margin_mean": 44.112030029296875,
"margin_dpo/margin_std": 96.07239532470703,
"step": 492
},
{
"epoch": 0.745275888133031,
"fcm_dpo/beta": 0.007019840180873871,
"fcm_dpo/delta": -0.08257926255464554,
"fcm_dpo/margin": 68.13697052001953,
"fcm_dpo/q_t": 0.39166688919067383,
"grad_norm": 13.662456512451172,
"learning_rate": 9.339026888672468e-08,
"logits/chosen": 0.6610127687454224,
"logits/rejected": 0.5783201456069946,
"logps/chosen": -177.93154907226562,
"logps/ref_chosen": -55.92750549316406,
"logps/ref_rejected": -79.12149810791016,
"logps/rejected": -269.26251220703125,
"loss": 1.0692,
"margin_dpo/margin_mean": 68.13697052001953,
"margin_dpo/margin_std": 93.68153381347656,
"step": 493
},
{
"epoch": 0.7467876039304611,
"fcm_dpo/beta": 0.0069389790296554565,
"fcm_dpo/delta": 0.0046972595155239105,
"fcm_dpo/margin": 56.990055084228516,
"fcm_dpo/q_t": 0.41134145855903625,
"grad_norm": 15.932513236999512,
"learning_rate": 9.236183322886945e-08,
"logits/chosen": 0.6411583423614502,
"logits/rejected": 0.5835065245628357,
"logps/chosen": -195.45303344726562,
"logps/ref_chosen": -67.95410919189453,
"logps/ref_rejected": -90.50865173339844,
"logps/rejected": -274.9976501464844,
"loss": 1.154,
"margin_dpo/margin_mean": 56.99005889892578,
"margin_dpo/margin_std": 105.76473999023438,
"step": 494
},
{
"epoch": 0.7482993197278912,
"fcm_dpo/beta": 0.0070509654469788074,
"fcm_dpo/delta": 0.07117826491594315,
"fcm_dpo/margin": 46.89491271972656,
"fcm_dpo/q_t": 0.42860695719718933,
"grad_norm": 17.861509323120117,
"learning_rate": 9.133780704940594e-08,
"logits/chosen": 0.7724558711051941,
"logits/rejected": 0.7018958330154419,
"logps/chosen": -178.06219482421875,
"logps/ref_chosen": -52.62546157836914,
"logps/ref_rejected": -72.06781005859375,
"logps/rejected": -244.3994598388672,
"loss": 1.1992,
"margin_dpo/margin_mean": 46.89491271972656,
"margin_dpo/margin_std": 100.6576156616211,
"step": 495
},
{
"epoch": 0.7498110355253212,
"fcm_dpo/beta": 0.007005490828305483,
"fcm_dpo/delta": 0.0009736791253089905,
"fcm_dpo/margin": 56.84469985961914,
"fcm_dpo/q_t": 0.4145466089248657,
"grad_norm": 14.480406761169434,
"learning_rate": 9.031821899254797e-08,
"logits/chosen": 0.7183775901794434,
"logits/rejected": 0.5997449159622192,
"logps/chosen": -197.369384765625,
"logps/ref_chosen": -57.597320556640625,
"logps/ref_rejected": -94.36127471923828,
"logps/rejected": -290.97802734375,
"loss": 1.1545,
"margin_dpo/margin_mean": 56.844696044921875,
"margin_dpo/margin_std": 105.66326141357422,
"step": 496
},
{
"epoch": 0.7513227513227513,
"fcm_dpo/beta": 0.006960996426641941,
"fcm_dpo/delta": -0.07592535018920898,
"fcm_dpo/margin": 67.83001708984375,
"fcm_dpo/q_t": 0.3918842673301697,
"grad_norm": 11.594161033630371,
"learning_rate": 8.930309757836516e-08,
"logits/chosen": 0.7433227300643921,
"logits/rejected": 0.7107380628585815,
"logps/chosen": -210.66940307617188,
"logps/ref_chosen": -72.78994750976562,
"logps/ref_rejected": -89.48483276367188,
"logps/rejected": -295.1943054199219,
"loss": 1.068,
"margin_dpo/margin_mean": 67.83000946044922,
"margin_dpo/margin_std": 93.42587280273438,
"step": 497
},
{
"epoch": 0.7528344671201814,
"fcm_dpo/beta": 0.0068884193897247314,
"fcm_dpo/delta": -0.05250941216945648,
"fcm_dpo/margin": 65.34432983398438,
"fcm_dpo/q_t": 0.3966979384422302,
"grad_norm": 16.511926651000977,
"learning_rate": 8.829247120198563e-08,
"logits/chosen": 0.7198547720909119,
"logits/rejected": 0.6872572898864746,
"logps/chosen": -193.16600036621094,
"logps/ref_chosen": -68.36572265625,
"logps/ref_rejected": -71.28846740722656,
"logps/rejected": -261.43310546875,
"loss": 1.068,
"margin_dpo/margin_mean": 65.34432983398438,
"margin_dpo/margin_std": 86.45211029052734,
"step": 498
},
{
"epoch": 0.7543461829176115,
"fcm_dpo/beta": 0.006849166005849838,
"fcm_dpo/delta": -0.03738480433821678,
"fcm_dpo/margin": 63.62023162841797,
"fcm_dpo/q_t": 0.40244078636169434,
"grad_norm": 14.907723426818848,
"learning_rate": 8.728636813280163e-08,
"logits/chosen": 0.7367246747016907,
"logits/rejected": 0.6651813983917236,
"logps/chosen": -184.7228546142578,
"logps/ref_chosen": -61.90882873535156,
"logps/ref_rejected": -91.9411392211914,
"logps/rejected": -278.3753967285156,
"loss": 1.1302,
"margin_dpo/margin_mean": 63.62023162841797,
"margin_dpo/margin_std": 109.40308380126953,
"step": 499
},
{
"epoch": 0.7558578987150416,
"fcm_dpo/beta": 0.0068002426996827126,
"fcm_dpo/delta": -0.0016520768404006958,
"fcm_dpo/margin": 59.02922821044922,
"fcm_dpo/q_t": 0.40788906812667847,
"grad_norm": 16.023651123046875,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": 0.6595408916473389,
"logits/rejected": 0.656460165977478,
"logps/chosen": -200.04827880859375,
"logps/ref_chosen": -70.225830078125,
"logps/ref_rejected": -71.72203063964844,
"logps/rejected": -260.57373046875,
"loss": 1.1502,
"margin_dpo/margin_mean": 59.02923583984375,
"margin_dpo/margin_std": 107.1384506225586,
"step": 500
},
{
"epoch": 0.7573696145124716,
"fcm_dpo/beta": 0.00688003096729517,
"fcm_dpo/delta": 0.03766298666596413,
"fcm_dpo/margin": 52.81761169433594,
"fcm_dpo/q_t": 0.4161261022090912,
"grad_norm": 13.691091537475586,
"learning_rate": 8.528784436016878e-08,
"logits/chosen": 0.6945023536682129,
"logits/rejected": 0.695048987865448,
"logps/chosen": -194.70086669921875,
"logps/ref_chosen": -64.59880828857422,
"logps/ref_rejected": -70.59329223632812,
"logps/rejected": -253.51296997070312,
"loss": 1.1158,
"margin_dpo/margin_mean": 52.81761169433594,
"margin_dpo/margin_std": 72.4344711303711,
"step": 501
},
{
"epoch": 0.7588813303099018,
"fcm_dpo/beta": 0.006907638628035784,
"fcm_dpo/delta": 0.019454587250947952,
"fcm_dpo/margin": 55.175048828125,
"fcm_dpo/q_t": 0.41293010115623474,
"grad_norm": 13.987884521484375,
"learning_rate": 8.4295479559726e-08,
"logits/chosen": 0.6924780607223511,
"logits/rejected": 0.6435062885284424,
"logps/chosen": -193.63800048828125,
"logps/ref_chosen": -65.46662902832031,
"logps/ref_rejected": -90.22233581542969,
"logps/rejected": -273.5687561035156,
"loss": 1.1205,
"margin_dpo/margin_mean": 55.175048828125,
"margin_dpo/margin_std": 83.9233627319336,
"step": 502
},
{
"epoch": 0.7603930461073318,
"fcm_dpo/beta": 0.006876880303025246,
"fcm_dpo/delta": -0.010101448744535446,
"fcm_dpo/margin": 59.565589904785156,
"fcm_dpo/q_t": 0.40677881240844727,
"grad_norm": 12.284053802490234,
"learning_rate": 8.330774987092712e-08,
"logits/chosen": 0.7192538976669312,
"logits/rejected": 0.7191355228424072,
"logps/chosen": -171.00331115722656,
"logps/ref_chosen": -51.83476257324219,
"logps/ref_rejected": -57.62522506713867,
"logps/rejected": -236.35935974121094,
"loss": 1.1189,
"margin_dpo/margin_mean": 59.56558609008789,
"margin_dpo/margin_std": 94.57598114013672,
"step": 503
},
{
"epoch": 0.7619047619047619,
"fcm_dpo/beta": 0.006783470045775175,
"fcm_dpo/delta": -0.1334783434867859,
"fcm_dpo/margin": 77.62858581542969,
"fcm_dpo/q_t": 0.37845832109451294,
"grad_norm": 17.458255767822266,
"learning_rate": 8.232468292269479e-08,
"logits/chosen": 0.6963123083114624,
"logits/rejected": 0.6729052066802979,
"logps/chosen": -189.20529174804688,
"logps/ref_chosen": -68.65119934082031,
"logps/ref_rejected": -77.91394805908203,
"logps/rejected": -276.09661865234375,
"loss": 1.0045,
"margin_dpo/margin_mean": 77.62858581542969,
"margin_dpo/margin_std": 83.20510864257812,
"step": 504
},
{
"epoch": 0.763416477702192,
"fcm_dpo/beta": 0.006723982747644186,
"fcm_dpo/delta": 0.06746991723775864,
"fcm_dpo/margin": 49.71613693237305,
"fcm_dpo/q_t": 0.4255116581916809,
"grad_norm": 13.61350154876709,
"learning_rate": 8.134630621352483e-08,
"logits/chosen": 0.7165220975875854,
"logits/rejected": 0.6773439049720764,
"logps/chosen": -186.99365234375,
"logps/ref_chosen": -59.99884796142578,
"logps/ref_rejected": -76.88048553466797,
"logps/rejected": -253.5914306640625,
"loss": 1.195,
"margin_dpo/margin_mean": 49.71613693237305,
"margin_dpo/margin_std": 101.73808288574219,
"step": 505
},
{
"epoch": 0.764928193499622,
"fcm_dpo/beta": 0.006802085321396589,
"fcm_dpo/delta": 0.026249084621667862,
"fcm_dpo/margin": 55.080204010009766,
"fcm_dpo/q_t": 0.4139713644981384,
"grad_norm": 15.93222713470459,
"learning_rate": 8.037264711071698e-08,
"logits/chosen": 0.7465803623199463,
"logits/rejected": 0.727637767791748,
"logps/chosen": -192.9581298828125,
"logps/ref_chosen": -70.07130432128906,
"logps/ref_rejected": -82.03775024414062,
"logps/rejected": -260.0047607421875,
"loss": 1.1574,
"margin_dpo/margin_mean": 55.080204010009766,
"margin_dpo/margin_std": 100.28858947753906,
"step": 506
},
{
"epoch": 0.7664399092970522,
"fcm_dpo/beta": 0.006808855105191469,
"fcm_dpo/delta": 0.0011053308844566345,
"fcm_dpo/margin": 58.56227111816406,
"fcm_dpo/q_t": 0.4128578305244446,
"grad_norm": 13.58639144897461,
"learning_rate": 7.940373284960933e-08,
"logits/chosen": 0.7121202945709229,
"logits/rejected": 0.6578322649002075,
"logps/chosen": -208.14859008789062,
"logps/ref_chosen": -72.00703430175781,
"logps/ref_rejected": -93.94987487792969,
"logps/rejected": -288.6536865234375,
"loss": 1.1411,
"margin_dpo/margin_mean": 58.5622673034668,
"margin_dpo/margin_std": 102.10011291503906,
"step": 507
},
{
"epoch": 0.7679516250944822,
"fcm_dpo/beta": 0.006827831733971834,
"fcm_dpo/delta": -0.04231097176671028,
"fcm_dpo/margin": 64.44580078125,
"fcm_dpo/q_t": 0.40085405111312866,
"grad_norm": 15.900528907775879,
"learning_rate": 7.843959053281663e-08,
"logits/chosen": 0.6829763054847717,
"logits/rejected": 0.5482306480407715,
"logps/chosen": -184.63729858398438,
"logps/ref_chosen": -60.21992492675781,
"logps/ref_rejected": -95.9200668334961,
"logps/rejected": -284.78326416015625,
"loss": 1.0987,
"margin_dpo/margin_mean": 64.44580841064453,
"margin_dpo/margin_std": 96.61326599121094,
"step": 508
},
{
"epoch": 0.7694633408919124,
"fcm_dpo/beta": 0.00677447160705924,
"fcm_dpo/delta": 0.0181202981621027,
"fcm_dpo/margin": 56.46039962768555,
"fcm_dpo/q_t": 0.4128088355064392,
"grad_norm": 16.664478302001953,
"learning_rate": 7.748024712947204e-08,
"logits/chosen": 0.6257327198982239,
"logits/rejected": 0.6041054129600525,
"logps/chosen": -195.684326171875,
"logps/ref_chosen": -66.27017211914062,
"logps/ref_rejected": -71.73065185546875,
"logps/rejected": -257.605224609375,
"loss": 1.131,
"margin_dpo/margin_mean": 56.46039962768555,
"margin_dpo/margin_std": 91.35394287109375,
"step": 509
},
{
"epoch": 0.7709750566893424,
"fcm_dpo/beta": 0.00671444833278656,
"fcm_dpo/delta": -0.05051817744970322,
"fcm_dpo/margin": 66.65663146972656,
"fcm_dpo/q_t": 0.40036720037460327,
"grad_norm": 15.058761596679688,
"learning_rate": 7.652572947447272e-08,
"logits/chosen": 0.7969958782196045,
"logits/rejected": 0.6925790905952454,
"logps/chosen": -182.7880859375,
"logps/ref_chosen": -53.54487609863281,
"logps/ref_rejected": -91.36648559570312,
"logps/rejected": -287.26629638671875,
"loss": 1.1068,
"margin_dpo/margin_mean": 66.65662384033203,
"margin_dpo/margin_std": 106.01554870605469,
"step": 510
},
{
"epoch": 0.7724867724867724,
"fcm_dpo/beta": 0.006622654385864735,
"fcm_dpo/delta": -0.12184499204158783,
"fcm_dpo/margin": 77.87154388427734,
"fcm_dpo/q_t": 0.38256335258483887,
"grad_norm": 17.00933265686035,
"learning_rate": 7.557606426772961e-08,
"logits/chosen": 0.7343342304229736,
"logits/rejected": 0.6737358570098877,
"logps/chosen": -179.90391540527344,
"logps/ref_chosen": -55.844383239746094,
"logps/ref_rejected": -86.49819946289062,
"logps/rejected": -288.4292907714844,
"loss": 1.0325,
"margin_dpo/margin_mean": 77.87153625488281,
"margin_dpo/margin_std": 96.47059631347656,
"step": 511
},
{
"epoch": 0.7739984882842026,
"fcm_dpo/beta": 0.006611551158130169,
"fcm_dpo/delta": 0.053356267511844635,
"fcm_dpo/margin": 52.716392517089844,
"fcm_dpo/q_t": 0.42018306255340576,
"grad_norm": 18.820058822631836,
"learning_rate": 7.463127807341966e-08,
"logits/chosen": 0.6046255826950073,
"logits/rejected": 0.5958194732666016,
"logps/chosen": -186.82579040527344,
"logps/ref_chosen": -61.653038024902344,
"logps/ref_rejected": -72.83148193359375,
"logps/rejected": -250.7206268310547,
"loss": 1.1685,
"margin_dpo/margin_mean": 52.716392517089844,
"margin_dpo/margin_std": 98.232666015625,
"step": 512
},
{
"epoch": 0.7755102040816326,
"fcm_dpo/beta": 0.006585326977074146,
"fcm_dpo/delta": -0.02963019162416458,
"fcm_dpo/margin": 64.99451446533203,
"fcm_dpo/q_t": 0.4018802046775818,
"grad_norm": 12.53990650177002,
"learning_rate": 7.369139731924401e-08,
"logits/chosen": 0.8336935043334961,
"logits/rejected": 0.7769891023635864,
"logps/chosen": -163.33245849609375,
"logps/ref_chosen": -50.85256576538086,
"logps/ref_rejected": -69.21754455566406,
"logps/rejected": -246.6919403076172,
"loss": 1.0753,
"margin_dpo/margin_mean": 64.99451446533203,
"margin_dpo/margin_std": 83.66407775878906,
"step": 513
},
{
"epoch": 0.7770219198790628,
"fcm_dpo/beta": 0.006546557880938053,
"fcm_dpo/delta": -0.07992081344127655,
"fcm_dpo/margin": 72.72770690917969,
"fcm_dpo/q_t": 0.3906702697277069,
"grad_norm": 14.33784008026123,
"learning_rate": 7.275644829568747e-08,
"logits/chosen": 0.7393954396247864,
"logits/rejected": 0.7025594711303711,
"logps/chosen": -196.56289672851562,
"logps/ref_chosen": -69.38493347167969,
"logps/ref_rejected": -83.32447814941406,
"logps/rejected": -283.2301330566406,
"loss": 1.061,
"margin_dpo/margin_mean": 72.72770690917969,
"margin_dpo/margin_std": 96.85029602050781,
"step": 514
},
{
"epoch": 0.7785336356764928,
"fcm_dpo/beta": 0.0065173981711268425,
"fcm_dpo/delta": 0.027946949005126953,
"fcm_dpo/margin": 57.247520446777344,
"fcm_dpo/q_t": 0.41493576765060425,
"grad_norm": 17.1118221282959,
"learning_rate": 7.182645715528435e-08,
"logits/chosen": 0.7443599700927734,
"logits/rejected": 0.6602603793144226,
"logps/chosen": -189.67813110351562,
"logps/ref_chosen": -53.687034606933594,
"logps/ref_rejected": -83.59614562988281,
"logps/rejected": -276.83477783203125,
"loss": 1.1414,
"margin_dpo/margin_mean": 57.24752426147461,
"margin_dpo/margin_std": 96.7652359008789,
"step": 515
},
{
"epoch": 0.780045351473923,
"fcm_dpo/beta": 0.006573040038347244,
"fcm_dpo/delta": 0.03187675401568413,
"fcm_dpo/margin": 56.167274475097656,
"fcm_dpo/q_t": 0.4146859049797058,
"grad_norm": 18.41140365600586,
"learning_rate": 7.090144991188568e-08,
"logits/chosen": 0.7212256193161011,
"logits/rejected": 0.6795363426208496,
"logps/chosen": -173.27099609375,
"logps/ref_chosen": -56.9017219543457,
"logps/ref_rejected": -67.83477783203125,
"logps/rejected": -240.371337890625,
"loss": 1.1466,
"margin_dpo/margin_mean": 56.167274475097656,
"margin_dpo/margin_std": 96.67643737792969,
"step": 516
},
{
"epoch": 0.781557067271353,
"fcm_dpo/beta": 0.006739528849720955,
"fcm_dpo/delta": 0.13554570078849792,
"fcm_dpo/margin": 39.611785888671875,
"fcm_dpo/q_t": 0.440265029668808,
"grad_norm": 17.20258903503418,
"learning_rate": 6.998145243993284e-08,
"logits/chosen": 0.7755421996116638,
"logits/rejected": 0.7712531089782715,
"logps/chosen": -202.08642578125,
"logps/ref_chosen": -61.775142669677734,
"logps/ref_rejected": -62.88270950317383,
"logps/rejected": -242.8057861328125,
"loss": 1.2254,
"margin_dpo/margin_mean": 39.611785888671875,
"margin_dpo/margin_std": 89.51103973388672,
"step": 517
},
{
"epoch": 0.783068783068783,
"fcm_dpo/beta": 0.006766768172383308,
"fcm_dpo/delta": 0.013192320242524147,
"fcm_dpo/margin": 57.238216400146484,
"fcm_dpo/q_t": 0.41345030069351196,
"grad_norm": 13.554084777832031,
"learning_rate": 6.906649047373245e-08,
"logits/chosen": 0.7449650764465332,
"logits/rejected": 0.6929754018783569,
"logps/chosen": -179.30035400390625,
"logps/ref_chosen": -62.02523422241211,
"logps/ref_rejected": -79.06085205078125,
"logps/rejected": -253.57418823242188,
"loss": 1.1253,
"margin_dpo/margin_mean": 57.238216400146484,
"margin_dpo/margin_std": 91.79560089111328,
"step": 518
},
{
"epoch": 0.7845804988662132,
"fcm_dpo/beta": 0.006957560312002897,
"fcm_dpo/delta": 0.16204290091991425,
"fcm_dpo/margin": 34.67028045654297,
"fcm_dpo/q_t": 0.4466700553894043,
"grad_norm": 23.354780197143555,
"learning_rate": 6.815658960673781e-08,
"logits/chosen": 0.7701222896575928,
"logits/rejected": 0.7111513614654541,
"logps/chosen": -206.356689453125,
"logps/ref_chosen": -61.60636901855469,
"logps/ref_rejected": -74.50727844238281,
"logps/rejected": -253.92787170410156,
"loss": 1.3135,
"margin_dpo/margin_mean": 34.67028045654297,
"margin_dpo/margin_std": 115.71553039550781,
"step": 519
},
{
"epoch": 0.7860922146636432,
"fcm_dpo/beta": 0.007012324873358011,
"fcm_dpo/delta": 0.05751825124025345,
"fcm_dpo/margin": 49.06908416748047,
"fcm_dpo/q_t": 0.422906756401062,
"grad_norm": 16.057010650634766,
"learning_rate": 6.725177529083209e-08,
"logits/chosen": 0.8252368569374084,
"logits/rejected": 0.7620112895965576,
"logps/chosen": -194.60711669921875,
"logps/ref_chosen": -62.87343215942383,
"logps/ref_rejected": -76.505615234375,
"logps/rejected": -257.3083801269531,
"loss": 1.1649,
"margin_dpo/margin_mean": 49.06908416748047,
"margin_dpo/margin_std": 88.33916473388672,
"step": 520
},
{
"epoch": 0.7876039304610734,
"fcm_dpo/beta": 0.00697598559781909,
"fcm_dpo/delta": -0.10865991562604904,
"fcm_dpo/margin": 72.15048217773438,
"fcm_dpo/q_t": 0.3853059411048889,
"grad_norm": 13.737951278686523,
"learning_rate": 6.63520728356167e-08,
"logits/chosen": 0.5883495211601257,
"logits/rejected": 0.5072116255760193,
"logps/chosen": -194.3409423828125,
"logps/ref_chosen": -64.20668029785156,
"logps/ref_rejected": -92.28083038330078,
"logps/rejected": -294.5655822753906,
"loss": 1.0398,
"margin_dpo/margin_mean": 72.15048217773438,
"margin_dpo/margin_std": 91.8095474243164,
"step": 521
},
{
"epoch": 0.7891156462585034,
"fcm_dpo/beta": 0.006977587938308716,
"fcm_dpo/delta": 0.0699794664978981,
"fcm_dpo/margin": 47.636898040771484,
"fcm_dpo/q_t": 0.4260995388031006,
"grad_norm": 16.24595069885254,
"learning_rate": 6.545750740770336e-08,
"logits/chosen": 0.6884851455688477,
"logits/rejected": 0.6817054152488708,
"logps/chosen": -188.324462890625,
"logps/ref_chosen": -58.369720458984375,
"logps/ref_rejected": -68.79248046875,
"logps/rejected": -246.38412475585938,
"loss": 1.2182,
"margin_dpo/margin_mean": 47.63689422607422,
"margin_dpo/margin_std": 109.36659240722656,
"step": 522
},
{
"epoch": 0.7906273620559335,
"fcm_dpo/beta": 0.007021876983344555,
"fcm_dpo/delta": 0.017026737332344055,
"fcm_dpo/margin": 54.623382568359375,
"fcm_dpo/q_t": 0.41094690561294556,
"grad_norm": 20.093515396118164,
"learning_rate": 6.456810403001012e-08,
"logits/chosen": 0.7484044432640076,
"logits/rejected": 0.6152558326721191,
"logps/chosen": -199.60435485839844,
"logps/ref_chosen": -65.71324157714844,
"logps/ref_rejected": -91.98896789550781,
"logps/rejected": -280.50347900390625,
"loss": 1.1596,
"margin_dpo/margin_mean": 54.623382568359375,
"margin_dpo/margin_std": 101.72914123535156,
"step": 523
},
{
"epoch": 0.7921390778533636,
"fcm_dpo/beta": 0.007063503377139568,
"fcm_dpo/delta": -0.008377037942409515,
"fcm_dpo/margin": 57.727630615234375,
"fcm_dpo/q_t": 0.4065864086151123,
"grad_norm": 16.176006317138672,
"learning_rate": 6.368388758106134e-08,
"logits/chosen": 0.661557674407959,
"logits/rejected": 0.637451171875,
"logps/chosen": -190.521728515625,
"logps/ref_chosen": -76.35124969482422,
"logps/ref_rejected": -89.96072387695312,
"logps/rejected": -261.85882568359375,
"loss": 1.1107,
"margin_dpo/margin_mean": 57.727630615234375,
"margin_dpo/margin_std": 87.59829711914062,
"step": 524
},
{
"epoch": 0.7936507936507936,
"fcm_dpo/beta": 0.007091479375958443,
"fcm_dpo/delta": 0.05275290459394455,
"fcm_dpo/margin": 49.22231674194336,
"fcm_dpo/q_t": 0.42219555377960205,
"grad_norm": 18.684139251708984,
"learning_rate": 6.280488279429185e-08,
"logits/chosen": 0.5486440658569336,
"logits/rejected": 0.5439519882202148,
"logps/chosen": -203.2303924560547,
"logps/ref_chosen": -75.49578857421875,
"logps/ref_rejected": -84.04852294921875,
"logps/rejected": -261.00543212890625,
"loss": 1.1759,
"margin_dpo/margin_mean": 49.22231674194336,
"margin_dpo/margin_std": 96.31336975097656,
"step": 525
},
{
"epoch": 0.7951625094482238,
"fcm_dpo/beta": 0.007162164896726608,
"fcm_dpo/delta": 0.1063753068447113,
"fcm_dpo/margin": 41.36842346191406,
"fcm_dpo/q_t": 0.43258005380630493,
"grad_norm": 16.012828826904297,
"learning_rate": 6.193111425735515e-08,
"logits/chosen": 0.7274391055107117,
"logits/rejected": 0.65253746509552,
"logps/chosen": -199.98507690429688,
"logps/ref_chosen": -61.29241943359375,
"logps/ref_rejected": -82.47763061523438,
"logps/rejected": -262.5387268066406,
"loss": 1.2155,
"margin_dpo/margin_mean": 41.36842346191406,
"margin_dpo/margin_std": 88.6600341796875,
"step": 526
},
{
"epoch": 0.7966742252456538,
"fcm_dpo/beta": 0.007441909518092871,
"fcm_dpo/delta": 0.13420581817626953,
"fcm_dpo/margin": 36.05781555175781,
"fcm_dpo/q_t": 0.44124549627304077,
"grad_norm": 16.688430786132812,
"learning_rate": 6.106260641143546e-08,
"logits/chosen": 0.8245516419410706,
"logits/rejected": 0.7362295389175415,
"logps/chosen": -209.2223358154297,
"logps/ref_chosen": -61.472625732421875,
"logps/ref_rejected": -90.52831268310547,
"logps/rejected": -274.3358154296875,
"loss": 1.2613,
"margin_dpo/margin_mean": 36.05781173706055,
"margin_dpo/margin_std": 97.74179077148438,
"step": 527
},
{
"epoch": 0.7981859410430839,
"fcm_dpo/beta": 0.0075470441952347755,
"fcm_dpo/delta": 0.09618767350912094,
"fcm_dpo/margin": 40.661521911621094,
"fcm_dpo/q_t": 0.4321001172065735,
"grad_norm": 18.79235076904297,
"learning_rate": 6.019938355056422e-08,
"logits/chosen": 0.6475770473480225,
"logits/rejected": 0.5618788003921509,
"logps/chosen": -189.43951416015625,
"logps/ref_chosen": -58.792015075683594,
"logps/ref_rejected": -71.82516479492188,
"logps/rejected": -243.13417053222656,
"loss": 1.2484,
"margin_dpo/margin_mean": 40.661521911621094,
"margin_dpo/margin_std": 104.12982177734375,
"step": 528
},
{
"epoch": 0.799697656840514,
"fcm_dpo/beta": 0.00729703065007925,
"fcm_dpo/delta": -0.2618618607521057,
"fcm_dpo/margin": 88.30415344238281,
"fcm_dpo/q_t": 0.3532322645187378,
"grad_norm": 16.628124237060547,
"learning_rate": 5.934146982094049e-08,
"logits/chosen": 0.6590827703475952,
"logits/rejected": 0.602139949798584,
"logps/chosen": -175.66940307617188,
"logps/ref_chosen": -55.070960998535156,
"logps/ref_rejected": -75.44007873535156,
"logps/rejected": -284.3426513671875,
"loss": 0.942,
"margin_dpo/margin_mean": 88.30415344238281,
"margin_dpo/margin_std": 90.58662414550781,
"step": 529
},
{
"epoch": 0.8012093726379441,
"fcm_dpo/beta": 0.007245873101055622,
"fcm_dpo/delta": 0.02474869042634964,
"fcm_dpo/margin": 51.91332244873047,
"fcm_dpo/q_t": 0.41642701625823975,
"grad_norm": 19.019323348999023,
"learning_rate": 5.848888922025552e-08,
"logits/chosen": 0.7544151544570923,
"logits/rejected": 0.706871747970581,
"logps/chosen": -182.08175659179688,
"logps/ref_chosen": -56.743812561035156,
"logps/ref_rejected": -76.6692123413086,
"logps/rejected": -253.9204864501953,
"loss": 1.1443,
"margin_dpo/margin_mean": 51.91332244873047,
"margin_dpo/margin_std": 89.03276062011719,
"step": 530
},
{
"epoch": 0.8027210884353742,
"fcm_dpo/beta": 0.007274698466062546,
"fcm_dpo/delta": -0.005907643586397171,
"fcm_dpo/margin": 55.718894958496094,
"fcm_dpo/q_t": 0.4090062081813812,
"grad_norm": 15.70089054107666,
"learning_rate": 5.7641665597021435e-08,
"logits/chosen": 0.6855486631393433,
"logits/rejected": 0.6101270318031311,
"logps/chosen": -177.32748413085938,
"logps/ref_chosen": -51.116455078125,
"logps/ref_rejected": -79.52884674072266,
"logps/rejected": -261.458740234375,
"loss": 1.1205,
"margin_dpo/margin_mean": 55.718894958496094,
"margin_dpo/margin_std": 88.86299133300781,
"step": 531
},
{
"epoch": 0.8042328042328042,
"fcm_dpo/beta": 0.007200066931545734,
"fcm_dpo/delta": -0.03519837558269501,
"fcm_dpo/margin": 60.228580474853516,
"fcm_dpo/q_t": 0.4023039937019348,
"grad_norm": 16.101490020751953,
"learning_rate": 5.679982264990424e-08,
"logits/chosen": 0.6560695171356201,
"logits/rejected": 0.6018407344818115,
"logps/chosen": -204.2732696533203,
"logps/ref_chosen": -58.279945373535156,
"logps/ref_rejected": -78.05426788330078,
"logps/rejected": -284.27618408203125,
"loss": 1.1038,
"margin_dpo/margin_mean": 60.228580474853516,
"margin_dpo/margin_std": 93.1765365600586,
"step": 532
},
{
"epoch": 0.8057445200302343,
"fcm_dpo/beta": 0.00716027244925499,
"fcm_dpo/delta": -0.03689378499984741,
"fcm_dpo/margin": 60.79398727416992,
"fcm_dpo/q_t": 0.40077680349349976,
"grad_norm": 15.326909065246582,
"learning_rate": 5.596338392706076e-08,
"logits/chosen": 0.8385459184646606,
"logits/rejected": 0.7652316093444824,
"logps/chosen": -158.7193145751953,
"logps/ref_chosen": -56.41801071166992,
"logps/ref_rejected": -73.89324951171875,
"logps/rejected": -236.98855590820312,
"loss": 1.0913,
"margin_dpo/margin_mean": 60.793983459472656,
"margin_dpo/margin_std": 87.72501373291016,
"step": 533
},
{
"epoch": 0.8072562358276644,
"fcm_dpo/beta": 0.007086427416652441,
"fcm_dpo/delta": -0.02248242497444153,
"fcm_dpo/margin": 59.43117141723633,
"fcm_dpo/q_t": 0.4066773056983948,
"grad_norm": 13.382204055786133,
"learning_rate": 5.513237282548033e-08,
"logits/chosen": 0.7272214293479919,
"logits/rejected": 0.6883162260055542,
"logps/chosen": -187.7013397216797,
"logps/ref_chosen": -60.748687744140625,
"logps/ref_rejected": -73.8623046875,
"logps/rejected": -260.2461242675781,
"loss": 1.1293,
"margin_dpo/margin_mean": 59.431175231933594,
"margin_dpo/margin_std": 101.31953430175781,
"step": 534
},
{
"epoch": 0.8087679516250945,
"fcm_dpo/beta": 0.007163902744650841,
"fcm_dpo/delta": 0.04707575589418411,
"fcm_dpo/margin": 49.48677444458008,
"fcm_dpo/q_t": 0.4207179546356201,
"grad_norm": 17.681066513061523,
"learning_rate": 5.430681259032957e-08,
"logits/chosen": 0.6349679827690125,
"logits/rejected": 0.5681853294372559,
"logps/chosen": -200.6911163330078,
"logps/ref_chosen": -61.637413024902344,
"logps/ref_rejected": -80.93138885498047,
"logps/rejected": -269.47186279296875,
"loss": 1.1707,
"margin_dpo/margin_mean": 49.486778259277344,
"margin_dpo/margin_std": 94.28369140625,
"step": 535
},
{
"epoch": 0.8102796674225246,
"fcm_dpo/beta": 0.006997551769018173,
"fcm_dpo/delta": -0.16011017560958862,
"fcm_dpo/margin": 78.75794982910156,
"fcm_dpo/q_t": 0.37516194581985474,
"grad_norm": 11.368307113647461,
"learning_rate": 5.3486726314303175e-08,
"logits/chosen": 0.7681195735931396,
"logits/rejected": 0.6752403378486633,
"logps/chosen": -171.7135009765625,
"logps/ref_chosen": -51.88897705078125,
"logps/ref_rejected": -73.34864044189453,
"logps/rejected": -271.93115234375,
"loss": 0.9889,
"margin_dpo/margin_mean": 78.75794982910156,
"margin_dpo/margin_std": 84.82420349121094,
"step": 536
},
{
"epoch": 0.8117913832199547,
"fcm_dpo/beta": 0.006904451176524162,
"fcm_dpo/delta": -0.0123976431787014,
"fcm_dpo/margin": 59.59520721435547,
"fcm_dpo/q_t": 0.40800943970680237,
"grad_norm": 14.604050636291504,
"learning_rate": 5.267213693697695e-08,
"logits/chosen": 0.8269777297973633,
"logits/rejected": 0.7264485359191895,
"logps/chosen": -191.2030792236328,
"logps/ref_chosen": -54.248619079589844,
"logps/ref_rejected": -94.94343566894531,
"logps/rejected": -291.49310302734375,
"loss": 1.1189,
"margin_dpo/margin_mean": 59.59520721435547,
"margin_dpo/margin_std": 95.32566833496094,
"step": 537
},
{
"epoch": 0.8133030990173847,
"fcm_dpo/beta": 0.006886166054755449,
"fcm_dpo/delta": -0.06406421959400177,
"fcm_dpo/margin": 66.96125793457031,
"fcm_dpo/q_t": 0.39491915702819824,
"grad_norm": 13.988241195678711,
"learning_rate": 5.1863067244167144e-08,
"logits/chosen": 0.7210502624511719,
"logits/rejected": 0.6904141306877136,
"logps/chosen": -201.9376983642578,
"logps/ref_chosen": -70.09353637695312,
"logps/ref_rejected": -79.49833679199219,
"logps/rejected": -278.3037414550781,
"loss": 1.0629,
"margin_dpo/margin_mean": 66.96125793457031,
"margin_dpo/margin_std": 87.8351058959961,
"step": 538
},
{
"epoch": 0.8148148148148148,
"fcm_dpo/beta": 0.006873176898807287,
"fcm_dpo/delta": 0.052093926817178726,
"fcm_dpo/margin": 50.87729263305664,
"fcm_dpo/q_t": 0.4218023419380188,
"grad_norm": 14.85510540008545,
"learning_rate": 5.105953986729195e-08,
"logits/chosen": 0.6665756702423096,
"logits/rejected": 0.5819079875946045,
"logps/chosen": -204.06600952148438,
"logps/ref_chosen": -61.93169403076172,
"logps/ref_rejected": -84.08946228027344,
"logps/rejected": -277.10107421875,
"loss": 1.1559,
"margin_dpo/margin_mean": 50.877288818359375,
"margin_dpo/margin_std": 89.65788269042969,
"step": 539
},
{
"epoch": 0.8163265306122449,
"fcm_dpo/beta": 0.00684034638106823,
"fcm_dpo/delta": -0.1083156019449234,
"fcm_dpo/margin": 73.51258850097656,
"fcm_dpo/q_t": 0.38540440797805786,
"grad_norm": 13.257943153381348,
"learning_rate": 5.026157728273966e-08,
"logits/chosen": 0.7699329853057861,
"logits/rejected": 0.6641333103179932,
"logps/chosen": -191.4811553955078,
"logps/ref_chosen": -62.704254150390625,
"logps/ref_rejected": -95.63597106933594,
"logps/rejected": -297.92547607421875,
"loss": 1.0315,
"margin_dpo/margin_mean": 73.51258850097656,
"margin_dpo/margin_std": 87.4825439453125,
"step": 540
},
{
"epoch": 0.817838246409675,
"fcm_dpo/beta": 0.006668367423117161,
"fcm_dpo/delta": -0.05727549269795418,
"fcm_dpo/margin": 68.03440856933594,
"fcm_dpo/q_t": 0.3954416513442993,
"grad_norm": 14.147591590881348,
"learning_rate": 4.9469201811239035e-08,
"logits/chosen": 0.7798404693603516,
"logits/rejected": 0.804283857345581,
"logps/chosen": -189.17298889160156,
"logps/ref_chosen": -62.48084259033203,
"logps/ref_rejected": -57.55541229248047,
"logps/rejected": -252.281982421875,
"loss": 1.0639,
"margin_dpo/margin_mean": 68.03440856933594,
"margin_dpo/margin_std": 86.24921417236328,
"step": 541
},
{
"epoch": 0.8193499622071051,
"fcm_dpo/beta": 0.0065713440999388695,
"fcm_dpo/delta": -0.10843698680400848,
"fcm_dpo/margin": 76.51225280761719,
"fcm_dpo/q_t": 0.3853691816329956,
"grad_norm": 16.98206329345703,
"learning_rate": 4.868243561723534e-08,
"logits/chosen": 0.7922050952911377,
"logits/rejected": 0.7388157248497009,
"logps/chosen": -158.83099365234375,
"logps/ref_chosen": -49.454891204833984,
"logps/ref_rejected": -65.33275604248047,
"logps/rejected": -251.22113037109375,
"loss": 1.0495,
"margin_dpo/margin_mean": 76.51225280761719,
"margin_dpo/margin_std": 101.84271240234375,
"step": 542
},
{
"epoch": 0.8208616780045351,
"fcm_dpo/beta": 0.006503199227154255,
"fcm_dpo/delta": -0.06481535732746124,
"fcm_dpo/margin": 71.01345825195312,
"fcm_dpo/q_t": 0.3929465413093567,
"grad_norm": 11.35152530670166,
"learning_rate": 4.790130070827028e-08,
"logits/chosen": 0.731881856918335,
"logits/rejected": 0.6383688449859619,
"logps/chosen": -168.16604614257812,
"logps/ref_chosen": -51.100860595703125,
"logps/ref_rejected": -76.06130981445312,
"logps/rejected": -264.13995361328125,
"loss": 1.0598,
"margin_dpo/margin_mean": 71.01346588134766,
"margin_dpo/margin_std": 90.14659118652344,
"step": 543
},
{
"epoch": 0.8223733938019653,
"fcm_dpo/beta": 0.006377712823450565,
"fcm_dpo/delta": -0.09231595695018768,
"fcm_dpo/margin": 76.49650573730469,
"fcm_dpo/q_t": 0.38890010118484497,
"grad_norm": 14.788056373596191,
"learning_rate": 4.7125818934366454e-08,
"logits/chosen": 0.7259522676467896,
"logits/rejected": 0.6440372467041016,
"logps/chosen": -189.947265625,
"logps/ref_chosen": -60.2772331237793,
"logps/ref_rejected": -88.40553283691406,
"logps/rejected": -294.5720520019531,
"loss": 1.0524,
"margin_dpo/margin_mean": 76.49650573730469,
"margin_dpo/margin_std": 100.47244262695312,
"step": 544
},
{
"epoch": 0.8238851095993953,
"fcm_dpo/beta": 0.006433564238250256,
"fcm_dpo/delta": 0.09561902284622192,
"fcm_dpo/margin": 47.767616271972656,
"fcm_dpo/q_t": 0.4303530156612396,
"grad_norm": 13.540328025817871,
"learning_rate": 4.635601198741607e-08,
"logits/chosen": 0.6927204728126526,
"logits/rejected": 0.6323798894882202,
"logps/chosen": -199.80316162109375,
"logps/ref_chosen": -61.61524963378906,
"logps/ref_rejected": -78.71266174316406,
"logps/rejected": -264.668212890625,
"loss": 1.1927,
"margin_dpo/margin_mean": 47.767616271972656,
"margin_dpo/margin_std": 95.30821990966797,
"step": 545
},
{
"epoch": 0.8253968253968254,
"fcm_dpo/beta": 0.006493829190731049,
"fcm_dpo/delta": 0.04412386938929558,
"fcm_dpo/margin": 55.045677185058594,
"fcm_dpo/q_t": 0.4179641008377075,
"grad_norm": 15.096487998962402,
"learning_rate": 4.559190140057428e-08,
"logits/chosen": 0.8139510154724121,
"logits/rejected": 0.8047194480895996,
"logps/chosen": -190.3719482421875,
"logps/ref_chosen": -59.313262939453125,
"logps/ref_rejected": -64.73631286621094,
"logps/rejected": -250.84066772460938,
"loss": 1.1597,
"margin_dpo/margin_mean": 55.045677185058594,
"margin_dpo/margin_std": 99.31507873535156,
"step": 546
},
{
"epoch": 0.8269085411942555,
"fcm_dpo/beta": 0.006442173384130001,
"fcm_dpo/delta": -0.06737470626831055,
"fcm_dpo/margin": 72.02572631835938,
"fcm_dpo/q_t": 0.3931256830692291,
"grad_norm": 13.689138412475586,
"learning_rate": 4.483350854765672e-08,
"logits/chosen": 0.6713754534721375,
"logits/rejected": 0.6014930009841919,
"logps/chosen": -171.71279907226562,
"logps/ref_chosen": -54.97674560546875,
"logps/ref_rejected": -75.35922241210938,
"logps/rejected": -264.1210021972656,
"loss": 1.0656,
"margin_dpo/margin_mean": 72.02572631835938,
"margin_dpo/margin_std": 96.70033264160156,
"step": 547
},
{
"epoch": 0.8284202569916855,
"fcm_dpo/beta": 0.006535450927913189,
"fcm_dpo/delta": 0.08367334306240082,
"fcm_dpo/margin": 48.743003845214844,
"fcm_dpo/q_t": 0.4280347526073456,
"grad_norm": 16.57050895690918,
"learning_rate": 4.4080854642541826e-08,
"logits/chosen": 0.6213726997375488,
"logits/rejected": 0.5548287630081177,
"logps/chosen": -197.66107177734375,
"logps/ref_chosen": -63.21067428588867,
"logps/ref_rejected": -81.23347473144531,
"logps/rejected": -264.4268798828125,
"loss": 1.1778,
"margin_dpo/margin_mean": 48.743003845214844,
"margin_dpo/margin_std": 91.22822570800781,
"step": 548
},
{
"epoch": 0.8299319727891157,
"fcm_dpo/beta": 0.006569857243448496,
"fcm_dpo/delta": 0.03660057857632637,
"fcm_dpo/margin": 55.518028259277344,
"fcm_dpo/q_t": 0.41681382060050964,
"grad_norm": 15.688474655151367,
"learning_rate": 4.333396073857723e-08,
"logits/chosen": 0.8061296343803406,
"logits/rejected": 0.7307944297790527,
"logps/chosen": -195.18118286132812,
"logps/ref_chosen": -64.27351379394531,
"logps/ref_rejected": -92.31663513183594,
"logps/rejected": -278.7423095703125,
"loss": 1.1524,
"margin_dpo/margin_mean": 55.518028259277344,
"margin_dpo/margin_std": 97.83292388916016,
"step": 549
},
{
"epoch": 0.8314436885865457,
"fcm_dpo/beta": 0.006715863943099976,
"fcm_dpo/delta": 0.15693125128746033,
"fcm_dpo/margin": 36.81257629394531,
"fcm_dpo/q_t": 0.44385606050491333,
"grad_norm": 17.09474754333496,
"learning_rate": 4.259284772799099e-08,
"logits/chosen": 0.7606515884399414,
"logits/rejected": 0.7276763916015625,
"logps/chosen": -199.18109130859375,
"logps/ref_chosen": -56.230438232421875,
"logps/ref_rejected": -62.59788513183594,
"logps/rejected": -242.3611297607422,
"loss": 1.2432,
"margin_dpo/margin_mean": 36.81257629394531,
"margin_dpo/margin_std": 89.00492858886719,
"step": 550
},
{
"epoch": 0.8329554043839759,
"fcm_dpo/beta": 0.0069015612825751305,
"fcm_dpo/delta": 0.07776844501495361,
"fcm_dpo/margin": 46.99622344970703,
"fcm_dpo/q_t": 0.42739948630332947,
"grad_norm": 14.707651138305664,
"learning_rate": 4.1857536341307176e-08,
"logits/chosen": 0.7485238313674927,
"logits/rejected": 0.7114731073379517,
"logps/chosen": -208.91763305664062,
"logps/ref_chosen": -67.74720764160156,
"logps/ref_rejected": -87.04285430908203,
"logps/rejected": -275.2095031738281,
"loss": 1.1647,
"margin_dpo/margin_mean": 46.996219635009766,
"margin_dpo/margin_std": 83.65496826171875,
"step": 551
},
{
"epoch": 0.8344671201814059,
"fcm_dpo/beta": 0.006933193188160658,
"fcm_dpo/delta": -0.00469888374209404,
"fcm_dpo/margin": 58.293643951416016,
"fcm_dpo/q_t": 0.404817670583725,
"grad_norm": 14.66103458404541,
"learning_rate": 4.112804714676593e-08,
"logits/chosen": 0.7346080541610718,
"logits/rejected": 0.676051139831543,
"logps/chosen": -194.23452758789062,
"logps/ref_chosen": -62.92625427246094,
"logps/ref_rejected": -82.98365783691406,
"logps/rejected": -272.5855712890625,
"loss": 1.1033,
"margin_dpo/margin_mean": 58.29364776611328,
"margin_dpo/margin_std": 83.52306365966797,
"step": 552
},
{
"epoch": 0.8359788359788359,
"fcm_dpo/beta": 0.00692669115960598,
"fcm_dpo/delta": 0.021221814677119255,
"fcm_dpo/margin": 54.79808807373047,
"fcm_dpo/q_t": 0.4154573976993561,
"grad_norm": 17.43486785888672,
"learning_rate": 4.0404400549748144e-08,
"logits/chosen": 0.6985148191452026,
"logits/rejected": 0.5853888988494873,
"logps/chosen": -198.54403686523438,
"logps/ref_chosen": -56.038490295410156,
"logps/ref_rejected": -84.48454284667969,
"logps/rejected": -281.78814697265625,
"loss": 1.1534,
"margin_dpo/margin_mean": 54.7980842590332,
"margin_dpo/margin_std": 100.00516510009766,
"step": 553
},
{
"epoch": 0.8374905517762661,
"fcm_dpo/beta": 0.006878286134451628,
"fcm_dpo/delta": -0.05534950643777847,
"fcm_dpo/margin": 65.84024810791016,
"fcm_dpo/q_t": 0.3967292606830597,
"grad_norm": 14.698594093322754,
"learning_rate": 3.968661679220467e-08,
"logits/chosen": 0.69556725025177,
"logits/rejected": 0.6746841669082642,
"logps/chosen": -192.9752197265625,
"logps/ref_chosen": -64.53059387207031,
"logps/ref_rejected": -71.2155990600586,
"logps/rejected": -265.50048828125,
"loss": 1.0812,
"margin_dpo/margin_mean": 65.84025573730469,
"margin_dpo/margin_std": 93.83136749267578,
"step": 554
},
{
"epoch": 0.8390022675736961,
"fcm_dpo/beta": 0.00692109577357769,
"fcm_dpo/delta": 0.010154381394386292,
"fcm_dpo/margin": 56.18605422973633,
"fcm_dpo/q_t": 0.41057413816452026,
"grad_norm": 14.941075325012207,
"learning_rate": 3.89747159520904e-08,
"logits/chosen": 0.7340927720069885,
"logits/rejected": 0.7052745819091797,
"logps/chosen": -206.43673706054688,
"logps/ref_chosen": -66.65191650390625,
"logps/ref_rejected": -68.6667251586914,
"logps/rejected": -264.6376037597656,
"loss": 1.1581,
"margin_dpo/margin_mean": 56.18605422973633,
"margin_dpo/margin_std": 100.85908508300781,
"step": 555
},
{
"epoch": 0.8405139833711263,
"fcm_dpo/beta": 0.006918167695403099,
"fcm_dpo/delta": 0.06461147218942642,
"fcm_dpo/margin": 48.79975128173828,
"fcm_dpo/q_t": 0.42574968934059143,
"grad_norm": 13.405830383300781,
"learning_rate": 3.826871794280192e-08,
"logits/chosen": 0.7548919916152954,
"logits/rejected": 0.700238823890686,
"logps/chosen": -194.24932861328125,
"logps/ref_chosen": -52.832366943359375,
"logps/ref_rejected": -64.49044036865234,
"logps/rejected": -254.70716857910156,
"loss": 1.198,
"margin_dpo/margin_mean": 48.79975128173828,
"margin_dpo/margin_std": 103.18643951416016,
"step": 556
},
{
"epoch": 0.8420256991685563,
"fcm_dpo/beta": 0.00680879969149828,
"fcm_dpo/delta": -0.1172577515244484,
"fcm_dpo/margin": 74.97007751464844,
"fcm_dpo/q_t": 0.3834077715873718,
"grad_norm": 12.292925834655762,
"learning_rate": 3.756864251262143e-08,
"logits/chosen": 0.8256399631500244,
"logits/rejected": 0.7433536052703857,
"logps/chosen": -193.7919158935547,
"logps/ref_chosen": -55.03598403930664,
"logps/ref_rejected": -75.80644989013672,
"logps/rejected": -289.532470703125,
"loss": 1.0183,
"margin_dpo/margin_mean": 74.97007751464844,
"margin_dpo/margin_std": 84.72743225097656,
"step": 557
},
{
"epoch": 0.8435374149659864,
"fcm_dpo/beta": 0.0066352728754282,
"fcm_dpo/delta": -0.13472692668437958,
"fcm_dpo/margin": 79.395263671875,
"fcm_dpo/q_t": 0.3804672360420227,
"grad_norm": 12.516437530517578,
"learning_rate": 3.687450924416341e-08,
"logits/chosen": 0.777481198310852,
"logits/rejected": 0.7188490629196167,
"logps/chosen": -188.71958923339844,
"logps/ref_chosen": -63.226348876953125,
"logps/ref_rejected": -91.46881866455078,
"logps/rejected": -296.3573303222656,
"loss": 1.0151,
"margin_dpo/margin_mean": 79.395263671875,
"margin_dpo/margin_std": 92.50755310058594,
"step": 558
},
{
"epoch": 0.8450491307634165,
"fcm_dpo/beta": 0.006523288786411285,
"fcm_dpo/delta": -0.04582027345895767,
"fcm_dpo/margin": 67.82418823242188,
"fcm_dpo/q_t": 0.40194329619407654,
"grad_norm": 12.600460052490234,
"learning_rate": 3.6186337553827743e-08,
"logits/chosen": 0.7076990604400635,
"logits/rejected": 0.6349881887435913,
"logps/chosen": -192.7144317626953,
"logps/ref_chosen": -61.521644592285156,
"logps/ref_rejected": -82.83859252929688,
"logps/rejected": -281.8555908203125,
"loss": 1.0989,
"margin_dpo/margin_mean": 67.82418823242188,
"margin_dpo/margin_std": 102.31001281738281,
"step": 559
},
{
"epoch": 0.8465608465608465,
"fcm_dpo/beta": 0.006590306758880615,
"fcm_dpo/delta": 0.002619616687297821,
"fcm_dpo/margin": 60.240455627441406,
"fcm_dpo/q_t": 0.409095823764801,
"grad_norm": 14.637415885925293,
"learning_rate": 3.550414669125573e-08,
"logits/chosen": 0.738136351108551,
"logits/rejected": 0.6958550214767456,
"logps/chosen": -203.272216796875,
"logps/ref_chosen": -60.64122009277344,
"logps/ref_rejected": -78.75474548339844,
"logps/rejected": -281.626220703125,
"loss": 1.1043,
"margin_dpo/margin_mean": 60.24045181274414,
"margin_dpo/margin_std": 84.55506896972656,
"step": 560
},
{
"epoch": 0.8480725623582767,
"fcm_dpo/beta": 0.006551677361130714,
"fcm_dpo/delta": 0.00024553295224905014,
"fcm_dpo/margin": 61.01319122314453,
"fcm_dpo/q_t": 0.41009342670440674,
"grad_norm": 13.768065452575684,
"learning_rate": 3.482795573879241e-08,
"logits/chosen": 0.6982570290565491,
"logits/rejected": 0.6633864641189575,
"logps/chosen": -192.55197143554688,
"logps/ref_chosen": -62.49859619140625,
"logps/ref_rejected": -78.72064208984375,
"logps/rejected": -269.78717041015625,
"loss": 1.1158,
"margin_dpo/margin_mean": 61.01319122314453,
"margin_dpo/margin_std": 94.03324890136719,
"step": 561
},
{
"epoch": 0.8495842781557067,
"fcm_dpo/beta": 0.006441822275519371,
"fcm_dpo/delta": -0.08493717759847641,
"fcm_dpo/margin": 74.48919677734375,
"fcm_dpo/q_t": 0.3915877342224121,
"grad_norm": 14.380753517150879,
"learning_rate": 3.415778361095226e-08,
"logits/chosen": 0.7225862741470337,
"logits/rejected": 0.6788659691810608,
"logps/chosen": -211.61782836914062,
"logps/ref_chosen": -74.78173828125,
"logps/ref_rejected": -92.63499450683594,
"logps/rejected": -303.96026611328125,
"loss": 1.0511,
"margin_dpo/margin_mean": 74.48919677734375,
"margin_dpo/margin_std": 94.54464721679688,
"step": 562
},
{
"epoch": 0.8510959939531368,
"fcm_dpo/beta": 0.006419507786631584,
"fcm_dpo/delta": -0.03327463939785957,
"fcm_dpo/margin": 67.2710952758789,
"fcm_dpo/q_t": 0.40121811628341675,
"grad_norm": 17.5394287109375,
"learning_rate": 3.349364905389032e-08,
"logits/chosen": 0.809956431388855,
"logits/rejected": 0.7581348419189453,
"logps/chosen": -167.83493041992188,
"logps/ref_chosen": -50.19850158691406,
"logps/ref_rejected": -66.76687622070312,
"logps/rejected": -251.67440795898438,
"loss": 1.1022,
"margin_dpo/margin_mean": 67.27110290527344,
"margin_dpo/margin_std": 102.41062927246094,
"step": 563
},
{
"epoch": 0.8526077097505669,
"fcm_dpo/beta": 0.006282067857682705,
"fcm_dpo/delta": -0.12858623266220093,
"fcm_dpo/margin": 83.0648193359375,
"fcm_dpo/q_t": 0.37959784269332886,
"grad_norm": 13.681713104248047,
"learning_rate": 3.283557064487785e-08,
"logits/chosen": 0.6968977451324463,
"logits/rejected": 0.6630585789680481,
"logps/chosen": -175.38339233398438,
"logps/ref_chosen": -55.7408447265625,
"logps/ref_rejected": -74.82323455810547,
"logps/rejected": -277.5306091308594,
"loss": 1.0289,
"margin_dpo/margin_mean": 83.06480407714844,
"margin_dpo/margin_std": 102.96722412109375,
"step": 564
},
{
"epoch": 0.854119425547997,
"fcm_dpo/beta": 0.0063241226598620415,
"fcm_dpo/delta": 0.04935676231980324,
"fcm_dpo/margin": 55.58483123779297,
"fcm_dpo/q_t": 0.4191403090953827,
"grad_norm": 15.041940689086914,
"learning_rate": 3.218356679178252e-08,
"logits/chosen": 0.7640155553817749,
"logits/rejected": 0.7049951553344727,
"logps/chosen": -205.72286987304688,
"logps/ref_chosen": -58.33738327026367,
"logps/ref_rejected": -78.31776428222656,
"logps/rejected": -281.2880859375,
"loss": 1.14,
"margin_dpo/margin_mean": 55.58483123779297,
"margin_dpo/margin_std": 87.38355255126953,
"step": 565
},
{
"epoch": 0.8556311413454271,
"fcm_dpo/beta": 0.006384224630892277,
"fcm_dpo/delta": 0.05589155852794647,
"fcm_dpo/margin": 54.115760803222656,
"fcm_dpo/q_t": 0.4228624105453491,
"grad_norm": 16.651105880737305,
"learning_rate": 3.1537655732553764e-08,
"logits/chosen": 0.7325712442398071,
"logits/rejected": 0.7150111794471741,
"logps/chosen": -205.9342041015625,
"logps/ref_chosen": -71.22373962402344,
"logps/ref_rejected": -71.11601257324219,
"logps/rejected": -259.9422302246094,
"loss": 1.1896,
"margin_dpo/margin_mean": 54.11576461791992,
"margin_dpo/margin_std": 111.10244750976562,
"step": 566
},
{
"epoch": 0.8571428571428571,
"fcm_dpo/beta": 0.006314173806458712,
"fcm_dpo/delta": -0.026270300149917603,
"fcm_dpo/margin": 67.20326232910156,
"fcm_dpo/q_t": 0.40221065282821655,
"grad_norm": 11.8645601272583,
"learning_rate": 3.089785553471233e-08,
"logits/chosen": 0.7259865999221802,
"logits/rejected": 0.6274834871292114,
"logps/chosen": -183.31649780273438,
"logps/ref_chosen": -52.669273376464844,
"logps/ref_rejected": -74.34785461425781,
"logps/rejected": -272.1983337402344,
"loss": 1.0897,
"margin_dpo/margin_mean": 67.20326232910156,
"margin_dpo/margin_std": 92.31834411621094,
"step": 567
},
{
"epoch": 0.8586545729402872,
"fcm_dpo/beta": 0.006262045819312334,
"fcm_dpo/delta": -0.1232818141579628,
"fcm_dpo/margin": 82.55564880371094,
"fcm_dpo/q_t": 0.3823985457420349,
"grad_norm": 14.214730262756348,
"learning_rate": 3.026418409484513e-08,
"logits/chosen": 0.7861194610595703,
"logits/rejected": 0.6945356130599976,
"logps/chosen": -171.9617919921875,
"logps/ref_chosen": -52.178001403808594,
"logps/ref_rejected": -85.8277587890625,
"logps/rejected": -288.1672058105469,
"loss": 1.0136,
"margin_dpo/margin_mean": 82.55564880371094,
"margin_dpo/margin_std": 90.54248046875,
"step": 568
},
{
"epoch": 0.8601662887377173,
"fcm_dpo/beta": 0.00627292413264513,
"fcm_dpo/delta": 0.13700155913829803,
"fcm_dpo/margin": 42.50354766845703,
"fcm_dpo/q_t": 0.43937602639198303,
"grad_norm": 14.89389419555664,
"learning_rate": 2.963665913810451e-08,
"logits/chosen": 0.6582707166671753,
"logits/rejected": 0.6227110624313354,
"logps/chosen": -201.08621215820312,
"logps/ref_chosen": -62.649261474609375,
"logps/ref_rejected": -75.4298324584961,
"logps/rejected": -256.370361328125,
"loss": 1.2184,
"margin_dpo/margin_mean": 42.503543853759766,
"margin_dpo/margin_std": 89.05029296875,
"step": 569
},
{
"epoch": 0.8616780045351474,
"fcm_dpo/beta": 0.006269059143960476,
"fcm_dpo/delta": -0.12943394482135773,
"fcm_dpo/margin": 83.39543151855469,
"fcm_dpo/q_t": 0.37981927394866943,
"grad_norm": 12.89783000946045,
"learning_rate": 2.9015298217712453e-08,
"logits/chosen": 0.7100155353546143,
"logits/rejected": 0.6229262948036194,
"logps/chosen": -173.34877014160156,
"logps/ref_chosen": -50.04179382324219,
"logps/ref_rejected": -78.27146911621094,
"logps/rejected": -284.973876953125,
"loss": 1.0119,
"margin_dpo/margin_mean": 83.39543151855469,
"margin_dpo/margin_std": 93.0318603515625,
"step": 570
},
{
"epoch": 0.8631897203325775,
"fcm_dpo/beta": 0.006321952678263187,
"fcm_dpo/delta": 0.10337033122777939,
"fcm_dpo/margin": 47.27796936035156,
"fcm_dpo/q_t": 0.43055155873298645,
"grad_norm": 13.160158157348633,
"learning_rate": 2.840011871446962e-08,
"logits/chosen": 0.73212730884552,
"logits/rejected": 0.6987918019294739,
"logps/chosen": -189.09153747558594,
"logps/ref_chosen": -53.65681457519531,
"logps/ref_rejected": -66.13298034667969,
"logps/rejected": -248.84567260742188,
"loss": 1.2004,
"margin_dpo/margin_mean": 47.27796936035156,
"margin_dpo/margin_std": 95.1507568359375,
"step": 571
},
{
"epoch": 0.8647014361300076,
"fcm_dpo/beta": 0.006384381093084812,
"fcm_dpo/delta": 0.03750025853514671,
"fcm_dpo/margin": 56.937442779541016,
"fcm_dpo/q_t": 0.41591769456863403,
"grad_norm": 13.933595657348633,
"learning_rate": 2.7791137836269158e-08,
"logits/chosen": 0.7308815121650696,
"logits/rejected": 0.7777682542800903,
"logps/chosen": -205.65716552734375,
"logps/ref_chosen": -74.81792449951172,
"logps/ref_rejected": -65.88681030273438,
"logps/rejected": -253.66348266601562,
"loss": 1.1253,
"margin_dpo/margin_mean": 56.937442779541016,
"margin_dpo/margin_std": 84.29963684082031,
"step": 572
},
{
"epoch": 0.8662131519274376,
"fcm_dpo/beta": 0.0064005982130765915,
"fcm_dpo/delta": 0.03014349937438965,
"fcm_dpo/margin": 57.9608154296875,
"fcm_dpo/q_t": 0.41660982370376587,
"grad_norm": 14.19927978515625,
"learning_rate": 2.718837261761528e-08,
"logits/chosen": 0.7378900051116943,
"logits/rejected": 0.6873558759689331,
"logps/chosen": -214.39682006835938,
"logps/ref_chosen": -68.72564697265625,
"logps/ref_rejected": -88.16201782226562,
"logps/rejected": -291.79400634765625,
"loss": 1.1599,
"margin_dpo/margin_mean": 57.960819244384766,
"margin_dpo/margin_std": 107.89491271972656,
"step": 573
},
{
"epoch": 0.8677248677248677,
"fcm_dpo/beta": 0.006377051584422588,
"fcm_dpo/delta": -0.07006673514842987,
"fcm_dpo/margin": 73.18067169189453,
"fcm_dpo/q_t": 0.3916003108024597,
"grad_norm": 11.854135513305664,
"learning_rate": 2.659183991914696e-08,
"logits/chosen": 0.803756833076477,
"logits/rejected": 0.7289662957191467,
"logps/chosen": -186.39210510253906,
"logps/ref_chosen": -56.31340026855469,
"logps/ref_rejected": -83.91553497314453,
"logps/rejected": -287.1749267578125,
"loss": 1.0321,
"margin_dpo/margin_mean": 73.18067169189453,
"margin_dpo/margin_std": 77.18203735351562,
"step": 574
},
{
"epoch": 0.8692365835222978,
"fcm_dpo/beta": 0.006343858316540718,
"fcm_dpo/delta": 0.06961024552583694,
"fcm_dpo/margin": 52.35631561279297,
"fcm_dpo/q_t": 0.4264383614063263,
"grad_norm": 14.030909538269043,
"learning_rate": 2.600155642716606e-08,
"logits/chosen": 0.7899061441421509,
"logits/rejected": 0.6997029781341553,
"logps/chosen": -198.152587890625,
"logps/ref_chosen": -64.5841293334961,
"logps/ref_rejected": -93.47034454345703,
"logps/rejected": -279.3951416015625,
"loss": 1.1954,
"margin_dpo/margin_mean": 52.356319427490234,
"margin_dpo/margin_std": 106.40255737304688,
"step": 575
},
{
"epoch": 0.8707482993197279,
"fcm_dpo/beta": 0.006311601027846336,
"fcm_dpo/delta": -0.06749401986598969,
"fcm_dpo/margin": 73.40779113769531,
"fcm_dpo/q_t": 0.394220232963562,
"grad_norm": 13.683633804321289,
"learning_rate": 2.5417538653170754e-08,
"logits/chosen": 0.731153130531311,
"logits/rejected": 0.6163959503173828,
"logps/chosen": -171.4643096923828,
"logps/ref_chosen": -53.28052520751953,
"logps/ref_rejected": -84.2000503540039,
"logps/rejected": -275.7916259765625,
"loss": 1.0627,
"margin_dpo/margin_mean": 73.40778350830078,
"margin_dpo/margin_std": 95.41714477539062,
"step": 576
},
{
"epoch": 0.872260015117158,
"fcm_dpo/beta": 0.006382349878549576,
"fcm_dpo/delta": 0.06438060849905014,
"fcm_dpo/margin": 52.93243408203125,
"fcm_dpo/q_t": 0.42242977023124695,
"grad_norm": 14.356147766113281,
"learning_rate": 2.4839802933393607e-08,
"logits/chosen": 0.7241270542144775,
"logits/rejected": 0.7086690664291382,
"logps/chosen": -194.91119384765625,
"logps/ref_chosen": -62.32468795776367,
"logps/ref_rejected": -67.300537109375,
"logps/rejected": -252.81947326660156,
"loss": 1.1692,
"margin_dpo/margin_mean": 52.93243408203125,
"margin_dpo/margin_std": 96.21859741210938,
"step": 577
},
{
"epoch": 0.873771730914588,
"fcm_dpo/beta": 0.0064790756441652775,
"fcm_dpo/delta": 0.08017978072166443,
"fcm_dpo/margin": 49.77034378051758,
"fcm_dpo/q_t": 0.4279744327068329,
"grad_norm": 16.847196578979492,
"learning_rate": 2.4268365428344733e-08,
"logits/chosen": 0.8302306532859802,
"logits/rejected": 0.8066465258598328,
"logps/chosen": -183.35203552246094,
"logps/ref_chosen": -56.65557861328125,
"logps/ref_rejected": -68.21835327148438,
"logps/rejected": -244.68516540527344,
"loss": 1.1898,
"margin_dpo/margin_mean": 49.77034378051758,
"margin_dpo/margin_std": 100.29454040527344,
"step": 578
},
{
"epoch": 0.8752834467120182,
"fcm_dpo/beta": 0.00643126480281353,
"fcm_dpo/delta": -0.07598722726106644,
"fcm_dpo/margin": 73.43510437011719,
"fcm_dpo/q_t": 0.39019495248794556,
"grad_norm": 13.77873706817627,
"learning_rate": 2.3703242122359357e-08,
"logits/chosen": 0.685715913772583,
"logits/rejected": 0.6591500043869019,
"logps/chosen": -189.93231201171875,
"logps/ref_chosen": -56.809661865234375,
"logps/ref_rejected": -68.09613037109375,
"logps/rejected": -274.65386962890625,
"loss": 1.0395,
"margin_dpo/margin_mean": 73.43510437011719,
"margin_dpo/margin_std": 85.38418579101562,
"step": 579
},
{
"epoch": 0.8767951625094482,
"fcm_dpo/beta": 0.0064583588391542435,
"fcm_dpo/delta": 0.020921528339385986,
"fcm_dpo/margin": 58.764244079589844,
"fcm_dpo/q_t": 0.41470351815223694,
"grad_norm": 13.418577194213867,
"learning_rate": 2.3144448823151392e-08,
"logits/chosen": 0.7243668437004089,
"logits/rejected": 0.6619343161582947,
"logps/chosen": -190.41073608398438,
"logps/ref_chosen": -57.70011520385742,
"logps/ref_rejected": -77.90664672851562,
"logps/rejected": -269.3815002441406,
"loss": 1.1513,
"margin_dpo/margin_mean": 58.764244079589844,
"margin_dpo/margin_std": 104.921630859375,
"step": 580
},
{
"epoch": 0.8783068783068783,
"fcm_dpo/beta": 0.006431188900023699,
"fcm_dpo/delta": 0.012560145929455757,
"fcm_dpo/margin": 60.2906494140625,
"fcm_dpo/q_t": 0.4119495153427124,
"grad_norm": 14.395081520080566,
"learning_rate": 2.259200116137039e-08,
"logits/chosen": 0.7669543623924255,
"logits/rejected": 0.6934635639190674,
"logps/chosen": -203.38047790527344,
"logps/ref_chosen": -59.332359313964844,
"logps/ref_rejected": -83.64482116699219,
"logps/rejected": -287.98358154296875,
"loss": 1.1236,
"margin_dpo/margin_mean": 60.2906494140625,
"margin_dpo/margin_std": 94.43912506103516,
"step": 581
},
{
"epoch": 0.8798185941043084,
"fcm_dpo/beta": 0.0064775762148201466,
"fcm_dpo/delta": 0.008546445518732071,
"fcm_dpo/margin": 60.47076416015625,
"fcm_dpo/q_t": 0.41076183319091797,
"grad_norm": 11.891162872314453,
"learning_rate": 2.204591459016525e-08,
"logits/chosen": 0.7505415678024292,
"logits/rejected": 0.7810231447219849,
"logps/chosen": -193.03822326660156,
"logps/ref_chosen": -64.16285705566406,
"logps/ref_rejected": -58.632896423339844,
"logps/rejected": -247.97903442382812,
"loss": 1.1218,
"margin_dpo/margin_mean": 60.47076416015625,
"margin_dpo/margin_std": 94.79828643798828,
"step": 582
},
{
"epoch": 0.8813303099017384,
"fcm_dpo/beta": 0.006431900896131992,
"fcm_dpo/delta": -0.03729263320565224,
"fcm_dpo/margin": 67.7349853515625,
"fcm_dpo/q_t": 0.40107226371765137,
"grad_norm": 15.881672859191895,
"learning_rate": 2.1506204384751064e-08,
"logits/chosen": 0.862554132938385,
"logits/rejected": 0.7417807579040527,
"logps/chosen": -182.47802734375,
"logps/ref_chosen": -51.87239456176758,
"logps/ref_rejected": -83.86331176757812,
"logps/rejected": -282.20391845703125,
"loss": 1.1078,
"margin_dpo/margin_mean": 67.7349853515625,
"margin_dpo/margin_std": 106.33586120605469,
"step": 583
},
{
"epoch": 0.8828420256991686,
"fcm_dpo/beta": 0.006398425903171301,
"fcm_dpo/delta": 0.0037146955728530884,
"fcm_dpo/margin": 61.912025451660156,
"fcm_dpo/q_t": 0.41191157698631287,
"grad_norm": 12.762717247009277,
"learning_rate": 2.09728856419826e-08,
"logits/chosen": 0.8829727172851562,
"logits/rejected": 0.7666534185409546,
"logps/chosen": -165.6560516357422,
"logps/ref_chosen": -46.571388244628906,
"logps/ref_rejected": -80.67969512939453,
"logps/rejected": -261.6763916015625,
"loss": 1.144,
"margin_dpo/margin_mean": 61.912025451660156,
"margin_dpo/margin_std": 109.02485656738281,
"step": 584
},
{
"epoch": 0.8843537414965986,
"fcm_dpo/beta": 0.006562906317412853,
"fcm_dpo/delta": 0.11703015118837357,
"fcm_dpo/margin": 43.517791748046875,
"fcm_dpo/q_t": 0.4340656101703644,
"grad_norm": 13.247313499450684,
"learning_rate": 2.044597327993153e-08,
"logits/chosen": 0.701311469078064,
"logits/rejected": 0.6506215333938599,
"logps/chosen": -196.23910522460938,
"logps/ref_chosen": -58.124534606933594,
"logps/ref_rejected": -79.00538635253906,
"logps/rejected": -260.63775634765625,
"loss": 1.2099,
"margin_dpo/margin_mean": 43.517784118652344,
"margin_dpo/margin_std": 92.49024963378906,
"step": 585
},
{
"epoch": 0.8858654572940288,
"fcm_dpo/beta": 0.006531290709972382,
"fcm_dpo/delta": -0.0264582596719265,
"fcm_dpo/margin": 65.08265686035156,
"fcm_dpo/q_t": 0.4013257324695587,
"grad_norm": 18.042287826538086,
"learning_rate": 1.9925482037469187e-08,
"logits/chosen": 0.781760573387146,
"logits/rejected": 0.7310307621955872,
"logps/chosen": -183.73556518554688,
"logps/ref_chosen": -54.10163879394531,
"logps/ref_rejected": -63.72113037109375,
"logps/rejected": -258.4377136230469,
"loss": 1.0613,
"margin_dpo/margin_mean": 65.08265686035156,
"margin_dpo/margin_std": 74.19046783447266,
"step": 586
},
{
"epoch": 0.8873771730914588,
"fcm_dpo/beta": 0.006523734889924526,
"fcm_dpo/delta": -0.01788686215877533,
"fcm_dpo/margin": 63.94139099121094,
"fcm_dpo/q_t": 0.403054416179657,
"grad_norm": 14.469686508178711,
"learning_rate": 1.9411426473854687e-08,
"logits/chosen": 0.7586396932601929,
"logits/rejected": 0.7525993585586548,
"logps/chosen": -195.1334686279297,
"logps/ref_chosen": -63.41719436645508,
"logps/ref_rejected": -63.47003936767578,
"logps/rejected": -259.127685546875,
"loss": 1.1416,
"margin_dpo/margin_mean": 63.9413948059082,
"margin_dpo/margin_std": 115.5007553100586,
"step": 587
},
{
"epoch": 0.8888888888888888,
"fcm_dpo/beta": 0.00644359365105629,
"fcm_dpo/delta": -0.06252604722976685,
"fcm_dpo/margin": 71.29679870605469,
"fcm_dpo/q_t": 0.39673376083374023,
"grad_norm": 15.000171661376953,
"learning_rate": 1.890382096832699e-08,
"logits/chosen": 0.7840080261230469,
"logits/rejected": 0.7370002269744873,
"logps/chosen": -195.41769409179688,
"logps/ref_chosen": -62.20103454589844,
"logps/ref_rejected": -82.10249328613281,
"logps/rejected": -286.615966796875,
"loss": 1.0822,
"margin_dpo/margin_mean": 71.29679870605469,
"margin_dpo/margin_std": 104.02099609375,
"step": 588
},
{
"epoch": 0.890400604686319,
"fcm_dpo/beta": 0.006399224046617746,
"fcm_dpo/delta": -0.047763481736183167,
"fcm_dpo/margin": 69.64301300048828,
"fcm_dpo/q_t": 0.3962135314941406,
"grad_norm": 11.454410552978516,
"learning_rate": 1.840267971970344e-08,
"logits/chosen": 0.7322001457214355,
"logits/rejected": 0.7018231153488159,
"logps/chosen": -183.03843688964844,
"logps/ref_chosen": -56.71361541748047,
"logps/ref_rejected": -76.7366943359375,
"logps/rejected": -272.70452880859375,
"loss": 1.0478,
"margin_dpo/margin_mean": 69.64301300048828,
"margin_dpo/margin_std": 77.81524658203125,
"step": 589
},
{
"epoch": 0.891912320483749,
"fcm_dpo/beta": 0.006368682254105806,
"fcm_dpo/delta": -0.02491743117570877,
"fcm_dpo/margin": 66.54501342773438,
"fcm_dpo/q_t": 0.4025854766368866,
"grad_norm": 14.082845687866211,
"learning_rate": 1.7908016745981856e-08,
"logits/chosen": 0.6987200975418091,
"logits/rejected": 0.6662635803222656,
"logps/chosen": -207.60150146484375,
"logps/ref_chosen": -66.5138168334961,
"logps/ref_rejected": -85.70820617675781,
"logps/rejected": -293.34088134765625,
"loss": 1.0858,
"margin_dpo/margin_mean": 66.54501342773438,
"margin_dpo/margin_std": 90.5670166015625,
"step": 590
},
{
"epoch": 0.8934240362811792,
"fcm_dpo/beta": 0.0061980183236300945,
"fcm_dpo/delta": -0.10651122033596039,
"fcm_dpo/margin": 80.5565185546875,
"fcm_dpo/q_t": 0.38733240962028503,
"grad_norm": 15.332803726196289,
"learning_rate": 1.7419845883949098e-08,
"logits/chosen": 0.8256734609603882,
"logits/rejected": 0.7557948231697083,
"logps/chosen": -180.6138916015625,
"logps/ref_chosen": -60.697181701660156,
"logps/ref_rejected": -86.12278747558594,
"logps/rejected": -286.59600830078125,
"loss": 1.0646,
"margin_dpo/margin_mean": 80.5565185546875,
"margin_dpo/margin_std": 112.17295837402344,
"step": 591
},
{
"epoch": 0.8949357520786092,
"fcm_dpo/beta": 0.006209563929587603,
"fcm_dpo/delta": 0.030359894037246704,
"fcm_dpo/margin": 59.65741729736328,
"fcm_dpo/q_t": 0.41657984256744385,
"grad_norm": 15.251811027526855,
"learning_rate": 1.6938180788793556e-08,
"logits/chosen": 0.7907428741455078,
"logits/rejected": 0.6714023947715759,
"logps/chosen": -182.21139526367188,
"logps/ref_chosen": -51.237327575683594,
"logps/ref_rejected": -81.60242462158203,
"logps/rejected": -272.23388671875,
"loss": 1.1292,
"margin_dpo/margin_mean": 59.65741729736328,
"margin_dpo/margin_std": 91.72056579589844,
"step": 592
},
{
"epoch": 0.8964474678760394,
"fcm_dpo/beta": 0.00623913761228323,
"fcm_dpo/delta": 0.0039339009672403336,
"fcm_dpo/margin": 63.491966247558594,
"fcm_dpo/q_t": 0.40932101011276245,
"grad_norm": 15.088692665100098,
"learning_rate": 1.6463034933723336e-08,
"logits/chosen": 0.7265362739562988,
"logits/rejected": 0.6285638809204102,
"logps/chosen": -152.95541381835938,
"logps/ref_chosen": -42.08000183105469,
"logps/ref_rejected": -68.47499084472656,
"logps/rejected": -242.8423614501953,
"loss": 1.1179,
"margin_dpo/margin_mean": 63.49196243286133,
"margin_dpo/margin_std": 98.1021728515625,
"step": 593
},
{
"epoch": 0.8979591836734694,
"fcm_dpo/beta": 0.006280633620917797,
"fcm_dpo/delta": 0.03118686005473137,
"fcm_dpo/margin": 58.9074592590332,
"fcm_dpo/q_t": 0.4138728380203247,
"grad_norm": 13.728915214538574,
"learning_rate": 1.5994421609589385e-08,
"logits/chosen": 0.6779206991195679,
"logits/rejected": 0.6631582975387573,
"logps/chosen": -202.61465454101562,
"logps/ref_chosen": -63.658668518066406,
"logps/ref_rejected": -70.35597229003906,
"logps/rejected": -268.21942138671875,
"loss": 1.1204,
"margin_dpo/margin_mean": 58.9074592590332,
"margin_dpo/margin_std": 86.3900146484375,
"step": 594
},
{
"epoch": 0.8994708994708994,
"fcm_dpo/beta": 0.006229420658200979,
"fcm_dpo/delta": -0.08674081414937973,
"fcm_dpo/margin": 77.4767837524414,
"fcm_dpo/q_t": 0.3901749551296234,
"grad_norm": 11.558106422424316,
"learning_rate": 1.553235392451377e-08,
"logits/chosen": 0.8139692544937134,
"logits/rejected": 0.7172808647155762,
"logps/chosen": -181.95928955078125,
"logps/ref_chosen": -56.21875762939453,
"logps/ref_rejected": -83.95773315429688,
"logps/rejected": -287.175048828125,
"loss": 1.0663,
"margin_dpo/margin_mean": 77.4767837524414,
"margin_dpo/margin_std": 107.94747924804688,
"step": 595
},
{
"epoch": 0.9009826152683296,
"fcm_dpo/beta": 0.006332032848149538,
"fcm_dpo/delta": 0.17451868951320648,
"fcm_dpo/margin": 36.29243850708008,
"fcm_dpo/q_t": 0.44889748096466064,
"grad_norm": 12.626412391662598,
"learning_rate": 1.507684480352292e-08,
"logits/chosen": 0.6528673768043518,
"logits/rejected": 0.6745901107788086,
"logps/chosen": -213.66482543945312,
"logps/ref_chosen": -68.48088073730469,
"logps/ref_rejected": -61.732967376708984,
"logps/rejected": -243.2093505859375,
"loss": 1.2489,
"margin_dpo/margin_mean": 36.292442321777344,
"margin_dpo/margin_std": 90.35986328125,
"step": 596
},
{
"epoch": 0.9024943310657596,
"fcm_dpo/beta": 0.006430739536881447,
"fcm_dpo/delta": 0.020158810541033745,
"fcm_dpo/margin": 59.17926025390625,
"fcm_dpo/q_t": 0.41390174627304077,
"grad_norm": 11.819912910461426,
"learning_rate": 1.4627906988186111e-08,
"logits/chosen": 0.7465409636497498,
"logits/rejected": 0.7289406657218933,
"logps/chosen": -163.42794799804688,
"logps/ref_chosen": -48.85750961303711,
"logps/ref_rejected": -55.068084716796875,
"logps/rejected": -228.81777954101562,
"loss": 1.1278,
"margin_dpo/margin_mean": 59.17926025390625,
"margin_dpo/margin_std": 94.12771606445312,
"step": 597
},
{
"epoch": 0.9040060468631897,
"fcm_dpo/beta": 0.00660196878015995,
"fcm_dpo/delta": 0.15759900212287903,
"fcm_dpo/margin": 37.216529846191406,
"fcm_dpo/q_t": 0.44498932361602783,
"grad_norm": 14.393204689025879,
"learning_rate": 1.4185553036259095e-08,
"logits/chosen": 0.7748836874961853,
"logits/rejected": 0.688491702079773,
"logps/chosen": -207.56390380859375,
"logps/ref_chosen": -58.88715362548828,
"logps/ref_rejected": -81.43145751953125,
"logps/rejected": -267.32476806640625,
"loss": 1.2458,
"margin_dpo/margin_mean": 37.216529846191406,
"margin_dpo/margin_std": 93.11833190917969,
"step": 598
},
{
"epoch": 0.9055177626606198,
"fcm_dpo/beta": 0.006748649291694164,
"fcm_dpo/delta": 0.09477294981479645,
"fcm_dpo/margin": 45.604652404785156,
"fcm_dpo/q_t": 0.4309813380241394,
"grad_norm": 16.184436798095703,
"learning_rate": 1.3749795321332885e-08,
"logits/chosen": 0.807050883769989,
"logits/rejected": 0.7597008943557739,
"logps/chosen": -207.95523071289062,
"logps/ref_chosen": -57.60719299316406,
"logps/ref_rejected": -71.80469512939453,
"logps/rejected": -267.75738525390625,
"loss": 1.1988,
"margin_dpo/margin_mean": 45.604652404785156,
"margin_dpo/margin_std": 94.80332946777344,
"step": 599
},
{
"epoch": 0.9070294784580499,
"fcm_dpo/beta": 0.006845717318356037,
"fcm_dpo/delta": 0.03495318070054054,
"fcm_dpo/margin": 53.3426628112793,
"fcm_dpo/q_t": 0.41796183586120605,
"grad_norm": 15.72593879699707,
"learning_rate": 1.3320646032487393e-08,
"logits/chosen": 0.8194034099578857,
"logits/rejected": 0.7605953216552734,
"logps/chosen": -195.68597412109375,
"logps/ref_chosen": -58.44231414794922,
"logps/ref_rejected": -83.64639282226562,
"logps/rejected": -274.23272705078125,
"loss": 1.1521,
"margin_dpo/margin_mean": 53.34266662597656,
"margin_dpo/margin_std": 92.48655700683594,
"step": 600
},
{
"epoch": 0.9070294784580499,
"eval_fcm_dpo/beta": 0.006810956634581089,
"eval_logits/chosen": 0.7065654397010803,
"eval_logits/rejected": 0.6591749787330627,
"eval_logps/chosen": -207.26998901367188,
"eval_logps/ref_chosen": -74.85946655273438,
"eval_logps/ref_rejected": -79.54898834228516,
"eval_logps/rejected": -269.10394287109375,
"eval_loss": 0.570812463760376,
"eval_margin_dpo/margin_mean": 57.144439697265625,
"eval_margin_dpo/margin_std": 97.93953704833984,
"eval_runtime": 38.0483,
"eval_samples_per_second": 60.528,
"eval_steps_per_second": 1.892,
"step": 600
},
{
"epoch": 0.90854119425548,
"fcm_dpo/beta": 0.006693072617053986,
"fcm_dpo/delta": -0.09248337894678116,
"fcm_dpo/margin": 72.8189697265625,
"fcm_dpo/q_t": 0.39083534479141235,
"grad_norm": 12.468385696411133,
"learning_rate": 1.2898117173950868e-08,
"logits/chosen": 0.7074885368347168,
"logits/rejected": 0.6287850141525269,
"logps/chosen": -176.33799743652344,
"logps/ref_chosen": -55.59432601928711,
"logps/ref_rejected": -83.68630981445312,
"logps/rejected": -277.24896240234375,
"loss": 1.0715,
"margin_dpo/margin_mean": 72.8189697265625,
"margin_dpo/margin_std": 104.85636138916016,
"step": 601
},
{
"epoch": 0.91005291005291,
"fcm_dpo/beta": 0.0066335154697299,
"fcm_dpo/delta": -0.0724048912525177,
"fcm_dpo/margin": 70.70862579345703,
"fcm_dpo/q_t": 0.3926694989204407,
"grad_norm": 15.210699081420898,
"learning_rate": 1.2482220564763667e-08,
"logits/chosen": 0.7195205688476562,
"logits/rejected": 0.6898149251937866,
"logps/chosen": -165.505615234375,
"logps/ref_chosen": -56.349185943603516,
"logps/ref_rejected": -71.9959716796875,
"logps/rejected": -251.86102294921875,
"loss": 1.054,
"margin_dpo/margin_mean": 70.70862579345703,
"margin_dpo/margin_std": 89.39802551269531,
"step": 602
},
{
"epoch": 0.9115646258503401,
"fcm_dpo/beta": 0.006544841453433037,
"fcm_dpo/delta": -0.0342128649353981,
"fcm_dpo/margin": 66.09971618652344,
"fcm_dpo/q_t": 0.40155255794525146,
"grad_norm": 15.755361557006836,
"learning_rate": 1.2072967838448051e-08,
"logits/chosen": 0.7011324167251587,
"logits/rejected": 0.6412418484687805,
"logps/chosen": -176.6502685546875,
"logps/ref_chosen": -53.16838836669922,
"logps/ref_rejected": -73.8604736328125,
"logps/rejected": -263.44207763671875,
"loss": 1.0945,
"margin_dpo/margin_mean": 66.09971618652344,
"margin_dpo/margin_std": 96.66177368164062,
"step": 603
},
{
"epoch": 0.9130763416477702,
"fcm_dpo/beta": 0.006565750576555729,
"fcm_dpo/delta": 0.011286220513284206,
"fcm_dpo/margin": 59.26122283935547,
"fcm_dpo/q_t": 0.4123130440711975,
"grad_norm": 14.889829635620117,
"learning_rate": 1.1670370442682459e-08,
"logits/chosen": 0.6894519329071045,
"logits/rejected": 0.6947340965270996,
"logps/chosen": -192.02676391601562,
"logps/ref_chosen": -72.64942169189453,
"logps/ref_rejected": -69.8792724609375,
"logps/rejected": -248.51783752441406,
"loss": 1.1337,
"margin_dpo/margin_mean": 59.26122283935547,
"margin_dpo/margin_std": 99.123291015625,
"step": 604
},
{
"epoch": 0.9145880574452003,
"fcm_dpo/beta": 0.006560338661074638,
"fcm_dpo/delta": 0.01623372733592987,
"fcm_dpo/margin": 58.58209228515625,
"fcm_dpo/q_t": 0.4119049906730652,
"grad_norm": 14.635528564453125,
"learning_rate": 1.1274439638981532e-08,
"logits/chosen": 0.7958291172981262,
"logits/rejected": 0.7365133166313171,
"logps/chosen": -199.8400421142578,
"logps/ref_chosen": -61.61284637451172,
"logps/ref_rejected": -79.34398651123047,
"logps/rejected": -276.1532897949219,
"loss": 1.1393,
"margin_dpo/margin_mean": 58.58209228515625,
"margin_dpo/margin_std": 99.20114135742188,
"step": 605
},
{
"epoch": 0.9160997732426304,
"fcm_dpo/beta": 0.0065470244735479355,
"fcm_dpo/delta": -0.061599597334861755,
"fcm_dpo/margin": 70.06275939941406,
"fcm_dpo/q_t": 0.3971368670463562,
"grad_norm": 16.981082916259766,
"learning_rate": 1.0885186502381016e-08,
"logits/chosen": 0.7151072025299072,
"logits/rejected": 0.6450438499450684,
"logps/chosen": -175.30453491210938,
"logps/ref_chosen": -54.46424102783203,
"logps/ref_rejected": -79.62708282470703,
"logps/rejected": -270.5301513671875,
"loss": 1.0781,
"margin_dpo/margin_mean": 70.06275939941406,
"margin_dpo/margin_std": 98.99264526367188,
"step": 606
},
{
"epoch": 0.9176114890400605,
"fcm_dpo/beta": 0.0064436523243784904,
"fcm_dpo/delta": -0.0057749077677726746,
"fcm_dpo/margin": 62.736549377441406,
"fcm_dpo/q_t": 0.4076859951019287,
"grad_norm": 14.168997764587402,
"learning_rate": 1.0502621921127774e-08,
"logits/chosen": 0.725983202457428,
"logits/rejected": 0.6987679600715637,
"logps/chosen": -198.48764038085938,
"logps/ref_chosen": -62.86086654663086,
"logps/ref_rejected": -72.5501937866211,
"logps/rejected": -270.91351318359375,
"loss": 1.1196,
"margin_dpo/margin_mean": 62.736549377441406,
"margin_dpo/margin_std": 96.45252990722656,
"step": 607
},
{
"epoch": 0.9191232048374905,
"fcm_dpo/beta": 0.006499715615063906,
"fcm_dpo/delta": 0.00026232190430164337,
"fcm_dpo/margin": 61.499488830566406,
"fcm_dpo/q_t": 0.408882200717926,
"grad_norm": 14.22420883178711,
"learning_rate": 1.0126756596375685e-08,
"logits/chosen": 0.7125911116600037,
"logits/rejected": 0.6294840574264526,
"logps/chosen": -200.25701904296875,
"logps/ref_chosen": -63.18071746826172,
"logps/ref_rejected": -99.15888214111328,
"logps/rejected": -297.73468017578125,
"loss": 1.1046,
"margin_dpo/margin_mean": 61.499488830566406,
"margin_dpo/margin_std": 88.79376220703125,
"step": 608
},
{
"epoch": 0.9206349206349206,
"fcm_dpo/beta": 0.006406780332326889,
"fcm_dpo/delta": -0.0627242773771286,
"fcm_dpo/margin": 71.68486785888672,
"fcm_dpo/q_t": 0.3930833339691162,
"grad_norm": 12.658600807189941,
"learning_rate": 9.757601041885694e-09,
"logits/chosen": 0.822446346282959,
"logits/rejected": 0.7840192914009094,
"logps/chosen": -170.25494384765625,
"logps/ref_chosen": -48.62322235107422,
"logps/ref_rejected": -68.28271484375,
"logps/rejected": -261.59930419921875,
"loss": 1.0534,
"margin_dpo/margin_mean": 71.68486785888672,
"margin_dpo/margin_std": 85.14846801757812,
"step": 609
},
{
"epoch": 0.9221466364323507,
"fcm_dpo/beta": 0.006349500268697739,
"fcm_dpo/delta": -0.041650526225566864,
"fcm_dpo/margin": 69.20389556884766,
"fcm_dpo/q_t": 0.4011300802230835,
"grad_norm": 14.117827415466309,
"learning_rate": 9.395165583732379e-09,
"logits/chosen": 0.7303283214569092,
"logits/rejected": 0.7262221574783325,
"logps/chosen": -203.7093505859375,
"logps/ref_chosen": -72.66513061523438,
"logps/ref_rejected": -87.15310668945312,
"logps/rejected": -287.4012145996094,
"loss": 1.086,
"margin_dpo/margin_mean": 69.20388793945312,
"margin_dpo/margin_std": 98.40621185302734,
"step": 610
},
{
"epoch": 0.9236583522297808,
"fcm_dpo/beta": 0.006394756026566029,
"fcm_dpo/delta": 0.05369244143366814,
"fcm_dpo/margin": 54.4486083984375,
"fcm_dpo/q_t": 0.4193510413169861,
"grad_norm": 14.468083381652832,
"learning_rate": 9.03946036001449e-09,
"logits/chosen": 0.7536121606826782,
"logits/rejected": 0.7031623125076294,
"logps/chosen": -171.63223266601562,
"logps/ref_chosen": -48.30857849121094,
"logps/ref_rejected": -70.6141128540039,
"logps/rejected": -248.38636779785156,
"loss": 1.1383,
"margin_dpo/margin_mean": 54.448604583740234,
"margin_dpo/margin_std": 84.60821533203125,
"step": 611
},
{
"epoch": 0.9251700680272109,
"fcm_dpo/beta": 0.006336958147585392,
"fcm_dpo/delta": -0.09963831305503845,
"fcm_dpo/margin": 78.07444763183594,
"fcm_dpo/q_t": 0.38563889265060425,
"grad_norm": 12.297904014587402,
"learning_rate": 8.690495320571839e-09,
"logits/chosen": 0.6890215277671814,
"logits/rejected": 0.6157269477844238,
"logps/chosen": -194.6959686279297,
"logps/ref_chosen": -61.23155975341797,
"logps/ref_rejected": -94.37979888916016,
"logps/rejected": -305.91864013671875,
"loss": 1.0354,
"margin_dpo/margin_mean": 78.07444763183594,
"margin_dpo/margin_std": 94.86837768554688,
"step": 612
},
{
"epoch": 0.926681783824641,
"fcm_dpo/beta": 0.006252289284020662,
"fcm_dpo/delta": -0.07290597259998322,
"fcm_dpo/margin": 75.09466552734375,
"fcm_dpo/q_t": 0.39151662588119507,
"grad_norm": 11.459220886230469,
"learning_rate": 8.348280226706722e-09,
"logits/chosen": 0.6596091985702515,
"logits/rejected": 0.6567984819412231,
"logps/chosen": -167.78414916992188,
"logps/ref_chosen": -53.98310852050781,
"logps/ref_rejected": -58.32208251953125,
"logps/rejected": -247.21780395507812,
"loss": 1.0501,
"margin_dpo/margin_mean": 75.09466552734375,
"margin_dpo/margin_std": 92.46086120605469,
"step": 613
},
{
"epoch": 0.9281934996220711,
"fcm_dpo/beta": 0.006214224733412266,
"fcm_dpo/delta": -0.024648673832416534,
"fcm_dpo/margin": 68.12400817871094,
"fcm_dpo/q_t": 0.4012283682823181,
"grad_norm": 14.5516939163208,
"learning_rate": 8.012824650910937e-09,
"logits/chosen": 0.7856525182723999,
"logits/rejected": 0.7776677012443542,
"logps/chosen": -194.23440551757812,
"logps/ref_chosen": -60.24303436279297,
"logps/ref_rejected": -72.26258850097656,
"logps/rejected": -274.3779602050781,
"loss": 1.0817,
"margin_dpo/margin_mean": 68.12400817871094,
"margin_dpo/margin_std": 88.94013977050781,
"step": 614
},
{
"epoch": 0.9297052154195011,
"fcm_dpo/beta": 0.006137081887573004,
"fcm_dpo/delta": -0.009469401091337204,
"fcm_dpo/margin": 66.5538558959961,
"fcm_dpo/q_t": 0.4069540798664093,
"grad_norm": 13.770292282104492,
"learning_rate": 7.684137976598088e-09,
"logits/chosen": 0.6927610039710999,
"logits/rejected": 0.6372318863868713,
"logps/chosen": -212.01260375976562,
"logps/ref_chosen": -72.09467315673828,
"logps/ref_rejected": -104.02980041503906,
"logps/rejected": -310.5015869140625,
"loss": 1.1241,
"margin_dpo/margin_mean": 66.5538558959961,
"margin_dpo/margin_std": 107.617431640625,
"step": 615
},
{
"epoch": 0.9312169312169312,
"fcm_dpo/beta": 0.0061771986074745655,
"fcm_dpo/delta": 0.023163840174674988,
"fcm_dpo/margin": 61.14124298095703,
"fcm_dpo/q_t": 0.41386234760284424,
"grad_norm": 11.939630508422852,
"learning_rate": 7.36222939784098e-09,
"logits/chosen": 0.789786696434021,
"logits/rejected": 0.7031727433204651,
"logps/chosen": -190.14306640625,
"logps/ref_chosen": -58.530723571777344,
"logps/ref_rejected": -75.48025512695312,
"logps/rejected": -268.23382568359375,
"loss": 1.1217,
"margin_dpo/margin_mean": 61.14124298095703,
"margin_dpo/margin_std": 92.75981140136719,
"step": 616
},
{
"epoch": 0.9327286470143613,
"fcm_dpo/beta": 0.006244382821023464,
"fcm_dpo/delta": 0.1002284437417984,
"fcm_dpo/margin": 48.437278747558594,
"fcm_dpo/q_t": 0.4307482838630676,
"grad_norm": 15.972477912902832,
"learning_rate": 7.047107919114586e-09,
"logits/chosen": 0.7590749263763428,
"logits/rejected": 0.7044551372528076,
"logps/chosen": -203.44656372070312,
"logps/ref_chosen": -57.608673095703125,
"logps/ref_rejected": -81.22109985351562,
"logps/rejected": -275.49627685546875,
"loss": 1.1846,
"margin_dpo/margin_mean": 48.437278747558594,
"margin_dpo/margin_std": 87.9116439819336,
"step": 617
},
{
"epoch": 0.9342403628117913,
"fcm_dpo/beta": 0.006315155886113644,
"fcm_dpo/delta": 0.010729154571890831,
"fcm_dpo/margin": 61.689369201660156,
"fcm_dpo/q_t": 0.4114975333213806,
"grad_norm": 15.93805980682373,
"learning_rate": 6.738782355044048e-09,
"logits/chosen": 0.7244783639907837,
"logits/rejected": 0.6130063533782959,
"logps/chosen": -183.0014190673828,
"logps/ref_chosen": -56.69594192504883,
"logps/ref_rejected": -85.92362976074219,
"logps/rejected": -273.9184875488281,
"loss": 1.1092,
"margin_dpo/margin_mean": 61.689361572265625,
"margin_dpo/margin_std": 89.48055267333984,
"step": 618
},
{
"epoch": 0.9357520786092215,
"fcm_dpo/beta": 0.006306151859462261,
"fcm_dpo/delta": -0.02410227060317993,
"fcm_dpo/margin": 67.08335876464844,
"fcm_dpo/q_t": 0.4025493562221527,
"grad_norm": 13.588555335998535,
"learning_rate": 6.437261330158206e-09,
"logits/chosen": 0.8057536482810974,
"logits/rejected": 0.7263665795326233,
"logps/chosen": -178.52919006347656,
"logps/ref_chosen": -54.05841827392578,
"logps/ref_rejected": -83.55493927001953,
"logps/rejected": -275.10906982421875,
"loss": 1.0933,
"margin_dpo/margin_mean": 67.08335876464844,
"margin_dpo/margin_std": 96.20336151123047,
"step": 619
},
{
"epoch": 0.9372637944066515,
"fcm_dpo/beta": 0.006400998681783676,
"fcm_dpo/delta": 0.03517021983861923,
"fcm_dpo/margin": 56.845909118652344,
"fcm_dpo/q_t": 0.4166967272758484,
"grad_norm": 14.824642181396484,
"learning_rate": 6.142553278648238e-09,
"logits/chosen": 0.7471519708633423,
"logits/rejected": 0.7479252815246582,
"logps/chosen": -186.22177124023438,
"logps/ref_chosen": -63.36971664428711,
"logps/ref_rejected": -65.68269348144531,
"logps/rejected": -245.38064575195312,
"loss": 1.1397,
"margin_dpo/margin_mean": 56.845909118652344,
"margin_dpo/margin_std": 87.86839294433594,
"step": 620
},
{
"epoch": 0.9387755102040817,
"fcm_dpo/beta": 0.006424080580472946,
"fcm_dpo/delta": 0.06066777557134628,
"fcm_dpo/margin": 53.09856033325195,
"fcm_dpo/q_t": 0.4227798581123352,
"grad_norm": 16.096923828125,
"learning_rate": 5.854666444131934e-09,
"logits/chosen": 0.7947447896003723,
"logits/rejected": 0.6759539842605591,
"logps/chosen": -179.39161682128906,
"logps/ref_chosen": -52.321224212646484,
"logps/ref_rejected": -88.09001159667969,
"logps/rejected": -268.25897216796875,
"loss": 1.1665,
"margin_dpo/margin_mean": 53.09856033325195,
"margin_dpo/margin_std": 97.253173828125,
"step": 621
},
{
"epoch": 0.9402872260015117,
"fcm_dpo/beta": 0.006452606059610844,
"fcm_dpo/delta": 0.012101054191589355,
"fcm_dpo/margin": 60.169471740722656,
"fcm_dpo/q_t": 0.41136711835861206,
"grad_norm": 15.709192276000977,
"learning_rate": 5.573608879422875e-09,
"logits/chosen": 0.6908876299858093,
"logits/rejected": 0.6467409729957581,
"logps/chosen": -197.26239013671875,
"logps/ref_chosen": -59.86545944213867,
"logps/ref_rejected": -81.86668395996094,
"logps/rejected": -279.43310546875,
"loss": 1.1211,
"margin_dpo/margin_mean": 60.169471740722656,
"margin_dpo/margin_std": 93.21742248535156,
"step": 622
},
{
"epoch": 0.9417989417989417,
"fcm_dpo/beta": 0.006413621827960014,
"fcm_dpo/delta": -0.007789114490151405,
"fcm_dpo/margin": 63.49436950683594,
"fcm_dpo/q_t": 0.4069734215736389,
"grad_norm": 13.204484939575195,
"learning_rate": 5.299388446305342e-09,
"logits/chosen": 0.7018610239028931,
"logits/rejected": 0.6416829228401184,
"logps/chosen": -213.017822265625,
"logps/ref_chosen": -67.36846160888672,
"logps/ref_rejected": -82.02733612060547,
"logps/rejected": -291.17108154296875,
"loss": 1.1068,
"margin_dpo/margin_mean": 63.49437713623047,
"margin_dpo/margin_std": 94.36400604248047,
"step": 623
},
{
"epoch": 0.9433106575963719,
"fcm_dpo/beta": 0.006365837063640356,
"fcm_dpo/delta": -0.053610917180776596,
"fcm_dpo/margin": 70.83954620361328,
"fcm_dpo/q_t": 0.3987389802932739,
"grad_norm": 15.614096641540527,
"learning_rate": 5.03201281531429e-09,
"logits/chosen": 0.7449072599411011,
"logits/rejected": 0.6436171531677246,
"logps/chosen": -173.16964721679688,
"logps/ref_chosen": -51.02655029296875,
"logps/ref_rejected": -76.49203491210938,
"logps/rejected": -269.47467041015625,
"loss": 1.0817,
"margin_dpo/margin_mean": 70.83954620361328,
"margin_dpo/margin_std": 100.73272705078125,
"step": 624
},
{
"epoch": 0.9448223733938019,
"fcm_dpo/beta": 0.0064194174483418465,
"fcm_dpo/delta": 0.058818817138671875,
"fcm_dpo/margin": 53.46350860595703,
"fcm_dpo/q_t": 0.4236186742782593,
"grad_norm": 14.774746894836426,
"learning_rate": 4.7714894655209174e-09,
"logits/chosen": 0.7884582281112671,
"logits/rejected": 0.6976134181022644,
"logps/chosen": -180.1649932861328,
"logps/ref_chosen": -54.20761489868164,
"logps/ref_rejected": -84.93669128417969,
"logps/rejected": -264.35760498046875,
"loss": 1.1744,
"margin_dpo/margin_mean": 53.46350860595703,
"margin_dpo/margin_std": 102.96124267578125,
"step": 625
},
{
"epoch": 0.9463340891912321,
"fcm_dpo/beta": 0.006372136529535055,
"fcm_dpo/delta": -0.05755068361759186,
"fcm_dpo/margin": 71.36672973632812,
"fcm_dpo/q_t": 0.39941319823265076,
"grad_norm": 13.7017183303833,
"learning_rate": 4.517825684323323e-09,
"logits/chosen": 0.8437789082527161,
"logits/rejected": 0.7019960880279541,
"logps/chosen": -164.62301635742188,
"logps/ref_chosen": -45.06201934814453,
"logps/ref_rejected": -89.66368103027344,
"logps/rejected": -280.5914306640625,
"loss": 1.0918,
"margin_dpo/margin_mean": 71.36672973632812,
"margin_dpo/margin_std": 108.4215316772461,
"step": 626
},
{
"epoch": 0.9478458049886621,
"fcm_dpo/beta": 0.0062685152515769005,
"fcm_dpo/delta": -0.08397074788808823,
"fcm_dpo/margin": 76.50228881835938,
"fcm_dpo/q_t": 0.38953036069869995,
"grad_norm": 13.963146209716797,
"learning_rate": 4.271028567242818e-09,
"logits/chosen": 0.6753963232040405,
"logits/rejected": 0.5504001975059509,
"logps/chosen": -194.23684692382812,
"logps/ref_chosen": -58.791053771972656,
"logps/ref_rejected": -94.90802001953125,
"logps/rejected": -306.8561096191406,
"loss": 1.0565,
"margin_dpo/margin_mean": 76.50228881835938,
"margin_dpo/margin_std": 100.38463592529297,
"step": 627
},
{
"epoch": 0.9493575207860923,
"fcm_dpo/beta": 0.0062351408414542675,
"fcm_dpo/delta": -0.08525798469781876,
"fcm_dpo/margin": 77.04563903808594,
"fcm_dpo/q_t": 0.3889637589454651,
"grad_norm": 17.903270721435547,
"learning_rate": 4.0311050177251895e-09,
"logits/chosen": 0.7402326464653015,
"logits/rejected": 0.7005974650382996,
"logps/chosen": -173.79000854492188,
"logps/ref_chosen": -52.80357360839844,
"logps/ref_rejected": -76.49468994140625,
"logps/rejected": -274.5267639160156,
"loss": 1.0697,
"margin_dpo/margin_mean": 77.04563903808594,
"margin_dpo/margin_std": 100.9229736328125,
"step": 628
},
{
"epoch": 0.9508692365835223,
"fcm_dpo/beta": 0.006222175434231758,
"fcm_dpo/delta": 0.054710421711206436,
"fcm_dpo/margin": 55.74674606323242,
"fcm_dpo/q_t": 0.42095980048179626,
"grad_norm": 11.830018997192383,
"learning_rate": 3.798061746947995e-09,
"logits/chosen": 0.7364556789398193,
"logits/rejected": 0.7304829359054565,
"logps/chosen": -201.89068603515625,
"logps/ref_chosen": -70.71749877929688,
"logps/ref_rejected": -78.96273803710938,
"logps/rejected": -265.8826599121094,
"loss": 1.1372,
"margin_dpo/margin_mean": 55.74674987792969,
"margin_dpo/margin_std": 85.56368255615234,
"step": 629
},
{
"epoch": 0.9523809523809523,
"fcm_dpo/beta": 0.006174253765493631,
"fcm_dpo/delta": -0.06831058114767075,
"fcm_dpo/margin": 75.34062194824219,
"fcm_dpo/q_t": 0.39373886585235596,
"grad_norm": 10.842805862426758,
"learning_rate": 3.5719052736323806e-09,
"logits/chosen": 0.6691682934761047,
"logits/rejected": 0.6251407861709595,
"logps/chosen": -180.2628173828125,
"logps/ref_chosen": -56.201412200927734,
"logps/ref_rejected": -74.69807434082031,
"logps/rejected": -274.10009765625,
"loss": 1.0568,
"margin_dpo/margin_mean": 75.34062194824219,
"margin_dpo/margin_std": 95.94551086425781,
"step": 630
},
{
"epoch": 0.9538926681783825,
"fcm_dpo/beta": 0.005993704777210951,
"fcm_dpo/delta": -0.10610571503639221,
"fcm_dpo/margin": 83.22488403320312,
"fcm_dpo/q_t": 0.3868658244609833,
"grad_norm": 13.053339958190918,
"learning_rate": 3.352641923861144e-09,
"logits/chosen": 0.8554993867874146,
"logits/rejected": 0.735321044921875,
"logps/chosen": -176.91673278808594,
"logps/ref_chosen": -58.82059860229492,
"logps/ref_rejected": -96.51437377929688,
"logps/rejected": -297.8354187011719,
"loss": 1.0444,
"margin_dpo/margin_mean": 83.22488403320312,
"margin_dpo/margin_std": 104.36235046386719,
"step": 631
},
{
"epoch": 0.9554043839758125,
"fcm_dpo/beta": 0.005943012423813343,
"fcm_dpo/delta": -0.08964134752750397,
"fcm_dpo/margin": 81.67279052734375,
"fcm_dpo/q_t": 0.3873947858810425,
"grad_norm": 12.306257247924805,
"learning_rate": 3.140277830901428e-09,
"logits/chosen": 0.7849439382553101,
"logits/rejected": 0.7638136744499207,
"logps/chosen": -179.699951171875,
"logps/ref_chosen": -58.786048889160156,
"logps/ref_rejected": -67.21923828125,
"logps/rejected": -269.8059387207031,
"loss": 1.033,
"margin_dpo/margin_mean": 81.67279052734375,
"margin_dpo/margin_std": 94.06131744384766,
"step": 632
},
{
"epoch": 0.9569160997732427,
"fcm_dpo/beta": 0.005942155607044697,
"fcm_dpo/delta": 0.029256466776132584,
"fcm_dpo/margin": 62.54181671142578,
"fcm_dpo/q_t": 0.4152376651763916,
"grad_norm": 12.558691024780273,
"learning_rate": 2.9348189350335007e-09,
"logits/chosen": 0.729263424873352,
"logits/rejected": 0.6651930809020996,
"logps/chosen": -166.05075073242188,
"logps/ref_chosen": -52.13019561767578,
"logps/ref_rejected": -67.23016357421875,
"logps/rejected": -243.69253540039062,
"loss": 1.1228,
"margin_dpo/margin_mean": 62.54180908203125,
"margin_dpo/margin_std": 93.7554931640625,
"step": 633
},
{
"epoch": 0.9584278155706727,
"fcm_dpo/beta": 0.006169452797621489,
"fcm_dpo/delta": 0.2458600401878357,
"fcm_dpo/margin": 25.533851623535156,
"fcm_dpo/q_t": 0.4652412533760071,
"grad_norm": 15.92563533782959,
"learning_rate": 2.736270983384276e-09,
"logits/chosen": 0.8017531633377075,
"logits/rejected": 0.8143984079360962,
"logps/chosen": -203.40805053710938,
"logps/ref_chosen": -60.97979736328125,
"logps/ref_rejected": -58.50825119018555,
"logps/rejected": -226.47035217285156,
"loss": 1.3195,
"margin_dpo/margin_mean": 25.533855438232422,
"margin_dpo/margin_std": 96.38629150390625,
"step": 634
},
{
"epoch": 0.9599395313681028,
"fcm_dpo/beta": 0.006332189776003361,
"fcm_dpo/delta": 0.08927176892757416,
"fcm_dpo/margin": 49.46977233886719,
"fcm_dpo/q_t": 0.4298204183578491,
"grad_norm": 14.447734832763672,
"learning_rate": 2.5446395297668287e-09,
"logits/chosen": 0.6402159929275513,
"logits/rejected": 0.574030339717865,
"logps/chosen": -220.11471557617188,
"logps/ref_chosen": -65.9730224609375,
"logps/ref_rejected": -85.61317443847656,
"logps/rejected": -289.2246398925781,
"loss": 1.2003,
"margin_dpo/margin_mean": 49.46977233886719,
"margin_dpo/margin_std": 103.6912612915039,
"step": 635
},
{
"epoch": 0.9614512471655329,
"fcm_dpo/beta": 0.00630118977278471,
"fcm_dpo/delta": -0.030526097863912582,
"fcm_dpo/margin": 68.08009338378906,
"fcm_dpo/q_t": 0.4005919098854065,
"grad_norm": 11.525025367736816,
"learning_rate": 2.359929934524829e-09,
"logits/chosen": 0.7165452837944031,
"logits/rejected": 0.6174004077911377,
"logps/chosen": -170.86050415039062,
"logps/ref_chosen": -49.140167236328125,
"logps/ref_rejected": -81.26971435546875,
"logps/rejected": -271.07012939453125,
"loss": 1.0798,
"margin_dpo/margin_mean": 68.08008575439453,
"margin_dpo/margin_std": 90.331787109375,
"step": 636
},
{
"epoch": 0.9629629629629629,
"fcm_dpo/beta": 0.006400472484529018,
"fcm_dpo/delta": 0.07462954521179199,
"fcm_dpo/margin": 51.14664840698242,
"fcm_dpo/q_t": 0.42703452706336975,
"grad_norm": 15.60743236541748,
"learning_rate": 2.1821473643827137e-09,
"logits/chosen": 0.6757951974868774,
"logits/rejected": 0.6063965559005737,
"logps/chosen": -232.64633178710938,
"logps/ref_chosen": -73.69658660888672,
"logps/ref_rejected": -83.01487731933594,
"logps/rejected": -293.11126708984375,
"loss": 1.189,
"margin_dpo/margin_mean": 51.14665222167969,
"margin_dpo/margin_std": 103.88670349121094,
"step": 637
},
{
"epoch": 0.9644746787603931,
"fcm_dpo/beta": 0.006420266814529896,
"fcm_dpo/delta": 0.009079055860638618,
"fcm_dpo/margin": 60.937583923339844,
"fcm_dpo/q_t": 0.4115605056285858,
"grad_norm": 14.104433059692383,
"learning_rate": 2.0112967923011646e-09,
"logits/chosen": 0.7370562553405762,
"logits/rejected": 0.6876152753829956,
"logps/chosen": -203.04010009765625,
"logps/ref_chosen": -62.78158187866211,
"logps/ref_rejected": -85.40478515625,
"logps/rejected": -286.60089111328125,
"loss": 1.1168,
"margin_dpo/margin_mean": 60.937583923339844,
"margin_dpo/margin_std": 93.2264633178711,
"step": 638
},
{
"epoch": 0.9659863945578231,
"fcm_dpo/beta": 0.0063707176595926285,
"fcm_dpo/delta": -0.05552205443382263,
"fcm_dpo/margin": 71.11326599121094,
"fcm_dpo/q_t": 0.3982432782649994,
"grad_norm": 14.116220474243164,
"learning_rate": 1.847382997337943e-09,
"logits/chosen": 0.7373175024986267,
"logits/rejected": 0.6314177513122559,
"logps/chosen": -179.0370330810547,
"logps/ref_chosen": -53.76658630371094,
"logps/ref_rejected": -72.30009460449219,
"logps/rejected": -268.68377685546875,
"loss": 1.0759,
"margin_dpo/margin_mean": 71.11326599121094,
"margin_dpo/margin_std": 98.96980285644531,
"step": 639
},
{
"epoch": 0.9674981103552532,
"fcm_dpo/beta": 0.0063106524758040905,
"fcm_dpo/delta": -0.018060026690363884,
"fcm_dpo/margin": 66.09814453125,
"fcm_dpo/q_t": 0.4042467474937439,
"grad_norm": 13.222526550292969,
"learning_rate": 1.690410564514244e-09,
"logits/chosen": 0.8086303472518921,
"logits/rejected": 0.7425358295440674,
"logps/chosen": -181.33102416992188,
"logps/ref_chosen": -51.41777801513672,
"logps/ref_rejected": -77.27879333496094,
"logps/rejected": -273.2901611328125,
"loss": 1.0978,
"margin_dpo/margin_mean": 66.09814453125,
"margin_dpo/margin_std": 95.61439514160156,
"step": 640
},
{
"epoch": 0.9690098261526833,
"fcm_dpo/beta": 0.006334484554827213,
"fcm_dpo/delta": 0.02717267908155918,
"fcm_dpo/margin": 59.01282501220703,
"fcm_dpo/q_t": 0.41298502683639526,
"grad_norm": 13.800342559814453,
"learning_rate": 1.5403838846864692e-09,
"logits/chosen": 0.7327412366867065,
"logits/rejected": 0.711341381072998,
"logps/chosen": -208.6962127685547,
"logps/ref_chosen": -71.0546646118164,
"logps/ref_rejected": -82.2440185546875,
"logps/rejected": -278.89837646484375,
"loss": 1.1079,
"margin_dpo/margin_mean": 59.0128288269043,
"margin_dpo/margin_std": 80.039794921875,
"step": 641
},
{
"epoch": 0.9705215419501134,
"fcm_dpo/beta": 0.006432985886931419,
"fcm_dpo/delta": 0.12851060926914215,
"fcm_dpo/margin": 42.731483459472656,
"fcm_dpo/q_t": 0.43809741735458374,
"grad_norm": 16.78900146484375,
"learning_rate": 1.3973071544233218e-09,
"logits/chosen": 0.6944186687469482,
"logits/rejected": 0.712386965751648,
"logps/chosen": -216.907470703125,
"logps/ref_chosen": -68.92927551269531,
"logps/ref_rejected": -70.85682678222656,
"logps/rejected": -261.5665283203125,
"loss": 1.2293,
"margin_dpo/margin_mean": 42.731483459472656,
"margin_dpo/margin_std": 96.30165100097656,
"step": 642
},
{
"epoch": 0.9720332577475435,
"fcm_dpo/beta": 0.006475288886576891,
"fcm_dpo/delta": -0.036847636103630066,
"fcm_dpo/margin": 67.19929504394531,
"fcm_dpo/q_t": 0.4012775421142578,
"grad_norm": 20.203689575195312,
"learning_rate": 1.261184375888541e-09,
"logits/chosen": 0.68418288230896,
"logits/rejected": 0.5926668047904968,
"logps/chosen": -196.42494201660156,
"logps/ref_chosen": -65.30903625488281,
"logps/ref_rejected": -83.61613464355469,
"logps/rejected": -281.93133544921875,
"loss": 1.097,
"margin_dpo/margin_mean": 67.19929504394531,
"margin_dpo/margin_std": 99.99290466308594,
"step": 643
},
{
"epoch": 0.9735449735449735,
"fcm_dpo/beta": 0.0065782819874584675,
"fcm_dpo/delta": 0.06409407407045364,
"fcm_dpo/margin": 51.20503616333008,
"fcm_dpo/q_t": 0.42324745655059814,
"grad_norm": 13.637568473815918,
"learning_rate": 1.1320193567288527e-09,
"logits/chosen": 0.8207970261573792,
"logits/rejected": 0.7883522510528564,
"logps/chosen": -173.14559936523438,
"logps/ref_chosen": -51.002601623535156,
"logps/ref_rejected": -64.46372985839844,
"logps/rejected": -237.811767578125,
"loss": 1.1863,
"margin_dpo/margin_mean": 51.20503616333008,
"margin_dpo/margin_std": 101.434326171875,
"step": 644
},
{
"epoch": 0.9750566893424036,
"fcm_dpo/beta": 0.0065458714962005615,
"fcm_dpo/delta": -0.0054698544554412365,
"fcm_dpo/margin": 61.90576934814453,
"fcm_dpo/q_t": 0.4064505100250244,
"grad_norm": 15.224577903747559,
"learning_rate": 1.0098157099674987e-09,
"logits/chosen": 0.7137904763221741,
"logits/rejected": 0.695641279220581,
"logps/chosen": -193.05609130859375,
"logps/ref_chosen": -60.963409423828125,
"logps/ref_rejected": -69.73353576660156,
"logps/rejected": -263.73199462890625,
"loss": 1.0942,
"margin_dpo/margin_mean": 61.90576934814453,
"margin_dpo/margin_std": 84.93324279785156,
"step": 645
},
{
"epoch": 0.9765684051398337,
"fcm_dpo/beta": 0.006599565502256155,
"fcm_dpo/delta": 0.04752783104777336,
"fcm_dpo/margin": 53.65968322753906,
"fcm_dpo/q_t": 0.42021092772483826,
"grad_norm": 13.733308792114258,
"learning_rate": 8.945768539031783e-10,
"logits/chosen": 0.7736262679100037,
"logits/rejected": 0.7145426869392395,
"logps/chosen": -207.87789916992188,
"logps/ref_chosen": -62.290069580078125,
"logps/ref_rejected": -85.54812622070312,
"logps/rejected": -284.795654296875,
"loss": 1.1632,
"margin_dpo/margin_mean": 53.659690856933594,
"margin_dpo/margin_std": 99.115478515625,
"step": 646
},
{
"epoch": 0.9780801209372638,
"fcm_dpo/beta": 0.006466761231422424,
"fcm_dpo/delta": -0.16596609354019165,
"fcm_dpo/margin": 86.12970733642578,
"fcm_dpo/q_t": 0.37213167548179626,
"grad_norm": 14.168996810913086,
"learning_rate": 7.863060120144316e-10,
"logits/chosen": 0.7759414315223694,
"logits/rejected": 0.6749926805496216,
"logps/chosen": -209.0861358642578,
"logps/ref_chosen": -67.515869140625,
"logps/ref_rejected": -101.50871276855469,
"logps/rejected": -329.20867919921875,
"loss": 0.9844,
"margin_dpo/margin_mean": 86.12970733642578,
"margin_dpo/margin_std": 90.1838150024414,
"step": 647
},
{
"epoch": 0.9795918367346939,
"fcm_dpo/beta": 0.0063859750516712666,
"fcm_dpo/delta": 0.01350357010960579,
"fcm_dpo/margin": 60.570072174072266,
"fcm_dpo/q_t": 0.4120126962661743,
"grad_norm": 14.232726097106934,
"learning_rate": 6.850062128694045e-10,
"logits/chosen": 0.686457097530365,
"logits/rejected": 0.6205060482025146,
"logps/chosen": -205.75772094726562,
"logps/ref_chosen": -64.59593963623047,
"logps/ref_rejected": -83.384033203125,
"logps/rejected": -285.11590576171875,
"loss": 1.1388,
"margin_dpo/margin_mean": 60.570072174072266,
"margin_dpo/margin_std": 102.00686645507812,
"step": 648
},
{
"epoch": 0.981103552532124,
"fcm_dpo/beta": 0.006387336179614067,
"fcm_dpo/delta": 0.005479734390974045,
"fcm_dpo/margin": 61.72749328613281,
"fcm_dpo/q_t": 0.410408616065979,
"grad_norm": 18.840017318725586,
"learning_rate": 5.906802900412788e-10,
"logits/chosen": 0.7889381647109985,
"logits/rejected": 0.7258505821228027,
"logps/chosen": -181.8843994140625,
"logps/ref_chosen": -49.30964660644531,
"logps/ref_rejected": -73.73710632324219,
"logps/rejected": -268.03936767578125,
"loss": 1.1341,
"margin_dpo/margin_mean": 61.72749328613281,
"margin_dpo/margin_std": 102.57989501953125,
"step": 649
},
{
"epoch": 0.982615268329554,
"fcm_dpo/beta": 0.00640866719186306,
"fcm_dpo/delta": 0.0005240924656391144,
"fcm_dpo/margin": 62.324851989746094,
"fcm_dpo/q_t": 0.40963131189346313,
"grad_norm": 13.567985534667969,
"learning_rate": 5.033308820289184e-10,
"logits/chosen": 0.8344835042953491,
"logits/rejected": 0.7642044425010681,
"logps/chosen": -180.97103881835938,
"logps/ref_chosen": -55.06325912475586,
"logps/ref_rejected": -77.39610290527344,
"logps/rejected": -265.62872314453125,
"loss": 1.1328,
"margin_dpo/margin_mean": 62.32485580444336,
"margin_dpo/margin_std": 104.03192138671875,
"step": 650
},
{
"epoch": 0.9841269841269841,
"fcm_dpo/beta": 0.006482687778770924,
"fcm_dpo/delta": 0.05127622187137604,
"fcm_dpo/margin": 54.04645919799805,
"fcm_dpo/q_t": 0.421572208404541,
"grad_norm": 12.98111343383789,
"learning_rate": 4.2296043218295606e-10,
"logits/chosen": 0.8331591486930847,
"logits/rejected": 0.7519968152046204,
"logps/chosen": -180.65142822265625,
"logps/ref_chosen": -54.065162658691406,
"logps/ref_rejected": -77.79080200195312,
"logps/rejected": -258.42352294921875,
"loss": 1.15,
"margin_dpo/margin_mean": 54.04645919799805,
"margin_dpo/margin_std": 91.8290023803711,
"step": 651
},
{
"epoch": 0.9856386999244142,
"fcm_dpo/beta": 0.006511835381388664,
"fcm_dpo/delta": 0.05867896229028702,
"fcm_dpo/margin": 52.69062423706055,
"fcm_dpo/q_t": 0.42380571365356445,
"grad_norm": 15.500580787658691,
"learning_rate": 3.4957118863768176e-10,
"logits/chosen": 0.7394665479660034,
"logits/rejected": 0.6864569783210754,
"logps/chosen": -212.748779296875,
"logps/ref_chosen": -63.64030456542969,
"logps/ref_rejected": -78.86882019042969,
"logps/rejected": -280.66790771484375,
"loss": 1.1808,
"margin_dpo/margin_mean": 52.69062423706055,
"margin_dpo/margin_std": 102.77951049804688,
"step": 652
},
{
"epoch": 0.9871504157218443,
"fcm_dpo/beta": 0.0065590618178248405,
"fcm_dpo/delta": -0.02155652642250061,
"fcm_dpo/margin": 64.1231918334961,
"fcm_dpo/q_t": 0.4042917490005493,
"grad_norm": 14.692912101745605,
"learning_rate": 2.831652042480093e-10,
"logits/chosen": 0.7130542993545532,
"logits/rejected": 0.6688984632492065,
"logps/chosen": -195.92251586914062,
"logps/ref_chosen": -61.668373107910156,
"logps/ref_rejected": -73.83012390136719,
"logps/rejected": -272.20745849609375,
"loss": 1.1112,
"margin_dpo/margin_mean": 64.1231918334961,
"margin_dpo/margin_std": 100.28158569335938,
"step": 653
},
{
"epoch": 0.9886621315192744,
"fcm_dpo/beta": 0.006530907936394215,
"fcm_dpo/delta": 0.05786158889532089,
"fcm_dpo/margin": 52.50005340576172,
"fcm_dpo/q_t": 0.4228893518447876,
"grad_norm": 13.715435981750488,
"learning_rate": 2.2374433653205016e-10,
"logits/chosen": 0.7051883935928345,
"logits/rejected": 0.5993505120277405,
"logps/chosen": -196.5059051513672,
"logps/ref_chosen": -57.568267822265625,
"logps/ref_rejected": -87.74789428710938,
"logps/rejected": -279.1855773925781,
"loss": 1.1622,
"margin_dpo/margin_mean": 52.500057220458984,
"margin_dpo/margin_std": 88.70083618164062,
"step": 654
},
{
"epoch": 0.9901738473167044,
"fcm_dpo/beta": 0.006401837803423405,
"fcm_dpo/delta": -0.15566277503967285,
"fcm_dpo/margin": 85.00445556640625,
"fcm_dpo/q_t": 0.37384313344955444,
"grad_norm": 12.682045936584473,
"learning_rate": 1.7131024761923852e-10,
"logits/chosen": 0.7240867018699646,
"logits/rejected": 0.6303001642227173,
"logps/chosen": -161.2081756591797,
"logps/ref_chosen": -52.14714813232422,
"logps/ref_rejected": -80.85014343261719,
"logps/rejected": -274.9156494140625,
"loss": 0.9841,
"margin_dpo/margin_mean": 85.00445556640625,
"margin_dpo/margin_std": 81.77285766601562,
"step": 655
},
{
"epoch": 0.9916855631141346,
"fcm_dpo/beta": 0.006386594846844673,
"fcm_dpo/delta": -0.021982401609420776,
"fcm_dpo/margin": 65.92683410644531,
"fcm_dpo/q_t": 0.4033903479576111,
"grad_norm": 10.721435546875,
"learning_rate": 1.2586440420372934e-10,
"logits/chosen": 0.6641607284545898,
"logits/rejected": 0.613824188709259,
"logps/chosen": -213.263427734375,
"logps/ref_chosen": -73.25672912597656,
"logps/ref_rejected": -85.35127258300781,
"logps/rejected": -291.2847900390625,
"loss": 1.0946,
"margin_dpo/margin_mean": 65.92683410644531,
"margin_dpo/margin_std": 94.80165100097656,
"step": 656
},
{
"epoch": 0.9931972789115646,
"fcm_dpo/beta": 0.0063078515231609344,
"fcm_dpo/delta": -0.07679837942123413,
"fcm_dpo/margin": 75.00975036621094,
"fcm_dpo/q_t": 0.3922984004020691,
"grad_norm": 11.450084686279297,
"learning_rate": 8.740807750345913e-11,
"logits/chosen": 0.8300960063934326,
"logits/rejected": 0.7342487573623657,
"logps/chosen": -177.7998046875,
"logps/ref_chosen": -49.72339630126953,
"logps/ref_rejected": -75.1568603515625,
"logps/rejected": -278.2430419921875,
"loss": 1.0705,
"margin_dpo/margin_mean": 75.00975036621094,
"margin_dpo/margin_std": 104.88255310058594,
"step": 657
},
{
"epoch": 0.9947089947089947,
"fcm_dpo/beta": 0.006343472748994827,
"fcm_dpo/delta": 0.022385437041521072,
"fcm_dpo/margin": 59.52809143066406,
"fcm_dpo/q_t": 0.41501033306121826,
"grad_norm": 12.572582244873047,
"learning_rate": 5.594234322453539e-11,
"logits/chosen": 0.7570410966873169,
"logits/rejected": 0.7112739682197571,
"logps/chosen": -197.8555908203125,
"logps/ref_chosen": -63.04634094238281,
"logps/ref_rejected": -83.44963073730469,
"logps/rejected": -277.7869567871094,
"loss": 1.1611,
"margin_dpo/margin_mean": 59.52809143066406,
"margin_dpo/margin_std": 110.2429428100586,
"step": 658
},
{
"epoch": 0.9962207105064248,
"fcm_dpo/beta": 0.006361355073750019,
"fcm_dpo/delta": 0.09799276292324066,
"fcm_dpo/margin": 47.89827346801758,
"fcm_dpo/q_t": 0.43068748712539673,
"grad_norm": 17.234956741333008,
"learning_rate": 3.146808153123293e-11,
"logits/chosen": 0.8032434582710266,
"logits/rejected": 0.7370003461837769,
"logps/chosen": -194.7205810546875,
"logps/ref_chosen": -55.0802001953125,
"logps/ref_rejected": -71.91049194335938,
"logps/rejected": -259.44915771484375,
"loss": 1.2072,
"margin_dpo/margin_mean": 47.89827346801758,
"margin_dpo/margin_std": 100.34069061279297,
"step": 659
},
{
"epoch": 0.9977324263038548,
"fcm_dpo/beta": 0.0063782427459955215,
"fcm_dpo/delta": -0.0683525800704956,
"fcm_dpo/margin": 72.93805694580078,
"fcm_dpo/q_t": 0.3942536413669586,
"grad_norm": 13.592533111572266,
"learning_rate": 1.3985977021235829e-11,
"logits/chosen": 0.8316740989685059,
"logits/rejected": 0.7547441720962524,
"logps/chosen": -186.29197692871094,
"logps/ref_chosen": -54.525917053222656,
"logps/ref_rejected": -81.23604583740234,
"logps/rejected": -285.940185546875,
"loss": 1.0561,
"margin_dpo/margin_mean": 72.93806457519531,
"margin_dpo/margin_std": 93.95339965820312,
"step": 660
},
{
"epoch": 0.999244142101285,
"fcm_dpo/beta": 0.0064483098685741425,
"fcm_dpo/delta": 0.08764594793319702,
"fcm_dpo/margin": 48.82136535644531,
"fcm_dpo/q_t": 0.42920881509780884,
"grad_norm": 14.784270286560059,
"learning_rate": 3.4965187065971735e-12,
"logits/chosen": 0.6959401965141296,
"logits/rejected": 0.6122831106185913,
"logps/chosen": -211.3416748046875,
"logps/ref_chosen": -60.37263870239258,
"logps/ref_rejected": -77.42874145507812,
"logps/rejected": -277.2191162109375,
"loss": 1.2096,
"margin_dpo/margin_mean": 48.82136535644531,
"margin_dpo/margin_std": 107.91928100585938,
"step": 661
},
{
"epoch": 0.999244142101285,
"step": 661,
"total_flos": 0.0,
"train_loss": 1.1320684536863204,
"train_runtime": 1756.8176,
"train_samples_per_second": 24.098,
"train_steps_per_second": 0.376
}
],
"logging_steps": 1,
"max_steps": 661,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}