Files
llama-3-8b-base-new-dpo-har…/trainer_state.json
ModelHub XC 22d5030199 初始化项目,由ModelHub XC社区提供模型
Model: jackf857/llama-3-8b-base-new-dpo-harmless-4xh200-s_star1.0
Source: Original Platform
2026-05-10 13:57:33 +08:00

2631 lines
96 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.999244142101285,
"eval_steps": 200,
"global_step": 661,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0015117157974300832,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.0036174654960632324,
"fcm_dpo/q_t": 0.5000090599060059,
"grad_norm": 2.850151538848877,
"learning_rate": 0.0,
"logits/chosen": 0.12559199333190918,
"logits/rejected": 0.11513248085975647,
"logps/chosen": -65.17359924316406,
"logps/ref_chosen": -65.2028579711914,
"logps/ref_rejected": -64.80973052978516,
"logps/rejected": -64.77685546875,
"loss": 1.3863,
"margin_dpo/margin_mean": -0.0036170482635498047,
"margin_dpo/margin_std": 0.2552323341369629,
"step": 1
},
{
"epoch": 0.007558578987150416,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.015368208289146423,
"fcm_dpo/q_t": 0.4999615550041199,
"grad_norm": 2.9636518955230713,
"learning_rate": 2.9850746268656714e-08,
"logits/chosen": 0.08381284773349762,
"logits/rejected": 0.056591667234897614,
"logps/chosen": -65.78416442871094,
"logps/ref_chosen": -65.79120635986328,
"logps/ref_rejected": -79.74447631835938,
"logps/rejected": -79.75279998779297,
"loss": 1.3861,
"margin_dpo/margin_mean": 0.015368461608886719,
"margin_dpo/margin_std": 0.30196240544319153,
"step": 5
},
{
"epoch": 0.015117157974300832,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.017599213868379593,
"fcm_dpo/q_t": 0.500044047832489,
"grad_norm": 2.9695703983306885,
"learning_rate": 6.71641791044776e-08,
"logits/chosen": 0.10358067601919174,
"logits/rejected": 0.06364428251981735,
"logps/chosen": -57.39263153076172,
"logps/ref_chosen": -57.38689041137695,
"logps/ref_rejected": -80.92173767089844,
"logps/rejected": -80.90988159179688,
"loss": 1.3865,
"margin_dpo/margin_mean": -0.01759929582476616,
"margin_dpo/margin_std": 0.3119713366031647,
"step": 10
},
{
"epoch": 0.022675736961451247,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.009314382448792458,
"fcm_dpo/q_t": 0.4999767243862152,
"grad_norm": 3.320962905883789,
"learning_rate": 1.044776119402985e-07,
"logits/chosen": 0.07827206701040268,
"logits/rejected": 0.04801332950592041,
"logps/chosen": -61.75555419921875,
"logps/ref_chosen": -61.75988006591797,
"logps/ref_rejected": -80.3942642211914,
"logps/rejected": -80.39925384521484,
"loss": 1.3862,
"margin_dpo/margin_mean": 0.00931442342698574,
"margin_dpo/margin_std": 0.3290034830570221,
"step": 15
},
{
"epoch": 0.030234315948601664,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.007567483000457287,
"fcm_dpo/q_t": 0.4999810755252838,
"grad_norm": 2.972181797027588,
"learning_rate": 1.4179104477611938e-07,
"logits/chosen": 0.08886369317770004,
"logits/rejected": 0.06055384874343872,
"logps/chosen": -56.6132698059082,
"logps/ref_chosen": -56.6275749206543,
"logps/ref_rejected": -78.54231262207031,
"logps/rejected": -78.53557586669922,
"loss": 1.3862,
"margin_dpo/margin_mean": 0.007567489054054022,
"margin_dpo/margin_std": 0.30747541785240173,
"step": 20
},
{
"epoch": 0.03779289493575208,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.021364277228713036,
"fcm_dpo/q_t": 0.49994659423828125,
"grad_norm": 2.9778642654418945,
"learning_rate": 1.7910447761194027e-07,
"logits/chosen": 0.10495474189519882,
"logits/rejected": 0.07259530574083328,
"logps/chosen": -61.929527282714844,
"logps/ref_chosen": -61.922279357910156,
"logps/ref_rejected": -83.95155334472656,
"logps/rejected": -83.98015594482422,
"loss": 1.3861,
"margin_dpo/margin_mean": 0.02136421762406826,
"margin_dpo/margin_std": 0.2991010844707489,
"step": 25
},
{
"epoch": 0.045351473922902494,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.004342102911323309,
"fcm_dpo/q_t": 0.4999891221523285,
"grad_norm": 3.0755813121795654,
"learning_rate": 2.1641791044776117e-07,
"logits/chosen": 0.09887897223234177,
"logits/rejected": 0.07384434342384338,
"logps/chosen": -61.931427001953125,
"logps/ref_chosen": -61.90684127807617,
"logps/ref_rejected": -79.56486511230469,
"logps/rejected": -79.59378814697266,
"loss": 1.3863,
"margin_dpo/margin_mean": 0.004342180676758289,
"margin_dpo/margin_std": 0.31154924631118774,
"step": 30
},
{
"epoch": 0.05291005291005291,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.04645770788192749,
"fcm_dpo/q_t": 0.49988383054733276,
"grad_norm": 2.7926185131073,
"learning_rate": 2.537313432835821e-07,
"logits/chosen": 0.056805629283189774,
"logits/rejected": 0.03099716268479824,
"logps/chosen": -64.05213165283203,
"logps/ref_chosen": -64.01432800292969,
"logps/ref_rejected": -81.33033752441406,
"logps/rejected": -81.41459655761719,
"loss": 1.3858,
"margin_dpo/margin_mean": 0.04645807296037674,
"margin_dpo/margin_std": 0.3312261402606964,
"step": 35
},
{
"epoch": 0.06046863189720333,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.012222861871123314,
"fcm_dpo/q_t": 0.4999694228172302,
"grad_norm": 3.1424221992492676,
"learning_rate": 2.9104477611940296e-07,
"logits/chosen": 0.10701529681682587,
"logits/rejected": 0.061525583267211914,
"logps/chosen": -60.28235626220703,
"logps/ref_chosen": -60.1998176574707,
"logps/ref_rejected": -85.63372039794922,
"logps/rejected": -85.72847747802734,
"loss": 1.3862,
"margin_dpo/margin_mean": 0.012222832068800926,
"margin_dpo/margin_std": 0.3903924524784088,
"step": 40
},
{
"epoch": 0.06802721088435375,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.049396924674510956,
"fcm_dpo/q_t": 0.4998764991760254,
"grad_norm": 3.186418056488037,
"learning_rate": 3.2835820895522385e-07,
"logits/chosen": 0.06446581333875656,
"logits/rejected": 0.03697461634874344,
"logps/chosen": -66.853515625,
"logps/ref_chosen": -66.71932220458984,
"logps/ref_rejected": -84.73368835449219,
"logps/rejected": -84.91728210449219,
"loss": 1.3858,
"margin_dpo/margin_mean": 0.049397267401218414,
"margin_dpo/margin_std": 0.4028749465942383,
"step": 45
},
{
"epoch": 0.07558578987150416,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.11318810284137726,
"fcm_dpo/q_t": 0.4997170567512512,
"grad_norm": 2.841273069381714,
"learning_rate": 3.6567164179104475e-07,
"logits/chosen": 0.07657527923583984,
"logits/rejected": 0.04275421425700188,
"logps/chosen": -56.79387283325195,
"logps/ref_chosen": -56.59545135498047,
"logps/ref_rejected": -71.17185974121094,
"logps/rejected": -71.48346710205078,
"loss": 1.3852,
"margin_dpo/margin_mean": 0.11318818479776382,
"margin_dpo/margin_std": 0.4424575865268707,
"step": 50
},
{
"epoch": 0.08314436885865457,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.19668380916118622,
"fcm_dpo/q_t": 0.4995082914829254,
"grad_norm": 3.2169511318206787,
"learning_rate": 4.0298507462686564e-07,
"logits/chosen": 0.06426317989826202,
"logits/rejected": 0.026701394468545914,
"logps/chosen": -58.7703971862793,
"logps/ref_chosen": -58.43064498901367,
"logps/ref_rejected": -81.11677551269531,
"logps/rejected": -81.6532211303711,
"loss": 1.3843,
"margin_dpo/margin_mean": 0.19668370485305786,
"margin_dpo/margin_std": 0.5449806451797485,
"step": 55
},
{
"epoch": 0.09070294784580499,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.19983884692192078,
"fcm_dpo/q_t": 0.49950042366981506,
"grad_norm": 2.920549154281616,
"learning_rate": 4.4029850746268654e-07,
"logits/chosen": 0.10816339403390884,
"logits/rejected": 0.07374849915504456,
"logps/chosen": -61.71905517578125,
"logps/ref_chosen": -61.1767463684082,
"logps/ref_rejected": -75.71009063720703,
"logps/rejected": -76.45222473144531,
"loss": 1.3843,
"margin_dpo/margin_mean": 0.19983868300914764,
"margin_dpo/margin_std": 0.7893710136413574,
"step": 60
},
{
"epoch": 0.0982615268329554,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.29686489701271057,
"fcm_dpo/q_t": 0.4992578625679016,
"grad_norm": 3.1904947757720947,
"learning_rate": 4.776119402985074e-07,
"logits/chosen": 0.1440560221672058,
"logits/rejected": 0.11248280853033066,
"logps/chosen": -61.19800567626953,
"logps/ref_chosen": -60.42144012451172,
"logps/ref_rejected": -77.3677749633789,
"logps/rejected": -78.44120025634766,
"loss": 1.3834,
"margin_dpo/margin_mean": 0.29686498641967773,
"margin_dpo/margin_std": 1.026890754699707,
"step": 65
},
{
"epoch": 0.10582010582010581,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.3897508978843689,
"fcm_dpo/q_t": 0.4990256726741791,
"grad_norm": 2.9328501224517822,
"learning_rate": 4.999860140229787e-07,
"logits/chosen": 0.10854315757751465,
"logits/rejected": 0.07236559689044952,
"logps/chosen": -69.1954574584961,
"logps/ref_chosen": -68.04537200927734,
"logps/ref_rejected": -83.14714050292969,
"logps/rejected": -84.68696594238281,
"loss": 1.3824,
"margin_dpo/margin_mean": 0.3897508978843689,
"margin_dpo/margin_std": 1.3252379894256592,
"step": 70
},
{
"epoch": 0.11337868480725624,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.5443285703659058,
"fcm_dpo/q_t": 0.4986393451690674,
"grad_norm": 2.8421285152435303,
"learning_rate": 4.998286897523808e-07,
"logits/chosen": 0.11565772444009781,
"logits/rejected": 0.07986008375883102,
"logps/chosen": -58.929412841796875,
"logps/ref_chosen": -57.3649787902832,
"logps/ref_rejected": -73.14057159423828,
"logps/rejected": -75.24932861328125,
"loss": 1.3809,
"margin_dpo/margin_mean": 0.5443285703659058,
"margin_dpo/margin_std": 1.6223704814910889,
"step": 75
},
{
"epoch": 0.12093726379440665,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.8009698987007141,
"fcm_dpo/q_t": 0.4979979991912842,
"grad_norm": 2.954160690307617,
"learning_rate": 4.994966691179711e-07,
"logits/chosen": 0.15183034539222717,
"logits/rejected": 0.10617075115442276,
"logps/chosen": -60.83113479614258,
"logps/ref_chosen": -58.77534103393555,
"logps/ref_rejected": -79.07672119140625,
"logps/rejected": -81.93347930908203,
"loss": 1.3784,
"margin_dpo/margin_mean": 0.8009698987007141,
"margin_dpo/margin_std": 2.1611573696136475,
"step": 80
},
{
"epoch": 0.12849584278155707,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.2792611122131348,
"fcm_dpo/q_t": 0.49680256843566895,
"grad_norm": 3.1464085578918457,
"learning_rate": 4.989901842900325e-07,
"logits/chosen": 0.15984781086444855,
"logits/rejected": 0.11359409987926483,
"logps/chosen": -60.38011932373047,
"logps/ref_chosen": -57.70839309692383,
"logps/ref_rejected": -76.26394653320312,
"logps/rejected": -80.21492767333984,
"loss": 1.3737,
"margin_dpo/margin_mean": 1.2792608737945557,
"margin_dpo/margin_std": 2.5403237342834473,
"step": 85
},
{
"epoch": 0.1360544217687075,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.5232597589492798,
"fcm_dpo/q_t": 0.4961939752101898,
"grad_norm": 3.333669662475586,
"learning_rate": 4.983095894354857e-07,
"logits/chosen": 0.2166169136762619,
"logits/rejected": 0.1685468852519989,
"logps/chosen": -62.62725067138672,
"logps/ref_chosen": -58.71812057495117,
"logps/ref_rejected": -82.2930908203125,
"logps/rejected": -87.7254867553711,
"loss": 1.3715,
"margin_dpo/margin_mean": 1.5232598781585693,
"margin_dpo/margin_std": 3.794527053833008,
"step": 90
},
{
"epoch": 0.1436130007558579,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 2.0687878131866455,
"fcm_dpo/q_t": 0.49483543634414673,
"grad_norm": 3.5051140785217285,
"learning_rate": 4.974553604702332e-07,
"logits/chosen": 0.22696343064308167,
"logits/rejected": 0.1834408938884735,
"logps/chosen": -59.80878829956055,
"logps/ref_chosen": -54.887908935546875,
"logps/ref_rejected": -76.79985046386719,
"logps/rejected": -83.78950500488281,
"loss": 1.3665,
"margin_dpo/margin_mean": 2.0687873363494873,
"margin_dpo/margin_std": 5.531675338745117,
"step": 95
},
{
"epoch": 0.15117157974300832,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.6540107727050781,
"fcm_dpo/q_t": 0.49587664008140564,
"grad_norm": 3.7864036560058594,
"learning_rate": 4.964280947263676e-07,
"logits/chosen": 0.2681678235530853,
"logits/rejected": 0.23597940802574158,
"logps/chosen": -72.77100372314453,
"logps/ref_chosen": -65.1898422241211,
"logps/ref_rejected": -83.39742279052734,
"logps/rejected": -92.63258361816406,
"loss": 1.3719,
"margin_dpo/margin_mean": 1.6540113687515259,
"margin_dpo/margin_std": 8.77057933807373,
"step": 100
},
{
"epoch": 0.15873015873015872,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 2.923435688018799,
"fcm_dpo/q_t": 0.49274763464927673,
"grad_norm": 4.0385565757751465,
"learning_rate": 4.952285105344791e-07,
"logits/chosen": 0.3018716275691986,
"logits/rejected": 0.25165122747421265,
"logps/chosen": -72.08647155761719,
"logps/ref_chosen": -63.611778259277344,
"logps/ref_rejected": -81.8642578125,
"logps/rejected": -93.26237487792969,
"loss": 1.3604,
"margin_dpo/margin_mean": 2.9234354496002197,
"margin_dpo/margin_std": 10.657812118530273,
"step": 105
},
{
"epoch": 0.16628873771730915,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 2.3822619915008545,
"fcm_dpo/q_t": 0.4940711557865143,
"grad_norm": 4.880163669586182,
"learning_rate": 4.938574467213517e-07,
"logits/chosen": 0.27982911467552185,
"logits/rejected": 0.25695186853408813,
"logps/chosen": -82.95537567138672,
"logps/ref_chosen": -70.61798858642578,
"logps/ref_rejected": -80.55892181396484,
"logps/rejected": -95.27857971191406,
"loss": 1.3683,
"margin_dpo/margin_mean": 2.3822619915008545,
"margin_dpo/margin_std": 14.694234848022461,
"step": 110
},
{
"epoch": 0.17384731670445955,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 4.24505615234375,
"fcm_dpo/q_t": 0.4895564913749695,
"grad_norm": 3.4979965686798096,
"learning_rate": 4.923158620234019e-07,
"logits/chosen": 0.3670777380466461,
"logits/rejected": 0.31523239612579346,
"logps/chosen": -73.29847717285156,
"logps/ref_chosen": -60.36003494262695,
"logps/ref_rejected": -83.49537658691406,
"logps/rejected": -100.67887878417969,
"loss": 1.3508,
"margin_dpo/margin_mean": 4.24505615234375,
"margin_dpo/margin_std": 15.931065559387207,
"step": 115
},
{
"epoch": 0.18140589569160998,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 5.727739334106445,
"fcm_dpo/q_t": 0.48592695593833923,
"grad_norm": 4.6423659324646,
"learning_rate": 4.906048344162676e-07,
"logits/chosen": 0.40479379892349243,
"logits/rejected": 0.3574323058128357,
"logps/chosen": -72.22782897949219,
"logps/ref_chosen": -57.185150146484375,
"logps/ref_rejected": -76.90118408203125,
"logps/rejected": -97.67161560058594,
"loss": 1.3385,
"margin_dpo/margin_mean": 5.727739334106445,
"margin_dpo/margin_std": 17.80091667175293,
"step": 120
},
{
"epoch": 0.1889644746787604,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 8.763603210449219,
"fcm_dpo/q_t": 0.47878074645996094,
"grad_norm": 4.912358283996582,
"learning_rate": 4.887255603610184e-07,
"logits/chosen": 0.46570539474487305,
"logits/rejected": 0.412194162607193,
"logps/chosen": -80.9414291381836,
"logps/ref_chosen": -60.63164138793945,
"logps/ref_rejected": -87.692138671875,
"logps/rejected": -116.76551818847656,
"loss": 1.3224,
"margin_dpo/margin_mean": 8.763603210449219,
"margin_dpo/margin_std": 29.222675323486328,
"step": 125
},
{
"epoch": 0.1965230536659108,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 7.171716213226318,
"fcm_dpo/q_t": 0.48291224241256714,
"grad_norm": 5.604545593261719,
"learning_rate": 4.866793539675126e-07,
"logits/chosen": 0.42708373069763184,
"logits/rejected": 0.3993222713470459,
"logps/chosen": -91.39217376708984,
"logps/ref_chosen": -65.96144104003906,
"logps/ref_rejected": -77.07868194580078,
"logps/rejected": -109.6811294555664,
"loss": 1.3402,
"margin_dpo/margin_mean": 7.171716213226318,
"margin_dpo/margin_std": 29.880590438842773,
"step": 130
},
{
"epoch": 0.20408163265306123,
"fcm_dpo/beta": 0.010768004693090916,
"fcm_dpo/delta": 0.07399419695138931,
"fcm_dpo/margin": 10.805874824523926,
"fcm_dpo/q_t": 0.47332343459129333,
"grad_norm": 4.438642978668213,
"learning_rate": 4.844676460754862e-07,
"logits/chosen": 0.5009486079216003,
"logits/rejected": 0.4642602801322937,
"logps/chosen": -85.66596221923828,
"logps/ref_chosen": -58.002349853515625,
"logps/ref_rejected": -74.80711364746094,
"logps/rejected": -113.27659606933594,
"loss": 1.3098,
"margin_dpo/margin_mean": 10.80587387084961,
"margin_dpo/margin_std": 34.74369430541992,
"step": 135
},
{
"epoch": 0.21164021164021163,
"fcm_dpo/beta": 0.011533305048942566,
"fcm_dpo/delta": 0.06865964084863663,
"fcm_dpo/margin": 11.856359481811523,
"fcm_dpo/q_t": 0.4694371223449707,
"grad_norm": 7.658777713775635,
"learning_rate": 4.820919832540181e-07,
"logits/chosen": 0.4951881468296051,
"logits/rejected": 0.4550108015537262,
"logps/chosen": -100.08610534667969,
"logps/ref_chosen": -64.05648803710938,
"logps/ref_rejected": -80.10523986816406,
"logps/rejected": -127.9912109375,
"loss": 1.3239,
"margin_dpo/margin_mean": 11.856356620788574,
"margin_dpo/margin_std": 46.47296142578125,
"step": 140
},
{
"epoch": 0.21919879062736206,
"fcm_dpo/beta": 0.01241993997246027,
"fcm_dpo/delta": 0.07406426966190338,
"fcm_dpo/margin": 8.729610443115234,
"fcm_dpo/q_t": 0.47626978158950806,
"grad_norm": 16.21937370300293,
"learning_rate": 4.795540267200686e-07,
"logits/chosen": 0.514064610004425,
"logits/rejected": 0.49313417077064514,
"logps/chosen": -103.54437255859375,
"logps/ref_chosen": -65.76856994628906,
"logps/ref_rejected": -81.22962951660156,
"logps/rejected": -127.73504638671875,
"loss": 1.3539,
"margin_dpo/margin_mean": 8.729610443115234,
"margin_dpo/margin_std": 44.03262710571289,
"step": 145
},
{
"epoch": 0.22675736961451248,
"fcm_dpo/beta": 0.012905704788863659,
"fcm_dpo/delta": 0.07531466335058212,
"fcm_dpo/margin": 10.41810417175293,
"fcm_dpo/q_t": 0.4676801264286041,
"grad_norm": 5.872846603393555,
"learning_rate": 4.768555511768486e-07,
"logits/chosen": 0.5009235143661499,
"logits/rejected": 0.4602130353450775,
"logps/chosen": -94.07633972167969,
"logps/ref_chosen": -60.346473693847656,
"logps/ref_rejected": -75.12642669677734,
"logps/rejected": -119.2743911743164,
"loss": 1.3045,
"margin_dpo/margin_mean": 10.418103218078613,
"margin_dpo/margin_std": 34.75267791748047,
"step": 150
},
{
"epoch": 0.23431594860166288,
"fcm_dpo/beta": 0.017845138907432556,
"fcm_dpo/delta": 0.4511590898036957,
"fcm_dpo/margin": 12.379961013793945,
"fcm_dpo/q_t": 0.45373255014419556,
"grad_norm": 13.452332496643066,
"learning_rate": 4.7399844357283393e-07,
"logits/chosen": 0.5150389671325684,
"logits/rejected": 0.482626348733902,
"logps/chosen": -93.2920913696289,
"logps/ref_chosen": -61.901710510253906,
"logps/ref_rejected": -75.51579284667969,
"logps/rejected": -119.28614807128906,
"loss": 1.2681,
"margin_dpo/margin_mean": 12.379961013793945,
"margin_dpo/margin_std": 33.46470642089844,
"step": 155
},
{
"epoch": 0.2418745275888133,
"fcm_dpo/beta": 0.028629502281546593,
"fcm_dpo/delta": 0.5467379093170166,
"fcm_dpo/margin": 13.615964889526367,
"fcm_dpo/q_t": 0.4253949522972107,
"grad_norm": 14.785847663879395,
"learning_rate": 4.7098470178228755e-07,
"logits/chosen": 0.48933330178260803,
"logits/rejected": 0.4520339369773865,
"logps/chosen": -91.06707763671875,
"logps/ref_chosen": -59.82744598388672,
"logps/ref_rejected": -76.28009033203125,
"logps/rejected": -121.13565826416016,
"loss": 1.1965,
"margin_dpo/margin_mean": 13.615964889526367,
"margin_dpo/margin_std": 29.862279891967773,
"step": 160
},
{
"epoch": 0.2494331065759637,
"fcm_dpo/beta": 0.04379943758249283,
"fcm_dpo/delta": 0.27622583508491516,
"fcm_dpo/margin": 16.814184188842773,
"fcm_dpo/q_t": 0.36278295516967773,
"grad_norm": 19.85223960876465,
"learning_rate": 4.678164332082175e-07,
"logits/chosen": 0.5087782144546509,
"logits/rejected": 0.4584970474243164,
"logps/chosen": -86.49288940429688,
"logps/ref_chosen": -56.396278381347656,
"logps/ref_rejected": -77.31051635742188,
"logps/rejected": -124.2213134765625,
"loss": 1.0917,
"margin_dpo/margin_mean": 16.814186096191406,
"margin_dpo/margin_std": 28.670608520507812,
"step": 165
},
{
"epoch": 0.25699168556311414,
"fcm_dpo/beta": 0.053840864449739456,
"fcm_dpo/delta": 0.203588604927063,
"fcm_dpo/margin": 15.119009017944336,
"fcm_dpo/q_t": 0.36196133494377136,
"grad_norm": 30.58293342590332,
"learning_rate": 4.6449585330874425e-07,
"logits/chosen": 0.46817341446876526,
"logits/rejected": 0.4275393486022949,
"logps/chosen": -88.77765655517578,
"logps/ref_chosen": -62.323707580566406,
"logps/ref_rejected": -78.42765808105469,
"logps/rejected": -120.00062561035156,
"loss": 1.1343,
"margin_dpo/margin_mean": 15.119009017944336,
"margin_dpo/margin_std": 27.05625343322754,
"step": 170
},
{
"epoch": 0.26455026455026454,
"fcm_dpo/beta": 0.05749092251062393,
"fcm_dpo/delta": -0.05166977643966675,
"fcm_dpo/margin": 18.14907455444336,
"fcm_dpo/q_t": 0.33645352721214294,
"grad_norm": 27.735427856445312,
"learning_rate": 4.6102528404790965e-07,
"logits/chosen": 0.49779874086380005,
"logits/rejected": 0.4384356141090393,
"logps/chosen": -84.31999206542969,
"logps/ref_chosen": -60.14301681518555,
"logps/ref_rejected": -82.65170288085938,
"logps/rejected": -124.97774505615234,
"loss": 1.0789,
"margin_dpo/margin_mean": 18.14907455444336,
"margin_dpo/margin_std": 29.03778648376465,
"step": 175
},
{
"epoch": 0.272108843537415,
"fcm_dpo/beta": 0.05699415132403374,
"fcm_dpo/delta": 0.09521742165088654,
"fcm_dpo/margin": 16.0145206451416,
"fcm_dpo/q_t": 0.34686630964279175,
"grad_norm": 28.401058197021484,
"learning_rate": 4.5740715227200897e-07,
"logits/chosen": 0.4524223208427429,
"logits/rejected": 0.4112408757209778,
"logps/chosen": -86.755859375,
"logps/ref_chosen": -65.02766418457031,
"logps/ref_rejected": -80.62745666503906,
"logps/rejected": -118.37019348144531,
"loss": 1.0718,
"margin_dpo/margin_mean": 16.0145206451416,
"margin_dpo/margin_std": 25.418670654296875,
"step": 180
},
{
"epoch": 0.2796674225245654,
"fcm_dpo/beta": 0.06562753766775131,
"fcm_dpo/delta": 0.15315786004066467,
"fcm_dpo/margin": 13.123418807983398,
"fcm_dpo/q_t": 0.3597589433193207,
"grad_norm": 31.76349449157715,
"learning_rate": 4.5364398801258394e-07,
"logits/chosen": 0.4652015268802643,
"logits/rejected": 0.4133850932121277,
"logps/chosen": -74.6918716430664,
"logps/ref_chosen": -57.59275436401367,
"logps/ref_rejected": -77.97161865234375,
"logps/rejected": -108.19415283203125,
"loss": 1.1242,
"margin_dpo/margin_mean": 13.123418807983398,
"margin_dpo/margin_std": 22.702762603759766,
"step": 185
},
{
"epoch": 0.2872260015117158,
"fcm_dpo/beta": 0.07316488027572632,
"fcm_dpo/delta": 0.03896424174308777,
"fcm_dpo/margin": 13.159120559692383,
"fcm_dpo/q_t": 0.3480309545993805,
"grad_norm": 35.800045013427734,
"learning_rate": 4.4973842271726024e-07,
"logits/chosen": 0.4268653392791748,
"logits/rejected": 0.3715541362762451,
"logps/chosen": -83.3046875,
"logps/ref_chosen": -69.40254974365234,
"logps/ref_rejected": -87.62089538574219,
"logps/rejected": -114.6821517944336,
"loss": 1.0661,
"margin_dpo/margin_mean": 13.159120559692383,
"margin_dpo/margin_std": 21.489017486572266,
"step": 190
},
{
"epoch": 0.2947845804988662,
"fcm_dpo/beta": 0.08043137937784195,
"fcm_dpo/delta": 0.10039126873016357,
"fcm_dpo/margin": 11.223628997802734,
"fcm_dpo/q_t": 0.35878369212150574,
"grad_norm": 38.323482513427734,
"learning_rate": 4.4569318740967043e-07,
"logits/chosen": 0.4170468747615814,
"logits/rejected": 0.39506852626800537,
"logps/chosen": -78.15182495117188,
"logps/ref_chosen": -63.38518524169922,
"logps/ref_rejected": -72.65580749511719,
"logps/rejected": -98.64608001708984,
"loss": 1.1468,
"margin_dpo/margin_mean": 11.223628044128418,
"margin_dpo/margin_std": 20.275413513183594,
"step": 195
},
{
"epoch": 0.30234315948601664,
"fcm_dpo/beta": 0.08364946395158768,
"fcm_dpo/delta": -0.03751251846551895,
"fcm_dpo/margin": 12.287755966186523,
"fcm_dpo/q_t": 0.33327925205230713,
"grad_norm": 29.11920738220215,
"learning_rate": 4.415111107797445e-07,
"logits/chosen": 0.44890865683555603,
"logits/rejected": 0.39030200242996216,
"logps/chosen": -70.07976531982422,
"logps/ref_chosen": -57.999359130859375,
"logps/ref_rejected": -79.5167007446289,
"logps/rejected": -103.8848648071289,
"loss": 1.0502,
"margin_dpo/margin_mean": 12.28775691986084,
"margin_dpo/margin_std": 18.79964828491211,
"step": 200
},
{
"epoch": 0.30234315948601664,
"eval_fcm_dpo/beta": 0.09359671175479889,
"eval_fcm_dpo/delta": 0.018532773479819298,
"eval_fcm_dpo/margin": 10.367452621459961,
"eval_fcm_dpo/q_t": 0.35254982113838196,
"eval_logits/chosen": 0.4369470477104187,
"eval_logits/rejected": 0.38747942447662354,
"eval_logps/chosen": -87.90721893310547,
"eval_logps/ref_chosen": -75.86933135986328,
"eval_logps/ref_rejected": -80.85771942138672,
"eval_logps/rejected": -103.26305389404297,
"eval_loss": 0.571725070476532,
"eval_margin_dpo/margin_mean": 10.367453575134277,
"eval_margin_dpo/margin_std": 18.42043113708496,
"eval_runtime": 38.6614,
"eval_samples_per_second": 59.568,
"eval_steps_per_second": 1.862,
"step": 200
},
{
"epoch": 0.30990173847316704,
"fcm_dpo/beta": 0.09407475590705872,
"fcm_dpo/delta": 0.060309164226055145,
"fcm_dpo/margin": 9.996593475341797,
"fcm_dpo/q_t": 0.35484787821769714,
"grad_norm": 45.5085563659668,
"learning_rate": 4.3719511720570814e-07,
"logits/chosen": 0.4219132363796234,
"logits/rejected": 0.35513609647750854,
"logps/chosen": -71.04405975341797,
"logps/ref_chosen": -58.64111328125,
"logps/ref_rejected": -84.33369445800781,
"logps/rejected": -106.73323822021484,
"loss": 1.1443,
"margin_dpo/margin_mean": 9.99659252166748,
"margin_dpo/margin_std": 18.014066696166992,
"step": 205
},
{
"epoch": 0.31746031746031744,
"fcm_dpo/beta": 0.09743638336658478,
"fcm_dpo/delta": -0.04126477986574173,
"fcm_dpo/margin": 10.603793144226074,
"fcm_dpo/q_t": 0.34100794792175293,
"grad_norm": 29.75200080871582,
"learning_rate": 4.327482247091679e-07,
"logits/chosen": 0.39647018909454346,
"logits/rejected": 0.3432803452014923,
"logps/chosen": -78.04428100585938,
"logps/ref_chosen": -65.22540283203125,
"logps/ref_rejected": -84.33940887451172,
"logps/rejected": -107.76206970214844,
"loss": 1.0715,
"margin_dpo/margin_mean": 10.60379409790039,
"margin_dpo/margin_std": 16.901790618896484,
"step": 210
},
{
"epoch": 0.3250188964474679,
"fcm_dpo/beta": 0.09210662543773651,
"fcm_dpo/delta": -0.050839781761169434,
"fcm_dpo/margin": 11.34312629699707,
"fcm_dpo/q_t": 0.3336792588233948,
"grad_norm": 30.288963317871094,
"learning_rate": 4.281735428447157e-07,
"logits/chosen": 0.37580037117004395,
"logits/rejected": 0.31153884530067444,
"logps/chosen": -72.8878173828125,
"logps/ref_chosen": -61.34074020385742,
"logps/ref_rejected": -85.00725555419922,
"logps/rejected": -107.89747619628906,
"loss": 1.0263,
"margin_dpo/margin_mean": 11.343125343322754,
"margin_dpo/margin_std": 17.116981506347656,
"step": 215
},
{
"epoch": 0.3325774754346183,
"fcm_dpo/beta": 0.08995531499385834,
"fcm_dpo/delta": 0.10806653648614883,
"fcm_dpo/margin": 10.049107551574707,
"fcm_dpo/q_t": 0.34747129678726196,
"grad_norm": 29.705045700073242,
"learning_rate": 4.234742705255272e-07,
"logits/chosen": 0.3524443507194519,
"logits/rejected": 0.2914368212223053,
"logps/chosen": -74.65157318115234,
"logps/ref_chosen": -62.409584045410156,
"logps/ref_rejected": -81.9083023071289,
"logps/rejected": -104.19940185546875,
"loss": 1.0618,
"margin_dpo/margin_mean": 10.049107551574707,
"margin_dpo/margin_std": 15.988082885742188,
"step": 220
},
{
"epoch": 0.3401360544217687,
"fcm_dpo/beta": 0.0927683562040329,
"fcm_dpo/delta": -0.03574846684932709,
"fcm_dpo/margin": 11.09398078918457,
"fcm_dpo/q_t": 0.3298317790031433,
"grad_norm": 42.090301513671875,
"learning_rate": 4.186536937864752e-07,
"logits/chosen": 0.41360267996788025,
"logits/rejected": 0.34270113706588745,
"logps/chosen": -75.1723403930664,
"logps/ref_chosen": -63.19435501098633,
"logps/ref_rejected": -94.3624038696289,
"logps/rejected": -117.43436431884766,
"loss": 0.9653,
"margin_dpo/margin_mean": 11.09398078918457,
"margin_dpo/margin_std": 15.557014465332031,
"step": 225
},
{
"epoch": 0.3476946334089191,
"fcm_dpo/beta": 0.0995025485754013,
"fcm_dpo/delta": 0.0801922157406807,
"fcm_dpo/margin": 9.299490928649902,
"fcm_dpo/q_t": 0.3476495146751404,
"grad_norm": 41.26875305175781,
"learning_rate": 4.137151834863213e-07,
"logits/chosen": 0.4226433336734772,
"logits/rejected": 0.37348127365112305,
"logps/chosen": -67.02788543701172,
"logps/ref_chosen": -55.014076232910156,
"logps/ref_rejected": -72.50662994384766,
"logps/rejected": -93.8199234008789,
"loss": 1.0886,
"margin_dpo/margin_mean": 9.299490928649902,
"margin_dpo/margin_std": 15.479217529296875,
"step": 230
},
{
"epoch": 0.35525321239606955,
"fcm_dpo/beta": 0.10012258589267731,
"fcm_dpo/delta": -0.0002490147890057415,
"fcm_dpo/margin": 9.948552131652832,
"fcm_dpo/q_t": 0.3335891366004944,
"grad_norm": 31.21560287475586,
"learning_rate": 4.08662192950594e-07,
"logits/chosen": 0.35273051261901855,
"logits/rejected": 0.3253236711025238,
"logps/chosen": -77.28237915039062,
"logps/ref_chosen": -64.1020278930664,
"logps/ref_rejected": -73.81226348876953,
"logps/rejected": -96.94117736816406,
"loss": 1.0153,
"margin_dpo/margin_mean": 9.948553085327148,
"margin_dpo/margin_std": 14.477907180786133,
"step": 235
},
{
"epoch": 0.36281179138321995,
"fcm_dpo/beta": 0.0998903140425682,
"fcm_dpo/delta": -0.08263019472360611,
"fcm_dpo/margin": 10.745096206665039,
"fcm_dpo/q_t": 0.3202618360519409,
"grad_norm": 25.250896453857422,
"learning_rate": 4.0349825555680045e-07,
"logits/chosen": 0.384564071893692,
"logits/rejected": 0.33145636320114136,
"logps/chosen": -81.09648132324219,
"logps/ref_chosen": -66.39305877685547,
"logps/ref_rejected": -88.76033020019531,
"logps/rejected": -114.2088394165039,
"loss": 0.9897,
"margin_dpo/margin_mean": 10.745096206665039,
"margin_dpo/margin_std": 14.994283676147461,
"step": 240
},
{
"epoch": 0.37037037037037035,
"fcm_dpo/beta": 0.09539251029491425,
"fcm_dpo/delta": -0.0672103613615036,
"fcm_dpo/margin": 11.129631996154785,
"fcm_dpo/q_t": 0.31643834710121155,
"grad_norm": 51.30845642089844,
"learning_rate": 3.982269822636601e-07,
"logits/chosen": 0.3606599271297455,
"logits/rejected": 0.3225025534629822,
"logps/chosen": -81.35462951660156,
"logps/ref_chosen": -67.98930358886719,
"logps/ref_rejected": -77.23219299316406,
"logps/rejected": -101.7271499633789,
"loss": 0.9158,
"margin_dpo/margin_mean": 11.129631042480469,
"margin_dpo/margin_std": 13.993242263793945,
"step": 245
},
{
"epoch": 0.3779289493575208,
"fcm_dpo/beta": 0.0966949611902237,
"fcm_dpo/delta": 0.11792643368244171,
"fcm_dpo/margin": 9.158844947814941,
"fcm_dpo/q_t": 0.3497825860977173,
"grad_norm": 29.191015243530273,
"learning_rate": 3.9285205908608934e-07,
"logits/chosen": 0.387145459651947,
"logits/rejected": 0.3767244815826416,
"logps/chosen": -89.218994140625,
"logps/ref_chosen": -72.36497497558594,
"logps/ref_rejected": -77.82171630859375,
"logps/rejected": -103.8345947265625,
"loss": 1.1125,
"margin_dpo/margin_mean": 9.158845901489258,
"margin_dpo/margin_std": 15.31347370147705,
"step": 250
},
{
"epoch": 0.3854875283446712,
"fcm_dpo/beta": 0.10761729627847672,
"fcm_dpo/delta": 0.08714894950389862,
"fcm_dpo/margin": 8.48575496673584,
"fcm_dpo/q_t": 0.35001808404922485,
"grad_norm": 32.06684112548828,
"learning_rate": 3.873772445177015e-07,
"logits/chosen": 0.3626454174518585,
"logits/rejected": 0.3155224919319153,
"logps/chosen": -79.03126525878906,
"logps/ref_chosen": -63.40877151489258,
"logps/ref_rejected": -79.03904724121094,
"logps/rejected": -103.14729309082031,
"loss": 1.1116,
"margin_dpo/margin_mean": 8.48575496673584,
"margin_dpo/margin_std": 14.401884078979492,
"step": 255
},
{
"epoch": 0.3930461073318216,
"fcm_dpo/beta": 0.09919991344213486,
"fcm_dpo/delta": -0.1404997706413269,
"fcm_dpo/margin": 11.3255033493042,
"fcm_dpo/q_t": 0.31073272228240967,
"grad_norm": 30.121217727661133,
"learning_rate": 3.818063669026256e-07,
"logits/chosen": 0.3378009796142578,
"logits/rejected": 0.2627086341381073,
"logps/chosen": -79.64289855957031,
"logps/ref_chosen": -63.3157844543457,
"logps/ref_rejected": -93.57626342773438,
"logps/rejected": -121.2288818359375,
"loss": 0.9261,
"margin_dpo/margin_mean": 11.3255033493042,
"margin_dpo/margin_std": 14.669309616088867,
"step": 260
},
{
"epoch": 0.40060468631897206,
"fcm_dpo/beta": 0.10028767585754395,
"fcm_dpo/delta": 0.0695745199918747,
"fcm_dpo/margin": 9.310823440551758,
"fcm_dpo/q_t": 0.34190893173217773,
"grad_norm": 39.34469985961914,
"learning_rate": 3.7614332175848027e-07,
"logits/chosen": 0.3269875645637512,
"logits/rejected": 0.27384883165359497,
"logps/chosen": -82.93329620361328,
"logps/ref_chosen": -66.82787322998047,
"logps/ref_rejected": -79.1831283569336,
"logps/rejected": -104.599365234375,
"loss": 1.0288,
"margin_dpo/margin_mean": 9.310823440551758,
"margin_dpo/margin_std": 14.060315132141113,
"step": 265
},
{
"epoch": 0.40816326530612246,
"fcm_dpo/beta": 0.10525654256343842,
"fcm_dpo/delta": 0.06899507343769073,
"fcm_dpo/margin": 8.870966911315918,
"fcm_dpo/q_t": 0.33700481057167053,
"grad_norm": 34.62828063964844,
"learning_rate": 3.7039206905237656e-07,
"logits/chosen": 0.35178321599960327,
"logits/rejected": 0.3009414076805115,
"logps/chosen": -78.264404296875,
"logps/ref_chosen": -63.54209518432617,
"logps/ref_rejected": -78.09616088867188,
"logps/rejected": -101.68943786621094,
"loss": 1.0151,
"margin_dpo/margin_mean": 8.870966911315918,
"margin_dpo/margin_std": 13.135915756225586,
"step": 270
},
{
"epoch": 0.41572184429327286,
"fcm_dpo/beta": 0.10544770956039429,
"fcm_dpo/delta": -0.05175945162773132,
"fcm_dpo/margin": 9.915693283081055,
"fcm_dpo/q_t": 0.3259262442588806,
"grad_norm": 30.281387329101562,
"learning_rate": 3.645566304318526e-07,
"logits/chosen": 0.3330889642238617,
"logits/rejected": 0.28538888692855835,
"logps/chosen": -77.23836517333984,
"logps/ref_chosen": -63.090972900390625,
"logps/ref_rejected": -79.1383056640625,
"logps/rejected": -103.2013931274414,
"loss": 0.9663,
"margin_dpo/margin_mean": 9.915693283081055,
"margin_dpo/margin_std": 13.829241752624512,
"step": 275
},
{
"epoch": 0.42328042328042326,
"fcm_dpo/beta": 0.10395065695047379,
"fcm_dpo/delta": -0.05435022711753845,
"fcm_dpo/margin": 10.08836555480957,
"fcm_dpo/q_t": 0.31206685304641724,
"grad_norm": 23.842212677001953,
"learning_rate": 3.586410864126781e-07,
"logits/chosen": 0.31397441029548645,
"logits/rejected": 0.28041377663612366,
"logps/chosen": -75.13356018066406,
"logps/ref_chosen": -61.85026168823242,
"logps/ref_rejected": -73.87454986572266,
"logps/rejected": -97.2462158203125,
"loss": 0.9042,
"margin_dpo/margin_mean": 10.088364601135254,
"margin_dpo/margin_std": 12.210702896118164,
"step": 280
},
{
"epoch": 0.4308390022675737,
"fcm_dpo/beta": 0.09839525073766708,
"fcm_dpo/delta": 0.06181678920984268,
"fcm_dpo/margin": 9.582967758178711,
"fcm_dpo/q_t": 0.33559128642082214,
"grad_norm": 35.4563102722168,
"learning_rate": 3.5264957352549375e-07,
"logits/chosen": 0.33106130361557007,
"logits/rejected": 0.2812042832374573,
"logps/chosen": -78.70259094238281,
"logps/ref_chosen": -64.2256851196289,
"logps/ref_rejected": -80.54659271240234,
"logps/rejected": -104.6064682006836,
"loss": 0.9747,
"margin_dpo/margin_mean": 9.582967758178711,
"margin_dpo/margin_std": 13.323356628417969,
"step": 285
},
{
"epoch": 0.4383975812547241,
"fcm_dpo/beta": 0.0960320457816124,
"fcm_dpo/delta": -0.08064164221286774,
"fcm_dpo/margin": 11.12451171875,
"fcm_dpo/q_t": 0.31567567586898804,
"grad_norm": 36.847713470458984,
"learning_rate": 3.465862814232821e-07,
"logits/chosen": 0.3282826542854309,
"logits/rejected": 0.26554709672927856,
"logps/chosen": -74.4558334350586,
"logps/ref_chosen": -58.45670700073242,
"logps/ref_rejected": -80.57959747314453,
"logps/rejected": -107.7032241821289,
"loss": 0.9158,
"margin_dpo/margin_mean": 11.12451171875,
"margin_dpo/margin_std": 14.189311027526855,
"step": 290
},
{
"epoch": 0.4459561602418745,
"fcm_dpo/beta": 0.09265764057636261,
"fcm_dpo/delta": -0.019339444115757942,
"fcm_dpo/margin": 10.953125,
"fcm_dpo/q_t": 0.325559139251709,
"grad_norm": 31.604331970214844,
"learning_rate": 3.4045544995169125e-07,
"logits/chosen": 0.3938923478126526,
"logits/rejected": 0.32047176361083984,
"logps/chosen": -73.53590393066406,
"logps/ref_chosen": -56.701622009277344,
"logps/ref_rejected": -79.15914916992188,
"logps/rejected": -106.9465560913086,
"loss": 0.9464,
"margin_dpo/margin_mean": 10.953125,
"margin_dpo/margin_std": 14.6715726852417,
"step": 295
},
{
"epoch": 0.45351473922902497,
"fcm_dpo/beta": 0.09225670993328094,
"fcm_dpo/delta": 0.019766664132475853,
"fcm_dpo/margin": 10.636110305786133,
"fcm_dpo/q_t": 0.32959383726119995,
"grad_norm": 29.963603973388672,
"learning_rate": 3.3426136618426043e-07,
"logits/chosen": 0.3454452157020569,
"logits/rejected": 0.28967177867889404,
"logps/chosen": -81.18212127685547,
"logps/ref_chosen": -62.49296951293945,
"logps/ref_rejected": -76.37828063964844,
"logps/rejected": -105.70356750488281,
"loss": 0.9604,
"margin_dpo/margin_mean": 10.636110305786133,
"margin_dpo/margin_std": 14.378878593444824,
"step": 300
},
{
"epoch": 0.46107331821617537,
"fcm_dpo/beta": 0.09693561494350433,
"fcm_dpo/delta": -0.0007272452348843217,
"fcm_dpo/margin": 10.295035362243652,
"fcm_dpo/q_t": 0.33259207010269165,
"grad_norm": 41.86263656616211,
"learning_rate": 3.280083614246217e-07,
"logits/chosen": 0.32713833451271057,
"logits/rejected": 0.2873557209968567,
"logps/chosen": -83.19766235351562,
"logps/ref_chosen": -63.961265563964844,
"logps/ref_rejected": -79.19660949707031,
"logps/rejected": -108.72804260253906,
"loss": 1.012,
"margin_dpo/margin_mean": 10.295036315917969,
"margin_dpo/margin_std": 14.964961051940918,
"step": 305
},
{
"epoch": 0.46863189720332576,
"fcm_dpo/beta": 0.10219204425811768,
"fcm_dpo/delta": 0.1630358248949051,
"fcm_dpo/margin": 8.336259841918945,
"fcm_dpo/q_t": 0.3623664081096649,
"grad_norm": 43.633724212646484,
"learning_rate": 3.2170080817777257e-07,
"logits/chosen": 0.3746958076953888,
"logits/rejected": 0.3229959309101105,
"logps/chosen": -84.5007553100586,
"logps/ref_chosen": -65.43470764160156,
"logps/ref_rejected": -76.08763885498047,
"logps/rejected": -103.48995208740234,
"loss": 1.1116,
"margin_dpo/margin_mean": 8.336259841918945,
"margin_dpo/margin_std": 14.339553833007812,
"step": 310
},
{
"epoch": 0.47619047619047616,
"fcm_dpo/beta": 0.10657189041376114,
"fcm_dpo/delta": -0.09334631264209747,
"fcm_dpo/margin": 10.140634536743164,
"fcm_dpo/q_t": 0.3195830285549164,
"grad_norm": 29.57124900817871,
"learning_rate": 3.1534311709253723e-07,
"logits/chosen": 0.3460700511932373,
"logits/rejected": 0.288535475730896,
"logps/chosen": -79.8516845703125,
"logps/ref_chosen": -62.9846305847168,
"logps/ref_rejected": -75.53777313232422,
"logps/rejected": -102.54544830322266,
"loss": 0.9646,
"margin_dpo/margin_mean": 10.14063549041748,
"margin_dpo/margin_std": 13.977231979370117,
"step": 315
},
{
"epoch": 0.4837490551776266,
"fcm_dpo/beta": 0.09681382775306702,
"fcm_dpo/delta": -0.04583514854311943,
"fcm_dpo/margin": 10.692605018615723,
"fcm_dpo/q_t": 0.3291170001029968,
"grad_norm": 32.62282943725586,
"learning_rate": 3.0893973387735683e-07,
"logits/chosen": 0.3217319846153259,
"logits/rejected": 0.2573382556438446,
"logps/chosen": -72.19245147705078,
"logps/ref_chosen": -56.67329788208008,
"logps/ref_rejected": -81.22078704833984,
"logps/rejected": -107.43255615234375,
"loss": 1.0017,
"margin_dpo/margin_mean": 10.692605018615723,
"margin_dpo/margin_std": 15.321540832519531,
"step": 320
},
{
"epoch": 0.491307634164777,
"fcm_dpo/beta": 0.09586743265390396,
"fcm_dpo/delta": -0.005144490860402584,
"fcm_dpo/margin": 10.444814682006836,
"fcm_dpo/q_t": 0.32588261365890503,
"grad_norm": 29.304916381835938,
"learning_rate": 3.0249513619156206e-07,
"logits/chosen": 0.336866557598114,
"logits/rejected": 0.273507684469223,
"logps/chosen": -74.34947967529297,
"logps/ref_chosen": -58.42055130004883,
"logps/ref_rejected": -74.77824401855469,
"logps/rejected": -101.1519775390625,
"loss": 0.9637,
"margin_dpo/margin_mean": 10.444815635681152,
"margin_dpo/margin_std": 14.225895881652832,
"step": 325
},
{
"epoch": 0.4988662131519274,
"fcm_dpo/beta": 0.10534314811229706,
"fcm_dpo/delta": 0.07023780792951584,
"fcm_dpo/margin": 8.831705093383789,
"fcm_dpo/q_t": 0.34769195318222046,
"grad_norm": 31.103214263916016,
"learning_rate": 2.9601383051430505e-07,
"logits/chosen": 0.2966001629829407,
"logits/rejected": 0.2543550431728363,
"logps/chosen": -83.2847900390625,
"logps/ref_chosen": -66.16510772705078,
"logps/ref_rejected": -79.58935546875,
"logps/rejected": -105.5407485961914,
"loss": 1.0797,
"margin_dpo/margin_mean": 8.831704139709473,
"margin_dpo/margin_std": 14.508381843566895,
"step": 330
},
{
"epoch": 0.5064247921390779,
"fcm_dpo/beta": 0.09514714032411575,
"fcm_dpo/delta": -0.08864019811153412,
"fcm_dpo/margin": 11.286481857299805,
"fcm_dpo/q_t": 0.3190566897392273,
"grad_norm": 30.666662216186523,
"learning_rate": 2.895003489933375e-07,
"logits/chosen": 0.34321731328964233,
"logits/rejected": 0.29262733459472656,
"logps/chosen": -81.09008026123047,
"logps/ref_chosen": -64.61544799804688,
"logps/ref_rejected": -81.56526947021484,
"logps/rejected": -109.3263931274414,
"loss": 0.9389,
"margin_dpo/margin_mean": 11.286481857299805,
"margin_dpo/margin_std": 15.018880844116211,
"step": 335
},
{
"epoch": 0.5139833711262283,
"fcm_dpo/beta": 0.09981605410575867,
"fcm_dpo/delta": 0.05951204150915146,
"fcm_dpo/margin": 9.45673656463623,
"fcm_dpo/q_t": 0.34890830516815186,
"grad_norm": 35.18781661987305,
"learning_rate": 2.8295924627584004e-07,
"logits/chosen": 0.3079679608345032,
"logits/rejected": 0.2665550112724304,
"logps/chosen": -81.59588623046875,
"logps/ref_chosen": -62.10752487182617,
"logps/ref_rejected": -77.66670227050781,
"logps/rejected": -106.6117935180664,
"loss": 1.0579,
"margin_dpo/margin_mean": 9.45673656463623,
"margin_dpo/margin_std": 15.178037643432617,
"step": 340
},
{
"epoch": 0.5215419501133787,
"fcm_dpo/beta": 0.09543491154909134,
"fcm_dpo/delta": -0.054286111146211624,
"fcm_dpo/margin": 10.175302505493164,
"fcm_dpo/q_t": 0.33326101303100586,
"grad_norm": 26.658878326416016,
"learning_rate": 2.7639509632351927e-07,
"logits/chosen": 0.340119332075119,
"logits/rejected": 0.2935033440589905,
"logps/chosen": -79.18875885009766,
"logps/ref_chosen": -61.37943649291992,
"logps/ref_rejected": -79.8868637084961,
"logps/rejected": -107.87149810791016,
"loss": 0.9934,
"margin_dpo/margin_mean": 10.175302505493164,
"margin_dpo/margin_std": 14.267759323120117,
"step": 345
},
{
"epoch": 0.5291005291005291,
"fcm_dpo/beta": 0.09232033044099808,
"fcm_dpo/delta": -0.09336394816637039,
"fcm_dpo/margin": 11.742635726928711,
"fcm_dpo/q_t": 0.321283221244812,
"grad_norm": 28.552518844604492,
"learning_rate": 2.698124892141971e-07,
"logits/chosen": 0.40391048789024353,
"logits/rejected": 0.3338189125061035,
"logps/chosen": -72.78422546386719,
"logps/ref_chosen": -56.05344772338867,
"logps/ref_rejected": -81.98738861083984,
"logps/rejected": -110.4608154296875,
"loss": 0.933,
"margin_dpo/margin_mean": 11.742635726928711,
"margin_dpo/margin_std": 15.581090927124023,
"step": 350
},
{
"epoch": 0.5366591080876795,
"fcm_dpo/beta": 0.08842920511960983,
"fcm_dpo/delta": 0.005702398717403412,
"fcm_dpo/margin": 11.23391056060791,
"fcm_dpo/q_t": 0.3230994641780853,
"grad_norm": 30.30412483215332,
"learning_rate": 2.632160279321328e-07,
"logits/chosen": 0.3609849512577057,
"logits/rejected": 0.2793940007686615,
"logps/chosen": -72.1435317993164,
"logps/ref_chosen": -56.14973831176758,
"logps/ref_rejected": -78.04826354980469,
"logps/rejected": -105.2759780883789,
"loss": 0.963,
"margin_dpo/margin_mean": 11.23391056060791,
"margin_dpo/margin_std": 15.269304275512695,
"step": 355
},
{
"epoch": 0.54421768707483,
"fcm_dpo/beta": 0.09247281402349472,
"fcm_dpo/delta": 0.09105464816093445,
"fcm_dpo/margin": 9.872905731201172,
"fcm_dpo/q_t": 0.34462517499923706,
"grad_norm": 31.00304412841797,
"learning_rate": 2.5661032514931834e-07,
"logits/chosen": 0.3180425763130188,
"logits/rejected": 0.27117234468460083,
"logps/chosen": -78.58186340332031,
"logps/ref_chosen": -61.611045837402344,
"logps/ref_rejected": -76.07168579101562,
"logps/rejected": -102.9154052734375,
"loss": 1.0624,
"margin_dpo/margin_mean": 9.872904777526855,
"margin_dpo/margin_std": 15.538159370422363,
"step": 360
},
{
"epoch": 0.5517762660619804,
"fcm_dpo/beta": 0.08917222917079926,
"fcm_dpo/delta": -0.11267988383769989,
"fcm_dpo/margin": 12.325703620910645,
"fcm_dpo/q_t": 0.3082793056964874,
"grad_norm": 23.29572296142578,
"learning_rate": 2.5e-07,
"logits/chosen": 0.35711944103240967,
"logits/rejected": 0.2748965919017792,
"logps/chosen": -76.3038558959961,
"logps/ref_chosen": -59.96733474731445,
"logps/ref_rejected": -85.49105834960938,
"logps/rejected": -114.15328216552734,
"loss": 0.8889,
"margin_dpo/margin_mean": 12.325704574584961,
"margin_dpo/margin_std": 15.190678596496582,
"step": 365
},
{
"epoch": 0.5593348450491308,
"fcm_dpo/beta": 0.09262686967849731,
"fcm_dpo/delta": 0.08950239419937134,
"fcm_dpo/margin": 9.877817153930664,
"fcm_dpo/q_t": 0.3416265845298767,
"grad_norm": 27.85451889038086,
"learning_rate": 2.4338967485068164e-07,
"logits/chosen": 0.29736343026161194,
"logits/rejected": 0.24403652548789978,
"logps/chosen": -76.03514862060547,
"logps/ref_chosen": -60.001609802246094,
"logps/ref_rejected": -76.47229766845703,
"logps/rejected": -102.38365173339844,
"loss": 1.0434,
"margin_dpo/margin_mean": 9.877817153930664,
"margin_dpo/margin_std": 15.134869575500488,
"step": 370
},
{
"epoch": 0.5668934240362812,
"fcm_dpo/beta": 0.09520609676837921,
"fcm_dpo/delta": 0.03861137107014656,
"fcm_dpo/margin": 10.090354919433594,
"fcm_dpo/q_t": 0.3396856188774109,
"grad_norm": 35.437652587890625,
"learning_rate": 2.3678397206786715e-07,
"logits/chosen": 0.3540731370449066,
"logits/rejected": 0.29804927110671997,
"logps/chosen": -77.1456527709961,
"logps/ref_chosen": -59.98427200317383,
"logps/ref_rejected": -75.23977661132812,
"logps/rejected": -102.49151611328125,
"loss": 1.0658,
"margin_dpo/margin_mean": 10.090354919433594,
"margin_dpo/margin_std": 15.728398323059082,
"step": 375
},
{
"epoch": 0.5744520030234316,
"fcm_dpo/beta": 0.09189613163471222,
"fcm_dpo/delta": -0.08170835673809052,
"fcm_dpo/margin": 11.631962776184082,
"fcm_dpo/q_t": 0.319501131772995,
"grad_norm": 27.832942962646484,
"learning_rate": 2.3018751078580283e-07,
"logits/chosen": 0.3677051365375519,
"logits/rejected": 0.31347864866256714,
"logps/chosen": -77.08321380615234,
"logps/ref_chosen": -60.21544647216797,
"logps/ref_rejected": -77.54380798339844,
"logps/rejected": -106.04354095458984,
"loss": 0.9542,
"margin_dpo/margin_mean": 11.631962776184082,
"margin_dpo/margin_std": 15.64165210723877,
"step": 380
},
{
"epoch": 0.582010582010582,
"fcm_dpo/beta": 0.09286109358072281,
"fcm_dpo/delta": 0.020266292616724968,
"fcm_dpo/margin": 9.64010238647461,
"fcm_dpo/q_t": 0.34380003809928894,
"grad_norm": 29.564525604248047,
"learning_rate": 2.2360490367648084e-07,
"logits/chosen": 0.31580013036727905,
"logits/rejected": 0.2819364070892334,
"logps/chosen": -85.32447814941406,
"logps/ref_chosen": -67.37496185302734,
"logps/ref_rejected": -77.77253723144531,
"logps/rejected": -105.36214447021484,
"loss": 1.0145,
"margin_dpo/margin_mean": 9.64010238647461,
"margin_dpo/margin_std": 14.208511352539062,
"step": 385
},
{
"epoch": 0.5895691609977324,
"fcm_dpo/beta": 0.09398090094327927,
"fcm_dpo/delta": 0.04170190542936325,
"fcm_dpo/margin": 10.224244117736816,
"fcm_dpo/q_t": 0.3312895596027374,
"grad_norm": 25.84682273864746,
"learning_rate": 2.170407537241599e-07,
"logits/chosen": 0.3518233597278595,
"logits/rejected": 0.29235878586769104,
"logps/chosen": -79.87808990478516,
"logps/ref_chosen": -62.08070755004883,
"logps/ref_rejected": -80.65849304199219,
"logps/rejected": -108.68013763427734,
"loss": 0.9694,
"margin_dpo/margin_mean": 10.224244117736816,
"margin_dpo/margin_std": 14.191637992858887,
"step": 390
},
{
"epoch": 0.5971277399848829,
"fcm_dpo/beta": 0.08911158889532089,
"fcm_dpo/delta": -0.06056561321020126,
"fcm_dpo/margin": 11.786225318908691,
"fcm_dpo/q_t": 0.31978386640548706,
"grad_norm": 27.2460994720459,
"learning_rate": 2.104996510066625e-07,
"logits/chosen": 0.3753899037837982,
"logits/rejected": 0.3135729134082794,
"logps/chosen": -76.43754577636719,
"logps/ref_chosen": -59.841339111328125,
"logps/ref_rejected": -81.67756652832031,
"logps/rejected": -110.05999755859375,
"loss": 0.9292,
"margin_dpo/margin_mean": 11.786226272583008,
"margin_dpo/margin_std": 15.320358276367188,
"step": 395
},
{
"epoch": 0.6046863189720333,
"fcm_dpo/beta": 0.0909147709608078,
"fcm_dpo/delta": 0.033920951187610626,
"fcm_dpo/margin": 10.581128120422363,
"fcm_dpo/q_t": 0.3356344997882843,
"grad_norm": 40.5637321472168,
"learning_rate": 2.0398616948569493e-07,
"logits/chosen": 0.32412275671958923,
"logits/rejected": 0.23972614109516144,
"logps/chosen": -80.27471923828125,
"logps/ref_chosen": -61.95880889892578,
"logps/ref_rejected": -89.60023498535156,
"logps/rejected": -118.49725341796875,
"loss": 1.0126,
"margin_dpo/margin_mean": 10.58112907409668,
"margin_dpo/margin_std": 15.470416069030762,
"step": 400
},
{
"epoch": 0.6046863189720333,
"eval_fcm_dpo/beta": 0.10331619530916214,
"eval_fcm_dpo/delta": 0.009760010987520218,
"eval_fcm_dpo/margin": 9.474839210510254,
"eval_fcm_dpo/q_t": 0.3416881263256073,
"eval_logits/chosen": 0.35637208819389343,
"eval_logits/rejected": 0.3007829487323761,
"eval_logps/chosen": -92.93755340576172,
"eval_logps/ref_chosen": -75.86933135986328,
"eval_logps/ref_rejected": -80.85771942138672,
"eval_logps/rejected": -107.40077209472656,
"eval_loss": 0.5364252328872681,
"eval_margin_dpo/margin_mean": 9.474839210510254,
"eval_margin_dpo/margin_std": 15.286213874816895,
"eval_runtime": 38.7022,
"eval_samples_per_second": 59.506,
"eval_steps_per_second": 1.86,
"step": 400
},
{
"epoch": 0.6122448979591837,
"fcm_dpo/beta": 0.08817870914936066,
"fcm_dpo/delta": -0.1327141523361206,
"fcm_dpo/margin": 12.578492164611816,
"fcm_dpo/q_t": 0.3107960820198059,
"grad_norm": 23.5877742767334,
"learning_rate": 1.975048638084379e-07,
"logits/chosen": 0.37417587637901306,
"logits/rejected": 0.2990309000015259,
"logps/chosen": -73.52447509765625,
"logps/ref_chosen": -57.03437423706055,
"logps/ref_rejected": -78.54074096679688,
"logps/rejected": -107.60932922363281,
"loss": 0.8956,
"margin_dpo/margin_mean": 12.5784912109375,
"margin_dpo/margin_std": 15.335866928100586,
"step": 405
},
{
"epoch": 0.6198034769463341,
"fcm_dpo/beta": 0.08702994883060455,
"fcm_dpo/delta": 0.04099176451563835,
"fcm_dpo/margin": 11.039661407470703,
"fcm_dpo/q_t": 0.3316665291786194,
"grad_norm": 29.719276428222656,
"learning_rate": 1.9106026612264315e-07,
"logits/chosen": 0.3420962393283844,
"logits/rejected": 0.27093860507011414,
"logps/chosen": -82.31895446777344,
"logps/ref_chosen": -65.09486389160156,
"logps/ref_rejected": -82.60694885253906,
"logps/rejected": -110.87071228027344,
"loss": 0.9591,
"margin_dpo/margin_mean": 11.039661407470703,
"margin_dpo/margin_std": 14.888700485229492,
"step": 410
},
{
"epoch": 0.6273620559334845,
"fcm_dpo/beta": 0.08784516900777817,
"fcm_dpo/delta": -0.04525933414697647,
"fcm_dpo/margin": 11.834297180175781,
"fcm_dpo/q_t": 0.3292234539985657,
"grad_norm": 30.517234802246094,
"learning_rate": 1.846568829074628e-07,
"logits/chosen": 0.3091197609901428,
"logits/rejected": 0.25511085987091064,
"logps/chosen": -78.46774291992188,
"logps/ref_chosen": -58.7742805480957,
"logps/ref_rejected": -72.8920669555664,
"logps/rejected": -104.4198226928711,
"loss": 0.9871,
"margin_dpo/margin_mean": 11.834297180175781,
"margin_dpo/margin_std": 17.207059860229492,
"step": 415
},
{
"epoch": 0.6349206349206349,
"fcm_dpo/beta": 0.09040302783250809,
"fcm_dpo/delta": 0.08599478006362915,
"fcm_dpo/margin": 10.107150077819824,
"fcm_dpo/q_t": 0.34641337394714355,
"grad_norm": 37.267417907714844,
"learning_rate": 1.782991918222275e-07,
"logits/chosen": 0.3381853699684143,
"logits/rejected": 0.29058149456977844,
"logps/chosen": -80.76959991455078,
"logps/ref_chosen": -59.88574981689453,
"logps/ref_rejected": -70.21773529052734,
"logps/rejected": -101.208740234375,
"loss": 1.1005,
"margin_dpo/margin_mean": 10.107150077819824,
"margin_dpo/margin_std": 16.638538360595703,
"step": 420
},
{
"epoch": 0.6424792139077853,
"fcm_dpo/beta": 0.09201686084270477,
"fcm_dpo/delta": -0.06255164742469788,
"fcm_dpo/margin": 11.48328685760498,
"fcm_dpo/q_t": 0.3294609785079956,
"grad_norm": 26.42628288269043,
"learning_rate": 1.7199163857537824e-07,
"logits/chosen": 0.3337697982788086,
"logits/rejected": 0.28397053480148315,
"logps/chosen": -78.5306167602539,
"logps/ref_chosen": -59.304222106933594,
"logps/ref_rejected": -75.0927963256836,
"logps/rejected": -105.80247497558594,
"loss": 1.0021,
"margin_dpo/margin_mean": 11.48328685760498,
"margin_dpo/margin_std": 16.985111236572266,
"step": 425
},
{
"epoch": 0.6500377928949358,
"fcm_dpo/beta": 0.09108453243970871,
"fcm_dpo/delta": -0.03131581097841263,
"fcm_dpo/margin": 11.279854774475098,
"fcm_dpo/q_t": 0.3330654799938202,
"grad_norm": 25.44510269165039,
"learning_rate": 1.6573863381573954e-07,
"logits/chosen": 0.30293092131614685,
"logits/rejected": 0.2799461781978607,
"logps/chosen": -84.71260833740234,
"logps/ref_chosen": -63.816734313964844,
"logps/ref_rejected": -75.532470703125,
"logps/rejected": -107.70819091796875,
"loss": 1.0274,
"margin_dpo/margin_mean": 11.279854774475098,
"margin_dpo/margin_std": 17.151844024658203,
"step": 430
},
{
"epoch": 0.6575963718820862,
"fcm_dpo/beta": 0.08440228551626205,
"fcm_dpo/delta": -0.03266172856092453,
"fcm_dpo/margin": 12.173011779785156,
"fcm_dpo/q_t": 0.3264302611351013,
"grad_norm": 28.003742218017578,
"learning_rate": 1.5954455004830878e-07,
"logits/chosen": 0.40501174330711365,
"logits/rejected": 0.358463317155838,
"logps/chosen": -75.50459289550781,
"logps/ref_chosen": -56.96874237060547,
"logps/ref_rejected": -75.08180236816406,
"logps/rejected": -105.7906723022461,
"loss": 0.9586,
"margin_dpo/margin_mean": 12.173012733459473,
"margin_dpo/margin_std": 16.6791934967041,
"step": 435
},
{
"epoch": 0.6651549508692366,
"fcm_dpo/beta": 0.0831587016582489,
"fcm_dpo/delta": -0.03108084760606289,
"fcm_dpo/margin": 12.317110061645508,
"fcm_dpo/q_t": 0.32641124725341797,
"grad_norm": 28.61335563659668,
"learning_rate": 1.534137185767178e-07,
"logits/chosen": 0.33481085300445557,
"logits/rejected": 0.25145426392555237,
"logps/chosen": -75.74763488769531,
"logps/ref_chosen": -56.746910095214844,
"logps/ref_rejected": -77.73384857177734,
"logps/rejected": -109.05167388916016,
"loss": 0.9722,
"margin_dpo/margin_mean": 12.317110061645508,
"margin_dpo/margin_std": 16.91001319885254,
"step": 440
},
{
"epoch": 0.672713529856387,
"fcm_dpo/beta": 0.07718690484762192,
"fcm_dpo/delta": -0.042038463056087494,
"fcm_dpo/margin": 12.584096908569336,
"fcm_dpo/q_t": 0.3266654312610626,
"grad_norm": 29.00673484802246,
"learning_rate": 1.473504264745062e-07,
"logits/chosen": 0.35013240575790405,
"logits/rejected": 0.2813698351383209,
"logps/chosen": -81.3695068359375,
"logps/ref_chosen": -61.107688903808594,
"logps/ref_rejected": -83.23820495605469,
"logps/rejected": -116.08412170410156,
"loss": 0.946,
"margin_dpo/margin_mean": 12.58409595489502,
"margin_dpo/margin_std": 16.468950271606445,
"step": 445
},
{
"epoch": 0.6802721088435374,
"fcm_dpo/beta": 0.07511289417743683,
"fcm_dpo/delta": 0.060914408415555954,
"fcm_dpo/margin": 12.590319633483887,
"fcm_dpo/q_t": 0.3290197253227234,
"grad_norm": 28.1600284576416,
"learning_rate": 1.4135891358732205e-07,
"logits/chosen": 0.348872572183609,
"logits/rejected": 0.2725016176700592,
"logps/chosen": -77.0075454711914,
"logps/ref_chosen": -56.97221755981445,
"logps/ref_rejected": -80.6880874633789,
"logps/rejected": -113.31373596191406,
"loss": 0.9376,
"margin_dpo/margin_mean": 12.590319633483887,
"margin_dpo/margin_std": 15.93859577178955,
"step": 450
},
{
"epoch": 0.6878306878306878,
"fcm_dpo/beta": 0.0825229063630104,
"fcm_dpo/delta": -0.027547325938940048,
"fcm_dpo/margin": 12.420158386230469,
"fcm_dpo/q_t": 0.32720088958740234,
"grad_norm": 26.225448608398438,
"learning_rate": 1.354433695681474e-07,
"logits/chosen": 0.2992292046546936,
"logits/rejected": 0.2455030232667923,
"logps/chosen": -82.07698822021484,
"logps/ref_chosen": -61.983673095703125,
"logps/ref_rejected": -74.9884033203125,
"logps/rejected": -107.50187683105469,
"loss": 0.9822,
"margin_dpo/margin_mean": 12.420158386230469,
"margin_dpo/margin_std": 17.609575271606445,
"step": 455
},
{
"epoch": 0.6953892668178382,
"fcm_dpo/beta": 0.07965027540922165,
"fcm_dpo/delta": -0.08106034994125366,
"fcm_dpo/margin": 13.459956169128418,
"fcm_dpo/q_t": 0.3144467771053314,
"grad_norm": 26.587221145629883,
"learning_rate": 1.2960793094762345e-07,
"logits/chosen": 0.3679925501346588,
"logits/rejected": 0.280788391828537,
"logps/chosen": -78.30216979980469,
"logps/ref_chosen": -57.59019088745117,
"logps/ref_rejected": -84.5114517211914,
"logps/rejected": -118.68338775634766,
"loss": 0.9095,
"margin_dpo/margin_mean": 13.459956169128418,
"margin_dpo/margin_std": 16.770097732543945,
"step": 460
},
{
"epoch": 0.7029478458049887,
"fcm_dpo/beta": 0.07221703231334686,
"fcm_dpo/delta": 0.014492052607238293,
"fcm_dpo/margin": 13.642268180847168,
"fcm_dpo/q_t": 0.3231440782546997,
"grad_norm": 27.278940200805664,
"learning_rate": 1.238566782415197e-07,
"logits/chosen": 0.37134069204330444,
"logits/rejected": 0.31564953923225403,
"logps/chosen": -80.76337432861328,
"logps/ref_chosen": -59.79584503173828,
"logps/ref_rejected": -75.25082397460938,
"logps/rejected": -109.86061096191406,
"loss": 0.9232,
"margin_dpo/margin_mean": 13.642268180847168,
"margin_dpo/margin_std": 17.10280418395996,
"step": 465
},
{
"epoch": 0.7105064247921391,
"fcm_dpo/beta": 0.07616017013788223,
"fcm_dpo/delta": -0.016742905601859093,
"fcm_dpo/margin": 13.275070190429688,
"fcm_dpo/q_t": 0.32321128249168396,
"grad_norm": 22.729766845703125,
"learning_rate": 1.1819363309737438e-07,
"logits/chosen": 0.37919512391090393,
"logits/rejected": 0.32598358392715454,
"logps/chosen": -80.35087585449219,
"logps/ref_chosen": -59.0323486328125,
"logps/ref_rejected": -74.96698760986328,
"logps/rejected": -109.56058502197266,
"loss": 0.9218,
"margin_dpo/margin_mean": 13.275070190429688,
"margin_dpo/margin_std": 16.799335479736328,
"step": 470
},
{
"epoch": 0.7180650037792895,
"fcm_dpo/beta": 0.07094570249319077,
"fcm_dpo/delta": -0.080512635409832,
"fcm_dpo/margin": 15.083788871765137,
"fcm_dpo/q_t": 0.3183867931365967,
"grad_norm": 20.770301818847656,
"learning_rate": 1.126227554822985e-07,
"logits/chosen": 0.3839421570301056,
"logits/rejected": 0.3089445233345032,
"logps/chosen": -77.02687072753906,
"logps/ref_chosen": -56.396690368652344,
"logps/ref_rejected": -81.70674133300781,
"logps/rejected": -117.42071533203125,
"loss": 0.9389,
"margin_dpo/margin_mean": 15.083788871765137,
"margin_dpo/margin_std": 19.728008270263672,
"step": 475
},
{
"epoch": 0.7256235827664399,
"fcm_dpo/beta": 0.07331489771604538,
"fcm_dpo/delta": 0.10081305354833603,
"fcm_dpo/margin": 12.341837882995605,
"fcm_dpo/q_t": 0.3461955189704895,
"grad_norm": 30.526918411254883,
"learning_rate": 1.0714794091391072e-07,
"logits/chosen": 0.3099084198474884,
"logits/rejected": 0.2813830077648163,
"logps/chosen": -88.34848022460938,
"logps/ref_chosen": -64.63165283203125,
"logps/ref_rejected": -70.14222717285156,
"logps/rejected": -106.20088958740234,
"loss": 1.0757,
"margin_dpo/margin_mean": 12.341837882995605,
"margin_dpo/margin_std": 19.969202041625977,
"step": 480
},
{
"epoch": 0.7331821617535903,
"fcm_dpo/beta": 0.07241444289684296,
"fcm_dpo/delta": -0.03449578955769539,
"fcm_dpo/margin": 14.190716743469238,
"fcm_dpo/q_t": 0.3262421786785126,
"grad_norm": 23.741697311401367,
"learning_rate": 1.0177301773633992e-07,
"logits/chosen": 0.3603590130805969,
"logits/rejected": 0.29149702191352844,
"logps/chosen": -82.0468521118164,
"logps/ref_chosen": -59.954673767089844,
"logps/ref_rejected": -80.82916259765625,
"logps/rejected": -117.11204528808594,
"loss": 0.9667,
"margin_dpo/margin_mean": 14.190716743469238,
"margin_dpo/margin_std": 19.688003540039062,
"step": 485
},
{
"epoch": 0.7407407407407407,
"fcm_dpo/beta": 0.07517864555120468,
"fcm_dpo/delta": 0.08142177760601044,
"fcm_dpo/margin": 12.304274559020996,
"fcm_dpo/q_t": 0.34900832176208496,
"grad_norm": 34.26311492919922,
"learning_rate": 9.650174444319956e-08,
"logits/chosen": 0.3991266191005707,
"logits/rejected": 0.3289317190647125,
"logps/chosen": -86.1991195678711,
"logps/ref_chosen": -62.238365173339844,
"logps/ref_rejected": -81.98704528808594,
"logps/rejected": -118.2520751953125,
"loss": 1.0675,
"margin_dpo/margin_mean": 12.304274559020996,
"margin_dpo/margin_std": 19.93437385559082,
"step": 490
},
{
"epoch": 0.7482993197278912,
"fcm_dpo/beta": 0.07940609008073807,
"fcm_dpo/delta": 0.03455258160829544,
"fcm_dpo/margin": 12.161388397216797,
"fcm_dpo/q_t": 0.340284138917923,
"grad_norm": 31.470069885253906,
"learning_rate": 9.133780704940594e-08,
"logits/chosen": 0.3151213526725769,
"logits/rejected": 0.24373404681682587,
"logps/chosen": -83.99868774414062,
"logps/ref_chosen": -60.60944747924805,
"logps/ref_rejected": -81.48342895507812,
"logps/rejected": -117.0340576171875,
"loss": 1.0687,
"margin_dpo/margin_mean": 12.161388397216797,
"margin_dpo/margin_std": 19.294418334960938,
"step": 495
},
{
"epoch": 0.7558578987150416,
"fcm_dpo/beta": 0.07518203556537628,
"fcm_dpo/delta": -0.09908589720726013,
"fcm_dpo/margin": 14.436444282531738,
"fcm_dpo/q_t": 0.3233835697174072,
"grad_norm": 30.398324966430664,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": 0.36071914434432983,
"logits/rejected": 0.3034532070159912,
"logps/chosen": -90.01958465576172,
"logps/ref_chosen": -67.44170379638672,
"logps/ref_rejected": -85.10578155517578,
"logps/rejected": -122.12010192871094,
"loss": 0.9682,
"margin_dpo/margin_mean": 14.436445236206055,
"margin_dpo/margin_std": 20.496013641357422,
"step": 500
},
{
"epoch": 0.763416477702192,
"fcm_dpo/beta": 0.07424916326999664,
"fcm_dpo/delta": -0.0052908300422132015,
"fcm_dpo/margin": 13.501623153686523,
"fcm_dpo/q_t": 0.3237389028072357,
"grad_norm": 30.474390029907227,
"learning_rate": 8.134630621352483e-08,
"logits/chosen": 0.35755619406700134,
"logits/rejected": 0.32606256008148193,
"logps/chosen": -84.3893814086914,
"logps/ref_chosen": -63.399513244628906,
"logps/ref_rejected": -75.75922393798828,
"logps/rejected": -110.250732421875,
"loss": 0.9268,
"margin_dpo/margin_mean": 13.501623153686523,
"margin_dpo/margin_std": 17.42774200439453,
"step": 505
},
{
"epoch": 0.7709750566893424,
"fcm_dpo/beta": 0.07383919507265091,
"fcm_dpo/delta": -0.002689933869987726,
"fcm_dpo/margin": 13.561166763305664,
"fcm_dpo/q_t": 0.3301324248313904,
"grad_norm": 25.812915802001953,
"learning_rate": 7.652572947447272e-08,
"logits/chosen": 0.35465937852859497,
"logits/rejected": 0.28478819131851196,
"logps/chosen": -87.05335998535156,
"logps/ref_chosen": -65.54673767089844,
"logps/ref_rejected": -88.05908203125,
"logps/rejected": -123.12687683105469,
"loss": 0.9774,
"margin_dpo/margin_mean": 13.56116771697998,
"margin_dpo/margin_std": 19.210861206054688,
"step": 510
},
{
"epoch": 0.7785336356764928,
"fcm_dpo/beta": 0.0702199786901474,
"fcm_dpo/delta": -0.04104772210121155,
"fcm_dpo/margin": 14.736480712890625,
"fcm_dpo/q_t": 0.3220558762550354,
"grad_norm": 25.192188262939453,
"learning_rate": 7.182645715528435e-08,
"logits/chosen": 0.37450742721557617,
"logits/rejected": 0.3201286792755127,
"logps/chosen": -81.3960952758789,
"logps/ref_chosen": -58.967079162597656,
"logps/ref_rejected": -79.77230834960938,
"logps/rejected": -116.93778991699219,
"loss": 0.9213,
"margin_dpo/margin_mean": 14.736480712890625,
"margin_dpo/margin_std": 19.175683975219727,
"step": 515
},
{
"epoch": 0.7860922146636432,
"fcm_dpo/beta": 0.07708217203617096,
"fcm_dpo/delta": 0.17160889506340027,
"fcm_dpo/margin": 10.951704025268555,
"fcm_dpo/q_t": 0.35802939534187317,
"grad_norm": 30.653854370117188,
"learning_rate": 6.725177529083209e-08,
"logits/chosen": 0.39081019163131714,
"logits/rejected": 0.34083622694015503,
"logps/chosen": -85.04821014404297,
"logps/ref_chosen": -62.04914474487305,
"logps/ref_rejected": -73.25074768066406,
"logps/rejected": -107.20152282714844,
"loss": 1.0965,
"margin_dpo/margin_mean": 10.951704025268555,
"margin_dpo/margin_std": 18.67728042602539,
"step": 520
},
{
"epoch": 0.7936507936507936,
"fcm_dpo/beta": 0.08038587868213654,
"fcm_dpo/delta": 0.0009529069066047668,
"fcm_dpo/margin": 12.417234420776367,
"fcm_dpo/q_t": 0.32943472266197205,
"grad_norm": 32.0303955078125,
"learning_rate": 6.280488279429185e-08,
"logits/chosen": 0.2926352620124817,
"logits/rejected": 0.23847930133342743,
"logps/chosen": -90.89793395996094,
"logps/ref_chosen": -68.93287658691406,
"logps/ref_rejected": -86.20756530761719,
"logps/rejected": -120.58984375,
"loss": 1.0199,
"margin_dpo/margin_mean": 12.417234420776367,
"margin_dpo/margin_std": 18.533092498779297,
"step": 525
},
{
"epoch": 0.8012093726379441,
"fcm_dpo/beta": 0.08636742830276489,
"fcm_dpo/delta": -0.015138429589569569,
"fcm_dpo/margin": 11.636919021606445,
"fcm_dpo/q_t": 0.33553168177604675,
"grad_norm": 28.005414962768555,
"learning_rate": 5.848888922025552e-08,
"logits/chosen": 0.33514514565467834,
"logits/rejected": 0.2650687098503113,
"logps/chosen": -82.46696472167969,
"logps/ref_chosen": -59.8493537902832,
"logps/ref_rejected": -80.61486053466797,
"logps/rejected": -114.86936950683594,
"loss": 1.0585,
"margin_dpo/margin_mean": 11.636918067932129,
"margin_dpo/margin_std": 17.36086082458496,
"step": 530
},
{
"epoch": 0.8087679516250945,
"fcm_dpo/beta": 0.08257714658975601,
"fcm_dpo/delta": -0.002869441406801343,
"fcm_dpo/margin": 12.110450744628906,
"fcm_dpo/q_t": 0.3353736996650696,
"grad_norm": 31.17659568786621,
"learning_rate": 5.430681259032957e-08,
"logits/chosen": 0.3293718099594116,
"logits/rejected": 0.26540592312812805,
"logps/chosen": -80.54605865478516,
"logps/ref_chosen": -58.72953414916992,
"logps/ref_rejected": -78.62208557128906,
"logps/rejected": -112.54905700683594,
"loss": 1.0464,
"margin_dpo/margin_mean": 12.110448837280273,
"margin_dpo/margin_std": 18.811302185058594,
"step": 535
},
{
"epoch": 0.8163265306122449,
"fcm_dpo/beta": 0.07747219502925873,
"fcm_dpo/delta": -0.11059974133968353,
"fcm_dpo/margin": 14.1632661819458,
"fcm_dpo/q_t": 0.31101471185684204,
"grad_norm": 27.576501846313477,
"learning_rate": 5.026157728273966e-08,
"logits/chosen": 0.3622625470161438,
"logits/rejected": 0.2790268659591675,
"logps/chosen": -83.3790054321289,
"logps/ref_chosen": -61.27280807495117,
"logps/ref_rejected": -86.4178237915039,
"logps/rejected": -122.6872787475586,
"loss": 0.8863,
"margin_dpo/margin_mean": 14.1632661819458,
"margin_dpo/margin_std": 17.347553253173828,
"step": 540
},
{
"epoch": 0.8238851095993953,
"fcm_dpo/beta": 0.07073845714330673,
"fcm_dpo/delta": -0.046964578330516815,
"fcm_dpo/margin": 14.707046508789062,
"fcm_dpo/q_t": 0.3177848756313324,
"grad_norm": 25.9344539642334,
"learning_rate": 4.635601198741607e-08,
"logits/chosen": 0.3474125266075134,
"logits/rejected": 0.2886578142642975,
"logps/chosen": -77.79493713378906,
"logps/ref_chosen": -57.53668975830078,
"logps/ref_rejected": -73.76582336425781,
"logps/rejected": -108.73112487792969,
"loss": 0.9144,
"margin_dpo/margin_mean": 14.707046508789062,
"margin_dpo/margin_std": 18.784482955932617,
"step": 545
},
{
"epoch": 0.8314436885865457,
"fcm_dpo/beta": 0.07421617209911346,
"fcm_dpo/delta": 0.14916327595710754,
"fcm_dpo/margin": 11.67965030670166,
"fcm_dpo/q_t": 0.34690457582473755,
"grad_norm": 27.697330474853516,
"learning_rate": 4.259284772799099e-08,
"logits/chosen": 0.3449970781803131,
"logits/rejected": 0.28996026515960693,
"logps/chosen": -81.71879577636719,
"logps/ref_chosen": -60.406890869140625,
"logps/ref_rejected": -76.10121154785156,
"logps/rejected": -109.0927734375,
"loss": 1.0218,
"margin_dpo/margin_mean": 11.679651260375977,
"margin_dpo/margin_std": 17.418115615844727,
"step": 550
},
{
"epoch": 0.8390022675736961,
"fcm_dpo/beta": 0.08086894452571869,
"fcm_dpo/delta": -0.021382993087172508,
"fcm_dpo/margin": 12.578062057495117,
"fcm_dpo/q_t": 0.3289267420768738,
"grad_norm": 31.26999855041504,
"learning_rate": 3.89747159520904e-08,
"logits/chosen": 0.32917284965515137,
"logits/rejected": 0.2717982232570648,
"logps/chosen": -88.04100799560547,
"logps/ref_chosen": -65.4435806274414,
"logps/ref_rejected": -80.65763092041016,
"logps/rejected": -115.8331298828125,
"loss": 0.9988,
"margin_dpo/margin_mean": 12.578062057495117,
"margin_dpo/margin_std": 18.475194931030273,
"step": 555
},
{
"epoch": 0.8465608465608465,
"fcm_dpo/beta": 0.07681386172771454,
"fcm_dpo/delta": -0.0602848045527935,
"fcm_dpo/margin": 13.695526123046875,
"fcm_dpo/q_t": 0.33021193742752075,
"grad_norm": 26.484222412109375,
"learning_rate": 3.550414669125573e-08,
"logits/chosen": 0.3839051425457001,
"logits/rejected": 0.3160412907600403,
"logps/chosen": -82.10123443603516,
"logps/ref_chosen": -59.31481170654297,
"logps/ref_rejected": -79.35322570800781,
"logps/rejected": -115.83515930175781,
"loss": 0.9693,
"margin_dpo/margin_mean": 13.695526123046875,
"margin_dpo/margin_std": 19.27083969116211,
"step": 560
},
{
"epoch": 0.854119425547997,
"fcm_dpo/beta": 0.07272513210773468,
"fcm_dpo/delta": -0.029256444424390793,
"fcm_dpo/margin": 14.087471008300781,
"fcm_dpo/q_t": 0.3242108225822449,
"grad_norm": 28.179359436035156,
"learning_rate": 3.218356679178252e-08,
"logits/chosen": 0.36016514897346497,
"logits/rejected": 0.3109044134616852,
"logps/chosen": -82.92088317871094,
"logps/ref_chosen": -61.065895080566406,
"logps/ref_rejected": -79.14593505859375,
"logps/rejected": -115.0884017944336,
"loss": 0.9432,
"margin_dpo/margin_mean": 14.087471008300781,
"margin_dpo/margin_std": 18.77931022644043,
"step": 565
},
{
"epoch": 0.8616780045351474,
"fcm_dpo/beta": 0.07263718545436859,
"fcm_dpo/delta": -0.05456575006246567,
"fcm_dpo/margin": 14.389799118041992,
"fcm_dpo/q_t": 0.3169875741004944,
"grad_norm": 21.003314971923828,
"learning_rate": 2.9015298217712453e-08,
"logits/chosen": 0.33407479524612427,
"logits/rejected": 0.26317259669303894,
"logps/chosen": -80.6412124633789,
"logps/ref_chosen": -58.91632843017578,
"logps/ref_rejected": -78.48197937011719,
"logps/rejected": -114.59666442871094,
"loss": 0.9191,
"margin_dpo/margin_mean": 14.389799118041992,
"margin_dpo/margin_std": 17.922327041625977,
"step": 570
},
{
"epoch": 0.8692365835222978,
"fcm_dpo/beta": 0.07354731857776642,
"fcm_dpo/delta": 0.09992051124572754,
"fcm_dpo/margin": 12.301939010620117,
"fcm_dpo/q_t": 0.3429938554763794,
"grad_norm": 29.671510696411133,
"learning_rate": 2.600155642716606e-08,
"logits/chosen": 0.3663448691368103,
"logits/rejected": 0.3218410015106201,
"logps/chosen": -87.20105743408203,
"logps/ref_chosen": -64.36775970458984,
"logps/ref_rejected": -80.37776184082031,
"logps/rejected": -115.51298522949219,
"loss": 1.016,
"margin_dpo/margin_mean": 12.301939010620117,
"margin_dpo/margin_std": 18.205175399780273,
"step": 575
},
{
"epoch": 0.8767951625094482,
"fcm_dpo/beta": 0.07715228199958801,
"fcm_dpo/delta": 0.006949651055037975,
"fcm_dpo/margin": 12.843107223510742,
"fcm_dpo/q_t": 0.33547455072402954,
"grad_norm": 29.658864974975586,
"learning_rate": 2.3144448823151392e-08,
"logits/chosen": 0.3541300892829895,
"logits/rejected": 0.2988041043281555,
"logps/chosen": -79.77593994140625,
"logps/ref_chosen": -58.415260314941406,
"logps/ref_rejected": -74.52140045166016,
"logps/rejected": -108.72517395019531,
"loss": 0.9975,
"margin_dpo/margin_mean": 12.843107223510742,
"margin_dpo/margin_std": 18.566728591918945,
"step": 580
},
{
"epoch": 0.8843537414965986,
"fcm_dpo/beta": 0.07894281297922134,
"fcm_dpo/delta": 0.0601823627948761,
"fcm_dpo/margin": 11.971592903137207,
"fcm_dpo/q_t": 0.33943796157836914,
"grad_norm": 27.738752365112305,
"learning_rate": 2.044597327993153e-08,
"logits/chosen": 0.39197593927383423,
"logits/rejected": 0.3263949751853943,
"logps/chosen": -78.97578430175781,
"logps/ref_chosen": -56.64149856567383,
"logps/ref_rejected": -77.79124450683594,
"logps/rejected": -112.09712219238281,
"loss": 1.0413,
"margin_dpo/margin_mean": 11.971592903137207,
"margin_dpo/margin_std": 18.696794509887695,
"step": 585
},
{
"epoch": 0.891912320483749,
"fcm_dpo/beta": 0.07663112133741379,
"fcm_dpo/delta": -0.1261170357465744,
"fcm_dpo/margin": 14.529006958007812,
"fcm_dpo/q_t": 0.3086654543876648,
"grad_norm": 25.352630615234375,
"learning_rate": 1.7908016745981856e-08,
"logits/chosen": 0.35477882623672485,
"logits/rejected": 0.31469103693962097,
"logps/chosen": -82.77430725097656,
"logps/ref_chosen": -61.251670837402344,
"logps/ref_rejected": -75.03556823730469,
"logps/rejected": -111.08720397949219,
"loss": 0.9034,
"margin_dpo/margin_mean": 14.529006958007812,
"margin_dpo/margin_std": 18.56852149963379,
"step": 590
},
{
"epoch": 0.8994708994708994,
"fcm_dpo/beta": 0.07121709734201431,
"fcm_dpo/delta": -0.01630423031747341,
"fcm_dpo/margin": 14.214367866516113,
"fcm_dpo/q_t": 0.32772403955459595,
"grad_norm": 24.538618087768555,
"learning_rate": 1.553235392451377e-08,
"logits/chosen": 0.36895376443862915,
"logits/rejected": 0.2859138250350952,
"logps/chosen": -75.7615737915039,
"logps/ref_chosen": -55.449249267578125,
"logps/ref_rejected": -78.81550598144531,
"logps/rejected": -113.34220123291016,
"loss": 0.9717,
"margin_dpo/margin_mean": 14.214367866516113,
"margin_dpo/margin_std": 19.63026237487793,
"step": 595
},
{
"epoch": 0.9070294784580499,
"fcm_dpo/beta": 0.07918272912502289,
"fcm_dpo/delta": 0.15003207325935364,
"fcm_dpo/margin": 10.823195457458496,
"fcm_dpo/q_t": 0.3627161383628845,
"grad_norm": 29.42555046081543,
"learning_rate": 1.3320646032487393e-08,
"logits/chosen": 0.36016735434532166,
"logits/rejected": 0.3182498812675476,
"logps/chosen": -81.17481231689453,
"logps/ref_chosen": -58.89445877075195,
"logps/ref_rejected": -71.14781951904297,
"logps/rejected": -104.25135803222656,
"loss": 1.0944,
"margin_dpo/margin_mean": 10.82319450378418,
"margin_dpo/margin_std": 18.560047149658203,
"step": 600
},
{
"epoch": 0.9070294784580499,
"eval_fcm_dpo/beta": 0.08356332033872604,
"eval_fcm_dpo/delta": -0.005035701673477888,
"eval_fcm_dpo/margin": 11.875618934631348,
"eval_fcm_dpo/q_t": 0.3380221426486969,
"eval_logits/chosen": 0.3597419559955597,
"eval_logits/rejected": 0.304570734500885,
"eval_logps/chosen": -96.24739074707031,
"eval_logps/ref_chosen": -75.86933135986328,
"eval_logps/ref_rejected": -80.85771942138672,
"eval_logps/rejected": -113.11141204833984,
"eval_loss": 0.5214306712150574,
"eval_margin_dpo/margin_mean": 11.875618934631348,
"eval_margin_dpo/margin_std": 18.387540817260742,
"eval_runtime": 38.6993,
"eval_samples_per_second": 59.51,
"eval_steps_per_second": 1.86,
"step": 600
},
{
"epoch": 0.9145880574452003,
"fcm_dpo/beta": 0.07584916055202484,
"fcm_dpo/delta": -0.017186608165502548,
"fcm_dpo/margin": 13.365765571594238,
"fcm_dpo/q_t": 0.32789379358291626,
"grad_norm": 28.76905059814453,
"learning_rate": 1.1274439638981532e-08,
"logits/chosen": 0.35118022561073303,
"logits/rejected": 0.299066424369812,
"logps/chosen": -79.40129089355469,
"logps/ref_chosen": -60.206268310546875,
"logps/ref_rejected": -76.11177825927734,
"logps/rejected": -108.6725845336914,
"loss": 0.963,
"margin_dpo/margin_mean": 13.365765571594238,
"margin_dpo/margin_std": 18.610855102539062,
"step": 605
},
{
"epoch": 0.9221466364323507,
"fcm_dpo/beta": 0.07705016434192657,
"fcm_dpo/delta": -0.0388740599155426,
"fcm_dpo/margin": 13.390531539916992,
"fcm_dpo/q_t": 0.3253692388534546,
"grad_norm": 31.618640899658203,
"learning_rate": 9.395165583732379e-09,
"logits/chosen": 0.3527846336364746,
"logits/rejected": 0.30443698167800903,
"logps/chosen": -82.28140258789062,
"logps/ref_chosen": -61.04254150390625,
"logps/ref_rejected": -82.46031188964844,
"logps/rejected": -117.0897216796875,
"loss": 0.9402,
"margin_dpo/margin_mean": 13.390533447265625,
"margin_dpo/margin_std": 17.899368286132812,
"step": 610
},
{
"epoch": 0.9297052154195011,
"fcm_dpo/beta": 0.0739460289478302,
"fcm_dpo/delta": -0.05259857699275017,
"fcm_dpo/margin": 14.14459228515625,
"fcm_dpo/q_t": 0.317154198884964,
"grad_norm": 28.697656631469727,
"learning_rate": 7.684137976598088e-09,
"logits/chosen": 0.33570918440818787,
"logits/rejected": 0.2927141785621643,
"logps/chosen": -81.54222106933594,
"logps/ref_chosen": -60.49250411987305,
"logps/ref_rejected": -81.13261413574219,
"logps/rejected": -116.32688903808594,
"loss": 0.927,
"margin_dpo/margin_mean": 14.14459228515625,
"margin_dpo/margin_std": 18.297395706176758,
"step": 615
},
{
"epoch": 0.9372637944066515,
"fcm_dpo/beta": 0.07480698078870773,
"fcm_dpo/delta": 0.08452598005533218,
"fcm_dpo/margin": 12.292991638183594,
"fcm_dpo/q_t": 0.3419121503829956,
"grad_norm": 33.187259674072266,
"learning_rate": 6.142553278648238e-09,
"logits/chosen": 0.37463945150375366,
"logits/rejected": 0.3073200583457947,
"logps/chosen": -79.49095153808594,
"logps/ref_chosen": -58.75004959106445,
"logps/ref_rejected": -79.14283752441406,
"logps/rejected": -112.1767349243164,
"loss": 0.9754,
"margin_dpo/margin_mean": 12.292991638183594,
"margin_dpo/margin_std": 17.254846572875977,
"step": 620
},
{
"epoch": 0.9448223733938019,
"fcm_dpo/beta": 0.07812217622995377,
"fcm_dpo/delta": 0.025508109480142593,
"fcm_dpo/margin": 12.489466667175293,
"fcm_dpo/q_t": 0.33177176117897034,
"grad_norm": 28.660175323486328,
"learning_rate": 4.7714894655209174e-09,
"logits/chosen": 0.3732627034187317,
"logits/rejected": 0.28870144486427307,
"logps/chosen": -79.76417541503906,
"logps/ref_chosen": -57.77447509765625,
"logps/ref_rejected": -83.365966796875,
"logps/rejected": -117.8451156616211,
"loss": 0.9557,
"margin_dpo/margin_mean": 12.489466667175293,
"margin_dpo/margin_std": 16.900440216064453,
"step": 625
},
{
"epoch": 0.9523809523809523,
"fcm_dpo/beta": 0.07247981429100037,
"fcm_dpo/delta": -0.11990991979837418,
"fcm_dpo/margin": 15.216099739074707,
"fcm_dpo/q_t": 0.3133440315723419,
"grad_norm": 21.737163543701172,
"learning_rate": 3.5719052736323806e-09,
"logits/chosen": 0.36857935786247253,
"logits/rejected": 0.2945020794868469,
"logps/chosen": -78.60719299316406,
"logps/ref_chosen": -58.47271728515625,
"logps/ref_rejected": -84.48008728027344,
"logps/rejected": -119.83065032958984,
"loss": 0.9124,
"margin_dpo/margin_mean": 15.216100692749023,
"margin_dpo/margin_std": 19.21467399597168,
"step": 630
},
{
"epoch": 0.9599395313681028,
"fcm_dpo/beta": 0.07038284093141556,
"fcm_dpo/delta": 0.0925895944237709,
"fcm_dpo/margin": 13.050129890441895,
"fcm_dpo/q_t": 0.34775209426879883,
"grad_norm": 32.16118621826172,
"learning_rate": 2.5446395297668287e-09,
"logits/chosen": 0.3829967677593231,
"logits/rejected": 0.3268979787826538,
"logps/chosen": -80.91036987304688,
"logps/ref_chosen": -60.0723991394043,
"logps/ref_rejected": -75.8419189453125,
"logps/rejected": -109.73001861572266,
"loss": 1.0213,
"margin_dpo/margin_mean": 13.050129890441895,
"margin_dpo/margin_std": 19.307937622070312,
"step": 635
},
{
"epoch": 0.9674981103552532,
"fcm_dpo/beta": 0.07497520744800568,
"fcm_dpo/delta": -0.0180673748254776,
"fcm_dpo/margin": 13.5342435836792,
"fcm_dpo/q_t": 0.327609658241272,
"grad_norm": 28.1774959564209,
"learning_rate": 1.690410564514244e-09,
"logits/chosen": 0.3562234938144684,
"logits/rejected": 0.27130261063575745,
"logps/chosen": -82.12296295166016,
"logps/ref_chosen": -59.24292755126953,
"logps/ref_rejected": -81.03025817871094,
"logps/rejected": -117.44453430175781,
"loss": 0.9614,
"margin_dpo/margin_mean": 13.5342435836792,
"margin_dpo/margin_std": 19.009403228759766,
"step": 640
},
{
"epoch": 0.9750566893424036,
"fcm_dpo/beta": 0.07942639291286469,
"fcm_dpo/delta": 0.06387078016996384,
"fcm_dpo/margin": 11.842493057250977,
"fcm_dpo/q_t": 0.3415600657463074,
"grad_norm": 27.630109786987305,
"learning_rate": 1.0098157099674987e-09,
"logits/chosen": 0.33102938532829285,
"logits/rejected": 0.293028324842453,
"logps/chosen": -86.07749938964844,
"logps/ref_chosen": -63.97548294067383,
"logps/ref_rejected": -74.65735626220703,
"logps/rejected": -108.60185241699219,
"loss": 1.0304,
"margin_dpo/margin_mean": 11.842493057250977,
"margin_dpo/margin_std": 18.239856719970703,
"step": 645
},
{
"epoch": 0.982615268329554,
"fcm_dpo/beta": 0.07533489167690277,
"fcm_dpo/delta": -0.09645902365446091,
"fcm_dpo/margin": 14.389871597290039,
"fcm_dpo/q_t": 0.3166733682155609,
"grad_norm": 29.344585418701172,
"learning_rate": 5.033308820289184e-10,
"logits/chosen": 0.38797593116760254,
"logits/rejected": 0.3146594166755676,
"logps/chosen": -82.97822570800781,
"logps/ref_chosen": -60.51557159423828,
"logps/ref_rejected": -85.11001586914062,
"logps/rejected": -121.96253967285156,
"loss": 0.9395,
"margin_dpo/margin_mean": 14.389869689941406,
"margin_dpo/margin_std": 19.199068069458008,
"step": 650
},
{
"epoch": 0.9901738473167044,
"fcm_dpo/beta": 0.07521242648363113,
"fcm_dpo/delta": 0.03413959592580795,
"fcm_dpo/margin": 12.832531929016113,
"fcm_dpo/q_t": 0.3351586163043976,
"grad_norm": 22.92190170288086,
"learning_rate": 1.7131024761923852e-10,
"logits/chosen": 0.37830454111099243,
"logits/rejected": 0.2979954779148102,
"logps/chosen": -80.21737670898438,
"logps/ref_chosen": -59.14573287963867,
"logps/ref_rejected": -80.98335266113281,
"logps/rejected": -114.88752746582031,
"loss": 0.9554,
"margin_dpo/margin_mean": 12.83253002166748,
"margin_dpo/margin_std": 17.429914474487305,
"step": 655
},
{
"epoch": 0.9977324263038548,
"fcm_dpo/beta": 0.07314083725214005,
"fcm_dpo/delta": -0.027313020080327988,
"fcm_dpo/margin": 13.969868659973145,
"fcm_dpo/q_t": 0.3238561749458313,
"grad_norm": 25.15906524658203,
"learning_rate": 1.3985977021235829e-11,
"logits/chosen": 0.43907564878463745,
"logits/rejected": 0.36568042635917664,
"logps/chosen": -82.41399383544922,
"logps/ref_chosen": -60.18262481689453,
"logps/ref_rejected": -80.55596160888672,
"logps/rejected": -116.7572250366211,
"loss": 0.9519,
"margin_dpo/margin_mean": 13.969868659973145,
"margin_dpo/margin_std": 18.85131072998047,
"step": 660
},
{
"epoch": 0.999244142101285,
"step": 661,
"total_flos": 0.0,
"train_loss": 1.0854419020769637,
"train_runtime": 1766.4629,
"train_samples_per_second": 23.967,
"train_steps_per_second": 0.374
}
],
"logging_steps": 5,
"max_steps": 661,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}