Model: jackf857/llama-3-8b-base-new-dpo-harmless-4xh200-s_star1.0 Source: Original Platform
2631 lines
96 KiB
JSON
2631 lines
96 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.999244142101285,
|
|
"eval_steps": 200,
|
|
"global_step": 661,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0015117157974300832,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.0036174654960632324,
|
|
"fcm_dpo/q_t": 0.5000090599060059,
|
|
"grad_norm": 2.850151538848877,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": 0.12559199333190918,
|
|
"logits/rejected": 0.11513248085975647,
|
|
"logps/chosen": -65.17359924316406,
|
|
"logps/ref_chosen": -65.2028579711914,
|
|
"logps/ref_rejected": -64.80973052978516,
|
|
"logps/rejected": -64.77685546875,
|
|
"loss": 1.3863,
|
|
"margin_dpo/margin_mean": -0.0036170482635498047,
|
|
"margin_dpo/margin_std": 0.2552323341369629,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.007558578987150416,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.015368208289146423,
|
|
"fcm_dpo/q_t": 0.4999615550041199,
|
|
"grad_norm": 2.9636518955230713,
|
|
"learning_rate": 2.9850746268656714e-08,
|
|
"logits/chosen": 0.08381284773349762,
|
|
"logits/rejected": 0.056591667234897614,
|
|
"logps/chosen": -65.78416442871094,
|
|
"logps/ref_chosen": -65.79120635986328,
|
|
"logps/ref_rejected": -79.74447631835938,
|
|
"logps/rejected": -79.75279998779297,
|
|
"loss": 1.3861,
|
|
"margin_dpo/margin_mean": 0.015368461608886719,
|
|
"margin_dpo/margin_std": 0.30196240544319153,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.015117157974300832,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.017599213868379593,
|
|
"fcm_dpo/q_t": 0.500044047832489,
|
|
"grad_norm": 2.9695703983306885,
|
|
"learning_rate": 6.71641791044776e-08,
|
|
"logits/chosen": 0.10358067601919174,
|
|
"logits/rejected": 0.06364428251981735,
|
|
"logps/chosen": -57.39263153076172,
|
|
"logps/ref_chosen": -57.38689041137695,
|
|
"logps/ref_rejected": -80.92173767089844,
|
|
"logps/rejected": -80.90988159179688,
|
|
"loss": 1.3865,
|
|
"margin_dpo/margin_mean": -0.01759929582476616,
|
|
"margin_dpo/margin_std": 0.3119713366031647,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.022675736961451247,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.009314382448792458,
|
|
"fcm_dpo/q_t": 0.4999767243862152,
|
|
"grad_norm": 3.320962905883789,
|
|
"learning_rate": 1.044776119402985e-07,
|
|
"logits/chosen": 0.07827206701040268,
|
|
"logits/rejected": 0.04801332950592041,
|
|
"logps/chosen": -61.75555419921875,
|
|
"logps/ref_chosen": -61.75988006591797,
|
|
"logps/ref_rejected": -80.3942642211914,
|
|
"logps/rejected": -80.39925384521484,
|
|
"loss": 1.3862,
|
|
"margin_dpo/margin_mean": 0.00931442342698574,
|
|
"margin_dpo/margin_std": 0.3290034830570221,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.030234315948601664,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.007567483000457287,
|
|
"fcm_dpo/q_t": 0.4999810755252838,
|
|
"grad_norm": 2.972181797027588,
|
|
"learning_rate": 1.4179104477611938e-07,
|
|
"logits/chosen": 0.08886369317770004,
|
|
"logits/rejected": 0.06055384874343872,
|
|
"logps/chosen": -56.6132698059082,
|
|
"logps/ref_chosen": -56.6275749206543,
|
|
"logps/ref_rejected": -78.54231262207031,
|
|
"logps/rejected": -78.53557586669922,
|
|
"loss": 1.3862,
|
|
"margin_dpo/margin_mean": 0.007567489054054022,
|
|
"margin_dpo/margin_std": 0.30747541785240173,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.03779289493575208,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.021364277228713036,
|
|
"fcm_dpo/q_t": 0.49994659423828125,
|
|
"grad_norm": 2.9778642654418945,
|
|
"learning_rate": 1.7910447761194027e-07,
|
|
"logits/chosen": 0.10495474189519882,
|
|
"logits/rejected": 0.07259530574083328,
|
|
"logps/chosen": -61.929527282714844,
|
|
"logps/ref_chosen": -61.922279357910156,
|
|
"logps/ref_rejected": -83.95155334472656,
|
|
"logps/rejected": -83.98015594482422,
|
|
"loss": 1.3861,
|
|
"margin_dpo/margin_mean": 0.02136421762406826,
|
|
"margin_dpo/margin_std": 0.2991010844707489,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.045351473922902494,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.004342102911323309,
|
|
"fcm_dpo/q_t": 0.4999891221523285,
|
|
"grad_norm": 3.0755813121795654,
|
|
"learning_rate": 2.1641791044776117e-07,
|
|
"logits/chosen": 0.09887897223234177,
|
|
"logits/rejected": 0.07384434342384338,
|
|
"logps/chosen": -61.931427001953125,
|
|
"logps/ref_chosen": -61.90684127807617,
|
|
"logps/ref_rejected": -79.56486511230469,
|
|
"logps/rejected": -79.59378814697266,
|
|
"loss": 1.3863,
|
|
"margin_dpo/margin_mean": 0.004342180676758289,
|
|
"margin_dpo/margin_std": 0.31154924631118774,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.05291005291005291,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.04645770788192749,
|
|
"fcm_dpo/q_t": 0.49988383054733276,
|
|
"grad_norm": 2.7926185131073,
|
|
"learning_rate": 2.537313432835821e-07,
|
|
"logits/chosen": 0.056805629283189774,
|
|
"logits/rejected": 0.03099716268479824,
|
|
"logps/chosen": -64.05213165283203,
|
|
"logps/ref_chosen": -64.01432800292969,
|
|
"logps/ref_rejected": -81.33033752441406,
|
|
"logps/rejected": -81.41459655761719,
|
|
"loss": 1.3858,
|
|
"margin_dpo/margin_mean": 0.04645807296037674,
|
|
"margin_dpo/margin_std": 0.3312261402606964,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.06046863189720333,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.012222861871123314,
|
|
"fcm_dpo/q_t": 0.4999694228172302,
|
|
"grad_norm": 3.1424221992492676,
|
|
"learning_rate": 2.9104477611940296e-07,
|
|
"logits/chosen": 0.10701529681682587,
|
|
"logits/rejected": 0.061525583267211914,
|
|
"logps/chosen": -60.28235626220703,
|
|
"logps/ref_chosen": -60.1998176574707,
|
|
"logps/ref_rejected": -85.63372039794922,
|
|
"logps/rejected": -85.72847747802734,
|
|
"loss": 1.3862,
|
|
"margin_dpo/margin_mean": 0.012222832068800926,
|
|
"margin_dpo/margin_std": 0.3903924524784088,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.06802721088435375,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.049396924674510956,
|
|
"fcm_dpo/q_t": 0.4998764991760254,
|
|
"grad_norm": 3.186418056488037,
|
|
"learning_rate": 3.2835820895522385e-07,
|
|
"logits/chosen": 0.06446581333875656,
|
|
"logits/rejected": 0.03697461634874344,
|
|
"logps/chosen": -66.853515625,
|
|
"logps/ref_chosen": -66.71932220458984,
|
|
"logps/ref_rejected": -84.73368835449219,
|
|
"logps/rejected": -84.91728210449219,
|
|
"loss": 1.3858,
|
|
"margin_dpo/margin_mean": 0.049397267401218414,
|
|
"margin_dpo/margin_std": 0.4028749465942383,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.07558578987150416,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.11318810284137726,
|
|
"fcm_dpo/q_t": 0.4997170567512512,
|
|
"grad_norm": 2.841273069381714,
|
|
"learning_rate": 3.6567164179104475e-07,
|
|
"logits/chosen": 0.07657527923583984,
|
|
"logits/rejected": 0.04275421425700188,
|
|
"logps/chosen": -56.79387283325195,
|
|
"logps/ref_chosen": -56.59545135498047,
|
|
"logps/ref_rejected": -71.17185974121094,
|
|
"logps/rejected": -71.48346710205078,
|
|
"loss": 1.3852,
|
|
"margin_dpo/margin_mean": 0.11318818479776382,
|
|
"margin_dpo/margin_std": 0.4424575865268707,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.08314436885865457,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.19668380916118622,
|
|
"fcm_dpo/q_t": 0.4995082914829254,
|
|
"grad_norm": 3.2169511318206787,
|
|
"learning_rate": 4.0298507462686564e-07,
|
|
"logits/chosen": 0.06426317989826202,
|
|
"logits/rejected": 0.026701394468545914,
|
|
"logps/chosen": -58.7703971862793,
|
|
"logps/ref_chosen": -58.43064498901367,
|
|
"logps/ref_rejected": -81.11677551269531,
|
|
"logps/rejected": -81.6532211303711,
|
|
"loss": 1.3843,
|
|
"margin_dpo/margin_mean": 0.19668370485305786,
|
|
"margin_dpo/margin_std": 0.5449806451797485,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.09070294784580499,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.19983884692192078,
|
|
"fcm_dpo/q_t": 0.49950042366981506,
|
|
"grad_norm": 2.920549154281616,
|
|
"learning_rate": 4.4029850746268654e-07,
|
|
"logits/chosen": 0.10816339403390884,
|
|
"logits/rejected": 0.07374849915504456,
|
|
"logps/chosen": -61.71905517578125,
|
|
"logps/ref_chosen": -61.1767463684082,
|
|
"logps/ref_rejected": -75.71009063720703,
|
|
"logps/rejected": -76.45222473144531,
|
|
"loss": 1.3843,
|
|
"margin_dpo/margin_mean": 0.19983868300914764,
|
|
"margin_dpo/margin_std": 0.7893710136413574,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.0982615268329554,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.29686489701271057,
|
|
"fcm_dpo/q_t": 0.4992578625679016,
|
|
"grad_norm": 3.1904947757720947,
|
|
"learning_rate": 4.776119402985074e-07,
|
|
"logits/chosen": 0.1440560221672058,
|
|
"logits/rejected": 0.11248280853033066,
|
|
"logps/chosen": -61.19800567626953,
|
|
"logps/ref_chosen": -60.42144012451172,
|
|
"logps/ref_rejected": -77.3677749633789,
|
|
"logps/rejected": -78.44120025634766,
|
|
"loss": 1.3834,
|
|
"margin_dpo/margin_mean": 0.29686498641967773,
|
|
"margin_dpo/margin_std": 1.026890754699707,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.10582010582010581,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.3897508978843689,
|
|
"fcm_dpo/q_t": 0.4990256726741791,
|
|
"grad_norm": 2.9328501224517822,
|
|
"learning_rate": 4.999860140229787e-07,
|
|
"logits/chosen": 0.10854315757751465,
|
|
"logits/rejected": 0.07236559689044952,
|
|
"logps/chosen": -69.1954574584961,
|
|
"logps/ref_chosen": -68.04537200927734,
|
|
"logps/ref_rejected": -83.14714050292969,
|
|
"logps/rejected": -84.68696594238281,
|
|
"loss": 1.3824,
|
|
"margin_dpo/margin_mean": 0.3897508978843689,
|
|
"margin_dpo/margin_std": 1.3252379894256592,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.11337868480725624,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.5443285703659058,
|
|
"fcm_dpo/q_t": 0.4986393451690674,
|
|
"grad_norm": 2.8421285152435303,
|
|
"learning_rate": 4.998286897523808e-07,
|
|
"logits/chosen": 0.11565772444009781,
|
|
"logits/rejected": 0.07986008375883102,
|
|
"logps/chosen": -58.929412841796875,
|
|
"logps/ref_chosen": -57.3649787902832,
|
|
"logps/ref_rejected": -73.14057159423828,
|
|
"logps/rejected": -75.24932861328125,
|
|
"loss": 1.3809,
|
|
"margin_dpo/margin_mean": 0.5443285703659058,
|
|
"margin_dpo/margin_std": 1.6223704814910889,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.12093726379440665,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.8009698987007141,
|
|
"fcm_dpo/q_t": 0.4979979991912842,
|
|
"grad_norm": 2.954160690307617,
|
|
"learning_rate": 4.994966691179711e-07,
|
|
"logits/chosen": 0.15183034539222717,
|
|
"logits/rejected": 0.10617075115442276,
|
|
"logps/chosen": -60.83113479614258,
|
|
"logps/ref_chosen": -58.77534103393555,
|
|
"logps/ref_rejected": -79.07672119140625,
|
|
"logps/rejected": -81.93347930908203,
|
|
"loss": 1.3784,
|
|
"margin_dpo/margin_mean": 0.8009698987007141,
|
|
"margin_dpo/margin_std": 2.1611573696136475,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.12849584278155707,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.2792611122131348,
|
|
"fcm_dpo/q_t": 0.49680256843566895,
|
|
"grad_norm": 3.1464085578918457,
|
|
"learning_rate": 4.989901842900325e-07,
|
|
"logits/chosen": 0.15984781086444855,
|
|
"logits/rejected": 0.11359409987926483,
|
|
"logps/chosen": -60.38011932373047,
|
|
"logps/ref_chosen": -57.70839309692383,
|
|
"logps/ref_rejected": -76.26394653320312,
|
|
"logps/rejected": -80.21492767333984,
|
|
"loss": 1.3737,
|
|
"margin_dpo/margin_mean": 1.2792608737945557,
|
|
"margin_dpo/margin_std": 2.5403237342834473,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.1360544217687075,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.5232597589492798,
|
|
"fcm_dpo/q_t": 0.4961939752101898,
|
|
"grad_norm": 3.333669662475586,
|
|
"learning_rate": 4.983095894354857e-07,
|
|
"logits/chosen": 0.2166169136762619,
|
|
"logits/rejected": 0.1685468852519989,
|
|
"logps/chosen": -62.62725067138672,
|
|
"logps/ref_chosen": -58.71812057495117,
|
|
"logps/ref_rejected": -82.2930908203125,
|
|
"logps/rejected": -87.7254867553711,
|
|
"loss": 1.3715,
|
|
"margin_dpo/margin_mean": 1.5232598781585693,
|
|
"margin_dpo/margin_std": 3.794527053833008,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.1436130007558579,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 2.0687878131866455,
|
|
"fcm_dpo/q_t": 0.49483543634414673,
|
|
"grad_norm": 3.5051140785217285,
|
|
"learning_rate": 4.974553604702332e-07,
|
|
"logits/chosen": 0.22696343064308167,
|
|
"logits/rejected": 0.1834408938884735,
|
|
"logps/chosen": -59.80878829956055,
|
|
"logps/ref_chosen": -54.887908935546875,
|
|
"logps/ref_rejected": -76.79985046386719,
|
|
"logps/rejected": -83.78950500488281,
|
|
"loss": 1.3665,
|
|
"margin_dpo/margin_mean": 2.0687873363494873,
|
|
"margin_dpo/margin_std": 5.531675338745117,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.15117157974300832,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.6540107727050781,
|
|
"fcm_dpo/q_t": 0.49587664008140564,
|
|
"grad_norm": 3.7864036560058594,
|
|
"learning_rate": 4.964280947263676e-07,
|
|
"logits/chosen": 0.2681678235530853,
|
|
"logits/rejected": 0.23597940802574158,
|
|
"logps/chosen": -72.77100372314453,
|
|
"logps/ref_chosen": -65.1898422241211,
|
|
"logps/ref_rejected": -83.39742279052734,
|
|
"logps/rejected": -92.63258361816406,
|
|
"loss": 1.3719,
|
|
"margin_dpo/margin_mean": 1.6540113687515259,
|
|
"margin_dpo/margin_std": 8.77057933807373,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.15873015873015872,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 2.923435688018799,
|
|
"fcm_dpo/q_t": 0.49274763464927673,
|
|
"grad_norm": 4.0385565757751465,
|
|
"learning_rate": 4.952285105344791e-07,
|
|
"logits/chosen": 0.3018716275691986,
|
|
"logits/rejected": 0.25165122747421265,
|
|
"logps/chosen": -72.08647155761719,
|
|
"logps/ref_chosen": -63.611778259277344,
|
|
"logps/ref_rejected": -81.8642578125,
|
|
"logps/rejected": -93.26237487792969,
|
|
"loss": 1.3604,
|
|
"margin_dpo/margin_mean": 2.9234354496002197,
|
|
"margin_dpo/margin_std": 10.657812118530273,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.16628873771730915,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 2.3822619915008545,
|
|
"fcm_dpo/q_t": 0.4940711557865143,
|
|
"grad_norm": 4.880163669586182,
|
|
"learning_rate": 4.938574467213517e-07,
|
|
"logits/chosen": 0.27982911467552185,
|
|
"logits/rejected": 0.25695186853408813,
|
|
"logps/chosen": -82.95537567138672,
|
|
"logps/ref_chosen": -70.61798858642578,
|
|
"logps/ref_rejected": -80.55892181396484,
|
|
"logps/rejected": -95.27857971191406,
|
|
"loss": 1.3683,
|
|
"margin_dpo/margin_mean": 2.3822619915008545,
|
|
"margin_dpo/margin_std": 14.694234848022461,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.17384731670445955,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 4.24505615234375,
|
|
"fcm_dpo/q_t": 0.4895564913749695,
|
|
"grad_norm": 3.4979965686798096,
|
|
"learning_rate": 4.923158620234019e-07,
|
|
"logits/chosen": 0.3670777380466461,
|
|
"logits/rejected": 0.31523239612579346,
|
|
"logps/chosen": -73.29847717285156,
|
|
"logps/ref_chosen": -60.36003494262695,
|
|
"logps/ref_rejected": -83.49537658691406,
|
|
"logps/rejected": -100.67887878417969,
|
|
"loss": 1.3508,
|
|
"margin_dpo/margin_mean": 4.24505615234375,
|
|
"margin_dpo/margin_std": 15.931065559387207,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.18140589569160998,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 5.727739334106445,
|
|
"fcm_dpo/q_t": 0.48592695593833923,
|
|
"grad_norm": 4.6423659324646,
|
|
"learning_rate": 4.906048344162676e-07,
|
|
"logits/chosen": 0.40479379892349243,
|
|
"logits/rejected": 0.3574323058128357,
|
|
"logps/chosen": -72.22782897949219,
|
|
"logps/ref_chosen": -57.185150146484375,
|
|
"logps/ref_rejected": -76.90118408203125,
|
|
"logps/rejected": -97.67161560058594,
|
|
"loss": 1.3385,
|
|
"margin_dpo/margin_mean": 5.727739334106445,
|
|
"margin_dpo/margin_std": 17.80091667175293,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.1889644746787604,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 8.763603210449219,
|
|
"fcm_dpo/q_t": 0.47878074645996094,
|
|
"grad_norm": 4.912358283996582,
|
|
"learning_rate": 4.887255603610184e-07,
|
|
"logits/chosen": 0.46570539474487305,
|
|
"logits/rejected": 0.412194162607193,
|
|
"logps/chosen": -80.9414291381836,
|
|
"logps/ref_chosen": -60.63164138793945,
|
|
"logps/ref_rejected": -87.692138671875,
|
|
"logps/rejected": -116.76551818847656,
|
|
"loss": 1.3224,
|
|
"margin_dpo/margin_mean": 8.763603210449219,
|
|
"margin_dpo/margin_std": 29.222675323486328,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.1965230536659108,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 7.171716213226318,
|
|
"fcm_dpo/q_t": 0.48291224241256714,
|
|
"grad_norm": 5.604545593261719,
|
|
"learning_rate": 4.866793539675126e-07,
|
|
"logits/chosen": 0.42708373069763184,
|
|
"logits/rejected": 0.3993222713470459,
|
|
"logps/chosen": -91.39217376708984,
|
|
"logps/ref_chosen": -65.96144104003906,
|
|
"logps/ref_rejected": -77.07868194580078,
|
|
"logps/rejected": -109.6811294555664,
|
|
"loss": 1.3402,
|
|
"margin_dpo/margin_mean": 7.171716213226318,
|
|
"margin_dpo/margin_std": 29.880590438842773,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.20408163265306123,
|
|
"fcm_dpo/beta": 0.010768004693090916,
|
|
"fcm_dpo/delta": 0.07399419695138931,
|
|
"fcm_dpo/margin": 10.805874824523926,
|
|
"fcm_dpo/q_t": 0.47332343459129333,
|
|
"grad_norm": 4.438642978668213,
|
|
"learning_rate": 4.844676460754862e-07,
|
|
"logits/chosen": 0.5009486079216003,
|
|
"logits/rejected": 0.4642602801322937,
|
|
"logps/chosen": -85.66596221923828,
|
|
"logps/ref_chosen": -58.002349853515625,
|
|
"logps/ref_rejected": -74.80711364746094,
|
|
"logps/rejected": -113.27659606933594,
|
|
"loss": 1.3098,
|
|
"margin_dpo/margin_mean": 10.80587387084961,
|
|
"margin_dpo/margin_std": 34.74369430541992,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.21164021164021163,
|
|
"fcm_dpo/beta": 0.011533305048942566,
|
|
"fcm_dpo/delta": 0.06865964084863663,
|
|
"fcm_dpo/margin": 11.856359481811523,
|
|
"fcm_dpo/q_t": 0.4694371223449707,
|
|
"grad_norm": 7.658777713775635,
|
|
"learning_rate": 4.820919832540181e-07,
|
|
"logits/chosen": 0.4951881468296051,
|
|
"logits/rejected": 0.4550108015537262,
|
|
"logps/chosen": -100.08610534667969,
|
|
"logps/ref_chosen": -64.05648803710938,
|
|
"logps/ref_rejected": -80.10523986816406,
|
|
"logps/rejected": -127.9912109375,
|
|
"loss": 1.3239,
|
|
"margin_dpo/margin_mean": 11.856356620788574,
|
|
"margin_dpo/margin_std": 46.47296142578125,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.21919879062736206,
|
|
"fcm_dpo/beta": 0.01241993997246027,
|
|
"fcm_dpo/delta": 0.07406426966190338,
|
|
"fcm_dpo/margin": 8.729610443115234,
|
|
"fcm_dpo/q_t": 0.47626978158950806,
|
|
"grad_norm": 16.21937370300293,
|
|
"learning_rate": 4.795540267200686e-07,
|
|
"logits/chosen": 0.514064610004425,
|
|
"logits/rejected": 0.49313417077064514,
|
|
"logps/chosen": -103.54437255859375,
|
|
"logps/ref_chosen": -65.76856994628906,
|
|
"logps/ref_rejected": -81.22962951660156,
|
|
"logps/rejected": -127.73504638671875,
|
|
"loss": 1.3539,
|
|
"margin_dpo/margin_mean": 8.729610443115234,
|
|
"margin_dpo/margin_std": 44.03262710571289,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.22675736961451248,
|
|
"fcm_dpo/beta": 0.012905704788863659,
|
|
"fcm_dpo/delta": 0.07531466335058212,
|
|
"fcm_dpo/margin": 10.41810417175293,
|
|
"fcm_dpo/q_t": 0.4676801264286041,
|
|
"grad_norm": 5.872846603393555,
|
|
"learning_rate": 4.768555511768486e-07,
|
|
"logits/chosen": 0.5009235143661499,
|
|
"logits/rejected": 0.4602130353450775,
|
|
"logps/chosen": -94.07633972167969,
|
|
"logps/ref_chosen": -60.346473693847656,
|
|
"logps/ref_rejected": -75.12642669677734,
|
|
"logps/rejected": -119.2743911743164,
|
|
"loss": 1.3045,
|
|
"margin_dpo/margin_mean": 10.418103218078613,
|
|
"margin_dpo/margin_std": 34.75267791748047,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.23431594860166288,
|
|
"fcm_dpo/beta": 0.017845138907432556,
|
|
"fcm_dpo/delta": 0.4511590898036957,
|
|
"fcm_dpo/margin": 12.379961013793945,
|
|
"fcm_dpo/q_t": 0.45373255014419556,
|
|
"grad_norm": 13.452332496643066,
|
|
"learning_rate": 4.7399844357283393e-07,
|
|
"logits/chosen": 0.5150389671325684,
|
|
"logits/rejected": 0.482626348733902,
|
|
"logps/chosen": -93.2920913696289,
|
|
"logps/ref_chosen": -61.901710510253906,
|
|
"logps/ref_rejected": -75.51579284667969,
|
|
"logps/rejected": -119.28614807128906,
|
|
"loss": 1.2681,
|
|
"margin_dpo/margin_mean": 12.379961013793945,
|
|
"margin_dpo/margin_std": 33.46470642089844,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.2418745275888133,
|
|
"fcm_dpo/beta": 0.028629502281546593,
|
|
"fcm_dpo/delta": 0.5467379093170166,
|
|
"fcm_dpo/margin": 13.615964889526367,
|
|
"fcm_dpo/q_t": 0.4253949522972107,
|
|
"grad_norm": 14.785847663879395,
|
|
"learning_rate": 4.7098470178228755e-07,
|
|
"logits/chosen": 0.48933330178260803,
|
|
"logits/rejected": 0.4520339369773865,
|
|
"logps/chosen": -91.06707763671875,
|
|
"logps/ref_chosen": -59.82744598388672,
|
|
"logps/ref_rejected": -76.28009033203125,
|
|
"logps/rejected": -121.13565826416016,
|
|
"loss": 1.1965,
|
|
"margin_dpo/margin_mean": 13.615964889526367,
|
|
"margin_dpo/margin_std": 29.862279891967773,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.2494331065759637,
|
|
"fcm_dpo/beta": 0.04379943758249283,
|
|
"fcm_dpo/delta": 0.27622583508491516,
|
|
"fcm_dpo/margin": 16.814184188842773,
|
|
"fcm_dpo/q_t": 0.36278295516967773,
|
|
"grad_norm": 19.85223960876465,
|
|
"learning_rate": 4.678164332082175e-07,
|
|
"logits/chosen": 0.5087782144546509,
|
|
"logits/rejected": 0.4584970474243164,
|
|
"logps/chosen": -86.49288940429688,
|
|
"logps/ref_chosen": -56.396278381347656,
|
|
"logps/ref_rejected": -77.31051635742188,
|
|
"logps/rejected": -124.2213134765625,
|
|
"loss": 1.0917,
|
|
"margin_dpo/margin_mean": 16.814186096191406,
|
|
"margin_dpo/margin_std": 28.670608520507812,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.25699168556311414,
|
|
"fcm_dpo/beta": 0.053840864449739456,
|
|
"fcm_dpo/delta": 0.203588604927063,
|
|
"fcm_dpo/margin": 15.119009017944336,
|
|
"fcm_dpo/q_t": 0.36196133494377136,
|
|
"grad_norm": 30.58293342590332,
|
|
"learning_rate": 4.6449585330874425e-07,
|
|
"logits/chosen": 0.46817341446876526,
|
|
"logits/rejected": 0.4275393486022949,
|
|
"logps/chosen": -88.77765655517578,
|
|
"logps/ref_chosen": -62.323707580566406,
|
|
"logps/ref_rejected": -78.42765808105469,
|
|
"logps/rejected": -120.00062561035156,
|
|
"loss": 1.1343,
|
|
"margin_dpo/margin_mean": 15.119009017944336,
|
|
"margin_dpo/margin_std": 27.05625343322754,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.26455026455026454,
|
|
"fcm_dpo/beta": 0.05749092251062393,
|
|
"fcm_dpo/delta": -0.05166977643966675,
|
|
"fcm_dpo/margin": 18.14907455444336,
|
|
"fcm_dpo/q_t": 0.33645352721214294,
|
|
"grad_norm": 27.735427856445312,
|
|
"learning_rate": 4.6102528404790965e-07,
|
|
"logits/chosen": 0.49779874086380005,
|
|
"logits/rejected": 0.4384356141090393,
|
|
"logps/chosen": -84.31999206542969,
|
|
"logps/ref_chosen": -60.14301681518555,
|
|
"logps/ref_rejected": -82.65170288085938,
|
|
"logps/rejected": -124.97774505615234,
|
|
"loss": 1.0789,
|
|
"margin_dpo/margin_mean": 18.14907455444336,
|
|
"margin_dpo/margin_std": 29.03778648376465,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.272108843537415,
|
|
"fcm_dpo/beta": 0.05699415132403374,
|
|
"fcm_dpo/delta": 0.09521742165088654,
|
|
"fcm_dpo/margin": 16.0145206451416,
|
|
"fcm_dpo/q_t": 0.34686630964279175,
|
|
"grad_norm": 28.401058197021484,
|
|
"learning_rate": 4.5740715227200897e-07,
|
|
"logits/chosen": 0.4524223208427429,
|
|
"logits/rejected": 0.4112408757209778,
|
|
"logps/chosen": -86.755859375,
|
|
"logps/ref_chosen": -65.02766418457031,
|
|
"logps/ref_rejected": -80.62745666503906,
|
|
"logps/rejected": -118.37019348144531,
|
|
"loss": 1.0718,
|
|
"margin_dpo/margin_mean": 16.0145206451416,
|
|
"margin_dpo/margin_std": 25.418670654296875,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.2796674225245654,
|
|
"fcm_dpo/beta": 0.06562753766775131,
|
|
"fcm_dpo/delta": 0.15315786004066467,
|
|
"fcm_dpo/margin": 13.123418807983398,
|
|
"fcm_dpo/q_t": 0.3597589433193207,
|
|
"grad_norm": 31.76349449157715,
|
|
"learning_rate": 4.5364398801258394e-07,
|
|
"logits/chosen": 0.4652015268802643,
|
|
"logits/rejected": 0.4133850932121277,
|
|
"logps/chosen": -74.6918716430664,
|
|
"logps/ref_chosen": -57.59275436401367,
|
|
"logps/ref_rejected": -77.97161865234375,
|
|
"logps/rejected": -108.19415283203125,
|
|
"loss": 1.1242,
|
|
"margin_dpo/margin_mean": 13.123418807983398,
|
|
"margin_dpo/margin_std": 22.702762603759766,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.2872260015117158,
|
|
"fcm_dpo/beta": 0.07316488027572632,
|
|
"fcm_dpo/delta": 0.03896424174308777,
|
|
"fcm_dpo/margin": 13.159120559692383,
|
|
"fcm_dpo/q_t": 0.3480309545993805,
|
|
"grad_norm": 35.800045013427734,
|
|
"learning_rate": 4.4973842271726024e-07,
|
|
"logits/chosen": 0.4268653392791748,
|
|
"logits/rejected": 0.3715541362762451,
|
|
"logps/chosen": -83.3046875,
|
|
"logps/ref_chosen": -69.40254974365234,
|
|
"logps/ref_rejected": -87.62089538574219,
|
|
"logps/rejected": -114.6821517944336,
|
|
"loss": 1.0661,
|
|
"margin_dpo/margin_mean": 13.159120559692383,
|
|
"margin_dpo/margin_std": 21.489017486572266,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.2947845804988662,
|
|
"fcm_dpo/beta": 0.08043137937784195,
|
|
"fcm_dpo/delta": 0.10039126873016357,
|
|
"fcm_dpo/margin": 11.223628997802734,
|
|
"fcm_dpo/q_t": 0.35878369212150574,
|
|
"grad_norm": 38.323482513427734,
|
|
"learning_rate": 4.4569318740967043e-07,
|
|
"logits/chosen": 0.4170468747615814,
|
|
"logits/rejected": 0.39506852626800537,
|
|
"logps/chosen": -78.15182495117188,
|
|
"logps/ref_chosen": -63.38518524169922,
|
|
"logps/ref_rejected": -72.65580749511719,
|
|
"logps/rejected": -98.64608001708984,
|
|
"loss": 1.1468,
|
|
"margin_dpo/margin_mean": 11.223628044128418,
|
|
"margin_dpo/margin_std": 20.275413513183594,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.30234315948601664,
|
|
"fcm_dpo/beta": 0.08364946395158768,
|
|
"fcm_dpo/delta": -0.03751251846551895,
|
|
"fcm_dpo/margin": 12.287755966186523,
|
|
"fcm_dpo/q_t": 0.33327925205230713,
|
|
"grad_norm": 29.11920738220215,
|
|
"learning_rate": 4.415111107797445e-07,
|
|
"logits/chosen": 0.44890865683555603,
|
|
"logits/rejected": 0.39030200242996216,
|
|
"logps/chosen": -70.07976531982422,
|
|
"logps/ref_chosen": -57.999359130859375,
|
|
"logps/ref_rejected": -79.5167007446289,
|
|
"logps/rejected": -103.8848648071289,
|
|
"loss": 1.0502,
|
|
"margin_dpo/margin_mean": 12.28775691986084,
|
|
"margin_dpo/margin_std": 18.79964828491211,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.30234315948601664,
|
|
"eval_fcm_dpo/beta": 0.09359671175479889,
|
|
"eval_fcm_dpo/delta": 0.018532773479819298,
|
|
"eval_fcm_dpo/margin": 10.367452621459961,
|
|
"eval_fcm_dpo/q_t": 0.35254982113838196,
|
|
"eval_logits/chosen": 0.4369470477104187,
|
|
"eval_logits/rejected": 0.38747942447662354,
|
|
"eval_logps/chosen": -87.90721893310547,
|
|
"eval_logps/ref_chosen": -75.86933135986328,
|
|
"eval_logps/ref_rejected": -80.85771942138672,
|
|
"eval_logps/rejected": -103.26305389404297,
|
|
"eval_loss": 0.571725070476532,
|
|
"eval_margin_dpo/margin_mean": 10.367453575134277,
|
|
"eval_margin_dpo/margin_std": 18.42043113708496,
|
|
"eval_runtime": 38.6614,
|
|
"eval_samples_per_second": 59.568,
|
|
"eval_steps_per_second": 1.862,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.30990173847316704,
|
|
"fcm_dpo/beta": 0.09407475590705872,
|
|
"fcm_dpo/delta": 0.060309164226055145,
|
|
"fcm_dpo/margin": 9.996593475341797,
|
|
"fcm_dpo/q_t": 0.35484787821769714,
|
|
"grad_norm": 45.5085563659668,
|
|
"learning_rate": 4.3719511720570814e-07,
|
|
"logits/chosen": 0.4219132363796234,
|
|
"logits/rejected": 0.35513609647750854,
|
|
"logps/chosen": -71.04405975341797,
|
|
"logps/ref_chosen": -58.64111328125,
|
|
"logps/ref_rejected": -84.33369445800781,
|
|
"logps/rejected": -106.73323822021484,
|
|
"loss": 1.1443,
|
|
"margin_dpo/margin_mean": 9.99659252166748,
|
|
"margin_dpo/margin_std": 18.014066696166992,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.31746031746031744,
|
|
"fcm_dpo/beta": 0.09743638336658478,
|
|
"fcm_dpo/delta": -0.04126477986574173,
|
|
"fcm_dpo/margin": 10.603793144226074,
|
|
"fcm_dpo/q_t": 0.34100794792175293,
|
|
"grad_norm": 29.75200080871582,
|
|
"learning_rate": 4.327482247091679e-07,
|
|
"logits/chosen": 0.39647018909454346,
|
|
"logits/rejected": 0.3432803452014923,
|
|
"logps/chosen": -78.04428100585938,
|
|
"logps/ref_chosen": -65.22540283203125,
|
|
"logps/ref_rejected": -84.33940887451172,
|
|
"logps/rejected": -107.76206970214844,
|
|
"loss": 1.0715,
|
|
"margin_dpo/margin_mean": 10.60379409790039,
|
|
"margin_dpo/margin_std": 16.901790618896484,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.3250188964474679,
|
|
"fcm_dpo/beta": 0.09210662543773651,
|
|
"fcm_dpo/delta": -0.050839781761169434,
|
|
"fcm_dpo/margin": 11.34312629699707,
|
|
"fcm_dpo/q_t": 0.3336792588233948,
|
|
"grad_norm": 30.288963317871094,
|
|
"learning_rate": 4.281735428447157e-07,
|
|
"logits/chosen": 0.37580037117004395,
|
|
"logits/rejected": 0.31153884530067444,
|
|
"logps/chosen": -72.8878173828125,
|
|
"logps/ref_chosen": -61.34074020385742,
|
|
"logps/ref_rejected": -85.00725555419922,
|
|
"logps/rejected": -107.89747619628906,
|
|
"loss": 1.0263,
|
|
"margin_dpo/margin_mean": 11.343125343322754,
|
|
"margin_dpo/margin_std": 17.116981506347656,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.3325774754346183,
|
|
"fcm_dpo/beta": 0.08995531499385834,
|
|
"fcm_dpo/delta": 0.10806653648614883,
|
|
"fcm_dpo/margin": 10.049107551574707,
|
|
"fcm_dpo/q_t": 0.34747129678726196,
|
|
"grad_norm": 29.705045700073242,
|
|
"learning_rate": 4.234742705255272e-07,
|
|
"logits/chosen": 0.3524443507194519,
|
|
"logits/rejected": 0.2914368212223053,
|
|
"logps/chosen": -74.65157318115234,
|
|
"logps/ref_chosen": -62.409584045410156,
|
|
"logps/ref_rejected": -81.9083023071289,
|
|
"logps/rejected": -104.19940185546875,
|
|
"loss": 1.0618,
|
|
"margin_dpo/margin_mean": 10.049107551574707,
|
|
"margin_dpo/margin_std": 15.988082885742188,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.3401360544217687,
|
|
"fcm_dpo/beta": 0.0927683562040329,
|
|
"fcm_dpo/delta": -0.03574846684932709,
|
|
"fcm_dpo/margin": 11.09398078918457,
|
|
"fcm_dpo/q_t": 0.3298317790031433,
|
|
"grad_norm": 42.090301513671875,
|
|
"learning_rate": 4.186536937864752e-07,
|
|
"logits/chosen": 0.41360267996788025,
|
|
"logits/rejected": 0.34270113706588745,
|
|
"logps/chosen": -75.1723403930664,
|
|
"logps/ref_chosen": -63.19435501098633,
|
|
"logps/ref_rejected": -94.3624038696289,
|
|
"logps/rejected": -117.43436431884766,
|
|
"loss": 0.9653,
|
|
"margin_dpo/margin_mean": 11.09398078918457,
|
|
"margin_dpo/margin_std": 15.557014465332031,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.3476946334089191,
|
|
"fcm_dpo/beta": 0.0995025485754013,
|
|
"fcm_dpo/delta": 0.0801922157406807,
|
|
"fcm_dpo/margin": 9.299490928649902,
|
|
"fcm_dpo/q_t": 0.3476495146751404,
|
|
"grad_norm": 41.26875305175781,
|
|
"learning_rate": 4.137151834863213e-07,
|
|
"logits/chosen": 0.4226433336734772,
|
|
"logits/rejected": 0.37348127365112305,
|
|
"logps/chosen": -67.02788543701172,
|
|
"logps/ref_chosen": -55.014076232910156,
|
|
"logps/ref_rejected": -72.50662994384766,
|
|
"logps/rejected": -93.8199234008789,
|
|
"loss": 1.0886,
|
|
"margin_dpo/margin_mean": 9.299490928649902,
|
|
"margin_dpo/margin_std": 15.479217529296875,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.35525321239606955,
|
|
"fcm_dpo/beta": 0.10012258589267731,
|
|
"fcm_dpo/delta": -0.0002490147890057415,
|
|
"fcm_dpo/margin": 9.948552131652832,
|
|
"fcm_dpo/q_t": 0.3335891366004944,
|
|
"grad_norm": 31.21560287475586,
|
|
"learning_rate": 4.08662192950594e-07,
|
|
"logits/chosen": 0.35273051261901855,
|
|
"logits/rejected": 0.3253236711025238,
|
|
"logps/chosen": -77.28237915039062,
|
|
"logps/ref_chosen": -64.1020278930664,
|
|
"logps/ref_rejected": -73.81226348876953,
|
|
"logps/rejected": -96.94117736816406,
|
|
"loss": 1.0153,
|
|
"margin_dpo/margin_mean": 9.948553085327148,
|
|
"margin_dpo/margin_std": 14.477907180786133,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.36281179138321995,
|
|
"fcm_dpo/beta": 0.0998903140425682,
|
|
"fcm_dpo/delta": -0.08263019472360611,
|
|
"fcm_dpo/margin": 10.745096206665039,
|
|
"fcm_dpo/q_t": 0.3202618360519409,
|
|
"grad_norm": 25.250896453857422,
|
|
"learning_rate": 4.0349825555680045e-07,
|
|
"logits/chosen": 0.384564071893692,
|
|
"logits/rejected": 0.33145636320114136,
|
|
"logps/chosen": -81.09648132324219,
|
|
"logps/ref_chosen": -66.39305877685547,
|
|
"logps/ref_rejected": -88.76033020019531,
|
|
"logps/rejected": -114.2088394165039,
|
|
"loss": 0.9897,
|
|
"margin_dpo/margin_mean": 10.745096206665039,
|
|
"margin_dpo/margin_std": 14.994283676147461,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.37037037037037035,
|
|
"fcm_dpo/beta": 0.09539251029491425,
|
|
"fcm_dpo/delta": -0.0672103613615036,
|
|
"fcm_dpo/margin": 11.129631996154785,
|
|
"fcm_dpo/q_t": 0.31643834710121155,
|
|
"grad_norm": 51.30845642089844,
|
|
"learning_rate": 3.982269822636601e-07,
|
|
"logits/chosen": 0.3606599271297455,
|
|
"logits/rejected": 0.3225025534629822,
|
|
"logps/chosen": -81.35462951660156,
|
|
"logps/ref_chosen": -67.98930358886719,
|
|
"logps/ref_rejected": -77.23219299316406,
|
|
"logps/rejected": -101.7271499633789,
|
|
"loss": 0.9158,
|
|
"margin_dpo/margin_mean": 11.129631042480469,
|
|
"margin_dpo/margin_std": 13.993242263793945,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.3779289493575208,
|
|
"fcm_dpo/beta": 0.0966949611902237,
|
|
"fcm_dpo/delta": 0.11792643368244171,
|
|
"fcm_dpo/margin": 9.158844947814941,
|
|
"fcm_dpo/q_t": 0.3497825860977173,
|
|
"grad_norm": 29.191015243530273,
|
|
"learning_rate": 3.9285205908608934e-07,
|
|
"logits/chosen": 0.387145459651947,
|
|
"logits/rejected": 0.3767244815826416,
|
|
"logps/chosen": -89.218994140625,
|
|
"logps/ref_chosen": -72.36497497558594,
|
|
"logps/ref_rejected": -77.82171630859375,
|
|
"logps/rejected": -103.8345947265625,
|
|
"loss": 1.1125,
|
|
"margin_dpo/margin_mean": 9.158845901489258,
|
|
"margin_dpo/margin_std": 15.31347370147705,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.3854875283446712,
|
|
"fcm_dpo/beta": 0.10761729627847672,
|
|
"fcm_dpo/delta": 0.08714894950389862,
|
|
"fcm_dpo/margin": 8.48575496673584,
|
|
"fcm_dpo/q_t": 0.35001808404922485,
|
|
"grad_norm": 32.06684112548828,
|
|
"learning_rate": 3.873772445177015e-07,
|
|
"logits/chosen": 0.3626454174518585,
|
|
"logits/rejected": 0.3155224919319153,
|
|
"logps/chosen": -79.03126525878906,
|
|
"logps/ref_chosen": -63.40877151489258,
|
|
"logps/ref_rejected": -79.03904724121094,
|
|
"logps/rejected": -103.14729309082031,
|
|
"loss": 1.1116,
|
|
"margin_dpo/margin_mean": 8.48575496673584,
|
|
"margin_dpo/margin_std": 14.401884078979492,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.3930461073318216,
|
|
"fcm_dpo/beta": 0.09919991344213486,
|
|
"fcm_dpo/delta": -0.1404997706413269,
|
|
"fcm_dpo/margin": 11.3255033493042,
|
|
"fcm_dpo/q_t": 0.31073272228240967,
|
|
"grad_norm": 30.121217727661133,
|
|
"learning_rate": 3.818063669026256e-07,
|
|
"logits/chosen": 0.3378009796142578,
|
|
"logits/rejected": 0.2627086341381073,
|
|
"logps/chosen": -79.64289855957031,
|
|
"logps/ref_chosen": -63.3157844543457,
|
|
"logps/ref_rejected": -93.57626342773438,
|
|
"logps/rejected": -121.2288818359375,
|
|
"loss": 0.9261,
|
|
"margin_dpo/margin_mean": 11.3255033493042,
|
|
"margin_dpo/margin_std": 14.669309616088867,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.40060468631897206,
|
|
"fcm_dpo/beta": 0.10028767585754395,
|
|
"fcm_dpo/delta": 0.0695745199918747,
|
|
"fcm_dpo/margin": 9.310823440551758,
|
|
"fcm_dpo/q_t": 0.34190893173217773,
|
|
"grad_norm": 39.34469985961914,
|
|
"learning_rate": 3.7614332175848027e-07,
|
|
"logits/chosen": 0.3269875645637512,
|
|
"logits/rejected": 0.27384883165359497,
|
|
"logps/chosen": -82.93329620361328,
|
|
"logps/ref_chosen": -66.82787322998047,
|
|
"logps/ref_rejected": -79.1831283569336,
|
|
"logps/rejected": -104.599365234375,
|
|
"loss": 1.0288,
|
|
"margin_dpo/margin_mean": 9.310823440551758,
|
|
"margin_dpo/margin_std": 14.060315132141113,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.40816326530612246,
|
|
"fcm_dpo/beta": 0.10525654256343842,
|
|
"fcm_dpo/delta": 0.06899507343769073,
|
|
"fcm_dpo/margin": 8.870966911315918,
|
|
"fcm_dpo/q_t": 0.33700481057167053,
|
|
"grad_norm": 34.62828063964844,
|
|
"learning_rate": 3.7039206905237656e-07,
|
|
"logits/chosen": 0.35178321599960327,
|
|
"logits/rejected": 0.3009414076805115,
|
|
"logps/chosen": -78.264404296875,
|
|
"logps/ref_chosen": -63.54209518432617,
|
|
"logps/ref_rejected": -78.09616088867188,
|
|
"logps/rejected": -101.68943786621094,
|
|
"loss": 1.0151,
|
|
"margin_dpo/margin_mean": 8.870966911315918,
|
|
"margin_dpo/margin_std": 13.135915756225586,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.41572184429327286,
|
|
"fcm_dpo/beta": 0.10544770956039429,
|
|
"fcm_dpo/delta": -0.05175945162773132,
|
|
"fcm_dpo/margin": 9.915693283081055,
|
|
"fcm_dpo/q_t": 0.3259262442588806,
|
|
"grad_norm": 30.281387329101562,
|
|
"learning_rate": 3.645566304318526e-07,
|
|
"logits/chosen": 0.3330889642238617,
|
|
"logits/rejected": 0.28538888692855835,
|
|
"logps/chosen": -77.23836517333984,
|
|
"logps/ref_chosen": -63.090972900390625,
|
|
"logps/ref_rejected": -79.1383056640625,
|
|
"logps/rejected": -103.2013931274414,
|
|
"loss": 0.9663,
|
|
"margin_dpo/margin_mean": 9.915693283081055,
|
|
"margin_dpo/margin_std": 13.829241752624512,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.42328042328042326,
|
|
"fcm_dpo/beta": 0.10395065695047379,
|
|
"fcm_dpo/delta": -0.05435022711753845,
|
|
"fcm_dpo/margin": 10.08836555480957,
|
|
"fcm_dpo/q_t": 0.31206685304641724,
|
|
"grad_norm": 23.842212677001953,
|
|
"learning_rate": 3.586410864126781e-07,
|
|
"logits/chosen": 0.31397441029548645,
|
|
"logits/rejected": 0.28041377663612366,
|
|
"logps/chosen": -75.13356018066406,
|
|
"logps/ref_chosen": -61.85026168823242,
|
|
"logps/ref_rejected": -73.87454986572266,
|
|
"logps/rejected": -97.2462158203125,
|
|
"loss": 0.9042,
|
|
"margin_dpo/margin_mean": 10.088364601135254,
|
|
"margin_dpo/margin_std": 12.210702896118164,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.4308390022675737,
|
|
"fcm_dpo/beta": 0.09839525073766708,
|
|
"fcm_dpo/delta": 0.06181678920984268,
|
|
"fcm_dpo/margin": 9.582967758178711,
|
|
"fcm_dpo/q_t": 0.33559128642082214,
|
|
"grad_norm": 35.4563102722168,
|
|
"learning_rate": 3.5264957352549375e-07,
|
|
"logits/chosen": 0.33106130361557007,
|
|
"logits/rejected": 0.2812042832374573,
|
|
"logps/chosen": -78.70259094238281,
|
|
"logps/ref_chosen": -64.2256851196289,
|
|
"logps/ref_rejected": -80.54659271240234,
|
|
"logps/rejected": -104.6064682006836,
|
|
"loss": 0.9747,
|
|
"margin_dpo/margin_mean": 9.582967758178711,
|
|
"margin_dpo/margin_std": 13.323356628417969,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.4383975812547241,
|
|
"fcm_dpo/beta": 0.0960320457816124,
|
|
"fcm_dpo/delta": -0.08064164221286774,
|
|
"fcm_dpo/margin": 11.12451171875,
|
|
"fcm_dpo/q_t": 0.31567567586898804,
|
|
"grad_norm": 36.847713470458984,
|
|
"learning_rate": 3.465862814232821e-07,
|
|
"logits/chosen": 0.3282826542854309,
|
|
"logits/rejected": 0.26554709672927856,
|
|
"logps/chosen": -74.4558334350586,
|
|
"logps/ref_chosen": -58.45670700073242,
|
|
"logps/ref_rejected": -80.57959747314453,
|
|
"logps/rejected": -107.7032241821289,
|
|
"loss": 0.9158,
|
|
"margin_dpo/margin_mean": 11.12451171875,
|
|
"margin_dpo/margin_std": 14.189311027526855,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.4459561602418745,
|
|
"fcm_dpo/beta": 0.09265764057636261,
|
|
"fcm_dpo/delta": -0.019339444115757942,
|
|
"fcm_dpo/margin": 10.953125,
|
|
"fcm_dpo/q_t": 0.325559139251709,
|
|
"grad_norm": 31.604331970214844,
|
|
"learning_rate": 3.4045544995169125e-07,
|
|
"logits/chosen": 0.3938923478126526,
|
|
"logits/rejected": 0.32047176361083984,
|
|
"logps/chosen": -73.53590393066406,
|
|
"logps/ref_chosen": -56.701622009277344,
|
|
"logps/ref_rejected": -79.15914916992188,
|
|
"logps/rejected": -106.9465560913086,
|
|
"loss": 0.9464,
|
|
"margin_dpo/margin_mean": 10.953125,
|
|
"margin_dpo/margin_std": 14.6715726852417,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.45351473922902497,
|
|
"fcm_dpo/beta": 0.09225670993328094,
|
|
"fcm_dpo/delta": 0.019766664132475853,
|
|
"fcm_dpo/margin": 10.636110305786133,
|
|
"fcm_dpo/q_t": 0.32959383726119995,
|
|
"grad_norm": 29.963603973388672,
|
|
"learning_rate": 3.3426136618426043e-07,
|
|
"logits/chosen": 0.3454452157020569,
|
|
"logits/rejected": 0.28967177867889404,
|
|
"logps/chosen": -81.18212127685547,
|
|
"logps/ref_chosen": -62.49296951293945,
|
|
"logps/ref_rejected": -76.37828063964844,
|
|
"logps/rejected": -105.70356750488281,
|
|
"loss": 0.9604,
|
|
"margin_dpo/margin_mean": 10.636110305786133,
|
|
"margin_dpo/margin_std": 14.378878593444824,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.46107331821617537,
|
|
"fcm_dpo/beta": 0.09693561494350433,
|
|
"fcm_dpo/delta": -0.0007272452348843217,
|
|
"fcm_dpo/margin": 10.295035362243652,
|
|
"fcm_dpo/q_t": 0.33259207010269165,
|
|
"grad_norm": 41.86263656616211,
|
|
"learning_rate": 3.280083614246217e-07,
|
|
"logits/chosen": 0.32713833451271057,
|
|
"logits/rejected": 0.2873557209968567,
|
|
"logps/chosen": -83.19766235351562,
|
|
"logps/ref_chosen": -63.961265563964844,
|
|
"logps/ref_rejected": -79.19660949707031,
|
|
"logps/rejected": -108.72804260253906,
|
|
"loss": 1.012,
|
|
"margin_dpo/margin_mean": 10.295036315917969,
|
|
"margin_dpo/margin_std": 14.964961051940918,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.46863189720332576,
|
|
"fcm_dpo/beta": 0.10219204425811768,
|
|
"fcm_dpo/delta": 0.1630358248949051,
|
|
"fcm_dpo/margin": 8.336259841918945,
|
|
"fcm_dpo/q_t": 0.3623664081096649,
|
|
"grad_norm": 43.633724212646484,
|
|
"learning_rate": 3.2170080817777257e-07,
|
|
"logits/chosen": 0.3746958076953888,
|
|
"logits/rejected": 0.3229959309101105,
|
|
"logps/chosen": -84.5007553100586,
|
|
"logps/ref_chosen": -65.43470764160156,
|
|
"logps/ref_rejected": -76.08763885498047,
|
|
"logps/rejected": -103.48995208740234,
|
|
"loss": 1.1116,
|
|
"margin_dpo/margin_mean": 8.336259841918945,
|
|
"margin_dpo/margin_std": 14.339553833007812,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.47619047619047616,
|
|
"fcm_dpo/beta": 0.10657189041376114,
|
|
"fcm_dpo/delta": -0.09334631264209747,
|
|
"fcm_dpo/margin": 10.140634536743164,
|
|
"fcm_dpo/q_t": 0.3195830285549164,
|
|
"grad_norm": 29.57124900817871,
|
|
"learning_rate": 3.1534311709253723e-07,
|
|
"logits/chosen": 0.3460700511932373,
|
|
"logits/rejected": 0.288535475730896,
|
|
"logps/chosen": -79.8516845703125,
|
|
"logps/ref_chosen": -62.9846305847168,
|
|
"logps/ref_rejected": -75.53777313232422,
|
|
"logps/rejected": -102.54544830322266,
|
|
"loss": 0.9646,
|
|
"margin_dpo/margin_mean": 10.14063549041748,
|
|
"margin_dpo/margin_std": 13.977231979370117,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.4837490551776266,
|
|
"fcm_dpo/beta": 0.09681382775306702,
|
|
"fcm_dpo/delta": -0.04583514854311943,
|
|
"fcm_dpo/margin": 10.692605018615723,
|
|
"fcm_dpo/q_t": 0.3291170001029968,
|
|
"grad_norm": 32.62282943725586,
|
|
"learning_rate": 3.0893973387735683e-07,
|
|
"logits/chosen": 0.3217319846153259,
|
|
"logits/rejected": 0.2573382556438446,
|
|
"logps/chosen": -72.19245147705078,
|
|
"logps/ref_chosen": -56.67329788208008,
|
|
"logps/ref_rejected": -81.22078704833984,
|
|
"logps/rejected": -107.43255615234375,
|
|
"loss": 1.0017,
|
|
"margin_dpo/margin_mean": 10.692605018615723,
|
|
"margin_dpo/margin_std": 15.321540832519531,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.491307634164777,
|
|
"fcm_dpo/beta": 0.09586743265390396,
|
|
"fcm_dpo/delta": -0.005144490860402584,
|
|
"fcm_dpo/margin": 10.444814682006836,
|
|
"fcm_dpo/q_t": 0.32588261365890503,
|
|
"grad_norm": 29.304916381835938,
|
|
"learning_rate": 3.0249513619156206e-07,
|
|
"logits/chosen": 0.336866557598114,
|
|
"logits/rejected": 0.273507684469223,
|
|
"logps/chosen": -74.34947967529297,
|
|
"logps/ref_chosen": -58.42055130004883,
|
|
"logps/ref_rejected": -74.77824401855469,
|
|
"logps/rejected": -101.1519775390625,
|
|
"loss": 0.9637,
|
|
"margin_dpo/margin_mean": 10.444815635681152,
|
|
"margin_dpo/margin_std": 14.225895881652832,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.4988662131519274,
|
|
"fcm_dpo/beta": 0.10534314811229706,
|
|
"fcm_dpo/delta": 0.07023780792951584,
|
|
"fcm_dpo/margin": 8.831705093383789,
|
|
"fcm_dpo/q_t": 0.34769195318222046,
|
|
"grad_norm": 31.103214263916016,
|
|
"learning_rate": 2.9601383051430505e-07,
|
|
"logits/chosen": 0.2966001629829407,
|
|
"logits/rejected": 0.2543550431728363,
|
|
"logps/chosen": -83.2847900390625,
|
|
"logps/ref_chosen": -66.16510772705078,
|
|
"logps/ref_rejected": -79.58935546875,
|
|
"logps/rejected": -105.5407485961914,
|
|
"loss": 1.0797,
|
|
"margin_dpo/margin_mean": 8.831704139709473,
|
|
"margin_dpo/margin_std": 14.508381843566895,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.5064247921390779,
|
|
"fcm_dpo/beta": 0.09514714032411575,
|
|
"fcm_dpo/delta": -0.08864019811153412,
|
|
"fcm_dpo/margin": 11.286481857299805,
|
|
"fcm_dpo/q_t": 0.3190566897392273,
|
|
"grad_norm": 30.666662216186523,
|
|
"learning_rate": 2.895003489933375e-07,
|
|
"logits/chosen": 0.34321731328964233,
|
|
"logits/rejected": 0.29262733459472656,
|
|
"logps/chosen": -81.09008026123047,
|
|
"logps/ref_chosen": -64.61544799804688,
|
|
"logps/ref_rejected": -81.56526947021484,
|
|
"logps/rejected": -109.3263931274414,
|
|
"loss": 0.9389,
|
|
"margin_dpo/margin_mean": 11.286481857299805,
|
|
"margin_dpo/margin_std": 15.018880844116211,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.5139833711262283,
|
|
"fcm_dpo/beta": 0.09981605410575867,
|
|
"fcm_dpo/delta": 0.05951204150915146,
|
|
"fcm_dpo/margin": 9.45673656463623,
|
|
"fcm_dpo/q_t": 0.34890830516815186,
|
|
"grad_norm": 35.18781661987305,
|
|
"learning_rate": 2.8295924627584004e-07,
|
|
"logits/chosen": 0.3079679608345032,
|
|
"logits/rejected": 0.2665550112724304,
|
|
"logps/chosen": -81.59588623046875,
|
|
"logps/ref_chosen": -62.10752487182617,
|
|
"logps/ref_rejected": -77.66670227050781,
|
|
"logps/rejected": -106.6117935180664,
|
|
"loss": 1.0579,
|
|
"margin_dpo/margin_mean": 9.45673656463623,
|
|
"margin_dpo/margin_std": 15.178037643432617,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.5215419501133787,
|
|
"fcm_dpo/beta": 0.09543491154909134,
|
|
"fcm_dpo/delta": -0.054286111146211624,
|
|
"fcm_dpo/margin": 10.175302505493164,
|
|
"fcm_dpo/q_t": 0.33326101303100586,
|
|
"grad_norm": 26.658878326416016,
|
|
"learning_rate": 2.7639509632351927e-07,
|
|
"logits/chosen": 0.340119332075119,
|
|
"logits/rejected": 0.2935033440589905,
|
|
"logps/chosen": -79.18875885009766,
|
|
"logps/ref_chosen": -61.37943649291992,
|
|
"logps/ref_rejected": -79.8868637084961,
|
|
"logps/rejected": -107.87149810791016,
|
|
"loss": 0.9934,
|
|
"margin_dpo/margin_mean": 10.175302505493164,
|
|
"margin_dpo/margin_std": 14.267759323120117,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.5291005291005291,
|
|
"fcm_dpo/beta": 0.09232033044099808,
|
|
"fcm_dpo/delta": -0.09336394816637039,
|
|
"fcm_dpo/margin": 11.742635726928711,
|
|
"fcm_dpo/q_t": 0.321283221244812,
|
|
"grad_norm": 28.552518844604492,
|
|
"learning_rate": 2.698124892141971e-07,
|
|
"logits/chosen": 0.40391048789024353,
|
|
"logits/rejected": 0.3338189125061035,
|
|
"logps/chosen": -72.78422546386719,
|
|
"logps/ref_chosen": -56.05344772338867,
|
|
"logps/ref_rejected": -81.98738861083984,
|
|
"logps/rejected": -110.4608154296875,
|
|
"loss": 0.933,
|
|
"margin_dpo/margin_mean": 11.742635726928711,
|
|
"margin_dpo/margin_std": 15.581090927124023,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.5366591080876795,
|
|
"fcm_dpo/beta": 0.08842920511960983,
|
|
"fcm_dpo/delta": 0.005702398717403412,
|
|
"fcm_dpo/margin": 11.23391056060791,
|
|
"fcm_dpo/q_t": 0.3230994641780853,
|
|
"grad_norm": 30.30412483215332,
|
|
"learning_rate": 2.632160279321328e-07,
|
|
"logits/chosen": 0.3609849512577057,
|
|
"logits/rejected": 0.2793940007686615,
|
|
"logps/chosen": -72.1435317993164,
|
|
"logps/ref_chosen": -56.14973831176758,
|
|
"logps/ref_rejected": -78.04826354980469,
|
|
"logps/rejected": -105.2759780883789,
|
|
"loss": 0.963,
|
|
"margin_dpo/margin_mean": 11.23391056060791,
|
|
"margin_dpo/margin_std": 15.269304275512695,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.54421768707483,
|
|
"fcm_dpo/beta": 0.09247281402349472,
|
|
"fcm_dpo/delta": 0.09105464816093445,
|
|
"fcm_dpo/margin": 9.872905731201172,
|
|
"fcm_dpo/q_t": 0.34462517499923706,
|
|
"grad_norm": 31.00304412841797,
|
|
"learning_rate": 2.5661032514931834e-07,
|
|
"logits/chosen": 0.3180425763130188,
|
|
"logits/rejected": 0.27117234468460083,
|
|
"logps/chosen": -78.58186340332031,
|
|
"logps/ref_chosen": -61.611045837402344,
|
|
"logps/ref_rejected": -76.07168579101562,
|
|
"logps/rejected": -102.9154052734375,
|
|
"loss": 1.0624,
|
|
"margin_dpo/margin_mean": 9.872904777526855,
|
|
"margin_dpo/margin_std": 15.538159370422363,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.5517762660619804,
|
|
"fcm_dpo/beta": 0.08917222917079926,
|
|
"fcm_dpo/delta": -0.11267988383769989,
|
|
"fcm_dpo/margin": 12.325703620910645,
|
|
"fcm_dpo/q_t": 0.3082793056964874,
|
|
"grad_norm": 23.29572296142578,
|
|
"learning_rate": 2.5e-07,
|
|
"logits/chosen": 0.35711944103240967,
|
|
"logits/rejected": 0.2748965919017792,
|
|
"logps/chosen": -76.3038558959961,
|
|
"logps/ref_chosen": -59.96733474731445,
|
|
"logps/ref_rejected": -85.49105834960938,
|
|
"logps/rejected": -114.15328216552734,
|
|
"loss": 0.8889,
|
|
"margin_dpo/margin_mean": 12.325704574584961,
|
|
"margin_dpo/margin_std": 15.190678596496582,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.5593348450491308,
|
|
"fcm_dpo/beta": 0.09262686967849731,
|
|
"fcm_dpo/delta": 0.08950239419937134,
|
|
"fcm_dpo/margin": 9.877817153930664,
|
|
"fcm_dpo/q_t": 0.3416265845298767,
|
|
"grad_norm": 27.85451889038086,
|
|
"learning_rate": 2.4338967485068164e-07,
|
|
"logits/chosen": 0.29736343026161194,
|
|
"logits/rejected": 0.24403652548789978,
|
|
"logps/chosen": -76.03514862060547,
|
|
"logps/ref_chosen": -60.001609802246094,
|
|
"logps/ref_rejected": -76.47229766845703,
|
|
"logps/rejected": -102.38365173339844,
|
|
"loss": 1.0434,
|
|
"margin_dpo/margin_mean": 9.877817153930664,
|
|
"margin_dpo/margin_std": 15.134869575500488,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.5668934240362812,
|
|
"fcm_dpo/beta": 0.09520609676837921,
|
|
"fcm_dpo/delta": 0.03861137107014656,
|
|
"fcm_dpo/margin": 10.090354919433594,
|
|
"fcm_dpo/q_t": 0.3396856188774109,
|
|
"grad_norm": 35.437652587890625,
|
|
"learning_rate": 2.3678397206786715e-07,
|
|
"logits/chosen": 0.3540731370449066,
|
|
"logits/rejected": 0.29804927110671997,
|
|
"logps/chosen": -77.1456527709961,
|
|
"logps/ref_chosen": -59.98427200317383,
|
|
"logps/ref_rejected": -75.23977661132812,
|
|
"logps/rejected": -102.49151611328125,
|
|
"loss": 1.0658,
|
|
"margin_dpo/margin_mean": 10.090354919433594,
|
|
"margin_dpo/margin_std": 15.728398323059082,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.5744520030234316,
|
|
"fcm_dpo/beta": 0.09189613163471222,
|
|
"fcm_dpo/delta": -0.08170835673809052,
|
|
"fcm_dpo/margin": 11.631962776184082,
|
|
"fcm_dpo/q_t": 0.319501131772995,
|
|
"grad_norm": 27.832942962646484,
|
|
"learning_rate": 2.3018751078580283e-07,
|
|
"logits/chosen": 0.3677051365375519,
|
|
"logits/rejected": 0.31347864866256714,
|
|
"logps/chosen": -77.08321380615234,
|
|
"logps/ref_chosen": -60.21544647216797,
|
|
"logps/ref_rejected": -77.54380798339844,
|
|
"logps/rejected": -106.04354095458984,
|
|
"loss": 0.9542,
|
|
"margin_dpo/margin_mean": 11.631962776184082,
|
|
"margin_dpo/margin_std": 15.64165210723877,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.582010582010582,
|
|
"fcm_dpo/beta": 0.09286109358072281,
|
|
"fcm_dpo/delta": 0.020266292616724968,
|
|
"fcm_dpo/margin": 9.64010238647461,
|
|
"fcm_dpo/q_t": 0.34380003809928894,
|
|
"grad_norm": 29.564525604248047,
|
|
"learning_rate": 2.2360490367648084e-07,
|
|
"logits/chosen": 0.31580013036727905,
|
|
"logits/rejected": 0.2819364070892334,
|
|
"logps/chosen": -85.32447814941406,
|
|
"logps/ref_chosen": -67.37496185302734,
|
|
"logps/ref_rejected": -77.77253723144531,
|
|
"logps/rejected": -105.36214447021484,
|
|
"loss": 1.0145,
|
|
"margin_dpo/margin_mean": 9.64010238647461,
|
|
"margin_dpo/margin_std": 14.208511352539062,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.5895691609977324,
|
|
"fcm_dpo/beta": 0.09398090094327927,
|
|
"fcm_dpo/delta": 0.04170190542936325,
|
|
"fcm_dpo/margin": 10.224244117736816,
|
|
"fcm_dpo/q_t": 0.3312895596027374,
|
|
"grad_norm": 25.84682273864746,
|
|
"learning_rate": 2.170407537241599e-07,
|
|
"logits/chosen": 0.3518233597278595,
|
|
"logits/rejected": 0.29235878586769104,
|
|
"logps/chosen": -79.87808990478516,
|
|
"logps/ref_chosen": -62.08070755004883,
|
|
"logps/ref_rejected": -80.65849304199219,
|
|
"logps/rejected": -108.68013763427734,
|
|
"loss": 0.9694,
|
|
"margin_dpo/margin_mean": 10.224244117736816,
|
|
"margin_dpo/margin_std": 14.191637992858887,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.5971277399848829,
|
|
"fcm_dpo/beta": 0.08911158889532089,
|
|
"fcm_dpo/delta": -0.06056561321020126,
|
|
"fcm_dpo/margin": 11.786225318908691,
|
|
"fcm_dpo/q_t": 0.31978386640548706,
|
|
"grad_norm": 27.2460994720459,
|
|
"learning_rate": 2.104996510066625e-07,
|
|
"logits/chosen": 0.3753899037837982,
|
|
"logits/rejected": 0.3135729134082794,
|
|
"logps/chosen": -76.43754577636719,
|
|
"logps/ref_chosen": -59.841339111328125,
|
|
"logps/ref_rejected": -81.67756652832031,
|
|
"logps/rejected": -110.05999755859375,
|
|
"loss": 0.9292,
|
|
"margin_dpo/margin_mean": 11.786226272583008,
|
|
"margin_dpo/margin_std": 15.320358276367188,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.6046863189720333,
|
|
"fcm_dpo/beta": 0.0909147709608078,
|
|
"fcm_dpo/delta": 0.033920951187610626,
|
|
"fcm_dpo/margin": 10.581128120422363,
|
|
"fcm_dpo/q_t": 0.3356344997882843,
|
|
"grad_norm": 40.5637321472168,
|
|
"learning_rate": 2.0398616948569493e-07,
|
|
"logits/chosen": 0.32412275671958923,
|
|
"logits/rejected": 0.23972614109516144,
|
|
"logps/chosen": -80.27471923828125,
|
|
"logps/ref_chosen": -61.95880889892578,
|
|
"logps/ref_rejected": -89.60023498535156,
|
|
"logps/rejected": -118.49725341796875,
|
|
"loss": 1.0126,
|
|
"margin_dpo/margin_mean": 10.58112907409668,
|
|
"margin_dpo/margin_std": 15.470416069030762,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.6046863189720333,
|
|
"eval_fcm_dpo/beta": 0.10331619530916214,
|
|
"eval_fcm_dpo/delta": 0.009760010987520218,
|
|
"eval_fcm_dpo/margin": 9.474839210510254,
|
|
"eval_fcm_dpo/q_t": 0.3416881263256073,
|
|
"eval_logits/chosen": 0.35637208819389343,
|
|
"eval_logits/rejected": 0.3007829487323761,
|
|
"eval_logps/chosen": -92.93755340576172,
|
|
"eval_logps/ref_chosen": -75.86933135986328,
|
|
"eval_logps/ref_rejected": -80.85771942138672,
|
|
"eval_logps/rejected": -107.40077209472656,
|
|
"eval_loss": 0.5364252328872681,
|
|
"eval_margin_dpo/margin_mean": 9.474839210510254,
|
|
"eval_margin_dpo/margin_std": 15.286213874816895,
|
|
"eval_runtime": 38.7022,
|
|
"eval_samples_per_second": 59.506,
|
|
"eval_steps_per_second": 1.86,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.6122448979591837,
|
|
"fcm_dpo/beta": 0.08817870914936066,
|
|
"fcm_dpo/delta": -0.1327141523361206,
|
|
"fcm_dpo/margin": 12.578492164611816,
|
|
"fcm_dpo/q_t": 0.3107960820198059,
|
|
"grad_norm": 23.5877742767334,
|
|
"learning_rate": 1.975048638084379e-07,
|
|
"logits/chosen": 0.37417587637901306,
|
|
"logits/rejected": 0.2990309000015259,
|
|
"logps/chosen": -73.52447509765625,
|
|
"logps/ref_chosen": -57.03437423706055,
|
|
"logps/ref_rejected": -78.54074096679688,
|
|
"logps/rejected": -107.60932922363281,
|
|
"loss": 0.8956,
|
|
"margin_dpo/margin_mean": 12.5784912109375,
|
|
"margin_dpo/margin_std": 15.335866928100586,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.6198034769463341,
|
|
"fcm_dpo/beta": 0.08702994883060455,
|
|
"fcm_dpo/delta": 0.04099176451563835,
|
|
"fcm_dpo/margin": 11.039661407470703,
|
|
"fcm_dpo/q_t": 0.3316665291786194,
|
|
"grad_norm": 29.719276428222656,
|
|
"learning_rate": 1.9106026612264315e-07,
|
|
"logits/chosen": 0.3420962393283844,
|
|
"logits/rejected": 0.27093860507011414,
|
|
"logps/chosen": -82.31895446777344,
|
|
"logps/ref_chosen": -65.09486389160156,
|
|
"logps/ref_rejected": -82.60694885253906,
|
|
"logps/rejected": -110.87071228027344,
|
|
"loss": 0.9591,
|
|
"margin_dpo/margin_mean": 11.039661407470703,
|
|
"margin_dpo/margin_std": 14.888700485229492,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.6273620559334845,
|
|
"fcm_dpo/beta": 0.08784516900777817,
|
|
"fcm_dpo/delta": -0.04525933414697647,
|
|
"fcm_dpo/margin": 11.834297180175781,
|
|
"fcm_dpo/q_t": 0.3292234539985657,
|
|
"grad_norm": 30.517234802246094,
|
|
"learning_rate": 1.846568829074628e-07,
|
|
"logits/chosen": 0.3091197609901428,
|
|
"logits/rejected": 0.25511085987091064,
|
|
"logps/chosen": -78.46774291992188,
|
|
"logps/ref_chosen": -58.7742805480957,
|
|
"logps/ref_rejected": -72.8920669555664,
|
|
"logps/rejected": -104.4198226928711,
|
|
"loss": 0.9871,
|
|
"margin_dpo/margin_mean": 11.834297180175781,
|
|
"margin_dpo/margin_std": 17.207059860229492,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.6349206349206349,
|
|
"fcm_dpo/beta": 0.09040302783250809,
|
|
"fcm_dpo/delta": 0.08599478006362915,
|
|
"fcm_dpo/margin": 10.107150077819824,
|
|
"fcm_dpo/q_t": 0.34641337394714355,
|
|
"grad_norm": 37.267417907714844,
|
|
"learning_rate": 1.782991918222275e-07,
|
|
"logits/chosen": 0.3381853699684143,
|
|
"logits/rejected": 0.29058149456977844,
|
|
"logps/chosen": -80.76959991455078,
|
|
"logps/ref_chosen": -59.88574981689453,
|
|
"logps/ref_rejected": -70.21773529052734,
|
|
"logps/rejected": -101.208740234375,
|
|
"loss": 1.1005,
|
|
"margin_dpo/margin_mean": 10.107150077819824,
|
|
"margin_dpo/margin_std": 16.638538360595703,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.6424792139077853,
|
|
"fcm_dpo/beta": 0.09201686084270477,
|
|
"fcm_dpo/delta": -0.06255164742469788,
|
|
"fcm_dpo/margin": 11.48328685760498,
|
|
"fcm_dpo/q_t": 0.3294609785079956,
|
|
"grad_norm": 26.42628288269043,
|
|
"learning_rate": 1.7199163857537824e-07,
|
|
"logits/chosen": 0.3337697982788086,
|
|
"logits/rejected": 0.28397053480148315,
|
|
"logps/chosen": -78.5306167602539,
|
|
"logps/ref_chosen": -59.304222106933594,
|
|
"logps/ref_rejected": -75.0927963256836,
|
|
"logps/rejected": -105.80247497558594,
|
|
"loss": 1.0021,
|
|
"margin_dpo/margin_mean": 11.48328685760498,
|
|
"margin_dpo/margin_std": 16.985111236572266,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.6500377928949358,
|
|
"fcm_dpo/beta": 0.09108453243970871,
|
|
"fcm_dpo/delta": -0.03131581097841263,
|
|
"fcm_dpo/margin": 11.279854774475098,
|
|
"fcm_dpo/q_t": 0.3330654799938202,
|
|
"grad_norm": 25.44510269165039,
|
|
"learning_rate": 1.6573863381573954e-07,
|
|
"logits/chosen": 0.30293092131614685,
|
|
"logits/rejected": 0.2799461781978607,
|
|
"logps/chosen": -84.71260833740234,
|
|
"logps/ref_chosen": -63.816734313964844,
|
|
"logps/ref_rejected": -75.532470703125,
|
|
"logps/rejected": -107.70819091796875,
|
|
"loss": 1.0274,
|
|
"margin_dpo/margin_mean": 11.279854774475098,
|
|
"margin_dpo/margin_std": 17.151844024658203,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.6575963718820862,
|
|
"fcm_dpo/beta": 0.08440228551626205,
|
|
"fcm_dpo/delta": -0.03266172856092453,
|
|
"fcm_dpo/margin": 12.173011779785156,
|
|
"fcm_dpo/q_t": 0.3264302611351013,
|
|
"grad_norm": 28.003742218017578,
|
|
"learning_rate": 1.5954455004830878e-07,
|
|
"logits/chosen": 0.40501174330711365,
|
|
"logits/rejected": 0.358463317155838,
|
|
"logps/chosen": -75.50459289550781,
|
|
"logps/ref_chosen": -56.96874237060547,
|
|
"logps/ref_rejected": -75.08180236816406,
|
|
"logps/rejected": -105.7906723022461,
|
|
"loss": 0.9586,
|
|
"margin_dpo/margin_mean": 12.173012733459473,
|
|
"margin_dpo/margin_std": 16.6791934967041,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.6651549508692366,
|
|
"fcm_dpo/beta": 0.0831587016582489,
|
|
"fcm_dpo/delta": -0.03108084760606289,
|
|
"fcm_dpo/margin": 12.317110061645508,
|
|
"fcm_dpo/q_t": 0.32641124725341797,
|
|
"grad_norm": 28.61335563659668,
|
|
"learning_rate": 1.534137185767178e-07,
|
|
"logits/chosen": 0.33481085300445557,
|
|
"logits/rejected": 0.25145426392555237,
|
|
"logps/chosen": -75.74763488769531,
|
|
"logps/ref_chosen": -56.746910095214844,
|
|
"logps/ref_rejected": -77.73384857177734,
|
|
"logps/rejected": -109.05167388916016,
|
|
"loss": 0.9722,
|
|
"margin_dpo/margin_mean": 12.317110061645508,
|
|
"margin_dpo/margin_std": 16.91001319885254,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.672713529856387,
|
|
"fcm_dpo/beta": 0.07718690484762192,
|
|
"fcm_dpo/delta": -0.042038463056087494,
|
|
"fcm_dpo/margin": 12.584096908569336,
|
|
"fcm_dpo/q_t": 0.3266654312610626,
|
|
"grad_norm": 29.00673484802246,
|
|
"learning_rate": 1.473504264745062e-07,
|
|
"logits/chosen": 0.35013240575790405,
|
|
"logits/rejected": 0.2813698351383209,
|
|
"logps/chosen": -81.3695068359375,
|
|
"logps/ref_chosen": -61.107688903808594,
|
|
"logps/ref_rejected": -83.23820495605469,
|
|
"logps/rejected": -116.08412170410156,
|
|
"loss": 0.946,
|
|
"margin_dpo/margin_mean": 12.58409595489502,
|
|
"margin_dpo/margin_std": 16.468950271606445,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.6802721088435374,
|
|
"fcm_dpo/beta": 0.07511289417743683,
|
|
"fcm_dpo/delta": 0.060914408415555954,
|
|
"fcm_dpo/margin": 12.590319633483887,
|
|
"fcm_dpo/q_t": 0.3290197253227234,
|
|
"grad_norm": 28.1600284576416,
|
|
"learning_rate": 1.4135891358732205e-07,
|
|
"logits/chosen": 0.348872572183609,
|
|
"logits/rejected": 0.2725016176700592,
|
|
"logps/chosen": -77.0075454711914,
|
|
"logps/ref_chosen": -56.97221755981445,
|
|
"logps/ref_rejected": -80.6880874633789,
|
|
"logps/rejected": -113.31373596191406,
|
|
"loss": 0.9376,
|
|
"margin_dpo/margin_mean": 12.590319633483887,
|
|
"margin_dpo/margin_std": 15.93859577178955,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.6878306878306878,
|
|
"fcm_dpo/beta": 0.0825229063630104,
|
|
"fcm_dpo/delta": -0.027547325938940048,
|
|
"fcm_dpo/margin": 12.420158386230469,
|
|
"fcm_dpo/q_t": 0.32720088958740234,
|
|
"grad_norm": 26.225448608398438,
|
|
"learning_rate": 1.354433695681474e-07,
|
|
"logits/chosen": 0.2992292046546936,
|
|
"logits/rejected": 0.2455030232667923,
|
|
"logps/chosen": -82.07698822021484,
|
|
"logps/ref_chosen": -61.983673095703125,
|
|
"logps/ref_rejected": -74.9884033203125,
|
|
"logps/rejected": -107.50187683105469,
|
|
"loss": 0.9822,
|
|
"margin_dpo/margin_mean": 12.420158386230469,
|
|
"margin_dpo/margin_std": 17.609575271606445,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.6953892668178382,
|
|
"fcm_dpo/beta": 0.07965027540922165,
|
|
"fcm_dpo/delta": -0.08106034994125366,
|
|
"fcm_dpo/margin": 13.459956169128418,
|
|
"fcm_dpo/q_t": 0.3144467771053314,
|
|
"grad_norm": 26.587221145629883,
|
|
"learning_rate": 1.2960793094762345e-07,
|
|
"logits/chosen": 0.3679925501346588,
|
|
"logits/rejected": 0.280788391828537,
|
|
"logps/chosen": -78.30216979980469,
|
|
"logps/ref_chosen": -57.59019088745117,
|
|
"logps/ref_rejected": -84.5114517211914,
|
|
"logps/rejected": -118.68338775634766,
|
|
"loss": 0.9095,
|
|
"margin_dpo/margin_mean": 13.459956169128418,
|
|
"margin_dpo/margin_std": 16.770097732543945,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.7029478458049887,
|
|
"fcm_dpo/beta": 0.07221703231334686,
|
|
"fcm_dpo/delta": 0.014492052607238293,
|
|
"fcm_dpo/margin": 13.642268180847168,
|
|
"fcm_dpo/q_t": 0.3231440782546997,
|
|
"grad_norm": 27.278940200805664,
|
|
"learning_rate": 1.238566782415197e-07,
|
|
"logits/chosen": 0.37134069204330444,
|
|
"logits/rejected": 0.31564953923225403,
|
|
"logps/chosen": -80.76337432861328,
|
|
"logps/ref_chosen": -59.79584503173828,
|
|
"logps/ref_rejected": -75.25082397460938,
|
|
"logps/rejected": -109.86061096191406,
|
|
"loss": 0.9232,
|
|
"margin_dpo/margin_mean": 13.642268180847168,
|
|
"margin_dpo/margin_std": 17.10280418395996,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.7105064247921391,
|
|
"fcm_dpo/beta": 0.07616017013788223,
|
|
"fcm_dpo/delta": -0.016742905601859093,
|
|
"fcm_dpo/margin": 13.275070190429688,
|
|
"fcm_dpo/q_t": 0.32321128249168396,
|
|
"grad_norm": 22.729766845703125,
|
|
"learning_rate": 1.1819363309737438e-07,
|
|
"logits/chosen": 0.37919512391090393,
|
|
"logits/rejected": 0.32598358392715454,
|
|
"logps/chosen": -80.35087585449219,
|
|
"logps/ref_chosen": -59.0323486328125,
|
|
"logps/ref_rejected": -74.96698760986328,
|
|
"logps/rejected": -109.56058502197266,
|
|
"loss": 0.9218,
|
|
"margin_dpo/margin_mean": 13.275070190429688,
|
|
"margin_dpo/margin_std": 16.799335479736328,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.7180650037792895,
|
|
"fcm_dpo/beta": 0.07094570249319077,
|
|
"fcm_dpo/delta": -0.080512635409832,
|
|
"fcm_dpo/margin": 15.083788871765137,
|
|
"fcm_dpo/q_t": 0.3183867931365967,
|
|
"grad_norm": 20.770301818847656,
|
|
"learning_rate": 1.126227554822985e-07,
|
|
"logits/chosen": 0.3839421570301056,
|
|
"logits/rejected": 0.3089445233345032,
|
|
"logps/chosen": -77.02687072753906,
|
|
"logps/ref_chosen": -56.396690368652344,
|
|
"logps/ref_rejected": -81.70674133300781,
|
|
"logps/rejected": -117.42071533203125,
|
|
"loss": 0.9389,
|
|
"margin_dpo/margin_mean": 15.083788871765137,
|
|
"margin_dpo/margin_std": 19.728008270263672,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.7256235827664399,
|
|
"fcm_dpo/beta": 0.07331489771604538,
|
|
"fcm_dpo/delta": 0.10081305354833603,
|
|
"fcm_dpo/margin": 12.341837882995605,
|
|
"fcm_dpo/q_t": 0.3461955189704895,
|
|
"grad_norm": 30.526918411254883,
|
|
"learning_rate": 1.0714794091391072e-07,
|
|
"logits/chosen": 0.3099084198474884,
|
|
"logits/rejected": 0.2813830077648163,
|
|
"logps/chosen": -88.34848022460938,
|
|
"logps/ref_chosen": -64.63165283203125,
|
|
"logps/ref_rejected": -70.14222717285156,
|
|
"logps/rejected": -106.20088958740234,
|
|
"loss": 1.0757,
|
|
"margin_dpo/margin_mean": 12.341837882995605,
|
|
"margin_dpo/margin_std": 19.969202041625977,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.7331821617535903,
|
|
"fcm_dpo/beta": 0.07241444289684296,
|
|
"fcm_dpo/delta": -0.03449578955769539,
|
|
"fcm_dpo/margin": 14.190716743469238,
|
|
"fcm_dpo/q_t": 0.3262421786785126,
|
|
"grad_norm": 23.741697311401367,
|
|
"learning_rate": 1.0177301773633992e-07,
|
|
"logits/chosen": 0.3603590130805969,
|
|
"logits/rejected": 0.29149702191352844,
|
|
"logps/chosen": -82.0468521118164,
|
|
"logps/ref_chosen": -59.954673767089844,
|
|
"logps/ref_rejected": -80.82916259765625,
|
|
"logps/rejected": -117.11204528808594,
|
|
"loss": 0.9667,
|
|
"margin_dpo/margin_mean": 14.190716743469238,
|
|
"margin_dpo/margin_std": 19.688003540039062,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.7407407407407407,
|
|
"fcm_dpo/beta": 0.07517864555120468,
|
|
"fcm_dpo/delta": 0.08142177760601044,
|
|
"fcm_dpo/margin": 12.304274559020996,
|
|
"fcm_dpo/q_t": 0.34900832176208496,
|
|
"grad_norm": 34.26311492919922,
|
|
"learning_rate": 9.650174444319956e-08,
|
|
"logits/chosen": 0.3991266191005707,
|
|
"logits/rejected": 0.3289317190647125,
|
|
"logps/chosen": -86.1991195678711,
|
|
"logps/ref_chosen": -62.238365173339844,
|
|
"logps/ref_rejected": -81.98704528808594,
|
|
"logps/rejected": -118.2520751953125,
|
|
"loss": 1.0675,
|
|
"margin_dpo/margin_mean": 12.304274559020996,
|
|
"margin_dpo/margin_std": 19.93437385559082,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.7482993197278912,
|
|
"fcm_dpo/beta": 0.07940609008073807,
|
|
"fcm_dpo/delta": 0.03455258160829544,
|
|
"fcm_dpo/margin": 12.161388397216797,
|
|
"fcm_dpo/q_t": 0.340284138917923,
|
|
"grad_norm": 31.470069885253906,
|
|
"learning_rate": 9.133780704940594e-08,
|
|
"logits/chosen": 0.3151213526725769,
|
|
"logits/rejected": 0.24373404681682587,
|
|
"logps/chosen": -83.99868774414062,
|
|
"logps/ref_chosen": -60.60944747924805,
|
|
"logps/ref_rejected": -81.48342895507812,
|
|
"logps/rejected": -117.0340576171875,
|
|
"loss": 1.0687,
|
|
"margin_dpo/margin_mean": 12.161388397216797,
|
|
"margin_dpo/margin_std": 19.294418334960938,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.7558578987150416,
|
|
"fcm_dpo/beta": 0.07518203556537628,
|
|
"fcm_dpo/delta": -0.09908589720726013,
|
|
"fcm_dpo/margin": 14.436444282531738,
|
|
"fcm_dpo/q_t": 0.3233835697174072,
|
|
"grad_norm": 30.398324966430664,
|
|
"learning_rate": 8.628481651367875e-08,
|
|
"logits/chosen": 0.36071914434432983,
|
|
"logits/rejected": 0.3034532070159912,
|
|
"logps/chosen": -90.01958465576172,
|
|
"logps/ref_chosen": -67.44170379638672,
|
|
"logps/ref_rejected": -85.10578155517578,
|
|
"logps/rejected": -122.12010192871094,
|
|
"loss": 0.9682,
|
|
"margin_dpo/margin_mean": 14.436445236206055,
|
|
"margin_dpo/margin_std": 20.496013641357422,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.763416477702192,
|
|
"fcm_dpo/beta": 0.07424916326999664,
|
|
"fcm_dpo/delta": -0.0052908300422132015,
|
|
"fcm_dpo/margin": 13.501623153686523,
|
|
"fcm_dpo/q_t": 0.3237389028072357,
|
|
"grad_norm": 30.474390029907227,
|
|
"learning_rate": 8.134630621352483e-08,
|
|
"logits/chosen": 0.35755619406700134,
|
|
"logits/rejected": 0.32606256008148193,
|
|
"logps/chosen": -84.3893814086914,
|
|
"logps/ref_chosen": -63.399513244628906,
|
|
"logps/ref_rejected": -75.75922393798828,
|
|
"logps/rejected": -110.250732421875,
|
|
"loss": 0.9268,
|
|
"margin_dpo/margin_mean": 13.501623153686523,
|
|
"margin_dpo/margin_std": 17.42774200439453,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.7709750566893424,
|
|
"fcm_dpo/beta": 0.07383919507265091,
|
|
"fcm_dpo/delta": -0.002689933869987726,
|
|
"fcm_dpo/margin": 13.561166763305664,
|
|
"fcm_dpo/q_t": 0.3301324248313904,
|
|
"grad_norm": 25.812915802001953,
|
|
"learning_rate": 7.652572947447272e-08,
|
|
"logits/chosen": 0.35465937852859497,
|
|
"logits/rejected": 0.28478819131851196,
|
|
"logps/chosen": -87.05335998535156,
|
|
"logps/ref_chosen": -65.54673767089844,
|
|
"logps/ref_rejected": -88.05908203125,
|
|
"logps/rejected": -123.12687683105469,
|
|
"loss": 0.9774,
|
|
"margin_dpo/margin_mean": 13.56116771697998,
|
|
"margin_dpo/margin_std": 19.210861206054688,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.7785336356764928,
|
|
"fcm_dpo/beta": 0.0702199786901474,
|
|
"fcm_dpo/delta": -0.04104772210121155,
|
|
"fcm_dpo/margin": 14.736480712890625,
|
|
"fcm_dpo/q_t": 0.3220558762550354,
|
|
"grad_norm": 25.192188262939453,
|
|
"learning_rate": 7.182645715528435e-08,
|
|
"logits/chosen": 0.37450742721557617,
|
|
"logits/rejected": 0.3201286792755127,
|
|
"logps/chosen": -81.3960952758789,
|
|
"logps/ref_chosen": -58.967079162597656,
|
|
"logps/ref_rejected": -79.77230834960938,
|
|
"logps/rejected": -116.93778991699219,
|
|
"loss": 0.9213,
|
|
"margin_dpo/margin_mean": 14.736480712890625,
|
|
"margin_dpo/margin_std": 19.175683975219727,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.7860922146636432,
|
|
"fcm_dpo/beta": 0.07708217203617096,
|
|
"fcm_dpo/delta": 0.17160889506340027,
|
|
"fcm_dpo/margin": 10.951704025268555,
|
|
"fcm_dpo/q_t": 0.35802939534187317,
|
|
"grad_norm": 30.653854370117188,
|
|
"learning_rate": 6.725177529083209e-08,
|
|
"logits/chosen": 0.39081019163131714,
|
|
"logits/rejected": 0.34083622694015503,
|
|
"logps/chosen": -85.04821014404297,
|
|
"logps/ref_chosen": -62.04914474487305,
|
|
"logps/ref_rejected": -73.25074768066406,
|
|
"logps/rejected": -107.20152282714844,
|
|
"loss": 1.0965,
|
|
"margin_dpo/margin_mean": 10.951704025268555,
|
|
"margin_dpo/margin_std": 18.67728042602539,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.7936507936507936,
|
|
"fcm_dpo/beta": 0.08038587868213654,
|
|
"fcm_dpo/delta": 0.0009529069066047668,
|
|
"fcm_dpo/margin": 12.417234420776367,
|
|
"fcm_dpo/q_t": 0.32943472266197205,
|
|
"grad_norm": 32.0303955078125,
|
|
"learning_rate": 6.280488279429185e-08,
|
|
"logits/chosen": 0.2926352620124817,
|
|
"logits/rejected": 0.23847930133342743,
|
|
"logps/chosen": -90.89793395996094,
|
|
"logps/ref_chosen": -68.93287658691406,
|
|
"logps/ref_rejected": -86.20756530761719,
|
|
"logps/rejected": -120.58984375,
|
|
"loss": 1.0199,
|
|
"margin_dpo/margin_mean": 12.417234420776367,
|
|
"margin_dpo/margin_std": 18.533092498779297,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.8012093726379441,
|
|
"fcm_dpo/beta": 0.08636742830276489,
|
|
"fcm_dpo/delta": -0.015138429589569569,
|
|
"fcm_dpo/margin": 11.636919021606445,
|
|
"fcm_dpo/q_t": 0.33553168177604675,
|
|
"grad_norm": 28.005414962768555,
|
|
"learning_rate": 5.848888922025552e-08,
|
|
"logits/chosen": 0.33514514565467834,
|
|
"logits/rejected": 0.2650687098503113,
|
|
"logps/chosen": -82.46696472167969,
|
|
"logps/ref_chosen": -59.8493537902832,
|
|
"logps/ref_rejected": -80.61486053466797,
|
|
"logps/rejected": -114.86936950683594,
|
|
"loss": 1.0585,
|
|
"margin_dpo/margin_mean": 11.636918067932129,
|
|
"margin_dpo/margin_std": 17.36086082458496,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.8087679516250945,
|
|
"fcm_dpo/beta": 0.08257714658975601,
|
|
"fcm_dpo/delta": -0.002869441406801343,
|
|
"fcm_dpo/margin": 12.110450744628906,
|
|
"fcm_dpo/q_t": 0.3353736996650696,
|
|
"grad_norm": 31.17659568786621,
|
|
"learning_rate": 5.430681259032957e-08,
|
|
"logits/chosen": 0.3293718099594116,
|
|
"logits/rejected": 0.26540592312812805,
|
|
"logps/chosen": -80.54605865478516,
|
|
"logps/ref_chosen": -58.72953414916992,
|
|
"logps/ref_rejected": -78.62208557128906,
|
|
"logps/rejected": -112.54905700683594,
|
|
"loss": 1.0464,
|
|
"margin_dpo/margin_mean": 12.110448837280273,
|
|
"margin_dpo/margin_std": 18.811302185058594,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.8163265306122449,
|
|
"fcm_dpo/beta": 0.07747219502925873,
|
|
"fcm_dpo/delta": -0.11059974133968353,
|
|
"fcm_dpo/margin": 14.1632661819458,
|
|
"fcm_dpo/q_t": 0.31101471185684204,
|
|
"grad_norm": 27.576501846313477,
|
|
"learning_rate": 5.026157728273966e-08,
|
|
"logits/chosen": 0.3622625470161438,
|
|
"logits/rejected": 0.2790268659591675,
|
|
"logps/chosen": -83.3790054321289,
|
|
"logps/ref_chosen": -61.27280807495117,
|
|
"logps/ref_rejected": -86.4178237915039,
|
|
"logps/rejected": -122.6872787475586,
|
|
"loss": 0.8863,
|
|
"margin_dpo/margin_mean": 14.1632661819458,
|
|
"margin_dpo/margin_std": 17.347553253173828,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.8238851095993953,
|
|
"fcm_dpo/beta": 0.07073845714330673,
|
|
"fcm_dpo/delta": -0.046964578330516815,
|
|
"fcm_dpo/margin": 14.707046508789062,
|
|
"fcm_dpo/q_t": 0.3177848756313324,
|
|
"grad_norm": 25.9344539642334,
|
|
"learning_rate": 4.635601198741607e-08,
|
|
"logits/chosen": 0.3474125266075134,
|
|
"logits/rejected": 0.2886578142642975,
|
|
"logps/chosen": -77.79493713378906,
|
|
"logps/ref_chosen": -57.53668975830078,
|
|
"logps/ref_rejected": -73.76582336425781,
|
|
"logps/rejected": -108.73112487792969,
|
|
"loss": 0.9144,
|
|
"margin_dpo/margin_mean": 14.707046508789062,
|
|
"margin_dpo/margin_std": 18.784482955932617,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.8314436885865457,
|
|
"fcm_dpo/beta": 0.07421617209911346,
|
|
"fcm_dpo/delta": 0.14916327595710754,
|
|
"fcm_dpo/margin": 11.67965030670166,
|
|
"fcm_dpo/q_t": 0.34690457582473755,
|
|
"grad_norm": 27.697330474853516,
|
|
"learning_rate": 4.259284772799099e-08,
|
|
"logits/chosen": 0.3449970781803131,
|
|
"logits/rejected": 0.28996026515960693,
|
|
"logps/chosen": -81.71879577636719,
|
|
"logps/ref_chosen": -60.406890869140625,
|
|
"logps/ref_rejected": -76.10121154785156,
|
|
"logps/rejected": -109.0927734375,
|
|
"loss": 1.0218,
|
|
"margin_dpo/margin_mean": 11.679651260375977,
|
|
"margin_dpo/margin_std": 17.418115615844727,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.8390022675736961,
|
|
"fcm_dpo/beta": 0.08086894452571869,
|
|
"fcm_dpo/delta": -0.021382993087172508,
|
|
"fcm_dpo/margin": 12.578062057495117,
|
|
"fcm_dpo/q_t": 0.3289267420768738,
|
|
"grad_norm": 31.26999855041504,
|
|
"learning_rate": 3.89747159520904e-08,
|
|
"logits/chosen": 0.32917284965515137,
|
|
"logits/rejected": 0.2717982232570648,
|
|
"logps/chosen": -88.04100799560547,
|
|
"logps/ref_chosen": -65.4435806274414,
|
|
"logps/ref_rejected": -80.65763092041016,
|
|
"logps/rejected": -115.8331298828125,
|
|
"loss": 0.9988,
|
|
"margin_dpo/margin_mean": 12.578062057495117,
|
|
"margin_dpo/margin_std": 18.475194931030273,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.8465608465608465,
|
|
"fcm_dpo/beta": 0.07681386172771454,
|
|
"fcm_dpo/delta": -0.0602848045527935,
|
|
"fcm_dpo/margin": 13.695526123046875,
|
|
"fcm_dpo/q_t": 0.33021193742752075,
|
|
"grad_norm": 26.484222412109375,
|
|
"learning_rate": 3.550414669125573e-08,
|
|
"logits/chosen": 0.3839051425457001,
|
|
"logits/rejected": 0.3160412907600403,
|
|
"logps/chosen": -82.10123443603516,
|
|
"logps/ref_chosen": -59.31481170654297,
|
|
"logps/ref_rejected": -79.35322570800781,
|
|
"logps/rejected": -115.83515930175781,
|
|
"loss": 0.9693,
|
|
"margin_dpo/margin_mean": 13.695526123046875,
|
|
"margin_dpo/margin_std": 19.27083969116211,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.854119425547997,
|
|
"fcm_dpo/beta": 0.07272513210773468,
|
|
"fcm_dpo/delta": -0.029256444424390793,
|
|
"fcm_dpo/margin": 14.087471008300781,
|
|
"fcm_dpo/q_t": 0.3242108225822449,
|
|
"grad_norm": 28.179359436035156,
|
|
"learning_rate": 3.218356679178252e-08,
|
|
"logits/chosen": 0.36016514897346497,
|
|
"logits/rejected": 0.3109044134616852,
|
|
"logps/chosen": -82.92088317871094,
|
|
"logps/ref_chosen": -61.065895080566406,
|
|
"logps/ref_rejected": -79.14593505859375,
|
|
"logps/rejected": -115.0884017944336,
|
|
"loss": 0.9432,
|
|
"margin_dpo/margin_mean": 14.087471008300781,
|
|
"margin_dpo/margin_std": 18.77931022644043,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.8616780045351474,
|
|
"fcm_dpo/beta": 0.07263718545436859,
|
|
"fcm_dpo/delta": -0.05456575006246567,
|
|
"fcm_dpo/margin": 14.389799118041992,
|
|
"fcm_dpo/q_t": 0.3169875741004944,
|
|
"grad_norm": 21.003314971923828,
|
|
"learning_rate": 2.9015298217712453e-08,
|
|
"logits/chosen": 0.33407479524612427,
|
|
"logits/rejected": 0.26317259669303894,
|
|
"logps/chosen": -80.6412124633789,
|
|
"logps/ref_chosen": -58.91632843017578,
|
|
"logps/ref_rejected": -78.48197937011719,
|
|
"logps/rejected": -114.59666442871094,
|
|
"loss": 0.9191,
|
|
"margin_dpo/margin_mean": 14.389799118041992,
|
|
"margin_dpo/margin_std": 17.922327041625977,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.8692365835222978,
|
|
"fcm_dpo/beta": 0.07354731857776642,
|
|
"fcm_dpo/delta": 0.09992051124572754,
|
|
"fcm_dpo/margin": 12.301939010620117,
|
|
"fcm_dpo/q_t": 0.3429938554763794,
|
|
"grad_norm": 29.671510696411133,
|
|
"learning_rate": 2.600155642716606e-08,
|
|
"logits/chosen": 0.3663448691368103,
|
|
"logits/rejected": 0.3218410015106201,
|
|
"logps/chosen": -87.20105743408203,
|
|
"logps/ref_chosen": -64.36775970458984,
|
|
"logps/ref_rejected": -80.37776184082031,
|
|
"logps/rejected": -115.51298522949219,
|
|
"loss": 1.016,
|
|
"margin_dpo/margin_mean": 12.301939010620117,
|
|
"margin_dpo/margin_std": 18.205175399780273,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.8767951625094482,
|
|
"fcm_dpo/beta": 0.07715228199958801,
|
|
"fcm_dpo/delta": 0.006949651055037975,
|
|
"fcm_dpo/margin": 12.843107223510742,
|
|
"fcm_dpo/q_t": 0.33547455072402954,
|
|
"grad_norm": 29.658864974975586,
|
|
"learning_rate": 2.3144448823151392e-08,
|
|
"logits/chosen": 0.3541300892829895,
|
|
"logits/rejected": 0.2988041043281555,
|
|
"logps/chosen": -79.77593994140625,
|
|
"logps/ref_chosen": -58.415260314941406,
|
|
"logps/ref_rejected": -74.52140045166016,
|
|
"logps/rejected": -108.72517395019531,
|
|
"loss": 0.9975,
|
|
"margin_dpo/margin_mean": 12.843107223510742,
|
|
"margin_dpo/margin_std": 18.566728591918945,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.8843537414965986,
|
|
"fcm_dpo/beta": 0.07894281297922134,
|
|
"fcm_dpo/delta": 0.0601823627948761,
|
|
"fcm_dpo/margin": 11.971592903137207,
|
|
"fcm_dpo/q_t": 0.33943796157836914,
|
|
"grad_norm": 27.738752365112305,
|
|
"learning_rate": 2.044597327993153e-08,
|
|
"logits/chosen": 0.39197593927383423,
|
|
"logits/rejected": 0.3263949751853943,
|
|
"logps/chosen": -78.97578430175781,
|
|
"logps/ref_chosen": -56.64149856567383,
|
|
"logps/ref_rejected": -77.79124450683594,
|
|
"logps/rejected": -112.09712219238281,
|
|
"loss": 1.0413,
|
|
"margin_dpo/margin_mean": 11.971592903137207,
|
|
"margin_dpo/margin_std": 18.696794509887695,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.891912320483749,
|
|
"fcm_dpo/beta": 0.07663112133741379,
|
|
"fcm_dpo/delta": -0.1261170357465744,
|
|
"fcm_dpo/margin": 14.529006958007812,
|
|
"fcm_dpo/q_t": 0.3086654543876648,
|
|
"grad_norm": 25.352630615234375,
|
|
"learning_rate": 1.7908016745981856e-08,
|
|
"logits/chosen": 0.35477882623672485,
|
|
"logits/rejected": 0.31469103693962097,
|
|
"logps/chosen": -82.77430725097656,
|
|
"logps/ref_chosen": -61.251670837402344,
|
|
"logps/ref_rejected": -75.03556823730469,
|
|
"logps/rejected": -111.08720397949219,
|
|
"loss": 0.9034,
|
|
"margin_dpo/margin_mean": 14.529006958007812,
|
|
"margin_dpo/margin_std": 18.56852149963379,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.8994708994708994,
|
|
"fcm_dpo/beta": 0.07121709734201431,
|
|
"fcm_dpo/delta": -0.01630423031747341,
|
|
"fcm_dpo/margin": 14.214367866516113,
|
|
"fcm_dpo/q_t": 0.32772403955459595,
|
|
"grad_norm": 24.538618087768555,
|
|
"learning_rate": 1.553235392451377e-08,
|
|
"logits/chosen": 0.36895376443862915,
|
|
"logits/rejected": 0.2859138250350952,
|
|
"logps/chosen": -75.7615737915039,
|
|
"logps/ref_chosen": -55.449249267578125,
|
|
"logps/ref_rejected": -78.81550598144531,
|
|
"logps/rejected": -113.34220123291016,
|
|
"loss": 0.9717,
|
|
"margin_dpo/margin_mean": 14.214367866516113,
|
|
"margin_dpo/margin_std": 19.63026237487793,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.9070294784580499,
|
|
"fcm_dpo/beta": 0.07918272912502289,
|
|
"fcm_dpo/delta": 0.15003207325935364,
|
|
"fcm_dpo/margin": 10.823195457458496,
|
|
"fcm_dpo/q_t": 0.3627161383628845,
|
|
"grad_norm": 29.42555046081543,
|
|
"learning_rate": 1.3320646032487393e-08,
|
|
"logits/chosen": 0.36016735434532166,
|
|
"logits/rejected": 0.3182498812675476,
|
|
"logps/chosen": -81.17481231689453,
|
|
"logps/ref_chosen": -58.89445877075195,
|
|
"logps/ref_rejected": -71.14781951904297,
|
|
"logps/rejected": -104.25135803222656,
|
|
"loss": 1.0944,
|
|
"margin_dpo/margin_mean": 10.82319450378418,
|
|
"margin_dpo/margin_std": 18.560047149658203,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.9070294784580499,
|
|
"eval_fcm_dpo/beta": 0.08356332033872604,
|
|
"eval_fcm_dpo/delta": -0.005035701673477888,
|
|
"eval_fcm_dpo/margin": 11.875618934631348,
|
|
"eval_fcm_dpo/q_t": 0.3380221426486969,
|
|
"eval_logits/chosen": 0.3597419559955597,
|
|
"eval_logits/rejected": 0.304570734500885,
|
|
"eval_logps/chosen": -96.24739074707031,
|
|
"eval_logps/ref_chosen": -75.86933135986328,
|
|
"eval_logps/ref_rejected": -80.85771942138672,
|
|
"eval_logps/rejected": -113.11141204833984,
|
|
"eval_loss": 0.5214306712150574,
|
|
"eval_margin_dpo/margin_mean": 11.875618934631348,
|
|
"eval_margin_dpo/margin_std": 18.387540817260742,
|
|
"eval_runtime": 38.6993,
|
|
"eval_samples_per_second": 59.51,
|
|
"eval_steps_per_second": 1.86,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.9145880574452003,
|
|
"fcm_dpo/beta": 0.07584916055202484,
|
|
"fcm_dpo/delta": -0.017186608165502548,
|
|
"fcm_dpo/margin": 13.365765571594238,
|
|
"fcm_dpo/q_t": 0.32789379358291626,
|
|
"grad_norm": 28.76905059814453,
|
|
"learning_rate": 1.1274439638981532e-08,
|
|
"logits/chosen": 0.35118022561073303,
|
|
"logits/rejected": 0.299066424369812,
|
|
"logps/chosen": -79.40129089355469,
|
|
"logps/ref_chosen": -60.206268310546875,
|
|
"logps/ref_rejected": -76.11177825927734,
|
|
"logps/rejected": -108.6725845336914,
|
|
"loss": 0.963,
|
|
"margin_dpo/margin_mean": 13.365765571594238,
|
|
"margin_dpo/margin_std": 18.610855102539062,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.9221466364323507,
|
|
"fcm_dpo/beta": 0.07705016434192657,
|
|
"fcm_dpo/delta": -0.0388740599155426,
|
|
"fcm_dpo/margin": 13.390531539916992,
|
|
"fcm_dpo/q_t": 0.3253692388534546,
|
|
"grad_norm": 31.618640899658203,
|
|
"learning_rate": 9.395165583732379e-09,
|
|
"logits/chosen": 0.3527846336364746,
|
|
"logits/rejected": 0.30443698167800903,
|
|
"logps/chosen": -82.28140258789062,
|
|
"logps/ref_chosen": -61.04254150390625,
|
|
"logps/ref_rejected": -82.46031188964844,
|
|
"logps/rejected": -117.0897216796875,
|
|
"loss": 0.9402,
|
|
"margin_dpo/margin_mean": 13.390533447265625,
|
|
"margin_dpo/margin_std": 17.899368286132812,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.9297052154195011,
|
|
"fcm_dpo/beta": 0.0739460289478302,
|
|
"fcm_dpo/delta": -0.05259857699275017,
|
|
"fcm_dpo/margin": 14.14459228515625,
|
|
"fcm_dpo/q_t": 0.317154198884964,
|
|
"grad_norm": 28.697656631469727,
|
|
"learning_rate": 7.684137976598088e-09,
|
|
"logits/chosen": 0.33570918440818787,
|
|
"logits/rejected": 0.2927141785621643,
|
|
"logps/chosen": -81.54222106933594,
|
|
"logps/ref_chosen": -60.49250411987305,
|
|
"logps/ref_rejected": -81.13261413574219,
|
|
"logps/rejected": -116.32688903808594,
|
|
"loss": 0.927,
|
|
"margin_dpo/margin_mean": 14.14459228515625,
|
|
"margin_dpo/margin_std": 18.297395706176758,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.9372637944066515,
|
|
"fcm_dpo/beta": 0.07480698078870773,
|
|
"fcm_dpo/delta": 0.08452598005533218,
|
|
"fcm_dpo/margin": 12.292991638183594,
|
|
"fcm_dpo/q_t": 0.3419121503829956,
|
|
"grad_norm": 33.187259674072266,
|
|
"learning_rate": 6.142553278648238e-09,
|
|
"logits/chosen": 0.37463945150375366,
|
|
"logits/rejected": 0.3073200583457947,
|
|
"logps/chosen": -79.49095153808594,
|
|
"logps/ref_chosen": -58.75004959106445,
|
|
"logps/ref_rejected": -79.14283752441406,
|
|
"logps/rejected": -112.1767349243164,
|
|
"loss": 0.9754,
|
|
"margin_dpo/margin_mean": 12.292991638183594,
|
|
"margin_dpo/margin_std": 17.254846572875977,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.9448223733938019,
|
|
"fcm_dpo/beta": 0.07812217622995377,
|
|
"fcm_dpo/delta": 0.025508109480142593,
|
|
"fcm_dpo/margin": 12.489466667175293,
|
|
"fcm_dpo/q_t": 0.33177176117897034,
|
|
"grad_norm": 28.660175323486328,
|
|
"learning_rate": 4.7714894655209174e-09,
|
|
"logits/chosen": 0.3732627034187317,
|
|
"logits/rejected": 0.28870144486427307,
|
|
"logps/chosen": -79.76417541503906,
|
|
"logps/ref_chosen": -57.77447509765625,
|
|
"logps/ref_rejected": -83.365966796875,
|
|
"logps/rejected": -117.8451156616211,
|
|
"loss": 0.9557,
|
|
"margin_dpo/margin_mean": 12.489466667175293,
|
|
"margin_dpo/margin_std": 16.900440216064453,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.9523809523809523,
|
|
"fcm_dpo/beta": 0.07247981429100037,
|
|
"fcm_dpo/delta": -0.11990991979837418,
|
|
"fcm_dpo/margin": 15.216099739074707,
|
|
"fcm_dpo/q_t": 0.3133440315723419,
|
|
"grad_norm": 21.737163543701172,
|
|
"learning_rate": 3.5719052736323806e-09,
|
|
"logits/chosen": 0.36857935786247253,
|
|
"logits/rejected": 0.2945020794868469,
|
|
"logps/chosen": -78.60719299316406,
|
|
"logps/ref_chosen": -58.47271728515625,
|
|
"logps/ref_rejected": -84.48008728027344,
|
|
"logps/rejected": -119.83065032958984,
|
|
"loss": 0.9124,
|
|
"margin_dpo/margin_mean": 15.216100692749023,
|
|
"margin_dpo/margin_std": 19.21467399597168,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.9599395313681028,
|
|
"fcm_dpo/beta": 0.07038284093141556,
|
|
"fcm_dpo/delta": 0.0925895944237709,
|
|
"fcm_dpo/margin": 13.050129890441895,
|
|
"fcm_dpo/q_t": 0.34775209426879883,
|
|
"grad_norm": 32.16118621826172,
|
|
"learning_rate": 2.5446395297668287e-09,
|
|
"logits/chosen": 0.3829967677593231,
|
|
"logits/rejected": 0.3268979787826538,
|
|
"logps/chosen": -80.91036987304688,
|
|
"logps/ref_chosen": -60.0723991394043,
|
|
"logps/ref_rejected": -75.8419189453125,
|
|
"logps/rejected": -109.73001861572266,
|
|
"loss": 1.0213,
|
|
"margin_dpo/margin_mean": 13.050129890441895,
|
|
"margin_dpo/margin_std": 19.307937622070312,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.9674981103552532,
|
|
"fcm_dpo/beta": 0.07497520744800568,
|
|
"fcm_dpo/delta": -0.0180673748254776,
|
|
"fcm_dpo/margin": 13.5342435836792,
|
|
"fcm_dpo/q_t": 0.327609658241272,
|
|
"grad_norm": 28.1774959564209,
|
|
"learning_rate": 1.690410564514244e-09,
|
|
"logits/chosen": 0.3562234938144684,
|
|
"logits/rejected": 0.27130261063575745,
|
|
"logps/chosen": -82.12296295166016,
|
|
"logps/ref_chosen": -59.24292755126953,
|
|
"logps/ref_rejected": -81.03025817871094,
|
|
"logps/rejected": -117.44453430175781,
|
|
"loss": 0.9614,
|
|
"margin_dpo/margin_mean": 13.5342435836792,
|
|
"margin_dpo/margin_std": 19.009403228759766,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.9750566893424036,
|
|
"fcm_dpo/beta": 0.07942639291286469,
|
|
"fcm_dpo/delta": 0.06387078016996384,
|
|
"fcm_dpo/margin": 11.842493057250977,
|
|
"fcm_dpo/q_t": 0.3415600657463074,
|
|
"grad_norm": 27.630109786987305,
|
|
"learning_rate": 1.0098157099674987e-09,
|
|
"logits/chosen": 0.33102938532829285,
|
|
"logits/rejected": 0.293028324842453,
|
|
"logps/chosen": -86.07749938964844,
|
|
"logps/ref_chosen": -63.97548294067383,
|
|
"logps/ref_rejected": -74.65735626220703,
|
|
"logps/rejected": -108.60185241699219,
|
|
"loss": 1.0304,
|
|
"margin_dpo/margin_mean": 11.842493057250977,
|
|
"margin_dpo/margin_std": 18.239856719970703,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.982615268329554,
|
|
"fcm_dpo/beta": 0.07533489167690277,
|
|
"fcm_dpo/delta": -0.09645902365446091,
|
|
"fcm_dpo/margin": 14.389871597290039,
|
|
"fcm_dpo/q_t": 0.3166733682155609,
|
|
"grad_norm": 29.344585418701172,
|
|
"learning_rate": 5.033308820289184e-10,
|
|
"logits/chosen": 0.38797593116760254,
|
|
"logits/rejected": 0.3146594166755676,
|
|
"logps/chosen": -82.97822570800781,
|
|
"logps/ref_chosen": -60.51557159423828,
|
|
"logps/ref_rejected": -85.11001586914062,
|
|
"logps/rejected": -121.96253967285156,
|
|
"loss": 0.9395,
|
|
"margin_dpo/margin_mean": 14.389869689941406,
|
|
"margin_dpo/margin_std": 19.199068069458008,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.9901738473167044,
|
|
"fcm_dpo/beta": 0.07521242648363113,
|
|
"fcm_dpo/delta": 0.03413959592580795,
|
|
"fcm_dpo/margin": 12.832531929016113,
|
|
"fcm_dpo/q_t": 0.3351586163043976,
|
|
"grad_norm": 22.92190170288086,
|
|
"learning_rate": 1.7131024761923852e-10,
|
|
"logits/chosen": 0.37830454111099243,
|
|
"logits/rejected": 0.2979954779148102,
|
|
"logps/chosen": -80.21737670898438,
|
|
"logps/ref_chosen": -59.14573287963867,
|
|
"logps/ref_rejected": -80.98335266113281,
|
|
"logps/rejected": -114.88752746582031,
|
|
"loss": 0.9554,
|
|
"margin_dpo/margin_mean": 12.83253002166748,
|
|
"margin_dpo/margin_std": 17.429914474487305,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.9977324263038548,
|
|
"fcm_dpo/beta": 0.07314083725214005,
|
|
"fcm_dpo/delta": -0.027313020080327988,
|
|
"fcm_dpo/margin": 13.969868659973145,
|
|
"fcm_dpo/q_t": 0.3238561749458313,
|
|
"grad_norm": 25.15906524658203,
|
|
"learning_rate": 1.3985977021235829e-11,
|
|
"logits/chosen": 0.43907564878463745,
|
|
"logits/rejected": 0.36568042635917664,
|
|
"logps/chosen": -82.41399383544922,
|
|
"logps/ref_chosen": -60.18262481689453,
|
|
"logps/ref_rejected": -80.55596160888672,
|
|
"logps/rejected": -116.7572250366211,
|
|
"loss": 0.9519,
|
|
"margin_dpo/margin_mean": 13.969868659973145,
|
|
"margin_dpo/margin_std": 18.85131072998047,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.999244142101285,
|
|
"step": 661,
|
|
"total_flos": 0.0,
|
|
"train_loss": 1.0854419020769637,
|
|
"train_runtime": 1766.4629,
|
|
"train_samples_per_second": 23.967,
|
|
"train_steps_per_second": 0.374
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 661,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 50,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|