Model: jackf857/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.85 Source: Original Platform
12654 lines
465 KiB
JSON
12654 lines
465 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.999244142101285,
|
|
"eval_steps": 200,
|
|
"global_step": 661,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0015117157974300832,
|
|
"fcm_dpo/beta": 0.10886543244123459,
|
|
"fcm_dpo/delta": 0.4247117042541504,
|
|
"fcm_dpo/margin": -0.0013532638549804688,
|
|
"fcm_dpo/q_t": 0.5000430345535278,
|
|
"grad_norm": 30.727170944213867,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": 0.13337239623069763,
|
|
"logits/rejected": 0.12492949515581131,
|
|
"logps/chosen": -64.5841293334961,
|
|
"logps/ref_chosen": -64.61280822753906,
|
|
"logps/ref_rejected": -64.17195129394531,
|
|
"logps/rejected": -64.14192199707031,
|
|
"loss": 1.3866,
|
|
"margin_dpo/margin_mean": -0.0013527870178222656,
|
|
"margin_dpo/margin_std": 0.2561596930027008,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0030234315948601664,
|
|
"fcm_dpo/beta": 0.11363507062196732,
|
|
"fcm_dpo/delta": 0.4199795424938202,
|
|
"fcm_dpo/margin": 0.037450045347213745,
|
|
"fcm_dpo/q_t": 0.49898096919059753,
|
|
"grad_norm": 31.676591873168945,
|
|
"learning_rate": 7.462686567164179e-09,
|
|
"logits/chosen": 0.09414851665496826,
|
|
"logits/rejected": 0.07363267242908478,
|
|
"logps/chosen": -56.101890563964844,
|
|
"logps/ref_chosen": -56.0989990234375,
|
|
"logps/ref_rejected": -66.59971618652344,
|
|
"logps/rejected": -66.64006042480469,
|
|
"loss": 1.3821,
|
|
"margin_dpo/margin_mean": 0.03744968771934509,
|
|
"margin_dpo/margin_std": 0.27811938524246216,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.0045351473922902496,
|
|
"fcm_dpo/beta": 0.12360024452209473,
|
|
"fcm_dpo/delta": 0.42059752345085144,
|
|
"fcm_dpo/margin": 0.004606842994689941,
|
|
"fcm_dpo/q_t": 0.4998636245727539,
|
|
"grad_norm": 38.59760665893555,
|
|
"learning_rate": 1.4925373134328357e-08,
|
|
"logits/chosen": 0.09945081174373627,
|
|
"logits/rejected": 0.06145160272717476,
|
|
"logps/chosen": -65.43189239501953,
|
|
"logps/ref_chosen": -65.45726013183594,
|
|
"logps/ref_rejected": -90.82853698730469,
|
|
"logps/rejected": -90.80776977539062,
|
|
"loss": 1.3857,
|
|
"margin_dpo/margin_mean": 0.004606842994689941,
|
|
"margin_dpo/margin_std": 0.2735193371772766,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.006046863189720333,
|
|
"fcm_dpo/beta": 0.14635387063026428,
|
|
"fcm_dpo/delta": 0.8448625206947327,
|
|
"fcm_dpo/margin": 0.037091463804244995,
|
|
"fcm_dpo/q_t": 0.49872055649757385,
|
|
"grad_norm": 49.67146682739258,
|
|
"learning_rate": 2.2388059701492534e-08,
|
|
"logits/chosen": 0.10524652898311615,
|
|
"logits/rejected": 0.08913983404636383,
|
|
"logps/chosen": -76.82958984375,
|
|
"logps/ref_chosen": -76.86018371582031,
|
|
"logps/ref_rejected": -79.91523742675781,
|
|
"logps/rejected": -79.92173767089844,
|
|
"loss": 1.3815,
|
|
"margin_dpo/margin_mean": 0.03709092736244202,
|
|
"margin_dpo/margin_std": 0.3865681290626526,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.007558578987150416,
|
|
"fcm_dpo/beta": 0.1591610610485077,
|
|
"fcm_dpo/delta": 0.41841045022010803,
|
|
"fcm_dpo/margin": 0.01996675133705139,
|
|
"fcm_dpo/q_t": 0.49924030900001526,
|
|
"grad_norm": 47.10337829589844,
|
|
"learning_rate": 2.9850746268656714e-08,
|
|
"logits/chosen": 0.08479103446006775,
|
|
"logits/rejected": 0.04581887274980545,
|
|
"logps/chosen": -62.968536376953125,
|
|
"logps/ref_chosen": -62.97134017944336,
|
|
"logps/ref_rejected": -79.9192123413086,
|
|
"logps/rejected": -79.93637084960938,
|
|
"loss": 1.3833,
|
|
"margin_dpo/margin_mean": 0.01996755599975586,
|
|
"margin_dpo/margin_std": 0.2942441701889038,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.009070294784580499,
|
|
"fcm_dpo/beta": 0.16581664979457855,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.02515178918838501,
|
|
"fcm_dpo/q_t": 0.5010402798652649,
|
|
"grad_norm": 49.417415618896484,
|
|
"learning_rate": 3.731343283582089e-08,
|
|
"logits/chosen": 0.15653175115585327,
|
|
"logits/rejected": 0.11625839024782181,
|
|
"logps/chosen": -51.34194564819336,
|
|
"logps/ref_chosen": -51.30736541748047,
|
|
"logps/ref_rejected": -82.77239227294922,
|
|
"logps/rejected": -82.78182220458984,
|
|
"loss": 1.3911,
|
|
"margin_dpo/margin_mean": -0.025151371955871582,
|
|
"margin_dpo/margin_std": 0.30363306403160095,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.010582010582010581,
|
|
"fcm_dpo/beta": 0.18848590552806854,
|
|
"fcm_dpo/delta": 0.8485120534896851,
|
|
"fcm_dpo/margin": 0.008793026208877563,
|
|
"fcm_dpo/q_t": 0.49962806701660156,
|
|
"grad_norm": 51.45716857910156,
|
|
"learning_rate": 4.477611940298507e-08,
|
|
"logits/chosen": 0.02227121591567993,
|
|
"logits/rejected": -0.021784139797091484,
|
|
"logps/chosen": -51.428985595703125,
|
|
"logps/ref_chosen": -51.45941162109375,
|
|
"logps/ref_rejected": -66.3828125,
|
|
"logps/rejected": -66.36117553710938,
|
|
"loss": 1.3852,
|
|
"margin_dpo/margin_mean": 0.008793264627456665,
|
|
"margin_dpo/margin_std": 0.2343991994857788,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.012093726379440665,
|
|
"fcm_dpo/beta": 0.2231920063495636,
|
|
"fcm_dpo/delta": 0.8420383930206299,
|
|
"fcm_dpo/margin": 0.03777146339416504,
|
|
"fcm_dpo/q_t": 0.49801886081695557,
|
|
"grad_norm": 63.04829025268555,
|
|
"learning_rate": 5.223880597014925e-08,
|
|
"logits/chosen": 0.07406742870807648,
|
|
"logits/rejected": 0.05182163789868355,
|
|
"logps/chosen": -62.19073486328125,
|
|
"logps/ref_chosen": -62.197547912597656,
|
|
"logps/ref_rejected": -74.66180419921875,
|
|
"logps/rejected": -74.69276428222656,
|
|
"loss": 1.3792,
|
|
"margin_dpo/margin_mean": 0.03777092695236206,
|
|
"margin_dpo/margin_std": 0.34941548109054565,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.013605442176870748,
|
|
"fcm_dpo/beta": 0.25312358140945435,
|
|
"fcm_dpo/delta": 0.42442524433135986,
|
|
"fcm_dpo/margin": -0.004266202449798584,
|
|
"fcm_dpo/q_t": 0.5002864599227905,
|
|
"grad_norm": 80.34166717529297,
|
|
"learning_rate": 5.970149253731343e-08,
|
|
"logits/chosen": 0.17168600857257843,
|
|
"logits/rejected": 0.11221244931221008,
|
|
"logps/chosen": -55.64410400390625,
|
|
"logps/ref_chosen": -55.629722595214844,
|
|
"logps/ref_rejected": -86.21221923828125,
|
|
"logps/rejected": -86.22233581542969,
|
|
"loss": 1.3894,
|
|
"margin_dpo/margin_mean": -0.004266202449798584,
|
|
"margin_dpo/margin_std": 0.35202211141586304,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.015117157974300832,
|
|
"fcm_dpo/beta": 0.27516475319862366,
|
|
"fcm_dpo/delta": 0.4174611568450928,
|
|
"fcm_dpo/margin": 0.015347898006439209,
|
|
"fcm_dpo/q_t": 0.4991258382797241,
|
|
"grad_norm": 81.2122802734375,
|
|
"learning_rate": 6.71641791044776e-08,
|
|
"logits/chosen": 0.1367185264825821,
|
|
"logits/rejected": 0.10551740229129791,
|
|
"logps/chosen": -62.67530059814453,
|
|
"logps/ref_chosen": -62.69060134887695,
|
|
"logps/ref_rejected": -90.610107421875,
|
|
"logps/rejected": -90.61016845703125,
|
|
"loss": 1.3847,
|
|
"margin_dpo/margin_mean": 0.015347808599472046,
|
|
"margin_dpo/margin_std": 0.37078261375427246,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.016628873771730914,
|
|
"fcm_dpo/beta": 0.2990139424800873,
|
|
"fcm_dpo/delta": 0.41560107469558716,
|
|
"fcm_dpo/margin": 0.029833942651748657,
|
|
"fcm_dpo/q_t": 0.49797219038009644,
|
|
"grad_norm": 87.37360382080078,
|
|
"learning_rate": 7.462686567164178e-08,
|
|
"logits/chosen": 0.1075279489159584,
|
|
"logits/rejected": 0.10058905184268951,
|
|
"logps/chosen": -65.76591491699219,
|
|
"logps/ref_chosen": -65.76712036132812,
|
|
"logps/ref_rejected": -72.4764633178711,
|
|
"logps/rejected": -72.50508880615234,
|
|
"loss": 1.3795,
|
|
"margin_dpo/margin_mean": 0.029834330081939697,
|
|
"margin_dpo/margin_std": 0.30201759934425354,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.018140589569160998,
|
|
"fcm_dpo/beta": 0.3249116837978363,
|
|
"fcm_dpo/delta": 0.415316104888916,
|
|
"fcm_dpo/margin": 0.021361559629440308,
|
|
"fcm_dpo/q_t": 0.49845293164253235,
|
|
"grad_norm": 92.45681762695312,
|
|
"learning_rate": 8.208955223880596e-08,
|
|
"logits/chosen": 0.010168695822358131,
|
|
"logits/rejected": -0.005617397837340832,
|
|
"logps/chosen": -60.72811508178711,
|
|
"logps/ref_chosen": -60.704891204833984,
|
|
"logps/ref_rejected": -69.41564178466797,
|
|
"logps/rejected": -69.4602279663086,
|
|
"loss": 1.3823,
|
|
"margin_dpo/margin_mean": 0.021361559629440308,
|
|
"margin_dpo/margin_std": 0.3284778594970703,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.019652305366591082,
|
|
"fcm_dpo/beta": 0.33930647373199463,
|
|
"fcm_dpo/delta": 0.42449629306793213,
|
|
"fcm_dpo/margin": -0.04561507701873779,
|
|
"fcm_dpo/q_t": 0.5036793947219849,
|
|
"grad_norm": 99.87137603759766,
|
|
"learning_rate": 8.955223880597014e-08,
|
|
"logits/chosen": 0.10898313671350479,
|
|
"logits/rejected": 0.046505216509103775,
|
|
"logps/chosen": -49.909000396728516,
|
|
"logps/ref_chosen": -49.90925598144531,
|
|
"logps/ref_rejected": -92.37818145751953,
|
|
"logps/rejected": -92.33231353759766,
|
|
"loss": 1.4034,
|
|
"margin_dpo/margin_mean": -0.04561561346054077,
|
|
"margin_dpo/margin_std": 0.2739795744419098,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.021164021164021163,
|
|
"fcm_dpo/beta": 0.4011165499687195,
|
|
"fcm_dpo/delta": 0.8268953561782837,
|
|
"fcm_dpo/margin": 0.060496360063552856,
|
|
"fcm_dpo/q_t": 0.49426159262657166,
|
|
"grad_norm": 115.7387466430664,
|
|
"learning_rate": 9.701492537313432e-08,
|
|
"logits/chosen": 0.10329781472682953,
|
|
"logits/rejected": 0.08507229387760162,
|
|
"logps/chosen": -60.59901428222656,
|
|
"logps/ref_chosen": -60.61879348754883,
|
|
"logps/ref_rejected": -71.79306030273438,
|
|
"logps/rejected": -71.83377838134766,
|
|
"loss": 1.3648,
|
|
"margin_dpo/margin_mean": 0.06049656867980957,
|
|
"margin_dpo/margin_std": 0.2852107584476471,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.022675736961451247,
|
|
"fcm_dpo/beta": 0.4355963468551636,
|
|
"fcm_dpo/delta": 0.42002391815185547,
|
|
"fcm_dpo/margin": -0.011380374431610107,
|
|
"fcm_dpo/q_t": 0.5011443495750427,
|
|
"grad_norm": 146.9692840576172,
|
|
"learning_rate": 1.044776119402985e-07,
|
|
"logits/chosen": 0.07006427645683289,
|
|
"logits/rejected": 0.02684413641691208,
|
|
"logps/chosen": -63.50836944580078,
|
|
"logps/ref_chosen": -63.46953582763672,
|
|
"logps/ref_rejected": -88.88951110839844,
|
|
"logps/rejected": -88.91697692871094,
|
|
"loss": 1.3975,
|
|
"margin_dpo/margin_mean": -0.011380106210708618,
|
|
"margin_dpo/margin_std": 0.38208454847335815,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.02418745275888133,
|
|
"fcm_dpo/beta": 0.49392595887184143,
|
|
"fcm_dpo/delta": 0.42274531722068787,
|
|
"fcm_dpo/margin": 0.0010766535997390747,
|
|
"fcm_dpo/q_t": 0.49993181228637695,
|
|
"grad_norm": 134.13528442382812,
|
|
"learning_rate": 1.1194029850746268e-07,
|
|
"logits/chosen": 0.12334809452295303,
|
|
"logits/rejected": 0.08568301796913147,
|
|
"logps/chosen": -46.53916549682617,
|
|
"logps/ref_chosen": -46.53229904174805,
|
|
"logps/ref_rejected": -74.27533721923828,
|
|
"logps/rejected": -74.28327941894531,
|
|
"loss": 1.3916,
|
|
"margin_dpo/margin_mean": 0.0010768026113510132,
|
|
"margin_dpo/margin_std": 0.3080522418022156,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.025699168556311415,
|
|
"fcm_dpo/beta": 0.49392595887184143,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.009143054485321045,
|
|
"fcm_dpo/q_t": 0.5011139512062073,
|
|
"grad_norm": 168.68765258789062,
|
|
"learning_rate": 1.1940298507462686e-07,
|
|
"logits/chosen": 0.03793691098690033,
|
|
"logits/rejected": 0.019878219813108444,
|
|
"logps/chosen": -64.09962463378906,
|
|
"logps/ref_chosen": -64.07783508300781,
|
|
"logps/ref_rejected": -86.40876770019531,
|
|
"logps/rejected": -86.42141723632812,
|
|
"loss": 1.3977,
|
|
"margin_dpo/margin_mean": -0.009143710136413574,
|
|
"margin_dpo/margin_std": 0.32370686531066895,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.027210884353741496,
|
|
"fcm_dpo/beta": 0.5146567821502686,
|
|
"fcm_dpo/delta": 0.40302640199661255,
|
|
"fcm_dpo/margin": 0.03949823975563049,
|
|
"fcm_dpo/q_t": 0.49513158202171326,
|
|
"grad_norm": 141.66015625,
|
|
"learning_rate": 1.2686567164179106e-07,
|
|
"logits/chosen": 0.10013440996408463,
|
|
"logits/rejected": 0.05400983989238739,
|
|
"logps/chosen": -44.85398864746094,
|
|
"logps/ref_chosen": -44.87433624267578,
|
|
"logps/ref_rejected": -70.97604370117188,
|
|
"logps/rejected": -70.99519348144531,
|
|
"loss": 1.3699,
|
|
"margin_dpo/margin_mean": 0.03949823975563049,
|
|
"margin_dpo/margin_std": 0.26558351516723633,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.02872260015117158,
|
|
"fcm_dpo/beta": 0.6078168153762817,
|
|
"fcm_dpo/delta": 0.8412047624588013,
|
|
"fcm_dpo/margin": 0.016053587198257446,
|
|
"fcm_dpo/q_t": 0.4977712035179138,
|
|
"grad_norm": 190.8359832763672,
|
|
"learning_rate": 1.343283582089552e-07,
|
|
"logits/chosen": 0.07114684581756592,
|
|
"logits/rejected": 0.057568684220314026,
|
|
"logps/chosen": -68.10987854003906,
|
|
"logps/ref_chosen": -68.1598129272461,
|
|
"logps/ref_rejected": -81.17138671875,
|
|
"logps/rejected": -81.13750457763672,
|
|
"loss": 1.3852,
|
|
"margin_dpo/margin_mean": 0.016053855419158936,
|
|
"margin_dpo/margin_std": 0.3032751679420471,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.030234315948601664,
|
|
"fcm_dpo/beta": 0.7179453372955322,
|
|
"fcm_dpo/delta": 0.8326936960220337,
|
|
"fcm_dpo/margin": 0.027033761143684387,
|
|
"fcm_dpo/q_t": 0.49571579694747925,
|
|
"grad_norm": 209.52816772460938,
|
|
"learning_rate": 1.4179104477611938e-07,
|
|
"logits/chosen": 0.15309768915176392,
|
|
"logits/rejected": 0.12928786873817444,
|
|
"logps/chosen": -53.66864013671875,
|
|
"logps/ref_chosen": -53.67856216430664,
|
|
"logps/ref_rejected": -74.16911315917969,
|
|
"logps/rejected": -74.18623352050781,
|
|
"loss": 1.3828,
|
|
"margin_dpo/margin_mean": 0.027033761143684387,
|
|
"margin_dpo/margin_std": 0.33954381942749023,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.031746031746031744,
|
|
"fcm_dpo/beta": 0.8144156336784363,
|
|
"fcm_dpo/delta": 0.4235011041164398,
|
|
"fcm_dpo/margin": -0.015689924359321594,
|
|
"fcm_dpo/q_t": 0.5032085180282593,
|
|
"grad_norm": 245.65411376953125,
|
|
"learning_rate": 1.4925373134328355e-07,
|
|
"logits/chosen": 0.09726514667272568,
|
|
"logits/rejected": 0.07214757055044174,
|
|
"logps/chosen": -64.7015151977539,
|
|
"logps/ref_chosen": -64.70155334472656,
|
|
"logps/ref_rejected": -81.02095031738281,
|
|
"logps/rejected": -81.00521087646484,
|
|
"loss": 1.4162,
|
|
"margin_dpo/margin_mean": -0.01569044589996338,
|
|
"margin_dpo/margin_std": 0.32213884592056274,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.03325774754346183,
|
|
"fcm_dpo/beta": 0.9216822385787964,
|
|
"fcm_dpo/delta": 0.8162908554077148,
|
|
"fcm_dpo/margin": 0.03929051756858826,
|
|
"fcm_dpo/q_t": 0.49158748984336853,
|
|
"grad_norm": 258.1221923828125,
|
|
"learning_rate": 1.5671641791044775e-07,
|
|
"logits/chosen": 0.0003290371969342232,
|
|
"logits/rejected": -0.02038179337978363,
|
|
"logps/chosen": -58.05890655517578,
|
|
"logps/ref_chosen": -58.03599166870117,
|
|
"logps/ref_rejected": -80.72721862792969,
|
|
"logps/rejected": -80.78941345214844,
|
|
"loss": 1.3677,
|
|
"margin_dpo/margin_mean": 0.03929010033607483,
|
|
"margin_dpo/margin_std": 0.2883184552192688,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.03476946334089191,
|
|
"fcm_dpo/beta": 0.9588446021080017,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.004442840814590454,
|
|
"fcm_dpo/q_t": 0.5012105703353882,
|
|
"grad_norm": 318.4825439453125,
|
|
"learning_rate": 1.6417910447761193e-07,
|
|
"logits/chosen": 0.15388762950897217,
|
|
"logits/rejected": 0.12773939967155457,
|
|
"logps/chosen": -66.37007141113281,
|
|
"logps/ref_chosen": -66.35608673095703,
|
|
"logps/ref_rejected": -93.02769470214844,
|
|
"logps/rejected": -93.0372314453125,
|
|
"loss": 1.4114,
|
|
"margin_dpo/margin_mean": -0.004443138837814331,
|
|
"margin_dpo/margin_std": 0.3012203574180603,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.036281179138321996,
|
|
"fcm_dpo/beta": 0.9588446021080017,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.04989251494407654,
|
|
"fcm_dpo/q_t": 0.511908233165741,
|
|
"grad_norm": 262.23992919921875,
|
|
"learning_rate": 1.716417910447761e-07,
|
|
"logits/chosen": 0.1356058567762375,
|
|
"logits/rejected": 0.10264482349157333,
|
|
"logps/chosen": -54.50740051269531,
|
|
"logps/ref_chosen": -54.461238861083984,
|
|
"logps/ref_rejected": -68.33817291259766,
|
|
"logps/rejected": -68.33444213867188,
|
|
"loss": 1.4523,
|
|
"margin_dpo/margin_mean": -0.04989221692085266,
|
|
"margin_dpo/margin_std": 0.270521342754364,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.03779289493575208,
|
|
"fcm_dpo/beta": 1.0845671892166138,
|
|
"fcm_dpo/delta": 0.8133487701416016,
|
|
"fcm_dpo/margin": 0.03639337420463562,
|
|
"fcm_dpo/q_t": 0.4910878539085388,
|
|
"grad_norm": 317.3623352050781,
|
|
"learning_rate": 1.7910447761194027e-07,
|
|
"logits/chosen": 0.10586078464984894,
|
|
"logits/rejected": 0.05431347340345383,
|
|
"logps/chosen": -60.022918701171875,
|
|
"logps/ref_chosen": -60.00420379638672,
|
|
"logps/ref_rejected": -90.47376251220703,
|
|
"logps/rejected": -90.52886962890625,
|
|
"loss": 1.3718,
|
|
"margin_dpo/margin_mean": 0.036393433809280396,
|
|
"margin_dpo/margin_std": 0.2930064797401428,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.039304610733182165,
|
|
"fcm_dpo/beta": 1.2240636348724365,
|
|
"fcm_dpo/delta": 0.4076637029647827,
|
|
"fcm_dpo/margin": 0.009161576628684998,
|
|
"fcm_dpo/q_t": 0.49825724959373474,
|
|
"grad_norm": 367.88232421875,
|
|
"learning_rate": 1.8656716417910447e-07,
|
|
"logits/chosen": 0.11543253064155579,
|
|
"logits/rejected": 0.09682787954807281,
|
|
"logps/chosen": -56.85016632080078,
|
|
"logps/ref_chosen": -56.81915283203125,
|
|
"logps/ref_rejected": -77.84333038330078,
|
|
"logps/rejected": -77.88349914550781,
|
|
"loss": 1.4092,
|
|
"margin_dpo/margin_mean": 0.009161293506622314,
|
|
"margin_dpo/margin_std": 0.3032963275909424,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.04081632653061224,
|
|
"fcm_dpo/beta": 1.2240636348724365,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.011037558317184448,
|
|
"fcm_dpo/q_t": 0.5028091669082642,
|
|
"grad_norm": 365.7059326171875,
|
|
"learning_rate": 1.9402985074626865e-07,
|
|
"logits/chosen": 0.11122125387191772,
|
|
"logits/rejected": 0.0861082598567009,
|
|
"logps/chosen": -62.909698486328125,
|
|
"logps/ref_chosen": -62.87702560424805,
|
|
"logps/ref_rejected": -71.34437561035156,
|
|
"logps/rejected": -71.36601257324219,
|
|
"loss": 1.4414,
|
|
"margin_dpo/margin_mean": -0.011037617921829224,
|
|
"margin_dpo/margin_std": 0.33684635162353516,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.042328042328042326,
|
|
"fcm_dpo/beta": 1.3243428468704224,
|
|
"fcm_dpo/delta": 0.3937011957168579,
|
|
"fcm_dpo/margin": -0.004308909177780151,
|
|
"fcm_dpo/q_t": 0.5018905401229858,
|
|
"grad_norm": 379.9216613769531,
|
|
"learning_rate": 2.0149253731343282e-07,
|
|
"logits/chosen": 0.05917387455701828,
|
|
"logits/rejected": 0.050440460443496704,
|
|
"logps/chosen": -59.853607177734375,
|
|
"logps/ref_chosen": -59.8333740234375,
|
|
"logps/ref_rejected": -70.39804077148438,
|
|
"logps/rejected": -70.4139633178711,
|
|
"loss": 1.4359,
|
|
"margin_dpo/margin_mean": -0.004308879375457764,
|
|
"margin_dpo/margin_std": 0.31428390741348267,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.04383975812547241,
|
|
"fcm_dpo/beta": 1.4270368814468384,
|
|
"fcm_dpo/delta": 0.3734189569950104,
|
|
"fcm_dpo/margin": 0.02456316351890564,
|
|
"fcm_dpo/q_t": 0.4919978380203247,
|
|
"grad_norm": 459.2485656738281,
|
|
"learning_rate": 2.08955223880597e-07,
|
|
"logits/chosen": 0.11985379457473755,
|
|
"logits/rejected": 0.10274408757686615,
|
|
"logps/chosen": -74.13998413085938,
|
|
"logps/ref_chosen": -74.12020111083984,
|
|
"logps/ref_rejected": -83.33099365234375,
|
|
"logps/rejected": -83.37533569335938,
|
|
"loss": 1.4074,
|
|
"margin_dpo/margin_mean": 0.02456343173980713,
|
|
"margin_dpo/margin_std": 0.327664315700531,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.045351473922902494,
|
|
"fcm_dpo/beta": 1.536737322807312,
|
|
"fcm_dpo/delta": 0.3703068792819977,
|
|
"fcm_dpo/margin": 0.007722645998001099,
|
|
"fcm_dpo/q_t": 0.49483001232147217,
|
|
"grad_norm": 477.6687316894531,
|
|
"learning_rate": 2.1641791044776117e-07,
|
|
"logits/chosen": 0.11647738516330719,
|
|
"logits/rejected": 0.0625992864370346,
|
|
"logps/chosen": -50.76539611816406,
|
|
"logps/ref_chosen": -50.75128936767578,
|
|
"logps/ref_rejected": -89.29063415527344,
|
|
"logps/rejected": -89.31246948242188,
|
|
"loss": 1.4322,
|
|
"margin_dpo/margin_mean": 0.007722735404968262,
|
|
"margin_dpo/margin_std": 0.31650030612945557,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.04686318972033258,
|
|
"fcm_dpo/beta": 1.5832395553588867,
|
|
"fcm_dpo/delta": 0.29380002617836,
|
|
"fcm_dpo/margin": 0.08262354135513306,
|
|
"fcm_dpo/q_t": 0.4720662236213684,
|
|
"grad_norm": 524.7825927734375,
|
|
"learning_rate": 2.2388059701492537e-07,
|
|
"logits/chosen": 0.10439669340848923,
|
|
"logits/rejected": 0.05832277983427048,
|
|
"logps/chosen": -65.34173583984375,
|
|
"logps/ref_chosen": -65.33675384521484,
|
|
"logps/ref_rejected": -100.76666259765625,
|
|
"logps/rejected": -100.85426330566406,
|
|
"loss": 1.326,
|
|
"margin_dpo/margin_mean": 0.08262395858764648,
|
|
"margin_dpo/margin_std": 0.3305758833885193,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.04837490551776266,
|
|
"fcm_dpo/beta": 1.8284441232681274,
|
|
"fcm_dpo/delta": 0.7290701866149902,
|
|
"fcm_dpo/margin": 0.06868910789489746,
|
|
"fcm_dpo/q_t": 0.4716106355190277,
|
|
"grad_norm": 563.244873046875,
|
|
"learning_rate": 2.3134328358208954e-07,
|
|
"logits/chosen": 0.08951601386070251,
|
|
"logits/rejected": 0.08166046440601349,
|
|
"logps/chosen": -67.16220092773438,
|
|
"logps/ref_chosen": -67.18333435058594,
|
|
"logps/ref_rejected": -82.80763244628906,
|
|
"logps/rejected": -82.85519409179688,
|
|
"loss": 1.3435,
|
|
"margin_dpo/margin_mean": 0.06868937611579895,
|
|
"margin_dpo/margin_std": 0.31365060806274414,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.049886621315192746,
|
|
"fcm_dpo/beta": 2.125655174255371,
|
|
"fcm_dpo/delta": 0.7907329201698303,
|
|
"fcm_dpo/margin": 0.030030831694602966,
|
|
"fcm_dpo/q_t": 0.4842595160007477,
|
|
"grad_norm": 734.3580322265625,
|
|
"learning_rate": 2.388059701492537e-07,
|
|
"logits/chosen": 0.028011824935674667,
|
|
"logits/rejected": 0.0021050497889518738,
|
|
"logps/chosen": -64.11384582519531,
|
|
"logps/ref_chosen": -64.03948211669922,
|
|
"logps/ref_rejected": -75.68357849121094,
|
|
"logps/rejected": -75.7879867553711,
|
|
"loss": 1.4391,
|
|
"margin_dpo/margin_mean": 0.030031487345695496,
|
|
"margin_dpo/margin_std": 0.3220931887626648,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.05139833711262283,
|
|
"fcm_dpo/beta": 2.4897372722625732,
|
|
"fcm_dpo/delta": 0.8030319809913635,
|
|
"fcm_dpo/margin": 0.02098938822746277,
|
|
"fcm_dpo/q_t": 0.48873692750930786,
|
|
"grad_norm": 721.6285400390625,
|
|
"learning_rate": 2.4626865671641786e-07,
|
|
"logits/chosen": 0.08669179677963257,
|
|
"logits/rejected": 0.05742088705301285,
|
|
"logps/chosen": -53.719154357910156,
|
|
"logps/ref_chosen": -53.6642951965332,
|
|
"logps/ref_rejected": -65.77989959716797,
|
|
"logps/rejected": -65.85574340820312,
|
|
"loss": 1.465,
|
|
"margin_dpo/margin_mean": 0.020989298820495605,
|
|
"margin_dpo/margin_std": 0.29796358942985535,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.05291005291005291,
|
|
"fcm_dpo/beta": 2.5934488773345947,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.02340644598007202,
|
|
"fcm_dpo/q_t": 0.5208801031112671,
|
|
"grad_norm": 830.2484741210938,
|
|
"learning_rate": 2.537313432835821e-07,
|
|
"logits/chosen": 0.06822899729013443,
|
|
"logits/rejected": 0.045317377895116806,
|
|
"logps/chosen": -61.09272766113281,
|
|
"logps/ref_chosen": -61.01686096191406,
|
|
"logps/ref_rejected": -72.78598022460938,
|
|
"logps/rejected": -72.83843994140625,
|
|
"loss": 1.6113,
|
|
"margin_dpo/margin_mean": -0.023406386375427246,
|
|
"margin_dpo/margin_std": 0.3278309106826782,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.05442176870748299,
|
|
"fcm_dpo/beta": 2.6975440979003906,
|
|
"fcm_dpo/delta": 0.38608044385910034,
|
|
"fcm_dpo/margin": 0.0097598135471344,
|
|
"fcm_dpo/q_t": 0.491857647895813,
|
|
"grad_norm": 901.6752319335938,
|
|
"learning_rate": 2.611940298507462e-07,
|
|
"logits/chosen": 0.09897307306528091,
|
|
"logits/rejected": 0.04630749300122261,
|
|
"logps/chosen": -50.61589050292969,
|
|
"logps/ref_chosen": -50.53736114501953,
|
|
"logps/ref_rejected": -78.11678314208984,
|
|
"logps/rejected": -78.20507049560547,
|
|
"loss": 1.5817,
|
|
"margin_dpo/margin_mean": 0.009759783744812012,
|
|
"margin_dpo/margin_std": 0.3618467152118683,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.055933484504913075,
|
|
"fcm_dpo/beta": 3.038989543914795,
|
|
"fcm_dpo/delta": 0.5818780660629272,
|
|
"fcm_dpo/margin": 0.09463274478912354,
|
|
"fcm_dpo/q_t": 0.4537751078605652,
|
|
"grad_norm": 1168.1463623046875,
|
|
"learning_rate": 2.686567164179104e-07,
|
|
"logits/chosen": 0.10465328395366669,
|
|
"logits/rejected": 0.024744877591729164,
|
|
"logps/chosen": -59.59927749633789,
|
|
"logps/ref_chosen": -59.55394744873047,
|
|
"logps/ref_rejected": -108.27702331542969,
|
|
"logps/rejected": -108.4169921875,
|
|
"loss": 1.4775,
|
|
"margin_dpo/margin_mean": 0.09463286399841309,
|
|
"margin_dpo/margin_std": 0.4426842927932739,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.05744520030234316,
|
|
"fcm_dpo/beta": 3.270552158355713,
|
|
"fcm_dpo/delta": 0.19189269840717316,
|
|
"fcm_dpo/margin": 0.04368185997009277,
|
|
"fcm_dpo/q_t": 0.47869473695755005,
|
|
"grad_norm": 995.4248046875,
|
|
"learning_rate": 2.761194029850746e-07,
|
|
"logits/chosen": 0.048389457166194916,
|
|
"logits/rejected": 0.03467674180865288,
|
|
"logps/chosen": -65.86851501464844,
|
|
"logps/ref_chosen": -65.78836059570312,
|
|
"logps/ref_rejected": -76.1619873046875,
|
|
"logps/rejected": -76.28582763671875,
|
|
"loss": 1.6706,
|
|
"margin_dpo/margin_mean": 0.043681979179382324,
|
|
"margin_dpo/margin_std": 0.4209554195404053,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.05895691609977324,
|
|
"fcm_dpo/beta": 3.536396026611328,
|
|
"fcm_dpo/delta": 0.4801386296749115,
|
|
"fcm_dpo/margin": 0.108737051486969,
|
|
"fcm_dpo/q_t": 0.4242640733718872,
|
|
"grad_norm": 939.8898315429688,
|
|
"learning_rate": 2.8358208955223876e-07,
|
|
"logits/chosen": 0.15147145092487335,
|
|
"logits/rejected": 0.12492187321186066,
|
|
"logps/chosen": -57.238365173339844,
|
|
"logps/ref_chosen": -57.17681121826172,
|
|
"logps/ref_rejected": -79.486328125,
|
|
"logps/rejected": -79.65663146972656,
|
|
"loss": 1.2786,
|
|
"margin_dpo/margin_mean": 0.10873657464981079,
|
|
"margin_dpo/margin_std": 0.2975834012031555,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.06046863189720333,
|
|
"fcm_dpo/beta": 3.880685567855835,
|
|
"fcm_dpo/delta": 0.3751264214515686,
|
|
"fcm_dpo/margin": -0.05382639169692993,
|
|
"fcm_dpo/q_t": 0.5360496044158936,
|
|
"grad_norm": 1590.185302734375,
|
|
"learning_rate": 2.9104477611940296e-07,
|
|
"logits/chosen": 0.10350456833839417,
|
|
"logits/rejected": 0.05420894920825958,
|
|
"logps/chosen": -61.45352554321289,
|
|
"logps/ref_chosen": -61.33416748046875,
|
|
"logps/ref_rejected": -79.10697174072266,
|
|
"logps/rejected": -79.1725082397461,
|
|
"loss": 1.9531,
|
|
"margin_dpo/margin_mean": -0.05382627248764038,
|
|
"margin_dpo/margin_std": 0.31318140029907227,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.06198034769463341,
|
|
"fcm_dpo/beta": 4.317322254180908,
|
|
"fcm_dpo/delta": 0.6840596199035645,
|
|
"fcm_dpo/margin": 0.04054167866706848,
|
|
"fcm_dpo/q_t": 0.4672660231590271,
|
|
"grad_norm": 1532.7891845703125,
|
|
"learning_rate": 2.985074626865671e-07,
|
|
"logits/chosen": 0.03596208989620209,
|
|
"logits/rejected": 0.01636538654565811,
|
|
"logps/chosen": -67.6545639038086,
|
|
"logps/ref_chosen": -67.5467300415039,
|
|
"logps/ref_rejected": -83.87788391113281,
|
|
"logps/rejected": -84.02627563476562,
|
|
"loss": 1.8037,
|
|
"margin_dpo/margin_mean": 0.04054197669029236,
|
|
"margin_dpo/margin_std": 0.402584969997406,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.06349206349206349,
|
|
"fcm_dpo/beta": 4.645079612731934,
|
|
"fcm_dpo/delta": 0.2149239331483841,
|
|
"fcm_dpo/margin": 0.013764455914497375,
|
|
"fcm_dpo/q_t": 0.47997668385505676,
|
|
"grad_norm": 1505.7274169921875,
|
|
"learning_rate": 3.059701492537313e-07,
|
|
"logits/chosen": 0.051899224519729614,
|
|
"logits/rejected": 0.03025246225297451,
|
|
"logps/chosen": -61.4012336730957,
|
|
"logps/ref_chosen": -61.26485824584961,
|
|
"logps/ref_rejected": -76.3629150390625,
|
|
"logps/rejected": -76.51305389404297,
|
|
"loss": 1.871,
|
|
"margin_dpo/margin_mean": 0.013764426112174988,
|
|
"margin_dpo/margin_std": 0.35858240723609924,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.06500377928949358,
|
|
"fcm_dpo/beta": 4.940211296081543,
|
|
"fcm_dpo/delta": 0.4239178001880646,
|
|
"fcm_dpo/margin": 0.09020450711250305,
|
|
"fcm_dpo/q_t": 0.4286651015281677,
|
|
"grad_norm": 1634.933349609375,
|
|
"learning_rate": 3.134328358208955e-07,
|
|
"logits/chosen": 0.08877776563167572,
|
|
"logits/rejected": 0.07763132452964783,
|
|
"logps/chosen": -71.8860092163086,
|
|
"logps/ref_chosen": -71.80902862548828,
|
|
"logps/ref_rejected": -81.12464141845703,
|
|
"logps/rejected": -81.29181671142578,
|
|
"loss": 1.7431,
|
|
"margin_dpo/margin_mean": 0.09020435810089111,
|
|
"margin_dpo/margin_std": 0.4161643981933594,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.06651549508692366,
|
|
"fcm_dpo/beta": 5.494063854217529,
|
|
"fcm_dpo/delta": 0.41538116335868835,
|
|
"fcm_dpo/margin": -0.00924178957939148,
|
|
"fcm_dpo/q_t": 0.520038366317749,
|
|
"grad_norm": 2336.511474609375,
|
|
"learning_rate": 3.2089552238805965e-07,
|
|
"logits/chosen": 0.05336465686559677,
|
|
"logits/rejected": 0.022505655884742737,
|
|
"logps/chosen": -66.67141723632812,
|
|
"logps/ref_chosen": -66.55043029785156,
|
|
"logps/ref_rejected": -85.06198120117188,
|
|
"logps/rejected": -85.17372131347656,
|
|
"loss": 2.2627,
|
|
"margin_dpo/margin_mean": -0.009241342544555664,
|
|
"margin_dpo/margin_std": 0.39431923627853394,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.06802721088435375,
|
|
"fcm_dpo/beta": 5.540068626403809,
|
|
"fcm_dpo/delta": 0.08240819722414017,
|
|
"fcm_dpo/margin": 0.1397048979997635,
|
|
"fcm_dpo/q_t": 0.37695854902267456,
|
|
"grad_norm": 1859.1026611328125,
|
|
"learning_rate": 3.2835820895522385e-07,
|
|
"logits/chosen": 0.11681336164474487,
|
|
"logits/rejected": 0.06394165754318237,
|
|
"logps/chosen": -62.34455108642578,
|
|
"logps/ref_chosen": -62.24385452270508,
|
|
"logps/ref_rejected": -92.96665954589844,
|
|
"logps/rejected": -93.20706176757812,
|
|
"loss": 1.5415,
|
|
"margin_dpo/margin_mean": 0.13970479369163513,
|
|
"margin_dpo/margin_std": 0.38627296686172485,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.06953892668178382,
|
|
"fcm_dpo/beta": 5.465640068054199,
|
|
"fcm_dpo/delta": -0.18290278315544128,
|
|
"fcm_dpo/margin": 0.1856483817100525,
|
|
"fcm_dpo/q_t": 0.3596438765525818,
|
|
"grad_norm": 1365.26123046875,
|
|
"learning_rate": 3.3582089552238805e-07,
|
|
"logits/chosen": 0.10824910551309586,
|
|
"logits/rejected": 0.06305693089962006,
|
|
"logps/chosen": -61.60289001464844,
|
|
"logps/ref_chosen": -61.498905181884766,
|
|
"logps/ref_rejected": -78.91172790527344,
|
|
"logps/rejected": -79.20137023925781,
|
|
"loss": 1.3262,
|
|
"margin_dpo/margin_mean": 0.1856483519077301,
|
|
"margin_dpo/margin_std": 0.3790084421634674,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.0710506424792139,
|
|
"fcm_dpo/beta": 5.482945919036865,
|
|
"fcm_dpo/delta": 0.12942156195640564,
|
|
"fcm_dpo/margin": 0.1331653594970703,
|
|
"fcm_dpo/q_t": 0.38719505071640015,
|
|
"grad_norm": 1422.8770751953125,
|
|
"learning_rate": 3.432835820895522e-07,
|
|
"logits/chosen": 0.02270699478685856,
|
|
"logits/rejected": -0.01980304904282093,
|
|
"logps/chosen": -51.67852020263672,
|
|
"logps/ref_chosen": -51.578346252441406,
|
|
"logps/ref_rejected": -68.2215576171875,
|
|
"logps/rejected": -68.45490264892578,
|
|
"loss": 1.3589,
|
|
"margin_dpo/margin_mean": 0.13316544890403748,
|
|
"margin_dpo/margin_std": 0.31922364234924316,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.07256235827664399,
|
|
"fcm_dpo/beta": 5.691621780395508,
|
|
"fcm_dpo/delta": 0.29117757081985474,
|
|
"fcm_dpo/margin": 0.017589092254638672,
|
|
"fcm_dpo/q_t": 0.4899485409259796,
|
|
"grad_norm": 1831.3077392578125,
|
|
"learning_rate": 3.507462686567164e-07,
|
|
"logits/chosen": 0.16300594806671143,
|
|
"logits/rejected": 0.13274288177490234,
|
|
"logps/chosen": -51.943580627441406,
|
|
"logps/ref_chosen": -51.79365158081055,
|
|
"logps/ref_rejected": -64.22503662109375,
|
|
"logps/rejected": -64.39256286621094,
|
|
"loss": 2.1944,
|
|
"margin_dpo/margin_mean": 0.017589718103408813,
|
|
"margin_dpo/margin_std": 0.3933258056640625,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.07407407407407407,
|
|
"fcm_dpo/beta": 5.941600799560547,
|
|
"fcm_dpo/delta": 0.14188799262046814,
|
|
"fcm_dpo/margin": 0.01929350197315216,
|
|
"fcm_dpo/q_t": 0.4635244905948639,
|
|
"grad_norm": 1869.5223388671875,
|
|
"learning_rate": 3.5820895522388055e-07,
|
|
"logits/chosen": 0.012968342751264572,
|
|
"logits/rejected": -0.00816606730222702,
|
|
"logps/chosen": -58.31328582763672,
|
|
"logps/ref_chosen": -58.13460159301758,
|
|
"logps/ref_rejected": -64.63206481933594,
|
|
"logps/rejected": -64.83004760742188,
|
|
"loss": 2.1107,
|
|
"margin_dpo/margin_mean": 0.019293993711471558,
|
|
"margin_dpo/margin_std": 0.3666878342628479,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.07558578987150416,
|
|
"fcm_dpo/beta": 6.342537879943848,
|
|
"fcm_dpo/delta": 0.38531041145324707,
|
|
"fcm_dpo/margin": 0.07645577192306519,
|
|
"fcm_dpo/q_t": 0.4381590187549591,
|
|
"grad_norm": 2065.01953125,
|
|
"learning_rate": 3.6567164179104475e-07,
|
|
"logits/chosen": 0.11765280365943909,
|
|
"logits/rejected": 0.08736774325370789,
|
|
"logps/chosen": -53.062259674072266,
|
|
"logps/ref_chosen": -52.85643768310547,
|
|
"logps/ref_rejected": -72.17460632324219,
|
|
"logps/rejected": -72.45687866210938,
|
|
"loss": 1.8018,
|
|
"margin_dpo/margin_mean": 0.0764555037021637,
|
|
"margin_dpo/margin_std": 0.35330671072006226,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.07709750566893424,
|
|
"fcm_dpo/beta": 6.480748653411865,
|
|
"fcm_dpo/delta": -0.008223239332437515,
|
|
"fcm_dpo/margin": 0.1322861611843109,
|
|
"fcm_dpo/q_t": 0.42159244418144226,
|
|
"grad_norm": 1923.1666259765625,
|
|
"learning_rate": 3.7313432835820895e-07,
|
|
"logits/chosen": 0.09921061992645264,
|
|
"logits/rejected": 0.07079657912254333,
|
|
"logps/chosen": -63.85832977294922,
|
|
"logps/ref_chosen": -63.65644073486328,
|
|
"logps/ref_rejected": -86.13229370117188,
|
|
"logps/rejected": -86.46647644042969,
|
|
"loss": 1.7411,
|
|
"margin_dpo/margin_mean": 0.1322861909866333,
|
|
"margin_dpo/margin_std": 0.4351498484611511,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.07860922146636433,
|
|
"fcm_dpo/beta": 6.4637298583984375,
|
|
"fcm_dpo/delta": 0.048552006483078,
|
|
"fcm_dpo/margin": 0.12421192228794098,
|
|
"fcm_dpo/q_t": 0.41275399923324585,
|
|
"grad_norm": 2170.326416015625,
|
|
"learning_rate": 3.805970149253731e-07,
|
|
"logits/chosen": 0.05486099421977997,
|
|
"logits/rejected": 0.007401124574244022,
|
|
"logps/chosen": -68.0733642578125,
|
|
"logps/ref_chosen": -67.8402099609375,
|
|
"logps/ref_rejected": -96.97090911865234,
|
|
"logps/rejected": -97.32827758789062,
|
|
"loss": 1.6966,
|
|
"margin_dpo/margin_mean": 0.12421198189258575,
|
|
"margin_dpo/margin_std": 0.39311325550079346,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.0801209372637944,
|
|
"fcm_dpo/beta": 6.674091339111328,
|
|
"fcm_dpo/delta": 0.08519239723682404,
|
|
"fcm_dpo/margin": 0.0567784458398819,
|
|
"fcm_dpo/q_t": 0.4542519748210907,
|
|
"grad_norm": 2142.70947265625,
|
|
"learning_rate": 3.880597014925373e-07,
|
|
"logits/chosen": 0.08207565546035767,
|
|
"logits/rejected": 0.07128915190696716,
|
|
"logps/chosen": -57.14149856567383,
|
|
"logps/ref_chosen": -56.87813949584961,
|
|
"logps/ref_rejected": -60.75569152832031,
|
|
"logps/rejected": -61.075828552246094,
|
|
"loss": 2.0249,
|
|
"margin_dpo/margin_mean": 0.05677822232246399,
|
|
"margin_dpo/margin_std": 0.35405731201171875,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.08163265306122448,
|
|
"fcm_dpo/beta": 6.724396705627441,
|
|
"fcm_dpo/delta": 0.20413607358932495,
|
|
"fcm_dpo/margin": 0.09712421894073486,
|
|
"fcm_dpo/q_t": 0.40143126249313354,
|
|
"grad_norm": 2092.410400390625,
|
|
"learning_rate": 3.9552238805970144e-07,
|
|
"logits/chosen": 0.07327298074960709,
|
|
"logits/rejected": 0.05757633596658707,
|
|
"logps/chosen": -47.52605056762695,
|
|
"logps/ref_chosen": -47.26692199707031,
|
|
"logps/ref_rejected": -62.19426727294922,
|
|
"logps/rejected": -62.55051803588867,
|
|
"loss": 1.7674,
|
|
"margin_dpo/margin_mean": 0.09712427854537964,
|
|
"margin_dpo/margin_std": 0.3280726373195648,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.08314436885865457,
|
|
"fcm_dpo/beta": 6.342741012573242,
|
|
"fcm_dpo/delta": -0.6529929637908936,
|
|
"fcm_dpo/margin": 0.2226666957139969,
|
|
"fcm_dpo/q_t": 0.35867583751678467,
|
|
"grad_norm": 1971.2823486328125,
|
|
"learning_rate": 4.0298507462686564e-07,
|
|
"logits/chosen": 0.009333456866443157,
|
|
"logits/rejected": -0.06498602777719498,
|
|
"logps/chosen": -50.54954528808594,
|
|
"logps/ref_chosen": -50.32619094848633,
|
|
"logps/ref_rejected": -92.44389343261719,
|
|
"logps/rejected": -92.88990783691406,
|
|
"loss": 1.5563,
|
|
"margin_dpo/margin_mean": 0.22266672551631927,
|
|
"margin_dpo/margin_std": 0.49195396900177,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.08465608465608465,
|
|
"fcm_dpo/beta": 6.315101146697998,
|
|
"fcm_dpo/delta": 0.1984485387802124,
|
|
"fcm_dpo/margin": 0.1050320565700531,
|
|
"fcm_dpo/q_t": 0.4071798324584961,
|
|
"grad_norm": 1768.2681884765625,
|
|
"learning_rate": 4.1044776119402984e-07,
|
|
"logits/chosen": 0.13452336192131042,
|
|
"logits/rejected": 0.11181502044200897,
|
|
"logps/chosen": -56.96580505371094,
|
|
"logps/ref_chosen": -56.766971588134766,
|
|
"logps/ref_rejected": -66.30504608154297,
|
|
"logps/rejected": -66.60890197753906,
|
|
"loss": 1.6331,
|
|
"margin_dpo/margin_mean": 0.10503232479095459,
|
|
"margin_dpo/margin_std": 0.3666898310184479,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.08616780045351474,
|
|
"fcm_dpo/beta": 5.578975677490234,
|
|
"fcm_dpo/delta": -0.6684575080871582,
|
|
"fcm_dpo/margin": 0.2490333914756775,
|
|
"fcm_dpo/q_t": 0.32324889302253723,
|
|
"grad_norm": 1445.676025390625,
|
|
"learning_rate": 4.17910447761194e-07,
|
|
"logits/chosen": 0.10891781747341156,
|
|
"logits/rejected": 0.043134208768606186,
|
|
"logps/chosen": -58.001564025878906,
|
|
"logps/ref_chosen": -57.76774597167969,
|
|
"logps/ref_rejected": -82.75698852539062,
|
|
"logps/rejected": -83.23983764648438,
|
|
"loss": 1.278,
|
|
"margin_dpo/margin_mean": 0.24903348088264465,
|
|
"margin_dpo/margin_std": 0.43858030438423157,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.08767951625094482,
|
|
"fcm_dpo/beta": 5.610134124755859,
|
|
"fcm_dpo/delta": 0.21757441759109497,
|
|
"fcm_dpo/margin": 0.11425483226776123,
|
|
"fcm_dpo/q_t": 0.4058857560157776,
|
|
"grad_norm": 1669.9715576171875,
|
|
"learning_rate": 4.253731343283582e-07,
|
|
"logits/chosen": 0.03284445032477379,
|
|
"logits/rejected": 0.01788032241165638,
|
|
"logps/chosen": -73.05523681640625,
|
|
"logps/ref_chosen": -72.76408386230469,
|
|
"logps/ref_rejected": -84.49275207519531,
|
|
"logps/rejected": -84.89814758300781,
|
|
"loss": 1.7105,
|
|
"margin_dpo/margin_mean": 0.11425450444221497,
|
|
"margin_dpo/margin_std": 0.38692593574523926,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.08919123204837491,
|
|
"fcm_dpo/beta": 5.876424789428711,
|
|
"fcm_dpo/delta": -0.19149622321128845,
|
|
"fcm_dpo/margin": 0.16999658942222595,
|
|
"fcm_dpo/q_t": 0.36740055680274963,
|
|
"grad_norm": 1536.5157470703125,
|
|
"learning_rate": 4.3283582089552234e-07,
|
|
"logits/chosen": 0.11274999380111694,
|
|
"logits/rejected": 0.047084975987672806,
|
|
"logps/chosen": -50.087608337402344,
|
|
"logps/ref_chosen": -49.820777893066406,
|
|
"logps/ref_rejected": -77.14368438720703,
|
|
"logps/rejected": -77.58052062988281,
|
|
"loss": 1.4267,
|
|
"margin_dpo/margin_mean": 0.16999676823616028,
|
|
"margin_dpo/margin_std": 0.3600447475910187,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.09070294784580499,
|
|
"fcm_dpo/beta": 6.065890312194824,
|
|
"fcm_dpo/delta": 0.5198989510536194,
|
|
"fcm_dpo/margin": 0.05685025453567505,
|
|
"fcm_dpo/q_t": 0.4424615502357483,
|
|
"grad_norm": 2018.1671142578125,
|
|
"learning_rate": 4.4029850746268654e-07,
|
|
"logits/chosen": 0.10006190836429596,
|
|
"logits/rejected": 0.09853567183017731,
|
|
"logps/chosen": -63.53302001953125,
|
|
"logps/ref_chosen": -63.22477340698242,
|
|
"logps/ref_rejected": -61.360477447509766,
|
|
"logps/rejected": -61.7255744934082,
|
|
"loss": 2.1575,
|
|
"margin_dpo/margin_mean": 0.05685010552406311,
|
|
"margin_dpo/margin_std": 0.4581069350242615,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.09221466364323508,
|
|
"fcm_dpo/beta": 6.438871383666992,
|
|
"fcm_dpo/delta": 0.36666175723075867,
|
|
"fcm_dpo/margin": 0.07799457013607025,
|
|
"fcm_dpo/q_t": 0.4422876238822937,
|
|
"grad_norm": 2275.224609375,
|
|
"learning_rate": 4.4776119402985074e-07,
|
|
"logits/chosen": 0.13344313204288483,
|
|
"logits/rejected": 0.10039305686950684,
|
|
"logps/chosen": -49.34346008300781,
|
|
"logps/ref_chosen": -49.01679992675781,
|
|
"logps/ref_rejected": -74.90817260742188,
|
|
"logps/rejected": -75.31282806396484,
|
|
"loss": 2.3946,
|
|
"margin_dpo/margin_mean": 0.07799449563026428,
|
|
"margin_dpo/margin_std": 0.47035545110702515,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.09372637944066516,
|
|
"fcm_dpo/beta": 6.52736759185791,
|
|
"fcm_dpo/delta": -0.3168880343437195,
|
|
"fcm_dpo/margin": 0.17259901762008667,
|
|
"fcm_dpo/q_t": 0.3768240511417389,
|
|
"grad_norm": 2118.017578125,
|
|
"learning_rate": 4.552238805970149e-07,
|
|
"logits/chosen": 0.09595489501953125,
|
|
"logits/rejected": 0.05686543136835098,
|
|
"logps/chosen": -63.08671188354492,
|
|
"logps/ref_chosen": -62.751869201660156,
|
|
"logps/ref_rejected": -78.93360900878906,
|
|
"logps/rejected": -79.44105529785156,
|
|
"loss": 1.764,
|
|
"margin_dpo/margin_mean": 0.1725986897945404,
|
|
"margin_dpo/margin_std": 0.43187639117240906,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.09523809523809523,
|
|
"fcm_dpo/beta": 5.918633460998535,
|
|
"fcm_dpo/delta": -0.517593264579773,
|
|
"fcm_dpo/margin": 0.2199816107749939,
|
|
"fcm_dpo/q_t": 0.31167125701904297,
|
|
"grad_norm": 1738.617431640625,
|
|
"learning_rate": 4.626865671641791e-07,
|
|
"logits/chosen": 0.18449003994464874,
|
|
"logits/rejected": 0.15899410843849182,
|
|
"logps/chosen": -60.80646514892578,
|
|
"logps/ref_chosen": -60.51525115966797,
|
|
"logps/ref_rejected": -85.11021423339844,
|
|
"logps/rejected": -85.62141418457031,
|
|
"loss": 1.3985,
|
|
"margin_dpo/margin_mean": 0.2199820578098297,
|
|
"margin_dpo/margin_std": 0.4385373294353485,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.09674981103552532,
|
|
"fcm_dpo/beta": 5.581248760223389,
|
|
"fcm_dpo/delta": -0.05339386314153671,
|
|
"fcm_dpo/margin": 0.06814375519752502,
|
|
"fcm_dpo/q_t": 0.43303388357162476,
|
|
"grad_norm": 1775.4036865234375,
|
|
"learning_rate": 4.701492537313433e-07,
|
|
"logits/chosen": 0.08545556664466858,
|
|
"logits/rejected": 0.06017608195543289,
|
|
"logps/chosen": -51.58530044555664,
|
|
"logps/ref_chosen": -51.20684814453125,
|
|
"logps/ref_rejected": -66.93081665039062,
|
|
"logps/rejected": -67.3774185180664,
|
|
"loss": 1.9515,
|
|
"margin_dpo/margin_mean": 0.06814375519752502,
|
|
"margin_dpo/margin_std": 0.4061310291290283,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.0982615268329554,
|
|
"fcm_dpo/beta": 4.899092674255371,
|
|
"fcm_dpo/delta": -0.8839624524116516,
|
|
"fcm_dpo/margin": 0.3248189091682434,
|
|
"fcm_dpo/q_t": 0.2600834369659424,
|
|
"grad_norm": 1128.9351806640625,
|
|
"learning_rate": 4.776119402985074e-07,
|
|
"logits/chosen": 0.1839187741279602,
|
|
"logits/rejected": 0.15427696704864502,
|
|
"logps/chosen": -67.62045288085938,
|
|
"logps/ref_chosen": -67.2886962890625,
|
|
"logps/ref_rejected": -74.44281005859375,
|
|
"logps/rejected": -75.09938049316406,
|
|
"loss": 1.0723,
|
|
"margin_dpo/margin_mean": 0.32481849193573,
|
|
"margin_dpo/margin_std": 0.4664004445075989,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.09977324263038549,
|
|
"fcm_dpo/beta": 4.846595764160156,
|
|
"fcm_dpo/delta": 0.3371145725250244,
|
|
"fcm_dpo/margin": 0.10960313677787781,
|
|
"fcm_dpo/q_t": 0.40099650621414185,
|
|
"grad_norm": 1611.027587890625,
|
|
"learning_rate": 4.850746268656717e-07,
|
|
"logits/chosen": 0.07052364945411682,
|
|
"logits/rejected": 0.04705191031098366,
|
|
"logps/chosen": -71.12271118164062,
|
|
"logps/ref_chosen": -70.743408203125,
|
|
"logps/ref_rejected": -77.26499938964844,
|
|
"logps/rejected": -77.75389862060547,
|
|
"loss": 1.7288,
|
|
"margin_dpo/margin_mean": 0.10960283875465393,
|
|
"margin_dpo/margin_std": 0.41340774297714233,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.10128495842781557,
|
|
"fcm_dpo/beta": 4.897915840148926,
|
|
"fcm_dpo/delta": -0.19840705394744873,
|
|
"fcm_dpo/margin": 0.20993411540985107,
|
|
"fcm_dpo/q_t": 0.3695130944252014,
|
|
"grad_norm": 1324.92138671875,
|
|
"learning_rate": 4.925373134328357e-07,
|
|
"logits/chosen": 0.06483873724937439,
|
|
"logits/rejected": 0.009191485121846199,
|
|
"logps/chosen": -60.90003204345703,
|
|
"logps/ref_chosen": -60.60260009765625,
|
|
"logps/ref_rejected": -75.22235870361328,
|
|
"logps/rejected": -75.72972106933594,
|
|
"loss": 1.3653,
|
|
"margin_dpo/margin_mean": 0.20993369817733765,
|
|
"margin_dpo/margin_std": 0.46069464087486267,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.10279667422524566,
|
|
"fcm_dpo/beta": 4.430768966674805,
|
|
"fcm_dpo/delta": -0.389474093914032,
|
|
"fcm_dpo/margin": 0.2649560272693634,
|
|
"fcm_dpo/q_t": 0.3379044234752655,
|
|
"grad_norm": 1240.4334716796875,
|
|
"learning_rate": 5e-07,
|
|
"logits/chosen": 0.0564710795879364,
|
|
"logits/rejected": 0.0265452042222023,
|
|
"logps/chosen": -77.92970275878906,
|
|
"logps/ref_chosen": -77.52836608886719,
|
|
"logps/ref_rejected": -93.17778015136719,
|
|
"logps/rejected": -93.84407043457031,
|
|
"loss": 1.2815,
|
|
"margin_dpo/margin_mean": 0.2649560570716858,
|
|
"margin_dpo/margin_std": 0.5076867938041687,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.10430839002267574,
|
|
"fcm_dpo/beta": 4.559112548828125,
|
|
"fcm_dpo/delta": 0.041590481996536255,
|
|
"fcm_dpo/margin": 0.17647495865821838,
|
|
"fcm_dpo/q_t": 0.3657025992870331,
|
|
"grad_norm": 1222.0938720703125,
|
|
"learning_rate": 4.999965034812934e-07,
|
|
"logits/chosen": 0.07601971179246902,
|
|
"logits/rejected": 0.03346855193376541,
|
|
"logps/chosen": -66.33596801757812,
|
|
"logps/ref_chosen": -65.94305419921875,
|
|
"logps/ref_rejected": -89.7735595703125,
|
|
"logps/rejected": -90.34294891357422,
|
|
"loss": 1.3248,
|
|
"margin_dpo/margin_mean": 0.17647448182106018,
|
|
"margin_dpo/margin_std": 0.39660531282424927,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.10582010582010581,
|
|
"fcm_dpo/beta": 4.578952312469482,
|
|
"fcm_dpo/delta": 0.2021067887544632,
|
|
"fcm_dpo/margin": 0.14442333579063416,
|
|
"fcm_dpo/q_t": 0.3830464482307434,
|
|
"grad_norm": 1205.255615234375,
|
|
"learning_rate": 4.999860140229787e-07,
|
|
"logits/chosen": 0.10305196791887283,
|
|
"logits/rejected": 0.08014979958534241,
|
|
"logps/chosen": -62.320560455322266,
|
|
"logps/ref_chosen": -61.95791244506836,
|
|
"logps/ref_rejected": -75.80945587158203,
|
|
"logps/rejected": -76.3165283203125,
|
|
"loss": 1.483,
|
|
"margin_dpo/margin_mean": 0.14442339539527893,
|
|
"margin_dpo/margin_std": 0.4010279178619385,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.1073318216175359,
|
|
"fcm_dpo/beta": 4.783401012420654,
|
|
"fcm_dpo/delta": 0.1197274923324585,
|
|
"fcm_dpo/margin": 0.04047618806362152,
|
|
"fcm_dpo/q_t": 0.4602009356021881,
|
|
"grad_norm": 1565.259033203125,
|
|
"learning_rate": 4.999685319184688e-07,
|
|
"logits/chosen": 0.06028672307729721,
|
|
"logits/rejected": 0.04451918601989746,
|
|
"logps/chosen": -63.822017669677734,
|
|
"logps/ref_chosen": -63.34757995605469,
|
|
"logps/ref_rejected": -67.49658203125,
|
|
"logps/rejected": -68.0114974975586,
|
|
"loss": 2.0922,
|
|
"margin_dpo/margin_mean": 0.04047642648220062,
|
|
"margin_dpo/margin_std": 0.4636165499687195,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.10884353741496598,
|
|
"fcm_dpo/beta": 4.545166969299316,
|
|
"fcm_dpo/delta": -0.38348883390426636,
|
|
"fcm_dpo/margin": 0.26161858439445496,
|
|
"fcm_dpo/q_t": 0.34238147735595703,
|
|
"grad_norm": 1248.3271484375,
|
|
"learning_rate": 4.999440576567755e-07,
|
|
"logits/chosen": 0.06008986011147499,
|
|
"logits/rejected": -0.0004155375063419342,
|
|
"logps/chosen": -56.19200134277344,
|
|
"logps/ref_chosen": -55.85929870605469,
|
|
"logps/ref_rejected": -68.45423889160156,
|
|
"logps/rejected": -69.04856872558594,
|
|
"loss": 1.317,
|
|
"margin_dpo/margin_mean": 0.26161882281303406,
|
|
"margin_dpo/margin_std": 0.5102354288101196,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.11035525321239607,
|
|
"fcm_dpo/beta": 4.637323379516602,
|
|
"fcm_dpo/delta": 0.24215392768383026,
|
|
"fcm_dpo/margin": 0.1334337592124939,
|
|
"fcm_dpo/q_t": 0.41474786400794983,
|
|
"grad_norm": 1419.067138671875,
|
|
"learning_rate": 4.999125919224965e-07,
|
|
"logits/chosen": 0.04662982001900673,
|
|
"logits/rejected": 0.032734230160713196,
|
|
"logps/chosen": -69.61697387695312,
|
|
"logps/ref_chosen": -69.13880920410156,
|
|
"logps/ref_rejected": -79.04586791992188,
|
|
"logps/rejected": -79.657470703125,
|
|
"loss": 1.743,
|
|
"margin_dpo/margin_mean": 0.1334337592124939,
|
|
"margin_dpo/margin_std": 0.49519866704940796,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.11186696900982615,
|
|
"fcm_dpo/beta": 4.333841323852539,
|
|
"fcm_dpo/delta": -0.5068634152412415,
|
|
"fcm_dpo/margin": 0.2982664108276367,
|
|
"fcm_dpo/q_t": 0.32818102836608887,
|
|
"grad_norm": 919.8814697265625,
|
|
"learning_rate": 4.998741355957963e-07,
|
|
"logits/chosen": 0.09711477905511856,
|
|
"logits/rejected": 0.046173036098480225,
|
|
"logps/chosen": -50.251930236816406,
|
|
"logps/ref_chosen": -49.923736572265625,
|
|
"logps/ref_rejected": -81.73213958740234,
|
|
"logps/rejected": -82.35859680175781,
|
|
"loss": 1.0557,
|
|
"margin_dpo/margin_mean": 0.2982656955718994,
|
|
"margin_dpo/margin_std": 0.4928857088088989,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.11337868480725624,
|
|
"fcm_dpo/beta": 4.022831916809082,
|
|
"fcm_dpo/delta": -0.23631714284420013,
|
|
"fcm_dpo/margin": 0.263003945350647,
|
|
"fcm_dpo/q_t": 0.35024577379226685,
|
|
"grad_norm": 891.9666748046875,
|
|
"learning_rate": 4.998286897523808e-07,
|
|
"logits/chosen": 0.07056191563606262,
|
|
"logits/rejected": 0.04056151211261749,
|
|
"logps/chosen": -46.45494842529297,
|
|
"logps/ref_chosen": -46.06875228881836,
|
|
"logps/ref_rejected": -66.1181411743164,
|
|
"logps/rejected": -66.76734161376953,
|
|
"loss": 1.1421,
|
|
"margin_dpo/margin_mean": 0.26300370693206787,
|
|
"margin_dpo/margin_std": 0.46238186955451965,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.11489040060468632,
|
|
"fcm_dpo/beta": 4.063389301300049,
|
|
"fcm_dpo/delta": 0.1831236034631729,
|
|
"fcm_dpo/margin": 0.16686102747917175,
|
|
"fcm_dpo/q_t": 0.3965134620666504,
|
|
"grad_norm": 1074.15380859375,
|
|
"learning_rate": 4.997762556634679e-07,
|
|
"logits/chosen": 0.065489761531353,
|
|
"logits/rejected": 0.02454444393515587,
|
|
"logps/chosen": -54.44904708862305,
|
|
"logps/ref_chosen": -54.06275177001953,
|
|
"logps/ref_rejected": -74.87464141845703,
|
|
"logps/rejected": -75.42780303955078,
|
|
"loss": 1.5124,
|
|
"margin_dpo/margin_mean": 0.16686102747917175,
|
|
"margin_dpo/margin_std": 0.4800097346305847,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.1164021164021164,
|
|
"fcm_dpo/beta": 4.121022701263428,
|
|
"fcm_dpo/delta": 0.1284758448600769,
|
|
"fcm_dpo/margin": 0.17436596751213074,
|
|
"fcm_dpo/q_t": 0.38447582721710205,
|
|
"grad_norm": 1231.5897216796875,
|
|
"learning_rate": 4.99716834795752e-07,
|
|
"logits/chosen": 0.0951685756444931,
|
|
"logits/rejected": 0.05633886903524399,
|
|
"logps/chosen": -53.52678680419922,
|
|
"logps/ref_chosen": -53.07609176635742,
|
|
"logps/ref_rejected": -74.45601654052734,
|
|
"logps/rejected": -75.0810775756836,
|
|
"loss": 1.4287,
|
|
"margin_dpo/margin_mean": 0.17436623573303223,
|
|
"margin_dpo/margin_std": 0.42879635095596313,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.11791383219954649,
|
|
"fcm_dpo/beta": 4.575469970703125,
|
|
"fcm_dpo/delta": 0.39146047830581665,
|
|
"fcm_dpo/margin": 0.1026681512594223,
|
|
"fcm_dpo/q_t": 0.40832120180130005,
|
|
"grad_norm": 1497.64501953125,
|
|
"learning_rate": 4.996504288113623e-07,
|
|
"logits/chosen": 0.06529897451400757,
|
|
"logits/rejected": 0.04548865556716919,
|
|
"logps/chosen": -68.17215728759766,
|
|
"logps/ref_chosen": -67.72541809082031,
|
|
"logps/ref_rejected": -79.03926849365234,
|
|
"logps/rejected": -79.58867645263672,
|
|
"loss": 1.7305,
|
|
"margin_dpo/margin_mean": 0.10266757011413574,
|
|
"margin_dpo/margin_std": 0.4406697750091553,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.11942554799697656,
|
|
"fcm_dpo/beta": 4.646597385406494,
|
|
"fcm_dpo/delta": -0.01867286115884781,
|
|
"fcm_dpo/margin": 0.1862274706363678,
|
|
"fcm_dpo/q_t": 0.36434149742126465,
|
|
"grad_norm": 1128.8731689453125,
|
|
"learning_rate": 4.995770395678171e-07,
|
|
"logits/chosen": 0.10144677013158798,
|
|
"logits/rejected": 0.04583786800503731,
|
|
"logps/chosen": -52.6339111328125,
|
|
"logps/ref_chosen": -52.16064453125,
|
|
"logps/ref_rejected": -83.31062316894531,
|
|
"logps/rejected": -83.97010803222656,
|
|
"loss": 1.2794,
|
|
"margin_dpo/margin_mean": 0.1862274706363678,
|
|
"margin_dpo/margin_std": 0.41245660185813904,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.12093726379440665,
|
|
"fcm_dpo/beta": 4.5290632247924805,
|
|
"fcm_dpo/delta": -0.1917770802974701,
|
|
"fcm_dpo/margin": 0.2254573255777359,
|
|
"fcm_dpo/q_t": 0.3562784790992737,
|
|
"grad_norm": 1285.3790283203125,
|
|
"learning_rate": 4.994966691179711e-07,
|
|
"logits/chosen": 0.12082693725824356,
|
|
"logits/rejected": 0.06224146857857704,
|
|
"logps/chosen": -61.87617492675781,
|
|
"logps/ref_chosen": -61.410560607910156,
|
|
"logps/ref_rejected": -78.66004943847656,
|
|
"logps/rejected": -79.35111999511719,
|
|
"loss": 1.3237,
|
|
"margin_dpo/margin_mean": 0.22545722126960754,
|
|
"margin_dpo/margin_std": 0.46818071603775024,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.12244897959183673,
|
|
"fcm_dpo/beta": 4.244363307952881,
|
|
"fcm_dpo/delta": -0.19199597835540771,
|
|
"fcm_dpo/margin": 0.2396332323551178,
|
|
"fcm_dpo/q_t": 0.35482287406921387,
|
|
"grad_norm": 1211.70751953125,
|
|
"learning_rate": 4.994093197099587e-07,
|
|
"logits/chosen": 0.0907532274723053,
|
|
"logits/rejected": 0.05751120299100876,
|
|
"logps/chosen": -64.23452758789062,
|
|
"logps/ref_chosen": -63.80437088012695,
|
|
"logps/ref_rejected": -79.3484115600586,
|
|
"logps/rejected": -80.01820373535156,
|
|
"loss": 1.3572,
|
|
"margin_dpo/margin_mean": 0.23963311314582825,
|
|
"margin_dpo/margin_std": 0.5050238966941833,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.12396069538926682,
|
|
"fcm_dpo/beta": 4.048993110656738,
|
|
"fcm_dpo/delta": -0.4337691068649292,
|
|
"fcm_dpo/margin": 0.30429962277412415,
|
|
"fcm_dpo/q_t": 0.3015890121459961,
|
|
"grad_norm": 1029.6053466796875,
|
|
"learning_rate": 4.993149937871306e-07,
|
|
"logits/chosen": 0.05798634514212608,
|
|
"logits/rejected": -0.002718113362789154,
|
|
"logps/chosen": -49.222694396972656,
|
|
"logps/ref_chosen": -48.817893981933594,
|
|
"logps/ref_rejected": -70.31497955322266,
|
|
"logps/rejected": -71.02407836914062,
|
|
"loss": 1.0236,
|
|
"margin_dpo/margin_mean": 0.3042997121810913,
|
|
"margin_dpo/margin_std": 0.42897915840148926,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.1254724111866969,
|
|
"fcm_dpo/beta": 3.6137092113494873,
|
|
"fcm_dpo/delta": -0.46512115001678467,
|
|
"fcm_dpo/margin": 0.34642505645751953,
|
|
"fcm_dpo/q_t": 0.2966233491897583,
|
|
"grad_norm": 804.3886108398438,
|
|
"learning_rate": 4.992136939879856e-07,
|
|
"logits/chosen": 0.14557309448719025,
|
|
"logits/rejected": 0.09665486216545105,
|
|
"logps/chosen": -57.57659149169922,
|
|
"logps/ref_chosen": -57.15077209472656,
|
|
"logps/ref_rejected": -75.1710205078125,
|
|
"logps/rejected": -75.9432601928711,
|
|
"loss": 1.0428,
|
|
"margin_dpo/margin_mean": 0.34642475843429565,
|
|
"margin_dpo/margin_std": 0.505172610282898,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.12698412698412698,
|
|
"fcm_dpo/beta": 3.7466650009155273,
|
|
"fcm_dpo/delta": 0.3175293207168579,
|
|
"fcm_dpo/margin": 0.14622744917869568,
|
|
"fcm_dpo/q_t": 0.4235062599182129,
|
|
"grad_norm": 1326.18408203125,
|
|
"learning_rate": 4.991054231460969e-07,
|
|
"logits/chosen": 0.11884990334510803,
|
|
"logits/rejected": 0.078602634370327,
|
|
"logps/chosen": -65.2996826171875,
|
|
"logps/ref_chosen": -64.77729797363281,
|
|
"logps/ref_rejected": -84.71949768066406,
|
|
"logps/rejected": -85.38810729980469,
|
|
"loss": 1.6192,
|
|
"margin_dpo/margin_mean": 0.14622774720191956,
|
|
"margin_dpo/margin_std": 0.5217863321304321,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.12849584278155707,
|
|
"fcm_dpo/beta": 3.6050148010253906,
|
|
"fcm_dpo/delta": -0.3528403639793396,
|
|
"fcm_dpo/margin": 0.32242467999458313,
|
|
"fcm_dpo/q_t": 0.3231235146522522,
|
|
"grad_norm": 1045.094482421875,
|
|
"learning_rate": 4.989901842900325e-07,
|
|
"logits/chosen": 0.09119876474142075,
|
|
"logits/rejected": 0.04982073977589607,
|
|
"logps/chosen": -50.68433380126953,
|
|
"logps/ref_chosen": -50.25169372558594,
|
|
"logps/ref_rejected": -66.55439758300781,
|
|
"logps/rejected": -67.30945587158203,
|
|
"loss": 1.1877,
|
|
"margin_dpo/margin_mean": 0.3224252462387085,
|
|
"margin_dpo/margin_std": 0.5532187223434448,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.13000755857898716,
|
|
"fcm_dpo/beta": 3.499697685241699,
|
|
"fcm_dpo/delta": -0.12498529255390167,
|
|
"fcm_dpo/margin": 0.27497416734695435,
|
|
"fcm_dpo/q_t": 0.366787314414978,
|
|
"grad_norm": 1007.2182006835938,
|
|
"learning_rate": 4.988679806432711e-07,
|
|
"logits/chosen": 0.15556927025318146,
|
|
"logits/rejected": 0.13702501356601715,
|
|
"logps/chosen": -61.228721618652344,
|
|
"logps/ref_chosen": -60.72917938232422,
|
|
"logps/ref_rejected": -72.30961608886719,
|
|
"logps/rejected": -73.0841293334961,
|
|
"loss": 1.2257,
|
|
"margin_dpo/margin_mean": 0.2749743163585663,
|
|
"margin_dpo/margin_std": 0.5419769883155823,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.13151927437641722,
|
|
"fcm_dpo/beta": 3.2829439640045166,
|
|
"fcm_dpo/delta": -0.21701934933662415,
|
|
"fcm_dpo/margin": 0.31577974557876587,
|
|
"fcm_dpo/q_t": 0.33885273337364197,
|
|
"grad_norm": 955.0740966796875,
|
|
"learning_rate": 4.987388156241114e-07,
|
|
"logits/chosen": 0.0836351215839386,
|
|
"logits/rejected": 0.030040550976991653,
|
|
"logps/chosen": -66.27688598632812,
|
|
"logps/ref_chosen": -65.75796508789062,
|
|
"logps/ref_rejected": -84.81159973144531,
|
|
"logps/rejected": -85.64628601074219,
|
|
"loss": 1.2115,
|
|
"margin_dpo/margin_mean": 0.31578001379966736,
|
|
"margin_dpo/margin_std": 0.5747581720352173,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.1330309901738473,
|
|
"fcm_dpo/beta": 3.314173460006714,
|
|
"fcm_dpo/delta": 0.09749428927898407,
|
|
"fcm_dpo/margin": 0.2290271520614624,
|
|
"fcm_dpo/q_t": 0.38487881422042847,
|
|
"grad_norm": 1090.8194580078125,
|
|
"learning_rate": 4.986026928455767e-07,
|
|
"logits/chosen": 0.14258967339992523,
|
|
"logits/rejected": 0.11725766956806183,
|
|
"logps/chosen": -63.33268356323242,
|
|
"logps/ref_chosen": -62.82402801513672,
|
|
"logps/ref_rejected": -74.9607162475586,
|
|
"logps/rejected": -75.69840240478516,
|
|
"loss": 1.496,
|
|
"margin_dpo/margin_mean": 0.22902727127075195,
|
|
"margin_dpo/margin_std": 0.6007837653160095,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.1345427059712774,
|
|
"fcm_dpo/beta": 3.4935643672943115,
|
|
"fcm_dpo/delta": 0.11871908605098724,
|
|
"fcm_dpo/margin": 0.20880961418151855,
|
|
"fcm_dpo/q_t": 0.3681311309337616,
|
|
"grad_norm": 984.82470703125,
|
|
"learning_rate": 4.984596161153135e-07,
|
|
"logits/chosen": 0.1835154891014099,
|
|
"logits/rejected": 0.10450133681297302,
|
|
"logps/chosen": -41.65589141845703,
|
|
"logps/ref_chosen": -41.191436767578125,
|
|
"logps/ref_rejected": -85.44769287109375,
|
|
"logps/rejected": -86.12095642089844,
|
|
"loss": 1.611,
|
|
"margin_dpo/margin_mean": 0.2088102400302887,
|
|
"margin_dpo/margin_std": 0.6353697776794434,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.1360544217687075,
|
|
"fcm_dpo/beta": 3.4933032989501953,
|
|
"fcm_dpo/delta": 0.05548207834362984,
|
|
"fcm_dpo/margin": 0.22840037941932678,
|
|
"fcm_dpo/q_t": 0.37616848945617676,
|
|
"grad_norm": 1013.5514526367188,
|
|
"learning_rate": 4.983095894354857e-07,
|
|
"logits/chosen": 0.08436588197946548,
|
|
"logits/rejected": 0.032395198941230774,
|
|
"logps/chosen": -57.09331512451172,
|
|
"logps/ref_chosen": -56.58390808105469,
|
|
"logps/ref_rejected": -86.86978149414062,
|
|
"logps/rejected": -87.60758972167969,
|
|
"loss": 1.5349,
|
|
"margin_dpo/margin_mean": 0.22840029001235962,
|
|
"margin_dpo/margin_std": 0.6292995810508728,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.13756613756613756,
|
|
"fcm_dpo/beta": 3.3264732360839844,
|
|
"fcm_dpo/delta": -0.30509454011917114,
|
|
"fcm_dpo/margin": 0.3368951082229614,
|
|
"fcm_dpo/q_t": 0.33719223737716675,
|
|
"grad_norm": 858.5227661132812,
|
|
"learning_rate": 4.98152617002662e-07,
|
|
"logits/chosen": 0.09481631219387054,
|
|
"logits/rejected": 0.05292369797825813,
|
|
"logps/chosen": -52.90049362182617,
|
|
"logps/ref_chosen": -52.38234329223633,
|
|
"logps/ref_rejected": -72.17642211914062,
|
|
"logps/rejected": -73.0314712524414,
|
|
"loss": 1.3205,
|
|
"margin_dpo/margin_mean": 0.33689484000205994,
|
|
"margin_dpo/margin_std": 0.6805263757705688,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.13907785336356765,
|
|
"fcm_dpo/beta": 3.2963027954101562,
|
|
"fcm_dpo/delta": -0.1682032346725464,
|
|
"fcm_dpo/margin": 0.2999283969402313,
|
|
"fcm_dpo/q_t": 0.3624107241630554,
|
|
"grad_norm": 807.3601684570312,
|
|
"learning_rate": 4.979887032076988e-07,
|
|
"logits/chosen": 0.13207153975963593,
|
|
"logits/rejected": 0.09528068453073502,
|
|
"logps/chosen": -53.570884704589844,
|
|
"logps/ref_chosen": -53.00870132446289,
|
|
"logps/ref_rejected": -79.77812957763672,
|
|
"logps/rejected": -80.64024353027344,
|
|
"loss": 1.2978,
|
|
"margin_dpo/margin_mean": 0.2999285161495209,
|
|
"margin_dpo/margin_std": 0.5701849460601807,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.14058956916099774,
|
|
"fcm_dpo/beta": 3.103659152984619,
|
|
"fcm_dpo/delta": -0.10753681510686874,
|
|
"fcm_dpo/margin": 0.3049851357936859,
|
|
"fcm_dpo/q_t": 0.3586532771587372,
|
|
"grad_norm": 682.6175537109375,
|
|
"learning_rate": 4.978178526356172e-07,
|
|
"logits/chosen": 0.08406171947717667,
|
|
"logits/rejected": 0.058728571981191635,
|
|
"logps/chosen": -45.46405792236328,
|
|
"logps/ref_chosen": -44.90705108642578,
|
|
"logps/ref_rejected": -58.7879524230957,
|
|
"logps/rejected": -59.649932861328125,
|
|
"loss": 1.3186,
|
|
"margin_dpo/margin_mean": 0.3049851953983307,
|
|
"margin_dpo/margin_std": 0.6701629161834717,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.1421012849584278,
|
|
"fcm_dpo/beta": 2.9392552375793457,
|
|
"fcm_dpo/delta": -0.2067282795906067,
|
|
"fcm_dpo/margin": 0.34755954146385193,
|
|
"fcm_dpo/q_t": 0.33474797010421753,
|
|
"grad_norm": 712.4808349609375,
|
|
"learning_rate": 4.976400700654751e-07,
|
|
"logits/chosen": 0.14686943590641022,
|
|
"logits/rejected": 0.11152657866477966,
|
|
"logps/chosen": -60.35730743408203,
|
|
"logps/ref_chosen": -59.93777084350586,
|
|
"logps/ref_rejected": -79.3138427734375,
|
|
"logps/rejected": -80.0809326171875,
|
|
"loss": 1.2872,
|
|
"margin_dpo/margin_mean": 0.3475595712661743,
|
|
"margin_dpo/margin_std": 0.7137982845306396,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.1436130007558579,
|
|
"fcm_dpo/beta": 2.8255391120910645,
|
|
"fcm_dpo/delta": -0.2533861994743347,
|
|
"fcm_dpo/margin": 0.3784918189048767,
|
|
"fcm_dpo/q_t": 0.32923561334609985,
|
|
"grad_norm": 712.9932861328125,
|
|
"learning_rate": 4.974553604702332e-07,
|
|
"logits/chosen": 0.08593058586120605,
|
|
"logits/rejected": 0.02429114282131195,
|
|
"logps/chosen": -60.73108673095703,
|
|
"logps/ref_chosen": -60.168487548828125,
|
|
"logps/ref_rejected": -90.73665618896484,
|
|
"logps/rejected": -91.67774963378906,
|
|
"loss": 1.0566,
|
|
"margin_dpo/margin_mean": 0.3784918189048767,
|
|
"margin_dpo/margin_std": 0.5973398089408875,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.14512471655328799,
|
|
"fcm_dpo/beta": 2.735480546951294,
|
|
"fcm_dpo/delta": -0.29815971851348877,
|
|
"fcm_dpo/margin": 0.40782594680786133,
|
|
"fcm_dpo/q_t": 0.3135584592819214,
|
|
"grad_norm": 572.3253784179688,
|
|
"learning_rate": 4.972637290166157e-07,
|
|
"logits/chosen": 0.10200202465057373,
|
|
"logits/rejected": 0.06153492629528046,
|
|
"logps/chosen": -61.17717361450195,
|
|
"logps/ref_chosen": -60.66877746582031,
|
|
"logps/ref_rejected": -88.30673217773438,
|
|
"logps/rejected": -89.22294616699219,
|
|
"loss": 1.0682,
|
|
"margin_dpo/margin_mean": 0.4078254997730255,
|
|
"margin_dpo/margin_std": 0.6238170862197876,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.14663643235071808,
|
|
"fcm_dpo/beta": 2.6156375408172607,
|
|
"fcm_dpo/delta": -0.02265828847885132,
|
|
"fcm_dpo/margin": 0.3311424255371094,
|
|
"fcm_dpo/q_t": 0.373293936252594,
|
|
"grad_norm": 762.564697265625,
|
|
"learning_rate": 4.970651810649666e-07,
|
|
"logits/chosen": 0.049608707427978516,
|
|
"logits/rejected": 0.007526304107159376,
|
|
"logps/chosen": -65.6923828125,
|
|
"logps/ref_chosen": -65.04412078857422,
|
|
"logps/ref_rejected": -78.42092895507812,
|
|
"logps/rejected": -79.40032958984375,
|
|
"loss": 1.1484,
|
|
"margin_dpo/margin_mean": 0.33114248514175415,
|
|
"margin_dpo/margin_std": 0.6242318153381348,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.14814814814814814,
|
|
"fcm_dpo/beta": 2.640901565551758,
|
|
"fcm_dpo/delta": 0.013076554983854294,
|
|
"fcm_dpo/margin": 0.3170929551124573,
|
|
"fcm_dpo/q_t": 0.36007198691368103,
|
|
"grad_norm": 630.3800659179688,
|
|
"learning_rate": 4.968597221690985e-07,
|
|
"logits/chosen": 0.1261807680130005,
|
|
"logits/rejected": 0.10040568560361862,
|
|
"logps/chosen": -55.91543197631836,
|
|
"logps/ref_chosen": -55.503231048583984,
|
|
"logps/ref_rejected": -72.81553649902344,
|
|
"logps/rejected": -73.54483032226562,
|
|
"loss": 1.0684,
|
|
"margin_dpo/margin_mean": 0.3170931935310364,
|
|
"margin_dpo/margin_std": 0.5252500772476196,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.14965986394557823,
|
|
"fcm_dpo/beta": 2.610173463821411,
|
|
"fcm_dpo/delta": -0.0660782903432846,
|
|
"fcm_dpo/margin": 0.34793078899383545,
|
|
"fcm_dpo/q_t": 0.35216856002807617,
|
|
"grad_norm": 699.6543579101562,
|
|
"learning_rate": 4.966473580761389e-07,
|
|
"logits/chosen": 0.14399348199367523,
|
|
"logits/rejected": 0.10823240876197815,
|
|
"logps/chosen": -59.083717346191406,
|
|
"logps/ref_chosen": -58.57563781738281,
|
|
"logps/ref_rejected": -78.693603515625,
|
|
"logps/rejected": -79.54962158203125,
|
|
"loss": 1.0688,
|
|
"margin_dpo/margin_mean": 0.3479306697845459,
|
|
"margin_dpo/margin_std": 0.5678527355194092,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.15117157974300832,
|
|
"fcm_dpo/beta": 2.6637752056121826,
|
|
"fcm_dpo/delta": -0.013185635209083557,
|
|
"fcm_dpo/margin": 0.3216173052787781,
|
|
"fcm_dpo/q_t": 0.37674546241760254,
|
|
"grad_norm": 795.3727416992188,
|
|
"learning_rate": 4.964280947263676e-07,
|
|
"logits/chosen": 0.12704704701900482,
|
|
"logits/rejected": 0.11997953057289124,
|
|
"logps/chosen": -80.09172058105469,
|
|
"logps/ref_chosen": -79.58343505859375,
|
|
"logps/ref_rejected": -92.152587890625,
|
|
"logps/rejected": -92.98249053955078,
|
|
"loss": 1.3194,
|
|
"margin_dpo/margin_mean": 0.3216173052787781,
|
|
"margin_dpo/margin_std": 0.7164607048034668,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.15268329554043839,
|
|
"fcm_dpo/beta": 2.503209114074707,
|
|
"fcm_dpo/delta": -0.42283251881599426,
|
|
"fcm_dpo/margin": 0.48820391297340393,
|
|
"fcm_dpo/q_t": 0.2904645800590515,
|
|
"grad_norm": 486.78692626953125,
|
|
"learning_rate": 4.96201938253052e-07,
|
|
"logits/chosen": 0.12482018768787384,
|
|
"logits/rejected": 0.08969442546367645,
|
|
"logps/chosen": -52.763427734375,
|
|
"logps/ref_chosen": -52.332786560058594,
|
|
"logps/ref_rejected": -69.55589294433594,
|
|
"logps/rejected": -70.47473907470703,
|
|
"loss": 0.8776,
|
|
"margin_dpo/margin_mean": 0.48820409178733826,
|
|
"margin_dpo/margin_std": 0.5912094116210938,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.15419501133786848,
|
|
"fcm_dpo/beta": 2.454827308654785,
|
|
"fcm_dpo/delta": 0.0955345630645752,
|
|
"fcm_dpo/margin": 0.3097192943096161,
|
|
"fcm_dpo/q_t": 0.37257254123687744,
|
|
"grad_norm": 702.9950561523438,
|
|
"learning_rate": 4.959688949822748e-07,
|
|
"logits/chosen": 0.03808064013719559,
|
|
"logits/rejected": 0.0005772793665528297,
|
|
"logps/chosen": -65.2833251953125,
|
|
"logps/ref_chosen": -64.74348449707031,
|
|
"logps/ref_rejected": -69.06132507324219,
|
|
"logps/rejected": -69.910888671875,
|
|
"loss": 1.2787,
|
|
"margin_dpo/margin_mean": 0.3097189664840698,
|
|
"margin_dpo/margin_std": 0.6924293041229248,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.15570672713529857,
|
|
"fcm_dpo/beta": 2.502030372619629,
|
|
"fcm_dpo/delta": 0.10205619037151337,
|
|
"fcm_dpo/margin": 0.3015629053115845,
|
|
"fcm_dpo/q_t": 0.3854670822620392,
|
|
"grad_norm": 735.919677734375,
|
|
"learning_rate": 4.957289714327572e-07,
|
|
"logits/chosen": 0.14897724986076355,
|
|
"logits/rejected": 0.11798413842916489,
|
|
"logps/chosen": -64.3989028930664,
|
|
"logps/ref_chosen": -63.83664321899414,
|
|
"logps/ref_rejected": -79.32362365722656,
|
|
"logps/rejected": -80.18745422363281,
|
|
"loss": 1.2513,
|
|
"margin_dpo/margin_mean": 0.30156272649765015,
|
|
"margin_dpo/margin_std": 0.6556486487388611,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.15721844293272866,
|
|
"fcm_dpo/beta": 2.617619514465332,
|
|
"fcm_dpo/delta": 0.2569226324558258,
|
|
"fcm_dpo/margin": 0.2314532995223999,
|
|
"fcm_dpo/q_t": 0.4117388129234314,
|
|
"grad_norm": 881.36279296875,
|
|
"learning_rate": 4.954821743156767e-07,
|
|
"logits/chosen": 0.12788525223731995,
|
|
"logits/rejected": 0.04946213215589523,
|
|
"logps/chosen": -61.58454895019531,
|
|
"logps/ref_chosen": -60.99920654296875,
|
|
"logps/ref_rejected": -98.84645080566406,
|
|
"logps/rejected": -99.66325378417969,
|
|
"loss": 1.3872,
|
|
"margin_dpo/margin_mean": 0.23145365715026855,
|
|
"margin_dpo/margin_std": 0.6401793956756592,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.15873015873015872,
|
|
"fcm_dpo/beta": 2.609062671661377,
|
|
"fcm_dpo/delta": -0.13659755885601044,
|
|
"fcm_dpo/margin": 0.37271663546562195,
|
|
"fcm_dpo/q_t": 0.3249310255050659,
|
|
"grad_norm": 777.34033203125,
|
|
"learning_rate": 4.952285105344791e-07,
|
|
"logits/chosen": 0.08643177151679993,
|
|
"logits/rejected": 0.03597265109419823,
|
|
"logps/chosen": -71.42201232910156,
|
|
"logps/ref_chosen": -70.95027160644531,
|
|
"logps/ref_rejected": -87.88340759277344,
|
|
"logps/rejected": -88.72787475585938,
|
|
"loss": 1.1343,
|
|
"margin_dpo/margin_mean": 0.3727165460586548,
|
|
"margin_dpo/margin_std": 0.6506966352462769,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.1602418745275888,
|
|
"fcm_dpo/beta": 2.6030874252319336,
|
|
"fcm_dpo/delta": 0.14328259229660034,
|
|
"fcm_dpo/margin": 0.2754959166049957,
|
|
"fcm_dpo/q_t": 0.37446996569633484,
|
|
"grad_norm": 718.373779296875,
|
|
"learning_rate": 4.949679871846857e-07,
|
|
"logits/chosen": 0.10392872244119644,
|
|
"logits/rejected": 0.0915747880935669,
|
|
"logps/chosen": -62.947120666503906,
|
|
"logps/ref_chosen": -62.45933151245117,
|
|
"logps/ref_rejected": -67.00595092773438,
|
|
"logps/rejected": -67.76923370361328,
|
|
"loss": 1.2383,
|
|
"margin_dpo/margin_mean": 0.27549615502357483,
|
|
"margin_dpo/margin_std": 0.5983477234840393,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.1617535903250189,
|
|
"fcm_dpo/beta": 2.7499284744262695,
|
|
"fcm_dpo/delta": 0.3244553506374359,
|
|
"fcm_dpo/margin": 0.19795957207679749,
|
|
"fcm_dpo/q_t": 0.42030882835388184,
|
|
"grad_norm": 988.1804809570312,
|
|
"learning_rate": 4.947006115536947e-07,
|
|
"logits/chosen": 0.043431270867586136,
|
|
"logits/rejected": 0.025126943364739418,
|
|
"logps/chosen": -76.38347625732422,
|
|
"logps/ref_chosen": -75.83796691894531,
|
|
"logps/ref_rejected": -87.74038696289062,
|
|
"logps/rejected": -88.48384094238281,
|
|
"loss": 1.5542,
|
|
"margin_dpo/margin_mean": 0.19795984029769897,
|
|
"margin_dpo/margin_std": 0.7145728468894958,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.16326530612244897,
|
|
"fcm_dpo/beta": 2.7320618629455566,
|
|
"fcm_dpo/delta": -0.21486234664916992,
|
|
"fcm_dpo/margin": 0.3816843628883362,
|
|
"fcm_dpo/q_t": 0.35164210200309753,
|
|
"grad_norm": 659.1549682617188,
|
|
"learning_rate": 4.944263911205772e-07,
|
|
"logits/chosen": 0.0848315954208374,
|
|
"logits/rejected": 0.05806386470794678,
|
|
"logps/chosen": -68.84207153320312,
|
|
"logps/ref_chosen": -68.39323425292969,
|
|
"logps/ref_rejected": -83.24267578125,
|
|
"logps/rejected": -84.07319641113281,
|
|
"loss": 1.1571,
|
|
"margin_dpo/margin_mean": 0.38168424367904663,
|
|
"margin_dpo/margin_std": 0.6761659383773804,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.16477702191987906,
|
|
"fcm_dpo/beta": 2.7161760330200195,
|
|
"fcm_dpo/delta": 0.05944516137242317,
|
|
"fcm_dpo/margin": 0.29280829429626465,
|
|
"fcm_dpo/q_t": 0.375564306974411,
|
|
"grad_norm": 748.4414672851562,
|
|
"learning_rate": 4.941453335558681e-07,
|
|
"logits/chosen": 0.11102043837308884,
|
|
"logits/rejected": 0.06125715374946594,
|
|
"logps/chosen": -56.02159881591797,
|
|
"logps/ref_chosen": -55.52748107910156,
|
|
"logps/ref_rejected": -83.55218505859375,
|
|
"logps/rejected": -84.339111328125,
|
|
"loss": 1.2473,
|
|
"margin_dpo/margin_mean": 0.292807400226593,
|
|
"margin_dpo/margin_std": 0.6247273683547974,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.16628873771730915,
|
|
"fcm_dpo/beta": 2.8052263259887695,
|
|
"fcm_dpo/delta": 0.17110225558280945,
|
|
"fcm_dpo/margin": 0.24601304531097412,
|
|
"fcm_dpo/q_t": 0.39738592505455017,
|
|
"grad_norm": 919.9741821289062,
|
|
"learning_rate": 4.938574467213517e-07,
|
|
"logits/chosen": 0.04998182877898216,
|
|
"logits/rejected": 0.056924428790807724,
|
|
"logps/chosen": -81.71244049072266,
|
|
"logps/ref_chosen": -81.15874481201172,
|
|
"logps/ref_rejected": -72.56021118164062,
|
|
"logps/rejected": -73.35992431640625,
|
|
"loss": 1.426,
|
|
"margin_dpo/margin_mean": 0.24601292610168457,
|
|
"margin_dpo/margin_std": 0.6903856992721558,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.16780045351473924,
|
|
"fcm_dpo/beta": 2.9170947074890137,
|
|
"fcm_dpo/delta": 0.15463948249816895,
|
|
"fcm_dpo/margin": 0.2409285306930542,
|
|
"fcm_dpo/q_t": 0.37675726413726807,
|
|
"grad_norm": 812.91015625,
|
|
"learning_rate": 4.935627386698418e-07,
|
|
"logits/chosen": 0.15344518423080444,
|
|
"logits/rejected": 0.12011007964611053,
|
|
"logps/chosen": -53.007442474365234,
|
|
"logps/ref_chosen": -52.358985900878906,
|
|
"logps/ref_rejected": -77.06150817871094,
|
|
"logps/rejected": -77.95088195800781,
|
|
"loss": 1.359,
|
|
"margin_dpo/margin_mean": 0.2409285306930542,
|
|
"margin_dpo/margin_std": 0.5762333869934082,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.1693121693121693,
|
|
"fcm_dpo/beta": 2.845439910888672,
|
|
"fcm_dpo/delta": -0.06481163203716278,
|
|
"fcm_dpo/margin": 0.31798386573791504,
|
|
"fcm_dpo/q_t": 0.36621609330177307,
|
|
"grad_norm": 860.8700561523438,
|
|
"learning_rate": 4.932612176449559e-07,
|
|
"logits/chosen": 0.061526067554950714,
|
|
"logits/rejected": 0.006518724840134382,
|
|
"logps/chosen": -63.515830993652344,
|
|
"logps/ref_chosen": -63.02006530761719,
|
|
"logps/ref_rejected": -111.36941528320312,
|
|
"logps/rejected": -112.18316650390625,
|
|
"loss": 1.3607,
|
|
"margin_dpo/margin_mean": 0.3179827332496643,
|
|
"margin_dpo/margin_std": 0.6988146305084229,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.1708238851095994,
|
|
"fcm_dpo/beta": 2.9742894172668457,
|
|
"fcm_dpo/delta": 0.08943277597427368,
|
|
"fcm_dpo/margin": 0.25452733039855957,
|
|
"fcm_dpo/q_t": 0.38922226428985596,
|
|
"grad_norm": 920.0910034179688,
|
|
"learning_rate": 4.929528920808854e-07,
|
|
"logits/chosen": 0.0860273689031601,
|
|
"logits/rejected": 0.05197536200284958,
|
|
"logps/chosen": -56.43023681640625,
|
|
"logps/ref_chosen": -55.80766296386719,
|
|
"logps/ref_rejected": -69.84014129638672,
|
|
"logps/rejected": -70.71724700927734,
|
|
"loss": 1.4264,
|
|
"margin_dpo/margin_mean": 0.2545267343521118,
|
|
"margin_dpo/margin_std": 0.635810911655426,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.17233560090702948,
|
|
"fcm_dpo/beta": 2.616295337677002,
|
|
"fcm_dpo/delta": -0.5849612951278687,
|
|
"fcm_dpo/margin": 0.5088604688644409,
|
|
"fcm_dpo/q_t": 0.2969014048576355,
|
|
"grad_norm": 562.796630859375,
|
|
"learning_rate": 4.92637770602159e-07,
|
|
"logits/chosen": 0.1057385504245758,
|
|
"logits/rejected": 0.050528474152088165,
|
|
"logps/chosen": -66.74571228027344,
|
|
"logps/ref_chosen": -66.33277130126953,
|
|
"logps/ref_rejected": -71.61489868164062,
|
|
"logps/rejected": -72.53669738769531,
|
|
"loss": 0.9587,
|
|
"margin_dpo/margin_mean": 0.5088605880737305,
|
|
"margin_dpo/margin_std": 0.6802812814712524,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.17384731670445955,
|
|
"fcm_dpo/beta": 2.7045106887817383,
|
|
"fcm_dpo/delta": 0.11715377867221832,
|
|
"fcm_dpo/margin": 0.27014443278312683,
|
|
"fcm_dpo/q_t": 0.3813377618789673,
|
|
"grad_norm": 830.4659423828125,
|
|
"learning_rate": 4.923158620234019e-07,
|
|
"logits/chosen": 0.11370372772216797,
|
|
"logits/rejected": 0.059039607644081116,
|
|
"logps/chosen": -56.367103576660156,
|
|
"logps/ref_chosen": -55.74903869628906,
|
|
"logps/ref_rejected": -79.59849548339844,
|
|
"logps/rejected": -80.48670959472656,
|
|
"loss": 1.2694,
|
|
"margin_dpo/margin_mean": 0.27014434337615967,
|
|
"margin_dpo/margin_std": 0.6018053889274597,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.17535903250188964,
|
|
"fcm_dpo/beta": 2.6384530067443848,
|
|
"fcm_dpo/delta": -0.15713399648666382,
|
|
"fcm_dpo/margin": 0.37526413798332214,
|
|
"fcm_dpo/q_t": 0.3505271077156067,
|
|
"grad_norm": 637.3699340820312,
|
|
"learning_rate": 4.91987175349089e-07,
|
|
"logits/chosen": 0.09473671019077301,
|
|
"logits/rejected": 0.03647337108850479,
|
|
"logps/chosen": -49.91804504394531,
|
|
"logps/ref_chosen": -49.36516571044922,
|
|
"logps/ref_rejected": -72.84671020507812,
|
|
"logps/rejected": -73.77484893798828,
|
|
"loss": 1.1559,
|
|
"margin_dpo/margin_mean": 0.37526440620422363,
|
|
"margin_dpo/margin_std": 0.665389895439148,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.17687074829931973,
|
|
"fcm_dpo/beta": 2.590177297592163,
|
|
"fcm_dpo/delta": 0.11993909627199173,
|
|
"fcm_dpo/margin": 0.2842903733253479,
|
|
"fcm_dpo/q_t": 0.37484538555145264,
|
|
"grad_norm": 673.7462768554688,
|
|
"learning_rate": 4.916517197732933e-07,
|
|
"logits/chosen": 0.11810576915740967,
|
|
"logits/rejected": 0.08560114353895187,
|
|
"logps/chosen": -58.2153205871582,
|
|
"logps/ref_chosen": -57.710899353027344,
|
|
"logps/ref_rejected": -69.77253723144531,
|
|
"logps/rejected": -70.5612564086914,
|
|
"loss": 1.2845,
|
|
"margin_dpo/margin_mean": 0.28429079055786133,
|
|
"margin_dpo/margin_std": 0.6136384010314941,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.17838246409674982,
|
|
"fcm_dpo/beta": 2.5296597480773926,
|
|
"fcm_dpo/delta": -0.21083010733127594,
|
|
"fcm_dpo/margin": 0.40842828154563904,
|
|
"fcm_dpo/q_t": 0.32514363527297974,
|
|
"grad_norm": 606.0830078125,
|
|
"learning_rate": 4.913095046794281e-07,
|
|
"logits/chosen": 0.16361942887306213,
|
|
"logits/rejected": 0.1275358349084854,
|
|
"logps/chosen": -52.91107940673828,
|
|
"logps/ref_chosen": -52.479896545410156,
|
|
"logps/ref_rejected": -81.359130859375,
|
|
"logps/rejected": -82.19873809814453,
|
|
"loss": 0.9701,
|
|
"margin_dpo/margin_mean": 0.4084276854991913,
|
|
"margin_dpo/margin_std": 0.54984050989151,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.17989417989417988,
|
|
"fcm_dpo/beta": 2.512711763381958,
|
|
"fcm_dpo/delta": -0.045835524797439575,
|
|
"fcm_dpo/margin": 0.354824960231781,
|
|
"fcm_dpo/q_t": 0.36521679162979126,
|
|
"grad_norm": 660.1326904296875,
|
|
"learning_rate": 4.909605396399855e-07,
|
|
"logits/chosen": 0.09545660018920898,
|
|
"logits/rejected": 0.06180203706026077,
|
|
"logps/chosen": -61.986427307128906,
|
|
"logps/ref_chosen": -61.35767364501953,
|
|
"logps/ref_rejected": -75.71510314941406,
|
|
"logps/rejected": -76.69867706298828,
|
|
"loss": 1.2155,
|
|
"margin_dpo/margin_mean": 0.35482484102249146,
|
|
"margin_dpo/margin_std": 0.6957262754440308,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.18140589569160998,
|
|
"fcm_dpo/beta": 2.400790214538574,
|
|
"fcm_dpo/delta": -0.25915199518203735,
|
|
"fcm_dpo/margin": 0.4492540657520294,
|
|
"fcm_dpo/q_t": 0.3342965245246887,
|
|
"grad_norm": 572.9517822265625,
|
|
"learning_rate": 4.906048344162676e-07,
|
|
"logits/chosen": 0.08783574402332306,
|
|
"logits/rejected": 0.035401877015829086,
|
|
"logps/chosen": -60.40111541748047,
|
|
"logps/ref_chosen": -59.907569885253906,
|
|
"logps/ref_rejected": -79.6910629272461,
|
|
"logps/rejected": -80.63386535644531,
|
|
"loss": 1.018,
|
|
"margin_dpo/margin_mean": 0.44925418496131897,
|
|
"margin_dpo/margin_std": 0.7025552988052368,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.18291761148904007,
|
|
"fcm_dpo/beta": 2.3966712951660156,
|
|
"fcm_dpo/delta": 0.10127197206020355,
|
|
"fcm_dpo/margin": 0.31514492630958557,
|
|
"fcm_dpo/q_t": 0.3769919276237488,
|
|
"grad_norm": 587.5,
|
|
"learning_rate": 4.902423989581143e-07,
|
|
"logits/chosen": 0.16948141157627106,
|
|
"logits/rejected": 0.09744793176651001,
|
|
"logps/chosen": -56.259464263916016,
|
|
"logps/ref_chosen": -55.66604232788086,
|
|
"logps/ref_rejected": -101.56233978271484,
|
|
"logps/rejected": -102.47091674804688,
|
|
"loss": 1.2044,
|
|
"margin_dpo/margin_mean": 0.3151443600654602,
|
|
"margin_dpo/margin_std": 0.6316248178482056,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.18442932728647016,
|
|
"fcm_dpo/beta": 2.2983148097991943,
|
|
"fcm_dpo/delta": -0.4261060357093811,
|
|
"fcm_dpo/margin": 0.5331162214279175,
|
|
"fcm_dpo/q_t": 0.29028403759002686,
|
|
"grad_norm": 550.7630615234375,
|
|
"learning_rate": 4.898732434036243e-07,
|
|
"logits/chosen": 0.11293643712997437,
|
|
"logits/rejected": 0.0820358544588089,
|
|
"logps/chosen": -63.88978958129883,
|
|
"logps/ref_chosen": -63.334373474121094,
|
|
"logps/ref_rejected": -73.67523193359375,
|
|
"logps/rejected": -74.7637710571289,
|
|
"loss": 0.9413,
|
|
"margin_dpo/margin_mean": 0.5331156849861145,
|
|
"margin_dpo/margin_std": 0.7090832591056824,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.18594104308390022,
|
|
"fcm_dpo/beta": 2.3069674968719482,
|
|
"fcm_dpo/delta": 0.09509618580341339,
|
|
"fcm_dpo/margin": 0.32712411880493164,
|
|
"fcm_dpo/q_t": 0.3730233907699585,
|
|
"grad_norm": 685.9718627929688,
|
|
"learning_rate": 4.894973780788722e-07,
|
|
"logits/chosen": 0.13213904201984406,
|
|
"logits/rejected": 0.094410739839077,
|
|
"logps/chosen": -57.433197021484375,
|
|
"logps/ref_chosen": -56.89874267578125,
|
|
"logps/ref_rejected": -78.97028350830078,
|
|
"logps/rejected": -79.83187103271484,
|
|
"loss": 1.3738,
|
|
"margin_dpo/margin_mean": 0.3271239697933197,
|
|
"margin_dpo/margin_std": 0.788016676902771,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.1874527588813303,
|
|
"fcm_dpo/beta": 2.1601970195770264,
|
|
"fcm_dpo/delta": -0.31936076283454895,
|
|
"fcm_dpo/margin": 0.5234503746032715,
|
|
"fcm_dpo/q_t": 0.3022237718105316,
|
|
"grad_norm": 433.8125,
|
|
"learning_rate": 4.89114813497619e-07,
|
|
"logits/chosen": 0.1536797285079956,
|
|
"logits/rejected": 0.10203144699335098,
|
|
"logps/chosen": -57.66457748413086,
|
|
"logps/ref_chosen": -57.116085052490234,
|
|
"logps/ref_rejected": -87.93074035644531,
|
|
"logps/rejected": -89.002685546875,
|
|
"loss": 0.8572,
|
|
"margin_dpo/margin_mean": 0.5234500169754028,
|
|
"margin_dpo/margin_std": 0.6252888441085815,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.1889644746787604,
|
|
"fcm_dpo/beta": 2.037970781326294,
|
|
"fcm_dpo/delta": -0.2241692841053009,
|
|
"fcm_dpo/margin": 0.5122190117835999,
|
|
"fcm_dpo/q_t": 0.32587265968322754,
|
|
"grad_norm": 482.951171875,
|
|
"learning_rate": 4.887255603610184e-07,
|
|
"logits/chosen": 0.1680688112974167,
|
|
"logits/rejected": 0.11711547523736954,
|
|
"logps/chosen": -66.33033752441406,
|
|
"logps/ref_chosen": -65.7061767578125,
|
|
"logps/ref_rejected": -91.72711944580078,
|
|
"logps/rejected": -92.86349487304688,
|
|
"loss": 0.9948,
|
|
"margin_dpo/margin_mean": 0.5122197270393372,
|
|
"margin_dpo/margin_std": 0.7382586002349854,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.19047619047619047,
|
|
"fcm_dpo/beta": 1.9791793823242188,
|
|
"fcm_dpo/delta": -0.211552694439888,
|
|
"fcm_dpo/margin": 0.5251641273498535,
|
|
"fcm_dpo/q_t": 0.3407570719718933,
|
|
"grad_norm": 396.5646057128906,
|
|
"learning_rate": 4.883296295573176e-07,
|
|
"logits/chosen": -0.02226438745856285,
|
|
"logits/rejected": -0.028386151418089867,
|
|
"logps/chosen": -68.68487548828125,
|
|
"logps/ref_chosen": -68.17608642578125,
|
|
"logps/ref_rejected": -65.1175537109375,
|
|
"logps/rejected": -66.15150451660156,
|
|
"loss": 1.0228,
|
|
"margin_dpo/margin_mean": 0.5251647233963013,
|
|
"margin_dpo/margin_std": 0.8751634359359741,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.19198790627362056,
|
|
"fcm_dpo/beta": 1.9311566352844238,
|
|
"fcm_dpo/delta": -0.020116418600082397,
|
|
"fcm_dpo/margin": 0.4483014643192291,
|
|
"fcm_dpo/q_t": 0.3441677689552307,
|
|
"grad_norm": 473.1387023925781,
|
|
"learning_rate": 4.87927032161552e-07,
|
|
"logits/chosen": 0.08045360445976257,
|
|
"logits/rejected": 0.05260235071182251,
|
|
"logps/chosen": -62.5809326171875,
|
|
"logps/ref_chosen": -61.88023376464844,
|
|
"logps/ref_rejected": -68.46012878417969,
|
|
"logps/rejected": -69.609130859375,
|
|
"loss": 1.0264,
|
|
"margin_dpo/margin_mean": 0.4483017027378082,
|
|
"margin_dpo/margin_std": 0.6659648418426514,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.19349962207105065,
|
|
"fcm_dpo/beta": 1.9168998003005981,
|
|
"fcm_dpo/delta": -0.033443547785282135,
|
|
"fcm_dpo/margin": 0.45753252506256104,
|
|
"fcm_dpo/q_t": 0.35872209072113037,
|
|
"grad_norm": 473.319091796875,
|
|
"learning_rate": 4.875177794352363e-07,
|
|
"logits/chosen": 0.11974930018186569,
|
|
"logits/rejected": 0.07149255275726318,
|
|
"logps/chosen": -67.38662719726562,
|
|
"logps/ref_chosen": -66.708984375,
|
|
"logps/ref_rejected": -94.97969055175781,
|
|
"logps/rejected": -96.1148681640625,
|
|
"loss": 1.1219,
|
|
"margin_dpo/margin_mean": 0.45753246545791626,
|
|
"margin_dpo/margin_std": 0.8322083950042725,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.19501133786848074,
|
|
"fcm_dpo/beta": 2.0064656734466553,
|
|
"fcm_dpo/delta": 0.22321073710918427,
|
|
"fcm_dpo/margin": 0.31931766867637634,
|
|
"fcm_dpo/q_t": 0.38313671946525574,
|
|
"grad_norm": 575.3306884765625,
|
|
"learning_rate": 4.871018828260491e-07,
|
|
"logits/chosen": 0.1001749187707901,
|
|
"logits/rejected": 0.09252482652664185,
|
|
"logps/chosen": -66.09547424316406,
|
|
"logps/ref_chosen": -65.33882904052734,
|
|
"logps/ref_rejected": -68.06109619140625,
|
|
"logps/rejected": -69.13705444335938,
|
|
"loss": 1.2288,
|
|
"margin_dpo/margin_mean": 0.3193177878856659,
|
|
"margin_dpo/margin_std": 0.6939840316772461,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.1965230536659108,
|
|
"fcm_dpo/beta": 2.0064070224761963,
|
|
"fcm_dpo/delta": 0.050023213028907776,
|
|
"fcm_dpo/margin": 0.39817652106285095,
|
|
"fcm_dpo/q_t": 0.3601888418197632,
|
|
"grad_norm": 567.6577758789062,
|
|
"learning_rate": 4.866793539675126e-07,
|
|
"logits/chosen": 0.07646722346544266,
|
|
"logits/rejected": 0.03454095870256424,
|
|
"logps/chosen": -59.36882019042969,
|
|
"logps/ref_chosen": -58.660743713378906,
|
|
"logps/ref_rejected": -79.24510192871094,
|
|
"logps/rejected": -80.35134887695312,
|
|
"loss": 1.0687,
|
|
"margin_dpo/margin_mean": 0.39817655086517334,
|
|
"margin_dpo/margin_std": 0.6305863857269287,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.1980347694633409,
|
|
"fcm_dpo/beta": 2.0193214416503906,
|
|
"fcm_dpo/delta": -0.10755333304405212,
|
|
"fcm_dpo/margin": 0.4692103862762451,
|
|
"fcm_dpo/q_t": 0.3443056643009186,
|
|
"grad_norm": 473.69549560546875,
|
|
"learning_rate": 4.86250204678667e-07,
|
|
"logits/chosen": 0.05275092273950577,
|
|
"logits/rejected": 0.004051988013088703,
|
|
"logps/chosen": -53.13858413696289,
|
|
"logps/ref_chosen": -52.51453399658203,
|
|
"logps/ref_rejected": -85.18299865722656,
|
|
"logps/rejected": -86.27625274658203,
|
|
"loss": 1.0233,
|
|
"margin_dpo/margin_mean": 0.4692104160785675,
|
|
"margin_dpo/margin_std": 0.719977855682373,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.19954648526077098,
|
|
"fcm_dpo/beta": 2.014409303665161,
|
|
"fcm_dpo/delta": 0.0671209990978241,
|
|
"fcm_dpo/margin": 0.3912177085876465,
|
|
"fcm_dpo/q_t": 0.3556290864944458,
|
|
"grad_norm": 584.9998168945312,
|
|
"learning_rate": 4.858144469637408e-07,
|
|
"logits/chosen": 0.1664508730173111,
|
|
"logits/rejected": 0.1374509632587433,
|
|
"logps/chosen": -66.4674301147461,
|
|
"logps/ref_chosen": -65.68513488769531,
|
|
"logps/ref_rejected": -69.54120635986328,
|
|
"logps/rejected": -70.7147216796875,
|
|
"loss": 1.2363,
|
|
"margin_dpo/margin_mean": 0.3912178874015808,
|
|
"margin_dpo/margin_std": 0.8271607160568237,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.20105820105820105,
|
|
"fcm_dpo/beta": 2.066967248916626,
|
|
"fcm_dpo/delta": 0.10779528319835663,
|
|
"fcm_dpo/margin": 0.36285972595214844,
|
|
"fcm_dpo/q_t": 0.3746221363544464,
|
|
"grad_norm": 585.4243774414062,
|
|
"learning_rate": 4.853720930118138e-07,
|
|
"logits/chosen": 0.08279750496149063,
|
|
"logits/rejected": 0.07346326112747192,
|
|
"logps/chosen": -64.35284423828125,
|
|
"logps/ref_chosen": -63.598114013671875,
|
|
"logps/ref_rejected": -73.72798156738281,
|
|
"logps/rejected": -74.84557342529297,
|
|
"loss": 1.254,
|
|
"margin_dpo/margin_mean": 0.36285945773124695,
|
|
"margin_dpo/margin_std": 0.7753314971923828,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.20256991685563114,
|
|
"fcm_dpo/beta": 1.9597253799438477,
|
|
"fcm_dpo/delta": -0.31700825691223145,
|
|
"fcm_dpo/margin": 0.5747710466384888,
|
|
"fcm_dpo/q_t": 0.301810622215271,
|
|
"grad_norm": 413.8312683105469,
|
|
"learning_rate": 4.849231551964771e-07,
|
|
"logits/chosen": 0.1624392867088318,
|
|
"logits/rejected": 0.1149115115404129,
|
|
"logps/chosen": -54.434722900390625,
|
|
"logps/ref_chosen": -53.79457092285156,
|
|
"logps/ref_rejected": -74.16741943359375,
|
|
"logps/rejected": -75.38233947753906,
|
|
"loss": 0.8856,
|
|
"margin_dpo/margin_mean": 0.5747714042663574,
|
|
"margin_dpo/margin_std": 0.7277013063430786,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.20408163265306123,
|
|
"fcm_dpo/beta": 2.0122458934783936,
|
|
"fcm_dpo/delta": 0.2390393614768982,
|
|
"fcm_dpo/margin": 0.3112886846065521,
|
|
"fcm_dpo/q_t": 0.39257875084877014,
|
|
"grad_norm": 493.87615966796875,
|
|
"learning_rate": 4.844676460754862e-07,
|
|
"logits/chosen": 0.10040457546710968,
|
|
"logits/rejected": 0.07089127600193024,
|
|
"logps/chosen": -50.159515380859375,
|
|
"logps/ref_chosen": -49.441078186035156,
|
|
"logps/ref_rejected": -65.96878051757812,
|
|
"logps/rejected": -66.99850463867188,
|
|
"loss": 1.2206,
|
|
"margin_dpo/margin_mean": 0.31128865480422974,
|
|
"margin_dpo/margin_std": 0.6809731721878052,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.20559334845049132,
|
|
"fcm_dpo/beta": 2.0793657302856445,
|
|
"fcm_dpo/delta": 0.1040540561079979,
|
|
"fcm_dpo/margin": 0.3623303472995758,
|
|
"fcm_dpo/q_t": 0.39911192655563354,
|
|
"grad_norm": 749.82666015625,
|
|
"learning_rate": 4.840055783904106e-07,
|
|
"logits/chosen": 0.1170252338051796,
|
|
"logits/rejected": 0.05652901157736778,
|
|
"logps/chosen": -67.56098937988281,
|
|
"logps/ref_chosen": -66.75926208496094,
|
|
"logps/ref_rejected": -94.61787414550781,
|
|
"logps/rejected": -95.78192901611328,
|
|
"loss": 1.4101,
|
|
"margin_dpo/margin_mean": 0.3623313307762146,
|
|
"margin_dpo/margin_std": 0.962636411190033,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.20710506424792138,
|
|
"fcm_dpo/beta": 2.0693423748016357,
|
|
"fcm_dpo/delta": -0.1056227907538414,
|
|
"fcm_dpo/margin": 0.4567766785621643,
|
|
"fcm_dpo/q_t": 0.35195136070251465,
|
|
"grad_norm": 478.63922119140625,
|
|
"learning_rate": 4.835369650662767e-07,
|
|
"logits/chosen": 0.12457015365362167,
|
|
"logits/rejected": 0.10022950917482376,
|
|
"logps/chosen": -57.523067474365234,
|
|
"logps/ref_chosen": -56.78379821777344,
|
|
"logps/ref_rejected": -69.89952087402344,
|
|
"logps/rejected": -71.09556579589844,
|
|
"loss": 1.1134,
|
|
"margin_dpo/margin_mean": 0.45677661895751953,
|
|
"margin_dpo/margin_std": 0.781032383441925,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.20861678004535147,
|
|
"fcm_dpo/beta": 2.088082790374756,
|
|
"fcm_dpo/delta": 0.1478821486234665,
|
|
"fcm_dpo/margin": 0.34126073122024536,
|
|
"fcm_dpo/q_t": 0.3721213936805725,
|
|
"grad_norm": 539.020751953125,
|
|
"learning_rate": 4.830618192112065e-07,
|
|
"logits/chosen": 0.10970278084278107,
|
|
"logits/rejected": 0.07803289592266083,
|
|
"logps/chosen": -59.64773178100586,
|
|
"logps/ref_chosen": -58.766014099121094,
|
|
"logps/ref_rejected": -68.12371826171875,
|
|
"logps/rejected": -69.3467025756836,
|
|
"loss": 1.2453,
|
|
"margin_dpo/margin_mean": 0.3412603735923767,
|
|
"margin_dpo/margin_std": 0.7408077716827393,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.21012849584278157,
|
|
"fcm_dpo/beta": 2.0522103309631348,
|
|
"fcm_dpo/delta": -0.23259088397026062,
|
|
"fcm_dpo/margin": 0.5156592130661011,
|
|
"fcm_dpo/q_t": 0.33579808473587036,
|
|
"grad_norm": 497.1232604980469,
|
|
"learning_rate": 4.825801541160509e-07,
|
|
"logits/chosen": 0.10866852849721909,
|
|
"logits/rejected": 0.08270702511072159,
|
|
"logps/chosen": -72.01712036132812,
|
|
"logps/ref_chosen": -71.2255859375,
|
|
"logps/ref_rejected": -82.1834716796875,
|
|
"logps/rejected": -83.49066162109375,
|
|
"loss": 1.0294,
|
|
"margin_dpo/margin_mean": 0.515658974647522,
|
|
"margin_dpo/margin_std": 0.769908607006073,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.21164021164021163,
|
|
"fcm_dpo/beta": 1.8860867023468018,
|
|
"fcm_dpo/delta": -0.42799749970436096,
|
|
"fcm_dpo/margin": 0.6498202085494995,
|
|
"fcm_dpo/q_t": 0.2918680012226105,
|
|
"grad_norm": 530.7315063476562,
|
|
"learning_rate": 4.820919832540181e-07,
|
|
"logits/chosen": 0.08455317467451096,
|
|
"logits/rejected": 0.04641052335500717,
|
|
"logps/chosen": -64.02261352539062,
|
|
"logps/ref_chosen": -63.27766418457031,
|
|
"logps/ref_rejected": -83.30647277832031,
|
|
"logps/rejected": -84.70124816894531,
|
|
"loss": 1.0826,
|
|
"margin_dpo/margin_mean": 0.649819552898407,
|
|
"margin_dpo/margin_std": 0.9729900360107422,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.21315192743764172,
|
|
"fcm_dpo/beta": 1.8111655712127686,
|
|
"fcm_dpo/delta": -0.05867675691843033,
|
|
"fcm_dpo/margin": 0.4977453052997589,
|
|
"fcm_dpo/q_t": 0.36327552795410156,
|
|
"grad_norm": 493.1156005859375,
|
|
"learning_rate": 4.815973202802966e-07,
|
|
"logits/chosen": 0.12604521214962006,
|
|
"logits/rejected": 0.08942769467830658,
|
|
"logps/chosen": -62.58486557006836,
|
|
"logps/ref_chosen": -61.76676940917969,
|
|
"logps/ref_rejected": -88.60601806640625,
|
|
"logps/rejected": -89.92186737060547,
|
|
"loss": 1.1357,
|
|
"margin_dpo/margin_mean": 0.4977456331253052,
|
|
"margin_dpo/margin_std": 0.8883162140846252,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.2146636432350718,
|
|
"fcm_dpo/beta": 1.8200013637542725,
|
|
"fcm_dpo/delta": 0.021424515172839165,
|
|
"fcm_dpo/margin": 0.4561222195625305,
|
|
"fcm_dpo/q_t": 0.3619512617588043,
|
|
"grad_norm": 431.1319274902344,
|
|
"learning_rate": 4.810961790316729e-07,
|
|
"logits/chosen": 0.11921191215515137,
|
|
"logits/rejected": 0.09684738516807556,
|
|
"logps/chosen": -65.99165344238281,
|
|
"logps/ref_chosen": -65.2747802734375,
|
|
"logps/ref_rejected": -81.1378173828125,
|
|
"logps/rejected": -82.31082153320312,
|
|
"loss": 1.0878,
|
|
"margin_dpo/margin_mean": 0.45612233877182007,
|
|
"margin_dpo/margin_std": 0.8028172254562378,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.2161753590325019,
|
|
"fcm_dpo/beta": 1.8570456504821777,
|
|
"fcm_dpo/delta": 0.14970946311950684,
|
|
"fcm_dpo/margin": 0.3828372359275818,
|
|
"fcm_dpo/q_t": 0.37365925312042236,
|
|
"grad_norm": 583.10400390625,
|
|
"learning_rate": 4.805885735261454e-07,
|
|
"logits/chosen": 0.09592782706022263,
|
|
"logits/rejected": 0.08162565529346466,
|
|
"logps/chosen": -63.41217041015625,
|
|
"logps/ref_chosen": -62.617828369140625,
|
|
"logps/ref_rejected": -70.39239501953125,
|
|
"logps/rejected": -71.56956481933594,
|
|
"loss": 1.2184,
|
|
"margin_dpo/margin_mean": 0.38283705711364746,
|
|
"margin_dpo/margin_std": 0.7997678518295288,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.21768707482993196,
|
|
"fcm_dpo/beta": 1.8605518341064453,
|
|
"fcm_dpo/delta": -0.11310499161481857,
|
|
"fcm_dpo/margin": 0.5119171142578125,
|
|
"fcm_dpo/q_t": 0.3569183945655823,
|
|
"grad_norm": 530.5613403320312,
|
|
"learning_rate": 4.800745179625307e-07,
|
|
"logits/chosen": 0.08935252577066422,
|
|
"logits/rejected": 0.06564676761627197,
|
|
"logps/chosen": -61.614990234375,
|
|
"logps/ref_chosen": -60.80268859863281,
|
|
"logps/ref_rejected": -79.07284545898438,
|
|
"logps/rejected": -80.39706420898438,
|
|
"loss": 1.1037,
|
|
"margin_dpo/margin_mean": 0.5119173526763916,
|
|
"margin_dpo/margin_std": 0.8959058523178101,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.21919879062736206,
|
|
"fcm_dpo/beta": 1.8628008365631104,
|
|
"fcm_dpo/delta": 0.05122518166899681,
|
|
"fcm_dpo/margin": 0.4308074712753296,
|
|
"fcm_dpo/q_t": 0.37797456979751587,
|
|
"grad_norm": 631.3311157226562,
|
|
"learning_rate": 4.795540267200686e-07,
|
|
"logits/chosen": 0.06172323599457741,
|
|
"logits/rejected": 0.07850059121847153,
|
|
"logps/chosen": -75.40628051757812,
|
|
"logps/ref_chosen": -74.61146545410156,
|
|
"logps/ref_rejected": -83.24461364746094,
|
|
"logps/rejected": -84.4702377319336,
|
|
"loss": 1.2266,
|
|
"margin_dpo/margin_mean": 0.4308076798915863,
|
|
"margin_dpo/margin_std": 0.8929077386856079,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.22071050642479215,
|
|
"fcm_dpo/beta": 1.8260502815246582,
|
|
"fcm_dpo/delta": -0.1331080198287964,
|
|
"fcm_dpo/margin": 0.5313305258750916,
|
|
"fcm_dpo/q_t": 0.33441442251205444,
|
|
"grad_norm": 422.6907958984375,
|
|
"learning_rate": 4.790271143580173e-07,
|
|
"logits/chosen": 0.0609685517847538,
|
|
"logits/rejected": 0.04538050293922424,
|
|
"logps/chosen": -58.57465744018555,
|
|
"logps/ref_chosen": -57.84098434448242,
|
|
"logps/ref_rejected": -67.47422790527344,
|
|
"logps/rejected": -68.73922729492188,
|
|
"loss": 1.0638,
|
|
"margin_dpo/margin_mean": 0.5313305854797363,
|
|
"margin_dpo/margin_std": 0.8781388998031616,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.2222222222222222,
|
|
"fcm_dpo/beta": 1.8587777614593506,
|
|
"fcm_dpo/delta": 0.1607896387577057,
|
|
"fcm_dpo/margin": 0.3767205476760864,
|
|
"fcm_dpo/q_t": 0.37250229716300964,
|
|
"grad_norm": 581.6641235351562,
|
|
"learning_rate": 4.784937956152489e-07,
|
|
"logits/chosen": 0.0543268620967865,
|
|
"logits/rejected": 0.01894828863441944,
|
|
"logps/chosen": -67.6400375366211,
|
|
"logps/ref_chosen": -66.81346893310547,
|
|
"logps/ref_rejected": -81.1796875,
|
|
"logps/rejected": -82.38298034667969,
|
|
"loss": 1.1887,
|
|
"margin_dpo/margin_mean": 0.37672001123428345,
|
|
"margin_dpo/margin_std": 0.7602115869522095,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.2237339380196523,
|
|
"fcm_dpo/beta": 1.8126616477966309,
|
|
"fcm_dpo/delta": -0.24772456288337708,
|
|
"fcm_dpo/margin": 0.5912690162658691,
|
|
"fcm_dpo/q_t": 0.3363417088985443,
|
|
"grad_norm": 373.9577331542969,
|
|
"learning_rate": 4.779540854098347e-07,
|
|
"logits/chosen": 0.1572439968585968,
|
|
"logits/rejected": 0.09718590974807739,
|
|
"logps/chosen": -49.48698806762695,
|
|
"logps/ref_chosen": -48.6877555847168,
|
|
"logps/ref_rejected": -67.50503540039062,
|
|
"logps/rejected": -68.89553833007812,
|
|
"loss": 1.0406,
|
|
"margin_dpo/margin_mean": 0.5912688970565796,
|
|
"margin_dpo/margin_std": 0.9395405054092407,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.2252456538170824,
|
|
"fcm_dpo/beta": 1.7451000213623047,
|
|
"fcm_dpo/delta": -0.16907699406147003,
|
|
"fcm_dpo/margin": 0.5744267702102661,
|
|
"fcm_dpo/q_t": 0.3332281708717346,
|
|
"grad_norm": 422.275634765625,
|
|
"learning_rate": 4.774079988386296e-07,
|
|
"logits/chosen": 0.06443419307470322,
|
|
"logits/rejected": 0.021416954696178436,
|
|
"logps/chosen": -56.10673904418945,
|
|
"logps/ref_chosen": -55.143775939941406,
|
|
"logps/ref_rejected": -64.79888916015625,
|
|
"logps/rejected": -66.33627319335938,
|
|
"loss": 1.0112,
|
|
"margin_dpo/margin_mean": 0.5744273662567139,
|
|
"margin_dpo/margin_std": 0.8520516157150269,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.22675736961451248,
|
|
"fcm_dpo/beta": 1.6926888227462769,
|
|
"fcm_dpo/delta": -0.16446954011917114,
|
|
"fcm_dpo/margin": 0.5896965265274048,
|
|
"fcm_dpo/q_t": 0.31847673654556274,
|
|
"grad_norm": 410.8077697753906,
|
|
"learning_rate": 4.768555511768486e-07,
|
|
"logits/chosen": 0.10813453793525696,
|
|
"logits/rejected": 0.07115641236305237,
|
|
"logps/chosen": -68.07341003417969,
|
|
"logps/ref_chosen": -67.47074890136719,
|
|
"logps/ref_rejected": -89.21170806884766,
|
|
"logps/rejected": -90.40406799316406,
|
|
"loss": 0.9558,
|
|
"margin_dpo/margin_mean": 0.5896967649459839,
|
|
"margin_dpo/margin_std": 0.7787685394287109,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.22826908541194255,
|
|
"fcm_dpo/beta": 1.6143739223480225,
|
|
"fcm_dpo/delta": -0.25309672951698303,
|
|
"fcm_dpo/margin": 0.6668341159820557,
|
|
"fcm_dpo/q_t": 0.3007487654685974,
|
|
"grad_norm": 306.0788269042969,
|
|
"learning_rate": 4.762967578776406e-07,
|
|
"logits/chosen": 0.0946289449930191,
|
|
"logits/rejected": 0.05068827420473099,
|
|
"logps/chosen": -53.11603546142578,
|
|
"logps/ref_chosen": -52.45954132080078,
|
|
"logps/ref_rejected": -79.0630111694336,
|
|
"logps/rejected": -80.38633728027344,
|
|
"loss": 0.8835,
|
|
"margin_dpo/margin_mean": 0.6668342351913452,
|
|
"margin_dpo/margin_std": 0.7877708077430725,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.22978080120937264,
|
|
"fcm_dpo/beta": 1.5444799661636353,
|
|
"fcm_dpo/delta": -0.18640229105949402,
|
|
"fcm_dpo/margin": 0.6589242219924927,
|
|
"fcm_dpo/q_t": 0.3180433511734009,
|
|
"grad_norm": 338.899658203125,
|
|
"learning_rate": 4.757316345716553e-07,
|
|
"logits/chosen": 0.14255166053771973,
|
|
"logits/rejected": 0.10172566026449203,
|
|
"logps/chosen": -57.358360290527344,
|
|
"logps/ref_chosen": -56.5538330078125,
|
|
"logps/ref_rejected": -76.55074310302734,
|
|
"logps/rejected": -78.01419067382812,
|
|
"loss": 0.9339,
|
|
"margin_dpo/margin_mean": 0.6589239835739136,
|
|
"margin_dpo/margin_std": 0.8599318265914917,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.23129251700680273,
|
|
"fcm_dpo/beta": 1.5402113199234009,
|
|
"fcm_dpo/delta": 0.06967150419950485,
|
|
"fcm_dpo/margin": 0.5102236270904541,
|
|
"fcm_dpo/q_t": 0.35215288400650024,
|
|
"grad_norm": 371.8020935058594,
|
|
"learning_rate": 4.751601970666064e-07,
|
|
"logits/chosen": 0.06787143647670746,
|
|
"logits/rejected": 0.03406914696097374,
|
|
"logps/chosen": -68.78588104248047,
|
|
"logps/ref_chosen": -68.00689697265625,
|
|
"logps/ref_rejected": -74.83482360839844,
|
|
"logps/rejected": -76.12403869628906,
|
|
"loss": 1.0502,
|
|
"margin_dpo/margin_mean": 0.5102236270904541,
|
|
"margin_dpo/margin_std": 0.7997216582298279,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.2328042328042328,
|
|
"fcm_dpo/beta": 1.5851788520812988,
|
|
"fcm_dpo/delta": 0.15434226393699646,
|
|
"fcm_dpo/margin": 0.4456132650375366,
|
|
"fcm_dpo/q_t": 0.37568652629852295,
|
|
"grad_norm": 404.9493713378906,
|
|
"learning_rate": 4.745824613468292e-07,
|
|
"logits/chosen": 0.13575251400470734,
|
|
"logits/rejected": 0.13199105858802795,
|
|
"logps/chosen": -60.18925476074219,
|
|
"logps/ref_chosen": -59.222537994384766,
|
|
"logps/ref_rejected": -64.19131469726562,
|
|
"logps/rejected": -65.60365295410156,
|
|
"loss": 1.2275,
|
|
"margin_dpo/margin_mean": 0.4456136226654053,
|
|
"margin_dpo/margin_std": 0.9545999765396118,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.23431594860166288,
|
|
"fcm_dpo/beta": 1.6388814449310303,
|
|
"fcm_dpo/delta": 0.1953224092721939,
|
|
"fcm_dpo/margin": 0.40763676166534424,
|
|
"fcm_dpo/q_t": 0.3838460147380829,
|
|
"grad_norm": 457.6610107421875,
|
|
"learning_rate": 4.7399844357283393e-07,
|
|
"logits/chosen": 0.12242255359888077,
|
|
"logits/rejected": 0.10534698516130447,
|
|
"logps/chosen": -69.38691711425781,
|
|
"logps/ref_chosen": -68.45469665527344,
|
|
"logps/ref_rejected": -77.91763305664062,
|
|
"logps/rejected": -79.25749206542969,
|
|
"loss": 1.2961,
|
|
"margin_dpo/margin_mean": 0.40763652324676514,
|
|
"margin_dpo/margin_std": 0.9401887655258179,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.23582766439909297,
|
|
"fcm_dpo/beta": 1.6212600469589233,
|
|
"fcm_dpo/delta": -0.16775542497634888,
|
|
"fcm_dpo/margin": 0.6174951791763306,
|
|
"fcm_dpo/q_t": 0.33825477957725525,
|
|
"grad_norm": 420.23248291015625,
|
|
"learning_rate": 4.7340816008085305e-07,
|
|
"logits/chosen": 0.09560365974903107,
|
|
"logits/rejected": 0.05767889693379402,
|
|
"logps/chosen": -68.15042114257812,
|
|
"logps/ref_chosen": -67.26959991455078,
|
|
"logps/ref_rejected": -86.95914459228516,
|
|
"logps/rejected": -88.45746612548828,
|
|
"loss": 0.9711,
|
|
"margin_dpo/margin_mean": 0.6174949407577515,
|
|
"margin_dpo/margin_std": 0.9183490872383118,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.23733938019652306,
|
|
"fcm_dpo/beta": 1.5758273601531982,
|
|
"fcm_dpo/delta": -0.013841405510902405,
|
|
"fcm_dpo/margin": 0.5440424680709839,
|
|
"fcm_dpo/q_t": 0.334421306848526,
|
|
"grad_norm": 375.8836669921875,
|
|
"learning_rate": 4.728116273823847e-07,
|
|
"logits/chosen": 0.11276492476463318,
|
|
"logits/rejected": 0.09284964948892593,
|
|
"logps/chosen": -55.550567626953125,
|
|
"logps/ref_chosen": -54.77287292480469,
|
|
"logps/ref_rejected": -63.87866973876953,
|
|
"logps/rejected": -65.20040893554688,
|
|
"loss": 1.0336,
|
|
"margin_dpo/margin_mean": 0.5440424084663391,
|
|
"margin_dpo/margin_std": 0.816327691078186,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.23885109599395313,
|
|
"fcm_dpo/beta": 1.5792837142944336,
|
|
"fcm_dpo/delta": -0.07293359935283661,
|
|
"fcm_dpo/margin": 0.5799823999404907,
|
|
"fcm_dpo/q_t": 0.3303491473197937,
|
|
"grad_norm": 387.7180480957031,
|
|
"learning_rate": 4.7220886216373085e-07,
|
|
"logits/chosen": 0.10120804607868195,
|
|
"logits/rejected": 0.0727510154247284,
|
|
"logps/chosen": -65.7298583984375,
|
|
"logps/ref_chosen": -64.92271423339844,
|
|
"logps/ref_rejected": -82.23789978027344,
|
|
"logps/rejected": -83.62501525878906,
|
|
"loss": 0.9862,
|
|
"margin_dpo/margin_mean": 0.5799820423126221,
|
|
"margin_dpo/margin_std": 0.8221684098243713,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.24036281179138322,
|
|
"fcm_dpo/beta": 1.6407793760299683,
|
|
"fcm_dpo/delta": 0.1321692019701004,
|
|
"fcm_dpo/margin": 0.43719008564949036,
|
|
"fcm_dpo/q_t": 0.3659708499908447,
|
|
"grad_norm": 493.34710693359375,
|
|
"learning_rate": 4.715998812855304e-07,
|
|
"logits/chosen": 0.1393783539533615,
|
|
"logits/rejected": 0.1092163473367691,
|
|
"logps/chosen": -57.99412536621094,
|
|
"logps/ref_chosen": -57.046993255615234,
|
|
"logps/ref_rejected": -73.32441711425781,
|
|
"logps/rejected": -74.708740234375,
|
|
"loss": 1.228,
|
|
"margin_dpo/margin_mean": 0.43719035387039185,
|
|
"margin_dpo/margin_std": 0.8903641700744629,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.2418745275888133,
|
|
"fcm_dpo/beta": 1.640321969985962,
|
|
"fcm_dpo/delta": 0.06273063272237778,
|
|
"fcm_dpo/margin": 0.48271435499191284,
|
|
"fcm_dpo/q_t": 0.3727272152900696,
|
|
"grad_norm": 414.4522399902344,
|
|
"learning_rate": 4.7098470178228755e-07,
|
|
"logits/chosen": -0.017423782497644424,
|
|
"logits/rejected": -0.04727357625961304,
|
|
"logps/chosen": -50.78994369506836,
|
|
"logps/ref_chosen": -49.806915283203125,
|
|
"logps/ref_rejected": -68.3370132446289,
|
|
"logps/rejected": -69.8027572631836,
|
|
"loss": 1.1378,
|
|
"margin_dpo/margin_mean": 0.48271453380584717,
|
|
"margin_dpo/margin_std": 0.9058699607849121,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.24338624338624337,
|
|
"fcm_dpo/beta": 1.6497435569763184,
|
|
"fcm_dpo/delta": -0.025253944098949432,
|
|
"fcm_dpo/margin": 0.5282729864120483,
|
|
"fcm_dpo/q_t": 0.36747777462005615,
|
|
"grad_norm": 402.39227294921875,
|
|
"learning_rate": 4.703633408618955e-07,
|
|
"logits/chosen": 0.11677715182304382,
|
|
"logits/rejected": 0.0853380635380745,
|
|
"logps/chosen": -53.445526123046875,
|
|
"logps/ref_chosen": -52.50048828125,
|
|
"logps/ref_rejected": -66.04540252685547,
|
|
"logps/rejected": -67.51871490478516,
|
|
"loss": 1.1383,
|
|
"margin_dpo/margin_mean": 0.5282737016677856,
|
|
"margin_dpo/margin_std": 0.960330069065094,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.24489795918367346,
|
|
"fcm_dpo/beta": 1.5421152114868164,
|
|
"fcm_dpo/delta": -0.3480435609817505,
|
|
"fcm_dpo/margin": 0.7493581175804138,
|
|
"fcm_dpo/q_t": 0.28938403725624084,
|
|
"grad_norm": 365.9737854003906,
|
|
"learning_rate": 4.697358159051549e-07,
|
|
"logits/chosen": 0.1516774296760559,
|
|
"logits/rejected": 0.1125471442937851,
|
|
"logps/chosen": -70.50188446044922,
|
|
"logps/ref_chosen": -69.46919250488281,
|
|
"logps/ref_rejected": -92.00952911376953,
|
|
"logps/rejected": -93.79158020019531,
|
|
"loss": 0.9088,
|
|
"margin_dpo/margin_mean": 0.7493584156036377,
|
|
"margin_dpo/margin_std": 0.9313629865646362,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.24640967498110355,
|
|
"fcm_dpo/beta": 1.480248212814331,
|
|
"fcm_dpo/delta": -0.2779368758201599,
|
|
"fcm_dpo/margin": 0.7414557933807373,
|
|
"fcm_dpo/q_t": 0.3119097352027893,
|
|
"grad_norm": 371.7340087890625,
|
|
"learning_rate": 4.691021444652876e-07,
|
|
"logits/chosen": 0.10076501965522766,
|
|
"logits/rejected": 0.06344390660524368,
|
|
"logps/chosen": -51.57759094238281,
|
|
"logps/ref_chosen": -50.613834381103516,
|
|
"logps/ref_rejected": -74.62033081054688,
|
|
"logps/rejected": -76.32554626464844,
|
|
"loss": 0.9404,
|
|
"margin_dpo/margin_mean": 0.7414567470550537,
|
|
"margin_dpo/margin_std": 0.9699376225471497,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.24792139077853365,
|
|
"fcm_dpo/beta": 1.425843358039856,
|
|
"fcm_dpo/delta": -0.0862051472067833,
|
|
"fcm_dpo/margin": 0.6510294675827026,
|
|
"fcm_dpo/q_t": 0.3333462178707123,
|
|
"grad_norm": 325.4397888183594,
|
|
"learning_rate": 4.6846234426744624e-07,
|
|
"logits/chosen": 0.0917167067527771,
|
|
"logits/rejected": 0.041261181235313416,
|
|
"logps/chosen": -55.971466064453125,
|
|
"logps/ref_chosen": -54.848114013671875,
|
|
"logps/ref_rejected": -79.0630111694336,
|
|
"logps/rejected": -80.83738708496094,
|
|
"loss": 0.9959,
|
|
"margin_dpo/margin_mean": 0.6510298252105713,
|
|
"margin_dpo/margin_std": 0.9245976805686951,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.2494331065759637,
|
|
"fcm_dpo/beta": 1.4056333303451538,
|
|
"fcm_dpo/delta": -0.08572079241275787,
|
|
"fcm_dpo/margin": 0.6598940491676331,
|
|
"fcm_dpo/q_t": 0.32658183574676514,
|
|
"grad_norm": 278.6569519042969,
|
|
"learning_rate": 4.678164332082175e-07,
|
|
"logits/chosen": 0.15755686163902283,
|
|
"logits/rejected": 0.11023740470409393,
|
|
"logps/chosen": -52.1827392578125,
|
|
"logps/ref_chosen": -51.089210510253906,
|
|
"logps/ref_rejected": -71.23370361328125,
|
|
"logps/rejected": -72.98712158203125,
|
|
"loss": 0.944,
|
|
"margin_dpo/margin_mean": 0.6598936319351196,
|
|
"margin_dpo/margin_std": 0.8561975359916687,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.2509448223733938,
|
|
"fcm_dpo/beta": 1.4288179874420166,
|
|
"fcm_dpo/delta": 0.1548466831445694,
|
|
"fcm_dpo/margin": 0.4935830235481262,
|
|
"fcm_dpo/q_t": 0.378057599067688,
|
|
"grad_norm": 363.7931213378906,
|
|
"learning_rate": 4.6716442935512214e-07,
|
|
"logits/chosen": 0.11771443486213684,
|
|
"logits/rejected": 0.05061034858226776,
|
|
"logps/chosen": -64.18374633789062,
|
|
"logps/ref_chosen": -63.19081115722656,
|
|
"logps/ref_rejected": -93.8402099609375,
|
|
"logps/rejected": -95.32672119140625,
|
|
"loss": 1.1495,
|
|
"margin_dpo/margin_mean": 0.4935823976993561,
|
|
"margin_dpo/margin_std": 0.9294769167900085,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.25245653817082386,
|
|
"fcm_dpo/beta": 1.3979008197784424,
|
|
"fcm_dpo/delta": -0.15741577744483948,
|
|
"fcm_dpo/margin": 0.7090965509414673,
|
|
"fcm_dpo/q_t": 0.3177732825279236,
|
|
"grad_norm": 280.4785461425781,
|
|
"learning_rate": 4.6650635094610966e-07,
|
|
"logits/chosen": 0.059697844088077545,
|
|
"logits/rejected": 0.029954345896840096,
|
|
"logps/chosen": -59.82019805908203,
|
|
"logps/ref_chosen": -58.92427062988281,
|
|
"logps/ref_rejected": -72.97377014160156,
|
|
"logps/rejected": -74.57879638671875,
|
|
"loss": 0.9047,
|
|
"margin_dpo/margin_mean": 0.7090966701507568,
|
|
"margin_dpo/margin_std": 0.8868396282196045,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.25396825396825395,
|
|
"fcm_dpo/beta": 1.4516682624816895,
|
|
"fcm_dpo/delta": 0.32116368412971497,
|
|
"fcm_dpo/margin": 0.3770996034145355,
|
|
"fcm_dpo/q_t": 0.39416801929473877,
|
|
"grad_norm": 410.0304870605469,
|
|
"learning_rate": 4.6584221638904767e-07,
|
|
"logits/chosen": 0.10317844152450562,
|
|
"logits/rejected": 0.079832062125206,
|
|
"logps/chosen": -66.82249450683594,
|
|
"logps/ref_chosen": -65.65138244628906,
|
|
"logps/ref_rejected": -79.71418762207031,
|
|
"logps/rejected": -81.26239776611328,
|
|
"loss": 1.181,
|
|
"margin_dpo/margin_mean": 0.37709951400756836,
|
|
"margin_dpo/margin_std": 0.7641937732696533,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.25547996976568405,
|
|
"fcm_dpo/beta": 1.438971996307373,
|
|
"fcm_dpo/delta": -0.19553548097610474,
|
|
"fcm_dpo/margin": 0.7125035524368286,
|
|
"fcm_dpo/q_t": 0.32765817642211914,
|
|
"grad_norm": 361.0661926269531,
|
|
"learning_rate": 4.651720442612075e-07,
|
|
"logits/chosen": 0.16506020724773407,
|
|
"logits/rejected": 0.13683012127876282,
|
|
"logps/chosen": -62.39191818237305,
|
|
"logps/ref_chosen": -61.425865173339844,
|
|
"logps/ref_rejected": -76.09590148925781,
|
|
"logps/rejected": -77.77445220947266,
|
|
"loss": 0.9547,
|
|
"margin_dpo/margin_mean": 0.7125036716461182,
|
|
"margin_dpo/margin_std": 1.0379252433776855,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.25699168556311414,
|
|
"fcm_dpo/beta": 1.4439091682434082,
|
|
"fcm_dpo/delta": 0.06322521716356277,
|
|
"fcm_dpo/margin": 0.5480768084526062,
|
|
"fcm_dpo/q_t": 0.35138139128685,
|
|
"grad_norm": 313.36566162109375,
|
|
"learning_rate": 4.6449585330874425e-07,
|
|
"logits/chosen": 0.08458521962165833,
|
|
"logits/rejected": 0.08335210382938385,
|
|
"logps/chosen": -57.703369140625,
|
|
"logps/ref_chosen": -56.65319061279297,
|
|
"logps/ref_rejected": -63.45965576171875,
|
|
"logps/rejected": -65.05790710449219,
|
|
"loss": 1.0725,
|
|
"margin_dpo/margin_mean": 0.5480765104293823,
|
|
"margin_dpo/margin_std": 0.8856065273284912,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.2585034013605442,
|
|
"fcm_dpo/beta": 1.3877865076065063,
|
|
"fcm_dpo/delta": -0.18020044267177582,
|
|
"fcm_dpo/margin": 0.7254009246826172,
|
|
"fcm_dpo/q_t": 0.3164675533771515,
|
|
"grad_norm": 300.18817138671875,
|
|
"learning_rate": 4.6381366244617224e-07,
|
|
"logits/chosen": 0.16546288132667542,
|
|
"logits/rejected": 0.12482231855392456,
|
|
"logps/chosen": -64.7464599609375,
|
|
"logps/ref_chosen": -63.73476028442383,
|
|
"logps/ref_rejected": -78.50328063964844,
|
|
"logps/rejected": -80.24037170410156,
|
|
"loss": 0.9604,
|
|
"margin_dpo/margin_mean": 0.7254012823104858,
|
|
"margin_dpo/margin_std": 0.968841552734375,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.2600151171579743,
|
|
"fcm_dpo/beta": 1.3836549520492554,
|
|
"fcm_dpo/delta": -0.12257562577724457,
|
|
"fcm_dpo/margin": 0.6935369372367859,
|
|
"fcm_dpo/q_t": 0.3231150507926941,
|
|
"grad_norm": 285.284423828125,
|
|
"learning_rate": 4.631254907558365e-07,
|
|
"logits/chosen": 0.15227068960666656,
|
|
"logits/rejected": 0.1104995459318161,
|
|
"logps/chosen": -53.32598114013672,
|
|
"logps/ref_chosen": -52.201759338378906,
|
|
"logps/ref_rejected": -82.85285949707031,
|
|
"logps/rejected": -84.67062377929688,
|
|
"loss": 0.9731,
|
|
"margin_dpo/margin_mean": 0.6935364007949829,
|
|
"margin_dpo/margin_std": 0.9233601689338684,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.2615268329554044,
|
|
"fcm_dpo/beta": 1.3000613451004028,
|
|
"fcm_dpo/delta": -0.1376366764307022,
|
|
"fcm_dpo/margin": 0.7386313080787659,
|
|
"fcm_dpo/q_t": 0.34407109022140503,
|
|
"grad_norm": 271.6639709472656,
|
|
"learning_rate": 4.624313574873786e-07,
|
|
"logits/chosen": 0.1513877511024475,
|
|
"logits/rejected": 0.08314318209886551,
|
|
"logps/chosen": -56.60365676879883,
|
|
"logps/ref_chosen": -55.434722900390625,
|
|
"logps/ref_rejected": -77.81967163085938,
|
|
"logps/rejected": -79.72723388671875,
|
|
"loss": 1.0046,
|
|
"margin_dpo/margin_mean": 0.7386313676834106,
|
|
"margin_dpo/margin_std": 1.1225433349609375,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.26303854875283444,
|
|
"fcm_dpo/beta": 1.2976107597351074,
|
|
"fcm_dpo/delta": -0.1577700823545456,
|
|
"fcm_dpo/margin": 0.764767050743103,
|
|
"fcm_dpo/q_t": 0.329203724861145,
|
|
"grad_norm": 344.06982421875,
|
|
"learning_rate": 4.61731282057198e-07,
|
|
"logits/chosen": 0.14828677475452423,
|
|
"logits/rejected": 0.09550824761390686,
|
|
"logps/chosen": -58.302215576171875,
|
|
"logps/ref_chosen": -57.17195129394531,
|
|
"logps/ref_rejected": -85.47578430175781,
|
|
"logps/rejected": -87.37081146240234,
|
|
"loss": 0.9964,
|
|
"margin_dpo/margin_mean": 0.7647665143013,
|
|
"margin_dpo/margin_std": 1.099808931350708,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.26455026455026454,
|
|
"fcm_dpo/beta": 1.2494964599609375,
|
|
"fcm_dpo/delta": -0.13457192480564117,
|
|
"fcm_dpo/margin": 0.776630163192749,
|
|
"fcm_dpo/q_t": 0.33105403184890747,
|
|
"grad_norm": 340.8416748046875,
|
|
"learning_rate": 4.6102528404790965e-07,
|
|
"logits/chosen": 0.1482573300600052,
|
|
"logits/rejected": 0.12473028898239136,
|
|
"logps/chosen": -68.90888977050781,
|
|
"logps/ref_chosen": -67.6656265258789,
|
|
"logps/ref_rejected": -84.36766815185547,
|
|
"logps/rejected": -86.38755798339844,
|
|
"loss": 1.1129,
|
|
"margin_dpo/margin_mean": 0.776630163192749,
|
|
"margin_dpo/margin_std": 1.2866604328155518,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.2660619803476946,
|
|
"fcm_dpo/beta": 1.2561092376708984,
|
|
"fcm_dpo/delta": 0.07927154749631882,
|
|
"fcm_dpo/margin": 0.618436336517334,
|
|
"fcm_dpo/q_t": 0.37566858530044556,
|
|
"grad_norm": 377.3238525390625,
|
|
"learning_rate": 4.603133832077953e-07,
|
|
"logits/chosen": 0.12545280158519745,
|
|
"logits/rejected": 0.10419806838035583,
|
|
"logps/chosen": -79.16165924072266,
|
|
"logps/ref_chosen": -77.8587646484375,
|
|
"logps/ref_rejected": -81.08732604980469,
|
|
"logps/rejected": -83.00865936279297,
|
|
"loss": 1.2024,
|
|
"margin_dpo/margin_mean": 0.6184365153312683,
|
|
"margin_dpo/margin_std": 1.2843239307403564,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.2675736961451247,
|
|
"fcm_dpo/beta": 1.175858497619629,
|
|
"fcm_dpo/delta": -0.4110918641090393,
|
|
"fcm_dpo/margin": 1.0227813720703125,
|
|
"fcm_dpo/q_t": 0.28089749813079834,
|
|
"grad_norm": 325.0892639160156,
|
|
"learning_rate": 4.5959559945025183e-07,
|
|
"logits/chosen": 0.2447664737701416,
|
|
"logits/rejected": 0.16549718379974365,
|
|
"logps/chosen": -56.427490234375,
|
|
"logps/ref_chosen": -55.22039794921875,
|
|
"logps/ref_rejected": -92.54973602294922,
|
|
"logps/rejected": -94.77960968017578,
|
|
"loss": 0.8547,
|
|
"margin_dpo/margin_mean": 1.0227817296981812,
|
|
"margin_dpo/margin_std": 1.196207880973816,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.2690854119425548,
|
|
"fcm_dpo/beta": 1.1856530904769897,
|
|
"fcm_dpo/delta": 0.15816958248615265,
|
|
"fcm_dpo/margin": 0.5928707122802734,
|
|
"fcm_dpo/q_t": 0.37204986810684204,
|
|
"grad_norm": 311.888916015625,
|
|
"learning_rate": 4.588719528532341e-07,
|
|
"logits/chosen": 0.1036371961236,
|
|
"logits/rejected": 0.06448065489530563,
|
|
"logps/chosen": -62.1276741027832,
|
|
"logps/ref_chosen": -60.81049346923828,
|
|
"logps/ref_rejected": -81.12973022460938,
|
|
"logps/rejected": -83.03977966308594,
|
|
"loss": 1.0769,
|
|
"margin_dpo/margin_mean": 0.592870831489563,
|
|
"margin_dpo/margin_std": 0.9916863441467285,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.2705971277399849,
|
|
"fcm_dpo/beta": 1.214045524597168,
|
|
"fcm_dpo/delta": 0.029299605637788773,
|
|
"fcm_dpo/margin": 0.6777085661888123,
|
|
"fcm_dpo/q_t": 0.3637648820877075,
|
|
"grad_norm": 311.5542297363281,
|
|
"learning_rate": 4.581424636586928e-07,
|
|
"logits/chosen": 0.16729748249053955,
|
|
"logits/rejected": 0.15362539887428284,
|
|
"logps/chosen": -67.09163665771484,
|
|
"logps/ref_chosen": -65.67171478271484,
|
|
"logps/ref_rejected": -75.32586669921875,
|
|
"logps/rejected": -77.42349243164062,
|
|
"loss": 1.0894,
|
|
"margin_dpo/margin_mean": 0.6777083873748779,
|
|
"margin_dpo/margin_std": 1.1790449619293213,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.272108843537415,
|
|
"fcm_dpo/beta": 1.2290477752685547,
|
|
"fcm_dpo/delta": 0.06452183425426483,
|
|
"fcm_dpo/margin": 0.6426513195037842,
|
|
"fcm_dpo/q_t": 0.35979628562927246,
|
|
"grad_norm": 315.0635986328125,
|
|
"learning_rate": 4.5740715227200897e-07,
|
|
"logits/chosen": 0.018470853567123413,
|
|
"logits/rejected": 0.002668549306690693,
|
|
"logps/chosen": -57.85248565673828,
|
|
"logps/ref_chosen": -56.68280792236328,
|
|
"logps/ref_rejected": -64.94414520263672,
|
|
"logps/rejected": -66.75647735595703,
|
|
"loss": 1.0941,
|
|
"margin_dpo/margin_mean": 0.6426514387130737,
|
|
"margin_dpo/margin_std": 1.0852210521697998,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.273620559334845,
|
|
"fcm_dpo/beta": 1.1799449920654297,
|
|
"fcm_dpo/delta": -0.3260830044746399,
|
|
"fcm_dpo/margin": 0.9661595821380615,
|
|
"fcm_dpo/q_t": 0.2936689853668213,
|
|
"grad_norm": 262.0937194824219,
|
|
"learning_rate": 4.566660392614228e-07,
|
|
"logits/chosen": 0.17798230051994324,
|
|
"logits/rejected": 0.1470857560634613,
|
|
"logps/chosen": -61.9011344909668,
|
|
"logps/ref_chosen": -60.77604675292969,
|
|
"logps/ref_rejected": -83.98361206054688,
|
|
"logps/rejected": -86.07485961914062,
|
|
"loss": 0.8082,
|
|
"margin_dpo/margin_mean": 0.9661591053009033,
|
|
"margin_dpo/margin_std": 1.0348703861236572,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.2751322751322751,
|
|
"fcm_dpo/beta": 1.1442968845367432,
|
|
"fcm_dpo/delta": -0.008135635405778885,
|
|
"fcm_dpo/margin": 0.7490738034248352,
|
|
"fcm_dpo/q_t": 0.3455432653427124,
|
|
"grad_norm": 313.6883850097656,
|
|
"learning_rate": 4.5591914535745817e-07,
|
|
"logits/chosen": 0.11941174417734146,
|
|
"logits/rejected": 0.060549668967723846,
|
|
"logps/chosen": -61.594669342041016,
|
|
"logps/ref_chosen": -60.2537841796875,
|
|
"logps/ref_rejected": -89.7706298828125,
|
|
"logps/rejected": -91.86058807373047,
|
|
"loss": 1.1166,
|
|
"margin_dpo/margin_mean": 0.7490732669830322,
|
|
"margin_dpo/margin_std": 1.2752363681793213,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.2766439909297052,
|
|
"fcm_dpo/beta": 1.2075517177581787,
|
|
"fcm_dpo/delta": 0.3678004741668701,
|
|
"fcm_dpo/margin": 0.41555270552635193,
|
|
"fcm_dpo/q_t": 0.40780603885650635,
|
|
"grad_norm": 330.7014465332031,
|
|
"learning_rate": 4.551664914523433e-07,
|
|
"logits/chosen": 0.12853467464447021,
|
|
"logits/rejected": 0.1137867271900177,
|
|
"logps/chosen": -63.4423713684082,
|
|
"logps/ref_chosen": -61.76142120361328,
|
|
"logps/ref_rejected": -72.54627990722656,
|
|
"logps/rejected": -74.64278411865234,
|
|
"loss": 1.2964,
|
|
"margin_dpo/margin_mean": 0.41555219888687134,
|
|
"margin_dpo/margin_std": 1.0669963359832764,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.2781557067271353,
|
|
"fcm_dpo/beta": 1.2260205745697021,
|
|
"fcm_dpo/delta": 0.00919228047132492,
|
|
"fcm_dpo/margin": 0.6851847171783447,
|
|
"fcm_dpo/q_t": 0.34627607464790344,
|
|
"grad_norm": 230.51353454589844,
|
|
"learning_rate": 4.544080985994258e-07,
|
|
"logits/chosen": 0.20697131752967834,
|
|
"logits/rejected": 0.1586826741695404,
|
|
"logps/chosen": -48.165016174316406,
|
|
"logps/ref_chosen": -46.840721130371094,
|
|
"logps/ref_rejected": -69.3609390258789,
|
|
"logps/rejected": -71.37042236328125,
|
|
"loss": 0.9445,
|
|
"margin_dpo/margin_mean": 0.6851844191551208,
|
|
"margin_dpo/margin_std": 0.8955023288726807,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.2796674225245654,
|
|
"fcm_dpo/beta": 1.229468822479248,
|
|
"fcm_dpo/delta": -0.02585327997803688,
|
|
"fcm_dpo/margin": 0.7103534936904907,
|
|
"fcm_dpo/q_t": 0.357565701007843,
|
|
"grad_norm": 296.47052001953125,
|
|
"learning_rate": 4.5364398801258394e-07,
|
|
"logits/chosen": 0.13178446888923645,
|
|
"logits/rejected": 0.09441807866096497,
|
|
"logps/chosen": -53.799964904785156,
|
|
"logps/ref_chosen": -52.32114028930664,
|
|
"logps/ref_rejected": -68.3885726928711,
|
|
"logps/rejected": -70.57774353027344,
|
|
"loss": 1.1773,
|
|
"margin_dpo/margin_mean": 0.7103538513183594,
|
|
"margin_dpo/margin_std": 1.335863471031189,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.2811791383219955,
|
|
"fcm_dpo/beta": 1.225243091583252,
|
|
"fcm_dpo/delta": 0.017454147338867188,
|
|
"fcm_dpo/margin": 0.6798607110977173,
|
|
"fcm_dpo/q_t": 0.3525392413139343,
|
|
"grad_norm": 322.0429382324219,
|
|
"learning_rate": 4.5287418106563354e-07,
|
|
"logits/chosen": 0.10579089820384979,
|
|
"logits/rejected": 0.07291128486394882,
|
|
"logps/chosen": -68.80215454101562,
|
|
"logps/ref_chosen": -67.42012786865234,
|
|
"logps/ref_rejected": -82.50968933105469,
|
|
"logps/rejected": -84.57157897949219,
|
|
"loss": 1.0273,
|
|
"margin_dpo/margin_mean": 0.6798614263534546,
|
|
"margin_dpo/margin_std": 1.0495535135269165,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.28269085411942557,
|
|
"fcm_dpo/beta": 1.190890908241272,
|
|
"fcm_dpo/delta": -0.14130036532878876,
|
|
"fcm_dpo/margin": 0.8149707317352295,
|
|
"fcm_dpo/q_t": 0.32857638597488403,
|
|
"grad_norm": 374.1227111816406,
|
|
"learning_rate": 4.520986992917297e-07,
|
|
"logits/chosen": 0.10602662712335587,
|
|
"logits/rejected": 0.06065261363983154,
|
|
"logps/chosen": -77.01930236816406,
|
|
"logps/ref_chosen": -75.52549743652344,
|
|
"logps/ref_rejected": -94.76289367675781,
|
|
"logps/rejected": -97.0716781616211,
|
|
"loss": 1.1516,
|
|
"margin_dpo/margin_mean": 0.8149705529212952,
|
|
"margin_dpo/margin_std": 1.4823930263519287,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.2842025699168556,
|
|
"fcm_dpo/beta": 1.1827126741409302,
|
|
"fcm_dpo/delta": -0.07674290239810944,
|
|
"fcm_dpo/margin": 0.7770618200302124,
|
|
"fcm_dpo/q_t": 0.32536211609840393,
|
|
"grad_norm": 329.20703125,
|
|
"learning_rate": 4.5131756438276466e-07,
|
|
"logits/chosen": 0.146919846534729,
|
|
"logits/rejected": 0.11189775168895721,
|
|
"logps/chosen": -72.79115295410156,
|
|
"logps/ref_chosen": -71.52333068847656,
|
|
"logps/ref_rejected": -78.29949951171875,
|
|
"logps/rejected": -80.34439086914062,
|
|
"loss": 1.0643,
|
|
"margin_dpo/margin_mean": 0.7770620584487915,
|
|
"margin_dpo/margin_std": 1.269317388534546,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.2857142857142857,
|
|
"fcm_dpo/beta": 1.1473429203033447,
|
|
"fcm_dpo/delta": -0.11207123845815659,
|
|
"fcm_dpo/margin": 0.8247168064117432,
|
|
"fcm_dpo/q_t": 0.325833261013031,
|
|
"grad_norm": 281.21337890625,
|
|
"learning_rate": 4.5053079818876096e-07,
|
|
"logits/chosen": 0.133220374584198,
|
|
"logits/rejected": 0.14191579818725586,
|
|
"logps/chosen": -73.43196105957031,
|
|
"logps/ref_chosen": -72.17626953125,
|
|
"logps/ref_rejected": -75.26313781738281,
|
|
"logps/rejected": -77.34353637695312,
|
|
"loss": 0.9465,
|
|
"margin_dpo/margin_mean": 0.8247175216674805,
|
|
"margin_dpo/margin_std": 1.0672475099563599,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.2872260015117158,
|
|
"fcm_dpo/beta": 1.1403197050094604,
|
|
"fcm_dpo/delta": -0.09567119181156158,
|
|
"fcm_dpo/margin": 0.8214250802993774,
|
|
"fcm_dpo/q_t": 0.3279981315135956,
|
|
"grad_norm": 274.4941101074219,
|
|
"learning_rate": 4.4973842271726024e-07,
|
|
"logits/chosen": 0.2039899379014969,
|
|
"logits/rejected": 0.0918896347284317,
|
|
"logps/chosen": -55.97621154785156,
|
|
"logps/ref_chosen": -54.624271392822266,
|
|
"logps/ref_rejected": -101.47068786621094,
|
|
"logps/rejected": -103.64405059814453,
|
|
"loss": 0.9625,
|
|
"margin_dpo/margin_mean": 0.821425199508667,
|
|
"margin_dpo/margin_std": 1.1282612085342407,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.2887377173091459,
|
|
"fcm_dpo/beta": 1.1277894973754883,
|
|
"fcm_dpo/delta": -0.048582788556814194,
|
|
"fcm_dpo/margin": 0.7929339408874512,
|
|
"fcm_dpo/q_t": 0.33727309107780457,
|
|
"grad_norm": 331.9569396972656,
|
|
"learning_rate": 4.48940460132708e-07,
|
|
"logits/chosen": 0.19591175019741058,
|
|
"logits/rejected": 0.17645688354969025,
|
|
"logps/chosen": -74.50765991210938,
|
|
"logps/ref_chosen": -72.93251037597656,
|
|
"logps/ref_rejected": -89.95103454589844,
|
|
"logps/rejected": -92.31910705566406,
|
|
"loss": 1.0172,
|
|
"margin_dpo/margin_mean": 0.7929338216781616,
|
|
"margin_dpo/margin_std": 1.1808544397354126,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.29024943310657597,
|
|
"fcm_dpo/beta": 1.1447663307189941,
|
|
"fcm_dpo/delta": 0.11726969480514526,
|
|
"fcm_dpo/margin": 0.6475502252578735,
|
|
"fcm_dpo/q_t": 0.3605687618255615,
|
|
"grad_norm": 227.45828247070312,
|
|
"learning_rate": 4.481369327558329e-07,
|
|
"logits/chosen": 0.1682664304971695,
|
|
"logits/rejected": 0.14873062074184418,
|
|
"logps/chosen": -55.458255767822266,
|
|
"logps/ref_chosen": -54.001121520996094,
|
|
"logps/ref_rejected": -63.531551361083984,
|
|
"logps/rejected": -65.63623809814453,
|
|
"loss": 1.0626,
|
|
"margin_dpo/margin_mean": 0.6475502848625183,
|
|
"margin_dpo/margin_std": 1.047489047050476,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.29176114890400606,
|
|
"fcm_dpo/beta": 1.115247368812561,
|
|
"fcm_dpo/delta": -0.15359792113304138,
|
|
"fcm_dpo/margin": 0.8840051293373108,
|
|
"fcm_dpo/q_t": 0.32005080580711365,
|
|
"grad_norm": 219.44058227539062,
|
|
"learning_rate": 4.47327863063023e-07,
|
|
"logits/chosen": 0.10667343437671661,
|
|
"logits/rejected": 0.08797129988670349,
|
|
"logps/chosen": -58.189697265625,
|
|
"logps/ref_chosen": -56.74927520751953,
|
|
"logps/ref_rejected": -58.80629348754883,
|
|
"logps/rejected": -61.13072204589844,
|
|
"loss": 0.8919,
|
|
"margin_dpo/margin_mean": 0.8840053081512451,
|
|
"margin_dpo/margin_std": 1.11568284034729,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.29327286470143615,
|
|
"fcm_dpo/beta": 1.1258536577224731,
|
|
"fcm_dpo/delta": 0.11767183244228363,
|
|
"fcm_dpo/margin": 0.6577843427658081,
|
|
"fcm_dpo/q_t": 0.36665472388267517,
|
|
"grad_norm": 271.59124755859375,
|
|
"learning_rate": 4.4651327368569684e-07,
|
|
"logits/chosen": 0.18203461170196533,
|
|
"logits/rejected": 0.15865445137023926,
|
|
"logps/chosen": -58.03636932373047,
|
|
"logps/ref_chosen": -56.64944076538086,
|
|
"logps/ref_rejected": -69.98954772949219,
|
|
"logps/rejected": -72.03425598144531,
|
|
"loss": 1.1466,
|
|
"margin_dpo/margin_mean": 0.6577843427658081,
|
|
"margin_dpo/margin_std": 1.216817021369934,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.2947845804988662,
|
|
"fcm_dpo/beta": 1.1474632024765015,
|
|
"fcm_dpo/delta": 0.10708002001047134,
|
|
"fcm_dpo/margin": 0.6532057523727417,
|
|
"fcm_dpo/q_t": 0.3579810559749603,
|
|
"grad_norm": 295.77105712890625,
|
|
"learning_rate": 4.4569318740967043e-07,
|
|
"logits/chosen": 0.07060997188091278,
|
|
"logits/rejected": 0.06883937120437622,
|
|
"logps/chosen": -72.13310241699219,
|
|
"logps/ref_chosen": -70.40977478027344,
|
|
"logps/ref_rejected": -74.39448547363281,
|
|
"logps/rejected": -76.77101135253906,
|
|
"loss": 1.1179,
|
|
"margin_dpo/margin_mean": 0.6532056331634521,
|
|
"margin_dpo/margin_std": 1.1192982196807861,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.2962962962962963,
|
|
"fcm_dpo/beta": 1.1742520332336426,
|
|
"fcm_dpo/delta": -0.010713696479797363,
|
|
"fcm_dpo/margin": 0.7313066720962524,
|
|
"fcm_dpo/q_t": 0.34498167037963867,
|
|
"grad_norm": 252.9849395751953,
|
|
"learning_rate": 4.448676271745197e-07,
|
|
"logits/chosen": 0.16027367115020752,
|
|
"logits/rejected": 0.12814804911613464,
|
|
"logps/chosen": -60.709869384765625,
|
|
"logps/ref_chosen": -59.227577209472656,
|
|
"logps/ref_rejected": -83.54757690429688,
|
|
"logps/rejected": -85.76117706298828,
|
|
"loss": 1.0713,
|
|
"margin_dpo/margin_mean": 0.7313063144683838,
|
|
"margin_dpo/margin_std": 1.170758605003357,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.29780801209372637,
|
|
"fcm_dpo/beta": 1.1702077388763428,
|
|
"fcm_dpo/delta": -0.154099702835083,
|
|
"fcm_dpo/margin": 0.8365795016288757,
|
|
"fcm_dpo/q_t": 0.341280460357666,
|
|
"grad_norm": 327.5577697753906,
|
|
"learning_rate": 4.440366160729392e-07,
|
|
"logits/chosen": 0.22983162105083466,
|
|
"logits/rejected": 0.19321538507938385,
|
|
"logps/chosen": -53.02672576904297,
|
|
"logps/ref_chosen": -51.52912902832031,
|
|
"logps/ref_rejected": -73.70631408691406,
|
|
"logps/rejected": -76.04048156738281,
|
|
"loss": 1.2086,
|
|
"margin_dpo/margin_mean": 0.836578905582428,
|
|
"margin_dpo/margin_std": 1.5111768245697021,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.29931972789115646,
|
|
"fcm_dpo/beta": 1.096937894821167,
|
|
"fcm_dpo/delta": -0.2913700044155121,
|
|
"fcm_dpo/margin": 1.0111193656921387,
|
|
"fcm_dpo/q_t": 0.30249547958374023,
|
|
"grad_norm": 260.2440490722656,
|
|
"learning_rate": 4.432001773500957e-07,
|
|
"logits/chosen": 0.19865413010120392,
|
|
"logits/rejected": 0.1672060191631317,
|
|
"logps/chosen": -61.236427307128906,
|
|
"logps/ref_chosen": -59.78268051147461,
|
|
"logps/ref_rejected": -72.24533081054688,
|
|
"logps/rejected": -74.71019744873047,
|
|
"loss": 0.8902,
|
|
"margin_dpo/margin_mean": 1.0111192464828491,
|
|
"margin_dpo/margin_std": 1.2340037822723389,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.30083144368858655,
|
|
"fcm_dpo/beta": 1.0855709314346313,
|
|
"fcm_dpo/delta": 0.07997329533100128,
|
|
"fcm_dpo/margin": 0.7136896848678589,
|
|
"fcm_dpo/q_t": 0.3555886745452881,
|
|
"grad_norm": 296.0491943359375,
|
|
"learning_rate": 4.4235833440297856e-07,
|
|
"logits/chosen": 0.15217125415802002,
|
|
"logits/rejected": 0.08081059157848358,
|
|
"logps/chosen": -57.951934814453125,
|
|
"logps/ref_chosen": -56.38677215576172,
|
|
"logps/ref_rejected": -74.56779479980469,
|
|
"logps/rejected": -76.8466567993164,
|
|
"loss": 1.0663,
|
|
"margin_dpo/margin_mean": 0.7136895656585693,
|
|
"margin_dpo/margin_std": 1.1397019624710083,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.30234315948601664,
|
|
"fcm_dpo/beta": 1.0411416292190552,
|
|
"fcm_dpo/delta": -0.19544623792171478,
|
|
"fcm_dpo/margin": 0.9813714027404785,
|
|
"fcm_dpo/q_t": 0.3251346945762634,
|
|
"grad_norm": 261.3872985839844,
|
|
"learning_rate": 4.415111107797445e-07,
|
|
"logits/chosen": 0.1957240104675293,
|
|
"logits/rejected": 0.13964782655239105,
|
|
"logps/chosen": -59.139198303222656,
|
|
"logps/ref_chosen": -57.82432556152344,
|
|
"logps/ref_rejected": -89.28246307373047,
|
|
"logps/rejected": -91.57870483398438,
|
|
"loss": 0.9719,
|
|
"margin_dpo/margin_mean": 0.9813716411590576,
|
|
"margin_dpo/margin_std": 1.381603479385376,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.30234315948601664,
|
|
"eval_fcm_dpo/beta": 1.0409806966781616,
|
|
"eval_logits/chosen": 0.1611482799053192,
|
|
"eval_logits/rejected": 0.12632358074188232,
|
|
"eval_logps/chosen": -76.3137435913086,
|
|
"eval_logps/ref_chosen": -74.85946655273438,
|
|
"eval_logps/ref_rejected": -79.54898834228516,
|
|
"eval_logps/rejected": -81.69268035888672,
|
|
"eval_loss": 0.576444149017334,
|
|
"eval_margin_dpo/margin_mean": 0.6894133687019348,
|
|
"eval_margin_dpo/margin_std": 1.306614875793457,
|
|
"eval_runtime": 38.062,
|
|
"eval_samples_per_second": 60.506,
|
|
"eval_steps_per_second": 1.892,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.30385487528344673,
|
|
"fcm_dpo/beta": 1.035691499710083,
|
|
"fcm_dpo/delta": -0.036999065428972244,
|
|
"fcm_dpo/margin": 0.8532841205596924,
|
|
"fcm_dpo/q_t": 0.33598366379737854,
|
|
"grad_norm": 237.65528869628906,
|
|
"learning_rate": 4.4065853017905953e-07,
|
|
"logits/chosen": 0.20192654430866241,
|
|
"logits/rejected": 0.16781750321388245,
|
|
"logps/chosen": -60.54228210449219,
|
|
"logps/ref_chosen": -58.999759674072266,
|
|
"logps/ref_rejected": -84.67575073242188,
|
|
"logps/rejected": -87.07154846191406,
|
|
"loss": 0.9362,
|
|
"margin_dpo/margin_mean": 0.8532842397689819,
|
|
"margin_dpo/margin_std": 1.0954551696777344,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.30536659108087677,
|
|
"fcm_dpo/beta": 1.0213491916656494,
|
|
"fcm_dpo/delta": -0.18040968477725983,
|
|
"fcm_dpo/margin": 0.9893622398376465,
|
|
"fcm_dpo/q_t": 0.31719034910202026,
|
|
"grad_norm": 201.83595275878906,
|
|
"learning_rate": 4.3980061644943575e-07,
|
|
"logits/chosen": 0.11923044919967651,
|
|
"logits/rejected": 0.06677506864070892,
|
|
"logps/chosen": -49.09148406982422,
|
|
"logps/ref_chosen": -47.660648345947266,
|
|
"logps/ref_rejected": -73.63249969482422,
|
|
"logps/rejected": -76.05269622802734,
|
|
"loss": 0.8957,
|
|
"margin_dpo/margin_mean": 0.9893627166748047,
|
|
"margin_dpo/margin_std": 1.2042524814605713,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.30687830687830686,
|
|
"fcm_dpo/beta": 1.0198414325714111,
|
|
"fcm_dpo/delta": 0.10756812244653702,
|
|
"fcm_dpo/margin": 0.73426353931427,
|
|
"fcm_dpo/q_t": 0.36804383993148804,
|
|
"grad_norm": 288.4985046386719,
|
|
"learning_rate": 4.3893739358856455e-07,
|
|
"logits/chosen": 0.20233526825904846,
|
|
"logits/rejected": 0.15018996596336365,
|
|
"logps/chosen": -63.7635498046875,
|
|
"logps/ref_chosen": -62.32553482055664,
|
|
"logps/ref_rejected": -99.37226104736328,
|
|
"logps/rejected": -101.54454040527344,
|
|
"loss": 1.1316,
|
|
"margin_dpo/margin_mean": 0.7342634797096252,
|
|
"margin_dpo/margin_std": 1.341469407081604,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.30839002267573695,
|
|
"fcm_dpo/beta": 1.0091928243637085,
|
|
"fcm_dpo/delta": 0.06542906165122986,
|
|
"fcm_dpo/margin": 0.7767828702926636,
|
|
"fcm_dpo/q_t": 0.3685183525085449,
|
|
"grad_norm": 244.5875244140625,
|
|
"learning_rate": 4.380688857426449e-07,
|
|
"logits/chosen": 0.1188371554017067,
|
|
"logits/rejected": 0.0663602203130722,
|
|
"logps/chosen": -52.25553894042969,
|
|
"logps/ref_chosen": -50.62931823730469,
|
|
"logps/ref_rejected": -66.60475158691406,
|
|
"logps/rejected": -69.00775909423828,
|
|
"loss": 1.1436,
|
|
"margin_dpo/margin_mean": 0.7767831087112427,
|
|
"margin_dpo/margin_std": 1.4177677631378174,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.30990173847316704,
|
|
"fcm_dpo/beta": 1.0608057975769043,
|
|
"fcm_dpo/delta": 0.2543222904205322,
|
|
"fcm_dpo/margin": 0.57657790184021,
|
|
"fcm_dpo/q_t": 0.3946090340614319,
|
|
"grad_norm": 368.377197265625,
|
|
"learning_rate": 4.3719511720570814e-07,
|
|
"logits/chosen": 0.18162226676940918,
|
|
"logits/rejected": 0.1382521241903305,
|
|
"logps/chosen": -72.12284851074219,
|
|
"logps/ref_chosen": -70.3561782836914,
|
|
"logps/ref_rejected": -93.39848327636719,
|
|
"logps/rejected": -95.7417221069336,
|
|
"loss": 1.3038,
|
|
"margin_dpo/margin_mean": 0.5765775442123413,
|
|
"margin_dpo/margin_std": 1.4098232984542847,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.31141345427059713,
|
|
"fcm_dpo/beta": 1.1130549907684326,
|
|
"fcm_dpo/delta": 0.04998175799846649,
|
|
"fcm_dpo/margin": 0.7163587212562561,
|
|
"fcm_dpo/q_t": 0.36462822556495667,
|
|
"grad_norm": 297.85198974609375,
|
|
"learning_rate": 4.363161124189387e-07,
|
|
"logits/chosen": 0.17337989807128906,
|
|
"logits/rejected": 0.16002798080444336,
|
|
"logps/chosen": -69.37913513183594,
|
|
"logps/ref_chosen": -67.64547729492188,
|
|
"logps/ref_rejected": -79.89584350585938,
|
|
"logps/rejected": -82.34585571289062,
|
|
"loss": 1.1734,
|
|
"margin_dpo/margin_mean": 0.7163584232330322,
|
|
"margin_dpo/margin_std": 1.320636510848999,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.3129251700680272,
|
|
"fcm_dpo/beta": 1.0816932916641235,
|
|
"fcm_dpo/delta": 0.017237961292266846,
|
|
"fcm_dpo/margin": 0.7667139768600464,
|
|
"fcm_dpo/q_t": 0.3583172559738159,
|
|
"grad_norm": 258.2594299316406,
|
|
"learning_rate": 4.3543189596998986e-07,
|
|
"logits/chosen": 0.1258050948381424,
|
|
"logits/rejected": 0.07419107109308243,
|
|
"logps/chosen": -69.36555480957031,
|
|
"logps/ref_chosen": -67.66419219970703,
|
|
"logps/ref_rejected": -85.10249328613281,
|
|
"logps/rejected": -87.57057189941406,
|
|
"loss": 1.0501,
|
|
"margin_dpo/margin_mean": 0.7667145729064941,
|
|
"margin_dpo/margin_std": 1.2524394989013672,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.3144368858654573,
|
|
"fcm_dpo/beta": 1.1183842420578003,
|
|
"fcm_dpo/delta": 0.107530876994133,
|
|
"fcm_dpo/margin": 0.6710580587387085,
|
|
"fcm_dpo/q_t": 0.37669068574905396,
|
|
"grad_norm": 279.748291015625,
|
|
"learning_rate": 4.3454249259229664e-07,
|
|
"logits/chosen": 0.1450473964214325,
|
|
"logits/rejected": 0.12448123842477798,
|
|
"logps/chosen": -59.174312591552734,
|
|
"logps/ref_chosen": -57.731712341308594,
|
|
"logps/ref_rejected": -74.19276428222656,
|
|
"logps/rejected": -76.30642700195312,
|
|
"loss": 1.1878,
|
|
"margin_dpo/margin_mean": 0.6710573434829712,
|
|
"margin_dpo/margin_std": 1.319955825805664,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.31594860166288735,
|
|
"fcm_dpo/beta": 1.116701364517212,
|
|
"fcm_dpo/delta": -0.21553626656532288,
|
|
"fcm_dpo/margin": 0.928307294845581,
|
|
"fcm_dpo/q_t": 0.33162564039230347,
|
|
"grad_norm": 317.8649597167969,
|
|
"learning_rate": 4.336479271643833e-07,
|
|
"logits/chosen": 0.1369079351425171,
|
|
"logits/rejected": 0.09211073815822601,
|
|
"logps/chosen": -70.18434143066406,
|
|
"logps/ref_chosen": -68.55007934570312,
|
|
"logps/ref_rejected": -87.90541076660156,
|
|
"logps/rejected": -90.46798706054688,
|
|
"loss": 1.0438,
|
|
"margin_dpo/margin_mean": 0.9283081293106079,
|
|
"margin_dpo/margin_std": 1.3755784034729004,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.31746031746031744,
|
|
"fcm_dpo/beta": 1.0468344688415527,
|
|
"fcm_dpo/delta": -0.23611387610435486,
|
|
"fcm_dpo/margin": 1.014129877090454,
|
|
"fcm_dpo/q_t": 0.32915377616882324,
|
|
"grad_norm": 255.683349609375,
|
|
"learning_rate": 4.327482247091679e-07,
|
|
"logits/chosen": 0.17080672085285187,
|
|
"logits/rejected": 0.10357716679573059,
|
|
"logps/chosen": -58.91559600830078,
|
|
"logps/ref_chosen": -57.268272399902344,
|
|
"logps/ref_rejected": -85.72807312011719,
|
|
"logps/rejected": -88.38953399658203,
|
|
"loss": 0.98,
|
|
"margin_dpo/margin_mean": 1.0141295194625854,
|
|
"margin_dpo/margin_std": 1.424011468887329,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.31897203325774753,
|
|
"fcm_dpo/beta": 1.031850814819336,
|
|
"fcm_dpo/delta": 0.012154560536146164,
|
|
"fcm_dpo/margin": 0.8127963542938232,
|
|
"fcm_dpo/q_t": 0.34257203340530396,
|
|
"grad_norm": 256.3596496582031,
|
|
"learning_rate": 4.3184341039326217e-07,
|
|
"logits/chosen": 0.20410758256912231,
|
|
"logits/rejected": 0.1363191306591034,
|
|
"logps/chosen": -55.072418212890625,
|
|
"logps/ref_chosen": -53.640708923339844,
|
|
"logps/ref_rejected": -93.0387954711914,
|
|
"logps/rejected": -95.28330993652344,
|
|
"loss": 0.9539,
|
|
"margin_dpo/margin_mean": 0.8127955198287964,
|
|
"margin_dpo/margin_std": 1.0826678276062012,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.3204837490551776,
|
|
"fcm_dpo/beta": 1.0302294492721558,
|
|
"fcm_dpo/delta": -0.022576171904802322,
|
|
"fcm_dpo/margin": 0.8448600172996521,
|
|
"fcm_dpo/q_t": 0.34546002745628357,
|
|
"grad_norm": 255.3373565673828,
|
|
"learning_rate": 4.309335095262675e-07,
|
|
"logits/chosen": 0.1806902289390564,
|
|
"logits/rejected": 0.12561562657356262,
|
|
"logps/chosen": -59.00420379638672,
|
|
"logps/ref_chosen": -57.36674499511719,
|
|
"logps/ref_rejected": -79.89643096923828,
|
|
"logps/rejected": -82.37875366210938,
|
|
"loss": 1.012,
|
|
"margin_dpo/margin_mean": 0.8448594212532043,
|
|
"margin_dpo/margin_std": 1.2606749534606934,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.3219954648526077,
|
|
"fcm_dpo/beta": 1.004958152770996,
|
|
"fcm_dpo/delta": -0.09056046605110168,
|
|
"fcm_dpo/margin": 0.9262406826019287,
|
|
"fcm_dpo/q_t": 0.3364323675632477,
|
|
"grad_norm": 259.2325134277344,
|
|
"learning_rate": 4.3001854756006724e-07,
|
|
"logits/chosen": 0.19759291410446167,
|
|
"logits/rejected": 0.1763480305671692,
|
|
"logps/chosen": -66.59672546386719,
|
|
"logps/ref_chosen": -65.22111511230469,
|
|
"logps/ref_rejected": -80.1810302734375,
|
|
"logps/rejected": -82.48287963867188,
|
|
"loss": 1.0256,
|
|
"margin_dpo/margin_mean": 0.9262403249740601,
|
|
"margin_dpo/margin_std": 1.395569086074829,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.3235071806500378,
|
|
"fcm_dpo/beta": 1.0033698081970215,
|
|
"fcm_dpo/delta": -0.0978882685303688,
|
|
"fcm_dpo/margin": 0.9341999292373657,
|
|
"fcm_dpo/q_t": 0.33533668518066406,
|
|
"grad_norm": 262.6483154296875,
|
|
"learning_rate": 4.290985500881143e-07,
|
|
"logits/chosen": 0.0739990621805191,
|
|
"logits/rejected": 0.0537070631980896,
|
|
"logps/chosen": -62.82872772216797,
|
|
"logps/ref_chosen": -61.292327880859375,
|
|
"logps/ref_rejected": -67.69841003417969,
|
|
"logps/rejected": -70.16900634765625,
|
|
"loss": 0.983,
|
|
"margin_dpo/margin_mean": 0.9342003464698792,
|
|
"margin_dpo/margin_std": 1.3554797172546387,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.3250188964474679,
|
|
"fcm_dpo/beta": 0.9648710489273071,
|
|
"fcm_dpo/delta": -0.2038130909204483,
|
|
"fcm_dpo/margin": 1.0707131624221802,
|
|
"fcm_dpo/q_t": 0.3213546872138977,
|
|
"grad_norm": 233.17303466796875,
|
|
"learning_rate": 4.281735428447157e-07,
|
|
"logits/chosen": 0.08917293697595596,
|
|
"logits/rejected": 0.014121760614216328,
|
|
"logps/chosen": -65.3584213256836,
|
|
"logps/ref_chosen": -63.869136810302734,
|
|
"logps/ref_rejected": -98.7657241821289,
|
|
"logps/rejected": -101.32572174072266,
|
|
"loss": 0.9073,
|
|
"margin_dpo/margin_mean": 1.0707132816314697,
|
|
"margin_dpo/margin_std": 1.3552508354187012,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.32653061224489793,
|
|
"fcm_dpo/beta": 0.9256983995437622,
|
|
"fcm_dpo/delta": -0.2675190567970276,
|
|
"fcm_dpo/margin": 1.174886703491211,
|
|
"fcm_dpo/q_t": 0.3485989570617676,
|
|
"grad_norm": 239.40997314453125,
|
|
"learning_rate": 4.2724355170431247e-07,
|
|
"logits/chosen": 0.2045373022556305,
|
|
"logits/rejected": 0.13870683312416077,
|
|
"logps/chosen": -69.51454162597656,
|
|
"logps/ref_chosen": -67.824951171875,
|
|
"logps/ref_rejected": -96.40231323242188,
|
|
"logps/rejected": -99.26679229736328,
|
|
"loss": 1.083,
|
|
"margin_dpo/margin_mean": 1.174886703491211,
|
|
"margin_dpo/margin_std": 2.2862184047698975,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.328042328042328,
|
|
"fcm_dpo/beta": 0.8413063883781433,
|
|
"fcm_dpo/delta": -0.3993903398513794,
|
|
"fcm_dpo/margin": 1.4241771697998047,
|
|
"fcm_dpo/q_t": 0.2878588140010834,
|
|
"grad_norm": 174.88531494140625,
|
|
"learning_rate": 4.26308602680756e-07,
|
|
"logits/chosen": 0.14676231145858765,
|
|
"logits/rejected": 0.06052399426698685,
|
|
"logps/chosen": -62.13254165649414,
|
|
"logps/ref_chosen": -60.5049934387207,
|
|
"logps/ref_rejected": -84.26618194580078,
|
|
"logps/rejected": -87.31790924072266,
|
|
"loss": 0.7768,
|
|
"margin_dpo/margin_mean": 1.4241769313812256,
|
|
"margin_dpo/margin_std": 1.5880484580993652,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.3295540438397581,
|
|
"fcm_dpo/beta": 0.8780190944671631,
|
|
"fcm_dpo/delta": 0.30152568221092224,
|
|
"fcm_dpo/margin": 0.6365560293197632,
|
|
"fcm_dpo/q_t": 0.3970625102519989,
|
|
"grad_norm": 239.61326599121094,
|
|
"learning_rate": 4.253687219265803e-07,
|
|
"logits/chosen": 0.03338109701871872,
|
|
"logits/rejected": 0.026734884828329086,
|
|
"logps/chosen": -72.37188720703125,
|
|
"logps/ref_chosen": -70.59431457519531,
|
|
"logps/ref_rejected": -73.89038848876953,
|
|
"logps/rejected": -76.30451965332031,
|
|
"loss": 1.2469,
|
|
"margin_dpo/margin_mean": 0.6365566253662109,
|
|
"margin_dpo/margin_std": 1.4416258335113525,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.3310657596371882,
|
|
"fcm_dpo/beta": 0.9143224954605103,
|
|
"fcm_dpo/delta": 0.2528460621833801,
|
|
"fcm_dpo/margin": 0.670140266418457,
|
|
"fcm_dpo/q_t": 0.380919486284256,
|
|
"grad_norm": 221.69932556152344,
|
|
"learning_rate": 4.2442393573227043e-07,
|
|
"logits/chosen": 0.10697716474533081,
|
|
"logits/rejected": 0.07464110851287842,
|
|
"logps/chosen": -62.023380279541016,
|
|
"logps/ref_chosen": -60.490943908691406,
|
|
"logps/ref_rejected": -75.85001373291016,
|
|
"logps/rejected": -78.0525894165039,
|
|
"loss": 1.0959,
|
|
"margin_dpo/margin_mean": 0.6701398491859436,
|
|
"margin_dpo/margin_std": 1.143141269683838,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.3325774754346183,
|
|
"fcm_dpo/beta": 0.9308043718338013,
|
|
"fcm_dpo/delta": 0.06596644967794418,
|
|
"fcm_dpo/margin": 0.8476877212524414,
|
|
"fcm_dpo/q_t": 0.3605768382549286,
|
|
"grad_norm": 188.4651641845703,
|
|
"learning_rate": 4.234742705255272e-07,
|
|
"logits/chosen": 0.18505269289016724,
|
|
"logits/rejected": 0.13934630155563354,
|
|
"logps/chosen": -46.70742416381836,
|
|
"logps/ref_chosen": -45.013397216796875,
|
|
"logps/ref_rejected": -70.49369812011719,
|
|
"logps/rejected": -73.03541564941406,
|
|
"loss": 1.0733,
|
|
"margin_dpo/margin_mean": 0.8476879596710205,
|
|
"margin_dpo/margin_std": 1.3994786739349365,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.3340891912320484,
|
|
"fcm_dpo/beta": 0.9400441646575928,
|
|
"fcm_dpo/delta": -0.04620972275733948,
|
|
"fcm_dpo/margin": 0.948039174079895,
|
|
"fcm_dpo/q_t": 0.3494917154312134,
|
|
"grad_norm": 225.3359832763672,
|
|
"learning_rate": 4.22519752870528e-07,
|
|
"logits/chosen": 0.1869061291217804,
|
|
"logits/rejected": 0.13732600212097168,
|
|
"logps/chosen": -60.603050231933594,
|
|
"logps/ref_chosen": -59.09584045410156,
|
|
"logps/ref_rejected": -88.64388275146484,
|
|
"logps/rejected": -91.09913635253906,
|
|
"loss": 1.0056,
|
|
"margin_dpo/margin_mean": 0.948039174079895,
|
|
"margin_dpo/margin_std": 1.4668171405792236,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.3356009070294785,
|
|
"fcm_dpo/beta": 0.9103920459747314,
|
|
"fcm_dpo/delta": -0.15236330032348633,
|
|
"fcm_dpo/margin": 1.084613561630249,
|
|
"fcm_dpo/q_t": 0.31708869338035583,
|
|
"grad_norm": 218.53720092773438,
|
|
"learning_rate": 4.2156040946718343e-07,
|
|
"logits/chosen": 0.20063142478466034,
|
|
"logits/rejected": 0.1383713036775589,
|
|
"logps/chosen": -57.69280242919922,
|
|
"logps/ref_chosen": -55.9976921081543,
|
|
"logps/ref_rejected": -111.94727325439453,
|
|
"logps/rejected": -114.72698974609375,
|
|
"loss": 0.9187,
|
|
"margin_dpo/margin_mean": 1.0846132040023804,
|
|
"margin_dpo/margin_std": 1.366696834564209,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.3371126228269085,
|
|
"fcm_dpo/beta": 0.8663169741630554,
|
|
"fcm_dpo/delta": -0.21477438509464264,
|
|
"fcm_dpo/margin": 1.199501633644104,
|
|
"fcm_dpo/q_t": 0.30882522463798523,
|
|
"grad_norm": 179.276123046875,
|
|
"learning_rate": 4.2059626715039065e-07,
|
|
"logits/chosen": 0.18869906663894653,
|
|
"logits/rejected": 0.14491060376167297,
|
|
"logps/chosen": -61.44518280029297,
|
|
"logps/ref_chosen": -59.891422271728516,
|
|
"logps/ref_rejected": -86.28954315185547,
|
|
"logps/rejected": -89.04280090332031,
|
|
"loss": 0.8567,
|
|
"margin_dpo/margin_mean": 1.1995023488998413,
|
|
"margin_dpo/margin_std": 1.3853929042816162,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.3386243386243386,
|
|
"fcm_dpo/beta": 0.8999690413475037,
|
|
"fcm_dpo/delta": 0.2813396155834198,
|
|
"fcm_dpo/margin": 0.6505717635154724,
|
|
"fcm_dpo/q_t": 0.38824737071990967,
|
|
"grad_norm": 239.49497985839844,
|
|
"learning_rate": 4.1962735288928304e-07,
|
|
"logits/chosen": 0.19951647520065308,
|
|
"logits/rejected": 0.1848064661026001,
|
|
"logps/chosen": -65.77809143066406,
|
|
"logps/ref_chosen": -64.04463195800781,
|
|
"logps/ref_rejected": -75.05450439453125,
|
|
"logps/rejected": -77.43852233886719,
|
|
"loss": 1.0938,
|
|
"margin_dpo/margin_mean": 0.6505719423294067,
|
|
"margin_dpo/margin_std": 1.119499683380127,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.3401360544217687,
|
|
"fcm_dpo/beta": 0.8856638669967651,
|
|
"fcm_dpo/delta": -0.1330086588859558,
|
|
"fcm_dpo/margin": 1.0904693603515625,
|
|
"fcm_dpo/q_t": 0.33212995529174805,
|
|
"grad_norm": 278.0043640136719,
|
|
"learning_rate": 4.186536937864752e-07,
|
|
"logits/chosen": 0.1784878522157669,
|
|
"logits/rejected": 0.09040804207324982,
|
|
"logps/chosen": -67.76234436035156,
|
|
"logps/ref_chosen": -66.0958251953125,
|
|
"logps/ref_rejected": -97.68675231933594,
|
|
"logps/rejected": -100.4437255859375,
|
|
"loss": 0.9711,
|
|
"margin_dpo/margin_mean": 1.0904691219329834,
|
|
"margin_dpo/margin_std": 1.5158851146697998,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.3416477702191988,
|
|
"fcm_dpo/beta": 0.888001561164856,
|
|
"fcm_dpo/delta": 0.01624445617198944,
|
|
"fcm_dpo/margin": 0.939960777759552,
|
|
"fcm_dpo/q_t": 0.35010144114494324,
|
|
"grad_norm": 175.6317138671875,
|
|
"learning_rate": 4.176753170773052e-07,
|
|
"logits/chosen": 0.1665615737438202,
|
|
"logits/rejected": 0.1313961148262024,
|
|
"logps/chosen": -53.05473327636719,
|
|
"logps/ref_chosen": -51.4168701171875,
|
|
"logps/ref_rejected": -66.30068969726562,
|
|
"logps/rejected": -68.87850952148438,
|
|
"loss": 1.02,
|
|
"margin_dpo/margin_mean": 0.9399595260620117,
|
|
"margin_dpo/margin_std": 1.428729772567749,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.3431594860166289,
|
|
"fcm_dpo/beta": 0.9011565446853638,
|
|
"fcm_dpo/delta": 0.07179167866706848,
|
|
"fcm_dpo/margin": 0.8698738217353821,
|
|
"fcm_dpo/q_t": 0.3623507022857666,
|
|
"grad_norm": 259.9079895019531,
|
|
"learning_rate": 4.166922501290729e-07,
|
|
"logits/chosen": 0.20261543989181519,
|
|
"logits/rejected": 0.1713910549879074,
|
|
"logps/chosen": -59.633522033691406,
|
|
"logps/ref_chosen": -57.989776611328125,
|
|
"logps/ref_rejected": -75.05464172363281,
|
|
"logps/rejected": -77.5682601928711,
|
|
"loss": 1.1078,
|
|
"margin_dpo/margin_mean": 0.8698740005493164,
|
|
"margin_dpo/margin_std": 1.5209307670593262,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.34467120181405897,
|
|
"fcm_dpo/beta": 0.905102014541626,
|
|
"fcm_dpo/delta": -0.021704578772187233,
|
|
"fcm_dpo/margin": 0.9609760046005249,
|
|
"fcm_dpo/q_t": 0.34984922409057617,
|
|
"grad_norm": 212.20846557617188,
|
|
"learning_rate": 4.1570452044027405e-07,
|
|
"logits/chosen": 0.202199786901474,
|
|
"logits/rejected": 0.14202827215194702,
|
|
"logps/chosen": -57.361549377441406,
|
|
"logps/ref_chosen": -55.55936813354492,
|
|
"logps/ref_rejected": -77.02364349365234,
|
|
"logps/rejected": -79.78680419921875,
|
|
"loss": 1.0938,
|
|
"margin_dpo/margin_mean": 0.9609757661819458,
|
|
"margin_dpo/margin_std": 1.6258370876312256,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.34618291761148906,
|
|
"fcm_dpo/beta": 0.9085903167724609,
|
|
"fcm_dpo/delta": 0.06304832547903061,
|
|
"fcm_dpo/margin": 0.8716516494750977,
|
|
"fcm_dpo/q_t": 0.35091277956962585,
|
|
"grad_norm": 457.2859802246094,
|
|
"learning_rate": 4.147121556398312e-07,
|
|
"logits/chosen": 0.2513842284679413,
|
|
"logits/rejected": 0.20943355560302734,
|
|
"logps/chosen": -52.35901641845703,
|
|
"logps/ref_chosen": -50.79466247558594,
|
|
"logps/ref_rejected": -78.4474105834961,
|
|
"logps/rejected": -80.88341522216797,
|
|
"loss": 1.0559,
|
|
"margin_dpo/margin_mean": 0.8716517090797424,
|
|
"margin_dpo/margin_std": 1.377872109413147,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.3476946334089191,
|
|
"fcm_dpo/beta": 0.9284258484840393,
|
|
"fcm_dpo/delta": -0.008764654397964478,
|
|
"fcm_dpo/margin": 0.9182890057563782,
|
|
"fcm_dpo/q_t": 0.3512055575847626,
|
|
"grad_norm": 235.49261474609375,
|
|
"learning_rate": 4.137151834863213e-07,
|
|
"logits/chosen": 0.13936060667037964,
|
|
"logits/rejected": 0.1368224024772644,
|
|
"logps/chosen": -58.469757080078125,
|
|
"logps/ref_chosen": -56.729225158691406,
|
|
"logps/ref_rejected": -62.99180603027344,
|
|
"logps/rejected": -65.65062713623047,
|
|
"loss": 1.095,
|
|
"margin_dpo/margin_mean": 0.9182896614074707,
|
|
"margin_dpo/margin_std": 1.5704759359359741,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.3492063492063492,
|
|
"fcm_dpo/beta": 0.8553475141525269,
|
|
"fcm_dpo/delta": -0.3350568413734436,
|
|
"fcm_dpo/margin": 1.3306918144226074,
|
|
"fcm_dpo/q_t": 0.2868633270263672,
|
|
"grad_norm": 190.81130981445312,
|
|
"learning_rate": 4.1271363186719835e-07,
|
|
"logits/chosen": 0.12404580414295197,
|
|
"logits/rejected": 0.10656890273094177,
|
|
"logps/chosen": -74.32974243164062,
|
|
"logps/ref_chosen": -72.59709930419922,
|
|
"logps/ref_rejected": -86.2322998046875,
|
|
"logps/rejected": -89.29563903808594,
|
|
"loss": 0.7918,
|
|
"margin_dpo/margin_mean": 1.3306909799575806,
|
|
"margin_dpo/margin_std": 1.3364955186843872,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.3507180650037793,
|
|
"fcm_dpo/beta": 0.8403864502906799,
|
|
"fcm_dpo/delta": -0.048685502260923386,
|
|
"fcm_dpo/margin": 1.0624288320541382,
|
|
"fcm_dpo/q_t": 0.3467335104942322,
|
|
"grad_norm": 216.3763427734375,
|
|
"learning_rate": 4.1170752879801436e-07,
|
|
"logits/chosen": 0.1653335839509964,
|
|
"logits/rejected": 0.14129364490509033,
|
|
"logps/chosen": -69.74419403076172,
|
|
"logps/ref_chosen": -68.1185302734375,
|
|
"logps/ref_rejected": -83.79415893554688,
|
|
"logps/rejected": -86.48225402832031,
|
|
"loss": 1.0269,
|
|
"margin_dpo/margin_mean": 1.0624287128448486,
|
|
"margin_dpo/margin_std": 1.6378672122955322,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.35222978080120937,
|
|
"fcm_dpo/beta": 0.8553488254547119,
|
|
"fcm_dpo/delta": 0.21577247977256775,
|
|
"fcm_dpo/margin": 0.7533851265907288,
|
|
"fcm_dpo/q_t": 0.3966567814350128,
|
|
"grad_norm": 225.2250213623047,
|
|
"learning_rate": 4.106969024216348e-07,
|
|
"logits/chosen": 0.1627584844827652,
|
|
"logits/rejected": 0.11888204514980316,
|
|
"logps/chosen": -57.22935485839844,
|
|
"logps/ref_chosen": -55.070152282714844,
|
|
"logps/ref_rejected": -66.61845397949219,
|
|
"logps/rejected": -69.53103637695312,
|
|
"loss": 1.1841,
|
|
"margin_dpo/margin_mean": 0.7533849477767944,
|
|
"margin_dpo/margin_std": 1.5512669086456299,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.35374149659863946,
|
|
"fcm_dpo/beta": 0.9342302083969116,
|
|
"fcm_dpo/delta": 0.32500237226486206,
|
|
"fcm_dpo/margin": 0.5758814811706543,
|
|
"fcm_dpo/q_t": 0.4090895652770996,
|
|
"grad_norm": 245.4609375,
|
|
"learning_rate": 4.09681781007452e-07,
|
|
"logits/chosen": 0.09976236522197723,
|
|
"logits/rejected": 0.08703955262899399,
|
|
"logps/chosen": -57.818267822265625,
|
|
"logps/ref_chosen": -55.92589569091797,
|
|
"logps/ref_rejected": -51.11608123779297,
|
|
"logps/rejected": -53.584327697753906,
|
|
"loss": 1.3011,
|
|
"margin_dpo/margin_mean": 0.575881838798523,
|
|
"margin_dpo/margin_std": 1.4566171169281006,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.35525321239606955,
|
|
"fcm_dpo/beta": 0.889824628829956,
|
|
"fcm_dpo/delta": -0.34530460834503174,
|
|
"fcm_dpo/margin": 1.2960200309753418,
|
|
"fcm_dpo/q_t": 0.27998316287994385,
|
|
"grad_norm": 195.18954467773438,
|
|
"learning_rate": 4.08662192950594e-07,
|
|
"logits/chosen": 0.17580223083496094,
|
|
"logits/rejected": 0.15976448357105255,
|
|
"logps/chosen": -66.0561752319336,
|
|
"logps/ref_chosen": -64.53972625732422,
|
|
"logps/ref_rejected": -77.69151306152344,
|
|
"logps/rejected": -80.50398254394531,
|
|
"loss": 0.7405,
|
|
"margin_dpo/margin_mean": 1.2960199117660522,
|
|
"margin_dpo/margin_std": 1.2017241716384888,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.35676492819349964,
|
|
"fcm_dpo/beta": 0.8656524419784546,
|
|
"fcm_dpo/delta": -0.07761366665363312,
|
|
"fcm_dpo/margin": 1.0628429651260376,
|
|
"fcm_dpo/q_t": 0.34958380460739136,
|
|
"grad_norm": 225.35562133789062,
|
|
"learning_rate": 4.076381667711306e-07,
|
|
"logits/chosen": 0.11892463266849518,
|
|
"logits/rejected": 0.1088462769985199,
|
|
"logps/chosen": -73.25985717773438,
|
|
"logps/ref_chosen": -71.15473937988281,
|
|
"logps/ref_rejected": -84.88541412353516,
|
|
"logps/rejected": -88.05337524414062,
|
|
"loss": 1.0714,
|
|
"margin_dpo/margin_mean": 1.062842607498169,
|
|
"margin_dpo/margin_std": 1.7734475135803223,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.35827664399092973,
|
|
"fcm_dpo/beta": 0.858435869216919,
|
|
"fcm_dpo/delta": 0.0019276365637779236,
|
|
"fcm_dpo/margin": 0.986770510673523,
|
|
"fcm_dpo/q_t": 0.3497011661529541,
|
|
"grad_norm": 243.37867736816406,
|
|
"learning_rate": 4.066097311132753e-07,
|
|
"logits/chosen": 0.18724274635314941,
|
|
"logits/rejected": 0.17509755492210388,
|
|
"logps/chosen": -77.92250061035156,
|
|
"logps/ref_chosen": -76.14201354980469,
|
|
"logps/ref_rejected": -80.88479614257812,
|
|
"logps/rejected": -83.65205383300781,
|
|
"loss": 1.0883,
|
|
"margin_dpo/margin_mean": 0.9867702722549438,
|
|
"margin_dpo/margin_std": 1.6136043071746826,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.35978835978835977,
|
|
"fcm_dpo/beta": 0.8781789541244507,
|
|
"fcm_dpo/delta": 0.06022172421216965,
|
|
"fcm_dpo/margin": 0.9036314487457275,
|
|
"fcm_dpo/q_t": 0.3595254719257355,
|
|
"grad_norm": 921.947021484375,
|
|
"learning_rate": 4.0557691474458414e-07,
|
|
"logits/chosen": 0.15353929996490479,
|
|
"logits/rejected": 0.13579578697681427,
|
|
"logps/chosen": -70.63078308105469,
|
|
"logps/ref_chosen": -68.88484954833984,
|
|
"logps/ref_rejected": -75.8946304321289,
|
|
"logps/rejected": -78.54420471191406,
|
|
"loss": 1.0725,
|
|
"margin_dpo/margin_mean": 0.903631329536438,
|
|
"margin_dpo/margin_std": 1.4686760902404785,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.36130007558578986,
|
|
"fcm_dpo/beta": 0.8707563877105713,
|
|
"fcm_dpo/delta": -0.0933084636926651,
|
|
"fcm_dpo/margin": 1.0726871490478516,
|
|
"fcm_dpo/q_t": 0.33421483635902405,
|
|
"grad_norm": 247.51217651367188,
|
|
"learning_rate": 4.045397465551513e-07,
|
|
"logits/chosen": 0.24642007052898407,
|
|
"logits/rejected": 0.16015931963920593,
|
|
"logps/chosen": -58.81086730957031,
|
|
"logps/ref_chosen": -56.771827697753906,
|
|
"logps/ref_rejected": -116.23050689697266,
|
|
"logps/rejected": -119.34222412109375,
|
|
"loss": 1.0295,
|
|
"margin_dpo/margin_mean": 1.072688341140747,
|
|
"margin_dpo/margin_std": 1.598010540008545,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.36281179138321995,
|
|
"fcm_dpo/beta": 0.8458698987960815,
|
|
"fcm_dpo/delta": -0.2129267305135727,
|
|
"fcm_dpo/margin": 1.2287921905517578,
|
|
"fcm_dpo/q_t": 0.31920289993286133,
|
|
"grad_norm": 178.1742401123047,
|
|
"learning_rate": 4.0349825555680045e-07,
|
|
"logits/chosen": 0.14184461534023285,
|
|
"logits/rejected": 0.0741148293018341,
|
|
"logps/chosen": -55.44900894165039,
|
|
"logps/ref_chosen": -53.35411071777344,
|
|
"logps/ref_rejected": -80.12019348144531,
|
|
"logps/rejected": -83.44389343261719,
|
|
"loss": 0.9645,
|
|
"margin_dpo/margin_mean": 1.2287919521331787,
|
|
"margin_dpo/margin_std": 1.6595501899719238,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.36432350718065004,
|
|
"fcm_dpo/beta": 0.8800208568572998,
|
|
"fcm_dpo/delta": 0.3664669394493103,
|
|
"fcm_dpo/margin": 0.5624747276306152,
|
|
"fcm_dpo/q_t": 0.4117809534072876,
|
|
"grad_norm": 292.58184814453125,
|
|
"learning_rate": 4.0245247088227377e-07,
|
|
"logits/chosen": 0.12403697520494461,
|
|
"logits/rejected": 0.09658272564411163,
|
|
"logps/chosen": -73.85459899902344,
|
|
"logps/ref_chosen": -71.89541625976562,
|
|
"logps/ref_rejected": -83.03492736816406,
|
|
"logps/rejected": -85.55657958984375,
|
|
"loss": 1.2604,
|
|
"margin_dpo/margin_mean": 0.5624747276306152,
|
|
"margin_dpo/margin_std": 1.3676249980926514,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.36583522297808013,
|
|
"fcm_dpo/beta": 0.8379240036010742,
|
|
"fcm_dpo/delta": -0.24741961061954498,
|
|
"fcm_dpo/margin": 1.2643775939941406,
|
|
"fcm_dpo/q_t": 0.31461301445961,
|
|
"grad_norm": 174.297119140625,
|
|
"learning_rate": 4.0140242178441665e-07,
|
|
"logits/chosen": 0.12286406755447388,
|
|
"logits/rejected": 0.10437546670436859,
|
|
"logps/chosen": -59.590065002441406,
|
|
"logps/ref_chosen": -57.927433013916016,
|
|
"logps/ref_rejected": -67.838623046875,
|
|
"logps/rejected": -70.765625,
|
|
"loss": 0.8866,
|
|
"margin_dpo/margin_mean": 1.2643779516220093,
|
|
"margin_dpo/margin_std": 1.5288593769073486,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.3673469387755102,
|
|
"fcm_dpo/beta": 0.8361135125160217,
|
|
"fcm_dpo/delta": -0.02479562908411026,
|
|
"fcm_dpo/margin": 1.042922854423523,
|
|
"fcm_dpo/q_t": 0.3457787334918976,
|
|
"grad_norm": 210.47018432617188,
|
|
"learning_rate": 4.003481376353596e-07,
|
|
"logits/chosen": 0.1957082599401474,
|
|
"logits/rejected": 0.19097524881362915,
|
|
"logps/chosen": -76.08808898925781,
|
|
"logps/ref_chosen": -74.27667236328125,
|
|
"logps/ref_rejected": -73.24340057373047,
|
|
"logps/rejected": -76.09774017333984,
|
|
"loss": 0.9948,
|
|
"margin_dpo/margin_mean": 1.0429233312606812,
|
|
"margin_dpo/margin_std": 1.5180702209472656,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.3688586545729403,
|
|
"fcm_dpo/beta": 0.8155351877212524,
|
|
"fcm_dpo/delta": -0.20803546905517578,
|
|
"fcm_dpo/margin": 1.2712355852127075,
|
|
"fcm_dpo/q_t": 0.3006499111652374,
|
|
"grad_norm": 164.11488342285156,
|
|
"learning_rate": 3.9928964792569654e-07,
|
|
"logits/chosen": 0.18668177723884583,
|
|
"logits/rejected": 0.11918094754219055,
|
|
"logps/chosen": -55.324073791503906,
|
|
"logps/ref_chosen": -53.36390686035156,
|
|
"logps/ref_rejected": -71.10276794433594,
|
|
"logps/rejected": -74.33417510986328,
|
|
"loss": 0.8009,
|
|
"margin_dpo/margin_mean": 1.2712348699569702,
|
|
"margin_dpo/margin_std": 1.3225237131118774,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.37037037037037035,
|
|
"fcm_dpo/beta": 0.7821507453918457,
|
|
"fcm_dpo/delta": -0.22865894436836243,
|
|
"fcm_dpo/margin": 1.3489384651184082,
|
|
"fcm_dpo/q_t": 0.3079974055290222,
|
|
"grad_norm": 259.376220703125,
|
|
"learning_rate": 3.982269822636601e-07,
|
|
"logits/chosen": 0.20032089948654175,
|
|
"logits/rejected": 0.17909392714500427,
|
|
"logps/chosen": -73.11966705322266,
|
|
"logps/ref_chosen": -71.19510650634766,
|
|
"logps/ref_rejected": -80.76235961914062,
|
|
"logps/rejected": -84.03585815429688,
|
|
"loss": 0.912,
|
|
"margin_dpo/margin_mean": 1.3489389419555664,
|
|
"margin_dpo/margin_std": 1.7213486433029175,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.37188208616780044,
|
|
"fcm_dpo/beta": 0.7464067935943604,
|
|
"fcm_dpo/delta": -0.1680677980184555,
|
|
"fcm_dpo/margin": 1.3396670818328857,
|
|
"fcm_dpo/q_t": 0.32120266556739807,
|
|
"grad_norm": 182.52078247070312,
|
|
"learning_rate": 3.971601703742932e-07,
|
|
"logits/chosen": 0.19234727323055267,
|
|
"logits/rejected": 0.14638212323188782,
|
|
"logps/chosen": -73.82595825195312,
|
|
"logps/ref_chosen": -71.62104797363281,
|
|
"logps/ref_rejected": -94.03392028808594,
|
|
"logps/rejected": -97.5784912109375,
|
|
"loss": 0.9033,
|
|
"margin_dpo/margin_mean": 1.3396670818328857,
|
|
"margin_dpo/margin_std": 1.6984140872955322,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.37339380196523053,
|
|
"fcm_dpo/beta": 0.7954495549201965,
|
|
"fcm_dpo/delta": 0.36253783106803894,
|
|
"fcm_dpo/margin": 0.6254063844680786,
|
|
"fcm_dpo/q_t": 0.40435123443603516,
|
|
"grad_norm": 241.6053924560547,
|
|
"learning_rate": 3.960892420986177e-07,
|
|
"logits/chosen": 0.1911957561969757,
|
|
"logits/rejected": 0.1811983436346054,
|
|
"logps/chosen": -82.40559387207031,
|
|
"logps/ref_chosen": -80.02254486083984,
|
|
"logps/ref_rejected": -89.22705841064453,
|
|
"logps/rejected": -92.23551940917969,
|
|
"loss": 1.273,
|
|
"margin_dpo/margin_mean": 0.6254061460494995,
|
|
"margin_dpo/margin_std": 1.573524832725525,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.3749055177626606,
|
|
"fcm_dpo/beta": 0.8019500374794006,
|
|
"fcm_dpo/delta": -0.030305165797472,
|
|
"fcm_dpo/margin": 1.0933787822723389,
|
|
"fcm_dpo/q_t": 0.3430135250091553,
|
|
"grad_norm": 200.2981719970703,
|
|
"learning_rate": 3.9501422739279953e-07,
|
|
"logits/chosen": 0.1514793336391449,
|
|
"logits/rejected": 0.17173510789871216,
|
|
"logps/chosen": -67.46146392822266,
|
|
"logps/ref_chosen": -65.37796020507812,
|
|
"logps/ref_rejected": -61.365787506103516,
|
|
"logps/rejected": -64.54267120361328,
|
|
"loss": 0.9833,
|
|
"margin_dpo/margin_mean": 1.0933786630630493,
|
|
"margin_dpo/margin_std": 1.5513389110565186,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.3764172335600907,
|
|
"fcm_dpo/beta": 0.8616625070571899,
|
|
"fcm_dpo/delta": 0.4642504155635834,
|
|
"fcm_dpo/margin": 0.4600977599620819,
|
|
"fcm_dpo/q_t": 0.41762542724609375,
|
|
"grad_norm": 264.34185791015625,
|
|
"learning_rate": 3.9393515632731094e-07,
|
|
"logits/chosen": 0.12900127470493317,
|
|
"logits/rejected": 0.15251630544662476,
|
|
"logps/chosen": -77.03469848632812,
|
|
"logps/ref_chosen": -74.60145568847656,
|
|
"logps/ref_rejected": -63.79338455200195,
|
|
"logps/rejected": -66.68672180175781,
|
|
"loss": 1.4297,
|
|
"margin_dpo/margin_mean": 0.4600982069969177,
|
|
"margin_dpo/margin_std": 1.5953214168548584,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.3779289493575208,
|
|
"fcm_dpo/beta": 0.8493759632110596,
|
|
"fcm_dpo/delta": -0.23597529530525208,
|
|
"fcm_dpo/margin": 1.2496278285980225,
|
|
"fcm_dpo/q_t": 0.3237505853176117,
|
|
"grad_norm": 190.65785217285156,
|
|
"learning_rate": 3.9285205908608934e-07,
|
|
"logits/chosen": 0.24538511037826538,
|
|
"logits/rejected": 0.21211454272270203,
|
|
"logps/chosen": -64.02789306640625,
|
|
"logps/ref_chosen": -61.938209533691406,
|
|
"logps/ref_rejected": -72.21602630615234,
|
|
"logps/rejected": -75.55533599853516,
|
|
"loss": 0.9779,
|
|
"margin_dpo/margin_mean": 1.2496273517608643,
|
|
"margin_dpo/margin_std": 1.8356934785842896,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.3794406651549509,
|
|
"fcm_dpo/beta": 0.8356133103370667,
|
|
"fcm_dpo/delta": 0.06252037733793259,
|
|
"fcm_dpo/margin": 0.9481085538864136,
|
|
"fcm_dpo/q_t": 0.37249547243118286,
|
|
"grad_norm": 214.85369873046875,
|
|
"learning_rate": 3.9176496596569265e-07,
|
|
"logits/chosen": 0.22139692306518555,
|
|
"logits/rejected": 0.19079577922821045,
|
|
"logps/chosen": -69.10639953613281,
|
|
"logps/ref_chosen": -66.85694885253906,
|
|
"logps/ref_rejected": -84.83396911621094,
|
|
"logps/rejected": -88.03153991699219,
|
|
"loss": 1.0963,
|
|
"margin_dpo/margin_mean": 0.9481081962585449,
|
|
"margin_dpo/margin_std": 1.7044211626052856,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.38095238095238093,
|
|
"fcm_dpo/beta": 0.861457109451294,
|
|
"fcm_dpo/delta": -0.012143999338150024,
|
|
"fcm_dpo/margin": 0.9872934222221375,
|
|
"fcm_dpo/q_t": 0.337787389755249,
|
|
"grad_norm": 205.43638610839844,
|
|
"learning_rate": 3.9067390737445254e-07,
|
|
"logits/chosen": 0.1312238872051239,
|
|
"logits/rejected": 0.08839388191699982,
|
|
"logps/chosen": -58.39625549316406,
|
|
"logps/ref_chosen": -56.22393035888672,
|
|
"logps/ref_rejected": -77.1136245727539,
|
|
"logps/rejected": -80.27323913574219,
|
|
"loss": 1.1382,
|
|
"margin_dpo/margin_mean": 0.98729407787323,
|
|
"margin_dpo/margin_std": 1.6986348628997803,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.382464096749811,
|
|
"fcm_dpo/beta": 0.8236818909645081,
|
|
"fcm_dpo/delta": -0.03074963390827179,
|
|
"fcm_dpo/margin": 1.0598118305206299,
|
|
"fcm_dpo/q_t": 0.34884679317474365,
|
|
"grad_norm": 161.52169799804688,
|
|
"learning_rate": 3.8957891383162304e-07,
|
|
"logits/chosen": 0.19195407629013062,
|
|
"logits/rejected": 0.1622268557548523,
|
|
"logps/chosen": -54.37004089355469,
|
|
"logps/ref_chosen": -52.21001434326172,
|
|
"logps/ref_rejected": -58.75764846801758,
|
|
"logps/rejected": -61.97748565673828,
|
|
"loss": 0.9898,
|
|
"margin_dpo/margin_mean": 1.0598115921020508,
|
|
"margin_dpo/margin_std": 1.498337745666504,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.3839758125472411,
|
|
"fcm_dpo/beta": 0.815079927444458,
|
|
"fcm_dpo/delta": -0.1290552169084549,
|
|
"fcm_dpo/margin": 1.1844369173049927,
|
|
"fcm_dpo/q_t": 0.3433462381362915,
|
|
"grad_norm": 219.98338317871094,
|
|
"learning_rate": 3.884800159665276e-07,
|
|
"logits/chosen": 0.1558561623096466,
|
|
"logits/rejected": 0.11703409254550934,
|
|
"logps/chosen": -67.93498229980469,
|
|
"logps/ref_chosen": -65.63632202148438,
|
|
"logps/ref_rejected": -82.34425354003906,
|
|
"logps/rejected": -85.82734680175781,
|
|
"loss": 1.0388,
|
|
"margin_dpo/margin_mean": 1.184436559677124,
|
|
"margin_dpo/margin_std": 1.8613649606704712,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.3854875283446712,
|
|
"fcm_dpo/beta": 0.809474527835846,
|
|
"fcm_dpo/delta": 0.004410445690155029,
|
|
"fcm_dpo/margin": 1.0441983938217163,
|
|
"fcm_dpo/q_t": 0.3516331613063812,
|
|
"grad_norm": 200.7998046875,
|
|
"learning_rate": 3.873772445177015e-07,
|
|
"logits/chosen": 0.1561092585325241,
|
|
"logits/rejected": 0.12892277538776398,
|
|
"logps/chosen": -69.9217758178711,
|
|
"logps/ref_chosen": -67.91108703613281,
|
|
"logps/ref_rejected": -83.89114379882812,
|
|
"logps/rejected": -86.9460220336914,
|
|
"loss": 1.0398,
|
|
"margin_dpo/margin_mean": 1.0441988706588745,
|
|
"margin_dpo/margin_std": 1.6480119228363037,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.3869992441421013,
|
|
"fcm_dpo/beta": 0.804902195930481,
|
|
"fcm_dpo/delta": -0.09969654679298401,
|
|
"fcm_dpo/margin": 1.1683741807937622,
|
|
"fcm_dpo/q_t": 0.33634892106056213,
|
|
"grad_norm": 218.11962890625,
|
|
"learning_rate": 3.862706303320329e-07,
|
|
"logits/chosen": 0.12878583371639252,
|
|
"logits/rejected": 0.08810890465974808,
|
|
"logps/chosen": -65.82264709472656,
|
|
"logps/ref_chosen": -63.49998474121094,
|
|
"logps/ref_rejected": -90.77104187011719,
|
|
"logps/rejected": -94.2620849609375,
|
|
"loss": 1.0063,
|
|
"margin_dpo/margin_mean": 1.1683729887008667,
|
|
"margin_dpo/margin_std": 1.776692271232605,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.3885109599395314,
|
|
"fcm_dpo/beta": 0.7456899285316467,
|
|
"fcm_dpo/delta": -0.395789235830307,
|
|
"fcm_dpo/margin": 1.5996819734573364,
|
|
"fcm_dpo/q_t": 0.3006623685359955,
|
|
"grad_norm": 192.77134704589844,
|
|
"learning_rate": 3.851602043638994e-07,
|
|
"logits/chosen": 0.16418027877807617,
|
|
"logits/rejected": 0.11701178550720215,
|
|
"logps/chosen": -72.88190460205078,
|
|
"logps/ref_chosen": -70.60064697265625,
|
|
"logps/ref_rejected": -108.58313751220703,
|
|
"logps/rejected": -112.46407318115234,
|
|
"loss": 0.9113,
|
|
"margin_dpo/margin_mean": 1.5996819734573364,
|
|
"margin_dpo/margin_std": 2.1490330696105957,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.3900226757369615,
|
|
"fcm_dpo/beta": 0.7447835206985474,
|
|
"fcm_dpo/delta": -0.006751693785190582,
|
|
"fcm_dpo/margin": 1.1468995809555054,
|
|
"fcm_dpo/q_t": 0.3328554034233093,
|
|
"grad_norm": 184.07823181152344,
|
|
"learning_rate": 3.840459976743023e-07,
|
|
"logits/chosen": 0.20480632781982422,
|
|
"logits/rejected": 0.16838130354881287,
|
|
"logps/chosen": -61.762428283691406,
|
|
"logps/ref_chosen": -59.25416564941406,
|
|
"logps/ref_rejected": -85.58709716796875,
|
|
"logps/rejected": -89.24226379394531,
|
|
"loss": 0.9089,
|
|
"margin_dpo/margin_mean": 1.1468994617462158,
|
|
"margin_dpo/margin_std": 1.3625071048736572,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.3915343915343915,
|
|
"fcm_dpo/beta": 0.6809320449829102,
|
|
"fcm_dpo/delta": -0.46764737367630005,
|
|
"fcm_dpo/margin": 1.8398231267929077,
|
|
"fcm_dpo/q_t": 0.2804723381996155,
|
|
"grad_norm": 148.42945861816406,
|
|
"learning_rate": 3.8292804142999796e-07,
|
|
"logits/chosen": 0.12444441020488739,
|
|
"logits/rejected": 0.049020834267139435,
|
|
"logps/chosen": -67.50753021240234,
|
|
"logps/ref_chosen": -65.43487548828125,
|
|
"logps/ref_rejected": -95.41731262207031,
|
|
"logps/rejected": -99.32978820800781,
|
|
"loss": 0.7919,
|
|
"margin_dpo/margin_mean": 1.8398233652114868,
|
|
"margin_dpo/margin_std": 1.9219727516174316,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.3930461073318216,
|
|
"fcm_dpo/beta": 0.6663204431533813,
|
|
"fcm_dpo/delta": -0.012575246393680573,
|
|
"fcm_dpo/margin": 1.2920453548431396,
|
|
"fcm_dpo/q_t": 0.3447534441947937,
|
|
"grad_norm": 153.9876251220703,
|
|
"learning_rate": 3.818063669026256e-07,
|
|
"logits/chosen": 0.16772714257240295,
|
|
"logits/rejected": 0.10460503399372101,
|
|
"logps/chosen": -51.540916442871094,
|
|
"logps/ref_chosen": -49.08958435058594,
|
|
"logps/ref_rejected": -79.01708221435547,
|
|
"logps/rejected": -82.76045989990234,
|
|
"loss": 0.9871,
|
|
"margin_dpo/margin_mean": 1.292044997215271,
|
|
"margin_dpo/margin_std": 1.8242688179016113,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.3945578231292517,
|
|
"fcm_dpo/beta": 0.6728366613388062,
|
|
"fcm_dpo/delta": 0.06468392163515091,
|
|
"fcm_dpo/margin": 1.1746070384979248,
|
|
"fcm_dpo/q_t": 0.3549262285232544,
|
|
"grad_norm": 185.98231506347656,
|
|
"learning_rate": 3.806810054678331e-07,
|
|
"logits/chosen": 0.08221499621868134,
|
|
"logits/rejected": 0.09243050217628479,
|
|
"logps/chosen": -73.12066650390625,
|
|
"logps/ref_chosen": -70.87239074707031,
|
|
"logps/ref_rejected": -65.01522064208984,
|
|
"logps/rejected": -68.43810272216797,
|
|
"loss": 1.0405,
|
|
"margin_dpo/margin_mean": 1.174607753753662,
|
|
"margin_dpo/margin_std": 1.8360176086425781,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.3960695389266818,
|
|
"fcm_dpo/beta": 0.6801202297210693,
|
|
"fcm_dpo/delta": -0.019086986780166626,
|
|
"fcm_dpo/margin": 1.2746036052703857,
|
|
"fcm_dpo/q_t": 0.34386593103408813,
|
|
"grad_norm": 170.8404083251953,
|
|
"learning_rate": 3.7955198860439887e-07,
|
|
"logits/chosen": 0.20572137832641602,
|
|
"logits/rejected": 0.16200119256973267,
|
|
"logps/chosen": -70.40789794921875,
|
|
"logps/ref_chosen": -67.8706283569336,
|
|
"logps/ref_rejected": -88.7205810546875,
|
|
"logps/rejected": -92.53245544433594,
|
|
"loss": 0.9448,
|
|
"margin_dpo/margin_mean": 1.274604320526123,
|
|
"margin_dpo/margin_std": 1.7153469324111938,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.3975812547241119,
|
|
"fcm_dpo/beta": 0.6981139779090881,
|
|
"fcm_dpo/delta": 0.134951651096344,
|
|
"fcm_dpo/margin": 1.0312126874923706,
|
|
"fcm_dpo/q_t": 0.36829593777656555,
|
|
"grad_norm": 167.5323486328125,
|
|
"learning_rate": 3.784193478933516e-07,
|
|
"logits/chosen": 0.1324343979358673,
|
|
"logits/rejected": 0.058942101895809174,
|
|
"logps/chosen": -57.610633850097656,
|
|
"logps/ref_chosen": -55.194583892822266,
|
|
"logps/ref_rejected": -80.54048156738281,
|
|
"logps/rejected": -83.98774719238281,
|
|
"loss": 1.0934,
|
|
"margin_dpo/margin_mean": 1.031212329864502,
|
|
"margin_dpo/margin_std": 1.735314130783081,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.39909297052154197,
|
|
"fcm_dpo/beta": 0.6857741475105286,
|
|
"fcm_dpo/delta": -0.11159483343362808,
|
|
"fcm_dpo/margin": 1.386823058128357,
|
|
"fcm_dpo/q_t": 0.34189584851264954,
|
|
"grad_norm": 195.39012145996094,
|
|
"learning_rate": 3.7728311501708674e-07,
|
|
"logits/chosen": 0.10226079821586609,
|
|
"logits/rejected": 0.06431174278259277,
|
|
"logps/chosen": -85.52044677734375,
|
|
"logps/ref_chosen": -83.17068481445312,
|
|
"logps/ref_rejected": -88.33625793457031,
|
|
"logps/rejected": -92.07284545898438,
|
|
"loss": 1.0629,
|
|
"margin_dpo/margin_mean": 1.3868227005004883,
|
|
"margin_dpo/margin_std": 2.2063112258911133,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.40060468631897206,
|
|
"fcm_dpo/beta": 0.6698117256164551,
|
|
"fcm_dpo/delta": -0.11223047971725464,
|
|
"fcm_dpo/margin": 1.4208091497421265,
|
|
"fcm_dpo/q_t": 0.3248485028743744,
|
|
"grad_norm": 194.76893615722656,
|
|
"learning_rate": 3.7614332175848027e-07,
|
|
"logits/chosen": 0.22791503369808197,
|
|
"logits/rejected": 0.17402033507823944,
|
|
"logps/chosen": -54.32725524902344,
|
|
"logps/ref_chosen": -51.66284942626953,
|
|
"logps/ref_rejected": -67.1720962524414,
|
|
"logps/rejected": -71.25730895996094,
|
|
"loss": 0.9819,
|
|
"margin_dpo/margin_mean": 1.4208089113235474,
|
|
"margin_dpo/margin_std": 1.9814412593841553,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.4021164021164021,
|
|
"fcm_dpo/beta": 0.6639435291290283,
|
|
"fcm_dpo/delta": -0.014321202412247658,
|
|
"fcm_dpo/margin": 1.299785852432251,
|
|
"fcm_dpo/q_t": 0.3548884093761444,
|
|
"grad_norm": 184.37330627441406,
|
|
"learning_rate": 3.75e-07,
|
|
"logits/chosen": 0.15636898577213287,
|
|
"logits/rejected": 0.1033758819103241,
|
|
"logps/chosen": -59.907691955566406,
|
|
"logps/ref_chosen": -57.45049285888672,
|
|
"logps/ref_rejected": -77.60826110839844,
|
|
"logps/rejected": -81.36524200439453,
|
|
"loss": 1.0529,
|
|
"margin_dpo/margin_mean": 1.299785852432251,
|
|
"margin_dpo/margin_std": 2.1626858711242676,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.4036281179138322,
|
|
"fcm_dpo/beta": 0.6879450082778931,
|
|
"fcm_dpo/delta": 0.1648833006620407,
|
|
"fcm_dpo/margin": 1.0033197402954102,
|
|
"fcm_dpo/q_t": 0.3694334626197815,
|
|
"grad_norm": 146.89279174804688,
|
|
"learning_rate": 3.738531817228131e-07,
|
|
"logits/chosen": 0.1628885567188263,
|
|
"logits/rejected": 0.15053339302539825,
|
|
"logps/chosen": -57.460472106933594,
|
|
"logps/ref_chosen": -55.03535079956055,
|
|
"logps/ref_rejected": -66.0953369140625,
|
|
"logps/rejected": -69.5237808227539,
|
|
"loss": 1.1668,
|
|
"margin_dpo/margin_mean": 1.0033204555511475,
|
|
"margin_dpo/margin_std": 1.9306485652923584,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.4051398337112623,
|
|
"fcm_dpo/beta": 0.6884621381759644,
|
|
"fcm_dpo/delta": 0.005110621452331543,
|
|
"fcm_dpo/margin": 1.2268198728561401,
|
|
"fcm_dpo/q_t": 0.35186707973480225,
|
|
"grad_norm": 164.82493591308594,
|
|
"learning_rate": 3.7270289900589204e-07,
|
|
"logits/chosen": 0.08974149078130722,
|
|
"logits/rejected": 0.07196816802024841,
|
|
"logps/chosen": -67.441162109375,
|
|
"logps/ref_chosen": -65.07174682617188,
|
|
"logps/ref_rejected": -71.42485809326172,
|
|
"logps/rejected": -75.0210952758789,
|
|
"loss": 0.9971,
|
|
"margin_dpo/margin_mean": 1.2268199920654297,
|
|
"margin_dpo/margin_std": 1.8398901224136353,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.40665154950869237,
|
|
"fcm_dpo/beta": 0.6847081184387207,
|
|
"fcm_dpo/delta": -0.12639540433883667,
|
|
"fcm_dpo/margin": 1.4018278121948242,
|
|
"fcm_dpo/q_t": 0.3242768943309784,
|
|
"grad_norm": 167.1279754638672,
|
|
"learning_rate": 3.7154918402511714e-07,
|
|
"logits/chosen": 0.233078271150589,
|
|
"logits/rejected": 0.1958373486995697,
|
|
"logps/chosen": -69.78608703613281,
|
|
"logps/ref_chosen": -67.1362075805664,
|
|
"logps/ref_rejected": -82.55778503417969,
|
|
"logps/rejected": -86.6094970703125,
|
|
"loss": 0.9398,
|
|
"margin_dpo/margin_mean": 1.4018274545669556,
|
|
"margin_dpo/margin_std": 1.7281426191329956,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.40816326530612246,
|
|
"fcm_dpo/beta": 0.6847708821296692,
|
|
"fcm_dpo/delta": 0.15696696937084198,
|
|
"fcm_dpo/margin": 1.0275520086288452,
|
|
"fcm_dpo/q_t": 0.36070716381073,
|
|
"grad_norm": 188.62623596191406,
|
|
"learning_rate": 3.7039206905237656e-07,
|
|
"logits/chosen": 0.20225092768669128,
|
|
"logits/rejected": 0.15019787847995758,
|
|
"logps/chosen": -69.22811889648438,
|
|
"logps/ref_chosen": -66.6886978149414,
|
|
"logps/ref_rejected": -85.16129302978516,
|
|
"logps/rejected": -88.728271484375,
|
|
"loss": 1.0767,
|
|
"margin_dpo/margin_mean": 1.0275520086288452,
|
|
"margin_dpo/margin_std": 1.7189823389053345,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.40967498110355255,
|
|
"fcm_dpo/beta": 0.7114520072937012,
|
|
"fcm_dpo/delta": 0.24464687705039978,
|
|
"fcm_dpo/margin": 0.8730853199958801,
|
|
"fcm_dpo/q_t": 0.40264761447906494,
|
|
"grad_norm": 210.4980926513672,
|
|
"learning_rate": 3.692315864546635e-07,
|
|
"logits/chosen": 0.20817291736602783,
|
|
"logits/rejected": 0.16300782561302185,
|
|
"logps/chosen": -75.00304412841797,
|
|
"logps/ref_chosen": -72.40754699707031,
|
|
"logps/ref_rejected": -92.06311798095703,
|
|
"logps/rejected": -95.53170776367188,
|
|
"loss": 1.3161,
|
|
"margin_dpo/margin_mean": 0.8730854392051697,
|
|
"margin_dpo/margin_std": 2.194467544555664,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.41118669690098264,
|
|
"fcm_dpo/beta": 0.6955171823501587,
|
|
"fcm_dpo/delta": -0.3055380582809448,
|
|
"fcm_dpo/margin": 1.6134432554244995,
|
|
"fcm_dpo/q_t": 0.30224862694740295,
|
|
"grad_norm": 148.1903839111328,
|
|
"learning_rate": 3.6806776869317067e-07,
|
|
"logits/chosen": 0.1571994125843048,
|
|
"logits/rejected": 0.15734095871448517,
|
|
"logps/chosen": -68.978759765625,
|
|
"logps/ref_chosen": -66.60140228271484,
|
|
"logps/ref_rejected": -67.74340057373047,
|
|
"logps/rejected": -71.73419952392578,
|
|
"loss": 0.8169,
|
|
"margin_dpo/margin_mean": 1.613443374633789,
|
|
"margin_dpo/margin_std": 1.8132987022399902,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.4126984126984127,
|
|
"fcm_dpo/beta": 0.673996090888977,
|
|
"fcm_dpo/delta": -0.1313486248254776,
|
|
"fcm_dpo/margin": 1.43618905544281,
|
|
"fcm_dpo/q_t": 0.3279706537723541,
|
|
"grad_norm": 155.58108520507812,
|
|
"learning_rate": 3.669006483223828e-07,
|
|
"logits/chosen": 0.2045176476240158,
|
|
"logits/rejected": 0.1588110476732254,
|
|
"logps/chosen": -60.12089538574219,
|
|
"logps/ref_chosen": -57.35487747192383,
|
|
"logps/ref_rejected": -84.17168426513672,
|
|
"logps/rejected": -88.37388610839844,
|
|
"loss": 0.9412,
|
|
"margin_dpo/margin_mean": 1.4361895322799683,
|
|
"margin_dpo/margin_std": 1.9495878219604492,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.41421012849584277,
|
|
"fcm_dpo/beta": 0.6581387519836426,
|
|
"fcm_dpo/delta": -0.054179951548576355,
|
|
"fcm_dpo/margin": 1.366485595703125,
|
|
"fcm_dpo/q_t": 0.3364384174346924,
|
|
"grad_norm": 147.8866424560547,
|
|
"learning_rate": 3.657302579891656e-07,
|
|
"logits/chosen": 0.09074236452579498,
|
|
"logits/rejected": 0.07023008912801743,
|
|
"logps/chosen": -62.19242858886719,
|
|
"logps/ref_chosen": -59.64149475097656,
|
|
"logps/ref_rejected": -68.29348754882812,
|
|
"logps/rejected": -72.21089935302734,
|
|
"loss": 1.0003,
|
|
"margin_dpo/margin_mean": 1.3664849996566772,
|
|
"margin_dpo/margin_std": 1.996286392211914,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.41572184429327286,
|
|
"fcm_dpo/beta": 0.6430532336235046,
|
|
"fcm_dpo/delta": -0.14487166702747345,
|
|
"fcm_dpo/margin": 1.5252916812896729,
|
|
"fcm_dpo/q_t": 0.33012163639068604,
|
|
"grad_norm": 146.9214324951172,
|
|
"learning_rate": 3.645566304318526e-07,
|
|
"logits/chosen": 0.16096563637256622,
|
|
"logits/rejected": 0.09621478617191315,
|
|
"logps/chosen": -55.98505783081055,
|
|
"logps/ref_chosen": -53.26664352416992,
|
|
"logps/ref_rejected": -73.84062194824219,
|
|
"logps/rejected": -78.08432006835938,
|
|
"loss": 0.9415,
|
|
"margin_dpo/margin_mean": 1.5252916812896729,
|
|
"margin_dpo/margin_std": 2.0830984115600586,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.41723356009070295,
|
|
"fcm_dpo/beta": 0.6220403909683228,
|
|
"fcm_dpo/delta": -0.1508302390575409,
|
|
"fcm_dpo/margin": 1.5846750736236572,
|
|
"fcm_dpo/q_t": 0.31243956089019775,
|
|
"grad_norm": 129.06207275390625,
|
|
"learning_rate": 3.633797984793294e-07,
|
|
"logits/chosen": 0.11567900329828262,
|
|
"logits/rejected": 0.0872059017419815,
|
|
"logps/chosen": -55.34302520751953,
|
|
"logps/ref_chosen": -53.02079772949219,
|
|
"logps/ref_rejected": -61.56678771972656,
|
|
"logps/rejected": -65.47369384765625,
|
|
"loss": 0.8598,
|
|
"margin_dpo/margin_mean": 1.5846753120422363,
|
|
"margin_dpo/margin_std": 1.8111482858657837,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.41874527588813304,
|
|
"fcm_dpo/beta": 0.6342289447784424,
|
|
"fcm_dpo/delta": 0.08050861954689026,
|
|
"fcm_dpo/margin": 1.2154932022094727,
|
|
"fcm_dpo/q_t": 0.3719508647918701,
|
|
"grad_norm": 160.6136474609375,
|
|
"learning_rate": 3.6219979505011555e-07,
|
|
"logits/chosen": 0.20484420657157898,
|
|
"logits/rejected": 0.21808558702468872,
|
|
"logps/chosen": -74.26129913330078,
|
|
"logps/ref_chosen": -71.43299102783203,
|
|
"logps/ref_rejected": -67.65852355957031,
|
|
"logps/rejected": -71.70232391357422,
|
|
"loss": 1.0903,
|
|
"margin_dpo/margin_mean": 1.2154929637908936,
|
|
"margin_dpo/margin_std": 2.1663973331451416,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.42025699168556313,
|
|
"fcm_dpo/beta": 0.6268476247787476,
|
|
"fcm_dpo/delta": -0.13949307799339294,
|
|
"fcm_dpo/margin": 1.5503504276275635,
|
|
"fcm_dpo/q_t": 0.31966453790664673,
|
|
"grad_norm": 143.23236083984375,
|
|
"learning_rate": 3.6101665315144353e-07,
|
|
"logits/chosen": 0.10400999337434769,
|
|
"logits/rejected": 0.07079809904098511,
|
|
"logps/chosen": -69.66726684570312,
|
|
"logps/ref_chosen": -67.11076354980469,
|
|
"logps/ref_rejected": -88.74851989746094,
|
|
"logps/rejected": -92.8553695678711,
|
|
"loss": 0.9289,
|
|
"margin_dpo/margin_mean": 1.5503500699996948,
|
|
"margin_dpo/margin_std": 1.8908162117004395,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.4217687074829932,
|
|
"fcm_dpo/beta": 0.5942553877830505,
|
|
"fcm_dpo/delta": -0.2251128852367401,
|
|
"fcm_dpo/margin": 1.7702274322509766,
|
|
"fcm_dpo/q_t": 0.2947637438774109,
|
|
"grad_norm": 111.46631622314453,
|
|
"learning_rate": 3.5983040587833563e-07,
|
|
"logits/chosen": 0.1307368278503418,
|
|
"logits/rejected": 0.09648337960243225,
|
|
"logps/chosen": -56.71268081665039,
|
|
"logps/ref_chosen": -54.49748611450195,
|
|
"logps/ref_rejected": -70.42373657226562,
|
|
"logps/rejected": -74.4091567993164,
|
|
"loss": 0.7973,
|
|
"margin_dpo/margin_mean": 1.7702279090881348,
|
|
"margin_dpo/margin_std": 1.7679616212844849,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.42328042328042326,
|
|
"fcm_dpo/beta": 0.5645568370819092,
|
|
"fcm_dpo/delta": -0.19094182550907135,
|
|
"fcm_dpo/margin": 1.8066459894180298,
|
|
"fcm_dpo/q_t": 0.29915091395378113,
|
|
"grad_norm": 107.831298828125,
|
|
"learning_rate": 3.586410864126781e-07,
|
|
"logits/chosen": 0.19378116726875305,
|
|
"logits/rejected": 0.15992864966392517,
|
|
"logps/chosen": -62.757484436035156,
|
|
"logps/ref_chosen": -60.43281173706055,
|
|
"logps/ref_rejected": -78.39051818847656,
|
|
"logps/rejected": -82.52183532714844,
|
|
"loss": 0.784,
|
|
"margin_dpo/margin_mean": 1.806646704673767,
|
|
"margin_dpo/margin_std": 1.7414586544036865,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.42479213907785335,
|
|
"fcm_dpo/beta": 0.5530017614364624,
|
|
"fcm_dpo/delta": -0.0612470768392086,
|
|
"fcm_dpo/margin": 1.6353685855865479,
|
|
"fcm_dpo/q_t": 0.3299909234046936,
|
|
"grad_norm": 115.87896728515625,
|
|
"learning_rate": 3.574487280222929e-07,
|
|
"logits/chosen": 0.15672294795513153,
|
|
"logits/rejected": 0.15733087062835693,
|
|
"logps/chosen": -62.75849914550781,
|
|
"logps/ref_chosen": -60.2820930480957,
|
|
"logps/ref_rejected": -62.04009246826172,
|
|
"logps/rejected": -66.15187072753906,
|
|
"loss": 0.9472,
|
|
"margin_dpo/margin_mean": 1.6353683471679688,
|
|
"margin_dpo/margin_std": 2.125490188598633,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.42630385487528344,
|
|
"fcm_dpo/beta": 0.5638306140899658,
|
|
"fcm_dpo/delta": -0.040468767285346985,
|
|
"fcm_dpo/margin": 1.559096097946167,
|
|
"fcm_dpo/q_t": 0.3394070267677307,
|
|
"grad_norm": 134.23129272460938,
|
|
"learning_rate": 3.562533640600075e-07,
|
|
"logits/chosen": 0.11245854943990707,
|
|
"logits/rejected": 0.0724257230758667,
|
|
"logps/chosen": -63.39463806152344,
|
|
"logps/ref_chosen": -60.623924255371094,
|
|
"logps/ref_rejected": -68.67400360107422,
|
|
"logps/rejected": -73.00382995605469,
|
|
"loss": 0.9555,
|
|
"margin_dpo/margin_mean": 1.5590956211090088,
|
|
"margin_dpo/margin_std": 1.9817804098129272,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.42781557067271353,
|
|
"fcm_dpo/beta": 0.5626036524772644,
|
|
"fcm_dpo/delta": 0.08880946785211563,
|
|
"fcm_dpo/margin": 1.361499309539795,
|
|
"fcm_dpo/q_t": 0.3661307096481323,
|
|
"grad_norm": 173.3498992919922,
|
|
"learning_rate": 3.550550279627215e-07,
|
|
"logits/chosen": 0.12812848389148712,
|
|
"logits/rejected": 0.06249154359102249,
|
|
"logps/chosen": -70.5411376953125,
|
|
"logps/ref_chosen": -67.64775085449219,
|
|
"logps/ref_rejected": -99.96835327148438,
|
|
"logps/rejected": -104.2232437133789,
|
|
"loss": 1.0685,
|
|
"margin_dpo/margin_mean": 1.3614987134933472,
|
|
"margin_dpo/margin_std": 2.2333407402038574,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.4293272864701436,
|
|
"fcm_dpo/beta": 0.5531774163246155,
|
|
"fcm_dpo/delta": -0.07725730538368225,
|
|
"fcm_dpo/margin": 1.6630501747131348,
|
|
"fcm_dpo/q_t": 0.3319721221923828,
|
|
"grad_norm": 116.7216567993164,
|
|
"learning_rate": 3.5385375325047163e-07,
|
|
"logits/chosen": 0.1991874873638153,
|
|
"logits/rejected": 0.14788982272148132,
|
|
"logps/chosen": -59.54648971557617,
|
|
"logps/ref_chosen": -56.96742630004883,
|
|
"logps/ref_rejected": -86.36236572265625,
|
|
"logps/rejected": -90.60448455810547,
|
|
"loss": 0.9141,
|
|
"margin_dpo/margin_mean": 1.663050651550293,
|
|
"margin_dpo/margin_std": 2.190598487854004,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.4308390022675737,
|
|
"fcm_dpo/beta": 0.565268337726593,
|
|
"fcm_dpo/delta": 0.11996881663799286,
|
|
"fcm_dpo/margin": 1.304656982421875,
|
|
"fcm_dpo/q_t": 0.36226344108581543,
|
|
"grad_norm": 138.88722229003906,
|
|
"learning_rate": 3.5264957352549375e-07,
|
|
"logits/chosen": 0.2119036614894867,
|
|
"logits/rejected": 0.19066905975341797,
|
|
"logps/chosen": -74.73896789550781,
|
|
"logps/ref_chosen": -71.65611267089844,
|
|
"logps/ref_rejected": -81.63829803466797,
|
|
"logps/rejected": -86.02581787109375,
|
|
"loss": 1.0085,
|
|
"margin_dpo/margin_mean": 1.3046571016311646,
|
|
"margin_dpo/margin_std": 1.9224095344543457,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.4323507180650038,
|
|
"fcm_dpo/beta": 0.5349950194358826,
|
|
"fcm_dpo/delta": -0.34029078483581543,
|
|
"fcm_dpo/margin": 2.147865056991577,
|
|
"fcm_dpo/q_t": 0.28494542837142944,
|
|
"grad_norm": 113.43247985839844,
|
|
"learning_rate": 3.514425224712835e-07,
|
|
"logits/chosen": 0.12417186796665192,
|
|
"logits/rejected": 0.05310012400150299,
|
|
"logps/chosen": -63.82947540283203,
|
|
"logps/ref_chosen": -61.07952117919922,
|
|
"logps/ref_rejected": -91.28128051757812,
|
|
"logps/rejected": -96.17909240722656,
|
|
"loss": 0.822,
|
|
"margin_dpo/margin_mean": 2.14786434173584,
|
|
"margin_dpo/margin_std": 2.381056308746338,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.43386243386243384,
|
|
"fcm_dpo/beta": 0.5180379152297974,
|
|
"fcm_dpo/delta": -0.15914088487625122,
|
|
"fcm_dpo/margin": 1.9180022478103638,
|
|
"fcm_dpo/q_t": 0.30951881408691406,
|
|
"grad_norm": 104.51337432861328,
|
|
"learning_rate": 3.502326338516534e-07,
|
|
"logits/chosen": 0.15763196349143982,
|
|
"logits/rejected": 0.12553146481513977,
|
|
"logps/chosen": -48.694236755371094,
|
|
"logps/ref_chosen": -46.035789489746094,
|
|
"logps/ref_rejected": -59.95293426513672,
|
|
"logps/rejected": -64.52938079833984,
|
|
"loss": 0.8438,
|
|
"margin_dpo/margin_mean": 1.9180022478103638,
|
|
"margin_dpo/margin_std": 2.110712766647339,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.43537414965986393,
|
|
"fcm_dpo/beta": 0.5219178795814514,
|
|
"fcm_dpo/delta": 0.1593482941389084,
|
|
"fcm_dpo/margin": 1.34527587890625,
|
|
"fcm_dpo/q_t": 0.37496429681777954,
|
|
"grad_norm": 154.528076171875,
|
|
"learning_rate": 3.490199415097892e-07,
|
|
"logits/chosen": 0.07364333420991898,
|
|
"logits/rejected": 0.03298754245042801,
|
|
"logps/chosen": -68.56511688232422,
|
|
"logps/ref_chosen": -65.3908462524414,
|
|
"logps/ref_rejected": -88.53607940673828,
|
|
"logps/rejected": -93.05561828613281,
|
|
"loss": 1.0938,
|
|
"margin_dpo/margin_mean": 1.345275640487671,
|
|
"margin_dpo/margin_std": 2.366204261779785,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.436885865457294,
|
|
"fcm_dpo/beta": 0.5325125455856323,
|
|
"fcm_dpo/delta": 0.054238371551036835,
|
|
"fcm_dpo/margin": 1.5024845600128174,
|
|
"fcm_dpo/q_t": 0.36477959156036377,
|
|
"grad_norm": 118.85664367675781,
|
|
"learning_rate": 3.4780447936730247e-07,
|
|
"logits/chosen": 0.22808194160461426,
|
|
"logits/rejected": 0.20086199045181274,
|
|
"logps/chosen": -57.79352569580078,
|
|
"logps/ref_chosen": -54.5936279296875,
|
|
"logps/ref_rejected": -67.20855712890625,
|
|
"logps/rejected": -71.91093444824219,
|
|
"loss": 1.0486,
|
|
"margin_dpo/margin_mean": 1.5024845600128174,
|
|
"margin_dpo/margin_std": 2.4532387256622314,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.4383975812547241,
|
|
"fcm_dpo/beta": 0.5275927782058716,
|
|
"fcm_dpo/delta": -0.057161666452884674,
|
|
"fcm_dpo/margin": 1.7088825702667236,
|
|
"fcm_dpo/q_t": 0.3398800194263458,
|
|
"grad_norm": 145.15016174316406,
|
|
"learning_rate": 3.465862814232821e-07,
|
|
"logits/chosen": 0.2324827015399933,
|
|
"logits/rejected": 0.1766033172607422,
|
|
"logps/chosen": -64.75177001953125,
|
|
"logps/ref_chosen": -61.38457489013672,
|
|
"logps/ref_rejected": -91.92778015136719,
|
|
"logps/rejected": -97.00385284423828,
|
|
"loss": 0.9761,
|
|
"margin_dpo/margin_mean": 1.7088818550109863,
|
|
"margin_dpo/margin_std": 2.4079670906066895,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.4399092970521542,
|
|
"fcm_dpo/beta": 0.5278656482696533,
|
|
"fcm_dpo/delta": -0.10052298754453659,
|
|
"fcm_dpo/margin": 1.7776007652282715,
|
|
"fcm_dpo/q_t": 0.32905471324920654,
|
|
"grad_norm": 116.88301086425781,
|
|
"learning_rate": 3.4536538175334343e-07,
|
|
"logits/chosen": 0.3069065809249878,
|
|
"logits/rejected": 0.25807487964630127,
|
|
"logps/chosen": -53.91313171386719,
|
|
"logps/ref_chosen": -50.863037109375,
|
|
"logps/ref_rejected": -82.20868682861328,
|
|
"logps/rejected": -87.03638458251953,
|
|
"loss": 0.9582,
|
|
"margin_dpo/margin_mean": 1.7776010036468506,
|
|
"margin_dpo/margin_std": 2.3519887924194336,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.4414210128495843,
|
|
"fcm_dpo/beta": 0.5298629403114319,
|
|
"fcm_dpo/delta": 0.10786361247301102,
|
|
"fcm_dpo/margin": 1.4120714664459229,
|
|
"fcm_dpo/q_t": 0.3546355962753296,
|
|
"grad_norm": 154.5143280029297,
|
|
"learning_rate": 3.4414181450867465e-07,
|
|
"logits/chosen": 0.22030502557754517,
|
|
"logits/rejected": 0.17777778208255768,
|
|
"logps/chosen": -67.32970428466797,
|
|
"logps/ref_chosen": -64.34888458251953,
|
|
"logps/ref_rejected": -72.86434173583984,
|
|
"logps/rejected": -77.25723266601562,
|
|
"loss": 1.0112,
|
|
"margin_dpo/margin_mean": 1.4120711088180542,
|
|
"margin_dpo/margin_std": 2.0962743759155273,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.4429327286470144,
|
|
"fcm_dpo/beta": 0.515381932258606,
|
|
"fcm_dpo/delta": -0.20761936902999878,
|
|
"fcm_dpo/margin": 2.0112385749816895,
|
|
"fcm_dpo/q_t": 0.30622002482414246,
|
|
"grad_norm": 96.73804473876953,
|
|
"learning_rate": 3.4291561391508185e-07,
|
|
"logits/chosen": 0.24665296077728271,
|
|
"logits/rejected": 0.18240293860435486,
|
|
"logps/chosen": -58.349090576171875,
|
|
"logps/ref_chosen": -54.869468688964844,
|
|
"logps/ref_rejected": -81.858642578125,
|
|
"logps/rejected": -87.34951782226562,
|
|
"loss": 0.8824,
|
|
"margin_dpo/margin_mean": 2.0112390518188477,
|
|
"margin_dpo/margin_std": 2.4112563133239746,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.4444444444444444,
|
|
"fcm_dpo/beta": 0.49291643500328064,
|
|
"fcm_dpo/delta": -0.11736033111810684,
|
|
"fcm_dpo/margin": 1.9326649904251099,
|
|
"fcm_dpo/q_t": 0.3255431652069092,
|
|
"grad_norm": 89.73761749267578,
|
|
"learning_rate": 3.4168681427203153e-07,
|
|
"logits/chosen": 0.1944410502910614,
|
|
"logits/rejected": 0.16065473854541779,
|
|
"logps/chosen": -59.7293701171875,
|
|
"logps/ref_chosen": -56.670902252197266,
|
|
"logps/ref_rejected": -70.32819366455078,
|
|
"logps/rejected": -75.31932067871094,
|
|
"loss": 0.8748,
|
|
"margin_dpo/margin_mean": 1.932664155960083,
|
|
"margin_dpo/margin_std": 2.3227932453155518,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.4459561602418745,
|
|
"fcm_dpo/beta": 0.5035637617111206,
|
|
"fcm_dpo/delta": 0.06118401512503624,
|
|
"fcm_dpo/margin": 1.5732038021087646,
|
|
"fcm_dpo/q_t": 0.35997217893600464,
|
|
"grad_norm": 111.81068420410156,
|
|
"learning_rate": 3.4045544995169125e-07,
|
|
"logits/chosen": 0.21110177040100098,
|
|
"logits/rejected": 0.14388032257556915,
|
|
"logps/chosen": -53.704856872558594,
|
|
"logps/ref_chosen": -50.40088653564453,
|
|
"logps/ref_rejected": -83.43521881103516,
|
|
"logps/rejected": -88.31239318847656,
|
|
"loss": 1.0204,
|
|
"margin_dpo/margin_mean": 1.5732042789459229,
|
|
"margin_dpo/margin_std": 2.3596243858337402,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.4474678760393046,
|
|
"fcm_dpo/beta": 0.47933870553970337,
|
|
"fcm_dpo/delta": -0.27881085872650146,
|
|
"fcm_dpo/margin": 2.288297653198242,
|
|
"fcm_dpo/q_t": 0.2990821599960327,
|
|
"grad_norm": 114.7384033203125,
|
|
"learning_rate": 3.392215553979679e-07,
|
|
"logits/chosen": 0.1592310667037964,
|
|
"logits/rejected": 0.12974442541599274,
|
|
"logps/chosen": -72.52301025390625,
|
|
"logps/ref_chosen": -69.15034484863281,
|
|
"logps/ref_rejected": -89.60166931152344,
|
|
"logps/rejected": -95.26262664794922,
|
|
"loss": 0.8132,
|
|
"margin_dpo/margin_mean": 2.288297653198242,
|
|
"margin_dpo/margin_std": 2.4353115558624268,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.4489795918367347,
|
|
"fcm_dpo/beta": 0.4688694477081299,
|
|
"fcm_dpo/delta": -0.11125542968511581,
|
|
"fcm_dpo/margin": 2.027672529220581,
|
|
"fcm_dpo/q_t": 0.3116467297077179,
|
|
"grad_norm": 99.90240478515625,
|
|
"learning_rate": 3.3798516512554485e-07,
|
|
"logits/chosen": 0.19764375686645508,
|
|
"logits/rejected": 0.14518359303474426,
|
|
"logps/chosen": -61.69432830810547,
|
|
"logps/ref_chosen": -58.01630401611328,
|
|
"logps/ref_rejected": -69.95780944824219,
|
|
"logps/rejected": -75.66350555419922,
|
|
"loss": 0.8257,
|
|
"margin_dpo/margin_mean": 2.027672290802002,
|
|
"margin_dpo/margin_std": 2.0139260292053223,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.4504913076341648,
|
|
"fcm_dpo/beta": 0.4668663740158081,
|
|
"fcm_dpo/delta": 0.017975449562072754,
|
|
"fcm_dpo/margin": 1.7846050262451172,
|
|
"fcm_dpo/q_t": 0.34727245569229126,
|
|
"grad_norm": 103.42831420898438,
|
|
"learning_rate": 3.367463137189156e-07,
|
|
"logits/chosen": 0.2629234790802002,
|
|
"logits/rejected": 0.21024516224861145,
|
|
"logps/chosen": -59.81189727783203,
|
|
"logps/ref_chosen": -56.1693115234375,
|
|
"logps/ref_rejected": -68.55052185058594,
|
|
"logps/rejected": -73.97770690917969,
|
|
"loss": 1.0074,
|
|
"margin_dpo/margin_mean": 1.7846052646636963,
|
|
"margin_dpo/margin_std": 2.6018660068511963,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.4520030234315949,
|
|
"fcm_dpo/beta": 0.47573158144950867,
|
|
"fcm_dpo/delta": 0.13766300678253174,
|
|
"fcm_dpo/margin": 1.5178146362304688,
|
|
"fcm_dpo/q_t": 0.3709160387516022,
|
|
"grad_norm": 113.95995330810547,
|
|
"learning_rate": 3.355050358314172e-07,
|
|
"logits/chosen": 0.14425645768642426,
|
|
"logits/rejected": 0.12183210253715515,
|
|
"logps/chosen": -65.68190002441406,
|
|
"logps/ref_chosen": -62.31780242919922,
|
|
"logps/ref_rejected": -72.60028839111328,
|
|
"logps/rejected": -77.48220825195312,
|
|
"loss": 1.0751,
|
|
"margin_dpo/margin_mean": 1.517815351486206,
|
|
"margin_dpo/margin_std": 2.508615016937256,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.45351473922902497,
|
|
"fcm_dpo/beta": 0.47874048352241516,
|
|
"fcm_dpo/delta": -0.004431587643921375,
|
|
"fcm_dpo/margin": 1.7839136123657227,
|
|
"fcm_dpo/q_t": 0.3367232084274292,
|
|
"grad_norm": 123.27234649658203,
|
|
"learning_rate": 3.3426136618426043e-07,
|
|
"logits/chosen": 0.22687631845474243,
|
|
"logits/rejected": 0.1784324198961258,
|
|
"logps/chosen": -64.30814361572266,
|
|
"logps/ref_chosen": -60.38157653808594,
|
|
"logps/ref_rejected": -75.45442199707031,
|
|
"logps/rejected": -81.16490173339844,
|
|
"loss": 1.0008,
|
|
"margin_dpo/margin_mean": 1.7839126586914062,
|
|
"margin_dpo/margin_std": 2.55767822265625,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.455026455026455,
|
|
"fcm_dpo/beta": 0.4758816361427307,
|
|
"fcm_dpo/delta": 0.04150884598493576,
|
|
"fcm_dpo/margin": 1.7021515369415283,
|
|
"fcm_dpo/q_t": 0.36344343423843384,
|
|
"grad_norm": 114.94867706298828,
|
|
"learning_rate": 3.3301533956555885e-07,
|
|
"logits/chosen": 0.26481878757476807,
|
|
"logits/rejected": 0.23754526674747467,
|
|
"logps/chosen": -56.669219970703125,
|
|
"logps/ref_chosen": -52.85089111328125,
|
|
"logps/ref_rejected": -69.97584533691406,
|
|
"logps/rejected": -75.49632263183594,
|
|
"loss": 1.0754,
|
|
"margin_dpo/margin_mean": 1.7021512985229492,
|
|
"margin_dpo/margin_std": 2.8137896060943604,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.4565381708238851,
|
|
"fcm_dpo/beta": 0.5032765865325928,
|
|
"fcm_dpo/delta": 0.2927466034889221,
|
|
"fcm_dpo/margin": 1.140923023223877,
|
|
"fcm_dpo/q_t": 0.3921450972557068,
|
|
"grad_norm": 140.91168212890625,
|
|
"learning_rate": 3.317669908293554e-07,
|
|
"logits/chosen": 0.1072927862405777,
|
|
"logits/rejected": 0.06553728133440018,
|
|
"logps/chosen": -70.84619140625,
|
|
"logps/ref_chosen": -66.96650695800781,
|
|
"logps/ref_rejected": -88.09510803222656,
|
|
"logps/rejected": -93.11572265625,
|
|
"loss": 1.1807,
|
|
"margin_dpo/margin_mean": 1.140923023223877,
|
|
"margin_dpo/margin_std": 2.358726978302002,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.4580498866213152,
|
|
"fcm_dpo/beta": 0.48211240768432617,
|
|
"fcm_dpo/delta": -0.4027268886566162,
|
|
"fcm_dpo/margin": 2.498141288757324,
|
|
"fcm_dpo/q_t": 0.2989780902862549,
|
|
"grad_norm": 100.8934097290039,
|
|
"learning_rate": 3.3051635489464793e-07,
|
|
"logits/chosen": 0.2042883336544037,
|
|
"logits/rejected": 0.1521485447883606,
|
|
"logps/chosen": -65.78520202636719,
|
|
"logps/ref_chosen": -62.12152862548828,
|
|
"logps/ref_rejected": -90.31204223632812,
|
|
"logps/rejected": -96.4738540649414,
|
|
"loss": 0.8664,
|
|
"margin_dpo/margin_mean": 2.498141050338745,
|
|
"margin_dpo/margin_std": 3.0501856803894043,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.4595616024187453,
|
|
"fcm_dpo/beta": 0.45722144842147827,
|
|
"fcm_dpo/delta": -0.24640443921089172,
|
|
"fcm_dpo/margin": 2.341660976409912,
|
|
"fcm_dpo/q_t": 0.29080310463905334,
|
|
"grad_norm": 92.65155029296875,
|
|
"learning_rate": 3.292634667444117e-07,
|
|
"logits/chosen": 0.21486225724220276,
|
|
"logits/rejected": 0.17082390189170837,
|
|
"logps/chosen": -64.18489837646484,
|
|
"logps/ref_chosen": -60.695091247558594,
|
|
"logps/ref_rejected": -78.2525405883789,
|
|
"logps/rejected": -84.08401489257812,
|
|
"loss": 0.7711,
|
|
"margin_dpo/margin_mean": 2.3416614532470703,
|
|
"margin_dpo/margin_std": 2.206408739089966,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.46107331821617537,
|
|
"fcm_dpo/beta": 0.4463159441947937,
|
|
"fcm_dpo/delta": 0.014554038643836975,
|
|
"fcm_dpo/margin": 1.8721232414245605,
|
|
"fcm_dpo/q_t": 0.3510298430919647,
|
|
"grad_norm": 109.76885223388672,
|
|
"learning_rate": 3.280083614246217e-07,
|
|
"logits/chosen": 0.1453256607055664,
|
|
"logits/rejected": 0.152765691280365,
|
|
"logps/chosen": -77.118408203125,
|
|
"logps/ref_chosen": -72.69914245605469,
|
|
"logps/ref_rejected": -65.65670776367188,
|
|
"logps/rejected": -71.9480972290039,
|
|
"loss": 1.0397,
|
|
"margin_dpo/margin_mean": 1.872122883796692,
|
|
"margin_dpo/margin_std": 2.9213171005249023,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.46258503401360546,
|
|
"fcm_dpo/beta": 0.44983065128326416,
|
|
"fcm_dpo/delta": 0.10561927407979965,
|
|
"fcm_dpo/margin": 1.6640582084655762,
|
|
"fcm_dpo/q_t": 0.3665274977684021,
|
|
"grad_norm": 105.87984466552734,
|
|
"learning_rate": 3.267510740432719e-07,
|
|
"logits/chosen": 0.22533267736434937,
|
|
"logits/rejected": 0.13813963532447815,
|
|
"logps/chosen": -58.389102935791016,
|
|
"logps/ref_chosen": -53.97052764892578,
|
|
"logps/ref_rejected": -71.02423095703125,
|
|
"logps/rejected": -77.10685729980469,
|
|
"loss": 1.0711,
|
|
"margin_dpo/margin_mean": 1.6640576124191284,
|
|
"margin_dpo/margin_std": 2.727498769760132,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.46409674981103555,
|
|
"fcm_dpo/beta": 0.47032594680786133,
|
|
"fcm_dpo/delta": 0.10532738268375397,
|
|
"fcm_dpo/margin": 1.5966615676879883,
|
|
"fcm_dpo/q_t": 0.37514978647232056,
|
|
"grad_norm": 117.74783325195312,
|
|
"learning_rate": 3.2549163976939285e-07,
|
|
"logits/chosen": 0.22091859579086304,
|
|
"logits/rejected": 0.18501907587051392,
|
|
"logps/chosen": -61.177223205566406,
|
|
"logps/ref_chosen": -57.413108825683594,
|
|
"logps/ref_rejected": -68.68010711669922,
|
|
"logps/rejected": -74.04087829589844,
|
|
"loss": 1.1527,
|
|
"margin_dpo/margin_mean": 1.59666109085083,
|
|
"margin_dpo/margin_std": 2.9838297367095947,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.4656084656084656,
|
|
"fcm_dpo/beta": 0.45703125,
|
|
"fcm_dpo/delta": -0.17830657958984375,
|
|
"fcm_dpo/margin": 2.210214376449585,
|
|
"fcm_dpo/q_t": 0.31531810760498047,
|
|
"grad_norm": 98.66069793701172,
|
|
"learning_rate": 3.2423009383206874e-07,
|
|
"logits/chosen": 0.20415475964546204,
|
|
"logits/rejected": 0.1843133568763733,
|
|
"logps/chosen": -70.50775909423828,
|
|
"logps/ref_chosen": -66.59879302978516,
|
|
"logps/ref_rejected": -74.337158203125,
|
|
"logps/rejected": -80.45633697509766,
|
|
"loss": 0.8886,
|
|
"margin_dpo/margin_mean": 2.210214614868164,
|
|
"margin_dpo/margin_std": 2.7284340858459473,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.4671201814058957,
|
|
"fcm_dpo/beta": 0.4430055022239685,
|
|
"fcm_dpo/delta": -0.08050990849733353,
|
|
"fcm_dpo/margin": 2.0769472122192383,
|
|
"fcm_dpo/q_t": 0.33381199836730957,
|
|
"grad_norm": 133.50289916992188,
|
|
"learning_rate": 3.229664715194511e-07,
|
|
"logits/chosen": 0.23776155710220337,
|
|
"logits/rejected": 0.19149421155452728,
|
|
"logps/chosen": -69.8235092163086,
|
|
"logps/ref_chosen": -65.39474487304688,
|
|
"logps/ref_rejected": -75.70930480957031,
|
|
"logps/rejected": -82.21501159667969,
|
|
"loss": 0.9315,
|
|
"margin_dpo/margin_mean": 2.0769472122192383,
|
|
"margin_dpo/margin_std": 2.7262511253356934,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.46863189720332576,
|
|
"fcm_dpo/beta": 0.46543052792549133,
|
|
"fcm_dpo/delta": 0.28398555517196655,
|
|
"fcm_dpo/margin": 1.2512989044189453,
|
|
"fcm_dpo/q_t": 0.4003729820251465,
|
|
"grad_norm": 143.03170776367188,
|
|
"learning_rate": 3.2170080817777257e-07,
|
|
"logits/chosen": 0.2502453923225403,
|
|
"logits/rejected": 0.24358004331588745,
|
|
"logps/chosen": -79.23374938964844,
|
|
"logps/ref_chosen": -74.66827392578125,
|
|
"logps/ref_rejected": -80.5689697265625,
|
|
"logps/rejected": -86.3857421875,
|
|
"loss": 1.1998,
|
|
"margin_dpo/margin_mean": 1.2512991428375244,
|
|
"margin_dpo/margin_std": 2.6639318466186523,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.47014361300075586,
|
|
"fcm_dpo/beta": 0.4706069827079773,
|
|
"fcm_dpo/delta": 0.0076281167566776276,
|
|
"fcm_dpo/margin": 1.7907335758209229,
|
|
"fcm_dpo/q_t": 0.35300174355506897,
|
|
"grad_norm": 112.3255844116211,
|
|
"learning_rate": 3.204331392103574e-07,
|
|
"logits/chosen": 0.15300993621349335,
|
|
"logits/rejected": 0.06236676499247551,
|
|
"logps/chosen": -63.637657165527344,
|
|
"logps/ref_chosen": -59.738033294677734,
|
|
"logps/ref_rejected": -93.60757446289062,
|
|
"logps/rejected": -99.29792785644531,
|
|
"loss": 1.0718,
|
|
"margin_dpo/margin_mean": 1.7907336950302124,
|
|
"margin_dpo/margin_std": 3.0270423889160156,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.47165532879818595,
|
|
"fcm_dpo/beta": 0.46410685777664185,
|
|
"fcm_dpo/delta": -0.15572930872440338,
|
|
"fcm_dpo/margin": 2.134312868118286,
|
|
"fcm_dpo/q_t": 0.31550097465515137,
|
|
"grad_norm": 114.26778411865234,
|
|
"learning_rate": 3.1916350007663176e-07,
|
|
"logits/chosen": 0.2003878653049469,
|
|
"logits/rejected": 0.12340877950191498,
|
|
"logps/chosen": -57.946800231933594,
|
|
"logps/ref_chosen": -53.816436767578125,
|
|
"logps/ref_rejected": -68.6575698852539,
|
|
"logps/rejected": -74.92224884033203,
|
|
"loss": 0.935,
|
|
"margin_dpo/margin_mean": 2.134312629699707,
|
|
"margin_dpo/margin_std": 2.824443817138672,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.47316704459561604,
|
|
"fcm_dpo/beta": 0.4569798707962036,
|
|
"fcm_dpo/delta": 0.017772406339645386,
|
|
"fcm_dpo/margin": 1.823397159576416,
|
|
"fcm_dpo/q_t": 0.36278456449508667,
|
|
"grad_norm": 106.69249725341797,
|
|
"learning_rate": 3.178919262911314e-07,
|
|
"logits/chosen": 0.2684420347213745,
|
|
"logits/rejected": 0.24755266308784485,
|
|
"logps/chosen": -63.84406280517578,
|
|
"logps/ref_chosen": -59.957359313964844,
|
|
"logps/ref_rejected": -69.31729888916016,
|
|
"logps/rejected": -75.02740478515625,
|
|
"loss": 1.0815,
|
|
"margin_dpo/margin_mean": 1.8233965635299683,
|
|
"margin_dpo/margin_std": 3.1479573249816895,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.47467876039304613,
|
|
"fcm_dpo/beta": 0.4399215579032898,
|
|
"fcm_dpo/delta": -0.27059251070022583,
|
|
"fcm_dpo/margin": 2.476907253265381,
|
|
"fcm_dpo/q_t": 0.30071818828582764,
|
|
"grad_norm": 92.56130981445312,
|
|
"learning_rate": 3.166184534225087e-07,
|
|
"logits/chosen": 0.20626097917556763,
|
|
"logits/rejected": 0.21696346998214722,
|
|
"logps/chosen": -74.36647033691406,
|
|
"logps/ref_chosen": -70.26815795898438,
|
|
"logps/ref_rejected": -69.23971557617188,
|
|
"logps/rejected": -75.81494140625,
|
|
"loss": 0.81,
|
|
"margin_dpo/margin_mean": 2.476907253265381,
|
|
"margin_dpo/margin_std": 2.6668601036071777,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.47619047619047616,
|
|
"fcm_dpo/beta": 0.43925973773002625,
|
|
"fcm_dpo/delta": -0.023149289190769196,
|
|
"fcm_dpo/margin": 1.9778019189834595,
|
|
"fcm_dpo/q_t": 0.33359387516975403,
|
|
"grad_norm": 96.88661193847656,
|
|
"learning_rate": 3.1534311709253723e-07,
|
|
"logits/chosen": 0.14388734102249146,
|
|
"logits/rejected": 0.10625620931386948,
|
|
"logps/chosen": -72.1413803100586,
|
|
"logps/ref_chosen": -67.79469299316406,
|
|
"logps/ref_rejected": -74.55148315429688,
|
|
"logps/rejected": -80.87596893310547,
|
|
"loss": 0.9586,
|
|
"margin_dpo/margin_mean": 1.9778021574020386,
|
|
"margin_dpo/margin_std": 2.5699832439422607,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.47770219198790626,
|
|
"fcm_dpo/beta": 0.4275297522544861,
|
|
"fcm_dpo/delta": -0.23587097227573395,
|
|
"fcm_dpo/margin": 2.4735541343688965,
|
|
"fcm_dpo/q_t": 0.3129580616950989,
|
|
"grad_norm": 92.06230163574219,
|
|
"learning_rate": 3.1406595297511564e-07,
|
|
"logits/chosen": 0.14784720540046692,
|
|
"logits/rejected": 0.0601130872964859,
|
|
"logps/chosen": -59.30602264404297,
|
|
"logps/ref_chosen": -55.288482666015625,
|
|
"logps/ref_rejected": -96.15723419189453,
|
|
"logps/rejected": -102.64833068847656,
|
|
"loss": 0.8803,
|
|
"margin_dpo/margin_mean": 2.473552942276001,
|
|
"margin_dpo/margin_std": 2.830709934234619,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.47921390778533635,
|
|
"fcm_dpo/beta": 0.39385396242141724,
|
|
"fcm_dpo/delta": -0.29807132482528687,
|
|
"fcm_dpo/margin": 2.8259806632995605,
|
|
"fcm_dpo/q_t": 0.29416024684906006,
|
|
"grad_norm": 85.33952331542969,
|
|
"learning_rate": 3.1278699679526975e-07,
|
|
"logits/chosen": 0.24099749326705933,
|
|
"logits/rejected": 0.19853255152702332,
|
|
"logps/chosen": -58.54376220703125,
|
|
"logps/ref_chosen": -54.58137512207031,
|
|
"logps/ref_rejected": -72.77232360839844,
|
|
"logps/rejected": -79.56068420410156,
|
|
"loss": 0.7843,
|
|
"margin_dpo/margin_mean": 2.8259806632995605,
|
|
"margin_dpo/margin_std": 2.902916431427002,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.48072562358276644,
|
|
"fcm_dpo/beta": 0.3996548354625702,
|
|
"fcm_dpo/delta": 0.19046162068843842,
|
|
"fcm_dpo/margin": 1.683230996131897,
|
|
"fcm_dpo/q_t": 0.3898201286792755,
|
|
"grad_norm": 105.31067657470703,
|
|
"learning_rate": 3.1150628432815336e-07,
|
|
"logits/chosen": 0.25582095980644226,
|
|
"logits/rejected": 0.2070683240890503,
|
|
"logps/chosen": -57.41715621948242,
|
|
"logps/ref_chosen": -52.88822937011719,
|
|
"logps/ref_rejected": -80.63988494873047,
|
|
"logps/rejected": -86.85203552246094,
|
|
"loss": 1.2347,
|
|
"margin_dpo/margin_mean": 1.6832314729690552,
|
|
"margin_dpo/margin_std": 3.642604351043701,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.48223733938019653,
|
|
"fcm_dpo/beta": 0.3988262116909027,
|
|
"fcm_dpo/delta": -0.07955436408519745,
|
|
"fcm_dpo/margin": 2.311298370361328,
|
|
"fcm_dpo/q_t": 0.334533154964447,
|
|
"grad_norm": 92.12313079833984,
|
|
"learning_rate": 3.1022385139804707e-07,
|
|
"logits/chosen": 0.18351227045059204,
|
|
"logits/rejected": 0.1661396622657776,
|
|
"logps/chosen": -68.296630859375,
|
|
"logps/ref_chosen": -64.36333465576172,
|
|
"logps/ref_rejected": -79.47296142578125,
|
|
"logps/rejected": -85.7175521850586,
|
|
"loss": 0.9816,
|
|
"margin_dpo/margin_mean": 2.3112986087799072,
|
|
"margin_dpo/margin_std": 3.327394723892212,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.4837490551776266,
|
|
"fcm_dpo/beta": 0.41219210624694824,
|
|
"fcm_dpo/delta": 0.10497826337814331,
|
|
"fcm_dpo/margin": 1.8063325881958008,
|
|
"fcm_dpo/q_t": 0.3807426691055298,
|
|
"grad_norm": 100.4907455444336,
|
|
"learning_rate": 3.0893973387735683e-07,
|
|
"logits/chosen": 0.1191617101430893,
|
|
"logits/rejected": 0.08381935209035873,
|
|
"logps/chosen": -53.169158935546875,
|
|
"logps/ref_chosen": -49.558746337890625,
|
|
"logps/ref_rejected": -71.23444366455078,
|
|
"logps/rejected": -76.65119171142578,
|
|
"loss": 1.1398,
|
|
"margin_dpo/margin_mean": 1.8063322305679321,
|
|
"margin_dpo/margin_std": 3.381862163543701,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.4852607709750567,
|
|
"fcm_dpo/beta": 0.4042707085609436,
|
|
"fcm_dpo/delta": 0.016887515783309937,
|
|
"fcm_dpo/margin": 2.0608034133911133,
|
|
"fcm_dpo/q_t": 0.3445979356765747,
|
|
"grad_norm": 84.57140350341797,
|
|
"learning_rate": 3.0765396768561004e-07,
|
|
"logits/chosen": 0.154787078499794,
|
|
"logits/rejected": 0.14245569705963135,
|
|
"logps/chosen": -56.562965393066406,
|
|
"logps/ref_chosen": -52.08526611328125,
|
|
"logps/ref_rejected": -55.58674621582031,
|
|
"logps/rejected": -62.125244140625,
|
|
"loss": 1.0386,
|
|
"margin_dpo/margin_mean": 2.0608034133911133,
|
|
"margin_dpo/margin_std": 3.0765933990478516,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.48677248677248675,
|
|
"fcm_dpo/beta": 0.3972187638282776,
|
|
"fcm_dpo/delta": -0.2467936873435974,
|
|
"fcm_dpo/margin": 2.695878267288208,
|
|
"fcm_dpo/q_t": 0.29130974411964417,
|
|
"grad_norm": 79.39057159423828,
|
|
"learning_rate": 3.063665887884511e-07,
|
|
"logits/chosen": 0.22061362862586975,
|
|
"logits/rejected": 0.15948614478111267,
|
|
"logps/chosen": -51.72674560546875,
|
|
"logps/ref_chosen": -47.404109954833984,
|
|
"logps/ref_rejected": -73.4260025024414,
|
|
"logps/rejected": -80.44451141357422,
|
|
"loss": 0.7937,
|
|
"margin_dpo/margin_mean": 2.695878267288208,
|
|
"margin_dpo/margin_std": 2.6556789875030518,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.48828420256991684,
|
|
"fcm_dpo/beta": 0.39542824029922485,
|
|
"fcm_dpo/delta": 0.02666623145341873,
|
|
"fcm_dpo/margin": 2.0780739784240723,
|
|
"fcm_dpo/q_t": 0.37355026602745056,
|
|
"grad_norm": 110.35505676269531,
|
|
"learning_rate": 3.0507763319663517e-07,
|
|
"logits/chosen": 0.17272543907165527,
|
|
"logits/rejected": 0.11414434015750885,
|
|
"logps/chosen": -74.46739196777344,
|
|
"logps/ref_chosen": -70.00630187988281,
|
|
"logps/ref_rejected": -86.96690368652344,
|
|
"logps/rejected": -93.50607299804688,
|
|
"loss": 1.1522,
|
|
"margin_dpo/margin_mean": 2.078073263168335,
|
|
"margin_dpo/margin_std": 3.899219512939453,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.4897959183673469,
|
|
"fcm_dpo/beta": 0.38537365198135376,
|
|
"fcm_dpo/delta": -0.061207324266433716,
|
|
"fcm_dpo/margin": 2.3483946323394775,
|
|
"fcm_dpo/q_t": 0.32360321283340454,
|
|
"grad_norm": 79.66868591308594,
|
|
"learning_rate": 3.0378713696502097e-07,
|
|
"logits/chosen": 0.2204309105873108,
|
|
"logits/rejected": 0.17378325760364532,
|
|
"logps/chosen": -59.973960876464844,
|
|
"logps/ref_chosen": -55.88882064819336,
|
|
"logps/ref_rejected": -75.23088073730469,
|
|
"logps/rejected": -81.66442108154297,
|
|
"loss": 0.8655,
|
|
"margin_dpo/margin_mean": 2.3483948707580566,
|
|
"margin_dpo/margin_std": 2.62096905708313,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.491307634164777,
|
|
"fcm_dpo/beta": 0.38281673192977905,
|
|
"fcm_dpo/delta": 0.013121634721755981,
|
|
"fcm_dpo/margin": 2.1842405796051025,
|
|
"fcm_dpo/q_t": 0.3410230278968811,
|
|
"grad_norm": 88.95374298095703,
|
|
"learning_rate": 3.0249513619156206e-07,
|
|
"logits/chosen": 0.1963476836681366,
|
|
"logits/rejected": 0.14803680777549744,
|
|
"logps/chosen": -68.87274932861328,
|
|
"logps/ref_chosen": -64.14701843261719,
|
|
"logps/ref_rejected": -79.91143798828125,
|
|
"logps/rejected": -86.82140350341797,
|
|
"loss": 0.9619,
|
|
"margin_dpo/margin_mean": 2.1842405796051025,
|
|
"margin_dpo/margin_std": 2.953347682952881,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.4928193499622071,
|
|
"fcm_dpo/beta": 0.40765079855918884,
|
|
"fcm_dpo/delta": 0.35959964990615845,
|
|
"fcm_dpo/margin": 1.2490381002426147,
|
|
"fcm_dpo/q_t": 0.407970130443573,
|
|
"grad_norm": 122.61116790771484,
|
|
"learning_rate": 3.012016670162977e-07,
|
|
"logits/chosen": 0.16117537021636963,
|
|
"logits/rejected": 0.1591501086950302,
|
|
"logps/chosen": -80.88282775878906,
|
|
"logps/ref_chosen": -75.53131103515625,
|
|
"logps/ref_rejected": -76.5898666381836,
|
|
"logps/rejected": -83.19041442871094,
|
|
"loss": 1.2647,
|
|
"margin_dpo/margin_mean": 1.249037742614746,
|
|
"margin_dpo/margin_std": 3.0318169593811035,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.4943310657596372,
|
|
"fcm_dpo/beta": 0.41696861386299133,
|
|
"fcm_dpo/delta": -0.012611184269189835,
|
|
"fcm_dpo/margin": 2.0652732849121094,
|
|
"fcm_dpo/q_t": 0.35170531272888184,
|
|
"grad_norm": 102.3149185180664,
|
|
"learning_rate": 2.99906765620341e-07,
|
|
"logits/chosen": 0.13333944976329803,
|
|
"logits/rejected": 0.09463554620742798,
|
|
"logps/chosen": -73.87966918945312,
|
|
"logps/ref_chosen": -69.33717346191406,
|
|
"logps/ref_rejected": -73.37751770019531,
|
|
"logps/rejected": -79.98530578613281,
|
|
"loss": 1.0782,
|
|
"margin_dpo/margin_mean": 2.0652735233306885,
|
|
"margin_dpo/margin_std": 3.41621732711792,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.4958427815570673,
|
|
"fcm_dpo/beta": 0.4161731004714966,
|
|
"fcm_dpo/delta": 0.029472189024090767,
|
|
"fcm_dpo/margin": 1.9774518013000488,
|
|
"fcm_dpo/q_t": 0.3513525724411011,
|
|
"grad_norm": 101.79065704345703,
|
|
"learning_rate": 2.9861046822486766e-07,
|
|
"logits/chosen": 0.16545960307121277,
|
|
"logits/rejected": 0.14433646202087402,
|
|
"logps/chosen": -65.78724670410156,
|
|
"logps/ref_chosen": -61.70623016357422,
|
|
"logps/ref_rejected": -83.73808288574219,
|
|
"logps/rejected": -89.79654693603516,
|
|
"loss": 1.0012,
|
|
"margin_dpo/margin_mean": 1.9774516820907593,
|
|
"margin_dpo/margin_std": 2.9047513008117676,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.4973544973544973,
|
|
"fcm_dpo/beta": 0.4103434085845947,
|
|
"fcm_dpo/delta": -0.03602056950330734,
|
|
"fcm_dpo/margin": 2.1446993350982666,
|
|
"fcm_dpo/q_t": 0.3538691997528076,
|
|
"grad_norm": 116.51476287841797,
|
|
"learning_rate": 2.9731281109010253e-07,
|
|
"logits/chosen": 0.2215312123298645,
|
|
"logits/rejected": 0.17835497856140137,
|
|
"logps/chosen": -69.37106323242188,
|
|
"logps/ref_chosen": -64.4984130859375,
|
|
"logps/ref_rejected": -83.6591796875,
|
|
"logps/rejected": -90.676513671875,
|
|
"loss": 1.0608,
|
|
"margin_dpo/margin_mean": 2.1446990966796875,
|
|
"margin_dpo/margin_std": 3.5509347915649414,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.4988662131519274,
|
|
"fcm_dpo/beta": 0.4071670174598694,
|
|
"fcm_dpo/delta": -0.150421604514122,
|
|
"fcm_dpo/margin": 2.4211106300354004,
|
|
"fcm_dpo/q_t": 0.32498639822006226,
|
|
"grad_norm": 86.76107025146484,
|
|
"learning_rate": 2.9601383051430505e-07,
|
|
"logits/chosen": 0.22971756756305695,
|
|
"logits/rejected": 0.17051860690116882,
|
|
"logps/chosen": -58.83673095703125,
|
|
"logps/ref_chosen": -54.80464172363281,
|
|
"logps/ref_rejected": -75.3194351196289,
|
|
"logps/rejected": -81.77262878417969,
|
|
"loss": 1.0026,
|
|
"margin_dpo/margin_mean": 2.421110153198242,
|
|
"margin_dpo/margin_std": 3.4969735145568848,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.5003779289493575,
|
|
"fcm_dpo/beta": 0.38293445110321045,
|
|
"fcm_dpo/delta": -0.2958265542984009,
|
|
"fcm_dpo/margin": 2.9018197059631348,
|
|
"fcm_dpo/q_t": 0.3032812178134918,
|
|
"grad_norm": 87.00528717041016,
|
|
"learning_rate": 2.947135628327544e-07,
|
|
"logits/chosen": 0.3037651777267456,
|
|
"logits/rejected": 0.2769252061843872,
|
|
"logps/chosen": -63.68206024169922,
|
|
"logps/ref_chosen": -59.242584228515625,
|
|
"logps/ref_rejected": -69.87483215332031,
|
|
"logps/rejected": -77.21614074707031,
|
|
"loss": 0.8903,
|
|
"margin_dpo/margin_mean": 2.901819944381714,
|
|
"margin_dpo/margin_std": 3.6702699661254883,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.5018896447467877,
|
|
"fcm_dpo/beta": 0.37952011823654175,
|
|
"fcm_dpo/delta": -0.07606175541877747,
|
|
"fcm_dpo/margin": 2.4156653881073,
|
|
"fcm_dpo/q_t": 0.3336995542049408,
|
|
"grad_norm": 90.73616027832031,
|
|
"learning_rate": 2.934120444167326e-07,
|
|
"logits/chosen": 0.15260085463523865,
|
|
"logits/rejected": 0.11272098869085312,
|
|
"logps/chosen": -71.80547332763672,
|
|
"logps/ref_chosen": -67.10975646972656,
|
|
"logps/ref_rejected": -77.11839294433594,
|
|
"logps/rejected": -84.22976684570312,
|
|
"loss": 0.9245,
|
|
"margin_dpo/margin_mean": 2.4156653881073,
|
|
"margin_dpo/margin_std": 3.1381473541259766,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.5034013605442177,
|
|
"fcm_dpo/beta": 0.35634881258010864,
|
|
"fcm_dpo/delta": -0.3166324496269226,
|
|
"fcm_dpo/margin": 3.1737165451049805,
|
|
"fcm_dpo/q_t": 0.2953706979751587,
|
|
"grad_norm": 72.91551208496094,
|
|
"learning_rate": 2.921093116725076e-07,
|
|
"logits/chosen": 0.2314390391111374,
|
|
"logits/rejected": 0.17708361148834229,
|
|
"logps/chosen": -62.782108306884766,
|
|
"logps/ref_chosen": -58.381134033203125,
|
|
"logps/ref_rejected": -85.02839660644531,
|
|
"logps/rejected": -92.60308837890625,
|
|
"loss": 0.7954,
|
|
"margin_dpo/margin_mean": 3.1737163066864014,
|
|
"margin_dpo/margin_std": 3.3868823051452637,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.5049130763416477,
|
|
"fcm_dpo/beta": 0.35540711879730225,
|
|
"fcm_dpo/delta": 0.12635207176208496,
|
|
"fcm_dpo/margin": 2.0626180171966553,
|
|
"fcm_dpo/q_t": 0.36454758048057556,
|
|
"grad_norm": 94.0213623046875,
|
|
"learning_rate": 2.9080540104031484e-07,
|
|
"logits/chosen": 0.2191026508808136,
|
|
"logits/rejected": 0.18316911160945892,
|
|
"logps/chosen": -71.8309326171875,
|
|
"logps/ref_chosen": -66.89199829101562,
|
|
"logps/ref_rejected": -91.83695220947266,
|
|
"logps/rejected": -98.8385009765625,
|
|
"loss": 1.1093,
|
|
"margin_dpo/margin_mean": 2.062617778778076,
|
|
"margin_dpo/margin_std": 3.5854196548461914,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.5064247921390779,
|
|
"fcm_dpo/beta": 0.3609894812107086,
|
|
"fcm_dpo/delta": -0.032357558608055115,
|
|
"fcm_dpo/margin": 2.4321510791778564,
|
|
"fcm_dpo/q_t": 0.34385281801223755,
|
|
"grad_norm": 80.12577056884766,
|
|
"learning_rate": 2.895003489933375e-07,
|
|
"logits/chosen": 0.2101183980703354,
|
|
"logits/rejected": 0.17928308248519897,
|
|
"logps/chosen": -66.1629638671875,
|
|
"logps/ref_chosen": -61.51445770263672,
|
|
"logps/ref_rejected": -75.68916320800781,
|
|
"logps/rejected": -82.76982879638672,
|
|
"loss": 1.0087,
|
|
"margin_dpo/margin_mean": 2.4321508407592773,
|
|
"margin_dpo/margin_std": 3.508328914642334,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.5079365079365079,
|
|
"fcm_dpo/beta": 0.35370177030563354,
|
|
"fcm_dpo/delta": 0.04779374599456787,
|
|
"fcm_dpo/margin": 2.2667744159698486,
|
|
"fcm_dpo/q_t": 0.36775410175323486,
|
|
"grad_norm": 97.24869537353516,
|
|
"learning_rate": 2.8819419203668675e-07,
|
|
"logits/chosen": 0.1473027765750885,
|
|
"logits/rejected": 0.13241709768772125,
|
|
"logps/chosen": -74.49370574951172,
|
|
"logps/ref_chosen": -68.85006713867188,
|
|
"logps/ref_rejected": -92.99603271484375,
|
|
"logps/rejected": -100.90644836425781,
|
|
"loss": 1.0564,
|
|
"margin_dpo/margin_mean": 2.2667746543884277,
|
|
"margin_dpo/margin_std": 3.6942431926727295,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.509448223733938,
|
|
"fcm_dpo/beta": 0.3663819432258606,
|
|
"fcm_dpo/delta": 0.12714561820030212,
|
|
"fcm_dpo/margin": 1.9988073110580444,
|
|
"fcm_dpo/q_t": 0.36406582593917847,
|
|
"grad_norm": 92.78190612792969,
|
|
"learning_rate": 2.8688696670638053e-07,
|
|
"logits/chosen": 0.11123036593198776,
|
|
"logits/rejected": 0.08163227140903473,
|
|
"logps/chosen": -78.41031646728516,
|
|
"logps/ref_chosen": -73.18783569335938,
|
|
"logps/ref_rejected": -86.89118957519531,
|
|
"logps/rejected": -94.11248779296875,
|
|
"loss": 1.0628,
|
|
"margin_dpo/margin_mean": 1.9988073110580444,
|
|
"margin_dpo/margin_std": 3.276918649673462,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.5109599395313681,
|
|
"fcm_dpo/beta": 0.3737809658050537,
|
|
"fcm_dpo/delta": 0.052867673337459564,
|
|
"fcm_dpo/margin": 2.1428439617156982,
|
|
"fcm_dpo/q_t": 0.3521310091018677,
|
|
"grad_norm": 91.3255386352539,
|
|
"learning_rate": 2.8557870956832133e-07,
|
|
"logits/chosen": 0.16858114302158356,
|
|
"logits/rejected": 0.1449778825044632,
|
|
"logps/chosen": -69.2572021484375,
|
|
"logps/ref_chosen": -63.939613342285156,
|
|
"logps/ref_rejected": -75.34243774414062,
|
|
"logps/rejected": -82.80287170410156,
|
|
"loss": 1.0087,
|
|
"margin_dpo/margin_mean": 2.14284348487854,
|
|
"margin_dpo/margin_std": 3.1413938999176025,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.5124716553287982,
|
|
"fcm_dpo/beta": 0.368240624666214,
|
|
"fcm_dpo/delta": -0.04275989532470703,
|
|
"fcm_dpo/margin": 2.410059690475464,
|
|
"fcm_dpo/q_t": 0.3387882709503174,
|
|
"grad_norm": 81.14483642578125,
|
|
"learning_rate": 2.842694572172736e-07,
|
|
"logits/chosen": 0.2619893550872803,
|
|
"logits/rejected": 0.19453716278076172,
|
|
"logps/chosen": -50.87765884399414,
|
|
"logps/ref_chosen": -45.54913330078125,
|
|
"logps/ref_rejected": -67.0482177734375,
|
|
"logps/rejected": -74.78680419921875,
|
|
"loss": 0.9471,
|
|
"margin_dpo/margin_mean": 2.4100594520568848,
|
|
"margin_dpo/margin_std": 3.20064115524292,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.5139833711262283,
|
|
"fcm_dpo/beta": 0.3670642673969269,
|
|
"fcm_dpo/delta": -0.1399812549352646,
|
|
"fcm_dpo/margin": 2.6568050384521484,
|
|
"fcm_dpo/q_t": 0.3358362317085266,
|
|
"grad_norm": 76.66029357910156,
|
|
"learning_rate": 2.8295924627584004e-07,
|
|
"logits/chosen": 0.18538600206375122,
|
|
"logits/rejected": 0.16327080130577087,
|
|
"logps/chosen": -59.5405158996582,
|
|
"logps/ref_chosen": -54.00564956665039,
|
|
"logps/ref_rejected": -61.314430236816406,
|
|
"logps/rejected": -69.506103515625,
|
|
"loss": 1.0199,
|
|
"margin_dpo/margin_mean": 2.6568050384521484,
|
|
"margin_dpo/margin_std": 3.9218854904174805,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.5154950869236583,
|
|
"fcm_dpo/beta": 0.34759050607681274,
|
|
"fcm_dpo/delta": -0.07815767824649811,
|
|
"fcm_dpo/margin": 2.6133947372436523,
|
|
"fcm_dpo/q_t": 0.33973705768585205,
|
|
"grad_norm": 102.14833068847656,
|
|
"learning_rate": 2.816481133934373e-07,
|
|
"logits/chosen": 0.23524977266788483,
|
|
"logits/rejected": 0.19626188278198242,
|
|
"logps/chosen": -68.50048828125,
|
|
"logps/ref_chosen": -63.39509582519531,
|
|
"logps/ref_rejected": -76.20973205566406,
|
|
"logps/rejected": -83.92851257324219,
|
|
"loss": 1.0228,
|
|
"margin_dpo/margin_mean": 2.6133944988250732,
|
|
"margin_dpo/margin_std": 3.6573469638824463,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.5170068027210885,
|
|
"fcm_dpo/beta": 0.3411981463432312,
|
|
"fcm_dpo/delta": -0.22494906187057495,
|
|
"fcm_dpo/margin": 3.076798677444458,
|
|
"fcm_dpo/q_t": 0.3198407292366028,
|
|
"grad_norm": 65.66190338134766,
|
|
"learning_rate": 2.8033609524527046e-07,
|
|
"logits/chosen": 0.20193436741828918,
|
|
"logits/rejected": 0.16605061292648315,
|
|
"logps/chosen": -58.59202575683594,
|
|
"logps/ref_chosen": -53.047813415527344,
|
|
"logps/ref_rejected": -68.2854232788086,
|
|
"logps/rejected": -76.90643310546875,
|
|
"loss": 0.8953,
|
|
"margin_dpo/margin_mean": 3.076798915863037,
|
|
"margin_dpo/margin_std": 3.8478684425354004,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.5185185185185185,
|
|
"fcm_dpo/beta": 0.3504355549812317,
|
|
"fcm_dpo/delta": 0.31661123037338257,
|
|
"fcm_dpo/margin": 1.5729039907455444,
|
|
"fcm_dpo/q_t": 0.3953471779823303,
|
|
"grad_norm": 93.7979965209961,
|
|
"learning_rate": 2.7902322853130753e-07,
|
|
"logits/chosen": 0.14726698398590088,
|
|
"logits/rejected": 0.14337411522865295,
|
|
"logps/chosen": -75.63270568847656,
|
|
"logps/ref_chosen": -70.57852935791016,
|
|
"logps/ref_rejected": -84.73873901367188,
|
|
"logps/rejected": -91.36581420898438,
|
|
"loss": 1.2176,
|
|
"margin_dpo/margin_mean": 1.5729038715362549,
|
|
"margin_dpo/margin_std": 3.362330198287964,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.5200302343159486,
|
|
"fcm_dpo/beta": 0.35818302631378174,
|
|
"fcm_dpo/delta": -0.029689103364944458,
|
|
"fcm_dpo/margin": 2.4477405548095703,
|
|
"fcm_dpo/q_t": 0.33762115240097046,
|
|
"grad_norm": 91.70993041992188,
|
|
"learning_rate": 2.7770954997525274e-07,
|
|
"logits/chosen": 0.201224684715271,
|
|
"logits/rejected": 0.1562662124633789,
|
|
"logps/chosen": -61.6827392578125,
|
|
"logps/ref_chosen": -55.811004638671875,
|
|
"logps/ref_rejected": -84.77637481689453,
|
|
"logps/rejected": -93.0958480834961,
|
|
"loss": 0.9849,
|
|
"margin_dpo/margin_mean": 2.447740077972412,
|
|
"margin_dpo/margin_std": 3.4348011016845703,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.5215419501133787,
|
|
"fcm_dpo/beta": 0.3570266366004944,
|
|
"fcm_dpo/delta": -0.005030490458011627,
|
|
"fcm_dpo/margin": 2.3924896717071533,
|
|
"fcm_dpo/q_t": 0.3465713858604431,
|
|
"grad_norm": 74.15127563476562,
|
|
"learning_rate": 2.7639509632351927e-07,
|
|
"logits/chosen": 0.2622082233428955,
|
|
"logits/rejected": 0.22717997431755066,
|
|
"logps/chosen": -62.51710510253906,
|
|
"logps/ref_chosen": -57.78609848022461,
|
|
"logps/ref_rejected": -78.91847229003906,
|
|
"logps/rejected": -86.0419692993164,
|
|
"loss": 0.9996,
|
|
"margin_dpo/margin_mean": 2.3924896717071533,
|
|
"margin_dpo/margin_std": 3.526125431060791,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.5230536659108088,
|
|
"fcm_dpo/beta": 0.3490391969680786,
|
|
"fcm_dpo/delta": -0.2172488123178482,
|
|
"fcm_dpo/margin": 2.994063377380371,
|
|
"fcm_dpo/q_t": 0.315748393535614,
|
|
"grad_norm": 82.33354187011719,
|
|
"learning_rate": 2.7507990434420123e-07,
|
|
"logits/chosen": 0.27292877435684204,
|
|
"logits/rejected": 0.2155582308769226,
|
|
"logps/chosen": -61.15895080566406,
|
|
"logps/ref_chosen": -56.285125732421875,
|
|
"logps/ref_rejected": -91.15303039550781,
|
|
"logps/rejected": -99.02091979980469,
|
|
"loss": 0.9019,
|
|
"margin_dpo/margin_mean": 2.994063377380371,
|
|
"margin_dpo/margin_std": 3.723098039627075,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.5245653817082389,
|
|
"fcm_dpo/beta": 0.343899130821228,
|
|
"fcm_dpo/delta": 0.03331548720598221,
|
|
"fcm_dpo/margin": 2.382420539855957,
|
|
"fcm_dpo/q_t": 0.3580579161643982,
|
|
"grad_norm": 79.35133361816406,
|
|
"learning_rate": 2.737640108260456e-07,
|
|
"logits/chosen": 0.3136757016181946,
|
|
"logits/rejected": 0.2722731828689575,
|
|
"logps/chosen": -59.121246337890625,
|
|
"logps/ref_chosen": -53.499542236328125,
|
|
"logps/ref_rejected": -72.52565002441406,
|
|
"logps/rejected": -80.52978515625,
|
|
"loss": 1.0411,
|
|
"margin_dpo/margin_mean": 2.382420539855957,
|
|
"margin_dpo/margin_std": 3.8586859703063965,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.5260770975056689,
|
|
"fcm_dpo/beta": 0.32882171869277954,
|
|
"fcm_dpo/delta": -0.2895653247833252,
|
|
"fcm_dpo/margin": 3.359653949737549,
|
|
"fcm_dpo/q_t": 0.3357928991317749,
|
|
"grad_norm": 68.9252700805664,
|
|
"learning_rate": 2.724474525774229e-07,
|
|
"logits/chosen": 0.31114572286605835,
|
|
"logits/rejected": 0.28537189960479736,
|
|
"logps/chosen": -55.660499572753906,
|
|
"logps/ref_chosen": -50.78684997558594,
|
|
"logps/ref_rejected": -68.63732147216797,
|
|
"logps/rejected": -76.87062072753906,
|
|
"loss": 0.9576,
|
|
"margin_dpo/margin_mean": 3.3596534729003906,
|
|
"margin_dpo/margin_std": 4.968472480773926,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.527588813303099,
|
|
"fcm_dpo/beta": 0.32099393010139465,
|
|
"fcm_dpo/delta": -0.10274302214384079,
|
|
"fcm_dpo/margin": 2.9376416206359863,
|
|
"fcm_dpo/q_t": 0.3241846561431885,
|
|
"grad_norm": 76.00582122802734,
|
|
"learning_rate": 2.711302664252973e-07,
|
|
"logits/chosen": 0.2401653528213501,
|
|
"logits/rejected": 0.17262977361679077,
|
|
"logps/chosen": -58.41884231567383,
|
|
"logps/ref_chosen": -53.325008392333984,
|
|
"logps/ref_rejected": -83.21236419677734,
|
|
"logps/rejected": -91.24383544921875,
|
|
"loss": 0.9257,
|
|
"margin_dpo/margin_mean": 2.9376418590545654,
|
|
"margin_dpo/margin_std": 3.7340314388275146,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.5291005291005291,
|
|
"fcm_dpo/beta": 0.3053371012210846,
|
|
"fcm_dpo/delta": -0.2579427659511566,
|
|
"fcm_dpo/margin": 3.528461217880249,
|
|
"fcm_dpo/q_t": 0.307132363319397,
|
|
"grad_norm": 78.71974182128906,
|
|
"learning_rate": 2.698124892141971e-07,
|
|
"logits/chosen": 0.18329796195030212,
|
|
"logits/rejected": 0.1252821534872055,
|
|
"logps/chosen": -67.06321716308594,
|
|
"logps/ref_chosen": -61.625770568847656,
|
|
"logps/ref_rejected": -87.63627624511719,
|
|
"logps/rejected": -96.6021728515625,
|
|
"loss": 0.851,
|
|
"margin_dpo/margin_mean": 3.5284602642059326,
|
|
"margin_dpo/margin_std": 4.088956832885742,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.5306122448979592,
|
|
"fcm_dpo/beta": 0.304359495639801,
|
|
"fcm_dpo/delta": 0.031957440078258514,
|
|
"fcm_dpo/margin": 2.6963400840759277,
|
|
"fcm_dpo/q_t": 0.3338220417499542,
|
|
"grad_norm": 68.61235046386719,
|
|
"learning_rate": 2.6849415780518357e-07,
|
|
"logits/chosen": 0.15642112493515015,
|
|
"logits/rejected": 0.09655676037073135,
|
|
"logps/chosen": -61.657840728759766,
|
|
"logps/ref_chosen": -56.2563362121582,
|
|
"logps/ref_rejected": -79.11589813232422,
|
|
"logps/rejected": -87.2137451171875,
|
|
"loss": 1.0152,
|
|
"margin_dpo/margin_mean": 2.696340322494507,
|
|
"margin_dpo/margin_std": 3.8406622409820557,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.5321239606953893,
|
|
"fcm_dpo/beta": 0.3055855333805084,
|
|
"fcm_dpo/delta": 0.049372974783182144,
|
|
"fcm_dpo/margin": 2.6310970783233643,
|
|
"fcm_dpo/q_t": 0.3505841791629791,
|
|
"grad_norm": 74.63536071777344,
|
|
"learning_rate": 2.6717530907482024e-07,
|
|
"logits/chosen": 0.2353535145521164,
|
|
"logits/rejected": 0.19251593947410583,
|
|
"logps/chosen": -68.26545715332031,
|
|
"logps/ref_chosen": -63.05195236206055,
|
|
"logps/ref_rejected": -85.52035522460938,
|
|
"logps/rejected": -93.36494445800781,
|
|
"loss": 0.9607,
|
|
"margin_dpo/margin_mean": 2.6310970783233643,
|
|
"margin_dpo/margin_std": 3.5701792240142822,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.5336356764928194,
|
|
"fcm_dpo/beta": 0.31123608350753784,
|
|
"fcm_dpo/delta": 0.0005646422505378723,
|
|
"fcm_dpo/margin": 2.724490165710449,
|
|
"fcm_dpo/q_t": 0.34112924337387085,
|
|
"grad_norm": 71.16338348388672,
|
|
"learning_rate": 2.658559799141411e-07,
|
|
"logits/chosen": 0.2434076964855194,
|
|
"logits/rejected": 0.24078680574893951,
|
|
"logps/chosen": -74.14156341552734,
|
|
"logps/ref_chosen": -69.00918579101562,
|
|
"logps/ref_rejected": -72.65840148925781,
|
|
"logps/rejected": -80.51527404785156,
|
|
"loss": 0.9975,
|
|
"margin_dpo/margin_mean": 2.724489450454712,
|
|
"margin_dpo/margin_std": 3.953610420227051,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.5351473922902494,
|
|
"fcm_dpo/beta": 0.301596999168396,
|
|
"fcm_dpo/delta": -0.08267806470394135,
|
|
"fcm_dpo/margin": 3.0578184127807617,
|
|
"fcm_dpo/q_t": 0.32855403423309326,
|
|
"grad_norm": 85.67142486572266,
|
|
"learning_rate": 2.6453620722761895e-07,
|
|
"logits/chosen": 0.26027190685272217,
|
|
"logits/rejected": 0.15137949585914612,
|
|
"logps/chosen": -45.0567626953125,
|
|
"logps/ref_chosen": -39.78833770751953,
|
|
"logps/ref_rejected": -69.56885528564453,
|
|
"logps/rejected": -77.89509582519531,
|
|
"loss": 0.9408,
|
|
"margin_dpo/margin_mean": 3.0578184127807617,
|
|
"margin_dpo/margin_std": 3.9154303073883057,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.5366591080876795,
|
|
"fcm_dpo/beta": 0.3026568293571472,
|
|
"fcm_dpo/delta": -0.0332593210041523,
|
|
"fcm_dpo/margin": 2.908555030822754,
|
|
"fcm_dpo/q_t": 0.3385379910469055,
|
|
"grad_norm": 74.2393569946289,
|
|
"learning_rate": 2.632160279321328e-07,
|
|
"logits/chosen": 0.21670889854431152,
|
|
"logits/rejected": 0.12240596115589142,
|
|
"logps/chosen": -51.702720642089844,
|
|
"logps/ref_chosen": -46.25537872314453,
|
|
"logps/ref_rejected": -78.20236206054688,
|
|
"logps/rejected": -86.55825805664062,
|
|
"loss": 0.9828,
|
|
"margin_dpo/margin_mean": 2.908555507659912,
|
|
"margin_dpo/margin_std": 4.00840950012207,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.5381708238851096,
|
|
"fcm_dpo/beta": 0.3031303584575653,
|
|
"fcm_dpo/delta": 0.06947789341211319,
|
|
"fcm_dpo/margin": 2.592515230178833,
|
|
"fcm_dpo/q_t": 0.35653093457221985,
|
|
"grad_norm": 73.01097106933594,
|
|
"learning_rate": 2.618954789559356e-07,
|
|
"logits/chosen": 0.21377143263816833,
|
|
"logits/rejected": 0.15734095871448517,
|
|
"logps/chosen": -53.23834228515625,
|
|
"logps/ref_chosen": -47.906158447265625,
|
|
"logps/ref_rejected": -74.29397583007812,
|
|
"logps/rejected": -82.21867370605469,
|
|
"loss": 1.1381,
|
|
"margin_dpo/margin_mean": 2.592514991760254,
|
|
"margin_dpo/margin_std": 4.66711950302124,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.5396825396825397,
|
|
"fcm_dpo/beta": 0.3081361651420593,
|
|
"fcm_dpo/delta": 0.1387558877468109,
|
|
"fcm_dpo/margin": 2.3360090255737305,
|
|
"fcm_dpo/q_t": 0.3646143972873688,
|
|
"grad_norm": 81.77513885498047,
|
|
"learning_rate": 2.6057459723762076e-07,
|
|
"logits/chosen": 0.23374928534030914,
|
|
"logits/rejected": 0.20612743496894836,
|
|
"logps/chosen": -68.37667083740234,
|
|
"logps/ref_chosen": -62.63500213623047,
|
|
"logps/ref_rejected": -65.11399841308594,
|
|
"logps/rejected": -73.19168853759766,
|
|
"loss": 1.087,
|
|
"margin_dpo/margin_mean": 2.3360087871551514,
|
|
"margin_dpo/margin_std": 3.8651585578918457,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.5411942554799698,
|
|
"fcm_dpo/beta": 0.30569684505462646,
|
|
"fcm_dpo/delta": -0.21745963394641876,
|
|
"fcm_dpo/margin": 3.4191436767578125,
|
|
"fcm_dpo/q_t": 0.30736494064331055,
|
|
"grad_norm": 61.0597038269043,
|
|
"learning_rate": 2.5925341972508954e-07,
|
|
"logits/chosen": 0.17847847938537598,
|
|
"logits/rejected": 0.17569740116596222,
|
|
"logps/chosen": -72.6213150024414,
|
|
"logps/ref_chosen": -67.20960998535156,
|
|
"logps/ref_rejected": -69.34715270996094,
|
|
"logps/rejected": -78.17800903320312,
|
|
"loss": 0.852,
|
|
"margin_dpo/margin_mean": 3.41914439201355,
|
|
"margin_dpo/margin_std": 3.8466320037841797,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.5427059712773998,
|
|
"fcm_dpo/beta": 0.3135707676410675,
|
|
"fcm_dpo/delta": 0.16817688941955566,
|
|
"fcm_dpo/margin": 2.1862258911132812,
|
|
"fcm_dpo/q_t": 0.37526875734329224,
|
|
"grad_norm": 81.7992172241211,
|
|
"learning_rate": 2.579319833745169e-07,
|
|
"logits/chosen": 0.2080976814031601,
|
|
"logits/rejected": 0.18186524510383606,
|
|
"logps/chosen": -68.27560424804688,
|
|
"logps/ref_chosen": -62.52578353881836,
|
|
"logps/ref_rejected": -76.63114929199219,
|
|
"logps/rejected": -84.56719970703125,
|
|
"loss": 1.1152,
|
|
"margin_dpo/margin_mean": 2.186225652694702,
|
|
"margin_dpo/margin_std": 3.738008975982666,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.54421768707483,
|
|
"fcm_dpo/beta": 0.3083285391330719,
|
|
"fcm_dpo/delta": -0.04855549335479736,
|
|
"fcm_dpo/margin": 2.899702310562134,
|
|
"fcm_dpo/q_t": 0.3426200747489929,
|
|
"grad_norm": 78.34294891357422,
|
|
"learning_rate": 2.5661032514931834e-07,
|
|
"logits/chosen": 0.16897408664226532,
|
|
"logits/rejected": 0.09226278215646744,
|
|
"logps/chosen": -69.31376647949219,
|
|
"logps/ref_chosen": -63.48772048950195,
|
|
"logps/ref_rejected": -90.6891098022461,
|
|
"logps/rejected": -99.41486358642578,
|
|
"loss": 0.9654,
|
|
"margin_dpo/margin_mean": 2.899702548980713,
|
|
"margin_dpo/margin_std": 4.131417274475098,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.54572940287226,
|
|
"fcm_dpo/beta": 0.3021819591522217,
|
|
"fcm_dpo/delta": -0.10609018802642822,
|
|
"fcm_dpo/margin": 3.1286063194274902,
|
|
"fcm_dpo/q_t": 0.3255624771118164,
|
|
"grad_norm": 71.85318756103516,
|
|
"learning_rate": 2.552884820191154e-07,
|
|
"logits/chosen": 0.2620081901550293,
|
|
"logits/rejected": 0.21690014004707336,
|
|
"logps/chosen": -63.33432388305664,
|
|
"logps/ref_chosen": -57.917144775390625,
|
|
"logps/ref_rejected": -72.39089965820312,
|
|
"logps/rejected": -80.93669128417969,
|
|
"loss": 0.929,
|
|
"margin_dpo/margin_mean": 3.1286067962646484,
|
|
"margin_dpo/margin_std": 4.0841383934021,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.54724111866969,
|
|
"fcm_dpo/beta": 0.30271005630493164,
|
|
"fcm_dpo/delta": -0.09775380790233612,
|
|
"fcm_dpo/margin": 3.0863213539123535,
|
|
"fcm_dpo/q_t": 0.32780078053474426,
|
|
"grad_norm": 71.3369369506836,
|
|
"learning_rate": 2.53966490958702e-07,
|
|
"logits/chosen": 0.27897951006889343,
|
|
"logits/rejected": 0.2024056762456894,
|
|
"logps/chosen": -68.86507415771484,
|
|
"logps/ref_chosen": -63.4434700012207,
|
|
"logps/ref_rejected": -103.45516967773438,
|
|
"logps/rejected": -111.96309661865234,
|
|
"loss": 0.9063,
|
|
"margin_dpo/margin_mean": 3.0863213539123535,
|
|
"margin_dpo/margin_std": 3.5979232788085938,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.5487528344671202,
|
|
"fcm_dpo/beta": 0.29156380891799927,
|
|
"fcm_dpo/delta": -0.09345076978206635,
|
|
"fcm_dpo/margin": 3.205629587173462,
|
|
"fcm_dpo/q_t": 0.3229590952396393,
|
|
"grad_norm": 65.02751922607422,
|
|
"learning_rate": 2.526443889470099e-07,
|
|
"logits/chosen": 0.25476565957069397,
|
|
"logits/rejected": 0.1603575050830841,
|
|
"logps/chosen": -55.03227996826172,
|
|
"logps/ref_chosen": -48.65182876586914,
|
|
"logps/ref_rejected": -88.65904235839844,
|
|
"logps/rejected": -98.24512481689453,
|
|
"loss": 0.9196,
|
|
"margin_dpo/margin_mean": 3.205629348754883,
|
|
"margin_dpo/margin_std": 4.075560569763184,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.5502645502645502,
|
|
"fcm_dpo/beta": 0.285260945558548,
|
|
"fcm_dpo/delta": -0.11832509934902191,
|
|
"fcm_dpo/margin": 3.354739189147949,
|
|
"fcm_dpo/q_t": 0.33564698696136475,
|
|
"grad_norm": 63.04609680175781,
|
|
"learning_rate": 2.513222129660744e-07,
|
|
"logits/chosen": 0.1435980498790741,
|
|
"logits/rejected": 0.06505458801984787,
|
|
"logps/chosen": -63.30625915527344,
|
|
"logps/ref_chosen": -57.87107467651367,
|
|
"logps/ref_rejected": -80.95503234863281,
|
|
"logps/rejected": -89.74494934082031,
|
|
"loss": 1.0008,
|
|
"margin_dpo/margin_mean": 3.3547396659851074,
|
|
"margin_dpo/margin_std": 4.903078079223633,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.5517762660619804,
|
|
"fcm_dpo/beta": 0.2818984389305115,
|
|
"fcm_dpo/delta": -0.03541882336139679,
|
|
"fcm_dpo/margin": 3.1297309398651123,
|
|
"fcm_dpo/q_t": 0.3227863907814026,
|
|
"grad_norm": 58.35200119018555,
|
|
"learning_rate": 2.5e-07,
|
|
"logits/chosen": 0.26480984687805176,
|
|
"logits/rejected": 0.2539828419685364,
|
|
"logps/chosen": -69.91170501708984,
|
|
"logps/ref_chosen": -64.94217681884766,
|
|
"logps/ref_rejected": -74.8599853515625,
|
|
"logps/rejected": -82.95924377441406,
|
|
"loss": 0.8809,
|
|
"margin_dpo/margin_mean": 3.1297316551208496,
|
|
"margin_dpo/margin_std": 3.41985821723938,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.5532879818594104,
|
|
"fcm_dpo/beta": 0.28988415002822876,
|
|
"fcm_dpo/delta": 0.10946245491504669,
|
|
"fcm_dpo/margin": 2.567075729370117,
|
|
"fcm_dpo/q_t": 0.36410263180732727,
|
|
"grad_norm": 70.50802612304688,
|
|
"learning_rate": 2.486777870339255e-07,
|
|
"logits/chosen": 0.1697695553302765,
|
|
"logits/rejected": 0.150035560131073,
|
|
"logps/chosen": -59.808685302734375,
|
|
"logps/ref_chosen": -55.16598129272461,
|
|
"logps/ref_rejected": -65.26121520996094,
|
|
"logps/rejected": -72.47099304199219,
|
|
"loss": 1.1025,
|
|
"margin_dpo/margin_mean": 2.5670762062072754,
|
|
"margin_dpo/margin_std": 4.370789527893066,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.5547996976568406,
|
|
"fcm_dpo/beta": 0.29406261444091797,
|
|
"fcm_dpo/delta": 0.1747613400220871,
|
|
"fcm_dpo/margin": 2.338266372680664,
|
|
"fcm_dpo/q_t": 0.35922878980636597,
|
|
"grad_norm": 66.86473083496094,
|
|
"learning_rate": 2.4735561105299014e-07,
|
|
"logits/chosen": 0.2019767463207245,
|
|
"logits/rejected": 0.1173165887594223,
|
|
"logps/chosen": -61.685611724853516,
|
|
"logps/ref_chosen": -56.01046371459961,
|
|
"logps/ref_rejected": -77.31010437011719,
|
|
"logps/rejected": -85.32351684570312,
|
|
"loss": 1.0202,
|
|
"margin_dpo/margin_mean": 2.338266611099243,
|
|
"margin_dpo/margin_std": 3.347780227661133,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.5563114134542706,
|
|
"fcm_dpo/beta": 0.30230095982551575,
|
|
"fcm_dpo/delta": 0.11584046483039856,
|
|
"fcm_dpo/margin": 2.457531452178955,
|
|
"fcm_dpo/q_t": 0.36515921354293823,
|
|
"grad_norm": 76.8958969116211,
|
|
"learning_rate": 2.46033509041298e-07,
|
|
"logits/chosen": 0.10607978701591492,
|
|
"logits/rejected": 0.09705992043018341,
|
|
"logps/chosen": -80.48188781738281,
|
|
"logps/ref_chosen": -74.82927703857422,
|
|
"logps/ref_rejected": -76.11680603027344,
|
|
"logps/rejected": -84.2269515991211,
|
|
"loss": 1.0747,
|
|
"margin_dpo/margin_mean": 2.4575319290161133,
|
|
"margin_dpo/margin_std": 4.010162830352783,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.5578231292517006,
|
|
"fcm_dpo/beta": 0.31162023544311523,
|
|
"fcm_dpo/delta": 0.09726077318191528,
|
|
"fcm_dpo/margin": 2.434133529663086,
|
|
"fcm_dpo/q_t": 0.35368454456329346,
|
|
"grad_norm": 68.05358123779297,
|
|
"learning_rate": 2.447115179808846e-07,
|
|
"logits/chosen": 0.19629350304603577,
|
|
"logits/rejected": 0.15964928269386292,
|
|
"logps/chosen": -63.93141174316406,
|
|
"logps/ref_chosen": -58.32621765136719,
|
|
"logps/ref_rejected": -80.92183685302734,
|
|
"logps/rejected": -88.96116638183594,
|
|
"loss": 1.0539,
|
|
"margin_dpo/margin_mean": 2.4341330528259277,
|
|
"margin_dpo/margin_std": 3.771906852722168,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.5593348450491308,
|
|
"fcm_dpo/beta": 0.30336689949035645,
|
|
"fcm_dpo/delta": -0.14237123727798462,
|
|
"fcm_dpo/margin": 3.222742795944214,
|
|
"fcm_dpo/q_t": 0.3289080858230591,
|
|
"grad_norm": 71.9991226196289,
|
|
"learning_rate": 2.4338967485068164e-07,
|
|
"logits/chosen": 0.2874883711338043,
|
|
"logits/rejected": 0.23224374651908875,
|
|
"logps/chosen": -58.521183013916016,
|
|
"logps/ref_chosen": -52.88372039794922,
|
|
"logps/ref_rejected": -79.43692016601562,
|
|
"logps/rejected": -88.297119140625,
|
|
"loss": 1.0348,
|
|
"margin_dpo/margin_mean": 3.222743034362793,
|
|
"margin_dpo/margin_std": 4.919537544250488,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.5608465608465608,
|
|
"fcm_dpo/beta": 0.30383872985839844,
|
|
"fcm_dpo/delta": -0.03940815478563309,
|
|
"fcm_dpo/margin": 2.9095592498779297,
|
|
"fcm_dpo/q_t": 0.34773844480514526,
|
|
"grad_norm": 68.0054702758789,
|
|
"learning_rate": 2.420680166254831e-07,
|
|
"logits/chosen": 0.31215453147888184,
|
|
"logits/rejected": 0.28234991431236267,
|
|
"logps/chosen": -54.938941955566406,
|
|
"logps/ref_chosen": -49.224212646484375,
|
|
"logps/ref_rejected": -63.348472595214844,
|
|
"logps/rejected": -71.97276306152344,
|
|
"loss": 1.06,
|
|
"margin_dpo/margin_mean": 2.909559488296509,
|
|
"margin_dpo/margin_std": 4.57585334777832,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.562358276643991,
|
|
"fcm_dpo/beta": 0.31304311752319336,
|
|
"fcm_dpo/delta": 0.1990874856710434,
|
|
"fcm_dpo/margin": 2.10213565826416,
|
|
"fcm_dpo/q_t": 0.3859516382217407,
|
|
"grad_norm": 79.47772216796875,
|
|
"learning_rate": 2.4074658027491044e-07,
|
|
"logits/chosen": 0.24154727160930634,
|
|
"logits/rejected": 0.17342635989189148,
|
|
"logps/chosen": -58.218475341796875,
|
|
"logps/ref_chosen": -52.269554138183594,
|
|
"logps/ref_rejected": -72.99522399902344,
|
|
"logps/rejected": -81.0462875366211,
|
|
"loss": 1.2735,
|
|
"margin_dpo/margin_mean": 2.102135419845581,
|
|
"margin_dpo/margin_std": 4.756669998168945,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.563869992441421,
|
|
"fcm_dpo/beta": 0.3137848973274231,
|
|
"fcm_dpo/delta": 0.0420655757188797,
|
|
"fcm_dpo/margin": 2.5857388973236084,
|
|
"fcm_dpo/q_t": 0.36145395040512085,
|
|
"grad_norm": 99.41732788085938,
|
|
"learning_rate": 2.394254027623792e-07,
|
|
"logits/chosen": 0.2519097924232483,
|
|
"logits/rejected": 0.1964668333530426,
|
|
"logps/chosen": -67.56515502929688,
|
|
"logps/ref_chosen": -61.112998962402344,
|
|
"logps/ref_rejected": -76.24851989746094,
|
|
"logps/rejected": -85.28641510009766,
|
|
"loss": 1.1351,
|
|
"margin_dpo/margin_mean": 2.5857386589050293,
|
|
"margin_dpo/margin_std": 4.668540000915527,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.5653817082388511,
|
|
"fcm_dpo/beta": 0.2995087504386902,
|
|
"fcm_dpo/delta": -0.29449182748794556,
|
|
"fcm_dpo/margin": 3.7042269706726074,
|
|
"fcm_dpo/q_t": 0.2900841534137726,
|
|
"grad_norm": 73.12067413330078,
|
|
"learning_rate": 2.381045210440644e-07,
|
|
"logits/chosen": 0.15426763892173767,
|
|
"logits/rejected": 0.1400105059146881,
|
|
"logps/chosen": -78.16365051269531,
|
|
"logps/ref_chosen": -72.66920471191406,
|
|
"logps/ref_rejected": -76.83158874511719,
|
|
"logps/rejected": -86.03025817871094,
|
|
"loss": 0.7882,
|
|
"margin_dpo/margin_mean": 3.7042269706726074,
|
|
"margin_dpo/margin_std": 3.675267457962036,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.5668934240362812,
|
|
"fcm_dpo/beta": 0.30636027455329895,
|
|
"fcm_dpo/delta": 0.21007570624351501,
|
|
"fcm_dpo/margin": 2.1330573558807373,
|
|
"fcm_dpo/q_t": 0.38012027740478516,
|
|
"grad_norm": 88.26660919189453,
|
|
"learning_rate": 2.3678397206786715e-07,
|
|
"logits/chosen": 0.2568998336791992,
|
|
"logits/rejected": 0.21344329416751862,
|
|
"logps/chosen": -63.1327018737793,
|
|
"logps/ref_chosen": -57.68330383300781,
|
|
"logps/ref_rejected": -79.34097290039062,
|
|
"logps/rejected": -86.92343139648438,
|
|
"loss": 1.1583,
|
|
"margin_dpo/margin_mean": 2.133056402206421,
|
|
"margin_dpo/margin_std": 4.111923694610596,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.5684051398337112,
|
|
"fcm_dpo/beta": 0.29211366176605225,
|
|
"fcm_dpo/delta": -0.32011279463768005,
|
|
"fcm_dpo/margin": 3.864445924758911,
|
|
"fcm_dpo/q_t": 0.3048480749130249,
|
|
"grad_norm": 64.11585998535156,
|
|
"learning_rate": 2.3546379277238103e-07,
|
|
"logits/chosen": 0.2729426622390747,
|
|
"logits/rejected": 0.2218175083398819,
|
|
"logps/chosen": -57.24680709838867,
|
|
"logps/ref_chosen": -51.674072265625,
|
|
"logps/ref_rejected": -75.69713592529297,
|
|
"logps/rejected": -85.13431549072266,
|
|
"loss": 0.891,
|
|
"margin_dpo/margin_mean": 3.8644466400146484,
|
|
"margin_dpo/margin_std": 4.771137237548828,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.5699168556311414,
|
|
"fcm_dpo/beta": 0.2949063181877136,
|
|
"fcm_dpo/delta": 0.06371963024139404,
|
|
"fcm_dpo/margin": 2.6792593002319336,
|
|
"fcm_dpo/q_t": 0.3503820598125458,
|
|
"grad_norm": 61.93028259277344,
|
|
"learning_rate": 2.3414402008585886e-07,
|
|
"logits/chosen": 0.1965014487504959,
|
|
"logits/rejected": 0.17393234372138977,
|
|
"logps/chosen": -52.405757904052734,
|
|
"logps/ref_chosen": -46.17853546142578,
|
|
"logps/ref_rejected": -57.756500244140625,
|
|
"logps/rejected": -66.66297912597656,
|
|
"loss": 1.0073,
|
|
"margin_dpo/margin_mean": 2.6792590618133545,
|
|
"margin_dpo/margin_std": 3.86149263381958,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.5714285714285714,
|
|
"fcm_dpo/beta": 0.30152004957199097,
|
|
"fcm_dpo/delta": 0.09856449067592621,
|
|
"fcm_dpo/margin": 2.506265163421631,
|
|
"fcm_dpo/q_t": 0.3678427040576935,
|
|
"grad_norm": 75.0130386352539,
|
|
"learning_rate": 2.3282469092517977e-07,
|
|
"logits/chosen": 0.2718163728713989,
|
|
"logits/rejected": 0.2303917407989502,
|
|
"logps/chosen": -65.19330596923828,
|
|
"logps/ref_chosen": -59.21887969970703,
|
|
"logps/ref_rejected": -71.24818420410156,
|
|
"logps/rejected": -79.7288818359375,
|
|
"loss": 1.0841,
|
|
"margin_dpo/margin_mean": 2.5062649250030518,
|
|
"margin_dpo/margin_std": 4.203706741333008,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.5729402872260015,
|
|
"fcm_dpo/beta": 0.29446160793304443,
|
|
"fcm_dpo/delta": -0.12026385217905045,
|
|
"fcm_dpo/margin": 3.2557544708251953,
|
|
"fcm_dpo/q_t": 0.3261454403400421,
|
|
"grad_norm": 81.48579406738281,
|
|
"learning_rate": 2.3150584219481643e-07,
|
|
"logits/chosen": 0.27448570728302,
|
|
"logits/rejected": 0.22502100467681885,
|
|
"logps/chosen": -81.78964233398438,
|
|
"logps/ref_chosen": -76.31658935546875,
|
|
"logps/ref_rejected": -104.26200103759766,
|
|
"logps/rejected": -112.99081420898438,
|
|
"loss": 0.9215,
|
|
"margin_dpo/margin_mean": 3.2557549476623535,
|
|
"margin_dpo/margin_std": 4.196225166320801,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.5744520030234316,
|
|
"fcm_dpo/beta": 0.28408801555633545,
|
|
"fcm_dpo/delta": -0.21791377663612366,
|
|
"fcm_dpo/margin": 3.680061101913452,
|
|
"fcm_dpo/q_t": 0.3053869307041168,
|
|
"grad_norm": 64.25228118896484,
|
|
"learning_rate": 2.3018751078580283e-07,
|
|
"logits/chosen": 0.2684813141822815,
|
|
"logits/rejected": 0.23553693294525146,
|
|
"logps/chosen": -66.39458465576172,
|
|
"logps/ref_chosen": -61.283164978027344,
|
|
"logps/ref_rejected": -72.38892364501953,
|
|
"logps/rejected": -81.18040466308594,
|
|
"loss": 0.9043,
|
|
"margin_dpo/margin_mean": 3.6800613403320312,
|
|
"margin_dpo/margin_std": 4.509557723999023,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.5759637188208617,
|
|
"fcm_dpo/beta": 0.30122846364974976,
|
|
"fcm_dpo/delta": 0.3828117847442627,
|
|
"fcm_dpo/margin": 1.577541470527649,
|
|
"fcm_dpo/q_t": 0.4115419387817383,
|
|
"grad_norm": 81.65371704101562,
|
|
"learning_rate": 2.288697335747027e-07,
|
|
"logits/chosen": 0.20629438757896423,
|
|
"logits/rejected": 0.18379296362400055,
|
|
"logps/chosen": -64.43083190917969,
|
|
"logps/ref_chosen": -58.2139892578125,
|
|
"logps/ref_rejected": -60.78669357299805,
|
|
"logps/rejected": -68.5810775756836,
|
|
"loss": 1.2574,
|
|
"margin_dpo/margin_mean": 1.5775421857833862,
|
|
"margin_dpo/margin_std": 3.8531484603881836,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.5774754346182918,
|
|
"fcm_dpo/beta": 0.3108755052089691,
|
|
"fcm_dpo/delta": 0.0599745512008667,
|
|
"fcm_dpo/margin": 2.53261137008667,
|
|
"fcm_dpo/q_t": 0.3535318076610565,
|
|
"grad_norm": 71.12667083740234,
|
|
"learning_rate": 2.2755254742257706e-07,
|
|
"logits/chosen": 0.2725491523742676,
|
|
"logits/rejected": 0.23602010309696198,
|
|
"logps/chosen": -68.12150573730469,
|
|
"logps/ref_chosen": -61.82532501220703,
|
|
"logps/ref_rejected": -83.0452880859375,
|
|
"logps/rejected": -91.87407684326172,
|
|
"loss": 1.0033,
|
|
"margin_dpo/margin_mean": 2.53261137008667,
|
|
"margin_dpo/margin_std": 3.539917230606079,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.5789871504157218,
|
|
"fcm_dpo/beta": 0.29766133427619934,
|
|
"fcm_dpo/delta": -0.1338372379541397,
|
|
"fcm_dpo/margin": 3.2536959648132324,
|
|
"fcm_dpo/q_t": 0.33455702662467957,
|
|
"grad_norm": 88.27291107177734,
|
|
"learning_rate": 2.2623598917395436e-07,
|
|
"logits/chosen": 0.17788708209991455,
|
|
"logits/rejected": 0.1857486367225647,
|
|
"logps/chosen": -86.0854721069336,
|
|
"logps/ref_chosen": -80.56326293945312,
|
|
"logps/ref_rejected": -74.62922668457031,
|
|
"logps/rejected": -83.4051284790039,
|
|
"loss": 0.9802,
|
|
"margin_dpo/margin_mean": 3.2536959648132324,
|
|
"margin_dpo/margin_std": 4.626866340637207,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.5804988662131519,
|
|
"fcm_dpo/beta": 0.2986605763435364,
|
|
"fcm_dpo/delta": -0.017689041793346405,
|
|
"fcm_dpo/margin": 2.899064540863037,
|
|
"fcm_dpo/q_t": 0.3348064124584198,
|
|
"grad_norm": 76.15990447998047,
|
|
"learning_rate": 2.2492009565579875e-07,
|
|
"logits/chosen": 0.24556401371955872,
|
|
"logits/rejected": 0.20763534307479858,
|
|
"logps/chosen": -71.83543395996094,
|
|
"logps/ref_chosen": -65.47514343261719,
|
|
"logps/ref_rejected": -79.67378234863281,
|
|
"logps/rejected": -88.93313598632812,
|
|
"loss": 0.9493,
|
|
"margin_dpo/margin_mean": 2.899064064025879,
|
|
"margin_dpo/margin_std": 3.847461462020874,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.582010582010582,
|
|
"fcm_dpo/beta": 0.29407477378845215,
|
|
"fcm_dpo/delta": -0.0921003669500351,
|
|
"fcm_dpo/margin": 3.174560070037842,
|
|
"fcm_dpo/q_t": 0.3196459710597992,
|
|
"grad_norm": 72.75206756591797,
|
|
"learning_rate": 2.2360490367648084e-07,
|
|
"logits/chosen": 0.18978914618492126,
|
|
"logits/rejected": 0.15941289067268372,
|
|
"logps/chosen": -71.92022705078125,
|
|
"logps/ref_chosen": -66.0565185546875,
|
|
"logps/ref_rejected": -86.68023681640625,
|
|
"logps/rejected": -95.718505859375,
|
|
"loss": 0.8738,
|
|
"margin_dpo/margin_mean": 3.1745595932006836,
|
|
"margin_dpo/margin_std": 3.579244375228882,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.5835222978080121,
|
|
"fcm_dpo/beta": 0.2947796583175659,
|
|
"fcm_dpo/delta": 0.09643702208995819,
|
|
"fcm_dpo/margin": 2.581514835357666,
|
|
"fcm_dpo/q_t": 0.35556066036224365,
|
|
"grad_norm": 74.4271011352539,
|
|
"learning_rate": 2.2229045002474724e-07,
|
|
"logits/chosen": 0.18471872806549072,
|
|
"logits/rejected": 0.13904157280921936,
|
|
"logps/chosen": -81.9725341796875,
|
|
"logps/ref_chosen": -75.6236572265625,
|
|
"logps/ref_rejected": -92.62330627441406,
|
|
"logps/rejected": -101.55369567871094,
|
|
"loss": 1.0201,
|
|
"margin_dpo/margin_mean": 2.5815157890319824,
|
|
"margin_dpo/margin_std": 3.8327016830444336,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.5850340136054422,
|
|
"fcm_dpo/beta": 0.28877580165863037,
|
|
"fcm_dpo/delta": -0.19857263565063477,
|
|
"fcm_dpo/margin": 3.560486316680908,
|
|
"fcm_dpo/q_t": 0.29805874824523926,
|
|
"grad_norm": 58.27238082885742,
|
|
"learning_rate": 2.209767714686924e-07,
|
|
"logits/chosen": 0.25704652070999146,
|
|
"logits/rejected": 0.18072374165058136,
|
|
"logps/chosen": -52.98728942871094,
|
|
"logps/ref_chosen": -47.22170639038086,
|
|
"logps/ref_rejected": -87.338134765625,
|
|
"logps/rejected": -96.6642074584961,
|
|
"loss": 0.7939,
|
|
"margin_dpo/margin_mean": 3.56048583984375,
|
|
"margin_dpo/margin_std": 3.5000014305114746,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.5865457294028723,
|
|
"fcm_dpo/beta": 0.2872125208377838,
|
|
"fcm_dpo/delta": 0.10189881175756454,
|
|
"fcm_dpo/margin": 2.6280932426452637,
|
|
"fcm_dpo/q_t": 0.3697548806667328,
|
|
"grad_norm": 70.79524993896484,
|
|
"learning_rate": 2.1966390475472954e-07,
|
|
"logits/chosen": 0.23693621158599854,
|
|
"logits/rejected": 0.22322086989879608,
|
|
"logps/chosen": -80.55561828613281,
|
|
"logps/ref_chosen": -74.5794677734375,
|
|
"logps/ref_rejected": -79.92558288574219,
|
|
"logps/rejected": -88.52982330322266,
|
|
"loss": 1.0817,
|
|
"margin_dpo/margin_mean": 2.6280932426452637,
|
|
"margin_dpo/margin_std": 4.3980913162231445,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.5880574452003023,
|
|
"fcm_dpo/beta": 0.28803473711013794,
|
|
"fcm_dpo/delta": -0.08635501563549042,
|
|
"fcm_dpo/margin": 3.223116874694824,
|
|
"fcm_dpo/q_t": 0.32394009828567505,
|
|
"grad_norm": 64.27388000488281,
|
|
"learning_rate": 2.1835188660656265e-07,
|
|
"logits/chosen": 0.25568056106567383,
|
|
"logits/rejected": 0.22184878587722778,
|
|
"logps/chosen": -67.70536041259766,
|
|
"logps/ref_chosen": -61.624366760253906,
|
|
"logps/ref_rejected": -76.50978088378906,
|
|
"logps/rejected": -85.81388854980469,
|
|
"loss": 0.9533,
|
|
"margin_dpo/margin_mean": 3.223116874694824,
|
|
"margin_dpo/margin_std": 4.228720664978027,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.5895691609977324,
|
|
"fcm_dpo/beta": 0.2855742573738098,
|
|
"fcm_dpo/delta": -0.026870589703321457,
|
|
"fcm_dpo/margin": 3.0623464584350586,
|
|
"fcm_dpo/q_t": 0.3377484977245331,
|
|
"grad_norm": 57.22404479980469,
|
|
"learning_rate": 2.170407537241599e-07,
|
|
"logits/chosen": 0.2583312392234802,
|
|
"logits/rejected": 0.20761854946613312,
|
|
"logps/chosen": -51.456993103027344,
|
|
"logps/ref_chosen": -45.871864318847656,
|
|
"logps/ref_rejected": -61.305999755859375,
|
|
"logps/rejected": -69.95347595214844,
|
|
"loss": 0.9365,
|
|
"margin_dpo/margin_mean": 3.0623462200164795,
|
|
"margin_dpo/margin_std": 3.994518995285034,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.5910808767951625,
|
|
"fcm_dpo/beta": 0.2780035734176636,
|
|
"fcm_dpo/delta": -0.16346335411071777,
|
|
"fcm_dpo/margin": 3.586273193359375,
|
|
"fcm_dpo/q_t": 0.3249879479408264,
|
|
"grad_norm": 62.95237731933594,
|
|
"learning_rate": 2.1573054278272636e-07,
|
|
"logits/chosen": 0.25140058994293213,
|
|
"logits/rejected": 0.1961970329284668,
|
|
"logps/chosen": -64.07249450683594,
|
|
"logps/ref_chosen": -58.18701171875,
|
|
"logps/ref_rejected": -83.63442993164062,
|
|
"logps/rejected": -93.10619354248047,
|
|
"loss": 1.0028,
|
|
"margin_dpo/margin_mean": 3.586272954940796,
|
|
"margin_dpo/margin_std": 5.170510768890381,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.5925925925925926,
|
|
"fcm_dpo/beta": 0.26895231008529663,
|
|
"fcm_dpo/delta": -0.2476973831653595,
|
|
"fcm_dpo/margin": 3.9834837913513184,
|
|
"fcm_dpo/q_t": 0.31376171112060547,
|
|
"grad_norm": 75.80889892578125,
|
|
"learning_rate": 2.1442129043167873e-07,
|
|
"logits/chosen": 0.3188512623310089,
|
|
"logits/rejected": 0.2672809958457947,
|
|
"logps/chosen": -74.83085632324219,
|
|
"logps/ref_chosen": -69.7445297241211,
|
|
"logps/ref_rejected": -94.05877685546875,
|
|
"logps/rejected": -103.12858581542969,
|
|
"loss": 0.9064,
|
|
"margin_dpo/margin_mean": 3.9834845066070557,
|
|
"margin_dpo/margin_std": 5.051880836486816,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.5941043083900227,
|
|
"fcm_dpo/beta": 0.25690969824790955,
|
|
"fcm_dpo/delta": -0.06800644844770432,
|
|
"fcm_dpo/margin": 3.5361289978027344,
|
|
"fcm_dpo/q_t": 0.32431554794311523,
|
|
"grad_norm": 57.294376373291016,
|
|
"learning_rate": 2.131130332936195e-07,
|
|
"logits/chosen": 0.2550227642059326,
|
|
"logits/rejected": 0.22178462147712708,
|
|
"logps/chosen": -58.557674407958984,
|
|
"logps/ref_chosen": -52.33489990234375,
|
|
"logps/ref_rejected": -74.33809661865234,
|
|
"logps/rejected": -84.09700012207031,
|
|
"loss": 0.8692,
|
|
"margin_dpo/margin_mean": 3.5361287593841553,
|
|
"margin_dpo/margin_std": 3.8214592933654785,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.5956160241874527,
|
|
"fcm_dpo/beta": 0.2616024911403656,
|
|
"fcm_dpo/delta": 0.02560308948159218,
|
|
"fcm_dpo/margin": 3.156092643737793,
|
|
"fcm_dpo/q_t": 0.33914124965667725,
|
|
"grad_norm": 60.21733856201172,
|
|
"learning_rate": 2.1180580796331323e-07,
|
|
"logits/chosen": 0.2926919460296631,
|
|
"logits/rejected": 0.2641337513923645,
|
|
"logps/chosen": -66.64264678955078,
|
|
"logps/ref_chosen": -60.6761360168457,
|
|
"logps/ref_rejected": -71.36074829101562,
|
|
"logps/rejected": -80.48336029052734,
|
|
"loss": 0.9327,
|
|
"margin_dpo/margin_mean": 3.1560916900634766,
|
|
"margin_dpo/margin_std": 3.9220170974731445,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.5971277399848829,
|
|
"fcm_dpo/beta": 0.2669936418533325,
|
|
"fcm_dpo/delta": 0.11683942377567291,
|
|
"fcm_dpo/margin": 2.7706220149993896,
|
|
"fcm_dpo/q_t": 0.35929474234580994,
|
|
"grad_norm": 66.62834167480469,
|
|
"learning_rate": 2.104996510066625e-07,
|
|
"logits/chosen": 0.20315015316009521,
|
|
"logits/rejected": 0.13946810364723206,
|
|
"logps/chosen": -56.223976135253906,
|
|
"logps/ref_chosen": -50.60432434082031,
|
|
"logps/ref_rejected": -77.08731079101562,
|
|
"logps/rejected": -85.47758483886719,
|
|
"loss": 1.0212,
|
|
"margin_dpo/margin_mean": 2.7706220149993896,
|
|
"margin_dpo/margin_std": 4.05787992477417,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.5986394557823129,
|
|
"fcm_dpo/beta": 0.2582937180995941,
|
|
"fcm_dpo/delta": -0.08063206076622009,
|
|
"fcm_dpo/margin": 3.540877103805542,
|
|
"fcm_dpo/q_t": 0.32276594638824463,
|
|
"grad_norm": 57.03807830810547,
|
|
"learning_rate": 2.0919459895968517e-07,
|
|
"logits/chosen": 0.23377332091331482,
|
|
"logits/rejected": 0.15629377961158752,
|
|
"logps/chosen": -56.608436584472656,
|
|
"logps/ref_chosen": -51.35961151123047,
|
|
"logps/ref_rejected": -79.89360046386719,
|
|
"logps/rejected": -88.68330383300781,
|
|
"loss": 0.873,
|
|
"margin_dpo/margin_mean": 3.540876865386963,
|
|
"margin_dpo/margin_std": 3.7023258209228516,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.600151171579743,
|
|
"fcm_dpo/beta": 0.27533477544784546,
|
|
"fcm_dpo/delta": 0.29201841354370117,
|
|
"fcm_dpo/margin": 2.0799665451049805,
|
|
"fcm_dpo/q_t": 0.38529521226882935,
|
|
"grad_norm": 87.17749786376953,
|
|
"learning_rate": 2.078906883274924e-07,
|
|
"logits/chosen": 0.19908317923545837,
|
|
"logits/rejected": 0.1614154875278473,
|
|
"logps/chosen": -72.62335205078125,
|
|
"logps/ref_chosen": -66.45622253417969,
|
|
"logps/ref_rejected": -85.74736785888672,
|
|
"logps/rejected": -93.99446105957031,
|
|
"loss": 1.2512,
|
|
"margin_dpo/margin_mean": 2.0799667835235596,
|
|
"margin_dpo/margin_std": 4.818110942840576,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.6016628873771731,
|
|
"fcm_dpo/beta": 0.2720872461795807,
|
|
"fcm_dpo/delta": -0.113968625664711,
|
|
"fcm_dpo/margin": 3.4996390342712402,
|
|
"fcm_dpo/q_t": 0.32238367199897766,
|
|
"grad_norm": 56.072662353515625,
|
|
"learning_rate": 2.065879555832674e-07,
|
|
"logits/chosen": 0.22736221551895142,
|
|
"logits/rejected": 0.1738460659980774,
|
|
"logps/chosen": -54.861907958984375,
|
|
"logps/ref_chosen": -49.244239807128906,
|
|
"logps/ref_rejected": -75.18949127197266,
|
|
"logps/rejected": -84.30679321289062,
|
|
"loss": 0.8728,
|
|
"margin_dpo/margin_mean": 3.4996397495269775,
|
|
"margin_dpo/margin_std": 4.072452068328857,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.6031746031746031,
|
|
"fcm_dpo/beta": 0.26396697759628296,
|
|
"fcm_dpo/delta": -0.1899876594543457,
|
|
"fcm_dpo/margin": 3.865616798400879,
|
|
"fcm_dpo/q_t": 0.32054808735847473,
|
|
"grad_norm": 63.06287384033203,
|
|
"learning_rate": 2.052864371672457e-07,
|
|
"logits/chosen": 0.18875831365585327,
|
|
"logits/rejected": 0.08430158346891403,
|
|
"logps/chosen": -74.19822692871094,
|
|
"logps/ref_chosen": -68.30679321289062,
|
|
"logps/ref_rejected": -113.2708511352539,
|
|
"logps/rejected": -123.02790069580078,
|
|
"loss": 0.896,
|
|
"margin_dpo/margin_mean": 3.8656165599823,
|
|
"margin_dpo/margin_std": 4.880072116851807,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.6046863189720333,
|
|
"fcm_dpo/beta": 0.263761043548584,
|
|
"fcm_dpo/delta": 0.13915222883224487,
|
|
"fcm_dpo/margin": 2.7275257110595703,
|
|
"fcm_dpo/q_t": 0.368988037109375,
|
|
"grad_norm": 70.60572052001953,
|
|
"learning_rate": 2.0398616948569493e-07,
|
|
"logits/chosen": 0.2591952085494995,
|
|
"logits/rejected": 0.2135709524154663,
|
|
"logps/chosen": -78.38478088378906,
|
|
"logps/ref_chosen": -71.62649536132812,
|
|
"logps/ref_rejected": -90.98765563964844,
|
|
"logps/rejected": -100.47346496582031,
|
|
"loss": 1.0301,
|
|
"margin_dpo/margin_mean": 2.727525472640991,
|
|
"margin_dpo/margin_std": 4.12076997756958,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.6046863189720333,
|
|
"eval_fcm_dpo/beta": 0.2689932584762573,
|
|
"eval_logits/chosen": 0.2485727071762085,
|
|
"eval_logits/rejected": 0.20795086026191711,
|
|
"eval_logps/chosen": -80.81339263916016,
|
|
"eval_logps/ref_chosen": -74.85946655273438,
|
|
"eval_logps/ref_rejected": -79.54898834228516,
|
|
"eval_logps/rejected": -88.20954895019531,
|
|
"eval_loss": 0.5461485385894775,
|
|
"eval_margin_dpo/margin_mean": 2.706634283065796,
|
|
"eval_margin_dpo/margin_std": 4.596866130828857,
|
|
"eval_runtime": 37.9957,
|
|
"eval_samples_per_second": 60.612,
|
|
"eval_steps_per_second": 1.895,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.6061980347694633,
|
|
"fcm_dpo/beta": 0.2562599778175354,
|
|
"fcm_dpo/delta": -0.3326801657676697,
|
|
"fcm_dpo/margin": 4.466838836669922,
|
|
"fcm_dpo/q_t": 0.29139143228530884,
|
|
"grad_norm": 48.446571350097656,
|
|
"learning_rate": 2.0268718890989752e-07,
|
|
"logits/chosen": 0.2457781285047531,
|
|
"logits/rejected": 0.1672484576702118,
|
|
"logps/chosen": -59.1573486328125,
|
|
"logps/ref_chosen": -53.72495651245117,
|
|
"logps/ref_rejected": -75.06304931640625,
|
|
"logps/rejected": -84.9622802734375,
|
|
"loss": 0.7902,
|
|
"margin_dpo/margin_mean": 4.466838836669922,
|
|
"margin_dpo/margin_std": 4.662143707275391,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.6077097505668935,
|
|
"fcm_dpo/beta": 0.2497277706861496,
|
|
"fcm_dpo/delta": -0.013817459344863892,
|
|
"fcm_dpo/margin": 3.4514431953430176,
|
|
"fcm_dpo/q_t": 0.33873581886291504,
|
|
"grad_norm": 60.394657135009766,
|
|
"learning_rate": 2.013895317751323e-07,
|
|
"logits/chosen": 0.2631894648075104,
|
|
"logits/rejected": 0.22899389266967773,
|
|
"logps/chosen": -67.79601287841797,
|
|
"logps/ref_chosen": -61.873931884765625,
|
|
"logps/ref_rejected": -66.15198516845703,
|
|
"logps/rejected": -75.5255126953125,
|
|
"loss": 0.9592,
|
|
"margin_dpo/margin_mean": 3.4514427185058594,
|
|
"margin_dpo/margin_std": 4.586933135986328,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.6092214663643235,
|
|
"fcm_dpo/beta": 0.24135196208953857,
|
|
"fcm_dpo/delta": -0.28697896003723145,
|
|
"fcm_dpo/margin": 4.581813812255859,
|
|
"fcm_dpo/q_t": 0.2987240254878998,
|
|
"grad_norm": 49.79636764526367,
|
|
"learning_rate": 2.0009323437965898e-07,
|
|
"logits/chosen": 0.3315698504447937,
|
|
"logits/rejected": 0.2662360370159149,
|
|
"logps/chosen": -57.209228515625,
|
|
"logps/ref_chosen": -51.321502685546875,
|
|
"logps/ref_rejected": -86.54010772705078,
|
|
"logps/rejected": -97.0096435546875,
|
|
"loss": 0.8399,
|
|
"margin_dpo/margin_mean": 4.581814289093018,
|
|
"margin_dpo/margin_std": 5.112116813659668,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.6107331821617535,
|
|
"fcm_dpo/beta": 0.2342020571231842,
|
|
"fcm_dpo/delta": -0.006323143839836121,
|
|
"fcm_dpo/margin": 3.6440062522888184,
|
|
"fcm_dpo/q_t": 0.33462953567504883,
|
|
"grad_norm": 57.89472198486328,
|
|
"learning_rate": 1.9879833298370237e-07,
|
|
"logits/chosen": 0.20489439368247986,
|
|
"logits/rejected": 0.1360418051481247,
|
|
"logps/chosen": -67.75517272949219,
|
|
"logps/ref_chosen": -62.26288604736328,
|
|
"logps/ref_rejected": -95.19029998779297,
|
|
"logps/rejected": -104.32658386230469,
|
|
"loss": 0.9366,
|
|
"margin_dpo/margin_mean": 3.6440062522888184,
|
|
"margin_dpo/margin_std": 4.503121852874756,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.6122448979591837,
|
|
"fcm_dpo/beta": 0.23300763964653015,
|
|
"fcm_dpo/delta": 0.038883745670318604,
|
|
"fcm_dpo/margin": 3.459103584289551,
|
|
"fcm_dpo/q_t": 0.3483220934867859,
|
|
"grad_norm": 47.8633918762207,
|
|
"learning_rate": 1.975048638084379e-07,
|
|
"logits/chosen": 0.29452911019325256,
|
|
"logits/rejected": 0.2554025650024414,
|
|
"logps/chosen": -56.493064880371094,
|
|
"logps/ref_chosen": -50.5843391418457,
|
|
"logps/ref_rejected": -65.43156433105469,
|
|
"logps/rejected": -74.79939270019531,
|
|
"loss": 0.9609,
|
|
"margin_dpo/margin_mean": 3.459103584289551,
|
|
"margin_dpo/margin_std": 4.2689619064331055,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.6137566137566137,
|
|
"fcm_dpo/beta": 0.23268848657608032,
|
|
"fcm_dpo/delta": -0.11276289820671082,
|
|
"fcm_dpo/margin": 4.082479000091553,
|
|
"fcm_dpo/q_t": 0.31986764073371887,
|
|
"grad_norm": 47.064353942871094,
|
|
"learning_rate": 1.9621286303497914e-07,
|
|
"logits/chosen": 0.2542474865913391,
|
|
"logits/rejected": 0.1390293538570404,
|
|
"logps/chosen": -54.7130126953125,
|
|
"logps/ref_chosen": -48.99560546875,
|
|
"logps/ref_rejected": -92.47774505615234,
|
|
"logps/rejected": -102.27763366699219,
|
|
"loss": 0.8894,
|
|
"margin_dpo/margin_mean": 4.082479476928711,
|
|
"margin_dpo/margin_std": 4.736398220062256,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.6152683295540439,
|
|
"fcm_dpo/beta": 0.2396630048751831,
|
|
"fcm_dpo/delta": 0.17042268812656403,
|
|
"fcm_dpo/margin": 2.883038282394409,
|
|
"fcm_dpo/q_t": 0.36978358030319214,
|
|
"grad_norm": 79.12983703613281,
|
|
"learning_rate": 1.9492236680336483e-07,
|
|
"logits/chosen": 0.18423596024513245,
|
|
"logits/rejected": 0.12922564148902893,
|
|
"logps/chosen": -96.18956756591797,
|
|
"logps/ref_chosen": -89.40056610107422,
|
|
"logps/ref_rejected": -99.28775024414062,
|
|
"logps/rejected": -108.95979309082031,
|
|
"loss": 1.0471,
|
|
"margin_dpo/margin_mean": 2.883037567138672,
|
|
"margin_dpo/margin_std": 4.590210914611816,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.6167800453514739,
|
|
"fcm_dpo/beta": 0.23743906617164612,
|
|
"fcm_dpo/delta": -0.19000959396362305,
|
|
"fcm_dpo/margin": 4.296039581298828,
|
|
"fcm_dpo/q_t": 0.3071790337562561,
|
|
"grad_norm": 45.11009216308594,
|
|
"learning_rate": 1.9363341121154895e-07,
|
|
"logits/chosen": 0.24663150310516357,
|
|
"logits/rejected": 0.18223854899406433,
|
|
"logps/chosen": -60.074920654296875,
|
|
"logps/ref_chosen": -54.70391845703125,
|
|
"logps/ref_rejected": -73.98648834228516,
|
|
"logps/rejected": -83.65353393554688,
|
|
"loss": 0.8398,
|
|
"margin_dpo/margin_mean": 4.296039581298828,
|
|
"margin_dpo/margin_std": 4.550353527069092,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.618291761148904,
|
|
"fcm_dpo/beta": 0.24273526668548584,
|
|
"fcm_dpo/delta": 0.29451048374176025,
|
|
"fcm_dpo/margin": 2.3577988147735596,
|
|
"fcm_dpo/q_t": 0.3989053964614868,
|
|
"grad_norm": 62.7466926574707,
|
|
"learning_rate": 1.9234603231438994e-07,
|
|
"logits/chosen": 0.24126318097114563,
|
|
"logits/rejected": 0.24055859446525574,
|
|
"logps/chosen": -68.67960357666016,
|
|
"logps/ref_chosen": -62.11822509765625,
|
|
"logps/ref_rejected": -61.933509826660156,
|
|
"logps/rejected": -70.85269165039062,
|
|
"loss": 1.1796,
|
|
"margin_dpo/margin_mean": 2.3577983379364014,
|
|
"margin_dpo/margin_std": 4.950777053833008,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.6198034769463341,
|
|
"fcm_dpo/beta": 0.24500462412834167,
|
|
"fcm_dpo/delta": 0.04858040064573288,
|
|
"fcm_dpo/margin": 3.2761478424072266,
|
|
"fcm_dpo/q_t": 0.3398808240890503,
|
|
"grad_norm": 59.190223693847656,
|
|
"learning_rate": 1.9106026612264315e-07,
|
|
"logits/chosen": 0.2868541479110718,
|
|
"logits/rejected": 0.2624509632587433,
|
|
"logps/chosen": -67.55817413330078,
|
|
"logps/ref_chosen": -61.80266189575195,
|
|
"logps/ref_rejected": -76.60002136230469,
|
|
"logps/rejected": -85.63168334960938,
|
|
"loss": 0.9129,
|
|
"margin_dpo/margin_mean": 3.2761478424072266,
|
|
"margin_dpo/margin_std": 3.6564488410949707,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.6213151927437641,
|
|
"fcm_dpo/beta": 0.2463398426771164,
|
|
"fcm_dpo/delta": -0.03521712124347687,
|
|
"fcm_dpo/margin": 3.5785136222839355,
|
|
"fcm_dpo/q_t": 0.34663787484169006,
|
|
"grad_norm": 66.22770690917969,
|
|
"learning_rate": 1.8977614860195296e-07,
|
|
"logits/chosen": 0.2386634349822998,
|
|
"logits/rejected": 0.18876875936985016,
|
|
"logps/chosen": -61.16634750366211,
|
|
"logps/ref_chosen": -54.44539260864258,
|
|
"logps/ref_rejected": -74.5650863647461,
|
|
"logps/rejected": -84.86454772949219,
|
|
"loss": 1.0363,
|
|
"margin_dpo/margin_mean": 3.5785140991210938,
|
|
"margin_dpo/margin_std": 5.528660774230957,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.6228269085411943,
|
|
"fcm_dpo/beta": 0.2440623939037323,
|
|
"fcm_dpo/delta": -0.02338419109582901,
|
|
"fcm_dpo/margin": 3.56341814994812,
|
|
"fcm_dpo/q_t": 0.3446499705314636,
|
|
"grad_norm": 57.37812042236328,
|
|
"learning_rate": 1.8849371567184662e-07,
|
|
"logits/chosen": 0.2372305691242218,
|
|
"logits/rejected": 0.1809903085231781,
|
|
"logps/chosen": -62.51192092895508,
|
|
"logps/ref_chosen": -55.248085021972656,
|
|
"logps/ref_rejected": -68.96623229980469,
|
|
"logps/rejected": -79.7934799194336,
|
|
"loss": 0.9592,
|
|
"margin_dpo/margin_mean": 3.56341814994812,
|
|
"margin_dpo/margin_std": 4.865736961364746,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.6243386243386243,
|
|
"fcm_dpo/beta": 0.24663466215133667,
|
|
"fcm_dpo/delta": 0.031184524297714233,
|
|
"fcm_dpo/margin": 3.33031964302063,
|
|
"fcm_dpo/q_t": 0.3551603853702545,
|
|
"grad_norm": 66.90776062011719,
|
|
"learning_rate": 1.872130032047302e-07,
|
|
"logits/chosen": 0.14157438278198242,
|
|
"logits/rejected": 0.11581124365329742,
|
|
"logps/chosen": -75.62931060791016,
|
|
"logps/ref_chosen": -68.72074890136719,
|
|
"logps/ref_rejected": -78.76539611816406,
|
|
"logps/rejected": -89.00428771972656,
|
|
"loss": 1.0651,
|
|
"margin_dpo/margin_mean": 3.330319404602051,
|
|
"margin_dpo/margin_std": 5.480165958404541,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.6258503401360545,
|
|
"fcm_dpo/beta": 0.2433382272720337,
|
|
"fcm_dpo/delta": -0.0841752216219902,
|
|
"fcm_dpo/margin": 3.804185152053833,
|
|
"fcm_dpo/q_t": 0.33140766620635986,
|
|
"grad_norm": 54.20486068725586,
|
|
"learning_rate": 1.8593404702488436e-07,
|
|
"logits/chosen": 0.24406284093856812,
|
|
"logits/rejected": 0.1906662881374359,
|
|
"logps/chosen": -60.66529083251953,
|
|
"logps/ref_chosen": -54.138214111328125,
|
|
"logps/ref_rejected": -74.65741729736328,
|
|
"logps/rejected": -84.98867797851562,
|
|
"loss": 0.9494,
|
|
"margin_dpo/margin_mean": 3.8041844367980957,
|
|
"margin_dpo/margin_std": 5.061384201049805,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.6273620559334845,
|
|
"fcm_dpo/beta": 0.24203211069107056,
|
|
"fcm_dpo/delta": -0.014722846448421478,
|
|
"fcm_dpo/margin": 3.5665831565856934,
|
|
"fcm_dpo/q_t": 0.3441469073295593,
|
|
"grad_norm": 58.89690017700195,
|
|
"learning_rate": 1.846568829074628e-07,
|
|
"logits/chosen": 0.22098597884178162,
|
|
"logits/rejected": 0.20324364304542542,
|
|
"logps/chosen": -62.7205810546875,
|
|
"logps/ref_chosen": -55.91856002807617,
|
|
"logps/ref_rejected": -61.747703552246094,
|
|
"logps/rejected": -72.1163101196289,
|
|
"loss": 1.026,
|
|
"margin_dpo/margin_mean": 3.5665831565856934,
|
|
"margin_dpo/margin_std": 5.469634056091309,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.6288737717309146,
|
|
"fcm_dpo/beta": 0.25225508213043213,
|
|
"fcm_dpo/delta": 0.11526554822921753,
|
|
"fcm_dpo/margin": 2.9003496170043945,
|
|
"fcm_dpo/q_t": 0.3673805892467499,
|
|
"grad_norm": 69.7919921875,
|
|
"learning_rate": 1.8338154657749128e-07,
|
|
"logits/chosen": 0.22921934723854065,
|
|
"logits/rejected": 0.18885570764541626,
|
|
"logps/chosen": -61.164649963378906,
|
|
"logps/ref_chosen": -54.72308349609375,
|
|
"logps/ref_rejected": -69.17388916015625,
|
|
"logps/rejected": -78.51580810546875,
|
|
"loss": 1.102,
|
|
"margin_dpo/margin_mean": 2.9003496170043945,
|
|
"margin_dpo/margin_std": 4.846092224121094,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.6303854875283447,
|
|
"fcm_dpo/beta": 0.24670086801052094,
|
|
"fcm_dpo/delta": -0.15245218575000763,
|
|
"fcm_dpo/margin": 3.9919228553771973,
|
|
"fcm_dpo/q_t": 0.31675058603286743,
|
|
"grad_norm": 66.77470397949219,
|
|
"learning_rate": 1.8210807370886849e-07,
|
|
"logits/chosen": 0.30952125787734985,
|
|
"logits/rejected": 0.2565500736236572,
|
|
"logps/chosen": -64.06097412109375,
|
|
"logps/ref_chosen": -56.791259765625,
|
|
"logps/ref_rejected": -68.7791748046875,
|
|
"logps/rejected": -80.04081726074219,
|
|
"loss": 0.936,
|
|
"margin_dpo/margin_mean": 3.9919233322143555,
|
|
"margin_dpo/margin_std": 5.018634796142578,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.6318972033257747,
|
|
"fcm_dpo/beta": 0.2470259666442871,
|
|
"fcm_dpo/delta": 0.2453382909297943,
|
|
"fcm_dpo/margin": 2.509967803955078,
|
|
"fcm_dpo/q_t": 0.39756616950035095,
|
|
"grad_norm": 81.32603454589844,
|
|
"learning_rate": 1.8083649992336825e-07,
|
|
"logits/chosen": 0.2638748586177826,
|
|
"logits/rejected": 0.2686142921447754,
|
|
"logps/chosen": -77.12152099609375,
|
|
"logps/ref_chosen": -69.10798645019531,
|
|
"logps/ref_rejected": -75.09132385253906,
|
|
"logps/rejected": -85.61483764648438,
|
|
"loss": 1.2226,
|
|
"margin_dpo/margin_mean": 2.5099682807922363,
|
|
"margin_dpo/margin_std": 5.600542068481445,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.6334089191232048,
|
|
"fcm_dpo/beta": 0.24269002676010132,
|
|
"fcm_dpo/delta": -0.21217991411685944,
|
|
"fcm_dpo/margin": 4.270475387573242,
|
|
"fcm_dpo/q_t": 0.31270739436149597,
|
|
"grad_norm": 52.399044036865234,
|
|
"learning_rate": 1.7956686078964255e-07,
|
|
"logits/chosen": 0.15831753611564636,
|
|
"logits/rejected": 0.11195243149995804,
|
|
"logps/chosen": -64.01142883300781,
|
|
"logps/ref_chosen": -58.1717643737793,
|
|
"logps/ref_rejected": -71.67066955566406,
|
|
"logps/rejected": -81.78080749511719,
|
|
"loss": 0.875,
|
|
"margin_dpo/margin_mean": 4.270475387573242,
|
|
"margin_dpo/margin_std": 5.095606803894043,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.6349206349206349,
|
|
"fcm_dpo/beta": 0.24959485232830048,
|
|
"fcm_dpo/delta": 0.24828967452049255,
|
|
"fcm_dpo/margin": 2.474740505218506,
|
|
"fcm_dpo/q_t": 0.3962467908859253,
|
|
"grad_norm": 71.6791763305664,
|
|
"learning_rate": 1.782991918222275e-07,
|
|
"logits/chosen": 0.21745803952217102,
|
|
"logits/rejected": 0.1744053065776825,
|
|
"logps/chosen": -65.115966796875,
|
|
"logps/ref_chosen": -57.05351257324219,
|
|
"logps/ref_rejected": -62.670982360839844,
|
|
"logps/rejected": -73.20817565917969,
|
|
"loss": 1.2861,
|
|
"margin_dpo/margin_mean": 2.4747402667999268,
|
|
"margin_dpo/margin_std": 5.943141937255859,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.636432350718065,
|
|
"fcm_dpo/beta": 0.2592123746871948,
|
|
"fcm_dpo/delta": 0.14652492105960846,
|
|
"fcm_dpo/margin": 2.7551355361938477,
|
|
"fcm_dpo/q_t": 0.38477665185928345,
|
|
"grad_norm": 71.33401489257812,
|
|
"learning_rate": 1.7703352848054887e-07,
|
|
"logits/chosen": 0.2027522772550583,
|
|
"logits/rejected": 0.15381047129631042,
|
|
"logps/chosen": -65.26667022705078,
|
|
"logps/ref_chosen": -57.32324981689453,
|
|
"logps/ref_rejected": -75.33782958984375,
|
|
"logps/rejected": -86.03638458251953,
|
|
"loss": 1.2791,
|
|
"margin_dpo/margin_mean": 2.7551350593566895,
|
|
"margin_dpo/margin_std": 6.1498517990112305,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.6379440665154951,
|
|
"fcm_dpo/beta": 0.2613186538219452,
|
|
"fcm_dpo/delta": -0.06008271127939224,
|
|
"fcm_dpo/margin": 3.4610629081726074,
|
|
"fcm_dpo/q_t": 0.34318864345550537,
|
|
"grad_norm": 69.64408874511719,
|
|
"learning_rate": 1.7576990616793137e-07,
|
|
"logits/chosen": 0.24464266002178192,
|
|
"logits/rejected": 0.212172269821167,
|
|
"logps/chosen": -73.77204895019531,
|
|
"logps/ref_chosen": -67.05757141113281,
|
|
"logps/ref_rejected": -72.12803649902344,
|
|
"logps/rejected": -82.3035659790039,
|
|
"loss": 1.0071,
|
|
"margin_dpo/margin_mean": 3.4610633850097656,
|
|
"margin_dpo/margin_std": 5.229635715484619,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.6394557823129252,
|
|
"fcm_dpo/beta": 0.250480979681015,
|
|
"fcm_dpo/delta": -0.19729526340961456,
|
|
"fcm_dpo/margin": 4.0947794914245605,
|
|
"fcm_dpo/q_t": 0.3230004906654358,
|
|
"grad_norm": 55.7142448425293,
|
|
"learning_rate": 1.745083602306071e-07,
|
|
"logits/chosen": 0.27640843391418457,
|
|
"logits/rejected": 0.2188403606414795,
|
|
"logps/chosen": -60.870277404785156,
|
|
"logps/ref_chosen": -54.06167221069336,
|
|
"logps/ref_rejected": -76.64092254638672,
|
|
"logps/rejected": -87.5443115234375,
|
|
"loss": 0.93,
|
|
"margin_dpo/margin_mean": 4.0947794914245605,
|
|
"margin_dpo/margin_std": 5.4799394607543945,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.6409674981103552,
|
|
"fcm_dpo/beta": 0.2431943267583847,
|
|
"fcm_dpo/delta": -0.15173768997192383,
|
|
"fcm_dpo/margin": 4.056219100952148,
|
|
"fcm_dpo/q_t": 0.32653218507766724,
|
|
"grad_norm": 60.763397216796875,
|
|
"learning_rate": 1.7324892595672804e-07,
|
|
"logits/chosen": 0.17878620326519012,
|
|
"logits/rejected": 0.14686840772628784,
|
|
"logps/chosen": -60.15678024291992,
|
|
"logps/ref_chosen": -53.60887145996094,
|
|
"logps/ref_rejected": -79.2139892578125,
|
|
"logps/rejected": -89.81812286376953,
|
|
"loss": 0.9293,
|
|
"margin_dpo/margin_mean": 4.056219577789307,
|
|
"margin_dpo/margin_std": 5.32462739944458,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.6424792139077853,
|
|
"fcm_dpo/beta": 0.24080964922904968,
|
|
"fcm_dpo/delta": -0.02124994806945324,
|
|
"fcm_dpo/margin": 3.610257863998413,
|
|
"fcm_dpo/q_t": 0.34036120772361755,
|
|
"grad_norm": 53.22382736206055,
|
|
"learning_rate": 1.7199163857537824e-07,
|
|
"logits/chosen": 0.2400674670934677,
|
|
"logits/rejected": 0.21408450603485107,
|
|
"logps/chosen": -65.2353515625,
|
|
"logps/ref_chosen": -58.41468048095703,
|
|
"logps/ref_rejected": -66.59054565429688,
|
|
"logps/rejected": -77.02146911621094,
|
|
"loss": 0.9689,
|
|
"margin_dpo/margin_mean": 3.610257625579834,
|
|
"margin_dpo/margin_std": 4.985018730163574,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.6439909297052154,
|
|
"fcm_dpo/beta": 0.2545412480831146,
|
|
"fcm_dpo/delta": 0.3679015040397644,
|
|
"fcm_dpo/margin": 1.9646282196044922,
|
|
"fcm_dpo/q_t": 0.40837377309799194,
|
|
"grad_norm": 78.42141723632812,
|
|
"learning_rate": 1.7073653325558828e-07,
|
|
"logits/chosen": 0.1992775797843933,
|
|
"logits/rejected": 0.20033419132232666,
|
|
"logps/chosen": -79.82450866699219,
|
|
"logps/ref_chosen": -71.70822143554688,
|
|
"logps/ref_rejected": -73.57725524902344,
|
|
"logps/rejected": -83.6581802368164,
|
|
"loss": 1.2953,
|
|
"margin_dpo/margin_mean": 1.964627981185913,
|
|
"margin_dpo/margin_std": 5.136331558227539,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.6455026455026455,
|
|
"fcm_dpo/beta": 0.2586546838283539,
|
|
"fcm_dpo/delta": -0.023448972031474113,
|
|
"fcm_dpo/margin": 3.368882656097412,
|
|
"fcm_dpo/q_t": 0.3508281707763672,
|
|
"grad_norm": 60.90048599243164,
|
|
"learning_rate": 1.6948364510535218e-07,
|
|
"logits/chosen": 0.29382023215293884,
|
|
"logits/rejected": 0.24823549389839172,
|
|
"logps/chosen": -66.44595336914062,
|
|
"logps/ref_chosen": -58.64276885986328,
|
|
"logps/ref_rejected": -86.25437927246094,
|
|
"logps/rejected": -97.42644500732422,
|
|
"loss": 1.0582,
|
|
"margin_dpo/margin_mean": 3.368882894515991,
|
|
"margin_dpo/margin_std": 5.518677234649658,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.6470143613000756,
|
|
"fcm_dpo/beta": 0.25078386068344116,
|
|
"fcm_dpo/delta": -0.22688086330890656,
|
|
"fcm_dpo/margin": 4.200922966003418,
|
|
"fcm_dpo/q_t": 0.3197481632232666,
|
|
"grad_norm": 63.953575134277344,
|
|
"learning_rate": 1.6823300917064458e-07,
|
|
"logits/chosen": 0.22337879240512848,
|
|
"logits/rejected": 0.17726576328277588,
|
|
"logps/chosen": -73.97862243652344,
|
|
"logps/ref_chosen": -66.5960464477539,
|
|
"logps/ref_rejected": -82.3941650390625,
|
|
"logps/rejected": -93.9776611328125,
|
|
"loss": 0.9094,
|
|
"margin_dpo/margin_mean": 4.200922966003418,
|
|
"margin_dpo/margin_std": 5.368341445922852,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.6485260770975056,
|
|
"fcm_dpo/beta": 0.2456437349319458,
|
|
"fcm_dpo/delta": -0.059685856103897095,
|
|
"fcm_dpo/margin": 3.6800642013549805,
|
|
"fcm_dpo/q_t": 0.33456557989120483,
|
|
"grad_norm": 61.284217834472656,
|
|
"learning_rate": 1.669846604344412e-07,
|
|
"logits/chosen": 0.1801915019750595,
|
|
"logits/rejected": 0.1813092678785324,
|
|
"logps/chosen": -64.23383331298828,
|
|
"logps/ref_chosen": -57.00970458984375,
|
|
"logps/ref_rejected": -59.86549377441406,
|
|
"logps/rejected": -70.76969909667969,
|
|
"loss": 0.9927,
|
|
"margin_dpo/margin_mean": 3.6800642013549805,
|
|
"margin_dpo/margin_std": 5.228022575378418,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.6500377928949358,
|
|
"fcm_dpo/beta": 0.23953106999397278,
|
|
"fcm_dpo/delta": -0.12144466489553452,
|
|
"fcm_dpo/margin": 4.007453918457031,
|
|
"fcm_dpo/q_t": 0.3295876979827881,
|
|
"grad_norm": 57.089744567871094,
|
|
"learning_rate": 1.6573863381573954e-07,
|
|
"logits/chosen": 0.15598759055137634,
|
|
"logits/rejected": 0.14352598786354065,
|
|
"logps/chosen": -66.40084838867188,
|
|
"logps/ref_chosen": -59.563194274902344,
|
|
"logps/ref_rejected": -70.52289581298828,
|
|
"logps/rejected": -81.36799621582031,
|
|
"loss": 0.9546,
|
|
"margin_dpo/margin_mean": 4.007453918457031,
|
|
"margin_dpo/margin_std": 5.4395599365234375,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.6515495086923658,
|
|
"fcm_dpo/beta": 0.23662912845611572,
|
|
"fcm_dpo/delta": 0.004378672689199448,
|
|
"fcm_dpo/margin": 3.5732975006103516,
|
|
"fcm_dpo/q_t": 0.35064366459846497,
|
|
"grad_norm": 56.98310089111328,
|
|
"learning_rate": 1.6449496416858282e-07,
|
|
"logits/chosen": 0.22091971337795258,
|
|
"logits/rejected": 0.18462924659252167,
|
|
"logps/chosen": -56.664363861083984,
|
|
"logps/ref_chosen": -50.20032501220703,
|
|
"logps/ref_rejected": -77.81680297851562,
|
|
"logps/rejected": -87.85414123535156,
|
|
"loss": 0.9811,
|
|
"margin_dpo/margin_mean": 3.5732975006103516,
|
|
"margin_dpo/margin_std": 5.193387031555176,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.6530612244897959,
|
|
"fcm_dpo/beta": 0.23365934193134308,
|
|
"fcm_dpo/delta": -0.056146346032619476,
|
|
"fcm_dpo/margin": 3.847005844116211,
|
|
"fcm_dpo/q_t": 0.3386353850364685,
|
|
"grad_norm": 58.32561111450195,
|
|
"learning_rate": 1.632536862810844e-07,
|
|
"logits/chosen": 0.24721886217594147,
|
|
"logits/rejected": 0.2073393613100052,
|
|
"logps/chosen": -68.7791976928711,
|
|
"logps/ref_chosen": -61.662757873535156,
|
|
"logps/ref_rejected": -83.94496154785156,
|
|
"logps/rejected": -94.90840911865234,
|
|
"loss": 0.9881,
|
|
"margin_dpo/margin_mean": 3.8470053672790527,
|
|
"margin_dpo/margin_std": 5.408696174621582,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.654572940287226,
|
|
"fcm_dpo/beta": 0.22784093022346497,
|
|
"fcm_dpo/delta": -0.2313033938407898,
|
|
"fcm_dpo/margin": 4.640135288238525,
|
|
"fcm_dpo/q_t": 0.3186371326446533,
|
|
"grad_norm": 50.4363899230957,
|
|
"learning_rate": 1.6201483487445515e-07,
|
|
"logits/chosen": 0.3478536605834961,
|
|
"logits/rejected": 0.33294785022735596,
|
|
"logps/chosen": -71.23434448242188,
|
|
"logps/ref_chosen": -63.72917938232422,
|
|
"logps/ref_rejected": -65.8391342163086,
|
|
"logps/rejected": -77.98443603515625,
|
|
"loss": 0.9176,
|
|
"margin_dpo/margin_mean": 4.640135288238525,
|
|
"margin_dpo/margin_std": 5.971785545349121,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.656084656084656,
|
|
"fcm_dpo/beta": 0.21730631589889526,
|
|
"fcm_dpo/delta": -0.13528761267662048,
|
|
"fcm_dpo/margin": 4.452031135559082,
|
|
"fcm_dpo/q_t": 0.3225432336330414,
|
|
"grad_norm": 46.248233795166016,
|
|
"learning_rate": 1.6077844460203204e-07,
|
|
"logits/chosen": 0.30574116110801697,
|
|
"logits/rejected": 0.2512272894382477,
|
|
"logps/chosen": -54.50779724121094,
|
|
"logps/ref_chosen": -47.97331619262695,
|
|
"logps/ref_rejected": -72.51132202148438,
|
|
"logps/rejected": -83.49783325195312,
|
|
"loss": 0.9787,
|
|
"margin_dpo/margin_mean": 4.452031135559082,
|
|
"margin_dpo/margin_std": 6.084223747253418,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.6575963718820862,
|
|
"fcm_dpo/beta": 0.22146287560462952,
|
|
"fcm_dpo/delta": 0.0298960879445076,
|
|
"fcm_dpo/margin": 3.7065372467041016,
|
|
"fcm_dpo/q_t": 0.3446485996246338,
|
|
"grad_norm": 52.24251937866211,
|
|
"learning_rate": 1.5954455004830878e-07,
|
|
"logits/chosen": 0.3137568235397339,
|
|
"logits/rejected": 0.2790324091911316,
|
|
"logps/chosen": -64.93702697753906,
|
|
"logps/ref_chosen": -57.06024932861328,
|
|
"logps/ref_rejected": -71.69146728515625,
|
|
"logps/rejected": -83.2747802734375,
|
|
"loss": 1.0002,
|
|
"margin_dpo/margin_mean": 3.7065377235412598,
|
|
"margin_dpo/margin_std": 5.282249927520752,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.6591080876795162,
|
|
"fcm_dpo/beta": 0.2214878499507904,
|
|
"fcm_dpo/delta": 0.02604525163769722,
|
|
"fcm_dpo/margin": 3.728400707244873,
|
|
"fcm_dpo/q_t": 0.34906071424484253,
|
|
"grad_norm": 52.046791076660156,
|
|
"learning_rate": 1.5831318572796847e-07,
|
|
"logits/chosen": 0.2411668300628662,
|
|
"logits/rejected": 0.19079329073429108,
|
|
"logps/chosen": -63.56175231933594,
|
|
"logps/ref_chosen": -56.158050537109375,
|
|
"logps/ref_rejected": -67.63787841796875,
|
|
"logps/rejected": -78.76997375488281,
|
|
"loss": 1.0039,
|
|
"margin_dpo/margin_mean": 3.728400945663452,
|
|
"margin_dpo/margin_std": 5.423241138458252,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.6606198034769464,
|
|
"fcm_dpo/beta": 0.2188284695148468,
|
|
"fcm_dpo/delta": 0.0661238431930542,
|
|
"fcm_dpo/margin": 3.576071262359619,
|
|
"fcm_dpo/q_t": 0.3653622269630432,
|
|
"grad_norm": 58.89640426635742,
|
|
"learning_rate": 1.5708438608491815e-07,
|
|
"logits/chosen": 0.23456737399101257,
|
|
"logits/rejected": 0.12770959734916687,
|
|
"logps/chosen": -65.12676239013672,
|
|
"logps/ref_chosen": -56.98578643798828,
|
|
"logps/ref_rejected": -85.61524963378906,
|
|
"logps/rejected": -97.33230590820312,
|
|
"loss": 1.1308,
|
|
"margin_dpo/margin_mean": 3.5760717391967773,
|
|
"margin_dpo/margin_std": 6.206454277038574,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.6621315192743764,
|
|
"fcm_dpo/beta": 0.22138898074626923,
|
|
"fcm_dpo/delta": -0.13801656663417816,
|
|
"fcm_dpo/margin": 4.400524139404297,
|
|
"fcm_dpo/q_t": 0.32329899072647095,
|
|
"grad_norm": 40.44050979614258,
|
|
"learning_rate": 1.558581854913253e-07,
|
|
"logits/chosen": 0.2807733416557312,
|
|
"logits/rejected": 0.2254466414451599,
|
|
"logps/chosen": -48.16633987426758,
|
|
"logps/ref_chosen": -41.27777862548828,
|
|
"logps/ref_rejected": -65.33840942382812,
|
|
"logps/rejected": -76.62749481201172,
|
|
"loss": 0.923,
|
|
"margin_dpo/margin_mean": 4.400524616241455,
|
|
"margin_dpo/margin_std": 5.516866683959961,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.6636432350718064,
|
|
"fcm_dpo/beta": 0.2201937735080719,
|
|
"fcm_dpo/delta": -0.016198471188545227,
|
|
"fcm_dpo/margin": 3.9138498306274414,
|
|
"fcm_dpo/q_t": 0.335410475730896,
|
|
"grad_norm": 62.81761169433594,
|
|
"learning_rate": 1.5463461824665658e-07,
|
|
"logits/chosen": 0.20101311802864075,
|
|
"logits/rejected": 0.1731702983379364,
|
|
"logps/chosen": -88.3592529296875,
|
|
"logps/ref_chosen": -81.41764831542969,
|
|
"logps/ref_rejected": -94.72309875488281,
|
|
"logps/rejected": -105.57853698730469,
|
|
"loss": 0.9611,
|
|
"margin_dpo/margin_mean": 3.913849353790283,
|
|
"margin_dpo/margin_std": 5.098721504211426,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.6651549508692366,
|
|
"fcm_dpo/beta": 0.21266132593154907,
|
|
"fcm_dpo/delta": -0.17807799577713013,
|
|
"fcm_dpo/margin": 4.751244068145752,
|
|
"fcm_dpo/q_t": 0.3173283338546753,
|
|
"grad_norm": 46.422149658203125,
|
|
"learning_rate": 1.534137185767178e-07,
|
|
"logits/chosen": 0.19558237493038177,
|
|
"logits/rejected": 0.11220754683017731,
|
|
"logps/chosen": -49.08099365234375,
|
|
"logps/ref_chosen": -42.538185119628906,
|
|
"logps/ref_rejected": -69.78813934326172,
|
|
"logps/rejected": -81.08218383789062,
|
|
"loss": 0.8729,
|
|
"margin_dpo/margin_mean": 4.751243591308594,
|
|
"margin_dpo/margin_std": 5.614121437072754,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.6666666666666666,
|
|
"fcm_dpo/beta": 0.20615889132022858,
|
|
"fcm_dpo/delta": -0.07547109574079514,
|
|
"fcm_dpo/margin": 4.451215744018555,
|
|
"fcm_dpo/q_t": 0.32043367624282837,
|
|
"grad_norm": 47.443084716796875,
|
|
"learning_rate": 1.521955206326976e-07,
|
|
"logits/chosen": 0.2026526778936386,
|
|
"logits/rejected": 0.1278238445520401,
|
|
"logps/chosen": -64.02963256835938,
|
|
"logps/ref_chosen": -57.593223571777344,
|
|
"logps/ref_rejected": -84.82878875732422,
|
|
"logps/rejected": -95.71641540527344,
|
|
"loss": 0.8561,
|
|
"margin_dpo/margin_mean": 4.451216220855713,
|
|
"margin_dpo/margin_std": 4.816239833831787,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.6681783824640968,
|
|
"fcm_dpo/beta": 0.20843441784381866,
|
|
"fcm_dpo/delta": 0.0724664106965065,
|
|
"fcm_dpo/margin": 3.7578186988830566,
|
|
"fcm_dpo/q_t": 0.35375648736953735,
|
|
"grad_norm": 60.085304260253906,
|
|
"learning_rate": 1.5098005849021078e-07,
|
|
"logits/chosen": 0.29126518964767456,
|
|
"logits/rejected": 0.25034332275390625,
|
|
"logps/chosen": -75.44874572753906,
|
|
"logps/ref_chosen": -67.46121978759766,
|
|
"logps/ref_rejected": -89.0693588256836,
|
|
"logps/rejected": -100.81471252441406,
|
|
"loss": 1.0015,
|
|
"margin_dpo/margin_mean": 3.7578182220458984,
|
|
"margin_dpo/margin_std": 5.468325614929199,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.6696900982615268,
|
|
"fcm_dpo/beta": 0.20462161302566528,
|
|
"fcm_dpo/delta": -0.17430701851844788,
|
|
"fcm_dpo/margin": 4.92136287689209,
|
|
"fcm_dpo/q_t": 0.31620925664901733,
|
|
"grad_norm": 45.6391716003418,
|
|
"learning_rate": 1.4976736614834662e-07,
|
|
"logits/chosen": 0.2725718021392822,
|
|
"logits/rejected": 0.2120482325553894,
|
|
"logps/chosen": -61.66060256958008,
|
|
"logps/ref_chosen": -54.79610061645508,
|
|
"logps/ref_rejected": -77.80781555175781,
|
|
"logps/rejected": -89.59368896484375,
|
|
"loss": 0.9136,
|
|
"margin_dpo/margin_mean": 4.921362400054932,
|
|
"margin_dpo/margin_std": 6.193216323852539,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.671201814058957,
|
|
"fcm_dpo/beta": 0.21466189622879028,
|
|
"fcm_dpo/delta": 0.45215946435928345,
|
|
"fcm_dpo/margin": 1.9520701169967651,
|
|
"fcm_dpo/q_t": 0.42362260818481445,
|
|
"grad_norm": 64.94635009765625,
|
|
"learning_rate": 1.4855747752871654e-07,
|
|
"logits/chosen": 0.2781602740287781,
|
|
"logits/rejected": 0.2134133130311966,
|
|
"logps/chosen": -67.09490966796875,
|
|
"logps/ref_chosen": -58.749061584472656,
|
|
"logps/ref_rejected": -86.87396240234375,
|
|
"logps/rejected": -97.17189025878906,
|
|
"loss": 1.3118,
|
|
"margin_dpo/margin_mean": 1.9520692825317383,
|
|
"margin_dpo/margin_std": 5.343048572540283,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.672713529856387,
|
|
"fcm_dpo/beta": 0.21728411316871643,
|
|
"fcm_dpo/delta": -0.12314336001873016,
|
|
"fcm_dpo/margin": 4.423813819885254,
|
|
"fcm_dpo/q_t": 0.32274460792541504,
|
|
"grad_norm": 56.279396057128906,
|
|
"learning_rate": 1.473504264745062e-07,
|
|
"logits/chosen": 0.2420450747013092,
|
|
"logits/rejected": 0.22509875893592834,
|
|
"logps/chosen": -69.55735778808594,
|
|
"logps/ref_chosen": -60.91743850708008,
|
|
"logps/ref_rejected": -71.5637435913086,
|
|
"logps/rejected": -84.62747192382812,
|
|
"loss": 0.9196,
|
|
"margin_dpo/margin_mean": 4.423814296722412,
|
|
"margin_dpo/margin_std": 5.549365520477295,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.674225245653817,
|
|
"fcm_dpo/beta": 0.20891538262367249,
|
|
"fcm_dpo/delta": -0.36004990339279175,
|
|
"fcm_dpo/margin": 5.58787202835083,
|
|
"fcm_dpo/q_t": 0.2773993909358978,
|
|
"grad_norm": 38.038795471191406,
|
|
"learning_rate": 1.461462467495284e-07,
|
|
"logits/chosen": 0.300902783870697,
|
|
"logits/rejected": 0.23340097069740295,
|
|
"logps/chosen": -56.217681884765625,
|
|
"logps/ref_chosen": -48.79924774169922,
|
|
"logps/ref_rejected": -71.8719482421875,
|
|
"logps/rejected": -84.87826538085938,
|
|
"loss": 0.7393,
|
|
"margin_dpo/margin_mean": 5.587871551513672,
|
|
"margin_dpo/margin_std": 4.869062900543213,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.6757369614512472,
|
|
"fcm_dpo/beta": 0.19416730105876923,
|
|
"fcm_dpo/delta": -0.2389349639415741,
|
|
"fcm_dpo/margin": 5.477807521820068,
|
|
"fcm_dpo/q_t": 0.29294759035110474,
|
|
"grad_norm": 41.903839111328125,
|
|
"learning_rate": 1.4494497203727843e-07,
|
|
"logits/chosen": 0.23307769000530243,
|
|
"logits/rejected": 0.14838972687721252,
|
|
"logps/chosen": -60.058258056640625,
|
|
"logps/ref_chosen": -53.682716369628906,
|
|
"logps/ref_rejected": -88.17315673828125,
|
|
"logps/rejected": -100.02650451660156,
|
|
"loss": 0.8425,
|
|
"margin_dpo/margin_mean": 5.47780704498291,
|
|
"margin_dpo/margin_std": 6.009408950805664,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.6772486772486772,
|
|
"fcm_dpo/beta": 0.19167715311050415,
|
|
"fcm_dpo/delta": 0.008233718574047089,
|
|
"fcm_dpo/margin": 4.395143508911133,
|
|
"fcm_dpo/q_t": 0.3358060121536255,
|
|
"grad_norm": 41.25887680053711,
|
|
"learning_rate": 1.4374663593999256e-07,
|
|
"logits/chosen": 0.2761896848678589,
|
|
"logits/rejected": 0.23135310411453247,
|
|
"logps/chosen": -61.26433563232422,
|
|
"logps/ref_chosen": -53.75125503540039,
|
|
"logps/ref_rejected": -77.17623901367188,
|
|
"logps/rejected": -89.08446502685547,
|
|
"loss": 0.939,
|
|
"margin_dpo/margin_mean": 4.395143508911133,
|
|
"margin_dpo/margin_std": 5.651719093322754,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.6787603930461074,
|
|
"fcm_dpo/beta": 0.2022373080253601,
|
|
"fcm_dpo/delta": 0.3825373649597168,
|
|
"fcm_dpo/margin": 2.4102249145507812,
|
|
"fcm_dpo/q_t": 0.4050193727016449,
|
|
"grad_norm": 57.15595245361328,
|
|
"learning_rate": 1.4255127197770707e-07,
|
|
"logits/chosen": 0.15068916976451874,
|
|
"logits/rejected": 0.13591475784778595,
|
|
"logps/chosen": -84.59050750732422,
|
|
"logps/ref_chosen": -75.82737731933594,
|
|
"logps/ref_rejected": -82.20687866210938,
|
|
"logps/rejected": -93.3802261352539,
|
|
"loss": 1.1481,
|
|
"margin_dpo/margin_mean": 2.4102249145507812,
|
|
"margin_dpo/margin_std": 4.6423020362854,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.6802721088435374,
|
|
"fcm_dpo/beta": 0.21165935695171356,
|
|
"fcm_dpo/delta": 0.12616947293281555,
|
|
"fcm_dpo/margin": 3.4614691734313965,
|
|
"fcm_dpo/q_t": 0.3656800091266632,
|
|
"grad_norm": 56.690269470214844,
|
|
"learning_rate": 1.4135891358732205e-07,
|
|
"logits/chosen": 0.311156302690506,
|
|
"logits/rejected": 0.22413130104541779,
|
|
"logps/chosen": -54.62956237792969,
|
|
"logps/ref_chosen": -47.11572265625,
|
|
"logps/ref_rejected": -78.7546615600586,
|
|
"logps/rejected": -89.72996520996094,
|
|
"loss": 1.0545,
|
|
"margin_dpo/margin_mean": 3.461467981338501,
|
|
"margin_dpo/margin_std": 5.579974174499512,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.6817838246409675,
|
|
"fcm_dpo/beta": 0.2162429690361023,
|
|
"fcm_dpo/delta": 0.1287505030632019,
|
|
"fcm_dpo/margin": 3.379551887512207,
|
|
"fcm_dpo/q_t": 0.36499056220054626,
|
|
"grad_norm": 59.05227279663086,
|
|
"learning_rate": 1.4016959412166437e-07,
|
|
"logits/chosen": 0.24645878374576569,
|
|
"logits/rejected": 0.20367026329040527,
|
|
"logps/chosen": -71.26134490966797,
|
|
"logps/ref_chosen": -63.350440979003906,
|
|
"logps/ref_rejected": -76.28530883789062,
|
|
"logps/rejected": -87.57575988769531,
|
|
"loss": 1.0718,
|
|
"margin_dpo/margin_mean": 3.3795528411865234,
|
|
"margin_dpo/margin_std": 5.619390487670898,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.6832955404383976,
|
|
"fcm_dpo/beta": 0.21667127311229706,
|
|
"fcm_dpo/delta": -0.04757612198591232,
|
|
"fcm_dpo/margin": 4.123049736022949,
|
|
"fcm_dpo/q_t": 0.3350658118724823,
|
|
"grad_norm": 57.07819747924805,
|
|
"learning_rate": 1.3898334684855645e-07,
|
|
"logits/chosen": 0.21967121958732605,
|
|
"logits/rejected": 0.15877500176429749,
|
|
"logps/chosen": -63.10958480834961,
|
|
"logps/ref_chosen": -55.58583450317383,
|
|
"logps/ref_rejected": -77.68738555908203,
|
|
"logps/rejected": -89.33418273925781,
|
|
"loss": 0.9949,
|
|
"margin_dpo/margin_mean": 4.123049736022949,
|
|
"margin_dpo/margin_std": 5.820711135864258,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.6848072562358276,
|
|
"fcm_dpo/beta": 0.21547909080982208,
|
|
"fcm_dpo/delta": -0.022098319604992867,
|
|
"fcm_dpo/margin": 4.038339614868164,
|
|
"fcm_dpo/q_t": 0.3498329520225525,
|
|
"grad_norm": 50.18575668334961,
|
|
"learning_rate": 1.3780020494988445e-07,
|
|
"logits/chosen": 0.22223559021949768,
|
|
"logits/rejected": 0.19204358756542206,
|
|
"logps/chosen": -68.8441162109375,
|
|
"logps/ref_chosen": -61.778202056884766,
|
|
"logps/ref_rejected": -71.51403045654297,
|
|
"logps/rejected": -82.6182861328125,
|
|
"loss": 1.028,
|
|
"margin_dpo/margin_mean": 4.0383405685424805,
|
|
"margin_dpo/margin_std": 6.134515285491943,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.6863189720332578,
|
|
"fcm_dpo/beta": 0.21051616966724396,
|
|
"fcm_dpo/delta": -0.13284781575202942,
|
|
"fcm_dpo/margin": 4.6063690185546875,
|
|
"fcm_dpo/q_t": 0.32341745495796204,
|
|
"grad_norm": 43.10291290283203,
|
|
"learning_rate": 1.366202015206706e-07,
|
|
"logits/chosen": 0.28594014048576355,
|
|
"logits/rejected": 0.24734768271446228,
|
|
"logps/chosen": -58.167015075683594,
|
|
"logps/ref_chosen": -51.59515380859375,
|
|
"logps/ref_rejected": -63.96732711791992,
|
|
"logps/rejected": -75.14555358886719,
|
|
"loss": 0.9569,
|
|
"margin_dpo/margin_mean": 4.6063690185546875,
|
|
"margin_dpo/margin_std": 6.16324520111084,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.6878306878306878,
|
|
"fcm_dpo/beta": 0.20485688745975494,
|
|
"fcm_dpo/delta": -0.10616355389356613,
|
|
"fcm_dpo/margin": 4.611947059631348,
|
|
"fcm_dpo/q_t": 0.3254557251930237,
|
|
"grad_norm": 50.521175384521484,
|
|
"learning_rate": 1.354433695681474e-07,
|
|
"logits/chosen": 0.13476872444152832,
|
|
"logits/rejected": 0.09828665852546692,
|
|
"logps/chosen": -78.38386535644531,
|
|
"logps/ref_chosen": -70.65170288085938,
|
|
"logps/ref_rejected": -77.44276428222656,
|
|
"logps/rejected": -89.786865234375,
|
|
"loss": 0.9001,
|
|
"margin_dpo/margin_mean": 4.611947059631348,
|
|
"margin_dpo/margin_std": 5.726899147033691,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.6893424036281179,
|
|
"fcm_dpo/beta": 0.20512458682060242,
|
|
"fcm_dpo/delta": 0.0033988687209784985,
|
|
"fcm_dpo/margin": 4.128653049468994,
|
|
"fcm_dpo/q_t": 0.338392049074173,
|
|
"grad_norm": 50.77932357788086,
|
|
"learning_rate": 1.3426974201083439e-07,
|
|
"logits/chosen": 0.16939248144626617,
|
|
"logits/rejected": 0.12530234456062317,
|
|
"logps/chosen": -64.22583770751953,
|
|
"logps/ref_chosen": -56.398284912109375,
|
|
"logps/ref_rejected": -82.61642456054688,
|
|
"logps/rejected": -94.5726318359375,
|
|
"loss": 0.9574,
|
|
"margin_dpo/margin_mean": 4.128653526306152,
|
|
"margin_dpo/margin_std": 5.482672691345215,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.690854119425548,
|
|
"fcm_dpo/beta": 0.2063646912574768,
|
|
"fcm_dpo/delta": 0.10697145760059357,
|
|
"fcm_dpo/margin": 3.6349315643310547,
|
|
"fcm_dpo/q_t": 0.35435450077056885,
|
|
"grad_norm": 46.9370231628418,
|
|
"learning_rate": 1.3309935167761717e-07,
|
|
"logits/chosen": 0.3234666883945465,
|
|
"logits/rejected": 0.2596738934516907,
|
|
"logps/chosen": -52.51266860961914,
|
|
"logps/ref_chosen": -44.72057342529297,
|
|
"logps/ref_rejected": -68.1158676147461,
|
|
"logps/rejected": -79.54289245605469,
|
|
"loss": 0.98,
|
|
"margin_dpo/margin_mean": 3.6349313259124756,
|
|
"margin_dpo/margin_std": 4.778542518615723,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.6923658352229781,
|
|
"fcm_dpo/beta": 0.20488256216049194,
|
|
"fcm_dpo/delta": -0.11901578307151794,
|
|
"fcm_dpo/margin": 4.668992042541504,
|
|
"fcm_dpo/q_t": 0.31962084770202637,
|
|
"grad_norm": 49.14594650268555,
|
|
"learning_rate": 1.3193223130682936e-07,
|
|
"logits/chosen": 0.24556541442871094,
|
|
"logits/rejected": 0.151944100856781,
|
|
"logps/chosen": -56.84272003173828,
|
|
"logps/ref_chosen": -50.00569152832031,
|
|
"logps/ref_rejected": -87.50015258789062,
|
|
"logps/rejected": -99.00617218017578,
|
|
"loss": 0.9301,
|
|
"margin_dpo/margin_mean": 4.668992519378662,
|
|
"margin_dpo/margin_std": 5.956631660461426,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.6938775510204082,
|
|
"fcm_dpo/beta": 0.2047109305858612,
|
|
"fcm_dpo/delta": -0.14493052661418915,
|
|
"fcm_dpo/margin": 4.762661933898926,
|
|
"fcm_dpo/q_t": 0.31784969568252563,
|
|
"grad_norm": 50.155799865722656,
|
|
"learning_rate": 1.3076841354533658e-07,
|
|
"logits/chosen": 0.28778326511383057,
|
|
"logits/rejected": 0.25329944491386414,
|
|
"logps/chosen": -72.57955932617188,
|
|
"logps/ref_chosen": -65.37794494628906,
|
|
"logps/ref_rejected": -88.19244384765625,
|
|
"logps/rejected": -100.1567153930664,
|
|
"loss": 0.8925,
|
|
"margin_dpo/margin_mean": 4.762661933898926,
|
|
"margin_dpo/margin_std": 5.247749328613281,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.6953892668178382,
|
|
"fcm_dpo/beta": 0.1921759843826294,
|
|
"fcm_dpo/delta": -0.2016555219888687,
|
|
"fcm_dpo/margin": 5.358639717102051,
|
|
"fcm_dpo/q_t": 0.31141430139541626,
|
|
"grad_norm": 48.96949005126953,
|
|
"learning_rate": 1.2960793094762345e-07,
|
|
"logits/chosen": 0.2766995429992676,
|
|
"logits/rejected": 0.17160335183143616,
|
|
"logps/chosen": -72.19833374023438,
|
|
"logps/ref_chosen": -64.5616683959961,
|
|
"logps/ref_rejected": -88.67890167236328,
|
|
"logps/rejected": -101.6741943359375,
|
|
"loss": 0.8422,
|
|
"margin_dpo/margin_mean": 5.358639717102051,
|
|
"margin_dpo/margin_std": 5.980681419372559,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.6969009826152683,
|
|
"fcm_dpo/beta": 0.18721193075180054,
|
|
"fcm_dpo/delta": -0.04403623193502426,
|
|
"fcm_dpo/margin": 4.7339019775390625,
|
|
"fcm_dpo/q_t": 0.3311063051223755,
|
|
"grad_norm": 42.88606262207031,
|
|
"learning_rate": 1.2845081597488286e-07,
|
|
"logits/chosen": 0.3637089729309082,
|
|
"logits/rejected": 0.28602486848831177,
|
|
"logps/chosen": -56.37803649902344,
|
|
"logps/ref_chosen": -49.4779167175293,
|
|
"logps/ref_rejected": -72.65262603759766,
|
|
"logps/rejected": -84.28665161132812,
|
|
"loss": 0.9256,
|
|
"margin_dpo/margin_mean": 4.7339019775390625,
|
|
"margin_dpo/margin_std": 5.710514068603516,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.6984126984126984,
|
|
"fcm_dpo/beta": 0.18369469046592712,
|
|
"fcm_dpo/delta": -0.15608486533164978,
|
|
"fcm_dpo/margin": 5.382818222045898,
|
|
"fcm_dpo/q_t": 0.30420053005218506,
|
|
"grad_norm": 40.72878646850586,
|
|
"learning_rate": 1.27297100994108e-07,
|
|
"logits/chosen": 0.23772986233234406,
|
|
"logits/rejected": 0.18360589444637299,
|
|
"logps/chosen": -67.96499633789062,
|
|
"logps/ref_chosen": -60.4951171875,
|
|
"logps/ref_rejected": -74.82136535644531,
|
|
"logps/rejected": -87.674072265625,
|
|
"loss": 0.8356,
|
|
"margin_dpo/margin_mean": 5.382818222045898,
|
|
"margin_dpo/margin_std": 5.639156341552734,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.6999244142101285,
|
|
"fcm_dpo/beta": 0.185808002948761,
|
|
"fcm_dpo/delta": 0.10821240395307541,
|
|
"fcm_dpo/margin": 4.03656005859375,
|
|
"fcm_dpo/q_t": 0.3506065607070923,
|
|
"grad_norm": 43.807926177978516,
|
|
"learning_rate": 1.2614681827718695e-07,
|
|
"logits/chosen": 0.28047433495521545,
|
|
"logits/rejected": 0.2664549648761749,
|
|
"logps/chosen": -75.05906677246094,
|
|
"logps/ref_chosen": -67.68511962890625,
|
|
"logps/ref_rejected": -71.32196044921875,
|
|
"logps/rejected": -82.73246765136719,
|
|
"loss": 0.9477,
|
|
"margin_dpo/margin_mean": 4.036560535430908,
|
|
"margin_dpo/margin_std": 4.958671569824219,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.7014361300075586,
|
|
"fcm_dpo/beta": 0.19172126054763794,
|
|
"fcm_dpo/delta": 0.0832245945930481,
|
|
"fcm_dpo/margin": 4.018474578857422,
|
|
"fcm_dpo/q_t": 0.3552596867084503,
|
|
"grad_norm": 53.8079833984375,
|
|
"learning_rate": 1.2500000000000005e-07,
|
|
"logits/chosen": 0.24116164445877075,
|
|
"logits/rejected": 0.21551595628261566,
|
|
"logps/chosen": -67.01548767089844,
|
|
"logps/ref_chosen": -59.16564178466797,
|
|
"logps/ref_rejected": -69.56146240234375,
|
|
"logps/rejected": -81.42977905273438,
|
|
"loss": 1.0547,
|
|
"margin_dpo/margin_mean": 4.018474102020264,
|
|
"margin_dpo/margin_std": 6.148181915283203,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.7029478458049887,
|
|
"fcm_dpo/beta": 0.190629780292511,
|
|
"fcm_dpo/delta": 0.02115422859787941,
|
|
"fcm_dpo/margin": 4.356546401977539,
|
|
"fcm_dpo/q_t": 0.3453529477119446,
|
|
"grad_norm": 47.5476188659668,
|
|
"learning_rate": 1.238566782415197e-07,
|
|
"logits/chosen": 0.33090129494667053,
|
|
"logits/rejected": 0.2758718729019165,
|
|
"logps/chosen": -66.55125427246094,
|
|
"logps/ref_chosen": -58.513671875,
|
|
"logps/ref_rejected": -84.31745910644531,
|
|
"logps/rejected": -96.71158599853516,
|
|
"loss": 0.985,
|
|
"margin_dpo/margin_mean": 4.356546401977539,
|
|
"margin_dpo/margin_std": 6.026215553283691,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.7044595616024187,
|
|
"fcm_dpo/beta": 0.198988139629364,
|
|
"fcm_dpo/delta": 0.28733205795288086,
|
|
"fcm_dpo/margin": 2.915306329727173,
|
|
"fcm_dpo/q_t": 0.38667044043540955,
|
|
"grad_norm": 60.51933670043945,
|
|
"learning_rate": 1.2271688498291334e-07,
|
|
"logits/chosen": 0.2653227150440216,
|
|
"logits/rejected": 0.2612670958042145,
|
|
"logps/chosen": -82.42771911621094,
|
|
"logps/ref_chosen": -73.26580810546875,
|
|
"logps/ref_rejected": -74.83621215820312,
|
|
"logps/rejected": -86.91342163085938,
|
|
"loss": 1.0816,
|
|
"margin_dpo/margin_mean": 2.915306568145752,
|
|
"margin_dpo/margin_std": 4.817732810974121,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.7059712773998488,
|
|
"fcm_dpo/beta": 0.20076759159564972,
|
|
"fcm_dpo/delta": -0.06342404335737228,
|
|
"fcm_dpo/margin": 4.520856857299805,
|
|
"fcm_dpo/q_t": 0.3295537531375885,
|
|
"grad_norm": 41.36443328857422,
|
|
"learning_rate": 1.2158065210664848e-07,
|
|
"logits/chosen": 0.26830387115478516,
|
|
"logits/rejected": 0.14777283370494843,
|
|
"logps/chosen": -55.57886505126953,
|
|
"logps/ref_chosen": -47.57947540283203,
|
|
"logps/ref_rejected": -78.68522644042969,
|
|
"logps/rejected": -91.20547485351562,
|
|
"loss": 0.9024,
|
|
"margin_dpo/margin_mean": 4.520857334136963,
|
|
"margin_dpo/margin_std": 5.519647598266602,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.7074829931972789,
|
|
"fcm_dpo/beta": 0.19205418229103088,
|
|
"fcm_dpo/delta": -0.3020942509174347,
|
|
"fcm_dpo/margin": 5.825957298278809,
|
|
"fcm_dpo/q_t": 0.2959809899330139,
|
|
"grad_norm": 44.799903869628906,
|
|
"learning_rate": 1.204480113956011e-07,
|
|
"logits/chosen": 0.2383967787027359,
|
|
"logits/rejected": 0.22732359170913696,
|
|
"logps/chosen": -70.88501739501953,
|
|
"logps/ref_chosen": -63.92778778076172,
|
|
"logps/ref_rejected": -76.51626586914062,
|
|
"logps/rejected": -89.29945373535156,
|
|
"loss": 0.8096,
|
|
"margin_dpo/margin_mean": 5.825956344604492,
|
|
"margin_dpo/margin_std": 6.250823020935059,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.708994708994709,
|
|
"fcm_dpo/beta": 0.1848841905593872,
|
|
"fcm_dpo/delta": -0.024960562586784363,
|
|
"fcm_dpo/margin": 4.694052696228027,
|
|
"fcm_dpo/q_t": 0.33774009346961975,
|
|
"grad_norm": 41.76699447631836,
|
|
"learning_rate": 1.1931899453216697e-07,
|
|
"logits/chosen": 0.32318294048309326,
|
|
"logits/rejected": 0.30566686391830444,
|
|
"logps/chosen": -66.29656982421875,
|
|
"logps/ref_chosen": -59.05818176269531,
|
|
"logps/ref_rejected": -75.67672729492188,
|
|
"logps/rejected": -87.60916137695312,
|
|
"loss": 0.9215,
|
|
"margin_dpo/margin_mean": 4.694052696228027,
|
|
"margin_dpo/margin_std": 5.674604892730713,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.7105064247921391,
|
|
"fcm_dpo/beta": 0.18895672261714935,
|
|
"fcm_dpo/delta": 0.010860327631235123,
|
|
"fcm_dpo/margin": 4.441650867462158,
|
|
"fcm_dpo/q_t": 0.33179694414138794,
|
|
"grad_norm": 39.528404235839844,
|
|
"learning_rate": 1.1819363309737438e-07,
|
|
"logits/chosen": 0.25164568424224854,
|
|
"logits/rejected": 0.19546663761138916,
|
|
"logps/chosen": -55.655845642089844,
|
|
"logps/ref_chosen": -47.86743927001953,
|
|
"logps/ref_rejected": -65.96859741210938,
|
|
"logps/rejected": -78.19864654541016,
|
|
"loss": 0.9389,
|
|
"margin_dpo/margin_mean": 4.441650867462158,
|
|
"margin_dpo/margin_std": 5.509934425354004,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.7120181405895691,
|
|
"fcm_dpo/beta": 0.183636873960495,
|
|
"fcm_dpo/delta": -0.16288867592811584,
|
|
"fcm_dpo/margin": 5.427962303161621,
|
|
"fcm_dpo/q_t": 0.3086378872394562,
|
|
"grad_norm": 44.055545806884766,
|
|
"learning_rate": 1.1707195857000215e-07,
|
|
"logits/chosen": 0.2511060833930969,
|
|
"logits/rejected": 0.19487126171588898,
|
|
"logps/chosen": -64.90446472167969,
|
|
"logps/ref_chosen": -57.777854919433594,
|
|
"logps/ref_rejected": -73.81172180175781,
|
|
"logps/rejected": -86.36629486083984,
|
|
"loss": 0.8795,
|
|
"margin_dpo/margin_mean": 5.427962303161621,
|
|
"margin_dpo/margin_std": 6.277071952819824,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.7135298563869993,
|
|
"fcm_dpo/beta": 0.1824641078710556,
|
|
"fcm_dpo/delta": -0.035785011947155,
|
|
"fcm_dpo/margin": 4.831416130065918,
|
|
"fcm_dpo/q_t": 0.3352402150630951,
|
|
"grad_norm": 45.14156723022461,
|
|
"learning_rate": 1.1595400232569768e-07,
|
|
"logits/chosen": 0.28769558668136597,
|
|
"logits/rejected": 0.24208904802799225,
|
|
"logps/chosen": -62.80769348144531,
|
|
"logps/ref_chosen": -55.908668518066406,
|
|
"logps/ref_rejected": -74.70294189453125,
|
|
"logps/rejected": -86.43338775634766,
|
|
"loss": 0.967,
|
|
"margin_dpo/margin_mean": 4.831416130065918,
|
|
"margin_dpo/margin_std": 6.460024833679199,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.7150415721844293,
|
|
"fcm_dpo/beta": 0.179289311170578,
|
|
"fcm_dpo/delta": -0.016662299633026123,
|
|
"fcm_dpo/margin": 4.8229851722717285,
|
|
"fcm_dpo/q_t": 0.3476918935775757,
|
|
"grad_norm": 42.09556198120117,
|
|
"learning_rate": 1.1483979563610069e-07,
|
|
"logits/chosen": 0.3349047303199768,
|
|
"logits/rejected": 0.24838170409202576,
|
|
"logps/chosen": -61.18727493286133,
|
|
"logps/ref_chosen": -54.16088104248047,
|
|
"logps/ref_rejected": -92.76789855957031,
|
|
"logps/rejected": -104.61727142333984,
|
|
"loss": 1.0116,
|
|
"margin_dpo/margin_mean": 4.822985649108887,
|
|
"margin_dpo/margin_std": 7.087156772613525,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.7165532879818595,
|
|
"fcm_dpo/beta": 0.182792067527771,
|
|
"fcm_dpo/delta": 0.10003271698951721,
|
|
"fcm_dpo/margin": 4.1439433097839355,
|
|
"fcm_dpo/q_t": 0.36064907908439636,
|
|
"grad_norm": 47.92180252075195,
|
|
"learning_rate": 1.1372936966796709e-07,
|
|
"logits/chosen": 0.3086499273777008,
|
|
"logits/rejected": 0.24886074662208557,
|
|
"logps/chosen": -55.18489074707031,
|
|
"logps/ref_chosen": -46.685707092285156,
|
|
"logps/ref_rejected": -71.44731903076172,
|
|
"logps/rejected": -84.09043884277344,
|
|
"loss": 1.0493,
|
|
"margin_dpo/margin_mean": 4.1439433097839355,
|
|
"margin_dpo/margin_std": 6.442416191101074,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.7180650037792895,
|
|
"fcm_dpo/beta": 0.17439153790473938,
|
|
"fcm_dpo/delta": -0.2883111834526062,
|
|
"fcm_dpo/margin": 6.32227897644043,
|
|
"fcm_dpo/q_t": 0.28803032636642456,
|
|
"grad_norm": 37.30523681640625,
|
|
"learning_rate": 1.126227554822985e-07,
|
|
"logits/chosen": 0.24227741360664368,
|
|
"logits/rejected": 0.19501101970672607,
|
|
"logps/chosen": -66.50431060791016,
|
|
"logps/ref_chosen": -58.4873046875,
|
|
"logps/ref_rejected": -87.00187683105469,
|
|
"logps/rejected": -101.3411636352539,
|
|
"loss": 0.7646,
|
|
"margin_dpo/margin_mean": 6.32227897644043,
|
|
"margin_dpo/margin_std": 5.994036674499512,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.7195767195767195,
|
|
"fcm_dpo/beta": 0.17916938662528992,
|
|
"fcm_dpo/delta": 0.17899703979492188,
|
|
"fcm_dpo/margin": 3.8024401664733887,
|
|
"fcm_dpo/q_t": 0.3761757016181946,
|
|
"grad_norm": 57.621246337890625,
|
|
"learning_rate": 1.1151998403347243e-07,
|
|
"logits/chosen": 0.18236970901489258,
|
|
"logits/rejected": 0.1682473123073578,
|
|
"logps/chosen": -84.62786865234375,
|
|
"logps/ref_chosen": -75.38162231445312,
|
|
"logps/ref_rejected": -76.99822235107422,
|
|
"logps/rejected": -90.04690551757812,
|
|
"loss": 1.1027,
|
|
"margin_dpo/margin_mean": 3.802440881729126,
|
|
"margin_dpo/margin_std": 6.650286674499512,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.7210884353741497,
|
|
"fcm_dpo/beta": 0.18340060114860535,
|
|
"fcm_dpo/delta": 0.13215288519859314,
|
|
"fcm_dpo/margin": 3.9657039642333984,
|
|
"fcm_dpo/q_t": 0.36404669284820557,
|
|
"grad_norm": 57.48204803466797,
|
|
"learning_rate": 1.1042108616837692e-07,
|
|
"logits/chosen": 0.27557092905044556,
|
|
"logits/rejected": 0.23873813450336456,
|
|
"logps/chosen": -70.09407043457031,
|
|
"logps/ref_chosen": -61.073387145996094,
|
|
"logps/ref_rejected": -81.34375,
|
|
"logps/rejected": -94.33013916015625,
|
|
"loss": 1.1397,
|
|
"margin_dpo/margin_mean": 3.9657044410705566,
|
|
"margin_dpo/margin_std": 7.145218849182129,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.7226001511715797,
|
|
"fcm_dpo/beta": 0.18710201978683472,
|
|
"fcm_dpo/delta": 0.10514857620000839,
|
|
"fcm_dpo/margin": 4.023831367492676,
|
|
"fcm_dpo/q_t": 0.3657424747943878,
|
|
"grad_norm": 49.14711380004883,
|
|
"learning_rate": 1.0932609262554746e-07,
|
|
"logits/chosen": 0.21086078882217407,
|
|
"logits/rejected": 0.20800764858722687,
|
|
"logps/chosen": -65.02413940429688,
|
|
"logps/ref_chosen": -57.16731643676758,
|
|
"logps/ref_rejected": -53.30917739868164,
|
|
"logps/rejected": -65.18983459472656,
|
|
"loss": 1.0618,
|
|
"margin_dpo/margin_mean": 4.023830890655518,
|
|
"margin_dpo/margin_std": 6.53040885925293,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.7241118669690099,
|
|
"fcm_dpo/beta": 0.19023653864860535,
|
|
"fcm_dpo/delta": 0.12472105771303177,
|
|
"fcm_dpo/margin": 3.858086347579956,
|
|
"fcm_dpo/q_t": 0.36114710569381714,
|
|
"grad_norm": 47.78655242919922,
|
|
"learning_rate": 1.0823503403430734e-07,
|
|
"logits/chosen": 0.1616727113723755,
|
|
"logits/rejected": 0.11323510110378265,
|
|
"logps/chosen": -67.915283203125,
|
|
"logps/ref_chosen": -58.91331481933594,
|
|
"logps/ref_rejected": -63.7403450012207,
|
|
"logps/rejected": -76.60040283203125,
|
|
"loss": 1.1059,
|
|
"margin_dpo/margin_mean": 3.858086585998535,
|
|
"margin_dpo/margin_std": 6.676839828491211,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.7256235827664399,
|
|
"fcm_dpo/beta": 0.19588303565979004,
|
|
"fcm_dpo/delta": -0.023159652948379517,
|
|
"fcm_dpo/margin": 4.419933795928955,
|
|
"fcm_dpo/q_t": 0.3366478979587555,
|
|
"grad_norm": 56.999000549316406,
|
|
"learning_rate": 1.0714794091391072e-07,
|
|
"logits/chosen": 0.2097318321466446,
|
|
"logits/rejected": 0.1971430778503418,
|
|
"logps/chosen": -71.11746978759766,
|
|
"logps/ref_chosen": -62.80061340332031,
|
|
"logps/ref_rejected": -67.58859252929688,
|
|
"logps/rejected": -80.32538604736328,
|
|
"loss": 1.0673,
|
|
"margin_dpo/margin_mean": 4.419934272766113,
|
|
"margin_dpo/margin_std": 6.8448004722595215,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.72713529856387,
|
|
"fcm_dpo/beta": 0.18832021951675415,
|
|
"fcm_dpo/delta": -0.12109113484621048,
|
|
"fcm_dpo/margin": 5.092473983764648,
|
|
"fcm_dpo/q_t": 0.33122923970222473,
|
|
"grad_norm": 46.02914047241211,
|
|
"learning_rate": 1.0606484367268906e-07,
|
|
"logits/chosen": 0.2160688042640686,
|
|
"logits/rejected": 0.20522311329841614,
|
|
"logps/chosen": -72.78104400634766,
|
|
"logps/ref_chosen": -65.28649139404297,
|
|
"logps/ref_rejected": -70.78668212890625,
|
|
"logps/rejected": -83.37371063232422,
|
|
"loss": 0.952,
|
|
"margin_dpo/margin_mean": 5.092473983764648,
|
|
"margin_dpo/margin_std": 6.993575572967529,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.7286470143613001,
|
|
"fcm_dpo/beta": 0.1910247802734375,
|
|
"fcm_dpo/delta": 0.08871780335903168,
|
|
"fcm_dpo/margin": 4.016700267791748,
|
|
"fcm_dpo/q_t": 0.36484792828559875,
|
|
"grad_norm": 61.503326416015625,
|
|
"learning_rate": 1.0498577260720048e-07,
|
|
"logits/chosen": 0.21902191638946533,
|
|
"logits/rejected": 0.10311245173215866,
|
|
"logps/chosen": -69.74750518798828,
|
|
"logps/ref_chosen": -60.906185150146484,
|
|
"logps/ref_rejected": -103.44656372070312,
|
|
"logps/rejected": -116.30458068847656,
|
|
"loss": 1.1322,
|
|
"margin_dpo/margin_mean": 4.016700267791748,
|
|
"margin_dpo/margin_std": 7.26608943939209,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.7301587301587301,
|
|
"fcm_dpo/beta": 0.1862109899520874,
|
|
"fcm_dpo/delta": -0.14671653509140015,
|
|
"fcm_dpo/margin": 5.271034240722656,
|
|
"fcm_dpo/q_t": 0.32034099102020264,
|
|
"grad_norm": 44.32430648803711,
|
|
"learning_rate": 1.0391075790138232e-07,
|
|
"logits/chosen": 0.3017137050628662,
|
|
"logits/rejected": 0.21860943734645844,
|
|
"logps/chosen": -61.299842834472656,
|
|
"logps/ref_chosen": -53.192012786865234,
|
|
"logps/ref_rejected": -81.83927154541016,
|
|
"logps/rejected": -95.21813201904297,
|
|
"loss": 0.9343,
|
|
"margin_dpo/margin_mean": 5.271034240722656,
|
|
"margin_dpo/margin_std": 6.870976448059082,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.7316704459561603,
|
|
"fcm_dpo/beta": 0.1890522539615631,
|
|
"fcm_dpo/delta": 0.09609914571046829,
|
|
"fcm_dpo/margin": 4.021857738494873,
|
|
"fcm_dpo/q_t": 0.3523946702480316,
|
|
"grad_norm": 50.47954177856445,
|
|
"learning_rate": 1.0283982962570681e-07,
|
|
"logits/chosen": 0.30475252866744995,
|
|
"logits/rejected": 0.26562267541885376,
|
|
"logps/chosen": -65.94442749023438,
|
|
"logps/ref_chosen": -57.76945877075195,
|
|
"logps/ref_rejected": -71.6829833984375,
|
|
"logps/rejected": -83.87980651855469,
|
|
"loss": 0.9651,
|
|
"margin_dpo/margin_mean": 4.021857261657715,
|
|
"margin_dpo/margin_std": 5.328012466430664,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.7331821617535903,
|
|
"fcm_dpo/beta": 0.1838168501853943,
|
|
"fcm_dpo/delta": -0.04106524586677551,
|
|
"fcm_dpo/margin": 4.771086692810059,
|
|
"fcm_dpo/q_t": 0.335861474275589,
|
|
"grad_norm": 45.630088806152344,
|
|
"learning_rate": 1.0177301773633992e-07,
|
|
"logits/chosen": 0.296680212020874,
|
|
"logits/rejected": 0.26908427476882935,
|
|
"logps/chosen": -64.26364135742188,
|
|
"logps/ref_chosen": -56.63584899902344,
|
|
"logps/ref_rejected": -70.85614013671875,
|
|
"logps/rejected": -83.25502014160156,
|
|
"loss": 0.9562,
|
|
"margin_dpo/margin_mean": 4.771087646484375,
|
|
"margin_dpo/margin_std": 6.045078277587891,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.7346938775510204,
|
|
"fcm_dpo/beta": 0.18868184089660645,
|
|
"fcm_dpo/delta": 0.03267286345362663,
|
|
"fcm_dpo/margin": 4.345629692077637,
|
|
"fcm_dpo/q_t": 0.3601461946964264,
|
|
"grad_norm": 48.578330993652344,
|
|
"learning_rate": 1.007103520743035e-07,
|
|
"logits/chosen": 0.2949785590171814,
|
|
"logits/rejected": 0.20217090845108032,
|
|
"logps/chosen": -65.90437316894531,
|
|
"logps/ref_chosen": -56.347023010253906,
|
|
"logps/ref_rejected": -85.97221374511719,
|
|
"logps/rejected": -99.87519836425781,
|
|
"loss": 1.0612,
|
|
"margin_dpo/margin_mean": 4.345630168914795,
|
|
"margin_dpo/margin_std": 7.313272476196289,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.7362055933484505,
|
|
"fcm_dpo/beta": 0.1893056333065033,
|
|
"fcm_dpo/delta": 0.00844599213451147,
|
|
"fcm_dpo/margin": 4.448924541473389,
|
|
"fcm_dpo/q_t": 0.34452641010284424,
|
|
"grad_norm": 50.25308609008789,
|
|
"learning_rate": 9.965186236464046e-08,
|
|
"logits/chosen": 0.34107547998428345,
|
|
"logits/rejected": 0.29102659225463867,
|
|
"logps/chosen": -68.70523834228516,
|
|
"logps/ref_chosen": -60.617218017578125,
|
|
"logps/ref_rejected": -82.50975036621094,
|
|
"logps/rejected": -95.04669189453125,
|
|
"loss": 0.9623,
|
|
"margin_dpo/margin_mean": 4.448925018310547,
|
|
"margin_dpo/margin_std": 6.057121276855469,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.7377173091458806,
|
|
"fcm_dpo/beta": 0.18650321662425995,
|
|
"fcm_dpo/delta": -0.1942426562309265,
|
|
"fcm_dpo/margin": 5.484685897827148,
|
|
"fcm_dpo/q_t": 0.30992603302001953,
|
|
"grad_norm": 43.414710998535156,
|
|
"learning_rate": 9.859757821558337e-08,
|
|
"logits/chosen": 0.2581988275051117,
|
|
"logits/rejected": 0.1931534707546234,
|
|
"logps/chosen": -70.77301025390625,
|
|
"logps/ref_chosen": -63.10905075073242,
|
|
"logps/ref_rejected": -82.49348449707031,
|
|
"logps/rejected": -95.64212036132812,
|
|
"loss": 0.8646,
|
|
"margin_dpo/margin_mean": 5.484686374664307,
|
|
"margin_dpo/margin_std": 6.123306751251221,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.7392290249433107,
|
|
"fcm_dpo/beta": 0.1910599172115326,
|
|
"fcm_dpo/delta": 0.34434235095977783,
|
|
"fcm_dpo/margin": 2.7443325519561768,
|
|
"fcm_dpo/q_t": 0.40401116013526917,
|
|
"grad_norm": 54.903987884521484,
|
|
"learning_rate": 9.754752911772615e-08,
|
|
"logits/chosen": 0.29846933484077454,
|
|
"logits/rejected": 0.2590899169445038,
|
|
"logps/chosen": -73.30157470703125,
|
|
"logps/ref_chosen": -64.98896026611328,
|
|
"logps/ref_rejected": -84.39607238769531,
|
|
"logps/rejected": -95.45301818847656,
|
|
"loss": 1.2114,
|
|
"margin_dpo/margin_mean": 2.744333267211914,
|
|
"margin_dpo/margin_std": 6.05691385269165,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.7407407407407407,
|
|
"fcm_dpo/beta": 0.199541836977005,
|
|
"fcm_dpo/delta": 0.13197389245033264,
|
|
"fcm_dpo/margin": 3.642141580581665,
|
|
"fcm_dpo/q_t": 0.3773440718650818,
|
|
"grad_norm": 61.38715744018555,
|
|
"learning_rate": 9.650174444319956e-08,
|
|
"logits/chosen": 0.326549768447876,
|
|
"logits/rejected": 0.30207884311676025,
|
|
"logps/chosen": -70.25138854980469,
|
|
"logps/ref_chosen": -61.90874481201172,
|
|
"logps/ref_rejected": -70.58566284179688,
|
|
"logps/rejected": -82.57044982910156,
|
|
"loss": 1.2245,
|
|
"margin_dpo/margin_mean": 3.642141342163086,
|
|
"margin_dpo/margin_std": 7.560006141662598,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.7422524565381708,
|
|
"fcm_dpo/beta": 0.19735094904899597,
|
|
"fcm_dpo/delta": -0.0074592530727386475,
|
|
"fcm_dpo/margin": 4.32567024230957,
|
|
"fcm_dpo/q_t": 0.3363799452781677,
|
|
"grad_norm": 47.401344299316406,
|
|
"learning_rate": 9.546025344484868e-08,
|
|
"logits/chosen": 0.18645425140857697,
|
|
"logits/rejected": 0.13546575605869293,
|
|
"logps/chosen": -63.35127258300781,
|
|
"logps/ref_chosen": -55.47570037841797,
|
|
"logps/ref_rejected": -78.70318603515625,
|
|
"logps/rejected": -90.90443420410156,
|
|
"loss": 0.9645,
|
|
"margin_dpo/margin_mean": 4.3256707191467285,
|
|
"margin_dpo/margin_std": 5.561920166015625,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.7437641723356009,
|
|
"fcm_dpo/beta": 0.20590060949325562,
|
|
"fcm_dpo/delta": 0.10966280847787857,
|
|
"fcm_dpo/margin": 3.6100869178771973,
|
|
"fcm_dpo/q_t": 0.37008100748062134,
|
|
"grad_norm": 61.88254928588867,
|
|
"learning_rate": 9.442308525541589e-08,
|
|
"logits/chosen": 0.24195045232772827,
|
|
"logits/rejected": 0.17962196469306946,
|
|
"logps/chosen": -77.16026306152344,
|
|
"logps/ref_chosen": -67.28638458251953,
|
|
"logps/ref_rejected": -82.78628540039062,
|
|
"logps/rejected": -96.27024841308594,
|
|
"loss": 1.1601,
|
|
"margin_dpo/margin_mean": 3.610086441040039,
|
|
"margin_dpo/margin_std": 6.680771827697754,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.745275888133031,
|
|
"fcm_dpo/beta": 0.1993444263935089,
|
|
"fcm_dpo/delta": -0.19864656031131744,
|
|
"fcm_dpo/margin": 5.160065650939941,
|
|
"fcm_dpo/q_t": 0.3086199164390564,
|
|
"grad_norm": 48.454776763916016,
|
|
"learning_rate": 9.339026888672468e-08,
|
|
"logits/chosen": 0.2442186325788498,
|
|
"logits/rejected": 0.1783977746963501,
|
|
"logps/chosen": -63.58871078491211,
|
|
"logps/ref_chosen": -55.92750549316406,
|
|
"logps/ref_rejected": -79.12149810791016,
|
|
"logps/rejected": -91.9427719116211,
|
|
"loss": 0.902,
|
|
"margin_dpo/margin_mean": 5.160066604614258,
|
|
"margin_dpo/margin_std": 6.3702802658081055,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.7467876039304611,
|
|
"fcm_dpo/beta": 0.19964680075645447,
|
|
"fcm_dpo/delta": 0.14756183326244354,
|
|
"fcm_dpo/margin": 3.572193145751953,
|
|
"fcm_dpo/q_t": 0.37176692485809326,
|
|
"grad_norm": 62.97100830078125,
|
|
"learning_rate": 9.236183322886945e-08,
|
|
"logits/chosen": 0.16220593452453613,
|
|
"logits/rejected": 0.1173371970653534,
|
|
"logps/chosen": -75.95932006835938,
|
|
"logps/ref_chosen": -67.95410919189453,
|
|
"logps/ref_rejected": -90.50865173339844,
|
|
"logps/rejected": -102.0860595703125,
|
|
"loss": 1.178,
|
|
"margin_dpo/margin_mean": 3.572193145751953,
|
|
"margin_dpo/margin_std": 7.063486576080322,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.7482993197278912,
|
|
"fcm_dpo/beta": 0.20778684318065643,
|
|
"fcm_dpo/delta": 0.14525321125984192,
|
|
"fcm_dpo/margin": 3.4341237545013428,
|
|
"fcm_dpo/q_t": 0.37397438287734985,
|
|
"grad_norm": 52.764556884765625,
|
|
"learning_rate": 9.133780704940594e-08,
|
|
"logits/chosen": 0.33206620812416077,
|
|
"logits/rejected": 0.27231916785240173,
|
|
"logps/chosen": -60.14783477783203,
|
|
"logps/ref_chosen": -52.62546157836914,
|
|
"logps/ref_rejected": -72.06781005859375,
|
|
"logps/rejected": -83.02430725097656,
|
|
"loss": 1.0904,
|
|
"margin_dpo/margin_mean": 3.4341230392456055,
|
|
"margin_dpo/margin_std": 5.9770073890686035,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.7498110355253212,
|
|
"fcm_dpo/beta": 0.2041645646095276,
|
|
"fcm_dpo/delta": -0.07183012366294861,
|
|
"fcm_dpo/margin": 4.473697185516357,
|
|
"fcm_dpo/q_t": 0.3525882959365845,
|
|
"grad_norm": 54.563907623291016,
|
|
"learning_rate": 9.031821899254797e-08,
|
|
"logits/chosen": 0.2877293825149536,
|
|
"logits/rejected": 0.1940247118473053,
|
|
"logps/chosen": -65.89447021484375,
|
|
"logps/ref_chosen": -57.597320556640625,
|
|
"logps/ref_rejected": -94.36127471923828,
|
|
"logps/rejected": -107.13212585449219,
|
|
"loss": 1.0568,
|
|
"margin_dpo/margin_mean": 4.473696708679199,
|
|
"margin_dpo/margin_std": 7.258790969848633,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.7513227513227513,
|
|
"fcm_dpo/beta": 0.1935914307832718,
|
|
"fcm_dpo/delta": -0.3445747196674347,
|
|
"fcm_dpo/margin": 5.949126243591309,
|
|
"fcm_dpo/q_t": 0.29749977588653564,
|
|
"grad_norm": 48.13323211669922,
|
|
"learning_rate": 8.930309757836516e-08,
|
|
"logits/chosen": 0.2928311824798584,
|
|
"logits/rejected": 0.2597949504852295,
|
|
"logps/chosen": -81.34271240234375,
|
|
"logps/ref_chosen": -72.78994750976562,
|
|
"logps/ref_rejected": -89.48483276367188,
|
|
"logps/rejected": -103.98672485351562,
|
|
"loss": 0.8528,
|
|
"margin_dpo/margin_mean": 5.949126243591309,
|
|
"margin_dpo/margin_std": 7.026630401611328,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.7528344671201814,
|
|
"fcm_dpo/beta": 0.188248872756958,
|
|
"fcm_dpo/delta": -0.13193053007125854,
|
|
"fcm_dpo/margin": 5.149151802062988,
|
|
"fcm_dpo/q_t": 0.32633334398269653,
|
|
"grad_norm": 52.20969009399414,
|
|
"learning_rate": 8.829247120198563e-08,
|
|
"logits/chosen": 0.279436856508255,
|
|
"logits/rejected": 0.24694347381591797,
|
|
"logps/chosen": -75.69930267333984,
|
|
"logps/ref_chosen": -68.36572265625,
|
|
"logps/ref_rejected": -71.28846740722656,
|
|
"logps/rejected": -83.77120971679688,
|
|
"loss": 0.8913,
|
|
"margin_dpo/margin_mean": 5.149151802062988,
|
|
"margin_dpo/margin_std": 6.356961250305176,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.7543461829176115,
|
|
"fcm_dpo/beta": 0.1871645599603653,
|
|
"fcm_dpo/delta": 0.03554215282201767,
|
|
"fcm_dpo/margin": 4.36710786819458,
|
|
"fcm_dpo/q_t": 0.361484557390213,
|
|
"grad_norm": 50.920005798339844,
|
|
"learning_rate": 8.728636813280163e-08,
|
|
"logits/chosen": 0.257360577583313,
|
|
"logits/rejected": 0.19850978255271912,
|
|
"logps/chosen": -69.13787841796875,
|
|
"logps/ref_chosen": -61.90882873535156,
|
|
"logps/ref_rejected": -91.9411392211914,
|
|
"logps/rejected": -103.53729248046875,
|
|
"loss": 1.1192,
|
|
"margin_dpo/margin_mean": 4.367107391357422,
|
|
"margin_dpo/margin_std": 7.621745586395264,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.7558578987150416,
|
|
"fcm_dpo/beta": 0.18969736993312836,
|
|
"fcm_dpo/delta": 0.06616081297397614,
|
|
"fcm_dpo/margin": 4.159477710723877,
|
|
"fcm_dpo/q_t": 0.3585333228111267,
|
|
"grad_norm": 54.2974853515625,
|
|
"learning_rate": 8.628481651367875e-08,
|
|
"logits/chosen": 0.20467233657836914,
|
|
"logits/rejected": 0.19438844919204712,
|
|
"logps/chosen": -78.15548706054688,
|
|
"logps/ref_chosen": -70.225830078125,
|
|
"logps/ref_rejected": -71.72203063964844,
|
|
"logps/rejected": -83.8111572265625,
|
|
"loss": 1.0841,
|
|
"margin_dpo/margin_mean": 4.159477233886719,
|
|
"margin_dpo/margin_std": 6.929226875305176,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.7573696145124716,
|
|
"fcm_dpo/beta": 0.1939505636692047,
|
|
"fcm_dpo/delta": 0.0667010024189949,
|
|
"fcm_dpo/margin": 4.054576396942139,
|
|
"fcm_dpo/q_t": 0.3484607934951782,
|
|
"grad_norm": 45.469051361083984,
|
|
"learning_rate": 8.528784436016878e-08,
|
|
"logits/chosen": 0.2413794994354248,
|
|
"logits/rejected": 0.22745752334594727,
|
|
"logps/chosen": -72.44284057617188,
|
|
"logps/ref_chosen": -64.59880828857422,
|
|
"logps/ref_rejected": -70.59329223632812,
|
|
"logps/rejected": -82.49189758300781,
|
|
"loss": 0.9391,
|
|
"margin_dpo/margin_mean": 4.054576873779297,
|
|
"margin_dpo/margin_std": 5.029389381408691,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.7588813303099018,
|
|
"fcm_dpo/beta": 0.1954634189605713,
|
|
"fcm_dpo/delta": 0.08000719547271729,
|
|
"fcm_dpo/margin": 3.9702367782592773,
|
|
"fcm_dpo/q_t": 0.35143595933914185,
|
|
"grad_norm": 54.6177978515625,
|
|
"learning_rate": 8.4295479559726e-08,
|
|
"logits/chosen": 0.2817111015319824,
|
|
"logits/rejected": 0.24115899205207825,
|
|
"logps/chosen": -72.94097900390625,
|
|
"logps/ref_chosen": -65.46662902832031,
|
|
"logps/ref_rejected": -90.22233581542969,
|
|
"logps/rejected": -101.66691589355469,
|
|
"loss": 0.971,
|
|
"margin_dpo/margin_mean": 3.9702374935150146,
|
|
"margin_dpo/margin_std": 5.515082836151123,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.7603930461073318,
|
|
"fcm_dpo/beta": 0.19681644439697266,
|
|
"fcm_dpo/delta": 0.032539092004299164,
|
|
"fcm_dpo/margin": 4.167027473449707,
|
|
"fcm_dpo/q_t": 0.3450298309326172,
|
|
"grad_norm": 50.10343933105469,
|
|
"learning_rate": 8.330774987092712e-08,
|
|
"logits/chosen": 0.26110100746154785,
|
|
"logits/rejected": 0.241864413022995,
|
|
"logps/chosen": -58.82035446166992,
|
|
"logps/ref_chosen": -51.83476257324219,
|
|
"logps/ref_rejected": -57.62522506713867,
|
|
"logps/rejected": -68.77783966064453,
|
|
"loss": 1.0317,
|
|
"margin_dpo/margin_mean": 4.167027473449707,
|
|
"margin_dpo/margin_std": 6.2261762619018555,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.7619047619047619,
|
|
"fcm_dpo/beta": 0.1906535029411316,
|
|
"fcm_dpo/delta": -0.28664782643318176,
|
|
"fcm_dpo/margin": 5.799999713897705,
|
|
"fcm_dpo/q_t": 0.2936969995498657,
|
|
"grad_norm": 47.68788146972656,
|
|
"learning_rate": 8.232468292269479e-08,
|
|
"logits/chosen": 0.21891510486602783,
|
|
"logits/rejected": 0.19424328207969666,
|
|
"logps/chosen": -75.66064453125,
|
|
"logps/ref_chosen": -68.65119934082031,
|
|
"logps/ref_rejected": -77.91394805908203,
|
|
"logps/rejected": -90.72340393066406,
|
|
"loss": 0.7799,
|
|
"margin_dpo/margin_mean": 5.800000190734863,
|
|
"margin_dpo/margin_std": 5.795510768890381,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.763416477702192,
|
|
"fcm_dpo/beta": 0.18748188018798828,
|
|
"fcm_dpo/delta": 0.11835876107215881,
|
|
"fcm_dpo/margin": 3.9429283142089844,
|
|
"fcm_dpo/q_t": 0.3705042004585266,
|
|
"grad_norm": 56.70164108276367,
|
|
"learning_rate": 8.134630621352483e-08,
|
|
"logits/chosen": 0.2908661365509033,
|
|
"logits/rejected": 0.25435012578964233,
|
|
"logps/chosen": -67.60441589355469,
|
|
"logps/ref_chosen": -59.99884796142578,
|
|
"logps/ref_rejected": -76.88048553466797,
|
|
"logps/rejected": -88.42898559570312,
|
|
"loss": 1.1116,
|
|
"margin_dpo/margin_mean": 3.942927598953247,
|
|
"margin_dpo/margin_std": 6.963860988616943,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.764928193499622,
|
|
"fcm_dpo/beta": 0.19286711513996124,
|
|
"fcm_dpo/delta": 0.10626673698425293,
|
|
"fcm_dpo/margin": 3.8978567123413086,
|
|
"fcm_dpo/q_t": 0.3608360290527344,
|
|
"grad_norm": 59.737823486328125,
|
|
"learning_rate": 8.037264711071698e-08,
|
|
"logits/chosen": 0.27555349469184875,
|
|
"logits/rejected": 0.2518009543418884,
|
|
"logps/chosen": -77.1668472290039,
|
|
"logps/ref_chosen": -70.07130432128906,
|
|
"logps/ref_rejected": -82.03775024414062,
|
|
"logps/rejected": -93.03116607666016,
|
|
"loss": 1.0974,
|
|
"margin_dpo/margin_mean": 3.8978567123413086,
|
|
"margin_dpo/margin_std": 6.750155925750732,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.7664399092970522,
|
|
"fcm_dpo/beta": 0.19243893027305603,
|
|
"fcm_dpo/delta": -0.06309761106967926,
|
|
"fcm_dpo/margin": 4.713381767272949,
|
|
"fcm_dpo/q_t": 0.3451889157295227,
|
|
"grad_norm": 51.14573669433594,
|
|
"learning_rate": 7.940373284960933e-08,
|
|
"logits/chosen": 0.2574292719364166,
|
|
"logits/rejected": 0.21393823623657227,
|
|
"logps/chosen": -79.86482238769531,
|
|
"logps/ref_chosen": -72.00703430175781,
|
|
"logps/ref_rejected": -93.94987487792969,
|
|
"logps/rejected": -106.52104187011719,
|
|
"loss": 1.0122,
|
|
"margin_dpo/margin_mean": 4.713381767272949,
|
|
"margin_dpo/margin_std": 7.080389976501465,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.7679516250944822,
|
|
"fcm_dpo/beta": 0.19176054000854492,
|
|
"fcm_dpo/delta": -0.08605434000492096,
|
|
"fcm_dpo/margin": 4.834778308868408,
|
|
"fcm_dpo/q_t": 0.33275818824768066,
|
|
"grad_norm": 50.69294357299805,
|
|
"learning_rate": 7.843959053281663e-08,
|
|
"logits/chosen": 0.24106700718402863,
|
|
"logits/rejected": 0.13942265510559082,
|
|
"logps/chosen": -67.20732116699219,
|
|
"logps/ref_chosen": -60.21992492675781,
|
|
"logps/ref_rejected": -95.9200668334961,
|
|
"logps/rejected": -107.74224853515625,
|
|
"loss": 0.9514,
|
|
"margin_dpo/margin_mean": 4.834778308868408,
|
|
"margin_dpo/margin_std": 6.46806526184082,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.7694633408919124,
|
|
"fcm_dpo/beta": 0.18921613693237305,
|
|
"fcm_dpo/delta": 0.007762765511870384,
|
|
"fcm_dpo/margin": 4.454644203186035,
|
|
"fcm_dpo/q_t": 0.3404228389263153,
|
|
"grad_norm": 50.47156524658203,
|
|
"learning_rate": 7.748024712947204e-08,
|
|
"logits/chosen": 0.21267962455749512,
|
|
"logits/rejected": 0.18627651035785675,
|
|
"logps/chosen": -73.61685180664062,
|
|
"logps/ref_chosen": -66.27017211914062,
|
|
"logps/ref_rejected": -71.73065185546875,
|
|
"logps/rejected": -83.53197479248047,
|
|
"loss": 0.9905,
|
|
"margin_dpo/margin_mean": 4.454644680023193,
|
|
"margin_dpo/margin_std": 6.248918533325195,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.7709750566893424,
|
|
"fcm_dpo/beta": 0.18685214221477509,
|
|
"fcm_dpo/delta": -0.05087687447667122,
|
|
"fcm_dpo/margin": 4.791868209838867,
|
|
"fcm_dpo/q_t": 0.33913111686706543,
|
|
"grad_norm": 53.60639953613281,
|
|
"learning_rate": 7.652572947447272e-08,
|
|
"logits/chosen": 0.3427318334579468,
|
|
"logits/rejected": 0.2648513913154602,
|
|
"logps/chosen": -61.08159637451172,
|
|
"logps/ref_chosen": -53.54487609863281,
|
|
"logps/ref_rejected": -91.36648559570312,
|
|
"logps/rejected": -103.69507598876953,
|
|
"loss": 0.9909,
|
|
"margin_dpo/margin_mean": 4.791868686676025,
|
|
"margin_dpo/margin_std": 6.868839263916016,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.7724867724867724,
|
|
"fcm_dpo/beta": 0.180000901222229,
|
|
"fcm_dpo/delta": -0.2913375198841095,
|
|
"fcm_dpo/margin": 6.163692951202393,
|
|
"fcm_dpo/q_t": 0.29723721742630005,
|
|
"grad_norm": 44.948909759521484,
|
|
"learning_rate": 7.557606426772961e-08,
|
|
"logits/chosen": 0.2703331410884857,
|
|
"logits/rejected": 0.2265818864107132,
|
|
"logps/chosen": -63.129844665527344,
|
|
"logps/ref_chosen": -55.844383239746094,
|
|
"logps/ref_rejected": -86.49819946289062,
|
|
"logps/rejected": -99.94735717773438,
|
|
"loss": 0.8221,
|
|
"margin_dpo/margin_mean": 6.163693428039551,
|
|
"margin_dpo/margin_std": 6.658810615539551,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.7739984882842026,
|
|
"fcm_dpo/beta": 0.17689445614814758,
|
|
"fcm_dpo/delta": -0.06903138756752014,
|
|
"fcm_dpo/margin": 5.151975631713867,
|
|
"fcm_dpo/q_t": 0.3688731789588928,
|
|
"grad_norm": 48.45236587524414,
|
|
"learning_rate": 7.463127807341966e-08,
|
|
"logits/chosen": 0.15684255957603455,
|
|
"logits/rejected": 0.13879981637001038,
|
|
"logps/chosen": -69.19769287109375,
|
|
"logps/ref_chosen": -61.653038024902344,
|
|
"logps/ref_rejected": -72.83148193359375,
|
|
"logps/rejected": -85.52812194824219,
|
|
"loss": 1.0905,
|
|
"margin_dpo/margin_mean": 5.151974201202393,
|
|
"margin_dpo/margin_std": 11.439408302307129,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.7755102040816326,
|
|
"fcm_dpo/beta": 0.1718558818101883,
|
|
"fcm_dpo/delta": -0.029932759702205658,
|
|
"fcm_dpo/margin": 5.091045379638672,
|
|
"fcm_dpo/q_t": 0.3317672610282898,
|
|
"grad_norm": 35.039642333984375,
|
|
"learning_rate": 7.369139731924401e-08,
|
|
"logits/chosen": 0.3575049042701721,
|
|
"logits/rejected": 0.31288132071495056,
|
|
"logps/chosen": -57.67890930175781,
|
|
"logps/ref_chosen": -50.85256576538086,
|
|
"logps/ref_rejected": -69.21754455566406,
|
|
"logps/rejected": -81.13493347167969,
|
|
"loss": 0.8854,
|
|
"margin_dpo/margin_mean": 5.091045379638672,
|
|
"margin_dpo/margin_std": 5.839650630950928,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.7770219198790628,
|
|
"fcm_dpo/beta": 0.17328277230262756,
|
|
"fcm_dpo/delta": -0.023398784920573235,
|
|
"fcm_dpo/margin": 5.028005599975586,
|
|
"fcm_dpo/q_t": 0.3316155970096588,
|
|
"grad_norm": 45.34275817871094,
|
|
"learning_rate": 7.275644829568747e-08,
|
|
"logits/chosen": 0.296786367893219,
|
|
"logits/rejected": 0.2570733428001404,
|
|
"logps/chosen": -77.50872039794922,
|
|
"logps/ref_chosen": -69.38493347167969,
|
|
"logps/ref_rejected": -83.32447814941406,
|
|
"logps/rejected": -96.47626495361328,
|
|
"loss": 0.9483,
|
|
"margin_dpo/margin_mean": 5.028005599975586,
|
|
"margin_dpo/margin_std": 6.503722667694092,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.7785336356764928,
|
|
"fcm_dpo/beta": 0.1716073453426361,
|
|
"fcm_dpo/delta": -0.023606671020388603,
|
|
"fcm_dpo/margin": 5.077951908111572,
|
|
"fcm_dpo/q_t": 0.33353549242019653,
|
|
"grad_norm": 41.92245864868164,
|
|
"learning_rate": 7.182645715528435e-08,
|
|
"logits/chosen": 0.28858014941215515,
|
|
"logits/rejected": 0.215793639421463,
|
|
"logps/chosen": -62.36624526977539,
|
|
"logps/ref_chosen": -53.687034606933594,
|
|
"logps/ref_rejected": -83.59614562988281,
|
|
"logps/rejected": -97.35330200195312,
|
|
"loss": 0.9296,
|
|
"margin_dpo/margin_mean": 5.077951431274414,
|
|
"margin_dpo/margin_std": 6.358786582946777,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.780045351473923,
|
|
"fcm_dpo/beta": 0.17564967274665833,
|
|
"fcm_dpo/delta": 0.12671038508415222,
|
|
"fcm_dpo/margin": 4.166781902313232,
|
|
"fcm_dpo/q_t": 0.35390928387641907,
|
|
"grad_norm": 43.83501052856445,
|
|
"learning_rate": 7.090144991188568e-08,
|
|
"logits/chosen": 0.2420632243156433,
|
|
"logits/rejected": 0.19160601496696472,
|
|
"logps/chosen": -63.76110076904297,
|
|
"logps/ref_chosen": -56.9017219543457,
|
|
"logps/ref_rejected": -67.83477783203125,
|
|
"logps/rejected": -78.8609390258789,
|
|
"loss": 1.0135,
|
|
"margin_dpo/margin_mean": 4.166782379150391,
|
|
"margin_dpo/margin_std": 6.096147537231445,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.781557067271353,
|
|
"fcm_dpo/beta": 0.17860382795333862,
|
|
"fcm_dpo/delta": 0.08228413015604019,
|
|
"fcm_dpo/margin": 4.333194255828857,
|
|
"fcm_dpo/q_t": 0.35983067750930786,
|
|
"grad_norm": 43.23679733276367,
|
|
"learning_rate": 6.998145243993284e-08,
|
|
"logits/chosen": 0.28628867864608765,
|
|
"logits/rejected": 0.27577298879623413,
|
|
"logps/chosen": -70.22264099121094,
|
|
"logps/ref_chosen": -61.775142669677734,
|
|
"logps/ref_rejected": -62.88270950317383,
|
|
"logps/rejected": -75.66339874267578,
|
|
"loss": 1.0162,
|
|
"margin_dpo/margin_mean": 4.333193778991699,
|
|
"margin_dpo/margin_std": 6.631152629852295,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.783068783068783,
|
|
"fcm_dpo/beta": 0.18542221188545227,
|
|
"fcm_dpo/delta": 0.19371888041496277,
|
|
"fcm_dpo/margin": 3.5995750427246094,
|
|
"fcm_dpo/q_t": 0.38120606541633606,
|
|
"grad_norm": 45.127986907958984,
|
|
"learning_rate": 6.906649047373245e-08,
|
|
"logits/chosen": 0.253539502620697,
|
|
"logits/rejected": 0.20896165072917938,
|
|
"logps/chosen": -69.38099670410156,
|
|
"logps/ref_chosen": -62.02523422241211,
|
|
"logps/ref_rejected": -79.06085205078125,
|
|
"logps/rejected": -90.01618957519531,
|
|
"loss": 1.1021,
|
|
"margin_dpo/margin_mean": 3.5995755195617676,
|
|
"margin_dpo/margin_std": 6.429238796234131,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.7845804988662132,
|
|
"fcm_dpo/beta": 0.19594839215278625,
|
|
"fcm_dpo/delta": 0.30930453538894653,
|
|
"fcm_dpo/margin": 2.8367679119110107,
|
|
"fcm_dpo/q_t": 0.4056819677352905,
|
|
"grad_norm": 65.59712982177734,
|
|
"learning_rate": 6.815658960673781e-08,
|
|
"logits/chosen": 0.28866493701934814,
|
|
"logits/rejected": 0.23657400906085968,
|
|
"logps/chosen": -70.72344207763672,
|
|
"logps/ref_chosen": -61.60636901855469,
|
|
"logps/ref_rejected": -74.50727844238281,
|
|
"logps/rejected": -86.46111297607422,
|
|
"loss": 1.3519,
|
|
"margin_dpo/margin_mean": 2.8367679119110107,
|
|
"margin_dpo/margin_std": 7.668980598449707,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.7860922146636432,
|
|
"fcm_dpo/beta": 0.1965680867433548,
|
|
"fcm_dpo/delta": 0.009395897388458252,
|
|
"fcm_dpo/margin": 4.272980690002441,
|
|
"fcm_dpo/q_t": 0.33951810002326965,
|
|
"grad_norm": 48.875953674316406,
|
|
"learning_rate": 6.725177529083209e-08,
|
|
"logits/chosen": 0.32016807794570923,
|
|
"logits/rejected": 0.2701001763343811,
|
|
"logps/chosen": -70.08049774169922,
|
|
"logps/ref_chosen": -62.87343215942383,
|
|
"logps/ref_rejected": -76.505615234375,
|
|
"logps/rejected": -87.98565673828125,
|
|
"loss": 0.9418,
|
|
"margin_dpo/margin_mean": 4.272979736328125,
|
|
"margin_dpo/margin_std": 5.404156684875488,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.7876039304610734,
|
|
"fcm_dpo/beta": 0.19937585294246674,
|
|
"fcm_dpo/delta": 0.0007315799593925476,
|
|
"fcm_dpo/margin": 4.257848262786865,
|
|
"fcm_dpo/q_t": 0.3392133116722107,
|
|
"grad_norm": 54.9530143737793,
|
|
"learning_rate": 6.63520728356167e-08,
|
|
"logits/chosen": 0.1798115372657776,
|
|
"logits/rejected": 0.11677288264036179,
|
|
"logps/chosen": -71.92523193359375,
|
|
"logps/ref_chosen": -64.20668029785156,
|
|
"logps/ref_rejected": -92.28083038330078,
|
|
"logps/rejected": -104.25723266601562,
|
|
"loss": 0.9967,
|
|
"margin_dpo/margin_mean": 4.257848262786865,
|
|
"margin_dpo/margin_std": 6.102939128875732,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.7891156462585034,
|
|
"fcm_dpo/beta": 0.19949057698249817,
|
|
"fcm_dpo/delta": 0.02661752700805664,
|
|
"fcm_dpo/margin": 4.138130187988281,
|
|
"fcm_dpo/q_t": 0.3503352105617523,
|
|
"grad_norm": 52.36747741699219,
|
|
"learning_rate": 6.545750740770336e-08,
|
|
"logits/chosen": 0.24446165561676025,
|
|
"logits/rejected": 0.22852011024951935,
|
|
"logps/chosen": -65.54415130615234,
|
|
"logps/ref_chosen": -58.369720458984375,
|
|
"logps/ref_rejected": -68.79248046875,
|
|
"logps/rejected": -80.10504150390625,
|
|
"loss": 1.124,
|
|
"margin_dpo/margin_mean": 4.138129234313965,
|
|
"margin_dpo/margin_std": 7.349329948425293,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.7906273620559335,
|
|
"fcm_dpo/beta": 0.19474643468856812,
|
|
"fcm_dpo/delta": -0.12762956321239471,
|
|
"fcm_dpo/margin": 4.950833320617676,
|
|
"fcm_dpo/q_t": 0.3200003504753113,
|
|
"grad_norm": 57.38889694213867,
|
|
"learning_rate": 6.456810403001012e-08,
|
|
"logits/chosen": 0.28123581409454346,
|
|
"logits/rejected": 0.18239277601242065,
|
|
"logps/chosen": -74.2017822265625,
|
|
"logps/ref_chosen": -65.71324157714844,
|
|
"logps/ref_rejected": -91.98896789550781,
|
|
"logps/rejected": -105.4283447265625,
|
|
"loss": 0.99,
|
|
"margin_dpo/margin_mean": 4.950833320617676,
|
|
"margin_dpo/margin_std": 6.844690322875977,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.7921390778533636,
|
|
"fcm_dpo/beta": 0.19928528368473053,
|
|
"fcm_dpo/delta": 0.14152082800865173,
|
|
"fcm_dpo/margin": 3.602719306945801,
|
|
"fcm_dpo/q_t": 0.36274781823158264,
|
|
"grad_norm": 56.699684143066406,
|
|
"learning_rate": 6.368388758106134e-08,
|
|
"logits/chosen": 0.25369152426719666,
|
|
"logits/rejected": 0.23207354545593262,
|
|
"logps/chosen": -82.81105041503906,
|
|
"logps/ref_chosen": -76.35124969482422,
|
|
"logps/ref_rejected": -89.96072387695312,
|
|
"logps/rejected": -100.02325439453125,
|
|
"loss": 1.0615,
|
|
"margin_dpo/margin_mean": 3.602719783782959,
|
|
"margin_dpo/margin_std": 5.887624740600586,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.7936507936507936,
|
|
"fcm_dpo/beta": 0.2056574821472168,
|
|
"fcm_dpo/delta": 0.18491694331169128,
|
|
"fcm_dpo/margin": 3.2947845458984375,
|
|
"fcm_dpo/q_t": 0.37222665548324585,
|
|
"grad_norm": 59.12623596191406,
|
|
"learning_rate": 6.280488279429185e-08,
|
|
"logits/chosen": 0.1436086744070053,
|
|
"logits/rejected": 0.12916171550750732,
|
|
"logps/chosen": -82.95500183105469,
|
|
"logps/ref_chosen": -75.49578857421875,
|
|
"logps/ref_rejected": -84.04852294921875,
|
|
"logps/rejected": -94.80252075195312,
|
|
"loss": 1.0611,
|
|
"margin_dpo/margin_mean": 3.2947843074798584,
|
|
"margin_dpo/margin_std": 5.342177391052246,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.7951625094482238,
|
|
"fcm_dpo/beta": 0.20773226022720337,
|
|
"fcm_dpo/delta": 0.12321735173463821,
|
|
"fcm_dpo/margin": 3.5220367908477783,
|
|
"fcm_dpo/q_t": 0.3576337397098541,
|
|
"grad_norm": 53.73698806762695,
|
|
"learning_rate": 6.193111425735515e-08,
|
|
"logits/chosen": 0.2691265046596527,
|
|
"logits/rejected": 0.2151428908109665,
|
|
"logps/chosen": -69.07579040527344,
|
|
"logps/ref_chosen": -61.29241943359375,
|
|
"logps/ref_rejected": -82.47763061523438,
|
|
"logps/rejected": -93.78303527832031,
|
|
"loss": 1.0304,
|
|
"margin_dpo/margin_mean": 3.5220372676849365,
|
|
"margin_dpo/margin_std": 4.995627403259277,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.7966742252456538,
|
|
"fcm_dpo/beta": 0.2198951095342636,
|
|
"fcm_dpo/delta": 0.2048795521259308,
|
|
"fcm_dpo/margin": 2.992419719696045,
|
|
"fcm_dpo/q_t": 0.3876710534095764,
|
|
"grad_norm": 77.22991943359375,
|
|
"learning_rate": 6.106260641143546e-08,
|
|
"logits/chosen": 0.34561246633529663,
|
|
"logits/rejected": 0.2859325706958771,
|
|
"logps/chosen": -69.87942504882812,
|
|
"logps/ref_chosen": -61.472625732421875,
|
|
"logps/ref_rejected": -90.52831268310547,
|
|
"logps/rejected": -101.92753601074219,
|
|
"loss": 1.2208,
|
|
"margin_dpo/margin_mean": 2.9924192428588867,
|
|
"margin_dpo/margin_std": 6.500236511230469,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.7981859410430839,
|
|
"fcm_dpo/beta": 0.22476676106452942,
|
|
"fcm_dpo/delta": 0.16451933979988098,
|
|
"fcm_dpo/margin": 3.0980749130249023,
|
|
"fcm_dpo/q_t": 0.3729293942451477,
|
|
"grad_norm": 56.562042236328125,
|
|
"learning_rate": 6.019938355056422e-08,
|
|
"logits/chosen": 0.15835247933864594,
|
|
"logits/rejected": 0.09338517487049103,
|
|
"logps/chosen": -66.64041900634766,
|
|
"logps/ref_chosen": -58.792015075683594,
|
|
"logps/ref_rejected": -71.82516479492188,
|
|
"logps/rejected": -82.77163696289062,
|
|
"loss": 1.2032,
|
|
"margin_dpo/margin_mean": 3.0980749130249023,
|
|
"margin_dpo/margin_std": 6.16510534286499,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.799697656840514,
|
|
"fcm_dpo/beta": 0.2031538188457489,
|
|
"fcm_dpo/delta": -0.6506019234657288,
|
|
"fcm_dpo/margin": 6.816672325134277,
|
|
"fcm_dpo/q_t": 0.2616058588027954,
|
|
"grad_norm": 40.82773208618164,
|
|
"learning_rate": 5.934146982094049e-08,
|
|
"logits/chosen": 0.18278737366199493,
|
|
"logits/rejected": 0.12872397899627686,
|
|
"logps/chosen": -61.67981719970703,
|
|
"logps/ref_chosen": -55.070960998535156,
|
|
"logps/ref_rejected": -75.44007873535156,
|
|
"logps/rejected": -88.8656005859375,
|
|
"loss": 0.759,
|
|
"margin_dpo/margin_mean": 6.816672325134277,
|
|
"margin_dpo/margin_std": 6.74721622467041,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.8012093726379441,
|
|
"fcm_dpo/beta": 0.20093229413032532,
|
|
"fcm_dpo/delta": -0.02702292613685131,
|
|
"fcm_dpo/margin": 4.352562427520752,
|
|
"fcm_dpo/q_t": 0.3375556468963623,
|
|
"grad_norm": 45.43187713623047,
|
|
"learning_rate": 5.848888922025552e-08,
|
|
"logits/chosen": 0.28208237886428833,
|
|
"logits/rejected": 0.24441692233085632,
|
|
"logps/chosen": -64.21714782714844,
|
|
"logps/ref_chosen": -56.743812561035156,
|
|
"logps/ref_rejected": -76.6692123413086,
|
|
"logps/rejected": -88.49510192871094,
|
|
"loss": 0.935,
|
|
"margin_dpo/margin_mean": 4.35256290435791,
|
|
"margin_dpo/margin_std": 5.480373382568359,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.8027210884353742,
|
|
"fcm_dpo/beta": 0.20748470723628998,
|
|
"fcm_dpo/delta": 0.13889390230178833,
|
|
"fcm_dpo/margin": 3.442054033279419,
|
|
"fcm_dpo/q_t": 0.37123921513557434,
|
|
"grad_norm": 58.889068603515625,
|
|
"learning_rate": 5.7641665597021435e-08,
|
|
"logits/chosen": 0.23493140935897827,
|
|
"logits/rejected": 0.18107816576957703,
|
|
"logps/chosen": -58.95187759399414,
|
|
"logps/ref_chosen": -51.116455078125,
|
|
"logps/ref_rejected": -79.52884674072266,
|
|
"logps/rejected": -90.80632019042969,
|
|
"loss": 1.0841,
|
|
"margin_dpo/margin_mean": 3.44205379486084,
|
|
"margin_dpo/margin_std": 5.732078552246094,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.8042328042328042,
|
|
"fcm_dpo/beta": 0.20114368200302124,
|
|
"fcm_dpo/delta": -0.1241353303194046,
|
|
"fcm_dpo/margin": 4.7797698974609375,
|
|
"fcm_dpo/q_t": 0.332572877407074,
|
|
"grad_norm": 49.33393096923828,
|
|
"learning_rate": 5.679982264990424e-08,
|
|
"logits/chosen": 0.19763997197151184,
|
|
"logits/rejected": 0.15084370970726013,
|
|
"logps/chosen": -66.58512115478516,
|
|
"logps/ref_chosen": -58.279945373535156,
|
|
"logps/ref_rejected": -78.05426788330078,
|
|
"logps/rejected": -91.13921356201172,
|
|
"loss": 0.969,
|
|
"margin_dpo/margin_mean": 4.779770374298096,
|
|
"margin_dpo/margin_std": 6.663928031921387,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.8057445200302343,
|
|
"fcm_dpo/beta": 0.1999870240688324,
|
|
"fcm_dpo/delta": -0.04222950339317322,
|
|
"fcm_dpo/margin": 4.442439079284668,
|
|
"fcm_dpo/q_t": 0.33722180128097534,
|
|
"grad_norm": 48.564510345458984,
|
|
"learning_rate": 5.596338392706076e-08,
|
|
"logits/chosen": 0.3552475869655609,
|
|
"logits/rejected": 0.3035711944103241,
|
|
"logps/chosen": -62.87090301513672,
|
|
"logps/ref_chosen": -56.41801071166992,
|
|
"logps/ref_rejected": -73.89324951171875,
|
|
"logps/rejected": -84.78857421875,
|
|
"loss": 0.991,
|
|
"margin_dpo/margin_mean": 4.442439079284668,
|
|
"margin_dpo/margin_std": 6.396747589111328,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.8072562358276644,
|
|
"fcm_dpo/beta": 0.19827596843242645,
|
|
"fcm_dpo/delta": 0.030380956828594208,
|
|
"fcm_dpo/margin": 4.141201019287109,
|
|
"fcm_dpo/q_t": 0.3470977544784546,
|
|
"grad_norm": 49.71409225463867,
|
|
"learning_rate": 5.513237282548033e-08,
|
|
"logits/chosen": 0.25465166568756104,
|
|
"logits/rejected": 0.2110665887594223,
|
|
"logps/chosen": -67.78778076171875,
|
|
"logps/ref_chosen": -60.748687744140625,
|
|
"logps/ref_rejected": -73.8623046875,
|
|
"logps/rejected": -85.04259490966797,
|
|
"loss": 1.0232,
|
|
"margin_dpo/margin_mean": 4.141200065612793,
|
|
"margin_dpo/margin_std": 6.159672737121582,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.8087679516250945,
|
|
"fcm_dpo/beta": 0.20083844661712646,
|
|
"fcm_dpo/delta": -0.018803313374519348,
|
|
"fcm_dpo/margin": 4.314824104309082,
|
|
"fcm_dpo/q_t": 0.3484407663345337,
|
|
"grad_norm": 49.287715911865234,
|
|
"learning_rate": 5.430681259032957e-08,
|
|
"logits/chosen": 0.16682550311088562,
|
|
"logits/rejected": 0.10979054868221283,
|
|
"logps/chosen": -69.68934631347656,
|
|
"logps/ref_chosen": -61.637413024902344,
|
|
"logps/ref_rejected": -80.93138885498047,
|
|
"logps/rejected": -93.29814147949219,
|
|
"loss": 1.0629,
|
|
"margin_dpo/margin_mean": 4.314825057983398,
|
|
"margin_dpo/margin_std": 6.968698501586914,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.8102796674225246,
|
|
"fcm_dpo/beta": 0.19315822422504425,
|
|
"fcm_dpo/delta": -0.16967397928237915,
|
|
"fcm_dpo/margin": 5.180801868438721,
|
|
"fcm_dpo/q_t": 0.31073105335235596,
|
|
"grad_norm": 40.0985107421875,
|
|
"learning_rate": 5.3486726314303175e-08,
|
|
"logits/chosen": 0.2999964952468872,
|
|
"logits/rejected": 0.22906377911567688,
|
|
"logps/chosen": -59.02777862548828,
|
|
"logps/ref_chosen": -51.88897705078125,
|
|
"logps/ref_rejected": -73.34864044189453,
|
|
"logps/rejected": -85.66825103759766,
|
|
"loss": 0.8271,
|
|
"margin_dpo/margin_mean": 5.1808013916015625,
|
|
"margin_dpo/margin_std": 5.521551132202148,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.8117913832199547,
|
|
"fcm_dpo/beta": 0.19068770110607147,
|
|
"fcm_dpo/delta": -0.007951788604259491,
|
|
"fcm_dpo/margin": 4.485739707946777,
|
|
"fcm_dpo/q_t": 0.3408752679824829,
|
|
"grad_norm": 45.67889404296875,
|
|
"learning_rate": 5.267213693697695e-08,
|
|
"logits/chosen": 0.3263600468635559,
|
|
"logits/rejected": 0.25088340044021606,
|
|
"logps/chosen": -62.604698181152344,
|
|
"logps/ref_chosen": -54.248619079589844,
|
|
"logps/ref_rejected": -94.94343566894531,
|
|
"logps/rejected": -107.78524780273438,
|
|
"loss": 1.0353,
|
|
"margin_dpo/margin_mean": 4.485739707946777,
|
|
"margin_dpo/margin_std": 6.675798416137695,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.8133030990173847,
|
|
"fcm_dpo/beta": 0.18905366957187653,
|
|
"fcm_dpo/delta": -0.18150761723518372,
|
|
"fcm_dpo/margin": 5.358842849731445,
|
|
"fcm_dpo/q_t": 0.30919575691223145,
|
|
"grad_norm": 49.55366516113281,
|
|
"learning_rate": 5.1863067244167144e-08,
|
|
"logits/chosen": 0.25182631611824036,
|
|
"logits/rejected": 0.2184920608997345,
|
|
"logps/chosen": -78.08734130859375,
|
|
"logps/ref_chosen": -70.09353637695312,
|
|
"logps/ref_rejected": -79.49833679199219,
|
|
"logps/rejected": -92.85098266601562,
|
|
"loss": 0.8536,
|
|
"margin_dpo/margin_mean": 5.358841896057129,
|
|
"margin_dpo/margin_std": 6.0890302658081055,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.8148148148148148,
|
|
"fcm_dpo/beta": 0.18460465967655182,
|
|
"fcm_dpo/delta": -0.013019578531384468,
|
|
"fcm_dpo/margin": 4.667385101318359,
|
|
"fcm_dpo/q_t": 0.33970072865486145,
|
|
"grad_norm": 46.4735107421875,
|
|
"learning_rate": 5.105953986729195e-08,
|
|
"logits/chosen": 0.2540702223777771,
|
|
"logits/rejected": 0.19249705970287323,
|
|
"logps/chosen": -69.79113006591797,
|
|
"logps/ref_chosen": -61.93169403076172,
|
|
"logps/ref_rejected": -84.08946228027344,
|
|
"logps/rejected": -96.61628723144531,
|
|
"loss": 0.9249,
|
|
"margin_dpo/margin_mean": 4.667386054992676,
|
|
"margin_dpo/margin_std": 5.918379783630371,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.8163265306122449,
|
|
"fcm_dpo/beta": 0.18176347017288208,
|
|
"fcm_dpo/delta": -0.2018444836139679,
|
|
"fcm_dpo/margin": 5.670566558837891,
|
|
"fcm_dpo/q_t": 0.31293582916259766,
|
|
"grad_norm": 48.642704010009766,
|
|
"learning_rate": 5.026157728273966e-08,
|
|
"logits/chosen": 0.28550148010253906,
|
|
"logits/rejected": 0.21018415689468384,
|
|
"logps/chosen": -70.13665771484375,
|
|
"logps/ref_chosen": -62.704254150390625,
|
|
"logps/ref_rejected": -95.63597106933594,
|
|
"logps/rejected": -108.73894500732422,
|
|
"loss": 0.8836,
|
|
"margin_dpo/margin_mean": 5.670566558837891,
|
|
"margin_dpo/margin_std": 6.66909122467041,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.817838246409675,
|
|
"fcm_dpo/beta": 0.17354460060596466,
|
|
"fcm_dpo/delta": -0.10751892626285553,
|
|
"fcm_dpo/margin": 5.446465969085693,
|
|
"fcm_dpo/q_t": 0.325096070766449,
|
|
"grad_norm": 43.86935806274414,
|
|
"learning_rate": 4.9469201811239035e-08,
|
|
"logits/chosen": 0.26334431767463684,
|
|
"logits/rejected": 0.2641550600528717,
|
|
"logps/chosen": -69.57955932617188,
|
|
"logps/ref_chosen": -62.48084259033203,
|
|
"logps/ref_rejected": -57.55541229248047,
|
|
"logps/rejected": -70.10059356689453,
|
|
"loss": 0.9464,
|
|
"margin_dpo/margin_mean": 5.446465492248535,
|
|
"margin_dpo/margin_std": 7.033263683319092,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.8193499622071051,
|
|
"fcm_dpo/beta": 0.1704559624195099,
|
|
"fcm_dpo/delta": -0.12397484481334686,
|
|
"fcm_dpo/margin": 5.641759872436523,
|
|
"fcm_dpo/q_t": 0.3238675892353058,
|
|
"grad_norm": 35.97234344482422,
|
|
"learning_rate": 4.868243561723534e-08,
|
|
"logits/chosen": 0.29136067628860474,
|
|
"logits/rejected": 0.240421861410141,
|
|
"logps/chosen": -55.7423095703125,
|
|
"logps/ref_chosen": -49.454891204833984,
|
|
"logps/ref_rejected": -65.33275604248047,
|
|
"logps/rejected": -77.26193237304688,
|
|
"loss": 0.9075,
|
|
"margin_dpo/margin_mean": 5.641759872436523,
|
|
"margin_dpo/margin_std": 6.985322952270508,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.8208616780045351,
|
|
"fcm_dpo/beta": 0.1713915467262268,
|
|
"fcm_dpo/delta": 0.03326155245304108,
|
|
"fcm_dpo/margin": 4.7779693603515625,
|
|
"fcm_dpo/q_t": 0.3359708786010742,
|
|
"grad_norm": 35.59272384643555,
|
|
"learning_rate": 4.790130070827028e-08,
|
|
"logits/chosen": 0.2787627577781677,
|
|
"logits/rejected": 0.20168402791023254,
|
|
"logps/chosen": -58.36046600341797,
|
|
"logps/ref_chosen": -51.100860595703125,
|
|
"logps/ref_rejected": -76.06130981445312,
|
|
"logps/rejected": -88.09889221191406,
|
|
"loss": 0.9651,
|
|
"margin_dpo/margin_mean": 4.7779693603515625,
|
|
"margin_dpo/margin_std": 6.366022109985352,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.8223733938019653,
|
|
"fcm_dpo/beta": 0.16629686951637268,
|
|
"fcm_dpo/delta": -0.18447908759117126,
|
|
"fcm_dpo/margin": 6.10788631439209,
|
|
"fcm_dpo/q_t": 0.3162338137626648,
|
|
"grad_norm": 39.026893615722656,
|
|
"learning_rate": 4.7125818934366454e-08,
|
|
"logits/chosen": 0.2566012740135193,
|
|
"logits/rejected": 0.19304610788822174,
|
|
"logps/chosen": -67.98334503173828,
|
|
"logps/ref_chosen": -60.2772331237793,
|
|
"logps/ref_rejected": -88.40553283691406,
|
|
"logps/rejected": -102.21954345703125,
|
|
"loss": 0.9123,
|
|
"margin_dpo/margin_mean": 6.10788631439209,
|
|
"margin_dpo/margin_std": 7.644372940063477,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.8238851095993953,
|
|
"fcm_dpo/beta": 0.16722407937049866,
|
|
"fcm_dpo/delta": 0.11144264042377472,
|
|
"fcm_dpo/margin": 4.466965675354004,
|
|
"fcm_dpo/q_t": 0.35912227630615234,
|
|
"grad_norm": 46.097412109375,
|
|
"learning_rate": 4.635601198741607e-08,
|
|
"logits/chosen": 0.2190103530883789,
|
|
"logits/rejected": 0.16595765948295593,
|
|
"logps/chosen": -69.51997375488281,
|
|
"logps/ref_chosen": -61.61524963378906,
|
|
"logps/ref_rejected": -78.71266174316406,
|
|
"logps/rejected": -91.0843505859375,
|
|
"loss": 1.0322,
|
|
"margin_dpo/margin_mean": 4.466965198516846,
|
|
"margin_dpo/margin_std": 6.752434730529785,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.8253968253968254,
|
|
"fcm_dpo/beta": 0.1708962321281433,
|
|
"fcm_dpo/delta": 0.09486165642738342,
|
|
"fcm_dpo/margin": 4.460666656494141,
|
|
"fcm_dpo/q_t": 0.349132239818573,
|
|
"grad_norm": 47.134735107421875,
|
|
"learning_rate": 4.559190140057428e-08,
|
|
"logits/chosen": 0.26342087984085083,
|
|
"logits/rejected": 0.24543830752372742,
|
|
"logps/chosen": -66.58544921875,
|
|
"logps/ref_chosen": -59.313262939453125,
|
|
"logps/ref_rejected": -64.73631286621094,
|
|
"logps/rejected": -76.46916198730469,
|
|
"loss": 1.0067,
|
|
"margin_dpo/margin_mean": 4.460666656494141,
|
|
"margin_dpo/margin_std": 6.285987377166748,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.8269085411942555,
|
|
"fcm_dpo/beta": 0.16892951726913452,
|
|
"fcm_dpo/delta": -0.09182556718587875,
|
|
"fcm_dpo/margin": 5.522754192352295,
|
|
"fcm_dpo/q_t": 0.31475168466567993,
|
|
"grad_norm": 37.49753189086914,
|
|
"learning_rate": 4.483350854765672e-08,
|
|
"logits/chosen": 0.2093636393547058,
|
|
"logits/rejected": 0.14936429262161255,
|
|
"logps/chosen": -61.5643310546875,
|
|
"logps/ref_chosen": -54.97674560546875,
|
|
"logps/ref_rejected": -75.35922241210938,
|
|
"logps/rejected": -87.46955871582031,
|
|
"loss": 0.8833,
|
|
"margin_dpo/margin_mean": 5.522754192352295,
|
|
"margin_dpo/margin_std": 6.282604217529297,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.8284202569916855,
|
|
"fcm_dpo/beta": 0.17445912957191467,
|
|
"fcm_dpo/delta": 0.1631871610879898,
|
|
"fcm_dpo/margin": 3.9804792404174805,
|
|
"fcm_dpo/q_t": 0.37146803736686707,
|
|
"grad_norm": 47.63283920288086,
|
|
"learning_rate": 4.4080854642541826e-08,
|
|
"logits/chosen": 0.18536588549613953,
|
|
"logits/rejected": 0.12500083446502686,
|
|
"logps/chosen": -71.25007629394531,
|
|
"logps/ref_chosen": -63.21067428588867,
|
|
"logps/ref_rejected": -81.23347473144531,
|
|
"logps/rejected": -93.25334930419922,
|
|
"loss": 1.0408,
|
|
"margin_dpo/margin_mean": 3.9804794788360596,
|
|
"margin_dpo/margin_std": 6.179323673248291,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.8299319727891157,
|
|
"fcm_dpo/beta": 0.17808356881141663,
|
|
"fcm_dpo/delta": 0.12966041266918182,
|
|
"fcm_dpo/margin": 4.094516754150391,
|
|
"fcm_dpo/q_t": 0.36078569293022156,
|
|
"grad_norm": 52.33575439453125,
|
|
"learning_rate": 4.333396073857723e-08,
|
|
"logits/chosen": 0.3678380846977234,
|
|
"logits/rejected": 0.3106314539909363,
|
|
"logps/chosen": -71.68885803222656,
|
|
"logps/ref_chosen": -64.27351379394531,
|
|
"logps/ref_rejected": -92.31663513183594,
|
|
"logps/rejected": -103.82649230957031,
|
|
"loss": 1.0571,
|
|
"margin_dpo/margin_mean": 4.094517707824707,
|
|
"margin_dpo/margin_std": 6.406397819519043,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.8314436885865457,
|
|
"fcm_dpo/beta": 0.18214035034179688,
|
|
"fcm_dpo/delta": 0.19023308157920837,
|
|
"fcm_dpo/margin": 3.6925172805786133,
|
|
"fcm_dpo/q_t": 0.37335193157196045,
|
|
"grad_norm": 42.97513961791992,
|
|
"learning_rate": 4.259284772799099e-08,
|
|
"logits/chosen": 0.25656658411026,
|
|
"logits/rejected": 0.22733622789382935,
|
|
"logps/chosen": -64.19790649414062,
|
|
"logps/ref_chosen": -56.230438232421875,
|
|
"logps/ref_rejected": -62.59788513183594,
|
|
"logps/rejected": -74.25787353515625,
|
|
"loss": 1.0665,
|
|
"margin_dpo/margin_mean": 3.692517042160034,
|
|
"margin_dpo/margin_std": 5.977490425109863,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.8329554043839759,
|
|
"fcm_dpo/beta": 0.18962660431861877,
|
|
"fcm_dpo/delta": 0.09512817859649658,
|
|
"fcm_dpo/margin": 4.010119438171387,
|
|
"fcm_dpo/q_t": 0.35618987679481506,
|
|
"grad_norm": 47.10663986206055,
|
|
"learning_rate": 4.1857536341307176e-08,
|
|
"logits/chosen": 0.30963271856307983,
|
|
"logits/rejected": 0.2778348922729492,
|
|
"logps/chosen": -75.92347717285156,
|
|
"logps/ref_chosen": -67.74720764160156,
|
|
"logps/ref_rejected": -87.04285430908203,
|
|
"logps/rejected": -99.229248046875,
|
|
"loss": 0.993,
|
|
"margin_dpo/margin_mean": 4.0101189613342285,
|
|
"margin_dpo/margin_std": 5.625774383544922,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.8344671201814059,
|
|
"fcm_dpo/beta": 0.18955287337303162,
|
|
"fcm_dpo/delta": -0.06805290281772614,
|
|
"fcm_dpo/margin": 4.80015230178833,
|
|
"fcm_dpo/q_t": 0.31782644987106323,
|
|
"grad_norm": 48.02134704589844,
|
|
"learning_rate": 4.112804714676593e-08,
|
|
"logits/chosen": 0.2556304633617401,
|
|
"logits/rejected": 0.20716384053230286,
|
|
"logps/chosen": -70.34280395507812,
|
|
"logps/ref_chosen": -62.92625427246094,
|
|
"logps/ref_rejected": -82.98365783691406,
|
|
"logps/rejected": -95.20034790039062,
|
|
"loss": 0.9436,
|
|
"margin_dpo/margin_mean": 4.80015230178833,
|
|
"margin_dpo/margin_std": 6.0169830322265625,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.8359788359788359,
|
|
"fcm_dpo/beta": 0.1859016716480255,
|
|
"fcm_dpo/delta": 0.005417622625827789,
|
|
"fcm_dpo/margin": 4.543527126312256,
|
|
"fcm_dpo/q_t": 0.36400720477104187,
|
|
"grad_norm": 50.315792083740234,
|
|
"learning_rate": 4.0404400549748144e-08,
|
|
"logits/chosen": 0.219588965177536,
|
|
"logits/rejected": 0.13211965560913086,
|
|
"logps/chosen": -64.96172332763672,
|
|
"logps/ref_chosen": -56.038490295410156,
|
|
"logps/ref_rejected": -84.48454284667969,
|
|
"logps/rejected": -97.9512939453125,
|
|
"loss": 1.1175,
|
|
"margin_dpo/margin_mean": 4.543527603149414,
|
|
"margin_dpo/margin_std": 8.079211235046387,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.8374905517762661,
|
|
"fcm_dpo/beta": 0.1871982216835022,
|
|
"fcm_dpo/delta": -0.0026739854365587234,
|
|
"fcm_dpo/margin": 4.553138732910156,
|
|
"fcm_dpo/q_t": 0.35031062364578247,
|
|
"grad_norm": 50.560482025146484,
|
|
"learning_rate": 3.968661679220467e-08,
|
|
"logits/chosen": 0.24321147799491882,
|
|
"logits/rejected": 0.21679717302322388,
|
|
"logps/chosen": -72.2421875,
|
|
"logps/ref_chosen": -64.53059387207031,
|
|
"logps/ref_rejected": -71.2155990600586,
|
|
"logps/rejected": -83.48033905029297,
|
|
"loss": 1.0192,
|
|
"margin_dpo/margin_mean": 4.553138732910156,
|
|
"margin_dpo/margin_std": 7.070949554443359,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.8390022675736961,
|
|
"fcm_dpo/beta": 0.19130748510360718,
|
|
"fcm_dpo/delta": 0.047974929213523865,
|
|
"fcm_dpo/margin": 4.179170608520508,
|
|
"fcm_dpo/q_t": 0.34389621019363403,
|
|
"grad_norm": 54.529205322265625,
|
|
"learning_rate": 3.89747159520904e-08,
|
|
"logits/chosen": 0.23633408546447754,
|
|
"logits/rejected": 0.20477698743343353,
|
|
"logps/chosen": -75.52586364746094,
|
|
"logps/ref_chosen": -66.65191650390625,
|
|
"logps/ref_rejected": -68.6667251586914,
|
|
"logps/rejected": -81.71983337402344,
|
|
"loss": 1.0644,
|
|
"margin_dpo/margin_mean": 4.179170608520508,
|
|
"margin_dpo/margin_std": 6.368129253387451,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.8405139833711263,
|
|
"fcm_dpo/beta": 0.18900999426841736,
|
|
"fcm_dpo/delta": 0.013541316613554955,
|
|
"fcm_dpo/margin": 4.431397438049316,
|
|
"fcm_dpo/q_t": 0.3580917716026306,
|
|
"grad_norm": 47.6467170715332,
|
|
"learning_rate": 3.826871794280192e-08,
|
|
"logits/chosen": 0.27349787950515747,
|
|
"logits/rejected": 0.22483891248703003,
|
|
"logps/chosen": -61.78699493408203,
|
|
"logps/ref_chosen": -52.832366943359375,
|
|
"logps/ref_rejected": -64.49044036865234,
|
|
"logps/rejected": -77.87646484375,
|
|
"loss": 1.0529,
|
|
"margin_dpo/margin_mean": 4.431397914886475,
|
|
"margin_dpo/margin_std": 7.119260311126709,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.8420256991685563,
|
|
"fcm_dpo/beta": 0.1805565357208252,
|
|
"fcm_dpo/delta": -0.22024190425872803,
|
|
"fcm_dpo/margin": 5.766865253448486,
|
|
"fcm_dpo/q_t": 0.3106374740600586,
|
|
"grad_norm": 45.826904296875,
|
|
"learning_rate": 3.756864251262143e-08,
|
|
"logits/chosen": 0.31960952281951904,
|
|
"logits/rejected": 0.2534366548061371,
|
|
"logps/chosen": -63.537750244140625,
|
|
"logps/ref_chosen": -55.03598403930664,
|
|
"logps/ref_rejected": -75.80644989013672,
|
|
"logps/rejected": -90.07508087158203,
|
|
"loss": 0.8573,
|
|
"margin_dpo/margin_mean": 5.766864776611328,
|
|
"margin_dpo/margin_std": 6.593277454376221,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.8435374149659864,
|
|
"fcm_dpo/beta": 0.17705780267715454,
|
|
"fcm_dpo/delta": -0.10933573544025421,
|
|
"fcm_dpo/margin": 5.353769779205322,
|
|
"fcm_dpo/q_t": 0.327720582485199,
|
|
"grad_norm": 47.34086990356445,
|
|
"learning_rate": 3.687450924416341e-08,
|
|
"logits/chosen": 0.32476723194122314,
|
|
"logits/rejected": 0.2724132239818573,
|
|
"logps/chosen": -70.65055847167969,
|
|
"logps/ref_chosen": -63.226348876953125,
|
|
"logps/ref_rejected": -91.46881866455078,
|
|
"logps/rejected": -104.24679565429688,
|
|
"loss": 0.9053,
|
|
"margin_dpo/margin_mean": 5.353769302368164,
|
|
"margin_dpo/margin_std": 6.638795852661133,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.8450491307634165,
|
|
"fcm_dpo/beta": 0.17447656393051147,
|
|
"fcm_dpo/delta": -0.02185794711112976,
|
|
"fcm_dpo/margin": 4.971553802490234,
|
|
"fcm_dpo/q_t": 0.34655678272247314,
|
|
"grad_norm": 44.358978271484375,
|
|
"learning_rate": 3.6186337553827743e-08,
|
|
"logits/chosen": 0.23880869150161743,
|
|
"logits/rejected": 0.17617136240005493,
|
|
"logps/chosen": -69.51097869873047,
|
|
"logps/ref_chosen": -61.521644592285156,
|
|
"logps/ref_rejected": -82.83859252929688,
|
|
"logps/rejected": -95.79948425292969,
|
|
"loss": 1.0178,
|
|
"margin_dpo/margin_mean": 4.971553802490234,
|
|
"margin_dpo/margin_std": 7.330011367797852,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.8465608465608465,
|
|
"fcm_dpo/beta": 0.17926636338233948,
|
|
"fcm_dpo/delta": 0.02828364074230194,
|
|
"fcm_dpo/margin": 4.576733589172363,
|
|
"fcm_dpo/q_t": 0.3509957492351532,
|
|
"grad_norm": 46.233280181884766,
|
|
"learning_rate": 3.550414669125573e-08,
|
|
"logits/chosen": 0.2628178298473358,
|
|
"logits/rejected": 0.22722047567367554,
|
|
"logps/chosen": -68.85298919677734,
|
|
"logps/ref_chosen": -60.64122009277344,
|
|
"logps/ref_rejected": -78.75474548339844,
|
|
"logps/rejected": -91.54324340820312,
|
|
"loss": 1.0091,
|
|
"margin_dpo/margin_mean": 4.576733589172363,
|
|
"margin_dpo/margin_std": 6.540881633758545,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.8480725623582767,
|
|
"fcm_dpo/beta": 0.17519283294677734,
|
|
"fcm_dpo/delta": -0.06392641365528107,
|
|
"fcm_dpo/margin": 5.182496547698975,
|
|
"fcm_dpo/q_t": 0.3386669456958771,
|
|
"grad_norm": 41.15628433227539,
|
|
"learning_rate": 3.482795573879241e-08,
|
|
"logits/chosen": 0.2851717174053192,
|
|
"logits/rejected": 0.25381070375442505,
|
|
"logps/chosen": -69.367919921875,
|
|
"logps/ref_chosen": -62.49859619140625,
|
|
"logps/ref_rejected": -78.72064208984375,
|
|
"logps/rejected": -90.77245330810547,
|
|
"loss": 0.9509,
|
|
"margin_dpo/margin_mean": 5.182496547698975,
|
|
"margin_dpo/margin_std": 7.013888359069824,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.8495842781557067,
|
|
"fcm_dpo/beta": 0.17091301083564758,
|
|
"fcm_dpo/delta": -0.11639774590730667,
|
|
"fcm_dpo/margin": 5.581327438354492,
|
|
"fcm_dpo/q_t": 0.3209976553916931,
|
|
"grad_norm": 40.8580322265625,
|
|
"learning_rate": 3.415778361095226e-08,
|
|
"logits/chosen": 0.2667577266693115,
|
|
"logits/rejected": 0.23144987225532532,
|
|
"logps/chosen": -82.92703247070312,
|
|
"logps/ref_chosen": -74.78173828125,
|
|
"logps/ref_rejected": -92.63499450683594,
|
|
"logps/rejected": -106.36161804199219,
|
|
"loss": 0.8912,
|
|
"margin_dpo/margin_mean": 5.58132791519165,
|
|
"margin_dpo/margin_std": 6.571352005004883,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.8510959939531368,
|
|
"fcm_dpo/beta": 0.17142033576965332,
|
|
"fcm_dpo/delta": 0.007226529531180859,
|
|
"fcm_dpo/margin": 4.919520378112793,
|
|
"fcm_dpo/q_t": 0.3452809453010559,
|
|
"grad_norm": 44.207237243652344,
|
|
"learning_rate": 3.349364905389032e-08,
|
|
"logits/chosen": 0.3307056427001953,
|
|
"logits/rejected": 0.28086477518081665,
|
|
"logps/chosen": -57.743751525878906,
|
|
"logps/ref_chosen": -50.19850158691406,
|
|
"logps/ref_rejected": -66.76687622070312,
|
|
"logps/rejected": -79.23164367675781,
|
|
"loss": 1.0449,
|
|
"margin_dpo/margin_mean": 4.919520378112793,
|
|
"margin_dpo/margin_std": 7.567540168762207,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.8526077097505669,
|
|
"fcm_dpo/beta": 0.17031943798065186,
|
|
"fcm_dpo/delta": -0.06808815151453018,
|
|
"fcm_dpo/margin": 5.352931976318359,
|
|
"fcm_dpo/q_t": 0.3269733786582947,
|
|
"grad_norm": 41.774166107177734,
|
|
"learning_rate": 3.283557064487785e-08,
|
|
"logits/chosen": 0.23458395898342133,
|
|
"logits/rejected": 0.2039821743965149,
|
|
"logps/chosen": -62.68193054199219,
|
|
"logps/ref_chosen": -55.7408447265625,
|
|
"logps/ref_rejected": -74.82323455810547,
|
|
"logps/rejected": -87.11726379394531,
|
|
"loss": 0.9261,
|
|
"margin_dpo/margin_mean": 5.352931022644043,
|
|
"margin_dpo/margin_std": 6.691219806671143,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.854119425547997,
|
|
"fcm_dpo/beta": 0.17381291091442108,
|
|
"fcm_dpo/delta": 0.14001916348934174,
|
|
"fcm_dpo/margin": 4.128961086273193,
|
|
"fcm_dpo/q_t": 0.36346155405044556,
|
|
"grad_norm": 47.45510482788086,
|
|
"learning_rate": 3.218356679178252e-08,
|
|
"logits/chosen": 0.304351806640625,
|
|
"logits/rejected": 0.25713908672332764,
|
|
"logps/chosen": -67.73616027832031,
|
|
"logps/ref_chosen": -58.33738327026367,
|
|
"logps/ref_rejected": -78.31776428222656,
|
|
"logps/rejected": -91.84550476074219,
|
|
"loss": 1.0411,
|
|
"margin_dpo/margin_mean": 4.128960609436035,
|
|
"margin_dpo/margin_std": 6.340240001678467,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.8556311413454271,
|
|
"fcm_dpo/beta": 0.17765888571739197,
|
|
"fcm_dpo/delta": 0.09049337357282639,
|
|
"fcm_dpo/margin": 4.29995584487915,
|
|
"fcm_dpo/q_t": 0.36482471227645874,
|
|
"grad_norm": 55.42988586425781,
|
|
"learning_rate": 3.1537655732553764e-08,
|
|
"logits/chosen": 0.3093351721763611,
|
|
"logits/rejected": 0.28633812069892883,
|
|
"logps/chosen": -79.33909606933594,
|
|
"logps/ref_chosen": -71.22373962402344,
|
|
"logps/ref_rejected": -71.11601257324219,
|
|
"logps/rejected": -83.53132629394531,
|
|
"loss": 1.1324,
|
|
"margin_dpo/margin_mean": 4.299956321716309,
|
|
"margin_dpo/margin_std": 7.633435249328613,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.8571428571428571,
|
|
"fcm_dpo/beta": 0.17416879534721375,
|
|
"fcm_dpo/delta": -0.03981255739927292,
|
|
"fcm_dpo/margin": 5.07735538482666,
|
|
"fcm_dpo/q_t": 0.33635973930358887,
|
|
"grad_norm": 41.349308013916016,
|
|
"learning_rate": 3.089785553471233e-08,
|
|
"logits/chosen": 0.3112683892250061,
|
|
"logits/rejected": 0.22656577825546265,
|
|
"logps/chosen": -60.94340133666992,
|
|
"logps/ref_chosen": -52.669273376464844,
|
|
"logps/ref_rejected": -74.34785461425781,
|
|
"logps/rejected": -87.6993408203125,
|
|
"loss": 0.9308,
|
|
"margin_dpo/margin_mean": 5.077354907989502,
|
|
"margin_dpo/margin_std": 6.490789413452148,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.8586545729402872,
|
|
"fcm_dpo/beta": 0.17367665469646454,
|
|
"fcm_dpo/delta": -0.14223557710647583,
|
|
"fcm_dpo/margin": 5.628249168395996,
|
|
"fcm_dpo/q_t": 0.31329670548439026,
|
|
"grad_norm": 38.60588836669922,
|
|
"learning_rate": 3.026418409484513e-08,
|
|
"logits/chosen": 0.27716493606567383,
|
|
"logits/rejected": 0.2054300308227539,
|
|
"logps/chosen": -59.362396240234375,
|
|
"logps/ref_chosen": -52.178001403808594,
|
|
"logps/ref_rejected": -85.8277587890625,
|
|
"logps/rejected": -98.64041137695312,
|
|
"loss": 0.8378,
|
|
"margin_dpo/margin_mean": 5.6282477378845215,
|
|
"margin_dpo/margin_std": 5.813695907592773,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.8601662887377173,
|
|
"fcm_dpo/beta": 0.17136166989803314,
|
|
"fcm_dpo/delta": 0.12116237729787827,
|
|
"fcm_dpo/margin": 4.294561862945557,
|
|
"fcm_dpo/q_t": 0.3553329408168793,
|
|
"grad_norm": 44.1339111328125,
|
|
"learning_rate": 2.963665913810451e-08,
|
|
"logits/chosen": 0.18196213245391846,
|
|
"logits/rejected": 0.1568015217781067,
|
|
"logps/chosen": -70.65408325195312,
|
|
"logps/ref_chosen": -62.649261474609375,
|
|
"logps/ref_rejected": -75.4298324584961,
|
|
"logps/rejected": -87.72921752929688,
|
|
"loss": 1.0417,
|
|
"margin_dpo/margin_mean": 4.294561386108398,
|
|
"margin_dpo/margin_std": 6.439499378204346,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.8616780045351474,
|
|
"fcm_dpo/beta": 0.1683310866355896,
|
|
"fcm_dpo/delta": -0.32472285628318787,
|
|
"fcm_dpo/margin": 6.764734268188477,
|
|
"fcm_dpo/q_t": 0.2851110100746155,
|
|
"grad_norm": 35.5472526550293,
|
|
"learning_rate": 2.9015298217712453e-08,
|
|
"logits/chosen": 0.2262299358844757,
|
|
"logits/rejected": 0.14589998126029968,
|
|
"logps/chosen": -56.79124450683594,
|
|
"logps/ref_chosen": -50.04179382324219,
|
|
"logps/ref_rejected": -78.27146911621094,
|
|
"logps/rejected": -91.78564453125,
|
|
"loss": 0.7689,
|
|
"margin_dpo/margin_mean": 6.764734268188477,
|
|
"margin_dpo/margin_std": 6.428610324859619,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.8631897203325775,
|
|
"fcm_dpo/beta": 0.16944804787635803,
|
|
"fcm_dpo/delta": 0.19956818222999573,
|
|
"fcm_dpo/margin": 3.9083523750305176,
|
|
"fcm_dpo/q_t": 0.3683198392391205,
|
|
"grad_norm": 42.308650970458984,
|
|
"learning_rate": 2.840011871446962e-08,
|
|
"logits/chosen": 0.2377762794494629,
|
|
"logits/rejected": 0.2050282061100006,
|
|
"logps/chosen": -61.697296142578125,
|
|
"logps/ref_chosen": -53.65681457519531,
|
|
"logps/ref_rejected": -66.13298034667969,
|
|
"logps/rejected": -78.0818099975586,
|
|
"loss": 1.0888,
|
|
"margin_dpo/margin_mean": 3.908352851867676,
|
|
"margin_dpo/margin_std": 6.5086469650268555,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.8647014361300076,
|
|
"fcm_dpo/beta": 0.17394644021987915,
|
|
"fcm_dpo/delta": 0.11392390727996826,
|
|
"fcm_dpo/margin": 4.276225566864014,
|
|
"fcm_dpo/q_t": 0.3549836277961731,
|
|
"grad_norm": 49.87938690185547,
|
|
"learning_rate": 2.7791137836269158e-08,
|
|
"logits/chosen": 0.29352182149887085,
|
|
"logits/rejected": 0.3179316520690918,
|
|
"logps/chosen": -82.94725036621094,
|
|
"logps/ref_chosen": -74.81792449951172,
|
|
"logps/ref_rejected": -65.88681030273438,
|
|
"logps/rejected": -78.29235076904297,
|
|
"loss": 0.9979,
|
|
"margin_dpo/margin_mean": 4.276226043701172,
|
|
"margin_dpo/margin_std": 6.016283988952637,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.8662131519274376,
|
|
"fcm_dpo/beta": 0.17347398400306702,
|
|
"fcm_dpo/delta": 0.006579414010047913,
|
|
"fcm_dpo/margin": 4.861777305603027,
|
|
"fcm_dpo/q_t": 0.3557564616203308,
|
|
"grad_norm": 50.46380615234375,
|
|
"learning_rate": 2.718837261761528e-08,
|
|
"logits/chosen": 0.2632012963294983,
|
|
"logits/rejected": 0.2227526158094406,
|
|
"logps/chosen": -77.41082763671875,
|
|
"logps/ref_chosen": -68.72564697265625,
|
|
"logps/ref_rejected": -88.16201782226562,
|
|
"logps/rejected": -101.70896911621094,
|
|
"loss": 1.0797,
|
|
"margin_dpo/margin_mean": 4.8617777824401855,
|
|
"margin_dpo/margin_std": 8.154335021972656,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.8677248677248677,
|
|
"fcm_dpo/beta": 0.17341530323028564,
|
|
"fcm_dpo/delta": -0.11930923908948898,
|
|
"fcm_dpo/margin": 5.5170817375183105,
|
|
"fcm_dpo/q_t": 0.31562137603759766,
|
|
"grad_norm": 38.88788986206055,
|
|
"learning_rate": 2.659183991914696e-08,
|
|
"logits/chosen": 0.3164798617362976,
|
|
"logits/rejected": 0.25295859575271606,
|
|
"logps/chosen": -64.34851837158203,
|
|
"logps/ref_chosen": -56.31340026855469,
|
|
"logps/ref_rejected": -83.91553497314453,
|
|
"logps/rejected": -97.46773529052734,
|
|
"loss": 0.8352,
|
|
"margin_dpo/margin_mean": 5.517082214355469,
|
|
"margin_dpo/margin_std": 5.712655067443848,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.8692365835222978,
|
|
"fcm_dpo/beta": 0.17326758801937103,
|
|
"fcm_dpo/delta": 0.19507335126399994,
|
|
"fcm_dpo/margin": 3.850635051727295,
|
|
"fcm_dpo/q_t": 0.3820122182369232,
|
|
"grad_norm": 47.62089920043945,
|
|
"learning_rate": 2.600155642716606e-08,
|
|
"logits/chosen": 0.32219284772872925,
|
|
"logits/rejected": 0.2561734616756439,
|
|
"logps/chosen": -72.64906311035156,
|
|
"logps/ref_chosen": -64.5841293334961,
|
|
"logps/ref_rejected": -93.47034454345703,
|
|
"logps/rejected": -105.38591766357422,
|
|
"loss": 1.147,
|
|
"margin_dpo/margin_mean": 3.850634813308716,
|
|
"margin_dpo/margin_std": 7.171681880950928,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.8707482993197279,
|
|
"fcm_dpo/beta": 0.17552167177200317,
|
|
"fcm_dpo/delta": -0.030258819460868835,
|
|
"fcm_dpo/margin": 4.996522903442383,
|
|
"fcm_dpo/q_t": 0.3442118763923645,
|
|
"grad_norm": 52.461891174316406,
|
|
"learning_rate": 2.5417538653170754e-08,
|
|
"logits/chosen": 0.28439998626708984,
|
|
"logits/rejected": 0.19962754845619202,
|
|
"logps/chosen": -60.18579864501953,
|
|
"logps/ref_chosen": -53.28052520751953,
|
|
"logps/ref_rejected": -84.2000503540039,
|
|
"logps/rejected": -96.10183715820312,
|
|
"loss": 0.9895,
|
|
"margin_dpo/margin_mean": 4.996521949768066,
|
|
"margin_dpo/margin_std": 7.055662631988525,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.872260015117158,
|
|
"fcm_dpo/beta": 0.18019238114356995,
|
|
"fcm_dpo/delta": 0.18000566959381104,
|
|
"fcm_dpo/margin": 3.7884044647216797,
|
|
"fcm_dpo/q_t": 0.3773168623447418,
|
|
"grad_norm": 47.84729766845703,
|
|
"learning_rate": 2.4839802933393607e-08,
|
|
"logits/chosen": 0.2548506557941437,
|
|
"logits/rejected": 0.23487058281898499,
|
|
"logps/chosen": -69.96644592285156,
|
|
"logps/ref_chosen": -62.32468795776367,
|
|
"logps/ref_rejected": -67.300537109375,
|
|
"logps/rejected": -78.73069763183594,
|
|
"loss": 1.1324,
|
|
"margin_dpo/margin_mean": 3.788404941558838,
|
|
"margin_dpo/margin_std": 7.017814636230469,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.873771730914588,
|
|
"fcm_dpo/beta": 0.1872500777244568,
|
|
"fcm_dpo/delta": 0.2222413420677185,
|
|
"fcm_dpo/margin": 3.4311113357543945,
|
|
"fcm_dpo/q_t": 0.38888439536094666,
|
|
"grad_norm": 50.03500747680664,
|
|
"learning_rate": 2.4268365428344733e-08,
|
|
"logits/chosen": 0.310182124376297,
|
|
"logits/rejected": 0.2835952341556549,
|
|
"logps/chosen": -64.78092956542969,
|
|
"logps/ref_chosen": -56.65557861328125,
|
|
"logps/ref_rejected": -68.21835327148438,
|
|
"logps/rejected": -79.77481079101562,
|
|
"loss": 1.1301,
|
|
"margin_dpo/margin_mean": 3.4311115741729736,
|
|
"margin_dpo/margin_std": 6.33961296081543,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.8752834467120182,
|
|
"fcm_dpo/beta": 0.18103978037834167,
|
|
"fcm_dpo/delta": -0.32594001293182373,
|
|
"fcm_dpo/margin": 6.280138969421387,
|
|
"fcm_dpo/q_t": 0.29298943281173706,
|
|
"grad_norm": 42.93381118774414,
|
|
"learning_rate": 2.3703242122359357e-08,
|
|
"logits/chosen": 0.2113402634859085,
|
|
"logits/rejected": 0.17864689230918884,
|
|
"logps/chosen": -64.75434875488281,
|
|
"logps/ref_chosen": -56.809661865234375,
|
|
"logps/ref_rejected": -68.09613037109375,
|
|
"logps/rejected": -82.32095336914062,
|
|
"loss": 0.805,
|
|
"margin_dpo/margin_mean": 6.280138969421387,
|
|
"margin_dpo/margin_std": 6.796194076538086,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.8767951625094482,
|
|
"fcm_dpo/beta": 0.18160466849803925,
|
|
"fcm_dpo/delta": 0.059685517102479935,
|
|
"fcm_dpo/margin": 4.3713226318359375,
|
|
"fcm_dpo/q_t": 0.35958653688430786,
|
|
"grad_norm": 43.264957427978516,
|
|
"learning_rate": 2.3144448823151392e-08,
|
|
"logits/chosen": 0.21349485218524933,
|
|
"logits/rejected": 0.17105573415756226,
|
|
"logps/chosen": -65.28569030761719,
|
|
"logps/ref_chosen": -57.70011520385742,
|
|
"logps/ref_rejected": -77.90664672851562,
|
|
"logps/rejected": -89.86354064941406,
|
|
"loss": 1.0838,
|
|
"margin_dpo/margin_mean": 4.371322154998779,
|
|
"margin_dpo/margin_std": 7.185041427612305,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.8783068783068783,
|
|
"fcm_dpo/beta": 0.18188832700252533,
|
|
"fcm_dpo/delta": 0.08677008748054504,
|
|
"fcm_dpo/margin": 4.2297773361206055,
|
|
"fcm_dpo/q_t": 0.3590962886810303,
|
|
"grad_norm": 51.48876190185547,
|
|
"learning_rate": 2.259200116137039e-08,
|
|
"logits/chosen": 0.3077224791049957,
|
|
"logits/rejected": 0.2525234818458557,
|
|
"logps/chosen": -67.98102569580078,
|
|
"logps/ref_chosen": -59.332359313964844,
|
|
"logps/ref_rejected": -83.64482116699219,
|
|
"logps/rejected": -96.52326965332031,
|
|
"loss": 1.0355,
|
|
"margin_dpo/margin_mean": 4.2297773361206055,
|
|
"margin_dpo/margin_std": 6.5604119300842285,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.8798185941043084,
|
|
"fcm_dpo/beta": 0.18250367045402527,
|
|
"fcm_dpo/delta": -0.04962211474776268,
|
|
"fcm_dpo/margin": 4.9043426513671875,
|
|
"fcm_dpo/q_t": 0.33205729722976685,
|
|
"grad_norm": 45.71406555175781,
|
|
"learning_rate": 2.204591459016525e-08,
|
|
"logits/chosen": 0.2833797335624695,
|
|
"logits/rejected": 0.30160412192344666,
|
|
"logps/chosen": -71.7133560180664,
|
|
"logps/ref_chosen": -64.16285705566406,
|
|
"logps/ref_rejected": -58.632896423339844,
|
|
"logps/rejected": -71.08773803710938,
|
|
"loss": 0.9396,
|
|
"margin_dpo/margin_mean": 4.9043426513671875,
|
|
"margin_dpo/margin_std": 6.365811347961426,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.8813303099017384,
|
|
"fcm_dpo/beta": 0.18371570110321045,
|
|
"fcm_dpo/delta": 0.05710229277610779,
|
|
"fcm_dpo/margin": 4.340847015380859,
|
|
"fcm_dpo/q_t": 0.36037206649780273,
|
|
"grad_norm": 49.90797424316406,
|
|
"learning_rate": 2.1506204384751064e-08,
|
|
"logits/chosen": 0.35961782932281494,
|
|
"logits/rejected": 0.27381908893585205,
|
|
"logps/chosen": -59.777198791503906,
|
|
"logps/ref_chosen": -51.87239456176758,
|
|
"logps/ref_rejected": -83.86331176757812,
|
|
"logps/rejected": -96.10896301269531,
|
|
"loss": 1.0971,
|
|
"margin_dpo/margin_mean": 4.340846061706543,
|
|
"margin_dpo/margin_std": 7.52435827255249,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.8828420256991686,
|
|
"fcm_dpo/beta": 0.18236804008483887,
|
|
"fcm_dpo/delta": -0.02732960134744644,
|
|
"fcm_dpo/margin": 4.791356563568115,
|
|
"fcm_dpo/q_t": 0.34965622425079346,
|
|
"grad_norm": 45.7866325378418,
|
|
"learning_rate": 2.09728856419826e-08,
|
|
"logits/chosen": 0.3540344834327698,
|
|
"logits/rejected": 0.2653522789478302,
|
|
"logps/chosen": -53.58039855957031,
|
|
"logps/ref_chosen": -46.571388244628906,
|
|
"logps/ref_rejected": -80.67969512939453,
|
|
"logps/rejected": -92.48005676269531,
|
|
"loss": 1.0547,
|
|
"margin_dpo/margin_mean": 4.791356563568115,
|
|
"margin_dpo/margin_std": 7.583406448364258,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.8843537414965986,
|
|
"fcm_dpo/beta": 0.18653377890586853,
|
|
"fcm_dpo/delta": 0.10668753832578659,
|
|
"fcm_dpo/margin": 4.0271315574646,
|
|
"fcm_dpo/q_t": 0.35192471742630005,
|
|
"grad_norm": 47.550262451171875,
|
|
"learning_rate": 2.044597327993153e-08,
|
|
"logits/chosen": 0.23220500349998474,
|
|
"logits/rejected": 0.19631624221801758,
|
|
"logps/chosen": -65.75765228271484,
|
|
"logps/ref_chosen": -58.124534606933594,
|
|
"logps/ref_rejected": -79.00538635253906,
|
|
"logps/rejected": -90.66563415527344,
|
|
"loss": 1.0511,
|
|
"margin_dpo/margin_mean": 4.027131080627441,
|
|
"margin_dpo/margin_std": 6.313577175140381,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.8858654572940288,
|
|
"fcm_dpo/beta": 0.18284255266189575,
|
|
"fcm_dpo/delta": -0.12782521545886993,
|
|
"fcm_dpo/margin": 5.273736000061035,
|
|
"fcm_dpo/q_t": 0.3119266927242279,
|
|
"grad_norm": 40.83317184448242,
|
|
"learning_rate": 1.9925482037469187e-08,
|
|
"logits/chosen": 0.2742339074611664,
|
|
"logits/rejected": 0.22679969668388367,
|
|
"logps/chosen": -62.187355041503906,
|
|
"logps/ref_chosen": -54.10163879394531,
|
|
"logps/ref_rejected": -63.72113037109375,
|
|
"logps/rejected": -77.08058166503906,
|
|
"loss": 0.8626,
|
|
"margin_dpo/margin_mean": 5.273736000061035,
|
|
"margin_dpo/margin_std": 5.873756408691406,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.8873771730914588,
|
|
"fcm_dpo/beta": 0.18256214261054993,
|
|
"fcm_dpo/delta": -0.001638067769818008,
|
|
"fcm_dpo/margin": 4.664154052734375,
|
|
"fcm_dpo/q_t": 0.3415156304836273,
|
|
"grad_norm": 54.03948974609375,
|
|
"learning_rate": 1.9411426473854687e-08,
|
|
"logits/chosen": 0.2829993665218353,
|
|
"logits/rejected": 0.2623155117034912,
|
|
"logps/chosen": -70.61738586425781,
|
|
"logps/ref_chosen": -63.41719436645508,
|
|
"logps/ref_rejected": -63.47003936767578,
|
|
"logps/rejected": -75.3343734741211,
|
|
"loss": 1.0717,
|
|
"margin_dpo/margin_mean": 4.664153575897217,
|
|
"margin_dpo/margin_std": 7.594302654266357,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.8888888888888888,
|
|
"fcm_dpo/beta": 0.1814488172531128,
|
|
"fcm_dpo/delta": -0.027305468916893005,
|
|
"fcm_dpo/margin": 4.821053504943848,
|
|
"fcm_dpo/q_t": 0.3352981507778168,
|
|
"grad_norm": 48.0519905090332,
|
|
"learning_rate": 1.890382096832699e-08,
|
|
"logits/chosen": 0.3110392093658447,
|
|
"logits/rejected": 0.26618409156799316,
|
|
"logps/chosen": -70.01336669921875,
|
|
"logps/ref_chosen": -62.20103454589844,
|
|
"logps/ref_rejected": -82.10249328613281,
|
|
"logps/rejected": -94.73588562011719,
|
|
"loss": 0.9675,
|
|
"margin_dpo/margin_mean": 4.821053504943848,
|
|
"margin_dpo/margin_std": 6.605319499969482,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.890400604686319,
|
|
"fcm_dpo/beta": 0.1763853132724762,
|
|
"fcm_dpo/delta": -0.18737711012363434,
|
|
"fcm_dpo/margin": 5.771547317504883,
|
|
"fcm_dpo/q_t": 0.30814313888549805,
|
|
"grad_norm": 43.660152435302734,
|
|
"learning_rate": 1.840267971970344e-08,
|
|
"logits/chosen": 0.25168222188949585,
|
|
"logits/rejected": 0.22019024193286896,
|
|
"logps/chosen": -63.57328414916992,
|
|
"logps/ref_chosen": -56.71361541748047,
|
|
"logps/ref_rejected": -76.7366943359375,
|
|
"logps/rejected": -89.36790466308594,
|
|
"loss": 0.8337,
|
|
"margin_dpo/margin_mean": 5.771548271179199,
|
|
"margin_dpo/margin_std": 6.418168067932129,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.891912320483749,
|
|
"fcm_dpo/beta": 0.1695275604724884,
|
|
"fcm_dpo/delta": -0.2477089762687683,
|
|
"fcm_dpo/margin": 6.322257041931152,
|
|
"fcm_dpo/q_t": 0.3001672029495239,
|
|
"grad_norm": 39.7830810546875,
|
|
"learning_rate": 1.7908016745981856e-08,
|
|
"logits/chosen": 0.2025907039642334,
|
|
"logits/rejected": 0.17383888363838196,
|
|
"logps/chosen": -74.46649169921875,
|
|
"logps/ref_chosen": -66.5138168334961,
|
|
"logps/ref_rejected": -85.70820617675781,
|
|
"logps/rejected": -99.98313903808594,
|
|
"loss": 0.8505,
|
|
"margin_dpo/margin_mean": 6.322257041931152,
|
|
"margin_dpo/margin_std": 7.0092453956604,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.8934240362811792,
|
|
"fcm_dpo/beta": 0.16398374736309052,
|
|
"fcm_dpo/delta": -0.02152249962091446,
|
|
"fcm_dpo/margin": 5.285676002502441,
|
|
"fcm_dpo/q_t": 0.3420785665512085,
|
|
"grad_norm": 41.17580795288086,
|
|
"learning_rate": 1.7419845883949098e-08,
|
|
"logits/chosen": 0.3504617214202881,
|
|
"logits/rejected": 0.2953767776489258,
|
|
"logps/chosen": -67.76081848144531,
|
|
"logps/ref_chosen": -60.697181701660156,
|
|
"logps/ref_rejected": -86.12278747558594,
|
|
"logps/rejected": -98.47210693359375,
|
|
"loss": 0.9932,
|
|
"margin_dpo/margin_mean": 5.285675525665283,
|
|
"margin_dpo/margin_std": 7.387969970703125,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.8949357520786092,
|
|
"fcm_dpo/beta": 0.16582736372947693,
|
|
"fcm_dpo/delta": 0.05810259282588959,
|
|
"fcm_dpo/margin": 4.798344135284424,
|
|
"fcm_dpo/q_t": 0.35599485039711,
|
|
"grad_norm": 43.80263137817383,
|
|
"learning_rate": 1.6938180788793556e-08,
|
|
"logits/chosen": 0.2896095812320709,
|
|
"logits/rejected": 0.19301211833953857,
|
|
"logps/chosen": -58.819942474365234,
|
|
"logps/ref_chosen": -51.237327575683594,
|
|
"logps/ref_rejected": -81.60242462158203,
|
|
"logps/rejected": -93.98338317871094,
|
|
"loss": 0.9877,
|
|
"margin_dpo/margin_mean": 4.798343658447266,
|
|
"margin_dpo/margin_std": 6.754156112670898,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.8964474678760394,
|
|
"fcm_dpo/beta": 0.16896365582942963,
|
|
"fcm_dpo/delta": 0.05192846059799194,
|
|
"fcm_dpo/margin": 4.747987747192383,
|
|
"fcm_dpo/q_t": 0.3541349172592163,
|
|
"grad_norm": 43.26925277709961,
|
|
"learning_rate": 1.6463034933723336e-08,
|
|
"logits/chosen": 0.26381367444992065,
|
|
"logits/rejected": 0.18189498782157898,
|
|
"logps/chosen": -48.68609619140625,
|
|
"logps/ref_chosen": -42.08000183105469,
|
|
"logps/ref_rejected": -68.47499084472656,
|
|
"logps/rejected": -79.82907104492188,
|
|
"loss": 1.088,
|
|
"margin_dpo/margin_mean": 4.747987747192383,
|
|
"margin_dpo/margin_std": 7.88329553604126,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.8979591836734694,
|
|
"fcm_dpo/beta": 0.17408084869384766,
|
|
"fcm_dpo/delta": 0.21379601955413818,
|
|
"fcm_dpo/margin": 3.7374889850616455,
|
|
"fcm_dpo/q_t": 0.3792218267917633,
|
|
"grad_norm": 43.85451889038086,
|
|
"learning_rate": 1.5994421609589385e-08,
|
|
"logits/chosen": 0.19765910506248474,
|
|
"logits/rejected": 0.17842236161231995,
|
|
"logps/chosen": -71.97224426269531,
|
|
"logps/ref_chosen": -63.658668518066406,
|
|
"logps/ref_rejected": -70.35597229003906,
|
|
"logps/rejected": -82.40703582763672,
|
|
"loss": 1.1051,
|
|
"margin_dpo/margin_mean": 3.7374887466430664,
|
|
"margin_dpo/margin_std": 6.5507001876831055,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.8994708994708994,
|
|
"fcm_dpo/beta": 0.17081937193870544,
|
|
"fcm_dpo/delta": -0.2570795714855194,
|
|
"fcm_dpo/margin": 6.321621417999268,
|
|
"fcm_dpo/q_t": 0.3001200556755066,
|
|
"grad_norm": 40.53380584716797,
|
|
"learning_rate": 1.553235392451377e-08,
|
|
"logits/chosen": 0.3341759443283081,
|
|
"logits/rejected": 0.24922290444374084,
|
|
"logps/chosen": -63.90322494506836,
|
|
"logps/ref_chosen": -56.21875762939453,
|
|
"logps/ref_rejected": -83.95773315429688,
|
|
"logps/rejected": -97.96382141113281,
|
|
"loss": 0.8964,
|
|
"margin_dpo/margin_mean": 6.321621894836426,
|
|
"margin_dpo/margin_std": 7.671756744384766,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.9009826152683296,
|
|
"fcm_dpo/beta": 0.17557457089424133,
|
|
"fcm_dpo/delta": 0.30167368054389954,
|
|
"fcm_dpo/margin": 3.221146583557129,
|
|
"fcm_dpo/q_t": 0.403555303812027,
|
|
"grad_norm": 46.12143325805664,
|
|
"learning_rate": 1.507684480352292e-08,
|
|
"logits/chosen": 0.2137114405632019,
|
|
"logits/rejected": 0.21473908424377441,
|
|
"logps/chosen": -76.90093994140625,
|
|
"logps/ref_chosen": -68.48088073730469,
|
|
"logps/ref_rejected": -61.732967376708984,
|
|
"logps/rejected": -73.3741683959961,
|
|
"loss": 1.1891,
|
|
"margin_dpo/margin_mean": 3.221146583557129,
|
|
"margin_dpo/margin_std": 6.960387229919434,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.9024943310657596,
|
|
"fcm_dpo/beta": 0.17845208942890167,
|
|
"fcm_dpo/delta": 0.019212447106838226,
|
|
"fcm_dpo/margin": 4.664018154144287,
|
|
"fcm_dpo/q_t": 0.3509853184223175,
|
|
"grad_norm": 38.720130920410156,
|
|
"learning_rate": 1.4627906988186111e-08,
|
|
"logits/chosen": 0.2167622148990631,
|
|
"logits/rejected": 0.18858037889003754,
|
|
"logps/chosen": -55.6777458190918,
|
|
"logps/ref_chosen": -48.85750961303711,
|
|
"logps/ref_rejected": -55.068084716796875,
|
|
"logps/rejected": -66.55233764648438,
|
|
"loss": 0.9812,
|
|
"margin_dpo/margin_mean": 4.664018154144287,
|
|
"margin_dpo/margin_std": 6.593279838562012,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.9040060468631897,
|
|
"fcm_dpo/beta": 0.1865067183971405,
|
|
"fcm_dpo/delta": 0.26280879974365234,
|
|
"fcm_dpo/margin": 3.2310941219329834,
|
|
"fcm_dpo/q_t": 0.39431557059288025,
|
|
"grad_norm": 50.766639709472656,
|
|
"learning_rate": 1.4185553036259095e-08,
|
|
"logits/chosen": 0.2725675702095032,
|
|
"logits/rejected": 0.20792771875858307,
|
|
"logps/chosen": -67.86152648925781,
|
|
"logps/ref_chosen": -58.88715362548828,
|
|
"logps/ref_rejected": -81.43145751953125,
|
|
"logps/rejected": -93.63692474365234,
|
|
"loss": 1.1975,
|
|
"margin_dpo/margin_mean": 3.2310941219329834,
|
|
"margin_dpo/margin_std": 6.851696014404297,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.9055177626606198,
|
|
"fcm_dpo/beta": 0.19415175914764404,
|
|
"fcm_dpo/delta": 0.1273910403251648,
|
|
"fcm_dpo/margin": 3.755466938018799,
|
|
"fcm_dpo/q_t": 0.37717127799987793,
|
|
"grad_norm": 55.01444625854492,
|
|
"learning_rate": 1.3749795321332885e-08,
|
|
"logits/chosen": 0.38753992319107056,
|
|
"logits/rejected": 0.3443824052810669,
|
|
"logps/chosen": -66.8466796875,
|
|
"logps/ref_chosen": -57.60719299316406,
|
|
"logps/ref_rejected": -71.80469512939453,
|
|
"logps/rejected": -84.79964447021484,
|
|
"loss": 1.1353,
|
|
"margin_dpo/margin_mean": 3.755467414855957,
|
|
"margin_dpo/margin_std": 6.981395721435547,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.9070294784580499,
|
|
"fcm_dpo/beta": 0.20105046033859253,
|
|
"fcm_dpo/delta": 0.1314106285572052,
|
|
"fcm_dpo/margin": 3.578876495361328,
|
|
"fcm_dpo/q_t": 0.37118592858314514,
|
|
"grad_norm": 54.210601806640625,
|
|
"learning_rate": 1.3320646032487393e-08,
|
|
"logits/chosen": 0.3219030499458313,
|
|
"logits/rejected": 0.28190159797668457,
|
|
"logps/chosen": -67.0551528930664,
|
|
"logps/ref_chosen": -58.44231414794922,
|
|
"logps/ref_rejected": -83.64639282226562,
|
|
"logps/rejected": -95.83810424804688,
|
|
"loss": 1.1541,
|
|
"margin_dpo/margin_mean": 3.57887601852417,
|
|
"margin_dpo/margin_std": 6.589799880981445,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.9070294784580499,
|
|
"eval_fcm_dpo/beta": 0.19878557324409485,
|
|
"eval_logits/chosen": 0.29669952392578125,
|
|
"eval_logits/rejected": 0.25336459279060364,
|
|
"eval_logps/chosen": -82.59681701660156,
|
|
"eval_logps/ref_chosen": -74.85946655273438,
|
|
"eval_logps/ref_rejected": -79.54898834228516,
|
|
"eval_logps/rejected": -91.48360443115234,
|
|
"eval_loss": 0.5392169952392578,
|
|
"eval_margin_dpo/margin_mean": 4.197268009185791,
|
|
"eval_margin_dpo/margin_std": 6.897202491760254,
|
|
"eval_runtime": 38.0373,
|
|
"eval_samples_per_second": 60.546,
|
|
"eval_steps_per_second": 1.893,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.90854119425548,
|
|
"fcm_dpo/beta": 0.19258500635623932,
|
|
"fcm_dpo/delta": -0.149948388338089,
|
|
"fcm_dpo/margin": 5.099976539611816,
|
|
"fcm_dpo/q_t": 0.33194196224212646,
|
|
"grad_norm": 48.01551818847656,
|
|
"learning_rate": 1.2898117173950868e-08,
|
|
"logits/chosen": 0.28927916288375854,
|
|
"logits/rejected": 0.22528886795043945,
|
|
"logps/chosen": -62.39053726196289,
|
|
"logps/ref_chosen": -55.59432601928711,
|
|
"logps/ref_rejected": -83.68630981445312,
|
|
"logps/rejected": -95.58250427246094,
|
|
"loss": 0.9898,
|
|
"margin_dpo/margin_mean": 5.099976539611816,
|
|
"margin_dpo/margin_std": 7.26269006729126,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.91005291005291,
|
|
"fcm_dpo/beta": 0.19018596410751343,
|
|
"fcm_dpo/delta": -0.12933437526226044,
|
|
"fcm_dpo/margin": 5.084250450134277,
|
|
"fcm_dpo/q_t": 0.3237204849720001,
|
|
"grad_norm": 44.3646240234375,
|
|
"learning_rate": 1.2482220564763667e-08,
|
|
"logits/chosen": 0.2777378559112549,
|
|
"logits/rejected": 0.24087250232696533,
|
|
"logps/chosen": -62.39347839355469,
|
|
"logps/ref_chosen": -56.349185943603516,
|
|
"logps/ref_rejected": -71.9959716796875,
|
|
"logps/rejected": -83.12451171875,
|
|
"loss": 0.8914,
|
|
"margin_dpo/margin_mean": 5.0842509269714355,
|
|
"margin_dpo/margin_std": 6.108532905578613,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.9115646258503401,
|
|
"fcm_dpo/beta": 0.18402621150016785,
|
|
"fcm_dpo/delta": -0.10828899592161179,
|
|
"fcm_dpo/margin": 5.145867824554443,
|
|
"fcm_dpo/q_t": 0.3228384852409363,
|
|
"grad_norm": 39.86903762817383,
|
|
"learning_rate": 1.2072967838448051e-08,
|
|
"logits/chosen": 0.21399766206741333,
|
|
"logits/rejected": 0.16194592416286469,
|
|
"logps/chosen": -59.277183532714844,
|
|
"logps/ref_chosen": -53.16838836669922,
|
|
"logps/ref_rejected": -73.8604736328125,
|
|
"logps/rejected": -85.11514282226562,
|
|
"loss": 0.8905,
|
|
"margin_dpo/margin_mean": 5.145867824554443,
|
|
"margin_dpo/margin_std": 6.194065093994141,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.9130763416477702,
|
|
"fcm_dpo/beta": 0.18175940215587616,
|
|
"fcm_dpo/delta": -0.0712120309472084,
|
|
"fcm_dpo/margin": 5.031689643859863,
|
|
"fcm_dpo/q_t": 0.3407028615474701,
|
|
"grad_norm": 46.584659576416016,
|
|
"learning_rate": 1.1670370442682459e-08,
|
|
"logits/chosen": 0.2513910233974457,
|
|
"logits/rejected": 0.24693363904953003,
|
|
"logps/chosen": -79.33577728271484,
|
|
"logps/ref_chosen": -72.64942169189453,
|
|
"logps/ref_rejected": -69.8792724609375,
|
|
"logps/rejected": -81.5973129272461,
|
|
"loss": 1.0237,
|
|
"margin_dpo/margin_mean": 5.03169059753418,
|
|
"margin_dpo/margin_std": 7.756058692932129,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.9145880574452003,
|
|
"fcm_dpo/beta": 0.18186496198177338,
|
|
"fcm_dpo/delta": 0.04116290062665939,
|
|
"fcm_dpo/margin": 4.465373992919922,
|
|
"fcm_dpo/q_t": 0.3500198721885681,
|
|
"grad_norm": 47.683250427246094,
|
|
"learning_rate": 1.1274439638981532e-08,
|
|
"logits/chosen": 0.3524032235145569,
|
|
"logits/rejected": 0.2950265407562256,
|
|
"logps/chosen": -69.99392700195312,
|
|
"logps/ref_chosen": -61.61284637451172,
|
|
"logps/ref_rejected": -79.34398651123047,
|
|
"logps/rejected": -92.19044494628906,
|
|
"loss": 1.0198,
|
|
"margin_dpo/margin_mean": 4.465373992919922,
|
|
"margin_dpo/margin_std": 6.671446800231934,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.9160997732426304,
|
|
"fcm_dpo/beta": 0.18503758311271667,
|
|
"fcm_dpo/delta": 0.008188098669052124,
|
|
"fcm_dpo/margin": 4.541077613830566,
|
|
"fcm_dpo/q_t": 0.34726542234420776,
|
|
"grad_norm": 46.52579116821289,
|
|
"learning_rate": 1.0885186502381016e-08,
|
|
"logits/chosen": 0.26749229431152344,
|
|
"logits/rejected": 0.21147756278514862,
|
|
"logps/chosen": -61.70413589477539,
|
|
"logps/ref_chosen": -54.46424102783203,
|
|
"logps/ref_rejected": -79.62708282470703,
|
|
"logps/rejected": -91.40805053710938,
|
|
"loss": 0.9797,
|
|
"margin_dpo/margin_mean": 4.541077613830566,
|
|
"margin_dpo/margin_std": 6.255065441131592,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.9176114890400605,
|
|
"fcm_dpo/beta": 0.17840437591075897,
|
|
"fcm_dpo/delta": -0.06762778759002686,
|
|
"fcm_dpo/margin": 5.074832916259766,
|
|
"fcm_dpo/q_t": 0.3349849283695221,
|
|
"grad_norm": 46.92865753173828,
|
|
"learning_rate": 1.0502621921127774e-08,
|
|
"logits/chosen": 0.2795192301273346,
|
|
"logits/rejected": 0.24547183513641357,
|
|
"logps/chosen": -70.56070709228516,
|
|
"logps/ref_chosen": -62.86086654663086,
|
|
"logps/ref_rejected": -72.5501937866211,
|
|
"logps/rejected": -85.32487487792969,
|
|
"loss": 0.9709,
|
|
"margin_dpo/margin_mean": 5.074833869934082,
|
|
"margin_dpo/margin_std": 6.887617588043213,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.9191232048374905,
|
|
"fcm_dpo/beta": 0.18580615520477295,
|
|
"fcm_dpo/delta": 0.18324339389801025,
|
|
"fcm_dpo/margin": 3.6547422409057617,
|
|
"fcm_dpo/q_t": 0.375454306602478,
|
|
"grad_norm": 59.2383918762207,
|
|
"learning_rate": 1.0126756596375685e-08,
|
|
"logits/chosen": 0.2840557098388672,
|
|
"logits/rejected": 0.2288847118616104,
|
|
"logps/chosen": -71.39105224609375,
|
|
"logps/ref_chosen": -63.18071746826172,
|
|
"logps/ref_rejected": -99.15888214111328,
|
|
"logps/rejected": -111.02395629882812,
|
|
"loss": 1.0933,
|
|
"margin_dpo/margin_mean": 3.6547417640686035,
|
|
"margin_dpo/margin_std": 6.374434471130371,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.9206349206349206,
|
|
"fcm_dpo/beta": 0.1796351969242096,
|
|
"fcm_dpo/delta": -0.18832767009735107,
|
|
"fcm_dpo/margin": 5.636974811553955,
|
|
"fcm_dpo/q_t": 0.3059696555137634,
|
|
"grad_norm": 34.60963821411133,
|
|
"learning_rate": 9.757601041885694e-09,
|
|
"logits/chosen": 0.30717933177948,
|
|
"logits/rejected": 0.2738397717475891,
|
|
"logps/chosen": -55.23808288574219,
|
|
"logps/ref_chosen": -48.62322235107422,
|
|
"logps/ref_rejected": -68.28271484375,
|
|
"logps/rejected": -80.53456115722656,
|
|
"loss": 0.8554,
|
|
"margin_dpo/margin_mean": 5.636974334716797,
|
|
"margin_dpo/margin_std": 6.186499118804932,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.9221466364323507,
|
|
"fcm_dpo/beta": 0.17905402183532715,
|
|
"fcm_dpo/delta": -0.06748346984386444,
|
|
"fcm_dpo/margin": 5.089980125427246,
|
|
"fcm_dpo/q_t": 0.3470401167869568,
|
|
"grad_norm": 56.96098327636719,
|
|
"learning_rate": 9.395165583732379e-09,
|
|
"logits/chosen": 0.24939410388469696,
|
|
"logits/rejected": 0.24990352988243103,
|
|
"logps/chosen": -80.74764251708984,
|
|
"logps/ref_chosen": -72.66513061523438,
|
|
"logps/ref_rejected": -87.15310668945312,
|
|
"logps/rejected": -100.32559967041016,
|
|
"loss": 1.0303,
|
|
"margin_dpo/margin_mean": 5.089980125427246,
|
|
"margin_dpo/margin_std": 7.849494934082031,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.9236583522297808,
|
|
"fcm_dpo/beta": 0.18114158511161804,
|
|
"fcm_dpo/delta": 0.2056940793991089,
|
|
"fcm_dpo/margin": 3.6257357597351074,
|
|
"fcm_dpo/q_t": 0.3758603036403656,
|
|
"grad_norm": 45.56681823730469,
|
|
"learning_rate": 9.03946036001449e-09,
|
|
"logits/chosen": 0.3103299140930176,
|
|
"logits/rejected": 0.26671937108039856,
|
|
"logps/chosen": -55.76822280883789,
|
|
"logps/ref_chosen": -48.30857849121094,
|
|
"logps/ref_rejected": -70.6141128540039,
|
|
"logps/rejected": -81.69949340820312,
|
|
"loss": 1.0748,
|
|
"margin_dpo/margin_mean": 3.62573504447937,
|
|
"margin_dpo/margin_std": 5.797907829284668,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.9251700680272109,
|
|
"fcm_dpo/beta": 0.1818360984325409,
|
|
"fcm_dpo/delta": -0.15190520882606506,
|
|
"fcm_dpo/margin": 5.42856502532959,
|
|
"fcm_dpo/q_t": 0.32411032915115356,
|
|
"grad_norm": 46.31725311279297,
|
|
"learning_rate": 8.690495320571839e-09,
|
|
"logits/chosen": 0.22141186892986298,
|
|
"logits/rejected": 0.1634463667869568,
|
|
"logps/chosen": -68.8099365234375,
|
|
"logps/ref_chosen": -61.23155975341797,
|
|
"logps/ref_rejected": -94.37979888916016,
|
|
"logps/rejected": -107.38673400878906,
|
|
"loss": 0.9778,
|
|
"margin_dpo/margin_mean": 5.428564071655273,
|
|
"margin_dpo/margin_std": 7.684731483459473,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.926681783824641,
|
|
"fcm_dpo/beta": 0.17303548753261566,
|
|
"fcm_dpo/delta": -0.30681759119033813,
|
|
"fcm_dpo/margin": 6.491569519042969,
|
|
"fcm_dpo/q_t": 0.29310303926467896,
|
|
"grad_norm": 38.10737991333008,
|
|
"learning_rate": 8.348280226706722e-09,
|
|
"logits/chosen": 0.2111150324344635,
|
|
"logits/rejected": 0.19616663455963135,
|
|
"logps/chosen": -60.43955612182617,
|
|
"logps/ref_chosen": -53.98310852050781,
|
|
"logps/ref_rejected": -58.32208251953125,
|
|
"logps/rejected": -71.27009582519531,
|
|
"loss": 0.8138,
|
|
"margin_dpo/margin_mean": 6.491570472717285,
|
|
"margin_dpo/margin_std": 6.981858253479004,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.9281934996220711,
|
|
"fcm_dpo/beta": 0.17160148918628693,
|
|
"fcm_dpo/delta": 0.05498047545552254,
|
|
"fcm_dpo/margin": 4.653756141662598,
|
|
"fcm_dpo/q_t": 0.3367941379547119,
|
|
"grad_norm": 41.467777252197266,
|
|
"learning_rate": 8.012824650910937e-09,
|
|
"logits/chosen": 0.28697025775909424,
|
|
"logits/rejected": 0.27061259746551514,
|
|
"logps/chosen": -68.2115249633789,
|
|
"logps/ref_chosen": -60.24303436279297,
|
|
"logps/ref_rejected": -72.26258850097656,
|
|
"logps/rejected": -84.88483428955078,
|
|
"loss": 0.9136,
|
|
"margin_dpo/margin_mean": 4.653756141662598,
|
|
"margin_dpo/margin_std": 5.355663299560547,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.9297052154195011,
|
|
"fcm_dpo/beta": 0.1685989797115326,
|
|
"fcm_dpo/delta": -0.06165684387087822,
|
|
"fcm_dpo/margin": 5.367961883544922,
|
|
"fcm_dpo/q_t": 0.33193883299827576,
|
|
"grad_norm": 50.48664474487305,
|
|
"learning_rate": 7.684137976598088e-09,
|
|
"logits/chosen": 0.28863102197647095,
|
|
"logits/rejected": 0.24735435843467712,
|
|
"logps/chosen": -80.24966430664062,
|
|
"logps/ref_chosen": -72.09467315673828,
|
|
"logps/ref_rejected": -104.02980041503906,
|
|
"logps/rejected": -117.55276489257812,
|
|
"loss": 0.9757,
|
|
"margin_dpo/margin_mean": 5.367961883544922,
|
|
"margin_dpo/margin_std": 7.349823951721191,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.9312169312169312,
|
|
"fcm_dpo/beta": 0.17112982273101807,
|
|
"fcm_dpo/delta": 0.10860873758792877,
|
|
"fcm_dpo/margin": 4.380434036254883,
|
|
"fcm_dpo/q_t": 0.362366259098053,
|
|
"grad_norm": 41.709930419921875,
|
|
"learning_rate": 7.36222939784098e-09,
|
|
"logits/chosen": 0.3024919033050537,
|
|
"logits/rejected": 0.23271536827087402,
|
|
"logps/chosen": -66.6510009765625,
|
|
"logps/ref_chosen": -58.530723571777344,
|
|
"logps/ref_rejected": -75.48025512695312,
|
|
"logps/rejected": -87.98096466064453,
|
|
"loss": 1.0149,
|
|
"margin_dpo/margin_mean": 4.380434513092041,
|
|
"margin_dpo/margin_std": 6.544014930725098,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.9327286470143613,
|
|
"fcm_dpo/beta": 0.17270605266094208,
|
|
"fcm_dpo/delta": 0.0973830297589302,
|
|
"fcm_dpo/margin": 4.382899284362793,
|
|
"fcm_dpo/q_t": 0.35784029960632324,
|
|
"grad_norm": 44.091949462890625,
|
|
"learning_rate": 7.047107919114586e-09,
|
|
"logits/chosen": 0.25396719574928284,
|
|
"logits/rejected": 0.21397271752357483,
|
|
"logps/chosen": -66.15753936767578,
|
|
"logps/ref_chosen": -57.608673095703125,
|
|
"logps/ref_rejected": -81.22109985351562,
|
|
"logps/rejected": -94.15286254882812,
|
|
"loss": 1.0028,
|
|
"margin_dpo/margin_mean": 4.382899284362793,
|
|
"margin_dpo/margin_std": 6.131780624389648,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.9342403628117913,
|
|
"fcm_dpo/beta": 0.17426848411560059,
|
|
"fcm_dpo/delta": 0.008210502564907074,
|
|
"fcm_dpo/margin": 4.818388938903809,
|
|
"fcm_dpo/q_t": 0.34145089983940125,
|
|
"grad_norm": 44.20525360107422,
|
|
"learning_rate": 6.738782355044048e-09,
|
|
"logits/chosen": 0.2514890134334564,
|
|
"logits/rejected": 0.1663253903388977,
|
|
"logps/chosen": -63.71135711669922,
|
|
"logps/ref_chosen": -56.69594192504883,
|
|
"logps/ref_rejected": -85.92362976074219,
|
|
"logps/rejected": -97.75743865966797,
|
|
"loss": 0.9383,
|
|
"margin_dpo/margin_mean": 4.818388938903809,
|
|
"margin_dpo/margin_std": 6.156173229217529,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.9357520786092215,
|
|
"fcm_dpo/beta": 0.1756330132484436,
|
|
"fcm_dpo/delta": 0.005198441445827484,
|
|
"fcm_dpo/margin": 4.809282302856445,
|
|
"fcm_dpo/q_t": 0.3440103828907013,
|
|
"grad_norm": 44.29880142211914,
|
|
"learning_rate": 6.437261330158206e-09,
|
|
"logits/chosen": 0.343215674161911,
|
|
"logits/rejected": 0.2847067713737488,
|
|
"logps/chosen": -61.114315032958984,
|
|
"logps/ref_chosen": -54.05841827392578,
|
|
"logps/ref_rejected": -83.55493927001953,
|
|
"logps/rejected": -95.42012023925781,
|
|
"loss": 0.9784,
|
|
"margin_dpo/margin_mean": 4.809283256530762,
|
|
"margin_dpo/margin_std": 6.668694496154785,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.9372637944066515,
|
|
"fcm_dpo/beta": 0.18186062574386597,
|
|
"fcm_dpo/delta": 0.08738522231578827,
|
|
"fcm_dpo/margin": 4.195881366729736,
|
|
"fcm_dpo/q_t": 0.36279648542404175,
|
|
"grad_norm": 51.88892364501953,
|
|
"learning_rate": 6.142553278648238e-09,
|
|
"logits/chosen": 0.29026031494140625,
|
|
"logits/rejected": 0.27886366844177246,
|
|
"logps/chosen": -70.6176986694336,
|
|
"logps/ref_chosen": -63.36971664428711,
|
|
"logps/ref_rejected": -65.68269348144531,
|
|
"logps/rejected": -77.12655639648438,
|
|
"loss": 1.034,
|
|
"margin_dpo/margin_mean": 4.1958818435668945,
|
|
"margin_dpo/margin_std": 6.316009521484375,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.9387755102040817,
|
|
"fcm_dpo/beta": 0.18603846430778503,
|
|
"fcm_dpo/delta": 0.1751212179660797,
|
|
"fcm_dpo/margin": 3.6776561737060547,
|
|
"fcm_dpo/q_t": 0.37486201524734497,
|
|
"grad_norm": 51.965782165527344,
|
|
"learning_rate": 5.854666444131934e-09,
|
|
"logits/chosen": 0.3206130862236023,
|
|
"logits/rejected": 0.2369777262210846,
|
|
"logps/chosen": -60.00037384033203,
|
|
"logps/ref_chosen": -52.321224212646484,
|
|
"logps/ref_rejected": -88.09001159667969,
|
|
"logps/rejected": -99.44680786132812,
|
|
"loss": 1.0999,
|
|
"margin_dpo/margin_mean": 3.6776556968688965,
|
|
"margin_dpo/margin_std": 6.303377151489258,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.9402872260015117,
|
|
"fcm_dpo/beta": 0.18810513615608215,
|
|
"fcm_dpo/delta": 0.0432133674621582,
|
|
"fcm_dpo/margin": 4.305281639099121,
|
|
"fcm_dpo/q_t": 0.35074567794799805,
|
|
"grad_norm": 47.06365966796875,
|
|
"learning_rate": 5.573608879422875e-09,
|
|
"logits/chosen": 0.24988241493701935,
|
|
"logits/rejected": 0.2150622308254242,
|
|
"logps/chosen": -67.39698028564453,
|
|
"logps/ref_chosen": -59.86545944213867,
|
|
"logps/ref_rejected": -81.86668395996094,
|
|
"logps/rejected": -93.7034912109375,
|
|
"loss": 0.9729,
|
|
"margin_dpo/margin_mean": 4.305282115936279,
|
|
"margin_dpo/margin_std": 5.978768825531006,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.9417989417989417,
|
|
"fcm_dpo/beta": 0.18544979393482208,
|
|
"fcm_dpo/delta": -0.08368836343288422,
|
|
"fcm_dpo/margin": 4.992826461791992,
|
|
"fcm_dpo/q_t": 0.32211193442344666,
|
|
"grad_norm": 41.866485595703125,
|
|
"learning_rate": 5.299388446305342e-09,
|
|
"logits/chosen": 0.2434006631374359,
|
|
"logits/rejected": 0.1846180558204651,
|
|
"logps/chosen": -75.88226318359375,
|
|
"logps/ref_chosen": -67.36846160888672,
|
|
"logps/ref_rejected": -82.02733612060547,
|
|
"logps/rejected": -95.53396606445312,
|
|
"loss": 0.8903,
|
|
"margin_dpo/margin_mean": 4.992826461791992,
|
|
"margin_dpo/margin_std": 5.761674880981445,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.9433106575963719,
|
|
"fcm_dpo/beta": 0.18060320615768433,
|
|
"fcm_dpo/delta": -0.112208291888237,
|
|
"fcm_dpo/margin": 5.261911392211914,
|
|
"fcm_dpo/q_t": 0.33041465282440186,
|
|
"grad_norm": 42.327178955078125,
|
|
"learning_rate": 5.03201281531429e-09,
|
|
"logits/chosen": 0.25088438391685486,
|
|
"logits/rejected": 0.16487614810466766,
|
|
"logps/chosen": -58.05131530761719,
|
|
"logps/ref_chosen": -51.02655029296875,
|
|
"logps/ref_rejected": -76.49203491210938,
|
|
"logps/rejected": -88.77871704101562,
|
|
"loss": 0.9333,
|
|
"margin_dpo/margin_mean": 5.261911392211914,
|
|
"margin_dpo/margin_std": 6.842192649841309,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.9448223733938019,
|
|
"fcm_dpo/beta": 0.1855674684047699,
|
|
"fcm_dpo/delta": 0.14608462154865265,
|
|
"fcm_dpo/margin": 3.8421430587768555,
|
|
"fcm_dpo/q_t": 0.37177574634552,
|
|
"grad_norm": 48.58590316772461,
|
|
"learning_rate": 4.7714894655209174e-09,
|
|
"logits/chosen": 0.33973124623298645,
|
|
"logits/rejected": 0.2644117474555969,
|
|
"logps/chosen": -62.346839904785156,
|
|
"logps/ref_chosen": -54.20761489868164,
|
|
"logps/ref_rejected": -84.93669128417969,
|
|
"logps/rejected": -96.91806030273438,
|
|
"loss": 1.0799,
|
|
"margin_dpo/margin_mean": 3.8421425819396973,
|
|
"margin_dpo/margin_std": 6.479580879211426,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.9463340891912321,
|
|
"fcm_dpo/beta": 0.180339515209198,
|
|
"fcm_dpo/delta": -0.19849389791488647,
|
|
"fcm_dpo/margin": 5.699306011199951,
|
|
"fcm_dpo/q_t": 0.32750552892684937,
|
|
"grad_norm": 42.651458740234375,
|
|
"learning_rate": 4.517825684323323e-09,
|
|
"logits/chosen": 0.34960970282554626,
|
|
"logits/rejected": 0.24316346645355225,
|
|
"logps/chosen": -51.93178176879883,
|
|
"logps/ref_chosen": -45.06201934814453,
|
|
"logps/ref_rejected": -89.66368103027344,
|
|
"logps/rejected": -102.23274230957031,
|
|
"loss": 0.9744,
|
|
"margin_dpo/margin_mean": 5.699305534362793,
|
|
"margin_dpo/margin_std": 8.007684707641602,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.9478458049886621,
|
|
"fcm_dpo/beta": 0.1721944957971573,
|
|
"fcm_dpo/delta": -0.2246345579624176,
|
|
"fcm_dpo/margin": 6.09731388092041,
|
|
"fcm_dpo/q_t": 0.31280094385147095,
|
|
"grad_norm": 46.61338424682617,
|
|
"learning_rate": 4.271028567242818e-09,
|
|
"logits/chosen": 0.22155846655368805,
|
|
"logits/rejected": 0.12805700302124023,
|
|
"logps/chosen": -66.43733215332031,
|
|
"logps/ref_chosen": -58.791053771972656,
|
|
"logps/ref_rejected": -94.90802001953125,
|
|
"logps/rejected": -108.651611328125,
|
|
"loss": 0.893,
|
|
"margin_dpo/margin_mean": 6.097313404083252,
|
|
"margin_dpo/margin_std": 7.516200065612793,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.9493575207860923,
|
|
"fcm_dpo/beta": 0.16774258017539978,
|
|
"fcm_dpo/delta": -0.2266627550125122,
|
|
"fcm_dpo/margin": 6.269933700561523,
|
|
"fcm_dpo/q_t": 0.3056507706642151,
|
|
"grad_norm": 40.3947868347168,
|
|
"learning_rate": 4.0311050177251895e-09,
|
|
"logits/chosen": 0.2967239022254944,
|
|
"logits/rejected": 0.26311272382736206,
|
|
"logps/chosen": -59.4424934387207,
|
|
"logps/ref_chosen": -52.80357360839844,
|
|
"logps/ref_rejected": -76.49468994140625,
|
|
"logps/rejected": -89.4035415649414,
|
|
"loss": 0.927,
|
|
"margin_dpo/margin_mean": 6.269933700561523,
|
|
"margin_dpo/margin_std": 7.717181205749512,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.9508692365835223,
|
|
"fcm_dpo/beta": 0.1646927446126938,
|
|
"fcm_dpo/delta": 0.023073244839906693,
|
|
"fcm_dpo/margin": 5.028815746307373,
|
|
"fcm_dpo/q_t": 0.3410007059574127,
|
|
"grad_norm": 38.630470275878906,
|
|
"learning_rate": 3.798061746947995e-09,
|
|
"logits/chosen": 0.2684643864631653,
|
|
"logits/rejected": 0.2583003640174866,
|
|
"logps/chosen": -77.83274841308594,
|
|
"logps/ref_chosen": -70.71749877929688,
|
|
"logps/ref_rejected": -78.96273803710938,
|
|
"logps/rejected": -91.1068115234375,
|
|
"loss": 0.907,
|
|
"margin_dpo/margin_mean": 5.028815746307373,
|
|
"margin_dpo/margin_std": 5.933478832244873,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.9523809523809523,
|
|
"fcm_dpo/beta": 0.16420958936214447,
|
|
"fcm_dpo/delta": -0.05578102171421051,
|
|
"fcm_dpo/margin": 5.478397846221924,
|
|
"fcm_dpo/q_t": 0.3335376977920532,
|
|
"grad_norm": 35.65196228027344,
|
|
"learning_rate": 3.5719052736323806e-09,
|
|
"logits/chosen": 0.2280709147453308,
|
|
"logits/rejected": 0.18566131591796875,
|
|
"logps/chosen": -63.271915435791016,
|
|
"logps/ref_chosen": -56.201412200927734,
|
|
"logps/ref_rejected": -74.69807434082031,
|
|
"logps/rejected": -87.24697875976562,
|
|
"loss": 0.9099,
|
|
"margin_dpo/margin_mean": 5.478397846221924,
|
|
"margin_dpo/margin_std": 6.630637168884277,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.9538926681783825,
|
|
"fcm_dpo/beta": 0.156265988945961,
|
|
"fcm_dpo/delta": -0.1330309510231018,
|
|
"fcm_dpo/margin": 6.156580924987793,
|
|
"fcm_dpo/q_t": 0.32488080859184265,
|
|
"grad_norm": 43.58830261230469,
|
|
"learning_rate": 3.352641923861144e-09,
|
|
"logits/chosen": 0.36850327253341675,
|
|
"logits/rejected": 0.2770962715148926,
|
|
"logps/chosen": -65.9747314453125,
|
|
"logps/ref_chosen": -58.82059860229492,
|
|
"logps/ref_rejected": -96.51437377929688,
|
|
"logps/rejected": -109.82508850097656,
|
|
"loss": 0.9339,
|
|
"margin_dpo/margin_mean": 6.156581878662109,
|
|
"margin_dpo/margin_std": 7.784612655639648,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.9554043839758125,
|
|
"fcm_dpo/beta": 0.15440338850021362,
|
|
"fcm_dpo/delta": -0.1414494812488556,
|
|
"fcm_dpo/margin": 6.329607963562012,
|
|
"fcm_dpo/q_t": 0.306906521320343,
|
|
"grad_norm": 34.58030700683594,
|
|
"learning_rate": 3.140277830901428e-09,
|
|
"logits/chosen": 0.33119070529937744,
|
|
"logits/rejected": 0.3036194443702698,
|
|
"logps/chosen": -65.92765808105469,
|
|
"logps/ref_chosen": -58.786048889160156,
|
|
"logps/ref_rejected": -67.21923828125,
|
|
"logps/rejected": -80.69046020507812,
|
|
"loss": 0.8424,
|
|
"margin_dpo/margin_mean": 6.3296074867248535,
|
|
"margin_dpo/margin_std": 6.613114356994629,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.9569160997732427,
|
|
"fcm_dpo/beta": 0.1560203731060028,
|
|
"fcm_dpo/delta": 0.08354716002941132,
|
|
"fcm_dpo/margin": 4.950506210327148,
|
|
"fcm_dpo/q_t": 0.3561435043811798,
|
|
"grad_norm": 34.561824798583984,
|
|
"learning_rate": 2.9348189350335007e-09,
|
|
"logits/chosen": 0.2669551968574524,
|
|
"logits/rejected": 0.2126702070236206,
|
|
"logps/chosen": -58.36284255981445,
|
|
"logps/ref_chosen": -52.13019561767578,
|
|
"logps/ref_rejected": -67.23016357421875,
|
|
"logps/rejected": -78.41331481933594,
|
|
"loss": 0.9923,
|
|
"margin_dpo/margin_mean": 4.950506210327148,
|
|
"margin_dpo/margin_std": 7.004974365234375,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.9584278155706727,
|
|
"fcm_dpo/beta": 0.16562215983867645,
|
|
"fcm_dpo/delta": 0.3642282485961914,
|
|
"fcm_dpo/margin": 3.030942440032959,
|
|
"fcm_dpo/q_t": 0.4143024682998657,
|
|
"grad_norm": 51.9135856628418,
|
|
"learning_rate": 2.736270983384276e-09,
|
|
"logits/chosen": 0.32782524824142456,
|
|
"logits/rejected": 0.3300801217556,
|
|
"logps/chosen": -68.82040405273438,
|
|
"logps/ref_chosen": -60.97979736328125,
|
|
"logps/ref_rejected": -58.50825119018555,
|
|
"logps/rejected": -69.37980651855469,
|
|
"loss": 1.2159,
|
|
"margin_dpo/margin_mean": 3.03094220161438,
|
|
"margin_dpo/margin_std": 6.932095527648926,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.9599395313681028,
|
|
"fcm_dpo/beta": 0.172117680311203,
|
|
"fcm_dpo/delta": 0.15006113052368164,
|
|
"fcm_dpo/margin": 4.126021862030029,
|
|
"fcm_dpo/q_t": 0.3806537687778473,
|
|
"grad_norm": 51.643699645996094,
|
|
"learning_rate": 2.5446395297668287e-09,
|
|
"logits/chosen": 0.200457364320755,
|
|
"logits/rejected": 0.14902645349502563,
|
|
"logps/chosen": -75.00791931152344,
|
|
"logps/ref_chosen": -65.9730224609375,
|
|
"logps/ref_rejected": -85.61317443847656,
|
|
"logps/rejected": -98.77409362792969,
|
|
"loss": 1.1453,
|
|
"margin_dpo/margin_mean": 4.126021385192871,
|
|
"margin_dpo/margin_std": 7.76697301864624,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.9614512471655329,
|
|
"fcm_dpo/beta": 0.16995760798454285,
|
|
"fcm_dpo/delta": -0.10415925085544586,
|
|
"fcm_dpo/margin": 5.553957462310791,
|
|
"fcm_dpo/q_t": 0.31619903445243835,
|
|
"grad_norm": 38.7966423034668,
|
|
"learning_rate": 2.359929934524829e-09,
|
|
"logits/chosen": 0.22739680111408234,
|
|
"logits/rejected": 0.1509087234735489,
|
|
"logps/chosen": -56.36328125,
|
|
"logps/ref_chosen": -49.140167236328125,
|
|
"logps/ref_rejected": -81.26971435546875,
|
|
"logps/rejected": -94.04678344726562,
|
|
"loss": 0.8522,
|
|
"margin_dpo/margin_mean": 5.553957939147949,
|
|
"margin_dpo/margin_std": 6.102416515350342,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.9629629629629629,
|
|
"fcm_dpo/beta": 0.17249064147472382,
|
|
"fcm_dpo/delta": 0.06623414158821106,
|
|
"fcm_dpo/margin": 4.564568519592285,
|
|
"fcm_dpo/q_t": 0.3609241545200348,
|
|
"grad_norm": 49.76155471801758,
|
|
"learning_rate": 2.1821473643827137e-09,
|
|
"logits/chosen": 0.25316452980041504,
|
|
"logits/rejected": 0.18915875256061554,
|
|
"logps/chosen": -82.8052978515625,
|
|
"logps/ref_chosen": -73.69658660888672,
|
|
"logps/ref_rejected": -83.01487731933594,
|
|
"logps/rejected": -96.68817138671875,
|
|
"loss": 1.0562,
|
|
"margin_dpo/margin_mean": 4.564568519592285,
|
|
"margin_dpo/margin_std": 7.476813316345215,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.9644746787603931,
|
|
"fcm_dpo/beta": 0.16861847043037415,
|
|
"fcm_dpo/delta": -0.1186608374118805,
|
|
"fcm_dpo/margin": 5.676681041717529,
|
|
"fcm_dpo/q_t": 0.3233264684677124,
|
|
"grad_norm": 43.48808670043945,
|
|
"learning_rate": 2.0112967923011646e-09,
|
|
"logits/chosen": 0.23133961856365204,
|
|
"logits/rejected": 0.18495085835456848,
|
|
"logps/chosen": -71.15939331054688,
|
|
"logps/ref_chosen": -62.78158187866211,
|
|
"logps/ref_rejected": -85.40478515625,
|
|
"logps/rejected": -99.45928955078125,
|
|
"loss": 0.8993,
|
|
"margin_dpo/margin_mean": 5.6766815185546875,
|
|
"margin_dpo/margin_std": 6.878664970397949,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.9659863945578231,
|
|
"fcm_dpo/beta": 0.16743648052215576,
|
|
"fcm_dpo/delta": -0.024798255413770676,
|
|
"fcm_dpo/margin": 5.211551666259766,
|
|
"fcm_dpo/q_t": 0.33889099955558777,
|
|
"grad_norm": 41.27330780029297,
|
|
"learning_rate": 1.847382997337943e-09,
|
|
"logits/chosen": 0.2327461540699005,
|
|
"logits/rejected": 0.12944665551185608,
|
|
"logps/chosen": -61.39842987060547,
|
|
"logps/ref_chosen": -53.76658630371094,
|
|
"logps/ref_rejected": -72.30009460449219,
|
|
"logps/rejected": -85.14349365234375,
|
|
"loss": 0.941,
|
|
"margin_dpo/margin_mean": 5.211551189422607,
|
|
"margin_dpo/margin_std": 6.853907108306885,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.9674981103552532,
|
|
"fcm_dpo/beta": 0.170379638671875,
|
|
"fcm_dpo/delta": 0.15754404664039612,
|
|
"fcm_dpo/margin": 4.130763053894043,
|
|
"fcm_dpo/q_t": 0.3569382131099701,
|
|
"grad_norm": 44.13318634033203,
|
|
"learning_rate": 1.690410564514244e-09,
|
|
"logits/chosen": 0.34077322483062744,
|
|
"logits/rejected": 0.29080483317375183,
|
|
"logps/chosen": -59.03297805786133,
|
|
"logps/ref_chosen": -51.41777801513672,
|
|
"logps/ref_rejected": -77.27879333496094,
|
|
"logps/rejected": -89.0247573852539,
|
|
"loss": 1.0821,
|
|
"margin_dpo/margin_mean": 4.130763053894043,
|
|
"margin_dpo/margin_std": 6.8070220947265625,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.9690098261526833,
|
|
"fcm_dpo/beta": 0.17606830596923828,
|
|
"fcm_dpo/delta": 0.12295868247747421,
|
|
"fcm_dpo/margin": 4.176577568054199,
|
|
"fcm_dpo/q_t": 0.35564327239990234,
|
|
"grad_norm": 62.344032287597656,
|
|
"learning_rate": 1.5403838846864692e-09,
|
|
"logits/chosen": 0.2501823902130127,
|
|
"logits/rejected": 0.22024014592170715,
|
|
"logps/chosen": -79.21989440917969,
|
|
"logps/ref_chosen": -71.0546646118164,
|
|
"logps/ref_rejected": -82.2440185546875,
|
|
"logps/rejected": -94.5858154296875,
|
|
"loss": 1.0164,
|
|
"margin_dpo/margin_mean": 4.176577091217041,
|
|
"margin_dpo/margin_std": 6.268715858459473,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.9705215419501134,
|
|
"fcm_dpo/beta": 0.18004068732261658,
|
|
"fcm_dpo/delta": 0.20534493029117584,
|
|
"fcm_dpo/margin": 3.6544651985168457,
|
|
"fcm_dpo/q_t": 0.3828880786895752,
|
|
"grad_norm": 56.02330780029297,
|
|
"learning_rate": 1.3973071544233218e-09,
|
|
"logits/chosen": 0.2151576280593872,
|
|
"logits/rejected": 0.21160337328910828,
|
|
"logps/chosen": -77.8289566040039,
|
|
"logps/ref_chosen": -68.92927551269531,
|
|
"logps/ref_rejected": -70.85682678222656,
|
|
"logps/rejected": -83.41098022460938,
|
|
"loss": 1.1699,
|
|
"margin_dpo/margin_mean": 3.6544651985168457,
|
|
"margin_dpo/margin_std": 7.091136455535889,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.9720332577475435,
|
|
"fcm_dpo/beta": 0.18671706318855286,
|
|
"fcm_dpo/delta": 0.19122450053691864,
|
|
"fcm_dpo/margin": 3.5908524990081787,
|
|
"fcm_dpo/q_t": 0.3652816712856293,
|
|
"grad_norm": 699.5745849609375,
|
|
"learning_rate": 1.261184375888541e-09,
|
|
"logits/chosen": 0.19385960698127747,
|
|
"logits/rejected": 0.12275560200214386,
|
|
"logps/chosen": -73.89380645751953,
|
|
"logps/ref_chosen": -65.30903625488281,
|
|
"logps/ref_rejected": -83.61613464355469,
|
|
"logps/rejected": -95.791748046875,
|
|
"loss": 1.2635,
|
|
"margin_dpo/margin_mean": 3.590852737426758,
|
|
"margin_dpo/margin_std": 8.115373611450195,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.9735449735449735,
|
|
"fcm_dpo/beta": 0.1939758062362671,
|
|
"fcm_dpo/delta": 0.07629616558551788,
|
|
"fcm_dpo/margin": 4.0165934562683105,
|
|
"fcm_dpo/q_t": 0.36271920800209045,
|
|
"grad_norm": 54.096920013427734,
|
|
"learning_rate": 1.1320193567288527e-09,
|
|
"logits/chosen": 0.3378884792327881,
|
|
"logits/rejected": 0.30602043867111206,
|
|
"logps/chosen": -58.58208084106445,
|
|
"logps/ref_chosen": -51.002601623535156,
|
|
"logps/ref_rejected": -64.46372985839844,
|
|
"logps/rejected": -76.05979919433594,
|
|
"loss": 1.1655,
|
|
"margin_dpo/margin_mean": 4.0165934562683105,
|
|
"margin_dpo/margin_std": 7.581402778625488,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.9750566893424036,
|
|
"fcm_dpo/beta": 0.19180044531822205,
|
|
"fcm_dpo/delta": -0.06086551398038864,
|
|
"fcm_dpo/margin": 4.7189226150512695,
|
|
"fcm_dpo/q_t": 0.3323266804218292,
|
|
"grad_norm": 49.45684051513672,
|
|
"learning_rate": 1.0098157099674987e-09,
|
|
"logits/chosen": 0.23067082464694977,
|
|
"logits/rejected": 0.2046801596879959,
|
|
"logps/chosen": -69.1575698852539,
|
|
"logps/ref_chosen": -60.963409423828125,
|
|
"logps/ref_rejected": -69.73353576660156,
|
|
"logps/rejected": -82.64661407470703,
|
|
"loss": 0.9268,
|
|
"margin_dpo/margin_mean": 4.7189226150512695,
|
|
"margin_dpo/margin_std": 6.106879234313965,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.9765684051398337,
|
|
"fcm_dpo/beta": 0.18813073635101318,
|
|
"fcm_dpo/delta": -0.12552106380462646,
|
|
"fcm_dpo/margin": 5.120940208435059,
|
|
"fcm_dpo/q_t": 0.32650938630104065,
|
|
"grad_norm": 43.06781005859375,
|
|
"learning_rate": 8.945768539031783e-10,
|
|
"logits/chosen": 0.33291056752204895,
|
|
"logits/rejected": 0.28602612018585205,
|
|
"logps/chosen": -71.06059265136719,
|
|
"logps/ref_chosen": -62.290069580078125,
|
|
"logps/ref_rejected": -85.54812622070312,
|
|
"logps/rejected": -99.43959045410156,
|
|
"loss": 0.9471,
|
|
"margin_dpo/margin_mean": 5.120940208435059,
|
|
"margin_dpo/margin_std": 6.938012599945068,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.9780801209372638,
|
|
"fcm_dpo/beta": 0.17964306473731995,
|
|
"fcm_dpo/delta": -0.3153572380542755,
|
|
"fcm_dpo/margin": 6.294256210327148,
|
|
"fcm_dpo/q_t": 0.29699230194091797,
|
|
"grad_norm": 52.842838287353516,
|
|
"learning_rate": 7.863060120144316e-10,
|
|
"logits/chosen": 0.3208690881729126,
|
|
"logits/rejected": 0.23945260047912598,
|
|
"logps/chosen": -75.46033477783203,
|
|
"logps/ref_chosen": -67.515869140625,
|
|
"logps/ref_rejected": -101.50871276855469,
|
|
"logps/rejected": -115.74742126464844,
|
|
"loss": 0.8364,
|
|
"margin_dpo/margin_mean": 6.294255256652832,
|
|
"margin_dpo/margin_std": 7.129534721374512,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.9795918367346939,
|
|
"fcm_dpo/beta": 0.17466625571250916,
|
|
"fcm_dpo/delta": 0.014513436704874039,
|
|
"fcm_dpo/margin": 4.785521507263184,
|
|
"fcm_dpo/q_t": 0.33822280168533325,
|
|
"grad_norm": 44.56800842285156,
|
|
"learning_rate": 6.850062128694045e-10,
|
|
"logits/chosen": 0.24528364837169647,
|
|
"logits/rejected": 0.18555161356925964,
|
|
"logps/chosen": -73.12615203857422,
|
|
"logps/ref_chosen": -64.59593963623047,
|
|
"logps/ref_rejected": -83.384033203125,
|
|
"logps/rejected": -96.69976806640625,
|
|
"loss": 0.9935,
|
|
"margin_dpo/margin_mean": 4.785521507263184,
|
|
"margin_dpo/margin_std": 6.645514488220215,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.981103552532124,
|
|
"fcm_dpo/beta": 0.17323724925518036,
|
|
"fcm_dpo/delta": -0.04737182706594467,
|
|
"fcm_dpo/margin": 5.144677639007568,
|
|
"fcm_dpo/q_t": 0.3352319300174713,
|
|
"grad_norm": 47.017181396484375,
|
|
"learning_rate": 5.906802900412788e-10,
|
|
"logits/chosen": 0.2713914215564728,
|
|
"logits/rejected": 0.21891635656356812,
|
|
"logps/chosen": -56.810150146484375,
|
|
"logps/ref_chosen": -49.30964660644531,
|
|
"logps/ref_rejected": -73.73710632324219,
|
|
"logps/rejected": -86.38228607177734,
|
|
"loss": 0.9812,
|
|
"margin_dpo/margin_mean": 5.14467716217041,
|
|
"margin_dpo/margin_std": 7.023676872253418,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.982615268329554,
|
|
"fcm_dpo/beta": 0.17615830898284912,
|
|
"fcm_dpo/delta": 0.04753459244966507,
|
|
"fcm_dpo/margin": 4.5751237869262695,
|
|
"fcm_dpo/q_t": 0.3590129613876343,
|
|
"grad_norm": 56.19305419921875,
|
|
"learning_rate": 5.033308820289184e-10,
|
|
"logits/chosen": 0.3633171021938324,
|
|
"logits/rejected": 0.3100350499153137,
|
|
"logps/chosen": -62.552730560302734,
|
|
"logps/ref_chosen": -55.06325912475586,
|
|
"logps/ref_rejected": -77.39610290527344,
|
|
"logps/rejected": -89.460693359375,
|
|
"loss": 1.0783,
|
|
"margin_dpo/margin_mean": 4.575124740600586,
|
|
"margin_dpo/margin_std": 7.5110650062561035,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.9841269841269841,
|
|
"fcm_dpo/beta": 0.17935200035572052,
|
|
"fcm_dpo/delta": 0.041574351489543915,
|
|
"fcm_dpo/margin": 4.506626129150391,
|
|
"fcm_dpo/q_t": 0.3520933985710144,
|
|
"grad_norm": 50.82936477661133,
|
|
"learning_rate": 4.2296043218295606e-10,
|
|
"logits/chosen": 0.35629215836524963,
|
|
"logits/rejected": 0.2943439483642578,
|
|
"logps/chosen": -61.30165481567383,
|
|
"logps/ref_chosen": -54.065162658691406,
|
|
"logps/ref_rejected": -77.79080200195312,
|
|
"logps/rejected": -89.53392791748047,
|
|
"loss": 0.9943,
|
|
"margin_dpo/margin_mean": 4.506626129150391,
|
|
"margin_dpo/margin_std": 6.297160625457764,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 0.9856386999244142,
|
|
"fcm_dpo/beta": 0.17824706435203552,
|
|
"fcm_dpo/delta": 0.06212994083762169,
|
|
"fcm_dpo/margin": 4.44667911529541,
|
|
"fcm_dpo/q_t": 0.3550780117511749,
|
|
"grad_norm": 47.756290435791016,
|
|
"learning_rate": 3.4957118863768176e-10,
|
|
"logits/chosen": 0.30222636461257935,
|
|
"logits/rejected": 0.24972417950630188,
|
|
"logps/chosen": -72.32354736328125,
|
|
"logps/ref_chosen": -63.64030456542969,
|
|
"logps/ref_rejected": -78.86882019042969,
|
|
"logps/rejected": -91.99874114990234,
|
|
"loss": 1.0128,
|
|
"margin_dpo/margin_mean": 4.446678161621094,
|
|
"margin_dpo/margin_std": 6.5444793701171875,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 0.9871504157218443,
|
|
"fcm_dpo/beta": 0.1795337200164795,
|
|
"fcm_dpo/delta": -0.0007160389795899391,
|
|
"fcm_dpo/margin": 4.738056182861328,
|
|
"fcm_dpo/q_t": 0.34943753480911255,
|
|
"grad_norm": 47.07265090942383,
|
|
"learning_rate": 2.831652042480093e-10,
|
|
"logits/chosen": 0.24882294237613678,
|
|
"logits/rejected": 0.20429188013076782,
|
|
"logps/chosen": -69.21574401855469,
|
|
"logps/ref_chosen": -61.668373107910156,
|
|
"logps/ref_rejected": -73.83012390136719,
|
|
"logps/rejected": -86.11554718017578,
|
|
"loss": 0.9995,
|
|
"margin_dpo/margin_mean": 4.73805570602417,
|
|
"margin_dpo/margin_std": 6.993717193603516,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 0.9886621315192744,
|
|
"fcm_dpo/beta": 0.1786322295665741,
|
|
"fcm_dpo/delta": 0.027579210698604584,
|
|
"fcm_dpo/margin": 4.607399940490723,
|
|
"fcm_dpo/q_t": 0.3561086058616638,
|
|
"grad_norm": 51.774452209472656,
|
|
"learning_rate": 2.2374433653205016e-10,
|
|
"logits/chosen": 0.26551055908203125,
|
|
"logits/rejected": 0.18796709179878235,
|
|
"logps/chosen": -65.25658416748047,
|
|
"logps/ref_chosen": -57.568267822265625,
|
|
"logps/ref_rejected": -87.74789428710938,
|
|
"logps/rejected": -100.04360961914062,
|
|
"loss": 1.0041,
|
|
"margin_dpo/margin_mean": 4.6073994636535645,
|
|
"margin_dpo/margin_std": 6.91198205947876,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 0.9901738473167044,
|
|
"fcm_dpo/beta": 0.17491678893566132,
|
|
"fcm_dpo/delta": -0.10167790949344635,
|
|
"fcm_dpo/margin": 5.344045639038086,
|
|
"fcm_dpo/q_t": 0.3169988989830017,
|
|
"grad_norm": 36.369998931884766,
|
|
"learning_rate": 1.7131024761923852e-10,
|
|
"logits/chosen": 0.2785993218421936,
|
|
"logits/rejected": 0.1956745982170105,
|
|
"logps/chosen": -58.907257080078125,
|
|
"logps/ref_chosen": -52.14714813232422,
|
|
"logps/ref_rejected": -80.85014343261719,
|
|
"logps/rejected": -92.95428466796875,
|
|
"loss": 0.8513,
|
|
"margin_dpo/margin_mean": 5.344045639038086,
|
|
"margin_dpo/margin_std": 5.551706314086914,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.9916855631141346,
|
|
"fcm_dpo/beta": 0.17215146124362946,
|
|
"fcm_dpo/delta": -0.15092015266418457,
|
|
"fcm_dpo/margin": 5.719700813293457,
|
|
"fcm_dpo/q_t": 0.3209837079048157,
|
|
"grad_norm": 42.81301498413086,
|
|
"learning_rate": 1.2586440420372934e-10,
|
|
"logits/chosen": 0.256664901971817,
|
|
"logits/rejected": 0.20875748991966248,
|
|
"logps/chosen": -81.50572204589844,
|
|
"logps/ref_chosen": -73.25672912597656,
|
|
"logps/ref_rejected": -85.35127258300781,
|
|
"logps/rejected": -99.3199691772461,
|
|
"loss": 0.889,
|
|
"margin_dpo/margin_mean": 5.719700336456299,
|
|
"margin_dpo/margin_std": 6.941084861755371,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 0.9931972789115646,
|
|
"fcm_dpo/beta": 0.16662049293518066,
|
|
"fcm_dpo/delta": -0.16033346951007843,
|
|
"fcm_dpo/margin": 5.956956386566162,
|
|
"fcm_dpo/q_t": 0.3180977702140808,
|
|
"grad_norm": 39.3922004699707,
|
|
"learning_rate": 8.740807750345913e-11,
|
|
"logits/chosen": 0.38615646958351135,
|
|
"logits/rejected": 0.30962836742401123,
|
|
"logps/chosen": -57.52777862548828,
|
|
"logps/ref_chosen": -49.72339630126953,
|
|
"logps/ref_rejected": -75.1568603515625,
|
|
"logps/rejected": -88.91819763183594,
|
|
"loss": 0.9139,
|
|
"margin_dpo/margin_mean": 5.956956386566162,
|
|
"margin_dpo/margin_std": 7.429527282714844,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 0.9947089947089947,
|
|
"fcm_dpo/beta": 0.17064854502677917,
|
|
"fcm_dpo/delta": 0.09364507347345352,
|
|
"fcm_dpo/margin": 4.452823638916016,
|
|
"fcm_dpo/q_t": 0.36766210198402405,
|
|
"grad_norm": 46.99626922607422,
|
|
"learning_rate": 5.594234322453539e-11,
|
|
"logits/chosen": 0.3046637773513794,
|
|
"logits/rejected": 0.2689476013183594,
|
|
"logps/chosen": -71.40650939941406,
|
|
"logps/ref_chosen": -63.04634094238281,
|
|
"logps/ref_rejected": -83.44963073730469,
|
|
"logps/rejected": -96.26261901855469,
|
|
"loss": 1.124,
|
|
"margin_dpo/margin_mean": 4.452824115753174,
|
|
"margin_dpo/margin_std": 7.831037521362305,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 0.9962207105064248,
|
|
"fcm_dpo/beta": 0.17275840044021606,
|
|
"fcm_dpo/delta": 0.1908574402332306,
|
|
"fcm_dpo/margin": 3.889401435852051,
|
|
"fcm_dpo/q_t": 0.3744069039821625,
|
|
"grad_norm": 44.81747817993164,
|
|
"learning_rate": 3.146808153123293e-11,
|
|
"logits/chosen": 0.3521784543991089,
|
|
"logits/rejected": 0.29007095098495483,
|
|
"logps/chosen": -63.78364562988281,
|
|
"logps/ref_chosen": -55.0802001953125,
|
|
"logps/ref_rejected": -71.91049194335938,
|
|
"logps/rejected": -84.50334167480469,
|
|
"loss": 1.1285,
|
|
"margin_dpo/margin_mean": 3.8894009590148926,
|
|
"margin_dpo/margin_std": 7.039710998535156,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 0.9977324263038548,
|
|
"fcm_dpo/beta": 0.17371876537799835,
|
|
"fcm_dpo/delta": -0.0941925197839737,
|
|
"fcm_dpo/margin": 5.3846049308776855,
|
|
"fcm_dpo/q_t": 0.32256758213043213,
|
|
"grad_norm": 42.97410202026367,
|
|
"learning_rate": 1.3985977021235829e-11,
|
|
"logits/chosen": 0.3720157742500305,
|
|
"logits/rejected": 0.3104935884475708,
|
|
"logps/chosen": -62.22294616699219,
|
|
"logps/ref_chosen": -54.525917053222656,
|
|
"logps/ref_rejected": -81.23604583740234,
|
|
"logps/rejected": -94.31768798828125,
|
|
"loss": 0.8823,
|
|
"margin_dpo/margin_mean": 5.384605407714844,
|
|
"margin_dpo/margin_std": 6.330369472503662,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.999244142101285,
|
|
"fcm_dpo/beta": 0.17523688077926636,
|
|
"fcm_dpo/delta": 0.0759362056851387,
|
|
"fcm_dpo/margin": 4.449112892150879,
|
|
"fcm_dpo/q_t": 0.3557879328727722,
|
|
"grad_norm": 45.270999908447266,
|
|
"learning_rate": 3.4965187065971735e-12,
|
|
"logits/chosen": 0.24594025313854218,
|
|
"logits/rejected": 0.17398732900619507,
|
|
"logps/chosen": -69.74138641357422,
|
|
"logps/ref_chosen": -60.37263870239258,
|
|
"logps/ref_rejected": -77.42874145507812,
|
|
"logps/rejected": -91.24661254882812,
|
|
"loss": 1.0872,
|
|
"margin_dpo/margin_mean": 4.449113845825195,
|
|
"margin_dpo/margin_std": 7.379522323608398,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.999244142101285,
|
|
"step": 661,
|
|
"total_flos": 0.0,
|
|
"train_loss": 1.0861937751748378,
|
|
"train_runtime": 1754.5635,
|
|
"train_samples_per_second": 24.129,
|
|
"train_steps_per_second": 0.377
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 661,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 50,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|