Model: jackf857/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-0.6 Source: Original Platform
12654 lines
466 KiB
JSON
12654 lines
466 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.999244142101285,
|
|
"eval_steps": 200,
|
|
"global_step": 661,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0015117157974300832,
|
|
"fcm_dpo/beta": 0.10617753118276596,
|
|
"fcm_dpo/delta": 0.2997117042541504,
|
|
"fcm_dpo/margin": -0.0013532638549804688,
|
|
"fcm_dpo/q_t": 0.5000401735305786,
|
|
"grad_norm": 29.968503952026367,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": 0.13337239623069763,
|
|
"logits/rejected": 0.12492949515581131,
|
|
"logps/chosen": -64.5841293334961,
|
|
"logps/ref_chosen": -64.61280822753906,
|
|
"logps/ref_rejected": -64.17195129394531,
|
|
"logps/rejected": -64.14192199707031,
|
|
"loss": 1.3866,
|
|
"margin_dpo/margin_mean": -0.0013527870178222656,
|
|
"margin_dpo/margin_std": 0.2561596930027008,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0030234315948601664,
|
|
"fcm_dpo/beta": 0.10940517485141754,
|
|
"fcm_dpo/delta": 0.2951034903526306,
|
|
"fcm_dpo/margin": 0.037450045347213745,
|
|
"fcm_dpo/q_t": 0.49900609254837036,
|
|
"grad_norm": 30.472522735595703,
|
|
"learning_rate": 7.462686567164179e-09,
|
|
"logits/chosen": 0.09414851665496826,
|
|
"logits/rejected": 0.07363267242908478,
|
|
"logps/chosen": -56.101890563964844,
|
|
"logps/ref_chosen": -56.0989990234375,
|
|
"logps/ref_rejected": -66.59971618652344,
|
|
"logps/rejected": -66.64006042480469,
|
|
"loss": 1.3823,
|
|
"margin_dpo/margin_mean": 0.03744968771934509,
|
|
"margin_dpo/margin_std": 0.27811938524246216,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.0045351473922902496,
|
|
"fcm_dpo/beta": 0.1160772293806076,
|
|
"fcm_dpo/delta": 0.2968211770057678,
|
|
"fcm_dpo/margin": -0.018296539783477783,
|
|
"fcm_dpo/q_t": 0.5005147457122803,
|
|
"grad_norm": 36.36097717285156,
|
|
"learning_rate": 1.4925373134328357e-08,
|
|
"logits/chosen": 0.0993613749742508,
|
|
"logits/rejected": 0.061305850744247437,
|
|
"logps/chosen": -65.44808959960938,
|
|
"logps/ref_chosen": -65.45726013183594,
|
|
"logps/ref_rejected": -90.82853698730469,
|
|
"logps/rejected": -90.80107116699219,
|
|
"loss": 1.3885,
|
|
"margin_dpo/margin_mean": -0.01829671859741211,
|
|
"margin_dpo/margin_std": 0.29925334453582764,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.006046863189720333,
|
|
"fcm_dpo/beta": 0.12316329777240753,
|
|
"fcm_dpo/delta": 0.2957630753517151,
|
|
"fcm_dpo/margin": 0.005419373512268066,
|
|
"fcm_dpo/q_t": 0.49983808398246765,
|
|
"grad_norm": 42.36802673339844,
|
|
"learning_rate": 2.2388059701492534e-08,
|
|
"logits/chosen": 0.10537564754486084,
|
|
"logits/rejected": 0.08931504189968109,
|
|
"logps/chosen": -76.84124755859375,
|
|
"logps/ref_chosen": -76.86018371582031,
|
|
"logps/ref_rejected": -79.91523742675781,
|
|
"logps/rejected": -79.90172576904297,
|
|
"loss": 1.3859,
|
|
"margin_dpo/margin_mean": 0.005418956279754639,
|
|
"margin_dpo/margin_std": 0.36413294076919556,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.007558578987150416,
|
|
"fcm_dpo/beta": 0.13464157283306122,
|
|
"fcm_dpo/delta": 0.2998310327529907,
|
|
"fcm_dpo/margin": -0.019110530614852905,
|
|
"fcm_dpo/q_t": 0.5006451606750488,
|
|
"grad_norm": 39.96297836303711,
|
|
"learning_rate": 2.9850746268656714e-08,
|
|
"logits/chosen": 0.08226083219051361,
|
|
"logits/rejected": 0.04336044192314148,
|
|
"logps/chosen": -63.03343963623047,
|
|
"logps/ref_chosen": -62.97134017944336,
|
|
"logps/ref_rejected": -79.9192123413086,
|
|
"logps/rejected": -79.96220397949219,
|
|
"loss": 1.3894,
|
|
"margin_dpo/margin_mean": -0.019109666347503662,
|
|
"margin_dpo/margin_std": 0.33363407850265503,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.009070294784580499,
|
|
"fcm_dpo/beta": 0.14293737709522247,
|
|
"fcm_dpo/delta": 0.29895198345184326,
|
|
"fcm_dpo/margin": -0.03577873110771179,
|
|
"fcm_dpo/q_t": 0.501295268535614,
|
|
"grad_norm": 42.36745834350586,
|
|
"learning_rate": 3.731343283582089e-08,
|
|
"logits/chosen": 0.1481696516275406,
|
|
"logits/rejected": 0.10847893357276917,
|
|
"logps/chosen": -51.35020065307617,
|
|
"logps/ref_chosen": -51.30736541748047,
|
|
"logps/ref_rejected": -82.77239227294922,
|
|
"logps/rejected": -82.77944946289062,
|
|
"loss": 1.3922,
|
|
"margin_dpo/margin_mean": -0.035778701305389404,
|
|
"margin_dpo/margin_std": 0.3935219645500183,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.010582010582010581,
|
|
"fcm_dpo/beta": 0.1473204642534256,
|
|
"fcm_dpo/delta": 0.2976083755493164,
|
|
"fcm_dpo/margin": 0.007407635450363159,
|
|
"fcm_dpo/q_t": 0.4997352957725525,
|
|
"grad_norm": 40.037208557128906,
|
|
"learning_rate": 4.477611940298507e-08,
|
|
"logits/chosen": 0.01973375305533409,
|
|
"logits/rejected": -0.024250730872154236,
|
|
"logps/chosen": -51.42656707763672,
|
|
"logps/ref_chosen": -51.45941162109375,
|
|
"logps/ref_rejected": -66.3828125,
|
|
"logps/rejected": -66.35736846923828,
|
|
"loss": 1.3854,
|
|
"margin_dpo/margin_mean": 0.007407456636428833,
|
|
"margin_dpo/margin_std": 0.24686771631240845,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.012093726379440665,
|
|
"fcm_dpo/beta": 0.16093291342258453,
|
|
"fcm_dpo/delta": 0.29529640078544617,
|
|
"fcm_dpo/margin": 0.01261255145072937,
|
|
"fcm_dpo/q_t": 0.4995649755001068,
|
|
"grad_norm": 46.08513259887695,
|
|
"learning_rate": 5.223880597014925e-08,
|
|
"logits/chosen": 0.10268443822860718,
|
|
"logits/rejected": 0.0798833817243576,
|
|
"logps/chosen": -62.18996810913086,
|
|
"logps/ref_chosen": -62.197547912597656,
|
|
"logps/ref_rejected": -74.66180419921875,
|
|
"logps/rejected": -74.66683959960938,
|
|
"loss": 1.385,
|
|
"margin_dpo/margin_mean": 0.012611836194992065,
|
|
"margin_dpo/margin_std": 0.323344886302948,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.013605442176870748,
|
|
"fcm_dpo/beta": 0.1707511693239212,
|
|
"fcm_dpo/delta": 0.29609861969947815,
|
|
"fcm_dpo/margin": 0.01630309224128723,
|
|
"fcm_dpo/q_t": 0.49936288595199585,
|
|
"grad_norm": 53.85386657714844,
|
|
"learning_rate": 5.970149253731343e-08,
|
|
"logits/chosen": 0.175694078207016,
|
|
"logits/rejected": 0.11593098938465118,
|
|
"logps/chosen": -55.649192810058594,
|
|
"logps/ref_chosen": -55.629722595214844,
|
|
"logps/ref_rejected": -86.21221923828125,
|
|
"logps/rejected": -86.24800109863281,
|
|
"loss": 1.3845,
|
|
"margin_dpo/margin_mean": 0.016303330659866333,
|
|
"margin_dpo/margin_std": 0.35587644577026367,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.015117157974300832,
|
|
"fcm_dpo/beta": 0.18108926713466644,
|
|
"fcm_dpo/delta": 0.2939136326313019,
|
|
"fcm_dpo/margin": -0.010755598545074463,
|
|
"fcm_dpo/q_t": 0.5005801916122437,
|
|
"grad_norm": 53.989009857177734,
|
|
"learning_rate": 6.71641791044776e-08,
|
|
"logits/chosen": 0.14876845479011536,
|
|
"logits/rejected": 0.11683596670627594,
|
|
"logps/chosen": -62.660911560058594,
|
|
"logps/ref_chosen": -62.69060134887695,
|
|
"logps/ref_rejected": -90.610107421875,
|
|
"logps/rejected": -90.56967163085938,
|
|
"loss": 1.3898,
|
|
"margin_dpo/margin_mean": -0.010755836963653564,
|
|
"margin_dpo/margin_std": 0.42465806007385254,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.016628873771730914,
|
|
"fcm_dpo/beta": 0.19215525686740875,
|
|
"fcm_dpo/delta": 0.29656803607940674,
|
|
"fcm_dpo/margin": -0.036903828382492065,
|
|
"fcm_dpo/q_t": 0.5018194913864136,
|
|
"grad_norm": 56.99840545654297,
|
|
"learning_rate": 7.462686567164178e-08,
|
|
"logits/chosen": 0.0916949212551117,
|
|
"logits/rejected": 0.0850258320569992,
|
|
"logps/chosen": -65.7918701171875,
|
|
"logps/ref_chosen": -65.76712036132812,
|
|
"logps/ref_rejected": -72.4764633178711,
|
|
"logps/rejected": -72.46430969238281,
|
|
"loss": 1.3942,
|
|
"margin_dpo/margin_mean": -0.036903709173202515,
|
|
"margin_dpo/margin_std": 0.27903497219085693,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.018140589569160998,
|
|
"fcm_dpo/beta": 0.19215525686740875,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.06204596161842346,
|
|
"fcm_dpo/q_t": 0.5029789209365845,
|
|
"grad_norm": 54.74998474121094,
|
|
"learning_rate": 8.208955223880596e-08,
|
|
"logits/chosen": -0.0007961541414260864,
|
|
"logits/rejected": -0.016335628926753998,
|
|
"logps/chosen": -60.76300048828125,
|
|
"logps/ref_chosen": -60.704891204833984,
|
|
"logps/ref_rejected": -69.41564178466797,
|
|
"logps/rejected": -69.41170501708984,
|
|
"loss": 1.399,
|
|
"margin_dpo/margin_mean": -0.06204575300216675,
|
|
"margin_dpo/margin_std": 0.28618353605270386,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.019652305366591082,
|
|
"fcm_dpo/beta": 0.19215525686740875,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.06563130021095276,
|
|
"fcm_dpo/q_t": 0.5031505823135376,
|
|
"grad_norm": 56.53636932373047,
|
|
"learning_rate": 8.955223880597014e-08,
|
|
"logits/chosen": 0.13603587448596954,
|
|
"logits/rejected": 0.07090998440980911,
|
|
"logps/chosen": -49.936485290527344,
|
|
"logps/ref_chosen": -49.90925598144531,
|
|
"logps/ref_rejected": -92.37818145751953,
|
|
"logps/rejected": -92.33977508544922,
|
|
"loss": 1.3996,
|
|
"margin_dpo/margin_mean": -0.06563141942024231,
|
|
"margin_dpo/margin_std": 0.2707768678665161,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.021164021164021163,
|
|
"fcm_dpo/beta": 0.2097225785255432,
|
|
"fcm_dpo/delta": 0.5815231800079346,
|
|
"fcm_dpo/margin": 0.09396466612815857,
|
|
"fcm_dpo/q_t": 0.4953842759132385,
|
|
"grad_norm": 61.18081283569336,
|
|
"learning_rate": 9.701492537313432e-08,
|
|
"logits/chosen": 0.08565383404493332,
|
|
"logits/rejected": 0.06797105073928833,
|
|
"logps/chosen": -60.578643798828125,
|
|
"logps/ref_chosen": -60.61879348754883,
|
|
"logps/ref_rejected": -71.79306030273438,
|
|
"logps/rejected": -71.84687042236328,
|
|
"loss": 1.3676,
|
|
"margin_dpo/margin_mean": 0.09396436810493469,
|
|
"margin_dpo/margin_std": 0.2676827907562256,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.022675736961451247,
|
|
"fcm_dpo/beta": 0.21585531532764435,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.01250794529914856,
|
|
"fcm_dpo/q_t": 0.5006709098815918,
|
|
"grad_norm": 72.49380493164062,
|
|
"learning_rate": 1.044776119402985e-07,
|
|
"logits/chosen": 0.07883284986019135,
|
|
"logits/rejected": 0.03507314249873161,
|
|
"logps/chosen": -63.48413848876953,
|
|
"logps/ref_chosen": -63.46953582763672,
|
|
"logps/ref_rejected": -88.88951110839844,
|
|
"logps/rejected": -88.89160919189453,
|
|
"loss": 1.3902,
|
|
"margin_dpo/margin_mean": -0.01250794529914856,
|
|
"margin_dpo/margin_std": 0.3276939392089844,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.02418745275888133,
|
|
"fcm_dpo/beta": 0.22911374270915985,
|
|
"fcm_dpo/delta": 0.29805102944374084,
|
|
"fcm_dpo/margin": -0.0001682192087173462,
|
|
"fcm_dpo/q_t": 0.5000435709953308,
|
|
"grad_norm": 61.602413177490234,
|
|
"learning_rate": 1.1194029850746268e-07,
|
|
"logits/chosen": 0.10727139562368393,
|
|
"logits/rejected": 0.07074932754039764,
|
|
"logps/chosen": -46.52628707885742,
|
|
"logps/ref_chosen": -46.53229904174805,
|
|
"logps/ref_rejected": -74.27533721923828,
|
|
"logps/rejected": -74.2691650390625,
|
|
"loss": 1.3874,
|
|
"margin_dpo/margin_mean": -0.00016827881336212158,
|
|
"margin_dpo/margin_std": 0.2899588346481323,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.025699168556311415,
|
|
"fcm_dpo/beta": 0.23615270853042603,
|
|
"fcm_dpo/delta": 0.2981564998626709,
|
|
"fcm_dpo/margin": -0.013454735279083252,
|
|
"fcm_dpo/q_t": 0.5007691979408264,
|
|
"grad_norm": 77.29975128173828,
|
|
"learning_rate": 1.1940298507462686e-07,
|
|
"logits/chosen": 0.05410391837358475,
|
|
"logits/rejected": 0.03542998805642128,
|
|
"logps/chosen": -64.09270477294922,
|
|
"logps/ref_chosen": -64.07783508300781,
|
|
"logps/ref_rejected": -86.40876770019531,
|
|
"logps/rejected": -86.41018676757812,
|
|
"loss": 1.3906,
|
|
"margin_dpo/margin_mean": -0.013455450534820557,
|
|
"margin_dpo/margin_std": 0.2995184659957886,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.027210884353741496,
|
|
"fcm_dpo/beta": 0.26530909538269043,
|
|
"fcm_dpo/delta": 0.5765583515167236,
|
|
"fcm_dpo/margin": 0.0933290421962738,
|
|
"fcm_dpo/q_t": 0.49414387345314026,
|
|
"grad_norm": 73.40933227539062,
|
|
"learning_rate": 1.2686567164179106e-07,
|
|
"logits/chosen": 0.07863786071538925,
|
|
"logits/rejected": 0.03418922424316406,
|
|
"logps/chosen": -44.8294563293457,
|
|
"logps/ref_chosen": -44.87433624267578,
|
|
"logps/ref_rejected": -70.97604370117188,
|
|
"logps/rejected": -71.02449798583984,
|
|
"loss": 1.363,
|
|
"margin_dpo/margin_mean": 0.09332945942878723,
|
|
"margin_dpo/margin_std": 0.28065070509910583,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.02872260015117158,
|
|
"fcm_dpo/beta": 0.2983761131763458,
|
|
"fcm_dpo/delta": 0.5883051156997681,
|
|
"fcm_dpo/margin": 0.04047667980194092,
|
|
"fcm_dpo/q_t": 0.49707770347595215,
|
|
"grad_norm": 92.44178009033203,
|
|
"learning_rate": 1.343283582089552e-07,
|
|
"logits/chosen": 0.06618274748325348,
|
|
"logits/rejected": 0.05268959701061249,
|
|
"logps/chosen": -68.09764099121094,
|
|
"logps/ref_chosen": -68.1598129272461,
|
|
"logps/ref_rejected": -81.17138671875,
|
|
"logps/rejected": -81.14969635009766,
|
|
"loss": 1.3764,
|
|
"margin_dpo/margin_mean": 0.04047694802284241,
|
|
"margin_dpo/margin_std": 0.33094900846481323,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.030234315948601664,
|
|
"fcm_dpo/beta": 0.32533735036849976,
|
|
"fcm_dpo/delta": 0.29020002484321594,
|
|
"fcm_dpo/margin": 0.029502317309379578,
|
|
"fcm_dpo/q_t": 0.4977492094039917,
|
|
"grad_norm": 94.25809478759766,
|
|
"learning_rate": 1.4179104477611938e-07,
|
|
"logits/chosen": 0.16051048040390015,
|
|
"logits/rejected": 0.1364545077085495,
|
|
"logps/chosen": -53.649314880371094,
|
|
"logps/ref_chosen": -53.67856216430664,
|
|
"logps/ref_rejected": -74.16911315917969,
|
|
"logps/rejected": -74.16937255859375,
|
|
"loss": 1.3783,
|
|
"margin_dpo/margin_mean": 0.029502198100090027,
|
|
"margin_dpo/margin_std": 0.23968058824539185,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.031746031746031744,
|
|
"fcm_dpo/beta": 0.33534562587738037,
|
|
"fcm_dpo/delta": 0.2985358238220215,
|
|
"fcm_dpo/margin": -0.01896098256111145,
|
|
"fcm_dpo/q_t": 0.501542866230011,
|
|
"grad_norm": 98.49360656738281,
|
|
"learning_rate": 1.4925373134328355e-07,
|
|
"logits/chosen": 0.0989365428686142,
|
|
"logits/rejected": 0.07379420101642609,
|
|
"logps/chosen": -64.69507598876953,
|
|
"logps/ref_chosen": -64.70155334472656,
|
|
"logps/ref_rejected": -81.02095031738281,
|
|
"logps/rejected": -80.9955062866211,
|
|
"loss": 1.395,
|
|
"margin_dpo/margin_mean": -0.018961191177368164,
|
|
"margin_dpo/margin_std": 0.303374707698822,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.03325774754346183,
|
|
"fcm_dpo/beta": 0.37750545144081116,
|
|
"fcm_dpo/delta": 0.589049756526947,
|
|
"fcm_dpo/margin": 0.03040817379951477,
|
|
"fcm_dpo/q_t": 0.49726974964141846,
|
|
"grad_norm": 108.48323059082031,
|
|
"learning_rate": 1.5671641791044775e-07,
|
|
"logits/chosen": 0.0004040743806399405,
|
|
"logits/rejected": -0.020364250987768173,
|
|
"logps/chosen": -58.048194885253906,
|
|
"logps/ref_chosen": -58.03599166870117,
|
|
"logps/ref_rejected": -80.72721862792969,
|
|
"logps/rejected": -80.76982116699219,
|
|
"loss": 1.3781,
|
|
"margin_dpo/margin_mean": 0.030408114194869995,
|
|
"margin_dpo/margin_std": 0.3076469302177429,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.03476946334089191,
|
|
"fcm_dpo/beta": 0.42479178309440613,
|
|
"fcm_dpo/delta": 0.5902184844017029,
|
|
"fcm_dpo/margin": 0.024062812328338623,
|
|
"fcm_dpo/q_t": 0.49758607149124146,
|
|
"grad_norm": 138.24923706054688,
|
|
"learning_rate": 1.6417910447761193e-07,
|
|
"logits/chosen": 0.1478240191936493,
|
|
"logits/rejected": 0.1219228208065033,
|
|
"logps/chosen": -66.36636352539062,
|
|
"logps/ref_chosen": -66.35608673095703,
|
|
"logps/ref_rejected": -93.02769470214844,
|
|
"logps/rejected": -93.06202697753906,
|
|
"loss": 1.3801,
|
|
"margin_dpo/margin_mean": 0.024062126874923706,
|
|
"margin_dpo/margin_std": 0.30408918857574463,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.036281179138321996,
|
|
"fcm_dpo/beta": 0.43721428513526917,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.05492676794528961,
|
|
"fcm_dpo/q_t": 0.5059708952903748,
|
|
"grad_norm": 117.01378631591797,
|
|
"learning_rate": 1.716417910447761e-07,
|
|
"logits/chosen": 0.13261155784130096,
|
|
"logits/rejected": 0.09979216754436493,
|
|
"logps/chosen": -54.479827880859375,
|
|
"logps/ref_chosen": -54.461238861083984,
|
|
"logps/ref_rejected": -68.33817291259766,
|
|
"logps/rejected": -68.30183410644531,
|
|
"loss": 1.4136,
|
|
"margin_dpo/margin_mean": -0.054926902055740356,
|
|
"margin_dpo/margin_std": 0.2555236518383026,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.03779289493575208,
|
|
"fcm_dpo/beta": 0.47826117277145386,
|
|
"fcm_dpo/delta": 0.5947252511978149,
|
|
"fcm_dpo/margin": 0.011602401733398438,
|
|
"fcm_dpo/q_t": 0.4986857771873474,
|
|
"grad_norm": 141.31944274902344,
|
|
"learning_rate": 1.7910447761194027e-07,
|
|
"logits/chosen": 0.13031867146492004,
|
|
"logits/rejected": 0.07699558138847351,
|
|
"logps/chosen": -60.027915954589844,
|
|
"logps/ref_chosen": -60.00420379638672,
|
|
"logps/ref_rejected": -90.47376251220703,
|
|
"logps/rejected": -90.50907897949219,
|
|
"loss": 1.3869,
|
|
"margin_dpo/margin_mean": 0.011602312326431274,
|
|
"margin_dpo/margin_std": 0.3307107388973236,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.039304610733182165,
|
|
"fcm_dpo/beta": 0.522553026676178,
|
|
"fcm_dpo/delta": 0.2967897057533264,
|
|
"fcm_dpo/margin": -0.020512163639068604,
|
|
"fcm_dpo/q_t": 0.5027469396591187,
|
|
"grad_norm": 156.2456512451172,
|
|
"learning_rate": 1.8656716417910447e-07,
|
|
"logits/chosen": 0.10858827084302902,
|
|
"logits/rejected": 0.09025729447603226,
|
|
"logps/chosen": -56.84931945800781,
|
|
"logps/ref_chosen": -56.81915283203125,
|
|
"logps/ref_rejected": -77.84333038330078,
|
|
"logps/rejected": -77.85298156738281,
|
|
"loss": 1.4085,
|
|
"margin_dpo/margin_mean": -0.020512670278549194,
|
|
"margin_dpo/margin_std": 0.4012720584869385,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.04081632653061224,
|
|
"fcm_dpo/beta": 0.522553026676178,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.018406003713607788,
|
|
"fcm_dpo/q_t": 0.5024095177650452,
|
|
"grad_norm": 150.67922973632812,
|
|
"learning_rate": 1.9402985074626865e-07,
|
|
"logits/chosen": 0.0803753212094307,
|
|
"logits/rejected": 0.056419409811496735,
|
|
"logps/chosen": -62.89083480834961,
|
|
"logps/ref_chosen": -62.87702560424805,
|
|
"logps/ref_rejected": -71.34437561035156,
|
|
"logps/rejected": -71.33978271484375,
|
|
"loss": 1.4023,
|
|
"margin_dpo/margin_mean": -0.018405765295028687,
|
|
"margin_dpo/margin_std": 0.3038307726383209,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.042328042328042326,
|
|
"fcm_dpo/beta": 0.5533031821250916,
|
|
"fcm_dpo/delta": 0.2858981192111969,
|
|
"fcm_dpo/margin": 0.021839946508407593,
|
|
"fcm_dpo/q_t": 0.4972037672996521,
|
|
"grad_norm": 153.09165954589844,
|
|
"learning_rate": 2.0149253731343282e-07,
|
|
"logits/chosen": 0.053669240325689316,
|
|
"logits/rejected": 0.045026686042547226,
|
|
"logps/chosen": -59.809776306152344,
|
|
"logps/ref_chosen": -59.8333740234375,
|
|
"logps/ref_rejected": -70.39804077148438,
|
|
"logps/rejected": -70.39627838134766,
|
|
"loss": 1.3808,
|
|
"margin_dpo/margin_mean": 0.021840453147888184,
|
|
"margin_dpo/margin_std": 0.2902475595474243,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.04383975812547241,
|
|
"fcm_dpo/beta": 0.5869318842887878,
|
|
"fcm_dpo/delta": 0.2950134873390198,
|
|
"fcm_dpo/margin": -0.0369829386472702,
|
|
"fcm_dpo/q_t": 0.5054592490196228,
|
|
"grad_norm": 198.2787628173828,
|
|
"learning_rate": 2.08955223880597e-07,
|
|
"logits/chosen": 0.14541292190551758,
|
|
"logits/rejected": 0.12754370272159576,
|
|
"logps/chosen": -74.16682434082031,
|
|
"logps/ref_chosen": -74.12020111083984,
|
|
"logps/ref_rejected": -83.33099365234375,
|
|
"logps/rejected": -83.34062957763672,
|
|
"loss": 1.4175,
|
|
"margin_dpo/margin_mean": -0.03698325157165527,
|
|
"margin_dpo/margin_std": 0.3256310224533081,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.045351473922902494,
|
|
"fcm_dpo/beta": 0.603141188621521,
|
|
"fcm_dpo/delta": 0.2688133418560028,
|
|
"fcm_dpo/margin": 0.046625733375549316,
|
|
"fcm_dpo/q_t": 0.4932544231414795,
|
|
"grad_norm": 177.89276123046875,
|
|
"learning_rate": 2.1641791044776117e-07,
|
|
"logits/chosen": 0.1190497875213623,
|
|
"logits/rejected": 0.06499116122722626,
|
|
"logps/chosen": -50.79059600830078,
|
|
"logps/ref_chosen": -50.75128936767578,
|
|
"logps/ref_rejected": -89.29063415527344,
|
|
"logps/rejected": -89.3765640258789,
|
|
"loss": 1.3655,
|
|
"margin_dpo/margin_mean": 0.04662585258483887,
|
|
"margin_dpo/margin_std": 0.29222309589385986,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.04686318972033258,
|
|
"fcm_dpo/beta": 0.6370232105255127,
|
|
"fcm_dpo/delta": 0.27749747037887573,
|
|
"fcm_dpo/margin": 0.0308951735496521,
|
|
"fcm_dpo/q_t": 0.4952976703643799,
|
|
"grad_norm": 219.1363983154297,
|
|
"learning_rate": 2.2388059701492537e-07,
|
|
"logits/chosen": 0.11047565937042236,
|
|
"logits/rejected": 0.06402811408042908,
|
|
"logps/chosen": -65.35533905029297,
|
|
"logps/ref_chosen": -65.33675384521484,
|
|
"logps/ref_rejected": -100.76666259765625,
|
|
"logps/rejected": -100.8161392211914,
|
|
"loss": 1.3758,
|
|
"margin_dpo/margin_mean": 0.03089618682861328,
|
|
"margin_dpo/margin_std": 0.30930471420288086,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.04837490551776266,
|
|
"fcm_dpo/beta": 0.6727170944213867,
|
|
"fcm_dpo/delta": 0.26795029640197754,
|
|
"fcm_dpo/margin": 0.04653581976890564,
|
|
"fcm_dpo/q_t": 0.49249711632728577,
|
|
"grad_norm": 198.70797729492188,
|
|
"learning_rate": 2.3134328358208954e-07,
|
|
"logits/chosen": 0.08043690025806427,
|
|
"logits/rejected": 0.07259618490934372,
|
|
"logps/chosen": -67.17460632324219,
|
|
"logps/ref_chosen": -67.18333435058594,
|
|
"logps/ref_rejected": -82.80763244628906,
|
|
"logps/rejected": -82.84544372558594,
|
|
"loss": 1.3664,
|
|
"margin_dpo/margin_mean": 0.04653611779212952,
|
|
"margin_dpo/margin_std": 0.3230898380279541,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.049886621315192746,
|
|
"fcm_dpo/beta": 0.7546996474266052,
|
|
"fcm_dpo/delta": 0.5825432538986206,
|
|
"fcm_dpo/margin": 0.023887306451797485,
|
|
"fcm_dpo/q_t": 0.4956985116004944,
|
|
"grad_norm": 240.79859924316406,
|
|
"learning_rate": 2.388059701492537e-07,
|
|
"logits/chosen": 0.028976380825042725,
|
|
"logits/rejected": 0.0030624661594629288,
|
|
"logps/chosen": -64.09410095214844,
|
|
"logps/ref_chosen": -64.03948211669922,
|
|
"logps/ref_rejected": -75.68357849121094,
|
|
"logps/rejected": -75.76209259033203,
|
|
"loss": 1.3862,
|
|
"margin_dpo/margin_mean": 0.023888081312179565,
|
|
"margin_dpo/margin_std": 0.3596636652946472,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.05139833711262283,
|
|
"fcm_dpo/beta": 0.8192043900489807,
|
|
"fcm_dpo/delta": 0.27031949162483215,
|
|
"fcm_dpo/margin": 0.035017967224121094,
|
|
"fcm_dpo/q_t": 0.49334418773651123,
|
|
"grad_norm": 234.72634887695312,
|
|
"learning_rate": 2.4626865671641786e-07,
|
|
"logits/chosen": 0.09738805890083313,
|
|
"logits/rejected": 0.067594513297081,
|
|
"logps/chosen": -53.69878387451172,
|
|
"logps/ref_chosen": -53.6642951965332,
|
|
"logps/ref_rejected": -65.77989959716797,
|
|
"logps/rejected": -65.84939575195312,
|
|
"loss": 1.3704,
|
|
"margin_dpo/margin_mean": 0.03501778841018677,
|
|
"margin_dpo/margin_std": 0.27197498083114624,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.05291005291005291,
|
|
"fcm_dpo/beta": 0.8430140018463135,
|
|
"fcm_dpo/delta": 0.2825099527835846,
|
|
"fcm_dpo/margin": 0.009096980094909668,
|
|
"fcm_dpo/q_t": 0.498089462518692,
|
|
"grad_norm": 236.1203155517578,
|
|
"learning_rate": 2.537313432835821e-07,
|
|
"logits/chosen": 0.05875828117132187,
|
|
"logits/rejected": 0.03618273511528969,
|
|
"logps/chosen": -61.09056854248047,
|
|
"logps/ref_chosen": -61.01686096191406,
|
|
"logps/ref_rejected": -72.78598022460938,
|
|
"logps/rejected": -72.86878967285156,
|
|
"loss": 1.3968,
|
|
"margin_dpo/margin_mean": 0.009096503257751465,
|
|
"margin_dpo/margin_std": 0.3263735771179199,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.05442176870748299,
|
|
"fcm_dpo/beta": 0.8931436538696289,
|
|
"fcm_dpo/delta": 0.2947741150856018,
|
|
"fcm_dpo/margin": 0.0023790299892425537,
|
|
"fcm_dpo/q_t": 0.49950528144836426,
|
|
"grad_norm": 255.0270538330078,
|
|
"learning_rate": 2.611940298507462e-07,
|
|
"logits/chosen": 0.10544905811548233,
|
|
"logits/rejected": 0.05225067213177681,
|
|
"logps/chosen": -50.6300048828125,
|
|
"logps/ref_chosen": -50.53736114501953,
|
|
"logps/ref_rejected": -78.11678314208984,
|
|
"logps/rejected": -78.21180725097656,
|
|
"loss": 1.4039,
|
|
"margin_dpo/margin_mean": 0.002379119396209717,
|
|
"margin_dpo/margin_std": 0.3160307705402374,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.055933484504913075,
|
|
"fcm_dpo/beta": 0.9926947951316833,
|
|
"fcm_dpo/delta": 0.5041809678077698,
|
|
"fcm_dpo/margin": 0.10119250416755676,
|
|
"fcm_dpo/q_t": 0.47944971919059753,
|
|
"grad_norm": 356.8680725097656,
|
|
"learning_rate": 2.686567164179104e-07,
|
|
"logits/chosen": 0.08121801912784576,
|
|
"logits/rejected": 0.0041434187442064285,
|
|
"logps/chosen": -59.60346221923828,
|
|
"logps/ref_chosen": -59.55394744873047,
|
|
"logps/ref_rejected": -108.27702331542969,
|
|
"logps/rejected": -108.427734375,
|
|
"loss": 1.3356,
|
|
"margin_dpo/margin_mean": 0.10119268298149109,
|
|
"margin_dpo/margin_std": 0.44900059700012207,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.05744520030234316,
|
|
"fcm_dpo/beta": 1.0603744983673096,
|
|
"fcm_dpo/delta": 0.20875374972820282,
|
|
"fcm_dpo/margin": 0.05771473050117493,
|
|
"fcm_dpo/q_t": 0.4865277409553528,
|
|
"grad_norm": 308.198974609375,
|
|
"learning_rate": 2.761194029850746e-07,
|
|
"logits/chosen": 0.07032038271427155,
|
|
"logits/rejected": 0.0563180111348629,
|
|
"logps/chosen": -65.86856079101562,
|
|
"logps/ref_chosen": -65.78836059570312,
|
|
"logps/ref_rejected": -76.1619873046875,
|
|
"logps/rejected": -76.29991149902344,
|
|
"loss": 1.3607,
|
|
"margin_dpo/margin_mean": 0.05771511793136597,
|
|
"margin_dpo/margin_std": 0.3312731385231018,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.05895691609977324,
|
|
"fcm_dpo/beta": 1.1480540037155151,
|
|
"fcm_dpo/delta": 0.5104781985282898,
|
|
"fcm_dpo/margin": 0.08076709508895874,
|
|
"fcm_dpo/q_t": 0.47740280628204346,
|
|
"grad_norm": 329.3815612792969,
|
|
"learning_rate": 2.8358208955223876e-07,
|
|
"logits/chosen": 0.1415819525718689,
|
|
"logits/rejected": 0.11533431708812714,
|
|
"logps/chosen": -57.262725830078125,
|
|
"logps/ref_chosen": -57.17681121826172,
|
|
"logps/ref_rejected": -79.486328125,
|
|
"logps/rejected": -79.65301513671875,
|
|
"loss": 1.3441,
|
|
"margin_dpo/margin_mean": 0.08076697587966919,
|
|
"margin_dpo/margin_std": 0.3901514708995819,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.06046863189720333,
|
|
"fcm_dpo/beta": 1.2407445907592773,
|
|
"fcm_dpo/delta": 0.2749688923358917,
|
|
"fcm_dpo/margin": 0.0019149184226989746,
|
|
"fcm_dpo/q_t": 0.49921295046806335,
|
|
"grad_norm": 422.4781188964844,
|
|
"learning_rate": 2.9104477611940296e-07,
|
|
"logits/chosen": 0.1148887425661087,
|
|
"logits/rejected": 0.06507512927055359,
|
|
"logps/chosen": -61.413368225097656,
|
|
"logps/ref_chosen": -61.33416748046875,
|
|
"logps/ref_rejected": -79.10697174072266,
|
|
"logps/rejected": -79.1880874633789,
|
|
"loss": 1.4165,
|
|
"margin_dpo/margin_mean": 0.0019148588180541992,
|
|
"margin_dpo/margin_std": 0.2892666459083557,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.06198034769463341,
|
|
"fcm_dpo/beta": 1.3399341106414795,
|
|
"fcm_dpo/delta": 0.49436837434768677,
|
|
"fcm_dpo/margin": 0.08180907368659973,
|
|
"fcm_dpo/q_t": 0.4749258756637573,
|
|
"grad_norm": 407.1304626464844,
|
|
"learning_rate": 2.985074626865671e-07,
|
|
"logits/chosen": 0.05015815421938896,
|
|
"logits/rejected": 0.03012828528881073,
|
|
"logps/chosen": -67.65704345703125,
|
|
"logps/ref_chosen": -67.5467300415039,
|
|
"logps/ref_rejected": -83.87788391113281,
|
|
"logps/rejected": -84.07000732421875,
|
|
"loss": 1.344,
|
|
"margin_dpo/margin_mean": 0.08180946111679077,
|
|
"margin_dpo/margin_std": 0.386926531791687,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.06349206349206349,
|
|
"fcm_dpo/beta": 1.4246625900268555,
|
|
"fcm_dpo/delta": 0.19674822688102722,
|
|
"fcm_dpo/margin": 0.06773808598518372,
|
|
"fcm_dpo/q_t": 0.47795066237449646,
|
|
"grad_norm": 400.0076599121094,
|
|
"learning_rate": 3.059701492537313e-07,
|
|
"logits/chosen": 0.0690753310918808,
|
|
"logits/rejected": 0.046986717730760574,
|
|
"logps/chosen": -61.381797790527344,
|
|
"logps/ref_chosen": -61.26485824584961,
|
|
"logps/ref_rejected": -76.3629150390625,
|
|
"logps/rejected": -76.54759216308594,
|
|
"loss": 1.3368,
|
|
"margin_dpo/margin_mean": 0.067737877368927,
|
|
"margin_dpo/margin_std": 0.28877195715904236,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.06500377928949358,
|
|
"fcm_dpo/beta": 1.5338796377182007,
|
|
"fcm_dpo/delta": 0.48754990100860596,
|
|
"fcm_dpo/margin": 0.07686775922775269,
|
|
"fcm_dpo/q_t": 0.4750903844833374,
|
|
"grad_norm": 511.4953308105469,
|
|
"learning_rate": 3.134328358208955e-07,
|
|
"logits/chosen": 0.08270905166864395,
|
|
"logits/rejected": 0.07184214890003204,
|
|
"logps/chosen": -71.89559936523438,
|
|
"logps/ref_chosen": -71.80902862548828,
|
|
"logps/ref_rejected": -81.12464141845703,
|
|
"logps/rejected": -81.28807830810547,
|
|
"loss": 1.3775,
|
|
"margin_dpo/margin_mean": 0.07686793804168701,
|
|
"margin_dpo/margin_std": 0.4142921566963196,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.06651549508692366,
|
|
"fcm_dpo/beta": 1.6183886528015137,
|
|
"fcm_dpo/delta": 0.29549404978752136,
|
|
"fcm_dpo/margin": -0.0225009024143219,
|
|
"fcm_dpo/q_t": 0.5110141038894653,
|
|
"grad_norm": 553.7533569335938,
|
|
"learning_rate": 3.2089552238805965e-07,
|
|
"logits/chosen": 0.05938286706805229,
|
|
"logits/rejected": 0.028474589809775352,
|
|
"logps/chosen": -66.68466186523438,
|
|
"logps/ref_chosen": -66.55043029785156,
|
|
"logps/ref_rejected": -85.06198120117188,
|
|
"logps/rejected": -85.1737060546875,
|
|
"loss": 1.4972,
|
|
"margin_dpo/margin_mean": -0.022500991821289062,
|
|
"margin_dpo/margin_std": 0.346932590007782,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.06802721088435375,
|
|
"fcm_dpo/beta": 1.7693145275115967,
|
|
"fcm_dpo/delta": 0.4276418685913086,
|
|
"fcm_dpo/margin": 0.102606400847435,
|
|
"fcm_dpo/q_t": 0.45728254318237305,
|
|
"grad_norm": 532.6951293945312,
|
|
"learning_rate": 3.2835820895522385e-07,
|
|
"logits/chosen": 0.1108519434928894,
|
|
"logits/rejected": 0.058622151613235474,
|
|
"logps/chosen": -62.343910217285156,
|
|
"logps/ref_chosen": -62.24385452270508,
|
|
"logps/ref_rejected": -92.96665954589844,
|
|
"logps/rejected": -93.16932678222656,
|
|
"loss": 1.3183,
|
|
"margin_dpo/margin_mean": 0.10260695219039917,
|
|
"margin_dpo/margin_std": 0.3721820116043091,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.06953892668178382,
|
|
"fcm_dpo/beta": 1.9173414707183838,
|
|
"fcm_dpo/delta": 0.3439553380012512,
|
|
"fcm_dpo/margin": 0.13755828142166138,
|
|
"fcm_dpo/q_t": 0.4469168782234192,
|
|
"grad_norm": 541.6246337890625,
|
|
"learning_rate": 3.3582089552238805e-07,
|
|
"logits/chosen": 0.11820603907108307,
|
|
"logits/rejected": 0.07246644049882889,
|
|
"logps/chosen": -61.56231689453125,
|
|
"logps/ref_chosen": -61.498905181884766,
|
|
"logps/ref_rejected": -78.91172790527344,
|
|
"logps/rejected": -79.11270141601562,
|
|
"loss": 1.2947,
|
|
"margin_dpo/margin_mean": 0.13755744695663452,
|
|
"margin_dpo/margin_std": 0.42911720275878906,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.0710506424792139,
|
|
"fcm_dpo/beta": 1.999995231628418,
|
|
"fcm_dpo/delta": 0.20687083899974823,
|
|
"fcm_dpo/margin": 0.20091423392295837,
|
|
"fcm_dpo/q_t": 0.410871297121048,
|
|
"grad_norm": 481.5821533203125,
|
|
"learning_rate": 3.432835820895522e-07,
|
|
"logits/chosen": 0.04726361110806465,
|
|
"logits/rejected": 0.0036482480354607105,
|
|
"logps/chosen": -51.672603607177734,
|
|
"logps/ref_chosen": -51.578346252441406,
|
|
"logps/ref_rejected": -68.2215576171875,
|
|
"logps/rejected": -68.51673889160156,
|
|
"loss": 1.1136,
|
|
"margin_dpo/margin_mean": 0.200914204120636,
|
|
"margin_dpo/margin_std": 0.30879902839660645,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.07256235827664399,
|
|
"fcm_dpo/beta": 2.1475226879119873,
|
|
"fcm_dpo/delta": 0.29038742184638977,
|
|
"fcm_dpo/margin": 0.0035857856273651123,
|
|
"fcm_dpo/q_t": 0.5014243125915527,
|
|
"grad_norm": 611.6590576171875,
|
|
"learning_rate": 3.507462686567164e-07,
|
|
"logits/chosen": 0.14877364039421082,
|
|
"logits/rejected": 0.11894898861646652,
|
|
"logps/chosen": -51.974037170410156,
|
|
"logps/ref_chosen": -51.79365158081055,
|
|
"logps/ref_rejected": -64.22503662109375,
|
|
"logps/rejected": -64.40901184082031,
|
|
"loss": 1.5324,
|
|
"margin_dpo/margin_mean": 0.0035860538482666016,
|
|
"margin_dpo/margin_std": 0.3811490833759308,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.07407407407407407,
|
|
"fcm_dpo/beta": 2.2708346843719482,
|
|
"fcm_dpo/delta": 0.3342975378036499,
|
|
"fcm_dpo/margin": 0.11987686157226562,
|
|
"fcm_dpo/q_t": 0.4392061233520508,
|
|
"grad_norm": 582.0693969726562,
|
|
"learning_rate": 3.5820895522388055e-07,
|
|
"logits/chosen": 0.02266620472073555,
|
|
"logits/rejected": 0.0014079846441745758,
|
|
"logps/chosen": -58.300750732421875,
|
|
"logps/ref_chosen": -58.13460159301758,
|
|
"logps/ref_rejected": -64.63206481933594,
|
|
"logps/rejected": -64.9180908203125,
|
|
"loss": 1.3162,
|
|
"margin_dpo/margin_mean": 0.11987724900245667,
|
|
"margin_dpo/margin_std": 0.40768203139305115,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.07558578987150416,
|
|
"fcm_dpo/beta": 2.3943064212799072,
|
|
"fcm_dpo/delta": 0.2095961570739746,
|
|
"fcm_dpo/margin": 0.03892210125923157,
|
|
"fcm_dpo/q_t": 0.48075801134109497,
|
|
"grad_norm": 678.2787475585938,
|
|
"learning_rate": 3.6567164179104475e-07,
|
|
"logits/chosen": 0.11922754347324371,
|
|
"logits/rejected": 0.08905205130577087,
|
|
"logps/chosen": -53.13764190673828,
|
|
"logps/ref_chosen": -52.85643768310547,
|
|
"logps/ref_rejected": -72.17460632324219,
|
|
"logps/rejected": -72.49472045898438,
|
|
"loss": 1.4695,
|
|
"margin_dpo/margin_mean": 0.03892248868942261,
|
|
"margin_dpo/margin_std": 0.3610963225364685,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.07709750566893424,
|
|
"fcm_dpo/beta": 2.45587158203125,
|
|
"fcm_dpo/delta": 0.1764744520187378,
|
|
"fcm_dpo/margin": 0.17561078071594238,
|
|
"fcm_dpo/q_t": 0.4224376082420349,
|
|
"grad_norm": 632.7899780273438,
|
|
"learning_rate": 3.7313432835820895e-07,
|
|
"logits/chosen": 0.06935389339923859,
|
|
"logits/rejected": 0.042039111256599426,
|
|
"logps/chosen": -63.871707916259766,
|
|
"logps/ref_chosen": -63.65644073486328,
|
|
"logps/ref_rejected": -86.13229370117188,
|
|
"logps/rejected": -86.52317810058594,
|
|
"loss": 1.303,
|
|
"margin_dpo/margin_mean": 0.17561128735542297,
|
|
"margin_dpo/margin_std": 0.49183645844459534,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.07860922146636433,
|
|
"fcm_dpo/beta": 2.520862102508545,
|
|
"fcm_dpo/delta": 0.10279199481010437,
|
|
"fcm_dpo/margin": 0.19924332201480865,
|
|
"fcm_dpo/q_t": 0.40319594740867615,
|
|
"grad_norm": 748.8369140625,
|
|
"learning_rate": 3.805970149253731e-07,
|
|
"logits/chosen": 0.0923246219754219,
|
|
"logits/rejected": 0.04198576882481575,
|
|
"logps/chosen": -68.06822204589844,
|
|
"logps/ref_chosen": -67.8402099609375,
|
|
"logps/ref_rejected": -96.97090911865234,
|
|
"logps/rejected": -97.39816284179688,
|
|
"loss": 1.2025,
|
|
"margin_dpo/margin_mean": 0.19924335181713104,
|
|
"margin_dpo/margin_std": 0.4342193007469177,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.0801209372637944,
|
|
"fcm_dpo/beta": 2.6138997077941895,
|
|
"fcm_dpo/delta": 0.2746652364730835,
|
|
"fcm_dpo/margin": 0.12827840447425842,
|
|
"fcm_dpo/q_t": 0.43137750029563904,
|
|
"grad_norm": 688.0023193359375,
|
|
"learning_rate": 3.880597014925373e-07,
|
|
"logits/chosen": 0.07982613146305084,
|
|
"logits/rejected": 0.06915253400802612,
|
|
"logps/chosen": -57.14837646484375,
|
|
"logps/ref_chosen": -56.87813949584961,
|
|
"logps/ref_rejected": -60.75569152832031,
|
|
"logps/rejected": -61.15420913696289,
|
|
"loss": 1.3153,
|
|
"margin_dpo/margin_mean": 0.12827906012535095,
|
|
"margin_dpo/margin_std": 0.39129942655563354,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.08163265306122448,
|
|
"fcm_dpo/beta": 2.705203056335449,
|
|
"fcm_dpo/delta": 0.18513087928295135,
|
|
"fcm_dpo/margin": 0.15532580018043518,
|
|
"fcm_dpo/q_t": 0.41879549622535706,
|
|
"grad_norm": 747.0259399414062,
|
|
"learning_rate": 3.9552238805970144e-07,
|
|
"logits/chosen": 0.059670425951480865,
|
|
"logits/rejected": 0.04430554807186127,
|
|
"logps/chosen": -47.553924560546875,
|
|
"logps/ref_chosen": -47.26692199707031,
|
|
"logps/ref_rejected": -62.19426727294922,
|
|
"logps/rejected": -62.63658905029297,
|
|
"loss": 1.3013,
|
|
"margin_dpo/margin_mean": 0.15532565116882324,
|
|
"margin_dpo/margin_std": 0.4225253760814667,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.08314436885865457,
|
|
"fcm_dpo/beta": 2.7942681312561035,
|
|
"fcm_dpo/delta": -0.0019312426447868347,
|
|
"fcm_dpo/margin": 0.21504637598991394,
|
|
"fcm_dpo/q_t": 0.39572954177856445,
|
|
"grad_norm": 801.02685546875,
|
|
"learning_rate": 4.0298507462686564e-07,
|
|
"logits/chosen": 0.005894917994737625,
|
|
"logits/rejected": -0.06754656881093979,
|
|
"logps/chosen": -50.632896423339844,
|
|
"logps/ref_chosen": -50.32619094848633,
|
|
"logps/ref_rejected": -92.44389343261719,
|
|
"logps/rejected": -92.96563720703125,
|
|
"loss": 1.2136,
|
|
"margin_dpo/margin_mean": 0.2150469720363617,
|
|
"margin_dpo/margin_std": 0.46592026948928833,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.08465608465608465,
|
|
"fcm_dpo/beta": 2.8418025970458984,
|
|
"fcm_dpo/delta": 0.15264582633972168,
|
|
"fcm_dpo/margin": 0.1597883105278015,
|
|
"fcm_dpo/q_t": 0.4092499613761902,
|
|
"grad_norm": 765.663818359375,
|
|
"learning_rate": 4.1044776119402984e-07,
|
|
"logits/chosen": 0.12413370609283447,
|
|
"logits/rejected": 0.10176189243793488,
|
|
"logps/chosen": -57.053218841552734,
|
|
"logps/ref_chosen": -56.766971588134766,
|
|
"logps/ref_rejected": -66.30504608154297,
|
|
"logps/rejected": -66.7510757446289,
|
|
"loss": 1.2304,
|
|
"margin_dpo/margin_mean": 0.15978825092315674,
|
|
"margin_dpo/margin_std": 0.38756871223449707,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.08616780045351474,
|
|
"fcm_dpo/beta": 2.760141134262085,
|
|
"fcm_dpo/delta": -0.1479550153017044,
|
|
"fcm_dpo/margin": 0.265031099319458,
|
|
"fcm_dpo/q_t": 0.367247074842453,
|
|
"grad_norm": 623.2364501953125,
|
|
"learning_rate": 4.17910447761194e-07,
|
|
"logits/chosen": 0.09955069422721863,
|
|
"logits/rejected": 0.03469717875123024,
|
|
"logps/chosen": -58.13420104980469,
|
|
"logps/ref_chosen": -57.76774597167969,
|
|
"logps/ref_rejected": -82.75698852539062,
|
|
"logps/rejected": -83.38848114013672,
|
|
"loss": 1.1,
|
|
"margin_dpo/margin_mean": 0.26503095030784607,
|
|
"margin_dpo/margin_std": 0.4654003381729126,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.08767951625094482,
|
|
"fcm_dpo/beta": 2.6086440086364746,
|
|
"fcm_dpo/delta": -0.313137412071228,
|
|
"fcm_dpo/margin": 0.21258825063705444,
|
|
"fcm_dpo/q_t": 0.42052599787712097,
|
|
"grad_norm": 794.4469604492188,
|
|
"learning_rate": 4.253731343283582e-07,
|
|
"logits/chosen": 0.06267602741718292,
|
|
"logits/rejected": 0.04661073535680771,
|
|
"logps/chosen": -73.14319610595703,
|
|
"logps/ref_chosen": -72.76408386230469,
|
|
"logps/ref_rejected": -84.49275207519531,
|
|
"logps/rejected": -85.08444213867188,
|
|
"loss": 1.3993,
|
|
"margin_dpo/margin_mean": 0.21258807182312012,
|
|
"margin_dpo/margin_std": 0.5894132852554321,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.08919123204837491,
|
|
"fcm_dpo/beta": 2.5581846237182617,
|
|
"fcm_dpo/delta": -0.19727279245853424,
|
|
"fcm_dpo/margin": 0.1872905194759369,
|
|
"fcm_dpo/q_t": 0.424525648355484,
|
|
"grad_norm": 646.26611328125,
|
|
"learning_rate": 4.3283582089552234e-07,
|
|
"logits/chosen": 0.13470959663391113,
|
|
"logits/rejected": 0.06717785447835922,
|
|
"logps/chosen": -50.17283630371094,
|
|
"logps/ref_chosen": -49.820777893066406,
|
|
"logps/ref_rejected": -77.14368438720703,
|
|
"logps/rejected": -77.68303680419922,
|
|
"loss": 1.3031,
|
|
"margin_dpo/margin_mean": 0.187290757894516,
|
|
"margin_dpo/margin_std": 0.4808598756790161,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.09070294784580499,
|
|
"fcm_dpo/beta": 2.6267943382263184,
|
|
"fcm_dpo/delta": 0.35662251710891724,
|
|
"fcm_dpo/margin": 0.09666135907173157,
|
|
"fcm_dpo/q_t": 0.44885069131851196,
|
|
"grad_norm": 917.6198120117188,
|
|
"learning_rate": 4.4029850746268654e-07,
|
|
"logits/chosen": 0.10107119381427765,
|
|
"logits/rejected": 0.09958083927631378,
|
|
"logps/chosen": -63.605628967285156,
|
|
"logps/ref_chosen": -63.22477340698242,
|
|
"logps/ref_rejected": -61.360477447509766,
|
|
"logps/rejected": -61.83799743652344,
|
|
"loss": 1.4647,
|
|
"margin_dpo/margin_mean": 0.09666106104850769,
|
|
"margin_dpo/margin_std": 0.4546535611152649,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.09221466364323508,
|
|
"fcm_dpo/beta": 2.7735347747802734,
|
|
"fcm_dpo/delta": 0.24584190547466278,
|
|
"fcm_dpo/margin": 0.1311599463224411,
|
|
"fcm_dpo/q_t": 0.43129193782806396,
|
|
"grad_norm": 845.3485107421875,
|
|
"learning_rate": 4.4776119402985074e-07,
|
|
"logits/chosen": 0.12099134922027588,
|
|
"logits/rejected": 0.08859476447105408,
|
|
"logps/chosen": -49.42567443847656,
|
|
"logps/ref_chosen": -49.01679992675781,
|
|
"logps/ref_rejected": -74.90817260742188,
|
|
"logps/rejected": -75.44821166992188,
|
|
"loss": 1.4356,
|
|
"margin_dpo/margin_mean": 0.13115930557250977,
|
|
"margin_dpo/margin_std": 0.48122483491897583,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.09372637944066516,
|
|
"fcm_dpo/beta": 2.867135524749756,
|
|
"fcm_dpo/delta": -0.01591368019580841,
|
|
"fcm_dpo/margin": 0.21290084719657898,
|
|
"fcm_dpo/q_t": 0.3772019147872925,
|
|
"grad_norm": 801.1388549804688,
|
|
"learning_rate": 4.552238805970149e-07,
|
|
"logits/chosen": 0.11106555908918381,
|
|
"logits/rejected": 0.07138316333293915,
|
|
"logps/chosen": -63.18655014038086,
|
|
"logps/ref_chosen": -62.751869201660156,
|
|
"logps/ref_rejected": -78.93360900878906,
|
|
"logps/rejected": -79.58119201660156,
|
|
"loss": 1.3206,
|
|
"margin_dpo/margin_mean": 0.21290069818496704,
|
|
"margin_dpo/margin_std": 0.5628150701522827,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.09523809523809523,
|
|
"fcm_dpo/beta": 2.718989372253418,
|
|
"fcm_dpo/delta": -0.3079483211040497,
|
|
"fcm_dpo/margin": 0.3243086040019989,
|
|
"fcm_dpo/q_t": 0.3317871689796448,
|
|
"grad_norm": 665.6367797851562,
|
|
"learning_rate": 4.626865671641791e-07,
|
|
"logits/chosen": 0.16446399688720703,
|
|
"logits/rejected": 0.13985374569892883,
|
|
"logps/chosen": -60.89350509643555,
|
|
"logps/ref_chosen": -60.51525115966797,
|
|
"logps/ref_rejected": -85.11021423339844,
|
|
"logps/rejected": -85.81277465820312,
|
|
"loss": 0.9762,
|
|
"margin_dpo/margin_mean": 0.32430848479270935,
|
|
"margin_dpo/margin_std": 0.4499557614326477,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.09674981103552532,
|
|
"fcm_dpo/beta": 2.7576589584350586,
|
|
"fcm_dpo/delta": 0.3326021730899811,
|
|
"fcm_dpo/margin": 0.10074976086616516,
|
|
"fcm_dpo/q_t": 0.4503590166568756,
|
|
"grad_norm": 842.7352294921875,
|
|
"learning_rate": 4.701492537313433e-07,
|
|
"logits/chosen": 0.07350215315818787,
|
|
"logits/rejected": 0.04859868437051773,
|
|
"logps/chosen": -51.69322204589844,
|
|
"logps/ref_chosen": -51.20684814453125,
|
|
"logps/ref_rejected": -66.93081665039062,
|
|
"logps/rejected": -67.5179443359375,
|
|
"loss": 1.4735,
|
|
"margin_dpo/margin_mean": 0.10075005888938904,
|
|
"margin_dpo/margin_std": 0.4669637978076935,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.0982615268329554,
|
|
"fcm_dpo/beta": 2.659918785095215,
|
|
"fcm_dpo/delta": -0.42128875851631165,
|
|
"fcm_dpo/margin": 0.36822575330734253,
|
|
"fcm_dpo/q_t": 0.32521775364875793,
|
|
"grad_norm": 680.5181884765625,
|
|
"learning_rate": 4.776119402985074e-07,
|
|
"logits/chosen": 0.16169877350330353,
|
|
"logits/rejected": 0.1325380802154541,
|
|
"logps/chosen": -67.74839782714844,
|
|
"logps/ref_chosen": -67.2886962890625,
|
|
"logps/ref_rejected": -74.44281005859375,
|
|
"logps/rejected": -75.27073669433594,
|
|
"loss": 1.0552,
|
|
"margin_dpo/margin_mean": 0.3682248294353485,
|
|
"margin_dpo/margin_std": 0.5791603326797485,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.09977324263038549,
|
|
"fcm_dpo/beta": 2.555934429168701,
|
|
"fcm_dpo/delta": -0.0033800601959228516,
|
|
"fcm_dpo/margin": 0.23333951830863953,
|
|
"fcm_dpo/q_t": 0.3993811309337616,
|
|
"grad_norm": 727.94482421875,
|
|
"learning_rate": 4.850746268656717e-07,
|
|
"logits/chosen": 0.09812623262405396,
|
|
"logits/rejected": 0.07371249049901962,
|
|
"logps/chosen": -71.21185302734375,
|
|
"logps/ref_chosen": -70.743408203125,
|
|
"logps/ref_rejected": -77.26499938964844,
|
|
"logps/rejected": -77.96678161621094,
|
|
"loss": 1.2259,
|
|
"margin_dpo/margin_mean": 0.23333919048309326,
|
|
"margin_dpo/margin_std": 0.53450608253479,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.10128495842781557,
|
|
"fcm_dpo/beta": 2.613276958465576,
|
|
"fcm_dpo/delta": 0.020865630358457565,
|
|
"fcm_dpo/margin": 0.22206446528434753,
|
|
"fcm_dpo/q_t": 0.41789835691452026,
|
|
"grad_norm": 757.7228393554688,
|
|
"learning_rate": 4.925373134328357e-07,
|
|
"logits/chosen": 0.08542338758707047,
|
|
"logits/rejected": 0.028661586344242096,
|
|
"logps/chosen": -60.98101043701172,
|
|
"logps/ref_chosen": -60.60260009765625,
|
|
"logps/ref_rejected": -75.22235870361328,
|
|
"logps/rejected": -75.82283020019531,
|
|
"loss": 1.2948,
|
|
"margin_dpo/margin_mean": 0.22206458449363708,
|
|
"margin_dpo/margin_std": 0.5843450427055359,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.10279667422524566,
|
|
"fcm_dpo/beta": 2.542264461517334,
|
|
"fcm_dpo/delta": -0.09804828464984894,
|
|
"fcm_dpo/margin": 0.2701076567173004,
|
|
"fcm_dpo/q_t": 0.3731071352958679,
|
|
"grad_norm": 731.7794189453125,
|
|
"learning_rate": 5e-07,
|
|
"logits/chosen": 0.023803437128663063,
|
|
"logits/rejected": -0.00498470664024353,
|
|
"logps/chosen": -78.01327514648438,
|
|
"logps/ref_chosen": -77.52836608886719,
|
|
"logps/ref_rejected": -93.17778015136719,
|
|
"logps/rejected": -93.93280029296875,
|
|
"loss": 1.2249,
|
|
"margin_dpo/margin_mean": 0.27010834217071533,
|
|
"margin_dpo/margin_std": 0.5684667825698853,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.10430839002267574,
|
|
"fcm_dpo/beta": 2.6366710662841797,
|
|
"fcm_dpo/delta": 0.053127557039260864,
|
|
"fcm_dpo/margin": 0.20509648323059082,
|
|
"fcm_dpo/q_t": 0.3979250490665436,
|
|
"grad_norm": 705.78271484375,
|
|
"learning_rate": 4.999965034812934e-07,
|
|
"logits/chosen": 0.09332907199859619,
|
|
"logits/rejected": 0.04950705170631409,
|
|
"logps/chosen": -66.47467041015625,
|
|
"logps/ref_chosen": -65.94305419921875,
|
|
"logps/ref_rejected": -89.7735595703125,
|
|
"logps/rejected": -90.51026916503906,
|
|
"loss": 1.3071,
|
|
"margin_dpo/margin_mean": 0.20509576797485352,
|
|
"margin_dpo/margin_std": 0.48671072721481323,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.10582010582010581,
|
|
"fcm_dpo/beta": 2.5765388011932373,
|
|
"fcm_dpo/delta": 0.06844906508922577,
|
|
"fcm_dpo/margin": 0.2062298059463501,
|
|
"fcm_dpo/q_t": 0.40513622760772705,
|
|
"grad_norm": 689.6810302734375,
|
|
"learning_rate": 4.999860140229787e-07,
|
|
"logits/chosen": 0.11499057710170746,
|
|
"logits/rejected": 0.09173914790153503,
|
|
"logps/chosen": -62.437904357910156,
|
|
"logps/ref_chosen": -61.95791244506836,
|
|
"logps/ref_rejected": -75.80945587158203,
|
|
"logps/rejected": -76.49568176269531,
|
|
"loss": 1.3418,
|
|
"margin_dpo/margin_mean": 0.2062298059463501,
|
|
"margin_dpo/margin_std": 0.5412454605102539,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.1073318216175359,
|
|
"fcm_dpo/beta": 2.740046977996826,
|
|
"fcm_dpo/delta": 0.2646937966346741,
|
|
"fcm_dpo/margin": 0.12536178529262543,
|
|
"fcm_dpo/q_t": 0.4393612742424011,
|
|
"grad_norm": 829.3287963867188,
|
|
"learning_rate": 4.999685319184688e-07,
|
|
"logits/chosen": 0.04790864139795303,
|
|
"logits/rejected": 0.032932039350271225,
|
|
"logps/chosen": -63.92980194091797,
|
|
"logps/ref_chosen": -63.34757995605469,
|
|
"logps/ref_rejected": -67.49658203125,
|
|
"logps/rejected": -68.20416259765625,
|
|
"loss": 1.4527,
|
|
"margin_dpo/margin_mean": 0.1253620833158493,
|
|
"margin_dpo/margin_std": 0.501061737537384,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.10884353741496598,
|
|
"fcm_dpo/beta": 2.6920130252838135,
|
|
"fcm_dpo/delta": -0.16975723206996918,
|
|
"fcm_dpo/margin": 0.2809232175350189,
|
|
"fcm_dpo/q_t": 0.3771663010120392,
|
|
"grad_norm": 761.12744140625,
|
|
"learning_rate": 4.999440576567755e-07,
|
|
"logits/chosen": 0.1025976687669754,
|
|
"logits/rejected": 0.038601793348789215,
|
|
"logps/chosen": -56.350685119628906,
|
|
"logps/ref_chosen": -55.85929870605469,
|
|
"logps/ref_rejected": -68.45423889160156,
|
|
"logps/rejected": -69.22655487060547,
|
|
"loss": 1.228,
|
|
"margin_dpo/margin_mean": 0.2809233069419861,
|
|
"margin_dpo/margin_std": 0.5827616453170776,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.11035525321239607,
|
|
"fcm_dpo/beta": 2.8283796310424805,
|
|
"fcm_dpo/delta": 0.3381425738334656,
|
|
"fcm_dpo/margin": 0.0951181948184967,
|
|
"fcm_dpo/q_t": 0.45442691445350647,
|
|
"grad_norm": 973.8218994140625,
|
|
"learning_rate": 4.999125919224965e-07,
|
|
"logits/chosen": 0.06888525933027267,
|
|
"logits/rejected": 0.05457981303334236,
|
|
"logps/chosen": -69.80500793457031,
|
|
"logps/ref_chosen": -69.13880920410156,
|
|
"logps/ref_rejected": -79.04586791992188,
|
|
"logps/rejected": -79.80718231201172,
|
|
"loss": 1.6541,
|
|
"margin_dpo/margin_mean": 0.09511902928352356,
|
|
"margin_dpo/margin_std": 0.5735561847686768,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.11186696900982615,
|
|
"fcm_dpo/beta": 2.7798233032226562,
|
|
"fcm_dpo/delta": -0.34147214889526367,
|
|
"fcm_dpo/margin": 0.32702162861824036,
|
|
"fcm_dpo/q_t": 0.35246068239212036,
|
|
"grad_norm": 655.92626953125,
|
|
"learning_rate": 4.998741355957963e-07,
|
|
"logits/chosen": 0.11409755051136017,
|
|
"logits/rejected": 0.061128612607717514,
|
|
"logps/chosen": -50.37104034423828,
|
|
"logps/ref_chosen": -49.923736572265625,
|
|
"logps/ref_rejected": -81.73213958740234,
|
|
"logps/rejected": -82.5064697265625,
|
|
"loss": 1.135,
|
|
"margin_dpo/margin_mean": 0.32702159881591797,
|
|
"margin_dpo/margin_std": 0.5811691284179688,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.11337868480725624,
|
|
"fcm_dpo/beta": 2.5214171409606934,
|
|
"fcm_dpo/delta": -0.28205248713493347,
|
|
"fcm_dpo/margin": 0.33626946806907654,
|
|
"fcm_dpo/q_t": 0.35245347023010254,
|
|
"grad_norm": 540.9854736328125,
|
|
"learning_rate": 4.998286897523808e-07,
|
|
"logits/chosen": 0.0784030556678772,
|
|
"logits/rejected": 0.04736529663205147,
|
|
"logps/chosen": -46.60426330566406,
|
|
"logps/ref_chosen": -46.06875228881836,
|
|
"logps/ref_rejected": -66.1181411743164,
|
|
"logps/rejected": -66.98992156982422,
|
|
"loss": 1.1406,
|
|
"margin_dpo/margin_mean": 0.33626896142959595,
|
|
"margin_dpo/margin_std": 0.5945340991020203,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.11489040060468632,
|
|
"fcm_dpo/beta": 2.5335192680358887,
|
|
"fcm_dpo/delta": 0.030991412699222565,
|
|
"fcm_dpo/margin": 0.22527188062667847,
|
|
"fcm_dpo/q_t": 0.4082220196723938,
|
|
"grad_norm": 678.3681030273438,
|
|
"learning_rate": 4.997762556634679e-07,
|
|
"logits/chosen": 0.06428511440753937,
|
|
"logits/rejected": 0.023087866604328156,
|
|
"logps/chosen": -54.62993240356445,
|
|
"logps/ref_chosen": -54.06275177001953,
|
|
"logps/ref_rejected": -74.87464141845703,
|
|
"logps/rejected": -75.66709899902344,
|
|
"loss": 1.3191,
|
|
"margin_dpo/margin_mean": 0.22527173161506653,
|
|
"margin_dpo/margin_std": 0.5910253524780273,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.1164021164021164,
|
|
"fcm_dpo/beta": 2.543339490890503,
|
|
"fcm_dpo/delta": 0.14452725648880005,
|
|
"fcm_dpo/margin": 0.17910584807395935,
|
|
"fcm_dpo/q_t": 0.41442954540252686,
|
|
"grad_norm": 735.1400146484375,
|
|
"learning_rate": 4.99716834795752e-07,
|
|
"logits/chosen": 0.09905283898115158,
|
|
"logits/rejected": 0.059680838137865067,
|
|
"logps/chosen": -53.726112365722656,
|
|
"logps/ref_chosen": -53.07609176635742,
|
|
"logps/ref_rejected": -74.45601654052734,
|
|
"logps/rejected": -75.28514099121094,
|
|
"loss": 1.4008,
|
|
"margin_dpo/margin_mean": 0.1791057288646698,
|
|
"margin_dpo/margin_std": 0.5253371000289917,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.11791383219954649,
|
|
"fcm_dpo/beta": 2.617399215698242,
|
|
"fcm_dpo/delta": 0.07255343347787857,
|
|
"fcm_dpo/margin": 0.20233109593391418,
|
|
"fcm_dpo/q_t": 0.4078395366668701,
|
|
"grad_norm": 876.5040893554688,
|
|
"learning_rate": 4.996504288113623e-07,
|
|
"logits/chosen": 0.07440010458230972,
|
|
"logits/rejected": 0.05441536009311676,
|
|
"logps/chosen": -68.28507995605469,
|
|
"logps/ref_chosen": -67.72541809082031,
|
|
"logps/ref_rejected": -79.03926849365234,
|
|
"logps/rejected": -79.80126953125,
|
|
"loss": 1.4932,
|
|
"margin_dpo/margin_mean": 0.20233124494552612,
|
|
"margin_dpo/margin_std": 0.6498842239379883,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.11942554799697656,
|
|
"fcm_dpo/beta": 2.5738704204559326,
|
|
"fcm_dpo/delta": -0.23911376297473907,
|
|
"fcm_dpo/margin": 0.3186035454273224,
|
|
"fcm_dpo/q_t": 0.3460078537464142,
|
|
"grad_norm": 575.8170776367188,
|
|
"learning_rate": 4.995770395678171e-07,
|
|
"logits/chosen": 0.1345619410276413,
|
|
"logits/rejected": 0.07574545592069626,
|
|
"logps/chosen": -52.80107879638672,
|
|
"logps/ref_chosen": -52.16064453125,
|
|
"logps/ref_rejected": -83.31062316894531,
|
|
"logps/rejected": -84.2696533203125,
|
|
"loss": 1.0213,
|
|
"margin_dpo/margin_mean": 0.3186036944389343,
|
|
"margin_dpo/margin_std": 0.49217498302459717,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.12093726379440665,
|
|
"fcm_dpo/beta": 2.5109777450561523,
|
|
"fcm_dpo/delta": -0.09966981410980225,
|
|
"fcm_dpo/margin": 0.2758336067199707,
|
|
"fcm_dpo/q_t": 0.3807419538497925,
|
|
"grad_norm": 784.2391967773438,
|
|
"learning_rate": 4.994966691179711e-07,
|
|
"logits/chosen": 0.09654100239276886,
|
|
"logits/rejected": 0.03951960429549217,
|
|
"logps/chosen": -62.05357360839844,
|
|
"logps/ref_chosen": -61.410560607910156,
|
|
"logps/ref_rejected": -78.66004943847656,
|
|
"logps/rejected": -79.57888793945312,
|
|
"loss": 1.2567,
|
|
"margin_dpo/margin_mean": 0.2758333683013916,
|
|
"margin_dpo/margin_std": 0.6135913133621216,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.12244897959183673,
|
|
"fcm_dpo/beta": 2.3567330837249756,
|
|
"fcm_dpo/delta": -0.29948320984840393,
|
|
"fcm_dpo/margin": 0.36920058727264404,
|
|
"fcm_dpo/q_t": 0.35193294286727905,
|
|
"grad_norm": 582.5974731445312,
|
|
"learning_rate": 4.994093197099587e-07,
|
|
"logits/chosen": 0.08831678330898285,
|
|
"logits/rejected": 0.05518771708011627,
|
|
"logps/chosen": -64.47744750976562,
|
|
"logps/ref_chosen": -63.80437088012695,
|
|
"logps/ref_rejected": -79.3484115600586,
|
|
"logps/rejected": -80.39068603515625,
|
|
"loss": 1.0674,
|
|
"margin_dpo/margin_mean": 0.36920061707496643,
|
|
"margin_dpo/margin_std": 0.6044883728027344,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.12396069538926682,
|
|
"fcm_dpo/beta": 2.2348222732543945,
|
|
"fcm_dpo/delta": -0.3301578760147095,
|
|
"fcm_dpo/margin": 0.4032464921474457,
|
|
"fcm_dpo/q_t": 0.31583303213119507,
|
|
"grad_norm": 494.9556884765625,
|
|
"learning_rate": 4.993149937871306e-07,
|
|
"logits/chosen": 0.07512053102254868,
|
|
"logits/rejected": 0.012894164770841599,
|
|
"logps/chosen": -49.45317840576172,
|
|
"logps/ref_chosen": -48.817893981933594,
|
|
"logps/ref_rejected": -70.31497955322266,
|
|
"logps/rejected": -71.353515625,
|
|
"loss": 0.9006,
|
|
"margin_dpo/margin_mean": 0.4032464325428009,
|
|
"margin_dpo/margin_std": 0.4665597677230835,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.1254724111866969,
|
|
"fcm_dpo/beta": 2.105210781097412,
|
|
"fcm_dpo/delta": -0.2685595750808716,
|
|
"fcm_dpo/margin": 0.401851087808609,
|
|
"fcm_dpo/q_t": 0.3413703143596649,
|
|
"grad_norm": 505.2091979980469,
|
|
"learning_rate": 4.992136939879856e-07,
|
|
"logits/chosen": 0.13643789291381836,
|
|
"logits/rejected": 0.08787815272808075,
|
|
"logps/chosen": -57.84598159790039,
|
|
"logps/ref_chosen": -57.15077209472656,
|
|
"logps/ref_rejected": -75.1710205078125,
|
|
"logps/rejected": -76.26808166503906,
|
|
"loss": 1.0579,
|
|
"margin_dpo/margin_mean": 0.40185046195983887,
|
|
"margin_dpo/margin_std": 0.6407204866409302,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.12698412698412698,
|
|
"fcm_dpo/beta": 2.1374802589416504,
|
|
"fcm_dpo/delta": 0.2308960258960724,
|
|
"fcm_dpo/margin": 0.17705368995666504,
|
|
"fcm_dpo/q_t": 0.4283042848110199,
|
|
"grad_norm": 767.9581909179688,
|
|
"learning_rate": 4.991054231460969e-07,
|
|
"logits/chosen": 0.12467605620622635,
|
|
"logits/rejected": 0.08387472480535507,
|
|
"logps/chosen": -65.62751007080078,
|
|
"logps/ref_chosen": -64.77729797363281,
|
|
"logps/ref_rejected": -84.71949768066406,
|
|
"logps/rejected": -85.74675750732422,
|
|
"loss": 1.3964,
|
|
"margin_dpo/margin_mean": 0.17705348134040833,
|
|
"margin_dpo/margin_std": 0.594845175743103,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.12849584278155707,
|
|
"fcm_dpo/beta": 2.0624887943267822,
|
|
"fcm_dpo/delta": -0.3910365104675293,
|
|
"fcm_dpo/margin": 0.4630565047264099,
|
|
"fcm_dpo/q_t": 0.33032843470573425,
|
|
"grad_norm": 518.486083984375,
|
|
"learning_rate": 4.989901842900325e-07,
|
|
"logits/chosen": 0.11759524047374725,
|
|
"logits/rejected": 0.07492982596158981,
|
|
"logps/chosen": -50.919776916503906,
|
|
"logps/ref_chosen": -50.25169372558594,
|
|
"logps/ref_rejected": -66.55439758300781,
|
|
"logps/rejected": -67.68553161621094,
|
|
"loss": 1.0213,
|
|
"margin_dpo/margin_mean": 0.46305617690086365,
|
|
"margin_dpo/margin_std": 0.6838178038597107,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.13000755857898716,
|
|
"fcm_dpo/beta": 1.9690525531768799,
|
|
"fcm_dpo/delta": -0.09890329092741013,
|
|
"fcm_dpo/margin": 0.3509269654750824,
|
|
"fcm_dpo/q_t": 0.37694597244262695,
|
|
"grad_norm": 459.40576171875,
|
|
"learning_rate": 4.988679806432711e-07,
|
|
"logits/chosen": 0.14702852070331573,
|
|
"logits/rejected": 0.12872420251369476,
|
|
"logps/chosen": -61.52762985229492,
|
|
"logps/ref_chosen": -60.72917938232422,
|
|
"logps/ref_rejected": -72.30961608886719,
|
|
"logps/rejected": -73.45899200439453,
|
|
"loss": 1.1121,
|
|
"margin_dpo/margin_mean": 0.35092705488204956,
|
|
"margin_dpo/margin_std": 0.6370328664779663,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.13151927437641722,
|
|
"fcm_dpo/beta": 1.907198190689087,
|
|
"fcm_dpo/delta": -0.10468879342079163,
|
|
"fcm_dpo/margin": 0.3619868755340576,
|
|
"fcm_dpo/q_t": 0.3715543746948242,
|
|
"grad_norm": 590.1043701171875,
|
|
"learning_rate": 4.987388156241114e-07,
|
|
"logits/chosen": 0.0830545723438263,
|
|
"logits/rejected": 0.02916475385427475,
|
|
"logps/chosen": -66.58967590332031,
|
|
"logps/ref_chosen": -65.75796508789062,
|
|
"logps/ref_rejected": -84.81159973144531,
|
|
"logps/rejected": -86.00529479980469,
|
|
"loss": 1.2636,
|
|
"margin_dpo/margin_mean": 0.3619861900806427,
|
|
"margin_dpo/margin_std": 0.7822234630584717,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.1330309901738473,
|
|
"fcm_dpo/beta": 1.892395257949829,
|
|
"fcm_dpo/delta": -0.09083393216133118,
|
|
"fcm_dpo/margin": 0.361200749874115,
|
|
"fcm_dpo/q_t": 0.37534597516059875,
|
|
"grad_norm": 558.025146484375,
|
|
"learning_rate": 4.986026928455767e-07,
|
|
"logits/chosen": 0.15574653446674347,
|
|
"logits/rejected": 0.13011637330055237,
|
|
"logps/chosen": -63.60407257080078,
|
|
"logps/ref_chosen": -62.82402801513672,
|
|
"logps/ref_rejected": -74.9607162475586,
|
|
"logps/rejected": -76.10195922851562,
|
|
"loss": 1.2043,
|
|
"margin_dpo/margin_mean": 0.36120113730430603,
|
|
"margin_dpo/margin_std": 0.7272647023200989,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.1345427059712774,
|
|
"fcm_dpo/beta": 1.8832402229309082,
|
|
"fcm_dpo/delta": -0.11566749215126038,
|
|
"fcm_dpo/margin": 0.3750349283218384,
|
|
"fcm_dpo/q_t": 0.3602682650089264,
|
|
"grad_norm": 456.3847351074219,
|
|
"learning_rate": 4.984596161153135e-07,
|
|
"logits/chosen": 0.20242905616760254,
|
|
"logits/rejected": 0.12148602306842804,
|
|
"logps/chosen": -41.91245651245117,
|
|
"logps/ref_chosen": -41.191436767578125,
|
|
"logps/ref_rejected": -85.44769287109375,
|
|
"logps/rejected": -86.54374694824219,
|
|
"loss": 1.1854,
|
|
"margin_dpo/margin_mean": 0.37503573298454285,
|
|
"margin_dpo/margin_std": 0.7346115708351135,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.1360544217687075,
|
|
"fcm_dpo/beta": 1.8486794233322144,
|
|
"fcm_dpo/delta": 0.013222428038716316,
|
|
"fcm_dpo/margin": 0.31774118542671204,
|
|
"fcm_dpo/q_t": 0.39341413974761963,
|
|
"grad_norm": 499.37640380859375,
|
|
"learning_rate": 4.983095894354857e-07,
|
|
"logits/chosen": 0.07887136936187744,
|
|
"logits/rejected": 0.027213769033551216,
|
|
"logps/chosen": -57.39919662475586,
|
|
"logps/ref_chosen": -56.58390808105469,
|
|
"logps/ref_rejected": -86.86978149414062,
|
|
"logps/rejected": -88.00281524658203,
|
|
"loss": 1.2172,
|
|
"margin_dpo/margin_mean": 0.317741334438324,
|
|
"margin_dpo/margin_std": 0.7089375853538513,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.13756613756613756,
|
|
"fcm_dpo/beta": 1.7621798515319824,
|
|
"fcm_dpo/delta": -0.3605840802192688,
|
|
"fcm_dpo/margin": 0.5263523459434509,
|
|
"fcm_dpo/q_t": 0.3355669379234314,
|
|
"grad_norm": 379.0741271972656,
|
|
"learning_rate": 4.98152617002662e-07,
|
|
"logits/chosen": 0.0857606828212738,
|
|
"logits/rejected": 0.044405438005924225,
|
|
"logps/chosen": -53.12702560424805,
|
|
"logps/ref_chosen": -52.38234329223633,
|
|
"logps/ref_rejected": -72.17642211914062,
|
|
"logps/rejected": -73.44746398925781,
|
|
"loss": 0.9948,
|
|
"margin_dpo/margin_mean": 0.5263521671295166,
|
|
"margin_dpo/margin_std": 0.7791818380355835,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.13907785336356765,
|
|
"fcm_dpo/beta": 1.7615249156951904,
|
|
"fcm_dpo/delta": -0.010726943612098694,
|
|
"fcm_dpo/margin": 0.3414357304573059,
|
|
"fcm_dpo/q_t": 0.38870492577552795,
|
|
"grad_norm": 441.7849426269531,
|
|
"learning_rate": 4.979887032076988e-07,
|
|
"logits/chosen": 0.16990810632705688,
|
|
"logits/rejected": 0.1302517056465149,
|
|
"logps/chosen": -53.923187255859375,
|
|
"logps/ref_chosen": -53.00870132446289,
|
|
"logps/ref_rejected": -79.77812957763672,
|
|
"logps/rejected": -81.0340576171875,
|
|
"loss": 1.2497,
|
|
"margin_dpo/margin_mean": 0.34143590927124023,
|
|
"margin_dpo/margin_std": 0.7262367010116577,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.14058956916099774,
|
|
"fcm_dpo/beta": 1.7267677783966064,
|
|
"fcm_dpo/delta": -0.014829907566308975,
|
|
"fcm_dpo/margin": 0.35534825921058655,
|
|
"fcm_dpo/q_t": 0.38280731439590454,
|
|
"grad_norm": 380.222412109375,
|
|
"learning_rate": 4.978178526356172e-07,
|
|
"logits/chosen": 0.13078534603118896,
|
|
"logits/rejected": 0.10297398269176483,
|
|
"logps/chosen": -45.80891799926758,
|
|
"logps/ref_chosen": -44.90705108642578,
|
|
"logps/ref_rejected": -58.7879524230957,
|
|
"logps/rejected": -60.045162200927734,
|
|
"loss": 1.1901,
|
|
"margin_dpo/margin_mean": 0.35534799098968506,
|
|
"margin_dpo/margin_std": 0.7640775442123413,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.1421012849584278,
|
|
"fcm_dpo/beta": 1.669801115989685,
|
|
"fcm_dpo/delta": -0.09243573993444443,
|
|
"fcm_dpo/margin": 0.40765514969825745,
|
|
"fcm_dpo/q_t": 0.37269920110702515,
|
|
"grad_norm": 421.4466552734375,
|
|
"learning_rate": 4.976400700654751e-07,
|
|
"logits/chosen": 0.1810636818408966,
|
|
"logits/rejected": 0.1434386819601059,
|
|
"logps/chosen": -60.6273193359375,
|
|
"logps/ref_chosen": -59.93777084350586,
|
|
"logps/ref_rejected": -79.3138427734375,
|
|
"logps/rejected": -80.41104125976562,
|
|
"loss": 1.217,
|
|
"margin_dpo/margin_mean": 0.4076548218727112,
|
|
"margin_dpo/margin_std": 0.9024351835250854,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.1436130007558579,
|
|
"fcm_dpo/beta": 1.5891482830047607,
|
|
"fcm_dpo/delta": -0.3270946741104126,
|
|
"fcm_dpo/margin": 0.5614709854125977,
|
|
"fcm_dpo/q_t": 0.32878196239471436,
|
|
"grad_norm": 361.7147216796875,
|
|
"learning_rate": 4.974553604702332e-07,
|
|
"logits/chosen": 0.07957080006599426,
|
|
"logits/rejected": 0.01844627410173416,
|
|
"logps/chosen": -61.05595397949219,
|
|
"logps/ref_chosen": -60.168487548828125,
|
|
"logps/ref_rejected": -90.73665618896484,
|
|
"logps/rejected": -92.18559265136719,
|
|
"loss": 0.9685,
|
|
"margin_dpo/margin_mean": 0.5614703297615051,
|
|
"margin_dpo/margin_std": 0.7650089859962463,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.14512471655328799,
|
|
"fcm_dpo/beta": 1.5399678945541382,
|
|
"fcm_dpo/delta": -0.2155621349811554,
|
|
"fcm_dpo/margin": 0.5187313556671143,
|
|
"fcm_dpo/q_t": 0.3439177870750427,
|
|
"grad_norm": 334.061279296875,
|
|
"learning_rate": 4.972637290166157e-07,
|
|
"logits/chosen": 0.11804474890232086,
|
|
"logits/rejected": 0.0765266865491867,
|
|
"logps/chosen": -61.5324821472168,
|
|
"logps/ref_chosen": -60.66877746582031,
|
|
"logps/ref_rejected": -88.30673217773438,
|
|
"logps/rejected": -89.68916320800781,
|
|
"loss": 1.0112,
|
|
"margin_dpo/margin_mean": 0.518730878829956,
|
|
"margin_dpo/margin_std": 0.7388438582420349,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.14663643235071808,
|
|
"fcm_dpo/beta": 1.4972609281539917,
|
|
"fcm_dpo/delta": 0.018384963274002075,
|
|
"fcm_dpo/margin": 0.3870452344417572,
|
|
"fcm_dpo/q_t": 0.40638962388038635,
|
|
"grad_norm": 447.0497741699219,
|
|
"learning_rate": 4.970651810649666e-07,
|
|
"logits/chosen": 0.06303801387548447,
|
|
"logits/rejected": 0.02052931673824787,
|
|
"logps/chosen": -66.08944702148438,
|
|
"logps/ref_chosen": -65.04412078857422,
|
|
"logps/ref_rejected": -78.42092895507812,
|
|
"logps/rejected": -79.85330200195312,
|
|
"loss": 1.1872,
|
|
"margin_dpo/margin_mean": 0.38704511523246765,
|
|
"margin_dpo/margin_std": 0.8554011583328247,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.14814814814814814,
|
|
"fcm_dpo/beta": 1.5304303169250488,
|
|
"fcm_dpo/delta": 0.13418416678905487,
|
|
"fcm_dpo/margin": 0.3075883686542511,
|
|
"fcm_dpo/q_t": 0.4082970917224884,
|
|
"grad_norm": 418.0280456542969,
|
|
"learning_rate": 4.968597221690985e-07,
|
|
"logits/chosen": 0.1497489959001541,
|
|
"logits/rejected": 0.12327147275209427,
|
|
"logps/chosen": -56.33129119873047,
|
|
"logps/ref_chosen": -55.503231048583984,
|
|
"logps/ref_rejected": -72.81553649902344,
|
|
"logps/rejected": -73.9511947631836,
|
|
"loss": 1.2315,
|
|
"margin_dpo/margin_mean": 0.3075885772705078,
|
|
"margin_dpo/margin_std": 0.695486307144165,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.14965986394557823,
|
|
"fcm_dpo/beta": 1.5460871458053589,
|
|
"fcm_dpo/delta": -0.0771804004907608,
|
|
"fcm_dpo/margin": 0.4346781373023987,
|
|
"fcm_dpo/q_t": 0.3763027489185333,
|
|
"grad_norm": 433.2595520019531,
|
|
"learning_rate": 4.966473580761389e-07,
|
|
"logits/chosen": 0.15021108090877533,
|
|
"logits/rejected": 0.11408122628927231,
|
|
"logps/chosen": -59.42683410644531,
|
|
"logps/ref_chosen": -58.57563781738281,
|
|
"logps/ref_rejected": -78.693603515625,
|
|
"logps/rejected": -79.97947692871094,
|
|
"loss": 1.1167,
|
|
"margin_dpo/margin_mean": 0.43467745184898376,
|
|
"margin_dpo/margin_std": 0.7960972189903259,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.15117157974300832,
|
|
"fcm_dpo/beta": 1.5719187259674072,
|
|
"fcm_dpo/delta": 0.07484941184520721,
|
|
"fcm_dpo/margin": 0.3342527747154236,
|
|
"fcm_dpo/q_t": 0.41647306084632874,
|
|
"grad_norm": 518.3218383789062,
|
|
"learning_rate": 4.964280947263676e-07,
|
|
"logits/chosen": 0.12237241864204407,
|
|
"logits/rejected": 0.11543639004230499,
|
|
"logps/chosen": -80.54015350341797,
|
|
"logps/ref_chosen": -79.58343505859375,
|
|
"logps/ref_rejected": -92.152587890625,
|
|
"logps/rejected": -93.44355773925781,
|
|
"loss": 1.3578,
|
|
"margin_dpo/margin_mean": 0.3342531621456146,
|
|
"margin_dpo/margin_std": 0.9492334127426147,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.15268329554043839,
|
|
"fcm_dpo/beta": 1.5015451908111572,
|
|
"fcm_dpo/delta": -0.34811440110206604,
|
|
"fcm_dpo/margin": 0.610725998878479,
|
|
"fcm_dpo/q_t": 0.3262676000595093,
|
|
"grad_norm": 315.0995788574219,
|
|
"learning_rate": 4.96201938253052e-07,
|
|
"logits/chosen": 0.14972041547298431,
|
|
"logits/rejected": 0.1133972704410553,
|
|
"logps/chosen": -53.138023376464844,
|
|
"logps/ref_chosen": -52.332786560058594,
|
|
"logps/ref_rejected": -69.55589294433594,
|
|
"logps/rejected": -70.97186279296875,
|
|
"loss": 0.9147,
|
|
"margin_dpo/margin_mean": 0.6107259392738342,
|
|
"margin_dpo/margin_std": 0.741790771484375,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.15419501133786848,
|
|
"fcm_dpo/beta": 1.4856407642364502,
|
|
"fcm_dpo/delta": 0.10125008225440979,
|
|
"fcm_dpo/margin": 0.33820411562919617,
|
|
"fcm_dpo/q_t": 0.40724170207977295,
|
|
"grad_norm": 422.3230895996094,
|
|
"learning_rate": 4.959688949822748e-07,
|
|
"logits/chosen": 0.07880719006061554,
|
|
"logits/rejected": 0.0402679443359375,
|
|
"logps/chosen": -65.69737243652344,
|
|
"logps/ref_chosen": -64.74348449707031,
|
|
"logps/ref_rejected": -69.06132507324219,
|
|
"logps/rejected": -70.35342407226562,
|
|
"loss": 1.2587,
|
|
"margin_dpo/margin_mean": 0.33820363879203796,
|
|
"margin_dpo/margin_std": 0.8350169062614441,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.15570672713529857,
|
|
"fcm_dpo/beta": 1.474139928817749,
|
|
"fcm_dpo/delta": -0.0702066570520401,
|
|
"fcm_dpo/margin": 0.4514557421207428,
|
|
"fcm_dpo/q_t": 0.38118183612823486,
|
|
"grad_norm": 412.3791198730469,
|
|
"learning_rate": 4.957289714327572e-07,
|
|
"logits/chosen": 0.13948319852352142,
|
|
"logits/rejected": 0.1087922602891922,
|
|
"logps/chosen": -64.72733306884766,
|
|
"logps/ref_chosen": -63.83664321899414,
|
|
"logps/ref_rejected": -79.32362365722656,
|
|
"logps/rejected": -80.665771484375,
|
|
"loss": 1.1328,
|
|
"margin_dpo/margin_mean": 0.45145532488822937,
|
|
"margin_dpo/margin_std": 0.8555996417999268,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.15721844293272866,
|
|
"fcm_dpo/beta": 1.4898967742919922,
|
|
"fcm_dpo/delta": 0.06615878641605377,
|
|
"fcm_dpo/margin": 0.35946568846702576,
|
|
"fcm_dpo/q_t": 0.407916396856308,
|
|
"grad_norm": 478.3046569824219,
|
|
"learning_rate": 4.954821743156767e-07,
|
|
"logits/chosen": 0.16390858590602875,
|
|
"logits/rejected": 0.08152991533279419,
|
|
"logps/chosen": -61.924827575683594,
|
|
"logps/ref_chosen": -60.99920654296875,
|
|
"logps/ref_rejected": -98.84645080566406,
|
|
"logps/rejected": -100.13154602050781,
|
|
"loss": 1.2451,
|
|
"margin_dpo/margin_mean": 0.3594653904438019,
|
|
"margin_dpo/margin_std": 0.8471232652664185,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.15873015873015872,
|
|
"fcm_dpo/beta": 1.4996893405914307,
|
|
"fcm_dpo/delta": 0.011385314166545868,
|
|
"fcm_dpo/margin": 0.3916703760623932,
|
|
"fcm_dpo/q_t": 0.39221182465553284,
|
|
"grad_norm": 480.3543395996094,
|
|
"learning_rate": 4.952285105344791e-07,
|
|
"logits/chosen": 0.10116258263587952,
|
|
"logits/rejected": 0.04981735721230507,
|
|
"logps/chosen": -71.88473510742188,
|
|
"logps/ref_chosen": -70.95027160644531,
|
|
"logps/ref_rejected": -87.88340759277344,
|
|
"logps/rejected": -89.20955657958984,
|
|
"loss": 1.2319,
|
|
"margin_dpo/margin_mean": 0.39167043566703796,
|
|
"margin_dpo/margin_std": 0.8842525482177734,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.1602418745275888,
|
|
"fcm_dpo/beta": 1.4681777954101562,
|
|
"fcm_dpo/delta": -0.06567725539207458,
|
|
"fcm_dpo/margin": 0.4502679407596588,
|
|
"fcm_dpo/q_t": 0.37183088064193726,
|
|
"grad_norm": 399.4701232910156,
|
|
"learning_rate": 4.949679871846857e-07,
|
|
"logits/chosen": 0.1115492507815361,
|
|
"logits/rejected": 0.0991833508014679,
|
|
"logps/chosen": -63.30242919921875,
|
|
"logps/ref_chosen": -62.45933151245117,
|
|
"logps/ref_rejected": -67.00595092773438,
|
|
"logps/rejected": -68.29930877685547,
|
|
"loss": 1.1228,
|
|
"margin_dpo/margin_mean": 0.450268030166626,
|
|
"margin_dpo/margin_std": 0.8252195119857788,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.1617535903250189,
|
|
"fcm_dpo/beta": 1.491356372833252,
|
|
"fcm_dpo/delta": 0.19463634490966797,
|
|
"fcm_dpo/margin": 0.27668291330337524,
|
|
"fcm_dpo/q_t": 0.428573340177536,
|
|
"grad_norm": 520.7399291992188,
|
|
"learning_rate": 4.947006115536947e-07,
|
|
"logits/chosen": 0.07313170284032822,
|
|
"logits/rejected": 0.053381193429231644,
|
|
"logps/chosen": -76.79938507080078,
|
|
"logps/ref_chosen": -75.83796691894531,
|
|
"logps/ref_rejected": -87.74038696289062,
|
|
"logps/rejected": -88.97848510742188,
|
|
"loss": 1.3955,
|
|
"margin_dpo/margin_mean": 0.2766827940940857,
|
|
"margin_dpo/margin_std": 0.8988056778907776,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.16326530612244897,
|
|
"fcm_dpo/beta": 1.5026856660842896,
|
|
"fcm_dpo/delta": -0.1375723034143448,
|
|
"fcm_dpo/margin": 0.48424917459487915,
|
|
"fcm_dpo/q_t": 0.367892861366272,
|
|
"grad_norm": 377.7269287109375,
|
|
"learning_rate": 4.944263911205772e-07,
|
|
"logits/chosen": 0.09277549386024475,
|
|
"logits/rejected": 0.0655142068862915,
|
|
"logps/chosen": -69.25541687011719,
|
|
"logps/ref_chosen": -68.39323425292969,
|
|
"logps/ref_rejected": -83.24267578125,
|
|
"logps/rejected": -84.58909606933594,
|
|
"loss": 1.0992,
|
|
"margin_dpo/margin_mean": 0.4842491149902344,
|
|
"margin_dpo/margin_std": 0.8379406332969666,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.16477702191987906,
|
|
"fcm_dpo/beta": 1.4416108131408691,
|
|
"fcm_dpo/delta": -0.14801928400993347,
|
|
"fcm_dpo/margin": 0.5100686550140381,
|
|
"fcm_dpo/q_t": 0.3660755157470703,
|
|
"grad_norm": 357.8731994628906,
|
|
"learning_rate": 4.941453335558681e-07,
|
|
"logits/chosen": 0.08502168953418732,
|
|
"logits/rejected": 0.03738076239824295,
|
|
"logps/chosen": -56.36671829223633,
|
|
"logps/ref_chosen": -55.52748107910156,
|
|
"logps/ref_rejected": -83.55218505859375,
|
|
"logps/rejected": -84.9014892578125,
|
|
"loss": 1.0253,
|
|
"margin_dpo/margin_mean": 0.510068416595459,
|
|
"margin_dpo/margin_std": 0.8034826517105103,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.16628873771730915,
|
|
"fcm_dpo/beta": 1.4788618087768555,
|
|
"fcm_dpo/delta": 0.11002928763628006,
|
|
"fcm_dpo/margin": 0.33293917775154114,
|
|
"fcm_dpo/q_t": 0.4101300537586212,
|
|
"grad_norm": 451.916015625,
|
|
"learning_rate": 4.938574467213517e-07,
|
|
"logits/chosen": 0.07888751477003098,
|
|
"logits/rejected": 0.08660773187875748,
|
|
"logps/chosen": -82.09772491455078,
|
|
"logps/ref_chosen": -81.15874481201172,
|
|
"logps/ref_rejected": -72.56021118164062,
|
|
"logps/rejected": -73.83213806152344,
|
|
"loss": 1.246,
|
|
"margin_dpo/margin_mean": 0.3329389691352844,
|
|
"margin_dpo/margin_std": 0.8031635284423828,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.16780045351473924,
|
|
"fcm_dpo/beta": 1.4763123989105225,
|
|
"fcm_dpo/delta": -0.048699431121349335,
|
|
"fcm_dpo/margin": 0.43631669878959656,
|
|
"fcm_dpo/q_t": 0.38030728697776794,
|
|
"grad_norm": 397.7984313964844,
|
|
"learning_rate": 4.935627386698418e-07,
|
|
"logits/chosen": 0.17604178190231323,
|
|
"logits/rejected": 0.1414678990840912,
|
|
"logps/chosen": -53.380836486816406,
|
|
"logps/ref_chosen": -52.358985900878906,
|
|
"logps/ref_rejected": -77.06150817871094,
|
|
"logps/rejected": -78.51966857910156,
|
|
"loss": 1.2185,
|
|
"margin_dpo/margin_mean": 0.43631690740585327,
|
|
"margin_dpo/margin_std": 0.8934418559074402,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.1693121693121693,
|
|
"fcm_dpo/beta": 1.439178228378296,
|
|
"fcm_dpo/delta": -0.10804037004709244,
|
|
"fcm_dpo/margin": 0.4868055582046509,
|
|
"fcm_dpo/q_t": 0.3607805073261261,
|
|
"grad_norm": 399.50689697265625,
|
|
"learning_rate": 4.932612176449559e-07,
|
|
"logits/chosen": 0.061367664486169815,
|
|
"logits/rejected": 0.0060233473777771,
|
|
"logps/chosen": -63.8758544921875,
|
|
"logps/ref_chosen": -63.02006530761719,
|
|
"logps/ref_rejected": -111.36941528320312,
|
|
"logps/rejected": -112.71200561523438,
|
|
"loss": 1.1135,
|
|
"margin_dpo/margin_mean": 0.48680511116981506,
|
|
"margin_dpo/margin_std": 0.843925952911377,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.1708238851095994,
|
|
"fcm_dpo/beta": 1.4334089756011963,
|
|
"fcm_dpo/delta": -0.07620993256568909,
|
|
"fcm_dpo/margin": 0.4656754732131958,
|
|
"fcm_dpo/q_t": 0.3700527548789978,
|
|
"grad_norm": 418.9619445800781,
|
|
"learning_rate": 4.929528920808854e-07,
|
|
"logits/chosen": 0.07376405596733093,
|
|
"logits/rejected": 0.04032863304018974,
|
|
"logps/chosen": -56.809410095214844,
|
|
"logps/ref_chosen": -55.80766296386719,
|
|
"logps/ref_rejected": -69.84014129638672,
|
|
"logps/rejected": -71.30755615234375,
|
|
"loss": 1.2208,
|
|
"margin_dpo/margin_mean": 0.46567538380622864,
|
|
"margin_dpo/margin_std": 0.9440125226974487,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.17233560090702948,
|
|
"fcm_dpo/beta": 1.3131306171417236,
|
|
"fcm_dpo/delta": -0.38322171568870544,
|
|
"fcm_dpo/margin": 0.7164374589920044,
|
|
"fcm_dpo/q_t": 0.32581445574760437,
|
|
"grad_norm": 272.3829040527344,
|
|
"learning_rate": 4.92637770602159e-07,
|
|
"logits/chosen": 0.15654101967811584,
|
|
"logits/rejected": 0.0994241014122963,
|
|
"logps/chosen": -67.11263275146484,
|
|
"logps/ref_chosen": -66.33277130126953,
|
|
"logps/ref_rejected": -71.61489868164062,
|
|
"logps/rejected": -73.1112060546875,
|
|
"loss": 0.9336,
|
|
"margin_dpo/margin_mean": 0.716437578201294,
|
|
"margin_dpo/margin_std": 0.9500221610069275,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.17384731670445955,
|
|
"fcm_dpo/beta": 1.2982274293899536,
|
|
"fcm_dpo/delta": -0.07483263313770294,
|
|
"fcm_dpo/margin": 0.5153573751449585,
|
|
"fcm_dpo/q_t": 0.3759816586971283,
|
|
"grad_norm": 347.9417724609375,
|
|
"learning_rate": 4.923158620234019e-07,
|
|
"logits/chosen": 0.11214175820350647,
|
|
"logits/rejected": 0.05748974159359932,
|
|
"logps/chosen": -56.71136474609375,
|
|
"logps/ref_chosen": -55.74903869628906,
|
|
"logps/ref_rejected": -79.59849548339844,
|
|
"logps/rejected": -81.07618713378906,
|
|
"loss": 1.0579,
|
|
"margin_dpo/margin_mean": 0.5153576135635376,
|
|
"margin_dpo/margin_std": 0.8605426549911499,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.17535903250188964,
|
|
"fcm_dpo/beta": 1.2686808109283447,
|
|
"fcm_dpo/delta": -0.09204667806625366,
|
|
"fcm_dpo/margin": 0.5405768752098083,
|
|
"fcm_dpo/q_t": 0.3639325499534607,
|
|
"grad_norm": 282.1678771972656,
|
|
"learning_rate": 4.91987175349089e-07,
|
|
"logits/chosen": 0.11463910341262817,
|
|
"logits/rejected": 0.054369859397411346,
|
|
"logps/chosen": -50.27439880371094,
|
|
"logps/ref_chosen": -49.36516571044922,
|
|
"logps/ref_rejected": -72.84671020507812,
|
|
"logps/rejected": -74.2965087890625,
|
|
"loss": 1.0219,
|
|
"margin_dpo/margin_mean": 0.5405769348144531,
|
|
"margin_dpo/margin_std": 0.7944033145904541,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.17687074829931973,
|
|
"fcm_dpo/beta": 1.2446372509002686,
|
|
"fcm_dpo/delta": -0.007452197372913361,
|
|
"fcm_dpo/margin": 0.4859652817249298,
|
|
"fcm_dpo/q_t": 0.3739723563194275,
|
|
"grad_norm": 297.24615478515625,
|
|
"learning_rate": 4.916517197732933e-07,
|
|
"logits/chosen": 0.13651910424232483,
|
|
"logits/rejected": 0.10319022834300995,
|
|
"logps/chosen": -58.53379821777344,
|
|
"logps/ref_chosen": -57.710899353027344,
|
|
"logps/ref_rejected": -69.77253723144531,
|
|
"logps/rejected": -71.08141326904297,
|
|
"loss": 1.117,
|
|
"margin_dpo/margin_mean": 0.48596563935279846,
|
|
"margin_dpo/margin_std": 0.8542994260787964,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.17838246409674982,
|
|
"fcm_dpo/beta": 1.234614610671997,
|
|
"fcm_dpo/delta": -0.08241216838359833,
|
|
"fcm_dpo/margin": 0.5473380088806152,
|
|
"fcm_dpo/q_t": 0.36874717473983765,
|
|
"grad_norm": 312.6552734375,
|
|
"learning_rate": 4.913095046794281e-07,
|
|
"logits/chosen": 0.2066066563129425,
|
|
"logits/rejected": 0.16842034459114075,
|
|
"logps/chosen": -53.291236877441406,
|
|
"logps/ref_chosen": -52.479896545410156,
|
|
"logps/ref_rejected": -81.359130859375,
|
|
"logps/rejected": -82.71780395507812,
|
|
"loss": 1.0609,
|
|
"margin_dpo/margin_mean": 0.547337532043457,
|
|
"margin_dpo/margin_std": 0.8634383678436279,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.17989417989417988,
|
|
"fcm_dpo/beta": 1.2622461318969727,
|
|
"fcm_dpo/delta": 0.11250065267086029,
|
|
"fcm_dpo/margin": 0.3900427222251892,
|
|
"fcm_dpo/q_t": 0.40436500310897827,
|
|
"grad_norm": 346.1368408203125,
|
|
"learning_rate": 4.909605396399855e-07,
|
|
"logits/chosen": 0.08708840608596802,
|
|
"logits/rejected": 0.053689248859882355,
|
|
"logps/chosen": -62.44677734375,
|
|
"logps/ref_chosen": -61.35767364501953,
|
|
"logps/ref_rejected": -75.71510314941406,
|
|
"logps/rejected": -77.19424438476562,
|
|
"loss": 1.2627,
|
|
"margin_dpo/margin_mean": 0.3900427222251892,
|
|
"margin_dpo/margin_std": 0.9560626745223999,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.18140589569160998,
|
|
"fcm_dpo/beta": 1.2444019317626953,
|
|
"fcm_dpo/delta": -0.11569002270698547,
|
|
"fcm_dpo/margin": 0.5686198472976685,
|
|
"fcm_dpo/q_t": 0.3610965609550476,
|
|
"grad_norm": 287.4088439941406,
|
|
"learning_rate": 4.906048344162676e-07,
|
|
"logits/chosen": 0.10341674834489822,
|
|
"logits/rejected": 0.04983753338456154,
|
|
"logps/chosen": -60.8148078918457,
|
|
"logps/ref_chosen": -59.907569885253906,
|
|
"logps/ref_rejected": -79.6910629272461,
|
|
"logps/rejected": -81.16691589355469,
|
|
"loss": 0.9954,
|
|
"margin_dpo/margin_mean": 0.5686200261116028,
|
|
"margin_dpo/margin_std": 0.7998465299606323,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.18291761148904007,
|
|
"fcm_dpo/beta": 1.2363691329956055,
|
|
"fcm_dpo/delta": 0.008508594706654549,
|
|
"fcm_dpo/margin": 0.4788138270378113,
|
|
"fcm_dpo/q_t": 0.38364630937576294,
|
|
"grad_norm": 282.2060546875,
|
|
"learning_rate": 4.902423989581143e-07,
|
|
"logits/chosen": 0.21674856543540955,
|
|
"logits/rejected": 0.13925334811210632,
|
|
"logps/chosen": -56.681575775146484,
|
|
"logps/ref_chosen": -55.66604232788086,
|
|
"logps/ref_rejected": -101.56233978271484,
|
|
"logps/rejected": -103.05668640136719,
|
|
"loss": 1.0875,
|
|
"margin_dpo/margin_mean": 0.47881391644477844,
|
|
"margin_dpo/margin_std": 0.8130632638931274,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.18442932728647016,
|
|
"fcm_dpo/beta": 1.1877247095108032,
|
|
"fcm_dpo/delta": -0.3032481074333191,
|
|
"fcm_dpo/margin": 0.7386814951896667,
|
|
"fcm_dpo/q_t": 0.33100205659866333,
|
|
"grad_norm": 291.67059326171875,
|
|
"learning_rate": 4.898732434036243e-07,
|
|
"logits/chosen": 0.11649955809116364,
|
|
"logits/rejected": 0.08512992411851883,
|
|
"logps/chosen": -64.32618713378906,
|
|
"logps/ref_chosen": -63.334373474121094,
|
|
"logps/ref_rejected": -73.67523193359375,
|
|
"logps/rejected": -75.40573120117188,
|
|
"loss": 0.9224,
|
|
"margin_dpo/margin_mean": 0.7386811971664429,
|
|
"margin_dpo/margin_std": 0.961184024810791,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.18594104308390022,
|
|
"fcm_dpo/beta": 1.1708195209503174,
|
|
"fcm_dpo/delta": -0.07181403040885925,
|
|
"fcm_dpo/margin": 0.5666613578796387,
|
|
"fcm_dpo/q_t": 0.3692570924758911,
|
|
"grad_norm": 300.7272033691406,
|
|
"learning_rate": 4.894973780788722e-07,
|
|
"logits/chosen": 0.10572931170463562,
|
|
"logits/rejected": 0.06912669539451599,
|
|
"logps/chosen": -57.7982177734375,
|
|
"logps/ref_chosen": -56.89874267578125,
|
|
"logps/ref_rejected": -78.97028350830078,
|
|
"logps/rejected": -80.4364242553711,
|
|
"loss": 1.0918,
|
|
"margin_dpo/margin_mean": 0.5666618347167969,
|
|
"margin_dpo/margin_std": 0.9293673038482666,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.1874527588813303,
|
|
"fcm_dpo/beta": 1.1005598306655884,
|
|
"fcm_dpo/delta": -0.28181761503219604,
|
|
"fcm_dpo/margin": 0.7790488004684448,
|
|
"fcm_dpo/q_t": 0.32957297563552856,
|
|
"grad_norm": 234.5536346435547,
|
|
"learning_rate": 4.89114813497619e-07,
|
|
"logits/chosen": 0.15644077956676483,
|
|
"logits/rejected": 0.10393092036247253,
|
|
"logps/chosen": -58.090946197509766,
|
|
"logps/ref_chosen": -57.116085052490234,
|
|
"logps/ref_rejected": -87.93074035644531,
|
|
"logps/rejected": -89.68465423583984,
|
|
"loss": 0.9107,
|
|
"margin_dpo/margin_mean": 0.7790486812591553,
|
|
"margin_dpo/margin_std": 0.9213578701019287,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.1889644746787604,
|
|
"fcm_dpo/beta": 1.066893458366394,
|
|
"fcm_dpo/delta": -0.0766465961933136,
|
|
"fcm_dpo/margin": 0.628391683101654,
|
|
"fcm_dpo/q_t": 0.36512354016304016,
|
|
"grad_norm": 266.8402099609375,
|
|
"learning_rate": 4.887255603610184e-07,
|
|
"logits/chosen": 0.16830167174339294,
|
|
"logits/rejected": 0.11665754020214081,
|
|
"logps/chosen": -66.76890563964844,
|
|
"logps/ref_chosen": -65.7061767578125,
|
|
"logps/ref_rejected": -91.72711944580078,
|
|
"logps/rejected": -93.41824340820312,
|
|
"loss": 1.0499,
|
|
"margin_dpo/margin_mean": 0.6283919215202332,
|
|
"margin_dpo/margin_std": 0.96758633852005,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.19047619047619047,
|
|
"fcm_dpo/beta": 1.0625847578048706,
|
|
"fcm_dpo/delta": -0.014553094282746315,
|
|
"fcm_dpo/margin": 0.5772075653076172,
|
|
"fcm_dpo/q_t": 0.390198290348053,
|
|
"grad_norm": 249.5762176513672,
|
|
"learning_rate": 4.883296295573176e-07,
|
|
"logits/chosen": 0.007961070165038109,
|
|
"logits/rejected": 0.001765979453921318,
|
|
"logps/chosen": -69.1175308227539,
|
|
"logps/ref_chosen": -68.17608642578125,
|
|
"logps/ref_rejected": -65.1175537109375,
|
|
"logps/rejected": -66.63619995117188,
|
|
"loss": 1.1388,
|
|
"margin_dpo/margin_mean": 0.577207624912262,
|
|
"margin_dpo/margin_std": 1.1864867210388184,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.19198790627362056,
|
|
"fcm_dpo/beta": 1.034506916999817,
|
|
"fcm_dpo/delta": -0.14043903350830078,
|
|
"fcm_dpo/margin": 0.703222393989563,
|
|
"fcm_dpo/q_t": 0.3558955490589142,
|
|
"grad_norm": 248.7368621826172,
|
|
"learning_rate": 4.87927032161552e-07,
|
|
"logits/chosen": 0.11731210350990295,
|
|
"logits/rejected": 0.08830760419368744,
|
|
"logps/chosen": -62.924774169921875,
|
|
"logps/ref_chosen": -61.88023376464844,
|
|
"logps/ref_rejected": -68.46012878417969,
|
|
"logps/rejected": -70.20789337158203,
|
|
"loss": 0.9918,
|
|
"margin_dpo/margin_mean": 0.7032221555709839,
|
|
"margin_dpo/margin_std": 0.9526394605636597,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.19349962207105065,
|
|
"fcm_dpo/beta": 1.027363896369934,
|
|
"fcm_dpo/delta": -0.016578957438468933,
|
|
"fcm_dpo/margin": 0.5983456373214722,
|
|
"fcm_dpo/q_t": 0.37779900431632996,
|
|
"grad_norm": 250.40814208984375,
|
|
"learning_rate": 4.875177794352363e-07,
|
|
"logits/chosen": 0.14589917659759521,
|
|
"logits/rejected": 0.09506042301654816,
|
|
"logps/chosen": -67.86463165283203,
|
|
"logps/ref_chosen": -66.708984375,
|
|
"logps/ref_rejected": -94.97969055175781,
|
|
"logps/rejected": -96.73368835449219,
|
|
"loss": 1.1327,
|
|
"margin_dpo/margin_mean": 0.5983456373214722,
|
|
"margin_dpo/margin_std": 1.1081733703613281,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.19501133786848074,
|
|
"fcm_dpo/beta": 1.0448354482650757,
|
|
"fcm_dpo/delta": 0.09077582508325577,
|
|
"fcm_dpo/margin": 0.4919281601905823,
|
|
"fcm_dpo/q_t": 0.3963082730770111,
|
|
"grad_norm": 280.36907958984375,
|
|
"learning_rate": 4.871018828260491e-07,
|
|
"logits/chosen": 0.11568663269281387,
|
|
"logits/rejected": 0.10815045237541199,
|
|
"logps/chosen": -66.55685424804688,
|
|
"logps/ref_chosen": -65.33882904052734,
|
|
"logps/ref_rejected": -68.06109619140625,
|
|
"logps/rejected": -69.77104187011719,
|
|
"loss": 1.1198,
|
|
"margin_dpo/margin_mean": 0.4919281303882599,
|
|
"margin_dpo/margin_std": 0.8836801052093506,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.1965230536659108,
|
|
"fcm_dpo/beta": 1.020601749420166,
|
|
"fcm_dpo/delta": -0.08440172672271729,
|
|
"fcm_dpo/margin": 0.6577671766281128,
|
|
"fcm_dpo/q_t": 0.36100125312805176,
|
|
"grad_norm": 262.4644470214844,
|
|
"learning_rate": 4.866793539675126e-07,
|
|
"logits/chosen": 0.0954977497458458,
|
|
"logits/rejected": 0.051679350435733795,
|
|
"logps/chosen": -59.74810791015625,
|
|
"logps/ref_chosen": -58.660743713378906,
|
|
"logps/ref_rejected": -79.24510192871094,
|
|
"logps/rejected": -80.990234375,
|
|
"loss": 0.9852,
|
|
"margin_dpo/margin_mean": 0.6577669382095337,
|
|
"margin_dpo/margin_std": 0.7911086082458496,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.1980347694633409,
|
|
"fcm_dpo/beta": 1.0078396797180176,
|
|
"fcm_dpo/delta": -0.15889500081539154,
|
|
"fcm_dpo/margin": 0.7407888770103455,
|
|
"fcm_dpo/q_t": 0.3538159728050232,
|
|
"grad_norm": 228.83726501464844,
|
|
"learning_rate": 4.86250204678667e-07,
|
|
"logits/chosen": 0.09853261709213257,
|
|
"logits/rejected": 0.04450761526823044,
|
|
"logps/chosen": -53.55117416381836,
|
|
"logps/ref_chosen": -52.51453399658203,
|
|
"logps/ref_rejected": -85.18299865722656,
|
|
"logps/rejected": -86.96041870117188,
|
|
"loss": 1.0013,
|
|
"margin_dpo/margin_mean": 0.7407891750335693,
|
|
"margin_dpo/margin_std": 1.04507577419281,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.19954648526077098,
|
|
"fcm_dpo/beta": 1.000880479812622,
|
|
"fcm_dpo/delta": -0.013521028682589531,
|
|
"fcm_dpo/margin": 0.612145185470581,
|
|
"fcm_dpo/q_t": 0.37782806158065796,
|
|
"grad_norm": 272.0556640625,
|
|
"learning_rate": 4.858144469637408e-07,
|
|
"logits/chosen": 0.17530867457389832,
|
|
"logits/rejected": 0.14589731395244598,
|
|
"logps/chosen": -66.89759826660156,
|
|
"logps/ref_chosen": -65.68513488769531,
|
|
"logps/ref_rejected": -69.54120635986328,
|
|
"logps/rejected": -71.36582946777344,
|
|
"loss": 1.1305,
|
|
"margin_dpo/margin_mean": 0.6121453046798706,
|
|
"margin_dpo/margin_std": 1.148574709892273,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.20105820105820105,
|
|
"fcm_dpo/beta": 1.0055501461029053,
|
|
"fcm_dpo/delta": 0.04156200587749481,
|
|
"fcm_dpo/margin": 0.557662844657898,
|
|
"fcm_dpo/q_t": 0.38376274704933167,
|
|
"grad_norm": 264.5892639160156,
|
|
"learning_rate": 4.853720930118138e-07,
|
|
"logits/chosen": 0.1060660257935524,
|
|
"logits/rejected": 0.09657873213291168,
|
|
"logps/chosen": -64.78939819335938,
|
|
"logps/ref_chosen": -63.598114013671875,
|
|
"logps/ref_rejected": -73.72798156738281,
|
|
"logps/rejected": -75.4769287109375,
|
|
"loss": 1.108,
|
|
"margin_dpo/margin_mean": 0.5576624870300293,
|
|
"margin_dpo/margin_std": 0.9732006192207336,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.20256991685563114,
|
|
"fcm_dpo/beta": 0.9682743549346924,
|
|
"fcm_dpo/delta": -0.23356372117996216,
|
|
"fcm_dpo/margin": 0.8395987153053284,
|
|
"fcm_dpo/q_t": 0.3344127833843231,
|
|
"grad_norm": 208.65017700195312,
|
|
"learning_rate": 4.849231551964771e-07,
|
|
"logits/chosen": 0.20197008550167084,
|
|
"logits/rejected": 0.15169410407543182,
|
|
"logps/chosen": -54.892337799072266,
|
|
"logps/ref_chosen": -53.79457092285156,
|
|
"logps/ref_rejected": -74.16741943359375,
|
|
"logps/rejected": -76.10478973388672,
|
|
"loss": 0.9149,
|
|
"margin_dpo/margin_mean": 0.8395991325378418,
|
|
"margin_dpo/margin_std": 0.9888654351234436,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.20408163265306123,
|
|
"fcm_dpo/beta": 0.9734876751899719,
|
|
"fcm_dpo/delta": 0.10381930321455002,
|
|
"fcm_dpo/margin": 0.5151646137237549,
|
|
"fcm_dpo/q_t": 0.4011284112930298,
|
|
"grad_norm": 222.495849609375,
|
|
"learning_rate": 4.844676460754862e-07,
|
|
"logits/chosen": 0.13704870641231537,
|
|
"logits/rejected": 0.10613438487052917,
|
|
"logps/chosen": -50.54332733154297,
|
|
"logps/ref_chosen": -49.441078186035156,
|
|
"logps/ref_rejected": -65.96878051757812,
|
|
"logps/rejected": -67.58619689941406,
|
|
"loss": 1.1536,
|
|
"margin_dpo/margin_mean": 0.5151640176773071,
|
|
"margin_dpo/margin_std": 1.0082441568374634,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.20559334845049132,
|
|
"fcm_dpo/beta": 0.9727784395217896,
|
|
"fcm_dpo/delta": -0.06393898278474808,
|
|
"fcm_dpo/margin": 0.6782030463218689,
|
|
"fcm_dpo/q_t": 0.37919890880584717,
|
|
"grad_norm": 289.11376953125,
|
|
"learning_rate": 4.840055783904106e-07,
|
|
"logits/chosen": 0.1222413182258606,
|
|
"logits/rejected": 0.06046907976269722,
|
|
"logps/chosen": -68.07365417480469,
|
|
"logps/ref_chosen": -66.75926208496094,
|
|
"logps/ref_rejected": -94.61787414550781,
|
|
"logps/rejected": -96.61046600341797,
|
|
"loss": 1.178,
|
|
"margin_dpo/margin_mean": 0.6782038807868958,
|
|
"margin_dpo/margin_std": 1.3909220695495605,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.20710506424792138,
|
|
"fcm_dpo/beta": 0.9643727540969849,
|
|
"fcm_dpo/delta": -0.053648076951503754,
|
|
"fcm_dpo/margin": 0.6741567850112915,
|
|
"fcm_dpo/q_t": 0.37186992168426514,
|
|
"grad_norm": 218.29730224609375,
|
|
"learning_rate": 4.835369650662767e-07,
|
|
"logits/chosen": 0.14991703629493713,
|
|
"logits/rejected": 0.12426199018955231,
|
|
"logps/chosen": -57.97966766357422,
|
|
"logps/ref_chosen": -56.78379821777344,
|
|
"logps/ref_rejected": -69.89952087402344,
|
|
"logps/rejected": -71.76954650878906,
|
|
"loss": 1.1013,
|
|
"margin_dpo/margin_mean": 0.6741572618484497,
|
|
"margin_dpo/margin_std": 1.1703739166259766,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.20861678004535147,
|
|
"fcm_dpo/beta": 0.978662371635437,
|
|
"fcm_dpo/delta": 0.12415525317192078,
|
|
"fcm_dpo/margin": 0.49200883507728577,
|
|
"fcm_dpo/q_t": 0.4015718102455139,
|
|
"grad_norm": 254.64759826660156,
|
|
"learning_rate": 4.830618192112065e-07,
|
|
"logits/chosen": 0.1492946743965149,
|
|
"logits/rejected": 0.11607992649078369,
|
|
"logps/chosen": -60.26990509033203,
|
|
"logps/ref_chosen": -58.766014099121094,
|
|
"logps/ref_rejected": -68.12371826171875,
|
|
"logps/rejected": -70.11962890625,
|
|
"loss": 1.2273,
|
|
"margin_dpo/margin_mean": 0.49200862646102905,
|
|
"margin_dpo/margin_std": 1.141247034072876,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.21012849584278157,
|
|
"fcm_dpo/beta": 0.9795228242874146,
|
|
"fcm_dpo/delta": -0.11077337712049484,
|
|
"fcm_dpo/margin": 0.7158379554748535,
|
|
"fcm_dpo/q_t": 0.3632628917694092,
|
|
"grad_norm": 245.56246948242188,
|
|
"learning_rate": 4.825801541160509e-07,
|
|
"logits/chosen": 0.10423211008310318,
|
|
"logits/rejected": 0.07835687696933746,
|
|
"logps/chosen": -72.62336730957031,
|
|
"logps/ref_chosen": -71.2255859375,
|
|
"logps/ref_rejected": -82.1834716796875,
|
|
"logps/rejected": -84.29708862304688,
|
|
"loss": 1.0295,
|
|
"margin_dpo/margin_mean": 0.7158380746841431,
|
|
"margin_dpo/margin_std": 1.026993751525879,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.21164021164021163,
|
|
"fcm_dpo/beta": 0.9235955476760864,
|
|
"fcm_dpo/delta": -0.28677302598953247,
|
|
"fcm_dpo/margin": 0.9338580369949341,
|
|
"fcm_dpo/q_t": 0.3302295506000519,
|
|
"grad_norm": 248.67660522460938,
|
|
"learning_rate": 4.820919832540181e-07,
|
|
"logits/chosen": 0.11341448128223419,
|
|
"logits/rejected": 0.07324690371751785,
|
|
"logps/chosen": -64.4937973022461,
|
|
"logps/ref_chosen": -63.27766418457031,
|
|
"logps/ref_rejected": -83.30647277832031,
|
|
"logps/rejected": -85.45646667480469,
|
|
"loss": 1.0038,
|
|
"margin_dpo/margin_mean": 0.9338581562042236,
|
|
"margin_dpo/margin_std": 1.3346967697143555,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.21315192743764172,
|
|
"fcm_dpo/beta": 0.8918517827987671,
|
|
"fcm_dpo/delta": -0.07686886191368103,
|
|
"fcm_dpo/margin": 0.7517862319946289,
|
|
"fcm_dpo/q_t": 0.3738592863082886,
|
|
"grad_norm": 238.80625915527344,
|
|
"learning_rate": 4.815973202802966e-07,
|
|
"logits/chosen": 0.14074575901031494,
|
|
"logits/rejected": 0.10288909077644348,
|
|
"logps/chosen": -63.11772155761719,
|
|
"logps/ref_chosen": -61.76676940917969,
|
|
"logps/ref_rejected": -88.60601806640625,
|
|
"logps/rejected": -90.7087631225586,
|
|
"loss": 1.0866,
|
|
"margin_dpo/margin_mean": 0.7517873048782349,
|
|
"margin_dpo/margin_std": 1.2767117023468018,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.2146636432350718,
|
|
"fcm_dpo/beta": 0.9092315435409546,
|
|
"fcm_dpo/delta": 0.09595802426338196,
|
|
"fcm_dpo/margin": 0.5593085885047913,
|
|
"fcm_dpo/q_t": 0.39656299352645874,
|
|
"grad_norm": 221.80491638183594,
|
|
"learning_rate": 4.810961790316729e-07,
|
|
"logits/chosen": 0.11350887268781662,
|
|
"logits/rejected": 0.09118533134460449,
|
|
"logps/chosen": -66.62858581542969,
|
|
"logps/ref_chosen": -65.2747802734375,
|
|
"logps/ref_rejected": -81.1378173828125,
|
|
"logps/rejected": -83.05094146728516,
|
|
"loss": 1.1396,
|
|
"margin_dpo/margin_mean": 0.5593085289001465,
|
|
"margin_dpo/margin_std": 1.056554913520813,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.2161753590325019,
|
|
"fcm_dpo/beta": 0.9147884845733643,
|
|
"fcm_dpo/delta": 0.02730133756995201,
|
|
"fcm_dpo/margin": 0.6277639865875244,
|
|
"fcm_dpo/q_t": 0.3815556764602661,
|
|
"grad_norm": 275.406982421875,
|
|
"learning_rate": 4.805885735261454e-07,
|
|
"logits/chosen": 0.13471481204032898,
|
|
"logits/rejected": 0.11962398886680603,
|
|
"logps/chosen": -63.97998046875,
|
|
"logps/ref_chosen": -62.617828369140625,
|
|
"logps/ref_rejected": -70.39239501953125,
|
|
"logps/rejected": -72.38230895996094,
|
|
"loss": 1.135,
|
|
"margin_dpo/margin_mean": 0.6277639865875244,
|
|
"margin_dpo/margin_std": 1.1966910362243652,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.21768707482993196,
|
|
"fcm_dpo/beta": 0.9090726375579834,
|
|
"fcm_dpo/delta": -0.06992662698030472,
|
|
"fcm_dpo/margin": 0.7318644523620605,
|
|
"fcm_dpo/q_t": 0.37165123224258423,
|
|
"grad_norm": 247.63697814941406,
|
|
"learning_rate": 4.800745179625307e-07,
|
|
"logits/chosen": 0.11681709438562393,
|
|
"logits/rejected": 0.09179212152957916,
|
|
"logps/chosen": -62.25020980834961,
|
|
"logps/ref_chosen": -60.80268859863281,
|
|
"logps/ref_rejected": -79.07284545898438,
|
|
"logps/rejected": -81.25222778320312,
|
|
"loss": 1.1048,
|
|
"margin_dpo/margin_mean": 0.7318645715713501,
|
|
"margin_dpo/margin_std": 1.285665512084961,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.21919879062736206,
|
|
"fcm_dpo/beta": 0.8981227874755859,
|
|
"fcm_dpo/delta": -0.06589463353157043,
|
|
"fcm_dpo/margin": 0.736532986164093,
|
|
"fcm_dpo/q_t": 0.3731478452682495,
|
|
"grad_norm": 267.699951171875,
|
|
"learning_rate": 4.795540267200686e-07,
|
|
"logits/chosen": 0.06849057227373123,
|
|
"logits/rejected": 0.08533404767513275,
|
|
"logps/chosen": -75.94109344482422,
|
|
"logps/ref_chosen": -74.61146545410156,
|
|
"logps/ref_rejected": -83.24461364746094,
|
|
"logps/rejected": -85.31077575683594,
|
|
"loss": 1.1106,
|
|
"margin_dpo/margin_mean": 0.73653244972229,
|
|
"margin_dpo/margin_std": 1.3149917125701904,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.22071050642479215,
|
|
"fcm_dpo/beta": 0.8851872086524963,
|
|
"fcm_dpo/delta": -0.06594666093587875,
|
|
"fcm_dpo/margin": 0.7474347352981567,
|
|
"fcm_dpo/q_t": 0.370151549577713,
|
|
"grad_norm": 210.81593322753906,
|
|
"learning_rate": 4.790271143580173e-07,
|
|
"logits/chosen": 0.06502532958984375,
|
|
"logits/rejected": 0.049605365842580795,
|
|
"logps/chosen": -59.10708236694336,
|
|
"logps/ref_chosen": -57.84098434448242,
|
|
"logps/ref_rejected": -67.47422790527344,
|
|
"logps/rejected": -69.48776245117188,
|
|
"loss": 1.0465,
|
|
"margin_dpo/margin_mean": 0.7474343776702881,
|
|
"margin_dpo/margin_std": 1.1844416856765747,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.2222222222222222,
|
|
"fcm_dpo/beta": 0.8997819423675537,
|
|
"fcm_dpo/delta": 0.11252903193235397,
|
|
"fcm_dpo/margin": 0.5466242432594299,
|
|
"fcm_dpo/q_t": 0.39630264043807983,
|
|
"grad_norm": 281.1480407714844,
|
|
"learning_rate": 4.784937956152489e-07,
|
|
"logits/chosen": 0.10493030399084091,
|
|
"logits/rejected": 0.06713957339525223,
|
|
"logps/chosen": -68.31733703613281,
|
|
"logps/ref_chosen": -66.81346893310547,
|
|
"logps/ref_rejected": -81.1796875,
|
|
"logps/rejected": -83.23017883300781,
|
|
"loss": 1.2139,
|
|
"margin_dpo/margin_mean": 0.546623945236206,
|
|
"margin_dpo/margin_std": 1.2387210130691528,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.2237339380196523,
|
|
"fcm_dpo/beta": 0.8874396085739136,
|
|
"fcm_dpo/delta": -0.14694717526435852,
|
|
"fcm_dpo/margin": 0.8294271230697632,
|
|
"fcm_dpo/q_t": 0.35729408264160156,
|
|
"grad_norm": 183.421875,
|
|
"learning_rate": 4.779540854098347e-07,
|
|
"logits/chosen": 0.21212086081504822,
|
|
"logits/rejected": 0.1479133516550064,
|
|
"logps/chosen": -50.136993408203125,
|
|
"logps/ref_chosen": -48.6877555847168,
|
|
"logps/ref_rejected": -67.50503540039062,
|
|
"logps/rejected": -69.78369903564453,
|
|
"loss": 1.0178,
|
|
"margin_dpo/margin_mean": 0.8294271230697632,
|
|
"margin_dpo/margin_std": 1.2071778774261475,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.2252456538170824,
|
|
"fcm_dpo/beta": 0.8515768051147461,
|
|
"fcm_dpo/delta": -0.18191197514533997,
|
|
"fcm_dpo/margin": 0.9019421935081482,
|
|
"fcm_dpo/q_t": 0.35212624073028564,
|
|
"grad_norm": 195.56973266601562,
|
|
"learning_rate": 4.774079988386296e-07,
|
|
"logits/chosen": 0.05525077506899834,
|
|
"logits/rejected": 0.012519324198365211,
|
|
"logps/chosen": -56.84036636352539,
|
|
"logps/ref_chosen": -55.143775939941406,
|
|
"logps/ref_rejected": -64.79888916015625,
|
|
"logps/rejected": -67.39742279052734,
|
|
"loss": 0.9851,
|
|
"margin_dpo/margin_mean": 0.9019420742988586,
|
|
"margin_dpo/margin_std": 1.2582323551177979,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.22675736961451248,
|
|
"fcm_dpo/beta": 0.8074795007705688,
|
|
"fcm_dpo/delta": -0.29154300689697266,
|
|
"fcm_dpo/margin": 1.072885274887085,
|
|
"fcm_dpo/q_t": 0.3229233920574188,
|
|
"grad_norm": 183.36480712890625,
|
|
"learning_rate": 4.768555511768486e-07,
|
|
"logits/chosen": 0.10887187719345093,
|
|
"logits/rejected": 0.07173850387334824,
|
|
"logps/chosen": -68.60828399658203,
|
|
"logps/ref_chosen": -67.47074890136719,
|
|
"logps/ref_rejected": -89.21170806884766,
|
|
"logps/rejected": -91.422119140625,
|
|
"loss": 0.9129,
|
|
"margin_dpo/margin_mean": 1.072885274887085,
|
|
"margin_dpo/margin_std": 1.268955945968628,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.22826908541194255,
|
|
"fcm_dpo/beta": 0.7697363495826721,
|
|
"fcm_dpo/delta": -0.23718157410621643,
|
|
"fcm_dpo/margin": 1.0629806518554688,
|
|
"fcm_dpo/q_t": 0.33049389719963074,
|
|
"grad_norm": 152.50540161132812,
|
|
"learning_rate": 4.762967578776406e-07,
|
|
"logits/chosen": 0.1117258220911026,
|
|
"logits/rejected": 0.06622982025146484,
|
|
"logps/chosen": -53.716087341308594,
|
|
"logps/ref_chosen": -52.45954132080078,
|
|
"logps/ref_rejected": -79.0630111694336,
|
|
"logps/rejected": -81.38253784179688,
|
|
"loss": 0.8937,
|
|
"margin_dpo/margin_mean": 1.0629799365997314,
|
|
"margin_dpo/margin_std": 1.1620628833770752,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.22978080120937264,
|
|
"fcm_dpo/beta": 0.7504318356513977,
|
|
"fcm_dpo/delta": -0.11411969363689423,
|
|
"fcm_dpo/margin": 0.9407040476799011,
|
|
"fcm_dpo/q_t": 0.35981130599975586,
|
|
"grad_norm": 173.67433166503906,
|
|
"learning_rate": 4.757316345716553e-07,
|
|
"logits/chosen": 0.18621572852134705,
|
|
"logits/rejected": 0.1421864628791809,
|
|
"logps/chosen": -58.064674377441406,
|
|
"logps/ref_chosen": -56.5538330078125,
|
|
"logps/ref_rejected": -76.55074310302734,
|
|
"logps/rejected": -79.00228881835938,
|
|
"loss": 1.0133,
|
|
"margin_dpo/margin_mean": 0.9407035112380981,
|
|
"margin_dpo/margin_std": 1.3374671936035156,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.23129251700680273,
|
|
"fcm_dpo/beta": 0.7341737151145935,
|
|
"fcm_dpo/delta": -0.054202862083911896,
|
|
"fcm_dpo/margin": 0.8861300945281982,
|
|
"fcm_dpo/q_t": 0.36215510964393616,
|
|
"grad_norm": 165.9080810546875,
|
|
"learning_rate": 4.751601970666064e-07,
|
|
"logits/chosen": 0.09495476633310318,
|
|
"logits/rejected": 0.060364123433828354,
|
|
"logps/chosen": -69.46710205078125,
|
|
"logps/ref_chosen": -68.00689697265625,
|
|
"logps/ref_rejected": -74.83482360839844,
|
|
"logps/rejected": -77.18115997314453,
|
|
"loss": 0.9848,
|
|
"margin_dpo/margin_mean": 0.8861297369003296,
|
|
"margin_dpo/margin_std": 1.1169935464859009,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.2328042328042328,
|
|
"fcm_dpo/beta": 0.741860032081604,
|
|
"fcm_dpo/delta": 0.07353915274143219,
|
|
"fcm_dpo/margin": 0.7147952914237976,
|
|
"fcm_dpo/q_t": 0.39196473360061646,
|
|
"grad_norm": 181.0109405517578,
|
|
"learning_rate": 4.745824613468292e-07,
|
|
"logits/chosen": 0.15605072677135468,
|
|
"logits/rejected": 0.1525098979473114,
|
|
"logps/chosen": -60.95063400268555,
|
|
"logps/ref_chosen": -59.222537994384766,
|
|
"logps/ref_rejected": -64.19131469726562,
|
|
"logps/rejected": -66.63421630859375,
|
|
"loss": 1.1976,
|
|
"margin_dpo/margin_mean": 0.7147954702377319,
|
|
"margin_dpo/margin_std": 1.5419955253601074,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.23431594860166288,
|
|
"fcm_dpo/beta": 0.7455950975418091,
|
|
"fcm_dpo/delta": 0.033561088144779205,
|
|
"fcm_dpo/margin": 0.7621853351593018,
|
|
"fcm_dpo/q_t": 0.3880825638771057,
|
|
"grad_norm": 202.052490234375,
|
|
"learning_rate": 4.7399844357283393e-07,
|
|
"logits/chosen": 0.17025849223136902,
|
|
"logits/rejected": 0.15225285291671753,
|
|
"logps/chosen": -70.11236572265625,
|
|
"logps/ref_chosen": -68.45469665527344,
|
|
"logps/ref_rejected": -77.91763305664062,
|
|
"logps/rejected": -80.33748626708984,
|
|
"loss": 1.1799,
|
|
"margin_dpo/margin_mean": 0.7621854543685913,
|
|
"margin_dpo/margin_std": 1.5514647960662842,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.23582766439909297,
|
|
"fcm_dpo/beta": 0.7278070449829102,
|
|
"fcm_dpo/delta": -0.19898098707199097,
|
|
"fcm_dpo/margin": 1.0765371322631836,
|
|
"fcm_dpo/q_t": 0.3450552225112915,
|
|
"grad_norm": 177.1094970703125,
|
|
"learning_rate": 4.7340816008085305e-07,
|
|
"logits/chosen": 0.12135711312294006,
|
|
"logits/rejected": 0.08175168931484222,
|
|
"logps/chosen": -68.89403533935547,
|
|
"logps/ref_chosen": -67.26959991455078,
|
|
"logps/ref_rejected": -86.95914459228516,
|
|
"logps/rejected": -89.66011047363281,
|
|
"loss": 0.9381,
|
|
"margin_dpo/margin_mean": 1.0765368938446045,
|
|
"margin_dpo/margin_std": 1.3415881395339966,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.23733938019652306,
|
|
"fcm_dpo/beta": 0.7118189930915833,
|
|
"fcm_dpo/delta": -0.0019494742155075073,
|
|
"fcm_dpo/margin": 0.8422449827194214,
|
|
"fcm_dpo/q_t": 0.37625086307525635,
|
|
"grad_norm": 170.37062072753906,
|
|
"learning_rate": 4.728116273823847e-07,
|
|
"logits/chosen": 0.10696057975292206,
|
|
"logits/rejected": 0.08737646043300629,
|
|
"logps/chosen": -56.2518424987793,
|
|
"logps/ref_chosen": -54.77287292480469,
|
|
"logps/ref_rejected": -63.87866973876953,
|
|
"logps/rejected": -66.19988250732422,
|
|
"loss": 1.068,
|
|
"margin_dpo/margin_mean": 0.8422449827194214,
|
|
"margin_dpo/margin_std": 1.3212616443634033,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.23885109599395313,
|
|
"fcm_dpo/beta": 0.7147585153579712,
|
|
"fcm_dpo/delta": -0.01963052526116371,
|
|
"fcm_dpo/margin": 0.8647520542144775,
|
|
"fcm_dpo/q_t": 0.3729850649833679,
|
|
"grad_norm": 183.34059143066406,
|
|
"learning_rate": 4.7220886216373085e-07,
|
|
"logits/chosen": 0.12363462150096893,
|
|
"logits/rejected": 0.09393608570098877,
|
|
"logps/chosen": -66.52328491210938,
|
|
"logps/ref_chosen": -64.92271423339844,
|
|
"logps/ref_rejected": -82.23789978027344,
|
|
"logps/rejected": -84.70321655273438,
|
|
"loss": 1.0499,
|
|
"margin_dpo/margin_mean": 0.8647524118423462,
|
|
"margin_dpo/margin_std": 1.3220181465148926,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.24036281179138322,
|
|
"fcm_dpo/beta": 0.721579909324646,
|
|
"fcm_dpo/delta": -0.027753673493862152,
|
|
"fcm_dpo/margin": 0.864780068397522,
|
|
"fcm_dpo/q_t": 0.36861127614974976,
|
|
"grad_norm": 200.0271453857422,
|
|
"learning_rate": 4.715998812855304e-07,
|
|
"logits/chosen": 0.15346220135688782,
|
|
"logits/rejected": 0.12215955555438995,
|
|
"logps/chosen": -58.74644470214844,
|
|
"logps/ref_chosen": -57.046993255615234,
|
|
"logps/ref_rejected": -73.32441711425781,
|
|
"logps/rejected": -75.88864135742188,
|
|
"loss": 1.0532,
|
|
"margin_dpo/margin_mean": 0.8647797107696533,
|
|
"margin_dpo/margin_std": 1.29610013961792,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.2418745275888133,
|
|
"fcm_dpo/beta": 0.7163674831390381,
|
|
"fcm_dpo/delta": 0.023492824286222458,
|
|
"fcm_dpo/margin": 0.8065335154533386,
|
|
"fcm_dpo/q_t": 0.38599950075149536,
|
|
"grad_norm": 164.5467071533203,
|
|
"learning_rate": 4.7098470178228755e-07,
|
|
"logits/chosen": 0.02466520667076111,
|
|
"logits/rejected": -0.009153635241091251,
|
|
"logps/chosen": -51.687294006347656,
|
|
"logps/ref_chosen": -49.806915283203125,
|
|
"logps/ref_rejected": -68.3370132446289,
|
|
"logps/rejected": -71.02392578125,
|
|
"loss": 1.0989,
|
|
"margin_dpo/margin_mean": 0.806533694267273,
|
|
"margin_dpo/margin_std": 1.4155818223953247,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.24338624338624337,
|
|
"fcm_dpo/beta": 0.7120651006698608,
|
|
"fcm_dpo/delta": -0.07392075657844543,
|
|
"fcm_dpo/margin": 0.9392312169075012,
|
|
"fcm_dpo/q_t": 0.37427765130996704,
|
|
"grad_norm": 163.46099853515625,
|
|
"learning_rate": 4.703633408618955e-07,
|
|
"logits/chosen": 0.13712120056152344,
|
|
"logits/rejected": 0.1043066680431366,
|
|
"logps/chosen": -54.344390869140625,
|
|
"logps/ref_chosen": -52.50048828125,
|
|
"logps/ref_rejected": -66.04540252685547,
|
|
"logps/rejected": -68.82853698730469,
|
|
"loss": 1.066,
|
|
"margin_dpo/margin_mean": 0.9392315149307251,
|
|
"margin_dpo/margin_std": 1.528224229812622,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.24489795918367346,
|
|
"fcm_dpo/beta": 0.674642026424408,
|
|
"fcm_dpo/delta": -0.27861732244491577,
|
|
"fcm_dpo/margin": 1.2653286457061768,
|
|
"fcm_dpo/q_t": 0.3272291421890259,
|
|
"grad_norm": 152.6450653076172,
|
|
"learning_rate": 4.697358159051549e-07,
|
|
"logits/chosen": 0.18720999360084534,
|
|
"logits/rejected": 0.14557072520256042,
|
|
"logps/chosen": -71.4220962524414,
|
|
"logps/ref_chosen": -69.46919250488281,
|
|
"logps/ref_rejected": -92.00952911376953,
|
|
"logps/rejected": -95.22775268554688,
|
|
"loss": 0.9046,
|
|
"margin_dpo/margin_mean": 1.2653292417526245,
|
|
"margin_dpo/margin_std": 1.4768035411834717,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.24640967498110355,
|
|
"fcm_dpo/beta": 0.6542046070098877,
|
|
"fcm_dpo/delta": -0.20735059678554535,
|
|
"fcm_dpo/margin": 1.2088186740875244,
|
|
"fcm_dpo/q_t": 0.34692829847335815,
|
|
"grad_norm": 157.87530517578125,
|
|
"learning_rate": 4.691021444652876e-07,
|
|
"logits/chosen": 0.12066847085952759,
|
|
"logits/rejected": 0.08132156729698181,
|
|
"logps/chosen": -52.40902328491211,
|
|
"logps/ref_chosen": -50.613834381103516,
|
|
"logps/ref_rejected": -74.62033081054688,
|
|
"logps/rejected": -77.62433624267578,
|
|
"loss": 0.9751,
|
|
"margin_dpo/margin_mean": 1.2088189125061035,
|
|
"margin_dpo/margin_std": 1.6171760559082031,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.24792139077853365,
|
|
"fcm_dpo/beta": 0.6249392032623291,
|
|
"fcm_dpo/delta": -0.18022370338439941,
|
|
"fcm_dpo/margin": 1.226841926574707,
|
|
"fcm_dpo/q_t": 0.3479166030883789,
|
|
"grad_norm": 136.3046112060547,
|
|
"learning_rate": 4.6846234426744624e-07,
|
|
"logits/chosen": 0.11843805015087128,
|
|
"logits/rejected": 0.0651586502790451,
|
|
"logps/chosen": -56.87559509277344,
|
|
"logps/ref_chosen": -54.848114013671875,
|
|
"logps/ref_rejected": -79.0630111694336,
|
|
"logps/rejected": -82.31733703613281,
|
|
"loss": 0.9642,
|
|
"margin_dpo/margin_mean": 1.226841926574707,
|
|
"margin_dpo/margin_std": 1.567777395248413,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.2494331065759637,
|
|
"fcm_dpo/beta": 0.6082509160041809,
|
|
"fcm_dpo/delta": -0.1315651535987854,
|
|
"fcm_dpo/margin": 1.187391757965088,
|
|
"fcm_dpo/q_t": 0.34501856565475464,
|
|
"grad_norm": 124.84252166748047,
|
|
"learning_rate": 4.678164332082175e-07,
|
|
"logits/chosen": 0.15890663862228394,
|
|
"logits/rejected": 0.11084471642971039,
|
|
"logps/chosen": -53.137123107910156,
|
|
"logps/ref_chosen": -51.089210510253906,
|
|
"logps/ref_rejected": -71.23370361328125,
|
|
"logps/rejected": -74.46900177001953,
|
|
"loss": 0.9169,
|
|
"margin_dpo/margin_mean": 1.1873915195465088,
|
|
"margin_dpo/margin_std": 1.251267433166504,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.2509448223733938,
|
|
"fcm_dpo/beta": 0.610527753829956,
|
|
"fcm_dpo/delta": 0.0991782620549202,
|
|
"fcm_dpo/margin": 0.8285017609596252,
|
|
"fcm_dpo/q_t": 0.3985018730163574,
|
|
"grad_norm": 163.606689453125,
|
|
"learning_rate": 4.6716442935512214e-07,
|
|
"logits/chosen": 0.12126700580120087,
|
|
"logits/rejected": 0.05116554722189903,
|
|
"logps/chosen": -65.15718841552734,
|
|
"logps/ref_chosen": -63.19081115722656,
|
|
"logps/ref_rejected": -93.8402099609375,
|
|
"logps/rejected": -96.63508605957031,
|
|
"loss": 1.1193,
|
|
"margin_dpo/margin_mean": 0.8285011053085327,
|
|
"margin_dpo/margin_std": 1.486143946647644,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.25245653817082386,
|
|
"fcm_dpo/beta": 0.5978639721870422,
|
|
"fcm_dpo/delta": -0.133498877286911,
|
|
"fcm_dpo/margin": 1.2082691192626953,
|
|
"fcm_dpo/q_t": 0.3483770489692688,
|
|
"grad_norm": 123.5165023803711,
|
|
"learning_rate": 4.6650635094610966e-07,
|
|
"logits/chosen": 0.07217014580965042,
|
|
"logits/rejected": 0.041656773537397385,
|
|
"logps/chosen": -60.747589111328125,
|
|
"logps/ref_chosen": -58.92427062988281,
|
|
"logps/ref_rejected": -72.97377014160156,
|
|
"logps/rejected": -76.00535583496094,
|
|
"loss": 0.9345,
|
|
"margin_dpo/margin_mean": 1.208269476890564,
|
|
"margin_dpo/margin_std": 1.390377163887024,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.25396825396825395,
|
|
"fcm_dpo/beta": 0.6161779165267944,
|
|
"fcm_dpo/delta": 0.20701685547828674,
|
|
"fcm_dpo/margin": 0.6512250900268555,
|
|
"fcm_dpo/q_t": 0.4141233265399933,
|
|
"grad_norm": 171.8645477294922,
|
|
"learning_rate": 4.6584221638904767e-07,
|
|
"logits/chosen": 0.1256096065044403,
|
|
"logits/rejected": 0.09992053359746933,
|
|
"logps/chosen": -67.95152282714844,
|
|
"logps/ref_chosen": -65.65138244628906,
|
|
"logps/ref_rejected": -79.71418762207031,
|
|
"logps/rejected": -82.66555786132812,
|
|
"loss": 1.1715,
|
|
"margin_dpo/margin_mean": 0.651225209236145,
|
|
"margin_dpo/margin_std": 1.301137089729309,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.25547996976568405,
|
|
"fcm_dpo/beta": 0.6163840889930725,
|
|
"fcm_dpo/delta": -0.07705948501825333,
|
|
"fcm_dpo/margin": 1.090137004852295,
|
|
"fcm_dpo/q_t": 0.372364342212677,
|
|
"grad_norm": 154.63226318359375,
|
|
"learning_rate": 4.651720442612075e-07,
|
|
"logits/chosen": 0.17598003149032593,
|
|
"logits/rejected": 0.1469280868768692,
|
|
"logps/chosen": -63.419395446777344,
|
|
"logps/ref_chosen": -61.425865173339844,
|
|
"logps/ref_rejected": -76.09590148925781,
|
|
"logps/rejected": -79.1795654296875,
|
|
"loss": 1.0473,
|
|
"margin_dpo/margin_mean": 1.090137243270874,
|
|
"margin_dpo/margin_std": 1.7698707580566406,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.25699168556311414,
|
|
"fcm_dpo/beta": 0.6154909133911133,
|
|
"fcm_dpo/delta": 0.023734763264656067,
|
|
"fcm_dpo/margin": 0.9384795427322388,
|
|
"fcm_dpo/q_t": 0.3792095482349396,
|
|
"grad_norm": 138.38641357421875,
|
|
"learning_rate": 4.6449585330874425e-07,
|
|
"logits/chosen": 0.11248569190502167,
|
|
"logits/rejected": 0.11067883670330048,
|
|
"logps/chosen": -58.78153610229492,
|
|
"logps/ref_chosen": -56.65319061279297,
|
|
"logps/ref_rejected": -63.45965576171875,
|
|
"logps/rejected": -66.52647399902344,
|
|
"loss": 1.0942,
|
|
"margin_dpo/margin_mean": 0.9384795427322388,
|
|
"margin_dpo/margin_std": 1.5680842399597168,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.2585034013605442,
|
|
"fcm_dpo/beta": 0.601055383682251,
|
|
"fcm_dpo/delta": -0.09785895049571991,
|
|
"fcm_dpo/margin": 1.1450403928756714,
|
|
"fcm_dpo/q_t": 0.36030152440071106,
|
|
"grad_norm": 157.53836059570312,
|
|
"learning_rate": 4.6381366244617224e-07,
|
|
"logits/chosen": 0.16954882442951202,
|
|
"logits/rejected": 0.12765035033226013,
|
|
"logps/chosen": -65.85299682617188,
|
|
"logps/ref_chosen": -63.73476028442383,
|
|
"logps/ref_rejected": -78.50328063964844,
|
|
"logps/rejected": -81.76654815673828,
|
|
"loss": 1.0409,
|
|
"margin_dpo/margin_mean": 1.1450397968292236,
|
|
"margin_dpo/margin_std": 1.7091573476791382,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.2600151171579743,
|
|
"fcm_dpo/beta": 0.5899873971939087,
|
|
"fcm_dpo/delta": -0.2224036306142807,
|
|
"fcm_dpo/margin": 1.3640680313110352,
|
|
"fcm_dpo/q_t": 0.3370872139930725,
|
|
"grad_norm": 118.71820068359375,
|
|
"learning_rate": 4.631254907558365e-07,
|
|
"logits/chosen": 0.20709475874900818,
|
|
"logits/rejected": 0.1604321449995041,
|
|
"logps/chosen": -54.41551971435547,
|
|
"logps/ref_chosen": -52.201759338378906,
|
|
"logps/ref_rejected": -82.85285949707031,
|
|
"logps/rejected": -86.43069458007812,
|
|
"loss": 0.9441,
|
|
"margin_dpo/margin_mean": 1.3640680313110352,
|
|
"margin_dpo/margin_std": 1.687800407409668,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.2615268329554044,
|
|
"fcm_dpo/beta": 0.5607134103775024,
|
|
"fcm_dpo/delta": -0.10873574763536453,
|
|
"fcm_dpo/margin": 1.2412900924682617,
|
|
"fcm_dpo/q_t": 0.36867547035217285,
|
|
"grad_norm": 128.23153686523438,
|
|
"learning_rate": 4.624313574873786e-07,
|
|
"logits/chosen": 0.18149101734161377,
|
|
"logits/rejected": 0.10862697660923004,
|
|
"logps/chosen": -57.77250671386719,
|
|
"logps/ref_chosen": -55.434722900390625,
|
|
"logps/ref_rejected": -77.81967163085938,
|
|
"logps/rejected": -81.39875793457031,
|
|
"loss": 1.0666,
|
|
"margin_dpo/margin_mean": 1.2412903308868408,
|
|
"margin_dpo/margin_std": 1.9681799411773682,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.26303854875283444,
|
|
"fcm_dpo/beta": 0.5515092015266418,
|
|
"fcm_dpo/delta": -0.16861163079738617,
|
|
"fcm_dpo/margin": 1.3705697059631348,
|
|
"fcm_dpo/q_t": 0.3501899838447571,
|
|
"grad_norm": 139.6669158935547,
|
|
"learning_rate": 4.61731282057198e-07,
|
|
"logits/chosen": 0.17310944199562073,
|
|
"logits/rejected": 0.11688442528247833,
|
|
"logps/chosen": -59.5833740234375,
|
|
"logps/ref_chosen": -57.17195129394531,
|
|
"logps/ref_rejected": -85.47578430175781,
|
|
"logps/rejected": -89.25776672363281,
|
|
"loss": 1.0024,
|
|
"margin_dpo/margin_mean": 1.3705697059631348,
|
|
"margin_dpo/margin_std": 1.9443564414978027,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.26455026455026454,
|
|
"fcm_dpo/beta": 0.529313862323761,
|
|
"fcm_dpo/delta": -0.20316217839717865,
|
|
"fcm_dpo/margin": 1.4862399101257324,
|
|
"fcm_dpo/q_t": 0.3484209477901459,
|
|
"grad_norm": 134.76806640625,
|
|
"learning_rate": 4.6102528404790965e-07,
|
|
"logits/chosen": 0.18298515677452087,
|
|
"logits/rejected": 0.15775969624519348,
|
|
"logps/chosen": -70.1815185546875,
|
|
"logps/ref_chosen": -67.6656265258789,
|
|
"logps/ref_rejected": -84.36766815185547,
|
|
"logps/rejected": -88.36979675292969,
|
|
"loss": 1.0048,
|
|
"margin_dpo/margin_mean": 1.4862403869628906,
|
|
"margin_dpo/margin_std": 2.140425682067871,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.2660619803476946,
|
|
"fcm_dpo/beta": 0.522968053817749,
|
|
"fcm_dpo/delta": 0.002730097621679306,
|
|
"fcm_dpo/margin": 1.1418085098266602,
|
|
"fcm_dpo/q_t": 0.3858110308647156,
|
|
"grad_norm": 153.61614990234375,
|
|
"learning_rate": 4.603133832077953e-07,
|
|
"logits/chosen": 0.11803459376096725,
|
|
"logits/rejected": 0.09647019952535629,
|
|
"logps/chosen": -80.58767700195312,
|
|
"logps/ref_chosen": -77.8587646484375,
|
|
"logps/ref_rejected": -81.08732604980469,
|
|
"logps/rejected": -84.95805358886719,
|
|
"loss": 1.1419,
|
|
"margin_dpo/margin_mean": 1.141808271408081,
|
|
"margin_dpo/margin_std": 2.209990978240967,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.2675736961451247,
|
|
"fcm_dpo/beta": 0.49255213141441345,
|
|
"fcm_dpo/delta": -0.38714170455932617,
|
|
"fcm_dpo/margin": 1.9227688312530518,
|
|
"fcm_dpo/q_t": 0.30820608139038086,
|
|
"grad_norm": 130.02647399902344,
|
|
"learning_rate": 4.5959559945025183e-07,
|
|
"logits/chosen": 0.26098543405532837,
|
|
"logits/rejected": 0.17869731783866882,
|
|
"logps/chosen": -57.80853271484375,
|
|
"logps/ref_chosen": -55.22039794921875,
|
|
"logps/ref_rejected": -92.54973602294922,
|
|
"logps/rejected": -97.0606460571289,
|
|
"loss": 0.8372,
|
|
"margin_dpo/margin_mean": 1.9227689504623413,
|
|
"margin_dpo/margin_std": 2.0203702449798584,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.2690854119425548,
|
|
"fcm_dpo/beta": 0.484801709651947,
|
|
"fcm_dpo/delta": 0.052113160490989685,
|
|
"fcm_dpo/margin": 1.1327934265136719,
|
|
"fcm_dpo/q_t": 0.3832756578922272,
|
|
"grad_norm": 118.76311492919922,
|
|
"learning_rate": 4.588719528532341e-07,
|
|
"logits/chosen": 0.14650282263755798,
|
|
"logits/rejected": 0.10431109368801117,
|
|
"logps/chosen": -63.50492858886719,
|
|
"logps/ref_chosen": -60.81049346923828,
|
|
"logps/ref_rejected": -81.12973022460938,
|
|
"logps/rejected": -84.95695495605469,
|
|
"loss": 1.0522,
|
|
"margin_dpo/margin_mean": 1.1327934265136719,
|
|
"margin_dpo/margin_std": 1.5966503620147705,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.2705971277399849,
|
|
"fcm_dpo/beta": 0.49064087867736816,
|
|
"fcm_dpo/delta": -0.036107227206230164,
|
|
"fcm_dpo/margin": 1.2913639545440674,
|
|
"fcm_dpo/q_t": 0.3769165277481079,
|
|
"grad_norm": 124.46369171142578,
|
|
"learning_rate": 4.581424636586928e-07,
|
|
"logits/chosen": 0.2028494030237198,
|
|
"logits/rejected": 0.18809382617473602,
|
|
"logps/chosen": -68.69247436523438,
|
|
"logps/ref_chosen": -65.67171478271484,
|
|
"logps/ref_rejected": -75.32586669921875,
|
|
"logps/rejected": -79.63799285888672,
|
|
"loss": 1.0848,
|
|
"margin_dpo/margin_mean": 1.2913641929626465,
|
|
"margin_dpo/margin_std": 2.173964500427246,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.272108843537415,
|
|
"fcm_dpo/beta": 0.4949728846549988,
|
|
"fcm_dpo/delta": 0.06321872025728226,
|
|
"fcm_dpo/margin": 1.0894110202789307,
|
|
"fcm_dpo/q_t": 0.39300286769866943,
|
|
"grad_norm": 128.481201171875,
|
|
"learning_rate": 4.5740715227200897e-07,
|
|
"logits/chosen": 0.03600196912884712,
|
|
"logits/rejected": 0.019377566874027252,
|
|
"logps/chosen": -59.216148376464844,
|
|
"logps/ref_chosen": -56.68280792236328,
|
|
"logps/ref_rejected": -64.94414520263672,
|
|
"logps/rejected": -68.56689453125,
|
|
"loss": 1.1358,
|
|
"margin_dpo/margin_mean": 1.0894112586975098,
|
|
"margin_dpo/margin_std": 2.019700765609741,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.273620559334845,
|
|
"fcm_dpo/beta": 0.4756387174129486,
|
|
"fcm_dpo/delta": -0.2628810703754425,
|
|
"fcm_dpo/margin": 1.768264651298523,
|
|
"fcm_dpo/q_t": 0.32634565234184265,
|
|
"grad_norm": 107.11132049560547,
|
|
"learning_rate": 4.566660392614228e-07,
|
|
"logits/chosen": 0.1837688833475113,
|
|
"logits/rejected": 0.15156866610050201,
|
|
"logps/chosen": -63.249237060546875,
|
|
"logps/ref_chosen": -60.77604675292969,
|
|
"logps/ref_rejected": -83.98361206054688,
|
|
"logps/rejected": -88.22506713867188,
|
|
"loss": 0.8683,
|
|
"margin_dpo/margin_mean": 1.768264651298523,
|
|
"margin_dpo/margin_std": 1.8696491718292236,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.2751322751322751,
|
|
"fcm_dpo/beta": 0.45705896615982056,
|
|
"fcm_dpo/delta": -0.14843697845935822,
|
|
"fcm_dpo/margin": 1.6124354600906372,
|
|
"fcm_dpo/q_t": 0.3526133894920349,
|
|
"grad_norm": 119.8134765625,
|
|
"learning_rate": 4.5591914535745817e-07,
|
|
"logits/chosen": 0.1729632019996643,
|
|
"logits/rejected": 0.10781346261501312,
|
|
"logps/chosen": -63.12035369873047,
|
|
"logps/ref_chosen": -60.2537841796875,
|
|
"logps/ref_rejected": -89.7706298828125,
|
|
"logps/rejected": -94.2496337890625,
|
|
"loss": 1.0022,
|
|
"margin_dpo/margin_mean": 1.6124353408813477,
|
|
"margin_dpo/margin_std": 2.2613399028778076,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.2766439909297052,
|
|
"fcm_dpo/beta": 0.4692537188529968,
|
|
"fcm_dpo/delta": 0.23354226350784302,
|
|
"fcm_dpo/margin": 0.8008232712745667,
|
|
"fcm_dpo/q_t": 0.41931334137916565,
|
|
"grad_norm": 124.27472686767578,
|
|
"learning_rate": 4.551664914523433e-07,
|
|
"logits/chosen": 0.17078173160552979,
|
|
"logits/rejected": 0.15395784378051758,
|
|
"logps/chosen": -65.33309173583984,
|
|
"logps/ref_chosen": -61.76142120361328,
|
|
"logps/ref_rejected": -72.54627990722656,
|
|
"logps/rejected": -76.91877746582031,
|
|
"loss": 1.2418,
|
|
"margin_dpo/margin_mean": 0.8008227348327637,
|
|
"margin_dpo/margin_std": 1.9933383464813232,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.2781557067271353,
|
|
"fcm_dpo/beta": 0.46749451756477356,
|
|
"fcm_dpo/delta": -0.054233402013778687,
|
|
"fcm_dpo/margin": 1.3855884075164795,
|
|
"fcm_dpo/q_t": 0.36680716276168823,
|
|
"grad_norm": 91.9602279663086,
|
|
"learning_rate": 4.544080985994258e-07,
|
|
"logits/chosen": 0.2527492642402649,
|
|
"logits/rejected": 0.1999446451663971,
|
|
"logps/chosen": -49.69728469848633,
|
|
"logps/ref_chosen": -46.840721130371094,
|
|
"logps/ref_rejected": -69.3609390258789,
|
|
"logps/rejected": -73.60308837890625,
|
|
"loss": 0.983,
|
|
"margin_dpo/margin_mean": 1.3855886459350586,
|
|
"margin_dpo/margin_std": 1.710758924484253,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.2796674225245654,
|
|
"fcm_dpo/beta": 0.4638601243495941,
|
|
"fcm_dpo/delta": -0.064957395195961,
|
|
"fcm_dpo/margin": 1.4216983318328857,
|
|
"fcm_dpo/q_t": 0.37293511629104614,
|
|
"grad_norm": 105.73377990722656,
|
|
"learning_rate": 4.5364398801258394e-07,
|
|
"logits/chosen": 0.17072643339633942,
|
|
"logits/rejected": 0.1306096911430359,
|
|
"logps/chosen": -55.55766677856445,
|
|
"logps/ref_chosen": -52.32114028930664,
|
|
"logps/ref_rejected": -68.3885726928711,
|
|
"logps/rejected": -73.04679870605469,
|
|
"loss": 1.1099,
|
|
"margin_dpo/margin_mean": 1.4216983318328857,
|
|
"margin_dpo/margin_std": 2.5109610557556152,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.2811791383219955,
|
|
"fcm_dpo/beta": 0.46233463287353516,
|
|
"fcm_dpo/delta": -0.024930700659751892,
|
|
"fcm_dpo/margin": 1.3478795289993286,
|
|
"fcm_dpo/q_t": 0.37602150440216064,
|
|
"grad_norm": 126.88275146484375,
|
|
"learning_rate": 4.5287418106563354e-07,
|
|
"logits/chosen": 0.12501423060894012,
|
|
"logits/rejected": 0.09016162157058716,
|
|
"logps/chosen": -70.59927368164062,
|
|
"logps/ref_chosen": -67.42012786865234,
|
|
"logps/ref_rejected": -82.50968933105469,
|
|
"logps/rejected": -87.03671264648438,
|
|
"loss": 1.0736,
|
|
"margin_dpo/margin_mean": 1.3478801250457764,
|
|
"margin_dpo/margin_std": 2.204350233078003,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.28269085411942557,
|
|
"fcm_dpo/beta": 0.4492034614086151,
|
|
"fcm_dpo/delta": -0.14501912891864777,
|
|
"fcm_dpo/margin": 1.629001259803772,
|
|
"fcm_dpo/q_t": 0.3524951934814453,
|
|
"grad_norm": 132.88339233398438,
|
|
"learning_rate": 4.520986992917297e-07,
|
|
"logits/chosen": 0.14765876531600952,
|
|
"logits/rejected": 0.09966124594211578,
|
|
"logps/chosen": -78.94313049316406,
|
|
"logps/ref_chosen": -75.52549743652344,
|
|
"logps/ref_rejected": -94.76289367675781,
|
|
"logps/rejected": -99.80952453613281,
|
|
"loss": 1.0305,
|
|
"margin_dpo/margin_mean": 1.6290010213851929,
|
|
"margin_dpo/margin_std": 2.3944525718688965,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.2842025699168556,
|
|
"fcm_dpo/beta": 0.4454974830150604,
|
|
"fcm_dpo/delta": -0.04850031062960625,
|
|
"fcm_dpo/margin": 1.4481823444366455,
|
|
"fcm_dpo/q_t": 0.36388927698135376,
|
|
"grad_norm": 128.6500701904297,
|
|
"learning_rate": 4.5131756438276466e-07,
|
|
"logits/chosen": 0.17862358689308167,
|
|
"logits/rejected": 0.14158707857131958,
|
|
"logps/chosen": -74.60177612304688,
|
|
"logps/ref_chosen": -71.52333068847656,
|
|
"logps/ref_rejected": -78.29949951171875,
|
|
"logps/rejected": -82.82612609863281,
|
|
"loss": 1.0798,
|
|
"margin_dpo/margin_mean": 1.4481827020645142,
|
|
"margin_dpo/margin_std": 2.430912733078003,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.2857142857142857,
|
|
"fcm_dpo/beta": 0.4300735890865326,
|
|
"fcm_dpo/delta": -0.1480191946029663,
|
|
"fcm_dpo/margin": 1.7021592855453491,
|
|
"fcm_dpo/q_t": 0.349149227142334,
|
|
"grad_norm": 105.27059936523438,
|
|
"learning_rate": 4.5053079818876096e-07,
|
|
"logits/chosen": 0.23444759845733643,
|
|
"logits/rejected": 0.2442852109670639,
|
|
"logps/chosen": -75.18251037597656,
|
|
"logps/ref_chosen": -72.17626953125,
|
|
"logps/ref_rejected": -75.26313781738281,
|
|
"logps/rejected": -79.97154235839844,
|
|
"loss": 0.9592,
|
|
"margin_dpo/margin_mean": 1.7021600008010864,
|
|
"margin_dpo/margin_std": 2.0677247047424316,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.2872260015117158,
|
|
"fcm_dpo/beta": 0.41945913434028625,
|
|
"fcm_dpo/delta": -0.2064790427684784,
|
|
"fcm_dpo/margin": 1.8837922811508179,
|
|
"fcm_dpo/q_t": 0.3407885730266571,
|
|
"grad_norm": 102.42926025390625,
|
|
"learning_rate": 4.4973842271726024e-07,
|
|
"logits/chosen": 0.23205827176570892,
|
|
"logits/rejected": 0.11382026970386505,
|
|
"logps/chosen": -57.74479293823242,
|
|
"logps/ref_chosen": -54.624271392822266,
|
|
"logps/ref_rejected": -101.47068786621094,
|
|
"logps/rejected": -106.47500610351562,
|
|
"loss": 0.9484,
|
|
"margin_dpo/margin_mean": 1.8837926387786865,
|
|
"margin_dpo/margin_std": 2.3639721870422363,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.2887377173091459,
|
|
"fcm_dpo/beta": 0.4082399904727936,
|
|
"fcm_dpo/delta": -0.1050073504447937,
|
|
"fcm_dpo/margin": 1.7088083028793335,
|
|
"fcm_dpo/q_t": 0.35841071605682373,
|
|
"grad_norm": 117.19302368164062,
|
|
"learning_rate": 4.48940460132708e-07,
|
|
"logits/chosen": 0.24298545718193054,
|
|
"logits/rejected": 0.22123336791992188,
|
|
"logps/chosen": -76.58650207519531,
|
|
"logps/ref_chosen": -72.93251037597656,
|
|
"logps/ref_rejected": -89.95103454589844,
|
|
"logps/rejected": -95.31382751464844,
|
|
"loss": 1.0019,
|
|
"margin_dpo/margin_mean": 1.708808422088623,
|
|
"margin_dpo/margin_std": 2.35361909866333,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.29024943310657597,
|
|
"fcm_dpo/beta": 0.41070353984832764,
|
|
"fcm_dpo/delta": 0.06258545815944672,
|
|
"fcm_dpo/margin": 1.31644868850708,
|
|
"fcm_dpo/q_t": 0.3867712914943695,
|
|
"grad_norm": 88.0949935913086,
|
|
"learning_rate": 4.481369327558329e-07,
|
|
"logits/chosen": 0.23867926001548767,
|
|
"logits/rejected": 0.21680811047554016,
|
|
"logps/chosen": -57.586605072021484,
|
|
"logps/ref_chosen": -54.001121520996094,
|
|
"logps/ref_rejected": -63.531551361083984,
|
|
"logps/rejected": -68.43347930908203,
|
|
"loss": 1.0851,
|
|
"margin_dpo/margin_mean": 1.31644868850708,
|
|
"margin_dpo/margin_std": 2.15932559967041,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.29176114890400606,
|
|
"fcm_dpo/beta": 0.4014725089073181,
|
|
"fcm_dpo/delta": -0.1198146715760231,
|
|
"fcm_dpo/margin": 1.7677130699157715,
|
|
"fcm_dpo/q_t": 0.35724347829818726,
|
|
"grad_norm": 87.17979431152344,
|
|
"learning_rate": 4.47327863063023e-07,
|
|
"logits/chosen": 0.14901183545589447,
|
|
"logits/rejected": 0.12912751734256744,
|
|
"logps/chosen": -60.38057327270508,
|
|
"logps/ref_chosen": -56.74927520751953,
|
|
"logps/ref_rejected": -58.80629348754883,
|
|
"logps/rejected": -64.20530700683594,
|
|
"loss": 0.9639,
|
|
"margin_dpo/margin_mean": 1.767713189125061,
|
|
"margin_dpo/margin_std": 2.299346446990967,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.29327286470143615,
|
|
"fcm_dpo/beta": 0.3976183533668518,
|
|
"fcm_dpo/delta": -0.013770565390586853,
|
|
"fcm_dpo/margin": 1.537994146347046,
|
|
"fcm_dpo/q_t": 0.3772721290588379,
|
|
"grad_norm": 91.97148132324219,
|
|
"learning_rate": 4.4651327368569684e-07,
|
|
"logits/chosen": 0.21334470808506012,
|
|
"logits/rejected": 0.18826696276664734,
|
|
"logps/chosen": -60.034690856933594,
|
|
"logps/ref_chosen": -56.64944076538086,
|
|
"logps/ref_rejected": -69.98954772949219,
|
|
"logps/rejected": -74.91279602050781,
|
|
"loss": 1.0845,
|
|
"margin_dpo/margin_mean": 1.537994623184204,
|
|
"margin_dpo/margin_std": 2.5277225971221924,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.2947845804988662,
|
|
"fcm_dpo/beta": 0.393227219581604,
|
|
"fcm_dpo/delta": -0.05794385075569153,
|
|
"fcm_dpo/margin": 1.6573469638824463,
|
|
"fcm_dpo/q_t": 0.3646981716156006,
|
|
"grad_norm": 98.00434112548828,
|
|
"learning_rate": 4.4569318740967043e-07,
|
|
"logits/chosen": 0.13874436914920807,
|
|
"logits/rejected": 0.13722333312034607,
|
|
"logps/chosen": -74.5829849243164,
|
|
"logps/ref_chosen": -70.40977478027344,
|
|
"logps/ref_rejected": -74.39448547363281,
|
|
"logps/rejected": -80.22503662109375,
|
|
"loss": 1.0265,
|
|
"margin_dpo/margin_mean": 1.6573466062545776,
|
|
"margin_dpo/margin_std": 2.350985050201416,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.2962962962962963,
|
|
"fcm_dpo/beta": 0.39594680070877075,
|
|
"fcm_dpo/delta": -0.05085838586091995,
|
|
"fcm_dpo/margin": 1.6336736679077148,
|
|
"fcm_dpo/q_t": 0.36142659187316895,
|
|
"grad_norm": 82.5360107421875,
|
|
"learning_rate": 4.448676271745197e-07,
|
|
"logits/chosen": 0.22305762767791748,
|
|
"logits/rejected": 0.18741697072982788,
|
|
"logps/chosen": -62.90294647216797,
|
|
"logps/ref_chosen": -59.227577209472656,
|
|
"logps/ref_rejected": -83.54757690429688,
|
|
"logps/rejected": -88.85661315917969,
|
|
"loss": 1.029,
|
|
"margin_dpo/margin_mean": 1.6336736679077148,
|
|
"margin_dpo/margin_std": 2.302030563354492,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.29780801209372637,
|
|
"fcm_dpo/beta": 0.3870530128479004,
|
|
"fcm_dpo/delta": -0.17038501799106598,
|
|
"fcm_dpo/margin": 1.954482078552246,
|
|
"fcm_dpo/q_t": 0.3541423976421356,
|
|
"grad_norm": 97.38890075683594,
|
|
"learning_rate": 4.440366160729392e-07,
|
|
"logits/chosen": 0.2841119170188904,
|
|
"logits/rejected": 0.2431061565876007,
|
|
"logps/chosen": -55.41398620605469,
|
|
"logps/ref_chosen": -51.52912902832031,
|
|
"logps/ref_rejected": -73.70631408691406,
|
|
"logps/rejected": -79.545654296875,
|
|
"loss": 1.0967,
|
|
"margin_dpo/margin_mean": 1.9544826745986938,
|
|
"margin_dpo/margin_std": 3.2290852069854736,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.29931972789115646,
|
|
"fcm_dpo/beta": 0.3700205683708191,
|
|
"fcm_dpo/delta": -0.21359995007514954,
|
|
"fcm_dpo/margin": 2.1537539958953857,
|
|
"fcm_dpo/q_t": 0.3357986807823181,
|
|
"grad_norm": 87.0055923461914,
|
|
"learning_rate": 4.432001773500957e-07,
|
|
"logits/chosen": 0.24669349193572998,
|
|
"logits/rejected": 0.2129189372062683,
|
|
"logps/chosen": -63.674686431884766,
|
|
"logps/ref_chosen": -59.78268051147461,
|
|
"logps/ref_rejected": -72.24533081054688,
|
|
"logps/rejected": -78.29109191894531,
|
|
"loss": 0.9107,
|
|
"margin_dpo/margin_mean": 2.1537539958953857,
|
|
"margin_dpo/margin_std": 2.4757540225982666,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.30083144368858655,
|
|
"fcm_dpo/beta": 0.3673195540904999,
|
|
"fcm_dpo/delta": 0.0151963010430336,
|
|
"fcm_dpo/margin": 1.5892671346664429,
|
|
"fcm_dpo/q_t": 0.3766128718852997,
|
|
"grad_norm": 93.22285461425781,
|
|
"learning_rate": 4.4235833440297856e-07,
|
|
"logits/chosen": 0.19999980926513672,
|
|
"logits/rejected": 0.12446457147598267,
|
|
"logps/chosen": -60.5451545715332,
|
|
"logps/ref_chosen": -56.38677215576172,
|
|
"logps/ref_rejected": -74.56779479980469,
|
|
"logps/rejected": -80.31544494628906,
|
|
"loss": 1.1037,
|
|
"margin_dpo/margin_mean": 1.5892664194107056,
|
|
"margin_dpo/margin_std": 2.644639492034912,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.30234315948601664,
|
|
"fcm_dpo/beta": 0.3473024368286133,
|
|
"fcm_dpo/delta": -0.2598249912261963,
|
|
"fcm_dpo/margin": 2.40299654006958,
|
|
"fcm_dpo/q_t": 0.3380799889564514,
|
|
"grad_norm": 84.88159942626953,
|
|
"learning_rate": 4.415111107797445e-07,
|
|
"logits/chosen": 0.2574974298477173,
|
|
"logits/rejected": 0.19607925415039062,
|
|
"logps/chosen": -61.37678909301758,
|
|
"logps/ref_chosen": -57.82432556152344,
|
|
"logps/ref_rejected": -89.28246307373047,
|
|
"logps/rejected": -95.23792266845703,
|
|
"loss": 0.9546,
|
|
"margin_dpo/margin_mean": 2.40299654006958,
|
|
"margin_dpo/margin_std": 3.1777901649475098,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.30234315948601664,
|
|
"eval_fcm_dpo/beta": 0.34564897418022156,
|
|
"eval_logits/chosen": 0.21787600219249725,
|
|
"eval_logits/rejected": 0.1796640008687973,
|
|
"eval_logps/chosen": -78.88257598876953,
|
|
"eval_logps/ref_chosen": -74.85946655273438,
|
|
"eval_logps/ref_rejected": -79.54898834228516,
|
|
"eval_logps/rejected": -85.223876953125,
|
|
"eval_loss": 0.5604943037033081,
|
|
"eval_margin_dpo/margin_mean": 1.6517823934555054,
|
|
"eval_margin_dpo/margin_std": 2.969238042831421,
|
|
"eval_runtime": 38.0736,
|
|
"eval_samples_per_second": 60.488,
|
|
"eval_steps_per_second": 1.891,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.30385487528344673,
|
|
"fcm_dpo/beta": 0.34147608280181885,
|
|
"fcm_dpo/delta": -0.05875827372074127,
|
|
"fcm_dpo/margin": 1.9166682958602905,
|
|
"fcm_dpo/q_t": 0.3675483465194702,
|
|
"grad_norm": 89.931884765625,
|
|
"learning_rate": 4.4065853017905953e-07,
|
|
"logits/chosen": 0.26193559169769287,
|
|
"logits/rejected": 0.22425445914268494,
|
|
"logps/chosen": -63.281890869140625,
|
|
"logps/ref_chosen": -58.999759674072266,
|
|
"logps/ref_rejected": -84.67575073242188,
|
|
"logps/rejected": -90.87454223632812,
|
|
"loss": 1.0029,
|
|
"margin_dpo/margin_mean": 1.916668176651001,
|
|
"margin_dpo/margin_std": 2.625683307647705,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.30536659108087677,
|
|
"fcm_dpo/beta": 0.33684271574020386,
|
|
"fcm_dpo/delta": -0.1786990910768509,
|
|
"fcm_dpo/margin": 2.268734931945801,
|
|
"fcm_dpo/q_t": 0.34659096598625183,
|
|
"grad_norm": 71.41585540771484,
|
|
"learning_rate": 4.3980061644943575e-07,
|
|
"logits/chosen": 0.16486480832099915,
|
|
"logits/rejected": 0.10804621875286102,
|
|
"logps/chosen": -51.65721893310547,
|
|
"logps/ref_chosen": -47.660648345947266,
|
|
"logps/ref_rejected": -73.63249969482422,
|
|
"logps/rejected": -79.89779663085938,
|
|
"loss": 0.9641,
|
|
"margin_dpo/margin_mean": 2.268734931945801,
|
|
"margin_dpo/margin_std": 2.9032340049743652,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.30687830687830686,
|
|
"fcm_dpo/beta": 0.33270418643951416,
|
|
"fcm_dpo/delta": 0.032331258058547974,
|
|
"fcm_dpo/margin": 1.710151195526123,
|
|
"fcm_dpo/q_t": 0.3836914598941803,
|
|
"grad_norm": 91.3531265258789,
|
|
"learning_rate": 4.3893739358856455e-07,
|
|
"logits/chosen": 0.26537784934043884,
|
|
"logits/rejected": 0.20603656768798828,
|
|
"logps/chosen": -66.40350341796875,
|
|
"logps/ref_chosen": -62.32553482055664,
|
|
"logps/ref_rejected": -99.37226104736328,
|
|
"logps/rejected": -105.16038513183594,
|
|
"loss": 1.0735,
|
|
"margin_dpo/margin_mean": 1.7101507186889648,
|
|
"margin_dpo/margin_std": 2.758167266845703,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.30839002267573695,
|
|
"fcm_dpo/beta": 0.3235759139060974,
|
|
"fcm_dpo/delta": -0.05550873279571533,
|
|
"fcm_dpo/margin": 1.9951179027557373,
|
|
"fcm_dpo/q_t": 0.37603023648262024,
|
|
"grad_norm": 76.90956115722656,
|
|
"learning_rate": 4.380688857426449e-07,
|
|
"logits/chosen": 0.18045659363269806,
|
|
"logits/rejected": 0.12303562462329865,
|
|
"logps/chosen": -55.126976013183594,
|
|
"logps/ref_chosen": -50.62931823730469,
|
|
"logps/ref_rejected": -66.60475158691406,
|
|
"logps/rejected": -73.0975341796875,
|
|
"loss": 1.0797,
|
|
"margin_dpo/margin_mean": 1.995117425918579,
|
|
"margin_dpo/margin_std": 3.2013866901397705,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.30990173847316704,
|
|
"fcm_dpo/beta": 0.3267316222190857,
|
|
"fcm_dpo/delta": 0.013398218899965286,
|
|
"fcm_dpo/margin": 1.7957442998886108,
|
|
"fcm_dpo/q_t": 0.3864005208015442,
|
|
"grad_norm": 108.05115509033203,
|
|
"learning_rate": 4.3719511720570814e-07,
|
|
"logits/chosen": 0.26472169160842896,
|
|
"logits/rejected": 0.21426978707313538,
|
|
"logps/chosen": -75.12602233886719,
|
|
"logps/ref_chosen": -70.3561782836914,
|
|
"logps/ref_rejected": -93.39848327636719,
|
|
"logps/rejected": -99.96406555175781,
|
|
"loss": 1.1463,
|
|
"margin_dpo/margin_mean": 1.7957442998886108,
|
|
"margin_dpo/margin_std": 3.4385950565338135,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.31141345427059713,
|
|
"fcm_dpo/beta": 0.33613136410713196,
|
|
"fcm_dpo/delta": 0.07342677563428879,
|
|
"fcm_dpo/margin": 1.56940758228302,
|
|
"fcm_dpo/q_t": 0.39384615421295166,
|
|
"grad_norm": 90.17076110839844,
|
|
"learning_rate": 4.363161124189387e-07,
|
|
"logits/chosen": 0.25817301869392395,
|
|
"logits/rejected": 0.2432713508605957,
|
|
"logps/chosen": -72.54843139648438,
|
|
"logps/ref_chosen": -67.64547729492188,
|
|
"logps/ref_rejected": -79.89584350585938,
|
|
"logps/rejected": -86.36820220947266,
|
|
"loss": 1.1686,
|
|
"margin_dpo/margin_mean": 1.5694081783294678,
|
|
"margin_dpo/margin_std": 3.069244623184204,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.3129251700680272,
|
|
"fcm_dpo/beta": 0.32442450523376465,
|
|
"fcm_dpo/delta": -0.12306281924247742,
|
|
"fcm_dpo/margin": 2.1917169094085693,
|
|
"fcm_dpo/q_t": 0.36162668466567993,
|
|
"grad_norm": 76.4348373413086,
|
|
"learning_rate": 4.3543189596998986e-07,
|
|
"logits/chosen": 0.17872723937034607,
|
|
"logits/rejected": 0.12270534038543701,
|
|
"logps/chosen": -72.61674499511719,
|
|
"logps/ref_chosen": -67.66419219970703,
|
|
"logps/ref_rejected": -85.10249328613281,
|
|
"logps/rejected": -92.24675750732422,
|
|
"loss": 1.0183,
|
|
"margin_dpo/margin_mean": 2.1917169094085693,
|
|
"margin_dpo/margin_std": 3.2143168449401855,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.3144368858654573,
|
|
"fcm_dpo/beta": 0.3316575884819031,
|
|
"fcm_dpo/delta": 0.10988043993711472,
|
|
"fcm_dpo/margin": 1.4934909343719482,
|
|
"fcm_dpo/q_t": 0.40299874544143677,
|
|
"grad_norm": 84.07500457763672,
|
|
"learning_rate": 4.3454249259229664e-07,
|
|
"logits/chosen": 0.2049015909433365,
|
|
"logits/rejected": 0.1825580596923828,
|
|
"logps/chosen": -62.14056396484375,
|
|
"logps/ref_chosen": -57.731712341308594,
|
|
"logps/ref_rejected": -74.19276428222656,
|
|
"logps/rejected": -80.09510803222656,
|
|
"loss": 1.1711,
|
|
"margin_dpo/margin_mean": 1.493491291999817,
|
|
"margin_dpo/margin_std": 3.0397095680236816,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.31594860166288735,
|
|
"fcm_dpo/beta": 0.3277115523815155,
|
|
"fcm_dpo/delta": -0.19000059366226196,
|
|
"fcm_dpo/margin": 2.3637375831604004,
|
|
"fcm_dpo/q_t": 0.35109424591064453,
|
|
"grad_norm": 93.42904663085938,
|
|
"learning_rate": 4.336479271643833e-07,
|
|
"logits/chosen": 0.204912006855011,
|
|
"logits/rejected": 0.1559399664402008,
|
|
"logps/chosen": -73.3060073852539,
|
|
"logps/ref_chosen": -68.55007934570312,
|
|
"logps/ref_rejected": -87.90541076660156,
|
|
"logps/rejected": -95.02507781982422,
|
|
"loss": 1.0241,
|
|
"margin_dpo/margin_mean": 2.363738536834717,
|
|
"margin_dpo/margin_std": 3.465147018432617,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.31746031746031744,
|
|
"fcm_dpo/beta": 0.31141048669815063,
|
|
"fcm_dpo/delta": -0.2101190686225891,
|
|
"fcm_dpo/margin": 2.548922538757324,
|
|
"fcm_dpo/q_t": 0.34733301401138306,
|
|
"grad_norm": 71.60018157958984,
|
|
"learning_rate": 4.327482247091679e-07,
|
|
"logits/chosen": 0.28091442584991455,
|
|
"logits/rejected": 0.20208045840263367,
|
|
"logps/chosen": -62.09604263305664,
|
|
"logps/ref_chosen": -57.268272399902344,
|
|
"logps/ref_rejected": -85.72807312011719,
|
|
"logps/rejected": -93.10476684570312,
|
|
"loss": 0.9873,
|
|
"margin_dpo/margin_mean": 2.548922061920166,
|
|
"margin_dpo/margin_std": 3.552750587463379,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.31897203325774753,
|
|
"fcm_dpo/beta": 0.30577632784843445,
|
|
"fcm_dpo/delta": -0.07666610926389694,
|
|
"fcm_dpo/margin": 2.1951539516448975,
|
|
"fcm_dpo/q_t": 0.3637697696685791,
|
|
"grad_norm": 83.09772491455078,
|
|
"learning_rate": 4.3184341039326217e-07,
|
|
"logits/chosen": 0.2706531882286072,
|
|
"logits/rejected": 0.19553887844085693,
|
|
"logps/chosen": -58.080902099609375,
|
|
"logps/ref_chosen": -53.640708923339844,
|
|
"logps/ref_rejected": -93.0387954711914,
|
|
"logps/rejected": -99.67414855957031,
|
|
"loss": 0.9942,
|
|
"margin_dpo/margin_mean": 2.1951539516448975,
|
|
"margin_dpo/margin_std": 2.999103546142578,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.3204837490551776,
|
|
"fcm_dpo/beta": 0.2980360984802246,
|
|
"fcm_dpo/delta": -0.10260573029518127,
|
|
"fcm_dpo/margin": 2.3334219455718994,
|
|
"fcm_dpo/q_t": 0.36129042506217957,
|
|
"grad_norm": 74.72057342529297,
|
|
"learning_rate": 4.309335095262675e-07,
|
|
"logits/chosen": 0.25780636072158813,
|
|
"logits/rejected": 0.19710323214530945,
|
|
"logps/chosen": -62.407325744628906,
|
|
"logps/ref_chosen": -57.36674499511719,
|
|
"logps/ref_rejected": -79.89643096923828,
|
|
"logps/rejected": -87.27043151855469,
|
|
"loss": 1.036,
|
|
"margin_dpo/margin_mean": 2.333421230316162,
|
|
"margin_dpo/margin_std": 3.5129289627075195,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.3219954648526077,
|
|
"fcm_dpo/beta": 0.28785935044288635,
|
|
"fcm_dpo/delta": -0.16928018629550934,
|
|
"fcm_dpo/margin": 2.624055862426758,
|
|
"fcm_dpo/q_t": 0.35196423530578613,
|
|
"grad_norm": 74.51473236083984,
|
|
"learning_rate": 4.3001854756006724e-07,
|
|
"logits/chosen": 0.2479405403137207,
|
|
"logits/rejected": 0.2258269488811493,
|
|
"logps/chosen": -69.40491485595703,
|
|
"logps/ref_chosen": -65.22111511230469,
|
|
"logps/ref_rejected": -80.1810302734375,
|
|
"logps/rejected": -86.98887634277344,
|
|
"loss": 1.0117,
|
|
"margin_dpo/margin_mean": 2.624055862426758,
|
|
"margin_dpo/margin_std": 3.8042173385620117,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.3235071806500378,
|
|
"fcm_dpo/beta": 0.28325408697128296,
|
|
"fcm_dpo/delta": -0.1327255666255951,
|
|
"fcm_dpo/margin": 2.5523083209991455,
|
|
"fcm_dpo/q_t": 0.35900163650512695,
|
|
"grad_norm": 74.05313873291016,
|
|
"learning_rate": 4.290985500881143e-07,
|
|
"logits/chosen": 0.14122872054576874,
|
|
"logits/rejected": 0.11947432160377502,
|
|
"logps/chosen": -66.09378814697266,
|
|
"logps/ref_chosen": -61.292327880859375,
|
|
"logps/ref_rejected": -67.69841003417969,
|
|
"logps/rejected": -75.05216979980469,
|
|
"loss": 0.9921,
|
|
"margin_dpo/margin_mean": 2.552309036254883,
|
|
"margin_dpo/margin_std": 3.5551910400390625,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.3250188964474679,
|
|
"fcm_dpo/beta": 0.2725156843662262,
|
|
"fcm_dpo/delta": -0.2077074944972992,
|
|
"fcm_dpo/margin": 2.905134677886963,
|
|
"fcm_dpo/q_t": 0.3412568271160126,
|
|
"grad_norm": 67.4879150390625,
|
|
"learning_rate": 4.281735428447157e-07,
|
|
"logits/chosen": 0.15792277455329895,
|
|
"logits/rejected": 0.073989138007164,
|
|
"logps/chosen": -68.87016296386719,
|
|
"logps/ref_chosen": -63.869136810302734,
|
|
"logps/ref_rejected": -98.7657241821289,
|
|
"logps/rejected": -106.67188262939453,
|
|
"loss": 0.9368,
|
|
"margin_dpo/margin_mean": 2.9051353931427,
|
|
"margin_dpo/margin_std": 3.5075745582580566,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.32653061224489793,
|
|
"fcm_dpo/beta": 0.2604866623878479,
|
|
"fcm_dpo/delta": -0.22343488037586212,
|
|
"fcm_dpo/margin": 3.094121217727661,
|
|
"fcm_dpo/q_t": 0.3563622832298279,
|
|
"grad_norm": 61.407466888427734,
|
|
"learning_rate": 4.2724355170431247e-07,
|
|
"logits/chosen": 0.2697373032569885,
|
|
"logits/rejected": 0.1980409026145935,
|
|
"logps/chosen": -73.16397094726562,
|
|
"logps/ref_chosen": -67.824951171875,
|
|
"logps/ref_rejected": -96.40231323242188,
|
|
"logps/rejected": -104.83544921875,
|
|
"loss": 0.993,
|
|
"margin_dpo/margin_mean": 3.094120740890503,
|
|
"margin_dpo/margin_std": 4.95653772354126,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.328042328042328,
|
|
"fcm_dpo/beta": 0.24557505548000336,
|
|
"fcm_dpo/delta": -0.2807563543319702,
|
|
"fcm_dpo/margin": 3.488009214401245,
|
|
"fcm_dpo/q_t": 0.32953548431396484,
|
|
"grad_norm": 58.03193664550781,
|
|
"learning_rate": 4.26308602680756e-07,
|
|
"logits/chosen": 0.2073604166507721,
|
|
"logits/rejected": 0.11603499948978424,
|
|
"logps/chosen": -66.12384796142578,
|
|
"logps/ref_chosen": -60.5049934387207,
|
|
"logps/ref_rejected": -84.26618194580078,
|
|
"logps/rejected": -93.373046875,
|
|
"loss": 0.8786,
|
|
"margin_dpo/margin_mean": 3.488009452819824,
|
|
"margin_dpo/margin_std": 3.95634126663208,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.3295540438397581,
|
|
"fcm_dpo/beta": 0.2504267394542694,
|
|
"fcm_dpo/delta": 0.1592218428850174,
|
|
"fcm_dpo/margin": 1.7742626667022705,
|
|
"fcm_dpo/q_t": 0.4074835777282715,
|
|
"grad_norm": 69.1903076171875,
|
|
"learning_rate": 4.253687219265803e-07,
|
|
"logits/chosen": 0.11688175797462463,
|
|
"logits/rejected": 0.11001837253570557,
|
|
"logps/chosen": -76.50672912597656,
|
|
"logps/ref_chosen": -70.59431457519531,
|
|
"logps/ref_rejected": -73.89038848876953,
|
|
"logps/rejected": -81.57705688476562,
|
|
"loss": 1.2216,
|
|
"margin_dpo/margin_mean": 1.7742631435394287,
|
|
"margin_dpo/margin_std": 4.000538349151611,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.3310657596371882,
|
|
"fcm_dpo/beta": 0.2547418475151062,
|
|
"fcm_dpo/delta": 0.14973081648349762,
|
|
"fcm_dpo/margin": 1.7954492568969727,
|
|
"fcm_dpo/q_t": 0.40601488947868347,
|
|
"grad_norm": 68.71503448486328,
|
|
"learning_rate": 4.2442393573227043e-07,
|
|
"logits/chosen": 0.1661921888589859,
|
|
"logits/rejected": 0.1311841607093811,
|
|
"logps/chosen": -65.78245544433594,
|
|
"logps/ref_chosen": -60.490943908691406,
|
|
"logps/ref_rejected": -75.85001373291016,
|
|
"logps/rejected": -82.93697357177734,
|
|
"loss": 1.1538,
|
|
"margin_dpo/margin_mean": 1.7954490184783936,
|
|
"margin_dpo/margin_std": 3.453105926513672,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.3325774754346183,
|
|
"fcm_dpo/beta": 0.25497251749038696,
|
|
"fcm_dpo/delta": -0.04306017607450485,
|
|
"fcm_dpo/margin": 2.509706974029541,
|
|
"fcm_dpo/q_t": 0.3725942373275757,
|
|
"grad_norm": 52.4035530090332,
|
|
"learning_rate": 4.234742705255272e-07,
|
|
"logits/chosen": 0.25568246841430664,
|
|
"logits/rejected": 0.20475764572620392,
|
|
"logps/chosen": -50.56587219238281,
|
|
"logps/ref_chosen": -45.013397216796875,
|
|
"logps/ref_rejected": -70.49369812011719,
|
|
"logps/rejected": -78.55587768554688,
|
|
"loss": 1.0567,
|
|
"margin_dpo/margin_mean": 2.509706497192383,
|
|
"margin_dpo/margin_std": 3.935103416442871,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.3340891912320484,
|
|
"fcm_dpo/beta": 0.2531549334526062,
|
|
"fcm_dpo/delta": -0.1000896617770195,
|
|
"fcm_dpo/margin": 2.737396717071533,
|
|
"fcm_dpo/q_t": 0.3685762882232666,
|
|
"grad_norm": 61.76700210571289,
|
|
"learning_rate": 4.22519752870528e-07,
|
|
"logits/chosen": 0.23615026473999023,
|
|
"logits/rejected": 0.18113240599632263,
|
|
"logps/chosen": -64.33263397216797,
|
|
"logps/ref_chosen": -59.09584045410156,
|
|
"logps/ref_rejected": -88.64388275146484,
|
|
"logps/rejected": -96.61807250976562,
|
|
"loss": 1.0304,
|
|
"margin_dpo/margin_mean": 2.737396717071533,
|
|
"margin_dpo/margin_std": 4.217761993408203,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.3356009070294785,
|
|
"fcm_dpo/beta": 0.2420484572649002,
|
|
"fcm_dpo/delta": -0.23359233140945435,
|
|
"fcm_dpo/margin": 3.366135597229004,
|
|
"fcm_dpo/q_t": 0.3340812921524048,
|
|
"grad_norm": 63.084312438964844,
|
|
"learning_rate": 4.2156040946718343e-07,
|
|
"logits/chosen": 0.30527517199516296,
|
|
"logits/rejected": 0.23241345584392548,
|
|
"logps/chosen": -61.709754943847656,
|
|
"logps/ref_chosen": -55.9976921081543,
|
|
"logps/ref_rejected": -111.94727325439453,
|
|
"logps/rejected": -121.02547454833984,
|
|
"loss": 0.9189,
|
|
"margin_dpo/margin_mean": 3.3661351203918457,
|
|
"margin_dpo/margin_std": 3.9613122940063477,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.3371126228269085,
|
|
"fcm_dpo/beta": 0.22976723313331604,
|
|
"fcm_dpo/delta": -0.21587347984313965,
|
|
"fcm_dpo/margin": 3.46626877784729,
|
|
"fcm_dpo/q_t": 0.3348970413208008,
|
|
"grad_norm": 50.928836822509766,
|
|
"learning_rate": 4.2059626715039065e-07,
|
|
"logits/chosen": 0.2393488734960556,
|
|
"logits/rejected": 0.19234737753868103,
|
|
"logps/chosen": -65.4186019897461,
|
|
"logps/ref_chosen": -59.891422271728516,
|
|
"logps/ref_rejected": -86.28954315185547,
|
|
"logps/rejected": -95.28298950195312,
|
|
"loss": 0.8931,
|
|
"margin_dpo/margin_mean": 3.4662694931030273,
|
|
"margin_dpo/margin_std": 3.742668628692627,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.3386243386243386,
|
|
"fcm_dpo/beta": 0.2317299097776413,
|
|
"fcm_dpo/delta": 0.10583598166704178,
|
|
"fcm_dpo/margin": 2.155870199203491,
|
|
"fcm_dpo/q_t": 0.39710187911987305,
|
|
"grad_norm": 62.04116439819336,
|
|
"learning_rate": 4.1962735288928304e-07,
|
|
"logits/chosen": 0.2909647822380066,
|
|
"logits/rejected": 0.2737959623336792,
|
|
"logps/chosen": -69.97515869140625,
|
|
"logps/ref_chosen": -64.04463195800781,
|
|
"logps/ref_rejected": -75.05450439453125,
|
|
"logps/rejected": -83.14089965820312,
|
|
"loss": 1.0852,
|
|
"margin_dpo/margin_mean": 2.1558704376220703,
|
|
"margin_dpo/margin_std": 3.457667589187622,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.3401360544217687,
|
|
"fcm_dpo/beta": 0.22542855143547058,
|
|
"fcm_dpo/delta": -0.18165551126003265,
|
|
"fcm_dpo/margin": 3.39239764213562,
|
|
"fcm_dpo/q_t": 0.34687235951423645,
|
|
"grad_norm": 69.0022201538086,
|
|
"learning_rate": 4.186536937864752e-07,
|
|
"logits/chosen": 0.2634621262550354,
|
|
"logits/rejected": 0.1664544939994812,
|
|
"logps/chosen": -72.20433044433594,
|
|
"logps/ref_chosen": -66.0958251953125,
|
|
"logps/ref_rejected": -97.68675231933594,
|
|
"logps/rejected": -107.18766784667969,
|
|
"loss": 0.955,
|
|
"margin_dpo/margin_mean": 3.392397403717041,
|
|
"margin_dpo/margin_std": 4.276772499084473,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.3416477702191988,
|
|
"fcm_dpo/beta": 0.22262665629386902,
|
|
"fcm_dpo/delta": -0.07381819188594818,
|
|
"fcm_dpo/margin": 3.0040860176086426,
|
|
"fcm_dpo/q_t": 0.3633517026901245,
|
|
"grad_norm": 46.30059051513672,
|
|
"learning_rate": 4.176753170773052e-07,
|
|
"logits/chosen": 0.2735338509082794,
|
|
"logits/rejected": 0.23284432291984558,
|
|
"logps/chosen": -57.213523864746094,
|
|
"logps/ref_chosen": -51.4168701171875,
|
|
"logps/ref_rejected": -66.30068969726562,
|
|
"logps/rejected": -75.10142517089844,
|
|
"loss": 1.0239,
|
|
"margin_dpo/margin_mean": 3.0040855407714844,
|
|
"margin_dpo/margin_std": 4.355281829833984,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.3431594860166289,
|
|
"fcm_dpo/beta": 0.21982993185520172,
|
|
"fcm_dpo/delta": -0.030905138701200485,
|
|
"fcm_dpo/margin": 2.857992172241211,
|
|
"fcm_dpo/q_t": 0.37706050276756287,
|
|
"grad_norm": 60.83171081542969,
|
|
"learning_rate": 4.166922501290729e-07,
|
|
"logits/chosen": 0.31351593136787415,
|
|
"logits/rejected": 0.2783881425857544,
|
|
"logps/chosen": -64.13533020019531,
|
|
"logps/ref_chosen": -57.989776611328125,
|
|
"logps/ref_rejected": -75.05464172363281,
|
|
"logps/rejected": -84.05818939208984,
|
|
"loss": 1.0752,
|
|
"margin_dpo/margin_mean": 2.857992172241211,
|
|
"margin_dpo/margin_std": 4.642690658569336,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.34467120181405897,
|
|
"fcm_dpo/beta": 0.21965107321739197,
|
|
"fcm_dpo/delta": -0.05902150273323059,
|
|
"fcm_dpo/margin": 2.9821901321411133,
|
|
"fcm_dpo/q_t": 0.370593786239624,
|
|
"grad_norm": 51.929935455322266,
|
|
"learning_rate": 4.1570452044027405e-07,
|
|
"logits/chosen": 0.2829626798629761,
|
|
"logits/rejected": 0.21694326400756836,
|
|
"logps/chosen": -62.26646423339844,
|
|
"logps/ref_chosen": -55.55936813354492,
|
|
"logps/ref_rejected": -77.02364349365234,
|
|
"logps/rejected": -86.71292877197266,
|
|
"loss": 1.0521,
|
|
"margin_dpo/margin_mean": 2.9821906089782715,
|
|
"margin_dpo/margin_std": 4.6442036628723145,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.34618291761148906,
|
|
"fcm_dpo/beta": 0.21824893355369568,
|
|
"fcm_dpo/delta": 0.01088004931807518,
|
|
"fcm_dpo/margin": 2.7022318840026855,
|
|
"fcm_dpo/q_t": 0.3750082552433014,
|
|
"grad_norm": 150.70008850097656,
|
|
"learning_rate": 4.147121556398312e-07,
|
|
"logits/chosen": 0.34077906608581543,
|
|
"logits/rejected": 0.29125073552131653,
|
|
"logps/chosen": -56.72576904296875,
|
|
"logps/ref_chosen": -50.79466247558594,
|
|
"logps/ref_rejected": -78.4474105834961,
|
|
"logps/rejected": -87.08074951171875,
|
|
"loss": 1.0911,
|
|
"margin_dpo/margin_mean": 2.7022314071655273,
|
|
"margin_dpo/margin_std": 4.491253852844238,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.3476946334089191,
|
|
"fcm_dpo/beta": 0.21833573281764984,
|
|
"fcm_dpo/delta": -0.08132877200841904,
|
|
"fcm_dpo/margin": 3.0884265899658203,
|
|
"fcm_dpo/q_t": 0.36123067140579224,
|
|
"grad_norm": 57.547664642333984,
|
|
"learning_rate": 4.137151834863213e-07,
|
|
"logits/chosen": 0.26574379205703735,
|
|
"logits/rejected": 0.2626535892486572,
|
|
"logps/chosen": -63.243568420410156,
|
|
"logps/ref_chosen": -56.729225158691406,
|
|
"logps/ref_rejected": -62.99180603027344,
|
|
"logps/rejected": -72.59457397460938,
|
|
"loss": 1.0252,
|
|
"margin_dpo/margin_mean": 3.0884273052215576,
|
|
"margin_dpo/margin_std": 4.373671531677246,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.3492063492063492,
|
|
"fcm_dpo/beta": 0.2022572159767151,
|
|
"fcm_dpo/delta": -0.35897156596183777,
|
|
"fcm_dpo/margin": 4.558291435241699,
|
|
"fcm_dpo/q_t": 0.30946117639541626,
|
|
"grad_norm": 49.22685623168945,
|
|
"learning_rate": 4.1271363186719835e-07,
|
|
"logits/chosen": 0.20000478625297546,
|
|
"logits/rejected": 0.18240705132484436,
|
|
"logps/chosen": -79.33709716796875,
|
|
"logps/ref_chosen": -72.59709930419922,
|
|
"logps/ref_rejected": -86.2322998046875,
|
|
"logps/rejected": -97.53059387207031,
|
|
"loss": 0.8372,
|
|
"margin_dpo/margin_mean": 4.558291435241699,
|
|
"margin_dpo/margin_std": 4.532693386077881,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.3507180650037793,
|
|
"fcm_dpo/beta": 0.19782654941082,
|
|
"fcm_dpo/delta": -0.05669552460312843,
|
|
"fcm_dpo/margin": 3.2991318702697754,
|
|
"fcm_dpo/q_t": 0.37337297201156616,
|
|
"grad_norm": 53.600704193115234,
|
|
"learning_rate": 4.1170752879801436e-07,
|
|
"logits/chosen": 0.23124045133590698,
|
|
"logits/rejected": 0.2049122452735901,
|
|
"logps/chosen": -74.73058319091797,
|
|
"logps/ref_chosen": -68.1185302734375,
|
|
"logps/ref_rejected": -83.79415893554688,
|
|
"logps/rejected": -93.7053451538086,
|
|
"loss": 1.0636,
|
|
"margin_dpo/margin_mean": 3.2991318702697754,
|
|
"margin_dpo/margin_std": 5.3659348487854,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.35222978080120937,
|
|
"fcm_dpo/beta": 0.19616064429283142,
|
|
"fcm_dpo/delta": 0.051648423075675964,
|
|
"fcm_dpo/margin": 2.7871627807617188,
|
|
"fcm_dpo/q_t": 0.3915916085243225,
|
|
"grad_norm": 49.19864273071289,
|
|
"learning_rate": 4.106969024216348e-07,
|
|
"logits/chosen": 0.23559258878231049,
|
|
"logits/rejected": 0.18777360022068024,
|
|
"logps/chosen": -63.05714416503906,
|
|
"logps/ref_chosen": -55.070152282714844,
|
|
"logps/ref_rejected": -66.61845397949219,
|
|
"logps/rejected": -77.39260864257812,
|
|
"loss": 1.1095,
|
|
"margin_dpo/margin_mean": 2.787163257598877,
|
|
"margin_dpo/margin_std": 4.586450576782227,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.35374149659863946,
|
|
"fcm_dpo/beta": 0.20507487654685974,
|
|
"fcm_dpo/delta": 0.10537093132734299,
|
|
"fcm_dpo/margin": 2.423401355743408,
|
|
"fcm_dpo/q_t": 0.40045344829559326,
|
|
"grad_norm": 52.23374938964844,
|
|
"learning_rate": 4.09681781007452e-07,
|
|
"logits/chosen": 0.2050936222076416,
|
|
"logits/rejected": 0.19188997149467468,
|
|
"logps/chosen": -63.265869140625,
|
|
"logps/ref_chosen": -55.92589569091797,
|
|
"logps/ref_rejected": -51.11608123779297,
|
|
"logps/rejected": -60.87945556640625,
|
|
"loss": 1.1624,
|
|
"margin_dpo/margin_mean": 2.423401355743408,
|
|
"margin_dpo/margin_std": 4.720711708068848,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.35525321239606955,
|
|
"fcm_dpo/beta": 0.19567573070526123,
|
|
"fcm_dpo/delta": -0.2706778347492218,
|
|
"fcm_dpo/margin": 4.328647613525391,
|
|
"fcm_dpo/q_t": 0.31999263167381287,
|
|
"grad_norm": 47.88417053222656,
|
|
"learning_rate": 4.08662192950594e-07,
|
|
"logits/chosen": 0.30758044123649597,
|
|
"logits/rejected": 0.28944361209869385,
|
|
"logps/chosen": -71.12437438964844,
|
|
"logps/ref_chosen": -64.53972625732422,
|
|
"logps/ref_rejected": -77.69151306152344,
|
|
"logps/rejected": -88.60479736328125,
|
|
"loss": 0.8412,
|
|
"margin_dpo/margin_mean": 4.328647613525391,
|
|
"margin_dpo/margin_std": 4.075915336608887,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.35676492819349964,
|
|
"fcm_dpo/beta": 0.19021353125572205,
|
|
"fcm_dpo/delta": -0.08151474595069885,
|
|
"fcm_dpo/margin": 3.550055980682373,
|
|
"fcm_dpo/q_t": 0.36800432205200195,
|
|
"grad_norm": 50.881134033203125,
|
|
"learning_rate": 4.076381667711306e-07,
|
|
"logits/chosen": 0.22476598620414734,
|
|
"logits/rejected": 0.2125941663980484,
|
|
"logps/chosen": -79.9710464477539,
|
|
"logps/ref_chosen": -71.15473937988281,
|
|
"logps/ref_rejected": -84.88541412353516,
|
|
"logps/rejected": -97.25178527832031,
|
|
"loss": 1.0551,
|
|
"margin_dpo/margin_mean": 3.550055980682373,
|
|
"margin_dpo/margin_std": 5.55735969543457,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.35827664399092973,
|
|
"fcm_dpo/beta": 0.18786309659481049,
|
|
"fcm_dpo/delta": -0.06414446234703064,
|
|
"fcm_dpo/margin": 3.5106282234191895,
|
|
"fcm_dpo/q_t": 0.36550983786582947,
|
|
"grad_norm": 52.91632843017578,
|
|
"learning_rate": 4.066097311132753e-07,
|
|
"logits/chosen": 0.2820720672607422,
|
|
"logits/rejected": 0.2691257894039154,
|
|
"logps/chosen": -83.85193634033203,
|
|
"logps/ref_chosen": -76.14201354980469,
|
|
"logps/ref_rejected": -80.88479614257812,
|
|
"logps/rejected": -92.1053466796875,
|
|
"loss": 1.0547,
|
|
"margin_dpo/margin_mean": 3.5106277465820312,
|
|
"margin_dpo/margin_std": 5.33742618560791,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.35978835978835977,
|
|
"fcm_dpo/beta": 0.18471507728099823,
|
|
"fcm_dpo/delta": -0.10358825325965881,
|
|
"fcm_dpo/margin": 3.7690069675445557,
|
|
"fcm_dpo/q_t": 0.3609452247619629,
|
|
"grad_norm": 65.1036148071289,
|
|
"learning_rate": 4.0557691474458414e-07,
|
|
"logits/chosen": 0.2169681191444397,
|
|
"logits/rejected": 0.19963425397872925,
|
|
"logps/chosen": -76.23017120361328,
|
|
"logps/ref_chosen": -68.88484954833984,
|
|
"logps/ref_rejected": -75.8946304321289,
|
|
"logps/rejected": -87.00895690917969,
|
|
"loss": 1.0214,
|
|
"margin_dpo/margin_mean": 3.7690064907073975,
|
|
"margin_dpo/margin_std": 5.457906723022461,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.36130007558578986,
|
|
"fcm_dpo/beta": 0.18199658393859863,
|
|
"fcm_dpo/delta": -0.13259382545948029,
|
|
"fcm_dpo/margin": 3.9706599712371826,
|
|
"fcm_dpo/q_t": 0.3562832474708557,
|
|
"grad_norm": 49.44264602661133,
|
|
"learning_rate": 4.045397465551513e-07,
|
|
"logits/chosen": 0.3672158718109131,
|
|
"logits/rejected": 0.26534122228622437,
|
|
"logps/chosen": -65.59939575195312,
|
|
"logps/ref_chosen": -56.771827697753906,
|
|
"logps/ref_rejected": -116.23050689697266,
|
|
"logps/rejected": -129.0287322998047,
|
|
"loss": 1.0188,
|
|
"margin_dpo/margin_mean": 3.9706602096557617,
|
|
"margin_dpo/margin_std": 5.715615749359131,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.36281179138321995,
|
|
"fcm_dpo/beta": 0.17385989427566528,
|
|
"fcm_dpo/delta": -0.24004262685775757,
|
|
"fcm_dpo/margin": 4.721988677978516,
|
|
"fcm_dpo/q_t": 0.3290281593799591,
|
|
"grad_norm": 37.56927490234375,
|
|
"learning_rate": 4.0349825555680045e-07,
|
|
"logits/chosen": 0.26035207509994507,
|
|
"logits/rejected": 0.18253850936889648,
|
|
"logps/chosen": -62.00343322753906,
|
|
"logps/ref_chosen": -53.35411071777344,
|
|
"logps/ref_rejected": -80.12019348144531,
|
|
"logps/rejected": -93.49150085449219,
|
|
"loss": 0.9114,
|
|
"margin_dpo/margin_mean": 4.721988677978516,
|
|
"margin_dpo/margin_std": 5.341388702392578,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.36432350718065004,
|
|
"fcm_dpo/beta": 0.17665785551071167,
|
|
"fcm_dpo/delta": 0.19322022795677185,
|
|
"fcm_dpo/margin": 2.3397579193115234,
|
|
"fcm_dpo/q_t": 0.4169955849647522,
|
|
"grad_norm": 53.57221984863281,
|
|
"learning_rate": 4.0245247088227377e-07,
|
|
"logits/chosen": 0.23470798134803772,
|
|
"logits/rejected": 0.20348861813545227,
|
|
"logps/chosen": -80.33531951904297,
|
|
"logps/ref_chosen": -71.89541625976562,
|
|
"logps/ref_rejected": -83.03492736816406,
|
|
"logps/rejected": -93.81459045410156,
|
|
"loss": 1.1753,
|
|
"margin_dpo/margin_mean": 2.3397579193115234,
|
|
"margin_dpo/margin_std": 4.811473846435547,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.36583522297808013,
|
|
"fcm_dpo/beta": 0.16935241222381592,
|
|
"fcm_dpo/delta": -0.23963555693626404,
|
|
"fcm_dpo/margin": 4.812247276306152,
|
|
"fcm_dpo/q_t": 0.33974403142929077,
|
|
"grad_norm": 37.257667541503906,
|
|
"learning_rate": 4.0140242178441665e-07,
|
|
"logits/chosen": 0.24610793590545654,
|
|
"logits/rejected": 0.2245916724205017,
|
|
"logps/chosen": -65.82247924804688,
|
|
"logps/ref_chosen": -57.927433013916016,
|
|
"logps/ref_rejected": -67.838623046875,
|
|
"logps/rejected": -80.54591369628906,
|
|
"loss": 0.9602,
|
|
"margin_dpo/margin_mean": 4.812246799468994,
|
|
"margin_dpo/margin_std": 6.34306526184082,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.3673469387755102,
|
|
"fcm_dpo/beta": 0.1678386628627777,
|
|
"fcm_dpo/delta": -0.04528873786330223,
|
|
"fcm_dpo/margin": 3.8267951011657715,
|
|
"fcm_dpo/q_t": 0.3693522810935974,
|
|
"grad_norm": 46.92771530151367,
|
|
"learning_rate": 4.003481376353596e-07,
|
|
"logits/chosen": 0.2732234597206116,
|
|
"logits/rejected": 0.2697628438472748,
|
|
"logps/chosen": -82.59149169921875,
|
|
"logps/ref_chosen": -74.27667236328125,
|
|
"logps/ref_rejected": -73.24340057373047,
|
|
"logps/rejected": -85.38501739501953,
|
|
"loss": 1.032,
|
|
"margin_dpo/margin_mean": 3.8267946243286133,
|
|
"margin_dpo/margin_std": 5.578545093536377,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.3688586545729403,
|
|
"fcm_dpo/beta": 0.16238996386528015,
|
|
"fcm_dpo/delta": -0.23858490586280823,
|
|
"fcm_dpo/margin": 5.046672821044922,
|
|
"fcm_dpo/q_t": 0.3252617120742798,
|
|
"grad_norm": 34.678497314453125,
|
|
"learning_rate": 3.9928964792569654e-07,
|
|
"logits/chosen": 0.2962725758552551,
|
|
"logits/rejected": 0.2212146818637848,
|
|
"logps/chosen": -62.00054931640625,
|
|
"logps/ref_chosen": -53.36390686035156,
|
|
"logps/ref_rejected": -71.10276794433594,
|
|
"logps/rejected": -84.78608703613281,
|
|
"loss": 0.8479,
|
|
"margin_dpo/margin_mean": 5.0466718673706055,
|
|
"margin_dpo/margin_std": 4.714447498321533,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.37037037037037035,
|
|
"fcm_dpo/beta": 0.1527130901813507,
|
|
"fcm_dpo/delta": -0.3079785108566284,
|
|
"fcm_dpo/margin": 5.767277240753174,
|
|
"fcm_dpo/q_t": 0.3175172805786133,
|
|
"grad_norm": 56.39506530761719,
|
|
"learning_rate": 3.982269822636601e-07,
|
|
"logits/chosen": 0.3219825029373169,
|
|
"logits/rejected": 0.2969723641872406,
|
|
"logps/chosen": -79.81932067871094,
|
|
"logps/ref_chosen": -71.19510650634766,
|
|
"logps/ref_rejected": -80.76235961914062,
|
|
"logps/rejected": -95.15385437011719,
|
|
"loss": 0.8586,
|
|
"margin_dpo/margin_mean": 5.767277717590332,
|
|
"margin_dpo/margin_std": 6.025437355041504,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.37188208616780044,
|
|
"fcm_dpo/beta": 0.14736124873161316,
|
|
"fcm_dpo/delta": -0.14660793542861938,
|
|
"fcm_dpo/margin": 4.994536399841309,
|
|
"fcm_dpo/q_t": 0.35354191064834595,
|
|
"grad_norm": 42.80738067626953,
|
|
"learning_rate": 3.971601703742932e-07,
|
|
"logits/chosen": 0.321519672870636,
|
|
"logits/rejected": 0.268222451210022,
|
|
"logps/chosen": -81.36954498291016,
|
|
"logps/ref_chosen": -71.62104797363281,
|
|
"logps/ref_rejected": -94.03392028808594,
|
|
"logps/rejected": -108.77696228027344,
|
|
"loss": 0.9924,
|
|
"margin_dpo/margin_mean": 4.994536399841309,
|
|
"margin_dpo/margin_std": 6.899087905883789,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.37339380196523053,
|
|
"fcm_dpo/beta": 0.15197286009788513,
|
|
"fcm_dpo/delta": 0.19533714652061462,
|
|
"fcm_dpo/margin": 2.685239315032959,
|
|
"fcm_dpo/q_t": 0.41240644454956055,
|
|
"grad_norm": 48.20946502685547,
|
|
"learning_rate": 3.960892420986177e-07,
|
|
"logits/chosen": 0.3213634490966797,
|
|
"logits/rejected": 0.31023019552230835,
|
|
"logps/chosen": -90.37286376953125,
|
|
"logps/ref_chosen": -80.02254486083984,
|
|
"logps/ref_rejected": -89.22705841064453,
|
|
"logps/rejected": -102.26261138916016,
|
|
"loss": 1.2183,
|
|
"margin_dpo/margin_mean": 2.685239315032959,
|
|
"margin_dpo/margin_std": 6.087411880493164,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.3749055177626606,
|
|
"fcm_dpo/beta": 0.1494050920009613,
|
|
"fcm_dpo/delta": -0.13823390007019043,
|
|
"fcm_dpo/margin": 4.874991416931152,
|
|
"fcm_dpo/q_t": 0.36219164729118347,
|
|
"grad_norm": 41.61267852783203,
|
|
"learning_rate": 3.9501422739279953e-07,
|
|
"logits/chosen": 0.26923084259033203,
|
|
"logits/rejected": 0.29580309987068176,
|
|
"logps/chosen": -74.59526062011719,
|
|
"logps/ref_chosen": -65.37796020507812,
|
|
"logps/ref_rejected": -61.365787506103516,
|
|
"logps/rejected": -75.45808410644531,
|
|
"loss": 1.0236,
|
|
"margin_dpo/margin_mean": 4.8749918937683105,
|
|
"margin_dpo/margin_std": 7.288352012634277,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.3764172335600907,
|
|
"fcm_dpo/beta": 0.1551496684551239,
|
|
"fcm_dpo/delta": 0.28272420167922974,
|
|
"fcm_dpo/margin": 2.0840535163879395,
|
|
"fcm_dpo/q_t": 0.42946404218673706,
|
|
"grad_norm": 52.263946533203125,
|
|
"learning_rate": 3.9393515632731094e-07,
|
|
"logits/chosen": 0.2614513039588928,
|
|
"logits/rejected": 0.2892727851867676,
|
|
"logps/chosen": -85.43158721923828,
|
|
"logps/ref_chosen": -74.60145568847656,
|
|
"logps/ref_rejected": -63.79338455200195,
|
|
"logps/rejected": -76.70757293701172,
|
|
"loss": 1.3308,
|
|
"margin_dpo/margin_mean": 2.0840537548065186,
|
|
"margin_dpo/margin_std": 6.5972580909729,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.3779289493575208,
|
|
"fcm_dpo/beta": 0.151943176984787,
|
|
"fcm_dpo/delta": -0.22958946228027344,
|
|
"fcm_dpo/margin": 5.341045379638672,
|
|
"fcm_dpo/q_t": 0.3368905484676361,
|
|
"grad_norm": 38.571678161621094,
|
|
"learning_rate": 3.9285205908608934e-07,
|
|
"logits/chosen": 0.3983957767486572,
|
|
"logits/rejected": 0.3594222962856293,
|
|
"logps/chosen": -71.35509490966797,
|
|
"logps/ref_chosen": -61.938209533691406,
|
|
"logps/ref_rejected": -72.21602630615234,
|
|
"logps/rejected": -86.97395324707031,
|
|
"loss": 0.9672,
|
|
"margin_dpo/margin_mean": 5.341045379638672,
|
|
"margin_dpo/margin_std": 7.081840515136719,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.3794406651549509,
|
|
"fcm_dpo/beta": 0.150055930018425,
|
|
"fcm_dpo/delta": 0.051377397030591965,
|
|
"fcm_dpo/margin": 3.6751105785369873,
|
|
"fcm_dpo/q_t": 0.3900037407875061,
|
|
"grad_norm": 44.067535400390625,
|
|
"learning_rate": 3.9176496596569265e-07,
|
|
"logits/chosen": 0.35311421751976013,
|
|
"logits/rejected": 0.31758221983909607,
|
|
"logps/chosen": -76.70925903320312,
|
|
"logps/ref_chosen": -66.85694885253906,
|
|
"logps/ref_rejected": -84.83396911621094,
|
|
"logps/rejected": -98.36139678955078,
|
|
"loss": 1.1125,
|
|
"margin_dpo/margin_mean": 3.675110340118408,
|
|
"margin_dpo/margin_std": 6.56778621673584,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.38095238095238093,
|
|
"fcm_dpo/beta": 0.154201477766037,
|
|
"fcm_dpo/delta": 0.03998170793056488,
|
|
"fcm_dpo/margin": 3.6078786849975586,
|
|
"fcm_dpo/q_t": 0.3854004144668579,
|
|
"grad_norm": 43.117156982421875,
|
|
"learning_rate": 3.9067390737445254e-07,
|
|
"logits/chosen": 0.26551708579063416,
|
|
"logits/rejected": 0.21662738919258118,
|
|
"logps/chosen": -65.75033569335938,
|
|
"logps/ref_chosen": -56.22393035888672,
|
|
"logps/ref_rejected": -77.1136245727539,
|
|
"logps/rejected": -90.24790954589844,
|
|
"loss": 1.1662,
|
|
"margin_dpo/margin_mean": 3.6078789234161377,
|
|
"margin_dpo/margin_std": 6.923517227172852,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.382464096749811,
|
|
"fcm_dpo/beta": 0.15114212036132812,
|
|
"fcm_dpo/delta": 0.043711431324481964,
|
|
"fcm_dpo/margin": 3.6810967922210693,
|
|
"fcm_dpo/q_t": 0.3912495970726013,
|
|
"grad_norm": 39.50165939331055,
|
|
"learning_rate": 3.8957891383162304e-07,
|
|
"logits/chosen": 0.35707587003707886,
|
|
"logits/rejected": 0.3203071355819702,
|
|
"logps/chosen": -62.26034164428711,
|
|
"logps/ref_chosen": -52.21001434326172,
|
|
"logps/ref_rejected": -58.75764846801758,
|
|
"logps/rejected": -72.48906707763672,
|
|
"loss": 1.1144,
|
|
"margin_dpo/margin_mean": 3.6810965538024902,
|
|
"margin_dpo/margin_std": 6.311221122741699,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.3839758125472411,
|
|
"fcm_dpo/beta": 0.15054289996623993,
|
|
"fcm_dpo/delta": -0.09489809721708298,
|
|
"fcm_dpo/margin": 4.567934513092041,
|
|
"fcm_dpo/q_t": 0.36535903811454773,
|
|
"grad_norm": 43.543678283691406,
|
|
"learning_rate": 3.884800159665276e-07,
|
|
"logits/chosen": 0.2740534543991089,
|
|
"logits/rejected": 0.22902369499206543,
|
|
"logps/chosen": -75.52339935302734,
|
|
"logps/ref_chosen": -65.63632202148438,
|
|
"logps/ref_rejected": -82.34425354003906,
|
|
"logps/rejected": -96.79925537109375,
|
|
"loss": 1.03,
|
|
"margin_dpo/margin_mean": 4.567934036254883,
|
|
"margin_dpo/margin_std": 6.8034586906433105,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.3854875283446712,
|
|
"fcm_dpo/beta": 0.14535465836524963,
|
|
"fcm_dpo/delta": -0.19926312565803528,
|
|
"fcm_dpo/margin": 5.382228374481201,
|
|
"fcm_dpo/q_t": 0.34879204630851746,
|
|
"grad_norm": 36.42246627807617,
|
|
"learning_rate": 3.873772445177015e-07,
|
|
"logits/chosen": 0.30461984872817993,
|
|
"logits/rejected": 0.27335721254348755,
|
|
"logps/chosen": -76.67054748535156,
|
|
"logps/ref_chosen": -67.91108703613281,
|
|
"logps/ref_rejected": -83.89114379882812,
|
|
"logps/rejected": -98.03282928466797,
|
|
"loss": 0.9773,
|
|
"margin_dpo/margin_mean": 5.382227897644043,
|
|
"margin_dpo/margin_std": 7.389373779296875,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.3869992441421013,
|
|
"fcm_dpo/beta": 0.14256221055984497,
|
|
"fcm_dpo/delta": -0.06361023336648941,
|
|
"fcm_dpo/margin": 4.620975971221924,
|
|
"fcm_dpo/q_t": 0.3729037642478943,
|
|
"grad_norm": 42.76344680786133,
|
|
"learning_rate": 3.862706303320329e-07,
|
|
"logits/chosen": 0.2616586685180664,
|
|
"logits/rejected": 0.21148879826068878,
|
|
"logps/chosen": -73.64082336425781,
|
|
"logps/ref_chosen": -63.49998474121094,
|
|
"logps/ref_rejected": -90.77104187011719,
|
|
"logps/rejected": -105.53286743164062,
|
|
"loss": 1.0775,
|
|
"margin_dpo/margin_mean": 4.620975494384766,
|
|
"margin_dpo/margin_std": 7.655024528503418,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.3885109599395314,
|
|
"fcm_dpo/beta": 0.1366242617368698,
|
|
"fcm_dpo/delta": -0.27341482043266296,
|
|
"fcm_dpo/margin": 6.215221881866455,
|
|
"fcm_dpo/q_t": 0.3420974016189575,
|
|
"grad_norm": 43.8852424621582,
|
|
"learning_rate": 3.851602043638994e-07,
|
|
"logits/chosen": 0.2920815050601959,
|
|
"logits/rejected": 0.2364548146724701,
|
|
"logps/chosen": -81.00942993164062,
|
|
"logps/ref_chosen": -70.60064697265625,
|
|
"logps/ref_rejected": -108.58313751220703,
|
|
"logps/rejected": -125.20713806152344,
|
|
"loss": 0.9761,
|
|
"margin_dpo/margin_mean": 6.215222358703613,
|
|
"margin_dpo/margin_std": 8.710794448852539,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.3900226757369615,
|
|
"fcm_dpo/beta": 0.13516320288181305,
|
|
"fcm_dpo/delta": -0.05724785476922989,
|
|
"fcm_dpo/margin": 4.827666282653809,
|
|
"fcm_dpo/q_t": 0.3620738387107849,
|
|
"grad_norm": 38.29178237915039,
|
|
"learning_rate": 3.840459976743023e-07,
|
|
"logits/chosen": 0.304485023021698,
|
|
"logits/rejected": 0.2620296776294708,
|
|
"logps/chosen": -70.34207153320312,
|
|
"logps/ref_chosen": -59.25416564941406,
|
|
"logps/ref_rejected": -85.58709716796875,
|
|
"logps/rejected": -101.50267028808594,
|
|
"loss": 0.9813,
|
|
"margin_dpo/margin_mean": 4.827666282653809,
|
|
"margin_dpo/margin_std": 5.940984725952148,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.3915343915343915,
|
|
"fcm_dpo/beta": 0.12516869604587555,
|
|
"fcm_dpo/delta": -0.39691078662872314,
|
|
"fcm_dpo/margin": 7.635554313659668,
|
|
"fcm_dpo/q_t": 0.305417001247406,
|
|
"grad_norm": 31.369762420654297,
|
|
"learning_rate": 3.8292804142999796e-07,
|
|
"logits/chosen": 0.2632516622543335,
|
|
"logits/rejected": 0.1743551343679428,
|
|
"logps/chosen": -74.25553131103516,
|
|
"logps/ref_chosen": -65.43487548828125,
|
|
"logps/ref_rejected": -95.41731262207031,
|
|
"logps/rejected": -111.87351989746094,
|
|
"loss": 0.839,
|
|
"margin_dpo/margin_mean": 7.635554790496826,
|
|
"margin_dpo/margin_std": 7.796221733093262,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.3930461073318216,
|
|
"fcm_dpo/beta": 0.12094450742006302,
|
|
"fcm_dpo/delta": -0.1034887433052063,
|
|
"fcm_dpo/margin": 5.749767303466797,
|
|
"fcm_dpo/q_t": 0.3611637055873871,
|
|
"grad_norm": 31.525815963745117,
|
|
"learning_rate": 3.818063669026256e-07,
|
|
"logits/chosen": 0.26589736342430115,
|
|
"logits/rejected": 0.19382989406585693,
|
|
"logps/chosen": -59.13935089111328,
|
|
"logps/ref_chosen": -49.08958435058594,
|
|
"logps/ref_rejected": -79.01708221435547,
|
|
"logps/rejected": -94.81661987304688,
|
|
"loss": 1.0165,
|
|
"margin_dpo/margin_mean": 5.749767303466797,
|
|
"margin_dpo/margin_std": 8.208473205566406,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.3945578231292517,
|
|
"fcm_dpo/beta": 0.12129713594913483,
|
|
"fcm_dpo/delta": 0.0323747955262661,
|
|
"fcm_dpo/margin": 4.69488525390625,
|
|
"fcm_dpo/q_t": 0.3866156339645386,
|
|
"grad_norm": 39.33859634399414,
|
|
"learning_rate": 3.806810054678331e-07,
|
|
"logits/chosen": 0.17265966534614563,
|
|
"logits/rejected": 0.18686804175376892,
|
|
"logps/chosen": -80.5276870727539,
|
|
"logps/ref_chosen": -70.87239074707031,
|
|
"logps/ref_rejected": -65.01522064208984,
|
|
"logps/rejected": -79.36540222167969,
|
|
"loss": 1.0891,
|
|
"margin_dpo/margin_mean": 4.69488525390625,
|
|
"margin_dpo/margin_std": 7.86189079284668,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.3960695389266818,
|
|
"fcm_dpo/beta": 0.12047137320041656,
|
|
"fcm_dpo/delta": -0.08758753538131714,
|
|
"fcm_dpo/margin": 5.658448219299316,
|
|
"fcm_dpo/q_t": 0.3600999712944031,
|
|
"grad_norm": 33.62800598144531,
|
|
"learning_rate": 3.7955198860439887e-07,
|
|
"logits/chosen": 0.3352086544036865,
|
|
"logits/rejected": 0.28318509459495544,
|
|
"logps/chosen": -78.01824951171875,
|
|
"logps/ref_chosen": -67.8706283569336,
|
|
"logps/ref_rejected": -88.7205810546875,
|
|
"logps/rejected": -104.52664947509766,
|
|
"loss": 0.9836,
|
|
"margin_dpo/margin_mean": 5.658448696136475,
|
|
"margin_dpo/margin_std": 7.331066131591797,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.3975812547241119,
|
|
"fcm_dpo/beta": 0.12068259716033936,
|
|
"fcm_dpo/delta": 0.03326220065355301,
|
|
"fcm_dpo/margin": 4.707237720489502,
|
|
"fcm_dpo/q_t": 0.38229966163635254,
|
|
"grad_norm": 32.8046989440918,
|
|
"learning_rate": 3.784193478933516e-07,
|
|
"logits/chosen": 0.28113555908203125,
|
|
"logits/rejected": 0.1925010085105896,
|
|
"logps/chosen": -65.09586334228516,
|
|
"logps/ref_chosen": -55.194583892822266,
|
|
"logps/ref_rejected": -80.54048156738281,
|
|
"logps/rejected": -95.14900207519531,
|
|
"loss": 1.0815,
|
|
"margin_dpo/margin_mean": 4.707237243652344,
|
|
"margin_dpo/margin_std": 7.578971862792969,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.39909297052154197,
|
|
"fcm_dpo/beta": 0.11911486089229584,
|
|
"fcm_dpo/delta": -0.07203864306211472,
|
|
"fcm_dpo/margin": 5.601800918579102,
|
|
"fcm_dpo/q_t": 0.36986517906188965,
|
|
"grad_norm": 37.93541717529297,
|
|
"learning_rate": 3.7728311501708674e-07,
|
|
"logits/chosen": 0.22223809361457825,
|
|
"logits/rejected": 0.1795908510684967,
|
|
"logps/chosen": -93.02720642089844,
|
|
"logps/ref_chosen": -83.17068481445312,
|
|
"logps/ref_rejected": -88.33625793457031,
|
|
"logps/rejected": -103.79458618164062,
|
|
"loss": 1.0562,
|
|
"margin_dpo/margin_mean": 5.601801872253418,
|
|
"margin_dpo/margin_std": 8.86518669128418,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.40060468631897206,
|
|
"fcm_dpo/beta": 0.11703728884458542,
|
|
"fcm_dpo/delta": -0.13774295151233673,
|
|
"fcm_dpo/margin": 6.217557430267334,
|
|
"fcm_dpo/q_t": 0.3545699119567871,
|
|
"grad_norm": 36.12660598754883,
|
|
"learning_rate": 3.7614332175848027e-07,
|
|
"logits/chosen": 0.361573725938797,
|
|
"logits/rejected": 0.3007473647594452,
|
|
"logps/chosen": -61.666404724121094,
|
|
"logps/ref_chosen": -51.66284942626953,
|
|
"logps/ref_rejected": -67.1720962524414,
|
|
"logps/rejected": -83.3932113647461,
|
|
"loss": 1.0502,
|
|
"margin_dpo/margin_mean": 6.217557907104492,
|
|
"margin_dpo/margin_std": 9.483875274658203,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.4021164021164021,
|
|
"fcm_dpo/beta": 0.11421965062618256,
|
|
"fcm_dpo/delta": -0.060123834758996964,
|
|
"fcm_dpo/margin": 5.743941783905029,
|
|
"fcm_dpo/q_t": 0.3692956864833832,
|
|
"grad_norm": 33.072265625,
|
|
"learning_rate": 3.75e-07,
|
|
"logits/chosen": 0.3172535300254822,
|
|
"logits/rejected": 0.2529382109642029,
|
|
"logps/chosen": -66.79499816894531,
|
|
"logps/ref_chosen": -57.45049285888672,
|
|
"logps/ref_rejected": -77.60826110839844,
|
|
"logps/rejected": -92.69670104980469,
|
|
"loss": 1.0264,
|
|
"margin_dpo/margin_mean": 5.7439422607421875,
|
|
"margin_dpo/margin_std": 8.409375190734863,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.4036281179138322,
|
|
"fcm_dpo/beta": 0.11638803780078888,
|
|
"fcm_dpo/delta": 0.06048017740249634,
|
|
"fcm_dpo/margin": 4.635496616363525,
|
|
"fcm_dpo/q_t": 0.3898630738258362,
|
|
"grad_norm": 29.81191062927246,
|
|
"learning_rate": 3.738531817228131e-07,
|
|
"logits/chosen": 0.325040340423584,
|
|
"logits/rejected": 0.30857351422309875,
|
|
"logps/chosen": -63.52793884277344,
|
|
"logps/ref_chosen": -55.03535079956055,
|
|
"logps/ref_rejected": -66.0953369140625,
|
|
"logps/rejected": -79.22342681884766,
|
|
"loss": 1.1486,
|
|
"margin_dpo/margin_mean": 4.635497093200684,
|
|
"margin_dpo/margin_std": 8.722969055175781,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.4051398337112623,
|
|
"fcm_dpo/beta": 0.1157803162932396,
|
|
"fcm_dpo/delta": -0.022753987461328506,
|
|
"fcm_dpo/margin": 5.360468864440918,
|
|
"fcm_dpo/q_t": 0.37341806292533875,
|
|
"grad_norm": 30.24585723876953,
|
|
"learning_rate": 3.7270289900589204e-07,
|
|
"logits/chosen": 0.2242826521396637,
|
|
"logits/rejected": 0.20411059260368347,
|
|
"logps/chosen": -73.67584228515625,
|
|
"logps/ref_chosen": -65.07174682617188,
|
|
"logps/ref_rejected": -71.42485809326172,
|
|
"logps/rejected": -85.38943481445312,
|
|
"loss": 1.0239,
|
|
"margin_dpo/margin_mean": 5.360469341278076,
|
|
"margin_dpo/margin_std": 7.504269123077393,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.40665154950869237,
|
|
"fcm_dpo/beta": 0.11419200897216797,
|
|
"fcm_dpo/delta": -0.13315117359161377,
|
|
"fcm_dpo/margin": 6.319992542266846,
|
|
"fcm_dpo/q_t": 0.35147666931152344,
|
|
"grad_norm": 30.362510681152344,
|
|
"learning_rate": 3.7154918402511714e-07,
|
|
"logits/chosen": 0.38946646451950073,
|
|
"logits/rejected": 0.346447229385376,
|
|
"logps/chosen": -76.6282730102539,
|
|
"logps/ref_chosen": -67.1362075805664,
|
|
"logps/ref_rejected": -82.55778503417969,
|
|
"logps/rejected": -98.36984252929688,
|
|
"loss": 0.9771,
|
|
"margin_dpo/margin_mean": 6.3199920654296875,
|
|
"margin_dpo/margin_std": 7.847947120666504,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.40816326530612246,
|
|
"fcm_dpo/beta": 0.11265287548303604,
|
|
"fcm_dpo/delta": 0.07302689552307129,
|
|
"fcm_dpo/margin": 4.712079048156738,
|
|
"fcm_dpo/q_t": 0.3870832324028015,
|
|
"grad_norm": 35.79549026489258,
|
|
"learning_rate": 3.7039206905237656e-07,
|
|
"logits/chosen": 0.3457132577896118,
|
|
"logits/rejected": 0.28162434697151184,
|
|
"logps/chosen": -75.90498352050781,
|
|
"logps/ref_chosen": -66.6886978149414,
|
|
"logps/ref_rejected": -85.16129302978516,
|
|
"logps/rejected": -99.08966064453125,
|
|
"loss": 1.1083,
|
|
"margin_dpo/margin_mean": 4.712078094482422,
|
|
"margin_dpo/margin_std": 8.200397491455078,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.40967498110355255,
|
|
"fcm_dpo/beta": 0.1142662763595581,
|
|
"fcm_dpo/delta": 0.08464036136865616,
|
|
"fcm_dpo/margin": 4.546772480010986,
|
|
"fcm_dpo/q_t": 0.408853679895401,
|
|
"grad_norm": 37.240455627441406,
|
|
"learning_rate": 3.692315864546635e-07,
|
|
"logits/chosen": 0.33662497997283936,
|
|
"logits/rejected": 0.2839156985282898,
|
|
"logps/chosen": -81.24642181396484,
|
|
"logps/ref_chosen": -72.40754699707031,
|
|
"logps/ref_rejected": -92.06311798095703,
|
|
"logps/rejected": -105.44876861572266,
|
|
"loss": 1.2112,
|
|
"margin_dpo/margin_mean": 4.546772480010986,
|
|
"margin_dpo/margin_std": 10.11610221862793,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.41118669690098264,
|
|
"fcm_dpo/beta": 0.11065268516540527,
|
|
"fcm_dpo/delta": -0.3351590037345886,
|
|
"fcm_dpo/margin": 8.187671661376953,
|
|
"fcm_dpo/q_t": 0.3182171881198883,
|
|
"grad_norm": 29.123409271240234,
|
|
"learning_rate": 3.6806776869317067e-07,
|
|
"logits/chosen": 0.3442276120185852,
|
|
"logits/rejected": 0.3472369909286499,
|
|
"logps/chosen": -73.9910888671875,
|
|
"logps/ref_chosen": -66.60140228271484,
|
|
"logps/ref_rejected": -67.74340057373047,
|
|
"logps/rejected": -83.32075500488281,
|
|
"loss": 0.849,
|
|
"margin_dpo/margin_mean": 8.187671661376953,
|
|
"margin_dpo/margin_std": 8.43535041809082,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.4126984126984127,
|
|
"fcm_dpo/beta": 0.10678541660308838,
|
|
"fcm_dpo/delta": -0.08693182468414307,
|
|
"fcm_dpo/margin": 6.37816858291626,
|
|
"fcm_dpo/q_t": 0.3652857840061188,
|
|
"grad_norm": 32.0718879699707,
|
|
"learning_rate": 3.669006483223828e-07,
|
|
"logits/chosen": 0.3392181396484375,
|
|
"logits/rejected": 0.2840285897254944,
|
|
"logps/chosen": -67.47334289550781,
|
|
"logps/ref_chosen": -57.35487747192383,
|
|
"logps/ref_rejected": -84.17168426513672,
|
|
"logps/rejected": -100.66831970214844,
|
|
"loss": 1.0624,
|
|
"margin_dpo/margin_mean": 6.378169059753418,
|
|
"margin_dpo/margin_std": 10.178689002990723,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.41421012849584277,
|
|
"fcm_dpo/beta": 0.10466930270195007,
|
|
"fcm_dpo/delta": -0.0918121486902237,
|
|
"fcm_dpo/margin": 6.54940128326416,
|
|
"fcm_dpo/q_t": 0.36318063735961914,
|
|
"grad_norm": 26.941736221313477,
|
|
"learning_rate": 3.657302579891656e-07,
|
|
"logits/chosen": 0.21288266777992249,
|
|
"logits/rejected": 0.19043758511543274,
|
|
"logps/chosen": -69.32371520996094,
|
|
"logps/ref_chosen": -59.64149475097656,
|
|
"logps/ref_rejected": -68.29348754882812,
|
|
"logps/rejected": -84.52510070800781,
|
|
"loss": 1.0363,
|
|
"margin_dpo/margin_mean": 6.54940128326416,
|
|
"margin_dpo/margin_std": 9.826854705810547,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.41572184429327286,
|
|
"fcm_dpo/beta": 0.10242324322462082,
|
|
"fcm_dpo/delta": -0.13851945102214813,
|
|
"fcm_dpo/margin": 7.114006042480469,
|
|
"fcm_dpo/q_t": 0.3521403670310974,
|
|
"grad_norm": 27.108287811279297,
|
|
"learning_rate": 3.645566304318526e-07,
|
|
"logits/chosen": 0.33497947454452515,
|
|
"logits/rejected": 0.2582448720932007,
|
|
"logps/chosen": -62.62168884277344,
|
|
"logps/ref_chosen": -53.26664352416992,
|
|
"logps/ref_rejected": -73.84062194824219,
|
|
"logps/rejected": -90.3096694946289,
|
|
"loss": 0.9787,
|
|
"margin_dpo/margin_mean": 7.1140055656433105,
|
|
"margin_dpo/margin_std": 9.458660125732422,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.41723356009070295,
|
|
"fcm_dpo/beta": 0.09872230142354965,
|
|
"fcm_dpo/delta": -0.14825645089149475,
|
|
"fcm_dpo/margin": 7.460000038146973,
|
|
"fcm_dpo/q_t": 0.3443659543991089,
|
|
"grad_norm": 26.00741195678711,
|
|
"learning_rate": 3.633797984793294e-07,
|
|
"logits/chosen": 0.2792910039424896,
|
|
"logits/rejected": 0.2462400197982788,
|
|
"logps/chosen": -61.21537399291992,
|
|
"logps/ref_chosen": -53.02079772949219,
|
|
"logps/ref_rejected": -61.56678771972656,
|
|
"logps/rejected": -77.22136688232422,
|
|
"loss": 0.9353,
|
|
"margin_dpo/margin_mean": 7.460000038146973,
|
|
"margin_dpo/margin_std": 8.583602905273438,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.41874527588813304,
|
|
"fcm_dpo/beta": 0.1010965034365654,
|
|
"fcm_dpo/delta": 0.15551161766052246,
|
|
"fcm_dpo/margin": 4.456169128417969,
|
|
"fcm_dpo/q_t": 0.40854841470718384,
|
|
"grad_norm": 30.41317367553711,
|
|
"learning_rate": 3.6219979505011555e-07,
|
|
"logits/chosen": 0.3963392674922943,
|
|
"logits/rejected": 0.4135650098323822,
|
|
"logps/chosen": -81.75311279296875,
|
|
"logps/ref_chosen": -71.43299102783203,
|
|
"logps/ref_rejected": -67.65852355957031,
|
|
"logps/rejected": -82.434814453125,
|
|
"loss": 1.1704,
|
|
"margin_dpo/margin_mean": 4.4561686515808105,
|
|
"margin_dpo/margin_std": 9.094401359558105,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.42025699168556313,
|
|
"fcm_dpo/beta": 0.10135230422019958,
|
|
"fcm_dpo/delta": -0.08824028819799423,
|
|
"fcm_dpo/margin": 6.714378356933594,
|
|
"fcm_dpo/q_t": 0.3628871738910675,
|
|
"grad_norm": 34.11618423461914,
|
|
"learning_rate": 3.6101665315144353e-07,
|
|
"logits/chosen": 0.2849266827106476,
|
|
"logits/rejected": 0.24051225185394287,
|
|
"logps/chosen": -77.12722778320312,
|
|
"logps/ref_chosen": -67.11076354980469,
|
|
"logps/ref_rejected": -88.74851989746094,
|
|
"logps/rejected": -105.4793701171875,
|
|
"loss": 1.031,
|
|
"margin_dpo/margin_mean": 6.714378356933594,
|
|
"margin_dpo/margin_std": 9.721296310424805,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.4217687074829932,
|
|
"fcm_dpo/beta": 0.09616866707801819,
|
|
"fcm_dpo/delta": -0.2708485722541809,
|
|
"fcm_dpo/margin": 8.823453903198242,
|
|
"fcm_dpo/q_t": 0.3255208432674408,
|
|
"grad_norm": 24.725061416625977,
|
|
"learning_rate": 3.5983040587833563e-07,
|
|
"logits/chosen": 0.31211304664611816,
|
|
"logits/rejected": 0.27127406001091003,
|
|
"logps/chosen": -61.78339385986328,
|
|
"logps/ref_chosen": -54.49748611450195,
|
|
"logps/ref_rejected": -70.42373657226562,
|
|
"logps/rejected": -86.53308868408203,
|
|
"loss": 0.8828,
|
|
"margin_dpo/margin_mean": 8.823453903198242,
|
|
"margin_dpo/margin_std": 9.46353816986084,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.42328042328042326,
|
|
"fcm_dpo/beta": 0.09034930169582367,
|
|
"fcm_dpo/delta": -0.2888822555541992,
|
|
"fcm_dpo/margin": 9.556791305541992,
|
|
"fcm_dpo/q_t": 0.31915369629859924,
|
|
"grad_norm": 22.01110076904297,
|
|
"learning_rate": 3.586410864126781e-07,
|
|
"logits/chosen": 0.3331267237663269,
|
|
"logits/rejected": 0.2932952046394348,
|
|
"logps/chosen": -68.28158569335938,
|
|
"logps/ref_chosen": -60.43281173706055,
|
|
"logps/ref_rejected": -78.39051818847656,
|
|
"logps/rejected": -95.79608154296875,
|
|
"loss": 0.8307,
|
|
"margin_dpo/margin_mean": 9.556791305541992,
|
|
"margin_dpo/margin_std": 9.003580093383789,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.42479213907785335,
|
|
"fcm_dpo/beta": 0.08677025139331818,
|
|
"fcm_dpo/delta": -0.1635596752166748,
|
|
"fcm_dpo/margin": 8.651966094970703,
|
|
"fcm_dpo/q_t": 0.34282469749450684,
|
|
"grad_norm": 21.93960952758789,
|
|
"learning_rate": 3.574487280222929e-07,
|
|
"logits/chosen": 0.3163455128669739,
|
|
"logits/rejected": 0.3208748400211334,
|
|
"logps/chosen": -69.17593383789062,
|
|
"logps/ref_chosen": -60.2820930480957,
|
|
"logps/ref_rejected": -62.04009246826172,
|
|
"logps/rejected": -79.58589935302734,
|
|
"loss": 0.9387,
|
|
"margin_dpo/margin_mean": 8.651966094970703,
|
|
"margin_dpo/margin_std": 10.228010177612305,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.42630385487528344,
|
|
"fcm_dpo/beta": 0.08634951710700989,
|
|
"fcm_dpo/delta": -0.08936205506324768,
|
|
"fcm_dpo/margin": 7.882460117340088,
|
|
"fcm_dpo/q_t": 0.3642592430114746,
|
|
"grad_norm": 26.94883155822754,
|
|
"learning_rate": 3.562533640600075e-07,
|
|
"logits/chosen": 0.2749297618865967,
|
|
"logits/rejected": 0.2267666459083557,
|
|
"logps/chosen": -71.17241668701172,
|
|
"logps/ref_chosen": -60.623924255371094,
|
|
"logps/ref_rejected": -68.67400360107422,
|
|
"logps/rejected": -87.10496520996094,
|
|
"loss": 1.0174,
|
|
"margin_dpo/margin_mean": 7.88245964050293,
|
|
"margin_dpo/margin_std": 10.839717864990234,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.42781557067271353,
|
|
"fcm_dpo/beta": 0.08574334532022476,
|
|
"fcm_dpo/delta": 0.031460996717214584,
|
|
"fcm_dpo/margin": 6.637720584869385,
|
|
"fcm_dpo/q_t": 0.38565975427627563,
|
|
"grad_norm": 31.932655334472656,
|
|
"learning_rate": 3.550550279627215e-07,
|
|
"logits/chosen": 0.3168482780456543,
|
|
"logits/rejected": 0.2327718585729599,
|
|
"logps/chosen": -78.9610824584961,
|
|
"logps/ref_chosen": -67.64775085449219,
|
|
"logps/ref_rejected": -99.96835327148438,
|
|
"logps/rejected": -117.91941833496094,
|
|
"loss": 1.0944,
|
|
"margin_dpo/margin_mean": 6.637721061706543,
|
|
"margin_dpo/margin_std": 11.038079261779785,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.4293272864701436,
|
|
"fcm_dpo/beta": 0.08396576344966888,
|
|
"fcm_dpo/delta": -0.08251890540122986,
|
|
"fcm_dpo/margin": 8.056709289550781,
|
|
"fcm_dpo/q_t": 0.3591376543045044,
|
|
"grad_norm": 22.93587875366211,
|
|
"learning_rate": 3.5385375325047163e-07,
|
|
"logits/chosen": 0.4004897475242615,
|
|
"logits/rejected": 0.33910277485847473,
|
|
"logps/chosen": -67.47752380371094,
|
|
"logps/ref_chosen": -56.96742630004883,
|
|
"logps/ref_rejected": -86.36236572265625,
|
|
"logps/rejected": -104.92916870117188,
|
|
"loss": 0.9775,
|
|
"margin_dpo/margin_mean": 8.056710243225098,
|
|
"margin_dpo/margin_std": 10.448408126831055,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.4308390022675737,
|
|
"fcm_dpo/beta": 0.08457425236701965,
|
|
"fcm_dpo/delta": 0.02803485468029976,
|
|
"fcm_dpo/margin": 6.776731014251709,
|
|
"fcm_dpo/q_t": 0.38514000177383423,
|
|
"grad_norm": 28.730764389038086,
|
|
"learning_rate": 3.5264957352549375e-07,
|
|
"logits/chosen": 0.3962095379829407,
|
|
"logits/rejected": 0.36934328079223633,
|
|
"logps/chosen": -85.16078186035156,
|
|
"logps/ref_chosen": -71.65611267089844,
|
|
"logps/ref_rejected": -81.63829803466797,
|
|
"logps/rejected": -101.91970825195312,
|
|
"loss": 1.0692,
|
|
"margin_dpo/margin_mean": 6.776730537414551,
|
|
"margin_dpo/margin_std": 10.675975799560547,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.4323507180650038,
|
|
"fcm_dpo/beta": 0.08037659525871277,
|
|
"fcm_dpo/delta": -0.30072513222694397,
|
|
"fcm_dpo/margin": 10.86276626586914,
|
|
"fcm_dpo/q_t": 0.31989628076553345,
|
|
"grad_norm": 23.31649398803711,
|
|
"learning_rate": 3.514425224712835e-07,
|
|
"logits/chosen": 0.2933613657951355,
|
|
"logits/rejected": 0.2070850282907486,
|
|
"logps/chosen": -73.17205047607422,
|
|
"logps/ref_chosen": -61.07952117919922,
|
|
"logps/ref_rejected": -91.28128051757812,
|
|
"logps/rejected": -114.236572265625,
|
|
"loss": 0.8592,
|
|
"margin_dpo/margin_mean": 10.862764358520508,
|
|
"margin_dpo/margin_std": 11.261336326599121,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.43386243386243384,
|
|
"fcm_dpo/beta": 0.07688654214143753,
|
|
"fcm_dpo/delta": -0.24263165891170502,
|
|
"fcm_dpo/margin": 10.706443786621094,
|
|
"fcm_dpo/q_t": 0.33241310715675354,
|
|
"grad_norm": 21.12100601196289,
|
|
"learning_rate": 3.502326338516534e-07,
|
|
"logits/chosen": 0.34049081802368164,
|
|
"logits/rejected": 0.30252766609191895,
|
|
"logps/chosen": -56.355995178222656,
|
|
"logps/ref_chosen": -46.035789489746094,
|
|
"logps/ref_rejected": -59.95293426513672,
|
|
"logps/rejected": -80.97958374023438,
|
|
"loss": 0.8952,
|
|
"margin_dpo/margin_mean": 10.706443786621094,
|
|
"margin_dpo/margin_std": 11.937873840332031,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.43537414965986393,
|
|
"fcm_dpo/beta": 0.0763639435172081,
|
|
"fcm_dpo/delta": 0.05032455921173096,
|
|
"fcm_dpo/margin": 7.232652187347412,
|
|
"fcm_dpo/q_t": 0.38707786798477173,
|
|
"grad_norm": 27.177230834960938,
|
|
"learning_rate": 3.490199415097892e-07,
|
|
"logits/chosen": 0.24442994594573975,
|
|
"logits/rejected": 0.19374153017997742,
|
|
"logps/chosen": -79.61978912353516,
|
|
"logps/ref_chosen": -65.3908462524414,
|
|
"logps/ref_rejected": -88.53607940673828,
|
|
"logps/rejected": -109.99766540527344,
|
|
"loss": 1.0832,
|
|
"margin_dpo/margin_mean": 7.2326507568359375,
|
|
"margin_dpo/margin_std": 11.782697677612305,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.436885865457294,
|
|
"fcm_dpo/beta": 0.07672218978404999,
|
|
"fcm_dpo/delta": -0.0018288381397724152,
|
|
"fcm_dpo/margin": 7.838685989379883,
|
|
"fcm_dpo/q_t": 0.3824378252029419,
|
|
"grad_norm": 24.673717498779297,
|
|
"learning_rate": 3.4780447936730247e-07,
|
|
"logits/chosen": 0.4334472417831421,
|
|
"logits/rejected": 0.3991202414035797,
|
|
"logps/chosen": -68.89795684814453,
|
|
"logps/ref_chosen": -54.5936279296875,
|
|
"logps/ref_rejected": -67.20855712890625,
|
|
"logps/rejected": -89.35157012939453,
|
|
"loss": 1.0778,
|
|
"margin_dpo/margin_mean": 7.838686943054199,
|
|
"margin_dpo/margin_std": 12.600842475891113,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.4383975812547241,
|
|
"fcm_dpo/beta": 0.07465630769729614,
|
|
"fcm_dpo/delta": -0.12069503962993622,
|
|
"fcm_dpo/margin": 9.523682594299316,
|
|
"fcm_dpo/q_t": 0.3567933142185211,
|
|
"grad_norm": 27.99603271484375,
|
|
"learning_rate": 3.465862814232821e-07,
|
|
"logits/chosen": 0.444963276386261,
|
|
"logits/rejected": 0.3783135414123535,
|
|
"logps/chosen": -78.06217193603516,
|
|
"logps/ref_chosen": -61.38457489013672,
|
|
"logps/ref_rejected": -91.92778015136719,
|
|
"logps/rejected": -118.12904357910156,
|
|
"loss": 0.9965,
|
|
"margin_dpo/margin_mean": 9.523681640625,
|
|
"margin_dpo/margin_std": 13.034603118896484,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.4399092970521542,
|
|
"fcm_dpo/beta": 0.07467788457870483,
|
|
"fcm_dpo/delta": -0.09996376186609268,
|
|
"fcm_dpo/margin": 9.23512077331543,
|
|
"fcm_dpo/q_t": 0.3609282970428467,
|
|
"grad_norm": 27.58074378967285,
|
|
"learning_rate": 3.4536538175334343e-07,
|
|
"logits/chosen": 0.5009055733680725,
|
|
"logits/rejected": 0.44027698040008545,
|
|
"logps/chosen": -66.63168334960938,
|
|
"logps/ref_chosen": -50.863037109375,
|
|
"logps/ref_rejected": -82.20868682861328,
|
|
"logps/rejected": -107.21245574951172,
|
|
"loss": 1.006,
|
|
"margin_dpo/margin_mean": 9.23512077331543,
|
|
"margin_dpo/margin_std": 12.100658416748047,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.4414210128495843,
|
|
"fcm_dpo/beta": 0.07281124591827393,
|
|
"fcm_dpo/delta": -0.03628935664892197,
|
|
"fcm_dpo/margin": 8.707426071166992,
|
|
"fcm_dpo/q_t": 0.3691728711128235,
|
|
"grad_norm": 28.966171264648438,
|
|
"learning_rate": 3.4414181450867465e-07,
|
|
"logits/chosen": 0.40035903453826904,
|
|
"logits/rejected": 0.35217487812042236,
|
|
"logps/chosen": -79.63127136230469,
|
|
"logps/ref_chosen": -64.34888458251953,
|
|
"logps/ref_rejected": -72.86434173583984,
|
|
"logps/rejected": -96.85415649414062,
|
|
"loss": 1.0421,
|
|
"margin_dpo/margin_mean": 8.707426071166992,
|
|
"margin_dpo/margin_std": 12.978470802307129,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.4429327286470144,
|
|
"fcm_dpo/beta": 0.07096080482006073,
|
|
"fcm_dpo/delta": -0.2058958262205124,
|
|
"fcm_dpo/margin": 11.13209342956543,
|
|
"fcm_dpo/q_t": 0.34235402941703796,
|
|
"grad_norm": 19.71794891357422,
|
|
"learning_rate": 3.4291561391508185e-07,
|
|
"logits/chosen": 0.4608747661113739,
|
|
"logits/rejected": 0.3821406662464142,
|
|
"logps/chosen": -71.60606384277344,
|
|
"logps/ref_chosen": -54.869468688964844,
|
|
"logps/ref_rejected": -81.858642578125,
|
|
"logps/rejected": -109.72734069824219,
|
|
"loss": 0.9815,
|
|
"margin_dpo/margin_mean": 11.132092475891113,
|
|
"margin_dpo/margin_std": 14.750845909118652,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.4444444444444444,
|
|
"fcm_dpo/beta": 0.06872005015611649,
|
|
"fcm_dpo/delta": -0.03578688204288483,
|
|
"fcm_dpo/margin": 9.196503639221191,
|
|
"fcm_dpo/q_t": 0.37264391779899597,
|
|
"grad_norm": 20.966272354125977,
|
|
"learning_rate": 3.4168681427203153e-07,
|
|
"logits/chosen": 0.41916030645370483,
|
|
"logits/rejected": 0.3763779401779175,
|
|
"logps/chosen": -72.47305297851562,
|
|
"logps/ref_chosen": -56.670902252197266,
|
|
"logps/ref_rejected": -70.32819366455078,
|
|
"logps/rejected": -95.32685089111328,
|
|
"loss": 1.02,
|
|
"margin_dpo/margin_mean": 9.196502685546875,
|
|
"margin_dpo/margin_std": 12.933424949645996,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.4459561602418745,
|
|
"fcm_dpo/beta": 0.06950195878744125,
|
|
"fcm_dpo/delta": 0.04927371069788933,
|
|
"fcm_dpo/margin": 7.9632248878479,
|
|
"fcm_dpo/q_t": 0.39062416553497314,
|
|
"grad_norm": 26.211135864257812,
|
|
"learning_rate": 3.4045544995169125e-07,
|
|
"logits/chosen": 0.41103291511535645,
|
|
"logits/rejected": 0.32525143027305603,
|
|
"logps/chosen": -67.91084289550781,
|
|
"logps/ref_chosen": -50.40088653564453,
|
|
"logps/ref_rejected": -83.43521881103516,
|
|
"logps/rejected": -108.90840148925781,
|
|
"loss": 1.0986,
|
|
"margin_dpo/margin_mean": 7.963224411010742,
|
|
"margin_dpo/margin_std": 13.621759414672852,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.4474678760393046,
|
|
"fcm_dpo/beta": 0.06849856674671173,
|
|
"fcm_dpo/delta": -0.0997304916381836,
|
|
"fcm_dpo/margin": 10.102575302124023,
|
|
"fcm_dpo/q_t": 0.3605468273162842,
|
|
"grad_norm": 23.900463104248047,
|
|
"learning_rate": 3.392215553979679e-07,
|
|
"logits/chosen": 0.36223775148391724,
|
|
"logits/rejected": 0.32354265451431274,
|
|
"logps/chosen": -86.39460754394531,
|
|
"logps/ref_chosen": -69.15034484863281,
|
|
"logps/ref_rejected": -89.60166931152344,
|
|
"logps/rejected": -116.94850158691406,
|
|
"loss": 0.9945,
|
|
"margin_dpo/margin_mean": 10.102575302124023,
|
|
"margin_dpo/margin_std": 13.747122764587402,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.4489795918367347,
|
|
"fcm_dpo/beta": 0.06773459911346436,
|
|
"fcm_dpo/delta": -0.12187168002128601,
|
|
"fcm_dpo/margin": 10.525908470153809,
|
|
"fcm_dpo/q_t": 0.35121363401412964,
|
|
"grad_norm": 20.95337677001953,
|
|
"learning_rate": 3.3798516512554485e-07,
|
|
"logits/chosen": 0.4012449383735657,
|
|
"logits/rejected": 0.34180164337158203,
|
|
"logps/chosen": -76.49490356445312,
|
|
"logps/ref_chosen": -58.01630401611328,
|
|
"logps/ref_rejected": -69.95780944824219,
|
|
"logps/rejected": -98.96231079101562,
|
|
"loss": 0.9485,
|
|
"margin_dpo/margin_mean": 10.525908470153809,
|
|
"margin_dpo/margin_std": 12.319080352783203,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.4504913076341648,
|
|
"fcm_dpo/beta": 0.06668893992900848,
|
|
"fcm_dpo/delta": -0.0053915292955935,
|
|
"fcm_dpo/margin": 9.072604179382324,
|
|
"fcm_dpo/q_t": 0.3817402124404907,
|
|
"grad_norm": 23.481307983398438,
|
|
"learning_rate": 3.367463137189156e-07,
|
|
"logits/chosen": 0.5120540857315063,
|
|
"logits/rejected": 0.45104530453681946,
|
|
"logps/chosen": -73.73829650878906,
|
|
"logps/ref_chosen": -56.1693115234375,
|
|
"logps/ref_rejected": -68.55052185058594,
|
|
"logps/rejected": -95.19210815429688,
|
|
"loss": 1.1053,
|
|
"margin_dpo/margin_mean": 9.072603225708008,
|
|
"margin_dpo/margin_std": 15.78736686706543,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.4520030234315949,
|
|
"fcm_dpo/beta": 0.0676107257604599,
|
|
"fcm_dpo/delta": 0.11564286053180695,
|
|
"fcm_dpo/margin": 7.249474048614502,
|
|
"fcm_dpo/q_t": 0.40451353788375854,
|
|
"grad_norm": 25.117929458618164,
|
|
"learning_rate": 3.355050358314172e-07,
|
|
"logits/chosen": 0.33155155181884766,
|
|
"logits/rejected": 0.30397695302963257,
|
|
"logps/chosen": -79.69677734375,
|
|
"logps/ref_chosen": -62.31780242919922,
|
|
"logps/ref_rejected": -72.60028839111328,
|
|
"logps/rejected": -97.22874450683594,
|
|
"loss": 1.2026,
|
|
"margin_dpo/margin_mean": 7.24947452545166,
|
|
"margin_dpo/margin_std": 15.536123275756836,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.45351473922902497,
|
|
"fcm_dpo/beta": 0.0678647980093956,
|
|
"fcm_dpo/delta": -0.053635694086551666,
|
|
"fcm_dpo/margin": 9.58050537109375,
|
|
"fcm_dpo/q_t": 0.3681472837924957,
|
|
"grad_norm": 23.66339874267578,
|
|
"learning_rate": 3.3426136618426043e-07,
|
|
"logits/chosen": 0.4309826195240021,
|
|
"logits/rejected": 0.37077367305755615,
|
|
"logps/chosen": -77.88261413574219,
|
|
"logps/ref_chosen": -60.38157653808594,
|
|
"logps/ref_rejected": -75.45442199707031,
|
|
"logps/rejected": -102.53596496582031,
|
|
"loss": 1.0712,
|
|
"margin_dpo/margin_mean": 9.58050537109375,
|
|
"margin_dpo/margin_std": 15.316072463989258,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.455026455026455,
|
|
"fcm_dpo/beta": 0.06702058017253876,
|
|
"fcm_dpo/delta": -0.002799011766910553,
|
|
"fcm_dpo/margin": 8.974074363708496,
|
|
"fcm_dpo/q_t": 0.38000398874282837,
|
|
"grad_norm": 20.609079360961914,
|
|
"learning_rate": 3.3301533956555885e-07,
|
|
"logits/chosen": 0.4343421459197998,
|
|
"logits/rejected": 0.4045522212982178,
|
|
"logps/chosen": -69.66383361816406,
|
|
"logps/ref_chosen": -52.85089111328125,
|
|
"logps/ref_rejected": -69.97584533691406,
|
|
"logps/rejected": -95.76286315917969,
|
|
"loss": 1.0996,
|
|
"margin_dpo/margin_mean": 8.974075317382812,
|
|
"margin_dpo/margin_std": 15.309377670288086,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.4565381708238851,
|
|
"fcm_dpo/beta": 0.0691293403506279,
|
|
"fcm_dpo/delta": 0.16866973042488098,
|
|
"fcm_dpo/margin": 6.349787712097168,
|
|
"fcm_dpo/q_t": 0.4120634198188782,
|
|
"grad_norm": 28.044296264648438,
|
|
"learning_rate": 3.317669908293554e-07,
|
|
"logits/chosen": 0.30643230676651,
|
|
"logits/rejected": 0.2559657692909241,
|
|
"logps/chosen": -85.57098388671875,
|
|
"logps/ref_chosen": -66.96650695800781,
|
|
"logps/ref_rejected": -88.09510803222656,
|
|
"logps/rejected": -113.04937744140625,
|
|
"loss": 1.2034,
|
|
"margin_dpo/margin_mean": 6.349788188934326,
|
|
"margin_dpo/margin_std": 14.239937782287598,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.4580498866213152,
|
|
"fcm_dpo/beta": 0.06739898025989532,
|
|
"fcm_dpo/delta": -0.25346097350120544,
|
|
"fcm_dpo/margin": 12.354694366455078,
|
|
"fcm_dpo/q_t": 0.33981257677078247,
|
|
"grad_norm": 21.23597526550293,
|
|
"learning_rate": 3.3051635489464793e-07,
|
|
"logits/chosen": 0.4204411506652832,
|
|
"logits/rejected": 0.35760125517845154,
|
|
"logps/chosen": -77.63226318359375,
|
|
"logps/ref_chosen": -62.12152862548828,
|
|
"logps/ref_rejected": -90.31204223632812,
|
|
"logps/rejected": -118.17748260498047,
|
|
"loss": 0.9833,
|
|
"margin_dpo/margin_mean": 12.354693412780762,
|
|
"margin_dpo/margin_std": 16.887344360351562,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.4595616024187453,
|
|
"fcm_dpo/beta": 0.0645943209528923,
|
|
"fcm_dpo/delta": -0.1871630847454071,
|
|
"fcm_dpo/margin": 11.962800979614258,
|
|
"fcm_dpo/q_t": 0.336465060710907,
|
|
"grad_norm": 20.540264129638672,
|
|
"learning_rate": 3.292634667444117e-07,
|
|
"logits/chosen": 0.4185236394405365,
|
|
"logits/rejected": 0.3664189577102661,
|
|
"logps/chosen": -74.27428436279297,
|
|
"logps/ref_chosen": -60.695091247558594,
|
|
"logps/ref_rejected": -78.2525405883789,
|
|
"logps/rejected": -103.79454040527344,
|
|
"loss": 0.9063,
|
|
"margin_dpo/margin_mean": 11.962800979614258,
|
|
"margin_dpo/margin_std": 12.955540657043457,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.46107331821617537,
|
|
"fcm_dpo/beta": 0.06333063542842865,
|
|
"fcm_dpo/delta": -0.008959665894508362,
|
|
"fcm_dpo/margin": 9.583664894104004,
|
|
"fcm_dpo/q_t": 0.37659746408462524,
|
|
"grad_norm": 24.14905548095703,
|
|
"learning_rate": 3.280083614246217e-07,
|
|
"logits/chosen": 0.356367290019989,
|
|
"logits/rejected": 0.37138211727142334,
|
|
"logps/chosen": -89.701416015625,
|
|
"logps/ref_chosen": -72.69914245605469,
|
|
"logps/ref_rejected": -65.65670776367188,
|
|
"logps/rejected": -92.24266052246094,
|
|
"loss": 1.0968,
|
|
"margin_dpo/margin_mean": 9.58366584777832,
|
|
"margin_dpo/margin_std": 16.054397583007812,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.46258503401360546,
|
|
"fcm_dpo/beta": 0.06377913057804108,
|
|
"fcm_dpo/delta": 0.05049442499876022,
|
|
"fcm_dpo/margin": 8.643620491027832,
|
|
"fcm_dpo/q_t": 0.38526010513305664,
|
|
"grad_norm": 20.86713981628418,
|
|
"learning_rate": 3.267510740432719e-07,
|
|
"logits/chosen": 0.4515618681907654,
|
|
"logits/rejected": 0.3459406793117523,
|
|
"logps/chosen": -69.68206787109375,
|
|
"logps/ref_chosen": -53.97052764892578,
|
|
"logps/ref_rejected": -71.02423095703125,
|
|
"logps/rejected": -95.37939453125,
|
|
"loss": 1.0697,
|
|
"margin_dpo/margin_mean": 8.643620491027832,
|
|
"margin_dpo/margin_std": 13.185958862304688,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.46409674981103555,
|
|
"fcm_dpo/beta": 0.06696438044309616,
|
|
"fcm_dpo/delta": 0.20386965572834015,
|
|
"fcm_dpo/margin": 6.0009446144104,
|
|
"fcm_dpo/q_t": 0.42609280347824097,
|
|
"grad_norm": 28.50688934326172,
|
|
"learning_rate": 3.2549163976939285e-07,
|
|
"logits/chosen": 0.4598926901817322,
|
|
"logits/rejected": 0.41593605279922485,
|
|
"logps/chosen": -70.69493865966797,
|
|
"logps/ref_chosen": -57.413108825683594,
|
|
"logps/ref_rejected": -68.68010711669922,
|
|
"logps/rejected": -87.96287536621094,
|
|
"loss": 1.2877,
|
|
"margin_dpo/margin_mean": 6.000943183898926,
|
|
"margin_dpo/margin_std": 16.271331787109375,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.4656084656084656,
|
|
"fcm_dpo/beta": 0.06626871228218079,
|
|
"fcm_dpo/delta": -0.051046308130025864,
|
|
"fcm_dpo/margin": 9.75979232788086,
|
|
"fcm_dpo/q_t": 0.36723071336746216,
|
|
"grad_norm": 20.633642196655273,
|
|
"learning_rate": 3.2423009383206874e-07,
|
|
"logits/chosen": 0.41328901052474976,
|
|
"logits/rejected": 0.3950084447860718,
|
|
"logps/chosen": -80.57633972167969,
|
|
"logps/ref_chosen": -66.59879302978516,
|
|
"logps/ref_rejected": -74.337158203125,
|
|
"logps/rejected": -98.07449340820312,
|
|
"loss": 1.0541,
|
|
"margin_dpo/margin_mean": 9.75979232788086,
|
|
"margin_dpo/margin_std": 14.91418170928955,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.4671201814058957,
|
|
"fcm_dpo/beta": 0.06552757322788239,
|
|
"fcm_dpo/delta": -0.05448343604803085,
|
|
"fcm_dpo/margin": 9.916336059570312,
|
|
"fcm_dpo/q_t": 0.36798107624053955,
|
|
"grad_norm": 24.828540802001953,
|
|
"learning_rate": 3.229664715194511e-07,
|
|
"logits/chosen": 0.47302621603012085,
|
|
"logits/rejected": 0.4183216691017151,
|
|
"logps/chosen": -81.97295379638672,
|
|
"logps/ref_chosen": -65.39474487304688,
|
|
"logps/ref_rejected": -75.70930480957031,
|
|
"logps/rejected": -102.20384979248047,
|
|
"loss": 1.0241,
|
|
"margin_dpo/margin_mean": 9.916337966918945,
|
|
"margin_dpo/margin_std": 14.151466369628906,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.46863189720332576,
|
|
"fcm_dpo/beta": 0.0674559623003006,
|
|
"fcm_dpo/delta": 0.19796836376190186,
|
|
"fcm_dpo/margin": 6.087133407592773,
|
|
"fcm_dpo/q_t": 0.4217107594013214,
|
|
"grad_norm": 27.96128273010254,
|
|
"learning_rate": 3.2170080817777257e-07,
|
|
"logits/chosen": 0.45399510860443115,
|
|
"logits/rejected": 0.4443548321723938,
|
|
"logps/chosen": -91.49375915527344,
|
|
"logps/ref_chosen": -74.66827392578125,
|
|
"logps/ref_rejected": -80.5689697265625,
|
|
"logps/rejected": -103.48159790039062,
|
|
"loss": 1.2421,
|
|
"margin_dpo/margin_mean": 6.087133407592773,
|
|
"margin_dpo/margin_std": 14.875473976135254,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.47014361300075586,
|
|
"fcm_dpo/beta": 0.06830902397632599,
|
|
"fcm_dpo/delta": 0.02322382479906082,
|
|
"fcm_dpo/margin": 8.457318305969238,
|
|
"fcm_dpo/q_t": 0.38491734862327576,
|
|
"grad_norm": 25.18285369873047,
|
|
"learning_rate": 3.204331392103574e-07,
|
|
"logits/chosen": 0.3568248450756073,
|
|
"logits/rejected": 0.24125118553638458,
|
|
"logps/chosen": -72.52886962890625,
|
|
"logps/ref_chosen": -59.738033294677734,
|
|
"logps/ref_rejected": -93.60757446289062,
|
|
"logps/rejected": -114.85572814941406,
|
|
"loss": 1.0963,
|
|
"margin_dpo/margin_mean": 8.457318305969238,
|
|
"margin_dpo/margin_std": 14.571065902709961,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.47165532879818595,
|
|
"fcm_dpo/beta": 0.06694841384887695,
|
|
"fcm_dpo/delta": -0.21142852306365967,
|
|
"fcm_dpo/margin": 11.876443862915039,
|
|
"fcm_dpo/q_t": 0.3356952965259552,
|
|
"grad_norm": 20.244081497192383,
|
|
"learning_rate": 3.1916350007663176e-07,
|
|
"logits/chosen": 0.43080633878707886,
|
|
"logits/rejected": 0.3392553925514221,
|
|
"logps/chosen": -67.07718658447266,
|
|
"logps/ref_chosen": -53.816436767578125,
|
|
"logps/ref_rejected": -68.6575698852539,
|
|
"logps/rejected": -93.79476928710938,
|
|
"loss": 0.9214,
|
|
"margin_dpo/margin_mean": 11.876442909240723,
|
|
"margin_dpo/margin_std": 13.817426681518555,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.47316704459561604,
|
|
"fcm_dpo/beta": 0.06668175011873245,
|
|
"fcm_dpo/delta": 0.09295180439949036,
|
|
"fcm_dpo/margin": 7.676837921142578,
|
|
"fcm_dpo/q_t": 0.4016116261482239,
|
|
"grad_norm": 23.693931579589844,
|
|
"learning_rate": 3.178919262911314e-07,
|
|
"logits/chosen": 0.4855688810348511,
|
|
"logits/rejected": 0.4634004533290863,
|
|
"logps/chosen": -72.90442657470703,
|
|
"logps/ref_chosen": -59.957359313964844,
|
|
"logps/ref_rejected": -69.31729888916016,
|
|
"logps/rejected": -89.94120788574219,
|
|
"loss": 1.173,
|
|
"margin_dpo/margin_mean": 7.676837921142578,
|
|
"margin_dpo/margin_std": 15.700519561767578,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.47467876039304613,
|
|
"fcm_dpo/beta": 0.06443378329277039,
|
|
"fcm_dpo/delta": -0.2552639842033386,
|
|
"fcm_dpo/margin": 12.927923202514648,
|
|
"fcm_dpo/q_t": 0.33266928791999817,
|
|
"grad_norm": 20.972042083740234,
|
|
"learning_rate": 3.166184534225087e-07,
|
|
"logits/chosen": 0.42626792192459106,
|
|
"logits/rejected": 0.44356074929237366,
|
|
"logps/chosen": -82.15957641601562,
|
|
"logps/ref_chosen": -70.26815795898438,
|
|
"logps/ref_rejected": -69.23971557617188,
|
|
"logps/rejected": -94.05905151367188,
|
|
"loss": 0.8906,
|
|
"margin_dpo/margin_mean": 12.927923202514648,
|
|
"margin_dpo/margin_std": 14.38834285736084,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.47619047619047616,
|
|
"fcm_dpo/beta": 0.0640818327665329,
|
|
"fcm_dpo/delta": -0.038807280361652374,
|
|
"fcm_dpo/margin": 9.904523849487305,
|
|
"fcm_dpo/q_t": 0.36713922023773193,
|
|
"grad_norm": 20.750200271606445,
|
|
"learning_rate": 3.1534311709253723e-07,
|
|
"logits/chosen": 0.3727494478225708,
|
|
"logits/rejected": 0.3308834433555603,
|
|
"logps/chosen": -81.04085540771484,
|
|
"logps/ref_chosen": -67.79469299316406,
|
|
"logps/ref_rejected": -74.55148315429688,
|
|
"logps/rejected": -97.70216369628906,
|
|
"loss": 1.0285,
|
|
"margin_dpo/margin_mean": 9.904522895812988,
|
|
"margin_dpo/margin_std": 13.835953712463379,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.47770219198790626,
|
|
"fcm_dpo/beta": 0.062294527888298035,
|
|
"fcm_dpo/delta": -0.20774495601654053,
|
|
"fcm_dpo/margin": 12.683357238769531,
|
|
"fcm_dpo/q_t": 0.33479374647140503,
|
|
"grad_norm": 22.39960289001465,
|
|
"learning_rate": 3.1406595297511564e-07,
|
|
"logits/chosen": 0.34574979543685913,
|
|
"logits/rejected": 0.23477637767791748,
|
|
"logps/chosen": -67.19189453125,
|
|
"logps/ref_chosen": -55.288482666015625,
|
|
"logps/ref_rejected": -96.15723419189453,
|
|
"logps/rejected": -120.74400329589844,
|
|
"loss": 0.9071,
|
|
"margin_dpo/margin_mean": 12.683357238769531,
|
|
"margin_dpo/margin_std": 13.058753967285156,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.47921390778533635,
|
|
"fcm_dpo/beta": 0.058482684195041656,
|
|
"fcm_dpo/delta": -0.22425857186317444,
|
|
"fcm_dpo/margin": 13.763182640075684,
|
|
"fcm_dpo/q_t": 0.33089691400527954,
|
|
"grad_norm": 21.47138786315918,
|
|
"learning_rate": 3.1278699679526975e-07,
|
|
"logits/chosen": 0.46589696407318115,
|
|
"logits/rejected": 0.4175897240638733,
|
|
"logps/chosen": -65.06788635253906,
|
|
"logps/ref_chosen": -54.58137512207031,
|
|
"logps/ref_rejected": -72.77232360839844,
|
|
"logps/rejected": -97.02201843261719,
|
|
"loss": 0.8863,
|
|
"margin_dpo/margin_mean": 13.763182640075684,
|
|
"margin_dpo/margin_std": 14.472084045410156,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.48072562358276644,
|
|
"fcm_dpo/beta": 0.05881628394126892,
|
|
"fcm_dpo/delta": 0.09629607200622559,
|
|
"fcm_dpo/margin": 8.648832321166992,
|
|
"fcm_dpo/q_t": 0.4013023376464844,
|
|
"grad_norm": 22.732677459716797,
|
|
"learning_rate": 3.1150628432815336e-07,
|
|
"logits/chosen": 0.4817764163017273,
|
|
"logits/rejected": 0.420946329832077,
|
|
"logps/chosen": -66.19764709472656,
|
|
"logps/ref_chosen": -52.88822937011719,
|
|
"logps/ref_rejected": -80.63988494873047,
|
|
"logps/rejected": -102.59813690185547,
|
|
"loss": 1.2008,
|
|
"margin_dpo/margin_mean": 8.648832321166992,
|
|
"margin_dpo/margin_std": 18.586837768554688,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.48223733938019653,
|
|
"fcm_dpo/beta": 0.058495644479990005,
|
|
"fcm_dpo/delta": -0.06954564899206161,
|
|
"fcm_dpo/margin": 11.364679336547852,
|
|
"fcm_dpo/q_t": 0.3667501211166382,
|
|
"grad_norm": 20.766460418701172,
|
|
"learning_rate": 3.1022385139804707e-07,
|
|
"logits/chosen": 0.4081874191761017,
|
|
"logits/rejected": 0.3880736231803894,
|
|
"logps/chosen": -75.96013641357422,
|
|
"logps/ref_chosen": -64.36333465576172,
|
|
"logps/ref_rejected": -79.47296142578125,
|
|
"logps/rejected": -102.43443298339844,
|
|
"loss": 1.0272,
|
|
"margin_dpo/margin_mean": 11.364680290222168,
|
|
"margin_dpo/margin_std": 16.68465805053711,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.4837490551776266,
|
|
"fcm_dpo/beta": 0.05962809920310974,
|
|
"fcm_dpo/delta": 0.043348222970962524,
|
|
"fcm_dpo/margin": 9.29432201385498,
|
|
"fcm_dpo/q_t": 0.3930048942565918,
|
|
"grad_norm": 21.635061264038086,
|
|
"learning_rate": 3.0893973387735683e-07,
|
|
"logits/chosen": 0.32784324884414673,
|
|
"logits/rejected": 0.2866894602775574,
|
|
"logps/chosen": -60.18971633911133,
|
|
"logps/ref_chosen": -49.558746337890625,
|
|
"logps/ref_rejected": -71.23444366455078,
|
|
"logps/rejected": -91.15972900390625,
|
|
"loss": 1.1116,
|
|
"margin_dpo/margin_mean": 9.29432201385498,
|
|
"margin_dpo/margin_std": 15.848209381103516,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.4852607709750567,
|
|
"fcm_dpo/beta": 0.05789444223046303,
|
|
"fcm_dpo/delta": -0.04901197552680969,
|
|
"fcm_dpo/margin": 11.1171236038208,
|
|
"fcm_dpo/q_t": 0.3659276068210602,
|
|
"grad_norm": 22.60474967956543,
|
|
"learning_rate": 3.0765396768561004e-07,
|
|
"logits/chosen": 0.40224313735961914,
|
|
"logits/rejected": 0.3855370283126831,
|
|
"logps/chosen": -64.09539794921875,
|
|
"logps/ref_chosen": -52.08526611328125,
|
|
"logps/ref_rejected": -55.58674621582031,
|
|
"logps/rejected": -78.71399688720703,
|
|
"loss": 1.0382,
|
|
"margin_dpo/margin_mean": 11.1171236038208,
|
|
"margin_dpo/margin_std": 15.96502685546875,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.48677248677248675,
|
|
"fcm_dpo/beta": 0.056709811091423035,
|
|
"fcm_dpo/delta": -0.2197633981704712,
|
|
"fcm_dpo/margin": 14.153626441955566,
|
|
"fcm_dpo/q_t": 0.33018940687179565,
|
|
"grad_norm": 18.229646682739258,
|
|
"learning_rate": 3.063665887884511e-07,
|
|
"logits/chosen": 0.466824471950531,
|
|
"logits/rejected": 0.39176371693611145,
|
|
"logps/chosen": -61.10657501220703,
|
|
"logps/ref_chosen": -47.404109954833984,
|
|
"logps/ref_rejected": -73.4260025024414,
|
|
"logps/rejected": -101.28208923339844,
|
|
"loss": 0.888,
|
|
"margin_dpo/margin_mean": 14.153627395629883,
|
|
"margin_dpo/margin_std": 14.78386116027832,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.48828420256991684,
|
|
"fcm_dpo/beta": 0.05662210285663605,
|
|
"fcm_dpo/delta": 0.04281177371740341,
|
|
"fcm_dpo/margin": 9.853229522705078,
|
|
"fcm_dpo/q_t": 0.3951577842235565,
|
|
"grad_norm": 21.729673385620117,
|
|
"learning_rate": 3.0507763319663517e-07,
|
|
"logits/chosen": 0.36065399646759033,
|
|
"logits/rejected": 0.29074978828430176,
|
|
"logps/chosen": -84.03905487060547,
|
|
"logps/ref_chosen": -70.00630187988281,
|
|
"logps/ref_rejected": -86.96690368652344,
|
|
"logps/rejected": -110.8528823852539,
|
|
"loss": 1.1531,
|
|
"margin_dpo/margin_mean": 9.853229522705078,
|
|
"margin_dpo/margin_std": 19.110153198242188,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.4897959183673469,
|
|
"fcm_dpo/beta": 0.055441729724407196,
|
|
"fcm_dpo/delta": -0.06376861035823822,
|
|
"fcm_dpo/margin": 11.880046844482422,
|
|
"fcm_dpo/q_t": 0.3605431914329529,
|
|
"grad_norm": 22.298242568969727,
|
|
"learning_rate": 3.0378713696502097e-07,
|
|
"logits/chosen": 0.48270517587661743,
|
|
"logits/rejected": 0.4262162446975708,
|
|
"logps/chosen": -67.59870147705078,
|
|
"logps/ref_chosen": -55.88882064819336,
|
|
"logps/ref_rejected": -75.23088073730469,
|
|
"logps/rejected": -98.82080078125,
|
|
"loss": 0.9783,
|
|
"margin_dpo/margin_mean": 11.880046844482422,
|
|
"margin_dpo/margin_std": 14.770977973937988,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.491307634164777,
|
|
"fcm_dpo/beta": 0.05457047373056412,
|
|
"fcm_dpo/delta": -0.04917249083518982,
|
|
"fcm_dpo/margin": 11.787601470947266,
|
|
"fcm_dpo/q_t": 0.3657293915748596,
|
|
"grad_norm": 19.67741584777832,
|
|
"learning_rate": 3.0249513619156206e-07,
|
|
"logits/chosen": 0.43900197744369507,
|
|
"logits/rejected": 0.37955278158187866,
|
|
"logps/chosen": -79.83528137207031,
|
|
"logps/ref_chosen": -64.14701843261719,
|
|
"logps/ref_rejected": -79.91143798828125,
|
|
"logps/rejected": -107.38729858398438,
|
|
"loss": 1.0173,
|
|
"margin_dpo/margin_mean": 11.787601470947266,
|
|
"margin_dpo/margin_std": 16.2464542388916,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.4928193499622071,
|
|
"fcm_dpo/beta": 0.056850794702768326,
|
|
"fcm_dpo/delta": 0.23083430528640747,
|
|
"fcm_dpo/margin": 6.6530256271362305,
|
|
"fcm_dpo/q_t": 0.42433783411979675,
|
|
"grad_norm": 23.759443283081055,
|
|
"learning_rate": 3.012016670162977e-07,
|
|
"logits/chosen": 0.40460771322250366,
|
|
"logits/rejected": 0.40543869137763977,
|
|
"logps/chosen": -95.39392852783203,
|
|
"logps/ref_chosen": -75.53131103515625,
|
|
"logps/ref_rejected": -76.5898666381836,
|
|
"logps/rejected": -103.10550689697266,
|
|
"loss": 1.2501,
|
|
"margin_dpo/margin_mean": 6.6530256271362305,
|
|
"margin_dpo/margin_std": 16.712099075317383,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.4943310657596372,
|
|
"fcm_dpo/beta": 0.05837913602590561,
|
|
"fcm_dpo/delta": 0.055630847811698914,
|
|
"fcm_dpo/margin": 9.364798545837402,
|
|
"fcm_dpo/q_t": 0.39527618885040283,
|
|
"grad_norm": 25.109912872314453,
|
|
"learning_rate": 2.99906765620341e-07,
|
|
"logits/chosen": 0.35513800382614136,
|
|
"logits/rejected": 0.31463754177093506,
|
|
"logps/chosen": -87.85820770263672,
|
|
"logps/ref_chosen": -69.33717346191406,
|
|
"logps/ref_rejected": -73.37751770019531,
|
|
"logps/rejected": -101.26336669921875,
|
|
"loss": 1.1821,
|
|
"margin_dpo/margin_mean": 9.364799499511719,
|
|
"margin_dpo/margin_std": 19.344242095947266,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.4958427815570673,
|
|
"fcm_dpo/beta": 0.05786886066198349,
|
|
"fcm_dpo/delta": -0.04748005419969559,
|
|
"fcm_dpo/margin": 11.136017799377441,
|
|
"fcm_dpo/q_t": 0.37086835503578186,
|
|
"grad_norm": 21.607135772705078,
|
|
"learning_rate": 2.9861046822486766e-07,
|
|
"logits/chosen": 0.4023295044898987,
|
|
"logits/rejected": 0.37287038564682007,
|
|
"logps/chosen": -76.78651428222656,
|
|
"logps/ref_chosen": -61.70623016357422,
|
|
"logps/ref_rejected": -83.73808288574219,
|
|
"logps/rejected": -109.95437622070312,
|
|
"loss": 1.0223,
|
|
"margin_dpo/margin_mean": 11.136016845703125,
|
|
"margin_dpo/margin_std": 16.010103225708008,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.4973544973544973,
|
|
"fcm_dpo/beta": 0.056619107723236084,
|
|
"fcm_dpo/delta": -0.11470720171928406,
|
|
"fcm_dpo/margin": 12.474262237548828,
|
|
"fcm_dpo/q_t": 0.35940489172935486,
|
|
"grad_norm": 22.656681060791016,
|
|
"learning_rate": 2.9731281109010253e-07,
|
|
"logits/chosen": 0.49048396944999695,
|
|
"logits/rejected": 0.4374903738498688,
|
|
"logps/chosen": -82.60576629638672,
|
|
"logps/ref_chosen": -64.4984130859375,
|
|
"logps/ref_rejected": -83.6591796875,
|
|
"logps/rejected": -114.24079132080078,
|
|
"loss": 0.9853,
|
|
"margin_dpo/margin_mean": 12.474262237548828,
|
|
"margin_dpo/margin_std": 16.834518432617188,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.4988662131519274,
|
|
"fcm_dpo/beta": 0.05554806441068649,
|
|
"fcm_dpo/delta": -0.13988427817821503,
|
|
"fcm_dpo/margin": 13.138544082641602,
|
|
"fcm_dpo/q_t": 0.3539305031299591,
|
|
"grad_norm": 19.861940383911133,
|
|
"learning_rate": 2.9601383051430505e-07,
|
|
"logits/chosen": 0.4434637427330017,
|
|
"logits/rejected": 0.37669873237609863,
|
|
"logps/chosen": -69.35079193115234,
|
|
"logps/ref_chosen": -54.80464172363281,
|
|
"logps/ref_rejected": -75.3194351196289,
|
|
"logps/rejected": -103.00413513183594,
|
|
"loss": 1.0449,
|
|
"margin_dpo/margin_mean": 13.138545036315918,
|
|
"margin_dpo/margin_std": 19.897079467773438,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.5003779289493575,
|
|
"fcm_dpo/beta": 0.05216227471828461,
|
|
"fcm_dpo/delta": -0.3423612713813782,
|
|
"fcm_dpo/margin": 17.468751907348633,
|
|
"fcm_dpo/q_t": 0.31727951765060425,
|
|
"grad_norm": 18.028335571289062,
|
|
"learning_rate": 2.947135628327544e-07,
|
|
"logits/chosen": 0.5425066947937012,
|
|
"logits/rejected": 0.5136238932609558,
|
|
"logps/chosen": -75.56310272216797,
|
|
"logps/ref_chosen": -59.242584228515625,
|
|
"logps/ref_rejected": -69.87483215332031,
|
|
"logps/rejected": -103.66410827636719,
|
|
"loss": 0.8929,
|
|
"margin_dpo/margin_mean": 17.468753814697266,
|
|
"margin_dpo/margin_std": 20.137441635131836,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.5018896447467877,
|
|
"fcm_dpo/beta": 0.050989434123039246,
|
|
"fcm_dpo/delta": -0.1064976155757904,
|
|
"fcm_dpo/margin": 13.684735298156738,
|
|
"fcm_dpo/q_t": 0.3593630790710449,
|
|
"grad_norm": 18.97589111328125,
|
|
"learning_rate": 2.934120444167326e-07,
|
|
"logits/chosen": 0.4010659456253052,
|
|
"logits/rejected": 0.3557261824607849,
|
|
"logps/chosen": -85.42317199707031,
|
|
"logps/ref_chosen": -67.10975646972656,
|
|
"logps/ref_rejected": -77.11839294433594,
|
|
"logps/rejected": -109.11653900146484,
|
|
"loss": 0.9728,
|
|
"margin_dpo/margin_mean": 13.684735298156738,
|
|
"margin_dpo/margin_std": 17.230125427246094,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.5034013605442177,
|
|
"fcm_dpo/beta": 0.04889250546693802,
|
|
"fcm_dpo/delta": -0.1948154717683792,
|
|
"fcm_dpo/margin": 15.953669548034668,
|
|
"fcm_dpo/q_t": 0.3400271236896515,
|
|
"grad_norm": 16.75796890258789,
|
|
"learning_rate": 2.921093116725076e-07,
|
|
"logits/chosen": 0.47107183933258057,
|
|
"logits/rejected": 0.40521296858787537,
|
|
"logps/chosen": -77.97359466552734,
|
|
"logps/ref_chosen": -58.381134033203125,
|
|
"logps/ref_rejected": -85.02839660644531,
|
|
"logps/rejected": -120.57453155517578,
|
|
"loss": 0.9096,
|
|
"margin_dpo/margin_mean": 15.953670501708984,
|
|
"margin_dpo/margin_std": 17.924297332763672,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.5049130763416477,
|
|
"fcm_dpo/beta": 0.04890444874763489,
|
|
"fcm_dpo/delta": 0.07669065147638321,
|
|
"fcm_dpo/margin": 10.776724815368652,
|
|
"fcm_dpo/q_t": 0.3956913352012634,
|
|
"grad_norm": 21.935901641845703,
|
|
"learning_rate": 2.9080540104031484e-07,
|
|
"logits/chosen": 0.47279083728790283,
|
|
"logits/rejected": 0.4287077784538269,
|
|
"logps/chosen": -87.2547378540039,
|
|
"logps/ref_chosen": -66.89199829101562,
|
|
"logps/ref_rejected": -91.83695220947266,
|
|
"logps/rejected": -122.97640991210938,
|
|
"loss": 1.1689,
|
|
"margin_dpo/margin_mean": 10.776723861694336,
|
|
"margin_dpo/margin_std": 21.717811584472656,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.5064247921390779,
|
|
"fcm_dpo/beta": 0.048929620534181595,
|
|
"fcm_dpo/delta": -0.038483135402202606,
|
|
"fcm_dpo/margin": 12.993217468261719,
|
|
"fcm_dpo/q_t": 0.37484511733055115,
|
|
"grad_norm": 22.736284255981445,
|
|
"learning_rate": 2.895003489933375e-07,
|
|
"logits/chosen": 0.4713771939277649,
|
|
"logits/rejected": 0.4355486035346985,
|
|
"logps/chosen": -81.71247100830078,
|
|
"logps/ref_chosen": -61.51445770263672,
|
|
"logps/ref_rejected": -75.68916320800781,
|
|
"logps/rejected": -108.8803939819336,
|
|
"loss": 1.0595,
|
|
"margin_dpo/margin_mean": 12.993217468261719,
|
|
"margin_dpo/margin_std": 20.287212371826172,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.5079365079365079,
|
|
"fcm_dpo/beta": 0.04836486279964447,
|
|
"fcm_dpo/delta": 0.02678491175174713,
|
|
"fcm_dpo/margin": 11.85739517211914,
|
|
"fcm_dpo/q_t": 0.38848936557769775,
|
|
"grad_norm": 20.710521697998047,
|
|
"learning_rate": 2.8819419203668675e-07,
|
|
"logits/chosen": 0.40805622935295105,
|
|
"logits/rejected": 0.3888956606388092,
|
|
"logps/chosen": -94.53564453125,
|
|
"logps/ref_chosen": -68.85006713867188,
|
|
"logps/ref_rejected": -92.99603271484375,
|
|
"logps/rejected": -130.53900146484375,
|
|
"loss": 1.0894,
|
|
"margin_dpo/margin_mean": 11.857397079467773,
|
|
"margin_dpo/margin_std": 19.84499740600586,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.509448223733938,
|
|
"fcm_dpo/beta": 0.049236997961997986,
|
|
"fcm_dpo/delta": 0.05038725584745407,
|
|
"fcm_dpo/margin": 11.218805313110352,
|
|
"fcm_dpo/q_t": 0.3871212899684906,
|
|
"grad_norm": 19.411130905151367,
|
|
"learning_rate": 2.8688696670638053e-07,
|
|
"logits/chosen": 0.3875211775302887,
|
|
"logits/rejected": 0.35309669375419617,
|
|
"logps/chosen": -97.92208862304688,
|
|
"logps/ref_chosen": -73.18783569335938,
|
|
"logps/ref_rejected": -86.89118957519531,
|
|
"logps/rejected": -122.84425354003906,
|
|
"loss": 1.0883,
|
|
"margin_dpo/margin_mean": 11.218805313110352,
|
|
"margin_dpo/margin_std": 18.6793212890625,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.5109599395313681,
|
|
"fcm_dpo/beta": 0.04963863641023636,
|
|
"fcm_dpo/delta": 0.037454307079315186,
|
|
"fcm_dpo/margin": 11.375207901000977,
|
|
"fcm_dpo/q_t": 0.38713350892066956,
|
|
"grad_norm": 20.63473129272461,
|
|
"learning_rate": 2.8557870956832133e-07,
|
|
"logits/chosen": 0.42949801683425903,
|
|
"logits/rejected": 0.4031613767147064,
|
|
"logps/chosen": -88.67749786376953,
|
|
"logps/ref_chosen": -63.939613342285156,
|
|
"logps/ref_rejected": -75.34243774414062,
|
|
"logps/rejected": -111.45553588867188,
|
|
"loss": 1.1082,
|
|
"margin_dpo/margin_mean": 11.37520694732666,
|
|
"margin_dpo/margin_std": 20.042879104614258,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.5124716553287982,
|
|
"fcm_dpo/beta": 0.04877196252346039,
|
|
"fcm_dpo/delta": -0.09286697953939438,
|
|
"fcm_dpo/margin": 14.053956031799316,
|
|
"fcm_dpo/q_t": 0.36065682768821716,
|
|
"grad_norm": 19.03911781311035,
|
|
"learning_rate": 2.842694572172736e-07,
|
|
"logits/chosen": 0.5791463255882263,
|
|
"logits/rejected": 0.4974604845046997,
|
|
"logps/chosen": -65.61044311523438,
|
|
"logps/ref_chosen": -45.54913330078125,
|
|
"logps/ref_rejected": -67.0482177734375,
|
|
"logps/rejected": -101.16348266601562,
|
|
"loss": 0.9885,
|
|
"margin_dpo/margin_mean": 14.053956985473633,
|
|
"margin_dpo/margin_std": 18.47199058532715,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.5139833711262283,
|
|
"fcm_dpo/beta": 0.048321135342121124,
|
|
"fcm_dpo/delta": -0.12813809514045715,
|
|
"fcm_dpo/margin": 14.868640899658203,
|
|
"fcm_dpo/q_t": 0.36593562364578247,
|
|
"grad_norm": 19.047794342041016,
|
|
"learning_rate": 2.8295924627584004e-07,
|
|
"logits/chosen": 0.4682982563972473,
|
|
"logits/rejected": 0.44503962993621826,
|
|
"logps/chosen": -78.10293579101562,
|
|
"logps/ref_chosen": -54.00564956665039,
|
|
"logps/ref_rejected": -61.314430236816406,
|
|
"logps/rejected": -100.28036499023438,
|
|
"loss": 1.065,
|
|
"margin_dpo/margin_mean": 14.868640899658203,
|
|
"margin_dpo/margin_std": 23.708301544189453,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.5154950869236583,
|
|
"fcm_dpo/beta": 0.04579398036003113,
|
|
"fcm_dpo/delta": -0.11630547046661377,
|
|
"fcm_dpo/margin": 15.230375289916992,
|
|
"fcm_dpo/q_t": 0.3636672794818878,
|
|
"grad_norm": 19.403221130371094,
|
|
"learning_rate": 2.816481133934373e-07,
|
|
"logits/chosen": 0.48653745651245117,
|
|
"logits/rejected": 0.4402218759059906,
|
|
"logps/chosen": -86.47044372558594,
|
|
"logps/ref_chosen": -63.39509582519531,
|
|
"logps/ref_rejected": -76.20973205566406,
|
|
"logps/rejected": -114.51545715332031,
|
|
"loss": 1.0637,
|
|
"margin_dpo/margin_mean": 15.230375289916992,
|
|
"margin_dpo/margin_std": 22.663076400756836,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.5170068027210885,
|
|
"fcm_dpo/beta": 0.04497259855270386,
|
|
"fcm_dpo/delta": -0.1905035674571991,
|
|
"fcm_dpo/margin": 17.22130012512207,
|
|
"fcm_dpo/q_t": 0.348552405834198,
|
|
"grad_norm": 16.44388198852539,
|
|
"learning_rate": 2.8033609524527046e-07,
|
|
"logits/chosen": 0.5286259055137634,
|
|
"logits/rejected": 0.48626136779785156,
|
|
"logps/chosen": -76.10055541992188,
|
|
"logps/ref_chosen": -53.047813415527344,
|
|
"logps/ref_rejected": -68.2854232788086,
|
|
"logps/rejected": -108.55946350097656,
|
|
"loss": 0.954,
|
|
"margin_dpo/margin_mean": 17.22130012512207,
|
|
"margin_dpo/margin_std": 21.990345001220703,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.5185185185185185,
|
|
"fcm_dpo/beta": 0.04516913741827011,
|
|
"fcm_dpo/delta": 0.15312719345092773,
|
|
"fcm_dpo/margin": 10.011658668518066,
|
|
"fcm_dpo/q_t": 0.40422749519348145,
|
|
"grad_norm": 18.471750259399414,
|
|
"learning_rate": 2.7902322853130753e-07,
|
|
"logits/chosen": 0.4091396927833557,
|
|
"logits/rejected": 0.40280401706695557,
|
|
"logps/chosen": -92.39411926269531,
|
|
"logps/ref_chosen": -70.57852935791016,
|
|
"logps/ref_rejected": -84.73873901367188,
|
|
"logps/rejected": -116.56597900390625,
|
|
"loss": 1.1685,
|
|
"margin_dpo/margin_mean": 10.011658668518066,
|
|
"margin_dpo/margin_std": 19.0828800201416,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.5200302343159486,
|
|
"fcm_dpo/beta": 0.04532856121659279,
|
|
"fcm_dpo/delta": -0.08428593724966049,
|
|
"fcm_dpo/margin": 14.950438499450684,
|
|
"fcm_dpo/q_t": 0.3651253283023834,
|
|
"grad_norm": 19.586933135986328,
|
|
"learning_rate": 2.7770954997525274e-07,
|
|
"logits/chosen": 0.5108197927474976,
|
|
"logits/rejected": 0.4525598883628845,
|
|
"logps/chosen": -82.04597473144531,
|
|
"logps/ref_chosen": -55.811004638671875,
|
|
"logps/ref_rejected": -84.77637481689453,
|
|
"logps/rejected": -125.96177673339844,
|
|
"loss": 1.0386,
|
|
"margin_dpo/margin_mean": 14.950438499450684,
|
|
"margin_dpo/margin_std": 22.50165557861328,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.5215419501133787,
|
|
"fcm_dpo/beta": 0.04496072232723236,
|
|
"fcm_dpo/delta": -0.06477053463459015,
|
|
"fcm_dpo/margin": 14.691195487976074,
|
|
"fcm_dpo/q_t": 0.3674304783344269,
|
|
"grad_norm": 18.69040870666504,
|
|
"learning_rate": 2.7639509632351927e-07,
|
|
"logits/chosen": 0.5651530623435974,
|
|
"logits/rejected": 0.5218570232391357,
|
|
"logps/chosen": -76.76480102539062,
|
|
"logps/ref_chosen": -57.78609848022461,
|
|
"logps/ref_rejected": -78.91847229003906,
|
|
"logps/rejected": -112.58836364746094,
|
|
"loss": 1.038,
|
|
"margin_dpo/margin_mean": 14.691194534301758,
|
|
"margin_dpo/margin_std": 22.197650909423828,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.5230536659108088,
|
|
"fcm_dpo/beta": 0.04380536079406738,
|
|
"fcm_dpo/delta": -0.18842804431915283,
|
|
"fcm_dpo/margin": 17.673648834228516,
|
|
"fcm_dpo/q_t": 0.3476044535636902,
|
|
"grad_norm": 19.525056838989258,
|
|
"learning_rate": 2.7507990434420123e-07,
|
|
"logits/chosen": 0.5473288297653198,
|
|
"logits/rejected": 0.47578173875808716,
|
|
"logps/chosen": -77.24856567382812,
|
|
"logps/ref_chosen": -56.285125732421875,
|
|
"logps/ref_rejected": -91.15303039550781,
|
|
"logps/rejected": -129.7901153564453,
|
|
"loss": 0.9867,
|
|
"margin_dpo/margin_mean": 17.673648834228516,
|
|
"margin_dpo/margin_std": 23.69552993774414,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.5245653817082389,
|
|
"fcm_dpo/beta": 0.04321366548538208,
|
|
"fcm_dpo/delta": 0.0015529077500104904,
|
|
"fcm_dpo/margin": 13.848976135253906,
|
|
"fcm_dpo/q_t": 0.385122686624527,
|
|
"grad_norm": 19.80738639831543,
|
|
"learning_rate": 2.737640108260456e-07,
|
|
"logits/chosen": 0.6206649541854858,
|
|
"logits/rejected": 0.5723800659179688,
|
|
"logps/chosen": -78.5164566040039,
|
|
"logps/ref_chosen": -53.499542236328125,
|
|
"logps/ref_rejected": -72.52565002441406,
|
|
"logps/rejected": -111.39154815673828,
|
|
"loss": 1.0826,
|
|
"margin_dpo/margin_mean": 13.848976135253906,
|
|
"margin_dpo/margin_std": 23.184226989746094,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.5260770975056689,
|
|
"fcm_dpo/beta": 0.04169192165136337,
|
|
"fcm_dpo/delta": -0.18153540790081024,
|
|
"fcm_dpo/margin": 18.360071182250977,
|
|
"fcm_dpo/q_t": 0.35915493965148926,
|
|
"grad_norm": 17.797361373901367,
|
|
"learning_rate": 2.724474525774229e-07,
|
|
"logits/chosen": 0.6300745606422424,
|
|
"logits/rejected": 0.600340723991394,
|
|
"logps/chosen": -72.2784194946289,
|
|
"logps/ref_chosen": -50.78684997558594,
|
|
"logps/ref_rejected": -68.63732147216797,
|
|
"logps/rejected": -108.48896026611328,
|
|
"loss": 1.005,
|
|
"margin_dpo/margin_mean": 18.360071182250977,
|
|
"margin_dpo/margin_std": 26.658245086669922,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.527588813303099,
|
|
"fcm_dpo/beta": 0.041053976863622665,
|
|
"fcm_dpo/delta": -0.12168022990226746,
|
|
"fcm_dpo/margin": 17.36975860595703,
|
|
"fcm_dpo/q_t": 0.35872676968574524,
|
|
"grad_norm": 18.761396408081055,
|
|
"learning_rate": 2.711302664252973e-07,
|
|
"logits/chosen": 0.5437895655632019,
|
|
"logits/rejected": 0.4607025980949402,
|
|
"logps/chosen": -75.03057861328125,
|
|
"logps/ref_chosen": -53.325008392333984,
|
|
"logps/ref_rejected": -83.21236419677734,
|
|
"logps/rejected": -122.28768157958984,
|
|
"loss": 1.002,
|
|
"margin_dpo/margin_mean": 17.36975860595703,
|
|
"margin_dpo/margin_std": 24.134292602539062,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.5291005291005291,
|
|
"fcm_dpo/beta": 0.03879928961396217,
|
|
"fcm_dpo/delta": -0.27879321575164795,
|
|
"fcm_dpo/margin": 22.000972747802734,
|
|
"fcm_dpo/q_t": 0.32647570967674255,
|
|
"grad_norm": 18.498384475708008,
|
|
"learning_rate": 2.698124892141971e-07,
|
|
"logits/chosen": 0.5079820156097412,
|
|
"logits/rejected": 0.4346851408481598,
|
|
"logps/chosen": -85.52665710449219,
|
|
"logps/ref_chosen": -61.625770568847656,
|
|
"logps/ref_rejected": -87.63627624511719,
|
|
"logps/rejected": -133.53811645507812,
|
|
"loss": 0.8827,
|
|
"margin_dpo/margin_mean": 22.000972747802734,
|
|
"margin_dpo/margin_std": 24.152040481567383,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.5306122448979592,
|
|
"fcm_dpo/beta": 0.037720829248428345,
|
|
"fcm_dpo/delta": -0.10465708374977112,
|
|
"fcm_dpo/margin": 18.47935676574707,
|
|
"fcm_dpo/q_t": 0.3536849617958069,
|
|
"grad_norm": 15.722846031188965,
|
|
"learning_rate": 2.6849415780518357e-07,
|
|
"logits/chosen": 0.4817102551460266,
|
|
"logits/rejected": 0.4085959196090698,
|
|
"logps/chosen": -79.06599426269531,
|
|
"logps/ref_chosen": -56.2563362121582,
|
|
"logps/ref_rejected": -79.11589813232422,
|
|
"logps/rejected": -120.4049072265625,
|
|
"loss": 1.0295,
|
|
"margin_dpo/margin_mean": 18.47935676574707,
|
|
"margin_dpo/margin_std": 26.676305770874023,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.5321239606953893,
|
|
"fcm_dpo/beta": 0.03718000277876854,
|
|
"fcm_dpo/delta": -0.042777154594659805,
|
|
"fcm_dpo/margin": 17.197158813476562,
|
|
"fcm_dpo/q_t": 0.3716466426849365,
|
|
"grad_norm": 17.15549087524414,
|
|
"learning_rate": 2.6717530907482024e-07,
|
|
"logits/chosen": 0.5181227922439575,
|
|
"logits/rejected": 0.46758154034614563,
|
|
"logps/chosen": -85.7076644897461,
|
|
"logps/ref_chosen": -63.05195236206055,
|
|
"logps/ref_rejected": -85.52035522460938,
|
|
"logps/rejected": -125.37321472167969,
|
|
"loss": 1.0158,
|
|
"margin_dpo/margin_mean": 17.197158813476562,
|
|
"margin_dpo/margin_std": 24.20104217529297,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.5336356764928194,
|
|
"fcm_dpo/beta": 0.03698758780956268,
|
|
"fcm_dpo/delta": -0.08900751918554306,
|
|
"fcm_dpo/margin": 18.460941314697266,
|
|
"fcm_dpo/q_t": 0.3600703477859497,
|
|
"grad_norm": 17.056467056274414,
|
|
"learning_rate": 2.658559799141411e-07,
|
|
"logits/chosen": 0.5669195055961609,
|
|
"logits/rejected": 0.5683079957962036,
|
|
"logps/chosen": -90.16744232177734,
|
|
"logps/ref_chosen": -69.00918579101562,
|
|
"logps/ref_rejected": -72.65840148925781,
|
|
"logps/rejected": -112.27760314941406,
|
|
"loss": 1.0175,
|
|
"margin_dpo/margin_mean": 18.460941314697266,
|
|
"margin_dpo/margin_std": 26.078454971313477,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.5351473922902494,
|
|
"fcm_dpo/beta": 0.03558646887540817,
|
|
"fcm_dpo/delta": -0.14204002916812897,
|
|
"fcm_dpo/margin": 20.501020431518555,
|
|
"fcm_dpo/q_t": 0.35099613666534424,
|
|
"grad_norm": 18.90435028076172,
|
|
"learning_rate": 2.6453620722761895e-07,
|
|
"logits/chosen": 0.5953085422515869,
|
|
"logits/rejected": 0.4663833975791931,
|
|
"logps/chosen": -61.88752365112305,
|
|
"logps/ref_chosen": -39.78833770751953,
|
|
"logps/ref_rejected": -69.56885528564453,
|
|
"logps/rejected": -112.1690673828125,
|
|
"loss": 0.9858,
|
|
"margin_dpo/margin_mean": 20.501022338867188,
|
|
"margin_dpo/margin_std": 27.029680252075195,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.5366591080876795,
|
|
"fcm_dpo/beta": 0.03496289253234863,
|
|
"fcm_dpo/delta": -0.1490025818347931,
|
|
"fcm_dpo/margin": 21.115114212036133,
|
|
"fcm_dpo/q_t": 0.34892308712005615,
|
|
"grad_norm": 16.861616134643555,
|
|
"learning_rate": 2.632160279321328e-07,
|
|
"logits/chosen": 0.5601600408554077,
|
|
"logits/rejected": 0.43914663791656494,
|
|
"logps/chosen": -71.95262145996094,
|
|
"logps/ref_chosen": -46.25537872314453,
|
|
"logps/ref_rejected": -78.20236206054688,
|
|
"logps/rejected": -125.01471710205078,
|
|
"loss": 0.9849,
|
|
"margin_dpo/margin_mean": 21.115114212036133,
|
|
"margin_dpo/margin_std": 27.885860443115234,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.5381708238851096,
|
|
"fcm_dpo/beta": 0.03437604755163193,
|
|
"fcm_dpo/delta": 0.0072648786008358,
|
|
"fcm_dpo/margin": 17.235088348388672,
|
|
"fcm_dpo/q_t": 0.3816342353820801,
|
|
"grad_norm": 17.55934715270996,
|
|
"learning_rate": 2.618954789559356e-07,
|
|
"logits/chosen": 0.5410733222961426,
|
|
"logits/rejected": 0.4691797196865082,
|
|
"logps/chosen": -71.19071197509766,
|
|
"logps/ref_chosen": -47.906158447265625,
|
|
"logps/ref_rejected": -74.29397583007812,
|
|
"logps/rejected": -114.81361389160156,
|
|
"loss": 1.1678,
|
|
"margin_dpo/margin_mean": 17.235088348388672,
|
|
"margin_dpo/margin_std": 33.58668899536133,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.5396825396825397,
|
|
"fcm_dpo/beta": 0.03437814116477966,
|
|
"fcm_dpo/delta": 0.050616808235645294,
|
|
"fcm_dpo/margin": 15.968550682067871,
|
|
"fcm_dpo/q_t": 0.38629966974258423,
|
|
"grad_norm": 17.052623748779297,
|
|
"learning_rate": 2.6057459723762076e-07,
|
|
"logits/chosen": 0.5412014722824097,
|
|
"logits/rejected": 0.5130825042724609,
|
|
"logps/chosen": -92.02487182617188,
|
|
"logps/ref_chosen": -62.63500213623047,
|
|
"logps/ref_rejected": -65.11399841308594,
|
|
"logps/rejected": -110.47242736816406,
|
|
"loss": 1.0998,
|
|
"margin_dpo/margin_mean": 15.968551635742188,
|
|
"margin_dpo/margin_std": 25.727272033691406,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.5411942554799698,
|
|
"fcm_dpo/beta": 0.034367240965366364,
|
|
"fcm_dpo/delta": -0.15965090692043304,
|
|
"fcm_dpo/margin": 21.760364532470703,
|
|
"fcm_dpo/q_t": 0.3511369824409485,
|
|
"grad_norm": 21.150100708007812,
|
|
"learning_rate": 2.5925341972508954e-07,
|
|
"logits/chosen": 0.4965895712375641,
|
|
"logits/rejected": 0.5016806125640869,
|
|
"logps/chosen": -94.08679962158203,
|
|
"logps/ref_chosen": -67.20960998535156,
|
|
"logps/ref_rejected": -69.34715270996094,
|
|
"logps/rejected": -117.98471069335938,
|
|
"loss": 0.978,
|
|
"margin_dpo/margin_mean": 21.760364532470703,
|
|
"margin_dpo/margin_std": 28.61833953857422,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.5427059712773998,
|
|
"fcm_dpo/beta": 0.035116057842969894,
|
|
"fcm_dpo/delta": 0.17041327059268951,
|
|
"fcm_dpo/margin": 12.320904731750488,
|
|
"fcm_dpo/q_t": 0.4136947691440582,
|
|
"grad_norm": 18.667757034301758,
|
|
"learning_rate": 2.579319833745169e-07,
|
|
"logits/chosen": 0.5113773941993713,
|
|
"logits/rejected": 0.4831411838531494,
|
|
"logps/chosen": -92.3634033203125,
|
|
"logps/ref_chosen": -62.52578353881836,
|
|
"logps/ref_rejected": -76.63114929199219,
|
|
"logps/rejected": -118.7896728515625,
|
|
"loss": 1.1787,
|
|
"margin_dpo/margin_mean": 12.320904731750488,
|
|
"margin_dpo/margin_std": 24.88362693786621,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.54421768707483,
|
|
"fcm_dpo/beta": 0.03468718379735947,
|
|
"fcm_dpo/delta": -0.04342162236571312,
|
|
"fcm_dpo/margin": 18.45904541015625,
|
|
"fcm_dpo/q_t": 0.37331700325012207,
|
|
"grad_norm": 17.929542541503906,
|
|
"learning_rate": 2.5661032514931834e-07,
|
|
"logits/chosen": 0.45557552576065063,
|
|
"logits/rejected": 0.3662692904472351,
|
|
"logps/chosen": -93.12303161621094,
|
|
"logps/ref_chosen": -63.48772048950195,
|
|
"logps/ref_rejected": -90.6891098022461,
|
|
"logps/rejected": -138.78347778320312,
|
|
"loss": 1.0288,
|
|
"margin_dpo/margin_mean": 18.45904541015625,
|
|
"margin_dpo/margin_std": 27.04231834411621,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.54572940287226,
|
|
"fcm_dpo/beta": 0.03404659405350685,
|
|
"fcm_dpo/delta": -0.1389157474040985,
|
|
"fcm_dpo/margin": 21.408405303955078,
|
|
"fcm_dpo/q_t": 0.3507522940635681,
|
|
"grad_norm": 20.36210823059082,
|
|
"learning_rate": 2.552884820191154e-07,
|
|
"logits/chosen": 0.6088442206382751,
|
|
"logits/rejected": 0.5571874380111694,
|
|
"logps/chosen": -86.91483306884766,
|
|
"logps/ref_chosen": -57.917144775390625,
|
|
"logps/ref_rejected": -72.39089965820312,
|
|
"logps/rejected": -122.7969970703125,
|
|
"loss": 0.973,
|
|
"margin_dpo/margin_mean": 21.408405303955078,
|
|
"margin_dpo/margin_std": 27.88947296142578,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.54724111866969,
|
|
"fcm_dpo/beta": 0.033488351851701736,
|
|
"fcm_dpo/delta": -0.12048141658306122,
|
|
"fcm_dpo/margin": 21.240074157714844,
|
|
"fcm_dpo/q_t": 0.3562188744544983,
|
|
"grad_norm": 17.9212589263916,
|
|
"learning_rate": 2.53966490958702e-07,
|
|
"logits/chosen": 0.6075149774551392,
|
|
"logits/rejected": 0.5085099935531616,
|
|
"logps/chosen": -91.7391357421875,
|
|
"logps/ref_chosen": -63.4434700012207,
|
|
"logps/ref_rejected": -103.45516967773438,
|
|
"logps/rejected": -152.99090576171875,
|
|
"loss": 1.004,
|
|
"margin_dpo/margin_mean": 21.240074157714844,
|
|
"margin_dpo/margin_std": 29.03826141357422,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.5487528344671202,
|
|
"fcm_dpo/beta": 0.032427407801151276,
|
|
"fcm_dpo/delta": -0.09851216524839401,
|
|
"fcm_dpo/margin": 21.322847366333008,
|
|
"fcm_dpo/q_t": 0.3581845164299011,
|
|
"grad_norm": 19.257793426513672,
|
|
"learning_rate": 2.526443889470099e-07,
|
|
"logits/chosen": 0.6183967590332031,
|
|
"logits/rejected": 0.49492183327674866,
|
|
"logps/chosen": -81.70713806152344,
|
|
"logps/ref_chosen": -48.65182876586914,
|
|
"logps/ref_rejected": -88.65904235839844,
|
|
"logps/rejected": -143.03720092773438,
|
|
"loss": 0.9931,
|
|
"margin_dpo/margin_mean": 21.322847366333008,
|
|
"margin_dpo/margin_std": 28.826107025146484,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.5502645502645502,
|
|
"fcm_dpo/beta": 0.03153586387634277,
|
|
"fcm_dpo/delta": -0.14443664252758026,
|
|
"fcm_dpo/margin": 23.249065399169922,
|
|
"fcm_dpo/q_t": 0.35703152418136597,
|
|
"grad_norm": 15.726482391357422,
|
|
"learning_rate": 2.513222129660744e-07,
|
|
"logits/chosen": 0.45400315523147583,
|
|
"logits/rejected": 0.36352038383483887,
|
|
"logps/chosen": -86.7956314086914,
|
|
"logps/ref_chosen": -57.87107467651367,
|
|
"logps/ref_rejected": -80.95503234863281,
|
|
"logps/rejected": -133.12864685058594,
|
|
"loss": 1.0311,
|
|
"margin_dpo/margin_mean": 23.249065399169922,
|
|
"margin_dpo/margin_std": 34.755950927734375,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.5517762660619804,
|
|
"fcm_dpo/beta": 0.031077580526471138,
|
|
"fcm_dpo/delta": -0.06097899004817009,
|
|
"fcm_dpo/margin": 21.13581085205078,
|
|
"fcm_dpo/q_t": 0.36084866523742676,
|
|
"grad_norm": 14.798433303833008,
|
|
"learning_rate": 2.5e-07,
|
|
"logits/chosen": 0.5402770042419434,
|
|
"logits/rejected": 0.5327832102775574,
|
|
"logps/chosen": -89.29142761230469,
|
|
"logps/ref_chosen": -64.94217681884766,
|
|
"logps/ref_rejected": -74.8599853515625,
|
|
"logps/rejected": -120.34504699707031,
|
|
"loss": 0.977,
|
|
"margin_dpo/margin_mean": 21.135812759399414,
|
|
"margin_dpo/margin_std": 25.899639129638672,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.5532879818594104,
|
|
"fcm_dpo/beta": 0.031172068789601326,
|
|
"fcm_dpo/delta": -0.010473225265741348,
|
|
"fcm_dpo/margin": 19.541061401367188,
|
|
"fcm_dpo/q_t": 0.3788078725337982,
|
|
"grad_norm": 16.791519165039062,
|
|
"learning_rate": 2.486777870339255e-07,
|
|
"logits/chosen": 0.4921402931213379,
|
|
"logits/rejected": 0.4748532474040985,
|
|
"logps/chosen": -78.04362487792969,
|
|
"logps/ref_chosen": -55.16598129272461,
|
|
"logps/ref_rejected": -65.26121520996094,
|
|
"logps/rejected": -107.67991638183594,
|
|
"loss": 1.0843,
|
|
"margin_dpo/margin_mean": 19.541061401367188,
|
|
"margin_dpo/margin_std": 31.972339630126953,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.5547996976568406,
|
|
"fcm_dpo/beta": 0.030978696420788765,
|
|
"fcm_dpo/delta": 0.030442271381616592,
|
|
"fcm_dpo/margin": 18.435543060302734,
|
|
"fcm_dpo/q_t": 0.3786276578903198,
|
|
"grad_norm": 17.63971710205078,
|
|
"learning_rate": 2.4735561105299014e-07,
|
|
"logits/chosen": 0.5248080492019653,
|
|
"logits/rejected": 0.4257400929927826,
|
|
"logps/chosen": -84.86387634277344,
|
|
"logps/ref_chosen": -56.01046371459961,
|
|
"logps/ref_rejected": -77.31010437011719,
|
|
"logps/rejected": -124.59906768798828,
|
|
"loss": 1.0777,
|
|
"margin_dpo/margin_mean": 18.435543060302734,
|
|
"margin_dpo/margin_std": 29.5982666015625,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.5563114134542706,
|
|
"fcm_dpo/beta": 0.031203612685203552,
|
|
"fcm_dpo/delta": 0.0020916834473609924,
|
|
"fcm_dpo/margin": 19.164134979248047,
|
|
"fcm_dpo/q_t": 0.3759109377861023,
|
|
"grad_norm": 17.352895736694336,
|
|
"learning_rate": 2.46033509041298e-07,
|
|
"logits/chosen": 0.40486133098602295,
|
|
"logits/rejected": 0.39880359172821045,
|
|
"logps/chosen": -106.20714569091797,
|
|
"logps/ref_chosen": -74.82927703857422,
|
|
"logps/ref_rejected": -76.11680603027344,
|
|
"logps/rejected": -126.65880584716797,
|
|
"loss": 1.066,
|
|
"margin_dpo/margin_mean": 19.164134979248047,
|
|
"margin_dpo/margin_std": 29.87588882446289,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.5578231292517006,
|
|
"fcm_dpo/beta": 0.031594499945640564,
|
|
"fcm_dpo/delta": 0.048815835267305374,
|
|
"fcm_dpo/margin": 17.495262145996094,
|
|
"fcm_dpo/q_t": 0.3873006999492645,
|
|
"grad_norm": 17.4019832611084,
|
|
"learning_rate": 2.447115179808846e-07,
|
|
"logits/chosen": 0.5595611929893494,
|
|
"logits/rejected": 0.5114997029304504,
|
|
"logps/chosen": -87.9697494506836,
|
|
"logps/ref_chosen": -58.32621765136719,
|
|
"logps/ref_rejected": -80.92183685302734,
|
|
"logps/rejected": -128.0606231689453,
|
|
"loss": 1.094,
|
|
"margin_dpo/margin_mean": 17.495262145996094,
|
|
"margin_dpo/margin_std": 29.117238998413086,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.5593348450491308,
|
|
"fcm_dpo/beta": 0.030709534883499146,
|
|
"fcm_dpo/delta": -0.15342025458812714,
|
|
"fcm_dpo/margin": 24.153587341308594,
|
|
"fcm_dpo/q_t": 0.35305267572402954,
|
|
"grad_norm": 16.30162811279297,
|
|
"learning_rate": 2.4338967485068164e-07,
|
|
"logits/chosen": 0.6652737259864807,
|
|
"logits/rejected": 0.5999115705490112,
|
|
"logps/chosen": -82.10697937011719,
|
|
"logps/ref_chosen": -52.88372039794922,
|
|
"logps/ref_rejected": -79.43692016601562,
|
|
"logps/rejected": -132.8137664794922,
|
|
"loss": 1.0301,
|
|
"margin_dpo/margin_mean": 24.153587341308594,
|
|
"margin_dpo/margin_std": 36.119441986083984,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.5608465608465608,
|
|
"fcm_dpo/beta": 0.030408132821321487,
|
|
"fcm_dpo/delta": -0.07384546846151352,
|
|
"fcm_dpo/margin": 21.980266571044922,
|
|
"fcm_dpo/q_t": 0.3661743402481079,
|
|
"grad_norm": 17.487380981445312,
|
|
"learning_rate": 2.420680166254831e-07,
|
|
"logits/chosen": 0.6762181520462036,
|
|
"logits/rejected": 0.6435130834579468,
|
|
"logps/chosen": -78.06260681152344,
|
|
"logps/ref_chosen": -49.224212646484375,
|
|
"logps/ref_rejected": -63.348472595214844,
|
|
"logps/rejected": -114.16712951660156,
|
|
"loss": 1.0248,
|
|
"margin_dpo/margin_mean": 21.980266571044922,
|
|
"margin_dpo/margin_std": 31.435279846191406,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.562358276643991,
|
|
"fcm_dpo/beta": 0.031202610582113266,
|
|
"fcm_dpo/delta": 0.17676496505737305,
|
|
"fcm_dpo/margin": 13.67655086517334,
|
|
"fcm_dpo/q_t": 0.41596049070358276,
|
|
"grad_norm": 21.666107177734375,
|
|
"learning_rate": 2.4074658027491044e-07,
|
|
"logits/chosen": 0.5750303268432617,
|
|
"logits/rejected": 0.4893125593662262,
|
|
"logps/chosen": -84.21045684814453,
|
|
"logps/ref_chosen": -52.269554138183594,
|
|
"logps/ref_rejected": -72.99522399902344,
|
|
"logps/rejected": -118.61267852783203,
|
|
"loss": 1.3036,
|
|
"margin_dpo/margin_mean": 13.676551818847656,
|
|
"margin_dpo/margin_std": 37.41542053222656,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.563869992441421,
|
|
"fcm_dpo/beta": 0.03156846761703491,
|
|
"fcm_dpo/delta": 0.07378996908664703,
|
|
"fcm_dpo/margin": 16.784202575683594,
|
|
"fcm_dpo/q_t": 0.4008631110191345,
|
|
"grad_norm": 24.01235008239746,
|
|
"learning_rate": 2.394254027623792e-07,
|
|
"logits/chosen": 0.5762922763824463,
|
|
"logits/rejected": 0.5083121061325073,
|
|
"logps/chosen": -97.91470336914062,
|
|
"logps/ref_chosen": -61.112998962402344,
|
|
"logps/ref_rejected": -76.24851989746094,
|
|
"logps/rejected": -129.8344268798828,
|
|
"loss": 1.2322,
|
|
"margin_dpo/margin_mean": 16.784204483032227,
|
|
"margin_dpo/margin_std": 38.091888427734375,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.5653817082388511,
|
|
"fcm_dpo/beta": 0.02992125041782856,
|
|
"fcm_dpo/delta": -0.3461046814918518,
|
|
"fcm_dpo/margin": 30.510740280151367,
|
|
"fcm_dpo/q_t": 0.31146040558815,
|
|
"grad_norm": 19.470178604125977,
|
|
"learning_rate": 2.381045210440644e-07,
|
|
"logits/chosen": 0.47531217336654663,
|
|
"logits/rejected": 0.46916258335113525,
|
|
"logps/chosen": -101.13727569580078,
|
|
"logps/ref_chosen": -72.66920471191406,
|
|
"logps/ref_rejected": -76.83158874511719,
|
|
"logps/rejected": -135.81039428710938,
|
|
"loss": 0.8386,
|
|
"margin_dpo/margin_mean": 30.510740280151367,
|
|
"margin_dpo/margin_std": 30.510784149169922,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.5668934240362812,
|
|
"fcm_dpo/beta": 0.029693206772208214,
|
|
"fcm_dpo/delta": 0.06657587736845016,
|
|
"fcm_dpo/margin": 18.084556579589844,
|
|
"fcm_dpo/q_t": 0.3913113474845886,
|
|
"grad_norm": 20.931381225585938,
|
|
"learning_rate": 2.3678397206786715e-07,
|
|
"logits/chosen": 0.5903283953666687,
|
|
"logits/rejected": 0.537385106086731,
|
|
"logps/chosen": -86.04524993896484,
|
|
"logps/ref_chosen": -57.68330383300781,
|
|
"logps/ref_rejected": -79.34097290039062,
|
|
"logps/rejected": -125.78746795654297,
|
|
"loss": 1.1199,
|
|
"margin_dpo/margin_mean": 18.08455467224121,
|
|
"margin_dpo/margin_std": 32.31146240234375,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.5684051398337112,
|
|
"fcm_dpo/beta": 0.02892487682402134,
|
|
"fcm_dpo/delta": -0.2050780951976776,
|
|
"fcm_dpo/margin": 27.252056121826172,
|
|
"fcm_dpo/q_t": 0.34515899419784546,
|
|
"grad_norm": 19.573123931884766,
|
|
"learning_rate": 2.3546379277238103e-07,
|
|
"logits/chosen": 0.6316548585891724,
|
|
"logits/rejected": 0.5691288709640503,
|
|
"logps/chosen": -83.20816040039062,
|
|
"logps/ref_chosen": -51.674072265625,
|
|
"logps/ref_rejected": -75.69713592529297,
|
|
"logps/rejected": -134.4832763671875,
|
|
"loss": 0.9825,
|
|
"margin_dpo/margin_mean": 27.252056121826172,
|
|
"margin_dpo/margin_std": 36.99662780761719,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.5699168556311414,
|
|
"fcm_dpo/beta": 0.02893291413784027,
|
|
"fcm_dpo/delta": 0.02121102437376976,
|
|
"fcm_dpo/margin": 20.025833129882812,
|
|
"fcm_dpo/q_t": 0.3795938193798065,
|
|
"grad_norm": 17.584617614746094,
|
|
"learning_rate": 2.3414402008585886e-07,
|
|
"logits/chosen": 0.5613811016082764,
|
|
"logits/rejected": 0.5406124591827393,
|
|
"logps/chosen": -79.32124328613281,
|
|
"logps/ref_chosen": -46.17853546142578,
|
|
"logps/ref_rejected": -57.756500244140625,
|
|
"logps/rejected": -110.92504119873047,
|
|
"loss": 1.0804,
|
|
"margin_dpo/margin_mean": 20.025833129882812,
|
|
"margin_dpo/margin_std": 32.1743278503418,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.5714285714285714,
|
|
"fcm_dpo/beta": 0.029261738061904907,
|
|
"fcm_dpo/delta": 0.07294605672359467,
|
|
"fcm_dpo/margin": 18.119808197021484,
|
|
"fcm_dpo/q_t": 0.39253151416778564,
|
|
"grad_norm": 17.491439819335938,
|
|
"learning_rate": 2.3282469092517977e-07,
|
|
"logits/chosen": 0.6229523420333862,
|
|
"logits/rejected": 0.5752782225608826,
|
|
"logps/chosen": -90.11911010742188,
|
|
"logps/ref_chosen": -59.21887969970703,
|
|
"logps/ref_rejected": -71.24818420410156,
|
|
"logps/rejected": -120.26823425292969,
|
|
"loss": 1.118,
|
|
"margin_dpo/margin_mean": 18.119808197021484,
|
|
"margin_dpo/margin_std": 31.92162322998047,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.5729402872260015,
|
|
"fcm_dpo/beta": 0.028815243393182755,
|
|
"fcm_dpo/delta": -0.10706745088100433,
|
|
"fcm_dpo/margin": 24.281211853027344,
|
|
"fcm_dpo/q_t": 0.36030155420303345,
|
|
"grad_norm": 18.351364135742188,
|
|
"learning_rate": 2.3150584219481643e-07,
|
|
"logits/chosen": 0.600252628326416,
|
|
"logits/rejected": 0.535982072353363,
|
|
"logps/chosen": -104.5278091430664,
|
|
"logps/ref_chosen": -76.31658935546875,
|
|
"logps/ref_rejected": -104.26200103759766,
|
|
"logps/rejected": -156.75442504882812,
|
|
"loss": 0.9933,
|
|
"margin_dpo/margin_mean": 24.281211853027344,
|
|
"margin_dpo/margin_std": 33.16697692871094,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.5744520030234316,
|
|
"fcm_dpo/beta": 0.027855779975652695,
|
|
"fcm_dpo/delta": -0.18078401684761047,
|
|
"fcm_dpo/margin": 27.52959442138672,
|
|
"fcm_dpo/q_t": 0.3419705629348755,
|
|
"grad_norm": 18.710771560668945,
|
|
"learning_rate": 2.3018751078580283e-07,
|
|
"logits/chosen": 0.5741807818412781,
|
|
"logits/rejected": 0.5386440753936768,
|
|
"logps/chosen": -86.78734588623047,
|
|
"logps/ref_chosen": -61.283164978027344,
|
|
"logps/ref_rejected": -72.38892364501953,
|
|
"logps/rejected": -125.42269897460938,
|
|
"loss": 0.9754,
|
|
"margin_dpo/margin_mean": 27.52959442138672,
|
|
"margin_dpo/margin_std": 35.956398010253906,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.5759637188208617,
|
|
"fcm_dpo/beta": 0.029031910002231598,
|
|
"fcm_dpo/delta": 0.2659192681312561,
|
|
"fcm_dpo/margin": 11.643007278442383,
|
|
"fcm_dpo/q_t": 0.4340417981147766,
|
|
"grad_norm": 19.243091583251953,
|
|
"learning_rate": 2.288697335747027e-07,
|
|
"logits/chosen": 0.5794563293457031,
|
|
"logits/rejected": 0.5545228123664856,
|
|
"logps/chosen": -92.53129577636719,
|
|
"logps/ref_chosen": -58.2139892578125,
|
|
"logps/ref_rejected": -60.78669357299805,
|
|
"logps/rejected": -106.74700927734375,
|
|
"loss": 1.2855,
|
|
"margin_dpo/margin_mean": 11.6430082321167,
|
|
"margin_dpo/margin_std": 32.82004928588867,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.5774754346182918,
|
|
"fcm_dpo/beta": 0.02958078868687153,
|
|
"fcm_dpo/delta": 0.04316580295562744,
|
|
"fcm_dpo/margin": 18.832475662231445,
|
|
"fcm_dpo/q_t": 0.38638246059417725,
|
|
"grad_norm": 19.289762496948242,
|
|
"learning_rate": 2.2755254742257706e-07,
|
|
"logits/chosen": 0.6157612800598145,
|
|
"logits/rejected": 0.5689198970794678,
|
|
"logps/chosen": -97.4847412109375,
|
|
"logps/ref_chosen": -61.82532501220703,
|
|
"logps/ref_rejected": -83.0452880859375,
|
|
"logps/rejected": -137.53717041015625,
|
|
"loss": 1.0714,
|
|
"margin_dpo/margin_mean": 18.832475662231445,
|
|
"margin_dpo/margin_std": 29.27760887145996,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.5789871504157218,
|
|
"fcm_dpo/beta": 0.028668655082583427,
|
|
"fcm_dpo/delta": -0.11247755587100983,
|
|
"fcm_dpo/margin": 24.513856887817383,
|
|
"fcm_dpo/q_t": 0.363398015499115,
|
|
"grad_norm": 19.029403686523438,
|
|
"learning_rate": 2.2623598917395436e-07,
|
|
"logits/chosen": 0.48890191316604614,
|
|
"logits/rejected": 0.5110622644424438,
|
|
"logps/chosen": -112.27488708496094,
|
|
"logps/ref_chosen": -80.56326293945312,
|
|
"logps/ref_rejected": -74.62922668457031,
|
|
"logps/rejected": -130.85470581054688,
|
|
"loss": 1.0314,
|
|
"margin_dpo/margin_mean": 24.513858795166016,
|
|
"margin_dpo/margin_std": 36.4691162109375,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.5804988662131519,
|
|
"fcm_dpo/beta": 0.028615575283765793,
|
|
"fcm_dpo/delta": -0.03850318491458893,
|
|
"fcm_dpo/margin": 22.226734161376953,
|
|
"fcm_dpo/q_t": 0.3695475459098816,
|
|
"grad_norm": 17.54273796081543,
|
|
"learning_rate": 2.2492009565579875e-07,
|
|
"logits/chosen": 0.6078575849533081,
|
|
"logits/rejected": 0.5643937587738037,
|
|
"logps/chosen": -99.3906478881836,
|
|
"logps/ref_chosen": -65.47514343261719,
|
|
"logps/ref_rejected": -79.67378234863281,
|
|
"logps/rejected": -135.81600952148438,
|
|
"loss": 1.0225,
|
|
"margin_dpo/margin_mean": 22.22673225402832,
|
|
"margin_dpo/margin_std": 31.673675537109375,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.582010582010582,
|
|
"fcm_dpo/beta": 0.02794378250837326,
|
|
"fcm_dpo/delta": -0.16195213794708252,
|
|
"fcm_dpo/margin": 26.842784881591797,
|
|
"fcm_dpo/q_t": 0.34499049186706543,
|
|
"grad_norm": 16.783960342407227,
|
|
"learning_rate": 2.2360490367648084e-07,
|
|
"logits/chosen": 0.5352010726928711,
|
|
"logits/rejected": 0.4990323483943939,
|
|
"logps/chosen": -98.1661605834961,
|
|
"logps/ref_chosen": -66.0565185546875,
|
|
"logps/ref_rejected": -86.68023681640625,
|
|
"logps/rejected": -145.63265991210938,
|
|
"loss": 0.9344,
|
|
"margin_dpo/margin_mean": 26.842784881591797,
|
|
"margin_dpo/margin_std": 31.331087112426758,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.5835222978080121,
|
|
"fcm_dpo/beta": 0.02810431644320488,
|
|
"fcm_dpo/delta": 0.14293362200260162,
|
|
"fcm_dpo/margin": 16.507038116455078,
|
|
"fcm_dpo/q_t": 0.4019835591316223,
|
|
"grad_norm": 18.913597106933594,
|
|
"learning_rate": 2.2229045002474724e-07,
|
|
"logits/chosen": 0.5284489393234253,
|
|
"logits/rejected": 0.47032076120376587,
|
|
"logps/chosen": -113.90336608886719,
|
|
"logps/ref_chosen": -75.6236572265625,
|
|
"logps/ref_rejected": -92.62330627441406,
|
|
"logps/rejected": -147.4100341796875,
|
|
"loss": 1.1448,
|
|
"margin_dpo/margin_mean": 16.507038116455078,
|
|
"margin_dpo/margin_std": 31.028078079223633,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.5850340136054422,
|
|
"fcm_dpo/beta": 0.02792777121067047,
|
|
"fcm_dpo/delta": -0.11489059031009674,
|
|
"fcm_dpo/margin": 25.30979347229004,
|
|
"fcm_dpo/q_t": 0.35611510276794434,
|
|
"grad_norm": 17.214059829711914,
|
|
"learning_rate": 2.209767714686924e-07,
|
|
"logits/chosen": 0.5747230052947998,
|
|
"logits/rejected": 0.4819261431694031,
|
|
"logps/chosen": -81.60426330566406,
|
|
"logps/ref_chosen": -47.22170639038086,
|
|
"logps/ref_rejected": -87.338134765625,
|
|
"logps/rejected": -147.03048706054688,
|
|
"loss": 0.9573,
|
|
"margin_dpo/margin_mean": 25.30979347229004,
|
|
"margin_dpo/margin_std": 31.456417083740234,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.5865457294028723,
|
|
"fcm_dpo/beta": 0.02807321399450302,
|
|
"fcm_dpo/delta": 0.12129095196723938,
|
|
"fcm_dpo/margin": 17.26071548461914,
|
|
"fcm_dpo/q_t": 0.4033401608467102,
|
|
"grad_norm": 18.226442337036133,
|
|
"learning_rate": 2.1966390475472954e-07,
|
|
"logits/chosen": 0.5805580615997314,
|
|
"logits/rejected": 0.5685257911682129,
|
|
"logps/chosen": -108.80488586425781,
|
|
"logps/ref_chosen": -74.5794677734375,
|
|
"logps/ref_rejected": -79.92558288574219,
|
|
"logps/rejected": -131.41171264648438,
|
|
"loss": 1.1656,
|
|
"margin_dpo/margin_mean": 17.26071548461914,
|
|
"margin_dpo/margin_std": 34.313987731933594,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.5880574452003023,
|
|
"fcm_dpo/beta": 0.027882620692253113,
|
|
"fcm_dpo/delta": -0.1335650533437729,
|
|
"fcm_dpo/margin": 25.965024948120117,
|
|
"fcm_dpo/q_t": 0.3534383177757263,
|
|
"grad_norm": 32.512664794921875,
|
|
"learning_rate": 2.1835188660656265e-07,
|
|
"logits/chosen": 0.5873498916625977,
|
|
"logits/rejected": 0.551581621170044,
|
|
"logps/chosen": -95.86332702636719,
|
|
"logps/ref_chosen": -61.624366760253906,
|
|
"logps/ref_rejected": -76.50978088378906,
|
|
"logps/rejected": -136.71377563476562,
|
|
"loss": 1.0076,
|
|
"margin_dpo/margin_mean": 25.965023040771484,
|
|
"margin_dpo/margin_std": 36.41337585449219,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.5895691609977324,
|
|
"fcm_dpo/beta": 0.027584530413150787,
|
|
"fcm_dpo/delta": -0.008268344216048717,
|
|
"fcm_dpo/margin": 22.029890060424805,
|
|
"fcm_dpo/q_t": 0.3749474883079529,
|
|
"grad_norm": 15.995476722717285,
|
|
"learning_rate": 2.170407537241599e-07,
|
|
"logits/chosen": 0.6494970917701721,
|
|
"logits/rejected": 0.5868979096412659,
|
|
"logps/chosen": -77.04705047607422,
|
|
"logps/ref_chosen": -45.871864318847656,
|
|
"logps/ref_rejected": -61.305999755859375,
|
|
"logps/rejected": -114.51107788085938,
|
|
"loss": 1.0388,
|
|
"margin_dpo/margin_mean": 22.029890060424805,
|
|
"margin_dpo/margin_std": 32.316524505615234,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.5910808767951625,
|
|
"fcm_dpo/beta": 0.027057552710175514,
|
|
"fcm_dpo/delta": -0.10849063843488693,
|
|
"fcm_dpo/margin": 25.87117576599121,
|
|
"fcm_dpo/q_t": 0.3591760993003845,
|
|
"grad_norm": 17.0324764251709,
|
|
"learning_rate": 2.1573054278272636e-07,
|
|
"logits/chosen": 0.5981206297874451,
|
|
"logits/rejected": 0.5333592891693115,
|
|
"logps/chosen": -90.99639892578125,
|
|
"logps/ref_chosen": -58.18701171875,
|
|
"logps/ref_rejected": -83.63442993164062,
|
|
"logps/rejected": -142.31500244140625,
|
|
"loss": 1.0371,
|
|
"margin_dpo/margin_mean": 25.871173858642578,
|
|
"margin_dpo/margin_std": 38.77320861816406,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.5925925925925926,
|
|
"fcm_dpo/beta": 0.02655864879488945,
|
|
"fcm_dpo/delta": -0.17987582087516785,
|
|
"fcm_dpo/margin": 28.844318389892578,
|
|
"fcm_dpo/q_t": 0.34854570031166077,
|
|
"grad_norm": 15.680495262145996,
|
|
"learning_rate": 2.1442129043167873e-07,
|
|
"logits/chosen": 0.676337718963623,
|
|
"logits/rejected": 0.6182987093925476,
|
|
"logps/chosen": -96.6463623046875,
|
|
"logps/ref_chosen": -69.7445297241211,
|
|
"logps/ref_rejected": -94.05877685546875,
|
|
"logps/rejected": -149.804931640625,
|
|
"loss": 0.9825,
|
|
"margin_dpo/margin_mean": 28.844318389892578,
|
|
"margin_dpo/margin_std": 38.47731018066406,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.5941043083900227,
|
|
"fcm_dpo/beta": 0.025468815118074417,
|
|
"fcm_dpo/delta": -0.12191242724657059,
|
|
"fcm_dpo/margin": 27.956777572631836,
|
|
"fcm_dpo/q_t": 0.3524329364299774,
|
|
"grad_norm": 15.502283096313477,
|
|
"learning_rate": 2.131130332936195e-07,
|
|
"logits/chosen": 0.6358774900436401,
|
|
"logits/rejected": 0.5986746549606323,
|
|
"logps/chosen": -87.28994750976562,
|
|
"logps/ref_chosen": -52.33489990234375,
|
|
"logps/ref_rejected": -74.33809661865234,
|
|
"logps/rejected": -137.24993896484375,
|
|
"loss": 0.9476,
|
|
"margin_dpo/margin_mean": 27.956775665283203,
|
|
"margin_dpo/margin_std": 33.050567626953125,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.5956160241874527,
|
|
"fcm_dpo/beta": 0.025639474391937256,
|
|
"fcm_dpo/delta": 0.014459993690252304,
|
|
"fcm_dpo/margin": 22.846038818359375,
|
|
"fcm_dpo/q_t": 0.37462514638900757,
|
|
"grad_norm": 19.941524505615234,
|
|
"learning_rate": 2.1180580796331323e-07,
|
|
"logits/chosen": 0.671512246131897,
|
|
"logits/rejected": 0.6399627327919006,
|
|
"logps/chosen": -92.83706665039062,
|
|
"logps/ref_chosen": -60.6761360168457,
|
|
"logps/ref_rejected": -71.36074829101562,
|
|
"logps/rejected": -126.36771392822266,
|
|
"loss": 1.0159,
|
|
"margin_dpo/margin_mean": 22.846038818359375,
|
|
"margin_dpo/margin_std": 30.038795471191406,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.5971277399848829,
|
|
"fcm_dpo/beta": 0.026083827018737793,
|
|
"fcm_dpo/delta": 0.10105286538600922,
|
|
"fcm_dpo/margin": 19.264225006103516,
|
|
"fcm_dpo/q_t": 0.39714348316192627,
|
|
"grad_norm": 21.03672218322754,
|
|
"learning_rate": 2.104996510066625e-07,
|
|
"logits/chosen": 0.6039600968360901,
|
|
"logits/rejected": 0.5188575983047485,
|
|
"logps/chosen": -85.5516357421875,
|
|
"logps/ref_chosen": -50.60432434082031,
|
|
"logps/ref_rejected": -77.08731079101562,
|
|
"logps/rejected": -131.29885864257812,
|
|
"loss": 1.107,
|
|
"margin_dpo/margin_mean": 19.264225006103516,
|
|
"margin_dpo/margin_std": 32.470375061035156,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.5986394557823129,
|
|
"fcm_dpo/beta": 0.025592200458049774,
|
|
"fcm_dpo/delta": -0.030677609145641327,
|
|
"fcm_dpo/margin": 24.426685333251953,
|
|
"fcm_dpo/q_t": 0.369695782661438,
|
|
"grad_norm": 17.182994842529297,
|
|
"learning_rate": 2.0919459895968517e-07,
|
|
"logits/chosen": 0.6177434921264648,
|
|
"logits/rejected": 0.5232092142105103,
|
|
"logps/chosen": -84.13562774658203,
|
|
"logps/ref_chosen": -51.35961151123047,
|
|
"logps/ref_rejected": -79.89360046386719,
|
|
"logps/rejected": -137.09629821777344,
|
|
"loss": 1.007,
|
|
"margin_dpo/margin_mean": 24.42668342590332,
|
|
"margin_dpo/margin_std": 31.296443939208984,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.600151171579743,
|
|
"fcm_dpo/beta": 0.0270930714905262,
|
|
"fcm_dpo/delta": 0.2718348205089569,
|
|
"fcm_dpo/margin": 12.380789756774902,
|
|
"fcm_dpo/q_t": 0.4283815622329712,
|
|
"grad_norm": 23.26795768737793,
|
|
"learning_rate": 2.078906883274924e-07,
|
|
"logits/chosen": 0.5592355728149414,
|
|
"logits/rejected": 0.5140686631202698,
|
|
"logps/chosen": -105.91973114013672,
|
|
"logps/ref_chosen": -66.45622253417969,
|
|
"logps/ref_rejected": -85.74736785888672,
|
|
"logps/rejected": -137.59164428710938,
|
|
"loss": 1.3212,
|
|
"margin_dpo/margin_mean": 12.380790710449219,
|
|
"margin_dpo/margin_std": 38.163108825683594,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.6016628873771731,
|
|
"fcm_dpo/beta": 0.02662910521030426,
|
|
"fcm_dpo/delta": -0.1545412540435791,
|
|
"fcm_dpo/margin": 27.880821228027344,
|
|
"fcm_dpo/q_t": 0.34979376196861267,
|
|
"grad_norm": 16.451763153076172,
|
|
"learning_rate": 2.065879555832674e-07,
|
|
"logits/chosen": 0.5903204679489136,
|
|
"logits/rejected": 0.5277484059333801,
|
|
"logps/chosen": -82.19943237304688,
|
|
"logps/ref_chosen": -49.244239807128906,
|
|
"logps/ref_rejected": -75.18949127197266,
|
|
"logps/rejected": -136.0255126953125,
|
|
"loss": 0.9379,
|
|
"margin_dpo/margin_mean": 27.88081932067871,
|
|
"margin_dpo/margin_std": 33.525672912597656,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.6031746031746031,
|
|
"fcm_dpo/beta": 0.025618739426136017,
|
|
"fcm_dpo/delta": -0.21381694078445435,
|
|
"fcm_dpo/margin": 31.081968307495117,
|
|
"fcm_dpo/q_t": 0.3390474319458008,
|
|
"grad_norm": 17.575740814208984,
|
|
"learning_rate": 2.052864371672457e-07,
|
|
"logits/chosen": 0.533795952796936,
|
|
"logits/rejected": 0.4038241505622864,
|
|
"logps/chosen": -106.9549560546875,
|
|
"logps/ref_chosen": -68.30679321289062,
|
|
"logps/ref_rejected": -113.2708511352539,
|
|
"logps/rejected": -183.0009765625,
|
|
"loss": 0.9195,
|
|
"margin_dpo/margin_mean": 31.081968307495117,
|
|
"margin_dpo/margin_std": 36.69319152832031,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.6046863189720333,
|
|
"fcm_dpo/beta": 0.025582678616046906,
|
|
"fcm_dpo/delta": 0.1144244372844696,
|
|
"fcm_dpo/margin": 19.171369552612305,
|
|
"fcm_dpo/q_t": 0.4018818438053131,
|
|
"grad_norm": 26.281423568725586,
|
|
"learning_rate": 2.0398616948569493e-07,
|
|
"logits/chosen": 0.5914314985275269,
|
|
"logits/rejected": 0.5376293659210205,
|
|
"logps/chosen": -116.35350036621094,
|
|
"logps/ref_chosen": -71.62649536132812,
|
|
"logps/ref_rejected": -90.98765563964844,
|
|
"logps/rejected": -154.8860321044922,
|
|
"loss": 1.1411,
|
|
"margin_dpo/margin_mean": 19.171369552612305,
|
|
"margin_dpo/margin_std": 35.359981536865234,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.6046863189720333,
|
|
"eval_fcm_dpo/beta": 0.025976210832595825,
|
|
"eval_logits/chosen": 0.6214596033096313,
|
|
"eval_logits/rejected": 0.5730489492416382,
|
|
"eval_logps/chosen": -110.48468017578125,
|
|
"eval_logps/ref_chosen": -74.85946655273438,
|
|
"eval_logps/ref_rejected": -79.54898834228516,
|
|
"eval_logps/rejected": -137.4872283935547,
|
|
"eval_loss": 0.5378274917602539,
|
|
"eval_margin_dpo/margin_mean": 22.313037872314453,
|
|
"eval_margin_dpo/margin_std": 35.38352584838867,
|
|
"eval_runtime": 38.0499,
|
|
"eval_samples_per_second": 60.526,
|
|
"eval_steps_per_second": 1.892,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.6061980347694633,
|
|
"fcm_dpo/beta": 0.02523546852171421,
|
|
"fcm_dpo/delta": -0.1849718540906906,
|
|
"fcm_dpo/margin": 30.52855682373047,
|
|
"fcm_dpo/q_t": 0.3418422341346741,
|
|
"grad_norm": 12.788175582885742,
|
|
"learning_rate": 2.0268718890989752e-07,
|
|
"logits/chosen": 0.6369705200195312,
|
|
"logits/rejected": 0.5418835878372192,
|
|
"logps/chosen": -85.19168090820312,
|
|
"logps/ref_chosen": -53.72495651245117,
|
|
"logps/ref_rejected": -75.06304931640625,
|
|
"logps/rejected": -137.05831909179688,
|
|
"loss": 0.907,
|
|
"margin_dpo/margin_mean": 30.528560638427734,
|
|
"margin_dpo/margin_std": 33.80531311035156,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.6077097505668935,
|
|
"fcm_dpo/beta": 0.02487211488187313,
|
|
"fcm_dpo/delta": 0.009320281445980072,
|
|
"fcm_dpo/margin": 23.725364685058594,
|
|
"fcm_dpo/q_t": 0.3787681460380554,
|
|
"grad_norm": 16.4634952545166,
|
|
"learning_rate": 2.013895317751323e-07,
|
|
"logits/chosen": 0.6205127239227295,
|
|
"logits/rejected": 0.5858608484268188,
|
|
"logps/chosen": -98.28569793701172,
|
|
"logps/ref_chosen": -61.873931884765625,
|
|
"logps/ref_rejected": -66.15198516845703,
|
|
"logps/rejected": -126.28912353515625,
|
|
"loss": 1.0598,
|
|
"margin_dpo/margin_mean": 23.725364685058594,
|
|
"margin_dpo/margin_std": 35.9758415222168,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.6092214663643235,
|
|
"fcm_dpo/beta": 0.024643344804644585,
|
|
"fcm_dpo/delta": -0.15608033537864685,
|
|
"fcm_dpo/margin": 30.217971801757812,
|
|
"fcm_dpo/q_t": 0.3511902689933777,
|
|
"grad_norm": 17.57577896118164,
|
|
"learning_rate": 2.0009323437965898e-07,
|
|
"logits/chosen": 0.7137749195098877,
|
|
"logits/rejected": 0.6363557577133179,
|
|
"logps/chosen": -90.44751739501953,
|
|
"logps/ref_chosen": -51.321502685546875,
|
|
"logps/ref_rejected": -86.54010772705078,
|
|
"logps/rejected": -155.88409423828125,
|
|
"loss": 0.9707,
|
|
"margin_dpo/margin_mean": 30.217971801757812,
|
|
"margin_dpo/margin_std": 38.760894775390625,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.6107331821617535,
|
|
"fcm_dpo/beta": 0.023660175502300262,
|
|
"fcm_dpo/delta": -0.11365822702646255,
|
|
"fcm_dpo/margin": 29.694786071777344,
|
|
"fcm_dpo/q_t": 0.3559054732322693,
|
|
"grad_norm": 17.716331481933594,
|
|
"learning_rate": 1.9879833298370237e-07,
|
|
"logits/chosen": 0.5810792446136475,
|
|
"logits/rejected": 0.4951407313346863,
|
|
"logps/chosen": -97.16690063476562,
|
|
"logps/ref_chosen": -62.26288604736328,
|
|
"logps/ref_rejected": -95.19029998779297,
|
|
"logps/rejected": -159.78909301757812,
|
|
"loss": 0.9881,
|
|
"margin_dpo/margin_mean": 29.694786071777344,
|
|
"margin_dpo/margin_std": 38.58500671386719,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.6122448979591837,
|
|
"fcm_dpo/beta": 0.023709017783403397,
|
|
"fcm_dpo/delta": 0.059486567974090576,
|
|
"fcm_dpo/margin": 22.857555389404297,
|
|
"fcm_dpo/q_t": 0.3891262412071228,
|
|
"grad_norm": 15.247061729431152,
|
|
"learning_rate": 1.975048638084379e-07,
|
|
"logits/chosen": 0.6707639694213867,
|
|
"logits/rejected": 0.6255220770835876,
|
|
"logps/chosen": -89.48225402832031,
|
|
"logps/ref_chosen": -50.5843391418457,
|
|
"logps/ref_rejected": -65.43156433105469,
|
|
"logps/rejected": -127.1870346069336,
|
|
"loss": 1.076,
|
|
"margin_dpo/margin_mean": 22.857553482055664,
|
|
"margin_dpo/margin_std": 34.472572326660156,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.6137566137566137,
|
|
"fcm_dpo/beta": 0.023622972890734673,
|
|
"fcm_dpo/delta": -0.10287454724311829,
|
|
"fcm_dpo/margin": 29.435489654541016,
|
|
"fcm_dpo/q_t": 0.35592135787010193,
|
|
"grad_norm": 16.751718521118164,
|
|
"learning_rate": 1.9621286303497914e-07,
|
|
"logits/chosen": 0.645000159740448,
|
|
"logits/rejected": 0.49971795082092285,
|
|
"logps/chosen": -86.35372924804688,
|
|
"logps/ref_chosen": -48.99560546875,
|
|
"logps/ref_rejected": -92.47774505615234,
|
|
"logps/rejected": -159.2713623046875,
|
|
"loss": 0.998,
|
|
"margin_dpo/margin_mean": 29.435489654541016,
|
|
"margin_dpo/margin_std": 39.57469940185547,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.6152683295540439,
|
|
"fcm_dpo/beta": 0.023810304701328278,
|
|
"fcm_dpo/delta": 0.06428097188472748,
|
|
"fcm_dpo/margin": 22.641036987304688,
|
|
"fcm_dpo/q_t": 0.3884776830673218,
|
|
"grad_norm": 17.64628791809082,
|
|
"learning_rate": 1.9492236680336483e-07,
|
|
"logits/chosen": 0.5463271141052246,
|
|
"logits/rejected": 0.47302818298339844,
|
|
"logps/chosen": -136.9144287109375,
|
|
"logps/ref_chosen": -89.40056610107422,
|
|
"logps/ref_rejected": -99.28775024414062,
|
|
"logps/rejected": -169.44264221191406,
|
|
"loss": 1.0804,
|
|
"margin_dpo/margin_mean": 22.641036987304688,
|
|
"margin_dpo/margin_std": 36.08110046386719,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.6167800453514739,
|
|
"fcm_dpo/beta": 0.023345306515693665,
|
|
"fcm_dpo/delta": -0.2195199728012085,
|
|
"fcm_dpo/margin": 34.35175323486328,
|
|
"fcm_dpo/q_t": 0.3320169746875763,
|
|
"grad_norm": 13.968607902526855,
|
|
"learning_rate": 1.9363341121154895e-07,
|
|
"logits/chosen": 0.6046391129493713,
|
|
"logits/rejected": 0.5297787189483643,
|
|
"logps/chosen": -89.42475128173828,
|
|
"logps/ref_chosen": -54.70391845703125,
|
|
"logps/ref_rejected": -73.98648834228516,
|
|
"logps/rejected": -143.05908203125,
|
|
"loss": 0.9009,
|
|
"margin_dpo/margin_mean": 34.35175323486328,
|
|
"margin_dpo/margin_std": 36.27836227416992,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.618291761148904,
|
|
"fcm_dpo/beta": 0.023423273116350174,
|
|
"fcm_dpo/delta": 0.1822575032711029,
|
|
"fcm_dpo/margin": 18.17670440673828,
|
|
"fcm_dpo/q_t": 0.41613560914993286,
|
|
"grad_norm": 17.823652267456055,
|
|
"learning_rate": 1.9234603231438994e-07,
|
|
"logits/chosen": 0.6338742971420288,
|
|
"logits/rejected": 0.6387898325920105,
|
|
"logps/chosen": -106.66569519042969,
|
|
"logps/ref_chosen": -62.11822509765625,
|
|
"logps/ref_rejected": -61.933509826660156,
|
|
"logps/rejected": -124.65768432617188,
|
|
"loss": 1.1785,
|
|
"margin_dpo/margin_mean": 18.17670440673828,
|
|
"margin_dpo/margin_std": 37.99756622314453,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.6198034769463341,
|
|
"fcm_dpo/beta": 0.02331661805510521,
|
|
"fcm_dpo/delta": -0.0475025437772274,
|
|
"fcm_dpo/margin": 27.560718536376953,
|
|
"fcm_dpo/q_t": 0.36133018136024475,
|
|
"grad_norm": 16.006240844726562,
|
|
"learning_rate": 1.9106026612264315e-07,
|
|
"logits/chosen": 0.6429124474525452,
|
|
"logits/rejected": 0.619254469871521,
|
|
"logps/chosen": -102.84481048583984,
|
|
"logps/ref_chosen": -61.80266189575195,
|
|
"logps/ref_rejected": -76.60002136230469,
|
|
"logps/rejected": -145.202880859375,
|
|
"loss": 0.9526,
|
|
"margin_dpo/margin_mean": 27.560718536376953,
|
|
"margin_dpo/margin_std": 29.782230377197266,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.6213151927437641,
|
|
"fcm_dpo/beta": 0.02313273400068283,
|
|
"fcm_dpo/delta": -0.08924700319766998,
|
|
"fcm_dpo/margin": 29.52808380126953,
|
|
"fcm_dpo/q_t": 0.36143290996551514,
|
|
"grad_norm": 16.007495880126953,
|
|
"learning_rate": 1.8977614860195296e-07,
|
|
"logits/chosen": 0.6322345733642578,
|
|
"logits/rejected": 0.5753542184829712,
|
|
"logps/chosen": -100.47373962402344,
|
|
"logps/ref_chosen": -54.44539260864258,
|
|
"logps/ref_rejected": -74.5650863647461,
|
|
"logps/rejected": -150.12149047851562,
|
|
"loss": 1.0053,
|
|
"margin_dpo/margin_mean": 29.52808380126953,
|
|
"margin_dpo/margin_std": 41.01704406738281,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.6228269085411943,
|
|
"fcm_dpo/beta": 0.022871272638440132,
|
|
"fcm_dpo/delta": -0.019356705248355865,
|
|
"fcm_dpo/margin": 27.005107879638672,
|
|
"fcm_dpo/q_t": 0.37393760681152344,
|
|
"grad_norm": 16.466289520263672,
|
|
"learning_rate": 1.8849371567184662e-07,
|
|
"logits/chosen": 0.6046679019927979,
|
|
"logits/rejected": 0.5413898229598999,
|
|
"logps/chosen": -105.70858764648438,
|
|
"logps/ref_chosen": -55.248085021972656,
|
|
"logps/ref_rejected": -68.96623229980469,
|
|
"logps/rejected": -146.43185424804688,
|
|
"loss": 1.0133,
|
|
"margin_dpo/margin_mean": 27.005109786987305,
|
|
"margin_dpo/margin_std": 36.81440734863281,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.6243386243386243,
|
|
"fcm_dpo/beta": 0.023207662627100945,
|
|
"fcm_dpo/delta": 0.0680059865117073,
|
|
"fcm_dpo/margin": 23.077539443969727,
|
|
"fcm_dpo/q_t": 0.3903021216392517,
|
|
"grad_norm": 20.254179000854492,
|
|
"learning_rate": 1.872130032047302e-07,
|
|
"logits/chosen": 0.49796950817108154,
|
|
"logits/rejected": 0.46667468547821045,
|
|
"logps/chosen": -122.41281127929688,
|
|
"logps/ref_chosen": -68.72074890136719,
|
|
"logps/ref_rejected": -78.76539611816406,
|
|
"logps/rejected": -155.53500366210938,
|
|
"loss": 1.1366,
|
|
"margin_dpo/margin_mean": 23.077537536621094,
|
|
"margin_dpo/margin_std": 42.77297592163086,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.6258503401360545,
|
|
"fcm_dpo/beta": 0.02289092168211937,
|
|
"fcm_dpo/delta": -0.10468479245901108,
|
|
"fcm_dpo/margin": 30.4606990814209,
|
|
"fcm_dpo/q_t": 0.35490280389785767,
|
|
"grad_norm": 15.836713790893555,
|
|
"learning_rate": 1.8593404702488436e-07,
|
|
"logits/chosen": 0.6493165493011475,
|
|
"logits/rejected": 0.5884075164794922,
|
|
"logps/chosen": -101.4513168334961,
|
|
"logps/ref_chosen": -54.138214111328125,
|
|
"logps/ref_rejected": -74.65741729736328,
|
|
"logps/rejected": -152.43121337890625,
|
|
"loss": 0.973,
|
|
"margin_dpo/margin_mean": 30.4606990814209,
|
|
"margin_dpo/margin_std": 38.70647048950195,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.6273620559334845,
|
|
"fcm_dpo/beta": 0.022673513740301132,
|
|
"fcm_dpo/delta": -0.018438737839460373,
|
|
"fcm_dpo/margin": 27.21916961669922,
|
|
"fcm_dpo/q_t": 0.3717983365058899,
|
|
"grad_norm": 17.9824275970459,
|
|
"learning_rate": 1.846568829074628e-07,
|
|
"logits/chosen": 0.6430912017822266,
|
|
"logits/rejected": 0.6277749538421631,
|
|
"logps/chosen": -104.0219955444336,
|
|
"logps/ref_chosen": -55.91856002807617,
|
|
"logps/ref_rejected": -61.747703552246094,
|
|
"logps/rejected": -137.0703125,
|
|
"loss": 1.0492,
|
|
"margin_dpo/margin_mean": 27.21916961669922,
|
|
"margin_dpo/margin_std": 41.05143737792969,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.6288737717309146,
|
|
"fcm_dpo/beta": 0.023443318903446198,
|
|
"fcm_dpo/delta": 0.08808435499668121,
|
|
"fcm_dpo/margin": 21.66620445251465,
|
|
"fcm_dpo/q_t": 0.3991958498954773,
|
|
"grad_norm": 20.130155563354492,
|
|
"learning_rate": 1.8338154657749128e-07,
|
|
"logits/chosen": 0.6014896631240845,
|
|
"logits/rejected": 0.5532357096672058,
|
|
"logps/chosen": -106.42463684082031,
|
|
"logps/ref_chosen": -54.72308349609375,
|
|
"logps/ref_rejected": -69.17388916015625,
|
|
"logps/rejected": -142.54165649414062,
|
|
"loss": 1.1462,
|
|
"margin_dpo/margin_mean": 21.66620445251465,
|
|
"margin_dpo/margin_std": 38.5516357421875,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.6303854875283447,
|
|
"fcm_dpo/beta": 0.02283429354429245,
|
|
"fcm_dpo/delta": -0.12874314188957214,
|
|
"fcm_dpo/margin": 31.50384521484375,
|
|
"fcm_dpo/q_t": 0.34769943356513977,
|
|
"grad_norm": 16.118452072143555,
|
|
"learning_rate": 1.8210807370886849e-07,
|
|
"logits/chosen": 0.7364401817321777,
|
|
"logits/rejected": 0.6695940494537354,
|
|
"logps/chosen": -110.97312927246094,
|
|
"logps/ref_chosen": -56.791259765625,
|
|
"logps/ref_rejected": -68.7791748046875,
|
|
"logps/rejected": -154.4648895263672,
|
|
"loss": 0.9872,
|
|
"margin_dpo/margin_mean": 31.503847122192383,
|
|
"margin_dpo/margin_std": 41.09132385253906,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.6318972033257747,
|
|
"fcm_dpo/beta": 0.022661913186311722,
|
|
"fcm_dpo/delta": 0.10957328975200653,
|
|
"fcm_dpo/margin": 21.837736129760742,
|
|
"fcm_dpo/q_t": 0.40230679512023926,
|
|
"grad_norm": 19.2397518157959,
|
|
"learning_rate": 1.8083649992336825e-07,
|
|
"logits/chosen": 0.668450117111206,
|
|
"logits/rejected": 0.6779706478118896,
|
|
"logps/chosen": -128.31222534179688,
|
|
"logps/ref_chosen": -69.10798645019531,
|
|
"logps/ref_rejected": -75.09132385253906,
|
|
"logps/rejected": -156.13330078125,
|
|
"loss": 1.1355,
|
|
"margin_dpo/margin_mean": 21.837738037109375,
|
|
"margin_dpo/margin_std": 39.48761749267578,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.6334089191232048,
|
|
"fcm_dpo/beta": 0.022264618426561356,
|
|
"fcm_dpo/delta": -0.1917235106229782,
|
|
"fcm_dpo/margin": 34.83051300048828,
|
|
"fcm_dpo/q_t": 0.34440430998802185,
|
|
"grad_norm": 16.647216796875,
|
|
"learning_rate": 1.7956686078964255e-07,
|
|
"logits/chosen": 0.5454314947128296,
|
|
"logits/rejected": 0.4921835660934448,
|
|
"logps/chosen": -102.27273559570312,
|
|
"logps/ref_chosen": -58.1717643737793,
|
|
"logps/ref_rejected": -71.67066955566406,
|
|
"logps/rejected": -150.60215759277344,
|
|
"loss": 0.9429,
|
|
"margin_dpo/margin_mean": 34.83051300048828,
|
|
"margin_dpo/margin_std": 42.898826599121094,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.6349206349206349,
|
|
"fcm_dpo/beta": 0.022696789354085922,
|
|
"fcm_dpo/delta": 0.21054378151893616,
|
|
"fcm_dpo/margin": 17.547386169433594,
|
|
"fcm_dpo/q_t": 0.42180436849594116,
|
|
"grad_norm": 18.340421676635742,
|
|
"learning_rate": 1.782991918222275e-07,
|
|
"logits/chosen": 0.6378327012062073,
|
|
"logits/rejected": 0.5907524228096008,
|
|
"logps/chosen": -117.64787292480469,
|
|
"logps/ref_chosen": -57.05351257324219,
|
|
"logps/ref_rejected": -62.670982360839844,
|
|
"logps/rejected": -140.81272888183594,
|
|
"loss": 1.2553,
|
|
"margin_dpo/margin_mean": 17.547386169433594,
|
|
"margin_dpo/margin_std": 44.069618225097656,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.636432350718065,
|
|
"fcm_dpo/beta": 0.02310902252793312,
|
|
"fcm_dpo/delta": 0.035230088979005814,
|
|
"fcm_dpo/margin": 24.51070213317871,
|
|
"fcm_dpo/q_t": 0.38860011100769043,
|
|
"grad_norm": 18.31899070739746,
|
|
"learning_rate": 1.7703352848054887e-07,
|
|
"logits/chosen": 0.6146277785301208,
|
|
"logits/rejected": 0.556212306022644,
|
|
"logps/chosen": -114.83110046386719,
|
|
"logps/ref_chosen": -57.32324981689453,
|
|
"logps/ref_rejected": -75.33782958984375,
|
|
"logps/rejected": -157.35638427734375,
|
|
"loss": 1.1604,
|
|
"margin_dpo/margin_mean": 24.510700225830078,
|
|
"margin_dpo/margin_std": 47.4954719543457,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.6379440665154951,
|
|
"fcm_dpo/beta": 0.022881243377923965,
|
|
"fcm_dpo/delta": -0.12513823807239532,
|
|
"fcm_dpo/margin": 31.305706024169922,
|
|
"fcm_dpo/q_t": 0.3527275621891022,
|
|
"grad_norm": 19.757509231567383,
|
|
"learning_rate": 1.7576990616793137e-07,
|
|
"logits/chosen": 0.6293450593948364,
|
|
"logits/rejected": 0.602583110332489,
|
|
"logps/chosen": -115.69070434570312,
|
|
"logps/ref_chosen": -67.05757141113281,
|
|
"logps/ref_rejected": -72.12803649902344,
|
|
"logps/rejected": -152.06686401367188,
|
|
"loss": 0.9678,
|
|
"margin_dpo/margin_mean": 31.305706024169922,
|
|
"margin_dpo/margin_std": 39.57312774658203,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.6394557823129252,
|
|
"fcm_dpo/beta": 0.02209470421075821,
|
|
"fcm_dpo/delta": -0.1590176820755005,
|
|
"fcm_dpo/margin": 33.78448486328125,
|
|
"fcm_dpo/q_t": 0.3498442769050598,
|
|
"grad_norm": 15.129724502563477,
|
|
"learning_rate": 1.745083602306071e-07,
|
|
"logits/chosen": 0.6799654364585876,
|
|
"logits/rejected": 0.613652765750885,
|
|
"logps/chosen": -105.32487487792969,
|
|
"logps/ref_chosen": -54.06167221069336,
|
|
"logps/ref_rejected": -76.64092254638672,
|
|
"logps/rejected": -161.68861389160156,
|
|
"loss": 0.955,
|
|
"margin_dpo/margin_mean": 33.78448486328125,
|
|
"margin_dpo/margin_std": 42.586509704589844,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.6409674981103552,
|
|
"fcm_dpo/beta": 0.021351175382733345,
|
|
"fcm_dpo/delta": -0.14496149122714996,
|
|
"fcm_dpo/margin": 34.29045867919922,
|
|
"fcm_dpo/q_t": 0.350393682718277,
|
|
"grad_norm": 17.672733306884766,
|
|
"learning_rate": 1.7324892595672804e-07,
|
|
"logits/chosen": 0.5625635385513306,
|
|
"logits/rejected": 0.5283594131469727,
|
|
"logps/chosen": -107.4459228515625,
|
|
"logps/ref_chosen": -53.60887145996094,
|
|
"logps/ref_rejected": -79.2139892578125,
|
|
"logps/rejected": -167.34152221679688,
|
|
"loss": 0.9617,
|
|
"margin_dpo/margin_mean": 34.29045867919922,
|
|
"margin_dpo/margin_std": 42.79728698730469,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.6424792139077853,
|
|
"fcm_dpo/beta": 0.02123691514134407,
|
|
"fcm_dpo/delta": -0.02055555209517479,
|
|
"fcm_dpo/margin": 29.152355194091797,
|
|
"fcm_dpo/q_t": 0.3736664652824402,
|
|
"grad_norm": 17.606735229492188,
|
|
"learning_rate": 1.7199163857537824e-07,
|
|
"logits/chosen": 0.677306592464447,
|
|
"logits/rejected": 0.6497554779052734,
|
|
"logps/chosen": -110.3130111694336,
|
|
"logps/ref_chosen": -58.41468048095703,
|
|
"logps/ref_rejected": -66.59054565429688,
|
|
"logps/rejected": -147.6412353515625,
|
|
"loss": 1.0286,
|
|
"margin_dpo/margin_mean": 29.15235137939453,
|
|
"margin_dpo/margin_std": 41.597991943359375,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.6439909297052154,
|
|
"fcm_dpo/beta": 0.02198956534266472,
|
|
"fcm_dpo/delta": 0.22994250059127808,
|
|
"fcm_dpo/margin": 17.23443031311035,
|
|
"fcm_dpo/q_t": 0.42192342877388,
|
|
"grad_norm": 20.545143127441406,
|
|
"learning_rate": 1.7073653325558828e-07,
|
|
"logits/chosen": 0.6122332811355591,
|
|
"logits/rejected": 0.6184878945350647,
|
|
"logps/chosen": -137.40863037109375,
|
|
"logps/ref_chosen": -71.70822143554688,
|
|
"logps/ref_rejected": -73.57725524902344,
|
|
"logps/rejected": -156.51211547851562,
|
|
"loss": 1.2701,
|
|
"margin_dpo/margin_mean": 17.234432220458984,
|
|
"margin_dpo/margin_std": 46.23017120361328,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.6455026455026455,
|
|
"fcm_dpo/beta": 0.022307440638542175,
|
|
"fcm_dpo/delta": 0.015383723191916943,
|
|
"fcm_dpo/margin": 26.24768829345703,
|
|
"fcm_dpo/q_t": 0.38463151454925537,
|
|
"grad_norm": 19.993911743164062,
|
|
"learning_rate": 1.6948364510535218e-07,
|
|
"logits/chosen": 0.6968556642532349,
|
|
"logits/rejected": 0.6430996656417847,
|
|
"logps/chosen": -120.65845489501953,
|
|
"logps/ref_chosen": -58.64276885986328,
|
|
"logps/ref_rejected": -86.25437927246094,
|
|
"logps/rejected": -174.51776123046875,
|
|
"loss": 1.1157,
|
|
"margin_dpo/margin_mean": 26.24768829345703,
|
|
"margin_dpo/margin_std": 46.92239761352539,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.6470143613000756,
|
|
"fcm_dpo/beta": 0.022043775767087936,
|
|
"fcm_dpo/delta": -0.12670910358428955,
|
|
"fcm_dpo/margin": 32.557342529296875,
|
|
"fcm_dpo/q_t": 0.3596833646297455,
|
|
"grad_norm": 16.594730377197266,
|
|
"learning_rate": 1.6823300917064458e-07,
|
|
"logits/chosen": 0.5824407339096069,
|
|
"logits/rejected": 0.5360796451568604,
|
|
"logps/chosen": -123.86228942871094,
|
|
"logps/ref_chosen": -66.5960464477539,
|
|
"logps/ref_rejected": -82.3941650390625,
|
|
"logps/rejected": -172.21774291992188,
|
|
"loss": 1.0117,
|
|
"margin_dpo/margin_mean": 32.557342529296875,
|
|
"margin_dpo/margin_std": 46.57630157470703,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.6485260770975056,
|
|
"fcm_dpo/beta": 0.022087689489126205,
|
|
"fcm_dpo/delta": 0.04340841621160507,
|
|
"fcm_dpo/margin": 25.24394989013672,
|
|
"fcm_dpo/q_t": 0.3880462646484375,
|
|
"grad_norm": 19.481197357177734,
|
|
"learning_rate": 1.669846604344412e-07,
|
|
"logits/chosen": 0.605602502822876,
|
|
"logits/rejected": 0.6182312965393066,
|
|
"logps/chosen": -118.7700424194336,
|
|
"logps/ref_chosen": -57.00970458984375,
|
|
"logps/ref_rejected": -59.86549377441406,
|
|
"logps/rejected": -146.86978149414062,
|
|
"loss": 1.1297,
|
|
"margin_dpo/margin_mean": 25.24394989013672,
|
|
"margin_dpo/margin_std": 45.29219436645508,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.6500377928949358,
|
|
"fcm_dpo/beta": 0.021354306489229202,
|
|
"fcm_dpo/delta": -0.1940208375453949,
|
|
"fcm_dpo/margin": 36.47758483886719,
|
|
"fcm_dpo/q_t": 0.34010353684425354,
|
|
"grad_norm": 16.44814109802246,
|
|
"learning_rate": 1.6573863381573954e-07,
|
|
"logits/chosen": 0.5480879545211792,
|
|
"logits/rejected": 0.5461628437042236,
|
|
"logps/chosen": -114.69402313232422,
|
|
"logps/ref_chosen": -59.563194274902344,
|
|
"logps/ref_rejected": -70.52289581298828,
|
|
"logps/rejected": -162.1313018798828,
|
|
"loss": 0.9438,
|
|
"margin_dpo/margin_mean": 36.47758483886719,
|
|
"margin_dpo/margin_std": 44.4223747253418,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.6515495086923658,
|
|
"fcm_dpo/beta": 0.021211152896285057,
|
|
"fcm_dpo/delta": 0.0317416712641716,
|
|
"fcm_dpo/margin": 26.876333236694336,
|
|
"fcm_dpo/q_t": 0.38250017166137695,
|
|
"grad_norm": 16.88580322265625,
|
|
"learning_rate": 1.6449496416858282e-07,
|
|
"logits/chosen": 0.6321650743484497,
|
|
"logits/rejected": 0.58774334192276,
|
|
"logps/chosen": -100.53054809570312,
|
|
"logps/ref_chosen": -50.20032501220703,
|
|
"logps/ref_rejected": -77.81680297851562,
|
|
"logps/rejected": -155.0233612060547,
|
|
"loss": 1.0702,
|
|
"margin_dpo/margin_mean": 26.876333236694336,
|
|
"margin_dpo/margin_std": 42.4122314453125,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.6530612244897959,
|
|
"fcm_dpo/beta": 0.021274283528327942,
|
|
"fcm_dpo/delta": 0.0018111169338226318,
|
|
"fcm_dpo/margin": 28.123023986816406,
|
|
"fcm_dpo/q_t": 0.3795931339263916,
|
|
"grad_norm": 16.432628631591797,
|
|
"learning_rate": 1.632536862810844e-07,
|
|
"logits/chosen": 0.6638392210006714,
|
|
"logits/rejected": 0.6173241138458252,
|
|
"logps/chosen": -115.13069152832031,
|
|
"logps/ref_chosen": -61.662757873535156,
|
|
"logps/ref_rejected": -83.94496154785156,
|
|
"logps/rejected": -165.53591918945312,
|
|
"loss": 1.0911,
|
|
"margin_dpo/margin_mean": 28.123023986816406,
|
|
"margin_dpo/margin_std": 47.277862548828125,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.654572940287226,
|
|
"fcm_dpo/beta": 0.02076822891831398,
|
|
"fcm_dpo/delta": -0.19751861691474915,
|
|
"fcm_dpo/margin": 37.677452087402344,
|
|
"fcm_dpo/q_t": 0.33977317810058594,
|
|
"grad_norm": 16.52134132385254,
|
|
"learning_rate": 1.6201483487445515e-07,
|
|
"logits/chosen": 0.7488293051719666,
|
|
"logits/rejected": 0.7424620389938354,
|
|
"logps/chosen": -116.32231140136719,
|
|
"logps/ref_chosen": -63.72917938232422,
|
|
"logps/ref_rejected": -65.8391342163086,
|
|
"logps/rejected": -156.10971069335938,
|
|
"loss": 0.9462,
|
|
"margin_dpo/margin_mean": 37.677452087402344,
|
|
"margin_dpo/margin_std": 45.816856384277344,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.656084656084656,
|
|
"fcm_dpo/beta": 0.019872212782502174,
|
|
"fcm_dpo/delta": -0.13274145126342773,
|
|
"fcm_dpo/margin": 36.2554817199707,
|
|
"fcm_dpo/q_t": 0.34939053654670715,
|
|
"grad_norm": 14.539586067199707,
|
|
"learning_rate": 1.6077844460203204e-07,
|
|
"logits/chosen": 0.7579059600830078,
|
|
"logits/rejected": 0.6943522691726685,
|
|
"logps/chosen": -94.58082580566406,
|
|
"logps/ref_chosen": -47.97331619262695,
|
|
"logps/ref_rejected": -72.51132202148438,
|
|
"logps/rejected": -155.37432861328125,
|
|
"loss": 1.0036,
|
|
"margin_dpo/margin_mean": 36.25547790527344,
|
|
"margin_dpo/margin_std": 49.25444030761719,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.6575963718820862,
|
|
"fcm_dpo/beta": 0.019995521754026413,
|
|
"fcm_dpo/delta": -0.024610616266727448,
|
|
"fcm_dpo/margin": 31.11350440979004,
|
|
"fcm_dpo/q_t": 0.3718177378177643,
|
|
"grad_norm": 17.21233558654785,
|
|
"learning_rate": 1.5954455004830878e-07,
|
|
"logits/chosen": 0.7389936447143555,
|
|
"logits/rejected": 0.7017002105712891,
|
|
"logps/chosen": -113.01530456542969,
|
|
"logps/ref_chosen": -57.06024932861328,
|
|
"logps/ref_rejected": -71.69146728515625,
|
|
"logps/rejected": -158.76004028320312,
|
|
"loss": 1.0395,
|
|
"margin_dpo/margin_mean": 31.11350440979004,
|
|
"margin_dpo/margin_std": 45.02946090698242,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.6591080876795162,
|
|
"fcm_dpo/beta": 0.020023200660943985,
|
|
"fcm_dpo/delta": 0.0525226816534996,
|
|
"fcm_dpo/margin": 27.47412872314453,
|
|
"fcm_dpo/q_t": 0.3874613046646118,
|
|
"grad_norm": 15.634448051452637,
|
|
"learning_rate": 1.5831318572796847e-07,
|
|
"logits/chosen": 0.6600745320320129,
|
|
"logits/rejected": 0.6036931872367859,
|
|
"logps/chosen": -110.47145080566406,
|
|
"logps/ref_chosen": -56.158050537109375,
|
|
"logps/ref_rejected": -67.63787841796875,
|
|
"logps/rejected": -149.4254150390625,
|
|
"loss": 1.0997,
|
|
"margin_dpo/margin_mean": 27.47412872314453,
|
|
"margin_dpo/margin_std": 46.383445739746094,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.6606198034769464,
|
|
"fcm_dpo/beta": 0.019956011325120926,
|
|
"fcm_dpo/delta": 0.05999675393104553,
|
|
"fcm_dpo/margin": 27.10342788696289,
|
|
"fcm_dpo/q_t": 0.38999560475349426,
|
|
"grad_norm": 17.91028594970703,
|
|
"learning_rate": 1.5708438608491815e-07,
|
|
"logits/chosen": 0.6346490383148193,
|
|
"logits/rejected": 0.5062617659568787,
|
|
"logps/chosen": -116.76597595214844,
|
|
"logps/ref_chosen": -56.98578643798828,
|
|
"logps/ref_rejected": -85.61524963378906,
|
|
"logps/rejected": -172.49887084960938,
|
|
"loss": 1.1374,
|
|
"margin_dpo/margin_mean": 27.10342788696289,
|
|
"margin_dpo/margin_std": 48.52922439575195,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.6621315192743764,
|
|
"fcm_dpo/beta": 0.019836513325572014,
|
|
"fcm_dpo/delta": -0.17882977426052094,
|
|
"fcm_dpo/margin": 38.5899772644043,
|
|
"fcm_dpo/q_t": 0.34486445784568787,
|
|
"grad_norm": 16.361831665039062,
|
|
"learning_rate": 1.558581854913253e-07,
|
|
"logits/chosen": 0.7000746130943298,
|
|
"logits/rejected": 0.6368303894996643,
|
|
"logps/chosen": -92.46006774902344,
|
|
"logps/ref_chosen": -41.27777862548828,
|
|
"logps/ref_rejected": -65.33840942382812,
|
|
"logps/rejected": -155.1106719970703,
|
|
"loss": 0.9389,
|
|
"margin_dpo/margin_mean": 38.58998107910156,
|
|
"margin_dpo/margin_std": 46.060386657714844,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.6636432350718064,
|
|
"fcm_dpo/beta": 0.01960304006934166,
|
|
"fcm_dpo/delta": -0.03375595808029175,
|
|
"fcm_dpo/margin": 32.17954635620117,
|
|
"fcm_dpo/q_t": 0.3675764799118042,
|
|
"grad_norm": 15.4530611038208,
|
|
"learning_rate": 1.5463461824665658e-07,
|
|
"logits/chosen": 0.5799434781074524,
|
|
"logits/rejected": 0.5474724769592285,
|
|
"logps/chosen": -136.7943572998047,
|
|
"logps/ref_chosen": -81.41764831542969,
|
|
"logps/ref_rejected": -94.72309875488281,
|
|
"logps/rejected": -182.27935791015625,
|
|
"loss": 1.0031,
|
|
"margin_dpo/margin_mean": 32.17954635620117,
|
|
"margin_dpo/margin_std": 42.14781951904297,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.6651549508692366,
|
|
"fcm_dpo/beta": 0.019320860505104065,
|
|
"fcm_dpo/delta": -0.08684191107749939,
|
|
"fcm_dpo/margin": 35.21943283081055,
|
|
"fcm_dpo/q_t": 0.36134228110313416,
|
|
"grad_norm": 21.23478889465332,
|
|
"learning_rate": 1.534137185767178e-07,
|
|
"logits/chosen": 0.6408597230911255,
|
|
"logits/rejected": 0.545317530632019,
|
|
"logps/chosen": -93.93386840820312,
|
|
"logps/ref_chosen": -42.538185119628906,
|
|
"logps/ref_rejected": -69.78813934326172,
|
|
"logps/rejected": -156.40325927734375,
|
|
"loss": 1.0121,
|
|
"margin_dpo/margin_mean": 35.21943283081055,
|
|
"margin_dpo/margin_std": 48.64961242675781,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.6666666666666666,
|
|
"fcm_dpo/beta": 0.018760252743959427,
|
|
"fcm_dpo/delta": -0.07676707208156586,
|
|
"fcm_dpo/margin": 35.738895416259766,
|
|
"fcm_dpo/q_t": 0.3565472364425659,
|
|
"grad_norm": 14.383801460266113,
|
|
"learning_rate": 1.521955206326976e-07,
|
|
"logits/chosen": 0.6243678331375122,
|
|
"logits/rejected": 0.5356660485267639,
|
|
"logps/chosen": -106.02540588378906,
|
|
"logps/ref_chosen": -57.593223571777344,
|
|
"logps/ref_rejected": -84.82878875732422,
|
|
"logps/rejected": -168.99986267089844,
|
|
"loss": 0.9456,
|
|
"margin_dpo/margin_mean": 35.738895416259766,
|
|
"margin_dpo/margin_std": 39.57575988769531,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.6681783824640968,
|
|
"fcm_dpo/beta": 0.01864861510694027,
|
|
"fcm_dpo/delta": -0.04125872999429703,
|
|
"fcm_dpo/margin": 34.23713684082031,
|
|
"fcm_dpo/q_t": 0.3659091889858246,
|
|
"grad_norm": 16.47283363342285,
|
|
"learning_rate": 1.5098005849021078e-07,
|
|
"logits/chosen": 0.6572163701057434,
|
|
"logits/rejected": 0.610891580581665,
|
|
"logps/chosen": -128.51307678222656,
|
|
"logps/ref_chosen": -67.46121978759766,
|
|
"logps/ref_rejected": -89.0693588256836,
|
|
"logps/rejected": -184.3583526611328,
|
|
"loss": 1.0119,
|
|
"margin_dpo/margin_mean": 34.23713684082031,
|
|
"margin_dpo/margin_std": 46.631752014160156,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.6696900982615268,
|
|
"fcm_dpo/beta": 0.018264703452587128,
|
|
"fcm_dpo/delta": -0.15920081734657288,
|
|
"fcm_dpo/margin": 40.92947006225586,
|
|
"fcm_dpo/q_t": 0.3452005982398987,
|
|
"grad_norm": 16.483232498168945,
|
|
"learning_rate": 1.4976736614834662e-07,
|
|
"logits/chosen": 0.6689678430557251,
|
|
"logits/rejected": 0.5985565185546875,
|
|
"logps/chosen": -105.98631286621094,
|
|
"logps/ref_chosen": -54.79610061645508,
|
|
"logps/ref_rejected": -77.80781555175781,
|
|
"logps/rejected": -169.92750549316406,
|
|
"loss": 0.9651,
|
|
"margin_dpo/margin_mean": 40.929473876953125,
|
|
"margin_dpo/margin_std": 51.907249450683594,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.671201814058957,
|
|
"fcm_dpo/beta": 0.01889648288488388,
|
|
"fcm_dpo/delta": 0.3607651889324188,
|
|
"fcm_dpo/margin": 13.21998405456543,
|
|
"fcm_dpo/q_t": 0.4478858411312103,
|
|
"grad_norm": 19.747303009033203,
|
|
"learning_rate": 1.4855747752871654e-07,
|
|
"logits/chosen": 0.6540986895561218,
|
|
"logits/rejected": 0.5701756477355957,
|
|
"logps/chosen": -124.91129302978516,
|
|
"logps/ref_chosen": -58.749061584472656,
|
|
"logps/ref_rejected": -86.87396240234375,
|
|
"logps/rejected": -166.2561798095703,
|
|
"loss": 1.327,
|
|
"margin_dpo/margin_mean": 13.219983100891113,
|
|
"margin_dpo/margin_std": 44.31674575805664,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.672713529856387,
|
|
"fcm_dpo/beta": 0.019091159105300903,
|
|
"fcm_dpo/delta": -0.12670674920082092,
|
|
"fcm_dpo/margin": 37.597110748291016,
|
|
"fcm_dpo/q_t": 0.35341084003448486,
|
|
"grad_norm": 16.564191818237305,
|
|
"learning_rate": 1.473504264745062e-07,
|
|
"logits/chosen": 0.6639256477355957,
|
|
"logits/rejected": 0.6510766744613647,
|
|
"logps/chosen": -123.94331359863281,
|
|
"logps/ref_chosen": -60.91743850708008,
|
|
"logps/ref_rejected": -71.5637435913086,
|
|
"logps/rejected": -172.18673706054688,
|
|
"loss": 0.9766,
|
|
"margin_dpo/margin_mean": 37.597110748291016,
|
|
"margin_dpo/margin_std": 48.70800018310547,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.674225245653817,
|
|
"fcm_dpo/beta": 0.018637076020240784,
|
|
"fcm_dpo/delta": -0.20336636900901794,
|
|
"fcm_dpo/margin": 42.1799201965332,
|
|
"fcm_dpo/q_t": 0.33526235818862915,
|
|
"grad_norm": 13.191262245178223,
|
|
"learning_rate": 1.461462467495284e-07,
|
|
"logits/chosen": 0.7034658193588257,
|
|
"logits/rejected": 0.6283047199249268,
|
|
"logps/chosen": -103.102783203125,
|
|
"logps/ref_chosen": -48.79924774169922,
|
|
"logps/ref_rejected": -71.8719482421875,
|
|
"logps/rejected": -168.35540771484375,
|
|
"loss": 0.8875,
|
|
"margin_dpo/margin_mean": 42.17991638183594,
|
|
"margin_dpo/margin_std": 40.7318229675293,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.6757369614512472,
|
|
"fcm_dpo/beta": 0.01757156476378441,
|
|
"fcm_dpo/delta": -0.19900262355804443,
|
|
"fcm_dpo/margin": 44.52255630493164,
|
|
"fcm_dpo/q_t": 0.33368557691574097,
|
|
"grad_norm": 17.807926177978516,
|
|
"learning_rate": 1.4494497203727843e-07,
|
|
"logits/chosen": 0.6144707202911377,
|
|
"logits/rejected": 0.5171197056770325,
|
|
"logps/chosen": -104.90130615234375,
|
|
"logps/ref_chosen": -53.682716369628906,
|
|
"logps/ref_rejected": -88.17315673828125,
|
|
"logps/rejected": -183.914306640625,
|
|
"loss": 0.9309,
|
|
"margin_dpo/margin_mean": 44.522560119628906,
|
|
"margin_dpo/margin_std": 51.547584533691406,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.6772486772486772,
|
|
"fcm_dpo/beta": 0.017467539757490158,
|
|
"fcm_dpo/delta": 0.0035621817223727703,
|
|
"fcm_dpo/margin": 34.157630920410156,
|
|
"fcm_dpo/q_t": 0.37248873710632324,
|
|
"grad_norm": 15.845419883728027,
|
|
"learning_rate": 1.4374663593999256e-07,
|
|
"logits/chosen": 0.6645723581314087,
|
|
"logits/rejected": 0.6151407957077026,
|
|
"logps/chosen": -111.9876708984375,
|
|
"logps/ref_chosen": -53.75125503540039,
|
|
"logps/ref_rejected": -77.17623901367188,
|
|
"logps/rejected": -169.57028198242188,
|
|
"loss": 1.0297,
|
|
"margin_dpo/margin_mean": 34.157630920410156,
|
|
"margin_dpo/margin_std": 47.70314025878906,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.6787603930461074,
|
|
"fcm_dpo/beta": 0.018148906528949738,
|
|
"fcm_dpo/delta": 0.284239262342453,
|
|
"fcm_dpo/margin": 17.95632553100586,
|
|
"fcm_dpo/q_t": 0.43146008253097534,
|
|
"grad_norm": 23.75647735595703,
|
|
"learning_rate": 1.4255127197770707e-07,
|
|
"logits/chosen": 0.5378659963607788,
|
|
"logits/rejected": 0.528540849685669,
|
|
"logps/chosen": -145.61447143554688,
|
|
"logps/ref_chosen": -75.82737731933594,
|
|
"logps/ref_rejected": -82.20687866210938,
|
|
"logps/rejected": -169.95028686523438,
|
|
"loss": 1.213,
|
|
"margin_dpo/margin_mean": 17.95632553100586,
|
|
"margin_dpo/margin_std": 40.200843811035156,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.6802721088435374,
|
|
"fcm_dpo/beta": 0.018794924020767212,
|
|
"fcm_dpo/delta": 0.1283785104751587,
|
|
"fcm_dpo/margin": 25.42965316772461,
|
|
"fcm_dpo/q_t": 0.4035590589046478,
|
|
"grad_norm": 18.1055850982666,
|
|
"learning_rate": 1.4135891358732205e-07,
|
|
"logits/chosen": 0.754492461681366,
|
|
"logits/rejected": 0.6441506147384644,
|
|
"logps/chosen": -103.79361724853516,
|
|
"logps/ref_chosen": -47.11572265625,
|
|
"logps/ref_rejected": -78.7546615600586,
|
|
"logps/rejected": -160.86221313476562,
|
|
"loss": 1.1364,
|
|
"margin_dpo/margin_mean": 25.42965316772461,
|
|
"margin_dpo/margin_std": 46.92584991455078,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.6817838246409675,
|
|
"fcm_dpo/beta": 0.01930905692279339,
|
|
"fcm_dpo/delta": 0.09509407728910446,
|
|
"fcm_dpo/margin": 26.37521743774414,
|
|
"fcm_dpo/q_t": 0.3959817886352539,
|
|
"grad_norm": 18.775625228881836,
|
|
"learning_rate": 1.4016959412166437e-07,
|
|
"logits/chosen": 0.6403440237045288,
|
|
"logits/rejected": 0.5913487672805786,
|
|
"logps/chosen": -120.57457733154297,
|
|
"logps/ref_chosen": -63.350440979003906,
|
|
"logps/ref_rejected": -76.28530883789062,
|
|
"logps/rejected": -159.88465881347656,
|
|
"loss": 1.1225,
|
|
"margin_dpo/margin_mean": 26.37521743774414,
|
|
"margin_dpo/margin_std": 47.211151123046875,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.6832955404383976,
|
|
"fcm_dpo/beta": 0.019437428563833237,
|
|
"fcm_dpo/delta": 0.025507230311632156,
|
|
"fcm_dpo/margin": 29.629127502441406,
|
|
"fcm_dpo/q_t": 0.37918704748153687,
|
|
"grad_norm": 17.062150955200195,
|
|
"learning_rate": 1.3898334684855645e-07,
|
|
"logits/chosen": 0.6154012680053711,
|
|
"logits/rejected": 0.5401051044464111,
|
|
"logps/chosen": -112.09454345703125,
|
|
"logps/ref_chosen": -55.58583450317383,
|
|
"logps/ref_rejected": -77.68738555908203,
|
|
"logps/rejected": -163.82521057128906,
|
|
"loss": 1.1002,
|
|
"margin_dpo/margin_mean": 29.629127502441406,
|
|
"margin_dpo/margin_std": 49.8187255859375,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.6848072562358276,
|
|
"fcm_dpo/beta": 0.01923062652349472,
|
|
"fcm_dpo/delta": -0.060018863528966904,
|
|
"fcm_dpo/margin": 34.10637664794922,
|
|
"fcm_dpo/q_t": 0.37033775448799133,
|
|
"grad_norm": 20.816280364990234,
|
|
"learning_rate": 1.3780020494988445e-07,
|
|
"logits/chosen": 0.616425633430481,
|
|
"logits/rejected": 0.5868883728981018,
|
|
"logps/chosen": -115.68905639648438,
|
|
"logps/ref_chosen": -61.778202056884766,
|
|
"logps/ref_rejected": -71.51403045654297,
|
|
"logps/rejected": -159.53126525878906,
|
|
"loss": 1.047,
|
|
"margin_dpo/margin_mean": 34.10637664794922,
|
|
"margin_dpo/margin_std": 51.63662338256836,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.6863189720332578,
|
|
"fcm_dpo/beta": 0.019015073776245117,
|
|
"fcm_dpo/delta": -0.06713598966598511,
|
|
"fcm_dpo/margin": 34.84968566894531,
|
|
"fcm_dpo/q_t": 0.3608110845088959,
|
|
"grad_norm": 14.855917930603027,
|
|
"learning_rate": 1.366202015206706e-07,
|
|
"logits/chosen": 0.6720168590545654,
|
|
"logits/rejected": 0.631004810333252,
|
|
"logps/chosen": -101.51078796386719,
|
|
"logps/ref_chosen": -51.59515380859375,
|
|
"logps/ref_rejected": -63.96732711791992,
|
|
"logps/rejected": -148.73263549804688,
|
|
"loss": 1.0185,
|
|
"margin_dpo/margin_mean": 34.84968566894531,
|
|
"margin_dpo/margin_std": 49.115013122558594,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.6878306878306878,
|
|
"fcm_dpo/beta": 0.01869376376271248,
|
|
"fcm_dpo/delta": -0.07124396413564682,
|
|
"fcm_dpo/margin": 35.63090896606445,
|
|
"fcm_dpo/q_t": 0.3637927770614624,
|
|
"grad_norm": 16.458694458007812,
|
|
"learning_rate": 1.354433695681474e-07,
|
|
"logits/chosen": 0.5351288318634033,
|
|
"logits/rejected": 0.5000825524330139,
|
|
"logps/chosen": -129.70745849609375,
|
|
"logps/ref_chosen": -70.65170288085938,
|
|
"logps/ref_rejected": -77.44276428222656,
|
|
"logps/rejected": -172.12942504882812,
|
|
"loss": 0.9883,
|
|
"margin_dpo/margin_mean": 35.63090515136719,
|
|
"margin_dpo/margin_std": 46.63299560546875,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.6893424036281179,
|
|
"fcm_dpo/beta": 0.018715500831604004,
|
|
"fcm_dpo/delta": 0.02745303139090538,
|
|
"fcm_dpo/margin": 30.67329978942871,
|
|
"fcm_dpo/q_t": 0.38108596205711365,
|
|
"grad_norm": 19.61562156677246,
|
|
"learning_rate": 1.3426974201083439e-07,
|
|
"logits/chosen": 0.5628154277801514,
|
|
"logits/rejected": 0.5071948170661926,
|
|
"logps/chosen": -114.44416046142578,
|
|
"logps/ref_chosen": -56.398284912109375,
|
|
"logps/ref_rejected": -82.61642456054688,
|
|
"logps/rejected": -171.33560180664062,
|
|
"loss": 1.0582,
|
|
"margin_dpo/margin_mean": 30.673297882080078,
|
|
"margin_dpo/margin_std": 46.49111557006836,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.690854119425548,
|
|
"fcm_dpo/beta": 0.018704025074839592,
|
|
"fcm_dpo/delta": 0.019063390791416168,
|
|
"fcm_dpo/margin": 31.069124221801758,
|
|
"fcm_dpo/q_t": 0.3758009970188141,
|
|
"grad_norm": 14.574370384216309,
|
|
"learning_rate": 1.3309935167761717e-07,
|
|
"logits/chosen": 0.7333712577819824,
|
|
"logits/rejected": 0.6618935465812683,
|
|
"logps/chosen": -103.5023193359375,
|
|
"logps/ref_chosen": -44.72057342529297,
|
|
"logps/ref_rejected": -68.1158676147461,
|
|
"logps/rejected": -157.96673583984375,
|
|
"loss": 1.0142,
|
|
"margin_dpo/margin_mean": 31.069124221801758,
|
|
"margin_dpo/margin_std": 39.806251525878906,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.6923658352229781,
|
|
"fcm_dpo/beta": 0.018755577504634857,
|
|
"fcm_dpo/delta": -0.030353626236319542,
|
|
"fcm_dpo/margin": 33.504337310791016,
|
|
"fcm_dpo/q_t": 0.3689650893211365,
|
|
"grad_norm": 14.937610626220703,
|
|
"learning_rate": 1.3193223130682936e-07,
|
|
"logits/chosen": 0.6596442461013794,
|
|
"logits/rejected": 0.5467248558998108,
|
|
"logps/chosen": -103.88152313232422,
|
|
"logps/ref_chosen": -50.00569152832031,
|
|
"logps/ref_rejected": -87.50015258789062,
|
|
"logps/rejected": -174.88031005859375,
|
|
"loss": 1.0325,
|
|
"margin_dpo/margin_mean": 33.504337310791016,
|
|
"margin_dpo/margin_std": 48.71820831298828,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.6938775510204082,
|
|
"fcm_dpo/beta": 0.018601369112730026,
|
|
"fcm_dpo/delta": -0.1576491743326187,
|
|
"fcm_dpo/margin": 39.97838592529297,
|
|
"fcm_dpo/q_t": 0.34621572494506836,
|
|
"grad_norm": 17.257484436035156,
|
|
"learning_rate": 1.3076841354533658e-07,
|
|
"logits/chosen": 0.6838634014129639,
|
|
"logits/rejected": 0.6482840180397034,
|
|
"logps/chosen": -117.18104553222656,
|
|
"logps/ref_chosen": -65.37794494628906,
|
|
"logps/ref_rejected": -88.19244384765625,
|
|
"logps/rejected": -179.9739227294922,
|
|
"loss": 0.9548,
|
|
"margin_dpo/margin_mean": 39.97838592529297,
|
|
"margin_dpo/margin_std": 46.098365783691406,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.6953892668178382,
|
|
"fcm_dpo/beta": 0.01769839972257614,
|
|
"fcm_dpo/delta": -0.13911324739456177,
|
|
"fcm_dpo/margin": 41.11860656738281,
|
|
"fcm_dpo/q_t": 0.3479554057121277,
|
|
"grad_norm": 14.071867942810059,
|
|
"learning_rate": 1.2960793094762345e-07,
|
|
"logits/chosen": 0.7055972218513489,
|
|
"logits/rejected": 0.5804663896560669,
|
|
"logps/chosen": -123.29414367675781,
|
|
"logps/ref_chosen": -64.5616683959961,
|
|
"logps/ref_rejected": -88.67890167236328,
|
|
"logps/rejected": -188.5299835205078,
|
|
"loss": 0.9342,
|
|
"margin_dpo/margin_mean": 41.11861038208008,
|
|
"margin_dpo/margin_std": 46.839012145996094,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.6969009826152683,
|
|
"fcm_dpo/beta": 0.01716051995754242,
|
|
"fcm_dpo/delta": -0.11889907717704773,
|
|
"fcm_dpo/margin": 41.18246078491211,
|
|
"fcm_dpo/q_t": 0.3509414792060852,
|
|
"grad_norm": 14.316862106323242,
|
|
"learning_rate": 1.2845081597488286e-07,
|
|
"logits/chosen": 0.7764365673065186,
|
|
"logits/rejected": 0.687043309211731,
|
|
"logps/chosen": -96.71049499511719,
|
|
"logps/ref_chosen": -49.4779167175293,
|
|
"logps/ref_rejected": -72.65262603759766,
|
|
"logps/rejected": -161.06765747070312,
|
|
"loss": 0.9482,
|
|
"margin_dpo/margin_mean": 41.18246078491211,
|
|
"margin_dpo/margin_std": 46.42418670654297,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.6984126984126984,
|
|
"fcm_dpo/beta": 0.016697831451892853,
|
|
"fcm_dpo/delta": -0.17689791321754456,
|
|
"fcm_dpo/margin": 45.61006164550781,
|
|
"fcm_dpo/q_t": 0.33651816844940186,
|
|
"grad_norm": 13.249038696289062,
|
|
"learning_rate": 1.27297100994108e-07,
|
|
"logits/chosen": 0.6552219390869141,
|
|
"logits/rejected": 0.6001119613647461,
|
|
"logps/chosen": -114.67098999023438,
|
|
"logps/ref_chosen": -60.4951171875,
|
|
"logps/ref_rejected": -74.82136535644531,
|
|
"logps/rejected": -174.60731506347656,
|
|
"loss": 0.9112,
|
|
"margin_dpo/margin_mean": 45.61006164550781,
|
|
"margin_dpo/margin_std": 48.99812316894531,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.6999244142101285,
|
|
"fcm_dpo/beta": 0.016872413456439972,
|
|
"fcm_dpo/delta": 0.11362668126821518,
|
|
"fcm_dpo/margin": 29.165672302246094,
|
|
"fcm_dpo/q_t": 0.39225536584854126,
|
|
"grad_norm": 17.52629852294922,
|
|
"learning_rate": 1.2614681827718695e-07,
|
|
"logits/chosen": 0.6610893607139587,
|
|
"logits/rejected": 0.6551651954650879,
|
|
"logps/chosen": -129.19302368164062,
|
|
"logps/ref_chosen": -67.68511962890625,
|
|
"logps/ref_rejected": -71.32196044921875,
|
|
"logps/rejected": -161.9955291748047,
|
|
"loss": 1.0752,
|
|
"margin_dpo/margin_mean": 29.165672302246094,
|
|
"margin_dpo/margin_std": 42.47758483886719,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.7014361300075586,
|
|
"fcm_dpo/beta": 0.01695796474814415,
|
|
"fcm_dpo/delta": -0.06330247223377228,
|
|
"fcm_dpo/margin": 38.858402252197266,
|
|
"fcm_dpo/q_t": 0.36157849431037903,
|
|
"grad_norm": 16.41646385192871,
|
|
"learning_rate": 1.2500000000000005e-07,
|
|
"logits/chosen": 0.6483955979347229,
|
|
"logits/rejected": 0.6234833598136902,
|
|
"logps/chosen": -118.78719329833984,
|
|
"logps/ref_chosen": -59.16564178466797,
|
|
"logps/ref_rejected": -69.56146240234375,
|
|
"logps/rejected": -168.04141235351562,
|
|
"loss": 1.0087,
|
|
"margin_dpo/margin_mean": 38.8583984375,
|
|
"margin_dpo/margin_std": 51.726806640625,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.7029478458049887,
|
|
"fcm_dpo/beta": 0.016867714002728462,
|
|
"fcm_dpo/delta": 0.03191475197672844,
|
|
"fcm_dpo/margin": 33.78591537475586,
|
|
"fcm_dpo/q_t": 0.382260799407959,
|
|
"grad_norm": 19.196998596191406,
|
|
"learning_rate": 1.238566782415197e-07,
|
|
"logits/chosen": 0.7347100973129272,
|
|
"logits/rejected": 0.6733226776123047,
|
|
"logps/chosen": -122.17825317382812,
|
|
"logps/ref_chosen": -58.513671875,
|
|
"logps/ref_rejected": -84.31745910644531,
|
|
"logps/rejected": -181.76795959472656,
|
|
"loss": 1.0671,
|
|
"margin_dpo/margin_mean": 33.785911560058594,
|
|
"margin_dpo/margin_std": 52.060218811035156,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.7044595616024187,
|
|
"fcm_dpo/beta": 0.017417848110198975,
|
|
"fcm_dpo/delta": 0.21429502964019775,
|
|
"fcm_dpo/margin": 22.65721321105957,
|
|
"fcm_dpo/q_t": 0.4179609715938568,
|
|
"grad_norm": 22.925125122070312,
|
|
"learning_rate": 1.2271688498291334e-07,
|
|
"logits/chosen": 0.6901623010635376,
|
|
"logits/rejected": 0.6949342489242554,
|
|
"logps/chosen": -140.54638671875,
|
|
"logps/ref_chosen": -73.26580810546875,
|
|
"logps/ref_rejected": -74.83621215820312,
|
|
"logps/rejected": -164.77401733398438,
|
|
"loss": 1.1498,
|
|
"margin_dpo/margin_mean": 22.657211303710938,
|
|
"margin_dpo/margin_std": 41.78242492675781,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.7059712773998488,
|
|
"fcm_dpo/beta": 0.01785450614988804,
|
|
"fcm_dpo/delta": 0.07046917825937271,
|
|
"fcm_dpo/margin": 29.86865997314453,
|
|
"fcm_dpo/q_t": 0.3858645260334015,
|
|
"grad_norm": 15.465002059936523,
|
|
"learning_rate": 1.2158065210664848e-07,
|
|
"logits/chosen": 0.7200223803520203,
|
|
"logits/rejected": 0.5697600841522217,
|
|
"logps/chosen": -109.60458374023438,
|
|
"logps/ref_chosen": -47.57947540283203,
|
|
"logps/ref_rejected": -78.68522644042969,
|
|
"logps/rejected": -170.57899475097656,
|
|
"loss": 1.0483,
|
|
"margin_dpo/margin_mean": 29.86865997314453,
|
|
"margin_dpo/margin_std": 42.51439666748047,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.7074829931972789,
|
|
"fcm_dpo/beta": 0.017471440136432648,
|
|
"fcm_dpo/delta": -0.19157786667346954,
|
|
"fcm_dpo/margin": 44.475189208984375,
|
|
"fcm_dpo/q_t": 0.3382449746131897,
|
|
"grad_norm": 16.933467864990234,
|
|
"learning_rate": 1.204480113956011e-07,
|
|
"logits/chosen": 0.6419360637664795,
|
|
"logits/rejected": 0.6371017694473267,
|
|
"logps/chosen": -118.92312622070312,
|
|
"logps/ref_chosen": -63.92778778076172,
|
|
"logps/ref_rejected": -76.51626586914062,
|
|
"logps/rejected": -175.98680114746094,
|
|
"loss": 0.9168,
|
|
"margin_dpo/margin_mean": 44.47518539428711,
|
|
"margin_dpo/margin_std": 50.01071548461914,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.708994708994709,
|
|
"fcm_dpo/beta": 0.0168905146420002,
|
|
"fcm_dpo/delta": -0.07182085514068604,
|
|
"fcm_dpo/margin": 39.333194732666016,
|
|
"fcm_dpo/q_t": 0.35752636194229126,
|
|
"grad_norm": 16.476343154907227,
|
|
"learning_rate": 1.1931899453216697e-07,
|
|
"logits/chosen": 0.7270421981811523,
|
|
"logits/rejected": 0.716751217842102,
|
|
"logps/chosen": -116.4421157836914,
|
|
"logps/ref_chosen": -59.05818176269531,
|
|
"logps/ref_rejected": -75.67672729492188,
|
|
"logps/rejected": -172.39385986328125,
|
|
"loss": 0.955,
|
|
"margin_dpo/margin_mean": 39.333194732666016,
|
|
"margin_dpo/margin_std": 43.63664245605469,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.7105064247921391,
|
|
"fcm_dpo/beta": 0.01703723520040512,
|
|
"fcm_dpo/delta": -0.01729283481836319,
|
|
"fcm_dpo/margin": 36.148399353027344,
|
|
"fcm_dpo/q_t": 0.3683815896511078,
|
|
"grad_norm": 14.40355110168457,
|
|
"learning_rate": 1.1819363309737438e-07,
|
|
"logits/chosen": 0.6840169429779053,
|
|
"logits/rejected": 0.6180592179298401,
|
|
"logps/chosen": -105.3543701171875,
|
|
"logps/ref_chosen": -47.86743927001953,
|
|
"logps/ref_rejected": -65.96859741210938,
|
|
"logps/rejected": -159.60391235351562,
|
|
"loss": 1.0213,
|
|
"margin_dpo/margin_mean": 36.148399353027344,
|
|
"margin_dpo/margin_std": 49.47236633300781,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.7120181405895691,
|
|
"fcm_dpo/beta": 0.01653927005827427,
|
|
"fcm_dpo/delta": -0.16447000205516815,
|
|
"fcm_dpo/margin": 45.47620391845703,
|
|
"fcm_dpo/q_t": 0.3416385054588318,
|
|
"grad_norm": 14.304550170898438,
|
|
"learning_rate": 1.1707195857000215e-07,
|
|
"logits/chosen": 0.6405422687530518,
|
|
"logits/rejected": 0.5829768180847168,
|
|
"logps/chosen": -109.19689178466797,
|
|
"logps/ref_chosen": -57.777854919433594,
|
|
"logps/ref_rejected": -73.81172180175781,
|
|
"logps/rejected": -170.70697021484375,
|
|
"loss": 0.9287,
|
|
"margin_dpo/margin_mean": 45.47620391845703,
|
|
"margin_dpo/margin_std": 51.99640655517578,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.7135298563869993,
|
|
"fcm_dpo/beta": 0.01654931902885437,
|
|
"fcm_dpo/delta": 0.019277174025774002,
|
|
"fcm_dpo/margin": 35.099578857421875,
|
|
"fcm_dpo/q_t": 0.3808854818344116,
|
|
"grad_norm": 17.019304275512695,
|
|
"learning_rate": 1.1595400232569768e-07,
|
|
"logits/chosen": 0.6810761094093323,
|
|
"logits/rejected": 0.634406328201294,
|
|
"logps/chosen": -110.3800048828125,
|
|
"logps/ref_chosen": -55.908668518066406,
|
|
"logps/ref_rejected": -74.70294189453125,
|
|
"logps/rejected": -164.27386474609375,
|
|
"loss": 1.0809,
|
|
"margin_dpo/margin_mean": 35.099578857421875,
|
|
"margin_dpo/margin_std": 56.11036682128906,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.7150415721844293,
|
|
"fcm_dpo/beta": 0.016645582392811775,
|
|
"fcm_dpo/delta": 0.07657499611377716,
|
|
"fcm_dpo/margin": 31.687740325927734,
|
|
"fcm_dpo/q_t": 0.3922686278820038,
|
|
"grad_norm": 19.925630569458008,
|
|
"learning_rate": 1.1483979563610069e-07,
|
|
"logits/chosen": 0.7695510387420654,
|
|
"logits/rejected": 0.6590477228164673,
|
|
"logps/chosen": -109.32481384277344,
|
|
"logps/ref_chosen": -54.16088104248047,
|
|
"logps/ref_rejected": -92.76789855957031,
|
|
"logps/rejected": -179.61956787109375,
|
|
"loss": 1.1266,
|
|
"margin_dpo/margin_mean": 31.687740325927734,
|
|
"margin_dpo/margin_std": 57.10271453857422,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.7165532879818595,
|
|
"fcm_dpo/beta": 0.01691177673637867,
|
|
"fcm_dpo/delta": 0.08062369376420975,
|
|
"fcm_dpo/margin": 30.960586547851562,
|
|
"fcm_dpo/q_t": 0.3930462598800659,
|
|
"grad_norm": 21.025999069213867,
|
|
"learning_rate": 1.1372936966796709e-07,
|
|
"logits/chosen": 0.7726494669914246,
|
|
"logits/rejected": 0.6976436376571655,
|
|
"logps/chosen": -108.3525390625,
|
|
"logps/ref_chosen": -46.685707092285156,
|
|
"logps/ref_rejected": -71.44731903076172,
|
|
"logps/rejected": -164.07473754882812,
|
|
"loss": 1.1183,
|
|
"margin_dpo/margin_mean": 30.960586547851562,
|
|
"margin_dpo/margin_std": 54.630615234375,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.7180650037792895,
|
|
"fcm_dpo/beta": 0.01635439321398735,
|
|
"fcm_dpo/delta": -0.20893925428390503,
|
|
"fcm_dpo/margin": 48.320587158203125,
|
|
"fcm_dpo/q_t": 0.3328055143356323,
|
|
"grad_norm": 12.721051216125488,
|
|
"learning_rate": 1.126227554822985e-07,
|
|
"logits/chosen": 0.6676898002624512,
|
|
"logits/rejected": 0.6170685887336731,
|
|
"logps/chosen": -115.66683959960938,
|
|
"logps/ref_chosen": -58.4873046875,
|
|
"logps/ref_rejected": -87.00187683105469,
|
|
"logps/rejected": -192.5019989013672,
|
|
"loss": 0.8783,
|
|
"margin_dpo/margin_mean": 48.320594787597656,
|
|
"margin_dpo/margin_std": 48.11223602294922,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.7195767195767195,
|
|
"fcm_dpo/beta": 0.01653391122817993,
|
|
"fcm_dpo/delta": 0.06583425402641296,
|
|
"fcm_dpo/margin": 32.468589782714844,
|
|
"fcm_dpo/q_t": 0.3886245787143707,
|
|
"grad_norm": 16.372636795043945,
|
|
"learning_rate": 1.1151998403347243e-07,
|
|
"logits/chosen": 0.6212706565856934,
|
|
"logits/rejected": 0.6143991947174072,
|
|
"logps/chosen": -143.81753540039062,
|
|
"logps/ref_chosen": -75.38162231445312,
|
|
"logps/ref_rejected": -76.99822235107422,
|
|
"logps/rejected": -177.90272521972656,
|
|
"loss": 1.0843,
|
|
"margin_dpo/margin_mean": 32.468589782714844,
|
|
"margin_dpo/margin_std": 51.891624450683594,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.7210884353741497,
|
|
"fcm_dpo/beta": 0.016756640747189522,
|
|
"fcm_dpo/delta": 0.10726511478424072,
|
|
"fcm_dpo/margin": 29.727947235107422,
|
|
"fcm_dpo/q_t": 0.3972741961479187,
|
|
"grad_norm": 19.15401840209961,
|
|
"learning_rate": 1.1042108616837692e-07,
|
|
"logits/chosen": 0.6817283630371094,
|
|
"logits/rejected": 0.636535108089447,
|
|
"logps/chosen": -133.14248657226562,
|
|
"logps/ref_chosen": -61.073387145996094,
|
|
"logps/ref_rejected": -81.34375,
|
|
"logps/rejected": -183.14080810546875,
|
|
"loss": 1.178,
|
|
"margin_dpo/margin_mean": 29.727947235107422,
|
|
"margin_dpo/margin_std": 60.472991943359375,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.7226001511715797,
|
|
"fcm_dpo/beta": 0.017096243798732758,
|
|
"fcm_dpo/delta": 0.0992036759853363,
|
|
"fcm_dpo/margin": 29.590330123901367,
|
|
"fcm_dpo/q_t": 0.3972923159599304,
|
|
"grad_norm": 19.976335525512695,
|
|
"learning_rate": 1.0932609262554746e-07,
|
|
"logits/chosen": 0.6263631582260132,
|
|
"logits/rejected": 0.6319549679756165,
|
|
"logps/chosen": -118.4478530883789,
|
|
"logps/ref_chosen": -57.16731643676758,
|
|
"logps/ref_rejected": -53.30917739868164,
|
|
"logps/rejected": -144.1800537109375,
|
|
"loss": 1.1515,
|
|
"margin_dpo/margin_mean": 29.59033203125,
|
|
"margin_dpo/margin_std": 56.895023345947266,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.7241118669690099,
|
|
"fcm_dpo/beta": 0.017538445070385933,
|
|
"fcm_dpo/delta": 0.16506525874137878,
|
|
"fcm_dpo/margin": 25.232295989990234,
|
|
"fcm_dpo/q_t": 0.4069156050682068,
|
|
"grad_norm": 20.780662536621094,
|
|
"learning_rate": 1.0823503403430734e-07,
|
|
"logits/chosen": 0.610386848449707,
|
|
"logits/rejected": 0.5595101118087769,
|
|
"logps/chosen": -124.1976547241211,
|
|
"logps/ref_chosen": -58.91331481933594,
|
|
"logps/ref_rejected": -63.7403450012207,
|
|
"logps/rejected": -154.25698852539062,
|
|
"loss": 1.1754,
|
|
"margin_dpo/margin_mean": 25.2322940826416,
|
|
"margin_dpo/margin_std": 51.18651580810547,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.7256235827664399,
|
|
"fcm_dpo/beta": 0.017718058079481125,
|
|
"fcm_dpo/delta": -0.08875527232885361,
|
|
"fcm_dpo/margin": 38.47267150878906,
|
|
"fcm_dpo/q_t": 0.357020765542984,
|
|
"grad_norm": 20.216873168945312,
|
|
"learning_rate": 1.0714794091391072e-07,
|
|
"logits/chosen": 0.6035921573638916,
|
|
"logits/rejected": 0.5966066122055054,
|
|
"logps/chosen": -125.77318572998047,
|
|
"logps/ref_chosen": -62.80061340332031,
|
|
"logps/ref_rejected": -67.58859252929688,
|
|
"logps/rejected": -169.03384399414062,
|
|
"loss": 1.0383,
|
|
"margin_dpo/margin_mean": 38.47267532348633,
|
|
"margin_dpo/margin_std": 55.48522186279297,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.72713529856387,
|
|
"fcm_dpo/beta": 0.017298948019742966,
|
|
"fcm_dpo/delta": -0.05451624467968941,
|
|
"fcm_dpo/margin": 37.62444305419922,
|
|
"fcm_dpo/q_t": 0.3680918216705322,
|
|
"grad_norm": 17.83654022216797,
|
|
"learning_rate": 1.0606484367268906e-07,
|
|
"logits/chosen": 0.6130036115646362,
|
|
"logits/rejected": 0.6114796996116638,
|
|
"logps/chosen": -126.92695617675781,
|
|
"logps/ref_chosen": -65.28649139404297,
|
|
"logps/ref_rejected": -70.78668212890625,
|
|
"logps/rejected": -170.0515899658203,
|
|
"loss": 1.0328,
|
|
"margin_dpo/margin_mean": 37.62444305419922,
|
|
"margin_dpo/margin_std": 55.061363220214844,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.7286470143613001,
|
|
"fcm_dpo/beta": 0.017456453293561935,
|
|
"fcm_dpo/delta": 0.03557737171649933,
|
|
"fcm_dpo/margin": 32.40789794921875,
|
|
"fcm_dpo/q_t": 0.3861989974975586,
|
|
"grad_norm": 21.065109252929688,
|
|
"learning_rate": 1.0498577260720048e-07,
|
|
"logits/chosen": 0.6204051375389099,
|
|
"logits/rejected": 0.47864723205566406,
|
|
"logps/chosen": -131.6238555908203,
|
|
"logps/ref_chosen": -60.906185150146484,
|
|
"logps/ref_rejected": -103.44656372070312,
|
|
"logps/rejected": -206.57212829589844,
|
|
"loss": 1.1402,
|
|
"margin_dpo/margin_mean": 32.40789794921875,
|
|
"margin_dpo/margin_std": 60.17729949951172,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.7301587301587301,
|
|
"fcm_dpo/beta": 0.01712999865412712,
|
|
"fcm_dpo/delta": -0.11436626315116882,
|
|
"fcm_dpo/margin": 41.23766326904297,
|
|
"fcm_dpo/q_t": 0.3495997190475464,
|
|
"grad_norm": 16.394908905029297,
|
|
"learning_rate": 1.0391075790138232e-07,
|
|
"logits/chosen": 0.7566468715667725,
|
|
"logits/rejected": 0.6558492183685303,
|
|
"logps/chosen": -112.64329528808594,
|
|
"logps/ref_chosen": -53.192012786865234,
|
|
"logps/ref_rejected": -81.83927154541016,
|
|
"logps/rejected": -182.52822875976562,
|
|
"loss": 0.9599,
|
|
"margin_dpo/margin_mean": 41.23766326904297,
|
|
"margin_dpo/margin_std": 50.17535400390625,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.7316704459561603,
|
|
"fcm_dpo/beta": 0.017164533957839012,
|
|
"fcm_dpo/delta": 0.027778685092926025,
|
|
"fcm_dpo/margin": 33.390480041503906,
|
|
"fcm_dpo/q_t": 0.3766096830368042,
|
|
"grad_norm": 18.586793899536133,
|
|
"learning_rate": 1.0283982962570681e-07,
|
|
"logits/chosen": 0.7305570840835571,
|
|
"logits/rejected": 0.6929386854171753,
|
|
"logps/chosen": -117.24603271484375,
|
|
"logps/ref_chosen": -57.76945877075195,
|
|
"logps/ref_rejected": -71.6829833984375,
|
|
"logps/rejected": -164.550048828125,
|
|
"loss": 1.0136,
|
|
"margin_dpo/margin_mean": 33.390480041503906,
|
|
"margin_dpo/margin_std": 42.75909423828125,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.7331821617535903,
|
|
"fcm_dpo/beta": 0.016995690762996674,
|
|
"fcm_dpo/delta": 0.030734747648239136,
|
|
"fcm_dpo/margin": 33.485816955566406,
|
|
"fcm_dpo/q_t": 0.38060659170150757,
|
|
"grad_norm": 16.272403717041016,
|
|
"learning_rate": 1.0177301773633992e-07,
|
|
"logits/chosen": 0.6844367980957031,
|
|
"logits/rejected": 0.6609545946121216,
|
|
"logps/chosen": -118.37860107421875,
|
|
"logps/ref_chosen": -56.63584899902344,
|
|
"logps/ref_rejected": -70.85614013671875,
|
|
"logps/rejected": -166.08470153808594,
|
|
"loss": 1.0547,
|
|
"margin_dpo/margin_mean": 33.485816955566406,
|
|
"margin_dpo/margin_std": 48.70667266845703,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.7346938775510204,
|
|
"fcm_dpo/beta": 0.017520343884825706,
|
|
"fcm_dpo/delta": 0.11381173133850098,
|
|
"fcm_dpo/margin": 28.05940055847168,
|
|
"fcm_dpo/q_t": 0.4005647599697113,
|
|
"grad_norm": 17.28680419921875,
|
|
"learning_rate": 1.007103520743035e-07,
|
|
"logits/chosen": 0.7284814715385437,
|
|
"logits/rejected": 0.6097695827484131,
|
|
"logps/chosen": -131.38426208496094,
|
|
"logps/ref_chosen": -56.347023010253906,
|
|
"logps/ref_rejected": -85.97221374511719,
|
|
"logps/rejected": -189.06886291503906,
|
|
"loss": 1.1411,
|
|
"margin_dpo/margin_mean": 28.059402465820312,
|
|
"margin_dpo/margin_std": 53.089866638183594,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.7362055933484505,
|
|
"fcm_dpo/beta": 0.017645984888076782,
|
|
"fcm_dpo/delta": 0.031026359647512436,
|
|
"fcm_dpo/margin": 32.34307098388672,
|
|
"fcm_dpo/q_t": 0.3809584975242615,
|
|
"grad_norm": 22.65226173400879,
|
|
"learning_rate": 9.965186236464046e-08,
|
|
"logits/chosen": 0.7624588012695312,
|
|
"logits/rejected": 0.7029759883880615,
|
|
"logps/chosen": -127.072998046875,
|
|
"logps/ref_chosen": -60.617218017578125,
|
|
"logps/ref_rejected": -82.50975036621094,
|
|
"logps/rejected": -181.30859375,
|
|
"loss": 1.0653,
|
|
"margin_dpo/margin_mean": 32.34307098388672,
|
|
"margin_dpo/margin_std": 50.19340515136719,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.7377173091458806,
|
|
"fcm_dpo/beta": 0.017560675740242004,
|
|
"fcm_dpo/delta": -0.09279748052358627,
|
|
"fcm_dpo/margin": 39.08435821533203,
|
|
"fcm_dpo/q_t": 0.35840192437171936,
|
|
"grad_norm": 21.256258010864258,
|
|
"learning_rate": 9.859757821558337e-08,
|
|
"logits/chosen": 0.6977435946464539,
|
|
"logits/rejected": 0.6269375085830688,
|
|
"logps/chosen": -123.77168273925781,
|
|
"logps/ref_chosen": -63.10905075073242,
|
|
"logps/ref_rejected": -82.49348449707031,
|
|
"logps/rejected": -182.24046325683594,
|
|
"loss": 0.9753,
|
|
"margin_dpo/margin_mean": 39.08435821533203,
|
|
"margin_dpo/margin_std": 49.42118835449219,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.7392290249433107,
|
|
"fcm_dpo/beta": 0.017897412180900574,
|
|
"fcm_dpo/delta": 0.2119128406047821,
|
|
"fcm_dpo/margin": 22.175716400146484,
|
|
"fcm_dpo/q_t": 0.4207577109336853,
|
|
"grad_norm": 20.755985260009766,
|
|
"learning_rate": 9.754752911772615e-08,
|
|
"logits/chosen": 0.6768500208854675,
|
|
"logits/rejected": 0.6314944624900818,
|
|
"logps/chosen": -135.53665161132812,
|
|
"logps/ref_chosen": -64.98896026611328,
|
|
"logps/ref_rejected": -84.39607238769531,
|
|
"logps/rejected": -177.11947631835938,
|
|
"loss": 1.2272,
|
|
"margin_dpo/margin_mean": 22.175716400146484,
|
|
"margin_dpo/margin_std": 52.71125793457031,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.7407407407407407,
|
|
"fcm_dpo/beta": 0.01822846755385399,
|
|
"fcm_dpo/delta": 0.059169452637434006,
|
|
"fcm_dpo/margin": 29.84263038635254,
|
|
"fcm_dpo/q_t": 0.39093446731567383,
|
|
"grad_norm": 17.13382339477539,
|
|
"learning_rate": 9.650174444319956e-08,
|
|
"logits/chosen": 0.7506576776504517,
|
|
"logits/rejected": 0.7263511419296265,
|
|
"logps/chosen": -123.81832885742188,
|
|
"logps/ref_chosen": -61.90874481201172,
|
|
"logps/ref_rejected": -70.58566284179688,
|
|
"logps/rejected": -162.33787536621094,
|
|
"loss": 1.188,
|
|
"margin_dpo/margin_mean": 29.84263038635254,
|
|
"margin_dpo/margin_std": 61.338348388671875,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.7422524565381708,
|
|
"fcm_dpo/beta": 0.018151359632611275,
|
|
"fcm_dpo/delta": -0.016892343759536743,
|
|
"fcm_dpo/margin": 33.84369659423828,
|
|
"fcm_dpo/q_t": 0.3726619780063629,
|
|
"grad_norm": 16.95362091064453,
|
|
"learning_rate": 9.546025344484868e-08,
|
|
"logits/chosen": 0.6099548935890198,
|
|
"logits/rejected": 0.5560557246208191,
|
|
"logps/chosen": -116.60749053955078,
|
|
"logps/ref_chosen": -55.47570037841797,
|
|
"logps/ref_rejected": -78.70318603515625,
|
|
"logps/rejected": -173.6786651611328,
|
|
"loss": 1.0401,
|
|
"margin_dpo/margin_mean": 33.84369659423828,
|
|
"margin_dpo/margin_std": 48.538536071777344,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.7437641723356009,
|
|
"fcm_dpo/beta": 0.018728461116552353,
|
|
"fcm_dpo/delta": 0.10250745713710785,
|
|
"fcm_dpo/margin": 26.719730377197266,
|
|
"fcm_dpo/q_t": 0.400844931602478,
|
|
"grad_norm": 21.921876907348633,
|
|
"learning_rate": 9.442308525541589e-08,
|
|
"logits/chosen": 0.6604113578796387,
|
|
"logits/rejected": 0.586262583732605,
|
|
"logps/chosen": -143.04898071289062,
|
|
"logps/ref_chosen": -67.28638458251953,
|
|
"logps/ref_rejected": -82.78628540039062,
|
|
"logps/rejected": -185.26861572265625,
|
|
"loss": 1.17,
|
|
"margin_dpo/margin_mean": 26.719728469848633,
|
|
"margin_dpo/margin_std": 53.01930236816406,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.745275888133031,
|
|
"fcm_dpo/beta": 0.018443478271365166,
|
|
"fcm_dpo/delta": -0.12488085776567459,
|
|
"fcm_dpo/margin": 38.8165283203125,
|
|
"fcm_dpo/q_t": 0.35293227434158325,
|
|
"grad_norm": 17.643783569335938,
|
|
"learning_rate": 9.339026888672468e-08,
|
|
"logits/chosen": 0.6158891916275024,
|
|
"logits/rejected": 0.5381425619125366,
|
|
"logps/chosen": -115.87471008300781,
|
|
"logps/ref_chosen": -55.92750549316406,
|
|
"logps/ref_rejected": -79.12149810791016,
|
|
"logps/rejected": -177.88523864746094,
|
|
"loss": 1.0033,
|
|
"margin_dpo/margin_mean": 38.8165283203125,
|
|
"margin_dpo/margin_std": 53.64328384399414,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.7467876039304611,
|
|
"fcm_dpo/beta": 0.018356945365667343,
|
|
"fcm_dpo/delta": 0.07535573840141296,
|
|
"fcm_dpo/margin": 28.798974990844727,
|
|
"fcm_dpo/q_t": 0.3923889994621277,
|
|
"grad_norm": 21.99494743347168,
|
|
"learning_rate": 9.236183322886945e-08,
|
|
"logits/chosen": 0.5371869802474976,
|
|
"logits/rejected": 0.4848790168762207,
|
|
"logps/chosen": -130.42276000976562,
|
|
"logps/ref_chosen": -67.95410919189453,
|
|
"logps/ref_rejected": -90.50865173339844,
|
|
"logps/rejected": -181.7762908935547,
|
|
"loss": 1.1839,
|
|
"margin_dpo/margin_mean": 28.798974990844727,
|
|
"margin_dpo/margin_std": 59.69166564941406,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.7482993197278912,
|
|
"fcm_dpo/beta": 0.01888749748468399,
|
|
"fcm_dpo/delta": 0.10935745388269424,
|
|
"fcm_dpo/margin": 26.206092834472656,
|
|
"fcm_dpo/q_t": 0.4023835062980652,
|
|
"grad_norm": 23.057941436767578,
|
|
"learning_rate": 9.133780704940594e-08,
|
|
"logits/chosen": 0.7392085790634155,
|
|
"logits/rejected": 0.6697524785995483,
|
|
"logps/chosen": -114.02333068847656,
|
|
"logps/ref_chosen": -52.62546157836914,
|
|
"logps/ref_rejected": -72.06781005859375,
|
|
"logps/rejected": -159.6717529296875,
|
|
"loss": 1.1555,
|
|
"margin_dpo/margin_mean": 26.206092834472656,
|
|
"margin_dpo/margin_std": 51.413429260253906,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.7498110355253212,
|
|
"fcm_dpo/beta": 0.01871339976787567,
|
|
"fcm_dpo/delta": -0.004845082759857178,
|
|
"fcm_dpo/margin": 32.221229553222656,
|
|
"fcm_dpo/q_t": 0.3820890784263611,
|
|
"grad_norm": 18.55131721496582,
|
|
"learning_rate": 9.031821899254797e-08,
|
|
"logits/chosen": 0.7021945118904114,
|
|
"logits/rejected": 0.5876121520996094,
|
|
"logps/chosen": -126.39350891113281,
|
|
"logps/ref_chosen": -57.597320556640625,
|
|
"logps/ref_rejected": -94.36127471923828,
|
|
"logps/rejected": -195.37869262695312,
|
|
"loss": 1.0993,
|
|
"margin_dpo/margin_mean": 32.22122573852539,
|
|
"margin_dpo/margin_std": 54.520545959472656,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.7513227513227513,
|
|
"fcm_dpo/beta": 0.018337702378630638,
|
|
"fcm_dpo/delta": -0.17800766229629517,
|
|
"fcm_dpo/margin": 41.654964447021484,
|
|
"fcm_dpo/q_t": 0.3428105115890503,
|
|
"grad_norm": 16.170692443847656,
|
|
"learning_rate": 8.930309757836516e-08,
|
|
"logits/chosen": 0.7111356258392334,
|
|
"logits/rejected": 0.678577721118927,
|
|
"logps/chosen": -141.17437744140625,
|
|
"logps/ref_chosen": -72.78994750976562,
|
|
"logps/ref_rejected": -89.48483276367188,
|
|
"logps/rejected": -199.5242156982422,
|
|
"loss": 0.9558,
|
|
"margin_dpo/margin_mean": 41.654964447021484,
|
|
"margin_dpo/margin_std": 52.02744674682617,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.7528344671201814,
|
|
"fcm_dpo/beta": 0.018001677468419075,
|
|
"fcm_dpo/delta": -0.08155323565006256,
|
|
"fcm_dpo/margin": 37.55533981323242,
|
|
"fcm_dpo/q_t": 0.3615556061267853,
|
|
"grad_norm": 23.24351692199707,
|
|
"learning_rate": 8.829247120198563e-08,
|
|
"logits/chosen": 0.695479154586792,
|
|
"logits/rejected": 0.6608887910842896,
|
|
"logps/chosen": -129.8267364501953,
|
|
"logps/ref_chosen": -68.36572265625,
|
|
"logps/ref_rejected": -71.28846740722656,
|
|
"logps/rejected": -170.30484008789062,
|
|
"loss": 0.9793,
|
|
"margin_dpo/margin_mean": 37.55533981323242,
|
|
"margin_dpo/margin_std": 48.39889144897461,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.7543461829176115,
|
|
"fcm_dpo/beta": 0.017900779843330383,
|
|
"fcm_dpo/delta": -0.03278336673974991,
|
|
"fcm_dpo/margin": 35.225341796875,
|
|
"fcm_dpo/q_t": 0.3726680278778076,
|
|
"grad_norm": 19.511375427246094,
|
|
"learning_rate": 8.728636813280163e-08,
|
|
"logits/chosen": 0.6967817544937134,
|
|
"logits/rejected": 0.6287128925323486,
|
|
"logps/chosen": -120.04055786132812,
|
|
"logps/ref_chosen": -61.90882873535156,
|
|
"logps/ref_rejected": -91.9411392211914,
|
|
"logps/rejected": -185.29820251464844,
|
|
"loss": 1.0937,
|
|
"margin_dpo/margin_mean": 35.225341796875,
|
|
"margin_dpo/margin_std": 58.685157775878906,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.7558578987150416,
|
|
"fcm_dpo/beta": 0.017738111317157745,
|
|
"fcm_dpo/delta": -0.015023061074316502,
|
|
"fcm_dpo/margin": 34.61691665649414,
|
|
"fcm_dpo/q_t": 0.37329190969467163,
|
|
"grad_norm": 23.707897186279297,
|
|
"learning_rate": 8.628481651367875e-08,
|
|
"logits/chosen": 0.6063783168792725,
|
|
"logits/rejected": 0.6012179851531982,
|
|
"logps/chosen": -134.2138671875,
|
|
"logps/ref_chosen": -70.225830078125,
|
|
"logps/ref_rejected": -71.72203063964844,
|
|
"logps/rejected": -170.32699584960938,
|
|
"loss": 1.078,
|
|
"margin_dpo/margin_mean": 34.61691665649414,
|
|
"margin_dpo/margin_std": 55.942474365234375,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.7573696145124716,
|
|
"fcm_dpo/beta": 0.018007531762123108,
|
|
"fcm_dpo/delta": 0.06094428896903992,
|
|
"fcm_dpo/margin": 30.07050323486328,
|
|
"fcm_dpo/q_t": 0.3839370608329773,
|
|
"grad_norm": 13.959453582763672,
|
|
"learning_rate": 8.528784436016878e-08,
|
|
"logits/chosen": 0.6482034921646118,
|
|
"logits/rejected": 0.6431034803390503,
|
|
"logps/chosen": -127.50128173828125,
|
|
"logps/ref_chosen": -64.59880828857422,
|
|
"logps/ref_rejected": -70.59329223632812,
|
|
"logps/rejected": -163.5662841796875,
|
|
"loss": 1.0386,
|
|
"margin_dpo/margin_mean": 30.07050323486328,
|
|
"margin_dpo/margin_std": 40.626930236816406,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.7588813303099018,
|
|
"fcm_dpo/beta": 0.01812717691063881,
|
|
"fcm_dpo/delta": 0.0442255400121212,
|
|
"fcm_dpo/margin": 30.785253524780273,
|
|
"fcm_dpo/q_t": 0.3837216794490814,
|
|
"grad_norm": 19.163633346557617,
|
|
"learning_rate": 8.4295479559726e-08,
|
|
"logits/chosen": 0.6712077856063843,
|
|
"logits/rejected": 0.6248115301132202,
|
|
"logps/chosen": -126.57681274414062,
|
|
"logps/ref_chosen": -65.46662902832031,
|
|
"logps/ref_rejected": -90.22233581542969,
|
|
"logps/rejected": -182.11776733398438,
|
|
"loss": 1.0425,
|
|
"margin_dpo/margin_mean": 30.78525161743164,
|
|
"margin_dpo/margin_std": 44.158851623535156,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.7603930461073318,
|
|
"fcm_dpo/beta": 0.017972594127058983,
|
|
"fcm_dpo/delta": -0.040539998561143875,
|
|
"fcm_dpo/margin": 35.48341369628906,
|
|
"fcm_dpo/q_t": 0.3686982989311218,
|
|
"grad_norm": 16.06653594970703,
|
|
"learning_rate": 8.330774987092712e-08,
|
|
"logits/chosen": 0.6875156164169312,
|
|
"logits/rejected": 0.6794095039367676,
|
|
"logps/chosen": -108.67987060546875,
|
|
"logps/ref_chosen": -51.83476257324219,
|
|
"logps/ref_rejected": -57.62522506713867,
|
|
"logps/rejected": -149.9537353515625,
|
|
"loss": 1.0504,
|
|
"margin_dpo/margin_mean": 35.4834098815918,
|
|
"margin_dpo/margin_std": 53.49339294433594,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.7619047619047619,
|
|
"fcm_dpo/beta": 0.017518125474452972,
|
|
"fcm_dpo/delta": -0.2152797430753708,
|
|
"fcm_dpo/margin": 45.586830139160156,
|
|
"fcm_dpo/q_t": 0.3326077163219452,
|
|
"grad_norm": 21.208383560180664,
|
|
"learning_rate": 8.232468292269479e-08,
|
|
"logits/chosen": 0.6347143650054932,
|
|
"logits/rejected": 0.6134928464889526,
|
|
"logps/chosen": -126.3889389038086,
|
|
"logps/ref_chosen": -68.65119934082031,
|
|
"logps/ref_rejected": -77.91394805908203,
|
|
"logps/rejected": -181.238525390625,
|
|
"loss": 0.887,
|
|
"margin_dpo/margin_mean": 45.586830139160156,
|
|
"margin_dpo/margin_std": 47.29257583618164,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.763416477702192,
|
|
"fcm_dpo/beta": 0.01733359880745411,
|
|
"fcm_dpo/delta": 0.10931959003210068,
|
|
"fcm_dpo/margin": 28.567665100097656,
|
|
"fcm_dpo/q_t": 0.4036433696746826,
|
|
"grad_norm": 19.609338760375977,
|
|
"learning_rate": 8.134630621352483e-08,
|
|
"logits/chosen": 0.6816761493682861,
|
|
"logits/rejected": 0.6424638032913208,
|
|
"logps/chosen": -120.88430786132812,
|
|
"logps/ref_chosen": -59.99884796142578,
|
|
"logps/ref_rejected": -76.88048553466797,
|
|
"logps/rejected": -166.33358764648438,
|
|
"loss": 1.1797,
|
|
"margin_dpo/margin_mean": 28.567665100097656,
|
|
"margin_dpo/margin_std": 57.868568420410156,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.764928193499622,
|
|
"fcm_dpo/beta": 0.017731061205267906,
|
|
"fcm_dpo/delta": 0.0775858610868454,
|
|
"fcm_dpo/margin": 29.690635681152344,
|
|
"fcm_dpo/q_t": 0.3898463249206543,
|
|
"grad_norm": 19.283348083496094,
|
|
"learning_rate": 8.037264711071698e-08,
|
|
"logits/chosen": 0.6876957416534424,
|
|
"logits/rejected": 0.6667051315307617,
|
|
"logps/chosen": -128.15101623535156,
|
|
"logps/ref_chosen": -70.07130432128906,
|
|
"logps/ref_rejected": -82.03775024414062,
|
|
"logps/rejected": -169.80810546875,
|
|
"loss": 1.1416,
|
|
"margin_dpo/margin_mean": 29.690635681152344,
|
|
"margin_dpo/margin_std": 55.41517639160156,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.7664399092970522,
|
|
"fcm_dpo/beta": 0.01782643049955368,
|
|
"fcm_dpo/delta": -0.006546961143612862,
|
|
"fcm_dpo/margin": 33.99785614013672,
|
|
"fcm_dpo/q_t": 0.3816778361797333,
|
|
"grad_norm": 19.276065826416016,
|
|
"learning_rate": 7.940373284960933e-08,
|
|
"logits/chosen": 0.6648691892623901,
|
|
"logits/rejected": 0.6145754456520081,
|
|
"logps/chosen": -137.01760864257812,
|
|
"logps/ref_chosen": -72.00703430175781,
|
|
"logps/ref_rejected": -93.94987487792969,
|
|
"logps/rejected": -192.9582977294922,
|
|
"loss": 1.0757,
|
|
"margin_dpo/margin_mean": 33.99785614013672,
|
|
"margin_dpo/margin_std": 55.21949005126953,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.7679516250944822,
|
|
"fcm_dpo/beta": 0.017841465771198273,
|
|
"fcm_dpo/delta": -0.05562988296151161,
|
|
"fcm_dpo/margin": 36.50687789916992,
|
|
"fcm_dpo/q_t": 0.3688430190086365,
|
|
"grad_norm": 19.247188568115234,
|
|
"learning_rate": 7.843959053281663e-08,
|
|
"logits/chosen": 0.6344867944717407,
|
|
"logits/rejected": 0.5096554160118103,
|
|
"logps/chosen": -118.91818237304688,
|
|
"logps/ref_chosen": -60.21992492675781,
|
|
"logps/ref_rejected": -95.9200668334961,
|
|
"logps/rejected": -191.1251983642578,
|
|
"loss": 1.0282,
|
|
"margin_dpo/margin_mean": 36.50688171386719,
|
|
"margin_dpo/margin_std": 52.37531280517578,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.7694633408919124,
|
|
"fcm_dpo/beta": 0.017571065574884415,
|
|
"fcm_dpo/delta": -0.003388401120901108,
|
|
"fcm_dpo/margin": 34.30541229248047,
|
|
"fcm_dpo/q_t": 0.37258362770080566,
|
|
"grad_norm": 18.990793228149414,
|
|
"learning_rate": 7.748024712947204e-08,
|
|
"logits/chosen": 0.6367828249931335,
|
|
"logits/rejected": 0.6140943765640259,
|
|
"logps/chosen": -127.34040069580078,
|
|
"logps/ref_chosen": -66.27017211914062,
|
|
"logps/ref_rejected": -71.73065185546875,
|
|
"logps/rejected": -167.10629272460938,
|
|
"loss": 1.029,
|
|
"margin_dpo/margin_mean": 34.30541229248047,
|
|
"margin_dpo/margin_std": 47.9427604675293,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.7709750566893424,
|
|
"fcm_dpo/beta": 0.01737632229924202,
|
|
"fcm_dpo/delta": -0.06894849985837936,
|
|
"fcm_dpo/margin": 38.18497085571289,
|
|
"fcm_dpo/q_t": 0.3653348386287689,
|
|
"grad_norm": 17.3432674407959,
|
|
"learning_rate": 7.652572947447272e-08,
|
|
"logits/chosen": 0.7944908142089844,
|
|
"logits/rejected": 0.6980875730514526,
|
|
"logps/chosen": -113.6026840209961,
|
|
"logps/ref_chosen": -53.54487609863281,
|
|
"logps/ref_rejected": -91.36648559570312,
|
|
"logps/rejected": -189.6092529296875,
|
|
"loss": 1.0255,
|
|
"margin_dpo/margin_mean": 38.184967041015625,
|
|
"margin_dpo/margin_std": 54.98851013183594,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.7724867724867724,
|
|
"fcm_dpo/beta": 0.017056778073310852,
|
|
"fcm_dpo/delta": -0.15736544132232666,
|
|
"fcm_dpo/margin": 43.72998809814453,
|
|
"fcm_dpo/q_t": 0.3473592698574066,
|
|
"grad_norm": 18.05554962158203,
|
|
"learning_rate": 7.557606426772961e-08,
|
|
"logits/chosen": 0.685520589351654,
|
|
"logits/rejected": 0.6331069469451904,
|
|
"logps/chosen": -116.88287353515625,
|
|
"logps/ref_chosen": -55.844383239746094,
|
|
"logps/ref_rejected": -86.49819946289062,
|
|
"logps/rejected": -191.2666778564453,
|
|
"loss": 0.9905,
|
|
"margin_dpo/margin_mean": 43.72998809814453,
|
|
"margin_dpo/margin_std": 58.842472076416016,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.7739984882842026,
|
|
"fcm_dpo/beta": 0.01700318232178688,
|
|
"fcm_dpo/delta": 0.05129774659872055,
|
|
"fcm_dpo/margin": 32.43523406982422,
|
|
"fcm_dpo/q_t": 0.388268381357193,
|
|
"grad_norm": 20.476276397705078,
|
|
"learning_rate": 7.463127807341966e-08,
|
|
"logits/chosen": 0.5704078674316406,
|
|
"logits/rejected": 0.5559252500534058,
|
|
"logps/chosen": -120.79446411132812,
|
|
"logps/ref_chosen": -61.653038024902344,
|
|
"logps/ref_rejected": -72.83148193359375,
|
|
"logps/rejected": -164.40814208984375,
|
|
"loss": 1.111,
|
|
"margin_dpo/margin_mean": 32.43523406982422,
|
|
"margin_dpo/margin_std": 56.39421081542969,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.7755102040816326,
|
|
"fcm_dpo/beta": 0.01689431443810463,
|
|
"fcm_dpo/delta": -0.01903168112039566,
|
|
"fcm_dpo/margin": 36.52797317504883,
|
|
"fcm_dpo/q_t": 0.37010854482650757,
|
|
"grad_norm": 15.648270606994629,
|
|
"learning_rate": 7.369139731924401e-08,
|
|
"logits/chosen": 0.7794561386108398,
|
|
"logits/rejected": 0.7309125065803528,
|
|
"logps/chosen": -104.30062866210938,
|
|
"logps/ref_chosen": -50.85256576538086,
|
|
"logps/ref_rejected": -69.21754455566406,
|
|
"logps/rejected": -159.19357299804688,
|
|
"loss": 0.9905,
|
|
"margin_dpo/margin_mean": 36.527976989746094,
|
|
"margin_dpo/margin_std": 45.804161071777344,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.7770219198790628,
|
|
"fcm_dpo/beta": 0.016719449311494827,
|
|
"fcm_dpo/delta": -0.12295001745223999,
|
|
"fcm_dpo/margin": 42.724876403808594,
|
|
"fcm_dpo/q_t": 0.34755611419677734,
|
|
"grad_norm": 15.384542465209961,
|
|
"learning_rate": 7.275644829568747e-08,
|
|
"logits/chosen": 0.7162846326828003,
|
|
"logits/rejected": 0.6777454614639282,
|
|
"logps/chosen": -129.981201171875,
|
|
"logps/ref_chosen": -69.38493347167969,
|
|
"logps/ref_rejected": -83.32447814941406,
|
|
"logps/rejected": -186.64561462402344,
|
|
"loss": 0.9488,
|
|
"margin_dpo/margin_mean": 42.724876403808594,
|
|
"margin_dpo/margin_std": 50.2280387878418,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.7785336356764928,
|
|
"fcm_dpo/beta": 0.016596363857388496,
|
|
"fcm_dpo/delta": 0.032611072063446045,
|
|
"fcm_dpo/margin": 34.295352935791016,
|
|
"fcm_dpo/q_t": 0.38189035654067993,
|
|
"grad_norm": 19.570980072021484,
|
|
"learning_rate": 7.182645715528435e-08,
|
|
"logits/chosen": 0.7053956985473633,
|
|
"logits/rejected": 0.6231397390365601,
|
|
"logps/chosen": -123.07106018066406,
|
|
"logps/ref_chosen": -53.687034606933594,
|
|
"logps/ref_rejected": -83.59614562988281,
|
|
"logps/rejected": -187.27552795410156,
|
|
"loss": 1.0657,
|
|
"margin_dpo/margin_mean": 34.295352935791016,
|
|
"margin_dpo/margin_std": 52.851593017578125,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.780045351473923,
|
|
"fcm_dpo/beta": 0.01694701611995697,
|
|
"fcm_dpo/delta": 0.09288481622934341,
|
|
"fcm_dpo/margin": 30.182415008544922,
|
|
"fcm_dpo/q_t": 0.3905951678752899,
|
|
"grad_norm": 21.40145492553711,
|
|
"learning_rate": 7.090144991188568e-08,
|
|
"logits/chosen": 0.6563661098480225,
|
|
"logits/rejected": 0.6072605848312378,
|
|
"logps/chosen": -114.33444213867188,
|
|
"logps/ref_chosen": -56.9017219543457,
|
|
"logps/ref_rejected": -67.83477783203125,
|
|
"logps/rejected": -155.44992065429688,
|
|
"loss": 1.1149,
|
|
"margin_dpo/margin_mean": 30.182415008544922,
|
|
"margin_dpo/margin_std": 52.542152404785156,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.781557067271353,
|
|
"fcm_dpo/beta": 0.017495661973953247,
|
|
"fcm_dpo/delta": 0.16309672594070435,
|
|
"fcm_dpo/margin": 25.303401947021484,
|
|
"fcm_dpo/q_t": 0.4092411398887634,
|
|
"grad_norm": 20.3939208984375,
|
|
"learning_rate": 6.998145243993284e-08,
|
|
"logits/chosen": 0.7242580652236938,
|
|
"logits/rejected": 0.7175397872924805,
|
|
"logps/chosen": -131.94554138183594,
|
|
"logps/ref_chosen": -61.775142669677734,
|
|
"logps/ref_rejected": -62.88270950317383,
|
|
"logps/rejected": -158.35650634765625,
|
|
"loss": 1.1588,
|
|
"margin_dpo/margin_mean": 25.303401947021484,
|
|
"margin_dpo/margin_std": 49.52540969848633,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.783068783068783,
|
|
"fcm_dpo/beta": 0.01770472340285778,
|
|
"fcm_dpo/delta": 0.0501800999045372,
|
|
"fcm_dpo/margin": 31.204975128173828,
|
|
"fcm_dpo/q_t": 0.38718700408935547,
|
|
"grad_norm": 16.25372886657715,
|
|
"learning_rate": 6.906649047373245e-08,
|
|
"logits/chosen": 0.6553936004638672,
|
|
"logits/rejected": 0.6047691106796265,
|
|
"logps/chosen": -118.64441680908203,
|
|
"logps/ref_chosen": -62.02523422241211,
|
|
"logps/ref_rejected": -79.06085205078125,
|
|
"logps/rejected": -166.885009765625,
|
|
"loss": 1.0764,
|
|
"margin_dpo/margin_mean": 31.204975128173828,
|
|
"margin_dpo/margin_std": 49.72199249267578,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.7845804988662132,
|
|
"fcm_dpo/beta": 0.018285606056451797,
|
|
"fcm_dpo/delta": 0.1760295182466507,
|
|
"fcm_dpo/margin": 23.527294158935547,
|
|
"fcm_dpo/q_t": 0.41504037380218506,
|
|
"grad_norm": 29.17710304260254,
|
|
"learning_rate": 6.815658960673781e-08,
|
|
"logits/chosen": 0.7163988351821899,
|
|
"logits/rejected": 0.6581120491027832,
|
|
"logps/chosen": -132.91104125976562,
|
|
"logps/ref_chosen": -61.60636901855469,
|
|
"logps/ref_rejected": -74.50727844238281,
|
|
"logps/rejected": -169.33924865722656,
|
|
"loss": 1.2797,
|
|
"margin_dpo/margin_mean": 23.527294158935547,
|
|
"margin_dpo/margin_std": 61.222747802734375,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.7860922146636432,
|
|
"fcm_dpo/beta": 0.018460825085639954,
|
|
"fcm_dpo/delta": 0.09022289514541626,
|
|
"fcm_dpo/margin": 27.83536148071289,
|
|
"fcm_dpo/q_t": 0.39378347992897034,
|
|
"grad_norm": 21.448122024536133,
|
|
"learning_rate": 6.725177529083209e-08,
|
|
"logits/chosen": 0.7613925337791443,
|
|
"logits/rejected": 0.7015221118927002,
|
|
"logps/chosen": -126.41165161132812,
|
|
"logps/ref_chosen": -62.87343215942383,
|
|
"logps/ref_rejected": -76.505615234375,
|
|
"logps/rejected": -167.8791961669922,
|
|
"loss": 1.1113,
|
|
"margin_dpo/margin_mean": 27.83536148071289,
|
|
"margin_dpo/margin_std": 47.354896545410156,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.7876039304610734,
|
|
"fcm_dpo/beta": 0.018552154302597046,
|
|
"fcm_dpo/delta": -0.07973211258649826,
|
|
"fcm_dpo/margin": 36.347511291503906,
|
|
"fcm_dpo/q_t": 0.3598254919052124,
|
|
"grad_norm": 21.132991790771484,
|
|
"learning_rate": 6.63520728356167e-08,
|
|
"logits/chosen": 0.5479804277420044,
|
|
"logits/rejected": 0.47508206963539124,
|
|
"logps/chosen": -129.48300170898438,
|
|
"logps/ref_chosen": -64.20668029785156,
|
|
"logps/ref_rejected": -92.28083038330078,
|
|
"logps/rejected": -193.9046630859375,
|
|
"loss": 1.0013,
|
|
"margin_dpo/margin_mean": 36.347511291503906,
|
|
"margin_dpo/margin_std": 49.076534271240234,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.7891156462585034,
|
|
"fcm_dpo/beta": 0.018455319106578827,
|
|
"fcm_dpo/delta": 0.04709668457508087,
|
|
"fcm_dpo/margin": 30.094348907470703,
|
|
"fcm_dpo/q_t": 0.39099377393722534,
|
|
"grad_norm": 21.805530548095703,
|
|
"learning_rate": 6.545750740770336e-08,
|
|
"logits/chosen": 0.6754323244094849,
|
|
"logits/rejected": 0.6666183471679688,
|
|
"logps/chosen": -121.40290832519531,
|
|
"logps/ref_chosen": -58.369720458984375,
|
|
"logps/ref_rejected": -68.79248046875,
|
|
"logps/rejected": -161.92001342773438,
|
|
"loss": 1.1803,
|
|
"margin_dpo/margin_mean": 30.09434700012207,
|
|
"margin_dpo/margin_std": 61.12078857421875,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.7906273620559335,
|
|
"fcm_dpo/beta": 0.018495675176382065,
|
|
"fcm_dpo/delta": 0.007439367473125458,
|
|
"fcm_dpo/margin": 32.03522491455078,
|
|
"fcm_dpo/q_t": 0.37477177381515503,
|
|
"grad_norm": 25.171443939208984,
|
|
"learning_rate": 6.456810403001012e-08,
|
|
"logits/chosen": 0.6811109781265259,
|
|
"logits/rejected": 0.5619876980781555,
|
|
"logps/chosen": -133.40420532226562,
|
|
"logps/ref_chosen": -65.71324157714844,
|
|
"logps/ref_rejected": -91.98896789550781,
|
|
"logps/rejected": -191.71514892578125,
|
|
"loss": 1.1145,
|
|
"margin_dpo/margin_mean": 32.03522491455078,
|
|
"margin_dpo/margin_std": 55.60651779174805,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.7921390778533636,
|
|
"fcm_dpo/beta": 0.018733873963356018,
|
|
"fcm_dpo/delta": 0.011134952306747437,
|
|
"fcm_dpo/margin": 31.432987213134766,
|
|
"fcm_dpo/q_t": 0.37468042969703674,
|
|
"grad_norm": 18.025999069213867,
|
|
"learning_rate": 6.368388758106134e-08,
|
|
"logits/chosen": 0.6386536359786987,
|
|
"logits/rejected": 0.6183046102523804,
|
|
"logps/chosen": -132.13656616210938,
|
|
"logps/ref_chosen": -76.35124969482422,
|
|
"logps/ref_rejected": -89.96072387695312,
|
|
"logps/rejected": -177.1790313720703,
|
|
"loss": 1.0466,
|
|
"margin_dpo/margin_mean": 31.432987213134766,
|
|
"margin_dpo/margin_std": 45.8270378112793,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.7936507936507936,
|
|
"fcm_dpo/beta": 0.018994202837347984,
|
|
"fcm_dpo/delta": 0.12279119342565536,
|
|
"fcm_dpo/margin": 25.432735443115234,
|
|
"fcm_dpo/q_t": 0.40213990211486816,
|
|
"grad_norm": 23.485536575317383,
|
|
"learning_rate": 6.280488279429185e-08,
|
|
"logits/chosen": 0.5125986337661743,
|
|
"logits/rejected": 0.5042914748191833,
|
|
"logps/chosen": -138.84378051757812,
|
|
"logps/ref_chosen": -75.49578857421875,
|
|
"logps/ref_rejected": -84.04852294921875,
|
|
"logps/rejected": -172.82925415039062,
|
|
"loss": 1.1386,
|
|
"margin_dpo/margin_mean": 25.432735443115234,
|
|
"margin_dpo/margin_std": 47.67351531982422,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.7951625094482238,
|
|
"fcm_dpo/beta": 0.01910669542849064,
|
|
"fcm_dpo/delta": 0.09421326220035553,
|
|
"fcm_dpo/margin": 26.58679962158203,
|
|
"fcm_dpo/q_t": 0.39384567737579346,
|
|
"grad_norm": 20.159889221191406,
|
|
"learning_rate": 6.193111425735515e-08,
|
|
"logits/chosen": 0.6956998109817505,
|
|
"logits/rejected": 0.6291480660438538,
|
|
"logps/chosen": -128.66256713867188,
|
|
"logps/ref_chosen": -61.29241943359375,
|
|
"logps/ref_rejected": -82.47763061523438,
|
|
"logps/rejected": -176.4345703125,
|
|
"loss": 1.1305,
|
|
"margin_dpo/margin_mean": 26.58679962158203,
|
|
"margin_dpo/margin_std": 45.9437255859375,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.7966742252456538,
|
|
"fcm_dpo/beta": 0.020175091922283173,
|
|
"fcm_dpo/delta": 0.2082231193780899,
|
|
"fcm_dpo/margin": 19.762725830078125,
|
|
"fcm_dpo/q_t": 0.42157065868377686,
|
|
"grad_norm": 24.948040008544922,
|
|
"learning_rate": 6.106260641143546e-08,
|
|
"logits/chosen": 0.7550373077392578,
|
|
"logits/rejected": 0.6782054901123047,
|
|
"logps/chosen": -134.86746215820312,
|
|
"logps/ref_chosen": -61.472625732421875,
|
|
"logps/ref_rejected": -90.52831268310547,
|
|
"logps/rejected": -183.68588256835938,
|
|
"loss": 1.2637,
|
|
"margin_dpo/margin_mean": 19.762725830078125,
|
|
"margin_dpo/margin_std": 51.479583740234375,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.7981859410430839,
|
|
"fcm_dpo/beta": 0.020768921822309494,
|
|
"fcm_dpo/delta": 0.18035244941711426,
|
|
"fcm_dpo/margin": 20.595016479492188,
|
|
"fcm_dpo/q_t": 0.4134521782398224,
|
|
"grad_norm": 26.238304138183594,
|
|
"learning_rate": 6.019938355056422e-08,
|
|
"logits/chosen": 0.5912600159645081,
|
|
"logits/rejected": 0.5084381103515625,
|
|
"logps/chosen": -124.909912109375,
|
|
"logps/ref_chosen": -58.792015075683594,
|
|
"logps/ref_rejected": -71.82516479492188,
|
|
"logps/rejected": -158.5380859375,
|
|
"loss": 1.2727,
|
|
"margin_dpo/margin_mean": 20.59501838684082,
|
|
"margin_dpo/margin_std": 53.321876525878906,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.799697656840514,
|
|
"fcm_dpo/beta": 0.01942495070397854,
|
|
"fcm_dpo/delta": -0.47512802481651306,
|
|
"fcm_dpo/margin": 52.53473663330078,
|
|
"fcm_dpo/q_t": 0.29438188672065735,
|
|
"grad_norm": 17.863595962524414,
|
|
"learning_rate": 5.934146982094049e-08,
|
|
"logits/chosen": 0.602439284324646,
|
|
"logits/rejected": 0.5486577153205872,
|
|
"logps/chosen": -114.20620727539062,
|
|
"logps/ref_chosen": -55.070960998535156,
|
|
"logps/ref_rejected": -75.44007873535156,
|
|
"logps/rejected": -187.1100616455078,
|
|
"loss": 0.8219,
|
|
"margin_dpo/margin_mean": 52.53473663330078,
|
|
"margin_dpo/margin_std": 52.704856872558594,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.8012093726379441,
|
|
"fcm_dpo/beta": 0.019187599420547485,
|
|
"fcm_dpo/delta": 0.0037046317011117935,
|
|
"fcm_dpo/margin": 31.082916259765625,
|
|
"fcm_dpo/q_t": 0.37992024421691895,
|
|
"grad_norm": 22.0632266998291,
|
|
"learning_rate": 5.848888922025552e-08,
|
|
"logits/chosen": 0.6941254734992981,
|
|
"logits/rejected": 0.654128909111023,
|
|
"logps/chosen": -118.39797973632812,
|
|
"logps/ref_chosen": -56.743812561035156,
|
|
"logps/ref_rejected": -76.6692123413086,
|
|
"logps/rejected": -169.40631103515625,
|
|
"loss": 1.0404,
|
|
"margin_dpo/margin_mean": 31.082918167114258,
|
|
"margin_dpo/margin_std": 45.24073028564453,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.8027210884353742,
|
|
"fcm_dpo/beta": 0.019356567412614822,
|
|
"fcm_dpo/delta": -0.005651660263538361,
|
|
"fcm_dpo/margin": 31.139101028442383,
|
|
"fcm_dpo/q_t": 0.37849652767181396,
|
|
"grad_norm": 21.71436309814453,
|
|
"learning_rate": 5.7641665597021435e-08,
|
|
"logits/chosen": 0.6697413921356201,
|
|
"logits/rejected": 0.6052749156951904,
|
|
"logps/chosen": -115.15210723876953,
|
|
"logps/ref_chosen": -51.116455078125,
|
|
"logps/ref_rejected": -79.52884674072266,
|
|
"logps/rejected": -174.70359802246094,
|
|
"loss": 1.0816,
|
|
"margin_dpo/margin_mean": 31.139101028442383,
|
|
"margin_dpo/margin_std": 49.85601806640625,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.8042328042328042,
|
|
"fcm_dpo/beta": 0.018879860639572144,
|
|
"fcm_dpo/delta": -0.07339326292276382,
|
|
"fcm_dpo/margin": 35.395713806152344,
|
|
"fcm_dpo/q_t": 0.36597567796707153,
|
|
"grad_norm": 19.15670394897461,
|
|
"learning_rate": 5.679982264990424e-08,
|
|
"logits/chosen": 0.6101032495498657,
|
|
"logits/rejected": 0.559300422668457,
|
|
"logps/chosen": -132.02627563476562,
|
|
"logps/ref_chosen": -58.279945373535156,
|
|
"logps/ref_rejected": -78.05426788330078,
|
|
"logps/rejected": -187.19631958007812,
|
|
"loss": 1.0516,
|
|
"margin_dpo/margin_mean": 35.395713806152344,
|
|
"margin_dpo/margin_std": 54.57707977294922,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.8057445200302343,
|
|
"fcm_dpo/beta": 0.01890089176595211,
|
|
"fcm_dpo/delta": 0.00393468514084816,
|
|
"fcm_dpo/margin": 31.543041229248047,
|
|
"fcm_dpo/q_t": 0.37516263127326965,
|
|
"grad_norm": 16.823314666748047,
|
|
"learning_rate": 5.596338392706076e-08,
|
|
"logits/chosen": 0.7527922987937927,
|
|
"logits/rejected": 0.692599356174469,
|
|
"logps/chosen": -107.6111831665039,
|
|
"logps/ref_chosen": -56.41801071166992,
|
|
"logps/ref_rejected": -73.89324951171875,
|
|
"logps/rejected": -156.62945556640625,
|
|
"loss": 1.0561,
|
|
"margin_dpo/margin_mean": 31.543039321899414,
|
|
"margin_dpo/margin_std": 48.05528259277344,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.8072562358276644,
|
|
"fcm_dpo/beta": 0.018499422818422318,
|
|
"fcm_dpo/delta": -0.0869862511754036,
|
|
"fcm_dpo/margin": 36.743194580078125,
|
|
"fcm_dpo/q_t": 0.36393046379089355,
|
|
"grad_norm": 17.313426971435547,
|
|
"learning_rate": 5.513237282548033e-08,
|
|
"logits/chosen": 0.677596926689148,
|
|
"logits/rejected": 0.6374635100364685,
|
|
"logps/chosen": -120.32328033447266,
|
|
"logps/ref_chosen": -60.748687744140625,
|
|
"logps/ref_rejected": -73.8623046875,
|
|
"logps/rejected": -170.18008422851562,
|
|
"loss": 1.0367,
|
|
"margin_dpo/margin_mean": 36.743194580078125,
|
|
"margin_dpo/margin_std": 54.76251983642578,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.8087679516250945,
|
|
"fcm_dpo/beta": 0.018779411911964417,
|
|
"fcm_dpo/delta": 0.06701276451349258,
|
|
"fcm_dpo/margin": 28.552780151367188,
|
|
"fcm_dpo/q_t": 0.39095014333724976,
|
|
"grad_norm": 20.778942108154297,
|
|
"learning_rate": 5.430681259032957e-08,
|
|
"logits/chosen": 0.5685528516769409,
|
|
"logits/rejected": 0.5054241418838501,
|
|
"logps/chosen": -132.29876708984375,
|
|
"logps/ref_chosen": -61.637413024902344,
|
|
"logps/ref_rejected": -80.93138885498047,
|
|
"logps/rejected": -180.1455078125,
|
|
"loss": 1.1322,
|
|
"margin_dpo/margin_mean": 28.552780151367188,
|
|
"margin_dpo/margin_std": 52.61593246459961,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.8102796674225246,
|
|
"fcm_dpo/beta": 0.01826276257634163,
|
|
"fcm_dpo/delta": -0.18799223005771637,
|
|
"fcm_dpo/margin": 42.34637451171875,
|
|
"fcm_dpo/q_t": 0.33896517753601074,
|
|
"grad_norm": 15.240792274475098,
|
|
"learning_rate": 5.3486726314303175e-08,
|
|
"logits/chosen": 0.7089515924453735,
|
|
"logits/rejected": 0.6250416040420532,
|
|
"logps/chosen": -112.83555603027344,
|
|
"logps/ref_chosen": -51.88897705078125,
|
|
"logps/ref_rejected": -73.34864044189453,
|
|
"logps/rejected": -176.6416015625,
|
|
"loss": 0.891,
|
|
"margin_dpo/margin_mean": 42.34637451171875,
|
|
"margin_dpo/margin_std": 44.537410736083984,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.8117913832199547,
|
|
"fcm_dpo/beta": 0.018009407445788383,
|
|
"fcm_dpo/delta": 0.005187440663576126,
|
|
"fcm_dpo/margin": 33.01459503173828,
|
|
"fcm_dpo/q_t": 0.38105130195617676,
|
|
"grad_norm": 16.58041763305664,
|
|
"learning_rate": 5.267213693697695e-08,
|
|
"logits/chosen": 0.7628644704818726,
|
|
"logits/rejected": 0.6717967987060547,
|
|
"logps/chosen": -124.62332916259766,
|
|
"logps/ref_chosen": -54.248619079589844,
|
|
"logps/ref_rejected": -94.94343566894531,
|
|
"logps/rejected": -198.33274841308594,
|
|
"loss": 1.0657,
|
|
"margin_dpo/margin_mean": 33.01459884643555,
|
|
"margin_dpo/margin_std": 51.61365509033203,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.8133030990173847,
|
|
"fcm_dpo/beta": 0.018021808937191963,
|
|
"fcm_dpo/delta": -0.08093195408582687,
|
|
"fcm_dpo/margin": 37.45811462402344,
|
|
"fcm_dpo/q_t": 0.3603672385215759,
|
|
"grad_norm": 18.360652923583984,
|
|
"learning_rate": 5.1863067244167144e-08,
|
|
"logits/chosen": 0.6641885042190552,
|
|
"logits/rejected": 0.6308572292327881,
|
|
"logps/chosen": -136.5439453125,
|
|
"logps/ref_chosen": -70.09353637695312,
|
|
"logps/ref_rejected": -79.49833679199219,
|
|
"logps/rejected": -183.40684509277344,
|
|
"loss": 0.999,
|
|
"margin_dpo/margin_mean": 37.45811462402344,
|
|
"margin_dpo/margin_std": 50.08565139770508,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.8148148148148148,
|
|
"fcm_dpo/beta": 0.01791461370885372,
|
|
"fcm_dpo/delta": 0.06643770635128021,
|
|
"fcm_dpo/margin": 29.97534942626953,
|
|
"fcm_dpo/q_t": 0.38791027665138245,
|
|
"grad_norm": 18.469646453857422,
|
|
"learning_rate": 5.105953986729195e-08,
|
|
"logits/chosen": 0.6261240243911743,
|
|
"logits/rejected": 0.5512057542800903,
|
|
"logps/chosen": -132.72520446777344,
|
|
"logps/ref_chosen": -61.93169403076172,
|
|
"logps/ref_rejected": -84.08946228027344,
|
|
"logps/rejected": -184.8583221435547,
|
|
"loss": 1.0637,
|
|
"margin_dpo/margin_mean": 29.97534942626953,
|
|
"margin_dpo/margin_std": 45.154361724853516,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.8163265306122449,
|
|
"fcm_dpo/beta": 0.017926346510648727,
|
|
"fcm_dpo/delta": -0.12926867604255676,
|
|
"fcm_dpo/margin": 40.096641540527344,
|
|
"fcm_dpo/q_t": 0.3533179759979248,
|
|
"grad_norm": 17.8641414642334,
|
|
"learning_rate": 5.026157728273966e-08,
|
|
"logits/chosen": 0.6939501762390137,
|
|
"logits/rejected": 0.6017695069313049,
|
|
"logps/chosen": -126.96577453613281,
|
|
"logps/ref_chosen": -62.704254150390625,
|
|
"logps/ref_rejected": -95.63597106933594,
|
|
"logps/rejected": -199.994140625,
|
|
"loss": 0.9672,
|
|
"margin_dpo/margin_mean": 40.096641540527344,
|
|
"margin_dpo/margin_std": 49.30036926269531,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.817838246409675,
|
|
"fcm_dpo/beta": 0.017087796702980995,
|
|
"fcm_dpo/delta": -0.13908647000789642,
|
|
"fcm_dpo/margin": 42.50551986694336,
|
|
"fcm_dpo/q_t": 0.3478375971317291,
|
|
"grad_norm": 15.025036811828613,
|
|
"learning_rate": 4.9469201811239035e-08,
|
|
"logits/chosen": 0.6994584202766418,
|
|
"logits/rejected": 0.7143793106079102,
|
|
"logps/chosen": -123.77883911132812,
|
|
"logps/ref_chosen": -62.48084259033203,
|
|
"logps/ref_rejected": -57.55541229248047,
|
|
"logps/rejected": -161.3589324951172,
|
|
"loss": 0.9436,
|
|
"margin_dpo/margin_mean": 42.50551986694336,
|
|
"margin_dpo/margin_std": 49.231876373291016,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.8193499622071051,
|
|
"fcm_dpo/beta": 0.01672377809882164,
|
|
"fcm_dpo/delta": -0.1440107524394989,
|
|
"fcm_dpo/margin": 43.837928771972656,
|
|
"fcm_dpo/q_t": 0.34867095947265625,
|
|
"grad_norm": 15.291959762573242,
|
|
"learning_rate": 4.868243561723534e-08,
|
|
"logits/chosen": 0.7241077423095703,
|
|
"logits/rejected": 0.6722509860992432,
|
|
"logps/chosen": -102.95439910888672,
|
|
"logps/ref_chosen": -49.454891204833984,
|
|
"logps/ref_rejected": -65.33275604248047,
|
|
"logps/rejected": -162.67019653320312,
|
|
"loss": 0.968,
|
|
"margin_dpo/margin_mean": 43.837928771972656,
|
|
"margin_dpo/margin_std": 55.83599853515625,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.8208616780045351,
|
|
"fcm_dpo/beta": 0.016603415831923485,
|
|
"fcm_dpo/delta": -0.038653384894132614,
|
|
"fcm_dpo/margin": 38.306365966796875,
|
|
"fcm_dpo/q_t": 0.3619435131549835,
|
|
"grad_norm": 12.996235847473145,
|
|
"learning_rate": 4.790130070827028e-08,
|
|
"logits/chosen": 0.6842800378799438,
|
|
"logits/rejected": 0.5959610939025879,
|
|
"logps/chosen": -111.56507873535156,
|
|
"logps/ref_chosen": -51.100860595703125,
|
|
"logps/ref_rejected": -76.06130981445312,
|
|
"logps/rejected": -174.83189392089844,
|
|
"loss": 1.0011,
|
|
"margin_dpo/margin_mean": 38.306365966796875,
|
|
"margin_dpo/margin_std": 49.799415588378906,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.8223733938019653,
|
|
"fcm_dpo/beta": 0.0162653811275959,
|
|
"fcm_dpo/delta": -0.1092371717095375,
|
|
"fcm_dpo/margin": 43.14006805419922,
|
|
"fcm_dpo/q_t": 0.35268715023994446,
|
|
"grad_norm": 14.857081413269043,
|
|
"learning_rate": 4.7125818934366454e-08,
|
|
"logits/chosen": 0.672330379486084,
|
|
"logits/rejected": 0.5996150374412537,
|
|
"logps/chosen": -124.62445068359375,
|
|
"logps/ref_chosen": -60.2772331237793,
|
|
"logps/ref_rejected": -88.40553283691406,
|
|
"logps/rejected": -195.892822265625,
|
|
"loss": 0.9689,
|
|
"margin_dpo/margin_mean": 43.14006805419922,
|
|
"margin_dpo/margin_std": 53.813167572021484,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.8238851095993953,
|
|
"fcm_dpo/beta": 0.01642528548836708,
|
|
"fcm_dpo/delta": 0.12011280655860901,
|
|
"fcm_dpo/margin": 29.575916290283203,
|
|
"fcm_dpo/q_t": 0.39873120188713074,
|
|
"grad_norm": 17.39080047607422,
|
|
"learning_rate": 4.635601198741607e-08,
|
|
"logits/chosen": 0.6350932121276855,
|
|
"logits/rejected": 0.5763211846351624,
|
|
"logps/chosen": -130.05453491210938,
|
|
"logps/ref_chosen": -61.61524963378906,
|
|
"logps/ref_rejected": -78.71266174316406,
|
|
"logps/rejected": -176.72787475585938,
|
|
"loss": 1.1295,
|
|
"margin_dpo/margin_mean": 29.575916290283203,
|
|
"margin_dpo/margin_std": 53.032081604003906,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.8253968253968254,
|
|
"fcm_dpo/beta": 0.016498159617185593,
|
|
"fcm_dpo/delta": 0.004416411742568016,
|
|
"fcm_dpo/margin": 36.10563659667969,
|
|
"fcm_dpo/q_t": 0.37277141213417053,
|
|
"grad_norm": 19.63165283203125,
|
|
"learning_rate": 4.559190140057428e-08,
|
|
"logits/chosen": 0.7387676239013672,
|
|
"logits/rejected": 0.7273943424224854,
|
|
"logps/chosen": -122.2755126953125,
|
|
"logps/ref_chosen": -59.313262939453125,
|
|
"logps/ref_rejected": -64.73631286621094,
|
|
"logps/rejected": -163.80419921875,
|
|
"loss": 1.0279,
|
|
"margin_dpo/margin_mean": 36.10563659667969,
|
|
"margin_dpo/margin_std": 49.768165588378906,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.8269085411942555,
|
|
"fcm_dpo/beta": 0.016392838209867477,
|
|
"fcm_dpo/delta": -0.05539948120713234,
|
|
"fcm_dpo/margin": 39.75334930419922,
|
|
"fcm_dpo/q_t": 0.3588758111000061,
|
|
"grad_norm": 18.761903762817383,
|
|
"learning_rate": 4.483350854765672e-08,
|
|
"logits/chosen": 0.632850170135498,
|
|
"logits/rejected": 0.56563401222229,
|
|
"logps/chosen": -113.59066772460938,
|
|
"logps/ref_chosen": -54.97674560546875,
|
|
"logps/ref_rejected": -75.35922241210938,
|
|
"logps/rejected": -173.7264862060547,
|
|
"loss": 0.9989,
|
|
"margin_dpo/margin_mean": 39.75334930419922,
|
|
"margin_dpo/margin_std": 51.920860290527344,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.8284202569916855,
|
|
"fcm_dpo/beta": 0.016840200871229172,
|
|
"fcm_dpo/delta": 0.14554663002490997,
|
|
"fcm_dpo/margin": 27.277130126953125,
|
|
"fcm_dpo/q_t": 0.4067327380180359,
|
|
"grad_norm": 18.979921340942383,
|
|
"learning_rate": 4.4080854642541826e-08,
|
|
"logits/chosen": 0.5610533952713013,
|
|
"logits/rejected": 0.49250972270965576,
|
|
"logps/chosen": -131.07614135742188,
|
|
"logps/ref_chosen": -63.21067428588867,
|
|
"logps/ref_rejected": -81.23347473144531,
|
|
"logps/rejected": -176.37606811523438,
|
|
"loss": 1.1475,
|
|
"margin_dpo/margin_mean": 27.277130126953125,
|
|
"margin_dpo/margin_std": 51.35624694824219,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.8299319727891157,
|
|
"fcm_dpo/beta": 0.017113741487264633,
|
|
"fcm_dpo/delta": 0.1011405736207962,
|
|
"fcm_dpo/margin": 29.445924758911133,
|
|
"fcm_dpo/q_t": 0.394071489572525,
|
|
"grad_norm": 17.566940307617188,
|
|
"learning_rate": 4.333396073857723e-08,
|
|
"logits/chosen": 0.7469007968902588,
|
|
"logits/rejected": 0.6796466112136841,
|
|
"logps/chosen": -129.56695556640625,
|
|
"logps/ref_chosen": -64.27351379394531,
|
|
"logps/ref_rejected": -92.31663513183594,
|
|
"logps/rejected": -187.05599975585938,
|
|
"loss": 1.1245,
|
|
"margin_dpo/margin_mean": 29.4459228515625,
|
|
"margin_dpo/margin_std": 52.37643051147461,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.8314436885865457,
|
|
"fcm_dpo/beta": 0.017646487802267075,
|
|
"fcm_dpo/delta": 0.21381746232509613,
|
|
"fcm_dpo/margin": 22.392440795898438,
|
|
"fcm_dpo/q_t": 0.41807249188423157,
|
|
"grad_norm": 22.020166397094727,
|
|
"learning_rate": 4.259284772799099e-08,
|
|
"logits/chosen": 0.6913200616836548,
|
|
"logits/rejected": 0.6597945690155029,
|
|
"logps/chosen": -130.39352416992188,
|
|
"logps/ref_chosen": -56.230438232421875,
|
|
"logps/ref_rejected": -62.59788513183594,
|
|
"logps/rejected": -159.15341186523438,
|
|
"loss": 1.1887,
|
|
"margin_dpo/margin_mean": 22.392440795898438,
|
|
"margin_dpo/margin_std": 47.07420349121094,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.8329554043839759,
|
|
"fcm_dpo/beta": 0.01831832528114319,
|
|
"fcm_dpo/delta": 0.10014162957668304,
|
|
"fcm_dpo/margin": 27.50286865234375,
|
|
"fcm_dpo/q_t": 0.3956736922264099,
|
|
"grad_norm": 19.295076370239258,
|
|
"learning_rate": 4.1857536341307176e-08,
|
|
"logits/chosen": 0.7169756889343262,
|
|
"logits/rejected": 0.6815935373306274,
|
|
"logps/chosen": -137.21009826660156,
|
|
"logps/ref_chosen": -67.74720764160156,
|
|
"logps/ref_rejected": -87.04285430908203,
|
|
"logps/rejected": -184.00860595703125,
|
|
"loss": 1.0868,
|
|
"margin_dpo/margin_mean": 27.502866744995117,
|
|
"margin_dpo/margin_std": 44.02717590332031,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.8344671201814059,
|
|
"fcm_dpo/beta": 0.018308354541659355,
|
|
"fcm_dpo/delta": -0.051000580191612244,
|
|
"fcm_dpo/margin": 35.34024429321289,
|
|
"fcm_dpo/q_t": 0.3574460744857788,
|
|
"grad_norm": 22.781795501708984,
|
|
"learning_rate": 4.112804714676593e-08,
|
|
"logits/chosen": 0.659945011138916,
|
|
"logits/rejected": 0.6055397391319275,
|
|
"logps/chosen": -127.8887939453125,
|
|
"logps/ref_chosen": -62.92625427246094,
|
|
"logps/ref_rejected": -82.98365783691406,
|
|
"logps/rejected": -183.2864227294922,
|
|
"loss": 1.0082,
|
|
"margin_dpo/margin_mean": 35.34024429321289,
|
|
"margin_dpo/margin_std": 46.828224182128906,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.8359788359788359,
|
|
"fcm_dpo/beta": 0.01820247806608677,
|
|
"fcm_dpo/delta": 0.038443662226200104,
|
|
"fcm_dpo/margin": 30.970130920410156,
|
|
"fcm_dpo/q_t": 0.3889210820198059,
|
|
"grad_norm": 23.225704193115234,
|
|
"learning_rate": 4.0404400549748144e-08,
|
|
"logits/chosen": 0.6440668106079102,
|
|
"logits/rejected": 0.5367642045021057,
|
|
"logps/chosen": -128.24612426757812,
|
|
"logps/ref_chosen": -56.038490295410156,
|
|
"logps/ref_rejected": -84.48454284667969,
|
|
"logps/rejected": -187.6623077392578,
|
|
"loss": 1.1264,
|
|
"margin_dpo/margin_mean": 30.97012710571289,
|
|
"margin_dpo/margin_std": 56.776390075683594,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.8374905517762661,
|
|
"fcm_dpo/beta": 0.018095003440976143,
|
|
"fcm_dpo/delta": -0.06582193076610565,
|
|
"fcm_dpo/margin": 36.556236267089844,
|
|
"fcm_dpo/q_t": 0.3641040623188019,
|
|
"grad_norm": 19.767202377319336,
|
|
"learning_rate": 3.968661679220467e-08,
|
|
"logits/chosen": 0.6238239407539368,
|
|
"logits/rejected": 0.6004227995872498,
|
|
"logps/chosen": -129.68283081054688,
|
|
"logps/ref_chosen": -64.53059387207031,
|
|
"logps/ref_rejected": -71.2155990600586,
|
|
"logps/rejected": -172.924072265625,
|
|
"loss": 1.0152,
|
|
"margin_dpo/margin_mean": 36.55623245239258,
|
|
"margin_dpo/margin_std": 51.607032775878906,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.8390022675736961,
|
|
"fcm_dpo/beta": 0.018217993900179863,
|
|
"fcm_dpo/delta": -0.023561611771583557,
|
|
"fcm_dpo/margin": 33.95972442626953,
|
|
"fcm_dpo/q_t": 0.37040045857429504,
|
|
"grad_norm": 20.464981079101562,
|
|
"learning_rate": 3.89747159520904e-08,
|
|
"logits/chosen": 0.6768993139266968,
|
|
"logits/rejected": 0.6472059488296509,
|
|
"logps/chosen": -139.9117431640625,
|
|
"logps/ref_chosen": -66.65191650390625,
|
|
"logps/ref_rejected": -68.6667251586914,
|
|
"logps/rejected": -175.88629150390625,
|
|
"loss": 1.0948,
|
|
"margin_dpo/margin_mean": 33.95972442626953,
|
|
"margin_dpo/margin_std": 54.30375671386719,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.8405139833711263,
|
|
"fcm_dpo/beta": 0.018018240109086037,
|
|
"fcm_dpo/delta": 0.0479285754263401,
|
|
"fcm_dpo/margin": 30.78514289855957,
|
|
"fcm_dpo/q_t": 0.3913532793521881,
|
|
"grad_norm": 17.889385223388672,
|
|
"learning_rate": 3.826871794280192e-08,
|
|
"logits/chosen": 0.7341389656066895,
|
|
"logits/rejected": 0.6801250576972961,
|
|
"logps/chosen": -126.58598327636719,
|
|
"logps/ref_chosen": -52.832366943359375,
|
|
"logps/ref_rejected": -64.49044036865234,
|
|
"logps/rejected": -169.02919006347656,
|
|
"loss": 1.1322,
|
|
"margin_dpo/margin_mean": 30.78514289855957,
|
|
"margin_dpo/margin_std": 56.485740661621094,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.8420256991685563,
|
|
"fcm_dpo/beta": 0.017613768577575684,
|
|
"fcm_dpo/delta": -0.1427953541278839,
|
|
"fcm_dpo/margin": 41.473854064941406,
|
|
"fcm_dpo/q_t": 0.3491743505001068,
|
|
"grad_norm": 18.236114501953125,
|
|
"learning_rate": 3.756864251262143e-08,
|
|
"logits/chosen": 0.7962363362312317,
|
|
"logits/rejected": 0.7152137160301208,
|
|
"logps/chosen": -127.98336029052734,
|
|
"logps/ref_chosen": -55.03598403930664,
|
|
"logps/ref_rejected": -75.80644989013672,
|
|
"logps/rejected": -190.22769165039062,
|
|
"loss": 0.9416,
|
|
"margin_dpo/margin_mean": 41.473854064941406,
|
|
"margin_dpo/margin_std": 48.5777587890625,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.8435374149659864,
|
|
"fcm_dpo/beta": 0.01709606498479843,
|
|
"fcm_dpo/delta": -0.16726849973201752,
|
|
"fcm_dpo/margin": 44.07666778564453,
|
|
"fcm_dpo/q_t": 0.34456583857536316,
|
|
"grad_norm": 14.090262413024902,
|
|
"learning_rate": 3.687450924416341e-08,
|
|
"logits/chosen": 0.7165727615356445,
|
|
"logits/rejected": 0.6611793637275696,
|
|
"logps/chosen": -125.71041107177734,
|
|
"logps/ref_chosen": -63.226348876953125,
|
|
"logps/ref_rejected": -91.46881866455078,
|
|
"logps/rejected": -198.02957153320312,
|
|
"loss": 0.9276,
|
|
"margin_dpo/margin_mean": 44.07666778564453,
|
|
"margin_dpo/margin_std": 50.671913146972656,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.8450491307634165,
|
|
"fcm_dpo/beta": 0.016778361052274704,
|
|
"fcm_dpo/delta": -0.028636924922466278,
|
|
"fcm_dpo/margin": 37.217071533203125,
|
|
"fcm_dpo/q_t": 0.3728501796722412,
|
|
"grad_norm": 15.830818176269531,
|
|
"learning_rate": 3.6186337553827743e-08,
|
|
"logits/chosen": 0.6524174213409424,
|
|
"logits/rejected": 0.5823867321014404,
|
|
"logps/chosen": -130.78497314453125,
|
|
"logps/ref_chosen": -61.521644592285156,
|
|
"logps/ref_rejected": -82.83859252929688,
|
|
"logps/rejected": -189.31900024414062,
|
|
"loss": 1.0403,
|
|
"margin_dpo/margin_mean": 37.217071533203125,
|
|
"margin_dpo/margin_std": 53.64512634277344,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.8465608465608465,
|
|
"fcm_dpo/beta": 0.01705295592546463,
|
|
"fcm_dpo/delta": -0.007127054035663605,
|
|
"fcm_dpo/margin": 35.50212097167969,
|
|
"fcm_dpo/q_t": 0.374332070350647,
|
|
"grad_norm": 16.82029914855957,
|
|
"learning_rate": 3.550414669125573e-08,
|
|
"logits/chosen": 0.6676818132400513,
|
|
"logits/rejected": 0.6309837102890015,
|
|
"logps/chosen": -132.1116943359375,
|
|
"logps/ref_chosen": -60.64122009277344,
|
|
"logps/ref_rejected": -78.75474548339844,
|
|
"logps/rejected": -185.72735595703125,
|
|
"loss": 1.0186,
|
|
"margin_dpo/margin_mean": 35.50212097167969,
|
|
"margin_dpo/margin_std": 47.463401794433594,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.8480725623582767,
|
|
"fcm_dpo/beta": 0.016799356788396835,
|
|
"fcm_dpo/delta": -0.03773471340537071,
|
|
"fcm_dpo/margin": 37.817176818847656,
|
|
"fcm_dpo/q_t": 0.37105944752693176,
|
|
"grad_norm": 15.695465087890625,
|
|
"learning_rate": 3.482795573879241e-08,
|
|
"logits/chosen": 0.680199921131134,
|
|
"logits/rejected": 0.6491532325744629,
|
|
"logps/chosen": -126.62320709228516,
|
|
"logps/ref_chosen": -62.49859619140625,
|
|
"logps/ref_rejected": -78.72064208984375,
|
|
"logps/rejected": -180.66241455078125,
|
|
"loss": 1.026,
|
|
"margin_dpo/margin_mean": 37.817176818847656,
|
|
"margin_dpo/margin_std": 53.56718444824219,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.8495842781557067,
|
|
"fcm_dpo/beta": 0.016474956646561623,
|
|
"fcm_dpo/delta": -0.0877566933631897,
|
|
"fcm_dpo/margin": 41.32499313354492,
|
|
"fcm_dpo/q_t": 0.3588718771934509,
|
|
"grad_norm": 19.099084854125977,
|
|
"learning_rate": 3.415778361095226e-08,
|
|
"logits/chosen": 0.6784518957138062,
|
|
"logits/rejected": 0.6398619413375854,
|
|
"logps/chosen": -143.8610382080078,
|
|
"logps/ref_chosen": -74.78173828125,
|
|
"logps/ref_rejected": -92.63499450683594,
|
|
"logps/rejected": -203.03929138183594,
|
|
"loss": 0.9801,
|
|
"margin_dpo/margin_mean": 41.324989318847656,
|
|
"margin_dpo/margin_std": 52.4063606262207,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.8510959939531368,
|
|
"fcm_dpo/beta": 0.01647357828915119,
|
|
"fcm_dpo/delta": -0.01924915984272957,
|
|
"fcm_dpo/margin": 37.51539993286133,
|
|
"fcm_dpo/q_t": 0.3716714382171631,
|
|
"grad_norm": 21.878982543945312,
|
|
"learning_rate": 3.349364905389032e-08,
|
|
"logits/chosen": 0.7664488554000854,
|
|
"logits/rejected": 0.7137491106987,
|
|
"logps/chosen": -111.7672348022461,
|
|
"logps/ref_chosen": -50.19850158691406,
|
|
"logps/ref_rejected": -66.76687622070312,
|
|
"logps/rejected": -165.8509979248047,
|
|
"loss": 1.0639,
|
|
"margin_dpo/margin_mean": 37.51539611816406,
|
|
"margin_dpo/margin_std": 58.186012268066406,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.8526077097505669,
|
|
"fcm_dpo/beta": 0.016082683578133583,
|
|
"fcm_dpo/delta": -0.14546218514442444,
|
|
"fcm_dpo/margin": 45.69166946411133,
|
|
"fcm_dpo/q_t": 0.3453413248062134,
|
|
"grad_norm": 14.259784698486328,
|
|
"learning_rate": 3.283557064487785e-08,
|
|
"logits/chosen": 0.6534693241119385,
|
|
"logits/rejected": 0.6237531304359436,
|
|
"logps/chosen": -116.24386596679688,
|
|
"logps/ref_chosen": -55.7408447265625,
|
|
"logps/ref_rejected": -74.82323455810547,
|
|
"logps/rejected": -181.01791381835938,
|
|
"loss": 0.9615,
|
|
"margin_dpo/margin_mean": 45.69166564941406,
|
|
"margin_dpo/margin_std": 56.90264892578125,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.854119425547997,
|
|
"fcm_dpo/beta": 0.016341106966137886,
|
|
"fcm_dpo/delta": 0.12114652246236801,
|
|
"fcm_dpo/margin": 29.57483673095703,
|
|
"fcm_dpo/q_t": 0.3988476097583771,
|
|
"grad_norm": 17.22108268737793,
|
|
"learning_rate": 3.218356679178252e-08,
|
|
"logits/chosen": 0.7081042528152466,
|
|
"logits/rejected": 0.6530278921127319,
|
|
"logps/chosen": -135.77761840820312,
|
|
"logps/ref_chosen": -58.33738327026367,
|
|
"logps/ref_rejected": -78.31776428222656,
|
|
"logps/rejected": -185.33282470703125,
|
|
"loss": 1.1058,
|
|
"margin_dpo/margin_mean": 29.57483673095703,
|
|
"margin_dpo/margin_std": 49.10877990722656,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.8556311413454271,
|
|
"fcm_dpo/beta": 0.016569407656788826,
|
|
"fcm_dpo/delta": 0.06318099051713943,
|
|
"fcm_dpo/margin": 32.56593704223633,
|
|
"fcm_dpo/q_t": 0.3885696232318878,
|
|
"grad_norm": 22.122758865356445,
|
|
"learning_rate": 3.1537655732553764e-08,
|
|
"logits/chosen": 0.6910306215286255,
|
|
"logits/rejected": 0.671326756477356,
|
|
"logps/chosen": -139.0428009033203,
|
|
"logps/ref_chosen": -71.22373962402344,
|
|
"logps/ref_rejected": -71.11601257324219,
|
|
"logps/rejected": -171.50100708007812,
|
|
"loss": 1.1378,
|
|
"margin_dpo/margin_mean": 32.56593322753906,
|
|
"margin_dpo/margin_std": 60.11833190917969,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.8571428571428571,
|
|
"fcm_dpo/beta": 0.01638803631067276,
|
|
"fcm_dpo/delta": -0.00884208083152771,
|
|
"fcm_dpo/margin": 37.02666473388672,
|
|
"fcm_dpo/q_t": 0.3724210262298584,
|
|
"grad_norm": 14.405105590820312,
|
|
"learning_rate": 3.089785553471233e-08,
|
|
"logits/chosen": 0.6986731886863708,
|
|
"logits/rejected": 0.6037529706954956,
|
|
"logps/chosen": -119.60701751708984,
|
|
"logps/ref_chosen": -52.669273376464844,
|
|
"logps/ref_rejected": -74.34785461425781,
|
|
"logps/rejected": -178.312255859375,
|
|
"loss": 1.0172,
|
|
"margin_dpo/margin_mean": 37.02666473388672,
|
|
"margin_dpo/margin_std": 49.046302795410156,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.8586545729402872,
|
|
"fcm_dpo/beta": 0.016353819519281387,
|
|
"fcm_dpo/delta": -0.10809920728206635,
|
|
"fcm_dpo/margin": 42.83213806152344,
|
|
"fcm_dpo/q_t": 0.3527492880821228,
|
|
"grad_norm": 15.531497955322266,
|
|
"learning_rate": 3.026418409484513e-08,
|
|
"logits/chosen": 0.6940714120864868,
|
|
"logits/rejected": 0.6082979440689087,
|
|
"logps/chosen": -114.17964172363281,
|
|
"logps/ref_chosen": -52.178001403808594,
|
|
"logps/ref_rejected": -85.8277587890625,
|
|
"logps/rejected": -190.6615447998047,
|
|
"loss": 0.9465,
|
|
"margin_dpo/margin_mean": 42.83213806152344,
|
|
"margin_dpo/margin_std": 48.710716247558594,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.8601662887377173,
|
|
"fcm_dpo/beta": 0.016410736367106438,
|
|
"fcm_dpo/delta": 0.16210441291332245,
|
|
"fcm_dpo/margin": 27.106124877929688,
|
|
"fcm_dpo/q_t": 0.4084968566894531,
|
|
"grad_norm": 17.497173309326172,
|
|
"learning_rate": 2.963665913810451e-08,
|
|
"logits/chosen": 0.605643630027771,
|
|
"logits/rejected": 0.5765886306762695,
|
|
"logps/chosen": -133.26766967773438,
|
|
"logps/ref_chosen": -62.649261474609375,
|
|
"logps/ref_rejected": -75.4298324584961,
|
|
"logps/rejected": -173.15435791015625,
|
|
"loss": 1.1579,
|
|
"margin_dpo/margin_mean": 27.106124877929688,
|
|
"margin_dpo/margin_std": 51.35100173950195,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.8616780045351474,
|
|
"fcm_dpo/beta": 0.016275301575660706,
|
|
"fcm_dpo/delta": -0.22408686578273773,
|
|
"fcm_dpo/margin": 49.55414581298828,
|
|
"fcm_dpo/q_t": 0.3300275206565857,
|
|
"grad_norm": 14.673301696777344,
|
|
"learning_rate": 2.9015298217712453e-08,
|
|
"logits/chosen": 0.6282495260238647,
|
|
"logits/rejected": 0.5449954271316528,
|
|
"logps/chosen": -112.49989318847656,
|
|
"logps/ref_chosen": -50.04179382324219,
|
|
"logps/ref_rejected": -78.27146911621094,
|
|
"logps/rejected": -190.28370666503906,
|
|
"loss": 0.8892,
|
|
"margin_dpo/margin_mean": 49.55415344238281,
|
|
"margin_dpo/margin_std": 51.157962799072266,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.8631897203325775,
|
|
"fcm_dpo/beta": 0.016408953815698624,
|
|
"fcm_dpo/delta": 0.1358412206172943,
|
|
"fcm_dpo/margin": 28.562528610229492,
|
|
"fcm_dpo/q_t": 0.3967515230178833,
|
|
"grad_norm": 16.442455291748047,
|
|
"learning_rate": 2.840011871446962e-08,
|
|
"logits/chosen": 0.6660867929458618,
|
|
"logits/rejected": 0.6316110491752625,
|
|
"logps/chosen": -123.6998291015625,
|
|
"logps/ref_chosen": -53.65681457519531,
|
|
"logps/ref_rejected": -66.13298034667969,
|
|
"logps/rejected": -164.738525390625,
|
|
"loss": 1.1327,
|
|
"margin_dpo/margin_mean": 28.562530517578125,
|
|
"margin_dpo/margin_std": 49.88398742675781,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.8647014361300076,
|
|
"fcm_dpo/beta": 0.01645139418542385,
|
|
"fcm_dpo/delta": 0.0013835076242685318,
|
|
"fcm_dpo/margin": 36.385189056396484,
|
|
"fcm_dpo/q_t": 0.37128400802612305,
|
|
"grad_norm": 17.820674896240234,
|
|
"learning_rate": 2.7791137836269158e-08,
|
|
"logits/chosen": 0.68881756067276,
|
|
"logits/rejected": 0.7318768501281738,
|
|
"logps/chosen": -141.16531372070312,
|
|
"logps/ref_chosen": -74.81792449951172,
|
|
"logps/ref_rejected": -65.88681030273438,
|
|
"logps/rejected": -168.619384765625,
|
|
"loss": 0.9925,
|
|
"margin_dpo/margin_mean": 36.385189056396484,
|
|
"margin_dpo/margin_std": 44.444793701171875,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.8662131519274376,
|
|
"fcm_dpo/beta": 0.016566410660743713,
|
|
"fcm_dpo/delta": 0.08530230820178986,
|
|
"fcm_dpo/margin": 31.338947296142578,
|
|
"fcm_dpo/q_t": 0.393259197473526,
|
|
"grad_norm": 20.416149139404297,
|
|
"learning_rate": 2.718837261761528e-08,
|
|
"logits/chosen": 0.6766190528869629,
|
|
"logits/rejected": 0.6312921643257141,
|
|
"logps/chosen": -143.67018127441406,
|
|
"logps/ref_chosen": -68.72564697265625,
|
|
"logps/ref_rejected": -88.16201782226562,
|
|
"logps/rejected": -194.44549560546875,
|
|
"loss": 1.1457,
|
|
"margin_dpo/margin_mean": 31.338947296142578,
|
|
"margin_dpo/margin_std": 59.51763916015625,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.8677248677248677,
|
|
"fcm_dpo/beta": 0.016609128564596176,
|
|
"fcm_dpo/delta": -0.07822871953248978,
|
|
"fcm_dpo/margin": 40.49908447265625,
|
|
"fcm_dpo/q_t": 0.3569110631942749,
|
|
"grad_norm": 14.132019996643066,
|
|
"learning_rate": 2.659183991914696e-08,
|
|
"logits/chosen": 0.7488802671432495,
|
|
"logits/rejected": 0.6764457821846008,
|
|
"logps/chosen": -123.04143524169922,
|
|
"logps/ref_chosen": -56.31340026855469,
|
|
"logps/ref_rejected": -83.91553497314453,
|
|
"logps/rejected": -191.14266967773438,
|
|
"loss": 0.951,
|
|
"margin_dpo/margin_mean": 40.49908447265625,
|
|
"margin_dpo/margin_std": 45.910301208496094,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.8692365835222978,
|
|
"fcm_dpo/beta": 0.016653649508953094,
|
|
"fcm_dpo/delta": 0.15081343054771423,
|
|
"fcm_dpo/margin": 27.361234664916992,
|
|
"fcm_dpo/q_t": 0.4088207185268402,
|
|
"grad_norm": 18.94614601135254,
|
|
"learning_rate": 2.600155642716606e-08,
|
|
"logits/chosen": 0.7127535343170166,
|
|
"logits/rejected": 0.632762610912323,
|
|
"logps/chosen": -132.92755126953125,
|
|
"logps/ref_chosen": -64.5841293334961,
|
|
"logps/ref_rejected": -93.47034454345703,
|
|
"logps/rejected": -189.1750030517578,
|
|
"loss": 1.1885,
|
|
"margin_dpo/margin_mean": 27.361234664916992,
|
|
"margin_dpo/margin_std": 56.532806396484375,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.8707482993197279,
|
|
"fcm_dpo/beta": 0.01663918048143387,
|
|
"fcm_dpo/delta": -0.0805702805519104,
|
|
"fcm_dpo/margin": 40.50676727294922,
|
|
"fcm_dpo/q_t": 0.3621788024902344,
|
|
"grad_norm": 15.60090446472168,
|
|
"learning_rate": 2.5417538653170754e-08,
|
|
"logits/chosen": 0.702485203742981,
|
|
"logits/rejected": 0.597660481929779,
|
|
"logps/chosen": -113.17729187011719,
|
|
"logps/ref_chosen": -53.28052520751953,
|
|
"logps/ref_rejected": -84.2000503540039,
|
|
"logps/rejected": -184.60357666015625,
|
|
"loss": 1.0073,
|
|
"margin_dpo/margin_mean": 40.50676727294922,
|
|
"margin_dpo/margin_std": 55.57630920410156,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.872260015117158,
|
|
"fcm_dpo/beta": 0.0168609656393528,
|
|
"fcm_dpo/delta": 0.091962069272995,
|
|
"fcm_dpo/margin": 30.416515350341797,
|
|
"fcm_dpo/q_t": 0.39214855432510376,
|
|
"grad_norm": 16.009185791015625,
|
|
"learning_rate": 2.4839802933393607e-08,
|
|
"logits/chosen": 0.6752563714981079,
|
|
"logits/rejected": 0.6577494144439697,
|
|
"logps/chosen": -128.01956176757812,
|
|
"logps/ref_chosen": -62.32468795776367,
|
|
"logps/ref_rejected": -67.300537109375,
|
|
"logps/rejected": -163.41192626953125,
|
|
"loss": 1.1286,
|
|
"margin_dpo/margin_mean": 30.416515350341797,
|
|
"margin_dpo/margin_std": 54.648590087890625,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.873771730914588,
|
|
"fcm_dpo/beta": 0.01736040972173214,
|
|
"fcm_dpo/delta": 0.17633679509162903,
|
|
"fcm_dpo/margin": 24.863304138183594,
|
|
"fcm_dpo/q_t": 0.4127289652824402,
|
|
"grad_norm": 18.371273040771484,
|
|
"learning_rate": 2.4268365428344733e-08,
|
|
"logits/chosen": 0.7377026081085205,
|
|
"logits/rejected": 0.7115747928619385,
|
|
"logps/chosen": -122.48710632324219,
|
|
"logps/ref_chosen": -56.65557861328125,
|
|
"logps/ref_rejected": -68.21835327148438,
|
|
"logps/rejected": -158.91317749023438,
|
|
"loss": 1.1781,
|
|
"margin_dpo/margin_mean": 24.863304138183594,
|
|
"margin_dpo/margin_std": 50.95249938964844,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.8752834467120182,
|
|
"fcm_dpo/beta": 0.017089959233999252,
|
|
"fcm_dpo/delta": -0.18611499667167664,
|
|
"fcm_dpo/margin": 45.14439392089844,
|
|
"fcm_dpo/q_t": 0.3376918435096741,
|
|
"grad_norm": 14.985367774963379,
|
|
"learning_rate": 2.3703242122359357e-08,
|
|
"logits/chosen": 0.6345067024230957,
|
|
"logits/rejected": 0.6059737205505371,
|
|
"logps/chosen": -125.25282287597656,
|
|
"logps/ref_chosen": -56.809661865234375,
|
|
"logps/ref_rejected": -68.09613037109375,
|
|
"logps/rejected": -181.68368530273438,
|
|
"loss": 0.9086,
|
|
"margin_dpo/margin_mean": 45.14439392089844,
|
|
"margin_dpo/margin_std": 49.639869689941406,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.8767951625094482,
|
|
"fcm_dpo/beta": 0.01717275008559227,
|
|
"fcm_dpo/delta": 0.05046956241130829,
|
|
"fcm_dpo/margin": 32.10778045654297,
|
|
"fcm_dpo/q_t": 0.3875921666622162,
|
|
"grad_norm": 18.05984878540039,
|
|
"learning_rate": 2.3144448823151392e-08,
|
|
"logits/chosen": 0.650100827217102,
|
|
"logits/rejected": 0.5966248512268066,
|
|
"logps/chosen": -124.11175537109375,
|
|
"logps/ref_chosen": -57.70011520385742,
|
|
"logps/ref_rejected": -77.90664672851562,
|
|
"logps/rejected": -176.42605590820312,
|
|
"loss": 1.1186,
|
|
"margin_dpo/margin_mean": 32.1077766418457,
|
|
"margin_dpo/margin_std": 56.66058349609375,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.8783068783068783,
|
|
"fcm_dpo/beta": 0.017328936606645584,
|
|
"fcm_dpo/delta": 0.1224561482667923,
|
|
"fcm_dpo/margin": 27.898914337158203,
|
|
"fcm_dpo/q_t": 0.40121370553970337,
|
|
"grad_norm": 20.932573318481445,
|
|
"learning_rate": 2.259200116137039e-08,
|
|
"logits/chosen": 0.7106508612632751,
|
|
"logits/rejected": 0.6442577838897705,
|
|
"logps/chosen": -135.62893676757812,
|
|
"logps/ref_chosen": -59.332359313964844,
|
|
"logps/ref_rejected": -83.64482116699219,
|
|
"logps/rejected": -187.84033203125,
|
|
"loss": 1.1397,
|
|
"margin_dpo/margin_mean": 27.898914337158203,
|
|
"margin_dpo/margin_std": 51.49224853515625,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.8798185941043084,
|
|
"fcm_dpo/beta": 0.017463190481066704,
|
|
"fcm_dpo/delta": -0.039771128445863724,
|
|
"fcm_dpo/margin": 36.49109649658203,
|
|
"fcm_dpo/q_t": 0.367986798286438,
|
|
"grad_norm": 18.59633445739746,
|
|
"learning_rate": 2.204591459016525e-08,
|
|
"logits/chosen": 0.7187392711639404,
|
|
"logits/rejected": 0.7509971857070923,
|
|
"logps/chosen": -130.5426025390625,
|
|
"logps/ref_chosen": -64.16285705566406,
|
|
"logps/ref_rejected": -58.632896423339844,
|
|
"logps/rejected": -161.50375366210938,
|
|
"loss": 1.0385,
|
|
"margin_dpo/margin_mean": 36.49109649658203,
|
|
"margin_dpo/margin_std": 53.77030944824219,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.8813303099017384,
|
|
"fcm_dpo/beta": 0.017327800393104553,
|
|
"fcm_dpo/delta": -0.0331776961684227,
|
|
"fcm_dpo/margin": 36.420982360839844,
|
|
"fcm_dpo/q_t": 0.3738207221031189,
|
|
"grad_norm": 21.699419021606445,
|
|
"learning_rate": 2.1506204384751064e-08,
|
|
"logits/chosen": 0.792787492275238,
|
|
"logits/rejected": 0.6811857223510742,
|
|
"logps/chosen": -119.08982849121094,
|
|
"logps/ref_chosen": -51.87239456176758,
|
|
"logps/ref_rejected": -83.86331176757812,
|
|
"logps/rejected": -187.50172424316406,
|
|
"loss": 1.094,
|
|
"margin_dpo/margin_mean": 36.42098617553711,
|
|
"margin_dpo/margin_std": 61.2724609375,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.8828420256991686,
|
|
"fcm_dpo/beta": 0.017169862985610962,
|
|
"fcm_dpo/delta": 0.004221245646476746,
|
|
"fcm_dpo/margin": 34.651824951171875,
|
|
"fcm_dpo/q_t": 0.3810550570487976,
|
|
"grad_norm": 18.9495849609375,
|
|
"learning_rate": 2.09728856419826e-08,
|
|
"logits/chosen": 0.7909083366394043,
|
|
"logits/rejected": 0.6847249865531921,
|
|
"logps/chosen": -106.61726379394531,
|
|
"logps/ref_chosen": -46.571388244628906,
|
|
"logps/ref_rejected": -80.67969512939453,
|
|
"logps/rejected": -175.3773956298828,
|
|
"loss": 1.113,
|
|
"margin_dpo/margin_mean": 34.65182876586914,
|
|
"margin_dpo/margin_std": 60.08186340332031,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.8843537414965986,
|
|
"fcm_dpo/beta": 0.017794519662857056,
|
|
"fcm_dpo/delta": 0.14609597623348236,
|
|
"fcm_dpo/margin": 25.79193878173828,
|
|
"fcm_dpo/q_t": 0.4023086428642273,
|
|
"grad_norm": 16.919166564941406,
|
|
"learning_rate": 2.044597327993153e-08,
|
|
"logits/chosen": 0.6519303321838379,
|
|
"logits/rejected": 0.6100852489471436,
|
|
"logps/chosen": -127.17724609375,
|
|
"logps/ref_chosen": -58.124534606933594,
|
|
"logps/ref_rejected": -79.00538635253906,
|
|
"logps/rejected": -173.85003662109375,
|
|
"loss": 1.1695,
|
|
"margin_dpo/margin_mean": 25.79193687438965,
|
|
"margin_dpo/margin_std": 51.46417236328125,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.8858654572940288,
|
|
"fcm_dpo/beta": 0.017533529549837112,
|
|
"fcm_dpo/delta": -0.07861245423555374,
|
|
"fcm_dpo/margin": 38.363807678222656,
|
|
"fcm_dpo/q_t": 0.35704219341278076,
|
|
"grad_norm": 19.74669647216797,
|
|
"learning_rate": 1.9925482037469187e-08,
|
|
"logits/chosen": 0.7134085297584534,
|
|
"logits/rejected": 0.6633602380752563,
|
|
"logps/chosen": -121.17387390136719,
|
|
"logps/ref_chosen": -54.10163879394531,
|
|
"logps/ref_rejected": -63.72113037109375,
|
|
"logps/rejected": -169.15716552734375,
|
|
"loss": 0.9595,
|
|
"margin_dpo/margin_mean": 38.363807678222656,
|
|
"margin_dpo/margin_std": 45.4287223815918,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.8873771730914588,
|
|
"fcm_dpo/beta": 0.017398815602064133,
|
|
"fcm_dpo/delta": -0.05770276114344597,
|
|
"fcm_dpo/margin": 37.58617401123047,
|
|
"fcm_dpo/q_t": 0.36453670263290405,
|
|
"grad_norm": 16.288728713989258,
|
|
"learning_rate": 1.9411426473854687e-08,
|
|
"logits/chosen": 0.7234928011894226,
|
|
"logits/rejected": 0.7105797529220581,
|
|
"logps/chosen": -127.94309997558594,
|
|
"logps/ref_chosen": -63.41719436645508,
|
|
"logps/ref_rejected": -63.47003936767578,
|
|
"logps/rejected": -165.58212280273438,
|
|
"loss": 1.0827,
|
|
"margin_dpo/margin_mean": 37.58617401123047,
|
|
"margin_dpo/margin_std": 61.84636306762695,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.8888888888888888,
|
|
"fcm_dpo/beta": 0.017139676958322525,
|
|
"fcm_dpo/delta": -0.0592581145465374,
|
|
"fcm_dpo/margin": 38.221168518066406,
|
|
"fcm_dpo/q_t": 0.3644402027130127,
|
|
"grad_norm": 18.18668556213379,
|
|
"learning_rate": 1.890382096832699e-08,
|
|
"logits/chosen": 0.726331353187561,
|
|
"logits/rejected": 0.6786773800849915,
|
|
"logps/chosen": -131.14239501953125,
|
|
"logps/ref_chosen": -62.20103454589844,
|
|
"logps/ref_rejected": -82.10249328613281,
|
|
"logps/rejected": -189.26502990722656,
|
|
"loss": 1.0204,
|
|
"margin_dpo/margin_mean": 38.221168518066406,
|
|
"margin_dpo/margin_std": 54.02344512939453,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.890400604686319,
|
|
"fcm_dpo/beta": 0.01690484955906868,
|
|
"fcm_dpo/delta": -0.09004709124565125,
|
|
"fcm_dpo/margin": 40.455116271972656,
|
|
"fcm_dpo/q_t": 0.35561931133270264,
|
|
"grad_norm": 18.06881332397461,
|
|
"learning_rate": 1.840267971970344e-08,
|
|
"logits/chosen": 0.6733403205871582,
|
|
"logits/rejected": 0.644467830657959,
|
|
"logps/chosen": -118.9821548461914,
|
|
"logps/ref_chosen": -56.71361541748047,
|
|
"logps/ref_rejected": -76.7366943359375,
|
|
"logps/rejected": -179.46034240722656,
|
|
"loss": 0.9454,
|
|
"margin_dpo/margin_mean": 40.455116271972656,
|
|
"margin_dpo/margin_std": 46.39854431152344,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.891912320483749,
|
|
"fcm_dpo/beta": 0.01654157042503357,
|
|
"fcm_dpo/delta": -0.12827324867248535,
|
|
"fcm_dpo/margin": 43.478607177734375,
|
|
"fcm_dpo/q_t": 0.3495979905128479,
|
|
"grad_norm": 16.176586151123047,
|
|
"learning_rate": 1.7908016745981856e-08,
|
|
"logits/chosen": 0.6159425973892212,
|
|
"logits/rejected": 0.589844286441803,
|
|
"logps/chosen": -136.01022338867188,
|
|
"logps/ref_chosen": -66.5138168334961,
|
|
"logps/ref_rejected": -85.70820617675781,
|
|
"logps/rejected": -198.6832275390625,
|
|
"loss": 0.9602,
|
|
"margin_dpo/margin_mean": 43.478607177734375,
|
|
"margin_dpo/margin_std": 53.51036834716797,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.8934240362811792,
|
|
"fcm_dpo/beta": 0.015954457223415375,
|
|
"fcm_dpo/delta": -0.12046321481466293,
|
|
"fcm_dpo/margin": 44.440185546875,
|
|
"fcm_dpo/q_t": 0.3531830310821533,
|
|
"grad_norm": 19.646526336669922,
|
|
"learning_rate": 1.7419845883949098e-08,
|
|
"logits/chosen": 0.798710823059082,
|
|
"logits/rejected": 0.7333512306213379,
|
|
"logps/chosen": -120.38575744628906,
|
|
"logps/ref_chosen": -60.697181701660156,
|
|
"logps/ref_rejected": -86.12278747558594,
|
|
"logps/rejected": -190.25155639648438,
|
|
"loss": 1.0121,
|
|
"margin_dpo/margin_mean": 44.440185546875,
|
|
"margin_dpo/margin_std": 60.961891174316406,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.8949357520786092,
|
|
"fcm_dpo/beta": 0.015935592353343964,
|
|
"fcm_dpo/delta": 0.02253812551498413,
|
|
"fcm_dpo/margin": 36.251068115234375,
|
|
"fcm_dpo/q_t": 0.3809935748577118,
|
|
"grad_norm": 17.447284698486328,
|
|
"learning_rate": 1.6938180788793556e-08,
|
|
"logits/chosen": 0.7249664664268494,
|
|
"logits/rejected": 0.6130805611610413,
|
|
"logps/chosen": -113.61046600341797,
|
|
"logps/ref_chosen": -51.237327575683594,
|
|
"logps/ref_rejected": -81.60242462158203,
|
|
"logps/rejected": -180.22662353515625,
|
|
"loss": 1.0401,
|
|
"margin_dpo/margin_mean": 36.25107192993164,
|
|
"margin_dpo/margin_std": 51.01573944091797,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.8964474678760394,
|
|
"fcm_dpo/beta": 0.016153138130903244,
|
|
"fcm_dpo/delta": 0.04338935390114784,
|
|
"fcm_dpo/margin": 34.608680725097656,
|
|
"fcm_dpo/q_t": 0.3850771486759186,
|
|
"grad_norm": 19.05673599243164,
|
|
"learning_rate": 1.6463034933723336e-08,
|
|
"logits/chosen": 0.6903887987136841,
|
|
"logits/rejected": 0.5974088311195374,
|
|
"logps/chosen": -99.88896179199219,
|
|
"logps/ref_chosen": -42.08000183105469,
|
|
"logps/ref_rejected": -68.47499084472656,
|
|
"logps/rejected": -160.89260864257812,
|
|
"loss": 1.1043,
|
|
"margin_dpo/margin_mean": 34.608680725097656,
|
|
"margin_dpo/margin_std": 58.95058822631836,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.8979591836734694,
|
|
"fcm_dpo/beta": 0.01636182889342308,
|
|
"fcm_dpo/delta": 0.08321181684732437,
|
|
"fcm_dpo/margin": 31.851009368896484,
|
|
"fcm_dpo/q_t": 0.38907164335250854,
|
|
"grad_norm": 16.617149353027344,
|
|
"learning_rate": 1.5994421609589385e-08,
|
|
"logits/chosen": 0.6165971755981445,
|
|
"logits/rejected": 0.5995768308639526,
|
|
"logps/chosen": -135.38876342773438,
|
|
"logps/ref_chosen": -63.658668518066406,
|
|
"logps/ref_rejected": -70.35597229003906,
|
|
"logps/rejected": -173.9370880126953,
|
|
"loss": 1.0775,
|
|
"margin_dpo/margin_mean": 31.851009368896484,
|
|
"margin_dpo/margin_std": 49.175872802734375,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.8994708994708994,
|
|
"fcm_dpo/beta": 0.016220130026340485,
|
|
"fcm_dpo/delta": -0.13953274488449097,
|
|
"fcm_dpo/margin": 44.979042053222656,
|
|
"fcm_dpo/q_t": 0.3480144143104553,
|
|
"grad_norm": 15.34145450592041,
|
|
"learning_rate": 1.553235392451377e-08,
|
|
"logits/chosen": 0.7449917793273926,
|
|
"logits/rejected": 0.6512782573699951,
|
|
"logps/chosen": -119.75555419921875,
|
|
"logps/ref_chosen": -56.21875762939453,
|
|
"logps/ref_rejected": -83.95773315429688,
|
|
"logps/rejected": -192.47357177734375,
|
|
"loss": 0.9818,
|
|
"margin_dpo/margin_mean": 44.979042053222656,
|
|
"margin_dpo/margin_std": 58.99134063720703,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.9009826152683296,
|
|
"fcm_dpo/beta": 0.016492169350385666,
|
|
"fcm_dpo/delta": 0.2100110501050949,
|
|
"fcm_dpo/margin": 24.183456420898438,
|
|
"fcm_dpo/q_t": 0.42047858238220215,
|
|
"grad_norm": 16.09357261657715,
|
|
"learning_rate": 1.507684480352292e-08,
|
|
"logits/chosen": 0.6181765794754028,
|
|
"logits/rejected": 0.6303185224533081,
|
|
"logps/chosen": -140.47659301757812,
|
|
"logps/ref_chosen": -68.48088073730469,
|
|
"logps/ref_rejected": -61.732967376708984,
|
|
"logps/rejected": -157.91213989257812,
|
|
"loss": 1.1953,
|
|
"margin_dpo/margin_mean": 24.183456420898438,
|
|
"margin_dpo/margin_std": 52.747528076171875,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.9024943310657596,
|
|
"fcm_dpo/beta": 0.016784945502877235,
|
|
"fcm_dpo/delta": 0.02770313434302807,
|
|
"fcm_dpo/margin": 34.191078186035156,
|
|
"fcm_dpo/q_t": 0.38534241914749146,
|
|
"grad_norm": 16.53261947631836,
|
|
"learning_rate": 1.4627906988186111e-08,
|
|
"logits/chosen": 0.6633099317550659,
|
|
"logits/rejected": 0.6418108940124512,
|
|
"logps/chosen": -106.30998229980469,
|
|
"logps/ref_chosen": -48.85750961303711,
|
|
"logps/ref_rejected": -55.068084716796875,
|
|
"logps/rejected": -146.71163940429688,
|
|
"loss": 1.0651,
|
|
"margin_dpo/margin_mean": 34.191078186035156,
|
|
"margin_dpo/margin_std": 53.32984161376953,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.9040060468631897,
|
|
"fcm_dpo/beta": 0.017436373978853226,
|
|
"fcm_dpo/delta": 0.21474510431289673,
|
|
"fcm_dpo/margin": 22.515594482421875,
|
|
"fcm_dpo/q_t": 0.42177683115005493,
|
|
"grad_norm": 26.964988708496094,
|
|
"learning_rate": 1.4185553036259095e-08,
|
|
"logits/chosen": 0.7077724933624268,
|
|
"logits/rejected": 0.6297961473464966,
|
|
"logps/chosen": -135.16842651367188,
|
|
"logps/ref_chosen": -58.88715362548828,
|
|
"logps/ref_rejected": -81.43145751953125,
|
|
"logps/rejected": -180.22833251953125,
|
|
"loss": 1.2212,
|
|
"margin_dpo/margin_mean": 22.515594482421875,
|
|
"margin_dpo/margin_std": 52.95011520385742,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.9055177626606198,
|
|
"fcm_dpo/beta": 0.017993086948990822,
|
|
"fcm_dpo/delta": 0.1332523375749588,
|
|
"fcm_dpo/margin": 26.235389709472656,
|
|
"fcm_dpo/q_t": 0.4060378074645996,
|
|
"grad_norm": 20.685775756835938,
|
|
"learning_rate": 1.3749795321332885e-08,
|
|
"logits/chosen": 0.774587869644165,
|
|
"logits/rejected": 0.7272125482559204,
|
|
"logps/chosen": -135.90377807617188,
|
|
"logps/ref_chosen": -57.60719299316406,
|
|
"logps/ref_rejected": -71.80469512939453,
|
|
"logps/rejected": -176.336669921875,
|
|
"loss": 1.1719,
|
|
"margin_dpo/margin_mean": 26.235389709472656,
|
|
"margin_dpo/margin_std": 53.39300537109375,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.9070294784580499,
|
|
"fcm_dpo/beta": 0.018428601324558258,
|
|
"fcm_dpo/delta": 0.06073428690433502,
|
|
"fcm_dpo/margin": 29.233776092529297,
|
|
"fcm_dpo/q_t": 0.3916233777999878,
|
|
"grad_norm": 21.98250961303711,
|
|
"learning_rate": 1.3320646032487393e-08,
|
|
"logits/chosen": 0.758099377155304,
|
|
"logits/rejected": 0.7081928849220276,
|
|
"logps/chosen": -129.58924865722656,
|
|
"logps/ref_chosen": -58.44231414794922,
|
|
"logps/ref_rejected": -83.64639282226562,
|
|
"logps/rejected": -184.027099609375,
|
|
"loss": 1.1307,
|
|
"margin_dpo/margin_mean": 29.23377799987793,
|
|
"margin_dpo/margin_std": 52.01209259033203,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.9070294784580499,
|
|
"eval_fcm_dpo/beta": 0.018254384398460388,
|
|
"eval_logits/chosen": 0.6983587741851807,
|
|
"eval_logits/rejected": 0.6510134935379028,
|
|
"eval_logps/chosen": -139.107177734375,
|
|
"eval_logps/ref_chosen": -74.85946655273438,
|
|
"eval_logps/ref_rejected": -79.54898834228516,
|
|
"eval_logps/rejected": -178.12290954589844,
|
|
"eval_loss": 0.5317867994308472,
|
|
"eval_margin_dpo/margin_mean": 34.32622146606445,
|
|
"eval_margin_dpo/margin_std": 53.363624572753906,
|
|
"eval_runtime": 38.0129,
|
|
"eval_samples_per_second": 60.585,
|
|
"eval_steps_per_second": 1.894,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.90854119425548,
|
|
"fcm_dpo/beta": 0.01776060089468956,
|
|
"fcm_dpo/delta": -0.14013047516345978,
|
|
"fcm_dpo/margin": 41.001068115234375,
|
|
"fcm_dpo/q_t": 0.3542864918708801,
|
|
"grad_norm": 16.561017990112305,
|
|
"learning_rate": 1.2898117173950868e-08,
|
|
"logits/chosen": 0.689449667930603,
|
|
"logits/rejected": 0.6153823137283325,
|
|
"logps/chosen": -114.82102966308594,
|
|
"logps/ref_chosen": -55.59432601928711,
|
|
"logps/ref_rejected": -83.68630981445312,
|
|
"logps/rejected": -183.91409301757812,
|
|
"loss": 1.0035,
|
|
"margin_dpo/margin_mean": 41.001068115234375,
|
|
"margin_dpo/margin_std": 56.815673828125,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.91005291005291,
|
|
"fcm_dpo/beta": 0.017588762566447258,
|
|
"fcm_dpo/delta": -0.08487124741077423,
|
|
"fcm_dpo/margin": 38.61279296875,
|
|
"fcm_dpo/q_t": 0.3599042296409607,
|
|
"grad_norm": 16.240304946899414,
|
|
"learning_rate": 1.2482220564763667e-08,
|
|
"logits/chosen": 0.68162602186203,
|
|
"logits/rejected": 0.6481719017028809,
|
|
"logps/chosen": -110.06434631347656,
|
|
"logps/ref_chosen": -56.349185943603516,
|
|
"logps/ref_rejected": -71.9959716796875,
|
|
"logps/rejected": -164.3239288330078,
|
|
"loss": 0.9813,
|
|
"margin_dpo/margin_mean": 38.61279296875,
|
|
"margin_dpo/margin_std": 49.46638107299805,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.9115646258503401,
|
|
"fcm_dpo/beta": 0.017239883542060852,
|
|
"fcm_dpo/delta": -0.0702548399567604,
|
|
"fcm_dpo/margin": 38.59679412841797,
|
|
"fcm_dpo/q_t": 0.36295539140701294,
|
|
"grad_norm": 19.132186889648438,
|
|
"learning_rate": 1.2072967838448051e-08,
|
|
"logits/chosen": 0.6549055576324463,
|
|
"logits/rejected": 0.5953928232192993,
|
|
"logps/chosen": -114.13343048095703,
|
|
"logps/ref_chosen": -53.16838836669922,
|
|
"logps/ref_rejected": -73.8604736328125,
|
|
"logps/rejected": -173.4223175048828,
|
|
"loss": 1.0085,
|
|
"margin_dpo/margin_mean": 38.59679412841797,
|
|
"margin_dpo/margin_std": 53.11018371582031,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.9130763416477702,
|
|
"fcm_dpo/beta": 0.017200354486703873,
|
|
"fcm_dpo/delta": -0.011806067079305649,
|
|
"fcm_dpo/margin": 35.52631378173828,
|
|
"fcm_dpo/q_t": 0.377369225025177,
|
|
"grad_norm": 19.996925354003906,
|
|
"learning_rate": 1.1670370442682459e-08,
|
|
"logits/chosen": 0.6451849937438965,
|
|
"logits/rejected": 0.6497629880905151,
|
|
"logps/chosen": -131.07199096679688,
|
|
"logps/ref_chosen": -72.64942169189453,
|
|
"logps/ref_rejected": -69.8792724609375,
|
|
"logps/rejected": -163.82815551757812,
|
|
"loss": 1.0794,
|
|
"margin_dpo/margin_mean": 35.52631378173828,
|
|
"margin_dpo/margin_std": 58.30902099609375,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.9145880574452003,
|
|
"fcm_dpo/beta": 0.017121510580182076,
|
|
"fcm_dpo/delta": 0.015186280012130737,
|
|
"fcm_dpo/margin": 34.185150146484375,
|
|
"fcm_dpo/q_t": 0.37800726294517517,
|
|
"grad_norm": 18.581581115722656,
|
|
"learning_rate": 1.1274439638981532e-08,
|
|
"logits/chosen": 0.7332407236099243,
|
|
"logits/rejected": 0.6724931001663208,
|
|
"logps/chosen": -131.06732177734375,
|
|
"logps/ref_chosen": -61.61284637451172,
|
|
"logps/ref_rejected": -79.34398651123047,
|
|
"logps/rejected": -182.98361206054688,
|
|
"loss": 1.0838,
|
|
"margin_dpo/margin_mean": 34.18514633178711,
|
|
"margin_dpo/margin_std": 55.458526611328125,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.9160997732426304,
|
|
"fcm_dpo/beta": 0.017126478254795074,
|
|
"fcm_dpo/delta": -0.07169067859649658,
|
|
"fcm_dpo/margin": 38.930240631103516,
|
|
"fcm_dpo/q_t": 0.36326584219932556,
|
|
"grad_norm": 17.81355094909668,
|
|
"learning_rate": 1.0885186502381016e-08,
|
|
"logits/chosen": 0.697436511516571,
|
|
"logits/rejected": 0.6320433616638184,
|
|
"logps/chosen": -114.82478332519531,
|
|
"logps/ref_chosen": -54.46424102783203,
|
|
"logps/ref_rejected": -79.62708282470703,
|
|
"logps/rejected": -178.91786193847656,
|
|
"loss": 0.9987,
|
|
"margin_dpo/margin_mean": 38.930240631103516,
|
|
"margin_dpo/margin_std": 51.813507080078125,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.9176114890400605,
|
|
"fcm_dpo/beta": 0.016687501221895218,
|
|
"fcm_dpo/delta": -0.03947858512401581,
|
|
"fcm_dpo/margin": 37.99995803833008,
|
|
"fcm_dpo/q_t": 0.3684396743774414,
|
|
"grad_norm": 17.625709533691406,
|
|
"learning_rate": 1.0502621921127774e-08,
|
|
"logits/chosen": 0.6601325273513794,
|
|
"logits/rejected": 0.6314581632614136,
|
|
"logps/chosen": -131.482666015625,
|
|
"logps/ref_chosen": -62.86086654663086,
|
|
"logps/ref_rejected": -72.5501937866211,
|
|
"logps/rejected": -179.17196655273438,
|
|
"loss": 1.0299,
|
|
"margin_dpo/margin_mean": 37.99995422363281,
|
|
"margin_dpo/margin_std": 52.8006706237793,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.9191232048374905,
|
|
"fcm_dpo/beta": 0.017145490273833275,
|
|
"fcm_dpo/delta": 0.1086527556180954,
|
|
"fcm_dpo/margin": 28.957908630371094,
|
|
"fcm_dpo/q_t": 0.3978094458580017,
|
|
"grad_norm": 18.91431999206543,
|
|
"learning_rate": 1.0126756596375685e-08,
|
|
"logits/chosen": 0.6649261116981506,
|
|
"logits/rejected": 0.5938790440559387,
|
|
"logps/chosen": -134.17208862304688,
|
|
"logps/ref_chosen": -63.18071746826172,
|
|
"logps/ref_rejected": -99.15888214111328,
|
|
"logps/rejected": -199.10816955566406,
|
|
"loss": 1.1162,
|
|
"margin_dpo/margin_mean": 28.957908630371094,
|
|
"margin_dpo/margin_std": 50.635528564453125,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.9206349206349206,
|
|
"fcm_dpo/beta": 0.016784831881523132,
|
|
"fcm_dpo/delta": -0.12502390146255493,
|
|
"fcm_dpo/margin": 42.56886291503906,
|
|
"fcm_dpo/q_t": 0.344668447971344,
|
|
"grad_norm": 13.991439819335938,
|
|
"learning_rate": 9.757601041885694e-09,
|
|
"logits/chosen": 0.7602401971817017,
|
|
"logits/rejected": 0.7267245650291443,
|
|
"logps/chosen": -108.38632202148438,
|
|
"logps/ref_chosen": -48.62322235107422,
|
|
"logps/ref_rejected": -68.28271484375,
|
|
"logps/rejected": -170.61468505859375,
|
|
"loss": 0.9301,
|
|
"margin_dpo/margin_mean": 42.56886291503906,
|
|
"margin_dpo/margin_std": 46.08442687988281,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.9221466364323507,
|
|
"fcm_dpo/beta": 0.016544140875339508,
|
|
"fcm_dpo/delta": -0.06457509100437164,
|
|
"fcm_dpo/margin": 39.86622619628906,
|
|
"fcm_dpo/q_t": 0.3651999235153198,
|
|
"grad_norm": 16.56065559387207,
|
|
"learning_rate": 9.395165583732379e-09,
|
|
"logits/chosen": 0.6606322526931763,
|
|
"logits/rejected": 0.6659835577011108,
|
|
"logps/chosen": -139.08177185058594,
|
|
"logps/ref_chosen": -72.66513061523438,
|
|
"logps/ref_rejected": -87.15310668945312,
|
|
"logps/rejected": -193.43597412109375,
|
|
"loss": 0.9975,
|
|
"margin_dpo/margin_mean": 39.86622619628906,
|
|
"margin_dpo/margin_std": 53.14252471923828,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.9236583522297808,
|
|
"fcm_dpo/beta": 0.016713187098503113,
|
|
"fcm_dpo/delta": 0.09823843091726303,
|
|
"fcm_dpo/margin": 30.310468673706055,
|
|
"fcm_dpo/q_t": 0.39233702421188354,
|
|
"grad_norm": 18.44264030456543,
|
|
"learning_rate": 9.03946036001449e-09,
|
|
"logits/chosen": 0.7202863097190857,
|
|
"logits/rejected": 0.6715569496154785,
|
|
"logps/chosen": -110.45277404785156,
|
|
"logps/ref_chosen": -48.30857849121094,
|
|
"logps/ref_rejected": -70.6141128540039,
|
|
"logps/rejected": -163.06878662109375,
|
|
"loss": 1.0841,
|
|
"margin_dpo/margin_mean": 30.310466766357422,
|
|
"margin_dpo/margin_std": 46.933807373046875,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.9251700680272109,
|
|
"fcm_dpo/beta": 0.01676209270954132,
|
|
"fcm_dpo/delta": -0.055530332028865814,
|
|
"fcm_dpo/margin": 38.89222717285156,
|
|
"fcm_dpo/q_t": 0.36272430419921875,
|
|
"grad_norm": 17.055858612060547,
|
|
"learning_rate": 8.690495320571839e-09,
|
|
"logits/chosen": 0.6043254137039185,
|
|
"logits/rejected": 0.5377599000930786,
|
|
"logps/chosen": -130.5507354736328,
|
|
"logps/ref_chosen": -61.23155975341797,
|
|
"logps/ref_rejected": -94.37979888916016,
|
|
"logps/rejected": -202.5911865234375,
|
|
"loss": 1.0225,
|
|
"margin_dpo/margin_mean": 38.89222717285156,
|
|
"margin_dpo/margin_std": 55.477169036865234,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.926681783824641,
|
|
"fcm_dpo/beta": 0.016338884830474854,
|
|
"fcm_dpo/delta": -0.18059828877449036,
|
|
"fcm_dpo/margin": 46.94964599609375,
|
|
"fcm_dpo/q_t": 0.3390156626701355,
|
|
"grad_norm": 14.02810287475586,
|
|
"learning_rate": 8.348280226706722e-09,
|
|
"logits/chosen": 0.6071598529815674,
|
|
"logits/rejected": 0.604604959487915,
|
|
"logps/chosen": -110.54042053222656,
|
|
"logps/ref_chosen": -53.98310852050781,
|
|
"logps/ref_rejected": -58.32208251953125,
|
|
"logps/rejected": -161.82904052734375,
|
|
"loss": 0.9147,
|
|
"margin_dpo/margin_mean": 46.94964599609375,
|
|
"margin_dpo/margin_std": 52.04528045654297,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.9281934996220711,
|
|
"fcm_dpo/beta": 0.01610434241592884,
|
|
"fcm_dpo/delta": -0.02669554390013218,
|
|
"fcm_dpo/margin": 38.80487060546875,
|
|
"fcm_dpo/q_t": 0.36256033182144165,
|
|
"grad_norm": 17.52783203125,
|
|
"learning_rate": 8.012824650910937e-09,
|
|
"logits/chosen": 0.7269790172576904,
|
|
"logits/rejected": 0.7166494727134705,
|
|
"logps/chosen": -127.43746185302734,
|
|
"logps/ref_chosen": -60.24303436279297,
|
|
"logps/ref_rejected": -72.26258850097656,
|
|
"logps/rejected": -178.2618865966797,
|
|
"loss": 0.9902,
|
|
"margin_dpo/margin_mean": 38.80487060546875,
|
|
"margin_dpo/margin_std": 47.703529357910156,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.9297052154195011,
|
|
"fcm_dpo/beta": 0.015918750315904617,
|
|
"fcm_dpo/delta": -0.01292453333735466,
|
|
"fcm_dpo/margin": 38.415618896484375,
|
|
"fcm_dpo/q_t": 0.3736609220504761,
|
|
"grad_norm": 16.462543487548828,
|
|
"learning_rate": 7.684137976598088e-09,
|
|
"logits/chosen": 0.6522448062896729,
|
|
"logits/rejected": 0.6058327555656433,
|
|
"logps/chosen": -141.9201202392578,
|
|
"logps/ref_chosen": -72.09467315673828,
|
|
"logps/ref_rejected": -104.02980041503906,
|
|
"logps/rejected": -212.2708740234375,
|
|
"loss": 1.0683,
|
|
"margin_dpo/margin_mean": 38.415618896484375,
|
|
"margin_dpo/margin_std": 60.30884552001953,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.9312169312169312,
|
|
"fcm_dpo/beta": 0.016069892793893814,
|
|
"fcm_dpo/delta": 0.0499076284468174,
|
|
"fcm_dpo/margin": 34.402427673339844,
|
|
"fcm_dpo/q_t": 0.38505834341049194,
|
|
"grad_norm": 15.858017921447754,
|
|
"learning_rate": 7.36222939784098e-09,
|
|
"logits/chosen": 0.7267533540725708,
|
|
"logits/rejected": 0.6444547176361084,
|
|
"logps/chosen": -125.30108642578125,
|
|
"logps/ref_chosen": -58.530723571777344,
|
|
"logps/ref_rejected": -75.48025512695312,
|
|
"logps/rejected": -176.65304565429688,
|
|
"loss": 1.0629,
|
|
"margin_dpo/margin_mean": 34.402427673339844,
|
|
"margin_dpo/margin_std": 52.15081024169922,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.9327286470143613,
|
|
"fcm_dpo/beta": 0.016335247084498405,
|
|
"fcm_dpo/delta": 0.1371011584997177,
|
|
"fcm_dpo/margin": 28.701946258544922,
|
|
"fcm_dpo/q_t": 0.4002954661846161,
|
|
"grad_norm": 19.695823669433594,
|
|
"learning_rate": 7.047107919114586e-09,
|
|
"logits/chosen": 0.692958414554596,
|
|
"logits/rejected": 0.6451589465141296,
|
|
"logps/chosen": -133.67486572265625,
|
|
"logps/ref_chosen": -57.608673095703125,
|
|
"logps/ref_rejected": -81.22109985351562,
|
|
"logps/rejected": -185.98922729492188,
|
|
"loss": 1.1238,
|
|
"margin_dpo/margin_mean": 28.701946258544922,
|
|
"margin_dpo/margin_std": 48.95549392700195,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.9342403628117913,
|
|
"fcm_dpo/beta": 0.016573436558246613,
|
|
"fcm_dpo/delta": 0.03523973748087883,
|
|
"fcm_dpo/margin": 34.153472900390625,
|
|
"fcm_dpo/q_t": 0.38183510303497314,
|
|
"grad_norm": 19.02802085876465,
|
|
"learning_rate": 6.738782355044048e-09,
|
|
"logits/chosen": 0.664625346660614,
|
|
"logits/rejected": 0.5642604827880859,
|
|
"logps/chosen": -120.1915512084961,
|
|
"logps/ref_chosen": -56.69594192504883,
|
|
"logps/ref_rejected": -85.92362976074219,
|
|
"logps/rejected": -183.57272338867188,
|
|
"loss": 1.0365,
|
|
"margin_dpo/margin_mean": 34.153472900390625,
|
|
"margin_dpo/margin_std": 47.633235931396484,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.9357520786092215,
|
|
"fcm_dpo/beta": 0.016661301255226135,
|
|
"fcm_dpo/delta": -0.003810018301010132,
|
|
"fcm_dpo/margin": 36.216026306152344,
|
|
"fcm_dpo/q_t": 0.3725661635398865,
|
|
"grad_norm": 16.40776252746582,
|
|
"learning_rate": 6.437261330158206e-09,
|
|
"logits/chosen": 0.8066527843475342,
|
|
"logits/rejected": 0.7323254346847534,
|
|
"logps/chosen": -116.28726196289062,
|
|
"logps/ref_chosen": -54.05841827392578,
|
|
"logps/ref_rejected": -83.55493927001953,
|
|
"logps/rejected": -181.99981689453125,
|
|
"loss": 1.0208,
|
|
"margin_dpo/margin_mean": 36.216026306152344,
|
|
"margin_dpo/margin_std": 49.7095947265625,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.9372637944066515,
|
|
"fcm_dpo/beta": 0.017014428973197937,
|
|
"fcm_dpo/delta": 0.02767297625541687,
|
|
"fcm_dpo/margin": 33.48244857788086,
|
|
"fcm_dpo/q_t": 0.3834364712238312,
|
|
"grad_norm": 20.08639907836914,
|
|
"learning_rate": 6.142553278648238e-09,
|
|
"logits/chosen": 0.6961303353309631,
|
|
"logits/rejected": 0.6932598352432251,
|
|
"logps/chosen": -124.36492919921875,
|
|
"logps/ref_chosen": -63.36971664428711,
|
|
"logps/ref_rejected": -65.68269348144531,
|
|
"logps/rejected": -160.16033935546875,
|
|
"loss": 1.0699,
|
|
"margin_dpo/margin_mean": 33.48244857788086,
|
|
"margin_dpo/margin_std": 49.82050323486328,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.9387755102040817,
|
|
"fcm_dpo/beta": 0.017159054055809975,
|
|
"fcm_dpo/delta": 0.12095116078853607,
|
|
"fcm_dpo/margin": 28.226398468017578,
|
|
"fcm_dpo/q_t": 0.39944252371788025,
|
|
"grad_norm": 17.94400978088379,
|
|
"learning_rate": 5.854666444131934e-09,
|
|
"logits/chosen": 0.7288126945495605,
|
|
"logits/rejected": 0.6246720552444458,
|
|
"logps/chosen": -117.13513946533203,
|
|
"logps/ref_chosen": -52.321224212646484,
|
|
"logps/ref_rejected": -88.09001159667969,
|
|
"logps/rejected": -181.1303253173828,
|
|
"loss": 1.1334,
|
|
"margin_dpo/margin_mean": 28.226398468017578,
|
|
"margin_dpo/margin_std": 51.35742950439453,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.9402872260015117,
|
|
"fcm_dpo/beta": 0.017424535006284714,
|
|
"fcm_dpo/delta": 0.060253530740737915,
|
|
"fcm_dpo/margin": 31.139747619628906,
|
|
"fcm_dpo/q_t": 0.38587862253189087,
|
|
"grad_norm": 20.17845916748047,
|
|
"learning_rate": 5.573608879422875e-09,
|
|
"logits/chosen": 0.6671361923217773,
|
|
"logits/rejected": 0.6277763843536377,
|
|
"logps/chosen": -130.28573608398438,
|
|
"logps/ref_chosen": -59.86545944213867,
|
|
"logps/ref_rejected": -81.86668395996094,
|
|
"logps/rejected": -183.42669677734375,
|
|
"loss": 1.0714,
|
|
"margin_dpo/margin_mean": 31.139747619628906,
|
|
"margin_dpo/margin_std": 47.877464294433594,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.9417989417989417,
|
|
"fcm_dpo/beta": 0.01731981709599495,
|
|
"fcm_dpo/delta": -0.02505187690258026,
|
|
"fcm_dpo/margin": 35.991477966308594,
|
|
"fcm_dpo/q_t": 0.36854812502861023,
|
|
"grad_norm": 16.328716278076172,
|
|
"learning_rate": 5.299388446305342e-09,
|
|
"logits/chosen": 0.66575688123703,
|
|
"logits/rejected": 0.5999845266342163,
|
|
"logps/chosen": -142.4012451171875,
|
|
"logps/ref_chosen": -67.36846160888672,
|
|
"logps/ref_rejected": -82.02733612060547,
|
|
"logps/rejected": -193.05160522460938,
|
|
"loss": 1.0023,
|
|
"margin_dpo/margin_mean": 35.991477966308594,
|
|
"margin_dpo/margin_std": 46.96021270751953,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.9433106575963719,
|
|
"fcm_dpo/beta": 0.017001666128635406,
|
|
"fcm_dpo/delta": -0.10930600017309189,
|
|
"fcm_dpo/margin": 41.240196228027344,
|
|
"fcm_dpo/q_t": 0.35751086473464966,
|
|
"grad_norm": 18.303504943847656,
|
|
"learning_rate": 5.03201281531429e-09,
|
|
"logits/chosen": 0.6909003853797913,
|
|
"logits/rejected": 0.592226505279541,
|
|
"logps/chosen": -112.12557983398438,
|
|
"logps/ref_chosen": -51.02655029296875,
|
|
"logps/ref_rejected": -76.49203491210938,
|
|
"logps/rejected": -178.83126831054688,
|
|
"loss": 0.9887,
|
|
"margin_dpo/margin_mean": 41.240196228027344,
|
|
"margin_dpo/margin_std": 54.28327941894531,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.9448223733938019,
|
|
"fcm_dpo/beta": 0.017218362540006638,
|
|
"fcm_dpo/delta": 0.09852743148803711,
|
|
"fcm_dpo/margin": 29.410263061523438,
|
|
"fcm_dpo/q_t": 0.39616650342941284,
|
|
"grad_norm": 18.753204345703125,
|
|
"learning_rate": 4.7714894655209174e-09,
|
|
"logits/chosen": 0.7545461654663086,
|
|
"logits/rejected": 0.6679770350456238,
|
|
"logps/chosen": -119.49295043945312,
|
|
"logps/ref_chosen": -54.20761489868164,
|
|
"logps/ref_rejected": -84.93669128417969,
|
|
"logps/rejected": -179.63229370117188,
|
|
"loss": 1.1324,
|
|
"margin_dpo/margin_mean": 29.410261154174805,
|
|
"margin_dpo/margin_std": 54.021522521972656,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.9463340891912321,
|
|
"fcm_dpo/beta": 0.017001252621412277,
|
|
"fcm_dpo/delta": -0.09621863812208176,
|
|
"fcm_dpo/margin": 40.53666687011719,
|
|
"fcm_dpo/q_t": 0.3652860224246979,
|
|
"grad_norm": 17.47981834411621,
|
|
"learning_rate": 4.517825684323323e-09,
|
|
"logits/chosen": 0.783034086227417,
|
|
"logits/rejected": 0.6518374681472778,
|
|
"logps/chosen": -106.47137451171875,
|
|
"logps/ref_chosen": -45.06201934814453,
|
|
"logps/ref_rejected": -89.66368103027344,
|
|
"logps/rejected": -191.60971069335938,
|
|
"loss": 1.0556,
|
|
"margin_dpo/margin_mean": 40.53666687011719,
|
|
"margin_dpo/margin_std": 62.77349853515625,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.9478458049886621,
|
|
"fcm_dpo/beta": 0.016569461673498154,
|
|
"fcm_dpo/delta": -0.13161586225032806,
|
|
"fcm_dpo/margin": 43.53169250488281,
|
|
"fcm_dpo/q_t": 0.35016459226608276,
|
|
"grad_norm": 17.202301025390625,
|
|
"learning_rate": 4.271028567242818e-09,
|
|
"logits/chosen": 0.6269781589508057,
|
|
"logits/rejected": 0.5125735402107239,
|
|
"logps/chosen": -126.37629699707031,
|
|
"logps/ref_chosen": -58.791053771972656,
|
|
"logps/ref_rejected": -94.90802001953125,
|
|
"logps/rejected": -206.02496337890625,
|
|
"loss": 0.979,
|
|
"margin_dpo/margin_mean": 43.53169250488281,
|
|
"margin_dpo/margin_std": 56.438446044921875,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.9493575207860923,
|
|
"fcm_dpo/beta": 0.016308607533574104,
|
|
"fcm_dpo/delta": -0.19555062055587769,
|
|
"fcm_dpo/margin": 47.73688507080078,
|
|
"fcm_dpo/q_t": 0.3357737958431244,
|
|
"grad_norm": 17.734283447265625,
|
|
"learning_rate": 4.0311050177251895e-09,
|
|
"logits/chosen": 0.7015246152877808,
|
|
"logits/rejected": 0.6676697731018066,
|
|
"logps/chosen": -111.4091796875,
|
|
"logps/ref_chosen": -52.80357360839844,
|
|
"logps/ref_rejected": -76.49468994140625,
|
|
"logps/rejected": -182.83717346191406,
|
|
"loss": 0.9789,
|
|
"margin_dpo/margin_mean": 47.73688507080078,
|
|
"margin_dpo/margin_std": 57.750762939453125,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.9508692365835223,
|
|
"fcm_dpo/beta": 0.016032151877880096,
|
|
"fcm_dpo/delta": 0.03456023707985878,
|
|
"fcm_dpo/margin": 35.36682891845703,
|
|
"fcm_dpo/q_t": 0.3803756535053253,
|
|
"grad_norm": 15.833183288574219,
|
|
"learning_rate": 3.798061746947995e-09,
|
|
"logits/chosen": 0.69905686378479,
|
|
"logits/rejected": 0.6941945552825928,
|
|
"logps/chosen": -134.60055541992188,
|
|
"logps/ref_chosen": -70.71749877929688,
|
|
"logps/ref_rejected": -78.96273803710938,
|
|
"logps/rejected": -178.21261596679688,
|
|
"loss": 1.0171,
|
|
"margin_dpo/margin_mean": 35.36682891845703,
|
|
"margin_dpo/margin_std": 45.6728515625,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.9523809523809523,
|
|
"fcm_dpo/beta": 0.015975255519151688,
|
|
"fcm_dpo/delta": -0.04418795555830002,
|
|
"fcm_dpo/margin": 40.13007354736328,
|
|
"fcm_dpo/q_t": 0.3665482997894287,
|
|
"grad_norm": 13.421211242675781,
|
|
"learning_rate": 3.5719052736323806e-09,
|
|
"logits/chosen": 0.6364885568618774,
|
|
"logits/rejected": 0.5930874347686768,
|
|
"logps/chosen": -120.07940673828125,
|
|
"logps/ref_chosen": -56.201412200927734,
|
|
"logps/ref_rejected": -74.69807434082031,
|
|
"logps/rejected": -178.70614624023438,
|
|
"loss": 0.9952,
|
|
"margin_dpo/margin_mean": 40.13007354736328,
|
|
"margin_dpo/margin_std": 51.83021545410156,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.9538926681783825,
|
|
"fcm_dpo/beta": 0.015368154272437096,
|
|
"fcm_dpo/delta": -0.12270902097225189,
|
|
"fcm_dpo/margin": 46.13782501220703,
|
|
"fcm_dpo/q_t": 0.35464248061180115,
|
|
"grad_norm": 16.841995239257812,
|
|
"learning_rate": 3.352641923861144e-09,
|
|
"logits/chosen": 0.7928265929222107,
|
|
"logits/rejected": 0.6835281252861023,
|
|
"logps/chosen": -118.16952514648438,
|
|
"logps/ref_chosen": -58.82059860229492,
|
|
"logps/ref_rejected": -96.51437377929688,
|
|
"logps/rejected": -202.0011444091797,
|
|
"loss": 0.9862,
|
|
"margin_dpo/margin_mean": 46.1378288269043,
|
|
"margin_dpo/margin_std": 58.701908111572266,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.9554043839758125,
|
|
"fcm_dpo/beta": 0.015180578455328941,
|
|
"fcm_dpo/delta": -0.13757643103599548,
|
|
"fcm_dpo/margin": 47.93510437011719,
|
|
"fcm_dpo/q_t": 0.34288290143013,
|
|
"grad_norm": 14.054166793823242,
|
|
"learning_rate": 3.140277830901428e-09,
|
|
"logits/chosen": 0.7194072008132935,
|
|
"logits/rejected": 0.6981650590896606,
|
|
"logps/chosen": -120.36946105957031,
|
|
"logps/ref_chosen": -58.786048889160156,
|
|
"logps/ref_rejected": -67.21923828125,
|
|
"logps/rejected": -176.73776245117188,
|
|
"loss": 0.9196,
|
|
"margin_dpo/margin_mean": 47.93510437011719,
|
|
"margin_dpo/margin_std": 51.463253021240234,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.9569160997732427,
|
|
"fcm_dpo/beta": 0.015230704098939896,
|
|
"fcm_dpo/delta": 0.06359954923391342,
|
|
"fcm_dpo/margin": 35.429588317871094,
|
|
"fcm_dpo/q_t": 0.3875540494918823,
|
|
"grad_norm": 16.164337158203125,
|
|
"learning_rate": 2.9348189350335007e-09,
|
|
"logits/chosen": 0.6756185293197632,
|
|
"logits/rejected": 0.615243136882782,
|
|
"logps/chosen": -108.6497802734375,
|
|
"logps/ref_chosen": -52.13019561767578,
|
|
"logps/ref_rejected": -67.23016357421875,
|
|
"logps/rejected": -159.1793212890625,
|
|
"loss": 1.067,
|
|
"margin_dpo/margin_mean": 35.429588317871094,
|
|
"margin_dpo/margin_std": 53.79712677001953,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.9584278155706727,
|
|
"fcm_dpo/beta": 0.01600709743797779,
|
|
"fcm_dpo/delta": 0.30318742990493774,
|
|
"fcm_dpo/margin": 19.04725456237793,
|
|
"fcm_dpo/q_t": 0.4390316605567932,
|
|
"grad_norm": 21.8569393157959,
|
|
"learning_rate": 2.736270983384276e-09,
|
|
"logits/chosen": 0.7564660310745239,
|
|
"logits/rejected": 0.7673421502113342,
|
|
"logps/chosen": -129.17764282226562,
|
|
"logps/ref_chosen": -60.97979736328125,
|
|
"logps/ref_rejected": -58.50825119018555,
|
|
"logps/rejected": -145.75335693359375,
|
|
"loss": 1.2675,
|
|
"margin_dpo/margin_mean": 19.047256469726562,
|
|
"margin_dpo/margin_std": 52.853660583496094,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.9599395313681028,
|
|
"fcm_dpo/beta": 0.016620900481939316,
|
|
"fcm_dpo/delta": 0.15847182273864746,
|
|
"fcm_dpo/margin": 26.969745635986328,
|
|
"fcm_dpo/q_t": 0.4112244248390198,
|
|
"grad_norm": 16.682920455932617,
|
|
"learning_rate": 2.5446395297668287e-09,
|
|
"logits/chosen": 0.5778566002845764,
|
|
"logits/rejected": 0.5155054330825806,
|
|
"logps/chosen": -147.23043823242188,
|
|
"logps/ref_chosen": -65.9730224609375,
|
|
"logps/ref_rejected": -85.61317443847656,
|
|
"logps/rejected": -193.84033203125,
|
|
"loss": 1.2101,
|
|
"margin_dpo/margin_mean": 26.969745635986328,
|
|
"margin_dpo/margin_std": 60.308692932128906,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.9614512471655329,
|
|
"fcm_dpo/beta": 0.016455478966236115,
|
|
"fcm_dpo/delta": -0.07432037591934204,
|
|
"fcm_dpo/margin": 40.624237060546875,
|
|
"fcm_dpo/q_t": 0.3579748272895813,
|
|
"grad_norm": 14.709686279296875,
|
|
"learning_rate": 2.359929934524829e-09,
|
|
"logits/chosen": 0.6613017916679382,
|
|
"logits/rejected": 0.5728839635848999,
|
|
"logps/chosen": -111.96475219726562,
|
|
"logps/ref_chosen": -49.140167236328125,
|
|
"logps/ref_rejected": -81.26971435546875,
|
|
"logps/rejected": -184.71853637695312,
|
|
"loss": 0.9803,
|
|
"margin_dpo/margin_mean": 40.624237060546875,
|
|
"margin_dpo/margin_std": 51.248046875,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.9629629629629629,
|
|
"fcm_dpo/beta": 0.01680588349699974,
|
|
"fcm_dpo/delta": 0.1063363254070282,
|
|
"fcm_dpo/margin": 29.647350311279297,
|
|
"fcm_dpo/q_t": 0.40189188718795776,
|
|
"grad_norm": 18.443958282470703,
|
|
"learning_rate": 2.1821473643827137e-09,
|
|
"logits/chosen": 0.6527888774871826,
|
|
"logits/rejected": 0.5813695192337036,
|
|
"logps/chosen": -155.8303680419922,
|
|
"logps/ref_chosen": -73.69658660888672,
|
|
"logps/ref_rejected": -83.01487731933594,
|
|
"logps/rejected": -194.79600524902344,
|
|
"loss": 1.155,
|
|
"margin_dpo/margin_mean": 29.647350311279297,
|
|
"margin_dpo/margin_std": 57.743892669677734,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.9644746787603931,
|
|
"fcm_dpo/beta": 0.01687886193394661,
|
|
"fcm_dpo/delta": -0.0013024341315031052,
|
|
"fcm_dpo/margin": 35.60945129394531,
|
|
"fcm_dpo/q_t": 0.3767626881599426,
|
|
"grad_norm": 18.627864837646484,
|
|
"learning_rate": 2.0112967923011646e-09,
|
|
"logits/chosen": 0.6956614255905151,
|
|
"logits/rejected": 0.6454405188560486,
|
|
"logps/chosen": -135.54721069335938,
|
|
"logps/ref_chosen": -62.78158187866211,
|
|
"logps/ref_rejected": -85.40478515625,
|
|
"logps/rejected": -193.7798614501953,
|
|
"loss": 1.0436,
|
|
"margin_dpo/margin_mean": 35.60945129394531,
|
|
"margin_dpo/margin_std": 52.48899459838867,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.9659863945578231,
|
|
"fcm_dpo/beta": 0.016702190041542053,
|
|
"fcm_dpo/delta": -0.07177025079727173,
|
|
"fcm_dpo/margin": 39.934791564941406,
|
|
"fcm_dpo/q_t": 0.3631782829761505,
|
|
"grad_norm": 19.758222579956055,
|
|
"learning_rate": 1.847382997337943e-09,
|
|
"logits/chosen": 0.6821843385696411,
|
|
"logits/rejected": 0.5689026117324829,
|
|
"logps/chosen": -119.10699462890625,
|
|
"logps/ref_chosen": -53.76658630371094,
|
|
"logps/ref_rejected": -72.30009460449219,
|
|
"logps/rejected": -177.57528686523438,
|
|
"loss": 0.9971,
|
|
"margin_dpo/margin_mean": 39.934791564941406,
|
|
"margin_dpo/margin_std": 53.109901428222656,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.9674981103552532,
|
|
"fcm_dpo/beta": 0.01664363034069538,
|
|
"fcm_dpo/delta": 0.033666323870420456,
|
|
"fcm_dpo/margin": 34.142127990722656,
|
|
"fcm_dpo/q_t": 0.3770233392715454,
|
|
"grad_norm": 16.493188858032227,
|
|
"learning_rate": 1.690410564514244e-09,
|
|
"logits/chosen": 0.7594385147094727,
|
|
"logits/rejected": 0.6996129751205444,
|
|
"logps/chosen": -119.43183135986328,
|
|
"logps/ref_chosen": -51.41777801513672,
|
|
"logps/ref_rejected": -77.27879333496094,
|
|
"logps/rejected": -179.43496704101562,
|
|
"loss": 1.0664,
|
|
"margin_dpo/margin_mean": 34.142127990722656,
|
|
"margin_dpo/margin_std": 52.741458892822266,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.9690098261526833,
|
|
"fcm_dpo/beta": 0.01675679162144661,
|
|
"fcm_dpo/delta": 0.039283640682697296,
|
|
"fcm_dpo/margin": 33.59276580810547,
|
|
"fcm_dpo/q_t": 0.3780820965766907,
|
|
"grad_norm": 15.796224594116211,
|
|
"learning_rate": 1.5403838846864692e-09,
|
|
"logits/chosen": 0.6780893802642822,
|
|
"logits/rejected": 0.6513991355895996,
|
|
"logps/chosen": -141.1656951904297,
|
|
"logps/ref_chosen": -71.0546646118164,
|
|
"logps/ref_rejected": -82.2440185546875,
|
|
"logps/rejected": -185.94781494140625,
|
|
"loss": 1.0314,
|
|
"margin_dpo/margin_mean": 33.59276580810547,
|
|
"margin_dpo/margin_std": 46.151939392089844,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.9705215419501134,
|
|
"fcm_dpo/beta": 0.017033934593200684,
|
|
"fcm_dpo/delta": 0.1573614478111267,
|
|
"fcm_dpo/margin": 26.348485946655273,
|
|
"fcm_dpo/q_t": 0.40937286615371704,
|
|
"grad_norm": 21.933429718017578,
|
|
"learning_rate": 1.3973071544233218e-09,
|
|
"logits/chosen": 0.6313962936401367,
|
|
"logits/rejected": 0.6405247449874878,
|
|
"logps/chosen": -144.66749572753906,
|
|
"logps/ref_chosen": -68.92927551269531,
|
|
"logps/ref_rejected": -70.85682678222656,
|
|
"logps/rejected": -172.9435272216797,
|
|
"loss": 1.1935,
|
|
"margin_dpo/margin_mean": 26.348485946655273,
|
|
"margin_dpo/margin_std": 54.467716217041016,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.9720332577475435,
|
|
"fcm_dpo/beta": 0.017401862889528275,
|
|
"fcm_dpo/delta": 0.02936522290110588,
|
|
"fcm_dpo/margin": 32.88540267944336,
|
|
"fcm_dpo/q_t": 0.3840622305870056,
|
|
"grad_norm": 27.94525718688965,
|
|
"learning_rate": 1.261184375888541e-09,
|
|
"logits/chosen": 0.6284259557723999,
|
|
"logits/rejected": 0.5389350056648254,
|
|
"logps/chosen": -135.20681762695312,
|
|
"logps/ref_chosen": -65.30903625488281,
|
|
"logps/ref_rejected": -83.61613464355469,
|
|
"logps/rejected": -186.39932250976562,
|
|
"loss": 1.0915,
|
|
"margin_dpo/margin_mean": 32.885398864746094,
|
|
"margin_dpo/margin_std": 54.64806365966797,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.9735449735449735,
|
|
"fcm_dpo/beta": 0.017747625708580017,
|
|
"fcm_dpo/delta": 0.05810273438692093,
|
|
"fcm_dpo/margin": 30.634418487548828,
|
|
"fcm_dpo/q_t": 0.3914340138435364,
|
|
"grad_norm": 16.880868911743164,
|
|
"learning_rate": 1.1320193567288527e-09,
|
|
"logits/chosen": 0.7576817274093628,
|
|
"logits/rejected": 0.7244468927383423,
|
|
"logps/chosen": -113.37348937988281,
|
|
"logps/ref_chosen": -51.002601623535156,
|
|
"logps/ref_rejected": -64.46372985839844,
|
|
"logps/rejected": -157.4690399169922,
|
|
"loss": 1.1632,
|
|
"margin_dpo/margin_mean": 30.634418487548828,
|
|
"margin_dpo/margin_std": 60.088741302490234,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.9750566893424036,
|
|
"fcm_dpo/beta": 0.017687616869807243,
|
|
"fcm_dpo/delta": 0.030620308592915535,
|
|
"fcm_dpo/margin": 32.27980041503906,
|
|
"fcm_dpo/q_t": 0.382048100233078,
|
|
"grad_norm": 19.38361930847168,
|
|
"learning_rate": 1.0098157099674987e-09,
|
|
"logits/chosen": 0.6390470266342163,
|
|
"logits/rejected": 0.6177409291267395,
|
|
"logps/chosen": -129.932861328125,
|
|
"logps/ref_chosen": -60.963409423828125,
|
|
"logps/ref_rejected": -69.73353576660156,
|
|
"logps/rejected": -170.9827880859375,
|
|
"loss": 1.0597,
|
|
"margin_dpo/margin_mean": 32.27980041503906,
|
|
"margin_dpo/margin_std": 49.32701873779297,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.9765684051398337,
|
|
"fcm_dpo/beta": 0.01789352297782898,
|
|
"fcm_dpo/delta": 0.039331234991550446,
|
|
"fcm_dpo/margin": 31.456527709960938,
|
|
"fcm_dpo/q_t": 0.3862152695655823,
|
|
"grad_norm": 18.05316925048828,
|
|
"learning_rate": 8.945768539031783e-10,
|
|
"logits/chosen": 0.7234746813774109,
|
|
"logits/rejected": 0.6710031032562256,
|
|
"logps/chosen": -135.71275329589844,
|
|
"logps/ref_chosen": -62.290069580078125,
|
|
"logps/ref_rejected": -85.54812622070312,
|
|
"logps/rejected": -190.42733764648438,
|
|
"loss": 1.1176,
|
|
"margin_dpo/margin_mean": 31.456527709960938,
|
|
"margin_dpo/margin_std": 56.255977630615234,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.9780801209372638,
|
|
"fcm_dpo/beta": 0.017382677644491196,
|
|
"fcm_dpo/delta": -0.22605100274085999,
|
|
"fcm_dpo/margin": 46.494606018066406,
|
|
"fcm_dpo/q_t": 0.334111750125885,
|
|
"grad_norm": 18.57387924194336,
|
|
"learning_rate": 7.863060120144316e-10,
|
|
"logits/chosen": 0.7320365905761719,
|
|
"logits/rejected": 0.6377066373825073,
|
|
"logps/chosen": -139.47146606445312,
|
|
"logps/ref_chosen": -67.515869140625,
|
|
"logps/ref_rejected": -101.50871276855469,
|
|
"logps/rejected": -219.95889282226562,
|
|
"loss": 0.905,
|
|
"margin_dpo/margin_mean": 46.494606018066406,
|
|
"margin_dpo/margin_std": 52.26645278930664,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.9795918367346939,
|
|
"fcm_dpo/beta": 0.017017535865306854,
|
|
"fcm_dpo/delta": -0.011982899159193039,
|
|
"fcm_dpo/margin": 35.888240814208984,
|
|
"fcm_dpo/q_t": 0.3698846697807312,
|
|
"grad_norm": 15.682772636413574,
|
|
"learning_rate": 6.850062128694045e-10,
|
|
"logits/chosen": 0.6553179025650024,
|
|
"logits/rejected": 0.5904830694198608,
|
|
"logps/chosen": -135.27606201171875,
|
|
"logps/ref_chosen": -64.59593963623047,
|
|
"logps/ref_rejected": -83.384033203125,
|
|
"logps/rejected": -189.952392578125,
|
|
"loss": 1.0545,
|
|
"margin_dpo/margin_mean": 35.88824462890625,
|
|
"margin_dpo/margin_std": 53.6502571105957,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.981103552532124,
|
|
"fcm_dpo/beta": 0.016928700730204582,
|
|
"fcm_dpo/delta": -0.009675152599811554,
|
|
"fcm_dpo/margin": 35.89997100830078,
|
|
"fcm_dpo/q_t": 0.3757920265197754,
|
|
"grad_norm": 24.2320556640625,
|
|
"learning_rate": 5.906802900412788e-10,
|
|
"logits/chosen": 0.7032018899917603,
|
|
"logits/rejected": 0.6470180749893188,
|
|
"logps/chosen": -117.7570571899414,
|
|
"logps/ref_chosen": -49.30964660644531,
|
|
"logps/ref_rejected": -73.73710632324219,
|
|
"logps/rejected": -178.08450317382812,
|
|
"loss": 1.0723,
|
|
"margin_dpo/margin_mean": 35.89997100830078,
|
|
"margin_dpo/margin_std": 56.242855072021484,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.982615268329554,
|
|
"fcm_dpo/beta": 0.017035439610481262,
|
|
"fcm_dpo/delta": 0.0025629187002778053,
|
|
"fcm_dpo/margin": 35.0762825012207,
|
|
"fcm_dpo/q_t": 0.38023632764816284,
|
|
"grad_norm": 19.767776489257812,
|
|
"learning_rate": 5.033308820289184e-10,
|
|
"logits/chosen": 0.7739812135696411,
|
|
"logits/rejected": 0.710270881652832,
|
|
"logps/chosen": -119.4194107055664,
|
|
"logps/ref_chosen": -55.06325912475586,
|
|
"logps/ref_rejected": -77.39610290527344,
|
|
"logps/rejected": -176.8285369873047,
|
|
"loss": 1.1056,
|
|
"margin_dpo/margin_mean": 35.07628631591797,
|
|
"margin_dpo/margin_std": 59.95784378051758,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.9841269841269841,
|
|
"fcm_dpo/beta": 0.017338156700134277,
|
|
"fcm_dpo/delta": 0.06786399334669113,
|
|
"fcm_dpo/margin": 30.842973709106445,
|
|
"fcm_dpo/q_t": 0.39106687903404236,
|
|
"grad_norm": 18.881311416625977,
|
|
"learning_rate": 4.2296043218295606e-10,
|
|
"logits/chosen": 0.7450631856918335,
|
|
"logits/rejected": 0.6721943616867065,
|
|
"logps/chosen": -118.5746841430664,
|
|
"logps/ref_chosen": -54.065162658691406,
|
|
"logps/ref_rejected": -77.79080200195312,
|
|
"logps/rejected": -173.143310546875,
|
|
"loss": 1.0911,
|
|
"margin_dpo/margin_mean": 30.842973709106445,
|
|
"margin_dpo/margin_std": 50.0612678527832,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 0.9856386999244142,
|
|
"fcm_dpo/beta": 0.017395323142409325,
|
|
"fcm_dpo/delta": 0.06672574579715729,
|
|
"fcm_dpo/margin": 30.856266021728516,
|
|
"fcm_dpo/q_t": 0.39265865087509155,
|
|
"grad_norm": 19.28176498413086,
|
|
"learning_rate": 3.4957118863768176e-10,
|
|
"logits/chosen": 0.695549726486206,
|
|
"logits/rejected": 0.6393716931343079,
|
|
"logps/chosen": -140.04200744628906,
|
|
"logps/ref_chosen": -63.64030456542969,
|
|
"logps/ref_rejected": -78.86882019042969,
|
|
"logps/rejected": -186.12680053710938,
|
|
"loss": 1.1184,
|
|
"margin_dpo/margin_mean": 30.856264114379883,
|
|
"margin_dpo/margin_std": 54.582725524902344,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 0.9871504157218443,
|
|
"fcm_dpo/beta": 0.017352044582366943,
|
|
"fcm_dpo/delta": -0.08883976191282272,
|
|
"fcm_dpo/margin": 39.352874755859375,
|
|
"fcm_dpo/q_t": 0.360774964094162,
|
|
"grad_norm": 20.68842315673828,
|
|
"learning_rate": 2.831652042480093e-10,
|
|
"logits/chosen": 0.6648276448249817,
|
|
"logits/rejected": 0.6195430159568787,
|
|
"logps/chosen": -129.05657958984375,
|
|
"logps/ref_chosen": -61.668373107910156,
|
|
"logps/ref_rejected": -73.83012390136719,
|
|
"logps/rejected": -180.57122802734375,
|
|
"loss": 1.022,
|
|
"margin_dpo/margin_mean": 39.352874755859375,
|
|
"margin_dpo/margin_std": 56.605133056640625,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 0.9886621315192744,
|
|
"fcm_dpo/beta": 0.017307717353105545,
|
|
"fcm_dpo/delta": 0.11917576938867569,
|
|
"fcm_dpo/margin": 28.009092330932617,
|
|
"fcm_dpo/q_t": 0.4049610197544098,
|
|
"grad_norm": 19.33361053466797,
|
|
"learning_rate": 2.2374433653205016e-10,
|
|
"logits/chosen": 0.6821545362472534,
|
|
"logits/rejected": 0.5850452184677124,
|
|
"logps/chosen": -127.31944274902344,
|
|
"logps/ref_chosen": -57.568267822265625,
|
|
"logps/ref_rejected": -87.74789428710938,
|
|
"logps/rejected": -185.50816345214844,
|
|
"loss": 1.1431,
|
|
"margin_dpo/margin_mean": 28.009090423583984,
|
|
"margin_dpo/margin_std": 50.39651107788086,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 0.9901738473167044,
|
|
"fcm_dpo/beta": 0.016991257667541504,
|
|
"fcm_dpo/delta": -0.1648244857788086,
|
|
"fcm_dpo/margin": 43.98023986816406,
|
|
"fcm_dpo/q_t": 0.33990585803985596,
|
|
"grad_norm": 14.38284969329834,
|
|
"learning_rate": 1.7131024761923852e-10,
|
|
"logits/chosen": 0.6561794281005859,
|
|
"logits/rejected": 0.5625892877578735,
|
|
"logps/chosen": -108.23675537109375,
|
|
"logps/ref_chosen": -52.14714813232422,
|
|
"logps/ref_rejected": -80.85014343261719,
|
|
"logps/rejected": -180.91998291015625,
|
|
"loss": 0.9022,
|
|
"margin_dpo/margin_mean": 43.98023986816406,
|
|
"margin_dpo/margin_std": 43.58360290527344,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.9916855631141346,
|
|
"fcm_dpo/beta": 0.016844086349010468,
|
|
"fcm_dpo/delta": -0.08287950605154037,
|
|
"fcm_dpo/margin": 40.203338623046875,
|
|
"fcm_dpo/q_t": 0.3588668704032898,
|
|
"grad_norm": 15.275532722473145,
|
|
"learning_rate": 1.2586440420372934e-10,
|
|
"logits/chosen": 0.627634584903717,
|
|
"logits/rejected": 0.5770636796951294,
|
|
"logps/chosen": -143.4762725830078,
|
|
"logps/ref_chosen": -73.25672912597656,
|
|
"logps/ref_rejected": -85.35127258300781,
|
|
"logps/rejected": -195.77413940429688,
|
|
"loss": 0.9826,
|
|
"margin_dpo/margin_mean": 40.203338623046875,
|
|
"margin_dpo/margin_std": 51.91832733154297,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 0.9931972789115646,
|
|
"fcm_dpo/beta": 0.016424383968114853,
|
|
"fcm_dpo/delta": -0.12891708314418793,
|
|
"fcm_dpo/margin": 43.787635803222656,
|
|
"fcm_dpo/q_t": 0.3502225875854492,
|
|
"grad_norm": 15.052490234375,
|
|
"learning_rate": 8.740807750345913e-11,
|
|
"logits/chosen": 0.8135035037994385,
|
|
"logits/rejected": 0.7229472398757935,
|
|
"logps/chosen": -114.978271484375,
|
|
"logps/ref_chosen": -49.72339630126953,
|
|
"logps/ref_rejected": -75.1568603515625,
|
|
"logps/rejected": -184.19937133789062,
|
|
"loss": 0.9865,
|
|
"margin_dpo/margin_mean": 43.78763961791992,
|
|
"margin_dpo/margin_std": 57.974388122558594,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 0.9947089947089947,
|
|
"fcm_dpo/beta": 0.016662094742059708,
|
|
"fcm_dpo/delta": 0.07724005728960037,
|
|
"fcm_dpo/margin": 31.531997680664062,
|
|
"fcm_dpo/q_t": 0.39289391040802,
|
|
"grad_norm": 19.596899032592773,
|
|
"learning_rate": 5.594234322453539e-11,
|
|
"logits/chosen": 0.7062351703643799,
|
|
"logits/rejected": 0.6689351797103882,
|
|
"logps/chosen": -133.1918487548828,
|
|
"logps/ref_chosen": -63.04634094238281,
|
|
"logps/ref_rejected": -83.44963073730469,
|
|
"logps/rejected": -185.12713623046875,
|
|
"loss": 1.1634,
|
|
"margin_dpo/margin_mean": 31.531997680664062,
|
|
"margin_dpo/margin_std": 61.76860809326172,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 0.9962207105064248,
|
|
"fcm_dpo/beta": 0.016693908721208572,
|
|
"fcm_dpo/delta": 0.10468055307865143,
|
|
"fcm_dpo/margin": 29.910579681396484,
|
|
"fcm_dpo/q_t": 0.39701542258262634,
|
|
"grad_norm": 18.149425506591797,
|
|
"learning_rate": 3.146808153123293e-11,
|
|
"logits/chosen": 0.7528729438781738,
|
|
"logits/rejected": 0.6886565685272217,
|
|
"logps/chosen": -125.83087921142578,
|
|
"logps/ref_chosen": -55.0802001953125,
|
|
"logps/ref_rejected": -71.91049194335938,
|
|
"logps/rejected": -172.57176208496094,
|
|
"loss": 1.1432,
|
|
"margin_dpo/margin_mean": 29.910579681396484,
|
|
"margin_dpo/margin_std": 54.502227783203125,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 0.9977324263038548,
|
|
"fcm_dpo/beta": 0.016900621354579926,
|
|
"fcm_dpo/delta": -0.035610876977443695,
|
|
"fcm_dpo/margin": 37.475677490234375,
|
|
"fcm_dpo/q_t": 0.3650882840156555,
|
|
"grad_norm": 17.15520668029785,
|
|
"learning_rate": 1.3985977021235829e-11,
|
|
"logits/chosen": 0.7790225744247437,
|
|
"logits/rejected": 0.7075534462928772,
|
|
"logps/chosen": -124.61892700195312,
|
|
"logps/ref_chosen": -54.525917053222656,
|
|
"logps/ref_rejected": -81.23604583740234,
|
|
"logps/rejected": -188.8047332763672,
|
|
"loss": 0.988,
|
|
"margin_dpo/margin_mean": 37.475677490234375,
|
|
"margin_dpo/margin_std": 47.050201416015625,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.999244142101285,
|
|
"fcm_dpo/beta": 0.017178639769554138,
|
|
"fcm_dpo/delta": 0.10657184571027756,
|
|
"fcm_dpo/margin": 28.993637084960938,
|
|
"fcm_dpo/q_t": 0.3975844383239746,
|
|
"grad_norm": 20.30781364440918,
|
|
"learning_rate": 3.4965187065971735e-12,
|
|
"logits/chosen": 0.6451204419136047,
|
|
"logits/rejected": 0.562833845615387,
|
|
"logps/chosen": -139.31686401367188,
|
|
"logps/ref_chosen": -60.37263870239258,
|
|
"logps/ref_rejected": -77.42874145507812,
|
|
"logps/rejected": -185.3666229248047,
|
|
"loss": 1.153,
|
|
"margin_dpo/margin_mean": 28.99363899230957,
|
|
"margin_dpo/margin_std": 56.09125518798828,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.999244142101285,
|
|
"step": 661,
|
|
"total_flos": 0.0,
|
|
"train_loss": 1.0886367675756723,
|
|
"train_runtime": 1755.0349,
|
|
"train_samples_per_second": 24.123,
|
|
"train_steps_per_second": 0.377
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 661,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 50,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|