Model: jackf857/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0 Source: Original Platform
12654 lines
465 KiB
JSON
12654 lines
465 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.999244142101285,
|
|
"eval_steps": 200,
|
|
"global_step": 661,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0015117157974300832,
|
|
"fcm_dpo/beta": 0.11051072180271149,
|
|
"fcm_dpo/delta": 0.49971169233322144,
|
|
"fcm_dpo/margin": -0.0013532638549804688,
|
|
"fcm_dpo/q_t": 0.5000448226928711,
|
|
"grad_norm": 31.18895149230957,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": 0.13337239623069763,
|
|
"logits/rejected": 0.12492949515581131,
|
|
"logps/chosen": -64.5841293334961,
|
|
"logps/ref_chosen": -64.61280822753906,
|
|
"logps/ref_rejected": -64.17195129394531,
|
|
"logps/rejected": -64.14192199707031,
|
|
"loss": 1.3866,
|
|
"margin_dpo/margin_mean": -0.0013527870178222656,
|
|
"margin_dpo/margin_std": 0.2561596930027008,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0030234315948601664,
|
|
"fcm_dpo/beta": 0.11625976860523224,
|
|
"fcm_dpo/delta": 0.49490365386009216,
|
|
"fcm_dpo/margin": 0.037450045347213745,
|
|
"fcm_dpo/q_t": 0.4989655911922455,
|
|
"grad_norm": 32.422725677490234,
|
|
"learning_rate": 7.462686567164179e-09,
|
|
"logits/chosen": 0.09414851665496826,
|
|
"logits/rejected": 0.07363267242908478,
|
|
"logps/chosen": -56.101890563964844,
|
|
"logps/ref_chosen": -56.0989990234375,
|
|
"logps/ref_rejected": -66.59971618652344,
|
|
"logps/rejected": -66.64006042480469,
|
|
"loss": 1.3819,
|
|
"margin_dpo/margin_mean": 0.03744968771934509,
|
|
"margin_dpo/margin_std": 0.27811938524246216,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.0045351473922902496,
|
|
"fcm_dpo/beta": 0.12837310135364532,
|
|
"fcm_dpo/delta": 0.4961715638637543,
|
|
"fcm_dpo/margin": 0.002266407012939453,
|
|
"fcm_dpo/q_t": 0.4999309778213501,
|
|
"grad_norm": 40.0953483581543,
|
|
"learning_rate": 1.4925373134328357e-08,
|
|
"logits/chosen": 0.0993770956993103,
|
|
"logits/rejected": 0.06136491894721985,
|
|
"logps/chosen": -65.4115219116211,
|
|
"logps/ref_chosen": -65.45726013183594,
|
|
"logps/ref_rejected": -90.82853698730469,
|
|
"logps/rejected": -90.78506469726562,
|
|
"loss": 1.3859,
|
|
"margin_dpo/margin_mean": 0.0022667646408081055,
|
|
"margin_dpo/margin_std": 0.26775944232940674,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.006046863189720333,
|
|
"fcm_dpo/beta": 0.14178214967250824,
|
|
"fcm_dpo/delta": 0.4972817301750183,
|
|
"fcm_dpo/margin": -0.031194627285003662,
|
|
"fcm_dpo/q_t": 0.5010493993759155,
|
|
"grad_norm": 48.728790283203125,
|
|
"learning_rate": 2.2388059701492534e-08,
|
|
"logits/chosen": 0.10073457658290863,
|
|
"logits/rejected": 0.08476720750331879,
|
|
"logps/chosen": -76.87289428710938,
|
|
"logps/ref_chosen": -76.86018371582031,
|
|
"logps/ref_rejected": -79.91523742675781,
|
|
"logps/rejected": -79.8967514038086,
|
|
"loss": 1.3908,
|
|
"margin_dpo/margin_mean": -0.031194984912872314,
|
|
"margin_dpo/margin_std": 0.3357463479042053,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.007558578987150416,
|
|
"fcm_dpo/beta": 0.1644459217786789,
|
|
"fcm_dpo/delta": 0.49898889660835266,
|
|
"fcm_dpo/margin": -0.022104412317276,
|
|
"fcm_dpo/q_t": 0.5009359121322632,
|
|
"grad_norm": 48.499725341796875,
|
|
"learning_rate": 2.9850746268656714e-08,
|
|
"logits/chosen": 0.08101461827754974,
|
|
"logits/rejected": 0.04222995042800903,
|
|
"logps/chosen": -62.999996185302734,
|
|
"logps/ref_chosen": -62.97134017944336,
|
|
"logps/ref_rejected": -79.9192123413086,
|
|
"logps/rejected": -79.92576599121094,
|
|
"loss": 1.3905,
|
|
"margin_dpo/margin_mean": -0.02210336923599243,
|
|
"margin_dpo/margin_std": 0.283627986907959,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.009070294784580499,
|
|
"fcm_dpo/beta": 0.1644459217786789,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.07127216458320618,
|
|
"fcm_dpo/q_t": 0.5029286742210388,
|
|
"grad_norm": 48.75336456298828,
|
|
"learning_rate": 3.731343283582089e-08,
|
|
"logits/chosen": 0.13917648792266846,
|
|
"logits/rejected": 0.09997415542602539,
|
|
"logps/chosen": -51.33795166015625,
|
|
"logps/ref_chosen": -51.30736541748047,
|
|
"logps/ref_rejected": -82.77239227294922,
|
|
"logps/rejected": -82.73170471191406,
|
|
"loss": 1.3986,
|
|
"margin_dpo/margin_mean": -0.07127270102500916,
|
|
"margin_dpo/margin_std": 0.29276320338249207,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.010582010582010581,
|
|
"fcm_dpo/beta": 0.1816796362400055,
|
|
"fcm_dpo/delta": 0.4983155131340027,
|
|
"fcm_dpo/margin": -0.0058057308197021484,
|
|
"fcm_dpo/q_t": 0.500307023525238,
|
|
"grad_norm": 49.4698600769043,
|
|
"learning_rate": 4.477611940298507e-08,
|
|
"logits/chosen": 0.017587212845683098,
|
|
"logits/rejected": -0.02612943761050701,
|
|
"logps/chosen": -51.4460334777832,
|
|
"logps/ref_chosen": -51.45941162109375,
|
|
"logps/ref_rejected": -66.3828125,
|
|
"logps/rejected": -66.3636245727539,
|
|
"loss": 1.388,
|
|
"margin_dpo/margin_mean": -0.005805850028991699,
|
|
"margin_dpo/margin_std": 0.2854662537574768,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.012093726379440665,
|
|
"fcm_dpo/beta": 0.1816796362400055,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.029055893421173096,
|
|
"fcm_dpo/q_t": 0.5013211965560913,
|
|
"grad_norm": 52.10679244995117,
|
|
"learning_rate": 5.223880597014925e-08,
|
|
"logits/chosen": 0.09742860496044159,
|
|
"logits/rejected": 0.07472395896911621,
|
|
"logps/chosen": -62.214691162109375,
|
|
"logps/ref_chosen": -62.197547912597656,
|
|
"logps/ref_rejected": -74.66180419921875,
|
|
"logps/rejected": -74.64989471435547,
|
|
"loss": 1.3925,
|
|
"margin_dpo/margin_mean": -0.02905610203742981,
|
|
"margin_dpo/margin_std": 0.3362266421318054,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.013605442176870748,
|
|
"fcm_dpo/beta": 0.21118226647377014,
|
|
"fcm_dpo/delta": 0.9957462549209595,
|
|
"fcm_dpo/margin": 0.02298596501350403,
|
|
"fcm_dpo/q_t": 0.498937726020813,
|
|
"grad_norm": 66.31742858886719,
|
|
"learning_rate": 5.970149253731343e-08,
|
|
"logits/chosen": 0.17290404438972473,
|
|
"logits/rejected": 0.1132848858833313,
|
|
"logps/chosen": -55.65576934814453,
|
|
"logps/ref_chosen": -55.629722595214844,
|
|
"logps/ref_rejected": -86.21221923828125,
|
|
"logps/rejected": -86.2612533569336,
|
|
"loss": 1.3826,
|
|
"margin_dpo/margin_mean": 0.02298620343208313,
|
|
"margin_dpo/margin_std": 0.30049267411231995,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.015117157974300832,
|
|
"fcm_dpo/beta": 0.24479255080223083,
|
|
"fcm_dpo/delta": 0.4950849413871765,
|
|
"fcm_dpo/margin": 0.01253315806388855,
|
|
"fcm_dpo/q_t": 0.4993648827075958,
|
|
"grad_norm": 72.88284301757812,
|
|
"learning_rate": 6.71641791044776e-08,
|
|
"logits/chosen": 0.11379828304052353,
|
|
"logits/rejected": 0.08385583758354187,
|
|
"logps/chosen": -62.68762969970703,
|
|
"logps/ref_chosen": -62.69060134887695,
|
|
"logps/ref_rejected": -90.610107421875,
|
|
"logps/rejected": -90.61967468261719,
|
|
"loss": 1.3853,
|
|
"margin_dpo/margin_mean": 0.01253288984298706,
|
|
"margin_dpo/margin_std": 0.3701857328414917,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.016628873771730914,
|
|
"fcm_dpo/beta": 0.26968804001808167,
|
|
"fcm_dpo/delta": 0.484273761510849,
|
|
"fcm_dpo/margin": 0.04408371448516846,
|
|
"fcm_dpo/q_t": 0.4974249601364136,
|
|
"grad_norm": 79.30926513671875,
|
|
"learning_rate": 7.462686567164178e-08,
|
|
"logits/chosen": 0.10774752497673035,
|
|
"logits/rejected": 0.10090956091880798,
|
|
"logps/chosen": -65.75579071044922,
|
|
"logps/ref_chosen": -65.76712036132812,
|
|
"logps/ref_rejected": -72.4764633178711,
|
|
"logps/rejected": -72.50921630859375,
|
|
"loss": 1.376,
|
|
"margin_dpo/margin_mean": 0.04408392310142517,
|
|
"margin_dpo/margin_std": 0.2787271738052368,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.018140589569160998,
|
|
"fcm_dpo/beta": 0.26968804001808167,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.013816118240356445,
|
|
"fcm_dpo/q_t": 0.5009292364120483,
|
|
"grad_norm": 76.66712951660156,
|
|
"learning_rate": 8.208955223880596e-08,
|
|
"logits/chosen": 0.03750212490558624,
|
|
"logits/rejected": 0.02106173150241375,
|
|
"logps/chosen": -60.710899353027344,
|
|
"logps/ref_chosen": -60.704891204833984,
|
|
"logps/ref_rejected": -69.41564178466797,
|
|
"logps/rejected": -69.4078369140625,
|
|
"loss": 1.3914,
|
|
"margin_dpo/margin_mean": -0.013815999031066895,
|
|
"margin_dpo/margin_std": 0.2728922367095947,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.019652305366591082,
|
|
"fcm_dpo/beta": 0.26968804001808167,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.035725027322769165,
|
|
"fcm_dpo/q_t": 0.5023995637893677,
|
|
"grad_norm": 78.57530212402344,
|
|
"learning_rate": 8.955223880597014e-08,
|
|
"logits/chosen": 0.10675495862960815,
|
|
"logits/rejected": 0.044550854712724686,
|
|
"logps/chosen": -49.91514587402344,
|
|
"logps/ref_chosen": -49.90925598144531,
|
|
"logps/ref_rejected": -92.37818145751953,
|
|
"logps/rejected": -92.34834289550781,
|
|
"loss": 1.3971,
|
|
"margin_dpo/margin_mean": -0.03572601079940796,
|
|
"margin_dpo/margin_std": 0.24918314814567566,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.021164021164021163,
|
|
"fcm_dpo/beta": 0.31274157762527466,
|
|
"fcm_dpo/delta": 0.9786568880081177,
|
|
"fcm_dpo/margin": 0.075018972158432,
|
|
"fcm_dpo/q_t": 0.49468034505844116,
|
|
"grad_norm": 91.86367797851562,
|
|
"learning_rate": 9.701492537313432e-08,
|
|
"logits/chosen": 0.08078277111053467,
|
|
"logits/rejected": 0.06322959065437317,
|
|
"logps/chosen": -60.599849700927734,
|
|
"logps/ref_chosen": -60.61879348754883,
|
|
"logps/ref_rejected": -71.79306030273438,
|
|
"logps/rejected": -71.84913635253906,
|
|
"loss": 1.3649,
|
|
"margin_dpo/margin_mean": 0.07501909136772156,
|
|
"margin_dpo/margin_std": 0.2812075614929199,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.022675736961451247,
|
|
"fcm_dpo/beta": 0.3279946446418762,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.02524346113204956,
|
|
"fcm_dpo/q_t": 0.5020579099655151,
|
|
"grad_norm": 109.66602325439453,
|
|
"learning_rate": 1.044776119402985e-07,
|
|
"logits/chosen": 0.06694771349430084,
|
|
"logits/rejected": 0.023975659161806107,
|
|
"logps/chosen": -63.49495315551758,
|
|
"logps/ref_chosen": -63.46953582763672,
|
|
"logps/ref_rejected": -88.88951110839844,
|
|
"logps/rejected": -88.88968658447266,
|
|
"loss": 1.3976,
|
|
"margin_dpo/margin_mean": -0.025244086980819702,
|
|
"margin_dpo/margin_std": 0.3348177969455719,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.02418745275888133,
|
|
"fcm_dpo/beta": 0.34503084421157837,
|
|
"fcm_dpo/delta": 0.4941604435443878,
|
|
"fcm_dpo/margin": 0.014522925019264221,
|
|
"fcm_dpo/q_t": 0.4988110661506653,
|
|
"grad_norm": 93.19413757324219,
|
|
"learning_rate": 1.1194029850746268e-07,
|
|
"logits/chosen": 0.11576000601053238,
|
|
"logits/rejected": 0.07864248752593994,
|
|
"logps/chosen": -46.55487823486328,
|
|
"logps/ref_chosen": -46.53229904174805,
|
|
"logps/ref_rejected": -74.27533721923828,
|
|
"logps/rejected": -74.31243896484375,
|
|
"loss": 1.3833,
|
|
"margin_dpo/margin_mean": 0.014522776007652283,
|
|
"margin_dpo/margin_std": 0.27633634209632874,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.025699168556311415,
|
|
"fcm_dpo/beta": 0.39912450313568115,
|
|
"fcm_dpo/delta": 0.487219899892807,
|
|
"fcm_dpo/margin": -0.00017780065536499023,
|
|
"fcm_dpo/q_t": 0.5003235936164856,
|
|
"grad_norm": 134.72512817382812,
|
|
"learning_rate": 1.1940298507462686e-07,
|
|
"logits/chosen": 0.04781803488731384,
|
|
"logits/rejected": 0.02929597906768322,
|
|
"logps/chosen": -64.07421875,
|
|
"logps/ref_chosen": -64.07783508300781,
|
|
"logps/ref_rejected": -86.40876770019531,
|
|
"logps/rejected": -86.40497589111328,
|
|
"loss": 1.3906,
|
|
"margin_dpo/margin_mean": -0.00017789006233215332,
|
|
"margin_dpo/margin_std": 0.3190717101097107,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.027210884353741496,
|
|
"fcm_dpo/beta": 0.4197884202003479,
|
|
"fcm_dpo/delta": 0.4926441013813019,
|
|
"fcm_dpo/margin": -0.029925107955932617,
|
|
"fcm_dpo/q_t": 0.5029613971710205,
|
|
"grad_norm": 120.34202575683594,
|
|
"learning_rate": 1.2686567164179106e-07,
|
|
"logits/chosen": 0.10955735296010971,
|
|
"logits/rejected": 0.06258425116539001,
|
|
"logps/chosen": -44.86344528198242,
|
|
"logps/ref_chosen": -44.87433624267578,
|
|
"logps/ref_rejected": -70.97604370117188,
|
|
"logps/rejected": -70.93523406982422,
|
|
"loss": 1.4019,
|
|
"margin_dpo/margin_mean": -0.029924869537353516,
|
|
"margin_dpo/margin_std": 0.30913057923316956,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.02872260015117158,
|
|
"fcm_dpo/beta": 0.5113855004310608,
|
|
"fcm_dpo/delta": 0.9829479455947876,
|
|
"fcm_dpo/margin": 0.03535567224025726,
|
|
"fcm_dpo/q_t": 0.4958151578903198,
|
|
"grad_norm": 156.30233764648438,
|
|
"learning_rate": 1.343283582089552e-07,
|
|
"logits/chosen": 0.07092909514904022,
|
|
"logits/rejected": 0.05735887587070465,
|
|
"logps/chosen": -68.16175842285156,
|
|
"logps/ref_chosen": -68.1598129272461,
|
|
"logps/ref_rejected": -81.17138671875,
|
|
"logps/rejected": -81.20869445800781,
|
|
"loss": 1.3751,
|
|
"margin_dpo/margin_mean": 0.03535632789134979,
|
|
"margin_dpo/margin_std": 0.33560460805892944,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.030234315948601664,
|
|
"fcm_dpo/beta": 0.621636152267456,
|
|
"fcm_dpo/delta": 0.9787266254425049,
|
|
"fcm_dpo/margin": 0.03790883719921112,
|
|
"fcm_dpo/q_t": 0.49468833208084106,
|
|
"grad_norm": 184.15403747558594,
|
|
"learning_rate": 1.4179104477611938e-07,
|
|
"logits/chosen": 0.11737000942230225,
|
|
"logits/rejected": 0.09504500031471252,
|
|
"logps/chosen": -53.65413284301758,
|
|
"logps/ref_chosen": -53.67856216430664,
|
|
"logps/ref_rejected": -74.16911315917969,
|
|
"logps/rejected": -74.18260192871094,
|
|
"loss": 1.3704,
|
|
"margin_dpo/margin_mean": 0.03790910542011261,
|
|
"margin_dpo/margin_std": 0.27916550636291504,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.031746031746031744,
|
|
"fcm_dpo/beta": 0.719708263874054,
|
|
"fcm_dpo/delta": 0.49354374408721924,
|
|
"fcm_dpo/margin": 0.006038039922714233,
|
|
"fcm_dpo/q_t": 0.49931585788726807,
|
|
"grad_norm": 208.18948364257812,
|
|
"learning_rate": 1.4925373134328355e-07,
|
|
"logits/chosen": 0.09322724491357803,
|
|
"logits/rejected": 0.06828559935092926,
|
|
"logps/chosen": -64.68141174316406,
|
|
"logps/ref_chosen": -64.70155334472656,
|
|
"logps/ref_rejected": -81.02095031738281,
|
|
"logps/rejected": -81.0068359375,
|
|
"loss": 1.397,
|
|
"margin_dpo/margin_mean": 0.0060374438762664795,
|
|
"margin_dpo/margin_std": 0.34163737297058105,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.03325774754346183,
|
|
"fcm_dpo/beta": 0.7948847413063049,
|
|
"fcm_dpo/delta": 0.4967557489871979,
|
|
"fcm_dpo/margin": -0.008793145418167114,
|
|
"fcm_dpo/q_t": 0.501756489276886,
|
|
"grad_norm": 235.72225952148438,
|
|
"learning_rate": 1.5671641791044775e-07,
|
|
"logits/chosen": 0.020929213613271713,
|
|
"logits/rejected": -0.0007745649782009423,
|
|
"logps/chosen": -58.05523681640625,
|
|
"logps/ref_chosen": -58.03599166870117,
|
|
"logps/ref_rejected": -80.72721862792969,
|
|
"logps/rejected": -80.7376708984375,
|
|
"loss": 1.4021,
|
|
"margin_dpo/margin_mean": -0.008793413639068604,
|
|
"margin_dpo/margin_std": 0.23543663322925568,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.03476946334089191,
|
|
"fcm_dpo/beta": 0.8356242179870605,
|
|
"fcm_dpo/delta": 0.4879206120967865,
|
|
"fcm_dpo/margin": -0.007578670978546143,
|
|
"fcm_dpo/q_t": 0.5014785528182983,
|
|
"grad_norm": 285.7133483886719,
|
|
"learning_rate": 1.6417910447761193e-07,
|
|
"logits/chosen": 0.1404346227645874,
|
|
"logits/rejected": 0.11506737768650055,
|
|
"logps/chosen": -66.38188934326172,
|
|
"logps/ref_chosen": -66.35608673095703,
|
|
"logps/ref_rejected": -93.02769470214844,
|
|
"logps/rejected": -93.04591369628906,
|
|
"loss": 1.4157,
|
|
"margin_dpo/margin_mean": -0.007578998804092407,
|
|
"margin_dpo/margin_std": 0.3775022029876709,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.036281179138321996,
|
|
"fcm_dpo/beta": 0.8763637542724609,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.023401737213134766,
|
|
"fcm_dpo/q_t": 0.5050784349441528,
|
|
"grad_norm": 237.49981689453125,
|
|
"learning_rate": 1.716417910447761e-07,
|
|
"logits/chosen": 0.13775691390037537,
|
|
"logits/rejected": 0.10474318265914917,
|
|
"logps/chosen": -54.4842529296875,
|
|
"logps/ref_chosen": -54.461238861083984,
|
|
"logps/ref_rejected": -68.33817291259766,
|
|
"logps/rejected": -68.33778381347656,
|
|
"loss": 1.4189,
|
|
"margin_dpo/margin_mean": -0.02340218424797058,
|
|
"margin_dpo/margin_std": 0.2496114820241928,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.03779289493575208,
|
|
"fcm_dpo/beta": 1.0109740495681763,
|
|
"fcm_dpo/delta": 0.933335542678833,
|
|
"fcm_dpo/margin": 0.07055863738059998,
|
|
"fcm_dpo/q_t": 0.48359498381614685,
|
|
"grad_norm": 284.9603271484375,
|
|
"learning_rate": 1.7910447761194027e-07,
|
|
"logits/chosen": 0.09323587268590927,
|
|
"logits/rejected": 0.04254044592380524,
|
|
"logps/chosen": -60.0122184753418,
|
|
"logps/ref_chosen": -60.00420379638672,
|
|
"logps/ref_rejected": -90.47376251220703,
|
|
"logps/rejected": -90.55233764648438,
|
|
"loss": 1.3345,
|
|
"margin_dpo/margin_mean": 0.07055890560150146,
|
|
"margin_dpo/margin_std": 0.27643194794654846,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.039304610733182165,
|
|
"fcm_dpo/beta": 1.2268295288085938,
|
|
"fcm_dpo/delta": 0.9872031211853027,
|
|
"fcm_dpo/margin": 0.011123806238174438,
|
|
"fcm_dpo/q_t": 0.49779731035232544,
|
|
"grad_norm": 363.8032531738281,
|
|
"learning_rate": 1.8656716417910447e-07,
|
|
"logits/chosen": 0.12300634384155273,
|
|
"logits/rejected": 0.1041463315486908,
|
|
"logps/chosen": -56.83332061767578,
|
|
"logps/ref_chosen": -56.81915283203125,
|
|
"logps/ref_rejected": -77.84333038330078,
|
|
"logps/rejected": -77.86862182617188,
|
|
"loss": 1.418,
|
|
"margin_dpo/margin_mean": 0.011123299598693848,
|
|
"margin_dpo/margin_std": 0.3511189818382263,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.04081632653061224,
|
|
"fcm_dpo/beta": 1.4186409711837769,
|
|
"fcm_dpo/delta": 0.4878283143043518,
|
|
"fcm_dpo/margin": -0.018930166959762573,
|
|
"fcm_dpo/q_t": 0.5060181617736816,
|
|
"grad_norm": 426.4020690917969,
|
|
"learning_rate": 1.9402985074626865e-07,
|
|
"logits/chosen": 0.10950794070959091,
|
|
"logits/rejected": 0.08432013541460037,
|
|
"logps/chosen": -62.87256622314453,
|
|
"logps/ref_chosen": -62.87702560424805,
|
|
"logps/ref_rejected": -71.34437561035156,
|
|
"logps/rejected": -71.32098388671875,
|
|
"loss": 1.465,
|
|
"margin_dpo/margin_mean": -0.0189303457736969,
|
|
"margin_dpo/margin_std": 0.3256291151046753,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.042328042328042326,
|
|
"fcm_dpo/beta": 1.5392485857009888,
|
|
"fcm_dpo/delta": 0.40797513723373413,
|
|
"fcm_dpo/margin": 0.019986987113952637,
|
|
"fcm_dpo/q_t": 0.49472981691360474,
|
|
"grad_norm": 428.8187561035156,
|
|
"learning_rate": 2.0149253731343282e-07,
|
|
"logits/chosen": 0.05481361597776413,
|
|
"logits/rejected": 0.04620601236820221,
|
|
"logps/chosen": -59.83418273925781,
|
|
"logps/ref_chosen": -59.8333740234375,
|
|
"logps/ref_rejected": -70.39804077148438,
|
|
"logps/rejected": -70.4188232421875,
|
|
"loss": 1.4194,
|
|
"margin_dpo/margin_mean": 0.019986748695373535,
|
|
"margin_dpo/margin_std": 0.31196290254592896,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.04383975812547241,
|
|
"fcm_dpo/beta": 1.7656760215759277,
|
|
"fcm_dpo/delta": 0.9155327081680298,
|
|
"fcm_dpo/margin": 0.05319638550281525,
|
|
"fcm_dpo/q_t": 0.47983771562576294,
|
|
"grad_norm": 544.4055786132812,
|
|
"learning_rate": 2.08955223880597e-07,
|
|
"logits/chosen": 0.13988614082336426,
|
|
"logits/rejected": 0.12211866676807404,
|
|
"logps/chosen": -74.13853454589844,
|
|
"logps/ref_chosen": -74.12020111083984,
|
|
"logps/ref_rejected": -83.33099365234375,
|
|
"logps/rejected": -83.40251159667969,
|
|
"loss": 1.3481,
|
|
"margin_dpo/margin_mean": 0.053196460008621216,
|
|
"margin_dpo/margin_std": 0.2625643312931061,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.045351473922902494,
|
|
"fcm_dpo/beta": 2.0389435291290283,
|
|
"fcm_dpo/delta": 0.4901547431945801,
|
|
"fcm_dpo/margin": -0.004268288612365723,
|
|
"fcm_dpo/q_t": 0.5039973258972168,
|
|
"grad_norm": 641.551025390625,
|
|
"learning_rate": 2.1641791044776117e-07,
|
|
"logits/chosen": 0.13575318455696106,
|
|
"logits/rejected": 0.08044232428073883,
|
|
"logps/chosen": -50.786643981933594,
|
|
"logps/ref_chosen": -50.75128936767578,
|
|
"logps/ref_rejected": -89.29063415527344,
|
|
"logps/rejected": -89.32171630859375,
|
|
"loss": 1.4942,
|
|
"margin_dpo/margin_mean": -0.004268676042556763,
|
|
"margin_dpo/margin_std": 0.32202666997909546,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.04686318972033258,
|
|
"fcm_dpo/beta": 2.3525500297546387,
|
|
"fcm_dpo/delta": 0.93133544921875,
|
|
"fcm_dpo/margin": 0.0310114324092865,
|
|
"fcm_dpo/q_t": 0.4873350262641907,
|
|
"grad_norm": 821.974365234375,
|
|
"learning_rate": 2.2388059701492537e-07,
|
|
"logits/chosen": 0.10764593631029129,
|
|
"logits/rejected": 0.061387479305267334,
|
|
"logps/chosen": -65.36897277832031,
|
|
"logps/ref_chosen": -65.33675384521484,
|
|
"logps/ref_rejected": -100.76666259765625,
|
|
"logps/rejected": -100.82989501953125,
|
|
"loss": 1.4747,
|
|
"margin_dpo/margin_mean": 0.03101155161857605,
|
|
"margin_dpo/margin_std": 0.35841095447540283,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.04837490551776266,
|
|
"fcm_dpo/beta": 2.5499143600463867,
|
|
"fcm_dpo/delta": 0.3668806552886963,
|
|
"fcm_dpo/margin": 0.045269906520843506,
|
|
"fcm_dpo/q_t": 0.47517523169517517,
|
|
"grad_norm": 814.4436645507812,
|
|
"learning_rate": 2.3134328358208954e-07,
|
|
"logits/chosen": 0.10604210197925568,
|
|
"logits/rejected": 0.09800020605325699,
|
|
"logps/chosen": -67.19169616699219,
|
|
"logps/ref_chosen": -67.18333435058594,
|
|
"logps/ref_rejected": -82.80763244628906,
|
|
"logps/rejected": -82.86127471923828,
|
|
"loss": 1.4012,
|
|
"margin_dpo/margin_mean": 0.045270055532455444,
|
|
"margin_dpo/margin_std": 0.2860063314437866,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.049886621315192746,
|
|
"fcm_dpo/beta": 2.7437379360198975,
|
|
"fcm_dpo/delta": 0.3657742738723755,
|
|
"fcm_dpo/margin": 0.042653635144233704,
|
|
"fcm_dpo/q_t": 0.4748280644416809,
|
|
"grad_norm": 934.1159057617188,
|
|
"learning_rate": 2.388059701492537e-07,
|
|
"logits/chosen": 0.0492779016494751,
|
|
"logits/rejected": 0.022379783913493156,
|
|
"logps/chosen": -64.0654067993164,
|
|
"logps/ref_chosen": -64.03948211669922,
|
|
"logps/ref_rejected": -75.68357849121094,
|
|
"logps/rejected": -75.75216674804688,
|
|
"loss": 1.4557,
|
|
"margin_dpo/margin_mean": 0.042654380202293396,
|
|
"margin_dpo/margin_std": 0.3218376338481903,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.05139833711262283,
|
|
"fcm_dpo/beta": 3.2462871074676514,
|
|
"fcm_dpo/delta": 0.868577241897583,
|
|
"fcm_dpo/margin": 0.04391145706176758,
|
|
"fcm_dpo/q_t": 0.47198355197906494,
|
|
"grad_norm": 959.0977172851562,
|
|
"learning_rate": 2.4626865671641786e-07,
|
|
"logits/chosen": 0.0971483588218689,
|
|
"logits/rejected": 0.0673779547214508,
|
|
"logps/chosen": -53.695762634277344,
|
|
"logps/ref_chosen": -53.6642951965332,
|
|
"logps/ref_rejected": -65.77989959716797,
|
|
"logps/rejected": -65.85527038574219,
|
|
"loss": 1.4312,
|
|
"margin_dpo/margin_mean": 0.04391142725944519,
|
|
"margin_dpo/margin_std": 0.2787018120288849,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.05291005291005291,
|
|
"fcm_dpo/beta": 3.5613765716552734,
|
|
"fcm_dpo/delta": 0.49954742193222046,
|
|
"fcm_dpo/margin": 0.0079115629196167,
|
|
"fcm_dpo/q_t": 0.4954897165298462,
|
|
"grad_norm": 1141.895751953125,
|
|
"learning_rate": 2.537313432835821e-07,
|
|
"logits/chosen": 0.04493723437190056,
|
|
"logits/rejected": 0.022909432649612427,
|
|
"logps/chosen": -61.07041549682617,
|
|
"logps/ref_chosen": -61.01686096191406,
|
|
"logps/ref_rejected": -72.78598022460938,
|
|
"logps/rejected": -72.84745025634766,
|
|
"loss": 1.6389,
|
|
"margin_dpo/margin_mean": 0.0079115629196167,
|
|
"margin_dpo/margin_std": 0.32450151443481445,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.05442176870748299,
|
|
"fcm_dpo/beta": 3.89394474029541,
|
|
"fcm_dpo/delta": 0.39777103066444397,
|
|
"fcm_dpo/margin": 0.013609737157821655,
|
|
"fcm_dpo/q_t": 0.5010133981704712,
|
|
"grad_norm": 1230.4146728515625,
|
|
"learning_rate": 2.611940298507462e-07,
|
|
"logits/chosen": 0.1099657341837883,
|
|
"logits/rejected": 0.056441109627485275,
|
|
"logps/chosen": -50.61913299560547,
|
|
"logps/ref_chosen": -50.53736114501953,
|
|
"logps/ref_rejected": -78.11678314208984,
|
|
"logps/rejected": -78.212158203125,
|
|
"loss": 1.6768,
|
|
"margin_dpo/margin_mean": 0.013609647750854492,
|
|
"margin_dpo/margin_std": 0.32924020290374756,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.055933484504913075,
|
|
"fcm_dpo/beta": 4.411220550537109,
|
|
"fcm_dpo/delta": 0.5480049252510071,
|
|
"fcm_dpo/margin": 0.10784178972244263,
|
|
"fcm_dpo/q_t": 0.4541955292224884,
|
|
"grad_norm": 1664.73095703125,
|
|
"learning_rate": 2.686567164179104e-07,
|
|
"logits/chosen": 0.08195307105779648,
|
|
"logits/rejected": 0.004701277241110802,
|
|
"logps/chosen": -59.608680725097656,
|
|
"logps/ref_chosen": -59.55394744873047,
|
|
"logps/ref_rejected": -108.27702331542969,
|
|
"logps/rejected": -108.43960571289062,
|
|
"loss": 1.4659,
|
|
"margin_dpo/margin_mean": 0.10784146189689636,
|
|
"margin_dpo/margin_std": 0.4072269797325134,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.05744520030234316,
|
|
"fcm_dpo/beta": 4.725587844848633,
|
|
"fcm_dpo/delta": 0.22491098940372467,
|
|
"fcm_dpo/margin": 0.04019525647163391,
|
|
"fcm_dpo/q_t": 0.4804103374481201,
|
|
"grad_norm": 1523.71044921875,
|
|
"learning_rate": 2.761194029850746e-07,
|
|
"logits/chosen": 0.056998323649168015,
|
|
"logits/rejected": 0.04311235621571541,
|
|
"logps/chosen": -65.85867309570312,
|
|
"logps/ref_chosen": -65.78836059570312,
|
|
"logps/ref_rejected": -76.1619873046875,
|
|
"logps/rejected": -76.27249908447266,
|
|
"loss": 1.7981,
|
|
"margin_dpo/margin_mean": 0.040194928646087646,
|
|
"margin_dpo/margin_std": 0.36567050218582153,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.05895691609977324,
|
|
"fcm_dpo/beta": 5.4121809005737305,
|
|
"fcm_dpo/delta": 0.8187992572784424,
|
|
"fcm_dpo/margin": 0.03373938798904419,
|
|
"fcm_dpo/q_t": 0.4426850378513336,
|
|
"grad_norm": 1836.44189453125,
|
|
"learning_rate": 2.8358208955223876e-07,
|
|
"logits/chosen": 0.14140446484088898,
|
|
"logits/rejected": 0.11523914337158203,
|
|
"logps/chosen": -57.26402282714844,
|
|
"logps/ref_chosen": -57.17681121826172,
|
|
"logps/ref_rejected": -79.486328125,
|
|
"logps/rejected": -79.60729217529297,
|
|
"loss": 1.9268,
|
|
"margin_dpo/margin_mean": 0.033740073442459106,
|
|
"margin_dpo/margin_std": 0.3646671772003174,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.06046863189720333,
|
|
"fcm_dpo/beta": 5.907527923583984,
|
|
"fcm_dpo/delta": 0.297378271818161,
|
|
"fcm_dpo/margin": 0.006361484527587891,
|
|
"fcm_dpo/q_t": 0.4902653098106384,
|
|
"grad_norm": 2190.686767578125,
|
|
"learning_rate": 2.9104477611940296e-07,
|
|
"logits/chosen": 0.10774768888950348,
|
|
"logits/rejected": 0.058216311037540436,
|
|
"logps/chosen": -61.42626953125,
|
|
"logps/ref_chosen": -61.33416748046875,
|
|
"logps/ref_rejected": -79.10697174072266,
|
|
"logps/rejected": -79.20543670654297,
|
|
"loss": 2.1253,
|
|
"margin_dpo/margin_mean": 0.00636136531829834,
|
|
"margin_dpo/margin_std": 0.3456147611141205,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.06198034769463341,
|
|
"fcm_dpo/beta": 6.096949100494385,
|
|
"fcm_dpo/delta": 0.3107817769050598,
|
|
"fcm_dpo/margin": 0.03788435459136963,
|
|
"fcm_dpo/q_t": 0.4918346107006073,
|
|
"grad_norm": 2313.122314453125,
|
|
"learning_rate": 2.985074626865671e-07,
|
|
"logits/chosen": 0.06469070911407471,
|
|
"logits/rejected": 0.04410509765148163,
|
|
"logps/chosen": -67.65518188476562,
|
|
"logps/ref_chosen": -67.5467300415039,
|
|
"logps/ref_rejected": -83.87788391113281,
|
|
"logps/rejected": -84.02423095703125,
|
|
"loss": 2.1107,
|
|
"margin_dpo/margin_mean": 0.03788486123085022,
|
|
"margin_dpo/margin_std": 0.3886951506137848,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.06349206349206349,
|
|
"fcm_dpo/beta": 6.579242706298828,
|
|
"fcm_dpo/delta": 0.22767893970012665,
|
|
"fcm_dpo/margin": 0.00848454236984253,
|
|
"fcm_dpo/q_t": 0.4764997959136963,
|
|
"grad_norm": 2254.65283203125,
|
|
"learning_rate": 3.059701492537313e-07,
|
|
"logits/chosen": 0.04841721057891846,
|
|
"logits/rejected": 0.026929516345262527,
|
|
"logps/chosen": -61.369564056396484,
|
|
"logps/ref_chosen": -61.26485824584961,
|
|
"logps/ref_rejected": -76.3629150390625,
|
|
"logps/rejected": -76.47610473632812,
|
|
"loss": 2.2164,
|
|
"margin_dpo/margin_mean": 0.008484512567520142,
|
|
"margin_dpo/margin_std": 0.33282727003097534,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.06500377928949358,
|
|
"fcm_dpo/beta": 7.226245880126953,
|
|
"fcm_dpo/delta": 0.5711226463317871,
|
|
"fcm_dpo/margin": 0.06204667687416077,
|
|
"fcm_dpo/q_t": 0.44025903940200806,
|
|
"grad_norm": 2534.08984375,
|
|
"learning_rate": 3.134328358208955e-07,
|
|
"logits/chosen": 0.062497012317180634,
|
|
"logits/rejected": 0.051983729004859924,
|
|
"logps/chosen": -71.88862609863281,
|
|
"logps/ref_chosen": -71.80902862548828,
|
|
"logps/ref_rejected": -81.12464141845703,
|
|
"logps/rejected": -81.26628875732422,
|
|
"loss": 1.9347,
|
|
"margin_dpo/margin_mean": 0.0620463490486145,
|
|
"margin_dpo/margin_std": 0.32832396030426025,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.06651549508692366,
|
|
"fcm_dpo/beta": 7.3753557205200195,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.003143906593322754,
|
|
"fcm_dpo/q_t": 0.5123412609100342,
|
|
"grad_norm": 3110.311279296875,
|
|
"learning_rate": 3.2089552238805965e-07,
|
|
"logits/chosen": 0.058967474848032,
|
|
"logits/rejected": 0.02790246158838272,
|
|
"logps/chosen": -66.68571472167969,
|
|
"logps/ref_chosen": -66.55043029785156,
|
|
"logps/ref_rejected": -85.06198120117188,
|
|
"logps/rejected": -85.19412994384766,
|
|
"loss": 2.7173,
|
|
"margin_dpo/margin_mean": -0.00314408540725708,
|
|
"margin_dpo/margin_std": 0.39732399582862854,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.06802721088435375,
|
|
"fcm_dpo/beta": 7.334336280822754,
|
|
"fcm_dpo/delta": -0.0020287036895751953,
|
|
"fcm_dpo/margin": 0.13652004301548004,
|
|
"fcm_dpo/q_t": 0.35875630378723145,
|
|
"grad_norm": 1952.19775390625,
|
|
"learning_rate": 3.2835820895522385e-07,
|
|
"logits/chosen": 0.10706457495689392,
|
|
"logits/rejected": 0.05497971177101135,
|
|
"logps/chosen": -62.34738540649414,
|
|
"logps/ref_chosen": -62.24385452270508,
|
|
"logps/ref_rejected": -92.96665954589844,
|
|
"logps/rejected": -93.20671081542969,
|
|
"loss": 1.5806,
|
|
"margin_dpo/margin_mean": 0.1365204155445099,
|
|
"margin_dpo/margin_std": 0.33906716108322144,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.06953892668178382,
|
|
"fcm_dpo/beta": 7.5740742683410645,
|
|
"fcm_dpo/delta": 0.09216267615556717,
|
|
"fcm_dpo/margin": 0.12037345767021179,
|
|
"fcm_dpo/q_t": 0.4014553129673004,
|
|
"grad_norm": 2483.462646484375,
|
|
"learning_rate": 3.3582089552238805e-07,
|
|
"logits/chosen": 0.10916443914175034,
|
|
"logits/rejected": 0.06387359648942947,
|
|
"logps/chosen": -61.58867263793945,
|
|
"logps/ref_chosen": -61.498905181884766,
|
|
"logps/ref_rejected": -78.91172790527344,
|
|
"logps/rejected": -79.12187194824219,
|
|
"loss": 1.8488,
|
|
"margin_dpo/margin_mean": 0.12037333846092224,
|
|
"margin_dpo/margin_std": 0.37311580777168274,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.0710506424792139,
|
|
"fcm_dpo/beta": 7.277153968811035,
|
|
"fcm_dpo/delta": -0.1604071408510208,
|
|
"fcm_dpo/margin": 0.15663331747055054,
|
|
"fcm_dpo/q_t": 0.3513562083244324,
|
|
"grad_norm": 1870.4586181640625,
|
|
"learning_rate": 3.432835820895522e-07,
|
|
"logits/chosen": 0.04342661425471306,
|
|
"logits/rejected": -0.00011028675362467766,
|
|
"logps/chosen": -51.65697479248047,
|
|
"logps/ref_chosen": -51.578346252441406,
|
|
"logps/ref_rejected": -68.2215576171875,
|
|
"logps/rejected": -68.45681762695312,
|
|
"loss": 1.3277,
|
|
"margin_dpo/margin_mean": 0.1566331386566162,
|
|
"margin_dpo/margin_std": 0.3138054609298706,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.07256235827664399,
|
|
"fcm_dpo/beta": 8.184296607971191,
|
|
"fcm_dpo/delta": 0.7369337677955627,
|
|
"fcm_dpo/margin": 0.03382202982902527,
|
|
"fcm_dpo/q_t": 0.485114187002182,
|
|
"grad_norm": 2785.64453125,
|
|
"learning_rate": 3.507462686567164e-07,
|
|
"logits/chosen": 0.16853903234004974,
|
|
"logits/rejected": 0.1380309760570526,
|
|
"logps/chosen": -51.94841003417969,
|
|
"logps/ref_chosen": -51.79365158081055,
|
|
"logps/ref_rejected": -64.22503662109375,
|
|
"logps/rejected": -64.41361999511719,
|
|
"loss": 2.617,
|
|
"margin_dpo/margin_mean": 0.03382223844528198,
|
|
"margin_dpo/margin_std": 0.38840365409851074,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.07407407407407407,
|
|
"fcm_dpo/beta": 9.334107398986816,
|
|
"fcm_dpo/delta": 0.5954843163490295,
|
|
"fcm_dpo/margin": 0.044813498854637146,
|
|
"fcm_dpo/q_t": 0.4577757716178894,
|
|
"grad_norm": 2963.8173828125,
|
|
"learning_rate": 3.5820895522388055e-07,
|
|
"logits/chosen": 0.038059771060943604,
|
|
"logits/rejected": 0.01622236706316471,
|
|
"logps/chosen": -58.26384353637695,
|
|
"logps/ref_chosen": -58.13460159301758,
|
|
"logps/ref_rejected": -64.63206481933594,
|
|
"logps/rejected": -64.80612182617188,
|
|
"loss": 2.5832,
|
|
"margin_dpo/margin_mean": 0.04481416940689087,
|
|
"margin_dpo/margin_std": 0.3690647482872009,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.07558578987150416,
|
|
"fcm_dpo/beta": 9.334890365600586,
|
|
"fcm_dpo/delta": -0.06085062772035599,
|
|
"fcm_dpo/margin": 0.11278587579727173,
|
|
"fcm_dpo/q_t": 0.4165031909942627,
|
|
"grad_norm": 2980.169921875,
|
|
"learning_rate": 3.6567164179104475e-07,
|
|
"logits/chosen": 0.11696229875087738,
|
|
"logits/rejected": 0.08666031062602997,
|
|
"logps/chosen": -52.99673080444336,
|
|
"logps/ref_chosen": -52.85643768310547,
|
|
"logps/ref_rejected": -72.17460632324219,
|
|
"logps/rejected": -72.42768859863281,
|
|
"loss": 2.2159,
|
|
"margin_dpo/margin_mean": 0.11278638243675232,
|
|
"margin_dpo/margin_std": 0.39860716462135315,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.07709750566893424,
|
|
"fcm_dpo/beta": 8.363540649414062,
|
|
"fcm_dpo/delta": -0.5726056098937988,
|
|
"fcm_dpo/margin": 0.15886437892913818,
|
|
"fcm_dpo/q_t": 0.4000622630119324,
|
|
"grad_norm": 2550.51953125,
|
|
"learning_rate": 3.7313432835820895e-07,
|
|
"logits/chosen": 0.08093120157718658,
|
|
"logits/rejected": 0.05302312225103378,
|
|
"logps/chosen": -63.80950927734375,
|
|
"logps/ref_chosen": -63.65644073486328,
|
|
"logps/ref_rejected": -86.13229370117188,
|
|
"logps/rejected": -86.44422912597656,
|
|
"loss": 1.8986,
|
|
"margin_dpo/margin_mean": 0.15886464715003967,
|
|
"margin_dpo/margin_std": 0.420296311378479,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.07860922146636433,
|
|
"fcm_dpo/beta": 8.45156478881836,
|
|
"fcm_dpo/delta": 0.09679965674877167,
|
|
"fcm_dpo/margin": 0.10791899263858795,
|
|
"fcm_dpo/q_t": 0.4053017497062683,
|
|
"grad_norm": 3315.59912109375,
|
|
"learning_rate": 3.805970149253731e-07,
|
|
"logits/chosen": 0.07245868444442749,
|
|
"logits/rejected": 0.023480474948883057,
|
|
"logps/chosen": -68.0364990234375,
|
|
"logps/ref_chosen": -67.8402099609375,
|
|
"logps/ref_rejected": -96.97090911865234,
|
|
"logps/rejected": -97.27511596679688,
|
|
"loss": 2.2996,
|
|
"margin_dpo/margin_mean": 0.10791890323162079,
|
|
"margin_dpo/margin_std": 0.4656970500946045,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.0801209372637944,
|
|
"fcm_dpo/beta": 8.50004768371582,
|
|
"fcm_dpo/delta": 0.018193505704402924,
|
|
"fcm_dpo/margin": 0.11565050482749939,
|
|
"fcm_dpo/q_t": 0.371160626411438,
|
|
"grad_norm": 2222.445556640625,
|
|
"learning_rate": 3.880597014925373e-07,
|
|
"logits/chosen": 0.07711566984653473,
|
|
"logits/rejected": 0.06644274294376373,
|
|
"logps/chosen": -57.06134796142578,
|
|
"logps/ref_chosen": -56.87813949584961,
|
|
"logps/ref_rejected": -60.75569152832031,
|
|
"logps/rejected": -61.05455017089844,
|
|
"loss": 1.7873,
|
|
"margin_dpo/margin_mean": 0.11565083265304565,
|
|
"margin_dpo/margin_std": 0.32278263568878174,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.08163265306122448,
|
|
"fcm_dpo/beta": 8.553638458251953,
|
|
"fcm_dpo/delta": 0.20764021575450897,
|
|
"fcm_dpo/margin": 0.09295859932899475,
|
|
"fcm_dpo/q_t": 0.40855568647384644,
|
|
"grad_norm": 2532.174072265625,
|
|
"learning_rate": 3.9552238805970144e-07,
|
|
"logits/chosen": 0.03949524462223053,
|
|
"logits/rejected": 0.024343054741621017,
|
|
"logps/chosen": -47.497901916503906,
|
|
"logps/ref_chosen": -47.26692199707031,
|
|
"logps/ref_rejected": -62.19426727294922,
|
|
"logps/rejected": -62.51820373535156,
|
|
"loss": 2.2098,
|
|
"margin_dpo/margin_mean": 0.09295853972434998,
|
|
"margin_dpo/margin_std": 0.3836787939071655,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.08314436885865457,
|
|
"fcm_dpo/beta": 7.794929027557373,
|
|
"fcm_dpo/delta": -0.9819808006286621,
|
|
"fcm_dpo/margin": 0.23201557993888855,
|
|
"fcm_dpo/q_t": 0.3249555230140686,
|
|
"grad_norm": 2186.456298828125,
|
|
"learning_rate": 4.0298507462686564e-07,
|
|
"logits/chosen": 0.029716331511735916,
|
|
"logits/rejected": -0.04705657809972763,
|
|
"logps/chosen": -50.511959075927734,
|
|
"logps/ref_chosen": -50.32619094848633,
|
|
"logps/ref_rejected": -92.44389343261719,
|
|
"logps/rejected": -92.8616714477539,
|
|
"loss": 1.4207,
|
|
"margin_dpo/margin_mean": 0.2320151925086975,
|
|
"margin_dpo/margin_std": 0.42721158266067505,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.08465608465608465,
|
|
"fcm_dpo/beta": 7.265107154846191,
|
|
"fcm_dpo/delta": -0.08848509937524796,
|
|
"fcm_dpo/margin": 0.1485108733177185,
|
|
"fcm_dpo/q_t": 0.32371559739112854,
|
|
"grad_norm": 1776.4510498046875,
|
|
"learning_rate": 4.1044776119402984e-07,
|
|
"logits/chosen": 0.12203441560268402,
|
|
"logits/rejected": 0.09960527718067169,
|
|
"logps/chosen": -56.979713439941406,
|
|
"logps/ref_chosen": -56.766971588134766,
|
|
"logps/ref_rejected": -66.30504608154297,
|
|
"logps/rejected": -66.66629791259766,
|
|
"loss": 1.5337,
|
|
"margin_dpo/margin_mean": 0.1485109031200409,
|
|
"margin_dpo/margin_std": 0.36086180806159973,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.08616780045351474,
|
|
"fcm_dpo/beta": 7.139953136444092,
|
|
"fcm_dpo/delta": -0.0027963966131210327,
|
|
"fcm_dpo/margin": 0.14022627472877502,
|
|
"fcm_dpo/q_t": 0.3702242970466614,
|
|
"grad_norm": 2208.6005859375,
|
|
"learning_rate": 4.17910447761194e-07,
|
|
"logits/chosen": 0.09269669651985168,
|
|
"logits/rejected": 0.02815322019159794,
|
|
"logps/chosen": -58.0312614440918,
|
|
"logps/ref_chosen": -57.76774597167969,
|
|
"logps/ref_rejected": -82.75698852539062,
|
|
"logps/rejected": -83.16073608398438,
|
|
"loss": 1.9812,
|
|
"margin_dpo/margin_mean": 0.14022645354270935,
|
|
"margin_dpo/margin_std": 0.4531812369823456,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.08767951625094482,
|
|
"fcm_dpo/beta": 6.859474182128906,
|
|
"fcm_dpo/delta": -0.0021596550941467285,
|
|
"fcm_dpo/margin": 0.13939353823661804,
|
|
"fcm_dpo/q_t": 0.4166187345981598,
|
|
"grad_norm": 2155.36083984375,
|
|
"learning_rate": 4.253731343283582e-07,
|
|
"logits/chosen": 0.04018617421388626,
|
|
"logits/rejected": 0.024990694597363472,
|
|
"logps/chosen": -72.99790954589844,
|
|
"logps/ref_chosen": -72.76408386230469,
|
|
"logps/ref_rejected": -84.49275207519531,
|
|
"logps/rejected": -84.865966796875,
|
|
"loss": 2.1325,
|
|
"margin_dpo/margin_mean": 0.13939306139945984,
|
|
"margin_dpo/margin_std": 0.4941212832927704,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.08919123204837491,
|
|
"fcm_dpo/beta": 7.301891326904297,
|
|
"fcm_dpo/delta": -0.22430884838104248,
|
|
"fcm_dpo/margin": 0.15986737608909607,
|
|
"fcm_dpo/q_t": 0.36295658349990845,
|
|
"grad_norm": 1929.1551513671875,
|
|
"learning_rate": 4.3283582089552234e-07,
|
|
"logits/chosen": 0.10927902162075043,
|
|
"logits/rejected": 0.04400138556957245,
|
|
"logps/chosen": -50.06452178955078,
|
|
"logps/ref_chosen": -49.820777893066406,
|
|
"logps/ref_rejected": -77.14368438720703,
|
|
"logps/rejected": -77.54730224609375,
|
|
"loss": 1.5376,
|
|
"margin_dpo/margin_mean": 0.15986764430999756,
|
|
"margin_dpo/margin_std": 0.36004385352134705,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.09070294784580499,
|
|
"fcm_dpo/beta": 7.388426780700684,
|
|
"fcm_dpo/delta": 0.45878517627716064,
|
|
"fcm_dpo/margin": 0.07655715942382812,
|
|
"fcm_dpo/q_t": 0.44889310002326965,
|
|
"grad_norm": 2771.480712890625,
|
|
"learning_rate": 4.4029850746268654e-07,
|
|
"logits/chosen": 0.09602642804384232,
|
|
"logits/rejected": 0.09458990395069122,
|
|
"logps/chosen": -63.469207763671875,
|
|
"logps/ref_chosen": -63.22477340698242,
|
|
"logps/ref_rejected": -61.360477447509766,
|
|
"logps/rejected": -61.68146896362305,
|
|
"loss": 2.1317,
|
|
"margin_dpo/margin_mean": 0.07655695080757141,
|
|
"margin_dpo/margin_std": 0.3793250024318695,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.09221466364323508,
|
|
"fcm_dpo/beta": 7.838181495666504,
|
|
"fcm_dpo/delta": 0.23324143886566162,
|
|
"fcm_dpo/margin": 0.09999506175518036,
|
|
"fcm_dpo/q_t": 0.40534743666648865,
|
|
"grad_norm": 2478.5703125,
|
|
"learning_rate": 4.4776119402985074e-07,
|
|
"logits/chosen": 0.12898309528827667,
|
|
"logits/rejected": 0.0962858498096466,
|
|
"logps/chosen": -49.336029052734375,
|
|
"logps/ref_chosen": -49.01679992675781,
|
|
"logps/ref_rejected": -74.90817260742188,
|
|
"logps/rejected": -75.327392578125,
|
|
"loss": 2.1133,
|
|
"margin_dpo/margin_mean": 0.09999510645866394,
|
|
"margin_dpo/margin_std": 0.3832091987133026,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.09372637944066516,
|
|
"fcm_dpo/beta": 7.966916084289551,
|
|
"fcm_dpo/delta": -0.13704264163970947,
|
|
"fcm_dpo/margin": 0.13938570022583008,
|
|
"fcm_dpo/q_t": 0.36962825059890747,
|
|
"grad_norm": 2647.16162109375,
|
|
"learning_rate": 4.552238805970149e-07,
|
|
"logits/chosen": 0.11322169005870819,
|
|
"logits/rejected": 0.07340162247419357,
|
|
"logps/chosen": -63.07339859008789,
|
|
"logps/ref_chosen": -62.751869201660156,
|
|
"logps/ref_rejected": -78.93360900878906,
|
|
"logps/rejected": -79.39452362060547,
|
|
"loss": 2.0748,
|
|
"margin_dpo/margin_mean": 0.13938573002815247,
|
|
"margin_dpo/margin_std": 0.46554332971572876,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.09523809523809523,
|
|
"fcm_dpo/beta": 7.405724048614502,
|
|
"fcm_dpo/delta": -0.43855780363082886,
|
|
"fcm_dpo/margin": 0.1857583224773407,
|
|
"fcm_dpo/q_t": 0.3208463490009308,
|
|
"grad_norm": 2495.121826171875,
|
|
"learning_rate": 4.626865671641791e-07,
|
|
"logits/chosen": 0.17758890986442566,
|
|
"logits/rejected": 0.15252208709716797,
|
|
"logps/chosen": -60.78285598754883,
|
|
"logps/ref_chosen": -60.51525115966797,
|
|
"logps/ref_rejected": -85.11021423339844,
|
|
"logps/rejected": -85.56358337402344,
|
|
"loss": 1.4936,
|
|
"margin_dpo/margin_mean": 0.1857585310935974,
|
|
"margin_dpo/margin_std": 0.3698121905326843,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.09674981103552532,
|
|
"fcm_dpo/beta": 7.346306324005127,
|
|
"fcm_dpo/delta": 0.42287638783454895,
|
|
"fcm_dpo/margin": 0.08198145031929016,
|
|
"fcm_dpo/q_t": 0.44880834221839905,
|
|
"grad_norm": 2452.107421875,
|
|
"learning_rate": 4.701492537313433e-07,
|
|
"logits/chosen": 0.07220865786075592,
|
|
"logits/rejected": 0.047520771622657776,
|
|
"logps/chosen": -51.5291748046875,
|
|
"logps/ref_chosen": -51.20684814453125,
|
|
"logps/ref_rejected": -66.93081665039062,
|
|
"logps/rejected": -67.33512878417969,
|
|
"loss": 2.1847,
|
|
"margin_dpo/margin_mean": 0.08198148012161255,
|
|
"margin_dpo/margin_std": 0.39635199308395386,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.0982615268329554,
|
|
"fcm_dpo/beta": 6.500675201416016,
|
|
"fcm_dpo/delta": -1.11879301071167,
|
|
"fcm_dpo/margin": 0.2926085889339447,
|
|
"fcm_dpo/q_t": 0.2865545451641083,
|
|
"grad_norm": 2000.08251953125,
|
|
"learning_rate": 4.776119402985074e-07,
|
|
"logits/chosen": 0.1627321094274521,
|
|
"logits/rejected": 0.13373470306396484,
|
|
"logps/chosen": -67.60956573486328,
|
|
"logps/ref_chosen": -67.2886962890625,
|
|
"logps/ref_rejected": -74.44281005859375,
|
|
"logps/rejected": -75.05628967285156,
|
|
"loss": 1.3075,
|
|
"margin_dpo/margin_mean": 0.29260820150375366,
|
|
"margin_dpo/margin_std": 0.4580235481262207,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.09977324263038549,
|
|
"fcm_dpo/beta": 6.545133590698242,
|
|
"fcm_dpo/delta": 0.5023878216743469,
|
|
"fcm_dpo/margin": 0.08048596978187561,
|
|
"fcm_dpo/q_t": 0.435330331325531,
|
|
"grad_norm": 2383.31005859375,
|
|
"learning_rate": 4.850746268656717e-07,
|
|
"logits/chosen": 0.08348944783210754,
|
|
"logits/rejected": 0.05969405546784401,
|
|
"logps/chosen": -71.08229064941406,
|
|
"logps/ref_chosen": -70.743408203125,
|
|
"logps/ref_rejected": -77.26499938964844,
|
|
"logps/rejected": -77.68437194824219,
|
|
"loss": 1.9852,
|
|
"margin_dpo/margin_mean": 0.08048596978187561,
|
|
"margin_dpo/margin_std": 0.4329206943511963,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.10128495842781557,
|
|
"fcm_dpo/beta": 6.768010139465332,
|
|
"fcm_dpo/delta": 0.0033745458349585533,
|
|
"fcm_dpo/margin": 0.1473006308078766,
|
|
"fcm_dpo/q_t": 0.39285334944725037,
|
|
"grad_norm": 2144.995849609375,
|
|
"learning_rate": 4.925373134328357e-07,
|
|
"logits/chosen": 0.06257347017526627,
|
|
"logits/rejected": 0.007243716157972813,
|
|
"logps/chosen": -60.883270263671875,
|
|
"logps/ref_chosen": -60.60260009765625,
|
|
"logps/ref_rejected": -75.22235870361328,
|
|
"logps/rejected": -75.65032958984375,
|
|
"loss": 1.6865,
|
|
"margin_dpo/margin_mean": 0.1473003625869751,
|
|
"margin_dpo/margin_std": 0.44022125005722046,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.10279667422524566,
|
|
"fcm_dpo/beta": 6.251596450805664,
|
|
"fcm_dpo/delta": -0.4169546067714691,
|
|
"fcm_dpo/margin": 0.21536040306091309,
|
|
"fcm_dpo/q_t": 0.33271753787994385,
|
|
"grad_norm": 1849.2083740234375,
|
|
"learning_rate": 5e-07,
|
|
"logits/chosen": 0.04055653512477875,
|
|
"logits/rejected": 0.01153562217950821,
|
|
"logps/chosen": -77.89004516601562,
|
|
"logps/ref_chosen": -77.52836608886719,
|
|
"logps/ref_rejected": -93.17778015136719,
|
|
"logps/rejected": -93.75480651855469,
|
|
"loss": 1.5844,
|
|
"margin_dpo/margin_mean": 0.21536031365394592,
|
|
"margin_dpo/margin_std": 0.4642139673233032,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.10430839002267574,
|
|
"fcm_dpo/beta": 6.393548488616943,
|
|
"fcm_dpo/delta": -0.062498897314071655,
|
|
"fcm_dpo/margin": 0.16213825345039368,
|
|
"fcm_dpo/q_t": 0.3610564172267914,
|
|
"grad_norm": 1912.0982666015625,
|
|
"learning_rate": 4.999965034812934e-07,
|
|
"logits/chosen": 0.06711920350790024,
|
|
"logits/rejected": 0.02523168735206127,
|
|
"logps/chosen": -66.31640625,
|
|
"logps/ref_chosen": -65.94305419921875,
|
|
"logps/ref_rejected": -89.7735595703125,
|
|
"logps/rejected": -90.30905151367188,
|
|
"loss": 1.8845,
|
|
"margin_dpo/margin_mean": 0.16213801503181458,
|
|
"margin_dpo/margin_std": 0.45384150743484497,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.10582010582010581,
|
|
"fcm_dpo/beta": 5.698063850402832,
|
|
"fcm_dpo/delta": -0.3884986937046051,
|
|
"fcm_dpo/margin": 0.13598540425300598,
|
|
"fcm_dpo/q_t": 0.3947943449020386,
|
|
"grad_norm": 1745.823486328125,
|
|
"learning_rate": 4.999860140229787e-07,
|
|
"logits/chosen": 0.11856390535831451,
|
|
"logits/rejected": 0.09516175091266632,
|
|
"logps/chosen": -62.308258056640625,
|
|
"logps/ref_chosen": -61.95791244506836,
|
|
"logps/ref_rejected": -75.80945587158203,
|
|
"logps/rejected": -76.29579162597656,
|
|
"loss": 1.7483,
|
|
"margin_dpo/margin_mean": 0.1359853446483612,
|
|
"margin_dpo/margin_std": 0.397053986787796,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.1073318216175359,
|
|
"fcm_dpo/beta": 6.111842632293701,
|
|
"fcm_dpo/delta": 0.5105581283569336,
|
|
"fcm_dpo/margin": 0.08478209376335144,
|
|
"fcm_dpo/q_t": 0.4329409897327423,
|
|
"grad_norm": 2060.398193359375,
|
|
"learning_rate": 4.999685319184688e-07,
|
|
"logits/chosen": 0.0789206326007843,
|
|
"logits/rejected": 0.06302116811275482,
|
|
"logps/chosen": -63.77684020996094,
|
|
"logps/ref_chosen": -63.34757995605469,
|
|
"logps/ref_rejected": -67.49658203125,
|
|
"logps/rejected": -68.0106201171875,
|
|
"loss": 2.1644,
|
|
"margin_dpo/margin_mean": 0.08478212356567383,
|
|
"margin_dpo/margin_std": 0.4536302983760834,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.10884353741496598,
|
|
"fcm_dpo/beta": 5.628780364990234,
|
|
"fcm_dpo/delta": -0.7731190919876099,
|
|
"fcm_dpo/margin": 0.29211413860321045,
|
|
"fcm_dpo/q_t": 0.3161046504974365,
|
|
"grad_norm": 1577.560302734375,
|
|
"learning_rate": 4.999440576567755e-07,
|
|
"logits/chosen": 0.0920741856098175,
|
|
"logits/rejected": 0.029652319848537445,
|
|
"logps/chosen": -56.157230377197266,
|
|
"logps/ref_chosen": -55.85929870605469,
|
|
"logps/ref_rejected": -68.45423889160156,
|
|
"logps/rejected": -69.04428100585938,
|
|
"loss": 1.1926,
|
|
"margin_dpo/margin_mean": 0.29211464524269104,
|
|
"margin_dpo/margin_std": 0.4848480224609375,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.11035525321239607,
|
|
"fcm_dpo/beta": 5.599390029907227,
|
|
"fcm_dpo/delta": 0.03130987286567688,
|
|
"fcm_dpo/margin": 0.17031516134738922,
|
|
"fcm_dpo/q_t": 0.40101659297943115,
|
|
"grad_norm": 1934.7177734375,
|
|
"learning_rate": 4.999125919224965e-07,
|
|
"logits/chosen": 0.06832102686166763,
|
|
"logits/rejected": 0.05396275222301483,
|
|
"logps/chosen": -69.59184265136719,
|
|
"logps/ref_chosen": -69.13880920410156,
|
|
"logps/ref_rejected": -79.04586791992188,
|
|
"logps/rejected": -79.66921997070312,
|
|
"loss": 1.8098,
|
|
"margin_dpo/margin_mean": 0.17031550407409668,
|
|
"margin_dpo/margin_std": 0.48382318019866943,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.11186696900982615,
|
|
"fcm_dpo/beta": 5.374726295471191,
|
|
"fcm_dpo/delta": -0.2641603350639343,
|
|
"fcm_dpo/margin": 0.22797469794750214,
|
|
"fcm_dpo/q_t": 0.36758503317832947,
|
|
"grad_norm": 1349.6485595703125,
|
|
"learning_rate": 4.998741355957963e-07,
|
|
"logits/chosen": 0.09487976133823395,
|
|
"logits/rejected": 0.04454671964049339,
|
|
"logps/chosen": -50.26209259033203,
|
|
"logps/ref_chosen": -49.923736572265625,
|
|
"logps/ref_rejected": -81.73213958740234,
|
|
"logps/rejected": -82.2984619140625,
|
|
"loss": 1.4011,
|
|
"margin_dpo/margin_mean": 0.22797417640686035,
|
|
"margin_dpo/margin_std": 0.46810248494148254,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.11337868480725624,
|
|
"fcm_dpo/beta": 4.875063896179199,
|
|
"fcm_dpo/delta": -0.2485802173614502,
|
|
"fcm_dpo/margin": 0.24743963778018951,
|
|
"fcm_dpo/q_t": 0.34337544441223145,
|
|
"grad_norm": 1073.2872314453125,
|
|
"learning_rate": 4.998286897523808e-07,
|
|
"logits/chosen": 0.07122410833835602,
|
|
"logits/rejected": 0.04160505533218384,
|
|
"logps/chosen": -46.41517639160156,
|
|
"logps/ref_chosen": -46.06875228881836,
|
|
"logps/ref_rejected": -66.1181411743164,
|
|
"logps/rejected": -66.71200561523438,
|
|
"loss": 1.2434,
|
|
"margin_dpo/margin_mean": 0.24743930995464325,
|
|
"margin_dpo/margin_std": 0.4698425531387329,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.11489040060468632,
|
|
"fcm_dpo/beta": 5.057272911071777,
|
|
"fcm_dpo/delta": 0.1727055013179779,
|
|
"fcm_dpo/margin": 0.16627338528633118,
|
|
"fcm_dpo/q_t": 0.3987014889717102,
|
|
"grad_norm": 1406.1661376953125,
|
|
"learning_rate": 4.997762556634679e-07,
|
|
"logits/chosen": 0.10183432698249817,
|
|
"logits/rejected": 0.058754947036504745,
|
|
"logps/chosen": -54.42407989501953,
|
|
"logps/ref_chosen": -54.06275177001953,
|
|
"logps/ref_rejected": -74.87464141845703,
|
|
"logps/rejected": -75.4022445678711,
|
|
"loss": 1.5553,
|
|
"margin_dpo/margin_mean": 0.16627269983291626,
|
|
"margin_dpo/margin_std": 0.4644964933395386,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.1164021164021164,
|
|
"fcm_dpo/beta": 5.224140167236328,
|
|
"fcm_dpo/delta": 0.3271293044090271,
|
|
"fcm_dpo/margin": 0.1326499581336975,
|
|
"fcm_dpo/q_t": 0.41528427600860596,
|
|
"grad_norm": 1620.2890625,
|
|
"learning_rate": 4.99716834795752e-07,
|
|
"logits/chosen": 0.10804985463619232,
|
|
"logits/rejected": 0.06888192892074585,
|
|
"logps/chosen": -53.52935028076172,
|
|
"logps/ref_chosen": -53.07609176635742,
|
|
"logps/ref_rejected": -74.45601654052734,
|
|
"logps/rejected": -75.04192352294922,
|
|
"loss": 1.6511,
|
|
"margin_dpo/margin_mean": 0.1326504349708557,
|
|
"margin_dpo/margin_std": 0.44195854663848877,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.11791383219954649,
|
|
"fcm_dpo/beta": 5.861191749572754,
|
|
"fcm_dpo/delta": 0.4837532937526703,
|
|
"fcm_dpo/margin": 0.09206165373325348,
|
|
"fcm_dpo/q_t": 0.40167397260665894,
|
|
"grad_norm": 2161.646240234375,
|
|
"learning_rate": 4.996504288113623e-07,
|
|
"logits/chosen": 0.039431821554899216,
|
|
"logits/rejected": 0.01997038722038269,
|
|
"logps/chosen": -68.12590026855469,
|
|
"logps/ref_chosen": -67.72541809082031,
|
|
"logps/ref_rejected": -79.03926849365234,
|
|
"logps/rejected": -79.53181457519531,
|
|
"loss": 2.4878,
|
|
"margin_dpo/margin_mean": 0.09206125140190125,
|
|
"margin_dpo/margin_std": 0.583328366279602,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.11942554799697656,
|
|
"fcm_dpo/beta": 6.234781265258789,
|
|
"fcm_dpo/delta": 0.15417495369911194,
|
|
"fcm_dpo/margin": 0.13618767261505127,
|
|
"fcm_dpo/q_t": 0.389265775680542,
|
|
"grad_norm": 1901.2857666015625,
|
|
"learning_rate": 4.995770395678171e-07,
|
|
"logits/chosen": 0.150520920753479,
|
|
"logits/rejected": 0.09114135801792145,
|
|
"logps/chosen": -52.598140716552734,
|
|
"logps/ref_chosen": -52.16064453125,
|
|
"logps/ref_rejected": -83.31062316894531,
|
|
"logps/rejected": -83.8843002319336,
|
|
"loss": 1.8135,
|
|
"margin_dpo/margin_mean": 0.13618725538253784,
|
|
"margin_dpo/margin_std": 0.4358598589897156,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.12093726379440665,
|
|
"fcm_dpo/beta": 6.20821475982666,
|
|
"fcm_dpo/delta": -0.13912838697433472,
|
|
"fcm_dpo/margin": 0.17933352291584015,
|
|
"fcm_dpo/q_t": 0.37054312229156494,
|
|
"grad_norm": 1953.0830078125,
|
|
"learning_rate": 4.994966691179711e-07,
|
|
"logits/chosen": 0.09082512557506561,
|
|
"logits/rejected": 0.035024385899305344,
|
|
"logps/chosen": -61.85608673095703,
|
|
"logps/ref_chosen": -61.410560607910156,
|
|
"logps/ref_rejected": -78.66004943847656,
|
|
"logps/rejected": -79.284912109375,
|
|
"loss": 1.7668,
|
|
"margin_dpo/margin_mean": 0.1793329417705536,
|
|
"margin_dpo/margin_std": 0.4744713306427002,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.12244897959183673,
|
|
"fcm_dpo/beta": 5.641842842102051,
|
|
"fcm_dpo/delta": -0.34320950508117676,
|
|
"fcm_dpo/margin": 0.2292810082435608,
|
|
"fcm_dpo/q_t": 0.34941136837005615,
|
|
"grad_norm": 1636.6348876953125,
|
|
"learning_rate": 4.994093197099587e-07,
|
|
"logits/chosen": 0.07053244113922119,
|
|
"logits/rejected": 0.03839043155312538,
|
|
"logps/chosen": -64.20402526855469,
|
|
"logps/ref_chosen": -63.80437088012695,
|
|
"logps/ref_rejected": -79.3484115600586,
|
|
"logps/rejected": -79.97734069824219,
|
|
"loss": 1.4575,
|
|
"margin_dpo/margin_mean": 0.22928106784820557,
|
|
"margin_dpo/margin_std": 0.4831709563732147,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.12396069538926682,
|
|
"fcm_dpo/beta": 5.125918388366699,
|
|
"fcm_dpo/delta": -0.6294834017753601,
|
|
"fcm_dpo/margin": 0.29933592677116394,
|
|
"fcm_dpo/q_t": 0.2602464258670807,
|
|
"grad_norm": 1189.437255859375,
|
|
"learning_rate": 4.993149937871306e-07,
|
|
"logits/chosen": 0.07535186409950256,
|
|
"logits/rejected": 0.013600241392850876,
|
|
"logps/chosen": -49.17131805419922,
|
|
"logps/ref_chosen": -48.817893981933594,
|
|
"logps/ref_rejected": -70.31497955322266,
|
|
"logps/rejected": -70.96774291992188,
|
|
"loss": 1.0653,
|
|
"margin_dpo/margin_mean": 0.2993359863758087,
|
|
"margin_dpo/margin_std": 0.41738319396972656,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.1254724111866969,
|
|
"fcm_dpo/beta": 4.8786211013793945,
|
|
"fcm_dpo/delta": -0.19270329177379608,
|
|
"fcm_dpo/margin": 0.23940634727478027,
|
|
"fcm_dpo/q_t": 0.3403007388114929,
|
|
"grad_norm": 1305.392333984375,
|
|
"learning_rate": 4.992136939879856e-07,
|
|
"logits/chosen": 0.1480909138917923,
|
|
"logits/rejected": 0.09921949356794357,
|
|
"logps/chosen": -57.55104064941406,
|
|
"logps/ref_chosen": -57.15077209472656,
|
|
"logps/ref_rejected": -75.1710205078125,
|
|
"logps/rejected": -75.81069946289062,
|
|
"loss": 1.3796,
|
|
"margin_dpo/margin_mean": 0.23940622806549072,
|
|
"margin_dpo/margin_std": 0.48909199237823486,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.12698412698412698,
|
|
"fcm_dpo/beta": 5.00314998626709,
|
|
"fcm_dpo/delta": 0.3583996295928955,
|
|
"fcm_dpo/margin": 0.13308590650558472,
|
|
"fcm_dpo/q_t": 0.4098473787307739,
|
|
"grad_norm": 1782.189208984375,
|
|
"learning_rate": 4.991054231460969e-07,
|
|
"logits/chosen": 0.12158288061618805,
|
|
"logits/rejected": 0.08126094937324524,
|
|
"logps/chosen": -65.28231048583984,
|
|
"logps/ref_chosen": -64.77729797363281,
|
|
"logps/ref_rejected": -84.71949768066406,
|
|
"logps/rejected": -85.35758972167969,
|
|
"loss": 2.005,
|
|
"margin_dpo/margin_mean": 0.1330859661102295,
|
|
"margin_dpo/margin_std": 0.5464334487915039,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.12849584278155707,
|
|
"fcm_dpo/beta": 4.760544300079346,
|
|
"fcm_dpo/delta": -0.5031009912490845,
|
|
"fcm_dpo/margin": 0.3008922040462494,
|
|
"fcm_dpo/q_t": 0.3266737163066864,
|
|
"grad_norm": 1381.7164306640625,
|
|
"learning_rate": 4.989901842900325e-07,
|
|
"logits/chosen": 0.10575494170188904,
|
|
"logits/rejected": 0.06379462033510208,
|
|
"logps/chosen": -50.625328063964844,
|
|
"logps/ref_chosen": -50.25169372558594,
|
|
"logps/ref_rejected": -66.55439758300781,
|
|
"logps/rejected": -67.22891235351562,
|
|
"loss": 1.3231,
|
|
"margin_dpo/margin_mean": 0.30089178681373596,
|
|
"margin_dpo/margin_std": 0.5666717290878296,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.13000755857898716,
|
|
"fcm_dpo/beta": 4.623910427093506,
|
|
"fcm_dpo/delta": -0.011702943593263626,
|
|
"fcm_dpo/margin": 0.21848827600479126,
|
|
"fcm_dpo/q_t": 0.3801780939102173,
|
|
"grad_norm": 1446.2276611328125,
|
|
"learning_rate": 4.988679806432711e-07,
|
|
"logits/chosen": 0.11094961315393448,
|
|
"logits/rejected": 0.09372542053461075,
|
|
"logps/chosen": -61.207275390625,
|
|
"logps/ref_chosen": -60.72917938232422,
|
|
"logps/ref_rejected": -72.30961608886719,
|
|
"logps/rejected": -73.0062026977539,
|
|
"loss": 1.5695,
|
|
"margin_dpo/margin_mean": 0.2184884250164032,
|
|
"margin_dpo/margin_std": 0.5658543109893799,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.13151927437641722,
|
|
"fcm_dpo/beta": 4.357776641845703,
|
|
"fcm_dpo/delta": -0.31460562348365784,
|
|
"fcm_dpo/margin": 0.2916297912597656,
|
|
"fcm_dpo/q_t": 0.3396541476249695,
|
|
"grad_norm": 1350.0787353515625,
|
|
"learning_rate": 4.987388156241114e-07,
|
|
"logits/chosen": 0.08588653057813644,
|
|
"logits/rejected": 0.03219534084200859,
|
|
"logps/chosen": -66.21820831298828,
|
|
"logps/ref_chosen": -65.75796508789062,
|
|
"logps/ref_rejected": -84.81159973144531,
|
|
"logps/rejected": -85.56346130371094,
|
|
"loss": 1.3305,
|
|
"margin_dpo/margin_mean": 0.29163050651550293,
|
|
"margin_dpo/margin_std": 0.5484292507171631,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.1330309901738473,
|
|
"fcm_dpo/beta": 4.051568984985352,
|
|
"fcm_dpo/delta": -0.3438121974468231,
|
|
"fcm_dpo/margin": 0.3187229037284851,
|
|
"fcm_dpo/q_t": 0.3379303812980652,
|
|
"grad_norm": 1034.83447265625,
|
|
"learning_rate": 4.986026928455767e-07,
|
|
"logits/chosen": 0.1511228084564209,
|
|
"logits/rejected": 0.12557803094387054,
|
|
"logps/chosen": -63.23377227783203,
|
|
"logps/ref_chosen": -62.82402801513672,
|
|
"logps/ref_rejected": -74.9607162475586,
|
|
"logps/rejected": -75.68917846679688,
|
|
"loss": 1.2325,
|
|
"margin_dpo/margin_mean": 0.31872305274009705,
|
|
"margin_dpo/margin_std": 0.5725570917129517,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.1345427059712774,
|
|
"fcm_dpo/beta": 4.111684799194336,
|
|
"fcm_dpo/delta": -0.005461782217025757,
|
|
"fcm_dpo/margin": 0.2426835298538208,
|
|
"fcm_dpo/q_t": 0.36708956956863403,
|
|
"grad_norm": 1158.6134033203125,
|
|
"learning_rate": 4.984596161153135e-07,
|
|
"logits/chosen": 0.18977168202400208,
|
|
"logits/rejected": 0.11028344929218292,
|
|
"logps/chosen": -41.57792663574219,
|
|
"logps/ref_chosen": -41.191436767578125,
|
|
"logps/ref_rejected": -85.44769287109375,
|
|
"logps/rejected": -86.07687377929688,
|
|
"loss": 1.5568,
|
|
"margin_dpo/margin_mean": 0.24268493056297302,
|
|
"margin_dpo/margin_std": 0.5871646404266357,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.1360544217687075,
|
|
"fcm_dpo/beta": 3.9873642921447754,
|
|
"fcm_dpo/delta": -0.07796984910964966,
|
|
"fcm_dpo/margin": 0.26826444268226624,
|
|
"fcm_dpo/q_t": 0.3403598964214325,
|
|
"grad_norm": 1097.566650390625,
|
|
"learning_rate": 4.983095894354857e-07,
|
|
"logits/chosen": 0.08516630530357361,
|
|
"logits/rejected": 0.03322757035493851,
|
|
"logps/chosen": -56.98159408569336,
|
|
"logps/ref_chosen": -56.58390808105469,
|
|
"logps/ref_rejected": -86.86978149414062,
|
|
"logps/rejected": -87.53573608398438,
|
|
"loss": 1.3846,
|
|
"margin_dpo/margin_mean": 0.26826387643814087,
|
|
"margin_dpo/margin_std": 0.6303993463516235,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.13756613756613756,
|
|
"fcm_dpo/beta": 3.869006633758545,
|
|
"fcm_dpo/delta": -0.14137369394302368,
|
|
"fcm_dpo/margin": 0.2906471788883209,
|
|
"fcm_dpo/q_t": 0.34270262718200684,
|
|
"grad_norm": 1045.7557373046875,
|
|
"learning_rate": 4.98152617002662e-07,
|
|
"logits/chosen": 0.10475227236747742,
|
|
"logits/rejected": 0.06226480007171631,
|
|
"logps/chosen": -52.82066345214844,
|
|
"logps/ref_chosen": -52.38234329223633,
|
|
"logps/ref_rejected": -72.17642211914062,
|
|
"logps/rejected": -72.90538787841797,
|
|
"loss": 1.3782,
|
|
"margin_dpo/margin_mean": 0.29064705967903137,
|
|
"margin_dpo/margin_std": 0.6195999383926392,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.13907785336356765,
|
|
"fcm_dpo/beta": 4.068203926086426,
|
|
"fcm_dpo/delta": 0.24122354388237,
|
|
"fcm_dpo/margin": 0.1886722892522812,
|
|
"fcm_dpo/q_t": 0.40757930278778076,
|
|
"grad_norm": 1103.524658203125,
|
|
"learning_rate": 4.979887032076988e-07,
|
|
"logits/chosen": 0.13855835795402527,
|
|
"logits/rejected": 0.10122767090797424,
|
|
"logps/chosen": -53.529884338378906,
|
|
"logps/ref_chosen": -53.00870132446289,
|
|
"logps/ref_rejected": -79.77812957763672,
|
|
"logps/rejected": -80.48798370361328,
|
|
"loss": 1.5458,
|
|
"margin_dpo/margin_mean": 0.18867191672325134,
|
|
"margin_dpo/margin_std": 0.5168828368186951,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.14058956916099774,
|
|
"fcm_dpo/beta": 4.217402458190918,
|
|
"fcm_dpo/delta": 0.23529532551765442,
|
|
"fcm_dpo/margin": 0.1851963996887207,
|
|
"fcm_dpo/q_t": 0.3896501064300537,
|
|
"grad_norm": 1094.4332275390625,
|
|
"learning_rate": 4.978178526356172e-07,
|
|
"logits/chosen": 0.10360229760408401,
|
|
"logits/rejected": 0.07716604322195053,
|
|
"logps/chosen": -45.443016052246094,
|
|
"logps/ref_chosen": -44.90705108642578,
|
|
"logps/ref_rejected": -58.7879524230957,
|
|
"logps/rejected": -59.50910949707031,
|
|
"loss": 1.5698,
|
|
"margin_dpo/margin_mean": 0.185196191072464,
|
|
"margin_dpo/margin_std": 0.5235726237297058,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.1421012849584278,
|
|
"fcm_dpo/beta": 3.851269006729126,
|
|
"fcm_dpo/delta": -0.4641948938369751,
|
|
"fcm_dpo/margin": 0.3565562069416046,
|
|
"fcm_dpo/q_t": 0.2976396679878235,
|
|
"grad_norm": 884.6483764648438,
|
|
"learning_rate": 4.976400700654751e-07,
|
|
"logits/chosen": 0.16318684816360474,
|
|
"logits/rejected": 0.12688644230365753,
|
|
"logps/chosen": -60.265010833740234,
|
|
"logps/ref_chosen": -59.93777084350586,
|
|
"logps/ref_rejected": -79.3138427734375,
|
|
"logps/rejected": -79.99763488769531,
|
|
"loss": 1.2145,
|
|
"margin_dpo/margin_mean": 0.3565560281276703,
|
|
"margin_dpo/margin_std": 0.6176527142524719,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.1436130007558579,
|
|
"fcm_dpo/beta": 3.51151704788208,
|
|
"fcm_dpo/delta": -0.556129515171051,
|
|
"fcm_dpo/margin": 0.41667065024375916,
|
|
"fcm_dpo/q_t": 0.27452635765075684,
|
|
"grad_norm": 725.6941528320312,
|
|
"learning_rate": 4.974553604702332e-07,
|
|
"logits/chosen": 0.04962404444813728,
|
|
"logits/rejected": -0.009035153314471245,
|
|
"logps/chosen": -60.609527587890625,
|
|
"logps/ref_chosen": -60.168487548828125,
|
|
"logps/ref_rejected": -90.73665618896484,
|
|
"logps/rejected": -91.59436798095703,
|
|
"loss": 0.8553,
|
|
"margin_dpo/margin_mean": 0.4166697561740875,
|
|
"margin_dpo/margin_std": 0.5119737386703491,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.14512471655328799,
|
|
"fcm_dpo/beta": 3.3476004600524902,
|
|
"fcm_dpo/delta": -0.3292371928691864,
|
|
"fcm_dpo/margin": 0.38447052240371704,
|
|
"fcm_dpo/q_t": 0.3057492971420288,
|
|
"grad_norm": 765.777587890625,
|
|
"learning_rate": 4.972637290166157e-07,
|
|
"logits/chosen": 0.09734475612640381,
|
|
"logits/rejected": 0.05707583576440811,
|
|
"logps/chosen": -61.152687072753906,
|
|
"logps/ref_chosen": -60.66877746582031,
|
|
"logps/ref_rejected": -88.30673217773438,
|
|
"logps/rejected": -89.17510986328125,
|
|
"loss": 1.0612,
|
|
"margin_dpo/margin_mean": 0.3844701647758484,
|
|
"margin_dpo/margin_std": 0.554520845413208,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.14663643235071808,
|
|
"fcm_dpo/beta": 3.2319469451904297,
|
|
"fcm_dpo/delta": 0.1403380036354065,
|
|
"fcm_dpo/margin": 0.2673853933811188,
|
|
"fcm_dpo/q_t": 0.3827268183231354,
|
|
"grad_norm": 1068.170654296875,
|
|
"learning_rate": 4.970651810649666e-07,
|
|
"logits/chosen": 0.04420812800526619,
|
|
"logits/rejected": 0.002325967885553837,
|
|
"logps/chosen": -65.61512756347656,
|
|
"logps/ref_chosen": -65.04412078857422,
|
|
"logps/ref_rejected": -78.42092895507812,
|
|
"logps/rejected": -79.25931549072266,
|
|
"loss": 1.3514,
|
|
"margin_dpo/margin_mean": 0.2673855423927307,
|
|
"margin_dpo/margin_std": 0.6004109382629395,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.14814814814814814,
|
|
"fcm_dpo/beta": 3.42587947845459,
|
|
"fcm_dpo/delta": 0.1983877718448639,
|
|
"fcm_dpo/margin": 0.23879370093345642,
|
|
"fcm_dpo/q_t": 0.3823162317276001,
|
|
"grad_norm": 922.5922241210938,
|
|
"learning_rate": 4.968597221690985e-07,
|
|
"logits/chosen": 0.13439376652240753,
|
|
"logits/rejected": 0.10842472314834595,
|
|
"logps/chosen": -55.90753936767578,
|
|
"logps/ref_chosen": -55.503231048583984,
|
|
"logps/ref_rejected": -72.81553649902344,
|
|
"logps/rejected": -73.45864868164062,
|
|
"loss": 1.2262,
|
|
"margin_dpo/margin_mean": 0.23879370093345642,
|
|
"margin_dpo/margin_std": 0.4960702657699585,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.14965986394557823,
|
|
"fcm_dpo/beta": 3.3395771980285645,
|
|
"fcm_dpo/delta": -0.2737107276916504,
|
|
"fcm_dpo/margin": 0.3713855743408203,
|
|
"fcm_dpo/q_t": 0.31995880603790283,
|
|
"grad_norm": 838.7493286132812,
|
|
"learning_rate": 4.966473580761389e-07,
|
|
"logits/chosen": 0.14741893112659454,
|
|
"logits/rejected": 0.11150997132062912,
|
|
"logps/chosen": -58.98797607421875,
|
|
"logps/ref_chosen": -58.57563781738281,
|
|
"logps/ref_rejected": -78.693603515625,
|
|
"logps/rejected": -79.47733306884766,
|
|
"loss": 1.0331,
|
|
"margin_dpo/margin_mean": 0.37138599157333374,
|
|
"margin_dpo/margin_std": 0.5738701820373535,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.15117157974300832,
|
|
"fcm_dpo/beta": 3.413600206375122,
|
|
"fcm_dpo/delta": 0.0910910964012146,
|
|
"fcm_dpo/margin": 0.2643635869026184,
|
|
"fcm_dpo/q_t": 0.3801841139793396,
|
|
"grad_norm": 1048.221923828125,
|
|
"learning_rate": 4.964280947263676e-07,
|
|
"logits/chosen": 0.12782281637191772,
|
|
"logits/rejected": 0.12069296091794968,
|
|
"logps/chosen": -80.02874755859375,
|
|
"logps/ref_chosen": -79.58343505859375,
|
|
"logps/ref_rejected": -92.152587890625,
|
|
"logps/rejected": -92.86225891113281,
|
|
"loss": 1.5245,
|
|
"margin_dpo/margin_mean": 0.264363557100296,
|
|
"margin_dpo/margin_std": 0.6884479522705078,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.15268329554043839,
|
|
"fcm_dpo/beta": 3.2258787155151367,
|
|
"fcm_dpo/delta": -0.34300971031188965,
|
|
"fcm_dpo/margin": 0.40265652537345886,
|
|
"fcm_dpo/q_t": 0.29698413610458374,
|
|
"grad_norm": 702.3856811523438,
|
|
"learning_rate": 4.96201938253052e-07,
|
|
"logits/chosen": 0.1229773610830307,
|
|
"logits/rejected": 0.08786555379629135,
|
|
"logps/chosen": -52.74365234375,
|
|
"logps/ref_chosen": -52.332786560058594,
|
|
"logps/ref_rejected": -69.55589294433594,
|
|
"logps/rejected": -70.36941528320312,
|
|
"loss": 0.9957,
|
|
"margin_dpo/margin_mean": 0.40265610814094543,
|
|
"margin_dpo/margin_std": 0.5584487318992615,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.15419501133786848,
|
|
"fcm_dpo/beta": 3.221522808074951,
|
|
"fcm_dpo/delta": 0.1677694320678711,
|
|
"fcm_dpo/margin": 0.26210707426071167,
|
|
"fcm_dpo/q_t": 0.3745608925819397,
|
|
"grad_norm": 933.2718505859375,
|
|
"learning_rate": 4.959688949822748e-07,
|
|
"logits/chosen": 0.05017256736755371,
|
|
"logits/rejected": 0.012172428891062737,
|
|
"logps/chosen": -65.21527099609375,
|
|
"logps/ref_chosen": -64.74348449707031,
|
|
"logps/ref_rejected": -69.06132507324219,
|
|
"logps/rejected": -69.79522705078125,
|
|
"loss": 1.3826,
|
|
"margin_dpo/margin_mean": 0.26210689544677734,
|
|
"margin_dpo/margin_std": 0.6122475862503052,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.15570672713529857,
|
|
"fcm_dpo/beta": 3.343921661376953,
|
|
"fcm_dpo/delta": 0.14063423871994019,
|
|
"fcm_dpo/margin": 0.25882646441459656,
|
|
"fcm_dpo/q_t": 0.3664790987968445,
|
|
"grad_norm": 876.5048828125,
|
|
"learning_rate": 4.957289714327572e-07,
|
|
"logits/chosen": 0.14401525259017944,
|
|
"logits/rejected": 0.11314252763986588,
|
|
"logps/chosen": -64.34033203125,
|
|
"logps/ref_chosen": -63.83664321899414,
|
|
"logps/ref_rejected": -79.32362365722656,
|
|
"logps/rejected": -80.08615112304688,
|
|
"loss": 1.2541,
|
|
"margin_dpo/margin_mean": 0.2588259279727936,
|
|
"margin_dpo/margin_std": 0.537617564201355,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.15721844293272866,
|
|
"fcm_dpo/beta": 3.3820950984954834,
|
|
"fcm_dpo/delta": 0.11869892477989197,
|
|
"fcm_dpo/margin": 0.2636609375476837,
|
|
"fcm_dpo/q_t": 0.37176138162612915,
|
|
"grad_norm": 1155.879150390625,
|
|
"learning_rate": 4.954821743156767e-07,
|
|
"logits/chosen": 0.10751787573099136,
|
|
"logits/rejected": 0.03126327693462372,
|
|
"logps/chosen": -61.48141098022461,
|
|
"logps/ref_chosen": -60.99920654296875,
|
|
"logps/ref_rejected": -98.84645080566406,
|
|
"logps/rejected": -99.59231567382812,
|
|
"loss": 1.4641,
|
|
"margin_dpo/margin_mean": 0.26366138458251953,
|
|
"margin_dpo/margin_std": 0.6468815803527832,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.15873015873015872,
|
|
"fcm_dpo/beta": 3.3653788566589355,
|
|
"fcm_dpo/delta": -0.06612719595432281,
|
|
"fcm_dpo/margin": 0.31470757722854614,
|
|
"fcm_dpo/q_t": 0.3323326110839844,
|
|
"grad_norm": 1119.451171875,
|
|
"learning_rate": 4.952285105344791e-07,
|
|
"logits/chosen": 0.09568033367395401,
|
|
"logits/rejected": 0.044659968465566635,
|
|
"logps/chosen": -71.36830139160156,
|
|
"logps/ref_chosen": -70.95027160644531,
|
|
"logps/ref_rejected": -87.88340759277344,
|
|
"logps/rejected": -88.61614227294922,
|
|
"loss": 1.3043,
|
|
"margin_dpo/margin_mean": 0.3147069811820984,
|
|
"margin_dpo/margin_std": 0.6669665575027466,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.1602418745275888,
|
|
"fcm_dpo/beta": 3.4193921089172363,
|
|
"fcm_dpo/delta": 0.15833157300949097,
|
|
"fcm_dpo/margin": 0.25018543004989624,
|
|
"fcm_dpo/q_t": 0.3685312867164612,
|
|
"grad_norm": 1039.7274169921875,
|
|
"learning_rate": 4.949679871846857e-07,
|
|
"logits/chosen": 0.1333768665790558,
|
|
"logits/rejected": 0.1204344779253006,
|
|
"logps/chosen": -62.8930549621582,
|
|
"logps/ref_chosen": -62.45933151245117,
|
|
"logps/ref_rejected": -67.00595092773438,
|
|
"logps/rejected": -67.68985748291016,
|
|
"loss": 1.3112,
|
|
"margin_dpo/margin_mean": 0.2501852214336395,
|
|
"margin_dpo/margin_std": 0.5423716306686401,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.1617535903250189,
|
|
"fcm_dpo/beta": 3.652026653289795,
|
|
"fcm_dpo/delta": 0.4439771771430969,
|
|
"fcm_dpo/margin": 0.15993273258209229,
|
|
"fcm_dpo/q_t": 0.43477770686149597,
|
|
"grad_norm": 1451.044677734375,
|
|
"learning_rate": 4.947006115536947e-07,
|
|
"logits/chosen": 0.06545466929674149,
|
|
"logits/rejected": 0.046112120151519775,
|
|
"logps/chosen": -76.338623046875,
|
|
"logps/ref_chosen": -75.83796691894531,
|
|
"logps/ref_rejected": -87.74038696289062,
|
|
"logps/rejected": -88.40097045898438,
|
|
"loss": 1.8206,
|
|
"margin_dpo/margin_mean": 0.15993207693099976,
|
|
"margin_dpo/margin_std": 0.673768162727356,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.16326530612244897,
|
|
"fcm_dpo/beta": 3.626925468444824,
|
|
"fcm_dpo/delta": -0.3162718117237091,
|
|
"fcm_dpo/margin": 0.3520002067089081,
|
|
"fcm_dpo/q_t": 0.33733585476875305,
|
|
"grad_norm": 953.0491943359375,
|
|
"learning_rate": 4.944263911205772e-07,
|
|
"logits/chosen": 0.07331952452659607,
|
|
"logits/rejected": 0.047261983156204224,
|
|
"logps/chosen": -68.7996826171875,
|
|
"logps/ref_chosen": -68.39323425292969,
|
|
"logps/ref_rejected": -83.24267578125,
|
|
"logps/rejected": -84.00111389160156,
|
|
"loss": 1.2468,
|
|
"margin_dpo/margin_mean": 0.35200008749961853,
|
|
"margin_dpo/margin_std": 0.6720170974731445,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.16477702191987906,
|
|
"fcm_dpo/beta": 3.5566816329956055,
|
|
"fcm_dpo/delta": 0.07203048467636108,
|
|
"fcm_dpo/margin": 0.2625024616718292,
|
|
"fcm_dpo/q_t": 0.38299477100372314,
|
|
"grad_norm": 1039.4810791015625,
|
|
"learning_rate": 4.941453335558681e-07,
|
|
"logits/chosen": 0.06151915714144707,
|
|
"logits/rejected": 0.016085410490632057,
|
|
"logps/chosen": -55.96140670776367,
|
|
"logps/ref_chosen": -55.52748107910156,
|
|
"logps/ref_rejected": -83.55218505859375,
|
|
"logps/rejected": -84.24861907958984,
|
|
"loss": 1.3522,
|
|
"margin_dpo/margin_mean": 0.26250216364860535,
|
|
"margin_dpo/margin_std": 0.6019136309623718,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.16628873771730915,
|
|
"fcm_dpo/beta": 3.8665976524353027,
|
|
"fcm_dpo/delta": 0.39112499356269836,
|
|
"fcm_dpo/margin": 0.16184496879577637,
|
|
"fcm_dpo/q_t": 0.41690564155578613,
|
|
"grad_norm": 1377.6337890625,
|
|
"learning_rate": 4.938574467213517e-07,
|
|
"logits/chosen": 0.033917784690856934,
|
|
"logits/rejected": 0.04065680876374245,
|
|
"logps/chosen": -81.66863250732422,
|
|
"logps/ref_chosen": -81.15874481201172,
|
|
"logps/ref_rejected": -72.56021118164062,
|
|
"logps/rejected": -73.23194885253906,
|
|
"loss": 1.6791,
|
|
"margin_dpo/margin_mean": 0.16184476017951965,
|
|
"margin_dpo/margin_std": 0.5766524076461792,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.16780045351473924,
|
|
"fcm_dpo/beta": 3.9139037132263184,
|
|
"fcm_dpo/delta": -0.052766673266887665,
|
|
"fcm_dpo/margin": 0.2670096457004547,
|
|
"fcm_dpo/q_t": 0.36203914880752563,
|
|
"grad_norm": 1263.3140869140625,
|
|
"learning_rate": 4.935627386698418e-07,
|
|
"logits/chosen": 0.13472726941108704,
|
|
"logits/rejected": 0.10240040719509125,
|
|
"logps/chosen": -52.87812042236328,
|
|
"logps/ref_chosen": -52.358985900878906,
|
|
"logps/ref_rejected": -77.06150817871094,
|
|
"logps/rejected": -77.84764862060547,
|
|
"loss": 1.6275,
|
|
"margin_dpo/margin_mean": 0.26701000332832336,
|
|
"margin_dpo/margin_std": 0.6607059836387634,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.1693121693121693,
|
|
"fcm_dpo/beta": 3.622741937637329,
|
|
"fcm_dpo/delta": -0.296929270029068,
|
|
"fcm_dpo/margin": 0.34460121393203735,
|
|
"fcm_dpo/q_t": 0.33765172958374023,
|
|
"grad_norm": 1077.2196044921875,
|
|
"learning_rate": 4.932612176449559e-07,
|
|
"logits/chosen": 0.05540486425161362,
|
|
"logits/rejected": 0.0011176479747518897,
|
|
"logps/chosen": -63.441078186035156,
|
|
"logps/ref_chosen": -63.02006530761719,
|
|
"logps/ref_rejected": -111.36941528320312,
|
|
"logps/rejected": -112.13502502441406,
|
|
"loss": 1.3207,
|
|
"margin_dpo/margin_mean": 0.34459996223449707,
|
|
"margin_dpo/margin_std": 0.6546406745910645,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.1708238851095994,
|
|
"fcm_dpo/beta": 3.7972545623779297,
|
|
"fcm_dpo/delta": 0.12773901224136353,
|
|
"fcm_dpo/margin": 0.22868876159191132,
|
|
"fcm_dpo/q_t": 0.37673860788345337,
|
|
"grad_norm": 1496.3253173828125,
|
|
"learning_rate": 4.929528920808854e-07,
|
|
"logits/chosen": 0.09782901406288147,
|
|
"logits/rejected": 0.06331203132867813,
|
|
"logps/chosen": -56.33560562133789,
|
|
"logps/ref_chosen": -55.80766296386719,
|
|
"logps/ref_rejected": -69.84014129638672,
|
|
"logps/rejected": -70.59677124023438,
|
|
"loss": 1.5844,
|
|
"margin_dpo/margin_mean": 0.22868850827217102,
|
|
"margin_dpo/margin_std": 0.5964616537094116,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.17233560090702948,
|
|
"fcm_dpo/beta": 3.3383381366729736,
|
|
"fcm_dpo/delta": -0.5678998231887817,
|
|
"fcm_dpo/margin": 0.4367901682853699,
|
|
"fcm_dpo/q_t": 0.29958969354629517,
|
|
"grad_norm": 712.6404418945312,
|
|
"learning_rate": 4.92637770602159e-07,
|
|
"logits/chosen": 0.13647404313087463,
|
|
"logits/rejected": 0.08034436404705048,
|
|
"logps/chosen": -66.70956420898438,
|
|
"logps/ref_chosen": -66.33277130126953,
|
|
"logps/ref_rejected": -71.61489868164062,
|
|
"logps/rejected": -72.42848205566406,
|
|
"loss": 1.0132,
|
|
"margin_dpo/margin_mean": 0.43679025769233704,
|
|
"margin_dpo/margin_std": 0.6181018352508545,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.17384731670445955,
|
|
"fcm_dpo/beta": 3.3931922912597656,
|
|
"fcm_dpo/delta": 0.006447508931159973,
|
|
"fcm_dpo/margin": 0.291039377450943,
|
|
"fcm_dpo/q_t": 0.3765663206577301,
|
|
"grad_norm": 1062.108154296875,
|
|
"learning_rate": 4.923158620234019e-07,
|
|
"logits/chosen": 0.10150371491909027,
|
|
"logits/rejected": 0.047993022948503494,
|
|
"logps/chosen": -56.26490783691406,
|
|
"logps/ref_chosen": -55.74903869628906,
|
|
"logps/ref_rejected": -79.59849548339844,
|
|
"logps/rejected": -80.40541076660156,
|
|
"loss": 1.3013,
|
|
"margin_dpo/margin_mean": 0.29103943705558777,
|
|
"margin_dpo/margin_std": 0.6191039085388184,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.17535903250188964,
|
|
"fcm_dpo/beta": 3.4025328159332275,
|
|
"fcm_dpo/delta": -0.009448423981666565,
|
|
"fcm_dpo/margin": 0.29369187355041504,
|
|
"fcm_dpo/q_t": 0.3570387065410614,
|
|
"grad_norm": 851.5135498046875,
|
|
"learning_rate": 4.91987175349089e-07,
|
|
"logits/chosen": 0.1060943752527237,
|
|
"logits/rejected": 0.04715292900800705,
|
|
"logps/chosen": -49.830238342285156,
|
|
"logps/ref_chosen": -49.36516571044922,
|
|
"logps/ref_rejected": -72.84671020507812,
|
|
"logps/rejected": -73.60546875,
|
|
"loss": 1.224,
|
|
"margin_dpo/margin_mean": 0.29369184374809265,
|
|
"margin_dpo/margin_std": 0.5422225594520569,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.17687074829931973,
|
|
"fcm_dpo/beta": 3.344947338104248,
|
|
"fcm_dpo/delta": 0.09896770864725113,
|
|
"fcm_dpo/margin": 0.27185767889022827,
|
|
"fcm_dpo/q_t": 0.3576691150665283,
|
|
"grad_norm": 870.2111206054688,
|
|
"learning_rate": 4.916517197732933e-07,
|
|
"logits/chosen": 0.129032701253891,
|
|
"logits/rejected": 0.09620364010334015,
|
|
"logps/chosen": -58.112640380859375,
|
|
"logps/ref_chosen": -57.710899353027344,
|
|
"logps/ref_rejected": -69.77253723144531,
|
|
"logps/rejected": -70.4461441040039,
|
|
"loss": 1.3806,
|
|
"margin_dpo/margin_mean": 0.27185723185539246,
|
|
"margin_dpo/margin_std": 0.6084860563278198,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.17838246409674982,
|
|
"fcm_dpo/beta": 3.271368980407715,
|
|
"fcm_dpo/delta": -0.09690429270267487,
|
|
"fcm_dpo/margin": 0.32874226570129395,
|
|
"fcm_dpo/q_t": 0.34997400641441345,
|
|
"grad_norm": 930.0485229492188,
|
|
"learning_rate": 4.913095046794281e-07,
|
|
"logits/chosen": 0.13153867423534393,
|
|
"logits/rejected": 0.09707070142030716,
|
|
"logps/chosen": -52.88352966308594,
|
|
"logps/ref_chosen": -52.479896545410156,
|
|
"logps/ref_rejected": -81.359130859375,
|
|
"logps/rejected": -82.09149169921875,
|
|
"loss": 1.2167,
|
|
"margin_dpo/margin_mean": 0.32874205708503723,
|
|
"margin_dpo/margin_std": 0.582424521446228,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.17989417989417988,
|
|
"fcm_dpo/beta": 3.331033229827881,
|
|
"fcm_dpo/delta": 0.008004724979400635,
|
|
"fcm_dpo/margin": 0.2980421185493469,
|
|
"fcm_dpo/q_t": 0.35352998971939087,
|
|
"grad_norm": 886.5343017578125,
|
|
"learning_rate": 4.909605396399855e-07,
|
|
"logits/chosen": 0.08484401553869247,
|
|
"logits/rejected": 0.05185426026582718,
|
|
"logps/chosen": -61.905765533447266,
|
|
"logps/ref_chosen": -61.35767364501953,
|
|
"logps/ref_rejected": -75.71510314941406,
|
|
"logps/rejected": -76.56123352050781,
|
|
"loss": 1.2981,
|
|
"margin_dpo/margin_mean": 0.298042356967926,
|
|
"margin_dpo/margin_std": 0.5981870293617249,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.18140589569160998,
|
|
"fcm_dpo/beta": 3.182232618331909,
|
|
"fcm_dpo/delta": -0.24365851283073425,
|
|
"fcm_dpo/margin": 0.3802499771118164,
|
|
"fcm_dpo/q_t": 0.3237895369529724,
|
|
"grad_norm": 770.2077026367188,
|
|
"learning_rate": 4.906048344162676e-07,
|
|
"logits/chosen": 0.07990750670433044,
|
|
"logits/rejected": 0.02834871970117092,
|
|
"logps/chosen": -60.297908782958984,
|
|
"logps/ref_chosen": -59.907569885253906,
|
|
"logps/ref_rejected": -79.6910629272461,
|
|
"logps/rejected": -80.46165466308594,
|
|
"loss": 1.0967,
|
|
"margin_dpo/margin_mean": 0.3802502751350403,
|
|
"margin_dpo/margin_std": 0.6159936189651489,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.18291761148904007,
|
|
"fcm_dpo/beta": 3.1936514377593994,
|
|
"fcm_dpo/delta": 0.05449778214097023,
|
|
"fcm_dpo/margin": 0.29768818616867065,
|
|
"fcm_dpo/q_t": 0.3674450218677521,
|
|
"grad_norm": 816.0641479492188,
|
|
"learning_rate": 4.902423989581143e-07,
|
|
"logits/chosen": 0.17014677822589874,
|
|
"logits/rejected": 0.09852974861860275,
|
|
"logps/chosen": -56.18149948120117,
|
|
"logps/ref_chosen": -55.66604232788086,
|
|
"logps/ref_rejected": -101.56233978271484,
|
|
"logps/rejected": -102.37548828125,
|
|
"loss": 1.2968,
|
|
"margin_dpo/margin_mean": 0.2976876497268677,
|
|
"margin_dpo/margin_std": 0.6306780576705933,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.18442932728647016,
|
|
"fcm_dpo/beta": 3.019984722137451,
|
|
"fcm_dpo/delta": -0.44598841667175293,
|
|
"fcm_dpo/margin": 0.4587884843349457,
|
|
"fcm_dpo/q_t": 0.29104509949684143,
|
|
"grad_norm": 804.1510009765625,
|
|
"learning_rate": 4.898732434036243e-07,
|
|
"logits/chosen": 0.09989838302135468,
|
|
"logits/rejected": 0.06974966824054718,
|
|
"logps/chosen": -63.821380615234375,
|
|
"logps/ref_chosen": -63.334373474121094,
|
|
"logps/ref_rejected": -73.67523193359375,
|
|
"logps/rejected": -74.62103271484375,
|
|
"loss": 0.969,
|
|
"margin_dpo/margin_mean": 0.45878836512565613,
|
|
"margin_dpo/margin_std": 0.6608290672302246,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.18594104308390022,
|
|
"fcm_dpo/beta": 2.9597277641296387,
|
|
"fcm_dpo/delta": -0.11771807074546814,
|
|
"fcm_dpo/margin": 0.37006598711013794,
|
|
"fcm_dpo/q_t": 0.31938499212265015,
|
|
"grad_norm": 801.2340698242188,
|
|
"learning_rate": 4.894973780788722e-07,
|
|
"logits/chosen": 0.13346600532531738,
|
|
"logits/rejected": 0.09592346101999283,
|
|
"logps/chosen": -57.292945861816406,
|
|
"logps/ref_chosen": -56.89874267578125,
|
|
"logps/ref_rejected": -78.97028350830078,
|
|
"logps/rejected": -79.73455810546875,
|
|
"loss": 1.2025,
|
|
"margin_dpo/margin_mean": 0.37006592750549316,
|
|
"margin_dpo/margin_std": 0.640432596206665,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.1874527588813303,
|
|
"fcm_dpo/beta": 2.767918109893799,
|
|
"fcm_dpo/delta": -0.2092204988002777,
|
|
"fcm_dpo/margin": 0.4275299310684204,
|
|
"fcm_dpo/q_t": 0.31604132056236267,
|
|
"grad_norm": 655.9889526367188,
|
|
"learning_rate": 4.89114813497619e-07,
|
|
"logits/chosen": 0.1320020854473114,
|
|
"logits/rejected": 0.08239568769931793,
|
|
"logps/chosen": -57.52501678466797,
|
|
"logps/ref_chosen": -57.116085052490234,
|
|
"logps/ref_rejected": -87.93074035644531,
|
|
"logps/rejected": -88.76720428466797,
|
|
"loss": 0.9876,
|
|
"margin_dpo/margin_mean": 0.4275299310684204,
|
|
"margin_dpo/margin_std": 0.6369043588638306,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.1889644746787604,
|
|
"fcm_dpo/beta": 2.691709518432617,
|
|
"fcm_dpo/delta": -0.04524332284927368,
|
|
"fcm_dpo/margin": 0.3849112093448639,
|
|
"fcm_dpo/q_t": 0.32585692405700684,
|
|
"grad_norm": 650.68212890625,
|
|
"learning_rate": 4.887255603610184e-07,
|
|
"logits/chosen": 0.18828628957271576,
|
|
"logits/rejected": 0.1363983303308487,
|
|
"logps/chosen": -66.21900939941406,
|
|
"logps/ref_chosen": -65.7061767578125,
|
|
"logps/ref_rejected": -91.72711944580078,
|
|
"logps/rejected": -92.62486267089844,
|
|
"loss": 1.0744,
|
|
"margin_dpo/margin_mean": 0.3849112391471863,
|
|
"margin_dpo/margin_std": 0.5876812934875488,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.19047619047619047,
|
|
"fcm_dpo/beta": 2.652081251144409,
|
|
"fcm_dpo/delta": -0.10823916643857956,
|
|
"fcm_dpo/margin": 0.41218724846839905,
|
|
"fcm_dpo/q_t": 0.3507199287414551,
|
|
"grad_norm": 565.577880859375,
|
|
"learning_rate": 4.883296295573176e-07,
|
|
"logits/chosen": -0.01404772698879242,
|
|
"logits/rejected": -0.02040482684969902,
|
|
"logps/chosen": -68.63446807861328,
|
|
"logps/ref_chosen": -68.17608642578125,
|
|
"logps/ref_rejected": -65.1175537109375,
|
|
"logps/rejected": -65.98812103271484,
|
|
"loss": 1.143,
|
|
"margin_dpo/margin_mean": 0.41218748688697815,
|
|
"margin_dpo/margin_std": 0.8156576156616211,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.19198790627362056,
|
|
"fcm_dpo/beta": 2.6257121562957764,
|
|
"fcm_dpo/delta": -0.039457425475120544,
|
|
"fcm_dpo/margin": 0.39357566833496094,
|
|
"fcm_dpo/q_t": 0.32776835560798645,
|
|
"grad_norm": 675.0282592773438,
|
|
"learning_rate": 4.87927032161552e-07,
|
|
"logits/chosen": 0.06537148356437683,
|
|
"logits/rejected": 0.037958111613988876,
|
|
"logps/chosen": -62.41735076904297,
|
|
"logps/ref_chosen": -61.88023376464844,
|
|
"logps/ref_rejected": -68.46012878417969,
|
|
"logps/rejected": -69.39082336425781,
|
|
"loss": 1.1334,
|
|
"margin_dpo/margin_mean": 0.3935753107070923,
|
|
"margin_dpo/margin_std": 0.6591010093688965,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.19349962207105065,
|
|
"fcm_dpo/beta": 2.6458208560943604,
|
|
"fcm_dpo/delta": 0.08855466544628143,
|
|
"fcm_dpo/margin": 0.3467669188976288,
|
|
"fcm_dpo/q_t": 0.36068370938301086,
|
|
"grad_norm": 708.3317260742188,
|
|
"learning_rate": 4.875177794352363e-07,
|
|
"logits/chosen": 0.09111534804105759,
|
|
"logits/rejected": 0.045390479266643524,
|
|
"logps/chosen": -67.32196044921875,
|
|
"logps/ref_chosen": -66.708984375,
|
|
"logps/ref_rejected": -94.97969055175781,
|
|
"logps/rejected": -95.9394302368164,
|
|
"loss": 1.2229,
|
|
"margin_dpo/margin_mean": 0.34676679968833923,
|
|
"margin_dpo/margin_std": 0.687119722366333,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.19501133786848074,
|
|
"fcm_dpo/beta": 2.789608955383301,
|
|
"fcm_dpo/delta": 0.23963144421577454,
|
|
"fcm_dpo/margin": 0.2792533040046692,
|
|
"fcm_dpo/q_t": 0.38132244348526,
|
|
"grad_norm": 765.4723510742188,
|
|
"learning_rate": 4.871018828260491e-07,
|
|
"logits/chosen": 0.1117800921201706,
|
|
"logits/rejected": 0.10394299030303955,
|
|
"logps/chosen": -65.94437408447266,
|
|
"logps/ref_chosen": -65.33882904052734,
|
|
"logps/ref_rejected": -68.06109619140625,
|
|
"logps/rejected": -68.94589233398438,
|
|
"loss": 1.2034,
|
|
"margin_dpo/margin_mean": 0.279253751039505,
|
|
"margin_dpo/margin_std": 0.5657248497009277,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.1965230536659108,
|
|
"fcm_dpo/beta": 2.7729220390319824,
|
|
"fcm_dpo/delta": 0.050821587443351746,
|
|
"fcm_dpo/margin": 0.33975258469581604,
|
|
"fcm_dpo/q_t": 0.35864949226379395,
|
|
"grad_norm": 768.4454956054688,
|
|
"learning_rate": 4.866793539675126e-07,
|
|
"logits/chosen": 0.09036006778478622,
|
|
"logits/rejected": 0.04790624603629112,
|
|
"logps/chosen": -59.219017028808594,
|
|
"logps/ref_chosen": -58.660743713378906,
|
|
"logps/ref_rejected": -79.24510192871094,
|
|
"logps/rejected": -80.14312744140625,
|
|
"loss": 1.1136,
|
|
"margin_dpo/margin_mean": 0.3397524058818817,
|
|
"margin_dpo/margin_std": 0.5575762987136841,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.1980347694633409,
|
|
"fcm_dpo/beta": 2.782139301300049,
|
|
"fcm_dpo/delta": -0.2223011553287506,
|
|
"fcm_dpo/margin": 0.4299342930316925,
|
|
"fcm_dpo/q_t": 0.3284626305103302,
|
|
"grad_norm": 713.0172119140625,
|
|
"learning_rate": 4.86250204678667e-07,
|
|
"logits/chosen": 0.10117419809103012,
|
|
"logits/rejected": 0.048455823212862015,
|
|
"logps/chosen": -52.9766845703125,
|
|
"logps/ref_chosen": -52.51453399658203,
|
|
"logps/ref_rejected": -85.18299865722656,
|
|
"logps/rejected": -86.07508087158203,
|
|
"loss": 1.152,
|
|
"margin_dpo/margin_mean": 0.42993444204330444,
|
|
"margin_dpo/margin_std": 0.7103478908538818,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.19954648526077098,
|
|
"fcm_dpo/beta": 2.728858470916748,
|
|
"fcm_dpo/delta": 0.014488308690488338,
|
|
"fcm_dpo/margin": 0.3616468608379364,
|
|
"fcm_dpo/q_t": 0.3465641736984253,
|
|
"grad_norm": 803.3519897460938,
|
|
"learning_rate": 4.858144469637408e-07,
|
|
"logits/chosen": 0.16957558691501617,
|
|
"logits/rejected": 0.1406971514225006,
|
|
"logps/chosen": -66.2645263671875,
|
|
"logps/ref_chosen": -65.68513488769531,
|
|
"logps/ref_rejected": -69.54120635986328,
|
|
"logps/rejected": -70.48225402832031,
|
|
"loss": 1.3716,
|
|
"margin_dpo/margin_mean": 0.36164700984954834,
|
|
"margin_dpo/margin_std": 0.8076159358024597,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.20105820105820105,
|
|
"fcm_dpo/beta": 2.8398377895355225,
|
|
"fcm_dpo/delta": 0.20342613756656647,
|
|
"fcm_dpo/margin": 0.2855343222618103,
|
|
"fcm_dpo/q_t": 0.3706758916378021,
|
|
"grad_norm": 806.1981811523438,
|
|
"learning_rate": 4.853720930118138e-07,
|
|
"logits/chosen": 0.07018555700778961,
|
|
"logits/rejected": 0.06098049134016037,
|
|
"logps/chosen": -64.16683959960938,
|
|
"logps/ref_chosen": -63.598114013671875,
|
|
"logps/ref_rejected": -73.72798156738281,
|
|
"logps/rejected": -74.58223724365234,
|
|
"loss": 1.3296,
|
|
"margin_dpo/margin_mean": 0.2855341136455536,
|
|
"margin_dpo/margin_std": 0.6436434984207153,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.20256991685563114,
|
|
"fcm_dpo/beta": 2.7438831329345703,
|
|
"fcm_dpo/delta": -0.14177896082401276,
|
|
"fcm_dpo/margin": 0.40749433636665344,
|
|
"fcm_dpo/q_t": 0.32042205333709717,
|
|
"grad_norm": 637.2561645507812,
|
|
"learning_rate": 4.849231551964771e-07,
|
|
"logits/chosen": 0.17743632197380066,
|
|
"logits/rejected": 0.129373237490654,
|
|
"logps/chosen": -54.34275817871094,
|
|
"logps/ref_chosen": -53.79457092285156,
|
|
"logps/ref_rejected": -74.16741943359375,
|
|
"logps/rejected": -75.12309265136719,
|
|
"loss": 1.0797,
|
|
"margin_dpo/margin_mean": 0.40749499201774597,
|
|
"margin_dpo/margin_std": 0.6834430694580078,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.20408163265306123,
|
|
"fcm_dpo/beta": 2.8520781993865967,
|
|
"fcm_dpo/delta": 0.2403937578201294,
|
|
"fcm_dpo/margin": 0.2731159031391144,
|
|
"fcm_dpo/q_t": 0.3797072768211365,
|
|
"grad_norm": 688.9449462890625,
|
|
"learning_rate": 4.844676460754862e-07,
|
|
"logits/chosen": 0.09586119651794434,
|
|
"logits/rejected": 0.06667510420084,
|
|
"logps/chosen": -49.97583770751953,
|
|
"logps/ref_chosen": -49.441078186035156,
|
|
"logps/ref_rejected": -65.96878051757812,
|
|
"logps/rejected": -66.77665710449219,
|
|
"loss": 1.3673,
|
|
"margin_dpo/margin_mean": 0.27311572432518005,
|
|
"margin_dpo/margin_std": 0.6469433307647705,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.20559334845049132,
|
|
"fcm_dpo/beta": 2.90311861038208,
|
|
"fcm_dpo/delta": 0.004882900044322014,
|
|
"fcm_dpo/margin": 0.3429165482521057,
|
|
"fcm_dpo/q_t": 0.36461225152015686,
|
|
"grad_norm": 1105.5750732421875,
|
|
"learning_rate": 4.840055783904106e-07,
|
|
"logits/chosen": 0.10260805487632751,
|
|
"logits/rejected": 0.04400411248207092,
|
|
"logps/chosen": -67.37596893310547,
|
|
"logps/ref_chosen": -66.75926208496094,
|
|
"logps/ref_rejected": -94.61787414550781,
|
|
"logps/rejected": -95.57749938964844,
|
|
"loss": 1.5811,
|
|
"margin_dpo/margin_mean": 0.34291741251945496,
|
|
"margin_dpo/margin_std": 0.9085370302200317,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.20710506424792138,
|
|
"fcm_dpo/beta": 2.891000747680664,
|
|
"fcm_dpo/delta": -0.11807064712047577,
|
|
"fcm_dpo/margin": 0.38187700510025024,
|
|
"fcm_dpo/q_t": 0.3429448902606964,
|
|
"grad_norm": 694.5245971679688,
|
|
"learning_rate": 4.835369650662767e-07,
|
|
"logits/chosen": 0.10072774440050125,
|
|
"logits/rejected": 0.07752367854118347,
|
|
"logps/chosen": -57.34507751464844,
|
|
"logps/ref_chosen": -56.78379821777344,
|
|
"logps/ref_rejected": -69.89952087402344,
|
|
"logps/rejected": -70.8426742553711,
|
|
"loss": 1.2,
|
|
"margin_dpo/margin_mean": 0.3818773031234741,
|
|
"margin_dpo/margin_std": 0.6857679486274719,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.20861678004535147,
|
|
"fcm_dpo/beta": 2.905198574066162,
|
|
"fcm_dpo/delta": 0.12694688141345978,
|
|
"fcm_dpo/margin": 0.3041801452636719,
|
|
"fcm_dpo/q_t": 0.3584628403186798,
|
|
"grad_norm": 783.094482421875,
|
|
"learning_rate": 4.830618192112065e-07,
|
|
"logits/chosen": 0.0714266374707222,
|
|
"logits/rejected": 0.04139017313718796,
|
|
"logps/chosen": -59.46550750732422,
|
|
"logps/ref_chosen": -58.766014099121094,
|
|
"logps/ref_rejected": -68.12371826171875,
|
|
"logps/rejected": -69.12739562988281,
|
|
"loss": 1.3424,
|
|
"margin_dpo/margin_mean": 0.3041801452636719,
|
|
"margin_dpo/margin_std": 0.6721060276031494,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.21012849584278157,
|
|
"fcm_dpo/beta": 2.842034101486206,
|
|
"fcm_dpo/delta": -0.29515254497528076,
|
|
"fcm_dpo/margin": 0.4422586262226105,
|
|
"fcm_dpo/q_t": 0.32127201557159424,
|
|
"grad_norm": 698.6984252929688,
|
|
"learning_rate": 4.825801541160509e-07,
|
|
"logits/chosen": 0.058577846735715866,
|
|
"logits/rejected": 0.034485623240470886,
|
|
"logps/chosen": -71.8461685180664,
|
|
"logps/ref_chosen": -71.2255859375,
|
|
"logps/ref_rejected": -82.1834716796875,
|
|
"logps/rejected": -83.2463150024414,
|
|
"loss": 1.0673,
|
|
"margin_dpo/margin_mean": 0.44225820899009705,
|
|
"margin_dpo/margin_std": 0.6527875661849976,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.21164021164021163,
|
|
"fcm_dpo/beta": 2.5584716796875,
|
|
"fcm_dpo/delta": -0.5142702460289001,
|
|
"fcm_dpo/margin": 0.5633730888366699,
|
|
"fcm_dpo/q_t": 0.28819897770881653,
|
|
"grad_norm": 720.5505981445312,
|
|
"learning_rate": 4.820919832540181e-07,
|
|
"logits/chosen": 0.06424537301063538,
|
|
"logits/rejected": 0.027711138129234314,
|
|
"logps/chosen": -63.82072830200195,
|
|
"logps/ref_chosen": -63.27766418457031,
|
|
"logps/ref_rejected": -83.30647277832031,
|
|
"logps/rejected": -84.41291809082031,
|
|
"loss": 1.1217,
|
|
"margin_dpo/margin_mean": 0.5633726119995117,
|
|
"margin_dpo/margin_std": 0.8722689151763916,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.21315192743764172,
|
|
"fcm_dpo/beta": 2.474776029586792,
|
|
"fcm_dpo/delta": 0.05119156837463379,
|
|
"fcm_dpo/margin": 0.3848419487476349,
|
|
"fcm_dpo/q_t": 0.3656679391860962,
|
|
"grad_norm": 698.813720703125,
|
|
"learning_rate": 4.815973202802966e-07,
|
|
"logits/chosen": 0.09540177881717682,
|
|
"logits/rejected": 0.06088024377822876,
|
|
"logps/chosen": -62.38557052612305,
|
|
"logps/ref_chosen": -61.76676940917969,
|
|
"logps/ref_rejected": -88.60601806640625,
|
|
"logps/rejected": -89.60966491699219,
|
|
"loss": 1.2344,
|
|
"margin_dpo/margin_mean": 0.38484299182891846,
|
|
"margin_dpo/margin_std": 0.7531988620758057,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.2146636432350718,
|
|
"fcm_dpo/beta": 2.6312007904052734,
|
|
"fcm_dpo/delta": 0.25451576709747314,
|
|
"fcm_dpo/margin": 0.2886815369129181,
|
|
"fcm_dpo/q_t": 0.39209288358688354,
|
|
"grad_norm": 677.07763671875,
|
|
"learning_rate": 4.810961790316729e-07,
|
|
"logits/chosen": 0.08383051306009293,
|
|
"logits/rejected": 0.06255074590444565,
|
|
"logps/chosen": -65.8593521118164,
|
|
"logps/ref_chosen": -65.2747802734375,
|
|
"logps/ref_rejected": -81.1378173828125,
|
|
"logps/rejected": -82.01107788085938,
|
|
"loss": 1.3684,
|
|
"margin_dpo/margin_mean": 0.2886812686920166,
|
|
"margin_dpo/margin_std": 0.7156628370285034,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.2161753590325019,
|
|
"fcm_dpo/beta": 2.587679386138916,
|
|
"fcm_dpo/delta": -0.12541311979293823,
|
|
"fcm_dpo/margin": 0.42952513694763184,
|
|
"fcm_dpo/q_t": 0.3180665075778961,
|
|
"grad_norm": 654.9125366210938,
|
|
"learning_rate": 4.805885735261454e-07,
|
|
"logits/chosen": 0.12669947743415833,
|
|
"logits/rejected": 0.11181487888097763,
|
|
"logps/chosen": -63.13290023803711,
|
|
"logps/ref_chosen": -62.617828369140625,
|
|
"logps/ref_rejected": -70.39239501953125,
|
|
"logps/rejected": -71.33699035644531,
|
|
"loss": 1.0608,
|
|
"margin_dpo/margin_mean": 0.4295256435871124,
|
|
"margin_dpo/margin_std": 0.6682602167129517,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.21768707482993196,
|
|
"fcm_dpo/beta": 2.556525707244873,
|
|
"fcm_dpo/delta": -0.07277373969554901,
|
|
"fcm_dpo/margin": 0.4165218472480774,
|
|
"fcm_dpo/q_t": 0.34666940569877625,
|
|
"grad_norm": 743.940185546875,
|
|
"learning_rate": 4.800745179625307e-07,
|
|
"logits/chosen": 0.11285848915576935,
|
|
"logits/rejected": 0.0883728414773941,
|
|
"logps/chosen": -61.44839096069336,
|
|
"logps/ref_chosen": -60.80268859863281,
|
|
"logps/ref_rejected": -79.07284545898438,
|
|
"logps/rejected": -80.13507080078125,
|
|
"loss": 1.1906,
|
|
"margin_dpo/margin_mean": 0.4165222942829132,
|
|
"margin_dpo/margin_std": 0.7579972743988037,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.21919879062736206,
|
|
"fcm_dpo/beta": 2.5957999229431152,
|
|
"fcm_dpo/delta": 0.1555352658033371,
|
|
"fcm_dpo/margin": 0.33037135004997253,
|
|
"fcm_dpo/q_t": 0.3795892000198364,
|
|
"grad_norm": 965.2445678710938,
|
|
"learning_rate": 4.795540267200686e-07,
|
|
"logits/chosen": 0.07255662977695465,
|
|
"logits/rejected": 0.08949023485183716,
|
|
"logps/chosen": -75.23291015625,
|
|
"logps/ref_chosen": -74.61146545410156,
|
|
"logps/ref_rejected": -83.24461364746094,
|
|
"logps/rejected": -84.19642639160156,
|
|
"loss": 1.456,
|
|
"margin_dpo/margin_mean": 0.3303707540035248,
|
|
"margin_dpo/margin_std": 0.8115462064743042,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.22071050642479215,
|
|
"fcm_dpo/beta": 2.5885000228881836,
|
|
"fcm_dpo/delta": -0.06581529229879379,
|
|
"fcm_dpo/margin": 0.40906020998954773,
|
|
"fcm_dpo/q_t": 0.33817818760871887,
|
|
"grad_norm": 619.466064453125,
|
|
"learning_rate": 4.790271143580173e-07,
|
|
"logits/chosen": 0.05093229562044144,
|
|
"logits/rejected": 0.03536106273531914,
|
|
"logps/chosen": -58.38066101074219,
|
|
"logps/ref_chosen": -57.84098434448242,
|
|
"logps/ref_rejected": -67.47422790527344,
|
|
"logps/rejected": -68.42295837402344,
|
|
"loss": 1.1586,
|
|
"margin_dpo/margin_mean": 0.40906035900115967,
|
|
"margin_dpo/margin_std": 0.7477720975875854,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.2222222222222222,
|
|
"fcm_dpo/beta": 2.669675588607788,
|
|
"fcm_dpo/delta": 0.24072621762752533,
|
|
"fcm_dpo/margin": 0.29151690006256104,
|
|
"fcm_dpo/q_t": 0.38209617137908936,
|
|
"grad_norm": 970.9570922851562,
|
|
"learning_rate": 4.784937956152489e-07,
|
|
"logits/chosen": 0.05634861811995506,
|
|
"logits/rejected": 0.02099587954580784,
|
|
"logps/chosen": -67.4126968383789,
|
|
"logps/ref_chosen": -66.81346893310547,
|
|
"logps/ref_rejected": -81.1796875,
|
|
"logps/rejected": -82.0704345703125,
|
|
"loss": 1.4796,
|
|
"margin_dpo/margin_mean": 0.29151687026023865,
|
|
"margin_dpo/margin_std": 0.7656582593917847,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.2237339380196523,
|
|
"fcm_dpo/beta": 2.6029810905456543,
|
|
"fcm_dpo/delta": -0.31015288829803467,
|
|
"fcm_dpo/margin": 0.48854613304138184,
|
|
"fcm_dpo/q_t": 0.3191307783126831,
|
|
"grad_norm": 515.7787475585938,
|
|
"learning_rate": 4.779540854098347e-07,
|
|
"logits/chosen": 0.19165629148483276,
|
|
"logits/rejected": 0.12959185242652893,
|
|
"logps/chosen": -49.28028106689453,
|
|
"logps/ref_chosen": -48.6877555847168,
|
|
"logps/ref_rejected": -67.50503540039062,
|
|
"logps/rejected": -68.58610534667969,
|
|
"loss": 1.0437,
|
|
"margin_dpo/margin_mean": 0.488546222448349,
|
|
"margin_dpo/margin_std": 0.7724089622497559,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.2252456538170824,
|
|
"fcm_dpo/beta": 2.533602476119995,
|
|
"fcm_dpo/delta": -0.09391121566295624,
|
|
"fcm_dpo/margin": 0.42736145853996277,
|
|
"fcm_dpo/q_t": 0.34449946880340576,
|
|
"grad_norm": 640.553466796875,
|
|
"learning_rate": 4.774079988386296e-07,
|
|
"logits/chosen": 0.05926530063152313,
|
|
"logits/rejected": 0.01661105640232563,
|
|
"logps/chosen": -55.83122253417969,
|
|
"logps/ref_chosen": -55.143775939941406,
|
|
"logps/ref_rejected": -64.79888916015625,
|
|
"logps/rejected": -65.9136962890625,
|
|
"loss": 1.1159,
|
|
"margin_dpo/margin_mean": 0.4273618459701538,
|
|
"margin_dpo/margin_std": 0.7076586484909058,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.22675736961451248,
|
|
"fcm_dpo/beta": 2.3987488746643066,
|
|
"fcm_dpo/delta": -0.2589126229286194,
|
|
"fcm_dpo/margin": 0.5113659501075745,
|
|
"fcm_dpo/q_t": 0.2972312569618225,
|
|
"grad_norm": 609.1364135742188,
|
|
"learning_rate": 4.768555511768486e-07,
|
|
"logits/chosen": 0.10247902572154999,
|
|
"logits/rejected": 0.06602032482624054,
|
|
"logps/chosen": -67.88574981689453,
|
|
"logps/ref_chosen": -67.47074890136719,
|
|
"logps/ref_rejected": -89.21170806884766,
|
|
"logps/rejected": -90.13807678222656,
|
|
"loss": 0.9804,
|
|
"margin_dpo/margin_mean": 0.5113657712936401,
|
|
"margin_dpo/margin_std": 0.6983498334884644,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.22826908541194255,
|
|
"fcm_dpo/beta": 2.3255553245544434,
|
|
"fcm_dpo/delta": -0.19754400849342346,
|
|
"fcm_dpo/margin": 0.5049396753311157,
|
|
"fcm_dpo/q_t": 0.3148772716522217,
|
|
"grad_norm": 509.62213134765625,
|
|
"learning_rate": 4.762967578776406e-07,
|
|
"logits/chosen": 0.06744161248207092,
|
|
"logits/rejected": 0.025958776473999023,
|
|
"logps/chosen": -52.93949890136719,
|
|
"logps/ref_chosen": -52.45954132080078,
|
|
"logps/ref_rejected": -79.0630111694336,
|
|
"logps/rejected": -80.04790496826172,
|
|
"loss": 0.9958,
|
|
"margin_dpo/margin_mean": 0.5049391388893127,
|
|
"margin_dpo/margin_std": 0.7245649099349976,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.22978080120937264,
|
|
"fcm_dpo/beta": 2.2168989181518555,
|
|
"fcm_dpo/delta": -0.2083718478679657,
|
|
"fcm_dpo/margin": 0.5341185927391052,
|
|
"fcm_dpo/q_t": 0.30489107966423035,
|
|
"grad_norm": 501.7793884277344,
|
|
"learning_rate": 4.757316345716553e-07,
|
|
"logits/chosen": 0.1456744223833084,
|
|
"logits/rejected": 0.10498102009296417,
|
|
"logps/chosen": -57.103904724121094,
|
|
"logps/ref_chosen": -56.5538330078125,
|
|
"logps/ref_rejected": -76.55074310302734,
|
|
"logps/rejected": -77.63492584228516,
|
|
"loss": 0.9032,
|
|
"margin_dpo/margin_mean": 0.5341184139251709,
|
|
"margin_dpo/margin_std": 0.6747971773147583,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.23129251700680273,
|
|
"fcm_dpo/beta": 2.214939832687378,
|
|
"fcm_dpo/delta": 0.11927812546491623,
|
|
"fcm_dpo/margin": 0.40247973799705505,
|
|
"fcm_dpo/q_t": 0.3638674020767212,
|
|
"grad_norm": 600.3435668945312,
|
|
"learning_rate": 4.751601970666064e-07,
|
|
"logits/chosen": 0.05303303897380829,
|
|
"logits/rejected": 0.01978529989719391,
|
|
"logps/chosen": -68.58604431152344,
|
|
"logps/ref_chosen": -68.00689697265625,
|
|
"logps/ref_rejected": -74.83482360839844,
|
|
"logps/rejected": -75.81645202636719,
|
|
"loss": 1.2482,
|
|
"margin_dpo/margin_mean": 0.4024793207645416,
|
|
"margin_dpo/margin_std": 0.7963600158691406,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.2328042328042328,
|
|
"fcm_dpo/beta": 2.284209728240967,
|
|
"fcm_dpo/delta": 0.13816551864147186,
|
|
"fcm_dpo/margin": 0.38259416818618774,
|
|
"fcm_dpo/q_t": 0.3632936179637909,
|
|
"grad_norm": 581.8963623046875,
|
|
"learning_rate": 4.745824613468292e-07,
|
|
"logits/chosen": 0.13186918199062347,
|
|
"logits/rejected": 0.12810632586479187,
|
|
"logps/chosen": -59.913002014160156,
|
|
"logps/ref_chosen": -59.222537994384766,
|
|
"logps/ref_rejected": -64.19131469726562,
|
|
"logps/rejected": -65.2643814086914,
|
|
"loss": 1.2855,
|
|
"margin_dpo/margin_mean": 0.3825940191745758,
|
|
"margin_dpo/margin_std": 0.8229261636734009,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.23431594860166288,
|
|
"fcm_dpo/beta": 2.359236717224121,
|
|
"fcm_dpo/delta": 0.17262253165245056,
|
|
"fcm_dpo/margin": 0.3568933308124542,
|
|
"fcm_dpo/q_t": 0.3739526867866516,
|
|
"grad_norm": 687.4069213867188,
|
|
"learning_rate": 4.7399844357283393e-07,
|
|
"logits/chosen": 0.14357620477676392,
|
|
"logits/rejected": 0.1261100172996521,
|
|
"logps/chosen": -69.06387329101562,
|
|
"logps/ref_chosen": -68.45469665527344,
|
|
"logps/ref_rejected": -77.91763305664062,
|
|
"logps/rejected": -78.88371276855469,
|
|
"loss": 1.4692,
|
|
"margin_dpo/margin_mean": 0.35689258575439453,
|
|
"margin_dpo/margin_std": 0.879474401473999,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.23582766439909297,
|
|
"fcm_dpo/beta": 2.3249125480651855,
|
|
"fcm_dpo/delta": -0.2846854329109192,
|
|
"fcm_dpo/margin": 0.5367815494537354,
|
|
"fcm_dpo/q_t": 0.33714932203292847,
|
|
"grad_norm": 715.7948608398438,
|
|
"learning_rate": 4.7340816008085305e-07,
|
|
"logits/chosen": 0.09127533435821533,
|
|
"logits/rejected": 0.05366864800453186,
|
|
"logps/chosen": -67.89020538330078,
|
|
"logps/ref_chosen": -67.26959991455078,
|
|
"logps/ref_rejected": -86.95914459228516,
|
|
"logps/rejected": -88.11653137207031,
|
|
"loss": 1.1585,
|
|
"margin_dpo/margin_mean": 0.5367816686630249,
|
|
"margin_dpo/margin_std": 0.9481757879257202,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.23733938019652306,
|
|
"fcm_dpo/beta": 2.2261717319488525,
|
|
"fcm_dpo/delta": -0.006286881864070892,
|
|
"fcm_dpo/margin": 0.45090270042419434,
|
|
"fcm_dpo/q_t": 0.3336307406425476,
|
|
"grad_norm": 524.041259765625,
|
|
"learning_rate": 4.728116273823847e-07,
|
|
"logits/chosen": 0.08175022900104523,
|
|
"logits/rejected": 0.062264494597911835,
|
|
"logps/chosen": -55.3340950012207,
|
|
"logps/ref_chosen": -54.77287292480469,
|
|
"logps/ref_rejected": -63.87866973876953,
|
|
"logps/rejected": -64.89079284667969,
|
|
"loss": 1.0499,
|
|
"margin_dpo/margin_mean": 0.45090246200561523,
|
|
"margin_dpo/margin_std": 0.7311956286430359,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.23885109599395313,
|
|
"fcm_dpo/beta": 2.172646999359131,
|
|
"fcm_dpo/delta": -0.2092001736164093,
|
|
"fcm_dpo/margin": 0.5448204278945923,
|
|
"fcm_dpo/q_t": 0.3070847988128662,
|
|
"grad_norm": 502.65093994140625,
|
|
"learning_rate": 4.7220886216373085e-07,
|
|
"logits/chosen": 0.1196097731590271,
|
|
"logits/rejected": 0.0907188355922699,
|
|
"logps/chosen": -65.47095489501953,
|
|
"logps/ref_chosen": -64.92271423339844,
|
|
"logps/ref_rejected": -82.23789978027344,
|
|
"logps/rejected": -83.3309555053711,
|
|
"loss": 0.9403,
|
|
"margin_dpo/margin_mean": 0.5448204278945923,
|
|
"margin_dpo/margin_std": 0.7522009015083313,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.24036281179138322,
|
|
"fcm_dpo/beta": 2.2735085487365723,
|
|
"fcm_dpo/delta": 0.19475619494915009,
|
|
"fcm_dpo/margin": 0.3540440499782562,
|
|
"fcm_dpo/q_t": 0.35417577624320984,
|
|
"grad_norm": 675.0926513671875,
|
|
"learning_rate": 4.715998812855304e-07,
|
|
"logits/chosen": 0.13686862587928772,
|
|
"logits/rejected": 0.10717260837554932,
|
|
"logps/chosen": -57.696720123291016,
|
|
"logps/ref_chosen": -57.046993255615234,
|
|
"logps/ref_rejected": -73.32441711425781,
|
|
"logps/rejected": -74.32818603515625,
|
|
"loss": 1.2425,
|
|
"margin_dpo/margin_mean": 0.3540443778038025,
|
|
"margin_dpo/margin_std": 0.6996503472328186,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.2418745275888133,
|
|
"fcm_dpo/beta": 2.3114399909973145,
|
|
"fcm_dpo/delta": 0.17240478098392487,
|
|
"fcm_dpo/margin": 0.3636714816093445,
|
|
"fcm_dpo/q_t": 0.3722858130931854,
|
|
"grad_norm": 635.1336669921875,
|
|
"learning_rate": 4.7098470178228755e-07,
|
|
"logits/chosen": -0.020404599606990814,
|
|
"logits/rejected": -0.04949381574988365,
|
|
"logps/chosen": -50.52197265625,
|
|
"logps/ref_chosen": -49.806915283203125,
|
|
"logps/ref_rejected": -68.3370132446289,
|
|
"logps/rejected": -69.41574096679688,
|
|
"loss": 1.2751,
|
|
"margin_dpo/margin_mean": 0.3636714518070221,
|
|
"margin_dpo/margin_std": 0.8036404848098755,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.24338624338624337,
|
|
"fcm_dpo/beta": 2.285512924194336,
|
|
"fcm_dpo/delta": -0.16357703506946564,
|
|
"fcm_dpo/margin": 0.5008035898208618,
|
|
"fcm_dpo/q_t": 0.35015690326690674,
|
|
"grad_norm": 537.9451293945312,
|
|
"learning_rate": 4.703633408618955e-07,
|
|
"logits/chosen": 0.10273732244968414,
|
|
"logits/rejected": 0.07215458899736404,
|
|
"logps/chosen": -53.11811065673828,
|
|
"logps/ref_chosen": -52.50048828125,
|
|
"logps/ref_rejected": -66.04540252685547,
|
|
"logps/rejected": -67.16382598876953,
|
|
"loss": 1.1267,
|
|
"margin_dpo/margin_mean": 0.5008042454719543,
|
|
"margin_dpo/margin_std": 0.8614367246627808,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.24489795918367346,
|
|
"fcm_dpo/beta": 2.08780574798584,
|
|
"fcm_dpo/delta": -0.3560563623905182,
|
|
"fcm_dpo/margin": 0.6207355260848999,
|
|
"fcm_dpo/q_t": 0.2931872010231018,
|
|
"grad_norm": 511.5837097167969,
|
|
"learning_rate": 4.697358159051549e-07,
|
|
"logits/chosen": 0.16846126317977905,
|
|
"logits/rejected": 0.12896160781383514,
|
|
"logps/chosen": -70.21339416503906,
|
|
"logps/ref_chosen": -69.46919250488281,
|
|
"logps/ref_rejected": -92.00952911376953,
|
|
"logps/rejected": -93.37446594238281,
|
|
"loss": 0.9736,
|
|
"margin_dpo/margin_mean": 0.6207360029220581,
|
|
"margin_dpo/margin_std": 0.8480439186096191,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.24640967498110355,
|
|
"fcm_dpo/beta": 2.0367884635925293,
|
|
"fcm_dpo/delta": -0.2981437146663666,
|
|
"fcm_dpo/margin": 0.618397057056427,
|
|
"fcm_dpo/q_t": 0.29892927408218384,
|
|
"grad_norm": 515.961181640625,
|
|
"learning_rate": 4.691021444652876e-07,
|
|
"logits/chosen": 0.10065104067325592,
|
|
"logits/rejected": 0.06360255181789398,
|
|
"logps/chosen": -51.2899055480957,
|
|
"logps/ref_chosen": -50.613834381103516,
|
|
"logps/ref_rejected": -74.62033081054688,
|
|
"logps/rejected": -75.914794921875,
|
|
"loss": 0.9615,
|
|
"margin_dpo/margin_mean": 0.6183971762657166,
|
|
"margin_dpo/margin_std": 0.8240780830383301,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.24792139077853365,
|
|
"fcm_dpo/beta": 1.9806370735168457,
|
|
"fcm_dpo/delta": -0.047169312834739685,
|
|
"fcm_dpo/margin": 0.5249905586242676,
|
|
"fcm_dpo/q_t": 0.32894620299339294,
|
|
"grad_norm": 464.32489013671875,
|
|
"learning_rate": 4.6846234426744624e-07,
|
|
"logits/chosen": 0.08540582656860352,
|
|
"logits/rejected": 0.03592706099152565,
|
|
"logps/chosen": -55.629249572753906,
|
|
"logps/ref_chosen": -54.848114013671875,
|
|
"logps/ref_rejected": -79.0630111694336,
|
|
"logps/rejected": -80.369140625,
|
|
"loss": 1.0554,
|
|
"margin_dpo/margin_mean": 0.524990439414978,
|
|
"margin_dpo/margin_std": 0.7865326404571533,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.2494331065759637,
|
|
"fcm_dpo/beta": 1.966191291809082,
|
|
"fcm_dpo/delta": -0.0075155869126319885,
|
|
"fcm_dpo/margin": 0.5113043785095215,
|
|
"fcm_dpo/q_t": 0.3119150400161743,
|
|
"grad_norm": 391.06072998046875,
|
|
"learning_rate": 4.678164332082175e-07,
|
|
"logits/chosen": 0.1477801650762558,
|
|
"logits/rejected": 0.10125482082366943,
|
|
"logps/chosen": -51.86711883544922,
|
|
"logps/ref_chosen": -51.089210510253906,
|
|
"logps/ref_rejected": -71.23370361328125,
|
|
"logps/rejected": -72.52291870117188,
|
|
"loss": 0.9392,
|
|
"margin_dpo/margin_mean": 0.5113040804862976,
|
|
"margin_dpo/margin_std": 0.6569217443466187,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.2509448223733938,
|
|
"fcm_dpo/beta": 2.0378761291503906,
|
|
"fcm_dpo/delta": 0.31097179651260376,
|
|
"fcm_dpo/margin": 0.34910979866981506,
|
|
"fcm_dpo/q_t": 0.38307300209999084,
|
|
"grad_norm": 556.363037109375,
|
|
"learning_rate": 4.6716442935512214e-07,
|
|
"logits/chosen": 0.10038108378648758,
|
|
"logits/rejected": 0.036018554121255875,
|
|
"logps/chosen": -63.89585876464844,
|
|
"logps/ref_chosen": -63.19081115722656,
|
|
"logps/ref_rejected": -93.8402099609375,
|
|
"logps/rejected": -94.89436340332031,
|
|
"loss": 1.2454,
|
|
"margin_dpo/margin_mean": 0.3491097092628479,
|
|
"margin_dpo/margin_std": 0.7657175064086914,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.25245653817082386,
|
|
"fcm_dpo/beta": 2.0208253860473633,
|
|
"fcm_dpo/delta": -0.17621225118637085,
|
|
"fcm_dpo/margin": 0.5719941854476929,
|
|
"fcm_dpo/q_t": 0.2979215979576111,
|
|
"grad_norm": 404.3883056640625,
|
|
"learning_rate": 4.6650635094610966e-07,
|
|
"logits/chosen": 0.06507319211959839,
|
|
"logits/rejected": 0.03547991067171097,
|
|
"logps/chosen": -59.553977966308594,
|
|
"logps/ref_chosen": -58.92427062988281,
|
|
"logps/ref_rejected": -72.97377014160156,
|
|
"logps/rejected": -74.17547607421875,
|
|
"loss": 0.8796,
|
|
"margin_dpo/margin_mean": 0.5719939470291138,
|
|
"margin_dpo/margin_std": 0.6960855722427368,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.25396825396825395,
|
|
"fcm_dpo/beta": 2.060161828994751,
|
|
"fcm_dpo/delta": 0.1698514223098755,
|
|
"fcm_dpo/margin": 0.4094662666320801,
|
|
"fcm_dpo/q_t": 0.3545387387275696,
|
|
"grad_norm": 583.7806396484375,
|
|
"learning_rate": 4.6584221638904767e-07,
|
|
"logits/chosen": 0.07120160013437271,
|
|
"logits/rejected": 0.04992123693227768,
|
|
"logps/chosen": -66.4698486328125,
|
|
"logps/ref_chosen": -65.65138244628906,
|
|
"logps/ref_rejected": -79.71418762207031,
|
|
"logps/rejected": -80.94212341308594,
|
|
"loss": 1.1174,
|
|
"margin_dpo/margin_mean": 0.40946611762046814,
|
|
"margin_dpo/margin_std": 0.7269895076751709,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.25547996976568405,
|
|
"fcm_dpo/beta": 2.0494308471679688,
|
|
"fcm_dpo/delta": -0.08093604445457458,
|
|
"fcm_dpo/margin": 0.5232309699058533,
|
|
"fcm_dpo/q_t": 0.34376293420791626,
|
|
"grad_norm": 552.8328857421875,
|
|
"learning_rate": 4.651720442612075e-07,
|
|
"logits/chosen": 0.15064923465251923,
|
|
"logits/rejected": 0.12327395379543304,
|
|
"logps/chosen": -62.13090515136719,
|
|
"logps/ref_chosen": -61.425865173339844,
|
|
"logps/ref_rejected": -76.09590148925781,
|
|
"logps/rejected": -77.32416534423828,
|
|
"loss": 1.0984,
|
|
"margin_dpo/margin_mean": 0.523231029510498,
|
|
"margin_dpo/margin_std": 0.913813591003418,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.25699168556311414,
|
|
"fcm_dpo/beta": 2.1044416427612305,
|
|
"fcm_dpo/delta": 0.22101661562919617,
|
|
"fcm_dpo/margin": 0.3785492181777954,
|
|
"fcm_dpo/q_t": 0.3640963137149811,
|
|
"grad_norm": 517.9861450195312,
|
|
"learning_rate": 4.6449585330874425e-07,
|
|
"logits/chosen": 0.05246744677424431,
|
|
"logits/rejected": 0.051534149795770645,
|
|
"logps/chosen": -57.42051696777344,
|
|
"logps/ref_chosen": -56.65319061279297,
|
|
"logps/ref_rejected": -63.45965576171875,
|
|
"logps/rejected": -64.60552215576172,
|
|
"loss": 1.2398,
|
|
"margin_dpo/margin_mean": 0.3785494565963745,
|
|
"margin_dpo/margin_std": 0.7789652943611145,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.2585034013605442,
|
|
"fcm_dpo/beta": 2.0837273597717285,
|
|
"fcm_dpo/delta": -0.041708558797836304,
|
|
"fcm_dpo/margin": 0.49539345502853394,
|
|
"fcm_dpo/q_t": 0.31717920303344727,
|
|
"grad_norm": 528.3797607421875,
|
|
"learning_rate": 4.6381366244617224e-07,
|
|
"logits/chosen": 0.12783187627792358,
|
|
"logits/rejected": 0.08980339765548706,
|
|
"logps/chosen": -64.4964599609375,
|
|
"logps/ref_chosen": -63.73476028442383,
|
|
"logps/ref_rejected": -78.50328063964844,
|
|
"logps/rejected": -79.76036834716797,
|
|
"loss": 1.0854,
|
|
"margin_dpo/margin_mean": 0.49539363384246826,
|
|
"margin_dpo/margin_std": 0.778314471244812,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.2600151171579743,
|
|
"fcm_dpo/beta": 2.0559816360473633,
|
|
"fcm_dpo/delta": -0.2410603016614914,
|
|
"fcm_dpo/margin": 0.589635968208313,
|
|
"fcm_dpo/q_t": 0.2946144938468933,
|
|
"grad_norm": 428.92987060546875,
|
|
"learning_rate": 4.631254907558365e-07,
|
|
"logits/chosen": 0.17183159291744232,
|
|
"logits/rejected": 0.12959660589694977,
|
|
"logps/chosen": -52.974788665771484,
|
|
"logps/ref_chosen": -52.201759338378906,
|
|
"logps/ref_rejected": -82.85285949707031,
|
|
"logps/rejected": -84.21553039550781,
|
|
"loss": 0.949,
|
|
"margin_dpo/margin_mean": 0.5896360874176025,
|
|
"margin_dpo/margin_std": 0.7388289570808411,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.2615268329554044,
|
|
"fcm_dpo/beta": 1.8781511783599854,
|
|
"fcm_dpo/delta": -0.25002074241638184,
|
|
"fcm_dpo/margin": 0.6354281902313232,
|
|
"fcm_dpo/q_t": 0.32253411412239075,
|
|
"grad_norm": 381.1091613769531,
|
|
"learning_rate": 4.624313574873786e-07,
|
|
"logits/chosen": 0.14371845126152039,
|
|
"logits/rejected": 0.07756569981575012,
|
|
"logps/chosen": -56.25102233886719,
|
|
"logps/ref_chosen": -55.434722900390625,
|
|
"logps/ref_rejected": -77.81967163085938,
|
|
"logps/rejected": -79.27140045166016,
|
|
"loss": 0.9918,
|
|
"margin_dpo/margin_mean": 0.6354283094406128,
|
|
"margin_dpo/margin_std": 0.9134526252746582,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.26303854875283444,
|
|
"fcm_dpo/beta": 1.8751147985458374,
|
|
"fcm_dpo/delta": -0.21836894750595093,
|
|
"fcm_dpo/margin": 0.6357536315917969,
|
|
"fcm_dpo/q_t": 0.3092048168182373,
|
|
"grad_norm": 498.1719665527344,
|
|
"learning_rate": 4.61731282057198e-07,
|
|
"logits/chosen": 0.12083408981561661,
|
|
"logits/rejected": 0.07103556394577026,
|
|
"logps/chosen": -57.99762725830078,
|
|
"logps/ref_chosen": -57.17195129394531,
|
|
"logps/ref_rejected": -85.47578430175781,
|
|
"logps/rejected": -86.93720245361328,
|
|
"loss": 1.0066,
|
|
"margin_dpo/margin_mean": 0.6357530355453491,
|
|
"margin_dpo/margin_std": 0.8986474275588989,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.26455026455026454,
|
|
"fcm_dpo/beta": 1.770094394683838,
|
|
"fcm_dpo/delta": -0.23501265048980713,
|
|
"fcm_dpo/margin": 0.6816864013671875,
|
|
"fcm_dpo/q_t": 0.30375754833221436,
|
|
"grad_norm": 463.9217529296875,
|
|
"learning_rate": 4.6102528404790965e-07,
|
|
"logits/chosen": 0.18367326259613037,
|
|
"logits/rejected": 0.15908128023147583,
|
|
"logps/chosen": -68.52366638183594,
|
|
"logps/ref_chosen": -67.6656265258789,
|
|
"logps/ref_rejected": -84.36766815185547,
|
|
"logps/rejected": -85.90739440917969,
|
|
"loss": 0.9656,
|
|
"margin_dpo/margin_mean": 0.6816866397857666,
|
|
"margin_dpo/margin_std": 0.915657639503479,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.2660619803476946,
|
|
"fcm_dpo/beta": 1.7365822792053223,
|
|
"fcm_dpo/delta": 0.01569700986146927,
|
|
"fcm_dpo/margin": 0.5668948888778687,
|
|
"fcm_dpo/q_t": 0.37061506509780884,
|
|
"grad_norm": 517.9691162109375,
|
|
"learning_rate": 4.603133832077953e-07,
|
|
"logits/chosen": 0.08695434033870697,
|
|
"logits/rejected": 0.06711474061012268,
|
|
"logps/chosen": -78.7764892578125,
|
|
"logps/ref_chosen": -77.8587646484375,
|
|
"logps/ref_rejected": -81.08732604980469,
|
|
"logps/rejected": -82.57195281982422,
|
|
"loss": 1.2356,
|
|
"margin_dpo/margin_mean": 0.5668948292732239,
|
|
"margin_dpo/margin_std": 1.1584200859069824,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.2675736961451247,
|
|
"fcm_dpo/beta": 1.6431140899658203,
|
|
"fcm_dpo/delta": -0.37270694971084595,
|
|
"fcm_dpo/margin": 0.8022236227989197,
|
|
"fcm_dpo/q_t": 0.2722761631011963,
|
|
"grad_norm": 448.8039245605469,
|
|
"learning_rate": 4.5959559945025183e-07,
|
|
"logits/chosen": 0.24163630604743958,
|
|
"logits/rejected": 0.1636749804019928,
|
|
"logps/chosen": -56.08782958984375,
|
|
"logps/ref_chosen": -55.22039794921875,
|
|
"logps/ref_rejected": -92.54973602294922,
|
|
"logps/rejected": -94.21939086914062,
|
|
"loss": 0.8902,
|
|
"margin_dpo/margin_mean": 0.8022229671478271,
|
|
"margin_dpo/margin_std": 1.0025627613067627,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.2690854119425548,
|
|
"fcm_dpo/beta": 1.6737594604492188,
|
|
"fcm_dpo/delta": 0.21938863396644592,
|
|
"fcm_dpo/margin": 0.47716161608695984,
|
|
"fcm_dpo/q_t": 0.3556697368621826,
|
|
"grad_norm": 458.8934020996094,
|
|
"learning_rate": 4.588719528532341e-07,
|
|
"logits/chosen": 0.08861234784126282,
|
|
"logits/rejected": 0.05083230137825012,
|
|
"logps/chosen": -61.76763916015625,
|
|
"logps/ref_chosen": -60.81049346923828,
|
|
"logps/ref_rejected": -81.12973022460938,
|
|
"logps/rejected": -82.56403350830078,
|
|
"loss": 1.0621,
|
|
"margin_dpo/margin_mean": 0.47716209292411804,
|
|
"margin_dpo/margin_std": 0.7696354985237122,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.2705971277399849,
|
|
"fcm_dpo/beta": 1.7257657051086426,
|
|
"fcm_dpo/delta": 0.10213658213615417,
|
|
"fcm_dpo/margin": 0.5256574153900146,
|
|
"fcm_dpo/q_t": 0.35149049758911133,
|
|
"grad_norm": 419.9453125,
|
|
"learning_rate": 4.581424636586928e-07,
|
|
"logits/chosen": 0.16226297616958618,
|
|
"logits/rejected": 0.1488857865333557,
|
|
"logps/chosen": -66.7244644165039,
|
|
"logps/ref_chosen": -65.67171478271484,
|
|
"logps/ref_rejected": -75.32586669921875,
|
|
"logps/rejected": -76.9042739868164,
|
|
"loss": 1.0632,
|
|
"margin_dpo/margin_mean": 0.5256578922271729,
|
|
"margin_dpo/margin_std": 0.8762655258178711,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.272108843537415,
|
|
"fcm_dpo/beta": 1.8206181526184082,
|
|
"fcm_dpo/delta": 0.2291109263896942,
|
|
"fcm_dpo/margin": 0.42961639165878296,
|
|
"fcm_dpo/q_t": 0.36887508630752563,
|
|
"grad_norm": 520.1433715820312,
|
|
"learning_rate": 4.5740715227200897e-07,
|
|
"logits/chosen": -0.0028491299599409103,
|
|
"logits/rejected": -0.017982792109251022,
|
|
"logps/chosen": -57.53323745727539,
|
|
"logps/ref_chosen": -56.68280792236328,
|
|
"logps/ref_rejected": -64.94414520263672,
|
|
"logps/rejected": -66.22418975830078,
|
|
"loss": 1.2665,
|
|
"margin_dpo/margin_mean": 0.4296168386936188,
|
|
"margin_dpo/margin_std": 0.9101868867874146,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.273620559334845,
|
|
"fcm_dpo/beta": 1.7677171230316162,
|
|
"fcm_dpo/delta": -0.24141666293144226,
|
|
"fcm_dpo/margin": 0.6860055923461914,
|
|
"fcm_dpo/q_t": 0.2977555990219116,
|
|
"grad_norm": 448.171630859375,
|
|
"learning_rate": 4.566660392614228e-07,
|
|
"logits/chosen": 0.13813161849975586,
|
|
"logits/rejected": 0.10955438017845154,
|
|
"logps/chosen": -61.58479309082031,
|
|
"logps/ref_chosen": -60.77604675292969,
|
|
"logps/ref_rejected": -83.98361206054688,
|
|
"logps/rejected": -85.47836303710938,
|
|
"loss": 0.9069,
|
|
"margin_dpo/margin_mean": 0.686005711555481,
|
|
"margin_dpo/margin_std": 0.8478412628173828,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.2751322751322751,
|
|
"fcm_dpo/beta": 1.698185920715332,
|
|
"fcm_dpo/delta": -0.05800933390855789,
|
|
"fcm_dpo/margin": 0.6177934408187866,
|
|
"fcm_dpo/q_t": 0.3166268467903137,
|
|
"grad_norm": 430.59722900390625,
|
|
"learning_rate": 4.5591914535745817e-07,
|
|
"logits/chosen": 0.1499433070421219,
|
|
"logits/rejected": 0.09003090858459473,
|
|
"logps/chosen": -61.18524932861328,
|
|
"logps/ref_chosen": -60.2537841796875,
|
|
"logps/ref_rejected": -89.7706298828125,
|
|
"logps/rejected": -91.31988525390625,
|
|
"loss": 1.0536,
|
|
"margin_dpo/margin_mean": 0.6177935004234314,
|
|
"margin_dpo/margin_std": 0.9228367209434509,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.2766439909297052,
|
|
"fcm_dpo/beta": 1.8208626508712769,
|
|
"fcm_dpo/delta": 0.4041329324245453,
|
|
"fcm_dpo/margin": 0.34058958292007446,
|
|
"fcm_dpo/q_t": 0.3961232900619507,
|
|
"grad_norm": 510.5823059082031,
|
|
"learning_rate": 4.551664914523433e-07,
|
|
"logits/chosen": 0.1353759616613388,
|
|
"logits/rejected": 0.12078934907913208,
|
|
"logps/chosen": -62.945030212402344,
|
|
"logps/ref_chosen": -61.76142120361328,
|
|
"logps/ref_rejected": -72.54627990722656,
|
|
"logps/rejected": -74.07048034667969,
|
|
"loss": 1.3364,
|
|
"margin_dpo/margin_mean": 0.3405901789665222,
|
|
"margin_dpo/margin_std": 0.8557813763618469,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.2781557067271353,
|
|
"fcm_dpo/beta": 1.8339712619781494,
|
|
"fcm_dpo/delta": 0.05328105390071869,
|
|
"fcm_dpo/margin": 0.5154128074645996,
|
|
"fcm_dpo/q_t": 0.34025606513023376,
|
|
"grad_norm": 346.7197265625,
|
|
"learning_rate": 4.544080985994258e-07,
|
|
"logits/chosen": 0.21997570991516113,
|
|
"logits/rejected": 0.1718028038740158,
|
|
"logps/chosen": -47.82099151611328,
|
|
"logps/ref_chosen": -46.840721130371094,
|
|
"logps/ref_rejected": -69.3609390258789,
|
|
"logps/rejected": -70.85662841796875,
|
|
"loss": 0.989,
|
|
"margin_dpo/margin_mean": 0.5154126882553101,
|
|
"margin_dpo/margin_std": 0.7232804298400879,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.2796674225245654,
|
|
"fcm_dpo/beta": 1.8281052112579346,
|
|
"fcm_dpo/delta": -0.15045057237148285,
|
|
"fcm_dpo/margin": 0.6196208000183105,
|
|
"fcm_dpo/q_t": 0.321952223777771,
|
|
"grad_norm": 413.64068603515625,
|
|
"learning_rate": 4.5364398801258394e-07,
|
|
"logits/chosen": 0.14673639833927155,
|
|
"logits/rejected": 0.10907743126153946,
|
|
"logps/chosen": -53.348968505859375,
|
|
"logps/ref_chosen": -52.32114028930664,
|
|
"logps/ref_rejected": -68.3885726928711,
|
|
"logps/rejected": -70.03601837158203,
|
|
"loss": 1.0869,
|
|
"margin_dpo/margin_mean": 0.6196208000183105,
|
|
"margin_dpo/margin_std": 1.0106725692749023,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.2811791383219955,
|
|
"fcm_dpo/beta": 1.8034803867340088,
|
|
"fcm_dpo/delta": -0.005721554160118103,
|
|
"fcm_dpo/margin": 0.5566083788871765,
|
|
"fcm_dpo/q_t": 0.3407011032104492,
|
|
"grad_norm": 465.7325744628906,
|
|
"learning_rate": 4.5287418106563354e-07,
|
|
"logits/chosen": 0.07695234566926956,
|
|
"logits/rejected": 0.04599303752183914,
|
|
"logps/chosen": -68.40414428710938,
|
|
"logps/ref_chosen": -67.42012786865234,
|
|
"logps/ref_rejected": -82.50968933105469,
|
|
"logps/rejected": -84.0503158569336,
|
|
"loss": 1.0608,
|
|
"margin_dpo/margin_mean": 0.5566080808639526,
|
|
"margin_dpo/margin_std": 0.8762015104293823,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.28269085411942557,
|
|
"fcm_dpo/beta": 1.7329106330871582,
|
|
"fcm_dpo/delta": -0.18373380601406097,
|
|
"fcm_dpo/margin": 0.6642186641693115,
|
|
"fcm_dpo/q_t": 0.32172733545303345,
|
|
"grad_norm": 530.1708374023438,
|
|
"learning_rate": 4.520986992917297e-07,
|
|
"logits/chosen": 0.14111362397670746,
|
|
"logits/rejected": 0.09435050934553146,
|
|
"logps/chosen": -76.567138671875,
|
|
"logps/ref_chosen": -75.52549743652344,
|
|
"logps/ref_rejected": -94.76289367675781,
|
|
"logps/rejected": -96.46875,
|
|
"loss": 1.1758,
|
|
"margin_dpo/margin_mean": 0.6642183661460876,
|
|
"margin_dpo/margin_std": 1.1894935369491577,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.2842025699168556,
|
|
"fcm_dpo/beta": 1.744195818901062,
|
|
"fcm_dpo/delta": 0.005925014615058899,
|
|
"fcm_dpo/margin": 0.5699671506881714,
|
|
"fcm_dpo/q_t": 0.3193795382976532,
|
|
"grad_norm": 516.5187377929688,
|
|
"learning_rate": 4.5131756438276466e-07,
|
|
"logits/chosen": 0.151597797870636,
|
|
"logits/rejected": 0.11677326261997223,
|
|
"logps/chosen": -72.44251251220703,
|
|
"logps/ref_chosen": -71.52333068847656,
|
|
"logps/ref_rejected": -78.29949951171875,
|
|
"logps/rejected": -79.78865814208984,
|
|
"loss": 1.1632,
|
|
"margin_dpo/margin_mean": 0.56996750831604,
|
|
"margin_dpo/margin_std": 1.0281386375427246,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.2857142857142857,
|
|
"fcm_dpo/beta": 1.6932382583618164,
|
|
"fcm_dpo/delta": -0.15139150619506836,
|
|
"fcm_dpo/margin": 0.6669385433197021,
|
|
"fcm_dpo/q_t": 0.30612969398498535,
|
|
"grad_norm": 391.1927795410156,
|
|
"learning_rate": 4.5053079818876096e-07,
|
|
"logits/chosen": 0.12143361568450928,
|
|
"logits/rejected": 0.12991394102573395,
|
|
"logps/chosen": -73.05665588378906,
|
|
"logps/ref_chosen": -72.17626953125,
|
|
"logps/ref_rejected": -75.26313781738281,
|
|
"logps/rejected": -76.81045532226562,
|
|
"loss": 0.8928,
|
|
"margin_dpo/margin_mean": 0.6669397950172424,
|
|
"margin_dpo/margin_std": 0.832381010055542,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.2872260015117158,
|
|
"fcm_dpo/beta": 1.6875749826431274,
|
|
"fcm_dpo/delta": -0.09505629539489746,
|
|
"fcm_dpo/margin": 0.642721951007843,
|
|
"fcm_dpo/q_t": 0.32302048802375793,
|
|
"grad_norm": 424.7405090332031,
|
|
"learning_rate": 4.4973842271726024e-07,
|
|
"logits/chosen": 0.19074919819831848,
|
|
"logits/rejected": 0.081771120429039,
|
|
"logps/chosen": -55.586639404296875,
|
|
"logps/ref_chosen": -54.624271392822266,
|
|
"logps/ref_rejected": -101.47068786621094,
|
|
"logps/rejected": -103.0757827758789,
|
|
"loss": 1.0135,
|
|
"margin_dpo/margin_mean": 0.6427220106124878,
|
|
"margin_dpo/margin_std": 0.9737996459007263,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.2887377173091459,
|
|
"fcm_dpo/beta": 1.6515135765075684,
|
|
"fcm_dpo/delta": -0.04676612466573715,
|
|
"fcm_dpo/margin": 0.6305712461471558,
|
|
"fcm_dpo/q_t": 0.31939807534217834,
|
|
"grad_norm": 493.140625,
|
|
"learning_rate": 4.48940460132708e-07,
|
|
"logits/chosen": 0.1993117779493332,
|
|
"logits/rejected": 0.1801232248544693,
|
|
"logps/chosen": -74.03681945800781,
|
|
"logps/ref_chosen": -72.93251037597656,
|
|
"logps/ref_rejected": -89.95103454589844,
|
|
"logps/rejected": -91.68590545654297,
|
|
"loss": 1.047,
|
|
"margin_dpo/margin_mean": 0.6305709481239319,
|
|
"margin_dpo/margin_std": 0.9657796621322632,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.29024943310657597,
|
|
"fcm_dpo/beta": 1.7161282300949097,
|
|
"fcm_dpo/delta": 0.19982855021953583,
|
|
"fcm_dpo/margin": 0.47440922260284424,
|
|
"fcm_dpo/q_t": 0.3627406060695648,
|
|
"grad_norm": 372.83734130859375,
|
|
"learning_rate": 4.481369327558329e-07,
|
|
"logits/chosen": 0.16536489129066467,
|
|
"logits/rejected": 0.1462571918964386,
|
|
"logps/chosen": -55.00927734375,
|
|
"logps/ref_chosen": -54.001121520996094,
|
|
"logps/ref_rejected": -63.531551361083984,
|
|
"logps/rejected": -65.01411437988281,
|
|
"loss": 1.1335,
|
|
"margin_dpo/margin_mean": 0.4744090139865875,
|
|
"margin_dpo/margin_std": 0.8528145551681519,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.29176114890400606,
|
|
"fcm_dpo/beta": 1.6729130744934082,
|
|
"fcm_dpo/delta": -0.11725394427776337,
|
|
"fcm_dpo/margin": 0.6579139828681946,
|
|
"fcm_dpo/q_t": 0.3185346722602844,
|
|
"grad_norm": 344.4507751464844,
|
|
"learning_rate": 4.47327863063023e-07,
|
|
"logits/chosen": 0.09357620775699615,
|
|
"logits/rejected": 0.07541916519403458,
|
|
"logps/chosen": -57.7445182800293,
|
|
"logps/ref_chosen": -56.74927520751953,
|
|
"logps/ref_rejected": -58.80629348754883,
|
|
"logps/rejected": -60.459449768066406,
|
|
"loss": 0.9244,
|
|
"margin_dpo/margin_mean": 0.6579139232635498,
|
|
"margin_dpo/margin_std": 0.9124239087104797,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.29327286470143615,
|
|
"fcm_dpo/beta": 1.6954293251037598,
|
|
"fcm_dpo/delta": 0.1348000019788742,
|
|
"fcm_dpo/margin": 0.5165129899978638,
|
|
"fcm_dpo/q_t": 0.36234208941459656,
|
|
"grad_norm": 409.556396484375,
|
|
"learning_rate": 4.4651327368569684e-07,
|
|
"logits/chosen": 0.1781534105539322,
|
|
"logits/rejected": 0.15505832433700562,
|
|
"logps/chosen": -57.62710189819336,
|
|
"logps/ref_chosen": -56.64944076538086,
|
|
"logps/ref_rejected": -69.98954772949219,
|
|
"logps/rejected": -71.48371887207031,
|
|
"loss": 1.1709,
|
|
"margin_dpo/margin_mean": 0.5165130496025085,
|
|
"margin_dpo/margin_std": 0.9614365100860596,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.2947845804988662,
|
|
"fcm_dpo/beta": 1.7598028182983398,
|
|
"fcm_dpo/delta": 0.22711437940597534,
|
|
"fcm_dpo/margin": 0.4485671818256378,
|
|
"fcm_dpo/q_t": 0.3598003685474396,
|
|
"grad_norm": 496.6513977050781,
|
|
"learning_rate": 4.4569318740967043e-07,
|
|
"logits/chosen": 0.09457789361476898,
|
|
"logits/rejected": 0.09252005815505981,
|
|
"logps/chosen": -71.65326690673828,
|
|
"logps/ref_chosen": -70.40977478027344,
|
|
"logps/ref_rejected": -74.39448547363281,
|
|
"logps/rejected": -76.08654022216797,
|
|
"loss": 1.2491,
|
|
"margin_dpo/margin_mean": 0.44856685400009155,
|
|
"margin_dpo/margin_std": 0.9060893058776855,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.2962962962962963,
|
|
"fcm_dpo/beta": 1.8433669805526733,
|
|
"fcm_dpo/delta": 0.033393874764442444,
|
|
"fcm_dpo/margin": 0.5237653255462646,
|
|
"fcm_dpo/q_t": 0.34850770235061646,
|
|
"grad_norm": 410.0362854003906,
|
|
"learning_rate": 4.448676271745197e-07,
|
|
"logits/chosen": 0.1736685037612915,
|
|
"logits/rejected": 0.14138346910476685,
|
|
"logps/chosen": -60.27099609375,
|
|
"logps/ref_chosen": -59.227577209472656,
|
|
"logps/ref_rejected": -83.54757690429688,
|
|
"logps/rejected": -85.11476135253906,
|
|
"loss": 1.193,
|
|
"margin_dpo/margin_mean": 0.5237653255462646,
|
|
"margin_dpo/margin_std": 0.9456428289413452,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.29780801209372637,
|
|
"fcm_dpo/beta": 1.8353400230407715,
|
|
"fcm_dpo/delta": -0.15863925218582153,
|
|
"fcm_dpo/margin": 0.6141480803489685,
|
|
"fcm_dpo/q_t": 0.33761459589004517,
|
|
"grad_norm": 520.2391967773438,
|
|
"learning_rate": 4.440366160729392e-07,
|
|
"logits/chosen": 0.23479902744293213,
|
|
"logits/rejected": 0.1985134482383728,
|
|
"logps/chosen": -52.59620666503906,
|
|
"logps/ref_chosen": -51.52912902832031,
|
|
"logps/ref_rejected": -73.70631408691406,
|
|
"logps/rejected": -75.38753509521484,
|
|
"loss": 1.2601,
|
|
"margin_dpo/margin_mean": 0.6141484975814819,
|
|
"margin_dpo/margin_std": 1.1263779401779175,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.29931972789115646,
|
|
"fcm_dpo/beta": 1.6845048666000366,
|
|
"fcm_dpo/delta": -0.392536461353302,
|
|
"fcm_dpo/margin": 0.796295702457428,
|
|
"fcm_dpo/q_t": 0.2878139615058899,
|
|
"grad_norm": 392.1756591796875,
|
|
"learning_rate": 4.432001773500957e-07,
|
|
"logits/chosen": 0.19247442483901978,
|
|
"logits/rejected": 0.16175703704357147,
|
|
"logps/chosen": -60.77484893798828,
|
|
"logps/ref_chosen": -59.78268051147461,
|
|
"logps/ref_rejected": -72.24533081054688,
|
|
"logps/rejected": -74.03379821777344,
|
|
"loss": 0.8946,
|
|
"margin_dpo/margin_mean": 0.796296238899231,
|
|
"margin_dpo/margin_std": 1.0002985000610352,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.30083144368858655,
|
|
"fcm_dpo/beta": 1.6466844081878662,
|
|
"fcm_dpo/delta": 0.005560420453548431,
|
|
"fcm_dpo/margin": 0.6032355427742004,
|
|
"fcm_dpo/q_t": 0.3430374562740326,
|
|
"grad_norm": 385.1544494628906,
|
|
"learning_rate": 4.4235833440297856e-07,
|
|
"logits/chosen": 0.13208839297294617,
|
|
"logits/rejected": 0.0626702532172203,
|
|
"logps/chosen": -57.460811614990234,
|
|
"logps/ref_chosen": -56.38677215576172,
|
|
"logps/ref_rejected": -74.56779479980469,
|
|
"logps/rejected": -76.24507141113281,
|
|
"loss": 1.0833,
|
|
"margin_dpo/margin_mean": 0.6032348275184631,
|
|
"margin_dpo/margin_std": 0.970983624458313,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.30234315948601664,
|
|
"fcm_dpo/beta": 1.5712354183197021,
|
|
"fcm_dpo/delta": -0.1618885099887848,
|
|
"fcm_dpo/margin": 0.7240477800369263,
|
|
"fcm_dpo/q_t": 0.327957421541214,
|
|
"grad_norm": 412.8072204589844,
|
|
"learning_rate": 4.415111107797445e-07,
|
|
"logits/chosen": 0.1844407021999359,
|
|
"logits/rejected": 0.12967121601104736,
|
|
"logps/chosen": -58.72947692871094,
|
|
"logps/ref_chosen": -57.82432556152344,
|
|
"logps/ref_rejected": -89.28246307373047,
|
|
"logps/rejected": -90.91166687011719,
|
|
"loss": 1.0309,
|
|
"margin_dpo/margin_mean": 0.7240477800369263,
|
|
"margin_dpo/margin_std": 1.1023731231689453,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.30234315948601664,
|
|
"eval_fcm_dpo/beta": 1.5800285339355469,
|
|
"eval_logits/chosen": 0.16845357418060303,
|
|
"eval_logits/rejected": 0.13373498618602753,
|
|
"eval_logps/chosen": -75.8633041381836,
|
|
"eval_logps/ref_chosen": -74.85946655273438,
|
|
"eval_logps/ref_rejected": -79.54898834228516,
|
|
"eval_logps/rejected": -81.05730438232422,
|
|
"eval_loss": 0.6114334464073181,
|
|
"eval_margin_dpo/margin_mean": 0.5044752359390259,
|
|
"eval_margin_dpo/margin_std": 1.028841495513916,
|
|
"eval_runtime": 38.0264,
|
|
"eval_samples_per_second": 60.563,
|
|
"eval_steps_per_second": 1.893,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.30385487528344673,
|
|
"fcm_dpo/beta": 1.5763221979141235,
|
|
"fcm_dpo/delta": 0.00995655357837677,
|
|
"fcm_dpo/margin": 0.6285428404808044,
|
|
"fcm_dpo/q_t": 0.33469393849372864,
|
|
"grad_norm": 368.6453552246094,
|
|
"learning_rate": 4.4065853017905953e-07,
|
|
"logits/chosen": 0.20987267792224884,
|
|
"logits/rejected": 0.17573854327201843,
|
|
"logps/chosen": -60.036251068115234,
|
|
"logps/ref_chosen": -58.999759674072266,
|
|
"logps/ref_rejected": -84.67575073242188,
|
|
"logps/rejected": -86.34077453613281,
|
|
"loss": 0.9778,
|
|
"margin_dpo/margin_mean": 0.6285424828529358,
|
|
"margin_dpo/margin_std": 0.8790519833564758,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.30536659108087677,
|
|
"fcm_dpo/beta": 1.5742418766021729,
|
|
"fcm_dpo/delta": -0.20220552384853363,
|
|
"fcm_dpo/margin": 0.7447031736373901,
|
|
"fcm_dpo/q_t": 0.306610643863678,
|
|
"grad_norm": 324.744384765625,
|
|
"learning_rate": 4.3980061644943575e-07,
|
|
"logits/chosen": 0.08034056425094604,
|
|
"logits/rejected": 0.031506434082984924,
|
|
"logps/chosen": -48.6182861328125,
|
|
"logps/ref_chosen": -47.660648345947266,
|
|
"logps/ref_rejected": -73.63249969482422,
|
|
"logps/rejected": -75.3348388671875,
|
|
"loss": 0.918,
|
|
"margin_dpo/margin_mean": 0.7447031736373901,
|
|
"margin_dpo/margin_std": 0.9296808242797852,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.30687830687830686,
|
|
"fcm_dpo/beta": 1.5995709896087646,
|
|
"fcm_dpo/delta": 0.29017671942710876,
|
|
"fcm_dpo/margin": 0.45493584871292114,
|
|
"fcm_dpo/q_t": 0.3810551166534424,
|
|
"grad_norm": 469.91229248046875,
|
|
"learning_rate": 4.3893739358856455e-07,
|
|
"logits/chosen": 0.19242677092552185,
|
|
"logits/rejected": 0.14212460815906525,
|
|
"logps/chosen": -63.31700134277344,
|
|
"logps/ref_chosen": -62.32553482055664,
|
|
"logps/ref_rejected": -99.37226104736328,
|
|
"logps/rejected": -100.81866455078125,
|
|
"loss": 1.2402,
|
|
"margin_dpo/margin_mean": 0.454935759305954,
|
|
"margin_dpo/margin_std": 0.9751724004745483,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.30839002267573695,
|
|
"fcm_dpo/beta": 1.5674870014190674,
|
|
"fcm_dpo/delta": -0.029434487223625183,
|
|
"fcm_dpo/margin": 0.6477770209312439,
|
|
"fcm_dpo/q_t": 0.3433707654476166,
|
|
"grad_norm": 368.2581787109375,
|
|
"learning_rate": 4.380688857426449e-07,
|
|
"logits/chosen": 0.0801263153553009,
|
|
"logits/rejected": 0.030903467908501625,
|
|
"logps/chosen": -51.68293762207031,
|
|
"logps/ref_chosen": -50.62931823730469,
|
|
"logps/ref_rejected": -66.60475158691406,
|
|
"logps/rejected": -68.30615234375,
|
|
"loss": 1.1195,
|
|
"margin_dpo/margin_mean": 0.6477770805358887,
|
|
"margin_dpo/margin_std": 1.0862679481506348,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.30990173847316704,
|
|
"fcm_dpo/beta": 1.653544306755066,
|
|
"fcm_dpo/delta": 0.27683955430984497,
|
|
"fcm_dpo/margin": 0.450222909450531,
|
|
"fcm_dpo/q_t": 0.3771224319934845,
|
|
"grad_norm": 536.0993041992188,
|
|
"learning_rate": 4.3719511720570814e-07,
|
|
"logits/chosen": 0.16879329085350037,
|
|
"logits/rejected": 0.12696264684200287,
|
|
"logps/chosen": -71.54264831542969,
|
|
"logps/ref_chosen": -70.3561782836914,
|
|
"logps/ref_rejected": -93.39848327636719,
|
|
"logps/rejected": -95.03517150878906,
|
|
"loss": 1.3927,
|
|
"margin_dpo/margin_mean": 0.4502222239971161,
|
|
"margin_dpo/margin_std": 1.1024572849273682,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.31141345427059713,
|
|
"fcm_dpo/beta": 1.7623982429504395,
|
|
"fcm_dpo/delta": 0.12155643105506897,
|
|
"fcm_dpo/margin": 0.49780306220054626,
|
|
"fcm_dpo/q_t": 0.3561771512031555,
|
|
"grad_norm": 486.98162841796875,
|
|
"learning_rate": 4.363161124189387e-07,
|
|
"logits/chosen": 0.18706491589546204,
|
|
"logits/rejected": 0.17341138422489166,
|
|
"logps/chosen": -68.82878112792969,
|
|
"logps/ref_chosen": -67.64547729492188,
|
|
"logps/ref_rejected": -79.89584350585938,
|
|
"logps/rejected": -81.57694244384766,
|
|
"loss": 1.2778,
|
|
"margin_dpo/margin_mean": 0.497803270816803,
|
|
"margin_dpo/margin_std": 0.9891307353973389,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.3129251700680272,
|
|
"fcm_dpo/beta": 1.7180607318878174,
|
|
"fcm_dpo/delta": 0.0706307590007782,
|
|
"fcm_dpo/margin": 0.5409280061721802,
|
|
"fcm_dpo/q_t": 0.35449251532554626,
|
|
"grad_norm": 428.377685546875,
|
|
"learning_rate": 4.3543189596998986e-07,
|
|
"logits/chosen": 0.10714876651763916,
|
|
"logits/rejected": 0.05734197795391083,
|
|
"logps/chosen": -68.83715057373047,
|
|
"logps/ref_chosen": -67.66419219970703,
|
|
"logps/ref_rejected": -85.10249328613281,
|
|
"logps/rejected": -86.81637573242188,
|
|
"loss": 1.1493,
|
|
"margin_dpo/margin_mean": 0.5409282445907593,
|
|
"margin_dpo/margin_std": 1.005875825881958,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.3144368858654573,
|
|
"fcm_dpo/beta": 1.7887952327728271,
|
|
"fcm_dpo/delta": 0.09666138887405396,
|
|
"fcm_dpo/margin": 0.509753942489624,
|
|
"fcm_dpo/q_t": 0.3555990755558014,
|
|
"grad_norm": 445.3408508300781,
|
|
"learning_rate": 4.3454249259229664e-07,
|
|
"logits/chosen": 0.13969993591308594,
|
|
"logits/rejected": 0.11949601769447327,
|
|
"logps/chosen": -58.69970703125,
|
|
"logps/ref_chosen": -57.731712341308594,
|
|
"logps/ref_rejected": -74.19276428222656,
|
|
"logps/rejected": -75.6705093383789,
|
|
"loss": 1.2387,
|
|
"margin_dpo/margin_mean": 0.509753406047821,
|
|
"margin_dpo/margin_std": 0.9776418209075928,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.31594860166288735,
|
|
"fcm_dpo/beta": 1.7703063488006592,
|
|
"fcm_dpo/delta": -0.3099019527435303,
|
|
"fcm_dpo/margin": 0.7123583555221558,
|
|
"fcm_dpo/q_t": 0.3118298351764679,
|
|
"grad_norm": 475.9559020996094,
|
|
"learning_rate": 4.336479271643833e-07,
|
|
"logits/chosen": 0.08468753099441528,
|
|
"logits/rejected": 0.04272126033902168,
|
|
"logps/chosen": -69.55844116210938,
|
|
"logps/ref_chosen": -68.55007934570312,
|
|
"logps/ref_rejected": -87.90541076660156,
|
|
"logps/rejected": -89.62612915039062,
|
|
"loss": 1.059,
|
|
"margin_dpo/margin_mean": 0.7123589515686035,
|
|
"margin_dpo/margin_std": 1.0521396398544312,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.31746031746031744,
|
|
"fcm_dpo/beta": 1.604590892791748,
|
|
"fcm_dpo/delta": -0.3447558283805847,
|
|
"fcm_dpo/margin": 0.8102937340736389,
|
|
"fcm_dpo/q_t": 0.31004101037979126,
|
|
"grad_norm": 367.3415832519531,
|
|
"learning_rate": 4.327482247091679e-07,
|
|
"logits/chosen": 0.17499999701976776,
|
|
"logits/rejected": 0.10814614593982697,
|
|
"logps/chosen": -58.340545654296875,
|
|
"logps/ref_chosen": -57.268272399902344,
|
|
"logps/ref_rejected": -85.72807312011719,
|
|
"logps/rejected": -87.61064147949219,
|
|
"loss": 0.9243,
|
|
"margin_dpo/margin_mean": 0.8102930784225464,
|
|
"margin_dpo/margin_std": 1.078457236289978,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.31897203325774753,
|
|
"fcm_dpo/beta": 1.5751144886016846,
|
|
"fcm_dpo/delta": -0.008975658565759659,
|
|
"fcm_dpo/margin": 0.6399465799331665,
|
|
"fcm_dpo/q_t": 0.33030977845191956,
|
|
"grad_norm": 443.0260009765625,
|
|
"learning_rate": 4.3184341039326217e-07,
|
|
"logits/chosen": 0.1592363566160202,
|
|
"logits/rejected": 0.09597043693065643,
|
|
"logps/chosen": -54.56357192993164,
|
|
"logps/ref_chosen": -53.640708923339844,
|
|
"logps/ref_rejected": -93.0387954711914,
|
|
"logps/rejected": -94.60160827636719,
|
|
"loss": 0.9671,
|
|
"margin_dpo/margin_mean": 0.6399465799331665,
|
|
"margin_dpo/margin_std": 0.9168812036514282,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.3204837490551776,
|
|
"fcm_dpo/beta": 1.564136028289795,
|
|
"fcm_dpo/delta": -0.06847534328699112,
|
|
"fcm_dpo/margin": 0.678282618522644,
|
|
"fcm_dpo/q_t": 0.3206981420516968,
|
|
"grad_norm": 370.3670349121094,
|
|
"learning_rate": 4.309335095262675e-07,
|
|
"logits/chosen": 0.17273937165737152,
|
|
"logits/rejected": 0.11869757622480392,
|
|
"logps/chosen": -58.41630554199219,
|
|
"logps/ref_chosen": -57.36674499511719,
|
|
"logps/ref_rejected": -79.89643096923828,
|
|
"logps/rejected": -81.62427520751953,
|
|
"loss": 1.027,
|
|
"margin_dpo/margin_mean": 0.678282618522644,
|
|
"margin_dpo/margin_std": 1.0015318393707275,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.3219954648526077,
|
|
"fcm_dpo/beta": 1.527937889099121,
|
|
"fcm_dpo/delta": -0.042714398354291916,
|
|
"fcm_dpo/margin": 0.6786133050918579,
|
|
"fcm_dpo/q_t": 0.3305957615375519,
|
|
"grad_norm": 391.22064208984375,
|
|
"learning_rate": 4.3001854756006724e-07,
|
|
"logits/chosen": 0.15932638943195343,
|
|
"logits/rejected": 0.138889878988266,
|
|
"logps/chosen": -66.10162353515625,
|
|
"logps/ref_chosen": -65.22111511230469,
|
|
"logps/ref_rejected": -80.1810302734375,
|
|
"logps/rejected": -81.74015045166016,
|
|
"loss": 1.0605,
|
|
"margin_dpo/margin_mean": 0.6786131858825684,
|
|
"margin_dpo/margin_std": 1.066502332687378,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.3235071806500378,
|
|
"fcm_dpo/beta": 1.542710781097412,
|
|
"fcm_dpo/delta": -0.057289645075798035,
|
|
"fcm_dpo/margin": 0.6799356937408447,
|
|
"fcm_dpo/q_t": 0.33246564865112305,
|
|
"grad_norm": 433.1566467285156,
|
|
"learning_rate": 4.290985500881143e-07,
|
|
"logits/chosen": 0.06008949503302574,
|
|
"logits/rejected": 0.0402056947350502,
|
|
"logps/chosen": -62.26348114013672,
|
|
"logps/ref_chosen": -61.292327880859375,
|
|
"logps/ref_rejected": -67.69841003417969,
|
|
"logps/rejected": -69.3494873046875,
|
|
"loss": 1.0116,
|
|
"margin_dpo/margin_mean": 0.679936408996582,
|
|
"margin_dpo/margin_std": 1.0450165271759033,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.3250188964474679,
|
|
"fcm_dpo/beta": 1.5210988521575928,
|
|
"fcm_dpo/delta": -0.0631207600235939,
|
|
"fcm_dpo/margin": 0.6934947371482849,
|
|
"fcm_dpo/q_t": 0.33989161252975464,
|
|
"grad_norm": 409.9744567871094,
|
|
"learning_rate": 4.281735428447157e-07,
|
|
"logits/chosen": 0.08377814292907715,
|
|
"logits/rejected": 0.010123915039002895,
|
|
"logps/chosen": -64.82623291015625,
|
|
"logps/ref_chosen": -63.869136810302734,
|
|
"logps/ref_rejected": -98.7657241821289,
|
|
"logps/rejected": -100.41632080078125,
|
|
"loss": 1.0766,
|
|
"margin_dpo/margin_mean": 0.6934951543807983,
|
|
"margin_dpo/margin_std": 1.1064403057098389,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.32653061224489793,
|
|
"fcm_dpo/beta": 1.4754486083984375,
|
|
"fcm_dpo/delta": -0.2396281659603119,
|
|
"fcm_dpo/margin": 0.8188655972480774,
|
|
"fcm_dpo/q_t": 0.34051093459129333,
|
|
"grad_norm": 398.4416198730469,
|
|
"learning_rate": 4.2724355170431247e-07,
|
|
"logits/chosen": 0.16970112919807434,
|
|
"logits/rejected": 0.10684916377067566,
|
|
"logps/chosen": -68.89656829833984,
|
|
"logps/ref_chosen": -67.824951171875,
|
|
"logps/ref_rejected": -96.40231323242188,
|
|
"logps/rejected": -98.29280090332031,
|
|
"loss": 1.1031,
|
|
"margin_dpo/margin_mean": 0.8188657760620117,
|
|
"margin_dpo/margin_std": 1.4882557392120361,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.328042328042328,
|
|
"fcm_dpo/beta": 1.344929575920105,
|
|
"fcm_dpo/delta": -0.35066401958465576,
|
|
"fcm_dpo/margin": 0.9668929576873779,
|
|
"fcm_dpo/q_t": 0.28799083828926086,
|
|
"grad_norm": 300.48565673828125,
|
|
"learning_rate": 4.26308602680756e-07,
|
|
"logits/chosen": 0.11938208341598511,
|
|
"logits/rejected": 0.03593681752681732,
|
|
"logps/chosen": -61.565162658691406,
|
|
"logps/ref_chosen": -60.5049934387207,
|
|
"logps/ref_rejected": -84.26618194580078,
|
|
"logps/rejected": -86.29324340820312,
|
|
"loss": 0.8155,
|
|
"margin_dpo/margin_mean": 0.966893196105957,
|
|
"margin_dpo/margin_std": 1.1881346702575684,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.3295540438397581,
|
|
"fcm_dpo/beta": 1.4142457246780396,
|
|
"fcm_dpo/delta": 0.2700718343257904,
|
|
"fcm_dpo/margin": 0.5216431617736816,
|
|
"fcm_dpo/q_t": 0.3781411647796631,
|
|
"grad_norm": 394.3163757324219,
|
|
"learning_rate": 4.253687219265803e-07,
|
|
"logits/chosen": 0.024805322289466858,
|
|
"logits/rejected": 0.01834661327302456,
|
|
"logps/chosen": -71.73900604248047,
|
|
"logps/ref_chosen": -70.59431457519531,
|
|
"logps/ref_rejected": -73.89038848876953,
|
|
"logps/rejected": -75.55671691894531,
|
|
"loss": 1.2642,
|
|
"margin_dpo/margin_mean": 0.5216437578201294,
|
|
"margin_dpo/margin_std": 1.1325141191482544,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.3310657596371882,
|
|
"fcm_dpo/beta": 1.4622104167938232,
|
|
"fcm_dpo/delta": 0.27110588550567627,
|
|
"fcm_dpo/margin": 0.5126116275787354,
|
|
"fcm_dpo/q_t": 0.36937472224235535,
|
|
"grad_norm": 361.056884765625,
|
|
"learning_rate": 4.2442393573227043e-07,
|
|
"logits/chosen": 0.08768868446350098,
|
|
"logits/rejected": 0.05643084645271301,
|
|
"logps/chosen": -61.44779586791992,
|
|
"logps/ref_chosen": -60.490943908691406,
|
|
"logps/ref_rejected": -75.85001373291016,
|
|
"logps/rejected": -77.31947326660156,
|
|
"loss": 1.0874,
|
|
"margin_dpo/margin_mean": 0.512610912322998,
|
|
"margin_dpo/margin_std": 0.8701074123382568,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.3325774754346183,
|
|
"fcm_dpo/beta": 1.4802911281585693,
|
|
"fcm_dpo/delta": 0.026991277933120728,
|
|
"fcm_dpo/margin": 0.6585407257080078,
|
|
"fcm_dpo/q_t": 0.33476772904396057,
|
|
"grad_norm": 283.6431579589844,
|
|
"learning_rate": 4.234742705255272e-07,
|
|
"logits/chosen": 0.17591653764247894,
|
|
"logits/rejected": 0.13129651546478271,
|
|
"logps/chosen": -46.09846496582031,
|
|
"logps/ref_chosen": -45.013397216796875,
|
|
"logps/ref_rejected": -70.49369812011719,
|
|
"logps/rejected": -72.23731231689453,
|
|
"loss": 0.9934,
|
|
"margin_dpo/margin_mean": 0.6585406064987183,
|
|
"margin_dpo/margin_std": 0.9634271264076233,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.3340891912320484,
|
|
"fcm_dpo/beta": 1.486309289932251,
|
|
"fcm_dpo/delta": -0.12030621618032455,
|
|
"fcm_dpo/margin": 0.7435950636863708,
|
|
"fcm_dpo/q_t": 0.32344067096710205,
|
|
"grad_norm": 338.2693176269531,
|
|
"learning_rate": 4.22519752870528e-07,
|
|
"logits/chosen": 0.16085302829742432,
|
|
"logits/rejected": 0.11432051658630371,
|
|
"logps/chosen": -60.045074462890625,
|
|
"logps/ref_chosen": -59.09584045410156,
|
|
"logps/ref_rejected": -88.64388275146484,
|
|
"logps/rejected": -90.33671569824219,
|
|
"loss": 0.9562,
|
|
"margin_dpo/margin_mean": 0.7435950636863708,
|
|
"margin_dpo/margin_std": 1.0672008991241455,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.3356009070294785,
|
|
"fcm_dpo/beta": 1.4272222518920898,
|
|
"fcm_dpo/delta": -0.1927495002746582,
|
|
"fcm_dpo/margin": 0.8200865983963013,
|
|
"fcm_dpo/q_t": 0.2971838712692261,
|
|
"grad_norm": 337.138671875,
|
|
"learning_rate": 4.2156040946718343e-07,
|
|
"logits/chosen": 0.16604246199131012,
|
|
"logits/rejected": 0.10780028998851776,
|
|
"logps/chosen": -57.0385627746582,
|
|
"logps/ref_chosen": -55.9976921081543,
|
|
"logps/ref_rejected": -111.94727325439453,
|
|
"logps/rejected": -113.8082275390625,
|
|
"loss": 0.856,
|
|
"margin_dpo/margin_mean": 0.8200874328613281,
|
|
"margin_dpo/margin_std": 0.9770439863204956,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.3371126228269085,
|
|
"fcm_dpo/beta": 1.3298912048339844,
|
|
"fcm_dpo/delta": -0.2783888578414917,
|
|
"fcm_dpo/margin": 0.9309906363487244,
|
|
"fcm_dpo/q_t": 0.2817830443382263,
|
|
"grad_norm": 263.7952880859375,
|
|
"learning_rate": 4.2059626715039065e-07,
|
|
"logits/chosen": 0.18012914061546326,
|
|
"logits/rejected": 0.13743728399276733,
|
|
"logps/chosen": -60.831424713134766,
|
|
"logps/ref_chosen": -59.891422271728516,
|
|
"logps/ref_rejected": -86.28954315185547,
|
|
"logps/rejected": -88.16053771972656,
|
|
"loss": 0.7994,
|
|
"margin_dpo/margin_mean": 0.9309903383255005,
|
|
"margin_dpo/margin_std": 0.9911828637123108,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.3386243386243386,
|
|
"fcm_dpo/beta": 1.3959991931915283,
|
|
"fcm_dpo/delta": 0.3523348867893219,
|
|
"fcm_dpo/margin": 0.48118603229522705,
|
|
"fcm_dpo/q_t": 0.3791411519050598,
|
|
"grad_norm": 364.8034362792969,
|
|
"learning_rate": 4.1962735288928304e-07,
|
|
"logits/chosen": 0.19306407868862152,
|
|
"logits/rejected": 0.17875471711158752,
|
|
"logps/chosen": -65.12818908691406,
|
|
"logps/ref_chosen": -64.04463195800781,
|
|
"logps/ref_rejected": -75.05450439453125,
|
|
"logps/rejected": -76.61923217773438,
|
|
"loss": 1.0923,
|
|
"margin_dpo/margin_mean": 0.4811859726905823,
|
|
"margin_dpo/margin_std": 0.8308462500572205,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.3401360544217687,
|
|
"fcm_dpo/beta": 1.395388126373291,
|
|
"fcm_dpo/delta": -0.05797035992145538,
|
|
"fcm_dpo/margin": 0.7516753673553467,
|
|
"fcm_dpo/q_t": 0.3332204222679138,
|
|
"grad_norm": 468.0265197753906,
|
|
"learning_rate": 4.186536937864752e-07,
|
|
"logits/chosen": 0.1659896969795227,
|
|
"logits/rejected": 0.07978636771440506,
|
|
"logps/chosen": -67.14776611328125,
|
|
"logps/ref_chosen": -66.0958251953125,
|
|
"logps/ref_rejected": -97.68675231933594,
|
|
"logps/rejected": -99.49036407470703,
|
|
"loss": 1.0067,
|
|
"margin_dpo/margin_mean": 0.7516759634017944,
|
|
"margin_dpo/margin_std": 1.1150200366973877,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.3416477702191988,
|
|
"fcm_dpo/beta": 1.4121769666671753,
|
|
"fcm_dpo/delta": 0.040757764130830765,
|
|
"fcm_dpo/margin": 0.6820341348648071,
|
|
"fcm_dpo/q_t": 0.3408370018005371,
|
|
"grad_norm": 292.9568176269531,
|
|
"learning_rate": 4.176753170773052e-07,
|
|
"logits/chosen": 0.18655280768871307,
|
|
"logits/rejected": 0.15081270039081573,
|
|
"logps/chosen": -52.478660583496094,
|
|
"logps/ref_chosen": -51.4168701171875,
|
|
"logps/ref_rejected": -66.30068969726562,
|
|
"logps/rejected": -68.04450988769531,
|
|
"loss": 1.0394,
|
|
"margin_dpo/margin_mean": 0.6820334792137146,
|
|
"margin_dpo/margin_std": 1.0649542808532715,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.3431594860166289,
|
|
"fcm_dpo/beta": 1.4467318058013916,
|
|
"fcm_dpo/delta": 0.14849498867988586,
|
|
"fcm_dpo/margin": 0.5975882411003113,
|
|
"fcm_dpo/q_t": 0.3471581041812897,
|
|
"grad_norm": 412.5190734863281,
|
|
"learning_rate": 4.166922501290729e-07,
|
|
"logits/chosen": 0.20183053612709045,
|
|
"logits/rejected": 0.17074134945869446,
|
|
"logps/chosen": -59.01850891113281,
|
|
"logps/ref_chosen": -57.989776611328125,
|
|
"logps/ref_rejected": -75.05464172363281,
|
|
"logps/rejected": -76.68096160888672,
|
|
"loss": 1.1633,
|
|
"margin_dpo/margin_mean": 0.5975878238677979,
|
|
"margin_dpo/margin_std": 1.1138098239898682,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.34467120181405897,
|
|
"fcm_dpo/beta": 1.4542537927627563,
|
|
"fcm_dpo/delta": -0.03622462600469589,
|
|
"fcm_dpo/margin": 0.7099840641021729,
|
|
"fcm_dpo/q_t": 0.3378611207008362,
|
|
"grad_norm": 351.0310974121094,
|
|
"learning_rate": 4.1570452044027405e-07,
|
|
"logits/chosen": 0.17106792330741882,
|
|
"logits/rejected": 0.11330675333738327,
|
|
"logps/chosen": -56.669921875,
|
|
"logps/ref_chosen": -55.55936813354492,
|
|
"logps/ref_rejected": -77.02364349365234,
|
|
"logps/rejected": -78.84417724609375,
|
|
"loss": 1.1439,
|
|
"margin_dpo/margin_mean": 0.709984540939331,
|
|
"margin_dpo/margin_std": 1.241539716720581,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.34618291761148906,
|
|
"fcm_dpo/beta": 1.4833769798278809,
|
|
"fcm_dpo/delta": 0.18256068229675293,
|
|
"fcm_dpo/margin": 0.5615620613098145,
|
|
"fcm_dpo/q_t": 0.3450517952442169,
|
|
"grad_norm": 629.395263671875,
|
|
"learning_rate": 4.147121556398312e-07,
|
|
"logits/chosen": 0.23741164803504944,
|
|
"logits/rejected": 0.19696751236915588,
|
|
"logps/chosen": -51.810546875,
|
|
"logps/ref_chosen": -50.79466247558594,
|
|
"logps/ref_rejected": -78.4474105834961,
|
|
"logps/rejected": -80.02485656738281,
|
|
"loss": 1.0995,
|
|
"margin_dpo/margin_mean": 0.5615620613098145,
|
|
"margin_dpo/margin_std": 0.9383633732795715,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.3476946334089191,
|
|
"fcm_dpo/beta": 1.5409061908721924,
|
|
"fcm_dpo/delta": 0.06196488440036774,
|
|
"fcm_dpo/margin": 0.6093405485153198,
|
|
"fcm_dpo/q_t": 0.35204917192459106,
|
|
"grad_norm": 399.1847839355469,
|
|
"learning_rate": 4.137151834863213e-07,
|
|
"logits/chosen": 0.1482779085636139,
|
|
"logits/rejected": 0.14588508009910583,
|
|
"logps/chosen": -57.8225212097168,
|
|
"logps/ref_chosen": -56.729225158691406,
|
|
"logps/ref_rejected": -62.99180603027344,
|
|
"logps/rejected": -64.69444274902344,
|
|
"loss": 1.1512,
|
|
"margin_dpo/margin_mean": 0.6093416213989258,
|
|
"margin_dpo/margin_std": 1.131590485572815,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.3492063492063492,
|
|
"fcm_dpo/beta": 1.4233132600784302,
|
|
"fcm_dpo/delta": -0.39863085746765137,
|
|
"fcm_dpo/margin": 0.9415854215621948,
|
|
"fcm_dpo/q_t": 0.27584952116012573,
|
|
"grad_norm": 315.82147216796875,
|
|
"learning_rate": 4.1271363186719835e-07,
|
|
"logits/chosen": 0.11343254894018173,
|
|
"logits/rejected": 0.09590326249599457,
|
|
"logps/chosen": -73.67927551269531,
|
|
"logps/ref_chosen": -72.59709930419922,
|
|
"logps/ref_rejected": -86.2322998046875,
|
|
"logps/rejected": -88.25605773925781,
|
|
"loss": 0.7891,
|
|
"margin_dpo/margin_mean": 0.9415853023529053,
|
|
"margin_dpo/margin_std": 1.024916172027588,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.3507180650037793,
|
|
"fcm_dpo/beta": 1.3663169145584106,
|
|
"fcm_dpo/delta": -0.1273672878742218,
|
|
"fcm_dpo/margin": 0.8124350309371948,
|
|
"fcm_dpo/q_t": 0.33132317662239075,
|
|
"grad_norm": 345.0347595214844,
|
|
"learning_rate": 4.1170752879801436e-07,
|
|
"logits/chosen": 0.14546144008636475,
|
|
"logits/rejected": 0.12237675487995148,
|
|
"logps/chosen": -69.10570526123047,
|
|
"logps/ref_chosen": -68.1185302734375,
|
|
"logps/ref_rejected": -83.79415893554688,
|
|
"logps/rejected": -85.5937728881836,
|
|
"loss": 1.0339,
|
|
"margin_dpo/margin_mean": 0.812435507774353,
|
|
"margin_dpo/margin_std": 1.2407793998718262,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.35222978080120937,
|
|
"fcm_dpo/beta": 1.4025087356567383,
|
|
"fcm_dpo/delta": 0.2927402853965759,
|
|
"fcm_dpo/margin": 0.5172353386878967,
|
|
"fcm_dpo/q_t": 0.3894173204898834,
|
|
"grad_norm": 380.82867431640625,
|
|
"learning_rate": 4.106969024216348e-07,
|
|
"logits/chosen": 0.10903730243444443,
|
|
"logits/rejected": 0.06897353380918503,
|
|
"logps/chosen": -56.4796142578125,
|
|
"logps/ref_chosen": -55.070152282714844,
|
|
"logps/ref_rejected": -66.61845397949219,
|
|
"logps/rejected": -68.5451431274414,
|
|
"loss": 1.212,
|
|
"margin_dpo/margin_mean": 0.5172350406646729,
|
|
"margin_dpo/margin_std": 1.0877723693847656,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.35374149659863946,
|
|
"fcm_dpo/beta": 1.5282173156738281,
|
|
"fcm_dpo/delta": 0.27392610907554626,
|
|
"fcm_dpo/margin": 0.48579320311546326,
|
|
"fcm_dpo/q_t": 0.3870254158973694,
|
|
"grad_norm": 388.5447082519531,
|
|
"learning_rate": 4.09681781007452e-07,
|
|
"logits/chosen": 0.09753985702991486,
|
|
"logits/rejected": 0.08481541275978088,
|
|
"logps/chosen": -57.07015609741211,
|
|
"logps/ref_chosen": -55.92589569091797,
|
|
"logps/ref_rejected": -51.11608123779297,
|
|
"logps/rejected": -52.746131896972656,
|
|
"loss": 1.2257,
|
|
"margin_dpo/margin_mean": 0.48579323291778564,
|
|
"margin_dpo/margin_std": 1.005543828010559,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.35525321239606955,
|
|
"fcm_dpo/beta": 1.4440486431121826,
|
|
"fcm_dpo/delta": -0.3470792770385742,
|
|
"fcm_dpo/margin": 0.8979411721229553,
|
|
"fcm_dpo/q_t": 0.2706488370895386,
|
|
"grad_norm": 331.3047180175781,
|
|
"learning_rate": 4.08662192950594e-07,
|
|
"logits/chosen": 0.18871155381202698,
|
|
"logits/rejected": 0.17248067259788513,
|
|
"logps/chosen": -65.49934387207031,
|
|
"logps/ref_chosen": -64.53972625732422,
|
|
"logps/ref_rejected": -77.69151306152344,
|
|
"logps/rejected": -79.549072265625,
|
|
"loss": 0.7827,
|
|
"margin_dpo/margin_mean": 0.897940993309021,
|
|
"margin_dpo/margin_std": 0.9550020694732666,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.35676492819349964,
|
|
"fcm_dpo/beta": 1.4077339172363281,
|
|
"fcm_dpo/delta": -0.08833800256252289,
|
|
"fcm_dpo/margin": 0.7657995223999023,
|
|
"fcm_dpo/q_t": 0.34863966703414917,
|
|
"grad_norm": 387.498291015625,
|
|
"learning_rate": 4.076381667711306e-07,
|
|
"logits/chosen": 0.10846032202243805,
|
|
"logits/rejected": 0.09852010011672974,
|
|
"logps/chosen": -72.46954345703125,
|
|
"logps/ref_chosen": -71.15473937988281,
|
|
"logps/ref_rejected": -84.88541412353516,
|
|
"logps/rejected": -86.96601867675781,
|
|
"loss": 1.1004,
|
|
"margin_dpo/margin_mean": 0.7657992839813232,
|
|
"margin_dpo/margin_std": 1.3017526865005493,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.35827664399092973,
|
|
"fcm_dpo/beta": 1.3855292797088623,
|
|
"fcm_dpo/delta": -0.0012986212968826294,
|
|
"fcm_dpo/margin": 0.7191513776779175,
|
|
"fcm_dpo/q_t": 0.3367508053779602,
|
|
"grad_norm": 383.1600036621094,
|
|
"learning_rate": 4.066097311132753e-07,
|
|
"logits/chosen": 0.20629596710205078,
|
|
"logits/rejected": 0.19376662373542786,
|
|
"logps/chosen": -77.21894836425781,
|
|
"logps/ref_chosen": -76.14201354980469,
|
|
"logps/ref_rejected": -80.88479614257812,
|
|
"logps/rejected": -82.6808853149414,
|
|
"loss": 1.1096,
|
|
"margin_dpo/margin_mean": 0.7191513180732727,
|
|
"margin_dpo/margin_std": 1.1922106742858887,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.35978835978835977,
|
|
"fcm_dpo/beta": 1.488319993019104,
|
|
"fcm_dpo/delta": 0.24613891541957855,
|
|
"fcm_dpo/margin": 0.5112044811248779,
|
|
"fcm_dpo/q_t": 0.3709501624107361,
|
|
"grad_norm": 2477.388916015625,
|
|
"learning_rate": 4.0557691474458414e-07,
|
|
"logits/chosen": 0.13462495803833008,
|
|
"logits/rejected": 0.11732495576143265,
|
|
"logps/chosen": -70.04851531982422,
|
|
"logps/ref_chosen": -68.88484954833984,
|
|
"logps/ref_rejected": -75.8946304321289,
|
|
"logps/rejected": -77.56949615478516,
|
|
"loss": 1.4557,
|
|
"margin_dpo/margin_mean": 0.5112046003341675,
|
|
"margin_dpo/margin_std": 1.32561194896698,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.36130007558578986,
|
|
"fcm_dpo/beta": 1.4520866870880127,
|
|
"fcm_dpo/delta": -0.12432458996772766,
|
|
"fcm_dpo/margin": 0.7647981643676758,
|
|
"fcm_dpo/q_t": 0.31985604763031006,
|
|
"grad_norm": 413.1396789550781,
|
|
"learning_rate": 4.045397465551513e-07,
|
|
"logits/chosen": 0.19768695533275604,
|
|
"logits/rejected": 0.11818195134401321,
|
|
"logps/chosen": -58.036109924316406,
|
|
"logps/ref_chosen": -56.771827697753906,
|
|
"logps/ref_rejected": -116.23050689697266,
|
|
"logps/rejected": -118.25959014892578,
|
|
"loss": 1.0398,
|
|
"margin_dpo/margin_mean": 0.764798641204834,
|
|
"margin_dpo/margin_std": 1.1497983932495117,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.36281179138321995,
|
|
"fcm_dpo/beta": 1.4255015850067139,
|
|
"fcm_dpo/delta": -0.19121024012565613,
|
|
"fcm_dpo/margin": 0.8184474110603333,
|
|
"fcm_dpo/q_t": 0.30751824378967285,
|
|
"grad_norm": 311.9652404785156,
|
|
"learning_rate": 4.0349825555680045e-07,
|
|
"logits/chosen": 0.14041496813297272,
|
|
"logits/rejected": 0.07365534454584122,
|
|
"logps/chosen": -54.64623260498047,
|
|
"logps/ref_chosen": -53.35411071777344,
|
|
"logps/ref_rejected": -80.12019348144531,
|
|
"logps/rejected": -82.23076629638672,
|
|
"loss": 0.9905,
|
|
"margin_dpo/margin_mean": 0.8184475898742676,
|
|
"margin_dpo/margin_std": 1.1477313041687012,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.36432350718065004,
|
|
"fcm_dpo/beta": 1.5252119302749634,
|
|
"fcm_dpo/delta": 0.4934987425804138,
|
|
"fcm_dpo/margin": 0.336672842502594,
|
|
"fcm_dpo/q_t": 0.4018644094467163,
|
|
"grad_norm": 527.2469482421875,
|
|
"learning_rate": 4.0245247088227377e-07,
|
|
"logits/chosen": 0.12555167078971863,
|
|
"logits/rejected": 0.09838816523551941,
|
|
"logps/chosen": -73.14974975585938,
|
|
"logps/ref_chosen": -71.89541625976562,
|
|
"logps/ref_rejected": -83.03492736816406,
|
|
"logps/rejected": -84.62593078613281,
|
|
"loss": 1.4036,
|
|
"margin_dpo/margin_mean": 0.33667343854904175,
|
|
"margin_dpo/margin_std": 0.9868639707565308,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.36583522297808013,
|
|
"fcm_dpo/beta": 1.443713903427124,
|
|
"fcm_dpo/delta": -0.2312009036540985,
|
|
"fcm_dpo/margin": 0.8218022584915161,
|
|
"fcm_dpo/q_t": 0.31463247537612915,
|
|
"grad_norm": 314.4766845703125,
|
|
"learning_rate": 4.0140242178441665e-07,
|
|
"logits/chosen": 0.1157296746969223,
|
|
"logits/rejected": 0.09765278548002243,
|
|
"logps/chosen": -58.899356842041016,
|
|
"logps/ref_chosen": -57.927433013916016,
|
|
"logps/ref_rejected": -67.838623046875,
|
|
"logps/rejected": -69.6323471069336,
|
|
"loss": 0.9264,
|
|
"margin_dpo/margin_mean": 0.8218023180961609,
|
|
"margin_dpo/margin_std": 1.0703468322753906,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.3673469387755102,
|
|
"fcm_dpo/beta": 1.4689741134643555,
|
|
"fcm_dpo/delta": 0.049783095717430115,
|
|
"fcm_dpo/margin": 0.6500656604766846,
|
|
"fcm_dpo/q_t": 0.3428027033805847,
|
|
"grad_norm": 388.419921875,
|
|
"learning_rate": 4.003481376353596e-07,
|
|
"logits/chosen": 0.10413776338100433,
|
|
"logits/rejected": 0.09902875125408173,
|
|
"logps/chosen": -75.4229965209961,
|
|
"logps/ref_chosen": -74.27667236328125,
|
|
"logps/ref_rejected": -73.24340057373047,
|
|
"logps/rejected": -75.03977966308594,
|
|
"loss": 1.1186,
|
|
"margin_dpo/margin_mean": 0.650065541267395,
|
|
"margin_dpo/margin_std": 1.1026396751403809,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.3688586545729403,
|
|
"fcm_dpo/beta": 1.4141755104064941,
|
|
"fcm_dpo/delta": -0.2628709375858307,
|
|
"fcm_dpo/margin": 0.8689752817153931,
|
|
"fcm_dpo/q_t": 0.28472983837127686,
|
|
"grad_norm": 275.4247131347656,
|
|
"learning_rate": 3.9928964792569654e-07,
|
|
"logits/chosen": 0.15633343160152435,
|
|
"logits/rejected": 0.09156134724617004,
|
|
"logps/chosen": -54.57572555541992,
|
|
"logps/ref_chosen": -53.36390686035156,
|
|
"logps/ref_rejected": -71.10276794433594,
|
|
"logps/rejected": -73.18356323242188,
|
|
"loss": 0.7643,
|
|
"margin_dpo/margin_mean": 0.8689748048782349,
|
|
"margin_dpo/margin_std": 0.9518204927444458,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.37037037037037035,
|
|
"fcm_dpo/beta": 1.3580291271209717,
|
|
"fcm_dpo/delta": -0.23086267709732056,
|
|
"fcm_dpo/margin": 0.8861613273620605,
|
|
"fcm_dpo/q_t": 0.3000994920730591,
|
|
"grad_norm": 464.6539611816406,
|
|
"learning_rate": 3.982269822636601e-07,
|
|
"logits/chosen": 0.16047267615795135,
|
|
"logits/rejected": 0.14088091254234314,
|
|
"logps/chosen": -72.37004089355469,
|
|
"logps/ref_chosen": -71.19510650634766,
|
|
"logps/ref_rejected": -80.76235961914062,
|
|
"logps/rejected": -82.82345581054688,
|
|
"loss": 0.9896,
|
|
"margin_dpo/margin_mean": 0.8861616849899292,
|
|
"margin_dpo/margin_std": 1.2600171566009521,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.37188208616780044,
|
|
"fcm_dpo/beta": 1.2877942323684692,
|
|
"fcm_dpo/delta": -0.19463737308979034,
|
|
"fcm_dpo/margin": 0.9072257280349731,
|
|
"fcm_dpo/q_t": 0.3076491057872772,
|
|
"grad_norm": 311.3534240722656,
|
|
"learning_rate": 3.971601703742932e-07,
|
|
"logits/chosen": 0.15642526745796204,
|
|
"logits/rejected": 0.11311867088079453,
|
|
"logps/chosen": -72.98123168945312,
|
|
"logps/ref_chosen": -71.62104797363281,
|
|
"logps/ref_rejected": -94.03392028808594,
|
|
"logps/rejected": -96.30133056640625,
|
|
"loss": 0.8849,
|
|
"margin_dpo/margin_mean": 0.9072257876396179,
|
|
"margin_dpo/margin_std": 1.1439390182495117,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.37339380196523053,
|
|
"fcm_dpo/beta": 1.385887622833252,
|
|
"fcm_dpo/delta": 0.3913596272468567,
|
|
"fcm_dpo/margin": 0.44914084672927856,
|
|
"fcm_dpo/q_t": 0.3937835395336151,
|
|
"grad_norm": 441.7315673828125,
|
|
"learning_rate": 3.960892420986177e-07,
|
|
"logits/chosen": 0.14804767072200775,
|
|
"logits/rejected": 0.13864608108997345,
|
|
"logps/chosen": -81.47723388671875,
|
|
"logps/ref_chosen": -80.02254486083984,
|
|
"logps/ref_rejected": -89.22705841064453,
|
|
"logps/rejected": -91.13088989257812,
|
|
"loss": 1.3732,
|
|
"margin_dpo/margin_mean": 0.44914010167121887,
|
|
"margin_dpo/margin_std": 1.1773467063903809,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.3749055177626606,
|
|
"fcm_dpo/beta": 1.4095741510391235,
|
|
"fcm_dpo/delta": 0.022829867899417877,
|
|
"fcm_dpo/margin": 0.6935802698135376,
|
|
"fcm_dpo/q_t": 0.3401643633842468,
|
|
"grad_norm": 382.5201110839844,
|
|
"learning_rate": 3.9501422739279953e-07,
|
|
"logits/chosen": 0.1225530132651329,
|
|
"logits/rejected": 0.14110150933265686,
|
|
"logps/chosen": -66.71062469482422,
|
|
"logps/ref_chosen": -65.37796020507812,
|
|
"logps/ref_rejected": -61.365787506103516,
|
|
"logps/rejected": -63.392032623291016,
|
|
"loss": 1.0538,
|
|
"margin_dpo/margin_mean": 0.6935799717903137,
|
|
"margin_dpo/margin_std": 1.1022088527679443,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.3764172335600907,
|
|
"fcm_dpo/beta": 1.5373433828353882,
|
|
"fcm_dpo/delta": 0.5078557133674622,
|
|
"fcm_dpo/margin": 0.3260525166988373,
|
|
"fcm_dpo/q_t": 0.40423983335494995,
|
|
"grad_norm": 464.44989013671875,
|
|
"learning_rate": 3.9393515632731094e-07,
|
|
"logits/chosen": 0.10547161102294922,
|
|
"logits/rejected": 0.12796463072299957,
|
|
"logps/chosen": -76.10067749023438,
|
|
"logps/ref_chosen": -74.60145568847656,
|
|
"logps/ref_rejected": -63.79338455200195,
|
|
"logps/rejected": -65.61865234375,
|
|
"loss": 1.4834,
|
|
"margin_dpo/margin_mean": 0.3260522484779358,
|
|
"margin_dpo/margin_std": 1.0597259998321533,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.3779289493575208,
|
|
"fcm_dpo/beta": 1.4959361553192139,
|
|
"fcm_dpo/delta": -0.35001400113105774,
|
|
"fcm_dpo/margin": 0.871913731098175,
|
|
"fcm_dpo/q_t": 0.305349200963974,
|
|
"grad_norm": 331.92315673828125,
|
|
"learning_rate": 3.9285205908608934e-07,
|
|
"logits/chosen": 0.21487398445606232,
|
|
"logits/rejected": 0.18293528258800507,
|
|
"logps/chosen": -63.18694305419922,
|
|
"logps/ref_chosen": -61.938209533691406,
|
|
"logps/ref_rejected": -72.21602630615234,
|
|
"logps/rejected": -74.336669921875,
|
|
"loss": 0.9596,
|
|
"margin_dpo/margin_mean": 0.8719134330749512,
|
|
"margin_dpo/margin_std": 1.247259259223938,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.3794406651549509,
|
|
"fcm_dpo/beta": 1.4492324590682983,
|
|
"fcm_dpo/delta": 0.03792502358555794,
|
|
"fcm_dpo/margin": 0.6663841009140015,
|
|
"fcm_dpo/q_t": 0.3597896099090576,
|
|
"grad_norm": 376.5051574707031,
|
|
"learning_rate": 3.9176496596569265e-07,
|
|
"logits/chosen": 0.2115197777748108,
|
|
"logits/rejected": 0.1815691590309143,
|
|
"logps/chosen": -68.17942810058594,
|
|
"logps/ref_chosen": -66.85694885253906,
|
|
"logps/ref_rejected": -84.83396911621094,
|
|
"logps/rejected": -86.82284545898438,
|
|
"loss": 1.1816,
|
|
"margin_dpo/margin_mean": 0.6663837432861328,
|
|
"margin_dpo/margin_std": 1.2780930995941162,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.38095238095238093,
|
|
"fcm_dpo/beta": 1.496967077255249,
|
|
"fcm_dpo/delta": -0.03972265124320984,
|
|
"fcm_dpo/margin": 0.6779400110244751,
|
|
"fcm_dpo/q_t": 0.32841235399246216,
|
|
"grad_norm": 365.8186340332031,
|
|
"learning_rate": 3.9067390737445254e-07,
|
|
"logits/chosen": 0.12412711977958679,
|
|
"logits/rejected": 0.0818972960114479,
|
|
"logps/chosen": -57.51960372924805,
|
|
"logps/ref_chosen": -56.22393035888672,
|
|
"logps/ref_rejected": -77.1136245727539,
|
|
"logps/rejected": -79.08723449707031,
|
|
"loss": 1.2777,
|
|
"margin_dpo/margin_mean": 0.6779407262802124,
|
|
"margin_dpo/margin_std": 1.3109935522079468,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.382464096749811,
|
|
"fcm_dpo/beta": 1.3989202976226807,
|
|
"fcm_dpo/delta": -0.0833391547203064,
|
|
"fcm_dpo/margin": 0.7623114585876465,
|
|
"fcm_dpo/q_t": 0.3369859755039215,
|
|
"grad_norm": 290.3008117675781,
|
|
"learning_rate": 3.8957891383162304e-07,
|
|
"logits/chosen": 0.17171865701675415,
|
|
"logits/rejected": 0.14320127665996552,
|
|
"logps/chosen": -53.48502731323242,
|
|
"logps/ref_chosen": -52.21001434326172,
|
|
"logps/ref_rejected": -58.75764846801758,
|
|
"logps/rejected": -60.79496765136719,
|
|
"loss": 1.0203,
|
|
"margin_dpo/margin_mean": 0.762311577796936,
|
|
"margin_dpo/margin_std": 1.1594964265823364,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.3839758125472411,
|
|
"fcm_dpo/beta": 1.3931760787963867,
|
|
"fcm_dpo/delta": -0.08251707255840302,
|
|
"fcm_dpo/margin": 0.7698103189468384,
|
|
"fcm_dpo/q_t": 0.33657705783843994,
|
|
"grad_norm": 385.88311767578125,
|
|
"learning_rate": 3.884800159665276e-07,
|
|
"logits/chosen": 0.13405011594295502,
|
|
"logits/rejected": 0.09684339165687561,
|
|
"logps/chosen": -66.99877166748047,
|
|
"logps/ref_chosen": -65.63632202148438,
|
|
"logps/ref_rejected": -82.34425354003906,
|
|
"logps/rejected": -84.47650146484375,
|
|
"loss": 1.1088,
|
|
"margin_dpo/margin_mean": 0.7698097229003906,
|
|
"margin_dpo/margin_std": 1.271782398223877,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.3854875283446712,
|
|
"fcm_dpo/beta": 1.3778660297393799,
|
|
"fcm_dpo/delta": -0.019536815583705902,
|
|
"fcm_dpo/margin": 0.7369774580001831,
|
|
"fcm_dpo/q_t": 0.33573752641677856,
|
|
"grad_norm": 347.2344055175781,
|
|
"learning_rate": 3.873772445177015e-07,
|
|
"logits/chosen": 0.1548142433166504,
|
|
"logits/rejected": 0.12779046595096588,
|
|
"logps/chosen": -69.03644561767578,
|
|
"logps/ref_chosen": -67.91108703613281,
|
|
"logps/ref_rejected": -83.89114379882812,
|
|
"logps/rejected": -85.75347900390625,
|
|
"loss": 1.068,
|
|
"margin_dpo/margin_mean": 0.7369774580001831,
|
|
"margin_dpo/margin_std": 1.1885807514190674,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.3869992441421013,
|
|
"fcm_dpo/beta": 1.396597146987915,
|
|
"fcm_dpo/delta": -0.011127792298793793,
|
|
"fcm_dpo/margin": 0.7227224111557007,
|
|
"fcm_dpo/q_t": 0.3369947075843811,
|
|
"grad_norm": 397.8918762207031,
|
|
"learning_rate": 3.862706303320329e-07,
|
|
"logits/chosen": 0.1215638667345047,
|
|
"logits/rejected": 0.08205322921276093,
|
|
"logps/chosen": -64.94349670410156,
|
|
"logps/ref_chosen": -63.49998474121094,
|
|
"logps/ref_rejected": -90.77104187011719,
|
|
"logps/rejected": -92.93727111816406,
|
|
"loss": 1.0738,
|
|
"margin_dpo/margin_mean": 0.7227222323417664,
|
|
"margin_dpo/margin_std": 1.203334927558899,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.3885109599395314,
|
|
"fcm_dpo/beta": 1.2768826484680176,
|
|
"fcm_dpo/delta": -0.461418092250824,
|
|
"fcm_dpo/margin": 1.086154580116272,
|
|
"fcm_dpo/q_t": 0.28833481669425964,
|
|
"grad_norm": 322.4739685058594,
|
|
"learning_rate": 3.851602043638994e-07,
|
|
"logits/chosen": 0.1613893210887909,
|
|
"logits/rejected": 0.11499130725860596,
|
|
"logps/chosen": -71.91011810302734,
|
|
"logps/ref_chosen": -70.60064697265625,
|
|
"logps/ref_rejected": -108.58313751220703,
|
|
"logps/rejected": -110.978759765625,
|
|
"loss": 0.9387,
|
|
"margin_dpo/margin_mean": 1.0861549377441406,
|
|
"margin_dpo/margin_std": 1.5388684272766113,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.3900226757369615,
|
|
"fcm_dpo/beta": 1.2841336727142334,
|
|
"fcm_dpo/delta": 0.00753195583820343,
|
|
"fcm_dpo/margin": 0.7705293297767639,
|
|
"fcm_dpo/q_t": 0.32425814867019653,
|
|
"grad_norm": 300.5933837890625,
|
|
"learning_rate": 3.840459976743023e-07,
|
|
"logits/chosen": 0.1647588312625885,
|
|
"logits/rejected": 0.1312163770198822,
|
|
"logps/chosen": -60.74152374267578,
|
|
"logps/ref_chosen": -59.25416564941406,
|
|
"logps/ref_rejected": -85.58709716796875,
|
|
"logps/rejected": -87.84498596191406,
|
|
"loss": 0.933,
|
|
"margin_dpo/margin_mean": 0.7705295085906982,
|
|
"margin_dpo/margin_std": 1.0195441246032715,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.3915343915343915,
|
|
"fcm_dpo/beta": 1.1445305347442627,
|
|
"fcm_dpo/delta": -0.5759162902832031,
|
|
"fcm_dpo/margin": 1.2901654243469238,
|
|
"fcm_dpo/q_t": 0.2562459111213684,
|
|
"grad_norm": 223.67605590820312,
|
|
"learning_rate": 3.8292804142999796e-07,
|
|
"logits/chosen": 0.09316843003034592,
|
|
"logits/rejected": 0.021822050213813782,
|
|
"logps/chosen": -66.56085205078125,
|
|
"logps/ref_chosen": -65.43487548828125,
|
|
"logps/ref_rejected": -95.41731262207031,
|
|
"logps/rejected": -97.83345031738281,
|
|
"loss": 0.7207,
|
|
"margin_dpo/margin_mean": 1.2901657819747925,
|
|
"margin_dpo/margin_std": 1.2492828369140625,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.3930461073318216,
|
|
"fcm_dpo/beta": 1.1257474422454834,
|
|
"fcm_dpo/delta": 0.017584767192602158,
|
|
"fcm_dpo/margin": 0.8737805485725403,
|
|
"fcm_dpo/q_t": 0.33795487880706787,
|
|
"grad_norm": 259.8592834472656,
|
|
"learning_rate": 3.818063669026256e-07,
|
|
"logits/chosen": 0.127943217754364,
|
|
"logits/rejected": 0.06927517056465149,
|
|
"logps/chosen": -50.53019714355469,
|
|
"logps/ref_chosen": -49.08958435058594,
|
|
"logps/ref_rejected": -79.01708221435547,
|
|
"logps/rejected": -81.33147430419922,
|
|
"loss": 0.9979,
|
|
"margin_dpo/margin_mean": 0.873779296875,
|
|
"margin_dpo/margin_std": 1.2833609580993652,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.3945578231292517,
|
|
"fcm_dpo/beta": 1.1455121040344238,
|
|
"fcm_dpo/delta": 0.08530843257904053,
|
|
"fcm_dpo/margin": 0.8054367303848267,
|
|
"fcm_dpo/q_t": 0.34108513593673706,
|
|
"grad_norm": 306.086181640625,
|
|
"learning_rate": 3.806810054678331e-07,
|
|
"logits/chosen": 0.03654761239886284,
|
|
"logits/rejected": 0.04507092386484146,
|
|
"logps/chosen": -72.11337280273438,
|
|
"logps/ref_chosen": -70.87239074707031,
|
|
"logps/ref_rejected": -65.01522064208984,
|
|
"logps/rejected": -67.06163787841797,
|
|
"loss": 0.9947,
|
|
"margin_dpo/margin_mean": 0.805436909198761,
|
|
"margin_dpo/margin_std": 1.1771018505096436,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.3960695389266818,
|
|
"fcm_dpo/beta": 1.1686980724334717,
|
|
"fcm_dpo/delta": 0.04035666957497597,
|
|
"fcm_dpo/margin": 0.8231496810913086,
|
|
"fcm_dpo/q_t": 0.3458126187324524,
|
|
"grad_norm": 323.2743225097656,
|
|
"learning_rate": 3.7955198860439887e-07,
|
|
"logits/chosen": 0.17141227424144745,
|
|
"logits/rejected": 0.13052189350128174,
|
|
"logps/chosen": -69.32987976074219,
|
|
"logps/ref_chosen": -67.8706283569336,
|
|
"logps/ref_rejected": -88.7205810546875,
|
|
"logps/rejected": -91.00297546386719,
|
|
"loss": 0.9832,
|
|
"margin_dpo/margin_mean": 0.8231501579284668,
|
|
"margin_dpo/margin_std": 1.2316184043884277,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.3975812547241119,
|
|
"fcm_dpo/beta": 1.23178231716156,
|
|
"fcm_dpo/delta": 0.2991076111793518,
|
|
"fcm_dpo/margin": 0.5800197124481201,
|
|
"fcm_dpo/q_t": 0.38507431745529175,
|
|
"grad_norm": 314.1044616699219,
|
|
"learning_rate": 3.784193478933516e-07,
|
|
"logits/chosen": 0.13119199872016907,
|
|
"logits/rejected": 0.05917968600988388,
|
|
"logps/chosen": -56.671165466308594,
|
|
"logps/ref_chosen": -55.194583892822266,
|
|
"logps/ref_rejected": -80.54048156738281,
|
|
"logps/rejected": -82.59709167480469,
|
|
"loss": 1.231,
|
|
"margin_dpo/margin_mean": 0.5800192356109619,
|
|
"margin_dpo/margin_std": 1.2319090366363525,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.39909297052154197,
|
|
"fcm_dpo/beta": 1.2216218709945679,
|
|
"fcm_dpo/delta": -0.13029904663562775,
|
|
"fcm_dpo/margin": 0.9128764867782593,
|
|
"fcm_dpo/q_t": 0.34319591522216797,
|
|
"grad_norm": 366.9609069824219,
|
|
"learning_rate": 3.7728311501708674e-07,
|
|
"logits/chosen": 0.08489110320806503,
|
|
"logits/rejected": 0.04814103990793228,
|
|
"logps/chosen": -84.4659652709961,
|
|
"logps/ref_chosen": -83.17068481445312,
|
|
"logps/ref_rejected": -88.33625793457031,
|
|
"logps/rejected": -90.54441833496094,
|
|
"loss": 1.0999,
|
|
"margin_dpo/margin_mean": 0.9128766655921936,
|
|
"margin_dpo/margin_std": 1.5033926963806152,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.40060468631897206,
|
|
"fcm_dpo/beta": 1.1946429014205933,
|
|
"fcm_dpo/delta": -0.042388565838336945,
|
|
"fcm_dpo/margin": 0.8688783049583435,
|
|
"fcm_dpo/q_t": 0.31362420320510864,
|
|
"grad_norm": 336.9339904785156,
|
|
"learning_rate": 3.7614332175848027e-07,
|
|
"logits/chosen": 0.1749192476272583,
|
|
"logits/rejected": 0.12444747984409332,
|
|
"logps/chosen": -53.23479080200195,
|
|
"logps/ref_chosen": -51.66284942626953,
|
|
"logps/ref_rejected": -67.1720962524414,
|
|
"logps/rejected": -69.6129150390625,
|
|
"loss": 0.9924,
|
|
"margin_dpo/margin_mean": 0.8688779473304749,
|
|
"margin_dpo/margin_std": 1.2264655828475952,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.4021164021164021,
|
|
"fcm_dpo/beta": 1.200698971748352,
|
|
"fcm_dpo/delta": 0.018015079200267792,
|
|
"fcm_dpo/margin": 0.8188613653182983,
|
|
"fcm_dpo/q_t": 0.342385858297348,
|
|
"grad_norm": 329.1114807128906,
|
|
"learning_rate": 3.75e-07,
|
|
"logits/chosen": 0.1544215977191925,
|
|
"logits/rejected": 0.10259807109832764,
|
|
"logps/chosen": -58.89800262451172,
|
|
"logps/ref_chosen": -57.45049285888672,
|
|
"logps/ref_rejected": -77.60826110839844,
|
|
"logps/rejected": -79.87462615966797,
|
|
"loss": 1.0917,
|
|
"margin_dpo/margin_mean": 0.8188612461090088,
|
|
"margin_dpo/margin_std": 1.41795015335083,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.4036281179138322,
|
|
"fcm_dpo/beta": 1.2707109451293945,
|
|
"fcm_dpo/delta": 0.28088656067848206,
|
|
"fcm_dpo/margin": 0.5725541710853577,
|
|
"fcm_dpo/q_t": 0.3735983073711395,
|
|
"grad_norm": 291.73394775390625,
|
|
"learning_rate": 3.738531817228131e-07,
|
|
"logits/chosen": 0.15856996178627014,
|
|
"logits/rejected": 0.1470004916191101,
|
|
"logps/chosen": -56.46894836425781,
|
|
"logps/ref_chosen": -55.03535079956055,
|
|
"logps/ref_rejected": -66.0953369140625,
|
|
"logps/rejected": -68.10148620605469,
|
|
"loss": 1.2705,
|
|
"margin_dpo/margin_mean": 0.5725547075271606,
|
|
"margin_dpo/margin_std": 1.2551192045211792,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.4051398337112623,
|
|
"fcm_dpo/beta": 1.2686963081359863,
|
|
"fcm_dpo/delta": -0.053165629506111145,
|
|
"fcm_dpo/margin": 0.8244317770004272,
|
|
"fcm_dpo/q_t": 0.34110718965530396,
|
|
"grad_norm": 308.4896240234375,
|
|
"learning_rate": 3.7270289900589204e-07,
|
|
"logits/chosen": 0.07009106129407883,
|
|
"logits/rejected": 0.05312522128224373,
|
|
"logps/chosen": -66.37588500976562,
|
|
"logps/ref_chosen": -65.07174682617188,
|
|
"logps/ref_rejected": -71.42485809326172,
|
|
"logps/rejected": -73.5534439086914,
|
|
"loss": 0.9779,
|
|
"margin_dpo/margin_mean": 0.8244317770004272,
|
|
"margin_dpo/margin_std": 1.2355961799621582,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.40665154950869237,
|
|
"fcm_dpo/beta": 1.2246897220611572,
|
|
"fcm_dpo/delta": -0.29109132289886475,
|
|
"fcm_dpo/margin": 1.0223791599273682,
|
|
"fcm_dpo/q_t": 0.3000904321670532,
|
|
"grad_norm": 285.43951416015625,
|
|
"learning_rate": 3.7154918402511714e-07,
|
|
"logits/chosen": 0.22535249590873718,
|
|
"logits/rejected": 0.1888647824525833,
|
|
"logps/chosen": -68.56861114501953,
|
|
"logps/ref_chosen": -67.1362075805664,
|
|
"logps/ref_rejected": -82.55778503417969,
|
|
"logps/rejected": -85.01256561279297,
|
|
"loss": 0.9234,
|
|
"margin_dpo/margin_mean": 1.0223793983459473,
|
|
"margin_dpo/margin_std": 1.2574834823608398,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.40816326530612246,
|
|
"fcm_dpo/beta": 1.2294461727142334,
|
|
"fcm_dpo/delta": 0.26967453956604004,
|
|
"fcm_dpo/margin": 0.6104166507720947,
|
|
"fcm_dpo/q_t": 0.3626842498779297,
|
|
"grad_norm": 348.1169738769531,
|
|
"learning_rate": 3.7039206905237656e-07,
|
|
"logits/chosen": 0.18059831857681274,
|
|
"logits/rejected": 0.13146328926086426,
|
|
"logps/chosen": -68.18730163574219,
|
|
"logps/ref_chosen": -66.6886978149414,
|
|
"logps/ref_rejected": -85.16129302978516,
|
|
"logps/rejected": -87.27030944824219,
|
|
"loss": 1.1816,
|
|
"margin_dpo/margin_mean": 0.6104167103767395,
|
|
"margin_dpo/margin_std": 1.19452965259552,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.40967498110355255,
|
|
"fcm_dpo/beta": 1.307576060295105,
|
|
"fcm_dpo/delta": 0.36521047353744507,
|
|
"fcm_dpo/margin": 0.5049761533737183,
|
|
"fcm_dpo/q_t": 0.4147690534591675,
|
|
"grad_norm": 440.6063537597656,
|
|
"learning_rate": 3.692315864546635e-07,
|
|
"logits/chosen": 0.1853274405002594,
|
|
"logits/rejected": 0.14235132932662964,
|
|
"logps/chosen": -73.94186401367188,
|
|
"logps/ref_chosen": -72.40754699707031,
|
|
"logps/ref_rejected": -92.06311798095703,
|
|
"logps/rejected": -94.10240173339844,
|
|
"loss": 1.5095,
|
|
"margin_dpo/margin_mean": 0.5049762725830078,
|
|
"margin_dpo/margin_std": 1.5028947591781616,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.41118669690098264,
|
|
"fcm_dpo/beta": 1.266921043395996,
|
|
"fcm_dpo/delta": -0.4322693943977356,
|
|
"fcm_dpo/margin": 1.0848881006240845,
|
|
"fcm_dpo/q_t": 0.28500691056251526,
|
|
"grad_norm": 282.21990966796875,
|
|
"learning_rate": 3.6806776869317067e-07,
|
|
"logits/chosen": 0.15356749296188354,
|
|
"logits/rejected": 0.15307673811912537,
|
|
"logps/chosen": -67.91374206542969,
|
|
"logps/ref_chosen": -66.60140228271484,
|
|
"logps/ref_rejected": -67.74340057373047,
|
|
"logps/rejected": -70.140625,
|
|
"loss": 0.7882,
|
|
"margin_dpo/margin_mean": 1.0848881006240845,
|
|
"margin_dpo/margin_std": 1.259714126586914,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.4126984126984127,
|
|
"fcm_dpo/beta": 1.2134504318237305,
|
|
"fcm_dpo/delta": -0.202475443482399,
|
|
"fcm_dpo/margin": 0.9696700572967529,
|
|
"fcm_dpo/q_t": 0.3056218922138214,
|
|
"grad_norm": 257.2859191894531,
|
|
"learning_rate": 3.669006483223828e-07,
|
|
"logits/chosen": 0.18354235589504242,
|
|
"logits/rejected": 0.14025747776031494,
|
|
"logps/chosen": -58.94585037231445,
|
|
"logps/ref_chosen": -57.35487747192383,
|
|
"logps/ref_rejected": -84.17168426513672,
|
|
"logps/rejected": -86.73233032226562,
|
|
"loss": 0.885,
|
|
"margin_dpo/margin_mean": 0.9696696996688843,
|
|
"margin_dpo/margin_std": 1.2579290866851807,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.41421012849584277,
|
|
"fcm_dpo/beta": 1.17547607421875,
|
|
"fcm_dpo/delta": -0.022935807704925537,
|
|
"fcm_dpo/margin": 0.8681968450546265,
|
|
"fcm_dpo/q_t": 0.3227364718914032,
|
|
"grad_norm": 261.911376953125,
|
|
"learning_rate": 3.657302579891656e-07,
|
|
"logits/chosen": 0.0659763365983963,
|
|
"logits/rejected": 0.04620751738548279,
|
|
"logps/chosen": -61.098419189453125,
|
|
"logps/ref_chosen": -59.64149475097656,
|
|
"logps/ref_rejected": -68.29348754882812,
|
|
"logps/rejected": -70.61860656738281,
|
|
"loss": 1.004,
|
|
"margin_dpo/margin_mean": 0.8681962490081787,
|
|
"margin_dpo/margin_std": 1.2701618671417236,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.41572184429327286,
|
|
"fcm_dpo/beta": 1.157713532447815,
|
|
"fcm_dpo/delta": -0.10604125261306763,
|
|
"fcm_dpo/margin": 0.9453713893890381,
|
|
"fcm_dpo/q_t": 0.32869789004325867,
|
|
"grad_norm": 274.95843505859375,
|
|
"learning_rate": 3.645566304318526e-07,
|
|
"logits/chosen": 0.1414085328578949,
|
|
"logits/rejected": 0.07877371460199356,
|
|
"logps/chosen": -54.83768844604492,
|
|
"logps/ref_chosen": -53.26664352416992,
|
|
"logps/ref_rejected": -73.84062194824219,
|
|
"logps/rejected": -76.3570327758789,
|
|
"loss": 0.9802,
|
|
"margin_dpo/margin_mean": 0.9453713893890381,
|
|
"margin_dpo/margin_std": 1.3858097791671753,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.41723356009070295,
|
|
"fcm_dpo/beta": 1.1194026470184326,
|
|
"fcm_dpo/delta": -0.17233465611934662,
|
|
"fcm_dpo/margin": 1.0291403532028198,
|
|
"fcm_dpo/q_t": 0.29955726861953735,
|
|
"grad_norm": 242.73602294921875,
|
|
"learning_rate": 3.633797984793294e-07,
|
|
"logits/chosen": 0.11177192628383636,
|
|
"logits/rejected": 0.08380501717329025,
|
|
"logps/chosen": -54.313621520996094,
|
|
"logps/ref_chosen": -53.02079772949219,
|
|
"logps/ref_rejected": -61.56678771972656,
|
|
"logps/rejected": -63.888755798339844,
|
|
"loss": 0.8804,
|
|
"margin_dpo/margin_mean": 1.0291404724121094,
|
|
"margin_dpo/margin_std": 1.2605907917022705,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.41874527588813304,
|
|
"fcm_dpo/beta": 1.16245436668396,
|
|
"fcm_dpo/delta": 0.17784440517425537,
|
|
"fcm_dpo/margin": 0.7118735313415527,
|
|
"fcm_dpo/q_t": 0.3743218183517456,
|
|
"grad_norm": 298.37750244140625,
|
|
"learning_rate": 3.6219979505011555e-07,
|
|
"logits/chosen": 0.2183254361152649,
|
|
"logits/rejected": 0.23113352060317993,
|
|
"logps/chosen": -73.12422180175781,
|
|
"logps/ref_chosen": -71.43299102783203,
|
|
"logps/ref_rejected": -67.65852355957031,
|
|
"logps/rejected": -70.06163024902344,
|
|
"loss": 1.1743,
|
|
"margin_dpo/margin_mean": 0.7118737697601318,
|
|
"margin_dpo/margin_std": 1.4327894449234009,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.42025699168556313,
|
|
"fcm_dpo/beta": 1.172609806060791,
|
|
"fcm_dpo/delta": -0.03564952313899994,
|
|
"fcm_dpo/margin": 0.8724105358123779,
|
|
"fcm_dpo/q_t": 0.32611083984375,
|
|
"grad_norm": 296.2506408691406,
|
|
"learning_rate": 3.6101665315144353e-07,
|
|
"logits/chosen": 0.1023668497800827,
|
|
"logits/rejected": 0.07007478922605515,
|
|
"logps/chosen": -68.64065551757812,
|
|
"logps/ref_chosen": -67.11076354980469,
|
|
"logps/ref_rejected": -88.74851989746094,
|
|
"logps/rejected": -91.15081787109375,
|
|
"loss": 1.0143,
|
|
"margin_dpo/margin_mean": 0.8724101781845093,
|
|
"margin_dpo/margin_std": 1.2294014692306519,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.4217687074829932,
|
|
"fcm_dpo/beta": 1.1110622882843018,
|
|
"fcm_dpo/delta": -0.26436370611190796,
|
|
"fcm_dpo/margin": 1.1091269254684448,
|
|
"fcm_dpo/q_t": 0.276674747467041,
|
|
"grad_norm": 208.7923583984375,
|
|
"learning_rate": 3.5983040587833563e-07,
|
|
"logits/chosen": 0.1121751144528389,
|
|
"logits/rejected": 0.0790662094950676,
|
|
"logps/chosen": -55.73821258544922,
|
|
"logps/ref_chosen": -54.49748611450195,
|
|
"logps/ref_rejected": -70.42373657226562,
|
|
"logps/rejected": -72.77357482910156,
|
|
"loss": 0.778,
|
|
"margin_dpo/margin_mean": 1.1091272830963135,
|
|
"margin_dpo/margin_std": 1.1377835273742676,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.42328042328042326,
|
|
"fcm_dpo/beta": 1.045201063156128,
|
|
"fcm_dpo/delta": -0.19996249675750732,
|
|
"fcm_dpo/margin": 1.1231290102005005,
|
|
"fcm_dpo/q_t": 0.2829288840293884,
|
|
"grad_norm": 192.96217346191406,
|
|
"learning_rate": 3.586410864126781e-07,
|
|
"logits/chosen": 0.15486222505569458,
|
|
"logits/rejected": 0.123613640666008,
|
|
"logps/chosen": -61.731178283691406,
|
|
"logps/ref_chosen": -60.43281173706055,
|
|
"logps/ref_rejected": -78.39051818847656,
|
|
"logps/rejected": -80.81201171875,
|
|
"loss": 0.7481,
|
|
"margin_dpo/margin_mean": 1.1231298446655273,
|
|
"margin_dpo/margin_std": 1.1084861755371094,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.42479213907785335,
|
|
"fcm_dpo/beta": 1.0173039436340332,
|
|
"fcm_dpo/delta": -0.09447715431451797,
|
|
"fcm_dpo/margin": 1.0629010200500488,
|
|
"fcm_dpo/q_t": 0.3139858841896057,
|
|
"grad_norm": 206.9331817626953,
|
|
"learning_rate": 3.574487280222929e-07,
|
|
"logits/chosen": 0.15210747718811035,
|
|
"logits/rejected": 0.15197323262691498,
|
|
"logps/chosen": -61.607582092285156,
|
|
"logps/ref_chosen": -60.2820930480957,
|
|
"logps/ref_rejected": -62.04009246826172,
|
|
"logps/rejected": -64.42848205566406,
|
|
"loss": 0.9316,
|
|
"margin_dpo/margin_mean": 1.0629009008407593,
|
|
"margin_dpo/margin_std": 1.3708744049072266,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.42630385487528344,
|
|
"fcm_dpo/beta": 1.054863452911377,
|
|
"fcm_dpo/delta": 0.04717801511287689,
|
|
"fcm_dpo/margin": 0.8964927196502686,
|
|
"fcm_dpo/q_t": 0.33858174085617065,
|
|
"grad_norm": 262.0866394042969,
|
|
"learning_rate": 3.562533640600075e-07,
|
|
"logits/chosen": 0.1025453507900238,
|
|
"logits/rejected": 0.06392862647771835,
|
|
"logps/chosen": -62.19837951660156,
|
|
"logps/ref_chosen": -60.623924255371094,
|
|
"logps/ref_rejected": -68.67400360107422,
|
|
"logps/rejected": -71.14495849609375,
|
|
"loss": 1.0039,
|
|
"margin_dpo/margin_mean": 0.8964922428131104,
|
|
"margin_dpo/margin_std": 1.2645740509033203,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.42781557067271353,
|
|
"fcm_dpo/beta": 1.0366630554199219,
|
|
"fcm_dpo/delta": -0.0020070038735866547,
|
|
"fcm_dpo/margin": 0.9660577774047852,
|
|
"fcm_dpo/q_t": 0.3374154269695282,
|
|
"grad_norm": 295.226318359375,
|
|
"learning_rate": 3.550550279627215e-07,
|
|
"logits/chosen": 0.11783361434936523,
|
|
"logits/rejected": 0.054866328835487366,
|
|
"logps/chosen": -69.20338439941406,
|
|
"logps/ref_chosen": -67.64775085449219,
|
|
"logps/ref_rejected": -99.96835327148438,
|
|
"logps/rejected": -102.49005126953125,
|
|
"loss": 1.0551,
|
|
"margin_dpo/margin_mean": 0.9660578370094299,
|
|
"margin_dpo/margin_std": 1.526496171951294,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.4293272864701436,
|
|
"fcm_dpo/beta": 1.0322619676589966,
|
|
"fcm_dpo/delta": 0.00461952667683363,
|
|
"fcm_dpo/margin": 0.9647125005722046,
|
|
"fcm_dpo/q_t": 0.33289098739624023,
|
|
"grad_norm": 231.38433837890625,
|
|
"learning_rate": 3.5385375325047163e-07,
|
|
"logits/chosen": 0.17530453205108643,
|
|
"logits/rejected": 0.12596732378005981,
|
|
"logps/chosen": -58.40034103393555,
|
|
"logps/ref_chosen": -56.96742630004883,
|
|
"logps/ref_rejected": -86.36236572265625,
|
|
"logps/rejected": -88.75999450683594,
|
|
"loss": 0.9914,
|
|
"margin_dpo/margin_mean": 0.9647125005722046,
|
|
"margin_dpo/margin_std": 1.445831298828125,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.4308390022675737,
|
|
"fcm_dpo/beta": 1.0661512613296509,
|
|
"fcm_dpo/delta": 0.16971619427204132,
|
|
"fcm_dpo/margin": 0.7905272245407104,
|
|
"fcm_dpo/q_t": 0.34532633423805237,
|
|
"grad_norm": 249.1006317138672,
|
|
"learning_rate": 3.5264957352549375e-07,
|
|
"logits/chosen": 0.18527567386627197,
|
|
"logits/rejected": 0.1653136909008026,
|
|
"logps/chosen": -73.34848022460938,
|
|
"logps/ref_chosen": -71.65611267089844,
|
|
"logps/ref_rejected": -81.63829803466797,
|
|
"logps/rejected": -84.12120056152344,
|
|
"loss": 0.9726,
|
|
"margin_dpo/margin_mean": 0.7905269861221313,
|
|
"margin_dpo/margin_std": 1.0835275650024414,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.4323507180650038,
|
|
"fcm_dpo/beta": 1.005662441253662,
|
|
"fcm_dpo/delta": -0.34280824661254883,
|
|
"fcm_dpo/margin": 1.2858762741088867,
|
|
"fcm_dpo/q_t": 0.27013134956359863,
|
|
"grad_norm": 211.03579711914062,
|
|
"learning_rate": 3.514425224712835e-07,
|
|
"logits/chosen": 0.10800629109144211,
|
|
"logits/rejected": 0.0394493006169796,
|
|
"logps/chosen": -62.53429412841797,
|
|
"logps/ref_chosen": -61.07952117919922,
|
|
"logps/ref_rejected": -91.28128051757812,
|
|
"logps/rejected": -94.02192687988281,
|
|
"loss": 0.8318,
|
|
"margin_dpo/margin_mean": 1.2858755588531494,
|
|
"margin_dpo/margin_std": 1.5294857025146484,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.43386243386243384,
|
|
"fcm_dpo/beta": 0.992131769657135,
|
|
"fcm_dpo/delta": -0.06625291705131531,
|
|
"fcm_dpo/margin": 1.0675835609436035,
|
|
"fcm_dpo/q_t": 0.3116587698459625,
|
|
"grad_norm": 213.88555908203125,
|
|
"learning_rate": 3.502326338516534e-07,
|
|
"logits/chosen": 0.13146105408668518,
|
|
"logits/rejected": 0.10076682269573212,
|
|
"logps/chosen": -47.52275848388672,
|
|
"logps/ref_chosen": -46.035789489746094,
|
|
"logps/ref_rejected": -59.95293426513672,
|
|
"logps/rejected": -62.507484436035156,
|
|
"loss": 0.8751,
|
|
"margin_dpo/margin_mean": 1.067583441734314,
|
|
"margin_dpo/margin_std": 1.295511245727539,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.43537414965986393,
|
|
"fcm_dpo/beta": 0.9990655183792114,
|
|
"fcm_dpo/delta": 0.13602013885974884,
|
|
"fcm_dpo/margin": 0.8767856359481812,
|
|
"fcm_dpo/q_t": 0.35159996151924133,
|
|
"grad_norm": 293.45867919921875,
|
|
"learning_rate": 3.490199415097892e-07,
|
|
"logits/chosen": 0.053915057331323624,
|
|
"logits/rejected": 0.014950074255466461,
|
|
"logps/chosen": -67.08319854736328,
|
|
"logps/ref_chosen": -65.3908462524414,
|
|
"logps/ref_rejected": -88.53607940673828,
|
|
"logps/rejected": -91.10520935058594,
|
|
"loss": 1.0811,
|
|
"margin_dpo/margin_mean": 0.8767852187156677,
|
|
"margin_dpo/margin_std": 1.4756031036376953,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.436885865457294,
|
|
"fcm_dpo/beta": 1.0097713470458984,
|
|
"fcm_dpo/delta": 0.013888869434595108,
|
|
"fcm_dpo/margin": 0.9776356220245361,
|
|
"fcm_dpo/q_t": 0.3457440435886383,
|
|
"grad_norm": 220.5363006591797,
|
|
"learning_rate": 3.4780447936730247e-07,
|
|
"logits/chosen": 0.16835784912109375,
|
|
"logits/rejected": 0.14368662238121033,
|
|
"logps/chosen": -56.29551696777344,
|
|
"logps/ref_chosen": -54.5936279296875,
|
|
"logps/ref_rejected": -67.20855712890625,
|
|
"logps/rejected": -69.88806915283203,
|
|
"loss": 1.0415,
|
|
"margin_dpo/margin_mean": 0.9776356220245361,
|
|
"margin_dpo/margin_std": 1.556645393371582,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.4383975812547241,
|
|
"fcm_dpo/beta": 1.0043764114379883,
|
|
"fcm_dpo/delta": -0.04867362976074219,
|
|
"fcm_dpo/margin": 1.0387194156646729,
|
|
"fcm_dpo/q_t": 0.33352506160736084,
|
|
"grad_norm": 238.85610961914062,
|
|
"learning_rate": 3.465862814232821e-07,
|
|
"logits/chosen": 0.19789519906044006,
|
|
"logits/rejected": 0.14447157084941864,
|
|
"logps/chosen": -63.15911865234375,
|
|
"logps/ref_chosen": -61.38457489013672,
|
|
"logps/ref_rejected": -91.92778015136719,
|
|
"logps/rejected": -94.74103546142578,
|
|
"loss": 0.9701,
|
|
"margin_dpo/margin_mean": 1.038718819618225,
|
|
"margin_dpo/margin_std": 1.499373435974121,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.4399092970521542,
|
|
"fcm_dpo/beta": 1.0028091669082642,
|
|
"fcm_dpo/delta": -0.10185343772172928,
|
|
"fcm_dpo/margin": 1.0858376026153564,
|
|
"fcm_dpo/q_t": 0.3095766305923462,
|
|
"grad_norm": 204.3426055908203,
|
|
"learning_rate": 3.4536538175334343e-07,
|
|
"logits/chosen": 0.23501402139663696,
|
|
"logits/rejected": 0.1915348768234253,
|
|
"logps/chosen": -52.46550750732422,
|
|
"logps/ref_chosen": -50.863037109375,
|
|
"logps/ref_rejected": -82.20868682861328,
|
|
"logps/rejected": -84.89698791503906,
|
|
"loss": 0.8974,
|
|
"margin_dpo/margin_mean": 1.085837960243225,
|
|
"margin_dpo/margin_std": 1.3475749492645264,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.4414210128495843,
|
|
"fcm_dpo/beta": 1.0263406038284302,
|
|
"fcm_dpo/delta": 0.24256381392478943,
|
|
"fcm_dpo/margin": 0.7545459270477295,
|
|
"fcm_dpo/q_t": 0.3599158823490143,
|
|
"grad_norm": 320.62713623046875,
|
|
"learning_rate": 3.4414181450867465e-07,
|
|
"logits/chosen": 0.1599317491054535,
|
|
"logits/rejected": 0.11992324888706207,
|
|
"logps/chosen": -65.92310333251953,
|
|
"logps/ref_chosen": -64.34888458251953,
|
|
"logps/ref_rejected": -72.86434173583984,
|
|
"logps/rejected": -75.193115234375,
|
|
"loss": 1.0762,
|
|
"margin_dpo/margin_mean": 0.7545456886291504,
|
|
"margin_dpo/margin_std": 1.2547924518585205,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.4429327286470144,
|
|
"fcm_dpo/beta": 1.0156301259994507,
|
|
"fcm_dpo/delta": -0.14779676496982574,
|
|
"fcm_dpo/margin": 1.1138885021209717,
|
|
"fcm_dpo/q_t": 0.29969820380210876,
|
|
"grad_norm": 190.7615966796875,
|
|
"learning_rate": 3.4291561391508185e-07,
|
|
"logits/chosen": 0.22061100602149963,
|
|
"logits/rejected": 0.15896283090114594,
|
|
"logps/chosen": -56.73548889160156,
|
|
"logps/ref_chosen": -54.869468688964844,
|
|
"logps/ref_rejected": -81.858642578125,
|
|
"logps/rejected": -84.83856201171875,
|
|
"loss": 0.8801,
|
|
"margin_dpo/margin_mean": 1.1138887405395508,
|
|
"margin_dpo/margin_std": 1.374760627746582,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.4444444444444444,
|
|
"fcm_dpo/beta": 0.9778472185134888,
|
|
"fcm_dpo/delta": -0.08654538542032242,
|
|
"fcm_dpo/margin": 1.0972121953964233,
|
|
"fcm_dpo/q_t": 0.309672474861145,
|
|
"grad_norm": 168.36590576171875,
|
|
"learning_rate": 3.4168681427203153e-07,
|
|
"logits/chosen": 0.17619842290878296,
|
|
"logits/rejected": 0.143341526389122,
|
|
"logps/chosen": -58.26924133300781,
|
|
"logps/ref_chosen": -56.670902252197266,
|
|
"logps/ref_rejected": -70.32819366455078,
|
|
"logps/rejected": -73.02374267578125,
|
|
"loss": 0.8444,
|
|
"margin_dpo/margin_mean": 1.0972115993499756,
|
|
"margin_dpo/margin_std": 1.2684025764465332,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.4459561602418745,
|
|
"fcm_dpo/beta": 1.0054044723510742,
|
|
"fcm_dpo/delta": 0.08222609758377075,
|
|
"fcm_dpo/margin": 0.9189479947090149,
|
|
"fcm_dpo/q_t": 0.35169196128845215,
|
|
"grad_norm": 228.4792938232422,
|
|
"learning_rate": 3.4045544995169125e-07,
|
|
"logits/chosen": 0.1701010763645172,
|
|
"logits/rejected": 0.10789903253316879,
|
|
"logps/chosen": -52.09330749511719,
|
|
"logps/ref_chosen": -50.40088653564453,
|
|
"logps/ref_rejected": -83.43521881103516,
|
|
"logps/rejected": -86.04659271240234,
|
|
"loss": 1.0393,
|
|
"margin_dpo/margin_mean": 0.918948233127594,
|
|
"margin_dpo/margin_std": 1.4247148036956787,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.4474678760393046,
|
|
"fcm_dpo/beta": 0.9557000994682312,
|
|
"fcm_dpo/delta": -0.28874313831329346,
|
|
"fcm_dpo/margin": 1.3085522651672363,
|
|
"fcm_dpo/q_t": 0.28932487964630127,
|
|
"grad_norm": 233.505126953125,
|
|
"learning_rate": 3.392215553979679e-07,
|
|
"logits/chosen": 0.12661093473434448,
|
|
"logits/rejected": 0.09931506216526031,
|
|
"logps/chosen": -70.89358520507812,
|
|
"logps/ref_chosen": -69.15034484863281,
|
|
"logps/ref_rejected": -89.60166931152344,
|
|
"logps/rejected": -92.65345764160156,
|
|
"loss": 0.8386,
|
|
"margin_dpo/margin_mean": 1.3085522651672363,
|
|
"margin_dpo/margin_std": 1.4957661628723145,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.4489795918367347,
|
|
"fcm_dpo/beta": 0.9359762668609619,
|
|
"fcm_dpo/delta": -0.1143687292933464,
|
|
"fcm_dpo/margin": 1.1771044731140137,
|
|
"fcm_dpo/q_t": 0.30046796798706055,
|
|
"grad_norm": 200.27796936035156,
|
|
"learning_rate": 3.3798516512554485e-07,
|
|
"logits/chosen": 0.13300062716007233,
|
|
"logits/rejected": 0.08352112770080566,
|
|
"logps/chosen": -59.854644775390625,
|
|
"logps/ref_chosen": -58.01630401611328,
|
|
"logps/ref_rejected": -69.95780944824219,
|
|
"logps/rejected": -72.97325134277344,
|
|
"loss": 0.8157,
|
|
"margin_dpo/margin_mean": 1.1771044731140137,
|
|
"margin_dpo/margin_std": 1.2866284847259521,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.4504913076341648,
|
|
"fcm_dpo/beta": 0.9372185468673706,
|
|
"fcm_dpo/delta": 0.04445381462574005,
|
|
"fcm_dpo/margin": 1.0229597091674805,
|
|
"fcm_dpo/q_t": 0.3366781175136566,
|
|
"grad_norm": 205.09780883789062,
|
|
"learning_rate": 3.367463137189156e-07,
|
|
"logits/chosen": 0.2418043464422226,
|
|
"logits/rejected": 0.18982425332069397,
|
|
"logps/chosen": -58.04621505737305,
|
|
"logps/ref_chosen": -56.1693115234375,
|
|
"logps/ref_rejected": -68.55052185058594,
|
|
"logps/rejected": -71.45037841796875,
|
|
"loss": 1.0065,
|
|
"margin_dpo/margin_mean": 1.0229599475860596,
|
|
"margin_dpo/margin_std": 1.4922395944595337,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.4520030234315949,
|
|
"fcm_dpo/beta": 0.9768006801605225,
|
|
"fcm_dpo/delta": 0.26853734254837036,
|
|
"fcm_dpo/margin": 0.7673162817955017,
|
|
"fcm_dpo/q_t": 0.3727618455886841,
|
|
"grad_norm": 255.50270080566406,
|
|
"learning_rate": 3.355050358314172e-07,
|
|
"logits/chosen": 0.08510833978652954,
|
|
"logits/rejected": 0.06492967158555984,
|
|
"logps/chosen": -64.02816009521484,
|
|
"logps/ref_chosen": -62.31780242919922,
|
|
"logps/ref_rejected": -72.60028839111328,
|
|
"logps/rejected": -75.07796478271484,
|
|
"loss": 1.1587,
|
|
"margin_dpo/margin_mean": 0.7673170566558838,
|
|
"margin_dpo/margin_std": 1.4521667957305908,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.45351473922902497,
|
|
"fcm_dpo/beta": 0.982731282711029,
|
|
"fcm_dpo/delta": -0.016341693699359894,
|
|
"fcm_dpo/margin": 1.0325164794921875,
|
|
"fcm_dpo/q_t": 0.3216491937637329,
|
|
"grad_norm": 243.2555389404297,
|
|
"learning_rate": 3.3426136618426043e-07,
|
|
"logits/chosen": 0.14876267313957214,
|
|
"logits/rejected": 0.10573962330818176,
|
|
"logps/chosen": -62.35066223144531,
|
|
"logps/ref_chosen": -60.38157653808594,
|
|
"logps/ref_rejected": -75.45442199707031,
|
|
"logps/rejected": -78.45602416992188,
|
|
"loss": 0.9532,
|
|
"margin_dpo/margin_mean": 1.0325164794921875,
|
|
"margin_dpo/margin_std": 1.3928803205490112,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.455026455026455,
|
|
"fcm_dpo/beta": 0.9845176935195923,
|
|
"fcm_dpo/delta": 0.09773456305265427,
|
|
"fcm_dpo/margin": 0.9239287376403809,
|
|
"fcm_dpo/q_t": 0.36213570833206177,
|
|
"grad_norm": 244.90597534179688,
|
|
"learning_rate": 3.3301533956555885e-07,
|
|
"logits/chosen": 0.16752877831459045,
|
|
"logits/rejected": 0.14301586151123047,
|
|
"logps/chosen": -54.7681999206543,
|
|
"logps/ref_chosen": -52.85089111328125,
|
|
"logps/ref_rejected": -69.97584533691406,
|
|
"logps/rejected": -72.81707763671875,
|
|
"loss": 1.1465,
|
|
"margin_dpo/margin_mean": 0.9239292740821838,
|
|
"margin_dpo/margin_std": 1.6796950101852417,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.4565381708238851,
|
|
"fcm_dpo/beta": 1.0574851036071777,
|
|
"fcm_dpo/delta": 0.3823769986629486,
|
|
"fcm_dpo/margin": 0.607899010181427,
|
|
"fcm_dpo/q_t": 0.3950553238391876,
|
|
"grad_norm": 319.1892395019531,
|
|
"learning_rate": 3.317669908293554e-07,
|
|
"logits/chosen": 0.06301631033420563,
|
|
"logits/rejected": 0.02415418066084385,
|
|
"logps/chosen": -68.90666198730469,
|
|
"logps/ref_chosen": -66.96650695800781,
|
|
"logps/ref_rejected": -88.09510803222656,
|
|
"logps/rejected": -90.64315795898438,
|
|
"loss": 1.2959,
|
|
"margin_dpo/margin_mean": 0.6078989505767822,
|
|
"margin_dpo/margin_std": 1.4545881748199463,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.4580498866213152,
|
|
"fcm_dpo/beta": 0.9939075112342834,
|
|
"fcm_dpo/delta": -0.5528866052627563,
|
|
"fcm_dpo/margin": 1.478973388671875,
|
|
"fcm_dpo/q_t": 0.2767646014690399,
|
|
"grad_norm": 199.89747619628906,
|
|
"learning_rate": 3.3051635489464793e-07,
|
|
"logits/chosen": 0.17920634150505066,
|
|
"logits/rejected": 0.1289132535457611,
|
|
"logps/chosen": -63.91869354248047,
|
|
"logps/ref_chosen": -62.12152862548828,
|
|
"logps/ref_rejected": -90.31204223632812,
|
|
"logps/rejected": -93.58818817138672,
|
|
"loss": 0.8206,
|
|
"margin_dpo/margin_mean": 1.4789727926254272,
|
|
"margin_dpo/margin_std": 1.7652822732925415,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.4595616024187453,
|
|
"fcm_dpo/beta": 0.9474884271621704,
|
|
"fcm_dpo/delta": -0.1770581603050232,
|
|
"fcm_dpo/margin": 1.2210001945495605,
|
|
"fcm_dpo/q_t": 0.2853131592273712,
|
|
"grad_norm": 196.20419311523438,
|
|
"learning_rate": 3.292634667444117e-07,
|
|
"logits/chosen": 0.14427047967910767,
|
|
"logits/rejected": 0.10413600504398346,
|
|
"logps/chosen": -62.46052551269531,
|
|
"logps/ref_chosen": -60.695091247558594,
|
|
"logps/ref_rejected": -78.2525405883789,
|
|
"logps/rejected": -81.23897552490234,
|
|
"loss": 0.7895,
|
|
"margin_dpo/margin_mean": 1.2210009098052979,
|
|
"margin_dpo/margin_std": 1.2481887340545654,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.46107331821617537,
|
|
"fcm_dpo/beta": 0.9275550842285156,
|
|
"fcm_dpo/delta": 0.015897810459136963,
|
|
"fcm_dpo/margin": 1.0611162185668945,
|
|
"fcm_dpo/q_t": 0.3354414701461792,
|
|
"grad_norm": 219.49313354492188,
|
|
"learning_rate": 3.280083614246217e-07,
|
|
"logits/chosen": 0.09203135967254639,
|
|
"logits/rejected": 0.09808552265167236,
|
|
"logps/chosen": -74.84890747070312,
|
|
"logps/ref_chosen": -72.69914245605469,
|
|
"logps/ref_rejected": -65.65670776367188,
|
|
"logps/rejected": -68.86759185791016,
|
|
"loss": 1.0316,
|
|
"margin_dpo/margin_mean": 1.0611159801483154,
|
|
"margin_dpo/margin_std": 1.6295936107635498,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.46258503401360546,
|
|
"fcm_dpo/beta": 0.9322211742401123,
|
|
"fcm_dpo/delta": 0.10965774953365326,
|
|
"fcm_dpo/margin": 0.958516001701355,
|
|
"fcm_dpo/q_t": 0.34834927320480347,
|
|
"grad_norm": 212.98146057128906,
|
|
"learning_rate": 3.267510740432719e-07,
|
|
"logits/chosen": 0.15844234824180603,
|
|
"logits/rejected": 0.07754644751548767,
|
|
"logps/chosen": -56.162513732910156,
|
|
"logps/ref_chosen": -53.97052764892578,
|
|
"logps/ref_rejected": -71.02423095703125,
|
|
"logps/rejected": -74.17473602294922,
|
|
"loss": 1.0813,
|
|
"margin_dpo/margin_mean": 0.9585161209106445,
|
|
"margin_dpo/margin_std": 1.579054355621338,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.46409674981103555,
|
|
"fcm_dpo/beta": 0.9765808582305908,
|
|
"fcm_dpo/delta": 0.09200756251811981,
|
|
"fcm_dpo/margin": 0.9371168613433838,
|
|
"fcm_dpo/q_t": 0.35316699743270874,
|
|
"grad_norm": 227.36492919921875,
|
|
"learning_rate": 3.2549163976939285e-07,
|
|
"logits/chosen": 0.19748598337173462,
|
|
"logits/rejected": 0.16263772547245026,
|
|
"logps/chosen": -59.23182678222656,
|
|
"logps/ref_chosen": -57.413108825683594,
|
|
"logps/ref_rejected": -68.68010711669922,
|
|
"logps/rejected": -71.43594360351562,
|
|
"loss": 1.1199,
|
|
"margin_dpo/margin_mean": 0.9371169805526733,
|
|
"margin_dpo/margin_std": 1.6100661754608154,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.4656084656084656,
|
|
"fcm_dpo/beta": 0.9509669542312622,
|
|
"fcm_dpo/delta": -0.15045209228992462,
|
|
"fcm_dpo/margin": 1.1912171840667725,
|
|
"fcm_dpo/q_t": 0.30430689454078674,
|
|
"grad_norm": 198.90879821777344,
|
|
"learning_rate": 3.2423009383206874e-07,
|
|
"logits/chosen": 0.14542096853256226,
|
|
"logits/rejected": 0.1255512833595276,
|
|
"logps/chosen": -68.51435852050781,
|
|
"logps/ref_chosen": -66.59879302978516,
|
|
"logps/ref_rejected": -74.337158203125,
|
|
"logps/rejected": -77.44393920898438,
|
|
"loss": 0.8821,
|
|
"margin_dpo/margin_mean": 1.1912175416946411,
|
|
"margin_dpo/margin_std": 1.509572982788086,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.4671201814058957,
|
|
"fcm_dpo/beta": 0.9307016134262085,
|
|
"fcm_dpo/delta": -0.036133162677288055,
|
|
"fcm_dpo/margin": 1.1062381267547607,
|
|
"fcm_dpo/q_t": 0.32213109731674194,
|
|
"grad_norm": 279.2270812988281,
|
|
"learning_rate": 3.229664715194511e-07,
|
|
"logits/chosen": 0.1940372884273529,
|
|
"logits/rejected": 0.14977452158927917,
|
|
"logps/chosen": -67.56904602050781,
|
|
"logps/ref_chosen": -65.39474487304688,
|
|
"logps/ref_rejected": -75.70930480957031,
|
|
"logps/rejected": -78.98983764648438,
|
|
"loss": 0.926,
|
|
"margin_dpo/margin_mean": 1.1062389612197876,
|
|
"margin_dpo/margin_std": 1.470036506652832,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.46863189720332576,
|
|
"fcm_dpo/beta": 0.9882631897926331,
|
|
"fcm_dpo/delta": 0.319682240486145,
|
|
"fcm_dpo/margin": 0.7099736928939819,
|
|
"fcm_dpo/q_t": 0.38903123140335083,
|
|
"grad_norm": 270.8043518066406,
|
|
"learning_rate": 3.2170080817777257e-07,
|
|
"logits/chosen": 0.19459328055381775,
|
|
"logits/rejected": 0.18906690180301666,
|
|
"logps/chosen": -76.8685302734375,
|
|
"logps/ref_chosen": -74.66827392578125,
|
|
"logps/ref_rejected": -80.5689697265625,
|
|
"logps/rejected": -83.47919464111328,
|
|
"loss": 1.2064,
|
|
"margin_dpo/margin_mean": 0.709973931312561,
|
|
"margin_dpo/margin_std": 1.481546401977539,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.47014361300075586,
|
|
"fcm_dpo/beta": 0.997173547744751,
|
|
"fcm_dpo/delta": -0.014330286532640457,
|
|
"fcm_dpo/margin": 1.015620231628418,
|
|
"fcm_dpo/q_t": 0.33949679136276245,
|
|
"grad_norm": 241.86203002929688,
|
|
"learning_rate": 3.204331392103574e-07,
|
|
"logits/chosen": 0.11166363954544067,
|
|
"logits/rejected": 0.02732861414551735,
|
|
"logps/chosen": -61.56627655029297,
|
|
"logps/ref_chosen": -59.738033294677734,
|
|
"logps/ref_rejected": -93.60757446289062,
|
|
"logps/rejected": -96.4514389038086,
|
|
"loss": 1.1047,
|
|
"margin_dpo/margin_mean": 1.0156208276748657,
|
|
"margin_dpo/margin_std": 1.744983434677124,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.47165532879818595,
|
|
"fcm_dpo/beta": 1.0016117095947266,
|
|
"fcm_dpo/delta": -0.043600842356681824,
|
|
"fcm_dpo/margin": 1.0361428260803223,
|
|
"fcm_dpo/q_t": 0.3195294141769409,
|
|
"grad_norm": 267.0063781738281,
|
|
"learning_rate": 3.1916350007663176e-07,
|
|
"logits/chosen": 0.16160011291503906,
|
|
"logits/rejected": 0.08802653849124908,
|
|
"logps/chosen": -55.83079528808594,
|
|
"logps/ref_chosen": -53.816436767578125,
|
|
"logps/ref_rejected": -68.6575698852539,
|
|
"logps/rejected": -71.70807647705078,
|
|
"loss": 1.0571,
|
|
"margin_dpo/margin_mean": 1.0361424684524536,
|
|
"margin_dpo/margin_std": 1.636512041091919,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.47316704459561604,
|
|
"fcm_dpo/beta": 0.9808007478713989,
|
|
"fcm_dpo/delta": -0.0073838010430336,
|
|
"fcm_dpo/margin": 1.0249512195587158,
|
|
"fcm_dpo/q_t": 0.3501819968223572,
|
|
"grad_norm": 224.44908142089844,
|
|
"learning_rate": 3.178919262911314e-07,
|
|
"logits/chosen": 0.19469937682151794,
|
|
"logits/rejected": 0.17493098974227905,
|
|
"logps/chosen": -61.82642364501953,
|
|
"logps/ref_chosen": -59.957359313964844,
|
|
"logps/ref_rejected": -69.31729888916016,
|
|
"logps/rejected": -72.21131896972656,
|
|
"loss": 1.077,
|
|
"margin_dpo/margin_mean": 1.0249509811401367,
|
|
"margin_dpo/margin_std": 1.7490208148956299,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.47467876039304613,
|
|
"fcm_dpo/beta": 0.9450622200965881,
|
|
"fcm_dpo/delta": -0.23715783655643463,
|
|
"fcm_dpo/margin": 1.274531364440918,
|
|
"fcm_dpo/q_t": 0.2915083169937134,
|
|
"grad_norm": 200.15219116210938,
|
|
"learning_rate": 3.166184534225087e-07,
|
|
"logits/chosen": 0.16370022296905518,
|
|
"logits/rejected": 0.17338353395462036,
|
|
"logps/chosen": -72.26028442382812,
|
|
"logps/ref_chosen": -70.26815795898438,
|
|
"logps/ref_rejected": -69.23971557617188,
|
|
"logps/rejected": -72.50637817382812,
|
|
"loss": 0.8243,
|
|
"margin_dpo/margin_mean": 1.2745311260223389,
|
|
"margin_dpo/margin_std": 1.4535917043685913,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.47619047619047616,
|
|
"fcm_dpo/beta": 0.9470343589782715,
|
|
"fcm_dpo/delta": -0.0004953928291797638,
|
|
"fcm_dpo/margin": 1.0559828281402588,
|
|
"fcm_dpo/q_t": 0.328433096408844,
|
|
"grad_norm": 213.31190490722656,
|
|
"learning_rate": 3.1534311709253723e-07,
|
|
"logits/chosen": 0.09684689342975616,
|
|
"logits/rejected": 0.060719601809978485,
|
|
"logps/chosen": -69.97232055664062,
|
|
"logps/ref_chosen": -67.79469299316406,
|
|
"logps/ref_rejected": -74.55148315429688,
|
|
"logps/rejected": -77.78509521484375,
|
|
"loss": 0.9555,
|
|
"margin_dpo/margin_mean": 1.0559827089309692,
|
|
"margin_dpo/margin_std": 1.4584524631500244,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.47770219198790626,
|
|
"fcm_dpo/beta": 0.9314021468162537,
|
|
"fcm_dpo/delta": -0.23499611020088196,
|
|
"fcm_dpo/margin": 1.2908413410186768,
|
|
"fcm_dpo/q_t": 0.3175312876701355,
|
|
"grad_norm": 208.80650329589844,
|
|
"learning_rate": 3.1406595297511564e-07,
|
|
"logits/chosen": 0.08339610695838928,
|
|
"logits/rejected": 0.005085200071334839,
|
|
"logps/chosen": -57.207271575927734,
|
|
"logps/ref_chosen": -55.288482666015625,
|
|
"logps/ref_rejected": -96.15723419189453,
|
|
"logps/rejected": -99.36686706542969,
|
|
"loss": 0.9423,
|
|
"margin_dpo/margin_mean": 1.290840983390808,
|
|
"margin_dpo/margin_std": 1.7332630157470703,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.47921390778533635,
|
|
"fcm_dpo/beta": 0.8467363119125366,
|
|
"fcm_dpo/delta": -0.33823323249816895,
|
|
"fcm_dpo/margin": 1.5226337909698486,
|
|
"fcm_dpo/q_t": 0.2754213809967041,
|
|
"grad_norm": 168.14048767089844,
|
|
"learning_rate": 3.1278699679526975e-07,
|
|
"logits/chosen": 0.2033698856830597,
|
|
"logits/rejected": 0.1626172512769699,
|
|
"logps/chosen": -56.44146728515625,
|
|
"logps/ref_chosen": -54.58137512207031,
|
|
"logps/ref_rejected": -72.77232360839844,
|
|
"logps/rejected": -76.15504455566406,
|
|
"loss": 0.7402,
|
|
"margin_dpo/margin_mean": 1.5226335525512695,
|
|
"margin_dpo/margin_std": 1.537990689277649,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.48072562358276644,
|
|
"fcm_dpo/beta": 0.880409836769104,
|
|
"fcm_dpo/delta": 0.30436062812805176,
|
|
"fcm_dpo/margin": 0.8145400285720825,
|
|
"fcm_dpo/q_t": 0.3881235420703888,
|
|
"grad_norm": 241.8609619140625,
|
|
"learning_rate": 3.1150628432815336e-07,
|
|
"logits/chosen": 0.21144279837608337,
|
|
"logits/rejected": 0.1659296602010727,
|
|
"logps/chosen": -55.05265426635742,
|
|
"logps/ref_chosen": -52.88822937011719,
|
|
"logps/ref_rejected": -80.63988494873047,
|
|
"logps/rejected": -83.61885070800781,
|
|
"loss": 1.3193,
|
|
"margin_dpo/margin_mean": 0.8145396709442139,
|
|
"margin_dpo/margin_std": 1.8883013725280762,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.48223733938019653,
|
|
"fcm_dpo/beta": 0.8839104771614075,
|
|
"fcm_dpo/delta": -0.04694606736302376,
|
|
"fcm_dpo/margin": 1.1784477233886719,
|
|
"fcm_dpo/q_t": 0.3256542682647705,
|
|
"grad_norm": 204.67640686035156,
|
|
"learning_rate": 3.1022385139804707e-07,
|
|
"logits/chosen": 0.14550672471523285,
|
|
"logits/rejected": 0.12916123867034912,
|
|
"logps/chosen": -66.23543548583984,
|
|
"logps/ref_chosen": -64.36333465576172,
|
|
"logps/ref_rejected": -79.47296142578125,
|
|
"logps/rejected": -82.52351379394531,
|
|
"loss": 1.0132,
|
|
"margin_dpo/margin_mean": 1.1784476041793823,
|
|
"margin_dpo/margin_std": 1.7645900249481201,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.4837490551776266,
|
|
"fcm_dpo/beta": 0.9131995439529419,
|
|
"fcm_dpo/delta": 0.07204453647136688,
|
|
"fcm_dpo/margin": 1.013108253479004,
|
|
"fcm_dpo/q_t": 0.37524113059043884,
|
|
"grad_norm": 217.01268005371094,
|
|
"learning_rate": 3.0893973387735683e-07,
|
|
"logits/chosen": 0.055543892085552216,
|
|
"logits/rejected": 0.023240717127919197,
|
|
"logps/chosen": -51.213958740234375,
|
|
"logps/ref_chosen": -49.558746337890625,
|
|
"logps/ref_rejected": -71.23444366455078,
|
|
"logps/rejected": -73.90277099609375,
|
|
"loss": 1.1825,
|
|
"margin_dpo/margin_mean": 1.0131080150604248,
|
|
"margin_dpo/margin_std": 1.9926589727401733,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.4852607709750567,
|
|
"fcm_dpo/beta": 0.8980337381362915,
|
|
"fcm_dpo/delta": 0.061922501772642136,
|
|
"fcm_dpo/margin": 1.0499402284622192,
|
|
"fcm_dpo/q_t": 0.33765465021133423,
|
|
"grad_norm": 189.1380615234375,
|
|
"learning_rate": 3.0765396768561004e-07,
|
|
"logits/chosen": 0.11102212965488434,
|
|
"logits/rejected": 0.09998691082000732,
|
|
"logps/chosen": -54.228790283203125,
|
|
"logps/ref_chosen": -52.08526611328125,
|
|
"logps/ref_rejected": -55.58674621582031,
|
|
"logps/rejected": -58.78020477294922,
|
|
"loss": 1.0637,
|
|
"margin_dpo/margin_mean": 1.0499403476715088,
|
|
"margin_dpo/margin_std": 1.613488793373108,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.48677248677248675,
|
|
"fcm_dpo/beta": 0.8880925178527832,
|
|
"fcm_dpo/delta": -0.23487280309200287,
|
|
"fcm_dpo/margin": 1.3585911989212036,
|
|
"fcm_dpo/q_t": 0.28043854236602783,
|
|
"grad_norm": 176.82778930664062,
|
|
"learning_rate": 3.063665887884511e-07,
|
|
"logits/chosen": 0.17297013103961945,
|
|
"logits/rejected": 0.1158091127872467,
|
|
"logps/chosen": -49.436309814453125,
|
|
"logps/ref_chosen": -47.404109954833984,
|
|
"logps/ref_rejected": -73.4260025024414,
|
|
"logps/rejected": -76.81678771972656,
|
|
"loss": 0.7812,
|
|
"margin_dpo/margin_mean": 1.3585913181304932,
|
|
"margin_dpo/margin_std": 1.383784532546997,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.48828420256991684,
|
|
"fcm_dpo/beta": 0.8809718489646912,
|
|
"fcm_dpo/delta": -0.021233975887298584,
|
|
"fcm_dpo/margin": 1.149742841720581,
|
|
"fcm_dpo/q_t": 0.359465092420578,
|
|
"grad_norm": 248.77261352539062,
|
|
"learning_rate": 3.0507763319663517e-07,
|
|
"logits/chosen": 0.10760500282049179,
|
|
"logits/rejected": 0.054044321179389954,
|
|
"logps/chosen": -72.1104736328125,
|
|
"logps/ref_chosen": -70.00630187988281,
|
|
"logps/ref_rejected": -86.96690368652344,
|
|
"logps/rejected": -90.22081756591797,
|
|
"loss": 1.1742,
|
|
"margin_dpo/margin_mean": 1.1497416496276855,
|
|
"margin_dpo/margin_std": 2.1441869735717773,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.4897959183673469,
|
|
"fcm_dpo/beta": 0.8546582460403442,
|
|
"fcm_dpo/delta": -0.05345672369003296,
|
|
"fcm_dpo/margin": 1.2257628440856934,
|
|
"fcm_dpo/q_t": 0.3085279166698456,
|
|
"grad_norm": 156.60922241210938,
|
|
"learning_rate": 3.0378713696502097e-07,
|
|
"logits/chosen": 0.18619199097156525,
|
|
"logits/rejected": 0.1414175033569336,
|
|
"logps/chosen": -57.747230529785156,
|
|
"logps/ref_chosen": -55.88882064819336,
|
|
"logps/ref_rejected": -75.23088073730469,
|
|
"logps/rejected": -78.31505584716797,
|
|
"loss": 0.8287,
|
|
"margin_dpo/margin_mean": 1.2257624864578247,
|
|
"margin_dpo/margin_std": 1.363228678703308,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.491307634164777,
|
|
"fcm_dpo/beta": 0.8600409626960754,
|
|
"fcm_dpo/delta": 0.09678801149129868,
|
|
"fcm_dpo/margin": 1.0598913431167603,
|
|
"fcm_dpo/q_t": 0.3425368666648865,
|
|
"grad_norm": 200.6235809326172,
|
|
"learning_rate": 3.0249513619156206e-07,
|
|
"logits/chosen": 0.1525503695011139,
|
|
"logits/rejected": 0.1075083315372467,
|
|
"logps/chosen": -66.32670593261719,
|
|
"logps/ref_chosen": -64.14701843261719,
|
|
"logps/ref_rejected": -79.91143798828125,
|
|
"logps/rejected": -83.15100860595703,
|
|
"loss": 0.9779,
|
|
"margin_dpo/margin_mean": 1.059891700744629,
|
|
"margin_dpo/margin_std": 1.5498141050338745,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.4928193499622071,
|
|
"fcm_dpo/beta": 0.9292858242988586,
|
|
"fcm_dpo/delta": 0.4350077509880066,
|
|
"fcm_dpo/margin": 0.6357402801513672,
|
|
"fcm_dpo/q_t": 0.40372180938720703,
|
|
"grad_norm": 280.2021179199219,
|
|
"learning_rate": 3.012016670162977e-07,
|
|
"logits/chosen": 0.11779944598674774,
|
|
"logits/rejected": 0.11533387750387192,
|
|
"logps/chosen": -78.00666046142578,
|
|
"logps/ref_chosen": -75.53131103515625,
|
|
"logps/ref_rejected": -76.5898666381836,
|
|
"logps/rejected": -79.70095825195312,
|
|
"loss": 1.3715,
|
|
"margin_dpo/margin_mean": 0.6357403993606567,
|
|
"margin_dpo/margin_std": 1.7088639736175537,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.4943310657596372,
|
|
"fcm_dpo/beta": 0.9441518783569336,
|
|
"fcm_dpo/delta": -0.06476020067930222,
|
|
"fcm_dpo/margin": 1.120490312576294,
|
|
"fcm_dpo/q_t": 0.32473599910736084,
|
|
"grad_norm": 212.27267456054688,
|
|
"learning_rate": 2.99906765620341e-07,
|
|
"logits/chosen": 0.09260990470647812,
|
|
"logits/rejected": 0.054583217948675156,
|
|
"logps/chosen": -71.31211853027344,
|
|
"logps/ref_chosen": -69.33717346191406,
|
|
"logps/ref_rejected": -73.37751770019531,
|
|
"logps/rejected": -76.47296142578125,
|
|
"loss": 1.0397,
|
|
"margin_dpo/margin_mean": 1.1204906702041626,
|
|
"margin_dpo/margin_std": 1.7054201364517212,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.4958427815570673,
|
|
"fcm_dpo/beta": 0.938388466835022,
|
|
"fcm_dpo/delta": -0.004852544516324997,
|
|
"fcm_dpo/margin": 1.0702922344207764,
|
|
"fcm_dpo/q_t": 0.3356061577796936,
|
|
"grad_norm": 220.39108276367188,
|
|
"learning_rate": 2.9861046822486766e-07,
|
|
"logits/chosen": 0.1284867823123932,
|
|
"logits/rejected": 0.10963472723960876,
|
|
"logps/chosen": -63.444252014160156,
|
|
"logps/ref_chosen": -61.70623016357422,
|
|
"logps/ref_rejected": -83.73808288574219,
|
|
"logps/rejected": -86.54638671875,
|
|
"loss": 0.9952,
|
|
"margin_dpo/margin_mean": 1.0702919960021973,
|
|
"margin_dpo/margin_std": 1.561734914779663,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.4973544973544973,
|
|
"fcm_dpo/beta": 0.926159143447876,
|
|
"fcm_dpo/delta": 0.030632048845291138,
|
|
"fcm_dpo/margin": 1.044736385345459,
|
|
"fcm_dpo/q_t": 0.36076274514198303,
|
|
"grad_norm": 272.3174743652344,
|
|
"learning_rate": 2.9731281109010253e-07,
|
|
"logits/chosen": 0.165533185005188,
|
|
"logits/rejected": 0.12572559714317322,
|
|
"logps/chosen": -66.76375579833984,
|
|
"logps/ref_chosen": -64.4984130859375,
|
|
"logps/ref_rejected": -83.6591796875,
|
|
"logps/rejected": -86.96925354003906,
|
|
"loss": 1.169,
|
|
"margin_dpo/margin_mean": 1.0447362661361694,
|
|
"margin_dpo/margin_std": 1.9857655763626099,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.4988662131519274,
|
|
"fcm_dpo/beta": 0.9391987323760986,
|
|
"fcm_dpo/delta": -0.06099821627140045,
|
|
"fcm_dpo/margin": 1.122628927230835,
|
|
"fcm_dpo/q_t": 0.32373300194740295,
|
|
"grad_norm": 196.6431427001953,
|
|
"learning_rate": 2.9601383051430505e-07,
|
|
"logits/chosen": 0.13443490862846375,
|
|
"logits/rejected": 0.08146592229604721,
|
|
"logps/chosen": -56.5888671875,
|
|
"logps/ref_chosen": -54.80464172363281,
|
|
"logps/ref_rejected": -75.3194351196289,
|
|
"logps/rejected": -78.22628784179688,
|
|
"loss": 1.0489,
|
|
"margin_dpo/margin_mean": 1.1226279735565186,
|
|
"margin_dpo/margin_std": 1.7134172916412354,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.5003779289493575,
|
|
"fcm_dpo/beta": 0.8843910694122314,
|
|
"fcm_dpo/delta": -0.29690316319465637,
|
|
"fcm_dpo/margin": 1.4203097820281982,
|
|
"fcm_dpo/q_t": 0.29872971773147583,
|
|
"grad_norm": 204.7743377685547,
|
|
"learning_rate": 2.947135628327544e-07,
|
|
"logits/chosen": 0.24195344746112823,
|
|
"logits/rejected": 0.2168186902999878,
|
|
"logps/chosen": -61.22477340698242,
|
|
"logps/ref_chosen": -59.242584228515625,
|
|
"logps/ref_rejected": -69.87483215332031,
|
|
"logps/rejected": -73.27733612060547,
|
|
"loss": 0.8837,
|
|
"margin_dpo/margin_mean": 1.4203091859817505,
|
|
"margin_dpo/margin_std": 1.860417127609253,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.5018896447467877,
|
|
"fcm_dpo/beta": 0.8774402141571045,
|
|
"fcm_dpo/delta": -0.07574308663606644,
|
|
"fcm_dpo/margin": 1.2151854038238525,
|
|
"fcm_dpo/q_t": 0.320780485868454,
|
|
"grad_norm": 200.2390899658203,
|
|
"learning_rate": 2.934120444167326e-07,
|
|
"logits/chosen": 0.1037403866648674,
|
|
"logits/rejected": 0.0657915323972702,
|
|
"logps/chosen": -69.155517578125,
|
|
"logps/ref_chosen": -67.10975646972656,
|
|
"logps/ref_rejected": -77.11839294433594,
|
|
"logps/rejected": -80.37932586669922,
|
|
"loss": 0.9145,
|
|
"margin_dpo/margin_mean": 1.2151854038238525,
|
|
"margin_dpo/margin_std": 1.6321237087249756,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.5034013605442177,
|
|
"fcm_dpo/beta": 0.8272979259490967,
|
|
"fcm_dpo/delta": -0.3091200590133667,
|
|
"fcm_dpo/margin": 1.535266637802124,
|
|
"fcm_dpo/q_t": 0.2927742302417755,
|
|
"grad_norm": 172.9521942138672,
|
|
"learning_rate": 2.921093116725076e-07,
|
|
"logits/chosen": 0.1792515516281128,
|
|
"logits/rejected": 0.12908412516117096,
|
|
"logps/chosen": -60.283103942871094,
|
|
"logps/ref_chosen": -58.381134033203125,
|
|
"logps/ref_rejected": -85.02839660644531,
|
|
"logps/rejected": -88.46562957763672,
|
|
"loss": 0.8145,
|
|
"margin_dpo/margin_mean": 1.5352662801742554,
|
|
"margin_dpo/margin_std": 1.7731688022613525,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.5049130763416477,
|
|
"fcm_dpo/beta": 0.8177670240402222,
|
|
"fcm_dpo/delta": 0.09361538290977478,
|
|
"fcm_dpo/margin": 1.1182286739349365,
|
|
"fcm_dpo/q_t": 0.3452100455760956,
|
|
"grad_norm": 200.41497802734375,
|
|
"learning_rate": 2.9080540104031484e-07,
|
|
"logits/chosen": 0.16784140467643738,
|
|
"logits/rejected": 0.13531029224395752,
|
|
"logps/chosen": -69.00025939941406,
|
|
"logps/ref_chosen": -66.89199829101562,
|
|
"logps/ref_rejected": -91.83695220947266,
|
|
"logps/rejected": -95.06344604492188,
|
|
"loss": 1.063,
|
|
"margin_dpo/margin_mean": 1.118227481842041,
|
|
"margin_dpo/margin_std": 1.7574753761291504,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.5064247921390779,
|
|
"fcm_dpo/beta": 0.8364279866218567,
|
|
"fcm_dpo/delta": -0.04344947636127472,
|
|
"fcm_dpo/margin": 1.23694908618927,
|
|
"fcm_dpo/q_t": 0.32282981276512146,
|
|
"grad_norm": 176.56353759765625,
|
|
"learning_rate": 2.895003489933375e-07,
|
|
"logits/chosen": 0.15401214361190796,
|
|
"logits/rejected": 0.12541456520557404,
|
|
"logps/chosen": -63.48824691772461,
|
|
"logps/ref_chosen": -61.51445770263672,
|
|
"logps/ref_rejected": -75.68916320800781,
|
|
"logps/rejected": -78.89990234375,
|
|
"loss": 0.9844,
|
|
"margin_dpo/margin_mean": 1.2369496822357178,
|
|
"margin_dpo/margin_std": 1.6888550519943237,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.5079365079365079,
|
|
"fcm_dpo/beta": 0.8125041127204895,
|
|
"fcm_dpo/delta": 0.06346192955970764,
|
|
"fcm_dpo/margin": 1.1519867181777954,
|
|
"fcm_dpo/q_t": 0.35848677158355713,
|
|
"grad_norm": 223.40049743652344,
|
|
"learning_rate": 2.8819419203668675e-07,
|
|
"logits/chosen": 0.09962709248065948,
|
|
"logits/rejected": 0.0864500105381012,
|
|
"logps/chosen": -71.24321746826172,
|
|
"logps/ref_chosen": -68.85006713867188,
|
|
"logps/ref_rejected": -92.99603271484375,
|
|
"logps/rejected": -96.54116821289062,
|
|
"loss": 1.0724,
|
|
"margin_dpo/margin_mean": 1.151987075805664,
|
|
"margin_dpo/margin_std": 1.9282138347625732,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.509448223733938,
|
|
"fcm_dpo/beta": 0.8386461734771729,
|
|
"fcm_dpo/delta": 0.09305550158023834,
|
|
"fcm_dpo/margin": 1.0915064811706543,
|
|
"fcm_dpo/q_t": 0.3443507254123688,
|
|
"grad_norm": 215.70370483398438,
|
|
"learning_rate": 2.8688696670638053e-07,
|
|
"logits/chosen": 0.08634011447429657,
|
|
"logits/rejected": 0.05762971565127373,
|
|
"logps/chosen": -75.31697845458984,
|
|
"logps/ref_chosen": -73.18783569335938,
|
|
"logps/ref_rejected": -86.89118957519531,
|
|
"logps/rejected": -90.11184692382812,
|
|
"loss": 1.0527,
|
|
"margin_dpo/margin_mean": 1.0915067195892334,
|
|
"margin_dpo/margin_std": 1.7208020687103271,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.5109599395313681,
|
|
"fcm_dpo/beta": 0.8704172372817993,
|
|
"fcm_dpo/delta": 0.1373847872018814,
|
|
"fcm_dpo/margin": 1.0027812719345093,
|
|
"fcm_dpo/q_t": 0.3466625213623047,
|
|
"grad_norm": 219.7271270751953,
|
|
"learning_rate": 2.8557870956832133e-07,
|
|
"logits/chosen": 0.11361702531576157,
|
|
"logits/rejected": 0.09130830317735672,
|
|
"logps/chosen": -66.18433380126953,
|
|
"logps/ref_chosen": -63.939613342285156,
|
|
"logps/ref_rejected": -75.34243774414062,
|
|
"logps/rejected": -78.58993530273438,
|
|
"loss": 1.0112,
|
|
"margin_dpo/margin_mean": 1.0027809143066406,
|
|
"margin_dpo/margin_std": 1.4872009754180908,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.5124716553287982,
|
|
"fcm_dpo/beta": 0.8656154274940491,
|
|
"fcm_dpo/delta": 0.016111478209495544,
|
|
"fcm_dpo/margin": 1.137109398841858,
|
|
"fcm_dpo/q_t": 0.3326491713523865,
|
|
"grad_norm": 190.49337768554688,
|
|
"learning_rate": 2.842694572172736e-07,
|
|
"logits/chosen": 0.19113630056381226,
|
|
"logits/rejected": 0.12893235683441162,
|
|
"logps/chosen": -47.86590576171875,
|
|
"logps/ref_chosen": -45.54913330078125,
|
|
"logps/ref_rejected": -67.0482177734375,
|
|
"logps/rejected": -70.50209045410156,
|
|
"loss": 0.958,
|
|
"margin_dpo/margin_mean": 1.137109398841858,
|
|
"margin_dpo/margin_std": 1.5872113704681396,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.5139833711262283,
|
|
"fcm_dpo/beta": 0.8644633889198303,
|
|
"fcm_dpo/delta": -0.16399508714675903,
|
|
"fcm_dpo/margin": 1.3231072425842285,
|
|
"fcm_dpo/q_t": 0.3184114396572113,
|
|
"grad_norm": 165.63905334472656,
|
|
"learning_rate": 2.8295924627584004e-07,
|
|
"logits/chosen": 0.14443045854568481,
|
|
"logits/rejected": 0.12298154830932617,
|
|
"logps/chosen": -56.29311752319336,
|
|
"logps/ref_chosen": -54.00564956665039,
|
|
"logps/ref_rejected": -61.314430236816406,
|
|
"logps/rejected": -64.92501068115234,
|
|
"loss": 1.0075,
|
|
"margin_dpo/margin_mean": 1.3231074810028076,
|
|
"margin_dpo/margin_std": 1.933672547340393,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.5154950869236583,
|
|
"fcm_dpo/beta": 0.8116443157196045,
|
|
"fcm_dpo/delta": -0.10012944042682648,
|
|
"fcm_dpo/margin": 1.3234137296676636,
|
|
"fcm_dpo/q_t": 0.31874704360961914,
|
|
"grad_norm": 227.253662109375,
|
|
"learning_rate": 2.816481133934373e-07,
|
|
"logits/chosen": 0.1503913700580597,
|
|
"logits/rejected": 0.11632785201072693,
|
|
"logps/chosen": -65.45622253417969,
|
|
"logps/ref_chosen": -63.39509582519531,
|
|
"logps/ref_rejected": -76.20973205566406,
|
|
"logps/rejected": -79.59427642822266,
|
|
"loss": 0.9461,
|
|
"margin_dpo/margin_mean": 1.3234134912490845,
|
|
"margin_dpo/margin_std": 1.674351453781128,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.5170068027210885,
|
|
"fcm_dpo/beta": 0.8011665940284729,
|
|
"fcm_dpo/delta": -0.21002721786499023,
|
|
"fcm_dpo/margin": 1.4784516096115112,
|
|
"fcm_dpo/q_t": 0.30981749296188354,
|
|
"grad_norm": 152.3086700439453,
|
|
"learning_rate": 2.8033609524527046e-07,
|
|
"logits/chosen": 0.17057004570960999,
|
|
"logits/rejected": 0.13639067113399506,
|
|
"logps/chosen": -55.32265853881836,
|
|
"logps/ref_chosen": -53.047813415527344,
|
|
"logps/ref_rejected": -68.2854232788086,
|
|
"logps/rejected": -72.03872680664062,
|
|
"loss": 0.8854,
|
|
"margin_dpo/margin_mean": 1.4784512519836426,
|
|
"margin_dpo/margin_std": 1.8578169345855713,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.5185185185185185,
|
|
"fcm_dpo/beta": 0.8318637609481812,
|
|
"fcm_dpo/delta": 0.3891153335571289,
|
|
"fcm_dpo/margin": 0.7660273909568787,
|
|
"fcm_dpo/q_t": 0.38361668586730957,
|
|
"grad_norm": 204.89308166503906,
|
|
"learning_rate": 2.7902322853130753e-07,
|
|
"logits/chosen": 0.08965672552585602,
|
|
"logits/rejected": 0.08724290132522583,
|
|
"logps/chosen": -72.62460327148438,
|
|
"logps/ref_chosen": -70.57852935791016,
|
|
"logps/ref_rejected": -84.73873901367188,
|
|
"logps/rejected": -87.55084228515625,
|
|
"loss": 1.2263,
|
|
"margin_dpo/margin_mean": 0.7660267353057861,
|
|
"margin_dpo/margin_std": 1.6297626495361328,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.5200302343159486,
|
|
"fcm_dpo/beta": 0.8591570854187012,
|
|
"fcm_dpo/delta": 0.02412712574005127,
|
|
"fcm_dpo/margin": 1.1384611129760742,
|
|
"fcm_dpo/q_t": 0.3305787742137909,
|
|
"grad_norm": 197.9630584716797,
|
|
"learning_rate": 2.7770954997525274e-07,
|
|
"logits/chosen": 0.15758341550827026,
|
|
"logits/rejected": 0.11625611782073975,
|
|
"logps/chosen": -58.18259811401367,
|
|
"logps/ref_chosen": -55.811004638671875,
|
|
"logps/ref_rejected": -84.77637481689453,
|
|
"logps/rejected": -88.28643035888672,
|
|
"loss": 0.982,
|
|
"margin_dpo/margin_mean": 1.138460636138916,
|
|
"margin_dpo/margin_std": 1.6236982345581055,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.5215419501133787,
|
|
"fcm_dpo/beta": 0.8624707460403442,
|
|
"fcm_dpo/delta": 0.05118731036782265,
|
|
"fcm_dpo/margin": 1.1047433614730835,
|
|
"fcm_dpo/q_t": 0.3428131341934204,
|
|
"grad_norm": 181.71697998046875,
|
|
"learning_rate": 2.7639509632351927e-07,
|
|
"logits/chosen": 0.2466953694820404,
|
|
"logits/rejected": 0.2128785401582718,
|
|
"logps/chosen": -59.707969665527344,
|
|
"logps/ref_chosen": -57.78609848022461,
|
|
"logps/ref_rejected": -78.91847229003906,
|
|
"logps/rejected": -81.94508361816406,
|
|
"loss": 1.0228,
|
|
"margin_dpo/margin_mean": 1.1047430038452148,
|
|
"margin_dpo/margin_std": 1.6839402914047241,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.5230536659108088,
|
|
"fcm_dpo/beta": 0.8550155162811279,
|
|
"fcm_dpo/delta": -0.15863507986068726,
|
|
"fcm_dpo/margin": 1.334245204925537,
|
|
"fcm_dpo/q_t": 0.3107675015926361,
|
|
"grad_norm": 217.05181884765625,
|
|
"learning_rate": 2.7507990434420123e-07,
|
|
"logits/chosen": 0.18394121527671814,
|
|
"logits/rejected": 0.13396984338760376,
|
|
"logps/chosen": -58.29369354248047,
|
|
"logps/ref_chosen": -56.285125732421875,
|
|
"logps/ref_rejected": -91.15303039550781,
|
|
"logps/rejected": -94.495849609375,
|
|
"loss": 0.9038,
|
|
"margin_dpo/margin_mean": 1.334245204925537,
|
|
"margin_dpo/margin_std": 1.7104822397232056,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.5245653817082389,
|
|
"fcm_dpo/beta": 0.8462698459625244,
|
|
"fcm_dpo/delta": 0.07013484090566635,
|
|
"fcm_dpo/margin": 1.1055729389190674,
|
|
"fcm_dpo/q_t": 0.34362083673477173,
|
|
"grad_norm": 187.4880828857422,
|
|
"learning_rate": 2.737640108260456e-07,
|
|
"logits/chosen": 0.2319878786802292,
|
|
"logits/rejected": 0.19426073133945465,
|
|
"logps/chosen": -55.76850891113281,
|
|
"logps/ref_chosen": -53.499542236328125,
|
|
"logps/ref_rejected": -72.52565002441406,
|
|
"logps/rejected": -75.90019226074219,
|
|
"loss": 1.0505,
|
|
"margin_dpo/margin_mean": 1.1055727005004883,
|
|
"margin_dpo/margin_std": 1.7748844623565674,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.5260770975056689,
|
|
"fcm_dpo/beta": 0.8080967664718628,
|
|
"fcm_dpo/delta": -0.3646644949913025,
|
|
"fcm_dpo/margin": 1.6269646883010864,
|
|
"fcm_dpo/q_t": 0.3286696970462799,
|
|
"grad_norm": 163.6527099609375,
|
|
"learning_rate": 2.724474525774229e-07,
|
|
"logits/chosen": 0.23792192339897156,
|
|
"logits/rejected": 0.21459215879440308,
|
|
"logps/chosen": -52.67319107055664,
|
|
"logps/ref_chosen": -50.78684997558594,
|
|
"logps/ref_rejected": -68.63732147216797,
|
|
"logps/rejected": -72.15061950683594,
|
|
"loss": 0.9542,
|
|
"margin_dpo/margin_mean": 1.6269644498825073,
|
|
"margin_dpo/margin_std": 2.543069362640381,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.527588813303099,
|
|
"fcm_dpo/beta": 0.779322624206543,
|
|
"fcm_dpo/delta": -0.11380349844694138,
|
|
"fcm_dpo/margin": 1.411959171295166,
|
|
"fcm_dpo/q_t": 0.3116574287414551,
|
|
"grad_norm": 173.95448303222656,
|
|
"learning_rate": 2.711302664252973e-07,
|
|
"logits/chosen": 0.16869422793388367,
|
|
"logits/rejected": 0.10714869201183319,
|
|
"logps/chosen": -55.361000061035156,
|
|
"logps/ref_chosen": -53.325008392333984,
|
|
"logps/ref_rejected": -83.21236419677734,
|
|
"logps/rejected": -86.66030883789062,
|
|
"loss": 0.9061,
|
|
"margin_dpo/margin_mean": 1.4119596481323242,
|
|
"margin_dpo/margin_std": 1.8286110162734985,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.5291005291005291,
|
|
"fcm_dpo/beta": 0.7421770095825195,
|
|
"fcm_dpo/delta": -0.2771596610546112,
|
|
"fcm_dpo/margin": 1.6710288524627686,
|
|
"fcm_dpo/q_t": 0.29638049006462097,
|
|
"grad_norm": 182.74623107910156,
|
|
"learning_rate": 2.698124892141971e-07,
|
|
"logits/chosen": 0.1460535228252411,
|
|
"logits/rejected": 0.09140360355377197,
|
|
"logps/chosen": -63.824440002441406,
|
|
"logps/ref_chosen": -61.625770568847656,
|
|
"logps/ref_rejected": -87.63627624511719,
|
|
"logps/rejected": -91.50596618652344,
|
|
"loss": 0.8304,
|
|
"margin_dpo/margin_mean": 1.6710278987884521,
|
|
"margin_dpo/margin_std": 1.9888241291046143,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.5306122448979592,
|
|
"fcm_dpo/beta": 0.7518589496612549,
|
|
"fcm_dpo/delta": 0.1678137630224228,
|
|
"fcm_dpo/margin": 1.1261451244354248,
|
|
"fcm_dpo/q_t": 0.33188802003860474,
|
|
"grad_norm": 165.69610595703125,
|
|
"learning_rate": 2.6849415780518357e-07,
|
|
"logits/chosen": 0.09675121307373047,
|
|
"logits/rejected": 0.0414729006588459,
|
|
"logps/chosen": -58.45286178588867,
|
|
"logps/ref_chosen": -56.2563362121582,
|
|
"logps/ref_rejected": -79.11589813232422,
|
|
"logps/rejected": -82.43856811523438,
|
|
"loss": 1.0476,
|
|
"margin_dpo/margin_mean": 1.1261451244354248,
|
|
"margin_dpo/margin_std": 1.6900222301483154,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.5321239606953893,
|
|
"fcm_dpo/beta": 0.7760653495788574,
|
|
"fcm_dpo/delta": 0.16433821618556976,
|
|
"fcm_dpo/margin": 1.0951128005981445,
|
|
"fcm_dpo/q_t": 0.35313987731933594,
|
|
"grad_norm": 193.21861267089844,
|
|
"learning_rate": 2.6717530907482024e-07,
|
|
"logits/chosen": 0.16212013363838196,
|
|
"logits/rejected": 0.12353149801492691,
|
|
"logps/chosen": -65.12254333496094,
|
|
"logps/ref_chosen": -63.05195236206055,
|
|
"logps/ref_rejected": -85.52035522460938,
|
|
"logps/rejected": -88.68605041503906,
|
|
"loss": 0.9931,
|
|
"margin_dpo/margin_mean": 1.0951130390167236,
|
|
"margin_dpo/margin_std": 1.6728229522705078,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.5336356764928194,
|
|
"fcm_dpo/beta": 0.7995505332946777,
|
|
"fcm_dpo/delta": 0.018024399876594543,
|
|
"fcm_dpo/margin": 1.2259821891784668,
|
|
"fcm_dpo/q_t": 0.33103376626968384,
|
|
"grad_norm": 166.7522430419922,
|
|
"learning_rate": 2.658559799141411e-07,
|
|
"logits/chosen": 0.1692410409450531,
|
|
"logits/rejected": 0.16683810949325562,
|
|
"logps/chosen": -71.08948516845703,
|
|
"logps/ref_chosen": -69.00918579101562,
|
|
"logps/ref_rejected": -72.65840148925781,
|
|
"logps/rejected": -75.96468353271484,
|
|
"loss": 1.0033,
|
|
"margin_dpo/margin_mean": 1.2259814739227295,
|
|
"margin_dpo/margin_std": 1.810058832168579,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.5351473922902494,
|
|
"fcm_dpo/beta": 0.7726951837539673,
|
|
"fcm_dpo/delta": -0.05429168790578842,
|
|
"fcm_dpo/margin": 1.3514502048492432,
|
|
"fcm_dpo/q_t": 0.319754034280777,
|
|
"grad_norm": 218.87094116210938,
|
|
"learning_rate": 2.6453620722761895e-07,
|
|
"logits/chosen": 0.20377589762210846,
|
|
"logits/rejected": 0.10091987252235413,
|
|
"logps/chosen": -41.94265365600586,
|
|
"logps/ref_chosen": -39.78833770751953,
|
|
"logps/ref_rejected": -69.56885528564453,
|
|
"logps/rejected": -73.07461547851562,
|
|
"loss": 0.93,
|
|
"margin_dpo/margin_mean": 1.3514502048492432,
|
|
"margin_dpo/margin_std": 1.7668390274047852,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.5366591080876795,
|
|
"fcm_dpo/beta": 0.7757099866867065,
|
|
"fcm_dpo/delta": -0.04570357874035835,
|
|
"fcm_dpo/margin": 1.3419021368026733,
|
|
"fcm_dpo/q_t": 0.32356125116348267,
|
|
"grad_norm": 184.73477172851562,
|
|
"learning_rate": 2.632160279321328e-07,
|
|
"logits/chosen": 0.14926283061504364,
|
|
"logits/rejected": 0.0633564367890358,
|
|
"logps/chosen": -48.385475158691406,
|
|
"logps/ref_chosen": -46.25537872314453,
|
|
"logps/ref_rejected": -78.20236206054688,
|
|
"logps/rejected": -81.67436218261719,
|
|
"loss": 0.9503,
|
|
"margin_dpo/margin_mean": 1.3419021368026733,
|
|
"margin_dpo/margin_std": 1.8027377128601074,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.5381708238851096,
|
|
"fcm_dpo/beta": 0.7802422046661377,
|
|
"fcm_dpo/delta": 0.09977808594703674,
|
|
"fcm_dpo/margin": 1.165019154548645,
|
|
"fcm_dpo/q_t": 0.34947988390922546,
|
|
"grad_norm": 180.77276611328125,
|
|
"learning_rate": 2.618954789559356e-07,
|
|
"logits/chosen": 0.15579620003700256,
|
|
"logits/rejected": 0.1042039543390274,
|
|
"logps/chosen": -50.003562927246094,
|
|
"logps/ref_chosen": -47.906158447265625,
|
|
"logps/ref_rejected": -74.29397583007812,
|
|
"logps/rejected": -77.556396484375,
|
|
"loss": 1.1606,
|
|
"margin_dpo/margin_mean": 1.1650193929672241,
|
|
"margin_dpo/margin_std": 2.1200852394104004,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.5396825396825397,
|
|
"fcm_dpo/beta": 0.7977977991104126,
|
|
"fcm_dpo/delta": 0.14315135776996613,
|
|
"fcm_dpo/margin": 1.0884279012680054,
|
|
"fcm_dpo/q_t": 0.3529171347618103,
|
|
"grad_norm": 215.51937866210938,
|
|
"learning_rate": 2.6057459723762076e-07,
|
|
"logits/chosen": 0.16426903009414673,
|
|
"logits/rejected": 0.13840454816818237,
|
|
"logps/chosen": -64.85040283203125,
|
|
"logps/ref_chosen": -62.63500213623047,
|
|
"logps/ref_rejected": -65.11399841308594,
|
|
"logps/rejected": -68.41783142089844,
|
|
"loss": 1.116,
|
|
"margin_dpo/margin_mean": 1.0884283781051636,
|
|
"margin_dpo/margin_std": 1.8702547550201416,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.5411942554799698,
|
|
"fcm_dpo/beta": 0.7857924103736877,
|
|
"fcm_dpo/delta": -0.2809675335884094,
|
|
"fcm_dpo/margin": 1.586600661277771,
|
|
"fcm_dpo/q_t": 0.2850938141345978,
|
|
"grad_norm": 148.0938720703125,
|
|
"learning_rate": 2.5925341972508954e-07,
|
|
"logits/chosen": 0.1491168886423111,
|
|
"logits/rejected": 0.14593584835529327,
|
|
"logps/chosen": -69.29679107666016,
|
|
"logps/ref_chosen": -67.20960998535156,
|
|
"logps/ref_rejected": -69.34715270996094,
|
|
"logps/rejected": -73.02093505859375,
|
|
"loss": 0.798,
|
|
"margin_dpo/margin_mean": 1.586600661277771,
|
|
"margin_dpo/margin_std": 1.7209175825119019,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.5427059712773998,
|
|
"fcm_dpo/beta": 0.7968997955322266,
|
|
"fcm_dpo/delta": 0.10445237159729004,
|
|
"fcm_dpo/margin": 1.1224002838134766,
|
|
"fcm_dpo/q_t": 0.34582918882369995,
|
|
"grad_norm": 201.40667724609375,
|
|
"learning_rate": 2.579319833745169e-07,
|
|
"logits/chosen": 0.14725103974342346,
|
|
"logits/rejected": 0.12238387763500214,
|
|
"logps/chosen": -64.69267272949219,
|
|
"logps/ref_chosen": -62.52578353881836,
|
|
"logps/ref_rejected": -76.63114929199219,
|
|
"logps/rejected": -79.9204330444336,
|
|
"loss": 1.0804,
|
|
"margin_dpo/margin_mean": 1.1224000453948975,
|
|
"margin_dpo/margin_std": 1.7543888092041016,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.54421768707483,
|
|
"fcm_dpo/beta": 0.7883453369140625,
|
|
"fcm_dpo/delta": 0.023167556151747704,
|
|
"fcm_dpo/margin": 1.241877794265747,
|
|
"fcm_dpo/q_t": 0.3393666446208954,
|
|
"grad_norm": 213.6732635498047,
|
|
"learning_rate": 2.5661032514931834e-07,
|
|
"logits/chosen": 0.11307230591773987,
|
|
"logits/rejected": 0.0418628454208374,
|
|
"logps/chosen": -65.72219848632812,
|
|
"logps/ref_chosen": -63.48772048950195,
|
|
"logps/ref_rejected": -90.6891098022461,
|
|
"logps/rejected": -94.16546630859375,
|
|
"loss": 1.0189,
|
|
"margin_dpo/margin_mean": 1.2418781518936157,
|
|
"margin_dpo/margin_std": 1.9586660861968994,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.54572940287226,
|
|
"fcm_dpo/beta": 0.7847793698310852,
|
|
"fcm_dpo/delta": -0.006262313574552536,
|
|
"fcm_dpo/margin": 1.2811236381530762,
|
|
"fcm_dpo/q_t": 0.32984602451324463,
|
|
"grad_norm": 186.03604125976562,
|
|
"learning_rate": 2.552884820191154e-07,
|
|
"logits/chosen": 0.21646341681480408,
|
|
"logits/rejected": 0.17365378141403198,
|
|
"logps/chosen": -60.005924224853516,
|
|
"logps/ref_chosen": -57.917144775390625,
|
|
"logps/ref_rejected": -72.39089965820312,
|
|
"logps/rejected": -75.76080322265625,
|
|
"loss": 0.9667,
|
|
"margin_dpo/margin_mean": 1.2811236381530762,
|
|
"margin_dpo/margin_std": 1.8610559701919556,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.54724111866969,
|
|
"fcm_dpo/beta": 0.7910502552986145,
|
|
"fcm_dpo/delta": -0.12872016429901123,
|
|
"fcm_dpo/margin": 1.3991649150848389,
|
|
"fcm_dpo/q_t": 0.3150370121002197,
|
|
"grad_norm": 179.85292053222656,
|
|
"learning_rate": 2.53966490958702e-07,
|
|
"logits/chosen": 0.20901912450790405,
|
|
"logits/rejected": 0.14084625244140625,
|
|
"logps/chosen": -65.4914321899414,
|
|
"logps/ref_chosen": -63.4434700012207,
|
|
"logps/ref_rejected": -103.45516967773438,
|
|
"logps/rejected": -106.90229797363281,
|
|
"loss": 0.8868,
|
|
"margin_dpo/margin_mean": 1.3991665840148926,
|
|
"margin_dpo/margin_std": 1.6471703052520752,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.5487528344671202,
|
|
"fcm_dpo/beta": 0.7514413595199585,
|
|
"fcm_dpo/delta": -0.15377500653266907,
|
|
"fcm_dpo/margin": 1.512410044670105,
|
|
"fcm_dpo/q_t": 0.3079715967178345,
|
|
"grad_norm": 172.7432403564453,
|
|
"learning_rate": 2.526443889470099e-07,
|
|
"logits/chosen": 0.18472757935523987,
|
|
"logits/rejected": 0.09980542212724686,
|
|
"logps/chosen": -51.084228515625,
|
|
"logps/ref_chosen": -48.65182876586914,
|
|
"logps/ref_rejected": -88.65904235839844,
|
|
"logps/rejected": -92.60385131835938,
|
|
"loss": 0.9557,
|
|
"margin_dpo/margin_mean": 1.512410044670105,
|
|
"margin_dpo/margin_std": 2.1208112239837646,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.5502645502645502,
|
|
"fcm_dpo/beta": 0.7271950840950012,
|
|
"fcm_dpo/delta": -0.1419481635093689,
|
|
"fcm_dpo/margin": 1.5479559898376465,
|
|
"fcm_dpo/q_t": 0.31253015995025635,
|
|
"grad_norm": 143.4964599609375,
|
|
"learning_rate": 2.513222129660744e-07,
|
|
"logits/chosen": 0.09488549828529358,
|
|
"logits/rejected": 0.020679466426372528,
|
|
"logps/chosen": -59.90724563598633,
|
|
"logps/ref_chosen": -57.87107467651367,
|
|
"logps/ref_rejected": -80.95503234863281,
|
|
"logps/rejected": -84.53915405273438,
|
|
"loss": 0.9253,
|
|
"margin_dpo/margin_mean": 1.5479564666748047,
|
|
"margin_dpo/margin_std": 2.0348784923553467,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.5517762660619804,
|
|
"fcm_dpo/beta": 0.7135534882545471,
|
|
"fcm_dpo/delta": -0.0528385192155838,
|
|
"fcm_dpo/margin": 1.4665672779083252,
|
|
"fcm_dpo/q_t": 0.30548185110092163,
|
|
"grad_norm": 147.20799255371094,
|
|
"learning_rate": 2.5e-07,
|
|
"logits/chosen": 0.19886715710163116,
|
|
"logits/rejected": 0.18875735998153687,
|
|
"logps/chosen": -66.82514190673828,
|
|
"logps/ref_chosen": -64.94217681884766,
|
|
"logps/ref_rejected": -74.8599853515625,
|
|
"logps/rejected": -78.20951843261719,
|
|
"loss": 0.868,
|
|
"margin_dpo/margin_mean": 1.4665677547454834,
|
|
"margin_dpo/margin_std": 1.6899524927139282,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.5532879818594104,
|
|
"fcm_dpo/beta": 0.749003529548645,
|
|
"fcm_dpo/delta": 0.20677639544010162,
|
|
"fcm_dpo/margin": 1.071431279182434,
|
|
"fcm_dpo/q_t": 0.36068403720855713,
|
|
"grad_norm": 177.8300323486328,
|
|
"learning_rate": 2.486777870339255e-07,
|
|
"logits/chosen": 0.1121918261051178,
|
|
"logits/rejected": 0.09326402097940445,
|
|
"logps/chosen": -56.96438217163086,
|
|
"logps/ref_chosen": -55.16598129272461,
|
|
"logps/ref_rejected": -65.26121520996094,
|
|
"logps/rejected": -68.13104248046875,
|
|
"loss": 1.1563,
|
|
"margin_dpo/margin_mean": 1.0714313983917236,
|
|
"margin_dpo/margin_std": 1.9580434560775757,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.5547996976568406,
|
|
"fcm_dpo/beta": 0.7712104320526123,
|
|
"fcm_dpo/delta": 0.2716788947582245,
|
|
"fcm_dpo/margin": 0.9718486070632935,
|
|
"fcm_dpo/q_t": 0.3597671687602997,
|
|
"grad_norm": 178.6884002685547,
|
|
"learning_rate": 2.4735561105299014e-07,
|
|
"logits/chosen": 0.1118677407503128,
|
|
"logits/rejected": 0.03625689074397087,
|
|
"logps/chosen": -58.22844696044922,
|
|
"logps/ref_chosen": -56.01046371459961,
|
|
"logps/ref_rejected": -77.31010437011719,
|
|
"logps/rejected": -80.49993896484375,
|
|
"loss": 1.0665,
|
|
"margin_dpo/margin_mean": 0.971848726272583,
|
|
"margin_dpo/margin_std": 1.5560146570205688,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.5563114134542706,
|
|
"fcm_dpo/beta": 0.8040578365325928,
|
|
"fcm_dpo/delta": 0.1679600328207016,
|
|
"fcm_dpo/margin": 1.0528676509857178,
|
|
"fcm_dpo/q_t": 0.3611965775489807,
|
|
"grad_norm": 214.42361450195312,
|
|
"learning_rate": 2.46033509041298e-07,
|
|
"logits/chosen": 0.06508797407150269,
|
|
"logits/rejected": 0.056503720581531525,
|
|
"logps/chosen": -76.96674346923828,
|
|
"logps/ref_chosen": -74.82927703857422,
|
|
"logps/ref_rejected": -76.11680603027344,
|
|
"logps/rejected": -79.30712890625,
|
|
"loss": 1.1432,
|
|
"margin_dpo/margin_mean": 1.052868127822876,
|
|
"margin_dpo/margin_std": 1.877270221710205,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.5578231292517006,
|
|
"fcm_dpo/beta": 0.83278489112854,
|
|
"fcm_dpo/delta": 0.09441757202148438,
|
|
"fcm_dpo/margin": 1.095184564590454,
|
|
"fcm_dpo/q_t": 0.3382675051689148,
|
|
"grad_norm": 172.21173095703125,
|
|
"learning_rate": 2.447115179808846e-07,
|
|
"logits/chosen": 0.122508205473423,
|
|
"logits/rejected": 0.09032686054706573,
|
|
"logps/chosen": -60.496971130371094,
|
|
"logps/ref_chosen": -58.32621765136719,
|
|
"logps/ref_rejected": -80.92183685302734,
|
|
"logps/rejected": -84.18777465820312,
|
|
"loss": 1.0485,
|
|
"margin_dpo/margin_mean": 1.0951846837997437,
|
|
"margin_dpo/margin_std": 1.6767044067382812,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.5593348450491308,
|
|
"fcm_dpo/beta": 0.8102331757545471,
|
|
"fcm_dpo/delta": -0.10076209902763367,
|
|
"fcm_dpo/margin": 1.342164397239685,
|
|
"fcm_dpo/q_t": 0.3211871385574341,
|
|
"grad_norm": 181.2273406982422,
|
|
"learning_rate": 2.4338967485068164e-07,
|
|
"logits/chosen": 0.24069687724113464,
|
|
"logits/rejected": 0.18847504258155823,
|
|
"logps/chosen": -55.11189270019531,
|
|
"logps/ref_chosen": -52.88372039794922,
|
|
"logps/ref_rejected": -79.43692016601562,
|
|
"logps/rejected": -83.00725555419922,
|
|
"loss": 1.0596,
|
|
"margin_dpo/margin_mean": 1.3421647548675537,
|
|
"margin_dpo/margin_std": 2.065488576889038,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.5608465608465608,
|
|
"fcm_dpo/beta": 0.8171911239624023,
|
|
"fcm_dpo/delta": -0.08116129040718079,
|
|
"fcm_dpo/margin": 1.3074921369552612,
|
|
"fcm_dpo/q_t": 0.3368060290813446,
|
|
"grad_norm": 183.6264190673828,
|
|
"learning_rate": 2.420680166254831e-07,
|
|
"logits/chosen": 0.2111239731311798,
|
|
"logits/rejected": 0.18440525233745575,
|
|
"logps/chosen": -51.46292495727539,
|
|
"logps/ref_chosen": -49.224212646484375,
|
|
"logps/ref_rejected": -63.348472595214844,
|
|
"logps/rejected": -66.8946762084961,
|
|
"loss": 1.0896,
|
|
"margin_dpo/margin_mean": 1.3074921369552612,
|
|
"margin_dpo/margin_std": 2.0974409580230713,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.562358276643991,
|
|
"fcm_dpo/beta": 0.8467217683792114,
|
|
"fcm_dpo/delta": 0.2932736575603485,
|
|
"fcm_dpo/margin": 0.851443350315094,
|
|
"fcm_dpo/q_t": 0.3849431276321411,
|
|
"grad_norm": 214.92108154296875,
|
|
"learning_rate": 2.4074658027491044e-07,
|
|
"logits/chosen": 0.18400293588638306,
|
|
"logits/rejected": 0.12166933715343475,
|
|
"logps/chosen": -54.62636184692383,
|
|
"logps/ref_chosen": -52.269554138183594,
|
|
"logps/ref_rejected": -72.99522399902344,
|
|
"logps/rejected": -76.20347595214844,
|
|
"loss": 1.3565,
|
|
"margin_dpo/margin_mean": 0.8514436483383179,
|
|
"margin_dpo/margin_std": 2.075626850128174,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.563869992441421,
|
|
"fcm_dpo/beta": 0.8382232189178467,
|
|
"fcm_dpo/delta": -0.05690415948629379,
|
|
"fcm_dpo/margin": 1.2532103061676025,
|
|
"fcm_dpo/q_t": 0.3378145694732666,
|
|
"grad_norm": 255.2552947998047,
|
|
"learning_rate": 2.394254027623792e-07,
|
|
"logits/chosen": 0.19523033499717712,
|
|
"logits/rejected": 0.14481961727142334,
|
|
"logps/chosen": -63.56736755371094,
|
|
"logps/ref_chosen": -61.112998962402344,
|
|
"logps/ref_rejected": -76.24851989746094,
|
|
"logps/rejected": -79.95610046386719,
|
|
"loss": 1.0989,
|
|
"margin_dpo/margin_mean": 1.2532098293304443,
|
|
"margin_dpo/margin_std": 2.067584991455078,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.5653817082388511,
|
|
"fcm_dpo/beta": 0.788150429725647,
|
|
"fcm_dpo/delta": -0.36466753482818604,
|
|
"fcm_dpo/margin": 1.6652493476867676,
|
|
"fcm_dpo/q_t": 0.2760714292526245,
|
|
"grad_norm": 186.27012634277344,
|
|
"learning_rate": 2.381045210440644e-07,
|
|
"logits/chosen": 0.12782002985477448,
|
|
"logits/rejected": 0.11331016570329666,
|
|
"logps/chosen": -74.75846099853516,
|
|
"logps/ref_chosen": -72.66920471191406,
|
|
"logps/ref_rejected": -76.83158874511719,
|
|
"logps/rejected": -80.58609771728516,
|
|
"loss": 0.7647,
|
|
"margin_dpo/margin_mean": 1.6652493476867676,
|
|
"margin_dpo/margin_std": 1.7359645366668701,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.5668934240362812,
|
|
"fcm_dpo/beta": 0.8175476789474487,
|
|
"fcm_dpo/delta": 0.3215762972831726,
|
|
"fcm_dpo/margin": 0.8576102256774902,
|
|
"fcm_dpo/q_t": 0.37826499342918396,
|
|
"grad_norm": 234.281494140625,
|
|
"learning_rate": 2.3678397206786715e-07,
|
|
"logits/chosen": 0.1810404360294342,
|
|
"logits/rejected": 0.1419374942779541,
|
|
"logps/chosen": -59.804378509521484,
|
|
"logps/ref_chosen": -57.68330383300781,
|
|
"logps/ref_rejected": -79.34097290039062,
|
|
"logps/rejected": -82.31965637207031,
|
|
"loss": 1.1949,
|
|
"margin_dpo/margin_mean": 0.8576098680496216,
|
|
"margin_dpo/margin_std": 1.7258224487304688,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.5684051398337112,
|
|
"fcm_dpo/beta": 0.7721197605133057,
|
|
"fcm_dpo/delta": -0.40702202916145325,
|
|
"fcm_dpo/margin": 1.7395694255828857,
|
|
"fcm_dpo/q_t": 0.28671878576278687,
|
|
"grad_norm": 153.21202087402344,
|
|
"learning_rate": 2.3546379277238103e-07,
|
|
"logits/chosen": 0.19977155327796936,
|
|
"logits/rejected": 0.15468192100524902,
|
|
"logps/chosen": -53.76110076904297,
|
|
"logps/ref_chosen": -51.674072265625,
|
|
"logps/ref_rejected": -75.69713592529297,
|
|
"logps/rejected": -79.52372741699219,
|
|
"loss": 0.8446,
|
|
"margin_dpo/margin_mean": 1.739569067955017,
|
|
"margin_dpo/margin_std": 2.094245433807373,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.5699168556311414,
|
|
"fcm_dpo/beta": 0.7831248641014099,
|
|
"fcm_dpo/delta": 0.08697693049907684,
|
|
"fcm_dpo/margin": 1.1734894514083862,
|
|
"fcm_dpo/q_t": 0.3411799371242523,
|
|
"grad_norm": 162.50051879882812,
|
|
"learning_rate": 2.3414402008585886e-07,
|
|
"logits/chosen": 0.11727416515350342,
|
|
"logits/rejected": 0.09625902771949768,
|
|
"logps/chosen": -48.64626693725586,
|
|
"logps/ref_chosen": -46.17853546142578,
|
|
"logps/ref_rejected": -57.756500244140625,
|
|
"logps/rejected": -61.39772415161133,
|
|
"loss": 1.0109,
|
|
"margin_dpo/margin_mean": 1.1734893321990967,
|
|
"margin_dpo/margin_std": 1.7293052673339844,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.5714285714285714,
|
|
"fcm_dpo/beta": 0.8013092875480652,
|
|
"fcm_dpo/delta": 0.08626553416252136,
|
|
"fcm_dpo/margin": 1.1440807580947876,
|
|
"fcm_dpo/q_t": 0.35015422105789185,
|
|
"grad_norm": 186.11314392089844,
|
|
"learning_rate": 2.3282469092517977e-07,
|
|
"logits/chosen": 0.19343531131744385,
|
|
"logits/rejected": 0.15595267713069916,
|
|
"logps/chosen": -61.52544021606445,
|
|
"logps/ref_chosen": -59.21887969970703,
|
|
"logps/ref_rejected": -71.24818420410156,
|
|
"logps/rejected": -74.69883728027344,
|
|
"loss": 1.045,
|
|
"margin_dpo/margin_mean": 1.1440809965133667,
|
|
"margin_dpo/margin_std": 1.8243110179901123,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.5729402872260015,
|
|
"fcm_dpo/beta": 0.7728543281555176,
|
|
"fcm_dpo/delta": -0.18132196366786957,
|
|
"fcm_dpo/margin": 1.5013034343719482,
|
|
"fcm_dpo/q_t": 0.3048959970474243,
|
|
"grad_norm": 197.8959197998047,
|
|
"learning_rate": 2.3150584219481643e-07,
|
|
"logits/chosen": 0.21861502528190613,
|
|
"logits/rejected": 0.1745017170906067,
|
|
"logps/chosen": -78.35917663574219,
|
|
"logps/ref_chosen": -76.31658935546875,
|
|
"logps/ref_rejected": -104.26200103759766,
|
|
"logps/rejected": -107.80589294433594,
|
|
"loss": 0.865,
|
|
"margin_dpo/margin_mean": 1.501303791999817,
|
|
"margin_dpo/margin_std": 1.88877534866333,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.5744520030234316,
|
|
"fcm_dpo/beta": 0.732434093952179,
|
|
"fcm_dpo/delta": -0.3159186840057373,
|
|
"fcm_dpo/margin": 1.7422823905944824,
|
|
"fcm_dpo/q_t": 0.2831631302833557,
|
|
"grad_norm": 152.97654724121094,
|
|
"learning_rate": 2.3018751078580283e-07,
|
|
"logits/chosen": 0.18231691420078278,
|
|
"logits/rejected": 0.15374861657619476,
|
|
"logps/chosen": -63.16365051269531,
|
|
"logps/ref_chosen": -61.283164978027344,
|
|
"logps/ref_rejected": -72.38892364501953,
|
|
"logps/rejected": -76.01168823242188,
|
|
"loss": 0.8666,
|
|
"margin_dpo/margin_mean": 1.7422822713851929,
|
|
"margin_dpo/margin_std": 2.142005443572998,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.5759637188208617,
|
|
"fcm_dpo/beta": 0.7804316282272339,
|
|
"fcm_dpo/delta": 0.42041903734207153,
|
|
"fcm_dpo/margin": 0.7539185285568237,
|
|
"fcm_dpo/q_t": 0.3955872654914856,
|
|
"grad_norm": 201.4615936279297,
|
|
"learning_rate": 2.288697335747027e-07,
|
|
"logits/chosen": 0.12479900568723679,
|
|
"logits/rejected": 0.10375261306762695,
|
|
"logps/chosen": -60.67311096191406,
|
|
"logps/ref_chosen": -58.2139892578125,
|
|
"logps/ref_rejected": -60.78669357299805,
|
|
"logps/rejected": -63.999732971191406,
|
|
"loss": 1.2184,
|
|
"margin_dpo/margin_mean": 0.7539188861846924,
|
|
"margin_dpo/margin_std": 1.6283390522003174,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.5774754346182918,
|
|
"fcm_dpo/beta": 0.8213146924972534,
|
|
"fcm_dpo/delta": 0.16648587584495544,
|
|
"fcm_dpo/margin": 1.0126454830169678,
|
|
"fcm_dpo/q_t": 0.3555706739425659,
|
|
"grad_norm": 187.99722290039062,
|
|
"learning_rate": 2.2755254742257706e-07,
|
|
"logits/chosen": 0.1935308575630188,
|
|
"logits/rejected": 0.1620044708251953,
|
|
"logps/chosen": -64.31198120117188,
|
|
"logps/ref_chosen": -61.82532501220703,
|
|
"logps/ref_rejected": -83.0452880859375,
|
|
"logps/rejected": -86.54458618164062,
|
|
"loss": 1.0464,
|
|
"margin_dpo/margin_mean": 1.0126454830169678,
|
|
"margin_dpo/margin_std": 1.5206871032714844,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.5789871504157218,
|
|
"fcm_dpo/beta": 0.7823382616043091,
|
|
"fcm_dpo/delta": -0.15291021764278412,
|
|
"fcm_dpo/margin": 1.4475116729736328,
|
|
"fcm_dpo/q_t": 0.3087931275367737,
|
|
"grad_norm": 219.67236328125,
|
|
"learning_rate": 2.2623598917395436e-07,
|
|
"logits/chosen": 0.10844056308269501,
|
|
"logits/rejected": 0.11368384212255478,
|
|
"logps/chosen": -82.67160034179688,
|
|
"logps/ref_chosen": -80.56326293945312,
|
|
"logps/ref_rejected": -74.62922668457031,
|
|
"logps/rejected": -78.18507385253906,
|
|
"loss": 0.9182,
|
|
"margin_dpo/margin_mean": 1.4475116729736328,
|
|
"margin_dpo/margin_std": 1.882810354232788,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.5804988662131519,
|
|
"fcm_dpo/beta": 0.8081178665161133,
|
|
"fcm_dpo/delta": 0.1466008424758911,
|
|
"fcm_dpo/margin": 1.0682569742202759,
|
|
"fcm_dpo/q_t": 0.3386213779449463,
|
|
"grad_norm": 210.8586883544922,
|
|
"learning_rate": 2.2492009565579875e-07,
|
|
"logits/chosen": 0.15908417105674744,
|
|
"logits/rejected": 0.1250711977481842,
|
|
"logps/chosen": -68.10568237304688,
|
|
"logps/ref_chosen": -65.47514343261719,
|
|
"logps/ref_rejected": -79.67378234863281,
|
|
"logps/rejected": -83.37257385253906,
|
|
"loss": 1.014,
|
|
"margin_dpo/margin_mean": 1.0682566165924072,
|
|
"margin_dpo/margin_std": 1.5980737209320068,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.582010582010582,
|
|
"fcm_dpo/beta": 0.8039337396621704,
|
|
"fcm_dpo/delta": -0.06407226622104645,
|
|
"fcm_dpo/margin": 1.3148212432861328,
|
|
"fcm_dpo/q_t": 0.3122956156730652,
|
|
"grad_norm": 201.8307647705078,
|
|
"learning_rate": 2.2360490367648084e-07,
|
|
"logits/chosen": 0.14007516205310822,
|
|
"logits/rejected": 0.11297205090522766,
|
|
"logps/chosen": -68.39266967773438,
|
|
"logps/ref_chosen": -66.0565185546875,
|
|
"logps/ref_rejected": -86.68023681640625,
|
|
"logps/rejected": -90.33121490478516,
|
|
"loss": 0.9181,
|
|
"margin_dpo/margin_mean": 1.3148208856582642,
|
|
"margin_dpo/margin_std": 1.7250840663909912,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.5835222978080121,
|
|
"fcm_dpo/beta": 0.7948161363601685,
|
|
"fcm_dpo/delta": 0.02499794214963913,
|
|
"fcm_dpo/margin": 1.229053020477295,
|
|
"fcm_dpo/q_t": 0.3376579284667969,
|
|
"grad_norm": 190.71925354003906,
|
|
"learning_rate": 2.2229045002474724e-07,
|
|
"logits/chosen": 0.1209828183054924,
|
|
"logits/rejected": 0.0802159532904625,
|
|
"logps/chosen": -78.08750915527344,
|
|
"logps/ref_chosen": -75.6236572265625,
|
|
"logps/ref_rejected": -92.62330627441406,
|
|
"logps/rejected": -96.31620788574219,
|
|
"loss": 1.0068,
|
|
"margin_dpo/margin_mean": 1.229053258895874,
|
|
"margin_dpo/margin_std": 1.8680897951126099,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.5850340136054422,
|
|
"fcm_dpo/beta": 0.7716137170791626,
|
|
"fcm_dpo/delta": -0.2335832417011261,
|
|
"fcm_dpo/margin": 1.5613083839416504,
|
|
"fcm_dpo/q_t": 0.27384334802627563,
|
|
"grad_norm": 133.3782501220703,
|
|
"learning_rate": 2.209767714686924e-07,
|
|
"logits/chosen": 0.15220069885253906,
|
|
"logits/rejected": 0.08792141824960709,
|
|
"logps/chosen": -49.354488372802734,
|
|
"logps/ref_chosen": -47.22170639038086,
|
|
"logps/ref_rejected": -87.338134765625,
|
|
"logps/rejected": -91.03223419189453,
|
|
"loss": 0.7497,
|
|
"margin_dpo/margin_mean": 1.5613081455230713,
|
|
"margin_dpo/margin_std": 1.4777767658233643,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.5865457294028723,
|
|
"fcm_dpo/beta": 0.763427734375,
|
|
"fcm_dpo/delta": 0.034832365810871124,
|
|
"fcm_dpo/margin": 1.267984390258789,
|
|
"fcm_dpo/q_t": 0.3503814935684204,
|
|
"grad_norm": 178.84954833984375,
|
|
"learning_rate": 2.1966390475472954e-07,
|
|
"logits/chosen": 0.16702687740325928,
|
|
"logits/rejected": 0.1540418267250061,
|
|
"logps/chosen": -76.91740417480469,
|
|
"logps/ref_chosen": -74.5794677734375,
|
|
"logps/ref_rejected": -79.92558288574219,
|
|
"logps/rejected": -83.531494140625,
|
|
"loss": 1.0483,
|
|
"margin_dpo/margin_mean": 1.2679840326309204,
|
|
"margin_dpo/margin_std": 2.043673515319824,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.5880574452003023,
|
|
"fcm_dpo/beta": 0.7542685270309448,
|
|
"fcm_dpo/delta": -0.14975543320178986,
|
|
"fcm_dpo/margin": 1.5020861625671387,
|
|
"fcm_dpo/q_t": 0.3079478144645691,
|
|
"grad_norm": 161.67514038085938,
|
|
"learning_rate": 2.1835188660656265e-07,
|
|
"logits/chosen": 0.17143261432647705,
|
|
"logits/rejected": 0.1412460058927536,
|
|
"logps/chosen": -63.959442138671875,
|
|
"logps/ref_chosen": -61.624366760253906,
|
|
"logps/ref_rejected": -76.50978088378906,
|
|
"logps/rejected": -80.3469467163086,
|
|
"loss": 0.9259,
|
|
"margin_dpo/margin_mean": 1.5020864009857178,
|
|
"margin_dpo/margin_std": 1.9342763423919678,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.5895691609977324,
|
|
"fcm_dpo/beta": 0.7468278408050537,
|
|
"fcm_dpo/delta": 0.005739331711083651,
|
|
"fcm_dpo/margin": 1.3320647478103638,
|
|
"fcm_dpo/q_t": 0.3248102068901062,
|
|
"grad_norm": 146.38316345214844,
|
|
"learning_rate": 2.170407537241599e-07,
|
|
"logits/chosen": 0.2075425386428833,
|
|
"logits/rejected": 0.16074812412261963,
|
|
"logps/chosen": -48.055908203125,
|
|
"logps/ref_chosen": -45.871864318847656,
|
|
"logps/ref_rejected": -61.305999755859375,
|
|
"logps/rejected": -64.82211303710938,
|
|
"loss": 0.9054,
|
|
"margin_dpo/margin_mean": 1.3320646286010742,
|
|
"margin_dpo/margin_std": 1.6783784627914429,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.5910808767951625,
|
|
"fcm_dpo/beta": 0.7326708436012268,
|
|
"fcm_dpo/delta": -0.18802016973495483,
|
|
"fcm_dpo/margin": 1.591862440109253,
|
|
"fcm_dpo/q_t": 0.31688395142555237,
|
|
"grad_norm": 162.29026794433594,
|
|
"learning_rate": 2.1573054278272636e-07,
|
|
"logits/chosen": 0.17586693167686462,
|
|
"logits/rejected": 0.12610454857349396,
|
|
"logps/chosen": -60.547569274902344,
|
|
"logps/ref_chosen": -58.18701171875,
|
|
"logps/ref_rejected": -83.63442993164062,
|
|
"logps/rejected": -87.58686065673828,
|
|
"loss": 1.01,
|
|
"margin_dpo/margin_mean": 1.5918623208999634,
|
|
"margin_dpo/margin_std": 2.3288512229919434,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.5925925925925926,
|
|
"fcm_dpo/beta": 0.6947846412658691,
|
|
"fcm_dpo/delta": -0.25457581877708435,
|
|
"fcm_dpo/margin": 1.7612890005111694,
|
|
"fcm_dpo/q_t": 0.3048982322216034,
|
|
"grad_norm": 162.5065155029297,
|
|
"learning_rate": 2.1442129043167873e-07,
|
|
"logits/chosen": 0.24103191494941711,
|
|
"logits/rejected": 0.19386835396289825,
|
|
"logps/chosen": -71.8067626953125,
|
|
"logps/ref_chosen": -69.7445297241211,
|
|
"logps/ref_rejected": -94.05877685546875,
|
|
"logps/rejected": -97.8823013305664,
|
|
"loss": 0.9129,
|
|
"margin_dpo/margin_mean": 1.761289358139038,
|
|
"margin_dpo/margin_std": 2.361508846282959,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.5941043083900227,
|
|
"fcm_dpo/beta": 0.6695666313171387,
|
|
"fcm_dpo/delta": -0.061286166310310364,
|
|
"fcm_dpo/margin": 1.5697075128555298,
|
|
"fcm_dpo/q_t": 0.30320626497268677,
|
|
"grad_norm": 139.36740112304688,
|
|
"learning_rate": 2.131130332936195e-07,
|
|
"logits/chosen": 0.20045891404151917,
|
|
"logits/rejected": 0.17006459832191467,
|
|
"logps/chosen": -54.7346076965332,
|
|
"logps/ref_chosen": -52.33489990234375,
|
|
"logps/ref_rejected": -74.33809661865234,
|
|
"logps/rejected": -78.30751037597656,
|
|
"loss": 0.8145,
|
|
"margin_dpo/margin_mean": 1.5697076320648193,
|
|
"margin_dpo/margin_std": 1.6137369871139526,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.5956160241874527,
|
|
"fcm_dpo/beta": 0.683825671672821,
|
|
"fcm_dpo/delta": 0.04361763596534729,
|
|
"fcm_dpo/margin": 1.4036848545074463,
|
|
"fcm_dpo/q_t": 0.3363683223724365,
|
|
"grad_norm": 153.11077880859375,
|
|
"learning_rate": 2.1180580796331323e-07,
|
|
"logits/chosen": 0.22308960556983948,
|
|
"logits/rejected": 0.1974237710237503,
|
|
"logps/chosen": -63.075340270996094,
|
|
"logps/ref_chosen": -60.6761360168457,
|
|
"logps/ref_rejected": -71.36074829101562,
|
|
"logps/rejected": -75.16364288330078,
|
|
"loss": 0.9732,
|
|
"margin_dpo/margin_mean": 1.4036844968795776,
|
|
"margin_dpo/margin_std": 2.0274405479431152,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.5971277399848829,
|
|
"fcm_dpo/beta": 0.6959511041641235,
|
|
"fcm_dpo/delta": 0.1088649109005928,
|
|
"fcm_dpo/margin": 1.2934165000915527,
|
|
"fcm_dpo/q_t": 0.3352372646331787,
|
|
"grad_norm": 158.16360473632812,
|
|
"learning_rate": 2.104996510066625e-07,
|
|
"logits/chosen": 0.1776614785194397,
|
|
"logits/rejected": 0.117831751704216,
|
|
"logps/chosen": -52.74278259277344,
|
|
"logps/ref_chosen": -50.60432434082031,
|
|
"logps/ref_rejected": -77.08731079101562,
|
|
"logps/rejected": -80.51919555664062,
|
|
"loss": 0.9631,
|
|
"margin_dpo/margin_mean": 1.2934160232543945,
|
|
"margin_dpo/margin_std": 1.7340922355651855,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.5986394557823129,
|
|
"fcm_dpo/beta": 0.6787519454956055,
|
|
"fcm_dpo/delta": -0.02714592218399048,
|
|
"fcm_dpo/margin": 1.4940762519836426,
|
|
"fcm_dpo/q_t": 0.31732630729675293,
|
|
"grad_norm": 145.00909423828125,
|
|
"learning_rate": 2.0919459895968517e-07,
|
|
"logits/chosen": 0.17002803087234497,
|
|
"logits/rejected": 0.09879619628190994,
|
|
"logps/chosen": -53.40643310546875,
|
|
"logps/ref_chosen": -51.35961151123047,
|
|
"logps/ref_rejected": -79.89360046386719,
|
|
"logps/rejected": -83.43449401855469,
|
|
"loss": 0.8661,
|
|
"margin_dpo/margin_mean": 1.4940763711929321,
|
|
"margin_dpo/margin_std": 1.6158559322357178,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.600151171579743,
|
|
"fcm_dpo/beta": 0.7337859869003296,
|
|
"fcm_dpo/delta": 0.3556361794471741,
|
|
"fcm_dpo/margin": 0.9081060886383057,
|
|
"fcm_dpo/q_t": 0.37586018443107605,
|
|
"grad_norm": 224.95326232910156,
|
|
"learning_rate": 2.078906883274924e-07,
|
|
"logits/chosen": 0.12839001417160034,
|
|
"logits/rejected": 0.09453357756137848,
|
|
"logps/chosen": -68.88770294189453,
|
|
"logps/ref_chosen": -66.45622253417969,
|
|
"logps/ref_rejected": -85.74736785888672,
|
|
"logps/rejected": -89.08694458007812,
|
|
"loss": 1.3125,
|
|
"margin_dpo/margin_mean": 0.9081062078475952,
|
|
"margin_dpo/margin_std": 2.2067739963531494,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.6016628873771731,
|
|
"fcm_dpo/beta": 0.7362475395202637,
|
|
"fcm_dpo/delta": -0.06381751596927643,
|
|
"fcm_dpo/margin": 1.4355955123901367,
|
|
"fcm_dpo/q_t": 0.3111931383609772,
|
|
"grad_norm": 139.69631958007812,
|
|
"learning_rate": 2.065879555832674e-07,
|
|
"logits/chosen": 0.16362245380878448,
|
|
"logits/rejected": 0.11482103168964386,
|
|
"logps/chosen": -51.42768859863281,
|
|
"logps/ref_chosen": -49.244239807128906,
|
|
"logps/ref_rejected": -75.18949127197266,
|
|
"logps/rejected": -78.80854034423828,
|
|
"loss": 0.8542,
|
|
"margin_dpo/margin_mean": 1.4355957508087158,
|
|
"margin_dpo/margin_std": 1.6850342750549316,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.6031746031746031,
|
|
"fcm_dpo/beta": 0.7134385108947754,
|
|
"fcm_dpo/delta": -0.1938168704509735,
|
|
"fcm_dpo/margin": 1.6412277221679688,
|
|
"fcm_dpo/q_t": 0.32044440507888794,
|
|
"grad_norm": 166.16122436523438,
|
|
"learning_rate": 2.052864371672457e-07,
|
|
"logits/chosen": 0.12605230510234833,
|
|
"logits/rejected": 0.031615402549505234,
|
|
"logps/chosen": -70.56350708007812,
|
|
"logps/ref_chosen": -68.30679321289062,
|
|
"logps/ref_rejected": -113.2708511352539,
|
|
"logps/rejected": -117.16879272460938,
|
|
"loss": 0.9153,
|
|
"margin_dpo/margin_mean": 1.641228437423706,
|
|
"margin_dpo/margin_std": 2.2904515266418457,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.6046863189720333,
|
|
"fcm_dpo/beta": 0.7214968204498291,
|
|
"fcm_dpo/delta": 0.2427944540977478,
|
|
"fcm_dpo/margin": 1.0737788677215576,
|
|
"fcm_dpo/q_t": 0.37354040145874023,
|
|
"grad_norm": 195.83456420898438,
|
|
"learning_rate": 2.0398616948569493e-07,
|
|
"logits/chosen": 0.18185412883758545,
|
|
"logits/rejected": 0.14102932810783386,
|
|
"logps/chosen": -74.3447036743164,
|
|
"logps/ref_chosen": -71.62649536132812,
|
|
"logps/ref_rejected": -90.98765563964844,
|
|
"logps/rejected": -94.77964782714844,
|
|
"loss": 1.0799,
|
|
"margin_dpo/margin_mean": 1.0737799406051636,
|
|
"margin_dpo/margin_std": 1.769069790840149,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.6046863189720333,
|
|
"eval_fcm_dpo/beta": 0.7420421838760376,
|
|
"eval_logits/chosen": 0.20529566705226898,
|
|
"eval_logits/rejected": 0.1674942821264267,
|
|
"eval_logps/chosen": -77.24707794189453,
|
|
"eval_logps/ref_chosen": -74.85946655273438,
|
|
"eval_logps/ref_rejected": -79.54898834228516,
|
|
"eval_logps/rejected": -83.04328918457031,
|
|
"eval_loss": 0.5736358761787415,
|
|
"eval_margin_dpo/margin_mean": 1.1066911220550537,
|
|
"eval_margin_dpo/margin_std": 2.0390639305114746,
|
|
"eval_runtime": 37.9981,
|
|
"eval_samples_per_second": 60.608,
|
|
"eval_steps_per_second": 1.895,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.6061980347694633,
|
|
"fcm_dpo/beta": 0.7018467783927917,
|
|
"fcm_dpo/delta": -0.38142985105514526,
|
|
"fcm_dpo/margin": 1.8959496021270752,
|
|
"fcm_dpo/q_t": 0.28283387422561646,
|
|
"grad_norm": 134.66848754882812,
|
|
"learning_rate": 2.0268718890989752e-07,
|
|
"logits/chosen": 0.19736307859420776,
|
|
"logits/rejected": 0.12394518405199051,
|
|
"logps/chosen": -55.94346618652344,
|
|
"logps/ref_chosen": -53.72495651245117,
|
|
"logps/ref_rejected": -75.06304931640625,
|
|
"logps/rejected": -79.17750549316406,
|
|
"loss": 0.7958,
|
|
"margin_dpo/margin_mean": 1.8959496021270752,
|
|
"margin_dpo/margin_std": 2.1448566913604736,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.6077097505668935,
|
|
"fcm_dpo/beta": 0.6789320707321167,
|
|
"fcm_dpo/delta": -0.07589547336101532,
|
|
"fcm_dpo/margin": 1.5725529193878174,
|
|
"fcm_dpo/q_t": 0.31703245639801025,
|
|
"grad_norm": 150.985595703125,
|
|
"learning_rate": 2.013895317751323e-07,
|
|
"logits/chosen": 0.18823865056037903,
|
|
"logits/rejected": 0.155757337808609,
|
|
"logps/chosen": -64.17735290527344,
|
|
"logps/ref_chosen": -61.873931884765625,
|
|
"logps/ref_rejected": -66.15198516845703,
|
|
"logps/rejected": -70.0279541015625,
|
|
"loss": 0.8937,
|
|
"margin_dpo/margin_mean": 1.57255220413208,
|
|
"margin_dpo/margin_std": 1.990880012512207,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.6092214663643235,
|
|
"fcm_dpo/beta": 0.6459471583366394,
|
|
"fcm_dpo/delta": -0.33136266469955444,
|
|
"fcm_dpo/margin": 1.995465636253357,
|
|
"fcm_dpo/q_t": 0.2850838303565979,
|
|
"grad_norm": 122.05958557128906,
|
|
"learning_rate": 2.0009323437965898e-07,
|
|
"logits/chosen": 0.254935085773468,
|
|
"logits/rejected": 0.19566068053245544,
|
|
"logps/chosen": -53.623878479003906,
|
|
"logps/ref_chosen": -51.321502685546875,
|
|
"logps/ref_rejected": -86.54010772705078,
|
|
"logps/rejected": -90.83795166015625,
|
|
"loss": 0.822,
|
|
"margin_dpo/margin_mean": 1.9954662322998047,
|
|
"margin_dpo/margin_std": 2.2620480060577393,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.6107331821617535,
|
|
"fcm_dpo/beta": 0.6391547918319702,
|
|
"fcm_dpo/delta": 0.12487616389989853,
|
|
"fcm_dpo/margin": 1.3852322101593018,
|
|
"fcm_dpo/q_t": 0.33947524428367615,
|
|
"grad_norm": 162.3168487548828,
|
|
"learning_rate": 1.9879833298370237e-07,
|
|
"logits/chosen": 0.15734031796455383,
|
|
"logits/rejected": 0.0940411314368248,
|
|
"logps/chosen": -64.49229431152344,
|
|
"logps/ref_chosen": -62.26288604736328,
|
|
"logps/ref_rejected": -95.19029998779297,
|
|
"logps/rejected": -98.80493927001953,
|
|
"loss": 0.9692,
|
|
"margin_dpo/margin_mean": 1.3852319717407227,
|
|
"margin_dpo/margin_std": 1.8755829334259033,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.6122448979591837,
|
|
"fcm_dpo/beta": 0.634131669998169,
|
|
"fcm_dpo/delta": 0.03272247314453125,
|
|
"fcm_dpo/margin": 1.5051655769348145,
|
|
"fcm_dpo/q_t": 0.3402136266231537,
|
|
"grad_norm": 127.78704071044922,
|
|
"learning_rate": 1.975048638084379e-07,
|
|
"logits/chosen": 0.19416359066963196,
|
|
"logits/rejected": 0.16025002300739288,
|
|
"logps/chosen": -52.953269958496094,
|
|
"logps/ref_chosen": -50.5843391418457,
|
|
"logps/ref_rejected": -65.43156433105469,
|
|
"logps/rejected": -69.3056640625,
|
|
"loss": 0.968,
|
|
"margin_dpo/margin_mean": 1.5051651000976562,
|
|
"margin_dpo/margin_std": 2.0198493003845215,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.6137566137566137,
|
|
"fcm_dpo/beta": 0.632315993309021,
|
|
"fcm_dpo/delta": -0.13053575158119202,
|
|
"fcm_dpo/margin": 1.755932331085205,
|
|
"fcm_dpo/q_t": 0.299167662858963,
|
|
"grad_norm": 111.40565490722656,
|
|
"learning_rate": 1.9621286303497914e-07,
|
|
"logits/chosen": 0.19205182790756226,
|
|
"logits/rejected": 0.0876828134059906,
|
|
"logps/chosen": -51.30841827392578,
|
|
"logps/ref_chosen": -48.99560546875,
|
|
"logps/ref_rejected": -92.47774505615234,
|
|
"logps/rejected": -96.54649353027344,
|
|
"loss": 0.8306,
|
|
"margin_dpo/margin_mean": 1.7559325695037842,
|
|
"margin_dpo/margin_std": 1.8786392211914062,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.6152683295540439,
|
|
"fcm_dpo/beta": 0.657385528087616,
|
|
"fcm_dpo/delta": 0.21200606226921082,
|
|
"fcm_dpo/margin": 1.224708914756775,
|
|
"fcm_dpo/q_t": 0.3558533787727356,
|
|
"grad_norm": 209.7600555419922,
|
|
"learning_rate": 1.9492236680336483e-07,
|
|
"logits/chosen": 0.13539977371692657,
|
|
"logits/rejected": 0.08569268882274628,
|
|
"logps/chosen": -92.08710479736328,
|
|
"logps/ref_chosen": -89.40056610107422,
|
|
"logps/ref_rejected": -99.28775024414062,
|
|
"logps/rejected": -103.19900512695312,
|
|
"loss": 1.0517,
|
|
"margin_dpo/margin_mean": 1.224708914756775,
|
|
"margin_dpo/margin_std": 2.031216621398926,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.6167800453514739,
|
|
"fcm_dpo/beta": 0.6533396244049072,
|
|
"fcm_dpo/delta": -0.2026488482952118,
|
|
"fcm_dpo/margin": 1.8033205270767212,
|
|
"fcm_dpo/q_t": 0.2949105501174927,
|
|
"grad_norm": 119.43750762939453,
|
|
"learning_rate": 1.9363341121154895e-07,
|
|
"logits/chosen": 0.1879599392414093,
|
|
"logits/rejected": 0.1291370391845703,
|
|
"logps/chosen": -56.83644485473633,
|
|
"logps/ref_chosen": -54.70391845703125,
|
|
"logps/ref_rejected": -73.98648834228516,
|
|
"logps/rejected": -77.92233276367188,
|
|
"loss": 0.8189,
|
|
"margin_dpo/margin_mean": 1.8033205270767212,
|
|
"margin_dpo/margin_std": 1.967519998550415,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.618291761148904,
|
|
"fcm_dpo/beta": 0.675295352935791,
|
|
"fcm_dpo/delta": 0.34728795289993286,
|
|
"fcm_dpo/margin": 0.9984000325202942,
|
|
"fcm_dpo/q_t": 0.3867419362068176,
|
|
"grad_norm": 168.26300048828125,
|
|
"learning_rate": 1.9234603231438994e-07,
|
|
"logits/chosen": 0.1908985674381256,
|
|
"logits/rejected": 0.19027680158615112,
|
|
"logps/chosen": -64.74275970458984,
|
|
"logps/ref_chosen": -62.11822509765625,
|
|
"logps/ref_rejected": -61.933509826660156,
|
|
"logps/rejected": -65.55644226074219,
|
|
"loss": 1.1776,
|
|
"margin_dpo/margin_mean": 0.9983994960784912,
|
|
"margin_dpo/margin_std": 2.0483100414276123,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.6198034769463341,
|
|
"fcm_dpo/beta": 0.6897294521331787,
|
|
"fcm_dpo/delta": 0.15048189461231232,
|
|
"fcm_dpo/margin": 1.2474337816238403,
|
|
"fcm_dpo/q_t": 0.3310784697532654,
|
|
"grad_norm": 171.6697998046875,
|
|
"learning_rate": 1.9106026612264315e-07,
|
|
"logits/chosen": 0.19930198788642883,
|
|
"logits/rejected": 0.1775505691766739,
|
|
"logps/chosen": -64.07925415039062,
|
|
"logps/ref_chosen": -61.80266189575195,
|
|
"logps/ref_rejected": -76.60002136230469,
|
|
"logps/rejected": -80.12403869628906,
|
|
"loss": 0.9295,
|
|
"margin_dpo/margin_mean": 1.2474339008331299,
|
|
"margin_dpo/margin_std": 1.4843469858169556,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.6213151927437641,
|
|
"fcm_dpo/beta": 0.6963478326797485,
|
|
"fcm_dpo/delta": -0.008774511516094208,
|
|
"fcm_dpo/margin": 1.4437086582183838,
|
|
"fcm_dpo/q_t": 0.3453066349029541,
|
|
"grad_norm": 181.04974365234375,
|
|
"learning_rate": 1.8977614860195296e-07,
|
|
"logits/chosen": 0.15856947004795074,
|
|
"logits/rejected": 0.1135367900133133,
|
|
"logps/chosen": -57.06800842285156,
|
|
"logps/ref_chosen": -54.44539260864258,
|
|
"logps/ref_rejected": -74.5650863647461,
|
|
"logps/rejected": -78.63140869140625,
|
|
"loss": 1.0746,
|
|
"margin_dpo/margin_mean": 1.4437092542648315,
|
|
"margin_dpo/margin_std": 2.3472909927368164,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.6228269085411943,
|
|
"fcm_dpo/beta": 0.6941448450088501,
|
|
"fcm_dpo/delta": -0.037317849695682526,
|
|
"fcm_dpo/margin": 1.4864020347595215,
|
|
"fcm_dpo/q_t": 0.3218909502029419,
|
|
"grad_norm": 150.66571044921875,
|
|
"learning_rate": 1.8849371567184662e-07,
|
|
"logits/chosen": 0.15843887627124786,
|
|
"logits/rejected": 0.1075374186038971,
|
|
"logps/chosen": -58.101863861083984,
|
|
"logps/ref_chosen": -55.248085021972656,
|
|
"logps/ref_rejected": -68.96623229980469,
|
|
"logps/rejected": -73.30641174316406,
|
|
"loss": 0.9281,
|
|
"margin_dpo/margin_mean": 1.4864020347595215,
|
|
"margin_dpo/margin_std": 1.928009033203125,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.6243386243386243,
|
|
"fcm_dpo/beta": 0.7043063640594482,
|
|
"fcm_dpo/delta": 0.07277508080005646,
|
|
"fcm_dpo/margin": 1.3262073993682861,
|
|
"fcm_dpo/q_t": 0.3529996871948242,
|
|
"grad_norm": 191.52565002441406,
|
|
"learning_rate": 1.872130032047302e-07,
|
|
"logits/chosen": 0.07102949917316437,
|
|
"logits/rejected": 0.04926881939172745,
|
|
"logps/chosen": -71.4292984008789,
|
|
"logps/ref_chosen": -68.72074890136719,
|
|
"logps/ref_rejected": -78.76539611816406,
|
|
"logps/rejected": -82.80016326904297,
|
|
"loss": 1.102,
|
|
"margin_dpo/margin_mean": 1.3262066841125488,
|
|
"margin_dpo/margin_std": 2.299673557281494,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.6258503401360545,
|
|
"fcm_dpo/beta": 0.6957840919494629,
|
|
"fcm_dpo/delta": -0.08349283784627914,
|
|
"fcm_dpo/margin": 1.542493462562561,
|
|
"fcm_dpo/q_t": 0.3177918791770935,
|
|
"grad_norm": 144.82449340820312,
|
|
"learning_rate": 1.8593404702488436e-07,
|
|
"logits/chosen": 0.17691153287887573,
|
|
"logits/rejected": 0.12794461846351624,
|
|
"logps/chosen": -56.64301300048828,
|
|
"logps/ref_chosen": -54.138214111328125,
|
|
"logps/ref_rejected": -74.65741729736328,
|
|
"logps/rejected": -78.7047119140625,
|
|
"loss": 0.9185,
|
|
"margin_dpo/margin_mean": 1.542493462562561,
|
|
"margin_dpo/margin_std": 2.0337252616882324,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.6273620559334845,
|
|
"fcm_dpo/beta": 0.7059125900268555,
|
|
"fcm_dpo/delta": 0.12531909346580505,
|
|
"fcm_dpo/margin": 1.2547566890716553,
|
|
"fcm_dpo/q_t": 0.3529791235923767,
|
|
"grad_norm": 175.94845581054688,
|
|
"learning_rate": 1.846568829074628e-07,
|
|
"logits/chosen": 0.17382574081420898,
|
|
"logits/rejected": 0.15772220492362976,
|
|
"logps/chosen": -58.64318084716797,
|
|
"logps/ref_chosen": -55.91856002807617,
|
|
"logps/ref_rejected": -61.747703552246094,
|
|
"logps/rejected": -65.72708129882812,
|
|
"loss": 1.1261,
|
|
"margin_dpo/margin_mean": 1.2547566890716553,
|
|
"margin_dpo/margin_std": 2.235924243927002,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.6288737717309146,
|
|
"fcm_dpo/beta": 0.7500655651092529,
|
|
"fcm_dpo/delta": 0.13376402854919434,
|
|
"fcm_dpo/margin": 1.1456831693649292,
|
|
"fcm_dpo/q_t": 0.356852650642395,
|
|
"grad_norm": 205.03404235839844,
|
|
"learning_rate": 1.8338154657749128e-07,
|
|
"logits/chosen": 0.18582022190093994,
|
|
"logits/rejected": 0.1482175588607788,
|
|
"logps/chosen": -57.20629119873047,
|
|
"logps/ref_chosen": -54.72308349609375,
|
|
"logps/ref_rejected": -69.17388916015625,
|
|
"logps/rejected": -72.80278015136719,
|
|
"loss": 1.154,
|
|
"margin_dpo/margin_mean": 1.1456834077835083,
|
|
"margin_dpo/margin_std": 2.031163454055786,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.6303854875283447,
|
|
"fcm_dpo/beta": 0.7249786853790283,
|
|
"fcm_dpo/delta": -0.24252735078334808,
|
|
"fcm_dpo/margin": 1.6672532558441162,
|
|
"fcm_dpo/q_t": 0.29793938994407654,
|
|
"grad_norm": 194.7884521484375,
|
|
"learning_rate": 1.8210807370886849e-07,
|
|
"logits/chosen": 0.24135205149650574,
|
|
"logits/rejected": 0.19347181916236877,
|
|
"logps/chosen": -59.584938049316406,
|
|
"logps/ref_chosen": -56.791259765625,
|
|
"logps/ref_rejected": -68.7791748046875,
|
|
"logps/rejected": -73.2401123046875,
|
|
"loss": 0.9246,
|
|
"margin_dpo/margin_mean": 1.6672537326812744,
|
|
"margin_dpo/margin_std": 2.092496395111084,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.6318972033257747,
|
|
"fcm_dpo/beta": 0.7283662557601929,
|
|
"fcm_dpo/delta": 0.3391045331954956,
|
|
"fcm_dpo/margin": 0.9409202337265015,
|
|
"fcm_dpo/q_t": 0.3989126980304718,
|
|
"grad_norm": 244.12945556640625,
|
|
"learning_rate": 1.8083649992336825e-07,
|
|
"logits/chosen": 0.19916735589504242,
|
|
"logits/rejected": 0.2042698860168457,
|
|
"logps/chosen": -72.2450942993164,
|
|
"logps/ref_chosen": -69.10798645019531,
|
|
"logps/ref_rejected": -75.09132385253906,
|
|
"logps/rejected": -79.16935729980469,
|
|
"loss": 1.3312,
|
|
"margin_dpo/margin_mean": 0.9409199357032776,
|
|
"margin_dpo/margin_std": 2.4089250564575195,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.6334089191232048,
|
|
"fcm_dpo/beta": 0.7232016324996948,
|
|
"fcm_dpo/delta": -0.157709002494812,
|
|
"fcm_dpo/margin": 1.570444941520691,
|
|
"fcm_dpo/q_t": 0.3125431537628174,
|
|
"grad_norm": 159.44833374023438,
|
|
"learning_rate": 1.7956686078964255e-07,
|
|
"logits/chosen": 0.09053687751293182,
|
|
"logits/rejected": 0.04949123412370682,
|
|
"logps/chosen": -60.52400588989258,
|
|
"logps/ref_chosen": -58.1717643737793,
|
|
"logps/ref_rejected": -71.67066955566406,
|
|
"logps/rejected": -75.59335327148438,
|
|
"loss": 0.9202,
|
|
"margin_dpo/margin_mean": 1.5704445838928223,
|
|
"margin_dpo/margin_std": 2.1407108306884766,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.6349206349206349,
|
|
"fcm_dpo/beta": 0.7380191683769226,
|
|
"fcm_dpo/delta": 0.1838487833738327,
|
|
"fcm_dpo/margin": 1.1262156963348389,
|
|
"fcm_dpo/q_t": 0.37532341480255127,
|
|
"grad_norm": 189.95645141601562,
|
|
"learning_rate": 1.782991918222275e-07,
|
|
"logits/chosen": 0.15650036931037903,
|
|
"logits/rejected": 0.11561809480190277,
|
|
"logps/chosen": -60.085960388183594,
|
|
"logps/ref_chosen": -57.05351257324219,
|
|
"logps/ref_rejected": -62.670982360839844,
|
|
"logps/rejected": -66.82964324951172,
|
|
"loss": 1.2387,
|
|
"margin_dpo/margin_mean": 1.126215934753418,
|
|
"margin_dpo/margin_std": 2.343519687652588,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.636432350718065,
|
|
"fcm_dpo/beta": 0.7731253504753113,
|
|
"fcm_dpo/delta": 0.1831362247467041,
|
|
"fcm_dpo/margin": 1.076323390007019,
|
|
"fcm_dpo/q_t": 0.3773455023765564,
|
|
"grad_norm": 191.1004180908203,
|
|
"learning_rate": 1.7703352848054887e-07,
|
|
"logits/chosen": 0.13414627313613892,
|
|
"logits/rejected": 0.09018285572528839,
|
|
"logps/chosen": -60.364906311035156,
|
|
"logps/ref_chosen": -57.32324981689453,
|
|
"logps/ref_rejected": -75.33782958984375,
|
|
"logps/rejected": -79.455810546875,
|
|
"loss": 1.3191,
|
|
"margin_dpo/margin_mean": 1.0763235092163086,
|
|
"margin_dpo/margin_std": 2.4303441047668457,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.6379440665154951,
|
|
"fcm_dpo/beta": 0.7678795456886292,
|
|
"fcm_dpo/delta": -0.15709903836250305,
|
|
"fcm_dpo/margin": 1.4837114810943604,
|
|
"fcm_dpo/q_t": 0.3305337727069855,
|
|
"grad_norm": 208.15638732910156,
|
|
"learning_rate": 1.7576990616793137e-07,
|
|
"logits/chosen": 0.18905231356620789,
|
|
"logits/rejected": 0.15734535455703735,
|
|
"logps/chosen": -69.5533447265625,
|
|
"logps/ref_chosen": -67.05757141113281,
|
|
"logps/ref_rejected": -72.12803649902344,
|
|
"logps/rejected": -76.10751342773438,
|
|
"loss": 1.0168,
|
|
"margin_dpo/margin_mean": 1.4837113618850708,
|
|
"margin_dpo/margin_std": 2.3469998836517334,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.6394557823129252,
|
|
"fcm_dpo/beta": 0.7414518594741821,
|
|
"fcm_dpo/delta": -0.12381698191165924,
|
|
"fcm_dpo/margin": 1.4967904090881348,
|
|
"fcm_dpo/q_t": 0.3256801962852478,
|
|
"grad_norm": 163.99639892578125,
|
|
"learning_rate": 1.745083602306071e-07,
|
|
"logits/chosen": 0.17420369386672974,
|
|
"logits/rejected": 0.12361004948616028,
|
|
"logps/chosen": -56.651309967041016,
|
|
"logps/ref_chosen": -54.06167221069336,
|
|
"logps/ref_rejected": -76.64092254638672,
|
|
"logps/rejected": -80.72735595703125,
|
|
"loss": 1.0145,
|
|
"margin_dpo/margin_mean": 1.4967900514602661,
|
|
"margin_dpo/margin_std": 2.2581710815429688,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.6409674981103552,
|
|
"fcm_dpo/beta": 0.7214820384979248,
|
|
"fcm_dpo/delta": -0.13758614659309387,
|
|
"fcm_dpo/margin": 1.5547800064086914,
|
|
"fcm_dpo/q_t": 0.3215063512325287,
|
|
"grad_norm": 178.93624877929688,
|
|
"learning_rate": 1.7324892595672804e-07,
|
|
"logits/chosen": 0.12239620089530945,
|
|
"logits/rejected": 0.09341743588447571,
|
|
"logps/chosen": -56.10038757324219,
|
|
"logps/ref_chosen": -53.60887145996094,
|
|
"logps/ref_rejected": -79.2139892578125,
|
|
"logps/rejected": -83.26029205322266,
|
|
"loss": 0.9368,
|
|
"margin_dpo/margin_mean": 1.554780125617981,
|
|
"margin_dpo/margin_std": 2.143885612487793,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.6424792139077853,
|
|
"fcm_dpo/beta": 0.7137551307678223,
|
|
"fcm_dpo/delta": -0.05110887810587883,
|
|
"fcm_dpo/margin": 1.4651919603347778,
|
|
"fcm_dpo/q_t": 0.32126736640930176,
|
|
"grad_norm": 145.7388153076172,
|
|
"learning_rate": 1.7199163857537824e-07,
|
|
"logits/chosen": 0.18603307008743286,
|
|
"logits/rejected": 0.16199590265750885,
|
|
"logps/chosen": -60.9288330078125,
|
|
"logps/ref_chosen": -58.41468048095703,
|
|
"logps/ref_rejected": -66.59054565429688,
|
|
"logps/rejected": -70.56989288330078,
|
|
"loss": 0.9247,
|
|
"margin_dpo/margin_mean": 1.4651916027069092,
|
|
"margin_dpo/margin_std": 1.9558470249176025,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.6439909297052154,
|
|
"fcm_dpo/beta": 0.7687985301017761,
|
|
"fcm_dpo/delta": 0.5285735130310059,
|
|
"fcm_dpo/margin": 0.6476625204086304,
|
|
"fcm_dpo/q_t": 0.4075689911842346,
|
|
"grad_norm": 223.34030151367188,
|
|
"learning_rate": 1.7073653325558828e-07,
|
|
"logits/chosen": 0.14943718910217285,
|
|
"logits/rejected": 0.15013810992240906,
|
|
"logps/chosen": -74.68634033203125,
|
|
"logps/ref_chosen": -71.70822143554688,
|
|
"logps/ref_rejected": -73.57725524902344,
|
|
"logps/rejected": -77.20303344726562,
|
|
"loss": 1.3908,
|
|
"margin_dpo/margin_mean": 0.6476625800132751,
|
|
"margin_dpo/margin_std": 1.9961354732513428,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.6455026455026455,
|
|
"fcm_dpo/beta": 0.7852897644042969,
|
|
"fcm_dpo/delta": -0.0677163228392601,
|
|
"fcm_dpo/margin": 1.3504165410995483,
|
|
"fcm_dpo/q_t": 0.33633100986480713,
|
|
"grad_norm": 163.32342529296875,
|
|
"learning_rate": 1.6948364510535218e-07,
|
|
"logits/chosen": 0.20361992716789246,
|
|
"logits/rejected": 0.16401880979537964,
|
|
"logps/chosen": -61.42845916748047,
|
|
"logps/ref_chosen": -58.64276885986328,
|
|
"logps/ref_rejected": -86.25437927246094,
|
|
"logps/rejected": -90.39048767089844,
|
|
"loss": 1.0482,
|
|
"margin_dpo/margin_mean": 1.350417137145996,
|
|
"margin_dpo/margin_std": 2.189502716064453,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.6470143613000756,
|
|
"fcm_dpo/beta": 0.7506411671638489,
|
|
"fcm_dpo/delta": -0.2936919033527374,
|
|
"fcm_dpo/margin": 1.6753690242767334,
|
|
"fcm_dpo/q_t": 0.29713624715805054,
|
|
"grad_norm": 168.94140625,
|
|
"learning_rate": 1.6823300917064458e-07,
|
|
"logits/chosen": 0.1335376501083374,
|
|
"logits/rejected": 0.09132213890552521,
|
|
"logps/chosen": -69.21764373779297,
|
|
"logps/ref_chosen": -66.5960464477539,
|
|
"logps/ref_rejected": -82.3941650390625,
|
|
"logps/rejected": -86.69113159179688,
|
|
"loss": 0.8538,
|
|
"margin_dpo/margin_mean": 1.6753690242767334,
|
|
"margin_dpo/margin_std": 2.044569969177246,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.6485260770975056,
|
|
"fcm_dpo/beta": 0.730757474899292,
|
|
"fcm_dpo/delta": -0.06324490904808044,
|
|
"fcm_dpo/margin": 1.4457752704620361,
|
|
"fcm_dpo/q_t": 0.3198990225791931,
|
|
"grad_norm": 177.0709991455078,
|
|
"learning_rate": 1.669846604344412e-07,
|
|
"logits/chosen": 0.13211305439472198,
|
|
"logits/rejected": 0.13196702301502228,
|
|
"logps/chosen": -59.602317810058594,
|
|
"logps/ref_chosen": -57.00970458984375,
|
|
"logps/ref_rejected": -59.86549377441406,
|
|
"logps/rejected": -63.90388488769531,
|
|
"loss": 0.969,
|
|
"margin_dpo/margin_mean": 1.4457753896713257,
|
|
"margin_dpo/margin_std": 2.0829176902770996,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.6500377928949358,
|
|
"fcm_dpo/beta": 0.733278751373291,
|
|
"fcm_dpo/delta": 0.05039960518479347,
|
|
"fcm_dpo/margin": 1.3011215925216675,
|
|
"fcm_dpo/q_t": 0.3425254225730896,
|
|
"grad_norm": 174.14768981933594,
|
|
"learning_rate": 1.6573863381573954e-07,
|
|
"logits/chosen": 0.08477595448493958,
|
|
"logits/rejected": 0.0728166401386261,
|
|
"logps/chosen": -61.990821838378906,
|
|
"logps/ref_chosen": -59.563194274902344,
|
|
"logps/ref_rejected": -70.52289581298828,
|
|
"logps/rejected": -74.25164794921875,
|
|
"loss": 1.0004,
|
|
"margin_dpo/margin_mean": 1.3011209964752197,
|
|
"margin_dpo/margin_std": 1.9769561290740967,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.6515495086923658,
|
|
"fcm_dpo/beta": 0.7297165393829346,
|
|
"fcm_dpo/delta": 0.036733031272888184,
|
|
"fcm_dpo/margin": 1.3228065967559814,
|
|
"fcm_dpo/q_t": 0.3405199646949768,
|
|
"grad_norm": 171.15907287597656,
|
|
"learning_rate": 1.6449496416858282e-07,
|
|
"logits/chosen": 0.17870327830314636,
|
|
"logits/rejected": 0.14549797773361206,
|
|
"logps/chosen": -52.56354904174805,
|
|
"logps/ref_chosen": -50.20032501220703,
|
|
"logps/ref_rejected": -77.81680297851562,
|
|
"logps/rejected": -81.50283813476562,
|
|
"loss": 1.0051,
|
|
"margin_dpo/margin_mean": 1.3228061199188232,
|
|
"margin_dpo/margin_std": 2.025378704071045,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.6530612244897959,
|
|
"fcm_dpo/beta": 0.7157025337219238,
|
|
"fcm_dpo/delta": -0.10452289134263992,
|
|
"fcm_dpo/margin": 1.518718957901001,
|
|
"fcm_dpo/q_t": 0.32164376974105835,
|
|
"grad_norm": 169.30746459960938,
|
|
"learning_rate": 1.632536862810844e-07,
|
|
"logits/chosen": 0.17181310057640076,
|
|
"logits/rejected": 0.1360006034374237,
|
|
"logps/chosen": -64.20341491699219,
|
|
"logps/ref_chosen": -61.662757873535156,
|
|
"logps/ref_rejected": -83.94496154785156,
|
|
"logps/rejected": -88.00434112548828,
|
|
"loss": 0.9613,
|
|
"margin_dpo/margin_mean": 1.5187186002731323,
|
|
"margin_dpo/margin_std": 2.0636777877807617,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.654572940287226,
|
|
"fcm_dpo/beta": 0.6925790309906006,
|
|
"fcm_dpo/delta": -0.28451234102249146,
|
|
"fcm_dpo/margin": 1.802809476852417,
|
|
"fcm_dpo/q_t": 0.3154519498348236,
|
|
"grad_norm": 145.2095947265625,
|
|
"learning_rate": 1.6201483487445515e-07,
|
|
"logits/chosen": 0.24505794048309326,
|
|
"logits/rejected": 0.23090487718582153,
|
|
"logps/chosen": -66.46299743652344,
|
|
"logps/ref_chosen": -63.72917938232422,
|
|
"logps/ref_rejected": -65.8391342163086,
|
|
"logps/rejected": -70.3757553100586,
|
|
"loss": 0.9081,
|
|
"margin_dpo/margin_mean": 1.802809476852417,
|
|
"margin_dpo/margin_std": 2.475156784057617,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.656084656084656,
|
|
"fcm_dpo/beta": 0.659028947353363,
|
|
"fcm_dpo/delta": -0.13485188782215118,
|
|
"fcm_dpo/margin": 1.6889315843582153,
|
|
"fcm_dpo/q_t": 0.31248465180397034,
|
|
"grad_norm": 130.46426391601562,
|
|
"learning_rate": 1.6077844460203204e-07,
|
|
"logits/chosen": 0.21205420792102814,
|
|
"logits/rejected": 0.16366201639175415,
|
|
"logps/chosen": -50.280784606933594,
|
|
"logps/ref_chosen": -47.97331619262695,
|
|
"logps/ref_rejected": -72.51132202148438,
|
|
"logps/rejected": -76.50772857666016,
|
|
"loss": 0.9792,
|
|
"margin_dpo/margin_mean": 1.6889313459396362,
|
|
"margin_dpo/margin_std": 2.39209246635437,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.6575963718820862,
|
|
"fcm_dpo/beta": 0.6766383647918701,
|
|
"fcm_dpo/delta": 0.04958092421293259,
|
|
"fcm_dpo/margin": 1.4080250263214111,
|
|
"fcm_dpo/q_t": 0.3341384530067444,
|
|
"grad_norm": 151.441162109375,
|
|
"learning_rate": 1.5954455004830878e-07,
|
|
"logits/chosen": 0.22931721806526184,
|
|
"logits/rejected": 0.1980956494808197,
|
|
"logps/chosen": -59.903053283691406,
|
|
"logps/ref_chosen": -57.06024932861328,
|
|
"logps/ref_rejected": -71.69146728515625,
|
|
"logps/rejected": -75.94229888916016,
|
|
"loss": 0.9779,
|
|
"margin_dpo/margin_mean": 1.4080252647399902,
|
|
"margin_dpo/margin_std": 2.0040295124053955,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.6591080876795162,
|
|
"fcm_dpo/beta": 0.6710116863250732,
|
|
"fcm_dpo/delta": -0.004951075650751591,
|
|
"fcm_dpo/margin": 1.4969241619110107,
|
|
"fcm_dpo/q_t": 0.3257167935371399,
|
|
"grad_norm": 149.94261169433594,
|
|
"learning_rate": 1.5831318572796847e-07,
|
|
"logits/chosen": 0.16502049565315247,
|
|
"logits/rejected": 0.1188136488199234,
|
|
"logps/chosen": -58.86479949951172,
|
|
"logps/ref_chosen": -56.158050537109375,
|
|
"logps/ref_rejected": -67.63787841796875,
|
|
"logps/rejected": -71.841552734375,
|
|
"loss": 0.9499,
|
|
"margin_dpo/margin_mean": 1.4969241619110107,
|
|
"margin_dpo/margin_std": 2.013727903366089,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.6606198034769464,
|
|
"fcm_dpo/beta": 0.6555310487747192,
|
|
"fcm_dpo/delta": 0.013455048203468323,
|
|
"fcm_dpo/margin": 1.48863685131073,
|
|
"fcm_dpo/q_t": 0.3591005504131317,
|
|
"grad_norm": 169.48338317871094,
|
|
"learning_rate": 1.5708438608491815e-07,
|
|
"logits/chosen": 0.15093836188316345,
|
|
"logits/rejected": 0.05515362694859505,
|
|
"logps/chosen": -59.8901252746582,
|
|
"logps/ref_chosen": -56.98578643798828,
|
|
"logps/ref_rejected": -85.61524963378906,
|
|
"logps/rejected": -90.00823974609375,
|
|
"loss": 1.1728,
|
|
"margin_dpo/margin_mean": 1.4886366128921509,
|
|
"margin_dpo/margin_std": 2.654061794281006,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.6621315192743764,
|
|
"fcm_dpo/beta": 0.6835014820098877,
|
|
"fcm_dpo/delta": 0.04397985339164734,
|
|
"fcm_dpo/margin": 1.4018324613571167,
|
|
"fcm_dpo/q_t": 0.3333927392959595,
|
|
"grad_norm": 121.23139190673828,
|
|
"learning_rate": 1.558581854913253e-07,
|
|
"logits/chosen": 0.19322752952575684,
|
|
"logits/rejected": 0.14423127472400665,
|
|
"logps/chosen": -43.83991241455078,
|
|
"logps/ref_chosen": -41.27777862548828,
|
|
"logps/ref_rejected": -65.33840942382812,
|
|
"logps/rejected": -69.3023681640625,
|
|
"loss": 0.9826,
|
|
"margin_dpo/margin_mean": 1.401832938194275,
|
|
"margin_dpo/margin_std": 1.9659230709075928,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.6636432350718064,
|
|
"fcm_dpo/beta": 0.6828280091285706,
|
|
"fcm_dpo/delta": -0.04300057888031006,
|
|
"fcm_dpo/margin": 1.5173701047897339,
|
|
"fcm_dpo/q_t": 0.31167930364608765,
|
|
"grad_norm": 186.42234802246094,
|
|
"learning_rate": 1.5463461824665658e-07,
|
|
"logits/chosen": 0.11366529762744904,
|
|
"logits/rejected": 0.09049699455499649,
|
|
"logps/chosen": -83.722900390625,
|
|
"logps/ref_chosen": -81.41764831542969,
|
|
"logps/ref_rejected": -94.72309875488281,
|
|
"logps/rejected": -98.54571533203125,
|
|
"loss": 0.9309,
|
|
"margin_dpo/margin_mean": 1.5173696279525757,
|
|
"margin_dpo/margin_std": 1.9623498916625977,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.6651549508692366,
|
|
"fcm_dpo/beta": 0.65775465965271,
|
|
"fcm_dpo/delta": -0.16628237068653107,
|
|
"fcm_dpo/margin": 1.7444255352020264,
|
|
"fcm_dpo/q_t": 0.30622392892837524,
|
|
"grad_norm": 129.40713500976562,
|
|
"learning_rate": 1.534137185767178e-07,
|
|
"logits/chosen": 0.1228909119963646,
|
|
"logits/rejected": 0.04697669297456741,
|
|
"logps/chosen": -44.876808166503906,
|
|
"logps/ref_chosen": -42.538185119628906,
|
|
"logps/ref_rejected": -69.78813934326172,
|
|
"logps/rejected": -73.87118530273438,
|
|
"loss": 0.8618,
|
|
"margin_dpo/margin_mean": 1.744425654411316,
|
|
"margin_dpo/margin_std": 2.120790481567383,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.6666666666666666,
|
|
"fcm_dpo/beta": 0.6342558860778809,
|
|
"fcm_dpo/delta": -0.15066742897033691,
|
|
"fcm_dpo/margin": 1.7861425876617432,
|
|
"fcm_dpo/q_t": 0.292005717754364,
|
|
"grad_norm": 122.53746032714844,
|
|
"learning_rate": 1.521955206326976e-07,
|
|
"logits/chosen": 0.14506568014621735,
|
|
"logits/rejected": 0.07649335265159607,
|
|
"logps/chosen": -59.81481170654297,
|
|
"logps/ref_chosen": -57.593223571777344,
|
|
"logps/ref_rejected": -84.82878875732422,
|
|
"logps/rejected": -88.83651733398438,
|
|
"loss": 0.7795,
|
|
"margin_dpo/margin_mean": 1.7861430644989014,
|
|
"margin_dpo/margin_std": 1.8194975852966309,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.6681783824640968,
|
|
"fcm_dpo/beta": 0.6411465406417847,
|
|
"fcm_dpo/delta": 0.09939359128475189,
|
|
"fcm_dpo/margin": 1.418702483177185,
|
|
"fcm_dpo/q_t": 0.34212759137153625,
|
|
"grad_norm": 168.3711395263672,
|
|
"learning_rate": 1.5098005849021078e-07,
|
|
"logits/chosen": 0.19873833656311035,
|
|
"logits/rejected": 0.16300469636917114,
|
|
"logps/chosen": -70.19367980957031,
|
|
"logps/ref_chosen": -67.46121978759766,
|
|
"logps/ref_rejected": -89.0693588256836,
|
|
"logps/rejected": -93.22052001953125,
|
|
"loss": 0.9856,
|
|
"margin_dpo/margin_mean": 1.418702483177185,
|
|
"margin_dpo/margin_std": 2.081653594970703,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.6696900982615268,
|
|
"fcm_dpo/beta": 0.6237589120864868,
|
|
"fcm_dpo/delta": -0.24319452047348022,
|
|
"fcm_dpo/margin": 1.9465469121932983,
|
|
"fcm_dpo/q_t": 0.29789623618125916,
|
|
"grad_norm": 127.22098541259766,
|
|
"learning_rate": 1.4976736614834662e-07,
|
|
"logits/chosen": 0.18308596312999725,
|
|
"logits/rejected": 0.12943433225154877,
|
|
"logps/chosen": -57.22307586669922,
|
|
"logps/ref_chosen": -54.79610061645508,
|
|
"logps/ref_rejected": -77.80781555175781,
|
|
"logps/rejected": -82.18133544921875,
|
|
"loss": 0.9041,
|
|
"margin_dpo/margin_mean": 1.946547269821167,
|
|
"margin_dpo/margin_std": 2.4835057258605957,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.671201814058957,
|
|
"fcm_dpo/beta": 0.661973237991333,
|
|
"fcm_dpo/delta": 0.5227335691452026,
|
|
"fcm_dpo/margin": 0.7623451948165894,
|
|
"fcm_dpo/q_t": 0.4185434579849243,
|
|
"grad_norm": 196.5355224609375,
|
|
"learning_rate": 1.4855747752871654e-07,
|
|
"logits/chosen": 0.18811815977096558,
|
|
"logits/rejected": 0.1319284737110138,
|
|
"logps/chosen": -61.6188850402832,
|
|
"logps/ref_chosen": -58.749061584472656,
|
|
"logps/ref_rejected": -86.87396240234375,
|
|
"logps/rejected": -90.50614166259766,
|
|
"loss": 1.3947,
|
|
"margin_dpo/margin_mean": 0.7623450756072998,
|
|
"margin_dpo/margin_std": 2.2845544815063477,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.672713529856387,
|
|
"fcm_dpo/beta": 0.6808522343635559,
|
|
"fcm_dpo/delta": -0.0005050599575042725,
|
|
"fcm_dpo/margin": 1.4694095849990845,
|
|
"fcm_dpo/q_t": 0.3312973380088806,
|
|
"grad_norm": 179.10009765625,
|
|
"learning_rate": 1.473504264745062e-07,
|
|
"logits/chosen": 0.1777781844139099,
|
|
"logits/rejected": 0.16238978505134583,
|
|
"logps/chosen": -64.01307678222656,
|
|
"logps/ref_chosen": -60.91743850708008,
|
|
"logps/ref_rejected": -71.5637435913086,
|
|
"logps/rejected": -76.1287841796875,
|
|
"loss": 1.0013,
|
|
"margin_dpo/margin_mean": 1.4694093465805054,
|
|
"margin_dpo/margin_std": 2.1590332984924316,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.674225245653817,
|
|
"fcm_dpo/beta": 0.6526922583580017,
|
|
"fcm_dpo/delta": -0.4451631009578705,
|
|
"fcm_dpo/margin": 2.119361162185669,
|
|
"fcm_dpo/q_t": 0.25671201944351196,
|
|
"grad_norm": 107.62913513183594,
|
|
"learning_rate": 1.461462467495284e-07,
|
|
"logits/chosen": 0.19539491832256317,
|
|
"logits/rejected": 0.13630658388137817,
|
|
"logps/chosen": -51.429866790771484,
|
|
"logps/ref_chosen": -48.79924774169922,
|
|
"logps/ref_rejected": -71.8719482421875,
|
|
"logps/rejected": -76.62193298339844,
|
|
"loss": 0.6997,
|
|
"margin_dpo/margin_mean": 2.1193606853485107,
|
|
"margin_dpo/margin_std": 1.913917064666748,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.6757369614512472,
|
|
"fcm_dpo/beta": 0.6015419363975525,
|
|
"fcm_dpo/delta": -0.2893209457397461,
|
|
"fcm_dpo/margin": 2.0844779014587402,
|
|
"fcm_dpo/q_t": 0.27232983708381653,
|
|
"grad_norm": 112.18714141845703,
|
|
"learning_rate": 1.4494497203727843e-07,
|
|
"logits/chosen": 0.1319853812456131,
|
|
"logits/rejected": 0.05933520570397377,
|
|
"logps/chosen": -55.8741569519043,
|
|
"logps/ref_chosen": -53.682716369628906,
|
|
"logps/ref_rejected": -88.17315673828125,
|
|
"logps/rejected": -92.44908142089844,
|
|
"loss": 0.8532,
|
|
"margin_dpo/margin_mean": 2.084477424621582,
|
|
"margin_dpo/margin_std": 2.4492878913879395,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.6772486772486772,
|
|
"fcm_dpo/beta": 0.5884913206100464,
|
|
"fcm_dpo/delta": -8.67573544383049e-06,
|
|
"fcm_dpo/margin": 1.6992573738098145,
|
|
"fcm_dpo/q_t": 0.3187348246574402,
|
|
"grad_norm": 115.34869384765625,
|
|
"learning_rate": 1.4374663593999256e-07,
|
|
"logits/chosen": 0.17938536405563354,
|
|
"logits/rejected": 0.14003482460975647,
|
|
"logps/chosen": -56.258331298828125,
|
|
"logps/ref_chosen": -53.75125503540039,
|
|
"logps/ref_rejected": -77.17623901367188,
|
|
"logps/rejected": -81.382568359375,
|
|
"loss": 0.8966,
|
|
"margin_dpo/margin_mean": 1.6992576122283936,
|
|
"margin_dpo/margin_std": 2.116860866546631,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.6787603930461074,
|
|
"fcm_dpo/beta": 0.6253612041473389,
|
|
"fcm_dpo/delta": 0.423923134803772,
|
|
"fcm_dpo/margin": 0.9630190134048462,
|
|
"fcm_dpo/q_t": 0.38473382592201233,
|
|
"grad_norm": 156.52899169921875,
|
|
"learning_rate": 1.4255127197770707e-07,
|
|
"logits/chosen": 0.07655443251132965,
|
|
"logits/rejected": 0.061458148062229156,
|
|
"logps/chosen": -78.82083129882812,
|
|
"logps/ref_chosen": -75.82737731933594,
|
|
"logps/ref_rejected": -82.20687866210938,
|
|
"logps/rejected": -86.16334533691406,
|
|
"loss": 1.1046,
|
|
"margin_dpo/margin_mean": 0.9630191326141357,
|
|
"margin_dpo/margin_std": 1.6913801431655884,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.6802721088435374,
|
|
"fcm_dpo/beta": 0.6548283100128174,
|
|
"fcm_dpo/delta": 0.12034881114959717,
|
|
"fcm_dpo/margin": 1.3581256866455078,
|
|
"fcm_dpo/q_t": 0.35161659121513367,
|
|
"grad_norm": 150.02163696289062,
|
|
"learning_rate": 1.4135891358732205e-07,
|
|
"logits/chosen": 0.21886947751045227,
|
|
"logits/rejected": 0.1426752656698227,
|
|
"logps/chosen": -49.74790954589844,
|
|
"logps/ref_chosen": -47.11572265625,
|
|
"logps/ref_rejected": -78.7546615600586,
|
|
"logps/rejected": -82.7449722290039,
|
|
"loss": 1.0203,
|
|
"margin_dpo/margin_mean": 1.3581254482269287,
|
|
"margin_dpo/margin_std": 2.10986590385437,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.6817838246409675,
|
|
"fcm_dpo/beta": 0.6705623865127563,
|
|
"fcm_dpo/delta": 0.17599515616893768,
|
|
"fcm_dpo/margin": 1.2513468265533447,
|
|
"fcm_dpo/q_t": 0.3569420576095581,
|
|
"grad_norm": 179.85128784179688,
|
|
"learning_rate": 1.4016959412166437e-07,
|
|
"logits/chosen": 0.18319055438041687,
|
|
"logits/rejected": 0.14441323280334473,
|
|
"logps/chosen": -66.11190795898438,
|
|
"logps/ref_chosen": -63.350440979003906,
|
|
"logps/ref_rejected": -76.28530883789062,
|
|
"logps/rejected": -80.29811096191406,
|
|
"loss": 1.0998,
|
|
"margin_dpo/margin_mean": 1.251347541809082,
|
|
"margin_dpo/margin_std": 2.137026786804199,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.6832955404383976,
|
|
"fcm_dpo/beta": 0.6785616874694824,
|
|
"fcm_dpo/delta": -0.009446687065064907,
|
|
"fcm_dpo/margin": 1.486222743988037,
|
|
"fcm_dpo/q_t": 0.32352200150489807,
|
|
"grad_norm": 165.91653442382812,
|
|
"learning_rate": 1.3898334684855645e-07,
|
|
"logits/chosen": 0.13353146612644196,
|
|
"logits/rejected": 0.08063468337059021,
|
|
"logps/chosen": -58.241943359375,
|
|
"logps/ref_chosen": -55.58583450317383,
|
|
"logps/ref_rejected": -77.68738555908203,
|
|
"logps/rejected": -81.8297119140625,
|
|
"loss": 0.9588,
|
|
"margin_dpo/margin_mean": 1.4862233400344849,
|
|
"margin_dpo/margin_std": 2.007995128631592,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.6848072562358276,
|
|
"fcm_dpo/beta": 0.6866965293884277,
|
|
"fcm_dpo/delta": 0.07683775573968887,
|
|
"fcm_dpo/margin": 1.354539394378662,
|
|
"fcm_dpo/q_t": 0.36106228828430176,
|
|
"grad_norm": 152.8515167236328,
|
|
"learning_rate": 1.3780020494988445e-07,
|
|
"logits/chosen": 0.12297318130731583,
|
|
"logits/rejected": 0.09657086431980133,
|
|
"logps/chosen": -64.26089477539062,
|
|
"logps/ref_chosen": -61.778202056884766,
|
|
"logps/ref_rejected": -71.51403045654297,
|
|
"logps/rejected": -75.35125732421875,
|
|
"loss": 1.083,
|
|
"margin_dpo/margin_mean": 1.354539394378662,
|
|
"margin_dpo/margin_std": 2.2785511016845703,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.6863189720332578,
|
|
"fcm_dpo/beta": 0.6685348749160767,
|
|
"fcm_dpo/delta": -0.19035013020038605,
|
|
"fcm_dpo/margin": 1.7468868494033813,
|
|
"fcm_dpo/q_t": 0.3080099821090698,
|
|
"grad_norm": 127.66053771972656,
|
|
"learning_rate": 1.366202015206706e-07,
|
|
"logits/chosen": 0.16820810735225677,
|
|
"logits/rejected": 0.13530485332012177,
|
|
"logps/chosen": -53.909812927246094,
|
|
"logps/ref_chosen": -51.59515380859375,
|
|
"logps/ref_rejected": -63.96732711791992,
|
|
"logps/rejected": -68.02886962890625,
|
|
"loss": 0.9374,
|
|
"margin_dpo/margin_mean": 1.7468867301940918,
|
|
"margin_dpo/margin_std": 2.297898292541504,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.6878306878306878,
|
|
"fcm_dpo/beta": 0.6465753316879272,
|
|
"fcm_dpo/delta": -0.1078774556517601,
|
|
"fcm_dpo/margin": 1.6916618347167969,
|
|
"fcm_dpo/q_t": 0.31326356530189514,
|
|
"grad_norm": 150.88607788085938,
|
|
"learning_rate": 1.354433695681474e-07,
|
|
"logits/chosen": 0.040207911282777786,
|
|
"logits/rejected": 0.007178250700235367,
|
|
"logps/chosen": -73.28399658203125,
|
|
"logps/ref_chosen": -70.65170288085938,
|
|
"logps/ref_rejected": -77.44276428222656,
|
|
"logps/rejected": -81.7667236328125,
|
|
"loss": 0.8921,
|
|
"margin_dpo/margin_mean": 1.6916615962982178,
|
|
"margin_dpo/margin_std": 2.1551051139831543,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.6893424036281179,
|
|
"fcm_dpo/beta": 0.6534501910209656,
|
|
"fcm_dpo/delta": 0.01624855026602745,
|
|
"fcm_dpo/margin": 1.5070427656173706,
|
|
"fcm_dpo/q_t": 0.32177361845970154,
|
|
"grad_norm": 149.37515258789062,
|
|
"learning_rate": 1.3426974201083439e-07,
|
|
"logits/chosen": 0.11166486144065857,
|
|
"logits/rejected": 0.07302643358707428,
|
|
"logps/chosen": -59.19749450683594,
|
|
"logps/ref_chosen": -56.398284912109375,
|
|
"logps/ref_rejected": -82.61642456054688,
|
|
"logps/rejected": -86.92267608642578,
|
|
"loss": 0.921,
|
|
"margin_dpo/margin_mean": 1.507042646408081,
|
|
"margin_dpo/margin_std": 1.9450860023498535,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.690854119425548,
|
|
"fcm_dpo/beta": 0.6657828092575073,
|
|
"fcm_dpo/delta": 0.21099001169204712,
|
|
"fcm_dpo/margin": 1.2110447883605957,
|
|
"fcm_dpo/q_t": 0.3547195792198181,
|
|
"grad_norm": 146.60675048828125,
|
|
"learning_rate": 1.3309935167761717e-07,
|
|
"logits/chosen": 0.20737716555595398,
|
|
"logits/rejected": 0.1518602967262268,
|
|
"logps/chosen": -47.50776290893555,
|
|
"logps/ref_chosen": -44.72057342529297,
|
|
"logps/ref_rejected": -68.1158676147461,
|
|
"logps/rejected": -72.11409759521484,
|
|
"loss": 1.0168,
|
|
"margin_dpo/margin_mean": 1.2110450267791748,
|
|
"margin_dpo/margin_std": 1.8031151294708252,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.6923658352229781,
|
|
"fcm_dpo/beta": 0.6568002104759216,
|
|
"fcm_dpo/delta": -0.16649408638477325,
|
|
"fcm_dpo/margin": 1.7431389093399048,
|
|
"fcm_dpo/q_t": 0.2945740222930908,
|
|
"grad_norm": 143.60699462890625,
|
|
"learning_rate": 1.3193223130682936e-07,
|
|
"logits/chosen": 0.15916739404201508,
|
|
"logits/rejected": 0.07576747238636017,
|
|
"logps/chosen": -52.41571807861328,
|
|
"logps/ref_chosen": -50.00569152832031,
|
|
"logps/ref_rejected": -87.50015258789062,
|
|
"logps/rejected": -91.6533203125,
|
|
"loss": 0.8903,
|
|
"margin_dpo/margin_mean": 1.7431399822235107,
|
|
"margin_dpo/margin_std": 2.1261298656463623,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.6938775510204082,
|
|
"fcm_dpo/beta": 0.6480120420455933,
|
|
"fcm_dpo/delta": -0.23831713199615479,
|
|
"fcm_dpo/margin": 1.8602559566497803,
|
|
"fcm_dpo/q_t": 0.2993336021900177,
|
|
"grad_norm": 136.50265502929688,
|
|
"learning_rate": 1.3076841354533658e-07,
|
|
"logits/chosen": 0.18282179534435272,
|
|
"logits/rejected": 0.15275558829307556,
|
|
"logps/chosen": -67.98956298828125,
|
|
"logps/ref_chosen": -65.37794494628906,
|
|
"logps/ref_rejected": -88.19244384765625,
|
|
"logps/rejected": -92.66431427001953,
|
|
"loss": 0.8494,
|
|
"margin_dpo/margin_mean": 1.8602561950683594,
|
|
"margin_dpo/margin_std": 2.121655225753784,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.6953892668178382,
|
|
"fcm_dpo/beta": 0.6043037176132202,
|
|
"fcm_dpo/delta": -0.21189001202583313,
|
|
"fcm_dpo/margin": 1.9620928764343262,
|
|
"fcm_dpo/q_t": 0.3046306073665619,
|
|
"grad_norm": 148.3003692626953,
|
|
"learning_rate": 1.2960793094762345e-07,
|
|
"logits/chosen": 0.19387787580490112,
|
|
"logits/rejected": 0.09904222190380096,
|
|
"logps/chosen": -67.26277160644531,
|
|
"logps/ref_chosen": -64.5616683959961,
|
|
"logps/ref_rejected": -88.67890167236328,
|
|
"logps/rejected": -93.34209442138672,
|
|
"loss": 0.8275,
|
|
"margin_dpo/margin_mean": 1.962093472480774,
|
|
"margin_dpo/margin_std": 2.332686185836792,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.6969009826152683,
|
|
"fcm_dpo/beta": 0.5892372131347656,
|
|
"fcm_dpo/delta": -0.03291664272546768,
|
|
"fcm_dpo/margin": 1.7409393787384033,
|
|
"fcm_dpo/q_t": 0.31874844431877136,
|
|
"grad_norm": 127.28734588623047,
|
|
"learning_rate": 1.2845081597488286e-07,
|
|
"logits/chosen": 0.23900935053825378,
|
|
"logits/rejected": 0.17159438133239746,
|
|
"logps/chosen": -52.00554275512695,
|
|
"logps/ref_chosen": -49.4779167175293,
|
|
"logps/ref_rejected": -72.65262603759766,
|
|
"logps/rejected": -76.92119598388672,
|
|
"loss": 0.9043,
|
|
"margin_dpo/margin_mean": 1.7409393787384033,
|
|
"margin_dpo/margin_std": 2.132420063018799,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.6984126984126984,
|
|
"fcm_dpo/beta": 0.577314019203186,
|
|
"fcm_dpo/delta": -0.1604897826910019,
|
|
"fcm_dpo/margin": 1.9741871356964111,
|
|
"fcm_dpo/q_t": 0.2894290089607239,
|
|
"grad_norm": 113.71949005126953,
|
|
"learning_rate": 1.27297100994108e-07,
|
|
"logits/chosen": 0.13943126797676086,
|
|
"logits/rejected": 0.09149923920631409,
|
|
"logps/chosen": -63.15895080566406,
|
|
"logps/ref_chosen": -60.4951171875,
|
|
"logps/ref_rejected": -74.82136535644531,
|
|
"logps/rejected": -79.45939636230469,
|
|
"loss": 0.7873,
|
|
"margin_dpo/margin_mean": 1.9741871356964111,
|
|
"margin_dpo/margin_std": 2.1054553985595703,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.6999244142101285,
|
|
"fcm_dpo/beta": 0.5867961049079895,
|
|
"fcm_dpo/delta": 0.11635659635066986,
|
|
"fcm_dpo/margin": 1.523716688156128,
|
|
"fcm_dpo/q_t": 0.3367578387260437,
|
|
"grad_norm": 127.23412322998047,
|
|
"learning_rate": 1.2614681827718695e-07,
|
|
"logits/chosen": 0.15458309650421143,
|
|
"logits/rejected": 0.14070303738117218,
|
|
"logps/chosen": -70.25871276855469,
|
|
"logps/ref_chosen": -67.68511962890625,
|
|
"logps/ref_rejected": -71.32196044921875,
|
|
"logps/rejected": -75.41926574707031,
|
|
"loss": 0.931,
|
|
"margin_dpo/margin_mean": 1.523715853691101,
|
|
"margin_dpo/margin_std": 1.9335708618164062,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.7014361300075586,
|
|
"fcm_dpo/beta": 0.6116993427276611,
|
|
"fcm_dpo/delta": 0.15539291501045227,
|
|
"fcm_dpo/margin": 1.3953006267547607,
|
|
"fcm_dpo/q_t": 0.3521912395954132,
|
|
"grad_norm": 161.49534606933594,
|
|
"learning_rate": 1.2500000000000005e-07,
|
|
"logits/chosen": 0.12334546446800232,
|
|
"logits/rejected": 0.10288789868354797,
|
|
"logps/chosen": -61.994964599609375,
|
|
"logps/ref_chosen": -59.16564178466797,
|
|
"logps/ref_rejected": -69.56146240234375,
|
|
"logps/rejected": -73.78608703613281,
|
|
"loss": 1.0905,
|
|
"margin_dpo/margin_mean": 1.3953003883361816,
|
|
"margin_dpo/margin_std": 2.2576606273651123,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.7029478458049887,
|
|
"fcm_dpo/beta": 0.6138174533843994,
|
|
"fcm_dpo/delta": 0.10352025926113129,
|
|
"fcm_dpo/margin": 1.4749349355697632,
|
|
"fcm_dpo/q_t": 0.3457157015800476,
|
|
"grad_norm": 138.18177795410156,
|
|
"learning_rate": 1.238566782415197e-07,
|
|
"logits/chosen": 0.2334330677986145,
|
|
"logits/rejected": 0.18456201255321503,
|
|
"logps/chosen": -61.38888168334961,
|
|
"logps/ref_chosen": -58.513671875,
|
|
"logps/ref_rejected": -84.31745910644531,
|
|
"logps/rejected": -88.6676025390625,
|
|
"loss": 1.0361,
|
|
"margin_dpo/margin_mean": 1.4749336242675781,
|
|
"margin_dpo/margin_std": 2.255413293838501,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.7044595616024187,
|
|
"fcm_dpo/beta": 0.6493447422981262,
|
|
"fcm_dpo/delta": 0.3105998635292053,
|
|
"fcm_dpo/margin": 1.0969356298446655,
|
|
"fcm_dpo/q_t": 0.37098121643066406,
|
|
"grad_norm": 185.26171875,
|
|
"learning_rate": 1.2271688498291334e-07,
|
|
"logits/chosen": 0.18046687543392181,
|
|
"logits/rejected": 0.1757928878068924,
|
|
"logps/chosen": -76.58619689941406,
|
|
"logps/ref_chosen": -73.26580810546875,
|
|
"logps/ref_rejected": -74.83621215820312,
|
|
"logps/rejected": -79.25353240966797,
|
|
"loss": 1.0578,
|
|
"margin_dpo/margin_mean": 1.096935749053955,
|
|
"margin_dpo/margin_std": 1.7959051132202148,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.7059712773998488,
|
|
"fcm_dpo/beta": 0.6433865427970886,
|
|
"fcm_dpo/delta": -0.17267094552516937,
|
|
"fcm_dpo/margin": 1.7906594276428223,
|
|
"fcm_dpo/q_t": 0.3067885637283325,
|
|
"grad_norm": 113.32935333251953,
|
|
"learning_rate": 1.2158065210664848e-07,
|
|
"logits/chosen": 0.16693203151226044,
|
|
"logits/rejected": 0.060477063059806824,
|
|
"logps/chosen": -50.42617416381836,
|
|
"logps/ref_chosen": -47.57947540283203,
|
|
"logps/ref_rejected": -78.68522644042969,
|
|
"logps/rejected": -83.32258605957031,
|
|
"loss": 0.8707,
|
|
"margin_dpo/margin_mean": 1.7906594276428223,
|
|
"margin_dpo/margin_std": 2.2650554180145264,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.7074829931972789,
|
|
"fcm_dpo/beta": 0.6142877340316772,
|
|
"fcm_dpo/delta": -0.2895115911960602,
|
|
"fcm_dpo/margin": 2.040811538696289,
|
|
"fcm_dpo/q_t": 0.28548452258110046,
|
|
"grad_norm": 135.33714294433594,
|
|
"learning_rate": 1.204480113956011e-07,
|
|
"logits/chosen": 0.17146506905555725,
|
|
"logits/rejected": 0.1613762527704239,
|
|
"logps/chosen": -66.34950256347656,
|
|
"logps/ref_chosen": -63.92778778076172,
|
|
"logps/ref_rejected": -76.51626586914062,
|
|
"logps/rejected": -80.97879028320312,
|
|
"loss": 0.796,
|
|
"margin_dpo/margin_mean": 2.040811061859131,
|
|
"margin_dpo/margin_std": 2.2666611671447754,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.708994708994709,
|
|
"fcm_dpo/beta": 0.5907766222953796,
|
|
"fcm_dpo/delta": -0.01965993642807007,
|
|
"fcm_dpo/margin": 1.710775375366211,
|
|
"fcm_dpo/q_t": 0.3215448260307312,
|
|
"grad_norm": 115.97638702392578,
|
|
"learning_rate": 1.1931899453216697e-07,
|
|
"logits/chosen": 0.21937254071235657,
|
|
"logits/rejected": 0.2032082974910736,
|
|
"logps/chosen": -61.57379150390625,
|
|
"logps/ref_chosen": -59.05818176269531,
|
|
"logps/ref_rejected": -75.67672729492188,
|
|
"logps/rejected": -79.90310668945312,
|
|
"loss": 0.8873,
|
|
"margin_dpo/margin_mean": 1.710775375366211,
|
|
"margin_dpo/margin_std": 2.0356462001800537,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.7105064247921391,
|
|
"fcm_dpo/beta": 0.6098539233207703,
|
|
"fcm_dpo/delta": 0.04422697797417641,
|
|
"fcm_dpo/margin": 1.5718050003051758,
|
|
"fcm_dpo/q_t": 0.3227683901786804,
|
|
"grad_norm": 119.2257308959961,
|
|
"learning_rate": 1.1819363309737438e-07,
|
|
"logits/chosen": 0.13354477286338806,
|
|
"logits/rejected": 0.08637814223766327,
|
|
"logps/chosen": -50.68056106567383,
|
|
"logps/ref_chosen": -47.86743927001953,
|
|
"logps/ref_rejected": -65.96859741210938,
|
|
"logps/rejected": -70.353515625,
|
|
"loss": 0.9471,
|
|
"margin_dpo/margin_mean": 1.5718050003051758,
|
|
"margin_dpo/margin_std": 2.07285475730896,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.7120181405895691,
|
|
"fcm_dpo/beta": 0.6002909541130066,
|
|
"fcm_dpo/delta": -0.08815348893404007,
|
|
"fcm_dpo/margin": 1.7969985008239746,
|
|
"fcm_dpo/q_t": 0.3020731508731842,
|
|
"grad_norm": 131.24606323242188,
|
|
"learning_rate": 1.1707195857000215e-07,
|
|
"logits/chosen": 0.1816762238740921,
|
|
"logits/rejected": 0.12994712591171265,
|
|
"logps/chosen": -60.345733642578125,
|
|
"logps/ref_chosen": -57.777854919433594,
|
|
"logps/ref_rejected": -73.81172180175781,
|
|
"logps/rejected": -78.17660522460938,
|
|
"loss": 0.9,
|
|
"margin_dpo/margin_mean": 1.796998381614685,
|
|
"margin_dpo/margin_std": 2.2218680381774902,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.7135298563869993,
|
|
"fcm_dpo/beta": 0.6000721454620361,
|
|
"fcm_dpo/delta": 0.006265308707952499,
|
|
"fcm_dpo/margin": 1.6558948755264282,
|
|
"fcm_dpo/q_t": 0.3246491551399231,
|
|
"grad_norm": 141.67776489257812,
|
|
"learning_rate": 1.1595400232569768e-07,
|
|
"logits/chosen": 0.20169669389724731,
|
|
"logits/rejected": 0.15981845557689667,
|
|
"logps/chosen": -58.36172866821289,
|
|
"logps/ref_chosen": -55.908668518066406,
|
|
"logps/ref_rejected": -74.70294189453125,
|
|
"logps/rejected": -78.81190490722656,
|
|
"loss": 0.9713,
|
|
"margin_dpo/margin_mean": 1.6558947563171387,
|
|
"margin_dpo/margin_std": 2.2935879230499268,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.7150415721844293,
|
|
"fcm_dpo/beta": 0.5857222080230713,
|
|
"fcm_dpo/delta": -0.07999872416257858,
|
|
"fcm_dpo/margin": 1.8269248008728027,
|
|
"fcm_dpo/q_t": 0.32797205448150635,
|
|
"grad_norm": 127.49575805664062,
|
|
"learning_rate": 1.1483979563610069e-07,
|
|
"logits/chosen": 0.23042967915534973,
|
|
"logits/rejected": 0.15509197115898132,
|
|
"logps/chosen": -56.60422134399414,
|
|
"logps/ref_chosen": -54.16088104248047,
|
|
"logps/ref_rejected": -92.76789855957031,
|
|
"logps/rejected": -97.03816223144531,
|
|
"loss": 0.9793,
|
|
"margin_dpo/margin_mean": 1.8269245624542236,
|
|
"margin_dpo/margin_std": 2.6278867721557617,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.7165532879818595,
|
|
"fcm_dpo/beta": 0.6013132333755493,
|
|
"fcm_dpo/delta": 0.11396686732769012,
|
|
"fcm_dpo/margin": 1.4877792596817017,
|
|
"fcm_dpo/q_t": 0.34768766164779663,
|
|
"grad_norm": 151.1398162841797,
|
|
"learning_rate": 1.1372936966796709e-07,
|
|
"logits/chosen": 0.210426926612854,
|
|
"logits/rejected": 0.1586945503950119,
|
|
"logps/chosen": -49.76041793823242,
|
|
"logps/ref_chosen": -46.685707092285156,
|
|
"logps/ref_rejected": -71.44731903076172,
|
|
"logps/rejected": -76.00980377197266,
|
|
"loss": 1.0262,
|
|
"margin_dpo/margin_mean": 1.487779140472412,
|
|
"margin_dpo/margin_std": 2.2466931343078613,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.7180650037792895,
|
|
"fcm_dpo/beta": 0.56545090675354,
|
|
"fcm_dpo/delta": -0.3536713421344757,
|
|
"fcm_dpo/margin": 2.305065631866455,
|
|
"fcm_dpo/q_t": 0.26997214555740356,
|
|
"grad_norm": 108.57855224609375,
|
|
"learning_rate": 1.126227554822985e-07,
|
|
"logits/chosen": 0.15623445808887482,
|
|
"logits/rejected": 0.11391064524650574,
|
|
"logps/chosen": -61.26993179321289,
|
|
"logps/ref_chosen": -58.4873046875,
|
|
"logps/ref_rejected": -87.00187683105469,
|
|
"logps/rejected": -92.08956909179688,
|
|
"loss": 0.7329,
|
|
"margin_dpo/margin_mean": 2.3050661087036133,
|
|
"margin_dpo/margin_std": 2.278512477874756,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.7195767195767195,
|
|
"fcm_dpo/beta": 0.5778172016143799,
|
|
"fcm_dpo/delta": 0.17008031904697418,
|
|
"fcm_dpo/margin": 1.4578584432601929,
|
|
"fcm_dpo/q_t": 0.3523421287536621,
|
|
"grad_norm": 163.50177001953125,
|
|
"learning_rate": 1.1151998403347243e-07,
|
|
"logits/chosen": 0.11702927947044373,
|
|
"logits/rejected": 0.10260109603404999,
|
|
"logps/chosen": -78.54095458984375,
|
|
"logps/ref_chosen": -75.38162231445312,
|
|
"logps/ref_rejected": -76.99822235107422,
|
|
"logps/rejected": -81.61541748046875,
|
|
"loss": 1.0833,
|
|
"margin_dpo/margin_mean": 1.4578593969345093,
|
|
"margin_dpo/margin_std": 2.4156503677368164,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.7210884353741497,
|
|
"fcm_dpo/beta": 0.591883659362793,
|
|
"fcm_dpo/delta": 0.1255907416343689,
|
|
"fcm_dpo/margin": 1.4951434135437012,
|
|
"fcm_dpo/q_t": 0.3490224778652191,
|
|
"grad_norm": 168.64578247070312,
|
|
"learning_rate": 1.1042108616837692e-07,
|
|
"logits/chosen": 0.19208115339279175,
|
|
"logits/rejected": 0.15968218445777893,
|
|
"logps/chosen": -64.10305786132812,
|
|
"logps/ref_chosen": -61.073387145996094,
|
|
"logps/ref_rejected": -81.34375,
|
|
"logps/rejected": -85.86856079101562,
|
|
"loss": 1.0765,
|
|
"margin_dpo/margin_mean": 1.495143175125122,
|
|
"margin_dpo/margin_std": 2.407855987548828,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.7226001511715797,
|
|
"fcm_dpo/beta": 0.6088930368423462,
|
|
"fcm_dpo/delta": 0.1552659273147583,
|
|
"fcm_dpo/margin": 1.4085874557495117,
|
|
"fcm_dpo/q_t": 0.35270342230796814,
|
|
"grad_norm": 148.8921356201172,
|
|
"learning_rate": 1.0932609262554746e-07,
|
|
"logits/chosen": 0.12717093527317047,
|
|
"logits/rejected": 0.1243201345205307,
|
|
"logps/chosen": -59.892295837402344,
|
|
"logps/ref_chosen": -57.16731643676758,
|
|
"logps/ref_rejected": -53.30917739868164,
|
|
"logps/rejected": -57.44274139404297,
|
|
"loss": 1.0176,
|
|
"margin_dpo/margin_mean": 1.408586859703064,
|
|
"margin_dpo/margin_std": 2.1223185062408447,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.7241118669690099,
|
|
"fcm_dpo/beta": 0.607843816280365,
|
|
"fcm_dpo/delta": 0.027871206402778625,
|
|
"fcm_dpo/margin": 1.5997779369354248,
|
|
"fcm_dpo/q_t": 0.3466408848762512,
|
|
"grad_norm": 140.39195251464844,
|
|
"learning_rate": 1.0823503403430734e-07,
|
|
"logits/chosen": 0.08278117328882217,
|
|
"logits/rejected": 0.03745885565876961,
|
|
"logps/chosen": -62.16324996948242,
|
|
"logps/ref_chosen": -58.91331481933594,
|
|
"logps/ref_rejected": -63.7403450012207,
|
|
"logps/rejected": -68.59005737304688,
|
|
"loss": 1.0645,
|
|
"margin_dpo/margin_mean": 1.5997782945632935,
|
|
"margin_dpo/margin_std": 2.665220260620117,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.7256235827664399,
|
|
"fcm_dpo/beta": 0.6309263110160828,
|
|
"fcm_dpo/delta": 0.006531953811645508,
|
|
"fcm_dpo/margin": 1.5598734617233276,
|
|
"fcm_dpo/q_t": 0.31962987780570984,
|
|
"grad_norm": 165.5882568359375,
|
|
"learning_rate": 1.0714794091391072e-07,
|
|
"logits/chosen": 0.13291680812835693,
|
|
"logits/rejected": 0.1215020939707756,
|
|
"logps/chosen": -65.6745376586914,
|
|
"logps/ref_chosen": -62.80061340332031,
|
|
"logps/ref_rejected": -67.58859252929688,
|
|
"logps/rejected": -72.02239227294922,
|
|
"loss": 1.0675,
|
|
"margin_dpo/margin_mean": 1.5598732233047485,
|
|
"margin_dpo/margin_std": 2.3550682067871094,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.72713529856387,
|
|
"fcm_dpo/beta": 0.6073616743087769,
|
|
"fcm_dpo/delta": -0.08397047966718674,
|
|
"fcm_dpo/margin": 1.7691869735717773,
|
|
"fcm_dpo/q_t": 0.3220970034599304,
|
|
"grad_norm": 130.34205627441406,
|
|
"learning_rate": 1.0606484367268906e-07,
|
|
"logits/chosen": 0.11158512532711029,
|
|
"logits/rejected": 0.10091142356395721,
|
|
"logps/chosen": -67.81123352050781,
|
|
"logps/ref_chosen": -65.28649139404297,
|
|
"logps/ref_rejected": -70.78668212890625,
|
|
"logps/rejected": -75.08061218261719,
|
|
"loss": 0.9249,
|
|
"margin_dpo/margin_mean": 1.7691867351531982,
|
|
"margin_dpo/margin_std": 2.550887107849121,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.7286470143613001,
|
|
"fcm_dpo/beta": 0.620997428894043,
|
|
"fcm_dpo/delta": 0.16822174191474915,
|
|
"fcm_dpo/margin": 1.3626244068145752,
|
|
"fcm_dpo/q_t": 0.35040992498397827,
|
|
"grad_norm": 171.70579528808594,
|
|
"learning_rate": 1.0498577260720048e-07,
|
|
"logits/chosen": 0.12327564507722855,
|
|
"logits/rejected": 0.022273845970630646,
|
|
"logps/chosen": -63.85777282714844,
|
|
"logps/ref_chosen": -60.906185150146484,
|
|
"logps/ref_rejected": -103.44656372070312,
|
|
"logps/rejected": -107.76078033447266,
|
|
"loss": 1.0802,
|
|
"margin_dpo/margin_mean": 1.3626246452331543,
|
|
"margin_dpo/margin_std": 2.2782201766967773,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.7301587301587301,
|
|
"fcm_dpo/beta": 0.6081717014312744,
|
|
"fcm_dpo/delta": -0.14480583369731903,
|
|
"fcm_dpo/margin": 1.8506314754486084,
|
|
"fcm_dpo/q_t": 0.31974995136260986,
|
|
"grad_norm": 137.3916473388672,
|
|
"learning_rate": 1.0391075790138232e-07,
|
|
"logits/chosen": 0.20215514302253723,
|
|
"logits/rejected": 0.13051192462444305,
|
|
"logps/chosen": -56.16196823120117,
|
|
"logps/ref_chosen": -53.192012786865234,
|
|
"logps/ref_rejected": -81.83927154541016,
|
|
"logps/rejected": -86.65986633300781,
|
|
"loss": 0.9449,
|
|
"margin_dpo/margin_mean": 1.850631594657898,
|
|
"margin_dpo/margin_std": 2.5201127529144287,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.7316704459561603,
|
|
"fcm_dpo/beta": 0.6308771967887878,
|
|
"fcm_dpo/delta": 0.20309945940971375,
|
|
"fcm_dpo/margin": 1.2873001098632812,
|
|
"fcm_dpo/q_t": 0.3507644832134247,
|
|
"grad_norm": 157.5721435546875,
|
|
"learning_rate": 1.0283982962570681e-07,
|
|
"logits/chosen": 0.18846221268177032,
|
|
"logits/rejected": 0.15292689204216003,
|
|
"logps/chosen": -60.82984924316406,
|
|
"logps/ref_chosen": -57.76945877075195,
|
|
"logps/ref_rejected": -71.6829833984375,
|
|
"logps/rejected": -76.03067016601562,
|
|
"loss": 1.0035,
|
|
"margin_dpo/margin_mean": 1.2873002290725708,
|
|
"margin_dpo/margin_std": 1.9288554191589355,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.7331821617535903,
|
|
"fcm_dpo/beta": 0.5989984273910522,
|
|
"fcm_dpo/delta": -0.20387829840183258,
|
|
"fcm_dpo/margin": 1.9372670650482178,
|
|
"fcm_dpo/q_t": 0.30228108167648315,
|
|
"grad_norm": 129.9238739013672,
|
|
"learning_rate": 1.0177301773633992e-07,
|
|
"logits/chosen": 0.1555819809436798,
|
|
"logits/rejected": 0.13119317591190338,
|
|
"logps/chosen": -59.23163604736328,
|
|
"logps/ref_chosen": -56.63584899902344,
|
|
"logps/ref_rejected": -70.85614013671875,
|
|
"logps/rejected": -75.38919067382812,
|
|
"loss": 0.8593,
|
|
"margin_dpo/margin_mean": 1.9372668266296387,
|
|
"margin_dpo/margin_std": 2.2261605262756348,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.7346938775510204,
|
|
"fcm_dpo/beta": 0.6060769557952881,
|
|
"fcm_dpo/delta": -0.02147604152560234,
|
|
"fcm_dpo/margin": 1.6815690994262695,
|
|
"fcm_dpo/q_t": 0.34005385637283325,
|
|
"grad_norm": 140.710693359375,
|
|
"learning_rate": 1.007103520743035e-07,
|
|
"logits/chosen": 0.1885671615600586,
|
|
"logits/rejected": 0.10879142582416534,
|
|
"logps/chosen": -59.6024169921875,
|
|
"logps/ref_chosen": -56.347023010253906,
|
|
"logps/ref_rejected": -85.97221374511719,
|
|
"logps/rejected": -90.9091796875,
|
|
"loss": 1.0424,
|
|
"margin_dpo/margin_mean": 1.6815693378448486,
|
|
"margin_dpo/margin_std": 2.760887861251831,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.7362055933484505,
|
|
"fcm_dpo/beta": 0.6121037006378174,
|
|
"fcm_dpo/delta": 0.00196036696434021,
|
|
"fcm_dpo/margin": 1.6285200119018555,
|
|
"fcm_dpo/q_t": 0.3259222209453583,
|
|
"grad_norm": 140.3463897705078,
|
|
"learning_rate": 9.965186236464046e-08,
|
|
"logits/chosen": 0.1968272179365158,
|
|
"logits/rejected": 0.15555810928344727,
|
|
"logps/chosen": -63.40202331542969,
|
|
"logps/ref_chosen": -60.617218017578125,
|
|
"logps/ref_rejected": -82.50975036621094,
|
|
"logps/rejected": -86.9230728149414,
|
|
"loss": 0.9054,
|
|
"margin_dpo/margin_mean": 1.6285200119018555,
|
|
"margin_dpo/margin_std": 2.1146717071533203,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.7377173091458806,
|
|
"fcm_dpo/beta": 0.6010755300521851,
|
|
"fcm_dpo/delta": -0.20396147668361664,
|
|
"fcm_dpo/margin": 1.956758975982666,
|
|
"fcm_dpo/q_t": 0.29864153265953064,
|
|
"grad_norm": 125.88450622558594,
|
|
"learning_rate": 9.859757821558337e-08,
|
|
"logits/chosen": 0.1706887185573578,
|
|
"logits/rejected": 0.11105503141880035,
|
|
"logps/chosen": -65.75413513183594,
|
|
"logps/ref_chosen": -63.10905075073242,
|
|
"logps/ref_rejected": -82.49348449707031,
|
|
"logps/rejected": -87.09532165527344,
|
|
"loss": 0.829,
|
|
"margin_dpo/margin_mean": 1.956758737564087,
|
|
"margin_dpo/margin_std": 2.2036216259002686,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.7392290249433107,
|
|
"fcm_dpo/beta": 0.6157445311546326,
|
|
"fcm_dpo/delta": 0.3702337145805359,
|
|
"fcm_dpo/margin": 1.0620077848434448,
|
|
"fcm_dpo/q_t": 0.3890204429626465,
|
|
"grad_norm": 165.01705932617188,
|
|
"learning_rate": 9.754752911772615e-08,
|
|
"logits/chosen": 0.19015483558177948,
|
|
"logits/rejected": 0.15638966858386993,
|
|
"logps/chosen": -67.7677993774414,
|
|
"logps/ref_chosen": -64.98896026611328,
|
|
"logps/ref_rejected": -84.39607238769531,
|
|
"logps/rejected": -88.23690795898438,
|
|
"loss": 1.2058,
|
|
"margin_dpo/margin_mean": 1.0620079040527344,
|
|
"margin_dpo/margin_std": 2.2157487869262695,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.7407407407407407,
|
|
"fcm_dpo/beta": 0.6417911648750305,
|
|
"fcm_dpo/delta": 0.10067185759544373,
|
|
"fcm_dpo/margin": 1.4126074314117432,
|
|
"fcm_dpo/q_t": 0.36741340160369873,
|
|
"grad_norm": 186.93276977539062,
|
|
"learning_rate": 9.650174444319956e-08,
|
|
"logits/chosen": 0.22295230627059937,
|
|
"logits/rejected": 0.2012663632631302,
|
|
"logps/chosen": -64.97543334960938,
|
|
"logps/ref_chosen": -61.90874481201172,
|
|
"logps/ref_rejected": -70.58566284179688,
|
|
"logps/rejected": -75.06495666503906,
|
|
"loss": 1.1948,
|
|
"margin_dpo/margin_mean": 1.4126070737838745,
|
|
"margin_dpo/margin_std": 2.7005350589752197,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.7422524565381708,
|
|
"fcm_dpo/beta": 0.6383862495422363,
|
|
"fcm_dpo/delta": 0.07852260023355484,
|
|
"fcm_dpo/margin": 1.450089931488037,
|
|
"fcm_dpo/q_t": 0.33126381039619446,
|
|
"grad_norm": 139.84234619140625,
|
|
"learning_rate": 9.546025344484868e-08,
|
|
"logits/chosen": 0.11358515918254852,
|
|
"logits/rejected": 0.0668938159942627,
|
|
"logps/chosen": -58.38109588623047,
|
|
"logps/ref_chosen": -55.47570037841797,
|
|
"logps/ref_rejected": -78.70318603515625,
|
|
"logps/rejected": -83.05867004394531,
|
|
"loss": 0.9724,
|
|
"margin_dpo/margin_mean": 1.4500904083251953,
|
|
"margin_dpo/margin_std": 1.9347925186157227,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.7437641723356009,
|
|
"fcm_dpo/beta": 0.6746935844421387,
|
|
"fcm_dpo/delta": 0.11538802087306976,
|
|
"fcm_dpo/margin": 1.3122856616973877,
|
|
"fcm_dpo/q_t": 0.3527218699455261,
|
|
"grad_norm": 183.52841186523438,
|
|
"learning_rate": 9.442308525541589e-08,
|
|
"logits/chosen": 0.15461723506450653,
|
|
"logits/rejected": 0.09914899617433548,
|
|
"logps/chosen": -70.79060363769531,
|
|
"logps/ref_chosen": -67.28638458251953,
|
|
"logps/ref_rejected": -82.78628540039062,
|
|
"logps/rejected": -87.60279846191406,
|
|
"loss": 1.1795,
|
|
"margin_dpo/margin_mean": 1.3122851848602295,
|
|
"margin_dpo/margin_std": 2.379267692565918,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.745275888133031,
|
|
"fcm_dpo/beta": 0.6467149257659912,
|
|
"fcm_dpo/delta": -0.2506517171859741,
|
|
"fcm_dpo/margin": 1.8874269723892212,
|
|
"fcm_dpo/q_t": 0.2871362268924713,
|
|
"grad_norm": 143.90199279785156,
|
|
"learning_rate": 9.339026888672468e-08,
|
|
"logits/chosen": 0.13737066090106964,
|
|
"logits/rejected": 0.0812433660030365,
|
|
"logps/chosen": -58.82109451293945,
|
|
"logps/ref_chosen": -55.92750549316406,
|
|
"logps/ref_rejected": -79.12149810791016,
|
|
"logps/rejected": -83.90251159667969,
|
|
"loss": 0.8709,
|
|
"margin_dpo/margin_mean": 1.8874274492263794,
|
|
"margin_dpo/margin_std": 2.288954734802246,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.7467876039304611,
|
|
"fcm_dpo/beta": 0.6492782831192017,
|
|
"fcm_dpo/delta": 0.15562888979911804,
|
|
"fcm_dpo/margin": 1.3203234672546387,
|
|
"fcm_dpo/q_t": 0.3562992513179779,
|
|
"grad_norm": 196.9374542236328,
|
|
"learning_rate": 9.236183322886945e-08,
|
|
"logits/chosen": 0.06576605886220932,
|
|
"logits/rejected": 0.027181722223758698,
|
|
"logps/chosen": -70.94686126708984,
|
|
"logps/ref_chosen": -67.95410919189453,
|
|
"logps/ref_rejected": -90.50865173339844,
|
|
"logps/rejected": -94.82173156738281,
|
|
"loss": 1.1436,
|
|
"margin_dpo/margin_mean": 1.3203232288360596,
|
|
"margin_dpo/margin_std": 2.456470251083374,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.7482993197278912,
|
|
"fcm_dpo/beta": 0.6703627109527588,
|
|
"fcm_dpo/delta": 0.14631986618041992,
|
|
"fcm_dpo/margin": 1.2908090353012085,
|
|
"fcm_dpo/q_t": 0.35358014702796936,
|
|
"grad_norm": 143.63949584960938,
|
|
"learning_rate": 9.133780704940594e-08,
|
|
"logits/chosen": 0.21046996116638184,
|
|
"logits/rejected": 0.15889891982078552,
|
|
"logps/chosen": -55.39236068725586,
|
|
"logps/ref_chosen": -52.62546157836914,
|
|
"logps/ref_rejected": -72.06781005859375,
|
|
"logps/rejected": -76.12551879882812,
|
|
"loss": 1.0334,
|
|
"margin_dpo/margin_mean": 1.2908086776733398,
|
|
"margin_dpo/margin_std": 2.0524096488952637,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.7498110355253212,
|
|
"fcm_dpo/beta": 0.6439297199249268,
|
|
"fcm_dpo/delta": -0.214058056473732,
|
|
"fcm_dpo/margin": 1.8385138511657715,
|
|
"fcm_dpo/q_t": 0.33099353313446045,
|
|
"grad_norm": 153.26148986816406,
|
|
"learning_rate": 9.031821899254797e-08,
|
|
"logits/chosen": 0.17050223052501678,
|
|
"logits/rejected": 0.08947437256574631,
|
|
"logps/chosen": -60.537879943847656,
|
|
"logps/ref_chosen": -57.597320556640625,
|
|
"logps/ref_rejected": -94.36127471923828,
|
|
"logps/rejected": -99.14034271240234,
|
|
"loss": 1.0009,
|
|
"margin_dpo/margin_mean": 1.838512897491455,
|
|
"margin_dpo/margin_std": 2.8675289154052734,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.7513227513227513,
|
|
"fcm_dpo/beta": 0.5975298881530762,
|
|
"fcm_dpo/delta": -0.43948090076446533,
|
|
"fcm_dpo/margin": 2.297381639480591,
|
|
"fcm_dpo/q_t": 0.27814143896102905,
|
|
"grad_norm": 137.8128204345703,
|
|
"learning_rate": 8.930309757836516e-08,
|
|
"logits/chosen": 0.20551130175590515,
|
|
"logits/rejected": 0.17609372735023499,
|
|
"logps/chosen": -75.94544982910156,
|
|
"logps/ref_chosen": -72.78994750976562,
|
|
"logps/ref_rejected": -89.48483276367188,
|
|
"logps/rejected": -94.9377212524414,
|
|
"loss": 0.8075,
|
|
"margin_dpo/margin_mean": 2.29738187789917,
|
|
"margin_dpo/margin_std": 2.609776258468628,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.7528344671201814,
|
|
"fcm_dpo/beta": 0.5923163294792175,
|
|
"fcm_dpo/delta": -0.017407868057489395,
|
|
"fcm_dpo/margin": 1.7135266065597534,
|
|
"fcm_dpo/q_t": 0.3283523619174957,
|
|
"grad_norm": 158.02391052246094,
|
|
"learning_rate": 8.829247120198563e-08,
|
|
"logits/chosen": 0.17303167283535004,
|
|
"logits/rejected": 0.14456358551979065,
|
|
"logps/chosen": -71.15170288085938,
|
|
"logps/ref_chosen": -68.36572265625,
|
|
"logps/ref_rejected": -71.28846740722656,
|
|
"logps/rejected": -75.78797912597656,
|
|
"loss": 0.9063,
|
|
"margin_dpo/margin_mean": 1.713526964187622,
|
|
"margin_dpo/margin_std": 2.233605146408081,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.7543461829176115,
|
|
"fcm_dpo/beta": 0.594528317451477,
|
|
"fcm_dpo/delta": 0.08794374763965607,
|
|
"fcm_dpo/margin": 1.5478346347808838,
|
|
"fcm_dpo/q_t": 0.35329633951187134,
|
|
"grad_norm": 145.75146484375,
|
|
"learning_rate": 8.728636813280163e-08,
|
|
"logits/chosen": 0.16119879484176636,
|
|
"logits/rejected": 0.10922683030366898,
|
|
"logps/chosen": -64.74832916259766,
|
|
"logps/ref_chosen": -61.90882873535156,
|
|
"logps/ref_rejected": -91.9411392211914,
|
|
"logps/rejected": -96.32847595214844,
|
|
"loss": 1.121,
|
|
"margin_dpo/margin_mean": 1.547835111618042,
|
|
"margin_dpo/margin_std": 2.651371479034424,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.7558578987150416,
|
|
"fcm_dpo/beta": 0.6114327311515808,
|
|
"fcm_dpo/delta": 0.14150665700435638,
|
|
"fcm_dpo/margin": 1.4240680932998657,
|
|
"fcm_dpo/q_t": 0.35586458444595337,
|
|
"grad_norm": 174.01441955566406,
|
|
"learning_rate": 8.628481651367875e-08,
|
|
"logits/chosen": 0.12461017072200775,
|
|
"logits/rejected": 0.11526093631982803,
|
|
"logps/chosen": -73.16278076171875,
|
|
"logps/ref_chosen": -70.225830078125,
|
|
"logps/ref_rejected": -71.72203063964844,
|
|
"logps/rejected": -76.08305358886719,
|
|
"loss": 1.1357,
|
|
"margin_dpo/margin_mean": 1.4240679740905762,
|
|
"margin_dpo/margin_std": 2.5242857933044434,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.7573696145124716,
|
|
"fcm_dpo/beta": 0.6245852708816528,
|
|
"fcm_dpo/delta": 0.013538122177124023,
|
|
"fcm_dpo/margin": 1.5765228271484375,
|
|
"fcm_dpo/q_t": 0.3233751058578491,
|
|
"grad_norm": 121.06085205078125,
|
|
"learning_rate": 8.528784436016878e-08,
|
|
"logits/chosen": 0.15389983355998993,
|
|
"logits/rejected": 0.13935419917106628,
|
|
"logps/chosen": -67.49052429199219,
|
|
"logps/ref_chosen": -64.59880828857422,
|
|
"logps/ref_rejected": -70.59329223632812,
|
|
"logps/rejected": -75.0615234375,
|
|
"loss": 0.8765,
|
|
"margin_dpo/margin_mean": 1.5765225887298584,
|
|
"margin_dpo/margin_std": 1.870557188987732,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.7588813303099018,
|
|
"fcm_dpo/beta": 0.6334064602851868,
|
|
"fcm_dpo/delta": 0.15443843603134155,
|
|
"fcm_dpo/margin": 1.3549081087112427,
|
|
"fcm_dpo/q_t": 0.3447534441947937,
|
|
"grad_norm": 170.02090454101562,
|
|
"learning_rate": 8.4295479559726e-08,
|
|
"logits/chosen": 0.17954713106155396,
|
|
"logits/rejected": 0.14447346329689026,
|
|
"logps/chosen": -68.44071960449219,
|
|
"logps/ref_chosen": -65.46662902832031,
|
|
"logps/ref_rejected": -90.22233581542969,
|
|
"logps/rejected": -94.55133056640625,
|
|
"loss": 1.0121,
|
|
"margin_dpo/margin_mean": 1.3549081087112427,
|
|
"margin_dpo/margin_std": 2.0818443298339844,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.7603930461073318,
|
|
"fcm_dpo/beta": 0.6419456005096436,
|
|
"fcm_dpo/delta": 0.04625112935900688,
|
|
"fcm_dpo/margin": 1.4924449920654297,
|
|
"fcm_dpo/q_t": 0.3314594030380249,
|
|
"grad_norm": 147.28880310058594,
|
|
"learning_rate": 8.330774987092712e-08,
|
|
"logits/chosen": 0.16441036760807037,
|
|
"logits/rejected": 0.14501985907554626,
|
|
"logps/chosen": -54.53971862792969,
|
|
"logps/ref_chosen": -51.83476257324219,
|
|
"logps/ref_rejected": -57.62522506713867,
|
|
"logps/rejected": -61.82262420654297,
|
|
"loss": 0.9904,
|
|
"margin_dpo/margin_mean": 1.4924452304840088,
|
|
"margin_dpo/margin_std": 2.1151676177978516,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.7619047619047619,
|
|
"fcm_dpo/beta": 0.6248334646224976,
|
|
"fcm_dpo/delta": -0.23319105803966522,
|
|
"fcm_dpo/margin": 1.9295209646224976,
|
|
"fcm_dpo/q_t": 0.295588880777359,
|
|
"grad_norm": 145.23736572265625,
|
|
"learning_rate": 8.232468292269479e-08,
|
|
"logits/chosen": 0.12036092579364777,
|
|
"logits/rejected": 0.0984935611486435,
|
|
"logps/chosen": -71.39968872070312,
|
|
"logps/ref_chosen": -68.65119934082031,
|
|
"logps/ref_rejected": -77.91394805908203,
|
|
"logps/rejected": -82.59195709228516,
|
|
"loss": 0.7956,
|
|
"margin_dpo/margin_mean": 1.929521083831787,
|
|
"margin_dpo/margin_std": 2.150576114654541,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.763416477702192,
|
|
"fcm_dpo/beta": 0.6214442253112793,
|
|
"fcm_dpo/delta": 0.14665237069129944,
|
|
"fcm_dpo/margin": 1.393211007118225,
|
|
"fcm_dpo/q_t": 0.35855334997177124,
|
|
"grad_norm": 172.9752960205078,
|
|
"learning_rate": 8.134630621352483e-08,
|
|
"logits/chosen": 0.177079439163208,
|
|
"logits/rejected": 0.14601781964302063,
|
|
"logps/chosen": -62.98767852783203,
|
|
"logps/ref_chosen": -59.99884796142578,
|
|
"logps/ref_rejected": -76.88048553466797,
|
|
"logps/rejected": -81.26252746582031,
|
|
"loss": 1.0917,
|
|
"margin_dpo/margin_mean": 1.3932104110717773,
|
|
"margin_dpo/margin_std": 2.397246837615967,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.764928193499622,
|
|
"fcm_dpo/beta": 0.627830982208252,
|
|
"fcm_dpo/delta": 0.032355912029743195,
|
|
"fcm_dpo/margin": 1.54330575466156,
|
|
"fcm_dpo/q_t": 0.3359745740890503,
|
|
"grad_norm": 169.81752014160156,
|
|
"learning_rate": 8.037264711071698e-08,
|
|
"logits/chosen": 0.1882736086845398,
|
|
"logits/rejected": 0.166158989071846,
|
|
"logps/chosen": -72.7292251586914,
|
|
"logps/ref_chosen": -70.07130432128906,
|
|
"logps/ref_rejected": -82.03775024414062,
|
|
"logps/rejected": -86.23898315429688,
|
|
"loss": 1.0458,
|
|
"margin_dpo/margin_mean": 1.54330575466156,
|
|
"margin_dpo/margin_std": 2.4680933952331543,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.7664399092970522,
|
|
"fcm_dpo/beta": 0.6320427656173706,
|
|
"fcm_dpo/delta": -0.013070136308670044,
|
|
"fcm_dpo/margin": 1.600289225578308,
|
|
"fcm_dpo/q_t": 0.3405856490135193,
|
|
"grad_norm": 162.6297607421875,
|
|
"learning_rate": 7.940373284960933e-08,
|
|
"logits/chosen": 0.16370144486427307,
|
|
"logits/rejected": 0.1263059675693512,
|
|
"logps/chosen": -74.96257019042969,
|
|
"logps/ref_chosen": -72.00703430175781,
|
|
"logps/ref_rejected": -93.94987487792969,
|
|
"logps/rejected": -98.50569152832031,
|
|
"loss": 1.032,
|
|
"margin_dpo/margin_mean": 1.6002895832061768,
|
|
"margin_dpo/margin_std": 2.5143651962280273,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.7679516250944822,
|
|
"fcm_dpo/beta": 0.6303126811981201,
|
|
"fcm_dpo/delta": -0.09110675752162933,
|
|
"fcm_dpo/margin": 1.7146742343902588,
|
|
"fcm_dpo/q_t": 0.3268600106239319,
|
|
"grad_norm": 160.6134033203125,
|
|
"learning_rate": 7.843959053281663e-08,
|
|
"logits/chosen": 0.1638413965702057,
|
|
"logits/rejected": 0.07367105782032013,
|
|
"logps/chosen": -62.97923278808594,
|
|
"logps/ref_chosen": -60.21992492675781,
|
|
"logps/ref_rejected": -95.9200668334961,
|
|
"logps/rejected": -100.39404296875,
|
|
"loss": 0.9646,
|
|
"margin_dpo/margin_mean": 1.7146737575531006,
|
|
"margin_dpo/margin_std": 2.4505600929260254,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.7694633408919124,
|
|
"fcm_dpo/beta": 0.6267092227935791,
|
|
"fcm_dpo/delta": 0.0032510310411453247,
|
|
"fcm_dpo/margin": 1.5897384881973267,
|
|
"fcm_dpo/q_t": 0.3286612629890442,
|
|
"grad_norm": 157.1903839111328,
|
|
"learning_rate": 7.748024712947204e-08,
|
|
"logits/chosen": 0.13599814474582672,
|
|
"logits/rejected": 0.11172134429216385,
|
|
"logps/chosen": -69.07527160644531,
|
|
"logps/ref_chosen": -66.27017211914062,
|
|
"logps/ref_rejected": -71.73065185546875,
|
|
"logps/rejected": -76.12548828125,
|
|
"loss": 0.9953,
|
|
"margin_dpo/margin_mean": 1.589739203453064,
|
|
"margin_dpo/margin_std": 2.3416152000427246,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.7709750566893424,
|
|
"fcm_dpo/beta": 0.6256568431854248,
|
|
"fcm_dpo/delta": 0.07200966775417328,
|
|
"fcm_dpo/margin": 1.4934487342834473,
|
|
"fcm_dpo/q_t": 0.3356286585330963,
|
|
"grad_norm": 166.45079040527344,
|
|
"learning_rate": 7.652572947447272e-08,
|
|
"logits/chosen": 0.22737839818000793,
|
|
"logits/rejected": 0.16066043078899384,
|
|
"logps/chosen": -56.61336898803711,
|
|
"logps/ref_chosen": -53.54487609863281,
|
|
"logps/ref_rejected": -91.36648559570312,
|
|
"logps/rejected": -95.92843627929688,
|
|
"loss": 1.005,
|
|
"margin_dpo/margin_mean": 1.4934483766555786,
|
|
"margin_dpo/margin_std": 2.2340614795684814,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.7724867724867724,
|
|
"fcm_dpo/beta": 0.5967855453491211,
|
|
"fcm_dpo/delta": -0.3847648501396179,
|
|
"fcm_dpo/margin": 2.233816146850586,
|
|
"fcm_dpo/q_t": 0.2725888192653656,
|
|
"grad_norm": 127.65147399902344,
|
|
"learning_rate": 7.557606426772961e-08,
|
|
"logits/chosen": 0.19227483868598938,
|
|
"logits/rejected": 0.1546470820903778,
|
|
"logps/chosen": -58.66801452636719,
|
|
"logps/ref_chosen": -55.844383239746094,
|
|
"logps/ref_rejected": -86.49819946289062,
|
|
"logps/rejected": -91.55564880371094,
|
|
"loss": 0.7557,
|
|
"margin_dpo/margin_mean": 2.233816623687744,
|
|
"margin_dpo/margin_std": 2.2734181880950928,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.7739984882842026,
|
|
"fcm_dpo/beta": 0.5729248523712158,
|
|
"fcm_dpo/delta": -0.2912992537021637,
|
|
"fcm_dpo/margin": 2.1851539611816406,
|
|
"fcm_dpo/q_t": 0.3602805733680725,
|
|
"grad_norm": 148.83749389648438,
|
|
"learning_rate": 7.463127807341966e-08,
|
|
"logits/chosen": 0.06669703125953674,
|
|
"logits/rejected": 0.05016005039215088,
|
|
"logps/chosen": -64.66706085205078,
|
|
"logps/ref_chosen": -61.653038024902344,
|
|
"logps/ref_rejected": -72.83148193359375,
|
|
"logps/rejected": -78.03065490722656,
|
|
"loss": 1.1281,
|
|
"margin_dpo/margin_mean": 2.185153007507324,
|
|
"margin_dpo/margin_std": 6.0767316818237305,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.7755102040816326,
|
|
"fcm_dpo/beta": 0.5423198938369751,
|
|
"fcm_dpo/delta": -0.018883943557739258,
|
|
"fcm_dpo/margin": 1.8668808937072754,
|
|
"fcm_dpo/q_t": 0.3136303424835205,
|
|
"grad_norm": 92.23009490966797,
|
|
"learning_rate": 7.369139731924401e-08,
|
|
"logits/chosen": 0.27449309825897217,
|
|
"logits/rejected": 0.23453694581985474,
|
|
"logps/chosen": -53.48218536376953,
|
|
"logps/ref_chosen": -50.85256576538086,
|
|
"logps/ref_rejected": -69.21754455566406,
|
|
"logps/rejected": -73.71403503417969,
|
|
"loss": 0.8408,
|
|
"margin_dpo/margin_mean": 1.8668807744979858,
|
|
"margin_dpo/margin_std": 2.0499701499938965,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.7770219198790628,
|
|
"fcm_dpo/beta": 0.5583192110061646,
|
|
"fcm_dpo/delta": 0.07093075662851334,
|
|
"fcm_dpo/margin": 1.6746280193328857,
|
|
"fcm_dpo/q_t": 0.3376282751560211,
|
|
"grad_norm": 142.2669677734375,
|
|
"learning_rate": 7.275644829568747e-08,
|
|
"logits/chosen": 0.20312434434890747,
|
|
"logits/rejected": 0.16620582342147827,
|
|
"logps/chosen": -72.64167785644531,
|
|
"logps/ref_chosen": -69.38493347167969,
|
|
"logps/ref_rejected": -83.32447814941406,
|
|
"logps/rejected": -88.25584411621094,
|
|
"loss": 1.0344,
|
|
"margin_dpo/margin_mean": 1.674628496170044,
|
|
"margin_dpo/margin_std": 2.6116552352905273,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.7785336356764928,
|
|
"fcm_dpo/beta": 0.5502352118492126,
|
|
"fcm_dpo/delta": -0.08825686573982239,
|
|
"fcm_dpo/margin": 1.9603986740112305,
|
|
"fcm_dpo/q_t": 0.31272023916244507,
|
|
"grad_norm": 118.43267822265625,
|
|
"learning_rate": 7.182645715528435e-08,
|
|
"logits/chosen": 0.18149125576019287,
|
|
"logits/rejected": 0.11726510524749756,
|
|
"logps/chosen": -56.91847610473633,
|
|
"logps/ref_chosen": -53.687034606933594,
|
|
"logps/ref_rejected": -83.59614562988281,
|
|
"logps/rejected": -88.78797912597656,
|
|
"loss": 0.8794,
|
|
"margin_dpo/margin_mean": 1.9603983163833618,
|
|
"margin_dpo/margin_std": 2.4689695835113525,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.780045351473923,
|
|
"fcm_dpo/beta": 0.5598210096359253,
|
|
"fcm_dpo/delta": 0.140054851770401,
|
|
"fcm_dpo/margin": 1.5577726364135742,
|
|
"fcm_dpo/q_t": 0.33335772156715393,
|
|
"grad_norm": 122.2513427734375,
|
|
"learning_rate": 7.090144991188568e-08,
|
|
"logits/chosen": 0.13685812056064606,
|
|
"logits/rejected": 0.0911111831665039,
|
|
"logps/chosen": -59.56391143798828,
|
|
"logps/ref_chosen": -56.9017219543457,
|
|
"logps/ref_rejected": -67.83477783203125,
|
|
"logps/rejected": -72.05474090576172,
|
|
"loss": 0.9382,
|
|
"margin_dpo/margin_mean": 1.557773470878601,
|
|
"margin_dpo/margin_std": 2.0453054904937744,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.781557067271353,
|
|
"fcm_dpo/beta": 0.5620474815368652,
|
|
"fcm_dpo/delta": 0.013102632015943527,
|
|
"fcm_dpo/margin": 1.7575607299804688,
|
|
"fcm_dpo/q_t": 0.3347151577472687,
|
|
"grad_norm": 115.9723892211914,
|
|
"learning_rate": 6.998145243993284e-08,
|
|
"logits/chosen": 0.18582028150558472,
|
|
"logits/rejected": 0.17517045140266418,
|
|
"logps/chosen": -64.92308044433594,
|
|
"logps/ref_chosen": -61.775142669677734,
|
|
"logps/ref_rejected": -62.88270950317383,
|
|
"logps/rejected": -67.78820037841797,
|
|
"loss": 0.9368,
|
|
"margin_dpo/margin_mean": 1.7575602531433105,
|
|
"margin_dpo/margin_std": 2.4262712001800537,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.783068783068783,
|
|
"fcm_dpo/beta": 0.5985446572303772,
|
|
"fcm_dpo/delta": 0.34289732575416565,
|
|
"fcm_dpo/margin": 1.131626844406128,
|
|
"fcm_dpo/q_t": 0.38726097345352173,
|
|
"grad_norm": 139.290283203125,
|
|
"learning_rate": 6.906649047373245e-08,
|
|
"logits/chosen": 0.19278889894485474,
|
|
"logits/rejected": 0.15261326730251312,
|
|
"logps/chosen": -64.96379089355469,
|
|
"logps/ref_chosen": -62.02523422241211,
|
|
"logps/ref_rejected": -79.06085205078125,
|
|
"logps/rejected": -83.13102722167969,
|
|
"loss": 1.1567,
|
|
"margin_dpo/margin_mean": 1.1316269636154175,
|
|
"margin_dpo/margin_std": 2.253190517425537,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.7845804988662132,
|
|
"fcm_dpo/beta": 0.6441134214401245,
|
|
"fcm_dpo/delta": 0.37903302907943726,
|
|
"fcm_dpo/margin": 0.9974351525306702,
|
|
"fcm_dpo/q_t": 0.4084378778934479,
|
|
"grad_norm": 205.64553833007812,
|
|
"learning_rate": 6.815658960673781e-08,
|
|
"logits/chosen": 0.20801779627799988,
|
|
"logits/rejected": 0.15976354479789734,
|
|
"logps/chosen": -65.09986877441406,
|
|
"logps/ref_chosen": -61.60636901855469,
|
|
"logps/ref_rejected": -74.50727844238281,
|
|
"logps/rejected": -78.99821472167969,
|
|
"loss": 1.4602,
|
|
"margin_dpo/margin_mean": 0.9974346160888672,
|
|
"margin_dpo/margin_std": 2.888352870941162,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.7860922146636432,
|
|
"fcm_dpo/beta": 0.6441489458084106,
|
|
"fcm_dpo/delta": -0.010032668709754944,
|
|
"fcm_dpo/margin": 1.562050461769104,
|
|
"fcm_dpo/q_t": 0.3334931433200836,
|
|
"grad_norm": 156.4764404296875,
|
|
"learning_rate": 6.725177529083209e-08,
|
|
"logits/chosen": 0.23202162981033325,
|
|
"logits/rejected": 0.18750135600566864,
|
|
"logps/chosen": -65.67581939697266,
|
|
"logps/ref_chosen": -62.87343215942383,
|
|
"logps/ref_rejected": -76.505615234375,
|
|
"logps/rejected": -80.87004852294922,
|
|
"loss": 0.9578,
|
|
"margin_dpo/margin_mean": 1.5620505809783936,
|
|
"margin_dpo/margin_std": 2.2463040351867676,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.7876039304610734,
|
|
"fcm_dpo/beta": 0.6531983017921448,
|
|
"fcm_dpo/delta": 0.006205732002854347,
|
|
"fcm_dpo/margin": 1.5222787857055664,
|
|
"fcm_dpo/q_t": 0.32361727952957153,
|
|
"grad_norm": 157.93814086914062,
|
|
"learning_rate": 6.63520728356167e-08,
|
|
"logits/chosen": 0.1032562106847763,
|
|
"logits/rejected": 0.04845578968524933,
|
|
"logps/chosen": -67.12205505371094,
|
|
"logps/ref_chosen": -64.20668029785156,
|
|
"logps/ref_rejected": -92.28083038330078,
|
|
"logps/rejected": -96.71849060058594,
|
|
"loss": 0.9371,
|
|
"margin_dpo/margin_mean": 1.5222779512405396,
|
|
"margin_dpo/margin_std": 2.023488759994507,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.7891156462585034,
|
|
"fcm_dpo/beta": 0.6555431485176086,
|
|
"fcm_dpo/delta": 0.03148447349667549,
|
|
"fcm_dpo/margin": 1.48207688331604,
|
|
"fcm_dpo/q_t": 0.32747435569763184,
|
|
"grad_norm": 147.4585723876953,
|
|
"learning_rate": 6.545750740770336e-08,
|
|
"logits/chosen": 0.16423772275447845,
|
|
"logits/rejected": 0.14849795401096344,
|
|
"logps/chosen": -61.12641525268555,
|
|
"logps/ref_chosen": -58.369720458984375,
|
|
"logps/ref_rejected": -68.79248046875,
|
|
"logps/rejected": -73.03125,
|
|
"loss": 1.0698,
|
|
"margin_dpo/margin_mean": 1.4820764064788818,
|
|
"margin_dpo/margin_std": 2.3686909675598145,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.7906273620559335,
|
|
"fcm_dpo/beta": 0.6287499666213989,
|
|
"fcm_dpo/delta": -0.24582098424434662,
|
|
"fcm_dpo/margin": 1.9300764799118042,
|
|
"fcm_dpo/q_t": 0.2935262620449066,
|
|
"grad_norm": 161.8909454345703,
|
|
"learning_rate": 6.456810403001012e-08,
|
|
"logits/chosen": 0.17682617902755737,
|
|
"logits/rejected": 0.09023305773735046,
|
|
"logps/chosen": -68.99554443359375,
|
|
"logps/ref_chosen": -65.71324157714844,
|
|
"logps/ref_rejected": -91.98896789550781,
|
|
"logps/rejected": -97.20135498046875,
|
|
"loss": 0.9505,
|
|
"margin_dpo/margin_mean": 1.9300763607025146,
|
|
"margin_dpo/margin_std": 2.5030922889709473,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.7921390778533636,
|
|
"fcm_dpo/beta": 0.642947256565094,
|
|
"fcm_dpo/delta": 0.15807999670505524,
|
|
"fcm_dpo/margin": 1.3285942077636719,
|
|
"fcm_dpo/q_t": 0.34164804220199585,
|
|
"grad_norm": 157.54954528808594,
|
|
"learning_rate": 6.368388758106134e-08,
|
|
"logits/chosen": 0.14576482772827148,
|
|
"logits/rejected": 0.12724286317825317,
|
|
"logps/chosen": -78.79303741455078,
|
|
"logps/ref_chosen": -76.35124969482422,
|
|
"logps/ref_rejected": -89.96072387695312,
|
|
"logps/rejected": -93.73110961914062,
|
|
"loss": 1.0368,
|
|
"margin_dpo/margin_mean": 1.328594446182251,
|
|
"margin_dpo/margin_std": 2.0358855724334717,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.7936507936507936,
|
|
"fcm_dpo/beta": 0.6652133464813232,
|
|
"fcm_dpo/delta": 0.20262369513511658,
|
|
"fcm_dpo/margin": 1.2232202291488647,
|
|
"fcm_dpo/q_t": 0.3485579490661621,
|
|
"grad_norm": 169.54148864746094,
|
|
"learning_rate": 6.280488279429185e-08,
|
|
"logits/chosen": 0.0526951402425766,
|
|
"logits/rejected": 0.039441537111997604,
|
|
"logps/chosen": -78.24242401123047,
|
|
"logps/ref_chosen": -75.49578857421875,
|
|
"logps/ref_rejected": -84.04852294921875,
|
|
"logps/rejected": -88.01837921142578,
|
|
"loss": 0.9943,
|
|
"margin_dpo/margin_mean": 1.2232205867767334,
|
|
"margin_dpo/margin_std": 1.763154149055481,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.7951625094482238,
|
|
"fcm_dpo/beta": 0.6876204609870911,
|
|
"fcm_dpo/delta": 0.2604686915874481,
|
|
"fcm_dpo/margin": 1.1028797626495361,
|
|
"fcm_dpo/q_t": 0.36391156911849976,
|
|
"grad_norm": 168.36891174316406,
|
|
"learning_rate": 6.193111425735515e-08,
|
|
"logits/chosen": 0.1737247258424759,
|
|
"logits/rejected": 0.12701740860939026,
|
|
"logps/chosen": -64.25382995605469,
|
|
"logps/ref_chosen": -61.29241943359375,
|
|
"logps/ref_rejected": -82.47763061523438,
|
|
"logps/rejected": -86.54191589355469,
|
|
"loss": 1.0833,
|
|
"margin_dpo/margin_mean": 1.1028800010681152,
|
|
"margin_dpo/margin_std": 1.7894057035446167,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.7966742252456538,
|
|
"fcm_dpo/beta": 0.7351027727127075,
|
|
"fcm_dpo/delta": 0.22121518850326538,
|
|
"fcm_dpo/margin": 1.081107497215271,
|
|
"fcm_dpo/q_t": 0.3715837299823761,
|
|
"grad_norm": 250.8863067626953,
|
|
"learning_rate": 6.106260641143546e-08,
|
|
"logits/chosen": 0.2451198697090149,
|
|
"logits/rejected": 0.1942567229270935,
|
|
"logps/chosen": -64.66523742675781,
|
|
"logps/ref_chosen": -61.472625732421875,
|
|
"logps/ref_rejected": -90.52831268310547,
|
|
"logps/rejected": -94.8020248413086,
|
|
"loss": 1.3058,
|
|
"margin_dpo/margin_mean": 1.0811076164245605,
|
|
"margin_dpo/margin_std": 2.4224696159362793,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.7981859410430839,
|
|
"fcm_dpo/beta": 0.7391092777252197,
|
|
"fcm_dpo/delta": 0.08262480795383453,
|
|
"fcm_dpo/margin": 1.248426914215088,
|
|
"fcm_dpo/q_t": 0.33980193734169006,
|
|
"grad_norm": 163.9738311767578,
|
|
"learning_rate": 6.019938355056422e-08,
|
|
"logits/chosen": 0.050329744815826416,
|
|
"logits/rejected": -0.005134463310241699,
|
|
"logps/chosen": -61.76133728027344,
|
|
"logps/ref_chosen": -58.792015075683594,
|
|
"logps/ref_rejected": -71.82516479492188,
|
|
"logps/rejected": -76.04290771484375,
|
|
"loss": 1.1046,
|
|
"margin_dpo/margin_mean": 1.2484264373779297,
|
|
"margin_dpo/margin_std": 2.0079092979431152,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.799697656840514,
|
|
"fcm_dpo/beta": 0.6621348857879639,
|
|
"fcm_dpo/delta": -0.6627082824707031,
|
|
"fcm_dpo/margin": 2.306537628173828,
|
|
"fcm_dpo/q_t": 0.25794824957847595,
|
|
"grad_norm": 117.09664154052734,
|
|
"learning_rate": 5.934146982094049e-08,
|
|
"logits/chosen": 0.10348678380250931,
|
|
"logits/rejected": 0.05463102087378502,
|
|
"logps/chosen": -57.58445739746094,
|
|
"logps/ref_chosen": -55.070960998535156,
|
|
"logps/ref_rejected": -75.44007873535156,
|
|
"logps/rejected": -80.26010131835938,
|
|
"loss": 0.7618,
|
|
"margin_dpo/margin_mean": 2.306537389755249,
|
|
"margin_dpo/margin_std": 2.351503372192383,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.8012093726379441,
|
|
"fcm_dpo/beta": 0.6574649214744568,
|
|
"fcm_dpo/delta": -0.030184239149093628,
|
|
"fcm_dpo/margin": 1.5620912313461304,
|
|
"fcm_dpo/q_t": 0.32785388827323914,
|
|
"grad_norm": 138.7952880859375,
|
|
"learning_rate": 5.848888922025552e-08,
|
|
"logits/chosen": 0.1791817992925644,
|
|
"logits/rejected": 0.14665716886520386,
|
|
"logps/chosen": -59.61901092529297,
|
|
"logps/ref_chosen": -56.743812561035156,
|
|
"logps/ref_rejected": -76.6692123413086,
|
|
"logps/rejected": -81.10650634765625,
|
|
"loss": 0.9455,
|
|
"margin_dpo/margin_mean": 1.5620914697647095,
|
|
"margin_dpo/margin_std": 2.117258071899414,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.8027210884353742,
|
|
"fcm_dpo/beta": 0.6852550506591797,
|
|
"fcm_dpo/delta": 0.22120189666748047,
|
|
"fcm_dpo/margin": 1.1534594297409058,
|
|
"fcm_dpo/q_t": 0.3655146360397339,
|
|
"grad_norm": 180.98231506347656,
|
|
"learning_rate": 5.7641665597021435e-08,
|
|
"logits/chosen": 0.14505237340927124,
|
|
"logits/rejected": 0.09899041056632996,
|
|
"logps/chosen": -54.173072814941406,
|
|
"logps/ref_chosen": -51.116455078125,
|
|
"logps/ref_rejected": -79.52884674072266,
|
|
"logps/rejected": -83.73892211914062,
|
|
"loss": 1.0873,
|
|
"margin_dpo/margin_mean": 1.1534587144851685,
|
|
"margin_dpo/margin_std": 1.9886727333068848,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.8042328042328042,
|
|
"fcm_dpo/beta": 0.6626486778259277,
|
|
"fcm_dpo/delta": -0.1822899430990219,
|
|
"fcm_dpo/margin": 1.750182867050171,
|
|
"fcm_dpo/q_t": 0.31126174330711365,
|
|
"grad_norm": 145.5829315185547,
|
|
"learning_rate": 5.679982264990424e-08,
|
|
"logits/chosen": 0.09937071800231934,
|
|
"logits/rejected": 0.05832071602344513,
|
|
"logps/chosen": -61.42143249511719,
|
|
"logps/ref_chosen": -58.279945373535156,
|
|
"logps/ref_rejected": -78.05426788330078,
|
|
"logps/rejected": -82.94593811035156,
|
|
"loss": 0.9066,
|
|
"margin_dpo/margin_mean": 1.7501822710037231,
|
|
"margin_dpo/margin_std": 2.264770030975342,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.8057445200302343,
|
|
"fcm_dpo/beta": 0.6552125811576843,
|
|
"fcm_dpo/delta": -0.08179665356874466,
|
|
"fcm_dpo/margin": 1.6374917030334473,
|
|
"fcm_dpo/q_t": 0.3215191662311554,
|
|
"grad_norm": 148.30023193359375,
|
|
"learning_rate": 5.596338392706076e-08,
|
|
"logits/chosen": 0.25206273794174194,
|
|
"logits/rejected": 0.20781204104423523,
|
|
"logps/chosen": -58.997161865234375,
|
|
"logps/ref_chosen": -56.41801071166992,
|
|
"logps/ref_rejected": -73.89324951171875,
|
|
"logps/rejected": -78.1098861694336,
|
|
"loss": 0.9886,
|
|
"margin_dpo/margin_mean": 1.6374918222427368,
|
|
"margin_dpo/margin_std": 2.354870319366455,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.8072562358276644,
|
|
"fcm_dpo/beta": 0.649742603302002,
|
|
"fcm_dpo/delta": 0.05204106122255325,
|
|
"fcm_dpo/margin": 1.4657820463180542,
|
|
"fcm_dpo/q_t": 0.32745662331581116,
|
|
"grad_norm": 144.36602783203125,
|
|
"learning_rate": 5.513237282548033e-08,
|
|
"logits/chosen": 0.17291076481342316,
|
|
"logits/rejected": 0.13234050571918488,
|
|
"logps/chosen": -63.49784851074219,
|
|
"logps/ref_chosen": -60.748687744140625,
|
|
"logps/ref_rejected": -73.8623046875,
|
|
"logps/rejected": -78.07723999023438,
|
|
"loss": 0.9544,
|
|
"margin_dpo/margin_mean": 1.4657821655273438,
|
|
"margin_dpo/margin_std": 1.9620198011398315,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.8087679516250945,
|
|
"fcm_dpo/beta": 0.6593036651611328,
|
|
"fcm_dpo/delta": -0.019535936415195465,
|
|
"fcm_dpo/margin": 1.5417048931121826,
|
|
"fcm_dpo/q_t": 0.3396652340888977,
|
|
"grad_norm": 148.61146545410156,
|
|
"learning_rate": 5.430681259032957e-08,
|
|
"logits/chosen": 0.09707458317279816,
|
|
"logits/rejected": 0.04526631161570549,
|
|
"logps/chosen": -64.64535522460938,
|
|
"logps/ref_chosen": -61.637413024902344,
|
|
"logps/ref_rejected": -80.93138885498047,
|
|
"logps/rejected": -85.48104095458984,
|
|
"loss": 1.0638,
|
|
"margin_dpo/margin_mean": 1.5417053699493408,
|
|
"margin_dpo/margin_std": 2.5180535316467285,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.8102796674225246,
|
|
"fcm_dpo/beta": 0.6411547064781189,
|
|
"fcm_dpo/delta": -0.05495788902044296,
|
|
"fcm_dpo/margin": 1.6322299242019653,
|
|
"fcm_dpo/q_t": 0.3149953782558441,
|
|
"grad_norm": 129.11102294921875,
|
|
"learning_rate": 5.3486726314303175e-08,
|
|
"logits/chosen": 0.20541507005691528,
|
|
"logits/rejected": 0.14478695392608643,
|
|
"logps/chosen": -54.65433120727539,
|
|
"logps/ref_chosen": -51.88897705078125,
|
|
"logps/ref_rejected": -73.34864044189453,
|
|
"logps/rejected": -77.74623107910156,
|
|
"loss": 0.8709,
|
|
"margin_dpo/margin_mean": 1.6322304010391235,
|
|
"margin_dpo/margin_std": 1.9500904083251953,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.8117913832199547,
|
|
"fcm_dpo/beta": 0.6352528929710388,
|
|
"fcm_dpo/delta": -0.06024022772908211,
|
|
"fcm_dpo/margin": 1.6563501358032227,
|
|
"fcm_dpo/q_t": 0.32194915413856506,
|
|
"grad_norm": 129.3059844970703,
|
|
"learning_rate": 5.267213693697695e-08,
|
|
"logits/chosen": 0.24240414798259735,
|
|
"logits/rejected": 0.1752084195613861,
|
|
"logps/chosen": -57.34803771972656,
|
|
"logps/ref_chosen": -54.248619079589844,
|
|
"logps/ref_rejected": -94.94343566894531,
|
|
"logps/rejected": -99.69920349121094,
|
|
"loss": 1.0076,
|
|
"margin_dpo/margin_mean": 1.656351089477539,
|
|
"margin_dpo/margin_std": 2.3621954917907715,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.8133030990173847,
|
|
"fcm_dpo/beta": 0.6266754865646362,
|
|
"fcm_dpo/delta": -0.23747026920318604,
|
|
"fcm_dpo/margin": 1.9266445636749268,
|
|
"fcm_dpo/q_t": 0.28824812173843384,
|
|
"grad_norm": 151.20790100097656,
|
|
"learning_rate": 5.1863067244167144e-08,
|
|
"logits/chosen": 0.17140713334083557,
|
|
"logits/rejected": 0.14127670228481293,
|
|
"logps/chosen": -73.17599487304688,
|
|
"logps/ref_chosen": -70.09353637695312,
|
|
"logps/ref_rejected": -79.49833679199219,
|
|
"logps/rejected": -84.5074462890625,
|
|
"loss": 0.8148,
|
|
"margin_dpo/margin_mean": 1.926644206047058,
|
|
"margin_dpo/margin_std": 2.1346817016601562,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.8148148148148148,
|
|
"fcm_dpo/beta": 0.6051099300384521,
|
|
"fcm_dpo/delta": -0.027577966451644897,
|
|
"fcm_dpo/margin": 1.6932862997055054,
|
|
"fcm_dpo/q_t": 0.3206092119216919,
|
|
"grad_norm": 135.66448974609375,
|
|
"learning_rate": 5.105953986729195e-08,
|
|
"logits/chosen": 0.13851284980773926,
|
|
"logits/rejected": 0.08982232213020325,
|
|
"logps/chosen": -64.91091918945312,
|
|
"logps/ref_chosen": -61.93169403076172,
|
|
"logps/ref_rejected": -84.08946228027344,
|
|
"logps/rejected": -88.76197052001953,
|
|
"loss": 0.8703,
|
|
"margin_dpo/margin_mean": 1.6932868957519531,
|
|
"margin_dpo/margin_std": 2.0537302494049072,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.8163265306122449,
|
|
"fcm_dpo/beta": 0.5865040421485901,
|
|
"fcm_dpo/delta": -0.2764972448348999,
|
|
"fcm_dpo/margin": 2.119068145751953,
|
|
"fcm_dpo/q_t": 0.29492413997650146,
|
|
"grad_norm": 142.99563598632812,
|
|
"learning_rate": 5.026157728273966e-08,
|
|
"logits/chosen": 0.1776391565799713,
|
|
"logits/rejected": 0.11434172093868256,
|
|
"logps/chosen": -65.55619812011719,
|
|
"logps/ref_chosen": -62.704254150390625,
|
|
"logps/ref_rejected": -95.63597106933594,
|
|
"logps/rejected": -100.60699462890625,
|
|
"loss": 0.841,
|
|
"margin_dpo/margin_mean": 2.1190683841705322,
|
|
"margin_dpo/margin_std": 2.443237781524658,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.817838246409675,
|
|
"fcm_dpo/beta": 0.5594385266304016,
|
|
"fcm_dpo/delta": -0.11497347056865692,
|
|
"fcm_dpo/margin": 1.9675724506378174,
|
|
"fcm_dpo/q_t": 0.3052162230014801,
|
|
"grad_norm": 126.23677062988281,
|
|
"learning_rate": 4.9469201811239035e-08,
|
|
"logits/chosen": 0.12904441356658936,
|
|
"logits/rejected": 0.12872368097305298,
|
|
"logps/chosen": -65.19651794433594,
|
|
"logps/ref_chosen": -62.48084259033203,
|
|
"logps/ref_rejected": -57.55541229248047,
|
|
"logps/rejected": -62.23865509033203,
|
|
"loss": 0.8973,
|
|
"margin_dpo/margin_mean": 1.967572569847107,
|
|
"margin_dpo/margin_std": 2.4288015365600586,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.8193499622071051,
|
|
"fcm_dpo/beta": 0.5571799278259277,
|
|
"fcm_dpo/delta": -0.033925510942935944,
|
|
"fcm_dpo/margin": 1.8493754863739014,
|
|
"fcm_dpo/q_t": 0.3220483660697937,
|
|
"grad_norm": 113.93995666503906,
|
|
"learning_rate": 4.868243561723534e-08,
|
|
"logits/chosen": 0.2040795087814331,
|
|
"logits/rejected": 0.1579345464706421,
|
|
"logps/chosen": -52.065277099609375,
|
|
"logps/ref_chosen": -49.454891204833984,
|
|
"logps/ref_rejected": -65.33275604248047,
|
|
"logps/rejected": -69.79252624511719,
|
|
"loss": 0.9384,
|
|
"margin_dpo/margin_mean": 1.8493754863739014,
|
|
"margin_dpo/margin_std": 2.4480319023132324,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.8208616780045351,
|
|
"fcm_dpo/beta": 0.5707334280014038,
|
|
"fcm_dpo/delta": 0.13935577869415283,
|
|
"fcm_dpo/margin": 1.527040958404541,
|
|
"fcm_dpo/q_t": 0.33425813913345337,
|
|
"grad_norm": 109.5389175415039,
|
|
"learning_rate": 4.790130070827028e-08,
|
|
"logits/chosen": 0.1735750138759613,
|
|
"logits/rejected": 0.10716632753610611,
|
|
"logps/chosen": -53.960105895996094,
|
|
"logps/ref_chosen": -51.100860595703125,
|
|
"logps/ref_rejected": -76.06130981445312,
|
|
"logps/rejected": -80.44760131835938,
|
|
"loss": 0.9579,
|
|
"margin_dpo/margin_mean": 1.5270410776138306,
|
|
"margin_dpo/margin_std": 2.048778533935547,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.8223733938019653,
|
|
"fcm_dpo/beta": 0.5578969717025757,
|
|
"fcm_dpo/delta": -0.18755921721458435,
|
|
"fcm_dpo/margin": 2.0900115966796875,
|
|
"fcm_dpo/q_t": 0.3165471851825714,
|
|
"grad_norm": 130.18556213378906,
|
|
"learning_rate": 4.7125818934366454e-08,
|
|
"logits/chosen": 0.14418599009513855,
|
|
"logits/rejected": 0.09046932309865952,
|
|
"logps/chosen": -63.256656646728516,
|
|
"logps/ref_chosen": -60.2772331237793,
|
|
"logps/ref_rejected": -88.40553283691406,
|
|
"logps/rejected": -93.4749755859375,
|
|
"loss": 0.9465,
|
|
"margin_dpo/margin_mean": 2.0900115966796875,
|
|
"margin_dpo/margin_std": 2.8377792835235596,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.8238851095993953,
|
|
"fcm_dpo/beta": 0.5586047172546387,
|
|
"fcm_dpo/delta": 0.11241482198238373,
|
|
"fcm_dpo/margin": 1.607243299484253,
|
|
"fcm_dpo/q_t": 0.3465924859046936,
|
|
"grad_norm": 141.80880737304688,
|
|
"learning_rate": 4.635601198741607e-08,
|
|
"logits/chosen": 0.1261473000049591,
|
|
"logits/rejected": 0.07930372655391693,
|
|
"logps/chosen": -64.5820541381836,
|
|
"logps/ref_chosen": -61.61524963378906,
|
|
"logps/ref_rejected": -78.71266174316406,
|
|
"logps/rejected": -83.28670501708984,
|
|
"loss": 1.0031,
|
|
"margin_dpo/margin_mean": 1.6072428226470947,
|
|
"margin_dpo/margin_std": 2.3972063064575195,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.8253968253968254,
|
|
"fcm_dpo/beta": 0.5760623216629028,
|
|
"fcm_dpo/delta": 0.1294553279876709,
|
|
"fcm_dpo/margin": 1.5299469232559204,
|
|
"fcm_dpo/q_t": 0.3359874188899994,
|
|
"grad_norm": 148.13609313964844,
|
|
"learning_rate": 4.559190140057428e-08,
|
|
"logits/chosen": 0.17436569929122925,
|
|
"logits/rejected": 0.15731996297836304,
|
|
"logps/chosen": -62.12708282470703,
|
|
"logps/ref_chosen": -59.313262939453125,
|
|
"logps/ref_rejected": -64.73631286621094,
|
|
"logps/rejected": -69.080078125,
|
|
"loss": 1.0087,
|
|
"margin_dpo/margin_mean": 1.52994704246521,
|
|
"margin_dpo/margin_std": 2.212397813796997,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.8269085411942555,
|
|
"fcm_dpo/beta": 0.5641611814498901,
|
|
"fcm_dpo/delta": -0.1594116985797882,
|
|
"fcm_dpo/margin": 2.0226144790649414,
|
|
"fcm_dpo/q_t": 0.2903903126716614,
|
|
"grad_norm": 107.82057189941406,
|
|
"learning_rate": 4.483350854765672e-08,
|
|
"logits/chosen": 0.1362178921699524,
|
|
"logits/rejected": 0.08240213245153427,
|
|
"logps/chosen": -57.526573181152344,
|
|
"logps/ref_chosen": -54.97674560546875,
|
|
"logps/ref_rejected": -75.35922241210938,
|
|
"logps/rejected": -79.9316635131836,
|
|
"loss": 0.8307,
|
|
"margin_dpo/margin_mean": 2.0226151943206787,
|
|
"margin_dpo/margin_std": 2.2596964836120605,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.8284202569916855,
|
|
"fcm_dpo/beta": 0.5807280540466309,
|
|
"fcm_dpo/delta": 0.16467252373695374,
|
|
"fcm_dpo/margin": 1.4542980194091797,
|
|
"fcm_dpo/q_t": 0.3473031222820282,
|
|
"grad_norm": 137.8083038330078,
|
|
"learning_rate": 4.4080854642541826e-08,
|
|
"logits/chosen": 0.09831206500530243,
|
|
"logits/rejected": 0.04538644477725029,
|
|
"logps/chosen": -66.22586822509766,
|
|
"logps/ref_chosen": -63.21067428588867,
|
|
"logps/ref_rejected": -81.23347473144531,
|
|
"logps/rejected": -85.70297241210938,
|
|
"loss": 0.9561,
|
|
"margin_dpo/margin_mean": 1.4542980194091797,
|
|
"margin_dpo/margin_std": 1.9517710208892822,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.8299319727891157,
|
|
"fcm_dpo/beta": 0.5972309708595276,
|
|
"fcm_dpo/delta": 0.16186922788619995,
|
|
"fcm_dpo/margin": 1.4225797653198242,
|
|
"fcm_dpo/q_t": 0.35338258743286133,
|
|
"grad_norm": 165.7979736328125,
|
|
"learning_rate": 4.333396073857723e-08,
|
|
"logits/chosen": 0.2513388395309448,
|
|
"logits/rejected": 0.2025221437215805,
|
|
"logps/chosen": -67.16740417480469,
|
|
"logps/ref_chosen": -64.27351379394531,
|
|
"logps/ref_rejected": -92.31663513183594,
|
|
"logps/rejected": -96.63310241699219,
|
|
"loss": 1.0582,
|
|
"margin_dpo/margin_mean": 1.4225800037384033,
|
|
"margin_dpo/margin_std": 2.26432466506958,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.8314436885865457,
|
|
"fcm_dpo/beta": 0.6041165590286255,
|
|
"fcm_dpo/delta": 0.15158365666866302,
|
|
"fcm_dpo/margin": 1.4242266416549683,
|
|
"fcm_dpo/q_t": 0.3457239270210266,
|
|
"grad_norm": 118.26493072509766,
|
|
"learning_rate": 4.259284772799099e-08,
|
|
"logits/chosen": 0.17192748188972473,
|
|
"logits/rejected": 0.14534920454025269,
|
|
"logps/chosen": -59.13628387451172,
|
|
"logps/ref_chosen": -56.230438232421875,
|
|
"logps/ref_rejected": -62.59788513183594,
|
|
"logps/rejected": -66.92796325683594,
|
|
"loss": 1.0043,
|
|
"margin_dpo/margin_mean": 1.4242260456085205,
|
|
"margin_dpo/margin_std": 2.080674171447754,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.8329554043839759,
|
|
"fcm_dpo/beta": 0.6244951486587524,
|
|
"fcm_dpo/delta": 0.035569630563259125,
|
|
"fcm_dpo/margin": 1.5473127365112305,
|
|
"fcm_dpo/q_t": 0.33668047189712524,
|
|
"grad_norm": 132.6877899169922,
|
|
"learning_rate": 4.1857536341307176e-08,
|
|
"logits/chosen": 0.17000985145568848,
|
|
"logits/rejected": 0.14413632452487946,
|
|
"logps/chosen": -70.88568878173828,
|
|
"logps/ref_chosen": -67.74720764160156,
|
|
"logps/ref_rejected": -87.04285430908203,
|
|
"logps/rejected": -91.72865295410156,
|
|
"loss": 0.9498,
|
|
"margin_dpo/margin_mean": 1.5473123788833618,
|
|
"margin_dpo/margin_std": 2.1508963108062744,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.8344671201814059,
|
|
"fcm_dpo/beta": 0.6188427209854126,
|
|
"fcm_dpo/delta": -0.11111941188573837,
|
|
"fcm_dpo/margin": 1.7723690271377563,
|
|
"fcm_dpo/q_t": 0.2986357808113098,
|
|
"grad_norm": 139.71243286132812,
|
|
"learning_rate": 4.112804714676593e-08,
|
|
"logits/chosen": 0.14941178262233734,
|
|
"logits/rejected": 0.10768507421016693,
|
|
"logps/chosen": -65.74885559082031,
|
|
"logps/ref_chosen": -62.92625427246094,
|
|
"logps/ref_rejected": -82.98365783691406,
|
|
"logps/rejected": -87.57861328125,
|
|
"loss": 0.887,
|
|
"margin_dpo/margin_mean": 1.7723690271377563,
|
|
"margin_dpo/margin_std": 2.1054043769836426,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.8359788359788359,
|
|
"fcm_dpo/beta": 0.5973865985870361,
|
|
"fcm_dpo/delta": -0.033724166452884674,
|
|
"fcm_dpo/margin": 1.7203757762908936,
|
|
"fcm_dpo/q_t": 0.3467303514480591,
|
|
"grad_norm": 141.0452880859375,
|
|
"learning_rate": 4.0404400549748144e-08,
|
|
"logits/chosen": 0.1345456838607788,
|
|
"logits/rejected": 0.05725400522351265,
|
|
"logps/chosen": -59.39005661010742,
|
|
"logps/ref_chosen": -56.038490295410156,
|
|
"logps/ref_rejected": -84.48454284667969,
|
|
"logps/rejected": -89.55648803710938,
|
|
"loss": 1.1217,
|
|
"margin_dpo/margin_mean": 1.720375657081604,
|
|
"margin_dpo/margin_std": 2.994457721710205,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.8374905517762661,
|
|
"fcm_dpo/beta": 0.61543208360672,
|
|
"fcm_dpo/delta": 0.10628563910722733,
|
|
"fcm_dpo/margin": 1.4661433696746826,
|
|
"fcm_dpo/q_t": 0.3466363847255707,
|
|
"grad_norm": 153.4082489013672,
|
|
"learning_rate": 3.968661679220467e-08,
|
|
"logits/chosen": 0.14405734837055206,
|
|
"logits/rejected": 0.12058596312999725,
|
|
"logps/chosen": -67.53119659423828,
|
|
"logps/ref_chosen": -64.53059387207031,
|
|
"logps/ref_rejected": -71.2155990600586,
|
|
"logps/rejected": -75.6823501586914,
|
|
"loss": 1.032,
|
|
"margin_dpo/margin_mean": 1.4661433696746826,
|
|
"margin_dpo/margin_std": 2.351094961166382,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.8390022675736961,
|
|
"fcm_dpo/beta": 0.6489007472991943,
|
|
"fcm_dpo/delta": 0.20813655853271484,
|
|
"fcm_dpo/margin": 1.2255511283874512,
|
|
"fcm_dpo/q_t": 0.3576173484325409,
|
|
"grad_norm": 182.52911376953125,
|
|
"learning_rate": 3.89747159520904e-08,
|
|
"logits/chosen": 0.15571804344654083,
|
|
"logits/rejected": 0.12670589983463287,
|
|
"logps/chosen": -70.10320281982422,
|
|
"logps/ref_chosen": -66.65191650390625,
|
|
"logps/ref_rejected": -68.6667251586914,
|
|
"logps/rejected": -73.34355163574219,
|
|
"loss": 1.1729,
|
|
"margin_dpo/margin_mean": 1.225550651550293,
|
|
"margin_dpo/margin_std": 2.238006591796875,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.8405139833711263,
|
|
"fcm_dpo/beta": 0.6440955400466919,
|
|
"fcm_dpo/delta": 0.016805479303002357,
|
|
"fcm_dpo/margin": 1.5289926528930664,
|
|
"fcm_dpo/q_t": 0.34293437004089355,
|
|
"grad_norm": 150.5496063232422,
|
|
"learning_rate": 3.826871794280192e-08,
|
|
"logits/chosen": 0.17429228127002716,
|
|
"logits/rejected": 0.13041889667510986,
|
|
"logps/chosen": -56.17012405395508,
|
|
"logps/ref_chosen": -52.832366943359375,
|
|
"logps/ref_rejected": -64.49044036865234,
|
|
"logps/rejected": -69.35718536376953,
|
|
"loss": 1.0381,
|
|
"margin_dpo/margin_mean": 1.5289928913116455,
|
|
"margin_dpo/margin_std": 2.3977890014648438,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.8420256991685563,
|
|
"fcm_dpo/beta": 0.5984865427017212,
|
|
"fcm_dpo/delta": -0.35813382267951965,
|
|
"fcm_dpo/margin": 2.1672024726867676,
|
|
"fcm_dpo/q_t": 0.2816886305809021,
|
|
"grad_norm": 126.752197265625,
|
|
"learning_rate": 3.756864251262143e-08,
|
|
"logits/chosen": 0.21231526136398315,
|
|
"logits/rejected": 0.1561964750289917,
|
|
"logps/chosen": -58.17803955078125,
|
|
"logps/ref_chosen": -55.03598403930664,
|
|
"logps/ref_rejected": -75.80644989013672,
|
|
"logps/rejected": -81.11570739746094,
|
|
"loss": 0.7731,
|
|
"margin_dpo/margin_mean": 2.1672027111053467,
|
|
"margin_dpo/margin_std": 2.3840556144714355,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.8435374149659864,
|
|
"fcm_dpo/beta": 0.5904332399368286,
|
|
"fcm_dpo/delta": -0.06778506934642792,
|
|
"fcm_dpo/margin": 1.794862151145935,
|
|
"fcm_dpo/q_t": 0.32028087973594666,
|
|
"grad_norm": 146.46681213378906,
|
|
"learning_rate": 3.687450924416341e-08,
|
|
"logits/chosen": 0.22109848260879517,
|
|
"logits/rejected": 0.17496977746486664,
|
|
"logps/chosen": -66.06083679199219,
|
|
"logps/ref_chosen": -63.226348876953125,
|
|
"logps/ref_rejected": -91.46881866455078,
|
|
"logps/rejected": -96.09817504882812,
|
|
"loss": 0.9082,
|
|
"margin_dpo/margin_mean": 1.7948615550994873,
|
|
"margin_dpo/margin_std": 2.3338146209716797,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.8450491307634165,
|
|
"fcm_dpo/beta": 0.5790784358978271,
|
|
"fcm_dpo/delta": -0.06187023967504501,
|
|
"fcm_dpo/margin": 1.8165602684020996,
|
|
"fcm_dpo/q_t": 0.33412617444992065,
|
|
"grad_norm": 138.7181854248047,
|
|
"learning_rate": 3.6186337553827743e-08,
|
|
"logits/chosen": 0.11689116060733795,
|
|
"logits/rejected": 0.06340146064758301,
|
|
"logps/chosen": -64.47853088378906,
|
|
"logps/ref_chosen": -61.521644592285156,
|
|
"logps/ref_rejected": -82.83859252929688,
|
|
"logps/rejected": -87.61204528808594,
|
|
"loss": 1.0151,
|
|
"margin_dpo/margin_mean": 1.8165605068206787,
|
|
"margin_dpo/margin_std": 2.6852447986602783,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.8465608465608465,
|
|
"fcm_dpo/beta": 0.6011730432510376,
|
|
"fcm_dpo/delta": 0.08525849133729935,
|
|
"fcm_dpo/margin": 1.5243772268295288,
|
|
"fcm_dpo/q_t": 0.345813125371933,
|
|
"grad_norm": 146.03248596191406,
|
|
"learning_rate": 3.550414669125573e-08,
|
|
"logits/chosen": 0.16141119599342346,
|
|
"logits/rejected": 0.13059382140636444,
|
|
"logps/chosen": -63.782081604003906,
|
|
"logps/ref_chosen": -60.64122009277344,
|
|
"logps/ref_rejected": -78.75474548339844,
|
|
"logps/rejected": -83.41998291015625,
|
|
"loss": 1.0243,
|
|
"margin_dpo/margin_mean": 1.5243771076202393,
|
|
"margin_dpo/margin_std": 2.256977081298828,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.8480725623582767,
|
|
"fcm_dpo/beta": 0.5956799983978271,
|
|
"fcm_dpo/delta": 0.0049747563898563385,
|
|
"fcm_dpo/margin": 1.6711280345916748,
|
|
"fcm_dpo/q_t": 0.3338923752307892,
|
|
"grad_norm": 122.3641128540039,
|
|
"learning_rate": 3.482795573879241e-08,
|
|
"logits/chosen": 0.15920129418373108,
|
|
"logits/rejected": 0.13256219029426575,
|
|
"logps/chosen": -65.13980102539062,
|
|
"logps/ref_chosen": -62.49859619140625,
|
|
"logps/ref_rejected": -78.72064208984375,
|
|
"logps/rejected": -83.03296661376953,
|
|
"loss": 0.954,
|
|
"margin_dpo/margin_mean": 1.671128273010254,
|
|
"margin_dpo/margin_std": 2.351602077484131,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.8495842781557067,
|
|
"fcm_dpo/beta": 0.5753499269485474,
|
|
"fcm_dpo/delta": -0.1565413773059845,
|
|
"fcm_dpo/margin": 1.973330020904541,
|
|
"fcm_dpo/q_t": 0.3080880045890808,
|
|
"grad_norm": 128.77142333984375,
|
|
"learning_rate": 3.415778361095226e-08,
|
|
"logits/chosen": 0.20189374685287476,
|
|
"logits/rejected": 0.17044463753700256,
|
|
"logps/chosen": -77.8094482421875,
|
|
"logps/ref_chosen": -74.78173828125,
|
|
"logps/ref_rejected": -92.63499450683594,
|
|
"logps/rejected": -97.63603210449219,
|
|
"loss": 0.8933,
|
|
"margin_dpo/margin_mean": 1.9733293056488037,
|
|
"margin_dpo/margin_std": 2.4358203411102295,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.8510959939531368,
|
|
"fcm_dpo/beta": 0.5790232419967651,
|
|
"fcm_dpo/delta": 0.03163836523890495,
|
|
"fcm_dpo/margin": 1.6777245998382568,
|
|
"fcm_dpo/q_t": 0.3400580883026123,
|
|
"grad_norm": 139.59011840820312,
|
|
"learning_rate": 3.349364905389032e-08,
|
|
"logits/chosen": 0.2000666856765747,
|
|
"logits/rejected": 0.15850435197353363,
|
|
"logps/chosen": -53.24721145629883,
|
|
"logps/ref_chosen": -50.19850158691406,
|
|
"logps/ref_rejected": -66.76687622070312,
|
|
"logps/rejected": -71.4933090209961,
|
|
"loss": 1.0943,
|
|
"margin_dpo/margin_mean": 1.677725076675415,
|
|
"margin_dpo/margin_std": 2.819387912750244,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.8526077097505669,
|
|
"fcm_dpo/beta": 0.5817031264305115,
|
|
"fcm_dpo/delta": -0.0036928579211235046,
|
|
"fcm_dpo/margin": 1.7244809865951538,
|
|
"fcm_dpo/q_t": 0.3220234513282776,
|
|
"grad_norm": 143.81246948242188,
|
|
"learning_rate": 3.283557064487785e-08,
|
|
"logits/chosen": 0.1359567940235138,
|
|
"logits/rejected": 0.10976006835699081,
|
|
"logps/chosen": -58.42416000366211,
|
|
"logps/ref_chosen": -55.7408447265625,
|
|
"logps/ref_rejected": -74.82323455810547,
|
|
"logps/rejected": -79.23103332519531,
|
|
"loss": 0.9548,
|
|
"margin_dpo/margin_mean": 1.7244811058044434,
|
|
"margin_dpo/margin_std": 2.30587100982666,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.854119425547997,
|
|
"fcm_dpo/beta": 0.5901836156845093,
|
|
"fcm_dpo/delta": 0.04009624570608139,
|
|
"fcm_dpo/margin": 1.627647876739502,
|
|
"fcm_dpo/q_t": 0.32860618829727173,
|
|
"grad_norm": 144.67605590820312,
|
|
"learning_rate": 3.218356679178252e-08,
|
|
"logits/chosen": 0.16707636415958405,
|
|
"logits/rejected": 0.12742547690868378,
|
|
"logps/chosen": -61.788658142089844,
|
|
"logps/ref_chosen": -58.33738327026367,
|
|
"logps/ref_rejected": -78.31776428222656,
|
|
"logps/rejected": -83.39668273925781,
|
|
"loss": 0.9857,
|
|
"margin_dpo/margin_mean": 1.627647876739502,
|
|
"margin_dpo/margin_std": 2.2914857864379883,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.8556311413454271,
|
|
"fcm_dpo/beta": 0.6040189266204834,
|
|
"fcm_dpo/delta": 0.15839658677577972,
|
|
"fcm_dpo/margin": 1.4104197025299072,
|
|
"fcm_dpo/q_t": 0.3656797707080841,
|
|
"grad_norm": 174.7579345703125,
|
|
"learning_rate": 3.1537655732553764e-08,
|
|
"logits/chosen": 0.1992546021938324,
|
|
"logits/rejected": 0.1789197027683258,
|
|
"logps/chosen": -74.33102416992188,
|
|
"logps/ref_chosen": -71.22373962402344,
|
|
"logps/ref_rejected": -71.11601257324219,
|
|
"logps/rejected": -75.63371276855469,
|
|
"loss": 1.2269,
|
|
"margin_dpo/margin_mean": 1.4104200601577759,
|
|
"margin_dpo/margin_std": 2.8447115421295166,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.8571428571428571,
|
|
"fcm_dpo/beta": 0.579740047454834,
|
|
"fcm_dpo/delta": -0.17305630445480347,
|
|
"fcm_dpo/margin": 1.978826880455017,
|
|
"fcm_dpo/q_t": 0.3130984902381897,
|
|
"grad_norm": 123.3370132446289,
|
|
"learning_rate": 3.089785553471233e-08,
|
|
"logits/chosen": 0.1904851794242859,
|
|
"logits/rejected": 0.11687320470809937,
|
|
"logps/chosen": -55.760459899902344,
|
|
"logps/ref_chosen": -52.669273376464844,
|
|
"logps/ref_rejected": -74.34785461425781,
|
|
"logps/rejected": -79.41786193847656,
|
|
"loss": 0.8807,
|
|
"margin_dpo/margin_mean": 1.9788269996643066,
|
|
"margin_dpo/margin_std": 2.6479549407958984,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.8586545729402872,
|
|
"fcm_dpo/beta": 0.5765559673309326,
|
|
"fcm_dpo/delta": -0.18778733909130096,
|
|
"fcm_dpo/margin": 2.0179710388183594,
|
|
"fcm_dpo/q_t": 0.29743558168411255,
|
|
"grad_norm": 112.51110076904297,
|
|
"learning_rate": 3.026418409484513e-08,
|
|
"logits/chosen": 0.20747870206832886,
|
|
"logits/rejected": 0.14340844750404358,
|
|
"logps/chosen": -54.99408721923828,
|
|
"logps/ref_chosen": -52.178001403808594,
|
|
"logps/ref_rejected": -85.8277587890625,
|
|
"logps/rejected": -90.66182708740234,
|
|
"loss": 0.8043,
|
|
"margin_dpo/margin_mean": 2.017970561981201,
|
|
"margin_dpo/margin_std": 2.1505703926086426,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.8601662887377173,
|
|
"fcm_dpo/beta": 0.5608316659927368,
|
|
"fcm_dpo/delta": 0.08226889371871948,
|
|
"fcm_dpo/margin": 1.6459429264068604,
|
|
"fcm_dpo/q_t": 0.32129180431365967,
|
|
"grad_norm": 118.24040222167969,
|
|
"learning_rate": 2.963665913810451e-08,
|
|
"logits/chosen": 0.10791079699993134,
|
|
"logits/rejected": 0.08581716567277908,
|
|
"logps/chosen": -65.56758117675781,
|
|
"logps/ref_chosen": -62.649261474609375,
|
|
"logps/ref_rejected": -75.4298324584961,
|
|
"logps/rejected": -79.99409484863281,
|
|
"loss": 0.9399,
|
|
"margin_dpo/margin_mean": 1.6459429264068604,
|
|
"margin_dpo/margin_std": 2.062713623046875,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.8616780045351474,
|
|
"fcm_dpo/beta": 0.552453875541687,
|
|
"fcm_dpo/delta": -0.3056313693523407,
|
|
"fcm_dpo/margin": 2.29406476020813,
|
|
"fcm_dpo/q_t": 0.271054744720459,
|
|
"grad_norm": 103.41246795654297,
|
|
"learning_rate": 2.9015298217712453e-08,
|
|
"logits/chosen": 0.12330185621976852,
|
|
"logits/rejected": 0.05149267241358757,
|
|
"logps/chosen": -52.63910675048828,
|
|
"logps/ref_chosen": -50.04179382324219,
|
|
"logps/ref_rejected": -78.27146911621094,
|
|
"logps/rejected": -83.162841796875,
|
|
"loss": 0.7286,
|
|
"margin_dpo/margin_mean": 2.294064521789551,
|
|
"margin_dpo/margin_std": 2.158341884613037,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.8631897203325775,
|
|
"fcm_dpo/beta": 0.5561559200286865,
|
|
"fcm_dpo/delta": 0.2247203141450882,
|
|
"fcm_dpo/margin": 1.4249439239501953,
|
|
"fcm_dpo/q_t": 0.35770949721336365,
|
|
"grad_norm": 122.92935180664062,
|
|
"learning_rate": 2.840011871446962e-08,
|
|
"logits/chosen": 0.1378299742937088,
|
|
"logits/rejected": 0.10836352407932281,
|
|
"logps/chosen": -56.67723083496094,
|
|
"logps/ref_chosen": -53.65681457519531,
|
|
"logps/ref_rejected": -66.13298034667969,
|
|
"logps/rejected": -70.57833862304688,
|
|
"loss": 1.0635,
|
|
"margin_dpo/margin_mean": 1.4249444007873535,
|
|
"margin_dpo/margin_std": 2.3282651901245117,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.8647014361300076,
|
|
"fcm_dpo/beta": 0.5793824195861816,
|
|
"fcm_dpo/delta": 0.19740980863571167,
|
|
"fcm_dpo/margin": 1.4121860265731812,
|
|
"fcm_dpo/q_t": 0.3518349528312683,
|
|
"grad_norm": 155.68394470214844,
|
|
"learning_rate": 2.7791137836269158e-08,
|
|
"logits/chosen": 0.18134906888008118,
|
|
"logits/rejected": 0.20107844471931458,
|
|
"logps/chosen": -77.97372436523438,
|
|
"logps/ref_chosen": -74.81792449951172,
|
|
"logps/ref_rejected": -65.88681030273438,
|
|
"logps/rejected": -70.45478820800781,
|
|
"loss": 1.0227,
|
|
"margin_dpo/margin_mean": 1.4121863842010498,
|
|
"margin_dpo/margin_std": 2.1890487670898438,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.8662131519274376,
|
|
"fcm_dpo/beta": 0.5722247362136841,
|
|
"fcm_dpo/delta": -0.10332206636667252,
|
|
"fcm_dpo/margin": 1.9067519903182983,
|
|
"fcm_dpo/q_t": 0.3288191854953766,
|
|
"grad_norm": 141.57550048828125,
|
|
"learning_rate": 2.718837261761528e-08,
|
|
"logits/chosen": 0.16030460596084595,
|
|
"logits/rejected": 0.12534594535827637,
|
|
"logps/chosen": -71.95037078857422,
|
|
"logps/ref_chosen": -68.72564697265625,
|
|
"logps/ref_rejected": -88.16201782226562,
|
|
"logps/rejected": -93.29348754882812,
|
|
"loss": 1.0156,
|
|
"margin_dpo/margin_mean": 1.9067527055740356,
|
|
"margin_dpo/margin_std": 2.9433274269104004,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.8677248677248677,
|
|
"fcm_dpo/beta": 0.5721937417984009,
|
|
"fcm_dpo/delta": -0.07496052980422974,
|
|
"fcm_dpo/margin": 1.8618470430374146,
|
|
"fcm_dpo/q_t": 0.3078283965587616,
|
|
"grad_norm": 114.2187271118164,
|
|
"learning_rate": 2.659183991914696e-08,
|
|
"logits/chosen": 0.2320360541343689,
|
|
"logits/rejected": 0.17432260513305664,
|
|
"logps/chosen": -59.44536209106445,
|
|
"logps/ref_chosen": -56.31340026855469,
|
|
"logps/ref_rejected": -83.91553497314453,
|
|
"logps/rejected": -88.90934753417969,
|
|
"loss": 0.8193,
|
|
"margin_dpo/margin_mean": 1.8618476390838623,
|
|
"margin_dpo/margin_std": 2.011713981628418,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.8692365835222978,
|
|
"fcm_dpo/beta": 0.5716170072555542,
|
|
"fcm_dpo/delta": 0.18006719648838043,
|
|
"fcm_dpo/margin": 1.45901620388031,
|
|
"fcm_dpo/q_t": 0.3629787862300873,
|
|
"grad_norm": 139.16409301757812,
|
|
"learning_rate": 2.600155642716606e-08,
|
|
"logits/chosen": 0.24198350310325623,
|
|
"logits/rejected": 0.18358173966407776,
|
|
"logps/chosen": -67.59683990478516,
|
|
"logps/ref_chosen": -64.5841293334961,
|
|
"logps/ref_rejected": -93.47034454345703,
|
|
"logps/rejected": -97.94207000732422,
|
|
"loss": 1.1015,
|
|
"margin_dpo/margin_mean": 1.4590164422988892,
|
|
"margin_dpo/margin_std": 2.4919562339782715,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.8707482993197279,
|
|
"fcm_dpo/beta": 0.5859131813049316,
|
|
"fcm_dpo/delta": 0.05836522579193115,
|
|
"fcm_dpo/margin": 1.6160252094268799,
|
|
"fcm_dpo/q_t": 0.34040236473083496,
|
|
"grad_norm": 173.32106018066406,
|
|
"learning_rate": 2.5417538653170754e-08,
|
|
"logits/chosen": 0.18478666245937347,
|
|
"logits/rejected": 0.1120123565196991,
|
|
"logps/chosen": -55.915428161621094,
|
|
"logps/ref_chosen": -53.28052520751953,
|
|
"logps/ref_rejected": -84.2000503540039,
|
|
"logps/rejected": -88.45097351074219,
|
|
"loss": 1.0198,
|
|
"margin_dpo/margin_mean": 1.6160247325897217,
|
|
"margin_dpo/margin_std": 2.3769047260284424,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.872260015117158,
|
|
"fcm_dpo/beta": 0.6100517511367798,
|
|
"fcm_dpo/delta": 0.2396540343761444,
|
|
"fcm_dpo/margin": 1.2779208421707153,
|
|
"fcm_dpo/q_t": 0.36663320660591125,
|
|
"grad_norm": 141.51870727539062,
|
|
"learning_rate": 2.4839802933393607e-08,
|
|
"logits/chosen": 0.15433327853679657,
|
|
"logits/rejected": 0.13624653220176697,
|
|
"logps/chosen": -65.23735046386719,
|
|
"logps/ref_chosen": -62.32468795776367,
|
|
"logps/ref_rejected": -67.300537109375,
|
|
"logps/rejected": -71.49111938476562,
|
|
"loss": 1.1174,
|
|
"margin_dpo/margin_mean": 1.2779215574264526,
|
|
"margin_dpo/margin_std": 2.3165364265441895,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.873771730914588,
|
|
"fcm_dpo/beta": 0.6398091316223145,
|
|
"fcm_dpo/delta": 0.2764911949634552,
|
|
"fcm_dpo/margin": 1.1640191078186035,
|
|
"fcm_dpo/q_t": 0.3820345997810364,
|
|
"grad_norm": 158.93179321289062,
|
|
"learning_rate": 2.4268365428344733e-08,
|
|
"logits/chosen": 0.20606505870819092,
|
|
"logits/rejected": 0.1820131242275238,
|
|
"logps/chosen": -59.829124450683594,
|
|
"logps/ref_chosen": -56.65557861328125,
|
|
"logps/ref_rejected": -68.21835327148438,
|
|
"logps/rejected": -72.55591583251953,
|
|
"loss": 1.1219,
|
|
"margin_dpo/margin_mean": 1.1640193462371826,
|
|
"margin_dpo/margin_std": 2.143388271331787,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.8752834467120182,
|
|
"fcm_dpo/beta": 0.6147720217704773,
|
|
"fcm_dpo/delta": -0.3748418092727661,
|
|
"fcm_dpo/margin": 2.1486082077026367,
|
|
"fcm_dpo/q_t": 0.2804148197174072,
|
|
"grad_norm": 129.94529724121094,
|
|
"learning_rate": 2.3703242122359357e-08,
|
|
"logits/chosen": 0.1448242962360382,
|
|
"logits/rejected": 0.1141422688961029,
|
|
"logps/chosen": -59.766502380371094,
|
|
"logps/ref_chosen": -56.809661865234375,
|
|
"logps/ref_rejected": -68.09613037109375,
|
|
"logps/rejected": -73.20157623291016,
|
|
"loss": 0.779,
|
|
"margin_dpo/margin_mean": 2.1486077308654785,
|
|
"margin_dpo/margin_std": 2.415778636932373,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.8767951625094482,
|
|
"fcm_dpo/beta": 0.6254961490631104,
|
|
"fcm_dpo/delta": 0.14091333746910095,
|
|
"fcm_dpo/margin": 1.389689326286316,
|
|
"fcm_dpo/q_t": 0.3568766117095947,
|
|
"grad_norm": 135.1801300048828,
|
|
"learning_rate": 2.3144448823151392e-08,
|
|
"logits/chosen": 0.1448507308959961,
|
|
"logits/rejected": 0.10793846845626831,
|
|
"logps/chosen": -60.63453674316406,
|
|
"logps/ref_chosen": -57.70011520385742,
|
|
"logps/ref_rejected": -77.90664672851562,
|
|
"logps/rejected": -82.23075866699219,
|
|
"loss": 1.1175,
|
|
"margin_dpo/margin_mean": 1.3896892070770264,
|
|
"margin_dpo/margin_std": 2.38545823097229,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.8783068783068783,
|
|
"fcm_dpo/beta": 0.6277990341186523,
|
|
"fcm_dpo/delta": 0.06906095892190933,
|
|
"fcm_dpo/margin": 1.492587924003601,
|
|
"fcm_dpo/q_t": 0.3425326943397522,
|
|
"grad_norm": 166.3179473876953,
|
|
"learning_rate": 2.259200116137039e-08,
|
|
"logits/chosen": 0.22638601064682007,
|
|
"logits/rejected": 0.17741230130195618,
|
|
"logps/chosen": -62.56743240356445,
|
|
"logps/ref_chosen": -59.332359313964844,
|
|
"logps/ref_rejected": -83.64482116699219,
|
|
"logps/rejected": -88.37248229980469,
|
|
"loss": 1.0185,
|
|
"margin_dpo/margin_mean": 1.4925878047943115,
|
|
"margin_dpo/margin_std": 2.3351616859436035,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.8798185941043084,
|
|
"fcm_dpo/beta": 0.6253724098205566,
|
|
"fcm_dpo/delta": -0.07393016666173935,
|
|
"fcm_dpo/margin": 1.7041115760803223,
|
|
"fcm_dpo/q_t": 0.31156644225120544,
|
|
"grad_norm": 143.69277954101562,
|
|
"learning_rate": 2.204591459016525e-08,
|
|
"logits/chosen": 0.1830708086490631,
|
|
"logits/rejected": 0.19862952828407288,
|
|
"logps/chosen": -66.9874267578125,
|
|
"logps/ref_chosen": -64.16285705566406,
|
|
"logps/ref_rejected": -58.632896423339844,
|
|
"logps/rejected": -63.161582946777344,
|
|
"loss": 0.8907,
|
|
"margin_dpo/margin_mean": 1.7041112184524536,
|
|
"margin_dpo/margin_std": 2.1104648113250732,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.8813303099017384,
|
|
"fcm_dpo/beta": 0.6326082944869995,
|
|
"fcm_dpo/delta": 0.08823379874229431,
|
|
"fcm_dpo/margin": 1.4541677236557007,
|
|
"fcm_dpo/q_t": 0.35390377044677734,
|
|
"grad_norm": 158.56170654296875,
|
|
"learning_rate": 2.1506204384751064e-08,
|
|
"logits/chosen": 0.27797916531562805,
|
|
"logits/rejected": 0.20349468290805817,
|
|
"logps/chosen": -54.86329650878906,
|
|
"logps/ref_chosen": -51.87239456176758,
|
|
"logps/ref_rejected": -83.86331176757812,
|
|
"logps/rejected": -88.30838012695312,
|
|
"loss": 1.1301,
|
|
"margin_dpo/margin_mean": 1.4541676044464111,
|
|
"margin_dpo/margin_std": 2.6575491428375244,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.8828420256991686,
|
|
"fcm_dpo/beta": 0.6200550198554993,
|
|
"fcm_dpo/delta": -0.1539473831653595,
|
|
"fcm_dpo/margin": 1.8320786952972412,
|
|
"fcm_dpo/q_t": 0.32225194573402405,
|
|
"grad_norm": 134.9540252685547,
|
|
"learning_rate": 2.09728856419826e-08,
|
|
"logits/chosen": 0.23410704731941223,
|
|
"logits/rejected": 0.15715327858924866,
|
|
"logps/chosen": -49.28221130371094,
|
|
"logps/ref_chosen": -46.571388244628906,
|
|
"logps/ref_rejected": -80.67969512939453,
|
|
"logps/rejected": -85.22259521484375,
|
|
"loss": 0.9929,
|
|
"margin_dpo/margin_mean": 1.8320791721343994,
|
|
"margin_dpo/margin_std": 2.6820805072784424,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.8843537414965986,
|
|
"fcm_dpo/beta": 0.6162490844726562,
|
|
"fcm_dpo/delta": 0.03570966795086861,
|
|
"fcm_dpo/margin": 1.5696362257003784,
|
|
"fcm_dpo/q_t": 0.3259866535663605,
|
|
"grad_norm": 138.78614807128906,
|
|
"learning_rate": 2.044597327993153e-08,
|
|
"logits/chosen": 0.16352935135364532,
|
|
"logits/rejected": 0.13112275302410126,
|
|
"logps/chosen": -60.831146240234375,
|
|
"logps/ref_chosen": -58.124534606933594,
|
|
"logps/ref_rejected": -79.00538635253906,
|
|
"logps/rejected": -83.28163146972656,
|
|
"loss": 0.9773,
|
|
"margin_dpo/margin_mean": 1.5696361064910889,
|
|
"margin_dpo/margin_std": 2.2082860469818115,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.8858654572940288,
|
|
"fcm_dpo/beta": 0.602626621723175,
|
|
"fcm_dpo/delta": -0.15734781324863434,
|
|
"fcm_dpo/margin": 1.8877205848693848,
|
|
"fcm_dpo/q_t": 0.30060410499572754,
|
|
"grad_norm": 121.3906021118164,
|
|
"learning_rate": 1.9925482037469187e-08,
|
|
"logits/chosen": 0.16395865380764008,
|
|
"logits/rejected": 0.12222093343734741,
|
|
"logps/chosen": -57.20876693725586,
|
|
"logps/ref_chosen": -54.10163879394531,
|
|
"logps/ref_rejected": -63.72113037109375,
|
|
"logps/rejected": -68.71598052978516,
|
|
"loss": 0.8797,
|
|
"margin_dpo/margin_mean": 1.8877204656600952,
|
|
"margin_dpo/margin_std": 2.3431496620178223,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.8873771730914588,
|
|
"fcm_dpo/beta": 0.6007733941078186,
|
|
"fcm_dpo/delta": 0.015819646418094635,
|
|
"fcm_dpo/margin": 1.6403778791427612,
|
|
"fcm_dpo/q_t": 0.3280307352542877,
|
|
"grad_norm": 161.0563507080078,
|
|
"learning_rate": 1.9411426473854687e-08,
|
|
"logits/chosen": 0.20693828165531158,
|
|
"logits/rejected": 0.18662090599536896,
|
|
"logps/chosen": -66.15540313720703,
|
|
"logps/ref_chosen": -63.41719436645508,
|
|
"logps/ref_rejected": -63.47003936767578,
|
|
"logps/rejected": -67.84861755371094,
|
|
"loss": 1.0373,
|
|
"margin_dpo/margin_mean": 1.6403785943984985,
|
|
"margin_dpo/margin_std": 2.5440006256103516,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.8888888888888888,
|
|
"fcm_dpo/beta": 0.5988498330116272,
|
|
"fcm_dpo/delta": -0.019005782902240753,
|
|
"fcm_dpo/margin": 1.6973689794540405,
|
|
"fcm_dpo/q_t": 0.3193732500076294,
|
|
"grad_norm": 139.44850158691406,
|
|
"learning_rate": 1.890382096832699e-08,
|
|
"logits/chosen": 0.2137565314769745,
|
|
"logits/rejected": 0.17403748631477356,
|
|
"logps/chosen": -65.03866577148438,
|
|
"logps/ref_chosen": -62.20103454589844,
|
|
"logps/ref_rejected": -82.10249328613281,
|
|
"logps/rejected": -86.63749694824219,
|
|
"loss": 0.9558,
|
|
"margin_dpo/margin_mean": 1.697368860244751,
|
|
"margin_dpo/margin_std": 2.3547918796539307,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.890400604686319,
|
|
"fcm_dpo/beta": 0.5705356597900391,
|
|
"fcm_dpo/delta": -0.3349605202674866,
|
|
"fcm_dpo/margin": 2.2617807388305664,
|
|
"fcm_dpo/q_t": 0.2769893407821655,
|
|
"grad_norm": 125.83250427246094,
|
|
"learning_rate": 1.840267971970344e-08,
|
|
"logits/chosen": 0.16948330402374268,
|
|
"logits/rejected": 0.14072127640247345,
|
|
"logps/chosen": -59.358924865722656,
|
|
"logps/ref_chosen": -56.71361541748047,
|
|
"logps/ref_rejected": -76.7366943359375,
|
|
"logps/rejected": -81.64378356933594,
|
|
"loss": 0.7735,
|
|
"margin_dpo/margin_mean": 2.2617812156677246,
|
|
"margin_dpo/margin_std": 2.493180274963379,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.891912320483749,
|
|
"fcm_dpo/beta": 0.5450751781463623,
|
|
"fcm_dpo/delta": -0.2230319082736969,
|
|
"fcm_dpo/margin": 2.1952528953552246,
|
|
"fcm_dpo/q_t": 0.2884211540222168,
|
|
"grad_norm": 120.71878814697266,
|
|
"learning_rate": 1.7908016745981856e-08,
|
|
"logits/chosen": 0.12301607429981232,
|
|
"logits/rejected": 0.09767352789640427,
|
|
"logps/chosen": -69.55258178710938,
|
|
"logps/ref_chosen": -66.5138168334961,
|
|
"logps/ref_rejected": -85.70820617675781,
|
|
"logps/rejected": -90.94223022460938,
|
|
"loss": 0.8243,
|
|
"margin_dpo/margin_mean": 2.1952528953552246,
|
|
"margin_dpo/margin_std": 2.4201831817626953,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.8934240362811792,
|
|
"fcm_dpo/beta": 0.5360534191131592,
|
|
"fcm_dpo/delta": 0.06881964951753616,
|
|
"fcm_dpo/margin": 1.7440357208251953,
|
|
"fcm_dpo/q_t": 0.3370535969734192,
|
|
"grad_norm": 125.32640838623047,
|
|
"learning_rate": 1.7419845883949098e-08,
|
|
"logits/chosen": 0.2466006577014923,
|
|
"logits/rejected": 0.1991560012102127,
|
|
"logps/chosen": -63.56512451171875,
|
|
"logps/ref_chosen": -60.697181701660156,
|
|
"logps/ref_rejected": -86.12278747558594,
|
|
"logps/rejected": -90.73477172851562,
|
|
"loss": 0.9852,
|
|
"margin_dpo/margin_mean": 1.7440353631973267,
|
|
"margin_dpo/margin_std": 2.4317829608917236,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.8949357520786092,
|
|
"fcm_dpo/beta": 0.5456318855285645,
|
|
"fcm_dpo/delta": 0.048897288739681244,
|
|
"fcm_dpo/margin": 1.750661849975586,
|
|
"fcm_dpo/q_t": 0.3427332043647766,
|
|
"grad_norm": 132.4198760986328,
|
|
"learning_rate": 1.6938180788793556e-08,
|
|
"logits/chosen": 0.18844714760780334,
|
|
"logits/rejected": 0.10343804955482483,
|
|
"logps/chosen": -54.16868591308594,
|
|
"logps/ref_chosen": -51.237327575683594,
|
|
"logps/ref_rejected": -81.60242462158203,
|
|
"logps/rejected": -86.2844467163086,
|
|
"loss": 0.9699,
|
|
"margin_dpo/margin_mean": 1.750661849975586,
|
|
"margin_dpo/margin_std": 2.502837657928467,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.8964474678760394,
|
|
"fcm_dpo/beta": 0.5585802793502808,
|
|
"fcm_dpo/delta": 0.09174495190382004,
|
|
"fcm_dpo/margin": 1.640941858291626,
|
|
"fcm_dpo/q_t": 0.3366505205631256,
|
|
"grad_norm": 122.52076721191406,
|
|
"learning_rate": 1.6463034933723336e-08,
|
|
"logits/chosen": 0.13056322932243347,
|
|
"logits/rejected": 0.061800211668014526,
|
|
"logps/chosen": -44.626976013183594,
|
|
"logps/ref_chosen": -42.08000183105469,
|
|
"logps/ref_rejected": -68.47499084472656,
|
|
"logps/rejected": -72.66290283203125,
|
|
"loss": 1.0262,
|
|
"margin_dpo/margin_mean": 1.640941858291626,
|
|
"margin_dpo/margin_std": 2.4754600524902344,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.8979591836734694,
|
|
"fcm_dpo/beta": 0.5791330337524414,
|
|
"fcm_dpo/delta": 0.23276039958000183,
|
|
"fcm_dpo/margin": 1.3571163415908813,
|
|
"fcm_dpo/q_t": 0.36690980195999146,
|
|
"grad_norm": 131.3136749267578,
|
|
"learning_rate": 1.5994421609589385e-08,
|
|
"logits/chosen": 0.12303361296653748,
|
|
"logits/rejected": 0.10529161989688873,
|
|
"logps/chosen": -66.68218994140625,
|
|
"logps/ref_chosen": -63.658668518066406,
|
|
"logps/ref_rejected": -70.35597229003906,
|
|
"logps/rejected": -74.73660278320312,
|
|
"loss": 1.0883,
|
|
"margin_dpo/margin_mean": 1.3571163415908813,
|
|
"margin_dpo/margin_std": 2.378408193588257,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.8994708994708994,
|
|
"fcm_dpo/beta": 0.5620189905166626,
|
|
"fcm_dpo/delta": -0.30020615458488464,
|
|
"fcm_dpo/margin": 2.245738983154297,
|
|
"fcm_dpo/q_t": 0.2832089960575104,
|
|
"grad_norm": 123.73275756835938,
|
|
"learning_rate": 1.553235392451377e-08,
|
|
"logits/chosen": 0.2004849910736084,
|
|
"logits/rejected": 0.12713924050331116,
|
|
"logps/chosen": -59.19725799560547,
|
|
"logps/ref_chosen": -56.21875762939453,
|
|
"logps/ref_rejected": -83.95773315429688,
|
|
"logps/rejected": -89.18197631835938,
|
|
"loss": 0.8647,
|
|
"margin_dpo/margin_mean": 2.245739459991455,
|
|
"margin_dpo/margin_std": 2.714977741241455,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.9009826152683296,
|
|
"fcm_dpo/beta": 0.5812000036239624,
|
|
"fcm_dpo/delta": 0.33480995893478394,
|
|
"fcm_dpo/margin": 1.184531569480896,
|
|
"fcm_dpo/q_t": 0.3911210000514984,
|
|
"grad_norm": 142.58895874023438,
|
|
"learning_rate": 1.507684480352292e-08,
|
|
"logits/chosen": 0.11143651604652405,
|
|
"logits/rejected": 0.11058718711137772,
|
|
"logps/chosen": -71.51290130615234,
|
|
"logps/ref_chosen": -68.48088073730469,
|
|
"logps/ref_rejected": -61.732967376708984,
|
|
"logps/rejected": -65.94952392578125,
|
|
"loss": 1.1753,
|
|
"margin_dpo/margin_mean": 1.1845312118530273,
|
|
"margin_dpo/margin_std": 2.4391579627990723,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.9024943310657596,
|
|
"fcm_dpo/beta": 0.5947533249855042,
|
|
"fcm_dpo/delta": 0.0645606741309166,
|
|
"fcm_dpo/margin": 1.5828521251678467,
|
|
"fcm_dpo/q_t": 0.3390669822692871,
|
|
"grad_norm": 118.60302734375,
|
|
"learning_rate": 1.4627906988186111e-08,
|
|
"logits/chosen": 0.1252235770225525,
|
|
"logits/rejected": 0.09921297430992126,
|
|
"logps/chosen": -51.60657501220703,
|
|
"logps/ref_chosen": -48.85750961303711,
|
|
"logps/ref_rejected": -55.068084716796875,
|
|
"logps/rejected": -59.400001525878906,
|
|
"loss": 0.963,
|
|
"margin_dpo/margin_mean": 1.5828520059585571,
|
|
"margin_dpo/margin_std": 2.2610087394714355,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.9040060468631897,
|
|
"fcm_dpo/beta": 0.6207314729690552,
|
|
"fcm_dpo/delta": 0.23146888613700867,
|
|
"fcm_dpo/margin": 1.2671819925308228,
|
|
"fcm_dpo/q_t": 0.38011521100997925,
|
|
"grad_norm": 151.75205993652344,
|
|
"learning_rate": 1.4185553036259095e-08,
|
|
"logits/chosen": 0.1728542447090149,
|
|
"logits/rejected": 0.11698366701602936,
|
|
"logps/chosen": -62.22666931152344,
|
|
"logps/ref_chosen": -58.88715362548828,
|
|
"logps/ref_rejected": -81.43145751953125,
|
|
"logps/rejected": -86.03816223144531,
|
|
"loss": 1.1674,
|
|
"margin_dpo/margin_mean": 1.2671819925308228,
|
|
"margin_dpo/margin_std": 2.4872426986694336,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.9055177626606198,
|
|
"fcm_dpo/beta": 0.6450425386428833,
|
|
"fcm_dpo/delta": 0.09236402064561844,
|
|
"fcm_dpo/margin": 1.41303288936615,
|
|
"fcm_dpo/q_t": 0.3642594814300537,
|
|
"grad_norm": 166.78338623046875,
|
|
"learning_rate": 1.3749795321332885e-08,
|
|
"logits/chosen": 0.280083566904068,
|
|
"logits/rejected": 0.24155710637569427,
|
|
"logps/chosen": -60.974185943603516,
|
|
"logps/ref_chosen": -57.60719299316406,
|
|
"logps/ref_rejected": -71.80469512939453,
|
|
"logps/rejected": -76.584716796875,
|
|
"loss": 1.1191,
|
|
"margin_dpo/margin_mean": 1.4130332469940186,
|
|
"margin_dpo/margin_std": 2.563547134399414,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.9070294784580499,
|
|
"fcm_dpo/beta": 0.6648072004318237,
|
|
"fcm_dpo/delta": 0.13435859978199005,
|
|
"fcm_dpo/margin": 1.3058445453643799,
|
|
"fcm_dpo/q_t": 0.35756915807724,
|
|
"grad_norm": 156.00218200683594,
|
|
"learning_rate": 1.3320646032487393e-08,
|
|
"logits/chosen": 0.2286624014377594,
|
|
"logits/rejected": 0.19494396448135376,
|
|
"logps/chosen": -61.69401550292969,
|
|
"logps/ref_chosen": -58.44231414794922,
|
|
"logps/ref_rejected": -83.64639282226562,
|
|
"logps/rejected": -88.20393371582031,
|
|
"loss": 1.0999,
|
|
"margin_dpo/margin_mean": 1.3058440685272217,
|
|
"margin_dpo/margin_std": 2.190995693206787,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.9070294784580499,
|
|
"eval_fcm_dpo/beta": 0.6568994522094727,
|
|
"eval_logits/chosen": 0.19621142745018005,
|
|
"eval_logits/rejected": 0.15854774415493011,
|
|
"eval_logps/chosen": -77.82337951660156,
|
|
"eval_logps/ref_chosen": -74.85946655273438,
|
|
"eval_logps/ref_rejected": -79.54898834228516,
|
|
"eval_logps/rejected": -83.91117095947266,
|
|
"eval_loss": 0.5679579377174377,
|
|
"eval_margin_dpo/margin_mean": 1.3982763290405273,
|
|
"eval_margin_dpo/margin_std": 2.4811148643493652,
|
|
"eval_runtime": 37.9915,
|
|
"eval_samples_per_second": 60.619,
|
|
"eval_steps_per_second": 1.895,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.90854119425548,
|
|
"fcm_dpo/beta": 0.6349242925643921,
|
|
"fcm_dpo/delta": -0.1545478105545044,
|
|
"fcm_dpo/margin": 1.7845826148986816,
|
|
"fcm_dpo/q_t": 0.3199031949043274,
|
|
"grad_norm": 141.2012176513672,
|
|
"learning_rate": 1.2898117173950868e-08,
|
|
"logits/chosen": 0.18726052343845367,
|
|
"logits/rejected": 0.13292908668518066,
|
|
"logps/chosen": -58.19509506225586,
|
|
"logps/ref_chosen": -55.59432601928711,
|
|
"logps/ref_rejected": -83.68630981445312,
|
|
"logps/rejected": -88.07167053222656,
|
|
"loss": 0.9784,
|
|
"margin_dpo/margin_mean": 1.7845829725265503,
|
|
"margin_dpo/margin_std": 2.5417721271514893,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.91005291005291,
|
|
"fcm_dpo/beta": 0.6274067759513855,
|
|
"fcm_dpo/delta": -0.15826797485351562,
|
|
"fcm_dpo/margin": 1.8172000646591187,
|
|
"fcm_dpo/q_t": 0.3086685538291931,
|
|
"grad_norm": 139.8549346923828,
|
|
"learning_rate": 1.2482220564763667e-08,
|
|
"logits/chosen": 0.15303432941436768,
|
|
"logits/rejected": 0.11989377439022064,
|
|
"logps/chosen": -58.714149475097656,
|
|
"logps/ref_chosen": -56.349185943603516,
|
|
"logps/ref_rejected": -71.9959716796875,
|
|
"logps/rejected": -76.17813110351562,
|
|
"loss": 0.887,
|
|
"margin_dpo/margin_mean": 1.81719970703125,
|
|
"margin_dpo/margin_std": 2.2619805335998535,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.9115646258503401,
|
|
"fcm_dpo/beta": 0.6145593523979187,
|
|
"fcm_dpo/delta": 0.003954831510782242,
|
|
"fcm_dpo/margin": 1.620744228363037,
|
|
"fcm_dpo/q_t": 0.32022157311439514,
|
|
"grad_norm": 121.14891052246094,
|
|
"learning_rate": 1.2072967838448051e-08,
|
|
"logits/chosen": 0.13416381180286407,
|
|
"logits/rejected": 0.08777206391096115,
|
|
"logps/chosen": -55.47536849975586,
|
|
"logps/ref_chosen": -53.16838836669922,
|
|
"logps/ref_rejected": -73.8604736328125,
|
|
"logps/rejected": -77.78820037841797,
|
|
"loss": 0.9039,
|
|
"margin_dpo/margin_mean": 1.620744228363037,
|
|
"margin_dpo/margin_std": 2.0299124717712402,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.9130763416477702,
|
|
"fcm_dpo/beta": 0.6099786162376404,
|
|
"fcm_dpo/delta": -0.06591600924730301,
|
|
"fcm_dpo/margin": 1.7355551719665527,
|
|
"fcm_dpo/q_t": 0.3229469954967499,
|
|
"grad_norm": 138.081298828125,
|
|
"learning_rate": 1.1670370442682459e-08,
|
|
"logits/chosen": 0.1237378865480423,
|
|
"logits/rejected": 0.11915041506290436,
|
|
"logps/chosen": -75.28321838378906,
|
|
"logps/ref_chosen": -72.64942169189453,
|
|
"logps/ref_rejected": -69.8792724609375,
|
|
"logps/rejected": -74.24861907958984,
|
|
"loss": 0.9893,
|
|
"margin_dpo/margin_mean": 1.7355563640594482,
|
|
"margin_dpo/margin_std": 2.5649847984313965,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.9145880574452003,
|
|
"fcm_dpo/beta": 0.6022388339042664,
|
|
"fcm_dpo/delta": -0.05076969414949417,
|
|
"fcm_dpo/margin": 1.7350056171417236,
|
|
"fcm_dpo/q_t": 0.3227270543575287,
|
|
"grad_norm": 143.35333251953125,
|
|
"learning_rate": 1.1274439638981532e-08,
|
|
"logits/chosen": 0.22395864129066467,
|
|
"logits/rejected": 0.17329740524291992,
|
|
"logps/chosen": -64.78287506103516,
|
|
"logps/ref_chosen": -61.61284637451172,
|
|
"logps/ref_rejected": -79.34398651123047,
|
|
"logps/rejected": -84.24903106689453,
|
|
"loss": 0.9715,
|
|
"margin_dpo/margin_mean": 1.7350056171417236,
|
|
"margin_dpo/margin_std": 2.434452772140503,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.9160997732426304,
|
|
"fcm_dpo/beta": 0.6224014759063721,
|
|
"fcm_dpo/delta": 0.12130826711654663,
|
|
"fcm_dpo/margin": 1.4212950468063354,
|
|
"fcm_dpo/q_t": 0.35074251890182495,
|
|
"grad_norm": 151.14439392089844,
|
|
"learning_rate": 1.0885186502381016e-08,
|
|
"logits/chosen": 0.19038084149360657,
|
|
"logits/rejected": 0.14078065752983093,
|
|
"logps/chosen": -57.25608825683594,
|
|
"logps/ref_chosen": -54.46424102783203,
|
|
"logps/ref_rejected": -79.62708282470703,
|
|
"logps/rejected": -83.84022521972656,
|
|
"loss": 1.0273,
|
|
"margin_dpo/margin_mean": 1.421295404434204,
|
|
"margin_dpo/margin_std": 2.172295331954956,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.9176114890400605,
|
|
"fcm_dpo/beta": 0.5959450006484985,
|
|
"fcm_dpo/delta": -0.13134704530239105,
|
|
"fcm_dpo/margin": 1.8611319065093994,
|
|
"fcm_dpo/q_t": 0.31175029277801514,
|
|
"grad_norm": 141.19546508789062,
|
|
"learning_rate": 1.0502621921127774e-08,
|
|
"logits/chosen": 0.14506211876869202,
|
|
"logits/rejected": 0.1156059056520462,
|
|
"logps/chosen": -65.67162322998047,
|
|
"logps/ref_chosen": -62.86086654663086,
|
|
"logps/ref_rejected": -72.5501937866211,
|
|
"logps/rejected": -77.22207641601562,
|
|
"loss": 0.9338,
|
|
"margin_dpo/margin_mean": 1.8611321449279785,
|
|
"margin_dpo/margin_std": 2.463554859161377,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.9191232048374905,
|
|
"fcm_dpo/beta": 0.6229555606842041,
|
|
"fcm_dpo/delta": 0.24546337127685547,
|
|
"fcm_dpo/margin": 1.2424054145812988,
|
|
"fcm_dpo/q_t": 0.36555221676826477,
|
|
"grad_norm": 181.87330627441406,
|
|
"learning_rate": 1.0126756596375685e-08,
|
|
"logits/chosen": 0.16855892539024353,
|
|
"logits/rejected": 0.12452598661184311,
|
|
"logps/chosen": -66.26856994628906,
|
|
"logps/ref_chosen": -63.18071746826172,
|
|
"logps/ref_rejected": -99.15888214111328,
|
|
"logps/rejected": -103.48914337158203,
|
|
"loss": 1.0982,
|
|
"margin_dpo/margin_mean": 1.2424057722091675,
|
|
"margin_dpo/margin_std": 2.158616542816162,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.9206349206349206,
|
|
"fcm_dpo/beta": 0.600549578666687,
|
|
"fcm_dpo/delta": -0.21868403255939484,
|
|
"fcm_dpo/margin": 1.9679713249206543,
|
|
"fcm_dpo/q_t": 0.2971384525299072,
|
|
"grad_norm": 103.79540252685547,
|
|
"learning_rate": 9.757601041885694e-09,
|
|
"logits/chosen": 0.21697968244552612,
|
|
"logits/rejected": 0.18763911724090576,
|
|
"logps/chosen": -51.205020904541016,
|
|
"logps/ref_chosen": -48.62322235107422,
|
|
"logps/ref_rejected": -68.28271484375,
|
|
"logps/rejected": -72.83248901367188,
|
|
"loss": 0.8665,
|
|
"margin_dpo/margin_mean": 1.9679714441299438,
|
|
"margin_dpo/margin_std": 2.3338661193847656,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.9221466364323507,
|
|
"fcm_dpo/beta": 0.6042653322219849,
|
|
"fcm_dpo/delta": -0.03405376523733139,
|
|
"fcm_dpo/margin": 1.7052839994430542,
|
|
"fcm_dpo/q_t": 0.3533821403980255,
|
|
"grad_norm": 194.40943908691406,
|
|
"learning_rate": 9.395165583732379e-09,
|
|
"logits/chosen": 0.1579647958278656,
|
|
"logits/rejected": 0.1604270040988922,
|
|
"logps/chosen": -75.84585571289062,
|
|
"logps/ref_chosen": -72.66513061523438,
|
|
"logps/ref_rejected": -87.15310668945312,
|
|
"logps/rejected": -92.03912353515625,
|
|
"loss": 1.1081,
|
|
"margin_dpo/margin_mean": 1.7052838802337646,
|
|
"margin_dpo/margin_std": 3.0106663703918457,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.9236583522297808,
|
|
"fcm_dpo/beta": 0.6121885776519775,
|
|
"fcm_dpo/delta": 0.25364020466804504,
|
|
"fcm_dpo/margin": 1.2458240985870361,
|
|
"fcm_dpo/q_t": 0.36857932806015015,
|
|
"grad_norm": 133.9265899658203,
|
|
"learning_rate": 9.03946036001449e-09,
|
|
"logits/chosen": 0.2268466055393219,
|
|
"logits/rejected": 0.1877264380455017,
|
|
"logps/chosen": -51.286346435546875,
|
|
"logps/ref_chosen": -48.30857849121094,
|
|
"logps/ref_rejected": -70.6141128540039,
|
|
"logps/rejected": -74.83771514892578,
|
|
"loss": 1.0992,
|
|
"margin_dpo/margin_mean": 1.2458235025405884,
|
|
"margin_dpo/margin_std": 2.115691661834717,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.9251700680272109,
|
|
"fcm_dpo/beta": 0.6135225296020508,
|
|
"fcm_dpo/delta": -0.24519576132297516,
|
|
"fcm_dpo/margin": 1.9819927215576172,
|
|
"fcm_dpo/q_t": 0.30337560176849365,
|
|
"grad_norm": 141.30982971191406,
|
|
"learning_rate": 8.690495320571839e-09,
|
|
"logits/chosen": 0.10801693797111511,
|
|
"logits/rejected": 0.059915874153375626,
|
|
"logps/chosen": -64.04043579101562,
|
|
"logps/ref_chosen": -61.23155975341797,
|
|
"logps/ref_rejected": -94.37979888916016,
|
|
"logps/rejected": -99.17066955566406,
|
|
"loss": 0.9409,
|
|
"margin_dpo/margin_mean": 1.9819923639297485,
|
|
"margin_dpo/margin_std": 2.702681064605713,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.926681783824641,
|
|
"fcm_dpo/beta": 0.5761626958847046,
|
|
"fcm_dpo/delta": -0.3495950698852539,
|
|
"fcm_dpo/margin": 2.265277862548828,
|
|
"fcm_dpo/q_t": 0.28574904799461365,
|
|
"grad_norm": 127.05760955810547,
|
|
"learning_rate": 8.348280226706722e-09,
|
|
"logits/chosen": 0.11358965933322906,
|
|
"logits/rejected": 0.09932907670736313,
|
|
"logps/chosen": -56.594276428222656,
|
|
"logps/ref_chosen": -53.98310852050781,
|
|
"logps/ref_rejected": -58.32208251953125,
|
|
"logps/rejected": -63.19852828979492,
|
|
"loss": 0.8489,
|
|
"margin_dpo/margin_mean": 2.2652783393859863,
|
|
"margin_dpo/margin_std": 2.772947072982788,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.9281934996220711,
|
|
"fcm_dpo/beta": 0.569689929485321,
|
|
"fcm_dpo/delta": 0.031611911952495575,
|
|
"fcm_dpo/margin": 1.7016217708587646,
|
|
"fcm_dpo/q_t": 0.3136371970176697,
|
|
"grad_norm": 121.61019134521484,
|
|
"learning_rate": 8.012824650910937e-09,
|
|
"logits/chosen": 0.2070479393005371,
|
|
"logits/rejected": 0.1917956918478012,
|
|
"logps/chosen": -63.198665618896484,
|
|
"logps/ref_chosen": -60.24303436279297,
|
|
"logps/ref_rejected": -72.26258850097656,
|
|
"logps/rejected": -76.91984558105469,
|
|
"loss": 0.8664,
|
|
"margin_dpo/margin_mean": 1.701621174812317,
|
|
"margin_dpo/margin_std": 1.9565538167953491,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.9297052154195011,
|
|
"fcm_dpo/beta": 0.5607982873916626,
|
|
"fcm_dpo/delta": -0.03319869935512543,
|
|
"fcm_dpo/margin": 1.835862398147583,
|
|
"fcm_dpo/q_t": 0.329486608505249,
|
|
"grad_norm": 150.09515380859375,
|
|
"learning_rate": 7.684137976598088e-09,
|
|
"logits/chosen": 0.1885606348514557,
|
|
"logits/rejected": 0.15545931458473206,
|
|
"logps/chosen": -75.15232849121094,
|
|
"logps/ref_chosen": -72.09467315673828,
|
|
"logps/ref_rejected": -104.02980041503906,
|
|
"logps/rejected": -108.92332458496094,
|
|
"loss": 0.9693,
|
|
"margin_dpo/margin_mean": 1.8358616828918457,
|
|
"margin_dpo/margin_std": 2.62037992477417,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.9312169312169312,
|
|
"fcm_dpo/beta": 0.5787394046783447,
|
|
"fcm_dpo/delta": 0.22798365354537964,
|
|
"fcm_dpo/margin": 1.3659520149230957,
|
|
"fcm_dpo/q_t": 0.36905261874198914,
|
|
"grad_norm": 138.57786560058594,
|
|
"learning_rate": 7.36222939784098e-09,
|
|
"logits/chosen": 0.17158043384552002,
|
|
"logits/rejected": 0.1132676973938942,
|
|
"logps/chosen": -61.71527862548828,
|
|
"logps/ref_chosen": -58.530723571777344,
|
|
"logps/ref_rejected": -75.48025512695312,
|
|
"logps/rejected": -80.03076171875,
|
|
"loss": 1.0819,
|
|
"margin_dpo/margin_mean": 1.365952491760254,
|
|
"margin_dpo/margin_std": 2.3640403747558594,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.9327286470143613,
|
|
"fcm_dpo/beta": 0.5880202651023865,
|
|
"fcm_dpo/delta": 0.12598100304603577,
|
|
"fcm_dpo/margin": 1.4980671405792236,
|
|
"fcm_dpo/q_t": 0.35036730766296387,
|
|
"grad_norm": 136.1953887939453,
|
|
"learning_rate": 7.047107919114586e-09,
|
|
"logits/chosen": 0.1694568246603012,
|
|
"logits/rejected": 0.1344485729932785,
|
|
"logps/chosen": -60.68329620361328,
|
|
"logps/ref_chosen": -57.608673095703125,
|
|
"logps/ref_rejected": -81.22109985351562,
|
|
"logps/rejected": -85.79379272460938,
|
|
"loss": 1.0066,
|
|
"margin_dpo/margin_mean": 1.4980677366256714,
|
|
"margin_dpo/margin_std": 2.2009902000427246,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.9342403628117913,
|
|
"fcm_dpo/beta": 0.5951837301254272,
|
|
"fcm_dpo/delta": 0.013019606471061707,
|
|
"fcm_dpo/margin": 1.654120922088623,
|
|
"fcm_dpo/q_t": 0.326251745223999,
|
|
"grad_norm": 141.9059295654297,
|
|
"learning_rate": 6.738782355044048e-09,
|
|
"logits/chosen": 0.1482279747724533,
|
|
"logits/rejected": 0.07427319884300232,
|
|
"logps/chosen": -59.329376220703125,
|
|
"logps/ref_chosen": -56.69594192504883,
|
|
"logps/ref_rejected": -85.92362976074219,
|
|
"logps/rejected": -90.21118927001953,
|
|
"loss": 0.9343,
|
|
"margin_dpo/margin_mean": 1.6541210412979126,
|
|
"margin_dpo/margin_std": 2.201620578765869,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.9357520786092215,
|
|
"fcm_dpo/beta": 0.6088467836380005,
|
|
"fcm_dpo/delta": 0.07530087977647781,
|
|
"fcm_dpo/margin": 1.53031587600708,
|
|
"fcm_dpo/q_t": 0.3448328375816345,
|
|
"grad_norm": 150.22421264648438,
|
|
"learning_rate": 6.437261330158206e-09,
|
|
"logits/chosen": 0.22948047518730164,
|
|
"logits/rejected": 0.18049129843711853,
|
|
"logps/chosen": -56.79576873779297,
|
|
"logps/ref_chosen": -54.05841827392578,
|
|
"logps/ref_rejected": -83.55493927001953,
|
|
"logps/rejected": -87.82260131835938,
|
|
"loss": 1.0339,
|
|
"margin_dpo/margin_mean": 1.53031587600708,
|
|
"margin_dpo/margin_std": 2.370837450027466,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.9372637944066515,
|
|
"fcm_dpo/beta": 0.6322528123855591,
|
|
"fcm_dpo/delta": 0.09385835379362106,
|
|
"fcm_dpo/margin": 1.4362270832061768,
|
|
"fcm_dpo/q_t": 0.3465351164340973,
|
|
"grad_norm": 173.79412841796875,
|
|
"learning_rate": 6.142553278648238e-09,
|
|
"logits/chosen": 0.17980945110321045,
|
|
"logits/rejected": 0.16840043663978577,
|
|
"logps/chosen": -66.160888671875,
|
|
"logps/ref_chosen": -63.36971664428711,
|
|
"logps/ref_rejected": -65.68269348144531,
|
|
"logps/rejected": -69.91009521484375,
|
|
"loss": 1.0082,
|
|
"margin_dpo/margin_mean": 1.4362270832061768,
|
|
"margin_dpo/margin_std": 2.130248546600342,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.9387755102040817,
|
|
"fcm_dpo/beta": 0.6423999071121216,
|
|
"fcm_dpo/delta": 0.0907469242811203,
|
|
"fcm_dpo/margin": 1.4205418825149536,
|
|
"fcm_dpo/q_t": 0.35109221935272217,
|
|
"grad_norm": 151.4163055419922,
|
|
"learning_rate": 5.854666444131934e-09,
|
|
"logits/chosen": 0.19905002415180206,
|
|
"logits/rejected": 0.12930849194526672,
|
|
"logps/chosen": -55.218666076660156,
|
|
"logps/ref_chosen": -52.321224212646484,
|
|
"logps/ref_rejected": -88.09001159667969,
|
|
"logps/rejected": -92.40798950195312,
|
|
"loss": 1.0421,
|
|
"margin_dpo/margin_mean": 1.420541524887085,
|
|
"margin_dpo/margin_std": 2.2282516956329346,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.9402872260015117,
|
|
"fcm_dpo/beta": 0.640907347202301,
|
|
"fcm_dpo/delta": 0.03911030665040016,
|
|
"fcm_dpo/margin": 1.5050252676010132,
|
|
"fcm_dpo/q_t": 0.3320958614349365,
|
|
"grad_norm": 137.0905303955078,
|
|
"learning_rate": 5.573608879422875e-09,
|
|
"logits/chosen": 0.13063614070415497,
|
|
"logits/rejected": 0.1020088791847229,
|
|
"logps/chosen": -62.50457763671875,
|
|
"logps/ref_chosen": -59.86545944213867,
|
|
"logps/ref_rejected": -81.86668395996094,
|
|
"logps/rejected": -86.01081848144531,
|
|
"loss": 0.9325,
|
|
"margin_dpo/margin_mean": 1.5050253868103027,
|
|
"margin_dpo/margin_std": 2.0284576416015625,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.9417989417989417,
|
|
"fcm_dpo/beta": 0.6268334984779358,
|
|
"fcm_dpo/delta": -0.16526103019714355,
|
|
"fcm_dpo/margin": 1.8289923667907715,
|
|
"fcm_dpo/q_t": 0.30245280265808105,
|
|
"grad_norm": 126.97518920898438,
|
|
"learning_rate": 5.299388446305342e-09,
|
|
"logits/chosen": 0.16525626182556152,
|
|
"logits/rejected": 0.11132755130529404,
|
|
"logps/chosen": -70.49075317382812,
|
|
"logps/ref_chosen": -67.36846160888672,
|
|
"logps/ref_rejected": -82.02733612060547,
|
|
"logps/rejected": -86.97863006591797,
|
|
"loss": 0.8618,
|
|
"margin_dpo/margin_mean": 1.828992247581482,
|
|
"margin_dpo/margin_std": 2.1596312522888184,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.9433106575963719,
|
|
"fcm_dpo/beta": 0.6166424751281738,
|
|
"fcm_dpo/delta": -0.002019442617893219,
|
|
"fcm_dpo/margin": 1.6235092878341675,
|
|
"fcm_dpo/q_t": 0.33338773250579834,
|
|
"grad_norm": 134.533447265625,
|
|
"learning_rate": 5.03201281531429e-09,
|
|
"logits/chosen": 0.17911109328269958,
|
|
"logits/rejected": 0.10127080231904984,
|
|
"logps/chosen": -53.71633529663086,
|
|
"logps/ref_chosen": -51.02655029296875,
|
|
"logps/ref_rejected": -76.49203491210938,
|
|
"logps/rejected": -80.80532836914062,
|
|
"loss": 0.9784,
|
|
"margin_dpo/margin_mean": 1.623509407043457,
|
|
"margin_dpo/margin_std": 2.3413143157958984,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.9448223733938019,
|
|
"fcm_dpo/beta": 0.6369531154632568,
|
|
"fcm_dpo/delta": 0.11569374054670334,
|
|
"fcm_dpo/margin": 1.400294542312622,
|
|
"fcm_dpo/q_t": 0.3541494905948639,
|
|
"grad_norm": 148.3859100341797,
|
|
"learning_rate": 4.7714894655209174e-09,
|
|
"logits/chosen": 0.20979532599449158,
|
|
"logits/rejected": 0.1446528434753418,
|
|
"logps/chosen": -57.27146530151367,
|
|
"logps/ref_chosen": -54.20761489868164,
|
|
"logps/ref_rejected": -84.93669128417969,
|
|
"logps/rejected": -89.40084075927734,
|
|
"loss": 1.0516,
|
|
"margin_dpo/margin_mean": 1.4002941846847534,
|
|
"margin_dpo/margin_std": 2.27569317817688,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.9463340891912321,
|
|
"fcm_dpo/beta": 0.6106563806533813,
|
|
"fcm_dpo/delta": -0.2672712206840515,
|
|
"fcm_dpo/margin": 2.0213675498962402,
|
|
"fcm_dpo/q_t": 0.31362709403038025,
|
|
"grad_norm": 127.49701690673828,
|
|
"learning_rate": 4.517825684323323e-09,
|
|
"logits/chosen": 0.24240854382514954,
|
|
"logits/rejected": 0.1507807821035385,
|
|
"logps/chosen": -47.64917755126953,
|
|
"logps/ref_chosen": -45.06201934814453,
|
|
"logps/ref_rejected": -89.66368103027344,
|
|
"logps/rejected": -94.27220153808594,
|
|
"loss": 0.9523,
|
|
"margin_dpo/margin_mean": 2.0213675498962402,
|
|
"margin_dpo/margin_std": 2.8801679611206055,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.9478458049886621,
|
|
"fcm_dpo/beta": 0.5770605802536011,
|
|
"fcm_dpo/delta": -0.23875784873962402,
|
|
"fcm_dpo/margin": 2.0926780700683594,
|
|
"fcm_dpo/q_t": 0.3067499101161957,
|
|
"grad_norm": 135.4554901123047,
|
|
"learning_rate": 4.271028567242818e-09,
|
|
"logits/chosen": 0.12116050720214844,
|
|
"logits/rejected": 0.04095185548067093,
|
|
"logps/chosen": -61.70726776123047,
|
|
"logps/ref_chosen": -58.791053771972656,
|
|
"logps/ref_rejected": -94.90802001953125,
|
|
"logps/rejected": -99.91691589355469,
|
|
"loss": 0.888,
|
|
"margin_dpo/margin_mean": 2.0926778316497803,
|
|
"margin_dpo/margin_std": 2.661421298980713,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.9493575207860923,
|
|
"fcm_dpo/beta": 0.5617523193359375,
|
|
"fcm_dpo/delta": -0.23493322730064392,
|
|
"fcm_dpo/margin": 2.1469919681549072,
|
|
"fcm_dpo/q_t": 0.29745668172836304,
|
|
"grad_norm": 120.84452056884766,
|
|
"learning_rate": 4.0311050177251895e-09,
|
|
"logits/chosen": 0.19451142847537994,
|
|
"logits/rejected": 0.16487011313438416,
|
|
"logps/chosen": -55.42369842529297,
|
|
"logps/ref_chosen": -52.80357360839844,
|
|
"logps/ref_rejected": -76.49468994140625,
|
|
"logps/rejected": -81.26181030273438,
|
|
"loss": 0.9097,
|
|
"margin_dpo/margin_mean": 2.146991729736328,
|
|
"margin_dpo/margin_std": 2.7369041442871094,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.9508692365835223,
|
|
"fcm_dpo/beta": 0.5566290616989136,
|
|
"fcm_dpo/delta": 0.07311487942934036,
|
|
"fcm_dpo/margin": 1.6754742860794067,
|
|
"fcm_dpo/q_t": 0.3302459716796875,
|
|
"grad_norm": 120.0956802368164,
|
|
"learning_rate": 3.798061746947995e-09,
|
|
"logits/chosen": 0.17901673913002014,
|
|
"logits/rejected": 0.17002899944782257,
|
|
"logps/chosen": -73.4151382446289,
|
|
"logps/ref_chosen": -70.71749877929688,
|
|
"logps/ref_rejected": -78.96273803710938,
|
|
"logps/rejected": -83.33586120605469,
|
|
"loss": 0.8944,
|
|
"margin_dpo/margin_mean": 1.6754742860794067,
|
|
"margin_dpo/margin_std": 2.052621603012085,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.9523809523809523,
|
|
"fcm_dpo/beta": 0.5548304915428162,
|
|
"fcm_dpo/delta": -0.06957443058490753,
|
|
"fcm_dpo/margin": 1.912638783454895,
|
|
"fcm_dpo/q_t": 0.32286617159843445,
|
|
"grad_norm": 108.01695251464844,
|
|
"learning_rate": 3.5719052736323806e-09,
|
|
"logits/chosen": 0.1528751254081726,
|
|
"logits/rejected": 0.11582262814044952,
|
|
"logps/chosen": -58.81450653076172,
|
|
"logps/ref_chosen": -56.201412200927734,
|
|
"logps/ref_rejected": -74.69807434082031,
|
|
"logps/rejected": -79.22380065917969,
|
|
"loss": 0.891,
|
|
"margin_dpo/margin_mean": 1.9126391410827637,
|
|
"margin_dpo/margin_std": 2.4235522747039795,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.9538926681783825,
|
|
"fcm_dpo/beta": 0.5212994813919067,
|
|
"fcm_dpo/delta": -0.20469579100608826,
|
|
"fcm_dpo/margin": 2.244609832763672,
|
|
"fcm_dpo/q_t": 0.2998984754085541,
|
|
"grad_norm": 126.16265869140625,
|
|
"learning_rate": 3.352641923861144e-09,
|
|
"logits/chosen": 0.24326127767562866,
|
|
"logits/rejected": 0.16665881872177124,
|
|
"logps/chosen": -61.588680267333984,
|
|
"logps/ref_chosen": -58.82059860229492,
|
|
"logps/ref_rejected": -96.51437377929688,
|
|
"logps/rejected": -101.5270767211914,
|
|
"loss": 0.8734,
|
|
"margin_dpo/margin_mean": 2.244609832763672,
|
|
"margin_dpo/margin_std": 2.739107370376587,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.9554043839758125,
|
|
"fcm_dpo/beta": 0.5139273405075073,
|
|
"fcm_dpo/delta": -0.14226512610912323,
|
|
"fcm_dpo/margin": 2.19085693359375,
|
|
"fcm_dpo/q_t": 0.28850066661834717,
|
|
"grad_norm": 94.82600402832031,
|
|
"learning_rate": 3.140277830901428e-09,
|
|
"logits/chosen": 0.21174263954162598,
|
|
"logits/rejected": 0.18780021369457245,
|
|
"logps/chosen": -61.52200698852539,
|
|
"logps/ref_chosen": -58.786048889160156,
|
|
"logps/ref_rejected": -67.21923828125,
|
|
"logps/rejected": -72.14605712890625,
|
|
"loss": 0.8071,
|
|
"margin_dpo/margin_mean": 2.19085693359375,
|
|
"margin_dpo/margin_std": 2.2715227603912354,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.9569160997732427,
|
|
"fcm_dpo/beta": 0.5195468664169312,
|
|
"fcm_dpo/delta": 0.10754405707120895,
|
|
"fcm_dpo/margin": 1.7362931966781616,
|
|
"fcm_dpo/q_t": 0.3419226408004761,
|
|
"grad_norm": 99.7951431274414,
|
|
"learning_rate": 2.9348189350335007e-09,
|
|
"logits/chosen": 0.1292242705821991,
|
|
"logits/rejected": 0.08500467240810394,
|
|
"logps/chosen": -54.51697540283203,
|
|
"logps/ref_chosen": -52.13019561767578,
|
|
"logps/ref_rejected": -67.23016357421875,
|
|
"logps/rejected": -71.35323333740234,
|
|
"loss": 0.971,
|
|
"margin_dpo/margin_mean": 1.7362935543060303,
|
|
"margin_dpo/margin_std": 2.4587457180023193,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.9584278155706727,
|
|
"fcm_dpo/beta": 0.5588383674621582,
|
|
"fcm_dpo/delta": 0.42835086584091187,
|
|
"fcm_dpo/margin": 1.0649152994155884,
|
|
"fcm_dpo/q_t": 0.4058130383491516,
|
|
"grad_norm": 154.30062866210938,
|
|
"learning_rate": 2.736270983384276e-09,
|
|
"logits/chosen": 0.22938773036003113,
|
|
"logits/rejected": 0.22998389601707458,
|
|
"logps/chosen": -63.95368194580078,
|
|
"logps/ref_chosen": -60.97979736328125,
|
|
"logps/ref_rejected": -58.50825119018555,
|
|
"logps/rejected": -62.54705047607422,
|
|
"loss": 1.2187,
|
|
"margin_dpo/margin_mean": 1.0649151802062988,
|
|
"margin_dpo/margin_std": 2.3998215198516846,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.9599395313681028,
|
|
"fcm_dpo/beta": 0.5766314268112183,
|
|
"fcm_dpo/delta": 0.07918489724397659,
|
|
"fcm_dpo/margin": 1.6092689037322998,
|
|
"fcm_dpo/q_t": 0.3603755235671997,
|
|
"grad_norm": 152.15786743164062,
|
|
"learning_rate": 2.5446395297668287e-09,
|
|
"logits/chosen": 0.10523584485054016,
|
|
"logits/rejected": 0.06159904971718788,
|
|
"logps/chosen": -69.22645568847656,
|
|
"logps/ref_chosen": -65.9730224609375,
|
|
"logps/ref_rejected": -85.61317443847656,
|
|
"logps/rejected": -90.47587585449219,
|
|
"loss": 1.0967,
|
|
"margin_dpo/margin_mean": 1.6092685461044312,
|
|
"margin_dpo/margin_std": 2.7506871223449707,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.9614512471655329,
|
|
"fcm_dpo/beta": 0.5654884576797485,
|
|
"fcm_dpo/delta": -0.11426550894975662,
|
|
"fcm_dpo/margin": 1.9465041160583496,
|
|
"fcm_dpo/q_t": 0.2917436957359314,
|
|
"grad_norm": 109.79877471923828,
|
|
"learning_rate": 2.359929934524829e-09,
|
|
"logits/chosen": 0.1432669758796692,
|
|
"logits/rejected": 0.07593058794736862,
|
|
"logps/chosen": -51.8205680847168,
|
|
"logps/ref_chosen": -49.140167236328125,
|
|
"logps/ref_rejected": -81.26971435546875,
|
|
"logps/rejected": -85.89661407470703,
|
|
"loss": 0.7784,
|
|
"margin_dpo/margin_mean": 1.9465045928955078,
|
|
"margin_dpo/margin_std": 1.9250316619873047,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.9629629629629629,
|
|
"fcm_dpo/beta": 0.5774806141853333,
|
|
"fcm_dpo/delta": 0.09339653700590134,
|
|
"fcm_dpo/margin": 1.5811973810195923,
|
|
"fcm_dpo/q_t": 0.3516439199447632,
|
|
"grad_norm": 156.06459045410156,
|
|
"learning_rate": 2.1821473643827137e-09,
|
|
"logits/chosen": 0.14332839846611023,
|
|
"logits/rejected": 0.08663806319236755,
|
|
"logps/chosen": -76.97013854980469,
|
|
"logps/ref_chosen": -73.69658660888672,
|
|
"logps/ref_rejected": -83.01487731933594,
|
|
"logps/rejected": -87.86962890625,
|
|
"loss": 1.0302,
|
|
"margin_dpo/margin_mean": 1.5811975002288818,
|
|
"margin_dpo/margin_std": 2.551753044128418,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.9644746787603931,
|
|
"fcm_dpo/beta": 0.5492261648178101,
|
|
"fcm_dpo/delta": -0.27356475591659546,
|
|
"fcm_dpo/margin": 2.252800941467285,
|
|
"fcm_dpo/q_t": 0.28849977254867554,
|
|
"grad_norm": 118.49738311767578,
|
|
"learning_rate": 2.0112967923011646e-09,
|
|
"logits/chosen": 0.1485036015510559,
|
|
"logits/rejected": 0.10658858716487885,
|
|
"logps/chosen": -65.84213256835938,
|
|
"logps/ref_chosen": -62.78158187866211,
|
|
"logps/ref_rejected": -85.40478515625,
|
|
"logps/rejected": -90.71813201904297,
|
|
"loss": 0.8175,
|
|
"margin_dpo/margin_mean": 2.252800464630127,
|
|
"margin_dpo/margin_std": 2.519404888153076,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.9659863945578231,
|
|
"fcm_dpo/beta": 0.5499449372291565,
|
|
"fcm_dpo/delta": 0.053651005029678345,
|
|
"fcm_dpo/margin": 1.7299044132232666,
|
|
"fcm_dpo/q_t": 0.33649200201034546,
|
|
"grad_norm": 118.28497314453125,
|
|
"learning_rate": 1.847382997337943e-09,
|
|
"logits/chosen": 0.14743350446224213,
|
|
"logits/rejected": 0.052376627922058105,
|
|
"logps/chosen": -56.702606201171875,
|
|
"logps/ref_chosen": -53.76658630371094,
|
|
"logps/ref_rejected": -72.30009460449219,
|
|
"logps/rejected": -76.96601867675781,
|
|
"loss": 0.9522,
|
|
"margin_dpo/margin_mean": 1.7299044132232666,
|
|
"margin_dpo/margin_std": 2.438140392303467,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.9674981103552532,
|
|
"fcm_dpo/beta": 0.5619925260543823,
|
|
"fcm_dpo/delta": 0.15041983127593994,
|
|
"fcm_dpo/margin": 1.5351797342300415,
|
|
"fcm_dpo/q_t": 0.3423158526420593,
|
|
"grad_norm": 126.87297821044922,
|
|
"learning_rate": 1.690410564514244e-09,
|
|
"logits/chosen": 0.23225465416908264,
|
|
"logits/rejected": 0.19046524167060852,
|
|
"logps/chosen": -54.199310302734375,
|
|
"logps/ref_chosen": -51.41777801513672,
|
|
"logps/ref_rejected": -77.27879333496094,
|
|
"logps/rejected": -81.59550476074219,
|
|
"loss": 1.0331,
|
|
"margin_dpo/margin_mean": 1.5351800918579102,
|
|
"margin_dpo/margin_std": 2.3253612518310547,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.9690098261526833,
|
|
"fcm_dpo/beta": 0.5881354212760925,
|
|
"fcm_dpo/delta": 0.17225059866905212,
|
|
"fcm_dpo/margin": 1.4259064197540283,
|
|
"fcm_dpo/q_t": 0.34505194425582886,
|
|
"grad_norm": 165.1323699951172,
|
|
"learning_rate": 1.5403838846864692e-09,
|
|
"logits/chosen": 0.13332295417785645,
|
|
"logits/rejected": 0.10575246065855026,
|
|
"logps/chosen": -74.10830688476562,
|
|
"logps/ref_chosen": -71.0546646118164,
|
|
"logps/ref_rejected": -82.2440185546875,
|
|
"logps/rejected": -86.72355651855469,
|
|
"loss": 1.0339,
|
|
"margin_dpo/margin_mean": 1.4259059429168701,
|
|
"margin_dpo/margin_std": 2.282135486602783,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.9705215419501134,
|
|
"fcm_dpo/beta": 0.5971714854240417,
|
|
"fcm_dpo/delta": 0.17347897589206696,
|
|
"fcm_dpo/margin": 1.4081449508666992,
|
|
"fcm_dpo/q_t": 0.3561500310897827,
|
|
"grad_norm": 159.68527221679688,
|
|
"learning_rate": 1.3973071544233218e-09,
|
|
"logits/chosen": 0.12671303749084473,
|
|
"logits/rejected": 0.12140335142612457,
|
|
"logps/chosen": -72.1884994506836,
|
|
"logps/ref_chosen": -68.92927551269531,
|
|
"logps/ref_rejected": -70.85682678222656,
|
|
"logps/rejected": -75.52420043945312,
|
|
"loss": 1.078,
|
|
"margin_dpo/margin_mean": 1.4081450700759888,
|
|
"margin_dpo/margin_std": 2.3124876022338867,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.9720332577475435,
|
|
"fcm_dpo/beta": 0.6271607875823975,
|
|
"fcm_dpo/delta": 0.3203769028186798,
|
|
"fcm_dpo/margin": 1.1184437274932861,
|
|
"fcm_dpo/q_t": 0.34422484040260315,
|
|
"grad_norm": 2246.781982421875,
|
|
"learning_rate": 1.261184375888541e-09,
|
|
"logits/chosen": 0.1141979843378067,
|
|
"logits/rejected": 0.05281982570886612,
|
|
"logps/chosen": -68.73892974853516,
|
|
"logps/ref_chosen": -65.30903625488281,
|
|
"logps/ref_rejected": -83.61613464355469,
|
|
"logps/rejected": -88.16447448730469,
|
|
"loss": 1.4846,
|
|
"margin_dpo/margin_mean": 1.1184438467025757,
|
|
"margin_dpo/margin_std": 3.4751362800598145,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.9735449735449735,
|
|
"fcm_dpo/beta": 0.6507506966590881,
|
|
"fcm_dpo/delta": 0.037187736481428146,
|
|
"fcm_dpo/margin": 1.4850530624389648,
|
|
"fcm_dpo/q_t": 0.3464422821998596,
|
|
"grad_norm": 163.06446838378906,
|
|
"learning_rate": 1.1320193567288527e-09,
|
|
"logits/chosen": 0.2439633458852768,
|
|
"logits/rejected": 0.21551108360290527,
|
|
"logps/chosen": -53.948020935058594,
|
|
"logps/ref_chosen": -51.002601623535156,
|
|
"logps/ref_rejected": -64.46372985839844,
|
|
"logps/rejected": -68.89420318603516,
|
|
"loss": 1.0968,
|
|
"margin_dpo/margin_mean": 1.4850530624389648,
|
|
"margin_dpo/margin_std": 2.556273937225342,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.9750566893424036,
|
|
"fcm_dpo/beta": 0.6438242197036743,
|
|
"fcm_dpo/delta": -0.08629032969474792,
|
|
"fcm_dpo/margin": 1.6724817752838135,
|
|
"fcm_dpo/q_t": 0.31522613763809204,
|
|
"grad_norm": 150.0731964111328,
|
|
"learning_rate": 1.0098157099674987e-09,
|
|
"logits/chosen": 0.0987434908747673,
|
|
"logits/rejected": 0.07496701180934906,
|
|
"logps/chosen": -64.18575286865234,
|
|
"logps/ref_chosen": -60.963409423828125,
|
|
"logps/ref_rejected": -69.73353576660156,
|
|
"logps/rejected": -74.62835693359375,
|
|
"loss": 0.8818,
|
|
"margin_dpo/margin_mean": 1.6724815368652344,
|
|
"margin_dpo/margin_std": 2.17503023147583,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.9765684051398337,
|
|
"fcm_dpo/beta": 0.6224489212036133,
|
|
"fcm_dpo/delta": -0.2178870290517807,
|
|
"fcm_dpo/margin": 1.9151108264923096,
|
|
"fcm_dpo/q_t": 0.30455827713012695,
|
|
"grad_norm": 125.87390899658203,
|
|
"learning_rate": 8.945768539031783e-10,
|
|
"logits/chosen": 0.2028486728668213,
|
|
"logits/rejected": 0.16405922174453735,
|
|
"logps/chosen": -65.69281005859375,
|
|
"logps/ref_chosen": -62.290069580078125,
|
|
"logps/ref_rejected": -85.54812622070312,
|
|
"logps/rejected": -90.86598205566406,
|
|
"loss": 0.9126,
|
|
"margin_dpo/margin_mean": 1.9151103496551514,
|
|
"margin_dpo/margin_std": 2.5356059074401855,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.9780801209372638,
|
|
"fcm_dpo/beta": 0.5961904525756836,
|
|
"fcm_dpo/delta": -0.26335006952285767,
|
|
"fcm_dpo/margin": 2.065751552581787,
|
|
"fcm_dpo/q_t": 0.30408942699432373,
|
|
"grad_norm": 186.32350158691406,
|
|
"learning_rate": 7.863060120144316e-10,
|
|
"logits/chosen": 0.20603081583976746,
|
|
"logits/rejected": 0.13498756289482117,
|
|
"logps/chosen": -70.44767761230469,
|
|
"logps/ref_chosen": -67.515869140625,
|
|
"logps/ref_rejected": -101.50871276855469,
|
|
"logps/rejected": -106.50627136230469,
|
|
"loss": 0.9259,
|
|
"margin_dpo/margin_mean": 2.065751075744629,
|
|
"margin_dpo/margin_std": 2.7231507301330566,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.9795918367346939,
|
|
"fcm_dpo/beta": 0.5804147720336914,
|
|
"fcm_dpo/delta": 0.019788160920143127,
|
|
"fcm_dpo/margin": 1.6904387474060059,
|
|
"fcm_dpo/q_t": 0.33119359612464905,
|
|
"grad_norm": 137.32240295410156,
|
|
"learning_rate": 6.850062128694045e-10,
|
|
"logits/chosen": 0.13131186366081238,
|
|
"logits/rejected": 0.0793139785528183,
|
|
"logps/chosen": -67.7803726196289,
|
|
"logps/ref_chosen": -64.59593963623047,
|
|
"logps/ref_rejected": -83.384033203125,
|
|
"logps/rejected": -88.25890350341797,
|
|
"loss": 0.9714,
|
|
"margin_dpo/margin_mean": 1.6904385089874268,
|
|
"margin_dpo/margin_std": 2.3690028190612793,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.981103552532124,
|
|
"fcm_dpo/beta": 0.5746303796768188,
|
|
"fcm_dpo/delta": -0.07765467464923859,
|
|
"fcm_dpo/margin": 1.8583083152770996,
|
|
"fcm_dpo/q_t": 0.3117806911468506,
|
|
"grad_norm": 126.6104736328125,
|
|
"learning_rate": 5.906802900412788e-10,
|
|
"logits/chosen": 0.17333395779132843,
|
|
"logits/rejected": 0.1271257847547531,
|
|
"logps/chosen": -52.0538330078125,
|
|
"logps/ref_chosen": -49.30964660644531,
|
|
"logps/ref_rejected": -73.73710632324219,
|
|
"logps/rejected": -78.339599609375,
|
|
"loss": 0.931,
|
|
"margin_dpo/margin_mean": 1.8583080768585205,
|
|
"margin_dpo/margin_std": 2.380646228790283,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.982615268329554,
|
|
"fcm_dpo/beta": 0.5875668525695801,
|
|
"fcm_dpo/delta": 0.08222602307796478,
|
|
"fcm_dpo/margin": 1.5724918842315674,
|
|
"fcm_dpo/q_t": 0.3484124541282654,
|
|
"grad_norm": 169.66038513183594,
|
|
"learning_rate": 5.033308820289184e-10,
|
|
"logits/chosen": 0.2582772374153137,
|
|
"logits/rejected": 0.21315817534923553,
|
|
"logps/chosen": -57.925880432128906,
|
|
"logps/ref_chosen": -55.06325912475586,
|
|
"logps/ref_rejected": -77.39610290527344,
|
|
"logps/rejected": -81.83120727539062,
|
|
"loss": 1.0498,
|
|
"margin_dpo/margin_mean": 1.5724914073944092,
|
|
"margin_dpo/margin_std": 2.477647542953491,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.9841269841269841,
|
|
"fcm_dpo/beta": 0.600640058517456,
|
|
"fcm_dpo/delta": 0.055977076292037964,
|
|
"fcm_dpo/margin": 1.5707225799560547,
|
|
"fcm_dpo/q_t": 0.3421096205711365,
|
|
"grad_norm": 148.89381408691406,
|
|
"learning_rate": 4.2296043218295606e-10,
|
|
"logits/chosen": 0.24641644954681396,
|
|
"logits/rejected": 0.1934581995010376,
|
|
"logps/chosen": -56.82018280029297,
|
|
"logps/ref_chosen": -54.065162658691406,
|
|
"logps/ref_rejected": -77.79080200195312,
|
|
"logps/rejected": -82.1165542602539,
|
|
"loss": 0.9553,
|
|
"margin_dpo/margin_mean": 1.5707224607467651,
|
|
"margin_dpo/margin_std": 2.1566340923309326,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 0.9856386999244142,
|
|
"fcm_dpo/beta": 0.5921408534049988,
|
|
"fcm_dpo/delta": 0.02783304452896118,
|
|
"fcm_dpo/margin": 1.6457834243774414,
|
|
"fcm_dpo/q_t": 0.3308331370353699,
|
|
"grad_norm": 141.38235473632812,
|
|
"learning_rate": 3.4957118863768176e-10,
|
|
"logits/chosen": 0.20549950003623962,
|
|
"logits/rejected": 0.15806761384010315,
|
|
"logps/chosen": -66.84440612792969,
|
|
"logps/ref_chosen": -63.64030456542969,
|
|
"logps/ref_rejected": -78.86882019042969,
|
|
"logps/rejected": -83.71870422363281,
|
|
"loss": 0.9701,
|
|
"margin_dpo/margin_mean": 1.6457829475402832,
|
|
"margin_dpo/margin_std": 2.2666330337524414,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 0.9871504157218443,
|
|
"fcm_dpo/beta": 0.6015132665634155,
|
|
"fcm_dpo/delta": 0.07994754612445831,
|
|
"fcm_dpo/margin": 1.541993260383606,
|
|
"fcm_dpo/q_t": 0.352220356464386,
|
|
"grad_norm": 151.9169464111328,
|
|
"learning_rate": 2.831652042480093e-10,
|
|
"logits/chosen": 0.18092171847820282,
|
|
"logits/rejected": 0.13986843824386597,
|
|
"logps/chosen": -64.58534240722656,
|
|
"logps/ref_chosen": -61.668373107910156,
|
|
"logps/ref_rejected": -73.83012390136719,
|
|
"logps/rejected": -78.28909301757812,
|
|
"loss": 1.0303,
|
|
"margin_dpo/margin_mean": 1.5419931411743164,
|
|
"margin_dpo/margin_std": 2.441131591796875,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 0.9886621315192744,
|
|
"fcm_dpo/beta": 0.5887470245361328,
|
|
"fcm_dpo/delta": -0.13443875312805176,
|
|
"fcm_dpo/margin": 1.8974157571792603,
|
|
"fcm_dpo/q_t": 0.31610941886901855,
|
|
"grad_norm": 141.95655822753906,
|
|
"learning_rate": 2.2374433653205016e-10,
|
|
"logits/chosen": 0.170884370803833,
|
|
"logits/rejected": 0.10386738926172256,
|
|
"logps/chosen": -60.361976623535156,
|
|
"logps/ref_chosen": -57.568267822265625,
|
|
"logps/ref_rejected": -87.74789428710938,
|
|
"logps/rejected": -92.43901824951172,
|
|
"loss": 0.905,
|
|
"margin_dpo/margin_mean": 1.8974157571792603,
|
|
"margin_dpo/margin_std": 2.5552029609680176,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 0.9901738473167044,
|
|
"fcm_dpo/beta": 0.5729016661643982,
|
|
"fcm_dpo/delta": -0.060607388615608215,
|
|
"fcm_dpo/margin": 1.8254590034484863,
|
|
"fcm_dpo/q_t": 0.309136301279068,
|
|
"grad_norm": 104.31941223144531,
|
|
"learning_rate": 1.7131024761923852e-10,
|
|
"logits/chosen": 0.1574726104736328,
|
|
"logits/rejected": 0.0878022313117981,
|
|
"logps/chosen": -54.848045349121094,
|
|
"logps/ref_chosen": -52.14714813232422,
|
|
"logps/ref_rejected": -80.85014343261719,
|
|
"logps/rejected": -85.37649536132812,
|
|
"loss": 0.8471,
|
|
"margin_dpo/margin_mean": 1.8254594802856445,
|
|
"margin_dpo/margin_std": 2.0447256565093994,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.9916855631141346,
|
|
"fcm_dpo/beta": 0.555420458316803,
|
|
"fcm_dpo/delta": -0.24778330326080322,
|
|
"fcm_dpo/margin": 2.184002161026001,
|
|
"fcm_dpo/q_t": 0.29599303007125854,
|
|
"grad_norm": 112.63178253173828,
|
|
"learning_rate": 1.2586440420372934e-10,
|
|
"logits/chosen": 0.16302891075611115,
|
|
"logits/rejected": 0.12122651189565659,
|
|
"logps/chosen": -76.25328063964844,
|
|
"logps/ref_chosen": -73.25672912597656,
|
|
"logps/ref_rejected": -85.35127258300781,
|
|
"logps/rejected": -90.53182220458984,
|
|
"loss": 0.82,
|
|
"margin_dpo/margin_mean": 2.1840016841888428,
|
|
"margin_dpo/margin_std": 2.538489818572998,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 0.9931972789115646,
|
|
"fcm_dpo/beta": 0.5423201322555542,
|
|
"fcm_dpo/delta": -0.10848333686590195,
|
|
"fcm_dpo/margin": 2.0198726654052734,
|
|
"fcm_dpo/q_t": 0.3146105408668518,
|
|
"grad_norm": 127.41387176513672,
|
|
"learning_rate": 8.740807750345913e-11,
|
|
"logits/chosen": 0.26283034682273865,
|
|
"logits/rejected": 0.19647637009620667,
|
|
"logps/chosen": -52.75443649291992,
|
|
"logps/ref_chosen": -49.72339630126953,
|
|
"logps/ref_rejected": -75.1568603515625,
|
|
"logps/rejected": -80.20777893066406,
|
|
"loss": 0.9165,
|
|
"margin_dpo/margin_mean": 2.0198724269866943,
|
|
"margin_dpo/margin_std": 2.636084794998169,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 0.9947089947089947,
|
|
"fcm_dpo/beta": 0.556152880191803,
|
|
"fcm_dpo/delta": 0.08222407847642899,
|
|
"fcm_dpo/margin": 1.6563022136688232,
|
|
"fcm_dpo/q_t": 0.35666483640670776,
|
|
"grad_norm": 134.63629150390625,
|
|
"learning_rate": 5.594234322453539e-11,
|
|
"logits/chosen": 0.1849232316017151,
|
|
"logits/rejected": 0.15448370575904846,
|
|
"logps/chosen": -66.14913940429688,
|
|
"logps/ref_chosen": -63.04634094238281,
|
|
"logps/ref_rejected": -83.44963073730469,
|
|
"logps/rejected": -88.20873260498047,
|
|
"loss": 1.0747,
|
|
"margin_dpo/margin_mean": 1.656302809715271,
|
|
"margin_dpo/margin_std": 2.766972541809082,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 0.9962207105064248,
|
|
"fcm_dpo/beta": 0.5667056441307068,
|
|
"fcm_dpo/delta": 0.22809654474258423,
|
|
"fcm_dpo/margin": 1.3948873281478882,
|
|
"fcm_dpo/q_t": 0.36462146043777466,
|
|
"grad_norm": 134.068603515625,
|
|
"learning_rate": 3.146808153123293e-11,
|
|
"logits/chosen": 0.2203397899866104,
|
|
"logits/rejected": 0.16544032096862793,
|
|
"logps/chosen": -58.36835479736328,
|
|
"logps/ref_chosen": -55.0802001953125,
|
|
"logps/ref_rejected": -71.91049194335938,
|
|
"logps/rejected": -76.59353637695312,
|
|
"loss": 1.1224,
|
|
"margin_dpo/margin_mean": 1.3948872089385986,
|
|
"margin_dpo/margin_std": 2.498440742492676,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 0.9977324263038548,
|
|
"fcm_dpo/beta": 0.5684385895729065,
|
|
"fcm_dpo/delta": -0.10104553401470184,
|
|
"fcm_dpo/margin": 1.9175570011138916,
|
|
"fcm_dpo/q_t": 0.3092523217201233,
|
|
"grad_norm": 133.181396484375,
|
|
"learning_rate": 1.3985977021235829e-11,
|
|
"logits/chosen": 0.2678339183330536,
|
|
"logits/rejected": 0.2140858918428421,
|
|
"logps/chosen": -57.32783889770508,
|
|
"logps/ref_chosen": -54.525917053222656,
|
|
"logps/ref_rejected": -81.23604583740234,
|
|
"logps/rejected": -85.95552825927734,
|
|
"loss": 0.8659,
|
|
"margin_dpo/margin_mean": 1.9175567626953125,
|
|
"margin_dpo/margin_std": 2.366135597229004,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.999244142101285,
|
|
"fcm_dpo/beta": 0.5669894218444824,
|
|
"fcm_dpo/delta": 0.021186619997024536,
|
|
"fcm_dpo/margin": 1.7300082445144653,
|
|
"fcm_dpo/q_t": 0.33315783739089966,
|
|
"grad_norm": 129.08389282226562,
|
|
"learning_rate": 3.4965187065971735e-12,
|
|
"logits/chosen": 0.15239998698234558,
|
|
"logits/rejected": 0.0887516513466835,
|
|
"logps/chosen": -63.79823684692383,
|
|
"logps/ref_chosen": -60.37263870239258,
|
|
"logps/ref_rejected": -77.42874145507812,
|
|
"logps/rejected": -82.5843505859375,
|
|
"loss": 1.0387,
|
|
"margin_dpo/margin_mean": 1.730008840560913,
|
|
"margin_dpo/margin_std": 2.6242144107818604,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.999244142101285,
|
|
"step": 661,
|
|
"total_flos": 0.0,
|
|
"train_loss": 1.1173522615757363,
|
|
"train_runtime": 1752.852,
|
|
"train_samples_per_second": 24.153,
|
|
"train_steps_per_second": 0.377
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 661,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 50,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|