Files
llama-3-8b-base-new-dpo-hh-…/trainer_state.json
ModelHub XC cf60694ff3 初始化项目,由ModelHub XC社区提供模型
Model: jackf857/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0
Source: Original Platform
2026-06-03 22:08:02 +08:00

12654 lines
465 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.999244142101285,
"eval_steps": 200,
"global_step": 661,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0015117157974300832,
"fcm_dpo/beta": 0.11051072180271149,
"fcm_dpo/delta": 0.49971169233322144,
"fcm_dpo/margin": -0.0013532638549804688,
"fcm_dpo/q_t": 0.5000448226928711,
"grad_norm": 31.18895149230957,
"learning_rate": 0.0,
"logits/chosen": 0.13337239623069763,
"logits/rejected": 0.12492949515581131,
"logps/chosen": -64.5841293334961,
"logps/ref_chosen": -64.61280822753906,
"logps/ref_rejected": -64.17195129394531,
"logps/rejected": -64.14192199707031,
"loss": 1.3866,
"margin_dpo/margin_mean": -0.0013527870178222656,
"margin_dpo/margin_std": 0.2561596930027008,
"step": 1
},
{
"epoch": 0.0030234315948601664,
"fcm_dpo/beta": 0.11625976860523224,
"fcm_dpo/delta": 0.49490365386009216,
"fcm_dpo/margin": 0.037450045347213745,
"fcm_dpo/q_t": 0.4989655911922455,
"grad_norm": 32.422725677490234,
"learning_rate": 7.462686567164179e-09,
"logits/chosen": 0.09414851665496826,
"logits/rejected": 0.07363267242908478,
"logps/chosen": -56.101890563964844,
"logps/ref_chosen": -56.0989990234375,
"logps/ref_rejected": -66.59971618652344,
"logps/rejected": -66.64006042480469,
"loss": 1.3819,
"margin_dpo/margin_mean": 0.03744968771934509,
"margin_dpo/margin_std": 0.27811938524246216,
"step": 2
},
{
"epoch": 0.0045351473922902496,
"fcm_dpo/beta": 0.12837310135364532,
"fcm_dpo/delta": 0.4961715638637543,
"fcm_dpo/margin": 0.002266407012939453,
"fcm_dpo/q_t": 0.4999309778213501,
"grad_norm": 40.0953483581543,
"learning_rate": 1.4925373134328357e-08,
"logits/chosen": 0.0993770956993103,
"logits/rejected": 0.06136491894721985,
"logps/chosen": -65.4115219116211,
"logps/ref_chosen": -65.45726013183594,
"logps/ref_rejected": -90.82853698730469,
"logps/rejected": -90.78506469726562,
"loss": 1.3859,
"margin_dpo/margin_mean": 0.0022667646408081055,
"margin_dpo/margin_std": 0.26775944232940674,
"step": 3
},
{
"epoch": 0.006046863189720333,
"fcm_dpo/beta": 0.14178214967250824,
"fcm_dpo/delta": 0.4972817301750183,
"fcm_dpo/margin": -0.031194627285003662,
"fcm_dpo/q_t": 0.5010493993759155,
"grad_norm": 48.728790283203125,
"learning_rate": 2.2388059701492534e-08,
"logits/chosen": 0.10073457658290863,
"logits/rejected": 0.08476720750331879,
"logps/chosen": -76.87289428710938,
"logps/ref_chosen": -76.86018371582031,
"logps/ref_rejected": -79.91523742675781,
"logps/rejected": -79.8967514038086,
"loss": 1.3908,
"margin_dpo/margin_mean": -0.031194984912872314,
"margin_dpo/margin_std": 0.3357463479042053,
"step": 4
},
{
"epoch": 0.007558578987150416,
"fcm_dpo/beta": 0.1644459217786789,
"fcm_dpo/delta": 0.49898889660835266,
"fcm_dpo/margin": -0.022104412317276,
"fcm_dpo/q_t": 0.5009359121322632,
"grad_norm": 48.499725341796875,
"learning_rate": 2.9850746268656714e-08,
"logits/chosen": 0.08101461827754974,
"logits/rejected": 0.04222995042800903,
"logps/chosen": -62.999996185302734,
"logps/ref_chosen": -62.97134017944336,
"logps/ref_rejected": -79.9192123413086,
"logps/rejected": -79.92576599121094,
"loss": 1.3905,
"margin_dpo/margin_mean": -0.02210336923599243,
"margin_dpo/margin_std": 0.283627986907959,
"step": 5
},
{
"epoch": 0.009070294784580499,
"fcm_dpo/beta": 0.1644459217786789,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.07127216458320618,
"fcm_dpo/q_t": 0.5029286742210388,
"grad_norm": 48.75336456298828,
"learning_rate": 3.731343283582089e-08,
"logits/chosen": 0.13917648792266846,
"logits/rejected": 0.09997415542602539,
"logps/chosen": -51.33795166015625,
"logps/ref_chosen": -51.30736541748047,
"logps/ref_rejected": -82.77239227294922,
"logps/rejected": -82.73170471191406,
"loss": 1.3986,
"margin_dpo/margin_mean": -0.07127270102500916,
"margin_dpo/margin_std": 0.29276320338249207,
"step": 6
},
{
"epoch": 0.010582010582010581,
"fcm_dpo/beta": 0.1816796362400055,
"fcm_dpo/delta": 0.4983155131340027,
"fcm_dpo/margin": -0.0058057308197021484,
"fcm_dpo/q_t": 0.500307023525238,
"grad_norm": 49.4698600769043,
"learning_rate": 4.477611940298507e-08,
"logits/chosen": 0.017587212845683098,
"logits/rejected": -0.02612943761050701,
"logps/chosen": -51.4460334777832,
"logps/ref_chosen": -51.45941162109375,
"logps/ref_rejected": -66.3828125,
"logps/rejected": -66.3636245727539,
"loss": 1.388,
"margin_dpo/margin_mean": -0.005805850028991699,
"margin_dpo/margin_std": 0.2854662537574768,
"step": 7
},
{
"epoch": 0.012093726379440665,
"fcm_dpo/beta": 0.1816796362400055,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.029055893421173096,
"fcm_dpo/q_t": 0.5013211965560913,
"grad_norm": 52.10679244995117,
"learning_rate": 5.223880597014925e-08,
"logits/chosen": 0.09742860496044159,
"logits/rejected": 0.07472395896911621,
"logps/chosen": -62.214691162109375,
"logps/ref_chosen": -62.197547912597656,
"logps/ref_rejected": -74.66180419921875,
"logps/rejected": -74.64989471435547,
"loss": 1.3925,
"margin_dpo/margin_mean": -0.02905610203742981,
"margin_dpo/margin_std": 0.3362266421318054,
"step": 8
},
{
"epoch": 0.013605442176870748,
"fcm_dpo/beta": 0.21118226647377014,
"fcm_dpo/delta": 0.9957462549209595,
"fcm_dpo/margin": 0.02298596501350403,
"fcm_dpo/q_t": 0.498937726020813,
"grad_norm": 66.31742858886719,
"learning_rate": 5.970149253731343e-08,
"logits/chosen": 0.17290404438972473,
"logits/rejected": 0.1132848858833313,
"logps/chosen": -55.65576934814453,
"logps/ref_chosen": -55.629722595214844,
"logps/ref_rejected": -86.21221923828125,
"logps/rejected": -86.2612533569336,
"loss": 1.3826,
"margin_dpo/margin_mean": 0.02298620343208313,
"margin_dpo/margin_std": 0.30049267411231995,
"step": 9
},
{
"epoch": 0.015117157974300832,
"fcm_dpo/beta": 0.24479255080223083,
"fcm_dpo/delta": 0.4950849413871765,
"fcm_dpo/margin": 0.01253315806388855,
"fcm_dpo/q_t": 0.4993648827075958,
"grad_norm": 72.88284301757812,
"learning_rate": 6.71641791044776e-08,
"logits/chosen": 0.11379828304052353,
"logits/rejected": 0.08385583758354187,
"logps/chosen": -62.68762969970703,
"logps/ref_chosen": -62.69060134887695,
"logps/ref_rejected": -90.610107421875,
"logps/rejected": -90.61967468261719,
"loss": 1.3853,
"margin_dpo/margin_mean": 0.01253288984298706,
"margin_dpo/margin_std": 0.3701857328414917,
"step": 10
},
{
"epoch": 0.016628873771730914,
"fcm_dpo/beta": 0.26968804001808167,
"fcm_dpo/delta": 0.484273761510849,
"fcm_dpo/margin": 0.04408371448516846,
"fcm_dpo/q_t": 0.4974249601364136,
"grad_norm": 79.30926513671875,
"learning_rate": 7.462686567164178e-08,
"logits/chosen": 0.10774752497673035,
"logits/rejected": 0.10090956091880798,
"logps/chosen": -65.75579071044922,
"logps/ref_chosen": -65.76712036132812,
"logps/ref_rejected": -72.4764633178711,
"logps/rejected": -72.50921630859375,
"loss": 1.376,
"margin_dpo/margin_mean": 0.04408392310142517,
"margin_dpo/margin_std": 0.2787271738052368,
"step": 11
},
{
"epoch": 0.018140589569160998,
"fcm_dpo/beta": 0.26968804001808167,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.013816118240356445,
"fcm_dpo/q_t": 0.5009292364120483,
"grad_norm": 76.66712951660156,
"learning_rate": 8.208955223880596e-08,
"logits/chosen": 0.03750212490558624,
"logits/rejected": 0.02106173150241375,
"logps/chosen": -60.710899353027344,
"logps/ref_chosen": -60.704891204833984,
"logps/ref_rejected": -69.41564178466797,
"logps/rejected": -69.4078369140625,
"loss": 1.3914,
"margin_dpo/margin_mean": -0.013815999031066895,
"margin_dpo/margin_std": 0.2728922367095947,
"step": 12
},
{
"epoch": 0.019652305366591082,
"fcm_dpo/beta": 0.26968804001808167,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.035725027322769165,
"fcm_dpo/q_t": 0.5023995637893677,
"grad_norm": 78.57530212402344,
"learning_rate": 8.955223880597014e-08,
"logits/chosen": 0.10675495862960815,
"logits/rejected": 0.044550854712724686,
"logps/chosen": -49.91514587402344,
"logps/ref_chosen": -49.90925598144531,
"logps/ref_rejected": -92.37818145751953,
"logps/rejected": -92.34834289550781,
"loss": 1.3971,
"margin_dpo/margin_mean": -0.03572601079940796,
"margin_dpo/margin_std": 0.24918314814567566,
"step": 13
},
{
"epoch": 0.021164021164021163,
"fcm_dpo/beta": 0.31274157762527466,
"fcm_dpo/delta": 0.9786568880081177,
"fcm_dpo/margin": 0.075018972158432,
"fcm_dpo/q_t": 0.49468034505844116,
"grad_norm": 91.86367797851562,
"learning_rate": 9.701492537313432e-08,
"logits/chosen": 0.08078277111053467,
"logits/rejected": 0.06322959065437317,
"logps/chosen": -60.599849700927734,
"logps/ref_chosen": -60.61879348754883,
"logps/ref_rejected": -71.79306030273438,
"logps/rejected": -71.84913635253906,
"loss": 1.3649,
"margin_dpo/margin_mean": 0.07501909136772156,
"margin_dpo/margin_std": 0.2812075614929199,
"step": 14
},
{
"epoch": 0.022675736961451247,
"fcm_dpo/beta": 0.3279946446418762,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.02524346113204956,
"fcm_dpo/q_t": 0.5020579099655151,
"grad_norm": 109.66602325439453,
"learning_rate": 1.044776119402985e-07,
"logits/chosen": 0.06694771349430084,
"logits/rejected": 0.023975659161806107,
"logps/chosen": -63.49495315551758,
"logps/ref_chosen": -63.46953582763672,
"logps/ref_rejected": -88.88951110839844,
"logps/rejected": -88.88968658447266,
"loss": 1.3976,
"margin_dpo/margin_mean": -0.025244086980819702,
"margin_dpo/margin_std": 0.3348177969455719,
"step": 15
},
{
"epoch": 0.02418745275888133,
"fcm_dpo/beta": 0.34503084421157837,
"fcm_dpo/delta": 0.4941604435443878,
"fcm_dpo/margin": 0.014522925019264221,
"fcm_dpo/q_t": 0.4988110661506653,
"grad_norm": 93.19413757324219,
"learning_rate": 1.1194029850746268e-07,
"logits/chosen": 0.11576000601053238,
"logits/rejected": 0.07864248752593994,
"logps/chosen": -46.55487823486328,
"logps/ref_chosen": -46.53229904174805,
"logps/ref_rejected": -74.27533721923828,
"logps/rejected": -74.31243896484375,
"loss": 1.3833,
"margin_dpo/margin_mean": 0.014522776007652283,
"margin_dpo/margin_std": 0.27633634209632874,
"step": 16
},
{
"epoch": 0.025699168556311415,
"fcm_dpo/beta": 0.39912450313568115,
"fcm_dpo/delta": 0.487219899892807,
"fcm_dpo/margin": -0.00017780065536499023,
"fcm_dpo/q_t": 0.5003235936164856,
"grad_norm": 134.72512817382812,
"learning_rate": 1.1940298507462686e-07,
"logits/chosen": 0.04781803488731384,
"logits/rejected": 0.02929597906768322,
"logps/chosen": -64.07421875,
"logps/ref_chosen": -64.07783508300781,
"logps/ref_rejected": -86.40876770019531,
"logps/rejected": -86.40497589111328,
"loss": 1.3906,
"margin_dpo/margin_mean": -0.00017789006233215332,
"margin_dpo/margin_std": 0.3190717101097107,
"step": 17
},
{
"epoch": 0.027210884353741496,
"fcm_dpo/beta": 0.4197884202003479,
"fcm_dpo/delta": 0.4926441013813019,
"fcm_dpo/margin": -0.029925107955932617,
"fcm_dpo/q_t": 0.5029613971710205,
"grad_norm": 120.34202575683594,
"learning_rate": 1.2686567164179106e-07,
"logits/chosen": 0.10955735296010971,
"logits/rejected": 0.06258425116539001,
"logps/chosen": -44.86344528198242,
"logps/ref_chosen": -44.87433624267578,
"logps/ref_rejected": -70.97604370117188,
"logps/rejected": -70.93523406982422,
"loss": 1.4019,
"margin_dpo/margin_mean": -0.029924869537353516,
"margin_dpo/margin_std": 0.30913057923316956,
"step": 18
},
{
"epoch": 0.02872260015117158,
"fcm_dpo/beta": 0.5113855004310608,
"fcm_dpo/delta": 0.9829479455947876,
"fcm_dpo/margin": 0.03535567224025726,
"fcm_dpo/q_t": 0.4958151578903198,
"grad_norm": 156.30233764648438,
"learning_rate": 1.343283582089552e-07,
"logits/chosen": 0.07092909514904022,
"logits/rejected": 0.05735887587070465,
"logps/chosen": -68.16175842285156,
"logps/ref_chosen": -68.1598129272461,
"logps/ref_rejected": -81.17138671875,
"logps/rejected": -81.20869445800781,
"loss": 1.3751,
"margin_dpo/margin_mean": 0.03535632789134979,
"margin_dpo/margin_std": 0.33560460805892944,
"step": 19
},
{
"epoch": 0.030234315948601664,
"fcm_dpo/beta": 0.621636152267456,
"fcm_dpo/delta": 0.9787266254425049,
"fcm_dpo/margin": 0.03790883719921112,
"fcm_dpo/q_t": 0.49468833208084106,
"grad_norm": 184.15403747558594,
"learning_rate": 1.4179104477611938e-07,
"logits/chosen": 0.11737000942230225,
"logits/rejected": 0.09504500031471252,
"logps/chosen": -53.65413284301758,
"logps/ref_chosen": -53.67856216430664,
"logps/ref_rejected": -74.16911315917969,
"logps/rejected": -74.18260192871094,
"loss": 1.3704,
"margin_dpo/margin_mean": 0.03790910542011261,
"margin_dpo/margin_std": 0.27916550636291504,
"step": 20
},
{
"epoch": 0.031746031746031744,
"fcm_dpo/beta": 0.719708263874054,
"fcm_dpo/delta": 0.49354374408721924,
"fcm_dpo/margin": 0.006038039922714233,
"fcm_dpo/q_t": 0.49931585788726807,
"grad_norm": 208.18948364257812,
"learning_rate": 1.4925373134328355e-07,
"logits/chosen": 0.09322724491357803,
"logits/rejected": 0.06828559935092926,
"logps/chosen": -64.68141174316406,
"logps/ref_chosen": -64.70155334472656,
"logps/ref_rejected": -81.02095031738281,
"logps/rejected": -81.0068359375,
"loss": 1.397,
"margin_dpo/margin_mean": 0.0060374438762664795,
"margin_dpo/margin_std": 0.34163737297058105,
"step": 21
},
{
"epoch": 0.03325774754346183,
"fcm_dpo/beta": 0.7948847413063049,
"fcm_dpo/delta": 0.4967557489871979,
"fcm_dpo/margin": -0.008793145418167114,
"fcm_dpo/q_t": 0.501756489276886,
"grad_norm": 235.72225952148438,
"learning_rate": 1.5671641791044775e-07,
"logits/chosen": 0.020929213613271713,
"logits/rejected": -0.0007745649782009423,
"logps/chosen": -58.05523681640625,
"logps/ref_chosen": -58.03599166870117,
"logps/ref_rejected": -80.72721862792969,
"logps/rejected": -80.7376708984375,
"loss": 1.4021,
"margin_dpo/margin_mean": -0.008793413639068604,
"margin_dpo/margin_std": 0.23543663322925568,
"step": 22
},
{
"epoch": 0.03476946334089191,
"fcm_dpo/beta": 0.8356242179870605,
"fcm_dpo/delta": 0.4879206120967865,
"fcm_dpo/margin": -0.007578670978546143,
"fcm_dpo/q_t": 0.5014785528182983,
"grad_norm": 285.7133483886719,
"learning_rate": 1.6417910447761193e-07,
"logits/chosen": 0.1404346227645874,
"logits/rejected": 0.11506737768650055,
"logps/chosen": -66.38188934326172,
"logps/ref_chosen": -66.35608673095703,
"logps/ref_rejected": -93.02769470214844,
"logps/rejected": -93.04591369628906,
"loss": 1.4157,
"margin_dpo/margin_mean": -0.007578998804092407,
"margin_dpo/margin_std": 0.3775022029876709,
"step": 23
},
{
"epoch": 0.036281179138321996,
"fcm_dpo/beta": 0.8763637542724609,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.023401737213134766,
"fcm_dpo/q_t": 0.5050784349441528,
"grad_norm": 237.49981689453125,
"learning_rate": 1.716417910447761e-07,
"logits/chosen": 0.13775691390037537,
"logits/rejected": 0.10474318265914917,
"logps/chosen": -54.4842529296875,
"logps/ref_chosen": -54.461238861083984,
"logps/ref_rejected": -68.33817291259766,
"logps/rejected": -68.33778381347656,
"loss": 1.4189,
"margin_dpo/margin_mean": -0.02340218424797058,
"margin_dpo/margin_std": 0.2496114820241928,
"step": 24
},
{
"epoch": 0.03779289493575208,
"fcm_dpo/beta": 1.0109740495681763,
"fcm_dpo/delta": 0.933335542678833,
"fcm_dpo/margin": 0.07055863738059998,
"fcm_dpo/q_t": 0.48359498381614685,
"grad_norm": 284.9603271484375,
"learning_rate": 1.7910447761194027e-07,
"logits/chosen": 0.09323587268590927,
"logits/rejected": 0.04254044592380524,
"logps/chosen": -60.0122184753418,
"logps/ref_chosen": -60.00420379638672,
"logps/ref_rejected": -90.47376251220703,
"logps/rejected": -90.55233764648438,
"loss": 1.3345,
"margin_dpo/margin_mean": 0.07055890560150146,
"margin_dpo/margin_std": 0.27643194794654846,
"step": 25
},
{
"epoch": 0.039304610733182165,
"fcm_dpo/beta": 1.2268295288085938,
"fcm_dpo/delta": 0.9872031211853027,
"fcm_dpo/margin": 0.011123806238174438,
"fcm_dpo/q_t": 0.49779731035232544,
"grad_norm": 363.8032531738281,
"learning_rate": 1.8656716417910447e-07,
"logits/chosen": 0.12300634384155273,
"logits/rejected": 0.1041463315486908,
"logps/chosen": -56.83332061767578,
"logps/ref_chosen": -56.81915283203125,
"logps/ref_rejected": -77.84333038330078,
"logps/rejected": -77.86862182617188,
"loss": 1.418,
"margin_dpo/margin_mean": 0.011123299598693848,
"margin_dpo/margin_std": 0.3511189818382263,
"step": 26
},
{
"epoch": 0.04081632653061224,
"fcm_dpo/beta": 1.4186409711837769,
"fcm_dpo/delta": 0.4878283143043518,
"fcm_dpo/margin": -0.018930166959762573,
"fcm_dpo/q_t": 0.5060181617736816,
"grad_norm": 426.4020690917969,
"learning_rate": 1.9402985074626865e-07,
"logits/chosen": 0.10950794070959091,
"logits/rejected": 0.08432013541460037,
"logps/chosen": -62.87256622314453,
"logps/ref_chosen": -62.87702560424805,
"logps/ref_rejected": -71.34437561035156,
"logps/rejected": -71.32098388671875,
"loss": 1.465,
"margin_dpo/margin_mean": -0.0189303457736969,
"margin_dpo/margin_std": 0.3256291151046753,
"step": 27
},
{
"epoch": 0.042328042328042326,
"fcm_dpo/beta": 1.5392485857009888,
"fcm_dpo/delta": 0.40797513723373413,
"fcm_dpo/margin": 0.019986987113952637,
"fcm_dpo/q_t": 0.49472981691360474,
"grad_norm": 428.8187561035156,
"learning_rate": 2.0149253731343282e-07,
"logits/chosen": 0.05481361597776413,
"logits/rejected": 0.04620601236820221,
"logps/chosen": -59.83418273925781,
"logps/ref_chosen": -59.8333740234375,
"logps/ref_rejected": -70.39804077148438,
"logps/rejected": -70.4188232421875,
"loss": 1.4194,
"margin_dpo/margin_mean": 0.019986748695373535,
"margin_dpo/margin_std": 0.31196290254592896,
"step": 28
},
{
"epoch": 0.04383975812547241,
"fcm_dpo/beta": 1.7656760215759277,
"fcm_dpo/delta": 0.9155327081680298,
"fcm_dpo/margin": 0.05319638550281525,
"fcm_dpo/q_t": 0.47983771562576294,
"grad_norm": 544.4055786132812,
"learning_rate": 2.08955223880597e-07,
"logits/chosen": 0.13988614082336426,
"logits/rejected": 0.12211866676807404,
"logps/chosen": -74.13853454589844,
"logps/ref_chosen": -74.12020111083984,
"logps/ref_rejected": -83.33099365234375,
"logps/rejected": -83.40251159667969,
"loss": 1.3481,
"margin_dpo/margin_mean": 0.053196460008621216,
"margin_dpo/margin_std": 0.2625643312931061,
"step": 29
},
{
"epoch": 0.045351473922902494,
"fcm_dpo/beta": 2.0389435291290283,
"fcm_dpo/delta": 0.4901547431945801,
"fcm_dpo/margin": -0.004268288612365723,
"fcm_dpo/q_t": 0.5039973258972168,
"grad_norm": 641.551025390625,
"learning_rate": 2.1641791044776117e-07,
"logits/chosen": 0.13575318455696106,
"logits/rejected": 0.08044232428073883,
"logps/chosen": -50.786643981933594,
"logps/ref_chosen": -50.75128936767578,
"logps/ref_rejected": -89.29063415527344,
"logps/rejected": -89.32171630859375,
"loss": 1.4942,
"margin_dpo/margin_mean": -0.004268676042556763,
"margin_dpo/margin_std": 0.32202666997909546,
"step": 30
},
{
"epoch": 0.04686318972033258,
"fcm_dpo/beta": 2.3525500297546387,
"fcm_dpo/delta": 0.93133544921875,
"fcm_dpo/margin": 0.0310114324092865,
"fcm_dpo/q_t": 0.4873350262641907,
"grad_norm": 821.974365234375,
"learning_rate": 2.2388059701492537e-07,
"logits/chosen": 0.10764593631029129,
"logits/rejected": 0.061387479305267334,
"logps/chosen": -65.36897277832031,
"logps/ref_chosen": -65.33675384521484,
"logps/ref_rejected": -100.76666259765625,
"logps/rejected": -100.82989501953125,
"loss": 1.4747,
"margin_dpo/margin_mean": 0.03101155161857605,
"margin_dpo/margin_std": 0.35841095447540283,
"step": 31
},
{
"epoch": 0.04837490551776266,
"fcm_dpo/beta": 2.5499143600463867,
"fcm_dpo/delta": 0.3668806552886963,
"fcm_dpo/margin": 0.045269906520843506,
"fcm_dpo/q_t": 0.47517523169517517,
"grad_norm": 814.4436645507812,
"learning_rate": 2.3134328358208954e-07,
"logits/chosen": 0.10604210197925568,
"logits/rejected": 0.09800020605325699,
"logps/chosen": -67.19169616699219,
"logps/ref_chosen": -67.18333435058594,
"logps/ref_rejected": -82.80763244628906,
"logps/rejected": -82.86127471923828,
"loss": 1.4012,
"margin_dpo/margin_mean": 0.045270055532455444,
"margin_dpo/margin_std": 0.2860063314437866,
"step": 32
},
{
"epoch": 0.049886621315192746,
"fcm_dpo/beta": 2.7437379360198975,
"fcm_dpo/delta": 0.3657742738723755,
"fcm_dpo/margin": 0.042653635144233704,
"fcm_dpo/q_t": 0.4748280644416809,
"grad_norm": 934.1159057617188,
"learning_rate": 2.388059701492537e-07,
"logits/chosen": 0.0492779016494751,
"logits/rejected": 0.022379783913493156,
"logps/chosen": -64.0654067993164,
"logps/ref_chosen": -64.03948211669922,
"logps/ref_rejected": -75.68357849121094,
"logps/rejected": -75.75216674804688,
"loss": 1.4557,
"margin_dpo/margin_mean": 0.042654380202293396,
"margin_dpo/margin_std": 0.3218376338481903,
"step": 33
},
{
"epoch": 0.05139833711262283,
"fcm_dpo/beta": 3.2462871074676514,
"fcm_dpo/delta": 0.868577241897583,
"fcm_dpo/margin": 0.04391145706176758,
"fcm_dpo/q_t": 0.47198355197906494,
"grad_norm": 959.0977172851562,
"learning_rate": 2.4626865671641786e-07,
"logits/chosen": 0.0971483588218689,
"logits/rejected": 0.0673779547214508,
"logps/chosen": -53.695762634277344,
"logps/ref_chosen": -53.6642951965332,
"logps/ref_rejected": -65.77989959716797,
"logps/rejected": -65.85527038574219,
"loss": 1.4312,
"margin_dpo/margin_mean": 0.04391142725944519,
"margin_dpo/margin_std": 0.2787018120288849,
"step": 34
},
{
"epoch": 0.05291005291005291,
"fcm_dpo/beta": 3.5613765716552734,
"fcm_dpo/delta": 0.49954742193222046,
"fcm_dpo/margin": 0.0079115629196167,
"fcm_dpo/q_t": 0.4954897165298462,
"grad_norm": 1141.895751953125,
"learning_rate": 2.537313432835821e-07,
"logits/chosen": 0.04493723437190056,
"logits/rejected": 0.022909432649612427,
"logps/chosen": -61.07041549682617,
"logps/ref_chosen": -61.01686096191406,
"logps/ref_rejected": -72.78598022460938,
"logps/rejected": -72.84745025634766,
"loss": 1.6389,
"margin_dpo/margin_mean": 0.0079115629196167,
"margin_dpo/margin_std": 0.32450151443481445,
"step": 35
},
{
"epoch": 0.05442176870748299,
"fcm_dpo/beta": 3.89394474029541,
"fcm_dpo/delta": 0.39777103066444397,
"fcm_dpo/margin": 0.013609737157821655,
"fcm_dpo/q_t": 0.5010133981704712,
"grad_norm": 1230.4146728515625,
"learning_rate": 2.611940298507462e-07,
"logits/chosen": 0.1099657341837883,
"logits/rejected": 0.056441109627485275,
"logps/chosen": -50.61913299560547,
"logps/ref_chosen": -50.53736114501953,
"logps/ref_rejected": -78.11678314208984,
"logps/rejected": -78.212158203125,
"loss": 1.6768,
"margin_dpo/margin_mean": 0.013609647750854492,
"margin_dpo/margin_std": 0.32924020290374756,
"step": 36
},
{
"epoch": 0.055933484504913075,
"fcm_dpo/beta": 4.411220550537109,
"fcm_dpo/delta": 0.5480049252510071,
"fcm_dpo/margin": 0.10784178972244263,
"fcm_dpo/q_t": 0.4541955292224884,
"grad_norm": 1664.73095703125,
"learning_rate": 2.686567164179104e-07,
"logits/chosen": 0.08195307105779648,
"logits/rejected": 0.004701277241110802,
"logps/chosen": -59.608680725097656,
"logps/ref_chosen": -59.55394744873047,
"logps/ref_rejected": -108.27702331542969,
"logps/rejected": -108.43960571289062,
"loss": 1.4659,
"margin_dpo/margin_mean": 0.10784146189689636,
"margin_dpo/margin_std": 0.4072269797325134,
"step": 37
},
{
"epoch": 0.05744520030234316,
"fcm_dpo/beta": 4.725587844848633,
"fcm_dpo/delta": 0.22491098940372467,
"fcm_dpo/margin": 0.04019525647163391,
"fcm_dpo/q_t": 0.4804103374481201,
"grad_norm": 1523.71044921875,
"learning_rate": 2.761194029850746e-07,
"logits/chosen": 0.056998323649168015,
"logits/rejected": 0.04311235621571541,
"logps/chosen": -65.85867309570312,
"logps/ref_chosen": -65.78836059570312,
"logps/ref_rejected": -76.1619873046875,
"logps/rejected": -76.27249908447266,
"loss": 1.7981,
"margin_dpo/margin_mean": 0.040194928646087646,
"margin_dpo/margin_std": 0.36567050218582153,
"step": 38
},
{
"epoch": 0.05895691609977324,
"fcm_dpo/beta": 5.4121809005737305,
"fcm_dpo/delta": 0.8187992572784424,
"fcm_dpo/margin": 0.03373938798904419,
"fcm_dpo/q_t": 0.4426850378513336,
"grad_norm": 1836.44189453125,
"learning_rate": 2.8358208955223876e-07,
"logits/chosen": 0.14140446484088898,
"logits/rejected": 0.11523914337158203,
"logps/chosen": -57.26402282714844,
"logps/ref_chosen": -57.17681121826172,
"logps/ref_rejected": -79.486328125,
"logps/rejected": -79.60729217529297,
"loss": 1.9268,
"margin_dpo/margin_mean": 0.033740073442459106,
"margin_dpo/margin_std": 0.3646671772003174,
"step": 39
},
{
"epoch": 0.06046863189720333,
"fcm_dpo/beta": 5.907527923583984,
"fcm_dpo/delta": 0.297378271818161,
"fcm_dpo/margin": 0.006361484527587891,
"fcm_dpo/q_t": 0.4902653098106384,
"grad_norm": 2190.686767578125,
"learning_rate": 2.9104477611940296e-07,
"logits/chosen": 0.10774768888950348,
"logits/rejected": 0.058216311037540436,
"logps/chosen": -61.42626953125,
"logps/ref_chosen": -61.33416748046875,
"logps/ref_rejected": -79.10697174072266,
"logps/rejected": -79.20543670654297,
"loss": 2.1253,
"margin_dpo/margin_mean": 0.00636136531829834,
"margin_dpo/margin_std": 0.3456147611141205,
"step": 40
},
{
"epoch": 0.06198034769463341,
"fcm_dpo/beta": 6.096949100494385,
"fcm_dpo/delta": 0.3107817769050598,
"fcm_dpo/margin": 0.03788435459136963,
"fcm_dpo/q_t": 0.4918346107006073,
"grad_norm": 2313.122314453125,
"learning_rate": 2.985074626865671e-07,
"logits/chosen": 0.06469070911407471,
"logits/rejected": 0.04410509765148163,
"logps/chosen": -67.65518188476562,
"logps/ref_chosen": -67.5467300415039,
"logps/ref_rejected": -83.87788391113281,
"logps/rejected": -84.02423095703125,
"loss": 2.1107,
"margin_dpo/margin_mean": 0.03788486123085022,
"margin_dpo/margin_std": 0.3886951506137848,
"step": 41
},
{
"epoch": 0.06349206349206349,
"fcm_dpo/beta": 6.579242706298828,
"fcm_dpo/delta": 0.22767893970012665,
"fcm_dpo/margin": 0.00848454236984253,
"fcm_dpo/q_t": 0.4764997959136963,
"grad_norm": 2254.65283203125,
"learning_rate": 3.059701492537313e-07,
"logits/chosen": 0.04841721057891846,
"logits/rejected": 0.026929516345262527,
"logps/chosen": -61.369564056396484,
"logps/ref_chosen": -61.26485824584961,
"logps/ref_rejected": -76.3629150390625,
"logps/rejected": -76.47610473632812,
"loss": 2.2164,
"margin_dpo/margin_mean": 0.008484512567520142,
"margin_dpo/margin_std": 0.33282727003097534,
"step": 42
},
{
"epoch": 0.06500377928949358,
"fcm_dpo/beta": 7.226245880126953,
"fcm_dpo/delta": 0.5711226463317871,
"fcm_dpo/margin": 0.06204667687416077,
"fcm_dpo/q_t": 0.44025903940200806,
"grad_norm": 2534.08984375,
"learning_rate": 3.134328358208955e-07,
"logits/chosen": 0.062497012317180634,
"logits/rejected": 0.051983729004859924,
"logps/chosen": -71.88862609863281,
"logps/ref_chosen": -71.80902862548828,
"logps/ref_rejected": -81.12464141845703,
"logps/rejected": -81.26628875732422,
"loss": 1.9347,
"margin_dpo/margin_mean": 0.0620463490486145,
"margin_dpo/margin_std": 0.32832396030426025,
"step": 43
},
{
"epoch": 0.06651549508692366,
"fcm_dpo/beta": 7.3753557205200195,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.003143906593322754,
"fcm_dpo/q_t": 0.5123412609100342,
"grad_norm": 3110.311279296875,
"learning_rate": 3.2089552238805965e-07,
"logits/chosen": 0.058967474848032,
"logits/rejected": 0.02790246158838272,
"logps/chosen": -66.68571472167969,
"logps/ref_chosen": -66.55043029785156,
"logps/ref_rejected": -85.06198120117188,
"logps/rejected": -85.19412994384766,
"loss": 2.7173,
"margin_dpo/margin_mean": -0.00314408540725708,
"margin_dpo/margin_std": 0.39732399582862854,
"step": 44
},
{
"epoch": 0.06802721088435375,
"fcm_dpo/beta": 7.334336280822754,
"fcm_dpo/delta": -0.0020287036895751953,
"fcm_dpo/margin": 0.13652004301548004,
"fcm_dpo/q_t": 0.35875630378723145,
"grad_norm": 1952.19775390625,
"learning_rate": 3.2835820895522385e-07,
"logits/chosen": 0.10706457495689392,
"logits/rejected": 0.05497971177101135,
"logps/chosen": -62.34738540649414,
"logps/ref_chosen": -62.24385452270508,
"logps/ref_rejected": -92.96665954589844,
"logps/rejected": -93.20671081542969,
"loss": 1.5806,
"margin_dpo/margin_mean": 0.1365204155445099,
"margin_dpo/margin_std": 0.33906716108322144,
"step": 45
},
{
"epoch": 0.06953892668178382,
"fcm_dpo/beta": 7.5740742683410645,
"fcm_dpo/delta": 0.09216267615556717,
"fcm_dpo/margin": 0.12037345767021179,
"fcm_dpo/q_t": 0.4014553129673004,
"grad_norm": 2483.462646484375,
"learning_rate": 3.3582089552238805e-07,
"logits/chosen": 0.10916443914175034,
"logits/rejected": 0.06387359648942947,
"logps/chosen": -61.58867263793945,
"logps/ref_chosen": -61.498905181884766,
"logps/ref_rejected": -78.91172790527344,
"logps/rejected": -79.12187194824219,
"loss": 1.8488,
"margin_dpo/margin_mean": 0.12037333846092224,
"margin_dpo/margin_std": 0.37311580777168274,
"step": 46
},
{
"epoch": 0.0710506424792139,
"fcm_dpo/beta": 7.277153968811035,
"fcm_dpo/delta": -0.1604071408510208,
"fcm_dpo/margin": 0.15663331747055054,
"fcm_dpo/q_t": 0.3513562083244324,
"grad_norm": 1870.4586181640625,
"learning_rate": 3.432835820895522e-07,
"logits/chosen": 0.04342661425471306,
"logits/rejected": -0.00011028675362467766,
"logps/chosen": -51.65697479248047,
"logps/ref_chosen": -51.578346252441406,
"logps/ref_rejected": -68.2215576171875,
"logps/rejected": -68.45681762695312,
"loss": 1.3277,
"margin_dpo/margin_mean": 0.1566331386566162,
"margin_dpo/margin_std": 0.3138054609298706,
"step": 47
},
{
"epoch": 0.07256235827664399,
"fcm_dpo/beta": 8.184296607971191,
"fcm_dpo/delta": 0.7369337677955627,
"fcm_dpo/margin": 0.03382202982902527,
"fcm_dpo/q_t": 0.485114187002182,
"grad_norm": 2785.64453125,
"learning_rate": 3.507462686567164e-07,
"logits/chosen": 0.16853903234004974,
"logits/rejected": 0.1380309760570526,
"logps/chosen": -51.94841003417969,
"logps/ref_chosen": -51.79365158081055,
"logps/ref_rejected": -64.22503662109375,
"logps/rejected": -64.41361999511719,
"loss": 2.617,
"margin_dpo/margin_mean": 0.03382223844528198,
"margin_dpo/margin_std": 0.38840365409851074,
"step": 48
},
{
"epoch": 0.07407407407407407,
"fcm_dpo/beta": 9.334107398986816,
"fcm_dpo/delta": 0.5954843163490295,
"fcm_dpo/margin": 0.044813498854637146,
"fcm_dpo/q_t": 0.4577757716178894,
"grad_norm": 2963.8173828125,
"learning_rate": 3.5820895522388055e-07,
"logits/chosen": 0.038059771060943604,
"logits/rejected": 0.01622236706316471,
"logps/chosen": -58.26384353637695,
"logps/ref_chosen": -58.13460159301758,
"logps/ref_rejected": -64.63206481933594,
"logps/rejected": -64.80612182617188,
"loss": 2.5832,
"margin_dpo/margin_mean": 0.04481416940689087,
"margin_dpo/margin_std": 0.3690647482872009,
"step": 49
},
{
"epoch": 0.07558578987150416,
"fcm_dpo/beta": 9.334890365600586,
"fcm_dpo/delta": -0.06085062772035599,
"fcm_dpo/margin": 0.11278587579727173,
"fcm_dpo/q_t": 0.4165031909942627,
"grad_norm": 2980.169921875,
"learning_rate": 3.6567164179104475e-07,
"logits/chosen": 0.11696229875087738,
"logits/rejected": 0.08666031062602997,
"logps/chosen": -52.99673080444336,
"logps/ref_chosen": -52.85643768310547,
"logps/ref_rejected": -72.17460632324219,
"logps/rejected": -72.42768859863281,
"loss": 2.2159,
"margin_dpo/margin_mean": 0.11278638243675232,
"margin_dpo/margin_std": 0.39860716462135315,
"step": 50
},
{
"epoch": 0.07709750566893424,
"fcm_dpo/beta": 8.363540649414062,
"fcm_dpo/delta": -0.5726056098937988,
"fcm_dpo/margin": 0.15886437892913818,
"fcm_dpo/q_t": 0.4000622630119324,
"grad_norm": 2550.51953125,
"learning_rate": 3.7313432835820895e-07,
"logits/chosen": 0.08093120157718658,
"logits/rejected": 0.05302312225103378,
"logps/chosen": -63.80950927734375,
"logps/ref_chosen": -63.65644073486328,
"logps/ref_rejected": -86.13229370117188,
"logps/rejected": -86.44422912597656,
"loss": 1.8986,
"margin_dpo/margin_mean": 0.15886464715003967,
"margin_dpo/margin_std": 0.420296311378479,
"step": 51
},
{
"epoch": 0.07860922146636433,
"fcm_dpo/beta": 8.45156478881836,
"fcm_dpo/delta": 0.09679965674877167,
"fcm_dpo/margin": 0.10791899263858795,
"fcm_dpo/q_t": 0.4053017497062683,
"grad_norm": 3315.59912109375,
"learning_rate": 3.805970149253731e-07,
"logits/chosen": 0.07245868444442749,
"logits/rejected": 0.023480474948883057,
"logps/chosen": -68.0364990234375,
"logps/ref_chosen": -67.8402099609375,
"logps/ref_rejected": -96.97090911865234,
"logps/rejected": -97.27511596679688,
"loss": 2.2996,
"margin_dpo/margin_mean": 0.10791890323162079,
"margin_dpo/margin_std": 0.4656970500946045,
"step": 52
},
{
"epoch": 0.0801209372637944,
"fcm_dpo/beta": 8.50004768371582,
"fcm_dpo/delta": 0.018193505704402924,
"fcm_dpo/margin": 0.11565050482749939,
"fcm_dpo/q_t": 0.371160626411438,
"grad_norm": 2222.445556640625,
"learning_rate": 3.880597014925373e-07,
"logits/chosen": 0.07711566984653473,
"logits/rejected": 0.06644274294376373,
"logps/chosen": -57.06134796142578,
"logps/ref_chosen": -56.87813949584961,
"logps/ref_rejected": -60.75569152832031,
"logps/rejected": -61.05455017089844,
"loss": 1.7873,
"margin_dpo/margin_mean": 0.11565083265304565,
"margin_dpo/margin_std": 0.32278263568878174,
"step": 53
},
{
"epoch": 0.08163265306122448,
"fcm_dpo/beta": 8.553638458251953,
"fcm_dpo/delta": 0.20764021575450897,
"fcm_dpo/margin": 0.09295859932899475,
"fcm_dpo/q_t": 0.40855568647384644,
"grad_norm": 2532.174072265625,
"learning_rate": 3.9552238805970144e-07,
"logits/chosen": 0.03949524462223053,
"logits/rejected": 0.024343054741621017,
"logps/chosen": -47.497901916503906,
"logps/ref_chosen": -47.26692199707031,
"logps/ref_rejected": -62.19426727294922,
"logps/rejected": -62.51820373535156,
"loss": 2.2098,
"margin_dpo/margin_mean": 0.09295853972434998,
"margin_dpo/margin_std": 0.3836787939071655,
"step": 54
},
{
"epoch": 0.08314436885865457,
"fcm_dpo/beta": 7.794929027557373,
"fcm_dpo/delta": -0.9819808006286621,
"fcm_dpo/margin": 0.23201557993888855,
"fcm_dpo/q_t": 0.3249555230140686,
"grad_norm": 2186.456298828125,
"learning_rate": 4.0298507462686564e-07,
"logits/chosen": 0.029716331511735916,
"logits/rejected": -0.04705657809972763,
"logps/chosen": -50.511959075927734,
"logps/ref_chosen": -50.32619094848633,
"logps/ref_rejected": -92.44389343261719,
"logps/rejected": -92.8616714477539,
"loss": 1.4207,
"margin_dpo/margin_mean": 0.2320151925086975,
"margin_dpo/margin_std": 0.42721158266067505,
"step": 55
},
{
"epoch": 0.08465608465608465,
"fcm_dpo/beta": 7.265107154846191,
"fcm_dpo/delta": -0.08848509937524796,
"fcm_dpo/margin": 0.1485108733177185,
"fcm_dpo/q_t": 0.32371559739112854,
"grad_norm": 1776.4510498046875,
"learning_rate": 4.1044776119402984e-07,
"logits/chosen": 0.12203441560268402,
"logits/rejected": 0.09960527718067169,
"logps/chosen": -56.979713439941406,
"logps/ref_chosen": -56.766971588134766,
"logps/ref_rejected": -66.30504608154297,
"logps/rejected": -66.66629791259766,
"loss": 1.5337,
"margin_dpo/margin_mean": 0.1485109031200409,
"margin_dpo/margin_std": 0.36086180806159973,
"step": 56
},
{
"epoch": 0.08616780045351474,
"fcm_dpo/beta": 7.139953136444092,
"fcm_dpo/delta": -0.0027963966131210327,
"fcm_dpo/margin": 0.14022627472877502,
"fcm_dpo/q_t": 0.3702242970466614,
"grad_norm": 2208.6005859375,
"learning_rate": 4.17910447761194e-07,
"logits/chosen": 0.09269669651985168,
"logits/rejected": 0.02815322019159794,
"logps/chosen": -58.0312614440918,
"logps/ref_chosen": -57.76774597167969,
"logps/ref_rejected": -82.75698852539062,
"logps/rejected": -83.16073608398438,
"loss": 1.9812,
"margin_dpo/margin_mean": 0.14022645354270935,
"margin_dpo/margin_std": 0.4531812369823456,
"step": 57
},
{
"epoch": 0.08767951625094482,
"fcm_dpo/beta": 6.859474182128906,
"fcm_dpo/delta": -0.0021596550941467285,
"fcm_dpo/margin": 0.13939353823661804,
"fcm_dpo/q_t": 0.4166187345981598,
"grad_norm": 2155.36083984375,
"learning_rate": 4.253731343283582e-07,
"logits/chosen": 0.04018617421388626,
"logits/rejected": 0.024990694597363472,
"logps/chosen": -72.99790954589844,
"logps/ref_chosen": -72.76408386230469,
"logps/ref_rejected": -84.49275207519531,
"logps/rejected": -84.865966796875,
"loss": 2.1325,
"margin_dpo/margin_mean": 0.13939306139945984,
"margin_dpo/margin_std": 0.4941212832927704,
"step": 58
},
{
"epoch": 0.08919123204837491,
"fcm_dpo/beta": 7.301891326904297,
"fcm_dpo/delta": -0.22430884838104248,
"fcm_dpo/margin": 0.15986737608909607,
"fcm_dpo/q_t": 0.36295658349990845,
"grad_norm": 1929.1551513671875,
"learning_rate": 4.3283582089552234e-07,
"logits/chosen": 0.10927902162075043,
"logits/rejected": 0.04400138556957245,
"logps/chosen": -50.06452178955078,
"logps/ref_chosen": -49.820777893066406,
"logps/ref_rejected": -77.14368438720703,
"logps/rejected": -77.54730224609375,
"loss": 1.5376,
"margin_dpo/margin_mean": 0.15986764430999756,
"margin_dpo/margin_std": 0.36004385352134705,
"step": 59
},
{
"epoch": 0.09070294784580499,
"fcm_dpo/beta": 7.388426780700684,
"fcm_dpo/delta": 0.45878517627716064,
"fcm_dpo/margin": 0.07655715942382812,
"fcm_dpo/q_t": 0.44889310002326965,
"grad_norm": 2771.480712890625,
"learning_rate": 4.4029850746268654e-07,
"logits/chosen": 0.09602642804384232,
"logits/rejected": 0.09458990395069122,
"logps/chosen": -63.469207763671875,
"logps/ref_chosen": -63.22477340698242,
"logps/ref_rejected": -61.360477447509766,
"logps/rejected": -61.68146896362305,
"loss": 2.1317,
"margin_dpo/margin_mean": 0.07655695080757141,
"margin_dpo/margin_std": 0.3793250024318695,
"step": 60
},
{
"epoch": 0.09221466364323508,
"fcm_dpo/beta": 7.838181495666504,
"fcm_dpo/delta": 0.23324143886566162,
"fcm_dpo/margin": 0.09999506175518036,
"fcm_dpo/q_t": 0.40534743666648865,
"grad_norm": 2478.5703125,
"learning_rate": 4.4776119402985074e-07,
"logits/chosen": 0.12898309528827667,
"logits/rejected": 0.0962858498096466,
"logps/chosen": -49.336029052734375,
"logps/ref_chosen": -49.01679992675781,
"logps/ref_rejected": -74.90817260742188,
"logps/rejected": -75.327392578125,
"loss": 2.1133,
"margin_dpo/margin_mean": 0.09999510645866394,
"margin_dpo/margin_std": 0.3832091987133026,
"step": 61
},
{
"epoch": 0.09372637944066516,
"fcm_dpo/beta": 7.966916084289551,
"fcm_dpo/delta": -0.13704264163970947,
"fcm_dpo/margin": 0.13938570022583008,
"fcm_dpo/q_t": 0.36962825059890747,
"grad_norm": 2647.16162109375,
"learning_rate": 4.552238805970149e-07,
"logits/chosen": 0.11322169005870819,
"logits/rejected": 0.07340162247419357,
"logps/chosen": -63.07339859008789,
"logps/ref_chosen": -62.751869201660156,
"logps/ref_rejected": -78.93360900878906,
"logps/rejected": -79.39452362060547,
"loss": 2.0748,
"margin_dpo/margin_mean": 0.13938573002815247,
"margin_dpo/margin_std": 0.46554332971572876,
"step": 62
},
{
"epoch": 0.09523809523809523,
"fcm_dpo/beta": 7.405724048614502,
"fcm_dpo/delta": -0.43855780363082886,
"fcm_dpo/margin": 0.1857583224773407,
"fcm_dpo/q_t": 0.3208463490009308,
"grad_norm": 2495.121826171875,
"learning_rate": 4.626865671641791e-07,
"logits/chosen": 0.17758890986442566,
"logits/rejected": 0.15252208709716797,
"logps/chosen": -60.78285598754883,
"logps/ref_chosen": -60.51525115966797,
"logps/ref_rejected": -85.11021423339844,
"logps/rejected": -85.56358337402344,
"loss": 1.4936,
"margin_dpo/margin_mean": 0.1857585310935974,
"margin_dpo/margin_std": 0.3698121905326843,
"step": 63
},
{
"epoch": 0.09674981103552532,
"fcm_dpo/beta": 7.346306324005127,
"fcm_dpo/delta": 0.42287638783454895,
"fcm_dpo/margin": 0.08198145031929016,
"fcm_dpo/q_t": 0.44880834221839905,
"grad_norm": 2452.107421875,
"learning_rate": 4.701492537313433e-07,
"logits/chosen": 0.07220865786075592,
"logits/rejected": 0.047520771622657776,
"logps/chosen": -51.5291748046875,
"logps/ref_chosen": -51.20684814453125,
"logps/ref_rejected": -66.93081665039062,
"logps/rejected": -67.33512878417969,
"loss": 2.1847,
"margin_dpo/margin_mean": 0.08198148012161255,
"margin_dpo/margin_std": 0.39635199308395386,
"step": 64
},
{
"epoch": 0.0982615268329554,
"fcm_dpo/beta": 6.500675201416016,
"fcm_dpo/delta": -1.11879301071167,
"fcm_dpo/margin": 0.2926085889339447,
"fcm_dpo/q_t": 0.2865545451641083,
"grad_norm": 2000.08251953125,
"learning_rate": 4.776119402985074e-07,
"logits/chosen": 0.1627321094274521,
"logits/rejected": 0.13373470306396484,
"logps/chosen": -67.60956573486328,
"logps/ref_chosen": -67.2886962890625,
"logps/ref_rejected": -74.44281005859375,
"logps/rejected": -75.05628967285156,
"loss": 1.3075,
"margin_dpo/margin_mean": 0.29260820150375366,
"margin_dpo/margin_std": 0.4580235481262207,
"step": 65
},
{
"epoch": 0.09977324263038549,
"fcm_dpo/beta": 6.545133590698242,
"fcm_dpo/delta": 0.5023878216743469,
"fcm_dpo/margin": 0.08048596978187561,
"fcm_dpo/q_t": 0.435330331325531,
"grad_norm": 2383.31005859375,
"learning_rate": 4.850746268656717e-07,
"logits/chosen": 0.08348944783210754,
"logits/rejected": 0.05969405546784401,
"logps/chosen": -71.08229064941406,
"logps/ref_chosen": -70.743408203125,
"logps/ref_rejected": -77.26499938964844,
"logps/rejected": -77.68437194824219,
"loss": 1.9852,
"margin_dpo/margin_mean": 0.08048596978187561,
"margin_dpo/margin_std": 0.4329206943511963,
"step": 66
},
{
"epoch": 0.10128495842781557,
"fcm_dpo/beta": 6.768010139465332,
"fcm_dpo/delta": 0.0033745458349585533,
"fcm_dpo/margin": 0.1473006308078766,
"fcm_dpo/q_t": 0.39285334944725037,
"grad_norm": 2144.995849609375,
"learning_rate": 4.925373134328357e-07,
"logits/chosen": 0.06257347017526627,
"logits/rejected": 0.007243716157972813,
"logps/chosen": -60.883270263671875,
"logps/ref_chosen": -60.60260009765625,
"logps/ref_rejected": -75.22235870361328,
"logps/rejected": -75.65032958984375,
"loss": 1.6865,
"margin_dpo/margin_mean": 0.1473003625869751,
"margin_dpo/margin_std": 0.44022125005722046,
"step": 67
},
{
"epoch": 0.10279667422524566,
"fcm_dpo/beta": 6.251596450805664,
"fcm_dpo/delta": -0.4169546067714691,
"fcm_dpo/margin": 0.21536040306091309,
"fcm_dpo/q_t": 0.33271753787994385,
"grad_norm": 1849.2083740234375,
"learning_rate": 5e-07,
"logits/chosen": 0.04055653512477875,
"logits/rejected": 0.01153562217950821,
"logps/chosen": -77.89004516601562,
"logps/ref_chosen": -77.52836608886719,
"logps/ref_rejected": -93.17778015136719,
"logps/rejected": -93.75480651855469,
"loss": 1.5844,
"margin_dpo/margin_mean": 0.21536031365394592,
"margin_dpo/margin_std": 0.4642139673233032,
"step": 68
},
{
"epoch": 0.10430839002267574,
"fcm_dpo/beta": 6.393548488616943,
"fcm_dpo/delta": -0.062498897314071655,
"fcm_dpo/margin": 0.16213825345039368,
"fcm_dpo/q_t": 0.3610564172267914,
"grad_norm": 1912.0982666015625,
"learning_rate": 4.999965034812934e-07,
"logits/chosen": 0.06711920350790024,
"logits/rejected": 0.02523168735206127,
"logps/chosen": -66.31640625,
"logps/ref_chosen": -65.94305419921875,
"logps/ref_rejected": -89.7735595703125,
"logps/rejected": -90.30905151367188,
"loss": 1.8845,
"margin_dpo/margin_mean": 0.16213801503181458,
"margin_dpo/margin_std": 0.45384150743484497,
"step": 69
},
{
"epoch": 0.10582010582010581,
"fcm_dpo/beta": 5.698063850402832,
"fcm_dpo/delta": -0.3884986937046051,
"fcm_dpo/margin": 0.13598540425300598,
"fcm_dpo/q_t": 0.3947943449020386,
"grad_norm": 1745.823486328125,
"learning_rate": 4.999860140229787e-07,
"logits/chosen": 0.11856390535831451,
"logits/rejected": 0.09516175091266632,
"logps/chosen": -62.308258056640625,
"logps/ref_chosen": -61.95791244506836,
"logps/ref_rejected": -75.80945587158203,
"logps/rejected": -76.29579162597656,
"loss": 1.7483,
"margin_dpo/margin_mean": 0.1359853446483612,
"margin_dpo/margin_std": 0.397053986787796,
"step": 70
},
{
"epoch": 0.1073318216175359,
"fcm_dpo/beta": 6.111842632293701,
"fcm_dpo/delta": 0.5105581283569336,
"fcm_dpo/margin": 0.08478209376335144,
"fcm_dpo/q_t": 0.4329409897327423,
"grad_norm": 2060.398193359375,
"learning_rate": 4.999685319184688e-07,
"logits/chosen": 0.0789206326007843,
"logits/rejected": 0.06302116811275482,
"logps/chosen": -63.77684020996094,
"logps/ref_chosen": -63.34757995605469,
"logps/ref_rejected": -67.49658203125,
"logps/rejected": -68.0106201171875,
"loss": 2.1644,
"margin_dpo/margin_mean": 0.08478212356567383,
"margin_dpo/margin_std": 0.4536302983760834,
"step": 71
},
{
"epoch": 0.10884353741496598,
"fcm_dpo/beta": 5.628780364990234,
"fcm_dpo/delta": -0.7731190919876099,
"fcm_dpo/margin": 0.29211413860321045,
"fcm_dpo/q_t": 0.3161046504974365,
"grad_norm": 1577.560302734375,
"learning_rate": 4.999440576567755e-07,
"logits/chosen": 0.0920741856098175,
"logits/rejected": 0.029652319848537445,
"logps/chosen": -56.157230377197266,
"logps/ref_chosen": -55.85929870605469,
"logps/ref_rejected": -68.45423889160156,
"logps/rejected": -69.04428100585938,
"loss": 1.1926,
"margin_dpo/margin_mean": 0.29211464524269104,
"margin_dpo/margin_std": 0.4848480224609375,
"step": 72
},
{
"epoch": 0.11035525321239607,
"fcm_dpo/beta": 5.599390029907227,
"fcm_dpo/delta": 0.03130987286567688,
"fcm_dpo/margin": 0.17031516134738922,
"fcm_dpo/q_t": 0.40101659297943115,
"grad_norm": 1934.7177734375,
"learning_rate": 4.999125919224965e-07,
"logits/chosen": 0.06832102686166763,
"logits/rejected": 0.05396275222301483,
"logps/chosen": -69.59184265136719,
"logps/ref_chosen": -69.13880920410156,
"logps/ref_rejected": -79.04586791992188,
"logps/rejected": -79.66921997070312,
"loss": 1.8098,
"margin_dpo/margin_mean": 0.17031550407409668,
"margin_dpo/margin_std": 0.48382318019866943,
"step": 73
},
{
"epoch": 0.11186696900982615,
"fcm_dpo/beta": 5.374726295471191,
"fcm_dpo/delta": -0.2641603350639343,
"fcm_dpo/margin": 0.22797469794750214,
"fcm_dpo/q_t": 0.36758503317832947,
"grad_norm": 1349.6485595703125,
"learning_rate": 4.998741355957963e-07,
"logits/chosen": 0.09487976133823395,
"logits/rejected": 0.04454671964049339,
"logps/chosen": -50.26209259033203,
"logps/ref_chosen": -49.923736572265625,
"logps/ref_rejected": -81.73213958740234,
"logps/rejected": -82.2984619140625,
"loss": 1.4011,
"margin_dpo/margin_mean": 0.22797417640686035,
"margin_dpo/margin_std": 0.46810248494148254,
"step": 74
},
{
"epoch": 0.11337868480725624,
"fcm_dpo/beta": 4.875063896179199,
"fcm_dpo/delta": -0.2485802173614502,
"fcm_dpo/margin": 0.24743963778018951,
"fcm_dpo/q_t": 0.34337544441223145,
"grad_norm": 1073.2872314453125,
"learning_rate": 4.998286897523808e-07,
"logits/chosen": 0.07122410833835602,
"logits/rejected": 0.04160505533218384,
"logps/chosen": -46.41517639160156,
"logps/ref_chosen": -46.06875228881836,
"logps/ref_rejected": -66.1181411743164,
"logps/rejected": -66.71200561523438,
"loss": 1.2434,
"margin_dpo/margin_mean": 0.24743930995464325,
"margin_dpo/margin_std": 0.4698425531387329,
"step": 75
},
{
"epoch": 0.11489040060468632,
"fcm_dpo/beta": 5.057272911071777,
"fcm_dpo/delta": 0.1727055013179779,
"fcm_dpo/margin": 0.16627338528633118,
"fcm_dpo/q_t": 0.3987014889717102,
"grad_norm": 1406.1661376953125,
"learning_rate": 4.997762556634679e-07,
"logits/chosen": 0.10183432698249817,
"logits/rejected": 0.058754947036504745,
"logps/chosen": -54.42407989501953,
"logps/ref_chosen": -54.06275177001953,
"logps/ref_rejected": -74.87464141845703,
"logps/rejected": -75.4022445678711,
"loss": 1.5553,
"margin_dpo/margin_mean": 0.16627269983291626,
"margin_dpo/margin_std": 0.4644964933395386,
"step": 76
},
{
"epoch": 0.1164021164021164,
"fcm_dpo/beta": 5.224140167236328,
"fcm_dpo/delta": 0.3271293044090271,
"fcm_dpo/margin": 0.1326499581336975,
"fcm_dpo/q_t": 0.41528427600860596,
"grad_norm": 1620.2890625,
"learning_rate": 4.99716834795752e-07,
"logits/chosen": 0.10804985463619232,
"logits/rejected": 0.06888192892074585,
"logps/chosen": -53.52935028076172,
"logps/ref_chosen": -53.07609176635742,
"logps/ref_rejected": -74.45601654052734,
"logps/rejected": -75.04192352294922,
"loss": 1.6511,
"margin_dpo/margin_mean": 0.1326504349708557,
"margin_dpo/margin_std": 0.44195854663848877,
"step": 77
},
{
"epoch": 0.11791383219954649,
"fcm_dpo/beta": 5.861191749572754,
"fcm_dpo/delta": 0.4837532937526703,
"fcm_dpo/margin": 0.09206165373325348,
"fcm_dpo/q_t": 0.40167397260665894,
"grad_norm": 2161.646240234375,
"learning_rate": 4.996504288113623e-07,
"logits/chosen": 0.039431821554899216,
"logits/rejected": 0.01997038722038269,
"logps/chosen": -68.12590026855469,
"logps/ref_chosen": -67.72541809082031,
"logps/ref_rejected": -79.03926849365234,
"logps/rejected": -79.53181457519531,
"loss": 2.4878,
"margin_dpo/margin_mean": 0.09206125140190125,
"margin_dpo/margin_std": 0.583328366279602,
"step": 78
},
{
"epoch": 0.11942554799697656,
"fcm_dpo/beta": 6.234781265258789,
"fcm_dpo/delta": 0.15417495369911194,
"fcm_dpo/margin": 0.13618767261505127,
"fcm_dpo/q_t": 0.389265775680542,
"grad_norm": 1901.2857666015625,
"learning_rate": 4.995770395678171e-07,
"logits/chosen": 0.150520920753479,
"logits/rejected": 0.09114135801792145,
"logps/chosen": -52.598140716552734,
"logps/ref_chosen": -52.16064453125,
"logps/ref_rejected": -83.31062316894531,
"logps/rejected": -83.8843002319336,
"loss": 1.8135,
"margin_dpo/margin_mean": 0.13618725538253784,
"margin_dpo/margin_std": 0.4358598589897156,
"step": 79
},
{
"epoch": 0.12093726379440665,
"fcm_dpo/beta": 6.20821475982666,
"fcm_dpo/delta": -0.13912838697433472,
"fcm_dpo/margin": 0.17933352291584015,
"fcm_dpo/q_t": 0.37054312229156494,
"grad_norm": 1953.0830078125,
"learning_rate": 4.994966691179711e-07,
"logits/chosen": 0.09082512557506561,
"logits/rejected": 0.035024385899305344,
"logps/chosen": -61.85608673095703,
"logps/ref_chosen": -61.410560607910156,
"logps/ref_rejected": -78.66004943847656,
"logps/rejected": -79.284912109375,
"loss": 1.7668,
"margin_dpo/margin_mean": 0.1793329417705536,
"margin_dpo/margin_std": 0.4744713306427002,
"step": 80
},
{
"epoch": 0.12244897959183673,
"fcm_dpo/beta": 5.641842842102051,
"fcm_dpo/delta": -0.34320950508117676,
"fcm_dpo/margin": 0.2292810082435608,
"fcm_dpo/q_t": 0.34941136837005615,
"grad_norm": 1636.6348876953125,
"learning_rate": 4.994093197099587e-07,
"logits/chosen": 0.07053244113922119,
"logits/rejected": 0.03839043155312538,
"logps/chosen": -64.20402526855469,
"logps/ref_chosen": -63.80437088012695,
"logps/ref_rejected": -79.3484115600586,
"logps/rejected": -79.97734069824219,
"loss": 1.4575,
"margin_dpo/margin_mean": 0.22928106784820557,
"margin_dpo/margin_std": 0.4831709563732147,
"step": 81
},
{
"epoch": 0.12396069538926682,
"fcm_dpo/beta": 5.125918388366699,
"fcm_dpo/delta": -0.6294834017753601,
"fcm_dpo/margin": 0.29933592677116394,
"fcm_dpo/q_t": 0.2602464258670807,
"grad_norm": 1189.437255859375,
"learning_rate": 4.993149937871306e-07,
"logits/chosen": 0.07535186409950256,
"logits/rejected": 0.013600241392850876,
"logps/chosen": -49.17131805419922,
"logps/ref_chosen": -48.817893981933594,
"logps/ref_rejected": -70.31497955322266,
"logps/rejected": -70.96774291992188,
"loss": 1.0653,
"margin_dpo/margin_mean": 0.2993359863758087,
"margin_dpo/margin_std": 0.41738319396972656,
"step": 82
},
{
"epoch": 0.1254724111866969,
"fcm_dpo/beta": 4.8786211013793945,
"fcm_dpo/delta": -0.19270329177379608,
"fcm_dpo/margin": 0.23940634727478027,
"fcm_dpo/q_t": 0.3403007388114929,
"grad_norm": 1305.392333984375,
"learning_rate": 4.992136939879856e-07,
"logits/chosen": 0.1480909138917923,
"logits/rejected": 0.09921949356794357,
"logps/chosen": -57.55104064941406,
"logps/ref_chosen": -57.15077209472656,
"logps/ref_rejected": -75.1710205078125,
"logps/rejected": -75.81069946289062,
"loss": 1.3796,
"margin_dpo/margin_mean": 0.23940622806549072,
"margin_dpo/margin_std": 0.48909199237823486,
"step": 83
},
{
"epoch": 0.12698412698412698,
"fcm_dpo/beta": 5.00314998626709,
"fcm_dpo/delta": 0.3583996295928955,
"fcm_dpo/margin": 0.13308590650558472,
"fcm_dpo/q_t": 0.4098473787307739,
"grad_norm": 1782.189208984375,
"learning_rate": 4.991054231460969e-07,
"logits/chosen": 0.12158288061618805,
"logits/rejected": 0.08126094937324524,
"logps/chosen": -65.28231048583984,
"logps/ref_chosen": -64.77729797363281,
"logps/ref_rejected": -84.71949768066406,
"logps/rejected": -85.35758972167969,
"loss": 2.005,
"margin_dpo/margin_mean": 0.1330859661102295,
"margin_dpo/margin_std": 0.5464334487915039,
"step": 84
},
{
"epoch": 0.12849584278155707,
"fcm_dpo/beta": 4.760544300079346,
"fcm_dpo/delta": -0.5031009912490845,
"fcm_dpo/margin": 0.3008922040462494,
"fcm_dpo/q_t": 0.3266737163066864,
"grad_norm": 1381.7164306640625,
"learning_rate": 4.989901842900325e-07,
"logits/chosen": 0.10575494170188904,
"logits/rejected": 0.06379462033510208,
"logps/chosen": -50.625328063964844,
"logps/ref_chosen": -50.25169372558594,
"logps/ref_rejected": -66.55439758300781,
"logps/rejected": -67.22891235351562,
"loss": 1.3231,
"margin_dpo/margin_mean": 0.30089178681373596,
"margin_dpo/margin_std": 0.5666717290878296,
"step": 85
},
{
"epoch": 0.13000755857898716,
"fcm_dpo/beta": 4.623910427093506,
"fcm_dpo/delta": -0.011702943593263626,
"fcm_dpo/margin": 0.21848827600479126,
"fcm_dpo/q_t": 0.3801780939102173,
"grad_norm": 1446.2276611328125,
"learning_rate": 4.988679806432711e-07,
"logits/chosen": 0.11094961315393448,
"logits/rejected": 0.09372542053461075,
"logps/chosen": -61.207275390625,
"logps/ref_chosen": -60.72917938232422,
"logps/ref_rejected": -72.30961608886719,
"logps/rejected": -73.0062026977539,
"loss": 1.5695,
"margin_dpo/margin_mean": 0.2184884250164032,
"margin_dpo/margin_std": 0.5658543109893799,
"step": 86
},
{
"epoch": 0.13151927437641722,
"fcm_dpo/beta": 4.357776641845703,
"fcm_dpo/delta": -0.31460562348365784,
"fcm_dpo/margin": 0.2916297912597656,
"fcm_dpo/q_t": 0.3396541476249695,
"grad_norm": 1350.0787353515625,
"learning_rate": 4.987388156241114e-07,
"logits/chosen": 0.08588653057813644,
"logits/rejected": 0.03219534084200859,
"logps/chosen": -66.21820831298828,
"logps/ref_chosen": -65.75796508789062,
"logps/ref_rejected": -84.81159973144531,
"logps/rejected": -85.56346130371094,
"loss": 1.3305,
"margin_dpo/margin_mean": 0.29163050651550293,
"margin_dpo/margin_std": 0.5484292507171631,
"step": 87
},
{
"epoch": 0.1330309901738473,
"fcm_dpo/beta": 4.051568984985352,
"fcm_dpo/delta": -0.3438121974468231,
"fcm_dpo/margin": 0.3187229037284851,
"fcm_dpo/q_t": 0.3379303812980652,
"grad_norm": 1034.83447265625,
"learning_rate": 4.986026928455767e-07,
"logits/chosen": 0.1511228084564209,
"logits/rejected": 0.12557803094387054,
"logps/chosen": -63.23377227783203,
"logps/ref_chosen": -62.82402801513672,
"logps/ref_rejected": -74.9607162475586,
"logps/rejected": -75.68917846679688,
"loss": 1.2325,
"margin_dpo/margin_mean": 0.31872305274009705,
"margin_dpo/margin_std": 0.5725570917129517,
"step": 88
},
{
"epoch": 0.1345427059712774,
"fcm_dpo/beta": 4.111684799194336,
"fcm_dpo/delta": -0.005461782217025757,
"fcm_dpo/margin": 0.2426835298538208,
"fcm_dpo/q_t": 0.36708956956863403,
"grad_norm": 1158.6134033203125,
"learning_rate": 4.984596161153135e-07,
"logits/chosen": 0.18977168202400208,
"logits/rejected": 0.11028344929218292,
"logps/chosen": -41.57792663574219,
"logps/ref_chosen": -41.191436767578125,
"logps/ref_rejected": -85.44769287109375,
"logps/rejected": -86.07687377929688,
"loss": 1.5568,
"margin_dpo/margin_mean": 0.24268493056297302,
"margin_dpo/margin_std": 0.5871646404266357,
"step": 89
},
{
"epoch": 0.1360544217687075,
"fcm_dpo/beta": 3.9873642921447754,
"fcm_dpo/delta": -0.07796984910964966,
"fcm_dpo/margin": 0.26826444268226624,
"fcm_dpo/q_t": 0.3403598964214325,
"grad_norm": 1097.566650390625,
"learning_rate": 4.983095894354857e-07,
"logits/chosen": 0.08516630530357361,
"logits/rejected": 0.03322757035493851,
"logps/chosen": -56.98159408569336,
"logps/ref_chosen": -56.58390808105469,
"logps/ref_rejected": -86.86978149414062,
"logps/rejected": -87.53573608398438,
"loss": 1.3846,
"margin_dpo/margin_mean": 0.26826387643814087,
"margin_dpo/margin_std": 0.6303993463516235,
"step": 90
},
{
"epoch": 0.13756613756613756,
"fcm_dpo/beta": 3.869006633758545,
"fcm_dpo/delta": -0.14137369394302368,
"fcm_dpo/margin": 0.2906471788883209,
"fcm_dpo/q_t": 0.34270262718200684,
"grad_norm": 1045.7557373046875,
"learning_rate": 4.98152617002662e-07,
"logits/chosen": 0.10475227236747742,
"logits/rejected": 0.06226480007171631,
"logps/chosen": -52.82066345214844,
"logps/ref_chosen": -52.38234329223633,
"logps/ref_rejected": -72.17642211914062,
"logps/rejected": -72.90538787841797,
"loss": 1.3782,
"margin_dpo/margin_mean": 0.29064705967903137,
"margin_dpo/margin_std": 0.6195999383926392,
"step": 91
},
{
"epoch": 0.13907785336356765,
"fcm_dpo/beta": 4.068203926086426,
"fcm_dpo/delta": 0.24122354388237,
"fcm_dpo/margin": 0.1886722892522812,
"fcm_dpo/q_t": 0.40757930278778076,
"grad_norm": 1103.524658203125,
"learning_rate": 4.979887032076988e-07,
"logits/chosen": 0.13855835795402527,
"logits/rejected": 0.10122767090797424,
"logps/chosen": -53.529884338378906,
"logps/ref_chosen": -53.00870132446289,
"logps/ref_rejected": -79.77812957763672,
"logps/rejected": -80.48798370361328,
"loss": 1.5458,
"margin_dpo/margin_mean": 0.18867191672325134,
"margin_dpo/margin_std": 0.5168828368186951,
"step": 92
},
{
"epoch": 0.14058956916099774,
"fcm_dpo/beta": 4.217402458190918,
"fcm_dpo/delta": 0.23529532551765442,
"fcm_dpo/margin": 0.1851963996887207,
"fcm_dpo/q_t": 0.3896501064300537,
"grad_norm": 1094.4332275390625,
"learning_rate": 4.978178526356172e-07,
"logits/chosen": 0.10360229760408401,
"logits/rejected": 0.07716604322195053,
"logps/chosen": -45.443016052246094,
"logps/ref_chosen": -44.90705108642578,
"logps/ref_rejected": -58.7879524230957,
"logps/rejected": -59.50910949707031,
"loss": 1.5698,
"margin_dpo/margin_mean": 0.185196191072464,
"margin_dpo/margin_std": 0.5235726237297058,
"step": 93
},
{
"epoch": 0.1421012849584278,
"fcm_dpo/beta": 3.851269006729126,
"fcm_dpo/delta": -0.4641948938369751,
"fcm_dpo/margin": 0.3565562069416046,
"fcm_dpo/q_t": 0.2976396679878235,
"grad_norm": 884.6483764648438,
"learning_rate": 4.976400700654751e-07,
"logits/chosen": 0.16318684816360474,
"logits/rejected": 0.12688644230365753,
"logps/chosen": -60.265010833740234,
"logps/ref_chosen": -59.93777084350586,
"logps/ref_rejected": -79.3138427734375,
"logps/rejected": -79.99763488769531,
"loss": 1.2145,
"margin_dpo/margin_mean": 0.3565560281276703,
"margin_dpo/margin_std": 0.6176527142524719,
"step": 94
},
{
"epoch": 0.1436130007558579,
"fcm_dpo/beta": 3.51151704788208,
"fcm_dpo/delta": -0.556129515171051,
"fcm_dpo/margin": 0.41667065024375916,
"fcm_dpo/q_t": 0.27452635765075684,
"grad_norm": 725.6941528320312,
"learning_rate": 4.974553604702332e-07,
"logits/chosen": 0.04962404444813728,
"logits/rejected": -0.009035153314471245,
"logps/chosen": -60.609527587890625,
"logps/ref_chosen": -60.168487548828125,
"logps/ref_rejected": -90.73665618896484,
"logps/rejected": -91.59436798095703,
"loss": 0.8553,
"margin_dpo/margin_mean": 0.4166697561740875,
"margin_dpo/margin_std": 0.5119737386703491,
"step": 95
},
{
"epoch": 0.14512471655328799,
"fcm_dpo/beta": 3.3476004600524902,
"fcm_dpo/delta": -0.3292371928691864,
"fcm_dpo/margin": 0.38447052240371704,
"fcm_dpo/q_t": 0.3057492971420288,
"grad_norm": 765.777587890625,
"learning_rate": 4.972637290166157e-07,
"logits/chosen": 0.09734475612640381,
"logits/rejected": 0.05707583576440811,
"logps/chosen": -61.152687072753906,
"logps/ref_chosen": -60.66877746582031,
"logps/ref_rejected": -88.30673217773438,
"logps/rejected": -89.17510986328125,
"loss": 1.0612,
"margin_dpo/margin_mean": 0.3844701647758484,
"margin_dpo/margin_std": 0.554520845413208,
"step": 96
},
{
"epoch": 0.14663643235071808,
"fcm_dpo/beta": 3.2319469451904297,
"fcm_dpo/delta": 0.1403380036354065,
"fcm_dpo/margin": 0.2673853933811188,
"fcm_dpo/q_t": 0.3827268183231354,
"grad_norm": 1068.170654296875,
"learning_rate": 4.970651810649666e-07,
"logits/chosen": 0.04420812800526619,
"logits/rejected": 0.002325967885553837,
"logps/chosen": -65.61512756347656,
"logps/ref_chosen": -65.04412078857422,
"logps/ref_rejected": -78.42092895507812,
"logps/rejected": -79.25931549072266,
"loss": 1.3514,
"margin_dpo/margin_mean": 0.2673855423927307,
"margin_dpo/margin_std": 0.6004109382629395,
"step": 97
},
{
"epoch": 0.14814814814814814,
"fcm_dpo/beta": 3.42587947845459,
"fcm_dpo/delta": 0.1983877718448639,
"fcm_dpo/margin": 0.23879370093345642,
"fcm_dpo/q_t": 0.3823162317276001,
"grad_norm": 922.5922241210938,
"learning_rate": 4.968597221690985e-07,
"logits/chosen": 0.13439376652240753,
"logits/rejected": 0.10842472314834595,
"logps/chosen": -55.90753936767578,
"logps/ref_chosen": -55.503231048583984,
"logps/ref_rejected": -72.81553649902344,
"logps/rejected": -73.45864868164062,
"loss": 1.2262,
"margin_dpo/margin_mean": 0.23879370093345642,
"margin_dpo/margin_std": 0.4960702657699585,
"step": 98
},
{
"epoch": 0.14965986394557823,
"fcm_dpo/beta": 3.3395771980285645,
"fcm_dpo/delta": -0.2737107276916504,
"fcm_dpo/margin": 0.3713855743408203,
"fcm_dpo/q_t": 0.31995880603790283,
"grad_norm": 838.7493286132812,
"learning_rate": 4.966473580761389e-07,
"logits/chosen": 0.14741893112659454,
"logits/rejected": 0.11150997132062912,
"logps/chosen": -58.98797607421875,
"logps/ref_chosen": -58.57563781738281,
"logps/ref_rejected": -78.693603515625,
"logps/rejected": -79.47733306884766,
"loss": 1.0331,
"margin_dpo/margin_mean": 0.37138599157333374,
"margin_dpo/margin_std": 0.5738701820373535,
"step": 99
},
{
"epoch": 0.15117157974300832,
"fcm_dpo/beta": 3.413600206375122,
"fcm_dpo/delta": 0.0910910964012146,
"fcm_dpo/margin": 0.2643635869026184,
"fcm_dpo/q_t": 0.3801841139793396,
"grad_norm": 1048.221923828125,
"learning_rate": 4.964280947263676e-07,
"logits/chosen": 0.12782281637191772,
"logits/rejected": 0.12069296091794968,
"logps/chosen": -80.02874755859375,
"logps/ref_chosen": -79.58343505859375,
"logps/ref_rejected": -92.152587890625,
"logps/rejected": -92.86225891113281,
"loss": 1.5245,
"margin_dpo/margin_mean": 0.264363557100296,
"margin_dpo/margin_std": 0.6884479522705078,
"step": 100
},
{
"epoch": 0.15268329554043839,
"fcm_dpo/beta": 3.2258787155151367,
"fcm_dpo/delta": -0.34300971031188965,
"fcm_dpo/margin": 0.40265652537345886,
"fcm_dpo/q_t": 0.29698413610458374,
"grad_norm": 702.3856811523438,
"learning_rate": 4.96201938253052e-07,
"logits/chosen": 0.1229773610830307,
"logits/rejected": 0.08786555379629135,
"logps/chosen": -52.74365234375,
"logps/ref_chosen": -52.332786560058594,
"logps/ref_rejected": -69.55589294433594,
"logps/rejected": -70.36941528320312,
"loss": 0.9957,
"margin_dpo/margin_mean": 0.40265610814094543,
"margin_dpo/margin_std": 0.5584487318992615,
"step": 101
},
{
"epoch": 0.15419501133786848,
"fcm_dpo/beta": 3.221522808074951,
"fcm_dpo/delta": 0.1677694320678711,
"fcm_dpo/margin": 0.26210707426071167,
"fcm_dpo/q_t": 0.3745608925819397,
"grad_norm": 933.2718505859375,
"learning_rate": 4.959688949822748e-07,
"logits/chosen": 0.05017256736755371,
"logits/rejected": 0.012172428891062737,
"logps/chosen": -65.21527099609375,
"logps/ref_chosen": -64.74348449707031,
"logps/ref_rejected": -69.06132507324219,
"logps/rejected": -69.79522705078125,
"loss": 1.3826,
"margin_dpo/margin_mean": 0.26210689544677734,
"margin_dpo/margin_std": 0.6122475862503052,
"step": 102
},
{
"epoch": 0.15570672713529857,
"fcm_dpo/beta": 3.343921661376953,
"fcm_dpo/delta": 0.14063423871994019,
"fcm_dpo/margin": 0.25882646441459656,
"fcm_dpo/q_t": 0.3664790987968445,
"grad_norm": 876.5048828125,
"learning_rate": 4.957289714327572e-07,
"logits/chosen": 0.14401525259017944,
"logits/rejected": 0.11314252763986588,
"logps/chosen": -64.34033203125,
"logps/ref_chosen": -63.83664321899414,
"logps/ref_rejected": -79.32362365722656,
"logps/rejected": -80.08615112304688,
"loss": 1.2541,
"margin_dpo/margin_mean": 0.2588259279727936,
"margin_dpo/margin_std": 0.537617564201355,
"step": 103
},
{
"epoch": 0.15721844293272866,
"fcm_dpo/beta": 3.3820950984954834,
"fcm_dpo/delta": 0.11869892477989197,
"fcm_dpo/margin": 0.2636609375476837,
"fcm_dpo/q_t": 0.37176138162612915,
"grad_norm": 1155.879150390625,
"learning_rate": 4.954821743156767e-07,
"logits/chosen": 0.10751787573099136,
"logits/rejected": 0.03126327693462372,
"logps/chosen": -61.48141098022461,
"logps/ref_chosen": -60.99920654296875,
"logps/ref_rejected": -98.84645080566406,
"logps/rejected": -99.59231567382812,
"loss": 1.4641,
"margin_dpo/margin_mean": 0.26366138458251953,
"margin_dpo/margin_std": 0.6468815803527832,
"step": 104
},
{
"epoch": 0.15873015873015872,
"fcm_dpo/beta": 3.3653788566589355,
"fcm_dpo/delta": -0.06612719595432281,
"fcm_dpo/margin": 0.31470757722854614,
"fcm_dpo/q_t": 0.3323326110839844,
"grad_norm": 1119.451171875,
"learning_rate": 4.952285105344791e-07,
"logits/chosen": 0.09568033367395401,
"logits/rejected": 0.044659968465566635,
"logps/chosen": -71.36830139160156,
"logps/ref_chosen": -70.95027160644531,
"logps/ref_rejected": -87.88340759277344,
"logps/rejected": -88.61614227294922,
"loss": 1.3043,
"margin_dpo/margin_mean": 0.3147069811820984,
"margin_dpo/margin_std": 0.6669665575027466,
"step": 105
},
{
"epoch": 0.1602418745275888,
"fcm_dpo/beta": 3.4193921089172363,
"fcm_dpo/delta": 0.15833157300949097,
"fcm_dpo/margin": 0.25018543004989624,
"fcm_dpo/q_t": 0.3685312867164612,
"grad_norm": 1039.7274169921875,
"learning_rate": 4.949679871846857e-07,
"logits/chosen": 0.1333768665790558,
"logits/rejected": 0.1204344779253006,
"logps/chosen": -62.8930549621582,
"logps/ref_chosen": -62.45933151245117,
"logps/ref_rejected": -67.00595092773438,
"logps/rejected": -67.68985748291016,
"loss": 1.3112,
"margin_dpo/margin_mean": 0.2501852214336395,
"margin_dpo/margin_std": 0.5423716306686401,
"step": 106
},
{
"epoch": 0.1617535903250189,
"fcm_dpo/beta": 3.652026653289795,
"fcm_dpo/delta": 0.4439771771430969,
"fcm_dpo/margin": 0.15993273258209229,
"fcm_dpo/q_t": 0.43477770686149597,
"grad_norm": 1451.044677734375,
"learning_rate": 4.947006115536947e-07,
"logits/chosen": 0.06545466929674149,
"logits/rejected": 0.046112120151519775,
"logps/chosen": -76.338623046875,
"logps/ref_chosen": -75.83796691894531,
"logps/ref_rejected": -87.74038696289062,
"logps/rejected": -88.40097045898438,
"loss": 1.8206,
"margin_dpo/margin_mean": 0.15993207693099976,
"margin_dpo/margin_std": 0.673768162727356,
"step": 107
},
{
"epoch": 0.16326530612244897,
"fcm_dpo/beta": 3.626925468444824,
"fcm_dpo/delta": -0.3162718117237091,
"fcm_dpo/margin": 0.3520002067089081,
"fcm_dpo/q_t": 0.33733585476875305,
"grad_norm": 953.0491943359375,
"learning_rate": 4.944263911205772e-07,
"logits/chosen": 0.07331952452659607,
"logits/rejected": 0.047261983156204224,
"logps/chosen": -68.7996826171875,
"logps/ref_chosen": -68.39323425292969,
"logps/ref_rejected": -83.24267578125,
"logps/rejected": -84.00111389160156,
"loss": 1.2468,
"margin_dpo/margin_mean": 0.35200008749961853,
"margin_dpo/margin_std": 0.6720170974731445,
"step": 108
},
{
"epoch": 0.16477702191987906,
"fcm_dpo/beta": 3.5566816329956055,
"fcm_dpo/delta": 0.07203048467636108,
"fcm_dpo/margin": 0.2625024616718292,
"fcm_dpo/q_t": 0.38299477100372314,
"grad_norm": 1039.4810791015625,
"learning_rate": 4.941453335558681e-07,
"logits/chosen": 0.06151915714144707,
"logits/rejected": 0.016085410490632057,
"logps/chosen": -55.96140670776367,
"logps/ref_chosen": -55.52748107910156,
"logps/ref_rejected": -83.55218505859375,
"logps/rejected": -84.24861907958984,
"loss": 1.3522,
"margin_dpo/margin_mean": 0.26250216364860535,
"margin_dpo/margin_std": 0.6019136309623718,
"step": 109
},
{
"epoch": 0.16628873771730915,
"fcm_dpo/beta": 3.8665976524353027,
"fcm_dpo/delta": 0.39112499356269836,
"fcm_dpo/margin": 0.16184496879577637,
"fcm_dpo/q_t": 0.41690564155578613,
"grad_norm": 1377.6337890625,
"learning_rate": 4.938574467213517e-07,
"logits/chosen": 0.033917784690856934,
"logits/rejected": 0.04065680876374245,
"logps/chosen": -81.66863250732422,
"logps/ref_chosen": -81.15874481201172,
"logps/ref_rejected": -72.56021118164062,
"logps/rejected": -73.23194885253906,
"loss": 1.6791,
"margin_dpo/margin_mean": 0.16184476017951965,
"margin_dpo/margin_std": 0.5766524076461792,
"step": 110
},
{
"epoch": 0.16780045351473924,
"fcm_dpo/beta": 3.9139037132263184,
"fcm_dpo/delta": -0.052766673266887665,
"fcm_dpo/margin": 0.2670096457004547,
"fcm_dpo/q_t": 0.36203914880752563,
"grad_norm": 1263.3140869140625,
"learning_rate": 4.935627386698418e-07,
"logits/chosen": 0.13472726941108704,
"logits/rejected": 0.10240040719509125,
"logps/chosen": -52.87812042236328,
"logps/ref_chosen": -52.358985900878906,
"logps/ref_rejected": -77.06150817871094,
"logps/rejected": -77.84764862060547,
"loss": 1.6275,
"margin_dpo/margin_mean": 0.26701000332832336,
"margin_dpo/margin_std": 0.6607059836387634,
"step": 111
},
{
"epoch": 0.1693121693121693,
"fcm_dpo/beta": 3.622741937637329,
"fcm_dpo/delta": -0.296929270029068,
"fcm_dpo/margin": 0.34460121393203735,
"fcm_dpo/q_t": 0.33765172958374023,
"grad_norm": 1077.2196044921875,
"learning_rate": 4.932612176449559e-07,
"logits/chosen": 0.05540486425161362,
"logits/rejected": 0.0011176479747518897,
"logps/chosen": -63.441078186035156,
"logps/ref_chosen": -63.02006530761719,
"logps/ref_rejected": -111.36941528320312,
"logps/rejected": -112.13502502441406,
"loss": 1.3207,
"margin_dpo/margin_mean": 0.34459996223449707,
"margin_dpo/margin_std": 0.6546406745910645,
"step": 112
},
{
"epoch": 0.1708238851095994,
"fcm_dpo/beta": 3.7972545623779297,
"fcm_dpo/delta": 0.12773901224136353,
"fcm_dpo/margin": 0.22868876159191132,
"fcm_dpo/q_t": 0.37673860788345337,
"grad_norm": 1496.3253173828125,
"learning_rate": 4.929528920808854e-07,
"logits/chosen": 0.09782901406288147,
"logits/rejected": 0.06331203132867813,
"logps/chosen": -56.33560562133789,
"logps/ref_chosen": -55.80766296386719,
"logps/ref_rejected": -69.84014129638672,
"logps/rejected": -70.59677124023438,
"loss": 1.5844,
"margin_dpo/margin_mean": 0.22868850827217102,
"margin_dpo/margin_std": 0.5964616537094116,
"step": 113
},
{
"epoch": 0.17233560090702948,
"fcm_dpo/beta": 3.3383381366729736,
"fcm_dpo/delta": -0.5678998231887817,
"fcm_dpo/margin": 0.4367901682853699,
"fcm_dpo/q_t": 0.29958969354629517,
"grad_norm": 712.6404418945312,
"learning_rate": 4.92637770602159e-07,
"logits/chosen": 0.13647404313087463,
"logits/rejected": 0.08034436404705048,
"logps/chosen": -66.70956420898438,
"logps/ref_chosen": -66.33277130126953,
"logps/ref_rejected": -71.61489868164062,
"logps/rejected": -72.42848205566406,
"loss": 1.0132,
"margin_dpo/margin_mean": 0.43679025769233704,
"margin_dpo/margin_std": 0.6181018352508545,
"step": 114
},
{
"epoch": 0.17384731670445955,
"fcm_dpo/beta": 3.3931922912597656,
"fcm_dpo/delta": 0.006447508931159973,
"fcm_dpo/margin": 0.291039377450943,
"fcm_dpo/q_t": 0.3765663206577301,
"grad_norm": 1062.108154296875,
"learning_rate": 4.923158620234019e-07,
"logits/chosen": 0.10150371491909027,
"logits/rejected": 0.047993022948503494,
"logps/chosen": -56.26490783691406,
"logps/ref_chosen": -55.74903869628906,
"logps/ref_rejected": -79.59849548339844,
"logps/rejected": -80.40541076660156,
"loss": 1.3013,
"margin_dpo/margin_mean": 0.29103943705558777,
"margin_dpo/margin_std": 0.6191039085388184,
"step": 115
},
{
"epoch": 0.17535903250188964,
"fcm_dpo/beta": 3.4025328159332275,
"fcm_dpo/delta": -0.009448423981666565,
"fcm_dpo/margin": 0.29369187355041504,
"fcm_dpo/q_t": 0.3570387065410614,
"grad_norm": 851.5135498046875,
"learning_rate": 4.91987175349089e-07,
"logits/chosen": 0.1060943752527237,
"logits/rejected": 0.04715292900800705,
"logps/chosen": -49.830238342285156,
"logps/ref_chosen": -49.36516571044922,
"logps/ref_rejected": -72.84671020507812,
"logps/rejected": -73.60546875,
"loss": 1.224,
"margin_dpo/margin_mean": 0.29369184374809265,
"margin_dpo/margin_std": 0.5422225594520569,
"step": 116
},
{
"epoch": 0.17687074829931973,
"fcm_dpo/beta": 3.344947338104248,
"fcm_dpo/delta": 0.09896770864725113,
"fcm_dpo/margin": 0.27185767889022827,
"fcm_dpo/q_t": 0.3576691150665283,
"grad_norm": 870.2111206054688,
"learning_rate": 4.916517197732933e-07,
"logits/chosen": 0.129032701253891,
"logits/rejected": 0.09620364010334015,
"logps/chosen": -58.112640380859375,
"logps/ref_chosen": -57.710899353027344,
"logps/ref_rejected": -69.77253723144531,
"logps/rejected": -70.4461441040039,
"loss": 1.3806,
"margin_dpo/margin_mean": 0.27185723185539246,
"margin_dpo/margin_std": 0.6084860563278198,
"step": 117
},
{
"epoch": 0.17838246409674982,
"fcm_dpo/beta": 3.271368980407715,
"fcm_dpo/delta": -0.09690429270267487,
"fcm_dpo/margin": 0.32874226570129395,
"fcm_dpo/q_t": 0.34997400641441345,
"grad_norm": 930.0485229492188,
"learning_rate": 4.913095046794281e-07,
"logits/chosen": 0.13153867423534393,
"logits/rejected": 0.09707070142030716,
"logps/chosen": -52.88352966308594,
"logps/ref_chosen": -52.479896545410156,
"logps/ref_rejected": -81.359130859375,
"logps/rejected": -82.09149169921875,
"loss": 1.2167,
"margin_dpo/margin_mean": 0.32874205708503723,
"margin_dpo/margin_std": 0.582424521446228,
"step": 118
},
{
"epoch": 0.17989417989417988,
"fcm_dpo/beta": 3.331033229827881,
"fcm_dpo/delta": 0.008004724979400635,
"fcm_dpo/margin": 0.2980421185493469,
"fcm_dpo/q_t": 0.35352998971939087,
"grad_norm": 886.5343017578125,
"learning_rate": 4.909605396399855e-07,
"logits/chosen": 0.08484401553869247,
"logits/rejected": 0.05185426026582718,
"logps/chosen": -61.905765533447266,
"logps/ref_chosen": -61.35767364501953,
"logps/ref_rejected": -75.71510314941406,
"logps/rejected": -76.56123352050781,
"loss": 1.2981,
"margin_dpo/margin_mean": 0.298042356967926,
"margin_dpo/margin_std": 0.5981870293617249,
"step": 119
},
{
"epoch": 0.18140589569160998,
"fcm_dpo/beta": 3.182232618331909,
"fcm_dpo/delta": -0.24365851283073425,
"fcm_dpo/margin": 0.3802499771118164,
"fcm_dpo/q_t": 0.3237895369529724,
"grad_norm": 770.2077026367188,
"learning_rate": 4.906048344162676e-07,
"logits/chosen": 0.07990750670433044,
"logits/rejected": 0.02834871970117092,
"logps/chosen": -60.297908782958984,
"logps/ref_chosen": -59.907569885253906,
"logps/ref_rejected": -79.6910629272461,
"logps/rejected": -80.46165466308594,
"loss": 1.0967,
"margin_dpo/margin_mean": 0.3802502751350403,
"margin_dpo/margin_std": 0.6159936189651489,
"step": 120
},
{
"epoch": 0.18291761148904007,
"fcm_dpo/beta": 3.1936514377593994,
"fcm_dpo/delta": 0.05449778214097023,
"fcm_dpo/margin": 0.29768818616867065,
"fcm_dpo/q_t": 0.3674450218677521,
"grad_norm": 816.0641479492188,
"learning_rate": 4.902423989581143e-07,
"logits/chosen": 0.17014677822589874,
"logits/rejected": 0.09852974861860275,
"logps/chosen": -56.18149948120117,
"logps/ref_chosen": -55.66604232788086,
"logps/ref_rejected": -101.56233978271484,
"logps/rejected": -102.37548828125,
"loss": 1.2968,
"margin_dpo/margin_mean": 0.2976876497268677,
"margin_dpo/margin_std": 0.6306780576705933,
"step": 121
},
{
"epoch": 0.18442932728647016,
"fcm_dpo/beta": 3.019984722137451,
"fcm_dpo/delta": -0.44598841667175293,
"fcm_dpo/margin": 0.4587884843349457,
"fcm_dpo/q_t": 0.29104509949684143,
"grad_norm": 804.1510009765625,
"learning_rate": 4.898732434036243e-07,
"logits/chosen": 0.09989838302135468,
"logits/rejected": 0.06974966824054718,
"logps/chosen": -63.821380615234375,
"logps/ref_chosen": -63.334373474121094,
"logps/ref_rejected": -73.67523193359375,
"logps/rejected": -74.62103271484375,
"loss": 0.969,
"margin_dpo/margin_mean": 0.45878836512565613,
"margin_dpo/margin_std": 0.6608290672302246,
"step": 122
},
{
"epoch": 0.18594104308390022,
"fcm_dpo/beta": 2.9597277641296387,
"fcm_dpo/delta": -0.11771807074546814,
"fcm_dpo/margin": 0.37006598711013794,
"fcm_dpo/q_t": 0.31938499212265015,
"grad_norm": 801.2340698242188,
"learning_rate": 4.894973780788722e-07,
"logits/chosen": 0.13346600532531738,
"logits/rejected": 0.09592346101999283,
"logps/chosen": -57.292945861816406,
"logps/ref_chosen": -56.89874267578125,
"logps/ref_rejected": -78.97028350830078,
"logps/rejected": -79.73455810546875,
"loss": 1.2025,
"margin_dpo/margin_mean": 0.37006592750549316,
"margin_dpo/margin_std": 0.640432596206665,
"step": 123
},
{
"epoch": 0.1874527588813303,
"fcm_dpo/beta": 2.767918109893799,
"fcm_dpo/delta": -0.2092204988002777,
"fcm_dpo/margin": 0.4275299310684204,
"fcm_dpo/q_t": 0.31604132056236267,
"grad_norm": 655.9889526367188,
"learning_rate": 4.89114813497619e-07,
"logits/chosen": 0.1320020854473114,
"logits/rejected": 0.08239568769931793,
"logps/chosen": -57.52501678466797,
"logps/ref_chosen": -57.116085052490234,
"logps/ref_rejected": -87.93074035644531,
"logps/rejected": -88.76720428466797,
"loss": 0.9876,
"margin_dpo/margin_mean": 0.4275299310684204,
"margin_dpo/margin_std": 0.6369043588638306,
"step": 124
},
{
"epoch": 0.1889644746787604,
"fcm_dpo/beta": 2.691709518432617,
"fcm_dpo/delta": -0.04524332284927368,
"fcm_dpo/margin": 0.3849112093448639,
"fcm_dpo/q_t": 0.32585692405700684,
"grad_norm": 650.68212890625,
"learning_rate": 4.887255603610184e-07,
"logits/chosen": 0.18828628957271576,
"logits/rejected": 0.1363983303308487,
"logps/chosen": -66.21900939941406,
"logps/ref_chosen": -65.7061767578125,
"logps/ref_rejected": -91.72711944580078,
"logps/rejected": -92.62486267089844,
"loss": 1.0744,
"margin_dpo/margin_mean": 0.3849112391471863,
"margin_dpo/margin_std": 0.5876812934875488,
"step": 125
},
{
"epoch": 0.19047619047619047,
"fcm_dpo/beta": 2.652081251144409,
"fcm_dpo/delta": -0.10823916643857956,
"fcm_dpo/margin": 0.41218724846839905,
"fcm_dpo/q_t": 0.3507199287414551,
"grad_norm": 565.577880859375,
"learning_rate": 4.883296295573176e-07,
"logits/chosen": -0.01404772698879242,
"logits/rejected": -0.02040482684969902,
"logps/chosen": -68.63446807861328,
"logps/ref_chosen": -68.17608642578125,
"logps/ref_rejected": -65.1175537109375,
"logps/rejected": -65.98812103271484,
"loss": 1.143,
"margin_dpo/margin_mean": 0.41218748688697815,
"margin_dpo/margin_std": 0.8156576156616211,
"step": 126
},
{
"epoch": 0.19198790627362056,
"fcm_dpo/beta": 2.6257121562957764,
"fcm_dpo/delta": -0.039457425475120544,
"fcm_dpo/margin": 0.39357566833496094,
"fcm_dpo/q_t": 0.32776835560798645,
"grad_norm": 675.0282592773438,
"learning_rate": 4.87927032161552e-07,
"logits/chosen": 0.06537148356437683,
"logits/rejected": 0.037958111613988876,
"logps/chosen": -62.41735076904297,
"logps/ref_chosen": -61.88023376464844,
"logps/ref_rejected": -68.46012878417969,
"logps/rejected": -69.39082336425781,
"loss": 1.1334,
"margin_dpo/margin_mean": 0.3935753107070923,
"margin_dpo/margin_std": 0.6591010093688965,
"step": 127
},
{
"epoch": 0.19349962207105065,
"fcm_dpo/beta": 2.6458208560943604,
"fcm_dpo/delta": 0.08855466544628143,
"fcm_dpo/margin": 0.3467669188976288,
"fcm_dpo/q_t": 0.36068370938301086,
"grad_norm": 708.3317260742188,
"learning_rate": 4.875177794352363e-07,
"logits/chosen": 0.09111534804105759,
"logits/rejected": 0.045390479266643524,
"logps/chosen": -67.32196044921875,
"logps/ref_chosen": -66.708984375,
"logps/ref_rejected": -94.97969055175781,
"logps/rejected": -95.9394302368164,
"loss": 1.2229,
"margin_dpo/margin_mean": 0.34676679968833923,
"margin_dpo/margin_std": 0.687119722366333,
"step": 128
},
{
"epoch": 0.19501133786848074,
"fcm_dpo/beta": 2.789608955383301,
"fcm_dpo/delta": 0.23963144421577454,
"fcm_dpo/margin": 0.2792533040046692,
"fcm_dpo/q_t": 0.38132244348526,
"grad_norm": 765.4723510742188,
"learning_rate": 4.871018828260491e-07,
"logits/chosen": 0.1117800921201706,
"logits/rejected": 0.10394299030303955,
"logps/chosen": -65.94437408447266,
"logps/ref_chosen": -65.33882904052734,
"logps/ref_rejected": -68.06109619140625,
"logps/rejected": -68.94589233398438,
"loss": 1.2034,
"margin_dpo/margin_mean": 0.279253751039505,
"margin_dpo/margin_std": 0.5657248497009277,
"step": 129
},
{
"epoch": 0.1965230536659108,
"fcm_dpo/beta": 2.7729220390319824,
"fcm_dpo/delta": 0.050821587443351746,
"fcm_dpo/margin": 0.33975258469581604,
"fcm_dpo/q_t": 0.35864949226379395,
"grad_norm": 768.4454956054688,
"learning_rate": 4.866793539675126e-07,
"logits/chosen": 0.09036006778478622,
"logits/rejected": 0.04790624603629112,
"logps/chosen": -59.219017028808594,
"logps/ref_chosen": -58.660743713378906,
"logps/ref_rejected": -79.24510192871094,
"logps/rejected": -80.14312744140625,
"loss": 1.1136,
"margin_dpo/margin_mean": 0.3397524058818817,
"margin_dpo/margin_std": 0.5575762987136841,
"step": 130
},
{
"epoch": 0.1980347694633409,
"fcm_dpo/beta": 2.782139301300049,
"fcm_dpo/delta": -0.2223011553287506,
"fcm_dpo/margin": 0.4299342930316925,
"fcm_dpo/q_t": 0.3284626305103302,
"grad_norm": 713.0172119140625,
"learning_rate": 4.86250204678667e-07,
"logits/chosen": 0.10117419809103012,
"logits/rejected": 0.048455823212862015,
"logps/chosen": -52.9766845703125,
"logps/ref_chosen": -52.51453399658203,
"logps/ref_rejected": -85.18299865722656,
"logps/rejected": -86.07508087158203,
"loss": 1.152,
"margin_dpo/margin_mean": 0.42993444204330444,
"margin_dpo/margin_std": 0.7103478908538818,
"step": 131
},
{
"epoch": 0.19954648526077098,
"fcm_dpo/beta": 2.728858470916748,
"fcm_dpo/delta": 0.014488308690488338,
"fcm_dpo/margin": 0.3616468608379364,
"fcm_dpo/q_t": 0.3465641736984253,
"grad_norm": 803.3519897460938,
"learning_rate": 4.858144469637408e-07,
"logits/chosen": 0.16957558691501617,
"logits/rejected": 0.1406971514225006,
"logps/chosen": -66.2645263671875,
"logps/ref_chosen": -65.68513488769531,
"logps/ref_rejected": -69.54120635986328,
"logps/rejected": -70.48225402832031,
"loss": 1.3716,
"margin_dpo/margin_mean": 0.36164700984954834,
"margin_dpo/margin_std": 0.8076159358024597,
"step": 132
},
{
"epoch": 0.20105820105820105,
"fcm_dpo/beta": 2.8398377895355225,
"fcm_dpo/delta": 0.20342613756656647,
"fcm_dpo/margin": 0.2855343222618103,
"fcm_dpo/q_t": 0.3706758916378021,
"grad_norm": 806.1981811523438,
"learning_rate": 4.853720930118138e-07,
"logits/chosen": 0.07018555700778961,
"logits/rejected": 0.06098049134016037,
"logps/chosen": -64.16683959960938,
"logps/ref_chosen": -63.598114013671875,
"logps/ref_rejected": -73.72798156738281,
"logps/rejected": -74.58223724365234,
"loss": 1.3296,
"margin_dpo/margin_mean": 0.2855341136455536,
"margin_dpo/margin_std": 0.6436434984207153,
"step": 133
},
{
"epoch": 0.20256991685563114,
"fcm_dpo/beta": 2.7438831329345703,
"fcm_dpo/delta": -0.14177896082401276,
"fcm_dpo/margin": 0.40749433636665344,
"fcm_dpo/q_t": 0.32042205333709717,
"grad_norm": 637.2561645507812,
"learning_rate": 4.849231551964771e-07,
"logits/chosen": 0.17743632197380066,
"logits/rejected": 0.129373237490654,
"logps/chosen": -54.34275817871094,
"logps/ref_chosen": -53.79457092285156,
"logps/ref_rejected": -74.16741943359375,
"logps/rejected": -75.12309265136719,
"loss": 1.0797,
"margin_dpo/margin_mean": 0.40749499201774597,
"margin_dpo/margin_std": 0.6834430694580078,
"step": 134
},
{
"epoch": 0.20408163265306123,
"fcm_dpo/beta": 2.8520781993865967,
"fcm_dpo/delta": 0.2403937578201294,
"fcm_dpo/margin": 0.2731159031391144,
"fcm_dpo/q_t": 0.3797072768211365,
"grad_norm": 688.9449462890625,
"learning_rate": 4.844676460754862e-07,
"logits/chosen": 0.09586119651794434,
"logits/rejected": 0.06667510420084,
"logps/chosen": -49.97583770751953,
"logps/ref_chosen": -49.441078186035156,
"logps/ref_rejected": -65.96878051757812,
"logps/rejected": -66.77665710449219,
"loss": 1.3673,
"margin_dpo/margin_mean": 0.27311572432518005,
"margin_dpo/margin_std": 0.6469433307647705,
"step": 135
},
{
"epoch": 0.20559334845049132,
"fcm_dpo/beta": 2.90311861038208,
"fcm_dpo/delta": 0.004882900044322014,
"fcm_dpo/margin": 0.3429165482521057,
"fcm_dpo/q_t": 0.36461225152015686,
"grad_norm": 1105.5750732421875,
"learning_rate": 4.840055783904106e-07,
"logits/chosen": 0.10260805487632751,
"logits/rejected": 0.04400411248207092,
"logps/chosen": -67.37596893310547,
"logps/ref_chosen": -66.75926208496094,
"logps/ref_rejected": -94.61787414550781,
"logps/rejected": -95.57749938964844,
"loss": 1.5811,
"margin_dpo/margin_mean": 0.34291741251945496,
"margin_dpo/margin_std": 0.9085370302200317,
"step": 136
},
{
"epoch": 0.20710506424792138,
"fcm_dpo/beta": 2.891000747680664,
"fcm_dpo/delta": -0.11807064712047577,
"fcm_dpo/margin": 0.38187700510025024,
"fcm_dpo/q_t": 0.3429448902606964,
"grad_norm": 694.5245971679688,
"learning_rate": 4.835369650662767e-07,
"logits/chosen": 0.10072774440050125,
"logits/rejected": 0.07752367854118347,
"logps/chosen": -57.34507751464844,
"logps/ref_chosen": -56.78379821777344,
"logps/ref_rejected": -69.89952087402344,
"logps/rejected": -70.8426742553711,
"loss": 1.2,
"margin_dpo/margin_mean": 0.3818773031234741,
"margin_dpo/margin_std": 0.6857679486274719,
"step": 137
},
{
"epoch": 0.20861678004535147,
"fcm_dpo/beta": 2.905198574066162,
"fcm_dpo/delta": 0.12694688141345978,
"fcm_dpo/margin": 0.3041801452636719,
"fcm_dpo/q_t": 0.3584628403186798,
"grad_norm": 783.094482421875,
"learning_rate": 4.830618192112065e-07,
"logits/chosen": 0.0714266374707222,
"logits/rejected": 0.04139017313718796,
"logps/chosen": -59.46550750732422,
"logps/ref_chosen": -58.766014099121094,
"logps/ref_rejected": -68.12371826171875,
"logps/rejected": -69.12739562988281,
"loss": 1.3424,
"margin_dpo/margin_mean": 0.3041801452636719,
"margin_dpo/margin_std": 0.6721060276031494,
"step": 138
},
{
"epoch": 0.21012849584278157,
"fcm_dpo/beta": 2.842034101486206,
"fcm_dpo/delta": -0.29515254497528076,
"fcm_dpo/margin": 0.4422586262226105,
"fcm_dpo/q_t": 0.32127201557159424,
"grad_norm": 698.6984252929688,
"learning_rate": 4.825801541160509e-07,
"logits/chosen": 0.058577846735715866,
"logits/rejected": 0.034485623240470886,
"logps/chosen": -71.8461685180664,
"logps/ref_chosen": -71.2255859375,
"logps/ref_rejected": -82.1834716796875,
"logps/rejected": -83.2463150024414,
"loss": 1.0673,
"margin_dpo/margin_mean": 0.44225820899009705,
"margin_dpo/margin_std": 0.6527875661849976,
"step": 139
},
{
"epoch": 0.21164021164021163,
"fcm_dpo/beta": 2.5584716796875,
"fcm_dpo/delta": -0.5142702460289001,
"fcm_dpo/margin": 0.5633730888366699,
"fcm_dpo/q_t": 0.28819897770881653,
"grad_norm": 720.5505981445312,
"learning_rate": 4.820919832540181e-07,
"logits/chosen": 0.06424537301063538,
"logits/rejected": 0.027711138129234314,
"logps/chosen": -63.82072830200195,
"logps/ref_chosen": -63.27766418457031,
"logps/ref_rejected": -83.30647277832031,
"logps/rejected": -84.41291809082031,
"loss": 1.1217,
"margin_dpo/margin_mean": 0.5633726119995117,
"margin_dpo/margin_std": 0.8722689151763916,
"step": 140
},
{
"epoch": 0.21315192743764172,
"fcm_dpo/beta": 2.474776029586792,
"fcm_dpo/delta": 0.05119156837463379,
"fcm_dpo/margin": 0.3848419487476349,
"fcm_dpo/q_t": 0.3656679391860962,
"grad_norm": 698.813720703125,
"learning_rate": 4.815973202802966e-07,
"logits/chosen": 0.09540177881717682,
"logits/rejected": 0.06088024377822876,
"logps/chosen": -62.38557052612305,
"logps/ref_chosen": -61.76676940917969,
"logps/ref_rejected": -88.60601806640625,
"logps/rejected": -89.60966491699219,
"loss": 1.2344,
"margin_dpo/margin_mean": 0.38484299182891846,
"margin_dpo/margin_std": 0.7531988620758057,
"step": 141
},
{
"epoch": 0.2146636432350718,
"fcm_dpo/beta": 2.6312007904052734,
"fcm_dpo/delta": 0.25451576709747314,
"fcm_dpo/margin": 0.2886815369129181,
"fcm_dpo/q_t": 0.39209288358688354,
"grad_norm": 677.07763671875,
"learning_rate": 4.810961790316729e-07,
"logits/chosen": 0.08383051306009293,
"logits/rejected": 0.06255074590444565,
"logps/chosen": -65.8593521118164,
"logps/ref_chosen": -65.2747802734375,
"logps/ref_rejected": -81.1378173828125,
"logps/rejected": -82.01107788085938,
"loss": 1.3684,
"margin_dpo/margin_mean": 0.2886812686920166,
"margin_dpo/margin_std": 0.7156628370285034,
"step": 142
},
{
"epoch": 0.2161753590325019,
"fcm_dpo/beta": 2.587679386138916,
"fcm_dpo/delta": -0.12541311979293823,
"fcm_dpo/margin": 0.42952513694763184,
"fcm_dpo/q_t": 0.3180665075778961,
"grad_norm": 654.9125366210938,
"learning_rate": 4.805885735261454e-07,
"logits/chosen": 0.12669947743415833,
"logits/rejected": 0.11181487888097763,
"logps/chosen": -63.13290023803711,
"logps/ref_chosen": -62.617828369140625,
"logps/ref_rejected": -70.39239501953125,
"logps/rejected": -71.33699035644531,
"loss": 1.0608,
"margin_dpo/margin_mean": 0.4295256435871124,
"margin_dpo/margin_std": 0.6682602167129517,
"step": 143
},
{
"epoch": 0.21768707482993196,
"fcm_dpo/beta": 2.556525707244873,
"fcm_dpo/delta": -0.07277373969554901,
"fcm_dpo/margin": 0.4165218472480774,
"fcm_dpo/q_t": 0.34666940569877625,
"grad_norm": 743.940185546875,
"learning_rate": 4.800745179625307e-07,
"logits/chosen": 0.11285848915576935,
"logits/rejected": 0.0883728414773941,
"logps/chosen": -61.44839096069336,
"logps/ref_chosen": -60.80268859863281,
"logps/ref_rejected": -79.07284545898438,
"logps/rejected": -80.13507080078125,
"loss": 1.1906,
"margin_dpo/margin_mean": 0.4165222942829132,
"margin_dpo/margin_std": 0.7579972743988037,
"step": 144
},
{
"epoch": 0.21919879062736206,
"fcm_dpo/beta": 2.5957999229431152,
"fcm_dpo/delta": 0.1555352658033371,
"fcm_dpo/margin": 0.33037135004997253,
"fcm_dpo/q_t": 0.3795892000198364,
"grad_norm": 965.2445678710938,
"learning_rate": 4.795540267200686e-07,
"logits/chosen": 0.07255662977695465,
"logits/rejected": 0.08949023485183716,
"logps/chosen": -75.23291015625,
"logps/ref_chosen": -74.61146545410156,
"logps/ref_rejected": -83.24461364746094,
"logps/rejected": -84.19642639160156,
"loss": 1.456,
"margin_dpo/margin_mean": 0.3303707540035248,
"margin_dpo/margin_std": 0.8115462064743042,
"step": 145
},
{
"epoch": 0.22071050642479215,
"fcm_dpo/beta": 2.5885000228881836,
"fcm_dpo/delta": -0.06581529229879379,
"fcm_dpo/margin": 0.40906020998954773,
"fcm_dpo/q_t": 0.33817818760871887,
"grad_norm": 619.466064453125,
"learning_rate": 4.790271143580173e-07,
"logits/chosen": 0.05093229562044144,
"logits/rejected": 0.03536106273531914,
"logps/chosen": -58.38066101074219,
"logps/ref_chosen": -57.84098434448242,
"logps/ref_rejected": -67.47422790527344,
"logps/rejected": -68.42295837402344,
"loss": 1.1586,
"margin_dpo/margin_mean": 0.40906035900115967,
"margin_dpo/margin_std": 0.7477720975875854,
"step": 146
},
{
"epoch": 0.2222222222222222,
"fcm_dpo/beta": 2.669675588607788,
"fcm_dpo/delta": 0.24072621762752533,
"fcm_dpo/margin": 0.29151690006256104,
"fcm_dpo/q_t": 0.38209617137908936,
"grad_norm": 970.9570922851562,
"learning_rate": 4.784937956152489e-07,
"logits/chosen": 0.05634861811995506,
"logits/rejected": 0.02099587954580784,
"logps/chosen": -67.4126968383789,
"logps/ref_chosen": -66.81346893310547,
"logps/ref_rejected": -81.1796875,
"logps/rejected": -82.0704345703125,
"loss": 1.4796,
"margin_dpo/margin_mean": 0.29151687026023865,
"margin_dpo/margin_std": 0.7656582593917847,
"step": 147
},
{
"epoch": 0.2237339380196523,
"fcm_dpo/beta": 2.6029810905456543,
"fcm_dpo/delta": -0.31015288829803467,
"fcm_dpo/margin": 0.48854613304138184,
"fcm_dpo/q_t": 0.3191307783126831,
"grad_norm": 515.7787475585938,
"learning_rate": 4.779540854098347e-07,
"logits/chosen": 0.19165629148483276,
"logits/rejected": 0.12959185242652893,
"logps/chosen": -49.28028106689453,
"logps/ref_chosen": -48.6877555847168,
"logps/ref_rejected": -67.50503540039062,
"logps/rejected": -68.58610534667969,
"loss": 1.0437,
"margin_dpo/margin_mean": 0.488546222448349,
"margin_dpo/margin_std": 0.7724089622497559,
"step": 148
},
{
"epoch": 0.2252456538170824,
"fcm_dpo/beta": 2.533602476119995,
"fcm_dpo/delta": -0.09391121566295624,
"fcm_dpo/margin": 0.42736145853996277,
"fcm_dpo/q_t": 0.34449946880340576,
"grad_norm": 640.553466796875,
"learning_rate": 4.774079988386296e-07,
"logits/chosen": 0.05926530063152313,
"logits/rejected": 0.01661105640232563,
"logps/chosen": -55.83122253417969,
"logps/ref_chosen": -55.143775939941406,
"logps/ref_rejected": -64.79888916015625,
"logps/rejected": -65.9136962890625,
"loss": 1.1159,
"margin_dpo/margin_mean": 0.4273618459701538,
"margin_dpo/margin_std": 0.7076586484909058,
"step": 149
},
{
"epoch": 0.22675736961451248,
"fcm_dpo/beta": 2.3987488746643066,
"fcm_dpo/delta": -0.2589126229286194,
"fcm_dpo/margin": 0.5113659501075745,
"fcm_dpo/q_t": 0.2972312569618225,
"grad_norm": 609.1364135742188,
"learning_rate": 4.768555511768486e-07,
"logits/chosen": 0.10247902572154999,
"logits/rejected": 0.06602032482624054,
"logps/chosen": -67.88574981689453,
"logps/ref_chosen": -67.47074890136719,
"logps/ref_rejected": -89.21170806884766,
"logps/rejected": -90.13807678222656,
"loss": 0.9804,
"margin_dpo/margin_mean": 0.5113657712936401,
"margin_dpo/margin_std": 0.6983498334884644,
"step": 150
},
{
"epoch": 0.22826908541194255,
"fcm_dpo/beta": 2.3255553245544434,
"fcm_dpo/delta": -0.19754400849342346,
"fcm_dpo/margin": 0.5049396753311157,
"fcm_dpo/q_t": 0.3148772716522217,
"grad_norm": 509.62213134765625,
"learning_rate": 4.762967578776406e-07,
"logits/chosen": 0.06744161248207092,
"logits/rejected": 0.025958776473999023,
"logps/chosen": -52.93949890136719,
"logps/ref_chosen": -52.45954132080078,
"logps/ref_rejected": -79.0630111694336,
"logps/rejected": -80.04790496826172,
"loss": 0.9958,
"margin_dpo/margin_mean": 0.5049391388893127,
"margin_dpo/margin_std": 0.7245649099349976,
"step": 151
},
{
"epoch": 0.22978080120937264,
"fcm_dpo/beta": 2.2168989181518555,
"fcm_dpo/delta": -0.2083718478679657,
"fcm_dpo/margin": 0.5341185927391052,
"fcm_dpo/q_t": 0.30489107966423035,
"grad_norm": 501.7793884277344,
"learning_rate": 4.757316345716553e-07,
"logits/chosen": 0.1456744223833084,
"logits/rejected": 0.10498102009296417,
"logps/chosen": -57.103904724121094,
"logps/ref_chosen": -56.5538330078125,
"logps/ref_rejected": -76.55074310302734,
"logps/rejected": -77.63492584228516,
"loss": 0.9032,
"margin_dpo/margin_mean": 0.5341184139251709,
"margin_dpo/margin_std": 0.6747971773147583,
"step": 152
},
{
"epoch": 0.23129251700680273,
"fcm_dpo/beta": 2.214939832687378,
"fcm_dpo/delta": 0.11927812546491623,
"fcm_dpo/margin": 0.40247973799705505,
"fcm_dpo/q_t": 0.3638674020767212,
"grad_norm": 600.3435668945312,
"learning_rate": 4.751601970666064e-07,
"logits/chosen": 0.05303303897380829,
"logits/rejected": 0.01978529989719391,
"logps/chosen": -68.58604431152344,
"logps/ref_chosen": -68.00689697265625,
"logps/ref_rejected": -74.83482360839844,
"logps/rejected": -75.81645202636719,
"loss": 1.2482,
"margin_dpo/margin_mean": 0.4024793207645416,
"margin_dpo/margin_std": 0.7963600158691406,
"step": 153
},
{
"epoch": 0.2328042328042328,
"fcm_dpo/beta": 2.284209728240967,
"fcm_dpo/delta": 0.13816551864147186,
"fcm_dpo/margin": 0.38259416818618774,
"fcm_dpo/q_t": 0.3632936179637909,
"grad_norm": 581.8963623046875,
"learning_rate": 4.745824613468292e-07,
"logits/chosen": 0.13186918199062347,
"logits/rejected": 0.12810632586479187,
"logps/chosen": -59.913002014160156,
"logps/ref_chosen": -59.222537994384766,
"logps/ref_rejected": -64.19131469726562,
"logps/rejected": -65.2643814086914,
"loss": 1.2855,
"margin_dpo/margin_mean": 0.3825940191745758,
"margin_dpo/margin_std": 0.8229261636734009,
"step": 154
},
{
"epoch": 0.23431594860166288,
"fcm_dpo/beta": 2.359236717224121,
"fcm_dpo/delta": 0.17262253165245056,
"fcm_dpo/margin": 0.3568933308124542,
"fcm_dpo/q_t": 0.3739526867866516,
"grad_norm": 687.4069213867188,
"learning_rate": 4.7399844357283393e-07,
"logits/chosen": 0.14357620477676392,
"logits/rejected": 0.1261100172996521,
"logps/chosen": -69.06387329101562,
"logps/ref_chosen": -68.45469665527344,
"logps/ref_rejected": -77.91763305664062,
"logps/rejected": -78.88371276855469,
"loss": 1.4692,
"margin_dpo/margin_mean": 0.35689258575439453,
"margin_dpo/margin_std": 0.879474401473999,
"step": 155
},
{
"epoch": 0.23582766439909297,
"fcm_dpo/beta": 2.3249125480651855,
"fcm_dpo/delta": -0.2846854329109192,
"fcm_dpo/margin": 0.5367815494537354,
"fcm_dpo/q_t": 0.33714932203292847,
"grad_norm": 715.7948608398438,
"learning_rate": 4.7340816008085305e-07,
"logits/chosen": 0.09127533435821533,
"logits/rejected": 0.05366864800453186,
"logps/chosen": -67.89020538330078,
"logps/ref_chosen": -67.26959991455078,
"logps/ref_rejected": -86.95914459228516,
"logps/rejected": -88.11653137207031,
"loss": 1.1585,
"margin_dpo/margin_mean": 0.5367816686630249,
"margin_dpo/margin_std": 0.9481757879257202,
"step": 156
},
{
"epoch": 0.23733938019652306,
"fcm_dpo/beta": 2.2261717319488525,
"fcm_dpo/delta": -0.006286881864070892,
"fcm_dpo/margin": 0.45090270042419434,
"fcm_dpo/q_t": 0.3336307406425476,
"grad_norm": 524.041259765625,
"learning_rate": 4.728116273823847e-07,
"logits/chosen": 0.08175022900104523,
"logits/rejected": 0.062264494597911835,
"logps/chosen": -55.3340950012207,
"logps/ref_chosen": -54.77287292480469,
"logps/ref_rejected": -63.87866973876953,
"logps/rejected": -64.89079284667969,
"loss": 1.0499,
"margin_dpo/margin_mean": 0.45090246200561523,
"margin_dpo/margin_std": 0.7311956286430359,
"step": 157
},
{
"epoch": 0.23885109599395313,
"fcm_dpo/beta": 2.172646999359131,
"fcm_dpo/delta": -0.2092001736164093,
"fcm_dpo/margin": 0.5448204278945923,
"fcm_dpo/q_t": 0.3070847988128662,
"grad_norm": 502.65093994140625,
"learning_rate": 4.7220886216373085e-07,
"logits/chosen": 0.1196097731590271,
"logits/rejected": 0.0907188355922699,
"logps/chosen": -65.47095489501953,
"logps/ref_chosen": -64.92271423339844,
"logps/ref_rejected": -82.23789978027344,
"logps/rejected": -83.3309555053711,
"loss": 0.9403,
"margin_dpo/margin_mean": 0.5448204278945923,
"margin_dpo/margin_std": 0.7522009015083313,
"step": 158
},
{
"epoch": 0.24036281179138322,
"fcm_dpo/beta": 2.2735085487365723,
"fcm_dpo/delta": 0.19475619494915009,
"fcm_dpo/margin": 0.3540440499782562,
"fcm_dpo/q_t": 0.35417577624320984,
"grad_norm": 675.0926513671875,
"learning_rate": 4.715998812855304e-07,
"logits/chosen": 0.13686862587928772,
"logits/rejected": 0.10717260837554932,
"logps/chosen": -57.696720123291016,
"logps/ref_chosen": -57.046993255615234,
"logps/ref_rejected": -73.32441711425781,
"logps/rejected": -74.32818603515625,
"loss": 1.2425,
"margin_dpo/margin_mean": 0.3540443778038025,
"margin_dpo/margin_std": 0.6996503472328186,
"step": 159
},
{
"epoch": 0.2418745275888133,
"fcm_dpo/beta": 2.3114399909973145,
"fcm_dpo/delta": 0.17240478098392487,
"fcm_dpo/margin": 0.3636714816093445,
"fcm_dpo/q_t": 0.3722858130931854,
"grad_norm": 635.1336669921875,
"learning_rate": 4.7098470178228755e-07,
"logits/chosen": -0.020404599606990814,
"logits/rejected": -0.04949381574988365,
"logps/chosen": -50.52197265625,
"logps/ref_chosen": -49.806915283203125,
"logps/ref_rejected": -68.3370132446289,
"logps/rejected": -69.41574096679688,
"loss": 1.2751,
"margin_dpo/margin_mean": 0.3636714518070221,
"margin_dpo/margin_std": 0.8036404848098755,
"step": 160
},
{
"epoch": 0.24338624338624337,
"fcm_dpo/beta": 2.285512924194336,
"fcm_dpo/delta": -0.16357703506946564,
"fcm_dpo/margin": 0.5008035898208618,
"fcm_dpo/q_t": 0.35015690326690674,
"grad_norm": 537.9451293945312,
"learning_rate": 4.703633408618955e-07,
"logits/chosen": 0.10273732244968414,
"logits/rejected": 0.07215458899736404,
"logps/chosen": -53.11811065673828,
"logps/ref_chosen": -52.50048828125,
"logps/ref_rejected": -66.04540252685547,
"logps/rejected": -67.16382598876953,
"loss": 1.1267,
"margin_dpo/margin_mean": 0.5008042454719543,
"margin_dpo/margin_std": 0.8614367246627808,
"step": 161
},
{
"epoch": 0.24489795918367346,
"fcm_dpo/beta": 2.08780574798584,
"fcm_dpo/delta": -0.3560563623905182,
"fcm_dpo/margin": 0.6207355260848999,
"fcm_dpo/q_t": 0.2931872010231018,
"grad_norm": 511.5837097167969,
"learning_rate": 4.697358159051549e-07,
"logits/chosen": 0.16846126317977905,
"logits/rejected": 0.12896160781383514,
"logps/chosen": -70.21339416503906,
"logps/ref_chosen": -69.46919250488281,
"logps/ref_rejected": -92.00952911376953,
"logps/rejected": -93.37446594238281,
"loss": 0.9736,
"margin_dpo/margin_mean": 0.6207360029220581,
"margin_dpo/margin_std": 0.8480439186096191,
"step": 162
},
{
"epoch": 0.24640967498110355,
"fcm_dpo/beta": 2.0367884635925293,
"fcm_dpo/delta": -0.2981437146663666,
"fcm_dpo/margin": 0.618397057056427,
"fcm_dpo/q_t": 0.29892927408218384,
"grad_norm": 515.961181640625,
"learning_rate": 4.691021444652876e-07,
"logits/chosen": 0.10065104067325592,
"logits/rejected": 0.06360255181789398,
"logps/chosen": -51.2899055480957,
"logps/ref_chosen": -50.613834381103516,
"logps/ref_rejected": -74.62033081054688,
"logps/rejected": -75.914794921875,
"loss": 0.9615,
"margin_dpo/margin_mean": 0.6183971762657166,
"margin_dpo/margin_std": 0.8240780830383301,
"step": 163
},
{
"epoch": 0.24792139077853365,
"fcm_dpo/beta": 1.9806370735168457,
"fcm_dpo/delta": -0.047169312834739685,
"fcm_dpo/margin": 0.5249905586242676,
"fcm_dpo/q_t": 0.32894620299339294,
"grad_norm": 464.32489013671875,
"learning_rate": 4.6846234426744624e-07,
"logits/chosen": 0.08540582656860352,
"logits/rejected": 0.03592706099152565,
"logps/chosen": -55.629249572753906,
"logps/ref_chosen": -54.848114013671875,
"logps/ref_rejected": -79.0630111694336,
"logps/rejected": -80.369140625,
"loss": 1.0554,
"margin_dpo/margin_mean": 0.524990439414978,
"margin_dpo/margin_std": 0.7865326404571533,
"step": 164
},
{
"epoch": 0.2494331065759637,
"fcm_dpo/beta": 1.966191291809082,
"fcm_dpo/delta": -0.0075155869126319885,
"fcm_dpo/margin": 0.5113043785095215,
"fcm_dpo/q_t": 0.3119150400161743,
"grad_norm": 391.06072998046875,
"learning_rate": 4.678164332082175e-07,
"logits/chosen": 0.1477801650762558,
"logits/rejected": 0.10125482082366943,
"logps/chosen": -51.86711883544922,
"logps/ref_chosen": -51.089210510253906,
"logps/ref_rejected": -71.23370361328125,
"logps/rejected": -72.52291870117188,
"loss": 0.9392,
"margin_dpo/margin_mean": 0.5113040804862976,
"margin_dpo/margin_std": 0.6569217443466187,
"step": 165
},
{
"epoch": 0.2509448223733938,
"fcm_dpo/beta": 2.0378761291503906,
"fcm_dpo/delta": 0.31097179651260376,
"fcm_dpo/margin": 0.34910979866981506,
"fcm_dpo/q_t": 0.38307300209999084,
"grad_norm": 556.363037109375,
"learning_rate": 4.6716442935512214e-07,
"logits/chosen": 0.10038108378648758,
"logits/rejected": 0.036018554121255875,
"logps/chosen": -63.89585876464844,
"logps/ref_chosen": -63.19081115722656,
"logps/ref_rejected": -93.8402099609375,
"logps/rejected": -94.89436340332031,
"loss": 1.2454,
"margin_dpo/margin_mean": 0.3491097092628479,
"margin_dpo/margin_std": 0.7657175064086914,
"step": 166
},
{
"epoch": 0.25245653817082386,
"fcm_dpo/beta": 2.0208253860473633,
"fcm_dpo/delta": -0.17621225118637085,
"fcm_dpo/margin": 0.5719941854476929,
"fcm_dpo/q_t": 0.2979215979576111,
"grad_norm": 404.3883056640625,
"learning_rate": 4.6650635094610966e-07,
"logits/chosen": 0.06507319211959839,
"logits/rejected": 0.03547991067171097,
"logps/chosen": -59.553977966308594,
"logps/ref_chosen": -58.92427062988281,
"logps/ref_rejected": -72.97377014160156,
"logps/rejected": -74.17547607421875,
"loss": 0.8796,
"margin_dpo/margin_mean": 0.5719939470291138,
"margin_dpo/margin_std": 0.6960855722427368,
"step": 167
},
{
"epoch": 0.25396825396825395,
"fcm_dpo/beta": 2.060161828994751,
"fcm_dpo/delta": 0.1698514223098755,
"fcm_dpo/margin": 0.4094662666320801,
"fcm_dpo/q_t": 0.3545387387275696,
"grad_norm": 583.7806396484375,
"learning_rate": 4.6584221638904767e-07,
"logits/chosen": 0.07120160013437271,
"logits/rejected": 0.04992123693227768,
"logps/chosen": -66.4698486328125,
"logps/ref_chosen": -65.65138244628906,
"logps/ref_rejected": -79.71418762207031,
"logps/rejected": -80.94212341308594,
"loss": 1.1174,
"margin_dpo/margin_mean": 0.40946611762046814,
"margin_dpo/margin_std": 0.7269895076751709,
"step": 168
},
{
"epoch": 0.25547996976568405,
"fcm_dpo/beta": 2.0494308471679688,
"fcm_dpo/delta": -0.08093604445457458,
"fcm_dpo/margin": 0.5232309699058533,
"fcm_dpo/q_t": 0.34376293420791626,
"grad_norm": 552.8328857421875,
"learning_rate": 4.651720442612075e-07,
"logits/chosen": 0.15064923465251923,
"logits/rejected": 0.12327395379543304,
"logps/chosen": -62.13090515136719,
"logps/ref_chosen": -61.425865173339844,
"logps/ref_rejected": -76.09590148925781,
"logps/rejected": -77.32416534423828,
"loss": 1.0984,
"margin_dpo/margin_mean": 0.523231029510498,
"margin_dpo/margin_std": 0.913813591003418,
"step": 169
},
{
"epoch": 0.25699168556311414,
"fcm_dpo/beta": 2.1044416427612305,
"fcm_dpo/delta": 0.22101661562919617,
"fcm_dpo/margin": 0.3785492181777954,
"fcm_dpo/q_t": 0.3640963137149811,
"grad_norm": 517.9861450195312,
"learning_rate": 4.6449585330874425e-07,
"logits/chosen": 0.05246744677424431,
"logits/rejected": 0.051534149795770645,
"logps/chosen": -57.42051696777344,
"logps/ref_chosen": -56.65319061279297,
"logps/ref_rejected": -63.45965576171875,
"logps/rejected": -64.60552215576172,
"loss": 1.2398,
"margin_dpo/margin_mean": 0.3785494565963745,
"margin_dpo/margin_std": 0.7789652943611145,
"step": 170
},
{
"epoch": 0.2585034013605442,
"fcm_dpo/beta": 2.0837273597717285,
"fcm_dpo/delta": -0.041708558797836304,
"fcm_dpo/margin": 0.49539345502853394,
"fcm_dpo/q_t": 0.31717920303344727,
"grad_norm": 528.3797607421875,
"learning_rate": 4.6381366244617224e-07,
"logits/chosen": 0.12783187627792358,
"logits/rejected": 0.08980339765548706,
"logps/chosen": -64.4964599609375,
"logps/ref_chosen": -63.73476028442383,
"logps/ref_rejected": -78.50328063964844,
"logps/rejected": -79.76036834716797,
"loss": 1.0854,
"margin_dpo/margin_mean": 0.49539363384246826,
"margin_dpo/margin_std": 0.778314471244812,
"step": 171
},
{
"epoch": 0.2600151171579743,
"fcm_dpo/beta": 2.0559816360473633,
"fcm_dpo/delta": -0.2410603016614914,
"fcm_dpo/margin": 0.589635968208313,
"fcm_dpo/q_t": 0.2946144938468933,
"grad_norm": 428.92987060546875,
"learning_rate": 4.631254907558365e-07,
"logits/chosen": 0.17183159291744232,
"logits/rejected": 0.12959660589694977,
"logps/chosen": -52.974788665771484,
"logps/ref_chosen": -52.201759338378906,
"logps/ref_rejected": -82.85285949707031,
"logps/rejected": -84.21553039550781,
"loss": 0.949,
"margin_dpo/margin_mean": 0.5896360874176025,
"margin_dpo/margin_std": 0.7388289570808411,
"step": 172
},
{
"epoch": 0.2615268329554044,
"fcm_dpo/beta": 1.8781511783599854,
"fcm_dpo/delta": -0.25002074241638184,
"fcm_dpo/margin": 0.6354281902313232,
"fcm_dpo/q_t": 0.32253411412239075,
"grad_norm": 381.1091613769531,
"learning_rate": 4.624313574873786e-07,
"logits/chosen": 0.14371845126152039,
"logits/rejected": 0.07756569981575012,
"logps/chosen": -56.25102233886719,
"logps/ref_chosen": -55.434722900390625,
"logps/ref_rejected": -77.81967163085938,
"logps/rejected": -79.27140045166016,
"loss": 0.9918,
"margin_dpo/margin_mean": 0.6354283094406128,
"margin_dpo/margin_std": 0.9134526252746582,
"step": 173
},
{
"epoch": 0.26303854875283444,
"fcm_dpo/beta": 1.8751147985458374,
"fcm_dpo/delta": -0.21836894750595093,
"fcm_dpo/margin": 0.6357536315917969,
"fcm_dpo/q_t": 0.3092048168182373,
"grad_norm": 498.1719665527344,
"learning_rate": 4.61731282057198e-07,
"logits/chosen": 0.12083408981561661,
"logits/rejected": 0.07103556394577026,
"logps/chosen": -57.99762725830078,
"logps/ref_chosen": -57.17195129394531,
"logps/ref_rejected": -85.47578430175781,
"logps/rejected": -86.93720245361328,
"loss": 1.0066,
"margin_dpo/margin_mean": 0.6357530355453491,
"margin_dpo/margin_std": 0.8986474275588989,
"step": 174
},
{
"epoch": 0.26455026455026454,
"fcm_dpo/beta": 1.770094394683838,
"fcm_dpo/delta": -0.23501265048980713,
"fcm_dpo/margin": 0.6816864013671875,
"fcm_dpo/q_t": 0.30375754833221436,
"grad_norm": 463.9217529296875,
"learning_rate": 4.6102528404790965e-07,
"logits/chosen": 0.18367326259613037,
"logits/rejected": 0.15908128023147583,
"logps/chosen": -68.52366638183594,
"logps/ref_chosen": -67.6656265258789,
"logps/ref_rejected": -84.36766815185547,
"logps/rejected": -85.90739440917969,
"loss": 0.9656,
"margin_dpo/margin_mean": 0.6816866397857666,
"margin_dpo/margin_std": 0.915657639503479,
"step": 175
},
{
"epoch": 0.2660619803476946,
"fcm_dpo/beta": 1.7365822792053223,
"fcm_dpo/delta": 0.01569700986146927,
"fcm_dpo/margin": 0.5668948888778687,
"fcm_dpo/q_t": 0.37061506509780884,
"grad_norm": 517.9691162109375,
"learning_rate": 4.603133832077953e-07,
"logits/chosen": 0.08695434033870697,
"logits/rejected": 0.06711474061012268,
"logps/chosen": -78.7764892578125,
"logps/ref_chosen": -77.8587646484375,
"logps/ref_rejected": -81.08732604980469,
"logps/rejected": -82.57195281982422,
"loss": 1.2356,
"margin_dpo/margin_mean": 0.5668948292732239,
"margin_dpo/margin_std": 1.1584200859069824,
"step": 176
},
{
"epoch": 0.2675736961451247,
"fcm_dpo/beta": 1.6431140899658203,
"fcm_dpo/delta": -0.37270694971084595,
"fcm_dpo/margin": 0.8022236227989197,
"fcm_dpo/q_t": 0.2722761631011963,
"grad_norm": 448.8039245605469,
"learning_rate": 4.5959559945025183e-07,
"logits/chosen": 0.24163630604743958,
"logits/rejected": 0.1636749804019928,
"logps/chosen": -56.08782958984375,
"logps/ref_chosen": -55.22039794921875,
"logps/ref_rejected": -92.54973602294922,
"logps/rejected": -94.21939086914062,
"loss": 0.8902,
"margin_dpo/margin_mean": 0.8022229671478271,
"margin_dpo/margin_std": 1.0025627613067627,
"step": 177
},
{
"epoch": 0.2690854119425548,
"fcm_dpo/beta": 1.6737594604492188,
"fcm_dpo/delta": 0.21938863396644592,
"fcm_dpo/margin": 0.47716161608695984,
"fcm_dpo/q_t": 0.3556697368621826,
"grad_norm": 458.8934020996094,
"learning_rate": 4.588719528532341e-07,
"logits/chosen": 0.08861234784126282,
"logits/rejected": 0.05083230137825012,
"logps/chosen": -61.76763916015625,
"logps/ref_chosen": -60.81049346923828,
"logps/ref_rejected": -81.12973022460938,
"logps/rejected": -82.56403350830078,
"loss": 1.0621,
"margin_dpo/margin_mean": 0.47716209292411804,
"margin_dpo/margin_std": 0.7696354985237122,
"step": 178
},
{
"epoch": 0.2705971277399849,
"fcm_dpo/beta": 1.7257657051086426,
"fcm_dpo/delta": 0.10213658213615417,
"fcm_dpo/margin": 0.5256574153900146,
"fcm_dpo/q_t": 0.35149049758911133,
"grad_norm": 419.9453125,
"learning_rate": 4.581424636586928e-07,
"logits/chosen": 0.16226297616958618,
"logits/rejected": 0.1488857865333557,
"logps/chosen": -66.7244644165039,
"logps/ref_chosen": -65.67171478271484,
"logps/ref_rejected": -75.32586669921875,
"logps/rejected": -76.9042739868164,
"loss": 1.0632,
"margin_dpo/margin_mean": 0.5256578922271729,
"margin_dpo/margin_std": 0.8762655258178711,
"step": 179
},
{
"epoch": 0.272108843537415,
"fcm_dpo/beta": 1.8206181526184082,
"fcm_dpo/delta": 0.2291109263896942,
"fcm_dpo/margin": 0.42961639165878296,
"fcm_dpo/q_t": 0.36887508630752563,
"grad_norm": 520.1433715820312,
"learning_rate": 4.5740715227200897e-07,
"logits/chosen": -0.0028491299599409103,
"logits/rejected": -0.017982792109251022,
"logps/chosen": -57.53323745727539,
"logps/ref_chosen": -56.68280792236328,
"logps/ref_rejected": -64.94414520263672,
"logps/rejected": -66.22418975830078,
"loss": 1.2665,
"margin_dpo/margin_mean": 0.4296168386936188,
"margin_dpo/margin_std": 0.9101868867874146,
"step": 180
},
{
"epoch": 0.273620559334845,
"fcm_dpo/beta": 1.7677171230316162,
"fcm_dpo/delta": -0.24141666293144226,
"fcm_dpo/margin": 0.6860055923461914,
"fcm_dpo/q_t": 0.2977555990219116,
"grad_norm": 448.171630859375,
"learning_rate": 4.566660392614228e-07,
"logits/chosen": 0.13813161849975586,
"logits/rejected": 0.10955438017845154,
"logps/chosen": -61.58479309082031,
"logps/ref_chosen": -60.77604675292969,
"logps/ref_rejected": -83.98361206054688,
"logps/rejected": -85.47836303710938,
"loss": 0.9069,
"margin_dpo/margin_mean": 0.686005711555481,
"margin_dpo/margin_std": 0.8478412628173828,
"step": 181
},
{
"epoch": 0.2751322751322751,
"fcm_dpo/beta": 1.698185920715332,
"fcm_dpo/delta": -0.05800933390855789,
"fcm_dpo/margin": 0.6177934408187866,
"fcm_dpo/q_t": 0.3166268467903137,
"grad_norm": 430.59722900390625,
"learning_rate": 4.5591914535745817e-07,
"logits/chosen": 0.1499433070421219,
"logits/rejected": 0.09003090858459473,
"logps/chosen": -61.18524932861328,
"logps/ref_chosen": -60.2537841796875,
"logps/ref_rejected": -89.7706298828125,
"logps/rejected": -91.31988525390625,
"loss": 1.0536,
"margin_dpo/margin_mean": 0.6177935004234314,
"margin_dpo/margin_std": 0.9228367209434509,
"step": 182
},
{
"epoch": 0.2766439909297052,
"fcm_dpo/beta": 1.8208626508712769,
"fcm_dpo/delta": 0.4041329324245453,
"fcm_dpo/margin": 0.34058958292007446,
"fcm_dpo/q_t": 0.3961232900619507,
"grad_norm": 510.5823059082031,
"learning_rate": 4.551664914523433e-07,
"logits/chosen": 0.1353759616613388,
"logits/rejected": 0.12078934907913208,
"logps/chosen": -62.945030212402344,
"logps/ref_chosen": -61.76142120361328,
"logps/ref_rejected": -72.54627990722656,
"logps/rejected": -74.07048034667969,
"loss": 1.3364,
"margin_dpo/margin_mean": 0.3405901789665222,
"margin_dpo/margin_std": 0.8557813763618469,
"step": 183
},
{
"epoch": 0.2781557067271353,
"fcm_dpo/beta": 1.8339712619781494,
"fcm_dpo/delta": 0.05328105390071869,
"fcm_dpo/margin": 0.5154128074645996,
"fcm_dpo/q_t": 0.34025606513023376,
"grad_norm": 346.7197265625,
"learning_rate": 4.544080985994258e-07,
"logits/chosen": 0.21997570991516113,
"logits/rejected": 0.1718028038740158,
"logps/chosen": -47.82099151611328,
"logps/ref_chosen": -46.840721130371094,
"logps/ref_rejected": -69.3609390258789,
"logps/rejected": -70.85662841796875,
"loss": 0.989,
"margin_dpo/margin_mean": 0.5154126882553101,
"margin_dpo/margin_std": 0.7232804298400879,
"step": 184
},
{
"epoch": 0.2796674225245654,
"fcm_dpo/beta": 1.8281052112579346,
"fcm_dpo/delta": -0.15045057237148285,
"fcm_dpo/margin": 0.6196208000183105,
"fcm_dpo/q_t": 0.321952223777771,
"grad_norm": 413.64068603515625,
"learning_rate": 4.5364398801258394e-07,
"logits/chosen": 0.14673639833927155,
"logits/rejected": 0.10907743126153946,
"logps/chosen": -53.348968505859375,
"logps/ref_chosen": -52.32114028930664,
"logps/ref_rejected": -68.3885726928711,
"logps/rejected": -70.03601837158203,
"loss": 1.0869,
"margin_dpo/margin_mean": 0.6196208000183105,
"margin_dpo/margin_std": 1.0106725692749023,
"step": 185
},
{
"epoch": 0.2811791383219955,
"fcm_dpo/beta": 1.8034803867340088,
"fcm_dpo/delta": -0.005721554160118103,
"fcm_dpo/margin": 0.5566083788871765,
"fcm_dpo/q_t": 0.3407011032104492,
"grad_norm": 465.7325744628906,
"learning_rate": 4.5287418106563354e-07,
"logits/chosen": 0.07695234566926956,
"logits/rejected": 0.04599303752183914,
"logps/chosen": -68.40414428710938,
"logps/ref_chosen": -67.42012786865234,
"logps/ref_rejected": -82.50968933105469,
"logps/rejected": -84.0503158569336,
"loss": 1.0608,
"margin_dpo/margin_mean": 0.5566080808639526,
"margin_dpo/margin_std": 0.8762015104293823,
"step": 186
},
{
"epoch": 0.28269085411942557,
"fcm_dpo/beta": 1.7329106330871582,
"fcm_dpo/delta": -0.18373380601406097,
"fcm_dpo/margin": 0.6642186641693115,
"fcm_dpo/q_t": 0.32172733545303345,
"grad_norm": 530.1708374023438,
"learning_rate": 4.520986992917297e-07,
"logits/chosen": 0.14111362397670746,
"logits/rejected": 0.09435050934553146,
"logps/chosen": -76.567138671875,
"logps/ref_chosen": -75.52549743652344,
"logps/ref_rejected": -94.76289367675781,
"logps/rejected": -96.46875,
"loss": 1.1758,
"margin_dpo/margin_mean": 0.6642183661460876,
"margin_dpo/margin_std": 1.1894935369491577,
"step": 187
},
{
"epoch": 0.2842025699168556,
"fcm_dpo/beta": 1.744195818901062,
"fcm_dpo/delta": 0.005925014615058899,
"fcm_dpo/margin": 0.5699671506881714,
"fcm_dpo/q_t": 0.3193795382976532,
"grad_norm": 516.5187377929688,
"learning_rate": 4.5131756438276466e-07,
"logits/chosen": 0.151597797870636,
"logits/rejected": 0.11677326261997223,
"logps/chosen": -72.44251251220703,
"logps/ref_chosen": -71.52333068847656,
"logps/ref_rejected": -78.29949951171875,
"logps/rejected": -79.78865814208984,
"loss": 1.1632,
"margin_dpo/margin_mean": 0.56996750831604,
"margin_dpo/margin_std": 1.0281386375427246,
"step": 188
},
{
"epoch": 0.2857142857142857,
"fcm_dpo/beta": 1.6932382583618164,
"fcm_dpo/delta": -0.15139150619506836,
"fcm_dpo/margin": 0.6669385433197021,
"fcm_dpo/q_t": 0.30612969398498535,
"grad_norm": 391.1927795410156,
"learning_rate": 4.5053079818876096e-07,
"logits/chosen": 0.12143361568450928,
"logits/rejected": 0.12991394102573395,
"logps/chosen": -73.05665588378906,
"logps/ref_chosen": -72.17626953125,
"logps/ref_rejected": -75.26313781738281,
"logps/rejected": -76.81045532226562,
"loss": 0.8928,
"margin_dpo/margin_mean": 0.6669397950172424,
"margin_dpo/margin_std": 0.832381010055542,
"step": 189
},
{
"epoch": 0.2872260015117158,
"fcm_dpo/beta": 1.6875749826431274,
"fcm_dpo/delta": -0.09505629539489746,
"fcm_dpo/margin": 0.642721951007843,
"fcm_dpo/q_t": 0.32302048802375793,
"grad_norm": 424.7405090332031,
"learning_rate": 4.4973842271726024e-07,
"logits/chosen": 0.19074919819831848,
"logits/rejected": 0.081771120429039,
"logps/chosen": -55.586639404296875,
"logps/ref_chosen": -54.624271392822266,
"logps/ref_rejected": -101.47068786621094,
"logps/rejected": -103.0757827758789,
"loss": 1.0135,
"margin_dpo/margin_mean": 0.6427220106124878,
"margin_dpo/margin_std": 0.9737996459007263,
"step": 190
},
{
"epoch": 0.2887377173091459,
"fcm_dpo/beta": 1.6515135765075684,
"fcm_dpo/delta": -0.04676612466573715,
"fcm_dpo/margin": 0.6305712461471558,
"fcm_dpo/q_t": 0.31939807534217834,
"grad_norm": 493.140625,
"learning_rate": 4.48940460132708e-07,
"logits/chosen": 0.1993117779493332,
"logits/rejected": 0.1801232248544693,
"logps/chosen": -74.03681945800781,
"logps/ref_chosen": -72.93251037597656,
"logps/ref_rejected": -89.95103454589844,
"logps/rejected": -91.68590545654297,
"loss": 1.047,
"margin_dpo/margin_mean": 0.6305709481239319,
"margin_dpo/margin_std": 0.9657796621322632,
"step": 191
},
{
"epoch": 0.29024943310657597,
"fcm_dpo/beta": 1.7161282300949097,
"fcm_dpo/delta": 0.19982855021953583,
"fcm_dpo/margin": 0.47440922260284424,
"fcm_dpo/q_t": 0.3627406060695648,
"grad_norm": 372.83734130859375,
"learning_rate": 4.481369327558329e-07,
"logits/chosen": 0.16536489129066467,
"logits/rejected": 0.1462571918964386,
"logps/chosen": -55.00927734375,
"logps/ref_chosen": -54.001121520996094,
"logps/ref_rejected": -63.531551361083984,
"logps/rejected": -65.01411437988281,
"loss": 1.1335,
"margin_dpo/margin_mean": 0.4744090139865875,
"margin_dpo/margin_std": 0.8528145551681519,
"step": 192
},
{
"epoch": 0.29176114890400606,
"fcm_dpo/beta": 1.6729130744934082,
"fcm_dpo/delta": -0.11725394427776337,
"fcm_dpo/margin": 0.6579139828681946,
"fcm_dpo/q_t": 0.3185346722602844,
"grad_norm": 344.4507751464844,
"learning_rate": 4.47327863063023e-07,
"logits/chosen": 0.09357620775699615,
"logits/rejected": 0.07541916519403458,
"logps/chosen": -57.7445182800293,
"logps/ref_chosen": -56.74927520751953,
"logps/ref_rejected": -58.80629348754883,
"logps/rejected": -60.459449768066406,
"loss": 0.9244,
"margin_dpo/margin_mean": 0.6579139232635498,
"margin_dpo/margin_std": 0.9124239087104797,
"step": 193
},
{
"epoch": 0.29327286470143615,
"fcm_dpo/beta": 1.6954293251037598,
"fcm_dpo/delta": 0.1348000019788742,
"fcm_dpo/margin": 0.5165129899978638,
"fcm_dpo/q_t": 0.36234208941459656,
"grad_norm": 409.556396484375,
"learning_rate": 4.4651327368569684e-07,
"logits/chosen": 0.1781534105539322,
"logits/rejected": 0.15505832433700562,
"logps/chosen": -57.62710189819336,
"logps/ref_chosen": -56.64944076538086,
"logps/ref_rejected": -69.98954772949219,
"logps/rejected": -71.48371887207031,
"loss": 1.1709,
"margin_dpo/margin_mean": 0.5165130496025085,
"margin_dpo/margin_std": 0.9614365100860596,
"step": 194
},
{
"epoch": 0.2947845804988662,
"fcm_dpo/beta": 1.7598028182983398,
"fcm_dpo/delta": 0.22711437940597534,
"fcm_dpo/margin": 0.4485671818256378,
"fcm_dpo/q_t": 0.3598003685474396,
"grad_norm": 496.6513977050781,
"learning_rate": 4.4569318740967043e-07,
"logits/chosen": 0.09457789361476898,
"logits/rejected": 0.09252005815505981,
"logps/chosen": -71.65326690673828,
"logps/ref_chosen": -70.40977478027344,
"logps/ref_rejected": -74.39448547363281,
"logps/rejected": -76.08654022216797,
"loss": 1.2491,
"margin_dpo/margin_mean": 0.44856685400009155,
"margin_dpo/margin_std": 0.9060893058776855,
"step": 195
},
{
"epoch": 0.2962962962962963,
"fcm_dpo/beta": 1.8433669805526733,
"fcm_dpo/delta": 0.033393874764442444,
"fcm_dpo/margin": 0.5237653255462646,
"fcm_dpo/q_t": 0.34850770235061646,
"grad_norm": 410.0362854003906,
"learning_rate": 4.448676271745197e-07,
"logits/chosen": 0.1736685037612915,
"logits/rejected": 0.14138346910476685,
"logps/chosen": -60.27099609375,
"logps/ref_chosen": -59.227577209472656,
"logps/ref_rejected": -83.54757690429688,
"logps/rejected": -85.11476135253906,
"loss": 1.193,
"margin_dpo/margin_mean": 0.5237653255462646,
"margin_dpo/margin_std": 0.9456428289413452,
"step": 196
},
{
"epoch": 0.29780801209372637,
"fcm_dpo/beta": 1.8353400230407715,
"fcm_dpo/delta": -0.15863925218582153,
"fcm_dpo/margin": 0.6141480803489685,
"fcm_dpo/q_t": 0.33761459589004517,
"grad_norm": 520.2391967773438,
"learning_rate": 4.440366160729392e-07,
"logits/chosen": 0.23479902744293213,
"logits/rejected": 0.1985134482383728,
"logps/chosen": -52.59620666503906,
"logps/ref_chosen": -51.52912902832031,
"logps/ref_rejected": -73.70631408691406,
"logps/rejected": -75.38753509521484,
"loss": 1.2601,
"margin_dpo/margin_mean": 0.6141484975814819,
"margin_dpo/margin_std": 1.1263779401779175,
"step": 197
},
{
"epoch": 0.29931972789115646,
"fcm_dpo/beta": 1.6845048666000366,
"fcm_dpo/delta": -0.392536461353302,
"fcm_dpo/margin": 0.796295702457428,
"fcm_dpo/q_t": 0.2878139615058899,
"grad_norm": 392.1756591796875,
"learning_rate": 4.432001773500957e-07,
"logits/chosen": 0.19247442483901978,
"logits/rejected": 0.16175703704357147,
"logps/chosen": -60.77484893798828,
"logps/ref_chosen": -59.78268051147461,
"logps/ref_rejected": -72.24533081054688,
"logps/rejected": -74.03379821777344,
"loss": 0.8946,
"margin_dpo/margin_mean": 0.796296238899231,
"margin_dpo/margin_std": 1.0002985000610352,
"step": 198
},
{
"epoch": 0.30083144368858655,
"fcm_dpo/beta": 1.6466844081878662,
"fcm_dpo/delta": 0.005560420453548431,
"fcm_dpo/margin": 0.6032355427742004,
"fcm_dpo/q_t": 0.3430374562740326,
"grad_norm": 385.1544494628906,
"learning_rate": 4.4235833440297856e-07,
"logits/chosen": 0.13208839297294617,
"logits/rejected": 0.0626702532172203,
"logps/chosen": -57.460811614990234,
"logps/ref_chosen": -56.38677215576172,
"logps/ref_rejected": -74.56779479980469,
"logps/rejected": -76.24507141113281,
"loss": 1.0833,
"margin_dpo/margin_mean": 0.6032348275184631,
"margin_dpo/margin_std": 0.970983624458313,
"step": 199
},
{
"epoch": 0.30234315948601664,
"fcm_dpo/beta": 1.5712354183197021,
"fcm_dpo/delta": -0.1618885099887848,
"fcm_dpo/margin": 0.7240477800369263,
"fcm_dpo/q_t": 0.327957421541214,
"grad_norm": 412.8072204589844,
"learning_rate": 4.415111107797445e-07,
"logits/chosen": 0.1844407021999359,
"logits/rejected": 0.12967121601104736,
"logps/chosen": -58.72947692871094,
"logps/ref_chosen": -57.82432556152344,
"logps/ref_rejected": -89.28246307373047,
"logps/rejected": -90.91166687011719,
"loss": 1.0309,
"margin_dpo/margin_mean": 0.7240477800369263,
"margin_dpo/margin_std": 1.1023731231689453,
"step": 200
},
{
"epoch": 0.30234315948601664,
"eval_fcm_dpo/beta": 1.5800285339355469,
"eval_logits/chosen": 0.16845357418060303,
"eval_logits/rejected": 0.13373498618602753,
"eval_logps/chosen": -75.8633041381836,
"eval_logps/ref_chosen": -74.85946655273438,
"eval_logps/ref_rejected": -79.54898834228516,
"eval_logps/rejected": -81.05730438232422,
"eval_loss": 0.6114334464073181,
"eval_margin_dpo/margin_mean": 0.5044752359390259,
"eval_margin_dpo/margin_std": 1.028841495513916,
"eval_runtime": 38.0264,
"eval_samples_per_second": 60.563,
"eval_steps_per_second": 1.893,
"step": 200
},
{
"epoch": 0.30385487528344673,
"fcm_dpo/beta": 1.5763221979141235,
"fcm_dpo/delta": 0.00995655357837677,
"fcm_dpo/margin": 0.6285428404808044,
"fcm_dpo/q_t": 0.33469393849372864,
"grad_norm": 368.6453552246094,
"learning_rate": 4.4065853017905953e-07,
"logits/chosen": 0.20987267792224884,
"logits/rejected": 0.17573854327201843,
"logps/chosen": -60.036251068115234,
"logps/ref_chosen": -58.999759674072266,
"logps/ref_rejected": -84.67575073242188,
"logps/rejected": -86.34077453613281,
"loss": 0.9778,
"margin_dpo/margin_mean": 0.6285424828529358,
"margin_dpo/margin_std": 0.8790519833564758,
"step": 201
},
{
"epoch": 0.30536659108087677,
"fcm_dpo/beta": 1.5742418766021729,
"fcm_dpo/delta": -0.20220552384853363,
"fcm_dpo/margin": 0.7447031736373901,
"fcm_dpo/q_t": 0.306610643863678,
"grad_norm": 324.744384765625,
"learning_rate": 4.3980061644943575e-07,
"logits/chosen": 0.08034056425094604,
"logits/rejected": 0.031506434082984924,
"logps/chosen": -48.6182861328125,
"logps/ref_chosen": -47.660648345947266,
"logps/ref_rejected": -73.63249969482422,
"logps/rejected": -75.3348388671875,
"loss": 0.918,
"margin_dpo/margin_mean": 0.7447031736373901,
"margin_dpo/margin_std": 0.9296808242797852,
"step": 202
},
{
"epoch": 0.30687830687830686,
"fcm_dpo/beta": 1.5995709896087646,
"fcm_dpo/delta": 0.29017671942710876,
"fcm_dpo/margin": 0.45493584871292114,
"fcm_dpo/q_t": 0.3810551166534424,
"grad_norm": 469.91229248046875,
"learning_rate": 4.3893739358856455e-07,
"logits/chosen": 0.19242677092552185,
"logits/rejected": 0.14212460815906525,
"logps/chosen": -63.31700134277344,
"logps/ref_chosen": -62.32553482055664,
"logps/ref_rejected": -99.37226104736328,
"logps/rejected": -100.81866455078125,
"loss": 1.2402,
"margin_dpo/margin_mean": 0.454935759305954,
"margin_dpo/margin_std": 0.9751724004745483,
"step": 203
},
{
"epoch": 0.30839002267573695,
"fcm_dpo/beta": 1.5674870014190674,
"fcm_dpo/delta": -0.029434487223625183,
"fcm_dpo/margin": 0.6477770209312439,
"fcm_dpo/q_t": 0.3433707654476166,
"grad_norm": 368.2581787109375,
"learning_rate": 4.380688857426449e-07,
"logits/chosen": 0.0801263153553009,
"logits/rejected": 0.030903467908501625,
"logps/chosen": -51.68293762207031,
"logps/ref_chosen": -50.62931823730469,
"logps/ref_rejected": -66.60475158691406,
"logps/rejected": -68.30615234375,
"loss": 1.1195,
"margin_dpo/margin_mean": 0.6477770805358887,
"margin_dpo/margin_std": 1.0862679481506348,
"step": 204
},
{
"epoch": 0.30990173847316704,
"fcm_dpo/beta": 1.653544306755066,
"fcm_dpo/delta": 0.27683955430984497,
"fcm_dpo/margin": 0.450222909450531,
"fcm_dpo/q_t": 0.3771224319934845,
"grad_norm": 536.0993041992188,
"learning_rate": 4.3719511720570814e-07,
"logits/chosen": 0.16879329085350037,
"logits/rejected": 0.12696264684200287,
"logps/chosen": -71.54264831542969,
"logps/ref_chosen": -70.3561782836914,
"logps/ref_rejected": -93.39848327636719,
"logps/rejected": -95.03517150878906,
"loss": 1.3927,
"margin_dpo/margin_mean": 0.4502222239971161,
"margin_dpo/margin_std": 1.1024572849273682,
"step": 205
},
{
"epoch": 0.31141345427059713,
"fcm_dpo/beta": 1.7623982429504395,
"fcm_dpo/delta": 0.12155643105506897,
"fcm_dpo/margin": 0.49780306220054626,
"fcm_dpo/q_t": 0.3561771512031555,
"grad_norm": 486.98162841796875,
"learning_rate": 4.363161124189387e-07,
"logits/chosen": 0.18706491589546204,
"logits/rejected": 0.17341138422489166,
"logps/chosen": -68.82878112792969,
"logps/ref_chosen": -67.64547729492188,
"logps/ref_rejected": -79.89584350585938,
"logps/rejected": -81.57694244384766,
"loss": 1.2778,
"margin_dpo/margin_mean": 0.497803270816803,
"margin_dpo/margin_std": 0.9891307353973389,
"step": 206
},
{
"epoch": 0.3129251700680272,
"fcm_dpo/beta": 1.7180607318878174,
"fcm_dpo/delta": 0.0706307590007782,
"fcm_dpo/margin": 0.5409280061721802,
"fcm_dpo/q_t": 0.35449251532554626,
"grad_norm": 428.377685546875,
"learning_rate": 4.3543189596998986e-07,
"logits/chosen": 0.10714876651763916,
"logits/rejected": 0.05734197795391083,
"logps/chosen": -68.83715057373047,
"logps/ref_chosen": -67.66419219970703,
"logps/ref_rejected": -85.10249328613281,
"logps/rejected": -86.81637573242188,
"loss": 1.1493,
"margin_dpo/margin_mean": 0.5409282445907593,
"margin_dpo/margin_std": 1.005875825881958,
"step": 207
},
{
"epoch": 0.3144368858654573,
"fcm_dpo/beta": 1.7887952327728271,
"fcm_dpo/delta": 0.09666138887405396,
"fcm_dpo/margin": 0.509753942489624,
"fcm_dpo/q_t": 0.3555990755558014,
"grad_norm": 445.3408508300781,
"learning_rate": 4.3454249259229664e-07,
"logits/chosen": 0.13969993591308594,
"logits/rejected": 0.11949601769447327,
"logps/chosen": -58.69970703125,
"logps/ref_chosen": -57.731712341308594,
"logps/ref_rejected": -74.19276428222656,
"logps/rejected": -75.6705093383789,
"loss": 1.2387,
"margin_dpo/margin_mean": 0.509753406047821,
"margin_dpo/margin_std": 0.9776418209075928,
"step": 208
},
{
"epoch": 0.31594860166288735,
"fcm_dpo/beta": 1.7703063488006592,
"fcm_dpo/delta": -0.3099019527435303,
"fcm_dpo/margin": 0.7123583555221558,
"fcm_dpo/q_t": 0.3118298351764679,
"grad_norm": 475.9559020996094,
"learning_rate": 4.336479271643833e-07,
"logits/chosen": 0.08468753099441528,
"logits/rejected": 0.04272126033902168,
"logps/chosen": -69.55844116210938,
"logps/ref_chosen": -68.55007934570312,
"logps/ref_rejected": -87.90541076660156,
"logps/rejected": -89.62612915039062,
"loss": 1.059,
"margin_dpo/margin_mean": 0.7123589515686035,
"margin_dpo/margin_std": 1.0521396398544312,
"step": 209
},
{
"epoch": 0.31746031746031744,
"fcm_dpo/beta": 1.604590892791748,
"fcm_dpo/delta": -0.3447558283805847,
"fcm_dpo/margin": 0.8102937340736389,
"fcm_dpo/q_t": 0.31004101037979126,
"grad_norm": 367.3415832519531,
"learning_rate": 4.327482247091679e-07,
"logits/chosen": 0.17499999701976776,
"logits/rejected": 0.10814614593982697,
"logps/chosen": -58.340545654296875,
"logps/ref_chosen": -57.268272399902344,
"logps/ref_rejected": -85.72807312011719,
"logps/rejected": -87.61064147949219,
"loss": 0.9243,
"margin_dpo/margin_mean": 0.8102930784225464,
"margin_dpo/margin_std": 1.078457236289978,
"step": 210
},
{
"epoch": 0.31897203325774753,
"fcm_dpo/beta": 1.5751144886016846,
"fcm_dpo/delta": -0.008975658565759659,
"fcm_dpo/margin": 0.6399465799331665,
"fcm_dpo/q_t": 0.33030977845191956,
"grad_norm": 443.0260009765625,
"learning_rate": 4.3184341039326217e-07,
"logits/chosen": 0.1592363566160202,
"logits/rejected": 0.09597043693065643,
"logps/chosen": -54.56357192993164,
"logps/ref_chosen": -53.640708923339844,
"logps/ref_rejected": -93.0387954711914,
"logps/rejected": -94.60160827636719,
"loss": 0.9671,
"margin_dpo/margin_mean": 0.6399465799331665,
"margin_dpo/margin_std": 0.9168812036514282,
"step": 211
},
{
"epoch": 0.3204837490551776,
"fcm_dpo/beta": 1.564136028289795,
"fcm_dpo/delta": -0.06847534328699112,
"fcm_dpo/margin": 0.678282618522644,
"fcm_dpo/q_t": 0.3206981420516968,
"grad_norm": 370.3670349121094,
"learning_rate": 4.309335095262675e-07,
"logits/chosen": 0.17273937165737152,
"logits/rejected": 0.11869757622480392,
"logps/chosen": -58.41630554199219,
"logps/ref_chosen": -57.36674499511719,
"logps/ref_rejected": -79.89643096923828,
"logps/rejected": -81.62427520751953,
"loss": 1.027,
"margin_dpo/margin_mean": 0.678282618522644,
"margin_dpo/margin_std": 1.0015318393707275,
"step": 212
},
{
"epoch": 0.3219954648526077,
"fcm_dpo/beta": 1.527937889099121,
"fcm_dpo/delta": -0.042714398354291916,
"fcm_dpo/margin": 0.6786133050918579,
"fcm_dpo/q_t": 0.3305957615375519,
"grad_norm": 391.22064208984375,
"learning_rate": 4.3001854756006724e-07,
"logits/chosen": 0.15932638943195343,
"logits/rejected": 0.138889878988266,
"logps/chosen": -66.10162353515625,
"logps/ref_chosen": -65.22111511230469,
"logps/ref_rejected": -80.1810302734375,
"logps/rejected": -81.74015045166016,
"loss": 1.0605,
"margin_dpo/margin_mean": 0.6786131858825684,
"margin_dpo/margin_std": 1.066502332687378,
"step": 213
},
{
"epoch": 0.3235071806500378,
"fcm_dpo/beta": 1.542710781097412,
"fcm_dpo/delta": -0.057289645075798035,
"fcm_dpo/margin": 0.6799356937408447,
"fcm_dpo/q_t": 0.33246564865112305,
"grad_norm": 433.1566467285156,
"learning_rate": 4.290985500881143e-07,
"logits/chosen": 0.06008949503302574,
"logits/rejected": 0.0402056947350502,
"logps/chosen": -62.26348114013672,
"logps/ref_chosen": -61.292327880859375,
"logps/ref_rejected": -67.69841003417969,
"logps/rejected": -69.3494873046875,
"loss": 1.0116,
"margin_dpo/margin_mean": 0.679936408996582,
"margin_dpo/margin_std": 1.0450165271759033,
"step": 214
},
{
"epoch": 0.3250188964474679,
"fcm_dpo/beta": 1.5210988521575928,
"fcm_dpo/delta": -0.0631207600235939,
"fcm_dpo/margin": 0.6934947371482849,
"fcm_dpo/q_t": 0.33989161252975464,
"grad_norm": 409.9744567871094,
"learning_rate": 4.281735428447157e-07,
"logits/chosen": 0.08377814292907715,
"logits/rejected": 0.010123915039002895,
"logps/chosen": -64.82623291015625,
"logps/ref_chosen": -63.869136810302734,
"logps/ref_rejected": -98.7657241821289,
"logps/rejected": -100.41632080078125,
"loss": 1.0766,
"margin_dpo/margin_mean": 0.6934951543807983,
"margin_dpo/margin_std": 1.1064403057098389,
"step": 215
},
{
"epoch": 0.32653061224489793,
"fcm_dpo/beta": 1.4754486083984375,
"fcm_dpo/delta": -0.2396281659603119,
"fcm_dpo/margin": 0.8188655972480774,
"fcm_dpo/q_t": 0.34051093459129333,
"grad_norm": 398.4416198730469,
"learning_rate": 4.2724355170431247e-07,
"logits/chosen": 0.16970112919807434,
"logits/rejected": 0.10684916377067566,
"logps/chosen": -68.89656829833984,
"logps/ref_chosen": -67.824951171875,
"logps/ref_rejected": -96.40231323242188,
"logps/rejected": -98.29280090332031,
"loss": 1.1031,
"margin_dpo/margin_mean": 0.8188657760620117,
"margin_dpo/margin_std": 1.4882557392120361,
"step": 216
},
{
"epoch": 0.328042328042328,
"fcm_dpo/beta": 1.344929575920105,
"fcm_dpo/delta": -0.35066401958465576,
"fcm_dpo/margin": 0.9668929576873779,
"fcm_dpo/q_t": 0.28799083828926086,
"grad_norm": 300.48565673828125,
"learning_rate": 4.26308602680756e-07,
"logits/chosen": 0.11938208341598511,
"logits/rejected": 0.03593681752681732,
"logps/chosen": -61.565162658691406,
"logps/ref_chosen": -60.5049934387207,
"logps/ref_rejected": -84.26618194580078,
"logps/rejected": -86.29324340820312,
"loss": 0.8155,
"margin_dpo/margin_mean": 0.966893196105957,
"margin_dpo/margin_std": 1.1881346702575684,
"step": 217
},
{
"epoch": 0.3295540438397581,
"fcm_dpo/beta": 1.4142457246780396,
"fcm_dpo/delta": 0.2700718343257904,
"fcm_dpo/margin": 0.5216431617736816,
"fcm_dpo/q_t": 0.3781411647796631,
"grad_norm": 394.3163757324219,
"learning_rate": 4.253687219265803e-07,
"logits/chosen": 0.024805322289466858,
"logits/rejected": 0.01834661327302456,
"logps/chosen": -71.73900604248047,
"logps/ref_chosen": -70.59431457519531,
"logps/ref_rejected": -73.89038848876953,
"logps/rejected": -75.55671691894531,
"loss": 1.2642,
"margin_dpo/margin_mean": 0.5216437578201294,
"margin_dpo/margin_std": 1.1325141191482544,
"step": 218
},
{
"epoch": 0.3310657596371882,
"fcm_dpo/beta": 1.4622104167938232,
"fcm_dpo/delta": 0.27110588550567627,
"fcm_dpo/margin": 0.5126116275787354,
"fcm_dpo/q_t": 0.36937472224235535,
"grad_norm": 361.056884765625,
"learning_rate": 4.2442393573227043e-07,
"logits/chosen": 0.08768868446350098,
"logits/rejected": 0.05643084645271301,
"logps/chosen": -61.44779586791992,
"logps/ref_chosen": -60.490943908691406,
"logps/ref_rejected": -75.85001373291016,
"logps/rejected": -77.31947326660156,
"loss": 1.0874,
"margin_dpo/margin_mean": 0.512610912322998,
"margin_dpo/margin_std": 0.8701074123382568,
"step": 219
},
{
"epoch": 0.3325774754346183,
"fcm_dpo/beta": 1.4802911281585693,
"fcm_dpo/delta": 0.026991277933120728,
"fcm_dpo/margin": 0.6585407257080078,
"fcm_dpo/q_t": 0.33476772904396057,
"grad_norm": 283.6431579589844,
"learning_rate": 4.234742705255272e-07,
"logits/chosen": 0.17591653764247894,
"logits/rejected": 0.13129651546478271,
"logps/chosen": -46.09846496582031,
"logps/ref_chosen": -45.013397216796875,
"logps/ref_rejected": -70.49369812011719,
"logps/rejected": -72.23731231689453,
"loss": 0.9934,
"margin_dpo/margin_mean": 0.6585406064987183,
"margin_dpo/margin_std": 0.9634271264076233,
"step": 220
},
{
"epoch": 0.3340891912320484,
"fcm_dpo/beta": 1.486309289932251,
"fcm_dpo/delta": -0.12030621618032455,
"fcm_dpo/margin": 0.7435950636863708,
"fcm_dpo/q_t": 0.32344067096710205,
"grad_norm": 338.2693176269531,
"learning_rate": 4.22519752870528e-07,
"logits/chosen": 0.16085302829742432,
"logits/rejected": 0.11432051658630371,
"logps/chosen": -60.045074462890625,
"logps/ref_chosen": -59.09584045410156,
"logps/ref_rejected": -88.64388275146484,
"logps/rejected": -90.33671569824219,
"loss": 0.9562,
"margin_dpo/margin_mean": 0.7435950636863708,
"margin_dpo/margin_std": 1.0672008991241455,
"step": 221
},
{
"epoch": 0.3356009070294785,
"fcm_dpo/beta": 1.4272222518920898,
"fcm_dpo/delta": -0.1927495002746582,
"fcm_dpo/margin": 0.8200865983963013,
"fcm_dpo/q_t": 0.2971838712692261,
"grad_norm": 337.138671875,
"learning_rate": 4.2156040946718343e-07,
"logits/chosen": 0.16604246199131012,
"logits/rejected": 0.10780028998851776,
"logps/chosen": -57.0385627746582,
"logps/ref_chosen": -55.9976921081543,
"logps/ref_rejected": -111.94727325439453,
"logps/rejected": -113.8082275390625,
"loss": 0.856,
"margin_dpo/margin_mean": 0.8200874328613281,
"margin_dpo/margin_std": 0.9770439863204956,
"step": 222
},
{
"epoch": 0.3371126228269085,
"fcm_dpo/beta": 1.3298912048339844,
"fcm_dpo/delta": -0.2783888578414917,
"fcm_dpo/margin": 0.9309906363487244,
"fcm_dpo/q_t": 0.2817830443382263,
"grad_norm": 263.7952880859375,
"learning_rate": 4.2059626715039065e-07,
"logits/chosen": 0.18012914061546326,
"logits/rejected": 0.13743728399276733,
"logps/chosen": -60.831424713134766,
"logps/ref_chosen": -59.891422271728516,
"logps/ref_rejected": -86.28954315185547,
"logps/rejected": -88.16053771972656,
"loss": 0.7994,
"margin_dpo/margin_mean": 0.9309903383255005,
"margin_dpo/margin_std": 0.9911828637123108,
"step": 223
},
{
"epoch": 0.3386243386243386,
"fcm_dpo/beta": 1.3959991931915283,
"fcm_dpo/delta": 0.3523348867893219,
"fcm_dpo/margin": 0.48118603229522705,
"fcm_dpo/q_t": 0.3791411519050598,
"grad_norm": 364.8034362792969,
"learning_rate": 4.1962735288928304e-07,
"logits/chosen": 0.19306407868862152,
"logits/rejected": 0.17875471711158752,
"logps/chosen": -65.12818908691406,
"logps/ref_chosen": -64.04463195800781,
"logps/ref_rejected": -75.05450439453125,
"logps/rejected": -76.61923217773438,
"loss": 1.0923,
"margin_dpo/margin_mean": 0.4811859726905823,
"margin_dpo/margin_std": 0.8308462500572205,
"step": 224
},
{
"epoch": 0.3401360544217687,
"fcm_dpo/beta": 1.395388126373291,
"fcm_dpo/delta": -0.05797035992145538,
"fcm_dpo/margin": 0.7516753673553467,
"fcm_dpo/q_t": 0.3332204222679138,
"grad_norm": 468.0265197753906,
"learning_rate": 4.186536937864752e-07,
"logits/chosen": 0.1659896969795227,
"logits/rejected": 0.07978636771440506,
"logps/chosen": -67.14776611328125,
"logps/ref_chosen": -66.0958251953125,
"logps/ref_rejected": -97.68675231933594,
"logps/rejected": -99.49036407470703,
"loss": 1.0067,
"margin_dpo/margin_mean": 0.7516759634017944,
"margin_dpo/margin_std": 1.1150200366973877,
"step": 225
},
{
"epoch": 0.3416477702191988,
"fcm_dpo/beta": 1.4121769666671753,
"fcm_dpo/delta": 0.040757764130830765,
"fcm_dpo/margin": 0.6820341348648071,
"fcm_dpo/q_t": 0.3408370018005371,
"grad_norm": 292.9568176269531,
"learning_rate": 4.176753170773052e-07,
"logits/chosen": 0.18655280768871307,
"logits/rejected": 0.15081270039081573,
"logps/chosen": -52.478660583496094,
"logps/ref_chosen": -51.4168701171875,
"logps/ref_rejected": -66.30068969726562,
"logps/rejected": -68.04450988769531,
"loss": 1.0394,
"margin_dpo/margin_mean": 0.6820334792137146,
"margin_dpo/margin_std": 1.0649542808532715,
"step": 226
},
{
"epoch": 0.3431594860166289,
"fcm_dpo/beta": 1.4467318058013916,
"fcm_dpo/delta": 0.14849498867988586,
"fcm_dpo/margin": 0.5975882411003113,
"fcm_dpo/q_t": 0.3471581041812897,
"grad_norm": 412.5190734863281,
"learning_rate": 4.166922501290729e-07,
"logits/chosen": 0.20183053612709045,
"logits/rejected": 0.17074134945869446,
"logps/chosen": -59.01850891113281,
"logps/ref_chosen": -57.989776611328125,
"logps/ref_rejected": -75.05464172363281,
"logps/rejected": -76.68096160888672,
"loss": 1.1633,
"margin_dpo/margin_mean": 0.5975878238677979,
"margin_dpo/margin_std": 1.1138098239898682,
"step": 227
},
{
"epoch": 0.34467120181405897,
"fcm_dpo/beta": 1.4542537927627563,
"fcm_dpo/delta": -0.03622462600469589,
"fcm_dpo/margin": 0.7099840641021729,
"fcm_dpo/q_t": 0.3378611207008362,
"grad_norm": 351.0310974121094,
"learning_rate": 4.1570452044027405e-07,
"logits/chosen": 0.17106792330741882,
"logits/rejected": 0.11330675333738327,
"logps/chosen": -56.669921875,
"logps/ref_chosen": -55.55936813354492,
"logps/ref_rejected": -77.02364349365234,
"logps/rejected": -78.84417724609375,
"loss": 1.1439,
"margin_dpo/margin_mean": 0.709984540939331,
"margin_dpo/margin_std": 1.241539716720581,
"step": 228
},
{
"epoch": 0.34618291761148906,
"fcm_dpo/beta": 1.4833769798278809,
"fcm_dpo/delta": 0.18256068229675293,
"fcm_dpo/margin": 0.5615620613098145,
"fcm_dpo/q_t": 0.3450517952442169,
"grad_norm": 629.395263671875,
"learning_rate": 4.147121556398312e-07,
"logits/chosen": 0.23741164803504944,
"logits/rejected": 0.19696751236915588,
"logps/chosen": -51.810546875,
"logps/ref_chosen": -50.79466247558594,
"logps/ref_rejected": -78.4474105834961,
"logps/rejected": -80.02485656738281,
"loss": 1.0995,
"margin_dpo/margin_mean": 0.5615620613098145,
"margin_dpo/margin_std": 0.9383633732795715,
"step": 229
},
{
"epoch": 0.3476946334089191,
"fcm_dpo/beta": 1.5409061908721924,
"fcm_dpo/delta": 0.06196488440036774,
"fcm_dpo/margin": 0.6093405485153198,
"fcm_dpo/q_t": 0.35204917192459106,
"grad_norm": 399.1847839355469,
"learning_rate": 4.137151834863213e-07,
"logits/chosen": 0.1482779085636139,
"logits/rejected": 0.14588508009910583,
"logps/chosen": -57.8225212097168,
"logps/ref_chosen": -56.729225158691406,
"logps/ref_rejected": -62.99180603027344,
"logps/rejected": -64.69444274902344,
"loss": 1.1512,
"margin_dpo/margin_mean": 0.6093416213989258,
"margin_dpo/margin_std": 1.131590485572815,
"step": 230
},
{
"epoch": 0.3492063492063492,
"fcm_dpo/beta": 1.4233132600784302,
"fcm_dpo/delta": -0.39863085746765137,
"fcm_dpo/margin": 0.9415854215621948,
"fcm_dpo/q_t": 0.27584952116012573,
"grad_norm": 315.82147216796875,
"learning_rate": 4.1271363186719835e-07,
"logits/chosen": 0.11343254894018173,
"logits/rejected": 0.09590326249599457,
"logps/chosen": -73.67927551269531,
"logps/ref_chosen": -72.59709930419922,
"logps/ref_rejected": -86.2322998046875,
"logps/rejected": -88.25605773925781,
"loss": 0.7891,
"margin_dpo/margin_mean": 0.9415853023529053,
"margin_dpo/margin_std": 1.024916172027588,
"step": 231
},
{
"epoch": 0.3507180650037793,
"fcm_dpo/beta": 1.3663169145584106,
"fcm_dpo/delta": -0.1273672878742218,
"fcm_dpo/margin": 0.8124350309371948,
"fcm_dpo/q_t": 0.33132317662239075,
"grad_norm": 345.0347595214844,
"learning_rate": 4.1170752879801436e-07,
"logits/chosen": 0.14546144008636475,
"logits/rejected": 0.12237675487995148,
"logps/chosen": -69.10570526123047,
"logps/ref_chosen": -68.1185302734375,
"logps/ref_rejected": -83.79415893554688,
"logps/rejected": -85.5937728881836,
"loss": 1.0339,
"margin_dpo/margin_mean": 0.812435507774353,
"margin_dpo/margin_std": 1.2407793998718262,
"step": 232
},
{
"epoch": 0.35222978080120937,
"fcm_dpo/beta": 1.4025087356567383,
"fcm_dpo/delta": 0.2927402853965759,
"fcm_dpo/margin": 0.5172353386878967,
"fcm_dpo/q_t": 0.3894173204898834,
"grad_norm": 380.82867431640625,
"learning_rate": 4.106969024216348e-07,
"logits/chosen": 0.10903730243444443,
"logits/rejected": 0.06897353380918503,
"logps/chosen": -56.4796142578125,
"logps/ref_chosen": -55.070152282714844,
"logps/ref_rejected": -66.61845397949219,
"logps/rejected": -68.5451431274414,
"loss": 1.212,
"margin_dpo/margin_mean": 0.5172350406646729,
"margin_dpo/margin_std": 1.0877723693847656,
"step": 233
},
{
"epoch": 0.35374149659863946,
"fcm_dpo/beta": 1.5282173156738281,
"fcm_dpo/delta": 0.27392610907554626,
"fcm_dpo/margin": 0.48579320311546326,
"fcm_dpo/q_t": 0.3870254158973694,
"grad_norm": 388.5447082519531,
"learning_rate": 4.09681781007452e-07,
"logits/chosen": 0.09753985702991486,
"logits/rejected": 0.08481541275978088,
"logps/chosen": -57.07015609741211,
"logps/ref_chosen": -55.92589569091797,
"logps/ref_rejected": -51.11608123779297,
"logps/rejected": -52.746131896972656,
"loss": 1.2257,
"margin_dpo/margin_mean": 0.48579323291778564,
"margin_dpo/margin_std": 1.005543828010559,
"step": 234
},
{
"epoch": 0.35525321239606955,
"fcm_dpo/beta": 1.4440486431121826,
"fcm_dpo/delta": -0.3470792770385742,
"fcm_dpo/margin": 0.8979411721229553,
"fcm_dpo/q_t": 0.2706488370895386,
"grad_norm": 331.3047180175781,
"learning_rate": 4.08662192950594e-07,
"logits/chosen": 0.18871155381202698,
"logits/rejected": 0.17248067259788513,
"logps/chosen": -65.49934387207031,
"logps/ref_chosen": -64.53972625732422,
"logps/ref_rejected": -77.69151306152344,
"logps/rejected": -79.549072265625,
"loss": 0.7827,
"margin_dpo/margin_mean": 0.897940993309021,
"margin_dpo/margin_std": 0.9550020694732666,
"step": 235
},
{
"epoch": 0.35676492819349964,
"fcm_dpo/beta": 1.4077339172363281,
"fcm_dpo/delta": -0.08833800256252289,
"fcm_dpo/margin": 0.7657995223999023,
"fcm_dpo/q_t": 0.34863966703414917,
"grad_norm": 387.498291015625,
"learning_rate": 4.076381667711306e-07,
"logits/chosen": 0.10846032202243805,
"logits/rejected": 0.09852010011672974,
"logps/chosen": -72.46954345703125,
"logps/ref_chosen": -71.15473937988281,
"logps/ref_rejected": -84.88541412353516,
"logps/rejected": -86.96601867675781,
"loss": 1.1004,
"margin_dpo/margin_mean": 0.7657992839813232,
"margin_dpo/margin_std": 1.3017526865005493,
"step": 236
},
{
"epoch": 0.35827664399092973,
"fcm_dpo/beta": 1.3855292797088623,
"fcm_dpo/delta": -0.0012986212968826294,
"fcm_dpo/margin": 0.7191513776779175,
"fcm_dpo/q_t": 0.3367508053779602,
"grad_norm": 383.1600036621094,
"learning_rate": 4.066097311132753e-07,
"logits/chosen": 0.20629596710205078,
"logits/rejected": 0.19376662373542786,
"logps/chosen": -77.21894836425781,
"logps/ref_chosen": -76.14201354980469,
"logps/ref_rejected": -80.88479614257812,
"logps/rejected": -82.6808853149414,
"loss": 1.1096,
"margin_dpo/margin_mean": 0.7191513180732727,
"margin_dpo/margin_std": 1.1922106742858887,
"step": 237
},
{
"epoch": 0.35978835978835977,
"fcm_dpo/beta": 1.488319993019104,
"fcm_dpo/delta": 0.24613891541957855,
"fcm_dpo/margin": 0.5112044811248779,
"fcm_dpo/q_t": 0.3709501624107361,
"grad_norm": 2477.388916015625,
"learning_rate": 4.0557691474458414e-07,
"logits/chosen": 0.13462495803833008,
"logits/rejected": 0.11732495576143265,
"logps/chosen": -70.04851531982422,
"logps/ref_chosen": -68.88484954833984,
"logps/ref_rejected": -75.8946304321289,
"logps/rejected": -77.56949615478516,
"loss": 1.4557,
"margin_dpo/margin_mean": 0.5112046003341675,
"margin_dpo/margin_std": 1.32561194896698,
"step": 238
},
{
"epoch": 0.36130007558578986,
"fcm_dpo/beta": 1.4520866870880127,
"fcm_dpo/delta": -0.12432458996772766,
"fcm_dpo/margin": 0.7647981643676758,
"fcm_dpo/q_t": 0.31985604763031006,
"grad_norm": 413.1396789550781,
"learning_rate": 4.045397465551513e-07,
"logits/chosen": 0.19768695533275604,
"logits/rejected": 0.11818195134401321,
"logps/chosen": -58.036109924316406,
"logps/ref_chosen": -56.771827697753906,
"logps/ref_rejected": -116.23050689697266,
"logps/rejected": -118.25959014892578,
"loss": 1.0398,
"margin_dpo/margin_mean": 0.764798641204834,
"margin_dpo/margin_std": 1.1497983932495117,
"step": 239
},
{
"epoch": 0.36281179138321995,
"fcm_dpo/beta": 1.4255015850067139,
"fcm_dpo/delta": -0.19121024012565613,
"fcm_dpo/margin": 0.8184474110603333,
"fcm_dpo/q_t": 0.30751824378967285,
"grad_norm": 311.9652404785156,
"learning_rate": 4.0349825555680045e-07,
"logits/chosen": 0.14041496813297272,
"logits/rejected": 0.07365534454584122,
"logps/chosen": -54.64623260498047,
"logps/ref_chosen": -53.35411071777344,
"logps/ref_rejected": -80.12019348144531,
"logps/rejected": -82.23076629638672,
"loss": 0.9905,
"margin_dpo/margin_mean": 0.8184475898742676,
"margin_dpo/margin_std": 1.1477313041687012,
"step": 240
},
{
"epoch": 0.36432350718065004,
"fcm_dpo/beta": 1.5252119302749634,
"fcm_dpo/delta": 0.4934987425804138,
"fcm_dpo/margin": 0.336672842502594,
"fcm_dpo/q_t": 0.4018644094467163,
"grad_norm": 527.2469482421875,
"learning_rate": 4.0245247088227377e-07,
"logits/chosen": 0.12555167078971863,
"logits/rejected": 0.09838816523551941,
"logps/chosen": -73.14974975585938,
"logps/ref_chosen": -71.89541625976562,
"logps/ref_rejected": -83.03492736816406,
"logps/rejected": -84.62593078613281,
"loss": 1.4036,
"margin_dpo/margin_mean": 0.33667343854904175,
"margin_dpo/margin_std": 0.9868639707565308,
"step": 241
},
{
"epoch": 0.36583522297808013,
"fcm_dpo/beta": 1.443713903427124,
"fcm_dpo/delta": -0.2312009036540985,
"fcm_dpo/margin": 0.8218022584915161,
"fcm_dpo/q_t": 0.31463247537612915,
"grad_norm": 314.4766845703125,
"learning_rate": 4.0140242178441665e-07,
"logits/chosen": 0.1157296746969223,
"logits/rejected": 0.09765278548002243,
"logps/chosen": -58.899356842041016,
"logps/ref_chosen": -57.927433013916016,
"logps/ref_rejected": -67.838623046875,
"logps/rejected": -69.6323471069336,
"loss": 0.9264,
"margin_dpo/margin_mean": 0.8218023180961609,
"margin_dpo/margin_std": 1.0703468322753906,
"step": 242
},
{
"epoch": 0.3673469387755102,
"fcm_dpo/beta": 1.4689741134643555,
"fcm_dpo/delta": 0.049783095717430115,
"fcm_dpo/margin": 0.6500656604766846,
"fcm_dpo/q_t": 0.3428027033805847,
"grad_norm": 388.419921875,
"learning_rate": 4.003481376353596e-07,
"logits/chosen": 0.10413776338100433,
"logits/rejected": 0.09902875125408173,
"logps/chosen": -75.4229965209961,
"logps/ref_chosen": -74.27667236328125,
"logps/ref_rejected": -73.24340057373047,
"logps/rejected": -75.03977966308594,
"loss": 1.1186,
"margin_dpo/margin_mean": 0.650065541267395,
"margin_dpo/margin_std": 1.1026396751403809,
"step": 243
},
{
"epoch": 0.3688586545729403,
"fcm_dpo/beta": 1.4141755104064941,
"fcm_dpo/delta": -0.2628709375858307,
"fcm_dpo/margin": 0.8689752817153931,
"fcm_dpo/q_t": 0.28472983837127686,
"grad_norm": 275.4247131347656,
"learning_rate": 3.9928964792569654e-07,
"logits/chosen": 0.15633343160152435,
"logits/rejected": 0.09156134724617004,
"logps/chosen": -54.57572555541992,
"logps/ref_chosen": -53.36390686035156,
"logps/ref_rejected": -71.10276794433594,
"logps/rejected": -73.18356323242188,
"loss": 0.7643,
"margin_dpo/margin_mean": 0.8689748048782349,
"margin_dpo/margin_std": 0.9518204927444458,
"step": 244
},
{
"epoch": 0.37037037037037035,
"fcm_dpo/beta": 1.3580291271209717,
"fcm_dpo/delta": -0.23086267709732056,
"fcm_dpo/margin": 0.8861613273620605,
"fcm_dpo/q_t": 0.3000994920730591,
"grad_norm": 464.6539611816406,
"learning_rate": 3.982269822636601e-07,
"logits/chosen": 0.16047267615795135,
"logits/rejected": 0.14088091254234314,
"logps/chosen": -72.37004089355469,
"logps/ref_chosen": -71.19510650634766,
"logps/ref_rejected": -80.76235961914062,
"logps/rejected": -82.82345581054688,
"loss": 0.9896,
"margin_dpo/margin_mean": 0.8861616849899292,
"margin_dpo/margin_std": 1.2600171566009521,
"step": 245
},
{
"epoch": 0.37188208616780044,
"fcm_dpo/beta": 1.2877942323684692,
"fcm_dpo/delta": -0.19463737308979034,
"fcm_dpo/margin": 0.9072257280349731,
"fcm_dpo/q_t": 0.3076491057872772,
"grad_norm": 311.3534240722656,
"learning_rate": 3.971601703742932e-07,
"logits/chosen": 0.15642526745796204,
"logits/rejected": 0.11311867088079453,
"logps/chosen": -72.98123168945312,
"logps/ref_chosen": -71.62104797363281,
"logps/ref_rejected": -94.03392028808594,
"logps/rejected": -96.30133056640625,
"loss": 0.8849,
"margin_dpo/margin_mean": 0.9072257876396179,
"margin_dpo/margin_std": 1.1439390182495117,
"step": 246
},
{
"epoch": 0.37339380196523053,
"fcm_dpo/beta": 1.385887622833252,
"fcm_dpo/delta": 0.3913596272468567,
"fcm_dpo/margin": 0.44914084672927856,
"fcm_dpo/q_t": 0.3937835395336151,
"grad_norm": 441.7315673828125,
"learning_rate": 3.960892420986177e-07,
"logits/chosen": 0.14804767072200775,
"logits/rejected": 0.13864608108997345,
"logps/chosen": -81.47723388671875,
"logps/ref_chosen": -80.02254486083984,
"logps/ref_rejected": -89.22705841064453,
"logps/rejected": -91.13088989257812,
"loss": 1.3732,
"margin_dpo/margin_mean": 0.44914010167121887,
"margin_dpo/margin_std": 1.1773467063903809,
"step": 247
},
{
"epoch": 0.3749055177626606,
"fcm_dpo/beta": 1.4095741510391235,
"fcm_dpo/delta": 0.022829867899417877,
"fcm_dpo/margin": 0.6935802698135376,
"fcm_dpo/q_t": 0.3401643633842468,
"grad_norm": 382.5201110839844,
"learning_rate": 3.9501422739279953e-07,
"logits/chosen": 0.1225530132651329,
"logits/rejected": 0.14110150933265686,
"logps/chosen": -66.71062469482422,
"logps/ref_chosen": -65.37796020507812,
"logps/ref_rejected": -61.365787506103516,
"logps/rejected": -63.392032623291016,
"loss": 1.0538,
"margin_dpo/margin_mean": 0.6935799717903137,
"margin_dpo/margin_std": 1.1022088527679443,
"step": 248
},
{
"epoch": 0.3764172335600907,
"fcm_dpo/beta": 1.5373433828353882,
"fcm_dpo/delta": 0.5078557133674622,
"fcm_dpo/margin": 0.3260525166988373,
"fcm_dpo/q_t": 0.40423983335494995,
"grad_norm": 464.44989013671875,
"learning_rate": 3.9393515632731094e-07,
"logits/chosen": 0.10547161102294922,
"logits/rejected": 0.12796463072299957,
"logps/chosen": -76.10067749023438,
"logps/ref_chosen": -74.60145568847656,
"logps/ref_rejected": -63.79338455200195,
"logps/rejected": -65.61865234375,
"loss": 1.4834,
"margin_dpo/margin_mean": 0.3260522484779358,
"margin_dpo/margin_std": 1.0597259998321533,
"step": 249
},
{
"epoch": 0.3779289493575208,
"fcm_dpo/beta": 1.4959361553192139,
"fcm_dpo/delta": -0.35001400113105774,
"fcm_dpo/margin": 0.871913731098175,
"fcm_dpo/q_t": 0.305349200963974,
"grad_norm": 331.92315673828125,
"learning_rate": 3.9285205908608934e-07,
"logits/chosen": 0.21487398445606232,
"logits/rejected": 0.18293528258800507,
"logps/chosen": -63.18694305419922,
"logps/ref_chosen": -61.938209533691406,
"logps/ref_rejected": -72.21602630615234,
"logps/rejected": -74.336669921875,
"loss": 0.9596,
"margin_dpo/margin_mean": 0.8719134330749512,
"margin_dpo/margin_std": 1.247259259223938,
"step": 250
},
{
"epoch": 0.3794406651549509,
"fcm_dpo/beta": 1.4492324590682983,
"fcm_dpo/delta": 0.03792502358555794,
"fcm_dpo/margin": 0.6663841009140015,
"fcm_dpo/q_t": 0.3597896099090576,
"grad_norm": 376.5051574707031,
"learning_rate": 3.9176496596569265e-07,
"logits/chosen": 0.2115197777748108,
"logits/rejected": 0.1815691590309143,
"logps/chosen": -68.17942810058594,
"logps/ref_chosen": -66.85694885253906,
"logps/ref_rejected": -84.83396911621094,
"logps/rejected": -86.82284545898438,
"loss": 1.1816,
"margin_dpo/margin_mean": 0.6663837432861328,
"margin_dpo/margin_std": 1.2780930995941162,
"step": 251
},
{
"epoch": 0.38095238095238093,
"fcm_dpo/beta": 1.496967077255249,
"fcm_dpo/delta": -0.03972265124320984,
"fcm_dpo/margin": 0.6779400110244751,
"fcm_dpo/q_t": 0.32841235399246216,
"grad_norm": 365.8186340332031,
"learning_rate": 3.9067390737445254e-07,
"logits/chosen": 0.12412711977958679,
"logits/rejected": 0.0818972960114479,
"logps/chosen": -57.51960372924805,
"logps/ref_chosen": -56.22393035888672,
"logps/ref_rejected": -77.1136245727539,
"logps/rejected": -79.08723449707031,
"loss": 1.2777,
"margin_dpo/margin_mean": 0.6779407262802124,
"margin_dpo/margin_std": 1.3109935522079468,
"step": 252
},
{
"epoch": 0.382464096749811,
"fcm_dpo/beta": 1.3989202976226807,
"fcm_dpo/delta": -0.0833391547203064,
"fcm_dpo/margin": 0.7623114585876465,
"fcm_dpo/q_t": 0.3369859755039215,
"grad_norm": 290.3008117675781,
"learning_rate": 3.8957891383162304e-07,
"logits/chosen": 0.17171865701675415,
"logits/rejected": 0.14320127665996552,
"logps/chosen": -53.48502731323242,
"logps/ref_chosen": -52.21001434326172,
"logps/ref_rejected": -58.75764846801758,
"logps/rejected": -60.79496765136719,
"loss": 1.0203,
"margin_dpo/margin_mean": 0.762311577796936,
"margin_dpo/margin_std": 1.1594964265823364,
"step": 253
},
{
"epoch": 0.3839758125472411,
"fcm_dpo/beta": 1.3931760787963867,
"fcm_dpo/delta": -0.08251707255840302,
"fcm_dpo/margin": 0.7698103189468384,
"fcm_dpo/q_t": 0.33657705783843994,
"grad_norm": 385.88311767578125,
"learning_rate": 3.884800159665276e-07,
"logits/chosen": 0.13405011594295502,
"logits/rejected": 0.09684339165687561,
"logps/chosen": -66.99877166748047,
"logps/ref_chosen": -65.63632202148438,
"logps/ref_rejected": -82.34425354003906,
"logps/rejected": -84.47650146484375,
"loss": 1.1088,
"margin_dpo/margin_mean": 0.7698097229003906,
"margin_dpo/margin_std": 1.271782398223877,
"step": 254
},
{
"epoch": 0.3854875283446712,
"fcm_dpo/beta": 1.3778660297393799,
"fcm_dpo/delta": -0.019536815583705902,
"fcm_dpo/margin": 0.7369774580001831,
"fcm_dpo/q_t": 0.33573752641677856,
"grad_norm": 347.2344055175781,
"learning_rate": 3.873772445177015e-07,
"logits/chosen": 0.1548142433166504,
"logits/rejected": 0.12779046595096588,
"logps/chosen": -69.03644561767578,
"logps/ref_chosen": -67.91108703613281,
"logps/ref_rejected": -83.89114379882812,
"logps/rejected": -85.75347900390625,
"loss": 1.068,
"margin_dpo/margin_mean": 0.7369774580001831,
"margin_dpo/margin_std": 1.1885807514190674,
"step": 255
},
{
"epoch": 0.3869992441421013,
"fcm_dpo/beta": 1.396597146987915,
"fcm_dpo/delta": -0.011127792298793793,
"fcm_dpo/margin": 0.7227224111557007,
"fcm_dpo/q_t": 0.3369947075843811,
"grad_norm": 397.8918762207031,
"learning_rate": 3.862706303320329e-07,
"logits/chosen": 0.1215638667345047,
"logits/rejected": 0.08205322921276093,
"logps/chosen": -64.94349670410156,
"logps/ref_chosen": -63.49998474121094,
"logps/ref_rejected": -90.77104187011719,
"logps/rejected": -92.93727111816406,
"loss": 1.0738,
"margin_dpo/margin_mean": 0.7227222323417664,
"margin_dpo/margin_std": 1.203334927558899,
"step": 256
},
{
"epoch": 0.3885109599395314,
"fcm_dpo/beta": 1.2768826484680176,
"fcm_dpo/delta": -0.461418092250824,
"fcm_dpo/margin": 1.086154580116272,
"fcm_dpo/q_t": 0.28833481669425964,
"grad_norm": 322.4739685058594,
"learning_rate": 3.851602043638994e-07,
"logits/chosen": 0.1613893210887909,
"logits/rejected": 0.11499130725860596,
"logps/chosen": -71.91011810302734,
"logps/ref_chosen": -70.60064697265625,
"logps/ref_rejected": -108.58313751220703,
"logps/rejected": -110.978759765625,
"loss": 0.9387,
"margin_dpo/margin_mean": 1.0861549377441406,
"margin_dpo/margin_std": 1.5388684272766113,
"step": 257
},
{
"epoch": 0.3900226757369615,
"fcm_dpo/beta": 1.2841336727142334,
"fcm_dpo/delta": 0.00753195583820343,
"fcm_dpo/margin": 0.7705293297767639,
"fcm_dpo/q_t": 0.32425814867019653,
"grad_norm": 300.5933837890625,
"learning_rate": 3.840459976743023e-07,
"logits/chosen": 0.1647588312625885,
"logits/rejected": 0.1312163770198822,
"logps/chosen": -60.74152374267578,
"logps/ref_chosen": -59.25416564941406,
"logps/ref_rejected": -85.58709716796875,
"logps/rejected": -87.84498596191406,
"loss": 0.933,
"margin_dpo/margin_mean": 0.7705295085906982,
"margin_dpo/margin_std": 1.0195441246032715,
"step": 258
},
{
"epoch": 0.3915343915343915,
"fcm_dpo/beta": 1.1445305347442627,
"fcm_dpo/delta": -0.5759162902832031,
"fcm_dpo/margin": 1.2901654243469238,
"fcm_dpo/q_t": 0.2562459111213684,
"grad_norm": 223.67605590820312,
"learning_rate": 3.8292804142999796e-07,
"logits/chosen": 0.09316843003034592,
"logits/rejected": 0.021822050213813782,
"logps/chosen": -66.56085205078125,
"logps/ref_chosen": -65.43487548828125,
"logps/ref_rejected": -95.41731262207031,
"logps/rejected": -97.83345031738281,
"loss": 0.7207,
"margin_dpo/margin_mean": 1.2901657819747925,
"margin_dpo/margin_std": 1.2492828369140625,
"step": 259
},
{
"epoch": 0.3930461073318216,
"fcm_dpo/beta": 1.1257474422454834,
"fcm_dpo/delta": 0.017584767192602158,
"fcm_dpo/margin": 0.8737805485725403,
"fcm_dpo/q_t": 0.33795487880706787,
"grad_norm": 259.8592834472656,
"learning_rate": 3.818063669026256e-07,
"logits/chosen": 0.127943217754364,
"logits/rejected": 0.06927517056465149,
"logps/chosen": -50.53019714355469,
"logps/ref_chosen": -49.08958435058594,
"logps/ref_rejected": -79.01708221435547,
"logps/rejected": -81.33147430419922,
"loss": 0.9979,
"margin_dpo/margin_mean": 0.873779296875,
"margin_dpo/margin_std": 1.2833609580993652,
"step": 260
},
{
"epoch": 0.3945578231292517,
"fcm_dpo/beta": 1.1455121040344238,
"fcm_dpo/delta": 0.08530843257904053,
"fcm_dpo/margin": 0.8054367303848267,
"fcm_dpo/q_t": 0.34108513593673706,
"grad_norm": 306.086181640625,
"learning_rate": 3.806810054678331e-07,
"logits/chosen": 0.03654761239886284,
"logits/rejected": 0.04507092386484146,
"logps/chosen": -72.11337280273438,
"logps/ref_chosen": -70.87239074707031,
"logps/ref_rejected": -65.01522064208984,
"logps/rejected": -67.06163787841797,
"loss": 0.9947,
"margin_dpo/margin_mean": 0.805436909198761,
"margin_dpo/margin_std": 1.1771018505096436,
"step": 261
},
{
"epoch": 0.3960695389266818,
"fcm_dpo/beta": 1.1686980724334717,
"fcm_dpo/delta": 0.04035666957497597,
"fcm_dpo/margin": 0.8231496810913086,
"fcm_dpo/q_t": 0.3458126187324524,
"grad_norm": 323.2743225097656,
"learning_rate": 3.7955198860439887e-07,
"logits/chosen": 0.17141227424144745,
"logits/rejected": 0.13052189350128174,
"logps/chosen": -69.32987976074219,
"logps/ref_chosen": -67.8706283569336,
"logps/ref_rejected": -88.7205810546875,
"logps/rejected": -91.00297546386719,
"loss": 0.9832,
"margin_dpo/margin_mean": 0.8231501579284668,
"margin_dpo/margin_std": 1.2316184043884277,
"step": 262
},
{
"epoch": 0.3975812547241119,
"fcm_dpo/beta": 1.23178231716156,
"fcm_dpo/delta": 0.2991076111793518,
"fcm_dpo/margin": 0.5800197124481201,
"fcm_dpo/q_t": 0.38507431745529175,
"grad_norm": 314.1044616699219,
"learning_rate": 3.784193478933516e-07,
"logits/chosen": 0.13119199872016907,
"logits/rejected": 0.05917968600988388,
"logps/chosen": -56.671165466308594,
"logps/ref_chosen": -55.194583892822266,
"logps/ref_rejected": -80.54048156738281,
"logps/rejected": -82.59709167480469,
"loss": 1.231,
"margin_dpo/margin_mean": 0.5800192356109619,
"margin_dpo/margin_std": 1.2319090366363525,
"step": 263
},
{
"epoch": 0.39909297052154197,
"fcm_dpo/beta": 1.2216218709945679,
"fcm_dpo/delta": -0.13029904663562775,
"fcm_dpo/margin": 0.9128764867782593,
"fcm_dpo/q_t": 0.34319591522216797,
"grad_norm": 366.9609069824219,
"learning_rate": 3.7728311501708674e-07,
"logits/chosen": 0.08489110320806503,
"logits/rejected": 0.04814103990793228,
"logps/chosen": -84.4659652709961,
"logps/ref_chosen": -83.17068481445312,
"logps/ref_rejected": -88.33625793457031,
"logps/rejected": -90.54441833496094,
"loss": 1.0999,
"margin_dpo/margin_mean": 0.9128766655921936,
"margin_dpo/margin_std": 1.5033926963806152,
"step": 264
},
{
"epoch": 0.40060468631897206,
"fcm_dpo/beta": 1.1946429014205933,
"fcm_dpo/delta": -0.042388565838336945,
"fcm_dpo/margin": 0.8688783049583435,
"fcm_dpo/q_t": 0.31362420320510864,
"grad_norm": 336.9339904785156,
"learning_rate": 3.7614332175848027e-07,
"logits/chosen": 0.1749192476272583,
"logits/rejected": 0.12444747984409332,
"logps/chosen": -53.23479080200195,
"logps/ref_chosen": -51.66284942626953,
"logps/ref_rejected": -67.1720962524414,
"logps/rejected": -69.6129150390625,
"loss": 0.9924,
"margin_dpo/margin_mean": 0.8688779473304749,
"margin_dpo/margin_std": 1.2264655828475952,
"step": 265
},
{
"epoch": 0.4021164021164021,
"fcm_dpo/beta": 1.200698971748352,
"fcm_dpo/delta": 0.018015079200267792,
"fcm_dpo/margin": 0.8188613653182983,
"fcm_dpo/q_t": 0.342385858297348,
"grad_norm": 329.1114807128906,
"learning_rate": 3.75e-07,
"logits/chosen": 0.1544215977191925,
"logits/rejected": 0.10259807109832764,
"logps/chosen": -58.89800262451172,
"logps/ref_chosen": -57.45049285888672,
"logps/ref_rejected": -77.60826110839844,
"logps/rejected": -79.87462615966797,
"loss": 1.0917,
"margin_dpo/margin_mean": 0.8188612461090088,
"margin_dpo/margin_std": 1.41795015335083,
"step": 266
},
{
"epoch": 0.4036281179138322,
"fcm_dpo/beta": 1.2707109451293945,
"fcm_dpo/delta": 0.28088656067848206,
"fcm_dpo/margin": 0.5725541710853577,
"fcm_dpo/q_t": 0.3735983073711395,
"grad_norm": 291.73394775390625,
"learning_rate": 3.738531817228131e-07,
"logits/chosen": 0.15856996178627014,
"logits/rejected": 0.1470004916191101,
"logps/chosen": -56.46894836425781,
"logps/ref_chosen": -55.03535079956055,
"logps/ref_rejected": -66.0953369140625,
"logps/rejected": -68.10148620605469,
"loss": 1.2705,
"margin_dpo/margin_mean": 0.5725547075271606,
"margin_dpo/margin_std": 1.2551192045211792,
"step": 267
},
{
"epoch": 0.4051398337112623,
"fcm_dpo/beta": 1.2686963081359863,
"fcm_dpo/delta": -0.053165629506111145,
"fcm_dpo/margin": 0.8244317770004272,
"fcm_dpo/q_t": 0.34110718965530396,
"grad_norm": 308.4896240234375,
"learning_rate": 3.7270289900589204e-07,
"logits/chosen": 0.07009106129407883,
"logits/rejected": 0.05312522128224373,
"logps/chosen": -66.37588500976562,
"logps/ref_chosen": -65.07174682617188,
"logps/ref_rejected": -71.42485809326172,
"logps/rejected": -73.5534439086914,
"loss": 0.9779,
"margin_dpo/margin_mean": 0.8244317770004272,
"margin_dpo/margin_std": 1.2355961799621582,
"step": 268
},
{
"epoch": 0.40665154950869237,
"fcm_dpo/beta": 1.2246897220611572,
"fcm_dpo/delta": -0.29109132289886475,
"fcm_dpo/margin": 1.0223791599273682,
"fcm_dpo/q_t": 0.3000904321670532,
"grad_norm": 285.43951416015625,
"learning_rate": 3.7154918402511714e-07,
"logits/chosen": 0.22535249590873718,
"logits/rejected": 0.1888647824525833,
"logps/chosen": -68.56861114501953,
"logps/ref_chosen": -67.1362075805664,
"logps/ref_rejected": -82.55778503417969,
"logps/rejected": -85.01256561279297,
"loss": 0.9234,
"margin_dpo/margin_mean": 1.0223793983459473,
"margin_dpo/margin_std": 1.2574834823608398,
"step": 269
},
{
"epoch": 0.40816326530612246,
"fcm_dpo/beta": 1.2294461727142334,
"fcm_dpo/delta": 0.26967453956604004,
"fcm_dpo/margin": 0.6104166507720947,
"fcm_dpo/q_t": 0.3626842498779297,
"grad_norm": 348.1169738769531,
"learning_rate": 3.7039206905237656e-07,
"logits/chosen": 0.18059831857681274,
"logits/rejected": 0.13146328926086426,
"logps/chosen": -68.18730163574219,
"logps/ref_chosen": -66.6886978149414,
"logps/ref_rejected": -85.16129302978516,
"logps/rejected": -87.27030944824219,
"loss": 1.1816,
"margin_dpo/margin_mean": 0.6104167103767395,
"margin_dpo/margin_std": 1.19452965259552,
"step": 270
},
{
"epoch": 0.40967498110355255,
"fcm_dpo/beta": 1.307576060295105,
"fcm_dpo/delta": 0.36521047353744507,
"fcm_dpo/margin": 0.5049761533737183,
"fcm_dpo/q_t": 0.4147690534591675,
"grad_norm": 440.6063537597656,
"learning_rate": 3.692315864546635e-07,
"logits/chosen": 0.1853274405002594,
"logits/rejected": 0.14235132932662964,
"logps/chosen": -73.94186401367188,
"logps/ref_chosen": -72.40754699707031,
"logps/ref_rejected": -92.06311798095703,
"logps/rejected": -94.10240173339844,
"loss": 1.5095,
"margin_dpo/margin_mean": 0.5049762725830078,
"margin_dpo/margin_std": 1.5028947591781616,
"step": 271
},
{
"epoch": 0.41118669690098264,
"fcm_dpo/beta": 1.266921043395996,
"fcm_dpo/delta": -0.4322693943977356,
"fcm_dpo/margin": 1.0848881006240845,
"fcm_dpo/q_t": 0.28500691056251526,
"grad_norm": 282.21990966796875,
"learning_rate": 3.6806776869317067e-07,
"logits/chosen": 0.15356749296188354,
"logits/rejected": 0.15307673811912537,
"logps/chosen": -67.91374206542969,
"logps/ref_chosen": -66.60140228271484,
"logps/ref_rejected": -67.74340057373047,
"logps/rejected": -70.140625,
"loss": 0.7882,
"margin_dpo/margin_mean": 1.0848881006240845,
"margin_dpo/margin_std": 1.259714126586914,
"step": 272
},
{
"epoch": 0.4126984126984127,
"fcm_dpo/beta": 1.2134504318237305,
"fcm_dpo/delta": -0.202475443482399,
"fcm_dpo/margin": 0.9696700572967529,
"fcm_dpo/q_t": 0.3056218922138214,
"grad_norm": 257.2859191894531,
"learning_rate": 3.669006483223828e-07,
"logits/chosen": 0.18354235589504242,
"logits/rejected": 0.14025747776031494,
"logps/chosen": -58.94585037231445,
"logps/ref_chosen": -57.35487747192383,
"logps/ref_rejected": -84.17168426513672,
"logps/rejected": -86.73233032226562,
"loss": 0.885,
"margin_dpo/margin_mean": 0.9696696996688843,
"margin_dpo/margin_std": 1.2579290866851807,
"step": 273
},
{
"epoch": 0.41421012849584277,
"fcm_dpo/beta": 1.17547607421875,
"fcm_dpo/delta": -0.022935807704925537,
"fcm_dpo/margin": 0.8681968450546265,
"fcm_dpo/q_t": 0.3227364718914032,
"grad_norm": 261.911376953125,
"learning_rate": 3.657302579891656e-07,
"logits/chosen": 0.0659763365983963,
"logits/rejected": 0.04620751738548279,
"logps/chosen": -61.098419189453125,
"logps/ref_chosen": -59.64149475097656,
"logps/ref_rejected": -68.29348754882812,
"logps/rejected": -70.61860656738281,
"loss": 1.004,
"margin_dpo/margin_mean": 0.8681962490081787,
"margin_dpo/margin_std": 1.2701618671417236,
"step": 274
},
{
"epoch": 0.41572184429327286,
"fcm_dpo/beta": 1.157713532447815,
"fcm_dpo/delta": -0.10604125261306763,
"fcm_dpo/margin": 0.9453713893890381,
"fcm_dpo/q_t": 0.32869789004325867,
"grad_norm": 274.95843505859375,
"learning_rate": 3.645566304318526e-07,
"logits/chosen": 0.1414085328578949,
"logits/rejected": 0.07877371460199356,
"logps/chosen": -54.83768844604492,
"logps/ref_chosen": -53.26664352416992,
"logps/ref_rejected": -73.84062194824219,
"logps/rejected": -76.3570327758789,
"loss": 0.9802,
"margin_dpo/margin_mean": 0.9453713893890381,
"margin_dpo/margin_std": 1.3858097791671753,
"step": 275
},
{
"epoch": 0.41723356009070295,
"fcm_dpo/beta": 1.1194026470184326,
"fcm_dpo/delta": -0.17233465611934662,
"fcm_dpo/margin": 1.0291403532028198,
"fcm_dpo/q_t": 0.29955726861953735,
"grad_norm": 242.73602294921875,
"learning_rate": 3.633797984793294e-07,
"logits/chosen": 0.11177192628383636,
"logits/rejected": 0.08380501717329025,
"logps/chosen": -54.313621520996094,
"logps/ref_chosen": -53.02079772949219,
"logps/ref_rejected": -61.56678771972656,
"logps/rejected": -63.888755798339844,
"loss": 0.8804,
"margin_dpo/margin_mean": 1.0291404724121094,
"margin_dpo/margin_std": 1.2605907917022705,
"step": 276
},
{
"epoch": 0.41874527588813304,
"fcm_dpo/beta": 1.16245436668396,
"fcm_dpo/delta": 0.17784440517425537,
"fcm_dpo/margin": 0.7118735313415527,
"fcm_dpo/q_t": 0.3743218183517456,
"grad_norm": 298.37750244140625,
"learning_rate": 3.6219979505011555e-07,
"logits/chosen": 0.2183254361152649,
"logits/rejected": 0.23113352060317993,
"logps/chosen": -73.12422180175781,
"logps/ref_chosen": -71.43299102783203,
"logps/ref_rejected": -67.65852355957031,
"logps/rejected": -70.06163024902344,
"loss": 1.1743,
"margin_dpo/margin_mean": 0.7118737697601318,
"margin_dpo/margin_std": 1.4327894449234009,
"step": 277
},
{
"epoch": 0.42025699168556313,
"fcm_dpo/beta": 1.172609806060791,
"fcm_dpo/delta": -0.03564952313899994,
"fcm_dpo/margin": 0.8724105358123779,
"fcm_dpo/q_t": 0.32611083984375,
"grad_norm": 296.2506408691406,
"learning_rate": 3.6101665315144353e-07,
"logits/chosen": 0.1023668497800827,
"logits/rejected": 0.07007478922605515,
"logps/chosen": -68.64065551757812,
"logps/ref_chosen": -67.11076354980469,
"logps/ref_rejected": -88.74851989746094,
"logps/rejected": -91.15081787109375,
"loss": 1.0143,
"margin_dpo/margin_mean": 0.8724101781845093,
"margin_dpo/margin_std": 1.2294014692306519,
"step": 278
},
{
"epoch": 0.4217687074829932,
"fcm_dpo/beta": 1.1110622882843018,
"fcm_dpo/delta": -0.26436370611190796,
"fcm_dpo/margin": 1.1091269254684448,
"fcm_dpo/q_t": 0.276674747467041,
"grad_norm": 208.7923583984375,
"learning_rate": 3.5983040587833563e-07,
"logits/chosen": 0.1121751144528389,
"logits/rejected": 0.0790662094950676,
"logps/chosen": -55.73821258544922,
"logps/ref_chosen": -54.49748611450195,
"logps/ref_rejected": -70.42373657226562,
"logps/rejected": -72.77357482910156,
"loss": 0.778,
"margin_dpo/margin_mean": 1.1091272830963135,
"margin_dpo/margin_std": 1.1377835273742676,
"step": 279
},
{
"epoch": 0.42328042328042326,
"fcm_dpo/beta": 1.045201063156128,
"fcm_dpo/delta": -0.19996249675750732,
"fcm_dpo/margin": 1.1231290102005005,
"fcm_dpo/q_t": 0.2829288840293884,
"grad_norm": 192.96217346191406,
"learning_rate": 3.586410864126781e-07,
"logits/chosen": 0.15486222505569458,
"logits/rejected": 0.123613640666008,
"logps/chosen": -61.731178283691406,
"logps/ref_chosen": -60.43281173706055,
"logps/ref_rejected": -78.39051818847656,
"logps/rejected": -80.81201171875,
"loss": 0.7481,
"margin_dpo/margin_mean": 1.1231298446655273,
"margin_dpo/margin_std": 1.1084861755371094,
"step": 280
},
{
"epoch": 0.42479213907785335,
"fcm_dpo/beta": 1.0173039436340332,
"fcm_dpo/delta": -0.09447715431451797,
"fcm_dpo/margin": 1.0629010200500488,
"fcm_dpo/q_t": 0.3139858841896057,
"grad_norm": 206.9331817626953,
"learning_rate": 3.574487280222929e-07,
"logits/chosen": 0.15210747718811035,
"logits/rejected": 0.15197323262691498,
"logps/chosen": -61.607582092285156,
"logps/ref_chosen": -60.2820930480957,
"logps/ref_rejected": -62.04009246826172,
"logps/rejected": -64.42848205566406,
"loss": 0.9316,
"margin_dpo/margin_mean": 1.0629009008407593,
"margin_dpo/margin_std": 1.3708744049072266,
"step": 281
},
{
"epoch": 0.42630385487528344,
"fcm_dpo/beta": 1.054863452911377,
"fcm_dpo/delta": 0.04717801511287689,
"fcm_dpo/margin": 0.8964927196502686,
"fcm_dpo/q_t": 0.33858174085617065,
"grad_norm": 262.0866394042969,
"learning_rate": 3.562533640600075e-07,
"logits/chosen": 0.1025453507900238,
"logits/rejected": 0.06392862647771835,
"logps/chosen": -62.19837951660156,
"logps/ref_chosen": -60.623924255371094,
"logps/ref_rejected": -68.67400360107422,
"logps/rejected": -71.14495849609375,
"loss": 1.0039,
"margin_dpo/margin_mean": 0.8964922428131104,
"margin_dpo/margin_std": 1.2645740509033203,
"step": 282
},
{
"epoch": 0.42781557067271353,
"fcm_dpo/beta": 1.0366630554199219,
"fcm_dpo/delta": -0.0020070038735866547,
"fcm_dpo/margin": 0.9660577774047852,
"fcm_dpo/q_t": 0.3374154269695282,
"grad_norm": 295.226318359375,
"learning_rate": 3.550550279627215e-07,
"logits/chosen": 0.11783361434936523,
"logits/rejected": 0.054866328835487366,
"logps/chosen": -69.20338439941406,
"logps/ref_chosen": -67.64775085449219,
"logps/ref_rejected": -99.96835327148438,
"logps/rejected": -102.49005126953125,
"loss": 1.0551,
"margin_dpo/margin_mean": 0.9660578370094299,
"margin_dpo/margin_std": 1.526496171951294,
"step": 283
},
{
"epoch": 0.4293272864701436,
"fcm_dpo/beta": 1.0322619676589966,
"fcm_dpo/delta": 0.00461952667683363,
"fcm_dpo/margin": 0.9647125005722046,
"fcm_dpo/q_t": 0.33289098739624023,
"grad_norm": 231.38433837890625,
"learning_rate": 3.5385375325047163e-07,
"logits/chosen": 0.17530453205108643,
"logits/rejected": 0.12596732378005981,
"logps/chosen": -58.40034103393555,
"logps/ref_chosen": -56.96742630004883,
"logps/ref_rejected": -86.36236572265625,
"logps/rejected": -88.75999450683594,
"loss": 0.9914,
"margin_dpo/margin_mean": 0.9647125005722046,
"margin_dpo/margin_std": 1.445831298828125,
"step": 284
},
{
"epoch": 0.4308390022675737,
"fcm_dpo/beta": 1.0661512613296509,
"fcm_dpo/delta": 0.16971619427204132,
"fcm_dpo/margin": 0.7905272245407104,
"fcm_dpo/q_t": 0.34532633423805237,
"grad_norm": 249.1006317138672,
"learning_rate": 3.5264957352549375e-07,
"logits/chosen": 0.18527567386627197,
"logits/rejected": 0.1653136909008026,
"logps/chosen": -73.34848022460938,
"logps/ref_chosen": -71.65611267089844,
"logps/ref_rejected": -81.63829803466797,
"logps/rejected": -84.12120056152344,
"loss": 0.9726,
"margin_dpo/margin_mean": 0.7905269861221313,
"margin_dpo/margin_std": 1.0835275650024414,
"step": 285
},
{
"epoch": 0.4323507180650038,
"fcm_dpo/beta": 1.005662441253662,
"fcm_dpo/delta": -0.34280824661254883,
"fcm_dpo/margin": 1.2858762741088867,
"fcm_dpo/q_t": 0.27013134956359863,
"grad_norm": 211.03579711914062,
"learning_rate": 3.514425224712835e-07,
"logits/chosen": 0.10800629109144211,
"logits/rejected": 0.0394493006169796,
"logps/chosen": -62.53429412841797,
"logps/ref_chosen": -61.07952117919922,
"logps/ref_rejected": -91.28128051757812,
"logps/rejected": -94.02192687988281,
"loss": 0.8318,
"margin_dpo/margin_mean": 1.2858755588531494,
"margin_dpo/margin_std": 1.5294857025146484,
"step": 286
},
{
"epoch": 0.43386243386243384,
"fcm_dpo/beta": 0.992131769657135,
"fcm_dpo/delta": -0.06625291705131531,
"fcm_dpo/margin": 1.0675835609436035,
"fcm_dpo/q_t": 0.3116587698459625,
"grad_norm": 213.88555908203125,
"learning_rate": 3.502326338516534e-07,
"logits/chosen": 0.13146105408668518,
"logits/rejected": 0.10076682269573212,
"logps/chosen": -47.52275848388672,
"logps/ref_chosen": -46.035789489746094,
"logps/ref_rejected": -59.95293426513672,
"logps/rejected": -62.507484436035156,
"loss": 0.8751,
"margin_dpo/margin_mean": 1.067583441734314,
"margin_dpo/margin_std": 1.295511245727539,
"step": 287
},
{
"epoch": 0.43537414965986393,
"fcm_dpo/beta": 0.9990655183792114,
"fcm_dpo/delta": 0.13602013885974884,
"fcm_dpo/margin": 0.8767856359481812,
"fcm_dpo/q_t": 0.35159996151924133,
"grad_norm": 293.45867919921875,
"learning_rate": 3.490199415097892e-07,
"logits/chosen": 0.053915057331323624,
"logits/rejected": 0.014950074255466461,
"logps/chosen": -67.08319854736328,
"logps/ref_chosen": -65.3908462524414,
"logps/ref_rejected": -88.53607940673828,
"logps/rejected": -91.10520935058594,
"loss": 1.0811,
"margin_dpo/margin_mean": 0.8767852187156677,
"margin_dpo/margin_std": 1.4756031036376953,
"step": 288
},
{
"epoch": 0.436885865457294,
"fcm_dpo/beta": 1.0097713470458984,
"fcm_dpo/delta": 0.013888869434595108,
"fcm_dpo/margin": 0.9776356220245361,
"fcm_dpo/q_t": 0.3457440435886383,
"grad_norm": 220.5363006591797,
"learning_rate": 3.4780447936730247e-07,
"logits/chosen": 0.16835784912109375,
"logits/rejected": 0.14368662238121033,
"logps/chosen": -56.29551696777344,
"logps/ref_chosen": -54.5936279296875,
"logps/ref_rejected": -67.20855712890625,
"logps/rejected": -69.88806915283203,
"loss": 1.0415,
"margin_dpo/margin_mean": 0.9776356220245361,
"margin_dpo/margin_std": 1.556645393371582,
"step": 289
},
{
"epoch": 0.4383975812547241,
"fcm_dpo/beta": 1.0043764114379883,
"fcm_dpo/delta": -0.04867362976074219,
"fcm_dpo/margin": 1.0387194156646729,
"fcm_dpo/q_t": 0.33352506160736084,
"grad_norm": 238.85610961914062,
"learning_rate": 3.465862814232821e-07,
"logits/chosen": 0.19789519906044006,
"logits/rejected": 0.14447157084941864,
"logps/chosen": -63.15911865234375,
"logps/ref_chosen": -61.38457489013672,
"logps/ref_rejected": -91.92778015136719,
"logps/rejected": -94.74103546142578,
"loss": 0.9701,
"margin_dpo/margin_mean": 1.038718819618225,
"margin_dpo/margin_std": 1.499373435974121,
"step": 290
},
{
"epoch": 0.4399092970521542,
"fcm_dpo/beta": 1.0028091669082642,
"fcm_dpo/delta": -0.10185343772172928,
"fcm_dpo/margin": 1.0858376026153564,
"fcm_dpo/q_t": 0.3095766305923462,
"grad_norm": 204.3426055908203,
"learning_rate": 3.4536538175334343e-07,
"logits/chosen": 0.23501402139663696,
"logits/rejected": 0.1915348768234253,
"logps/chosen": -52.46550750732422,
"logps/ref_chosen": -50.863037109375,
"logps/ref_rejected": -82.20868682861328,
"logps/rejected": -84.89698791503906,
"loss": 0.8974,
"margin_dpo/margin_mean": 1.085837960243225,
"margin_dpo/margin_std": 1.3475749492645264,
"step": 291
},
{
"epoch": 0.4414210128495843,
"fcm_dpo/beta": 1.0263406038284302,
"fcm_dpo/delta": 0.24256381392478943,
"fcm_dpo/margin": 0.7545459270477295,
"fcm_dpo/q_t": 0.3599158823490143,
"grad_norm": 320.62713623046875,
"learning_rate": 3.4414181450867465e-07,
"logits/chosen": 0.1599317491054535,
"logits/rejected": 0.11992324888706207,
"logps/chosen": -65.92310333251953,
"logps/ref_chosen": -64.34888458251953,
"logps/ref_rejected": -72.86434173583984,
"logps/rejected": -75.193115234375,
"loss": 1.0762,
"margin_dpo/margin_mean": 0.7545456886291504,
"margin_dpo/margin_std": 1.2547924518585205,
"step": 292
},
{
"epoch": 0.4429327286470144,
"fcm_dpo/beta": 1.0156301259994507,
"fcm_dpo/delta": -0.14779676496982574,
"fcm_dpo/margin": 1.1138885021209717,
"fcm_dpo/q_t": 0.29969820380210876,
"grad_norm": 190.7615966796875,
"learning_rate": 3.4291561391508185e-07,
"logits/chosen": 0.22061100602149963,
"logits/rejected": 0.15896283090114594,
"logps/chosen": -56.73548889160156,
"logps/ref_chosen": -54.869468688964844,
"logps/ref_rejected": -81.858642578125,
"logps/rejected": -84.83856201171875,
"loss": 0.8801,
"margin_dpo/margin_mean": 1.1138887405395508,
"margin_dpo/margin_std": 1.374760627746582,
"step": 293
},
{
"epoch": 0.4444444444444444,
"fcm_dpo/beta": 0.9778472185134888,
"fcm_dpo/delta": -0.08654538542032242,
"fcm_dpo/margin": 1.0972121953964233,
"fcm_dpo/q_t": 0.309672474861145,
"grad_norm": 168.36590576171875,
"learning_rate": 3.4168681427203153e-07,
"logits/chosen": 0.17619842290878296,
"logits/rejected": 0.143341526389122,
"logps/chosen": -58.26924133300781,
"logps/ref_chosen": -56.670902252197266,
"logps/ref_rejected": -70.32819366455078,
"logps/rejected": -73.02374267578125,
"loss": 0.8444,
"margin_dpo/margin_mean": 1.0972115993499756,
"margin_dpo/margin_std": 1.2684025764465332,
"step": 294
},
{
"epoch": 0.4459561602418745,
"fcm_dpo/beta": 1.0054044723510742,
"fcm_dpo/delta": 0.08222609758377075,
"fcm_dpo/margin": 0.9189479947090149,
"fcm_dpo/q_t": 0.35169196128845215,
"grad_norm": 228.4792938232422,
"learning_rate": 3.4045544995169125e-07,
"logits/chosen": 0.1701010763645172,
"logits/rejected": 0.10789903253316879,
"logps/chosen": -52.09330749511719,
"logps/ref_chosen": -50.40088653564453,
"logps/ref_rejected": -83.43521881103516,
"logps/rejected": -86.04659271240234,
"loss": 1.0393,
"margin_dpo/margin_mean": 0.918948233127594,
"margin_dpo/margin_std": 1.4247148036956787,
"step": 295
},
{
"epoch": 0.4474678760393046,
"fcm_dpo/beta": 0.9557000994682312,
"fcm_dpo/delta": -0.28874313831329346,
"fcm_dpo/margin": 1.3085522651672363,
"fcm_dpo/q_t": 0.28932487964630127,
"grad_norm": 233.505126953125,
"learning_rate": 3.392215553979679e-07,
"logits/chosen": 0.12661093473434448,
"logits/rejected": 0.09931506216526031,
"logps/chosen": -70.89358520507812,
"logps/ref_chosen": -69.15034484863281,
"logps/ref_rejected": -89.60166931152344,
"logps/rejected": -92.65345764160156,
"loss": 0.8386,
"margin_dpo/margin_mean": 1.3085522651672363,
"margin_dpo/margin_std": 1.4957661628723145,
"step": 296
},
{
"epoch": 0.4489795918367347,
"fcm_dpo/beta": 0.9359762668609619,
"fcm_dpo/delta": -0.1143687292933464,
"fcm_dpo/margin": 1.1771044731140137,
"fcm_dpo/q_t": 0.30046796798706055,
"grad_norm": 200.27796936035156,
"learning_rate": 3.3798516512554485e-07,
"logits/chosen": 0.13300062716007233,
"logits/rejected": 0.08352112770080566,
"logps/chosen": -59.854644775390625,
"logps/ref_chosen": -58.01630401611328,
"logps/ref_rejected": -69.95780944824219,
"logps/rejected": -72.97325134277344,
"loss": 0.8157,
"margin_dpo/margin_mean": 1.1771044731140137,
"margin_dpo/margin_std": 1.2866284847259521,
"step": 297
},
{
"epoch": 0.4504913076341648,
"fcm_dpo/beta": 0.9372185468673706,
"fcm_dpo/delta": 0.04445381462574005,
"fcm_dpo/margin": 1.0229597091674805,
"fcm_dpo/q_t": 0.3366781175136566,
"grad_norm": 205.09780883789062,
"learning_rate": 3.367463137189156e-07,
"logits/chosen": 0.2418043464422226,
"logits/rejected": 0.18982425332069397,
"logps/chosen": -58.04621505737305,
"logps/ref_chosen": -56.1693115234375,
"logps/ref_rejected": -68.55052185058594,
"logps/rejected": -71.45037841796875,
"loss": 1.0065,
"margin_dpo/margin_mean": 1.0229599475860596,
"margin_dpo/margin_std": 1.4922395944595337,
"step": 298
},
{
"epoch": 0.4520030234315949,
"fcm_dpo/beta": 0.9768006801605225,
"fcm_dpo/delta": 0.26853734254837036,
"fcm_dpo/margin": 0.7673162817955017,
"fcm_dpo/q_t": 0.3727618455886841,
"grad_norm": 255.50270080566406,
"learning_rate": 3.355050358314172e-07,
"logits/chosen": 0.08510833978652954,
"logits/rejected": 0.06492967158555984,
"logps/chosen": -64.02816009521484,
"logps/ref_chosen": -62.31780242919922,
"logps/ref_rejected": -72.60028839111328,
"logps/rejected": -75.07796478271484,
"loss": 1.1587,
"margin_dpo/margin_mean": 0.7673170566558838,
"margin_dpo/margin_std": 1.4521667957305908,
"step": 299
},
{
"epoch": 0.45351473922902497,
"fcm_dpo/beta": 0.982731282711029,
"fcm_dpo/delta": -0.016341693699359894,
"fcm_dpo/margin": 1.0325164794921875,
"fcm_dpo/q_t": 0.3216491937637329,
"grad_norm": 243.2555389404297,
"learning_rate": 3.3426136618426043e-07,
"logits/chosen": 0.14876267313957214,
"logits/rejected": 0.10573962330818176,
"logps/chosen": -62.35066223144531,
"logps/ref_chosen": -60.38157653808594,
"logps/ref_rejected": -75.45442199707031,
"logps/rejected": -78.45602416992188,
"loss": 0.9532,
"margin_dpo/margin_mean": 1.0325164794921875,
"margin_dpo/margin_std": 1.3928803205490112,
"step": 300
},
{
"epoch": 0.455026455026455,
"fcm_dpo/beta": 0.9845176935195923,
"fcm_dpo/delta": 0.09773456305265427,
"fcm_dpo/margin": 0.9239287376403809,
"fcm_dpo/q_t": 0.36213570833206177,
"grad_norm": 244.90597534179688,
"learning_rate": 3.3301533956555885e-07,
"logits/chosen": 0.16752877831459045,
"logits/rejected": 0.14301586151123047,
"logps/chosen": -54.7681999206543,
"logps/ref_chosen": -52.85089111328125,
"logps/ref_rejected": -69.97584533691406,
"logps/rejected": -72.81707763671875,
"loss": 1.1465,
"margin_dpo/margin_mean": 0.9239292740821838,
"margin_dpo/margin_std": 1.6796950101852417,
"step": 301
},
{
"epoch": 0.4565381708238851,
"fcm_dpo/beta": 1.0574851036071777,
"fcm_dpo/delta": 0.3823769986629486,
"fcm_dpo/margin": 0.607899010181427,
"fcm_dpo/q_t": 0.3950553238391876,
"grad_norm": 319.1892395019531,
"learning_rate": 3.317669908293554e-07,
"logits/chosen": 0.06301631033420563,
"logits/rejected": 0.02415418066084385,
"logps/chosen": -68.90666198730469,
"logps/ref_chosen": -66.96650695800781,
"logps/ref_rejected": -88.09510803222656,
"logps/rejected": -90.64315795898438,
"loss": 1.2959,
"margin_dpo/margin_mean": 0.6078989505767822,
"margin_dpo/margin_std": 1.4545881748199463,
"step": 302
},
{
"epoch": 0.4580498866213152,
"fcm_dpo/beta": 0.9939075112342834,
"fcm_dpo/delta": -0.5528866052627563,
"fcm_dpo/margin": 1.478973388671875,
"fcm_dpo/q_t": 0.2767646014690399,
"grad_norm": 199.89747619628906,
"learning_rate": 3.3051635489464793e-07,
"logits/chosen": 0.17920634150505066,
"logits/rejected": 0.1289132535457611,
"logps/chosen": -63.91869354248047,
"logps/ref_chosen": -62.12152862548828,
"logps/ref_rejected": -90.31204223632812,
"logps/rejected": -93.58818817138672,
"loss": 0.8206,
"margin_dpo/margin_mean": 1.4789727926254272,
"margin_dpo/margin_std": 1.7652822732925415,
"step": 303
},
{
"epoch": 0.4595616024187453,
"fcm_dpo/beta": 0.9474884271621704,
"fcm_dpo/delta": -0.1770581603050232,
"fcm_dpo/margin": 1.2210001945495605,
"fcm_dpo/q_t": 0.2853131592273712,
"grad_norm": 196.20419311523438,
"learning_rate": 3.292634667444117e-07,
"logits/chosen": 0.14427047967910767,
"logits/rejected": 0.10413600504398346,
"logps/chosen": -62.46052551269531,
"logps/ref_chosen": -60.695091247558594,
"logps/ref_rejected": -78.2525405883789,
"logps/rejected": -81.23897552490234,
"loss": 0.7895,
"margin_dpo/margin_mean": 1.2210009098052979,
"margin_dpo/margin_std": 1.2481887340545654,
"step": 304
},
{
"epoch": 0.46107331821617537,
"fcm_dpo/beta": 0.9275550842285156,
"fcm_dpo/delta": 0.015897810459136963,
"fcm_dpo/margin": 1.0611162185668945,
"fcm_dpo/q_t": 0.3354414701461792,
"grad_norm": 219.49313354492188,
"learning_rate": 3.280083614246217e-07,
"logits/chosen": 0.09203135967254639,
"logits/rejected": 0.09808552265167236,
"logps/chosen": -74.84890747070312,
"logps/ref_chosen": -72.69914245605469,
"logps/ref_rejected": -65.65670776367188,
"logps/rejected": -68.86759185791016,
"loss": 1.0316,
"margin_dpo/margin_mean": 1.0611159801483154,
"margin_dpo/margin_std": 1.6295936107635498,
"step": 305
},
{
"epoch": 0.46258503401360546,
"fcm_dpo/beta": 0.9322211742401123,
"fcm_dpo/delta": 0.10965774953365326,
"fcm_dpo/margin": 0.958516001701355,
"fcm_dpo/q_t": 0.34834927320480347,
"grad_norm": 212.98146057128906,
"learning_rate": 3.267510740432719e-07,
"logits/chosen": 0.15844234824180603,
"logits/rejected": 0.07754644751548767,
"logps/chosen": -56.162513732910156,
"logps/ref_chosen": -53.97052764892578,
"logps/ref_rejected": -71.02423095703125,
"logps/rejected": -74.17473602294922,
"loss": 1.0813,
"margin_dpo/margin_mean": 0.9585161209106445,
"margin_dpo/margin_std": 1.579054355621338,
"step": 306
},
{
"epoch": 0.46409674981103555,
"fcm_dpo/beta": 0.9765808582305908,
"fcm_dpo/delta": 0.09200756251811981,
"fcm_dpo/margin": 0.9371168613433838,
"fcm_dpo/q_t": 0.35316699743270874,
"grad_norm": 227.36492919921875,
"learning_rate": 3.2549163976939285e-07,
"logits/chosen": 0.19748598337173462,
"logits/rejected": 0.16263772547245026,
"logps/chosen": -59.23182678222656,
"logps/ref_chosen": -57.413108825683594,
"logps/ref_rejected": -68.68010711669922,
"logps/rejected": -71.43594360351562,
"loss": 1.1199,
"margin_dpo/margin_mean": 0.9371169805526733,
"margin_dpo/margin_std": 1.6100661754608154,
"step": 307
},
{
"epoch": 0.4656084656084656,
"fcm_dpo/beta": 0.9509669542312622,
"fcm_dpo/delta": -0.15045209228992462,
"fcm_dpo/margin": 1.1912171840667725,
"fcm_dpo/q_t": 0.30430689454078674,
"grad_norm": 198.90879821777344,
"learning_rate": 3.2423009383206874e-07,
"logits/chosen": 0.14542096853256226,
"logits/rejected": 0.1255512833595276,
"logps/chosen": -68.51435852050781,
"logps/ref_chosen": -66.59879302978516,
"logps/ref_rejected": -74.337158203125,
"logps/rejected": -77.44393920898438,
"loss": 0.8821,
"margin_dpo/margin_mean": 1.1912175416946411,
"margin_dpo/margin_std": 1.509572982788086,
"step": 308
},
{
"epoch": 0.4671201814058957,
"fcm_dpo/beta": 0.9307016134262085,
"fcm_dpo/delta": -0.036133162677288055,
"fcm_dpo/margin": 1.1062381267547607,
"fcm_dpo/q_t": 0.32213109731674194,
"grad_norm": 279.2270812988281,
"learning_rate": 3.229664715194511e-07,
"logits/chosen": 0.1940372884273529,
"logits/rejected": 0.14977452158927917,
"logps/chosen": -67.56904602050781,
"logps/ref_chosen": -65.39474487304688,
"logps/ref_rejected": -75.70930480957031,
"logps/rejected": -78.98983764648438,
"loss": 0.926,
"margin_dpo/margin_mean": 1.1062389612197876,
"margin_dpo/margin_std": 1.470036506652832,
"step": 309
},
{
"epoch": 0.46863189720332576,
"fcm_dpo/beta": 0.9882631897926331,
"fcm_dpo/delta": 0.319682240486145,
"fcm_dpo/margin": 0.7099736928939819,
"fcm_dpo/q_t": 0.38903123140335083,
"grad_norm": 270.8043518066406,
"learning_rate": 3.2170080817777257e-07,
"logits/chosen": 0.19459328055381775,
"logits/rejected": 0.18906690180301666,
"logps/chosen": -76.8685302734375,
"logps/ref_chosen": -74.66827392578125,
"logps/ref_rejected": -80.5689697265625,
"logps/rejected": -83.47919464111328,
"loss": 1.2064,
"margin_dpo/margin_mean": 0.709973931312561,
"margin_dpo/margin_std": 1.481546401977539,
"step": 310
},
{
"epoch": 0.47014361300075586,
"fcm_dpo/beta": 0.997173547744751,
"fcm_dpo/delta": -0.014330286532640457,
"fcm_dpo/margin": 1.015620231628418,
"fcm_dpo/q_t": 0.33949679136276245,
"grad_norm": 241.86203002929688,
"learning_rate": 3.204331392103574e-07,
"logits/chosen": 0.11166363954544067,
"logits/rejected": 0.02732861414551735,
"logps/chosen": -61.56627655029297,
"logps/ref_chosen": -59.738033294677734,
"logps/ref_rejected": -93.60757446289062,
"logps/rejected": -96.4514389038086,
"loss": 1.1047,
"margin_dpo/margin_mean": 1.0156208276748657,
"margin_dpo/margin_std": 1.744983434677124,
"step": 311
},
{
"epoch": 0.47165532879818595,
"fcm_dpo/beta": 1.0016117095947266,
"fcm_dpo/delta": -0.043600842356681824,
"fcm_dpo/margin": 1.0361428260803223,
"fcm_dpo/q_t": 0.3195294141769409,
"grad_norm": 267.0063781738281,
"learning_rate": 3.1916350007663176e-07,
"logits/chosen": 0.16160011291503906,
"logits/rejected": 0.08802653849124908,
"logps/chosen": -55.83079528808594,
"logps/ref_chosen": -53.816436767578125,
"logps/ref_rejected": -68.6575698852539,
"logps/rejected": -71.70807647705078,
"loss": 1.0571,
"margin_dpo/margin_mean": 1.0361424684524536,
"margin_dpo/margin_std": 1.636512041091919,
"step": 312
},
{
"epoch": 0.47316704459561604,
"fcm_dpo/beta": 0.9808007478713989,
"fcm_dpo/delta": -0.0073838010430336,
"fcm_dpo/margin": 1.0249512195587158,
"fcm_dpo/q_t": 0.3501819968223572,
"grad_norm": 224.44908142089844,
"learning_rate": 3.178919262911314e-07,
"logits/chosen": 0.19469937682151794,
"logits/rejected": 0.17493098974227905,
"logps/chosen": -61.82642364501953,
"logps/ref_chosen": -59.957359313964844,
"logps/ref_rejected": -69.31729888916016,
"logps/rejected": -72.21131896972656,
"loss": 1.077,
"margin_dpo/margin_mean": 1.0249509811401367,
"margin_dpo/margin_std": 1.7490208148956299,
"step": 313
},
{
"epoch": 0.47467876039304613,
"fcm_dpo/beta": 0.9450622200965881,
"fcm_dpo/delta": -0.23715783655643463,
"fcm_dpo/margin": 1.274531364440918,
"fcm_dpo/q_t": 0.2915083169937134,
"grad_norm": 200.15219116210938,
"learning_rate": 3.166184534225087e-07,
"logits/chosen": 0.16370022296905518,
"logits/rejected": 0.17338353395462036,
"logps/chosen": -72.26028442382812,
"logps/ref_chosen": -70.26815795898438,
"logps/ref_rejected": -69.23971557617188,
"logps/rejected": -72.50637817382812,
"loss": 0.8243,
"margin_dpo/margin_mean": 1.2745311260223389,
"margin_dpo/margin_std": 1.4535917043685913,
"step": 314
},
{
"epoch": 0.47619047619047616,
"fcm_dpo/beta": 0.9470343589782715,
"fcm_dpo/delta": -0.0004953928291797638,
"fcm_dpo/margin": 1.0559828281402588,
"fcm_dpo/q_t": 0.328433096408844,
"grad_norm": 213.31190490722656,
"learning_rate": 3.1534311709253723e-07,
"logits/chosen": 0.09684689342975616,
"logits/rejected": 0.060719601809978485,
"logps/chosen": -69.97232055664062,
"logps/ref_chosen": -67.79469299316406,
"logps/ref_rejected": -74.55148315429688,
"logps/rejected": -77.78509521484375,
"loss": 0.9555,
"margin_dpo/margin_mean": 1.0559827089309692,
"margin_dpo/margin_std": 1.4584524631500244,
"step": 315
},
{
"epoch": 0.47770219198790626,
"fcm_dpo/beta": 0.9314021468162537,
"fcm_dpo/delta": -0.23499611020088196,
"fcm_dpo/margin": 1.2908413410186768,
"fcm_dpo/q_t": 0.3175312876701355,
"grad_norm": 208.80650329589844,
"learning_rate": 3.1406595297511564e-07,
"logits/chosen": 0.08339610695838928,
"logits/rejected": 0.005085200071334839,
"logps/chosen": -57.207271575927734,
"logps/ref_chosen": -55.288482666015625,
"logps/ref_rejected": -96.15723419189453,
"logps/rejected": -99.36686706542969,
"loss": 0.9423,
"margin_dpo/margin_mean": 1.290840983390808,
"margin_dpo/margin_std": 1.7332630157470703,
"step": 316
},
{
"epoch": 0.47921390778533635,
"fcm_dpo/beta": 0.8467363119125366,
"fcm_dpo/delta": -0.33823323249816895,
"fcm_dpo/margin": 1.5226337909698486,
"fcm_dpo/q_t": 0.2754213809967041,
"grad_norm": 168.14048767089844,
"learning_rate": 3.1278699679526975e-07,
"logits/chosen": 0.2033698856830597,
"logits/rejected": 0.1626172512769699,
"logps/chosen": -56.44146728515625,
"logps/ref_chosen": -54.58137512207031,
"logps/ref_rejected": -72.77232360839844,
"logps/rejected": -76.15504455566406,
"loss": 0.7402,
"margin_dpo/margin_mean": 1.5226335525512695,
"margin_dpo/margin_std": 1.537990689277649,
"step": 317
},
{
"epoch": 0.48072562358276644,
"fcm_dpo/beta": 0.880409836769104,
"fcm_dpo/delta": 0.30436062812805176,
"fcm_dpo/margin": 0.8145400285720825,
"fcm_dpo/q_t": 0.3881235420703888,
"grad_norm": 241.8609619140625,
"learning_rate": 3.1150628432815336e-07,
"logits/chosen": 0.21144279837608337,
"logits/rejected": 0.1659296602010727,
"logps/chosen": -55.05265426635742,
"logps/ref_chosen": -52.88822937011719,
"logps/ref_rejected": -80.63988494873047,
"logps/rejected": -83.61885070800781,
"loss": 1.3193,
"margin_dpo/margin_mean": 0.8145396709442139,
"margin_dpo/margin_std": 1.8883013725280762,
"step": 318
},
{
"epoch": 0.48223733938019653,
"fcm_dpo/beta": 0.8839104771614075,
"fcm_dpo/delta": -0.04694606736302376,
"fcm_dpo/margin": 1.1784477233886719,
"fcm_dpo/q_t": 0.3256542682647705,
"grad_norm": 204.67640686035156,
"learning_rate": 3.1022385139804707e-07,
"logits/chosen": 0.14550672471523285,
"logits/rejected": 0.12916123867034912,
"logps/chosen": -66.23543548583984,
"logps/ref_chosen": -64.36333465576172,
"logps/ref_rejected": -79.47296142578125,
"logps/rejected": -82.52351379394531,
"loss": 1.0132,
"margin_dpo/margin_mean": 1.1784476041793823,
"margin_dpo/margin_std": 1.7645900249481201,
"step": 319
},
{
"epoch": 0.4837490551776266,
"fcm_dpo/beta": 0.9131995439529419,
"fcm_dpo/delta": 0.07204453647136688,
"fcm_dpo/margin": 1.013108253479004,
"fcm_dpo/q_t": 0.37524113059043884,
"grad_norm": 217.01268005371094,
"learning_rate": 3.0893973387735683e-07,
"logits/chosen": 0.055543892085552216,
"logits/rejected": 0.023240717127919197,
"logps/chosen": -51.213958740234375,
"logps/ref_chosen": -49.558746337890625,
"logps/ref_rejected": -71.23444366455078,
"logps/rejected": -73.90277099609375,
"loss": 1.1825,
"margin_dpo/margin_mean": 1.0131080150604248,
"margin_dpo/margin_std": 1.9926589727401733,
"step": 320
},
{
"epoch": 0.4852607709750567,
"fcm_dpo/beta": 0.8980337381362915,
"fcm_dpo/delta": 0.061922501772642136,
"fcm_dpo/margin": 1.0499402284622192,
"fcm_dpo/q_t": 0.33765465021133423,
"grad_norm": 189.1380615234375,
"learning_rate": 3.0765396768561004e-07,
"logits/chosen": 0.11102212965488434,
"logits/rejected": 0.09998691082000732,
"logps/chosen": -54.228790283203125,
"logps/ref_chosen": -52.08526611328125,
"logps/ref_rejected": -55.58674621582031,
"logps/rejected": -58.78020477294922,
"loss": 1.0637,
"margin_dpo/margin_mean": 1.0499403476715088,
"margin_dpo/margin_std": 1.613488793373108,
"step": 321
},
{
"epoch": 0.48677248677248675,
"fcm_dpo/beta": 0.8880925178527832,
"fcm_dpo/delta": -0.23487280309200287,
"fcm_dpo/margin": 1.3585911989212036,
"fcm_dpo/q_t": 0.28043854236602783,
"grad_norm": 176.82778930664062,
"learning_rate": 3.063665887884511e-07,
"logits/chosen": 0.17297013103961945,
"logits/rejected": 0.1158091127872467,
"logps/chosen": -49.436309814453125,
"logps/ref_chosen": -47.404109954833984,
"logps/ref_rejected": -73.4260025024414,
"logps/rejected": -76.81678771972656,
"loss": 0.7812,
"margin_dpo/margin_mean": 1.3585913181304932,
"margin_dpo/margin_std": 1.383784532546997,
"step": 322
},
{
"epoch": 0.48828420256991684,
"fcm_dpo/beta": 0.8809718489646912,
"fcm_dpo/delta": -0.021233975887298584,
"fcm_dpo/margin": 1.149742841720581,
"fcm_dpo/q_t": 0.359465092420578,
"grad_norm": 248.77261352539062,
"learning_rate": 3.0507763319663517e-07,
"logits/chosen": 0.10760500282049179,
"logits/rejected": 0.054044321179389954,
"logps/chosen": -72.1104736328125,
"logps/ref_chosen": -70.00630187988281,
"logps/ref_rejected": -86.96690368652344,
"logps/rejected": -90.22081756591797,
"loss": 1.1742,
"margin_dpo/margin_mean": 1.1497416496276855,
"margin_dpo/margin_std": 2.1441869735717773,
"step": 323
},
{
"epoch": 0.4897959183673469,
"fcm_dpo/beta": 0.8546582460403442,
"fcm_dpo/delta": -0.05345672369003296,
"fcm_dpo/margin": 1.2257628440856934,
"fcm_dpo/q_t": 0.3085279166698456,
"grad_norm": 156.60922241210938,
"learning_rate": 3.0378713696502097e-07,
"logits/chosen": 0.18619199097156525,
"logits/rejected": 0.1414175033569336,
"logps/chosen": -57.747230529785156,
"logps/ref_chosen": -55.88882064819336,
"logps/ref_rejected": -75.23088073730469,
"logps/rejected": -78.31505584716797,
"loss": 0.8287,
"margin_dpo/margin_mean": 1.2257624864578247,
"margin_dpo/margin_std": 1.363228678703308,
"step": 324
},
{
"epoch": 0.491307634164777,
"fcm_dpo/beta": 0.8600409626960754,
"fcm_dpo/delta": 0.09678801149129868,
"fcm_dpo/margin": 1.0598913431167603,
"fcm_dpo/q_t": 0.3425368666648865,
"grad_norm": 200.6235809326172,
"learning_rate": 3.0249513619156206e-07,
"logits/chosen": 0.1525503695011139,
"logits/rejected": 0.1075083315372467,
"logps/chosen": -66.32670593261719,
"logps/ref_chosen": -64.14701843261719,
"logps/ref_rejected": -79.91143798828125,
"logps/rejected": -83.15100860595703,
"loss": 0.9779,
"margin_dpo/margin_mean": 1.059891700744629,
"margin_dpo/margin_std": 1.5498141050338745,
"step": 325
},
{
"epoch": 0.4928193499622071,
"fcm_dpo/beta": 0.9292858242988586,
"fcm_dpo/delta": 0.4350077509880066,
"fcm_dpo/margin": 0.6357402801513672,
"fcm_dpo/q_t": 0.40372180938720703,
"grad_norm": 280.2021179199219,
"learning_rate": 3.012016670162977e-07,
"logits/chosen": 0.11779944598674774,
"logits/rejected": 0.11533387750387192,
"logps/chosen": -78.00666046142578,
"logps/ref_chosen": -75.53131103515625,
"logps/ref_rejected": -76.5898666381836,
"logps/rejected": -79.70095825195312,
"loss": 1.3715,
"margin_dpo/margin_mean": 0.6357403993606567,
"margin_dpo/margin_std": 1.7088639736175537,
"step": 326
},
{
"epoch": 0.4943310657596372,
"fcm_dpo/beta": 0.9441518783569336,
"fcm_dpo/delta": -0.06476020067930222,
"fcm_dpo/margin": 1.120490312576294,
"fcm_dpo/q_t": 0.32473599910736084,
"grad_norm": 212.27267456054688,
"learning_rate": 2.99906765620341e-07,
"logits/chosen": 0.09260990470647812,
"logits/rejected": 0.054583217948675156,
"logps/chosen": -71.31211853027344,
"logps/ref_chosen": -69.33717346191406,
"logps/ref_rejected": -73.37751770019531,
"logps/rejected": -76.47296142578125,
"loss": 1.0397,
"margin_dpo/margin_mean": 1.1204906702041626,
"margin_dpo/margin_std": 1.7054201364517212,
"step": 327
},
{
"epoch": 0.4958427815570673,
"fcm_dpo/beta": 0.938388466835022,
"fcm_dpo/delta": -0.004852544516324997,
"fcm_dpo/margin": 1.0702922344207764,
"fcm_dpo/q_t": 0.3356061577796936,
"grad_norm": 220.39108276367188,
"learning_rate": 2.9861046822486766e-07,
"logits/chosen": 0.1284867823123932,
"logits/rejected": 0.10963472723960876,
"logps/chosen": -63.444252014160156,
"logps/ref_chosen": -61.70623016357422,
"logps/ref_rejected": -83.73808288574219,
"logps/rejected": -86.54638671875,
"loss": 0.9952,
"margin_dpo/margin_mean": 1.0702919960021973,
"margin_dpo/margin_std": 1.561734914779663,
"step": 328
},
{
"epoch": 0.4973544973544973,
"fcm_dpo/beta": 0.926159143447876,
"fcm_dpo/delta": 0.030632048845291138,
"fcm_dpo/margin": 1.044736385345459,
"fcm_dpo/q_t": 0.36076274514198303,
"grad_norm": 272.3174743652344,
"learning_rate": 2.9731281109010253e-07,
"logits/chosen": 0.165533185005188,
"logits/rejected": 0.12572559714317322,
"logps/chosen": -66.76375579833984,
"logps/ref_chosen": -64.4984130859375,
"logps/ref_rejected": -83.6591796875,
"logps/rejected": -86.96925354003906,
"loss": 1.169,
"margin_dpo/margin_mean": 1.0447362661361694,
"margin_dpo/margin_std": 1.9857655763626099,
"step": 329
},
{
"epoch": 0.4988662131519274,
"fcm_dpo/beta": 0.9391987323760986,
"fcm_dpo/delta": -0.06099821627140045,
"fcm_dpo/margin": 1.122628927230835,
"fcm_dpo/q_t": 0.32373300194740295,
"grad_norm": 196.6431427001953,
"learning_rate": 2.9601383051430505e-07,
"logits/chosen": 0.13443490862846375,
"logits/rejected": 0.08146592229604721,
"logps/chosen": -56.5888671875,
"logps/ref_chosen": -54.80464172363281,
"logps/ref_rejected": -75.3194351196289,
"logps/rejected": -78.22628784179688,
"loss": 1.0489,
"margin_dpo/margin_mean": 1.1226279735565186,
"margin_dpo/margin_std": 1.7134172916412354,
"step": 330
},
{
"epoch": 0.5003779289493575,
"fcm_dpo/beta": 0.8843910694122314,
"fcm_dpo/delta": -0.29690316319465637,
"fcm_dpo/margin": 1.4203097820281982,
"fcm_dpo/q_t": 0.29872971773147583,
"grad_norm": 204.7743377685547,
"learning_rate": 2.947135628327544e-07,
"logits/chosen": 0.24195344746112823,
"logits/rejected": 0.2168186902999878,
"logps/chosen": -61.22477340698242,
"logps/ref_chosen": -59.242584228515625,
"logps/ref_rejected": -69.87483215332031,
"logps/rejected": -73.27733612060547,
"loss": 0.8837,
"margin_dpo/margin_mean": 1.4203091859817505,
"margin_dpo/margin_std": 1.860417127609253,
"step": 331
},
{
"epoch": 0.5018896447467877,
"fcm_dpo/beta": 0.8774402141571045,
"fcm_dpo/delta": -0.07574308663606644,
"fcm_dpo/margin": 1.2151854038238525,
"fcm_dpo/q_t": 0.320780485868454,
"grad_norm": 200.2390899658203,
"learning_rate": 2.934120444167326e-07,
"logits/chosen": 0.1037403866648674,
"logits/rejected": 0.0657915323972702,
"logps/chosen": -69.155517578125,
"logps/ref_chosen": -67.10975646972656,
"logps/ref_rejected": -77.11839294433594,
"logps/rejected": -80.37932586669922,
"loss": 0.9145,
"margin_dpo/margin_mean": 1.2151854038238525,
"margin_dpo/margin_std": 1.6321237087249756,
"step": 332
},
{
"epoch": 0.5034013605442177,
"fcm_dpo/beta": 0.8272979259490967,
"fcm_dpo/delta": -0.3091200590133667,
"fcm_dpo/margin": 1.535266637802124,
"fcm_dpo/q_t": 0.2927742302417755,
"grad_norm": 172.9521942138672,
"learning_rate": 2.921093116725076e-07,
"logits/chosen": 0.1792515516281128,
"logits/rejected": 0.12908412516117096,
"logps/chosen": -60.283103942871094,
"logps/ref_chosen": -58.381134033203125,
"logps/ref_rejected": -85.02839660644531,
"logps/rejected": -88.46562957763672,
"loss": 0.8145,
"margin_dpo/margin_mean": 1.5352662801742554,
"margin_dpo/margin_std": 1.7731688022613525,
"step": 333
},
{
"epoch": 0.5049130763416477,
"fcm_dpo/beta": 0.8177670240402222,
"fcm_dpo/delta": 0.09361538290977478,
"fcm_dpo/margin": 1.1182286739349365,
"fcm_dpo/q_t": 0.3452100455760956,
"grad_norm": 200.41497802734375,
"learning_rate": 2.9080540104031484e-07,
"logits/chosen": 0.16784140467643738,
"logits/rejected": 0.13531029224395752,
"logps/chosen": -69.00025939941406,
"logps/ref_chosen": -66.89199829101562,
"logps/ref_rejected": -91.83695220947266,
"logps/rejected": -95.06344604492188,
"loss": 1.063,
"margin_dpo/margin_mean": 1.118227481842041,
"margin_dpo/margin_std": 1.7574753761291504,
"step": 334
},
{
"epoch": 0.5064247921390779,
"fcm_dpo/beta": 0.8364279866218567,
"fcm_dpo/delta": -0.04344947636127472,
"fcm_dpo/margin": 1.23694908618927,
"fcm_dpo/q_t": 0.32282981276512146,
"grad_norm": 176.56353759765625,
"learning_rate": 2.895003489933375e-07,
"logits/chosen": 0.15401214361190796,
"logits/rejected": 0.12541456520557404,
"logps/chosen": -63.48824691772461,
"logps/ref_chosen": -61.51445770263672,
"logps/ref_rejected": -75.68916320800781,
"logps/rejected": -78.89990234375,
"loss": 0.9844,
"margin_dpo/margin_mean": 1.2369496822357178,
"margin_dpo/margin_std": 1.6888550519943237,
"step": 335
},
{
"epoch": 0.5079365079365079,
"fcm_dpo/beta": 0.8125041127204895,
"fcm_dpo/delta": 0.06346192955970764,
"fcm_dpo/margin": 1.1519867181777954,
"fcm_dpo/q_t": 0.35848677158355713,
"grad_norm": 223.40049743652344,
"learning_rate": 2.8819419203668675e-07,
"logits/chosen": 0.09962709248065948,
"logits/rejected": 0.0864500105381012,
"logps/chosen": -71.24321746826172,
"logps/ref_chosen": -68.85006713867188,
"logps/ref_rejected": -92.99603271484375,
"logps/rejected": -96.54116821289062,
"loss": 1.0724,
"margin_dpo/margin_mean": 1.151987075805664,
"margin_dpo/margin_std": 1.9282138347625732,
"step": 336
},
{
"epoch": 0.509448223733938,
"fcm_dpo/beta": 0.8386461734771729,
"fcm_dpo/delta": 0.09305550158023834,
"fcm_dpo/margin": 1.0915064811706543,
"fcm_dpo/q_t": 0.3443507254123688,
"grad_norm": 215.70370483398438,
"learning_rate": 2.8688696670638053e-07,
"logits/chosen": 0.08634011447429657,
"logits/rejected": 0.05762971565127373,
"logps/chosen": -75.31697845458984,
"logps/ref_chosen": -73.18783569335938,
"logps/ref_rejected": -86.89118957519531,
"logps/rejected": -90.11184692382812,
"loss": 1.0527,
"margin_dpo/margin_mean": 1.0915067195892334,
"margin_dpo/margin_std": 1.7208020687103271,
"step": 337
},
{
"epoch": 0.5109599395313681,
"fcm_dpo/beta": 0.8704172372817993,
"fcm_dpo/delta": 0.1373847872018814,
"fcm_dpo/margin": 1.0027812719345093,
"fcm_dpo/q_t": 0.3466625213623047,
"grad_norm": 219.7271270751953,
"learning_rate": 2.8557870956832133e-07,
"logits/chosen": 0.11361702531576157,
"logits/rejected": 0.09130830317735672,
"logps/chosen": -66.18433380126953,
"logps/ref_chosen": -63.939613342285156,
"logps/ref_rejected": -75.34243774414062,
"logps/rejected": -78.58993530273438,
"loss": 1.0112,
"margin_dpo/margin_mean": 1.0027809143066406,
"margin_dpo/margin_std": 1.4872009754180908,
"step": 338
},
{
"epoch": 0.5124716553287982,
"fcm_dpo/beta": 0.8656154274940491,
"fcm_dpo/delta": 0.016111478209495544,
"fcm_dpo/margin": 1.137109398841858,
"fcm_dpo/q_t": 0.3326491713523865,
"grad_norm": 190.49337768554688,
"learning_rate": 2.842694572172736e-07,
"logits/chosen": 0.19113630056381226,
"logits/rejected": 0.12893235683441162,
"logps/chosen": -47.86590576171875,
"logps/ref_chosen": -45.54913330078125,
"logps/ref_rejected": -67.0482177734375,
"logps/rejected": -70.50209045410156,
"loss": 0.958,
"margin_dpo/margin_mean": 1.137109398841858,
"margin_dpo/margin_std": 1.5872113704681396,
"step": 339
},
{
"epoch": 0.5139833711262283,
"fcm_dpo/beta": 0.8644633889198303,
"fcm_dpo/delta": -0.16399508714675903,
"fcm_dpo/margin": 1.3231072425842285,
"fcm_dpo/q_t": 0.3184114396572113,
"grad_norm": 165.63905334472656,
"learning_rate": 2.8295924627584004e-07,
"logits/chosen": 0.14443045854568481,
"logits/rejected": 0.12298154830932617,
"logps/chosen": -56.29311752319336,
"logps/ref_chosen": -54.00564956665039,
"logps/ref_rejected": -61.314430236816406,
"logps/rejected": -64.92501068115234,
"loss": 1.0075,
"margin_dpo/margin_mean": 1.3231074810028076,
"margin_dpo/margin_std": 1.933672547340393,
"step": 340
},
{
"epoch": 0.5154950869236583,
"fcm_dpo/beta": 0.8116443157196045,
"fcm_dpo/delta": -0.10012944042682648,
"fcm_dpo/margin": 1.3234137296676636,
"fcm_dpo/q_t": 0.31874704360961914,
"grad_norm": 227.253662109375,
"learning_rate": 2.816481133934373e-07,
"logits/chosen": 0.1503913700580597,
"logits/rejected": 0.11632785201072693,
"logps/chosen": -65.45622253417969,
"logps/ref_chosen": -63.39509582519531,
"logps/ref_rejected": -76.20973205566406,
"logps/rejected": -79.59427642822266,
"loss": 0.9461,
"margin_dpo/margin_mean": 1.3234134912490845,
"margin_dpo/margin_std": 1.674351453781128,
"step": 341
},
{
"epoch": 0.5170068027210885,
"fcm_dpo/beta": 0.8011665940284729,
"fcm_dpo/delta": -0.21002721786499023,
"fcm_dpo/margin": 1.4784516096115112,
"fcm_dpo/q_t": 0.30981749296188354,
"grad_norm": 152.3086700439453,
"learning_rate": 2.8033609524527046e-07,
"logits/chosen": 0.17057004570960999,
"logits/rejected": 0.13639067113399506,
"logps/chosen": -55.32265853881836,
"logps/ref_chosen": -53.047813415527344,
"logps/ref_rejected": -68.2854232788086,
"logps/rejected": -72.03872680664062,
"loss": 0.8854,
"margin_dpo/margin_mean": 1.4784512519836426,
"margin_dpo/margin_std": 1.8578169345855713,
"step": 342
},
{
"epoch": 0.5185185185185185,
"fcm_dpo/beta": 0.8318637609481812,
"fcm_dpo/delta": 0.3891153335571289,
"fcm_dpo/margin": 0.7660273909568787,
"fcm_dpo/q_t": 0.38361668586730957,
"grad_norm": 204.89308166503906,
"learning_rate": 2.7902322853130753e-07,
"logits/chosen": 0.08965672552585602,
"logits/rejected": 0.08724290132522583,
"logps/chosen": -72.62460327148438,
"logps/ref_chosen": -70.57852935791016,
"logps/ref_rejected": -84.73873901367188,
"logps/rejected": -87.55084228515625,
"loss": 1.2263,
"margin_dpo/margin_mean": 0.7660267353057861,
"margin_dpo/margin_std": 1.6297626495361328,
"step": 343
},
{
"epoch": 0.5200302343159486,
"fcm_dpo/beta": 0.8591570854187012,
"fcm_dpo/delta": 0.02412712574005127,
"fcm_dpo/margin": 1.1384611129760742,
"fcm_dpo/q_t": 0.3305787742137909,
"grad_norm": 197.9630584716797,
"learning_rate": 2.7770954997525274e-07,
"logits/chosen": 0.15758341550827026,
"logits/rejected": 0.11625611782073975,
"logps/chosen": -58.18259811401367,
"logps/ref_chosen": -55.811004638671875,
"logps/ref_rejected": -84.77637481689453,
"logps/rejected": -88.28643035888672,
"loss": 0.982,
"margin_dpo/margin_mean": 1.138460636138916,
"margin_dpo/margin_std": 1.6236982345581055,
"step": 344
},
{
"epoch": 0.5215419501133787,
"fcm_dpo/beta": 0.8624707460403442,
"fcm_dpo/delta": 0.05118731036782265,
"fcm_dpo/margin": 1.1047433614730835,
"fcm_dpo/q_t": 0.3428131341934204,
"grad_norm": 181.71697998046875,
"learning_rate": 2.7639509632351927e-07,
"logits/chosen": 0.2466953694820404,
"logits/rejected": 0.2128785401582718,
"logps/chosen": -59.707969665527344,
"logps/ref_chosen": -57.78609848022461,
"logps/ref_rejected": -78.91847229003906,
"logps/rejected": -81.94508361816406,
"loss": 1.0228,
"margin_dpo/margin_mean": 1.1047430038452148,
"margin_dpo/margin_std": 1.6839402914047241,
"step": 345
},
{
"epoch": 0.5230536659108088,
"fcm_dpo/beta": 0.8550155162811279,
"fcm_dpo/delta": -0.15863507986068726,
"fcm_dpo/margin": 1.334245204925537,
"fcm_dpo/q_t": 0.3107675015926361,
"grad_norm": 217.05181884765625,
"learning_rate": 2.7507990434420123e-07,
"logits/chosen": 0.18394121527671814,
"logits/rejected": 0.13396984338760376,
"logps/chosen": -58.29369354248047,
"logps/ref_chosen": -56.285125732421875,
"logps/ref_rejected": -91.15303039550781,
"logps/rejected": -94.495849609375,
"loss": 0.9038,
"margin_dpo/margin_mean": 1.334245204925537,
"margin_dpo/margin_std": 1.7104822397232056,
"step": 346
},
{
"epoch": 0.5245653817082389,
"fcm_dpo/beta": 0.8462698459625244,
"fcm_dpo/delta": 0.07013484090566635,
"fcm_dpo/margin": 1.1055729389190674,
"fcm_dpo/q_t": 0.34362083673477173,
"grad_norm": 187.4880828857422,
"learning_rate": 2.737640108260456e-07,
"logits/chosen": 0.2319878786802292,
"logits/rejected": 0.19426073133945465,
"logps/chosen": -55.76850891113281,
"logps/ref_chosen": -53.499542236328125,
"logps/ref_rejected": -72.52565002441406,
"logps/rejected": -75.90019226074219,
"loss": 1.0505,
"margin_dpo/margin_mean": 1.1055727005004883,
"margin_dpo/margin_std": 1.7748844623565674,
"step": 347
},
{
"epoch": 0.5260770975056689,
"fcm_dpo/beta": 0.8080967664718628,
"fcm_dpo/delta": -0.3646644949913025,
"fcm_dpo/margin": 1.6269646883010864,
"fcm_dpo/q_t": 0.3286696970462799,
"grad_norm": 163.6527099609375,
"learning_rate": 2.724474525774229e-07,
"logits/chosen": 0.23792192339897156,
"logits/rejected": 0.21459215879440308,
"logps/chosen": -52.67319107055664,
"logps/ref_chosen": -50.78684997558594,
"logps/ref_rejected": -68.63732147216797,
"logps/rejected": -72.15061950683594,
"loss": 0.9542,
"margin_dpo/margin_mean": 1.6269644498825073,
"margin_dpo/margin_std": 2.543069362640381,
"step": 348
},
{
"epoch": 0.527588813303099,
"fcm_dpo/beta": 0.779322624206543,
"fcm_dpo/delta": -0.11380349844694138,
"fcm_dpo/margin": 1.411959171295166,
"fcm_dpo/q_t": 0.3116574287414551,
"grad_norm": 173.95448303222656,
"learning_rate": 2.711302664252973e-07,
"logits/chosen": 0.16869422793388367,
"logits/rejected": 0.10714869201183319,
"logps/chosen": -55.361000061035156,
"logps/ref_chosen": -53.325008392333984,
"logps/ref_rejected": -83.21236419677734,
"logps/rejected": -86.66030883789062,
"loss": 0.9061,
"margin_dpo/margin_mean": 1.4119596481323242,
"margin_dpo/margin_std": 1.8286110162734985,
"step": 349
},
{
"epoch": 0.5291005291005291,
"fcm_dpo/beta": 0.7421770095825195,
"fcm_dpo/delta": -0.2771596610546112,
"fcm_dpo/margin": 1.6710288524627686,
"fcm_dpo/q_t": 0.29638049006462097,
"grad_norm": 182.74623107910156,
"learning_rate": 2.698124892141971e-07,
"logits/chosen": 0.1460535228252411,
"logits/rejected": 0.09140360355377197,
"logps/chosen": -63.824440002441406,
"logps/ref_chosen": -61.625770568847656,
"logps/ref_rejected": -87.63627624511719,
"logps/rejected": -91.50596618652344,
"loss": 0.8304,
"margin_dpo/margin_mean": 1.6710278987884521,
"margin_dpo/margin_std": 1.9888241291046143,
"step": 350
},
{
"epoch": 0.5306122448979592,
"fcm_dpo/beta": 0.7518589496612549,
"fcm_dpo/delta": 0.1678137630224228,
"fcm_dpo/margin": 1.1261451244354248,
"fcm_dpo/q_t": 0.33188802003860474,
"grad_norm": 165.69610595703125,
"learning_rate": 2.6849415780518357e-07,
"logits/chosen": 0.09675121307373047,
"logits/rejected": 0.0414729006588459,
"logps/chosen": -58.45286178588867,
"logps/ref_chosen": -56.2563362121582,
"logps/ref_rejected": -79.11589813232422,
"logps/rejected": -82.43856811523438,
"loss": 1.0476,
"margin_dpo/margin_mean": 1.1261451244354248,
"margin_dpo/margin_std": 1.6900222301483154,
"step": 351
},
{
"epoch": 0.5321239606953893,
"fcm_dpo/beta": 0.7760653495788574,
"fcm_dpo/delta": 0.16433821618556976,
"fcm_dpo/margin": 1.0951128005981445,
"fcm_dpo/q_t": 0.35313987731933594,
"grad_norm": 193.21861267089844,
"learning_rate": 2.6717530907482024e-07,
"logits/chosen": 0.16212013363838196,
"logits/rejected": 0.12353149801492691,
"logps/chosen": -65.12254333496094,
"logps/ref_chosen": -63.05195236206055,
"logps/ref_rejected": -85.52035522460938,
"logps/rejected": -88.68605041503906,
"loss": 0.9931,
"margin_dpo/margin_mean": 1.0951130390167236,
"margin_dpo/margin_std": 1.6728229522705078,
"step": 352
},
{
"epoch": 0.5336356764928194,
"fcm_dpo/beta": 0.7995505332946777,
"fcm_dpo/delta": 0.018024399876594543,
"fcm_dpo/margin": 1.2259821891784668,
"fcm_dpo/q_t": 0.33103376626968384,
"grad_norm": 166.7522430419922,
"learning_rate": 2.658559799141411e-07,
"logits/chosen": 0.1692410409450531,
"logits/rejected": 0.16683810949325562,
"logps/chosen": -71.08948516845703,
"logps/ref_chosen": -69.00918579101562,
"logps/ref_rejected": -72.65840148925781,
"logps/rejected": -75.96468353271484,
"loss": 1.0033,
"margin_dpo/margin_mean": 1.2259814739227295,
"margin_dpo/margin_std": 1.810058832168579,
"step": 353
},
{
"epoch": 0.5351473922902494,
"fcm_dpo/beta": 0.7726951837539673,
"fcm_dpo/delta": -0.05429168790578842,
"fcm_dpo/margin": 1.3514502048492432,
"fcm_dpo/q_t": 0.319754034280777,
"grad_norm": 218.87094116210938,
"learning_rate": 2.6453620722761895e-07,
"logits/chosen": 0.20377589762210846,
"logits/rejected": 0.10091987252235413,
"logps/chosen": -41.94265365600586,
"logps/ref_chosen": -39.78833770751953,
"logps/ref_rejected": -69.56885528564453,
"logps/rejected": -73.07461547851562,
"loss": 0.93,
"margin_dpo/margin_mean": 1.3514502048492432,
"margin_dpo/margin_std": 1.7668390274047852,
"step": 354
},
{
"epoch": 0.5366591080876795,
"fcm_dpo/beta": 0.7757099866867065,
"fcm_dpo/delta": -0.04570357874035835,
"fcm_dpo/margin": 1.3419021368026733,
"fcm_dpo/q_t": 0.32356125116348267,
"grad_norm": 184.73477172851562,
"learning_rate": 2.632160279321328e-07,
"logits/chosen": 0.14926283061504364,
"logits/rejected": 0.0633564367890358,
"logps/chosen": -48.385475158691406,
"logps/ref_chosen": -46.25537872314453,
"logps/ref_rejected": -78.20236206054688,
"logps/rejected": -81.67436218261719,
"loss": 0.9503,
"margin_dpo/margin_mean": 1.3419021368026733,
"margin_dpo/margin_std": 1.8027377128601074,
"step": 355
},
{
"epoch": 0.5381708238851096,
"fcm_dpo/beta": 0.7802422046661377,
"fcm_dpo/delta": 0.09977808594703674,
"fcm_dpo/margin": 1.165019154548645,
"fcm_dpo/q_t": 0.34947988390922546,
"grad_norm": 180.77276611328125,
"learning_rate": 2.618954789559356e-07,
"logits/chosen": 0.15579620003700256,
"logits/rejected": 0.1042039543390274,
"logps/chosen": -50.003562927246094,
"logps/ref_chosen": -47.906158447265625,
"logps/ref_rejected": -74.29397583007812,
"logps/rejected": -77.556396484375,
"loss": 1.1606,
"margin_dpo/margin_mean": 1.1650193929672241,
"margin_dpo/margin_std": 2.1200852394104004,
"step": 356
},
{
"epoch": 0.5396825396825397,
"fcm_dpo/beta": 0.7977977991104126,
"fcm_dpo/delta": 0.14315135776996613,
"fcm_dpo/margin": 1.0884279012680054,
"fcm_dpo/q_t": 0.3529171347618103,
"grad_norm": 215.51937866210938,
"learning_rate": 2.6057459723762076e-07,
"logits/chosen": 0.16426903009414673,
"logits/rejected": 0.13840454816818237,
"logps/chosen": -64.85040283203125,
"logps/ref_chosen": -62.63500213623047,
"logps/ref_rejected": -65.11399841308594,
"logps/rejected": -68.41783142089844,
"loss": 1.116,
"margin_dpo/margin_mean": 1.0884283781051636,
"margin_dpo/margin_std": 1.8702547550201416,
"step": 357
},
{
"epoch": 0.5411942554799698,
"fcm_dpo/beta": 0.7857924103736877,
"fcm_dpo/delta": -0.2809675335884094,
"fcm_dpo/margin": 1.586600661277771,
"fcm_dpo/q_t": 0.2850938141345978,
"grad_norm": 148.0938720703125,
"learning_rate": 2.5925341972508954e-07,
"logits/chosen": 0.1491168886423111,
"logits/rejected": 0.14593584835529327,
"logps/chosen": -69.29679107666016,
"logps/ref_chosen": -67.20960998535156,
"logps/ref_rejected": -69.34715270996094,
"logps/rejected": -73.02093505859375,
"loss": 0.798,
"margin_dpo/margin_mean": 1.586600661277771,
"margin_dpo/margin_std": 1.7209175825119019,
"step": 358
},
{
"epoch": 0.5427059712773998,
"fcm_dpo/beta": 0.7968997955322266,
"fcm_dpo/delta": 0.10445237159729004,
"fcm_dpo/margin": 1.1224002838134766,
"fcm_dpo/q_t": 0.34582918882369995,
"grad_norm": 201.40667724609375,
"learning_rate": 2.579319833745169e-07,
"logits/chosen": 0.14725103974342346,
"logits/rejected": 0.12238387763500214,
"logps/chosen": -64.69267272949219,
"logps/ref_chosen": -62.52578353881836,
"logps/ref_rejected": -76.63114929199219,
"logps/rejected": -79.9204330444336,
"loss": 1.0804,
"margin_dpo/margin_mean": 1.1224000453948975,
"margin_dpo/margin_std": 1.7543888092041016,
"step": 359
},
{
"epoch": 0.54421768707483,
"fcm_dpo/beta": 0.7883453369140625,
"fcm_dpo/delta": 0.023167556151747704,
"fcm_dpo/margin": 1.241877794265747,
"fcm_dpo/q_t": 0.3393666446208954,
"grad_norm": 213.6732635498047,
"learning_rate": 2.5661032514931834e-07,
"logits/chosen": 0.11307230591773987,
"logits/rejected": 0.0418628454208374,
"logps/chosen": -65.72219848632812,
"logps/ref_chosen": -63.48772048950195,
"logps/ref_rejected": -90.6891098022461,
"logps/rejected": -94.16546630859375,
"loss": 1.0189,
"margin_dpo/margin_mean": 1.2418781518936157,
"margin_dpo/margin_std": 1.9586660861968994,
"step": 360
},
{
"epoch": 0.54572940287226,
"fcm_dpo/beta": 0.7847793698310852,
"fcm_dpo/delta": -0.006262313574552536,
"fcm_dpo/margin": 1.2811236381530762,
"fcm_dpo/q_t": 0.32984602451324463,
"grad_norm": 186.03604125976562,
"learning_rate": 2.552884820191154e-07,
"logits/chosen": 0.21646341681480408,
"logits/rejected": 0.17365378141403198,
"logps/chosen": -60.005924224853516,
"logps/ref_chosen": -57.917144775390625,
"logps/ref_rejected": -72.39089965820312,
"logps/rejected": -75.76080322265625,
"loss": 0.9667,
"margin_dpo/margin_mean": 1.2811236381530762,
"margin_dpo/margin_std": 1.8610559701919556,
"step": 361
},
{
"epoch": 0.54724111866969,
"fcm_dpo/beta": 0.7910502552986145,
"fcm_dpo/delta": -0.12872016429901123,
"fcm_dpo/margin": 1.3991649150848389,
"fcm_dpo/q_t": 0.3150370121002197,
"grad_norm": 179.85292053222656,
"learning_rate": 2.53966490958702e-07,
"logits/chosen": 0.20901912450790405,
"logits/rejected": 0.14084625244140625,
"logps/chosen": -65.4914321899414,
"logps/ref_chosen": -63.4434700012207,
"logps/ref_rejected": -103.45516967773438,
"logps/rejected": -106.90229797363281,
"loss": 0.8868,
"margin_dpo/margin_mean": 1.3991665840148926,
"margin_dpo/margin_std": 1.6471703052520752,
"step": 362
},
{
"epoch": 0.5487528344671202,
"fcm_dpo/beta": 0.7514413595199585,
"fcm_dpo/delta": -0.15377500653266907,
"fcm_dpo/margin": 1.512410044670105,
"fcm_dpo/q_t": 0.3079715967178345,
"grad_norm": 172.7432403564453,
"learning_rate": 2.526443889470099e-07,
"logits/chosen": 0.18472757935523987,
"logits/rejected": 0.09980542212724686,
"logps/chosen": -51.084228515625,
"logps/ref_chosen": -48.65182876586914,
"logps/ref_rejected": -88.65904235839844,
"logps/rejected": -92.60385131835938,
"loss": 0.9557,
"margin_dpo/margin_mean": 1.512410044670105,
"margin_dpo/margin_std": 2.1208112239837646,
"step": 363
},
{
"epoch": 0.5502645502645502,
"fcm_dpo/beta": 0.7271950840950012,
"fcm_dpo/delta": -0.1419481635093689,
"fcm_dpo/margin": 1.5479559898376465,
"fcm_dpo/q_t": 0.31253015995025635,
"grad_norm": 143.4964599609375,
"learning_rate": 2.513222129660744e-07,
"logits/chosen": 0.09488549828529358,
"logits/rejected": 0.020679466426372528,
"logps/chosen": -59.90724563598633,
"logps/ref_chosen": -57.87107467651367,
"logps/ref_rejected": -80.95503234863281,
"logps/rejected": -84.53915405273438,
"loss": 0.9253,
"margin_dpo/margin_mean": 1.5479564666748047,
"margin_dpo/margin_std": 2.0348784923553467,
"step": 364
},
{
"epoch": 0.5517762660619804,
"fcm_dpo/beta": 0.7135534882545471,
"fcm_dpo/delta": -0.0528385192155838,
"fcm_dpo/margin": 1.4665672779083252,
"fcm_dpo/q_t": 0.30548185110092163,
"grad_norm": 147.20799255371094,
"learning_rate": 2.5e-07,
"logits/chosen": 0.19886715710163116,
"logits/rejected": 0.18875735998153687,
"logps/chosen": -66.82514190673828,
"logps/ref_chosen": -64.94217681884766,
"logps/ref_rejected": -74.8599853515625,
"logps/rejected": -78.20951843261719,
"loss": 0.868,
"margin_dpo/margin_mean": 1.4665677547454834,
"margin_dpo/margin_std": 1.6899524927139282,
"step": 365
},
{
"epoch": 0.5532879818594104,
"fcm_dpo/beta": 0.749003529548645,
"fcm_dpo/delta": 0.20677639544010162,
"fcm_dpo/margin": 1.071431279182434,
"fcm_dpo/q_t": 0.36068403720855713,
"grad_norm": 177.8300323486328,
"learning_rate": 2.486777870339255e-07,
"logits/chosen": 0.1121918261051178,
"logits/rejected": 0.09326402097940445,
"logps/chosen": -56.96438217163086,
"logps/ref_chosen": -55.16598129272461,
"logps/ref_rejected": -65.26121520996094,
"logps/rejected": -68.13104248046875,
"loss": 1.1563,
"margin_dpo/margin_mean": 1.0714313983917236,
"margin_dpo/margin_std": 1.9580434560775757,
"step": 366
},
{
"epoch": 0.5547996976568406,
"fcm_dpo/beta": 0.7712104320526123,
"fcm_dpo/delta": 0.2716788947582245,
"fcm_dpo/margin": 0.9718486070632935,
"fcm_dpo/q_t": 0.3597671687602997,
"grad_norm": 178.6884002685547,
"learning_rate": 2.4735561105299014e-07,
"logits/chosen": 0.1118677407503128,
"logits/rejected": 0.03625689074397087,
"logps/chosen": -58.22844696044922,
"logps/ref_chosen": -56.01046371459961,
"logps/ref_rejected": -77.31010437011719,
"logps/rejected": -80.49993896484375,
"loss": 1.0665,
"margin_dpo/margin_mean": 0.971848726272583,
"margin_dpo/margin_std": 1.5560146570205688,
"step": 367
},
{
"epoch": 0.5563114134542706,
"fcm_dpo/beta": 0.8040578365325928,
"fcm_dpo/delta": 0.1679600328207016,
"fcm_dpo/margin": 1.0528676509857178,
"fcm_dpo/q_t": 0.3611965775489807,
"grad_norm": 214.42361450195312,
"learning_rate": 2.46033509041298e-07,
"logits/chosen": 0.06508797407150269,
"logits/rejected": 0.056503720581531525,
"logps/chosen": -76.96674346923828,
"logps/ref_chosen": -74.82927703857422,
"logps/ref_rejected": -76.11680603027344,
"logps/rejected": -79.30712890625,
"loss": 1.1432,
"margin_dpo/margin_mean": 1.052868127822876,
"margin_dpo/margin_std": 1.877270221710205,
"step": 368
},
{
"epoch": 0.5578231292517006,
"fcm_dpo/beta": 0.83278489112854,
"fcm_dpo/delta": 0.09441757202148438,
"fcm_dpo/margin": 1.095184564590454,
"fcm_dpo/q_t": 0.3382675051689148,
"grad_norm": 172.21173095703125,
"learning_rate": 2.447115179808846e-07,
"logits/chosen": 0.122508205473423,
"logits/rejected": 0.09032686054706573,
"logps/chosen": -60.496971130371094,
"logps/ref_chosen": -58.32621765136719,
"logps/ref_rejected": -80.92183685302734,
"logps/rejected": -84.18777465820312,
"loss": 1.0485,
"margin_dpo/margin_mean": 1.0951846837997437,
"margin_dpo/margin_std": 1.6767044067382812,
"step": 369
},
{
"epoch": 0.5593348450491308,
"fcm_dpo/beta": 0.8102331757545471,
"fcm_dpo/delta": -0.10076209902763367,
"fcm_dpo/margin": 1.342164397239685,
"fcm_dpo/q_t": 0.3211871385574341,
"grad_norm": 181.2273406982422,
"learning_rate": 2.4338967485068164e-07,
"logits/chosen": 0.24069687724113464,
"logits/rejected": 0.18847504258155823,
"logps/chosen": -55.11189270019531,
"logps/ref_chosen": -52.88372039794922,
"logps/ref_rejected": -79.43692016601562,
"logps/rejected": -83.00725555419922,
"loss": 1.0596,
"margin_dpo/margin_mean": 1.3421647548675537,
"margin_dpo/margin_std": 2.065488576889038,
"step": 370
},
{
"epoch": 0.5608465608465608,
"fcm_dpo/beta": 0.8171911239624023,
"fcm_dpo/delta": -0.08116129040718079,
"fcm_dpo/margin": 1.3074921369552612,
"fcm_dpo/q_t": 0.3368060290813446,
"grad_norm": 183.6264190673828,
"learning_rate": 2.420680166254831e-07,
"logits/chosen": 0.2111239731311798,
"logits/rejected": 0.18440525233745575,
"logps/chosen": -51.46292495727539,
"logps/ref_chosen": -49.224212646484375,
"logps/ref_rejected": -63.348472595214844,
"logps/rejected": -66.8946762084961,
"loss": 1.0896,
"margin_dpo/margin_mean": 1.3074921369552612,
"margin_dpo/margin_std": 2.0974409580230713,
"step": 371
},
{
"epoch": 0.562358276643991,
"fcm_dpo/beta": 0.8467217683792114,
"fcm_dpo/delta": 0.2932736575603485,
"fcm_dpo/margin": 0.851443350315094,
"fcm_dpo/q_t": 0.3849431276321411,
"grad_norm": 214.92108154296875,
"learning_rate": 2.4074658027491044e-07,
"logits/chosen": 0.18400293588638306,
"logits/rejected": 0.12166933715343475,
"logps/chosen": -54.62636184692383,
"logps/ref_chosen": -52.269554138183594,
"logps/ref_rejected": -72.99522399902344,
"logps/rejected": -76.20347595214844,
"loss": 1.3565,
"margin_dpo/margin_mean": 0.8514436483383179,
"margin_dpo/margin_std": 2.075626850128174,
"step": 372
},
{
"epoch": 0.563869992441421,
"fcm_dpo/beta": 0.8382232189178467,
"fcm_dpo/delta": -0.05690415948629379,
"fcm_dpo/margin": 1.2532103061676025,
"fcm_dpo/q_t": 0.3378145694732666,
"grad_norm": 255.2552947998047,
"learning_rate": 2.394254027623792e-07,
"logits/chosen": 0.19523033499717712,
"logits/rejected": 0.14481961727142334,
"logps/chosen": -63.56736755371094,
"logps/ref_chosen": -61.112998962402344,
"logps/ref_rejected": -76.24851989746094,
"logps/rejected": -79.95610046386719,
"loss": 1.0989,
"margin_dpo/margin_mean": 1.2532098293304443,
"margin_dpo/margin_std": 2.067584991455078,
"step": 373
},
{
"epoch": 0.5653817082388511,
"fcm_dpo/beta": 0.788150429725647,
"fcm_dpo/delta": -0.36466753482818604,
"fcm_dpo/margin": 1.6652493476867676,
"fcm_dpo/q_t": 0.2760714292526245,
"grad_norm": 186.27012634277344,
"learning_rate": 2.381045210440644e-07,
"logits/chosen": 0.12782002985477448,
"logits/rejected": 0.11331016570329666,
"logps/chosen": -74.75846099853516,
"logps/ref_chosen": -72.66920471191406,
"logps/ref_rejected": -76.83158874511719,
"logps/rejected": -80.58609771728516,
"loss": 0.7647,
"margin_dpo/margin_mean": 1.6652493476867676,
"margin_dpo/margin_std": 1.7359645366668701,
"step": 374
},
{
"epoch": 0.5668934240362812,
"fcm_dpo/beta": 0.8175476789474487,
"fcm_dpo/delta": 0.3215762972831726,
"fcm_dpo/margin": 0.8576102256774902,
"fcm_dpo/q_t": 0.37826499342918396,
"grad_norm": 234.281494140625,
"learning_rate": 2.3678397206786715e-07,
"logits/chosen": 0.1810404360294342,
"logits/rejected": 0.1419374942779541,
"logps/chosen": -59.804378509521484,
"logps/ref_chosen": -57.68330383300781,
"logps/ref_rejected": -79.34097290039062,
"logps/rejected": -82.31965637207031,
"loss": 1.1949,
"margin_dpo/margin_mean": 0.8576098680496216,
"margin_dpo/margin_std": 1.7258224487304688,
"step": 375
},
{
"epoch": 0.5684051398337112,
"fcm_dpo/beta": 0.7721197605133057,
"fcm_dpo/delta": -0.40702202916145325,
"fcm_dpo/margin": 1.7395694255828857,
"fcm_dpo/q_t": 0.28671878576278687,
"grad_norm": 153.21202087402344,
"learning_rate": 2.3546379277238103e-07,
"logits/chosen": 0.19977155327796936,
"logits/rejected": 0.15468192100524902,
"logps/chosen": -53.76110076904297,
"logps/ref_chosen": -51.674072265625,
"logps/ref_rejected": -75.69713592529297,
"logps/rejected": -79.52372741699219,
"loss": 0.8446,
"margin_dpo/margin_mean": 1.739569067955017,
"margin_dpo/margin_std": 2.094245433807373,
"step": 376
},
{
"epoch": 0.5699168556311414,
"fcm_dpo/beta": 0.7831248641014099,
"fcm_dpo/delta": 0.08697693049907684,
"fcm_dpo/margin": 1.1734894514083862,
"fcm_dpo/q_t": 0.3411799371242523,
"grad_norm": 162.50051879882812,
"learning_rate": 2.3414402008585886e-07,
"logits/chosen": 0.11727416515350342,
"logits/rejected": 0.09625902771949768,
"logps/chosen": -48.64626693725586,
"logps/ref_chosen": -46.17853546142578,
"logps/ref_rejected": -57.756500244140625,
"logps/rejected": -61.39772415161133,
"loss": 1.0109,
"margin_dpo/margin_mean": 1.1734893321990967,
"margin_dpo/margin_std": 1.7293052673339844,
"step": 377
},
{
"epoch": 0.5714285714285714,
"fcm_dpo/beta": 0.8013092875480652,
"fcm_dpo/delta": 0.08626553416252136,
"fcm_dpo/margin": 1.1440807580947876,
"fcm_dpo/q_t": 0.35015422105789185,
"grad_norm": 186.11314392089844,
"learning_rate": 2.3282469092517977e-07,
"logits/chosen": 0.19343531131744385,
"logits/rejected": 0.15595267713069916,
"logps/chosen": -61.52544021606445,
"logps/ref_chosen": -59.21887969970703,
"logps/ref_rejected": -71.24818420410156,
"logps/rejected": -74.69883728027344,
"loss": 1.045,
"margin_dpo/margin_mean": 1.1440809965133667,
"margin_dpo/margin_std": 1.8243110179901123,
"step": 378
},
{
"epoch": 0.5729402872260015,
"fcm_dpo/beta": 0.7728543281555176,
"fcm_dpo/delta": -0.18132196366786957,
"fcm_dpo/margin": 1.5013034343719482,
"fcm_dpo/q_t": 0.3048959970474243,
"grad_norm": 197.8959197998047,
"learning_rate": 2.3150584219481643e-07,
"logits/chosen": 0.21861502528190613,
"logits/rejected": 0.1745017170906067,
"logps/chosen": -78.35917663574219,
"logps/ref_chosen": -76.31658935546875,
"logps/ref_rejected": -104.26200103759766,
"logps/rejected": -107.80589294433594,
"loss": 0.865,
"margin_dpo/margin_mean": 1.501303791999817,
"margin_dpo/margin_std": 1.88877534866333,
"step": 379
},
{
"epoch": 0.5744520030234316,
"fcm_dpo/beta": 0.732434093952179,
"fcm_dpo/delta": -0.3159186840057373,
"fcm_dpo/margin": 1.7422823905944824,
"fcm_dpo/q_t": 0.2831631302833557,
"grad_norm": 152.97654724121094,
"learning_rate": 2.3018751078580283e-07,
"logits/chosen": 0.18231691420078278,
"logits/rejected": 0.15374861657619476,
"logps/chosen": -63.16365051269531,
"logps/ref_chosen": -61.283164978027344,
"logps/ref_rejected": -72.38892364501953,
"logps/rejected": -76.01168823242188,
"loss": 0.8666,
"margin_dpo/margin_mean": 1.7422822713851929,
"margin_dpo/margin_std": 2.142005443572998,
"step": 380
},
{
"epoch": 0.5759637188208617,
"fcm_dpo/beta": 0.7804316282272339,
"fcm_dpo/delta": 0.42041903734207153,
"fcm_dpo/margin": 0.7539185285568237,
"fcm_dpo/q_t": 0.3955872654914856,
"grad_norm": 201.4615936279297,
"learning_rate": 2.288697335747027e-07,
"logits/chosen": 0.12479900568723679,
"logits/rejected": 0.10375261306762695,
"logps/chosen": -60.67311096191406,
"logps/ref_chosen": -58.2139892578125,
"logps/ref_rejected": -60.78669357299805,
"logps/rejected": -63.999732971191406,
"loss": 1.2184,
"margin_dpo/margin_mean": 0.7539188861846924,
"margin_dpo/margin_std": 1.6283390522003174,
"step": 381
},
{
"epoch": 0.5774754346182918,
"fcm_dpo/beta": 0.8213146924972534,
"fcm_dpo/delta": 0.16648587584495544,
"fcm_dpo/margin": 1.0126454830169678,
"fcm_dpo/q_t": 0.3555706739425659,
"grad_norm": 187.99722290039062,
"learning_rate": 2.2755254742257706e-07,
"logits/chosen": 0.1935308575630188,
"logits/rejected": 0.1620044708251953,
"logps/chosen": -64.31198120117188,
"logps/ref_chosen": -61.82532501220703,
"logps/ref_rejected": -83.0452880859375,
"logps/rejected": -86.54458618164062,
"loss": 1.0464,
"margin_dpo/margin_mean": 1.0126454830169678,
"margin_dpo/margin_std": 1.5206871032714844,
"step": 382
},
{
"epoch": 0.5789871504157218,
"fcm_dpo/beta": 0.7823382616043091,
"fcm_dpo/delta": -0.15291021764278412,
"fcm_dpo/margin": 1.4475116729736328,
"fcm_dpo/q_t": 0.3087931275367737,
"grad_norm": 219.67236328125,
"learning_rate": 2.2623598917395436e-07,
"logits/chosen": 0.10844056308269501,
"logits/rejected": 0.11368384212255478,
"logps/chosen": -82.67160034179688,
"logps/ref_chosen": -80.56326293945312,
"logps/ref_rejected": -74.62922668457031,
"logps/rejected": -78.18507385253906,
"loss": 0.9182,
"margin_dpo/margin_mean": 1.4475116729736328,
"margin_dpo/margin_std": 1.882810354232788,
"step": 383
},
{
"epoch": 0.5804988662131519,
"fcm_dpo/beta": 0.8081178665161133,
"fcm_dpo/delta": 0.1466008424758911,
"fcm_dpo/margin": 1.0682569742202759,
"fcm_dpo/q_t": 0.3386213779449463,
"grad_norm": 210.8586883544922,
"learning_rate": 2.2492009565579875e-07,
"logits/chosen": 0.15908417105674744,
"logits/rejected": 0.1250711977481842,
"logps/chosen": -68.10568237304688,
"logps/ref_chosen": -65.47514343261719,
"logps/ref_rejected": -79.67378234863281,
"logps/rejected": -83.37257385253906,
"loss": 1.014,
"margin_dpo/margin_mean": 1.0682566165924072,
"margin_dpo/margin_std": 1.5980737209320068,
"step": 384
},
{
"epoch": 0.582010582010582,
"fcm_dpo/beta": 0.8039337396621704,
"fcm_dpo/delta": -0.06407226622104645,
"fcm_dpo/margin": 1.3148212432861328,
"fcm_dpo/q_t": 0.3122956156730652,
"grad_norm": 201.8307647705078,
"learning_rate": 2.2360490367648084e-07,
"logits/chosen": 0.14007516205310822,
"logits/rejected": 0.11297205090522766,
"logps/chosen": -68.39266967773438,
"logps/ref_chosen": -66.0565185546875,
"logps/ref_rejected": -86.68023681640625,
"logps/rejected": -90.33121490478516,
"loss": 0.9181,
"margin_dpo/margin_mean": 1.3148208856582642,
"margin_dpo/margin_std": 1.7250840663909912,
"step": 385
},
{
"epoch": 0.5835222978080121,
"fcm_dpo/beta": 0.7948161363601685,
"fcm_dpo/delta": 0.02499794214963913,
"fcm_dpo/margin": 1.229053020477295,
"fcm_dpo/q_t": 0.3376579284667969,
"grad_norm": 190.71925354003906,
"learning_rate": 2.2229045002474724e-07,
"logits/chosen": 0.1209828183054924,
"logits/rejected": 0.0802159532904625,
"logps/chosen": -78.08750915527344,
"logps/ref_chosen": -75.6236572265625,
"logps/ref_rejected": -92.62330627441406,
"logps/rejected": -96.31620788574219,
"loss": 1.0068,
"margin_dpo/margin_mean": 1.229053258895874,
"margin_dpo/margin_std": 1.8680897951126099,
"step": 386
},
{
"epoch": 0.5850340136054422,
"fcm_dpo/beta": 0.7716137170791626,
"fcm_dpo/delta": -0.2335832417011261,
"fcm_dpo/margin": 1.5613083839416504,
"fcm_dpo/q_t": 0.27384334802627563,
"grad_norm": 133.3782501220703,
"learning_rate": 2.209767714686924e-07,
"logits/chosen": 0.15220069885253906,
"logits/rejected": 0.08792141824960709,
"logps/chosen": -49.354488372802734,
"logps/ref_chosen": -47.22170639038086,
"logps/ref_rejected": -87.338134765625,
"logps/rejected": -91.03223419189453,
"loss": 0.7497,
"margin_dpo/margin_mean": 1.5613081455230713,
"margin_dpo/margin_std": 1.4777767658233643,
"step": 387
},
{
"epoch": 0.5865457294028723,
"fcm_dpo/beta": 0.763427734375,
"fcm_dpo/delta": 0.034832365810871124,
"fcm_dpo/margin": 1.267984390258789,
"fcm_dpo/q_t": 0.3503814935684204,
"grad_norm": 178.84954833984375,
"learning_rate": 2.1966390475472954e-07,
"logits/chosen": 0.16702687740325928,
"logits/rejected": 0.1540418267250061,
"logps/chosen": -76.91740417480469,
"logps/ref_chosen": -74.5794677734375,
"logps/ref_rejected": -79.92558288574219,
"logps/rejected": -83.531494140625,
"loss": 1.0483,
"margin_dpo/margin_mean": 1.2679840326309204,
"margin_dpo/margin_std": 2.043673515319824,
"step": 388
},
{
"epoch": 0.5880574452003023,
"fcm_dpo/beta": 0.7542685270309448,
"fcm_dpo/delta": -0.14975543320178986,
"fcm_dpo/margin": 1.5020861625671387,
"fcm_dpo/q_t": 0.3079478144645691,
"grad_norm": 161.67514038085938,
"learning_rate": 2.1835188660656265e-07,
"logits/chosen": 0.17143261432647705,
"logits/rejected": 0.1412460058927536,
"logps/chosen": -63.959442138671875,
"logps/ref_chosen": -61.624366760253906,
"logps/ref_rejected": -76.50978088378906,
"logps/rejected": -80.3469467163086,
"loss": 0.9259,
"margin_dpo/margin_mean": 1.5020864009857178,
"margin_dpo/margin_std": 1.9342763423919678,
"step": 389
},
{
"epoch": 0.5895691609977324,
"fcm_dpo/beta": 0.7468278408050537,
"fcm_dpo/delta": 0.005739331711083651,
"fcm_dpo/margin": 1.3320647478103638,
"fcm_dpo/q_t": 0.3248102068901062,
"grad_norm": 146.38316345214844,
"learning_rate": 2.170407537241599e-07,
"logits/chosen": 0.2075425386428833,
"logits/rejected": 0.16074812412261963,
"logps/chosen": -48.055908203125,
"logps/ref_chosen": -45.871864318847656,
"logps/ref_rejected": -61.305999755859375,
"logps/rejected": -64.82211303710938,
"loss": 0.9054,
"margin_dpo/margin_mean": 1.3320646286010742,
"margin_dpo/margin_std": 1.6783784627914429,
"step": 390
},
{
"epoch": 0.5910808767951625,
"fcm_dpo/beta": 0.7326708436012268,
"fcm_dpo/delta": -0.18802016973495483,
"fcm_dpo/margin": 1.591862440109253,
"fcm_dpo/q_t": 0.31688395142555237,
"grad_norm": 162.29026794433594,
"learning_rate": 2.1573054278272636e-07,
"logits/chosen": 0.17586693167686462,
"logits/rejected": 0.12610454857349396,
"logps/chosen": -60.547569274902344,
"logps/ref_chosen": -58.18701171875,
"logps/ref_rejected": -83.63442993164062,
"logps/rejected": -87.58686065673828,
"loss": 1.01,
"margin_dpo/margin_mean": 1.5918623208999634,
"margin_dpo/margin_std": 2.3288512229919434,
"step": 391
},
{
"epoch": 0.5925925925925926,
"fcm_dpo/beta": 0.6947846412658691,
"fcm_dpo/delta": -0.25457581877708435,
"fcm_dpo/margin": 1.7612890005111694,
"fcm_dpo/q_t": 0.3048982322216034,
"grad_norm": 162.5065155029297,
"learning_rate": 2.1442129043167873e-07,
"logits/chosen": 0.24103191494941711,
"logits/rejected": 0.19386835396289825,
"logps/chosen": -71.8067626953125,
"logps/ref_chosen": -69.7445297241211,
"logps/ref_rejected": -94.05877685546875,
"logps/rejected": -97.8823013305664,
"loss": 0.9129,
"margin_dpo/margin_mean": 1.761289358139038,
"margin_dpo/margin_std": 2.361508846282959,
"step": 392
},
{
"epoch": 0.5941043083900227,
"fcm_dpo/beta": 0.6695666313171387,
"fcm_dpo/delta": -0.061286166310310364,
"fcm_dpo/margin": 1.5697075128555298,
"fcm_dpo/q_t": 0.30320626497268677,
"grad_norm": 139.36740112304688,
"learning_rate": 2.131130332936195e-07,
"logits/chosen": 0.20045891404151917,
"logits/rejected": 0.17006459832191467,
"logps/chosen": -54.7346076965332,
"logps/ref_chosen": -52.33489990234375,
"logps/ref_rejected": -74.33809661865234,
"logps/rejected": -78.30751037597656,
"loss": 0.8145,
"margin_dpo/margin_mean": 1.5697076320648193,
"margin_dpo/margin_std": 1.6137369871139526,
"step": 393
},
{
"epoch": 0.5956160241874527,
"fcm_dpo/beta": 0.683825671672821,
"fcm_dpo/delta": 0.04361763596534729,
"fcm_dpo/margin": 1.4036848545074463,
"fcm_dpo/q_t": 0.3363683223724365,
"grad_norm": 153.11077880859375,
"learning_rate": 2.1180580796331323e-07,
"logits/chosen": 0.22308960556983948,
"logits/rejected": 0.1974237710237503,
"logps/chosen": -63.075340270996094,
"logps/ref_chosen": -60.6761360168457,
"logps/ref_rejected": -71.36074829101562,
"logps/rejected": -75.16364288330078,
"loss": 0.9732,
"margin_dpo/margin_mean": 1.4036844968795776,
"margin_dpo/margin_std": 2.0274405479431152,
"step": 394
},
{
"epoch": 0.5971277399848829,
"fcm_dpo/beta": 0.6959511041641235,
"fcm_dpo/delta": 0.1088649109005928,
"fcm_dpo/margin": 1.2934165000915527,
"fcm_dpo/q_t": 0.3352372646331787,
"grad_norm": 158.16360473632812,
"learning_rate": 2.104996510066625e-07,
"logits/chosen": 0.1776614785194397,
"logits/rejected": 0.117831751704216,
"logps/chosen": -52.74278259277344,
"logps/ref_chosen": -50.60432434082031,
"logps/ref_rejected": -77.08731079101562,
"logps/rejected": -80.51919555664062,
"loss": 0.9631,
"margin_dpo/margin_mean": 1.2934160232543945,
"margin_dpo/margin_std": 1.7340922355651855,
"step": 395
},
{
"epoch": 0.5986394557823129,
"fcm_dpo/beta": 0.6787519454956055,
"fcm_dpo/delta": -0.02714592218399048,
"fcm_dpo/margin": 1.4940762519836426,
"fcm_dpo/q_t": 0.31732630729675293,
"grad_norm": 145.00909423828125,
"learning_rate": 2.0919459895968517e-07,
"logits/chosen": 0.17002803087234497,
"logits/rejected": 0.09879619628190994,
"logps/chosen": -53.40643310546875,
"logps/ref_chosen": -51.35961151123047,
"logps/ref_rejected": -79.89360046386719,
"logps/rejected": -83.43449401855469,
"loss": 0.8661,
"margin_dpo/margin_mean": 1.4940763711929321,
"margin_dpo/margin_std": 1.6158559322357178,
"step": 396
},
{
"epoch": 0.600151171579743,
"fcm_dpo/beta": 0.7337859869003296,
"fcm_dpo/delta": 0.3556361794471741,
"fcm_dpo/margin": 0.9081060886383057,
"fcm_dpo/q_t": 0.37586018443107605,
"grad_norm": 224.95326232910156,
"learning_rate": 2.078906883274924e-07,
"logits/chosen": 0.12839001417160034,
"logits/rejected": 0.09453357756137848,
"logps/chosen": -68.88770294189453,
"logps/ref_chosen": -66.45622253417969,
"logps/ref_rejected": -85.74736785888672,
"logps/rejected": -89.08694458007812,
"loss": 1.3125,
"margin_dpo/margin_mean": 0.9081062078475952,
"margin_dpo/margin_std": 2.2067739963531494,
"step": 397
},
{
"epoch": 0.6016628873771731,
"fcm_dpo/beta": 0.7362475395202637,
"fcm_dpo/delta": -0.06381751596927643,
"fcm_dpo/margin": 1.4355955123901367,
"fcm_dpo/q_t": 0.3111931383609772,
"grad_norm": 139.69631958007812,
"learning_rate": 2.065879555832674e-07,
"logits/chosen": 0.16362245380878448,
"logits/rejected": 0.11482103168964386,
"logps/chosen": -51.42768859863281,
"logps/ref_chosen": -49.244239807128906,
"logps/ref_rejected": -75.18949127197266,
"logps/rejected": -78.80854034423828,
"loss": 0.8542,
"margin_dpo/margin_mean": 1.4355957508087158,
"margin_dpo/margin_std": 1.6850342750549316,
"step": 398
},
{
"epoch": 0.6031746031746031,
"fcm_dpo/beta": 0.7134385108947754,
"fcm_dpo/delta": -0.1938168704509735,
"fcm_dpo/margin": 1.6412277221679688,
"fcm_dpo/q_t": 0.32044440507888794,
"grad_norm": 166.16122436523438,
"learning_rate": 2.052864371672457e-07,
"logits/chosen": 0.12605230510234833,
"logits/rejected": 0.031615402549505234,
"logps/chosen": -70.56350708007812,
"logps/ref_chosen": -68.30679321289062,
"logps/ref_rejected": -113.2708511352539,
"logps/rejected": -117.16879272460938,
"loss": 0.9153,
"margin_dpo/margin_mean": 1.641228437423706,
"margin_dpo/margin_std": 2.2904515266418457,
"step": 399
},
{
"epoch": 0.6046863189720333,
"fcm_dpo/beta": 0.7214968204498291,
"fcm_dpo/delta": 0.2427944540977478,
"fcm_dpo/margin": 1.0737788677215576,
"fcm_dpo/q_t": 0.37354040145874023,
"grad_norm": 195.83456420898438,
"learning_rate": 2.0398616948569493e-07,
"logits/chosen": 0.18185412883758545,
"logits/rejected": 0.14102932810783386,
"logps/chosen": -74.3447036743164,
"logps/ref_chosen": -71.62649536132812,
"logps/ref_rejected": -90.98765563964844,
"logps/rejected": -94.77964782714844,
"loss": 1.0799,
"margin_dpo/margin_mean": 1.0737799406051636,
"margin_dpo/margin_std": 1.769069790840149,
"step": 400
},
{
"epoch": 0.6046863189720333,
"eval_fcm_dpo/beta": 0.7420421838760376,
"eval_logits/chosen": 0.20529566705226898,
"eval_logits/rejected": 0.1674942821264267,
"eval_logps/chosen": -77.24707794189453,
"eval_logps/ref_chosen": -74.85946655273438,
"eval_logps/ref_rejected": -79.54898834228516,
"eval_logps/rejected": -83.04328918457031,
"eval_loss": 0.5736358761787415,
"eval_margin_dpo/margin_mean": 1.1066911220550537,
"eval_margin_dpo/margin_std": 2.0390639305114746,
"eval_runtime": 37.9981,
"eval_samples_per_second": 60.608,
"eval_steps_per_second": 1.895,
"step": 400
},
{
"epoch": 0.6061980347694633,
"fcm_dpo/beta": 0.7018467783927917,
"fcm_dpo/delta": -0.38142985105514526,
"fcm_dpo/margin": 1.8959496021270752,
"fcm_dpo/q_t": 0.28283387422561646,
"grad_norm": 134.66848754882812,
"learning_rate": 2.0268718890989752e-07,
"logits/chosen": 0.19736307859420776,
"logits/rejected": 0.12394518405199051,
"logps/chosen": -55.94346618652344,
"logps/ref_chosen": -53.72495651245117,
"logps/ref_rejected": -75.06304931640625,
"logps/rejected": -79.17750549316406,
"loss": 0.7958,
"margin_dpo/margin_mean": 1.8959496021270752,
"margin_dpo/margin_std": 2.1448566913604736,
"step": 401
},
{
"epoch": 0.6077097505668935,
"fcm_dpo/beta": 0.6789320707321167,
"fcm_dpo/delta": -0.07589547336101532,
"fcm_dpo/margin": 1.5725529193878174,
"fcm_dpo/q_t": 0.31703245639801025,
"grad_norm": 150.985595703125,
"learning_rate": 2.013895317751323e-07,
"logits/chosen": 0.18823865056037903,
"logits/rejected": 0.155757337808609,
"logps/chosen": -64.17735290527344,
"logps/ref_chosen": -61.873931884765625,
"logps/ref_rejected": -66.15198516845703,
"logps/rejected": -70.0279541015625,
"loss": 0.8937,
"margin_dpo/margin_mean": 1.57255220413208,
"margin_dpo/margin_std": 1.990880012512207,
"step": 402
},
{
"epoch": 0.6092214663643235,
"fcm_dpo/beta": 0.6459471583366394,
"fcm_dpo/delta": -0.33136266469955444,
"fcm_dpo/margin": 1.995465636253357,
"fcm_dpo/q_t": 0.2850838303565979,
"grad_norm": 122.05958557128906,
"learning_rate": 2.0009323437965898e-07,
"logits/chosen": 0.254935085773468,
"logits/rejected": 0.19566068053245544,
"logps/chosen": -53.623878479003906,
"logps/ref_chosen": -51.321502685546875,
"logps/ref_rejected": -86.54010772705078,
"logps/rejected": -90.83795166015625,
"loss": 0.822,
"margin_dpo/margin_mean": 1.9954662322998047,
"margin_dpo/margin_std": 2.2620480060577393,
"step": 403
},
{
"epoch": 0.6107331821617535,
"fcm_dpo/beta": 0.6391547918319702,
"fcm_dpo/delta": 0.12487616389989853,
"fcm_dpo/margin": 1.3852322101593018,
"fcm_dpo/q_t": 0.33947524428367615,
"grad_norm": 162.3168487548828,
"learning_rate": 1.9879833298370237e-07,
"logits/chosen": 0.15734031796455383,
"logits/rejected": 0.0940411314368248,
"logps/chosen": -64.49229431152344,
"logps/ref_chosen": -62.26288604736328,
"logps/ref_rejected": -95.19029998779297,
"logps/rejected": -98.80493927001953,
"loss": 0.9692,
"margin_dpo/margin_mean": 1.3852319717407227,
"margin_dpo/margin_std": 1.8755829334259033,
"step": 404
},
{
"epoch": 0.6122448979591837,
"fcm_dpo/beta": 0.634131669998169,
"fcm_dpo/delta": 0.03272247314453125,
"fcm_dpo/margin": 1.5051655769348145,
"fcm_dpo/q_t": 0.3402136266231537,
"grad_norm": 127.78704071044922,
"learning_rate": 1.975048638084379e-07,
"logits/chosen": 0.19416359066963196,
"logits/rejected": 0.16025002300739288,
"logps/chosen": -52.953269958496094,
"logps/ref_chosen": -50.5843391418457,
"logps/ref_rejected": -65.43156433105469,
"logps/rejected": -69.3056640625,
"loss": 0.968,
"margin_dpo/margin_mean": 1.5051651000976562,
"margin_dpo/margin_std": 2.0198493003845215,
"step": 405
},
{
"epoch": 0.6137566137566137,
"fcm_dpo/beta": 0.632315993309021,
"fcm_dpo/delta": -0.13053575158119202,
"fcm_dpo/margin": 1.755932331085205,
"fcm_dpo/q_t": 0.299167662858963,
"grad_norm": 111.40565490722656,
"learning_rate": 1.9621286303497914e-07,
"logits/chosen": 0.19205182790756226,
"logits/rejected": 0.0876828134059906,
"logps/chosen": -51.30841827392578,
"logps/ref_chosen": -48.99560546875,
"logps/ref_rejected": -92.47774505615234,
"logps/rejected": -96.54649353027344,
"loss": 0.8306,
"margin_dpo/margin_mean": 1.7559325695037842,
"margin_dpo/margin_std": 1.8786392211914062,
"step": 406
},
{
"epoch": 0.6152683295540439,
"fcm_dpo/beta": 0.657385528087616,
"fcm_dpo/delta": 0.21200606226921082,
"fcm_dpo/margin": 1.224708914756775,
"fcm_dpo/q_t": 0.3558533787727356,
"grad_norm": 209.7600555419922,
"learning_rate": 1.9492236680336483e-07,
"logits/chosen": 0.13539977371692657,
"logits/rejected": 0.08569268882274628,
"logps/chosen": -92.08710479736328,
"logps/ref_chosen": -89.40056610107422,
"logps/ref_rejected": -99.28775024414062,
"logps/rejected": -103.19900512695312,
"loss": 1.0517,
"margin_dpo/margin_mean": 1.224708914756775,
"margin_dpo/margin_std": 2.031216621398926,
"step": 407
},
{
"epoch": 0.6167800453514739,
"fcm_dpo/beta": 0.6533396244049072,
"fcm_dpo/delta": -0.2026488482952118,
"fcm_dpo/margin": 1.8033205270767212,
"fcm_dpo/q_t": 0.2949105501174927,
"grad_norm": 119.43750762939453,
"learning_rate": 1.9363341121154895e-07,
"logits/chosen": 0.1879599392414093,
"logits/rejected": 0.1291370391845703,
"logps/chosen": -56.83644485473633,
"logps/ref_chosen": -54.70391845703125,
"logps/ref_rejected": -73.98648834228516,
"logps/rejected": -77.92233276367188,
"loss": 0.8189,
"margin_dpo/margin_mean": 1.8033205270767212,
"margin_dpo/margin_std": 1.967519998550415,
"step": 408
},
{
"epoch": 0.618291761148904,
"fcm_dpo/beta": 0.675295352935791,
"fcm_dpo/delta": 0.34728795289993286,
"fcm_dpo/margin": 0.9984000325202942,
"fcm_dpo/q_t": 0.3867419362068176,
"grad_norm": 168.26300048828125,
"learning_rate": 1.9234603231438994e-07,
"logits/chosen": 0.1908985674381256,
"logits/rejected": 0.19027680158615112,
"logps/chosen": -64.74275970458984,
"logps/ref_chosen": -62.11822509765625,
"logps/ref_rejected": -61.933509826660156,
"logps/rejected": -65.55644226074219,
"loss": 1.1776,
"margin_dpo/margin_mean": 0.9983994960784912,
"margin_dpo/margin_std": 2.0483100414276123,
"step": 409
},
{
"epoch": 0.6198034769463341,
"fcm_dpo/beta": 0.6897294521331787,
"fcm_dpo/delta": 0.15048189461231232,
"fcm_dpo/margin": 1.2474337816238403,
"fcm_dpo/q_t": 0.3310784697532654,
"grad_norm": 171.6697998046875,
"learning_rate": 1.9106026612264315e-07,
"logits/chosen": 0.19930198788642883,
"logits/rejected": 0.1775505691766739,
"logps/chosen": -64.07925415039062,
"logps/ref_chosen": -61.80266189575195,
"logps/ref_rejected": -76.60002136230469,
"logps/rejected": -80.12403869628906,
"loss": 0.9295,
"margin_dpo/margin_mean": 1.2474339008331299,
"margin_dpo/margin_std": 1.4843469858169556,
"step": 410
},
{
"epoch": 0.6213151927437641,
"fcm_dpo/beta": 0.6963478326797485,
"fcm_dpo/delta": -0.008774511516094208,
"fcm_dpo/margin": 1.4437086582183838,
"fcm_dpo/q_t": 0.3453066349029541,
"grad_norm": 181.04974365234375,
"learning_rate": 1.8977614860195296e-07,
"logits/chosen": 0.15856947004795074,
"logits/rejected": 0.1135367900133133,
"logps/chosen": -57.06800842285156,
"logps/ref_chosen": -54.44539260864258,
"logps/ref_rejected": -74.5650863647461,
"logps/rejected": -78.63140869140625,
"loss": 1.0746,
"margin_dpo/margin_mean": 1.4437092542648315,
"margin_dpo/margin_std": 2.3472909927368164,
"step": 411
},
{
"epoch": 0.6228269085411943,
"fcm_dpo/beta": 0.6941448450088501,
"fcm_dpo/delta": -0.037317849695682526,
"fcm_dpo/margin": 1.4864020347595215,
"fcm_dpo/q_t": 0.3218909502029419,
"grad_norm": 150.66571044921875,
"learning_rate": 1.8849371567184662e-07,
"logits/chosen": 0.15843887627124786,
"logits/rejected": 0.1075374186038971,
"logps/chosen": -58.101863861083984,
"logps/ref_chosen": -55.248085021972656,
"logps/ref_rejected": -68.96623229980469,
"logps/rejected": -73.30641174316406,
"loss": 0.9281,
"margin_dpo/margin_mean": 1.4864020347595215,
"margin_dpo/margin_std": 1.928009033203125,
"step": 412
},
{
"epoch": 0.6243386243386243,
"fcm_dpo/beta": 0.7043063640594482,
"fcm_dpo/delta": 0.07277508080005646,
"fcm_dpo/margin": 1.3262073993682861,
"fcm_dpo/q_t": 0.3529996871948242,
"grad_norm": 191.52565002441406,
"learning_rate": 1.872130032047302e-07,
"logits/chosen": 0.07102949917316437,
"logits/rejected": 0.04926881939172745,
"logps/chosen": -71.4292984008789,
"logps/ref_chosen": -68.72074890136719,
"logps/ref_rejected": -78.76539611816406,
"logps/rejected": -82.80016326904297,
"loss": 1.102,
"margin_dpo/margin_mean": 1.3262066841125488,
"margin_dpo/margin_std": 2.299673557281494,
"step": 413
},
{
"epoch": 0.6258503401360545,
"fcm_dpo/beta": 0.6957840919494629,
"fcm_dpo/delta": -0.08349283784627914,
"fcm_dpo/margin": 1.542493462562561,
"fcm_dpo/q_t": 0.3177918791770935,
"grad_norm": 144.82449340820312,
"learning_rate": 1.8593404702488436e-07,
"logits/chosen": 0.17691153287887573,
"logits/rejected": 0.12794461846351624,
"logps/chosen": -56.64301300048828,
"logps/ref_chosen": -54.138214111328125,
"logps/ref_rejected": -74.65741729736328,
"logps/rejected": -78.7047119140625,
"loss": 0.9185,
"margin_dpo/margin_mean": 1.542493462562561,
"margin_dpo/margin_std": 2.0337252616882324,
"step": 414
},
{
"epoch": 0.6273620559334845,
"fcm_dpo/beta": 0.7059125900268555,
"fcm_dpo/delta": 0.12531909346580505,
"fcm_dpo/margin": 1.2547566890716553,
"fcm_dpo/q_t": 0.3529791235923767,
"grad_norm": 175.94845581054688,
"learning_rate": 1.846568829074628e-07,
"logits/chosen": 0.17382574081420898,
"logits/rejected": 0.15772220492362976,
"logps/chosen": -58.64318084716797,
"logps/ref_chosen": -55.91856002807617,
"logps/ref_rejected": -61.747703552246094,
"logps/rejected": -65.72708129882812,
"loss": 1.1261,
"margin_dpo/margin_mean": 1.2547566890716553,
"margin_dpo/margin_std": 2.235924243927002,
"step": 415
},
{
"epoch": 0.6288737717309146,
"fcm_dpo/beta": 0.7500655651092529,
"fcm_dpo/delta": 0.13376402854919434,
"fcm_dpo/margin": 1.1456831693649292,
"fcm_dpo/q_t": 0.356852650642395,
"grad_norm": 205.03404235839844,
"learning_rate": 1.8338154657749128e-07,
"logits/chosen": 0.18582022190093994,
"logits/rejected": 0.1482175588607788,
"logps/chosen": -57.20629119873047,
"logps/ref_chosen": -54.72308349609375,
"logps/ref_rejected": -69.17388916015625,
"logps/rejected": -72.80278015136719,
"loss": 1.154,
"margin_dpo/margin_mean": 1.1456834077835083,
"margin_dpo/margin_std": 2.031163454055786,
"step": 416
},
{
"epoch": 0.6303854875283447,
"fcm_dpo/beta": 0.7249786853790283,
"fcm_dpo/delta": -0.24252735078334808,
"fcm_dpo/margin": 1.6672532558441162,
"fcm_dpo/q_t": 0.29793938994407654,
"grad_norm": 194.7884521484375,
"learning_rate": 1.8210807370886849e-07,
"logits/chosen": 0.24135205149650574,
"logits/rejected": 0.19347181916236877,
"logps/chosen": -59.584938049316406,
"logps/ref_chosen": -56.791259765625,
"logps/ref_rejected": -68.7791748046875,
"logps/rejected": -73.2401123046875,
"loss": 0.9246,
"margin_dpo/margin_mean": 1.6672537326812744,
"margin_dpo/margin_std": 2.092496395111084,
"step": 417
},
{
"epoch": 0.6318972033257747,
"fcm_dpo/beta": 0.7283662557601929,
"fcm_dpo/delta": 0.3391045331954956,
"fcm_dpo/margin": 0.9409202337265015,
"fcm_dpo/q_t": 0.3989126980304718,
"grad_norm": 244.12945556640625,
"learning_rate": 1.8083649992336825e-07,
"logits/chosen": 0.19916735589504242,
"logits/rejected": 0.2042698860168457,
"logps/chosen": -72.2450942993164,
"logps/ref_chosen": -69.10798645019531,
"logps/ref_rejected": -75.09132385253906,
"logps/rejected": -79.16935729980469,
"loss": 1.3312,
"margin_dpo/margin_mean": 0.9409199357032776,
"margin_dpo/margin_std": 2.4089250564575195,
"step": 418
},
{
"epoch": 0.6334089191232048,
"fcm_dpo/beta": 0.7232016324996948,
"fcm_dpo/delta": -0.157709002494812,
"fcm_dpo/margin": 1.570444941520691,
"fcm_dpo/q_t": 0.3125431537628174,
"grad_norm": 159.44833374023438,
"learning_rate": 1.7956686078964255e-07,
"logits/chosen": 0.09053687751293182,
"logits/rejected": 0.04949123412370682,
"logps/chosen": -60.52400588989258,
"logps/ref_chosen": -58.1717643737793,
"logps/ref_rejected": -71.67066955566406,
"logps/rejected": -75.59335327148438,
"loss": 0.9202,
"margin_dpo/margin_mean": 1.5704445838928223,
"margin_dpo/margin_std": 2.1407108306884766,
"step": 419
},
{
"epoch": 0.6349206349206349,
"fcm_dpo/beta": 0.7380191683769226,
"fcm_dpo/delta": 0.1838487833738327,
"fcm_dpo/margin": 1.1262156963348389,
"fcm_dpo/q_t": 0.37532341480255127,
"grad_norm": 189.95645141601562,
"learning_rate": 1.782991918222275e-07,
"logits/chosen": 0.15650036931037903,
"logits/rejected": 0.11561809480190277,
"logps/chosen": -60.085960388183594,
"logps/ref_chosen": -57.05351257324219,
"logps/ref_rejected": -62.670982360839844,
"logps/rejected": -66.82964324951172,
"loss": 1.2387,
"margin_dpo/margin_mean": 1.126215934753418,
"margin_dpo/margin_std": 2.343519687652588,
"step": 420
},
{
"epoch": 0.636432350718065,
"fcm_dpo/beta": 0.7731253504753113,
"fcm_dpo/delta": 0.1831362247467041,
"fcm_dpo/margin": 1.076323390007019,
"fcm_dpo/q_t": 0.3773455023765564,
"grad_norm": 191.1004180908203,
"learning_rate": 1.7703352848054887e-07,
"logits/chosen": 0.13414627313613892,
"logits/rejected": 0.09018285572528839,
"logps/chosen": -60.364906311035156,
"logps/ref_chosen": -57.32324981689453,
"logps/ref_rejected": -75.33782958984375,
"logps/rejected": -79.455810546875,
"loss": 1.3191,
"margin_dpo/margin_mean": 1.0763235092163086,
"margin_dpo/margin_std": 2.4303441047668457,
"step": 421
},
{
"epoch": 0.6379440665154951,
"fcm_dpo/beta": 0.7678795456886292,
"fcm_dpo/delta": -0.15709903836250305,
"fcm_dpo/margin": 1.4837114810943604,
"fcm_dpo/q_t": 0.3305337727069855,
"grad_norm": 208.15638732910156,
"learning_rate": 1.7576990616793137e-07,
"logits/chosen": 0.18905231356620789,
"logits/rejected": 0.15734535455703735,
"logps/chosen": -69.5533447265625,
"logps/ref_chosen": -67.05757141113281,
"logps/ref_rejected": -72.12803649902344,
"logps/rejected": -76.10751342773438,
"loss": 1.0168,
"margin_dpo/margin_mean": 1.4837113618850708,
"margin_dpo/margin_std": 2.3469998836517334,
"step": 422
},
{
"epoch": 0.6394557823129252,
"fcm_dpo/beta": 0.7414518594741821,
"fcm_dpo/delta": -0.12381698191165924,
"fcm_dpo/margin": 1.4967904090881348,
"fcm_dpo/q_t": 0.3256801962852478,
"grad_norm": 163.99639892578125,
"learning_rate": 1.745083602306071e-07,
"logits/chosen": 0.17420369386672974,
"logits/rejected": 0.12361004948616028,
"logps/chosen": -56.651309967041016,
"logps/ref_chosen": -54.06167221069336,
"logps/ref_rejected": -76.64092254638672,
"logps/rejected": -80.72735595703125,
"loss": 1.0145,
"margin_dpo/margin_mean": 1.4967900514602661,
"margin_dpo/margin_std": 2.2581710815429688,
"step": 423
},
{
"epoch": 0.6409674981103552,
"fcm_dpo/beta": 0.7214820384979248,
"fcm_dpo/delta": -0.13758614659309387,
"fcm_dpo/margin": 1.5547800064086914,
"fcm_dpo/q_t": 0.3215063512325287,
"grad_norm": 178.93624877929688,
"learning_rate": 1.7324892595672804e-07,
"logits/chosen": 0.12239620089530945,
"logits/rejected": 0.09341743588447571,
"logps/chosen": -56.10038757324219,
"logps/ref_chosen": -53.60887145996094,
"logps/ref_rejected": -79.2139892578125,
"logps/rejected": -83.26029205322266,
"loss": 0.9368,
"margin_dpo/margin_mean": 1.554780125617981,
"margin_dpo/margin_std": 2.143885612487793,
"step": 424
},
{
"epoch": 0.6424792139077853,
"fcm_dpo/beta": 0.7137551307678223,
"fcm_dpo/delta": -0.05110887810587883,
"fcm_dpo/margin": 1.4651919603347778,
"fcm_dpo/q_t": 0.32126736640930176,
"grad_norm": 145.7388153076172,
"learning_rate": 1.7199163857537824e-07,
"logits/chosen": 0.18603307008743286,
"logits/rejected": 0.16199590265750885,
"logps/chosen": -60.9288330078125,
"logps/ref_chosen": -58.41468048095703,
"logps/ref_rejected": -66.59054565429688,
"logps/rejected": -70.56989288330078,
"loss": 0.9247,
"margin_dpo/margin_mean": 1.4651916027069092,
"margin_dpo/margin_std": 1.9558470249176025,
"step": 425
},
{
"epoch": 0.6439909297052154,
"fcm_dpo/beta": 0.7687985301017761,
"fcm_dpo/delta": 0.5285735130310059,
"fcm_dpo/margin": 0.6476625204086304,
"fcm_dpo/q_t": 0.4075689911842346,
"grad_norm": 223.34030151367188,
"learning_rate": 1.7073653325558828e-07,
"logits/chosen": 0.14943718910217285,
"logits/rejected": 0.15013810992240906,
"logps/chosen": -74.68634033203125,
"logps/ref_chosen": -71.70822143554688,
"logps/ref_rejected": -73.57725524902344,
"logps/rejected": -77.20303344726562,
"loss": 1.3908,
"margin_dpo/margin_mean": 0.6476625800132751,
"margin_dpo/margin_std": 1.9961354732513428,
"step": 426
},
{
"epoch": 0.6455026455026455,
"fcm_dpo/beta": 0.7852897644042969,
"fcm_dpo/delta": -0.0677163228392601,
"fcm_dpo/margin": 1.3504165410995483,
"fcm_dpo/q_t": 0.33633100986480713,
"grad_norm": 163.32342529296875,
"learning_rate": 1.6948364510535218e-07,
"logits/chosen": 0.20361992716789246,
"logits/rejected": 0.16401880979537964,
"logps/chosen": -61.42845916748047,
"logps/ref_chosen": -58.64276885986328,
"logps/ref_rejected": -86.25437927246094,
"logps/rejected": -90.39048767089844,
"loss": 1.0482,
"margin_dpo/margin_mean": 1.350417137145996,
"margin_dpo/margin_std": 2.189502716064453,
"step": 427
},
{
"epoch": 0.6470143613000756,
"fcm_dpo/beta": 0.7506411671638489,
"fcm_dpo/delta": -0.2936919033527374,
"fcm_dpo/margin": 1.6753690242767334,
"fcm_dpo/q_t": 0.29713624715805054,
"grad_norm": 168.94140625,
"learning_rate": 1.6823300917064458e-07,
"logits/chosen": 0.1335376501083374,
"logits/rejected": 0.09132213890552521,
"logps/chosen": -69.21764373779297,
"logps/ref_chosen": -66.5960464477539,
"logps/ref_rejected": -82.3941650390625,
"logps/rejected": -86.69113159179688,
"loss": 0.8538,
"margin_dpo/margin_mean": 1.6753690242767334,
"margin_dpo/margin_std": 2.044569969177246,
"step": 428
},
{
"epoch": 0.6485260770975056,
"fcm_dpo/beta": 0.730757474899292,
"fcm_dpo/delta": -0.06324490904808044,
"fcm_dpo/margin": 1.4457752704620361,
"fcm_dpo/q_t": 0.3198990225791931,
"grad_norm": 177.0709991455078,
"learning_rate": 1.669846604344412e-07,
"logits/chosen": 0.13211305439472198,
"logits/rejected": 0.13196702301502228,
"logps/chosen": -59.602317810058594,
"logps/ref_chosen": -57.00970458984375,
"logps/ref_rejected": -59.86549377441406,
"logps/rejected": -63.90388488769531,
"loss": 0.969,
"margin_dpo/margin_mean": 1.4457753896713257,
"margin_dpo/margin_std": 2.0829176902770996,
"step": 429
},
{
"epoch": 0.6500377928949358,
"fcm_dpo/beta": 0.733278751373291,
"fcm_dpo/delta": 0.05039960518479347,
"fcm_dpo/margin": 1.3011215925216675,
"fcm_dpo/q_t": 0.3425254225730896,
"grad_norm": 174.14768981933594,
"learning_rate": 1.6573863381573954e-07,
"logits/chosen": 0.08477595448493958,
"logits/rejected": 0.0728166401386261,
"logps/chosen": -61.990821838378906,
"logps/ref_chosen": -59.563194274902344,
"logps/ref_rejected": -70.52289581298828,
"logps/rejected": -74.25164794921875,
"loss": 1.0004,
"margin_dpo/margin_mean": 1.3011209964752197,
"margin_dpo/margin_std": 1.9769561290740967,
"step": 430
},
{
"epoch": 0.6515495086923658,
"fcm_dpo/beta": 0.7297165393829346,
"fcm_dpo/delta": 0.036733031272888184,
"fcm_dpo/margin": 1.3228065967559814,
"fcm_dpo/q_t": 0.3405199646949768,
"grad_norm": 171.15907287597656,
"learning_rate": 1.6449496416858282e-07,
"logits/chosen": 0.17870327830314636,
"logits/rejected": 0.14549797773361206,
"logps/chosen": -52.56354904174805,
"logps/ref_chosen": -50.20032501220703,
"logps/ref_rejected": -77.81680297851562,
"logps/rejected": -81.50283813476562,
"loss": 1.0051,
"margin_dpo/margin_mean": 1.3228061199188232,
"margin_dpo/margin_std": 2.025378704071045,
"step": 431
},
{
"epoch": 0.6530612244897959,
"fcm_dpo/beta": 0.7157025337219238,
"fcm_dpo/delta": -0.10452289134263992,
"fcm_dpo/margin": 1.518718957901001,
"fcm_dpo/q_t": 0.32164376974105835,
"grad_norm": 169.30746459960938,
"learning_rate": 1.632536862810844e-07,
"logits/chosen": 0.17181310057640076,
"logits/rejected": 0.1360006034374237,
"logps/chosen": -64.20341491699219,
"logps/ref_chosen": -61.662757873535156,
"logps/ref_rejected": -83.94496154785156,
"logps/rejected": -88.00434112548828,
"loss": 0.9613,
"margin_dpo/margin_mean": 1.5187186002731323,
"margin_dpo/margin_std": 2.0636777877807617,
"step": 432
},
{
"epoch": 0.654572940287226,
"fcm_dpo/beta": 0.6925790309906006,
"fcm_dpo/delta": -0.28451234102249146,
"fcm_dpo/margin": 1.802809476852417,
"fcm_dpo/q_t": 0.3154519498348236,
"grad_norm": 145.2095947265625,
"learning_rate": 1.6201483487445515e-07,
"logits/chosen": 0.24505794048309326,
"logits/rejected": 0.23090487718582153,
"logps/chosen": -66.46299743652344,
"logps/ref_chosen": -63.72917938232422,
"logps/ref_rejected": -65.8391342163086,
"logps/rejected": -70.3757553100586,
"loss": 0.9081,
"margin_dpo/margin_mean": 1.802809476852417,
"margin_dpo/margin_std": 2.475156784057617,
"step": 433
},
{
"epoch": 0.656084656084656,
"fcm_dpo/beta": 0.659028947353363,
"fcm_dpo/delta": -0.13485188782215118,
"fcm_dpo/margin": 1.6889315843582153,
"fcm_dpo/q_t": 0.31248465180397034,
"grad_norm": 130.46426391601562,
"learning_rate": 1.6077844460203204e-07,
"logits/chosen": 0.21205420792102814,
"logits/rejected": 0.16366201639175415,
"logps/chosen": -50.280784606933594,
"logps/ref_chosen": -47.97331619262695,
"logps/ref_rejected": -72.51132202148438,
"logps/rejected": -76.50772857666016,
"loss": 0.9792,
"margin_dpo/margin_mean": 1.6889313459396362,
"margin_dpo/margin_std": 2.39209246635437,
"step": 434
},
{
"epoch": 0.6575963718820862,
"fcm_dpo/beta": 0.6766383647918701,
"fcm_dpo/delta": 0.04958092421293259,
"fcm_dpo/margin": 1.4080250263214111,
"fcm_dpo/q_t": 0.3341384530067444,
"grad_norm": 151.441162109375,
"learning_rate": 1.5954455004830878e-07,
"logits/chosen": 0.22931721806526184,
"logits/rejected": 0.1980956494808197,
"logps/chosen": -59.903053283691406,
"logps/ref_chosen": -57.06024932861328,
"logps/ref_rejected": -71.69146728515625,
"logps/rejected": -75.94229888916016,
"loss": 0.9779,
"margin_dpo/margin_mean": 1.4080252647399902,
"margin_dpo/margin_std": 2.0040295124053955,
"step": 435
},
{
"epoch": 0.6591080876795162,
"fcm_dpo/beta": 0.6710116863250732,
"fcm_dpo/delta": -0.004951075650751591,
"fcm_dpo/margin": 1.4969241619110107,
"fcm_dpo/q_t": 0.3257167935371399,
"grad_norm": 149.94261169433594,
"learning_rate": 1.5831318572796847e-07,
"logits/chosen": 0.16502049565315247,
"logits/rejected": 0.1188136488199234,
"logps/chosen": -58.86479949951172,
"logps/ref_chosen": -56.158050537109375,
"logps/ref_rejected": -67.63787841796875,
"logps/rejected": -71.841552734375,
"loss": 0.9499,
"margin_dpo/margin_mean": 1.4969241619110107,
"margin_dpo/margin_std": 2.013727903366089,
"step": 436
},
{
"epoch": 0.6606198034769464,
"fcm_dpo/beta": 0.6555310487747192,
"fcm_dpo/delta": 0.013455048203468323,
"fcm_dpo/margin": 1.48863685131073,
"fcm_dpo/q_t": 0.3591005504131317,
"grad_norm": 169.48338317871094,
"learning_rate": 1.5708438608491815e-07,
"logits/chosen": 0.15093836188316345,
"logits/rejected": 0.05515362694859505,
"logps/chosen": -59.8901252746582,
"logps/ref_chosen": -56.98578643798828,
"logps/ref_rejected": -85.61524963378906,
"logps/rejected": -90.00823974609375,
"loss": 1.1728,
"margin_dpo/margin_mean": 1.4886366128921509,
"margin_dpo/margin_std": 2.654061794281006,
"step": 437
},
{
"epoch": 0.6621315192743764,
"fcm_dpo/beta": 0.6835014820098877,
"fcm_dpo/delta": 0.04397985339164734,
"fcm_dpo/margin": 1.4018324613571167,
"fcm_dpo/q_t": 0.3333927392959595,
"grad_norm": 121.23139190673828,
"learning_rate": 1.558581854913253e-07,
"logits/chosen": 0.19322752952575684,
"logits/rejected": 0.14423127472400665,
"logps/chosen": -43.83991241455078,
"logps/ref_chosen": -41.27777862548828,
"logps/ref_rejected": -65.33840942382812,
"logps/rejected": -69.3023681640625,
"loss": 0.9826,
"margin_dpo/margin_mean": 1.401832938194275,
"margin_dpo/margin_std": 1.9659230709075928,
"step": 438
},
{
"epoch": 0.6636432350718064,
"fcm_dpo/beta": 0.6828280091285706,
"fcm_dpo/delta": -0.04300057888031006,
"fcm_dpo/margin": 1.5173701047897339,
"fcm_dpo/q_t": 0.31167930364608765,
"grad_norm": 186.42234802246094,
"learning_rate": 1.5463461824665658e-07,
"logits/chosen": 0.11366529762744904,
"logits/rejected": 0.09049699455499649,
"logps/chosen": -83.722900390625,
"logps/ref_chosen": -81.41764831542969,
"logps/ref_rejected": -94.72309875488281,
"logps/rejected": -98.54571533203125,
"loss": 0.9309,
"margin_dpo/margin_mean": 1.5173696279525757,
"margin_dpo/margin_std": 1.9623498916625977,
"step": 439
},
{
"epoch": 0.6651549508692366,
"fcm_dpo/beta": 0.65775465965271,
"fcm_dpo/delta": -0.16628237068653107,
"fcm_dpo/margin": 1.7444255352020264,
"fcm_dpo/q_t": 0.30622392892837524,
"grad_norm": 129.40713500976562,
"learning_rate": 1.534137185767178e-07,
"logits/chosen": 0.1228909119963646,
"logits/rejected": 0.04697669297456741,
"logps/chosen": -44.876808166503906,
"logps/ref_chosen": -42.538185119628906,
"logps/ref_rejected": -69.78813934326172,
"logps/rejected": -73.87118530273438,
"loss": 0.8618,
"margin_dpo/margin_mean": 1.744425654411316,
"margin_dpo/margin_std": 2.120790481567383,
"step": 440
},
{
"epoch": 0.6666666666666666,
"fcm_dpo/beta": 0.6342558860778809,
"fcm_dpo/delta": -0.15066742897033691,
"fcm_dpo/margin": 1.7861425876617432,
"fcm_dpo/q_t": 0.292005717754364,
"grad_norm": 122.53746032714844,
"learning_rate": 1.521955206326976e-07,
"logits/chosen": 0.14506568014621735,
"logits/rejected": 0.07649335265159607,
"logps/chosen": -59.81481170654297,
"logps/ref_chosen": -57.593223571777344,
"logps/ref_rejected": -84.82878875732422,
"logps/rejected": -88.83651733398438,
"loss": 0.7795,
"margin_dpo/margin_mean": 1.7861430644989014,
"margin_dpo/margin_std": 1.8194975852966309,
"step": 441
},
{
"epoch": 0.6681783824640968,
"fcm_dpo/beta": 0.6411465406417847,
"fcm_dpo/delta": 0.09939359128475189,
"fcm_dpo/margin": 1.418702483177185,
"fcm_dpo/q_t": 0.34212759137153625,
"grad_norm": 168.3711395263672,
"learning_rate": 1.5098005849021078e-07,
"logits/chosen": 0.19873833656311035,
"logits/rejected": 0.16300469636917114,
"logps/chosen": -70.19367980957031,
"logps/ref_chosen": -67.46121978759766,
"logps/ref_rejected": -89.0693588256836,
"logps/rejected": -93.22052001953125,
"loss": 0.9856,
"margin_dpo/margin_mean": 1.418702483177185,
"margin_dpo/margin_std": 2.081653594970703,
"step": 442
},
{
"epoch": 0.6696900982615268,
"fcm_dpo/beta": 0.6237589120864868,
"fcm_dpo/delta": -0.24319452047348022,
"fcm_dpo/margin": 1.9465469121932983,
"fcm_dpo/q_t": 0.29789623618125916,
"grad_norm": 127.22098541259766,
"learning_rate": 1.4976736614834662e-07,
"logits/chosen": 0.18308596312999725,
"logits/rejected": 0.12943433225154877,
"logps/chosen": -57.22307586669922,
"logps/ref_chosen": -54.79610061645508,
"logps/ref_rejected": -77.80781555175781,
"logps/rejected": -82.18133544921875,
"loss": 0.9041,
"margin_dpo/margin_mean": 1.946547269821167,
"margin_dpo/margin_std": 2.4835057258605957,
"step": 443
},
{
"epoch": 0.671201814058957,
"fcm_dpo/beta": 0.661973237991333,
"fcm_dpo/delta": 0.5227335691452026,
"fcm_dpo/margin": 0.7623451948165894,
"fcm_dpo/q_t": 0.4185434579849243,
"grad_norm": 196.5355224609375,
"learning_rate": 1.4855747752871654e-07,
"logits/chosen": 0.18811815977096558,
"logits/rejected": 0.1319284737110138,
"logps/chosen": -61.6188850402832,
"logps/ref_chosen": -58.749061584472656,
"logps/ref_rejected": -86.87396240234375,
"logps/rejected": -90.50614166259766,
"loss": 1.3947,
"margin_dpo/margin_mean": 0.7623450756072998,
"margin_dpo/margin_std": 2.2845544815063477,
"step": 444
},
{
"epoch": 0.672713529856387,
"fcm_dpo/beta": 0.6808522343635559,
"fcm_dpo/delta": -0.0005050599575042725,
"fcm_dpo/margin": 1.4694095849990845,
"fcm_dpo/q_t": 0.3312973380088806,
"grad_norm": 179.10009765625,
"learning_rate": 1.473504264745062e-07,
"logits/chosen": 0.1777781844139099,
"logits/rejected": 0.16238978505134583,
"logps/chosen": -64.01307678222656,
"logps/ref_chosen": -60.91743850708008,
"logps/ref_rejected": -71.5637435913086,
"logps/rejected": -76.1287841796875,
"loss": 1.0013,
"margin_dpo/margin_mean": 1.4694093465805054,
"margin_dpo/margin_std": 2.1590332984924316,
"step": 445
},
{
"epoch": 0.674225245653817,
"fcm_dpo/beta": 0.6526922583580017,
"fcm_dpo/delta": -0.4451631009578705,
"fcm_dpo/margin": 2.119361162185669,
"fcm_dpo/q_t": 0.25671201944351196,
"grad_norm": 107.62913513183594,
"learning_rate": 1.461462467495284e-07,
"logits/chosen": 0.19539491832256317,
"logits/rejected": 0.13630658388137817,
"logps/chosen": -51.429866790771484,
"logps/ref_chosen": -48.79924774169922,
"logps/ref_rejected": -71.8719482421875,
"logps/rejected": -76.62193298339844,
"loss": 0.6997,
"margin_dpo/margin_mean": 2.1193606853485107,
"margin_dpo/margin_std": 1.913917064666748,
"step": 446
},
{
"epoch": 0.6757369614512472,
"fcm_dpo/beta": 0.6015419363975525,
"fcm_dpo/delta": -0.2893209457397461,
"fcm_dpo/margin": 2.0844779014587402,
"fcm_dpo/q_t": 0.27232983708381653,
"grad_norm": 112.18714141845703,
"learning_rate": 1.4494497203727843e-07,
"logits/chosen": 0.1319853812456131,
"logits/rejected": 0.05933520570397377,
"logps/chosen": -55.8741569519043,
"logps/ref_chosen": -53.682716369628906,
"logps/ref_rejected": -88.17315673828125,
"logps/rejected": -92.44908142089844,
"loss": 0.8532,
"margin_dpo/margin_mean": 2.084477424621582,
"margin_dpo/margin_std": 2.4492878913879395,
"step": 447
},
{
"epoch": 0.6772486772486772,
"fcm_dpo/beta": 0.5884913206100464,
"fcm_dpo/delta": -8.67573544383049e-06,
"fcm_dpo/margin": 1.6992573738098145,
"fcm_dpo/q_t": 0.3187348246574402,
"grad_norm": 115.34869384765625,
"learning_rate": 1.4374663593999256e-07,
"logits/chosen": 0.17938536405563354,
"logits/rejected": 0.14003482460975647,
"logps/chosen": -56.258331298828125,
"logps/ref_chosen": -53.75125503540039,
"logps/ref_rejected": -77.17623901367188,
"logps/rejected": -81.382568359375,
"loss": 0.8966,
"margin_dpo/margin_mean": 1.6992576122283936,
"margin_dpo/margin_std": 2.116860866546631,
"step": 448
},
{
"epoch": 0.6787603930461074,
"fcm_dpo/beta": 0.6253612041473389,
"fcm_dpo/delta": 0.423923134803772,
"fcm_dpo/margin": 0.9630190134048462,
"fcm_dpo/q_t": 0.38473382592201233,
"grad_norm": 156.52899169921875,
"learning_rate": 1.4255127197770707e-07,
"logits/chosen": 0.07655443251132965,
"logits/rejected": 0.061458148062229156,
"logps/chosen": -78.82083129882812,
"logps/ref_chosen": -75.82737731933594,
"logps/ref_rejected": -82.20687866210938,
"logps/rejected": -86.16334533691406,
"loss": 1.1046,
"margin_dpo/margin_mean": 0.9630191326141357,
"margin_dpo/margin_std": 1.6913801431655884,
"step": 449
},
{
"epoch": 0.6802721088435374,
"fcm_dpo/beta": 0.6548283100128174,
"fcm_dpo/delta": 0.12034881114959717,
"fcm_dpo/margin": 1.3581256866455078,
"fcm_dpo/q_t": 0.35161659121513367,
"grad_norm": 150.02163696289062,
"learning_rate": 1.4135891358732205e-07,
"logits/chosen": 0.21886947751045227,
"logits/rejected": 0.1426752656698227,
"logps/chosen": -49.74790954589844,
"logps/ref_chosen": -47.11572265625,
"logps/ref_rejected": -78.7546615600586,
"logps/rejected": -82.7449722290039,
"loss": 1.0203,
"margin_dpo/margin_mean": 1.3581254482269287,
"margin_dpo/margin_std": 2.10986590385437,
"step": 450
},
{
"epoch": 0.6817838246409675,
"fcm_dpo/beta": 0.6705623865127563,
"fcm_dpo/delta": 0.17599515616893768,
"fcm_dpo/margin": 1.2513468265533447,
"fcm_dpo/q_t": 0.3569420576095581,
"grad_norm": 179.85128784179688,
"learning_rate": 1.4016959412166437e-07,
"logits/chosen": 0.18319055438041687,
"logits/rejected": 0.14441323280334473,
"logps/chosen": -66.11190795898438,
"logps/ref_chosen": -63.350440979003906,
"logps/ref_rejected": -76.28530883789062,
"logps/rejected": -80.29811096191406,
"loss": 1.0998,
"margin_dpo/margin_mean": 1.251347541809082,
"margin_dpo/margin_std": 2.137026786804199,
"step": 451
},
{
"epoch": 0.6832955404383976,
"fcm_dpo/beta": 0.6785616874694824,
"fcm_dpo/delta": -0.009446687065064907,
"fcm_dpo/margin": 1.486222743988037,
"fcm_dpo/q_t": 0.32352200150489807,
"grad_norm": 165.91653442382812,
"learning_rate": 1.3898334684855645e-07,
"logits/chosen": 0.13353146612644196,
"logits/rejected": 0.08063468337059021,
"logps/chosen": -58.241943359375,
"logps/ref_chosen": -55.58583450317383,
"logps/ref_rejected": -77.68738555908203,
"logps/rejected": -81.8297119140625,
"loss": 0.9588,
"margin_dpo/margin_mean": 1.4862233400344849,
"margin_dpo/margin_std": 2.007995128631592,
"step": 452
},
{
"epoch": 0.6848072562358276,
"fcm_dpo/beta": 0.6866965293884277,
"fcm_dpo/delta": 0.07683775573968887,
"fcm_dpo/margin": 1.354539394378662,
"fcm_dpo/q_t": 0.36106228828430176,
"grad_norm": 152.8515167236328,
"learning_rate": 1.3780020494988445e-07,
"logits/chosen": 0.12297318130731583,
"logits/rejected": 0.09657086431980133,
"logps/chosen": -64.26089477539062,
"logps/ref_chosen": -61.778202056884766,
"logps/ref_rejected": -71.51403045654297,
"logps/rejected": -75.35125732421875,
"loss": 1.083,
"margin_dpo/margin_mean": 1.354539394378662,
"margin_dpo/margin_std": 2.2785511016845703,
"step": 453
},
{
"epoch": 0.6863189720332578,
"fcm_dpo/beta": 0.6685348749160767,
"fcm_dpo/delta": -0.19035013020038605,
"fcm_dpo/margin": 1.7468868494033813,
"fcm_dpo/q_t": 0.3080099821090698,
"grad_norm": 127.66053771972656,
"learning_rate": 1.366202015206706e-07,
"logits/chosen": 0.16820810735225677,
"logits/rejected": 0.13530485332012177,
"logps/chosen": -53.909812927246094,
"logps/ref_chosen": -51.59515380859375,
"logps/ref_rejected": -63.96732711791992,
"logps/rejected": -68.02886962890625,
"loss": 0.9374,
"margin_dpo/margin_mean": 1.7468867301940918,
"margin_dpo/margin_std": 2.297898292541504,
"step": 454
},
{
"epoch": 0.6878306878306878,
"fcm_dpo/beta": 0.6465753316879272,
"fcm_dpo/delta": -0.1078774556517601,
"fcm_dpo/margin": 1.6916618347167969,
"fcm_dpo/q_t": 0.31326356530189514,
"grad_norm": 150.88607788085938,
"learning_rate": 1.354433695681474e-07,
"logits/chosen": 0.040207911282777786,
"logits/rejected": 0.007178250700235367,
"logps/chosen": -73.28399658203125,
"logps/ref_chosen": -70.65170288085938,
"logps/ref_rejected": -77.44276428222656,
"logps/rejected": -81.7667236328125,
"loss": 0.8921,
"margin_dpo/margin_mean": 1.6916615962982178,
"margin_dpo/margin_std": 2.1551051139831543,
"step": 455
},
{
"epoch": 0.6893424036281179,
"fcm_dpo/beta": 0.6534501910209656,
"fcm_dpo/delta": 0.01624855026602745,
"fcm_dpo/margin": 1.5070427656173706,
"fcm_dpo/q_t": 0.32177361845970154,
"grad_norm": 149.37515258789062,
"learning_rate": 1.3426974201083439e-07,
"logits/chosen": 0.11166486144065857,
"logits/rejected": 0.07302643358707428,
"logps/chosen": -59.19749450683594,
"logps/ref_chosen": -56.398284912109375,
"logps/ref_rejected": -82.61642456054688,
"logps/rejected": -86.92267608642578,
"loss": 0.921,
"margin_dpo/margin_mean": 1.507042646408081,
"margin_dpo/margin_std": 1.9450860023498535,
"step": 456
},
{
"epoch": 0.690854119425548,
"fcm_dpo/beta": 0.6657828092575073,
"fcm_dpo/delta": 0.21099001169204712,
"fcm_dpo/margin": 1.2110447883605957,
"fcm_dpo/q_t": 0.3547195792198181,
"grad_norm": 146.60675048828125,
"learning_rate": 1.3309935167761717e-07,
"logits/chosen": 0.20737716555595398,
"logits/rejected": 0.1518602967262268,
"logps/chosen": -47.50776290893555,
"logps/ref_chosen": -44.72057342529297,
"logps/ref_rejected": -68.1158676147461,
"logps/rejected": -72.11409759521484,
"loss": 1.0168,
"margin_dpo/margin_mean": 1.2110450267791748,
"margin_dpo/margin_std": 1.8031151294708252,
"step": 457
},
{
"epoch": 0.6923658352229781,
"fcm_dpo/beta": 0.6568002104759216,
"fcm_dpo/delta": -0.16649408638477325,
"fcm_dpo/margin": 1.7431389093399048,
"fcm_dpo/q_t": 0.2945740222930908,
"grad_norm": 143.60699462890625,
"learning_rate": 1.3193223130682936e-07,
"logits/chosen": 0.15916739404201508,
"logits/rejected": 0.07576747238636017,
"logps/chosen": -52.41571807861328,
"logps/ref_chosen": -50.00569152832031,
"logps/ref_rejected": -87.50015258789062,
"logps/rejected": -91.6533203125,
"loss": 0.8903,
"margin_dpo/margin_mean": 1.7431399822235107,
"margin_dpo/margin_std": 2.1261298656463623,
"step": 458
},
{
"epoch": 0.6938775510204082,
"fcm_dpo/beta": 0.6480120420455933,
"fcm_dpo/delta": -0.23831713199615479,
"fcm_dpo/margin": 1.8602559566497803,
"fcm_dpo/q_t": 0.2993336021900177,
"grad_norm": 136.50265502929688,
"learning_rate": 1.3076841354533658e-07,
"logits/chosen": 0.18282179534435272,
"logits/rejected": 0.15275558829307556,
"logps/chosen": -67.98956298828125,
"logps/ref_chosen": -65.37794494628906,
"logps/ref_rejected": -88.19244384765625,
"logps/rejected": -92.66431427001953,
"loss": 0.8494,
"margin_dpo/margin_mean": 1.8602561950683594,
"margin_dpo/margin_std": 2.121655225753784,
"step": 459
},
{
"epoch": 0.6953892668178382,
"fcm_dpo/beta": 0.6043037176132202,
"fcm_dpo/delta": -0.21189001202583313,
"fcm_dpo/margin": 1.9620928764343262,
"fcm_dpo/q_t": 0.3046306073665619,
"grad_norm": 148.3003692626953,
"learning_rate": 1.2960793094762345e-07,
"logits/chosen": 0.19387787580490112,
"logits/rejected": 0.09904222190380096,
"logps/chosen": -67.26277160644531,
"logps/ref_chosen": -64.5616683959961,
"logps/ref_rejected": -88.67890167236328,
"logps/rejected": -93.34209442138672,
"loss": 0.8275,
"margin_dpo/margin_mean": 1.962093472480774,
"margin_dpo/margin_std": 2.332686185836792,
"step": 460
},
{
"epoch": 0.6969009826152683,
"fcm_dpo/beta": 0.5892372131347656,
"fcm_dpo/delta": -0.03291664272546768,
"fcm_dpo/margin": 1.7409393787384033,
"fcm_dpo/q_t": 0.31874844431877136,
"grad_norm": 127.28734588623047,
"learning_rate": 1.2845081597488286e-07,
"logits/chosen": 0.23900935053825378,
"logits/rejected": 0.17159438133239746,
"logps/chosen": -52.00554275512695,
"logps/ref_chosen": -49.4779167175293,
"logps/ref_rejected": -72.65262603759766,
"logps/rejected": -76.92119598388672,
"loss": 0.9043,
"margin_dpo/margin_mean": 1.7409393787384033,
"margin_dpo/margin_std": 2.132420063018799,
"step": 461
},
{
"epoch": 0.6984126984126984,
"fcm_dpo/beta": 0.577314019203186,
"fcm_dpo/delta": -0.1604897826910019,
"fcm_dpo/margin": 1.9741871356964111,
"fcm_dpo/q_t": 0.2894290089607239,
"grad_norm": 113.71949005126953,
"learning_rate": 1.27297100994108e-07,
"logits/chosen": 0.13943126797676086,
"logits/rejected": 0.09149923920631409,
"logps/chosen": -63.15895080566406,
"logps/ref_chosen": -60.4951171875,
"logps/ref_rejected": -74.82136535644531,
"logps/rejected": -79.45939636230469,
"loss": 0.7873,
"margin_dpo/margin_mean": 1.9741871356964111,
"margin_dpo/margin_std": 2.1054553985595703,
"step": 462
},
{
"epoch": 0.6999244142101285,
"fcm_dpo/beta": 0.5867961049079895,
"fcm_dpo/delta": 0.11635659635066986,
"fcm_dpo/margin": 1.523716688156128,
"fcm_dpo/q_t": 0.3367578387260437,
"grad_norm": 127.23412322998047,
"learning_rate": 1.2614681827718695e-07,
"logits/chosen": 0.15458309650421143,
"logits/rejected": 0.14070303738117218,
"logps/chosen": -70.25871276855469,
"logps/ref_chosen": -67.68511962890625,
"logps/ref_rejected": -71.32196044921875,
"logps/rejected": -75.41926574707031,
"loss": 0.931,
"margin_dpo/margin_mean": 1.523715853691101,
"margin_dpo/margin_std": 1.9335708618164062,
"step": 463
},
{
"epoch": 0.7014361300075586,
"fcm_dpo/beta": 0.6116993427276611,
"fcm_dpo/delta": 0.15539291501045227,
"fcm_dpo/margin": 1.3953006267547607,
"fcm_dpo/q_t": 0.3521912395954132,
"grad_norm": 161.49534606933594,
"learning_rate": 1.2500000000000005e-07,
"logits/chosen": 0.12334546446800232,
"logits/rejected": 0.10288789868354797,
"logps/chosen": -61.994964599609375,
"logps/ref_chosen": -59.16564178466797,
"logps/ref_rejected": -69.56146240234375,
"logps/rejected": -73.78608703613281,
"loss": 1.0905,
"margin_dpo/margin_mean": 1.3953003883361816,
"margin_dpo/margin_std": 2.2576606273651123,
"step": 464
},
{
"epoch": 0.7029478458049887,
"fcm_dpo/beta": 0.6138174533843994,
"fcm_dpo/delta": 0.10352025926113129,
"fcm_dpo/margin": 1.4749349355697632,
"fcm_dpo/q_t": 0.3457157015800476,
"grad_norm": 138.18177795410156,
"learning_rate": 1.238566782415197e-07,
"logits/chosen": 0.2334330677986145,
"logits/rejected": 0.18456201255321503,
"logps/chosen": -61.38888168334961,
"logps/ref_chosen": -58.513671875,
"logps/ref_rejected": -84.31745910644531,
"logps/rejected": -88.6676025390625,
"loss": 1.0361,
"margin_dpo/margin_mean": 1.4749336242675781,
"margin_dpo/margin_std": 2.255413293838501,
"step": 465
},
{
"epoch": 0.7044595616024187,
"fcm_dpo/beta": 0.6493447422981262,
"fcm_dpo/delta": 0.3105998635292053,
"fcm_dpo/margin": 1.0969356298446655,
"fcm_dpo/q_t": 0.37098121643066406,
"grad_norm": 185.26171875,
"learning_rate": 1.2271688498291334e-07,
"logits/chosen": 0.18046687543392181,
"logits/rejected": 0.1757928878068924,
"logps/chosen": -76.58619689941406,
"logps/ref_chosen": -73.26580810546875,
"logps/ref_rejected": -74.83621215820312,
"logps/rejected": -79.25353240966797,
"loss": 1.0578,
"margin_dpo/margin_mean": 1.096935749053955,
"margin_dpo/margin_std": 1.7959051132202148,
"step": 466
},
{
"epoch": 0.7059712773998488,
"fcm_dpo/beta": 0.6433865427970886,
"fcm_dpo/delta": -0.17267094552516937,
"fcm_dpo/margin": 1.7906594276428223,
"fcm_dpo/q_t": 0.3067885637283325,
"grad_norm": 113.32935333251953,
"learning_rate": 1.2158065210664848e-07,
"logits/chosen": 0.16693203151226044,
"logits/rejected": 0.060477063059806824,
"logps/chosen": -50.42617416381836,
"logps/ref_chosen": -47.57947540283203,
"logps/ref_rejected": -78.68522644042969,
"logps/rejected": -83.32258605957031,
"loss": 0.8707,
"margin_dpo/margin_mean": 1.7906594276428223,
"margin_dpo/margin_std": 2.2650554180145264,
"step": 467
},
{
"epoch": 0.7074829931972789,
"fcm_dpo/beta": 0.6142877340316772,
"fcm_dpo/delta": -0.2895115911960602,
"fcm_dpo/margin": 2.040811538696289,
"fcm_dpo/q_t": 0.28548452258110046,
"grad_norm": 135.33714294433594,
"learning_rate": 1.204480113956011e-07,
"logits/chosen": 0.17146506905555725,
"logits/rejected": 0.1613762527704239,
"logps/chosen": -66.34950256347656,
"logps/ref_chosen": -63.92778778076172,
"logps/ref_rejected": -76.51626586914062,
"logps/rejected": -80.97879028320312,
"loss": 0.796,
"margin_dpo/margin_mean": 2.040811061859131,
"margin_dpo/margin_std": 2.2666611671447754,
"step": 468
},
{
"epoch": 0.708994708994709,
"fcm_dpo/beta": 0.5907766222953796,
"fcm_dpo/delta": -0.01965993642807007,
"fcm_dpo/margin": 1.710775375366211,
"fcm_dpo/q_t": 0.3215448260307312,
"grad_norm": 115.97638702392578,
"learning_rate": 1.1931899453216697e-07,
"logits/chosen": 0.21937254071235657,
"logits/rejected": 0.2032082974910736,
"logps/chosen": -61.57379150390625,
"logps/ref_chosen": -59.05818176269531,
"logps/ref_rejected": -75.67672729492188,
"logps/rejected": -79.90310668945312,
"loss": 0.8873,
"margin_dpo/margin_mean": 1.710775375366211,
"margin_dpo/margin_std": 2.0356462001800537,
"step": 469
},
{
"epoch": 0.7105064247921391,
"fcm_dpo/beta": 0.6098539233207703,
"fcm_dpo/delta": 0.04422697797417641,
"fcm_dpo/margin": 1.5718050003051758,
"fcm_dpo/q_t": 0.3227683901786804,
"grad_norm": 119.2257308959961,
"learning_rate": 1.1819363309737438e-07,
"logits/chosen": 0.13354477286338806,
"logits/rejected": 0.08637814223766327,
"logps/chosen": -50.68056106567383,
"logps/ref_chosen": -47.86743927001953,
"logps/ref_rejected": -65.96859741210938,
"logps/rejected": -70.353515625,
"loss": 0.9471,
"margin_dpo/margin_mean": 1.5718050003051758,
"margin_dpo/margin_std": 2.07285475730896,
"step": 470
},
{
"epoch": 0.7120181405895691,
"fcm_dpo/beta": 0.6002909541130066,
"fcm_dpo/delta": -0.08815348893404007,
"fcm_dpo/margin": 1.7969985008239746,
"fcm_dpo/q_t": 0.3020731508731842,
"grad_norm": 131.24606323242188,
"learning_rate": 1.1707195857000215e-07,
"logits/chosen": 0.1816762238740921,
"logits/rejected": 0.12994712591171265,
"logps/chosen": -60.345733642578125,
"logps/ref_chosen": -57.777854919433594,
"logps/ref_rejected": -73.81172180175781,
"logps/rejected": -78.17660522460938,
"loss": 0.9,
"margin_dpo/margin_mean": 1.796998381614685,
"margin_dpo/margin_std": 2.2218680381774902,
"step": 471
},
{
"epoch": 0.7135298563869993,
"fcm_dpo/beta": 0.6000721454620361,
"fcm_dpo/delta": 0.006265308707952499,
"fcm_dpo/margin": 1.6558948755264282,
"fcm_dpo/q_t": 0.3246491551399231,
"grad_norm": 141.67776489257812,
"learning_rate": 1.1595400232569768e-07,
"logits/chosen": 0.20169669389724731,
"logits/rejected": 0.15981845557689667,
"logps/chosen": -58.36172866821289,
"logps/ref_chosen": -55.908668518066406,
"logps/ref_rejected": -74.70294189453125,
"logps/rejected": -78.81190490722656,
"loss": 0.9713,
"margin_dpo/margin_mean": 1.6558947563171387,
"margin_dpo/margin_std": 2.2935879230499268,
"step": 472
},
{
"epoch": 0.7150415721844293,
"fcm_dpo/beta": 0.5857222080230713,
"fcm_dpo/delta": -0.07999872416257858,
"fcm_dpo/margin": 1.8269248008728027,
"fcm_dpo/q_t": 0.32797205448150635,
"grad_norm": 127.49575805664062,
"learning_rate": 1.1483979563610069e-07,
"logits/chosen": 0.23042967915534973,
"logits/rejected": 0.15509197115898132,
"logps/chosen": -56.60422134399414,
"logps/ref_chosen": -54.16088104248047,
"logps/ref_rejected": -92.76789855957031,
"logps/rejected": -97.03816223144531,
"loss": 0.9793,
"margin_dpo/margin_mean": 1.8269245624542236,
"margin_dpo/margin_std": 2.6278867721557617,
"step": 473
},
{
"epoch": 0.7165532879818595,
"fcm_dpo/beta": 0.6013132333755493,
"fcm_dpo/delta": 0.11396686732769012,
"fcm_dpo/margin": 1.4877792596817017,
"fcm_dpo/q_t": 0.34768766164779663,
"grad_norm": 151.1398162841797,
"learning_rate": 1.1372936966796709e-07,
"logits/chosen": 0.210426926612854,
"logits/rejected": 0.1586945503950119,
"logps/chosen": -49.76041793823242,
"logps/ref_chosen": -46.685707092285156,
"logps/ref_rejected": -71.44731903076172,
"logps/rejected": -76.00980377197266,
"loss": 1.0262,
"margin_dpo/margin_mean": 1.487779140472412,
"margin_dpo/margin_std": 2.2466931343078613,
"step": 474
},
{
"epoch": 0.7180650037792895,
"fcm_dpo/beta": 0.56545090675354,
"fcm_dpo/delta": -0.3536713421344757,
"fcm_dpo/margin": 2.305065631866455,
"fcm_dpo/q_t": 0.26997214555740356,
"grad_norm": 108.57855224609375,
"learning_rate": 1.126227554822985e-07,
"logits/chosen": 0.15623445808887482,
"logits/rejected": 0.11391064524650574,
"logps/chosen": -61.26993179321289,
"logps/ref_chosen": -58.4873046875,
"logps/ref_rejected": -87.00187683105469,
"logps/rejected": -92.08956909179688,
"loss": 0.7329,
"margin_dpo/margin_mean": 2.3050661087036133,
"margin_dpo/margin_std": 2.278512477874756,
"step": 475
},
{
"epoch": 0.7195767195767195,
"fcm_dpo/beta": 0.5778172016143799,
"fcm_dpo/delta": 0.17008031904697418,
"fcm_dpo/margin": 1.4578584432601929,
"fcm_dpo/q_t": 0.3523421287536621,
"grad_norm": 163.50177001953125,
"learning_rate": 1.1151998403347243e-07,
"logits/chosen": 0.11702927947044373,
"logits/rejected": 0.10260109603404999,
"logps/chosen": -78.54095458984375,
"logps/ref_chosen": -75.38162231445312,
"logps/ref_rejected": -76.99822235107422,
"logps/rejected": -81.61541748046875,
"loss": 1.0833,
"margin_dpo/margin_mean": 1.4578593969345093,
"margin_dpo/margin_std": 2.4156503677368164,
"step": 476
},
{
"epoch": 0.7210884353741497,
"fcm_dpo/beta": 0.591883659362793,
"fcm_dpo/delta": 0.1255907416343689,
"fcm_dpo/margin": 1.4951434135437012,
"fcm_dpo/q_t": 0.3490224778652191,
"grad_norm": 168.64578247070312,
"learning_rate": 1.1042108616837692e-07,
"logits/chosen": 0.19208115339279175,
"logits/rejected": 0.15968218445777893,
"logps/chosen": -64.10305786132812,
"logps/ref_chosen": -61.073387145996094,
"logps/ref_rejected": -81.34375,
"logps/rejected": -85.86856079101562,
"loss": 1.0765,
"margin_dpo/margin_mean": 1.495143175125122,
"margin_dpo/margin_std": 2.407855987548828,
"step": 477
},
{
"epoch": 0.7226001511715797,
"fcm_dpo/beta": 0.6088930368423462,
"fcm_dpo/delta": 0.1552659273147583,
"fcm_dpo/margin": 1.4085874557495117,
"fcm_dpo/q_t": 0.35270342230796814,
"grad_norm": 148.8921356201172,
"learning_rate": 1.0932609262554746e-07,
"logits/chosen": 0.12717093527317047,
"logits/rejected": 0.1243201345205307,
"logps/chosen": -59.892295837402344,
"logps/ref_chosen": -57.16731643676758,
"logps/ref_rejected": -53.30917739868164,
"logps/rejected": -57.44274139404297,
"loss": 1.0176,
"margin_dpo/margin_mean": 1.408586859703064,
"margin_dpo/margin_std": 2.1223185062408447,
"step": 478
},
{
"epoch": 0.7241118669690099,
"fcm_dpo/beta": 0.607843816280365,
"fcm_dpo/delta": 0.027871206402778625,
"fcm_dpo/margin": 1.5997779369354248,
"fcm_dpo/q_t": 0.3466408848762512,
"grad_norm": 140.39195251464844,
"learning_rate": 1.0823503403430734e-07,
"logits/chosen": 0.08278117328882217,
"logits/rejected": 0.03745885565876961,
"logps/chosen": -62.16324996948242,
"logps/ref_chosen": -58.91331481933594,
"logps/ref_rejected": -63.7403450012207,
"logps/rejected": -68.59005737304688,
"loss": 1.0645,
"margin_dpo/margin_mean": 1.5997782945632935,
"margin_dpo/margin_std": 2.665220260620117,
"step": 479
},
{
"epoch": 0.7256235827664399,
"fcm_dpo/beta": 0.6309263110160828,
"fcm_dpo/delta": 0.006531953811645508,
"fcm_dpo/margin": 1.5598734617233276,
"fcm_dpo/q_t": 0.31962987780570984,
"grad_norm": 165.5882568359375,
"learning_rate": 1.0714794091391072e-07,
"logits/chosen": 0.13291680812835693,
"logits/rejected": 0.1215020939707756,
"logps/chosen": -65.6745376586914,
"logps/ref_chosen": -62.80061340332031,
"logps/ref_rejected": -67.58859252929688,
"logps/rejected": -72.02239227294922,
"loss": 1.0675,
"margin_dpo/margin_mean": 1.5598732233047485,
"margin_dpo/margin_std": 2.3550682067871094,
"step": 480
},
{
"epoch": 0.72713529856387,
"fcm_dpo/beta": 0.6073616743087769,
"fcm_dpo/delta": -0.08397047966718674,
"fcm_dpo/margin": 1.7691869735717773,
"fcm_dpo/q_t": 0.3220970034599304,
"grad_norm": 130.34205627441406,
"learning_rate": 1.0606484367268906e-07,
"logits/chosen": 0.11158512532711029,
"logits/rejected": 0.10091142356395721,
"logps/chosen": -67.81123352050781,
"logps/ref_chosen": -65.28649139404297,
"logps/ref_rejected": -70.78668212890625,
"logps/rejected": -75.08061218261719,
"loss": 0.9249,
"margin_dpo/margin_mean": 1.7691867351531982,
"margin_dpo/margin_std": 2.550887107849121,
"step": 481
},
{
"epoch": 0.7286470143613001,
"fcm_dpo/beta": 0.620997428894043,
"fcm_dpo/delta": 0.16822174191474915,
"fcm_dpo/margin": 1.3626244068145752,
"fcm_dpo/q_t": 0.35040992498397827,
"grad_norm": 171.70579528808594,
"learning_rate": 1.0498577260720048e-07,
"logits/chosen": 0.12327564507722855,
"logits/rejected": 0.022273845970630646,
"logps/chosen": -63.85777282714844,
"logps/ref_chosen": -60.906185150146484,
"logps/ref_rejected": -103.44656372070312,
"logps/rejected": -107.76078033447266,
"loss": 1.0802,
"margin_dpo/margin_mean": 1.3626246452331543,
"margin_dpo/margin_std": 2.2782201766967773,
"step": 482
},
{
"epoch": 0.7301587301587301,
"fcm_dpo/beta": 0.6081717014312744,
"fcm_dpo/delta": -0.14480583369731903,
"fcm_dpo/margin": 1.8506314754486084,
"fcm_dpo/q_t": 0.31974995136260986,
"grad_norm": 137.3916473388672,
"learning_rate": 1.0391075790138232e-07,
"logits/chosen": 0.20215514302253723,
"logits/rejected": 0.13051192462444305,
"logps/chosen": -56.16196823120117,
"logps/ref_chosen": -53.192012786865234,
"logps/ref_rejected": -81.83927154541016,
"logps/rejected": -86.65986633300781,
"loss": 0.9449,
"margin_dpo/margin_mean": 1.850631594657898,
"margin_dpo/margin_std": 2.5201127529144287,
"step": 483
},
{
"epoch": 0.7316704459561603,
"fcm_dpo/beta": 0.6308771967887878,
"fcm_dpo/delta": 0.20309945940971375,
"fcm_dpo/margin": 1.2873001098632812,
"fcm_dpo/q_t": 0.3507644832134247,
"grad_norm": 157.5721435546875,
"learning_rate": 1.0283982962570681e-07,
"logits/chosen": 0.18846221268177032,
"logits/rejected": 0.15292689204216003,
"logps/chosen": -60.82984924316406,
"logps/ref_chosen": -57.76945877075195,
"logps/ref_rejected": -71.6829833984375,
"logps/rejected": -76.03067016601562,
"loss": 1.0035,
"margin_dpo/margin_mean": 1.2873002290725708,
"margin_dpo/margin_std": 1.9288554191589355,
"step": 484
},
{
"epoch": 0.7331821617535903,
"fcm_dpo/beta": 0.5989984273910522,
"fcm_dpo/delta": -0.20387829840183258,
"fcm_dpo/margin": 1.9372670650482178,
"fcm_dpo/q_t": 0.30228108167648315,
"grad_norm": 129.9238739013672,
"learning_rate": 1.0177301773633992e-07,
"logits/chosen": 0.1555819809436798,
"logits/rejected": 0.13119317591190338,
"logps/chosen": -59.23163604736328,
"logps/ref_chosen": -56.63584899902344,
"logps/ref_rejected": -70.85614013671875,
"logps/rejected": -75.38919067382812,
"loss": 0.8593,
"margin_dpo/margin_mean": 1.9372668266296387,
"margin_dpo/margin_std": 2.2261605262756348,
"step": 485
},
{
"epoch": 0.7346938775510204,
"fcm_dpo/beta": 0.6060769557952881,
"fcm_dpo/delta": -0.02147604152560234,
"fcm_dpo/margin": 1.6815690994262695,
"fcm_dpo/q_t": 0.34005385637283325,
"grad_norm": 140.710693359375,
"learning_rate": 1.007103520743035e-07,
"logits/chosen": 0.1885671615600586,
"logits/rejected": 0.10879142582416534,
"logps/chosen": -59.6024169921875,
"logps/ref_chosen": -56.347023010253906,
"logps/ref_rejected": -85.97221374511719,
"logps/rejected": -90.9091796875,
"loss": 1.0424,
"margin_dpo/margin_mean": 1.6815693378448486,
"margin_dpo/margin_std": 2.760887861251831,
"step": 486
},
{
"epoch": 0.7362055933484505,
"fcm_dpo/beta": 0.6121037006378174,
"fcm_dpo/delta": 0.00196036696434021,
"fcm_dpo/margin": 1.6285200119018555,
"fcm_dpo/q_t": 0.3259222209453583,
"grad_norm": 140.3463897705078,
"learning_rate": 9.965186236464046e-08,
"logits/chosen": 0.1968272179365158,
"logits/rejected": 0.15555810928344727,
"logps/chosen": -63.40202331542969,
"logps/ref_chosen": -60.617218017578125,
"logps/ref_rejected": -82.50975036621094,
"logps/rejected": -86.9230728149414,
"loss": 0.9054,
"margin_dpo/margin_mean": 1.6285200119018555,
"margin_dpo/margin_std": 2.1146717071533203,
"step": 487
},
{
"epoch": 0.7377173091458806,
"fcm_dpo/beta": 0.6010755300521851,
"fcm_dpo/delta": -0.20396147668361664,
"fcm_dpo/margin": 1.956758975982666,
"fcm_dpo/q_t": 0.29864153265953064,
"grad_norm": 125.88450622558594,
"learning_rate": 9.859757821558337e-08,
"logits/chosen": 0.1706887185573578,
"logits/rejected": 0.11105503141880035,
"logps/chosen": -65.75413513183594,
"logps/ref_chosen": -63.10905075073242,
"logps/ref_rejected": -82.49348449707031,
"logps/rejected": -87.09532165527344,
"loss": 0.829,
"margin_dpo/margin_mean": 1.956758737564087,
"margin_dpo/margin_std": 2.2036216259002686,
"step": 488
},
{
"epoch": 0.7392290249433107,
"fcm_dpo/beta": 0.6157445311546326,
"fcm_dpo/delta": 0.3702337145805359,
"fcm_dpo/margin": 1.0620077848434448,
"fcm_dpo/q_t": 0.3890204429626465,
"grad_norm": 165.01705932617188,
"learning_rate": 9.754752911772615e-08,
"logits/chosen": 0.19015483558177948,
"logits/rejected": 0.15638966858386993,
"logps/chosen": -67.7677993774414,
"logps/ref_chosen": -64.98896026611328,
"logps/ref_rejected": -84.39607238769531,
"logps/rejected": -88.23690795898438,
"loss": 1.2058,
"margin_dpo/margin_mean": 1.0620079040527344,
"margin_dpo/margin_std": 2.2157487869262695,
"step": 489
},
{
"epoch": 0.7407407407407407,
"fcm_dpo/beta": 0.6417911648750305,
"fcm_dpo/delta": 0.10067185759544373,
"fcm_dpo/margin": 1.4126074314117432,
"fcm_dpo/q_t": 0.36741340160369873,
"grad_norm": 186.93276977539062,
"learning_rate": 9.650174444319956e-08,
"logits/chosen": 0.22295230627059937,
"logits/rejected": 0.2012663632631302,
"logps/chosen": -64.97543334960938,
"logps/ref_chosen": -61.90874481201172,
"logps/ref_rejected": -70.58566284179688,
"logps/rejected": -75.06495666503906,
"loss": 1.1948,
"margin_dpo/margin_mean": 1.4126070737838745,
"margin_dpo/margin_std": 2.7005350589752197,
"step": 490
},
{
"epoch": 0.7422524565381708,
"fcm_dpo/beta": 0.6383862495422363,
"fcm_dpo/delta": 0.07852260023355484,
"fcm_dpo/margin": 1.450089931488037,
"fcm_dpo/q_t": 0.33126381039619446,
"grad_norm": 139.84234619140625,
"learning_rate": 9.546025344484868e-08,
"logits/chosen": 0.11358515918254852,
"logits/rejected": 0.0668938159942627,
"logps/chosen": -58.38109588623047,
"logps/ref_chosen": -55.47570037841797,
"logps/ref_rejected": -78.70318603515625,
"logps/rejected": -83.05867004394531,
"loss": 0.9724,
"margin_dpo/margin_mean": 1.4500904083251953,
"margin_dpo/margin_std": 1.9347925186157227,
"step": 491
},
{
"epoch": 0.7437641723356009,
"fcm_dpo/beta": 0.6746935844421387,
"fcm_dpo/delta": 0.11538802087306976,
"fcm_dpo/margin": 1.3122856616973877,
"fcm_dpo/q_t": 0.3527218699455261,
"grad_norm": 183.52841186523438,
"learning_rate": 9.442308525541589e-08,
"logits/chosen": 0.15461723506450653,
"logits/rejected": 0.09914899617433548,
"logps/chosen": -70.79060363769531,
"logps/ref_chosen": -67.28638458251953,
"logps/ref_rejected": -82.78628540039062,
"logps/rejected": -87.60279846191406,
"loss": 1.1795,
"margin_dpo/margin_mean": 1.3122851848602295,
"margin_dpo/margin_std": 2.379267692565918,
"step": 492
},
{
"epoch": 0.745275888133031,
"fcm_dpo/beta": 0.6467149257659912,
"fcm_dpo/delta": -0.2506517171859741,
"fcm_dpo/margin": 1.8874269723892212,
"fcm_dpo/q_t": 0.2871362268924713,
"grad_norm": 143.90199279785156,
"learning_rate": 9.339026888672468e-08,
"logits/chosen": 0.13737066090106964,
"logits/rejected": 0.0812433660030365,
"logps/chosen": -58.82109451293945,
"logps/ref_chosen": -55.92750549316406,
"logps/ref_rejected": -79.12149810791016,
"logps/rejected": -83.90251159667969,
"loss": 0.8709,
"margin_dpo/margin_mean": 1.8874274492263794,
"margin_dpo/margin_std": 2.288954734802246,
"step": 493
},
{
"epoch": 0.7467876039304611,
"fcm_dpo/beta": 0.6492782831192017,
"fcm_dpo/delta": 0.15562888979911804,
"fcm_dpo/margin": 1.3203234672546387,
"fcm_dpo/q_t": 0.3562992513179779,
"grad_norm": 196.9374542236328,
"learning_rate": 9.236183322886945e-08,
"logits/chosen": 0.06576605886220932,
"logits/rejected": 0.027181722223758698,
"logps/chosen": -70.94686126708984,
"logps/ref_chosen": -67.95410919189453,
"logps/ref_rejected": -90.50865173339844,
"logps/rejected": -94.82173156738281,
"loss": 1.1436,
"margin_dpo/margin_mean": 1.3203232288360596,
"margin_dpo/margin_std": 2.456470251083374,
"step": 494
},
{
"epoch": 0.7482993197278912,
"fcm_dpo/beta": 0.6703627109527588,
"fcm_dpo/delta": 0.14631986618041992,
"fcm_dpo/margin": 1.2908090353012085,
"fcm_dpo/q_t": 0.35358014702796936,
"grad_norm": 143.63949584960938,
"learning_rate": 9.133780704940594e-08,
"logits/chosen": 0.21046996116638184,
"logits/rejected": 0.15889891982078552,
"logps/chosen": -55.39236068725586,
"logps/ref_chosen": -52.62546157836914,
"logps/ref_rejected": -72.06781005859375,
"logps/rejected": -76.12551879882812,
"loss": 1.0334,
"margin_dpo/margin_mean": 1.2908086776733398,
"margin_dpo/margin_std": 2.0524096488952637,
"step": 495
},
{
"epoch": 0.7498110355253212,
"fcm_dpo/beta": 0.6439297199249268,
"fcm_dpo/delta": -0.214058056473732,
"fcm_dpo/margin": 1.8385138511657715,
"fcm_dpo/q_t": 0.33099353313446045,
"grad_norm": 153.26148986816406,
"learning_rate": 9.031821899254797e-08,
"logits/chosen": 0.17050223052501678,
"logits/rejected": 0.08947437256574631,
"logps/chosen": -60.537879943847656,
"logps/ref_chosen": -57.597320556640625,
"logps/ref_rejected": -94.36127471923828,
"logps/rejected": -99.14034271240234,
"loss": 1.0009,
"margin_dpo/margin_mean": 1.838512897491455,
"margin_dpo/margin_std": 2.8675289154052734,
"step": 496
},
{
"epoch": 0.7513227513227513,
"fcm_dpo/beta": 0.5975298881530762,
"fcm_dpo/delta": -0.43948090076446533,
"fcm_dpo/margin": 2.297381639480591,
"fcm_dpo/q_t": 0.27814143896102905,
"grad_norm": 137.8128204345703,
"learning_rate": 8.930309757836516e-08,
"logits/chosen": 0.20551130175590515,
"logits/rejected": 0.17609372735023499,
"logps/chosen": -75.94544982910156,
"logps/ref_chosen": -72.78994750976562,
"logps/ref_rejected": -89.48483276367188,
"logps/rejected": -94.9377212524414,
"loss": 0.8075,
"margin_dpo/margin_mean": 2.29738187789917,
"margin_dpo/margin_std": 2.609776258468628,
"step": 497
},
{
"epoch": 0.7528344671201814,
"fcm_dpo/beta": 0.5923163294792175,
"fcm_dpo/delta": -0.017407868057489395,
"fcm_dpo/margin": 1.7135266065597534,
"fcm_dpo/q_t": 0.3283523619174957,
"grad_norm": 158.02391052246094,
"learning_rate": 8.829247120198563e-08,
"logits/chosen": 0.17303167283535004,
"logits/rejected": 0.14456358551979065,
"logps/chosen": -71.15170288085938,
"logps/ref_chosen": -68.36572265625,
"logps/ref_rejected": -71.28846740722656,
"logps/rejected": -75.78797912597656,
"loss": 0.9063,
"margin_dpo/margin_mean": 1.713526964187622,
"margin_dpo/margin_std": 2.233605146408081,
"step": 498
},
{
"epoch": 0.7543461829176115,
"fcm_dpo/beta": 0.594528317451477,
"fcm_dpo/delta": 0.08794374763965607,
"fcm_dpo/margin": 1.5478346347808838,
"fcm_dpo/q_t": 0.35329633951187134,
"grad_norm": 145.75146484375,
"learning_rate": 8.728636813280163e-08,
"logits/chosen": 0.16119879484176636,
"logits/rejected": 0.10922683030366898,
"logps/chosen": -64.74832916259766,
"logps/ref_chosen": -61.90882873535156,
"logps/ref_rejected": -91.9411392211914,
"logps/rejected": -96.32847595214844,
"loss": 1.121,
"margin_dpo/margin_mean": 1.547835111618042,
"margin_dpo/margin_std": 2.651371479034424,
"step": 499
},
{
"epoch": 0.7558578987150416,
"fcm_dpo/beta": 0.6114327311515808,
"fcm_dpo/delta": 0.14150665700435638,
"fcm_dpo/margin": 1.4240680932998657,
"fcm_dpo/q_t": 0.35586458444595337,
"grad_norm": 174.01441955566406,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": 0.12461017072200775,
"logits/rejected": 0.11526093631982803,
"logps/chosen": -73.16278076171875,
"logps/ref_chosen": -70.225830078125,
"logps/ref_rejected": -71.72203063964844,
"logps/rejected": -76.08305358886719,
"loss": 1.1357,
"margin_dpo/margin_mean": 1.4240679740905762,
"margin_dpo/margin_std": 2.5242857933044434,
"step": 500
},
{
"epoch": 0.7573696145124716,
"fcm_dpo/beta": 0.6245852708816528,
"fcm_dpo/delta": 0.013538122177124023,
"fcm_dpo/margin": 1.5765228271484375,
"fcm_dpo/q_t": 0.3233751058578491,
"grad_norm": 121.06085205078125,
"learning_rate": 8.528784436016878e-08,
"logits/chosen": 0.15389983355998993,
"logits/rejected": 0.13935419917106628,
"logps/chosen": -67.49052429199219,
"logps/ref_chosen": -64.59880828857422,
"logps/ref_rejected": -70.59329223632812,
"logps/rejected": -75.0615234375,
"loss": 0.8765,
"margin_dpo/margin_mean": 1.5765225887298584,
"margin_dpo/margin_std": 1.870557188987732,
"step": 501
},
{
"epoch": 0.7588813303099018,
"fcm_dpo/beta": 0.6334064602851868,
"fcm_dpo/delta": 0.15443843603134155,
"fcm_dpo/margin": 1.3549081087112427,
"fcm_dpo/q_t": 0.3447534441947937,
"grad_norm": 170.02090454101562,
"learning_rate": 8.4295479559726e-08,
"logits/chosen": 0.17954713106155396,
"logits/rejected": 0.14447346329689026,
"logps/chosen": -68.44071960449219,
"logps/ref_chosen": -65.46662902832031,
"logps/ref_rejected": -90.22233581542969,
"logps/rejected": -94.55133056640625,
"loss": 1.0121,
"margin_dpo/margin_mean": 1.3549081087112427,
"margin_dpo/margin_std": 2.0818443298339844,
"step": 502
},
{
"epoch": 0.7603930461073318,
"fcm_dpo/beta": 0.6419456005096436,
"fcm_dpo/delta": 0.04625112935900688,
"fcm_dpo/margin": 1.4924449920654297,
"fcm_dpo/q_t": 0.3314594030380249,
"grad_norm": 147.28880310058594,
"learning_rate": 8.330774987092712e-08,
"logits/chosen": 0.16441036760807037,
"logits/rejected": 0.14501985907554626,
"logps/chosen": -54.53971862792969,
"logps/ref_chosen": -51.83476257324219,
"logps/ref_rejected": -57.62522506713867,
"logps/rejected": -61.82262420654297,
"loss": 0.9904,
"margin_dpo/margin_mean": 1.4924452304840088,
"margin_dpo/margin_std": 2.1151676177978516,
"step": 503
},
{
"epoch": 0.7619047619047619,
"fcm_dpo/beta": 0.6248334646224976,
"fcm_dpo/delta": -0.23319105803966522,
"fcm_dpo/margin": 1.9295209646224976,
"fcm_dpo/q_t": 0.295588880777359,
"grad_norm": 145.23736572265625,
"learning_rate": 8.232468292269479e-08,
"logits/chosen": 0.12036092579364777,
"logits/rejected": 0.0984935611486435,
"logps/chosen": -71.39968872070312,
"logps/ref_chosen": -68.65119934082031,
"logps/ref_rejected": -77.91394805908203,
"logps/rejected": -82.59195709228516,
"loss": 0.7956,
"margin_dpo/margin_mean": 1.929521083831787,
"margin_dpo/margin_std": 2.150576114654541,
"step": 504
},
{
"epoch": 0.763416477702192,
"fcm_dpo/beta": 0.6214442253112793,
"fcm_dpo/delta": 0.14665237069129944,
"fcm_dpo/margin": 1.393211007118225,
"fcm_dpo/q_t": 0.35855334997177124,
"grad_norm": 172.9752960205078,
"learning_rate": 8.134630621352483e-08,
"logits/chosen": 0.177079439163208,
"logits/rejected": 0.14601781964302063,
"logps/chosen": -62.98767852783203,
"logps/ref_chosen": -59.99884796142578,
"logps/ref_rejected": -76.88048553466797,
"logps/rejected": -81.26252746582031,
"loss": 1.0917,
"margin_dpo/margin_mean": 1.3932104110717773,
"margin_dpo/margin_std": 2.397246837615967,
"step": 505
},
{
"epoch": 0.764928193499622,
"fcm_dpo/beta": 0.627830982208252,
"fcm_dpo/delta": 0.032355912029743195,
"fcm_dpo/margin": 1.54330575466156,
"fcm_dpo/q_t": 0.3359745740890503,
"grad_norm": 169.81752014160156,
"learning_rate": 8.037264711071698e-08,
"logits/chosen": 0.1882736086845398,
"logits/rejected": 0.166158989071846,
"logps/chosen": -72.7292251586914,
"logps/ref_chosen": -70.07130432128906,
"logps/ref_rejected": -82.03775024414062,
"logps/rejected": -86.23898315429688,
"loss": 1.0458,
"margin_dpo/margin_mean": 1.54330575466156,
"margin_dpo/margin_std": 2.4680933952331543,
"step": 506
},
{
"epoch": 0.7664399092970522,
"fcm_dpo/beta": 0.6320427656173706,
"fcm_dpo/delta": -0.013070136308670044,
"fcm_dpo/margin": 1.600289225578308,
"fcm_dpo/q_t": 0.3405856490135193,
"grad_norm": 162.6297607421875,
"learning_rate": 7.940373284960933e-08,
"logits/chosen": 0.16370144486427307,
"logits/rejected": 0.1263059675693512,
"logps/chosen": -74.96257019042969,
"logps/ref_chosen": -72.00703430175781,
"logps/ref_rejected": -93.94987487792969,
"logps/rejected": -98.50569152832031,
"loss": 1.032,
"margin_dpo/margin_mean": 1.6002895832061768,
"margin_dpo/margin_std": 2.5143651962280273,
"step": 507
},
{
"epoch": 0.7679516250944822,
"fcm_dpo/beta": 0.6303126811981201,
"fcm_dpo/delta": -0.09110675752162933,
"fcm_dpo/margin": 1.7146742343902588,
"fcm_dpo/q_t": 0.3268600106239319,
"grad_norm": 160.6134033203125,
"learning_rate": 7.843959053281663e-08,
"logits/chosen": 0.1638413965702057,
"logits/rejected": 0.07367105782032013,
"logps/chosen": -62.97923278808594,
"logps/ref_chosen": -60.21992492675781,
"logps/ref_rejected": -95.9200668334961,
"logps/rejected": -100.39404296875,
"loss": 0.9646,
"margin_dpo/margin_mean": 1.7146737575531006,
"margin_dpo/margin_std": 2.4505600929260254,
"step": 508
},
{
"epoch": 0.7694633408919124,
"fcm_dpo/beta": 0.6267092227935791,
"fcm_dpo/delta": 0.0032510310411453247,
"fcm_dpo/margin": 1.5897384881973267,
"fcm_dpo/q_t": 0.3286612629890442,
"grad_norm": 157.1903839111328,
"learning_rate": 7.748024712947204e-08,
"logits/chosen": 0.13599814474582672,
"logits/rejected": 0.11172134429216385,
"logps/chosen": -69.07527160644531,
"logps/ref_chosen": -66.27017211914062,
"logps/ref_rejected": -71.73065185546875,
"logps/rejected": -76.12548828125,
"loss": 0.9953,
"margin_dpo/margin_mean": 1.589739203453064,
"margin_dpo/margin_std": 2.3416152000427246,
"step": 509
},
{
"epoch": 0.7709750566893424,
"fcm_dpo/beta": 0.6256568431854248,
"fcm_dpo/delta": 0.07200966775417328,
"fcm_dpo/margin": 1.4934487342834473,
"fcm_dpo/q_t": 0.3356286585330963,
"grad_norm": 166.45079040527344,
"learning_rate": 7.652572947447272e-08,
"logits/chosen": 0.22737839818000793,
"logits/rejected": 0.16066043078899384,
"logps/chosen": -56.61336898803711,
"logps/ref_chosen": -53.54487609863281,
"logps/ref_rejected": -91.36648559570312,
"logps/rejected": -95.92843627929688,
"loss": 1.005,
"margin_dpo/margin_mean": 1.4934483766555786,
"margin_dpo/margin_std": 2.2340614795684814,
"step": 510
},
{
"epoch": 0.7724867724867724,
"fcm_dpo/beta": 0.5967855453491211,
"fcm_dpo/delta": -0.3847648501396179,
"fcm_dpo/margin": 2.233816146850586,
"fcm_dpo/q_t": 0.2725888192653656,
"grad_norm": 127.65147399902344,
"learning_rate": 7.557606426772961e-08,
"logits/chosen": 0.19227483868598938,
"logits/rejected": 0.1546470820903778,
"logps/chosen": -58.66801452636719,
"logps/ref_chosen": -55.844383239746094,
"logps/ref_rejected": -86.49819946289062,
"logps/rejected": -91.55564880371094,
"loss": 0.7557,
"margin_dpo/margin_mean": 2.233816623687744,
"margin_dpo/margin_std": 2.2734181880950928,
"step": 511
},
{
"epoch": 0.7739984882842026,
"fcm_dpo/beta": 0.5729248523712158,
"fcm_dpo/delta": -0.2912992537021637,
"fcm_dpo/margin": 2.1851539611816406,
"fcm_dpo/q_t": 0.3602805733680725,
"grad_norm": 148.83749389648438,
"learning_rate": 7.463127807341966e-08,
"logits/chosen": 0.06669703125953674,
"logits/rejected": 0.05016005039215088,
"logps/chosen": -64.66706085205078,
"logps/ref_chosen": -61.653038024902344,
"logps/ref_rejected": -72.83148193359375,
"logps/rejected": -78.03065490722656,
"loss": 1.1281,
"margin_dpo/margin_mean": 2.185153007507324,
"margin_dpo/margin_std": 6.0767316818237305,
"step": 512
},
{
"epoch": 0.7755102040816326,
"fcm_dpo/beta": 0.5423198938369751,
"fcm_dpo/delta": -0.018883943557739258,
"fcm_dpo/margin": 1.8668808937072754,
"fcm_dpo/q_t": 0.3136303424835205,
"grad_norm": 92.23009490966797,
"learning_rate": 7.369139731924401e-08,
"logits/chosen": 0.27449309825897217,
"logits/rejected": 0.23453694581985474,
"logps/chosen": -53.48218536376953,
"logps/ref_chosen": -50.85256576538086,
"logps/ref_rejected": -69.21754455566406,
"logps/rejected": -73.71403503417969,
"loss": 0.8408,
"margin_dpo/margin_mean": 1.8668807744979858,
"margin_dpo/margin_std": 2.0499701499938965,
"step": 513
},
{
"epoch": 0.7770219198790628,
"fcm_dpo/beta": 0.5583192110061646,
"fcm_dpo/delta": 0.07093075662851334,
"fcm_dpo/margin": 1.6746280193328857,
"fcm_dpo/q_t": 0.3376282751560211,
"grad_norm": 142.2669677734375,
"learning_rate": 7.275644829568747e-08,
"logits/chosen": 0.20312434434890747,
"logits/rejected": 0.16620582342147827,
"logps/chosen": -72.64167785644531,
"logps/ref_chosen": -69.38493347167969,
"logps/ref_rejected": -83.32447814941406,
"logps/rejected": -88.25584411621094,
"loss": 1.0344,
"margin_dpo/margin_mean": 1.674628496170044,
"margin_dpo/margin_std": 2.6116552352905273,
"step": 514
},
{
"epoch": 0.7785336356764928,
"fcm_dpo/beta": 0.5502352118492126,
"fcm_dpo/delta": -0.08825686573982239,
"fcm_dpo/margin": 1.9603986740112305,
"fcm_dpo/q_t": 0.31272023916244507,
"grad_norm": 118.43267822265625,
"learning_rate": 7.182645715528435e-08,
"logits/chosen": 0.18149125576019287,
"logits/rejected": 0.11726510524749756,
"logps/chosen": -56.91847610473633,
"logps/ref_chosen": -53.687034606933594,
"logps/ref_rejected": -83.59614562988281,
"logps/rejected": -88.78797912597656,
"loss": 0.8794,
"margin_dpo/margin_mean": 1.9603983163833618,
"margin_dpo/margin_std": 2.4689695835113525,
"step": 515
},
{
"epoch": 0.780045351473923,
"fcm_dpo/beta": 0.5598210096359253,
"fcm_dpo/delta": 0.140054851770401,
"fcm_dpo/margin": 1.5577726364135742,
"fcm_dpo/q_t": 0.33335772156715393,
"grad_norm": 122.2513427734375,
"learning_rate": 7.090144991188568e-08,
"logits/chosen": 0.13685812056064606,
"logits/rejected": 0.0911111831665039,
"logps/chosen": -59.56391143798828,
"logps/ref_chosen": -56.9017219543457,
"logps/ref_rejected": -67.83477783203125,
"logps/rejected": -72.05474090576172,
"loss": 0.9382,
"margin_dpo/margin_mean": 1.557773470878601,
"margin_dpo/margin_std": 2.0453054904937744,
"step": 516
},
{
"epoch": 0.781557067271353,
"fcm_dpo/beta": 0.5620474815368652,
"fcm_dpo/delta": 0.013102632015943527,
"fcm_dpo/margin": 1.7575607299804688,
"fcm_dpo/q_t": 0.3347151577472687,
"grad_norm": 115.9723892211914,
"learning_rate": 6.998145243993284e-08,
"logits/chosen": 0.18582028150558472,
"logits/rejected": 0.17517045140266418,
"logps/chosen": -64.92308044433594,
"logps/ref_chosen": -61.775142669677734,
"logps/ref_rejected": -62.88270950317383,
"logps/rejected": -67.78820037841797,
"loss": 0.9368,
"margin_dpo/margin_mean": 1.7575602531433105,
"margin_dpo/margin_std": 2.4262712001800537,
"step": 517
},
{
"epoch": 0.783068783068783,
"fcm_dpo/beta": 0.5985446572303772,
"fcm_dpo/delta": 0.34289732575416565,
"fcm_dpo/margin": 1.131626844406128,
"fcm_dpo/q_t": 0.38726097345352173,
"grad_norm": 139.290283203125,
"learning_rate": 6.906649047373245e-08,
"logits/chosen": 0.19278889894485474,
"logits/rejected": 0.15261326730251312,
"logps/chosen": -64.96379089355469,
"logps/ref_chosen": -62.02523422241211,
"logps/ref_rejected": -79.06085205078125,
"logps/rejected": -83.13102722167969,
"loss": 1.1567,
"margin_dpo/margin_mean": 1.1316269636154175,
"margin_dpo/margin_std": 2.253190517425537,
"step": 518
},
{
"epoch": 0.7845804988662132,
"fcm_dpo/beta": 0.6441134214401245,
"fcm_dpo/delta": 0.37903302907943726,
"fcm_dpo/margin": 0.9974351525306702,
"fcm_dpo/q_t": 0.4084378778934479,
"grad_norm": 205.64553833007812,
"learning_rate": 6.815658960673781e-08,
"logits/chosen": 0.20801779627799988,
"logits/rejected": 0.15976354479789734,
"logps/chosen": -65.09986877441406,
"logps/ref_chosen": -61.60636901855469,
"logps/ref_rejected": -74.50727844238281,
"logps/rejected": -78.99821472167969,
"loss": 1.4602,
"margin_dpo/margin_mean": 0.9974346160888672,
"margin_dpo/margin_std": 2.888352870941162,
"step": 519
},
{
"epoch": 0.7860922146636432,
"fcm_dpo/beta": 0.6441489458084106,
"fcm_dpo/delta": -0.010032668709754944,
"fcm_dpo/margin": 1.562050461769104,
"fcm_dpo/q_t": 0.3334931433200836,
"grad_norm": 156.4764404296875,
"learning_rate": 6.725177529083209e-08,
"logits/chosen": 0.23202162981033325,
"logits/rejected": 0.18750135600566864,
"logps/chosen": -65.67581939697266,
"logps/ref_chosen": -62.87343215942383,
"logps/ref_rejected": -76.505615234375,
"logps/rejected": -80.87004852294922,
"loss": 0.9578,
"margin_dpo/margin_mean": 1.5620505809783936,
"margin_dpo/margin_std": 2.2463040351867676,
"step": 520
},
{
"epoch": 0.7876039304610734,
"fcm_dpo/beta": 0.6531983017921448,
"fcm_dpo/delta": 0.006205732002854347,
"fcm_dpo/margin": 1.5222787857055664,
"fcm_dpo/q_t": 0.32361727952957153,
"grad_norm": 157.93814086914062,
"learning_rate": 6.63520728356167e-08,
"logits/chosen": 0.1032562106847763,
"logits/rejected": 0.04845578968524933,
"logps/chosen": -67.12205505371094,
"logps/ref_chosen": -64.20668029785156,
"logps/ref_rejected": -92.28083038330078,
"logps/rejected": -96.71849060058594,
"loss": 0.9371,
"margin_dpo/margin_mean": 1.5222779512405396,
"margin_dpo/margin_std": 2.023488759994507,
"step": 521
},
{
"epoch": 0.7891156462585034,
"fcm_dpo/beta": 0.6555431485176086,
"fcm_dpo/delta": 0.03148447349667549,
"fcm_dpo/margin": 1.48207688331604,
"fcm_dpo/q_t": 0.32747435569763184,
"grad_norm": 147.4585723876953,
"learning_rate": 6.545750740770336e-08,
"logits/chosen": 0.16423772275447845,
"logits/rejected": 0.14849795401096344,
"logps/chosen": -61.12641525268555,
"logps/ref_chosen": -58.369720458984375,
"logps/ref_rejected": -68.79248046875,
"logps/rejected": -73.03125,
"loss": 1.0698,
"margin_dpo/margin_mean": 1.4820764064788818,
"margin_dpo/margin_std": 2.3686909675598145,
"step": 522
},
{
"epoch": 0.7906273620559335,
"fcm_dpo/beta": 0.6287499666213989,
"fcm_dpo/delta": -0.24582098424434662,
"fcm_dpo/margin": 1.9300764799118042,
"fcm_dpo/q_t": 0.2935262620449066,
"grad_norm": 161.8909454345703,
"learning_rate": 6.456810403001012e-08,
"logits/chosen": 0.17682617902755737,
"logits/rejected": 0.09023305773735046,
"logps/chosen": -68.99554443359375,
"logps/ref_chosen": -65.71324157714844,
"logps/ref_rejected": -91.98896789550781,
"logps/rejected": -97.20135498046875,
"loss": 0.9505,
"margin_dpo/margin_mean": 1.9300763607025146,
"margin_dpo/margin_std": 2.5030922889709473,
"step": 523
},
{
"epoch": 0.7921390778533636,
"fcm_dpo/beta": 0.642947256565094,
"fcm_dpo/delta": 0.15807999670505524,
"fcm_dpo/margin": 1.3285942077636719,
"fcm_dpo/q_t": 0.34164804220199585,
"grad_norm": 157.54954528808594,
"learning_rate": 6.368388758106134e-08,
"logits/chosen": 0.14576482772827148,
"logits/rejected": 0.12724286317825317,
"logps/chosen": -78.79303741455078,
"logps/ref_chosen": -76.35124969482422,
"logps/ref_rejected": -89.96072387695312,
"logps/rejected": -93.73110961914062,
"loss": 1.0368,
"margin_dpo/margin_mean": 1.328594446182251,
"margin_dpo/margin_std": 2.0358855724334717,
"step": 524
},
{
"epoch": 0.7936507936507936,
"fcm_dpo/beta": 0.6652133464813232,
"fcm_dpo/delta": 0.20262369513511658,
"fcm_dpo/margin": 1.2232202291488647,
"fcm_dpo/q_t": 0.3485579490661621,
"grad_norm": 169.54148864746094,
"learning_rate": 6.280488279429185e-08,
"logits/chosen": 0.0526951402425766,
"logits/rejected": 0.039441537111997604,
"logps/chosen": -78.24242401123047,
"logps/ref_chosen": -75.49578857421875,
"logps/ref_rejected": -84.04852294921875,
"logps/rejected": -88.01837921142578,
"loss": 0.9943,
"margin_dpo/margin_mean": 1.2232205867767334,
"margin_dpo/margin_std": 1.763154149055481,
"step": 525
},
{
"epoch": 0.7951625094482238,
"fcm_dpo/beta": 0.6876204609870911,
"fcm_dpo/delta": 0.2604686915874481,
"fcm_dpo/margin": 1.1028797626495361,
"fcm_dpo/q_t": 0.36391156911849976,
"grad_norm": 168.36891174316406,
"learning_rate": 6.193111425735515e-08,
"logits/chosen": 0.1737247258424759,
"logits/rejected": 0.12701740860939026,
"logps/chosen": -64.25382995605469,
"logps/ref_chosen": -61.29241943359375,
"logps/ref_rejected": -82.47763061523438,
"logps/rejected": -86.54191589355469,
"loss": 1.0833,
"margin_dpo/margin_mean": 1.1028800010681152,
"margin_dpo/margin_std": 1.7894057035446167,
"step": 526
},
{
"epoch": 0.7966742252456538,
"fcm_dpo/beta": 0.7351027727127075,
"fcm_dpo/delta": 0.22121518850326538,
"fcm_dpo/margin": 1.081107497215271,
"fcm_dpo/q_t": 0.3715837299823761,
"grad_norm": 250.8863067626953,
"learning_rate": 6.106260641143546e-08,
"logits/chosen": 0.2451198697090149,
"logits/rejected": 0.1942567229270935,
"logps/chosen": -64.66523742675781,
"logps/ref_chosen": -61.472625732421875,
"logps/ref_rejected": -90.52831268310547,
"logps/rejected": -94.8020248413086,
"loss": 1.3058,
"margin_dpo/margin_mean": 1.0811076164245605,
"margin_dpo/margin_std": 2.4224696159362793,
"step": 527
},
{
"epoch": 0.7981859410430839,
"fcm_dpo/beta": 0.7391092777252197,
"fcm_dpo/delta": 0.08262480795383453,
"fcm_dpo/margin": 1.248426914215088,
"fcm_dpo/q_t": 0.33980193734169006,
"grad_norm": 163.9738311767578,
"learning_rate": 6.019938355056422e-08,
"logits/chosen": 0.050329744815826416,
"logits/rejected": -0.005134463310241699,
"logps/chosen": -61.76133728027344,
"logps/ref_chosen": -58.792015075683594,
"logps/ref_rejected": -71.82516479492188,
"logps/rejected": -76.04290771484375,
"loss": 1.1046,
"margin_dpo/margin_mean": 1.2484264373779297,
"margin_dpo/margin_std": 2.0079092979431152,
"step": 528
},
{
"epoch": 0.799697656840514,
"fcm_dpo/beta": 0.6621348857879639,
"fcm_dpo/delta": -0.6627082824707031,
"fcm_dpo/margin": 2.306537628173828,
"fcm_dpo/q_t": 0.25794824957847595,
"grad_norm": 117.09664154052734,
"learning_rate": 5.934146982094049e-08,
"logits/chosen": 0.10348678380250931,
"logits/rejected": 0.05463102087378502,
"logps/chosen": -57.58445739746094,
"logps/ref_chosen": -55.070960998535156,
"logps/ref_rejected": -75.44007873535156,
"logps/rejected": -80.26010131835938,
"loss": 0.7618,
"margin_dpo/margin_mean": 2.306537389755249,
"margin_dpo/margin_std": 2.351503372192383,
"step": 529
},
{
"epoch": 0.8012093726379441,
"fcm_dpo/beta": 0.6574649214744568,
"fcm_dpo/delta": -0.030184239149093628,
"fcm_dpo/margin": 1.5620912313461304,
"fcm_dpo/q_t": 0.32785388827323914,
"grad_norm": 138.7952880859375,
"learning_rate": 5.848888922025552e-08,
"logits/chosen": 0.1791817992925644,
"logits/rejected": 0.14665716886520386,
"logps/chosen": -59.61901092529297,
"logps/ref_chosen": -56.743812561035156,
"logps/ref_rejected": -76.6692123413086,
"logps/rejected": -81.10650634765625,
"loss": 0.9455,
"margin_dpo/margin_mean": 1.5620914697647095,
"margin_dpo/margin_std": 2.117258071899414,
"step": 530
},
{
"epoch": 0.8027210884353742,
"fcm_dpo/beta": 0.6852550506591797,
"fcm_dpo/delta": 0.22120189666748047,
"fcm_dpo/margin": 1.1534594297409058,
"fcm_dpo/q_t": 0.3655146360397339,
"grad_norm": 180.98231506347656,
"learning_rate": 5.7641665597021435e-08,
"logits/chosen": 0.14505237340927124,
"logits/rejected": 0.09899041056632996,
"logps/chosen": -54.173072814941406,
"logps/ref_chosen": -51.116455078125,
"logps/ref_rejected": -79.52884674072266,
"logps/rejected": -83.73892211914062,
"loss": 1.0873,
"margin_dpo/margin_mean": 1.1534587144851685,
"margin_dpo/margin_std": 1.9886727333068848,
"step": 531
},
{
"epoch": 0.8042328042328042,
"fcm_dpo/beta": 0.6626486778259277,
"fcm_dpo/delta": -0.1822899430990219,
"fcm_dpo/margin": 1.750182867050171,
"fcm_dpo/q_t": 0.31126174330711365,
"grad_norm": 145.5829315185547,
"learning_rate": 5.679982264990424e-08,
"logits/chosen": 0.09937071800231934,
"logits/rejected": 0.05832071602344513,
"logps/chosen": -61.42143249511719,
"logps/ref_chosen": -58.279945373535156,
"logps/ref_rejected": -78.05426788330078,
"logps/rejected": -82.94593811035156,
"loss": 0.9066,
"margin_dpo/margin_mean": 1.7501822710037231,
"margin_dpo/margin_std": 2.264770030975342,
"step": 532
},
{
"epoch": 0.8057445200302343,
"fcm_dpo/beta": 0.6552125811576843,
"fcm_dpo/delta": -0.08179665356874466,
"fcm_dpo/margin": 1.6374917030334473,
"fcm_dpo/q_t": 0.3215191662311554,
"grad_norm": 148.30023193359375,
"learning_rate": 5.596338392706076e-08,
"logits/chosen": 0.25206273794174194,
"logits/rejected": 0.20781204104423523,
"logps/chosen": -58.997161865234375,
"logps/ref_chosen": -56.41801071166992,
"logps/ref_rejected": -73.89324951171875,
"logps/rejected": -78.1098861694336,
"loss": 0.9886,
"margin_dpo/margin_mean": 1.6374918222427368,
"margin_dpo/margin_std": 2.354870319366455,
"step": 533
},
{
"epoch": 0.8072562358276644,
"fcm_dpo/beta": 0.649742603302002,
"fcm_dpo/delta": 0.05204106122255325,
"fcm_dpo/margin": 1.4657820463180542,
"fcm_dpo/q_t": 0.32745662331581116,
"grad_norm": 144.36602783203125,
"learning_rate": 5.513237282548033e-08,
"logits/chosen": 0.17291076481342316,
"logits/rejected": 0.13234050571918488,
"logps/chosen": -63.49784851074219,
"logps/ref_chosen": -60.748687744140625,
"logps/ref_rejected": -73.8623046875,
"logps/rejected": -78.07723999023438,
"loss": 0.9544,
"margin_dpo/margin_mean": 1.4657821655273438,
"margin_dpo/margin_std": 1.9620198011398315,
"step": 534
},
{
"epoch": 0.8087679516250945,
"fcm_dpo/beta": 0.6593036651611328,
"fcm_dpo/delta": -0.019535936415195465,
"fcm_dpo/margin": 1.5417048931121826,
"fcm_dpo/q_t": 0.3396652340888977,
"grad_norm": 148.61146545410156,
"learning_rate": 5.430681259032957e-08,
"logits/chosen": 0.09707458317279816,
"logits/rejected": 0.04526631161570549,
"logps/chosen": -64.64535522460938,
"logps/ref_chosen": -61.637413024902344,
"logps/ref_rejected": -80.93138885498047,
"logps/rejected": -85.48104095458984,
"loss": 1.0638,
"margin_dpo/margin_mean": 1.5417053699493408,
"margin_dpo/margin_std": 2.5180535316467285,
"step": 535
},
{
"epoch": 0.8102796674225246,
"fcm_dpo/beta": 0.6411547064781189,
"fcm_dpo/delta": -0.05495788902044296,
"fcm_dpo/margin": 1.6322299242019653,
"fcm_dpo/q_t": 0.3149953782558441,
"grad_norm": 129.11102294921875,
"learning_rate": 5.3486726314303175e-08,
"logits/chosen": 0.20541507005691528,
"logits/rejected": 0.14478695392608643,
"logps/chosen": -54.65433120727539,
"logps/ref_chosen": -51.88897705078125,
"logps/ref_rejected": -73.34864044189453,
"logps/rejected": -77.74623107910156,
"loss": 0.8709,
"margin_dpo/margin_mean": 1.6322304010391235,
"margin_dpo/margin_std": 1.9500904083251953,
"step": 536
},
{
"epoch": 0.8117913832199547,
"fcm_dpo/beta": 0.6352528929710388,
"fcm_dpo/delta": -0.06024022772908211,
"fcm_dpo/margin": 1.6563501358032227,
"fcm_dpo/q_t": 0.32194915413856506,
"grad_norm": 129.3059844970703,
"learning_rate": 5.267213693697695e-08,
"logits/chosen": 0.24240414798259735,
"logits/rejected": 0.1752084195613861,
"logps/chosen": -57.34803771972656,
"logps/ref_chosen": -54.248619079589844,
"logps/ref_rejected": -94.94343566894531,
"logps/rejected": -99.69920349121094,
"loss": 1.0076,
"margin_dpo/margin_mean": 1.656351089477539,
"margin_dpo/margin_std": 2.3621954917907715,
"step": 537
},
{
"epoch": 0.8133030990173847,
"fcm_dpo/beta": 0.6266754865646362,
"fcm_dpo/delta": -0.23747026920318604,
"fcm_dpo/margin": 1.9266445636749268,
"fcm_dpo/q_t": 0.28824812173843384,
"grad_norm": 151.20790100097656,
"learning_rate": 5.1863067244167144e-08,
"logits/chosen": 0.17140713334083557,
"logits/rejected": 0.14127670228481293,
"logps/chosen": -73.17599487304688,
"logps/ref_chosen": -70.09353637695312,
"logps/ref_rejected": -79.49833679199219,
"logps/rejected": -84.5074462890625,
"loss": 0.8148,
"margin_dpo/margin_mean": 1.926644206047058,
"margin_dpo/margin_std": 2.1346817016601562,
"step": 538
},
{
"epoch": 0.8148148148148148,
"fcm_dpo/beta": 0.6051099300384521,
"fcm_dpo/delta": -0.027577966451644897,
"fcm_dpo/margin": 1.6932862997055054,
"fcm_dpo/q_t": 0.3206092119216919,
"grad_norm": 135.66448974609375,
"learning_rate": 5.105953986729195e-08,
"logits/chosen": 0.13851284980773926,
"logits/rejected": 0.08982232213020325,
"logps/chosen": -64.91091918945312,
"logps/ref_chosen": -61.93169403076172,
"logps/ref_rejected": -84.08946228027344,
"logps/rejected": -88.76197052001953,
"loss": 0.8703,
"margin_dpo/margin_mean": 1.6932868957519531,
"margin_dpo/margin_std": 2.0537302494049072,
"step": 539
},
{
"epoch": 0.8163265306122449,
"fcm_dpo/beta": 0.5865040421485901,
"fcm_dpo/delta": -0.2764972448348999,
"fcm_dpo/margin": 2.119068145751953,
"fcm_dpo/q_t": 0.29492413997650146,
"grad_norm": 142.99563598632812,
"learning_rate": 5.026157728273966e-08,
"logits/chosen": 0.1776391565799713,
"logits/rejected": 0.11434172093868256,
"logps/chosen": -65.55619812011719,
"logps/ref_chosen": -62.704254150390625,
"logps/ref_rejected": -95.63597106933594,
"logps/rejected": -100.60699462890625,
"loss": 0.841,
"margin_dpo/margin_mean": 2.1190683841705322,
"margin_dpo/margin_std": 2.443237781524658,
"step": 540
},
{
"epoch": 0.817838246409675,
"fcm_dpo/beta": 0.5594385266304016,
"fcm_dpo/delta": -0.11497347056865692,
"fcm_dpo/margin": 1.9675724506378174,
"fcm_dpo/q_t": 0.3052162230014801,
"grad_norm": 126.23677062988281,
"learning_rate": 4.9469201811239035e-08,
"logits/chosen": 0.12904441356658936,
"logits/rejected": 0.12872368097305298,
"logps/chosen": -65.19651794433594,
"logps/ref_chosen": -62.48084259033203,
"logps/ref_rejected": -57.55541229248047,
"logps/rejected": -62.23865509033203,
"loss": 0.8973,
"margin_dpo/margin_mean": 1.967572569847107,
"margin_dpo/margin_std": 2.4288015365600586,
"step": 541
},
{
"epoch": 0.8193499622071051,
"fcm_dpo/beta": 0.5571799278259277,
"fcm_dpo/delta": -0.033925510942935944,
"fcm_dpo/margin": 1.8493754863739014,
"fcm_dpo/q_t": 0.3220483660697937,
"grad_norm": 113.93995666503906,
"learning_rate": 4.868243561723534e-08,
"logits/chosen": 0.2040795087814331,
"logits/rejected": 0.1579345464706421,
"logps/chosen": -52.065277099609375,
"logps/ref_chosen": -49.454891204833984,
"logps/ref_rejected": -65.33275604248047,
"logps/rejected": -69.79252624511719,
"loss": 0.9384,
"margin_dpo/margin_mean": 1.8493754863739014,
"margin_dpo/margin_std": 2.4480319023132324,
"step": 542
},
{
"epoch": 0.8208616780045351,
"fcm_dpo/beta": 0.5707334280014038,
"fcm_dpo/delta": 0.13935577869415283,
"fcm_dpo/margin": 1.527040958404541,
"fcm_dpo/q_t": 0.33425813913345337,
"grad_norm": 109.5389175415039,
"learning_rate": 4.790130070827028e-08,
"logits/chosen": 0.1735750138759613,
"logits/rejected": 0.10716632753610611,
"logps/chosen": -53.960105895996094,
"logps/ref_chosen": -51.100860595703125,
"logps/ref_rejected": -76.06130981445312,
"logps/rejected": -80.44760131835938,
"loss": 0.9579,
"margin_dpo/margin_mean": 1.5270410776138306,
"margin_dpo/margin_std": 2.048778533935547,
"step": 543
},
{
"epoch": 0.8223733938019653,
"fcm_dpo/beta": 0.5578969717025757,
"fcm_dpo/delta": -0.18755921721458435,
"fcm_dpo/margin": 2.0900115966796875,
"fcm_dpo/q_t": 0.3165471851825714,
"grad_norm": 130.18556213378906,
"learning_rate": 4.7125818934366454e-08,
"logits/chosen": 0.14418599009513855,
"logits/rejected": 0.09046932309865952,
"logps/chosen": -63.256656646728516,
"logps/ref_chosen": -60.2772331237793,
"logps/ref_rejected": -88.40553283691406,
"logps/rejected": -93.4749755859375,
"loss": 0.9465,
"margin_dpo/margin_mean": 2.0900115966796875,
"margin_dpo/margin_std": 2.8377792835235596,
"step": 544
},
{
"epoch": 0.8238851095993953,
"fcm_dpo/beta": 0.5586047172546387,
"fcm_dpo/delta": 0.11241482198238373,
"fcm_dpo/margin": 1.607243299484253,
"fcm_dpo/q_t": 0.3465924859046936,
"grad_norm": 141.80880737304688,
"learning_rate": 4.635601198741607e-08,
"logits/chosen": 0.1261473000049591,
"logits/rejected": 0.07930372655391693,
"logps/chosen": -64.5820541381836,
"logps/ref_chosen": -61.61524963378906,
"logps/ref_rejected": -78.71266174316406,
"logps/rejected": -83.28670501708984,
"loss": 1.0031,
"margin_dpo/margin_mean": 1.6072428226470947,
"margin_dpo/margin_std": 2.3972063064575195,
"step": 545
},
{
"epoch": 0.8253968253968254,
"fcm_dpo/beta": 0.5760623216629028,
"fcm_dpo/delta": 0.1294553279876709,
"fcm_dpo/margin": 1.5299469232559204,
"fcm_dpo/q_t": 0.3359874188899994,
"grad_norm": 148.13609313964844,
"learning_rate": 4.559190140057428e-08,
"logits/chosen": 0.17436569929122925,
"logits/rejected": 0.15731996297836304,
"logps/chosen": -62.12708282470703,
"logps/ref_chosen": -59.313262939453125,
"logps/ref_rejected": -64.73631286621094,
"logps/rejected": -69.080078125,
"loss": 1.0087,
"margin_dpo/margin_mean": 1.52994704246521,
"margin_dpo/margin_std": 2.212397813796997,
"step": 546
},
{
"epoch": 0.8269085411942555,
"fcm_dpo/beta": 0.5641611814498901,
"fcm_dpo/delta": -0.1594116985797882,
"fcm_dpo/margin": 2.0226144790649414,
"fcm_dpo/q_t": 0.2903903126716614,
"grad_norm": 107.82057189941406,
"learning_rate": 4.483350854765672e-08,
"logits/chosen": 0.1362178921699524,
"logits/rejected": 0.08240213245153427,
"logps/chosen": -57.526573181152344,
"logps/ref_chosen": -54.97674560546875,
"logps/ref_rejected": -75.35922241210938,
"logps/rejected": -79.9316635131836,
"loss": 0.8307,
"margin_dpo/margin_mean": 2.0226151943206787,
"margin_dpo/margin_std": 2.2596964836120605,
"step": 547
},
{
"epoch": 0.8284202569916855,
"fcm_dpo/beta": 0.5807280540466309,
"fcm_dpo/delta": 0.16467252373695374,
"fcm_dpo/margin": 1.4542980194091797,
"fcm_dpo/q_t": 0.3473031222820282,
"grad_norm": 137.8083038330078,
"learning_rate": 4.4080854642541826e-08,
"logits/chosen": 0.09831206500530243,
"logits/rejected": 0.04538644477725029,
"logps/chosen": -66.22586822509766,
"logps/ref_chosen": -63.21067428588867,
"logps/ref_rejected": -81.23347473144531,
"logps/rejected": -85.70297241210938,
"loss": 0.9561,
"margin_dpo/margin_mean": 1.4542980194091797,
"margin_dpo/margin_std": 1.9517710208892822,
"step": 548
},
{
"epoch": 0.8299319727891157,
"fcm_dpo/beta": 0.5972309708595276,
"fcm_dpo/delta": 0.16186922788619995,
"fcm_dpo/margin": 1.4225797653198242,
"fcm_dpo/q_t": 0.35338258743286133,
"grad_norm": 165.7979736328125,
"learning_rate": 4.333396073857723e-08,
"logits/chosen": 0.2513388395309448,
"logits/rejected": 0.2025221437215805,
"logps/chosen": -67.16740417480469,
"logps/ref_chosen": -64.27351379394531,
"logps/ref_rejected": -92.31663513183594,
"logps/rejected": -96.63310241699219,
"loss": 1.0582,
"margin_dpo/margin_mean": 1.4225800037384033,
"margin_dpo/margin_std": 2.26432466506958,
"step": 549
},
{
"epoch": 0.8314436885865457,
"fcm_dpo/beta": 0.6041165590286255,
"fcm_dpo/delta": 0.15158365666866302,
"fcm_dpo/margin": 1.4242266416549683,
"fcm_dpo/q_t": 0.3457239270210266,
"grad_norm": 118.26493072509766,
"learning_rate": 4.259284772799099e-08,
"logits/chosen": 0.17192748188972473,
"logits/rejected": 0.14534920454025269,
"logps/chosen": -59.13628387451172,
"logps/ref_chosen": -56.230438232421875,
"logps/ref_rejected": -62.59788513183594,
"logps/rejected": -66.92796325683594,
"loss": 1.0043,
"margin_dpo/margin_mean": 1.4242260456085205,
"margin_dpo/margin_std": 2.080674171447754,
"step": 550
},
{
"epoch": 0.8329554043839759,
"fcm_dpo/beta": 0.6244951486587524,
"fcm_dpo/delta": 0.035569630563259125,
"fcm_dpo/margin": 1.5473127365112305,
"fcm_dpo/q_t": 0.33668047189712524,
"grad_norm": 132.6877899169922,
"learning_rate": 4.1857536341307176e-08,
"logits/chosen": 0.17000985145568848,
"logits/rejected": 0.14413632452487946,
"logps/chosen": -70.88568878173828,
"logps/ref_chosen": -67.74720764160156,
"logps/ref_rejected": -87.04285430908203,
"logps/rejected": -91.72865295410156,
"loss": 0.9498,
"margin_dpo/margin_mean": 1.5473123788833618,
"margin_dpo/margin_std": 2.1508963108062744,
"step": 551
},
{
"epoch": 0.8344671201814059,
"fcm_dpo/beta": 0.6188427209854126,
"fcm_dpo/delta": -0.11111941188573837,
"fcm_dpo/margin": 1.7723690271377563,
"fcm_dpo/q_t": 0.2986357808113098,
"grad_norm": 139.71243286132812,
"learning_rate": 4.112804714676593e-08,
"logits/chosen": 0.14941178262233734,
"logits/rejected": 0.10768507421016693,
"logps/chosen": -65.74885559082031,
"logps/ref_chosen": -62.92625427246094,
"logps/ref_rejected": -82.98365783691406,
"logps/rejected": -87.57861328125,
"loss": 0.887,
"margin_dpo/margin_mean": 1.7723690271377563,
"margin_dpo/margin_std": 2.1054043769836426,
"step": 552
},
{
"epoch": 0.8359788359788359,
"fcm_dpo/beta": 0.5973865985870361,
"fcm_dpo/delta": -0.033724166452884674,
"fcm_dpo/margin": 1.7203757762908936,
"fcm_dpo/q_t": 0.3467303514480591,
"grad_norm": 141.0452880859375,
"learning_rate": 4.0404400549748144e-08,
"logits/chosen": 0.1345456838607788,
"logits/rejected": 0.05725400522351265,
"logps/chosen": -59.39005661010742,
"logps/ref_chosen": -56.038490295410156,
"logps/ref_rejected": -84.48454284667969,
"logps/rejected": -89.55648803710938,
"loss": 1.1217,
"margin_dpo/margin_mean": 1.720375657081604,
"margin_dpo/margin_std": 2.994457721710205,
"step": 553
},
{
"epoch": 0.8374905517762661,
"fcm_dpo/beta": 0.61543208360672,
"fcm_dpo/delta": 0.10628563910722733,
"fcm_dpo/margin": 1.4661433696746826,
"fcm_dpo/q_t": 0.3466363847255707,
"grad_norm": 153.4082489013672,
"learning_rate": 3.968661679220467e-08,
"logits/chosen": 0.14405734837055206,
"logits/rejected": 0.12058596312999725,
"logps/chosen": -67.53119659423828,
"logps/ref_chosen": -64.53059387207031,
"logps/ref_rejected": -71.2155990600586,
"logps/rejected": -75.6823501586914,
"loss": 1.032,
"margin_dpo/margin_mean": 1.4661433696746826,
"margin_dpo/margin_std": 2.351094961166382,
"step": 554
},
{
"epoch": 0.8390022675736961,
"fcm_dpo/beta": 0.6489007472991943,
"fcm_dpo/delta": 0.20813655853271484,
"fcm_dpo/margin": 1.2255511283874512,
"fcm_dpo/q_t": 0.3576173484325409,
"grad_norm": 182.52911376953125,
"learning_rate": 3.89747159520904e-08,
"logits/chosen": 0.15571804344654083,
"logits/rejected": 0.12670589983463287,
"logps/chosen": -70.10320281982422,
"logps/ref_chosen": -66.65191650390625,
"logps/ref_rejected": -68.6667251586914,
"logps/rejected": -73.34355163574219,
"loss": 1.1729,
"margin_dpo/margin_mean": 1.225550651550293,
"margin_dpo/margin_std": 2.238006591796875,
"step": 555
},
{
"epoch": 0.8405139833711263,
"fcm_dpo/beta": 0.6440955400466919,
"fcm_dpo/delta": 0.016805479303002357,
"fcm_dpo/margin": 1.5289926528930664,
"fcm_dpo/q_t": 0.34293437004089355,
"grad_norm": 150.5496063232422,
"learning_rate": 3.826871794280192e-08,
"logits/chosen": 0.17429228127002716,
"logits/rejected": 0.13041889667510986,
"logps/chosen": -56.17012405395508,
"logps/ref_chosen": -52.832366943359375,
"logps/ref_rejected": -64.49044036865234,
"logps/rejected": -69.35718536376953,
"loss": 1.0381,
"margin_dpo/margin_mean": 1.5289928913116455,
"margin_dpo/margin_std": 2.3977890014648438,
"step": 556
},
{
"epoch": 0.8420256991685563,
"fcm_dpo/beta": 0.5984865427017212,
"fcm_dpo/delta": -0.35813382267951965,
"fcm_dpo/margin": 2.1672024726867676,
"fcm_dpo/q_t": 0.2816886305809021,
"grad_norm": 126.752197265625,
"learning_rate": 3.756864251262143e-08,
"logits/chosen": 0.21231526136398315,
"logits/rejected": 0.1561964750289917,
"logps/chosen": -58.17803955078125,
"logps/ref_chosen": -55.03598403930664,
"logps/ref_rejected": -75.80644989013672,
"logps/rejected": -81.11570739746094,
"loss": 0.7731,
"margin_dpo/margin_mean": 2.1672027111053467,
"margin_dpo/margin_std": 2.3840556144714355,
"step": 557
},
{
"epoch": 0.8435374149659864,
"fcm_dpo/beta": 0.5904332399368286,
"fcm_dpo/delta": -0.06778506934642792,
"fcm_dpo/margin": 1.794862151145935,
"fcm_dpo/q_t": 0.32028087973594666,
"grad_norm": 146.46681213378906,
"learning_rate": 3.687450924416341e-08,
"logits/chosen": 0.22109848260879517,
"logits/rejected": 0.17496977746486664,
"logps/chosen": -66.06083679199219,
"logps/ref_chosen": -63.226348876953125,
"logps/ref_rejected": -91.46881866455078,
"logps/rejected": -96.09817504882812,
"loss": 0.9082,
"margin_dpo/margin_mean": 1.7948615550994873,
"margin_dpo/margin_std": 2.3338146209716797,
"step": 558
},
{
"epoch": 0.8450491307634165,
"fcm_dpo/beta": 0.5790784358978271,
"fcm_dpo/delta": -0.06187023967504501,
"fcm_dpo/margin": 1.8165602684020996,
"fcm_dpo/q_t": 0.33412617444992065,
"grad_norm": 138.7181854248047,
"learning_rate": 3.6186337553827743e-08,
"logits/chosen": 0.11689116060733795,
"logits/rejected": 0.06340146064758301,
"logps/chosen": -64.47853088378906,
"logps/ref_chosen": -61.521644592285156,
"logps/ref_rejected": -82.83859252929688,
"logps/rejected": -87.61204528808594,
"loss": 1.0151,
"margin_dpo/margin_mean": 1.8165605068206787,
"margin_dpo/margin_std": 2.6852447986602783,
"step": 559
},
{
"epoch": 0.8465608465608465,
"fcm_dpo/beta": 0.6011730432510376,
"fcm_dpo/delta": 0.08525849133729935,
"fcm_dpo/margin": 1.5243772268295288,
"fcm_dpo/q_t": 0.345813125371933,
"grad_norm": 146.03248596191406,
"learning_rate": 3.550414669125573e-08,
"logits/chosen": 0.16141119599342346,
"logits/rejected": 0.13059382140636444,
"logps/chosen": -63.782081604003906,
"logps/ref_chosen": -60.64122009277344,
"logps/ref_rejected": -78.75474548339844,
"logps/rejected": -83.41998291015625,
"loss": 1.0243,
"margin_dpo/margin_mean": 1.5243771076202393,
"margin_dpo/margin_std": 2.256977081298828,
"step": 560
},
{
"epoch": 0.8480725623582767,
"fcm_dpo/beta": 0.5956799983978271,
"fcm_dpo/delta": 0.0049747563898563385,
"fcm_dpo/margin": 1.6711280345916748,
"fcm_dpo/q_t": 0.3338923752307892,
"grad_norm": 122.3641128540039,
"learning_rate": 3.482795573879241e-08,
"logits/chosen": 0.15920129418373108,
"logits/rejected": 0.13256219029426575,
"logps/chosen": -65.13980102539062,
"logps/ref_chosen": -62.49859619140625,
"logps/ref_rejected": -78.72064208984375,
"logps/rejected": -83.03296661376953,
"loss": 0.954,
"margin_dpo/margin_mean": 1.671128273010254,
"margin_dpo/margin_std": 2.351602077484131,
"step": 561
},
{
"epoch": 0.8495842781557067,
"fcm_dpo/beta": 0.5753499269485474,
"fcm_dpo/delta": -0.1565413773059845,
"fcm_dpo/margin": 1.973330020904541,
"fcm_dpo/q_t": 0.3080880045890808,
"grad_norm": 128.77142333984375,
"learning_rate": 3.415778361095226e-08,
"logits/chosen": 0.20189374685287476,
"logits/rejected": 0.17044463753700256,
"logps/chosen": -77.8094482421875,
"logps/ref_chosen": -74.78173828125,
"logps/ref_rejected": -92.63499450683594,
"logps/rejected": -97.63603210449219,
"loss": 0.8933,
"margin_dpo/margin_mean": 1.9733293056488037,
"margin_dpo/margin_std": 2.4358203411102295,
"step": 562
},
{
"epoch": 0.8510959939531368,
"fcm_dpo/beta": 0.5790232419967651,
"fcm_dpo/delta": 0.03163836523890495,
"fcm_dpo/margin": 1.6777245998382568,
"fcm_dpo/q_t": 0.3400580883026123,
"grad_norm": 139.59011840820312,
"learning_rate": 3.349364905389032e-08,
"logits/chosen": 0.2000666856765747,
"logits/rejected": 0.15850435197353363,
"logps/chosen": -53.24721145629883,
"logps/ref_chosen": -50.19850158691406,
"logps/ref_rejected": -66.76687622070312,
"logps/rejected": -71.4933090209961,
"loss": 1.0943,
"margin_dpo/margin_mean": 1.677725076675415,
"margin_dpo/margin_std": 2.819387912750244,
"step": 563
},
{
"epoch": 0.8526077097505669,
"fcm_dpo/beta": 0.5817031264305115,
"fcm_dpo/delta": -0.0036928579211235046,
"fcm_dpo/margin": 1.7244809865951538,
"fcm_dpo/q_t": 0.3220234513282776,
"grad_norm": 143.81246948242188,
"learning_rate": 3.283557064487785e-08,
"logits/chosen": 0.1359567940235138,
"logits/rejected": 0.10976006835699081,
"logps/chosen": -58.42416000366211,
"logps/ref_chosen": -55.7408447265625,
"logps/ref_rejected": -74.82323455810547,
"logps/rejected": -79.23103332519531,
"loss": 0.9548,
"margin_dpo/margin_mean": 1.7244811058044434,
"margin_dpo/margin_std": 2.30587100982666,
"step": 564
},
{
"epoch": 0.854119425547997,
"fcm_dpo/beta": 0.5901836156845093,
"fcm_dpo/delta": 0.04009624570608139,
"fcm_dpo/margin": 1.627647876739502,
"fcm_dpo/q_t": 0.32860618829727173,
"grad_norm": 144.67605590820312,
"learning_rate": 3.218356679178252e-08,
"logits/chosen": 0.16707636415958405,
"logits/rejected": 0.12742547690868378,
"logps/chosen": -61.788658142089844,
"logps/ref_chosen": -58.33738327026367,
"logps/ref_rejected": -78.31776428222656,
"logps/rejected": -83.39668273925781,
"loss": 0.9857,
"margin_dpo/margin_mean": 1.627647876739502,
"margin_dpo/margin_std": 2.2914857864379883,
"step": 565
},
{
"epoch": 0.8556311413454271,
"fcm_dpo/beta": 0.6040189266204834,
"fcm_dpo/delta": 0.15839658677577972,
"fcm_dpo/margin": 1.4104197025299072,
"fcm_dpo/q_t": 0.3656797707080841,
"grad_norm": 174.7579345703125,
"learning_rate": 3.1537655732553764e-08,
"logits/chosen": 0.1992546021938324,
"logits/rejected": 0.1789197027683258,
"logps/chosen": -74.33102416992188,
"logps/ref_chosen": -71.22373962402344,
"logps/ref_rejected": -71.11601257324219,
"logps/rejected": -75.63371276855469,
"loss": 1.2269,
"margin_dpo/margin_mean": 1.4104200601577759,
"margin_dpo/margin_std": 2.8447115421295166,
"step": 566
},
{
"epoch": 0.8571428571428571,
"fcm_dpo/beta": 0.579740047454834,
"fcm_dpo/delta": -0.17305630445480347,
"fcm_dpo/margin": 1.978826880455017,
"fcm_dpo/q_t": 0.3130984902381897,
"grad_norm": 123.3370132446289,
"learning_rate": 3.089785553471233e-08,
"logits/chosen": 0.1904851794242859,
"logits/rejected": 0.11687320470809937,
"logps/chosen": -55.760459899902344,
"logps/ref_chosen": -52.669273376464844,
"logps/ref_rejected": -74.34785461425781,
"logps/rejected": -79.41786193847656,
"loss": 0.8807,
"margin_dpo/margin_mean": 1.9788269996643066,
"margin_dpo/margin_std": 2.6479549407958984,
"step": 567
},
{
"epoch": 0.8586545729402872,
"fcm_dpo/beta": 0.5765559673309326,
"fcm_dpo/delta": -0.18778733909130096,
"fcm_dpo/margin": 2.0179710388183594,
"fcm_dpo/q_t": 0.29743558168411255,
"grad_norm": 112.51110076904297,
"learning_rate": 3.026418409484513e-08,
"logits/chosen": 0.20747870206832886,
"logits/rejected": 0.14340844750404358,
"logps/chosen": -54.99408721923828,
"logps/ref_chosen": -52.178001403808594,
"logps/ref_rejected": -85.8277587890625,
"logps/rejected": -90.66182708740234,
"loss": 0.8043,
"margin_dpo/margin_mean": 2.017970561981201,
"margin_dpo/margin_std": 2.1505703926086426,
"step": 568
},
{
"epoch": 0.8601662887377173,
"fcm_dpo/beta": 0.5608316659927368,
"fcm_dpo/delta": 0.08226889371871948,
"fcm_dpo/margin": 1.6459429264068604,
"fcm_dpo/q_t": 0.32129180431365967,
"grad_norm": 118.24040222167969,
"learning_rate": 2.963665913810451e-08,
"logits/chosen": 0.10791079699993134,
"logits/rejected": 0.08581716567277908,
"logps/chosen": -65.56758117675781,
"logps/ref_chosen": -62.649261474609375,
"logps/ref_rejected": -75.4298324584961,
"logps/rejected": -79.99409484863281,
"loss": 0.9399,
"margin_dpo/margin_mean": 1.6459429264068604,
"margin_dpo/margin_std": 2.062713623046875,
"step": 569
},
{
"epoch": 0.8616780045351474,
"fcm_dpo/beta": 0.552453875541687,
"fcm_dpo/delta": -0.3056313693523407,
"fcm_dpo/margin": 2.29406476020813,
"fcm_dpo/q_t": 0.271054744720459,
"grad_norm": 103.41246795654297,
"learning_rate": 2.9015298217712453e-08,
"logits/chosen": 0.12330185621976852,
"logits/rejected": 0.05149267241358757,
"logps/chosen": -52.63910675048828,
"logps/ref_chosen": -50.04179382324219,
"logps/ref_rejected": -78.27146911621094,
"logps/rejected": -83.162841796875,
"loss": 0.7286,
"margin_dpo/margin_mean": 2.294064521789551,
"margin_dpo/margin_std": 2.158341884613037,
"step": 570
},
{
"epoch": 0.8631897203325775,
"fcm_dpo/beta": 0.5561559200286865,
"fcm_dpo/delta": 0.2247203141450882,
"fcm_dpo/margin": 1.4249439239501953,
"fcm_dpo/q_t": 0.35770949721336365,
"grad_norm": 122.92935180664062,
"learning_rate": 2.840011871446962e-08,
"logits/chosen": 0.1378299742937088,
"logits/rejected": 0.10836352407932281,
"logps/chosen": -56.67723083496094,
"logps/ref_chosen": -53.65681457519531,
"logps/ref_rejected": -66.13298034667969,
"logps/rejected": -70.57833862304688,
"loss": 1.0635,
"margin_dpo/margin_mean": 1.4249444007873535,
"margin_dpo/margin_std": 2.3282651901245117,
"step": 571
},
{
"epoch": 0.8647014361300076,
"fcm_dpo/beta": 0.5793824195861816,
"fcm_dpo/delta": 0.19740980863571167,
"fcm_dpo/margin": 1.4121860265731812,
"fcm_dpo/q_t": 0.3518349528312683,
"grad_norm": 155.68394470214844,
"learning_rate": 2.7791137836269158e-08,
"logits/chosen": 0.18134906888008118,
"logits/rejected": 0.20107844471931458,
"logps/chosen": -77.97372436523438,
"logps/ref_chosen": -74.81792449951172,
"logps/ref_rejected": -65.88681030273438,
"logps/rejected": -70.45478820800781,
"loss": 1.0227,
"margin_dpo/margin_mean": 1.4121863842010498,
"margin_dpo/margin_std": 2.1890487670898438,
"step": 572
},
{
"epoch": 0.8662131519274376,
"fcm_dpo/beta": 0.5722247362136841,
"fcm_dpo/delta": -0.10332206636667252,
"fcm_dpo/margin": 1.9067519903182983,
"fcm_dpo/q_t": 0.3288191854953766,
"grad_norm": 141.57550048828125,
"learning_rate": 2.718837261761528e-08,
"logits/chosen": 0.16030460596084595,
"logits/rejected": 0.12534594535827637,
"logps/chosen": -71.95037078857422,
"logps/ref_chosen": -68.72564697265625,
"logps/ref_rejected": -88.16201782226562,
"logps/rejected": -93.29348754882812,
"loss": 1.0156,
"margin_dpo/margin_mean": 1.9067527055740356,
"margin_dpo/margin_std": 2.9433274269104004,
"step": 573
},
{
"epoch": 0.8677248677248677,
"fcm_dpo/beta": 0.5721937417984009,
"fcm_dpo/delta": -0.07496052980422974,
"fcm_dpo/margin": 1.8618470430374146,
"fcm_dpo/q_t": 0.3078283965587616,
"grad_norm": 114.2187271118164,
"learning_rate": 2.659183991914696e-08,
"logits/chosen": 0.2320360541343689,
"logits/rejected": 0.17432260513305664,
"logps/chosen": -59.44536209106445,
"logps/ref_chosen": -56.31340026855469,
"logps/ref_rejected": -83.91553497314453,
"logps/rejected": -88.90934753417969,
"loss": 0.8193,
"margin_dpo/margin_mean": 1.8618476390838623,
"margin_dpo/margin_std": 2.011713981628418,
"step": 574
},
{
"epoch": 0.8692365835222978,
"fcm_dpo/beta": 0.5716170072555542,
"fcm_dpo/delta": 0.18006719648838043,
"fcm_dpo/margin": 1.45901620388031,
"fcm_dpo/q_t": 0.3629787862300873,
"grad_norm": 139.16409301757812,
"learning_rate": 2.600155642716606e-08,
"logits/chosen": 0.24198350310325623,
"logits/rejected": 0.18358173966407776,
"logps/chosen": -67.59683990478516,
"logps/ref_chosen": -64.5841293334961,
"logps/ref_rejected": -93.47034454345703,
"logps/rejected": -97.94207000732422,
"loss": 1.1015,
"margin_dpo/margin_mean": 1.4590164422988892,
"margin_dpo/margin_std": 2.4919562339782715,
"step": 575
},
{
"epoch": 0.8707482993197279,
"fcm_dpo/beta": 0.5859131813049316,
"fcm_dpo/delta": 0.05836522579193115,
"fcm_dpo/margin": 1.6160252094268799,
"fcm_dpo/q_t": 0.34040236473083496,
"grad_norm": 173.32106018066406,
"learning_rate": 2.5417538653170754e-08,
"logits/chosen": 0.18478666245937347,
"logits/rejected": 0.1120123565196991,
"logps/chosen": -55.915428161621094,
"logps/ref_chosen": -53.28052520751953,
"logps/ref_rejected": -84.2000503540039,
"logps/rejected": -88.45097351074219,
"loss": 1.0198,
"margin_dpo/margin_mean": 1.6160247325897217,
"margin_dpo/margin_std": 2.3769047260284424,
"step": 576
},
{
"epoch": 0.872260015117158,
"fcm_dpo/beta": 0.6100517511367798,
"fcm_dpo/delta": 0.2396540343761444,
"fcm_dpo/margin": 1.2779208421707153,
"fcm_dpo/q_t": 0.36663320660591125,
"grad_norm": 141.51870727539062,
"learning_rate": 2.4839802933393607e-08,
"logits/chosen": 0.15433327853679657,
"logits/rejected": 0.13624653220176697,
"logps/chosen": -65.23735046386719,
"logps/ref_chosen": -62.32468795776367,
"logps/ref_rejected": -67.300537109375,
"logps/rejected": -71.49111938476562,
"loss": 1.1174,
"margin_dpo/margin_mean": 1.2779215574264526,
"margin_dpo/margin_std": 2.3165364265441895,
"step": 577
},
{
"epoch": 0.873771730914588,
"fcm_dpo/beta": 0.6398091316223145,
"fcm_dpo/delta": 0.2764911949634552,
"fcm_dpo/margin": 1.1640191078186035,
"fcm_dpo/q_t": 0.3820345997810364,
"grad_norm": 158.93179321289062,
"learning_rate": 2.4268365428344733e-08,
"logits/chosen": 0.20606505870819092,
"logits/rejected": 0.1820131242275238,
"logps/chosen": -59.829124450683594,
"logps/ref_chosen": -56.65557861328125,
"logps/ref_rejected": -68.21835327148438,
"logps/rejected": -72.55591583251953,
"loss": 1.1219,
"margin_dpo/margin_mean": 1.1640193462371826,
"margin_dpo/margin_std": 2.143388271331787,
"step": 578
},
{
"epoch": 0.8752834467120182,
"fcm_dpo/beta": 0.6147720217704773,
"fcm_dpo/delta": -0.3748418092727661,
"fcm_dpo/margin": 2.1486082077026367,
"fcm_dpo/q_t": 0.2804148197174072,
"grad_norm": 129.94529724121094,
"learning_rate": 2.3703242122359357e-08,
"logits/chosen": 0.1448242962360382,
"logits/rejected": 0.1141422688961029,
"logps/chosen": -59.766502380371094,
"logps/ref_chosen": -56.809661865234375,
"logps/ref_rejected": -68.09613037109375,
"logps/rejected": -73.20157623291016,
"loss": 0.779,
"margin_dpo/margin_mean": 2.1486077308654785,
"margin_dpo/margin_std": 2.415778636932373,
"step": 579
},
{
"epoch": 0.8767951625094482,
"fcm_dpo/beta": 0.6254961490631104,
"fcm_dpo/delta": 0.14091333746910095,
"fcm_dpo/margin": 1.389689326286316,
"fcm_dpo/q_t": 0.3568766117095947,
"grad_norm": 135.1801300048828,
"learning_rate": 2.3144448823151392e-08,
"logits/chosen": 0.1448507308959961,
"logits/rejected": 0.10793846845626831,
"logps/chosen": -60.63453674316406,
"logps/ref_chosen": -57.70011520385742,
"logps/ref_rejected": -77.90664672851562,
"logps/rejected": -82.23075866699219,
"loss": 1.1175,
"margin_dpo/margin_mean": 1.3896892070770264,
"margin_dpo/margin_std": 2.38545823097229,
"step": 580
},
{
"epoch": 0.8783068783068783,
"fcm_dpo/beta": 0.6277990341186523,
"fcm_dpo/delta": 0.06906095892190933,
"fcm_dpo/margin": 1.492587924003601,
"fcm_dpo/q_t": 0.3425326943397522,
"grad_norm": 166.3179473876953,
"learning_rate": 2.259200116137039e-08,
"logits/chosen": 0.22638601064682007,
"logits/rejected": 0.17741230130195618,
"logps/chosen": -62.56743240356445,
"logps/ref_chosen": -59.332359313964844,
"logps/ref_rejected": -83.64482116699219,
"logps/rejected": -88.37248229980469,
"loss": 1.0185,
"margin_dpo/margin_mean": 1.4925878047943115,
"margin_dpo/margin_std": 2.3351616859436035,
"step": 581
},
{
"epoch": 0.8798185941043084,
"fcm_dpo/beta": 0.6253724098205566,
"fcm_dpo/delta": -0.07393016666173935,
"fcm_dpo/margin": 1.7041115760803223,
"fcm_dpo/q_t": 0.31156644225120544,
"grad_norm": 143.69277954101562,
"learning_rate": 2.204591459016525e-08,
"logits/chosen": 0.1830708086490631,
"logits/rejected": 0.19862952828407288,
"logps/chosen": -66.9874267578125,
"logps/ref_chosen": -64.16285705566406,
"logps/ref_rejected": -58.632896423339844,
"logps/rejected": -63.161582946777344,
"loss": 0.8907,
"margin_dpo/margin_mean": 1.7041112184524536,
"margin_dpo/margin_std": 2.1104648113250732,
"step": 582
},
{
"epoch": 0.8813303099017384,
"fcm_dpo/beta": 0.6326082944869995,
"fcm_dpo/delta": 0.08823379874229431,
"fcm_dpo/margin": 1.4541677236557007,
"fcm_dpo/q_t": 0.35390377044677734,
"grad_norm": 158.56170654296875,
"learning_rate": 2.1506204384751064e-08,
"logits/chosen": 0.27797916531562805,
"logits/rejected": 0.20349468290805817,
"logps/chosen": -54.86329650878906,
"logps/ref_chosen": -51.87239456176758,
"logps/ref_rejected": -83.86331176757812,
"logps/rejected": -88.30838012695312,
"loss": 1.1301,
"margin_dpo/margin_mean": 1.4541676044464111,
"margin_dpo/margin_std": 2.6575491428375244,
"step": 583
},
{
"epoch": 0.8828420256991686,
"fcm_dpo/beta": 0.6200550198554993,
"fcm_dpo/delta": -0.1539473831653595,
"fcm_dpo/margin": 1.8320786952972412,
"fcm_dpo/q_t": 0.32225194573402405,
"grad_norm": 134.9540252685547,
"learning_rate": 2.09728856419826e-08,
"logits/chosen": 0.23410704731941223,
"logits/rejected": 0.15715327858924866,
"logps/chosen": -49.28221130371094,
"logps/ref_chosen": -46.571388244628906,
"logps/ref_rejected": -80.67969512939453,
"logps/rejected": -85.22259521484375,
"loss": 0.9929,
"margin_dpo/margin_mean": 1.8320791721343994,
"margin_dpo/margin_std": 2.6820805072784424,
"step": 584
},
{
"epoch": 0.8843537414965986,
"fcm_dpo/beta": 0.6162490844726562,
"fcm_dpo/delta": 0.03570966795086861,
"fcm_dpo/margin": 1.5696362257003784,
"fcm_dpo/q_t": 0.3259866535663605,
"grad_norm": 138.78614807128906,
"learning_rate": 2.044597327993153e-08,
"logits/chosen": 0.16352935135364532,
"logits/rejected": 0.13112275302410126,
"logps/chosen": -60.831146240234375,
"logps/ref_chosen": -58.124534606933594,
"logps/ref_rejected": -79.00538635253906,
"logps/rejected": -83.28163146972656,
"loss": 0.9773,
"margin_dpo/margin_mean": 1.5696361064910889,
"margin_dpo/margin_std": 2.2082860469818115,
"step": 585
},
{
"epoch": 0.8858654572940288,
"fcm_dpo/beta": 0.602626621723175,
"fcm_dpo/delta": -0.15734781324863434,
"fcm_dpo/margin": 1.8877205848693848,
"fcm_dpo/q_t": 0.30060410499572754,
"grad_norm": 121.3906021118164,
"learning_rate": 1.9925482037469187e-08,
"logits/chosen": 0.16395865380764008,
"logits/rejected": 0.12222093343734741,
"logps/chosen": -57.20876693725586,
"logps/ref_chosen": -54.10163879394531,
"logps/ref_rejected": -63.72113037109375,
"logps/rejected": -68.71598052978516,
"loss": 0.8797,
"margin_dpo/margin_mean": 1.8877204656600952,
"margin_dpo/margin_std": 2.3431496620178223,
"step": 586
},
{
"epoch": 0.8873771730914588,
"fcm_dpo/beta": 0.6007733941078186,
"fcm_dpo/delta": 0.015819646418094635,
"fcm_dpo/margin": 1.6403778791427612,
"fcm_dpo/q_t": 0.3280307352542877,
"grad_norm": 161.0563507080078,
"learning_rate": 1.9411426473854687e-08,
"logits/chosen": 0.20693828165531158,
"logits/rejected": 0.18662090599536896,
"logps/chosen": -66.15540313720703,
"logps/ref_chosen": -63.41719436645508,
"logps/ref_rejected": -63.47003936767578,
"logps/rejected": -67.84861755371094,
"loss": 1.0373,
"margin_dpo/margin_mean": 1.6403785943984985,
"margin_dpo/margin_std": 2.5440006256103516,
"step": 587
},
{
"epoch": 0.8888888888888888,
"fcm_dpo/beta": 0.5988498330116272,
"fcm_dpo/delta": -0.019005782902240753,
"fcm_dpo/margin": 1.6973689794540405,
"fcm_dpo/q_t": 0.3193732500076294,
"grad_norm": 139.44850158691406,
"learning_rate": 1.890382096832699e-08,
"logits/chosen": 0.2137565314769745,
"logits/rejected": 0.17403748631477356,
"logps/chosen": -65.03866577148438,
"logps/ref_chosen": -62.20103454589844,
"logps/ref_rejected": -82.10249328613281,
"logps/rejected": -86.63749694824219,
"loss": 0.9558,
"margin_dpo/margin_mean": 1.697368860244751,
"margin_dpo/margin_std": 2.3547918796539307,
"step": 588
},
{
"epoch": 0.890400604686319,
"fcm_dpo/beta": 0.5705356597900391,
"fcm_dpo/delta": -0.3349605202674866,
"fcm_dpo/margin": 2.2617807388305664,
"fcm_dpo/q_t": 0.2769893407821655,
"grad_norm": 125.83250427246094,
"learning_rate": 1.840267971970344e-08,
"logits/chosen": 0.16948330402374268,
"logits/rejected": 0.14072127640247345,
"logps/chosen": -59.358924865722656,
"logps/ref_chosen": -56.71361541748047,
"logps/ref_rejected": -76.7366943359375,
"logps/rejected": -81.64378356933594,
"loss": 0.7735,
"margin_dpo/margin_mean": 2.2617812156677246,
"margin_dpo/margin_std": 2.493180274963379,
"step": 589
},
{
"epoch": 0.891912320483749,
"fcm_dpo/beta": 0.5450751781463623,
"fcm_dpo/delta": -0.2230319082736969,
"fcm_dpo/margin": 2.1952528953552246,
"fcm_dpo/q_t": 0.2884211540222168,
"grad_norm": 120.71878814697266,
"learning_rate": 1.7908016745981856e-08,
"logits/chosen": 0.12301607429981232,
"logits/rejected": 0.09767352789640427,
"logps/chosen": -69.55258178710938,
"logps/ref_chosen": -66.5138168334961,
"logps/ref_rejected": -85.70820617675781,
"logps/rejected": -90.94223022460938,
"loss": 0.8243,
"margin_dpo/margin_mean": 2.1952528953552246,
"margin_dpo/margin_std": 2.4201831817626953,
"step": 590
},
{
"epoch": 0.8934240362811792,
"fcm_dpo/beta": 0.5360534191131592,
"fcm_dpo/delta": 0.06881964951753616,
"fcm_dpo/margin": 1.7440357208251953,
"fcm_dpo/q_t": 0.3370535969734192,
"grad_norm": 125.32640838623047,
"learning_rate": 1.7419845883949098e-08,
"logits/chosen": 0.2466006577014923,
"logits/rejected": 0.1991560012102127,
"logps/chosen": -63.56512451171875,
"logps/ref_chosen": -60.697181701660156,
"logps/ref_rejected": -86.12278747558594,
"logps/rejected": -90.73477172851562,
"loss": 0.9852,
"margin_dpo/margin_mean": 1.7440353631973267,
"margin_dpo/margin_std": 2.4317829608917236,
"step": 591
},
{
"epoch": 0.8949357520786092,
"fcm_dpo/beta": 0.5456318855285645,
"fcm_dpo/delta": 0.048897288739681244,
"fcm_dpo/margin": 1.750661849975586,
"fcm_dpo/q_t": 0.3427332043647766,
"grad_norm": 132.4198760986328,
"learning_rate": 1.6938180788793556e-08,
"logits/chosen": 0.18844714760780334,
"logits/rejected": 0.10343804955482483,
"logps/chosen": -54.16868591308594,
"logps/ref_chosen": -51.237327575683594,
"logps/ref_rejected": -81.60242462158203,
"logps/rejected": -86.2844467163086,
"loss": 0.9699,
"margin_dpo/margin_mean": 1.750661849975586,
"margin_dpo/margin_std": 2.502837657928467,
"step": 592
},
{
"epoch": 0.8964474678760394,
"fcm_dpo/beta": 0.5585802793502808,
"fcm_dpo/delta": 0.09174495190382004,
"fcm_dpo/margin": 1.640941858291626,
"fcm_dpo/q_t": 0.3366505205631256,
"grad_norm": 122.52076721191406,
"learning_rate": 1.6463034933723336e-08,
"logits/chosen": 0.13056322932243347,
"logits/rejected": 0.061800211668014526,
"logps/chosen": -44.626976013183594,
"logps/ref_chosen": -42.08000183105469,
"logps/ref_rejected": -68.47499084472656,
"logps/rejected": -72.66290283203125,
"loss": 1.0262,
"margin_dpo/margin_mean": 1.640941858291626,
"margin_dpo/margin_std": 2.4754600524902344,
"step": 593
},
{
"epoch": 0.8979591836734694,
"fcm_dpo/beta": 0.5791330337524414,
"fcm_dpo/delta": 0.23276039958000183,
"fcm_dpo/margin": 1.3571163415908813,
"fcm_dpo/q_t": 0.36690980195999146,
"grad_norm": 131.3136749267578,
"learning_rate": 1.5994421609589385e-08,
"logits/chosen": 0.12303361296653748,
"logits/rejected": 0.10529161989688873,
"logps/chosen": -66.68218994140625,
"logps/ref_chosen": -63.658668518066406,
"logps/ref_rejected": -70.35597229003906,
"logps/rejected": -74.73660278320312,
"loss": 1.0883,
"margin_dpo/margin_mean": 1.3571163415908813,
"margin_dpo/margin_std": 2.378408193588257,
"step": 594
},
{
"epoch": 0.8994708994708994,
"fcm_dpo/beta": 0.5620189905166626,
"fcm_dpo/delta": -0.30020615458488464,
"fcm_dpo/margin": 2.245738983154297,
"fcm_dpo/q_t": 0.2832089960575104,
"grad_norm": 123.73275756835938,
"learning_rate": 1.553235392451377e-08,
"logits/chosen": 0.2004849910736084,
"logits/rejected": 0.12713924050331116,
"logps/chosen": -59.19725799560547,
"logps/ref_chosen": -56.21875762939453,
"logps/ref_rejected": -83.95773315429688,
"logps/rejected": -89.18197631835938,
"loss": 0.8647,
"margin_dpo/margin_mean": 2.245739459991455,
"margin_dpo/margin_std": 2.714977741241455,
"step": 595
},
{
"epoch": 0.9009826152683296,
"fcm_dpo/beta": 0.5812000036239624,
"fcm_dpo/delta": 0.33480995893478394,
"fcm_dpo/margin": 1.184531569480896,
"fcm_dpo/q_t": 0.3911210000514984,
"grad_norm": 142.58895874023438,
"learning_rate": 1.507684480352292e-08,
"logits/chosen": 0.11143651604652405,
"logits/rejected": 0.11058718711137772,
"logps/chosen": -71.51290130615234,
"logps/ref_chosen": -68.48088073730469,
"logps/ref_rejected": -61.732967376708984,
"logps/rejected": -65.94952392578125,
"loss": 1.1753,
"margin_dpo/margin_mean": 1.1845312118530273,
"margin_dpo/margin_std": 2.4391579627990723,
"step": 596
},
{
"epoch": 0.9024943310657596,
"fcm_dpo/beta": 0.5947533249855042,
"fcm_dpo/delta": 0.0645606741309166,
"fcm_dpo/margin": 1.5828521251678467,
"fcm_dpo/q_t": 0.3390669822692871,
"grad_norm": 118.60302734375,
"learning_rate": 1.4627906988186111e-08,
"logits/chosen": 0.1252235770225525,
"logits/rejected": 0.09921297430992126,
"logps/chosen": -51.60657501220703,
"logps/ref_chosen": -48.85750961303711,
"logps/ref_rejected": -55.068084716796875,
"logps/rejected": -59.400001525878906,
"loss": 0.963,
"margin_dpo/margin_mean": 1.5828520059585571,
"margin_dpo/margin_std": 2.2610087394714355,
"step": 597
},
{
"epoch": 0.9040060468631897,
"fcm_dpo/beta": 0.6207314729690552,
"fcm_dpo/delta": 0.23146888613700867,
"fcm_dpo/margin": 1.2671819925308228,
"fcm_dpo/q_t": 0.38011521100997925,
"grad_norm": 151.75205993652344,
"learning_rate": 1.4185553036259095e-08,
"logits/chosen": 0.1728542447090149,
"logits/rejected": 0.11698366701602936,
"logps/chosen": -62.22666931152344,
"logps/ref_chosen": -58.88715362548828,
"logps/ref_rejected": -81.43145751953125,
"logps/rejected": -86.03816223144531,
"loss": 1.1674,
"margin_dpo/margin_mean": 1.2671819925308228,
"margin_dpo/margin_std": 2.4872426986694336,
"step": 598
},
{
"epoch": 0.9055177626606198,
"fcm_dpo/beta": 0.6450425386428833,
"fcm_dpo/delta": 0.09236402064561844,
"fcm_dpo/margin": 1.41303288936615,
"fcm_dpo/q_t": 0.3642594814300537,
"grad_norm": 166.78338623046875,
"learning_rate": 1.3749795321332885e-08,
"logits/chosen": 0.280083566904068,
"logits/rejected": 0.24155710637569427,
"logps/chosen": -60.974185943603516,
"logps/ref_chosen": -57.60719299316406,
"logps/ref_rejected": -71.80469512939453,
"logps/rejected": -76.584716796875,
"loss": 1.1191,
"margin_dpo/margin_mean": 1.4130332469940186,
"margin_dpo/margin_std": 2.563547134399414,
"step": 599
},
{
"epoch": 0.9070294784580499,
"fcm_dpo/beta": 0.6648072004318237,
"fcm_dpo/delta": 0.13435859978199005,
"fcm_dpo/margin": 1.3058445453643799,
"fcm_dpo/q_t": 0.35756915807724,
"grad_norm": 156.00218200683594,
"learning_rate": 1.3320646032487393e-08,
"logits/chosen": 0.2286624014377594,
"logits/rejected": 0.19494396448135376,
"logps/chosen": -61.69401550292969,
"logps/ref_chosen": -58.44231414794922,
"logps/ref_rejected": -83.64639282226562,
"logps/rejected": -88.20393371582031,
"loss": 1.0999,
"margin_dpo/margin_mean": 1.3058440685272217,
"margin_dpo/margin_std": 2.190995693206787,
"step": 600
},
{
"epoch": 0.9070294784580499,
"eval_fcm_dpo/beta": 0.6568994522094727,
"eval_logits/chosen": 0.19621142745018005,
"eval_logits/rejected": 0.15854774415493011,
"eval_logps/chosen": -77.82337951660156,
"eval_logps/ref_chosen": -74.85946655273438,
"eval_logps/ref_rejected": -79.54898834228516,
"eval_logps/rejected": -83.91117095947266,
"eval_loss": 0.5679579377174377,
"eval_margin_dpo/margin_mean": 1.3982763290405273,
"eval_margin_dpo/margin_std": 2.4811148643493652,
"eval_runtime": 37.9915,
"eval_samples_per_second": 60.619,
"eval_steps_per_second": 1.895,
"step": 600
},
{
"epoch": 0.90854119425548,
"fcm_dpo/beta": 0.6349242925643921,
"fcm_dpo/delta": -0.1545478105545044,
"fcm_dpo/margin": 1.7845826148986816,
"fcm_dpo/q_t": 0.3199031949043274,
"grad_norm": 141.2012176513672,
"learning_rate": 1.2898117173950868e-08,
"logits/chosen": 0.18726052343845367,
"logits/rejected": 0.13292908668518066,
"logps/chosen": -58.19509506225586,
"logps/ref_chosen": -55.59432601928711,
"logps/ref_rejected": -83.68630981445312,
"logps/rejected": -88.07167053222656,
"loss": 0.9784,
"margin_dpo/margin_mean": 1.7845829725265503,
"margin_dpo/margin_std": 2.5417721271514893,
"step": 601
},
{
"epoch": 0.91005291005291,
"fcm_dpo/beta": 0.6274067759513855,
"fcm_dpo/delta": -0.15826797485351562,
"fcm_dpo/margin": 1.8172000646591187,
"fcm_dpo/q_t": 0.3086685538291931,
"grad_norm": 139.8549346923828,
"learning_rate": 1.2482220564763667e-08,
"logits/chosen": 0.15303432941436768,
"logits/rejected": 0.11989377439022064,
"logps/chosen": -58.714149475097656,
"logps/ref_chosen": -56.349185943603516,
"logps/ref_rejected": -71.9959716796875,
"logps/rejected": -76.17813110351562,
"loss": 0.887,
"margin_dpo/margin_mean": 1.81719970703125,
"margin_dpo/margin_std": 2.2619805335998535,
"step": 602
},
{
"epoch": 0.9115646258503401,
"fcm_dpo/beta": 0.6145593523979187,
"fcm_dpo/delta": 0.003954831510782242,
"fcm_dpo/margin": 1.620744228363037,
"fcm_dpo/q_t": 0.32022157311439514,
"grad_norm": 121.14891052246094,
"learning_rate": 1.2072967838448051e-08,
"logits/chosen": 0.13416381180286407,
"logits/rejected": 0.08777206391096115,
"logps/chosen": -55.47536849975586,
"logps/ref_chosen": -53.16838836669922,
"logps/ref_rejected": -73.8604736328125,
"logps/rejected": -77.78820037841797,
"loss": 0.9039,
"margin_dpo/margin_mean": 1.620744228363037,
"margin_dpo/margin_std": 2.0299124717712402,
"step": 603
},
{
"epoch": 0.9130763416477702,
"fcm_dpo/beta": 0.6099786162376404,
"fcm_dpo/delta": -0.06591600924730301,
"fcm_dpo/margin": 1.7355551719665527,
"fcm_dpo/q_t": 0.3229469954967499,
"grad_norm": 138.081298828125,
"learning_rate": 1.1670370442682459e-08,
"logits/chosen": 0.1237378865480423,
"logits/rejected": 0.11915041506290436,
"logps/chosen": -75.28321838378906,
"logps/ref_chosen": -72.64942169189453,
"logps/ref_rejected": -69.8792724609375,
"logps/rejected": -74.24861907958984,
"loss": 0.9893,
"margin_dpo/margin_mean": 1.7355563640594482,
"margin_dpo/margin_std": 2.5649847984313965,
"step": 604
},
{
"epoch": 0.9145880574452003,
"fcm_dpo/beta": 0.6022388339042664,
"fcm_dpo/delta": -0.05076969414949417,
"fcm_dpo/margin": 1.7350056171417236,
"fcm_dpo/q_t": 0.3227270543575287,
"grad_norm": 143.35333251953125,
"learning_rate": 1.1274439638981532e-08,
"logits/chosen": 0.22395864129066467,
"logits/rejected": 0.17329740524291992,
"logps/chosen": -64.78287506103516,
"logps/ref_chosen": -61.61284637451172,
"logps/ref_rejected": -79.34398651123047,
"logps/rejected": -84.24903106689453,
"loss": 0.9715,
"margin_dpo/margin_mean": 1.7350056171417236,
"margin_dpo/margin_std": 2.434452772140503,
"step": 605
},
{
"epoch": 0.9160997732426304,
"fcm_dpo/beta": 0.6224014759063721,
"fcm_dpo/delta": 0.12130826711654663,
"fcm_dpo/margin": 1.4212950468063354,
"fcm_dpo/q_t": 0.35074251890182495,
"grad_norm": 151.14439392089844,
"learning_rate": 1.0885186502381016e-08,
"logits/chosen": 0.19038084149360657,
"logits/rejected": 0.14078065752983093,
"logps/chosen": -57.25608825683594,
"logps/ref_chosen": -54.46424102783203,
"logps/ref_rejected": -79.62708282470703,
"logps/rejected": -83.84022521972656,
"loss": 1.0273,
"margin_dpo/margin_mean": 1.421295404434204,
"margin_dpo/margin_std": 2.172295331954956,
"step": 606
},
{
"epoch": 0.9176114890400605,
"fcm_dpo/beta": 0.5959450006484985,
"fcm_dpo/delta": -0.13134704530239105,
"fcm_dpo/margin": 1.8611319065093994,
"fcm_dpo/q_t": 0.31175029277801514,
"grad_norm": 141.19546508789062,
"learning_rate": 1.0502621921127774e-08,
"logits/chosen": 0.14506211876869202,
"logits/rejected": 0.1156059056520462,
"logps/chosen": -65.67162322998047,
"logps/ref_chosen": -62.86086654663086,
"logps/ref_rejected": -72.5501937866211,
"logps/rejected": -77.22207641601562,
"loss": 0.9338,
"margin_dpo/margin_mean": 1.8611321449279785,
"margin_dpo/margin_std": 2.463554859161377,
"step": 607
},
{
"epoch": 0.9191232048374905,
"fcm_dpo/beta": 0.6229555606842041,
"fcm_dpo/delta": 0.24546337127685547,
"fcm_dpo/margin": 1.2424054145812988,
"fcm_dpo/q_t": 0.36555221676826477,
"grad_norm": 181.87330627441406,
"learning_rate": 1.0126756596375685e-08,
"logits/chosen": 0.16855892539024353,
"logits/rejected": 0.12452598661184311,
"logps/chosen": -66.26856994628906,
"logps/ref_chosen": -63.18071746826172,
"logps/ref_rejected": -99.15888214111328,
"logps/rejected": -103.48914337158203,
"loss": 1.0982,
"margin_dpo/margin_mean": 1.2424057722091675,
"margin_dpo/margin_std": 2.158616542816162,
"step": 608
},
{
"epoch": 0.9206349206349206,
"fcm_dpo/beta": 0.600549578666687,
"fcm_dpo/delta": -0.21868403255939484,
"fcm_dpo/margin": 1.9679713249206543,
"fcm_dpo/q_t": 0.2971384525299072,
"grad_norm": 103.79540252685547,
"learning_rate": 9.757601041885694e-09,
"logits/chosen": 0.21697968244552612,
"logits/rejected": 0.18763911724090576,
"logps/chosen": -51.205020904541016,
"logps/ref_chosen": -48.62322235107422,
"logps/ref_rejected": -68.28271484375,
"logps/rejected": -72.83248901367188,
"loss": 0.8665,
"margin_dpo/margin_mean": 1.9679714441299438,
"margin_dpo/margin_std": 2.3338661193847656,
"step": 609
},
{
"epoch": 0.9221466364323507,
"fcm_dpo/beta": 0.6042653322219849,
"fcm_dpo/delta": -0.03405376523733139,
"fcm_dpo/margin": 1.7052839994430542,
"fcm_dpo/q_t": 0.3533821403980255,
"grad_norm": 194.40943908691406,
"learning_rate": 9.395165583732379e-09,
"logits/chosen": 0.1579647958278656,
"logits/rejected": 0.1604270040988922,
"logps/chosen": -75.84585571289062,
"logps/ref_chosen": -72.66513061523438,
"logps/ref_rejected": -87.15310668945312,
"logps/rejected": -92.03912353515625,
"loss": 1.1081,
"margin_dpo/margin_mean": 1.7052838802337646,
"margin_dpo/margin_std": 3.0106663703918457,
"step": 610
},
{
"epoch": 0.9236583522297808,
"fcm_dpo/beta": 0.6121885776519775,
"fcm_dpo/delta": 0.25364020466804504,
"fcm_dpo/margin": 1.2458240985870361,
"fcm_dpo/q_t": 0.36857932806015015,
"grad_norm": 133.9265899658203,
"learning_rate": 9.03946036001449e-09,
"logits/chosen": 0.2268466055393219,
"logits/rejected": 0.1877264380455017,
"logps/chosen": -51.286346435546875,
"logps/ref_chosen": -48.30857849121094,
"logps/ref_rejected": -70.6141128540039,
"logps/rejected": -74.83771514892578,
"loss": 1.0992,
"margin_dpo/margin_mean": 1.2458235025405884,
"margin_dpo/margin_std": 2.115691661834717,
"step": 611
},
{
"epoch": 0.9251700680272109,
"fcm_dpo/beta": 0.6135225296020508,
"fcm_dpo/delta": -0.24519576132297516,
"fcm_dpo/margin": 1.9819927215576172,
"fcm_dpo/q_t": 0.30337560176849365,
"grad_norm": 141.30982971191406,
"learning_rate": 8.690495320571839e-09,
"logits/chosen": 0.10801693797111511,
"logits/rejected": 0.059915874153375626,
"logps/chosen": -64.04043579101562,
"logps/ref_chosen": -61.23155975341797,
"logps/ref_rejected": -94.37979888916016,
"logps/rejected": -99.17066955566406,
"loss": 0.9409,
"margin_dpo/margin_mean": 1.9819923639297485,
"margin_dpo/margin_std": 2.702681064605713,
"step": 612
},
{
"epoch": 0.926681783824641,
"fcm_dpo/beta": 0.5761626958847046,
"fcm_dpo/delta": -0.3495950698852539,
"fcm_dpo/margin": 2.265277862548828,
"fcm_dpo/q_t": 0.28574904799461365,
"grad_norm": 127.05760955810547,
"learning_rate": 8.348280226706722e-09,
"logits/chosen": 0.11358965933322906,
"logits/rejected": 0.09932907670736313,
"logps/chosen": -56.594276428222656,
"logps/ref_chosen": -53.98310852050781,
"logps/ref_rejected": -58.32208251953125,
"logps/rejected": -63.19852828979492,
"loss": 0.8489,
"margin_dpo/margin_mean": 2.2652783393859863,
"margin_dpo/margin_std": 2.772947072982788,
"step": 613
},
{
"epoch": 0.9281934996220711,
"fcm_dpo/beta": 0.569689929485321,
"fcm_dpo/delta": 0.031611911952495575,
"fcm_dpo/margin": 1.7016217708587646,
"fcm_dpo/q_t": 0.3136371970176697,
"grad_norm": 121.61019134521484,
"learning_rate": 8.012824650910937e-09,
"logits/chosen": 0.2070479393005371,
"logits/rejected": 0.1917956918478012,
"logps/chosen": -63.198665618896484,
"logps/ref_chosen": -60.24303436279297,
"logps/ref_rejected": -72.26258850097656,
"logps/rejected": -76.91984558105469,
"loss": 0.8664,
"margin_dpo/margin_mean": 1.701621174812317,
"margin_dpo/margin_std": 1.9565538167953491,
"step": 614
},
{
"epoch": 0.9297052154195011,
"fcm_dpo/beta": 0.5607982873916626,
"fcm_dpo/delta": -0.03319869935512543,
"fcm_dpo/margin": 1.835862398147583,
"fcm_dpo/q_t": 0.329486608505249,
"grad_norm": 150.09515380859375,
"learning_rate": 7.684137976598088e-09,
"logits/chosen": 0.1885606348514557,
"logits/rejected": 0.15545931458473206,
"logps/chosen": -75.15232849121094,
"logps/ref_chosen": -72.09467315673828,
"logps/ref_rejected": -104.02980041503906,
"logps/rejected": -108.92332458496094,
"loss": 0.9693,
"margin_dpo/margin_mean": 1.8358616828918457,
"margin_dpo/margin_std": 2.62037992477417,
"step": 615
},
{
"epoch": 0.9312169312169312,
"fcm_dpo/beta": 0.5787394046783447,
"fcm_dpo/delta": 0.22798365354537964,
"fcm_dpo/margin": 1.3659520149230957,
"fcm_dpo/q_t": 0.36905261874198914,
"grad_norm": 138.57786560058594,
"learning_rate": 7.36222939784098e-09,
"logits/chosen": 0.17158043384552002,
"logits/rejected": 0.1132676973938942,
"logps/chosen": -61.71527862548828,
"logps/ref_chosen": -58.530723571777344,
"logps/ref_rejected": -75.48025512695312,
"logps/rejected": -80.03076171875,
"loss": 1.0819,
"margin_dpo/margin_mean": 1.365952491760254,
"margin_dpo/margin_std": 2.3640403747558594,
"step": 616
},
{
"epoch": 0.9327286470143613,
"fcm_dpo/beta": 0.5880202651023865,
"fcm_dpo/delta": 0.12598100304603577,
"fcm_dpo/margin": 1.4980671405792236,
"fcm_dpo/q_t": 0.35036730766296387,
"grad_norm": 136.1953887939453,
"learning_rate": 7.047107919114586e-09,
"logits/chosen": 0.1694568246603012,
"logits/rejected": 0.1344485729932785,
"logps/chosen": -60.68329620361328,
"logps/ref_chosen": -57.608673095703125,
"logps/ref_rejected": -81.22109985351562,
"logps/rejected": -85.79379272460938,
"loss": 1.0066,
"margin_dpo/margin_mean": 1.4980677366256714,
"margin_dpo/margin_std": 2.2009902000427246,
"step": 617
},
{
"epoch": 0.9342403628117913,
"fcm_dpo/beta": 0.5951837301254272,
"fcm_dpo/delta": 0.013019606471061707,
"fcm_dpo/margin": 1.654120922088623,
"fcm_dpo/q_t": 0.326251745223999,
"grad_norm": 141.9059295654297,
"learning_rate": 6.738782355044048e-09,
"logits/chosen": 0.1482279747724533,
"logits/rejected": 0.07427319884300232,
"logps/chosen": -59.329376220703125,
"logps/ref_chosen": -56.69594192504883,
"logps/ref_rejected": -85.92362976074219,
"logps/rejected": -90.21118927001953,
"loss": 0.9343,
"margin_dpo/margin_mean": 1.6541210412979126,
"margin_dpo/margin_std": 2.201620578765869,
"step": 618
},
{
"epoch": 0.9357520786092215,
"fcm_dpo/beta": 0.6088467836380005,
"fcm_dpo/delta": 0.07530087977647781,
"fcm_dpo/margin": 1.53031587600708,
"fcm_dpo/q_t": 0.3448328375816345,
"grad_norm": 150.22421264648438,
"learning_rate": 6.437261330158206e-09,
"logits/chosen": 0.22948047518730164,
"logits/rejected": 0.18049129843711853,
"logps/chosen": -56.79576873779297,
"logps/ref_chosen": -54.05841827392578,
"logps/ref_rejected": -83.55493927001953,
"logps/rejected": -87.82260131835938,
"loss": 1.0339,
"margin_dpo/margin_mean": 1.53031587600708,
"margin_dpo/margin_std": 2.370837450027466,
"step": 619
},
{
"epoch": 0.9372637944066515,
"fcm_dpo/beta": 0.6322528123855591,
"fcm_dpo/delta": 0.09385835379362106,
"fcm_dpo/margin": 1.4362270832061768,
"fcm_dpo/q_t": 0.3465351164340973,
"grad_norm": 173.79412841796875,
"learning_rate": 6.142553278648238e-09,
"logits/chosen": 0.17980945110321045,
"logits/rejected": 0.16840043663978577,
"logps/chosen": -66.160888671875,
"logps/ref_chosen": -63.36971664428711,
"logps/ref_rejected": -65.68269348144531,
"logps/rejected": -69.91009521484375,
"loss": 1.0082,
"margin_dpo/margin_mean": 1.4362270832061768,
"margin_dpo/margin_std": 2.130248546600342,
"step": 620
},
{
"epoch": 0.9387755102040817,
"fcm_dpo/beta": 0.6423999071121216,
"fcm_dpo/delta": 0.0907469242811203,
"fcm_dpo/margin": 1.4205418825149536,
"fcm_dpo/q_t": 0.35109221935272217,
"grad_norm": 151.4163055419922,
"learning_rate": 5.854666444131934e-09,
"logits/chosen": 0.19905002415180206,
"logits/rejected": 0.12930849194526672,
"logps/chosen": -55.218666076660156,
"logps/ref_chosen": -52.321224212646484,
"logps/ref_rejected": -88.09001159667969,
"logps/rejected": -92.40798950195312,
"loss": 1.0421,
"margin_dpo/margin_mean": 1.420541524887085,
"margin_dpo/margin_std": 2.2282516956329346,
"step": 621
},
{
"epoch": 0.9402872260015117,
"fcm_dpo/beta": 0.640907347202301,
"fcm_dpo/delta": 0.03911030665040016,
"fcm_dpo/margin": 1.5050252676010132,
"fcm_dpo/q_t": 0.3320958614349365,
"grad_norm": 137.0905303955078,
"learning_rate": 5.573608879422875e-09,
"logits/chosen": 0.13063614070415497,
"logits/rejected": 0.1020088791847229,
"logps/chosen": -62.50457763671875,
"logps/ref_chosen": -59.86545944213867,
"logps/ref_rejected": -81.86668395996094,
"logps/rejected": -86.01081848144531,
"loss": 0.9325,
"margin_dpo/margin_mean": 1.5050253868103027,
"margin_dpo/margin_std": 2.0284576416015625,
"step": 622
},
{
"epoch": 0.9417989417989417,
"fcm_dpo/beta": 0.6268334984779358,
"fcm_dpo/delta": -0.16526103019714355,
"fcm_dpo/margin": 1.8289923667907715,
"fcm_dpo/q_t": 0.30245280265808105,
"grad_norm": 126.97518920898438,
"learning_rate": 5.299388446305342e-09,
"logits/chosen": 0.16525626182556152,
"logits/rejected": 0.11132755130529404,
"logps/chosen": -70.49075317382812,
"logps/ref_chosen": -67.36846160888672,
"logps/ref_rejected": -82.02733612060547,
"logps/rejected": -86.97863006591797,
"loss": 0.8618,
"margin_dpo/margin_mean": 1.828992247581482,
"margin_dpo/margin_std": 2.1596312522888184,
"step": 623
},
{
"epoch": 0.9433106575963719,
"fcm_dpo/beta": 0.6166424751281738,
"fcm_dpo/delta": -0.002019442617893219,
"fcm_dpo/margin": 1.6235092878341675,
"fcm_dpo/q_t": 0.33338773250579834,
"grad_norm": 134.533447265625,
"learning_rate": 5.03201281531429e-09,
"logits/chosen": 0.17911109328269958,
"logits/rejected": 0.10127080231904984,
"logps/chosen": -53.71633529663086,
"logps/ref_chosen": -51.02655029296875,
"logps/ref_rejected": -76.49203491210938,
"logps/rejected": -80.80532836914062,
"loss": 0.9784,
"margin_dpo/margin_mean": 1.623509407043457,
"margin_dpo/margin_std": 2.3413143157958984,
"step": 624
},
{
"epoch": 0.9448223733938019,
"fcm_dpo/beta": 0.6369531154632568,
"fcm_dpo/delta": 0.11569374054670334,
"fcm_dpo/margin": 1.400294542312622,
"fcm_dpo/q_t": 0.3541494905948639,
"grad_norm": 148.3859100341797,
"learning_rate": 4.7714894655209174e-09,
"logits/chosen": 0.20979532599449158,
"logits/rejected": 0.1446528434753418,
"logps/chosen": -57.27146530151367,
"logps/ref_chosen": -54.20761489868164,
"logps/ref_rejected": -84.93669128417969,
"logps/rejected": -89.40084075927734,
"loss": 1.0516,
"margin_dpo/margin_mean": 1.4002941846847534,
"margin_dpo/margin_std": 2.27569317817688,
"step": 625
},
{
"epoch": 0.9463340891912321,
"fcm_dpo/beta": 0.6106563806533813,
"fcm_dpo/delta": -0.2672712206840515,
"fcm_dpo/margin": 2.0213675498962402,
"fcm_dpo/q_t": 0.31362709403038025,
"grad_norm": 127.49701690673828,
"learning_rate": 4.517825684323323e-09,
"logits/chosen": 0.24240854382514954,
"logits/rejected": 0.1507807821035385,
"logps/chosen": -47.64917755126953,
"logps/ref_chosen": -45.06201934814453,
"logps/ref_rejected": -89.66368103027344,
"logps/rejected": -94.27220153808594,
"loss": 0.9523,
"margin_dpo/margin_mean": 2.0213675498962402,
"margin_dpo/margin_std": 2.8801679611206055,
"step": 626
},
{
"epoch": 0.9478458049886621,
"fcm_dpo/beta": 0.5770605802536011,
"fcm_dpo/delta": -0.23875784873962402,
"fcm_dpo/margin": 2.0926780700683594,
"fcm_dpo/q_t": 0.3067499101161957,
"grad_norm": 135.4554901123047,
"learning_rate": 4.271028567242818e-09,
"logits/chosen": 0.12116050720214844,
"logits/rejected": 0.04095185548067093,
"logps/chosen": -61.70726776123047,
"logps/ref_chosen": -58.791053771972656,
"logps/ref_rejected": -94.90802001953125,
"logps/rejected": -99.91691589355469,
"loss": 0.888,
"margin_dpo/margin_mean": 2.0926778316497803,
"margin_dpo/margin_std": 2.661421298980713,
"step": 627
},
{
"epoch": 0.9493575207860923,
"fcm_dpo/beta": 0.5617523193359375,
"fcm_dpo/delta": -0.23493322730064392,
"fcm_dpo/margin": 2.1469919681549072,
"fcm_dpo/q_t": 0.29745668172836304,
"grad_norm": 120.84452056884766,
"learning_rate": 4.0311050177251895e-09,
"logits/chosen": 0.19451142847537994,
"logits/rejected": 0.16487011313438416,
"logps/chosen": -55.42369842529297,
"logps/ref_chosen": -52.80357360839844,
"logps/ref_rejected": -76.49468994140625,
"logps/rejected": -81.26181030273438,
"loss": 0.9097,
"margin_dpo/margin_mean": 2.146991729736328,
"margin_dpo/margin_std": 2.7369041442871094,
"step": 628
},
{
"epoch": 0.9508692365835223,
"fcm_dpo/beta": 0.5566290616989136,
"fcm_dpo/delta": 0.07311487942934036,
"fcm_dpo/margin": 1.6754742860794067,
"fcm_dpo/q_t": 0.3302459716796875,
"grad_norm": 120.0956802368164,
"learning_rate": 3.798061746947995e-09,
"logits/chosen": 0.17901673913002014,
"logits/rejected": 0.17002899944782257,
"logps/chosen": -73.4151382446289,
"logps/ref_chosen": -70.71749877929688,
"logps/ref_rejected": -78.96273803710938,
"logps/rejected": -83.33586120605469,
"loss": 0.8944,
"margin_dpo/margin_mean": 1.6754742860794067,
"margin_dpo/margin_std": 2.052621603012085,
"step": 629
},
{
"epoch": 0.9523809523809523,
"fcm_dpo/beta": 0.5548304915428162,
"fcm_dpo/delta": -0.06957443058490753,
"fcm_dpo/margin": 1.912638783454895,
"fcm_dpo/q_t": 0.32286617159843445,
"grad_norm": 108.01695251464844,
"learning_rate": 3.5719052736323806e-09,
"logits/chosen": 0.1528751254081726,
"logits/rejected": 0.11582262814044952,
"logps/chosen": -58.81450653076172,
"logps/ref_chosen": -56.201412200927734,
"logps/ref_rejected": -74.69807434082031,
"logps/rejected": -79.22380065917969,
"loss": 0.891,
"margin_dpo/margin_mean": 1.9126391410827637,
"margin_dpo/margin_std": 2.4235522747039795,
"step": 630
},
{
"epoch": 0.9538926681783825,
"fcm_dpo/beta": 0.5212994813919067,
"fcm_dpo/delta": -0.20469579100608826,
"fcm_dpo/margin": 2.244609832763672,
"fcm_dpo/q_t": 0.2998984754085541,
"grad_norm": 126.16265869140625,
"learning_rate": 3.352641923861144e-09,
"logits/chosen": 0.24326127767562866,
"logits/rejected": 0.16665881872177124,
"logps/chosen": -61.588680267333984,
"logps/ref_chosen": -58.82059860229492,
"logps/ref_rejected": -96.51437377929688,
"logps/rejected": -101.5270767211914,
"loss": 0.8734,
"margin_dpo/margin_mean": 2.244609832763672,
"margin_dpo/margin_std": 2.739107370376587,
"step": 631
},
{
"epoch": 0.9554043839758125,
"fcm_dpo/beta": 0.5139273405075073,
"fcm_dpo/delta": -0.14226512610912323,
"fcm_dpo/margin": 2.19085693359375,
"fcm_dpo/q_t": 0.28850066661834717,
"grad_norm": 94.82600402832031,
"learning_rate": 3.140277830901428e-09,
"logits/chosen": 0.21174263954162598,
"logits/rejected": 0.18780021369457245,
"logps/chosen": -61.52200698852539,
"logps/ref_chosen": -58.786048889160156,
"logps/ref_rejected": -67.21923828125,
"logps/rejected": -72.14605712890625,
"loss": 0.8071,
"margin_dpo/margin_mean": 2.19085693359375,
"margin_dpo/margin_std": 2.2715227603912354,
"step": 632
},
{
"epoch": 0.9569160997732427,
"fcm_dpo/beta": 0.5195468664169312,
"fcm_dpo/delta": 0.10754405707120895,
"fcm_dpo/margin": 1.7362931966781616,
"fcm_dpo/q_t": 0.3419226408004761,
"grad_norm": 99.7951431274414,
"learning_rate": 2.9348189350335007e-09,
"logits/chosen": 0.1292242705821991,
"logits/rejected": 0.08500467240810394,
"logps/chosen": -54.51697540283203,
"logps/ref_chosen": -52.13019561767578,
"logps/ref_rejected": -67.23016357421875,
"logps/rejected": -71.35323333740234,
"loss": 0.971,
"margin_dpo/margin_mean": 1.7362935543060303,
"margin_dpo/margin_std": 2.4587457180023193,
"step": 633
},
{
"epoch": 0.9584278155706727,
"fcm_dpo/beta": 0.5588383674621582,
"fcm_dpo/delta": 0.42835086584091187,
"fcm_dpo/margin": 1.0649152994155884,
"fcm_dpo/q_t": 0.4058130383491516,
"grad_norm": 154.30062866210938,
"learning_rate": 2.736270983384276e-09,
"logits/chosen": 0.22938773036003113,
"logits/rejected": 0.22998389601707458,
"logps/chosen": -63.95368194580078,
"logps/ref_chosen": -60.97979736328125,
"logps/ref_rejected": -58.50825119018555,
"logps/rejected": -62.54705047607422,
"loss": 1.2187,
"margin_dpo/margin_mean": 1.0649151802062988,
"margin_dpo/margin_std": 2.3998215198516846,
"step": 634
},
{
"epoch": 0.9599395313681028,
"fcm_dpo/beta": 0.5766314268112183,
"fcm_dpo/delta": 0.07918489724397659,
"fcm_dpo/margin": 1.6092689037322998,
"fcm_dpo/q_t": 0.3603755235671997,
"grad_norm": 152.15786743164062,
"learning_rate": 2.5446395297668287e-09,
"logits/chosen": 0.10523584485054016,
"logits/rejected": 0.06159904971718788,
"logps/chosen": -69.22645568847656,
"logps/ref_chosen": -65.9730224609375,
"logps/ref_rejected": -85.61317443847656,
"logps/rejected": -90.47587585449219,
"loss": 1.0967,
"margin_dpo/margin_mean": 1.6092685461044312,
"margin_dpo/margin_std": 2.7506871223449707,
"step": 635
},
{
"epoch": 0.9614512471655329,
"fcm_dpo/beta": 0.5654884576797485,
"fcm_dpo/delta": -0.11426550894975662,
"fcm_dpo/margin": 1.9465041160583496,
"fcm_dpo/q_t": 0.2917436957359314,
"grad_norm": 109.79877471923828,
"learning_rate": 2.359929934524829e-09,
"logits/chosen": 0.1432669758796692,
"logits/rejected": 0.07593058794736862,
"logps/chosen": -51.8205680847168,
"logps/ref_chosen": -49.140167236328125,
"logps/ref_rejected": -81.26971435546875,
"logps/rejected": -85.89661407470703,
"loss": 0.7784,
"margin_dpo/margin_mean": 1.9465045928955078,
"margin_dpo/margin_std": 1.9250316619873047,
"step": 636
},
{
"epoch": 0.9629629629629629,
"fcm_dpo/beta": 0.5774806141853333,
"fcm_dpo/delta": 0.09339653700590134,
"fcm_dpo/margin": 1.5811973810195923,
"fcm_dpo/q_t": 0.3516439199447632,
"grad_norm": 156.06459045410156,
"learning_rate": 2.1821473643827137e-09,
"logits/chosen": 0.14332839846611023,
"logits/rejected": 0.08663806319236755,
"logps/chosen": -76.97013854980469,
"logps/ref_chosen": -73.69658660888672,
"logps/ref_rejected": -83.01487731933594,
"logps/rejected": -87.86962890625,
"loss": 1.0302,
"margin_dpo/margin_mean": 1.5811975002288818,
"margin_dpo/margin_std": 2.551753044128418,
"step": 637
},
{
"epoch": 0.9644746787603931,
"fcm_dpo/beta": 0.5492261648178101,
"fcm_dpo/delta": -0.27356475591659546,
"fcm_dpo/margin": 2.252800941467285,
"fcm_dpo/q_t": 0.28849977254867554,
"grad_norm": 118.49738311767578,
"learning_rate": 2.0112967923011646e-09,
"logits/chosen": 0.1485036015510559,
"logits/rejected": 0.10658858716487885,
"logps/chosen": -65.84213256835938,
"logps/ref_chosen": -62.78158187866211,
"logps/ref_rejected": -85.40478515625,
"logps/rejected": -90.71813201904297,
"loss": 0.8175,
"margin_dpo/margin_mean": 2.252800464630127,
"margin_dpo/margin_std": 2.519404888153076,
"step": 638
},
{
"epoch": 0.9659863945578231,
"fcm_dpo/beta": 0.5499449372291565,
"fcm_dpo/delta": 0.053651005029678345,
"fcm_dpo/margin": 1.7299044132232666,
"fcm_dpo/q_t": 0.33649200201034546,
"grad_norm": 118.28497314453125,
"learning_rate": 1.847382997337943e-09,
"logits/chosen": 0.14743350446224213,
"logits/rejected": 0.052376627922058105,
"logps/chosen": -56.702606201171875,
"logps/ref_chosen": -53.76658630371094,
"logps/ref_rejected": -72.30009460449219,
"logps/rejected": -76.96601867675781,
"loss": 0.9522,
"margin_dpo/margin_mean": 1.7299044132232666,
"margin_dpo/margin_std": 2.438140392303467,
"step": 639
},
{
"epoch": 0.9674981103552532,
"fcm_dpo/beta": 0.5619925260543823,
"fcm_dpo/delta": 0.15041983127593994,
"fcm_dpo/margin": 1.5351797342300415,
"fcm_dpo/q_t": 0.3423158526420593,
"grad_norm": 126.87297821044922,
"learning_rate": 1.690410564514244e-09,
"logits/chosen": 0.23225465416908264,
"logits/rejected": 0.19046524167060852,
"logps/chosen": -54.199310302734375,
"logps/ref_chosen": -51.41777801513672,
"logps/ref_rejected": -77.27879333496094,
"logps/rejected": -81.59550476074219,
"loss": 1.0331,
"margin_dpo/margin_mean": 1.5351800918579102,
"margin_dpo/margin_std": 2.3253612518310547,
"step": 640
},
{
"epoch": 0.9690098261526833,
"fcm_dpo/beta": 0.5881354212760925,
"fcm_dpo/delta": 0.17225059866905212,
"fcm_dpo/margin": 1.4259064197540283,
"fcm_dpo/q_t": 0.34505194425582886,
"grad_norm": 165.1323699951172,
"learning_rate": 1.5403838846864692e-09,
"logits/chosen": 0.13332295417785645,
"logits/rejected": 0.10575246065855026,
"logps/chosen": -74.10830688476562,
"logps/ref_chosen": -71.0546646118164,
"logps/ref_rejected": -82.2440185546875,
"logps/rejected": -86.72355651855469,
"loss": 1.0339,
"margin_dpo/margin_mean": 1.4259059429168701,
"margin_dpo/margin_std": 2.282135486602783,
"step": 641
},
{
"epoch": 0.9705215419501134,
"fcm_dpo/beta": 0.5971714854240417,
"fcm_dpo/delta": 0.17347897589206696,
"fcm_dpo/margin": 1.4081449508666992,
"fcm_dpo/q_t": 0.3561500310897827,
"grad_norm": 159.68527221679688,
"learning_rate": 1.3973071544233218e-09,
"logits/chosen": 0.12671303749084473,
"logits/rejected": 0.12140335142612457,
"logps/chosen": -72.1884994506836,
"logps/ref_chosen": -68.92927551269531,
"logps/ref_rejected": -70.85682678222656,
"logps/rejected": -75.52420043945312,
"loss": 1.078,
"margin_dpo/margin_mean": 1.4081450700759888,
"margin_dpo/margin_std": 2.3124876022338867,
"step": 642
},
{
"epoch": 0.9720332577475435,
"fcm_dpo/beta": 0.6271607875823975,
"fcm_dpo/delta": 0.3203769028186798,
"fcm_dpo/margin": 1.1184437274932861,
"fcm_dpo/q_t": 0.34422484040260315,
"grad_norm": 2246.781982421875,
"learning_rate": 1.261184375888541e-09,
"logits/chosen": 0.1141979843378067,
"logits/rejected": 0.05281982570886612,
"logps/chosen": -68.73892974853516,
"logps/ref_chosen": -65.30903625488281,
"logps/ref_rejected": -83.61613464355469,
"logps/rejected": -88.16447448730469,
"loss": 1.4846,
"margin_dpo/margin_mean": 1.1184438467025757,
"margin_dpo/margin_std": 3.4751362800598145,
"step": 643
},
{
"epoch": 0.9735449735449735,
"fcm_dpo/beta": 0.6507506966590881,
"fcm_dpo/delta": 0.037187736481428146,
"fcm_dpo/margin": 1.4850530624389648,
"fcm_dpo/q_t": 0.3464422821998596,
"grad_norm": 163.06446838378906,
"learning_rate": 1.1320193567288527e-09,
"logits/chosen": 0.2439633458852768,
"logits/rejected": 0.21551108360290527,
"logps/chosen": -53.948020935058594,
"logps/ref_chosen": -51.002601623535156,
"logps/ref_rejected": -64.46372985839844,
"logps/rejected": -68.89420318603516,
"loss": 1.0968,
"margin_dpo/margin_mean": 1.4850530624389648,
"margin_dpo/margin_std": 2.556273937225342,
"step": 644
},
{
"epoch": 0.9750566893424036,
"fcm_dpo/beta": 0.6438242197036743,
"fcm_dpo/delta": -0.08629032969474792,
"fcm_dpo/margin": 1.6724817752838135,
"fcm_dpo/q_t": 0.31522613763809204,
"grad_norm": 150.0731964111328,
"learning_rate": 1.0098157099674987e-09,
"logits/chosen": 0.0987434908747673,
"logits/rejected": 0.07496701180934906,
"logps/chosen": -64.18575286865234,
"logps/ref_chosen": -60.963409423828125,
"logps/ref_rejected": -69.73353576660156,
"logps/rejected": -74.62835693359375,
"loss": 0.8818,
"margin_dpo/margin_mean": 1.6724815368652344,
"margin_dpo/margin_std": 2.17503023147583,
"step": 645
},
{
"epoch": 0.9765684051398337,
"fcm_dpo/beta": 0.6224489212036133,
"fcm_dpo/delta": -0.2178870290517807,
"fcm_dpo/margin": 1.9151108264923096,
"fcm_dpo/q_t": 0.30455827713012695,
"grad_norm": 125.87390899658203,
"learning_rate": 8.945768539031783e-10,
"logits/chosen": 0.2028486728668213,
"logits/rejected": 0.16405922174453735,
"logps/chosen": -65.69281005859375,
"logps/ref_chosen": -62.290069580078125,
"logps/ref_rejected": -85.54812622070312,
"logps/rejected": -90.86598205566406,
"loss": 0.9126,
"margin_dpo/margin_mean": 1.9151103496551514,
"margin_dpo/margin_std": 2.5356059074401855,
"step": 646
},
{
"epoch": 0.9780801209372638,
"fcm_dpo/beta": 0.5961904525756836,
"fcm_dpo/delta": -0.26335006952285767,
"fcm_dpo/margin": 2.065751552581787,
"fcm_dpo/q_t": 0.30408942699432373,
"grad_norm": 186.32350158691406,
"learning_rate": 7.863060120144316e-10,
"logits/chosen": 0.20603081583976746,
"logits/rejected": 0.13498756289482117,
"logps/chosen": -70.44767761230469,
"logps/ref_chosen": -67.515869140625,
"logps/ref_rejected": -101.50871276855469,
"logps/rejected": -106.50627136230469,
"loss": 0.9259,
"margin_dpo/margin_mean": 2.065751075744629,
"margin_dpo/margin_std": 2.7231507301330566,
"step": 647
},
{
"epoch": 0.9795918367346939,
"fcm_dpo/beta": 0.5804147720336914,
"fcm_dpo/delta": 0.019788160920143127,
"fcm_dpo/margin": 1.6904387474060059,
"fcm_dpo/q_t": 0.33119359612464905,
"grad_norm": 137.32240295410156,
"learning_rate": 6.850062128694045e-10,
"logits/chosen": 0.13131186366081238,
"logits/rejected": 0.0793139785528183,
"logps/chosen": -67.7803726196289,
"logps/ref_chosen": -64.59593963623047,
"logps/ref_rejected": -83.384033203125,
"logps/rejected": -88.25890350341797,
"loss": 0.9714,
"margin_dpo/margin_mean": 1.6904385089874268,
"margin_dpo/margin_std": 2.3690028190612793,
"step": 648
},
{
"epoch": 0.981103552532124,
"fcm_dpo/beta": 0.5746303796768188,
"fcm_dpo/delta": -0.07765467464923859,
"fcm_dpo/margin": 1.8583083152770996,
"fcm_dpo/q_t": 0.3117806911468506,
"grad_norm": 126.6104736328125,
"learning_rate": 5.906802900412788e-10,
"logits/chosen": 0.17333395779132843,
"logits/rejected": 0.1271257847547531,
"logps/chosen": -52.0538330078125,
"logps/ref_chosen": -49.30964660644531,
"logps/ref_rejected": -73.73710632324219,
"logps/rejected": -78.339599609375,
"loss": 0.931,
"margin_dpo/margin_mean": 1.8583080768585205,
"margin_dpo/margin_std": 2.380646228790283,
"step": 649
},
{
"epoch": 0.982615268329554,
"fcm_dpo/beta": 0.5875668525695801,
"fcm_dpo/delta": 0.08222602307796478,
"fcm_dpo/margin": 1.5724918842315674,
"fcm_dpo/q_t": 0.3484124541282654,
"grad_norm": 169.66038513183594,
"learning_rate": 5.033308820289184e-10,
"logits/chosen": 0.2582772374153137,
"logits/rejected": 0.21315817534923553,
"logps/chosen": -57.925880432128906,
"logps/ref_chosen": -55.06325912475586,
"logps/ref_rejected": -77.39610290527344,
"logps/rejected": -81.83120727539062,
"loss": 1.0498,
"margin_dpo/margin_mean": 1.5724914073944092,
"margin_dpo/margin_std": 2.477647542953491,
"step": 650
},
{
"epoch": 0.9841269841269841,
"fcm_dpo/beta": 0.600640058517456,
"fcm_dpo/delta": 0.055977076292037964,
"fcm_dpo/margin": 1.5707225799560547,
"fcm_dpo/q_t": 0.3421096205711365,
"grad_norm": 148.89381408691406,
"learning_rate": 4.2296043218295606e-10,
"logits/chosen": 0.24641644954681396,
"logits/rejected": 0.1934581995010376,
"logps/chosen": -56.82018280029297,
"logps/ref_chosen": -54.065162658691406,
"logps/ref_rejected": -77.79080200195312,
"logps/rejected": -82.1165542602539,
"loss": 0.9553,
"margin_dpo/margin_mean": 1.5707224607467651,
"margin_dpo/margin_std": 2.1566340923309326,
"step": 651
},
{
"epoch": 0.9856386999244142,
"fcm_dpo/beta": 0.5921408534049988,
"fcm_dpo/delta": 0.02783304452896118,
"fcm_dpo/margin": 1.6457834243774414,
"fcm_dpo/q_t": 0.3308331370353699,
"grad_norm": 141.38235473632812,
"learning_rate": 3.4957118863768176e-10,
"logits/chosen": 0.20549950003623962,
"logits/rejected": 0.15806761384010315,
"logps/chosen": -66.84440612792969,
"logps/ref_chosen": -63.64030456542969,
"logps/ref_rejected": -78.86882019042969,
"logps/rejected": -83.71870422363281,
"loss": 0.9701,
"margin_dpo/margin_mean": 1.6457829475402832,
"margin_dpo/margin_std": 2.2666330337524414,
"step": 652
},
{
"epoch": 0.9871504157218443,
"fcm_dpo/beta": 0.6015132665634155,
"fcm_dpo/delta": 0.07994754612445831,
"fcm_dpo/margin": 1.541993260383606,
"fcm_dpo/q_t": 0.352220356464386,
"grad_norm": 151.9169464111328,
"learning_rate": 2.831652042480093e-10,
"logits/chosen": 0.18092171847820282,
"logits/rejected": 0.13986843824386597,
"logps/chosen": -64.58534240722656,
"logps/ref_chosen": -61.668373107910156,
"logps/ref_rejected": -73.83012390136719,
"logps/rejected": -78.28909301757812,
"loss": 1.0303,
"margin_dpo/margin_mean": 1.5419931411743164,
"margin_dpo/margin_std": 2.441131591796875,
"step": 653
},
{
"epoch": 0.9886621315192744,
"fcm_dpo/beta": 0.5887470245361328,
"fcm_dpo/delta": -0.13443875312805176,
"fcm_dpo/margin": 1.8974157571792603,
"fcm_dpo/q_t": 0.31610941886901855,
"grad_norm": 141.95655822753906,
"learning_rate": 2.2374433653205016e-10,
"logits/chosen": 0.170884370803833,
"logits/rejected": 0.10386738926172256,
"logps/chosen": -60.361976623535156,
"logps/ref_chosen": -57.568267822265625,
"logps/ref_rejected": -87.74789428710938,
"logps/rejected": -92.43901824951172,
"loss": 0.905,
"margin_dpo/margin_mean": 1.8974157571792603,
"margin_dpo/margin_std": 2.5552029609680176,
"step": 654
},
{
"epoch": 0.9901738473167044,
"fcm_dpo/beta": 0.5729016661643982,
"fcm_dpo/delta": -0.060607388615608215,
"fcm_dpo/margin": 1.8254590034484863,
"fcm_dpo/q_t": 0.309136301279068,
"grad_norm": 104.31941223144531,
"learning_rate": 1.7131024761923852e-10,
"logits/chosen": 0.1574726104736328,
"logits/rejected": 0.0878022313117981,
"logps/chosen": -54.848045349121094,
"logps/ref_chosen": -52.14714813232422,
"logps/ref_rejected": -80.85014343261719,
"logps/rejected": -85.37649536132812,
"loss": 0.8471,
"margin_dpo/margin_mean": 1.8254594802856445,
"margin_dpo/margin_std": 2.0447256565093994,
"step": 655
},
{
"epoch": 0.9916855631141346,
"fcm_dpo/beta": 0.555420458316803,
"fcm_dpo/delta": -0.24778330326080322,
"fcm_dpo/margin": 2.184002161026001,
"fcm_dpo/q_t": 0.29599303007125854,
"grad_norm": 112.63178253173828,
"learning_rate": 1.2586440420372934e-10,
"logits/chosen": 0.16302891075611115,
"logits/rejected": 0.12122651189565659,
"logps/chosen": -76.25328063964844,
"logps/ref_chosen": -73.25672912597656,
"logps/ref_rejected": -85.35127258300781,
"logps/rejected": -90.53182220458984,
"loss": 0.82,
"margin_dpo/margin_mean": 2.1840016841888428,
"margin_dpo/margin_std": 2.538489818572998,
"step": 656
},
{
"epoch": 0.9931972789115646,
"fcm_dpo/beta": 0.5423201322555542,
"fcm_dpo/delta": -0.10848333686590195,
"fcm_dpo/margin": 2.0198726654052734,
"fcm_dpo/q_t": 0.3146105408668518,
"grad_norm": 127.41387176513672,
"learning_rate": 8.740807750345913e-11,
"logits/chosen": 0.26283034682273865,
"logits/rejected": 0.19647637009620667,
"logps/chosen": -52.75443649291992,
"logps/ref_chosen": -49.72339630126953,
"logps/ref_rejected": -75.1568603515625,
"logps/rejected": -80.20777893066406,
"loss": 0.9165,
"margin_dpo/margin_mean": 2.0198724269866943,
"margin_dpo/margin_std": 2.636084794998169,
"step": 657
},
{
"epoch": 0.9947089947089947,
"fcm_dpo/beta": 0.556152880191803,
"fcm_dpo/delta": 0.08222407847642899,
"fcm_dpo/margin": 1.6563022136688232,
"fcm_dpo/q_t": 0.35666483640670776,
"grad_norm": 134.63629150390625,
"learning_rate": 5.594234322453539e-11,
"logits/chosen": 0.1849232316017151,
"logits/rejected": 0.15448370575904846,
"logps/chosen": -66.14913940429688,
"logps/ref_chosen": -63.04634094238281,
"logps/ref_rejected": -83.44963073730469,
"logps/rejected": -88.20873260498047,
"loss": 1.0747,
"margin_dpo/margin_mean": 1.656302809715271,
"margin_dpo/margin_std": 2.766972541809082,
"step": 658
},
{
"epoch": 0.9962207105064248,
"fcm_dpo/beta": 0.5667056441307068,
"fcm_dpo/delta": 0.22809654474258423,
"fcm_dpo/margin": 1.3948873281478882,
"fcm_dpo/q_t": 0.36462146043777466,
"grad_norm": 134.068603515625,
"learning_rate": 3.146808153123293e-11,
"logits/chosen": 0.2203397899866104,
"logits/rejected": 0.16544032096862793,
"logps/chosen": -58.36835479736328,
"logps/ref_chosen": -55.0802001953125,
"logps/ref_rejected": -71.91049194335938,
"logps/rejected": -76.59353637695312,
"loss": 1.1224,
"margin_dpo/margin_mean": 1.3948872089385986,
"margin_dpo/margin_std": 2.498440742492676,
"step": 659
},
{
"epoch": 0.9977324263038548,
"fcm_dpo/beta": 0.5684385895729065,
"fcm_dpo/delta": -0.10104553401470184,
"fcm_dpo/margin": 1.9175570011138916,
"fcm_dpo/q_t": 0.3092523217201233,
"grad_norm": 133.181396484375,
"learning_rate": 1.3985977021235829e-11,
"logits/chosen": 0.2678339183330536,
"logits/rejected": 0.2140858918428421,
"logps/chosen": -57.32783889770508,
"logps/ref_chosen": -54.525917053222656,
"logps/ref_rejected": -81.23604583740234,
"logps/rejected": -85.95552825927734,
"loss": 0.8659,
"margin_dpo/margin_mean": 1.9175567626953125,
"margin_dpo/margin_std": 2.366135597229004,
"step": 660
},
{
"epoch": 0.999244142101285,
"fcm_dpo/beta": 0.5669894218444824,
"fcm_dpo/delta": 0.021186619997024536,
"fcm_dpo/margin": 1.7300082445144653,
"fcm_dpo/q_t": 0.33315783739089966,
"grad_norm": 129.08389282226562,
"learning_rate": 3.4965187065971735e-12,
"logits/chosen": 0.15239998698234558,
"logits/rejected": 0.0887516513466835,
"logps/chosen": -63.79823684692383,
"logps/ref_chosen": -60.37263870239258,
"logps/ref_rejected": -77.42874145507812,
"logps/rejected": -82.5843505859375,
"loss": 1.0387,
"margin_dpo/margin_mean": 1.730008840560913,
"margin_dpo/margin_std": 2.6242144107818604,
"step": 661
},
{
"epoch": 0.999244142101285,
"step": 661,
"total_flos": 0.0,
"train_loss": 1.1173522615757363,
"train_runtime": 1752.852,
"train_samples_per_second": 24.153,
"train_steps_per_second": 0.377
}
],
"logging_steps": 1,
"max_steps": 661,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}