Files
llama3-8b-base-new-method-s…/trainer_state.json
ModelHub XC 7bc7b67926 初始化项目,由ModelHub XC社区提供模型
Model: W-61/llama3-8b-base-new-method-s_star0.6-20260426-230653
Source: Original Platform
2026-05-05 23:45:11 +08:00

9141 lines
335 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9989528795811519,
"eval_steps": 200,
"global_step": 477,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0020942408376963353,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.02793481945991516,
"fcm_dpo/q_t": 0.500069797039032,
"grad_norm": 28.58863639831543,
"learning_rate": 0.0,
"logits/chosen": -0.5898098945617676,
"logits/rejected": -0.604260265827179,
"logps/chosen": -275.28570556640625,
"logps/ref_chosen": -275.2312927246094,
"logps/ref_rejected": -222.9380340576172,
"logps/rejected": -222.96453857421875,
"loss": 5.5463,
"margin_dpo/margin_mean": -0.02793477475643158,
"margin_dpo/margin_std": 0.5724214911460876,
"step": 1
},
{
"epoch": 0.004188481675392671,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.014312177896499634,
"fcm_dpo/q_t": 0.4999642074108124,
"grad_norm": 27.88129234313965,
"learning_rate": 1.0416666666666666e-08,
"logits/chosen": -0.6574729681015015,
"logits/rejected": -0.6464410424232483,
"logps/chosen": -264.7165222167969,
"logps/ref_chosen": -264.7611083984375,
"logps/ref_rejected": -242.5597686767578,
"logps/rejected": -242.52951049804688,
"loss": 5.5446,
"margin_dpo/margin_mean": 0.014312252402305603,
"margin_dpo/margin_std": 0.6423971652984619,
"step": 2
},
{
"epoch": 0.0062827225130890054,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.0522288978099823,
"fcm_dpo/q_t": 0.4998694360256195,
"grad_norm": 25.787551879882812,
"learning_rate": 2.083333333333333e-08,
"logits/chosen": -0.6840452551841736,
"logits/rejected": -0.7351922392845154,
"logps/chosen": -274.11102294921875,
"logps/ref_chosen": -274.1018981933594,
"logps/ref_rejected": -286.5882568359375,
"logps/rejected": -286.6496276855469,
"loss": 5.5431,
"margin_dpo/margin_mean": 0.05222900211811066,
"margin_dpo/margin_std": 0.6702825427055359,
"step": 3
},
{
"epoch": 0.008376963350785341,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.01975475251674652,
"fcm_dpo/q_t": 0.4999505877494812,
"grad_norm": 31.67710304260254,
"learning_rate": 3.125e-08,
"logits/chosen": -0.620403528213501,
"logits/rejected": -0.6149281859397888,
"logps/chosen": -329.92706298828125,
"logps/ref_chosen": -329.8382568359375,
"logps/ref_rejected": -303.2850646972656,
"logps/rejected": -303.3935852050781,
"loss": 5.5445,
"margin_dpo/margin_mean": 0.019755080342292786,
"margin_dpo/margin_std": 0.7474581003189087,
"step": 4
},
{
"epoch": 0.010471204188481676,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.020737484097480774,
"fcm_dpo/q_t": 0.5000518560409546,
"grad_norm": 29.562240600585938,
"learning_rate": 4.166666666666666e-08,
"logits/chosen": -0.5746553540229797,
"logits/rejected": -0.5913240313529968,
"logps/chosen": -301.7220153808594,
"logps/ref_chosen": -301.7389221191406,
"logps/ref_rejected": -274.7654724121094,
"logps/rejected": -274.7278137207031,
"loss": 5.5461,
"margin_dpo/margin_mean": -0.020738065242767334,
"margin_dpo/margin_std": 0.7413759231567383,
"step": 5
},
{
"epoch": 0.012565445026178011,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.042598187923431396,
"fcm_dpo/q_t": 0.49989351630210876,
"grad_norm": 28.17943572998047,
"learning_rate": 5.208333333333333e-08,
"logits/chosen": -0.6895941495895386,
"logits/rejected": -0.6532205939292908,
"logps/chosen": -285.6484069824219,
"logps/ref_chosen": -285.6946716308594,
"logps/ref_rejected": -245.8200225830078,
"logps/rejected": -245.81639099121094,
"loss": 5.5435,
"margin_dpo/margin_mean": 0.042598843574523926,
"margin_dpo/margin_std": 0.6217130422592163,
"step": 6
},
{
"epoch": 0.014659685863874346,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.04013945162296295,
"fcm_dpo/q_t": 0.49989965558052063,
"grad_norm": 28.537418365478516,
"learning_rate": 6.25e-08,
"logits/chosen": -0.5685023069381714,
"logits/rejected": -0.6004266142845154,
"logps/chosen": -264.6379089355469,
"logps/ref_chosen": -264.65545654296875,
"logps/ref_rejected": -253.10305786132812,
"logps/rejected": -253.1256866455078,
"loss": 5.5436,
"margin_dpo/margin_mean": 0.040140300989151,
"margin_dpo/margin_std": 0.6211207509040833,
"step": 7
},
{
"epoch": 0.016753926701570682,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.021448686718940735,
"fcm_dpo/q_t": 0.5000536441802979,
"grad_norm": 30.7622127532959,
"learning_rate": 7.291666666666667e-08,
"logits/chosen": -0.6872634291648865,
"logits/rejected": -0.6957005262374878,
"logps/chosen": -354.21673583984375,
"logps/ref_chosen": -354.1887512207031,
"logps/ref_rejected": -282.9112243652344,
"logps/rejected": -282.91778564453125,
"loss": 5.5461,
"margin_dpo/margin_mean": -0.0214470773935318,
"margin_dpo/margin_std": 0.7891030311584473,
"step": 8
},
{
"epoch": 0.018848167539267015,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.0005417168140411377,
"fcm_dpo/q_t": 0.49999865889549255,
"grad_norm": 27.870540618896484,
"learning_rate": 8.333333333333333e-08,
"logits/chosen": -0.637888491153717,
"logits/rejected": -0.6561119556427002,
"logps/chosen": -285.5398254394531,
"logps/ref_chosen": -285.5502014160156,
"logps/ref_rejected": -267.99664306640625,
"logps/rejected": -267.98681640625,
"loss": 5.5452,
"margin_dpo/margin_mean": 0.0005417615175247192,
"margin_dpo/margin_std": 0.6964117884635925,
"step": 9
},
{
"epoch": 0.020942408376963352,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.07241007685661316,
"fcm_dpo/q_t": 0.49981898069381714,
"grad_norm": 26.481124877929688,
"learning_rate": 9.375e-08,
"logits/chosen": -0.6758443117141724,
"logits/rejected": -0.6697291135787964,
"logps/chosen": -251.83383178710938,
"logps/ref_chosen": -251.91238403320312,
"logps/ref_rejected": -226.45260620117188,
"logps/rejected": -226.44647216796875,
"loss": 5.5423,
"margin_dpo/margin_mean": 0.07241000235080719,
"margin_dpo/margin_std": 0.6581631898880005,
"step": 10
},
{
"epoch": 0.023036649214659685,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.04671028256416321,
"fcm_dpo/q_t": 0.499883234500885,
"grad_norm": 28.99079704284668,
"learning_rate": 1.0416666666666667e-07,
"logits/chosen": -0.5921290516853333,
"logits/rejected": -0.6467868685722351,
"logps/chosen": -300.9755554199219,
"logps/ref_chosen": -301.08343505859375,
"logps/ref_rejected": -259.546630859375,
"logps/rejected": -259.48541259765625,
"loss": 5.5434,
"margin_dpo/margin_mean": 0.04670977592468262,
"margin_dpo/margin_std": 0.6685330867767334,
"step": 11
},
{
"epoch": 0.025130890052356022,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.08367666602134705,
"fcm_dpo/q_t": 0.49979081749916077,
"grad_norm": 30.09294891357422,
"learning_rate": 1.1458333333333332e-07,
"logits/chosen": -0.5769016742706299,
"logits/rejected": -0.536662220954895,
"logps/chosen": -287.5732727050781,
"logps/ref_chosen": -287.548095703125,
"logps/ref_rejected": -277.37945556640625,
"logps/rejected": -277.48828125,
"loss": 5.5419,
"margin_dpo/margin_mean": 0.08367684483528137,
"margin_dpo/margin_std": 0.8274821043014526,
"step": 12
},
{
"epoch": 0.027225130890052355,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.07414671778678894,
"fcm_dpo/q_t": 0.4998146593570709,
"grad_norm": 27.353668212890625,
"learning_rate": 1.25e-07,
"logits/chosen": -0.656341016292572,
"logits/rejected": -0.663852870464325,
"logps/chosen": -270.6452331542969,
"logps/ref_chosen": -270.6664123535156,
"logps/ref_rejected": -274.6546936035156,
"logps/rejected": -274.7076721191406,
"loss": 5.5423,
"margin_dpo/margin_mean": 0.07414683699607849,
"margin_dpo/margin_std": 0.7050091028213501,
"step": 13
},
{
"epoch": 0.02931937172774869,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.02401043474674225,
"fcm_dpo/q_t": 0.4999399483203888,
"grad_norm": 28.236251831054688,
"learning_rate": 1.3541666666666666e-07,
"logits/chosen": -0.6250549554824829,
"logits/rejected": -0.6541483402252197,
"logps/chosen": -281.5421142578125,
"logps/ref_chosen": -281.59320068359375,
"logps/ref_rejected": -263.52215576171875,
"logps/rejected": -263.49505615234375,
"loss": 5.5443,
"margin_dpo/margin_mean": 0.024009257555007935,
"margin_dpo/margin_std": 0.7082223296165466,
"step": 14
},
{
"epoch": 0.031413612565445025,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.11133088171482086,
"fcm_dpo/q_t": 0.4997216761112213,
"grad_norm": 30.394655227661133,
"learning_rate": 1.4583333333333335e-07,
"logits/chosen": -0.6390557289123535,
"logits/rejected": -0.6513383388519287,
"logps/chosen": -298.3955383300781,
"logps/ref_chosen": -298.45343017578125,
"logps/ref_rejected": -227.17832946777344,
"logps/rejected": -227.23179626464844,
"loss": 5.5408,
"margin_dpo/margin_mean": 0.11132954061031342,
"margin_dpo/margin_std": 0.7194129228591919,
"step": 15
},
{
"epoch": 0.033507853403141365,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.12089978158473969,
"fcm_dpo/q_t": 0.4996977746486664,
"grad_norm": 30.162830352783203,
"learning_rate": 1.5624999999999999e-07,
"logits/chosen": -0.6070325970649719,
"logits/rejected": -0.6046398878097534,
"logps/chosen": -293.8941345214844,
"logps/ref_chosen": -293.96661376953125,
"logps/ref_rejected": -250.78443908691406,
"logps/rejected": -250.83285522460938,
"loss": 5.5404,
"margin_dpo/margin_mean": 0.1208992600440979,
"margin_dpo/margin_std": 0.7001491785049438,
"step": 16
},
{
"epoch": 0.0356020942408377,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.03587338328361511,
"fcm_dpo/q_t": 0.4999103248119354,
"grad_norm": 27.698881149291992,
"learning_rate": 1.6666666666666665e-07,
"logits/chosen": -0.5651789307594299,
"logits/rejected": -0.5910645723342896,
"logps/chosen": -262.3535461425781,
"logps/ref_chosen": -262.39398193359375,
"logps/ref_rejected": -248.500244140625,
"logps/rejected": -248.4956512451172,
"loss": 5.5438,
"margin_dpo/margin_mean": 0.03587399423122406,
"margin_dpo/margin_std": 0.6019639372825623,
"step": 17
},
{
"epoch": 0.03769633507853403,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.09658396244049072,
"fcm_dpo/q_t": 0.4997585415840149,
"grad_norm": 29.77211570739746,
"learning_rate": 1.7708333333333334e-07,
"logits/chosen": -0.6159874200820923,
"logits/rejected": -0.622156023979187,
"logps/chosen": -293.6891174316406,
"logps/ref_chosen": -293.709228515625,
"logps/ref_rejected": -274.5875244140625,
"logps/rejected": -274.6639709472656,
"loss": 5.5414,
"margin_dpo/margin_mean": 0.0965852439403534,
"margin_dpo/margin_std": 0.6984401941299438,
"step": 18
},
{
"epoch": 0.039790575916230364,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.08792537450790405,
"fcm_dpo/q_t": 0.49978017807006836,
"grad_norm": 28.1430606842041,
"learning_rate": 1.875e-07,
"logits/chosen": -0.6285202503204346,
"logits/rejected": -0.625032901763916,
"logps/chosen": -280.1614074707031,
"logps/ref_chosen": -280.26568603515625,
"logps/ref_rejected": -259.9742736816406,
"logps/rejected": -259.9579162597656,
"loss": 5.5417,
"margin_dpo/margin_mean": 0.08792558312416077,
"margin_dpo/margin_std": 0.6590030789375305,
"step": 19
},
{
"epoch": 0.041884816753926704,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.1195082813501358,
"fcm_dpo/q_t": 0.49970126152038574,
"grad_norm": 29.686382293701172,
"learning_rate": 1.9791666666666664e-07,
"logits/chosen": -0.6156403422355652,
"logits/rejected": -0.6466647386550903,
"logps/chosen": -303.7765808105469,
"logps/ref_chosen": -303.8954162597656,
"logps/ref_rejected": -260.214599609375,
"logps/rejected": -260.21527099609375,
"loss": 5.5404,
"margin_dpo/margin_mean": 0.11950752139091492,
"margin_dpo/margin_std": 0.6185337901115417,
"step": 20
},
{
"epoch": 0.04397905759162304,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.11209306120872498,
"fcm_dpo/q_t": 0.4997197389602661,
"grad_norm": 35.081111907958984,
"learning_rate": 2.0833333333333333e-07,
"logits/chosen": -0.6411827206611633,
"logits/rejected": -0.6685382723808289,
"logps/chosen": -301.36767578125,
"logps/ref_chosen": -301.5334777832031,
"logps/ref_rejected": -280.28900146484375,
"logps/rejected": -280.2352600097656,
"loss": 5.5408,
"margin_dpo/margin_mean": 0.11209359765052795,
"margin_dpo/margin_std": 0.7848556637763977,
"step": 21
},
{
"epoch": 0.04607329842931937,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.01605142652988434,
"fcm_dpo/q_t": 0.5000401735305786,
"grad_norm": 25.28415870666504,
"learning_rate": 2.1875e-07,
"logits/chosen": -0.666114330291748,
"logits/rejected": -0.668174684047699,
"logps/chosen": -259.9440612792969,
"logps/ref_chosen": -259.9951477050781,
"logps/ref_rejected": -243.0721435546875,
"logps/rejected": -243.00502014160156,
"loss": 5.5459,
"margin_dpo/margin_mean": -0.016050517559051514,
"margin_dpo/margin_std": 0.7094568610191345,
"step": 22
},
{
"epoch": 0.048167539267015703,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.08404606580734253,
"fcm_dpo/q_t": 0.49978986382484436,
"grad_norm": 27.975460052490234,
"learning_rate": 2.2916666666666663e-07,
"logits/chosen": -0.6258850693702698,
"logits/rejected": -0.6604623794555664,
"logps/chosen": -282.1323547363281,
"logps/ref_chosen": -282.1807556152344,
"logps/ref_rejected": -265.0758056640625,
"logps/rejected": -265.1114501953125,
"loss": 5.5419,
"margin_dpo/margin_mean": 0.08404561877250671,
"margin_dpo/margin_std": 0.7268498539924622,
"step": 23
},
{
"epoch": 0.050261780104712044,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.2948577404022217,
"fcm_dpo/q_t": 0.49926286935806274,
"grad_norm": 29.78768539428711,
"learning_rate": 2.3958333333333335e-07,
"logits/chosen": -0.6449757218360901,
"logits/rejected": -0.557062029838562,
"logps/chosen": -300.929443359375,
"logps/ref_chosen": -301.17962646484375,
"logps/ref_rejected": -302.12786865234375,
"logps/rejected": -302.1725158691406,
"loss": 5.5335,
"margin_dpo/margin_mean": 0.2948572337627411,
"margin_dpo/margin_std": 0.7735106348991394,
"step": 24
},
{
"epoch": 0.05235602094240838,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.10211683809757233,
"fcm_dpo/q_t": 0.499744713306427,
"grad_norm": 26.4748592376709,
"learning_rate": 2.5e-07,
"logits/chosen": -0.6071560382843018,
"logits/rejected": -0.6157788634300232,
"logps/chosen": -246.59918212890625,
"logps/ref_chosen": -246.74649047851562,
"logps/ref_rejected": -235.55638122558594,
"logps/rejected": -235.51116943359375,
"loss": 5.5412,
"margin_dpo/margin_mean": 0.10211563110351562,
"margin_dpo/margin_std": 0.7187904119491577,
"step": 25
},
{
"epoch": 0.05445026178010471,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.1786605715751648,
"fcm_dpo/q_t": 0.4995533227920532,
"grad_norm": 28.6179256439209,
"learning_rate": 2.604166666666667e-07,
"logits/chosen": -0.6561638116836548,
"logits/rejected": -0.6707935333251953,
"logps/chosen": -281.9454345703125,
"logps/ref_chosen": -282.1955871582031,
"logps/ref_rejected": -235.3135528564453,
"logps/rejected": -235.2420654296875,
"loss": 5.5381,
"margin_dpo/margin_mean": 0.17866107821464539,
"margin_dpo/margin_std": 0.7850726842880249,
"step": 26
},
{
"epoch": 0.05654450261780105,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.11002352833747864,
"fcm_dpo/q_t": 0.49972498416900635,
"grad_norm": 27.748746871948242,
"learning_rate": 2.708333333333333e-07,
"logits/chosen": -0.6526930332183838,
"logits/rejected": -0.6720865368843079,
"logps/chosen": -323.60198974609375,
"logps/ref_chosen": -323.8563537597656,
"logps/ref_rejected": -245.968017578125,
"logps/rejected": -245.82369995117188,
"loss": 5.5409,
"margin_dpo/margin_mean": 0.11002381145954132,
"margin_dpo/margin_std": 0.8846166133880615,
"step": 27
},
{
"epoch": 0.05863874345549738,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.15542466938495636,
"fcm_dpo/q_t": 0.4996114671230316,
"grad_norm": 26.329299926757812,
"learning_rate": 2.8125e-07,
"logits/chosen": -0.614606499671936,
"logits/rejected": -0.6235878467559814,
"logps/chosen": -248.0102081298828,
"logps/ref_chosen": -248.24673461914062,
"logps/ref_rejected": -240.0382080078125,
"logps/rejected": -239.9570770263672,
"loss": 5.539,
"margin_dpo/margin_mean": 0.15542495250701904,
"margin_dpo/margin_std": 0.8629389405250549,
"step": 28
},
{
"epoch": 0.060732984293193716,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.20112989842891693,
"fcm_dpo/q_t": 0.4994971752166748,
"grad_norm": 29.66515350341797,
"learning_rate": 2.916666666666667e-07,
"logits/chosen": -0.5941799879074097,
"logits/rejected": -0.6167061924934387,
"logps/chosen": -317.9654541015625,
"logps/ref_chosen": -318.2564392089844,
"logps/ref_rejected": -286.75848388671875,
"logps/rejected": -286.6686706542969,
"loss": 5.5372,
"margin_dpo/margin_mean": 0.20112943649291992,
"margin_dpo/margin_std": 0.8353064060211182,
"step": 29
},
{
"epoch": 0.06282722513089005,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.31348854303359985,
"fcm_dpo/q_t": 0.49921631813049316,
"grad_norm": 28.987579345703125,
"learning_rate": 3.020833333333333e-07,
"logits/chosen": -0.5948168635368347,
"logits/rejected": -0.6118742823600769,
"logps/chosen": -252.6605987548828,
"logps/ref_chosen": -253.0491485595703,
"logps/ref_rejected": -261.30029296875,
"logps/rejected": -261.2252502441406,
"loss": 5.5328,
"margin_dpo/margin_mean": 0.3134886920452118,
"margin_dpo/margin_std": 0.956783652305603,
"step": 30
},
{
"epoch": 0.06492146596858639,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.20913538336753845,
"fcm_dpo/q_t": 0.4994771480560303,
"grad_norm": 25.035062789916992,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -0.6614914536476135,
"logits/rejected": -0.6970552206039429,
"logps/chosen": -247.77171325683594,
"logps/ref_chosen": -248.15301513671875,
"logps/ref_rejected": -203.17703247070312,
"logps/rejected": -203.0048828125,
"loss": 5.5369,
"margin_dpo/margin_mean": 0.20913533866405487,
"margin_dpo/margin_std": 1.0212087631225586,
"step": 31
},
{
"epoch": 0.06701570680628273,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.37815043330192566,
"fcm_dpo/q_t": 0.4990546405315399,
"grad_norm": 29.706012725830078,
"learning_rate": 3.2291666666666666e-07,
"logits/chosen": -0.6007161140441895,
"logits/rejected": -0.6042397022247314,
"logps/chosen": -304.9756774902344,
"logps/ref_chosen": -305.5399475097656,
"logps/ref_rejected": -267.6527099609375,
"logps/rejected": -267.46661376953125,
"loss": 5.5302,
"margin_dpo/margin_mean": 0.3781506419181824,
"margin_dpo/margin_std": 1.0059348344802856,
"step": 32
},
{
"epoch": 0.06910994764397906,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.351329505443573,
"fcm_dpo/q_t": 0.49912169575691223,
"grad_norm": 28.26189422607422,
"learning_rate": 3.333333333333333e-07,
"logits/chosen": -0.6345657706260681,
"logits/rejected": -0.646354615688324,
"logps/chosen": -285.6530456542969,
"logps/ref_chosen": -286.2335205078125,
"logps/ref_rejected": -255.38748168945312,
"logps/rejected": -255.1583251953125,
"loss": 5.5313,
"margin_dpo/margin_mean": 0.351329505443573,
"margin_dpo/margin_std": 1.1539372205734253,
"step": 33
},
{
"epoch": 0.0712041884816754,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.7785255908966064,
"fcm_dpo/q_t": 0.49805375933647156,
"grad_norm": 31.09929847717285,
"learning_rate": 3.4375e-07,
"logits/chosen": -0.6342900395393372,
"logits/rejected": -0.6447117924690247,
"logps/chosen": -340.80950927734375,
"logps/ref_chosen": -341.5920104980469,
"logps/ref_rejected": -278.8866882324219,
"logps/rejected": -278.8827209472656,
"loss": 5.5142,
"margin_dpo/margin_mean": 0.7785259485244751,
"margin_dpo/margin_std": 1.1828100681304932,
"step": 34
},
{
"epoch": 0.07329842931937172,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.4459952712059021,
"fcm_dpo/q_t": 0.49888503551483154,
"grad_norm": 26.795753479003906,
"learning_rate": 3.541666666666667e-07,
"logits/chosen": -0.6334152221679688,
"logits/rejected": -0.6538709402084351,
"logps/chosen": -264.3992919921875,
"logps/ref_chosen": -265.0795593261719,
"logps/ref_rejected": -264.4876708984375,
"logps/rejected": -264.2533874511719,
"loss": 5.5275,
"margin_dpo/margin_mean": 0.44599461555480957,
"margin_dpo/margin_std": 1.2866384983062744,
"step": 35
},
{
"epoch": 0.07539267015706806,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.6205528378486633,
"fcm_dpo/q_t": 0.4984487295150757,
"grad_norm": 32.203128814697266,
"learning_rate": 3.645833333333333e-07,
"logits/chosen": -0.5942565202713013,
"logits/rejected": -0.611389696598053,
"logps/chosen": -296.5020446777344,
"logps/ref_chosen": -297.3261413574219,
"logps/ref_rejected": -282.09515380859375,
"logps/rejected": -281.8916320800781,
"loss": 5.5206,
"margin_dpo/margin_mean": 0.6205521821975708,
"margin_dpo/margin_std": 1.4419972896575928,
"step": 36
},
{
"epoch": 0.0774869109947644,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.5163920521736145,
"fcm_dpo/q_t": 0.498709112405777,
"grad_norm": 31.128677368164062,
"learning_rate": 3.75e-07,
"logits/chosen": -0.5990525484085083,
"logits/rejected": -0.6132475137710571,
"logps/chosen": -313.2706298828125,
"logps/ref_chosen": -314.0340270996094,
"logps/ref_rejected": -299.3437805175781,
"logps/rejected": -299.0968017578125,
"loss": 5.5248,
"margin_dpo/margin_mean": 0.5163909196853638,
"margin_dpo/margin_std": 1.5658156871795654,
"step": 37
},
{
"epoch": 0.07958115183246073,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.7946432828903198,
"fcm_dpo/q_t": 0.49801355600357056,
"grad_norm": 28.43010139465332,
"learning_rate": 3.8541666666666665e-07,
"logits/chosen": -0.6444147229194641,
"logits/rejected": -0.6569415330886841,
"logps/chosen": -281.4555969238281,
"logps/ref_chosen": -282.54119873046875,
"logps/ref_rejected": -269.7773132324219,
"logps/rejected": -269.486328125,
"loss": 5.5137,
"margin_dpo/margin_mean": 0.7946435213088989,
"margin_dpo/margin_std": 1.5317476987838745,
"step": 38
},
{
"epoch": 0.08167539267015707,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.2457653284072876,
"fcm_dpo/q_t": 0.49688607454299927,
"grad_norm": 29.478654861450195,
"learning_rate": 3.958333333333333e-07,
"logits/chosen": -0.620808482170105,
"logits/rejected": -0.6350722312927246,
"logps/chosen": -275.4786376953125,
"logps/ref_chosen": -276.7729187011719,
"logps/ref_rejected": -249.95889282226562,
"logps/rejected": -249.91033935546875,
"loss": 5.4959,
"margin_dpo/margin_mean": 1.2457654476165771,
"margin_dpo/margin_std": 1.870965838432312,
"step": 39
},
{
"epoch": 0.08376963350785341,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.7840527296066284,
"fcm_dpo/q_t": 0.49804022908210754,
"grad_norm": 27.324800491333008,
"learning_rate": 4.0625e-07,
"logits/chosen": -0.6211684346199036,
"logits/rejected": -0.6565241813659668,
"logps/chosen": -283.1966857910156,
"logps/ref_chosen": -284.30706787109375,
"logps/ref_rejected": -244.4459991455078,
"logps/rejected": -244.11964416503906,
"loss": 5.5143,
"margin_dpo/margin_mean": 0.7840531468391418,
"margin_dpo/margin_std": 1.919097900390625,
"step": 40
},
{
"epoch": 0.08586387434554973,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.7700332403182983,
"fcm_dpo/q_t": 0.49807509779930115,
"grad_norm": 30.37227439880371,
"learning_rate": 4.1666666666666667e-07,
"logits/chosen": -0.6269267201423645,
"logits/rejected": -0.6523310542106628,
"logps/chosen": -292.72161865234375,
"logps/ref_chosen": -293.8151550292969,
"logps/ref_rejected": -252.16815185546875,
"logps/rejected": -251.84463500976562,
"loss": 5.5147,
"margin_dpo/margin_mean": 0.7700337171554565,
"margin_dpo/margin_std": 1.7210803031921387,
"step": 41
},
{
"epoch": 0.08795811518324607,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.9412699341773987,
"fcm_dpo/q_t": 0.49764710664749146,
"grad_norm": 27.493986129760742,
"learning_rate": 4.270833333333333e-07,
"logits/chosen": -0.6289184093475342,
"logits/rejected": -0.6464809775352478,
"logps/chosen": -251.489013671875,
"logps/ref_chosen": -252.76023864746094,
"logps/ref_rejected": -261.0414733886719,
"logps/rejected": -260.71148681640625,
"loss": 5.5082,
"margin_dpo/margin_mean": 0.9412699937820435,
"margin_dpo/margin_std": 2.1883938312530518,
"step": 42
},
{
"epoch": 0.09005235602094241,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.1826536655426025,
"fcm_dpo/q_t": 0.49704375863075256,
"grad_norm": 29.944631576538086,
"learning_rate": 4.375e-07,
"logits/chosen": -0.5914728045463562,
"logits/rejected": -0.6065964102745056,
"logps/chosen": -315.5735168457031,
"logps/ref_chosen": -316.8347473144531,
"logps/ref_rejected": -273.7649230957031,
"logps/rejected": -273.6863708496094,
"loss": 5.4986,
"margin_dpo/margin_mean": 1.182654857635498,
"margin_dpo/margin_std": 2.2558624744415283,
"step": 43
},
{
"epoch": 0.09214659685863874,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.6461647748947144,
"fcm_dpo/q_t": 0.49588537216186523,
"grad_norm": 30.859792709350586,
"learning_rate": 4.479166666666667e-07,
"logits/chosen": -0.5914509296417236,
"logits/rejected": -0.5897051692008972,
"logps/chosen": -285.2916259765625,
"logps/ref_chosen": -286.8757019042969,
"logps/ref_rejected": -282.4681396484375,
"logps/rejected": -282.5302734375,
"loss": 5.4806,
"margin_dpo/margin_mean": 1.646165370941162,
"margin_dpo/margin_std": 3.07133150100708,
"step": 44
},
{
"epoch": 0.09424083769633508,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.3455177545547485,
"fcm_dpo/q_t": 0.496637225151062,
"grad_norm": 29.004980087280273,
"learning_rate": 4.5833333333333327e-07,
"logits/chosen": -0.6958540678024292,
"logits/rejected": -0.7206953763961792,
"logps/chosen": -322.5387268066406,
"logps/ref_chosen": -324.2633972167969,
"logps/ref_rejected": -293.09466552734375,
"logps/rejected": -292.71551513671875,
"loss": 5.4923,
"margin_dpo/margin_mean": 1.345517635345459,
"margin_dpo/margin_std": 2.75714111328125,
"step": 45
},
{
"epoch": 0.09633507853403141,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.6059635877609253,
"fcm_dpo/q_t": 0.4959862530231476,
"grad_norm": 30.163619995117188,
"learning_rate": 4.6874999999999996e-07,
"logits/chosen": -0.6251657605171204,
"logits/rejected": -0.6386570930480957,
"logps/chosen": -296.61322021484375,
"logps/ref_chosen": -298.3357238769531,
"logps/ref_rejected": -267.66204833984375,
"logps/rejected": -267.5455017089844,
"loss": 5.482,
"margin_dpo/margin_mean": 1.6059637069702148,
"margin_dpo/margin_std": 2.6878585815429688,
"step": 46
},
{
"epoch": 0.09842931937172775,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.0325478315353394,
"fcm_dpo/q_t": 0.4974192678928375,
"grad_norm": 26.348020553588867,
"learning_rate": 4.791666666666667e-07,
"logits/chosen": -0.6017532348632812,
"logits/rejected": -0.6222880482673645,
"logps/chosen": -261.1288146972656,
"logps/ref_chosen": -262.5669250488281,
"logps/ref_rejected": -258.70989990234375,
"logps/rejected": -258.3043212890625,
"loss": 5.5052,
"margin_dpo/margin_mean": 1.0325474739074707,
"margin_dpo/margin_std": 3.2608814239501953,
"step": 47
},
{
"epoch": 0.10052356020942409,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.5231389999389648,
"fcm_dpo/q_t": 0.4961942434310913,
"grad_norm": 27.5208797454834,
"learning_rate": 4.895833333333333e-07,
"logits/chosen": -0.6104145646095276,
"logits/rejected": -0.6352874636650085,
"logps/chosen": -267.69293212890625,
"logps/ref_chosen": -269.4932556152344,
"logps/ref_rejected": -241.888916015625,
"logps/rejected": -241.6117401123047,
"loss": 5.4855,
"margin_dpo/margin_mean": 1.5231391191482544,
"margin_dpo/margin_std": 2.9792253971099854,
"step": 48
},
{
"epoch": 0.10261780104712041,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.7325966358184814,
"fcm_dpo/q_t": 0.4956699013710022,
"grad_norm": 27.652172088623047,
"learning_rate": 5e-07,
"logits/chosen": -0.6746569871902466,
"logits/rejected": -0.658962607383728,
"logps/chosen": -255.67564392089844,
"logps/ref_chosen": -257.8844909667969,
"logps/ref_rejected": -256.8912048339844,
"logps/rejected": -256.4149475097656,
"loss": 5.4774,
"margin_dpo/margin_mean": 1.7325971126556396,
"margin_dpo/margin_std": 3.360792398452759,
"step": 49
},
{
"epoch": 0.10471204188481675,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.238340973854065,
"fcm_dpo/q_t": 0.4969059228897095,
"grad_norm": 27.846542358398438,
"learning_rate": 4.999932966293553e-07,
"logits/chosen": -0.6347872018814087,
"logits/rejected": -0.6571816205978394,
"logps/chosen": -299.55889892578125,
"logps/ref_chosen": -301.62884521484375,
"logps/ref_rejected": -298.2716064453125,
"logps/rejected": -297.44000244140625,
"loss": 5.4975,
"margin_dpo/margin_mean": 1.2383403778076172,
"margin_dpo/margin_std": 3.8234386444091797,
"step": 50
},
{
"epoch": 0.1068062827225131,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.7760026454925537,
"fcm_dpo/q_t": 0.4955626130104065,
"grad_norm": 29.151445388793945,
"learning_rate": 4.999731868769026e-07,
"logits/chosen": -0.6137688755989075,
"logits/rejected": -0.6077885031700134,
"logps/chosen": -267.3522644042969,
"logps/ref_chosen": -269.37237548828125,
"logps/ref_rejected": -297.0167541503906,
"logps/rejected": -296.7726135253906,
"loss": 5.4768,
"margin_dpo/margin_mean": 1.7760027647018433,
"margin_dpo/margin_std": 4.458441734313965,
"step": 51
},
{
"epoch": 0.10890052356020942,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.1212289333343506,
"fcm_dpo/q_t": 0.49220389127731323,
"grad_norm": 30.20243263244629,
"learning_rate": 4.99939671821067e-07,
"logits/chosen": -0.6574031114578247,
"logits/rejected": -0.6640452146530151,
"logps/chosen": -304.0586242675781,
"logps/ref_chosen": -306.9028015136719,
"logps/ref_rejected": -281.24737548828125,
"logps/rejected": -281.5244140625,
"loss": 5.4238,
"margin_dpo/margin_mean": 3.1212282180786133,
"margin_dpo/margin_std": 4.759432315826416,
"step": 52
},
{
"epoch": 0.11099476439790576,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 2.1728179454803467,
"fcm_dpo/q_t": 0.49457210302352905,
"grad_norm": 30.007936477661133,
"learning_rate": 4.998927532591591e-07,
"logits/chosen": -0.657882034778595,
"logits/rejected": -0.6984093189239502,
"logps/chosen": -283.18218994140625,
"logps/ref_chosen": -285.9759521484375,
"logps/ref_rejected": -273.9073486328125,
"logps/rejected": -273.2864074707031,
"loss": 5.4617,
"margin_dpo/margin_mean": 2.172816753387451,
"margin_dpo/margin_std": 4.996975898742676,
"step": 53
},
{
"epoch": 0.1130890052356021,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.9492709636688232,
"fcm_dpo/q_t": 0.495132178068161,
"grad_norm": 26.357450485229492,
"learning_rate": 4.998324337072792e-07,
"logits/chosen": -0.6814525127410889,
"logits/rejected": -0.6885929703712463,
"logps/chosen": -303.84771728515625,
"logps/ref_chosen": -306.504638671875,
"logps/ref_rejected": -272.67431640625,
"logps/rejected": -271.9667053222656,
"loss": 5.4709,
"margin_dpo/margin_mean": 1.9492708444595337,
"margin_dpo/margin_std": 5.48816442489624,
"step": 54
},
{
"epoch": 0.11518324607329843,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 2.3219242095947266,
"fcm_dpo/q_t": 0.49420011043548584,
"grad_norm": 24.795543670654297,
"learning_rate": 4.997587164001815e-07,
"logits/chosen": -0.6326627135276794,
"logits/rejected": -0.6362449526786804,
"logps/chosen": -220.49732971191406,
"logps/ref_chosen": -222.33013916015625,
"logps/ref_rejected": -206.59571838378906,
"logps/rejected": -207.08482360839844,
"loss": 5.4553,
"margin_dpo/margin_mean": 2.321924924850464,
"margin_dpo/margin_std": 4.869386196136475,
"step": 55
},
{
"epoch": 0.11727748691099477,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.1078054904937744,
"fcm_dpo/q_t": 0.4922420382499695,
"grad_norm": 27.52865219116211,
"learning_rate": 4.996716052911017e-07,
"logits/chosen": -0.6062515377998352,
"logits/rejected": -0.6205031871795654,
"logps/chosen": -247.61697387695312,
"logps/ref_chosen": -250.47816467285156,
"logps/ref_rejected": -228.25848388671875,
"logps/rejected": -228.50511169433594,
"loss": 5.4261,
"margin_dpo/margin_mean": 3.1078062057495117,
"margin_dpo/margin_std": 5.900341033935547,
"step": 56
},
{
"epoch": 0.1193717277486911,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 4.170928955078125,
"fcm_dpo/q_t": 0.4895845651626587,
"grad_norm": 30.931716918945312,
"learning_rate": 4.99571105051544e-07,
"logits/chosen": -0.6947147846221924,
"logits/rejected": -0.6651787161827087,
"logps/chosen": -311.2119140625,
"logps/ref_chosen": -315.1195373535156,
"logps/ref_rejected": -272.755615234375,
"logps/rejected": -273.0189208984375,
"loss": 5.3835,
"margin_dpo/margin_mean": 4.170928001403809,
"margin_dpo/margin_std": 5.598484039306641,
"step": 57
},
{
"epoch": 0.12146596858638743,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 2.4037206172943115,
"fcm_dpo/q_t": 0.4939940869808197,
"grad_norm": 27.60306167602539,
"learning_rate": 4.994572210710314e-07,
"logits/chosen": -0.6179156303405762,
"logits/rejected": -0.6422260403633118,
"logps/chosen": -262.6658020019531,
"logps/ref_chosen": -265.1816711425781,
"logps/ref_rejected": -268.2203369140625,
"logps/rejected": -268.108154296875,
"loss": 5.4532,
"margin_dpo/margin_mean": 2.4037222862243652,
"margin_dpo/margin_std": 5.711413860321045,
"step": 58
},
{
"epoch": 0.12356020942408377,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.516843318939209,
"fcm_dpo/q_t": 0.4962143301963806,
"grad_norm": 29.968425750732422,
"learning_rate": 4.993299594568162e-07,
"logits/chosen": -0.6006144285202026,
"logits/rejected": -0.5931594371795654,
"logps/chosen": -284.288818359375,
"logps/ref_chosen": -286.35394287109375,
"logps/ref_rejected": -260.6757507324219,
"logps/rejected": -260.12744140625,
"loss": 5.4905,
"margin_dpo/margin_mean": 1.5168440341949463,
"margin_dpo/margin_std": 7.0178422927856445,
"step": 59
},
{
"epoch": 0.1256544502617801,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.354137897491455,
"fcm_dpo/q_t": 0.49162524938583374,
"grad_norm": 27.982912063598633,
"learning_rate": 4.991893270335525e-07,
"logits/chosen": -0.6835321187973022,
"logits/rejected": -0.7084705829620361,
"logps/chosen": -255.92019653320312,
"logps/ref_chosen": -258.74859619140625,
"logps/ref_rejected": -255.04893493652344,
"logps/rejected": -255.5746612548828,
"loss": 5.4176,
"margin_dpo/margin_mean": 3.354137897491455,
"margin_dpo/margin_std": 7.129515171051025,
"step": 60
},
{
"epoch": 0.12774869109947645,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.0934977531433105,
"fcm_dpo/q_t": 0.49227961897850037,
"grad_norm": 29.938720703125,
"learning_rate": 4.990353313429303e-07,
"logits/chosen": -0.649039089679718,
"logits/rejected": -0.6660676598548889,
"logps/chosen": -275.51678466796875,
"logps/ref_chosen": -278.4678955078125,
"logps/ref_rejected": -252.02720642089844,
"logps/rejected": -252.1695556640625,
"loss": 5.4291,
"margin_dpo/margin_mean": 3.0934970378875732,
"margin_dpo/margin_std": 7.676670551300049,
"step": 61
},
{
"epoch": 0.12984293193717278,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.486009120941162,
"fcm_dpo/q_t": 0.49130553007125854,
"grad_norm": 26.67005729675293,
"learning_rate": 4.988679806432711e-07,
"logits/chosen": -0.605567216873169,
"logits/rejected": -0.6468653082847595,
"logps/chosen": -268.9039306640625,
"logps/ref_chosen": -272.92431640625,
"logps/ref_rejected": -260.7935485839844,
"logps/rejected": -260.2591552734375,
"loss": 5.4128,
"margin_dpo/margin_mean": 3.4860095977783203,
"margin_dpo/margin_std": 7.451489448547363,
"step": 62
},
{
"epoch": 0.1319371727748691,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.4414150714874268,
"fcm_dpo/q_t": 0.49141040444374084,
"grad_norm": 28.199609756469727,
"learning_rate": 4.986872839090852e-07,
"logits/chosen": -0.6584441661834717,
"logits/rejected": -0.6647105813026428,
"logps/chosen": -273.7209167480469,
"logps/ref_chosen": -277.0889892578125,
"logps/ref_rejected": -273.3413391113281,
"logps/rejected": -273.4146728515625,
"loss": 5.4143,
"margin_dpo/margin_mean": 3.4414143562316895,
"margin_dpo/margin_std": 7.199389457702637,
"step": 63
},
{
"epoch": 0.13403141361256546,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 4.197411060333252,
"fcm_dpo/q_t": 0.4895278811454773,
"grad_norm": 28.395610809326172,
"learning_rate": 4.9849325083059e-07,
"logits/chosen": -0.6434694528579712,
"logits/rejected": -0.6396706104278564,
"logps/chosen": -279.68328857421875,
"logps/ref_chosen": -283.8244934082031,
"logps/ref_rejected": -263.29351806640625,
"logps/rejected": -263.3497314453125,
"loss": 5.3866,
"margin_dpo/margin_mean": 4.1974101066589355,
"margin_dpo/margin_std": 8.332403182983398,
"step": 64
},
{
"epoch": 0.13612565445026178,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.187953233718872,
"fcm_dpo/q_t": 0.49204325675964355,
"grad_norm": 27.825172424316406,
"learning_rate": 4.982858918131906e-07,
"logits/chosen": -0.7001113891601562,
"logits/rejected": -0.6681698560714722,
"logps/chosen": -261.5180969238281,
"logps/ref_chosen": -264.8699645996094,
"logps/ref_rejected": -268.5076904296875,
"logps/rejected": -268.34381103515625,
"loss": 5.4245,
"margin_dpo/margin_mean": 3.1879520416259766,
"margin_dpo/margin_std": 7.417712211608887,
"step": 65
},
{
"epoch": 0.1382198952879581,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 4.193648338317871,
"fcm_dpo/q_t": 0.48955121636390686,
"grad_norm": 27.884878158569336,
"learning_rate": 4.980652179769217e-07,
"logits/chosen": -0.6776885986328125,
"logits/rejected": -0.6976199746131897,
"logps/chosen": -269.7621765136719,
"logps/ref_chosen": -272.9283142089844,
"logps/ref_rejected": -280.94696044921875,
"logps/rejected": -281.9744567871094,
"loss": 5.3898,
"margin_dpo/margin_mean": 4.193647861480713,
"margin_dpo/margin_std": 10.121187210083008,
"step": 66
},
{
"epoch": 0.14031413612565444,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.643204689025879,
"fcm_dpo/q_t": 0.4909108579158783,
"grad_norm": 25.530357360839844,
"learning_rate": 4.978312411558517e-07,
"logits/chosen": -0.693301260471344,
"logits/rejected": -0.7261889576911926,
"logps/chosen": -262.1764221191406,
"logps/ref_chosen": -266.18695068359375,
"logps/ref_rejected": -250.17405700683594,
"logps/rejected": -249.80673217773438,
"loss": 5.409,
"margin_dpo/margin_mean": 3.643204927444458,
"margin_dpo/margin_std": 8.773540496826172,
"step": 67
},
{
"epoch": 0.1424083769633508,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 4.684436798095703,
"fcm_dpo/q_t": 0.4883463978767395,
"grad_norm": 28.18437385559082,
"learning_rate": 4.975839738974473e-07,
"logits/chosen": -0.6813502311706543,
"logits/rejected": -0.694290280342102,
"logps/chosen": -294.862060546875,
"logps/ref_chosen": -297.9385986328125,
"logps/ref_rejected": -261.5141296386719,
"logps/rejected": -263.1220397949219,
"loss": 5.3719,
"margin_dpo/margin_mean": 4.684436321258545,
"margin_dpo/margin_std": 10.52728271484375,
"step": 68
},
{
"epoch": 0.14450261780104712,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 6.02636194229126,
"fcm_dpo/q_t": 0.48498111963272095,
"grad_norm": 28.678905487060547,
"learning_rate": 4.97323429461901e-07,
"logits/chosen": -0.6936722993850708,
"logits/rejected": -0.7256036400794983,
"logps/chosen": -261.7213134765625,
"logps/ref_chosen": -265.6175231933594,
"logps/ref_rejected": -236.8287353515625,
"logps/rejected": -238.95887756347656,
"loss": 5.3175,
"margin_dpo/margin_mean": 6.02636194229126,
"margin_dpo/margin_std": 9.588497161865234,
"step": 69
},
{
"epoch": 0.14659685863874344,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 5.8976898193359375,
"fcm_dpo/q_t": 0.4853127598762512,
"grad_norm": 28.633691787719727,
"learning_rate": 4.970496218214204e-07,
"logits/chosen": -0.6690410375595093,
"logits/rejected": -0.7014957666397095,
"logps/chosen": -291.8380432128906,
"logps/ref_chosen": -296.2259216308594,
"logps/ref_rejected": -254.68496704101562,
"logps/rejected": -256.1947326660156,
"loss": 5.3258,
"margin_dpo/margin_mean": 5.897688388824463,
"margin_dpo/margin_std": 10.961847305297852,
"step": 70
},
{
"epoch": 0.1486910994764398,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 5.169013500213623,
"fcm_dpo/q_t": 0.4871177673339844,
"grad_norm": 28.43419647216797,
"learning_rate": 4.967625656594781e-07,
"logits/chosen": -0.6524871587753296,
"logits/rejected": -0.6415574550628662,
"logps/chosen": -283.871826171875,
"logps/ref_chosen": -288.92724609375,
"logps/ref_rejected": -278.6405334472656,
"logps/rejected": -278.754150390625,
"loss": 5.358,
"margin_dpo/margin_mean": 5.169013500213623,
"margin_dpo/margin_std": 12.44704532623291,
"step": 71
},
{
"epoch": 0.15078534031413612,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 5.125137805938721,
"fcm_dpo/q_t": 0.48721808195114136,
"grad_norm": 28.26305389404297,
"learning_rate": 4.964622763700252e-07,
"logits/chosen": -0.6996564269065857,
"logits/rejected": -0.7112348079681396,
"logps/chosen": -233.74087524414062,
"logps/ref_chosen": -237.0452880859375,
"logps/ref_rejected": -252.7946319580078,
"logps/rejected": -254.6153564453125,
"loss": 5.3553,
"margin_dpo/margin_mean": 5.125139236450195,
"margin_dpo/margin_std": 10.407864570617676,
"step": 72
},
{
"epoch": 0.15287958115183245,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 4.746781349182129,
"fcm_dpo/q_t": 0.4881611168384552,
"grad_norm": 27.88288688659668,
"learning_rate": 4.961487700566646e-07,
"logits/chosen": -0.6515335440635681,
"logits/rejected": -0.6686420440673828,
"logps/chosen": -268.7247314453125,
"logps/ref_chosen": -273.0531005859375,
"logps/ref_rejected": -246.8330841064453,
"logps/rejected": -247.25152587890625,
"loss": 5.374,
"margin_dpo/margin_mean": 4.746780872344971,
"margin_dpo/margin_std": 12.301660537719727,
"step": 73
},
{
"epoch": 0.1549738219895288,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 4.388714790344238,
"fcm_dpo/q_t": 0.48907017707824707,
"grad_norm": 30.373809814453125,
"learning_rate": 4.958220635317885e-07,
"logits/chosen": -0.7225594520568848,
"logits/rejected": -0.700709342956543,
"logps/chosen": -338.90679931640625,
"logps/ref_chosen": -342.2818908691406,
"logps/ref_rejected": -330.0293884277344,
"logps/rejected": -331.0429382324219,
"loss": 5.3862,
"margin_dpo/margin_mean": 4.388715744018555,
"margin_dpo/margin_std": 11.592477798461914,
"step": 74
},
{
"epoch": 0.15706806282722513,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 6.691708087921143,
"fcm_dpo/q_t": 0.483328640460968,
"grad_norm": 29.403121948242188,
"learning_rate": 4.954821743156767e-07,
"logits/chosen": -0.6306406259536743,
"logits/rejected": -0.6319374442100525,
"logps/chosen": -262.20196533203125,
"logps/ref_chosen": -266.8641662597656,
"logps/ref_rejected": -276.8699951171875,
"logps/rejected": -278.8995361328125,
"loss": 5.2955,
"margin_dpo/margin_mean": 6.691707611083984,
"margin_dpo/margin_std": 11.022812843322754,
"step": 75
},
{
"epoch": 0.15916230366492146,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 7.202816486358643,
"fcm_dpo/q_t": 0.482099711894989,
"grad_norm": 29.140716552734375,
"learning_rate": 4.951291206355559e-07,
"logits/chosen": -0.7213584780693054,
"logits/rejected": -0.7292888760566711,
"logps/chosen": -277.04583740234375,
"logps/ref_chosen": -281.174560546875,
"logps/ref_rejected": -263.6067199707031,
"logps/rejected": -266.6807556152344,
"loss": 5.2799,
"margin_dpo/margin_mean": 7.202816486358643,
"margin_dpo/margin_std": 12.760961532592773,
"step": 76
},
{
"epoch": 0.1612565445026178,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 5.7732439041137695,
"fcm_dpo/q_t": 0.4856662452220917,
"grad_norm": 33.32221603393555,
"learning_rate": 4.947629214246236e-07,
"logits/chosen": -0.56251460313797,
"logits/rejected": -0.5714588165283203,
"logps/chosen": -302.36383056640625,
"logps/ref_chosen": -306.09527587890625,
"logps/ref_rejected": -253.49569702148438,
"logps/rejected": -255.5375213623047,
"loss": 5.3388,
"margin_dpo/margin_mean": 5.773242950439453,
"margin_dpo/margin_std": 14.010757446289062,
"step": 77
},
{
"epoch": 0.16335078534031414,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 8.770861625671387,
"fcm_dpo/q_t": 0.4782388508319855,
"grad_norm": 29.759794235229492,
"learning_rate": 4.943835963210323e-07,
"logits/chosen": -0.6812779903411865,
"logits/rejected": -0.6756828427314758,
"logps/chosen": -253.0252685546875,
"logps/ref_chosen": -256.90234375,
"logps/ref_rejected": -211.57154846191406,
"logps/rejected": -216.4653778076172,
"loss": 5.2239,
"margin_dpo/margin_mean": 8.77086067199707,
"margin_dpo/margin_std": 14.511454582214355,
"step": 78
},
{
"epoch": 0.16544502617801046,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 8.177414894104004,
"fcm_dpo/q_t": 0.4797084331512451,
"grad_norm": 29.77664566040039,
"learning_rate": 4.939911656668361e-07,
"logits/chosen": -0.6469649076461792,
"logits/rejected": -0.6670407652854919,
"logps/chosen": -263.2286376953125,
"logps/ref_chosen": -266.2735595703125,
"logps/ref_rejected": -251.57257080078125,
"logps/rejected": -256.705078125,
"loss": 5.2488,
"margin_dpo/margin_mean": 8.177413940429688,
"margin_dpo/margin_std": 14.974796295166016,
"step": 79
},
{
"epoch": 0.16753926701570682,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 6.5798444747924805,
"fcm_dpo/q_t": 0.4836696982383728,
"grad_norm": 28.978797912597656,
"learning_rate": 4.935856505068998e-07,
"logits/chosen": -0.6657726168632507,
"logits/rejected": -0.6958042979240417,
"logps/chosen": -286.041259765625,
"logps/ref_chosen": -287.8509826660156,
"logps/ref_rejected": -256.0766296386719,
"logps/rejected": -260.84674072265625,
"loss": 5.3059,
"margin_dpo/margin_mean": 6.579843997955322,
"margin_dpo/margin_std": 13.124878883361816,
"step": 80
},
{
"epoch": 0.16963350785340314,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 7.183926105499268,
"fcm_dpo/q_t": 0.4821889102458954,
"grad_norm": 28.12186622619629,
"learning_rate": 4.93167072587771e-07,
"logits/chosen": -0.6388418674468994,
"logits/rejected": -0.637535810470581,
"logps/chosen": -266.1118469238281,
"logps/ref_chosen": -268.5232238769531,
"logps/ref_rejected": -237.81137084960938,
"logps/rejected": -242.58392333984375,
"loss": 5.2927,
"margin_dpo/margin_mean": 7.1839280128479,
"margin_dpo/margin_std": 16.76314926147461,
"step": 81
},
{
"epoch": 0.17172774869109947,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 7.849641799926758,
"fcm_dpo/q_t": 0.4805837869644165,
"grad_norm": 27.822717666625977,
"learning_rate": 4.92735454356513e-07,
"logits/chosen": -0.7268111705780029,
"logits/rejected": -0.7341311573982239,
"logps/chosen": -276.9981994628906,
"logps/ref_chosen": -279.36395263671875,
"logps/ref_rejected": -236.51365661621094,
"logps/rejected": -241.99754333496094,
"loss": 5.2655,
"margin_dpo/margin_mean": 7.849642276763916,
"margin_dpo/margin_std": 15.717304229736328,
"step": 82
},
{
"epoch": 0.17382198952879582,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 8.690813064575195,
"fcm_dpo/q_t": 0.4784224033355713,
"grad_norm": 30.746620178222656,
"learning_rate": 4.922908189595017e-07,
"logits/chosen": -0.6818782687187195,
"logits/rejected": -0.6649395823478699,
"logps/chosen": -273.88427734375,
"logps/ref_chosen": -274.21923828125,
"logps/ref_rejected": -276.2212219238281,
"logps/rejected": -284.57708740234375,
"loss": 5.2419,
"margin_dpo/margin_mean": 8.690812110900879,
"margin_dpo/margin_std": 18.241342544555664,
"step": 83
},
{
"epoch": 0.17591623036649215,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 7.327735424041748,
"fcm_dpo/q_t": 0.48182323575019836,
"grad_norm": 29.881669998168945,
"learning_rate": 4.918331902411841e-07,
"logits/chosen": -0.7337232232093811,
"logits/rejected": -0.7480797171592712,
"logps/chosen": -293.81634521484375,
"logps/ref_chosen": -294.3975524902344,
"logps/ref_rejected": -279.81884765625,
"logps/rejected": -286.56536865234375,
"loss": 5.2885,
"margin_dpo/margin_mean": 7.327735900878906,
"margin_dpo/margin_std": 16.83694839477539,
"step": 84
},
{
"epoch": 0.17801047120418848,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 5.688338279724121,
"fcm_dpo/q_t": 0.48588407039642334,
"grad_norm": 29.326702117919922,
"learning_rate": 4.913625927427995e-07,
"logits/chosen": -0.6618837118148804,
"logits/rejected": -0.6701700687408447,
"logps/chosen": -245.18934631347656,
"logps/ref_chosen": -243.66220092773438,
"logps/ref_rejected": -263.9421691894531,
"logps/rejected": -271.15765380859375,
"loss": 5.3478,
"margin_dpo/margin_mean": 5.688338279724121,
"margin_dpo/margin_std": 15.453804016113281,
"step": 85
},
{
"epoch": 0.18010471204188483,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 8.833646774291992,
"fcm_dpo/q_t": 0.4780700206756592,
"grad_norm": 34.98909378051758,
"learning_rate": 4.908790517010636e-07,
"logits/chosen": -0.6862183809280396,
"logits/rejected": -0.6790927052497864,
"logps/chosen": -308.2589111328125,
"logps/ref_chosen": -309.4306945800781,
"logps/ref_rejected": -290.91278076171875,
"logps/rejected": -298.57464599609375,
"loss": 5.2316,
"margin_dpo/margin_mean": 8.833646774291992,
"margin_dpo/margin_std": 17.44679832458496,
"step": 86
},
{
"epoch": 0.18219895287958116,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 10.396361351013184,
"fcm_dpo/q_t": 0.4743334650993347,
"grad_norm": 29.610158920288086,
"learning_rate": 4.903825930468148e-07,
"logits/chosen": -0.7608101963996887,
"logits/rejected": -0.7554056644439697,
"logps/chosen": -278.097412109375,
"logps/ref_chosen": -278.0277099609375,
"logps/ref_rejected": -245.70123291015625,
"logps/rejected": -256.167236328125,
"loss": 5.1792,
"margin_dpo/margin_mean": 10.396361351013184,
"margin_dpo/margin_std": 19.02577018737793,
"step": 87
},
{
"epoch": 0.18429319371727748,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 8.898334503173828,
"fcm_dpo/q_t": 0.47800639271736145,
"grad_norm": 28.828121185302734,
"learning_rate": 4.898732434036243e-07,
"logits/chosen": -0.7627401351928711,
"logits/rejected": -0.7784541845321655,
"logps/chosen": -268.6249694824219,
"logps/ref_chosen": -266.5148010253906,
"logps/ref_rejected": -265.90081787109375,
"logps/rejected": -276.9093017578125,
"loss": 5.2352,
"margin_dpo/margin_mean": 8.898333549499512,
"margin_dpo/margin_std": 19.069538116455078,
"step": 88
},
{
"epoch": 0.18638743455497384,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 9.238783836364746,
"fcm_dpo/q_t": 0.4771183729171753,
"grad_norm": 30.569440841674805,
"learning_rate": 4.893510300863676e-07,
"logits/chosen": -0.742774486541748,
"logits/rejected": -0.733336329460144,
"logps/chosen": -265.68560791015625,
"logps/ref_chosen": -265.6893005371094,
"logps/ref_rejected": -251.49314880371094,
"logps/rejected": -260.728271484375,
"loss": 5.2215,
"margin_dpo/margin_mean": 9.238783836364746,
"margin_dpo/margin_std": 18.268709182739258,
"step": 89
},
{
"epoch": 0.18848167539267016,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 8.208330154418945,
"fcm_dpo/q_t": 0.47965091466903687,
"grad_norm": 29.82548713684082,
"learning_rate": 4.8881598109976e-07,
"logits/chosen": -0.7374743819236755,
"logits/rejected": -0.7458919286727905,
"logps/chosen": -308.52386474609375,
"logps/ref_chosen": -307.4250183105469,
"logps/ref_rejected": -265.7172546386719,
"logps/rejected": -275.0244140625,
"loss": 5.2577,
"margin_dpo/margin_mean": 8.208331108093262,
"margin_dpo/margin_std": 17.711767196655273,
"step": 90
},
{
"epoch": 0.1905759162303665,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 9.566421508789062,
"fcm_dpo/q_t": 0.4764086604118347,
"grad_norm": 32.80574417114258,
"learning_rate": 4.882681251368548e-07,
"logits/chosen": -0.6797096729278564,
"logits/rejected": -0.6969512104988098,
"logps/chosen": -237.80616760253906,
"logps/ref_chosen": -235.74098205566406,
"logps/ref_rejected": -226.6428985595703,
"logps/rejected": -238.27447509765625,
"loss": 5.214,
"margin_dpo/margin_mean": 9.566422462463379,
"margin_dpo/margin_std": 19.243335723876953,
"step": 91
},
{
"epoch": 0.19267015706806281,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 10.109006881713867,
"fcm_dpo/q_t": 0.47501635551452637,
"grad_norm": 33.64514923095703,
"learning_rate": 4.877074915775048e-07,
"logits/chosen": -0.7398884892463684,
"logits/rejected": -0.7234249711036682,
"logps/chosen": -286.53607177734375,
"logps/ref_chosen": -283.4475402832031,
"logps/ref_rejected": -273.134033203125,
"logps/rejected": -286.33154296875,
"loss": 5.1988,
"margin_dpo/margin_mean": 10.109006881713867,
"margin_dpo/margin_std": 21.27271842956543,
"step": 92
},
{
"epoch": 0.19476439790575917,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 9.553420066833496,
"fcm_dpo/q_t": 0.47643160820007324,
"grad_norm": 28.544116973876953,
"learning_rate": 4.871341104867864e-07,
"logits/chosen": -0.7257264256477356,
"logits/rejected": -0.7486892342567444,
"logps/chosen": -235.73709106445312,
"logps/ref_chosen": -233.33714294433594,
"logps/ref_rejected": -230.54273986816406,
"logps/rejected": -242.49607849121094,
"loss": 5.2133,
"margin_dpo/margin_mean": 9.553420066833496,
"margin_dpo/margin_std": 19.524932861328125,
"step": 93
},
{
"epoch": 0.1968586387434555,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 9.401151657104492,
"fcm_dpo/q_t": 0.4767385721206665,
"grad_norm": 31.77324867248535,
"learning_rate": 4.865480126133871e-07,
"logits/chosen": -0.6870794296264648,
"logits/rejected": -0.708354651927948,
"logps/chosen": -297.0319519042969,
"logps/ref_chosen": -294.6528015136719,
"logps/ref_rejected": -283.657958984375,
"logps/rejected": -295.43829345703125,
"loss": 5.2293,
"margin_dpo/margin_mean": 9.401151657104492,
"margin_dpo/margin_std": 21.88288116455078,
"step": 94
},
{
"epoch": 0.19895287958115182,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 10.18586254119873,
"fcm_dpo/q_t": 0.47492167353630066,
"grad_norm": 33.443687438964844,
"learning_rate": 4.859492293879573e-07,
"logits/chosen": -0.7171642184257507,
"logits/rejected": -0.7374083399772644,
"logps/chosen": -315.00238037109375,
"logps/ref_chosen": -311.6697082519531,
"logps/ref_rejected": -262.7471923828125,
"logps/rejected": -276.2657775878906,
"loss": 5.2048,
"margin_dpo/margin_mean": 10.185861587524414,
"margin_dpo/margin_std": 22.405195236206055,
"step": 95
},
{
"epoch": 0.20104712041884817,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 10.209500312805176,
"fcm_dpo/q_t": 0.4748857915401459,
"grad_norm": 32.14888381958008,
"learning_rate": 4.853377929214243e-07,
"logits/chosen": -0.7045935392379761,
"logits/rejected": -0.7170954942703247,
"logps/chosen": -287.2989807128906,
"logps/ref_chosen": -282.55596923828125,
"logps/ref_rejected": -242.71588134765625,
"logps/rejected": -257.6683654785156,
"loss": 5.2063,
"margin_dpo/margin_mean": 10.209501266479492,
"margin_dpo/margin_std": 23.4683837890625,
"step": 96
},
{
"epoch": 0.2031413612565445,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 12.014219284057617,
"fcm_dpo/q_t": 0.470403254032135,
"grad_norm": 32.0782470703125,
"learning_rate": 4.847137360032699e-07,
"logits/chosen": -0.7504763007164001,
"logits/rejected": -0.7378920316696167,
"logps/chosen": -308.1119384765625,
"logps/ref_chosen": -303.57781982421875,
"logps/ref_rejected": -264.22491455078125,
"logps/rejected": -280.7732849121094,
"loss": 5.1316,
"margin_dpo/margin_mean": 12.014220237731934,
"margin_dpo/margin_std": 22.27433967590332,
"step": 97
},
{
"epoch": 0.20523560209424083,
"fcm_dpo/beta": 0.010397407226264477,
"fcm_dpo/delta": 0.08688867092132568,
"fcm_dpo/margin": 12.346640586853027,
"fcm_dpo/q_t": 0.46929067373275757,
"grad_norm": 35.79840087890625,
"learning_rate": 4.84077092099773e-07,
"logits/chosen": -0.7676523923873901,
"logits/rejected": -0.7795037031173706,
"logps/chosen": -291.7115173339844,
"logps/ref_chosen": -286.8303527832031,
"logps/ref_rejected": -278.08331298828125,
"logps/rejected": -295.31109619140625,
"loss": 5.1205,
"margin_dpo/margin_mean": 12.346640586853027,
"margin_dpo/margin_std": 22.685035705566406,
"step": 98
},
{
"epoch": 0.20732984293193718,
"fcm_dpo/beta": 0.011020062491297722,
"fcm_dpo/delta": 0.0930284708738327,
"fcm_dpo/margin": 12.543420791625977,
"fcm_dpo/q_t": 0.4665352702140808,
"grad_norm": 35.79397201538086,
"learning_rate": 4.834278953522137e-07,
"logits/chosen": -0.734366774559021,
"logits/rejected": -0.7471228837966919,
"logps/chosen": -285.3235168457031,
"logps/ref_chosen": -279.92120361328125,
"logps/ref_rejected": -250.3365478515625,
"logps/rejected": -268.2822570800781,
"loss": 5.1041,
"margin_dpo/margin_mean": 12.54341983795166,
"margin_dpo/margin_std": 27.157373428344727,
"step": 99
},
{
"epoch": 0.2094240837696335,
"fcm_dpo/beta": 0.012127361260354519,
"fcm_dpo/delta": 0.08171184360980988,
"fcm_dpo/margin": 12.563081741333008,
"fcm_dpo/q_t": 0.4635997712612152,
"grad_norm": 41.947509765625,
"learning_rate": 4.827661805750437e-07,
"logits/chosen": -0.778613269329071,
"logits/rejected": -0.7914372086524963,
"logps/chosen": -304.66107177734375,
"logps/ref_chosen": -296.8276672363281,
"logps/ref_rejected": -275.56146240234375,
"logps/rejected": -295.9578857421875,
"loss": 5.0543,
"margin_dpo/margin_mean": 12.563082695007324,
"margin_dpo/margin_std": 24.145723342895508,
"step": 100
},
{
"epoch": 0.21151832460732983,
"fcm_dpo/beta": 0.012680807150900364,
"fcm_dpo/delta": 0.07358981668949127,
"fcm_dpo/margin": 14.978999137878418,
"fcm_dpo/q_t": 0.45468670129776,
"grad_norm": 40.07128143310547,
"learning_rate": 4.820919832540181e-07,
"logits/chosen": -0.763048529624939,
"logits/rejected": -0.7765510678291321,
"logps/chosen": -257.9189758300781,
"logps/ref_chosen": -252.74203491210938,
"logps/ref_rejected": -276.4185485839844,
"logps/rejected": -296.57452392578125,
"loss": 4.9422,
"margin_dpo/margin_mean": 14.978999137878418,
"margin_dpo/margin_std": 26.411649703979492,
"step": 101
},
{
"epoch": 0.2136125654450262,
"fcm_dpo/beta": 0.014082551002502441,
"fcm_dpo/delta": 0.15481697022914886,
"fcm_dpo/margin": 14.866358757019043,
"fcm_dpo/q_t": 0.44989722967147827,
"grad_norm": 43.43537139892578,
"learning_rate": 4.814053395442932e-07,
"logits/chosen": -0.7522528767585754,
"logits/rejected": -0.7483828067779541,
"logps/chosen": -224.5865020751953,
"logps/ref_chosen": -219.5537109375,
"logps/ref_rejected": -231.90853881835938,
"logps/rejected": -251.80767822265625,
"loss": 4.8822,
"margin_dpo/margin_mean": 14.866357803344727,
"margin_dpo/margin_std": 24.949522018432617,
"step": 102
},
{
"epoch": 0.2157068062827225,
"fcm_dpo/beta": 0.0163778867572546,
"fcm_dpo/delta": 0.15776585042476654,
"fcm_dpo/margin": 13.612730026245117,
"fcm_dpo/q_t": 0.44889208674430847,
"grad_norm": 51.776180267333984,
"learning_rate": 4.807062862684873e-07,
"logits/chosen": -0.7638022899627686,
"logits/rejected": -0.7605728507041931,
"logps/chosen": -264.34832763671875,
"logps/ref_chosen": -259.6750793457031,
"logps/ref_rejected": -278.7400817871094,
"logps/rejected": -297.0260925292969,
"loss": 4.8895,
"margin_dpo/margin_mean": 13.612730026245117,
"margin_dpo/margin_std": 25.700881958007812,
"step": 103
},
{
"epoch": 0.21780104712041884,
"fcm_dpo/beta": 0.01778705231845379,
"fcm_dpo/delta": 0.1246490478515625,
"fcm_dpo/margin": 10.009342193603516,
"fcm_dpo/q_t": 0.4585469961166382,
"grad_norm": 57.93727493286133,
"learning_rate": 4.799948609147061e-07,
"logits/chosen": -0.767953634262085,
"logits/rejected": -0.7740171551704407,
"logps/chosen": -277.0461730957031,
"logps/ref_chosen": -267.9741516113281,
"logps/ref_rejected": -230.5306396484375,
"logps/rejected": -249.61195373535156,
"loss": 5.0799,
"margin_dpo/margin_mean": 10.009342193603516,
"margin_dpo/margin_std": 26.243793487548828,
"step": 104
},
{
"epoch": 0.2198952879581152,
"fcm_dpo/beta": 0.019210072234272957,
"fcm_dpo/delta": 0.10028429329395294,
"fcm_dpo/margin": 20.508113861083984,
"fcm_dpo/q_t": 0.40895533561706543,
"grad_norm": 61.65319061279297,
"learning_rate": 4.792711016345321e-07,
"logits/chosen": -0.7570338845252991,
"logits/rejected": -0.7671861052513123,
"logps/chosen": -327.42303466796875,
"logps/ref_chosen": -322.25482177734375,
"logps/ref_rejected": -279.02978515625,
"logps/rejected": -304.7060852050781,
"loss": 4.4105,
"margin_dpo/margin_mean": 20.508113861083984,
"margin_dpo/margin_std": 26.88151741027832,
"step": 105
},
{
"epoch": 0.22198952879581152,
"fcm_dpo/beta": 0.021580977365374565,
"fcm_dpo/delta": 0.1117968037724495,
"fcm_dpo/margin": 12.482087135314941,
"fcm_dpo/q_t": 0.4391931891441345,
"grad_norm": 78.94829559326172,
"learning_rate": 4.785350472409791e-07,
"logits/chosen": -0.74703049659729,
"logits/rejected": -0.784168004989624,
"logps/chosen": -308.35302734375,
"logps/ref_chosen": -296.15777587890625,
"logps/ref_rejected": -266.2691650390625,
"logps/rejected": -290.9465026855469,
"loss": 4.9393,
"margin_dpo/margin_mean": 12.482088088989258,
"margin_dpo/margin_std": 29.17128562927246,
"step": 106
},
{
"epoch": 0.22408376963350785,
"fcm_dpo/beta": 0.023654192686080933,
"fcm_dpo/delta": 0.14638309180736542,
"fcm_dpo/margin": 19.429903030395508,
"fcm_dpo/q_t": 0.3981781005859375,
"grad_norm": 77.15711975097656,
"learning_rate": 4.777867372064105e-07,
"logits/chosen": -0.7866736054420471,
"logits/rejected": -0.7802896499633789,
"logps/chosen": -310.7807312011719,
"logps/ref_chosen": -306.996337890625,
"logps/ref_rejected": -296.79412841796875,
"logps/rejected": -320.0083923339844,
"loss": 4.3175,
"margin_dpo/margin_mean": 19.429903030395508,
"margin_dpo/margin_std": 27.29071044921875,
"step": 107
},
{
"epoch": 0.2261780104712042,
"fcm_dpo/beta": 0.02535373345017433,
"fcm_dpo/delta": 0.09316066652536392,
"fcm_dpo/margin": 18.04349136352539,
"fcm_dpo/q_t": 0.40335312485694885,
"grad_norm": 286.72467041015625,
"learning_rate": 4.770262116604223e-07,
"logits/chosen": -0.7606772780418396,
"logits/rejected": -0.7718777656555176,
"logps/chosen": -299.8807067871094,
"logps/ref_chosen": -295.1526794433594,
"logps/ref_rejected": -235.974853515625,
"logps/rejected": -258.74639892578125,
"loss": 4.4948,
"margin_dpo/margin_mean": 18.043493270874023,
"margin_dpo/margin_std": 29.69964599609375,
"step": 108
},
{
"epoch": 0.22827225130890053,
"fcm_dpo/beta": 0.02665630169212818,
"fcm_dpo/delta": 0.009390286169946194,
"fcm_dpo/margin": 19.90768814086914,
"fcm_dpo/q_t": 0.38985177874565125,
"grad_norm": 89.05216979980469,
"learning_rate": 4.7625351138769166e-07,
"logits/chosen": -0.7884357571601868,
"logits/rejected": -0.785269558429718,
"logps/chosen": -333.0575256347656,
"logps/ref_chosen": -325.9248046875,
"logps/ref_rejected": -279.15423583984375,
"logps/rejected": -306.19464111328125,
"loss": 4.3126,
"margin_dpo/margin_mean": 19.907691955566406,
"margin_dpo/margin_std": 30.949382781982422,
"step": 109
},
{
"epoch": 0.23036649214659685,
"fcm_dpo/beta": 0.028177602216601372,
"fcm_dpo/delta": 0.07937376946210861,
"fcm_dpo/margin": 18.563549041748047,
"fcm_dpo/q_t": 0.39298582077026367,
"grad_norm": 85.68260192871094,
"learning_rate": 4.75468677825789e-07,
"logits/chosen": -0.797019362449646,
"logits/rejected": -0.785659670829773,
"logps/chosen": -281.6994934082031,
"logps/ref_chosen": -274.439208984375,
"logps/ref_rejected": -260.0552062988281,
"logps/rejected": -285.87908935546875,
"loss": 4.4857,
"margin_dpo/margin_mean": 18.563547134399414,
"margin_dpo/margin_std": 32.792720794677734,
"step": 110
},
{
"epoch": 0.2324607329842932,
"fcm_dpo/beta": 0.029292024672031403,
"fcm_dpo/delta": 0.026741422712802887,
"fcm_dpo/margin": 19.533769607543945,
"fcm_dpo/q_t": 0.3849615156650543,
"grad_norm": 93.73371887207031,
"learning_rate": 4.7467175306295647e-07,
"logits/chosen": -0.8400822877883911,
"logits/rejected": -0.8202126622200012,
"logps/chosen": -336.7535705566406,
"logps/ref_chosen": -329.2361755371094,
"logps/ref_rejected": -287.82830810546875,
"logps/rejected": -314.87945556640625,
"loss": 4.4179,
"margin_dpo/margin_mean": 19.533769607543945,
"margin_dpo/margin_std": 32.756980895996094,
"step": 111
},
{
"epoch": 0.23455497382198953,
"fcm_dpo/beta": 0.028662927448749542,
"fcm_dpo/delta": -0.024072205647826195,
"fcm_dpo/margin": 12.431413650512695,
"fcm_dpo/q_t": 0.4280843138694763,
"grad_norm": 117.88312530517578,
"learning_rate": 4.7386277983585053e-07,
"logits/chosen": -0.7474071979522705,
"logits/rejected": -0.7788360714912415,
"logps/chosen": -269.38671875,
"logps/ref_chosen": -257.0593566894531,
"logps/ref_rejected": -272.9595031738281,
"logps/rejected": -297.7182922363281,
"loss": 5.1908,
"margin_dpo/margin_mean": 12.431414604187012,
"margin_dpo/margin_std": 33.55975341796875,
"step": 112
},
{
"epoch": 0.23664921465968586,
"fcm_dpo/beta": 0.02724049799144268,
"fcm_dpo/delta": -0.033632293343544006,
"fcm_dpo/margin": 23.016876220703125,
"fcm_dpo/q_t": 0.3752860426902771,
"grad_norm": 88.89751434326172,
"learning_rate": 4.7304180152725024e-07,
"logits/chosen": -0.7992004156112671,
"logits/rejected": -0.8044995069503784,
"logps/chosen": -295.09869384765625,
"logps/ref_chosen": -286.0416564941406,
"logps/ref_rejected": -270.374267578125,
"logps/rejected": -302.4481506347656,
"loss": 4.276,
"margin_dpo/margin_mean": 23.016876220703125,
"margin_dpo/margin_std": 36.376365661621094,
"step": 113
},
{
"epoch": 0.2387434554973822,
"fcm_dpo/beta": 0.029494168236851692,
"fcm_dpo/delta": 0.07874082773923874,
"fcm_dpo/margin": 12.47346019744873,
"fcm_dpo/q_t": 0.4217187762260437,
"grad_norm": 107.33486938476562,
"learning_rate": 4.7220886216373085e-07,
"logits/chosen": -0.8364426493644714,
"logits/rejected": -0.8387445211410522,
"logps/chosen": -271.4934387207031,
"logps/ref_chosen": -260.0084533691406,
"logps/ref_rejected": -246.67190551757812,
"logps/rejected": -270.6303405761719,
"loss": 5.0381,
"margin_dpo/margin_mean": 12.473462104797363,
"margin_dpo/margin_std": 31.346172332763672,
"step": 114
},
{
"epoch": 0.24083769633507854,
"fcm_dpo/beta": 0.0292170662432909,
"fcm_dpo/delta": 0.01833523064851761,
"fcm_dpo/margin": 12.282448768615723,
"fcm_dpo/q_t": 0.43316584825515747,
"grad_norm": 120.01754760742188,
"learning_rate": 4.7136400641330245e-07,
"logits/chosen": -0.8408206701278687,
"logits/rejected": -0.8037774562835693,
"logps/chosen": -310.68414306640625,
"logps/ref_chosen": -299.4229736328125,
"logps/ref_rejected": -272.1186828613281,
"logps/rejected": -295.662353515625,
"loss": 5.1455,
"margin_dpo/margin_mean": 12.28244686126709,
"margin_dpo/margin_std": 34.40483856201172,
"step": 115
},
{
"epoch": 0.24293193717277486,
"fcm_dpo/beta": 0.031351905316114426,
"fcm_dpo/delta": 0.08501939475536346,
"fcm_dpo/margin": 16.533979415893555,
"fcm_dpo/q_t": 0.3935103118419647,
"grad_norm": 97.82234191894531,
"learning_rate": 4.70507279583015e-07,
"logits/chosen": -0.8445395827293396,
"logits/rejected": -0.8092914819717407,
"logps/chosen": -285.07208251953125,
"logps/ref_chosen": -279.263916015625,
"logps/ref_rejected": -253.6192169189453,
"logps/rejected": -275.9613342285156,
"loss": 4.5439,
"margin_dpo/margin_mean": 16.533979415893555,
"margin_dpo/margin_std": 30.13557243347168,
"step": 116
},
{
"epoch": 0.2450261780104712,
"fcm_dpo/beta": 0.03163749352097511,
"fcm_dpo/delta": -0.10516245663166046,
"fcm_dpo/margin": 17.771800994873047,
"fcm_dpo/q_t": 0.38749605417251587,
"grad_norm": 112.60164642333984,
"learning_rate": 4.6963872761652834e-07,
"logits/chosen": -0.8044043779373169,
"logits/rejected": -0.8110002875328064,
"logps/chosen": -266.38836669921875,
"logps/ref_chosen": -259.2248840332031,
"logps/ref_rejected": -229.3042755126953,
"logps/rejected": -254.23956298828125,
"loss": 4.4661,
"margin_dpo/margin_mean": 17.771800994873047,
"margin_dpo/margin_std": 28.43597412109375,
"step": 117
},
{
"epoch": 0.24712041884816754,
"fcm_dpo/beta": 0.029554441571235657,
"fcm_dpo/delta": -0.03656444326043129,
"fcm_dpo/margin": 19.763996124267578,
"fcm_dpo/q_t": 0.3844214081764221,
"grad_norm": 114.48808288574219,
"learning_rate": 4.687583970916486e-07,
"logits/chosen": -0.7859967947006226,
"logits/rejected": -0.7776579856872559,
"logps/chosen": -277.0380859375,
"logps/ref_chosen": -267.0707092285156,
"logps/ref_rejected": -272.7322082519531,
"logps/rejected": -302.46356201171875,
"loss": 4.481,
"margin_dpo/margin_mean": 19.763996124267578,
"margin_dpo/margin_std": 34.8245964050293,
"step": 118
},
{
"epoch": 0.24921465968586387,
"fcm_dpo/beta": 0.029193801805377007,
"fcm_dpo/delta": -0.002929478883743286,
"fcm_dpo/margin": 15.792888641357422,
"fcm_dpo/q_t": 0.40991657972335815,
"grad_norm": 117.3210678100586,
"learning_rate": 4.6786633521783005e-07,
"logits/chosen": -0.8547701835632324,
"logits/rejected": -0.8576165437698364,
"logps/chosen": -336.96929931640625,
"logps/ref_chosen": -324.6766357421875,
"logps/ref_rejected": -306.0322265625,
"logps/rejected": -334.1177673339844,
"loss": 4.8945,
"margin_dpo/margin_mean": 15.792887687683105,
"margin_dpo/margin_std": 34.564945220947266,
"step": 119
},
{
"epoch": 0.2513089005235602,
"fcm_dpo/beta": 0.029505960643291473,
"fcm_dpo/delta": 0.015900129452347755,
"fcm_dpo/margin": 15.261905670166016,
"fcm_dpo/q_t": 0.4121847450733185,
"grad_norm": 98.33564758300781,
"learning_rate": 4.669625898336438e-07,
"logits/chosen": -0.8068041801452637,
"logits/rejected": -0.8266972303390503,
"logps/chosen": -325.0469665527344,
"logps/ref_chosen": -315.2617492675781,
"logps/ref_rejected": -265.32501220703125,
"logps/rejected": -290.37213134765625,
"loss": 4.8972,
"margin_dpo/margin_mean": 15.261905670166016,
"margin_dpo/margin_std": 33.490478515625,
"step": 120
},
{
"epoch": 0.2534031413612565,
"fcm_dpo/beta": 0.031028514727950096,
"fcm_dpo/delta": 0.14798876643180847,
"fcm_dpo/margin": 12.815652847290039,
"fcm_dpo/q_t": 0.4254152476787567,
"grad_norm": 112.03368377685547,
"learning_rate": 4.6604720940421207e-07,
"logits/chosen": -0.8176071047782898,
"logits/rejected": -0.834455668926239,
"logps/chosen": -236.07276916503906,
"logps/ref_chosen": -222.99609375,
"logps/ref_rejected": -226.92860412597656,
"logps/rejected": -252.82089233398438,
"loss": 5.0135,
"margin_dpo/margin_mean": 12.815652847290039,
"margin_dpo/margin_std": 32.47069549560547,
"step": 121
},
{
"epoch": 0.2554973821989529,
"fcm_dpo/beta": 0.031968794763088226,
"fcm_dpo/delta": -0.020312972366809845,
"fcm_dpo/margin": 15.189802169799805,
"fcm_dpo/q_t": 0.40527546405792236,
"grad_norm": 117.55231475830078,
"learning_rate": 4.651202430186092e-07,
"logits/chosen": -0.8868773579597473,
"logits/rejected": -0.8504554033279419,
"logps/chosen": -288.91522216796875,
"logps/ref_chosen": -276.02630615234375,
"logps/ref_rejected": -277.97418212890625,
"logps/rejected": -306.0528259277344,
"loss": 4.926,
"margin_dpo/margin_mean": 15.189804077148438,
"margin_dpo/margin_std": 34.809261322021484,
"step": 122
},
{
"epoch": 0.25759162303664923,
"fcm_dpo/beta": 0.031755901873111725,
"fcm_dpo/delta": -0.06527850031852722,
"fcm_dpo/margin": 20.802139282226562,
"fcm_dpo/q_t": 0.36643853783607483,
"grad_norm": 115.1010513305664,
"learning_rate": 4.6418174038722924e-07,
"logits/chosen": -0.7995103597640991,
"logits/rejected": -0.7998446226119995,
"logps/chosen": -335.6615295410156,
"logps/ref_chosen": -328.1546325683594,
"logps/ref_rejected": -280.6911315917969,
"logps/rejected": -309.00018310546875,
"loss": 4.2495,
"margin_dpo/margin_mean": 20.802139282226562,
"margin_dpo/margin_std": 32.38178634643555,
"step": 123
},
{
"epoch": 0.25968586387434556,
"fcm_dpo/beta": 0.03064357116818428,
"fcm_dpo/delta": 0.03005780465900898,
"fcm_dpo/margin": 16.928504943847656,
"fcm_dpo/q_t": 0.3922573924064636,
"grad_norm": 101.0470199584961,
"learning_rate": 4.6323175183912023e-07,
"logits/chosen": -0.8197784423828125,
"logits/rejected": -0.7911043763160706,
"logps/chosen": -285.9018859863281,
"logps/ref_chosen": -275.6961975097656,
"logps/ref_rejected": -225.361572265625,
"logps/rejected": -252.49575805664062,
"loss": 4.5875,
"margin_dpo/margin_mean": 16.928504943847656,
"margin_dpo/margin_std": 30.196701049804688,
"step": 124
},
{
"epoch": 0.2617801047120419,
"fcm_dpo/beta": 0.03097906894981861,
"fcm_dpo/delta": -0.034325286746025085,
"fcm_dpo/margin": 16.488847732543945,
"fcm_dpo/q_t": 0.4054495692253113,
"grad_norm": 123.87239074707031,
"learning_rate": 4.6227032831928483e-07,
"logits/chosen": -0.7871371507644653,
"logits/rejected": -0.7472814321517944,
"logps/chosen": -288.9555358886719,
"logps/ref_chosen": -278.06976318359375,
"logps/ref_rejected": -265.63873291015625,
"logps/rejected": -293.0133361816406,
"loss": 4.8993,
"margin_dpo/margin_mean": 16.488849639892578,
"margin_dpo/margin_std": 36.578277587890625,
"step": 125
},
{
"epoch": 0.2638743455497382,
"fcm_dpo/beta": 0.02998891845345497,
"fcm_dpo/delta": -0.01702306792140007,
"fcm_dpo/margin": 18.2719669342041,
"fcm_dpo/q_t": 0.38644424080848694,
"grad_norm": 109.28790283203125,
"learning_rate": 4.612975213859487e-07,
"logits/chosen": -0.7935796976089478,
"logits/rejected": -0.8115208745002747,
"logps/chosen": -329.9949951171875,
"logps/ref_chosen": -321.3960876464844,
"logps/ref_rejected": -285.37664794921875,
"logps/rejected": -312.24749755859375,
"loss": 4.4632,
"margin_dpo/margin_mean": 18.271968841552734,
"margin_dpo/margin_std": 31.196125030517578,
"step": 126
},
{
"epoch": 0.26596858638743454,
"fcm_dpo/beta": 0.030074207112193108,
"fcm_dpo/delta": -0.09070973843336105,
"fcm_dpo/margin": 20.92560386657715,
"fcm_dpo/q_t": 0.3753412663936615,
"grad_norm": 109.4990463256836,
"learning_rate": 4.603133832077953e-07,
"logits/chosen": -0.8826844692230225,
"logits/rejected": -0.836493730545044,
"logps/chosen": -313.4111328125,
"logps/ref_chosen": -306.55877685546875,
"logps/ref_rejected": -274.8651428222656,
"logps/rejected": -302.6430969238281,
"loss": 4.3094,
"margin_dpo/margin_mean": 20.925601959228516,
"margin_dpo/margin_std": 31.99746322631836,
"step": 127
},
{
"epoch": 0.2680628272251309,
"fcm_dpo/beta": 0.026865279302001,
"fcm_dpo/delta": -0.0676947608590126,
"fcm_dpo/margin": 22.934003829956055,
"fcm_dpo/q_t": 0.3755223751068115,
"grad_norm": 87.6462173461914,
"learning_rate": 4.5931796656116837e-07,
"logits/chosen": -0.7586472630500793,
"logits/rejected": -0.7625279426574707,
"logps/chosen": -268.45068359375,
"logps/ref_chosen": -265.3973693847656,
"logps/ref_rejected": -250.9737548828125,
"logps/rejected": -276.9610290527344,
"loss": 4.2379,
"margin_dpo/margin_mean": 22.934003829956055,
"margin_dpo/margin_std": 35.073516845703125,
"step": 128
},
{
"epoch": 0.27015706806282724,
"fcm_dpo/beta": 0.027813997119665146,
"fcm_dpo/delta": 0.054987452924251556,
"fcm_dpo/margin": 19.57146453857422,
"fcm_dpo/q_t": 0.3910842835903168,
"grad_norm": 93.74571990966797,
"learning_rate": 4.5831132482724193e-07,
"logits/chosen": -0.799019455909729,
"logits/rejected": -0.8034683465957642,
"logps/chosen": -307.2705993652344,
"logps/ref_chosen": -303.158447265625,
"logps/ref_rejected": -275.9891052246094,
"logps/rejected": -299.6726989746094,
"loss": 4.4534,
"margin_dpo/margin_mean": 19.57146453857422,
"margin_dpo/margin_std": 34.049896240234375,
"step": 129
},
{
"epoch": 0.27225130890052357,
"fcm_dpo/beta": 0.028576284646987915,
"fcm_dpo/delta": 0.07245050370693207,
"fcm_dpo/margin": 16.821483612060547,
"fcm_dpo/q_t": 0.4015900492668152,
"grad_norm": 103.17170715332031,
"learning_rate": 4.5729351198915705e-07,
"logits/chosen": -0.7722957730293274,
"logits/rejected": -0.8155333399772644,
"logps/chosen": -292.2886047363281,
"logps/ref_chosen": -286.4073486328125,
"logps/ref_rejected": -294.38665771484375,
"logps/rejected": -317.08941650390625,
"loss": 4.6124,
"margin_dpo/margin_mean": 16.82148551940918,
"margin_dpo/margin_std": 32.20860290527344,
"step": 130
},
{
"epoch": 0.2743455497382199,
"fcm_dpo/beta": 0.03118608519434929,
"fcm_dpo/delta": 0.12869605422019958,
"fcm_dpo/margin": 15.276546478271484,
"fcm_dpo/q_t": 0.40500974655151367,
"grad_norm": 124.76173400878906,
"learning_rate": 4.5626458262912735e-07,
"logits/chosen": -0.8375826478004456,
"logits/rejected": -0.7880801558494568,
"logps/chosen": -317.2205810546875,
"logps/ref_chosen": -311.5650634765625,
"logps/ref_rejected": -291.62432861328125,
"logps/rejected": -312.5564270019531,
"loss": 4.7363,
"margin_dpo/margin_mean": 15.276546478271484,
"margin_dpo/margin_std": 31.08780288696289,
"step": 131
},
{
"epoch": 0.2764397905759162,
"fcm_dpo/beta": 0.03283756971359253,
"fcm_dpo/delta": -0.08807911723852158,
"fcm_dpo/margin": 20.79026985168457,
"fcm_dpo/q_t": 0.3727704584598541,
"grad_norm": 137.25875854492188,
"learning_rate": 4.5522459192551166e-07,
"logits/chosen": -0.796299934387207,
"logits/rejected": -0.7804505228996277,
"logps/chosen": -272.0005798339844,
"logps/ref_chosen": -270.0818176269531,
"logps/ref_rejected": -284.3084411621094,
"logps/rejected": -307.0174560546875,
"loss": 4.3546,
"margin_dpo/margin_mean": 20.79026985168457,
"margin_dpo/margin_std": 34.049842834472656,
"step": 132
},
{
"epoch": 0.27853403141361255,
"fcm_dpo/beta": 0.0303749218583107,
"fcm_dpo/delta": -0.02263142168521881,
"fcm_dpo/margin": 18.328353881835938,
"fcm_dpo/q_t": 0.38462090492248535,
"grad_norm": 95.446533203125,
"learning_rate": 4.541735956498554e-07,
"logits/chosen": -0.8196045160293579,
"logits/rejected": -0.8250406980514526,
"logps/chosen": -287.2611389160156,
"logps/ref_chosen": -285.6213684082031,
"logps/ref_rejected": -251.19386291503906,
"logps/rejected": -271.1619873046875,
"loss": 4.4574,
"margin_dpo/margin_mean": 18.328353881835938,
"margin_dpo/margin_std": 30.608631134033203,
"step": 133
},
{
"epoch": 0.2806282722513089,
"fcm_dpo/beta": 0.030752191320061684,
"fcm_dpo/delta": 0.052163075655698776,
"fcm_dpo/margin": 15.364531517028809,
"fcm_dpo/q_t": 0.40057751536369324,
"grad_norm": 106.3124008178711,
"learning_rate": 4.5311165016389914e-07,
"logits/chosen": -0.8452025651931763,
"logits/rejected": -0.8484188914299011,
"logps/chosen": -327.99664306640625,
"logps/ref_chosen": -318.92083740234375,
"logps/ref_rejected": -293.1894836425781,
"logps/rejected": -317.62982177734375,
"loss": 4.6623,
"margin_dpo/margin_mean": 15.364532470703125,
"margin_dpo/margin_std": 29.14794158935547,
"step": 134
},
{
"epoch": 0.28272251308900526,
"fcm_dpo/beta": 0.031104128807783127,
"fcm_dpo/delta": 0.00010712631046772003,
"fcm_dpo/margin": 17.666057586669922,
"fcm_dpo/q_t": 0.3856186866760254,
"grad_norm": 131.088623046875,
"learning_rate": 4.520388124165564e-07,
"logits/chosen": -0.7372372150421143,
"logits/rejected": -0.7832177877426147,
"logps/chosen": -296.5128173828125,
"logps/ref_chosen": -292.8217468261719,
"logps/ref_rejected": -269.2896728515625,
"logps/rejected": -290.6468505859375,
"loss": 4.3699,
"margin_dpo/margin_mean": 17.66605567932129,
"margin_dpo/margin_std": 27.916501998901367,
"step": 135
},
{
"epoch": 0.2848167539267016,
"fcm_dpo/beta": 0.03133618086576462,
"fcm_dpo/delta": 0.03240864723920822,
"fcm_dpo/margin": 16.50086212158203,
"fcm_dpo/q_t": 0.4006633460521698,
"grad_norm": 129.73684692382812,
"learning_rate": 4.5095513994085974e-07,
"logits/chosen": -0.7910470366477966,
"logits/rejected": -0.7884470224380493,
"logps/chosen": -278.3386535644531,
"logps/ref_chosen": -272.8525390625,
"logps/ref_rejected": -252.68202209472656,
"logps/rejected": -274.6689758300781,
"loss": 4.7126,
"margin_dpo/margin_mean": 16.500864028930664,
"margin_dpo/margin_std": 32.701168060302734,
"step": 136
},
{
"epoch": 0.2869109947643979,
"fcm_dpo/beta": 0.03253614902496338,
"fcm_dpo/delta": 0.03034902550280094,
"fcm_dpo/margin": 15.305620193481445,
"fcm_dpo/q_t": 0.4033234417438507,
"grad_norm": 127.83984375,
"learning_rate": 4.498606908508753e-07,
"logits/chosen": -0.8439798355102539,
"logits/rejected": -0.8307949304580688,
"logps/chosen": -308.5978698730469,
"logps/ref_chosen": -300.7522277832031,
"logps/ref_rejected": -286.1935119628906,
"logps/rejected": -309.34478759765625,
"loss": 4.7393,
"margin_dpo/margin_mean": 15.305620193481445,
"margin_dpo/margin_std": 31.10264778137207,
"step": 137
},
{
"epoch": 0.28900523560209423,
"fcm_dpo/beta": 0.032551947981119156,
"fcm_dpo/delta": 0.005163721740245819,
"fcm_dpo/margin": 18.243833541870117,
"fcm_dpo/q_t": 0.3905155658721924,
"grad_norm": 106.98749542236328,
"learning_rate": 4.487555238385862e-07,
"logits/chosen": -0.7689125537872314,
"logits/rejected": -0.7516045570373535,
"logps/chosen": -294.6270751953125,
"logps/ref_chosen": -288.9369812011719,
"logps/ref_rejected": -263.7076416015625,
"logps/rejected": -287.6415710449219,
"loss": 4.5624,
"margin_dpo/margin_mean": 18.243831634521484,
"margin_dpo/margin_std": 34.325828552246094,
"step": 138
},
{
"epoch": 0.29109947643979056,
"fcm_dpo/beta": 0.033183857798576355,
"fcm_dpo/delta": 0.033980607986450195,
"fcm_dpo/margin": 12.946343421936035,
"fcm_dpo/q_t": 0.4189698100090027,
"grad_norm": 115.80516052246094,
"learning_rate": 4.476396981707453e-07,
"logits/chosen": -0.7807446718215942,
"logits/rejected": -0.8127070069313049,
"logps/chosen": -274.021728515625,
"logps/ref_chosen": -270.0443115234375,
"logps/ref_rejected": -267.3226013183594,
"logps/rejected": -284.246337890625,
"loss": 4.9004,
"margin_dpo/margin_mean": 12.946342468261719,
"margin_dpo/margin_std": 29.353422164916992,
"step": 139
},
{
"epoch": 0.2931937172774869,
"fcm_dpo/beta": 0.034226901829242706,
"fcm_dpo/delta": -0.004557475447654724,
"fcm_dpo/margin": 17.575387954711914,
"fcm_dpo/q_t": 0.38020825386047363,
"grad_norm": 128.48643493652344,
"learning_rate": 4.4651327368569684e-07,
"logits/chosen": -0.8485485315322876,
"logits/rejected": -0.8208659291267395,
"logps/chosen": -287.2353515625,
"logps/ref_chosen": -282.9555969238281,
"logps/ref_rejected": -251.17181396484375,
"logps/rejected": -273.0269775390625,
"loss": 4.2958,
"margin_dpo/margin_mean": 17.575389862060547,
"margin_dpo/margin_std": 27.578033447265625,
"step": 140
},
{
"epoch": 0.29528795811518327,
"fcm_dpo/beta": 0.03216833248734474,
"fcm_dpo/delta": -0.05748983472585678,
"fcm_dpo/margin": 20.221830368041992,
"fcm_dpo/q_t": 0.3674147129058838,
"grad_norm": 109.13350677490234,
"learning_rate": 4.453763107901675e-07,
"logits/chosen": -0.7944917678833008,
"logits/rejected": -0.794426679611206,
"logps/chosen": -298.8318786621094,
"logps/ref_chosen": -296.3001708984375,
"logps/ref_rejected": -279.8486633300781,
"logps/rejected": -302.6021728515625,
"loss": 4.2543,
"margin_dpo/margin_mean": 20.221830368041992,
"margin_dpo/margin_std": 31.374059677124023,
"step": 141
},
{
"epoch": 0.2973821989528796,
"fcm_dpo/beta": 0.03117799200117588,
"fcm_dpo/delta": -0.002421182580292225,
"fcm_dpo/margin": 16.759801864624023,
"fcm_dpo/q_t": 0.4001937210559845,
"grad_norm": 104.06517791748047,
"learning_rate": 4.4422887045602674e-07,
"logits/chosen": -0.8062896728515625,
"logits/rejected": -0.812911331653595,
"logps/chosen": -304.14556884765625,
"logps/ref_chosen": -300.56585693359375,
"logps/ref_rejected": -231.43316650390625,
"logps/rejected": -251.77268981933594,
"loss": 4.7004,
"margin_dpo/margin_mean": 16.759801864624023,
"margin_dpo/margin_std": 33.37417984008789,
"step": 142
},
{
"epoch": 0.2994764397905759,
"fcm_dpo/beta": 0.031860120594501495,
"fcm_dpo/delta": -0.003158077597618103,
"fcm_dpo/margin": 18.83399200439453,
"fcm_dpo/q_t": 0.378153920173645,
"grad_norm": 109.7158203125,
"learning_rate": 4.4307101421701755e-07,
"logits/chosen": -0.7957272529602051,
"logits/rejected": -0.7826195955276489,
"logps/chosen": -300.0506591796875,
"logps/ref_chosen": -296.73236083984375,
"logps/ref_rejected": -266.45257568359375,
"logps/rejected": -288.6048889160156,
"loss": 4.3108,
"margin_dpo/margin_mean": 18.83399200439453,
"margin_dpo/margin_std": 29.4503173828125,
"step": 143
},
{
"epoch": 0.30157068062827225,
"fcm_dpo/beta": 0.03140409663319588,
"fcm_dpo/delta": 0.05489187315106392,
"fcm_dpo/margin": 16.132699966430664,
"fcm_dpo/q_t": 0.4020684063434601,
"grad_norm": 109.79220581054688,
"learning_rate": 4.419028041654559e-07,
"logits/chosen": -0.8549841642379761,
"logits/rejected": -0.8448209762573242,
"logps/chosen": -303.1390380859375,
"logps/ref_chosen": -298.843994140625,
"logps/ref_rejected": -266.120849609375,
"logps/rejected": -286.548583984375,
"loss": 4.6749,
"margin_dpo/margin_mean": 16.132701873779297,
"margin_dpo/margin_std": 32.414798736572266,
"step": 144
},
{
"epoch": 0.3036649214659686,
"fcm_dpo/beta": 0.03131024166941643,
"fcm_dpo/delta": -0.10811541974544525,
"fcm_dpo/margin": 20.41290855407715,
"fcm_dpo/q_t": 0.36850613355636597,
"grad_norm": 102.48077392578125,
"learning_rate": 4.4072430294890166e-07,
"logits/chosen": -0.8508666157722473,
"logits/rejected": -0.8601468801498413,
"logps/chosen": -278.57275390625,
"logps/ref_chosen": -275.7528381347656,
"logps/ref_rejected": -214.74807739257812,
"logps/rejected": -237.98089599609375,
"loss": 4.1601,
"margin_dpo/margin_mean": 20.41291046142578,
"margin_dpo/margin_std": 28.729717254638672,
"step": 145
},
{
"epoch": 0.3057591623036649,
"fcm_dpo/beta": 0.030361486598849297,
"fcm_dpo/delta": 0.01096423901617527,
"fcm_dpo/margin": 19.31856918334961,
"fcm_dpo/q_t": 0.3816215991973877,
"grad_norm": 101.0565414428711,
"learning_rate": 4.395355737667985e-07,
"logits/chosen": -0.8102554082870483,
"logits/rejected": -0.8105416297912598,
"logps/chosen": -285.0329284667969,
"logps/ref_chosen": -277.09820556640625,
"logps/ref_rejected": -265.41046142578125,
"logps/rejected": -292.6636962890625,
"loss": 4.2858,
"margin_dpo/margin_mean": 19.318571090698242,
"margin_dpo/margin_std": 29.257003784179688,
"step": 146
},
{
"epoch": 0.3078534031413613,
"fcm_dpo/beta": 0.03215925768017769,
"fcm_dpo/delta": 0.032970868051052094,
"fcm_dpo/margin": 15.492907524108887,
"fcm_dpo/q_t": 0.3999587893486023,
"grad_norm": 105.35274505615234,
"learning_rate": 4.3833668036708483e-07,
"logits/chosen": -0.8169939517974854,
"logits/rejected": -0.8200792074203491,
"logps/chosen": -299.4952697753906,
"logps/ref_chosen": -291.4185791015625,
"logps/ref_rejected": -253.43051147460938,
"logps/rejected": -277.0000915527344,
"loss": 4.7918,
"margin_dpo/margin_mean": 15.492908477783203,
"margin_dpo/margin_std": 32.0605354309082,
"step": 147
},
{
"epoch": 0.3099476439790576,
"fcm_dpo/beta": 0.032771460711956024,
"fcm_dpo/delta": 0.0693143978714943,
"fcm_dpo/margin": 15.142913818359375,
"fcm_dpo/q_t": 0.40560245513916016,
"grad_norm": 105.60718536376953,
"learning_rate": 4.3712768704277524e-07,
"logits/chosen": -0.8757405281066895,
"logits/rejected": -0.8822675943374634,
"logps/chosen": -244.65098571777344,
"logps/ref_chosen": -236.74850463867188,
"logps/ref_rejected": -231.4674072265625,
"logps/rejected": -254.51280212402344,
"loss": 4.7622,
"margin_dpo/margin_mean": 15.142913818359375,
"margin_dpo/margin_std": 31.51068115234375,
"step": 148
},
{
"epoch": 0.31204188481675393,
"fcm_dpo/beta": 0.03229037672281265,
"fcm_dpo/delta": -0.048303790390491486,
"fcm_dpo/margin": 19.920228958129883,
"fcm_dpo/q_t": 0.3683924376964569,
"grad_norm": 107.26622772216797,
"learning_rate": 4.3590865862851263e-07,
"logits/chosen": -0.829014241695404,
"logits/rejected": -0.8182701468467712,
"logps/chosen": -326.3049621582031,
"logps/ref_chosen": -319.9284973144531,
"logps/ref_rejected": -308.20233154296875,
"logps/rejected": -334.4990234375,
"loss": 4.0688,
"margin_dpo/margin_mean": 19.920230865478516,
"margin_dpo/margin_std": 27.486299514770508,
"step": 149
},
{
"epoch": 0.31413612565445026,
"fcm_dpo/beta": 0.03191431611776352,
"fcm_dpo/delta": 0.010023342445492744,
"fcm_dpo/margin": 18.475460052490234,
"fcm_dpo/q_t": 0.3809901475906372,
"grad_norm": 106.9743881225586,
"learning_rate": 4.346796604970912e-07,
"logits/chosen": -0.8023788928985596,
"logits/rejected": -0.793890118598938,
"logps/chosen": -286.2445373535156,
"logps/ref_chosen": -276.3182373046875,
"logps/ref_rejected": -273.02215576171875,
"logps/rejected": -301.4239501953125,
"loss": 4.3442,
"margin_dpo/margin_mean": 18.475460052490234,
"margin_dpo/margin_std": 29.90300941467285,
"step": 150
},
{
"epoch": 0.3162303664921466,
"fcm_dpo/beta": 0.0293461661785841,
"fcm_dpo/delta": -0.18943935632705688,
"fcm_dpo/margin": 26.317476272583008,
"fcm_dpo/q_t": 0.34219515323638916,
"grad_norm": 90.29637908935547,
"learning_rate": 4.3344075855595097e-07,
"logits/chosen": -0.8202653527259827,
"logits/rejected": -0.8275180459022522,
"logps/chosen": -305.0558776855469,
"logps/ref_chosen": -297.31280517578125,
"logps/ref_rejected": -266.1003723144531,
"logps/rejected": -300.16094970703125,
"loss": 3.7553,
"margin_dpo/margin_mean": 26.317481994628906,
"margin_dpo/margin_std": 30.933624267578125,
"step": 151
},
{
"epoch": 0.3183246073298429,
"fcm_dpo/beta": 0.02696666121482849,
"fcm_dpo/delta": -0.02964053675532341,
"fcm_dpo/margin": 20.30819320678711,
"fcm_dpo/q_t": 0.38851580023765564,
"grad_norm": 95.48226928710938,
"learning_rate": 4.3219201924364323e-07,
"logits/chosen": -0.8484928011894226,
"logits/rejected": -0.8524165153503418,
"logps/chosen": -276.18218994140625,
"logps/ref_chosen": -270.2470397949219,
"logps/ref_rejected": -269.7749328613281,
"logps/rejected": -296.018310546875,
"loss": 4.379,
"margin_dpo/margin_mean": 20.30819320678711,
"margin_dpo/margin_std": 31.848304748535156,
"step": 152
},
{
"epoch": 0.3204188481675393,
"fcm_dpo/beta": 0.025042923167347908,
"fcm_dpo/delta": -0.13993717730045319,
"fcm_dpo/margin": 29.001144409179688,
"fcm_dpo/q_t": 0.34314873814582825,
"grad_norm": 84.08607482910156,
"learning_rate": 4.309335095262675e-07,
"logits/chosen": -0.8237071633338928,
"logits/rejected": -0.8168530464172363,
"logps/chosen": -283.3426513671875,
"logps/ref_chosen": -273.779052734375,
"logps/ref_rejected": -280.9530944824219,
"logps/rejected": -319.5178527832031,
"loss": 3.6829,
"margin_dpo/margin_mean": 29.001144409179688,
"margin_dpo/margin_std": 30.45376205444336,
"step": 153
},
{
"epoch": 0.3225130890052356,
"fcm_dpo/beta": 0.02431459352374077,
"fcm_dpo/delta": -0.015772145241498947,
"fcm_dpo/margin": 20.476642608642578,
"fcm_dpo/q_t": 0.3949674367904663,
"grad_norm": 89.15077209472656,
"learning_rate": 4.2966529689388064e-07,
"logits/chosen": -0.8586506843566895,
"logits/rejected": -0.8460282683372498,
"logps/chosen": -301.7556457519531,
"logps/ref_chosen": -289.9031982421875,
"logps/ref_rejected": -261.5166320800781,
"logps/rejected": -293.8457336425781,
"loss": 4.4883,
"margin_dpo/margin_mean": 20.47664451599121,
"margin_dpo/margin_std": 34.57201385498047,
"step": 154
},
{
"epoch": 0.32460732984293195,
"fcm_dpo/beta": 0.024474315345287323,
"fcm_dpo/delta": 0.04425939917564392,
"fcm_dpo/margin": 20.402536392211914,
"fcm_dpo/q_t": 0.39846354722976685,
"grad_norm": 100.91524505615234,
"learning_rate": 4.2838744935687716e-07,
"logits/chosen": -0.7969489693641663,
"logits/rejected": -0.7992517948150635,
"logps/chosen": -299.48004150390625,
"logps/ref_chosen": -285.8612060546875,
"logps/ref_rejected": -300.1272888183594,
"logps/rejected": -334.1487121582031,
"loss": 4.4626,
"margin_dpo/margin_mean": 20.40253448486328,
"margin_dpo/margin_std": 35.07635498046875,
"step": 155
},
{
"epoch": 0.3267015706806283,
"fcm_dpo/beta": 0.024291612207889557,
"fcm_dpo/delta": -0.1369752585887909,
"fcm_dpo/margin": 30.0484619140625,
"fcm_dpo/q_t": 0.3525455892086029,
"grad_norm": 76.57457733154297,
"learning_rate": 4.271000354423425e-07,
"logits/chosen": -0.8318104147911072,
"logits/rejected": -0.8374019861221313,
"logps/chosen": -291.5562744140625,
"logps/ref_chosen": -279.0354919433594,
"logps/ref_rejected": -244.2198486328125,
"logps/rejected": -286.78912353515625,
"loss": 3.9775,
"margin_dpo/margin_mean": 30.048463821411133,
"margin_dpo/margin_std": 39.803226470947266,
"step": 156
},
{
"epoch": 0.3287958115183246,
"fcm_dpo/beta": 0.022700754925608635,
"fcm_dpo/delta": 0.06447763741016388,
"fcm_dpo/margin": 21.06729507446289,
"fcm_dpo/q_t": 0.3995548486709595,
"grad_norm": 84.66363525390625,
"learning_rate": 4.258031241903777e-07,
"logits/chosen": -0.8930936455726624,
"logits/rejected": -0.8946095108985901,
"logps/chosen": -287.12164306640625,
"logps/ref_chosen": -270.830322265625,
"logps/ref_rejected": -259.08319091796875,
"logps/rejected": -296.44183349609375,
"loss": 4.4727,
"margin_dpo/margin_mean": 21.06729507446289,
"margin_dpo/margin_std": 34.88356399536133,
"step": 157
},
{
"epoch": 0.3308900523560209,
"fcm_dpo/beta": 0.023696184158325195,
"fcm_dpo/delta": -0.022407012060284615,
"fcm_dpo/margin": 23.372053146362305,
"fcm_dpo/q_t": 0.38260895013809204,
"grad_norm": 88.16154479980469,
"learning_rate": 4.2449678515039743e-07,
"logits/chosen": -0.8335078358650208,
"logits/rejected": -0.8233439326286316,
"logps/chosen": -306.63201904296875,
"logps/ref_chosen": -289.9663391113281,
"logps/ref_rejected": -271.335693359375,
"logps/rejected": -311.3734130859375,
"loss": 4.2823,
"margin_dpo/margin_mean": 23.372051239013672,
"margin_dpo/margin_std": 34.65653610229492,
"step": 158
},
{
"epoch": 0.33298429319371725,
"fcm_dpo/beta": 0.0234974417835474,
"fcm_dpo/delta": 0.046846918761730194,
"fcm_dpo/margin": 18.556854248046875,
"fcm_dpo/q_t": 0.41267889738082886,
"grad_norm": 93.64372253417969,
"learning_rate": 4.2318108837739986e-07,
"logits/chosen": -0.9038013219833374,
"logits/rejected": -0.8640592694282532,
"logps/chosen": -340.5484619140625,
"logps/ref_chosen": -321.37835693359375,
"logps/ref_rejected": -250.45652770996094,
"logps/rejected": -288.1834716796875,
"loss": 4.8046,
"margin_dpo/margin_mean": 18.556854248046875,
"margin_dpo/margin_std": 39.233619689941406,
"step": 159
},
{
"epoch": 0.33507853403141363,
"fcm_dpo/beta": 0.02296513319015503,
"fcm_dpo/delta": -0.0735328420996666,
"fcm_dpo/margin": 29.02091407775879,
"fcm_dpo/q_t": 0.35804080963134766,
"grad_norm": 87.23406219482422,
"learning_rate": 4.218561044282098e-07,
"logits/chosen": -0.8459993004798889,
"logits/rejected": -0.855299711227417,
"logps/chosen": -291.4868469238281,
"logps/ref_chosen": -276.28350830078125,
"logps/ref_rejected": -262.7477722167969,
"logps/rejected": -306.9720458984375,
"loss": 3.8776,
"margin_dpo/margin_mean": 29.020915985107422,
"margin_dpo/margin_std": 33.83423614501953,
"step": 160
},
{
"epoch": 0.33717277486910996,
"fcm_dpo/beta": 0.023070694878697395,
"fcm_dpo/delta": -0.006039864383637905,
"fcm_dpo/margin": 26.19426727294922,
"fcm_dpo/q_t": 0.3749847114086151,
"grad_norm": 87.1927261352539,
"learning_rate": 4.2052190435769554e-07,
"logits/chosen": -0.8614886403083801,
"logits/rejected": -0.8548807501792908,
"logps/chosen": -329.09661865234375,
"logps/ref_chosen": -310.4927978515625,
"logps/ref_rejected": -250.25347900390625,
"logps/rejected": -295.0516052246094,
"loss": 4.2422,
"margin_dpo/margin_mean": 26.19426727294922,
"margin_dpo/margin_std": 39.32307815551758,
"step": 161
},
{
"epoch": 0.3392670157068063,
"fcm_dpo/beta": 0.022042490541934967,
"fcm_dpo/delta": -0.0053899819031357765,
"fcm_dpo/margin": 22.930227279663086,
"fcm_dpo/q_t": 0.3928814232349396,
"grad_norm": 84.0722427368164,
"learning_rate": 4.1917855971495763e-07,
"logits/chosen": -0.8419027328491211,
"logits/rejected": -0.836093544960022,
"logps/chosen": -313.3344421386719,
"logps/ref_chosen": -296.1105041503906,
"logps/ref_rejected": -253.4247589111328,
"logps/rejected": -293.57891845703125,
"loss": 4.4335,
"margin_dpo/margin_mean": 22.930227279663086,
"margin_dpo/margin_std": 36.98151397705078,
"step": 162
},
{
"epoch": 0.3413612565445026,
"fcm_dpo/beta": 0.023281563073396683,
"fcm_dpo/delta": 0.031158914789557457,
"fcm_dpo/margin": 24.374839782714844,
"fcm_dpo/q_t": 0.37846505641937256,
"grad_norm": 105.36766052246094,
"learning_rate": 4.1782614253949255e-07,
"logits/chosen": -0.8736593723297119,
"logits/rejected": -0.8767552375793457,
"logps/chosen": -313.42913818359375,
"logps/ref_chosen": -293.4999084472656,
"logps/ref_rejected": -266.7116394042969,
"logps/rejected": -311.0157470703125,
"loss": 4.2172,
"margin_dpo/margin_mean": 24.37484359741211,
"margin_dpo/margin_std": 34.463653564453125,
"step": 163
},
{
"epoch": 0.34345549738219894,
"fcm_dpo/beta": 0.023631222546100616,
"fcm_dpo/delta": -0.005312643945217133,
"fcm_dpo/margin": 25.51420021057129,
"fcm_dpo/q_t": 0.3786207139492035,
"grad_norm": 94.62559509277344,
"learning_rate": 4.164647253573289e-07,
"logits/chosen": -0.846282422542572,
"logits/rejected": -0.8675246238708496,
"logps/chosen": -291.2696533203125,
"logps/ref_chosen": -267.04949951171875,
"logps/ref_rejected": -215.9768829345703,
"logps/rejected": -265.7112121582031,
"loss": 4.2925,
"margin_dpo/margin_mean": 25.51420021057129,
"margin_dpo/margin_std": 39.75941467285156,
"step": 164
},
{
"epoch": 0.34554973821989526,
"fcm_dpo/beta": 0.02316068299114704,
"fcm_dpo/delta": 0.014690798707306385,
"fcm_dpo/margin": 20.29566192626953,
"fcm_dpo/q_t": 0.4032779335975647,
"grad_norm": 95.55998229980469,
"learning_rate": 4.1509438117713863e-07,
"logits/chosen": -0.8854783177375793,
"logits/rejected": -0.8603523373603821,
"logps/chosen": -296.2962341308594,
"logps/ref_chosen": -278.06146240234375,
"logps/ref_rejected": -260.4288635253906,
"logps/rejected": -298.9592590332031,
"loss": 4.5279,
"margin_dpo/margin_mean": 20.29566192626953,
"margin_dpo/margin_std": 35.129371643066406,
"step": 165
},
{
"epoch": 0.34764397905759165,
"fcm_dpo/beta": 0.023668359965085983,
"fcm_dpo/delta": 0.0750846192240715,
"fcm_dpo/margin": 22.351829528808594,
"fcm_dpo/q_t": 0.39492562413215637,
"grad_norm": 92.57283020019531,
"learning_rate": 4.137151834863213e-07,
"logits/chosen": -0.8455550074577332,
"logits/rejected": -0.814144492149353,
"logps/chosen": -292.3992919921875,
"logps/ref_chosen": -275.9490661621094,
"logps/ref_rejected": -232.13473510742188,
"logps/rejected": -270.93682861328125,
"loss": 4.4549,
"margin_dpo/margin_mean": 22.35182762145996,
"margin_dpo/margin_std": 38.78591537475586,
"step": 166
},
{
"epoch": 0.34973821989528797,
"fcm_dpo/beta": 0.025082817301154137,
"fcm_dpo/delta": 0.012834087014198303,
"fcm_dpo/margin": 23.36766815185547,
"fcm_dpo/q_t": 0.3822540044784546,
"grad_norm": 97.74182891845703,
"learning_rate": 4.123272062470633e-07,
"logits/chosen": -0.8396057486534119,
"logits/rejected": -0.8283241987228394,
"logps/chosen": -299.5604553222656,
"logps/ref_chosen": -280.5514221191406,
"logps/ref_rejected": -255.2896728515625,
"logps/rejected": -297.6663818359375,
"loss": 4.4165,
"margin_dpo/margin_mean": 23.367666244506836,
"margin_dpo/margin_std": 39.522518157958984,
"step": 167
},
{
"epoch": 0.3518324607329843,
"fcm_dpo/beta": 0.023963892832398415,
"fcm_dpo/delta": -0.06409404426813126,
"fcm_dpo/margin": 25.364152908325195,
"fcm_dpo/q_t": 0.3715497553348541,
"grad_norm": 305.63275146484375,
"learning_rate": 4.1093052389237174e-07,
"logits/chosen": -0.8306468725204468,
"logits/rejected": -0.807292103767395,
"logps/chosen": -334.9006042480469,
"logps/ref_chosen": -315.7982177734375,
"logps/ref_rejected": -291.48406982421875,
"logps/rejected": -335.95062255859375,
"loss": 4.4037,
"margin_dpo/margin_mean": 25.364151000976562,
"margin_dpo/margin_std": 42.07012176513672,
"step": 168
},
{
"epoch": 0.3539267015706806,
"fcm_dpo/beta": 0.021986354142427444,
"fcm_dpo/delta": -0.18093189597129822,
"fcm_dpo/margin": 34.84695053100586,
"fcm_dpo/q_t": 0.34274956583976746,
"grad_norm": 78.17916107177734,
"learning_rate": 4.0952521132208267e-07,
"logits/chosen": -0.816986620426178,
"logits/rejected": -0.8321943879127502,
"logps/chosen": -276.01812744140625,
"logps/ref_chosen": -261.06427001953125,
"logps/ref_rejected": -235.40663146972656,
"logps/rejected": -285.20745849609375,
"loss": 3.6866,
"margin_dpo/margin_mean": 34.84695053100586,
"margin_dpo/margin_std": 38.065284729003906,
"step": 169
},
{
"epoch": 0.35602094240837695,
"fcm_dpo/beta": 0.020543169230222702,
"fcm_dpo/delta": 0.05482568219304085,
"fcm_dpo/margin": 26.701711654663086,
"fcm_dpo/q_t": 0.3890666663646698,
"grad_norm": 98.05411529541016,
"learning_rate": 4.081113438988443e-07,
"logits/chosen": -0.7915770411491394,
"logits/rejected": -0.7925465106964111,
"logps/chosen": -324.87530517578125,
"logps/ref_chosen": -308.96722412109375,
"logps/ref_rejected": -263.8466796875,
"logps/rejected": -306.4564514160156,
"loss": 4.4074,
"margin_dpo/margin_mean": 26.701711654663086,
"margin_dpo/margin_std": 45.96706008911133,
"step": 170
},
{
"epoch": 0.3581151832460733,
"fcm_dpo/beta": 0.02039124257862568,
"fcm_dpo/delta": -0.09280530363321304,
"fcm_dpo/margin": 30.494121551513672,
"fcm_dpo/q_t": 0.36619833111763,
"grad_norm": 92.74893188476562,
"learning_rate": 4.0668899744407567e-07,
"logits/chosen": -0.8212159872055054,
"logits/rejected": -0.8347527384757996,
"logps/chosen": -270.1197509765625,
"logps/ref_chosen": -258.8890380859375,
"logps/ref_rejected": -262.19140625,
"logps/rejected": -303.916259765625,
"loss": 3.9458,
"margin_dpo/margin_mean": 30.494121551513672,
"margin_dpo/margin_std": 35.400516510009766,
"step": 171
},
{
"epoch": 0.36020942408376966,
"fcm_dpo/beta": 0.021083693951368332,
"fcm_dpo/delta": 0.1604629009962082,
"fcm_dpo/margin": 16.23406410217285,
"fcm_dpo/q_t": 0.425800085067749,
"grad_norm": 95.47428131103516,
"learning_rate": 4.0525824823390043e-07,
"logits/chosen": -0.8353057503700256,
"logits/rejected": -0.8544809222221375,
"logps/chosen": -352.4789123535156,
"logps/ref_chosen": -339.0223388671875,
"logps/ref_rejected": -295.78759765625,
"logps/rejected": -325.4782409667969,
"loss": 4.8806,
"margin_dpo/margin_mean": 16.234066009521484,
"margin_dpo/margin_std": 37.09178161621094,
"step": 172
},
{
"epoch": 0.362303664921466,
"fcm_dpo/beta": 0.023462966084480286,
"fcm_dpo/delta": 0.06941507756710052,
"fcm_dpo/margin": 22.576725006103516,
"fcm_dpo/q_t": 0.39380908012390137,
"grad_norm": 85.06795501708984,
"learning_rate": 4.0381917299505686e-07,
"logits/chosen": -0.8411876559257507,
"logits/rejected": -0.8425909280776978,
"logps/chosen": -314.23345947265625,
"logps/ref_chosen": -300.1114501953125,
"logps/ref_rejected": -273.78460693359375,
"logps/rejected": -310.48333740234375,
"loss": 4.4598,
"margin_dpo/margin_mean": 22.57672119140625,
"margin_dpo/margin_std": 38.42784881591797,
"step": 173
},
{
"epoch": 0.3643979057591623,
"fcm_dpo/beta": 0.02340209297835827,
"fcm_dpo/delta": -0.04257451742887497,
"fcm_dpo/margin": 27.29839324951172,
"fcm_dpo/q_t": 0.3668029010295868,
"grad_norm": 108.3929443359375,
"learning_rate": 4.0237184890078243e-07,
"logits/chosen": -0.8130418658256531,
"logits/rejected": -0.8016320466995239,
"logps/chosen": -348.58929443359375,
"logps/ref_chosen": -335.0538635253906,
"logps/ref_rejected": -257.4646911621094,
"logps/rejected": -298.2984924316406,
"loss": 4.0609,
"margin_dpo/margin_mean": 27.298397064208984,
"margin_dpo/margin_std": 37.006752014160156,
"step": 174
},
{
"epoch": 0.36649214659685864,
"fcm_dpo/beta": 0.02319101057946682,
"fcm_dpo/delta": -0.01429109089076519,
"fcm_dpo/margin": 26.22753143310547,
"fcm_dpo/q_t": 0.38202327489852905,
"grad_norm": 106.47049713134766,
"learning_rate": 4.00916353566676e-07,
"logits/chosen": -0.8251218795776367,
"logits/rejected": -0.8282068967819214,
"logps/chosen": -304.2876281738281,
"logps/ref_chosen": -284.39556884765625,
"logps/ref_rejected": -283.3876647949219,
"logps/rejected": -329.50726318359375,
"loss": 4.3814,
"margin_dpo/margin_mean": 26.22753143310547,
"margin_dpo/margin_std": 42.50096130371094,
"step": 175
},
{
"epoch": 0.36858638743455496,
"fcm_dpo/beta": 0.023719631135463715,
"fcm_dpo/delta": 0.05169348418712616,
"fcm_dpo/margin": 20.287446975708008,
"fcm_dpo/q_t": 0.4035814702510834,
"grad_norm": 94.84113311767578,
"learning_rate": 3.994527650465352e-07,
"logits/chosen": -0.7882924675941467,
"logits/rejected": -0.8019800186157227,
"logps/chosen": -271.99951171875,
"logps/ref_chosen": -251.81280517578125,
"logps/ref_rejected": -242.05328369140625,
"logps/rejected": -282.5274658203125,
"loss": 4.8493,
"margin_dpo/margin_mean": 20.287450790405273,
"margin_dpo/margin_std": 43.71425247192383,
"step": 176
},
{
"epoch": 0.3706806282722513,
"fcm_dpo/beta": 0.023168740794062614,
"fcm_dpo/delta": -0.04496470466256142,
"fcm_dpo/margin": 20.56722640991211,
"fcm_dpo/q_t": 0.4018362760543823,
"grad_norm": 99.85309600830078,
"learning_rate": 3.979811618281705e-07,
"logits/chosen": -0.8806796669960022,
"logits/rejected": -0.8574539422988892,
"logps/chosen": -319.0129699707031,
"logps/ref_chosen": -298.6463928222656,
"logps/ref_rejected": -295.66534423828125,
"logps/rejected": -336.59912109375,
"loss": 4.7961,
"margin_dpo/margin_mean": 20.56722640991211,
"margin_dpo/margin_std": 41.72360610961914,
"step": 177
},
{
"epoch": 0.37277486910994767,
"fcm_dpo/beta": 0.02236098423600197,
"fcm_dpo/delta": -0.031636402010917664,
"fcm_dpo/margin": 28.116716384887695,
"fcm_dpo/q_t": 0.3736070692539215,
"grad_norm": 87.69011688232422,
"learning_rate": 3.9650162282919654e-07,
"logits/chosen": -0.7916255593299866,
"logits/rejected": -0.7900866270065308,
"logps/chosen": -302.40118408203125,
"logps/ref_chosen": -286.2576599121094,
"logps/ref_rejected": -243.97491455078125,
"logps/rejected": -288.2351379394531,
"loss": 4.1353,
"margin_dpo/margin_mean": 28.116714477539062,
"margin_dpo/margin_std": 40.36370849609375,
"step": 178
},
{
"epoch": 0.374869109947644,
"fcm_dpo/beta": 0.02173008769750595,
"fcm_dpo/delta": -0.03953684866428375,
"fcm_dpo/margin": 23.7108097076416,
"fcm_dpo/q_t": 0.39363473653793335,
"grad_norm": 94.55865478515625,
"learning_rate": 3.9501422739279953e-07,
"logits/chosen": -0.7766736745834351,
"logits/rejected": -0.782062292098999,
"logps/chosen": -277.46343994140625,
"logps/ref_chosen": -259.737060546875,
"logps/ref_rejected": -277.8813171386719,
"logps/rejected": -319.3185119628906,
"loss": 4.5415,
"margin_dpo/margin_mean": 23.710805892944336,
"margin_dpo/margin_std": 41.660884857177734,
"step": 179
},
{
"epoch": 0.3769633507853403,
"fcm_dpo/beta": 0.021278660744428635,
"fcm_dpo/delta": -0.048960644751787186,
"fcm_dpo/margin": 27.975608825683594,
"fcm_dpo/q_t": 0.379283607006073,
"grad_norm": 81.7175521850586,
"learning_rate": 3.935190552834828e-07,
"logits/chosen": -0.8114000558853149,
"logits/rejected": -0.8452147841453552,
"logps/chosen": -285.2571105957031,
"logps/ref_chosen": -267.30889892578125,
"logps/ref_rejected": -230.4376983642578,
"logps/rejected": -276.3615417480469,
"loss": 4.1923,
"margin_dpo/margin_mean": 27.975608825683594,
"margin_dpo/margin_std": 40.780189514160156,
"step": 180
},
{
"epoch": 0.37905759162303665,
"fcm_dpo/beta": 0.02138346992433071,
"fcm_dpo/delta": 0.08889839053153992,
"fcm_dpo/margin": 24.11235809326172,
"fcm_dpo/q_t": 0.39553123712539673,
"grad_norm": 104.4340591430664,
"learning_rate": 3.920161866827889e-07,
"logits/chosen": -0.8049870729446411,
"logits/rejected": -0.8164405822753906,
"logps/chosen": -321.71044921875,
"logps/ref_chosen": -300.49139404296875,
"logps/ref_rejected": -278.98284912109375,
"logps/rejected": -324.31427001953125,
"loss": 4.5755,
"margin_dpo/margin_mean": 24.112356185913086,
"margin_dpo/margin_std": 44.64960479736328,
"step": 181
},
{
"epoch": 0.381151832460733,
"fcm_dpo/beta": 0.021802516654133797,
"fcm_dpo/delta": -0.12682966887950897,
"fcm_dpo/margin": 33.0461540222168,
"fcm_dpo/q_t": 0.35091304779052734,
"grad_norm": 94.29923248291016,
"learning_rate": 3.90505702185e-07,
"logits/chosen": -0.7876582741737366,
"logits/rejected": -0.8228734731674194,
"logps/chosen": -297.51129150390625,
"logps/ref_chosen": -279.4981689453125,
"logps/ref_rejected": -263.6926574707031,
"logps/rejected": -314.751953125,
"loss": 3.8418,
"margin_dpo/margin_mean": 33.0461540222168,
"margin_dpo/margin_std": 39.205291748046875,
"step": 182
},
{
"epoch": 0.3832460732984293,
"fcm_dpo/beta": 0.02001192420721054,
"fcm_dpo/delta": 0.017274007201194763,
"fcm_dpo/margin": 29.154460906982422,
"fcm_dpo/q_t": 0.38196709752082825,
"grad_norm": 85.02561950683594,
"learning_rate": 3.889876827928156e-07,
"logits/chosen": -0.8367516398429871,
"logits/rejected": -0.8470520973205566,
"logps/chosen": -289.88006591796875,
"logps/ref_chosen": -271.2057189941406,
"logps/ref_rejected": -243.91549682617188,
"logps/rejected": -291.7442932128906,
"loss": 4.277,
"margin_dpo/margin_mean": 29.154460906982422,
"margin_dpo/margin_std": 45.45560836791992,
"step": 183
},
{
"epoch": 0.38534031413612563,
"fcm_dpo/beta": 0.018630409613251686,
"fcm_dpo/delta": -0.1170383170247078,
"fcm_dpo/margin": 37.69718551635742,
"fcm_dpo/q_t": 0.35355544090270996,
"grad_norm": 91.68877410888672,
"learning_rate": 3.874622099130087e-07,
"logits/chosen": -0.8675556182861328,
"logits/rejected": -0.8580695390701294,
"logps/chosen": -331.67840576171875,
"logps/ref_chosen": -318.4457702636719,
"logps/ref_rejected": -266.640869140625,
"logps/rejected": -317.5706787109375,
"loss": 3.8938,
"margin_dpo/margin_mean": 37.69718933105469,
"margin_dpo/margin_std": 46.49965286254883,
"step": 184
},
{
"epoch": 0.387434554973822,
"fcm_dpo/beta": 0.018275652080774307,
"fcm_dpo/delta": -0.017188355326652527,
"fcm_dpo/margin": 31.037744522094727,
"fcm_dpo/q_t": 0.38223421573638916,
"grad_norm": 80.14971923828125,
"learning_rate": 3.859293653520604e-07,
"logits/chosen": -0.8481187224388123,
"logits/rejected": -0.8484024405479431,
"logps/chosen": -296.3069763183594,
"logps/ref_chosen": -274.308837890625,
"logps/ref_rejected": -260.7274169921875,
"logps/rejected": -313.7633056640625,
"loss": 4.2285,
"margin_dpo/margin_mean": 31.037744522094727,
"margin_dpo/margin_std": 45.34954071044922,
"step": 185
},
{
"epoch": 0.38952879581151834,
"fcm_dpo/beta": 0.01840740442276001,
"fcm_dpo/delta": 0.0072084227576851845,
"fcm_dpo/margin": 29.299354553222656,
"fcm_dpo/q_t": 0.3844219148159027,
"grad_norm": 81.5396499633789,
"learning_rate": 3.8438923131177237e-07,
"logits/chosen": -0.8498209714889526,
"logits/rejected": -0.8591220378875732,
"logps/chosen": -321.2626953125,
"logps/ref_chosen": -299.00537109375,
"logps/ref_rejected": -274.4014587402344,
"logps/rejected": -325.9581298828125,
"loss": 4.2641,
"margin_dpo/margin_mean": 29.29935646057129,
"margin_dpo/margin_std": 41.73757553100586,
"step": 186
},
{
"epoch": 0.39162303664921466,
"fcm_dpo/beta": 0.01949266903102398,
"fcm_dpo/delta": 0.11426316946744919,
"fcm_dpo/margin": 25.147544860839844,
"fcm_dpo/q_t": 0.3968276381492615,
"grad_norm": 109.50704193115234,
"learning_rate": 3.828418903848593e-07,
"logits/chosen": -0.8067930936813354,
"logits/rejected": -0.801750898361206,
"logps/chosen": -356.03204345703125,
"logps/ref_chosen": -329.8253173828125,
"logps/ref_rejected": -263.73175048828125,
"logps/rejected": -315.0860595703125,
"loss": 4.6663,
"margin_dpo/margin_mean": 25.14754295349121,
"margin_dpo/margin_std": 48.58820343017578,
"step": 187
},
{
"epoch": 0.393717277486911,
"fcm_dpo/beta": 0.019669629633426666,
"fcm_dpo/delta": -0.031924083828926086,
"fcm_dpo/margin": 29.831005096435547,
"fcm_dpo/q_t": 0.3818528950214386,
"grad_norm": 85.78080749511719,
"learning_rate": 3.812874255505191e-07,
"logits/chosen": -0.8346595168113708,
"logits/rejected": -0.8323647975921631,
"logps/chosen": -289.03765869140625,
"logps/ref_chosen": -263.005615234375,
"logps/ref_rejected": -247.08668518066406,
"logps/rejected": -302.9496765136719,
"loss": 4.4437,
"margin_dpo/margin_mean": 29.831003189086914,
"margin_dpo/margin_std": 50.36288070678711,
"step": 188
},
{
"epoch": 0.3958115183246073,
"fcm_dpo/beta": 0.01861950382590294,
"fcm_dpo/delta": -0.0650758147239685,
"fcm_dpo/margin": 35.26509475708008,
"fcm_dpo/q_t": 0.36197108030319214,
"grad_norm": 82.21233367919922,
"learning_rate": 3.797259201699833e-07,
"logits/chosen": -0.8459858298301697,
"logits/rejected": -0.8555557131767273,
"logps/chosen": -291.046142578125,
"logps/ref_chosen": -272.96038818359375,
"logps/ref_rejected": -275.13238525390625,
"logps/rejected": -328.4832458496094,
"loss": 3.8948,
"margin_dpo/margin_mean": 35.26509475708008,
"margin_dpo/margin_std": 41.49103546142578,
"step": 189
},
{
"epoch": 0.39790575916230364,
"fcm_dpo/beta": 0.018630830571055412,
"fcm_dpo/delta": 0.004260986112058163,
"fcm_dpo/margin": 31.947521209716797,
"fcm_dpo/q_t": 0.3738987445831299,
"grad_norm": 85.7725830078125,
"learning_rate": 3.781574579820464e-07,
"logits/chosen": -0.8605042099952698,
"logits/rejected": -0.8269729614257812,
"logps/chosen": -275.54925537109375,
"logps/ref_chosen": -257.79754638671875,
"logps/ref_rejected": -225.2164306640625,
"logps/rejected": -274.9156494140625,
"loss": 4.0818,
"margin_dpo/margin_mean": 31.94751739501953,
"margin_dpo/margin_std": 42.33838653564453,
"step": 190
},
{
"epoch": 0.4,
"fcm_dpo/beta": 0.01885531283915043,
"fcm_dpo/delta": 0.012270934879779816,
"fcm_dpo/margin": 31.156997680664062,
"fcm_dpo/q_t": 0.3792232871055603,
"grad_norm": 93.37854766845703,
"learning_rate": 3.765821230985757e-07,
"logits/chosen": -0.8681455850601196,
"logits/rejected": -0.8712099194526672,
"logps/chosen": -260.21612548828125,
"logps/ref_chosen": -243.8585205078125,
"logps/ref_rejected": -245.12136840820312,
"logps/rejected": -292.6359558105469,
"loss": 4.2407,
"margin_dpo/margin_mean": 31.156997680664062,
"margin_dpo/margin_std": 46.79732131958008,
"step": 191
},
{
"epoch": 0.40209424083769635,
"fcm_dpo/beta": 0.01894964464008808,
"fcm_dpo/delta": 0.001287955790758133,
"fcm_dpo/margin": 25.870622634887695,
"fcm_dpo/q_t": 0.39755818247795105,
"grad_norm": 83.08194732666016,
"learning_rate": 3.75e-07,
"logits/chosen": -0.8269639015197754,
"logits/rejected": -0.8149864077568054,
"logps/chosen": -289.5196228027344,
"logps/ref_chosen": -266.9799499511719,
"logps/ref_rejected": -260.1697082519531,
"logps/rejected": -308.58001708984375,
"loss": 4.5557,
"margin_dpo/margin_mean": 25.870622634887695,
"margin_dpo/margin_std": 45.57743835449219,
"step": 192
},
{
"epoch": 0.4041884816753927,
"fcm_dpo/beta": 0.018430905416607857,
"fcm_dpo/delta": -0.02623889222741127,
"fcm_dpo/margin": 30.82439422607422,
"fcm_dpo/q_t": 0.38306957483291626,
"grad_norm": 91.62554931640625,
"learning_rate": 3.734111735307796e-07,
"logits/chosen": -0.8791731595993042,
"logits/rejected": -0.8559356927871704,
"logps/chosen": -308.2332763671875,
"logps/ref_chosen": -280.25323486328125,
"logps/ref_rejected": -291.0348815917969,
"logps/rejected": -349.8393249511719,
"loss": 4.3166,
"margin_dpo/margin_mean": 30.824386596679688,
"margin_dpo/margin_std": 48.09514236450195,
"step": 193
},
{
"epoch": 0.406282722513089,
"fcm_dpo/beta": 0.01942608132958412,
"fcm_dpo/delta": 0.08116672188043594,
"fcm_dpo/margin": 23.930679321289062,
"fcm_dpo/q_t": 0.4047344923019409,
"grad_norm": 118.4610824584961,
"learning_rate": 3.7181572889485623e-07,
"logits/chosen": -0.8472974896430969,
"logits/rejected": -0.8410882949829102,
"logps/chosen": -317.900146484375,
"logps/ref_chosen": -288.13946533203125,
"logps/ref_rejected": -251.31529235839844,
"logps/rejected": -305.00665283203125,
"loss": 4.5406,
"margin_dpo/margin_mean": 23.930675506591797,
"margin_dpo/margin_std": 42.646785736083984,
"step": 194
},
{
"epoch": 0.4083769633507853,
"fcm_dpo/beta": 0.02106415294110775,
"fcm_dpo/delta": 0.09580697119235992,
"fcm_dpo/margin": 21.20762062072754,
"fcm_dpo/q_t": 0.412017285823822,
"grad_norm": 105.6582260131836,
"learning_rate": 3.7021375165108377e-07,
"logits/chosen": -0.8573026657104492,
"logits/rejected": -0.8636154532432556,
"logps/chosen": -305.279541015625,
"logps/ref_chosen": -274.0006408691406,
"logps/ref_rejected": -280.22723388671875,
"logps/rejected": -332.7137451171875,
"loss": 4.6593,
"margin_dpo/margin_mean": 21.20762062072754,
"margin_dpo/margin_std": 41.330299377441406,
"step": 195
},
{
"epoch": 0.41047120418848165,
"fcm_dpo/beta": 0.021116994321346283,
"fcm_dpo/delta": -0.03291000798344612,
"fcm_dpo/margin": 29.783409118652344,
"fcm_dpo/q_t": 0.374653160572052,
"grad_norm": 108.43070983886719,
"learning_rate": 3.6860532770864005e-07,
"logits/chosen": -0.8379297256469727,
"logits/rejected": -0.8495330810546875,
"logps/chosen": -297.95947265625,
"logps/ref_chosen": -274.90069580078125,
"logps/ref_rejected": -248.7281951904297,
"logps/rejected": -301.57037353515625,
"loss": 4.2422,
"margin_dpo/margin_mean": 29.783409118652344,
"margin_dpo/margin_std": 46.15776062011719,
"step": 196
},
{
"epoch": 0.41256544502617803,
"fcm_dpo/beta": 0.020392950624227524,
"fcm_dpo/delta": -0.10839693248271942,
"fcm_dpo/margin": 34.37493896484375,
"fcm_dpo/q_t": 0.3563109040260315,
"grad_norm": 117.64871215820312,
"learning_rate": 3.6699054332241985e-07,
"logits/chosen": -0.8725043535232544,
"logits/rejected": -0.8630834817886353,
"logps/chosen": -335.2013854980469,
"logps/ref_chosen": -309.5348205566406,
"logps/ref_rejected": -264.3179931640625,
"logps/rejected": -324.3594970703125,
"loss": 3.9499,
"margin_dpo/margin_mean": 34.37493896484375,
"margin_dpo/margin_std": 43.26872634887695,
"step": 197
},
{
"epoch": 0.41465968586387436,
"fcm_dpo/beta": 0.01880509965121746,
"fcm_dpo/delta": -0.018900295719504356,
"fcm_dpo/margin": 32.761688232421875,
"fcm_dpo/q_t": 0.37768036127090454,
"grad_norm": 100.41459655761719,
"learning_rate": 3.653694850884091e-07,
"logits/chosen": -0.8667469024658203,
"logits/rejected": -0.845592200756073,
"logps/chosen": -325.9341125488281,
"logps/ref_chosen": -301.0134582519531,
"logps/ref_rejected": -292.84185791015625,
"logps/rejected": -350.5242004394531,
"loss": 4.3054,
"margin_dpo/margin_mean": 32.761688232421875,
"margin_dpo/margin_std": 52.25929260253906,
"step": 198
},
{
"epoch": 0.4167539267015707,
"fcm_dpo/beta": 0.01867169514298439,
"fcm_dpo/delta": -0.039132870733737946,
"fcm_dpo/margin": 31.537443161010742,
"fcm_dpo/q_t": 0.37857967615127563,
"grad_norm": 91.19898986816406,
"learning_rate": 3.6374223993904124e-07,
"logits/chosen": -0.8458956480026245,
"logits/rejected": -0.8102747797966003,
"logps/chosen": -290.2001647949219,
"logps/ref_chosen": -264.6058654785156,
"logps/ref_rejected": -214.9014892578125,
"logps/rejected": -272.033203125,
"loss": 4.1773,
"margin_dpo/margin_mean": 31.53744125366211,
"margin_dpo/margin_std": 45.666507720947266,
"step": 199
},
{
"epoch": 0.418848167539267,
"fcm_dpo/beta": 0.018353408202528954,
"fcm_dpo/delta": 0.041134800761938095,
"fcm_dpo/margin": 28.204835891723633,
"fcm_dpo/q_t": 0.39774975180625916,
"grad_norm": 105.55087280273438,
"learning_rate": 3.621088951385353e-07,
"logits/chosen": -0.9000794887542725,
"logits/rejected": -0.8824851512908936,
"logps/chosen": -351.81829833984375,
"logps/ref_chosen": -324.1588134765625,
"logps/ref_rejected": -277.80218505859375,
"logps/rejected": -333.6665344238281,
"loss": 4.6147,
"margin_dpo/margin_mean": 28.204835891723633,
"margin_dpo/margin_std": 53.386009216308594,
"step": 200
},
{
"epoch": 0.418848167539267,
"eval_fcm_dpo/beta": 0.01886005327105522,
"eval_logits/chosen": -0.868439257144928,
"eval_logits/rejected": -0.8583438396453857,
"eval_logps/chosen": -320.2021179199219,
"eval_logps/ref_chosen": -287.8268127441406,
"eval_logps/ref_rejected": -266.9300231933594,
"eval_logps/rejected": -328.9063720703125,
"eval_loss": 0.5499768257141113,
"eval_margin_dpo/margin_mean": 29.600982666015625,
"eval_margin_dpo/margin_std": 45.74766159057617,
"eval_runtime": 78.7653,
"eval_samples_per_second": 25.392,
"eval_steps_per_second": 3.174,
"step": 200
},
{
"epoch": 0.42094240837696334,
"fcm_dpo/beta": 0.019238265231251717,
"fcm_dpo/delta": -0.009198937565088272,
"fcm_dpo/margin": 31.507125854492188,
"fcm_dpo/q_t": 0.3743167221546173,
"grad_norm": 97.80419158935547,
"learning_rate": 3.604695382782159e-07,
"logits/chosen": -0.8685228824615479,
"logits/rejected": -0.8634480237960815,
"logps/chosen": -303.941650390625,
"logps/ref_chosen": -271.49566650390625,
"logps/ref_rejected": -245.71414184570312,
"logps/rejected": -309.6672058105469,
"loss": 4.2189,
"margin_dpo/margin_mean": 31.50712776184082,
"margin_dpo/margin_std": 46.31949996948242,
"step": 201
},
{
"epoch": 0.42303664921465967,
"fcm_dpo/beta": 0.018488463014364243,
"fcm_dpo/delta": -0.04204194247722626,
"fcm_dpo/margin": 31.415302276611328,
"fcm_dpo/q_t": 0.3799372613430023,
"grad_norm": 100.98213195800781,
"learning_rate": 3.588242572718162e-07,
"logits/chosen": -0.8742230534553528,
"logits/rejected": -0.8673840165138245,
"logps/chosen": -303.4588317871094,
"logps/ref_chosen": -272.0979309082031,
"logps/ref_rejected": -235.94805908203125,
"logps/rejected": -298.7242431640625,
"loss": 4.3376,
"margin_dpo/margin_mean": 31.415302276611328,
"margin_dpo/margin_std": 48.84147644042969,
"step": 202
},
{
"epoch": 0.42513089005235605,
"fcm_dpo/beta": 0.018687793985009193,
"fcm_dpo/delta": 0.0978153869509697,
"fcm_dpo/margin": 24.22014808654785,
"fcm_dpo/q_t": 0.4048110246658325,
"grad_norm": 104.75067138671875,
"learning_rate": 3.571731403507635e-07,
"logits/chosen": -0.8457682132720947,
"logits/rejected": -0.857460618019104,
"logps/chosen": -317.5693664550781,
"logps/ref_chosen": -280.2221374511719,
"logps/ref_rejected": -251.79798889160156,
"logps/rejected": -313.36541748046875,
"loss": 4.5713,
"margin_dpo/margin_mean": 24.22014808654785,
"margin_dpo/margin_std": 43.69092559814453,
"step": 203
},
{
"epoch": 0.4272251308900524,
"fcm_dpo/beta": 0.018568500876426697,
"fcm_dpo/delta": -0.08285186439752579,
"fcm_dpo/margin": 36.37318420410156,
"fcm_dpo/q_t": 0.3603626787662506,
"grad_norm": 101.34950256347656,
"learning_rate": 3.5551627605944746e-07,
"logits/chosen": -0.8969916701316833,
"logits/rejected": -0.8768599033355713,
"logps/chosen": -347.6547546386719,
"logps/ref_chosen": -318.7960510253906,
"logps/ref_rejected": -269.69921875,
"logps/rejected": -334.93109130859375,
"loss": 3.9315,
"margin_dpo/margin_mean": 36.37318420410156,
"margin_dpo/margin_std": 46.72166061401367,
"step": 204
},
{
"epoch": 0.4293193717277487,
"fcm_dpo/beta": 0.017940927296876907,
"fcm_dpo/delta": -0.05948423594236374,
"fcm_dpo/margin": 36.4859733581543,
"fcm_dpo/q_t": 0.36764687299728394,
"grad_norm": 90.59722900390625,
"learning_rate": 3.5385375325047163e-07,
"logits/chosen": -0.8392518162727356,
"logits/rejected": -0.8088283538818359,
"logps/chosen": -315.66253662109375,
"logps/ref_chosen": -283.7620544433594,
"logps/ref_rejected": -297.69439697265625,
"logps/rejected": -366.0808410644531,
"loss": 4.0268,
"margin_dpo/margin_mean": 36.4859733581543,
"margin_dpo/margin_std": 49.50936508178711,
"step": 205
},
{
"epoch": 0.431413612565445,
"fcm_dpo/beta": 0.017662961035966873,
"fcm_dpo/delta": 0.010416839271783829,
"fcm_dpo/margin": 30.46101188659668,
"fcm_dpo/q_t": 0.3891698122024536,
"grad_norm": 100.13421630859375,
"learning_rate": 3.5218566107988867e-07,
"logits/chosen": -0.8697702884674072,
"logits/rejected": -0.8934507369995117,
"logps/chosen": -329.244873046875,
"logps/ref_chosen": -293.66387939453125,
"logps/ref_rejected": -291.3056640625,
"logps/rejected": -357.34771728515625,
"loss": 4.4422,
"margin_dpo/margin_mean": 30.461013793945312,
"margin_dpo/margin_std": 50.47527313232422,
"step": 206
},
{
"epoch": 0.43350785340314135,
"fcm_dpo/beta": 0.01789412647485733,
"fcm_dpo/delta": 0.018324781209230423,
"fcm_dpo/margin": 29.37425994873047,
"fcm_dpo/q_t": 0.3917839229106903,
"grad_norm": 103.07648468017578,
"learning_rate": 3.505120890024195e-07,
"logits/chosen": -0.8186078667640686,
"logits/rejected": -0.8274865746498108,
"logps/chosen": -302.8357849121094,
"logps/ref_chosen": -270.5350646972656,
"logps/ref_rejected": -278.7747497558594,
"logps/rejected": -340.4497375488281,
"loss": 4.6022,
"margin_dpo/margin_mean": 29.3742618560791,
"margin_dpo/margin_std": 55.54574966430664,
"step": 207
},
{
"epoch": 0.4356020942408377,
"fcm_dpo/beta": 0.017709776759147644,
"fcm_dpo/delta": -0.04383649304509163,
"fcm_dpo/margin": 36.105621337890625,
"fcm_dpo/q_t": 0.3701040744781494,
"grad_norm": 86.0116958618164,
"learning_rate": 3.4883312676665534e-07,
"logits/chosen": -0.8696956038475037,
"logits/rejected": -0.8232940435409546,
"logps/chosen": -315.8093566894531,
"logps/ref_chosen": -279.582763671875,
"logps/ref_rejected": -290.041015625,
"logps/rejected": -362.3731994628906,
"loss": 4.0999,
"margin_dpo/margin_mean": 36.105621337890625,
"margin_dpo/margin_std": 51.143226623535156,
"step": 208
},
{
"epoch": 0.437696335078534,
"fcm_dpo/beta": 0.01805291511118412,
"fcm_dpo/delta": 0.0809149444103241,
"fcm_dpo/margin": 26.21035385131836,
"fcm_dpo/q_t": 0.40189653635025024,
"grad_norm": 109.74065399169922,
"learning_rate": 3.4714886441024573e-07,
"logits/chosen": -0.7950612902641296,
"logits/rejected": -0.7992856502532959,
"logps/chosen": -359.4092102050781,
"logps/ref_chosen": -318.8725280761719,
"logps/ref_rejected": -270.64324951171875,
"logps/rejected": -337.39031982421875,
"loss": 4.7203,
"margin_dpo/margin_mean": 26.210355758666992,
"margin_dpo/margin_std": 53.022239685058594,
"step": 209
},
{
"epoch": 0.4397905759162304,
"fcm_dpo/beta": 0.0181864183396101,
"fcm_dpo/delta": 0.0194247979670763,
"fcm_dpo/margin": 31.902477264404297,
"fcm_dpo/q_t": 0.38086259365081787,
"grad_norm": 107.53482055664062,
"learning_rate": 3.454593922550693e-07,
"logits/chosen": -0.8214377164840698,
"logits/rejected": -0.8090283870697021,
"logps/chosen": -318.75921630859375,
"logps/ref_chosen": -283.14031982421875,
"logps/ref_rejected": -287.2986755371094,
"logps/rejected": -354.820068359375,
"loss": 4.317,
"margin_dpo/margin_mean": 31.902477264404297,
"margin_dpo/margin_std": 50.24505615234375,
"step": 210
},
{
"epoch": 0.4418848167539267,
"fcm_dpo/beta": 0.017495566979050636,
"fcm_dpo/delta": -0.13013754785060883,
"fcm_dpo/margin": 40.89909362792969,
"fcm_dpo/q_t": 0.34772253036499023,
"grad_norm": 85.72150421142578,
"learning_rate": 3.4376480090239047e-07,
"logits/chosen": -0.8469374775886536,
"logits/rejected": -0.8271616101264954,
"logps/chosen": -309.1142578125,
"logps/ref_chosen": -276.4228515625,
"logps/ref_rejected": -252.40603637695312,
"logps/rejected": -325.9964599609375,
"loss": 3.7482,
"margin_dpo/margin_mean": 40.89909744262695,
"margin_dpo/margin_std": 43.54120635986328,
"step": 211
},
{
"epoch": 0.44397905759162304,
"fcm_dpo/beta": 0.01705530472099781,
"fcm_dpo/delta": 0.0323108546435833,
"fcm_dpo/margin": 28.951906204223633,
"fcm_dpo/q_t": 0.3937266170978546,
"grad_norm": 93.46395111083984,
"learning_rate": 3.4206518122800055e-07,
"logits/chosen": -0.8298263549804688,
"logits/rejected": -0.8326829075813293,
"logps/chosen": -307.32464599609375,
"logps/ref_chosen": -271.7055358886719,
"logps/ref_rejected": -241.18511962890625,
"logps/rejected": -305.7561950683594,
"loss": 4.4735,
"margin_dpo/margin_mean": 28.951906204223633,
"margin_dpo/margin_std": 47.416404724121094,
"step": 212
},
{
"epoch": 0.44607329842931936,
"fcm_dpo/beta": 0.017670337110757828,
"fcm_dpo/delta": 0.036003537476062775,
"fcm_dpo/margin": 29.480064392089844,
"fcm_dpo/q_t": 0.39773082733154297,
"grad_norm": 100.81073760986328,
"learning_rate": 3.403606243773448e-07,
"logits/chosen": -0.8215805292129517,
"logits/rejected": -0.8386380672454834,
"logps/chosen": -339.76416015625,
"logps/ref_chosen": -302.2976379394531,
"logps/ref_rejected": -303.6202087402344,
"logps/rejected": -370.5667724609375,
"loss": 4.5047,
"margin_dpo/margin_mean": 29.48006248474121,
"margin_dpo/margin_std": 53.4147834777832,
"step": 213
},
{
"epoch": 0.4481675392670157,
"fcm_dpo/beta": 0.017651241272687912,
"fcm_dpo/delta": -0.009320348501205444,
"fcm_dpo/margin": 34.294586181640625,
"fcm_dpo/q_t": 0.37349388003349304,
"grad_norm": 111.82011413574219,
"learning_rate": 3.3865122176063385e-07,
"logits/chosen": -0.8299503326416016,
"logits/rejected": -0.8320922255516052,
"logps/chosen": -318.2923278808594,
"logps/ref_chosen": -272.13262939453125,
"logps/ref_rejected": -294.82354736328125,
"logps/rejected": -375.2778015136719,
"loss": 4.0849,
"margin_dpo/margin_mean": 34.294586181640625,
"margin_dpo/margin_std": 44.05116271972656,
"step": 214
},
{
"epoch": 0.450261780104712,
"fcm_dpo/beta": 0.017613038420677185,
"fcm_dpo/delta": 0.010489102452993393,
"fcm_dpo/margin": 26.734289169311523,
"fcm_dpo/q_t": 0.40911558270454407,
"grad_norm": 102.18154907226562,
"learning_rate": 3.3693706504794243e-07,
"logits/chosen": -0.8729708194732666,
"logits/rejected": -0.8599320650100708,
"logps/chosen": -334.3503723144531,
"logps/ref_chosen": -291.3782958984375,
"logps/ref_rejected": -261.05792236328125,
"logps/rejected": -330.7642822265625,
"loss": 4.6895,
"margin_dpo/margin_mean": 26.734289169311523,
"margin_dpo/margin_std": 53.317264556884766,
"step": 215
},
{
"epoch": 0.4523560209424084,
"fcm_dpo/beta": 0.01718178391456604,
"fcm_dpo/delta": -0.005167707800865173,
"fcm_dpo/margin": 35.105499267578125,
"fcm_dpo/q_t": 0.37694042921066284,
"grad_norm": 94.87683868408203,
"learning_rate": 3.3521824616429284e-07,
"logits/chosen": -0.8911623358726501,
"logits/rejected": -0.8866602182388306,
"logps/chosen": -374.8359375,
"logps/ref_chosen": -338.50543212890625,
"logps/ref_rejected": -305.76104736328125,
"logps/rejected": -377.197021484375,
"loss": 4.2912,
"margin_dpo/margin_mean": 35.105499267578125,
"margin_dpo/margin_std": 54.505611419677734,
"step": 216
},
{
"epoch": 0.4544502617801047,
"fcm_dpo/beta": 0.01670690067112446,
"fcm_dpo/delta": -0.13507547974586487,
"fcm_dpo/margin": 43.35816955566406,
"fcm_dpo/q_t": 0.35271987318992615,
"grad_norm": 86.05104064941406,
"learning_rate": 3.334948572847253e-07,
"logits/chosen": -0.787706732749939,
"logits/rejected": -0.7581799626350403,
"logps/chosen": -331.9385986328125,
"logps/ref_chosen": -293.5498046875,
"logps/ref_rejected": -256.7830810546875,
"logps/rejected": -338.530029296875,
"loss": 3.9209,
"margin_dpo/margin_mean": 43.35816955566406,
"margin_dpo/margin_std": 55.72812271118164,
"step": 217
},
{
"epoch": 0.45654450261780105,
"fcm_dpo/beta": 0.015875810757279396,
"fcm_dpo/delta": 0.0009746733121573925,
"fcm_dpo/margin": 37.655181884765625,
"fcm_dpo/q_t": 0.37353405356407166,
"grad_norm": 89.3914566040039,
"learning_rate": 3.317669908293554e-07,
"logits/chosen": -0.8176430463790894,
"logits/rejected": -0.840962290763855,
"logps/chosen": -357.17340087890625,
"logps/ref_chosen": -320.579345703125,
"logps/ref_rejected": -294.0381164550781,
"logps/rejected": -368.2873840332031,
"loss": 4.0903,
"margin_dpo/margin_mean": 37.65517807006836,
"margin_dpo/margin_std": 50.91783905029297,
"step": 218
},
{
"epoch": 0.4586387434554974,
"fcm_dpo/beta": 0.01585143618285656,
"fcm_dpo/delta": -0.027818219736218452,
"fcm_dpo/margin": 39.36839294433594,
"fcm_dpo/q_t": 0.3697540760040283,
"grad_norm": 87.21080780029297,
"learning_rate": 3.300347394584172e-07,
"logits/chosen": -0.8196850419044495,
"logits/rejected": -0.8459032773971558,
"logps/chosen": -300.89532470703125,
"logps/ref_chosen": -268.4186096191406,
"logps/ref_rejected": -265.7808837890625,
"logps/rejected": -337.62591552734375,
"loss": 4.1119,
"margin_dpo/margin_mean": 39.3683967590332,
"margin_dpo/margin_std": 54.10658264160156,
"step": 219
},
{
"epoch": 0.4607329842931937,
"fcm_dpo/beta": 0.015508392825722694,
"fcm_dpo/delta": 0.006693243980407715,
"fcm_dpo/margin": 38.219276428222656,
"fcm_dpo/q_t": 0.37400129437446594,
"grad_norm": 84.04236602783203,
"learning_rate": 3.2829819606729477e-07,
"logits/chosen": -0.8543677926063538,
"logits/rejected": -0.8366027474403381,
"logps/chosen": -346.4298400878906,
"logps/ref_chosen": -312.8864440917969,
"logps/ref_rejected": -259.5191955566406,
"logps/rejected": -331.2818298339844,
"loss": 4.1852,
"margin_dpo/margin_mean": 38.219276428222656,
"margin_dpo/margin_std": 54.659847259521484,
"step": 220
},
{
"epoch": 0.46282722513089003,
"fcm_dpo/beta": 0.01612645946443081,
"fcm_dpo/delta": 0.004002414643764496,
"fcm_dpo/margin": 30.247737884521484,
"fcm_dpo/q_t": 0.40294140577316284,
"grad_norm": 90.31486511230469,
"learning_rate": 3.265574537815398e-07,
"logits/chosen": -0.7849829792976379,
"logits/rejected": -0.7987595200538635,
"logps/chosen": -336.2065734863281,
"logps/ref_chosen": -300.32586669921875,
"logps/ref_rejected": -286.312255859375,
"logps/rejected": -352.44073486328125,
"loss": 4.5734,
"margin_dpo/margin_mean": 30.247739791870117,
"margin_dpo/margin_std": 55.0887336730957,
"step": 221
},
{
"epoch": 0.4649214659685864,
"fcm_dpo/beta": 0.01531196478754282,
"fcm_dpo/delta": -0.010990019887685776,
"fcm_dpo/margin": 36.88833999633789,
"fcm_dpo/q_t": 0.3805161714553833,
"grad_norm": 96.45641326904297,
"learning_rate": 3.248126059518784e-07,
"logits/chosen": -0.8649472594261169,
"logits/rejected": -0.8534566760063171,
"logps/chosen": -329.9435119628906,
"logps/ref_chosen": -297.1113586425781,
"logps/ref_rejected": -235.53146362304688,
"logps/rejected": -305.251953125,
"loss": 4.1935,
"margin_dpo/margin_mean": 36.88833999633789,
"margin_dpo/margin_std": 50.991146087646484,
"step": 222
},
{
"epoch": 0.46701570680628274,
"fcm_dpo/beta": 0.015516339801251888,
"fcm_dpo/delta": -0.006761605851352215,
"fcm_dpo/margin": 38.97541046142578,
"fcm_dpo/q_t": 0.3734777569770813,
"grad_norm": 79.94070434570312,
"learning_rate": 3.230637461492043e-07,
"logits/chosen": -0.8134406805038452,
"logits/rejected": -0.7874367833137512,
"logps/chosen": -322.5858459472656,
"logps/ref_chosen": -286.41510009765625,
"logps/ref_rejected": -241.1181640625,
"logps/rejected": -316.2643127441406,
"loss": 4.138,
"margin_dpo/margin_mean": 38.97541427612305,
"margin_dpo/margin_std": 54.24036407470703,
"step": 223
},
{
"epoch": 0.46910994764397906,
"fcm_dpo/beta": 0.015485338866710663,
"fcm_dpo/delta": -0.04357857629656792,
"fcm_dpo/margin": 41.29396438598633,
"fcm_dpo/q_t": 0.36654770374298096,
"grad_norm": 85.50264739990234,
"learning_rate": 3.213109681595612e-07,
"logits/chosen": -0.780707597732544,
"logits/rejected": -0.7996824979782104,
"logps/chosen": -283.2223205566406,
"logps/ref_chosen": -249.49234008789062,
"logps/ref_rejected": -233.10752868652344,
"logps/rejected": -308.1314697265625,
"loss": 3.9722,
"margin_dpo/margin_mean": 41.29396438598633,
"margin_dpo/margin_std": 51.46320343017578,
"step": 224
},
{
"epoch": 0.4712041884816754,
"fcm_dpo/beta": 0.014851980842649937,
"fcm_dpo/delta": 0.04186537116765976,
"fcm_dpo/margin": 37.72064208984375,
"fcm_dpo/q_t": 0.38652801513671875,
"grad_norm": 97.50272369384766,
"learning_rate": 3.1955436597911315e-07,
"logits/chosen": -0.8135025501251221,
"logits/rejected": -0.7935299873352051,
"logps/chosen": -354.3567810058594,
"logps/ref_chosen": -311.8583679199219,
"logps/ref_rejected": -336.8523864746094,
"logps/rejected": -417.0714111328125,
"loss": 4.3211,
"margin_dpo/margin_mean": 37.720645904541016,
"margin_dpo/margin_std": 58.61198043823242,
"step": 225
},
{
"epoch": 0.4732984293193717,
"fcm_dpo/beta": 0.015629008412361145,
"fcm_dpo/delta": 0.020238326862454414,
"fcm_dpo/margin": 33.9615592956543,
"fcm_dpo/q_t": 0.3907637596130371,
"grad_norm": 85.4857406616211,
"learning_rate": 3.1779403380910425e-07,
"logits/chosen": -0.8504273891448975,
"logits/rejected": -0.8434605598449707,
"logps/chosen": -291.4212951660156,
"logps/ref_chosen": -252.20123291015625,
"logps/ref_rejected": -254.41162109375,
"logps/rejected": -327.5932312011719,
"loss": 4.3859,
"margin_dpo/margin_mean": 33.96156311035156,
"margin_dpo/margin_std": 56.106605529785156,
"step": 226
},
{
"epoch": 0.47539267015706804,
"fcm_dpo/beta": 0.015467462129890919,
"fcm_dpo/delta": -0.03860139474272728,
"fcm_dpo/margin": 41.034793853759766,
"fcm_dpo/q_t": 0.369282603263855,
"grad_norm": 102.45677947998047,
"learning_rate": 3.160300660508064e-07,
"logits/chosen": -0.8150337934494019,
"logits/rejected": -0.8127814531326294,
"logps/chosen": -325.2018737792969,
"logps/ref_chosen": -285.25946044921875,
"logps/ref_rejected": -261.3220520019531,
"logps/rejected": -342.29925537109375,
"loss": 4.2129,
"margin_dpo/margin_mean": 41.034793853759766,
"margin_dpo/margin_std": 60.92158508300781,
"step": 227
},
{
"epoch": 0.4774869109947644,
"fcm_dpo/beta": 0.01522951852530241,
"fcm_dpo/delta": -0.043590422719717026,
"fcm_dpo/margin": 41.9859733581543,
"fcm_dpo/q_t": 0.3681778311729431,
"grad_norm": 91.48838806152344,
"learning_rate": 3.1426255730045695e-07,
"logits/chosen": -0.8238086700439453,
"logits/rejected": -0.7931742072105408,
"logps/chosen": -348.5426330566406,
"logps/ref_chosen": -313.81878662109375,
"logps/ref_rejected": -258.07061767578125,
"logps/rejected": -334.7804260253906,
"loss": 4.0378,
"margin_dpo/margin_mean": 41.98596954345703,
"margin_dpo/margin_std": 55.18696594238281,
"step": 228
},
{
"epoch": 0.47958115183246075,
"fcm_dpo/beta": 0.014314261265099049,
"fcm_dpo/delta": -0.07704558223485947,
"fcm_dpo/margin": 46.78490447998047,
"fcm_dpo/q_t": 0.3585876524448395,
"grad_norm": 174.31724548339844,
"learning_rate": 3.1249160234418644e-07,
"logits/chosen": -0.7998561263084412,
"logits/rejected": -0.8153296113014221,
"logps/chosen": -334.5465087890625,
"logps/ref_chosen": -291.9707946777344,
"logps/ref_rejected": -263.42059326171875,
"logps/rejected": -352.7812805175781,
"loss": 3.973,
"margin_dpo/margin_mean": 46.78490447998047,
"margin_dpo/margin_std": 58.31138610839844,
"step": 229
},
{
"epoch": 0.4816753926701571,
"fcm_dpo/beta": 0.0136597054079175,
"fcm_dpo/delta": -0.0017933191265910864,
"fcm_dpo/margin": 43.92266082763672,
"fcm_dpo/q_t": 0.37403687834739685,
"grad_norm": 77.7289810180664,
"learning_rate": 3.1071729615293424e-07,
"logits/chosen": -0.8659123182296753,
"logits/rejected": -0.8676111698150635,
"logps/chosen": -273.0910949707031,
"logps/ref_chosen": -233.2601318359375,
"logps/ref_rejected": -238.922119140625,
"logps/rejected": -322.67572021484375,
"loss": 4.1335,
"margin_dpo/margin_mean": 43.92265319824219,
"margin_dpo/margin_std": 60.90802001953125,
"step": 230
},
{
"epoch": 0.4837696335078534,
"fcm_dpo/beta": 0.014134555123746395,
"fcm_dpo/delta": 0.054537300020456314,
"fcm_dpo/margin": 34.40003204345703,
"fcm_dpo/q_t": 0.3954438269138336,
"grad_norm": 92.6048355102539,
"learning_rate": 3.0893973387735683e-07,
"logits/chosen": -0.8301805853843689,
"logits/rejected": -0.8212379217147827,
"logps/chosen": -370.2666931152344,
"logps/ref_chosen": -322.1551818847656,
"logps/ref_rejected": -280.97613525390625,
"logps/rejected": -363.48773193359375,
"loss": 4.4293,
"margin_dpo/margin_mean": 34.40003204345703,
"margin_dpo/margin_std": 54.50558090209961,
"step": 231
},
{
"epoch": 0.48586387434554973,
"fcm_dpo/beta": 0.01422965433448553,
"fcm_dpo/delta": -0.03155010566115379,
"fcm_dpo/margin": 38.31331253051758,
"fcm_dpo/q_t": 0.386367529630661,
"grad_norm": 108.28009796142578,
"learning_rate": 3.071590108427243e-07,
"logits/chosen": -0.8186150789260864,
"logits/rejected": -0.8022579550743103,
"logps/chosen": -320.86212158203125,
"logps/ref_chosen": -271.7437744140625,
"logps/ref_rejected": -249.94981384277344,
"logps/rejected": -337.3814697265625,
"loss": 4.409,
"margin_dpo/margin_mean": 38.31331253051758,
"margin_dpo/margin_std": 60.96247863769531,
"step": 232
},
{
"epoch": 0.48795811518324606,
"fcm_dpo/beta": 0.013790407218039036,
"fcm_dpo/delta": -0.07378131151199341,
"fcm_dpo/margin": 41.6362419128418,
"fcm_dpo/q_t": 0.37872081995010376,
"grad_norm": 111.52574920654297,
"learning_rate": 3.05375222543809e-07,
"logits/chosen": -0.8492098450660706,
"logits/rejected": -0.8407477140426636,
"logps/chosen": -334.5429992675781,
"logps/ref_chosen": -285.3423156738281,
"logps/ref_rejected": -266.34320068359375,
"logps/rejected": -357.1800842285156,
"loss": 4.2161,
"margin_dpo/margin_mean": 41.6362419128418,
"margin_dpo/margin_std": 59.20707702636719,
"step": 233
},
{
"epoch": 0.4900523560209424,
"fcm_dpo/beta": 0.013664349913597107,
"fcm_dpo/delta": 0.043013282120227814,
"fcm_dpo/margin": 40.833702087402344,
"fcm_dpo/q_t": 0.38517236709594727,
"grad_norm": 81.4188003540039,
"learning_rate": 3.035884646397637e-07,
"logits/chosen": -0.8213815093040466,
"logits/rejected": -0.8046208620071411,
"logps/chosen": -344.2747497558594,
"logps/ref_chosen": -294.9057312011719,
"logps/ref_rejected": -299.37054443359375,
"logps/rejected": -389.57330322265625,
"loss": 4.4323,
"margin_dpo/margin_mean": 40.833702087402344,
"margin_dpo/margin_std": 68.16500091552734,
"step": 234
},
{
"epoch": 0.49214659685863876,
"fcm_dpo/beta": 0.014067186042666435,
"fcm_dpo/delta": 0.0023157279938459396,
"fcm_dpo/margin": 42.43536376953125,
"fcm_dpo/q_t": 0.3754667043685913,
"grad_norm": 111.46656036376953,
"learning_rate": 3.017988329489923e-07,
"logits/chosen": -0.8356315493583679,
"logits/rejected": -0.834743320941925,
"logps/chosen": -342.5096740722656,
"logps/ref_chosen": -289.49755859375,
"logps/ref_rejected": -247.55076599121094,
"logps/rejected": -342.9981994628906,
"loss": 4.2787,
"margin_dpo/margin_mean": 42.43537139892578,
"margin_dpo/margin_std": 65.42337036132812,
"step": 235
},
{
"epoch": 0.4942408376963351,
"fcm_dpo/beta": 0.014103572815656662,
"fcm_dpo/delta": -0.0036417022347450256,
"fcm_dpo/margin": 42.677146911621094,
"fcm_dpo/q_t": 0.3757117688655853,
"grad_norm": 87.55143737792969,
"learning_rate": 3.000064234440111e-07,
"logits/chosen": -0.869731605052948,
"logits/rejected": -0.872031033039093,
"logps/chosen": -337.2437744140625,
"logps/ref_chosen": -288.8846435546875,
"logps/ref_rejected": -242.0452880859375,
"logps/rejected": -333.0815734863281,
"loss": 4.231,
"margin_dpo/margin_mean": 42.677146911621094,
"margin_dpo/margin_std": 62.929443359375,
"step": 236
},
{
"epoch": 0.4963350785340314,
"fcm_dpo/beta": 0.013692477717995644,
"fcm_dpo/delta": -0.04169701784849167,
"fcm_dpo/margin": 42.81891632080078,
"fcm_dpo/q_t": 0.3778565227985382,
"grad_norm": 94.28779602050781,
"learning_rate": 2.9821133224630223e-07,
"logits/chosen": -0.8328222632408142,
"logits/rejected": -0.8149389028549194,
"logps/chosen": -318.3191833496094,
"logps/ref_chosen": -265.47869873046875,
"logps/ref_rejected": -267.9891357421875,
"logps/rejected": -363.64849853515625,
"loss": 4.2077,
"margin_dpo/margin_mean": 42.81891632080078,
"margin_dpo/margin_std": 62.191993713378906,
"step": 237
},
{
"epoch": 0.49842931937172774,
"fcm_dpo/beta": 0.013286177068948746,
"fcm_dpo/delta": 0.0006893336540088058,
"fcm_dpo/margin": 41.189964294433594,
"fcm_dpo/q_t": 0.3882310390472412,
"grad_norm": 99.93206787109375,
"learning_rate": 2.964136556211588e-07,
"logits/chosen": -0.8325835466384888,
"logits/rejected": -0.8071151375770569,
"logps/chosen": -367.561279296875,
"logps/ref_chosen": -312.0026550292969,
"logps/ref_rejected": -270.0257263183594,
"logps/rejected": -366.7742919921875,
"loss": 4.3033,
"margin_dpo/margin_mean": 41.18996810913086,
"margin_dpo/margin_std": 64.29379272460938,
"step": 238
},
{
"epoch": 0.5005235602094241,
"fcm_dpo/beta": 0.013920535333454609,
"fcm_dpo/delta": 0.09140698611736298,
"fcm_dpo/margin": 36.762245178222656,
"fcm_dpo/q_t": 0.3982861042022705,
"grad_norm": 103.36236572265625,
"learning_rate": 2.946134899725226e-07,
"logits/chosen": -0.8311811089515686,
"logits/rejected": -0.8709484338760376,
"logps/chosen": -318.14971923828125,
"logps/ref_chosen": -267.167236328125,
"logps/ref_rejected": -275.99468994140625,
"logps/rejected": -363.73944091796875,
"loss": 4.6305,
"margin_dpo/margin_mean": 36.762245178222656,
"margin_dpo/margin_std": 71.0285873413086,
"step": 239
},
{
"epoch": 0.5026178010471204,
"fcm_dpo/beta": 0.013737525790929794,
"fcm_dpo/delta": -0.057324089109897614,
"fcm_dpo/margin": 47.42666244506836,
"fcm_dpo/q_t": 0.36713799834251404,
"grad_norm": 116.31761932373047,
"learning_rate": 2.9281093183781403e-07,
"logits/chosen": -0.8899922370910645,
"logits/rejected": -0.8857712149620056,
"logps/chosen": -334.71600341796875,
"logps/ref_chosen": -285.9796142578125,
"logps/ref_rejected": -256.8258056640625,
"logps/rejected": -352.98883056640625,
"loss": 4.058,
"margin_dpo/margin_mean": 47.42666244506836,
"margin_dpo/margin_std": 65.38998413085938,
"step": 240
},
{
"epoch": 0.5047120418848168,
"fcm_dpo/beta": 0.013606472872197628,
"fcm_dpo/delta": 0.037277210503816605,
"fcm_dpo/margin": 37.668827056884766,
"fcm_dpo/q_t": 0.3958120048046112,
"grad_norm": 107.70622253417969,
"learning_rate": 2.910060778827554e-07,
"logits/chosen": -0.80711430311203,
"logits/rejected": -0.7877066731452942,
"logps/chosen": -318.73126220703125,
"logps/ref_chosen": -261.516845703125,
"logps/ref_rejected": -250.2250518798828,
"logps/rejected": -345.1082458496094,
"loss": 4.5076,
"margin_dpo/margin_mean": 37.668827056884766,
"margin_dpo/margin_std": 65.27635192871094,
"step": 241
},
{
"epoch": 0.506806282722513,
"fcm_dpo/beta": 0.014048721641302109,
"fcm_dpo/delta": -0.022284481674432755,
"fcm_dpo/margin": 44.0509147644043,
"fcm_dpo/q_t": 0.37357252836227417,
"grad_norm": 94.55889892578125,
"learning_rate": 2.891990248961871e-07,
"logits/chosen": -0.8759533762931824,
"logits/rejected": -0.8638408184051514,
"logps/chosen": -319.8083190917969,
"logps/ref_chosen": -270.51397705078125,
"logps/ref_rejected": -244.8560791015625,
"logps/rejected": -338.20135498046875,
"loss": 4.1086,
"margin_dpo/margin_mean": 44.05091857910156,
"margin_dpo/margin_std": 60.92688751220703,
"step": 242
},
{
"epoch": 0.5089005235602094,
"fcm_dpo/beta": 0.013818719424307346,
"fcm_dpo/delta": -0.06250281631946564,
"fcm_dpo/margin": 47.56100845336914,
"fcm_dpo/q_t": 0.36709579825401306,
"grad_norm": 112.03346252441406,
"learning_rate": 2.873898697848762e-07,
"logits/chosen": -0.8465127944946289,
"logits/rejected": -0.8346729278564453,
"logps/chosen": -369.3190612792969,
"logps/ref_chosen": -324.68206787109375,
"logps/ref_rejected": -307.1111755371094,
"logps/rejected": -399.3091735839844,
"loss": 4.0675,
"margin_dpo/margin_mean": 47.561012268066406,
"margin_dpo/margin_std": 65.72129821777344,
"step": 243
},
{
"epoch": 0.5109947643979058,
"fcm_dpo/beta": 0.012858567759394646,
"fcm_dpo/delta": -0.009479108266532421,
"fcm_dpo/margin": 47.19831085205078,
"fcm_dpo/q_t": 0.37022095918655396,
"grad_norm": 96.53694915771484,
"learning_rate": 2.8557870956832133e-07,
"logits/chosen": -0.8438245058059692,
"logits/rejected": -0.7965729832649231,
"logps/chosen": -363.5497741699219,
"logps/ref_chosen": -318.979248046875,
"logps/ref_rejected": -269.67572021484375,
"logps/rejected": -361.4445495605469,
"loss": 4.0776,
"margin_dpo/margin_mean": 47.19831466674805,
"margin_dpo/margin_std": 61.196590423583984,
"step": 244
},
{
"epoch": 0.5130890052356021,
"fcm_dpo/beta": 0.012791464105248451,
"fcm_dpo/delta": -0.07829757034778595,
"fcm_dpo/margin": 47.49521255493164,
"fcm_dpo/q_t": 0.3693080246448517,
"grad_norm": 86.34193420410156,
"learning_rate": 2.837656413735479e-07,
"logits/chosen": -0.8461104035377502,
"logits/rejected": -0.8512292504310608,
"logps/chosen": -337.3260192871094,
"logps/ref_chosen": -294.8980712890625,
"logps/ref_rejected": -239.8111114501953,
"logps/rejected": -329.7342529296875,
"loss": 4.0572,
"margin_dpo/margin_mean": 47.49521255493164,
"margin_dpo/margin_std": 59.5516471862793,
"step": 245
},
{
"epoch": 0.5151832460732985,
"fcm_dpo/beta": 0.012845880351960659,
"fcm_dpo/delta": 0.09227827191352844,
"fcm_dpo/margin": 35.69134521484375,
"fcm_dpo/q_t": 0.404882550239563,
"grad_norm": 102.24588775634766,
"learning_rate": 2.8195076242990116e-07,
"logits/chosen": -0.8206315040588379,
"logits/rejected": -0.8294263482093811,
"logps/chosen": -334.98681640625,
"logps/ref_chosen": -280.6854248046875,
"logps/ref_rejected": -253.65382385253906,
"logps/rejected": -343.64654541015625,
"loss": 4.5677,
"margin_dpo/margin_mean": 35.69134521484375,
"margin_dpo/margin_std": 64.71369934082031,
"step": 246
},
{
"epoch": 0.5172774869109947,
"fcm_dpo/beta": 0.013430180959403515,
"fcm_dpo/delta": 0.008843163028359413,
"fcm_dpo/margin": 40.15351104736328,
"fcm_dpo/q_t": 0.3880252242088318,
"grad_norm": 82.07162475585938,
"learning_rate": 2.801341700638307e-07,
"logits/chosen": -0.8394767045974731,
"logits/rejected": -0.8434745073318481,
"logps/chosen": -330.6539611816406,
"logps/ref_chosen": -281.1091003417969,
"logps/ref_rejected": -260.3700866699219,
"logps/rejected": -350.0684814453125,
"loss": 4.2961,
"margin_dpo/margin_mean": 40.15351104736328,
"margin_dpo/margin_std": 59.38816452026367,
"step": 247
},
{
"epoch": 0.5193717277486911,
"fcm_dpo/beta": 0.01332888100296259,
"fcm_dpo/delta": 0.036328285932540894,
"fcm_dpo/margin": 37.929134368896484,
"fcm_dpo/q_t": 0.3918164074420929,
"grad_norm": 99.17080688476562,
"learning_rate": 2.7831596169367227e-07,
"logits/chosen": -0.8002797365188599,
"logits/rejected": -0.8162012696266174,
"logps/chosen": -319.5628967285156,
"logps/ref_chosen": -270.318359375,
"logps/ref_rejected": -233.46778869628906,
"logps/rejected": -320.6414794921875,
"loss": 4.3869,
"margin_dpo/margin_mean": 37.92913055419922,
"margin_dpo/margin_std": 58.7851676940918,
"step": 248
},
{
"epoch": 0.5214659685863874,
"fcm_dpo/beta": 0.013946634717285633,
"fcm_dpo/delta": 0.03218340501189232,
"fcm_dpo/margin": 36.27839279174805,
"fcm_dpo/q_t": 0.3957046866416931,
"grad_norm": 100.11740112304688,
"learning_rate": 2.7649623482442274e-07,
"logits/chosen": -0.8372878432273865,
"logits/rejected": -0.8184173703193665,
"logps/chosen": -336.67974853515625,
"logps/ref_chosen": -275.8088684082031,
"logps/ref_rejected": -243.45138549804688,
"logps/rejected": -340.6006774902344,
"loss": 4.5694,
"margin_dpo/margin_mean": 36.27839279174805,
"margin_dpo/margin_std": 66.48210906982422,
"step": 249
},
{
"epoch": 0.5235602094240838,
"fcm_dpo/beta": 0.013306835666298866,
"fcm_dpo/delta": -0.04749767482280731,
"fcm_dpo/margin": 48.137725830078125,
"fcm_dpo/q_t": 0.3672924041748047,
"grad_norm": 97.09394836425781,
"learning_rate": 2.7467508704251135e-07,
"logits/chosen": -0.8327563405036926,
"logits/rejected": -0.8362528681755066,
"logps/chosen": -354.5564270019531,
"logps/ref_chosen": -292.4945373535156,
"logps/ref_rejected": -284.2869567871094,
"logps/rejected": -394.4865417480469,
"loss": 4.1373,
"margin_dpo/margin_mean": 48.137718200683594,
"margin_dpo/margin_std": 67.8976821899414,
"step": 250
},
{
"epoch": 0.5256544502617801,
"fcm_dpo/beta": 0.013809560798108578,
"fcm_dpo/delta": -0.00444817915558815,
"fcm_dpo/margin": 43.47351837158203,
"fcm_dpo/q_t": 0.3821024000644684,
"grad_norm": 100.93732452392578,
"learning_rate": 2.7285261601056697e-07,
"logits/chosen": -0.8351647257804871,
"logits/rejected": -0.8213850259780884,
"logps/chosen": -336.347900390625,
"logps/ref_chosen": -281.736572265625,
"logps/ref_rejected": -255.9419708251953,
"logps/rejected": -354.02679443359375,
"loss": 4.1882,
"margin_dpo/margin_mean": 43.473514556884766,
"margin_dpo/margin_std": 63.28712463378906,
"step": 251
},
{
"epoch": 0.5277486910994764,
"fcm_dpo/beta": 0.013638043776154518,
"fcm_dpo/delta": 0.03314843028783798,
"fcm_dpo/margin": 41.505462646484375,
"fcm_dpo/q_t": 0.380887508392334,
"grad_norm": 90.63612365722656,
"learning_rate": 2.7102891946217994e-07,
"logits/chosen": -0.8848163485527039,
"logits/rejected": -0.861879289150238,
"logps/chosen": -360.0526428222656,
"logps/ref_chosen": -295.9674072265625,
"logps/ref_rejected": -280.111572265625,
"logps/rejected": -385.70233154296875,
"loss": 4.3812,
"margin_dpo/margin_mean": 41.505462646484375,
"margin_dpo/margin_std": 66.30912780761719,
"step": 252
},
{
"epoch": 0.5298429319371728,
"fcm_dpo/beta": 0.013700338080525398,
"fcm_dpo/delta": -0.014694290235638618,
"fcm_dpo/margin": 40.862892150878906,
"fcm_dpo/q_t": 0.3878205716609955,
"grad_norm": 98.66238403320312,
"learning_rate": 2.692040951966617e-07,
"logits/chosen": -0.8532537221908569,
"logits/rejected": -0.8468297123908997,
"logps/chosen": -346.66949462890625,
"logps/ref_chosen": -277.072265625,
"logps/ref_rejected": -247.31643676757812,
"logps/rejected": -357.7765808105469,
"loss": 4.418,
"margin_dpo/margin_mean": 40.862892150878906,
"margin_dpo/margin_std": 68.3602294921875,
"step": 253
},
{
"epoch": 0.5319371727748691,
"fcm_dpo/beta": 0.013948986306786537,
"fcm_dpo/delta": -0.05029097944498062,
"fcm_dpo/margin": 43.06998062133789,
"fcm_dpo/q_t": 0.37745508551597595,
"grad_norm": 101.81684875488281,
"learning_rate": 2.6737824107379947e-07,
"logits/chosen": -0.7788005471229553,
"logits/rejected": -0.7665879130363464,
"logps/chosen": -334.8228454589844,
"logps/ref_chosen": -269.9478454589844,
"logps/ref_rejected": -249.45005798339844,
"logps/rejected": -357.3950500488281,
"loss": 4.2058,
"margin_dpo/margin_mean": 43.06998062133789,
"margin_dpo/margin_std": 61.4660530090332,
"step": 254
},
{
"epoch": 0.5340314136125655,
"fcm_dpo/beta": 0.01308943796902895,
"fcm_dpo/delta": -0.05487431585788727,
"fcm_dpo/margin": 49.75128173828125,
"fcm_dpo/q_t": 0.3672358989715576,
"grad_norm": 89.9389877319336,
"learning_rate": 2.655514550086086e-07,
"logits/chosen": -0.7998797297477722,
"logits/rejected": -0.7680299282073975,
"logps/chosen": -369.9079284667969,
"logps/ref_chosen": -306.6552734375,
"logps/ref_rejected": -254.47528076171875,
"logps/rejected": -367.47918701171875,
"loss": 4.1475,
"margin_dpo/margin_mean": 49.75128173828125,
"margin_dpo/margin_std": 72.38668823242188,
"step": 255
},
{
"epoch": 0.5361256544502618,
"fcm_dpo/beta": 0.012760424986481667,
"fcm_dpo/delta": -0.006860591471195221,
"fcm_dpo/margin": 46.954036712646484,
"fcm_dpo/q_t": 0.3672289550304413,
"grad_norm": 254.555908203125,
"learning_rate": 2.6372383496608186e-07,
"logits/chosen": -0.8446077704429626,
"logits/rejected": -0.8431991338729858,
"logps/chosen": -387.7762451171875,
"logps/ref_chosen": -323.7181701660156,
"logps/ref_rejected": -254.1871337890625,
"logps/rejected": -365.19921875,
"loss": 4.5513,
"margin_dpo/margin_mean": 46.95404052734375,
"margin_dpo/margin_std": 78.9866714477539,
"step": 256
},
{
"epoch": 0.5382198952879581,
"fcm_dpo/beta": 0.012355271726846695,
"fcm_dpo/delta": -0.011902200058102608,
"fcm_dpo/margin": 49.428714752197266,
"fcm_dpo/q_t": 0.37219175696372986,
"grad_norm": 96.56497955322266,
"learning_rate": 2.618954789559356e-07,
"logits/chosen": -0.8214514255523682,
"logits/rejected": -0.811537504196167,
"logps/chosen": -330.85174560546875,
"logps/ref_chosen": -267.21209716796875,
"logps/ref_rejected": -249.12579345703125,
"logps/rejected": -362.19415283203125,
"loss": 4.05,
"margin_dpo/margin_mean": 49.428714752197266,
"margin_dpo/margin_std": 66.22650146484375,
"step": 257
},
{
"epoch": 0.5403141361256545,
"fcm_dpo/beta": 0.011946265585720539,
"fcm_dpo/delta": -0.015679441392421722,
"fcm_dpo/margin": 51.2308349609375,
"fcm_dpo/q_t": 0.3674898147583008,
"grad_norm": 103.5653305053711,
"learning_rate": 2.600664850273538e-07,
"logits/chosen": -0.8514293432235718,
"logits/rejected": -0.8220398426055908,
"logps/chosen": -344.84173583984375,
"logps/ref_chosen": -277.6827392578125,
"logps/ref_rejected": -250.73385620117188,
"logps/rejected": -369.1236572265625,
"loss": 4.021,
"margin_dpo/margin_mean": 51.2308349609375,
"margin_dpo/margin_std": 63.38447570800781,
"step": 258
},
{
"epoch": 0.5424083769633508,
"fcm_dpo/beta": 0.012485547922551632,
"fcm_dpo/delta": 0.019997823983430862,
"fcm_dpo/margin": 46.484439849853516,
"fcm_dpo/q_t": 0.3781452775001526,
"grad_norm": 86.96826934814453,
"learning_rate": 2.582369512637302e-07,
"logits/chosen": -0.8579553365707397,
"logits/rejected": -0.8558469414710999,
"logps/chosen": -352.1661071777344,
"logps/ref_chosen": -294.6099853515625,
"logps/ref_rejected": -272.2725830078125,
"logps/rejected": -376.31317138671875,
"loss": 4.118,
"margin_dpo/margin_mean": 46.484439849853516,
"margin_dpo/margin_std": 63.52824020385742,
"step": 259
},
{
"epoch": 0.5445026178010471,
"fcm_dpo/beta": 0.013305707834661007,
"fcm_dpo/delta": 0.1523754745721817,
"fcm_dpo/margin": 22.55513572692871,
"fcm_dpo/q_t": 0.43987154960632324,
"grad_norm": 106.72164916992188,
"learning_rate": 2.5640697577740815e-07,
"logits/chosen": -0.8499176502227783,
"logits/rejected": -0.8482145667076111,
"logps/chosen": -356.1315612792969,
"logps/ref_chosen": -290.85711669921875,
"logps/ref_rejected": -277.5970153808594,
"logps/rejected": -365.4266357421875,
"loss": 5.1348,
"margin_dpo/margin_mean": 22.55513572692871,
"margin_dpo/margin_std": 64.21441650390625,
"step": 260
},
{
"epoch": 0.5465968586387434,
"fcm_dpo/beta": 0.013940032571554184,
"fcm_dpo/delta": -0.03943036496639252,
"fcm_dpo/margin": 37.314186096191406,
"fcm_dpo/q_t": 0.39753836393356323,
"grad_norm": 122.23685455322266,
"learning_rate": 2.5457665670441937e-07,
"logits/chosen": -0.7377340197563171,
"logits/rejected": -0.7536084055900574,
"logps/chosen": -320.2516174316406,
"logps/ref_chosen": -251.13223266601562,
"logps/ref_rejected": -244.76016235351562,
"logps/rejected": -351.1937255859375,
"loss": 4.6636,
"margin_dpo/margin_mean": 37.314186096191406,
"margin_dpo/margin_std": 71.60857391357422,
"step": 261
},
{
"epoch": 0.5486910994764398,
"fcm_dpo/beta": 0.013438230380415916,
"fcm_dpo/delta": -0.06206154823303223,
"fcm_dpo/margin": 48.95298767089844,
"fcm_dpo/q_t": 0.366224467754364,
"grad_norm": 105.1137466430664,
"learning_rate": 2.527460921992209e-07,
"logits/chosen": -0.7684426307678223,
"logits/rejected": -0.762508749961853,
"logps/chosen": -362.34210205078125,
"logps/ref_chosen": -299.7217712402344,
"logps/ref_rejected": -277.0969543457031,
"logps/rejected": -388.67022705078125,
"loss": 4.0258,
"margin_dpo/margin_mean": 48.95298767089844,
"margin_dpo/margin_std": 65.29669189453125,
"step": 262
},
{
"epoch": 0.5507853403141362,
"fcm_dpo/beta": 0.012967620976269245,
"fcm_dpo/delta": -0.021967921406030655,
"fcm_dpo/margin": 40.99163055419922,
"fcm_dpo/q_t": 0.3881154954433441,
"grad_norm": 88.81194305419922,
"learning_rate": 2.509153804294318e-07,
"logits/chosen": -0.7807701826095581,
"logits/rejected": -0.7647296786308289,
"logps/chosen": -348.8820495605469,
"logps/ref_chosen": -279.95257568359375,
"logps/ref_rejected": -256.5327453613281,
"logps/rejected": -366.4538269042969,
"loss": 4.4404,
"margin_dpo/margin_mean": 40.99162673950195,
"margin_dpo/margin_std": 67.29783630371094,
"step": 263
},
{
"epoch": 0.5528795811518324,
"fcm_dpo/beta": 0.012286883778870106,
"fcm_dpo/delta": -0.06830502301454544,
"fcm_dpo/margin": 49.56260681152344,
"fcm_dpo/q_t": 0.36980560421943665,
"grad_norm": 102.09204864501953,
"learning_rate": 2.4908461957056825e-07,
"logits/chosen": -0.7926238179206848,
"logits/rejected": -0.7938596606254578,
"logps/chosen": -321.9147033691406,
"logps/ref_chosen": -260.53509521484375,
"logps/ref_rejected": -255.53799438476562,
"logps/rejected": -366.4801940917969,
"loss": 4.0499,
"margin_dpo/margin_mean": 49.56260681152344,
"margin_dpo/margin_std": 64.20928955078125,
"step": 264
},
{
"epoch": 0.5549738219895288,
"fcm_dpo/beta": 0.011741209775209427,
"fcm_dpo/delta": -0.022350091487169266,
"fcm_dpo/margin": 52.69523620605469,
"fcm_dpo/q_t": 0.3699071407318115,
"grad_norm": 84.6684341430664,
"learning_rate": 2.4725390780077905e-07,
"logits/chosen": -0.8557706475257874,
"logits/rejected": -0.8646640181541443,
"logps/chosen": -346.32525634765625,
"logps/ref_chosen": -283.7130432128906,
"logps/ref_rejected": -270.3209533691406,
"logps/rejected": -385.62835693359375,
"loss": 4.1084,
"margin_dpo/margin_mean": 52.69523620605469,
"margin_dpo/margin_std": 71.56565856933594,
"step": 265
},
{
"epoch": 0.5570680628272251,
"fcm_dpo/beta": 0.011872725561261177,
"fcm_dpo/delta": -0.0177978053689003,
"fcm_dpo/margin": 51.798423767089844,
"fcm_dpo/q_t": 0.3671649396419525,
"grad_norm": 76.50971221923828,
"learning_rate": 2.454233432955807e-07,
"logits/chosen": -0.8721534609794617,
"logits/rejected": -0.8421223163604736,
"logps/chosen": -332.63873291015625,
"logps/ref_chosen": -278.09930419921875,
"logps/ref_rejected": -260.6734619140625,
"logps/rejected": -367.0113525390625,
"loss": 3.9391,
"margin_dpo/margin_mean": 51.798423767089844,
"margin_dpo/margin_std": 59.57025146484375,
"step": 266
},
{
"epoch": 0.5591623036649215,
"fcm_dpo/beta": 0.011853402480483055,
"fcm_dpo/delta": 0.05013914406299591,
"fcm_dpo/margin": 42.46312713623047,
"fcm_dpo/q_t": 0.3929550051689148,
"grad_norm": 92.15836334228516,
"learning_rate": 2.435930242225919e-07,
"logits/chosen": -0.8216646313667297,
"logits/rejected": -0.8368365168571472,
"logps/chosen": -349.4070129394531,
"logps/ref_chosen": -280.33319091796875,
"logps/ref_rejected": -247.78099060058594,
"logps/rejected": -359.31793212890625,
"loss": 4.3247,
"margin_dpo/margin_mean": 42.46312713623047,
"margin_dpo/margin_std": 62.86342239379883,
"step": 267
},
{
"epoch": 0.5612565445026177,
"fcm_dpo/beta": 0.012068388983607292,
"fcm_dpo/delta": -0.048395268619060516,
"fcm_dpo/margin": 53.47141647338867,
"fcm_dpo/q_t": 0.3641560673713684,
"grad_norm": 97.78318786621094,
"learning_rate": 2.4176304873626984e-07,
"logits/chosen": -0.7805606722831726,
"logits/rejected": -0.7611721158027649,
"logps/chosen": -369.9452209472656,
"logps/ref_chosen": -304.1787109375,
"logps/ref_rejected": -272.80316162109375,
"logps/rejected": -392.04107666015625,
"loss": 3.9723,
"margin_dpo/margin_mean": 53.47141647338867,
"margin_dpo/margin_std": 67.32571411132812,
"step": 268
},
{
"epoch": 0.5633507853403141,
"fcm_dpo/beta": 0.012517577037215233,
"fcm_dpo/delta": 0.11093584448099136,
"fcm_dpo/margin": 39.28661346435547,
"fcm_dpo/q_t": 0.39567479491233826,
"grad_norm": 108.96583557128906,
"learning_rate": 2.399335149726463e-07,
"logits/chosen": -0.8175703287124634,
"logits/rejected": -0.8138397932052612,
"logps/chosen": -320.2642517089844,
"logps/ref_chosen": -249.84512329101562,
"logps/ref_rejected": -223.37356567382812,
"logps/rejected": -333.079345703125,
"loss": 4.5504,
"margin_dpo/margin_mean": 39.28661346435547,
"margin_dpo/margin_std": 71.5968017578125,
"step": 269
},
{
"epoch": 0.5654450261780105,
"fcm_dpo/beta": 0.012729712761938572,
"fcm_dpo/delta": 0.005217651836574078,
"fcm_dpo/margin": 46.67543029785156,
"fcm_dpo/q_t": 0.3786097764968872,
"grad_norm": 101.96600341796875,
"learning_rate": 2.381045210440644e-07,
"logits/chosen": -0.8885576128959656,
"logits/rejected": -0.9054932594299316,
"logps/chosen": -394.0969543457031,
"logps/ref_chosen": -318.5623779296875,
"logps/ref_rejected": -281.1880798339844,
"logps/rejected": -403.39813232421875,
"loss": 4.3489,
"margin_dpo/margin_mean": 46.67543029785156,
"margin_dpo/margin_std": 76.03194427490234,
"step": 270
},
{
"epoch": 0.5675392670157068,
"fcm_dpo/beta": 0.013197865337133408,
"fcm_dpo/delta": 0.0033783800899982452,
"fcm_dpo/margin": 45.022972106933594,
"fcm_dpo/q_t": 0.38268500566482544,
"grad_norm": 100.24028778076172,
"learning_rate": 2.3627616503391812e-07,
"logits/chosen": -0.7424062490463257,
"logits/rejected": -0.7395483255386353,
"logps/chosen": -357.7034606933594,
"logps/ref_chosen": -284.104736328125,
"logps/ref_rejected": -253.9580535888672,
"logps/rejected": -372.57977294921875,
"loss": 4.285,
"margin_dpo/margin_mean": 45.022972106933594,
"margin_dpo/margin_std": 69.82579803466797,
"step": 271
},
{
"epoch": 0.5696335078534032,
"fcm_dpo/beta": 0.012690500356256962,
"fcm_dpo/delta": -0.032762445509433746,
"fcm_dpo/margin": 49.54048156738281,
"fcm_dpo/q_t": 0.3718162178993225,
"grad_norm": 91.42510986328125,
"learning_rate": 2.344485449913914e-07,
"logits/chosen": -0.8627545833587646,
"logits/rejected": -0.8512448072433472,
"logps/chosen": -365.7294006347656,
"logps/ref_chosen": -297.3590087890625,
"logps/ref_rejected": -279.20196533203125,
"logps/rejected": -397.1128845214844,
"loss": 4.2931,
"margin_dpo/margin_mean": 49.54048156738281,
"margin_dpo/margin_std": 78.09527587890625,
"step": 272
},
{
"epoch": 0.5717277486910994,
"fcm_dpo/beta": 0.012210111133754253,
"fcm_dpo/delta": -0.020789261907339096,
"fcm_dpo/margin": 50.5628547668457,
"fcm_dpo/q_t": 0.3735979199409485,
"grad_norm": 97.6107177734375,
"learning_rate": 2.3262175892620062e-07,
"logits/chosen": -0.8329838514328003,
"logits/rejected": -0.8465803265571594,
"logps/chosen": -365.12774658203125,
"logps/ref_chosen": -293.20574951171875,
"logps/ref_rejected": -274.7646789550781,
"logps/rejected": -397.24951171875,
"loss": 4.2177,
"margin_dpo/margin_mean": 50.5628547668457,
"margin_dpo/margin_std": 74.91758728027344,
"step": 273
},
{
"epoch": 0.5738219895287958,
"fcm_dpo/beta": 0.011717407964169979,
"fcm_dpo/delta": -0.11725673824548721,
"fcm_dpo/margin": 60.52649688720703,
"fcm_dpo/q_t": 0.3518860936164856,
"grad_norm": 91.55536651611328,
"learning_rate": 2.3079590480333827e-07,
"logits/chosen": -0.7906344532966614,
"logits/rejected": -0.7621797323226929,
"logps/chosen": -341.36102294921875,
"logps/ref_chosen": -270.55865478515625,
"logps/ref_rejected": -239.47048950195312,
"logps/rejected": -370.7992858886719,
"loss": 3.8357,
"margin_dpo/margin_mean": 60.5264892578125,
"margin_dpo/margin_std": 72.19639587402344,
"step": 274
},
{
"epoch": 0.5759162303664922,
"fcm_dpo/beta": 0.010930696502327919,
"fcm_dpo/delta": -0.049976646900177,
"fcm_dpo/margin": 59.051597595214844,
"fcm_dpo/q_t": 0.3641063868999481,
"grad_norm": 70.57231140136719,
"learning_rate": 2.2897108053782e-07,
"logits/chosen": -0.8348967432975769,
"logits/rejected": -0.8198622465133667,
"logps/chosen": -314.8291320800781,
"logps/ref_chosen": -250.31922912597656,
"logps/ref_rejected": -249.3187255859375,
"logps/rejected": -372.8802490234375,
"loss": 3.9217,
"margin_dpo/margin_mean": 59.051597595214844,
"margin_dpo/margin_std": 71.34449005126953,
"step": 275
},
{
"epoch": 0.5780104712041885,
"fcm_dpo/beta": 0.01073821447789669,
"fcm_dpo/delta": 0.05224524065852165,
"fcm_dpo/margin": 51.23969268798828,
"fcm_dpo/q_t": 0.3837069272994995,
"grad_norm": 86.6988754272461,
"learning_rate": 2.2714738398943308e-07,
"logits/chosen": -0.9012481570243835,
"logits/rejected": -0.8782431483268738,
"logps/chosen": -370.5542297363281,
"logps/ref_chosen": -297.6310729980469,
"logps/ref_rejected": -295.225830078125,
"logps/rejected": -419.388671875,
"loss": 4.2576,
"margin_dpo/margin_mean": 51.23969268798828,
"margin_dpo/margin_std": 74.20903015136719,
"step": 276
},
{
"epoch": 0.5801047120418849,
"fcm_dpo/beta": 0.01166857872158289,
"fcm_dpo/delta": 0.06670135259628296,
"fcm_dpo/margin": 45.79502487182617,
"fcm_dpo/q_t": 0.3874945640563965,
"grad_norm": 98.19717407226562,
"learning_rate": 2.2532491295748865e-07,
"logits/chosen": -0.8439708948135376,
"logits/rejected": -0.8469751477241516,
"logps/chosen": -344.6778869628906,
"logps/ref_chosen": -266.3604736328125,
"logps/ref_rejected": -253.36767578125,
"logps/rejected": -377.4801025390625,
"loss": 4.4195,
"margin_dpo/margin_mean": 45.795021057128906,
"margin_dpo/margin_std": 75.37797546386719,
"step": 277
},
{
"epoch": 0.5821989528795811,
"fcm_dpo/beta": 0.01215837337076664,
"fcm_dpo/delta": 0.05089471489191055,
"fcm_dpo/margin": 33.36516189575195,
"fcm_dpo/q_t": 0.42117270827293396,
"grad_norm": 116.95894622802734,
"learning_rate": 2.2350376517557726e-07,
"logits/chosen": -0.8678665161132812,
"logits/rejected": -0.8359065651893616,
"logps/chosen": -357.11553955078125,
"logps/ref_chosen": -267.40728759765625,
"logps/ref_rejected": -229.5758514404297,
"logps/rejected": -352.64923095703125,
"loss": 5.013,
"margin_dpo/margin_mean": 33.36516571044922,
"margin_dpo/margin_std": 81.0997543334961,
"step": 278
},
{
"epoch": 0.5842931937172775,
"fcm_dpo/beta": 0.011790696531534195,
"fcm_dpo/delta": -0.12328186631202698,
"fcm_dpo/margin": 55.72016525268555,
"fcm_dpo/q_t": 0.3644426763057709,
"grad_norm": 110.0771484375,
"learning_rate": 2.2168403830632769e-07,
"logits/chosen": -0.7839449644088745,
"logits/rejected": -0.7698712348937988,
"logps/chosen": -393.23095703125,
"logps/ref_chosen": -313.3677978515625,
"logps/ref_rejected": -299.1744384765625,
"logps/rejected": -434.75775146484375,
"loss": 4.1056,
"margin_dpo/margin_mean": 55.72016906738281,
"margin_dpo/margin_std": 77.62565612792969,
"step": 279
},
{
"epoch": 0.5863874345549738,
"fcm_dpo/beta": 0.011370867490768433,
"fcm_dpo/delta": 0.04377196356654167,
"fcm_dpo/margin": 49.120079040527344,
"fcm_dpo/q_t": 0.38405805826187134,
"grad_norm": 80.28543090820312,
"learning_rate": 2.1986582993616925e-07,
"logits/chosen": -0.8500107526779175,
"logits/rejected": -0.8614104986190796,
"logps/chosen": -333.7889404296875,
"logps/ref_chosen": -265.5558166503906,
"logps/ref_rejected": -247.1573944091797,
"logps/rejected": -364.5106201171875,
"loss": 4.3234,
"margin_dpo/margin_mean": 49.120079040527344,
"margin_dpo/margin_std": 78.11002349853516,
"step": 280
},
{
"epoch": 0.5884816753926702,
"fcm_dpo/beta": 0.011548914946615696,
"fcm_dpo/delta": 0.035830557346343994,
"fcm_dpo/margin": 48.90550994873047,
"fcm_dpo/q_t": 0.3849385976791382,
"grad_norm": 104.87403869628906,
"learning_rate": 2.1804923757009882e-07,
"logits/chosen": -0.8356598615646362,
"logits/rejected": -0.84555983543396,
"logps/chosen": -381.03668212890625,
"logps/ref_chosen": -295.2995910644531,
"logps/ref_rejected": -293.80877685546875,
"logps/rejected": -428.4513244628906,
"loss": 4.2998,
"margin_dpo/margin_mean": 48.90550994873047,
"margin_dpo/margin_std": 75.06422424316406,
"step": 281
},
{
"epoch": 0.5905759162303665,
"fcm_dpo/beta": 0.011683318763971329,
"fcm_dpo/delta": -0.01186651736497879,
"fcm_dpo/margin": 52.158531188964844,
"fcm_dpo/q_t": 0.375276118516922,
"grad_norm": 104.86578369140625,
"learning_rate": 2.1623435862645205e-07,
"logits/chosen": -0.8201556205749512,
"logits/rejected": -0.8232011795043945,
"logps/chosen": -391.24603271484375,
"logps/ref_chosen": -318.63714599609375,
"logps/ref_rejected": -273.5943603515625,
"logps/rejected": -398.36175537109375,
"loss": 4.2261,
"margin_dpo/margin_mean": 52.158531188964844,
"margin_dpo/margin_std": 77.26969146728516,
"step": 282
},
{
"epoch": 0.5926701570680628,
"fcm_dpo/beta": 0.01230486948043108,
"fcm_dpo/delta": 0.03790903091430664,
"fcm_dpo/margin": 45.61647033691406,
"fcm_dpo/q_t": 0.38794541358947754,
"grad_norm": 91.48641204833984,
"learning_rate": 2.1442129043167873e-07,
"logits/chosen": -0.8388844728469849,
"logits/rejected": -0.835168182849884,
"logps/chosen": -332.825927734375,
"logps/ref_chosen": -254.66053771972656,
"logps/ref_rejected": -236.8627166748047,
"logps/rejected": -360.64459228515625,
"loss": 4.3522,
"margin_dpo/margin_mean": 45.61647033691406,
"margin_dpo/margin_std": 73.58220672607422,
"step": 283
},
{
"epoch": 0.5947643979057592,
"fcm_dpo/beta": 0.011913403868675232,
"fcm_dpo/delta": -0.03183186054229736,
"fcm_dpo/margin": 52.571319580078125,
"fcm_dpo/q_t": 0.3742894232273102,
"grad_norm": 125.63947296142578,
"learning_rate": 2.1261013021512378e-07,
"logits/chosen": -0.8041011095046997,
"logits/rejected": -0.7851691246032715,
"logps/chosen": -353.00323486328125,
"logps/ref_chosen": -273.355224609375,
"logps/ref_rejected": -259.84759521484375,
"logps/rejected": -392.0669250488281,
"loss": 4.3049,
"margin_dpo/margin_mean": 52.57131576538086,
"margin_dpo/margin_std": 81.13550567626953,
"step": 284
},
{
"epoch": 0.5968586387434555,
"fcm_dpo/beta": 0.01256585493683815,
"fcm_dpo/delta": 0.0635511577129364,
"fcm_dpo/margin": 36.67383575439453,
"fcm_dpo/q_t": 0.4085825979709625,
"grad_norm": 145.48236083984375,
"learning_rate": 2.1080097510381294e-07,
"logits/chosen": -0.8173208236694336,
"logits/rejected": -0.8177124857902527,
"logps/chosen": -394.8221130371094,
"logps/ref_chosen": -309.8022155761719,
"logps/ref_rejected": -279.11846923828125,
"logps/rejected": -400.81219482421875,
"loss": 4.781,
"margin_dpo/margin_mean": 36.67383575439453,
"margin_dpo/margin_std": 75.84709167480469,
"step": 285
},
{
"epoch": 0.5989528795811518,
"fcm_dpo/beta": 0.012453727424144745,
"fcm_dpo/delta": 0.030384495854377747,
"fcm_dpo/margin": 45.85185623168945,
"fcm_dpo/q_t": 0.3881286382675171,
"grad_norm": 128.9279022216797,
"learning_rate": 2.089939221172446e-07,
"logits/chosen": -0.80593341588974,
"logits/rejected": -0.7947626709938049,
"logps/chosen": -348.8951416015625,
"logps/ref_chosen": -271.4655456542969,
"logps/ref_rejected": -279.531494140625,
"logps/rejected": -402.8129577636719,
"loss": 4.4438,
"margin_dpo/margin_mean": 45.85185241699219,
"margin_dpo/margin_std": 79.64374542236328,
"step": 286
},
{
"epoch": 0.6010471204188481,
"fcm_dpo/beta": 0.01248287595808506,
"fcm_dpo/delta": -0.01810862123966217,
"fcm_dpo/margin": 49.31248474121094,
"fcm_dpo/q_t": 0.37548819184303284,
"grad_norm": 108.0594253540039,
"learning_rate": 2.0718906816218595e-07,
"logits/chosen": -0.8343806266784668,
"logits/rejected": -0.8241921663284302,
"logps/chosen": -350.6217041015625,
"logps/ref_chosen": -277.0932312011719,
"logps/ref_rejected": -233.55599975585938,
"logps/rejected": -356.3969421386719,
"loss": 4.3305,
"margin_dpo/margin_mean": 49.31248474121094,
"margin_dpo/margin_std": 79.3702621459961,
"step": 287
},
{
"epoch": 0.6031413612565445,
"fcm_dpo/beta": 0.013002946972846985,
"fcm_dpo/delta": -0.011839449405670166,
"fcm_dpo/margin": 46.86420822143555,
"fcm_dpo/q_t": 0.3763912618160248,
"grad_norm": 125.55633544921875,
"learning_rate": 2.053865100274774e-07,
"logits/chosen": -0.8246794939041138,
"logits/rejected": -0.8403179049491882,
"logps/chosen": -362.42303466796875,
"logps/ref_chosen": -293.1681823730469,
"logps/ref_rejected": -263.4059143066406,
"logps/rejected": -379.5249938964844,
"loss": 4.252,
"margin_dpo/margin_mean": 46.86420822143555,
"margin_dpo/margin_std": 71.86483764648438,
"step": 288
},
{
"epoch": 0.6052356020942409,
"fcm_dpo/beta": 0.01310706790536642,
"fcm_dpo/delta": 0.12158410996198654,
"fcm_dpo/margin": 33.08653259277344,
"fcm_dpo/q_t": 0.41252899169921875,
"grad_norm": 101.58392333984375,
"learning_rate": 2.035863443788411e-07,
"logits/chosen": -0.8192298412322998,
"logits/rejected": -0.8059917688369751,
"logps/chosen": -412.61199951171875,
"logps/ref_chosen": -329.9574279785156,
"logps/ref_rejected": -276.7565002441406,
"logps/rejected": -392.4975891113281,
"loss": 4.7866,
"margin_dpo/margin_mean": 33.08653259277344,
"margin_dpo/margin_std": 70.98587799072266,
"step": 289
},
{
"epoch": 0.6073298429319371,
"fcm_dpo/beta": 0.0129544697701931,
"fcm_dpo/delta": -0.07823115587234497,
"fcm_dpo/margin": 44.445472717285156,
"fcm_dpo/q_t": 0.38723504543304443,
"grad_norm": 132.9167022705078,
"learning_rate": 2.0178866775369774e-07,
"logits/chosen": -0.833109974861145,
"logits/rejected": -0.7754253149032593,
"logps/chosen": -399.00225830078125,
"logps/ref_chosen": -324.6690673828125,
"logps/ref_rejected": -311.8439636230469,
"logps/rejected": -430.6226806640625,
"loss": 4.4788,
"margin_dpo/margin_mean": 44.445472717285156,
"margin_dpo/margin_std": 75.31282043457031,
"step": 290
},
{
"epoch": 0.6094240837696335,
"fcm_dpo/beta": 0.012139367870986462,
"fcm_dpo/delta": -0.09630902111530304,
"fcm_dpo/margin": 56.51261520385742,
"fcm_dpo/q_t": 0.36019349098205566,
"grad_norm": 103.76278686523438,
"learning_rate": 1.9999357655598891e-07,
"logits/chosen": -0.7965466380119324,
"logits/rejected": -0.7893682718276978,
"logps/chosen": -342.99267578125,
"logps/ref_chosen": -274.1440734863281,
"logps/ref_rejected": -278.07208251953125,
"logps/rejected": -403.4332580566406,
"loss": 3.9622,
"margin_dpo/margin_mean": 56.51261520385742,
"margin_dpo/margin_std": 71.91201782226562,
"step": 291
},
{
"epoch": 0.6115183246073298,
"fcm_dpo/beta": 0.012333599850535393,
"fcm_dpo/delta": 0.09315042197704315,
"fcm_dpo/margin": 41.38365936279297,
"fcm_dpo/q_t": 0.39496973156929016,
"grad_norm": 106.41152954101562,
"learning_rate": 1.9820116705100775e-07,
"logits/chosen": -0.800334632396698,
"logits/rejected": -0.7948342561721802,
"logps/chosen": -324.21563720703125,
"logps/ref_chosen": -259.3636779785156,
"logps/ref_rejected": -279.30218505859375,
"logps/rejected": -385.53778076171875,
"loss": 4.5122,
"margin_dpo/margin_mean": 41.38365936279297,
"margin_dpo/margin_std": 71.46543884277344,
"step": 292
},
{
"epoch": 0.6136125654450262,
"fcm_dpo/beta": 0.012719634920358658,
"fcm_dpo/delta": -0.04290001466870308,
"fcm_dpo/margin": 50.179176330566406,
"fcm_dpo/q_t": 0.3686256408691406,
"grad_norm": 98.5149917602539,
"learning_rate": 1.9641153536023642e-07,
"logits/chosen": -0.8973373174667358,
"logits/rejected": -0.8607571125030518,
"logps/chosen": -376.51239013671875,
"logps/ref_chosen": -303.77081298828125,
"logps/ref_rejected": -270.07513427734375,
"logps/rejected": -392.9958801269531,
"loss": 4.0359,
"margin_dpo/margin_mean": 50.179176330566406,
"margin_dpo/margin_std": 66.0428466796875,
"step": 293
},
{
"epoch": 0.6157068062827226,
"fcm_dpo/beta": 0.012579267844557762,
"fcm_dpo/delta": 0.0067170350812375546,
"fcm_dpo/margin": 47.1030158996582,
"fcm_dpo/q_t": 0.381188303232193,
"grad_norm": 109.40179443359375,
"learning_rate": 1.9462477745619106e-07,
"logits/chosen": -0.795992374420166,
"logits/rejected": -0.8062705993652344,
"logps/chosen": -303.7636413574219,
"logps/ref_chosen": -240.23831176757812,
"logps/ref_rejected": -229.187744140625,
"logps/rejected": -339.81610107421875,
"loss": 4.2363,
"margin_dpo/margin_mean": 47.10301971435547,
"margin_dpo/margin_std": 71.89276123046875,
"step": 294
},
{
"epoch": 0.6178010471204188,
"fcm_dpo/beta": 0.012650757096707821,
"fcm_dpo/delta": 0.03361117094755173,
"fcm_dpo/margin": 44.81559371948242,
"fcm_dpo/q_t": 0.3833611309528351,
"grad_norm": 84.96255493164062,
"learning_rate": 1.928409891572757e-07,
"logits/chosen": -0.7861504554748535,
"logits/rejected": -0.8039959073066711,
"logps/chosen": -319.04840087890625,
"logps/ref_chosen": -251.00970458984375,
"logps/ref_rejected": -244.15142822265625,
"logps/rejected": -357.00567626953125,
"loss": 4.2769,
"margin_dpo/margin_mean": 44.815589904785156,
"margin_dpo/margin_std": 67.42735290527344,
"step": 295
},
{
"epoch": 0.6198952879581152,
"fcm_dpo/beta": 0.012142694555222988,
"fcm_dpo/delta": -0.11849077045917511,
"fcm_dpo/margin": 58.27566909790039,
"fcm_dpo/q_t": 0.35460710525512695,
"grad_norm": 113.73211669921875,
"learning_rate": 1.9106026612264315e-07,
"logits/chosen": -0.7692264914512634,
"logits/rejected": -0.7446385025978088,
"logps/chosen": -364.2586669921875,
"logps/ref_chosen": -293.880615234375,
"logps/ref_rejected": -283.4175720214844,
"logps/rejected": -412.0712585449219,
"loss": 4.0144,
"margin_dpo/margin_mean": 58.27566909790039,
"margin_dpo/margin_std": 77.97174835205078,
"step": 296
},
{
"epoch": 0.6219895287958115,
"fcm_dpo/beta": 0.01153514999896288,
"fcm_dpo/delta": 0.015021094121038914,
"fcm_dpo/margin": 42.32819366455078,
"fcm_dpo/q_t": 0.3964134454727173,
"grad_norm": 96.94518280029297,
"learning_rate": 1.8928270384706582e-07,
"logits/chosen": -0.878042459487915,
"logits/rejected": -0.8735657930374146,
"logps/chosen": -357.3432312011719,
"logps/ref_chosen": -289.4600830078125,
"logps/ref_rejected": -283.69110107421875,
"logps/rejected": -393.9024353027344,
"loss": 4.4507,
"margin_dpo/margin_mean": 42.32819747924805,
"margin_dpo/margin_std": 69.12220764160156,
"step": 297
},
{
"epoch": 0.6240837696335079,
"fcm_dpo/beta": 0.011558500118553638,
"fcm_dpo/delta": -0.06854025274515152,
"fcm_dpo/margin": 48.931365966796875,
"fcm_dpo/q_t": 0.3847343921661377,
"grad_norm": 113.2163314819336,
"learning_rate": 1.875083976558136e-07,
"logits/chosen": -0.7906237840652466,
"logits/rejected": -0.7819389700889587,
"logps/chosen": -369.0527648925781,
"logps/ref_chosen": -306.5150146484375,
"logps/ref_rejected": -280.6969909667969,
"logps/rejected": -392.16607666015625,
"loss": 4.3345,
"margin_dpo/margin_mean": 48.931373596191406,
"margin_dpo/margin_std": 76.4260482788086,
"step": 298
},
{
"epoch": 0.6261780104712041,
"fcm_dpo/beta": 0.011346298269927502,
"fcm_dpo/delta": 0.05653999000787735,
"fcm_dpo/margin": 42.16632843017578,
"fcm_dpo/q_t": 0.39747947454452515,
"grad_norm": 98.81658935546875,
"learning_rate": 1.8573744269954297e-07,
"logits/chosen": -0.7703346610069275,
"logits/rejected": -0.7614036202430725,
"logps/chosen": -357.9806213378906,
"logps/ref_chosen": -281.36376953125,
"logps/ref_rejected": -270.39508056640625,
"logps/rejected": -389.1782531738281,
"loss": 4.4279,
"margin_dpo/margin_mean": 42.166324615478516,
"margin_dpo/margin_std": 66.2205810546875,
"step": 299
},
{
"epoch": 0.6282722513089005,
"fcm_dpo/beta": 0.01241186074912548,
"fcm_dpo/delta": 0.0943484753370285,
"fcm_dpo/margin": 40.93339157104492,
"fcm_dpo/q_t": 0.39440780878067017,
"grad_norm": 134.6527557373047,
"learning_rate": 1.839699339491937e-07,
"logits/chosen": -0.8087725043296814,
"logits/rejected": -0.7854644656181335,
"logps/chosen": -390.7112121582031,
"logps/ref_chosen": -314.83575439453125,
"logps/ref_rejected": -269.1154479980469,
"logps/rejected": -385.92431640625,
"loss": 4.5001,
"margin_dpo/margin_mean": 40.93339538574219,
"margin_dpo/margin_std": 71.97175598144531,
"step": 300
},
{
"epoch": 0.6303664921465969,
"fcm_dpo/beta": 0.013004500418901443,
"fcm_dpo/delta": 0.04637196660041809,
"fcm_dpo/margin": 42.64813995361328,
"fcm_dpo/q_t": 0.38811761140823364,
"grad_norm": 94.59200286865234,
"learning_rate": 1.8220596619089573e-07,
"logits/chosen": -0.8116219639778137,
"logits/rejected": -0.8269953727722168,
"logps/chosen": -351.3847351074219,
"logps/ref_chosen": -279.89453125,
"logps/ref_rejected": -271.6694641113281,
"logps/rejected": -385.80780029296875,
"loss": 4.3694,
"margin_dpo/margin_mean": 42.64814758300781,
"margin_dpo/margin_std": 68.6011962890625,
"step": 301
},
{
"epoch": 0.6324607329842932,
"fcm_dpo/beta": 0.012540532276034355,
"fcm_dpo/delta": -0.08675913512706757,
"fcm_dpo/margin": 54.20530700683594,
"fcm_dpo/q_t": 0.3618788719177246,
"grad_norm": 120.73770904541016,
"learning_rate": 1.8044563402088682e-07,
"logits/chosen": -0.7918750643730164,
"logits/rejected": -0.7770553827285767,
"logps/chosen": -340.3618469238281,
"logps/ref_chosen": -271.3318176269531,
"logps/ref_rejected": -256.5587158203125,
"logps/rejected": -379.79400634765625,
"loss": 4.0268,
"margin_dpo/margin_mean": 54.20530700683594,
"margin_dpo/margin_std": 74.60562133789062,
"step": 302
},
{
"epoch": 0.6345549738219896,
"fcm_dpo/beta": 0.01216448936611414,
"fcm_dpo/delta": -0.04195103794336319,
"fcm_dpo/margin": 48.33647537231445,
"fcm_dpo/q_t": 0.3784167170524597,
"grad_norm": 115.15612030029297,
"learning_rate": 1.7868903184043885e-07,
"logits/chosen": -0.7648223042488098,
"logits/rejected": -0.7485473155975342,
"logps/chosen": -378.9388427734375,
"logps/ref_chosen": -304.88104248046875,
"logps/ref_rejected": -269.063720703125,
"logps/rejected": -391.4580078125,
"loss": 4.3028,
"margin_dpo/margin_mean": 48.33647537231445,
"margin_dpo/margin_std": 75.0361557006836,
"step": 303
},
{
"epoch": 0.6366492146596858,
"fcm_dpo/beta": 0.01175426971167326,
"fcm_dpo/delta": -0.016764672473073006,
"fcm_dpo/margin": 52.35718536376953,
"fcm_dpo/q_t": 0.37501004338264465,
"grad_norm": 113.5647964477539,
"learning_rate": 1.7693625385079574e-07,
"logits/chosen": -0.7781350016593933,
"logits/rejected": -0.7962645888328552,
"logps/chosen": -371.59942626953125,
"logps/ref_chosen": -290.7109680175781,
"logps/ref_rejected": -237.6885986328125,
"logps/rejected": -370.9342346191406,
"loss": 4.1504,
"margin_dpo/margin_mean": 52.35718536376953,
"margin_dpo/margin_std": 76.69111633300781,
"step": 304
},
{
"epoch": 0.6387434554973822,
"fcm_dpo/beta": 0.010681025683879852,
"fcm_dpo/delta": -0.18092726171016693,
"fcm_dpo/margin": 71.44352722167969,
"fcm_dpo/q_t": 0.339513897895813,
"grad_norm": 93.67918395996094,
"learning_rate": 1.7518739404812155e-07,
"logits/chosen": -0.8444567918777466,
"logits/rejected": -0.8151299953460693,
"logps/chosen": -326.0416259765625,
"logps/ref_chosen": -256.4839782714844,
"logps/ref_rejected": -266.4063415527344,
"logps/rejected": -407.40753173828125,
"loss": 3.6998,
"margin_dpo/margin_mean": 71.44352722167969,
"margin_dpo/margin_std": 78.092529296875,
"step": 305
},
{
"epoch": 0.6408376963350786,
"fcm_dpo/beta": 0.010258047841489315,
"fcm_dpo/delta": 0.02255289815366268,
"fcm_dpo/margin": 45.75574493408203,
"fcm_dpo/q_t": 0.3990488648414612,
"grad_norm": 83.1161880493164,
"learning_rate": 1.7344254621846017e-07,
"logits/chosen": -0.829738974571228,
"logits/rejected": -0.8198045492172241,
"logps/chosen": -397.2244567871094,
"logps/ref_chosen": -320.6492004394531,
"logps/ref_rejected": -273.36773681640625,
"logps/rejected": -395.69866943359375,
"loss": 4.3876,
"margin_dpo/margin_mean": 45.75574493408203,
"margin_dpo/margin_std": 69.27189636230469,
"step": 306
},
{
"epoch": 0.6429319371727749,
"fcm_dpo/beta": 0.01028377190232277,
"fcm_dpo/delta": -0.021565284579992294,
"fcm_dpo/margin": 52.735225677490234,
"fcm_dpo/q_t": 0.3810930848121643,
"grad_norm": 123.105224609375,
"learning_rate": 1.717018039327053e-07,
"logits/chosen": -0.7636174559593201,
"logits/rejected": -0.8098389506340027,
"logps/chosen": -374.28753662109375,
"logps/ref_chosen": -279.4541931152344,
"logps/ref_rejected": -240.3796844482422,
"logps/rejected": -387.9482421875,
"loss": 4.1453,
"margin_dpo/margin_mean": 52.73522186279297,
"margin_dpo/margin_std": 66.00975036621094,
"step": 307
},
{
"epoch": 0.6450261780104712,
"fcm_dpo/beta": 0.010600929148495197,
"fcm_dpo/delta": 0.12419873476028442,
"fcm_dpo/margin": 40.71012878417969,
"fcm_dpo/q_t": 0.4081202447414398,
"grad_norm": 94.15335083007812,
"learning_rate": 1.699652605415828e-07,
"logits/chosen": -0.8173856735229492,
"logits/rejected": -0.8383054137229919,
"logps/chosen": -395.36480712890625,
"logps/ref_chosen": -296.598388671875,
"logps/ref_rejected": -258.6953430175781,
"logps/rejected": -398.17193603515625,
"loss": 4.5984,
"margin_dpo/margin_mean": 40.71012878417969,
"margin_dpo/margin_std": 74.02152252197266,
"step": 308
},
{
"epoch": 0.6471204188481675,
"fcm_dpo/beta": 0.011164986528456211,
"fcm_dpo/delta": -0.02332584373652935,
"fcm_dpo/margin": 55.65196990966797,
"fcm_dpo/q_t": 0.36911213397979736,
"grad_norm": 91.83976745605469,
"learning_rate": 1.6823300917064458e-07,
"logits/chosen": -0.8137606978416443,
"logits/rejected": -0.8284541368484497,
"logps/chosen": -378.19000244140625,
"logps/ref_chosen": -281.3881530761719,
"logps/ref_rejected": -262.458740234375,
"logps/rejected": -414.91259765625,
"loss": 4.0289,
"margin_dpo/margin_mean": 55.6519775390625,
"margin_dpo/margin_std": 72.8603286743164,
"step": 309
},
{
"epoch": 0.6492146596858639,
"fcm_dpo/beta": 0.011183914728462696,
"fcm_dpo/delta": 0.004746271297335625,
"fcm_dpo/margin": 53.116363525390625,
"fcm_dpo/q_t": 0.3752599358558655,
"grad_norm": 101.53085327148438,
"learning_rate": 1.6650514271527465e-07,
"logits/chosen": -0.8139233589172363,
"logits/rejected": -0.7922792434692383,
"logps/chosen": -372.5635986328125,
"logps/ref_chosen": -279.1872863769531,
"logps/ref_rejected": -261.8279724121094,
"logps/rejected": -408.3207092285156,
"loss": 4.1434,
"margin_dpo/margin_mean": 53.116371154785156,
"margin_dpo/margin_std": 73.46383666992188,
"step": 310
},
{
"epoch": 0.6513089005235602,
"fcm_dpo/beta": 0.011093046516180038,
"fcm_dpo/delta": 0.00796596985310316,
"fcm_dpo/margin": 53.135868072509766,
"fcm_dpo/q_t": 0.3744713366031647,
"grad_norm": 133.24725341796875,
"learning_rate": 1.647817538357072e-07,
"logits/chosen": -0.8096467852592468,
"logits/rejected": -0.7938026785850525,
"logps/chosen": -367.9974365234375,
"logps/ref_chosen": -271.39813232421875,
"logps/ref_rejected": -266.12701416015625,
"logps/rejected": -415.8622131347656,
"loss": 4.2424,
"margin_dpo/margin_mean": 53.135868072509766,
"margin_dpo/margin_std": 76.9805908203125,
"step": 311
},
{
"epoch": 0.6534031413612565,
"fcm_dpo/beta": 0.01127730030566454,
"fcm_dpo/delta": 0.04617173224687576,
"fcm_dpo/margin": 48.976646423339844,
"fcm_dpo/q_t": 0.3884603679180145,
"grad_norm": 101.93241119384766,
"learning_rate": 1.6306293495205755e-07,
"logits/chosen": -0.815342903137207,
"logits/rejected": -0.8017351627349854,
"logps/chosen": -377.88897705078125,
"logps/ref_chosen": -282.3850402832031,
"logps/ref_rejected": -246.35389709472656,
"logps/rejected": -390.8345031738281,
"loss": 4.5077,
"margin_dpo/margin_mean": 48.976646423339844,
"margin_dpo/margin_std": 85.27238464355469,
"step": 312
},
{
"epoch": 0.6554973821989529,
"fcm_dpo/beta": 0.011504658497869968,
"fcm_dpo/delta": -0.04747847467660904,
"fcm_dpo/margin": 51.269386291503906,
"fcm_dpo/q_t": 0.3808142840862274,
"grad_norm": 90.95819854736328,
"learning_rate": 1.6134877823936607e-07,
"logits/chosen": -0.8608230352401733,
"logits/rejected": -0.8569746613502502,
"logps/chosen": -398.6612548828125,
"logps/ref_chosen": -303.630859375,
"logps/ref_rejected": -273.1156921386719,
"logps/rejected": -419.4154968261719,
"loss": 4.3476,
"margin_dpo/margin_mean": 51.269386291503906,
"margin_dpo/margin_std": 79.70832824707031,
"step": 313
},
{
"epoch": 0.6575916230366492,
"fcm_dpo/beta": 0.011516381986439228,
"fcm_dpo/delta": 0.024132583290338516,
"fcm_dpo/margin": 49.92859649658203,
"fcm_dpo/q_t": 0.3797074556350708,
"grad_norm": 96.51738739013672,
"learning_rate": 1.5963937562265522e-07,
"logits/chosen": -0.8718076348304749,
"logits/rejected": -0.8581203818321228,
"logps/chosen": -393.2103576660156,
"logps/ref_chosen": -302.3042907714844,
"logps/ref_rejected": -273.6416015625,
"logps/rejected": -414.4762878417969,
"loss": 4.2219,
"margin_dpo/margin_mean": 49.92859649658203,
"margin_dpo/margin_std": 72.24923706054688,
"step": 314
},
{
"epoch": 0.6596858638743456,
"fcm_dpo/beta": 0.011163339018821716,
"fcm_dpo/delta": -0.057650674134492874,
"fcm_dpo/margin": 58.469234466552734,
"fcm_dpo/q_t": 0.36371082067489624,
"grad_norm": 90.03949737548828,
"learning_rate": 1.5793481877199943e-07,
"logits/chosen": -0.8473063707351685,
"logits/rejected": -0.8344524502754211,
"logps/chosen": -393.37115478515625,
"logps/ref_chosen": -302.729248046875,
"logps/ref_rejected": -270.26910400390625,
"logps/rejected": -419.38018798828125,
"loss": 4.0094,
"margin_dpo/margin_mean": 58.469234466552734,
"margin_dpo/margin_std": 74.8115005493164,
"step": 315
},
{
"epoch": 0.6617801047120419,
"fcm_dpo/beta": 0.010703526437282562,
"fcm_dpo/delta": -0.009762253612279892,
"fcm_dpo/margin": 56.77616500854492,
"fcm_dpo/q_t": 0.37491375207901,
"grad_norm": 81.0658187866211,
"learning_rate": 1.562351990976095e-07,
"logits/chosen": -0.8620774745941162,
"logits/rejected": -0.853934109210968,
"logps/chosen": -398.0158386230469,
"logps/ref_chosen": -310.5706481933594,
"logps/ref_rejected": -272.9354553222656,
"logps/rejected": -417.1568603515625,
"loss": 4.1512,
"margin_dpo/margin_mean": 56.77616882324219,
"margin_dpo/margin_std": 80.87450408935547,
"step": 316
},
{
"epoch": 0.6638743455497382,
"fcm_dpo/beta": 0.010788942687213421,
"fcm_dpo/delta": 0.025753259658813477,
"fcm_dpo/margin": 53.33159637451172,
"fcm_dpo/q_t": 0.3748745322227478,
"grad_norm": 80.9461898803711,
"learning_rate": 1.5454060774493065e-07,
"logits/chosen": -0.8560656309127808,
"logits/rejected": -0.8267807960510254,
"logps/chosen": -326.962890625,
"logps/ref_chosen": -253.90036010742188,
"logps/ref_rejected": -218.74078369140625,
"logps/rejected": -345.1348571777344,
"loss": 4.0694,
"margin_dpo/margin_mean": 53.33160400390625,
"margin_dpo/margin_std": 67.30595397949219,
"step": 317
},
{
"epoch": 0.6659685863874345,
"fcm_dpo/beta": 0.010560003109276295,
"fcm_dpo/delta": -0.023112213239073753,
"fcm_dpo/margin": 58.614662170410156,
"fcm_dpo/q_t": 0.36647799611091614,
"grad_norm": 80.73484802246094,
"learning_rate": 1.5285113558975427e-07,
"logits/chosen": -0.8801468014717102,
"logits/rejected": -0.8469870686531067,
"logps/chosen": -352.06805419921875,
"logps/ref_chosen": -270.8228759765625,
"logps/ref_rejected": -255.30972290039062,
"logps/rejected": -395.1695556640625,
"loss": 3.9782,
"margin_dpo/margin_mean": 58.61466979980469,
"margin_dpo/margin_std": 70.51625061035156,
"step": 318
},
{
"epoch": 0.6680628272251309,
"fcm_dpo/beta": 0.010418823920190334,
"fcm_dpo/delta": -0.0004974156618118286,
"fcm_dpo/margin": 57.531654357910156,
"fcm_dpo/q_t": 0.3701345920562744,
"grad_norm": 105.95365142822266,
"learning_rate": 1.5116687323334464e-07,
"logits/chosen": -0.8481382131576538,
"logits/rejected": -0.8254935145378113,
"logps/chosen": -388.3399658203125,
"logps/ref_chosen": -301.0028076171875,
"logps/ref_rejected": -242.39002990722656,
"logps/rejected": -387.2587890625,
"loss": 3.9928,
"margin_dpo/margin_mean": 57.531654357910156,
"margin_dpo/margin_std": 70.07221221923828,
"step": 319
},
{
"epoch": 0.6701570680628273,
"fcm_dpo/beta": 0.010746842250227928,
"fcm_dpo/delta": 0.03297354653477669,
"fcm_dpo/margin": 52.7890739440918,
"fcm_dpo/q_t": 0.3829456567764282,
"grad_norm": 107.80091857910156,
"learning_rate": 1.4948791099758052e-07,
"logits/chosen": -0.8255133032798767,
"logits/rejected": -0.8311203718185425,
"logps/chosen": -385.2088928222656,
"logps/ref_chosen": -303.6225891113281,
"logps/ref_rejected": -280.85174560546875,
"logps/rejected": -415.2270812988281,
"loss": 4.3689,
"margin_dpo/margin_mean": 52.78907012939453,
"margin_dpo/margin_std": 85.84711456298828,
"step": 320
},
{
"epoch": 0.6722513089005235,
"fcm_dpo/beta": 0.011272162199020386,
"fcm_dpo/delta": 0.03078434243798256,
"fcm_dpo/margin": 40.83373260498047,
"fcm_dpo/q_t": 0.40626707673072815,
"grad_norm": 103.12840270996094,
"learning_rate": 1.478143389201113e-07,
"logits/chosen": -0.8609663248062134,
"logits/rejected": -0.8327873349189758,
"logps/chosen": -380.1144714355469,
"logps/ref_chosen": -288.98583984375,
"logps/ref_rejected": -241.1822052001953,
"logps/rejected": -373.1446228027344,
"loss": 4.6024,
"margin_dpo/margin_mean": 40.8337287902832,
"margin_dpo/margin_std": 76.73638916015625,
"step": 321
},
{
"epoch": 0.6743455497382199,
"fcm_dpo/beta": 0.011492523364722729,
"fcm_dpo/delta": -0.0035615600645542145,
"fcm_dpo/margin": 52.34375,
"fcm_dpo/q_t": 0.3778424561023712,
"grad_norm": 95.56451416015625,
"learning_rate": 1.461462467495284e-07,
"logits/chosen": -0.9015979766845703,
"logits/rejected": -0.8630591630935669,
"logps/chosen": -400.4721374511719,
"logps/ref_chosen": -308.54345703125,
"logps/ref_rejected": -269.7995910644531,
"logps/rejected": -414.0719909667969,
"loss": 4.2084,
"margin_dpo/margin_mean": 52.343746185302734,
"margin_dpo/margin_std": 78.392333984375,
"step": 322
},
{
"epoch": 0.6764397905759162,
"fcm_dpo/beta": 0.011814561672508717,
"fcm_dpo/delta": 0.1254061460494995,
"fcm_dpo/margin": 36.32615661621094,
"fcm_dpo/q_t": 0.41510459780693054,
"grad_norm": 104.00493621826172,
"learning_rate": 1.4448372394055246e-07,
"logits/chosen": -0.8651161789894104,
"logits/rejected": -0.8598443269729614,
"logps/chosen": -372.36126708984375,
"logps/ref_chosen": -282.49365234375,
"logps/ref_rejected": -227.7105255126953,
"logps/rejected": -353.9043273925781,
"loss": 4.8553,
"margin_dpo/margin_mean": 36.32615280151367,
"margin_dpo/margin_std": 81.96444702148438,
"step": 323
},
{
"epoch": 0.6785340314136126,
"fcm_dpo/beta": 0.011333497241139412,
"fcm_dpo/delta": -0.13659581542015076,
"fcm_dpo/margin": 63.554481506347656,
"fcm_dpo/q_t": 0.34813186526298523,
"grad_norm": 92.25740051269531,
"learning_rate": 1.428268596492364e-07,
"logits/chosen": -0.8000403046607971,
"logits/rejected": -0.7981281280517578,
"logps/chosen": -315.97833251953125,
"logps/ref_chosen": -239.33836364746094,
"logps/ref_rejected": -230.53775024414062,
"logps/rejected": -370.7322082519531,
"loss": 3.7578,
"margin_dpo/margin_mean": 63.55448913574219,
"margin_dpo/margin_std": 70.8255386352539,
"step": 324
},
{
"epoch": 0.680628272251309,
"fcm_dpo/beta": 0.011075211688876152,
"fcm_dpo/delta": -0.027800805866718292,
"fcm_dpo/margin": 51.91961669921875,
"fcm_dpo/q_t": 0.3847375512123108,
"grad_norm": 110.48858642578125,
"learning_rate": 1.4117574272818386e-07,
"logits/chosen": -0.8060468435287476,
"logits/rejected": -0.7908115386962891,
"logps/chosen": -370.1428527832031,
"logps/ref_chosen": -280.62896728515625,
"logps/ref_rejected": -270.5085754394531,
"logps/rejected": -411.9421081542969,
"loss": 4.3861,
"margin_dpo/margin_mean": 51.91961669921875,
"margin_dpo/margin_std": 84.64945220947266,
"step": 325
},
{
"epoch": 0.6827225130890052,
"fcm_dpo/beta": 0.011209266260266304,
"fcm_dpo/delta": 0.03459738567471504,
"fcm_dpo/margin": 50.52565002441406,
"fcm_dpo/q_t": 0.38198092579841614,
"grad_norm": 110.66674041748047,
"learning_rate": 1.3953046172178413e-07,
"logits/chosen": -0.9141020178794861,
"logits/rejected": -0.9032143950462341,
"logps/chosen": -322.02789306640625,
"logps/ref_chosen": -240.9871368408203,
"logps/ref_rejected": -261.0238342285156,
"logps/rejected": -392.5902099609375,
"loss": 4.2869,
"margin_dpo/margin_mean": 50.52565002441406,
"margin_dpo/margin_std": 77.59326934814453,
"step": 326
},
{
"epoch": 0.6848167539267016,
"fcm_dpo/beta": 0.01094572339206934,
"fcm_dpo/delta": -0.0513957142829895,
"fcm_dpo/margin": 59.012569427490234,
"fcm_dpo/q_t": 0.3640768527984619,
"grad_norm": 78.67206573486328,
"learning_rate": 1.3789110486146468e-07,
"logits/chosen": -0.8713995218276978,
"logits/rejected": -0.851097822189331,
"logps/chosen": -351.3957824707031,
"logps/ref_chosen": -279.52001953125,
"logps/ref_rejected": -269.51824951171875,
"logps/rejected": -400.40655517578125,
"loss": 3.9834,
"margin_dpo/margin_mean": 59.012569427490234,
"margin_dpo/margin_std": 74.85302734375,
"step": 327
},
{
"epoch": 0.6869109947643979,
"fcm_dpo/beta": 0.010611481964588165,
"fcm_dpo/delta": 0.034625254571437836,
"fcm_dpo/margin": 53.36151123046875,
"fcm_dpo/q_t": 0.3801298141479492,
"grad_norm": 102.40028381347656,
"learning_rate": 1.362577600609588e-07,
"logits/chosen": -0.8238348960876465,
"logits/rejected": -0.8258199691772461,
"logps/chosen": -384.0259094238281,
"logps/ref_chosen": -301.033447265625,
"logps/ref_rejected": -284.2101135253906,
"logps/rejected": -420.5640869140625,
"loss": 4.1232,
"margin_dpo/margin_mean": 53.36151123046875,
"margin_dpo/margin_std": 69.19473266601562,
"step": 328
},
{
"epoch": 0.6890052356020943,
"fcm_dpo/beta": 0.011005845852196217,
"fcm_dpo/delta": -0.0060157435946166515,
"fcm_dpo/margin": 54.90179443359375,
"fcm_dpo/q_t": 0.3814074397087097,
"grad_norm": 118.011962890625,
"learning_rate": 1.3463051491159093e-07,
"logits/chosen": -0.8349476456642151,
"logits/rejected": -0.8108065128326416,
"logps/chosen": -408.7464904785156,
"logps/ref_chosen": -319.9888610839844,
"logps/ref_rejected": -307.5588684082031,
"logps/rejected": -451.2182312011719,
"loss": 4.2707,
"margin_dpo/margin_mean": 54.901790618896484,
"margin_dpo/margin_std": 86.18560791015625,
"step": 329
},
{
"epoch": 0.6910994764397905,
"fcm_dpo/beta": 0.011318149045109749,
"fcm_dpo/delta": 0.04677288234233856,
"fcm_dpo/margin": 48.92704391479492,
"fcm_dpo/q_t": 0.3829770088195801,
"grad_norm": 114.82202911376953,
"learning_rate": 1.3300945667758012e-07,
"logits/chosen": -0.8282672166824341,
"logits/rejected": -0.8419229388237,
"logps/chosen": -387.9001770019531,
"logps/ref_chosen": -301.11474609375,
"logps/ref_rejected": -299.673095703125,
"logps/rejected": -435.3855895996094,
"loss": 4.2182,
"margin_dpo/margin_mean": 48.92704391479492,
"margin_dpo/margin_std": 70.02067565917969,
"step": 330
},
{
"epoch": 0.6931937172774869,
"fcm_dpo/beta": 0.011273819953203201,
"fcm_dpo/delta": 0.006491330452263355,
"fcm_dpo/margin": 52.56464385986328,
"fcm_dpo/q_t": 0.3814270496368408,
"grad_norm": 102.59062957763672,
"learning_rate": 1.3139467229135998e-07,
"logits/chosen": -0.8695875406265259,
"logits/rejected": -0.8564931750297546,
"logps/chosen": -357.00396728515625,
"logps/ref_chosen": -277.59149169921875,
"logps/ref_rejected": -256.025634765625,
"logps/rejected": -388.00274658203125,
"loss": 4.3351,
"margin_dpo/margin_mean": 52.56464385986328,
"margin_dpo/margin_std": 86.2286148071289,
"step": 331
},
{
"epoch": 0.6952879581151833,
"fcm_dpo/beta": 0.011159934103488922,
"fcm_dpo/delta": -0.007100608199834824,
"fcm_dpo/margin": 54.201053619384766,
"fcm_dpo/q_t": 0.37773868441581726,
"grad_norm": 123.88658142089844,
"learning_rate": 1.2978624834891626e-07,
"logits/chosen": -0.8522534966468811,
"logits/rejected": -0.829330563545227,
"logps/chosen": -352.5909729003906,
"logps/ref_chosen": -269.97369384765625,
"logps/ref_rejected": -235.03164672851562,
"logps/rejected": -371.8499450683594,
"loss": 4.2557,
"margin_dpo/margin_mean": 54.201053619384766,
"margin_dpo/margin_std": 82.71824645996094,
"step": 332
},
{
"epoch": 0.6973821989528796,
"fcm_dpo/beta": 0.01151346042752266,
"fcm_dpo/delta": 0.005112664774060249,
"fcm_dpo/margin": 47.8289680480957,
"fcm_dpo/q_t": 0.3855535686016083,
"grad_norm": 110.88188171386719,
"learning_rate": 1.281842711051438e-07,
"logits/chosen": -0.9249609112739563,
"logits/rejected": -0.8914788961410522,
"logps/chosen": -380.4714050292969,
"logps/ref_chosen": -296.76300048828125,
"logps/ref_rejected": -265.97991943359375,
"logps/rejected": -397.517333984375,
"loss": 4.2389,
"margin_dpo/margin_mean": 47.8289680480957,
"margin_dpo/margin_std": 70.54847717285156,
"step": 333
},
{
"epoch": 0.6994764397905759,
"fcm_dpo/beta": 0.011438943445682526,
"fcm_dpo/delta": -0.05398944765329361,
"fcm_dpo/margin": 56.70977783203125,
"fcm_dpo/q_t": 0.3660760521888733,
"grad_norm": 113.78327178955078,
"learning_rate": 1.2658882646922033e-07,
"logits/chosen": -0.8433147668838501,
"logits/rejected": -0.8184890747070312,
"logps/chosen": -379.4197692871094,
"logps/ref_chosen": -301.0367431640625,
"logps/ref_rejected": -268.87652587890625,
"logps/rejected": -403.96929931640625,
"loss": 4.0958,
"margin_dpo/margin_mean": 56.709781646728516,
"margin_dpo/margin_std": 76.3214111328125,
"step": 334
},
{
"epoch": 0.7015706806282722,
"fcm_dpo/beta": 0.01065311674028635,
"fcm_dpo/delta": 0.0019375793635845184,
"fcm_dpo/margin": 55.916114807128906,
"fcm_dpo/q_t": 0.37777969241142273,
"grad_norm": 140.96397399902344,
"learning_rate": 1.2500000000000005e-07,
"logits/chosen": -0.8295958042144775,
"logits/rejected": -0.8266909122467041,
"logps/chosen": -366.25634765625,
"logps/ref_chosen": -276.13275146484375,
"logps/ref_rejected": -243.44203186035156,
"logps/rejected": -389.481689453125,
"loss": 4.2603,
"margin_dpo/margin_mean": 55.916114807128906,
"margin_dpo/margin_std": 83.37496185302734,
"step": 335
},
{
"epoch": 0.7036649214659686,
"fcm_dpo/beta": 0.010925454087555408,
"fcm_dpo/delta": -0.0008706990629434586,
"fcm_dpo/margin": 50.11119842529297,
"fcm_dpo/q_t": 0.39162981510162354,
"grad_norm": 105.25801086425781,
"learning_rate": 1.2341787690142435e-07,
"logits/chosen": -0.8540661931037903,
"logits/rejected": -0.7894359230995178,
"logps/chosen": -337.92779541015625,
"logps/ref_chosen": -246.2626495361328,
"logps/ref_rejected": -261.0617980957031,
"logps/rejected": -402.8382263183594,
"loss": 4.3595,
"margin_dpo/margin_mean": 50.11119842529297,
"margin_dpo/margin_std": 81.72027587890625,
"step": 336
},
{
"epoch": 0.7057591623036649,
"fcm_dpo/beta": 0.010866876691579819,
"fcm_dpo/delta": -0.06778120994567871,
"fcm_dpo/margin": 61.01750946044922,
"fcm_dpo/q_t": 0.3631795048713684,
"grad_norm": 92.77830505371094,
"learning_rate": 1.2184254201795363e-07,
"logits/chosen": -0.8569778800010681,
"logits/rejected": -0.8286029696464539,
"logps/chosen": -350.7175598144531,
"logps/ref_chosen": -266.9937744140625,
"logps/ref_rejected": -253.015625,
"logps/rejected": -397.7568359375,
"loss": 3.9651,
"margin_dpo/margin_mean": 61.017513275146484,
"margin_dpo/margin_std": 77.8094482421875,
"step": 337
},
{
"epoch": 0.7078534031413612,
"fcm_dpo/beta": 0.010680442675948143,
"fcm_dpo/delta": 0.032451678067445755,
"fcm_dpo/margin": 53.09748458862305,
"fcm_dpo/q_t": 0.38340330123901367,
"grad_norm": 111.03559875488281,
"learning_rate": 1.202740798300168e-07,
"logits/chosen": -0.8789874911308289,
"logits/rejected": -0.8605346083641052,
"logps/chosen": -357.2398376464844,
"logps/ref_chosen": -276.5925598144531,
"logps/ref_rejected": -233.979248046875,
"logps/rejected": -367.7239990234375,
"loss": 4.2707,
"margin_dpo/margin_mean": 53.09748458862305,
"margin_dpo/margin_std": 80.79719543457031,
"step": 338
},
{
"epoch": 0.7099476439790576,
"fcm_dpo/beta": 0.010629120282828808,
"fcm_dpo/delta": -0.029368996620178223,
"fcm_dpo/margin": 58.94451904296875,
"fcm_dpo/q_t": 0.3694709241390228,
"grad_norm": 96.47938537597656,
"learning_rate": 1.1871257444948096e-07,
"logits/chosen": -0.8932757377624512,
"logits/rejected": -0.8851479291915894,
"logps/chosen": -391.7145690917969,
"logps/ref_chosen": -303.5277404785156,
"logps/ref_rejected": -283.11676025390625,
"logps/rejected": -430.2481689453125,
"loss": 4.143,
"margin_dpo/margin_mean": 58.944515228271484,
"margin_dpo/margin_std": 83.4503402709961,
"step": 339
},
{
"epoch": 0.7120418848167539,
"fcm_dpo/beta": 0.01047454308718443,
"fcm_dpo/delta": -0.005283636972308159,
"fcm_dpo/margin": 52.87586975097656,
"fcm_dpo/q_t": 0.3890461325645447,
"grad_norm": 129.6068878173828,
"learning_rate": 1.1715810961514072e-07,
"logits/chosen": -0.8445842266082764,
"logits/rejected": -0.8434731364250183,
"logps/chosen": -353.9705505371094,
"logps/ref_chosen": -261.5257568359375,
"logps/ref_rejected": -259.39862060546875,
"logps/rejected": -404.71923828125,
"loss": 4.4994,
"margin_dpo/margin_mean": 52.87586975097656,
"margin_dpo/margin_std": 93.22081756591797,
"step": 340
},
{
"epoch": 0.7141361256544503,
"fcm_dpo/beta": 0.010717066004872322,
"fcm_dpo/delta": 0.0799395889043808,
"fcm_dpo/margin": 40.81736373901367,
"fcm_dpo/q_t": 0.41147974133491516,
"grad_norm": 151.6585235595703,
"learning_rate": 1.1561076868822755e-07,
"logits/chosen": -0.8664836883544922,
"logits/rejected": -0.8353126645088196,
"logps/chosen": -425.8817443847656,
"logps/ref_chosen": -315.903564453125,
"logps/ref_rejected": -308.02392578125,
"logps/rejected": -458.81951904296875,
"loss": 4.8832,
"margin_dpo/margin_mean": 40.81736373901367,
"margin_dpo/margin_std": 89.86011505126953,
"step": 341
},
{
"epoch": 0.7162303664921466,
"fcm_dpo/beta": 0.011261941865086555,
"fcm_dpo/delta": -0.030568070709705353,
"fcm_dpo/margin": 55.79458999633789,
"fcm_dpo/q_t": 0.3656637966632843,
"grad_norm": 94.8580322265625,
"learning_rate": 1.1407063464793965e-07,
"logits/chosen": -0.8575106859207153,
"logits/rejected": -0.8551716208457947,
"logps/chosen": -355.30908203125,
"logps/ref_chosen": -269.17864990234375,
"logps/ref_rejected": -260.8977355957031,
"logps/rejected": -402.82281494140625,
"loss": 4.0376,
"margin_dpo/margin_mean": 55.794593811035156,
"margin_dpo/margin_std": 71.31321716308594,
"step": 342
},
{
"epoch": 0.7183246073298429,
"fcm_dpo/beta": 0.010942941531538963,
"fcm_dpo/delta": 0.05468939617276192,
"fcm_dpo/margin": 50.11905288696289,
"fcm_dpo/q_t": 0.38805603981018066,
"grad_norm": 104.98192596435547,
"learning_rate": 1.125377900869913e-07,
"logits/chosen": -0.844852864742279,
"logits/rejected": -0.8284053206443787,
"logps/chosen": -401.84783935546875,
"logps/ref_chosen": -310.719970703125,
"logps/ref_rejected": -263.5224914550781,
"logps/rejected": -404.7693786621094,
"loss": 4.322,
"margin_dpo/margin_mean": 50.11905288696289,
"margin_dpo/margin_std": 79.16139221191406,
"step": 343
},
{
"epoch": 0.7204188481675393,
"fcm_dpo/beta": 0.011533169075846672,
"fcm_dpo/delta": -0.019078608602285385,
"fcm_dpo/margin": 53.341590881347656,
"fcm_dpo/q_t": 0.37309640645980835,
"grad_norm": 124.94219970703125,
"learning_rate": 1.110123172071844e-07,
"logits/chosen": -0.8453131914138794,
"logits/rejected": -0.8294092416763306,
"logps/chosen": -394.9805603027344,
"logps/ref_chosen": -301.7999267578125,
"logps/ref_rejected": -257.9061584472656,
"logps/rejected": -404.42840576171875,
"loss": 4.2305,
"margin_dpo/margin_mean": 53.341590881347656,
"margin_dpo/margin_std": 78.44779205322266,
"step": 344
},
{
"epoch": 0.7225130890052356,
"fcm_dpo/beta": 0.011175896972417831,
"fcm_dpo/delta": 0.020824579522013664,
"fcm_dpo/margin": 47.329490661621094,
"fcm_dpo/q_t": 0.3897257447242737,
"grad_norm": 132.1914520263672,
"learning_rate": 1.09494297815e-07,
"logits/chosen": -0.8442228436470032,
"logits/rejected": -0.8435863256454468,
"logps/chosen": -374.52606201171875,
"logps/ref_chosen": -283.0184326171875,
"logps/ref_rejected": -266.8457336425781,
"logps/rejected": -405.68280029296875,
"loss": 4.2956,
"margin_dpo/margin_mean": 47.32949447631836,
"margin_dpo/margin_std": 67.64356231689453,
"step": 345
},
{
"epoch": 0.724607329842932,
"fcm_dpo/beta": 0.011194109916687012,
"fcm_dpo/delta": -0.06004277244210243,
"fcm_dpo/margin": 58.3812141418457,
"fcm_dpo/q_t": 0.36269640922546387,
"grad_norm": 88.73771667480469,
"learning_rate": 1.0798381331721107e-07,
"logits/chosen": -0.9335488080978394,
"logits/rejected": -0.8848499059677124,
"logps/chosen": -364.63702392578125,
"logps/ref_chosen": -268.44122314453125,
"logps/ref_rejected": -227.8225860595703,
"logps/rejected": -382.3995666503906,
"loss": 4.0986,
"margin_dpo/margin_mean": 58.3812141418457,
"margin_dpo/margin_std": 77.73462677001953,
"step": 346
},
{
"epoch": 0.7267015706806282,
"fcm_dpo/beta": 0.010750826448202133,
"fcm_dpo/delta": -0.0014726296067237854,
"fcm_dpo/margin": 51.17145538330078,
"fcm_dpo/q_t": 0.38269051909446716,
"grad_norm": 98.32434844970703,
"learning_rate": 1.0648094471651722e-07,
"logits/chosen": -0.7837051749229431,
"logits/rejected": -0.8113095760345459,
"logps/chosen": -362.5570068359375,
"logps/ref_chosen": -273.70355224609375,
"logps/ref_rejected": -243.65521240234375,
"logps/rejected": -383.6800842285156,
"loss": 4.2541,
"margin_dpo/margin_mean": 51.17145538330078,
"margin_dpo/margin_std": 73.79896545410156,
"step": 347
},
{
"epoch": 0.7287958115183246,
"fcm_dpo/beta": 0.011453090235590935,
"fcm_dpo/delta": 0.10326485335826874,
"fcm_dpo/margin": 43.56976318359375,
"fcm_dpo/q_t": 0.40140581130981445,
"grad_norm": 90.84459686279297,
"learning_rate": 1.0498577260720048e-07,
"logits/chosen": -0.8765660524368286,
"logits/rejected": -0.8619852066040039,
"logps/chosen": -373.6608581542969,
"logps/ref_chosen": -285.64141845703125,
"logps/ref_rejected": -265.6270446777344,
"logps/rejected": -397.2162780761719,
"loss": 4.482,
"margin_dpo/margin_mean": 43.56976318359375,
"margin_dpo/margin_std": 76.03251647949219,
"step": 348
},
{
"epoch": 0.7308900523560209,
"fcm_dpo/beta": 0.011219880543649197,
"fcm_dpo/delta": -0.10234306752681732,
"fcm_dpo/margin": 61.77885055541992,
"fcm_dpo/q_t": 0.36026033759117126,
"grad_norm": 176.53419494628906,
"learning_rate": 1.0349837717080347e-07,
"logits/chosen": -0.8281702399253845,
"logits/rejected": -0.8236852884292603,
"logps/chosen": -416.912353515625,
"logps/ref_chosen": -328.3175048828125,
"logps/ref_rejected": -292.37872314453125,
"logps/rejected": -442.7524108886719,
"loss": 4.0555,
"margin_dpo/margin_mean": 61.77885055541992,
"margin_dpo/margin_std": 84.70580291748047,
"step": 349
},
{
"epoch": 0.7329842931937173,
"fcm_dpo/beta": 0.011043412610888481,
"fcm_dpo/delta": 0.0056533366441726685,
"fcm_dpo/margin": 49.282676696777344,
"fcm_dpo/q_t": 0.3879699110984802,
"grad_norm": 110.3293685913086,
"learning_rate": 1.0201883817182949e-07,
"logits/chosen": -0.8206266164779663,
"logits/rejected": -0.8350641131401062,
"logps/chosen": -391.7946472167969,
"logps/ref_chosen": -292.8046569824219,
"logps/ref_rejected": -250.35504150390625,
"logps/rejected": -398.62774658203125,
"loss": 4.4195,
"margin_dpo/margin_mean": 49.28268051147461,
"margin_dpo/margin_std": 81.72764587402344,
"step": 350
},
{
"epoch": 0.7350785340314137,
"fcm_dpo/beta": 0.01127422321587801,
"fcm_dpo/delta": 0.05659697949886322,
"fcm_dpo/margin": 36.060794830322266,
"fcm_dpo/q_t": 0.4199449419975281,
"grad_norm": 131.51641845703125,
"learning_rate": 1.0054723495346482e-07,
"logits/chosen": -0.8776383399963379,
"logits/rejected": -0.8651461005210876,
"logps/chosen": -403.58026123046875,
"logps/ref_chosen": -311.8890380859375,
"logps/ref_rejected": -263.59033203125,
"logps/rejected": -391.34234619140625,
"loss": 4.965,
"margin_dpo/margin_mean": 36.060794830322266,
"margin_dpo/margin_std": 87.43182373046875,
"step": 351
},
{
"epoch": 0.7371727748691099,
"fcm_dpo/beta": 0.010802392847836018,
"fcm_dpo/delta": -0.10912173241376877,
"fcm_dpo/margin": 64.70912170410156,
"fcm_dpo/q_t": 0.35592180490493774,
"grad_norm": 101.48279571533203,
"learning_rate": 9.908364643332398e-08,
"logits/chosen": -0.8236813545227051,
"logits/rejected": -0.7954239249229431,
"logps/chosen": -339.6770935058594,
"logps/ref_chosen": -254.9078826904297,
"logps/ref_rejected": -257.1688232421875,
"logps/rejected": -406.64715576171875,
"loss": 3.9846,
"margin_dpo/margin_mean": 64.70912170410156,
"margin_dpo/margin_std": 82.60865783691406,
"step": 352
},
{
"epoch": 0.7392670157068063,
"fcm_dpo/beta": 0.010326343588531017,
"fcm_dpo/delta": 0.011817870661616325,
"fcm_dpo/margin": 50.87775802612305,
"fcm_dpo/q_t": 0.3918360471725464,
"grad_norm": 117.75369262695312,
"learning_rate": 9.76281510992176e-08,
"logits/chosen": -0.8311276435852051,
"logits/rejected": -0.8247827291488647,
"logps/chosen": -364.5557556152344,
"logps/ref_chosen": -270.3760681152344,
"logps/ref_rejected": -264.65234375,
"logps/rejected": -409.7098388671875,
"loss": 4.4262,
"margin_dpo/margin_mean": 50.87776184082031,
"margin_dpo/margin_std": 83.41526794433594,
"step": 353
},
{
"epoch": 0.7413612565445026,
"fcm_dpo/beta": 0.010943200439214706,
"fcm_dpo/delta": 0.10599180310964584,
"fcm_dpo/margin": 36.72353744506836,
"fcm_dpo/q_t": 0.41883280873298645,
"grad_norm": 119.48013305664062,
"learning_rate": 9.618082700494318e-08,
"logits/chosen": -0.8320400714874268,
"logits/rejected": -0.866357684135437,
"logps/chosen": -354.59100341796875,
"logps/ref_chosen": -257.6485595703125,
"logps/ref_rejected": -246.94203186035156,
"logps/rejected": -380.6080322265625,
"loss": 4.8811,
"margin_dpo/margin_mean": 36.723541259765625,
"margin_dpo/margin_std": 83.38043212890625,
"step": 354
},
{
"epoch": 0.743455497382199,
"fcm_dpo/beta": 0.01037515327334404,
"fcm_dpo/delta": -0.1262883096933365,
"fcm_dpo/margin": 62.35184860229492,
"fcm_dpo/q_t": 0.36492300033569336,
"grad_norm": 113.33094024658203,
"learning_rate": 9.474175176609956e-08,
"logits/chosen": -0.875731348991394,
"logits/rejected": -0.8765732049942017,
"logps/chosen": -382.8927307128906,
"logps/ref_chosen": -293.35333251953125,
"logps/ref_rejected": -275.6051940917969,
"logps/rejected": -427.4964294433594,
"loss": 4.1582,
"margin_dpo/margin_mean": 62.35184860229492,
"margin_dpo/margin_std": 87.46715545654297,
"step": 355
},
{
"epoch": 0.7455497382198953,
"fcm_dpo/beta": 0.010616269893944263,
"fcm_dpo/delta": 0.06479822099208832,
"fcm_dpo/margin": 40.029476165771484,
"fcm_dpo/q_t": 0.40827593207359314,
"grad_norm": 93.70746612548828,
"learning_rate": 9.331100255592436e-08,
"logits/chosen": -0.8007790446281433,
"logits/rejected": -0.8301706314086914,
"logps/chosen": -291.36260986328125,
"logps/ref_chosen": -204.25550842285156,
"logps/ref_rejected": -213.467529296875,
"logps/rejected": -340.6041259765625,
"loss": 4.5453,
"margin_dpo/margin_mean": 40.02947235107422,
"margin_dpo/margin_std": 66.7158432006836,
"step": 356
},
{
"epoch": 0.7476439790575916,
"fcm_dpo/beta": 0.010492799803614616,
"fcm_dpo/delta": -0.07817438989877701,
"fcm_dpo/margin": 58.730445861816406,
"fcm_dpo/q_t": 0.37493807077407837,
"grad_norm": 100.33387756347656,
"learning_rate": 9.18886561011557e-08,
"logits/chosen": -0.7677896618843079,
"logits/rejected": -0.7687491774559021,
"logps/chosen": -361.63714599609375,
"logps/ref_chosen": -266.3705749511719,
"logps/ref_rejected": -239.04490661621094,
"logps/rejected": -393.0418701171875,
"loss": 4.1916,
"margin_dpo/margin_mean": 58.730445861816406,
"margin_dpo/margin_std": 85.08563995361328,
"step": 357
},
{
"epoch": 0.749738219895288,
"fcm_dpo/beta": 0.010010240599513054,
"fcm_dpo/delta": -0.06146865338087082,
"fcm_dpo/margin": 65.60650634765625,
"fcm_dpo/q_t": 0.361916184425354,
"grad_norm": 97.68511199951172,
"learning_rate": 9.047478867791731e-08,
"logits/chosen": -0.8633837699890137,
"logits/rejected": -0.845342218875885,
"logps/chosen": -383.7676086425781,
"logps/ref_chosen": -299.1474609375,
"logps/ref_rejected": -257.2531433105469,
"logps/rejected": -407.47979736328125,
"loss": 4.0168,
"margin_dpo/margin_mean": 65.60650634765625,
"margin_dpo/margin_std": 84.83085632324219,
"step": 358
},
{
"epoch": 0.7518324607329843,
"fcm_dpo/beta": 0.010214395821094513,
"fcm_dpo/delta": 0.03628703951835632,
"fcm_dpo/margin": 55.01346206665039,
"fcm_dpo/q_t": 0.3792075216770172,
"grad_norm": 102.26093292236328,
"learning_rate": 8.906947610762825e-08,
"logits/chosen": -0.8235169053077698,
"logits/rejected": -0.8387635350227356,
"logps/chosen": -389.9339904785156,
"logps/ref_chosen": -302.99786376953125,
"logps/ref_rejected": -260.4137268066406,
"logps/rejected": -402.36334228515625,
"loss": 4.1236,
"margin_dpo/margin_mean": 55.01346206665039,
"margin_dpo/margin_std": 71.41567993164062,
"step": 359
},
{
"epoch": 0.7539267015706806,
"fcm_dpo/beta": 0.010241111740469933,
"fcm_dpo/delta": 0.05266699939966202,
"fcm_dpo/margin": 48.13507080078125,
"fcm_dpo/q_t": 0.3928843140602112,
"grad_norm": 114.6863784790039,
"learning_rate": 8.76727937529367e-08,
"logits/chosen": -0.8444012403488159,
"logits/rejected": -0.8383079767227173,
"logps/chosen": -404.416259765625,
"logps/ref_chosen": -309.6114501953125,
"logps/ref_rejected": -256.64031982421875,
"logps/rejected": -399.5802001953125,
"loss": 4.4973,
"margin_dpo/margin_mean": 48.13507080078125,
"margin_dpo/margin_std": 80.82567596435547,
"step": 360
},
{
"epoch": 0.7560209424083769,
"fcm_dpo/beta": 0.010225515812635422,
"fcm_dpo/delta": -0.06704443693161011,
"fcm_dpo/margin": 64.82032775878906,
"fcm_dpo/q_t": 0.36452746391296387,
"grad_norm": 99.3719253540039,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": -0.7976441979408264,
"logits/rejected": -0.7765456438064575,
"logps/chosen": -340.08935546875,
"logps/ref_chosen": -263.3797607421875,
"logps/ref_rejected": -271.18157958984375,
"logps/rejected": -412.71148681640625,
"loss": 4.0939,
"margin_dpo/margin_mean": 64.82032775878906,
"margin_dpo/margin_std": 91.12376403808594,
"step": 361
},
{
"epoch": 0.7581151832460733,
"fcm_dpo/beta": 0.009894550777971745,
"fcm_dpo/delta": 0.03964172303676605,
"fcm_dpo/margin": 56.83903503417969,
"fcm_dpo/q_t": 0.3783508837223053,
"grad_norm": 95.44233703613281,
"learning_rate": 8.490561882286135e-08,
"logits/chosen": -0.8215633630752563,
"logits/rejected": -0.816551923751831,
"logps/chosen": -388.55169677734375,
"logps/ref_chosen": -303.2583923339844,
"logps/ref_rejected": -243.22891235351562,
"logps/rejected": -385.3612060546875,
"loss": 4.0905,
"margin_dpo/margin_mean": 56.83903503417969,
"margin_dpo/margin_std": 72.36383056640625,
"step": 362
},
{
"epoch": 0.7602094240837697,
"fcm_dpo/beta": 0.010408826172351837,
"fcm_dpo/delta": 0.03300439193844795,
"fcm_dpo/margin": 54.380584716796875,
"fcm_dpo/q_t": 0.3840283751487732,
"grad_norm": 108.82610321044922,
"learning_rate": 8.353527464267104e-08,
"logits/chosen": -0.8414917588233948,
"logits/rejected": -0.7955522537231445,
"logps/chosen": -395.16302490234375,
"logps/ref_chosen": -303.34722900390625,
"logps/ref_rejected": -262.05419921875,
"logps/rejected": -408.25054931640625,
"loss": 4.3162,
"margin_dpo/margin_mean": 54.38058090209961,
"margin_dpo/margin_std": 84.62854766845703,
"step": 363
},
{
"epoch": 0.762303664921466,
"fcm_dpo/beta": 0.010767980478703976,
"fcm_dpo/delta": 0.10710425674915314,
"fcm_dpo/margin": 46.3648567199707,
"fcm_dpo/q_t": 0.39887574315071106,
"grad_norm": 99.91820526123047,
"learning_rate": 8.217385746050742e-08,
"logits/chosen": -0.7961313724517822,
"logits/rejected": -0.8093927502632141,
"logps/chosen": -395.1819152832031,
"logps/ref_chosen": -285.54376220703125,
"logps/ref_rejected": -284.84619140625,
"logps/rejected": -440.8492736816406,
"loss": 4.6636,
"margin_dpo/margin_mean": 46.3648567199707,
"margin_dpo/margin_std": 89.22390747070312,
"step": 364
},
{
"epoch": 0.7643979057591623,
"fcm_dpo/beta": 0.011136573739349842,
"fcm_dpo/delta": -0.06065363436937332,
"fcm_dpo/margin": 54.53364562988281,
"fcm_dpo/q_t": 0.37902113795280457,
"grad_norm": 102.22779083251953,
"learning_rate": 8.082144028504231e-08,
"logits/chosen": -0.8273108601570129,
"logits/rejected": -0.8318711519241333,
"logps/chosen": -371.9251403808594,
"logps/ref_chosen": -274.7878112792969,
"logps/ref_rejected": -256.5738220214844,
"logps/rejected": -408.2447509765625,
"loss": 4.2396,
"margin_dpo/margin_mean": 54.53364181518555,
"margin_dpo/margin_std": 82.52845764160156,
"step": 365
},
{
"epoch": 0.7664921465968586,
"fcm_dpo/beta": 0.010572044178843498,
"fcm_dpo/delta": -0.05891243368387222,
"fcm_dpo/margin": 61.89867401123047,
"fcm_dpo/q_t": 0.3644544184207916,
"grad_norm": 94.1471176147461,
"learning_rate": 7.947809564230445e-08,
"logits/chosen": -0.7954903841018677,
"logits/rejected": -0.8094490766525269,
"logps/chosen": -376.78997802734375,
"logps/ref_chosen": -286.6496276855469,
"logps/ref_rejected": -251.97140502929688,
"logps/rejected": -404.0104675292969,
"loss": 4.0522,
"margin_dpo/margin_mean": 61.89867401123047,
"margin_dpo/margin_std": 84.70960998535156,
"step": 366
},
{
"epoch": 0.768586387434555,
"fcm_dpo/beta": 0.009961485862731934,
"fcm_dpo/delta": -0.007503882050514221,
"fcm_dpo/margin": 60.758731842041016,
"fcm_dpo/q_t": 0.3712635934352875,
"grad_norm": 103.36516571044922,
"learning_rate": 7.814389557179016e-08,
"logits/chosen": -0.804265022277832,
"logits/rejected": -0.787903368473053,
"logps/chosen": -393.11322021484375,
"logps/ref_chosen": -301.9449768066406,
"logps/ref_rejected": -265.5677185058594,
"logps/rejected": -417.49468994140625,
"loss": 4.0468,
"margin_dpo/margin_mean": 60.758731842041016,
"margin_dpo/margin_std": 77.93997192382812,
"step": 367
},
{
"epoch": 0.7706806282722513,
"fcm_dpo/beta": 0.009893280453979969,
"fcm_dpo/delta": -0.10160201787948608,
"fcm_dpo/margin": 70.12001037597656,
"fcm_dpo/q_t": 0.35026952624320984,
"grad_norm": 73.314697265625,
"learning_rate": 7.681891162260015e-08,
"logits/chosen": -0.789295494556427,
"logits/rejected": -0.8031895160675049,
"logps/chosen": -380.4212646484375,
"logps/ref_chosen": -294.62652587890625,
"logps/ref_rejected": -258.7628479003906,
"logps/rejected": -414.6776123046875,
"loss": 3.7301,
"margin_dpo/margin_mean": 70.12001037597656,
"margin_dpo/margin_std": 73.42273712158203,
"step": 368
},
{
"epoch": 0.7727748691099476,
"fcm_dpo/beta": 0.009725566022098064,
"fcm_dpo/delta": 0.07594747841358185,
"fcm_dpo/margin": 54.26398849487305,
"fcm_dpo/q_t": 0.3867446184158325,
"grad_norm": 94.77143859863281,
"learning_rate": 7.550321484960251e-08,
"logits/chosen": -0.8820152282714844,
"logits/rejected": -0.8652966022491455,
"logps/chosen": -376.5359191894531,
"logps/ref_chosen": -282.5057373046875,
"logps/ref_rejected": -266.41607666015625,
"logps/rejected": -414.71026611328125,
"loss": 4.238,
"margin_dpo/margin_mean": 54.26398849487305,
"margin_dpo/margin_std": 76.04060363769531,
"step": 369
},
{
"epoch": 0.774869109947644,
"fcm_dpo/beta": 0.009759629145264626,
"fcm_dpo/delta": -0.03207730874419212,
"fcm_dpo/margin": 64.38795471191406,
"fcm_dpo/q_t": 0.36700335144996643,
"grad_norm": 83.10556030273438,
"learning_rate": 7.419687580962222e-08,
"logits/chosen": -0.8547401428222656,
"logits/rejected": -0.8778725266456604,
"logps/chosen": -336.8154296875,
"logps/ref_chosen": -251.00640869140625,
"logps/ref_rejected": -238.12542724609375,
"logps/rejected": -388.32232666015625,
"loss": 4.0568,
"margin_dpo/margin_mean": 64.3879623413086,
"margin_dpo/margin_std": 86.20503997802734,
"step": 370
},
{
"epoch": 0.7769633507853403,
"fcm_dpo/beta": 0.010260825045406818,
"fcm_dpo/delta": 0.0865623950958252,
"fcm_dpo/margin": 50.42669677734375,
"fcm_dpo/q_t": 0.3898235559463501,
"grad_norm": 108.6645736694336,
"learning_rate": 7.289996455765748e-08,
"logits/chosen": -0.7909586429595947,
"logits/rejected": -0.7886074781417847,
"logps/chosen": -393.975830078125,
"logps/ref_chosen": -296.6591491699219,
"logps/ref_rejected": -251.14675903320312,
"logps/rejected": -398.89013671875,
"loss": 4.3432,
"margin_dpo/margin_mean": 50.42669677734375,
"margin_dpo/margin_std": 77.26437377929688,
"step": 371
},
{
"epoch": 0.7790575916230367,
"fcm_dpo/beta": 0.010090112686157227,
"fcm_dpo/delta": -0.05242789536714554,
"fcm_dpo/margin": 64.0174331665039,
"fcm_dpo/q_t": 0.3648003935813904,
"grad_norm": 83.54692840576172,
"learning_rate": 7.161255064312283e-08,
"logits/chosen": -0.7713180780410767,
"logits/rejected": -0.7686434984207153,
"logps/chosen": -424.0948181152344,
"logps/ref_chosen": -331.3714599609375,
"logps/ref_rejected": -285.56805419921875,
"logps/rejected": -442.308837890625,
"loss": 4.0493,
"margin_dpo/margin_mean": 64.0174331665039,
"margin_dpo/margin_std": 83.79845428466797,
"step": 372
},
{
"epoch": 0.7811518324607329,
"fcm_dpo/beta": 0.009875521995127201,
"fcm_dpo/delta": -0.005197510123252869,
"fcm_dpo/margin": 61.0987434387207,
"fcm_dpo/q_t": 0.3673868179321289,
"grad_norm": 86.26287078857422,
"learning_rate": 7.033470310611945e-08,
"logits/chosen": -0.858359158039093,
"logits/rejected": -0.8340578675270081,
"logps/chosen": -405.3838806152344,
"logps/ref_chosen": -321.9429931640625,
"logps/ref_rejected": -271.2288513183594,
"logps/rejected": -415.7685546875,
"loss": 4.0013,
"margin_dpo/margin_mean": 61.09874725341797,
"margin_dpo/margin_std": 72.15105438232422,
"step": 373
},
{
"epoch": 0.7832460732984293,
"fcm_dpo/beta": 0.010431567206978798,
"fcm_dpo/delta": 0.08249574154615402,
"fcm_dpo/margin": 49.9046745300293,
"fcm_dpo/q_t": 0.3921506404876709,
"grad_norm": 71.61186218261719,
"learning_rate": 6.906649047373245e-08,
"logits/chosen": -0.8459421396255493,
"logits/rejected": -0.8448758125305176,
"logps/chosen": -409.2577819824219,
"logps/ref_chosen": -319.1685485839844,
"logps/ref_rejected": -284.6263732910156,
"logps/rejected": -424.6202392578125,
"loss": 4.36,
"margin_dpo/margin_mean": 49.9046745300293,
"margin_dpo/margin_std": 79.11766815185547,
"step": 374
},
{
"epoch": 0.7853403141361257,
"fcm_dpo/beta": 0.010909291915595531,
"fcm_dpo/delta": 0.025316692888736725,
"fcm_dpo/margin": 47.739749908447266,
"fcm_dpo/q_t": 0.3932640850543976,
"grad_norm": 96.96272277832031,
"learning_rate": 6.780798075635675e-08,
"logits/chosen": -0.8526198863983154,
"logits/rejected": -0.835331380367279,
"logps/chosen": -412.8765869140625,
"logps/ref_chosen": -314.87579345703125,
"logps/ref_rejected": -259.1965026855469,
"logps/rejected": -404.93707275390625,
"loss": 4.4618,
"margin_dpo/margin_mean": 47.73974609375,
"margin_dpo/margin_std": 81.5127182006836,
"step": 375
},
{
"epoch": 0.787434554973822,
"fcm_dpo/beta": 0.010985768400132656,
"fcm_dpo/delta": -0.007917094975709915,
"fcm_dpo/margin": 55.14532470703125,
"fcm_dpo/q_t": 0.377773642539978,
"grad_norm": 107.2135009765625,
"learning_rate": 6.655924144404906e-08,
"logits/chosen": -0.8210961818695068,
"logits/rejected": -0.8290749788284302,
"logps/chosen": -384.5388488769531,
"logps/ref_chosen": -287.6732482910156,
"logps/ref_rejected": -256.6697082519531,
"logps/rejected": -408.6806335449219,
"loss": 4.2785,
"margin_dpo/margin_mean": 55.14532470703125,
"margin_dpo/margin_std": 85.58231353759766,
"step": 376
},
{
"epoch": 0.7895287958115184,
"fcm_dpo/beta": 0.011254341341555119,
"fcm_dpo/delta": 0.053448669612407684,
"fcm_dpo/margin": 38.42869186401367,
"fcm_dpo/q_t": 0.4102451801300049,
"grad_norm": 113.72765350341797,
"learning_rate": 6.532033950290885e-08,
"logits/chosen": -0.8185837864875793,
"logits/rejected": -0.8221685886383057,
"logps/chosen": -409.1022033691406,
"logps/ref_chosen": -305.261474609375,
"logps/ref_rejected": -271.8887023925781,
"logps/rejected": -414.15814208984375,
"loss": 4.8061,
"margin_dpo/margin_mean": 38.42869186401367,
"margin_dpo/margin_std": 82.16449737548828,
"step": 377
},
{
"epoch": 0.7916230366492146,
"fcm_dpo/beta": 0.011703657917678356,
"fcm_dpo/delta": 0.059528548270463943,
"fcm_dpo/margin": 46.378326416015625,
"fcm_dpo/q_t": 0.38869708776474,
"grad_norm": 111.90191650390625,
"learning_rate": 6.409134137148736e-08,
"logits/chosen": -0.8007180094718933,
"logits/rejected": -0.7878426313400269,
"logps/chosen": -378.7329406738281,
"logps/ref_chosen": -281.5295715332031,
"logps/ref_rejected": -296.980224609375,
"logps/rejected": -440.56195068359375,
"loss": 4.3661,
"margin_dpo/margin_mean": 46.378326416015625,
"margin_dpo/margin_std": 73.78543090820312,
"step": 378
},
{
"epoch": 0.793717277486911,
"fcm_dpo/beta": 0.011767145246267319,
"fcm_dpo/delta": 0.0018080808222293854,
"fcm_dpo/margin": 50.696449279785156,
"fcm_dpo/q_t": 0.38004711270332336,
"grad_norm": 130.11024475097656,
"learning_rate": 6.28723129572247e-08,
"logits/chosen": -0.8714730143547058,
"logits/rejected": -0.8521773815155029,
"logps/chosen": -355.7151794433594,
"logps/ref_chosen": -265.0807800292969,
"logps/ref_rejected": -230.58932495117188,
"logps/rejected": -371.920166015625,
"loss": 4.3454,
"margin_dpo/margin_mean": 50.69645690917969,
"margin_dpo/margin_std": 82.31288146972656,
"step": 379
},
{
"epoch": 0.7958115183246073,
"fcm_dpo/beta": 0.011543155647814274,
"fcm_dpo/delta": -0.09121623635292053,
"fcm_dpo/margin": 53.771484375,
"fcm_dpo/q_t": 0.3734373450279236,
"grad_norm": 122.70896911621094,
"learning_rate": 6.166331963291519e-08,
"logits/chosen": -0.8505301475524902,
"logits/rejected": -0.8327133655548096,
"logps/chosen": -403.8320617675781,
"logps/ref_chosen": -305.90838623046875,
"logps/ref_rejected": -286.5906677246094,
"logps/rejected": -438.2857971191406,
"loss": 4.227,
"margin_dpo/margin_mean": 53.771484375,
"margin_dpo/margin_std": 79.21472930908203,
"step": 380
},
{
"epoch": 0.7979057591623037,
"fcm_dpo/beta": 0.011249177157878876,
"fcm_dpo/delta": -0.024705251678824425,
"fcm_dpo/margin": 55.35076904296875,
"fcm_dpo/q_t": 0.37365224957466125,
"grad_norm": 99.3327865600586,
"learning_rate": 6.046442623320145e-08,
"logits/chosen": -0.820453405380249,
"logits/rejected": -0.7837440967559814,
"logps/chosen": -346.07220458984375,
"logps/ref_chosen": -252.87066650390625,
"logps/ref_rejected": -261.1927490234375,
"logps/rejected": -409.7450256347656,
"loss": 4.1399,
"margin_dpo/margin_mean": 55.35076904296875,
"margin_dpo/margin_std": 79.20867156982422,
"step": 381
},
{
"epoch": 0.8,
"fcm_dpo/beta": 0.010675630532205105,
"fcm_dpo/delta": -0.08816227316856384,
"fcm_dpo/margin": 63.82965087890625,
"fcm_dpo/q_t": 0.3560883402824402,
"grad_norm": 98.31607818603516,
"learning_rate": 5.9275697051098275e-08,
"logits/chosen": -0.8420031070709229,
"logits/rejected": -0.8378089070320129,
"logps/chosen": -379.265380859375,
"logps/ref_chosen": -289.2114562988281,
"logps/ref_rejected": -278.45751953125,
"logps/rejected": -432.3410339355469,
"loss": 3.9075,
"margin_dpo/margin_mean": 63.82964324951172,
"margin_dpo/margin_std": 76.59968566894531,
"step": 382
},
{
"epoch": 0.8020942408376963,
"fcm_dpo/beta": 0.010143190622329712,
"fcm_dpo/delta": -0.026774900034070015,
"fcm_dpo/margin": 56.6756706237793,
"fcm_dpo/q_t": 0.3786071240901947,
"grad_norm": 106.76795959472656,
"learning_rate": 5.809719583454414e-08,
"logits/chosen": -0.8325682282447815,
"logits/rejected": -0.8136028051376343,
"logps/chosen": -362.4136962890625,
"logps/ref_chosen": -273.630859375,
"logps/ref_rejected": -261.44024658203125,
"logps/rejected": -406.8987731933594,
"loss": 4.2121,
"margin_dpo/margin_mean": 56.67566680908203,
"margin_dpo/margin_std": 80.95785522460938,
"step": 383
},
{
"epoch": 0.8041884816753927,
"fcm_dpo/beta": 0.010267859324812889,
"fcm_dpo/delta": 0.051759272813797,
"fcm_dpo/margin": 48.685951232910156,
"fcm_dpo/q_t": 0.39458543062210083,
"grad_norm": 78.74759674072266,
"learning_rate": 5.6928985782982524e-08,
"logits/chosen": -0.8430302739143372,
"logits/rejected": -0.8414285778999329,
"logps/chosen": -368.34307861328125,
"logps/ref_chosen": -274.5699462890625,
"logps/ref_rejected": -285.8253479003906,
"logps/rejected": -428.2843933105469,
"loss": 4.3999,
"margin_dpo/margin_mean": 48.685951232910156,
"margin_dpo/margin_std": 78.47038269042969,
"step": 384
},
{
"epoch": 0.806282722513089,
"fcm_dpo/beta": 0.010413900017738342,
"fcm_dpo/delta": -0.0003421269357204437,
"fcm_dpo/margin": 52.91630935668945,
"fcm_dpo/q_t": 0.38285988569259644,
"grad_norm": 86.21760559082031,
"learning_rate": 5.57711295439732e-08,
"logits/chosen": -0.793001115322113,
"logits/rejected": -0.7949045896530151,
"logps/chosen": -379.3741455078125,
"logps/ref_chosen": -284.150634765625,
"logps/ref_rejected": -244.87921142578125,
"logps/rejected": -393.01904296875,
"loss": 4.1941,
"margin_dpo/margin_mean": 52.91630935668945,
"margin_dpo/margin_std": 73.58924865722656,
"step": 385
},
{
"epoch": 0.8083769633507853,
"fcm_dpo/beta": 0.009639251977205276,
"fcm_dpo/delta": -0.11836303025484085,
"fcm_dpo/margin": 67.22946166992188,
"fcm_dpo/q_t": 0.3605455756187439,
"grad_norm": 85.76860046386719,
"learning_rate": 5.4623689209832484e-08,
"logits/chosen": -0.7818478941917419,
"logits/rejected": -0.7824301719665527,
"logps/chosen": -407.1231994628906,
"logps/ref_chosen": -320.1762390136719,
"logps/ref_rejected": -302.05023193359375,
"logps/rejected": -456.2266540527344,
"loss": 3.9072,
"margin_dpo/margin_mean": 67.22946166992188,
"margin_dpo/margin_std": 75.95755004882812,
"step": 386
},
{
"epoch": 0.8104712041884817,
"fcm_dpo/beta": 0.009522214531898499,
"fcm_dpo/delta": 0.001985335722565651,
"fcm_dpo/margin": 57.08240509033203,
"fcm_dpo/q_t": 0.381357342004776,
"grad_norm": 85.12853240966797,
"learning_rate": 5.3486726314303175e-08,
"logits/chosen": -0.8208848237991333,
"logits/rejected": -0.8258309364318848,
"logps/chosen": -366.4295654296875,
"logps/ref_chosen": -272.2801513671875,
"logps/ref_rejected": -265.1615905761719,
"logps/rejected": -416.3934326171875,
"loss": 4.2012,
"margin_dpo/margin_mean": 57.08240509033203,
"margin_dpo/margin_std": 77.8724136352539,
"step": 387
},
{
"epoch": 0.812565445026178,
"fcm_dpo/beta": 0.009763755835592747,
"fcm_dpo/delta": 0.09430886805057526,
"fcm_dpo/margin": 41.966461181640625,
"fcm_dpo/q_t": 0.4138518273830414,
"grad_norm": 107.85652923583984,
"learning_rate": 5.2360301829254745e-08,
"logits/chosen": -0.8028566837310791,
"logits/rejected": -0.7961896657943726,
"logps/chosen": -377.24932861328125,
"logps/ref_chosen": -272.5313415527344,
"logps/ref_rejected": -239.55735778808594,
"logps/rejected": -386.24176025390625,
"loss": 4.7123,
"margin_dpo/margin_mean": 41.966461181640625,
"margin_dpo/margin_std": 82.00777435302734,
"step": 388
},
{
"epoch": 0.8146596858638744,
"fcm_dpo/beta": 0.009949136525392532,
"fcm_dpo/delta": -0.03285611793398857,
"fcm_dpo/margin": 52.38904571533203,
"fcm_dpo/q_t": 0.3916034698486328,
"grad_norm": 86.20325469970703,
"learning_rate": 5.1244476161413806e-08,
"logits/chosen": -0.8366051912307739,
"logits/rejected": -0.8354239463806152,
"logps/chosen": -380.04913330078125,
"logps/ref_chosen": -281.0892639160156,
"logps/ref_rejected": -246.50045776367188,
"logps/rejected": -397.849365234375,
"loss": 4.4031,
"margin_dpo/margin_mean": 52.3890495300293,
"margin_dpo/margin_std": 83.40953063964844,
"step": 389
},
{
"epoch": 0.8167539267015707,
"fcm_dpo/beta": 0.01029281411319971,
"fcm_dpo/delta": 0.03942079097032547,
"fcm_dpo/margin": 54.43703079223633,
"fcm_dpo/q_t": 0.38137805461883545,
"grad_norm": 88.30001831054688,
"learning_rate": 5.013930914912476e-08,
"logits/chosen": -0.8523457646369934,
"logits/rejected": -0.8585647940635681,
"logps/chosen": -382.1541748046875,
"logps/ref_chosen": -283.98748779296875,
"logps/ref_rejected": -283.465087890625,
"logps/rejected": -436.06878662109375,
"loss": 4.2467,
"margin_dpo/margin_mean": 54.43703079223633,
"margin_dpo/margin_std": 79.09922790527344,
"step": 390
},
{
"epoch": 0.818848167539267,
"fcm_dpo/beta": 0.009914442896842957,
"fcm_dpo/delta": -0.028575582429766655,
"fcm_dpo/margin": 57.40606689453125,
"fcm_dpo/q_t": 0.3794897496700287,
"grad_norm": 86.59980010986328,
"learning_rate": 4.904486005914027e-08,
"logits/chosen": -0.8060988187789917,
"logits/rejected": -0.8013872504234314,
"logps/chosen": -389.5373840332031,
"logps/ref_chosen": -283.86138916015625,
"logps/ref_rejected": -263.5093688964844,
"logps/rejected": -426.5914306640625,
"loss": 4.2074,
"margin_dpo/margin_mean": 57.40606689453125,
"margin_dpo/margin_std": 81.51762390136719,
"step": 391
},
{
"epoch": 0.8209424083769633,
"fcm_dpo/beta": 0.009418400004506111,
"fcm_dpo/delta": -0.06252562999725342,
"fcm_dpo/margin": 69.35395812988281,
"fcm_dpo/q_t": 0.3590378761291504,
"grad_norm": 91.34620666503906,
"learning_rate": 4.796118758344353e-08,
"logits/chosen": -0.774043083190918,
"logits/rejected": -0.7974765300750732,
"logps/chosen": -403.4256896972656,
"logps/ref_chosen": -310.070068359375,
"logps/ref_rejected": -252.89817810058594,
"logps/rejected": -415.6076965332031,
"loss": 3.8833,
"margin_dpo/margin_mean": 69.35395812988281,
"margin_dpo/margin_std": 76.41130065917969,
"step": 392
},
{
"epoch": 0.8230366492146597,
"fcm_dpo/beta": 0.010048740543425083,
"fcm_dpo/delta": 0.059518344700336456,
"fcm_dpo/margin": 53.80087661743164,
"fcm_dpo/q_t": 0.38574740290641785,
"grad_norm": 121.00365447998047,
"learning_rate": 4.688834983610082e-08,
"logits/chosen": -0.8275657296180725,
"logits/rejected": -0.8169302940368652,
"logps/chosen": -378.3330078125,
"logps/ref_chosen": -286.7156677246094,
"logps/ref_rejected": -230.00357055664062,
"logps/rejected": -375.4217834472656,
"loss": 4.2594,
"margin_dpo/margin_mean": 53.80087661743164,
"margin_dpo/margin_std": 79.71178436279297,
"step": 393
},
{
"epoch": 0.8251308900523561,
"fcm_dpo/beta": 0.010072952136397362,
"fcm_dpo/delta": 0.04542340710759163,
"fcm_dpo/margin": 48.90460205078125,
"fcm_dpo/q_t": 0.40026745200157166,
"grad_norm": 82.5592269897461,
"learning_rate": 4.582640435014459e-08,
"logits/chosen": -0.861187756061554,
"logits/rejected": -0.8614484071731567,
"logps/chosen": -419.2198791503906,
"logps/ref_chosen": -325.9934387207031,
"logps/ref_rejected": -317.42706298828125,
"logps/rejected": -459.55810546875,
"loss": 4.4748,
"margin_dpo/margin_mean": 48.90460205078125,
"margin_dpo/margin_std": 82.41121673583984,
"step": 394
},
{
"epoch": 0.8272251308900523,
"fcm_dpo/beta": 0.010364928282797337,
"fcm_dpo/delta": -0.026334993541240692,
"fcm_dpo/margin": 60.20890808105469,
"fcm_dpo/q_t": 0.37179651856422424,
"grad_norm": 83.10125732421875,
"learning_rate": 4.477540807448832e-08,
"logits/chosen": -0.8094066381454468,
"logits/rejected": -0.8200665712356567,
"logps/chosen": -359.6258850097656,
"logps/ref_chosen": -268.90081787109375,
"logps/ref_rejected": -272.85809326171875,
"logps/rejected": -423.7920837402344,
"loss": 4.0331,
"margin_dpo/margin_mean": 60.20890426635742,
"margin_dpo/margin_std": 78.91072082519531,
"step": 395
},
{
"epoch": 0.8293193717277487,
"fcm_dpo/beta": 0.01008035521954298,
"fcm_dpo/delta": -0.017106691375374794,
"fcm_dpo/margin": 55.339622497558594,
"fcm_dpo/q_t": 0.38071006536483765,
"grad_norm": 91.52494049072266,
"learning_rate": 4.373541737087263e-08,
"logits/chosen": -0.8092782497406006,
"logits/rejected": -0.7949211597442627,
"logps/chosen": -383.99359130859375,
"logps/ref_chosen": -291.19830322265625,
"logps/ref_rejected": -253.2803955078125,
"logps/rejected": -401.41534423828125,
"loss": 4.1984,
"margin_dpo/margin_mean": 55.339622497558594,
"margin_dpo/margin_std": 76.27399444580078,
"step": 396
},
{
"epoch": 0.831413612565445,
"fcm_dpo/beta": 0.00993900652974844,
"fcm_dpo/delta": -0.025697803124785423,
"fcm_dpo/margin": 49.429141998291016,
"fcm_dpo/q_t": 0.39648500084877014,
"grad_norm": 90.36344909667969,
"learning_rate": 4.270648801084295e-08,
"logits/chosen": -0.8374252319335938,
"logits/rejected": -0.8150765299797058,
"logps/chosen": -400.49786376953125,
"logps/ref_chosen": -309.8224182128906,
"logps/ref_rejected": -291.9057922363281,
"logps/rejected": -432.0103759765625,
"loss": 4.5093,
"margin_dpo/margin_mean": 49.429141998291016,
"margin_dpo/margin_std": 83.18435668945312,
"step": 397
},
{
"epoch": 0.8335078534031414,
"fcm_dpo/beta": 0.009895882569253445,
"fcm_dpo/delta": 0.07465239614248276,
"fcm_dpo/margin": 45.989097595214844,
"fcm_dpo/q_t": 0.40378451347351074,
"grad_norm": 114.50669860839844,
"learning_rate": 4.168867517275806e-08,
"logits/chosen": -0.7460988759994507,
"logits/rejected": -0.787642240524292,
"logps/chosen": -397.8860168457031,
"logps/ref_chosen": -297.8135070800781,
"logps/ref_rejected": -270.5025634765625,
"logps/rejected": -416.564208984375,
"loss": 4.7411,
"margin_dpo/margin_mean": 45.989097595214844,
"margin_dpo/margin_std": 91.29690551757812,
"step": 398
},
{
"epoch": 0.8356020942408376,
"fcm_dpo/beta": 0.010618692263960838,
"fcm_dpo/delta": 0.06222732365131378,
"fcm_dpo/margin": 50.926490783691406,
"fcm_dpo/q_t": 0.38839948177337646,
"grad_norm": 93.77667236328125,
"learning_rate": 4.0682033438831584e-08,
"logits/chosen": -0.8398821949958801,
"logits/rejected": -0.8023529052734375,
"logps/chosen": -393.19097900390625,
"logps/ref_chosen": -292.8467712402344,
"logps/ref_rejected": -268.3638916015625,
"logps/rejected": -419.6346435546875,
"loss": 4.3527,
"margin_dpo/margin_mean": 50.92649459838867,
"margin_dpo/margin_std": 81.3008041381836,
"step": 399
},
{
"epoch": 0.837696335078534,
"fcm_dpo/beta": 0.010857629589736462,
"fcm_dpo/delta": 0.02651361934840679,
"fcm_dpo/margin": 52.668907165527344,
"fcm_dpo/q_t": 0.38137534260749817,
"grad_norm": 124.49967956542969,
"learning_rate": 3.968661679220467e-08,
"logits/chosen": -0.8855699896812439,
"logits/rejected": -0.8847328424453735,
"logps/chosen": -357.024658203125,
"logps/ref_chosen": -263.6763916015625,
"logps/ref_rejected": -258.67266845703125,
"logps/rejected": -404.6898498535156,
"loss": 4.3361,
"margin_dpo/margin_mean": 52.668907165527344,
"margin_dpo/margin_std": 79.36495208740234,
"step": 400
},
{
"epoch": 0.837696335078534,
"eval_fcm_dpo/beta": 0.01113525778055191,
"eval_logits/chosen": -0.8274842500686646,
"eval_logits/rejected": -0.8179031610488892,
"eval_logps/chosen": -383.4114074707031,
"eval_logps/ref_chosen": -287.8268127441406,
"eval_logps/ref_rejected": -266.9300231933594,
"eval_logps/rejected": -416.5982360839844,
"eval_loss": 0.5352392792701721,
"eval_margin_dpo/margin_mean": 54.083595275878906,
"eval_margin_dpo/margin_std": 78.04414367675781,
"eval_runtime": 78.6529,
"eval_samples_per_second": 25.428,
"eval_steps_per_second": 3.179,
"step": 400
},
{
"epoch": 0.8397905759162304,
"fcm_dpo/beta": 0.010933172889053822,
"fcm_dpo/delta": -0.036804597824811935,
"fcm_dpo/margin": 57.962257385253906,
"fcm_dpo/q_t": 0.36903733015060425,
"grad_norm": 132.86756896972656,
"learning_rate": 3.8702478614051345e-08,
"logits/chosen": -0.8089762330055237,
"logits/rejected": -0.8090959191322327,
"logps/chosen": -411.4322814941406,
"logps/ref_chosen": -318.2853088378906,
"logps/ref_rejected": -293.75225830078125,
"logps/rejected": -444.8614501953125,
"loss": 4.1077,
"margin_dpo/margin_mean": 57.96226501464844,
"margin_dpo/margin_std": 81.48827362060547,
"step": 401
},
{
"epoch": 0.8418848167539267,
"fcm_dpo/beta": 0.010850298218429089,
"fcm_dpo/delta": -0.00018405605806037784,
"fcm_dpo/margin": 55.26762771606445,
"fcm_dpo/q_t": 0.37649285793304443,
"grad_norm": 105.15616607666016,
"learning_rate": 3.772967168071517e-08,
"logits/chosen": -0.861879825592041,
"logits/rejected": -0.8359262347221375,
"logps/chosen": -396.8935852050781,
"logps/ref_chosen": -309.4278564453125,
"logps/ref_rejected": -282.0279846191406,
"logps/rejected": -424.7613525390625,
"loss": 4.179,
"margin_dpo/margin_mean": 55.26762390136719,
"margin_dpo/margin_std": 81.65580749511719,
"step": 402
},
{
"epoch": 0.8439790575916231,
"fcm_dpo/beta": 0.010152220726013184,
"fcm_dpo/delta": -0.16042862832546234,
"fcm_dpo/margin": 73.78020477294922,
"fcm_dpo/q_t": 0.342138409614563,
"grad_norm": 78.87046813964844,
"learning_rate": 3.676824816087978e-08,
"logits/chosen": -0.8525277376174927,
"logits/rejected": -0.8336724638938904,
"logps/chosen": -398.9835205078125,
"logps/ref_chosen": -309.0284729003906,
"logps/ref_rejected": -272.9622497558594,
"logps/rejected": -436.69744873046875,
"loss": 3.6772,
"margin_dpo/margin_mean": 73.78020477294922,
"margin_dpo/margin_std": 79.28921508789062,
"step": 403
},
{
"epoch": 0.8460732984293193,
"fcm_dpo/beta": 0.009858440607786179,
"fcm_dpo/delta": 0.06699323654174805,
"fcm_dpo/margin": 54.31309509277344,
"fcm_dpo/q_t": 0.38632509112358093,
"grad_norm": 91.77136993408203,
"learning_rate": 3.581825961277074e-08,
"logits/chosen": -0.8809780478477478,
"logits/rejected": -0.8597022294998169,
"logps/chosen": -397.1322021484375,
"logps/ref_chosen": -297.2837219238281,
"logps/ref_rejected": -256.99041748046875,
"logps/rejected": -411.15203857421875,
"loss": 4.3272,
"margin_dpo/margin_mean": 54.31309509277344,
"margin_dpo/margin_std": 83.20060729980469,
"step": 404
},
{
"epoch": 0.8481675392670157,
"fcm_dpo/beta": 0.010094488970935345,
"fcm_dpo/delta": -0.0015985970385372639,
"fcm_dpo/margin": 59.558921813964844,
"fcm_dpo/q_t": 0.3737775683403015,
"grad_norm": 70.01200103759766,
"learning_rate": 3.487975698139084e-08,
"logits/chosen": -0.7941403985023499,
"logits/rejected": -0.803252637386322,
"logps/chosen": -348.8029479980469,
"logps/ref_chosen": -257.96533203125,
"logps/ref_rejected": -255.811279296875,
"logps/rejected": -406.2078552246094,
"loss": 4.0913,
"margin_dpo/margin_mean": 59.558921813964844,
"margin_dpo/margin_std": 81.33020782470703,
"step": 405
},
{
"epoch": 0.8502617801047121,
"fcm_dpo/beta": 0.01080853957682848,
"fcm_dpo/delta": 0.11059418320655823,
"fcm_dpo/margin": 45.39131164550781,
"fcm_dpo/q_t": 0.3956514894962311,
"grad_norm": 114.03080749511719,
"learning_rate": 3.3952790595787986e-08,
"logits/chosen": -0.8301359415054321,
"logits/rejected": -0.8075696229934692,
"logps/chosen": -388.3358154296875,
"logps/ref_chosen": -285.1810607910156,
"logps/ref_rejected": -264.41351318359375,
"logps/rejected": -412.9596252441406,
"loss": 4.4328,
"margin_dpo/margin_mean": 45.39131164550781,
"margin_dpo/margin_std": 73.8642349243164,
"step": 406
},
{
"epoch": 0.8523560209424084,
"fcm_dpo/beta": 0.010692048817873001,
"fcm_dpo/delta": -0.03384571149945259,
"fcm_dpo/margin": 58.965179443359375,
"fcm_dpo/q_t": 0.372200608253479,
"grad_norm": 109.08424377441406,
"learning_rate": 3.303741016635614e-08,
"logits/chosen": -0.8248212337493896,
"logits/rejected": -0.8543944954872131,
"logps/chosen": -369.94219970703125,
"logps/ref_chosen": -265.23809814453125,
"logps/ref_rejected": -219.0631561279297,
"logps/rejected": -382.732421875,
"loss": 4.1392,
"margin_dpo/margin_mean": 58.96518325805664,
"margin_dpo/margin_std": 84.269287109375,
"step": 407
},
{
"epoch": 0.8544502617801047,
"fcm_dpo/beta": 0.010714426636695862,
"fcm_dpo/delta": -0.028275392949581146,
"fcm_dpo/margin": 58.18950653076172,
"fcm_dpo/q_t": 0.37304675579071045,
"grad_norm": 80.60567474365234,
"learning_rate": 3.2133664782169944e-08,
"logits/chosen": -0.862250030040741,
"logits/rejected": -0.8581533432006836,
"logps/chosen": -387.4540100097656,
"logps/ref_chosen": -296.9726257324219,
"logps/ref_rejected": -295.4786376953125,
"logps/rejected": -444.1495361328125,
"loss": 4.1167,
"margin_dpo/margin_mean": 58.18950653076172,
"margin_dpo/margin_std": 78.87529754638672,
"step": 408
},
{
"epoch": 0.856544502617801,
"fcm_dpo/beta": 0.01028523314744234,
"fcm_dpo/delta": -0.026700038462877274,
"fcm_dpo/margin": 55.847084045410156,
"fcm_dpo/q_t": 0.38022899627685547,
"grad_norm": 90.30248260498047,
"learning_rate": 3.12416029083514e-08,
"logits/chosen": -0.8302770256996155,
"logits/rejected": -0.8195016384124756,
"logps/chosen": -386.9856262207031,
"logps/ref_chosen": -287.37933349609375,
"logps/ref_rejected": -275.80291748046875,
"logps/rejected": -431.2562255859375,
"loss": 4.3809,
"margin_dpo/margin_mean": 55.847084045410156,
"margin_dpo/margin_std": 91.54480743408203,
"step": 409
},
{
"epoch": 0.8586387434554974,
"fcm_dpo/beta": 0.0104904780164361,
"fcm_dpo/delta": 0.046907056123018265,
"fcm_dpo/margin": 52.79481887817383,
"fcm_dpo/q_t": 0.38510861992836,
"grad_norm": 106.11753845214844,
"learning_rate": 3.036127238347164e-08,
"logits/chosen": -0.853104829788208,
"logits/rejected": -0.8613294959068298,
"logps/chosen": -377.5323791503906,
"logps/ref_chosen": -281.7801818847656,
"logps/ref_rejected": -266.7550354003906,
"logps/rejected": -415.3020935058594,
"loss": 4.3734,
"margin_dpo/margin_mean": 52.79481506347656,
"margin_dpo/margin_std": 85.05574798583984,
"step": 410
},
{
"epoch": 0.8607329842931937,
"fcm_dpo/beta": 0.010124841704964638,
"fcm_dpo/delta": -0.07326184958219528,
"fcm_dpo/margin": 65.72576904296875,
"fcm_dpo/q_t": 0.35882946848869324,
"grad_norm": 82.55951690673828,
"learning_rate": 2.9492720416985e-08,
"logits/chosen": -0.8430988192558289,
"logits/rejected": -0.8095424771308899,
"logps/chosen": -371.9637756347656,
"logps/ref_chosen": -281.5872497558594,
"logps/ref_rejected": -254.78916931152344,
"logps/rejected": -410.8914794921875,
"loss": 3.8667,
"margin_dpo/margin_mean": 65.72576904296875,
"margin_dpo/margin_std": 76.63848876953125,
"step": 411
},
{
"epoch": 0.86282722513089,
"fcm_dpo/beta": 0.009727457538247108,
"fcm_dpo/delta": -0.012256894260644913,
"fcm_dpo/margin": 49.4742546081543,
"fcm_dpo/q_t": 0.3995344638824463,
"grad_norm": 90.7106704711914,
"learning_rate": 2.863599358669755e-08,
"logits/chosen": -0.8161391615867615,
"logits/rejected": -0.8233824372291565,
"logps/chosen": -381.5071105957031,
"logps/ref_chosen": -276.5341796875,
"logps/ref_rejected": -273.8751220703125,
"logps/rejected": -428.3223571777344,
"loss": 4.4685,
"margin_dpo/margin_mean": 49.4742546081543,
"margin_dpo/margin_std": 82.43728637695312,
"step": 412
},
{
"epoch": 0.8649214659685864,
"fcm_dpo/beta": 0.01038267370313406,
"fcm_dpo/delta": 0.11351024359464645,
"fcm_dpo/margin": 47.416351318359375,
"fcm_dpo/q_t": 0.3980448544025421,
"grad_norm": 119.93087768554688,
"learning_rate": 2.7791137836269158e-08,
"logits/chosen": -0.8390222191810608,
"logits/rejected": -0.837721049785614,
"logps/chosen": -369.8499755859375,
"logps/ref_chosen": -271.2745666503906,
"logps/ref_rejected": -270.16912841796875,
"logps/rejected": -416.160888671875,
"loss": 4.4373,
"margin_dpo/margin_mean": 47.416358947753906,
"margin_dpo/margin_std": 78.89204406738281,
"step": 413
},
{
"epoch": 0.8670157068062827,
"fcm_dpo/beta": 0.010600894689559937,
"fcm_dpo/delta": -0.07364710420370102,
"fcm_dpo/margin": 62.915504455566406,
"fcm_dpo/q_t": 0.3617112338542938,
"grad_norm": 98.02194213867188,
"learning_rate": 2.6958198472749717e-08,
"logits/chosen": -0.8604521751403809,
"logits/rejected": -0.8675873279571533,
"logps/chosen": -394.2980041503906,
"logps/ref_chosen": -297.11505126953125,
"logps/ref_rejected": -271.7034606933594,
"logps/rejected": -431.80194091796875,
"loss": 3.9988,
"margin_dpo/margin_mean": 62.915504455566406,
"margin_dpo/margin_std": 80.08522033691406,
"step": 414
},
{
"epoch": 0.8691099476439791,
"fcm_dpo/beta": 0.010481567122042179,
"fcm_dpo/delta": 0.027636148035526276,
"fcm_dpo/margin": 54.60208511352539,
"fcm_dpo/q_t": 0.3772929012775421,
"grad_norm": 89.56175994873047,
"learning_rate": 2.613722016414943e-08,
"logits/chosen": -0.8698713779449463,
"logits/rejected": -0.8565788865089417,
"logps/chosen": -392.7457275390625,
"logps/ref_chosen": -297.6926574707031,
"logps/ref_rejected": -279.0503234863281,
"logps/rejected": -428.70550537109375,
"loss": 4.1341,
"margin_dpo/margin_mean": 54.602081298828125,
"margin_dpo/margin_std": 74.00060272216797,
"step": 415
},
{
"epoch": 0.8712041884816754,
"fcm_dpo/beta": 0.010026373900473118,
"fcm_dpo/delta": -0.057540446519851685,
"fcm_dpo/margin": 64.75239562988281,
"fcm_dpo/q_t": 0.36300843954086304,
"grad_norm": 75.71894836425781,
"learning_rate": 2.5328246937043525e-08,
"logits/chosen": -0.8659712672233582,
"logits/rejected": -0.8757675290107727,
"logps/chosen": -400.69134521484375,
"logps/ref_chosen": -311.8255615234375,
"logps/ref_rejected": -268.6170654296875,
"logps/rejected": -422.23529052734375,
"loss": 4.0244,
"margin_dpo/margin_mean": 64.75240325927734,
"margin_dpo/margin_std": 82.02722930908203,
"step": 416
},
{
"epoch": 0.8732984293193717,
"fcm_dpo/beta": 0.00980073120445013,
"fcm_dpo/delta": -0.020607443526387215,
"fcm_dpo/margin": 56.351253509521484,
"fcm_dpo/q_t": 0.3844594359397888,
"grad_norm": 97.4702377319336,
"learning_rate": 2.4531322174210973e-08,
"logits/chosen": -0.8039509057998657,
"logits/rejected": -0.8083282113075256,
"logps/chosen": -409.8139953613281,
"logps/ref_chosen": -310.43682861328125,
"logps/ref_rejected": -277.15283203125,
"logps/rejected": -432.88128662109375,
"loss": 4.3175,
"margin_dpo/margin_mean": 56.351253509521484,
"margin_dpo/margin_std": 84.7042007446289,
"step": 417
},
{
"epoch": 0.875392670157068,
"fcm_dpo/beta": 0.00980357639491558,
"fcm_dpo/delta": -0.041675545275211334,
"fcm_dpo/margin": 54.67127990722656,
"fcm_dpo/q_t": 0.3868068754673004,
"grad_norm": 97.9288330078125,
"learning_rate": 2.3746488612308295e-08,
"logits/chosen": -0.8138055801391602,
"logits/rejected": -0.7914860844612122,
"logps/chosen": -385.43609619140625,
"logps/ref_chosen": -278.49591064453125,
"logps/ref_rejected": -276.56671142578125,
"logps/rejected": -438.1781311035156,
"loss": 4.3261,
"margin_dpo/margin_mean": 54.67127990722656,
"margin_dpo/margin_std": 80.07173156738281,
"step": 418
},
{
"epoch": 0.8774869109947644,
"fcm_dpo/beta": 0.00954905990511179,
"fcm_dpo/delta": 0.0025477148592472076,
"fcm_dpo/margin": 62.34251403808594,
"fcm_dpo/q_t": 0.3726246654987335,
"grad_norm": 98.41114044189453,
"learning_rate": 2.297378833957761e-08,
"logits/chosen": -0.8610261082649231,
"logits/rejected": -0.8407485485076904,
"logps/chosen": -404.93939208984375,
"logps/ref_chosen": -298.9002380371094,
"logps/ref_rejected": -246.1540985107422,
"logps/rejected": -414.53582763671875,
"loss": 4.1706,
"margin_dpo/margin_mean": 62.3425178527832,
"margin_dpo/margin_std": 87.26535034179688,
"step": 419
},
{
"epoch": 0.8795811518324608,
"fcm_dpo/beta": 0.009364170022308826,
"fcm_dpo/delta": -0.031245797872543335,
"fcm_dpo/margin": 67.03992462158203,
"fcm_dpo/q_t": 0.36937007308006287,
"grad_norm": 118.59954071044922,
"learning_rate": 2.2213262793589482e-08,
"logits/chosen": -0.8001975417137146,
"logits/rejected": -0.7743805646896362,
"logps/chosen": -367.395751953125,
"logps/ref_chosen": -264.5608825683594,
"logps/ref_rejected": -245.67031860351562,
"logps/rejected": -415.54510498046875,
"loss": 4.1234,
"margin_dpo/margin_mean": 67.03992462158203,
"margin_dpo/margin_std": 94.58979034423828,
"step": 420
},
{
"epoch": 0.881675392670157,
"fcm_dpo/beta": 0.009468503296375275,
"fcm_dpo/delta": 0.05897749215364456,
"fcm_dpo/margin": 57.387535095214844,
"fcm_dpo/q_t": 0.3812984824180603,
"grad_norm": 96.44364166259766,
"learning_rate": 2.1464952759020856e-08,
"logits/chosen": -0.8752861022949219,
"logits/rejected": -0.8608182668685913,
"logps/chosen": -391.05804443359375,
"logps/ref_chosen": -297.70501708984375,
"logps/ref_rejected": -243.74771118164062,
"logps/rejected": -394.4882507324219,
"loss": 4.1932,
"margin_dpo/margin_mean": 57.387535095214844,
"margin_dpo/margin_std": 78.87214660644531,
"step": 421
},
{
"epoch": 0.8837696335078534,
"fcm_dpo/beta": 0.009868706576526165,
"fcm_dpo/delta": 0.013630709610879421,
"fcm_dpo/margin": 59.41127395629883,
"fcm_dpo/q_t": 0.3788284361362457,
"grad_norm": 75.39724731445312,
"learning_rate": 2.07288983654679e-08,
"logits/chosen": -0.7338589429855347,
"logits/rejected": -0.784461498260498,
"logps/chosen": -387.1398010253906,
"logps/ref_chosen": -288.3587646484375,
"logps/ref_rejected": -256.4377746582031,
"logps/rejected": -414.63006591796875,
"loss": 4.2569,
"margin_dpo/margin_mean": 59.41127395629883,
"margin_dpo/margin_std": 90.28678894042969,
"step": 422
},
{
"epoch": 0.8858638743455497,
"fcm_dpo/beta": 0.009787296876311302,
"fcm_dpo/delta": -0.014436397701501846,
"fcm_dpo/margin": 62.54419708251953,
"fcm_dpo/q_t": 0.37159958481788635,
"grad_norm": 108.14152526855469,
"learning_rate": 2.0005139085293942e-08,
"logits/chosen": -0.8656849265098572,
"logits/rejected": -0.8510351777076721,
"logps/chosen": -397.02996826171875,
"logps/ref_chosen": -296.00701904296875,
"logps/ref_rejected": -261.3480529785156,
"logps/rejected": -424.9151916503906,
"loss": 4.0862,
"margin_dpo/margin_mean": 62.54419708251953,
"margin_dpo/margin_std": 84.30635833740234,
"step": 423
},
{
"epoch": 0.8879581151832461,
"fcm_dpo/beta": 0.009680146351456642,
"fcm_dpo/delta": -0.010340253822505474,
"fcm_dpo/margin": 62.79508972167969,
"fcm_dpo/q_t": 0.3680788278579712,
"grad_norm": 95.83734893798828,
"learning_rate": 1.9293713731512673e-08,
"logits/chosen": -0.8333520889282227,
"logits/rejected": -0.8356263637542725,
"logps/chosen": -402.0572204589844,
"logps/ref_chosen": -309.421875,
"logps/ref_rejected": -249.14886474609375,
"logps/rejected": -404.5793151855469,
"loss": 3.988,
"margin_dpo/margin_mean": 62.79508590698242,
"margin_dpo/margin_std": 75.4756088256836,
"step": 424
},
{
"epoch": 0.8900523560209425,
"fcm_dpo/beta": 0.009858609177172184,
"fcm_dpo/delta": 0.03790643811225891,
"fcm_dpo/margin": 50.37432098388672,
"fcm_dpo/q_t": 0.3970460295677185,
"grad_norm": 109.99349212646484,
"learning_rate": 1.8594660455706763e-08,
"logits/chosen": -0.8281177878379822,
"logits/rejected": -0.8340511918067932,
"logps/chosen": -381.4590759277344,
"logps/ref_chosen": -280.50909423828125,
"logps/ref_rejected": -276.8252258300781,
"logps/rejected": -428.1495056152344,
"loss": 4.5116,
"margin_dpo/margin_mean": 50.37432098388672,
"margin_dpo/margin_std": 86.79659271240234,
"step": 425
},
{
"epoch": 0.8921465968586387,
"fcm_dpo/beta": 0.010049426928162575,
"fcm_dpo/delta": 0.010002564638853073,
"fcm_dpo/margin": 58.607635498046875,
"fcm_dpo/q_t": 0.3756742477416992,
"grad_norm": 103.70176696777344,
"learning_rate": 1.7908016745981856e-08,
"logits/chosen": -0.8594489097595215,
"logits/rejected": -0.8479264974594116,
"logps/chosen": -395.7527160644531,
"logps/ref_chosen": -292.78521728515625,
"logps/ref_rejected": -255.62698364257812,
"logps/rejected": -417.2021484375,
"loss": 4.1411,
"margin_dpo/margin_mean": 58.607635498046875,
"margin_dpo/margin_std": 79.4923095703125,
"step": 426
},
{
"epoch": 0.8942408376963351,
"fcm_dpo/beta": 0.00980192981660366,
"fcm_dpo/delta": -0.15457119047641754,
"fcm_dpo/margin": 76.36004638671875,
"fcm_dpo/q_t": 0.34554582834243774,
"grad_norm": 92.07188415527344,
"learning_rate": 1.7233819424956247e-08,
"logits/chosen": -0.836094319820404,
"logits/rejected": -0.8061795234680176,
"logps/chosen": -388.0554504394531,
"logps/ref_chosen": -288.7687072753906,
"logps/ref_rejected": -268.4986572265625,
"logps/rejected": -444.14544677734375,
"loss": 3.8482,
"margin_dpo/margin_mean": 76.36004638671875,
"margin_dpo/margin_std": 89.82933044433594,
"step": 427
},
{
"epoch": 0.8963350785340314,
"fcm_dpo/beta": 0.008906656876206398,
"fcm_dpo/delta": -0.015173885971307755,
"fcm_dpo/margin": 68.88334655761719,
"fcm_dpo/q_t": 0.3676660656929016,
"grad_norm": 82.5910873413086,
"learning_rate": 1.6572104647786245e-08,
"logits/chosen": -0.7911025285720825,
"logits/rejected": -0.818476140499115,
"logps/chosen": -406.2961120605469,
"logps/ref_chosen": -295.5209655761719,
"logps/ref_rejected": -275.71026611328125,
"logps/rejected": -455.3687744140625,
"loss": 4.0516,
"margin_dpo/margin_mean": 68.88334655761719,
"margin_dpo/margin_std": 89.82479858398438,
"step": 428
},
{
"epoch": 0.8984293193717278,
"fcm_dpo/beta": 0.008761554956436157,
"fcm_dpo/delta": -0.016175897791981697,
"fcm_dpo/margin": 61.458229064941406,
"fcm_dpo/q_t": 0.37919533252716064,
"grad_norm": 150.3956298828125,
"learning_rate": 1.5922907900227017e-08,
"logits/chosen": -0.7926703095436096,
"logits/rejected": -0.8025503158569336,
"logps/chosen": -376.794189453125,
"logps/ref_chosen": -274.392333984375,
"logps/ref_rejected": -258.574462890625,
"logps/rejected": -422.4345703125,
"loss": 4.3478,
"margin_dpo/margin_mean": 61.458229064941406,
"margin_dpo/margin_std": 93.87308502197266,
"step": 429
},
{
"epoch": 0.900523560209424,
"fcm_dpo/beta": 0.008986860513687134,
"fcm_dpo/delta": 0.03410874679684639,
"fcm_dpo/margin": 51.494415283203125,
"fcm_dpo/q_t": 0.4010925590991974,
"grad_norm": 82.05398559570312,
"learning_rate": 1.5286263996730026e-08,
"logits/chosen": -0.8744654655456543,
"logits/rejected": -0.8475915789604187,
"logps/chosen": -388.521484375,
"logps/ref_chosen": -288.7391357421875,
"logps/ref_rejected": -268.6106262207031,
"logps/rejected": -419.8874206542969,
"loss": 4.4594,
"margin_dpo/margin_mean": 51.49441146850586,
"margin_dpo/margin_std": 83.37062072753906,
"step": 430
},
{
"epoch": 0.9026178010471204,
"fcm_dpo/beta": 0.009652719832956791,
"fcm_dpo/delta": 0.11538906395435333,
"fcm_dpo/margin": 44.83124923706055,
"fcm_dpo/q_t": 0.4090067744255066,
"grad_norm": 104.76152038574219,
"learning_rate": 1.4662207078575684e-08,
"logits/chosen": -0.8430911898612976,
"logits/rejected": -0.8098528385162354,
"logps/chosen": -377.4594421386719,
"logps/ref_chosen": -275.7247314453125,
"logps/ref_rejected": -268.91729736328125,
"logps/rejected": -415.48321533203125,
"loss": 4.5624,
"margin_dpo/margin_mean": 44.83124923706055,
"margin_dpo/margin_std": 78.89287567138672,
"step": 431
},
{
"epoch": 0.9047120418848168,
"fcm_dpo/beta": 0.009524986147880554,
"fcm_dpo/delta": -0.0009523874614387751,
"fcm_dpo/margin": 62.96201705932617,
"fcm_dpo/q_t": 0.37596815824508667,
"grad_norm": 79.11823272705078,
"learning_rate": 1.40507706120426e-08,
"logits/chosen": -0.8689834475517273,
"logits/rejected": -0.8573225140571594,
"logps/chosen": -385.52545166015625,
"logps/ref_chosen": -291.42010498046875,
"logps/ref_rejected": -255.48202514648438,
"logps/rejected": -412.5494079589844,
"loss": 4.125,
"margin_dpo/margin_mean": 62.9620246887207,
"margin_dpo/margin_std": 87.03445434570312,
"step": 432
},
{
"epoch": 0.9068062827225131,
"fcm_dpo/beta": 0.009905043989419937,
"fcm_dpo/delta": 0.06354302912950516,
"fcm_dpo/margin": 54.46798324584961,
"fcm_dpo/q_t": 0.38325023651123047,
"grad_norm": 89.49055480957031,
"learning_rate": 1.345198738661285e-08,
"logits/chosen": -0.8393370509147644,
"logits/rejected": -0.8366720676422119,
"logps/chosen": -353.0195007324219,
"logps/ref_chosen": -246.2268829345703,
"logps/ref_rejected": -253.65924072265625,
"logps/rejected": -414.9197692871094,
"loss": 4.2557,
"margin_dpo/margin_mean": 54.46798324584961,
"margin_dpo/margin_std": 79.35485076904297,
"step": 433
},
{
"epoch": 0.9089005235602095,
"fcm_dpo/beta": 0.01033061183989048,
"fcm_dpo/delta": -0.006721100769937038,
"fcm_dpo/margin": 53.89496612548828,
"fcm_dpo/q_t": 0.38306722044944763,
"grad_norm": 82.5522232055664,
"learning_rate": 1.2865889513213628e-08,
"logits/chosen": -0.8196003437042236,
"logits/rejected": -0.8318718671798706,
"logps/chosen": -405.0242614746094,
"logps/ref_chosen": -295.4618225097656,
"logps/ref_rejected": -256.2254333496094,
"logps/rejected": -419.68292236328125,
"loss": 4.2651,
"margin_dpo/margin_mean": 53.89497375488281,
"margin_dpo/margin_std": 80.93537902832031,
"step": 434
},
{
"epoch": 0.9109947643979057,
"fcm_dpo/beta": 0.010011866688728333,
"fcm_dpo/delta": 0.002371033653616905,
"fcm_dpo/margin": 59.46184539794922,
"fcm_dpo/q_t": 0.3749125599861145,
"grad_norm": 113.84324645996094,
"learning_rate": 1.2292508422495157e-08,
"logits/chosen": -0.8343696594238281,
"logits/rejected": -0.8213623762130737,
"logps/chosen": -360.0166015625,
"logps/ref_chosen": -260.7384033203125,
"logps/ref_rejected": -248.5688018798828,
"logps/rejected": -407.3088073730469,
"loss": 4.0724,
"margin_dpo/margin_mean": 59.46183776855469,
"margin_dpo/margin_std": 76.63561248779297,
"step": 435
},
{
"epoch": 0.9130890052356021,
"fcm_dpo/beta": 0.010455166921019554,
"fcm_dpo/delta": 0.06821566820144653,
"fcm_dpo/margin": 51.208003997802734,
"fcm_dpo/q_t": 0.3907574415206909,
"grad_norm": 108.74581909179688,
"learning_rate": 1.1731874863145142e-08,
"logits/chosen": -0.8110353350639343,
"logits/rejected": -0.8122683763504028,
"logps/chosen": -424.99334716796875,
"logps/ref_chosen": -319.3224792480469,
"logps/ref_rejected": -299.30322265625,
"logps/rejected": -456.1820983886719,
"loss": 4.3717,
"margin_dpo/margin_mean": 51.208003997802734,
"margin_dpo/margin_std": 84.06385040283203,
"step": 436
},
{
"epoch": 0.9151832460732985,
"fcm_dpo/beta": 0.010105324909090996,
"fcm_dpo/delta": -0.14192956686019897,
"fcm_dpo/margin": 68.01710510253906,
"fcm_dpo/q_t": 0.3576942980289459,
"grad_norm": 97.00816345214844,
"learning_rate": 1.118401890024001e-08,
"logits/chosen": -0.845094621181488,
"logits/rejected": -0.8325349688529968,
"logps/chosen": -376.66925048828125,
"logps/ref_chosen": -278.82879638671875,
"logps/ref_rejected": -272.55303955078125,
"logps/rejected": -438.4106140136719,
"loss": 3.9596,
"margin_dpo/margin_mean": 68.01710510253906,
"margin_dpo/margin_std": 85.8590087890625,
"step": 437
},
{
"epoch": 0.9172774869109948,
"fcm_dpo/beta": 0.009664845652878284,
"fcm_dpo/delta": 0.030612653121352196,
"fcm_dpo/margin": 35.6963996887207,
"fcm_dpo/q_t": 0.42677760124206543,
"grad_norm": 104.95515441894531,
"learning_rate": 1.06489699136324e-08,
"logits/chosen": -0.8136807680130005,
"logits/rejected": -0.8375378847122192,
"logps/chosen": -362.48974609375,
"logps/ref_chosen": -259.31903076171875,
"logps/ref_rejected": -240.99581909179688,
"logps/rejected": -379.8629150390625,
"loss": 4.9299,
"margin_dpo/margin_mean": 35.69639587402344,
"margin_dpo/margin_std": 83.86488342285156,
"step": 438
},
{
"epoch": 0.9193717277486911,
"fcm_dpo/beta": 0.009936582297086716,
"fcm_dpo/delta": 0.03292373940348625,
"fcm_dpo/margin": 57.200782775878906,
"fcm_dpo/q_t": 0.38112884759902954,
"grad_norm": 110.18673706054688,
"learning_rate": 1.0126756596375685e-08,
"logits/chosen": -0.8162304162979126,
"logits/rejected": -0.8262991905212402,
"logps/chosen": -361.297607421875,
"logps/ref_chosen": -257.1243896484375,
"logps/ref_rejected": -243.20416259765625,
"logps/rejected": -404.5781555175781,
"loss": 4.2137,
"margin_dpo/margin_mean": 57.200782775878906,
"margin_dpo/margin_std": 83.29421997070312,
"step": 439
},
{
"epoch": 0.9214659685863874,
"fcm_dpo/beta": 0.010659238323569298,
"fcm_dpo/delta": 0.0877818912267685,
"fcm_dpo/margin": 43.59062957763672,
"fcm_dpo/q_t": 0.39968663454055786,
"grad_norm": 104.97032165527344,
"learning_rate": 9.617406953185136e-09,
"logits/chosen": -0.8673726320266724,
"logits/rejected": -0.8623473048210144,
"logps/chosen": -421.5701599121094,
"logps/ref_chosen": -307.5315246582031,
"logps/ref_rejected": -264.3540954589844,
"logps/rejected": -421.98333740234375,
"loss": 4.5375,
"margin_dpo/margin_mean": 43.59062576293945,
"margin_dpo/margin_std": 75.73169708251953,
"step": 440
},
{
"epoch": 0.9235602094240838,
"fcm_dpo/beta": 0.010612818412482738,
"fcm_dpo/delta": -0.07970403879880905,
"fcm_dpo/margin": 63.51165008544922,
"fcm_dpo/q_t": 0.3585563600063324,
"grad_norm": 95.90740203857422,
"learning_rate": 9.12094829893642e-09,
"logits/chosen": -0.8216838240623474,
"logits/rejected": -0.8057107329368591,
"logps/chosen": -411.36907958984375,
"logps/ref_chosen": -309.9819641113281,
"logps/ref_rejected": -297.4968566894531,
"logps/rejected": -462.3956298828125,
"loss": 3.938,
"margin_dpo/margin_mean": 63.511653900146484,
"margin_dpo/margin_std": 77.47901153564453,
"step": 441
},
{
"epoch": 0.9256544502617801,
"fcm_dpo/beta": 0.010241352021694183,
"fcm_dpo/delta": 0.07879231870174408,
"fcm_dpo/margin": 51.40802001953125,
"fcm_dpo/q_t": 0.3907470703125,
"grad_norm": 98.82845306396484,
"learning_rate": 8.637407257200496e-09,
"logits/chosen": -0.8957461714744568,
"logits/rejected": -0.8518679141998291,
"logps/chosen": -387.4726867675781,
"logps/ref_chosen": -278.9791564941406,
"logps/ref_rejected": -242.87310791015625,
"logps/rejected": -402.7746887207031,
"loss": 4.4572,
"margin_dpo/margin_mean": 51.40802001953125,
"margin_dpo/margin_std": 85.5845947265625,
"step": 442
},
{
"epoch": 0.9277486910994764,
"fcm_dpo/beta": 0.010851511731743813,
"fcm_dpo/delta": -0.02806878834962845,
"fcm_dpo/margin": 57.299949645996094,
"fcm_dpo/q_t": 0.36912405490875244,
"grad_norm": 98.69976806640625,
"learning_rate": 8.166809758815895e-09,
"logits/chosen": -0.7989782094955444,
"logits/rejected": -0.8233458995819092,
"logps/chosen": -373.9927978515625,
"logps/ref_chosen": -273.5590515136719,
"logps/ref_rejected": -264.0199279785156,
"logps/rejected": -421.7536315917969,
"loss": 4.1327,
"margin_dpo/margin_mean": 57.299949645996094,
"margin_dpo/margin_std": 77.64727020263672,
"step": 443
},
{
"epoch": 0.9298429319371728,
"fcm_dpo/beta": 0.010247818194329739,
"fcm_dpo/delta": -0.036531638354063034,
"fcm_dpo/margin": 61.67512512207031,
"fcm_dpo/q_t": 0.37224987149238586,
"grad_norm": 99.69876861572266,
"learning_rate": 7.709181040498253e-09,
"logits/chosen": -0.8095158934593201,
"logits/rejected": -0.7996165752410889,
"logps/chosen": -398.9835205078125,
"logps/ref_chosen": -298.1441955566406,
"logps/ref_rejected": -268.0572814941406,
"logps/rejected": -430.57171630859375,
"loss": 4.1991,
"margin_dpo/margin_mean": 61.67512893676758,
"margin_dpo/margin_std": 92.46453857421875,
"step": 444
},
{
"epoch": 0.9319371727748691,
"fcm_dpo/beta": 0.009988191537559032,
"fcm_dpo/delta": -0.08285348117351532,
"fcm_dpo/margin": 50.43273162841797,
"fcm_dpo/q_t": 0.39380931854248047,
"grad_norm": 90.62871551513672,
"learning_rate": 7.2645456434869965e-09,
"logits/chosen": -0.8713312745094299,
"logits/rejected": -0.8869834542274475,
"logps/chosen": -357.09771728515625,
"logps/ref_chosen": -254.54067993164062,
"logps/ref_rejected": -264.2445983886719,
"logps/rejected": -417.234375,
"loss": 4.4441,
"margin_dpo/margin_mean": 50.4327278137207,
"margin_dpo/margin_std": 77.92971801757812,
"step": 445
},
{
"epoch": 0.9340314136125655,
"fcm_dpo/beta": 0.009595160372555256,
"fcm_dpo/delta": 0.03080780804157257,
"fcm_dpo/margin": 59.38265609741211,
"fcm_dpo/q_t": 0.3774047791957855,
"grad_norm": 90.70514678955078,
"learning_rate": 6.832927412229017e-09,
"logits/chosen": -0.8085803985595703,
"logits/rejected": -0.8101305961608887,
"logps/chosen": -404.2763366699219,
"logps/ref_chosen": -306.72247314453125,
"logps/ref_rejected": -266.3735656738281,
"logps/rejected": -423.31005859375,
"loss": 4.1929,
"margin_dpo/margin_mean": 59.382652282714844,
"margin_dpo/margin_std": 82.84217071533203,
"step": 446
},
{
"epoch": 0.9361256544502617,
"fcm_dpo/beta": 0.00941769964993,
"fcm_dpo/delta": -0.0662500411272049,
"fcm_dpo/margin": 64.87944030761719,
"fcm_dpo/q_t": 0.36734655499458313,
"grad_norm": 78.74610900878906,
"learning_rate": 6.414349493100129e-09,
"logits/chosen": -0.8000814318656921,
"logits/rejected": -0.8020035624504089,
"logps/chosen": -357.4937744140625,
"logps/ref_chosen": -260.51727294921875,
"logps/ref_rejected": -236.47061157226562,
"logps/rejected": -398.3265075683594,
"loss": 3.9644,
"margin_dpo/margin_mean": 64.87944030761719,
"margin_dpo/margin_std": 77.26435089111328,
"step": 447
},
{
"epoch": 0.9382198952879581,
"fcm_dpo/beta": 0.00942399725317955,
"fcm_dpo/delta": 0.04195284843444824,
"fcm_dpo/margin": 59.36241912841797,
"fcm_dpo/q_t": 0.3821418881416321,
"grad_norm": 100.28685760498047,
"learning_rate": 6.0088343331638756e-09,
"logits/chosen": -0.8049849271774292,
"logits/rejected": -0.8009424805641174,
"logps/chosen": -371.8785705566406,
"logps/ref_chosen": -268.78704833984375,
"logps/ref_rejected": -262.1703796386719,
"logps/rejected": -424.6243591308594,
"loss": 4.1666,
"margin_dpo/margin_mean": 59.36241912841797,
"margin_dpo/margin_std": 80.97298431396484,
"step": 448
},
{
"epoch": 0.9403141361256544,
"fcm_dpo/beta": 0.00966222770512104,
"fcm_dpo/delta": -0.02228935807943344,
"fcm_dpo/margin": 64.16152954101562,
"fcm_dpo/q_t": 0.36533617973327637,
"grad_norm": 127.85818481445312,
"learning_rate": 5.616403678967624e-09,
"logits/chosen": -0.8874871730804443,
"logits/rejected": -0.8732025623321533,
"logps/chosen": -421.92047119140625,
"logps/ref_chosen": -330.9514465332031,
"logps/ref_rejected": -239.76974487304688,
"logps/rejected": -394.9002685546875,
"loss": 4.0159,
"margin_dpo/margin_mean": 64.16152954101562,
"margin_dpo/margin_std": 80.18797302246094,
"step": 449
},
{
"epoch": 0.9424083769633508,
"fcm_dpo/beta": 0.00963627640157938,
"fcm_dpo/delta": 0.05303880572319031,
"fcm_dpo/margin": 51.81724548339844,
"fcm_dpo/q_t": 0.39200538396835327,
"grad_norm": 105.71543884277344,
"learning_rate": 5.2370785753763356e-09,
"logits/chosen": -0.7862426042556763,
"logits/rejected": -0.7958937883377075,
"logps/chosen": -395.0786437988281,
"logps/ref_chosen": -284.26544189453125,
"logps/ref_rejected": -250.5401611328125,
"logps/rejected": -413.170654296875,
"loss": 4.2778,
"margin_dpo/margin_mean": 51.81724548339844,
"margin_dpo/margin_std": 73.03158569335938,
"step": 450
},
{
"epoch": 0.9445026178010472,
"fcm_dpo/beta": 0.009586581960320473,
"fcm_dpo/delta": -0.022069107741117477,
"fcm_dpo/margin": 52.51289367675781,
"fcm_dpo/q_t": 0.39439404010772705,
"grad_norm": 103.11542510986328,
"learning_rate": 4.8708793644441086e-09,
"logits/chosen": -0.810042679309845,
"logits/rejected": -0.7838971018791199,
"logps/chosen": -413.1301574707031,
"logps/ref_chosen": -302.3209228515625,
"logps/ref_rejected": -254.09747314453125,
"logps/rejected": -417.41961669921875,
"loss": 4.422,
"margin_dpo/margin_mean": 52.51289749145508,
"margin_dpo/margin_std": 82.73294830322266,
"step": 451
},
{
"epoch": 0.9465968586387434,
"fcm_dpo/beta": 0.009494351223111153,
"fcm_dpo/delta": 0.006426731124520302,
"fcm_dpo/margin": 57.46319580078125,
"fcm_dpo/q_t": 0.38508880138397217,
"grad_norm": 94.53024291992188,
"learning_rate": 4.517825684323323e-09,
"logits/chosen": -0.8596530556678772,
"logits/rejected": -0.8371134996414185,
"logps/chosen": -398.1798400878906,
"logps/ref_chosen": -299.39215087890625,
"logps/ref_rejected": -284.3475036621094,
"logps/rejected": -440.59844970703125,
"loss": 4.2508,
"margin_dpo/margin_mean": 57.46318817138672,
"margin_dpo/margin_std": 82.77123260498047,
"step": 452
},
{
"epoch": 0.9486910994764398,
"fcm_dpo/beta": 0.009665731340646744,
"fcm_dpo/delta": -0.016047965735197067,
"fcm_dpo/margin": 63.59440231323242,
"fcm_dpo/q_t": 0.3707888722419739,
"grad_norm": 89.89924621582031,
"learning_rate": 4.1779364682113794e-09,
"logits/chosen": -0.7991673350334167,
"logits/rejected": -0.7965455651283264,
"logps/chosen": -429.2721862792969,
"logps/ref_chosen": -324.6517028808594,
"logps/ref_rejected": -304.1527099609375,
"logps/rejected": -472.3676452636719,
"loss": 4.0389,
"margin_dpo/margin_mean": 63.59440231323242,
"margin_dpo/margin_std": 84.75194549560547,
"step": 453
},
{
"epoch": 0.9507853403141361,
"fcm_dpo/beta": 0.009600440971553326,
"fcm_dpo/delta": -0.007240898907184601,
"fcm_dpo/margin": 62.99691390991211,
"fcm_dpo/q_t": 0.3711826205253601,
"grad_norm": 75.93077087402344,
"learning_rate": 3.851229943335393e-09,
"logits/chosen": -0.86097651720047,
"logits/rejected": -0.8730704188346863,
"logps/chosen": -399.46826171875,
"logps/ref_chosen": -299.6117248535156,
"logps/ref_rejected": -303.74224853515625,
"logps/rejected": -466.59564208984375,
"loss": 4.1159,
"margin_dpo/margin_mean": 62.99691390991211,
"margin_dpo/margin_std": 85.24671936035156,
"step": 454
},
{
"epoch": 0.9528795811518325,
"fcm_dpo/beta": 0.010221119970083237,
"fcm_dpo/delta": 0.1354561150074005,
"fcm_dpo/margin": 45.94690704345703,
"fcm_dpo/q_t": 0.4033811688423157,
"grad_norm": 97.06004333496094,
"learning_rate": 3.5377236299748147e-09,
"logits/chosen": -0.8142644166946411,
"logits/rejected": -0.826336145401001,
"logps/chosen": -372.8396301269531,
"logps/ref_chosen": -273.6116943359375,
"logps/ref_rejected": -274.4293518066406,
"logps/rejected": -419.604248046875,
"loss": 4.5863,
"margin_dpo/margin_mean": 45.94690704345703,
"margin_dpo/margin_std": 85.6665267944336,
"step": 455
},
{
"epoch": 0.9549738219895288,
"fcm_dpo/beta": 0.010199323296546936,
"fcm_dpo/delta": -0.09923385083675385,
"fcm_dpo/margin": 63.42009735107422,
"fcm_dpo/q_t": 0.377056747674942,
"grad_norm": 94.97087097167969,
"learning_rate": 3.2374343405217884e-09,
"logits/chosen": -0.7348066568374634,
"logits/rejected": -0.7477578520774841,
"logps/chosen": -438.2121276855469,
"logps/ref_chosen": -322.17193603515625,
"logps/ref_rejected": -294.54461669921875,
"logps/rejected": -474.0048522949219,
"loss": 4.3425,
"margin_dpo/margin_mean": 63.42009353637695,
"margin_dpo/margin_std": 105.82379913330078,
"step": 456
},
{
"epoch": 0.9570680628272251,
"fcm_dpo/beta": 0.009744374081492424,
"fcm_dpo/delta": -0.009416388347744942,
"fcm_dpo/margin": 62.40718460083008,
"fcm_dpo/q_t": 0.36779892444610596,
"grad_norm": 84.67405700683594,
"learning_rate": 2.9503781785795713e-09,
"logits/chosen": -0.7926053404808044,
"logits/rejected": -0.7992970943450928,
"logps/chosen": -416.7201843261719,
"logps/ref_chosen": -307.7962341308594,
"logps/ref_rejected": -274.5501403808594,
"logps/rejected": -445.88128662109375,
"loss": 4.173,
"margin_dpo/margin_mean": 62.40718460083008,
"margin_dpo/margin_std": 89.37660217285156,
"step": 457
},
{
"epoch": 0.9591623036649215,
"fcm_dpo/beta": 0.010077232494950294,
"fcm_dpo/delta": 0.03728824108839035,
"fcm_dpo/margin": 55.845306396484375,
"fcm_dpo/q_t": 0.3842548131942749,
"grad_norm": 84.43614959716797,
"learning_rate": 2.6765705380989432e-09,
"logits/chosen": -0.8247987627983093,
"logits/rejected": -0.8124662041664124,
"logps/chosen": -402.5335388183594,
"logps/ref_chosen": -297.0316467285156,
"logps/ref_rejected": -276.1112365722656,
"logps/rejected": -437.45843505859375,
"loss": 4.3221,
"margin_dpo/margin_mean": 55.845306396484375,
"margin_dpo/margin_std": 86.97913360595703,
"step": 458
},
{
"epoch": 0.9612565445026178,
"fcm_dpo/beta": 0.010236883535981178,
"fcm_dpo/delta": 0.021781034767627716,
"fcm_dpo/margin": 51.37626647949219,
"fcm_dpo/q_t": 0.38991579413414,
"grad_norm": 117.91474151611328,
"learning_rate": 2.416026102552732e-09,
"logits/chosen": -0.8719888925552368,
"logits/rejected": -0.8656003475189209,
"logps/chosen": -394.6509704589844,
"logps/ref_chosen": -293.5252990722656,
"logps/ref_rejected": -289.30126953125,
"logps/rejected": -441.8031921386719,
"loss": 4.367,
"margin_dpo/margin_mean": 51.37627410888672,
"margin_dpo/margin_std": 80.18513488769531,
"step": 459
},
{
"epoch": 0.9633507853403142,
"fcm_dpo/beta": 0.010486846789717674,
"fcm_dpo/delta": 0.013618772849440575,
"fcm_dpo/margin": 50.83551788330078,
"fcm_dpo/q_t": 0.385250449180603,
"grad_norm": 104.75066375732422,
"learning_rate": 2.168758844148272e-09,
"logits/chosen": -0.8460461497306824,
"logits/rejected": -0.8530416488647461,
"logps/chosen": -421.2718200683594,
"logps/ref_chosen": -318.7803649902344,
"logps/ref_rejected": -258.7906799316406,
"logps/rejected": -412.1176452636719,
"loss": 4.3321,
"margin_dpo/margin_mean": 50.83551788330078,
"margin_dpo/margin_std": 77.74996948242188,
"step": 460
},
{
"epoch": 0.9654450261780104,
"fcm_dpo/beta": 0.010277766734361649,
"fcm_dpo/delta": -0.023162774741649628,
"fcm_dpo/margin": 55.653541564941406,
"fcm_dpo/q_t": 0.3829959034919739,
"grad_norm": 109.19080352783203,
"learning_rate": 1.9347820230782295e-09,
"logits/chosen": -0.8199470043182373,
"logits/rejected": -0.8473576903343201,
"logps/chosen": -346.2790222167969,
"logps/ref_chosen": -243.9099884033203,
"logps/ref_rejected": -232.6382293701172,
"logps/rejected": -390.6607666015625,
"loss": 4.3594,
"margin_dpo/margin_mean": 55.653533935546875,
"margin_dpo/margin_std": 89.18666076660156,
"step": 461
},
{
"epoch": 0.9675392670157068,
"fcm_dpo/beta": 0.009733910672366619,
"fcm_dpo/delta": -0.06866450607776642,
"fcm_dpo/margin": 67.92211151123047,
"fcm_dpo/q_t": 0.36428695917129517,
"grad_norm": 90.08053588867188,
"learning_rate": 1.7141081868094209e-09,
"logits/chosen": -0.8322769403457642,
"logits/rejected": -0.7903834581375122,
"logps/chosen": -447.9652404785156,
"logps/ref_chosen": -344.09100341796875,
"logps/ref_rejected": -252.45037841796875,
"logps/rejected": -424.2467956542969,
"loss": 4.0804,
"margin_dpo/margin_mean": 67.92211151123047,
"margin_dpo/margin_std": 93.55986785888672,
"step": 462
},
{
"epoch": 0.9696335078534032,
"fcm_dpo/beta": 0.01002544816583395,
"fcm_dpo/delta": 0.07595731317996979,
"fcm_dpo/margin": 52.43413162231445,
"fcm_dpo/q_t": 0.3893330693244934,
"grad_norm": 105.60916900634766,
"learning_rate": 1.5067491694100153e-09,
"logits/chosen": -0.8533962368965149,
"logits/rejected": -0.8175274133682251,
"logps/chosen": -396.2633056640625,
"logps/ref_chosen": -297.1424560546875,
"logps/ref_rejected": -234.0208282470703,
"logps/rejected": -385.5758361816406,
"loss": 4.3877,
"margin_dpo/margin_mean": 52.43413162231445,
"margin_dpo/margin_std": 84.19559478759766,
"step": 463
},
{
"epoch": 0.9717277486910995,
"fcm_dpo/beta": 0.010431027971208096,
"fcm_dpo/delta": 0.05969306826591492,
"fcm_dpo/margin": 52.017539978027344,
"fcm_dpo/q_t": 0.38790467381477356,
"grad_norm": 135.7662353515625,
"learning_rate": 1.3127160909147672e-09,
"logits/chosen": -0.8195664286613464,
"logits/rejected": -0.8451250791549683,
"logps/chosen": -378.0357971191406,
"logps/ref_chosen": -265.71075439453125,
"logps/ref_rejected": -256.4108581542969,
"logps/rejected": -420.75341796875,
"loss": 4.4027,
"margin_dpo/margin_mean": 52.017539978027344,
"margin_dpo/margin_std": 85.27294921875,
"step": 464
},
{
"epoch": 0.9738219895287958,
"fcm_dpo/beta": 0.00996001623570919,
"fcm_dpo/delta": -0.14235681295394897,
"fcm_dpo/margin": 65.76058197021484,
"fcm_dpo/q_t": 0.36305713653564453,
"grad_norm": 67.80677032470703,
"learning_rate": 1.1320193567288527e-09,
"logits/chosen": -0.8823571801185608,
"logits/rejected": -0.8570696115493774,
"logps/chosen": -390.29156494140625,
"logps/ref_chosen": -293.1527404785156,
"logps/ref_rejected": -293.70947265625,
"logps/rejected": -456.60888671875,
"loss": 4.0328,
"margin_dpo/margin_mean": 65.76058197021484,
"margin_dpo/margin_std": 82.558837890625,
"step": 465
},
{
"epoch": 0.9759162303664921,
"fcm_dpo/beta": 0.00922522135078907,
"fcm_dpo/delta": -0.055178042501211166,
"fcm_dpo/margin": 70.33241271972656,
"fcm_dpo/q_t": 0.36043423414230347,
"grad_norm": 80.35893249511719,
"learning_rate": 9.64668657069706e-10,
"logits/chosen": -0.8069887757301331,
"logits/rejected": -0.7611278295516968,
"logps/chosen": -353.83447265625,
"logps/ref_chosen": -261.4775695800781,
"logps/ref_rejected": -248.36282348632812,
"logps/rejected": -411.0520935058594,
"loss": 3.8624,
"margin_dpo/margin_mean": 70.33241271972656,
"margin_dpo/margin_std": 77.98775482177734,
"step": 466
},
{
"epoch": 0.9780104712041885,
"fcm_dpo/beta": 0.009081280790269375,
"fcm_dpo/delta": 0.013140158727765083,
"fcm_dpo/margin": 50.5679817199707,
"fcm_dpo/q_t": 0.40236690640449524,
"grad_norm": 95.41093444824219,
"learning_rate": 8.106729664475176e-10,
"logits/chosen": -0.7933071851730347,
"logits/rejected": -0.7896216511726379,
"logps/chosen": -371.2308044433594,
"logps/ref_chosen": -266.354248046875,
"logps/ref_rejected": -277.76324462890625,
"logps/rejected": -433.20782470703125,
"loss": 4.5699,
"margin_dpo/margin_mean": 50.5679817199707,
"margin_dpo/margin_std": 88.48072052001953,
"step": 467
},
{
"epoch": 0.9801047120418848,
"fcm_dpo/beta": 0.009550162591040134,
"fcm_dpo/delta": 0.045823030173778534,
"fcm_dpo/margin": 52.80379104614258,
"fcm_dpo/q_t": 0.3920201063156128,
"grad_norm": 90.87904357910156,
"learning_rate": 6.700405431837585e-10,
"logits/chosen": -0.8760251998901367,
"logits/rejected": -0.8512067198753357,
"logps/chosen": -418.8556213378906,
"logps/ref_chosen": -317.9631652832031,
"logps/ref_rejected": -261.8744201660156,
"logps/rejected": -415.5706787109375,
"loss": 4.3766,
"margin_dpo/margin_mean": 52.80379104614258,
"margin_dpo/margin_std": 82.05970001220703,
"step": 468
},
{
"epoch": 0.9821989528795811,
"fcm_dpo/beta": 0.009466158226132393,
"fcm_dpo/delta": -0.01515500620007515,
"fcm_dpo/margin": 64.6595458984375,
"fcm_dpo/q_t": 0.370385080575943,
"grad_norm": 82.3922348022461,
"learning_rate": 5.427789289685347e-10,
"logits/chosen": -0.8086352348327637,
"logits/rejected": -0.7982282042503357,
"logps/chosen": -420.8330993652344,
"logps/ref_chosen": -324.8868103027344,
"logps/ref_rejected": -264.0421447753906,
"logps/rejected": -424.64794921875,
"loss": 4.1333,
"margin_dpo/margin_mean": 64.6595458984375,
"margin_dpo/margin_std": 88.66175842285156,
"step": 469
},
{
"epoch": 0.9842931937172775,
"fcm_dpo/beta": 0.009759598411619663,
"fcm_dpo/delta": -0.009445477277040482,
"fcm_dpo/margin": 62.19755935668945,
"fcm_dpo/q_t": 0.3727426528930664,
"grad_norm": 74.95175170898438,
"learning_rate": 4.288949484559934e-10,
"logits/chosen": -0.8073372840881348,
"logits/rejected": -0.8078172206878662,
"logps/chosen": -407.6559753417969,
"logps/ref_chosen": -314.7042541503906,
"logps/ref_rejected": -259.2276611328125,
"logps/rejected": -414.376953125,
"loss": 4.1115,
"margin_dpo/margin_mean": 62.19756317138672,
"margin_dpo/margin_std": 83.67054748535156,
"step": 470
},
{
"epoch": 0.9863874345549738,
"fcm_dpo/beta": 0.009834382683038712,
"fcm_dpo/delta": 0.014730914495885372,
"fcm_dpo/margin": 54.49878692626953,
"fcm_dpo/q_t": 0.39111655950546265,
"grad_norm": 105.49163055419922,
"learning_rate": 3.2839470889836627e-10,
"logits/chosen": -0.8555752635002136,
"logits/rejected": -0.8461736440658569,
"logps/chosen": -399.32470703125,
"logps/ref_chosen": -292.5748291015625,
"logps/ref_rejected": -298.7525329589844,
"logps/rejected": -460.0011901855469,
"loss": 4.3592,
"margin_dpo/margin_mean": 54.49878692626953,
"margin_dpo/margin_std": 87.04114532470703,
"step": 471
},
{
"epoch": 0.9884816753926702,
"fcm_dpo/beta": 0.00952895823866129,
"fcm_dpo/delta": -0.003236265853047371,
"fcm_dpo/margin": 63.19277572631836,
"fcm_dpo/q_t": 0.37333759665489197,
"grad_norm": 75.30541229248047,
"learning_rate": 2.412835998185092e-10,
"logits/chosen": -0.8509103655815125,
"logits/rejected": -0.8665403127670288,
"logps/chosen": -336.0157470703125,
"logps/ref_chosen": -243.37380981445312,
"logps/ref_rejected": -251.12109375,
"logps/rejected": -406.9557800292969,
"loss": 4.0229,
"margin_dpo/margin_mean": 63.192779541015625,
"margin_dpo/margin_std": 79.94721984863281,
"step": 472
},
{
"epoch": 0.9905759162303664,
"fcm_dpo/beta": 0.009621812961995602,
"fcm_dpo/delta": -0.031623952090740204,
"fcm_dpo/margin": 65.44559478759766,
"fcm_dpo/q_t": 0.365360826253891,
"grad_norm": 80.94692993164062,
"learning_rate": 1.6756629272085544e-10,
"logits/chosen": -0.809826672077179,
"logits/rejected": -0.816728413105011,
"logps/chosen": -384.465087890625,
"logps/ref_chosen": -286.3286437988281,
"logps/ref_rejected": -258.6535339355469,
"logps/rejected": -422.235595703125,
"loss": 3.9983,
"margin_dpo/margin_mean": 65.44559478759766,
"margin_dpo/margin_std": 81.92974090576172,
"step": 473
},
{
"epoch": 0.9926701570680628,
"fcm_dpo/beta": 0.009346621111035347,
"fcm_dpo/delta": 0.05879068374633789,
"fcm_dpo/margin": 51.022865295410156,
"fcm_dpo/q_t": 0.39581844210624695,
"grad_norm": 107.87037658691406,
"learning_rate": 1.072467408408384e-10,
"logits/chosen": -0.8381420373916626,
"logits/rejected": -0.8414347171783447,
"logps/chosen": -392.3779296875,
"logps/ref_chosen": -288.08966064453125,
"logps/ref_rejected": -266.69696044921875,
"logps/rejected": -422.00811767578125,
"loss": 4.3549,
"margin_dpo/margin_mean": 51.022865295410156,
"margin_dpo/margin_std": 71.58537292480469,
"step": 474
},
{
"epoch": 0.9947643979057592,
"fcm_dpo/beta": 0.009538455866277218,
"fcm_dpo/delta": -0.0211745984852314,
"fcm_dpo/margin": 53.395355224609375,
"fcm_dpo/q_t": 0.3920353651046753,
"grad_norm": 98.68036651611328,
"learning_rate": 6.032817893297793e-11,
"logits/chosen": -0.8091562390327454,
"logits/rejected": -0.8317868113517761,
"logps/chosen": -350.73065185546875,
"logps/ref_chosen": -256.0030517578125,
"logps/ref_rejected": -244.50660705566406,
"logps/rejected": -392.6295471191406,
"loss": 4.3115,
"margin_dpo/margin_mean": 53.395362854003906,
"margin_dpo/margin_std": 77.75424194335938,
"step": 475
},
{
"epoch": 0.9968586387434555,
"fcm_dpo/beta": 0.009954025037586689,
"fcm_dpo/delta": 0.031187057495117188,
"fcm_dpo/margin": 56.84566116333008,
"fcm_dpo/q_t": 0.3844139575958252,
"grad_norm": 119.93623352050781,
"learning_rate": 2.6813123097352287e-11,
"logits/chosen": -0.8869379162788391,
"logits/rejected": -0.8474351167678833,
"logps/chosen": -413.58526611328125,
"logps/ref_chosen": -321.467529296875,
"logps/ref_rejected": -295.0592956542969,
"logps/rejected": -444.022705078125,
"loss": 4.314,
"margin_dpo/margin_mean": 56.84566116333008,
"margin_dpo/margin_std": 86.78395080566406,
"step": 476
},
{
"epoch": 0.9989528795811519,
"fcm_dpo/beta": 0.00990710686892271,
"fcm_dpo/delta": -0.03750115633010864,
"fcm_dpo/margin": 59.27772521972656,
"fcm_dpo/q_t": 0.3814779818058014,
"grad_norm": 128.50753784179688,
"learning_rate": 6.7033706447061635e-12,
"logits/chosen": -0.7780085206031799,
"logits/rejected": -0.7910189032554626,
"logps/chosen": -384.5294189453125,
"logps/ref_chosen": -276.7939758300781,
"logps/ref_rejected": -244.82919311523438,
"logps/rejected": -411.8423156738281,
"loss": 4.38,
"margin_dpo/margin_mean": 59.27772521972656,
"margin_dpo/margin_std": 96.08084106445312,
"step": 477
},
{
"epoch": 0.9989528795811519,
"step": 477,
"total_flos": 0.0,
"train_loss": 4.544049396954742,
"train_runtime": 6685.0038,
"train_samples_per_second": 9.145,
"train_steps_per_second": 0.071
}
],
"logging_steps": 1,
"max_steps": 477,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}