{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9989528795811519, "eval_steps": 200, "global_step": 477, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0020942408376963353, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.02793481945991516, "fcm_dpo/q_t": 0.500069797039032, "grad_norm": 28.58863639831543, "learning_rate": 0.0, "logits/chosen": -0.5898098945617676, "logits/rejected": -0.604260265827179, "logps/chosen": -275.28570556640625, "logps/ref_chosen": -275.2312927246094, "logps/ref_rejected": -222.9380340576172, "logps/rejected": -222.96453857421875, "loss": 5.5463, "margin_dpo/margin_mean": -0.02793477475643158, "margin_dpo/margin_std": 0.5724214911460876, "step": 1 }, { "epoch": 0.004188481675392671, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.014312177896499634, "fcm_dpo/q_t": 0.4999642074108124, "grad_norm": 27.88129234313965, "learning_rate": 1.0416666666666666e-08, "logits/chosen": -0.6574729681015015, "logits/rejected": -0.6464410424232483, "logps/chosen": -264.7165222167969, "logps/ref_chosen": -264.7611083984375, "logps/ref_rejected": -242.5597686767578, "logps/rejected": -242.52951049804688, "loss": 5.5446, "margin_dpo/margin_mean": 0.014312252402305603, "margin_dpo/margin_std": 0.6423971652984619, "step": 2 }, { "epoch": 0.0062827225130890054, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.0522288978099823, "fcm_dpo/q_t": 0.4998694360256195, "grad_norm": 25.787551879882812, "learning_rate": 2.083333333333333e-08, "logits/chosen": -0.6840452551841736, "logits/rejected": -0.7351922392845154, "logps/chosen": -274.11102294921875, "logps/ref_chosen": -274.1018981933594, "logps/ref_rejected": -286.5882568359375, "logps/rejected": -286.6496276855469, "loss": 5.5431, "margin_dpo/margin_mean": 0.05222900211811066, "margin_dpo/margin_std": 0.6702825427055359, "step": 3 }, { "epoch": 0.008376963350785341, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.01975475251674652, "fcm_dpo/q_t": 0.4999505877494812, "grad_norm": 31.67710304260254, "learning_rate": 3.125e-08, "logits/chosen": -0.620403528213501, "logits/rejected": -0.6149281859397888, "logps/chosen": -329.92706298828125, "logps/ref_chosen": -329.8382568359375, "logps/ref_rejected": -303.2850646972656, "logps/rejected": -303.3935852050781, "loss": 5.5445, "margin_dpo/margin_mean": 0.019755080342292786, "margin_dpo/margin_std": 0.7474581003189087, "step": 4 }, { "epoch": 0.010471204188481676, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.020737484097480774, "fcm_dpo/q_t": 0.5000518560409546, "grad_norm": 29.562240600585938, "learning_rate": 4.166666666666666e-08, "logits/chosen": -0.5746553540229797, "logits/rejected": -0.5913240313529968, "logps/chosen": -301.7220153808594, "logps/ref_chosen": -301.7389221191406, "logps/ref_rejected": -274.7654724121094, "logps/rejected": -274.7278137207031, "loss": 5.5461, "margin_dpo/margin_mean": -0.020738065242767334, "margin_dpo/margin_std": 0.7413759231567383, "step": 5 }, { "epoch": 0.012565445026178011, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.042598187923431396, "fcm_dpo/q_t": 0.49989351630210876, "grad_norm": 28.17943572998047, "learning_rate": 5.208333333333333e-08, "logits/chosen": -0.6895941495895386, "logits/rejected": -0.6532205939292908, "logps/chosen": -285.6484069824219, "logps/ref_chosen": -285.6946716308594, "logps/ref_rejected": -245.8200225830078, "logps/rejected": -245.81639099121094, "loss": 5.5435, "margin_dpo/margin_mean": 0.042598843574523926, "margin_dpo/margin_std": 0.6217130422592163, "step": 6 }, { "epoch": 0.014659685863874346, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.04013945162296295, "fcm_dpo/q_t": 0.49989965558052063, "grad_norm": 28.537418365478516, "learning_rate": 6.25e-08, "logits/chosen": -0.5685023069381714, "logits/rejected": -0.6004266142845154, "logps/chosen": -264.6379089355469, "logps/ref_chosen": -264.65545654296875, "logps/ref_rejected": -253.10305786132812, "logps/rejected": -253.1256866455078, "loss": 5.5436, "margin_dpo/margin_mean": 0.040140300989151, "margin_dpo/margin_std": 0.6211207509040833, "step": 7 }, { "epoch": 0.016753926701570682, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.021448686718940735, "fcm_dpo/q_t": 0.5000536441802979, "grad_norm": 30.7622127532959, "learning_rate": 7.291666666666667e-08, "logits/chosen": -0.6872634291648865, "logits/rejected": -0.6957005262374878, "logps/chosen": -354.21673583984375, "logps/ref_chosen": -354.1887512207031, "logps/ref_rejected": -282.9112243652344, "logps/rejected": -282.91778564453125, "loss": 5.5461, "margin_dpo/margin_mean": -0.0214470773935318, "margin_dpo/margin_std": 0.7891030311584473, "step": 8 }, { "epoch": 0.018848167539267015, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.0005417168140411377, "fcm_dpo/q_t": 0.49999865889549255, "grad_norm": 27.870540618896484, "learning_rate": 8.333333333333333e-08, "logits/chosen": -0.637888491153717, "logits/rejected": -0.6561119556427002, "logps/chosen": -285.5398254394531, "logps/ref_chosen": -285.5502014160156, "logps/ref_rejected": -267.99664306640625, "logps/rejected": -267.98681640625, "loss": 5.5452, "margin_dpo/margin_mean": 0.0005417615175247192, "margin_dpo/margin_std": 0.6964117884635925, "step": 9 }, { "epoch": 0.020942408376963352, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.07241007685661316, "fcm_dpo/q_t": 0.49981898069381714, "grad_norm": 26.481124877929688, "learning_rate": 9.375e-08, "logits/chosen": -0.6758443117141724, "logits/rejected": -0.6697291135787964, "logps/chosen": -251.83383178710938, "logps/ref_chosen": -251.91238403320312, "logps/ref_rejected": -226.45260620117188, "logps/rejected": -226.44647216796875, "loss": 5.5423, "margin_dpo/margin_mean": 0.07241000235080719, "margin_dpo/margin_std": 0.6581631898880005, "step": 10 }, { "epoch": 0.023036649214659685, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.04671028256416321, "fcm_dpo/q_t": 0.499883234500885, "grad_norm": 28.99079704284668, "learning_rate": 1.0416666666666667e-07, "logits/chosen": -0.5921290516853333, "logits/rejected": -0.6467868685722351, "logps/chosen": -300.9755554199219, "logps/ref_chosen": -301.08343505859375, "logps/ref_rejected": -259.546630859375, "logps/rejected": -259.48541259765625, "loss": 5.5434, "margin_dpo/margin_mean": 0.04670977592468262, "margin_dpo/margin_std": 0.6685330867767334, "step": 11 }, { "epoch": 0.025130890052356022, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.08367666602134705, "fcm_dpo/q_t": 0.49979081749916077, "grad_norm": 30.09294891357422, "learning_rate": 1.1458333333333332e-07, "logits/chosen": -0.5769016742706299, "logits/rejected": -0.536662220954895, "logps/chosen": -287.5732727050781, "logps/ref_chosen": -287.548095703125, "logps/ref_rejected": -277.37945556640625, "logps/rejected": -277.48828125, "loss": 5.5419, "margin_dpo/margin_mean": 0.08367684483528137, "margin_dpo/margin_std": 0.8274821043014526, "step": 12 }, { "epoch": 0.027225130890052355, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.07414671778678894, "fcm_dpo/q_t": 0.4998146593570709, "grad_norm": 27.353668212890625, "learning_rate": 1.25e-07, "logits/chosen": -0.656341016292572, "logits/rejected": -0.663852870464325, "logps/chosen": -270.6452331542969, "logps/ref_chosen": -270.6664123535156, "logps/ref_rejected": -274.6546936035156, "logps/rejected": -274.7076721191406, "loss": 5.5423, "margin_dpo/margin_mean": 0.07414683699607849, "margin_dpo/margin_std": 0.7050091028213501, "step": 13 }, { "epoch": 0.02931937172774869, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.02401043474674225, "fcm_dpo/q_t": 0.4999399483203888, "grad_norm": 28.236251831054688, "learning_rate": 1.3541666666666666e-07, "logits/chosen": -0.6250549554824829, "logits/rejected": -0.6541483402252197, "logps/chosen": -281.5421142578125, "logps/ref_chosen": -281.59320068359375, "logps/ref_rejected": -263.52215576171875, "logps/rejected": -263.49505615234375, "loss": 5.5443, "margin_dpo/margin_mean": 0.024009257555007935, "margin_dpo/margin_std": 0.7082223296165466, "step": 14 }, { "epoch": 0.031413612565445025, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.11133088171482086, "fcm_dpo/q_t": 0.4997216761112213, "grad_norm": 30.394655227661133, "learning_rate": 1.4583333333333335e-07, "logits/chosen": -0.6390557289123535, "logits/rejected": -0.6513383388519287, "logps/chosen": -298.3955383300781, "logps/ref_chosen": -298.45343017578125, "logps/ref_rejected": -227.17832946777344, "logps/rejected": -227.23179626464844, "loss": 5.5408, "margin_dpo/margin_mean": 0.11132954061031342, "margin_dpo/margin_std": 0.7194129228591919, "step": 15 }, { "epoch": 0.033507853403141365, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.12089978158473969, "fcm_dpo/q_t": 0.4996977746486664, "grad_norm": 30.162830352783203, "learning_rate": 1.5624999999999999e-07, "logits/chosen": -0.6070325970649719, "logits/rejected": -0.6046398878097534, "logps/chosen": -293.8941345214844, "logps/ref_chosen": -293.96661376953125, "logps/ref_rejected": -250.78443908691406, "logps/rejected": -250.83285522460938, "loss": 5.5404, "margin_dpo/margin_mean": 0.1208992600440979, "margin_dpo/margin_std": 0.7001491785049438, "step": 16 }, { "epoch": 0.0356020942408377, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.03587338328361511, "fcm_dpo/q_t": 0.4999103248119354, "grad_norm": 27.698881149291992, "learning_rate": 1.6666666666666665e-07, "logits/chosen": -0.5651789307594299, "logits/rejected": -0.5910645723342896, "logps/chosen": -262.3535461425781, "logps/ref_chosen": -262.39398193359375, "logps/ref_rejected": -248.500244140625, "logps/rejected": -248.4956512451172, "loss": 5.5438, "margin_dpo/margin_mean": 0.03587399423122406, "margin_dpo/margin_std": 0.6019639372825623, "step": 17 }, { "epoch": 0.03769633507853403, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.09658396244049072, "fcm_dpo/q_t": 0.4997585415840149, "grad_norm": 29.77211570739746, "learning_rate": 1.7708333333333334e-07, "logits/chosen": -0.6159874200820923, "logits/rejected": -0.622156023979187, "logps/chosen": -293.6891174316406, "logps/ref_chosen": -293.709228515625, "logps/ref_rejected": -274.5875244140625, "logps/rejected": -274.6639709472656, "loss": 5.5414, "margin_dpo/margin_mean": 0.0965852439403534, "margin_dpo/margin_std": 0.6984401941299438, "step": 18 }, { "epoch": 0.039790575916230364, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.08792537450790405, "fcm_dpo/q_t": 0.49978017807006836, "grad_norm": 28.1430606842041, "learning_rate": 1.875e-07, "logits/chosen": -0.6285202503204346, "logits/rejected": -0.625032901763916, "logps/chosen": -280.1614074707031, "logps/ref_chosen": -280.26568603515625, "logps/ref_rejected": -259.9742736816406, "logps/rejected": -259.9579162597656, "loss": 5.5417, "margin_dpo/margin_mean": 0.08792558312416077, "margin_dpo/margin_std": 0.6590030789375305, "step": 19 }, { "epoch": 0.041884816753926704, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.1195082813501358, "fcm_dpo/q_t": 0.49970126152038574, "grad_norm": 29.686382293701172, "learning_rate": 1.9791666666666664e-07, "logits/chosen": -0.6156403422355652, "logits/rejected": -0.6466647386550903, "logps/chosen": -303.7765808105469, "logps/ref_chosen": -303.8954162597656, "logps/ref_rejected": -260.214599609375, "logps/rejected": -260.21527099609375, "loss": 5.5404, "margin_dpo/margin_mean": 0.11950752139091492, "margin_dpo/margin_std": 0.6185337901115417, "step": 20 }, { "epoch": 0.04397905759162304, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.11209306120872498, "fcm_dpo/q_t": 0.4997197389602661, "grad_norm": 35.081111907958984, "learning_rate": 2.0833333333333333e-07, "logits/chosen": -0.6411827206611633, "logits/rejected": -0.6685382723808289, "logps/chosen": -301.36767578125, "logps/ref_chosen": -301.5334777832031, "logps/ref_rejected": -280.28900146484375, "logps/rejected": -280.2352600097656, "loss": 5.5408, "margin_dpo/margin_mean": 0.11209359765052795, "margin_dpo/margin_std": 0.7848556637763977, "step": 21 }, { "epoch": 0.04607329842931937, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.01605142652988434, "fcm_dpo/q_t": 0.5000401735305786, "grad_norm": 25.28415870666504, "learning_rate": 2.1875e-07, "logits/chosen": -0.666114330291748, "logits/rejected": -0.668174684047699, "logps/chosen": -259.9440612792969, "logps/ref_chosen": -259.9951477050781, "logps/ref_rejected": -243.0721435546875, "logps/rejected": -243.00502014160156, "loss": 5.5459, "margin_dpo/margin_mean": -0.016050517559051514, "margin_dpo/margin_std": 0.7094568610191345, "step": 22 }, { "epoch": 0.048167539267015703, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.08404606580734253, "fcm_dpo/q_t": 0.49978986382484436, "grad_norm": 27.975460052490234, "learning_rate": 2.2916666666666663e-07, "logits/chosen": -0.6258850693702698, "logits/rejected": -0.6604623794555664, "logps/chosen": -282.1323547363281, "logps/ref_chosen": -282.1807556152344, "logps/ref_rejected": -265.0758056640625, "logps/rejected": -265.1114501953125, "loss": 5.5419, "margin_dpo/margin_mean": 0.08404561877250671, "margin_dpo/margin_std": 0.7268498539924622, "step": 23 }, { "epoch": 0.050261780104712044, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.2948577404022217, "fcm_dpo/q_t": 0.49926286935806274, "grad_norm": 29.78768539428711, "learning_rate": 2.3958333333333335e-07, "logits/chosen": -0.6449757218360901, "logits/rejected": -0.557062029838562, "logps/chosen": -300.929443359375, "logps/ref_chosen": -301.17962646484375, "logps/ref_rejected": -302.12786865234375, "logps/rejected": -302.1725158691406, "loss": 5.5335, "margin_dpo/margin_mean": 0.2948572337627411, "margin_dpo/margin_std": 0.7735106348991394, "step": 24 }, { "epoch": 0.05235602094240838, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.10211683809757233, "fcm_dpo/q_t": 0.499744713306427, "grad_norm": 26.4748592376709, "learning_rate": 2.5e-07, "logits/chosen": -0.6071560382843018, "logits/rejected": -0.6157788634300232, "logps/chosen": -246.59918212890625, "logps/ref_chosen": -246.74649047851562, "logps/ref_rejected": -235.55638122558594, "logps/rejected": -235.51116943359375, "loss": 5.5412, "margin_dpo/margin_mean": 0.10211563110351562, "margin_dpo/margin_std": 0.7187904119491577, "step": 25 }, { "epoch": 0.05445026178010471, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.1786605715751648, "fcm_dpo/q_t": 0.4995533227920532, "grad_norm": 28.6179256439209, "learning_rate": 2.604166666666667e-07, "logits/chosen": -0.6561638116836548, "logits/rejected": -0.6707935333251953, "logps/chosen": -281.9454345703125, "logps/ref_chosen": -282.1955871582031, "logps/ref_rejected": -235.3135528564453, "logps/rejected": -235.2420654296875, "loss": 5.5381, "margin_dpo/margin_mean": 0.17866107821464539, "margin_dpo/margin_std": 0.7850726842880249, "step": 26 }, { "epoch": 0.05654450261780105, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.11002352833747864, "fcm_dpo/q_t": 0.49972498416900635, "grad_norm": 27.748746871948242, "learning_rate": 2.708333333333333e-07, "logits/chosen": -0.6526930332183838, "logits/rejected": -0.6720865368843079, "logps/chosen": -323.60198974609375, "logps/ref_chosen": -323.8563537597656, "logps/ref_rejected": -245.968017578125, "logps/rejected": -245.82369995117188, "loss": 5.5409, "margin_dpo/margin_mean": 0.11002381145954132, "margin_dpo/margin_std": 0.8846166133880615, "step": 27 }, { "epoch": 0.05863874345549738, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.15542466938495636, "fcm_dpo/q_t": 0.4996114671230316, "grad_norm": 26.329299926757812, "learning_rate": 2.8125e-07, "logits/chosen": -0.614606499671936, "logits/rejected": -0.6235878467559814, "logps/chosen": -248.0102081298828, "logps/ref_chosen": -248.24673461914062, "logps/ref_rejected": -240.0382080078125, "logps/rejected": -239.9570770263672, "loss": 5.539, "margin_dpo/margin_mean": 0.15542495250701904, "margin_dpo/margin_std": 0.8629389405250549, "step": 28 }, { "epoch": 0.060732984293193716, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.20112989842891693, "fcm_dpo/q_t": 0.4994971752166748, "grad_norm": 29.66515350341797, "learning_rate": 2.916666666666667e-07, "logits/chosen": -0.5941799879074097, "logits/rejected": -0.6167061924934387, "logps/chosen": -317.9654541015625, "logps/ref_chosen": -318.2564392089844, "logps/ref_rejected": -286.75848388671875, "logps/rejected": -286.6686706542969, "loss": 5.5372, "margin_dpo/margin_mean": 0.20112943649291992, "margin_dpo/margin_std": 0.8353064060211182, "step": 29 }, { "epoch": 0.06282722513089005, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.31348854303359985, "fcm_dpo/q_t": 0.49921631813049316, "grad_norm": 28.987579345703125, "learning_rate": 3.020833333333333e-07, "logits/chosen": -0.5948168635368347, "logits/rejected": -0.6118742823600769, "logps/chosen": -252.6605987548828, "logps/ref_chosen": -253.0491485595703, "logps/ref_rejected": -261.30029296875, "logps/rejected": -261.2252502441406, "loss": 5.5328, "margin_dpo/margin_mean": 0.3134886920452118, "margin_dpo/margin_std": 0.956783652305603, "step": 30 }, { "epoch": 0.06492146596858639, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.20913538336753845, "fcm_dpo/q_t": 0.4994771480560303, "grad_norm": 25.035062789916992, "learning_rate": 3.1249999999999997e-07, "logits/chosen": -0.6614914536476135, "logits/rejected": -0.6970552206039429, "logps/chosen": -247.77171325683594, "logps/ref_chosen": -248.15301513671875, "logps/ref_rejected": -203.17703247070312, "logps/rejected": -203.0048828125, "loss": 5.5369, "margin_dpo/margin_mean": 0.20913533866405487, "margin_dpo/margin_std": 1.0212087631225586, "step": 31 }, { "epoch": 0.06701570680628273, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.37815043330192566, "fcm_dpo/q_t": 0.4990546405315399, "grad_norm": 29.706012725830078, "learning_rate": 3.2291666666666666e-07, "logits/chosen": -0.6007161140441895, "logits/rejected": -0.6042397022247314, "logps/chosen": -304.9756774902344, "logps/ref_chosen": -305.5399475097656, "logps/ref_rejected": -267.6527099609375, "logps/rejected": -267.46661376953125, "loss": 5.5302, "margin_dpo/margin_mean": 0.3781506419181824, "margin_dpo/margin_std": 1.0059348344802856, "step": 32 }, { "epoch": 0.06910994764397906, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.351329505443573, "fcm_dpo/q_t": 0.49912169575691223, "grad_norm": 28.26189422607422, "learning_rate": 3.333333333333333e-07, "logits/chosen": -0.6345657706260681, "logits/rejected": -0.646354615688324, "logps/chosen": -285.6530456542969, "logps/ref_chosen": -286.2335205078125, "logps/ref_rejected": -255.38748168945312, "logps/rejected": -255.1583251953125, "loss": 5.5313, "margin_dpo/margin_mean": 0.351329505443573, "margin_dpo/margin_std": 1.1539372205734253, "step": 33 }, { "epoch": 0.0712041884816754, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.7785255908966064, "fcm_dpo/q_t": 0.49805375933647156, "grad_norm": 31.09929847717285, "learning_rate": 3.4375e-07, "logits/chosen": -0.6342900395393372, "logits/rejected": -0.6447117924690247, "logps/chosen": -340.80950927734375, "logps/ref_chosen": -341.5920104980469, "logps/ref_rejected": -278.8866882324219, "logps/rejected": -278.8827209472656, "loss": 5.5142, "margin_dpo/margin_mean": 0.7785259485244751, "margin_dpo/margin_std": 1.1828100681304932, "step": 34 }, { "epoch": 0.07329842931937172, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.4459952712059021, "fcm_dpo/q_t": 0.49888503551483154, "grad_norm": 26.795753479003906, "learning_rate": 3.541666666666667e-07, "logits/chosen": -0.6334152221679688, "logits/rejected": -0.6538709402084351, "logps/chosen": -264.3992919921875, "logps/ref_chosen": -265.0795593261719, "logps/ref_rejected": -264.4876708984375, "logps/rejected": -264.2533874511719, "loss": 5.5275, "margin_dpo/margin_mean": 0.44599461555480957, "margin_dpo/margin_std": 1.2866384983062744, "step": 35 }, { "epoch": 0.07539267015706806, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.6205528378486633, "fcm_dpo/q_t": 0.4984487295150757, "grad_norm": 32.203128814697266, "learning_rate": 3.645833333333333e-07, "logits/chosen": -0.5942565202713013, "logits/rejected": -0.611389696598053, "logps/chosen": -296.5020446777344, "logps/ref_chosen": -297.3261413574219, "logps/ref_rejected": -282.09515380859375, "logps/rejected": -281.8916320800781, "loss": 5.5206, "margin_dpo/margin_mean": 0.6205521821975708, "margin_dpo/margin_std": 1.4419972896575928, "step": 36 }, { "epoch": 0.0774869109947644, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.5163920521736145, "fcm_dpo/q_t": 0.498709112405777, "grad_norm": 31.128677368164062, "learning_rate": 3.75e-07, "logits/chosen": -0.5990525484085083, "logits/rejected": -0.6132475137710571, "logps/chosen": -313.2706298828125, "logps/ref_chosen": -314.0340270996094, "logps/ref_rejected": -299.3437805175781, "logps/rejected": -299.0968017578125, "loss": 5.5248, "margin_dpo/margin_mean": 0.5163909196853638, "margin_dpo/margin_std": 1.5658156871795654, "step": 37 }, { "epoch": 0.07958115183246073, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.7946432828903198, "fcm_dpo/q_t": 0.49801355600357056, "grad_norm": 28.43010139465332, "learning_rate": 3.8541666666666665e-07, "logits/chosen": -0.6444147229194641, "logits/rejected": -0.6569415330886841, "logps/chosen": -281.4555969238281, "logps/ref_chosen": -282.54119873046875, "logps/ref_rejected": -269.7773132324219, "logps/rejected": -269.486328125, "loss": 5.5137, "margin_dpo/margin_mean": 0.7946435213088989, "margin_dpo/margin_std": 1.5317476987838745, "step": 38 }, { "epoch": 0.08167539267015707, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.2457653284072876, "fcm_dpo/q_t": 0.49688607454299927, "grad_norm": 29.478654861450195, "learning_rate": 3.958333333333333e-07, "logits/chosen": -0.620808482170105, "logits/rejected": -0.6350722312927246, "logps/chosen": -275.4786376953125, "logps/ref_chosen": -276.7729187011719, "logps/ref_rejected": -249.95889282226562, "logps/rejected": -249.91033935546875, "loss": 5.4959, "margin_dpo/margin_mean": 1.2457654476165771, "margin_dpo/margin_std": 1.870965838432312, "step": 39 }, { "epoch": 0.08376963350785341, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.7840527296066284, "fcm_dpo/q_t": 0.49804022908210754, "grad_norm": 27.324800491333008, "learning_rate": 4.0625e-07, "logits/chosen": -0.6211684346199036, "logits/rejected": -0.6565241813659668, "logps/chosen": -283.1966857910156, "logps/ref_chosen": -284.30706787109375, "logps/ref_rejected": -244.4459991455078, "logps/rejected": -244.11964416503906, "loss": 5.5143, "margin_dpo/margin_mean": 0.7840531468391418, "margin_dpo/margin_std": 1.919097900390625, "step": 40 }, { "epoch": 0.08586387434554973, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.7700332403182983, "fcm_dpo/q_t": 0.49807509779930115, "grad_norm": 30.37227439880371, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -0.6269267201423645, "logits/rejected": -0.6523310542106628, "logps/chosen": -292.72161865234375, "logps/ref_chosen": -293.8151550292969, "logps/ref_rejected": -252.16815185546875, "logps/rejected": -251.84463500976562, "loss": 5.5147, "margin_dpo/margin_mean": 0.7700337171554565, "margin_dpo/margin_std": 1.7210803031921387, "step": 41 }, { "epoch": 0.08795811518324607, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.9412699341773987, "fcm_dpo/q_t": 0.49764710664749146, "grad_norm": 27.493986129760742, "learning_rate": 4.270833333333333e-07, "logits/chosen": -0.6289184093475342, "logits/rejected": -0.6464809775352478, "logps/chosen": -251.489013671875, "logps/ref_chosen": -252.76023864746094, "logps/ref_rejected": -261.0414733886719, "logps/rejected": -260.71148681640625, "loss": 5.5082, "margin_dpo/margin_mean": 0.9412699937820435, "margin_dpo/margin_std": 2.1883938312530518, "step": 42 }, { "epoch": 0.09005235602094241, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.1826536655426025, "fcm_dpo/q_t": 0.49704375863075256, "grad_norm": 29.944631576538086, "learning_rate": 4.375e-07, "logits/chosen": -0.5914728045463562, "logits/rejected": -0.6065964102745056, "logps/chosen": -315.5735168457031, "logps/ref_chosen": -316.8347473144531, "logps/ref_rejected": -273.7649230957031, "logps/rejected": -273.6863708496094, "loss": 5.4986, "margin_dpo/margin_mean": 1.182654857635498, "margin_dpo/margin_std": 2.2558624744415283, "step": 43 }, { "epoch": 0.09214659685863874, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.6461647748947144, "fcm_dpo/q_t": 0.49588537216186523, "grad_norm": 30.859792709350586, "learning_rate": 4.479166666666667e-07, "logits/chosen": -0.5914509296417236, "logits/rejected": -0.5897051692008972, "logps/chosen": -285.2916259765625, "logps/ref_chosen": -286.8757019042969, "logps/ref_rejected": -282.4681396484375, "logps/rejected": -282.5302734375, "loss": 5.4806, "margin_dpo/margin_mean": 1.646165370941162, "margin_dpo/margin_std": 3.07133150100708, "step": 44 }, { "epoch": 0.09424083769633508, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.3455177545547485, "fcm_dpo/q_t": 0.496637225151062, "grad_norm": 29.004980087280273, "learning_rate": 4.5833333333333327e-07, "logits/chosen": -0.6958540678024292, "logits/rejected": -0.7206953763961792, "logps/chosen": -322.5387268066406, "logps/ref_chosen": -324.2633972167969, "logps/ref_rejected": -293.09466552734375, "logps/rejected": -292.71551513671875, "loss": 5.4923, "margin_dpo/margin_mean": 1.345517635345459, "margin_dpo/margin_std": 2.75714111328125, "step": 45 }, { "epoch": 0.09633507853403141, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.6059635877609253, "fcm_dpo/q_t": 0.4959862530231476, "grad_norm": 30.163619995117188, "learning_rate": 4.6874999999999996e-07, "logits/chosen": -0.6251657605171204, "logits/rejected": -0.6386570930480957, "logps/chosen": -296.61322021484375, "logps/ref_chosen": -298.3357238769531, "logps/ref_rejected": -267.66204833984375, "logps/rejected": -267.5455017089844, "loss": 5.482, "margin_dpo/margin_mean": 1.6059637069702148, "margin_dpo/margin_std": 2.6878585815429688, "step": 46 }, { "epoch": 0.09842931937172775, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.0325478315353394, "fcm_dpo/q_t": 0.4974192678928375, "grad_norm": 26.348020553588867, "learning_rate": 4.791666666666667e-07, "logits/chosen": -0.6017532348632812, "logits/rejected": -0.6222880482673645, "logps/chosen": -261.1288146972656, "logps/ref_chosen": -262.5669250488281, "logps/ref_rejected": -258.70989990234375, "logps/rejected": -258.3043212890625, "loss": 5.5052, "margin_dpo/margin_mean": 1.0325474739074707, "margin_dpo/margin_std": 3.2608814239501953, "step": 47 }, { "epoch": 0.10052356020942409, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.5231389999389648, "fcm_dpo/q_t": 0.4961942434310913, "grad_norm": 27.5208797454834, "learning_rate": 4.895833333333333e-07, "logits/chosen": -0.6104145646095276, "logits/rejected": -0.6352874636650085, "logps/chosen": -267.69293212890625, "logps/ref_chosen": -269.4932556152344, "logps/ref_rejected": -241.888916015625, "logps/rejected": -241.6117401123047, "loss": 5.4855, "margin_dpo/margin_mean": 1.5231391191482544, "margin_dpo/margin_std": 2.9792253971099854, "step": 48 }, { "epoch": 0.10261780104712041, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.7325966358184814, "fcm_dpo/q_t": 0.4956699013710022, "grad_norm": 27.652172088623047, "learning_rate": 5e-07, "logits/chosen": -0.6746569871902466, "logits/rejected": -0.658962607383728, "logps/chosen": -255.67564392089844, "logps/ref_chosen": -257.8844909667969, "logps/ref_rejected": -256.8912048339844, "logps/rejected": -256.4149475097656, "loss": 5.4774, "margin_dpo/margin_mean": 1.7325971126556396, "margin_dpo/margin_std": 3.360792398452759, "step": 49 }, { "epoch": 0.10471204188481675, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.238340973854065, "fcm_dpo/q_t": 0.4969059228897095, "grad_norm": 27.846542358398438, "learning_rate": 4.999932966293553e-07, "logits/chosen": -0.6347872018814087, "logits/rejected": -0.6571816205978394, "logps/chosen": -299.55889892578125, "logps/ref_chosen": -301.62884521484375, "logps/ref_rejected": -298.2716064453125, "logps/rejected": -297.44000244140625, "loss": 5.4975, "margin_dpo/margin_mean": 1.2383403778076172, "margin_dpo/margin_std": 3.8234386444091797, "step": 50 }, { "epoch": 0.1068062827225131, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.7760026454925537, "fcm_dpo/q_t": 0.4955626130104065, "grad_norm": 29.151445388793945, "learning_rate": 4.999731868769026e-07, "logits/chosen": -0.6137688755989075, "logits/rejected": -0.6077885031700134, "logps/chosen": -267.3522644042969, "logps/ref_chosen": -269.37237548828125, "logps/ref_rejected": -297.0167541503906, "logps/rejected": -296.7726135253906, "loss": 5.4768, "margin_dpo/margin_mean": 1.7760027647018433, "margin_dpo/margin_std": 4.458441734313965, "step": 51 }, { "epoch": 0.10890052356020942, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.1212289333343506, "fcm_dpo/q_t": 0.49220389127731323, "grad_norm": 30.20243263244629, "learning_rate": 4.99939671821067e-07, "logits/chosen": -0.6574031114578247, "logits/rejected": -0.6640452146530151, "logps/chosen": -304.0586242675781, "logps/ref_chosen": -306.9028015136719, "logps/ref_rejected": -281.24737548828125, "logps/rejected": -281.5244140625, "loss": 5.4238, "margin_dpo/margin_mean": 3.1212282180786133, "margin_dpo/margin_std": 4.759432315826416, "step": 52 }, { "epoch": 0.11099476439790576, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 2.1728179454803467, "fcm_dpo/q_t": 0.49457210302352905, "grad_norm": 30.007936477661133, "learning_rate": 4.998927532591591e-07, "logits/chosen": -0.657882034778595, "logits/rejected": -0.6984093189239502, "logps/chosen": -283.18218994140625, "logps/ref_chosen": -285.9759521484375, "logps/ref_rejected": -273.9073486328125, "logps/rejected": -273.2864074707031, "loss": 5.4617, "margin_dpo/margin_mean": 2.172816753387451, "margin_dpo/margin_std": 4.996975898742676, "step": 53 }, { "epoch": 0.1130890052356021, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.9492709636688232, "fcm_dpo/q_t": 0.495132178068161, "grad_norm": 26.357450485229492, "learning_rate": 4.998324337072792e-07, "logits/chosen": -0.6814525127410889, "logits/rejected": -0.6885929703712463, "logps/chosen": -303.84771728515625, "logps/ref_chosen": -306.504638671875, "logps/ref_rejected": -272.67431640625, "logps/rejected": -271.9667053222656, "loss": 5.4709, "margin_dpo/margin_mean": 1.9492708444595337, "margin_dpo/margin_std": 5.48816442489624, "step": 54 }, { "epoch": 0.11518324607329843, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 2.3219242095947266, "fcm_dpo/q_t": 0.49420011043548584, "grad_norm": 24.795543670654297, "learning_rate": 4.997587164001815e-07, "logits/chosen": -0.6326627135276794, "logits/rejected": -0.6362449526786804, "logps/chosen": -220.49732971191406, "logps/ref_chosen": -222.33013916015625, "logps/ref_rejected": -206.59571838378906, "logps/rejected": -207.08482360839844, "loss": 5.4553, "margin_dpo/margin_mean": 2.321924924850464, "margin_dpo/margin_std": 4.869386196136475, "step": 55 }, { "epoch": 0.11727748691099477, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.1078054904937744, "fcm_dpo/q_t": 0.4922420382499695, "grad_norm": 27.52865219116211, "learning_rate": 4.996716052911017e-07, "logits/chosen": -0.6062515377998352, "logits/rejected": -0.6205031871795654, "logps/chosen": -247.61697387695312, "logps/ref_chosen": -250.47816467285156, "logps/ref_rejected": -228.25848388671875, "logps/rejected": -228.50511169433594, "loss": 5.4261, "margin_dpo/margin_mean": 3.1078062057495117, "margin_dpo/margin_std": 5.900341033935547, "step": 56 }, { "epoch": 0.1193717277486911, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 4.170928955078125, "fcm_dpo/q_t": 0.4895845651626587, "grad_norm": 30.931716918945312, "learning_rate": 4.99571105051544e-07, "logits/chosen": -0.6947147846221924, "logits/rejected": -0.6651787161827087, "logps/chosen": -311.2119140625, "logps/ref_chosen": -315.1195373535156, "logps/ref_rejected": -272.755615234375, "logps/rejected": -273.0189208984375, "loss": 5.3835, "margin_dpo/margin_mean": 4.170928001403809, "margin_dpo/margin_std": 5.598484039306641, "step": 57 }, { "epoch": 0.12146596858638743, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 2.4037206172943115, "fcm_dpo/q_t": 0.4939940869808197, "grad_norm": 27.60306167602539, "learning_rate": 4.994572210710314e-07, "logits/chosen": -0.6179156303405762, "logits/rejected": -0.6422260403633118, "logps/chosen": -262.6658020019531, "logps/ref_chosen": -265.1816711425781, "logps/ref_rejected": -268.2203369140625, "logps/rejected": -268.108154296875, "loss": 5.4532, "margin_dpo/margin_mean": 2.4037222862243652, "margin_dpo/margin_std": 5.711413860321045, "step": 58 }, { "epoch": 0.12356020942408377, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.516843318939209, "fcm_dpo/q_t": 0.4962143301963806, "grad_norm": 29.968425750732422, "learning_rate": 4.993299594568162e-07, "logits/chosen": -0.6006144285202026, "logits/rejected": -0.5931594371795654, "logps/chosen": -284.288818359375, "logps/ref_chosen": -286.35394287109375, "logps/ref_rejected": -260.6757507324219, "logps/rejected": -260.12744140625, "loss": 5.4905, "margin_dpo/margin_mean": 1.5168440341949463, "margin_dpo/margin_std": 7.0178422927856445, "step": 59 }, { "epoch": 0.1256544502617801, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.354137897491455, "fcm_dpo/q_t": 0.49162524938583374, "grad_norm": 27.982912063598633, "learning_rate": 4.991893270335525e-07, "logits/chosen": -0.6835321187973022, "logits/rejected": -0.7084705829620361, "logps/chosen": -255.92019653320312, "logps/ref_chosen": -258.74859619140625, "logps/ref_rejected": -255.04893493652344, "logps/rejected": -255.5746612548828, "loss": 5.4176, "margin_dpo/margin_mean": 3.354137897491455, "margin_dpo/margin_std": 7.129515171051025, "step": 60 }, { "epoch": 0.12774869109947645, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.0934977531433105, "fcm_dpo/q_t": 0.49227961897850037, "grad_norm": 29.938720703125, "learning_rate": 4.990353313429303e-07, "logits/chosen": -0.649039089679718, "logits/rejected": -0.6660676598548889, "logps/chosen": -275.51678466796875, "logps/ref_chosen": -278.4678955078125, "logps/ref_rejected": -252.02720642089844, "logps/rejected": -252.1695556640625, "loss": 5.4291, "margin_dpo/margin_mean": 3.0934970378875732, "margin_dpo/margin_std": 7.676670551300049, "step": 61 }, { "epoch": 0.12984293193717278, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.486009120941162, "fcm_dpo/q_t": 0.49130553007125854, "grad_norm": 26.67005729675293, "learning_rate": 4.988679806432711e-07, "logits/chosen": -0.605567216873169, "logits/rejected": -0.6468653082847595, "logps/chosen": -268.9039306640625, "logps/ref_chosen": -272.92431640625, "logps/ref_rejected": -260.7935485839844, "logps/rejected": -260.2591552734375, "loss": 5.4128, "margin_dpo/margin_mean": 3.4860095977783203, "margin_dpo/margin_std": 7.451489448547363, "step": 62 }, { "epoch": 0.1319371727748691, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.4414150714874268, "fcm_dpo/q_t": 0.49141040444374084, "grad_norm": 28.199609756469727, "learning_rate": 4.986872839090852e-07, "logits/chosen": -0.6584441661834717, "logits/rejected": -0.6647105813026428, "logps/chosen": -273.7209167480469, "logps/ref_chosen": -277.0889892578125, "logps/ref_rejected": -273.3413391113281, "logps/rejected": -273.4146728515625, "loss": 5.4143, "margin_dpo/margin_mean": 3.4414143562316895, "margin_dpo/margin_std": 7.199389457702637, "step": 63 }, { "epoch": 0.13403141361256546, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 4.197411060333252, "fcm_dpo/q_t": 0.4895278811454773, "grad_norm": 28.395610809326172, "learning_rate": 4.9849325083059e-07, "logits/chosen": -0.6434694528579712, "logits/rejected": -0.6396706104278564, "logps/chosen": -279.68328857421875, "logps/ref_chosen": -283.8244934082031, "logps/ref_rejected": -263.29351806640625, "logps/rejected": -263.3497314453125, "loss": 5.3866, "margin_dpo/margin_mean": 4.1974101066589355, "margin_dpo/margin_std": 8.332403182983398, "step": 64 }, { "epoch": 0.13612565445026178, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.187953233718872, "fcm_dpo/q_t": 0.49204325675964355, "grad_norm": 27.825172424316406, "learning_rate": 4.982858918131906e-07, "logits/chosen": -0.7001113891601562, "logits/rejected": -0.6681698560714722, "logps/chosen": -261.5180969238281, "logps/ref_chosen": -264.8699645996094, "logps/ref_rejected": -268.5076904296875, "logps/rejected": -268.34381103515625, "loss": 5.4245, "margin_dpo/margin_mean": 3.1879520416259766, "margin_dpo/margin_std": 7.417712211608887, "step": 65 }, { "epoch": 0.1382198952879581, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 4.193648338317871, "fcm_dpo/q_t": 0.48955121636390686, "grad_norm": 27.884878158569336, "learning_rate": 4.980652179769217e-07, "logits/chosen": -0.6776885986328125, "logits/rejected": -0.6976199746131897, "logps/chosen": -269.7621765136719, "logps/ref_chosen": -272.9283142089844, "logps/ref_rejected": -280.94696044921875, "logps/rejected": -281.9744567871094, "loss": 5.3898, "margin_dpo/margin_mean": 4.193647861480713, "margin_dpo/margin_std": 10.121187210083008, "step": 66 }, { "epoch": 0.14031413612565444, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.643204689025879, "fcm_dpo/q_t": 0.4909108579158783, "grad_norm": 25.530357360839844, "learning_rate": 4.978312411558517e-07, "logits/chosen": -0.693301260471344, "logits/rejected": -0.7261889576911926, "logps/chosen": -262.1764221191406, "logps/ref_chosen": -266.18695068359375, "logps/ref_rejected": -250.17405700683594, "logps/rejected": -249.80673217773438, "loss": 5.409, "margin_dpo/margin_mean": 3.643204927444458, "margin_dpo/margin_std": 8.773540496826172, "step": 67 }, { "epoch": 0.1424083769633508, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 4.684436798095703, "fcm_dpo/q_t": 0.4883463978767395, "grad_norm": 28.18437385559082, "learning_rate": 4.975839738974473e-07, "logits/chosen": -0.6813502311706543, "logits/rejected": -0.694290280342102, "logps/chosen": -294.862060546875, "logps/ref_chosen": -297.9385986328125, "logps/ref_rejected": -261.5141296386719, "logps/rejected": -263.1220397949219, "loss": 5.3719, "margin_dpo/margin_mean": 4.684436321258545, "margin_dpo/margin_std": 10.52728271484375, "step": 68 }, { "epoch": 0.14450261780104712, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 6.02636194229126, "fcm_dpo/q_t": 0.48498111963272095, "grad_norm": 28.678905487060547, "learning_rate": 4.97323429461901e-07, "logits/chosen": -0.6936722993850708, "logits/rejected": -0.7256036400794983, "logps/chosen": -261.7213134765625, "logps/ref_chosen": -265.6175231933594, "logps/ref_rejected": -236.8287353515625, "logps/rejected": -238.95887756347656, "loss": 5.3175, "margin_dpo/margin_mean": 6.02636194229126, "margin_dpo/margin_std": 9.588497161865234, "step": 69 }, { "epoch": 0.14659685863874344, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 5.8976898193359375, "fcm_dpo/q_t": 0.4853127598762512, "grad_norm": 28.633691787719727, "learning_rate": 4.970496218214204e-07, "logits/chosen": -0.6690410375595093, "logits/rejected": -0.7014957666397095, "logps/chosen": -291.8380432128906, "logps/ref_chosen": -296.2259216308594, "logps/ref_rejected": -254.68496704101562, "logps/rejected": -256.1947326660156, "loss": 5.3258, "margin_dpo/margin_mean": 5.897688388824463, "margin_dpo/margin_std": 10.961847305297852, "step": 70 }, { "epoch": 0.1486910994764398, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 5.169013500213623, "fcm_dpo/q_t": 0.4871177673339844, "grad_norm": 28.43419647216797, "learning_rate": 4.967625656594781e-07, "logits/chosen": -0.6524871587753296, "logits/rejected": -0.6415574550628662, "logps/chosen": -283.871826171875, "logps/ref_chosen": -288.92724609375, "logps/ref_rejected": -278.6405334472656, "logps/rejected": -278.754150390625, "loss": 5.358, "margin_dpo/margin_mean": 5.169013500213623, "margin_dpo/margin_std": 12.44704532623291, "step": 71 }, { "epoch": 0.15078534031413612, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 5.125137805938721, "fcm_dpo/q_t": 0.48721808195114136, "grad_norm": 28.26305389404297, "learning_rate": 4.964622763700252e-07, "logits/chosen": -0.6996564269065857, "logits/rejected": -0.7112348079681396, "logps/chosen": -233.74087524414062, "logps/ref_chosen": -237.0452880859375, "logps/ref_rejected": -252.7946319580078, "logps/rejected": -254.6153564453125, "loss": 5.3553, "margin_dpo/margin_mean": 5.125139236450195, "margin_dpo/margin_std": 10.407864570617676, "step": 72 }, { "epoch": 0.15287958115183245, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 4.746781349182129, "fcm_dpo/q_t": 0.4881611168384552, "grad_norm": 27.88288688659668, "learning_rate": 4.961487700566646e-07, "logits/chosen": -0.6515335440635681, "logits/rejected": -0.6686420440673828, "logps/chosen": -268.7247314453125, "logps/ref_chosen": -273.0531005859375, "logps/ref_rejected": -246.8330841064453, "logps/rejected": -247.25152587890625, "loss": 5.374, "margin_dpo/margin_mean": 4.746780872344971, "margin_dpo/margin_std": 12.301660537719727, "step": 73 }, { "epoch": 0.1549738219895288, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 4.388714790344238, "fcm_dpo/q_t": 0.48907017707824707, "grad_norm": 30.373809814453125, "learning_rate": 4.958220635317885e-07, "logits/chosen": -0.7225594520568848, "logits/rejected": -0.700709342956543, "logps/chosen": -338.90679931640625, "logps/ref_chosen": -342.2818908691406, "logps/ref_rejected": -330.0293884277344, "logps/rejected": -331.0429382324219, "loss": 5.3862, "margin_dpo/margin_mean": 4.388715744018555, "margin_dpo/margin_std": 11.592477798461914, "step": 74 }, { "epoch": 0.15706806282722513, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 6.691708087921143, "fcm_dpo/q_t": 0.483328640460968, "grad_norm": 29.403121948242188, "learning_rate": 4.954821743156767e-07, "logits/chosen": -0.6306406259536743, "logits/rejected": -0.6319374442100525, "logps/chosen": -262.20196533203125, "logps/ref_chosen": -266.8641662597656, "logps/ref_rejected": -276.8699951171875, "logps/rejected": -278.8995361328125, "loss": 5.2955, "margin_dpo/margin_mean": 6.691707611083984, "margin_dpo/margin_std": 11.022812843322754, "step": 75 }, { "epoch": 0.15916230366492146, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 7.202816486358643, "fcm_dpo/q_t": 0.482099711894989, "grad_norm": 29.140716552734375, "learning_rate": 4.951291206355559e-07, "logits/chosen": -0.7213584780693054, "logits/rejected": -0.7292888760566711, "logps/chosen": -277.04583740234375, "logps/ref_chosen": -281.174560546875, "logps/ref_rejected": -263.6067199707031, "logps/rejected": -266.6807556152344, "loss": 5.2799, "margin_dpo/margin_mean": 7.202816486358643, "margin_dpo/margin_std": 12.760961532592773, "step": 76 }, { "epoch": 0.1612565445026178, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 5.7732439041137695, "fcm_dpo/q_t": 0.4856662452220917, "grad_norm": 33.32221603393555, "learning_rate": 4.947629214246236e-07, "logits/chosen": -0.56251460313797, "logits/rejected": -0.5714588165283203, "logps/chosen": -302.36383056640625, "logps/ref_chosen": -306.09527587890625, "logps/ref_rejected": -253.49569702148438, "logps/rejected": -255.5375213623047, "loss": 5.3388, "margin_dpo/margin_mean": 5.773242950439453, "margin_dpo/margin_std": 14.010757446289062, "step": 77 }, { "epoch": 0.16335078534031414, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 8.770861625671387, "fcm_dpo/q_t": 0.4782388508319855, "grad_norm": 29.759794235229492, "learning_rate": 4.943835963210323e-07, "logits/chosen": -0.6812779903411865, "logits/rejected": -0.6756828427314758, "logps/chosen": -253.0252685546875, "logps/ref_chosen": -256.90234375, "logps/ref_rejected": -211.57154846191406, "logps/rejected": -216.4653778076172, "loss": 5.2239, "margin_dpo/margin_mean": 8.77086067199707, "margin_dpo/margin_std": 14.511454582214355, "step": 78 }, { "epoch": 0.16544502617801046, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 8.177414894104004, "fcm_dpo/q_t": 0.4797084331512451, "grad_norm": 29.77664566040039, "learning_rate": 4.939911656668361e-07, "logits/chosen": -0.6469649076461792, "logits/rejected": -0.6670407652854919, "logps/chosen": -263.2286376953125, "logps/ref_chosen": -266.2735595703125, "logps/ref_rejected": -251.57257080078125, "logps/rejected": -256.705078125, "loss": 5.2488, "margin_dpo/margin_mean": 8.177413940429688, "margin_dpo/margin_std": 14.974796295166016, "step": 79 }, { "epoch": 0.16753926701570682, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 6.5798444747924805, "fcm_dpo/q_t": 0.4836696982383728, "grad_norm": 28.978797912597656, "learning_rate": 4.935856505068998e-07, "logits/chosen": -0.6657726168632507, "logits/rejected": -0.6958042979240417, "logps/chosen": -286.041259765625, "logps/ref_chosen": -287.8509826660156, "logps/ref_rejected": -256.0766296386719, "logps/rejected": -260.84674072265625, "loss": 5.3059, "margin_dpo/margin_mean": 6.579843997955322, "margin_dpo/margin_std": 13.124878883361816, "step": 80 }, { "epoch": 0.16963350785340314, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 7.183926105499268, "fcm_dpo/q_t": 0.4821889102458954, "grad_norm": 28.12186622619629, "learning_rate": 4.93167072587771e-07, "logits/chosen": -0.6388418674468994, "logits/rejected": -0.637535810470581, "logps/chosen": -266.1118469238281, "logps/ref_chosen": -268.5232238769531, "logps/ref_rejected": -237.81137084960938, "logps/rejected": -242.58392333984375, "loss": 5.2927, "margin_dpo/margin_mean": 7.1839280128479, "margin_dpo/margin_std": 16.76314926147461, "step": 81 }, { "epoch": 0.17172774869109947, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 7.849641799926758, "fcm_dpo/q_t": 0.4805837869644165, "grad_norm": 27.822717666625977, "learning_rate": 4.92735454356513e-07, "logits/chosen": -0.7268111705780029, "logits/rejected": -0.7341311573982239, "logps/chosen": -276.9981994628906, "logps/ref_chosen": -279.36395263671875, "logps/ref_rejected": -236.51365661621094, "logps/rejected": -241.99754333496094, "loss": 5.2655, "margin_dpo/margin_mean": 7.849642276763916, "margin_dpo/margin_std": 15.717304229736328, "step": 82 }, { "epoch": 0.17382198952879582, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 8.690813064575195, "fcm_dpo/q_t": 0.4784224033355713, "grad_norm": 30.746620178222656, "learning_rate": 4.922908189595017e-07, "logits/chosen": -0.6818782687187195, "logits/rejected": -0.6649395823478699, "logps/chosen": -273.88427734375, "logps/ref_chosen": -274.21923828125, "logps/ref_rejected": -276.2212219238281, "logps/rejected": -284.57708740234375, "loss": 5.2419, "margin_dpo/margin_mean": 8.690812110900879, "margin_dpo/margin_std": 18.241342544555664, "step": 83 }, { "epoch": 0.17591623036649215, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 7.327735424041748, "fcm_dpo/q_t": 0.48182323575019836, "grad_norm": 29.881669998168945, "learning_rate": 4.918331902411841e-07, "logits/chosen": -0.7337232232093811, "logits/rejected": -0.7480797171592712, "logps/chosen": -293.81634521484375, "logps/ref_chosen": -294.3975524902344, "logps/ref_rejected": -279.81884765625, "logps/rejected": -286.56536865234375, "loss": 5.2885, "margin_dpo/margin_mean": 7.327735900878906, "margin_dpo/margin_std": 16.83694839477539, "step": 84 }, { "epoch": 0.17801047120418848, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 5.688338279724121, "fcm_dpo/q_t": 0.48588407039642334, "grad_norm": 29.326702117919922, "learning_rate": 4.913625927427995e-07, "logits/chosen": -0.6618837118148804, "logits/rejected": -0.6701700687408447, "logps/chosen": -245.18934631347656, "logps/ref_chosen": -243.66220092773438, "logps/ref_rejected": -263.9421691894531, "logps/rejected": -271.15765380859375, "loss": 5.3478, "margin_dpo/margin_mean": 5.688338279724121, "margin_dpo/margin_std": 15.453804016113281, "step": 85 }, { "epoch": 0.18010471204188483, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 8.833646774291992, "fcm_dpo/q_t": 0.4780700206756592, "grad_norm": 34.98909378051758, "learning_rate": 4.908790517010636e-07, "logits/chosen": -0.6862183809280396, "logits/rejected": -0.6790927052497864, "logps/chosen": -308.2589111328125, "logps/ref_chosen": -309.4306945800781, "logps/ref_rejected": -290.91278076171875, "logps/rejected": -298.57464599609375, "loss": 5.2316, "margin_dpo/margin_mean": 8.833646774291992, "margin_dpo/margin_std": 17.44679832458496, "step": 86 }, { "epoch": 0.18219895287958116, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 10.396361351013184, "fcm_dpo/q_t": 0.4743334650993347, "grad_norm": 29.610158920288086, "learning_rate": 4.903825930468148e-07, "logits/chosen": -0.7608101963996887, "logits/rejected": -0.7554056644439697, "logps/chosen": -278.097412109375, "logps/ref_chosen": -278.0277099609375, "logps/ref_rejected": -245.70123291015625, "logps/rejected": -256.167236328125, "loss": 5.1792, "margin_dpo/margin_mean": 10.396361351013184, "margin_dpo/margin_std": 19.02577018737793, "step": 87 }, { "epoch": 0.18429319371727748, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 8.898334503173828, "fcm_dpo/q_t": 0.47800639271736145, "grad_norm": 28.828121185302734, "learning_rate": 4.898732434036243e-07, "logits/chosen": -0.7627401351928711, "logits/rejected": -0.7784541845321655, "logps/chosen": -268.6249694824219, "logps/ref_chosen": -266.5148010253906, "logps/ref_rejected": -265.90081787109375, "logps/rejected": -276.9093017578125, "loss": 5.2352, "margin_dpo/margin_mean": 8.898333549499512, "margin_dpo/margin_std": 19.069538116455078, "step": 88 }, { "epoch": 0.18638743455497384, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 9.238783836364746, "fcm_dpo/q_t": 0.4771183729171753, "grad_norm": 30.569440841674805, "learning_rate": 4.893510300863676e-07, "logits/chosen": -0.742774486541748, "logits/rejected": -0.733336329460144, "logps/chosen": -265.68560791015625, "logps/ref_chosen": -265.6893005371094, "logps/ref_rejected": -251.49314880371094, "logps/rejected": -260.728271484375, "loss": 5.2215, "margin_dpo/margin_mean": 9.238783836364746, "margin_dpo/margin_std": 18.268709182739258, "step": 89 }, { "epoch": 0.18848167539267016, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 8.208330154418945, "fcm_dpo/q_t": 0.47965091466903687, "grad_norm": 29.82548713684082, "learning_rate": 4.8881598109976e-07, "logits/chosen": -0.7374743819236755, "logits/rejected": -0.7458919286727905, "logps/chosen": -308.52386474609375, "logps/ref_chosen": -307.4250183105469, "logps/ref_rejected": -265.7172546386719, "logps/rejected": -275.0244140625, "loss": 5.2577, "margin_dpo/margin_mean": 8.208331108093262, "margin_dpo/margin_std": 17.711767196655273, "step": 90 }, { "epoch": 0.1905759162303665, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 9.566421508789062, "fcm_dpo/q_t": 0.4764086604118347, "grad_norm": 32.80574417114258, "learning_rate": 4.882681251368548e-07, "logits/chosen": -0.6797096729278564, "logits/rejected": -0.6969512104988098, "logps/chosen": -237.80616760253906, "logps/ref_chosen": -235.74098205566406, "logps/ref_rejected": -226.6428985595703, "logps/rejected": -238.27447509765625, "loss": 5.214, "margin_dpo/margin_mean": 9.566422462463379, "margin_dpo/margin_std": 19.243335723876953, "step": 91 }, { "epoch": 0.19267015706806281, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 10.109006881713867, "fcm_dpo/q_t": 0.47501635551452637, "grad_norm": 33.64514923095703, "learning_rate": 4.877074915775048e-07, "logits/chosen": -0.7398884892463684, "logits/rejected": -0.7234249711036682, "logps/chosen": -286.53607177734375, "logps/ref_chosen": -283.4475402832031, "logps/ref_rejected": -273.134033203125, "logps/rejected": -286.33154296875, "loss": 5.1988, "margin_dpo/margin_mean": 10.109006881713867, "margin_dpo/margin_std": 21.27271842956543, "step": 92 }, { "epoch": 0.19476439790575917, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 9.553420066833496, "fcm_dpo/q_t": 0.47643160820007324, "grad_norm": 28.544116973876953, "learning_rate": 4.871341104867864e-07, "logits/chosen": -0.7257264256477356, "logits/rejected": -0.7486892342567444, "logps/chosen": -235.73709106445312, "logps/ref_chosen": -233.33714294433594, "logps/ref_rejected": -230.54273986816406, "logps/rejected": -242.49607849121094, "loss": 5.2133, "margin_dpo/margin_mean": 9.553420066833496, "margin_dpo/margin_std": 19.524932861328125, "step": 93 }, { "epoch": 0.1968586387434555, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 9.401151657104492, "fcm_dpo/q_t": 0.4767385721206665, "grad_norm": 31.77324867248535, "learning_rate": 4.865480126133871e-07, "logits/chosen": -0.6870794296264648, "logits/rejected": -0.708354651927948, "logps/chosen": -297.0319519042969, "logps/ref_chosen": -294.6528015136719, "logps/ref_rejected": -283.657958984375, "logps/rejected": -295.43829345703125, "loss": 5.2293, "margin_dpo/margin_mean": 9.401151657104492, "margin_dpo/margin_std": 21.88288116455078, "step": 94 }, { "epoch": 0.19895287958115182, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 10.18586254119873, "fcm_dpo/q_t": 0.47492167353630066, "grad_norm": 33.443687438964844, "learning_rate": 4.859492293879573e-07, "logits/chosen": -0.7171642184257507, "logits/rejected": -0.7374083399772644, "logps/chosen": -315.00238037109375, "logps/ref_chosen": -311.6697082519531, "logps/ref_rejected": -262.7471923828125, "logps/rejected": -276.2657775878906, "loss": 5.2048, "margin_dpo/margin_mean": 10.185861587524414, "margin_dpo/margin_std": 22.405195236206055, "step": 95 }, { "epoch": 0.20104712041884817, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 10.209500312805176, "fcm_dpo/q_t": 0.4748857915401459, "grad_norm": 32.14888381958008, "learning_rate": 4.853377929214243e-07, "logits/chosen": -0.7045935392379761, "logits/rejected": -0.7170954942703247, "logps/chosen": -287.2989807128906, "logps/ref_chosen": -282.55596923828125, "logps/ref_rejected": -242.71588134765625, "logps/rejected": -257.6683654785156, "loss": 5.2063, "margin_dpo/margin_mean": 10.209501266479492, "margin_dpo/margin_std": 23.4683837890625, "step": 96 }, { "epoch": 0.2031413612565445, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 12.014219284057617, "fcm_dpo/q_t": 0.470403254032135, "grad_norm": 32.0782470703125, "learning_rate": 4.847137360032699e-07, "logits/chosen": -0.7504763007164001, "logits/rejected": -0.7378920316696167, "logps/chosen": -308.1119384765625, "logps/ref_chosen": -303.57781982421875, "logps/ref_rejected": -264.22491455078125, "logps/rejected": -280.7732849121094, "loss": 5.1316, "margin_dpo/margin_mean": 12.014220237731934, "margin_dpo/margin_std": 22.27433967590332, "step": 97 }, { "epoch": 0.20523560209424083, "fcm_dpo/beta": 0.010397407226264477, "fcm_dpo/delta": 0.08688867092132568, "fcm_dpo/margin": 12.346640586853027, "fcm_dpo/q_t": 0.46929067373275757, "grad_norm": 35.79840087890625, "learning_rate": 4.84077092099773e-07, "logits/chosen": -0.7676523923873901, "logits/rejected": -0.7795037031173706, "logps/chosen": -291.7115173339844, "logps/ref_chosen": -286.8303527832031, "logps/ref_rejected": -278.08331298828125, "logps/rejected": -295.31109619140625, "loss": 5.1205, "margin_dpo/margin_mean": 12.346640586853027, "margin_dpo/margin_std": 22.685035705566406, "step": 98 }, { "epoch": 0.20732984293193718, "fcm_dpo/beta": 0.011020062491297722, "fcm_dpo/delta": 0.0930284708738327, "fcm_dpo/margin": 12.543420791625977, "fcm_dpo/q_t": 0.4665352702140808, "grad_norm": 35.79397201538086, "learning_rate": 4.834278953522137e-07, "logits/chosen": -0.734366774559021, "logits/rejected": -0.7471228837966919, "logps/chosen": -285.3235168457031, "logps/ref_chosen": -279.92120361328125, "logps/ref_rejected": -250.3365478515625, "logps/rejected": -268.2822570800781, "loss": 5.1041, "margin_dpo/margin_mean": 12.54341983795166, "margin_dpo/margin_std": 27.157373428344727, "step": 99 }, { "epoch": 0.2094240837696335, "fcm_dpo/beta": 0.012127361260354519, "fcm_dpo/delta": 0.08171184360980988, "fcm_dpo/margin": 12.563081741333008, "fcm_dpo/q_t": 0.4635997712612152, "grad_norm": 41.947509765625, "learning_rate": 4.827661805750437e-07, "logits/chosen": -0.778613269329071, "logits/rejected": -0.7914372086524963, "logps/chosen": -304.66107177734375, "logps/ref_chosen": -296.8276672363281, "logps/ref_rejected": -275.56146240234375, "logps/rejected": -295.9578857421875, "loss": 5.0543, "margin_dpo/margin_mean": 12.563082695007324, "margin_dpo/margin_std": 24.145723342895508, "step": 100 }, { "epoch": 0.21151832460732983, "fcm_dpo/beta": 0.012680807150900364, "fcm_dpo/delta": 0.07358981668949127, "fcm_dpo/margin": 14.978999137878418, "fcm_dpo/q_t": 0.45468670129776, "grad_norm": 40.07128143310547, "learning_rate": 4.820919832540181e-07, "logits/chosen": -0.763048529624939, "logits/rejected": -0.7765510678291321, "logps/chosen": -257.9189758300781, "logps/ref_chosen": -252.74203491210938, "logps/ref_rejected": -276.4185485839844, "logps/rejected": -296.57452392578125, "loss": 4.9422, "margin_dpo/margin_mean": 14.978999137878418, "margin_dpo/margin_std": 26.411649703979492, "step": 101 }, { "epoch": 0.2136125654450262, "fcm_dpo/beta": 0.014082551002502441, "fcm_dpo/delta": 0.15481697022914886, "fcm_dpo/margin": 14.866358757019043, "fcm_dpo/q_t": 0.44989722967147827, "grad_norm": 43.43537139892578, "learning_rate": 4.814053395442932e-07, "logits/chosen": -0.7522528767585754, "logits/rejected": -0.7483828067779541, "logps/chosen": -224.5865020751953, "logps/ref_chosen": -219.5537109375, "logps/ref_rejected": -231.90853881835938, "logps/rejected": -251.80767822265625, "loss": 4.8822, "margin_dpo/margin_mean": 14.866357803344727, "margin_dpo/margin_std": 24.949522018432617, "step": 102 }, { "epoch": 0.2157068062827225, "fcm_dpo/beta": 0.0163778867572546, "fcm_dpo/delta": 0.15776585042476654, "fcm_dpo/margin": 13.612730026245117, "fcm_dpo/q_t": 0.44889208674430847, "grad_norm": 51.776180267333984, "learning_rate": 4.807062862684873e-07, "logits/chosen": -0.7638022899627686, "logits/rejected": -0.7605728507041931, "logps/chosen": -264.34832763671875, "logps/ref_chosen": -259.6750793457031, "logps/ref_rejected": -278.7400817871094, "logps/rejected": -297.0260925292969, "loss": 4.8895, "margin_dpo/margin_mean": 13.612730026245117, "margin_dpo/margin_std": 25.700881958007812, "step": 103 }, { "epoch": 0.21780104712041884, "fcm_dpo/beta": 0.01778705231845379, "fcm_dpo/delta": 0.1246490478515625, "fcm_dpo/margin": 10.009342193603516, "fcm_dpo/q_t": 0.4585469961166382, "grad_norm": 57.93727493286133, "learning_rate": 4.799948609147061e-07, "logits/chosen": -0.767953634262085, "logits/rejected": -0.7740171551704407, "logps/chosen": -277.0461730957031, "logps/ref_chosen": -267.9741516113281, "logps/ref_rejected": -230.5306396484375, "logps/rejected": -249.61195373535156, "loss": 5.0799, "margin_dpo/margin_mean": 10.009342193603516, "margin_dpo/margin_std": 26.243793487548828, "step": 104 }, { "epoch": 0.2198952879581152, "fcm_dpo/beta": 0.019210072234272957, "fcm_dpo/delta": 0.10028429329395294, "fcm_dpo/margin": 20.508113861083984, "fcm_dpo/q_t": 0.40895533561706543, "grad_norm": 61.65319061279297, "learning_rate": 4.792711016345321e-07, "logits/chosen": -0.7570338845252991, "logits/rejected": -0.7671861052513123, "logps/chosen": -327.42303466796875, "logps/ref_chosen": -322.25482177734375, "logps/ref_rejected": -279.02978515625, "logps/rejected": -304.7060852050781, "loss": 4.4105, "margin_dpo/margin_mean": 20.508113861083984, "margin_dpo/margin_std": 26.88151741027832, "step": 105 }, { "epoch": 0.22198952879581152, "fcm_dpo/beta": 0.021580977365374565, "fcm_dpo/delta": 0.1117968037724495, "fcm_dpo/margin": 12.482087135314941, "fcm_dpo/q_t": 0.4391931891441345, "grad_norm": 78.94829559326172, "learning_rate": 4.785350472409791e-07, "logits/chosen": -0.74703049659729, "logits/rejected": -0.784168004989624, "logps/chosen": -308.35302734375, "logps/ref_chosen": -296.15777587890625, "logps/ref_rejected": -266.2691650390625, "logps/rejected": -290.9465026855469, "loss": 4.9393, "margin_dpo/margin_mean": 12.482088088989258, "margin_dpo/margin_std": 29.17128562927246, "step": 106 }, { "epoch": 0.22408376963350785, "fcm_dpo/beta": 0.023654192686080933, "fcm_dpo/delta": 0.14638309180736542, "fcm_dpo/margin": 19.429903030395508, "fcm_dpo/q_t": 0.3981781005859375, "grad_norm": 77.15711975097656, "learning_rate": 4.777867372064105e-07, "logits/chosen": -0.7866736054420471, "logits/rejected": -0.7802896499633789, "logps/chosen": -310.7807312011719, "logps/ref_chosen": -306.996337890625, "logps/ref_rejected": -296.79412841796875, "logps/rejected": -320.0083923339844, "loss": 4.3175, "margin_dpo/margin_mean": 19.429903030395508, "margin_dpo/margin_std": 27.29071044921875, "step": 107 }, { "epoch": 0.2261780104712042, "fcm_dpo/beta": 0.02535373345017433, "fcm_dpo/delta": 0.09316066652536392, "fcm_dpo/margin": 18.04349136352539, "fcm_dpo/q_t": 0.40335312485694885, "grad_norm": 286.72467041015625, "learning_rate": 4.770262116604223e-07, "logits/chosen": -0.7606772780418396, "logits/rejected": -0.7718777656555176, "logps/chosen": -299.8807067871094, "logps/ref_chosen": -295.1526794433594, "logps/ref_rejected": -235.974853515625, "logps/rejected": -258.74639892578125, "loss": 4.4948, "margin_dpo/margin_mean": 18.043493270874023, "margin_dpo/margin_std": 29.69964599609375, "step": 108 }, { "epoch": 0.22827225130890053, "fcm_dpo/beta": 0.02665630169212818, "fcm_dpo/delta": 0.009390286169946194, "fcm_dpo/margin": 19.90768814086914, "fcm_dpo/q_t": 0.38985177874565125, "grad_norm": 89.05216979980469, "learning_rate": 4.7625351138769166e-07, "logits/chosen": -0.7884357571601868, "logits/rejected": -0.785269558429718, "logps/chosen": -333.0575256347656, "logps/ref_chosen": -325.9248046875, "logps/ref_rejected": -279.15423583984375, "logps/rejected": -306.19464111328125, "loss": 4.3126, "margin_dpo/margin_mean": 19.907691955566406, "margin_dpo/margin_std": 30.949382781982422, "step": 109 }, { "epoch": 0.23036649214659685, "fcm_dpo/beta": 0.028177602216601372, "fcm_dpo/delta": 0.07937376946210861, "fcm_dpo/margin": 18.563549041748047, "fcm_dpo/q_t": 0.39298582077026367, "grad_norm": 85.68260192871094, "learning_rate": 4.75468677825789e-07, "logits/chosen": -0.797019362449646, "logits/rejected": -0.785659670829773, "logps/chosen": -281.6994934082031, "logps/ref_chosen": -274.439208984375, "logps/ref_rejected": -260.0552062988281, "logps/rejected": -285.87908935546875, "loss": 4.4857, "margin_dpo/margin_mean": 18.563547134399414, "margin_dpo/margin_std": 32.792720794677734, "step": 110 }, { "epoch": 0.2324607329842932, "fcm_dpo/beta": 0.029292024672031403, "fcm_dpo/delta": 0.026741422712802887, "fcm_dpo/margin": 19.533769607543945, "fcm_dpo/q_t": 0.3849615156650543, "grad_norm": 93.73371887207031, "learning_rate": 4.7467175306295647e-07, "logits/chosen": -0.8400822877883911, "logits/rejected": -0.8202126622200012, "logps/chosen": -336.7535705566406, "logps/ref_chosen": -329.2361755371094, "logps/ref_rejected": -287.82830810546875, "logps/rejected": -314.87945556640625, "loss": 4.4179, "margin_dpo/margin_mean": 19.533769607543945, "margin_dpo/margin_std": 32.756980895996094, "step": 111 }, { "epoch": 0.23455497382198953, "fcm_dpo/beta": 0.028662927448749542, "fcm_dpo/delta": -0.024072205647826195, "fcm_dpo/margin": 12.431413650512695, "fcm_dpo/q_t": 0.4280843138694763, "grad_norm": 117.88312530517578, "learning_rate": 4.7386277983585053e-07, "logits/chosen": -0.7474071979522705, "logits/rejected": -0.7788360714912415, "logps/chosen": -269.38671875, "logps/ref_chosen": -257.0593566894531, "logps/ref_rejected": -272.9595031738281, "logps/rejected": -297.7182922363281, "loss": 5.1908, "margin_dpo/margin_mean": 12.431414604187012, "margin_dpo/margin_std": 33.55975341796875, "step": 112 }, { "epoch": 0.23664921465968586, "fcm_dpo/beta": 0.02724049799144268, "fcm_dpo/delta": -0.033632293343544006, "fcm_dpo/margin": 23.016876220703125, "fcm_dpo/q_t": 0.3752860426902771, "grad_norm": 88.89751434326172, "learning_rate": 4.7304180152725024e-07, "logits/chosen": -0.7992004156112671, "logits/rejected": -0.8044995069503784, "logps/chosen": -295.09869384765625, "logps/ref_chosen": -286.0416564941406, "logps/ref_rejected": -270.374267578125, "logps/rejected": -302.4481506347656, "loss": 4.276, "margin_dpo/margin_mean": 23.016876220703125, "margin_dpo/margin_std": 36.376365661621094, "step": 113 }, { "epoch": 0.2387434554973822, "fcm_dpo/beta": 0.029494168236851692, "fcm_dpo/delta": 0.07874082773923874, "fcm_dpo/margin": 12.47346019744873, "fcm_dpo/q_t": 0.4217187762260437, "grad_norm": 107.33486938476562, "learning_rate": 4.7220886216373085e-07, "logits/chosen": -0.8364426493644714, "logits/rejected": -0.8387445211410522, "logps/chosen": -271.4934387207031, "logps/ref_chosen": -260.0084533691406, "logps/ref_rejected": -246.67190551757812, "logps/rejected": -270.6303405761719, "loss": 5.0381, "margin_dpo/margin_mean": 12.473462104797363, "margin_dpo/margin_std": 31.346172332763672, "step": 114 }, { "epoch": 0.24083769633507854, "fcm_dpo/beta": 0.0292170662432909, "fcm_dpo/delta": 0.01833523064851761, "fcm_dpo/margin": 12.282448768615723, "fcm_dpo/q_t": 0.43316584825515747, "grad_norm": 120.01754760742188, "learning_rate": 4.7136400641330245e-07, "logits/chosen": -0.8408206701278687, "logits/rejected": -0.8037774562835693, "logps/chosen": -310.68414306640625, "logps/ref_chosen": -299.4229736328125, "logps/ref_rejected": -272.1186828613281, "logps/rejected": -295.662353515625, "loss": 5.1455, "margin_dpo/margin_mean": 12.28244686126709, "margin_dpo/margin_std": 34.40483856201172, "step": 115 }, { "epoch": 0.24293193717277486, "fcm_dpo/beta": 0.031351905316114426, "fcm_dpo/delta": 0.08501939475536346, "fcm_dpo/margin": 16.533979415893555, "fcm_dpo/q_t": 0.3935103118419647, "grad_norm": 97.82234191894531, "learning_rate": 4.70507279583015e-07, "logits/chosen": -0.8445395827293396, "logits/rejected": -0.8092914819717407, "logps/chosen": -285.07208251953125, "logps/ref_chosen": -279.263916015625, "logps/ref_rejected": -253.6192169189453, "logps/rejected": -275.9613342285156, "loss": 4.5439, "margin_dpo/margin_mean": 16.533979415893555, "margin_dpo/margin_std": 30.13557243347168, "step": 116 }, { "epoch": 0.2450261780104712, "fcm_dpo/beta": 0.03163749352097511, "fcm_dpo/delta": -0.10516245663166046, "fcm_dpo/margin": 17.771800994873047, "fcm_dpo/q_t": 0.38749605417251587, "grad_norm": 112.60164642333984, "learning_rate": 4.6963872761652834e-07, "logits/chosen": -0.8044043779373169, "logits/rejected": -0.8110002875328064, "logps/chosen": -266.38836669921875, "logps/ref_chosen": -259.2248840332031, "logps/ref_rejected": -229.3042755126953, "logps/rejected": -254.23956298828125, "loss": 4.4661, "margin_dpo/margin_mean": 17.771800994873047, "margin_dpo/margin_std": 28.43597412109375, "step": 117 }, { "epoch": 0.24712041884816754, "fcm_dpo/beta": 0.029554441571235657, "fcm_dpo/delta": -0.03656444326043129, "fcm_dpo/margin": 19.763996124267578, "fcm_dpo/q_t": 0.3844214081764221, "grad_norm": 114.48808288574219, "learning_rate": 4.687583970916486e-07, "logits/chosen": -0.7859967947006226, "logits/rejected": -0.7776579856872559, "logps/chosen": -277.0380859375, "logps/ref_chosen": -267.0707092285156, "logps/ref_rejected": -272.7322082519531, "logps/rejected": -302.46356201171875, "loss": 4.481, "margin_dpo/margin_mean": 19.763996124267578, "margin_dpo/margin_std": 34.8245964050293, "step": 118 }, { "epoch": 0.24921465968586387, "fcm_dpo/beta": 0.029193801805377007, "fcm_dpo/delta": -0.002929478883743286, "fcm_dpo/margin": 15.792888641357422, "fcm_dpo/q_t": 0.40991657972335815, "grad_norm": 117.3210678100586, "learning_rate": 4.6786633521783005e-07, "logits/chosen": -0.8547701835632324, "logits/rejected": -0.8576165437698364, "logps/chosen": -336.96929931640625, "logps/ref_chosen": -324.6766357421875, "logps/ref_rejected": -306.0322265625, "logps/rejected": -334.1177673339844, "loss": 4.8945, "margin_dpo/margin_mean": 15.792887687683105, "margin_dpo/margin_std": 34.564945220947266, "step": 119 }, { "epoch": 0.2513089005235602, "fcm_dpo/beta": 0.029505960643291473, "fcm_dpo/delta": 0.015900129452347755, "fcm_dpo/margin": 15.261905670166016, "fcm_dpo/q_t": 0.4121847450733185, "grad_norm": 98.33564758300781, "learning_rate": 4.669625898336438e-07, "logits/chosen": -0.8068041801452637, "logits/rejected": -0.8266972303390503, "logps/chosen": -325.0469665527344, "logps/ref_chosen": -315.2617492675781, "logps/ref_rejected": -265.32501220703125, "logps/rejected": -290.37213134765625, "loss": 4.8972, "margin_dpo/margin_mean": 15.261905670166016, "margin_dpo/margin_std": 33.490478515625, "step": 120 }, { "epoch": 0.2534031413612565, "fcm_dpo/beta": 0.031028514727950096, "fcm_dpo/delta": 0.14798876643180847, "fcm_dpo/margin": 12.815652847290039, "fcm_dpo/q_t": 0.4254152476787567, "grad_norm": 112.03368377685547, "learning_rate": 4.6604720940421207e-07, "logits/chosen": -0.8176071047782898, "logits/rejected": -0.834455668926239, "logps/chosen": -236.07276916503906, "logps/ref_chosen": -222.99609375, "logps/ref_rejected": -226.92860412597656, "logps/rejected": -252.82089233398438, "loss": 5.0135, "margin_dpo/margin_mean": 12.815652847290039, "margin_dpo/margin_std": 32.47069549560547, "step": 121 }, { "epoch": 0.2554973821989529, "fcm_dpo/beta": 0.031968794763088226, "fcm_dpo/delta": -0.020312972366809845, "fcm_dpo/margin": 15.189802169799805, "fcm_dpo/q_t": 0.40527546405792236, "grad_norm": 117.55231475830078, "learning_rate": 4.651202430186092e-07, "logits/chosen": -0.8868773579597473, "logits/rejected": -0.8504554033279419, "logps/chosen": -288.91522216796875, "logps/ref_chosen": -276.02630615234375, "logps/ref_rejected": -277.97418212890625, "logps/rejected": -306.0528259277344, "loss": 4.926, "margin_dpo/margin_mean": 15.189804077148438, "margin_dpo/margin_std": 34.809261322021484, "step": 122 }, { "epoch": 0.25759162303664923, "fcm_dpo/beta": 0.031755901873111725, "fcm_dpo/delta": -0.06527850031852722, "fcm_dpo/margin": 20.802139282226562, "fcm_dpo/q_t": 0.36643853783607483, "grad_norm": 115.1010513305664, "learning_rate": 4.6418174038722924e-07, "logits/chosen": -0.7995103597640991, "logits/rejected": -0.7998446226119995, "logps/chosen": -335.6615295410156, "logps/ref_chosen": -328.1546325683594, "logps/ref_rejected": -280.6911315917969, "logps/rejected": -309.00018310546875, "loss": 4.2495, "margin_dpo/margin_mean": 20.802139282226562, "margin_dpo/margin_std": 32.38178634643555, "step": 123 }, { "epoch": 0.25968586387434556, "fcm_dpo/beta": 0.03064357116818428, "fcm_dpo/delta": 0.03005780465900898, "fcm_dpo/margin": 16.928504943847656, "fcm_dpo/q_t": 0.3922573924064636, "grad_norm": 101.0470199584961, "learning_rate": 4.6323175183912023e-07, "logits/chosen": -0.8197784423828125, "logits/rejected": -0.7911043763160706, "logps/chosen": -285.9018859863281, "logps/ref_chosen": -275.6961975097656, "logps/ref_rejected": -225.361572265625, "logps/rejected": -252.49575805664062, "loss": 4.5875, "margin_dpo/margin_mean": 16.928504943847656, "margin_dpo/margin_std": 30.196701049804688, "step": 124 }, { "epoch": 0.2617801047120419, "fcm_dpo/beta": 0.03097906894981861, "fcm_dpo/delta": -0.034325286746025085, "fcm_dpo/margin": 16.488847732543945, "fcm_dpo/q_t": 0.4054495692253113, "grad_norm": 123.87239074707031, "learning_rate": 4.6227032831928483e-07, "logits/chosen": -0.7871371507644653, "logits/rejected": -0.7472814321517944, "logps/chosen": -288.9555358886719, "logps/ref_chosen": -278.06976318359375, "logps/ref_rejected": -265.63873291015625, "logps/rejected": -293.0133361816406, "loss": 4.8993, "margin_dpo/margin_mean": 16.488849639892578, "margin_dpo/margin_std": 36.578277587890625, "step": 125 }, { "epoch": 0.2638743455497382, "fcm_dpo/beta": 0.02998891845345497, "fcm_dpo/delta": -0.01702306792140007, "fcm_dpo/margin": 18.2719669342041, "fcm_dpo/q_t": 0.38644424080848694, "grad_norm": 109.28790283203125, "learning_rate": 4.612975213859487e-07, "logits/chosen": -0.7935796976089478, "logits/rejected": -0.8115208745002747, "logps/chosen": -329.9949951171875, "logps/ref_chosen": -321.3960876464844, "logps/ref_rejected": -285.37664794921875, "logps/rejected": -312.24749755859375, "loss": 4.4632, "margin_dpo/margin_mean": 18.271968841552734, "margin_dpo/margin_std": 31.196125030517578, "step": 126 }, { "epoch": 0.26596858638743454, "fcm_dpo/beta": 0.030074207112193108, "fcm_dpo/delta": -0.09070973843336105, "fcm_dpo/margin": 20.92560386657715, "fcm_dpo/q_t": 0.3753412663936615, "grad_norm": 109.4990463256836, "learning_rate": 4.603133832077953e-07, "logits/chosen": -0.8826844692230225, "logits/rejected": -0.836493730545044, "logps/chosen": -313.4111328125, "logps/ref_chosen": -306.55877685546875, "logps/ref_rejected": -274.8651428222656, "logps/rejected": -302.6430969238281, "loss": 4.3094, "margin_dpo/margin_mean": 20.925601959228516, "margin_dpo/margin_std": 31.99746322631836, "step": 127 }, { "epoch": 0.2680628272251309, "fcm_dpo/beta": 0.026865279302001, "fcm_dpo/delta": -0.0676947608590126, "fcm_dpo/margin": 22.934003829956055, "fcm_dpo/q_t": 0.3755223751068115, "grad_norm": 87.6462173461914, "learning_rate": 4.5931796656116837e-07, "logits/chosen": -0.7586472630500793, "logits/rejected": -0.7625279426574707, "logps/chosen": -268.45068359375, "logps/ref_chosen": -265.3973693847656, "logps/ref_rejected": -250.9737548828125, "logps/rejected": -276.9610290527344, "loss": 4.2379, "margin_dpo/margin_mean": 22.934003829956055, "margin_dpo/margin_std": 35.073516845703125, "step": 128 }, { "epoch": 0.27015706806282724, "fcm_dpo/beta": 0.027813997119665146, "fcm_dpo/delta": 0.054987452924251556, "fcm_dpo/margin": 19.57146453857422, "fcm_dpo/q_t": 0.3910842835903168, "grad_norm": 93.74571990966797, "learning_rate": 4.5831132482724193e-07, "logits/chosen": -0.799019455909729, "logits/rejected": -0.8034683465957642, "logps/chosen": -307.2705993652344, "logps/ref_chosen": -303.158447265625, "logps/ref_rejected": -275.9891052246094, "logps/rejected": -299.6726989746094, "loss": 4.4534, "margin_dpo/margin_mean": 19.57146453857422, "margin_dpo/margin_std": 34.049896240234375, "step": 129 }, { "epoch": 0.27225130890052357, "fcm_dpo/beta": 0.028576284646987915, "fcm_dpo/delta": 0.07245050370693207, "fcm_dpo/margin": 16.821483612060547, "fcm_dpo/q_t": 0.4015900492668152, "grad_norm": 103.17170715332031, "learning_rate": 4.5729351198915705e-07, "logits/chosen": -0.7722957730293274, "logits/rejected": -0.8155333399772644, "logps/chosen": -292.2886047363281, "logps/ref_chosen": -286.4073486328125, "logps/ref_rejected": -294.38665771484375, "logps/rejected": -317.08941650390625, "loss": 4.6124, "margin_dpo/margin_mean": 16.82148551940918, "margin_dpo/margin_std": 32.20860290527344, "step": 130 }, { "epoch": 0.2743455497382199, "fcm_dpo/beta": 0.03118608519434929, "fcm_dpo/delta": 0.12869605422019958, "fcm_dpo/margin": 15.276546478271484, "fcm_dpo/q_t": 0.40500974655151367, "grad_norm": 124.76173400878906, "learning_rate": 4.5626458262912735e-07, "logits/chosen": -0.8375826478004456, "logits/rejected": -0.7880801558494568, "logps/chosen": -317.2205810546875, "logps/ref_chosen": -311.5650634765625, "logps/ref_rejected": -291.62432861328125, "logps/rejected": -312.5564270019531, "loss": 4.7363, "margin_dpo/margin_mean": 15.276546478271484, "margin_dpo/margin_std": 31.08780288696289, "step": 131 }, { "epoch": 0.2764397905759162, "fcm_dpo/beta": 0.03283756971359253, "fcm_dpo/delta": -0.08807911723852158, "fcm_dpo/margin": 20.79026985168457, "fcm_dpo/q_t": 0.3727704584598541, "grad_norm": 137.25875854492188, "learning_rate": 4.5522459192551166e-07, "logits/chosen": -0.796299934387207, "logits/rejected": -0.7804505228996277, "logps/chosen": -272.0005798339844, "logps/ref_chosen": -270.0818176269531, "logps/ref_rejected": -284.3084411621094, "logps/rejected": -307.0174560546875, "loss": 4.3546, "margin_dpo/margin_mean": 20.79026985168457, "margin_dpo/margin_std": 34.049842834472656, "step": 132 }, { "epoch": 0.27853403141361255, "fcm_dpo/beta": 0.0303749218583107, "fcm_dpo/delta": -0.02263142168521881, "fcm_dpo/margin": 18.328353881835938, "fcm_dpo/q_t": 0.38462090492248535, "grad_norm": 95.446533203125, "learning_rate": 4.541735956498554e-07, "logits/chosen": -0.8196045160293579, "logits/rejected": -0.8250406980514526, "logps/chosen": -287.2611389160156, "logps/ref_chosen": -285.6213684082031, "logps/ref_rejected": -251.19386291503906, "logps/rejected": -271.1619873046875, "loss": 4.4574, "margin_dpo/margin_mean": 18.328353881835938, "margin_dpo/margin_std": 30.608631134033203, "step": 133 }, { "epoch": 0.2806282722513089, "fcm_dpo/beta": 0.030752191320061684, "fcm_dpo/delta": 0.052163075655698776, "fcm_dpo/margin": 15.364531517028809, "fcm_dpo/q_t": 0.40057751536369324, "grad_norm": 106.3124008178711, "learning_rate": 4.5311165016389914e-07, "logits/chosen": -0.8452025651931763, "logits/rejected": -0.8484188914299011, "logps/chosen": -327.99664306640625, "logps/ref_chosen": -318.92083740234375, "logps/ref_rejected": -293.1894836425781, "logps/rejected": -317.62982177734375, "loss": 4.6623, "margin_dpo/margin_mean": 15.364532470703125, "margin_dpo/margin_std": 29.14794158935547, "step": 134 }, { "epoch": 0.28272251308900526, "fcm_dpo/beta": 0.031104128807783127, "fcm_dpo/delta": 0.00010712631046772003, "fcm_dpo/margin": 17.666057586669922, "fcm_dpo/q_t": 0.3856186866760254, "grad_norm": 131.088623046875, "learning_rate": 4.520388124165564e-07, "logits/chosen": -0.7372372150421143, "logits/rejected": -0.7832177877426147, "logps/chosen": -296.5128173828125, "logps/ref_chosen": -292.8217468261719, "logps/ref_rejected": -269.2896728515625, "logps/rejected": -290.6468505859375, "loss": 4.3699, "margin_dpo/margin_mean": 17.66605567932129, "margin_dpo/margin_std": 27.916501998901367, "step": 135 }, { "epoch": 0.2848167539267016, "fcm_dpo/beta": 0.03133618086576462, "fcm_dpo/delta": 0.03240864723920822, "fcm_dpo/margin": 16.50086212158203, "fcm_dpo/q_t": 0.4006633460521698, "grad_norm": 129.73684692382812, "learning_rate": 4.5095513994085974e-07, "logits/chosen": -0.7910470366477966, "logits/rejected": -0.7884470224380493, "logps/chosen": -278.3386535644531, "logps/ref_chosen": -272.8525390625, "logps/ref_rejected": -252.68202209472656, "logps/rejected": -274.6689758300781, "loss": 4.7126, "margin_dpo/margin_mean": 16.500864028930664, "margin_dpo/margin_std": 32.701168060302734, "step": 136 }, { "epoch": 0.2869109947643979, "fcm_dpo/beta": 0.03253614902496338, "fcm_dpo/delta": 0.03034902550280094, "fcm_dpo/margin": 15.305620193481445, "fcm_dpo/q_t": 0.4033234417438507, "grad_norm": 127.83984375, "learning_rate": 4.498606908508753e-07, "logits/chosen": -0.8439798355102539, "logits/rejected": -0.8307949304580688, "logps/chosen": -308.5978698730469, "logps/ref_chosen": -300.7522277832031, "logps/ref_rejected": -286.1935119628906, "logps/rejected": -309.34478759765625, "loss": 4.7393, "margin_dpo/margin_mean": 15.305620193481445, "margin_dpo/margin_std": 31.10264778137207, "step": 137 }, { "epoch": 0.28900523560209423, "fcm_dpo/beta": 0.032551947981119156, "fcm_dpo/delta": 0.005163721740245819, "fcm_dpo/margin": 18.243833541870117, "fcm_dpo/q_t": 0.3905155658721924, "grad_norm": 106.98749542236328, "learning_rate": 4.487555238385862e-07, "logits/chosen": -0.7689125537872314, "logits/rejected": -0.7516045570373535, "logps/chosen": -294.6270751953125, "logps/ref_chosen": -288.9369812011719, "logps/ref_rejected": -263.7076416015625, "logps/rejected": -287.6415710449219, "loss": 4.5624, "margin_dpo/margin_mean": 18.243831634521484, "margin_dpo/margin_std": 34.325828552246094, "step": 138 }, { "epoch": 0.29109947643979056, "fcm_dpo/beta": 0.033183857798576355, "fcm_dpo/delta": 0.033980607986450195, "fcm_dpo/margin": 12.946343421936035, "fcm_dpo/q_t": 0.4189698100090027, "grad_norm": 115.80516052246094, "learning_rate": 4.476396981707453e-07, "logits/chosen": -0.7807446718215942, "logits/rejected": -0.8127070069313049, "logps/chosen": -274.021728515625, "logps/ref_chosen": -270.0443115234375, "logps/ref_rejected": -267.3226013183594, "logps/rejected": -284.246337890625, "loss": 4.9004, "margin_dpo/margin_mean": 12.946342468261719, "margin_dpo/margin_std": 29.353422164916992, "step": 139 }, { "epoch": 0.2931937172774869, "fcm_dpo/beta": 0.034226901829242706, "fcm_dpo/delta": -0.004557475447654724, "fcm_dpo/margin": 17.575387954711914, "fcm_dpo/q_t": 0.38020825386047363, "grad_norm": 128.48643493652344, "learning_rate": 4.4651327368569684e-07, "logits/chosen": -0.8485485315322876, "logits/rejected": -0.8208659291267395, "logps/chosen": -287.2353515625, "logps/ref_chosen": -282.9555969238281, "logps/ref_rejected": -251.17181396484375, "logps/rejected": -273.0269775390625, "loss": 4.2958, "margin_dpo/margin_mean": 17.575389862060547, "margin_dpo/margin_std": 27.578033447265625, "step": 140 }, { "epoch": 0.29528795811518327, "fcm_dpo/beta": 0.03216833248734474, "fcm_dpo/delta": -0.05748983472585678, "fcm_dpo/margin": 20.221830368041992, "fcm_dpo/q_t": 0.3674147129058838, "grad_norm": 109.13350677490234, "learning_rate": 4.453763107901675e-07, "logits/chosen": -0.7944917678833008, "logits/rejected": -0.794426679611206, "logps/chosen": -298.8318786621094, "logps/ref_chosen": -296.3001708984375, "logps/ref_rejected": -279.8486633300781, "logps/rejected": -302.6021728515625, "loss": 4.2543, "margin_dpo/margin_mean": 20.221830368041992, "margin_dpo/margin_std": 31.374059677124023, "step": 141 }, { "epoch": 0.2973821989528796, "fcm_dpo/beta": 0.03117799200117588, "fcm_dpo/delta": -0.002421182580292225, "fcm_dpo/margin": 16.759801864624023, "fcm_dpo/q_t": 0.4001937210559845, "grad_norm": 104.06517791748047, "learning_rate": 4.4422887045602674e-07, "logits/chosen": -0.8062896728515625, "logits/rejected": -0.812911331653595, "logps/chosen": -304.14556884765625, "logps/ref_chosen": -300.56585693359375, "logps/ref_rejected": -231.43316650390625, "logps/rejected": -251.77268981933594, "loss": 4.7004, "margin_dpo/margin_mean": 16.759801864624023, "margin_dpo/margin_std": 33.37417984008789, "step": 142 }, { "epoch": 0.2994764397905759, "fcm_dpo/beta": 0.031860120594501495, "fcm_dpo/delta": -0.003158077597618103, "fcm_dpo/margin": 18.83399200439453, "fcm_dpo/q_t": 0.378153920173645, "grad_norm": 109.7158203125, "learning_rate": 4.4307101421701755e-07, "logits/chosen": -0.7957272529602051, "logits/rejected": -0.7826195955276489, "logps/chosen": -300.0506591796875, "logps/ref_chosen": -296.73236083984375, "logps/ref_rejected": -266.45257568359375, "logps/rejected": -288.6048889160156, "loss": 4.3108, "margin_dpo/margin_mean": 18.83399200439453, "margin_dpo/margin_std": 29.4503173828125, "step": 143 }, { "epoch": 0.30157068062827225, "fcm_dpo/beta": 0.03140409663319588, "fcm_dpo/delta": 0.05489187315106392, "fcm_dpo/margin": 16.132699966430664, "fcm_dpo/q_t": 0.4020684063434601, "grad_norm": 109.79220581054688, "learning_rate": 4.419028041654559e-07, "logits/chosen": -0.8549841642379761, "logits/rejected": -0.8448209762573242, "logps/chosen": -303.1390380859375, "logps/ref_chosen": -298.843994140625, "logps/ref_rejected": -266.120849609375, "logps/rejected": -286.548583984375, "loss": 4.6749, "margin_dpo/margin_mean": 16.132701873779297, "margin_dpo/margin_std": 32.414798736572266, "step": 144 }, { "epoch": 0.3036649214659686, "fcm_dpo/beta": 0.03131024166941643, "fcm_dpo/delta": -0.10811541974544525, "fcm_dpo/margin": 20.41290855407715, "fcm_dpo/q_t": 0.36850613355636597, "grad_norm": 102.48077392578125, "learning_rate": 4.4072430294890166e-07, "logits/chosen": -0.8508666157722473, "logits/rejected": -0.8601468801498413, "logps/chosen": -278.57275390625, "logps/ref_chosen": -275.7528381347656, "logps/ref_rejected": -214.74807739257812, "logps/rejected": -237.98089599609375, "loss": 4.1601, "margin_dpo/margin_mean": 20.41291046142578, "margin_dpo/margin_std": 28.729717254638672, "step": 145 }, { "epoch": 0.3057591623036649, "fcm_dpo/beta": 0.030361486598849297, "fcm_dpo/delta": 0.01096423901617527, "fcm_dpo/margin": 19.31856918334961, "fcm_dpo/q_t": 0.3816215991973877, "grad_norm": 101.0565414428711, "learning_rate": 4.395355737667985e-07, "logits/chosen": -0.8102554082870483, "logits/rejected": -0.8105416297912598, "logps/chosen": -285.0329284667969, "logps/ref_chosen": -277.09820556640625, "logps/ref_rejected": -265.41046142578125, "logps/rejected": -292.6636962890625, "loss": 4.2858, "margin_dpo/margin_mean": 19.318571090698242, "margin_dpo/margin_std": 29.257003784179688, "step": 146 }, { "epoch": 0.3078534031413613, "fcm_dpo/beta": 0.03215925768017769, "fcm_dpo/delta": 0.032970868051052094, "fcm_dpo/margin": 15.492907524108887, "fcm_dpo/q_t": 0.3999587893486023, "grad_norm": 105.35274505615234, "learning_rate": 4.3833668036708483e-07, "logits/chosen": -0.8169939517974854, "logits/rejected": -0.8200792074203491, "logps/chosen": -299.4952697753906, "logps/ref_chosen": -291.4185791015625, "logps/ref_rejected": -253.43051147460938, "logps/rejected": -277.0000915527344, "loss": 4.7918, "margin_dpo/margin_mean": 15.492908477783203, "margin_dpo/margin_std": 32.0605354309082, "step": 147 }, { "epoch": 0.3099476439790576, "fcm_dpo/beta": 0.032771460711956024, "fcm_dpo/delta": 0.0693143978714943, "fcm_dpo/margin": 15.142913818359375, "fcm_dpo/q_t": 0.40560245513916016, "grad_norm": 105.60718536376953, "learning_rate": 4.3712768704277524e-07, "logits/chosen": -0.8757405281066895, "logits/rejected": -0.8822675943374634, "logps/chosen": -244.65098571777344, "logps/ref_chosen": -236.74850463867188, "logps/ref_rejected": -231.4674072265625, "logps/rejected": -254.51280212402344, "loss": 4.7622, "margin_dpo/margin_mean": 15.142913818359375, "margin_dpo/margin_std": 31.51068115234375, "step": 148 }, { "epoch": 0.31204188481675393, "fcm_dpo/beta": 0.03229037672281265, "fcm_dpo/delta": -0.048303790390491486, "fcm_dpo/margin": 19.920228958129883, "fcm_dpo/q_t": 0.3683924376964569, "grad_norm": 107.26622772216797, "learning_rate": 4.3590865862851263e-07, "logits/chosen": -0.829014241695404, "logits/rejected": -0.8182701468467712, "logps/chosen": -326.3049621582031, "logps/ref_chosen": -319.9284973144531, "logps/ref_rejected": -308.20233154296875, "logps/rejected": -334.4990234375, "loss": 4.0688, "margin_dpo/margin_mean": 19.920230865478516, "margin_dpo/margin_std": 27.486299514770508, "step": 149 }, { "epoch": 0.31413612565445026, "fcm_dpo/beta": 0.03191431611776352, "fcm_dpo/delta": 0.010023342445492744, "fcm_dpo/margin": 18.475460052490234, "fcm_dpo/q_t": 0.3809901475906372, "grad_norm": 106.9743881225586, "learning_rate": 4.346796604970912e-07, "logits/chosen": -0.8023788928985596, "logits/rejected": -0.793890118598938, "logps/chosen": -286.2445373535156, "logps/ref_chosen": -276.3182373046875, "logps/ref_rejected": -273.02215576171875, "logps/rejected": -301.4239501953125, "loss": 4.3442, "margin_dpo/margin_mean": 18.475460052490234, "margin_dpo/margin_std": 29.90300941467285, "step": 150 }, { "epoch": 0.3162303664921466, "fcm_dpo/beta": 0.0293461661785841, "fcm_dpo/delta": -0.18943935632705688, "fcm_dpo/margin": 26.317476272583008, "fcm_dpo/q_t": 0.34219515323638916, "grad_norm": 90.29637908935547, "learning_rate": 4.3344075855595097e-07, "logits/chosen": -0.8202653527259827, "logits/rejected": -0.8275180459022522, "logps/chosen": -305.0558776855469, "logps/ref_chosen": -297.31280517578125, "logps/ref_rejected": -266.1003723144531, "logps/rejected": -300.16094970703125, "loss": 3.7553, "margin_dpo/margin_mean": 26.317481994628906, "margin_dpo/margin_std": 30.933624267578125, "step": 151 }, { "epoch": 0.3183246073298429, "fcm_dpo/beta": 0.02696666121482849, "fcm_dpo/delta": -0.02964053675532341, "fcm_dpo/margin": 20.30819320678711, "fcm_dpo/q_t": 0.38851580023765564, "grad_norm": 95.48226928710938, "learning_rate": 4.3219201924364323e-07, "logits/chosen": -0.8484928011894226, "logits/rejected": -0.8524165153503418, "logps/chosen": -276.18218994140625, "logps/ref_chosen": -270.2470397949219, "logps/ref_rejected": -269.7749328613281, "logps/rejected": -296.018310546875, "loss": 4.379, "margin_dpo/margin_mean": 20.30819320678711, "margin_dpo/margin_std": 31.848304748535156, "step": 152 }, { "epoch": 0.3204188481675393, "fcm_dpo/beta": 0.025042923167347908, "fcm_dpo/delta": -0.13993717730045319, "fcm_dpo/margin": 29.001144409179688, "fcm_dpo/q_t": 0.34314873814582825, "grad_norm": 84.08607482910156, "learning_rate": 4.309335095262675e-07, "logits/chosen": -0.8237071633338928, "logits/rejected": -0.8168530464172363, "logps/chosen": -283.3426513671875, "logps/ref_chosen": -273.779052734375, "logps/ref_rejected": -280.9530944824219, "logps/rejected": -319.5178527832031, "loss": 3.6829, "margin_dpo/margin_mean": 29.001144409179688, "margin_dpo/margin_std": 30.45376205444336, "step": 153 }, { "epoch": 0.3225130890052356, "fcm_dpo/beta": 0.02431459352374077, "fcm_dpo/delta": -0.015772145241498947, "fcm_dpo/margin": 20.476642608642578, "fcm_dpo/q_t": 0.3949674367904663, "grad_norm": 89.15077209472656, "learning_rate": 4.2966529689388064e-07, "logits/chosen": -0.8586506843566895, "logits/rejected": -0.8460282683372498, "logps/chosen": -301.7556457519531, "logps/ref_chosen": -289.9031982421875, "logps/ref_rejected": -261.5166320800781, "logps/rejected": -293.8457336425781, "loss": 4.4883, "margin_dpo/margin_mean": 20.47664451599121, "margin_dpo/margin_std": 34.57201385498047, "step": 154 }, { "epoch": 0.32460732984293195, "fcm_dpo/beta": 0.024474315345287323, "fcm_dpo/delta": 0.04425939917564392, "fcm_dpo/margin": 20.402536392211914, "fcm_dpo/q_t": 0.39846354722976685, "grad_norm": 100.91524505615234, "learning_rate": 4.2838744935687716e-07, "logits/chosen": -0.7969489693641663, "logits/rejected": -0.7992517948150635, "logps/chosen": -299.48004150390625, "logps/ref_chosen": -285.8612060546875, "logps/ref_rejected": -300.1272888183594, "logps/rejected": -334.1487121582031, "loss": 4.4626, "margin_dpo/margin_mean": 20.40253448486328, "margin_dpo/margin_std": 35.07635498046875, "step": 155 }, { "epoch": 0.3267015706806283, "fcm_dpo/beta": 0.024291612207889557, "fcm_dpo/delta": -0.1369752585887909, "fcm_dpo/margin": 30.0484619140625, "fcm_dpo/q_t": 0.3525455892086029, "grad_norm": 76.57457733154297, "learning_rate": 4.271000354423425e-07, "logits/chosen": -0.8318104147911072, "logits/rejected": -0.8374019861221313, "logps/chosen": -291.5562744140625, "logps/ref_chosen": -279.0354919433594, "logps/ref_rejected": -244.2198486328125, "logps/rejected": -286.78912353515625, "loss": 3.9775, "margin_dpo/margin_mean": 30.048463821411133, "margin_dpo/margin_std": 39.803226470947266, "step": 156 }, { "epoch": 0.3287958115183246, "fcm_dpo/beta": 0.022700754925608635, "fcm_dpo/delta": 0.06447763741016388, "fcm_dpo/margin": 21.06729507446289, "fcm_dpo/q_t": 0.3995548486709595, "grad_norm": 84.66363525390625, "learning_rate": 4.258031241903777e-07, "logits/chosen": -0.8930936455726624, "logits/rejected": -0.8946095108985901, "logps/chosen": -287.12164306640625, "logps/ref_chosen": -270.830322265625, "logps/ref_rejected": -259.08319091796875, "logps/rejected": -296.44183349609375, "loss": 4.4727, "margin_dpo/margin_mean": 21.06729507446289, "margin_dpo/margin_std": 34.88356399536133, "step": 157 }, { "epoch": 0.3308900523560209, "fcm_dpo/beta": 0.023696184158325195, "fcm_dpo/delta": -0.022407012060284615, "fcm_dpo/margin": 23.372053146362305, "fcm_dpo/q_t": 0.38260895013809204, "grad_norm": 88.16154479980469, "learning_rate": 4.2449678515039743e-07, "logits/chosen": -0.8335078358650208, "logits/rejected": -0.8233439326286316, "logps/chosen": -306.63201904296875, "logps/ref_chosen": -289.9663391113281, "logps/ref_rejected": -271.335693359375, "logps/rejected": -311.3734130859375, "loss": 4.2823, "margin_dpo/margin_mean": 23.372051239013672, "margin_dpo/margin_std": 34.65653610229492, "step": 158 }, { "epoch": 0.33298429319371725, "fcm_dpo/beta": 0.0234974417835474, "fcm_dpo/delta": 0.046846918761730194, "fcm_dpo/margin": 18.556854248046875, "fcm_dpo/q_t": 0.41267889738082886, "grad_norm": 93.64372253417969, "learning_rate": 4.2318108837739986e-07, "logits/chosen": -0.9038013219833374, "logits/rejected": -0.8640592694282532, "logps/chosen": -340.5484619140625, "logps/ref_chosen": -321.37835693359375, "logps/ref_rejected": -250.45652770996094, "logps/rejected": -288.1834716796875, "loss": 4.8046, "margin_dpo/margin_mean": 18.556854248046875, "margin_dpo/margin_std": 39.233619689941406, "step": 159 }, { "epoch": 0.33507853403141363, "fcm_dpo/beta": 0.02296513319015503, "fcm_dpo/delta": -0.0735328420996666, "fcm_dpo/margin": 29.02091407775879, "fcm_dpo/q_t": 0.35804080963134766, "grad_norm": 87.23406219482422, "learning_rate": 4.218561044282098e-07, "logits/chosen": -0.8459993004798889, "logits/rejected": -0.855299711227417, "logps/chosen": -291.4868469238281, "logps/ref_chosen": -276.28350830078125, "logps/ref_rejected": -262.7477722167969, "logps/rejected": -306.9720458984375, "loss": 3.8776, "margin_dpo/margin_mean": 29.020915985107422, "margin_dpo/margin_std": 33.83423614501953, "step": 160 }, { "epoch": 0.33717277486910996, "fcm_dpo/beta": 0.023070694878697395, "fcm_dpo/delta": -0.006039864383637905, "fcm_dpo/margin": 26.19426727294922, "fcm_dpo/q_t": 0.3749847114086151, "grad_norm": 87.1927261352539, "learning_rate": 4.2052190435769554e-07, "logits/chosen": -0.8614886403083801, "logits/rejected": -0.8548807501792908, "logps/chosen": -329.09661865234375, "logps/ref_chosen": -310.4927978515625, "logps/ref_rejected": -250.25347900390625, "logps/rejected": -295.0516052246094, "loss": 4.2422, "margin_dpo/margin_mean": 26.19426727294922, "margin_dpo/margin_std": 39.32307815551758, "step": 161 }, { "epoch": 0.3392670157068063, "fcm_dpo/beta": 0.022042490541934967, "fcm_dpo/delta": -0.0053899819031357765, "fcm_dpo/margin": 22.930227279663086, "fcm_dpo/q_t": 0.3928814232349396, "grad_norm": 84.0722427368164, "learning_rate": 4.1917855971495763e-07, "logits/chosen": -0.8419027328491211, "logits/rejected": -0.836093544960022, "logps/chosen": -313.3344421386719, "logps/ref_chosen": -296.1105041503906, "logps/ref_rejected": -253.4247589111328, "logps/rejected": -293.57891845703125, "loss": 4.4335, "margin_dpo/margin_mean": 22.930227279663086, "margin_dpo/margin_std": 36.98151397705078, "step": 162 }, { "epoch": 0.3413612565445026, "fcm_dpo/beta": 0.023281563073396683, "fcm_dpo/delta": 0.031158914789557457, "fcm_dpo/margin": 24.374839782714844, "fcm_dpo/q_t": 0.37846505641937256, "grad_norm": 105.36766052246094, "learning_rate": 4.1782614253949255e-07, "logits/chosen": -0.8736593723297119, "logits/rejected": -0.8767552375793457, "logps/chosen": -313.42913818359375, "logps/ref_chosen": -293.4999084472656, "logps/ref_rejected": -266.7116394042969, "logps/rejected": -311.0157470703125, "loss": 4.2172, "margin_dpo/margin_mean": 24.37484359741211, "margin_dpo/margin_std": 34.463653564453125, "step": 163 }, { "epoch": 0.34345549738219894, "fcm_dpo/beta": 0.023631222546100616, "fcm_dpo/delta": -0.005312643945217133, "fcm_dpo/margin": 25.51420021057129, "fcm_dpo/q_t": 0.3786207139492035, "grad_norm": 94.62559509277344, "learning_rate": 4.164647253573289e-07, "logits/chosen": -0.846282422542572, "logits/rejected": -0.8675246238708496, "logps/chosen": -291.2696533203125, "logps/ref_chosen": -267.04949951171875, "logps/ref_rejected": -215.9768829345703, "logps/rejected": -265.7112121582031, "loss": 4.2925, "margin_dpo/margin_mean": 25.51420021057129, "margin_dpo/margin_std": 39.75941467285156, "step": 164 }, { "epoch": 0.34554973821989526, "fcm_dpo/beta": 0.02316068299114704, "fcm_dpo/delta": 0.014690798707306385, "fcm_dpo/margin": 20.29566192626953, "fcm_dpo/q_t": 0.4032779335975647, "grad_norm": 95.55998229980469, "learning_rate": 4.1509438117713863e-07, "logits/chosen": -0.8854783177375793, "logits/rejected": -0.8603523373603821, "logps/chosen": -296.2962341308594, "logps/ref_chosen": -278.06146240234375, "logps/ref_rejected": -260.4288635253906, "logps/rejected": -298.9592590332031, "loss": 4.5279, "margin_dpo/margin_mean": 20.29566192626953, "margin_dpo/margin_std": 35.129371643066406, "step": 165 }, { "epoch": 0.34764397905759165, "fcm_dpo/beta": 0.023668359965085983, "fcm_dpo/delta": 0.0750846192240715, "fcm_dpo/margin": 22.351829528808594, "fcm_dpo/q_t": 0.39492562413215637, "grad_norm": 92.57283020019531, "learning_rate": 4.137151834863213e-07, "logits/chosen": -0.8455550074577332, "logits/rejected": -0.814144492149353, "logps/chosen": -292.3992919921875, "logps/ref_chosen": -275.9490661621094, "logps/ref_rejected": -232.13473510742188, "logps/rejected": -270.93682861328125, "loss": 4.4549, "margin_dpo/margin_mean": 22.35182762145996, "margin_dpo/margin_std": 38.78591537475586, "step": 166 }, { "epoch": 0.34973821989528797, "fcm_dpo/beta": 0.025082817301154137, "fcm_dpo/delta": 0.012834087014198303, "fcm_dpo/margin": 23.36766815185547, "fcm_dpo/q_t": 0.3822540044784546, "grad_norm": 97.74182891845703, "learning_rate": 4.123272062470633e-07, "logits/chosen": -0.8396057486534119, "logits/rejected": -0.8283241987228394, "logps/chosen": -299.5604553222656, "logps/ref_chosen": -280.5514221191406, "logps/ref_rejected": -255.2896728515625, "logps/rejected": -297.6663818359375, "loss": 4.4165, "margin_dpo/margin_mean": 23.367666244506836, "margin_dpo/margin_std": 39.522518157958984, "step": 167 }, { "epoch": 0.3518324607329843, "fcm_dpo/beta": 0.023963892832398415, "fcm_dpo/delta": -0.06409404426813126, "fcm_dpo/margin": 25.364152908325195, "fcm_dpo/q_t": 0.3715497553348541, "grad_norm": 305.63275146484375, "learning_rate": 4.1093052389237174e-07, "logits/chosen": -0.8306468725204468, "logits/rejected": -0.807292103767395, "logps/chosen": -334.9006042480469, "logps/ref_chosen": -315.7982177734375, "logps/ref_rejected": -291.48406982421875, "logps/rejected": -335.95062255859375, "loss": 4.4037, "margin_dpo/margin_mean": 25.364151000976562, "margin_dpo/margin_std": 42.07012176513672, "step": 168 }, { "epoch": 0.3539267015706806, "fcm_dpo/beta": 0.021986354142427444, "fcm_dpo/delta": -0.18093189597129822, "fcm_dpo/margin": 34.84695053100586, "fcm_dpo/q_t": 0.34274956583976746, "grad_norm": 78.17916107177734, "learning_rate": 4.0952521132208267e-07, "logits/chosen": -0.816986620426178, "logits/rejected": -0.8321943879127502, "logps/chosen": -276.01812744140625, "logps/ref_chosen": -261.06427001953125, "logps/ref_rejected": -235.40663146972656, "logps/rejected": -285.20745849609375, "loss": 3.6866, "margin_dpo/margin_mean": 34.84695053100586, "margin_dpo/margin_std": 38.065284729003906, "step": 169 }, { "epoch": 0.35602094240837695, "fcm_dpo/beta": 0.020543169230222702, "fcm_dpo/delta": 0.05482568219304085, "fcm_dpo/margin": 26.701711654663086, "fcm_dpo/q_t": 0.3890666663646698, "grad_norm": 98.05411529541016, "learning_rate": 4.081113438988443e-07, "logits/chosen": -0.7915770411491394, "logits/rejected": -0.7925465106964111, "logps/chosen": -324.87530517578125, "logps/ref_chosen": -308.96722412109375, "logps/ref_rejected": -263.8466796875, "logps/rejected": -306.4564514160156, "loss": 4.4074, "margin_dpo/margin_mean": 26.701711654663086, "margin_dpo/margin_std": 45.96706008911133, "step": 170 }, { "epoch": 0.3581151832460733, "fcm_dpo/beta": 0.02039124257862568, "fcm_dpo/delta": -0.09280530363321304, "fcm_dpo/margin": 30.494121551513672, "fcm_dpo/q_t": 0.36619833111763, "grad_norm": 92.74893188476562, "learning_rate": 4.0668899744407567e-07, "logits/chosen": -0.8212159872055054, "logits/rejected": -0.8347527384757996, "logps/chosen": -270.1197509765625, "logps/ref_chosen": -258.8890380859375, "logps/ref_rejected": -262.19140625, "logps/rejected": -303.916259765625, "loss": 3.9458, "margin_dpo/margin_mean": 30.494121551513672, "margin_dpo/margin_std": 35.400516510009766, "step": 171 }, { "epoch": 0.36020942408376966, "fcm_dpo/beta": 0.021083693951368332, "fcm_dpo/delta": 0.1604629009962082, "fcm_dpo/margin": 16.23406410217285, "fcm_dpo/q_t": 0.425800085067749, "grad_norm": 95.47428131103516, "learning_rate": 4.0525824823390043e-07, "logits/chosen": -0.8353057503700256, "logits/rejected": -0.8544809222221375, "logps/chosen": -352.4789123535156, "logps/ref_chosen": -339.0223388671875, "logps/ref_rejected": -295.78759765625, "logps/rejected": -325.4782409667969, "loss": 4.8806, "margin_dpo/margin_mean": 16.234066009521484, "margin_dpo/margin_std": 37.09178161621094, "step": 172 }, { "epoch": 0.362303664921466, "fcm_dpo/beta": 0.023462966084480286, "fcm_dpo/delta": 0.06941507756710052, "fcm_dpo/margin": 22.576725006103516, "fcm_dpo/q_t": 0.39380908012390137, "grad_norm": 85.06795501708984, "learning_rate": 4.0381917299505686e-07, "logits/chosen": -0.8411876559257507, "logits/rejected": -0.8425909280776978, "logps/chosen": -314.23345947265625, "logps/ref_chosen": -300.1114501953125, "logps/ref_rejected": -273.78460693359375, "logps/rejected": -310.48333740234375, "loss": 4.4598, "margin_dpo/margin_mean": 22.57672119140625, "margin_dpo/margin_std": 38.42784881591797, "step": 173 }, { "epoch": 0.3643979057591623, "fcm_dpo/beta": 0.02340209297835827, "fcm_dpo/delta": -0.04257451742887497, "fcm_dpo/margin": 27.29839324951172, "fcm_dpo/q_t": 0.3668029010295868, "grad_norm": 108.3929443359375, "learning_rate": 4.0237184890078243e-07, "logits/chosen": -0.8130418658256531, "logits/rejected": -0.8016320466995239, "logps/chosen": -348.58929443359375, "logps/ref_chosen": -335.0538635253906, "logps/ref_rejected": -257.4646911621094, "logps/rejected": -298.2984924316406, "loss": 4.0609, "margin_dpo/margin_mean": 27.298397064208984, "margin_dpo/margin_std": 37.006752014160156, "step": 174 }, { "epoch": 0.36649214659685864, "fcm_dpo/beta": 0.02319101057946682, "fcm_dpo/delta": -0.01429109089076519, "fcm_dpo/margin": 26.22753143310547, "fcm_dpo/q_t": 0.38202327489852905, "grad_norm": 106.47049713134766, "learning_rate": 4.00916353566676e-07, "logits/chosen": -0.8251218795776367, "logits/rejected": -0.8282068967819214, "logps/chosen": -304.2876281738281, "logps/ref_chosen": -284.39556884765625, "logps/ref_rejected": -283.3876647949219, "logps/rejected": -329.50726318359375, "loss": 4.3814, "margin_dpo/margin_mean": 26.22753143310547, "margin_dpo/margin_std": 42.50096130371094, "step": 175 }, { "epoch": 0.36858638743455496, "fcm_dpo/beta": 0.023719631135463715, "fcm_dpo/delta": 0.05169348418712616, "fcm_dpo/margin": 20.287446975708008, "fcm_dpo/q_t": 0.4035814702510834, "grad_norm": 94.84113311767578, "learning_rate": 3.994527650465352e-07, "logits/chosen": -0.7882924675941467, "logits/rejected": -0.8019800186157227, "logps/chosen": -271.99951171875, "logps/ref_chosen": -251.81280517578125, "logps/ref_rejected": -242.05328369140625, "logps/rejected": -282.5274658203125, "loss": 4.8493, "margin_dpo/margin_mean": 20.287450790405273, "margin_dpo/margin_std": 43.71425247192383, "step": 176 }, { "epoch": 0.3706806282722513, "fcm_dpo/beta": 0.023168740794062614, "fcm_dpo/delta": -0.04496470466256142, "fcm_dpo/margin": 20.56722640991211, "fcm_dpo/q_t": 0.4018362760543823, "grad_norm": 99.85309600830078, "learning_rate": 3.979811618281705e-07, "logits/chosen": -0.8806796669960022, "logits/rejected": -0.8574539422988892, "logps/chosen": -319.0129699707031, "logps/ref_chosen": -298.6463928222656, "logps/ref_rejected": -295.66534423828125, "logps/rejected": -336.59912109375, "loss": 4.7961, "margin_dpo/margin_mean": 20.56722640991211, "margin_dpo/margin_std": 41.72360610961914, "step": 177 }, { "epoch": 0.37277486910994767, "fcm_dpo/beta": 0.02236098423600197, "fcm_dpo/delta": -0.031636402010917664, "fcm_dpo/margin": 28.116716384887695, "fcm_dpo/q_t": 0.3736070692539215, "grad_norm": 87.69011688232422, "learning_rate": 3.9650162282919654e-07, "logits/chosen": -0.7916255593299866, "logits/rejected": -0.7900866270065308, "logps/chosen": -302.40118408203125, "logps/ref_chosen": -286.2576599121094, "logps/ref_rejected": -243.97491455078125, "logps/rejected": -288.2351379394531, "loss": 4.1353, "margin_dpo/margin_mean": 28.116714477539062, "margin_dpo/margin_std": 40.36370849609375, "step": 178 }, { "epoch": 0.374869109947644, "fcm_dpo/beta": 0.02173008769750595, "fcm_dpo/delta": -0.03953684866428375, "fcm_dpo/margin": 23.7108097076416, "fcm_dpo/q_t": 0.39363473653793335, "grad_norm": 94.55865478515625, "learning_rate": 3.9501422739279953e-07, "logits/chosen": -0.7766736745834351, "logits/rejected": -0.782062292098999, "logps/chosen": -277.46343994140625, "logps/ref_chosen": -259.737060546875, "logps/ref_rejected": -277.8813171386719, "logps/rejected": -319.3185119628906, "loss": 4.5415, "margin_dpo/margin_mean": 23.710805892944336, "margin_dpo/margin_std": 41.660884857177734, "step": 179 }, { "epoch": 0.3769633507853403, "fcm_dpo/beta": 0.021278660744428635, "fcm_dpo/delta": -0.048960644751787186, "fcm_dpo/margin": 27.975608825683594, "fcm_dpo/q_t": 0.379283607006073, "grad_norm": 81.7175521850586, "learning_rate": 3.935190552834828e-07, "logits/chosen": -0.8114000558853149, "logits/rejected": -0.8452147841453552, "logps/chosen": -285.2571105957031, "logps/ref_chosen": -267.30889892578125, "logps/ref_rejected": -230.4376983642578, "logps/rejected": -276.3615417480469, "loss": 4.1923, "margin_dpo/margin_mean": 27.975608825683594, "margin_dpo/margin_std": 40.780189514160156, "step": 180 }, { "epoch": 0.37905759162303665, "fcm_dpo/beta": 0.02138346992433071, "fcm_dpo/delta": 0.08889839053153992, "fcm_dpo/margin": 24.11235809326172, "fcm_dpo/q_t": 0.39553123712539673, "grad_norm": 104.4340591430664, "learning_rate": 3.920161866827889e-07, "logits/chosen": -0.8049870729446411, "logits/rejected": -0.8164405822753906, "logps/chosen": -321.71044921875, "logps/ref_chosen": -300.49139404296875, "logps/ref_rejected": -278.98284912109375, "logps/rejected": -324.31427001953125, "loss": 4.5755, "margin_dpo/margin_mean": 24.112356185913086, "margin_dpo/margin_std": 44.64960479736328, "step": 181 }, { "epoch": 0.381151832460733, "fcm_dpo/beta": 0.021802516654133797, "fcm_dpo/delta": -0.12682966887950897, "fcm_dpo/margin": 33.0461540222168, "fcm_dpo/q_t": 0.35091304779052734, "grad_norm": 94.29923248291016, "learning_rate": 3.90505702185e-07, "logits/chosen": -0.7876582741737366, "logits/rejected": -0.8228734731674194, "logps/chosen": -297.51129150390625, "logps/ref_chosen": -279.4981689453125, "logps/ref_rejected": -263.6926574707031, "logps/rejected": -314.751953125, "loss": 3.8418, "margin_dpo/margin_mean": 33.0461540222168, "margin_dpo/margin_std": 39.205291748046875, "step": 182 }, { "epoch": 0.3832460732984293, "fcm_dpo/beta": 0.02001192420721054, "fcm_dpo/delta": 0.017274007201194763, "fcm_dpo/margin": 29.154460906982422, "fcm_dpo/q_t": 0.38196709752082825, "grad_norm": 85.02561950683594, "learning_rate": 3.889876827928156e-07, "logits/chosen": -0.8367516398429871, "logits/rejected": -0.8470520973205566, "logps/chosen": -289.88006591796875, "logps/ref_chosen": -271.2057189941406, "logps/ref_rejected": -243.91549682617188, "logps/rejected": -291.7442932128906, "loss": 4.277, "margin_dpo/margin_mean": 29.154460906982422, "margin_dpo/margin_std": 45.45560836791992, "step": 183 }, { "epoch": 0.38534031413612563, "fcm_dpo/beta": 0.018630409613251686, "fcm_dpo/delta": -0.1170383170247078, "fcm_dpo/margin": 37.69718551635742, "fcm_dpo/q_t": 0.35355544090270996, "grad_norm": 91.68877410888672, "learning_rate": 3.874622099130087e-07, "logits/chosen": -0.8675556182861328, "logits/rejected": -0.8580695390701294, "logps/chosen": -331.67840576171875, "logps/ref_chosen": -318.4457702636719, "logps/ref_rejected": -266.640869140625, "logps/rejected": -317.5706787109375, "loss": 3.8938, "margin_dpo/margin_mean": 37.69718933105469, "margin_dpo/margin_std": 46.49965286254883, "step": 184 }, { "epoch": 0.387434554973822, "fcm_dpo/beta": 0.018275652080774307, "fcm_dpo/delta": -0.017188355326652527, "fcm_dpo/margin": 31.037744522094727, "fcm_dpo/q_t": 0.38223421573638916, "grad_norm": 80.14971923828125, "learning_rate": 3.859293653520604e-07, "logits/chosen": -0.8481187224388123, "logits/rejected": -0.8484024405479431, "logps/chosen": -296.3069763183594, "logps/ref_chosen": -274.308837890625, "logps/ref_rejected": -260.7274169921875, "logps/rejected": -313.7633056640625, "loss": 4.2285, "margin_dpo/margin_mean": 31.037744522094727, "margin_dpo/margin_std": 45.34954071044922, "step": 185 }, { "epoch": 0.38952879581151834, "fcm_dpo/beta": 0.01840740442276001, "fcm_dpo/delta": 0.0072084227576851845, "fcm_dpo/margin": 29.299354553222656, "fcm_dpo/q_t": 0.3844219148159027, "grad_norm": 81.5396499633789, "learning_rate": 3.8438923131177237e-07, "logits/chosen": -0.8498209714889526, "logits/rejected": -0.8591220378875732, "logps/chosen": -321.2626953125, "logps/ref_chosen": -299.00537109375, "logps/ref_rejected": -274.4014587402344, "logps/rejected": -325.9581298828125, "loss": 4.2641, "margin_dpo/margin_mean": 29.29935646057129, "margin_dpo/margin_std": 41.73757553100586, "step": 186 }, { "epoch": 0.39162303664921466, "fcm_dpo/beta": 0.01949266903102398, "fcm_dpo/delta": 0.11426316946744919, "fcm_dpo/margin": 25.147544860839844, "fcm_dpo/q_t": 0.3968276381492615, "grad_norm": 109.50704193115234, "learning_rate": 3.828418903848593e-07, "logits/chosen": -0.8067930936813354, "logits/rejected": -0.801750898361206, "logps/chosen": -356.03204345703125, "logps/ref_chosen": -329.8253173828125, "logps/ref_rejected": -263.73175048828125, "logps/rejected": -315.0860595703125, "loss": 4.6663, "margin_dpo/margin_mean": 25.14754295349121, "margin_dpo/margin_std": 48.58820343017578, "step": 187 }, { "epoch": 0.393717277486911, "fcm_dpo/beta": 0.019669629633426666, "fcm_dpo/delta": -0.031924083828926086, "fcm_dpo/margin": 29.831005096435547, "fcm_dpo/q_t": 0.3818528950214386, "grad_norm": 85.78080749511719, "learning_rate": 3.812874255505191e-07, "logits/chosen": -0.8346595168113708, "logits/rejected": -0.8323647975921631, "logps/chosen": -289.03765869140625, "logps/ref_chosen": -263.005615234375, "logps/ref_rejected": -247.08668518066406, "logps/rejected": -302.9496765136719, "loss": 4.4437, "margin_dpo/margin_mean": 29.831003189086914, "margin_dpo/margin_std": 50.36288070678711, "step": 188 }, { "epoch": 0.3958115183246073, "fcm_dpo/beta": 0.01861950382590294, "fcm_dpo/delta": -0.0650758147239685, "fcm_dpo/margin": 35.26509475708008, "fcm_dpo/q_t": 0.36197108030319214, "grad_norm": 82.21233367919922, "learning_rate": 3.797259201699833e-07, "logits/chosen": -0.8459858298301697, "logits/rejected": -0.8555557131767273, "logps/chosen": -291.046142578125, "logps/ref_chosen": -272.96038818359375, "logps/ref_rejected": -275.13238525390625, "logps/rejected": -328.4832458496094, "loss": 3.8948, "margin_dpo/margin_mean": 35.26509475708008, "margin_dpo/margin_std": 41.49103546142578, "step": 189 }, { "epoch": 0.39790575916230364, "fcm_dpo/beta": 0.018630830571055412, "fcm_dpo/delta": 0.004260986112058163, "fcm_dpo/margin": 31.947521209716797, "fcm_dpo/q_t": 0.3738987445831299, "grad_norm": 85.7725830078125, "learning_rate": 3.781574579820464e-07, "logits/chosen": -0.8605042099952698, "logits/rejected": -0.8269729614257812, "logps/chosen": -275.54925537109375, "logps/ref_chosen": -257.79754638671875, "logps/ref_rejected": -225.2164306640625, "logps/rejected": -274.9156494140625, "loss": 4.0818, "margin_dpo/margin_mean": 31.94751739501953, "margin_dpo/margin_std": 42.33838653564453, "step": 190 }, { "epoch": 0.4, "fcm_dpo/beta": 0.01885531283915043, "fcm_dpo/delta": 0.012270934879779816, "fcm_dpo/margin": 31.156997680664062, "fcm_dpo/q_t": 0.3792232871055603, "grad_norm": 93.37854766845703, "learning_rate": 3.765821230985757e-07, "logits/chosen": -0.8681455850601196, "logits/rejected": -0.8712099194526672, "logps/chosen": -260.21612548828125, "logps/ref_chosen": -243.8585205078125, "logps/ref_rejected": -245.12136840820312, "logps/rejected": -292.6359558105469, "loss": 4.2407, "margin_dpo/margin_mean": 31.156997680664062, "margin_dpo/margin_std": 46.79732131958008, "step": 191 }, { "epoch": 0.40209424083769635, "fcm_dpo/beta": 0.01894964464008808, "fcm_dpo/delta": 0.001287955790758133, "fcm_dpo/margin": 25.870622634887695, "fcm_dpo/q_t": 0.39755818247795105, "grad_norm": 83.08194732666016, "learning_rate": 3.75e-07, "logits/chosen": -0.8269639015197754, "logits/rejected": -0.8149864077568054, "logps/chosen": -289.5196228027344, "logps/ref_chosen": -266.9799499511719, "logps/ref_rejected": -260.1697082519531, "logps/rejected": -308.58001708984375, "loss": 4.5557, "margin_dpo/margin_mean": 25.870622634887695, "margin_dpo/margin_std": 45.57743835449219, "step": 192 }, { "epoch": 0.4041884816753927, "fcm_dpo/beta": 0.018430905416607857, "fcm_dpo/delta": -0.02623889222741127, "fcm_dpo/margin": 30.82439422607422, "fcm_dpo/q_t": 0.38306957483291626, "grad_norm": 91.62554931640625, "learning_rate": 3.734111735307796e-07, "logits/chosen": -0.8791731595993042, "logits/rejected": -0.8559356927871704, "logps/chosen": -308.2332763671875, "logps/ref_chosen": -280.25323486328125, "logps/ref_rejected": -291.0348815917969, "logps/rejected": -349.8393249511719, "loss": 4.3166, "margin_dpo/margin_mean": 30.824386596679688, "margin_dpo/margin_std": 48.09514236450195, "step": 193 }, { "epoch": 0.406282722513089, "fcm_dpo/beta": 0.01942608132958412, "fcm_dpo/delta": 0.08116672188043594, "fcm_dpo/margin": 23.930679321289062, "fcm_dpo/q_t": 0.4047344923019409, "grad_norm": 118.4610824584961, "learning_rate": 3.7181572889485623e-07, "logits/chosen": -0.8472974896430969, "logits/rejected": -0.8410882949829102, "logps/chosen": -317.900146484375, "logps/ref_chosen": -288.13946533203125, "logps/ref_rejected": -251.31529235839844, "logps/rejected": -305.00665283203125, "loss": 4.5406, "margin_dpo/margin_mean": 23.930675506591797, "margin_dpo/margin_std": 42.646785736083984, "step": 194 }, { "epoch": 0.4083769633507853, "fcm_dpo/beta": 0.02106415294110775, "fcm_dpo/delta": 0.09580697119235992, "fcm_dpo/margin": 21.20762062072754, "fcm_dpo/q_t": 0.412017285823822, "grad_norm": 105.6582260131836, "learning_rate": 3.7021375165108377e-07, "logits/chosen": -0.8573026657104492, "logits/rejected": -0.8636154532432556, "logps/chosen": -305.279541015625, "logps/ref_chosen": -274.0006408691406, "logps/ref_rejected": -280.22723388671875, "logps/rejected": -332.7137451171875, "loss": 4.6593, "margin_dpo/margin_mean": 21.20762062072754, "margin_dpo/margin_std": 41.330299377441406, "step": 195 }, { "epoch": 0.41047120418848165, "fcm_dpo/beta": 0.021116994321346283, "fcm_dpo/delta": -0.03291000798344612, "fcm_dpo/margin": 29.783409118652344, "fcm_dpo/q_t": 0.374653160572052, "grad_norm": 108.43070983886719, "learning_rate": 3.6860532770864005e-07, "logits/chosen": -0.8379297256469727, "logits/rejected": -0.8495330810546875, "logps/chosen": -297.95947265625, "logps/ref_chosen": -274.90069580078125, "logps/ref_rejected": -248.7281951904297, "logps/rejected": -301.57037353515625, "loss": 4.2422, "margin_dpo/margin_mean": 29.783409118652344, "margin_dpo/margin_std": 46.15776062011719, "step": 196 }, { "epoch": 0.41256544502617803, "fcm_dpo/beta": 0.020392950624227524, "fcm_dpo/delta": -0.10839693248271942, "fcm_dpo/margin": 34.37493896484375, "fcm_dpo/q_t": 0.3563109040260315, "grad_norm": 117.64871215820312, "learning_rate": 3.6699054332241985e-07, "logits/chosen": -0.8725043535232544, "logits/rejected": -0.8630834817886353, "logps/chosen": -335.2013854980469, "logps/ref_chosen": -309.5348205566406, "logps/ref_rejected": -264.3179931640625, "logps/rejected": -324.3594970703125, "loss": 3.9499, "margin_dpo/margin_mean": 34.37493896484375, "margin_dpo/margin_std": 43.26872634887695, "step": 197 }, { "epoch": 0.41465968586387436, "fcm_dpo/beta": 0.01880509965121746, "fcm_dpo/delta": -0.018900295719504356, "fcm_dpo/margin": 32.761688232421875, "fcm_dpo/q_t": 0.37768036127090454, "grad_norm": 100.41459655761719, "learning_rate": 3.653694850884091e-07, "logits/chosen": -0.8667469024658203, "logits/rejected": -0.845592200756073, "logps/chosen": -325.9341125488281, "logps/ref_chosen": -301.0134582519531, "logps/ref_rejected": -292.84185791015625, "logps/rejected": -350.5242004394531, "loss": 4.3054, "margin_dpo/margin_mean": 32.761688232421875, "margin_dpo/margin_std": 52.25929260253906, "step": 198 }, { "epoch": 0.4167539267015707, "fcm_dpo/beta": 0.01867169514298439, "fcm_dpo/delta": -0.039132870733737946, "fcm_dpo/margin": 31.537443161010742, "fcm_dpo/q_t": 0.37857967615127563, "grad_norm": 91.19898986816406, "learning_rate": 3.6374223993904124e-07, "logits/chosen": -0.8458956480026245, "logits/rejected": -0.8102747797966003, "logps/chosen": -290.2001647949219, "logps/ref_chosen": -264.6058654785156, "logps/ref_rejected": -214.9014892578125, "logps/rejected": -272.033203125, "loss": 4.1773, "margin_dpo/margin_mean": 31.53744125366211, "margin_dpo/margin_std": 45.666507720947266, "step": 199 }, { "epoch": 0.418848167539267, "fcm_dpo/beta": 0.018353408202528954, "fcm_dpo/delta": 0.041134800761938095, "fcm_dpo/margin": 28.204835891723633, "fcm_dpo/q_t": 0.39774975180625916, "grad_norm": 105.55087280273438, "learning_rate": 3.621088951385353e-07, "logits/chosen": -0.9000794887542725, "logits/rejected": -0.8824851512908936, "logps/chosen": -351.81829833984375, "logps/ref_chosen": -324.1588134765625, "logps/ref_rejected": -277.80218505859375, "logps/rejected": -333.6665344238281, "loss": 4.6147, "margin_dpo/margin_mean": 28.204835891723633, "margin_dpo/margin_std": 53.386009216308594, "step": 200 }, { "epoch": 0.418848167539267, "eval_fcm_dpo/beta": 0.01886005327105522, "eval_logits/chosen": -0.868439257144928, "eval_logits/rejected": -0.8583438396453857, "eval_logps/chosen": -320.2021179199219, "eval_logps/ref_chosen": -287.8268127441406, "eval_logps/ref_rejected": -266.9300231933594, "eval_logps/rejected": -328.9063720703125, "eval_loss": 0.5499768257141113, "eval_margin_dpo/margin_mean": 29.600982666015625, "eval_margin_dpo/margin_std": 45.74766159057617, "eval_runtime": 78.7653, "eval_samples_per_second": 25.392, "eval_steps_per_second": 3.174, "step": 200 }, { "epoch": 0.42094240837696334, "fcm_dpo/beta": 0.019238265231251717, "fcm_dpo/delta": -0.009198937565088272, "fcm_dpo/margin": 31.507125854492188, "fcm_dpo/q_t": 0.3743167221546173, "grad_norm": 97.80419158935547, "learning_rate": 3.604695382782159e-07, "logits/chosen": -0.8685228824615479, "logits/rejected": -0.8634480237960815, "logps/chosen": -303.941650390625, "logps/ref_chosen": -271.49566650390625, "logps/ref_rejected": -245.71414184570312, "logps/rejected": -309.6672058105469, "loss": 4.2189, "margin_dpo/margin_mean": 31.50712776184082, "margin_dpo/margin_std": 46.31949996948242, "step": 201 }, { "epoch": 0.42303664921465967, "fcm_dpo/beta": 0.018488463014364243, "fcm_dpo/delta": -0.04204194247722626, "fcm_dpo/margin": 31.415302276611328, "fcm_dpo/q_t": 0.3799372613430023, "grad_norm": 100.98213195800781, "learning_rate": 3.588242572718162e-07, "logits/chosen": -0.8742230534553528, "logits/rejected": -0.8673840165138245, "logps/chosen": -303.4588317871094, "logps/ref_chosen": -272.0979309082031, "logps/ref_rejected": -235.94805908203125, "logps/rejected": -298.7242431640625, "loss": 4.3376, "margin_dpo/margin_mean": 31.415302276611328, "margin_dpo/margin_std": 48.84147644042969, "step": 202 }, { "epoch": 0.42513089005235605, "fcm_dpo/beta": 0.018687793985009193, "fcm_dpo/delta": 0.0978153869509697, "fcm_dpo/margin": 24.22014808654785, "fcm_dpo/q_t": 0.4048110246658325, "grad_norm": 104.75067138671875, "learning_rate": 3.571731403507635e-07, "logits/chosen": -0.8457682132720947, "logits/rejected": -0.857460618019104, "logps/chosen": -317.5693664550781, "logps/ref_chosen": -280.2221374511719, "logps/ref_rejected": -251.79798889160156, "logps/rejected": -313.36541748046875, "loss": 4.5713, "margin_dpo/margin_mean": 24.22014808654785, "margin_dpo/margin_std": 43.69092559814453, "step": 203 }, { "epoch": 0.4272251308900524, "fcm_dpo/beta": 0.018568500876426697, "fcm_dpo/delta": -0.08285186439752579, "fcm_dpo/margin": 36.37318420410156, "fcm_dpo/q_t": 0.3603626787662506, "grad_norm": 101.34950256347656, "learning_rate": 3.5551627605944746e-07, "logits/chosen": -0.8969916701316833, "logits/rejected": -0.8768599033355713, "logps/chosen": -347.6547546386719, "logps/ref_chosen": -318.7960510253906, "logps/ref_rejected": -269.69921875, "logps/rejected": -334.93109130859375, "loss": 3.9315, "margin_dpo/margin_mean": 36.37318420410156, "margin_dpo/margin_std": 46.72166061401367, "step": 204 }, { "epoch": 0.4293193717277487, "fcm_dpo/beta": 0.017940927296876907, "fcm_dpo/delta": -0.05948423594236374, "fcm_dpo/margin": 36.4859733581543, "fcm_dpo/q_t": 0.36764687299728394, "grad_norm": 90.59722900390625, "learning_rate": 3.5385375325047163e-07, "logits/chosen": -0.8392518162727356, "logits/rejected": -0.8088283538818359, "logps/chosen": -315.66253662109375, "logps/ref_chosen": -283.7620544433594, "logps/ref_rejected": -297.69439697265625, "logps/rejected": -366.0808410644531, "loss": 4.0268, "margin_dpo/margin_mean": 36.4859733581543, "margin_dpo/margin_std": 49.50936508178711, "step": 205 }, { "epoch": 0.431413612565445, "fcm_dpo/beta": 0.017662961035966873, "fcm_dpo/delta": 0.010416839271783829, "fcm_dpo/margin": 30.46101188659668, "fcm_dpo/q_t": 0.3891698122024536, "grad_norm": 100.13421630859375, "learning_rate": 3.5218566107988867e-07, "logits/chosen": -0.8697702884674072, "logits/rejected": -0.8934507369995117, "logps/chosen": -329.244873046875, "logps/ref_chosen": -293.66387939453125, "logps/ref_rejected": -291.3056640625, "logps/rejected": -357.34771728515625, "loss": 4.4422, "margin_dpo/margin_mean": 30.461013793945312, "margin_dpo/margin_std": 50.47527313232422, "step": 206 }, { "epoch": 0.43350785340314135, "fcm_dpo/beta": 0.01789412647485733, "fcm_dpo/delta": 0.018324781209230423, "fcm_dpo/margin": 29.37425994873047, "fcm_dpo/q_t": 0.3917839229106903, "grad_norm": 103.07648468017578, "learning_rate": 3.505120890024195e-07, "logits/chosen": -0.8186078667640686, "logits/rejected": -0.8274865746498108, "logps/chosen": -302.8357849121094, "logps/ref_chosen": -270.5350646972656, "logps/ref_rejected": -278.7747497558594, "logps/rejected": -340.4497375488281, "loss": 4.6022, "margin_dpo/margin_mean": 29.3742618560791, "margin_dpo/margin_std": 55.54574966430664, "step": 207 }, { "epoch": 0.4356020942408377, "fcm_dpo/beta": 0.017709776759147644, "fcm_dpo/delta": -0.04383649304509163, "fcm_dpo/margin": 36.105621337890625, "fcm_dpo/q_t": 0.3701040744781494, "grad_norm": 86.0116958618164, "learning_rate": 3.4883312676665534e-07, "logits/chosen": -0.8696956038475037, "logits/rejected": -0.8232940435409546, "logps/chosen": -315.8093566894531, "logps/ref_chosen": -279.582763671875, "logps/ref_rejected": -290.041015625, "logps/rejected": -362.3731994628906, "loss": 4.0999, "margin_dpo/margin_mean": 36.105621337890625, "margin_dpo/margin_std": 51.143226623535156, "step": 208 }, { "epoch": 0.437696335078534, "fcm_dpo/beta": 0.01805291511118412, "fcm_dpo/delta": 0.0809149444103241, "fcm_dpo/margin": 26.21035385131836, "fcm_dpo/q_t": 0.40189653635025024, "grad_norm": 109.74065399169922, "learning_rate": 3.4714886441024573e-07, "logits/chosen": -0.7950612902641296, "logits/rejected": -0.7992856502532959, "logps/chosen": -359.4092102050781, "logps/ref_chosen": -318.8725280761719, "logps/ref_rejected": -270.64324951171875, "logps/rejected": -337.39031982421875, "loss": 4.7203, "margin_dpo/margin_mean": 26.210355758666992, "margin_dpo/margin_std": 53.022239685058594, "step": 209 }, { "epoch": 0.4397905759162304, "fcm_dpo/beta": 0.0181864183396101, "fcm_dpo/delta": 0.0194247979670763, "fcm_dpo/margin": 31.902477264404297, "fcm_dpo/q_t": 0.38086259365081787, "grad_norm": 107.53482055664062, "learning_rate": 3.454593922550693e-07, "logits/chosen": -0.8214377164840698, "logits/rejected": -0.8090283870697021, "logps/chosen": -318.75921630859375, "logps/ref_chosen": -283.14031982421875, "logps/ref_rejected": -287.2986755371094, "logps/rejected": -354.820068359375, "loss": 4.317, "margin_dpo/margin_mean": 31.902477264404297, "margin_dpo/margin_std": 50.24505615234375, "step": 210 }, { "epoch": 0.4418848167539267, "fcm_dpo/beta": 0.017495566979050636, "fcm_dpo/delta": -0.13013754785060883, "fcm_dpo/margin": 40.89909362792969, "fcm_dpo/q_t": 0.34772253036499023, "grad_norm": 85.72150421142578, "learning_rate": 3.4376480090239047e-07, "logits/chosen": -0.8469374775886536, "logits/rejected": -0.8271616101264954, "logps/chosen": -309.1142578125, "logps/ref_chosen": -276.4228515625, "logps/ref_rejected": -252.40603637695312, "logps/rejected": -325.9964599609375, "loss": 3.7482, "margin_dpo/margin_mean": 40.89909744262695, "margin_dpo/margin_std": 43.54120635986328, "step": 211 }, { "epoch": 0.44397905759162304, "fcm_dpo/beta": 0.01705530472099781, "fcm_dpo/delta": 0.0323108546435833, "fcm_dpo/margin": 28.951906204223633, "fcm_dpo/q_t": 0.3937266170978546, "grad_norm": 93.46395111083984, "learning_rate": 3.4206518122800055e-07, "logits/chosen": -0.8298263549804688, "logits/rejected": -0.8326829075813293, "logps/chosen": -307.32464599609375, "logps/ref_chosen": -271.7055358886719, "logps/ref_rejected": -241.18511962890625, "logps/rejected": -305.7561950683594, "loss": 4.4735, "margin_dpo/margin_mean": 28.951906204223633, "margin_dpo/margin_std": 47.416404724121094, "step": 212 }, { "epoch": 0.44607329842931936, "fcm_dpo/beta": 0.017670337110757828, "fcm_dpo/delta": 0.036003537476062775, "fcm_dpo/margin": 29.480064392089844, "fcm_dpo/q_t": 0.39773082733154297, "grad_norm": 100.81073760986328, "learning_rate": 3.403606243773448e-07, "logits/chosen": -0.8215805292129517, "logits/rejected": -0.8386380672454834, "logps/chosen": -339.76416015625, "logps/ref_chosen": -302.2976379394531, "logps/ref_rejected": -303.6202087402344, "logps/rejected": -370.5667724609375, "loss": 4.5047, "margin_dpo/margin_mean": 29.48006248474121, "margin_dpo/margin_std": 53.4147834777832, "step": 213 }, { "epoch": 0.4481675392670157, "fcm_dpo/beta": 0.017651241272687912, "fcm_dpo/delta": -0.009320348501205444, "fcm_dpo/margin": 34.294586181640625, "fcm_dpo/q_t": 0.37349388003349304, "grad_norm": 111.82011413574219, "learning_rate": 3.3865122176063385e-07, "logits/chosen": -0.8299503326416016, "logits/rejected": -0.8320922255516052, "logps/chosen": -318.2923278808594, "logps/ref_chosen": -272.13262939453125, "logps/ref_rejected": -294.82354736328125, "logps/rejected": -375.2778015136719, "loss": 4.0849, "margin_dpo/margin_mean": 34.294586181640625, "margin_dpo/margin_std": 44.05116271972656, "step": 214 }, { "epoch": 0.450261780104712, "fcm_dpo/beta": 0.017613038420677185, "fcm_dpo/delta": 0.010489102452993393, "fcm_dpo/margin": 26.734289169311523, "fcm_dpo/q_t": 0.40911558270454407, "grad_norm": 102.18154907226562, "learning_rate": 3.3693706504794243e-07, "logits/chosen": -0.8729708194732666, "logits/rejected": -0.8599320650100708, "logps/chosen": -334.3503723144531, "logps/ref_chosen": -291.3782958984375, "logps/ref_rejected": -261.05792236328125, "logps/rejected": -330.7642822265625, "loss": 4.6895, "margin_dpo/margin_mean": 26.734289169311523, "margin_dpo/margin_std": 53.317264556884766, "step": 215 }, { "epoch": 0.4523560209424084, "fcm_dpo/beta": 0.01718178391456604, "fcm_dpo/delta": -0.005167707800865173, "fcm_dpo/margin": 35.105499267578125, "fcm_dpo/q_t": 0.37694042921066284, "grad_norm": 94.87683868408203, "learning_rate": 3.3521824616429284e-07, "logits/chosen": -0.8911623358726501, "logits/rejected": -0.8866602182388306, "logps/chosen": -374.8359375, "logps/ref_chosen": -338.50543212890625, "logps/ref_rejected": -305.76104736328125, "logps/rejected": -377.197021484375, "loss": 4.2912, "margin_dpo/margin_mean": 35.105499267578125, "margin_dpo/margin_std": 54.505611419677734, "step": 216 }, { "epoch": 0.4544502617801047, "fcm_dpo/beta": 0.01670690067112446, "fcm_dpo/delta": -0.13507547974586487, "fcm_dpo/margin": 43.35816955566406, "fcm_dpo/q_t": 0.35271987318992615, "grad_norm": 86.05104064941406, "learning_rate": 3.334948572847253e-07, "logits/chosen": -0.787706732749939, "logits/rejected": -0.7581799626350403, "logps/chosen": -331.9385986328125, "logps/ref_chosen": -293.5498046875, "logps/ref_rejected": -256.7830810546875, "logps/rejected": -338.530029296875, "loss": 3.9209, "margin_dpo/margin_mean": 43.35816955566406, "margin_dpo/margin_std": 55.72812271118164, "step": 217 }, { "epoch": 0.45654450261780105, "fcm_dpo/beta": 0.015875810757279396, "fcm_dpo/delta": 0.0009746733121573925, "fcm_dpo/margin": 37.655181884765625, "fcm_dpo/q_t": 0.37353405356407166, "grad_norm": 89.3914566040039, "learning_rate": 3.317669908293554e-07, "logits/chosen": -0.8176430463790894, "logits/rejected": -0.840962290763855, "logps/chosen": -357.17340087890625, "logps/ref_chosen": -320.579345703125, "logps/ref_rejected": -294.0381164550781, "logps/rejected": -368.2873840332031, "loss": 4.0903, "margin_dpo/margin_mean": 37.65517807006836, "margin_dpo/margin_std": 50.91783905029297, "step": 218 }, { "epoch": 0.4586387434554974, "fcm_dpo/beta": 0.01585143618285656, "fcm_dpo/delta": -0.027818219736218452, "fcm_dpo/margin": 39.36839294433594, "fcm_dpo/q_t": 0.3697540760040283, "grad_norm": 87.21080780029297, "learning_rate": 3.300347394584172e-07, "logits/chosen": -0.8196850419044495, "logits/rejected": -0.8459032773971558, "logps/chosen": -300.89532470703125, "logps/ref_chosen": -268.4186096191406, "logps/ref_rejected": -265.7808837890625, "logps/rejected": -337.62591552734375, "loss": 4.1119, "margin_dpo/margin_mean": 39.3683967590332, "margin_dpo/margin_std": 54.10658264160156, "step": 219 }, { "epoch": 0.4607329842931937, "fcm_dpo/beta": 0.015508392825722694, "fcm_dpo/delta": 0.006693243980407715, "fcm_dpo/margin": 38.219276428222656, "fcm_dpo/q_t": 0.37400129437446594, "grad_norm": 84.04236602783203, "learning_rate": 3.2829819606729477e-07, "logits/chosen": -0.8543677926063538, "logits/rejected": -0.8366027474403381, "logps/chosen": -346.4298400878906, "logps/ref_chosen": -312.8864440917969, "logps/ref_rejected": -259.5191955566406, "logps/rejected": -331.2818298339844, "loss": 4.1852, "margin_dpo/margin_mean": 38.219276428222656, "margin_dpo/margin_std": 54.659847259521484, "step": 220 }, { "epoch": 0.46282722513089003, "fcm_dpo/beta": 0.01612645946443081, "fcm_dpo/delta": 0.004002414643764496, "fcm_dpo/margin": 30.247737884521484, "fcm_dpo/q_t": 0.40294140577316284, "grad_norm": 90.31486511230469, "learning_rate": 3.265574537815398e-07, "logits/chosen": -0.7849829792976379, "logits/rejected": -0.7987595200538635, "logps/chosen": -336.2065734863281, "logps/ref_chosen": -300.32586669921875, "logps/ref_rejected": -286.312255859375, "logps/rejected": -352.44073486328125, "loss": 4.5734, "margin_dpo/margin_mean": 30.247739791870117, "margin_dpo/margin_std": 55.0887336730957, "step": 221 }, { "epoch": 0.4649214659685864, "fcm_dpo/beta": 0.01531196478754282, "fcm_dpo/delta": -0.010990019887685776, "fcm_dpo/margin": 36.88833999633789, "fcm_dpo/q_t": 0.3805161714553833, "grad_norm": 96.45641326904297, "learning_rate": 3.248126059518784e-07, "logits/chosen": -0.8649472594261169, "logits/rejected": -0.8534566760063171, "logps/chosen": -329.9435119628906, "logps/ref_chosen": -297.1113586425781, "logps/ref_rejected": -235.53146362304688, "logps/rejected": -305.251953125, "loss": 4.1935, "margin_dpo/margin_mean": 36.88833999633789, "margin_dpo/margin_std": 50.991146087646484, "step": 222 }, { "epoch": 0.46701570680628274, "fcm_dpo/beta": 0.015516339801251888, "fcm_dpo/delta": -0.006761605851352215, "fcm_dpo/margin": 38.97541046142578, "fcm_dpo/q_t": 0.3734777569770813, "grad_norm": 79.94070434570312, "learning_rate": 3.230637461492043e-07, "logits/chosen": -0.8134406805038452, "logits/rejected": -0.7874367833137512, "logps/chosen": -322.5858459472656, "logps/ref_chosen": -286.41510009765625, "logps/ref_rejected": -241.1181640625, "logps/rejected": -316.2643127441406, "loss": 4.138, "margin_dpo/margin_mean": 38.97541427612305, "margin_dpo/margin_std": 54.24036407470703, "step": 223 }, { "epoch": 0.46910994764397906, "fcm_dpo/beta": 0.015485338866710663, "fcm_dpo/delta": -0.04357857629656792, "fcm_dpo/margin": 41.29396438598633, "fcm_dpo/q_t": 0.36654770374298096, "grad_norm": 85.50264739990234, "learning_rate": 3.213109681595612e-07, "logits/chosen": -0.780707597732544, "logits/rejected": -0.7996824979782104, "logps/chosen": -283.2223205566406, "logps/ref_chosen": -249.49234008789062, "logps/ref_rejected": -233.10752868652344, "logps/rejected": -308.1314697265625, "loss": 3.9722, "margin_dpo/margin_mean": 41.29396438598633, "margin_dpo/margin_std": 51.46320343017578, "step": 224 }, { "epoch": 0.4712041884816754, "fcm_dpo/beta": 0.014851980842649937, "fcm_dpo/delta": 0.04186537116765976, "fcm_dpo/margin": 37.72064208984375, "fcm_dpo/q_t": 0.38652801513671875, "grad_norm": 97.50272369384766, "learning_rate": 3.1955436597911315e-07, "logits/chosen": -0.8135025501251221, "logits/rejected": -0.7935299873352051, "logps/chosen": -354.3567810058594, "logps/ref_chosen": -311.8583679199219, "logps/ref_rejected": -336.8523864746094, "logps/rejected": -417.0714111328125, "loss": 4.3211, "margin_dpo/margin_mean": 37.720645904541016, "margin_dpo/margin_std": 58.61198043823242, "step": 225 }, { "epoch": 0.4732984293193717, "fcm_dpo/beta": 0.015629008412361145, "fcm_dpo/delta": 0.020238326862454414, "fcm_dpo/margin": 33.9615592956543, "fcm_dpo/q_t": 0.3907637596130371, "grad_norm": 85.4857406616211, "learning_rate": 3.1779403380910425e-07, "logits/chosen": -0.8504273891448975, "logits/rejected": -0.8434605598449707, "logps/chosen": -291.4212951660156, "logps/ref_chosen": -252.20123291015625, "logps/ref_rejected": -254.41162109375, "logps/rejected": -327.5932312011719, "loss": 4.3859, "margin_dpo/margin_mean": 33.96156311035156, "margin_dpo/margin_std": 56.106605529785156, "step": 226 }, { "epoch": 0.47539267015706804, "fcm_dpo/beta": 0.015467462129890919, "fcm_dpo/delta": -0.03860139474272728, "fcm_dpo/margin": 41.034793853759766, "fcm_dpo/q_t": 0.369282603263855, "grad_norm": 102.45677947998047, "learning_rate": 3.160300660508064e-07, "logits/chosen": -0.8150337934494019, "logits/rejected": -0.8127814531326294, "logps/chosen": -325.2018737792969, "logps/ref_chosen": -285.25946044921875, "logps/ref_rejected": -261.3220520019531, "logps/rejected": -342.29925537109375, "loss": 4.2129, "margin_dpo/margin_mean": 41.034793853759766, "margin_dpo/margin_std": 60.92158508300781, "step": 227 }, { "epoch": 0.4774869109947644, "fcm_dpo/beta": 0.01522951852530241, "fcm_dpo/delta": -0.043590422719717026, "fcm_dpo/margin": 41.9859733581543, "fcm_dpo/q_t": 0.3681778311729431, "grad_norm": 91.48838806152344, "learning_rate": 3.1426255730045695e-07, "logits/chosen": -0.8238086700439453, "logits/rejected": -0.7931742072105408, "logps/chosen": -348.5426330566406, "logps/ref_chosen": -313.81878662109375, "logps/ref_rejected": -258.07061767578125, "logps/rejected": -334.7804260253906, "loss": 4.0378, "margin_dpo/margin_mean": 41.98596954345703, "margin_dpo/margin_std": 55.18696594238281, "step": 228 }, { "epoch": 0.47958115183246075, "fcm_dpo/beta": 0.014314261265099049, "fcm_dpo/delta": -0.07704558223485947, "fcm_dpo/margin": 46.78490447998047, "fcm_dpo/q_t": 0.3585876524448395, "grad_norm": 174.31724548339844, "learning_rate": 3.1249160234418644e-07, "logits/chosen": -0.7998561263084412, "logits/rejected": -0.8153296113014221, "logps/chosen": -334.5465087890625, "logps/ref_chosen": -291.9707946777344, "logps/ref_rejected": -263.42059326171875, "logps/rejected": -352.7812805175781, "loss": 3.973, "margin_dpo/margin_mean": 46.78490447998047, "margin_dpo/margin_std": 58.31138610839844, "step": 229 }, { "epoch": 0.4816753926701571, "fcm_dpo/beta": 0.0136597054079175, "fcm_dpo/delta": -0.0017933191265910864, "fcm_dpo/margin": 43.92266082763672, "fcm_dpo/q_t": 0.37403687834739685, "grad_norm": 77.7289810180664, "learning_rate": 3.1071729615293424e-07, "logits/chosen": -0.8659123182296753, "logits/rejected": -0.8676111698150635, "logps/chosen": -273.0910949707031, "logps/ref_chosen": -233.2601318359375, "logps/ref_rejected": -238.922119140625, "logps/rejected": -322.67572021484375, "loss": 4.1335, "margin_dpo/margin_mean": 43.92265319824219, "margin_dpo/margin_std": 60.90802001953125, "step": 230 }, { "epoch": 0.4837696335078534, "fcm_dpo/beta": 0.014134555123746395, "fcm_dpo/delta": 0.054537300020456314, "fcm_dpo/margin": 34.40003204345703, "fcm_dpo/q_t": 0.3954438269138336, "grad_norm": 92.6048355102539, "learning_rate": 3.0893973387735683e-07, "logits/chosen": -0.8301805853843689, "logits/rejected": -0.8212379217147827, "logps/chosen": -370.2666931152344, "logps/ref_chosen": -322.1551818847656, "logps/ref_rejected": -280.97613525390625, "logps/rejected": -363.48773193359375, "loss": 4.4293, "margin_dpo/margin_mean": 34.40003204345703, "margin_dpo/margin_std": 54.50558090209961, "step": 231 }, { "epoch": 0.48586387434554973, "fcm_dpo/beta": 0.01422965433448553, "fcm_dpo/delta": -0.03155010566115379, "fcm_dpo/margin": 38.31331253051758, "fcm_dpo/q_t": 0.386367529630661, "grad_norm": 108.28009796142578, "learning_rate": 3.071590108427243e-07, "logits/chosen": -0.8186150789260864, "logits/rejected": -0.8022579550743103, "logps/chosen": -320.86212158203125, "logps/ref_chosen": -271.7437744140625, "logps/ref_rejected": -249.94981384277344, "logps/rejected": -337.3814697265625, "loss": 4.409, "margin_dpo/margin_mean": 38.31331253051758, "margin_dpo/margin_std": 60.96247863769531, "step": 232 }, { "epoch": 0.48795811518324606, "fcm_dpo/beta": 0.013790407218039036, "fcm_dpo/delta": -0.07378131151199341, "fcm_dpo/margin": 41.6362419128418, "fcm_dpo/q_t": 0.37872081995010376, "grad_norm": 111.52574920654297, "learning_rate": 3.05375222543809e-07, "logits/chosen": -0.8492098450660706, "logits/rejected": -0.8407477140426636, "logps/chosen": -334.5429992675781, "logps/ref_chosen": -285.3423156738281, "logps/ref_rejected": -266.34320068359375, "logps/rejected": -357.1800842285156, "loss": 4.2161, "margin_dpo/margin_mean": 41.6362419128418, "margin_dpo/margin_std": 59.20707702636719, "step": 233 }, { "epoch": 0.4900523560209424, "fcm_dpo/beta": 0.013664349913597107, "fcm_dpo/delta": 0.043013282120227814, "fcm_dpo/margin": 40.833702087402344, "fcm_dpo/q_t": 0.38517236709594727, "grad_norm": 81.4188003540039, "learning_rate": 3.035884646397637e-07, "logits/chosen": -0.8213815093040466, "logits/rejected": -0.8046208620071411, "logps/chosen": -344.2747497558594, "logps/ref_chosen": -294.9057312011719, "logps/ref_rejected": -299.37054443359375, "logps/rejected": -389.57330322265625, "loss": 4.4323, "margin_dpo/margin_mean": 40.833702087402344, "margin_dpo/margin_std": 68.16500091552734, "step": 234 }, { "epoch": 0.49214659685863876, "fcm_dpo/beta": 0.014067186042666435, "fcm_dpo/delta": 0.0023157279938459396, "fcm_dpo/margin": 42.43536376953125, "fcm_dpo/q_t": 0.3754667043685913, "grad_norm": 111.46656036376953, "learning_rate": 3.017988329489923e-07, "logits/chosen": -0.8356315493583679, "logits/rejected": -0.834743320941925, "logps/chosen": -342.5096740722656, "logps/ref_chosen": -289.49755859375, "logps/ref_rejected": -247.55076599121094, "logps/rejected": -342.9981994628906, "loss": 4.2787, "margin_dpo/margin_mean": 42.43537139892578, "margin_dpo/margin_std": 65.42337036132812, "step": 235 }, { "epoch": 0.4942408376963351, "fcm_dpo/beta": 0.014103572815656662, "fcm_dpo/delta": -0.0036417022347450256, "fcm_dpo/margin": 42.677146911621094, "fcm_dpo/q_t": 0.3757117688655853, "grad_norm": 87.55143737792969, "learning_rate": 3.000064234440111e-07, "logits/chosen": -0.869731605052948, "logits/rejected": -0.872031033039093, "logps/chosen": -337.2437744140625, "logps/ref_chosen": -288.8846435546875, "logps/ref_rejected": -242.0452880859375, "logps/rejected": -333.0815734863281, "loss": 4.231, "margin_dpo/margin_mean": 42.677146911621094, "margin_dpo/margin_std": 62.929443359375, "step": 236 }, { "epoch": 0.4963350785340314, "fcm_dpo/beta": 0.013692477717995644, "fcm_dpo/delta": -0.04169701784849167, "fcm_dpo/margin": 42.81891632080078, "fcm_dpo/q_t": 0.3778565227985382, "grad_norm": 94.28779602050781, "learning_rate": 2.9821133224630223e-07, "logits/chosen": -0.8328222632408142, "logits/rejected": -0.8149389028549194, "logps/chosen": -318.3191833496094, "logps/ref_chosen": -265.47869873046875, "logps/ref_rejected": -267.9891357421875, "logps/rejected": -363.64849853515625, "loss": 4.2077, "margin_dpo/margin_mean": 42.81891632080078, "margin_dpo/margin_std": 62.191993713378906, "step": 237 }, { "epoch": 0.49842931937172774, "fcm_dpo/beta": 0.013286177068948746, "fcm_dpo/delta": 0.0006893336540088058, "fcm_dpo/margin": 41.189964294433594, "fcm_dpo/q_t": 0.3882310390472412, "grad_norm": 99.93206787109375, "learning_rate": 2.964136556211588e-07, "logits/chosen": -0.8325835466384888, "logits/rejected": -0.8071151375770569, "logps/chosen": -367.561279296875, "logps/ref_chosen": -312.0026550292969, "logps/ref_rejected": -270.0257263183594, "logps/rejected": -366.7742919921875, "loss": 4.3033, "margin_dpo/margin_mean": 41.18996810913086, "margin_dpo/margin_std": 64.29379272460938, "step": 238 }, { "epoch": 0.5005235602094241, "fcm_dpo/beta": 0.013920535333454609, "fcm_dpo/delta": 0.09140698611736298, "fcm_dpo/margin": 36.762245178222656, "fcm_dpo/q_t": 0.3982861042022705, "grad_norm": 103.36236572265625, "learning_rate": 2.946134899725226e-07, "logits/chosen": -0.8311811089515686, "logits/rejected": -0.8709484338760376, "logps/chosen": -318.14971923828125, "logps/ref_chosen": -267.167236328125, "logps/ref_rejected": -275.99468994140625, "logps/rejected": -363.73944091796875, "loss": 4.6305, "margin_dpo/margin_mean": 36.762245178222656, "margin_dpo/margin_std": 71.0285873413086, "step": 239 }, { "epoch": 0.5026178010471204, "fcm_dpo/beta": 0.013737525790929794, "fcm_dpo/delta": -0.057324089109897614, "fcm_dpo/margin": 47.42666244506836, "fcm_dpo/q_t": 0.36713799834251404, "grad_norm": 116.31761932373047, "learning_rate": 2.9281093183781403e-07, "logits/chosen": -0.8899922370910645, "logits/rejected": -0.8857712149620056, "logps/chosen": -334.71600341796875, "logps/ref_chosen": -285.9796142578125, "logps/ref_rejected": -256.8258056640625, "logps/rejected": -352.98883056640625, "loss": 4.058, "margin_dpo/margin_mean": 47.42666244506836, "margin_dpo/margin_std": 65.38998413085938, "step": 240 }, { "epoch": 0.5047120418848168, "fcm_dpo/beta": 0.013606472872197628, "fcm_dpo/delta": 0.037277210503816605, "fcm_dpo/margin": 37.668827056884766, "fcm_dpo/q_t": 0.3958120048046112, "grad_norm": 107.70622253417969, "learning_rate": 2.910060778827554e-07, "logits/chosen": -0.80711430311203, "logits/rejected": -0.7877066731452942, "logps/chosen": -318.73126220703125, "logps/ref_chosen": -261.516845703125, "logps/ref_rejected": -250.2250518798828, "logps/rejected": -345.1082458496094, "loss": 4.5076, "margin_dpo/margin_mean": 37.668827056884766, "margin_dpo/margin_std": 65.27635192871094, "step": 241 }, { "epoch": 0.506806282722513, "fcm_dpo/beta": 0.014048721641302109, "fcm_dpo/delta": -0.022284481674432755, "fcm_dpo/margin": 44.0509147644043, "fcm_dpo/q_t": 0.37357252836227417, "grad_norm": 94.55889892578125, "learning_rate": 2.891990248961871e-07, "logits/chosen": -0.8759533762931824, "logits/rejected": -0.8638408184051514, "logps/chosen": -319.8083190917969, "logps/ref_chosen": -270.51397705078125, "logps/ref_rejected": -244.8560791015625, "logps/rejected": -338.20135498046875, "loss": 4.1086, "margin_dpo/margin_mean": 44.05091857910156, "margin_dpo/margin_std": 60.92688751220703, "step": 242 }, { "epoch": 0.5089005235602094, "fcm_dpo/beta": 0.013818719424307346, "fcm_dpo/delta": -0.06250281631946564, "fcm_dpo/margin": 47.56100845336914, "fcm_dpo/q_t": 0.36709579825401306, "grad_norm": 112.03346252441406, "learning_rate": 2.873898697848762e-07, "logits/chosen": -0.8465127944946289, "logits/rejected": -0.8346729278564453, "logps/chosen": -369.3190612792969, "logps/ref_chosen": -324.68206787109375, "logps/ref_rejected": -307.1111755371094, "logps/rejected": -399.3091735839844, "loss": 4.0675, "margin_dpo/margin_mean": 47.561012268066406, "margin_dpo/margin_std": 65.72129821777344, "step": 243 }, { "epoch": 0.5109947643979058, "fcm_dpo/beta": 0.012858567759394646, "fcm_dpo/delta": -0.009479108266532421, "fcm_dpo/margin": 47.19831085205078, "fcm_dpo/q_t": 0.37022095918655396, "grad_norm": 96.53694915771484, "learning_rate": 2.8557870956832133e-07, "logits/chosen": -0.8438245058059692, "logits/rejected": -0.7965729832649231, "logps/chosen": -363.5497741699219, "logps/ref_chosen": -318.979248046875, "logps/ref_rejected": -269.67572021484375, "logps/rejected": -361.4445495605469, "loss": 4.0776, "margin_dpo/margin_mean": 47.19831466674805, "margin_dpo/margin_std": 61.196590423583984, "step": 244 }, { "epoch": 0.5130890052356021, "fcm_dpo/beta": 0.012791464105248451, "fcm_dpo/delta": -0.07829757034778595, "fcm_dpo/margin": 47.49521255493164, "fcm_dpo/q_t": 0.3693080246448517, "grad_norm": 86.34193420410156, "learning_rate": 2.837656413735479e-07, "logits/chosen": -0.8461104035377502, "logits/rejected": -0.8512292504310608, "logps/chosen": -337.3260192871094, "logps/ref_chosen": -294.8980712890625, "logps/ref_rejected": -239.8111114501953, "logps/rejected": -329.7342529296875, "loss": 4.0572, "margin_dpo/margin_mean": 47.49521255493164, "margin_dpo/margin_std": 59.5516471862793, "step": 245 }, { "epoch": 0.5151832460732985, "fcm_dpo/beta": 0.012845880351960659, "fcm_dpo/delta": 0.09227827191352844, "fcm_dpo/margin": 35.69134521484375, "fcm_dpo/q_t": 0.404882550239563, "grad_norm": 102.24588775634766, "learning_rate": 2.8195076242990116e-07, "logits/chosen": -0.8206315040588379, "logits/rejected": -0.8294263482093811, "logps/chosen": -334.98681640625, "logps/ref_chosen": -280.6854248046875, "logps/ref_rejected": -253.65382385253906, "logps/rejected": -343.64654541015625, "loss": 4.5677, "margin_dpo/margin_mean": 35.69134521484375, "margin_dpo/margin_std": 64.71369934082031, "step": 246 }, { "epoch": 0.5172774869109947, "fcm_dpo/beta": 0.013430180959403515, "fcm_dpo/delta": 0.008843163028359413, "fcm_dpo/margin": 40.15351104736328, "fcm_dpo/q_t": 0.3880252242088318, "grad_norm": 82.07162475585938, "learning_rate": 2.801341700638307e-07, "logits/chosen": -0.8394767045974731, "logits/rejected": -0.8434745073318481, "logps/chosen": -330.6539611816406, "logps/ref_chosen": -281.1091003417969, "logps/ref_rejected": -260.3700866699219, "logps/rejected": -350.0684814453125, "loss": 4.2961, "margin_dpo/margin_mean": 40.15351104736328, "margin_dpo/margin_std": 59.38816452026367, "step": 247 }, { "epoch": 0.5193717277486911, "fcm_dpo/beta": 0.01332888100296259, "fcm_dpo/delta": 0.036328285932540894, "fcm_dpo/margin": 37.929134368896484, "fcm_dpo/q_t": 0.3918164074420929, "grad_norm": 99.17080688476562, "learning_rate": 2.7831596169367227e-07, "logits/chosen": -0.8002797365188599, "logits/rejected": -0.8162012696266174, "logps/chosen": -319.5628967285156, "logps/ref_chosen": -270.318359375, "logps/ref_rejected": -233.46778869628906, "logps/rejected": -320.6414794921875, "loss": 4.3869, "margin_dpo/margin_mean": 37.92913055419922, "margin_dpo/margin_std": 58.7851676940918, "step": 248 }, { "epoch": 0.5214659685863874, "fcm_dpo/beta": 0.013946634717285633, "fcm_dpo/delta": 0.03218340501189232, "fcm_dpo/margin": 36.27839279174805, "fcm_dpo/q_t": 0.3957046866416931, "grad_norm": 100.11740112304688, "learning_rate": 2.7649623482442274e-07, "logits/chosen": -0.8372878432273865, "logits/rejected": -0.8184173703193665, "logps/chosen": -336.67974853515625, "logps/ref_chosen": -275.8088684082031, "logps/ref_rejected": -243.45138549804688, "logps/rejected": -340.6006774902344, "loss": 4.5694, "margin_dpo/margin_mean": 36.27839279174805, "margin_dpo/margin_std": 66.48210906982422, "step": 249 }, { "epoch": 0.5235602094240838, "fcm_dpo/beta": 0.013306835666298866, "fcm_dpo/delta": -0.04749767482280731, "fcm_dpo/margin": 48.137725830078125, "fcm_dpo/q_t": 0.3672924041748047, "grad_norm": 97.09394836425781, "learning_rate": 2.7467508704251135e-07, "logits/chosen": -0.8327563405036926, "logits/rejected": -0.8362528681755066, "logps/chosen": -354.5564270019531, "logps/ref_chosen": -292.4945373535156, "logps/ref_rejected": -284.2869567871094, "logps/rejected": -394.4865417480469, "loss": 4.1373, "margin_dpo/margin_mean": 48.137718200683594, "margin_dpo/margin_std": 67.8976821899414, "step": 250 }, { "epoch": 0.5256544502617801, "fcm_dpo/beta": 0.013809560798108578, "fcm_dpo/delta": -0.00444817915558815, "fcm_dpo/margin": 43.47351837158203, "fcm_dpo/q_t": 0.3821024000644684, "grad_norm": 100.93732452392578, "learning_rate": 2.7285261601056697e-07, "logits/chosen": -0.8351647257804871, "logits/rejected": -0.8213850259780884, "logps/chosen": -336.347900390625, "logps/ref_chosen": -281.736572265625, "logps/ref_rejected": -255.9419708251953, "logps/rejected": -354.02679443359375, "loss": 4.1882, "margin_dpo/margin_mean": 43.473514556884766, "margin_dpo/margin_std": 63.28712463378906, "step": 251 }, { "epoch": 0.5277486910994764, "fcm_dpo/beta": 0.013638043776154518, "fcm_dpo/delta": 0.03314843028783798, "fcm_dpo/margin": 41.505462646484375, "fcm_dpo/q_t": 0.380887508392334, "grad_norm": 90.63612365722656, "learning_rate": 2.7102891946217994e-07, "logits/chosen": -0.8848163485527039, "logits/rejected": -0.861879289150238, "logps/chosen": -360.0526428222656, "logps/ref_chosen": -295.9674072265625, "logps/ref_rejected": -280.111572265625, "logps/rejected": -385.70233154296875, "loss": 4.3812, "margin_dpo/margin_mean": 41.505462646484375, "margin_dpo/margin_std": 66.30912780761719, "step": 252 }, { "epoch": 0.5298429319371728, "fcm_dpo/beta": 0.013700338080525398, "fcm_dpo/delta": -0.014694290235638618, "fcm_dpo/margin": 40.862892150878906, "fcm_dpo/q_t": 0.3878205716609955, "grad_norm": 98.66238403320312, "learning_rate": 2.692040951966617e-07, "logits/chosen": -0.8532537221908569, "logits/rejected": -0.8468297123908997, "logps/chosen": -346.66949462890625, "logps/ref_chosen": -277.072265625, "logps/ref_rejected": -247.31643676757812, "logps/rejected": -357.7765808105469, "loss": 4.418, "margin_dpo/margin_mean": 40.862892150878906, "margin_dpo/margin_std": 68.3602294921875, "step": 253 }, { "epoch": 0.5319371727748691, "fcm_dpo/beta": 0.013948986306786537, "fcm_dpo/delta": -0.05029097944498062, "fcm_dpo/margin": 43.06998062133789, "fcm_dpo/q_t": 0.37745508551597595, "grad_norm": 101.81684875488281, "learning_rate": 2.6737824107379947e-07, "logits/chosen": -0.7788005471229553, "logits/rejected": -0.7665879130363464, "logps/chosen": -334.8228454589844, "logps/ref_chosen": -269.9478454589844, "logps/ref_rejected": -249.45005798339844, "logps/rejected": -357.3950500488281, "loss": 4.2058, "margin_dpo/margin_mean": 43.06998062133789, "margin_dpo/margin_std": 61.4660530090332, "step": 254 }, { "epoch": 0.5340314136125655, "fcm_dpo/beta": 0.01308943796902895, "fcm_dpo/delta": -0.05487431585788727, "fcm_dpo/margin": 49.75128173828125, "fcm_dpo/q_t": 0.3672358989715576, "grad_norm": 89.9389877319336, "learning_rate": 2.655514550086086e-07, "logits/chosen": -0.7998797297477722, "logits/rejected": -0.7680299282073975, "logps/chosen": -369.9079284667969, "logps/ref_chosen": -306.6552734375, "logps/ref_rejected": -254.47528076171875, "logps/rejected": -367.47918701171875, "loss": 4.1475, "margin_dpo/margin_mean": 49.75128173828125, "margin_dpo/margin_std": 72.38668823242188, "step": 255 }, { "epoch": 0.5361256544502618, "fcm_dpo/beta": 0.012760424986481667, "fcm_dpo/delta": -0.006860591471195221, "fcm_dpo/margin": 46.954036712646484, "fcm_dpo/q_t": 0.3672289550304413, "grad_norm": 254.555908203125, "learning_rate": 2.6372383496608186e-07, "logits/chosen": -0.8446077704429626, "logits/rejected": -0.8431991338729858, "logps/chosen": -387.7762451171875, "logps/ref_chosen": -323.7181701660156, "logps/ref_rejected": -254.1871337890625, "logps/rejected": -365.19921875, "loss": 4.5513, "margin_dpo/margin_mean": 46.95404052734375, "margin_dpo/margin_std": 78.9866714477539, "step": 256 }, { "epoch": 0.5382198952879581, "fcm_dpo/beta": 0.012355271726846695, "fcm_dpo/delta": -0.011902200058102608, "fcm_dpo/margin": 49.428714752197266, "fcm_dpo/q_t": 0.37219175696372986, "grad_norm": 96.56497955322266, "learning_rate": 2.618954789559356e-07, "logits/chosen": -0.8214514255523682, "logits/rejected": -0.811537504196167, "logps/chosen": -330.85174560546875, "logps/ref_chosen": -267.21209716796875, "logps/ref_rejected": -249.12579345703125, "logps/rejected": -362.19415283203125, "loss": 4.05, "margin_dpo/margin_mean": 49.428714752197266, "margin_dpo/margin_std": 66.22650146484375, "step": 257 }, { "epoch": 0.5403141361256545, "fcm_dpo/beta": 0.011946265585720539, "fcm_dpo/delta": -0.015679441392421722, "fcm_dpo/margin": 51.2308349609375, "fcm_dpo/q_t": 0.3674898147583008, "grad_norm": 103.5653305053711, "learning_rate": 2.600664850273538e-07, "logits/chosen": -0.8514293432235718, "logits/rejected": -0.8220398426055908, "logps/chosen": -344.84173583984375, "logps/ref_chosen": -277.6827392578125, "logps/ref_rejected": -250.73385620117188, "logps/rejected": -369.1236572265625, "loss": 4.021, "margin_dpo/margin_mean": 51.2308349609375, "margin_dpo/margin_std": 63.38447570800781, "step": 258 }, { "epoch": 0.5424083769633508, "fcm_dpo/beta": 0.012485547922551632, "fcm_dpo/delta": 0.019997823983430862, "fcm_dpo/margin": 46.484439849853516, "fcm_dpo/q_t": 0.3781452775001526, "grad_norm": 86.96826934814453, "learning_rate": 2.582369512637302e-07, "logits/chosen": -0.8579553365707397, "logits/rejected": -0.8558469414710999, "logps/chosen": -352.1661071777344, "logps/ref_chosen": -294.6099853515625, "logps/ref_rejected": -272.2725830078125, "logps/rejected": -376.31317138671875, "loss": 4.118, "margin_dpo/margin_mean": 46.484439849853516, "margin_dpo/margin_std": 63.52824020385742, "step": 259 }, { "epoch": 0.5445026178010471, "fcm_dpo/beta": 0.013305707834661007, "fcm_dpo/delta": 0.1523754745721817, "fcm_dpo/margin": 22.55513572692871, "fcm_dpo/q_t": 0.43987154960632324, "grad_norm": 106.72164916992188, "learning_rate": 2.5640697577740815e-07, "logits/chosen": -0.8499176502227783, "logits/rejected": -0.8482145667076111, "logps/chosen": -356.1315612792969, "logps/ref_chosen": -290.85711669921875, "logps/ref_rejected": -277.5970153808594, "logps/rejected": -365.4266357421875, "loss": 5.1348, "margin_dpo/margin_mean": 22.55513572692871, "margin_dpo/margin_std": 64.21441650390625, "step": 260 }, { "epoch": 0.5465968586387434, "fcm_dpo/beta": 0.013940032571554184, "fcm_dpo/delta": -0.03943036496639252, "fcm_dpo/margin": 37.314186096191406, "fcm_dpo/q_t": 0.39753836393356323, "grad_norm": 122.23685455322266, "learning_rate": 2.5457665670441937e-07, "logits/chosen": -0.7377340197563171, "logits/rejected": -0.7536084055900574, "logps/chosen": -320.2516174316406, "logps/ref_chosen": -251.13223266601562, "logps/ref_rejected": -244.76016235351562, "logps/rejected": -351.1937255859375, "loss": 4.6636, "margin_dpo/margin_mean": 37.314186096191406, "margin_dpo/margin_std": 71.60857391357422, "step": 261 }, { "epoch": 0.5486910994764398, "fcm_dpo/beta": 0.013438230380415916, "fcm_dpo/delta": -0.06206154823303223, "fcm_dpo/margin": 48.95298767089844, "fcm_dpo/q_t": 0.366224467754364, "grad_norm": 105.1137466430664, "learning_rate": 2.527460921992209e-07, "logits/chosen": -0.7684426307678223, "logits/rejected": -0.762508749961853, "logps/chosen": -362.34210205078125, "logps/ref_chosen": -299.7217712402344, "logps/ref_rejected": -277.0969543457031, "logps/rejected": -388.67022705078125, "loss": 4.0258, "margin_dpo/margin_mean": 48.95298767089844, "margin_dpo/margin_std": 65.29669189453125, "step": 262 }, { "epoch": 0.5507853403141362, "fcm_dpo/beta": 0.012967620976269245, "fcm_dpo/delta": -0.021967921406030655, "fcm_dpo/margin": 40.99163055419922, "fcm_dpo/q_t": 0.3881154954433441, "grad_norm": 88.81194305419922, "learning_rate": 2.509153804294318e-07, "logits/chosen": -0.7807701826095581, "logits/rejected": -0.7647296786308289, "logps/chosen": -348.8820495605469, "logps/ref_chosen": -279.95257568359375, "logps/ref_rejected": -256.5327453613281, "logps/rejected": -366.4538269042969, "loss": 4.4404, "margin_dpo/margin_mean": 40.99162673950195, "margin_dpo/margin_std": 67.29783630371094, "step": 263 }, { "epoch": 0.5528795811518324, "fcm_dpo/beta": 0.012286883778870106, "fcm_dpo/delta": -0.06830502301454544, "fcm_dpo/margin": 49.56260681152344, "fcm_dpo/q_t": 0.36980560421943665, "grad_norm": 102.09204864501953, "learning_rate": 2.4908461957056825e-07, "logits/chosen": -0.7926238179206848, "logits/rejected": -0.7938596606254578, "logps/chosen": -321.9147033691406, "logps/ref_chosen": -260.53509521484375, "logps/ref_rejected": -255.53799438476562, "logps/rejected": -366.4801940917969, "loss": 4.0499, "margin_dpo/margin_mean": 49.56260681152344, "margin_dpo/margin_std": 64.20928955078125, "step": 264 }, { "epoch": 0.5549738219895288, "fcm_dpo/beta": 0.011741209775209427, "fcm_dpo/delta": -0.022350091487169266, "fcm_dpo/margin": 52.69523620605469, "fcm_dpo/q_t": 0.3699071407318115, "grad_norm": 84.6684341430664, "learning_rate": 2.4725390780077905e-07, "logits/chosen": -0.8557706475257874, "logits/rejected": -0.8646640181541443, "logps/chosen": -346.32525634765625, "logps/ref_chosen": -283.7130432128906, "logps/ref_rejected": -270.3209533691406, "logps/rejected": -385.62835693359375, "loss": 4.1084, "margin_dpo/margin_mean": 52.69523620605469, "margin_dpo/margin_std": 71.56565856933594, "step": 265 }, { "epoch": 0.5570680628272251, "fcm_dpo/beta": 0.011872725561261177, "fcm_dpo/delta": -0.0177978053689003, "fcm_dpo/margin": 51.798423767089844, "fcm_dpo/q_t": 0.3671649396419525, "grad_norm": 76.50971221923828, "learning_rate": 2.454233432955807e-07, "logits/chosen": -0.8721534609794617, "logits/rejected": -0.8421223163604736, "logps/chosen": -332.63873291015625, "logps/ref_chosen": -278.09930419921875, "logps/ref_rejected": -260.6734619140625, "logps/rejected": -367.0113525390625, "loss": 3.9391, "margin_dpo/margin_mean": 51.798423767089844, "margin_dpo/margin_std": 59.57025146484375, "step": 266 }, { "epoch": 0.5591623036649215, "fcm_dpo/beta": 0.011853402480483055, "fcm_dpo/delta": 0.05013914406299591, "fcm_dpo/margin": 42.46312713623047, "fcm_dpo/q_t": 0.3929550051689148, "grad_norm": 92.15836334228516, "learning_rate": 2.435930242225919e-07, "logits/chosen": -0.8216646313667297, "logits/rejected": -0.8368365168571472, "logps/chosen": -349.4070129394531, "logps/ref_chosen": -280.33319091796875, "logps/ref_rejected": -247.78099060058594, "logps/rejected": -359.31793212890625, "loss": 4.3247, "margin_dpo/margin_mean": 42.46312713623047, "margin_dpo/margin_std": 62.86342239379883, "step": 267 }, { "epoch": 0.5612565445026177, "fcm_dpo/beta": 0.012068388983607292, "fcm_dpo/delta": -0.048395268619060516, "fcm_dpo/margin": 53.47141647338867, "fcm_dpo/q_t": 0.3641560673713684, "grad_norm": 97.78318786621094, "learning_rate": 2.4176304873626984e-07, "logits/chosen": -0.7805606722831726, "logits/rejected": -0.7611721158027649, "logps/chosen": -369.9452209472656, "logps/ref_chosen": -304.1787109375, "logps/ref_rejected": -272.80316162109375, "logps/rejected": -392.04107666015625, "loss": 3.9723, "margin_dpo/margin_mean": 53.47141647338867, "margin_dpo/margin_std": 67.32571411132812, "step": 268 }, { "epoch": 0.5633507853403141, "fcm_dpo/beta": 0.012517577037215233, "fcm_dpo/delta": 0.11093584448099136, "fcm_dpo/margin": 39.28661346435547, "fcm_dpo/q_t": 0.39567479491233826, "grad_norm": 108.96583557128906, "learning_rate": 2.399335149726463e-07, "logits/chosen": -0.8175703287124634, "logits/rejected": -0.8138397932052612, "logps/chosen": -320.2642517089844, "logps/ref_chosen": -249.84512329101562, "logps/ref_rejected": -223.37356567382812, "logps/rejected": -333.079345703125, "loss": 4.5504, "margin_dpo/margin_mean": 39.28661346435547, "margin_dpo/margin_std": 71.5968017578125, "step": 269 }, { "epoch": 0.5654450261780105, "fcm_dpo/beta": 0.012729712761938572, "fcm_dpo/delta": 0.005217651836574078, "fcm_dpo/margin": 46.67543029785156, "fcm_dpo/q_t": 0.3786097764968872, "grad_norm": 101.96600341796875, "learning_rate": 2.381045210440644e-07, "logits/chosen": -0.8885576128959656, "logits/rejected": -0.9054932594299316, "logps/chosen": -394.0969543457031, "logps/ref_chosen": -318.5623779296875, "logps/ref_rejected": -281.1880798339844, "logps/rejected": -403.39813232421875, "loss": 4.3489, "margin_dpo/margin_mean": 46.67543029785156, "margin_dpo/margin_std": 76.03194427490234, "step": 270 }, { "epoch": 0.5675392670157068, "fcm_dpo/beta": 0.013197865337133408, "fcm_dpo/delta": 0.0033783800899982452, "fcm_dpo/margin": 45.022972106933594, "fcm_dpo/q_t": 0.38268500566482544, "grad_norm": 100.24028778076172, "learning_rate": 2.3627616503391812e-07, "logits/chosen": -0.7424062490463257, "logits/rejected": -0.7395483255386353, "logps/chosen": -357.7034606933594, "logps/ref_chosen": -284.104736328125, "logps/ref_rejected": -253.9580535888672, "logps/rejected": -372.57977294921875, "loss": 4.285, "margin_dpo/margin_mean": 45.022972106933594, "margin_dpo/margin_std": 69.82579803466797, "step": 271 }, { "epoch": 0.5696335078534032, "fcm_dpo/beta": 0.012690500356256962, "fcm_dpo/delta": -0.032762445509433746, "fcm_dpo/margin": 49.54048156738281, "fcm_dpo/q_t": 0.3718162178993225, "grad_norm": 91.42510986328125, "learning_rate": 2.344485449913914e-07, "logits/chosen": -0.8627545833587646, "logits/rejected": -0.8512448072433472, "logps/chosen": -365.7294006347656, "logps/ref_chosen": -297.3590087890625, "logps/ref_rejected": -279.20196533203125, "logps/rejected": -397.1128845214844, "loss": 4.2931, "margin_dpo/margin_mean": 49.54048156738281, "margin_dpo/margin_std": 78.09527587890625, "step": 272 }, { "epoch": 0.5717277486910994, "fcm_dpo/beta": 0.012210111133754253, "fcm_dpo/delta": -0.020789261907339096, "fcm_dpo/margin": 50.5628547668457, "fcm_dpo/q_t": 0.3735979199409485, "grad_norm": 97.6107177734375, "learning_rate": 2.3262175892620062e-07, "logits/chosen": -0.8329838514328003, "logits/rejected": -0.8465803265571594, "logps/chosen": -365.12774658203125, "logps/ref_chosen": -293.20574951171875, "logps/ref_rejected": -274.7646789550781, "logps/rejected": -397.24951171875, "loss": 4.2177, "margin_dpo/margin_mean": 50.5628547668457, "margin_dpo/margin_std": 74.91758728027344, "step": 273 }, { "epoch": 0.5738219895287958, "fcm_dpo/beta": 0.011717407964169979, "fcm_dpo/delta": -0.11725673824548721, "fcm_dpo/margin": 60.52649688720703, "fcm_dpo/q_t": 0.3518860936164856, "grad_norm": 91.55536651611328, "learning_rate": 2.3079590480333827e-07, "logits/chosen": -0.7906344532966614, "logits/rejected": -0.7621797323226929, "logps/chosen": -341.36102294921875, "logps/ref_chosen": -270.55865478515625, "logps/ref_rejected": -239.47048950195312, "logps/rejected": -370.7992858886719, "loss": 3.8357, "margin_dpo/margin_mean": 60.5264892578125, "margin_dpo/margin_std": 72.19639587402344, "step": 274 }, { "epoch": 0.5759162303664922, "fcm_dpo/beta": 0.010930696502327919, "fcm_dpo/delta": -0.049976646900177, "fcm_dpo/margin": 59.051597595214844, "fcm_dpo/q_t": 0.3641063868999481, "grad_norm": 70.57231140136719, "learning_rate": 2.2897108053782e-07, "logits/chosen": -0.8348967432975769, "logits/rejected": -0.8198622465133667, "logps/chosen": -314.8291320800781, "logps/ref_chosen": -250.31922912597656, "logps/ref_rejected": -249.3187255859375, "logps/rejected": -372.8802490234375, "loss": 3.9217, "margin_dpo/margin_mean": 59.051597595214844, "margin_dpo/margin_std": 71.34449005126953, "step": 275 }, { "epoch": 0.5780104712041885, "fcm_dpo/beta": 0.01073821447789669, "fcm_dpo/delta": 0.05224524065852165, "fcm_dpo/margin": 51.23969268798828, "fcm_dpo/q_t": 0.3837069272994995, "grad_norm": 86.6988754272461, "learning_rate": 2.2714738398943308e-07, "logits/chosen": -0.9012481570243835, "logits/rejected": -0.8782431483268738, "logps/chosen": -370.5542297363281, "logps/ref_chosen": -297.6310729980469, "logps/ref_rejected": -295.225830078125, "logps/rejected": -419.388671875, "loss": 4.2576, "margin_dpo/margin_mean": 51.23969268798828, "margin_dpo/margin_std": 74.20903015136719, "step": 276 }, { "epoch": 0.5801047120418849, "fcm_dpo/beta": 0.01166857872158289, "fcm_dpo/delta": 0.06670135259628296, "fcm_dpo/margin": 45.79502487182617, "fcm_dpo/q_t": 0.3874945640563965, "grad_norm": 98.19717407226562, "learning_rate": 2.2532491295748865e-07, "logits/chosen": -0.8439708948135376, "logits/rejected": -0.8469751477241516, "logps/chosen": -344.6778869628906, "logps/ref_chosen": -266.3604736328125, "logps/ref_rejected": -253.36767578125, "logps/rejected": -377.4801025390625, "loss": 4.4195, "margin_dpo/margin_mean": 45.795021057128906, "margin_dpo/margin_std": 75.37797546386719, "step": 277 }, { "epoch": 0.5821989528795811, "fcm_dpo/beta": 0.01215837337076664, "fcm_dpo/delta": 0.05089471489191055, "fcm_dpo/margin": 33.36516189575195, "fcm_dpo/q_t": 0.42117270827293396, "grad_norm": 116.95894622802734, "learning_rate": 2.2350376517557726e-07, "logits/chosen": -0.8678665161132812, "logits/rejected": -0.8359065651893616, "logps/chosen": -357.11553955078125, "logps/ref_chosen": -267.40728759765625, "logps/ref_rejected": -229.5758514404297, "logps/rejected": -352.64923095703125, "loss": 5.013, "margin_dpo/margin_mean": 33.36516571044922, "margin_dpo/margin_std": 81.0997543334961, "step": 278 }, { "epoch": 0.5842931937172775, "fcm_dpo/beta": 0.011790696531534195, "fcm_dpo/delta": -0.12328186631202698, "fcm_dpo/margin": 55.72016525268555, "fcm_dpo/q_t": 0.3644426763057709, "grad_norm": 110.0771484375, "learning_rate": 2.2168403830632769e-07, "logits/chosen": -0.7839449644088745, "logits/rejected": -0.7698712348937988, "logps/chosen": -393.23095703125, "logps/ref_chosen": -313.3677978515625, "logps/ref_rejected": -299.1744384765625, "logps/rejected": -434.75775146484375, "loss": 4.1056, "margin_dpo/margin_mean": 55.72016906738281, "margin_dpo/margin_std": 77.62565612792969, "step": 279 }, { "epoch": 0.5863874345549738, "fcm_dpo/beta": 0.011370867490768433, "fcm_dpo/delta": 0.04377196356654167, "fcm_dpo/margin": 49.120079040527344, "fcm_dpo/q_t": 0.38405805826187134, "grad_norm": 80.28543090820312, "learning_rate": 2.1986582993616925e-07, "logits/chosen": -0.8500107526779175, "logits/rejected": -0.8614104986190796, "logps/chosen": -333.7889404296875, "logps/ref_chosen": -265.5558166503906, "logps/ref_rejected": -247.1573944091797, "logps/rejected": -364.5106201171875, "loss": 4.3234, "margin_dpo/margin_mean": 49.120079040527344, "margin_dpo/margin_std": 78.11002349853516, "step": 280 }, { "epoch": 0.5884816753926702, "fcm_dpo/beta": 0.011548914946615696, "fcm_dpo/delta": 0.035830557346343994, "fcm_dpo/margin": 48.90550994873047, "fcm_dpo/q_t": 0.3849385976791382, "grad_norm": 104.87403869628906, "learning_rate": 2.1804923757009882e-07, "logits/chosen": -0.8356598615646362, "logits/rejected": -0.84555983543396, "logps/chosen": -381.03668212890625, "logps/ref_chosen": -295.2995910644531, "logps/ref_rejected": -293.80877685546875, "logps/rejected": -428.4513244628906, "loss": 4.2998, "margin_dpo/margin_mean": 48.90550994873047, "margin_dpo/margin_std": 75.06422424316406, "step": 281 }, { "epoch": 0.5905759162303665, "fcm_dpo/beta": 0.011683318763971329, "fcm_dpo/delta": -0.01186651736497879, "fcm_dpo/margin": 52.158531188964844, "fcm_dpo/q_t": 0.375276118516922, "grad_norm": 104.86578369140625, "learning_rate": 2.1623435862645205e-07, "logits/chosen": -0.8201556205749512, "logits/rejected": -0.8232011795043945, "logps/chosen": -391.24603271484375, "logps/ref_chosen": -318.63714599609375, "logps/ref_rejected": -273.5943603515625, "logps/rejected": -398.36175537109375, "loss": 4.2261, "margin_dpo/margin_mean": 52.158531188964844, "margin_dpo/margin_std": 77.26969146728516, "step": 282 }, { "epoch": 0.5926701570680628, "fcm_dpo/beta": 0.01230486948043108, "fcm_dpo/delta": 0.03790903091430664, "fcm_dpo/margin": 45.61647033691406, "fcm_dpo/q_t": 0.38794541358947754, "grad_norm": 91.48641204833984, "learning_rate": 2.1442129043167873e-07, "logits/chosen": -0.8388844728469849, "logits/rejected": -0.835168182849884, "logps/chosen": -332.825927734375, "logps/ref_chosen": -254.66053771972656, "logps/ref_rejected": -236.8627166748047, "logps/rejected": -360.64459228515625, "loss": 4.3522, "margin_dpo/margin_mean": 45.61647033691406, "margin_dpo/margin_std": 73.58220672607422, "step": 283 }, { "epoch": 0.5947643979057592, "fcm_dpo/beta": 0.011913403868675232, "fcm_dpo/delta": -0.03183186054229736, "fcm_dpo/margin": 52.571319580078125, "fcm_dpo/q_t": 0.3742894232273102, "grad_norm": 125.63947296142578, "learning_rate": 2.1261013021512378e-07, "logits/chosen": -0.8041011095046997, "logits/rejected": -0.7851691246032715, "logps/chosen": -353.00323486328125, "logps/ref_chosen": -273.355224609375, "logps/ref_rejected": -259.84759521484375, "logps/rejected": -392.0669250488281, "loss": 4.3049, "margin_dpo/margin_mean": 52.57131576538086, "margin_dpo/margin_std": 81.13550567626953, "step": 284 }, { "epoch": 0.5968586387434555, "fcm_dpo/beta": 0.01256585493683815, "fcm_dpo/delta": 0.0635511577129364, "fcm_dpo/margin": 36.67383575439453, "fcm_dpo/q_t": 0.4085825979709625, "grad_norm": 145.48236083984375, "learning_rate": 2.1080097510381294e-07, "logits/chosen": -0.8173208236694336, "logits/rejected": -0.8177124857902527, "logps/chosen": -394.8221130371094, "logps/ref_chosen": -309.8022155761719, "logps/ref_rejected": -279.11846923828125, "logps/rejected": -400.81219482421875, "loss": 4.781, "margin_dpo/margin_mean": 36.67383575439453, "margin_dpo/margin_std": 75.84709167480469, "step": 285 }, { "epoch": 0.5989528795811518, "fcm_dpo/beta": 0.012453727424144745, "fcm_dpo/delta": 0.030384495854377747, "fcm_dpo/margin": 45.85185623168945, "fcm_dpo/q_t": 0.3881286382675171, "grad_norm": 128.9279022216797, "learning_rate": 2.089939221172446e-07, "logits/chosen": -0.80593341588974, "logits/rejected": -0.7947626709938049, "logps/chosen": -348.8951416015625, "logps/ref_chosen": -271.4655456542969, "logps/ref_rejected": -279.531494140625, "logps/rejected": -402.8129577636719, "loss": 4.4438, "margin_dpo/margin_mean": 45.85185241699219, "margin_dpo/margin_std": 79.64374542236328, "step": 286 }, { "epoch": 0.6010471204188481, "fcm_dpo/beta": 0.01248287595808506, "fcm_dpo/delta": -0.01810862123966217, "fcm_dpo/margin": 49.31248474121094, "fcm_dpo/q_t": 0.37548819184303284, "grad_norm": 108.0594253540039, "learning_rate": 2.0718906816218595e-07, "logits/chosen": -0.8343806266784668, "logits/rejected": -0.8241921663284302, "logps/chosen": -350.6217041015625, "logps/ref_chosen": -277.0932312011719, "logps/ref_rejected": -233.55599975585938, "logps/rejected": -356.3969421386719, "loss": 4.3305, "margin_dpo/margin_mean": 49.31248474121094, "margin_dpo/margin_std": 79.3702621459961, "step": 287 }, { "epoch": 0.6031413612565445, "fcm_dpo/beta": 0.013002946972846985, "fcm_dpo/delta": -0.011839449405670166, "fcm_dpo/margin": 46.86420822143555, "fcm_dpo/q_t": 0.3763912618160248, "grad_norm": 125.55633544921875, "learning_rate": 2.053865100274774e-07, "logits/chosen": -0.8246794939041138, "logits/rejected": -0.8403179049491882, "logps/chosen": -362.42303466796875, "logps/ref_chosen": -293.1681823730469, "logps/ref_rejected": -263.4059143066406, "logps/rejected": -379.5249938964844, "loss": 4.252, "margin_dpo/margin_mean": 46.86420822143555, "margin_dpo/margin_std": 71.86483764648438, "step": 288 }, { "epoch": 0.6052356020942409, "fcm_dpo/beta": 0.01310706790536642, "fcm_dpo/delta": 0.12158410996198654, "fcm_dpo/margin": 33.08653259277344, "fcm_dpo/q_t": 0.41252899169921875, "grad_norm": 101.58392333984375, "learning_rate": 2.035863443788411e-07, "logits/chosen": -0.8192298412322998, "logits/rejected": -0.8059917688369751, "logps/chosen": -412.61199951171875, "logps/ref_chosen": -329.9574279785156, "logps/ref_rejected": -276.7565002441406, "logps/rejected": -392.4975891113281, "loss": 4.7866, "margin_dpo/margin_mean": 33.08653259277344, "margin_dpo/margin_std": 70.98587799072266, "step": 289 }, { "epoch": 0.6073298429319371, "fcm_dpo/beta": 0.0129544697701931, "fcm_dpo/delta": -0.07823115587234497, "fcm_dpo/margin": 44.445472717285156, "fcm_dpo/q_t": 0.38723504543304443, "grad_norm": 132.9167022705078, "learning_rate": 2.0178866775369774e-07, "logits/chosen": -0.833109974861145, "logits/rejected": -0.7754253149032593, "logps/chosen": -399.00225830078125, "logps/ref_chosen": -324.6690673828125, "logps/ref_rejected": -311.8439636230469, "logps/rejected": -430.6226806640625, "loss": 4.4788, "margin_dpo/margin_mean": 44.445472717285156, "margin_dpo/margin_std": 75.31282043457031, "step": 290 }, { "epoch": 0.6094240837696335, "fcm_dpo/beta": 0.012139367870986462, "fcm_dpo/delta": -0.09630902111530304, "fcm_dpo/margin": 56.51261520385742, "fcm_dpo/q_t": 0.36019349098205566, "grad_norm": 103.76278686523438, "learning_rate": 1.9999357655598891e-07, "logits/chosen": -0.7965466380119324, "logits/rejected": -0.7893682718276978, "logps/chosen": -342.99267578125, "logps/ref_chosen": -274.1440734863281, "logps/ref_rejected": -278.07208251953125, "logps/rejected": -403.4332580566406, "loss": 3.9622, "margin_dpo/margin_mean": 56.51261520385742, "margin_dpo/margin_std": 71.91201782226562, "step": 291 }, { "epoch": 0.6115183246073298, "fcm_dpo/beta": 0.012333599850535393, "fcm_dpo/delta": 0.09315042197704315, "fcm_dpo/margin": 41.38365936279297, "fcm_dpo/q_t": 0.39496973156929016, "grad_norm": 106.41152954101562, "learning_rate": 1.9820116705100775e-07, "logits/chosen": -0.800334632396698, "logits/rejected": -0.7948342561721802, "logps/chosen": -324.21563720703125, "logps/ref_chosen": -259.3636779785156, "logps/ref_rejected": -279.30218505859375, "logps/rejected": -385.53778076171875, "loss": 4.5122, "margin_dpo/margin_mean": 41.38365936279297, "margin_dpo/margin_std": 71.46543884277344, "step": 292 }, { "epoch": 0.6136125654450262, "fcm_dpo/beta": 0.012719634920358658, "fcm_dpo/delta": -0.04290001466870308, "fcm_dpo/margin": 50.179176330566406, "fcm_dpo/q_t": 0.3686256408691406, "grad_norm": 98.5149917602539, "learning_rate": 1.9641153536023642e-07, "logits/chosen": -0.8973373174667358, "logits/rejected": -0.8607571125030518, "logps/chosen": -376.51239013671875, "logps/ref_chosen": -303.77081298828125, "logps/ref_rejected": -270.07513427734375, "logps/rejected": -392.9958801269531, "loss": 4.0359, "margin_dpo/margin_mean": 50.179176330566406, "margin_dpo/margin_std": 66.0428466796875, "step": 293 }, { "epoch": 0.6157068062827226, "fcm_dpo/beta": 0.012579267844557762, "fcm_dpo/delta": 0.0067170350812375546, "fcm_dpo/margin": 47.1030158996582, "fcm_dpo/q_t": 0.381188303232193, "grad_norm": 109.40179443359375, "learning_rate": 1.9462477745619106e-07, "logits/chosen": -0.795992374420166, "logits/rejected": -0.8062705993652344, "logps/chosen": -303.7636413574219, "logps/ref_chosen": -240.23831176757812, "logps/ref_rejected": -229.187744140625, "logps/rejected": -339.81610107421875, "loss": 4.2363, "margin_dpo/margin_mean": 47.10301971435547, "margin_dpo/margin_std": 71.89276123046875, "step": 294 }, { "epoch": 0.6178010471204188, "fcm_dpo/beta": 0.012650757096707821, "fcm_dpo/delta": 0.03361117094755173, "fcm_dpo/margin": 44.81559371948242, "fcm_dpo/q_t": 0.3833611309528351, "grad_norm": 84.96255493164062, "learning_rate": 1.928409891572757e-07, "logits/chosen": -0.7861504554748535, "logits/rejected": -0.8039959073066711, "logps/chosen": -319.04840087890625, "logps/ref_chosen": -251.00970458984375, "logps/ref_rejected": -244.15142822265625, "logps/rejected": -357.00567626953125, "loss": 4.2769, "margin_dpo/margin_mean": 44.815589904785156, "margin_dpo/margin_std": 67.42735290527344, "step": 295 }, { "epoch": 0.6198952879581152, "fcm_dpo/beta": 0.012142694555222988, "fcm_dpo/delta": -0.11849077045917511, "fcm_dpo/margin": 58.27566909790039, "fcm_dpo/q_t": 0.35460710525512695, "grad_norm": 113.73211669921875, "learning_rate": 1.9106026612264315e-07, "logits/chosen": -0.7692264914512634, "logits/rejected": -0.7446385025978088, "logps/chosen": -364.2586669921875, "logps/ref_chosen": -293.880615234375, "logps/ref_rejected": -283.4175720214844, "logps/rejected": -412.0712585449219, "loss": 4.0144, "margin_dpo/margin_mean": 58.27566909790039, "margin_dpo/margin_std": 77.97174835205078, "step": 296 }, { "epoch": 0.6219895287958115, "fcm_dpo/beta": 0.01153514999896288, "fcm_dpo/delta": 0.015021094121038914, "fcm_dpo/margin": 42.32819366455078, "fcm_dpo/q_t": 0.3964134454727173, "grad_norm": 96.94518280029297, "learning_rate": 1.8928270384706582e-07, "logits/chosen": -0.878042459487915, "logits/rejected": -0.8735657930374146, "logps/chosen": -357.3432312011719, "logps/ref_chosen": -289.4600830078125, "logps/ref_rejected": -283.69110107421875, "logps/rejected": -393.9024353027344, "loss": 4.4507, "margin_dpo/margin_mean": 42.32819747924805, "margin_dpo/margin_std": 69.12220764160156, "step": 297 }, { "epoch": 0.6240837696335079, "fcm_dpo/beta": 0.011558500118553638, "fcm_dpo/delta": -0.06854025274515152, "fcm_dpo/margin": 48.931365966796875, "fcm_dpo/q_t": 0.3847343921661377, "grad_norm": 113.2163314819336, "learning_rate": 1.875083976558136e-07, "logits/chosen": -0.7906237840652466, "logits/rejected": -0.7819389700889587, "logps/chosen": -369.0527648925781, "logps/ref_chosen": -306.5150146484375, "logps/ref_rejected": -280.6969909667969, "logps/rejected": -392.16607666015625, "loss": 4.3345, "margin_dpo/margin_mean": 48.931373596191406, "margin_dpo/margin_std": 76.4260482788086, "step": 298 }, { "epoch": 0.6261780104712041, "fcm_dpo/beta": 0.011346298269927502, "fcm_dpo/delta": 0.05653999000787735, "fcm_dpo/margin": 42.16632843017578, "fcm_dpo/q_t": 0.39747947454452515, "grad_norm": 98.81658935546875, "learning_rate": 1.8573744269954297e-07, "logits/chosen": -0.7703346610069275, "logits/rejected": -0.7614036202430725, "logps/chosen": -357.9806213378906, "logps/ref_chosen": -281.36376953125, "logps/ref_rejected": -270.39508056640625, "logps/rejected": -389.1782531738281, "loss": 4.4279, "margin_dpo/margin_mean": 42.166324615478516, "margin_dpo/margin_std": 66.2205810546875, "step": 299 }, { "epoch": 0.6282722513089005, "fcm_dpo/beta": 0.01241186074912548, "fcm_dpo/delta": 0.0943484753370285, "fcm_dpo/margin": 40.93339157104492, "fcm_dpo/q_t": 0.39440780878067017, "grad_norm": 134.6527557373047, "learning_rate": 1.839699339491937e-07, "logits/chosen": -0.8087725043296814, "logits/rejected": -0.7854644656181335, "logps/chosen": -390.7112121582031, "logps/ref_chosen": -314.83575439453125, "logps/ref_rejected": -269.1154479980469, "logps/rejected": -385.92431640625, "loss": 4.5001, "margin_dpo/margin_mean": 40.93339538574219, "margin_dpo/margin_std": 71.97175598144531, "step": 300 }, { "epoch": 0.6303664921465969, "fcm_dpo/beta": 0.013004500418901443, "fcm_dpo/delta": 0.04637196660041809, "fcm_dpo/margin": 42.64813995361328, "fcm_dpo/q_t": 0.38811761140823364, "grad_norm": 94.59200286865234, "learning_rate": 1.8220596619089573e-07, "logits/chosen": -0.8116219639778137, "logits/rejected": -0.8269953727722168, "logps/chosen": -351.3847351074219, "logps/ref_chosen": -279.89453125, "logps/ref_rejected": -271.6694641113281, "logps/rejected": -385.80780029296875, "loss": 4.3694, "margin_dpo/margin_mean": 42.64814758300781, "margin_dpo/margin_std": 68.6011962890625, "step": 301 }, { "epoch": 0.6324607329842932, "fcm_dpo/beta": 0.012540532276034355, "fcm_dpo/delta": -0.08675913512706757, "fcm_dpo/margin": 54.20530700683594, "fcm_dpo/q_t": 0.3618788719177246, "grad_norm": 120.73770904541016, "learning_rate": 1.8044563402088682e-07, "logits/chosen": -0.7918750643730164, "logits/rejected": -0.7770553827285767, "logps/chosen": -340.3618469238281, "logps/ref_chosen": -271.3318176269531, "logps/ref_rejected": -256.5587158203125, "logps/rejected": -379.79400634765625, "loss": 4.0268, "margin_dpo/margin_mean": 54.20530700683594, "margin_dpo/margin_std": 74.60562133789062, "step": 302 }, { "epoch": 0.6345549738219896, "fcm_dpo/beta": 0.01216448936611414, "fcm_dpo/delta": -0.04195103794336319, "fcm_dpo/margin": 48.33647537231445, "fcm_dpo/q_t": 0.3784167170524597, "grad_norm": 115.15612030029297, "learning_rate": 1.7868903184043885e-07, "logits/chosen": -0.7648223042488098, "logits/rejected": -0.7485473155975342, "logps/chosen": -378.9388427734375, "logps/ref_chosen": -304.88104248046875, "logps/ref_rejected": -269.063720703125, "logps/rejected": -391.4580078125, "loss": 4.3028, "margin_dpo/margin_mean": 48.33647537231445, "margin_dpo/margin_std": 75.0361557006836, "step": 303 }, { "epoch": 0.6366492146596858, "fcm_dpo/beta": 0.01175426971167326, "fcm_dpo/delta": -0.016764672473073006, "fcm_dpo/margin": 52.35718536376953, "fcm_dpo/q_t": 0.37501004338264465, "grad_norm": 113.5647964477539, "learning_rate": 1.7693625385079574e-07, "logits/chosen": -0.7781350016593933, "logits/rejected": -0.7962645888328552, "logps/chosen": -371.59942626953125, "logps/ref_chosen": -290.7109680175781, "logps/ref_rejected": -237.6885986328125, "logps/rejected": -370.9342346191406, "loss": 4.1504, "margin_dpo/margin_mean": 52.35718536376953, "margin_dpo/margin_std": 76.69111633300781, "step": 304 }, { "epoch": 0.6387434554973822, "fcm_dpo/beta": 0.010681025683879852, "fcm_dpo/delta": -0.18092726171016693, "fcm_dpo/margin": 71.44352722167969, "fcm_dpo/q_t": 0.339513897895813, "grad_norm": 93.67918395996094, "learning_rate": 1.7518739404812155e-07, "logits/chosen": -0.8444567918777466, "logits/rejected": -0.8151299953460693, "logps/chosen": -326.0416259765625, "logps/ref_chosen": -256.4839782714844, "logps/ref_rejected": -266.4063415527344, "logps/rejected": -407.40753173828125, "loss": 3.6998, "margin_dpo/margin_mean": 71.44352722167969, "margin_dpo/margin_std": 78.092529296875, "step": 305 }, { "epoch": 0.6408376963350786, "fcm_dpo/beta": 0.010258047841489315, "fcm_dpo/delta": 0.02255289815366268, "fcm_dpo/margin": 45.75574493408203, "fcm_dpo/q_t": 0.3990488648414612, "grad_norm": 83.1161880493164, "learning_rate": 1.7344254621846017e-07, "logits/chosen": -0.829738974571228, "logits/rejected": -0.8198045492172241, "logps/chosen": -397.2244567871094, "logps/ref_chosen": -320.6492004394531, "logps/ref_rejected": -273.36773681640625, "logps/rejected": -395.69866943359375, "loss": 4.3876, "margin_dpo/margin_mean": 45.75574493408203, "margin_dpo/margin_std": 69.27189636230469, "step": 306 }, { "epoch": 0.6429319371727749, "fcm_dpo/beta": 0.01028377190232277, "fcm_dpo/delta": -0.021565284579992294, "fcm_dpo/margin": 52.735225677490234, "fcm_dpo/q_t": 0.3810930848121643, "grad_norm": 123.105224609375, "learning_rate": 1.717018039327053e-07, "logits/chosen": -0.7636174559593201, "logits/rejected": -0.8098389506340027, "logps/chosen": -374.28753662109375, "logps/ref_chosen": -279.4541931152344, "logps/ref_rejected": -240.3796844482422, "logps/rejected": -387.9482421875, "loss": 4.1453, "margin_dpo/margin_mean": 52.73522186279297, "margin_dpo/margin_std": 66.00975036621094, "step": 307 }, { "epoch": 0.6450261780104712, "fcm_dpo/beta": 0.010600929148495197, "fcm_dpo/delta": 0.12419873476028442, "fcm_dpo/margin": 40.71012878417969, "fcm_dpo/q_t": 0.4081202447414398, "grad_norm": 94.15335083007812, "learning_rate": 1.699652605415828e-07, "logits/chosen": -0.8173856735229492, "logits/rejected": -0.8383054137229919, "logps/chosen": -395.36480712890625, "logps/ref_chosen": -296.598388671875, "logps/ref_rejected": -258.6953430175781, "logps/rejected": -398.17193603515625, "loss": 4.5984, "margin_dpo/margin_mean": 40.71012878417969, "margin_dpo/margin_std": 74.02152252197266, "step": 308 }, { "epoch": 0.6471204188481675, "fcm_dpo/beta": 0.011164986528456211, "fcm_dpo/delta": -0.02332584373652935, "fcm_dpo/margin": 55.65196990966797, "fcm_dpo/q_t": 0.36911213397979736, "grad_norm": 91.83976745605469, "learning_rate": 1.6823300917064458e-07, "logits/chosen": -0.8137606978416443, "logits/rejected": -0.8284541368484497, "logps/chosen": -378.19000244140625, "logps/ref_chosen": -281.3881530761719, "logps/ref_rejected": -262.458740234375, "logps/rejected": -414.91259765625, "loss": 4.0289, "margin_dpo/margin_mean": 55.6519775390625, "margin_dpo/margin_std": 72.8603286743164, "step": 309 }, { "epoch": 0.6492146596858639, "fcm_dpo/beta": 0.011183914728462696, "fcm_dpo/delta": 0.004746271297335625, "fcm_dpo/margin": 53.116363525390625, "fcm_dpo/q_t": 0.3752599358558655, "grad_norm": 101.53085327148438, "learning_rate": 1.6650514271527465e-07, "logits/chosen": -0.8139233589172363, "logits/rejected": -0.7922792434692383, "logps/chosen": -372.5635986328125, "logps/ref_chosen": -279.1872863769531, "logps/ref_rejected": -261.8279724121094, "logps/rejected": -408.3207092285156, "loss": 4.1434, "margin_dpo/margin_mean": 53.116371154785156, "margin_dpo/margin_std": 73.46383666992188, "step": 310 }, { "epoch": 0.6513089005235602, "fcm_dpo/beta": 0.011093046516180038, "fcm_dpo/delta": 0.00796596985310316, "fcm_dpo/margin": 53.135868072509766, "fcm_dpo/q_t": 0.3744713366031647, "grad_norm": 133.24725341796875, "learning_rate": 1.647817538357072e-07, "logits/chosen": -0.8096467852592468, "logits/rejected": -0.7938026785850525, "logps/chosen": -367.9974365234375, "logps/ref_chosen": -271.39813232421875, "logps/ref_rejected": -266.12701416015625, "logps/rejected": -415.8622131347656, "loss": 4.2424, "margin_dpo/margin_mean": 53.135868072509766, "margin_dpo/margin_std": 76.9805908203125, "step": 311 }, { "epoch": 0.6534031413612565, "fcm_dpo/beta": 0.01127730030566454, "fcm_dpo/delta": 0.04617173224687576, "fcm_dpo/margin": 48.976646423339844, "fcm_dpo/q_t": 0.3884603679180145, "grad_norm": 101.93241119384766, "learning_rate": 1.6306293495205755e-07, "logits/chosen": -0.815342903137207, "logits/rejected": -0.8017351627349854, "logps/chosen": -377.88897705078125, "logps/ref_chosen": -282.3850402832031, "logps/ref_rejected": -246.35389709472656, "logps/rejected": -390.8345031738281, "loss": 4.5077, "margin_dpo/margin_mean": 48.976646423339844, "margin_dpo/margin_std": 85.27238464355469, "step": 312 }, { "epoch": 0.6554973821989529, "fcm_dpo/beta": 0.011504658497869968, "fcm_dpo/delta": -0.04747847467660904, "fcm_dpo/margin": 51.269386291503906, "fcm_dpo/q_t": 0.3808142840862274, "grad_norm": 90.95819854736328, "learning_rate": 1.6134877823936607e-07, "logits/chosen": -0.8608230352401733, "logits/rejected": -0.8569746613502502, "logps/chosen": -398.6612548828125, "logps/ref_chosen": -303.630859375, "logps/ref_rejected": -273.1156921386719, "logps/rejected": -419.4154968261719, "loss": 4.3476, "margin_dpo/margin_mean": 51.269386291503906, "margin_dpo/margin_std": 79.70832824707031, "step": 313 }, { "epoch": 0.6575916230366492, "fcm_dpo/beta": 0.011516381986439228, "fcm_dpo/delta": 0.024132583290338516, "fcm_dpo/margin": 49.92859649658203, "fcm_dpo/q_t": 0.3797074556350708, "grad_norm": 96.51738739013672, "learning_rate": 1.5963937562265522e-07, "logits/chosen": -0.8718076348304749, "logits/rejected": -0.8581203818321228, "logps/chosen": -393.2103576660156, "logps/ref_chosen": -302.3042907714844, "logps/ref_rejected": -273.6416015625, "logps/rejected": -414.4762878417969, "loss": 4.2219, "margin_dpo/margin_mean": 49.92859649658203, "margin_dpo/margin_std": 72.24923706054688, "step": 314 }, { "epoch": 0.6596858638743456, "fcm_dpo/beta": 0.011163339018821716, "fcm_dpo/delta": -0.057650674134492874, "fcm_dpo/margin": 58.469234466552734, "fcm_dpo/q_t": 0.36371082067489624, "grad_norm": 90.03949737548828, "learning_rate": 1.5793481877199943e-07, "logits/chosen": -0.8473063707351685, "logits/rejected": -0.8344524502754211, "logps/chosen": -393.37115478515625, "logps/ref_chosen": -302.729248046875, "logps/ref_rejected": -270.26910400390625, "logps/rejected": -419.38018798828125, "loss": 4.0094, "margin_dpo/margin_mean": 58.469234466552734, "margin_dpo/margin_std": 74.8115005493164, "step": 315 }, { "epoch": 0.6617801047120419, "fcm_dpo/beta": 0.010703526437282562, "fcm_dpo/delta": -0.009762253612279892, "fcm_dpo/margin": 56.77616500854492, "fcm_dpo/q_t": 0.37491375207901, "grad_norm": 81.0658187866211, "learning_rate": 1.562351990976095e-07, "logits/chosen": -0.8620774745941162, "logits/rejected": -0.853934109210968, "logps/chosen": -398.0158386230469, "logps/ref_chosen": -310.5706481933594, "logps/ref_rejected": -272.9354553222656, "logps/rejected": -417.1568603515625, "loss": 4.1512, "margin_dpo/margin_mean": 56.77616882324219, "margin_dpo/margin_std": 80.87450408935547, "step": 316 }, { "epoch": 0.6638743455497382, "fcm_dpo/beta": 0.010788942687213421, "fcm_dpo/delta": 0.025753259658813477, "fcm_dpo/margin": 53.33159637451172, "fcm_dpo/q_t": 0.3748745322227478, "grad_norm": 80.9461898803711, "learning_rate": 1.5454060774493065e-07, "logits/chosen": -0.8560656309127808, "logits/rejected": -0.8267807960510254, "logps/chosen": -326.962890625, "logps/ref_chosen": -253.90036010742188, "logps/ref_rejected": -218.74078369140625, "logps/rejected": -345.1348571777344, "loss": 4.0694, "margin_dpo/margin_mean": 53.33160400390625, "margin_dpo/margin_std": 67.30595397949219, "step": 317 }, { "epoch": 0.6659685863874345, "fcm_dpo/beta": 0.010560003109276295, "fcm_dpo/delta": -0.023112213239073753, "fcm_dpo/margin": 58.614662170410156, "fcm_dpo/q_t": 0.36647799611091614, "grad_norm": 80.73484802246094, "learning_rate": 1.5285113558975427e-07, "logits/chosen": -0.8801468014717102, "logits/rejected": -0.8469870686531067, "logps/chosen": -352.06805419921875, "logps/ref_chosen": -270.8228759765625, "logps/ref_rejected": -255.30972290039062, "logps/rejected": -395.1695556640625, "loss": 3.9782, "margin_dpo/margin_mean": 58.61466979980469, "margin_dpo/margin_std": 70.51625061035156, "step": 318 }, { "epoch": 0.6680628272251309, "fcm_dpo/beta": 0.010418823920190334, "fcm_dpo/delta": -0.0004974156618118286, "fcm_dpo/margin": 57.531654357910156, "fcm_dpo/q_t": 0.3701345920562744, "grad_norm": 105.95365142822266, "learning_rate": 1.5116687323334464e-07, "logits/chosen": -0.8481382131576538, "logits/rejected": -0.8254935145378113, "logps/chosen": -388.3399658203125, "logps/ref_chosen": -301.0028076171875, "logps/ref_rejected": -242.39002990722656, "logps/rejected": -387.2587890625, "loss": 3.9928, "margin_dpo/margin_mean": 57.531654357910156, "margin_dpo/margin_std": 70.07221221923828, "step": 319 }, { "epoch": 0.6701570680628273, "fcm_dpo/beta": 0.010746842250227928, "fcm_dpo/delta": 0.03297354653477669, "fcm_dpo/margin": 52.7890739440918, "fcm_dpo/q_t": 0.3829456567764282, "grad_norm": 107.80091857910156, "learning_rate": 1.4948791099758052e-07, "logits/chosen": -0.8255133032798767, "logits/rejected": -0.8311203718185425, "logps/chosen": -385.2088928222656, "logps/ref_chosen": -303.6225891113281, "logps/ref_rejected": -280.85174560546875, "logps/rejected": -415.2270812988281, "loss": 4.3689, "margin_dpo/margin_mean": 52.78907012939453, "margin_dpo/margin_std": 85.84711456298828, "step": 320 }, { "epoch": 0.6722513089005235, "fcm_dpo/beta": 0.011272162199020386, "fcm_dpo/delta": 0.03078434243798256, "fcm_dpo/margin": 40.83373260498047, "fcm_dpo/q_t": 0.40626707673072815, "grad_norm": 103.12840270996094, "learning_rate": 1.478143389201113e-07, "logits/chosen": -0.8609663248062134, "logits/rejected": -0.8327873349189758, "logps/chosen": -380.1144714355469, "logps/ref_chosen": -288.98583984375, "logps/ref_rejected": -241.1822052001953, "logps/rejected": -373.1446228027344, "loss": 4.6024, "margin_dpo/margin_mean": 40.8337287902832, "margin_dpo/margin_std": 76.73638916015625, "step": 321 }, { "epoch": 0.6743455497382199, "fcm_dpo/beta": 0.011492523364722729, "fcm_dpo/delta": -0.0035615600645542145, "fcm_dpo/margin": 52.34375, "fcm_dpo/q_t": 0.3778424561023712, "grad_norm": 95.56451416015625, "learning_rate": 1.461462467495284e-07, "logits/chosen": -0.9015979766845703, "logits/rejected": -0.8630591630935669, "logps/chosen": -400.4721374511719, "logps/ref_chosen": -308.54345703125, "logps/ref_rejected": -269.7995910644531, "logps/rejected": -414.0719909667969, "loss": 4.2084, "margin_dpo/margin_mean": 52.343746185302734, "margin_dpo/margin_std": 78.392333984375, "step": 322 }, { "epoch": 0.6764397905759162, "fcm_dpo/beta": 0.011814561672508717, "fcm_dpo/delta": 0.1254061460494995, "fcm_dpo/margin": 36.32615661621094, "fcm_dpo/q_t": 0.41510459780693054, "grad_norm": 104.00493621826172, "learning_rate": 1.4448372394055246e-07, "logits/chosen": -0.8651161789894104, "logits/rejected": -0.8598443269729614, "logps/chosen": -372.36126708984375, "logps/ref_chosen": -282.49365234375, "logps/ref_rejected": -227.7105255126953, "logps/rejected": -353.9043273925781, "loss": 4.8553, "margin_dpo/margin_mean": 36.32615280151367, "margin_dpo/margin_std": 81.96444702148438, "step": 323 }, { "epoch": 0.6785340314136126, "fcm_dpo/beta": 0.011333497241139412, "fcm_dpo/delta": -0.13659581542015076, "fcm_dpo/margin": 63.554481506347656, "fcm_dpo/q_t": 0.34813186526298523, "grad_norm": 92.25740051269531, "learning_rate": 1.428268596492364e-07, "logits/chosen": -0.8000403046607971, "logits/rejected": -0.7981281280517578, "logps/chosen": -315.97833251953125, "logps/ref_chosen": -239.33836364746094, "logps/ref_rejected": -230.53775024414062, "logps/rejected": -370.7322082519531, "loss": 3.7578, "margin_dpo/margin_mean": 63.55448913574219, "margin_dpo/margin_std": 70.8255386352539, "step": 324 }, { "epoch": 0.680628272251309, "fcm_dpo/beta": 0.011075211688876152, "fcm_dpo/delta": -0.027800805866718292, "fcm_dpo/margin": 51.91961669921875, "fcm_dpo/q_t": 0.3847375512123108, "grad_norm": 110.48858642578125, "learning_rate": 1.4117574272818386e-07, "logits/chosen": -0.8060468435287476, "logits/rejected": -0.7908115386962891, "logps/chosen": -370.1428527832031, "logps/ref_chosen": -280.62896728515625, "logps/ref_rejected": -270.5085754394531, "logps/rejected": -411.9421081542969, "loss": 4.3861, "margin_dpo/margin_mean": 51.91961669921875, "margin_dpo/margin_std": 84.64945220947266, "step": 325 }, { "epoch": 0.6827225130890052, "fcm_dpo/beta": 0.011209266260266304, "fcm_dpo/delta": 0.03459738567471504, "fcm_dpo/margin": 50.52565002441406, "fcm_dpo/q_t": 0.38198092579841614, "grad_norm": 110.66674041748047, "learning_rate": 1.3953046172178413e-07, "logits/chosen": -0.9141020178794861, "logits/rejected": -0.9032143950462341, "logps/chosen": -322.02789306640625, "logps/ref_chosen": -240.9871368408203, "logps/ref_rejected": -261.0238342285156, "logps/rejected": -392.5902099609375, "loss": 4.2869, "margin_dpo/margin_mean": 50.52565002441406, "margin_dpo/margin_std": 77.59326934814453, "step": 326 }, { "epoch": 0.6848167539267016, "fcm_dpo/beta": 0.01094572339206934, "fcm_dpo/delta": -0.0513957142829895, "fcm_dpo/margin": 59.012569427490234, "fcm_dpo/q_t": 0.3640768527984619, "grad_norm": 78.67206573486328, "learning_rate": 1.3789110486146468e-07, "logits/chosen": -0.8713995218276978, "logits/rejected": -0.851097822189331, "logps/chosen": -351.3957824707031, "logps/ref_chosen": -279.52001953125, "logps/ref_rejected": -269.51824951171875, "logps/rejected": -400.40655517578125, "loss": 3.9834, "margin_dpo/margin_mean": 59.012569427490234, "margin_dpo/margin_std": 74.85302734375, "step": 327 }, { "epoch": 0.6869109947643979, "fcm_dpo/beta": 0.010611481964588165, "fcm_dpo/delta": 0.034625254571437836, "fcm_dpo/margin": 53.36151123046875, "fcm_dpo/q_t": 0.3801298141479492, "grad_norm": 102.40028381347656, "learning_rate": 1.362577600609588e-07, "logits/chosen": -0.8238348960876465, "logits/rejected": -0.8258199691772461, "logps/chosen": -384.0259094238281, "logps/ref_chosen": -301.033447265625, "logps/ref_rejected": -284.2101135253906, "logps/rejected": -420.5640869140625, "loss": 4.1232, "margin_dpo/margin_mean": 53.36151123046875, "margin_dpo/margin_std": 69.19473266601562, "step": 328 }, { "epoch": 0.6890052356020943, "fcm_dpo/beta": 0.011005845852196217, "fcm_dpo/delta": -0.0060157435946166515, "fcm_dpo/margin": 54.90179443359375, "fcm_dpo/q_t": 0.3814074397087097, "grad_norm": 118.011962890625, "learning_rate": 1.3463051491159093e-07, "logits/chosen": -0.8349476456642151, "logits/rejected": -0.8108065128326416, "logps/chosen": -408.7464904785156, "logps/ref_chosen": -319.9888610839844, "logps/ref_rejected": -307.5588684082031, "logps/rejected": -451.2182312011719, "loss": 4.2707, "margin_dpo/margin_mean": 54.901790618896484, "margin_dpo/margin_std": 86.18560791015625, "step": 329 }, { "epoch": 0.6910994764397905, "fcm_dpo/beta": 0.011318149045109749, "fcm_dpo/delta": 0.04677288234233856, "fcm_dpo/margin": 48.92704391479492, "fcm_dpo/q_t": 0.3829770088195801, "grad_norm": 114.82202911376953, "learning_rate": 1.3300945667758012e-07, "logits/chosen": -0.8282672166824341, "logits/rejected": -0.8419229388237, "logps/chosen": -387.9001770019531, "logps/ref_chosen": -301.11474609375, "logps/ref_rejected": -299.673095703125, "logps/rejected": -435.3855895996094, "loss": 4.2182, "margin_dpo/margin_mean": 48.92704391479492, "margin_dpo/margin_std": 70.02067565917969, "step": 330 }, { "epoch": 0.6931937172774869, "fcm_dpo/beta": 0.011273819953203201, "fcm_dpo/delta": 0.006491330452263355, "fcm_dpo/margin": 52.56464385986328, "fcm_dpo/q_t": 0.3814270496368408, "grad_norm": 102.59062957763672, "learning_rate": 1.3139467229135998e-07, "logits/chosen": -0.8695875406265259, "logits/rejected": -0.8564931750297546, "logps/chosen": -357.00396728515625, "logps/ref_chosen": -277.59149169921875, "logps/ref_rejected": -256.025634765625, "logps/rejected": -388.00274658203125, "loss": 4.3351, "margin_dpo/margin_mean": 52.56464385986328, "margin_dpo/margin_std": 86.2286148071289, "step": 331 }, { "epoch": 0.6952879581151833, "fcm_dpo/beta": 0.011159934103488922, "fcm_dpo/delta": -0.007100608199834824, "fcm_dpo/margin": 54.201053619384766, "fcm_dpo/q_t": 0.37773868441581726, "grad_norm": 123.88658142089844, "learning_rate": 1.2978624834891626e-07, "logits/chosen": -0.8522534966468811, "logits/rejected": -0.829330563545227, "logps/chosen": -352.5909729003906, "logps/ref_chosen": -269.97369384765625, "logps/ref_rejected": -235.03164672851562, "logps/rejected": -371.8499450683594, "loss": 4.2557, "margin_dpo/margin_mean": 54.201053619384766, "margin_dpo/margin_std": 82.71824645996094, "step": 332 }, { "epoch": 0.6973821989528796, "fcm_dpo/beta": 0.01151346042752266, "fcm_dpo/delta": 0.005112664774060249, "fcm_dpo/margin": 47.8289680480957, "fcm_dpo/q_t": 0.3855535686016083, "grad_norm": 110.88188171386719, "learning_rate": 1.281842711051438e-07, "logits/chosen": -0.9249609112739563, "logits/rejected": -0.8914788961410522, "logps/chosen": -380.4714050292969, "logps/ref_chosen": -296.76300048828125, "logps/ref_rejected": -265.97991943359375, "logps/rejected": -397.517333984375, "loss": 4.2389, "margin_dpo/margin_mean": 47.8289680480957, "margin_dpo/margin_std": 70.54847717285156, "step": 333 }, { "epoch": 0.6994764397905759, "fcm_dpo/beta": 0.011438943445682526, "fcm_dpo/delta": -0.05398944765329361, "fcm_dpo/margin": 56.70977783203125, "fcm_dpo/q_t": 0.3660760521888733, "grad_norm": 113.78327178955078, "learning_rate": 1.2658882646922033e-07, "logits/chosen": -0.8433147668838501, "logits/rejected": -0.8184890747070312, "logps/chosen": -379.4197692871094, "logps/ref_chosen": -301.0367431640625, "logps/ref_rejected": -268.87652587890625, "logps/rejected": -403.96929931640625, "loss": 4.0958, "margin_dpo/margin_mean": 56.709781646728516, "margin_dpo/margin_std": 76.3214111328125, "step": 334 }, { "epoch": 0.7015706806282722, "fcm_dpo/beta": 0.01065311674028635, "fcm_dpo/delta": 0.0019375793635845184, "fcm_dpo/margin": 55.916114807128906, "fcm_dpo/q_t": 0.37777969241142273, "grad_norm": 140.96397399902344, "learning_rate": 1.2500000000000005e-07, "logits/chosen": -0.8295958042144775, "logits/rejected": -0.8266909122467041, "logps/chosen": -366.25634765625, "logps/ref_chosen": -276.13275146484375, "logps/ref_rejected": -243.44203186035156, "logps/rejected": -389.481689453125, "loss": 4.2603, "margin_dpo/margin_mean": 55.916114807128906, "margin_dpo/margin_std": 83.37496185302734, "step": 335 }, { "epoch": 0.7036649214659686, "fcm_dpo/beta": 0.010925454087555408, "fcm_dpo/delta": -0.0008706990629434586, "fcm_dpo/margin": 50.11119842529297, "fcm_dpo/q_t": 0.39162981510162354, "grad_norm": 105.25801086425781, "learning_rate": 1.2341787690142435e-07, "logits/chosen": -0.8540661931037903, "logits/rejected": -0.7894359230995178, "logps/chosen": -337.92779541015625, "logps/ref_chosen": -246.2626495361328, "logps/ref_rejected": -261.0617980957031, "logps/rejected": -402.8382263183594, "loss": 4.3595, "margin_dpo/margin_mean": 50.11119842529297, "margin_dpo/margin_std": 81.72027587890625, "step": 336 }, { "epoch": 0.7057591623036649, "fcm_dpo/beta": 0.010866876691579819, "fcm_dpo/delta": -0.06778120994567871, "fcm_dpo/margin": 61.01750946044922, "fcm_dpo/q_t": 0.3631795048713684, "grad_norm": 92.77830505371094, "learning_rate": 1.2184254201795363e-07, "logits/chosen": -0.8569778800010681, "logits/rejected": -0.8286029696464539, "logps/chosen": -350.7175598144531, "logps/ref_chosen": -266.9937744140625, "logps/ref_rejected": -253.015625, "logps/rejected": -397.7568359375, "loss": 3.9651, "margin_dpo/margin_mean": 61.017513275146484, "margin_dpo/margin_std": 77.8094482421875, "step": 337 }, { "epoch": 0.7078534031413612, "fcm_dpo/beta": 0.010680442675948143, "fcm_dpo/delta": 0.032451678067445755, "fcm_dpo/margin": 53.09748458862305, "fcm_dpo/q_t": 0.38340330123901367, "grad_norm": 111.03559875488281, "learning_rate": 1.202740798300168e-07, "logits/chosen": -0.8789874911308289, "logits/rejected": -0.8605346083641052, "logps/chosen": -357.2398376464844, "logps/ref_chosen": -276.5925598144531, "logps/ref_rejected": -233.979248046875, "logps/rejected": -367.7239990234375, "loss": 4.2707, "margin_dpo/margin_mean": 53.09748458862305, "margin_dpo/margin_std": 80.79719543457031, "step": 338 }, { "epoch": 0.7099476439790576, "fcm_dpo/beta": 0.010629120282828808, "fcm_dpo/delta": -0.029368996620178223, "fcm_dpo/margin": 58.94451904296875, "fcm_dpo/q_t": 0.3694709241390228, "grad_norm": 96.47938537597656, "learning_rate": 1.1871257444948096e-07, "logits/chosen": -0.8932757377624512, "logits/rejected": -0.8851479291915894, "logps/chosen": -391.7145690917969, "logps/ref_chosen": -303.5277404785156, "logps/ref_rejected": -283.11676025390625, "logps/rejected": -430.2481689453125, "loss": 4.143, "margin_dpo/margin_mean": 58.944515228271484, "margin_dpo/margin_std": 83.4503402709961, "step": 339 }, { "epoch": 0.7120418848167539, "fcm_dpo/beta": 0.01047454308718443, "fcm_dpo/delta": -0.005283636972308159, "fcm_dpo/margin": 52.87586975097656, "fcm_dpo/q_t": 0.3890461325645447, "grad_norm": 129.6068878173828, "learning_rate": 1.1715810961514072e-07, "logits/chosen": -0.8445842266082764, "logits/rejected": -0.8434731364250183, "logps/chosen": -353.9705505371094, "logps/ref_chosen": -261.5257568359375, "logps/ref_rejected": -259.39862060546875, "logps/rejected": -404.71923828125, "loss": 4.4994, "margin_dpo/margin_mean": 52.87586975097656, "margin_dpo/margin_std": 93.22081756591797, "step": 340 }, { "epoch": 0.7141361256544503, "fcm_dpo/beta": 0.010717066004872322, "fcm_dpo/delta": 0.0799395889043808, "fcm_dpo/margin": 40.81736373901367, "fcm_dpo/q_t": 0.41147974133491516, "grad_norm": 151.6585235595703, "learning_rate": 1.1561076868822755e-07, "logits/chosen": -0.8664836883544922, "logits/rejected": -0.8353126645088196, "logps/chosen": -425.8817443847656, "logps/ref_chosen": -315.903564453125, "logps/ref_rejected": -308.02392578125, "logps/rejected": -458.81951904296875, "loss": 4.8832, "margin_dpo/margin_mean": 40.81736373901367, "margin_dpo/margin_std": 89.86011505126953, "step": 341 }, { "epoch": 0.7162303664921466, "fcm_dpo/beta": 0.011261941865086555, "fcm_dpo/delta": -0.030568070709705353, "fcm_dpo/margin": 55.79458999633789, "fcm_dpo/q_t": 0.3656637966632843, "grad_norm": 94.8580322265625, "learning_rate": 1.1407063464793965e-07, "logits/chosen": -0.8575106859207153, "logits/rejected": -0.8551716208457947, "logps/chosen": -355.30908203125, "logps/ref_chosen": -269.17864990234375, "logps/ref_rejected": -260.8977355957031, "logps/rejected": -402.82281494140625, "loss": 4.0376, "margin_dpo/margin_mean": 55.794593811035156, "margin_dpo/margin_std": 71.31321716308594, "step": 342 }, { "epoch": 0.7183246073298429, "fcm_dpo/beta": 0.010942941531538963, "fcm_dpo/delta": 0.05468939617276192, "fcm_dpo/margin": 50.11905288696289, "fcm_dpo/q_t": 0.38805603981018066, "grad_norm": 104.98192596435547, "learning_rate": 1.125377900869913e-07, "logits/chosen": -0.844852864742279, "logits/rejected": -0.8284053206443787, "logps/chosen": -401.84783935546875, "logps/ref_chosen": -310.719970703125, "logps/ref_rejected": -263.5224914550781, "logps/rejected": -404.7693786621094, "loss": 4.322, "margin_dpo/margin_mean": 50.11905288696289, "margin_dpo/margin_std": 79.16139221191406, "step": 343 }, { "epoch": 0.7204188481675393, "fcm_dpo/beta": 0.011533169075846672, "fcm_dpo/delta": -0.019078608602285385, "fcm_dpo/margin": 53.341590881347656, "fcm_dpo/q_t": 0.37309640645980835, "grad_norm": 124.94219970703125, "learning_rate": 1.110123172071844e-07, "logits/chosen": -0.8453131914138794, "logits/rejected": -0.8294092416763306, "logps/chosen": -394.9805603027344, "logps/ref_chosen": -301.7999267578125, "logps/ref_rejected": -257.9061584472656, "logps/rejected": -404.42840576171875, "loss": 4.2305, "margin_dpo/margin_mean": 53.341590881347656, "margin_dpo/margin_std": 78.44779205322266, "step": 344 }, { "epoch": 0.7225130890052356, "fcm_dpo/beta": 0.011175896972417831, "fcm_dpo/delta": 0.020824579522013664, "fcm_dpo/margin": 47.329490661621094, "fcm_dpo/q_t": 0.3897257447242737, "grad_norm": 132.1914520263672, "learning_rate": 1.09494297815e-07, "logits/chosen": -0.8442228436470032, "logits/rejected": -0.8435863256454468, "logps/chosen": -374.52606201171875, "logps/ref_chosen": -283.0184326171875, "logps/ref_rejected": -266.8457336425781, "logps/rejected": -405.68280029296875, "loss": 4.2956, "margin_dpo/margin_mean": 47.32949447631836, "margin_dpo/margin_std": 67.64356231689453, "step": 345 }, { "epoch": 0.724607329842932, "fcm_dpo/beta": 0.011194109916687012, "fcm_dpo/delta": -0.06004277244210243, "fcm_dpo/margin": 58.3812141418457, "fcm_dpo/q_t": 0.36269640922546387, "grad_norm": 88.73771667480469, "learning_rate": 1.0798381331721107e-07, "logits/chosen": -0.9335488080978394, "logits/rejected": -0.8848499059677124, "logps/chosen": -364.63702392578125, "logps/ref_chosen": -268.44122314453125, "logps/ref_rejected": -227.8225860595703, "logps/rejected": -382.3995666503906, "loss": 4.0986, "margin_dpo/margin_mean": 58.3812141418457, "margin_dpo/margin_std": 77.73462677001953, "step": 346 }, { "epoch": 0.7267015706806282, "fcm_dpo/beta": 0.010750826448202133, "fcm_dpo/delta": -0.0014726296067237854, "fcm_dpo/margin": 51.17145538330078, "fcm_dpo/q_t": 0.38269051909446716, "grad_norm": 98.32434844970703, "learning_rate": 1.0648094471651722e-07, "logits/chosen": -0.7837051749229431, "logits/rejected": -0.8113095760345459, "logps/chosen": -362.5570068359375, "logps/ref_chosen": -273.70355224609375, "logps/ref_rejected": -243.65521240234375, "logps/rejected": -383.6800842285156, "loss": 4.2541, "margin_dpo/margin_mean": 51.17145538330078, "margin_dpo/margin_std": 73.79896545410156, "step": 347 }, { "epoch": 0.7287958115183246, "fcm_dpo/beta": 0.011453090235590935, "fcm_dpo/delta": 0.10326485335826874, "fcm_dpo/margin": 43.56976318359375, "fcm_dpo/q_t": 0.40140581130981445, "grad_norm": 90.84459686279297, "learning_rate": 1.0498577260720048e-07, "logits/chosen": -0.8765660524368286, "logits/rejected": -0.8619852066040039, "logps/chosen": -373.6608581542969, "logps/ref_chosen": -285.64141845703125, "logps/ref_rejected": -265.6270446777344, "logps/rejected": -397.2162780761719, "loss": 4.482, "margin_dpo/margin_mean": 43.56976318359375, "margin_dpo/margin_std": 76.03251647949219, "step": 348 }, { "epoch": 0.7308900523560209, "fcm_dpo/beta": 0.011219880543649197, "fcm_dpo/delta": -0.10234306752681732, "fcm_dpo/margin": 61.77885055541992, "fcm_dpo/q_t": 0.36026033759117126, "grad_norm": 176.53419494628906, "learning_rate": 1.0349837717080347e-07, "logits/chosen": -0.8281702399253845, "logits/rejected": -0.8236852884292603, "logps/chosen": -416.912353515625, "logps/ref_chosen": -328.3175048828125, "logps/ref_rejected": -292.37872314453125, "logps/rejected": -442.7524108886719, "loss": 4.0555, "margin_dpo/margin_mean": 61.77885055541992, "margin_dpo/margin_std": 84.70580291748047, "step": 349 }, { "epoch": 0.7329842931937173, "fcm_dpo/beta": 0.011043412610888481, "fcm_dpo/delta": 0.0056533366441726685, "fcm_dpo/margin": 49.282676696777344, "fcm_dpo/q_t": 0.3879699110984802, "grad_norm": 110.3293685913086, "learning_rate": 1.0201883817182949e-07, "logits/chosen": -0.8206266164779663, "logits/rejected": -0.8350641131401062, "logps/chosen": -391.7946472167969, "logps/ref_chosen": -292.8046569824219, "logps/ref_rejected": -250.35504150390625, "logps/rejected": -398.62774658203125, "loss": 4.4195, "margin_dpo/margin_mean": 49.28268051147461, "margin_dpo/margin_std": 81.72764587402344, "step": 350 }, { "epoch": 0.7350785340314137, "fcm_dpo/beta": 0.01127422321587801, "fcm_dpo/delta": 0.05659697949886322, "fcm_dpo/margin": 36.060794830322266, "fcm_dpo/q_t": 0.4199449419975281, "grad_norm": 131.51641845703125, "learning_rate": 1.0054723495346482e-07, "logits/chosen": -0.8776383399963379, "logits/rejected": -0.8651461005210876, "logps/chosen": -403.58026123046875, "logps/ref_chosen": -311.8890380859375, "logps/ref_rejected": -263.59033203125, "logps/rejected": -391.34234619140625, "loss": 4.965, "margin_dpo/margin_mean": 36.060794830322266, "margin_dpo/margin_std": 87.43182373046875, "step": 351 }, { "epoch": 0.7371727748691099, "fcm_dpo/beta": 0.010802392847836018, "fcm_dpo/delta": -0.10912173241376877, "fcm_dpo/margin": 64.70912170410156, "fcm_dpo/q_t": 0.35592180490493774, "grad_norm": 101.48279571533203, "learning_rate": 9.908364643332398e-08, "logits/chosen": -0.8236813545227051, "logits/rejected": -0.7954239249229431, "logps/chosen": -339.6770935058594, "logps/ref_chosen": -254.9078826904297, "logps/ref_rejected": -257.1688232421875, "logps/rejected": -406.64715576171875, "loss": 3.9846, "margin_dpo/margin_mean": 64.70912170410156, "margin_dpo/margin_std": 82.60865783691406, "step": 352 }, { "epoch": 0.7392670157068063, "fcm_dpo/beta": 0.010326343588531017, "fcm_dpo/delta": 0.011817870661616325, "fcm_dpo/margin": 50.87775802612305, "fcm_dpo/q_t": 0.3918360471725464, "grad_norm": 117.75369262695312, "learning_rate": 9.76281510992176e-08, "logits/chosen": -0.8311276435852051, "logits/rejected": -0.8247827291488647, "logps/chosen": -364.5557556152344, "logps/ref_chosen": -270.3760681152344, "logps/ref_rejected": -264.65234375, "logps/rejected": -409.7098388671875, "loss": 4.4262, "margin_dpo/margin_mean": 50.87776184082031, "margin_dpo/margin_std": 83.41526794433594, "step": 353 }, { "epoch": 0.7413612565445026, "fcm_dpo/beta": 0.010943200439214706, "fcm_dpo/delta": 0.10599180310964584, "fcm_dpo/margin": 36.72353744506836, "fcm_dpo/q_t": 0.41883280873298645, "grad_norm": 119.48013305664062, "learning_rate": 9.618082700494318e-08, "logits/chosen": -0.8320400714874268, "logits/rejected": -0.866357684135437, "logps/chosen": -354.59100341796875, "logps/ref_chosen": -257.6485595703125, "logps/ref_rejected": -246.94203186035156, "logps/rejected": -380.6080322265625, "loss": 4.8811, "margin_dpo/margin_mean": 36.723541259765625, "margin_dpo/margin_std": 83.38043212890625, "step": 354 }, { "epoch": 0.743455497382199, "fcm_dpo/beta": 0.01037515327334404, "fcm_dpo/delta": -0.1262883096933365, "fcm_dpo/margin": 62.35184860229492, "fcm_dpo/q_t": 0.36492300033569336, "grad_norm": 113.33094024658203, "learning_rate": 9.474175176609956e-08, "logits/chosen": -0.875731348991394, "logits/rejected": -0.8765732049942017, "logps/chosen": -382.8927307128906, "logps/ref_chosen": -293.35333251953125, "logps/ref_rejected": -275.6051940917969, "logps/rejected": -427.4964294433594, "loss": 4.1582, "margin_dpo/margin_mean": 62.35184860229492, "margin_dpo/margin_std": 87.46715545654297, "step": 355 }, { "epoch": 0.7455497382198953, "fcm_dpo/beta": 0.010616269893944263, "fcm_dpo/delta": 0.06479822099208832, "fcm_dpo/margin": 40.029476165771484, "fcm_dpo/q_t": 0.40827593207359314, "grad_norm": 93.70746612548828, "learning_rate": 9.331100255592436e-08, "logits/chosen": -0.8007790446281433, "logits/rejected": -0.8301706314086914, "logps/chosen": -291.36260986328125, "logps/ref_chosen": -204.25550842285156, "logps/ref_rejected": -213.467529296875, "logps/rejected": -340.6041259765625, "loss": 4.5453, "margin_dpo/margin_mean": 40.02947235107422, "margin_dpo/margin_std": 66.7158432006836, "step": 356 }, { "epoch": 0.7476439790575916, "fcm_dpo/beta": 0.010492799803614616, "fcm_dpo/delta": -0.07817438989877701, "fcm_dpo/margin": 58.730445861816406, "fcm_dpo/q_t": 0.37493807077407837, "grad_norm": 100.33387756347656, "learning_rate": 9.18886561011557e-08, "logits/chosen": -0.7677896618843079, "logits/rejected": -0.7687491774559021, "logps/chosen": -361.63714599609375, "logps/ref_chosen": -266.3705749511719, "logps/ref_rejected": -239.04490661621094, "logps/rejected": -393.0418701171875, "loss": 4.1916, "margin_dpo/margin_mean": 58.730445861816406, "margin_dpo/margin_std": 85.08563995361328, "step": 357 }, { "epoch": 0.749738219895288, "fcm_dpo/beta": 0.010010240599513054, "fcm_dpo/delta": -0.06146865338087082, "fcm_dpo/margin": 65.60650634765625, "fcm_dpo/q_t": 0.361916184425354, "grad_norm": 97.68511199951172, "learning_rate": 9.047478867791731e-08, "logits/chosen": -0.8633837699890137, "logits/rejected": -0.845342218875885, "logps/chosen": -383.7676086425781, "logps/ref_chosen": -299.1474609375, "logps/ref_rejected": -257.2531433105469, "logps/rejected": -407.47979736328125, "loss": 4.0168, "margin_dpo/margin_mean": 65.60650634765625, "margin_dpo/margin_std": 84.83085632324219, "step": 358 }, { "epoch": 0.7518324607329843, "fcm_dpo/beta": 0.010214395821094513, "fcm_dpo/delta": 0.03628703951835632, "fcm_dpo/margin": 55.01346206665039, "fcm_dpo/q_t": 0.3792075216770172, "grad_norm": 102.26093292236328, "learning_rate": 8.906947610762825e-08, "logits/chosen": -0.8235169053077698, "logits/rejected": -0.8387635350227356, "logps/chosen": -389.9339904785156, "logps/ref_chosen": -302.99786376953125, "logps/ref_rejected": -260.4137268066406, "logps/rejected": -402.36334228515625, "loss": 4.1236, "margin_dpo/margin_mean": 55.01346206665039, "margin_dpo/margin_std": 71.41567993164062, "step": 359 }, { "epoch": 0.7539267015706806, "fcm_dpo/beta": 0.010241111740469933, "fcm_dpo/delta": 0.05266699939966202, "fcm_dpo/margin": 48.13507080078125, "fcm_dpo/q_t": 0.3928843140602112, "grad_norm": 114.6863784790039, "learning_rate": 8.76727937529367e-08, "logits/chosen": -0.8444012403488159, "logits/rejected": -0.8383079767227173, "logps/chosen": -404.416259765625, "logps/ref_chosen": -309.6114501953125, "logps/ref_rejected": -256.64031982421875, "logps/rejected": -399.5802001953125, "loss": 4.4973, "margin_dpo/margin_mean": 48.13507080078125, "margin_dpo/margin_std": 80.82567596435547, "step": 360 }, { "epoch": 0.7560209424083769, "fcm_dpo/beta": 0.010225515812635422, "fcm_dpo/delta": -0.06704443693161011, "fcm_dpo/margin": 64.82032775878906, "fcm_dpo/q_t": 0.36452746391296387, "grad_norm": 99.3719253540039, "learning_rate": 8.628481651367875e-08, "logits/chosen": -0.7976441979408264, "logits/rejected": -0.7765456438064575, "logps/chosen": -340.08935546875, "logps/ref_chosen": -263.3797607421875, "logps/ref_rejected": -271.18157958984375, "logps/rejected": -412.71148681640625, "loss": 4.0939, "margin_dpo/margin_mean": 64.82032775878906, "margin_dpo/margin_std": 91.12376403808594, "step": 361 }, { "epoch": 0.7581151832460733, "fcm_dpo/beta": 0.009894550777971745, "fcm_dpo/delta": 0.03964172303676605, "fcm_dpo/margin": 56.83903503417969, "fcm_dpo/q_t": 0.3783508837223053, "grad_norm": 95.44233703613281, "learning_rate": 8.490561882286135e-08, "logits/chosen": -0.8215633630752563, "logits/rejected": -0.816551923751831, "logps/chosen": -388.55169677734375, "logps/ref_chosen": -303.2583923339844, "logps/ref_rejected": -243.22891235351562, "logps/rejected": -385.3612060546875, "loss": 4.0905, "margin_dpo/margin_mean": 56.83903503417969, "margin_dpo/margin_std": 72.36383056640625, "step": 362 }, { "epoch": 0.7602094240837697, "fcm_dpo/beta": 0.010408826172351837, "fcm_dpo/delta": 0.03300439193844795, "fcm_dpo/margin": 54.380584716796875, "fcm_dpo/q_t": 0.3840283751487732, "grad_norm": 108.82610321044922, "learning_rate": 8.353527464267104e-08, "logits/chosen": -0.8414917588233948, "logits/rejected": -0.7955522537231445, "logps/chosen": -395.16302490234375, "logps/ref_chosen": -303.34722900390625, "logps/ref_rejected": -262.05419921875, "logps/rejected": -408.25054931640625, "loss": 4.3162, "margin_dpo/margin_mean": 54.38058090209961, "margin_dpo/margin_std": 84.62854766845703, "step": 363 }, { "epoch": 0.762303664921466, "fcm_dpo/beta": 0.010767980478703976, "fcm_dpo/delta": 0.10710425674915314, "fcm_dpo/margin": 46.3648567199707, "fcm_dpo/q_t": 0.39887574315071106, "grad_norm": 99.91820526123047, "learning_rate": 8.217385746050742e-08, "logits/chosen": -0.7961313724517822, "logits/rejected": -0.8093927502632141, "logps/chosen": -395.1819152832031, "logps/ref_chosen": -285.54376220703125, "logps/ref_rejected": -284.84619140625, "logps/rejected": -440.8492736816406, "loss": 4.6636, "margin_dpo/margin_mean": 46.3648567199707, "margin_dpo/margin_std": 89.22390747070312, "step": 364 }, { "epoch": 0.7643979057591623, "fcm_dpo/beta": 0.011136573739349842, "fcm_dpo/delta": -0.06065363436937332, "fcm_dpo/margin": 54.53364562988281, "fcm_dpo/q_t": 0.37902113795280457, "grad_norm": 102.22779083251953, "learning_rate": 8.082144028504231e-08, "logits/chosen": -0.8273108601570129, "logits/rejected": -0.8318711519241333, "logps/chosen": -371.9251403808594, "logps/ref_chosen": -274.7878112792969, "logps/ref_rejected": -256.5738220214844, "logps/rejected": -408.2447509765625, "loss": 4.2396, "margin_dpo/margin_mean": 54.53364181518555, "margin_dpo/margin_std": 82.52845764160156, "step": 365 }, { "epoch": 0.7664921465968586, "fcm_dpo/beta": 0.010572044178843498, "fcm_dpo/delta": -0.05891243368387222, "fcm_dpo/margin": 61.89867401123047, "fcm_dpo/q_t": 0.3644544184207916, "grad_norm": 94.1471176147461, "learning_rate": 7.947809564230445e-08, "logits/chosen": -0.7954903841018677, "logits/rejected": -0.8094490766525269, "logps/chosen": -376.78997802734375, "logps/ref_chosen": -286.6496276855469, "logps/ref_rejected": -251.97140502929688, "logps/rejected": -404.0104675292969, "loss": 4.0522, "margin_dpo/margin_mean": 61.89867401123047, "margin_dpo/margin_std": 84.70960998535156, "step": 366 }, { "epoch": 0.768586387434555, "fcm_dpo/beta": 0.009961485862731934, "fcm_dpo/delta": -0.007503882050514221, "fcm_dpo/margin": 60.758731842041016, "fcm_dpo/q_t": 0.3712635934352875, "grad_norm": 103.36516571044922, "learning_rate": 7.814389557179016e-08, "logits/chosen": -0.804265022277832, "logits/rejected": -0.787903368473053, "logps/chosen": -393.11322021484375, "logps/ref_chosen": -301.9449768066406, "logps/ref_rejected": -265.5677185058594, "logps/rejected": -417.49468994140625, "loss": 4.0468, "margin_dpo/margin_mean": 60.758731842041016, "margin_dpo/margin_std": 77.93997192382812, "step": 367 }, { "epoch": 0.7706806282722513, "fcm_dpo/beta": 0.009893280453979969, "fcm_dpo/delta": -0.10160201787948608, "fcm_dpo/margin": 70.12001037597656, "fcm_dpo/q_t": 0.35026952624320984, "grad_norm": 73.314697265625, "learning_rate": 7.681891162260015e-08, "logits/chosen": -0.789295494556427, "logits/rejected": -0.8031895160675049, "logps/chosen": -380.4212646484375, "logps/ref_chosen": -294.62652587890625, "logps/ref_rejected": -258.7628479003906, "logps/rejected": -414.6776123046875, "loss": 3.7301, "margin_dpo/margin_mean": 70.12001037597656, "margin_dpo/margin_std": 73.42273712158203, "step": 368 }, { "epoch": 0.7727748691099476, "fcm_dpo/beta": 0.009725566022098064, "fcm_dpo/delta": 0.07594747841358185, "fcm_dpo/margin": 54.26398849487305, "fcm_dpo/q_t": 0.3867446184158325, "grad_norm": 94.77143859863281, "learning_rate": 7.550321484960251e-08, "logits/chosen": -0.8820152282714844, "logits/rejected": -0.8652966022491455, "logps/chosen": -376.5359191894531, "logps/ref_chosen": -282.5057373046875, "logps/ref_rejected": -266.41607666015625, "logps/rejected": -414.71026611328125, "loss": 4.238, "margin_dpo/margin_mean": 54.26398849487305, "margin_dpo/margin_std": 76.04060363769531, "step": 369 }, { "epoch": 0.774869109947644, "fcm_dpo/beta": 0.009759629145264626, "fcm_dpo/delta": -0.03207730874419212, "fcm_dpo/margin": 64.38795471191406, "fcm_dpo/q_t": 0.36700335144996643, "grad_norm": 83.10556030273438, "learning_rate": 7.419687580962222e-08, "logits/chosen": -0.8547401428222656, "logits/rejected": -0.8778725266456604, "logps/chosen": -336.8154296875, "logps/ref_chosen": -251.00640869140625, "logps/ref_rejected": -238.12542724609375, "logps/rejected": -388.32232666015625, "loss": 4.0568, "margin_dpo/margin_mean": 64.3879623413086, "margin_dpo/margin_std": 86.20503997802734, "step": 370 }, { "epoch": 0.7769633507853403, "fcm_dpo/beta": 0.010260825045406818, "fcm_dpo/delta": 0.0865623950958252, "fcm_dpo/margin": 50.42669677734375, "fcm_dpo/q_t": 0.3898235559463501, "grad_norm": 108.6645736694336, "learning_rate": 7.289996455765748e-08, "logits/chosen": -0.7909586429595947, "logits/rejected": -0.7886074781417847, "logps/chosen": -393.975830078125, "logps/ref_chosen": -296.6591491699219, "logps/ref_rejected": -251.14675903320312, "logps/rejected": -398.89013671875, "loss": 4.3432, "margin_dpo/margin_mean": 50.42669677734375, "margin_dpo/margin_std": 77.26437377929688, "step": 371 }, { "epoch": 0.7790575916230367, "fcm_dpo/beta": 0.010090112686157227, "fcm_dpo/delta": -0.05242789536714554, "fcm_dpo/margin": 64.0174331665039, "fcm_dpo/q_t": 0.3648003935813904, "grad_norm": 83.54692840576172, "learning_rate": 7.161255064312283e-08, "logits/chosen": -0.7713180780410767, "logits/rejected": -0.7686434984207153, "logps/chosen": -424.0948181152344, "logps/ref_chosen": -331.3714599609375, "logps/ref_rejected": -285.56805419921875, "logps/rejected": -442.308837890625, "loss": 4.0493, "margin_dpo/margin_mean": 64.0174331665039, "margin_dpo/margin_std": 83.79845428466797, "step": 372 }, { "epoch": 0.7811518324607329, "fcm_dpo/beta": 0.009875521995127201, "fcm_dpo/delta": -0.005197510123252869, "fcm_dpo/margin": 61.0987434387207, "fcm_dpo/q_t": 0.3673868179321289, "grad_norm": 86.26287078857422, "learning_rate": 7.033470310611945e-08, "logits/chosen": -0.858359158039093, "logits/rejected": -0.8340578675270081, "logps/chosen": -405.3838806152344, "logps/ref_chosen": -321.9429931640625, "logps/ref_rejected": -271.2288513183594, "logps/rejected": -415.7685546875, "loss": 4.0013, "margin_dpo/margin_mean": 61.09874725341797, "margin_dpo/margin_std": 72.15105438232422, "step": 373 }, { "epoch": 0.7832460732984293, "fcm_dpo/beta": 0.010431567206978798, "fcm_dpo/delta": 0.08249574154615402, "fcm_dpo/margin": 49.9046745300293, "fcm_dpo/q_t": 0.3921506404876709, "grad_norm": 71.61186218261719, "learning_rate": 6.906649047373245e-08, "logits/chosen": -0.8459421396255493, "logits/rejected": -0.8448758125305176, "logps/chosen": -409.2577819824219, "logps/ref_chosen": -319.1685485839844, "logps/ref_rejected": -284.6263732910156, "logps/rejected": -424.6202392578125, "loss": 4.36, "margin_dpo/margin_mean": 49.9046745300293, "margin_dpo/margin_std": 79.11766815185547, "step": 374 }, { "epoch": 0.7853403141361257, "fcm_dpo/beta": 0.010909291915595531, "fcm_dpo/delta": 0.025316692888736725, "fcm_dpo/margin": 47.739749908447266, "fcm_dpo/q_t": 0.3932640850543976, "grad_norm": 96.96272277832031, "learning_rate": 6.780798075635675e-08, "logits/chosen": -0.8526198863983154, "logits/rejected": -0.835331380367279, "logps/chosen": -412.8765869140625, "logps/ref_chosen": -314.87579345703125, "logps/ref_rejected": -259.1965026855469, "logps/rejected": -404.93707275390625, "loss": 4.4618, "margin_dpo/margin_mean": 47.73974609375, "margin_dpo/margin_std": 81.5127182006836, "step": 375 }, { "epoch": 0.787434554973822, "fcm_dpo/beta": 0.010985768400132656, "fcm_dpo/delta": -0.007917094975709915, "fcm_dpo/margin": 55.14532470703125, "fcm_dpo/q_t": 0.377773642539978, "grad_norm": 107.2135009765625, "learning_rate": 6.655924144404906e-08, "logits/chosen": -0.8210961818695068, "logits/rejected": -0.8290749788284302, "logps/chosen": -384.5388488769531, "logps/ref_chosen": -287.6732482910156, "logps/ref_rejected": -256.6697082519531, "logps/rejected": -408.6806335449219, "loss": 4.2785, "margin_dpo/margin_mean": 55.14532470703125, "margin_dpo/margin_std": 85.58231353759766, "step": 376 }, { "epoch": 0.7895287958115184, "fcm_dpo/beta": 0.011254341341555119, "fcm_dpo/delta": 0.053448669612407684, "fcm_dpo/margin": 38.42869186401367, "fcm_dpo/q_t": 0.4102451801300049, "grad_norm": 113.72765350341797, "learning_rate": 6.532033950290885e-08, "logits/chosen": -0.8185837864875793, "logits/rejected": -0.8221685886383057, "logps/chosen": -409.1022033691406, "logps/ref_chosen": -305.261474609375, "logps/ref_rejected": -271.8887023925781, "logps/rejected": -414.15814208984375, "loss": 4.8061, "margin_dpo/margin_mean": 38.42869186401367, "margin_dpo/margin_std": 82.16449737548828, "step": 377 }, { "epoch": 0.7916230366492146, "fcm_dpo/beta": 0.011703657917678356, "fcm_dpo/delta": 0.059528548270463943, "fcm_dpo/margin": 46.378326416015625, "fcm_dpo/q_t": 0.38869708776474, "grad_norm": 111.90191650390625, "learning_rate": 6.409134137148736e-08, "logits/chosen": -0.8007180094718933, "logits/rejected": -0.7878426313400269, "logps/chosen": -378.7329406738281, "logps/ref_chosen": -281.5295715332031, "logps/ref_rejected": -296.980224609375, "logps/rejected": -440.56195068359375, "loss": 4.3661, "margin_dpo/margin_mean": 46.378326416015625, "margin_dpo/margin_std": 73.78543090820312, "step": 378 }, { "epoch": 0.793717277486911, "fcm_dpo/beta": 0.011767145246267319, "fcm_dpo/delta": 0.0018080808222293854, "fcm_dpo/margin": 50.696449279785156, "fcm_dpo/q_t": 0.38004711270332336, "grad_norm": 130.11024475097656, "learning_rate": 6.28723129572247e-08, "logits/chosen": -0.8714730143547058, "logits/rejected": -0.8521773815155029, "logps/chosen": -355.7151794433594, "logps/ref_chosen": -265.0807800292969, "logps/ref_rejected": -230.58932495117188, "logps/rejected": -371.920166015625, "loss": 4.3454, "margin_dpo/margin_mean": 50.69645690917969, "margin_dpo/margin_std": 82.31288146972656, "step": 379 }, { "epoch": 0.7958115183246073, "fcm_dpo/beta": 0.011543155647814274, "fcm_dpo/delta": -0.09121623635292053, "fcm_dpo/margin": 53.771484375, "fcm_dpo/q_t": 0.3734373450279236, "grad_norm": 122.70896911621094, "learning_rate": 6.166331963291519e-08, "logits/chosen": -0.8505301475524902, "logits/rejected": -0.8327133655548096, "logps/chosen": -403.8320617675781, "logps/ref_chosen": -305.90838623046875, "logps/ref_rejected": -286.5906677246094, "logps/rejected": -438.2857971191406, "loss": 4.227, "margin_dpo/margin_mean": 53.771484375, "margin_dpo/margin_std": 79.21472930908203, "step": 380 }, { "epoch": 0.7979057591623037, "fcm_dpo/beta": 0.011249177157878876, "fcm_dpo/delta": -0.024705251678824425, "fcm_dpo/margin": 55.35076904296875, "fcm_dpo/q_t": 0.37365224957466125, "grad_norm": 99.3327865600586, "learning_rate": 6.046442623320145e-08, "logits/chosen": -0.820453405380249, "logits/rejected": -0.7837440967559814, "logps/chosen": -346.07220458984375, "logps/ref_chosen": -252.87066650390625, "logps/ref_rejected": -261.1927490234375, "logps/rejected": -409.7450256347656, "loss": 4.1399, "margin_dpo/margin_mean": 55.35076904296875, "margin_dpo/margin_std": 79.20867156982422, "step": 381 }, { "epoch": 0.8, "fcm_dpo/beta": 0.010675630532205105, "fcm_dpo/delta": -0.08816227316856384, "fcm_dpo/margin": 63.82965087890625, "fcm_dpo/q_t": 0.3560883402824402, "grad_norm": 98.31607818603516, "learning_rate": 5.9275697051098275e-08, "logits/chosen": -0.8420031070709229, "logits/rejected": -0.8378089070320129, "logps/chosen": -379.265380859375, "logps/ref_chosen": -289.2114562988281, "logps/ref_rejected": -278.45751953125, "logps/rejected": -432.3410339355469, "loss": 3.9075, "margin_dpo/margin_mean": 63.82964324951172, "margin_dpo/margin_std": 76.59968566894531, "step": 382 }, { "epoch": 0.8020942408376963, "fcm_dpo/beta": 0.010143190622329712, "fcm_dpo/delta": -0.026774900034070015, "fcm_dpo/margin": 56.6756706237793, "fcm_dpo/q_t": 0.3786071240901947, "grad_norm": 106.76795959472656, "learning_rate": 5.809719583454414e-08, "logits/chosen": -0.8325682282447815, "logits/rejected": -0.8136028051376343, "logps/chosen": -362.4136962890625, "logps/ref_chosen": -273.630859375, "logps/ref_rejected": -261.44024658203125, "logps/rejected": -406.8987731933594, "loss": 4.2121, "margin_dpo/margin_mean": 56.67566680908203, "margin_dpo/margin_std": 80.95785522460938, "step": 383 }, { "epoch": 0.8041884816753927, "fcm_dpo/beta": 0.010267859324812889, "fcm_dpo/delta": 0.051759272813797, "fcm_dpo/margin": 48.685951232910156, "fcm_dpo/q_t": 0.39458543062210083, "grad_norm": 78.74759674072266, "learning_rate": 5.6928985782982524e-08, "logits/chosen": -0.8430302739143372, "logits/rejected": -0.8414285778999329, "logps/chosen": -368.34307861328125, "logps/ref_chosen": -274.5699462890625, "logps/ref_rejected": -285.8253479003906, "logps/rejected": -428.2843933105469, "loss": 4.3999, "margin_dpo/margin_mean": 48.685951232910156, "margin_dpo/margin_std": 78.47038269042969, "step": 384 }, { "epoch": 0.806282722513089, "fcm_dpo/beta": 0.010413900017738342, "fcm_dpo/delta": -0.0003421269357204437, "fcm_dpo/margin": 52.91630935668945, "fcm_dpo/q_t": 0.38285988569259644, "grad_norm": 86.21760559082031, "learning_rate": 5.57711295439732e-08, "logits/chosen": -0.793001115322113, "logits/rejected": -0.7949045896530151, "logps/chosen": -379.3741455078125, "logps/ref_chosen": -284.150634765625, "logps/ref_rejected": -244.87921142578125, "logps/rejected": -393.01904296875, "loss": 4.1941, "margin_dpo/margin_mean": 52.91630935668945, "margin_dpo/margin_std": 73.58924865722656, "step": 385 }, { "epoch": 0.8083769633507853, "fcm_dpo/beta": 0.009639251977205276, "fcm_dpo/delta": -0.11836303025484085, "fcm_dpo/margin": 67.22946166992188, "fcm_dpo/q_t": 0.3605455756187439, "grad_norm": 85.76860046386719, "learning_rate": 5.4623689209832484e-08, "logits/chosen": -0.7818478941917419, "logits/rejected": -0.7824301719665527, "logps/chosen": -407.1231994628906, "logps/ref_chosen": -320.1762390136719, "logps/ref_rejected": -302.05023193359375, "logps/rejected": -456.2266540527344, "loss": 3.9072, "margin_dpo/margin_mean": 67.22946166992188, "margin_dpo/margin_std": 75.95755004882812, "step": 386 }, { "epoch": 0.8104712041884817, "fcm_dpo/beta": 0.009522214531898499, "fcm_dpo/delta": 0.001985335722565651, "fcm_dpo/margin": 57.08240509033203, "fcm_dpo/q_t": 0.381357342004776, "grad_norm": 85.12853240966797, "learning_rate": 5.3486726314303175e-08, "logits/chosen": -0.8208848237991333, "logits/rejected": -0.8258309364318848, "logps/chosen": -366.4295654296875, "logps/ref_chosen": -272.2801513671875, "logps/ref_rejected": -265.1615905761719, "logps/rejected": -416.3934326171875, "loss": 4.2012, "margin_dpo/margin_mean": 57.08240509033203, "margin_dpo/margin_std": 77.8724136352539, "step": 387 }, { "epoch": 0.812565445026178, "fcm_dpo/beta": 0.009763755835592747, "fcm_dpo/delta": 0.09430886805057526, "fcm_dpo/margin": 41.966461181640625, "fcm_dpo/q_t": 0.4138518273830414, "grad_norm": 107.85652923583984, "learning_rate": 5.2360301829254745e-08, "logits/chosen": -0.8028566837310791, "logits/rejected": -0.7961896657943726, "logps/chosen": -377.24932861328125, "logps/ref_chosen": -272.5313415527344, "logps/ref_rejected": -239.55735778808594, "logps/rejected": -386.24176025390625, "loss": 4.7123, "margin_dpo/margin_mean": 41.966461181640625, "margin_dpo/margin_std": 82.00777435302734, "step": 388 }, { "epoch": 0.8146596858638744, "fcm_dpo/beta": 0.009949136525392532, "fcm_dpo/delta": -0.03285611793398857, "fcm_dpo/margin": 52.38904571533203, "fcm_dpo/q_t": 0.3916034698486328, "grad_norm": 86.20325469970703, "learning_rate": 5.1244476161413806e-08, "logits/chosen": -0.8366051912307739, "logits/rejected": -0.8354239463806152, "logps/chosen": -380.04913330078125, "logps/ref_chosen": -281.0892639160156, "logps/ref_rejected": -246.50045776367188, "logps/rejected": -397.849365234375, "loss": 4.4031, "margin_dpo/margin_mean": 52.3890495300293, "margin_dpo/margin_std": 83.40953063964844, "step": 389 }, { "epoch": 0.8167539267015707, "fcm_dpo/beta": 0.01029281411319971, "fcm_dpo/delta": 0.03942079097032547, "fcm_dpo/margin": 54.43703079223633, "fcm_dpo/q_t": 0.38137805461883545, "grad_norm": 88.30001831054688, "learning_rate": 5.013930914912476e-08, "logits/chosen": -0.8523457646369934, "logits/rejected": -0.8585647940635681, "logps/chosen": -382.1541748046875, "logps/ref_chosen": -283.98748779296875, "logps/ref_rejected": -283.465087890625, "logps/rejected": -436.06878662109375, "loss": 4.2467, "margin_dpo/margin_mean": 54.43703079223633, "margin_dpo/margin_std": 79.09922790527344, "step": 390 }, { "epoch": 0.818848167539267, "fcm_dpo/beta": 0.009914442896842957, "fcm_dpo/delta": -0.028575582429766655, "fcm_dpo/margin": 57.40606689453125, "fcm_dpo/q_t": 0.3794897496700287, "grad_norm": 86.59980010986328, "learning_rate": 4.904486005914027e-08, "logits/chosen": -0.8060988187789917, "logits/rejected": -0.8013872504234314, "logps/chosen": -389.5373840332031, "logps/ref_chosen": -283.86138916015625, "logps/ref_rejected": -263.5093688964844, "logps/rejected": -426.5914306640625, "loss": 4.2074, "margin_dpo/margin_mean": 57.40606689453125, "margin_dpo/margin_std": 81.51762390136719, "step": 391 }, { "epoch": 0.8209424083769633, "fcm_dpo/beta": 0.009418400004506111, "fcm_dpo/delta": -0.06252562999725342, "fcm_dpo/margin": 69.35395812988281, "fcm_dpo/q_t": 0.3590378761291504, "grad_norm": 91.34620666503906, "learning_rate": 4.796118758344353e-08, "logits/chosen": -0.774043083190918, "logits/rejected": -0.7974765300750732, "logps/chosen": -403.4256896972656, "logps/ref_chosen": -310.070068359375, "logps/ref_rejected": -252.89817810058594, "logps/rejected": -415.6076965332031, "loss": 3.8833, "margin_dpo/margin_mean": 69.35395812988281, "margin_dpo/margin_std": 76.41130065917969, "step": 392 }, { "epoch": 0.8230366492146597, "fcm_dpo/beta": 0.010048740543425083, "fcm_dpo/delta": 0.059518344700336456, "fcm_dpo/margin": 53.80087661743164, "fcm_dpo/q_t": 0.38574740290641785, "grad_norm": 121.00365447998047, "learning_rate": 4.688834983610082e-08, "logits/chosen": -0.8275657296180725, "logits/rejected": -0.8169302940368652, "logps/chosen": -378.3330078125, "logps/ref_chosen": -286.7156677246094, "logps/ref_rejected": -230.00357055664062, "logps/rejected": -375.4217834472656, "loss": 4.2594, "margin_dpo/margin_mean": 53.80087661743164, "margin_dpo/margin_std": 79.71178436279297, "step": 393 }, { "epoch": 0.8251308900523561, "fcm_dpo/beta": 0.010072952136397362, "fcm_dpo/delta": 0.04542340710759163, "fcm_dpo/margin": 48.90460205078125, "fcm_dpo/q_t": 0.40026745200157166, "grad_norm": 82.5592269897461, "learning_rate": 4.582640435014459e-08, "logits/chosen": -0.861187756061554, "logits/rejected": -0.8614484071731567, "logps/chosen": -419.2198791503906, "logps/ref_chosen": -325.9934387207031, "logps/ref_rejected": -317.42706298828125, "logps/rejected": -459.55810546875, "loss": 4.4748, "margin_dpo/margin_mean": 48.90460205078125, "margin_dpo/margin_std": 82.41121673583984, "step": 394 }, { "epoch": 0.8272251308900523, "fcm_dpo/beta": 0.010364928282797337, "fcm_dpo/delta": -0.026334993541240692, "fcm_dpo/margin": 60.20890808105469, "fcm_dpo/q_t": 0.37179651856422424, "grad_norm": 83.10125732421875, "learning_rate": 4.477540807448832e-08, "logits/chosen": -0.8094066381454468, "logits/rejected": -0.8200665712356567, "logps/chosen": -359.6258850097656, "logps/ref_chosen": -268.90081787109375, "logps/ref_rejected": -272.85809326171875, "logps/rejected": -423.7920837402344, "loss": 4.0331, "margin_dpo/margin_mean": 60.20890426635742, "margin_dpo/margin_std": 78.91072082519531, "step": 395 }, { "epoch": 0.8293193717277487, "fcm_dpo/beta": 0.01008035521954298, "fcm_dpo/delta": -0.017106691375374794, "fcm_dpo/margin": 55.339622497558594, "fcm_dpo/q_t": 0.38071006536483765, "grad_norm": 91.52494049072266, "learning_rate": 4.373541737087263e-08, "logits/chosen": -0.8092782497406006, "logits/rejected": -0.7949211597442627, "logps/chosen": -383.99359130859375, "logps/ref_chosen": -291.19830322265625, "logps/ref_rejected": -253.2803955078125, "logps/rejected": -401.41534423828125, "loss": 4.1984, "margin_dpo/margin_mean": 55.339622497558594, "margin_dpo/margin_std": 76.27399444580078, "step": 396 }, { "epoch": 0.831413612565445, "fcm_dpo/beta": 0.00993900652974844, "fcm_dpo/delta": -0.025697803124785423, "fcm_dpo/margin": 49.429141998291016, "fcm_dpo/q_t": 0.39648500084877014, "grad_norm": 90.36344909667969, "learning_rate": 4.270648801084295e-08, "logits/chosen": -0.8374252319335938, "logits/rejected": -0.8150765299797058, "logps/chosen": -400.49786376953125, "logps/ref_chosen": -309.8224182128906, "logps/ref_rejected": -291.9057922363281, "logps/rejected": -432.0103759765625, "loss": 4.5093, "margin_dpo/margin_mean": 49.429141998291016, "margin_dpo/margin_std": 83.18435668945312, "step": 397 }, { "epoch": 0.8335078534031414, "fcm_dpo/beta": 0.009895882569253445, "fcm_dpo/delta": 0.07465239614248276, "fcm_dpo/margin": 45.989097595214844, "fcm_dpo/q_t": 0.40378451347351074, "grad_norm": 114.50669860839844, "learning_rate": 4.168867517275806e-08, "logits/chosen": -0.7460988759994507, "logits/rejected": -0.787642240524292, "logps/chosen": -397.8860168457031, "logps/ref_chosen": -297.8135070800781, "logps/ref_rejected": -270.5025634765625, "logps/rejected": -416.564208984375, "loss": 4.7411, "margin_dpo/margin_mean": 45.989097595214844, "margin_dpo/margin_std": 91.29690551757812, "step": 398 }, { "epoch": 0.8356020942408376, "fcm_dpo/beta": 0.010618692263960838, "fcm_dpo/delta": 0.06222732365131378, "fcm_dpo/margin": 50.926490783691406, "fcm_dpo/q_t": 0.38839948177337646, "grad_norm": 93.77667236328125, "learning_rate": 4.0682033438831584e-08, "logits/chosen": -0.8398821949958801, "logits/rejected": -0.8023529052734375, "logps/chosen": -393.19097900390625, "logps/ref_chosen": -292.8467712402344, "logps/ref_rejected": -268.3638916015625, "logps/rejected": -419.6346435546875, "loss": 4.3527, "margin_dpo/margin_mean": 50.92649459838867, "margin_dpo/margin_std": 81.3008041381836, "step": 399 }, { "epoch": 0.837696335078534, "fcm_dpo/beta": 0.010857629589736462, "fcm_dpo/delta": 0.02651361934840679, "fcm_dpo/margin": 52.668907165527344, "fcm_dpo/q_t": 0.38137534260749817, "grad_norm": 124.49967956542969, "learning_rate": 3.968661679220467e-08, "logits/chosen": -0.8855699896812439, "logits/rejected": -0.8847328424453735, "logps/chosen": -357.024658203125, "logps/ref_chosen": -263.6763916015625, "logps/ref_rejected": -258.67266845703125, "logps/rejected": -404.6898498535156, "loss": 4.3361, "margin_dpo/margin_mean": 52.668907165527344, "margin_dpo/margin_std": 79.36495208740234, "step": 400 }, { "epoch": 0.837696335078534, "eval_fcm_dpo/beta": 0.01113525778055191, "eval_logits/chosen": -0.8274842500686646, "eval_logits/rejected": -0.8179031610488892, "eval_logps/chosen": -383.4114074707031, "eval_logps/ref_chosen": -287.8268127441406, "eval_logps/ref_rejected": -266.9300231933594, "eval_logps/rejected": -416.5982360839844, "eval_loss": 0.5352392792701721, "eval_margin_dpo/margin_mean": 54.083595275878906, "eval_margin_dpo/margin_std": 78.04414367675781, "eval_runtime": 78.6529, "eval_samples_per_second": 25.428, "eval_steps_per_second": 3.179, "step": 400 }, { "epoch": 0.8397905759162304, "fcm_dpo/beta": 0.010933172889053822, "fcm_dpo/delta": -0.036804597824811935, "fcm_dpo/margin": 57.962257385253906, "fcm_dpo/q_t": 0.36903733015060425, "grad_norm": 132.86756896972656, "learning_rate": 3.8702478614051345e-08, "logits/chosen": -0.8089762330055237, "logits/rejected": -0.8090959191322327, "logps/chosen": -411.4322814941406, "logps/ref_chosen": -318.2853088378906, "logps/ref_rejected": -293.75225830078125, "logps/rejected": -444.8614501953125, "loss": 4.1077, "margin_dpo/margin_mean": 57.96226501464844, "margin_dpo/margin_std": 81.48827362060547, "step": 401 }, { "epoch": 0.8418848167539267, "fcm_dpo/beta": 0.010850298218429089, "fcm_dpo/delta": -0.00018405605806037784, "fcm_dpo/margin": 55.26762771606445, "fcm_dpo/q_t": 0.37649285793304443, "grad_norm": 105.15616607666016, "learning_rate": 3.772967168071517e-08, "logits/chosen": -0.861879825592041, "logits/rejected": -0.8359262347221375, "logps/chosen": -396.8935852050781, "logps/ref_chosen": -309.4278564453125, "logps/ref_rejected": -282.0279846191406, "logps/rejected": -424.7613525390625, "loss": 4.179, "margin_dpo/margin_mean": 55.26762390136719, "margin_dpo/margin_std": 81.65580749511719, "step": 402 }, { "epoch": 0.8439790575916231, "fcm_dpo/beta": 0.010152220726013184, "fcm_dpo/delta": -0.16042862832546234, "fcm_dpo/margin": 73.78020477294922, "fcm_dpo/q_t": 0.342138409614563, "grad_norm": 78.87046813964844, "learning_rate": 3.676824816087978e-08, "logits/chosen": -0.8525277376174927, "logits/rejected": -0.8336724638938904, "logps/chosen": -398.9835205078125, "logps/ref_chosen": -309.0284729003906, "logps/ref_rejected": -272.9622497558594, "logps/rejected": -436.69744873046875, "loss": 3.6772, "margin_dpo/margin_mean": 73.78020477294922, "margin_dpo/margin_std": 79.28921508789062, "step": 403 }, { "epoch": 0.8460732984293193, "fcm_dpo/beta": 0.009858440607786179, "fcm_dpo/delta": 0.06699323654174805, "fcm_dpo/margin": 54.31309509277344, "fcm_dpo/q_t": 0.38632509112358093, "grad_norm": 91.77136993408203, "learning_rate": 3.581825961277074e-08, "logits/chosen": -0.8809780478477478, "logits/rejected": -0.8597022294998169, "logps/chosen": -397.1322021484375, "logps/ref_chosen": -297.2837219238281, "logps/ref_rejected": -256.99041748046875, "logps/rejected": -411.15203857421875, "loss": 4.3272, "margin_dpo/margin_mean": 54.31309509277344, "margin_dpo/margin_std": 83.20060729980469, "step": 404 }, { "epoch": 0.8481675392670157, "fcm_dpo/beta": 0.010094488970935345, "fcm_dpo/delta": -0.0015985970385372639, "fcm_dpo/margin": 59.558921813964844, "fcm_dpo/q_t": 0.3737775683403015, "grad_norm": 70.01200103759766, "learning_rate": 3.487975698139084e-08, "logits/chosen": -0.7941403985023499, "logits/rejected": -0.803252637386322, "logps/chosen": -348.8029479980469, "logps/ref_chosen": -257.96533203125, "logps/ref_rejected": -255.811279296875, "logps/rejected": -406.2078552246094, "loss": 4.0913, "margin_dpo/margin_mean": 59.558921813964844, "margin_dpo/margin_std": 81.33020782470703, "step": 405 }, { "epoch": 0.8502617801047121, "fcm_dpo/beta": 0.01080853957682848, "fcm_dpo/delta": 0.11059418320655823, "fcm_dpo/margin": 45.39131164550781, "fcm_dpo/q_t": 0.3956514894962311, "grad_norm": 114.03080749511719, "learning_rate": 3.3952790595787986e-08, "logits/chosen": -0.8301359415054321, "logits/rejected": -0.8075696229934692, "logps/chosen": -388.3358154296875, "logps/ref_chosen": -285.1810607910156, "logps/ref_rejected": -264.41351318359375, "logps/rejected": -412.9596252441406, "loss": 4.4328, "margin_dpo/margin_mean": 45.39131164550781, "margin_dpo/margin_std": 73.8642349243164, "step": 406 }, { "epoch": 0.8523560209424084, "fcm_dpo/beta": 0.010692048817873001, "fcm_dpo/delta": -0.03384571149945259, "fcm_dpo/margin": 58.965179443359375, "fcm_dpo/q_t": 0.372200608253479, "grad_norm": 109.08424377441406, "learning_rate": 3.303741016635614e-08, "logits/chosen": -0.8248212337493896, "logits/rejected": -0.8543944954872131, "logps/chosen": -369.94219970703125, "logps/ref_chosen": -265.23809814453125, "logps/ref_rejected": -219.0631561279297, "logps/rejected": -382.732421875, "loss": 4.1392, "margin_dpo/margin_mean": 58.96518325805664, "margin_dpo/margin_std": 84.269287109375, "step": 407 }, { "epoch": 0.8544502617801047, "fcm_dpo/beta": 0.010714426636695862, "fcm_dpo/delta": -0.028275392949581146, "fcm_dpo/margin": 58.18950653076172, "fcm_dpo/q_t": 0.37304675579071045, "grad_norm": 80.60567474365234, "learning_rate": 3.2133664782169944e-08, "logits/chosen": -0.862250030040741, "logits/rejected": -0.8581533432006836, "logps/chosen": -387.4540100097656, "logps/ref_chosen": -296.9726257324219, "logps/ref_rejected": -295.4786376953125, "logps/rejected": -444.1495361328125, "loss": 4.1167, "margin_dpo/margin_mean": 58.18950653076172, "margin_dpo/margin_std": 78.87529754638672, "step": 408 }, { "epoch": 0.856544502617801, "fcm_dpo/beta": 0.01028523314744234, "fcm_dpo/delta": -0.026700038462877274, "fcm_dpo/margin": 55.847084045410156, "fcm_dpo/q_t": 0.38022899627685547, "grad_norm": 90.30248260498047, "learning_rate": 3.12416029083514e-08, "logits/chosen": -0.8302770256996155, "logits/rejected": -0.8195016384124756, "logps/chosen": -386.9856262207031, "logps/ref_chosen": -287.37933349609375, "logps/ref_rejected": -275.80291748046875, "logps/rejected": -431.2562255859375, "loss": 4.3809, "margin_dpo/margin_mean": 55.847084045410156, "margin_dpo/margin_std": 91.54480743408203, "step": 409 }, { "epoch": 0.8586387434554974, "fcm_dpo/beta": 0.0104904780164361, "fcm_dpo/delta": 0.046907056123018265, "fcm_dpo/margin": 52.79481887817383, "fcm_dpo/q_t": 0.38510861992836, "grad_norm": 106.11753845214844, "learning_rate": 3.036127238347164e-08, "logits/chosen": -0.853104829788208, "logits/rejected": -0.8613294959068298, "logps/chosen": -377.5323791503906, "logps/ref_chosen": -281.7801818847656, "logps/ref_rejected": -266.7550354003906, "logps/rejected": -415.3020935058594, "loss": 4.3734, "margin_dpo/margin_mean": 52.79481506347656, "margin_dpo/margin_std": 85.05574798583984, "step": 410 }, { "epoch": 0.8607329842931937, "fcm_dpo/beta": 0.010124841704964638, "fcm_dpo/delta": -0.07326184958219528, "fcm_dpo/margin": 65.72576904296875, "fcm_dpo/q_t": 0.35882946848869324, "grad_norm": 82.55951690673828, "learning_rate": 2.9492720416985e-08, "logits/chosen": -0.8430988192558289, "logits/rejected": -0.8095424771308899, "logps/chosen": -371.9637756347656, "logps/ref_chosen": -281.5872497558594, "logps/ref_rejected": -254.78916931152344, "logps/rejected": -410.8914794921875, "loss": 3.8667, "margin_dpo/margin_mean": 65.72576904296875, "margin_dpo/margin_std": 76.63848876953125, "step": 411 }, { "epoch": 0.86282722513089, "fcm_dpo/beta": 0.009727457538247108, "fcm_dpo/delta": -0.012256894260644913, "fcm_dpo/margin": 49.4742546081543, "fcm_dpo/q_t": 0.3995344638824463, "grad_norm": 90.7106704711914, "learning_rate": 2.863599358669755e-08, "logits/chosen": -0.8161391615867615, "logits/rejected": -0.8233824372291565, "logps/chosen": -381.5071105957031, "logps/ref_chosen": -276.5341796875, "logps/ref_rejected": -273.8751220703125, "logps/rejected": -428.3223571777344, "loss": 4.4685, "margin_dpo/margin_mean": 49.4742546081543, "margin_dpo/margin_std": 82.43728637695312, "step": 412 }, { "epoch": 0.8649214659685864, "fcm_dpo/beta": 0.01038267370313406, "fcm_dpo/delta": 0.11351024359464645, "fcm_dpo/margin": 47.416351318359375, "fcm_dpo/q_t": 0.3980448544025421, "grad_norm": 119.93087768554688, "learning_rate": 2.7791137836269158e-08, "logits/chosen": -0.8390222191810608, "logits/rejected": -0.837721049785614, "logps/chosen": -369.8499755859375, "logps/ref_chosen": -271.2745666503906, "logps/ref_rejected": -270.16912841796875, "logps/rejected": -416.160888671875, "loss": 4.4373, "margin_dpo/margin_mean": 47.416358947753906, "margin_dpo/margin_std": 78.89204406738281, "step": 413 }, { "epoch": 0.8670157068062827, "fcm_dpo/beta": 0.010600894689559937, "fcm_dpo/delta": -0.07364710420370102, "fcm_dpo/margin": 62.915504455566406, "fcm_dpo/q_t": 0.3617112338542938, "grad_norm": 98.02194213867188, "learning_rate": 2.6958198472749717e-08, "logits/chosen": -0.8604521751403809, "logits/rejected": -0.8675873279571533, "logps/chosen": -394.2980041503906, "logps/ref_chosen": -297.11505126953125, "logps/ref_rejected": -271.7034606933594, "logps/rejected": -431.80194091796875, "loss": 3.9988, "margin_dpo/margin_mean": 62.915504455566406, "margin_dpo/margin_std": 80.08522033691406, "step": 414 }, { "epoch": 0.8691099476439791, "fcm_dpo/beta": 0.010481567122042179, "fcm_dpo/delta": 0.027636148035526276, "fcm_dpo/margin": 54.60208511352539, "fcm_dpo/q_t": 0.3772929012775421, "grad_norm": 89.56175994873047, "learning_rate": 2.613722016414943e-08, "logits/chosen": -0.8698713779449463, "logits/rejected": -0.8565788865089417, "logps/chosen": -392.7457275390625, "logps/ref_chosen": -297.6926574707031, "logps/ref_rejected": -279.0503234863281, "logps/rejected": -428.70550537109375, "loss": 4.1341, "margin_dpo/margin_mean": 54.602081298828125, "margin_dpo/margin_std": 74.00060272216797, "step": 415 }, { "epoch": 0.8712041884816754, "fcm_dpo/beta": 0.010026373900473118, "fcm_dpo/delta": -0.057540446519851685, "fcm_dpo/margin": 64.75239562988281, "fcm_dpo/q_t": 0.36300843954086304, "grad_norm": 75.71894836425781, "learning_rate": 2.5328246937043525e-08, "logits/chosen": -0.8659712672233582, "logits/rejected": -0.8757675290107727, "logps/chosen": -400.69134521484375, "logps/ref_chosen": -311.8255615234375, "logps/ref_rejected": -268.6170654296875, "logps/rejected": -422.23529052734375, "loss": 4.0244, "margin_dpo/margin_mean": 64.75240325927734, "margin_dpo/margin_std": 82.02722930908203, "step": 416 }, { "epoch": 0.8732984293193717, "fcm_dpo/beta": 0.00980073120445013, "fcm_dpo/delta": -0.020607443526387215, "fcm_dpo/margin": 56.351253509521484, "fcm_dpo/q_t": 0.3844594359397888, "grad_norm": 97.4702377319336, "learning_rate": 2.4531322174210973e-08, "logits/chosen": -0.8039509057998657, "logits/rejected": -0.8083282113075256, "logps/chosen": -409.8139953613281, "logps/ref_chosen": -310.43682861328125, "logps/ref_rejected": -277.15283203125, "logps/rejected": -432.88128662109375, "loss": 4.3175, "margin_dpo/margin_mean": 56.351253509521484, "margin_dpo/margin_std": 84.7042007446289, "step": 417 }, { "epoch": 0.875392670157068, "fcm_dpo/beta": 0.00980357639491558, "fcm_dpo/delta": -0.041675545275211334, "fcm_dpo/margin": 54.67127990722656, "fcm_dpo/q_t": 0.3868068754673004, "grad_norm": 97.9288330078125, "learning_rate": 2.3746488612308295e-08, "logits/chosen": -0.8138055801391602, "logits/rejected": -0.7914860844612122, "logps/chosen": -385.43609619140625, "logps/ref_chosen": -278.49591064453125, "logps/ref_rejected": -276.56671142578125, "logps/rejected": -438.1781311035156, "loss": 4.3261, "margin_dpo/margin_mean": 54.67127990722656, "margin_dpo/margin_std": 80.07173156738281, "step": 418 }, { "epoch": 0.8774869109947644, "fcm_dpo/beta": 0.00954905990511179, "fcm_dpo/delta": 0.0025477148592472076, "fcm_dpo/margin": 62.34251403808594, "fcm_dpo/q_t": 0.3726246654987335, "grad_norm": 98.41114044189453, "learning_rate": 2.297378833957761e-08, "logits/chosen": -0.8610261082649231, "logits/rejected": -0.8407485485076904, "logps/chosen": -404.93939208984375, "logps/ref_chosen": -298.9002380371094, "logps/ref_rejected": -246.1540985107422, "logps/rejected": -414.53582763671875, "loss": 4.1706, "margin_dpo/margin_mean": 62.3425178527832, "margin_dpo/margin_std": 87.26535034179688, "step": 419 }, { "epoch": 0.8795811518324608, "fcm_dpo/beta": 0.009364170022308826, "fcm_dpo/delta": -0.031245797872543335, "fcm_dpo/margin": 67.03992462158203, "fcm_dpo/q_t": 0.36937007308006287, "grad_norm": 118.59954071044922, "learning_rate": 2.2213262793589482e-08, "logits/chosen": -0.8001975417137146, "logits/rejected": -0.7743805646896362, "logps/chosen": -367.395751953125, "logps/ref_chosen": -264.5608825683594, "logps/ref_rejected": -245.67031860351562, "logps/rejected": -415.54510498046875, "loss": 4.1234, "margin_dpo/margin_mean": 67.03992462158203, "margin_dpo/margin_std": 94.58979034423828, "step": 420 }, { "epoch": 0.881675392670157, "fcm_dpo/beta": 0.009468503296375275, "fcm_dpo/delta": 0.05897749215364456, "fcm_dpo/margin": 57.387535095214844, "fcm_dpo/q_t": 0.3812984824180603, "grad_norm": 96.44364166259766, "learning_rate": 2.1464952759020856e-08, "logits/chosen": -0.8752861022949219, "logits/rejected": -0.8608182668685913, "logps/chosen": -391.05804443359375, "logps/ref_chosen": -297.70501708984375, "logps/ref_rejected": -243.74771118164062, "logps/rejected": -394.4882507324219, "loss": 4.1932, "margin_dpo/margin_mean": 57.387535095214844, "margin_dpo/margin_std": 78.87214660644531, "step": 421 }, { "epoch": 0.8837696335078534, "fcm_dpo/beta": 0.009868706576526165, "fcm_dpo/delta": 0.013630709610879421, "fcm_dpo/margin": 59.41127395629883, "fcm_dpo/q_t": 0.3788284361362457, "grad_norm": 75.39724731445312, "learning_rate": 2.07288983654679e-08, "logits/chosen": -0.7338589429855347, "logits/rejected": -0.784461498260498, "logps/chosen": -387.1398010253906, "logps/ref_chosen": -288.3587646484375, "logps/ref_rejected": -256.4377746582031, "logps/rejected": -414.63006591796875, "loss": 4.2569, "margin_dpo/margin_mean": 59.41127395629883, "margin_dpo/margin_std": 90.28678894042969, "step": 422 }, { "epoch": 0.8858638743455497, "fcm_dpo/beta": 0.009787296876311302, "fcm_dpo/delta": -0.014436397701501846, "fcm_dpo/margin": 62.54419708251953, "fcm_dpo/q_t": 0.37159958481788635, "grad_norm": 108.14152526855469, "learning_rate": 2.0005139085293942e-08, "logits/chosen": -0.8656849265098572, "logits/rejected": -0.8510351777076721, "logps/chosen": -397.02996826171875, "logps/ref_chosen": -296.00701904296875, "logps/ref_rejected": -261.3480529785156, "logps/rejected": -424.9151916503906, "loss": 4.0862, "margin_dpo/margin_mean": 62.54419708251953, "margin_dpo/margin_std": 84.30635833740234, "step": 423 }, { "epoch": 0.8879581151832461, "fcm_dpo/beta": 0.009680146351456642, "fcm_dpo/delta": -0.010340253822505474, "fcm_dpo/margin": 62.79508972167969, "fcm_dpo/q_t": 0.3680788278579712, "grad_norm": 95.83734893798828, "learning_rate": 1.9293713731512673e-08, "logits/chosen": -0.8333520889282227, "logits/rejected": -0.8356263637542725, "logps/chosen": -402.0572204589844, "logps/ref_chosen": -309.421875, "logps/ref_rejected": -249.14886474609375, "logps/rejected": -404.5793151855469, "loss": 3.988, "margin_dpo/margin_mean": 62.79508590698242, "margin_dpo/margin_std": 75.4756088256836, "step": 424 }, { "epoch": 0.8900523560209425, "fcm_dpo/beta": 0.009858609177172184, "fcm_dpo/delta": 0.03790643811225891, "fcm_dpo/margin": 50.37432098388672, "fcm_dpo/q_t": 0.3970460295677185, "grad_norm": 109.99349212646484, "learning_rate": 1.8594660455706763e-08, "logits/chosen": -0.8281177878379822, "logits/rejected": -0.8340511918067932, "logps/chosen": -381.4590759277344, "logps/ref_chosen": -280.50909423828125, "logps/ref_rejected": -276.8252258300781, "logps/rejected": -428.1495056152344, "loss": 4.5116, "margin_dpo/margin_mean": 50.37432098388672, "margin_dpo/margin_std": 86.79659271240234, "step": 425 }, { "epoch": 0.8921465968586387, "fcm_dpo/beta": 0.010049426928162575, "fcm_dpo/delta": 0.010002564638853073, "fcm_dpo/margin": 58.607635498046875, "fcm_dpo/q_t": 0.3756742477416992, "grad_norm": 103.70176696777344, "learning_rate": 1.7908016745981856e-08, "logits/chosen": -0.8594489097595215, "logits/rejected": -0.8479264974594116, "logps/chosen": -395.7527160644531, "logps/ref_chosen": -292.78521728515625, "logps/ref_rejected": -255.62698364257812, "logps/rejected": -417.2021484375, "loss": 4.1411, "margin_dpo/margin_mean": 58.607635498046875, "margin_dpo/margin_std": 79.4923095703125, "step": 426 }, { "epoch": 0.8942408376963351, "fcm_dpo/beta": 0.00980192981660366, "fcm_dpo/delta": -0.15457119047641754, "fcm_dpo/margin": 76.36004638671875, "fcm_dpo/q_t": 0.34554582834243774, "grad_norm": 92.07188415527344, "learning_rate": 1.7233819424956247e-08, "logits/chosen": -0.836094319820404, "logits/rejected": -0.8061795234680176, "logps/chosen": -388.0554504394531, "logps/ref_chosen": -288.7687072753906, "logps/ref_rejected": -268.4986572265625, "logps/rejected": -444.14544677734375, "loss": 3.8482, "margin_dpo/margin_mean": 76.36004638671875, "margin_dpo/margin_std": 89.82933044433594, "step": 427 }, { "epoch": 0.8963350785340314, "fcm_dpo/beta": 0.008906656876206398, "fcm_dpo/delta": -0.015173885971307755, "fcm_dpo/margin": 68.88334655761719, "fcm_dpo/q_t": 0.3676660656929016, "grad_norm": 82.5910873413086, "learning_rate": 1.6572104647786245e-08, "logits/chosen": -0.7911025285720825, "logits/rejected": -0.818476140499115, "logps/chosen": -406.2961120605469, "logps/ref_chosen": -295.5209655761719, "logps/ref_rejected": -275.71026611328125, "logps/rejected": -455.3687744140625, "loss": 4.0516, "margin_dpo/margin_mean": 68.88334655761719, "margin_dpo/margin_std": 89.82479858398438, "step": 428 }, { "epoch": 0.8984293193717278, "fcm_dpo/beta": 0.008761554956436157, "fcm_dpo/delta": -0.016175897791981697, "fcm_dpo/margin": 61.458229064941406, "fcm_dpo/q_t": 0.37919533252716064, "grad_norm": 150.3956298828125, "learning_rate": 1.5922907900227017e-08, "logits/chosen": -0.7926703095436096, "logits/rejected": -0.8025503158569336, "logps/chosen": -376.794189453125, "logps/ref_chosen": -274.392333984375, "logps/ref_rejected": -258.574462890625, "logps/rejected": -422.4345703125, "loss": 4.3478, "margin_dpo/margin_mean": 61.458229064941406, "margin_dpo/margin_std": 93.87308502197266, "step": 429 }, { "epoch": 0.900523560209424, "fcm_dpo/beta": 0.008986860513687134, "fcm_dpo/delta": 0.03410874679684639, "fcm_dpo/margin": 51.494415283203125, "fcm_dpo/q_t": 0.4010925590991974, "grad_norm": 82.05398559570312, "learning_rate": 1.5286263996730026e-08, "logits/chosen": -0.8744654655456543, "logits/rejected": -0.8475915789604187, "logps/chosen": -388.521484375, "logps/ref_chosen": -288.7391357421875, "logps/ref_rejected": -268.6106262207031, "logps/rejected": -419.8874206542969, "loss": 4.4594, "margin_dpo/margin_mean": 51.49441146850586, "margin_dpo/margin_std": 83.37062072753906, "step": 430 }, { "epoch": 0.9026178010471204, "fcm_dpo/beta": 0.009652719832956791, "fcm_dpo/delta": 0.11538906395435333, "fcm_dpo/margin": 44.83124923706055, "fcm_dpo/q_t": 0.4090067744255066, "grad_norm": 104.76152038574219, "learning_rate": 1.4662207078575684e-08, "logits/chosen": -0.8430911898612976, "logits/rejected": -0.8098528385162354, "logps/chosen": -377.4594421386719, "logps/ref_chosen": -275.7247314453125, "logps/ref_rejected": -268.91729736328125, "logps/rejected": -415.48321533203125, "loss": 4.5624, "margin_dpo/margin_mean": 44.83124923706055, "margin_dpo/margin_std": 78.89287567138672, "step": 431 }, { "epoch": 0.9047120418848168, "fcm_dpo/beta": 0.009524986147880554, "fcm_dpo/delta": -0.0009523874614387751, "fcm_dpo/margin": 62.96201705932617, "fcm_dpo/q_t": 0.37596815824508667, "grad_norm": 79.11823272705078, "learning_rate": 1.40507706120426e-08, "logits/chosen": -0.8689834475517273, "logits/rejected": -0.8573225140571594, "logps/chosen": -385.52545166015625, "logps/ref_chosen": -291.42010498046875, "logps/ref_rejected": -255.48202514648438, "logps/rejected": -412.5494079589844, "loss": 4.125, "margin_dpo/margin_mean": 62.9620246887207, "margin_dpo/margin_std": 87.03445434570312, "step": 432 }, { "epoch": 0.9068062827225131, "fcm_dpo/beta": 0.009905043989419937, "fcm_dpo/delta": 0.06354302912950516, "fcm_dpo/margin": 54.46798324584961, "fcm_dpo/q_t": 0.38325023651123047, "grad_norm": 89.49055480957031, "learning_rate": 1.345198738661285e-08, "logits/chosen": -0.8393370509147644, "logits/rejected": -0.8366720676422119, "logps/chosen": -353.0195007324219, "logps/ref_chosen": -246.2268829345703, "logps/ref_rejected": -253.65924072265625, "logps/rejected": -414.9197692871094, "loss": 4.2557, "margin_dpo/margin_mean": 54.46798324584961, "margin_dpo/margin_std": 79.35485076904297, "step": 433 }, { "epoch": 0.9089005235602095, "fcm_dpo/beta": 0.01033061183989048, "fcm_dpo/delta": -0.006721100769937038, "fcm_dpo/margin": 53.89496612548828, "fcm_dpo/q_t": 0.38306722044944763, "grad_norm": 82.5522232055664, "learning_rate": 1.2865889513213628e-08, "logits/chosen": -0.8196003437042236, "logits/rejected": -0.8318718671798706, "logps/chosen": -405.0242614746094, "logps/ref_chosen": -295.4618225097656, "logps/ref_rejected": -256.2254333496094, "logps/rejected": -419.68292236328125, "loss": 4.2651, "margin_dpo/margin_mean": 53.89497375488281, "margin_dpo/margin_std": 80.93537902832031, "step": 434 }, { "epoch": 0.9109947643979057, "fcm_dpo/beta": 0.010011866688728333, "fcm_dpo/delta": 0.002371033653616905, "fcm_dpo/margin": 59.46184539794922, "fcm_dpo/q_t": 0.3749125599861145, "grad_norm": 113.84324645996094, "learning_rate": 1.2292508422495157e-08, "logits/chosen": -0.8343696594238281, "logits/rejected": -0.8213623762130737, "logps/chosen": -360.0166015625, "logps/ref_chosen": -260.7384033203125, "logps/ref_rejected": -248.5688018798828, "logps/rejected": -407.3088073730469, "loss": 4.0724, "margin_dpo/margin_mean": 59.46183776855469, "margin_dpo/margin_std": 76.63561248779297, "step": 435 }, { "epoch": 0.9130890052356021, "fcm_dpo/beta": 0.010455166921019554, "fcm_dpo/delta": 0.06821566820144653, "fcm_dpo/margin": 51.208003997802734, "fcm_dpo/q_t": 0.3907574415206909, "grad_norm": 108.74581909179688, "learning_rate": 1.1731874863145142e-08, "logits/chosen": -0.8110353350639343, "logits/rejected": -0.8122683763504028, "logps/chosen": -424.99334716796875, "logps/ref_chosen": -319.3224792480469, "logps/ref_rejected": -299.30322265625, "logps/rejected": -456.1820983886719, "loss": 4.3717, "margin_dpo/margin_mean": 51.208003997802734, "margin_dpo/margin_std": 84.06385040283203, "step": 436 }, { "epoch": 0.9151832460732985, "fcm_dpo/beta": 0.010105324909090996, "fcm_dpo/delta": -0.14192956686019897, "fcm_dpo/margin": 68.01710510253906, "fcm_dpo/q_t": 0.3576942980289459, "grad_norm": 97.00816345214844, "learning_rate": 1.118401890024001e-08, "logits/chosen": -0.845094621181488, "logits/rejected": -0.8325349688529968, "logps/chosen": -376.66925048828125, "logps/ref_chosen": -278.82879638671875, "logps/ref_rejected": -272.55303955078125, "logps/rejected": -438.4106140136719, "loss": 3.9596, "margin_dpo/margin_mean": 68.01710510253906, "margin_dpo/margin_std": 85.8590087890625, "step": 437 }, { "epoch": 0.9172774869109948, "fcm_dpo/beta": 0.009664845652878284, "fcm_dpo/delta": 0.030612653121352196, "fcm_dpo/margin": 35.6963996887207, "fcm_dpo/q_t": 0.42677760124206543, "grad_norm": 104.95515441894531, "learning_rate": 1.06489699136324e-08, "logits/chosen": -0.8136807680130005, "logits/rejected": -0.8375378847122192, "logps/chosen": -362.48974609375, "logps/ref_chosen": -259.31903076171875, "logps/ref_rejected": -240.99581909179688, "logps/rejected": -379.8629150390625, "loss": 4.9299, "margin_dpo/margin_mean": 35.69639587402344, "margin_dpo/margin_std": 83.86488342285156, "step": 438 }, { "epoch": 0.9193717277486911, "fcm_dpo/beta": 0.009936582297086716, "fcm_dpo/delta": 0.03292373940348625, "fcm_dpo/margin": 57.200782775878906, "fcm_dpo/q_t": 0.38112884759902954, "grad_norm": 110.18673706054688, "learning_rate": 1.0126756596375685e-08, "logits/chosen": -0.8162304162979126, "logits/rejected": -0.8262991905212402, "logps/chosen": -361.297607421875, "logps/ref_chosen": -257.1243896484375, "logps/ref_rejected": -243.20416259765625, "logps/rejected": -404.5781555175781, "loss": 4.2137, "margin_dpo/margin_mean": 57.200782775878906, "margin_dpo/margin_std": 83.29421997070312, "step": 439 }, { "epoch": 0.9214659685863874, "fcm_dpo/beta": 0.010659238323569298, "fcm_dpo/delta": 0.0877818912267685, "fcm_dpo/margin": 43.59062957763672, "fcm_dpo/q_t": 0.39968663454055786, "grad_norm": 104.97032165527344, "learning_rate": 9.617406953185136e-09, "logits/chosen": -0.8673726320266724, "logits/rejected": -0.8623473048210144, "logps/chosen": -421.5701599121094, "logps/ref_chosen": -307.5315246582031, "logps/ref_rejected": -264.3540954589844, "logps/rejected": -421.98333740234375, "loss": 4.5375, "margin_dpo/margin_mean": 43.59062576293945, "margin_dpo/margin_std": 75.73169708251953, "step": 440 }, { "epoch": 0.9235602094240838, "fcm_dpo/beta": 0.010612818412482738, "fcm_dpo/delta": -0.07970403879880905, "fcm_dpo/margin": 63.51165008544922, "fcm_dpo/q_t": 0.3585563600063324, "grad_norm": 95.90740203857422, "learning_rate": 9.12094829893642e-09, "logits/chosen": -0.8216838240623474, "logits/rejected": -0.8057107329368591, "logps/chosen": -411.36907958984375, "logps/ref_chosen": -309.9819641113281, "logps/ref_rejected": -297.4968566894531, "logps/rejected": -462.3956298828125, "loss": 3.938, "margin_dpo/margin_mean": 63.511653900146484, "margin_dpo/margin_std": 77.47901153564453, "step": 441 }, { "epoch": 0.9256544502617801, "fcm_dpo/beta": 0.010241352021694183, "fcm_dpo/delta": 0.07879231870174408, "fcm_dpo/margin": 51.40802001953125, "fcm_dpo/q_t": 0.3907470703125, "grad_norm": 98.82845306396484, "learning_rate": 8.637407257200496e-09, "logits/chosen": -0.8957461714744568, "logits/rejected": -0.8518679141998291, "logps/chosen": -387.4726867675781, "logps/ref_chosen": -278.9791564941406, "logps/ref_rejected": -242.87310791015625, "logps/rejected": -402.7746887207031, "loss": 4.4572, "margin_dpo/margin_mean": 51.40802001953125, "margin_dpo/margin_std": 85.5845947265625, "step": 442 }, { "epoch": 0.9277486910994764, "fcm_dpo/beta": 0.010851511731743813, "fcm_dpo/delta": -0.02806878834962845, "fcm_dpo/margin": 57.299949645996094, "fcm_dpo/q_t": 0.36912405490875244, "grad_norm": 98.69976806640625, "learning_rate": 8.166809758815895e-09, "logits/chosen": -0.7989782094955444, "logits/rejected": -0.8233458995819092, "logps/chosen": -373.9927978515625, "logps/ref_chosen": -273.5590515136719, "logps/ref_rejected": -264.0199279785156, "logps/rejected": -421.7536315917969, "loss": 4.1327, "margin_dpo/margin_mean": 57.299949645996094, "margin_dpo/margin_std": 77.64727020263672, "step": 443 }, { "epoch": 0.9298429319371728, "fcm_dpo/beta": 0.010247818194329739, "fcm_dpo/delta": -0.036531638354063034, "fcm_dpo/margin": 61.67512512207031, "fcm_dpo/q_t": 0.37224987149238586, "grad_norm": 99.69876861572266, "learning_rate": 7.709181040498253e-09, "logits/chosen": -0.8095158934593201, "logits/rejected": -0.7996165752410889, "logps/chosen": -398.9835205078125, "logps/ref_chosen": -298.1441955566406, "logps/ref_rejected": -268.0572814941406, "logps/rejected": -430.57171630859375, "loss": 4.1991, "margin_dpo/margin_mean": 61.67512893676758, "margin_dpo/margin_std": 92.46453857421875, "step": 444 }, { "epoch": 0.9319371727748691, "fcm_dpo/beta": 0.009988191537559032, "fcm_dpo/delta": -0.08285348117351532, "fcm_dpo/margin": 50.43273162841797, "fcm_dpo/q_t": 0.39380931854248047, "grad_norm": 90.62871551513672, "learning_rate": 7.2645456434869965e-09, "logits/chosen": -0.8713312745094299, "logits/rejected": -0.8869834542274475, "logps/chosen": -357.09771728515625, "logps/ref_chosen": -254.54067993164062, "logps/ref_rejected": -264.2445983886719, "logps/rejected": -417.234375, "loss": 4.4441, "margin_dpo/margin_mean": 50.4327278137207, "margin_dpo/margin_std": 77.92971801757812, "step": 445 }, { "epoch": 0.9340314136125655, "fcm_dpo/beta": 0.009595160372555256, "fcm_dpo/delta": 0.03080780804157257, "fcm_dpo/margin": 59.38265609741211, "fcm_dpo/q_t": 0.3774047791957855, "grad_norm": 90.70514678955078, "learning_rate": 6.832927412229017e-09, "logits/chosen": -0.8085803985595703, "logits/rejected": -0.8101305961608887, "logps/chosen": -404.2763366699219, "logps/ref_chosen": -306.72247314453125, "logps/ref_rejected": -266.3735656738281, "logps/rejected": -423.31005859375, "loss": 4.1929, "margin_dpo/margin_mean": 59.382652282714844, "margin_dpo/margin_std": 82.84217071533203, "step": 446 }, { "epoch": 0.9361256544502617, "fcm_dpo/beta": 0.00941769964993, "fcm_dpo/delta": -0.0662500411272049, "fcm_dpo/margin": 64.87944030761719, "fcm_dpo/q_t": 0.36734655499458313, "grad_norm": 78.74610900878906, "learning_rate": 6.414349493100129e-09, "logits/chosen": -0.8000814318656921, "logits/rejected": -0.8020035624504089, "logps/chosen": -357.4937744140625, "logps/ref_chosen": -260.51727294921875, "logps/ref_rejected": -236.47061157226562, "logps/rejected": -398.3265075683594, "loss": 3.9644, "margin_dpo/margin_mean": 64.87944030761719, "margin_dpo/margin_std": 77.26435089111328, "step": 447 }, { "epoch": 0.9382198952879581, "fcm_dpo/beta": 0.00942399725317955, "fcm_dpo/delta": 0.04195284843444824, "fcm_dpo/margin": 59.36241912841797, "fcm_dpo/q_t": 0.3821418881416321, "grad_norm": 100.28685760498047, "learning_rate": 6.0088343331638756e-09, "logits/chosen": -0.8049849271774292, "logits/rejected": -0.8009424805641174, "logps/chosen": -371.8785705566406, "logps/ref_chosen": -268.78704833984375, "logps/ref_rejected": -262.1703796386719, "logps/rejected": -424.6243591308594, "loss": 4.1666, "margin_dpo/margin_mean": 59.36241912841797, "margin_dpo/margin_std": 80.97298431396484, "step": 448 }, { "epoch": 0.9403141361256544, "fcm_dpo/beta": 0.00966222770512104, "fcm_dpo/delta": -0.02228935807943344, "fcm_dpo/margin": 64.16152954101562, "fcm_dpo/q_t": 0.36533617973327637, "grad_norm": 127.85818481445312, "learning_rate": 5.616403678967624e-09, "logits/chosen": -0.8874871730804443, "logits/rejected": -0.8732025623321533, "logps/chosen": -421.92047119140625, "logps/ref_chosen": -330.9514465332031, "logps/ref_rejected": -239.76974487304688, "logps/rejected": -394.9002685546875, "loss": 4.0159, "margin_dpo/margin_mean": 64.16152954101562, "margin_dpo/margin_std": 80.18797302246094, "step": 449 }, { "epoch": 0.9424083769633508, "fcm_dpo/beta": 0.00963627640157938, "fcm_dpo/delta": 0.05303880572319031, "fcm_dpo/margin": 51.81724548339844, "fcm_dpo/q_t": 0.39200538396835327, "grad_norm": 105.71543884277344, "learning_rate": 5.2370785753763356e-09, "logits/chosen": -0.7862426042556763, "logits/rejected": -0.7958937883377075, "logps/chosen": -395.0786437988281, "logps/ref_chosen": -284.26544189453125, "logps/ref_rejected": -250.5401611328125, "logps/rejected": -413.170654296875, "loss": 4.2778, "margin_dpo/margin_mean": 51.81724548339844, "margin_dpo/margin_std": 73.03158569335938, "step": 450 }, { "epoch": 0.9445026178010472, "fcm_dpo/beta": 0.009586581960320473, "fcm_dpo/delta": -0.022069107741117477, "fcm_dpo/margin": 52.51289367675781, "fcm_dpo/q_t": 0.39439404010772705, "grad_norm": 103.11542510986328, "learning_rate": 4.8708793644441086e-09, "logits/chosen": -0.810042679309845, "logits/rejected": -0.7838971018791199, "logps/chosen": -413.1301574707031, "logps/ref_chosen": -302.3209228515625, "logps/ref_rejected": -254.09747314453125, "logps/rejected": -417.41961669921875, "loss": 4.422, "margin_dpo/margin_mean": 52.51289749145508, "margin_dpo/margin_std": 82.73294830322266, "step": 451 }, { "epoch": 0.9465968586387434, "fcm_dpo/beta": 0.009494351223111153, "fcm_dpo/delta": 0.006426731124520302, "fcm_dpo/margin": 57.46319580078125, "fcm_dpo/q_t": 0.38508880138397217, "grad_norm": 94.53024291992188, "learning_rate": 4.517825684323323e-09, "logits/chosen": -0.8596530556678772, "logits/rejected": -0.8371134996414185, "logps/chosen": -398.1798400878906, "logps/ref_chosen": -299.39215087890625, "logps/ref_rejected": -284.3475036621094, "logps/rejected": -440.59844970703125, "loss": 4.2508, "margin_dpo/margin_mean": 57.46318817138672, "margin_dpo/margin_std": 82.77123260498047, "step": 452 }, { "epoch": 0.9486910994764398, "fcm_dpo/beta": 0.009665731340646744, "fcm_dpo/delta": -0.016047965735197067, "fcm_dpo/margin": 63.59440231323242, "fcm_dpo/q_t": 0.3707888722419739, "grad_norm": 89.89924621582031, "learning_rate": 4.1779364682113794e-09, "logits/chosen": -0.7991673350334167, "logits/rejected": -0.7965455651283264, "logps/chosen": -429.2721862792969, "logps/ref_chosen": -324.6517028808594, "logps/ref_rejected": -304.1527099609375, "logps/rejected": -472.3676452636719, "loss": 4.0389, "margin_dpo/margin_mean": 63.59440231323242, "margin_dpo/margin_std": 84.75194549560547, "step": 453 }, { "epoch": 0.9507853403141361, "fcm_dpo/beta": 0.009600440971553326, "fcm_dpo/delta": -0.007240898907184601, "fcm_dpo/margin": 62.99691390991211, "fcm_dpo/q_t": 0.3711826205253601, "grad_norm": 75.93077087402344, "learning_rate": 3.851229943335393e-09, "logits/chosen": -0.86097651720047, "logits/rejected": -0.8730704188346863, "logps/chosen": -399.46826171875, "logps/ref_chosen": -299.6117248535156, "logps/ref_rejected": -303.74224853515625, "logps/rejected": -466.59564208984375, "loss": 4.1159, "margin_dpo/margin_mean": 62.99691390991211, "margin_dpo/margin_std": 85.24671936035156, "step": 454 }, { "epoch": 0.9528795811518325, "fcm_dpo/beta": 0.010221119970083237, "fcm_dpo/delta": 0.1354561150074005, "fcm_dpo/margin": 45.94690704345703, "fcm_dpo/q_t": 0.4033811688423157, "grad_norm": 97.06004333496094, "learning_rate": 3.5377236299748147e-09, "logits/chosen": -0.8142644166946411, "logits/rejected": -0.826336145401001, "logps/chosen": -372.8396301269531, "logps/ref_chosen": -273.6116943359375, "logps/ref_rejected": -274.4293518066406, "logps/rejected": -419.604248046875, "loss": 4.5863, "margin_dpo/margin_mean": 45.94690704345703, "margin_dpo/margin_std": 85.6665267944336, "step": 455 }, { "epoch": 0.9549738219895288, "fcm_dpo/beta": 0.010199323296546936, "fcm_dpo/delta": -0.09923385083675385, "fcm_dpo/margin": 63.42009735107422, "fcm_dpo/q_t": 0.377056747674942, "grad_norm": 94.97087097167969, "learning_rate": 3.2374343405217884e-09, "logits/chosen": -0.7348066568374634, "logits/rejected": -0.7477578520774841, "logps/chosen": -438.2121276855469, "logps/ref_chosen": -322.17193603515625, "logps/ref_rejected": -294.54461669921875, "logps/rejected": -474.0048522949219, "loss": 4.3425, "margin_dpo/margin_mean": 63.42009353637695, "margin_dpo/margin_std": 105.82379913330078, "step": 456 }, { "epoch": 0.9570680628272251, "fcm_dpo/beta": 0.009744374081492424, "fcm_dpo/delta": -0.009416388347744942, "fcm_dpo/margin": 62.40718460083008, "fcm_dpo/q_t": 0.36779892444610596, "grad_norm": 84.67405700683594, "learning_rate": 2.9503781785795713e-09, "logits/chosen": -0.7926053404808044, "logits/rejected": -0.7992970943450928, "logps/chosen": -416.7201843261719, "logps/ref_chosen": -307.7962341308594, "logps/ref_rejected": -274.5501403808594, "logps/rejected": -445.88128662109375, "loss": 4.173, "margin_dpo/margin_mean": 62.40718460083008, "margin_dpo/margin_std": 89.37660217285156, "step": 457 }, { "epoch": 0.9591623036649215, "fcm_dpo/beta": 0.010077232494950294, "fcm_dpo/delta": 0.03728824108839035, "fcm_dpo/margin": 55.845306396484375, "fcm_dpo/q_t": 0.3842548131942749, "grad_norm": 84.43614959716797, "learning_rate": 2.6765705380989432e-09, "logits/chosen": -0.8247987627983093, "logits/rejected": -0.8124662041664124, "logps/chosen": -402.5335388183594, "logps/ref_chosen": -297.0316467285156, "logps/ref_rejected": -276.1112365722656, "logps/rejected": -437.45843505859375, "loss": 4.3221, "margin_dpo/margin_mean": 55.845306396484375, "margin_dpo/margin_std": 86.97913360595703, "step": 458 }, { "epoch": 0.9612565445026178, "fcm_dpo/beta": 0.010236883535981178, "fcm_dpo/delta": 0.021781034767627716, "fcm_dpo/margin": 51.37626647949219, "fcm_dpo/q_t": 0.38991579413414, "grad_norm": 117.91474151611328, "learning_rate": 2.416026102552732e-09, "logits/chosen": -0.8719888925552368, "logits/rejected": -0.8656003475189209, "logps/chosen": -394.6509704589844, "logps/ref_chosen": -293.5252990722656, "logps/ref_rejected": -289.30126953125, "logps/rejected": -441.8031921386719, "loss": 4.367, "margin_dpo/margin_mean": 51.37627410888672, "margin_dpo/margin_std": 80.18513488769531, "step": 459 }, { "epoch": 0.9633507853403142, "fcm_dpo/beta": 0.010486846789717674, "fcm_dpo/delta": 0.013618772849440575, "fcm_dpo/margin": 50.83551788330078, "fcm_dpo/q_t": 0.385250449180603, "grad_norm": 104.75066375732422, "learning_rate": 2.168758844148272e-09, "logits/chosen": -0.8460461497306824, "logits/rejected": -0.8530416488647461, "logps/chosen": -421.2718200683594, "logps/ref_chosen": -318.7803649902344, "logps/ref_rejected": -258.7906799316406, "logps/rejected": -412.1176452636719, "loss": 4.3321, "margin_dpo/margin_mean": 50.83551788330078, "margin_dpo/margin_std": 77.74996948242188, "step": 460 }, { "epoch": 0.9654450261780104, "fcm_dpo/beta": 0.010277766734361649, "fcm_dpo/delta": -0.023162774741649628, "fcm_dpo/margin": 55.653541564941406, "fcm_dpo/q_t": 0.3829959034919739, "grad_norm": 109.19080352783203, "learning_rate": 1.9347820230782295e-09, "logits/chosen": -0.8199470043182373, "logits/rejected": -0.8473576903343201, "logps/chosen": -346.2790222167969, "logps/ref_chosen": -243.9099884033203, "logps/ref_rejected": -232.6382293701172, "logps/rejected": -390.6607666015625, "loss": 4.3594, "margin_dpo/margin_mean": 55.653533935546875, "margin_dpo/margin_std": 89.18666076660156, "step": 461 }, { "epoch": 0.9675392670157068, "fcm_dpo/beta": 0.009733910672366619, "fcm_dpo/delta": -0.06866450607776642, "fcm_dpo/margin": 67.92211151123047, "fcm_dpo/q_t": 0.36428695917129517, "grad_norm": 90.08053588867188, "learning_rate": 1.7141081868094209e-09, "logits/chosen": -0.8322769403457642, "logits/rejected": -0.7903834581375122, "logps/chosen": -447.9652404785156, "logps/ref_chosen": -344.09100341796875, "logps/ref_rejected": -252.45037841796875, "logps/rejected": -424.2467956542969, "loss": 4.0804, "margin_dpo/margin_mean": 67.92211151123047, "margin_dpo/margin_std": 93.55986785888672, "step": 462 }, { "epoch": 0.9696335078534032, "fcm_dpo/beta": 0.01002544816583395, "fcm_dpo/delta": 0.07595731317996979, "fcm_dpo/margin": 52.43413162231445, "fcm_dpo/q_t": 0.3893330693244934, "grad_norm": 105.60916900634766, "learning_rate": 1.5067491694100153e-09, "logits/chosen": -0.8533962368965149, "logits/rejected": -0.8175274133682251, "logps/chosen": -396.2633056640625, "logps/ref_chosen": -297.1424560546875, "logps/ref_rejected": -234.0208282470703, "logps/rejected": -385.5758361816406, "loss": 4.3877, "margin_dpo/margin_mean": 52.43413162231445, "margin_dpo/margin_std": 84.19559478759766, "step": 463 }, { "epoch": 0.9717277486910995, "fcm_dpo/beta": 0.010431027971208096, "fcm_dpo/delta": 0.05969306826591492, "fcm_dpo/margin": 52.017539978027344, "fcm_dpo/q_t": 0.38790467381477356, "grad_norm": 135.7662353515625, "learning_rate": 1.3127160909147672e-09, "logits/chosen": -0.8195664286613464, "logits/rejected": -0.8451250791549683, "logps/chosen": -378.0357971191406, "logps/ref_chosen": -265.71075439453125, "logps/ref_rejected": -256.4108581542969, "logps/rejected": -420.75341796875, "loss": 4.4027, "margin_dpo/margin_mean": 52.017539978027344, "margin_dpo/margin_std": 85.27294921875, "step": 464 }, { "epoch": 0.9738219895287958, "fcm_dpo/beta": 0.00996001623570919, "fcm_dpo/delta": -0.14235681295394897, "fcm_dpo/margin": 65.76058197021484, "fcm_dpo/q_t": 0.36305713653564453, "grad_norm": 67.80677032470703, "learning_rate": 1.1320193567288527e-09, "logits/chosen": -0.8823571801185608, "logits/rejected": -0.8570696115493774, "logps/chosen": -390.29156494140625, "logps/ref_chosen": -293.1527404785156, "logps/ref_rejected": -293.70947265625, "logps/rejected": -456.60888671875, "loss": 4.0328, "margin_dpo/margin_mean": 65.76058197021484, "margin_dpo/margin_std": 82.558837890625, "step": 465 }, { "epoch": 0.9759162303664921, "fcm_dpo/beta": 0.00922522135078907, "fcm_dpo/delta": -0.055178042501211166, "fcm_dpo/margin": 70.33241271972656, "fcm_dpo/q_t": 0.36043423414230347, "grad_norm": 80.35893249511719, "learning_rate": 9.64668657069706e-10, "logits/chosen": -0.8069887757301331, "logits/rejected": -0.7611278295516968, "logps/chosen": -353.83447265625, "logps/ref_chosen": -261.4775695800781, "logps/ref_rejected": -248.36282348632812, "logps/rejected": -411.0520935058594, "loss": 3.8624, "margin_dpo/margin_mean": 70.33241271972656, "margin_dpo/margin_std": 77.98775482177734, "step": 466 }, { "epoch": 0.9780104712041885, "fcm_dpo/beta": 0.009081280790269375, "fcm_dpo/delta": 0.013140158727765083, "fcm_dpo/margin": 50.5679817199707, "fcm_dpo/q_t": 0.40236690640449524, "grad_norm": 95.41093444824219, "learning_rate": 8.106729664475176e-10, "logits/chosen": -0.7933071851730347, "logits/rejected": -0.7896216511726379, "logps/chosen": -371.2308044433594, "logps/ref_chosen": -266.354248046875, "logps/ref_rejected": -277.76324462890625, "logps/rejected": -433.20782470703125, "loss": 4.5699, "margin_dpo/margin_mean": 50.5679817199707, "margin_dpo/margin_std": 88.48072052001953, "step": 467 }, { "epoch": 0.9801047120418848, "fcm_dpo/beta": 0.009550162591040134, "fcm_dpo/delta": 0.045823030173778534, "fcm_dpo/margin": 52.80379104614258, "fcm_dpo/q_t": 0.3920201063156128, "grad_norm": 90.87904357910156, "learning_rate": 6.700405431837585e-10, "logits/chosen": -0.8760251998901367, "logits/rejected": -0.8512067198753357, "logps/chosen": -418.8556213378906, "logps/ref_chosen": -317.9631652832031, "logps/ref_rejected": -261.8744201660156, "logps/rejected": -415.5706787109375, "loss": 4.3766, "margin_dpo/margin_mean": 52.80379104614258, "margin_dpo/margin_std": 82.05970001220703, "step": 468 }, { "epoch": 0.9821989528795811, "fcm_dpo/beta": 0.009466158226132393, "fcm_dpo/delta": -0.01515500620007515, "fcm_dpo/margin": 64.6595458984375, "fcm_dpo/q_t": 0.370385080575943, "grad_norm": 82.3922348022461, "learning_rate": 5.427789289685347e-10, "logits/chosen": -0.8086352348327637, "logits/rejected": -0.7982282042503357, "logps/chosen": -420.8330993652344, "logps/ref_chosen": -324.8868103027344, "logps/ref_rejected": -264.0421447753906, "logps/rejected": -424.64794921875, "loss": 4.1333, "margin_dpo/margin_mean": 64.6595458984375, "margin_dpo/margin_std": 88.66175842285156, "step": 469 }, { "epoch": 0.9842931937172775, "fcm_dpo/beta": 0.009759598411619663, "fcm_dpo/delta": -0.009445477277040482, "fcm_dpo/margin": 62.19755935668945, "fcm_dpo/q_t": 0.3727426528930664, "grad_norm": 74.95175170898438, "learning_rate": 4.288949484559934e-10, "logits/chosen": -0.8073372840881348, "logits/rejected": -0.8078172206878662, "logps/chosen": -407.6559753417969, "logps/ref_chosen": -314.7042541503906, "logps/ref_rejected": -259.2276611328125, "logps/rejected": -414.376953125, "loss": 4.1115, "margin_dpo/margin_mean": 62.19756317138672, "margin_dpo/margin_std": 83.67054748535156, "step": 470 }, { "epoch": 0.9863874345549738, "fcm_dpo/beta": 0.009834382683038712, "fcm_dpo/delta": 0.014730914495885372, "fcm_dpo/margin": 54.49878692626953, "fcm_dpo/q_t": 0.39111655950546265, "grad_norm": 105.49163055419922, "learning_rate": 3.2839470889836627e-10, "logits/chosen": -0.8555752635002136, "logits/rejected": -0.8461736440658569, "logps/chosen": -399.32470703125, "logps/ref_chosen": -292.5748291015625, "logps/ref_rejected": -298.7525329589844, "logps/rejected": -460.0011901855469, "loss": 4.3592, "margin_dpo/margin_mean": 54.49878692626953, "margin_dpo/margin_std": 87.04114532470703, "step": 471 }, { "epoch": 0.9884816753926702, "fcm_dpo/beta": 0.00952895823866129, "fcm_dpo/delta": -0.003236265853047371, "fcm_dpo/margin": 63.19277572631836, "fcm_dpo/q_t": 0.37333759665489197, "grad_norm": 75.30541229248047, "learning_rate": 2.412835998185092e-10, "logits/chosen": -0.8509103655815125, "logits/rejected": -0.8665403127670288, "logps/chosen": -336.0157470703125, "logps/ref_chosen": -243.37380981445312, "logps/ref_rejected": -251.12109375, "logps/rejected": -406.9557800292969, "loss": 4.0229, "margin_dpo/margin_mean": 63.192779541015625, "margin_dpo/margin_std": 79.94721984863281, "step": 472 }, { "epoch": 0.9905759162303664, "fcm_dpo/beta": 0.009621812961995602, "fcm_dpo/delta": -0.031623952090740204, "fcm_dpo/margin": 65.44559478759766, "fcm_dpo/q_t": 0.365360826253891, "grad_norm": 80.94692993164062, "learning_rate": 1.6756629272085544e-10, "logits/chosen": -0.809826672077179, "logits/rejected": -0.816728413105011, "logps/chosen": -384.465087890625, "logps/ref_chosen": -286.3286437988281, "logps/ref_rejected": -258.6535339355469, "logps/rejected": -422.235595703125, "loss": 3.9983, "margin_dpo/margin_mean": 65.44559478759766, "margin_dpo/margin_std": 81.92974090576172, "step": 473 }, { "epoch": 0.9926701570680628, "fcm_dpo/beta": 0.009346621111035347, "fcm_dpo/delta": 0.05879068374633789, "fcm_dpo/margin": 51.022865295410156, "fcm_dpo/q_t": 0.39581844210624695, "grad_norm": 107.87037658691406, "learning_rate": 1.072467408408384e-10, "logits/chosen": -0.8381420373916626, "logits/rejected": -0.8414347171783447, "logps/chosen": -392.3779296875, "logps/ref_chosen": -288.08966064453125, "logps/ref_rejected": -266.69696044921875, "logps/rejected": -422.00811767578125, "loss": 4.3549, "margin_dpo/margin_mean": 51.022865295410156, "margin_dpo/margin_std": 71.58537292480469, "step": 474 }, { "epoch": 0.9947643979057592, "fcm_dpo/beta": 0.009538455866277218, "fcm_dpo/delta": -0.0211745984852314, "fcm_dpo/margin": 53.395355224609375, "fcm_dpo/q_t": 0.3920353651046753, "grad_norm": 98.68036651611328, "learning_rate": 6.032817893297793e-11, "logits/chosen": -0.8091562390327454, "logits/rejected": -0.8317868113517761, "logps/chosen": -350.73065185546875, "logps/ref_chosen": -256.0030517578125, "logps/ref_rejected": -244.50660705566406, "logps/rejected": -392.6295471191406, "loss": 4.3115, "margin_dpo/margin_mean": 53.395362854003906, "margin_dpo/margin_std": 77.75424194335938, "step": 475 }, { "epoch": 0.9968586387434555, "fcm_dpo/beta": 0.009954025037586689, "fcm_dpo/delta": 0.031187057495117188, "fcm_dpo/margin": 56.84566116333008, "fcm_dpo/q_t": 0.3844139575958252, "grad_norm": 119.93623352050781, "learning_rate": 2.6813123097352287e-11, "logits/chosen": -0.8869379162788391, "logits/rejected": -0.8474351167678833, "logps/chosen": -413.58526611328125, "logps/ref_chosen": -321.467529296875, "logps/ref_rejected": -295.0592956542969, "logps/rejected": -444.022705078125, "loss": 4.314, "margin_dpo/margin_mean": 56.84566116333008, "margin_dpo/margin_std": 86.78395080566406, "step": 476 }, { "epoch": 0.9989528795811519, "fcm_dpo/beta": 0.00990710686892271, "fcm_dpo/delta": -0.03750115633010864, "fcm_dpo/margin": 59.27772521972656, "fcm_dpo/q_t": 0.3814779818058014, "grad_norm": 128.50753784179688, "learning_rate": 6.7033706447061635e-12, "logits/chosen": -0.7780085206031799, "logits/rejected": -0.7910189032554626, "logps/chosen": -384.5294189453125, "logps/ref_chosen": -276.7939758300781, "logps/ref_rejected": -244.82919311523438, "logps/rejected": -411.8423156738281, "loss": 4.38, "margin_dpo/margin_mean": 59.27772521972656, "margin_dpo/margin_std": 96.08084106445312, "step": 477 }, { "epoch": 0.9989528795811519, "step": 477, "total_flos": 0.0, "train_loss": 4.544049396954742, "train_runtime": 6685.0038, "train_samples_per_second": 9.145, "train_steps_per_second": 0.071 } ], "logging_steps": 1, "max_steps": 477, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }