{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9989528795811519, "eval_steps": 500, "global_step": 477, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0020942408376963353, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.02793481945991516, "fcm_dpo/q_t": 0.500069797039032, "grad_norm": 28.588409423828125, "learning_rate": 0.0, "logits/chosen": -0.5906078815460205, "logits/rejected": -0.6050581932067871, "logps/chosen": -275.48590087890625, "logps/ref_chosen": -275.43902587890625, "logps/ref_rejected": -223.14576721191406, "logps/rejected": -223.16473388671875, "loss": 5.5463, "margin_dpo/margin_mean": -0.02793477475643158, "margin_dpo/margin_std": 0.5724214911460876, "step": 1 }, { "epoch": 0.004188481675392671, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.014312177896499634, "fcm_dpo/q_t": 0.4999642074108124, "grad_norm": 27.878114700317383, "learning_rate": 1.0416666666666666e-08, "logits/chosen": -0.6574729681015015, "logits/rejected": -0.6464410424232483, "logps/chosen": -264.7165222167969, "logps/ref_chosen": -264.7611083984375, "logps/ref_rejected": -242.5597686767578, "logps/rejected": -242.52951049804688, "loss": 5.5446, "margin_dpo/margin_mean": 0.014312252402305603, "margin_dpo/margin_std": 0.6423971652984619, "step": 2 }, { "epoch": 0.0062827225130890054, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.004927471280097961, "fcm_dpo/q_t": 0.4999876916408539, "grad_norm": 25.813234329223633, "learning_rate": 2.083333333333333e-08, "logits/chosen": -0.6840659379959106, "logits/rejected": -0.7352093458175659, "logps/chosen": -274.1263122558594, "logps/ref_chosen": -274.1018981933594, "logps/ref_rejected": -286.5882568359375, "logps/rejected": -286.61761474609375, "loss": 5.545, "margin_dpo/margin_mean": 0.0049266517162323, "margin_dpo/margin_std": 0.6733812093734741, "step": 3 }, { "epoch": 0.008376963350785341, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.13576093316078186, "fcm_dpo/q_t": 0.49966058135032654, "grad_norm": 31.608015060424805, "learning_rate": 3.125e-08, "logits/chosen": -0.61723792552948, "logits/rejected": -0.6116781234741211, "logps/chosen": -329.80804443359375, "logps/ref_chosen": -329.8382568359375, "logps/ref_rejected": -303.2850646972656, "logps/rejected": -303.39056396484375, "loss": 5.5398, "margin_dpo/margin_mean": 0.13576152920722961, "margin_dpo/margin_std": 0.8702787756919861, "step": 4 }, { "epoch": 0.010471204188481676, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.02006945013999939, "fcm_dpo/q_t": 0.4999498128890991, "grad_norm": 29.561357498168945, "learning_rate": 4.166666666666666e-08, "logits/chosen": -0.5704169273376465, "logits/rejected": -0.5865122675895691, "logps/chosen": -301.6563720703125, "logps/ref_chosen": -301.7389221191406, "logps/ref_rejected": -274.7654724121094, "logps/rejected": -274.7029724121094, "loss": 5.5444, "margin_dpo/margin_mean": 0.020069316029548645, "margin_dpo/margin_std": 0.6975337862968445, "step": 5 }, { "epoch": 0.012565445026178011, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.07302632927894592, "fcm_dpo/q_t": 0.499817430973053, "grad_norm": 28.17999267578125, "learning_rate": 5.208333333333333e-08, "logits/chosen": -0.6796520352363586, "logits/rejected": -0.6424388885498047, "logps/chosen": -285.6215515136719, "logps/ref_chosen": -285.6946716308594, "logps/ref_rejected": -245.8200225830078, "logps/rejected": -245.81993103027344, "loss": 5.5423, "margin_dpo/margin_mean": 0.07302609086036682, "margin_dpo/margin_std": 0.7085909247398376, "step": 6 }, { "epoch": 0.014659685863874346, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.1700635403394699, "fcm_dpo/q_t": 0.5004251003265381, "grad_norm": 28.65284538269043, "learning_rate": 6.25e-08, "logits/chosen": -0.5784342288970947, "logits/rejected": -0.611269474029541, "logps/chosen": -264.7759704589844, "logps/ref_chosen": -264.65545654296875, "logps/ref_rejected": -253.10305786132812, "logps/rejected": -253.05352783203125, "loss": 5.552, "margin_dpo/margin_mean": -0.17006349563598633, "margin_dpo/margin_std": 0.7042044401168823, "step": 7 }, { "epoch": 0.016753926701570682, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.024522915482521057, "fcm_dpo/q_t": 0.4999386966228485, "grad_norm": 30.742538452148438, "learning_rate": 7.291666666666667e-08, "logits/chosen": -0.6803320646286011, "logits/rejected": -0.6875563859939575, "logps/chosen": -354.2235412597656, "logps/ref_chosen": -354.1887512207031, "logps/ref_rejected": -282.9112243652344, "logps/rejected": -282.97052001953125, "loss": 5.5443, "margin_dpo/margin_mean": 0.0245237797498703, "margin_dpo/margin_std": 0.7568092346191406, "step": 8 }, { "epoch": 0.018848167539267015, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.012213125824928284, "fcm_dpo/q_t": 0.4999694526195526, "grad_norm": 27.8194637298584, "learning_rate": 8.333333333333333e-08, "logits/chosen": -0.6387220025062561, "logits/rejected": -0.6569886207580566, "logps/chosen": -285.7725524902344, "logps/ref_chosen": -285.76055908203125, "logps/ref_rejected": -268.0285339355469, "logps/rejected": -268.052734375, "loss": 5.5448, "margin_dpo/margin_mean": 0.012212991714477539, "margin_dpo/margin_std": 0.738137423992157, "step": 9 }, { "epoch": 0.020942408376963352, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.07377111911773682, "fcm_dpo/q_t": 0.49981561303138733, "grad_norm": 26.536937713623047, "learning_rate": 9.375e-08, "logits/chosen": -0.6875832676887512, "logits/rejected": -0.6820325255393982, "logps/chosen": -251.8724822998047, "logps/ref_chosen": -251.91238403320312, "logps/ref_rejected": -226.45260620117188, "logps/rejected": -226.48646545410156, "loss": 5.5423, "margin_dpo/margin_mean": 0.07377050817012787, "margin_dpo/margin_std": 0.6598670482635498, "step": 10 }, { "epoch": 0.023036649214659685, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.01063111424446106, "fcm_dpo/q_t": 0.4999734163284302, "grad_norm": 29.03658676147461, "learning_rate": 1.0416666666666667e-07, "logits/chosen": -0.5948761701583862, "logits/rejected": -0.6499456763267517, "logps/chosen": -301.04718017578125, "logps/ref_chosen": -301.08343505859375, "logps/ref_rejected": -259.546630859375, "logps/rejected": -259.52099609375, "loss": 5.5448, "margin_dpo/margin_mean": 0.01063111424446106, "margin_dpo/margin_std": 0.7354652881622314, "step": 11 }, { "epoch": 0.025130890052356022, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.13213083148002625, "fcm_dpo/q_t": 0.4996696412563324, "grad_norm": 30.0300350189209, "learning_rate": 1.1458333333333332e-07, "logits/chosen": -0.5773444175720215, "logits/rejected": -0.5371226668357849, "logps/chosen": -287.5074462890625, "logps/ref_chosen": -287.548095703125, "logps/ref_rejected": -277.37945556640625, "logps/rejected": -277.470947265625, "loss": 5.54, "margin_dpo/margin_mean": 0.1321302056312561, "margin_dpo/margin_std": 0.7726021409034729, "step": 12 }, { "epoch": 0.027225130890052355, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.13757160305976868, "fcm_dpo/q_t": 0.4996561110019684, "grad_norm": 27.360511779785156, "learning_rate": 1.25e-07, "logits/chosen": -0.660868227481842, "logits/rejected": -0.6686940789222717, "logps/chosen": -270.62811279296875, "logps/ref_chosen": -270.6664123535156, "logps/ref_rejected": -274.6546936035156, "logps/rejected": -274.7539978027344, "loss": 5.5397, "margin_dpo/margin_mean": 0.13757173717021942, "margin_dpo/margin_std": 0.6561607718467712, "step": 13 }, { "epoch": 0.02931937172774869, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.05934774875640869, "fcm_dpo/q_t": 0.49985164403915405, "grad_norm": 28.293212890625, "learning_rate": 1.3541666666666666e-07, "logits/chosen": -0.6239809393882751, "logits/rejected": -0.6529561281204224, "logps/chosen": -281.5748596191406, "logps/ref_chosen": -281.59320068359375, "logps/ref_rejected": -263.52215576171875, "logps/rejected": -263.5631408691406, "loss": 5.5429, "margin_dpo/margin_mean": 0.0593467652797699, "margin_dpo/margin_std": 0.7482225894927979, "step": 14 }, { "epoch": 0.031413612565445025, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.039270758628845215, "fcm_dpo/q_t": 0.4999018609523773, "grad_norm": 30.250518798828125, "learning_rate": 1.4583333333333335e-07, "logits/chosen": -0.6411060094833374, "logits/rejected": -0.6542255878448486, "logps/chosen": -298.3626403808594, "logps/ref_chosen": -298.4093322753906, "logps/ref_rejected": -227.5626983642578, "logps/rejected": -227.5552978515625, "loss": 5.5437, "margin_dpo/margin_mean": 0.03926950693130493, "margin_dpo/margin_std": 0.6303021907806396, "step": 15 }, { "epoch": 0.033507853403141365, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.07741273194551468, "fcm_dpo/q_t": 0.4998064637184143, "grad_norm": 30.171316146850586, "learning_rate": 1.5624999999999999e-07, "logits/chosen": -0.6012479066848755, "logits/rejected": -0.5982969999313354, "logps/chosen": -293.8901062011719, "logps/ref_chosen": -293.96661376953125, "logps/ref_rejected": -250.78443908691406, "logps/rejected": -250.78536987304688, "loss": 5.5421, "margin_dpo/margin_mean": 0.07741250097751617, "margin_dpo/margin_std": 0.7642932534217834, "step": 16 }, { "epoch": 0.0356020942408377, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.07924064993858337, "fcm_dpo/q_t": 0.4998018741607666, "grad_norm": 27.790224075317383, "learning_rate": 1.6666666666666665e-07, "logits/chosen": -0.5656470656394958, "logits/rejected": -0.5915025472640991, "logps/chosen": -262.3228759765625, "logps/ref_chosen": -262.39398193359375, "logps/ref_rejected": -248.500244140625, "logps/rejected": -248.5083465576172, "loss": 5.5421, "margin_dpo/margin_mean": 0.07924069464206696, "margin_dpo/margin_std": 0.7225322723388672, "step": 17 }, { "epoch": 0.03769633507853403, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.0872071236371994, "fcm_dpo/q_t": 0.499781996011734, "grad_norm": 29.743511199951172, "learning_rate": 1.7708333333333334e-07, "logits/chosen": -0.6092166304588318, "logits/rejected": -0.6151822805404663, "logps/chosen": -293.66082763671875, "logps/ref_chosen": -293.709228515625, "logps/ref_rejected": -274.5875244140625, "logps/rejected": -274.62628173828125, "loss": 5.5417, "margin_dpo/margin_mean": 0.08720706403255463, "margin_dpo/margin_std": 0.7599306702613831, "step": 18 }, { "epoch": 0.039790575916230364, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.015616178512573242, "fcm_dpo/q_t": 0.4999609589576721, "grad_norm": 28.129470825195312, "learning_rate": 1.875e-07, "logits/chosen": -0.6186746954917908, "logits/rejected": -0.6140046715736389, "logps/chosen": -280.1886901855469, "logps/ref_chosen": -280.26568603515625, "logps/ref_rejected": -259.9742736816406, "logps/rejected": -259.91290283203125, "loss": 5.5446, "margin_dpo/margin_mean": 0.015616029500961304, "margin_dpo/margin_std": 0.7585482001304626, "step": 19 }, { "epoch": 0.041884816753926704, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.12203510105609894, "fcm_dpo/q_t": 0.49969494342803955, "grad_norm": 29.671567916870117, "learning_rate": 1.9791666666666664e-07, "logits/chosen": -0.6235091090202332, "logits/rejected": -0.6559134125709534, "logps/chosen": -303.7472229003906, "logps/ref_chosen": -303.8954162597656, "logps/ref_rejected": -260.214599609375, "logps/rejected": -260.1884765625, "loss": 5.5404, "margin_dpo/margin_mean": 0.12203498184680939, "margin_dpo/margin_std": 0.7693343162536621, "step": 20 }, { "epoch": 0.04397905759162304, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.07917946577072144, "fcm_dpo/q_t": 0.4998020529747009, "grad_norm": 35.06045913696289, "learning_rate": 2.0833333333333333e-07, "logits/chosen": -0.651059627532959, "logits/rejected": -0.6805247664451599, "logps/chosen": -301.47650146484375, "logps/ref_chosen": -301.5334777832031, "logps/ref_rejected": -280.28900146484375, "logps/rejected": -280.3111572265625, "loss": 5.5421, "margin_dpo/margin_mean": 0.07917973399162292, "margin_dpo/margin_std": 0.7579631805419922, "step": 21 }, { "epoch": 0.04607329842931937, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.07917653024196625, "fcm_dpo/q_t": 0.5001979470252991, "grad_norm": 25.275522232055664, "learning_rate": 2.1875e-07, "logits/chosen": -0.6546105742454529, "logits/rejected": -0.6562420129776001, "logps/chosen": -259.986083984375, "logps/ref_chosen": -259.9951477050781, "logps/ref_rejected": -243.0721435546875, "logps/rejected": -242.98394775390625, "loss": 5.5484, "margin_dpo/margin_mean": -0.0791759043931961, "margin_dpo/margin_std": 0.7844414710998535, "step": 22 }, { "epoch": 0.048167539267015703, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.07579465210437775, "fcm_dpo/q_t": 0.49981051683425903, "grad_norm": 27.832441329956055, "learning_rate": 2.2916666666666663e-07, "logits/chosen": -0.6248008012771606, "logits/rejected": -0.6592731475830078, "logps/chosen": -282.06793212890625, "logps/ref_chosen": -282.1807556152344, "logps/ref_rejected": -265.0758056640625, "logps/rejected": -265.0387878417969, "loss": 5.5422, "margin_dpo/margin_mean": 0.07579512894153595, "margin_dpo/margin_std": 0.7000916600227356, "step": 23 }, { "epoch": 0.050261780104712044, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.3224449157714844, "fcm_dpo/q_t": 0.49919387698173523, "grad_norm": 29.807321548461914, "learning_rate": 2.3958333333333335e-07, "logits/chosen": -0.656727135181427, "logits/rejected": -0.568871021270752, "logps/chosen": -300.95025634765625, "logps/ref_chosen": -301.17962646484375, "logps/ref_rejected": -302.12786865234375, "logps/rejected": -302.2209167480469, "loss": 5.5324, "margin_dpo/margin_mean": 0.32244449853897095, "margin_dpo/margin_std": 0.8477628827095032, "step": 24 }, { "epoch": 0.05235602094240838, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.05949197709560394, "fcm_dpo/q_t": 0.4998512864112854, "grad_norm": 26.471641540527344, "learning_rate": 2.5e-07, "logits/chosen": -0.5939292311668396, "logits/rejected": -0.6018354296684265, "logps/chosen": -246.61839294433594, "logps/ref_chosen": -246.74649047851562, "logps/ref_rejected": -235.55638122558594, "logps/rejected": -235.48777770996094, "loss": 5.5429, "margin_dpo/margin_mean": 0.05949154496192932, "margin_dpo/margin_std": 0.7037143707275391, "step": 25 }, { "epoch": 0.05445026178010471, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.1720740646123886, "fcm_dpo/q_t": 0.4995698034763336, "grad_norm": 28.6167049407959, "learning_rate": 2.604166666666667e-07, "logits/chosen": -0.6558055877685547, "logits/rejected": -0.6704913377761841, "logps/chosen": -281.93994140625, "logps/ref_chosen": -282.1955871582031, "logps/ref_rejected": -235.3135528564453, "logps/rejected": -235.22994995117188, "loss": 5.5384, "margin_dpo/margin_mean": 0.17207396030426025, "margin_dpo/margin_std": 0.7740581035614014, "step": 26 }, { "epoch": 0.05654450261780105, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.17665477097034454, "fcm_dpo/q_t": 0.49955838918685913, "grad_norm": 27.75247573852539, "learning_rate": 2.708333333333333e-07, "logits/chosen": -0.652806282043457, "logits/rejected": -0.6721222400665283, "logps/chosen": -323.57098388671875, "logps/ref_chosen": -323.8563537597656, "logps/ref_rejected": -245.968017578125, "logps/rejected": -245.85931396484375, "loss": 5.5382, "margin_dpo/margin_mean": 0.17665448784828186, "margin_dpo/margin_std": 0.9216269850730896, "step": 27 }, { "epoch": 0.05863874345549738, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.18093740940093994, "fcm_dpo/q_t": 0.49954766035079956, "grad_norm": 26.308048248291016, "learning_rate": 2.8125e-07, "logits/chosen": -0.6253893971443176, "logits/rejected": -0.6348061561584473, "logps/chosen": -247.98081970214844, "logps/ref_chosen": -248.24673461914062, "logps/ref_rejected": -240.0382080078125, "logps/rejected": -239.95323181152344, "loss": 5.538, "margin_dpo/margin_mean": 0.18093715608119965, "margin_dpo/margin_std": 0.8086836934089661, "step": 28 }, { "epoch": 0.060732984293193716, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.2044949233531952, "fcm_dpo/q_t": 0.4994887411594391, "grad_norm": 29.518325805664062, "learning_rate": 2.916666666666667e-07, "logits/chosen": -0.5903065204620361, "logits/rejected": -0.6122087240219116, "logps/chosen": -317.87603759765625, "logps/ref_chosen": -318.2564392089844, "logps/ref_rejected": -286.75848388671875, "logps/rejected": -286.5826110839844, "loss": 5.5371, "margin_dpo/margin_mean": 0.20449501276016235, "margin_dpo/margin_std": 0.8431642055511475, "step": 29 }, { "epoch": 0.06282722513089005, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.26426297426223755, "fcm_dpo/q_t": 0.49933937191963196, "grad_norm": 28.8969783782959, "learning_rate": 3.020833333333333e-07, "logits/chosen": -0.6008409261703491, "logits/rejected": -0.6181896328926086, "logps/chosen": -252.7128143310547, "logps/ref_chosen": -253.0491485595703, "logps/ref_rejected": -261.30029296875, "logps/rejected": -261.2282409667969, "loss": 5.5347, "margin_dpo/margin_mean": 0.26426294445991516, "margin_dpo/margin_std": 0.8474119901657104, "step": 30 }, { "epoch": 0.06492146596858639, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.2159474790096283, "fcm_dpo/q_t": 0.4994601905345917, "grad_norm": 25.033519744873047, "learning_rate": 3.1249999999999997e-07, "logits/chosen": -0.6617273092269897, "logits/rejected": -0.697493314743042, "logps/chosen": -247.74754333496094, "logps/ref_chosen": -248.15301513671875, "logps/ref_rejected": -203.17703247070312, "logps/rejected": -202.98751831054688, "loss": 5.5367, "margin_dpo/margin_mean": 0.2159472405910492, "margin_dpo/margin_std": 0.9926141500473022, "step": 31 }, { "epoch": 0.06701570680628273, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.2026405930519104, "fcm_dpo/q_t": 0.49949339032173157, "grad_norm": 29.645679473876953, "learning_rate": 3.2291666666666666e-07, "logits/chosen": -0.6060912013053894, "logits/rejected": -0.6101662516593933, "logps/chosen": -305.0863037109375, "logps/ref_chosen": -305.5399475097656, "logps/ref_rejected": -267.6527099609375, "logps/rejected": -267.4017028808594, "loss": 5.5372, "margin_dpo/margin_mean": 0.2026415467262268, "margin_dpo/margin_std": 0.9702023267745972, "step": 32 }, { "epoch": 0.06910994764397906, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.37219977378845215, "fcm_dpo/q_t": 0.49906954169273376, "grad_norm": 28.309568405151367, "learning_rate": 3.333333333333333e-07, "logits/chosen": -0.6386057138442993, "logits/rejected": -0.6507879495620728, "logps/chosen": -285.6277160644531, "logps/ref_chosen": -286.2335205078125, "logps/ref_rejected": -255.38748168945312, "logps/rejected": -255.15390014648438, "loss": 5.5304, "margin_dpo/margin_mean": 0.37220001220703125, "margin_dpo/margin_std": 1.079951524734497, "step": 33 }, { "epoch": 0.0712041884816754, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.7043960094451904, "fcm_dpo/q_t": 0.49823909997940063, "grad_norm": 31.112960815429688, "learning_rate": 3.4375e-07, "logits/chosen": -0.6270374059677124, "logits/rejected": -0.6368086338043213, "logps/chosen": -340.82989501953125, "logps/ref_chosen": -341.5920104980469, "logps/ref_rejected": -278.8866882324219, "logps/rejected": -278.8289794921875, "loss": 5.5172, "margin_dpo/margin_mean": 0.7043963074684143, "margin_dpo/margin_std": 1.175834059715271, "step": 34 }, { "epoch": 0.07329842931937172, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.28337526321411133, "fcm_dpo/q_t": 0.4992915987968445, "grad_norm": 26.421070098876953, "learning_rate": 3.541666666666667e-07, "logits/chosen": -0.6235166192054749, "logits/rejected": -0.6431545615196228, "logps/chosen": -264.448974609375, "logps/ref_chosen": -265.0795593261719, "logps/ref_rejected": -264.4876708984375, "logps/rejected": -264.1404724121094, "loss": 5.534, "margin_dpo/margin_mean": 0.2833753824234009, "margin_dpo/margin_std": 1.2927016019821167, "step": 35 }, { "epoch": 0.07539267015706806, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.6523094177246094, "fcm_dpo/q_t": 0.49836936593055725, "grad_norm": 31.853910446166992, "learning_rate": 3.645833333333333e-07, "logits/chosen": -0.5965672731399536, "logits/rejected": -0.6139001250267029, "logps/chosen": -296.49725341796875, "logps/ref_chosen": -297.3261413574219, "logps/ref_rejected": -282.09515380859375, "logps/rejected": -281.9185791015625, "loss": 5.5193, "margin_dpo/margin_mean": 0.6523087024688721, "margin_dpo/margin_std": 1.4231526851654053, "step": 36 }, { "epoch": 0.0774869109947644, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.5245670080184937, "fcm_dpo/q_t": 0.4986886978149414, "grad_norm": 30.910381317138672, "learning_rate": 3.75e-07, "logits/chosen": -0.6018107533454895, "logits/rejected": -0.6163386106491089, "logps/chosen": -313.3153381347656, "logps/ref_chosen": -314.0340270996094, "logps/ref_rejected": -299.3437805175781, "logps/rejected": -299.149658203125, "loss": 5.5245, "margin_dpo/margin_mean": 0.5245668888092041, "margin_dpo/margin_std": 1.4947643280029297, "step": 37 }, { "epoch": 0.07958115183246073, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.6186456680297852, "fcm_dpo/q_t": 0.4984534978866577, "grad_norm": 28.512378692626953, "learning_rate": 3.8541666666666665e-07, "logits/chosen": -0.635643720626831, "logits/rejected": -0.6476578712463379, "logps/chosen": -281.5082092285156, "logps/ref_chosen": -282.54119873046875, "logps/ref_rejected": -269.7773132324219, "logps/rejected": -269.3629150390625, "loss": 5.5207, "margin_dpo/margin_mean": 0.6186456680297852, "margin_dpo/margin_std": 1.486997127532959, "step": 38 }, { "epoch": 0.08167539267015707, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.244322657585144, "fcm_dpo/q_t": 0.49688953161239624, "grad_norm": 29.5240478515625, "learning_rate": 3.958333333333333e-07, "logits/chosen": -0.6167585253715515, "logits/rejected": -0.6307709217071533, "logps/chosen": -275.46820068359375, "logps/ref_chosen": -276.7729187011719, "logps/ref_rejected": -249.95889282226562, "logps/rejected": -249.89846801757812, "loss": 5.4959, "margin_dpo/margin_mean": 1.2443227767944336, "margin_dpo/margin_std": 1.6552257537841797, "step": 39 }, { "epoch": 0.08376963350785341, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.6932114362716675, "fcm_dpo/q_t": 0.498267263174057, "grad_norm": 27.426746368408203, "learning_rate": 4.0625e-07, "logits/chosen": -0.6174054741859436, "logits/rejected": -0.652121901512146, "logps/chosen": -283.2413330078125, "logps/ref_chosen": -284.30706787109375, "logps/ref_rejected": -244.4459991455078, "logps/rejected": -244.0734405517578, "loss": 5.5179, "margin_dpo/margin_mean": 0.6932120323181152, "margin_dpo/margin_std": 1.8467386960983276, "step": 40 }, { "epoch": 0.08586387434554973, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.7606411576271057, "fcm_dpo/q_t": 0.49809861183166504, "grad_norm": 30.642261505126953, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -0.6371512413024902, "logits/rejected": -0.6642083525657654, "logps/chosen": -292.70098876953125, "logps/ref_chosen": -293.8151550292969, "logps/ref_rejected": -252.16815185546875, "logps/rejected": -251.8146209716797, "loss": 5.5151, "margin_dpo/margin_mean": 0.7606427669525146, "margin_dpo/margin_std": 1.701622486114502, "step": 41 }, { "epoch": 0.08795811518324607, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.9576462507247925, "fcm_dpo/q_t": 0.4976062476634979, "grad_norm": 27.611026763916016, "learning_rate": 4.270833333333333e-07, "logits/chosen": -0.6295742988586426, "logits/rejected": -0.6472880244255066, "logps/chosen": -251.46585083007812, "logps/ref_chosen": -252.76023864746094, "logps/ref_rejected": -261.0414733886719, "logps/rejected": -260.7047424316406, "loss": 5.5075, "margin_dpo/margin_mean": 0.9576468467712402, "margin_dpo/margin_std": 2.2051548957824707, "step": 42 }, { "epoch": 0.09005235602094241, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.2415517568588257, "fcm_dpo/q_t": 0.49689653515815735, "grad_norm": 29.821718215942383, "learning_rate": 4.375e-07, "logits/chosen": -0.5814259648323059, "logits/rejected": -0.5952868461608887, "logps/chosen": -315.51239013671875, "logps/ref_chosen": -316.8347473144531, "logps/ref_rejected": -273.7649230957031, "logps/rejected": -273.68414306640625, "loss": 5.4962, "margin_dpo/margin_mean": 1.241552472114563, "margin_dpo/margin_std": 2.2551767826080322, "step": 43 }, { "epoch": 0.09214659685863874, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.6491384506225586, "fcm_dpo/q_t": 0.4958779215812683, "grad_norm": 30.916332244873047, "learning_rate": 4.479166666666667e-07, "logits/chosen": -0.5960883498191833, "logits/rejected": -0.594748854637146, "logps/chosen": -285.3397216796875, "logps/ref_chosen": -286.8757019042969, "logps/ref_rejected": -282.4681396484375, "logps/rejected": -282.581298828125, "loss": 5.4805, "margin_dpo/margin_mean": 1.649139165878296, "margin_dpo/margin_std": 3.022979974746704, "step": 44 }, { "epoch": 0.09424083769633508, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.249646544456482, "fcm_dpo/q_t": 0.49687686562538147, "grad_norm": 28.89425277709961, "learning_rate": 4.5833333333333327e-07, "logits/chosen": -0.6914635300636292, "logits/rejected": -0.7155641913414001, "logps/chosen": -322.6188659667969, "logps/ref_chosen": -324.2633972167969, "logps/ref_rejected": -293.09466552734375, "logps/rejected": -292.6997985839844, "loss": 5.4962, "margin_dpo/margin_mean": 1.2496464252471924, "margin_dpo/margin_std": 2.7630484104156494, "step": 45 }, { "epoch": 0.09633507853403141, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.5387152433395386, "fcm_dpo/q_t": 0.49615412950515747, "grad_norm": 30.371707916259766, "learning_rate": 4.6874999999999996e-07, "logits/chosen": -0.6212559938430786, "logits/rejected": -0.6344550251960754, "logps/chosen": -296.5879211425781, "logps/ref_chosen": -298.3357238769531, "logps/ref_rejected": -267.66204833984375, "logps/rejected": -267.45294189453125, "loss": 5.4846, "margin_dpo/margin_mean": 1.5387158393859863, "margin_dpo/margin_std": 2.5809476375579834, "step": 46 }, { "epoch": 0.09842931937172775, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.1460494995117188, "fcm_dpo/q_t": 0.4971354603767395, "grad_norm": 26.331024169921875, "learning_rate": 4.791666666666667e-07, "logits/chosen": -0.6050630211830139, "logits/rejected": -0.6257311701774597, "logps/chosen": -260.9723815917969, "logps/ref_chosen": -262.5669250488281, "logps/ref_rejected": -258.70989990234375, "logps/rejected": -258.2613525390625, "loss": 5.5007, "margin_dpo/margin_mean": 1.1460487842559814, "margin_dpo/margin_std": 3.3671884536743164, "step": 47 }, { "epoch": 0.10052356020942409, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.543501853942871, "fcm_dpo/q_t": 0.49614325165748596, "grad_norm": 27.530820846557617, "learning_rate": 4.895833333333333e-07, "logits/chosen": -0.6108264923095703, "logits/rejected": -0.6356756687164307, "logps/chosen": -267.622802734375, "logps/ref_chosen": -269.4932556152344, "logps/ref_rejected": -241.888916015625, "logps/rejected": -241.56198120117188, "loss": 5.4847, "margin_dpo/margin_mean": 1.543501853942871, "margin_dpo/margin_std": 2.9233288764953613, "step": 48 }, { "epoch": 0.10261780104712041, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.6295912265777588, "fcm_dpo/q_t": 0.4959271550178528, "grad_norm": 27.69369125366211, "learning_rate": 5e-07, "logits/chosen": -0.6768261194229126, "logits/rejected": -0.6610736846923828, "logps/chosen": -255.700439453125, "logps/ref_chosen": -257.8844909667969, "logps/ref_rejected": -256.8912048339844, "logps/rejected": -256.3367614746094, "loss": 5.4815, "margin_dpo/margin_mean": 1.6295921802520752, "margin_dpo/margin_std": 3.415902614593506, "step": 49 }, { "epoch": 0.10471204188481675, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.2272062301635742, "fcm_dpo/q_t": 0.49693384766578674, "grad_norm": 27.84197235107422, "learning_rate": 4.999932966293553e-07, "logits/chosen": -0.6295742392539978, "logits/rejected": -0.6513818502426147, "logps/chosen": -299.98370361328125, "logps/ref_chosen": -302.1083679199219, "logps/ref_rejected": -298.355224609375, "logps/rejected": -297.457763671875, "loss": 5.4979, "margin_dpo/margin_mean": 1.227207064628601, "margin_dpo/margin_std": 3.8781399726867676, "step": 50 }, { "epoch": 0.1068062827225131, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.9203577041625977, "fcm_dpo/q_t": 0.4952022135257721, "grad_norm": 29.167591094970703, "learning_rate": 4.999731868769026e-07, "logits/chosen": -0.6100184321403503, "logits/rejected": -0.6041375398635864, "logps/chosen": -267.2272644042969, "logps/ref_chosen": -269.37237548828125, "logps/ref_rejected": -297.0167541503906, "logps/rejected": -296.7919616699219, "loss": 5.4712, "margin_dpo/margin_mean": 1.920358419418335, "margin_dpo/margin_std": 4.5612592697143555, "step": 51 }, { "epoch": 0.10890052356020942, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.1528501510620117, "fcm_dpo/q_t": 0.49212491512298584, "grad_norm": 30.217905044555664, "learning_rate": 4.99939671821067e-07, "logits/chosen": -0.6610238552093506, "logits/rejected": -0.6679620146751404, "logps/chosen": -304.05975341796875, "logps/ref_chosen": -306.9028015136719, "logps/ref_rejected": -281.24737548828125, "logps/rejected": -281.55718994140625, "loss": 5.4226, "margin_dpo/margin_mean": 3.15285062789917, "margin_dpo/margin_std": 4.807487487792969, "step": 52 }, { "epoch": 0.11099476439790576, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 2.2223706245422363, "fcm_dpo/q_t": 0.4944484829902649, "grad_norm": 30.019371032714844, "learning_rate": 4.998927532591591e-07, "logits/chosen": -0.6413898468017578, "logits/rejected": -0.6815477609634399, "logps/chosen": -283.0915222167969, "logps/ref_chosen": -285.9759521484375, "logps/ref_rejected": -273.9073486328125, "logps/rejected": -273.2453308105469, "loss": 5.46, "margin_dpo/margin_mean": 2.2223708629608154, "margin_dpo/margin_std": 5.145662784576416, "step": 53 }, { "epoch": 0.1130890052356021, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.779584527015686, "fcm_dpo/q_t": 0.49555593729019165, "grad_norm": 26.53990936279297, "learning_rate": 4.998324337072792e-07, "logits/chosen": -0.6968706846237183, "logits/rejected": -0.7060681581497192, "logps/chosen": -303.8675842285156, "logps/ref_chosen": -306.504638671875, "logps/ref_rejected": -272.67431640625, "logps/rejected": -271.81689453125, "loss": 5.4775, "margin_dpo/margin_mean": 1.7795841693878174, "margin_dpo/margin_std": 5.366870403289795, "step": 54 }, { "epoch": 0.11518324607329843, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 2.3014349937438965, "fcm_dpo/q_t": 0.49425217509269714, "grad_norm": 24.823610305786133, "learning_rate": 4.997587164001815e-07, "logits/chosen": -0.6394699215888977, "logits/rejected": -0.6436302065849304, "logps/chosen": -220.45968627929688, "logps/ref_chosen": -222.33013916015625, "logps/ref_rejected": -206.59571838378906, "logps/rejected": -207.02670288085938, "loss": 5.4561, "margin_dpo/margin_mean": 2.301435708999634, "margin_dpo/margin_std": 4.904862880706787, "step": 55 }, { "epoch": 0.11727748691099477, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.1400392055511475, "fcm_dpo/q_t": 0.49216216802597046, "grad_norm": 27.470352172851562, "learning_rate": 4.996716052911017e-07, "logits/chosen": -0.6028516888618469, "logits/rejected": -0.6167591214179993, "logps/chosen": -247.6485595703125, "logps/ref_chosen": -250.47816467285156, "logps/ref_rejected": -228.25848388671875, "logps/rejected": -228.5689239501953, "loss": 5.4249, "margin_dpo/margin_mean": 3.140040159225464, "margin_dpo/margin_std": 5.96918249130249, "step": 56 }, { "epoch": 0.1193717277486911, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 4.32672643661499, "fcm_dpo/q_t": 0.489196240901947, "grad_norm": 30.81308937072754, "learning_rate": 4.99571105051544e-07, "logits/chosen": -0.6932777166366577, "logits/rejected": -0.6637296080589294, "logps/chosen": -311.116455078125, "logps/ref_chosen": -315.1195373535156, "logps/ref_rejected": -272.755615234375, "logps/rejected": -273.0793151855469, "loss": 5.3776, "margin_dpo/margin_mean": 4.32672643661499, "margin_dpo/margin_std": 5.751393795013428, "step": 57 }, { "epoch": 0.12146596858638743, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 2.327610969543457, "fcm_dpo/q_t": 0.494184285402298, "grad_norm": 27.799428939819336, "learning_rate": 4.994572210710314e-07, "logits/chosen": -0.6185472011566162, "logits/rejected": -0.642967939376831, "logps/chosen": -262.7143859863281, "logps/ref_chosen": -265.1816711425781, "logps/ref_rejected": -268.2203369140625, "logps/rejected": -268.08062744140625, "loss": 5.4561, "margin_dpo/margin_mean": 2.3276116847991943, "margin_dpo/margin_std": 5.6259846687316895, "step": 58 }, { "epoch": 0.12356020942408377, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.62693190574646, "fcm_dpo/q_t": 0.4959397315979004, "grad_norm": 29.78545379638672, "learning_rate": 4.993299594568162e-07, "logits/chosen": -0.6041760444641113, "logits/rejected": -0.5971446633338928, "logps/chosen": -284.1904602050781, "logps/ref_chosen": -286.35394287109375, "logps/ref_rejected": -260.6757507324219, "logps/rejected": -260.13916015625, "loss": 5.4861, "margin_dpo/margin_mean": 1.626932978630066, "margin_dpo/margin_std": 7.043335437774658, "step": 59 }, { "epoch": 0.1256544502617801, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.401254177093506, "fcm_dpo/q_t": 0.49150803685188293, "grad_norm": 28.24305534362793, "learning_rate": 4.991893270335525e-07, "logits/chosen": -0.6704109311103821, "logits/rejected": -0.6943408846855164, "logps/chosen": -256.01605224609375, "logps/ref_chosen": -258.9134521484375, "logps/ref_rejected": -255.21377563476562, "logps/rejected": -255.71762084960938, "loss": 5.4159, "margin_dpo/margin_mean": 3.4012551307678223, "margin_dpo/margin_std": 7.249381065368652, "step": 60 }, { "epoch": 0.12774869109947645, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.159944772720337, "fcm_dpo/q_t": 0.4921136796474457, "grad_norm": 29.98958969116211, "learning_rate": 4.990353313429303e-07, "logits/chosen": -0.6577328443527222, "logits/rejected": -0.675537645816803, "logps/chosen": -275.4341125488281, "logps/ref_chosen": -278.4678955078125, "logps/ref_rejected": -252.02720642089844, "logps/rejected": -252.15333557128906, "loss": 5.4264, "margin_dpo/margin_mean": 3.159944534301758, "margin_dpo/margin_std": 7.637033462524414, "step": 61 }, { "epoch": 0.12984293193717278, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.3599321842193604, "fcm_dpo/q_t": 0.4916197657585144, "grad_norm": 26.689373016357422, "learning_rate": 4.988679806432711e-07, "logits/chosen": -0.6122528314590454, "logits/rejected": -0.6542145609855652, "logps/chosen": -268.97198486328125, "logps/ref_chosen": -272.92431640625, "logps/ref_rejected": -260.7935485839844, "logps/rejected": -260.201171875, "loss": 5.4177, "margin_dpo/margin_mean": 3.3599326610565186, "margin_dpo/margin_std": 7.437721252441406, "step": 62 }, { "epoch": 0.1319371727748691, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.375985860824585, "fcm_dpo/q_t": 0.4915734529495239, "grad_norm": 28.36103630065918, "learning_rate": 4.986872839090852e-07, "logits/chosen": -0.6654269695281982, "logits/rejected": -0.672168493270874, "logps/chosen": -273.79058837890625, "logps/ref_chosen": -277.0889892578125, "logps/ref_rejected": -273.3413391113281, "logps/rejected": -273.4189147949219, "loss": 5.4169, "margin_dpo/margin_mean": 3.3759853839874268, "margin_dpo/margin_std": 7.225174903869629, "step": 63 }, { "epoch": 0.13403141361256546, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 4.306244373321533, "fcm_dpo/q_t": 0.48925700783729553, "grad_norm": 28.335159301757812, "learning_rate": 4.9849325083059e-07, "logits/chosen": -0.6384666562080383, "logits/rejected": -0.634242057800293, "logps/chosen": -279.6446533203125, "logps/ref_chosen": -283.8244934082031, "logps/ref_rejected": -263.29351806640625, "logps/rejected": -263.419921875, "loss": 5.3821, "margin_dpo/margin_mean": 4.306243419647217, "margin_dpo/margin_std": 8.264389038085938, "step": 64 }, { "epoch": 0.13612565445026178, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.298241376876831, "fcm_dpo/q_t": 0.49176740646362305, "grad_norm": 27.84617805480957, "learning_rate": 4.982858918131906e-07, "logits/chosen": -0.7026023268699646, "logits/rejected": -0.6706424951553345, "logps/chosen": -261.4404296875, "logps/ref_chosen": -264.8699645996094, "logps/ref_rejected": -268.5076904296875, "logps/rejected": -268.3764343261719, "loss": 5.4202, "margin_dpo/margin_mean": 3.2982404232025146, "margin_dpo/margin_std": 7.425678253173828, "step": 65 }, { "epoch": 0.1382198952879581, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 4.103024005889893, "fcm_dpo/q_t": 0.48977720737457275, "grad_norm": 27.68032455444336, "learning_rate": 4.980652179769217e-07, "logits/chosen": -0.6742034554481506, "logits/rejected": -0.6938581466674805, "logps/chosen": -269.83514404296875, "logps/ref_chosen": -272.9283142089844, "logps/ref_rejected": -280.94696044921875, "logps/rejected": -281.956787109375, "loss": 5.3933, "margin_dpo/margin_mean": 4.103023529052734, "margin_dpo/margin_std": 10.111289024353027, "step": 66 }, { "epoch": 0.14031413612565444, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.6750328540802, "fcm_dpo/q_t": 0.49083197116851807, "grad_norm": 25.625337600708008, "learning_rate": 4.978312411558517e-07, "logits/chosen": -0.6939666867256165, "logits/rejected": -0.7270027995109558, "logps/chosen": -262.1410217285156, "logps/ref_chosen": -266.18695068359375, "logps/ref_rejected": -250.17405700683594, "logps/rejected": -249.80316162109375, "loss": 5.4077, "margin_dpo/margin_mean": 3.675032615661621, "margin_dpo/margin_std": 8.745926856994629, "step": 67 }, { "epoch": 0.1424083769633508, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 4.620594501495361, "fcm_dpo/q_t": 0.48850664496421814, "grad_norm": 28.196027755737305, "learning_rate": 4.975839738974473e-07, "logits/chosen": -0.6843511462211609, "logits/rejected": -0.6975783109664917, "logps/chosen": -294.89398193359375, "logps/ref_chosen": -297.9385986328125, "logps/ref_rejected": -261.5141296386719, "logps/rejected": -263.0901184082031, "loss": 5.3746, "margin_dpo/margin_mean": 4.6205949783325195, "margin_dpo/margin_std": 10.609207153320312, "step": 68 }, { "epoch": 0.14450261780104712, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 5.947337627410889, "fcm_dpo/q_t": 0.48517727851867676, "grad_norm": 28.755943298339844, "learning_rate": 4.97323429461901e-07, "logits/chosen": -0.6901842355728149, "logits/rejected": -0.7216166257858276, "logps/chosen": -261.7662658691406, "logps/ref_chosen": -265.6175231933594, "logps/ref_rejected": -236.8287353515625, "logps/rejected": -238.92481994628906, "loss": 5.3208, "margin_dpo/margin_mean": 5.947338581085205, "margin_dpo/margin_std": 9.681401252746582, "step": 69 }, { "epoch": 0.14659685863874344, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 5.84010124206543, "fcm_dpo/q_t": 0.48545634746551514, "grad_norm": 28.748836517333984, "learning_rate": 4.970496218214204e-07, "logits/chosen": -0.6725472211837769, "logits/rejected": -0.7054740786552429, "logps/chosen": -291.8865051269531, "logps/ref_chosen": -296.2259216308594, "logps/ref_rejected": -254.68496704101562, "logps/rejected": -256.18560791015625, "loss": 5.328, "margin_dpo/margin_mean": 5.840100288391113, "margin_dpo/margin_std": 10.971734046936035, "step": 70 }, { "epoch": 0.1486910994764398, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 5.157320499420166, "fcm_dpo/q_t": 0.4871465563774109, "grad_norm": 28.44356346130371, "learning_rate": 4.967625656594781e-07, "logits/chosen": -0.6519126892089844, "logits/rejected": -0.6409755945205688, "logps/chosen": -283.77288818359375, "logps/ref_chosen": -288.92724609375, "logps/ref_rejected": -278.6405334472656, "logps/rejected": -278.64349365234375, "loss": 5.3581, "margin_dpo/margin_mean": 5.15731954574585, "margin_dpo/margin_std": 12.306236267089844, "step": 71 }, { "epoch": 0.15078534031413612, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 5.081698894500732, "fcm_dpo/q_t": 0.4873269498348236, "grad_norm": 28.21834945678711, "learning_rate": 4.964622763700252e-07, "logits/chosen": -0.700300931930542, "logits/rejected": -0.7120264172554016, "logps/chosen": -233.72628784179688, "logps/ref_chosen": -237.0452880859375, "logps/ref_rejected": -252.7946319580078, "logps/rejected": -254.55735778808594, "loss": 5.3569, "margin_dpo/margin_mean": 5.081700325012207, "margin_dpo/margin_std": 10.372089385986328, "step": 72 }, { "epoch": 0.15287958115183245, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 4.796858787536621, "fcm_dpo/q_t": 0.48803800344467163, "grad_norm": 27.81236457824707, "learning_rate": 4.961487700566646e-07, "logits/chosen": -0.660097599029541, "logits/rejected": -0.6779041886329651, "logps/chosen": -268.6611022949219, "logps/ref_chosen": -273.0531005859375, "logps/ref_rejected": -246.8330841064453, "logps/rejected": -247.23794555664062, "loss": 5.372, "margin_dpo/margin_mean": 4.796858787536621, "margin_dpo/margin_std": 12.272148132324219, "step": 73 }, { "epoch": 0.1549738219895288, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 4.374041557312012, "fcm_dpo/q_t": 0.48911044001579285, "grad_norm": 30.324485778808594, "learning_rate": 4.958220635317885e-07, "logits/chosen": -0.7277234196662903, "logits/rejected": -0.7060559391975403, "logps/chosen": -338.9614562988281, "logps/ref_chosen": -342.2818908691406, "logps/ref_rejected": -330.0293884277344, "logps/rejected": -331.08294677734375, "loss": 5.3871, "margin_dpo/margin_mean": 4.374040603637695, "margin_dpo/margin_std": 11.71304702758789, "step": 74 }, { "epoch": 0.15706806282722513, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 6.654738426208496, "fcm_dpo/q_t": 0.48341798782348633, "grad_norm": 29.471088409423828, "learning_rate": 4.954821743156767e-07, "logits/chosen": -0.6373202800750732, "logits/rejected": -0.6386787295341492, "logps/chosen": -262.1964111328125, "logps/ref_chosen": -266.8641662597656, "logps/ref_rejected": -276.8699951171875, "logps/rejected": -278.8570251464844, "loss": 5.2965, "margin_dpo/margin_mean": 6.6547393798828125, "margin_dpo/margin_std": 10.852510452270508, "step": 75 }, { "epoch": 0.15916230366492146, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 7.250955104827881, "fcm_dpo/q_t": 0.4819841682910919, "grad_norm": 29.12325096130371, "learning_rate": 4.951291206355559e-07, "logits/chosen": -0.7122775316238403, "logits/rejected": -0.7196100950241089, "logps/chosen": -276.97625732421875, "logps/ref_chosen": -281.174560546875, "logps/ref_rejected": -263.6067199707031, "logps/rejected": -266.6593017578125, "loss": 5.2782, "margin_dpo/margin_mean": 7.250955581665039, "margin_dpo/margin_std": 12.860380172729492, "step": 76 }, { "epoch": 0.1612565445026178, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 5.727426052093506, "fcm_dpo/q_t": 0.48577675223350525, "grad_norm": 33.081321716308594, "learning_rate": 4.947629214246236e-07, "logits/chosen": -0.5670685172080994, "logits/rejected": -0.5766149759292603, "logps/chosen": -302.4000549316406, "logps/ref_chosen": -306.09527587890625, "logps/ref_rejected": -253.49569702148438, "logps/rejected": -255.5279083251953, "loss": 5.3404, "margin_dpo/margin_mean": 5.727425575256348, "margin_dpo/margin_std": 13.957700729370117, "step": 77 }, { "epoch": 0.16335078534031414, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 8.765287399291992, "fcm_dpo/q_t": 0.4782516360282898, "grad_norm": 29.579288482666016, "learning_rate": 4.943835963210323e-07, "logits/chosen": -0.6769639253616333, "logits/rejected": -0.6716817617416382, "logps/chosen": -253.07315063476562, "logps/ref_chosen": -256.9934997558594, "logps/ref_rejected": -211.74012756347656, "logps/rejected": -216.5850830078125, "loss": 5.2239, "margin_dpo/margin_mean": 8.765288352966309, "margin_dpo/margin_std": 14.442641258239746, "step": 78 }, { "epoch": 0.16544502617801046, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 8.160972595214844, "fcm_dpo/q_t": 0.479748010635376, "grad_norm": 29.708646774291992, "learning_rate": 4.939911656668361e-07, "logits/chosen": -0.6571123003959656, "logits/rejected": -0.6778618693351746, "logps/chosen": -263.1961669921875, "logps/ref_chosen": -266.2735595703125, "logps/ref_rejected": -251.57257080078125, "logps/rejected": -256.6561584472656, "loss": 5.2494, "margin_dpo/margin_mean": 8.160972595214844, "margin_dpo/margin_std": 14.984216690063477, "step": 79 }, { "epoch": 0.16753926701570682, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 6.6066484451293945, "fcm_dpo/q_t": 0.4836036264896393, "grad_norm": 28.88882064819336, "learning_rate": 4.935856505068998e-07, "logits/chosen": -0.6748917698860168, "logits/rejected": -0.7046967148780823, "logps/chosen": -285.9798278808594, "logps/ref_chosen": -287.8509826660156, "logps/ref_rejected": -256.0766296386719, "logps/rejected": -260.81207275390625, "loss": 5.3052, "margin_dpo/margin_mean": 6.606649398803711, "margin_dpo/margin_std": 13.269367218017578, "step": 80 }, { "epoch": 0.16963350785340314, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 7.190183639526367, "fcm_dpo/q_t": 0.4821716547012329, "grad_norm": 28.030818939208984, "learning_rate": 4.93167072587771e-07, "logits/chosen": -0.6425015926361084, "logits/rejected": -0.641758143901825, "logps/chosen": -266.0821228027344, "logps/ref_chosen": -268.5232238769531, "logps/ref_rejected": -237.81137084960938, "logps/rejected": -242.56045532226562, "loss": 5.2924, "margin_dpo/margin_mean": 7.190183639526367, "margin_dpo/margin_std": 16.7568302154541, "step": 81 }, { "epoch": 0.17172774869109947, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 7.836269378662109, "fcm_dpo/q_t": 0.48062148690223694, "grad_norm": 27.81950569152832, "learning_rate": 4.92735454356513e-07, "logits/chosen": -0.7289373874664307, "logits/rejected": -0.7366238236427307, "logps/chosen": -276.76800537109375, "logps/ref_chosen": -279.24798583984375, "logps/ref_rejected": -236.6510772705078, "logps/rejected": -242.00738525390625, "loss": 5.2665, "margin_dpo/margin_mean": 7.836269378662109, "margin_dpo/margin_std": 15.824460983276367, "step": 82 }, { "epoch": 0.17382198952879582, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 8.676000595092773, "fcm_dpo/q_t": 0.47846055030822754, "grad_norm": 30.727758407592773, "learning_rate": 4.922908189595017e-07, "logits/chosen": -0.6865428686141968, "logits/rejected": -0.6698246002197266, "logps/chosen": -273.9609069824219, "logps/ref_chosen": -274.21923828125, "logps/ref_rejected": -276.2212219238281, "logps/rejected": -284.638916015625, "loss": 5.2418, "margin_dpo/margin_mean": 8.676000595092773, "margin_dpo/margin_std": 18.0338077545166, "step": 83 }, { "epoch": 0.17591623036649215, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 7.394294738769531, "fcm_dpo/q_t": 0.4816589951515198, "grad_norm": 29.849651336669922, "learning_rate": 4.918331902411841e-07, "logits/chosen": -0.7308733463287354, "logits/rejected": -0.7451096177101135, "logps/chosen": -293.73614501953125, "logps/ref_chosen": -294.3975524902344, "logps/ref_rejected": -279.81884765625, "logps/rejected": -286.5517272949219, "loss": 5.2856, "margin_dpo/margin_mean": 7.3942952156066895, "margin_dpo/margin_std": 16.78285789489746, "step": 84 }, { "epoch": 0.17801047120418848, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 5.794508934020996, "fcm_dpo/q_t": 0.4856225848197937, "grad_norm": 29.354572296142578, "learning_rate": 4.913625927427995e-07, "logits/chosen": -0.6477065086364746, "logits/rejected": -0.6560468077659607, "logps/chosen": -245.17652893066406, "logps/ref_chosen": -243.66220092773438, "logps/ref_rejected": -263.9421691894531, "logps/rejected": -271.2510070800781, "loss": 5.3435, "margin_dpo/margin_mean": 5.794508934020996, "margin_dpo/margin_std": 15.385719299316406, "step": 85 }, { "epoch": 0.18010471204188483, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 8.944154739379883, "fcm_dpo/q_t": 0.4777962565422058, "grad_norm": 34.90058517456055, "learning_rate": 4.908790517010636e-07, "logits/chosen": -0.6895659565925598, "logits/rejected": -0.6828902363777161, "logps/chosen": -308.1940612792969, "logps/ref_chosen": -309.4306945800781, "logps/ref_rejected": -290.91278076171875, "logps/rejected": -298.62030029296875, "loss": 5.227, "margin_dpo/margin_mean": 8.944153785705566, "margin_dpo/margin_std": 17.328086853027344, "step": 86 }, { "epoch": 0.18219895287958116, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 10.260643005371094, "fcm_dpo/q_t": 0.4746631383895874, "grad_norm": 29.66830825805664, "learning_rate": 4.903825930468148e-07, "logits/chosen": -0.7512010931968689, "logits/rejected": -0.7451151013374329, "logps/chosen": -278.11773681640625, "logps/ref_chosen": -278.0277099609375, "logps/ref_rejected": -245.70123291015625, "logps/rejected": -256.0518493652344, "loss": 5.1839, "margin_dpo/margin_mean": 10.260643005371094, "margin_dpo/margin_std": 18.95460319519043, "step": 87 }, { "epoch": 0.18429319371727748, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 8.927164077758789, "fcm_dpo/q_t": 0.477934330701828, "grad_norm": 28.764787673950195, "learning_rate": 4.898732434036243e-07, "logits/chosen": -0.7751933336257935, "logits/rejected": -0.7918457984924316, "logps/chosen": -268.5487365722656, "logps/ref_chosen": -266.5148010253906, "logps/ref_rejected": -265.90081787109375, "logps/rejected": -276.8619384765625, "loss": 5.2339, "margin_dpo/margin_mean": 8.927164077758789, "margin_dpo/margin_std": 19.03485107421875, "step": 88 }, { "epoch": 0.18638743455497384, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 9.322531700134277, "fcm_dpo/q_t": 0.4769115746021271, "grad_norm": 30.534467697143555, "learning_rate": 4.893510300863676e-07, "logits/chosen": -0.7388455271720886, "logits/rejected": -0.7291704416275024, "logps/chosen": -265.65667724609375, "logps/ref_chosen": -265.6893005371094, "logps/ref_rejected": -251.49314880371094, "logps/rejected": -260.7830810546875, "loss": 5.2181, "margin_dpo/margin_mean": 9.322531700134277, "margin_dpo/margin_std": 18.186668395996094, "step": 89 }, { "epoch": 0.18848167539267016, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 8.266681671142578, "fcm_dpo/q_t": 0.47950226068496704, "grad_norm": 29.96919822692871, "learning_rate": 4.8881598109976e-07, "logits/chosen": -0.7426354885101318, "logits/rejected": -0.7515090703964233, "logps/chosen": -308.5796813964844, "logps/ref_chosen": -307.4250183105469, "logps/ref_rejected": -265.7172546386719, "logps/rejected": -275.13861083984375, "loss": 5.2548, "margin_dpo/margin_mean": 8.266682624816895, "margin_dpo/margin_std": 17.510297775268555, "step": 90 }, { "epoch": 0.1905759162303665, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 9.559464454650879, "fcm_dpo/q_t": 0.47642382979393005, "grad_norm": 32.85371017456055, "learning_rate": 4.882681251368548e-07, "logits/chosen": -0.6758638620376587, "logits/rejected": -0.6926856637001038, "logps/chosen": -237.77833557128906, "logps/ref_chosen": -235.74098205566406, "logps/ref_rejected": -226.6428985595703, "logps/rejected": -238.23971557617188, "loss": 5.214, "margin_dpo/margin_mean": 9.559464454650879, "margin_dpo/margin_std": 19.182907104492188, "step": 91 }, { "epoch": 0.19267015706806281, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 10.123077392578125, "fcm_dpo/q_t": 0.47499004006385803, "grad_norm": 33.156288146972656, "learning_rate": 4.877074915775048e-07, "logits/chosen": -0.7403147220611572, "logits/rejected": -0.72395920753479, "logps/chosen": -286.5037841796875, "logps/ref_chosen": -283.4475402832031, "logps/ref_rejected": -273.134033203125, "logps/rejected": -286.3133544921875, "loss": 5.1979, "margin_dpo/margin_mean": 10.123077392578125, "margin_dpo/margin_std": 21.20536231994629, "step": 92 }, { "epoch": 0.19476439790575917, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 9.612565994262695, "fcm_dpo/q_t": 0.4762864410877228, "grad_norm": 28.68975257873535, "learning_rate": 4.871341104867864e-07, "logits/chosen": -0.7337056994438171, "logits/rejected": -0.7575539350509644, "logps/chosen": -235.6643524169922, "logps/ref_chosen": -233.33714294433594, "logps/ref_rejected": -230.54273986816406, "logps/rejected": -242.48248291015625, "loss": 5.2109, "margin_dpo/margin_mean": 9.612567901611328, "margin_dpo/margin_std": 19.466064453125, "step": 93 }, { "epoch": 0.1968586387434555, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 9.34221076965332, "fcm_dpo/q_t": 0.47688379883766174, "grad_norm": 31.385103225708008, "learning_rate": 4.865480126133871e-07, "logits/chosen": -0.6867436766624451, "logits/rejected": -0.7081081867218018, "logps/chosen": -296.996826171875, "logps/ref_chosen": -294.6528015136719, "logps/ref_rejected": -283.657958984375, "logps/rejected": -295.34423828125, "loss": 5.2315, "margin_dpo/margin_mean": 9.342211723327637, "margin_dpo/margin_std": 21.86980628967285, "step": 94 }, { "epoch": 0.19895287958115182, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 10.031109809875488, "fcm_dpo/q_t": 0.47530385851860046, "grad_norm": 33.378883361816406, "learning_rate": 4.859492293879573e-07, "logits/chosen": -0.7343789339065552, "logits/rejected": -0.756463885307312, "logps/chosen": -315.03533935546875, "logps/ref_chosen": -311.6697082519531, "logps/ref_rejected": -262.7471923828125, "logps/rejected": -276.1439208984375, "loss": 5.2116, "margin_dpo/margin_mean": 10.031108856201172, "margin_dpo/margin_std": 22.576051712036133, "step": 95 }, { "epoch": 0.20104712041884817, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 10.250564575195312, "fcm_dpo/q_t": 0.4747813940048218, "grad_norm": 37.031070709228516, "learning_rate": 4.853377929214243e-07, "logits/chosen": -0.7045353651046753, "logits/rejected": -0.7171480655670166, "logps/chosen": -287.286376953125, "logps/ref_chosen": -282.55596923828125, "logps/ref_rejected": -242.71588134765625, "logps/rejected": -257.6968078613281, "loss": 5.204, "margin_dpo/margin_mean": 10.250566482543945, "margin_dpo/margin_std": 23.351776123046875, "step": 96 }, { "epoch": 0.2031413612565445, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 12.032766342163086, "fcm_dpo/q_t": 0.47035449743270874, "grad_norm": 32.03152847290039, "learning_rate": 4.847137360032699e-07, "logits/chosen": -0.7473950386047363, "logits/rejected": -0.7346963286399841, "logps/chosen": -307.8915100097656, "logps/ref_chosen": -303.57781982421875, "logps/ref_rejected": -264.22491455078125, "logps/rejected": -280.5714416503906, "loss": 5.1312, "margin_dpo/margin_mean": 12.032766342163086, "margin_dpo/margin_std": 22.341087341308594, "step": 97 }, { "epoch": 0.20523560209424083, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 12.27221965789795, "fcm_dpo/q_t": 0.4699545204639435, "grad_norm": 34.470611572265625, "learning_rate": 4.84077092099773e-07, "logits/chosen": -0.7738041877746582, "logits/rejected": -0.7862895131111145, "logps/chosen": -291.651611328125, "logps/ref_chosen": -286.8303527832031, "logps/ref_rejected": -278.08331298828125, "logps/rejected": -295.1767883300781, "loss": 5.1343, "margin_dpo/margin_mean": 12.272220611572266, "margin_dpo/margin_std": 22.819379806518555, "step": 98 }, { "epoch": 0.20732984293193718, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 12.655092239379883, "fcm_dpo/q_t": 0.46898141503334045, "grad_norm": 32.86789321899414, "learning_rate": 4.834278953522137e-07, "logits/chosen": -0.736225962638855, "logits/rejected": -0.7492181062698364, "logps/chosen": -285.2147521972656, "logps/ref_chosen": -279.92120361328125, "logps/ref_rejected": -250.3365478515625, "logps/rejected": -268.28515625, "loss": 5.1341, "margin_dpo/margin_mean": 12.65509033203125, "margin_dpo/margin_std": 27.144683837890625, "step": 99 }, { "epoch": 0.2094240837696335, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 12.444181442260742, "fcm_dpo/q_t": 0.4695214033126831, "grad_norm": 35.53663635253906, "learning_rate": 4.827661805750437e-07, "logits/chosen": -0.7796453833580017, "logits/rejected": -0.7926532030105591, "logps/chosen": -304.6813659667969, "logps/ref_chosen": -296.8276672363281, "logps/ref_rejected": -275.56146240234375, "logps/rejected": -295.8592529296875, "loss": 5.1268, "margin_dpo/margin_mean": 12.444183349609375, "margin_dpo/margin_std": 24.066898345947266, "step": 100 }, { "epoch": 0.21151832460732983, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 15.036481857299805, "fcm_dpo/q_t": 0.4632215201854706, "grad_norm": 32.41525650024414, "learning_rate": 4.820919832540181e-07, "logits/chosen": -0.7710584998130798, "logits/rejected": -0.7849109172821045, "logps/chosen": -257.8660888671875, "logps/ref_chosen": -252.74203491210938, "logps/ref_rejected": -276.4185485839844, "logps/rejected": -296.5791015625, "loss": 5.0411, "margin_dpo/margin_mean": 15.036481857299805, "margin_dpo/margin_std": 26.31357765197754, "step": 101 }, { "epoch": 0.2136125654450262, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 14.796443939208984, "fcm_dpo/q_t": 0.4635191857814789, "grad_norm": 32.229835510253906, "learning_rate": 4.814053395442932e-07, "logits/chosen": -0.747922420501709, "logits/rejected": -0.7440513372421265, "logps/chosen": -224.68930053710938, "logps/ref_chosen": -219.5537109375, "logps/ref_rejected": -231.90853881835938, "logps/rejected": -251.84056091308594, "loss": 5.0434, "margin_dpo/margin_mean": 14.7964448928833, "margin_dpo/margin_std": 24.94526481628418, "step": 102 }, { "epoch": 0.2157068062827225, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 13.613631248474121, "fcm_dpo/q_t": 0.46684983372688293, "grad_norm": 32.68932342529297, "learning_rate": 4.807062862684873e-07, "logits/chosen": -0.7766976356506348, "logits/rejected": -0.7736947536468506, "logps/chosen": -264.4947509765625, "logps/ref_chosen": -259.6750793457031, "logps/ref_rejected": -278.7400817871094, "logps/rejected": -297.17340087890625, "loss": 5.0901, "margin_dpo/margin_mean": 13.613631248474121, "margin_dpo/margin_std": 25.650850296020508, "step": 103 }, { "epoch": 0.21780104712041884, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 10.063506126403809, "fcm_dpo/q_t": 0.47534891963005066, "grad_norm": 33.11854934692383, "learning_rate": 4.799948609147061e-07, "logits/chosen": -0.7753955125808716, "logits/rejected": -0.7825241088867188, "logps/chosen": -277.03887939453125, "logps/ref_chosen": -267.9741516113281, "logps/ref_rejected": -230.5306396484375, "logps/rejected": -249.6588134765625, "loss": 5.225, "margin_dpo/margin_mean": 10.063505172729492, "margin_dpo/margin_std": 26.37413215637207, "step": 104 }, { "epoch": 0.2198952879581152, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 20.57976531982422, "fcm_dpo/q_t": 0.4495754837989807, "grad_norm": 34.676841735839844, "learning_rate": 4.792711016345321e-07, "logits/chosen": -0.7640881538391113, "logits/rejected": -0.7759814858436584, "logps/chosen": -327.5960998535156, "logps/ref_chosen": -322.25482177734375, "logps/ref_rejected": -279.02978515625, "logps/rejected": -304.9508361816406, "loss": 4.8454, "margin_dpo/margin_mean": 20.579763412475586, "margin_dpo/margin_std": 26.948394775390625, "step": 105 }, { "epoch": 0.22198952879581152, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 12.518043518066406, "fcm_dpo/q_t": 0.4693966507911682, "grad_norm": 38.22789764404297, "learning_rate": 4.785350472409791e-07, "logits/chosen": -0.7372999787330627, "logits/rejected": -0.7724757194519043, "logps/chosen": -308.64599609375, "logps/ref_chosen": -296.15777587890625, "logps/ref_rejected": -266.2691650390625, "logps/rejected": -291.275390625, "loss": 5.1512, "margin_dpo/margin_mean": 12.518043518066406, "margin_dpo/margin_std": 29.34693145751953, "step": 106 }, { "epoch": 0.22408376963350785, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 19.722835540771484, "fcm_dpo/q_t": 0.4517911970615387, "grad_norm": 38.28390121459961, "learning_rate": 4.777867372064105e-07, "logits/chosen": -0.7788010239601135, "logits/rejected": -0.77190101146698, "logps/chosen": -311.3397216796875, "logps/ref_chosen": -306.996337890625, "logps/ref_rejected": -296.79412841796875, "logps/rejected": -320.8603515625, "loss": 4.877, "margin_dpo/margin_mean": 19.722835540771484, "margin_dpo/margin_std": 27.75455665588379, "step": 107 }, { "epoch": 0.2261780104712042, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 18.265289306640625, "fcm_dpo/q_t": 0.4557679295539856, "grad_norm": 100.8703842163086, "learning_rate": 4.770262116604223e-07, "logits/chosen": -0.7604493498802185, "logits/rejected": -0.7712941765785217, "logps/chosen": -300.7414245605469, "logps/ref_chosen": -295.1526794433594, "logps/ref_rejected": -235.974853515625, "logps/rejected": -259.8288879394531, "loss": 4.9518, "margin_dpo/margin_mean": 18.265289306640625, "margin_dpo/margin_std": 30.369789123535156, "step": 108 }, { "epoch": 0.22827225130890053, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 20.25125503540039, "fcm_dpo/q_t": 0.45110049843788147, "grad_norm": 38.23166275024414, "learning_rate": 4.7625351138769166e-07, "logits/chosen": -0.7942591309547424, "logits/rejected": -0.791488528251648, "logps/chosen": -334.3830261230469, "logps/ref_chosen": -325.9248046875, "logps/ref_rejected": -279.15423583984375, "logps/rejected": -307.8637390136719, "loss": 4.8796, "margin_dpo/margin_mean": 20.25125503540039, "margin_dpo/margin_std": 31.70002555847168, "step": 109 }, { "epoch": 0.23036649214659685, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 19.04351806640625, "fcm_dpo/q_t": 0.4539196491241455, "grad_norm": 34.719669342041016, "learning_rate": 4.75468677825789e-07, "logits/chosen": -0.8007383346557617, "logits/rejected": -0.7890827059745789, "logps/chosen": -283.4249572753906, "logps/ref_chosen": -274.439208984375, "logps/ref_rejected": -260.0552062988281, "logps/rejected": -288.0845031738281, "loss": 4.9361, "margin_dpo/margin_mean": 19.043519973754883, "margin_dpo/margin_std": 33.801780700683594, "step": 110 }, { "epoch": 0.2324607329842932, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 20.36928939819336, "fcm_dpo/q_t": 0.450971394777298, "grad_norm": 38.119789123535156, "learning_rate": 4.7467175306295647e-07, "logits/chosen": -0.8362029194831848, "logits/rejected": -0.8157572150230408, "logps/chosen": -338.9130859375, "logps/ref_chosen": -329.2361755371094, "logps/ref_rejected": -287.82830810546875, "logps/rejected": -317.8745422363281, "loss": 4.8982, "margin_dpo/margin_mean": 20.369291305541992, "margin_dpo/margin_std": 34.47408676147461, "step": 111 }, { "epoch": 0.23455497382198953, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 13.103754043579102, "fcm_dpo/q_t": 0.4685131311416626, "grad_norm": 39.488136291503906, "learning_rate": 4.7386277983585053e-07, "logits/chosen": -0.7342914342880249, "logits/rejected": -0.7637506127357483, "logps/chosen": -272.73883056640625, "logps/ref_chosen": -257.0593566894531, "logps/ref_rejected": -272.9595031738281, "logps/rejected": -301.7427062988281, "loss": 5.1873, "margin_dpo/margin_mean": 13.103754997253418, "margin_dpo/margin_std": 36.33610153198242, "step": 112 }, { "epoch": 0.23664921465968586, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 24.700986862182617, "fcm_dpo/q_t": 0.44143223762512207, "grad_norm": 41.771854400634766, "learning_rate": 4.7304180152725024e-07, "logits/chosen": -0.8089713454246521, "logits/rejected": -0.8124600648880005, "logps/chosen": -298.964599609375, "logps/ref_chosen": -286.0416564941406, "logps/ref_rejected": -270.374267578125, "logps/rejected": -307.9981994628906, "loss": 4.777, "margin_dpo/margin_mean": 24.700986862182617, "margin_dpo/margin_std": 39.2578125, "step": 113 }, { "epoch": 0.2387434554973822, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 13.720559120178223, "fcm_dpo/q_t": 0.4666045606136322, "grad_norm": 40.004310607910156, "learning_rate": 4.7220886216373085e-07, "logits/chosen": -0.8449599742889404, "logits/rejected": -0.8461405038833618, "logps/chosen": -276.9638671875, "logps/ref_chosen": -260.0084533691406, "logps/ref_rejected": -246.67190551757812, "logps/rejected": -277.3478698730469, "loss": 5.1508, "margin_dpo/margin_mean": 13.720561027526855, "margin_dpo/margin_std": 35.51994323730469, "step": 114 }, { "epoch": 0.24083769633507854, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 14.000336647033691, "fcm_dpo/q_t": 0.46712926030158997, "grad_norm": 43.63201904296875, "learning_rate": 4.7136400641330245e-07, "logits/chosen": -0.8578217029571533, "logits/rejected": -0.8179551959037781, "logps/chosen": -318.00872802734375, "logps/ref_chosen": -298.8608093261719, "logps/ref_rejected": -272.1927795410156, "logps/rejected": -305.341064453125, "loss": 5.164, "margin_dpo/margin_mean": 14.000338554382324, "margin_dpo/margin_std": 38.821632385253906, "step": 115 }, { "epoch": 0.24293193717277486, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 18.14166259765625, "fcm_dpo/q_t": 0.4560409188270569, "grad_norm": 38.731719970703125, "learning_rate": 4.70507279583015e-07, "logits/chosen": -0.8598328828811646, "logits/rejected": -0.8213679790496826, "logps/chosen": -294.76617431640625, "logps/ref_chosen": -279.263916015625, "logps/ref_rejected": -253.6192169189453, "logps/rejected": -287.2631530761719, "loss": 4.9813, "margin_dpo/margin_mean": 18.14166259765625, "margin_dpo/margin_std": 35.22654724121094, "step": 116 }, { "epoch": 0.2450261780104712, "fcm_dpo/beta": 0.010019151493906975, "fcm_dpo/delta": 0.019006887450814247, "fcm_dpo/margin": 20.834585189819336, "fcm_dpo/q_t": 0.44975975155830383, "grad_norm": 42.065223693847656, "learning_rate": 4.6963872761652834e-07, "logits/chosen": -0.8122572898864746, "logits/rejected": -0.8118118643760681, "logps/chosen": -278.41455078125, "logps/ref_chosen": -259.2248840332031, "logps/ref_rejected": -229.3042755126953, "logps/rejected": -269.32855224609375, "loss": 4.8801, "margin_dpo/margin_mean": 20.834585189819336, "margin_dpo/margin_std": 32.034950256347656, "step": 117 }, { "epoch": 0.24712041884816754, "fcm_dpo/beta": 0.010153218172490597, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 23.139522552490234, "fcm_dpo/q_t": 0.4447304904460907, "grad_norm": 46.219722747802734, "learning_rate": 4.687583970916486e-07, "logits/chosen": -0.8006891012191772, "logits/rejected": -0.7873902320861816, "logps/chosen": -292.9374694824219, "logps/ref_chosen": -267.0707092285156, "logps/ref_rejected": -272.7322082519531, "logps/rejected": -321.73846435546875, "loss": 4.8612, "margin_dpo/margin_mean": 23.139522552490234, "margin_dpo/margin_std": 43.533145904541016, "step": 118 }, { "epoch": 0.24921465968586387, "fcm_dpo/beta": 0.010153218172490597, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 16.957921981811523, "fcm_dpo/q_t": 0.45950642228126526, "grad_norm": 46.27934646606445, "learning_rate": 4.6786633521783005e-07, "logits/chosen": -0.8989782333374023, "logits/rejected": -0.8991548418998718, "logps/chosen": -356.221923828125, "logps/ref_chosen": -324.6766357421875, "logps/ref_rejected": -306.0322265625, "logps/rejected": -354.5354309082031, "loss": 5.083, "margin_dpo/margin_mean": 16.95792007446289, "margin_dpo/margin_std": 41.61006164550781, "step": 119 }, { "epoch": 0.2513089005235602, "fcm_dpo/beta": 0.010216230526566505, "fcm_dpo/delta": 0.020517978817224503, "fcm_dpo/margin": 19.49388885498047, "fcm_dpo/q_t": 0.4531494379043579, "grad_norm": 42.6641731262207, "learning_rate": 4.669625898336438e-07, "logits/chosen": -0.8377653360366821, "logits/rejected": -0.8475413918495178, "logps/chosen": -343.8332214355469, "logps/ref_chosen": -315.2617492675781, "logps/ref_rejected": -265.32501220703125, "logps/rejected": -313.390380859375, "loss": 4.9857, "margin_dpo/margin_mean": 19.49388885498047, "margin_dpo/margin_std": 41.39332580566406, "step": 120 }, { "epoch": 0.2534031413612565, "fcm_dpo/beta": 0.01032125111669302, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 13.927159309387207, "fcm_dpo/q_t": 0.4664355218410492, "grad_norm": 60.31532287597656, "learning_rate": 4.6604720940421207e-07, "logits/chosen": -0.8654758930206299, "logits/rejected": -0.8799676299095154, "logps/chosen": -256.4559631347656, "logps/ref_chosen": -222.99609375, "logps/ref_rejected": -226.92860412597656, "logps/rejected": -274.31561279296875, "loss": 5.1715, "margin_dpo/margin_mean": 13.92716121673584, "margin_dpo/margin_std": 40.628849029541016, "step": 121 }, { "epoch": 0.2554973821989529, "fcm_dpo/beta": 0.01032125111669302, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 19.776992797851562, "fcm_dpo/q_t": 0.45205867290496826, "grad_norm": 57.31346130371094, "learning_rate": 4.651202430186092e-07, "logits/chosen": -0.9148901700973511, "logits/rejected": -0.8764075636863708, "logps/chosen": -309.6811828613281, "logps/ref_chosen": -276.02630615234375, "logps/ref_rejected": -277.97418212890625, "logps/rejected": -331.4060363769531, "loss": 5.0087, "margin_dpo/margin_mean": 19.776994705200195, "margin_dpo/margin_std": 46.8244514465332, "step": 122 }, { "epoch": 0.25759162303664923, "fcm_dpo/beta": 0.01032125111669302, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 25.753007888793945, "fcm_dpo/q_t": 0.43692460656166077, "grad_norm": 50.204429626464844, "learning_rate": 4.6418174038722924e-07, "logits/chosen": -0.8277499079704285, "logits/rejected": -0.8173301219940186, "logps/chosen": -354.5268249511719, "logps/ref_chosen": -328.1546325683594, "logps/ref_rejected": -280.6911315917969, "logps/rejected": -332.8163146972656, "loss": 4.7536, "margin_dpo/margin_mean": 25.753005981445312, "margin_dpo/margin_std": 42.5488166809082, "step": 123 }, { "epoch": 0.25968586387434556, "fcm_dpo/beta": 0.01032125111669302, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 22.0130558013916, "fcm_dpo/q_t": 0.4456826448440552, "grad_norm": 54.57142639160156, "learning_rate": 4.6323175183912023e-07, "logits/chosen": -0.8554036021232605, "logits/rejected": -0.8191466927528381, "logps/chosen": -302.9939270019531, "logps/ref_chosen": -275.6961975097656, "logps/ref_rejected": -225.361572265625, "logps/rejected": -274.6723327636719, "loss": 4.8715, "margin_dpo/margin_mean": 22.01305389404297, "margin_dpo/margin_std": 39.297996520996094, "step": 124 }, { "epoch": 0.2617801047120419, "fcm_dpo/beta": 0.01032125111669302, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 19.9759578704834, "fcm_dpo/q_t": 0.4523986577987671, "grad_norm": 53.71831130981445, "learning_rate": 4.6227032831928483e-07, "logits/chosen": -0.8214420080184937, "logits/rejected": -0.7753271460533142, "logps/chosen": -306.7501220703125, "logps/ref_chosen": -278.06976318359375, "logps/ref_rejected": -265.63873291015625, "logps/rejected": -314.2950439453125, "loss": 5.0425, "margin_dpo/margin_mean": 19.9759578704834, "margin_dpo/margin_std": 50.73390579223633, "step": 125 }, { "epoch": 0.2638743455497382, "fcm_dpo/beta": 0.01032125111669302, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 22.57097816467285, "fcm_dpo/q_t": 0.4450061023235321, "grad_norm": 51.29899215698242, "learning_rate": 4.612975213859487e-07, "logits/chosen": -0.8229495286941528, "logits/rejected": -0.8314183354377747, "logps/chosen": -346.1187438964844, "logps/ref_chosen": -321.3960876464844, "logps/ref_rejected": -285.37664794921875, "logps/rejected": -332.6702880859375, "loss": 4.8884, "margin_dpo/margin_mean": 22.57097625732422, "margin_dpo/margin_std": 45.24233627319336, "step": 126 }, { "epoch": 0.26596858638743454, "fcm_dpo/beta": 0.010337848216295242, "fcm_dpo/delta": 0.008014491759240627, "fcm_dpo/margin": 26.70491600036621, "fcm_dpo/q_t": 0.4356614053249359, "grad_norm": 50.95025634765625, "learning_rate": 4.603133832077953e-07, "logits/chosen": -0.9044252634048462, "logits/rejected": -0.8535300493240356, "logps/chosen": -330.0174255371094, "logps/ref_chosen": -306.55877685546875, "logps/ref_rejected": -274.8651428222656, "logps/rejected": -325.02874755859375, "loss": 4.7648, "margin_dpo/margin_mean": 26.70491600036621, "margin_dpo/margin_std": 45.92063903808594, "step": 127 }, { "epoch": 0.2680628272251309, "fcm_dpo/beta": 0.01038763951510191, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 30.74336051940918, "fcm_dpo/q_t": 0.4257518947124481, "grad_norm": 50.56816864013672, "learning_rate": 4.5931796656116837e-07, "logits/chosen": -0.7912300825119019, "logits/rejected": -0.7867921590805054, "logps/chosen": -283.9368591308594, "logps/ref_chosen": -265.3973693847656, "logps/ref_rejected": -250.9737548828125, "logps/rejected": -300.25665283203125, "loss": 4.6379, "margin_dpo/margin_mean": 30.743362426757812, "margin_dpo/margin_std": 49.90855407714844, "step": 128 }, { "epoch": 0.27015706806282724, "fcm_dpo/beta": 0.010399233549833298, "fcm_dpo/delta": 0.011111855506896973, "fcm_dpo/margin": 27.626813888549805, "fcm_dpo/q_t": 0.4336443245410919, "grad_norm": 46.69161605834961, "learning_rate": 4.5831132482724193e-07, "logits/chosen": -0.8296740055084229, "logits/rejected": -0.8278064131736755, "logps/chosen": -323.13433837890625, "logps/ref_chosen": -303.158447265625, "logps/ref_rejected": -275.9891052246094, "logps/rejected": -323.5918273925781, "loss": 4.7427, "margin_dpo/margin_mean": 27.626811981201172, "margin_dpo/margin_std": 49.01125717163086, "step": 129 }, { "epoch": 0.27225130890052357, "fcm_dpo/beta": 0.010586390271782875, "fcm_dpo/delta": 0.01436567772179842, "fcm_dpo/margin": 27.690763473510742, "fcm_dpo/q_t": 0.43227171897888184, "grad_norm": 59.677452087402344, "learning_rate": 4.5729351198915705e-07, "logits/chosen": -0.807654619216919, "logits/rejected": -0.8454784154891968, "logps/chosen": -309.1056213378906, "logps/ref_chosen": -286.4073486328125, "logps/ref_rejected": -294.38665771484375, "logps/rejected": -344.77569580078125, "loss": 4.7332, "margin_dpo/margin_mean": 27.690763473510742, "margin_dpo/margin_std": 49.714717864990234, "step": 130 }, { "epoch": 0.2743455497382199, "fcm_dpo/beta": 0.010601533576846123, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 22.352327346801758, "fcm_dpo/q_t": 0.4453392028808594, "grad_norm": 63.08754348754883, "learning_rate": 4.5626458262912735e-07, "logits/chosen": -0.8775216937065125, "logits/rejected": -0.8249009251594543, "logps/chosen": -338.8774719238281, "logps/ref_chosen": -311.5650634765625, "logps/ref_rejected": -291.62432861328125, "logps/rejected": -341.2890625, "loss": 4.9797, "margin_dpo/margin_mean": 22.352325439453125, "margin_dpo/margin_std": 53.10045623779297, "step": 131 }, { "epoch": 0.2764397905759162, "fcm_dpo/beta": 0.010651674121618271, "fcm_dpo/delta": 0.0169695895165205, "fcm_dpo/margin": 31.339889526367188, "fcm_dpo/q_t": 0.42420607805252075, "grad_norm": 66.45543670654297, "learning_rate": 4.5522459192551166e-07, "logits/chosen": -0.837442934513092, "logits/rejected": -0.8209645748138428, "logps/chosen": -293.2777099609375, "logps/ref_chosen": -270.0818176269531, "logps/ref_rejected": -284.3084411621094, "logps/rejected": -338.8442687988281, "loss": 4.6632, "margin_dpo/margin_mean": 31.339889526367188, "margin_dpo/margin_std": 54.162715911865234, "step": 132 }, { "epoch": 0.27853403141361255, "fcm_dpo/beta": 0.010769927874207497, "fcm_dpo/delta": 0.010881779715418816, "fcm_dpo/margin": 28.75752830505371, "fcm_dpo/q_t": 0.42886942625045776, "grad_norm": 47.24089431762695, "learning_rate": 4.541735956498554e-07, "logits/chosen": -0.8648256063461304, "logits/rejected": -0.8635565042495728, "logps/chosen": -312.2882385253906, "logps/ref_chosen": -285.6213684082031, "logps/ref_rejected": -251.19386291503906, "logps/rejected": -306.6182556152344, "loss": 4.7005, "margin_dpo/margin_mean": 28.757530212402344, "margin_dpo/margin_std": 50.636070251464844, "step": 133 }, { "epoch": 0.2806282722513089, "fcm_dpo/beta": 0.01087371539324522, "fcm_dpo/delta": 0.0076601761393249035, "fcm_dpo/margin": 21.52016830444336, "fcm_dpo/q_t": 0.44513970613479614, "grad_norm": 55.61819076538086, "learning_rate": 4.5311165016389914e-07, "logits/chosen": -0.8549675345420837, "logits/rejected": -0.8537189364433289, "logps/chosen": -358.1379699707031, "logps/ref_chosen": -318.92083740234375, "logps/ref_rejected": -293.1894836425781, "logps/rejected": -353.92681884765625, "loss": 4.9157, "margin_dpo/margin_mean": 21.520170211791992, "margin_dpo/margin_std": 45.02650451660156, "step": 134 }, { "epoch": 0.28272251308900526, "fcm_dpo/beta": 0.010914881713688374, "fcm_dpo/delta": 0.003592526540160179, "fcm_dpo/margin": 25.296375274658203, "fcm_dpo/q_t": 0.43536409735679626, "grad_norm": 66.84896087646484, "learning_rate": 4.520388124165564e-07, "logits/chosen": -0.7576621174812317, "logits/rejected": -0.7966057062149048, "logps/chosen": -329.4492492675781, "logps/ref_chosen": -292.8217468261719, "logps/ref_rejected": -269.2896728515625, "logps/rejected": -331.21356201171875, "loss": 4.8392, "margin_dpo/margin_mean": 25.296375274658203, "margin_dpo/margin_std": 49.8359489440918, "step": 135 }, { "epoch": 0.2848167539267016, "fcm_dpo/beta": 0.010963549837470055, "fcm_dpo/delta": 0.007633054628968239, "fcm_dpo/margin": 28.468870162963867, "fcm_dpo/q_t": 0.4295894503593445, "grad_norm": 77.23846435546875, "learning_rate": 4.5095513994085974e-07, "logits/chosen": -0.8170275688171387, "logits/rejected": -0.8116145133972168, "logps/chosen": -312.12353515625, "logps/ref_chosen": -272.8525390625, "logps/ref_rejected": -252.68202209472656, "logps/rejected": -320.42193603515625, "loss": 4.7672, "margin_dpo/margin_mean": 28.468868255615234, "margin_dpo/margin_std": 55.25246047973633, "step": 136 }, { "epoch": 0.2869109947643979, "fcm_dpo/beta": 0.011102970689535141, "fcm_dpo/delta": 0.01760217919945717, "fcm_dpo/margin": 25.322219848632812, "fcm_dpo/q_t": 0.43689489364624023, "grad_norm": 71.75211334228516, "learning_rate": 4.498606908508753e-07, "logits/chosen": -0.8611711859703064, "logits/rejected": -0.8475285172462463, "logps/chosen": -344.4405517578125, "logps/ref_chosen": -300.7522277832031, "logps/ref_rejected": -286.1935119628906, "logps/rejected": -355.20404052734375, "loss": 4.8397, "margin_dpo/margin_mean": 25.322223663330078, "margin_dpo/margin_std": 52.45671081542969, "step": 137 }, { "epoch": 0.28900523560209423, "fcm_dpo/beta": 0.011161497794091702, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 29.333019256591797, "fcm_dpo/q_t": 0.4278327226638794, "grad_norm": 65.46996307373047, "learning_rate": 4.487555238385862e-07, "logits/chosen": -0.7847152352333069, "logits/rejected": -0.7637529969215393, "logps/chosen": -330.0584716796875, "logps/ref_chosen": -288.89056396484375, "logps/ref_rejected": -263.1719055175781, "logps/rejected": -333.67279052734375, "loss": 4.8012, "margin_dpo/margin_mean": 29.333017349243164, "margin_dpo/margin_std": 61.842342376708984, "step": 138 }, { "epoch": 0.29109947643979056, "fcm_dpo/beta": 0.011205606162548065, "fcm_dpo/delta": 0.007879176177084446, "fcm_dpo/margin": 20.918292999267578, "fcm_dpo/q_t": 0.4472315013408661, "grad_norm": 71.34123992919922, "learning_rate": 4.476396981707453e-07, "logits/chosen": -0.8044672012329102, "logits/rejected": -0.8337982892990112, "logps/chosen": -307.34002685546875, "logps/ref_chosen": -270.0443115234375, "logps/ref_rejected": -267.3226013183594, "logps/rejected": -325.53662109375, "loss": 5.0205, "margin_dpo/margin_mean": 20.918291091918945, "margin_dpo/margin_std": 52.10359573364258, "step": 139 }, { "epoch": 0.2931937172774869, "fcm_dpo/beta": 0.011320183053612709, "fcm_dpo/delta": 0.019457083195447922, "fcm_dpo/margin": 31.68231201171875, "fcm_dpo/q_t": 0.41780370473861694, "grad_norm": 61.65796661376953, "learning_rate": 4.4651327368569684e-07, "logits/chosen": -0.8818329572677612, "logits/rejected": -0.8469699621200562, "logps/chosen": -317.78314208984375, "logps/ref_chosen": -282.9555969238281, "logps/ref_rejected": -251.17181396484375, "logps/rejected": -317.681640625, "loss": 4.5506, "margin_dpo/margin_mean": 31.68231201171875, "margin_dpo/margin_std": 48.60394287109375, "step": 140 }, { "epoch": 0.29528795811518327, "fcm_dpo/beta": 0.011689888313412666, "fcm_dpo/delta": 0.0463109090924263, "fcm_dpo/margin": 32.73537826538086, "fcm_dpo/q_t": 0.4133494198322296, "grad_norm": 74.72412872314453, "learning_rate": 4.453763107901675e-07, "logits/chosen": -0.8145374059677124, "logits/rejected": -0.8087294697761536, "logps/chosen": -329.2580261230469, "logps/ref_chosen": -296.3001708984375, "logps/ref_rejected": -279.8486633300781, "logps/rejected": -345.5418701171875, "loss": 4.6074, "margin_dpo/margin_mean": 32.73537826538086, "margin_dpo/margin_std": 57.37196350097656, "step": 141 }, { "epoch": 0.2973821989528796, "fcm_dpo/beta": 0.01217513345181942, "fcm_dpo/delta": 0.061236005276441574, "fcm_dpo/margin": 26.921472549438477, "fcm_dpo/q_t": 0.42784056067466736, "grad_norm": 71.6540756225586, "learning_rate": 4.4422887045602674e-07, "logits/chosen": -0.834761381149292, "logits/rejected": -0.8356201648712158, "logps/chosen": -333.8131103515625, "logps/ref_chosen": -300.56585693359375, "logps/ref_rejected": -231.43316650390625, "logps/rejected": -291.6018981933594, "loss": 4.8387, "margin_dpo/margin_mean": 26.92147445678711, "margin_dpo/margin_std": 57.958465576171875, "step": 142 }, { "epoch": 0.2994764397905759, "fcm_dpo/beta": 0.012588088400661945, "fcm_dpo/delta": 0.025997720658779144, "fcm_dpo/margin": 32.42500305175781, "fcm_dpo/q_t": 0.4098896384239197, "grad_norm": 71.97573852539062, "learning_rate": 4.4307101421701755e-07, "logits/chosen": -0.8194795846939087, "logits/rejected": -0.7966126799583435, "logps/chosen": -329.8268737792969, "logps/ref_chosen": -296.73236083984375, "logps/ref_rejected": -266.45257568359375, "logps/rejected": -331.97210693359375, "loss": 4.4955, "margin_dpo/margin_mean": 32.42500305175781, "margin_dpo/margin_std": 51.72855758666992, "step": 143 }, { "epoch": 0.30157068062827225, "fcm_dpo/beta": 0.012575407512485981, "fcm_dpo/delta": -0.008530584163963795, "fcm_dpo/margin": 26.383615493774414, "fcm_dpo/q_t": 0.4281023442745209, "grad_norm": 65.61817932128906, "learning_rate": 4.419028041654559e-07, "logits/chosen": -0.8793942332267761, "logits/rejected": -0.8645679950714111, "logps/chosen": -331.48211669921875, "logps/ref_chosen": -298.843994140625, "logps/ref_rejected": -266.120849609375, "logps/rejected": -325.142578125, "loss": 4.8261, "margin_dpo/margin_mean": 26.38361358642578, "margin_dpo/margin_std": 56.853328704833984, "step": 144 }, { "epoch": 0.3036649214659686, "fcm_dpo/beta": 0.01254544872790575, "fcm_dpo/delta": -0.01076475065201521, "fcm_dpo/margin": 34.03130340576172, "fcm_dpo/q_t": 0.4056648015975952, "grad_norm": 67.97920989990234, "learning_rate": 4.4072430294890166e-07, "logits/chosen": -0.8655514717102051, "logits/rejected": -0.867131769657135, "logps/chosen": -303.7411193847656, "logps/ref_chosen": -275.7528381347656, "logps/ref_rejected": -214.74807739257812, "logps/rejected": -276.76763916015625, "loss": 4.4575, "margin_dpo/margin_mean": 34.031307220458984, "margin_dpo/margin_std": 52.632320404052734, "step": 145 }, { "epoch": 0.3057591623036649, "fcm_dpo/beta": 0.012334452010691166, "fcm_dpo/delta": -0.015375002287328243, "fcm_dpo/margin": 33.436424255371094, "fcm_dpo/q_t": 0.4091810882091522, "grad_norm": 65.9619369506836, "learning_rate": 4.395355737667985e-07, "logits/chosen": -0.8543013334274292, "logits/rejected": -0.8488306999206543, "logps/chosen": -313.3991394042969, "logps/ref_chosen": -277.09820556640625, "logps/ref_rejected": -265.41046142578125, "logps/rejected": -335.14776611328125, "loss": 4.5041, "margin_dpo/margin_mean": 33.436424255371094, "margin_dpo/margin_std": 52.72343826293945, "step": 146 }, { "epoch": 0.3078534031413613, "fcm_dpo/beta": 0.012323684990406036, "fcm_dpo/delta": -0.014296751469373703, "fcm_dpo/margin": 25.813636779785156, "fcm_dpo/q_t": 0.42911848425865173, "grad_norm": 70.41072845458984, "learning_rate": 4.3833668036708483e-07, "logits/chosen": -0.845245897769928, "logits/rejected": -0.8413334488868713, "logps/chosen": -329.39447021484375, "logps/ref_chosen": -291.4185791015625, "logps/ref_rejected": -253.43051147460938, "logps/rejected": -317.2200927734375, "loss": 4.8645, "margin_dpo/margin_mean": 25.813636779785156, "margin_dpo/margin_std": 54.92702865600586, "step": 147 }, { "epoch": 0.3099476439790576, "fcm_dpo/beta": 0.012206897139549255, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 28.223648071289062, "fcm_dpo/q_t": 0.4271428883075714, "grad_norm": 66.38739776611328, "learning_rate": 4.3712768704277524e-07, "logits/chosen": -0.8933055400848389, "logits/rejected": -0.8955690860748291, "logps/chosen": -270.71759033203125, "logps/ref_chosen": -236.74850463867188, "logps/ref_rejected": -231.4674072265625, "logps/rejected": -293.66009521484375, "loss": 4.8075, "margin_dpo/margin_mean": 28.223648071289062, "margin_dpo/margin_std": 61.39948654174805, "step": 148 }, { "epoch": 0.31204188481675393, "fcm_dpo/beta": 0.012314035557210445, "fcm_dpo/delta": 0.01721823401749134, "fcm_dpo/margin": 36.42420196533203, "fcm_dpo/q_t": 0.4006516933441162, "grad_norm": 77.8093032836914, "learning_rate": 4.3590865862851263e-07, "logits/chosen": -0.8710360527038574, "logits/rejected": -0.8562425374984741, "logps/chosen": -360.1137390136719, "logps/ref_chosen": -319.9284973144531, "logps/ref_rejected": -308.20233154296875, "logps/rejected": -384.81182861328125, "loss": 4.3377, "margin_dpo/margin_mean": 36.4242057800293, "margin_dpo/margin_std": 52.034080505371094, "step": 149 }, { "epoch": 0.31413612565445026, "fcm_dpo/beta": 0.012715589255094528, "fcm_dpo/delta": 0.053375184535980225, "fcm_dpo/margin": 30.288711547851562, "fcm_dpo/q_t": 0.4150834083557129, "grad_norm": 72.03448486328125, "learning_rate": 4.346796604970912e-07, "logits/chosen": -0.8501912951469421, "logits/rejected": -0.8354445099830627, "logps/chosen": -321.476806640625, "logps/ref_chosen": -276.3182373046875, "logps/ref_rejected": -273.02215576171875, "logps/rejected": -348.46942138671875, "loss": 4.6202, "margin_dpo/margin_mean": 30.288713455200195, "margin_dpo/margin_std": 54.6740837097168, "step": 150 }, { "epoch": 0.3162303664921466, "fcm_dpo/beta": 0.012854784727096558, "fcm_dpo/delta": -0.019951222464442253, "fcm_dpo/margin": 44.77876281738281, "fcm_dpo/q_t": 0.37585607171058655, "grad_norm": 79.77096557617188, "learning_rate": 4.3344075855595097e-07, "logits/chosen": -0.871157169342041, "logits/rejected": -0.8656657338142395, "logps/chosen": -339.890869140625, "logps/ref_chosen": -297.31280517578125, "logps/ref_rejected": -266.1003723144531, "logps/rejected": -353.45721435546875, "loss": 4.0884, "margin_dpo/margin_mean": 44.77876663208008, "margin_dpo/margin_std": 57.61820983886719, "step": 151 }, { "epoch": 0.3183246073298429, "fcm_dpo/beta": 0.012488273903727531, "fcm_dpo/delta": -0.028484076261520386, "fcm_dpo/margin": 37.6032829284668, "fcm_dpo/q_t": 0.4018944799900055, "grad_norm": 67.45524597167969, "learning_rate": 4.3219201924364323e-07, "logits/chosen": -0.8691319823265076, "logits/rejected": -0.8660374879837036, "logps/chosen": -307.91668701171875, "logps/ref_chosen": -270.2470397949219, "logps/ref_rejected": -269.7749328613281, "logps/rejected": -345.04791259765625, "loss": 4.4857, "margin_dpo/margin_mean": 37.60328674316406, "margin_dpo/margin_std": 61.99601745605469, "step": 152 }, { "epoch": 0.3204188481675393, "fcm_dpo/beta": 0.01196226291358471, "fcm_dpo/delta": -0.05861516296863556, "fcm_dpo/margin": 52.48561096191406, "fcm_dpo/q_t": 0.3647117614746094, "grad_norm": 73.9798355102539, "learning_rate": 4.309335095262675e-07, "logits/chosen": -0.8589173555374146, "logits/rejected": -0.8468472957611084, "logps/chosen": -319.50592041015625, "logps/ref_chosen": -273.779052734375, "logps/ref_rejected": -280.9530944824219, "logps/rejected": -379.16558837890625, "loss": 3.8701, "margin_dpo/margin_mean": 52.48561096191406, "margin_dpo/margin_std": 58.62868881225586, "step": 153 }, { "epoch": 0.3225130890052356, "fcm_dpo/beta": 0.011798365972936153, "fcm_dpo/delta": -0.018483448773622513, "fcm_dpo/margin": 41.51868438720703, "fcm_dpo/q_t": 0.3961712121963501, "grad_norm": 70.80081176757812, "learning_rate": 4.2966529689388064e-07, "logits/chosen": -0.9012446403503418, "logits/rejected": -0.8823704719543457, "logps/chosen": -336.96453857421875, "logps/ref_chosen": -289.9031982421875, "logps/ref_rejected": -261.5166320800781, "logps/rejected": -350.0966491699219, "loss": 4.4483, "margin_dpo/margin_mean": 41.518680572509766, "margin_dpo/margin_std": 68.92434692382812, "step": 154 }, { "epoch": 0.32460732984293195, "fcm_dpo/beta": 0.01188894733786583, "fcm_dpo/delta": 0.020381543785333633, "fcm_dpo/margin": 35.36058044433594, "fcm_dpo/q_t": 0.41230636835098267, "grad_norm": 93.07943725585938, "learning_rate": 4.2838744935687716e-07, "logits/chosen": -0.8364645838737488, "logits/rejected": -0.8337617516517639, "logps/chosen": -339.5706481933594, "logps/ref_chosen": -285.8612060546875, "logps/ref_rejected": -300.1272888183594, "logps/rejected": -389.19732666015625, "loss": 4.5817, "margin_dpo/margin_mean": 35.36058044433594, "margin_dpo/margin_std": 63.83776092529297, "step": 155 }, { "epoch": 0.3267015706806283, "fcm_dpo/beta": 0.011719970963895321, "fcm_dpo/delta": -0.07219862937927246, "fcm_dpo/margin": 50.530967712402344, "fcm_dpo/q_t": 0.375670850276947, "grad_norm": 78.01712036132812, "learning_rate": 4.271000354423425e-07, "logits/chosen": -0.8607072830200195, "logits/rejected": -0.8579990863800049, "logps/chosen": -327.7187805175781, "logps/ref_chosen": -279.0354919433594, "logps/ref_rejected": -244.2198486328125, "logps/rejected": -343.43408203125, "loss": 4.2327, "margin_dpo/margin_mean": 50.530967712402344, "margin_dpo/margin_std": 74.52775573730469, "step": 156 }, { "epoch": 0.3287958115183246, "fcm_dpo/beta": 0.010926080867648125, "fcm_dpo/delta": -0.044517070055007935, "fcm_dpo/margin": 38.28815460205078, "fcm_dpo/q_t": 0.40745672583580017, "grad_norm": 64.37937927246094, "learning_rate": 4.258031241903777e-07, "logits/chosen": -0.9258842468261719, "logits/rejected": -0.9248091578483582, "logps/chosen": -324.15484619140625, "logps/ref_chosen": -270.830322265625, "logps/ref_rejected": -259.08319091796875, "logps/rejected": -350.69586181640625, "loss": 4.5384, "margin_dpo/margin_mean": 38.28815460205078, "margin_dpo/margin_std": 64.0045166015625, "step": 157 }, { "epoch": 0.3308900523560209, "fcm_dpo/beta": 0.010922886431217194, "fcm_dpo/delta": 0.0029906341806054115, "fcm_dpo/margin": 44.55851364135742, "fcm_dpo/q_t": 0.39252322912216187, "grad_norm": 67.0842056274414, "learning_rate": 4.2449678515039743e-07, "logits/chosen": -0.8688482642173767, "logits/rejected": -0.8544604778289795, "logps/chosen": -343.98077392578125, "logps/ref_chosen": -290.381103515625, "logps/ref_rejected": -271.95166015625, "logps/rejected": -370.1098937988281, "loss": 4.3355, "margin_dpo/margin_mean": 44.558509826660156, "margin_dpo/margin_std": 64.2014389038086, "step": 158 }, { "epoch": 0.33298429319371725, "fcm_dpo/beta": 0.010863966308534145, "fcm_dpo/delta": 0.010356229729950428, "fcm_dpo/margin": 28.11869239807129, "fcm_dpo/q_t": 0.4351333975791931, "grad_norm": 98.55948638916016, "learning_rate": 4.2318108837739986e-07, "logits/chosen": -0.9458408355712891, "logits/rejected": -0.8985559344291687, "logps/chosen": -377.7143859863281, "logps/ref_chosen": -321.37835693359375, "logps/ref_rejected": -250.45652770996094, "logps/rejected": -334.9112854003906, "loss": 4.9886, "margin_dpo/margin_mean": 28.118694305419922, "margin_dpo/margin_std": 68.85266876220703, "step": 159 }, { "epoch": 0.33507853403141363, "fcm_dpo/beta": 0.010856934823095798, "fcm_dpo/delta": -0.0305030420422554, "fcm_dpo/margin": 48.76349639892578, "fcm_dpo/q_t": 0.38204342126846313, "grad_norm": 76.18362426757812, "learning_rate": 4.218561044282098e-07, "logits/chosen": -0.8730629682540894, "logits/rejected": -0.8760570287704468, "logps/chosen": -323.17864990234375, "logps/ref_chosen": -276.28350830078125, "logps/ref_rejected": -262.7477722167969, "logps/rejected": -358.40643310546875, "loss": 4.0662, "margin_dpo/margin_mean": 48.76349639892578, "margin_dpo/margin_std": 56.23298645019531, "step": 160 }, { "epoch": 0.33717277486910996, "fcm_dpo/beta": 0.010607090778648853, "fcm_dpo/delta": -0.010924622416496277, "fcm_dpo/margin": 43.944374084472656, "fcm_dpo/q_t": 0.4002479612827301, "grad_norm": 78.79258728027344, "learning_rate": 4.2052190435769554e-07, "logits/chosen": -0.8944586515426636, "logits/rejected": -0.8787456750869751, "logps/chosen": -366.509521484375, "logps/ref_chosen": -310.4927978515625, "logps/ref_rejected": -250.25347900390625, "logps/rejected": -350.2145690917969, "loss": 4.4434, "margin_dpo/margin_mean": 43.944374084472656, "margin_dpo/margin_std": 71.29467010498047, "step": 161 }, { "epoch": 0.3392670157068063, "fcm_dpo/beta": 0.010460903868079185, "fcm_dpo/delta": 0.0181996151804924, "fcm_dpo/margin": 39.56256866455078, "fcm_dpo/q_t": 0.4082155227661133, "grad_norm": 61.922889709472656, "learning_rate": 4.1917855971495763e-07, "logits/chosen": -0.8656594157218933, "logits/rejected": -0.8580671548843384, "logps/chosen": -347.6031494140625, "logps/ref_chosen": -296.1105041503906, "logps/ref_rejected": -253.4247589111328, "logps/rejected": -344.4800109863281, "loss": 4.5132, "margin_dpo/margin_mean": 39.562564849853516, "margin_dpo/margin_std": 63.643367767333984, "step": 162 }, { "epoch": 0.3413612565445026, "fcm_dpo/beta": 0.01069792453199625, "fcm_dpo/delta": -0.004520459100604057, "fcm_dpo/margin": 44.61610412597656, "fcm_dpo/q_t": 0.3934933543205261, "grad_norm": 85.54950714111328, "learning_rate": 4.1782614253949255e-07, "logits/chosen": -0.9105485677719116, "logits/rejected": -0.9078636765480042, "logps/chosen": -348.55645751953125, "logps/ref_chosen": -293.5898132324219, "logps/ref_rejected": -266.951904296875, "logps/rejected": -366.53466796875, "loss": 4.2956, "margin_dpo/margin_mean": 44.61610412597656, "margin_dpo/margin_std": 61.28437042236328, "step": 163 }, { "epoch": 0.34345549738219894, "fcm_dpo/beta": 0.010653373785316944, "fcm_dpo/delta": -0.019431831315159798, "fcm_dpo/margin": 40.82433319091797, "fcm_dpo/q_t": 0.4064997732639313, "grad_norm": 79.59967803955078, "learning_rate": 4.164647253573289e-07, "logits/chosen": -0.8544159531593323, "logits/rejected": -0.8663524389266968, "logps/chosen": -331.69659423828125, "logps/ref_chosen": -267.04949951171875, "logps/ref_rejected": -215.9768829345703, "logps/rejected": -321.44830322265625, "loss": 4.5291, "margin_dpo/margin_mean": 40.82433319091797, "margin_dpo/margin_std": 69.7838134765625, "step": 164 }, { "epoch": 0.34554973821989526, "fcm_dpo/beta": 0.01054457575082779, "fcm_dpo/delta": 0.002462883247062564, "fcm_dpo/margin": 29.31476593017578, "fcm_dpo/q_t": 0.4311188757419586, "grad_norm": 81.46530151367188, "learning_rate": 4.1509438117713863e-07, "logits/chosen": -0.8851940035820007, "logits/rejected": -0.8585687875747681, "logps/chosen": -329.74737548828125, "logps/ref_chosen": -278.06146240234375, "logps/ref_rejected": -260.4288635253906, "logps/rejected": -341.4295349121094, "loss": 4.7755, "margin_dpo/margin_mean": 29.31476593017578, "margin_dpo/margin_std": 56.65957260131836, "step": 165 }, { "epoch": 0.34764397905759165, "fcm_dpo/beta": 0.010434024967253208, "fcm_dpo/delta": -0.013516011647880077, "fcm_dpo/margin": 36.80669403076172, "fcm_dpo/q_t": 0.4170481562614441, "grad_norm": 88.08253479003906, "learning_rate": 4.137151834863213e-07, "logits/chosen": -0.8408401012420654, "logits/rejected": -0.8059453964233398, "logps/chosen": -321.4761962890625, "logps/ref_chosen": -275.6466369628906, "logps/ref_rejected": -232.37017822265625, "logps/rejected": -315.0064697265625, "loss": 4.621, "margin_dpo/margin_mean": 36.80669403076172, "margin_dpo/margin_std": 67.22309875488281, "step": 166 }, { "epoch": 0.34973821989528797, "fcm_dpo/beta": 0.010469815693795681, "fcm_dpo/delta": -0.009936400689184666, "fcm_dpo/margin": 41.52671813964844, "fcm_dpo/q_t": 0.40366828441619873, "grad_norm": 92.14634704589844, "learning_rate": 4.123272062470633e-07, "logits/chosen": -0.8315152525901794, "logits/rejected": -0.8175035715103149, "logps/chosen": -333.81878662109375, "logps/ref_chosen": -280.5514221191406, "logps/ref_rejected": -255.2896728515625, "logps/rejected": -350.083740234375, "loss": 4.5055, "margin_dpo/margin_mean": 41.52671813964844, "margin_dpo/margin_std": 69.44818115234375, "step": 167 }, { "epoch": 0.3518324607329843, "fcm_dpo/beta": 0.01042957603931427, "fcm_dpo/delta": 0.012083848938345909, "fcm_dpo/margin": 46.28468322753906, "fcm_dpo/q_t": 0.39214834570884705, "grad_norm": 171.32440185546875, "learning_rate": 4.1093052389237174e-07, "logits/chosen": -0.8135068416595459, "logits/rejected": -0.7858311533927917, "logps/chosen": -371.6254577636719, "logps/ref_chosen": -315.7982177734375, "logps/ref_rejected": -291.48406982421875, "logps/rejected": -393.59600830078125, "loss": 4.3379, "margin_dpo/margin_mean": 46.28468322753906, "margin_dpo/margin_std": 68.01168823242188, "step": 168 }, { "epoch": 0.3539267015706806, "fcm_dpo/beta": 0.010296836495399475, "fcm_dpo/delta": -0.0430966354906559, "fcm_dpo/margin": 54.97410202026367, "fcm_dpo/q_t": 0.3766815960407257, "grad_norm": 106.70867919921875, "learning_rate": 4.0952521132208267e-07, "logits/chosen": -0.811809778213501, "logits/rejected": -0.8225773572921753, "logps/chosen": -314.03985595703125, "logps/ref_chosen": -261.06427001953125, "logps/ref_rejected": -235.40663146972656, "logps/rejected": -343.35638427734375, "loss": 4.0653, "margin_dpo/margin_mean": 54.97410202026367, "margin_dpo/margin_std": 67.30137634277344, "step": 169 }, { "epoch": 0.35602094240837695, "fcm_dpo/beta": 0.010233273729681969, "fcm_dpo/delta": 0.03896753489971161, "fcm_dpo/margin": 45.84815216064453, "fcm_dpo/q_t": 0.4011499881744385, "grad_norm": 97.2695083618164, "learning_rate": 4.081113438988443e-07, "logits/chosen": -0.7859560251235962, "logits/rejected": -0.7877269983291626, "logps/chosen": -361.7164001464844, "logps/ref_chosen": -308.96722412109375, "logps/ref_rejected": -263.8466796875, "logps/rejected": -362.4439697265625, "loss": 4.4936, "margin_dpo/margin_mean": 45.8481559753418, "margin_dpo/margin_std": 80.94108581542969, "step": 170 }, { "epoch": 0.3581151832460733, "fcm_dpo/beta": 0.010340461507439613, "fcm_dpo/delta": -0.0003459630534052849, "fcm_dpo/margin": 52.07539367675781, "fcm_dpo/q_t": 0.3825134336948395, "grad_norm": 103.40943908691406, "learning_rate": 4.0668899744407567e-07, "logits/chosen": -0.8086336255073547, "logits/rejected": -0.8238397240638733, "logps/chosen": -304.28411865234375, "logps/ref_chosen": -258.8890380859375, "logps/ref_rejected": -262.19140625, "logps/rejected": -359.661865234375, "loss": 4.1402, "margin_dpo/margin_mean": 52.07539367675781, "margin_dpo/margin_std": 66.66648864746094, "step": 171 }, { "epoch": 0.36020942408376966, "fcm_dpo/beta": 0.010584648698568344, "fcm_dpo/delta": 0.031594403088092804, "fcm_dpo/margin": 30.508852005004883, "fcm_dpo/q_t": 0.4273463487625122, "grad_norm": 99.63431549072266, "learning_rate": 4.0525824823390043e-07, "logits/chosen": -0.823786199092865, "logits/rejected": -0.8405004739761353, "logps/chosen": -386.2387390136719, "logps/ref_chosen": -339.0223388671875, "logps/ref_rejected": -295.78759765625, "logps/rejected": -373.5128479003906, "loss": 4.8523, "margin_dpo/margin_mean": 30.508852005004883, "margin_dpo/margin_std": 67.79949951171875, "step": 172 }, { "epoch": 0.362303664921466, "fcm_dpo/beta": 0.010532171465456486, "fcm_dpo/delta": -0.022472519427537918, "fcm_dpo/margin": 38.833194732666016, "fcm_dpo/q_t": 0.41293755173683167, "grad_norm": 131.93397521972656, "learning_rate": 4.0381917299505686e-07, "logits/chosen": -0.8346858024597168, "logits/rejected": -0.8341192603111267, "logps/chosen": -344.9473876953125, "logps/ref_chosen": -300.1114501953125, "logps/ref_rejected": -273.78460693359375, "logps/rejected": -357.4537353515625, "loss": 4.5961, "margin_dpo/margin_mean": 38.833194732666016, "margin_dpo/margin_std": 68.92523956298828, "step": 173 }, { "epoch": 0.3643979057591623, "fcm_dpo/beta": 0.010570104233920574, "fcm_dpo/delta": 0.02033688873052597, "fcm_dpo/margin": 47.59809494018555, "fcm_dpo/q_t": 0.38671064376831055, "grad_norm": 136.58636474609375, "learning_rate": 4.0237184890078243e-07, "logits/chosen": -0.8063141107559204, "logits/rejected": -0.7906365394592285, "logps/chosen": -379.1315002441406, "logps/ref_chosen": -335.0538635253906, "logps/ref_rejected": -257.4646911621094, "logps/rejected": -349.1404113769531, "loss": 4.1945, "margin_dpo/margin_mean": 47.59809494018555, "margin_dpo/margin_std": 62.02534484863281, "step": 174 }, { "epoch": 0.36649214659685864, "fcm_dpo/beta": 0.010354937054216862, "fcm_dpo/delta": -0.06971834599971771, "fcm_dpo/margin": 44.109615325927734, "fcm_dpo/q_t": 0.4042738974094391, "grad_norm": 81.95703887939453, "learning_rate": 4.00916353566676e-07, "logits/chosen": -0.8228567242622375, "logits/rejected": -0.8251509070396423, "logps/chosen": -340.4371337890625, "logps/ref_chosen": -284.39556884765625, "logps/ref_rejected": -283.3876647949219, "logps/rejected": -383.538818359375, "loss": 4.5043, "margin_dpo/margin_mean": 44.109615325927734, "margin_dpo/margin_std": 72.40750122070312, "step": 175 }, { "epoch": 0.36858638743455496, "fcm_dpo/beta": 0.01005008164793253, "fcm_dpo/delta": -0.004386642947793007, "fcm_dpo/margin": 34.69060134887695, "fcm_dpo/q_t": 0.4250302314758301, "grad_norm": 107.35453796386719, "learning_rate": 3.994527650465352e-07, "logits/chosen": -0.7942756414413452, "logits/rejected": -0.8046650886535645, "logps/chosen": -307.6514892578125, "logps/ref_chosen": -251.81280517578125, "logps/ref_rejected": -242.05328369140625, "logps/rejected": -332.58258056640625, "loss": 4.8619, "margin_dpo/margin_mean": 34.69059753417969, "margin_dpo/margin_std": 77.51087951660156, "step": 176 }, { "epoch": 0.3706806282722513, "fcm_dpo/beta": 0.010259328410029411, "fcm_dpo/delta": 0.03945356607437134, "fcm_dpo/margin": 35.12948226928711, "fcm_dpo/q_t": 0.4213239550590515, "grad_norm": 85.04834747314453, "learning_rate": 3.979811618281705e-07, "logits/chosen": -0.8936614394187927, "logits/rejected": -0.8652552366256714, "logps/chosen": -361.48468017578125, "logps/ref_chosen": -298.6463928222656, "logps/ref_rejected": -295.66534423828125, "logps/rejected": -393.63311767578125, "loss": 4.8502, "margin_dpo/margin_mean": 35.129478454589844, "margin_dpo/margin_std": 74.60753631591797, "step": 177 }, { "epoch": 0.37277486910994767, "fcm_dpo/beta": 0.010234692133963108, "fcm_dpo/delta": -0.013511069118976593, "fcm_dpo/margin": 51.200801849365234, "fcm_dpo/q_t": 0.38916733860969543, "grad_norm": 118.27215576171875, "learning_rate": 3.9650162282919654e-07, "logits/chosen": -0.8079323768615723, "logits/rejected": -0.8033620119094849, "logps/chosen": -339.77313232421875, "logps/ref_chosen": -286.2576599121094, "logps/ref_rejected": -243.97491455078125, "logps/rejected": -348.6911926269531, "loss": 4.2924, "margin_dpo/margin_mean": 51.200801849365234, "margin_dpo/margin_std": 77.08502960205078, "step": 178 }, { "epoch": 0.374869109947644, "fcm_dpo/beta": 0.010111997835338116, "fcm_dpo/delta": -0.024984102696180344, "fcm_dpo/margin": 42.48678970336914, "fcm_dpo/q_t": 0.4064781665802002, "grad_norm": 117.5345458984375, "learning_rate": 3.9501422739279953e-07, "logits/chosen": -0.7860456109046936, "logits/rejected": -0.7883044481277466, "logps/chosen": -317.0030212402344, "logps/ref_chosen": -259.737060546875, "logps/ref_rejected": -277.8813171386719, "logps/rejected": -377.634033203125, "loss": 4.5748, "margin_dpo/margin_mean": 42.486785888671875, "margin_dpo/margin_std": 74.06698608398438, "step": 179 }, { "epoch": 0.3769633507853403, "fcm_dpo/beta": 0.010198577307164669, "fcm_dpo/delta": 0.030198615044355392, "fcm_dpo/margin": 49.38009262084961, "fcm_dpo/q_t": 0.3938713073730469, "grad_norm": 118.17071533203125, "learning_rate": 3.935190552834828e-07, "logits/chosen": -0.8322474956512451, "logits/rejected": -0.8610942959785461, "logps/chosen": -325.9746398925781, "logps/ref_chosen": -267.30889892578125, "logps/ref_rejected": -230.4376983642578, "logps/rejected": -338.4835205078125, "loss": 4.3078, "margin_dpo/margin_mean": 49.380088806152344, "margin_dpo/margin_std": 73.99922180175781, "step": 180 }, { "epoch": 0.37905759162303665, "fcm_dpo/beta": 0.010158884339034557, "fcm_dpo/delta": -0.006093205884099007, "fcm_dpo/margin": 40.51472854614258, "fcm_dpo/q_t": 0.4114302098751068, "grad_norm": 144.84808349609375, "learning_rate": 3.920161866827889e-07, "logits/chosen": -0.8205707669258118, "logits/rejected": -0.8252695202827454, "logps/chosen": -367.8943176269531, "logps/ref_chosen": -300.49139404296875, "logps/ref_rejected": -278.98284912109375, "logps/rejected": -386.9005126953125, "loss": 4.6462, "margin_dpo/margin_mean": 40.51472854614258, "margin_dpo/margin_std": 76.76387786865234, "step": 181 }, { "epoch": 0.381151832460733, "fcm_dpo/beta": 0.01002179179340601, "fcm_dpo/delta": -0.06329777836799622, "fcm_dpo/margin": 57.60490798950195, "fcm_dpo/q_t": 0.3754042387008667, "grad_norm": 79.3833999633789, "learning_rate": 3.90505702185e-07, "logits/chosen": -0.8123592138290405, "logits/rejected": -0.8395570516586304, "logps/chosen": -344.8331298828125, "logps/ref_chosen": -279.4981689453125, "logps/ref_rejected": -263.6926574707031, "logps/rejected": -386.632568359375, "loss": 4.0947, "margin_dpo/margin_mean": 57.60490798950195, "margin_dpo/margin_std": 74.24735260009766, "step": 182 }, { "epoch": 0.3832460732984293, "fcm_dpo/beta": 0.009926295839250088, "fcm_dpo/delta": 0.04236668348312378, "fcm_dpo/margin": 51.719627380371094, "fcm_dpo/q_t": 0.3916701674461365, "grad_norm": 69.92806243896484, "learning_rate": 3.889876827928156e-07, "logits/chosen": -0.8526961803436279, "logits/rejected": -0.8551933169364929, "logps/chosen": -336.1162414550781, "logps/ref_chosen": -270.8456726074219, "logps/ref_rejected": -244.1910400390625, "logps/rejected": -361.18121337890625, "loss": 4.3359, "margin_dpo/margin_mean": 51.719627380371094, "margin_dpo/margin_std": 80.76448822021484, "step": 183 }, { "epoch": 0.38534031413612563, "fcm_dpo/beta": 0.009438715875148773, "fcm_dpo/delta": -0.10046012699604034, "fcm_dpo/margin": 69.27982330322266, "fcm_dpo/q_t": 0.3597652316093445, "grad_norm": 82.657958984375, "learning_rate": 3.874622099130087e-07, "logits/chosen": -0.8982101082801819, "logits/rejected": -0.8769604563713074, "logps/chosen": -382.0798645019531, "logps/ref_chosen": -318.4457702636719, "logps/ref_rejected": -266.640869140625, "logps/rejected": -399.55474853515625, "loss": 3.9421, "margin_dpo/margin_mean": 69.27982330322266, "margin_dpo/margin_std": 85.03633880615234, "step": 184 }, { "epoch": 0.387434554973822, "fcm_dpo/beta": 0.009186076931655407, "fcm_dpo/delta": -0.023270942270755768, "fcm_dpo/margin": 48.90029525756836, "fcm_dpo/q_t": 0.40330770611763, "grad_norm": 90.22915649414062, "learning_rate": 3.859293653520604e-07, "logits/chosen": -0.8841208219528198, "logits/rejected": -0.8796699643135071, "logps/chosen": -349.35546875, "logps/ref_chosen": -274.308837890625, "logps/ref_rejected": -260.7274169921875, "logps/rejected": -384.67437744140625, "loss": 4.5001, "margin_dpo/margin_mean": 48.900299072265625, "margin_dpo/margin_std": 82.05442810058594, "step": 185 }, { "epoch": 0.38952879581151834, "fcm_dpo/beta": 0.009153323248028755, "fcm_dpo/delta": -0.0018929075449705124, "fcm_dpo/margin": 46.96990203857422, "fcm_dpo/q_t": 0.4052902162075043, "grad_norm": 95.6862564086914, "learning_rate": 3.8438923131177237e-07, "logits/chosen": -0.8866674304008484, "logits/rejected": -0.8885794878005981, "logps/chosen": -373.1971130371094, "logps/ref_chosen": -299.00537109375, "logps/ref_rejected": -274.4014587402344, "logps/rejected": -395.5630798339844, "loss": 4.4928, "margin_dpo/margin_mean": 46.96990203857422, "margin_dpo/margin_std": 75.92547607421875, "step": 186 }, { "epoch": 0.39162303664921466, "fcm_dpo/beta": 0.009052860550582409, "fcm_dpo/delta": -0.005804085172712803, "fcm_dpo/margin": 43.057518005371094, "fcm_dpo/q_t": 0.4116409420967102, "grad_norm": 123.71559143066406, "learning_rate": 3.828418903848593e-07, "logits/chosen": -0.8360170722007751, "logits/rejected": -0.8213926553726196, "logps/chosen": -412.3048095703125, "logps/ref_chosen": -329.8253173828125, "logps/ref_rejected": -263.73175048828125, "logps/rejected": -389.26885986328125, "loss": 4.7538, "margin_dpo/margin_mean": 43.057518005371094, "margin_dpo/margin_std": 86.98685455322266, "step": 187 }, { "epoch": 0.393717277486911, "fcm_dpo/beta": 0.009060696698725224, "fcm_dpo/delta": -0.0018207728862762451, "fcm_dpo/margin": 48.631187438964844, "fcm_dpo/q_t": 0.4070327579975128, "grad_norm": 76.27377319335938, "learning_rate": 3.812874255505191e-07, "logits/chosen": -0.8485522270202637, "logits/rejected": -0.8396183848381042, "logps/chosen": -338.3583679199219, "logps/ref_chosen": -263.005615234375, "logps/ref_rejected": -247.08668518066406, "logps/rejected": -371.07061767578125, "loss": 4.6237, "margin_dpo/margin_mean": 48.63118362426758, "margin_dpo/margin_std": 90.66398620605469, "step": 188 }, { "epoch": 0.3958115183246073, "fcm_dpo/beta": 0.008920717053115368, "fcm_dpo/delta": -0.02324105054140091, "fcm_dpo/margin": 59.242305755615234, "fcm_dpo/q_t": 0.3857063353061676, "grad_norm": 112.7400894165039, "learning_rate": 3.797259201699833e-07, "logits/chosen": -0.876733660697937, "logits/rejected": -0.8841363787651062, "logps/chosen": -335.7334289550781, "logps/ref_chosen": -272.96038818359375, "logps/ref_rejected": -275.13238525390625, "logps/rejected": -397.14776611328125, "loss": 4.1634, "margin_dpo/margin_mean": 59.24230194091797, "margin_dpo/margin_std": 76.81705474853516, "step": 189 }, { "epoch": 0.39790575916230364, "fcm_dpo/beta": 0.0087926359847188, "fcm_dpo/delta": -0.005176635459065437, "fcm_dpo/margin": 52.0158805847168, "fcm_dpo/q_t": 0.3995116353034973, "grad_norm": 87.53999328613281, "learning_rate": 3.781574579820464e-07, "logits/chosen": -0.8722460865974426, "logits/rejected": -0.8310267329216003, "logps/chosen": -322.8664855957031, "logps/ref_chosen": -257.79754638671875, "logps/ref_rejected": -225.2164306640625, "logps/rejected": -342.3012390136719, "loss": 4.3655, "margin_dpo/margin_mean": 52.01588821411133, "margin_dpo/margin_std": 76.1653823852539, "step": 190 }, { "epoch": 0.4, "fcm_dpo/beta": 0.008890870027244091, "fcm_dpo/delta": 0.013219781219959259, "fcm_dpo/margin": 52.1411247253418, "fcm_dpo/q_t": 0.3991526961326599, "grad_norm": 74.29290008544922, "learning_rate": 3.765821230985757e-07, "logits/chosen": -0.9063390493392944, "logits/rejected": -0.9074532985687256, "logps/chosen": -303.6692199707031, "logps/ref_chosen": -243.8585205078125, "logps/ref_rejected": -245.12136840820312, "logps/rejected": -357.0732116699219, "loss": 4.4241, "margin_dpo/margin_mean": 52.1411247253418, "margin_dpo/margin_std": 83.34513092041016, "step": 191 }, { "epoch": 0.40209424083769635, "fcm_dpo/beta": 0.009091068990528584, "fcm_dpo/delta": 0.029404528439044952, "fcm_dpo/margin": 40.7011833190918, "fcm_dpo/q_t": 0.4178800880908966, "grad_norm": 74.3216552734375, "learning_rate": 3.75e-07, "logits/chosen": -0.8373446464538574, "logits/rejected": -0.8207670450210571, "logps/chosen": -337.1842346191406, "logps/ref_chosen": -266.9799499511719, "logps/ref_rejected": -260.1697082519531, "logps/rejected": -371.0751953125, "loss": 4.6603, "margin_dpo/margin_mean": 40.7011833190918, "margin_dpo/margin_std": 74.03218841552734, "step": 192 }, { "epoch": 0.4041884816753927, "fcm_dpo/beta": 0.009255967102944851, "fcm_dpo/delta": 0.02564959228038788, "fcm_dpo/margin": 50.345237731933594, "fcm_dpo/q_t": 0.39963340759277344, "grad_norm": 78.43025970458984, "learning_rate": 3.734111735307796e-07, "logits/chosen": -0.8874344229698181, "logits/rejected": -0.8585877418518066, "logps/chosen": -360.2878723144531, "logps/ref_chosen": -280.25323486328125, "logps/ref_rejected": -291.0348815917969, "logps/rejected": -421.4147644042969, "loss": 4.4668, "margin_dpo/margin_mean": 50.345237731933594, "margin_dpo/margin_std": 81.91041564941406, "step": 193 }, { "epoch": 0.406282722513089, "fcm_dpo/beta": 0.009216805920004845, "fcm_dpo/delta": -0.022374922409653664, "fcm_dpo/margin": 39.54142379760742, "fcm_dpo/q_t": 0.42112547159194946, "grad_norm": 116.94824981689453, "learning_rate": 3.7181572889485623e-07, "logits/chosen": -0.8712892532348633, "logits/rejected": -0.8622381091117859, "logps/chosen": -370.13507080078125, "logps/ref_chosen": -288.4075927734375, "logps/ref_rejected": -251.57994079589844, "logps/rejected": -372.848876953125, "loss": 4.7329, "margin_dpo/margin_mean": 39.54142761230469, "margin_dpo/margin_std": 77.82317352294922, "step": 194 }, { "epoch": 0.4083769633507853, "fcm_dpo/beta": 0.009137854911386967, "fcm_dpo/delta": -0.01154034398496151, "fcm_dpo/margin": 37.4809455871582, "fcm_dpo/q_t": 0.4273977279663086, "grad_norm": 94.07014465332031, "learning_rate": 3.7021375165108377e-07, "logits/chosen": -0.8473997116088867, "logits/rejected": -0.8510259985923767, "logps/chosen": -356.85504150390625, "logps/ref_chosen": -274.0006408691406, "logps/ref_rejected": -280.22723388671875, "logps/rejected": -400.5626220703125, "loss": 4.7994, "margin_dpo/margin_mean": 37.4809455871582, "margin_dpo/margin_std": 78.94889831542969, "step": 195 }, { "epoch": 0.41047120418848165, "fcm_dpo/beta": 0.009222757071256638, "fcm_dpo/delta": 0.01410084217786789, "fcm_dpo/margin": 49.72914123535156, "fcm_dpo/q_t": 0.40148258209228516, "grad_norm": 82.66631317138672, "learning_rate": 3.6860532770864005e-07, "logits/chosen": -0.8405277132987976, "logits/rejected": -0.8463934659957886, "logps/chosen": -343.28216552734375, "logps/ref_chosen": -274.90069580078125, "logps/ref_rejected": -248.7281951904297, "logps/rejected": -366.8388366699219, "loss": 4.4567, "margin_dpo/margin_mean": 49.72914123535156, "margin_dpo/margin_std": 82.37889862060547, "step": 196 }, { "epoch": 0.41256544502617803, "fcm_dpo/beta": 0.009043055586516857, "fcm_dpo/delta": -0.04608980193734169, "fcm_dpo/margin": 59.12012481689453, "fcm_dpo/q_t": 0.38405507802963257, "grad_norm": 109.01643371582031, "learning_rate": 3.6699054332241985e-07, "logits/chosen": -0.8721888661384583, "logits/rejected": -0.8570114374160767, "logps/chosen": -382.73541259765625, "logps/ref_chosen": -309.5348205566406, "logps/ref_rejected": -264.3179931640625, "logps/rejected": -396.638671875, "loss": 4.1834, "margin_dpo/margin_mean": 59.1201171875, "margin_dpo/margin_std": 76.47515106201172, "step": 197 }, { "epoch": 0.41465968586387436, "fcm_dpo/beta": 0.008807329460978508, "fcm_dpo/delta": -0.0007833493873476982, "fcm_dpo/margin": 58.406890869140625, "fcm_dpo/q_t": 0.3895660638809204, "grad_norm": 90.92684936523438, "learning_rate": 3.653694850884091e-07, "logits/chosen": -0.874003529548645, "logits/rejected": -0.8490350842475891, "logps/chosen": -369.50592041015625, "logps/ref_chosen": -301.0134582519531, "logps/ref_rejected": -292.84185791015625, "logps/rejected": -419.7412109375, "loss": 4.2752, "margin_dpo/margin_mean": 58.40689468383789, "margin_dpo/margin_std": 84.98297119140625, "step": 198 }, { "epoch": 0.4167539267015707, "fcm_dpo/beta": 0.009006940759718418, "fcm_dpo/delta": 0.032412342727184296, "fcm_dpo/margin": 52.57870864868164, "fcm_dpo/q_t": 0.39799413084983826, "grad_norm": 107.88542938232422, "learning_rate": 3.6374223993904124e-07, "logits/chosen": -0.8546149730682373, "logits/rejected": -0.8131856918334961, "logps/chosen": -339.248291015625, "logps/ref_chosen": -264.6058654785156, "logps/ref_rejected": -214.9014892578125, "logps/rejected": -342.1226806640625, "loss": 4.3915, "margin_dpo/margin_mean": 52.57870864868164, "margin_dpo/margin_std": 82.29186248779297, "step": 199 }, { "epoch": 0.418848167539267, "fcm_dpo/beta": 0.008955798111855984, "fcm_dpo/delta": -0.03517484664916992, "fcm_dpo/margin": 43.835540771484375, "fcm_dpo/q_t": 0.4171965718269348, "grad_norm": 166.77725219726562, "learning_rate": 3.621088951385353e-07, "logits/chosen": -0.9009106159210205, "logits/rejected": -0.8782452344894409, "logps/chosen": -407.1960754394531, "logps/ref_chosen": -324.1588134765625, "logps/ref_rejected": -277.80218505859375, "logps/rejected": -404.67498779296875, "loss": 4.8277, "margin_dpo/margin_mean": 43.835540771484375, "margin_dpo/margin_std": 94.47071075439453, "step": 200 }, { "epoch": 0.42094240837696334, "fcm_dpo/beta": 0.008827144280076027, "fcm_dpo/delta": -0.013941485434770584, "fcm_dpo/margin": 48.97315216064453, "fcm_dpo/q_t": 0.40686681866645813, "grad_norm": 110.23241424560547, "learning_rate": 3.604695382782159e-07, "logits/chosen": -0.8766977190971375, "logits/rejected": -0.8654013276100159, "logps/chosen": -361.5707092285156, "logps/ref_chosen": -271.49566650390625, "logps/ref_rejected": -245.71414184570312, "logps/rejected": -384.7623291015625, "loss": 4.5951, "margin_dpo/margin_mean": 48.97315216064453, "margin_dpo/margin_std": 88.29933166503906, "step": 201 }, { "epoch": 0.42303664921465967, "fcm_dpo/beta": 0.008783853612840176, "fcm_dpo/delta": 0.012613123282790184, "fcm_dpo/margin": 49.992881774902344, "fcm_dpo/q_t": 0.40671300888061523, "grad_norm": 97.78633117675781, "learning_rate": 3.588242572718162e-07, "logits/chosen": -0.8849156498908997, "logits/rejected": -0.8734662532806396, "logps/chosen": -359.2434387207031, "logps/ref_chosen": -272.0979309082031, "logps/ref_rejected": -235.94805908203125, "logps/rejected": -373.08648681640625, "loss": 4.5931, "margin_dpo/margin_mean": 49.99287796020508, "margin_dpo/margin_std": 91.31502532958984, "step": 202 }, { "epoch": 0.42513089005235605, "fcm_dpo/beta": 0.009002954699099064, "fcm_dpo/delta": 0.018306914716959, "fcm_dpo/margin": 37.65985870361328, "fcm_dpo/q_t": 0.42487096786499023, "grad_norm": 88.12516021728516, "learning_rate": 3.571731403507635e-07, "logits/chosen": -0.8546892404556274, "logits/rejected": -0.862351655960083, "logps/chosen": -375.03314208984375, "logps/ref_chosen": -280.2221374511719, "logps/ref_rejected": -251.79798889160156, "logps/rejected": -384.26885986328125, "loss": 4.8132, "margin_dpo/margin_mean": 37.65985870361328, "margin_dpo/margin_std": 79.15308380126953, "step": 203 }, { "epoch": 0.4272251308900524, "fcm_dpo/beta": 0.009114697575569153, "fcm_dpo/delta": 0.0330776572227478, "fcm_dpo/margin": 56.529239654541016, "fcm_dpo/q_t": 0.3886546194553375, "grad_norm": 108.35669708251953, "learning_rate": 3.5551627605944746e-07, "logits/chosen": -0.9038645625114441, "logits/rejected": -0.8774609565734863, "logps/chosen": -403.1597900390625, "logps/ref_chosen": -318.7960510253906, "logps/ref_rejected": -269.69921875, "logps/rejected": -410.59222412109375, "loss": 4.2776, "margin_dpo/margin_mean": 56.529239654541016, "margin_dpo/margin_std": 82.768798828125, "step": 204 }, { "epoch": 0.4293193717277487, "fcm_dpo/beta": 0.009092997759580612, "fcm_dpo/delta": -0.04461819678544998, "fcm_dpo/margin": 64.81192779541016, "fcm_dpo/q_t": 0.3769741952419281, "grad_norm": 94.5504379272461, "learning_rate": 3.5385375325047163e-07, "logits/chosen": -0.8502748012542725, "logits/rejected": -0.8185191750526428, "logps/chosen": -365.49652099609375, "logps/ref_chosen": -283.7620544433594, "logps/ref_rejected": -297.69439697265625, "logps/rejected": -444.24078369140625, "loss": 4.1451, "margin_dpo/margin_mean": 64.81192779541016, "margin_dpo/margin_std": 90.32294464111328, "step": 205 }, { "epoch": 0.431413612565445, "fcm_dpo/beta": 0.008791347965598106, "fcm_dpo/delta": -0.023827582597732544, "fcm_dpo/margin": 51.878639221191406, "fcm_dpo/q_t": 0.4022713601589203, "grad_norm": 111.39179992675781, "learning_rate": 3.5218566107988867e-07, "logits/chosen": -0.8792861104011536, "logits/rejected": -0.901611864566803, "logps/chosen": -378.02874755859375, "logps/ref_chosen": -293.66387939453125, "logps/ref_rejected": -291.3056640625, "logps/rejected": -427.5492248535156, "loss": 4.557, "margin_dpo/margin_mean": 51.878639221191406, "margin_dpo/margin_std": 91.19255065917969, "step": 206 }, { "epoch": 0.43350785340314135, "fcm_dpo/beta": 0.008793886750936508, "fcm_dpo/delta": 0.03257821500301361, "fcm_dpo/margin": 48.695133209228516, "fcm_dpo/q_t": 0.4082852602005005, "grad_norm": 128.17391967773438, "learning_rate": 3.505120890024195e-07, "logits/chosen": -0.8236503005027771, "logits/rejected": -0.8320968747138977, "logps/chosen": -345.8304138183594, "logps/ref_chosen": -270.5350646972656, "logps/ref_rejected": -278.7747497558594, "logps/rejected": -402.7652282714844, "loss": 4.7188, "margin_dpo/margin_mean": 48.695133209228516, "margin_dpo/margin_std": 98.31978607177734, "step": 207 }, { "epoch": 0.4356020942408377, "fcm_dpo/beta": 0.008899745531380177, "fcm_dpo/delta": -0.023412320762872696, "fcm_dpo/margin": 61.56591796875, "fcm_dpo/q_t": 0.38415008783340454, "grad_norm": 79.3017807006836, "learning_rate": 3.4883312676665534e-07, "logits/chosen": -0.8775085806846619, "logits/rejected": -0.8293582201004028, "logps/chosen": -359.858642578125, "logps/ref_chosen": -279.582763671875, "logps/ref_rejected": -290.041015625, "logps/rejected": -431.8828125, "loss": 4.1762, "margin_dpo/margin_mean": 61.565914154052734, "margin_dpo/margin_std": 86.33106994628906, "step": 208 }, { "epoch": 0.437696335078534, "fcm_dpo/beta": 0.00878224615007639, "fcm_dpo/delta": 0.0022584167309105396, "fcm_dpo/margin": 38.80632781982422, "fcm_dpo/q_t": 0.42601528763771057, "grad_norm": 98.5056381225586, "learning_rate": 3.4714886441024573e-07, "logits/chosen": -0.7886694669723511, "logits/rejected": -0.7872456312179565, "logps/chosen": -404.982177734375, "logps/ref_chosen": -318.8725280761719, "logps/ref_rejected": -270.64324951171875, "logps/rejected": -395.55926513671875, "loss": 4.8535, "margin_dpo/margin_mean": 38.80632781982422, "margin_dpo/margin_std": 85.54448699951172, "step": 209 }, { "epoch": 0.4397905759162304, "fcm_dpo/beta": 0.008630942553281784, "fcm_dpo/delta": -0.02474762126803398, "fcm_dpo/margin": 52.63178253173828, "fcm_dpo/q_t": 0.4012606739997864, "grad_norm": 83.42438507080078, "learning_rate": 3.454593922550693e-07, "logits/chosen": -0.8290054798126221, "logits/rejected": -0.8178330659866333, "logps/chosen": -358.53643798828125, "logps/ref_chosen": -283.14031982421875, "logps/ref_rejected": -287.2986755371094, "logps/rejected": -415.3265686035156, "loss": 4.4553, "margin_dpo/margin_mean": 52.63178253173828, "margin_dpo/margin_std": 86.18547058105469, "step": 210 }, { "epoch": 0.4418848167539267, "fcm_dpo/beta": 0.008338711224496365, "fcm_dpo/delta": -0.06341198086738586, "fcm_dpo/margin": 64.17230987548828, "fcm_dpo/q_t": 0.3805708587169647, "grad_norm": 77.78020477294922, "learning_rate": 3.4376480090239047e-07, "logits/chosen": -0.8488391041755676, "logits/rejected": -0.8283941149711609, "logps/chosen": -342.4434509277344, "logps/ref_chosen": -276.4228515625, "logps/ref_rejected": -252.40603637695312, "logps/rejected": -382.59893798828125, "loss": 4.086, "margin_dpo/margin_mean": 64.17230224609375, "margin_dpo/margin_std": 74.36666107177734, "step": 211 }, { "epoch": 0.44397905759162304, "fcm_dpo/beta": 0.008269790560007095, "fcm_dpo/delta": 0.03665412217378616, "fcm_dpo/margin": 43.63727951049805, "fcm_dpo/q_t": 0.41719359159469604, "grad_norm": 91.59362030029297, "learning_rate": 3.4206518122800055e-07, "logits/chosen": -0.8333749771118164, "logits/rejected": -0.8325668573379517, "logps/chosen": -348.72747802734375, "logps/ref_chosen": -271.7055358886719, "logps/ref_rejected": -241.18511962890625, "logps/rejected": -361.84442138671875, "loss": 4.6904, "margin_dpo/margin_mean": 43.63727951049805, "margin_dpo/margin_std": 80.35758209228516, "step": 212 }, { "epoch": 0.44607329842931936, "fcm_dpo/beta": 0.008552048355340958, "fcm_dpo/delta": 0.023182792589068413, "fcm_dpo/margin": 47.81290817260742, "fcm_dpo/q_t": 0.41327401995658875, "grad_norm": 95.78131866455078, "learning_rate": 3.403606243773448e-07, "logits/chosen": -0.8298773765563965, "logits/rejected": -0.8453171849250793, "logps/chosen": -382.4115295410156, "logps/ref_chosen": -302.2976379394531, "logps/ref_rejected": -303.6202087402344, "logps/rejected": -431.5469970703125, "loss": 4.6235, "margin_dpo/margin_mean": 47.81290054321289, "margin_dpo/margin_std": 89.57734680175781, "step": 213 }, { "epoch": 0.4481675392670157, "fcm_dpo/beta": 0.008502138778567314, "fcm_dpo/delta": -0.031188862398266792, "fcm_dpo/margin": 54.75181579589844, "fcm_dpo/q_t": 0.39819973707199097, "grad_norm": 102.20772552490234, "learning_rate": 3.3865122176063385e-07, "logits/chosen": -0.8368493318557739, "logits/rejected": -0.8382536172866821, "logps/chosen": -367.3687438964844, "logps/ref_chosen": -272.13262939453125, "logps/ref_rejected": -294.82354736328125, "logps/rejected": -444.81146240234375, "loss": 4.3495, "margin_dpo/margin_mean": 54.75181579589844, "margin_dpo/margin_std": 77.65438842773438, "step": 214 }, { "epoch": 0.450261780104712, "fcm_dpo/beta": 0.008280987851321697, "fcm_dpo/delta": -0.03984394669532776, "fcm_dpo/margin": 41.743927001953125, "fcm_dpo/q_t": 0.4267158508300781, "grad_norm": 97.85367584228516, "learning_rate": 3.3693706504794243e-07, "logits/chosen": -0.8805428743362427, "logits/rejected": -0.8621854186058044, "logps/chosen": -387.81982421875, "logps/ref_chosen": -291.3782958984375, "logps/ref_rejected": -261.05792236328125, "logps/rejected": -399.2433166503906, "loss": 4.8072, "margin_dpo/margin_mean": 41.74393081665039, "margin_dpo/margin_std": 86.34469604492188, "step": 215 }, { "epoch": 0.4523560209424084, "fcm_dpo/beta": 0.008214929141104221, "fcm_dpo/delta": 0.013393443077802658, "fcm_dpo/margin": 53.72854995727539, "fcm_dpo/q_t": 0.40446725487709045, "grad_norm": 105.10110473632812, "learning_rate": 3.3521824616429284e-07, "logits/chosen": -0.8951680660247803, "logits/rejected": -0.8882848024368286, "logps/chosen": -429.6124572753906, "logps/ref_chosen": -338.50543212890625, "logps/ref_rejected": -305.76104736328125, "logps/rejected": -450.5965881347656, "loss": 4.5592, "margin_dpo/margin_mean": 53.728546142578125, "margin_dpo/margin_std": 95.15604400634766, "step": 216 }, { "epoch": 0.4544502617801047, "fcm_dpo/beta": 0.008185570128262043, "fcm_dpo/delta": 0.013848704285919666, "fcm_dpo/margin": 65.41835021972656, "fcm_dpo/q_t": 0.3857192099094391, "grad_norm": 82.3330307006836, "learning_rate": 3.334948572847253e-07, "logits/chosen": -0.8043022751808167, "logits/rejected": -0.7672190070152283, "logps/chosen": -390.0213928222656, "logps/ref_chosen": -293.5498046875, "logps/ref_rejected": -256.7830810546875, "logps/rejected": -418.6730651855469, "loss": 4.2157, "margin_dpo/margin_mean": 65.41835021972656, "margin_dpo/margin_std": 92.30606079101562, "step": 217 }, { "epoch": 0.45654450261780105, "fcm_dpo/beta": 0.008206600323319435, "fcm_dpo/delta": 0.0029644761234521866, "fcm_dpo/margin": 62.74656295776367, "fcm_dpo/q_t": 0.38812729716300964, "grad_norm": 92.55133819580078, "learning_rate": 3.317669908293554e-07, "logits/chosen": -0.8422492742538452, "logits/rejected": -0.8610657453536987, "logps/chosen": -415.7991027832031, "logps/ref_chosen": -320.579345703125, "logps/ref_rejected": -294.0381164550781, "logps/rejected": -452.00445556640625, "loss": 4.2441, "margin_dpo/margin_mean": 62.74656295776367, "margin_dpo/margin_std": 89.6981201171875, "step": 218 }, { "epoch": 0.4586387434554974, "fcm_dpo/beta": 0.008203094825148582, "fcm_dpo/delta": -0.02315128594636917, "fcm_dpo/margin": 59.49258041381836, "fcm_dpo/q_t": 0.39354074001312256, "grad_norm": 100.95577239990234, "learning_rate": 3.300347394584172e-07, "logits/chosen": -0.8339366316795349, "logits/rejected": -0.8600754141807556, "logps/chosen": -366.7186279296875, "logps/ref_chosen": -268.4186096191406, "logps/ref_rejected": -265.7808837890625, "logps/rejected": -423.57342529296875, "loss": 4.3875, "margin_dpo/margin_mean": 59.49258041381836, "margin_dpo/margin_std": 91.78353118896484, "step": 219 }, { "epoch": 0.4607329842931937, "fcm_dpo/beta": 0.008344794623553753, "fcm_dpo/delta": 0.034630343317985535, "fcm_dpo/margin": 63.56485366821289, "fcm_dpo/q_t": 0.38438913226127625, "grad_norm": 139.32931518554688, "learning_rate": 3.2829819606729477e-07, "logits/chosen": -0.8834062218666077, "logits/rejected": -0.8546550869941711, "logps/chosen": -409.75164794921875, "logps/ref_chosen": -312.8864440917969, "logps/ref_rejected": -259.5191955566406, "logps/rejected": -419.9492492675781, "loss": 4.2254, "margin_dpo/margin_mean": 63.56485366821289, "margin_dpo/margin_std": 89.70089721679688, "step": 220 }, { "epoch": 0.46282722513089003, "fcm_dpo/beta": 0.008402268402278423, "fcm_dpo/delta": -0.024994712322950363, "fcm_dpo/margin": 48.53934860229492, "fcm_dpo/q_t": 0.4136922061443329, "grad_norm": 94.52365112304688, "learning_rate": 3.265574537815398e-07, "logits/chosen": -0.8016759157180786, "logits/rejected": -0.81261146068573, "logps/chosen": -406.11309814453125, "logps/ref_chosen": -300.32586669921875, "logps/ref_rejected": -286.312255859375, "logps/rejected": -440.63885498046875, "loss": 4.67, "margin_dpo/margin_mean": 48.53934860229492, "margin_dpo/margin_std": 92.02408599853516, "step": 221 }, { "epoch": 0.4649214659685864, "fcm_dpo/beta": 0.008166534826159477, "fcm_dpo/delta": -0.01898489147424698, "fcm_dpo/margin": 52.806861877441406, "fcm_dpo/q_t": 0.40435272455215454, "grad_norm": 104.70159149169922, "learning_rate": 3.248126059518784e-07, "logits/chosen": -0.8911526203155518, "logits/rejected": -0.8636682629585266, "logps/chosen": -396.1815490722656, "logps/ref_chosen": -297.1113586425781, "logps/ref_rejected": -235.53146362304688, "logps/rejected": -387.40850830078125, "loss": 4.4779, "margin_dpo/margin_mean": 52.806861877441406, "margin_dpo/margin_std": 84.82585144042969, "step": 222 }, { "epoch": 0.46701570680628274, "fcm_dpo/beta": 0.008165441453456879, "fcm_dpo/delta": 0.014042757451534271, "fcm_dpo/margin": 55.89503479003906, "fcm_dpo/q_t": 0.3992462754249573, "grad_norm": 84.36212921142578, "learning_rate": 3.230637461492043e-07, "logits/chosen": -0.8363898992538452, "logits/rejected": -0.8041598200798035, "logps/chosen": -383.9892272949219, "logps/ref_chosen": -286.41510009765625, "logps/ref_rejected": -241.1181640625, "logps/rejected": -394.58734130859375, "loss": 4.39, "margin_dpo/margin_mean": 55.89503860473633, "margin_dpo/margin_std": 85.06916046142578, "step": 223 }, { "epoch": 0.46910994764397906, "fcm_dpo/beta": 0.008199742995202541, "fcm_dpo/delta": -0.024815764278173447, "fcm_dpo/margin": 61.84333038330078, "fcm_dpo/q_t": 0.389165461063385, "grad_norm": 104.28144073486328, "learning_rate": 3.213109681595612e-07, "logits/chosen": -0.802398145198822, "logits/rejected": -0.8188230395317078, "logps/chosen": -336.8028259277344, "logps/ref_chosen": -249.49234008789062, "logps/ref_rejected": -233.10752868652344, "logps/rejected": -382.2613525390625, "loss": 4.2198, "margin_dpo/margin_mean": 61.84333038330078, "margin_dpo/margin_std": 82.82916259765625, "step": 224 }, { "epoch": 0.4712041884816754, "fcm_dpo/beta": 0.008005239069461823, "fcm_dpo/delta": -0.003811831586062908, "fcm_dpo/margin": 54.48173904418945, "fcm_dpo/q_t": 0.40706250071525574, "grad_norm": 104.64070129394531, "learning_rate": 3.1955436597911315e-07, "logits/chosen": -0.8437448143959045, "logits/rejected": -0.819525957107544, "logps/chosen": -413.5852966308594, "logps/ref_chosen": -311.8583679199219, "logps/ref_rejected": -336.8523864746094, "logps/rejected": -493.06103515625, "loss": 4.5478, "margin_dpo/margin_mean": 54.48173904418945, "margin_dpo/margin_std": 95.53211975097656, "step": 225 }, { "epoch": 0.4732984293193717, "fcm_dpo/beta": 0.007992172613739967, "fcm_dpo/delta": -0.002601095475256443, "fcm_dpo/margin": 51.423744201660156, "fcm_dpo/q_t": 0.41092240810394287, "grad_norm": 82.05426788330078, "learning_rate": 3.1779403380910425e-07, "logits/chosen": -0.8683302402496338, "logits/rejected": -0.8574090003967285, "logps/chosen": -337.87457275390625, "logps/ref_chosen": -252.20123291015625, "logps/ref_rejected": -254.41162109375, "logps/rejected": -391.5086975097656, "loss": 4.5394, "margin_dpo/margin_mean": 51.423744201660156, "margin_dpo/margin_std": 88.69334411621094, "step": 226 }, { "epoch": 0.47539267015706804, "fcm_dpo/beta": 0.007951832376420498, "fcm_dpo/delta": -0.0021827910095453262, "fcm_dpo/margin": 58.96784973144531, "fcm_dpo/q_t": 0.3964974880218506, "grad_norm": 76.2507553100586, "learning_rate": 3.160300660508064e-07, "logits/chosen": -0.8247069120407104, "logits/rejected": -0.8209613561630249, "logps/chosen": -369.0705871582031, "logps/ref_chosen": -285.25946044921875, "logps/ref_rejected": -261.3220520019531, "logps/rejected": -404.10101318359375, "loss": 4.3982, "margin_dpo/margin_mean": 58.96784973144531, "margin_dpo/margin_std": 91.02167510986328, "step": 227 }, { "epoch": 0.4774869109947644, "fcm_dpo/beta": 0.007981422357261181, "fcm_dpo/delta": -0.030416294932365417, "fcm_dpo/margin": 59.90766143798828, "fcm_dpo/q_t": 0.39538905024528503, "grad_norm": 87.2895736694336, "learning_rate": 3.1426255730045695e-07, "logits/chosen": -0.844511091709137, "logits/rejected": -0.8116718530654907, "logps/chosen": -387.2881164550781, "logps/ref_chosen": -313.81878662109375, "logps/ref_rejected": -258.07061767578125, "logps/rejected": -391.4476013183594, "loss": 4.2818, "margin_dpo/margin_mean": 59.90766143798828, "margin_dpo/margin_std": 81.06597137451172, "step": 228 }, { "epoch": 0.47958115183246075, "fcm_dpo/beta": 0.007695622276514769, "fcm_dpo/delta": -0.013841855339705944, "fcm_dpo/margin": 64.4743881225586, "fcm_dpo/q_t": 0.3899773061275482, "grad_norm": 108.06454467773438, "learning_rate": 3.1249160234418644e-07, "logits/chosen": -0.8274182677268982, "logits/rejected": -0.8372348546981812, "logps/chosen": -377.1920166015625, "logps/ref_chosen": -291.9707946777344, "logps/ref_rejected": -263.42059326171875, "logps/rejected": -413.1162414550781, "loss": 4.2288, "margin_dpo/margin_mean": 64.4743881225586, "margin_dpo/margin_std": 85.52989196777344, "step": 229 }, { "epoch": 0.4816753926701571, "fcm_dpo/beta": 0.007751506753265858, "fcm_dpo/delta": 0.03374722972512245, "fcm_dpo/margin": 60.86343765258789, "fcm_dpo/q_t": 0.3971996009349823, "grad_norm": 71.40199279785156, "learning_rate": 3.1071729615293424e-07, "logits/chosen": -0.8689441680908203, "logits/rejected": -0.8685297966003418, "logps/chosen": -308.78057861328125, "logps/ref_chosen": -233.2601318359375, "logps/ref_rejected": -238.922119140625, "logps/rejected": -375.3060302734375, "loss": 4.3431, "margin_dpo/margin_mean": 60.863441467285156, "margin_dpo/margin_std": 91.31082153320312, "step": 230 }, { "epoch": 0.4837696335078534, "fcm_dpo/beta": 0.007850628346204758, "fcm_dpo/delta": -0.01595788449048996, "fcm_dpo/margin": 49.51227569580078, "fcm_dpo/q_t": 0.41266465187072754, "grad_norm": 78.52806091308594, "learning_rate": 3.0893973387735683e-07, "logits/chosen": -0.8488875031471252, "logits/rejected": -0.8367761969566345, "logps/chosen": -408.9139099121094, "logps/ref_chosen": -322.1551818847656, "logps/ref_rejected": -280.97613525390625, "logps/rejected": -417.24713134765625, "loss": 4.5434, "margin_dpo/margin_mean": 49.51227569580078, "margin_dpo/margin_std": 79.67231750488281, "step": 231 }, { "epoch": 0.48586387434554973, "fcm_dpo/beta": 0.0078697195276618, "fcm_dpo/delta": -0.003923982381820679, "fcm_dpo/margin": 52.60862350463867, "fcm_dpo/q_t": 0.4088551998138428, "grad_norm": 140.39244079589844, "learning_rate": 3.071590108427243e-07, "logits/chosen": -0.8175272941589355, "logits/rejected": -0.7981524467468262, "logps/chosen": -354.7743225097656, "logps/ref_chosen": -271.7437744140625, "logps/ref_rejected": -249.94981384277344, "logps/rejected": -385.58905029296875, "loss": 4.5563, "margin_dpo/margin_mean": 52.608619689941406, "margin_dpo/margin_std": 87.00882720947266, "step": 232 }, { "epoch": 0.48795811518324606, "fcm_dpo/beta": 0.007669942919164896, "fcm_dpo/delta": -0.040378112345933914, "fcm_dpo/margin": 58.76781463623047, "fcm_dpo/q_t": 0.40030309557914734, "grad_norm": 71.58476257324219, "learning_rate": 3.05375222543809e-07, "logits/chosen": -0.8641107678413391, "logits/rejected": -0.8540716767311096, "logps/chosen": -365.4216003417969, "logps/ref_chosen": -285.3423156738281, "logps/ref_rejected": -266.34320068359375, "logps/rejected": -405.1903076171875, "loss": 4.3662, "margin_dpo/margin_mean": 58.7678108215332, "margin_dpo/margin_std": 83.56101989746094, "step": 233 }, { "epoch": 0.4900523560209424, "fcm_dpo/beta": 0.007635914720594883, "fcm_dpo/delta": 0.028283506631851196, "fcm_dpo/margin": 50.3465461730957, "fcm_dpo/q_t": 0.41592836380004883, "grad_norm": 68.07914733886719, "learning_rate": 3.035884646397637e-07, "logits/chosen": -0.8356366753578186, "logits/rejected": -0.8209684491157532, "logps/chosen": -379.02130126953125, "logps/ref_chosen": -294.9057312011719, "logps/ref_rejected": -299.37054443359375, "logps/rejected": -433.8326416015625, "loss": 4.6745, "margin_dpo/margin_mean": 50.3465461730957, "margin_dpo/margin_std": 95.68152618408203, "step": 234 }, { "epoch": 0.49214659685863876, "fcm_dpo/beta": 0.007906999439001083, "fcm_dpo/delta": 0.039126671850681305, "fcm_dpo/margin": 60.62733840942383, "fcm_dpo/q_t": 0.39487558603286743, "grad_norm": 72.7540512084961, "learning_rate": 3.017988329489923e-07, "logits/chosen": -0.8502811789512634, "logits/rejected": -0.8423137068748474, "logps/chosen": -369.0730285644531, "logps/ref_chosen": -289.49755859375, "logps/ref_rejected": -247.55076599121094, "logps/rejected": -387.7535400390625, "loss": 4.3199, "margin_dpo/margin_mean": 60.627342224121094, "margin_dpo/margin_std": 89.68913269042969, "step": 235 }, { "epoch": 0.4942408376963351, "fcm_dpo/beta": 0.008040757849812508, "fcm_dpo/delta": 0.023043854162096977, "fcm_dpo/margin": 54.43937301635742, "fcm_dpo/q_t": 0.40379512310028076, "grad_norm": 90.92185974121094, "learning_rate": 3.000064234440111e-07, "logits/chosen": -0.8824329376220703, "logits/rejected": -0.8828592896461487, "logps/chosen": -365.1532287597656, "logps/ref_chosen": -288.8846435546875, "logps/ref_rejected": -242.0452880859375, "logps/rejected": -372.7532043457031, "loss": 4.433, "margin_dpo/margin_mean": 54.43937301635742, "margin_dpo/margin_std": 85.27285766601562, "step": 236 }, { "epoch": 0.4963350785340314, "fcm_dpo/beta": 0.008117102086544037, "fcm_dpo/delta": -0.0058396486565470695, "fcm_dpo/margin": 55.81697082519531, "fcm_dpo/q_t": 0.40049877762794495, "grad_norm": 84.8370361328125, "learning_rate": 2.9821133224630223e-07, "logits/chosen": -0.8333731293678284, "logits/rejected": -0.8125811815261841, "logps/chosen": -347.8975524902344, "logps/ref_chosen": -265.47869873046875, "logps/ref_rejected": -267.9891357421875, "logps/rejected": -406.22491455078125, "loss": 4.3497, "margin_dpo/margin_mean": 55.81697082519531, "margin_dpo/margin_std": 81.0350112915039, "step": 237 }, { "epoch": 0.49842931937172774, "fcm_dpo/beta": 0.008078444749116898, "fcm_dpo/delta": 0.00038408301770687103, "fcm_dpo/margin": 53.928340911865234, "fcm_dpo/q_t": 0.40683451294898987, "grad_norm": 89.60171508789062, "learning_rate": 2.964136556211588e-07, "logits/chosen": -0.8460046052932739, "logits/rejected": -0.8157504200935364, "logps/chosen": -401.5897521972656, "logps/ref_chosen": -312.0026550292969, "logps/ref_rejected": -270.0257263183594, "logps/rejected": -413.5411376953125, "loss": 4.4321, "margin_dpo/margin_mean": 53.928340911865234, "margin_dpo/margin_std": 84.81220245361328, "step": 238 }, { "epoch": 0.5005235602094241, "fcm_dpo/beta": 0.008109199814498425, "fcm_dpo/delta": 0.005069888196885586, "fcm_dpo/margin": 51.8012580871582, "fcm_dpo/q_t": 0.4107738733291626, "grad_norm": 78.47299194335938, "learning_rate": 2.946134899725226e-07, "logits/chosen": -0.829999566078186, "logits/rejected": -0.8703840374946594, "logps/chosen": -344.9981994628906, "logps/ref_chosen": -266.9936218261719, "logps/ref_rejected": -276.13525390625, "logps/rejected": -405.9410705566406, "loss": 4.6567, "margin_dpo/margin_mean": 51.80126190185547, "margin_dpo/margin_std": 100.10111999511719, "step": 239 }, { "epoch": 0.5026178010471204, "fcm_dpo/beta": 0.008296947926282883, "fcm_dpo/delta": 0.03266207128763199, "fcm_dpo/margin": 59.96100616455078, "fcm_dpo/q_t": 0.3940165638923645, "grad_norm": 78.5995864868164, "learning_rate": 2.9281093183781403e-07, "logits/chosen": -0.9000136852264404, "logits/rejected": -0.8934633135795593, "logps/chosen": -367.1950988769531, "logps/ref_chosen": -286.0997619628906, "logps/ref_rejected": -256.9459533691406, "logps/rejected": -398.0023193359375, "loss": 4.3154, "margin_dpo/margin_mean": 59.96100616455078, "margin_dpo/margin_std": 90.11138153076172, "step": 240 }, { "epoch": 0.5047120418848168, "fcm_dpo/beta": 0.008294462226331234, "fcm_dpo/delta": -0.001803908497095108, "fcm_dpo/margin": 46.42206573486328, "fcm_dpo/q_t": 0.4186059236526489, "grad_norm": 87.16766357421875, "learning_rate": 2.910060778827554e-07, "logits/chosen": -0.8022783994674683, "logits/rejected": -0.7795644998550415, "logps/chosen": -354.74188232421875, "logps/ref_chosen": -260.6881408691406, "logps/ref_rejected": -250.02915954589844, "logps/rejected": -390.5049743652344, "loss": 4.6915, "margin_dpo/margin_mean": 46.422061920166016, "margin_dpo/margin_std": 88.32010650634766, "step": 241 }, { "epoch": 0.506806282722513, "fcm_dpo/beta": 0.008265901356935501, "fcm_dpo/delta": 0.011965340934693813, "fcm_dpo/margin": 57.2177619934082, "fcm_dpo/q_t": 0.3987177908420563, "grad_norm": 129.31790161132812, "learning_rate": 2.891990248961871e-07, "logits/chosen": -0.8726097345352173, "logits/rejected": -0.8508012890815735, "logps/chosen": -352.75592041015625, "logps/ref_chosen": -270.51397705078125, "logps/ref_rejected": -244.8560791015625, "logps/rejected": -384.3157653808594, "loss": 4.3126, "margin_dpo/margin_mean": 57.2177619934082, "margin_dpo/margin_std": 83.8380126953125, "step": 242 }, { "epoch": 0.5089005235602094, "fcm_dpo/beta": 0.008414202369749546, "fcm_dpo/delta": -0.02754206582903862, "fcm_dpo/margin": 64.92880249023438, "fcm_dpo/q_t": 0.3845221698284149, "grad_norm": 101.96147918701172, "learning_rate": 2.873898697848762e-07, "logits/chosen": -0.8713305592536926, "logits/rejected": -0.8565847873687744, "logps/chosen": -403.90106201171875, "logps/ref_chosen": -324.68206787109375, "logps/ref_rejected": -307.1111755371094, "logps/rejected": -451.2590026855469, "loss": 4.1495, "margin_dpo/margin_mean": 64.92879486083984, "margin_dpo/margin_std": 87.35710906982422, "step": 243 }, { "epoch": 0.5109947643979058, "fcm_dpo/beta": 0.008167761377990246, "fcm_dpo/delta": -0.007273124065250158, "fcm_dpo/margin": 65.28921508789062, "fcm_dpo/q_t": 0.38221222162246704, "grad_norm": 113.77725219726562, "learning_rate": 2.8557870956832133e-07, "logits/chosen": -0.8651580810546875, "logits/rejected": -0.8182386755943298, "logps/chosen": -400.8844299316406, "logps/ref_chosen": -318.979248046875, "logps/ref_rejected": -269.67572021484375, "logps/rejected": -416.8700866699219, "loss": 4.1858, "margin_dpo/margin_mean": 65.28921508789062, "margin_dpo/margin_std": 85.85536193847656, "step": 244 }, { "epoch": 0.5130890052356021, "fcm_dpo/beta": 0.008117292076349258, "fcm_dpo/delta": -0.022571483626961708, "fcm_dpo/margin": 65.36834716796875, "fcm_dpo/q_t": 0.3831850290298462, "grad_norm": 71.29036712646484, "learning_rate": 2.837656413735479e-07, "logits/chosen": -0.8694513440132141, "logits/rejected": -0.8749232292175293, "logps/chosen": -376.11077880859375, "logps/ref_chosen": -294.8980712890625, "logps/ref_rejected": -239.8111114501953, "logps/rejected": -386.3921813964844, "loss": 4.1545, "margin_dpo/margin_mean": 65.36834716796875, "margin_dpo/margin_std": 83.06272888183594, "step": 245 }, { "epoch": 0.5151832460732985, "fcm_dpo/beta": 0.007969960570335388, "fcm_dpo/delta": -0.008928188122808933, "fcm_dpo/margin": 44.71929931640625, "fcm_dpo/q_t": 0.4241969585418701, "grad_norm": 86.78738403320312, "learning_rate": 2.8195076242990116e-07, "logits/chosen": -0.8513779044151306, "logits/rejected": -0.8571193218231201, "logps/chosen": -380.00701904296875, "logps/ref_chosen": -280.6854248046875, "logps/ref_rejected": -253.65382385253906, "logps/rejected": -397.6946716308594, "loss": 4.8125, "margin_dpo/margin_mean": 44.71929931640625, "margin_dpo/margin_std": 96.33696746826172, "step": 246 }, { "epoch": 0.5172774869109947, "fcm_dpo/beta": 0.007945312187075615, "fcm_dpo/delta": -0.0081523098051548, "fcm_dpo/margin": 59.00965881347656, "fcm_dpo/q_t": 0.3986579179763794, "grad_norm": 63.86665725708008, "learning_rate": 2.801341700638307e-07, "logits/chosen": -0.8496901392936707, "logits/rejected": -0.8478038311004639, "logps/chosen": -373.80157470703125, "logps/ref_chosen": -281.1091003417969, "logps/ref_rejected": -260.3700866699219, "logps/rejected": -412.0722351074219, "loss": 4.3814, "margin_dpo/margin_mean": 59.0096549987793, "margin_dpo/margin_std": 89.11226654052734, "step": 247 }, { "epoch": 0.5193717277486911, "fcm_dpo/beta": 0.007763488218188286, "fcm_dpo/delta": -0.02389615960419178, "fcm_dpo/margin": 55.78809356689453, "fcm_dpo/q_t": 0.4041425585746765, "grad_norm": 117.25294494628906, "learning_rate": 2.7831596169367227e-07, "logits/chosen": -0.8269961476325989, "logits/rejected": -0.8366529941558838, "logps/chosen": -363.4480895996094, "logps/ref_chosen": -270.318359375, "logps/ref_rejected": -233.46778869628906, "logps/rejected": -382.3856201171875, "loss": 4.4813, "margin_dpo/margin_mean": 55.78809356689453, "margin_dpo/margin_std": 88.58773803710938, "step": 248 }, { "epoch": 0.5214659685863874, "fcm_dpo/beta": 0.00790868978947401, "fcm_dpo/delta": 0.03692461922764778, "fcm_dpo/margin": 48.326904296875, "fcm_dpo/q_t": 0.4165218472480774, "grad_norm": 106.95769500732422, "learning_rate": 2.7649623482442274e-07, "logits/chosen": -0.8559276461601257, "logits/rejected": -0.8305518627166748, "logps/chosen": -385.6744689941406, "logps/ref_chosen": -275.8088684082031, "logps/ref_rejected": -243.45138549804688, "logps/rejected": -401.6439514160156, "loss": 4.7438, "margin_dpo/margin_mean": 48.326904296875, "margin_dpo/margin_std": 99.37535095214844, "step": 249 }, { "epoch": 0.5235602094240838, "fcm_dpo/beta": 0.00772194704040885, "fcm_dpo/delta": -0.05862649157643318, "fcm_dpo/margin": 67.97274017333984, "fcm_dpo/q_t": 0.38753461837768555, "grad_norm": 97.94071197509766, "learning_rate": 2.7467508704251135e-07, "logits/chosen": -0.8564193248748779, "logits/rejected": -0.8508210182189941, "logps/chosen": -403.1372375488281, "logps/ref_chosen": -291.68524169921875, "logps/ref_rejected": -284.5358581542969, "logps/rejected": -463.9606018066406, "loss": 4.3415, "margin_dpo/margin_mean": 67.97273254394531, "margin_dpo/margin_std": 104.22421264648438, "step": 250 }, { "epoch": 0.5256544502617801, "fcm_dpo/beta": 0.007520149927586317, "fcm_dpo/delta": -0.030201872810721397, "fcm_dpo/margin": 60.210289001464844, "fcm_dpo/q_t": 0.40624505281448364, "grad_norm": 90.0475082397461, "learning_rate": 2.7285261601056697e-07, "logits/chosen": -0.8678263425827026, "logits/rejected": -0.8515715003013611, "logps/chosen": -383.1903991699219, "logps/ref_chosen": -281.736572265625, "logps/ref_rejected": -255.9419708251953, "logps/rejected": -417.6060485839844, "loss": 4.4574, "margin_dpo/margin_mean": 60.210289001464844, "margin_dpo/margin_std": 97.2637939453125, "step": 251 }, { "epoch": 0.5277486910994764, "fcm_dpo/beta": 0.007303354796022177, "fcm_dpo/delta": -0.024126823991537094, "fcm_dpo/margin": 57.908607482910156, "fcm_dpo/q_t": 0.40556401014328003, "grad_norm": 121.3109130859375, "learning_rate": 2.7102891946217994e-07, "logits/chosen": -0.906019926071167, "logits/rejected": -0.8782291412353516, "logps/chosen": -409.6369323730469, "logps/ref_chosen": -295.9674072265625, "logps/ref_rejected": -280.111572265625, "logps/rejected": -451.689697265625, "loss": 4.5379, "margin_dpo/margin_mean": 57.90860366821289, "margin_dpo/margin_std": 97.96647644042969, "step": 252 }, { "epoch": 0.5298429319371728, "fcm_dpo/beta": 0.007306728512048721, "fcm_dpo/delta": 0.008086594752967358, "fcm_dpo/margin": 57.006065368652344, "fcm_dpo/q_t": 0.4107561707496643, "grad_norm": 95.82548522949219, "learning_rate": 2.692040951966617e-07, "logits/chosen": -0.8742294311523438, "logits/rejected": -0.8646455407142639, "logps/chosen": -400.19940185546875, "logps/ref_chosen": -277.072265625, "logps/ref_rejected": -247.31643676757812, "logps/rejected": -427.44964599609375, "loss": 4.5858, "margin_dpo/margin_mean": 57.006065368652344, "margin_dpo/margin_std": 100.61224365234375, "step": 253 }, { "epoch": 0.5319371727748691, "fcm_dpo/beta": 0.007321351673454046, "fcm_dpo/delta": -0.030212795361876488, "fcm_dpo/margin": 61.80467224121094, "fcm_dpo/q_t": 0.4016120135784149, "grad_norm": 80.6700439453125, "learning_rate": 2.6737824107379947e-07, "logits/chosen": -0.8140766620635986, "logits/rejected": -0.7952826619148254, "logps/chosen": -390.226806640625, "logps/ref_chosen": -269.9478454589844, "logps/ref_rejected": -249.45005798339844, "logps/rejected": -431.5336608886719, "loss": 4.4121, "margin_dpo/margin_mean": 61.8046760559082, "margin_dpo/margin_std": 94.67742919921875, "step": 254 }, { "epoch": 0.5340314136125655, "fcm_dpo/beta": 0.007317520212382078, "fcm_dpo/delta": 0.02305128611624241, "fcm_dpo/margin": 71.82103729248047, "fcm_dpo/q_t": 0.38811203837394714, "grad_norm": 86.81169128417969, "learning_rate": 2.655514550086086e-07, "logits/chosen": -0.8274251222610474, "logits/rejected": -0.78985595703125, "logps/chosen": -420.36663818359375, "logps/ref_chosen": -306.6552734375, "logps/ref_rejected": -254.47528076171875, "logps/rejected": -440.00762939453125, "loss": 4.333, "margin_dpo/margin_mean": 71.82103729248047, "margin_dpo/margin_std": 111.82829284667969, "step": 255 }, { "epoch": 0.5361256544502618, "fcm_dpo/beta": 0.007091246545314789, "fcm_dpo/delta": -0.07956443727016449, "fcm_dpo/margin": 65.31961822509766, "fcm_dpo/q_t": 0.3946439325809479, "grad_norm": 143.94879150390625, "learning_rate": 2.6372383496608186e-07, "logits/chosen": -0.8587058782577515, "logits/rejected": -0.8423352837562561, "logps/chosen": -442.55224609375, "logps/ref_chosen": -323.7181701660156, "logps/ref_rejected": -254.1871337890625, "logps/rejected": -438.3408203125, "loss": 4.5335, "margin_dpo/margin_mean": 65.31961059570312, "margin_dpo/margin_std": 108.02703094482422, "step": 256 }, { "epoch": 0.5382198952879581, "fcm_dpo/beta": 0.007004152052104473, "fcm_dpo/delta": 0.047338955104351044, "fcm_dpo/margin": 67.73786926269531, "fcm_dpo/q_t": 0.39716964960098267, "grad_norm": 107.15489959716797, "learning_rate": 2.618954789559356e-07, "logits/chosen": -0.8520160913467407, "logits/rejected": -0.8423279523849487, "logps/chosen": -389.7987365722656, "logps/ref_chosen": -267.21209716796875, "logps/ref_rejected": -249.12579345703125, "logps/rejected": -439.4503173828125, "loss": 4.3327, "margin_dpo/margin_mean": 67.73786926269531, "margin_dpo/margin_std": 101.2127456665039, "step": 257 }, { "epoch": 0.5403141361256545, "fcm_dpo/beta": 0.007009489927440882, "fcm_dpo/delta": -0.022928498685359955, "fcm_dpo/margin": 67.92435455322266, "fcm_dpo/q_t": 0.39396271109580994, "grad_norm": 94.51057434082031, "learning_rate": 2.600664850273538e-07, "logits/chosen": -0.8741627335548401, "logits/rejected": -0.8431136608123779, "logps/chosen": -408.9303894042969, "logps/ref_chosen": -277.6827392578125, "logps/ref_rejected": -250.73385620117188, "logps/rejected": -449.90582275390625, "loss": 4.3035, "margin_dpo/margin_mean": 67.92435455322266, "margin_dpo/margin_std": 93.82572937011719, "step": 258 }, { "epoch": 0.5424083769633508, "fcm_dpo/beta": 0.007051707711070776, "fcm_dpo/delta": 0.012171324342489243, "fcm_dpo/margin": 62.30729675292969, "fcm_dpo/q_t": 0.4041876196861267, "grad_norm": 89.43299102783203, "learning_rate": 2.582369512637302e-07, "logits/chosen": -0.8937543630599976, "logits/rejected": -0.8885373473167419, "logps/chosen": -417.1524658203125, "logps/ref_chosen": -294.6099853515625, "logps/ref_rejected": -272.2725830078125, "logps/rejected": -457.1224060058594, "loss": 4.4307, "margin_dpo/margin_mean": 62.30729675292969, "margin_dpo/margin_std": 97.35728454589844, "step": 259 }, { "epoch": 0.5445026178010471, "fcm_dpo/beta": 0.0070616197772324085, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 30.28175163269043, "fcm_dpo/q_t": 0.45381462574005127, "grad_norm": 80.50341796875, "learning_rate": 2.5640697577740815e-07, "logits/chosen": -0.8794345259666443, "logits/rejected": -0.8733927011489868, "logps/chosen": -418.9019775390625, "logps/ref_chosen": -290.85711669921875, "logps/ref_rejected": -277.5970153808594, "logps/rejected": -435.9236755371094, "loss": 5.1902, "margin_dpo/margin_mean": 30.28175163269043, "margin_dpo/margin_std": 95.94783782958984, "step": 260 }, { "epoch": 0.5465968586387434, "fcm_dpo/beta": 0.0070557305589318275, "fcm_dpo/delta": -0.003128012176603079, "fcm_dpo/margin": 55.079566955566406, "fcm_dpo/q_t": 0.41656798124313354, "grad_norm": 77.27826690673828, "learning_rate": 2.5457665670441937e-07, "logits/chosen": -0.7756448984146118, "logits/rejected": -0.7851826548576355, "logps/chosen": -384.649169921875, "logps/ref_chosen": -251.13223266601562, "logps/ref_rejected": -244.76016235351562, "logps/rejected": -433.3566589355469, "loss": 4.6971, "margin_dpo/margin_mean": 55.079559326171875, "margin_dpo/margin_std": 107.6162109375, "step": 261 }, { "epoch": 0.5486910994764398, "fcm_dpo/beta": 0.007122871000319719, "fcm_dpo/delta": -0.004592832177877426, "fcm_dpo/margin": 71.38398742675781, "fcm_dpo/q_t": 0.3885217308998108, "grad_norm": 100.85935974121094, "learning_rate": 2.527460921992209e-07, "logits/chosen": -0.8093084096908569, "logits/rejected": -0.7977215051651001, "logps/chosen": -422.3883972167969, "logps/ref_chosen": -299.7217712402344, "logps/ref_rejected": -277.0969543457031, "logps/rejected": -471.1475524902344, "loss": 4.2029, "margin_dpo/margin_mean": 71.38399505615234, "margin_dpo/margin_std": 94.25498962402344, "step": 262 }, { "epoch": 0.5507853403141362, "fcm_dpo/beta": 0.0070396289229393005, "fcm_dpo/delta": 0.003985295072197914, "fcm_dpo/margin": 61.07171630859375, "fcm_dpo/q_t": 0.4044322073459625, "grad_norm": 107.648681640625, "learning_rate": 2.509153804294318e-07, "logits/chosen": -0.792377769947052, "logits/rejected": -0.7732011079788208, "logps/chosen": -410.0021667480469, "logps/ref_chosen": -280.1349792480469, "logps/ref_rejected": -256.7151184082031, "logps/rejected": -447.654052734375, "loss": 4.5065, "margin_dpo/margin_mean": 61.07171630859375, "margin_dpo/margin_std": 97.67864227294922, "step": 263 }, { "epoch": 0.5528795811518324, "fcm_dpo/beta": 0.006962607614696026, "fcm_dpo/delta": -0.005255230236798525, "fcm_dpo/margin": 73.86559295654297, "fcm_dpo/q_t": 0.388028621673584, "grad_norm": 77.60498046875, "learning_rate": 2.4908461957056825e-07, "logits/chosen": -0.8271849155426025, "logits/rejected": -0.821992039680481, "logps/chosen": -383.2393798828125, "logps/ref_chosen": -260.53509521484375, "logps/ref_rejected": -255.53799438476562, "logps/rejected": -452.1078796386719, "loss": 4.2021, "margin_dpo/margin_mean": 73.86559295654297, "margin_dpo/margin_std": 98.95625305175781, "step": 264 }, { "epoch": 0.5549738219895288, "fcm_dpo/beta": 0.006922694388777018, "fcm_dpo/delta": -0.033377017825841904, "fcm_dpo/margin": 74.40999603271484, "fcm_dpo/q_t": 0.38931435346603394, "grad_norm": 80.55166625976562, "learning_rate": 2.4725390780077905e-07, "logits/chosen": -0.8924263119697571, "logits/rejected": -0.8955501317977905, "logps/chosen": -409.32769775390625, "logps/ref_chosen": -283.7130432128906, "logps/ref_rejected": -270.3209533691406, "logps/rejected": -470.3455505371094, "loss": 4.2945, "margin_dpo/margin_mean": 74.40999603271484, "margin_dpo/margin_std": 109.31571960449219, "step": 265 }, { "epoch": 0.5570680628272251, "fcm_dpo/beta": 0.006731396075338125, "fcm_dpo/delta": -0.037259288132190704, "fcm_dpo/margin": 73.79714965820312, "fcm_dpo/q_t": 0.38967522978782654, "grad_norm": 68.00286865234375, "learning_rate": 2.454233432955807e-07, "logits/chosen": -0.9042876362800598, "logits/rejected": -0.8750791549682617, "logps/chosen": -390.12872314453125, "logps/ref_chosen": -278.09930419921875, "logps/ref_rejected": -260.6734619140625, "logps/rejected": -446.5, "loss": 4.1997, "margin_dpo/margin_mean": 73.79714965820312, "margin_dpo/margin_std": 93.62163543701172, "step": 266 }, { "epoch": 0.5591623036649215, "fcm_dpo/beta": 0.006746482569724321, "fcm_dpo/delta": 0.04334060102701187, "fcm_dpo/margin": 60.28854751586914, "fcm_dpo/q_t": 0.40969720482826233, "grad_norm": 72.49388885498047, "learning_rate": 2.435930242225919e-07, "logits/chosen": -0.8423041701316833, "logits/rejected": -0.8496881127357483, "logps/chosen": -408.0243225097656, "logps/ref_chosen": -280.33319091796875, "logps/ref_rejected": -247.78099060058594, "logps/rejected": -435.7605895996094, "loss": 4.5149, "margin_dpo/margin_mean": 60.28854751586914, "margin_dpo/margin_std": 97.47137451171875, "step": 267 }, { "epoch": 0.5612565445026177, "fcm_dpo/beta": 0.006985923275351524, "fcm_dpo/delta": 0.02039477974176407, "fcm_dpo/margin": 73.69112396240234, "fcm_dpo/q_t": 0.38772106170654297, "grad_norm": 83.59169006347656, "learning_rate": 2.4176304873626984e-07, "logits/chosen": -0.8112634420394897, "logits/rejected": -0.7900372743606567, "logps/chosen": -424.6039733886719, "logps/ref_chosen": -304.1787109375, "logps/ref_rejected": -272.80316162109375, "logps/rejected": -466.9195556640625, "loss": 4.2079, "margin_dpo/margin_mean": 73.69112396240234, "margin_dpo/margin_std": 99.36200714111328, "step": 268 }, { "epoch": 0.5633507853403141, "fcm_dpo/beta": 0.007065373472869396, "fcm_dpo/delta": 0.04509638249874115, "fcm_dpo/margin": 58.13600158691406, "fcm_dpo/q_t": 0.4082724452018738, "grad_norm": 88.63798522949219, "learning_rate": 2.399335149726463e-07, "logits/chosen": -0.8427159190177917, "logits/rejected": -0.8367486000061035, "logps/chosen": -372.8246154785156, "logps/ref_chosen": -249.84512329101562, "logps/ref_rejected": -223.37356567382812, "logps/rejected": -404.4891052246094, "loss": 4.5646, "margin_dpo/margin_mean": 58.13600158691406, "margin_dpo/margin_std": 101.92466735839844, "step": 269 }, { "epoch": 0.5654450261780105, "fcm_dpo/beta": 0.007279932964593172, "fcm_dpo/delta": 0.024306561797857285, "fcm_dpo/margin": 65.43903350830078, "fcm_dpo/q_t": 0.3980584740638733, "grad_norm": 96.99080657958984, "learning_rate": 2.381045210440644e-07, "logits/chosen": -0.9254723787307739, "logits/rejected": -0.9346519708633423, "logps/chosen": -450.68603515625, "logps/ref_chosen": -318.5623779296875, "logps/ref_rejected": -281.1880798339844, "logps/rejected": -478.7507629394531, "loss": 4.4724, "margin_dpo/margin_mean": 65.43903350830078, "margin_dpo/margin_std": 112.69806671142578, "step": 270 }, { "epoch": 0.5675392670157068, "fcm_dpo/beta": 0.0073170713149011135, "fcm_dpo/delta": -0.024393264204263687, "fcm_dpo/margin": 55.638763427734375, "fcm_dpo/q_t": 0.41326209902763367, "grad_norm": 92.29756164550781, "learning_rate": 2.3627616503391812e-07, "logits/chosen": -0.7708844542503357, "logits/rejected": -0.7646578550338745, "logps/chosen": -415.815673828125, "logps/ref_chosen": -284.104736328125, "logps/ref_rejected": -253.9580535888672, "logps/rejected": -441.3077697753906, "loss": 4.5874, "margin_dpo/margin_mean": 55.638763427734375, "margin_dpo/margin_std": 99.02946472167969, "step": 271 }, { "epoch": 0.5696335078534032, "fcm_dpo/beta": 0.007198906969279051, "fcm_dpo/delta": -0.004485009238123894, "fcm_dpo/margin": 64.96874237060547, "fcm_dpo/q_t": 0.39990508556365967, "grad_norm": 78.74405670166016, "learning_rate": 2.344485449913914e-07, "logits/chosen": -0.8817507028579712, "logits/rejected": -0.8673537969589233, "logps/chosen": -423.4478759765625, "logps/ref_chosen": -297.3590087890625, "logps/ref_rejected": -279.20196533203125, "logps/rejected": -470.2595520019531, "loss": 4.5042, "margin_dpo/margin_mean": 64.96874237060547, "margin_dpo/margin_std": 112.7622299194336, "step": 272 }, { "epoch": 0.5717277486910994, "fcm_dpo/beta": 0.007184017449617386, "fcm_dpo/delta": 0.005371536128222942, "fcm_dpo/margin": 68.41497039794922, "fcm_dpo/q_t": 0.3951926827430725, "grad_norm": 102.56245422363281, "learning_rate": 2.3262175892620062e-07, "logits/chosen": -0.86268150806427, "logits/rejected": -0.8752706050872803, "logps/chosen": -420.91461181640625, "logps/ref_chosen": -293.20574951171875, "logps/ref_rejected": -274.7646789550781, "logps/rejected": -470.88848876953125, "loss": 4.4026, "margin_dpo/margin_mean": 68.41497039794922, "margin_dpo/margin_std": 110.33782958984375, "step": 273 }, { "epoch": 0.5738219895287958, "fcm_dpo/beta": 0.007152612321078777, "fcm_dpo/delta": -0.028561905026435852, "fcm_dpo/margin": 83.62451934814453, "fcm_dpo/q_t": 0.37009555101394653, "grad_norm": 108.42113494873047, "learning_rate": 2.3079590480333827e-07, "logits/chosen": -0.8165264129638672, "logits/rejected": -0.7849279046058655, "logps/chosen": -393.9966125488281, "logps/ref_chosen": -270.55865478515625, "logps/ref_rejected": -239.47048950195312, "logps/rejected": -446.5329284667969, "loss": 3.9903, "margin_dpo/margin_mean": 83.62451934814453, "margin_dpo/margin_std": 102.3349838256836, "step": 274 }, { "epoch": 0.5759162303664922, "fcm_dpo/beta": 0.006889094598591328, "fcm_dpo/delta": -0.037462376058101654, "fcm_dpo/margin": 80.98619842529297, "fcm_dpo/q_t": 0.379713773727417, "grad_norm": 81.02855682373047, "learning_rate": 2.2897108053782e-07, "logits/chosen": -0.864743709564209, "logits/rejected": -0.8470298647880554, "logps/chosen": -368.32879638671875, "logps/ref_chosen": -250.4369354248047, "logps/ref_rejected": -249.5605926513672, "logps/rejected": -448.4385986328125, "loss": 4.0944, "margin_dpo/margin_mean": 80.98619842529297, "margin_dpo/margin_std": 103.47311401367188, "step": 275 }, { "epoch": 0.5780104712041885, "fcm_dpo/beta": 0.006674672476947308, "fcm_dpo/delta": -0.04028826206922531, "fcm_dpo/margin": 74.76463317871094, "fcm_dpo/q_t": 0.39007264375686646, "grad_norm": 83.3016128540039, "learning_rate": 2.2714738398943308e-07, "logits/chosen": -0.9309563636779785, "logits/rejected": -0.9080483913421631, "logps/chosen": -423.72235107421875, "logps/ref_chosen": -297.8566589355469, "logps/ref_rejected": -295.5954895019531, "logps/rejected": -496.225830078125, "loss": 4.2971, "margin_dpo/margin_mean": 74.76463317871094, "margin_dpo/margin_std": 108.16291809082031, "step": 276 }, { "epoch": 0.5801047120418849, "fcm_dpo/beta": 0.0066186352632939816, "fcm_dpo/delta": -0.004491167608648539, "fcm_dpo/margin": 54.66672897338867, "fcm_dpo/q_t": 0.42031899094581604, "grad_norm": 104.00203704833984, "learning_rate": 2.2532491295748865e-07, "logits/chosen": -0.8646161556243896, "logits/rejected": -0.8646143078804016, "logps/chosen": -405.98822021484375, "logps/ref_chosen": -266.3604736328125, "logps/ref_rejected": -253.36767578125, "logps/rejected": -447.662109375, "loss": 4.7764, "margin_dpo/margin_mean": 54.66672897338867, "margin_dpo/margin_std": 113.69486999511719, "step": 277 }, { "epoch": 0.5821989528795811, "fcm_dpo/beta": 0.006715740542858839, "fcm_dpo/delta": 0.02268362231552601, "fcm_dpo/margin": 46.8961181640625, "fcm_dpo/q_t": 0.4369484484195709, "grad_norm": 140.3434295654297, "learning_rate": 2.2350376517557726e-07, "logits/chosen": -0.899573802947998, "logits/rejected": -0.8620951771736145, "logps/chosen": -416.90313720703125, "logps/ref_chosen": -267.40728759765625, "logps/ref_rejected": -229.5758514404297, "logps/rejected": -425.96783447265625, "loss": 5.0277, "margin_dpo/margin_mean": 46.8961181640625, "margin_dpo/margin_std": 120.51719665527344, "step": 278 }, { "epoch": 0.5842931937172775, "fcm_dpo/beta": 0.006686557084321976, "fcm_dpo/delta": -0.02701484225690365, "fcm_dpo/margin": 83.11595153808594, "fcm_dpo/q_t": 0.38234850764274597, "grad_norm": 93.99402618408203, "learning_rate": 2.2168403830632769e-07, "logits/chosen": -0.809998095035553, "logits/rejected": -0.7943370342254639, "logps/chosen": -445.050537109375, "logps/ref_chosen": -313.3677978515625, "logps/ref_rejected": -299.1744384765625, "logps/rejected": -513.97314453125, "loss": 4.1802, "margin_dpo/margin_mean": 83.11595153808594, "margin_dpo/margin_std": 115.72819519042969, "step": 279 }, { "epoch": 0.5863874345549738, "fcm_dpo/beta": 0.006766310427337885, "fcm_dpo/delta": 0.05007310211658478, "fcm_dpo/margin": 66.72462463378906, "fcm_dpo/q_t": 0.40197527408599854, "grad_norm": 77.47269439697266, "learning_rate": 2.1986582993616925e-07, "logits/chosen": -0.870246410369873, "logits/rejected": -0.8812520503997803, "logps/chosen": -391.3334045410156, "logps/ref_chosen": -265.5558166503906, "logps/ref_rejected": -247.1573944091797, "logps/rejected": -439.6595764160156, "loss": 4.4456, "margin_dpo/margin_mean": 66.72462463378906, "margin_dpo/margin_std": 109.50012969970703, "step": 280 }, { "epoch": 0.5884816753926702, "fcm_dpo/beta": 0.006822681520134211, "fcm_dpo/delta": -0.020259760320186615, "fcm_dpo/margin": 71.36731719970703, "fcm_dpo/q_t": 0.3979014456272125, "grad_norm": 104.89105224609375, "learning_rate": 2.1804923757009882e-07, "logits/chosen": -0.855800986289978, "logits/rejected": -0.8650408983230591, "logps/chosen": -444.5128479003906, "logps/ref_chosen": -295.2995910644531, "logps/ref_rejected": -293.80877685546875, "logps/rejected": -514.3893432617188, "loss": 4.403, "margin_dpo/margin_mean": 71.36732482910156, "margin_dpo/margin_std": 113.76239776611328, "step": 281 }, { "epoch": 0.5905759162303665, "fcm_dpo/beta": 0.006766719743609428, "fcm_dpo/delta": -0.0025003692135214806, "fcm_dpo/margin": 69.98826599121094, "fcm_dpo/q_t": 0.3973071575164795, "grad_norm": 71.5643539428711, "learning_rate": 2.1623435862645205e-07, "logits/chosen": -0.8500492572784424, "logits/rejected": -0.8468190431594849, "logps/chosen": -454.17938232421875, "logps/ref_chosen": -318.63714599609375, "logps/ref_rejected": -273.5943603515625, "logps/rejected": -479.1248779296875, "loss": 4.3813, "margin_dpo/margin_mean": 69.98826599121094, "margin_dpo/margin_std": 107.60038757324219, "step": 282 }, { "epoch": 0.5926701570680628, "fcm_dpo/beta": 0.006712625734508038, "fcm_dpo/delta": -0.015935653820633888, "fcm_dpo/margin": 65.22550201416016, "fcm_dpo/q_t": 0.40675878524780273, "grad_norm": 72.51644134521484, "learning_rate": 2.1442129043167873e-07, "logits/chosen": -0.8635268807411194, "logits/rejected": -0.8540130853652954, "logps/chosen": -392.47882080078125, "logps/ref_chosen": -255.11477661132812, "logps/ref_rejected": -236.97372436523438, "logps/rejected": -439.5632629394531, "loss": 4.4794, "margin_dpo/margin_mean": 65.22550201416016, "margin_dpo/margin_std": 107.16011810302734, "step": 283 }, { "epoch": 0.5947643979057592, "fcm_dpo/beta": 0.0064870212227106094, "fcm_dpo/delta": -0.039267394691705704, "fcm_dpo/margin": 74.81352996826172, "fcm_dpo/q_t": 0.3955889046192169, "grad_norm": 100.19154357910156, "learning_rate": 2.1261013021512378e-07, "logits/chosen": -0.8406773209571838, "logits/rejected": -0.8125337958335876, "logps/chosen": -415.146728515625, "logps/ref_chosen": -273.355224609375, "logps/ref_rejected": -259.84759521484375, "logps/rejected": -476.45263671875, "loss": 4.3948, "margin_dpo/margin_mean": 74.81352996826172, "margin_dpo/margin_std": 116.77448272705078, "step": 284 }, { "epoch": 0.5968586387434555, "fcm_dpo/beta": 0.006448796950280666, "fcm_dpo/delta": 0.005233362317085266, "fcm_dpo/margin": 48.80956268310547, "fcm_dpo/q_t": 0.4320613145828247, "grad_norm": 115.82734680175781, "learning_rate": 2.1080097510381294e-07, "logits/chosen": -0.8477023839950562, "logits/rejected": -0.8431457281112671, "logps/chosen": -455.9388732910156, "logps/ref_chosen": -309.8022155761719, "logps/ref_rejected": -279.11846923828125, "logps/rejected": -474.064697265625, "loss": 4.9064, "margin_dpo/margin_mean": 48.80955505371094, "margin_dpo/margin_std": 111.24368286132812, "step": 285 }, { "epoch": 0.5989528795811518, "fcm_dpo/beta": 0.006532514467835426, "fcm_dpo/delta": 0.0254356786608696, "fcm_dpo/margin": 65.45335388183594, "fcm_dpo/q_t": 0.4098204970359802, "grad_norm": 80.22543334960938, "learning_rate": 2.089939221172446e-07, "logits/chosen": -0.8206965923309326, "logits/rejected": -0.8110418915748596, "logps/chosen": -408.1368408203125, "logps/ref_chosen": -271.4655456542969, "logps/ref_rejected": -279.531494140625, "logps/rejected": -481.6561279296875, "loss": 4.5244, "margin_dpo/margin_mean": 65.45335388183594, "margin_dpo/margin_std": 113.59163665771484, "step": 286 }, { "epoch": 0.6010471204188481, "fcm_dpo/beta": 0.00667279027402401, "fcm_dpo/delta": 0.019707411527633667, "fcm_dpo/margin": 64.98822021484375, "fcm_dpo/q_t": 0.40575066208839417, "grad_norm": 79.20893096923828, "learning_rate": 2.0718906816218595e-07, "logits/chosen": -0.856654942035675, "logits/rejected": -0.841168224811554, "logps/chosen": -412.86700439453125, "logps/ref_chosen": -277.0932312011719, "logps/ref_rejected": -233.55599975585938, "logps/rejected": -434.3179931640625, "loss": 4.5597, "margin_dpo/margin_mean": 64.98822021484375, "margin_dpo/margin_std": 114.76084899902344, "step": 287 }, { "epoch": 0.6031413612565445, "fcm_dpo/beta": 0.006710154935717583, "fcm_dpo/delta": -0.014013386331498623, "fcm_dpo/margin": 61.08485412597656, "fcm_dpo/q_t": 0.4106292426586151, "grad_norm": 92.16407775878906, "learning_rate": 2.053865100274774e-07, "logits/chosen": -0.85230553150177, "logits/rejected": -0.8594391942024231, "logps/chosen": -425.701171875, "logps/ref_chosen": -293.1681823730469, "logps/ref_rejected": -263.4059143066406, "logps/rejected": -457.0237731933594, "loss": 4.584, "margin_dpo/margin_mean": 61.08485794067383, "margin_dpo/margin_std": 109.07479858398438, "step": 288 }, { "epoch": 0.6052356020942409, "fcm_dpo/beta": 0.006692454218864441, "fcm_dpo/delta": 0.01522915530949831, "fcm_dpo/margin": 40.210289001464844, "fcm_dpo/q_t": 0.4406478703022003, "grad_norm": 97.82292938232422, "learning_rate": 2.035863443788411e-07, "logits/chosen": -0.849497377872467, "logits/rejected": -0.8317880034446716, "logps/chosen": -478.8475341796875, "logps/ref_chosen": -329.9574279785156, "logps/ref_rejected": -276.7565002441406, "logps/rejected": -465.85687255859375, "loss": 5.023, "margin_dpo/margin_mean": 40.210289001464844, "margin_dpo/margin_std": 104.00145721435547, "step": 289 }, { "epoch": 0.6073298429319371, "fcm_dpo/beta": 0.0065245069563388824, "fcm_dpo/delta": -0.04471207410097122, "fcm_dpo/margin": 57.04399108886719, "fcm_dpo/q_t": 0.4190434217453003, "grad_norm": 83.78768157958984, "learning_rate": 2.0178866775369774e-07, "logits/chosen": -0.8456165194511414, "logits/rejected": -0.7818999886512756, "logps/chosen": -460.7046203613281, "logps/ref_chosen": -324.6690673828125, "logps/ref_rejected": -311.8439636230469, "logps/rejected": -504.9234924316406, "loss": 4.7357, "margin_dpo/margin_mean": 57.043983459472656, "margin_dpo/margin_std": 112.49860382080078, "step": 290 }, { "epoch": 0.6094240837696335, "fcm_dpo/beta": 0.006406103260815144, "fcm_dpo/delta": -0.016831081360578537, "fcm_dpo/margin": 75.2476806640625, "fcm_dpo/q_t": 0.3942800760269165, "grad_norm": 80.28817749023438, "learning_rate": 1.9999357655598891e-07, "logits/chosen": -0.831132173538208, "logits/rejected": -0.8223029971122742, "logps/chosen": -400.536376953125, "logps/ref_chosen": -275.1535949707031, "logps/ref_rejected": -278.1832580566406, "logps/rejected": -478.81365966796875, "loss": 4.2825, "margin_dpo/margin_mean": 75.24768829345703, "margin_dpo/margin_std": 103.58267211914062, "step": 291 }, { "epoch": 0.6115183246073298, "fcm_dpo/beta": 0.006372286006808281, "fcm_dpo/delta": -0.007237437646836042, "fcm_dpo/margin": 56.694847106933594, "fcm_dpo/q_t": 0.4195069968700409, "grad_norm": 73.26972198486328, "learning_rate": 1.9820116705100775e-07, "logits/chosen": -0.8270624876022339, "logits/rejected": -0.8235753774642944, "logps/chosen": -377.44439697265625, "logps/ref_chosen": -259.3636779785156, "logps/ref_rejected": -279.30218505859375, "logps/rejected": -454.0777282714844, "loss": 4.6624, "margin_dpo/margin_mean": 56.694847106933594, "margin_dpo/margin_std": 104.66506958007812, "step": 292 }, { "epoch": 0.6136125654450262, "fcm_dpo/beta": 0.0063700140453875065, "fcm_dpo/delta": 0.003185997251421213, "fcm_dpo/margin": 70.93012237548828, "fcm_dpo/q_t": 0.40013647079467773, "grad_norm": 87.3719711303711, "learning_rate": 1.9641153536023642e-07, "logits/chosen": -0.9205597639083862, "logits/rejected": -0.8796571493148804, "logps/chosen": -435.2584228515625, "logps/ref_chosen": -303.77081298828125, "logps/ref_rejected": -270.07513427734375, "logps/rejected": -472.4928283691406, "loss": 4.3374, "margin_dpo/margin_mean": 70.93012237548828, "margin_dpo/margin_std": 98.5417251586914, "step": 293 }, { "epoch": 0.6157068062827226, "fcm_dpo/beta": 0.00646553561091423, "fcm_dpo/delta": 0.018955595791339874, "fcm_dpo/margin": 65.13241577148438, "fcm_dpo/q_t": 0.4103822112083435, "grad_norm": 82.27020263671875, "learning_rate": 1.9462477745619106e-07, "logits/chosen": -0.8257191181182861, "logits/rejected": -0.8322641849517822, "logps/chosen": -361.6537170410156, "logps/ref_chosen": -240.23831176757812, "logps/ref_rejected": -229.187744140625, "logps/rejected": -415.735595703125, "loss": 4.5112, "margin_dpo/margin_mean": 65.1324234008789, "margin_dpo/margin_std": 109.38378143310547, "step": 294 }, { "epoch": 0.6178010471204188, "fcm_dpo/beta": 0.006473129615187645, "fcm_dpo/delta": 0.013521851040422916, "fcm_dpo/margin": 63.43254852294922, "fcm_dpo/q_t": 0.40921661257743835, "grad_norm": 76.50547790527344, "learning_rate": 1.928409891572757e-07, "logits/chosen": -0.8179617524147034, "logits/rejected": -0.8252490758895874, "logps/chosen": -374.97149658203125, "logps/ref_chosen": -251.00970458984375, "logps/ref_rejected": -244.15142822265625, "logps/rejected": -431.5457763671875, "loss": 4.4736, "margin_dpo/margin_mean": 63.432552337646484, "margin_dpo/margin_std": 99.88431549072266, "step": 295 }, { "epoch": 0.6198952879581152, "fcm_dpo/beta": 0.006484742276370525, "fcm_dpo/delta": -0.02761462889611721, "fcm_dpo/margin": 84.66267395019531, "fcm_dpo/q_t": 0.3797982931137085, "grad_norm": 83.58622741699219, "learning_rate": 1.9106026612264315e-07, "logits/chosen": -0.8179198503494263, "logits/rejected": -0.7856135964393616, "logps/chosen": -420.0888977050781, "logps/ref_chosen": -293.880615234375, "logps/ref_rejected": -283.4175720214844, "logps/rejected": -494.28857421875, "loss": 4.1604, "margin_dpo/margin_mean": 84.66267395019531, "margin_dpo/margin_std": 113.16719818115234, "step": 296 }, { "epoch": 0.6219895287958115, "fcm_dpo/beta": 0.0063199070282280445, "fcm_dpo/delta": -0.019577497616410255, "fcm_dpo/margin": 59.113037109375, "fcm_dpo/q_t": 0.41643232107162476, "grad_norm": 80.03433227539062, "learning_rate": 1.8928270384706582e-07, "logits/chosen": -0.9052919745445251, "logits/rejected": -0.8989733457565308, "logps/chosen": -414.87054443359375, "logps/ref_chosen": -289.4600830078125, "logps/ref_rejected": -283.69110107421875, "logps/rejected": -468.21453857421875, "loss": 4.5989, "margin_dpo/margin_mean": 59.113037109375, "margin_dpo/margin_std": 100.17891693115234, "step": 297 }, { "epoch": 0.6240837696335079, "fcm_dpo/beta": 0.006384821608662605, "fcm_dpo/delta": 0.010201474651694298, "fcm_dpo/margin": 74.46007537841797, "fcm_dpo/q_t": 0.3973884880542755, "grad_norm": 99.93316650390625, "learning_rate": 1.875083976558136e-07, "logits/chosen": -0.8152309060096741, "logits/rejected": -0.8023860454559326, "logps/chosen": -425.2400207519531, "logps/ref_chosen": -306.5150146484375, "logps/ref_rejected": -280.6969909667969, "logps/rejected": -473.88201904296875, "loss": 4.4122, "margin_dpo/margin_mean": 74.46007537841797, "margin_dpo/margin_std": 115.8766098022461, "step": 298 }, { "epoch": 0.6261780104712041, "fcm_dpo/beta": 0.006350814364850521, "fcm_dpo/delta": -0.002623580861836672, "fcm_dpo/margin": 60.436031341552734, "fcm_dpo/q_t": 0.41395753622055054, "grad_norm": 88.62031555175781, "learning_rate": 1.8573744269954297e-07, "logits/chosen": -0.8021990060806274, "logits/rejected": -0.7875962257385254, "logps/chosen": -420.3141174316406, "logps/ref_chosen": -281.36376953125, "logps/ref_rejected": -270.39508056640625, "logps/rejected": -469.7814636230469, "loss": 4.5633, "margin_dpo/margin_mean": 60.43603515625, "margin_dpo/margin_std": 96.89127349853516, "step": 299 }, { "epoch": 0.6282722513089005, "fcm_dpo/beta": 0.006370040588080883, "fcm_dpo/delta": 0.02115996927022934, "fcm_dpo/margin": 56.68406677246094, "fcm_dpo/q_t": 0.42014020681381226, "grad_norm": 143.75216674804688, "learning_rate": 1.839699339491937e-07, "logits/chosen": -0.8434377908706665, "logits/rejected": -0.8197212219238281, "logps/chosen": -457.9049377441406, "logps/ref_chosen": -314.923095703125, "logps/ref_rejected": -269.2027893066406, "logps/rejected": -468.86871337890625, "loss": 4.731, "margin_dpo/margin_mean": 56.6840705871582, "margin_dpo/margin_std": 112.98627471923828, "step": 300 }, { "epoch": 0.6303664921465969, "fcm_dpo/beta": 0.006618153303861618, "fcm_dpo/delta": 0.040116339921951294, "fcm_dpo/margin": 62.580421447753906, "fcm_dpo/q_t": 0.4097692668437958, "grad_norm": 93.548828125, "learning_rate": 1.8220596619089573e-07, "logits/chosen": -0.857657253742218, "logits/rejected": -0.857261061668396, "logps/chosen": -413.1051330566406, "logps/ref_chosen": -279.89453125, "logps/ref_rejected": -271.6694641113281, "logps/rejected": -467.4604797363281, "loss": 4.5268, "margin_dpo/margin_mean": 62.58042526245117, "margin_dpo/margin_std": 104.89457702636719, "step": 301 }, { "epoch": 0.6324607329842932, "fcm_dpo/beta": 0.006750874686986208, "fcm_dpo/delta": 0.022394752129912376, "fcm_dpo/margin": 75.39173889160156, "fcm_dpo/q_t": 0.39047718048095703, "grad_norm": 96.45508575439453, "learning_rate": 1.8044563402088682e-07, "logits/chosen": -0.8146281838417053, "logits/rejected": -0.8003526926040649, "logps/chosen": -402.07379150390625, "logps/ref_chosen": -271.3318176269531, "logps/ref_rejected": -256.5587158203125, "logps/rejected": -462.6923828125, "loss": 4.2878, "margin_dpo/margin_mean": 75.39173889160156, "margin_dpo/margin_std": 110.79779052734375, "step": 302 }, { "epoch": 0.6345549738219896, "fcm_dpo/beta": 0.0067290207371115685, "fcm_dpo/delta": -0.012645013630390167, "fcm_dpo/margin": 69.51010131835938, "fcm_dpo/q_t": 0.3968094289302826, "grad_norm": 89.24269104003906, "learning_rate": 1.7868903184043885e-07, "logits/chosen": -0.8077495098114014, "logits/rejected": -0.787011444568634, "logps/chosen": -441.51629638671875, "logps/ref_chosen": -304.88104248046875, "logps/ref_rejected": -269.063720703125, "logps/rejected": -475.2090759277344, "loss": 4.4047, "margin_dpo/margin_mean": 69.51010131835938, "margin_dpo/margin_std": 107.004638671875, "step": 303 }, { "epoch": 0.6366492146596858, "fcm_dpo/beta": 0.006841976661235094, "fcm_dpo/delta": 0.033334143459796906, "fcm_dpo/margin": 76.11368560791016, "fcm_dpo/q_t": 0.3914816379547119, "grad_norm": 96.88390350341797, "learning_rate": 1.7693625385079574e-07, "logits/chosen": -0.8111523389816284, "logits/rejected": -0.8187903165817261, "logps/chosen": -438.7404479980469, "logps/ref_chosen": -290.7109680175781, "logps/ref_rejected": -237.6885986328125, "logps/rejected": -461.8317565917969, "loss": 4.2888, "margin_dpo/margin_mean": 76.11369323730469, "margin_dpo/margin_std": 116.99392700195312, "step": 304 }, { "epoch": 0.6387434554973822, "fcm_dpo/beta": 0.006396348122507334, "fcm_dpo/delta": -0.1703343540430069, "fcm_dpo/margin": 107.9814224243164, "fcm_dpo/q_t": 0.3516058921813965, "grad_norm": 75.3594741821289, "learning_rate": 1.7518739404812155e-07, "logits/chosen": -0.8580024242401123, "logits/rejected": -0.8278228640556335, "logps/chosen": -383.34393310546875, "logps/ref_chosen": -256.4839782714844, "logps/ref_rejected": -266.4063415527344, "logps/rejected": -501.24761962890625, "loss": 3.8056, "margin_dpo/margin_mean": 107.9814224243164, "margin_dpo/margin_std": 118.7427749633789, "step": 305 }, { "epoch": 0.6408376963350786, "fcm_dpo/beta": 0.006036079488694668, "fcm_dpo/delta": -0.0010613007470965385, "fcm_dpo/margin": 65.81829833984375, "fcm_dpo/q_t": 0.4118325114250183, "grad_norm": 83.1937484741211, "learning_rate": 1.7344254621846017e-07, "logits/chosen": -0.8624471426010132, "logits/rejected": -0.8450292944908142, "logps/chosen": -458.81719970703125, "logps/ref_chosen": -320.6492004394531, "logps/ref_rejected": -273.36773681640625, "logps/rejected": -477.35400390625, "loss": 4.5335, "margin_dpo/margin_mean": 65.81829833984375, "margin_dpo/margin_std": 105.20728302001953, "step": 306 }, { "epoch": 0.6429319371727749, "fcm_dpo/beta": 0.006111084017902613, "fcm_dpo/delta": 0.03404655680060387, "fcm_dpo/margin": 74.80149841308594, "fcm_dpo/q_t": 0.39704886078834534, "grad_norm": 86.36273956298828, "learning_rate": 1.717018039327053e-07, "logits/chosen": -0.794308602809906, "logits/rejected": -0.8210631608963013, "logps/chosen": -440.82342529296875, "logps/ref_chosen": -279.4541931152344, "logps/ref_rejected": -240.3796844482422, "logps/rejected": -476.5504455566406, "loss": 4.322, "margin_dpo/margin_mean": 74.80149841308594, "margin_dpo/margin_std": 101.4282455444336, "step": 307 }, { "epoch": 0.6450261780104712, "fcm_dpo/beta": 0.006177757866680622, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 47.84044647216797, "fcm_dpo/q_t": 0.4335402250289917, "grad_norm": 89.86914825439453, "learning_rate": 1.699652605415828e-07, "logits/chosen": -0.8320671319961548, "logits/rejected": -0.8438513278961182, "logps/chosen": -472.46478271484375, "logps/ref_chosen": -297.068359375, "logps/ref_rejected": -258.83856201171875, "logps/rejected": -482.0754089355469, "loss": 4.8917, "margin_dpo/margin_mean": 47.84044647216797, "margin_dpo/margin_std": 108.76823425292969, "step": 308 }, { "epoch": 0.6471204188481675, "fcm_dpo/beta": 0.006233610212802887, "fcm_dpo/delta": 0.030657585710287094, "fcm_dpo/margin": 79.33876037597656, "fcm_dpo/q_t": 0.3936896026134491, "grad_norm": 99.26750946044922, "learning_rate": 1.6823300917064458e-07, "logits/chosen": -0.8398002982139587, "logits/rejected": -0.8489018678665161, "logps/chosen": -445.3302001953125, "logps/ref_chosen": -281.3881530761719, "logps/ref_rejected": -262.458740234375, "logps/rejected": -505.739501953125, "loss": 4.2964, "margin_dpo/margin_mean": 79.33875274658203, "margin_dpo/margin_std": 117.96406555175781, "step": 309 }, { "epoch": 0.6492146596858639, "fcm_dpo/beta": 0.0062689767219126225, "fcm_dpo/delta": -0.023925358429551125, "fcm_dpo/margin": 78.43304443359375, "fcm_dpo/q_t": 0.39273297786712646, "grad_norm": 157.27012634277344, "learning_rate": 1.6650514271527465e-07, "logits/chosen": -0.8444918394088745, "logits/rejected": -0.8194589614868164, "logps/chosen": -438.931884765625, "logps/ref_chosen": -279.1872863769531, "logps/ref_rejected": -261.8279724121094, "logps/rejected": -500.0056457519531, "loss": 4.2968, "margin_dpo/margin_mean": 78.43304443359375, "margin_dpo/margin_std": 111.59870910644531, "step": 310 }, { "epoch": 0.6513089005235602, "fcm_dpo/beta": 0.006130394991487265, "fcm_dpo/delta": -0.007703306153416634, "fcm_dpo/margin": 75.38703918457031, "fcm_dpo/q_t": 0.39765244722366333, "grad_norm": 146.17376708984375, "learning_rate": 1.647817538357072e-07, "logits/chosen": -0.8305755853652954, "logits/rejected": -0.8122124075889587, "logps/chosen": -437.87200927734375, "logps/ref_chosen": -271.39813232421875, "logps/ref_rejected": -266.12701416015625, "logps/rejected": -507.98797607421875, "loss": 4.4273, "margin_dpo/margin_mean": 75.38703155517578, "margin_dpo/margin_std": 117.2852783203125, "step": 311 }, { "epoch": 0.6534031413612565, "fcm_dpo/beta": 0.00600817333906889, "fcm_dpo/delta": -0.0545642226934433, "fcm_dpo/margin": 68.5428466796875, "fcm_dpo/q_t": 0.4122428297996521, "grad_norm": 104.90550231933594, "learning_rate": 1.6306293495205755e-07, "logits/chosen": -0.8464758396148682, "logits/rejected": -0.8214608430862427, "logps/chosen": -447.5267639160156, "logps/ref_chosen": -282.3850402832031, "logps/ref_rejected": -246.35389709472656, "logps/rejected": -480.0384826660156, "loss": 4.7234, "margin_dpo/margin_mean": 68.5428466796875, "margin_dpo/margin_std": 135.85427856445312, "step": 312 }, { "epoch": 0.6554973821989529, "fcm_dpo/beta": 0.005932308733463287, "fcm_dpo/delta": -0.02101931907236576, "fcm_dpo/margin": 71.42918395996094, "fcm_dpo/q_t": 0.41074472665786743, "grad_norm": 75.15242004394531, "learning_rate": 1.6134877823936607e-07, "logits/chosen": -0.8891011476516724, "logits/rejected": -0.8756020069122314, "logps/chosen": -469.3253479003906, "logps/ref_chosen": -303.630859375, "logps/ref_rejected": -273.1156921386719, "logps/rejected": -510.2393798828125, "loss": 4.5956, "margin_dpo/margin_mean": 71.42918395996094, "margin_dpo/margin_std": 126.27899169921875, "step": 313 }, { "epoch": 0.6575916230366492, "fcm_dpo/beta": 0.005879377480596304, "fcm_dpo/delta": 0.035980284214019775, "fcm_dpo/margin": 71.69800567626953, "fcm_dpo/q_t": 0.40531933307647705, "grad_norm": 102.78753662109375, "learning_rate": 1.5963937562265522e-07, "logits/chosen": -0.9064289331436157, "logits/rejected": -0.8947293758392334, "logps/chosen": -461.6485595703125, "logps/ref_chosen": -302.3042907714844, "logps/ref_rejected": -273.6416015625, "logps/rejected": -504.6839294433594, "loss": 4.4426, "margin_dpo/margin_mean": 71.69800567626953, "margin_dpo/margin_std": 109.42501831054688, "step": 314 }, { "epoch": 0.6596858638743456, "fcm_dpo/beta": 0.005956702399998903, "fcm_dpo/delta": -0.0020787278190255165, "fcm_dpo/margin": 85.90567016601562, "fcm_dpo/q_t": 0.3879070580005646, "grad_norm": 84.1939697265625, "learning_rate": 1.5793481877199943e-07, "logits/chosen": -0.8775259256362915, "logits/rejected": -0.860569953918457, "logps/chosen": -459.0497741699219, "logps/ref_chosen": -302.729248046875, "logps/ref_rejected": -270.26910400390625, "logps/rejected": -512.4953002929688, "loss": 4.1999, "margin_dpo/margin_mean": 85.90567016601562, "margin_dpo/margin_std": 114.59932708740234, "step": 315 }, { "epoch": 0.6617801047120419, "fcm_dpo/beta": 0.00605932530015707, "fcm_dpo/delta": 0.009984731674194336, "fcm_dpo/margin": 77.9808349609375, "fcm_dpo/q_t": 0.39844048023223877, "grad_norm": 106.31928253173828, "learning_rate": 1.562351990976095e-07, "logits/chosen": -0.8906347155570984, "logits/rejected": -0.8755995035171509, "logps/chosen": -471.20452880859375, "logps/ref_chosen": -310.5706481933594, "logps/ref_rejected": -272.9354553222656, "logps/rejected": -511.5502014160156, "loss": 4.3628, "margin_dpo/margin_mean": 77.9808349609375, "margin_dpo/margin_std": 119.30810546875, "step": 316 }, { "epoch": 0.6638743455497382, "fcm_dpo/beta": 0.006037175189703703, "fcm_dpo/delta": 0.010911202058196068, "fcm_dpo/margin": 74.88856506347656, "fcm_dpo/q_t": 0.39838525652885437, "grad_norm": 84.3582992553711, "learning_rate": 1.5454060774493065e-07, "logits/chosen": -0.8910847902297974, "logits/rejected": -0.860565185546875, "logps/chosen": -394.9084777832031, "logps/ref_chosen": -253.90036010742188, "logps/ref_rejected": -218.74078369140625, "logps/rejected": -434.6374206542969, "loss": 4.3347, "margin_dpo/margin_mean": 74.88856506347656, "margin_dpo/margin_std": 103.74834442138672, "step": 317 }, { "epoch": 0.6659685863874345, "fcm_dpo/beta": 0.0059028794057667255, "fcm_dpo/delta": -0.04113336279988289, "fcm_dpo/margin": 88.18386840820312, "fcm_dpo/q_t": 0.3841785490512848, "grad_norm": 70.52163696289062, "learning_rate": 1.5285113558975427e-07, "logits/chosen": -0.9013999700546265, "logits/rejected": -0.8701552152633667, "logps/chosen": -417.7796936035156, "logps/ref_chosen": -270.8228759765625, "logps/ref_rejected": -255.30972290039062, "logps/rejected": -490.450439453125, "loss": 4.153, "margin_dpo/margin_mean": 88.18386840820312, "margin_dpo/margin_std": 111.3894271850586, "step": 318 }, { "epoch": 0.6680628272251309, "fcm_dpo/beta": 0.006000366993248463, "fcm_dpo/delta": 0.0448153130710125, "fcm_dpo/margin": 82.24055480957031, "fcm_dpo/q_t": 0.38862344622612, "grad_norm": 94.62660217285156, "learning_rate": 1.5116687323334464e-07, "logits/chosen": -0.8874606490135193, "logits/rejected": -0.857952356338501, "logps/chosen": -455.48486328125, "logps/ref_chosen": -301.0028076171875, "logps/ref_rejected": -242.39002990722656, "logps/rejected": -479.1126708984375, "loss": 4.1591, "margin_dpo/margin_mean": 82.24055480957031, "margin_dpo/margin_std": 99.47880554199219, "step": 319 }, { "epoch": 0.6701570680628273, "fcm_dpo/beta": 0.00606071762740612, "fcm_dpo/delta": -0.0073157744482159615, "fcm_dpo/margin": 77.76814270019531, "fcm_dpo/q_t": 0.398094117641449, "grad_norm": 91.96961212158203, "learning_rate": 1.4948791099758052e-07, "logits/chosen": -0.8550815582275391, "logits/rejected": -0.8503654599189758, "logps/chosen": -452.3647155761719, "logps/ref_chosen": -303.6225891113281, "logps/ref_rejected": -280.85174560546875, "logps/rejected": -507.362060546875, "loss": 4.4294, "margin_dpo/margin_mean": 77.76814270019531, "margin_dpo/margin_std": 126.88796997070312, "step": 320 }, { "epoch": 0.6722513089005235, "fcm_dpo/beta": 0.0060967146418988705, "fcm_dpo/delta": 0.02567823976278305, "fcm_dpo/margin": 54.03432083129883, "fcm_dpo/q_t": 0.4288126230239868, "grad_norm": 95.56733703613281, "learning_rate": 1.478143389201113e-07, "logits/chosen": -0.8824321031570435, "logits/rejected": -0.847492516040802, "logps/chosen": -452.32916259765625, "logps/ref_chosen": -288.98583984375, "logps/ref_rejected": -241.1822052001953, "logps/rejected": -458.5599060058594, "loss": 4.8267, "margin_dpo/margin_mean": 54.03431701660156, "margin_dpo/margin_std": 116.12944030761719, "step": 321 }, { "epoch": 0.6743455497382199, "fcm_dpo/beta": 0.0063505470752716064, "fcm_dpo/delta": 0.060941774398088455, "fcm_dpo/margin": 70.42938995361328, "fcm_dpo/q_t": 0.4040513336658478, "grad_norm": 84.9507064819336, "learning_rate": 1.461462467495284e-07, "logits/chosen": -0.935436487197876, "logits/rejected": -0.8963932394981384, "logps/chosen": -473.46697998046875, "logps/ref_chosen": -308.54345703125, "logps/ref_rejected": -269.7995910644531, "logps/rejected": -505.15252685546875, "loss": 4.4664, "margin_dpo/margin_mean": 70.42938995361328, "margin_dpo/margin_std": 118.01498413085938, "step": 322 }, { "epoch": 0.6764397905759162, "fcm_dpo/beta": 0.006490709725767374, "fcm_dpo/delta": 0.0008104295702651143, "fcm_dpo/margin": 55.20634841918945, "fcm_dpo/q_t": 0.423994779586792, "grad_norm": 96.36703491210938, "learning_rate": 1.4448372394055246e-07, "logits/chosen": -0.8880312442779541, "logits/rejected": -0.8804765343666077, "logps/chosen": -436.9565124511719, "logps/ref_chosen": -279.49371337890625, "logps/ref_rejected": -228.15521240234375, "logps/rejected": -440.8243713378906, "loss": 4.8032, "margin_dpo/margin_mean": 55.20635223388672, "margin_dpo/margin_std": 118.82220458984375, "step": 323 }, { "epoch": 0.6785340314136126, "fcm_dpo/beta": 0.006320840213447809, "fcm_dpo/delta": -0.018460873514413834, "fcm_dpo/margin": 86.409912109375, "fcm_dpo/q_t": 0.37887102365493774, "grad_norm": 97.08597564697266, "learning_rate": 1.428268596492364e-07, "logits/chosen": -0.8197450041770935, "logits/rejected": -0.8136438727378845, "logps/chosen": -376.65020751953125, "logps/ref_chosen": -239.33836364746094, "logps/ref_rejected": -230.53775024414062, "logps/rejected": -454.25946044921875, "loss": 4.0627, "margin_dpo/margin_mean": 86.409912109375, "margin_dpo/margin_std": 101.99807739257812, "step": 324 }, { "epoch": 0.680628272251309, "fcm_dpo/beta": 0.006354123819619417, "fcm_dpo/delta": -0.0036203861236572266, "fcm_dpo/margin": 73.38639068603516, "fcm_dpo/q_t": 0.4013862907886505, "grad_norm": 98.75489807128906, "learning_rate": 1.4117574272818386e-07, "logits/chosen": -0.8287428617477417, "logits/rejected": -0.812412679195404, "logps/chosen": -435.7538757324219, "logps/ref_chosen": -280.62896728515625, "logps/ref_rejected": -270.5085754394531, "logps/rejected": -499.0198974609375, "loss": 4.5095, "margin_dpo/margin_mean": 73.38638305664062, "margin_dpo/margin_std": 126.34809112548828, "step": 325 }, { "epoch": 0.6827225130890052, "fcm_dpo/beta": 0.006422973703593016, "fcm_dpo/delta": 0.002800529822707176, "fcm_dpo/margin": 72.96824645996094, "fcm_dpo/q_t": 0.39816251397132874, "grad_norm": 101.16968536376953, "learning_rate": 1.3953046172178413e-07, "logits/chosen": -0.9406434297561646, "logits/rejected": -0.9313357472419739, "logps/chosen": -385.07867431640625, "logps/ref_chosen": -240.9871368408203, "logps/ref_rejected": -261.0238342285156, "logps/rejected": -478.0836181640625, "loss": 4.4055, "margin_dpo/margin_mean": 72.96824645996094, "margin_dpo/margin_std": 114.49446105957031, "step": 326 }, { "epoch": 0.6848167539267016, "fcm_dpo/beta": 0.006266596727073193, "fcm_dpo/delta": -0.023223016411066055, "fcm_dpo/margin": 80.68010711669922, "fcm_dpo/q_t": 0.38984158635139465, "grad_norm": 122.5355224609375, "learning_rate": 1.3789110486146468e-07, "logits/chosen": -0.8904516696929932, "logits/rejected": -0.8670768737792969, "logps/chosen": -414.452880859375, "logps/ref_chosen": -279.6148986816406, "logps/ref_rejected": -269.76934814453125, "logps/rejected": -485.2874755859375, "loss": 4.2333, "margin_dpo/margin_mean": 80.68010711669922, "margin_dpo/margin_std": 110.3857421875, "step": 327 }, { "epoch": 0.6869109947643979, "fcm_dpo/beta": 0.006192709319293499, "fcm_dpo/delta": -0.014546114951372147, "fcm_dpo/margin": 73.90266418457031, "fcm_dpo/q_t": 0.39877283573150635, "grad_norm": 123.79401397705078, "learning_rate": 1.362577600609588e-07, "logits/chosen": -0.8520888090133667, "logits/rejected": -0.852110743522644, "logps/chosen": -450.4490051269531, "logps/ref_chosen": -301.033447265625, "logps/ref_rejected": -284.2101135253906, "logps/rejected": -507.5283203125, "loss": 4.3332, "margin_dpo/margin_mean": 73.90266418457031, "margin_dpo/margin_std": 103.64661407470703, "step": 328 }, { "epoch": 0.6890052356020943, "fcm_dpo/beta": 0.0062096007168293, "fcm_dpo/delta": 0.012995388358831406, "fcm_dpo/margin": 81.40901184082031, "fcm_dpo/q_t": 0.39801162481307983, "grad_norm": 98.26327514648438, "learning_rate": 1.3463051491159093e-07, "logits/chosen": -0.8586837649345398, "logits/rejected": -0.8311326503753662, "logps/chosen": -477.521484375, "logps/ref_chosen": -319.9888610839844, "logps/ref_rejected": -307.5588684082031, "logps/rejected": -546.50048828125, "loss": 4.4147, "margin_dpo/margin_mean": 81.40901184082031, "margin_dpo/margin_std": 134.6064453125, "step": 329 }, { "epoch": 0.6910994764397905, "fcm_dpo/beta": 0.006210251711308956, "fcm_dpo/delta": 0.0007845014333724976, "fcm_dpo/margin": 74.684326171875, "fcm_dpo/q_t": 0.3985758423805237, "grad_norm": 118.70218658447266, "learning_rate": 1.3300945667758012e-07, "logits/chosen": -0.8644099831581116, "logits/rejected": -0.8749092817306519, "logps/chosen": -453.7188720703125, "logps/ref_chosen": -301.11474609375, "logps/ref_rejected": -299.673095703125, "logps/rejected": -526.9615478515625, "loss": 4.3426, "margin_dpo/margin_mean": 74.684326171875, "margin_dpo/margin_std": 108.2922592163086, "step": 330 }, { "epoch": 0.6931937172774869, "fcm_dpo/beta": 0.0062918756157159805, "fcm_dpo/delta": 0.02173735201358795, "fcm_dpo/margin": 72.32933044433594, "fcm_dpo/q_t": 0.4040902853012085, "grad_norm": 102.2640151977539, "learning_rate": 1.3139467229135998e-07, "logits/chosen": -0.8764195442199707, "logits/rejected": -0.8572993278503418, "logps/chosen": -423.5179443359375, "logps/ref_chosen": -277.59149169921875, "logps/ref_rejected": -256.025634765625, "logps/rejected": -474.2813720703125, "loss": 4.5118, "margin_dpo/margin_mean": 72.32933807373047, "margin_dpo/margin_std": 127.8876953125, "step": 331 }, { "epoch": 0.6952879581151833, "fcm_dpo/beta": 0.006289025768637657, "fcm_dpo/delta": -0.021090295165777206, "fcm_dpo/margin": 77.75308227539062, "fcm_dpo/q_t": 0.3964523673057556, "grad_norm": 79.96570587158203, "learning_rate": 1.2978624834891626e-07, "logits/chosen": -0.877373993396759, "logits/rejected": -0.8500229716300964, "logps/chosen": -421.5205993652344, "logps/ref_chosen": -269.97369384765625, "logps/ref_rejected": -235.03164672851562, "logps/rejected": -464.3316345214844, "loss": 4.4297, "margin_dpo/margin_mean": 77.75308990478516, "margin_dpo/margin_std": 127.36727142333984, "step": 332 }, { "epoch": 0.6973821989528796, "fcm_dpo/beta": 0.006327156908810139, "fcm_dpo/delta": 0.02856810763478279, "fcm_dpo/margin": 65.4626693725586, "fcm_dpo/q_t": 0.41039058566093445, "grad_norm": 89.18975830078125, "learning_rate": 1.281842711051438e-07, "logits/chosen": -0.950103223323822, "logits/rejected": -0.9125382304191589, "logps/chosen": -451.1890869140625, "logps/ref_chosen": -296.76300048828125, "logps/ref_rejected": -265.97991943359375, "logps/rejected": -485.86871337890625, "loss": 4.5003, "margin_dpo/margin_mean": 65.46266174316406, "margin_dpo/margin_std": 108.16644287109375, "step": 333 }, { "epoch": 0.6994764397905759, "fcm_dpo/beta": 0.006423701532185078, "fcm_dpo/delta": -0.02974826470017433, "fcm_dpo/margin": 74.7381362915039, "fcm_dpo/q_t": 0.3955420255661011, "grad_norm": 111.00814819335938, "learning_rate": 1.2658882646922033e-07, "logits/chosen": -0.8517099618911743, "logits/rejected": -0.824093222618103, "logps/chosen": -447.6474609375, "logps/ref_chosen": -301.0367431640625, "logps/ref_rejected": -268.87652587890625, "logps/rejected": -490.225341796875, "loss": 4.3765, "margin_dpo/margin_mean": 74.7381362915039, "margin_dpo/margin_std": 112.17715454101562, "step": 334 }, { "epoch": 0.7015706806282722, "fcm_dpo/beta": 0.00610282551497221, "fcm_dpo/delta": -0.03819268196821213, "fcm_dpo/margin": 75.7378158569336, "fcm_dpo/q_t": 0.3999744653701782, "grad_norm": 98.96734619140625, "learning_rate": 1.2500000000000005e-07, "logits/chosen": -0.8547537922859192, "logits/rejected": -0.8491874933242798, "logps/chosen": -438.50799560546875, "logps/ref_chosen": -276.13275146484375, "logps/ref_rejected": -243.44203186035156, "logps/rejected": -481.5550231933594, "loss": 4.4533, "margin_dpo/margin_mean": 75.7378158569336, "margin_dpo/margin_std": 121.23136138916016, "step": 335 }, { "epoch": 0.7036649214659686, "fcm_dpo/beta": 0.006105098873376846, "fcm_dpo/delta": 0.011495206505060196, "fcm_dpo/margin": 71.84475708007812, "fcm_dpo/q_t": 0.41034749150276184, "grad_norm": 88.5465316772461, "learning_rate": 1.2341787690142435e-07, "logits/chosen": -0.8739686012268066, "logits/rejected": -0.8107472658157349, "logps/chosen": -407.6484375, "logps/ref_chosen": -246.2626495361328, "logps/ref_rejected": -261.0617980957031, "logps/rejected": -494.29241943359375, "loss": 4.5309, "margin_dpo/margin_mean": 71.8447494506836, "margin_dpo/margin_std": 125.27745819091797, "step": 336 }, { "epoch": 0.7057591623036649, "fcm_dpo/beta": 0.006118074059486389, "fcm_dpo/delta": -0.026346998289227486, "fcm_dpo/margin": 82.11372375488281, "fcm_dpo/q_t": 0.39139044284820557, "grad_norm": 101.76982879638672, "learning_rate": 1.2184254201795363e-07, "logits/chosen": -0.8743100166320801, "logits/rejected": -0.8459343314170837, "logps/chosen": -416.9528503417969, "logps/ref_chosen": -266.9937744140625, "logps/ref_rejected": -253.015625, "logps/rejected": -485.08837890625, "loss": 4.2292, "margin_dpo/margin_mean": 82.11372375488281, "margin_dpo/margin_std": 111.41446685791016, "step": 337 }, { "epoch": 0.7078534031413612, "fcm_dpo/beta": 0.006007419899106026, "fcm_dpo/delta": -0.009809102863073349, "fcm_dpo/margin": 79.27864074707031, "fcm_dpo/q_t": 0.39599183201789856, "grad_norm": 140.00198364257812, "learning_rate": 1.202740798300168e-07, "logits/chosen": -0.9043620228767395, "logits/rejected": -0.8862846493721008, "logps/chosen": -422.32366943359375, "logps/ref_chosen": -276.5925598144531, "logps/ref_rejected": -233.979248046875, "logps/rejected": -458.989013671875, "loss": 4.3198, "margin_dpo/margin_mean": 79.27864074707031, "margin_dpo/margin_std": 114.81280517578125, "step": 338 }, { "epoch": 0.7099476439790576, "fcm_dpo/beta": 0.00596853019669652, "fcm_dpo/delta": -0.022641818970441818, "fcm_dpo/margin": 87.88375854492188, "fcm_dpo/q_t": 0.38571611046791077, "grad_norm": 83.07054138183594, "learning_rate": 1.1871257444948096e-07, "logits/chosen": -0.9114202260971069, "logits/rejected": -0.8988155722618103, "logps/chosen": -456.2464599609375, "logps/ref_chosen": -303.5277404785156, "logps/ref_rejected": -283.11676025390625, "logps/rejected": -523.71923828125, "loss": 4.234, "margin_dpo/margin_mean": 87.88375091552734, "margin_dpo/margin_std": 123.4928970336914, "step": 339 }, { "epoch": 0.7120418848167539, "fcm_dpo/beta": 0.0059357453137636185, "fcm_dpo/delta": 0.029740605503320694, "fcm_dpo/margin": 70.70509338378906, "fcm_dpo/q_t": 0.4103008508682251, "grad_norm": 124.46795654296875, "learning_rate": 1.1715810961514072e-07, "logits/chosen": -0.8569005131721497, "logits/rejected": -0.8529561758041382, "logps/chosen": -426.36224365234375, "logps/ref_chosen": -261.773681640625, "logps/ref_rejected": -259.6319580078125, "logps/rejected": -494.9256286621094, "loss": 4.6878, "margin_dpo/margin_mean": 70.70509338378906, "margin_dpo/margin_std": 138.91661071777344, "step": 340 }, { "epoch": 0.7141361256544503, "fcm_dpo/beta": 0.006035798694938421, "fcm_dpo/delta": -0.00834234245121479, "fcm_dpo/margin": 62.31619644165039, "fcm_dpo/q_t": 0.42033177614212036, "grad_norm": 96.36731719970703, "learning_rate": 1.1561076868822755e-07, "logits/chosen": -0.8783236145973206, "logits/rejected": -0.8495619297027588, "logps/chosen": -501.4289245605469, "logps/ref_chosen": -315.903564453125, "logps/ref_rejected": -308.02392578125, "logps/rejected": -555.865478515625, "loss": 4.8825, "margin_dpo/margin_mean": 62.31619644165039, "margin_dpo/margin_std": 138.55262756347656, "step": 341 }, { "epoch": 0.7162303664921466, "fcm_dpo/beta": 0.006027575582265854, "fcm_dpo/delta": 0.009746946394443512, "fcm_dpo/margin": 73.49700164794922, "fcm_dpo/q_t": 0.401203453540802, "grad_norm": 96.99530792236328, "learning_rate": 1.1407063464793965e-07, "logits/chosen": -0.8633178472518921, "logits/rejected": -0.8587174415588379, "logps/chosen": -425.6961669921875, "logps/ref_chosen": -269.17864990234375, "logps/ref_rejected": -260.8977355957031, "logps/rejected": -490.9122619628906, "loss": 4.3999, "margin_dpo/margin_mean": 73.49700164794922, "margin_dpo/margin_std": 109.63931274414062, "step": 342 }, { "epoch": 0.7183246073298429, "fcm_dpo/beta": 0.0060882181860506535, "fcm_dpo/delta": 0.041927412152290344, "fcm_dpo/margin": 67.39126586914062, "fcm_dpo/q_t": 0.4095425009727478, "grad_norm": 78.4207763671875, "learning_rate": 1.125377900869913e-07, "logits/chosen": -0.8656107783317566, "logits/rejected": -0.8401020169258118, "logps/chosen": -472.4183654785156, "logps/ref_chosen": -310.719970703125, "logps/ref_rejected": -263.5224914550781, "logps/rejected": -492.6121520996094, "loss": 4.4919, "margin_dpo/margin_mean": 67.3912582397461, "margin_dpo/margin_std": 109.82903289794922, "step": 343 }, { "epoch": 0.7204188481675393, "fcm_dpo/beta": 0.006175879389047623, "fcm_dpo/delta": -0.02153255045413971, "fcm_dpo/margin": 69.64826202392578, "fcm_dpo/q_t": 0.40481624007225037, "grad_norm": 87.6580581665039, "learning_rate": 1.110123172071844e-07, "logits/chosen": -0.8676539063453674, "logits/rejected": -0.8484990000724792, "logps/chosen": -464.42755126953125, "logps/ref_chosen": -301.7999267578125, "logps/ref_rejected": -257.9061584472656, "logps/rejected": -490.1820373535156, "loss": 4.5216, "margin_dpo/margin_mean": 69.64826202392578, "margin_dpo/margin_std": 116.88276672363281, "step": 344 }, { "epoch": 0.7225130890052356, "fcm_dpo/beta": 0.006050711497664452, "fcm_dpo/delta": -0.012871744111180305, "fcm_dpo/margin": 62.111324310302734, "fcm_dpo/q_t": 0.41702836751937866, "grad_norm": 118.44066619873047, "learning_rate": 1.09494297815e-07, "logits/chosen": -0.87298983335495, "logits/rejected": -0.8727879524230957, "logps/chosen": -442.3889465332031, "logps/ref_chosen": -283.0184326171875, "logps/ref_rejected": -266.8457336425781, "logps/rejected": -488.3275451660156, "loss": 4.5952, "margin_dpo/margin_mean": 62.111324310302734, "margin_dpo/margin_std": 101.83731842041016, "step": 345 }, { "epoch": 0.724607329842932, "fcm_dpo/beta": 0.0059198313392698765, "fcm_dpo/delta": -0.045941807329654694, "fcm_dpo/margin": 80.39420318603516, "fcm_dpo/q_t": 0.3944595456123352, "grad_norm": 89.89678955078125, "learning_rate": 1.0798381331721107e-07, "logits/chosen": -0.9523677825927734, "logits/rejected": -0.9006531238555908, "logps/chosen": -438.8220520019531, "logps/ref_chosen": -268.44122314453125, "logps/ref_rejected": -227.8225860595703, "logps/rejected": -478.5976257324219, "loss": 4.3652, "margin_dpo/margin_mean": 80.39420318603516, "margin_dpo/margin_std": 115.65848541259766, "step": 346 }, { "epoch": 0.7267015706806282, "fcm_dpo/beta": 0.0058544836938381195, "fcm_dpo/delta": 0.014904415234923363, "fcm_dpo/margin": 64.3581314086914, "fcm_dpo/q_t": 0.41530972719192505, "grad_norm": 110.02278137207031, "learning_rate": 1.0648094471651722e-07, "logits/chosen": -0.8056429028511047, "logits/rejected": -0.8316211104393005, "logps/chosen": -434.23370361328125, "logps/ref_chosen": -273.80743408203125, "logps/ref_rejected": -243.77993774414062, "logps/rejected": -468.5643310546875, "loss": 4.5818, "margin_dpo/margin_mean": 64.3581314086914, "margin_dpo/margin_std": 106.8345947265625, "step": 347 }, { "epoch": 0.7287958115183246, "fcm_dpo/beta": 0.005897823721170425, "fcm_dpo/delta": 0.0033234686125069857, "fcm_dpo/margin": 60.73371124267578, "fcm_dpo/q_t": 0.42289185523986816, "grad_norm": 77.89971923828125, "learning_rate": 1.0498577260720048e-07, "logits/chosen": -0.8974190950393677, "logits/rejected": -0.8794293403625488, "logps/chosen": -443.1033935546875, "logps/ref_chosen": -285.64141845703125, "logps/ref_rejected": -265.6270446777344, "logps/rejected": -483.8227233886719, "loss": 4.6911, "margin_dpo/margin_mean": 60.73371505737305, "margin_dpo/margin_std": 115.34396362304688, "step": 348 }, { "epoch": 0.7308900523560209, "fcm_dpo/beta": 0.00578670809045434, "fcm_dpo/delta": -0.043646618723869324, "fcm_dpo/margin": 87.91434478759766, "fcm_dpo/q_t": 0.387836754322052, "grad_norm": 110.71875, "learning_rate": 1.0349837717080347e-07, "logits/chosen": -0.8430861234664917, "logits/rejected": -0.8314425349235535, "logps/chosen": -489.0180969238281, "logps/ref_chosen": -328.3175048828125, "logps/ref_rejected": -292.37872314453125, "logps/rejected": -540.99365234375, "loss": 4.2708, "margin_dpo/margin_mean": 87.91434478759766, "margin_dpo/margin_std": 124.25060272216797, "step": 349 }, { "epoch": 0.7329842931937173, "fcm_dpo/beta": 0.005712728016078472, "fcm_dpo/delta": 0.016727229580283165, "fcm_dpo/margin": 73.64863586425781, "fcm_dpo/q_t": 0.4075002670288086, "grad_norm": 79.8434829711914, "learning_rate": 1.0201883817182949e-07, "logits/chosen": -0.8432740569114685, "logits/rejected": -0.8538658618927002, "logps/chosen": -465.37115478515625, "logps/ref_chosen": -292.8046569824219, "logps/ref_rejected": -250.35504150390625, "logps/rejected": -496.5701904296875, "loss": 4.5371, "margin_dpo/margin_mean": 73.64864349365234, "margin_dpo/margin_std": 125.77432250976562, "step": 350 }, { "epoch": 0.7350785340314137, "fcm_dpo/beta": 0.0057805683463811874, "fcm_dpo/delta": 0.0012550692772492766, "fcm_dpo/margin": 47.94782257080078, "fcm_dpo/q_t": 0.43993788957595825, "grad_norm": 80.92696380615234, "learning_rate": 1.0054723495346482e-07, "logits/chosen": -0.9094070196151733, "logits/rejected": -0.8915246725082397, "logps/chosen": -473.46112060546875, "logps/ref_chosen": -311.8890380859375, "logps/ref_rejected": -263.59033203125, "logps/rejected": -473.1102294921875, "loss": 5.0341, "margin_dpo/margin_mean": 47.94782257080078, "margin_dpo/margin_std": 125.9432601928711, "step": 351 }, { "epoch": 0.7371727748691099, "fcm_dpo/beta": 0.005751358810812235, "fcm_dpo/delta": -0.01616637036204338, "fcm_dpo/margin": 87.51070404052734, "fcm_dpo/q_t": 0.3887549042701721, "grad_norm": 91.40033721923828, "learning_rate": 9.908364643332398e-08, "logits/chosen": -0.8355797529220581, "logits/rejected": -0.8050141334533691, "logps/chosen": -406.9665222167969, "logps/ref_chosen": -254.9078826904297, "logps/ref_rejected": -257.1688232421875, "logps/rejected": -496.73822021484375, "loss": 4.2611, "margin_dpo/margin_mean": 87.51070404052734, "margin_dpo/margin_std": 120.50152587890625, "step": 352 }, { "epoch": 0.7392670157068063, "fcm_dpo/beta": 0.005663630552589893, "fcm_dpo/delta": -0.017772413790225983, "fcm_dpo/margin": 75.58662414550781, "fcm_dpo/q_t": 0.4062352180480957, "grad_norm": 95.64911651611328, "learning_rate": 9.76281510992176e-08, "logits/chosen": -0.8585754036903381, "logits/rejected": -0.8523797988891602, "logps/chosen": -433.67724609375, "logps/ref_chosen": -270.3760681152344, "logps/ref_rejected": -264.65234375, "logps/rejected": -503.5401306152344, "loss": 4.4941, "margin_dpo/margin_mean": 75.58662414550781, "margin_dpo/margin_std": 123.0322265625, "step": 353 }, { "epoch": 0.7413612565445026, "fcm_dpo/beta": 0.005698430351912975, "fcm_dpo/delta": 0.01707335188984871, "fcm_dpo/margin": 56.518348693847656, "fcm_dpo/q_t": 0.4294819235801697, "grad_norm": 100.97528076171875, "learning_rate": 9.618082700494318e-08, "logits/chosen": -0.851939857006073, "logits/rejected": -0.8844606876373291, "logps/chosen": -422.6122741699219, "logps/ref_chosen": -257.6485595703125, "logps/ref_rejected": -246.94203186035156, "logps/rejected": -468.4241027832031, "loss": 4.8345, "margin_dpo/margin_mean": 56.518348693847656, "margin_dpo/margin_std": 121.96803283691406, "step": 354 }, { "epoch": 0.743455497382199, "fcm_dpo/beta": 0.005763496737927198, "fcm_dpo/delta": 0.02075032889842987, "fcm_dpo/margin": 81.39944458007812, "fcm_dpo/q_t": 0.3979400098323822, "grad_norm": 102.7159423828125, "learning_rate": 9.474175176609956e-08, "logits/chosen": -0.9034559726715088, "logits/rejected": -0.9011656641960144, "logps/chosen": -457.8494567871094, "logps/ref_chosen": -293.7086181640625, "logps/ref_rejected": -275.7286682128906, "logps/rejected": -521.2689208984375, "loss": 4.467, "margin_dpo/margin_mean": 81.39944458007812, "margin_dpo/margin_std": 131.8922882080078, "step": 355 }, { "epoch": 0.7455497382198953, "fcm_dpo/beta": 0.005846232175827026, "fcm_dpo/delta": 0.005677876062691212, "fcm_dpo/margin": 55.80099105834961, "fcm_dpo/q_t": 0.426545649766922, "grad_norm": 99.32606506347656, "learning_rate": 9.331100255592436e-08, "logits/chosen": -0.8223292231559753, "logits/rejected": -0.8494030833244324, "logps/chosen": -359.28802490234375, "logps/ref_chosen": -204.25550842285156, "logps/ref_rejected": -213.467529296875, "logps/rejected": -424.301025390625, "loss": 4.7212, "margin_dpo/margin_mean": 55.80099105834961, "margin_dpo/margin_std": 101.92318725585938, "step": 356 }, { "epoch": 0.7476439790575916, "fcm_dpo/beta": 0.005853200796991587, "fcm_dpo/delta": 0.01576152630150318, "fcm_dpo/margin": 78.51569366455078, "fcm_dpo/q_t": 0.40189990401268005, "grad_norm": 88.03360748291016, "learning_rate": 9.18886561011557e-08, "logits/chosen": -0.7967926263809204, "logits/rejected": -0.7948128581047058, "logps/chosen": -430.37744140625, "logps/ref_chosen": -266.3705749511719, "logps/ref_rejected": -239.04490661621094, "logps/rejected": -481.5674743652344, "loss": 4.4341, "margin_dpo/margin_mean": 78.51569366455078, "margin_dpo/margin_std": 123.16831970214844, "step": 357 }, { "epoch": 0.749738219895288, "fcm_dpo/beta": 0.005892930086702108, "fcm_dpo/delta": -0.016687767580151558, "fcm_dpo/margin": 88.47492980957031, "fcm_dpo/q_t": 0.3852734863758087, "grad_norm": 81.83648681640625, "learning_rate": 9.047478867791731e-08, "logits/chosen": -0.8925428986549377, "logits/rejected": -0.8706269860267639, "logps/chosen": -445.3536376953125, "logps/ref_chosen": -299.1474609375, "logps/ref_rejected": -257.2531433105469, "logps/rejected": -491.93426513671875, "loss": 4.2216, "margin_dpo/margin_mean": 88.47492980957031, "margin_dpo/margin_std": 120.04918670654297, "step": 358 }, { "epoch": 0.7518324607329843, "fcm_dpo/beta": 0.005809912458062172, "fcm_dpo/delta": -0.005308025516569614, "fcm_dpo/margin": 76.4288101196289, "fcm_dpo/q_t": 0.4017740488052368, "grad_norm": 101.37205505371094, "learning_rate": 8.906947610762825e-08, "logits/chosen": -0.8550885915756226, "logits/rejected": -0.8663427233695984, "logps/chosen": -455.0647888183594, "logps/ref_chosen": -302.99786376953125, "logps/ref_rejected": -260.4137268066406, "logps/rejected": -488.90948486328125, "loss": 4.3942, "margin_dpo/margin_mean": 76.42880249023438, "margin_dpo/margin_std": 113.6546401977539, "step": 359 }, { "epoch": 0.7539267015706806, "fcm_dpo/beta": 0.005714688450098038, "fcm_dpo/delta": -0.03394751250743866, "fcm_dpo/margin": 67.75115966796875, "fcm_dpo/q_t": 0.41331568360328674, "grad_norm": 96.14535522460938, "learning_rate": 8.76727937529367e-08, "logits/chosen": -0.8793231248855591, "logits/rejected": -0.8659530878067017, "logps/chosen": -471.56915283203125, "logps/ref_chosen": -309.6114501953125, "logps/ref_rejected": -256.64031982421875, "logps/rejected": -486.34912109375, "loss": 4.6733, "margin_dpo/margin_mean": 67.75115966796875, "margin_dpo/margin_std": 122.61837005615234, "step": 360 }, { "epoch": 0.7560209424083769, "fcm_dpo/beta": 0.005799026228487492, "fcm_dpo/delta": 0.03264398127794266, "fcm_dpo/margin": 92.61182403564453, "fcm_dpo/q_t": 0.38507190346717834, "grad_norm": 77.22454071044922, "learning_rate": 8.628481651367875e-08, "logits/chosen": -0.8238570690155029, "logits/rejected": -0.803720235824585, "logps/chosen": -393.7602233886719, "logps/ref_chosen": -263.3797607421875, "logps/ref_rejected": -271.18157958984375, "logps/rejected": -494.173828125, "loss": 4.2488, "margin_dpo/margin_mean": 92.61182403564453, "margin_dpo/margin_std": 135.1205291748047, "step": 361 }, { "epoch": 0.7581151832460733, "fcm_dpo/beta": 0.005834028124809265, "fcm_dpo/delta": 0.024925608187913895, "fcm_dpo/margin": 76.73521423339844, "fcm_dpo/q_t": 0.40017956495285034, "grad_norm": 77.04792022705078, "learning_rate": 8.490561882286135e-08, "logits/chosen": -0.8460214138031006, "logits/rejected": -0.8339633345603943, "logps/chosen": -447.5461730957031, "logps/ref_chosen": -303.2583923339844, "logps/ref_rejected": -243.22891235351562, "logps/rejected": -464.2519226074219, "loss": 4.3232, "margin_dpo/margin_mean": 76.73521423339844, "margin_dpo/margin_std": 105.83525848388672, "step": 362 }, { "epoch": 0.7602094240837697, "fcm_dpo/beta": 0.005913248751312494, "fcm_dpo/delta": -0.024684742093086243, "fcm_dpo/margin": 77.5684814453125, "fcm_dpo/q_t": 0.4012209177017212, "grad_norm": 112.09042358398438, "learning_rate": 8.353527464267104e-08, "logits/chosen": -0.8637977838516235, "logits/rejected": -0.8185281157493591, "logps/chosen": -452.3515319824219, "logps/ref_chosen": -303.34722900390625, "logps/ref_rejected": -262.05419921875, "logps/rejected": -488.62701416015625, "loss": 4.4309, "margin_dpo/margin_mean": 77.56847381591797, "margin_dpo/margin_std": 123.60404205322266, "step": 363 }, { "epoch": 0.762303664921466, "fcm_dpo/beta": 0.0058737266808748245, "fcm_dpo/delta": 0.01749351993203163, "fcm_dpo/margin": 67.9317855834961, "fcm_dpo/q_t": 0.4149549603462219, "grad_norm": 70.62501525878906, "learning_rate": 8.217385746050742e-08, "logits/chosen": -0.8313828706741333, "logits/rejected": -0.8386605978012085, "logps/chosen": -457.17083740234375, "logps/ref_chosen": -285.54376220703125, "logps/ref_rejected": -284.84619140625, "logps/rejected": -524.405029296875, "loss": 4.6634, "margin_dpo/margin_mean": 67.93179321289062, "margin_dpo/margin_std": 130.92474365234375, "step": 364 }, { "epoch": 0.7643979057591623, "fcm_dpo/beta": 0.005943778902292252, "fcm_dpo/delta": 0.03518182039260864, "fcm_dpo/margin": 68.78251647949219, "fcm_dpo/q_t": 0.4121231138706207, "grad_norm": 114.49729919433594, "learning_rate": 8.082144028504231e-08, "logits/chosen": -0.8508257865905762, "logits/rejected": -0.8534409403800964, "logps/chosen": -434.00701904296875, "logps/ref_chosen": -274.7878112792969, "logps/ref_rejected": -256.5738220214844, "logps/rejected": -484.5755310058594, "loss": 4.575, "margin_dpo/margin_mean": 68.78252410888672, "margin_dpo/margin_std": 120.49612426757812, "step": 365 }, { "epoch": 0.7664921465968586, "fcm_dpo/beta": 0.006143275648355484, "fcm_dpo/delta": 0.024667471647262573, "fcm_dpo/margin": 77.7815933227539, "fcm_dpo/q_t": 0.3957550525665283, "grad_norm": 73.5907974243164, "learning_rate": 7.947809564230445e-08, "logits/chosen": -0.8157333731651306, "logits/rejected": -0.8252131342887878, "logps/chosen": -433.6657409667969, "logps/ref_chosen": -286.6496276855469, "logps/ref_rejected": -251.97140502929688, "logps/rejected": -476.7691650390625, "loss": 4.3843, "margin_dpo/margin_mean": 77.7815933227539, "margin_dpo/margin_std": 122.67338562011719, "step": 366 }, { "epoch": 0.768586387434555, "fcm_dpo/beta": 0.006196199916303158, "fcm_dpo/delta": 0.016968997195363045, "fcm_dpo/margin": 81.11161041259766, "fcm_dpo/q_t": 0.391402930021286, "grad_norm": 110.20975494384766, "learning_rate": 7.814389557179016e-08, "logits/chosen": -0.8344327211380005, "logits/rejected": -0.8122835159301758, "logps/chosen": -446.1802673339844, "logps/ref_chosen": -301.9449768066406, "logps/ref_rejected": -265.5677185058594, "logps/rejected": -490.9145812988281, "loss": 4.2666, "margin_dpo/margin_mean": 81.11161041259766, "margin_dpo/margin_std": 115.35000610351562, "step": 367 }, { "epoch": 0.7706806282722513, "fcm_dpo/beta": 0.006140113342553377, "fcm_dpo/delta": -0.05516147240996361, "fcm_dpo/margin": 98.54963684082031, "fcm_dpo/q_t": 0.3665163218975067, "grad_norm": 78.63714599609375, "learning_rate": 7.681891162260015e-08, "logits/chosen": -0.8067573308944702, "logits/rejected": -0.8146823644638062, "logps/chosen": -430.4259948730469, "logps/ref_chosen": -294.62652587890625, "logps/ref_rejected": -258.7628479003906, "logps/rejected": -493.1119689941406, "loss": 3.8877, "margin_dpo/margin_mean": 98.54963684082031, "margin_dpo/margin_std": 106.2259521484375, "step": 368 }, { "epoch": 0.7727748691099476, "fcm_dpo/beta": 0.005970560014247894, "fcm_dpo/delta": -0.0030122532043606043, "fcm_dpo/margin": 69.67950439453125, "fcm_dpo/q_t": 0.4068659245967865, "grad_norm": 79.01779174804688, "learning_rate": 7.550321484960251e-08, "logits/chosen": -0.8858903050422668, "logits/rejected": -0.8695452213287354, "logps/chosen": -428.8231506347656, "logps/ref_chosen": -282.5057373046875, "logps/ref_rejected": -266.41607666015625, "logps/rejected": -482.4130554199219, "loss": 4.4581, "margin_dpo/margin_mean": 69.67951202392578, "margin_dpo/margin_std": 107.40840911865234, "step": 369 }, { "epoch": 0.774869109947644, "fcm_dpo/beta": 0.005926723126322031, "fcm_dpo/delta": -0.02430885285139084, "fcm_dpo/margin": 87.5254898071289, "fcm_dpo/q_t": 0.3869887888431549, "grad_norm": 82.53268432617188, "learning_rate": 7.419687580962222e-08, "logits/chosen": -0.8693802952766418, "logits/rejected": -0.8931019306182861, "logps/chosen": -385.6020202636719, "logps/ref_chosen": -251.00640869140625, "logps/ref_rejected": -238.12542724609375, "logps/rejected": -460.2464599609375, "loss": 4.2537, "margin_dpo/margin_mean": 87.52548217773438, "margin_dpo/margin_std": 125.15676879882812, "step": 370 }, { "epoch": 0.7769633507853403, "fcm_dpo/beta": 0.005934232845902443, "fcm_dpo/delta": 0.0160065945237875, "fcm_dpo/margin": 63.61900329589844, "fcm_dpo/q_t": 0.4160739779472351, "grad_norm": 117.99787139892578, "learning_rate": 7.289996455765748e-08, "logits/chosen": -0.822093665599823, "logits/rejected": -0.8111148476600647, "logps/chosen": -452.1481018066406, "logps/ref_chosen": -296.6591491699219, "logps/ref_rejected": -251.14675903320312, "logps/rejected": -470.25469970703125, "loss": 4.5981, "margin_dpo/margin_mean": 63.61901092529297, "margin_dpo/margin_std": 109.16148376464844, "step": 371 }, { "epoch": 0.7790575916230367, "fcm_dpo/beta": 0.005838276818394661, "fcm_dpo/delta": -0.018924139440059662, "fcm_dpo/margin": 88.01292419433594, "fcm_dpo/q_t": 0.3878030478954315, "grad_norm": 87.69620513916016, "learning_rate": 7.161255064312283e-08, "logits/chosen": -0.7913342714309692, "logits/rejected": -0.7832822799682617, "logps/chosen": -480.2196044921875, "logps/ref_chosen": -331.3714599609375, "logps/ref_rejected": -285.56805419921875, "logps/rejected": -522.4291381835938, "loss": 4.2501, "margin_dpo/margin_mean": 88.01290893554688, "margin_dpo/margin_std": 120.81631469726562, "step": 372 }, { "epoch": 0.7811518324607329, "fcm_dpo/beta": 0.005902654957026243, "fcm_dpo/delta": 0.03373875096440315, "fcm_dpo/margin": 82.58007049560547, "fcm_dpo/q_t": 0.3900327980518341, "grad_norm": 96.37030792236328, "learning_rate": 7.033470310611945e-08, "logits/chosen": -0.8866556286811829, "logits/rejected": -0.8576078414916992, "logps/chosen": -457.91162109375, "logps/ref_chosen": -321.9429931640625, "logps/ref_rejected": -271.2288513183594, "logps/rejected": -489.7775573730469, "loss": 4.2322, "margin_dpo/margin_mean": 82.58007049560547, "margin_dpo/margin_std": 107.1668930053711, "step": 373 }, { "epoch": 0.7832460732984293, "fcm_dpo/beta": 0.005965717602521181, "fcm_dpo/delta": -0.008612215518951416, "fcm_dpo/margin": 69.785400390625, "fcm_dpo/q_t": 0.40951642394065857, "grad_norm": 70.30329132080078, "learning_rate": 6.906649047373245e-08, "logits/chosen": -0.8783119916915894, "logits/rejected": -0.8763049840927124, "logps/chosen": -463.5076599121094, "logps/ref_chosen": -318.8375244140625, "logps/ref_rejected": -285.1805419921875, "logps/rejected": -499.6360168457031, "loss": 4.5067, "margin_dpo/margin_mean": 69.78540802001953, "margin_dpo/margin_std": 115.1856689453125, "step": 374 }, { "epoch": 0.7853403141361257, "fcm_dpo/beta": 0.0060208821669220924, "fcm_dpo/delta": 0.0241708941757679, "fcm_dpo/margin": 61.874324798583984, "fcm_dpo/q_t": 0.4182923436164856, "grad_norm": 84.8660659790039, "learning_rate": 6.780798075635675e-08, "logits/chosen": -0.8697792887687683, "logits/rejected": -0.8496103286743164, "logps/chosen": -464.14794921875, "logps/ref_chosen": -314.87579345703125, "logps/ref_rejected": -259.1965026855469, "logps/rejected": -470.3429870605469, "loss": 4.6508, "margin_dpo/margin_mean": 61.87432098388672, "margin_dpo/margin_std": 113.47924041748047, "step": 375 }, { "epoch": 0.787434554973822, "fcm_dpo/beta": 0.006095539778470993, "fcm_dpo/delta": -0.01614905148744583, "fcm_dpo/margin": 75.52915954589844, "fcm_dpo/q_t": 0.4003352224826813, "grad_norm": 118.24581146240234, "learning_rate": 6.655924144404906e-08, "logits/chosen": -0.8553508520126343, "logits/rejected": -0.8609263896942139, "logps/chosen": -435.73291015625, "logps/ref_chosen": -287.6732482910156, "logps/ref_rejected": -256.6697082519531, "logps/rejected": -480.258544921875, "loss": 4.4422, "margin_dpo/margin_mean": 75.5291519165039, "margin_dpo/margin_std": 121.42930603027344, "step": 376 }, { "epoch": 0.7895287958115184, "fcm_dpo/beta": 0.0060347155667841434, "fcm_dpo/delta": 0.02518528327345848, "fcm_dpo/margin": 52.56871795654297, "fcm_dpo/q_t": 0.4289829730987549, "grad_norm": 77.70852661132812, "learning_rate": 6.532033950290885e-08, "logits/chosen": -0.8347393274307251, "logits/rejected": -0.8331432342529297, "logps/chosen": -464.60101318359375, "logps/ref_chosen": -305.261474609375, "logps/ref_rejected": -271.8887023925781, "logps/rejected": -483.7969970703125, "loss": 4.8715, "margin_dpo/margin_mean": 52.5687141418457, "margin_dpo/margin_std": 117.13998413085938, "step": 377 }, { "epoch": 0.7916230366492146, "fcm_dpo/beta": 0.006152212154120207, "fcm_dpo/delta": 0.01931355521082878, "fcm_dpo/margin": 62.3286247253418, "fcm_dpo/q_t": 0.4168659448623657, "grad_norm": 114.11940002441406, "learning_rate": 6.409134137148736e-08, "logits/chosen": -0.8285514116287231, "logits/rejected": -0.8158466219902039, "logps/chosen": -428.2237243652344, "logps/ref_chosen": -281.5295715332031, "logps/ref_rejected": -296.980224609375, "logps/rejected": -506.0030517578125, "loss": 4.6407, "margin_dpo/margin_mean": 62.3286247253418, "margin_dpo/margin_std": 114.72845458984375, "step": 378 }, { "epoch": 0.793717277486911, "fcm_dpo/beta": 0.006289042532444, "fcm_dpo/delta": 0.027656404301524162, "fcm_dpo/margin": 69.89701080322266, "fcm_dpo/q_t": 0.4024941325187683, "grad_norm": 115.24781036376953, "learning_rate": 6.28723129572247e-08, "logits/chosen": -0.8913782238960266, "logits/rejected": -0.8727085590362549, "logps/chosen": -402.0198059082031, "logps/ref_chosen": -265.0807800292969, "logps/ref_rejected": -230.58932495117188, "logps/rejected": -437.42535400390625, "loss": 4.4366, "margin_dpo/margin_mean": 69.89701080322266, "margin_dpo/margin_std": 110.67437744140625, "step": 379 }, { "epoch": 0.7958115183246073, "fcm_dpo/beta": 0.0064195310696959496, "fcm_dpo/delta": 0.005187598988413811, "fcm_dpo/margin": 72.04703521728516, "fcm_dpo/q_t": 0.40023064613342285, "grad_norm": 91.46406555175781, "learning_rate": 6.166331963291519e-08, "logits/chosen": -0.8595871329307556, "logits/rejected": -0.8388068079948425, "logps/chosen": -452.0315856933594, "logps/ref_chosen": -305.90838623046875, "logps/ref_rejected": -286.5906677246094, "logps/rejected": -504.7608337402344, "loss": 4.4181, "margin_dpo/margin_mean": 72.04704284667969, "margin_dpo/margin_std": 111.72415161132812, "step": 380 }, { "epoch": 0.7979057591623037, "fcm_dpo/beta": 0.006307562813162804, "fcm_dpo/delta": -0.03759654238820076, "fcm_dpo/margin": 80.26667785644531, "fcm_dpo/q_t": 0.39190673828125, "grad_norm": 93.41769409179688, "learning_rate": 6.046442623320145e-08, "logits/chosen": -0.8274192810058594, "logits/rejected": -0.792305052280426, "logps/chosen": -390.3536376953125, "logps/ref_chosen": -248.75901794433594, "logps/ref_rejected": -261.37420654296875, "logps/rejected": -483.2354431152344, "loss": 4.3242, "margin_dpo/margin_mean": 80.26667785644531, "margin_dpo/margin_std": 119.75177764892578, "step": 381 }, { "epoch": 0.8, "fcm_dpo/beta": 0.006153635680675507, "fcm_dpo/delta": -0.02411348558962345, "fcm_dpo/margin": 90.74658203125, "fcm_dpo/q_t": 0.3765316903591156, "grad_norm": 71.57942962646484, "learning_rate": 5.9275697051098275e-08, "logits/chosen": -0.8713455200195312, "logits/rejected": -0.8670474290847778, "logps/chosen": -423.5090026855469, "logps/ref_chosen": -289.2114562988281, "logps/ref_rejected": -278.45751953125, "logps/rejected": -503.5016784667969, "loss": 4.0686, "margin_dpo/margin_mean": 90.74658203125, "margin_dpo/margin_std": 110.36607360839844, "step": 382 }, { "epoch": 0.8020942408376963, "fcm_dpo/beta": 0.006209728308022022, "fcm_dpo/delta": 0.009757298976182938, "fcm_dpo/margin": 81.08769989013672, "fcm_dpo/q_t": 0.3904225826263428, "grad_norm": 114.92029571533203, "learning_rate": 5.809719583454414e-08, "logits/chosen": -0.8584976196289062, "logits/rejected": -0.8407590389251709, "logps/chosen": -407.2940673828125, "logps/ref_chosen": -273.630859375, "logps/ref_rejected": -261.44024658203125, "logps/rejected": -476.191162109375, "loss": 4.2888, "margin_dpo/margin_mean": 81.08769989013672, "margin_dpo/margin_std": 116.51924133300781, "step": 383 }, { "epoch": 0.8041884816753927, "fcm_dpo/beta": 0.006165705155581236, "fcm_dpo/delta": 0.015231862664222717, "fcm_dpo/margin": 67.98896026611328, "fcm_dpo/q_t": 0.40863093733787537, "grad_norm": 102.76622772216797, "learning_rate": 5.6928985782982524e-08, "logits/chosen": -0.8647336959838867, "logits/rejected": -0.8628965616226196, "logps/chosen": -417.2807922363281, "logps/ref_chosen": -274.5699462890625, "logps/ref_rejected": -285.8253479003906, "logps/rejected": -496.5251159667969, "loss": 4.514, "margin_dpo/margin_mean": 67.98896026611328, "margin_dpo/margin_std": 113.87785339355469, "step": 384 }, { "epoch": 0.806282722513089, "fcm_dpo/beta": 0.0063270702958106995, "fcm_dpo/delta": 0.029631979763507843, "fcm_dpo/margin": 67.26148223876953, "fcm_dpo/q_t": 0.4064520299434662, "grad_norm": 106.2359390258789, "learning_rate": 5.57711295439732e-08, "logits/chosen": -0.8074496984481812, "logits/rejected": -0.8046512603759766, "logps/chosen": -428.5892639160156, "logps/ref_chosen": -284.150634765625, "logps/ref_rejected": -244.87921142578125, "logps/rejected": -456.5793762207031, "loss": 4.4353, "margin_dpo/margin_mean": 67.26148223876953, "margin_dpo/margin_std": 103.14485931396484, "step": 385 }, { "epoch": 0.8083769633507853, "fcm_dpo/beta": 0.006050161086022854, "fcm_dpo/delta": -0.05498097091913223, "fcm_dpo/margin": 88.26424407958984, "fcm_dpo/q_t": 0.3840131163597107, "grad_norm": 84.46400451660156, "learning_rate": 5.4623689209832484e-08, "logits/chosen": -0.7946534752845764, "logits/rejected": -0.7936133742332458, "logps/chosen": -453.0300598144531, "logps/ref_chosen": -320.1762390136719, "logps/ref_rejected": -302.05023193359375, "logps/rejected": -523.1682739257812, "loss": 4.1745, "margin_dpo/margin_mean": 88.26424407958984, "margin_dpo/margin_std": 112.27763366699219, "step": 386 }, { "epoch": 0.8104712041884817, "fcm_dpo/beta": 0.006075785029679537, "fcm_dpo/delta": -0.00033976510167121887, "fcm_dpo/margin": 71.9666976928711, "fcm_dpo/q_t": 0.4021925926208496, "grad_norm": 81.50077056884766, "learning_rate": 5.3486726314303175e-08, "logits/chosen": -0.843817412853241, "logits/rejected": -0.8498228788375854, "logps/chosen": -417.2613220214844, "logps/ref_chosen": -272.2801513671875, "logps/ref_rejected": -265.1615905761719, "logps/rejected": -482.10943603515625, "loss": 4.4821, "margin_dpo/margin_mean": 71.9666976928711, "margin_dpo/margin_std": 115.36253356933594, "step": 387 }, { "epoch": 0.812565445026178, "fcm_dpo/beta": 0.0059943245723843575, "fcm_dpo/delta": -0.017121536657214165, "fcm_dpo/margin": 49.354312896728516, "fcm_dpo/q_t": 0.43480098247528076, "grad_norm": 98.12901306152344, "learning_rate": 5.2360301829254745e-08, "logits/chosen": -0.827545166015625, "logits/rejected": -0.8192716240882874, "logps/chosen": -427.61083984375, "logps/ref_chosen": -272.5313415527344, "logps/ref_rejected": -239.55735778808594, "logps/rejected": -443.9912414550781, "loss": 4.9501, "margin_dpo/margin_mean": 49.354312896728516, "margin_dpo/margin_std": 115.72721862792969, "step": 388 }, { "epoch": 0.8146596858638744, "fcm_dpo/beta": 0.005862588062882423, "fcm_dpo/delta": -0.04299803823232651, "fcm_dpo/margin": 72.5375747680664, "fcm_dpo/q_t": 0.4077499210834503, "grad_norm": 74.54901123046875, "learning_rate": 5.1244476161413806e-08, "logits/chosen": -0.8496405482292175, "logits/rejected": -0.8481156229972839, "logps/chosen": -430.34722900390625, "logps/ref_chosen": -281.0892639160156, "logps/ref_rejected": -246.50045776367188, "logps/rejected": -468.2959899902344, "loss": 4.5301, "margin_dpo/margin_mean": 72.53758239746094, "margin_dpo/margin_std": 120.5777816772461, "step": 389 }, { "epoch": 0.8167539267015707, "fcm_dpo/beta": 0.005800171289592981, "fcm_dpo/delta": -0.005835860967636108, "fcm_dpo/margin": 74.20028686523438, "fcm_dpo/q_t": 0.4045044183731079, "grad_norm": 61.78060531616211, "learning_rate": 5.013930914912476e-08, "logits/chosen": -0.8756927847862244, "logits/rejected": -0.8838696479797363, "logps/chosen": -430.1648864746094, "logps/ref_chosen": -283.98748779296875, "logps/ref_rejected": -283.465087890625, "logps/rejected": -503.8427734375, "loss": 4.429, "margin_dpo/margin_mean": 74.20028686523438, "margin_dpo/margin_std": 112.28407287597656, "step": 390 }, { "epoch": 0.818848167539267, "fcm_dpo/beta": 0.005761809181421995, "fcm_dpo/delta": -0.0012128003872931004, "fcm_dpo/margin": 78.95933532714844, "fcm_dpo/q_t": 0.3996528089046478, "grad_norm": 95.56779479980469, "learning_rate": 4.904486005914027e-08, "logits/chosen": -0.8164564967155457, "logits/rejected": -0.8071346282958984, "logps/chosen": -439.491455078125, "logps/ref_chosen": -283.86138916015625, "logps/ref_rejected": -263.5093688964844, "logps/rejected": -498.0987854003906, "loss": 4.3854, "margin_dpo/margin_mean": 78.95933532714844, "margin_dpo/margin_std": 116.82786560058594, "step": 391 }, { "epoch": 0.8209424083769633, "fcm_dpo/beta": 0.005556129384785891, "fcm_dpo/delta": -0.04375196620821953, "fcm_dpo/margin": 92.68234252929688, "fcm_dpo/q_t": 0.3841486871242523, "grad_norm": 77.97855377197266, "learning_rate": 4.796118758344353e-08, "logits/chosen": -0.8111199140548706, "logits/rejected": -0.8326938152313232, "logps/chosen": -452.4772033691406, "logps/ref_chosen": -310.070068359375, "logps/ref_rejected": -252.89817810058594, "logps/rejected": -487.9876403808594, "loss": 4.1345, "margin_dpo/margin_mean": 92.68234252929688, "margin_dpo/margin_std": 109.37548828125, "step": 392 }, { "epoch": 0.8230366492146597, "fcm_dpo/beta": 0.005607847589999437, "fcm_dpo/delta": 0.028703555464744568, "fcm_dpo/margin": 70.10714721679688, "fcm_dpo/q_t": 0.4122408926486969, "grad_norm": 52.80439376831055, "learning_rate": 4.688834983610082e-08, "logits/chosen": -0.8504996299743652, "logits/rejected": -0.8361021876335144, "logps/chosen": -427.104248046875, "logps/ref_chosen": -286.7156677246094, "logps/ref_rejected": -230.00357055664062, "logps/rejected": -440.49932861328125, "loss": 4.5192, "margin_dpo/margin_mean": 70.10714721679688, "margin_dpo/margin_std": 114.96930694580078, "step": 393 }, { "epoch": 0.8251308900523561, "fcm_dpo/beta": 0.00568231288343668, "fcm_dpo/delta": 0.0005726986564695835, "fcm_dpo/margin": 64.0665283203125, "fcm_dpo/q_t": 0.42188864946365356, "grad_norm": 75.28536987304688, "learning_rate": 4.582640435014459e-08, "logits/chosen": -0.8926426768302917, "logits/rejected": -0.8892621397972107, "logps/chosen": -470.1836242675781, "logps/ref_chosen": -325.9934387207031, "logps/ref_rejected": -317.42706298828125, "logps/rejected": -525.6837158203125, "loss": 4.6822, "margin_dpo/margin_mean": 64.0665283203125, "margin_dpo/margin_std": 119.64508819580078, "step": 394 }, { "epoch": 0.8272251308900523, "fcm_dpo/beta": 0.0057205078192055225, "fcm_dpo/delta": 0.006010397337377071, "fcm_dpo/margin": 84.71833801269531, "fcm_dpo/q_t": 0.396056205034256, "grad_norm": 66.78388214111328, "learning_rate": 4.477540807448832e-08, "logits/chosen": -0.8380643129348755, "logits/rejected": -0.8483308553695679, "logps/chosen": -412.0726318359375, "logps/ref_chosen": -268.90081787109375, "logps/ref_rejected": -272.85809326171875, "logps/rejected": -500.748291015625, "loss": 4.2831, "margin_dpo/margin_mean": 84.71833801269531, "margin_dpo/margin_std": 120.70704650878906, "step": 395 }, { "epoch": 0.8293193717277487, "fcm_dpo/beta": 0.005744542460888624, "fcm_dpo/delta": 0.01056294422596693, "fcm_dpo/margin": 74.62256622314453, "fcm_dpo/q_t": 0.4045211672782898, "grad_norm": 91.99522399902344, "learning_rate": 4.373541737087263e-08, "logits/chosen": -0.8419132232666016, "logits/rejected": -0.8268328905105591, "logps/chosen": -434.677978515625, "logps/ref_chosen": -291.19830322265625, "logps/ref_rejected": -253.2803955078125, "logps/rejected": -471.3826599121094, "loss": 4.4236, "margin_dpo/margin_mean": 74.62256622314453, "margin_dpo/margin_std": 110.49628448486328, "step": 396 }, { "epoch": 0.831413612565445, "fcm_dpo/beta": 0.005709501449018717, "fcm_dpo/delta": -0.01539832167327404, "fcm_dpo/margin": 63.596290588378906, "fcm_dpo/q_t": 0.42058008909225464, "grad_norm": 97.39371490478516, "learning_rate": 4.270648801084295e-08, "logits/chosen": -0.8566058278083801, "logits/rejected": -0.8354380130767822, "logps/chosen": -454.8165588378906, "logps/ref_chosen": -309.8224182128906, "logps/ref_rejected": -291.9057922363281, "logps/rejected": -500.4962158203125, "loss": 4.7185, "margin_dpo/margin_mean": 63.596290588378906, "margin_dpo/margin_std": 120.06228637695312, "step": 397 }, { "epoch": 0.8335078534031414, "fcm_dpo/beta": 0.005609571933746338, "fcm_dpo/delta": -0.022501792758703232, "fcm_dpo/margin": 61.537353515625, "fcm_dpo/q_t": 0.4217448830604553, "grad_norm": 94.82504272460938, "learning_rate": 4.168867517275806e-08, "logits/chosen": -0.758474588394165, "logits/rejected": -0.7971174120903015, "logps/chosen": -458.55206298828125, "logps/ref_chosen": -297.8135070800781, "logps/ref_rejected": -270.5025634765625, "logps/rejected": -492.7784423828125, "loss": 4.8741, "margin_dpo/margin_mean": 61.53734588623047, "margin_dpo/margin_std": 134.36575317382812, "step": 398 }, { "epoch": 0.8356020942408376, "fcm_dpo/beta": 0.005676961503922939, "fcm_dpo/delta": 0.020742880180478096, "fcm_dpo/margin": 69.29886627197266, "fcm_dpo/q_t": 0.4137263298034668, "grad_norm": 95.31922912597656, "learning_rate": 4.0682033438831584e-08, "logits/chosen": -0.8655831813812256, "logits/rejected": -0.8285009860992432, "logps/chosen": -449.01116943359375, "logps/ref_chosen": -292.8467712402344, "logps/ref_rejected": -268.3638916015625, "logps/rejected": -493.8271484375, "loss": 4.5865, "margin_dpo/margin_mean": 69.29886627197266, "margin_dpo/margin_std": 120.91732025146484, "step": 399 }, { "epoch": 0.837696335078534, "fcm_dpo/beta": 0.005642024800181389, "fcm_dpo/delta": -0.0023907367140054703, "fcm_dpo/margin": 66.65235900878906, "fcm_dpo/q_t": 0.4158845841884613, "grad_norm": 62.70810317993164, "learning_rate": 3.968661679220467e-08, "logits/chosen": -0.9007169604301453, "logits/rejected": -0.9017296433448792, "logps/chosen": -413.6676330566406, "logps/ref_chosen": -263.6763916015625, "logps/ref_rejected": -258.67266845703125, "logps/rejected": -475.3162536621094, "loss": 4.6554, "margin_dpo/margin_mean": 66.65235900878906, "margin_dpo/margin_std": 119.08416748046875, "step": 400 }, { "epoch": 0.8397905759162304, "fcm_dpo/beta": 0.0058155423030257225, "fcm_dpo/delta": 0.041426703333854675, "fcm_dpo/margin": 74.96790313720703, "fcm_dpo/q_t": 0.4043424427509308, "grad_norm": 95.43059539794922, "learning_rate": 3.8702478614051345e-08, "logits/chosen": -0.8293420672416687, "logits/rejected": -0.8301993608474731, "logps/chosen": -465.02734375, "logps/ref_chosen": -318.2853088378906, "logps/ref_rejected": -293.75225830078125, "logps/rejected": -515.462158203125, "loss": 4.4232, "margin_dpo/margin_mean": 74.96790313720703, "margin_dpo/margin_std": 115.87003326416016, "step": 401 }, { "epoch": 0.8418848167539267, "fcm_dpo/beta": 0.005902654491364956, "fcm_dpo/delta": 0.020538516342639923, "fcm_dpo/margin": 74.3623046875, "fcm_dpo/q_t": 0.40442952513694763, "grad_norm": 93.4496841430664, "learning_rate": 3.772967168071517e-08, "logits/chosen": -0.8920119404792786, "logits/rejected": -0.8689513802528381, "logps/chosen": -452.31707763671875, "logps/ref_chosen": -309.4278564453125, "logps/ref_rejected": -282.0279846191406, "logps/rejected": -499.279541015625, "loss": 4.404, "margin_dpo/margin_mean": 74.3623046875, "margin_dpo/margin_std": 115.68778228759766, "step": 402 }, { "epoch": 0.8439790575916231, "fcm_dpo/beta": 0.005882907193154097, "fcm_dpo/delta": -0.04950367659330368, "fcm_dpo/margin": 99.05455017089844, "fcm_dpo/q_t": 0.3726005554199219, "grad_norm": 70.34110260009766, "learning_rate": 3.676824816087978e-08, "logits/chosen": -0.8733730912208557, "logits/rejected": -0.8539401292800903, "logps/chosen": -452.7962646484375, "logps/ref_chosen": -309.1670837402344, "logps/ref_rejected": -273.0928955078125, "logps/rejected": -515.776611328125, "loss": 3.9753, "margin_dpo/margin_mean": 99.05455017089844, "margin_dpo/margin_std": 114.16398620605469, "step": 403 }, { "epoch": 0.8460732984293193, "fcm_dpo/beta": 0.005841795355081558, "fcm_dpo/delta": 0.036075517535209656, "fcm_dpo/margin": 75.77478790283203, "fcm_dpo/q_t": 0.40231138467788696, "grad_norm": 118.02269744873047, "learning_rate": 3.581825961277074e-08, "logits/chosen": -0.8921858072280884, "logits/rejected": -0.8689145445823669, "logps/chosen": -454.9412536621094, "logps/ref_chosen": -297.5953674316406, "logps/ref_rejected": -257.24658203125, "logps/rejected": -490.36724853515625, "loss": 4.4651, "margin_dpo/margin_mean": 75.77479553222656, "margin_dpo/margin_std": 121.71118927001953, "step": 404 }, { "epoch": 0.8481675392670157, "fcm_dpo/beta": 0.00599704822525382, "fcm_dpo/delta": 0.032708872109651566, "fcm_dpo/margin": 80.49796295166016, "fcm_dpo/q_t": 0.39463940262794495, "grad_norm": 58.484832763671875, "learning_rate": 3.487975698139084e-08, "logits/chosen": -0.816624641418457, "logits/rejected": -0.8253002166748047, "logps/chosen": -406.7665100097656, "logps/ref_chosen": -257.96533203125, "logps/ref_rejected": -255.811279296875, "logps/rejected": -485.1104431152344, "loss": 4.2978, "margin_dpo/margin_mean": 80.49796295166016, "margin_dpo/margin_std": 116.48614501953125, "step": 405 }, { "epoch": 0.8502617801047121, "fcm_dpo/beta": 0.006106458138674498, "fcm_dpo/delta": 0.007878802716732025, "fcm_dpo/margin": 54.28831481933594, "fcm_dpo/q_t": 0.42538759112358093, "grad_norm": 88.17230224609375, "learning_rate": 3.3952790595787986e-08, "logits/chosen": -0.8533459901809692, "logits/rejected": -0.8310421705245972, "logps/chosen": -448.6619873046875, "logps/ref_chosen": -285.1810607910156, "logps/ref_rejected": -264.41351318359375, "logps/rejected": -482.18280029296875, "loss": 4.789, "margin_dpo/margin_mean": 54.28831481933594, "margin_dpo/margin_std": 110.01734924316406, "step": 406 }, { "epoch": 0.8523560209424084, "fcm_dpo/beta": 0.006114899180829525, "fcm_dpo/delta": 0.004650698509067297, "fcm_dpo/margin": 79.76924133300781, "fcm_dpo/q_t": 0.39609432220458984, "grad_norm": 83.25770568847656, "learning_rate": 3.303741016635614e-08, "logits/chosen": -0.8408970832824707, "logits/rejected": -0.8644695281982422, "logps/chosen": -431.0948486328125, "logps/ref_chosen": -265.23809814453125, "logps/ref_rejected": -219.0631561279297, "logps/rejected": -464.6891174316406, "loss": 4.3823, "margin_dpo/margin_mean": 79.76924133300781, "margin_dpo/margin_std": 124.53511810302734, "step": 407 }, { "epoch": 0.8544502617801047, "fcm_dpo/beta": 0.006033940240740776, "fcm_dpo/delta": -0.04740475118160248, "fcm_dpo/margin": 78.37755584716797, "fcm_dpo/q_t": 0.39640355110168457, "grad_norm": 73.92862701416016, "learning_rate": 3.2133664782169944e-08, "logits/chosen": -0.8809211254119873, "logits/rejected": -0.8750734329223633, "logps/chosen": -442.30926513671875, "logps/ref_chosen": -296.9726257324219, "logps/ref_rejected": -295.4786376953125, "logps/rejected": -519.1928100585938, "loss": 4.3317, "margin_dpo/margin_mean": 78.37754821777344, "margin_dpo/margin_std": 111.70768737792969, "step": 408 }, { "epoch": 0.856544502617801, "fcm_dpo/beta": 0.005910936277359724, "fcm_dpo/delta": 0.0008720820769667625, "fcm_dpo/margin": 76.07402038574219, "fcm_dpo/q_t": 0.40171951055526733, "grad_norm": 87.3036880493164, "learning_rate": 3.12416029083514e-08, "logits/chosen": -0.8479756712913513, "logits/rejected": -0.8337869048118591, "logps/chosen": -443.5372619628906, "logps/ref_chosen": -287.37933349609375, "logps/ref_rejected": -275.80291748046875, "logps/rejected": -508.03485107421875, "loss": 4.5197, "margin_dpo/margin_mean": 76.07402038574219, "margin_dpo/margin_std": 132.66152954101562, "step": 409 }, { "epoch": 0.8586387434554974, "fcm_dpo/beta": 0.00603690929710865, "fcm_dpo/delta": 0.05478723347187042, "fcm_dpo/margin": 70.18323516845703, "fcm_dpo/q_t": 0.4090788662433624, "grad_norm": 101.47988891601562, "learning_rate": 3.036127238347164e-08, "logits/chosen": -0.8647603392601013, "logits/rejected": -0.8730304837226868, "logps/chosen": -432.797607421875, "logps/ref_chosen": -281.7801818847656, "logps/ref_rejected": -266.7550354003906, "logps/rejected": -487.9557189941406, "loss": 4.5527, "margin_dpo/margin_mean": 70.18323516845703, "margin_dpo/margin_std": 122.88518524169922, "step": 410 }, { "epoch": 0.8607329842931937, "fcm_dpo/beta": 0.006130387540906668, "fcm_dpo/delta": -0.022047296166419983, "fcm_dpo/margin": 84.9005355834961, "fcm_dpo/q_t": 0.38678428530693054, "grad_norm": 81.07767486572266, "learning_rate": 2.9492720416985e-08, "logits/chosen": -0.8631331920623779, "logits/rejected": -0.821528434753418, "logps/chosen": -429.8334045410156, "logps/ref_chosen": -281.5872497558594, "logps/ref_rejected": -254.78916931152344, "logps/rejected": -487.9358825683594, "loss": 4.1487, "margin_dpo/margin_mean": 84.9005355834961, "margin_dpo/margin_std": 108.43609619140625, "step": 411 }, { "epoch": 0.86282722513089, "fcm_dpo/beta": 0.005866607651114464, "fcm_dpo/delta": -0.03245619311928749, "fcm_dpo/margin": 67.33855438232422, "fcm_dpo/q_t": 0.41442614793777466, "grad_norm": 112.37825775146484, "learning_rate": 2.863599358669755e-08, "logits/chosen": -0.8319679498672485, "logits/rejected": -0.8427531719207764, "logps/chosen": -442.21954345703125, "logps/ref_chosen": -276.796142578125, "logps/ref_rejected": -274.1370849609375, "logps/rejected": -506.8990783691406, "loss": 4.5832, "margin_dpo/margin_mean": 67.33855438232422, "margin_dpo/margin_std": 114.56013488769531, "step": 412 }, { "epoch": 0.8649214659685864, "fcm_dpo/beta": 0.005889165215194225, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 62.966705322265625, "fcm_dpo/q_t": 0.41903725266456604, "grad_norm": 77.6297836303711, "learning_rate": 2.7791137836269158e-08, "logits/chosen": -0.8528549671173096, "logits/rejected": -0.8511483669281006, "logps/chosen": -427.43853759765625, "logps/ref_chosen": -271.2745666503906, "logps/ref_rejected": -270.16912841796875, "logps/rejected": -489.2998046875, "loss": 4.6339, "margin_dpo/margin_mean": 62.96670913696289, "margin_dpo/margin_std": 115.2607421875, "step": 413 }, { "epoch": 0.8670157068062827, "fcm_dpo/beta": 0.005872922483831644, "fcm_dpo/delta": -0.018332332372665405, "fcm_dpo/margin": 81.90978240966797, "fcm_dpo/q_t": 0.39579349756240845, "grad_norm": 96.03504180908203, "learning_rate": 2.6958198472749717e-08, "logits/chosen": -0.8964706659317017, "logits/rejected": -0.8990840911865234, "logps/chosen": -453.42193603515625, "logps/ref_chosen": -297.11505126953125, "logps/ref_rejected": -271.7034606933594, "logps/rejected": -509.920166015625, "loss": 4.3142, "margin_dpo/margin_mean": 81.90979766845703, "margin_dpo/margin_std": 116.18328094482422, "step": 414 }, { "epoch": 0.8691099476439791, "fcm_dpo/beta": 0.005942903459072113, "fcm_dpo/delta": 0.03745156526565552, "fcm_dpo/margin": 75.0562744140625, "fcm_dpo/q_t": 0.39994585514068604, "grad_norm": 75.44696044921875, "learning_rate": 2.613722016414943e-08, "logits/chosen": -0.8856995701789856, "logits/rejected": -0.8719730377197266, "logps/chosen": -445.2117919921875, "logps/ref_chosen": -297.6926574707031, "logps/ref_rejected": -279.0503234863281, "logps/rejected": -501.625732421875, "loss": 4.3258, "margin_dpo/margin_mean": 75.0562744140625, "margin_dpo/margin_std": 104.50729370117188, "step": 415 }, { "epoch": 0.8712041884816754, "fcm_dpo/beta": 0.005803161766380072, "fcm_dpo/delta": -0.027818644419312477, "fcm_dpo/margin": 88.9339370727539, "fcm_dpo/q_t": 0.38566797971725464, "grad_norm": 57.11982345581055, "learning_rate": 2.5328246937043525e-08, "logits/chosen": -0.8839541077613831, "logits/rejected": -0.8885830640792847, "logps/chosen": -454.14947509765625, "logps/ref_chosen": -311.8255615234375, "logps/ref_rejected": -268.6170654296875, "logps/rejected": -499.87493896484375, "loss": 4.2069, "margin_dpo/margin_mean": 88.9339370727539, "margin_dpo/margin_std": 118.29706573486328, "step": 416 }, { "epoch": 0.8732984293193717, "fcm_dpo/beta": 0.00576009601354599, "fcm_dpo/delta": -0.03987088054418564, "fcm_dpo/margin": 76.869140625, "fcm_dpo/q_t": 0.4039611518383026, "grad_norm": 79.72920227050781, "learning_rate": 2.4531322174210973e-08, "logits/chosen": -0.8273904919624329, "logits/rejected": -0.8230270743370056, "logps/chosen": -464.0906066894531, "logps/ref_chosen": -310.43682861328125, "logps/ref_rejected": -277.15283203125, "logps/rejected": -507.67578125, "loss": 4.4403, "margin_dpo/margin_mean": 76.869140625, "margin_dpo/margin_std": 118.56803131103516, "step": 417 }, { "epoch": 0.875392670157068, "fcm_dpo/beta": 0.005557649303227663, "fcm_dpo/delta": -0.03773919492959976, "fcm_dpo/margin": 73.4759750366211, "fcm_dpo/q_t": 0.4096967577934265, "grad_norm": 82.97373962402344, "learning_rate": 2.3746488612308295e-08, "logits/chosen": -0.8288162350654602, "logits/rejected": -0.8059218525886536, "logps/chosen": -443.56036376953125, "logps/ref_chosen": -278.49591064453125, "logps/ref_rejected": -276.56671142578125, "logps/rejected": -515.107177734375, "loss": 4.5563, "margin_dpo/margin_mean": 73.4759750366211, "margin_dpo/margin_std": 122.90873718261719, "step": 418 }, { "epoch": 0.8774869109947644, "fcm_dpo/beta": 0.005459555424749851, "fcm_dpo/delta": -0.02270214632153511, "fcm_dpo/margin": 82.70216369628906, "fcm_dpo/q_t": 0.39849433302879333, "grad_norm": 79.49519348144531, "learning_rate": 2.297378833957761e-08, "logits/chosen": -0.8849822282791138, "logits/rejected": -0.8598195910453796, "logps/chosen": -464.46832275390625, "logps/ref_chosen": -298.9002380371094, "logps/ref_rejected": -246.1540985107422, "logps/rejected": -494.4243469238281, "loss": 4.4405, "margin_dpo/margin_mean": 82.70216369628906, "margin_dpo/margin_std": 130.42945861816406, "step": 419 }, { "epoch": 0.8795811518324608, "fcm_dpo/beta": 0.005352628417313099, "fcm_dpo/delta": -0.010891912505030632, "fcm_dpo/margin": 91.52397918701172, "fcm_dpo/q_t": 0.39347517490386963, "grad_norm": 85.73942565917969, "learning_rate": 2.2213262793589482e-08, "logits/chosen": -0.8193755745887756, "logits/rejected": -0.7928704619407654, "logps/chosen": -428.12677001953125, "logps/ref_chosen": -264.5608825683594, "logps/ref_rejected": -245.67031860351562, "logps/rejected": -500.7602233886719, "loss": 4.3604, "margin_dpo/margin_mean": 91.52397918701172, "margin_dpo/margin_std": 141.74795532226562, "step": 420 }, { "epoch": 0.881675392670157, "fcm_dpo/beta": 0.005432287231087685, "fcm_dpo/delta": 0.027727685868740082, "fcm_dpo/margin": 79.07569885253906, "fcm_dpo/q_t": 0.40264689922332764, "grad_norm": 86.56868743896484, "learning_rate": 2.1464952759020856e-08, "logits/chosen": -0.9046144485473633, "logits/rejected": -0.8843198418617249, "logps/chosen": -444.3966064453125, "logps/ref_chosen": -297.70501708984375, "logps/ref_rejected": -243.74771118164062, "logps/rejected": -469.5150146484375, "loss": 4.3809, "margin_dpo/margin_mean": 79.07569122314453, "margin_dpo/margin_std": 113.20210266113281, "step": 421 }, { "epoch": 0.8837696335078534, "fcm_dpo/beta": 0.005514299962669611, "fcm_dpo/delta": 0.024063242599368095, "fcm_dpo/margin": 79.2686767578125, "fcm_dpo/q_t": 0.40496283769607544, "grad_norm": 80.32569122314453, "learning_rate": 2.07288983654679e-08, "logits/chosen": -0.7527928352355957, "logits/rejected": -0.7986171245574951, "logps/chosen": -443.7966613769531, "logps/ref_chosen": -288.3587646484375, "logps/ref_rejected": -256.4377746582031, "logps/rejected": -491.14434814453125, "loss": 4.4651, "margin_dpo/margin_mean": 79.2686767578125, "margin_dpo/margin_std": 129.04014587402344, "step": 422 }, { "epoch": 0.8858638743455497, "fcm_dpo/beta": 0.0055565787479281425, "fcm_dpo/delta": -0.0007310956716537476, "fcm_dpo/margin": 89.85076904296875, "fcm_dpo/q_t": 0.3894519507884979, "grad_norm": 83.02074432373047, "learning_rate": 2.0005139085293942e-08, "logits/chosen": -0.8701252937316895, "logits/rejected": -0.8482787013053894, "logps/chosen": -452.765380859375, "logps/ref_chosen": -296.00701904296875, "logps/ref_rejected": -261.3480529785156, "logps/rejected": -507.9571838378906, "loss": 4.2225, "margin_dpo/margin_mean": 89.85076904296875, "margin_dpo/margin_std": 120.07376861572266, "step": 423 }, { "epoch": 0.8879581151832461, "fcm_dpo/beta": 0.0054782391525805, "fcm_dpo/delta": -0.01255854032933712, "fcm_dpo/margin": 79.7599105834961, "fcm_dpo/q_t": 0.401252418756485, "grad_norm": 85.05402374267578, "learning_rate": 1.9293713731512673e-08, "logits/chosen": -0.8636439442634583, "logits/rejected": -0.8568890690803528, "logps/chosen": -457.044189453125, "logps/ref_chosen": -309.421875, "logps/ref_rejected": -249.14886474609375, "logps/rejected": -476.53106689453125, "loss": 4.3359, "margin_dpo/margin_mean": 79.75990295410156, "margin_dpo/margin_std": 107.50566101074219, "step": 424 }, { "epoch": 0.8900523560209425, "fcm_dpo/beta": 0.005506892688572407, "fcm_dpo/delta": -0.0035199569538235664, "fcm_dpo/margin": 69.21192169189453, "fcm_dpo/q_t": 0.41614991426467896, "grad_norm": 69.28353118896484, "learning_rate": 1.8594660455706763e-08, "logits/chosen": -0.8524129390716553, "logits/rejected": -0.8573122024536133, "logps/chosen": -438.5583801269531, "logps/ref_chosen": -280.50909423828125, "logps/ref_rejected": -276.8252258300781, "logps/rejected": -504.08642578125, "loss": 4.6056, "margin_dpo/margin_mean": 69.21192169189453, "margin_dpo/margin_std": 119.23878479003906, "step": 425 }, { "epoch": 0.8921465968586387, "fcm_dpo/beta": 0.0055983117781579494, "fcm_dpo/delta": 0.03057074546813965, "fcm_dpo/margin": 78.6160888671875, "fcm_dpo/q_t": 0.4026082456111908, "grad_norm": 109.25310516357422, "learning_rate": 1.7908016745981856e-08, "logits/chosen": -0.8885151147842407, "logits/rejected": -0.8731377124786377, "logps/chosen": -450.5845947265625, "logps/ref_chosen": -292.78521728515625, "logps/ref_rejected": -255.62698364257812, "logps/rejected": -492.04241943359375, "loss": 4.3977, "margin_dpo/margin_mean": 78.6160888671875, "margin_dpo/margin_std": 116.64258575439453, "step": 426 }, { "epoch": 0.8942408376963351, "fcm_dpo/beta": 0.005566183011978865, "fcm_dpo/delta": -0.07854758203029633, "fcm_dpo/margin": 106.50798034667969, "fcm_dpo/q_t": 0.3709147274494171, "grad_norm": 92.78337860107422, "learning_rate": 1.7233819424956247e-08, "logits/chosen": -0.857439398765564, "logits/rejected": -0.8251030445098877, "logps/chosen": -443.70599365234375, "logps/ref_chosen": -288.7687072753906, "logps/ref_rejected": -268.4986572265625, "logps/rejected": -529.9439697265625, "loss": 4.0525, "margin_dpo/margin_mean": 106.50798797607422, "margin_dpo/margin_std": 131.25039672851562, "step": 427 }, { "epoch": 0.8963350785340314, "fcm_dpo/beta": 0.005344281904399395, "fcm_dpo/delta": 0.004657023120671511, "fcm_dpo/margin": 94.15774536132812, "fcm_dpo/q_t": 0.38818326592445374, "grad_norm": 79.40316009521484, "learning_rate": 1.6572104647786245e-08, "logits/chosen": -0.807416558265686, "logits/rejected": -0.8362680673599243, "logps/chosen": -468.56304931640625, "logps/ref_chosen": -295.5209655761719, "logps/ref_rejected": -275.71026611328125, "logps/rejected": -542.91015625, "loss": 4.2235, "margin_dpo/margin_mean": 94.15774536132812, "margin_dpo/margin_std": 125.85404205322266, "step": 428 }, { "epoch": 0.8984293193717278, "fcm_dpo/beta": 0.005322734825313091, "fcm_dpo/delta": 0.021079566329717636, "fcm_dpo/margin": 85.7845458984375, "fcm_dpo/q_t": 0.39704862236976624, "grad_norm": 104.11244201660156, "learning_rate": 1.5922907900227017e-08, "logits/chosen": -0.8256397247314453, "logits/rejected": -0.8333263993263245, "logps/chosen": -432.5317077636719, "logps/ref_chosen": -274.392333984375, "logps/ref_rejected": -258.574462890625, "logps/rejected": -502.49835205078125, "loss": 4.4423, "margin_dpo/margin_mean": 85.7845458984375, "margin_dpo/margin_std": 134.6868896484375, "step": 429 }, { "epoch": 0.900523560209424, "fcm_dpo/beta": 0.0054636141285300255, "fcm_dpo/delta": 0.007338759955018759, "fcm_dpo/margin": 73.55387115478516, "fcm_dpo/q_t": 0.41170477867126465, "grad_norm": 77.30048370361328, "learning_rate": 1.5286263996730026e-08, "logits/chosen": -0.9002894163131714, "logits/rejected": -0.8716924786567688, "logps/chosen": -446.356689453125, "logps/ref_chosen": -288.7391357421875, "logps/ref_rejected": -268.6106262207031, "logps/rejected": -499.7821044921875, "loss": 4.534, "margin_dpo/margin_mean": 73.55387115478516, "margin_dpo/margin_std": 121.59858703613281, "step": 430 }, { "epoch": 0.9026178010471204, "fcm_dpo/beta": 0.005443257745355368, "fcm_dpo/delta": -0.012642772868275642, "fcm_dpo/margin": 62.72542190551758, "fcm_dpo/q_t": 0.4242577850818634, "grad_norm": 79.91255187988281, "learning_rate": 1.4662207078575684e-08, "logits/chosen": -0.8643919229507446, "logits/rejected": -0.8308559060096741, "logps/chosen": -431.7356262207031, "logps/ref_chosen": -273.8291931152344, "logps/ref_rejected": -269.02239990234375, "logps/rejected": -489.6541748046875, "loss": 4.6841, "margin_dpo/margin_mean": 62.72542190551758, "margin_dpo/margin_std": 112.40259552001953, "step": 431 }, { "epoch": 0.9047120418848168, "fcm_dpo/beta": 0.00547941867262125, "fcm_dpo/delta": 0.029989372938871384, "fcm_dpo/margin": 82.27677917480469, "fcm_dpo/q_t": 0.40229594707489014, "grad_norm": 54.76456069946289, "learning_rate": 1.40507706120426e-08, "logits/chosen": -0.8918091654777527, "logits/rejected": -0.8755742311477661, "logps/chosen": -438.60479736328125, "logps/ref_chosen": -291.42010498046875, "logps/ref_rejected": -255.48202514648438, "logps/rejected": -484.9435119628906, "loss": 4.3868, "margin_dpo/margin_mean": 82.27677917480469, "margin_dpo/margin_std": 124.5217514038086, "step": 432 }, { "epoch": 0.9068062827225131, "fcm_dpo/beta": 0.005607725586742163, "fcm_dpo/delta": 0.02624966762959957, "fcm_dpo/margin": 74.57537841796875, "fcm_dpo/q_t": 0.40625956654548645, "grad_norm": 72.38121032714844, "learning_rate": 1.345198738661285e-08, "logits/chosen": -0.8581191301345825, "logits/rejected": -0.8533951044082642, "logps/chosen": -411.82525634765625, "logps/ref_chosen": -246.2268829345703, "logps/ref_rejected": -253.65924072265625, "logps/rejected": -493.8329772949219, "loss": 4.4275, "margin_dpo/margin_mean": 74.57537841796875, "margin_dpo/margin_std": 113.67277526855469, "step": 433 }, { "epoch": 0.9089005235602095, "fcm_dpo/beta": 0.005738706793636084, "fcm_dpo/delta": 0.04150499403476715, "fcm_dpo/margin": 73.76260375976562, "fcm_dpo/q_t": 0.40676653385162354, "grad_norm": 81.86857604980469, "learning_rate": 1.2865889513213628e-08, "logits/chosen": -0.8482558727264404, "logits/rejected": -0.8541163206100464, "logps/chosen": -465.7991027832031, "logps/ref_chosen": -295.4618225097656, "logps/ref_rejected": -256.2254333496094, "logps/rejected": -500.3253479003906, "loss": 4.4495, "margin_dpo/margin_mean": 73.76260375976562, "margin_dpo/margin_std": 114.90216064453125, "step": 434 }, { "epoch": 0.9109947643979057, "fcm_dpo/beta": 0.005786753259599209, "fcm_dpo/delta": -0.025829218327999115, "fcm_dpo/margin": 83.76541900634766, "fcm_dpo/q_t": 0.39663389325141907, "grad_norm": 120.20237731933594, "learning_rate": 1.2292508422495157e-08, "logits/chosen": -0.8453208208084106, "logits/rejected": -0.8324633240699768, "logps/chosen": -415.5836486816406, "logps/ref_chosen": -260.7384033203125, "logps/ref_rejected": -248.5688018798828, "logps/rejected": -487.179443359375, "loss": 4.2876, "margin_dpo/margin_mean": 83.76541900634766, "margin_dpo/margin_std": 117.57093811035156, "step": 435 }, { "epoch": 0.9130890052356021, "fcm_dpo/beta": 0.005721217952668667, "fcm_dpo/delta": 0.00584397790953517, "fcm_dpo/margin": 65.6273193359375, "fcm_dpo/q_t": 0.4201821982860565, "grad_norm": 95.52420806884766, "learning_rate": 1.1731874863145142e-08, "logits/chosen": -0.8257200717926025, "logits/rejected": -0.822697103023529, "logps/chosen": -488.3257141113281, "logps/ref_chosen": -319.3224792480469, "logps/ref_rejected": -299.30322265625, "logps/rejected": -533.9337768554688, "loss": 4.6615, "margin_dpo/margin_mean": 65.62731170654297, "margin_dpo/margin_std": 127.28849029541016, "step": 436 }, { "epoch": 0.9151832460732985, "fcm_dpo/beta": 0.005732518620789051, "fcm_dpo/delta": -0.007303288206458092, "fcm_dpo/margin": 88.15834045410156, "fcm_dpo/q_t": 0.3912045359611511, "grad_norm": 96.03433990478516, "learning_rate": 1.118401890024001e-08, "logits/chosen": -0.8685740232467651, "logits/rejected": -0.8529913425445557, "logps/chosen": -437.06005859375, "logps/ref_chosen": -279.1155700683594, "logps/ref_rejected": -272.904052734375, "logps/rejected": -519.0068969726562, "loss": 4.2895, "margin_dpo/margin_mean": 88.15834045410156, "margin_dpo/margin_std": 126.89192199707031, "step": 437 }, { "epoch": 0.9172774869109948, "fcm_dpo/beta": 0.005765823647379875, "fcm_dpo/delta": 0.013268672861158848, "fcm_dpo/margin": 45.000396728515625, "fcm_dpo/q_t": 0.4435799717903137, "grad_norm": 86.30032348632812, "learning_rate": 1.06489699136324e-08, "logits/chosen": -0.8477824330329895, "logits/rejected": -0.8642206788063049, "logps/chosen": -424.9432373046875, "logps/ref_chosen": -259.53076171875, "logps/ref_rejected": -241.20753479003906, "logps/rejected": -451.620361328125, "loss": 5.072, "margin_dpo/margin_mean": 45.00039291381836, "margin_dpo/margin_std": 120.71588897705078, "step": 438 }, { "epoch": 0.9193717277486911, "fcm_dpo/beta": 0.005863718222826719, "fcm_dpo/delta": 0.02462560310959816, "fcm_dpo/margin": 81.05281066894531, "fcm_dpo/q_t": 0.3949528932571411, "grad_norm": 99.85994720458984, "learning_rate": 1.0126756596375685e-08, "logits/chosen": -0.8318859934806824, "logits/rejected": -0.8341456055641174, "logps/chosen": -419.4014587402344, "logps/ref_chosen": -257.1243896484375, "logps/ref_rejected": -243.20416259765625, "logps/rejected": -486.5340270996094, "loss": 4.293, "margin_dpo/margin_mean": 81.05281066894531, "margin_dpo/margin_std": 114.34336853027344, "step": 439 }, { "epoch": 0.9214659685863874, "fcm_dpo/beta": 0.005901531782001257, "fcm_dpo/delta": 0.015944896265864372, "fcm_dpo/margin": 57.71433639526367, "fcm_dpo/q_t": 0.4226870834827423, "grad_norm": 78.55369567871094, "learning_rate": 9.617406953185136e-09, "logits/chosen": -0.8871990442276001, "logits/rejected": -0.8788408041000366, "logps/chosen": -482.9383239746094, "logps/ref_chosen": -307.680419921875, "logps/ref_rejected": -264.5030212402344, "logps/rejected": -497.47528076171875, "loss": 4.7105, "margin_dpo/margin_mean": 57.71433639526367, "margin_dpo/margin_std": 106.74054718017578, "step": 440 }, { "epoch": 0.9235602094240838, "fcm_dpo/beta": 0.005923756398260593, "fcm_dpo/delta": -0.023813921958208084, "fcm_dpo/margin": 84.93556213378906, "fcm_dpo/q_t": 0.38803669810295105, "grad_norm": 86.6318588256836, "learning_rate": 9.12094829893642e-09, "logits/chosen": -0.8496291637420654, "logits/rejected": -0.8375406265258789, "logps/chosen": -470.83203125, "logps/ref_chosen": -309.9819641113281, "logps/ref_rejected": -297.4968566894531, "logps/rejected": -543.282470703125, "loss": 4.1966, "margin_dpo/margin_mean": 84.93556213378906, "margin_dpo/margin_std": 108.48258972167969, "step": 441 }, { "epoch": 0.9256544502617801, "fcm_dpo/beta": 0.005848567001521587, "fcm_dpo/delta": 0.002810728969052434, "fcm_dpo/margin": 65.81173706054688, "fcm_dpo/q_t": 0.4157796800136566, "grad_norm": 85.53507232666016, "learning_rate": 8.637407257200496e-09, "logits/chosen": -0.9164412021636963, "logits/rejected": -0.8720095157623291, "logps/chosen": -456.07440185546875, "logps/ref_chosen": -278.9791564941406, "logps/ref_rejected": -242.87310791015625, "logps/rejected": -485.7801513671875, "loss": 4.6629, "margin_dpo/margin_mean": 65.81172943115234, "margin_dpo/margin_std": 123.5169677734375, "step": 442 }, { "epoch": 0.9277486910994764, "fcm_dpo/beta": 0.005795356351882219, "fcm_dpo/delta": -0.04416951909661293, "fcm_dpo/margin": 73.87689208984375, "fcm_dpo/q_t": 0.40389981865882874, "grad_norm": 84.2241439819336, "learning_rate": 8.166809758815895e-09, "logits/chosen": -0.806608259677887, "logits/rejected": -0.8269810676574707, "logps/chosen": -433.94921875, "logps/ref_chosen": -273.5590515136719, "logps/ref_rejected": -264.0199279785156, "logps/rejected": -498.2870178222656, "loss": 4.4568, "margin_dpo/margin_mean": 73.87689208984375, "margin_dpo/margin_std": 113.74835205078125, "step": 443 }, { "epoch": 0.9298429319371728, "fcm_dpo/beta": 0.005656501278281212, "fcm_dpo/delta": 0.010248812846839428, "fcm_dpo/margin": 81.75801849365234, "fcm_dpo/q_t": 0.4004564583301544, "grad_norm": 87.11282348632812, "learning_rate": 7.709181040498253e-09, "logits/chosen": -0.8332200646400452, "logits/rejected": -0.8207956552505493, "logps/chosen": -460.2959899902344, "logps/ref_chosen": -298.1441955566406, "logps/ref_rejected": -268.0572814941406, "logps/rejected": -511.96710205078125, "loss": 4.4441, "margin_dpo/margin_mean": 81.75801849365234, "margin_dpo/margin_std": 133.82241821289062, "step": 444 }, { "epoch": 0.9319371727748691, "fcm_dpo/beta": 0.005604690872132778, "fcm_dpo/delta": -0.05251846835017204, "fcm_dpo/margin": 70.86494445800781, "fcm_dpo/q_t": 0.41341015696525574, "grad_norm": 77.97962188720703, "learning_rate": 7.2645456434869965e-09, "logits/chosen": -0.9123114943504333, "logits/rejected": -0.9230950474739075, "logps/chosen": -417.5129699707031, "logps/ref_chosen": -254.54067993164062, "logps/ref_rejected": -264.2445983886719, "logps/rejected": -498.081787109375, "loss": 4.6085, "margin_dpo/margin_mean": 70.86495208740234, "margin_dpo/margin_std": 120.35411071777344, "step": 445 }, { "epoch": 0.9340314136125655, "fcm_dpo/beta": 0.005446064751595259, "fcm_dpo/delta": 0.013901110738515854, "fcm_dpo/margin": 83.37535858154297, "fcm_dpo/q_t": 0.39877498149871826, "grad_norm": 58.67851257324219, "learning_rate": 6.832927412229017e-09, "logits/chosen": -0.8186119794845581, "logits/rejected": -0.8207546472549438, "logps/chosen": -461.3026428222656, "logps/ref_chosen": -306.72247314453125, "logps/ref_rejected": -266.3735656738281, "logps/rejected": -504.3291320800781, "loss": 4.3583, "margin_dpo/margin_mean": 83.37535858154297, "margin_dpo/margin_std": 121.05807495117188, "step": 446 }, { "epoch": 0.9361256544502617, "fcm_dpo/beta": 0.00545801455155015, "fcm_dpo/delta": 0.007384308613836765, "fcm_dpo/margin": 91.6766357421875, "fcm_dpo/q_t": 0.3885115385055542, "grad_norm": 80.14990997314453, "learning_rate": 6.414349493100129e-09, "logits/chosen": -0.8291323781013489, "logits/rejected": -0.8268994688987732, "logps/chosen": -409.84698486328125, "logps/ref_chosen": -260.51727294921875, "logps/ref_rejected": -236.47061157226562, "logps/rejected": -477.4768981933594, "loss": 4.153, "margin_dpo/margin_mean": 91.6766357421875, "margin_dpo/margin_std": 111.279052734375, "step": 447 }, { "epoch": 0.9382198952879581, "fcm_dpo/beta": 0.005555163137614727, "fcm_dpo/delta": 0.01007094793021679, "fcm_dpo/margin": 82.22817993164062, "fcm_dpo/q_t": 0.3993835747241974, "grad_norm": 96.70314025878906, "learning_rate": 6.0088343331638756e-09, "logits/chosen": -0.8319501280784607, "logits/rejected": -0.8259201049804688, "logps/chosen": -435.1070861816406, "logps/ref_chosen": -268.78704833984375, "logps/ref_rejected": -262.1703796386719, "logps/rejected": -510.7186279296875, "loss": 4.3331, "margin_dpo/margin_mean": 82.22817993164062, "margin_dpo/margin_std": 115.585205078125, "step": 448 }, { "epoch": 0.9403141361256544, "fcm_dpo/beta": 0.005662827752530575, "fcm_dpo/delta": 0.016118278726935387, "fcm_dpo/margin": 84.56497955322266, "fcm_dpo/q_t": 0.39151531457901, "grad_norm": 94.81627655029297, "learning_rate": 5.616403678967624e-09, "logits/chosen": -0.9148420691490173, "logits/rejected": -0.8991859555244446, "logps/chosen": -485.2103271484375, "logps/ref_chosen": -331.58074951171875, "logps/ref_rejected": -240.3651123046875, "logps/rejected": -478.5596923828125, "loss": 4.3273, "margin_dpo/margin_mean": 84.56498718261719, "margin_dpo/margin_std": 122.7528076171875, "step": 449 }, { "epoch": 0.9424083769633508, "fcm_dpo/beta": 0.005679248366504908, "fcm_dpo/delta": 0.003446461632847786, "fcm_dpo/margin": 75.41486358642578, "fcm_dpo/q_t": 0.40379881858825684, "grad_norm": 96.63995361328125, "learning_rate": 5.2370785753763356e-09, "logits/chosen": -0.813703179359436, "logits/rejected": -0.8123511672019958, "logps/chosen": -454.12786865234375, "logps/ref_chosen": -284.26544189453125, "logps/ref_rejected": -250.5401611328125, "logps/rejected": -495.81744384765625, "loss": 4.3804, "margin_dpo/margin_mean": 75.41486358642578, "margin_dpo/margin_std": 105.50797271728516, "step": 450 }, { "epoch": 0.9445026178010472, "fcm_dpo/beta": 0.0055923121981322765, "fcm_dpo/delta": -0.017466533929109573, "fcm_dpo/margin": 70.07624053955078, "fcm_dpo/q_t": 0.4131358861923218, "grad_norm": 89.1207046508789, "learning_rate": 4.8708793644441086e-09, "logits/chosen": -0.8348339200019836, "logits/rejected": -0.8052613735198975, "logps/chosen": -477.8414306640625, "logps/ref_chosen": -302.3209228515625, "logps/ref_rejected": -254.09747314453125, "logps/rejected": -499.6942138671875, "loss": 4.5781, "margin_dpo/margin_mean": 70.07624816894531, "margin_dpo/margin_std": 117.26962280273438, "step": 451 }, { "epoch": 0.9465968586387434, "fcm_dpo/beta": 0.005450280383229256, "fcm_dpo/delta": -0.03073035180568695, "fcm_dpo/margin": 87.17440795898438, "fcm_dpo/q_t": 0.397165447473526, "grad_norm": 85.69123840332031, "learning_rate": 4.517825684323323e-09, "logits/chosen": -0.8877136707305908, "logits/rejected": -0.8643620014190674, "logps/chosen": -459.5303039550781, "logps/ref_chosen": -299.39215087890625, "logps/ref_rejected": -284.3475036621094, "logps/rejected": -531.6600952148438, "loss": 4.3552, "margin_dpo/margin_mean": 87.17440795898438, "margin_dpo/margin_std": 129.93690490722656, "step": 452 }, { "epoch": 0.9486910994764398, "fcm_dpo/beta": 0.005511339753866196, "fcm_dpo/delta": 0.02019287645816803, "fcm_dpo/margin": 80.06608581542969, "fcm_dpo/q_t": 0.4024481177330017, "grad_norm": 73.6377182006836, "learning_rate": 4.1779364682113794e-09, "logits/chosen": -0.8337778449058533, "logits/rejected": -0.8293156027793884, "logps/chosen": -494.59014892578125, "logps/ref_chosen": -324.6517028808594, "logps/ref_rejected": -304.1527099609375, "logps/rejected": -554.1572875976562, "loss": 4.3921, "margin_dpo/margin_mean": 80.06608581542969, "margin_dpo/margin_std": 122.65001678466797, "step": 453 }, { "epoch": 0.9507853403141361, "fcm_dpo/beta": 0.005554089788347483, "fcm_dpo/delta": -0.007839905098080635, "fcm_dpo/margin": 85.82544708251953, "fcm_dpo/q_t": 0.3937579393386841, "grad_norm": 79.51781463623047, "learning_rate": 3.851229943335393e-09, "logits/chosen": -0.8730612993240356, "logits/rejected": -0.8847228288650513, "logps/chosen": -465.4644775390625, "logps/ref_chosen": -299.6117248535156, "logps/ref_rejected": -303.74224853515625, "logps/rejected": -555.42041015625, "loss": 4.3296, "margin_dpo/margin_mean": 85.82544708251953, "margin_dpo/margin_std": 124.97549438476562, "step": 454 }, { "epoch": 0.9528795811518325, "fcm_dpo/beta": 0.005550594534724951, "fcm_dpo/delta": 0.004189205355942249, "fcm_dpo/margin": 62.6799430847168, "fcm_dpo/q_t": 0.4252288043498993, "grad_norm": 80.37173461914062, "learning_rate": 3.5377236299748147e-09, "logits/chosen": -0.8488650321960449, "logits/rejected": -0.8594434261322021, "logps/chosen": -433.9133605957031, "logps/ref_chosen": -273.6116943359375, "logps/ref_rejected": -274.4293518066406, "logps/rejected": -497.4110412597656, "loss": 4.7381, "margin_dpo/margin_mean": 62.67994689941406, "margin_dpo/margin_std": 124.70183563232422, "step": 455 }, { "epoch": 0.9549738219895288, "fcm_dpo/beta": 0.005470286589115858, "fcm_dpo/delta": -0.042232729494571686, "fcm_dpo/margin": 87.12472534179688, "fcm_dpo/q_t": 0.40261968970298767, "grad_norm": 79.4292221069336, "learning_rate": 3.2374343405217884e-09, "logits/chosen": -0.7552270889282227, "logits/rejected": -0.7663687467575073, "logps/chosen": -502.1330261230469, "logps/ref_chosen": -322.17193603515625, "logps/ref_rejected": -294.54461669921875, "logps/rejected": -561.6304321289062, "loss": 4.4833, "margin_dpo/margin_mean": 87.12472534179688, "margin_dpo/margin_std": 152.3446044921875, "step": 456 }, { "epoch": 0.9570680628272251, "fcm_dpo/beta": 0.005494946148246527, "fcm_dpo/delta": 0.039688169956207275, "fcm_dpo/margin": 90.5218505859375, "fcm_dpo/q_t": 0.38801220059394836, "grad_norm": 77.78425598144531, "learning_rate": 2.9503781785795713e-09, "logits/chosen": -0.831157386302948, "logits/rejected": -0.8342105150222778, "logps/chosen": -477.7151794433594, "logps/ref_chosen": -307.7962341308594, "logps/ref_rejected": -274.5501403808594, "logps/rejected": -534.9909057617188, "loss": 4.3304, "margin_dpo/margin_mean": 90.5218505859375, "margin_dpo/margin_std": 134.86016845703125, "step": 457 }, { "epoch": 0.9591623036649215, "fcm_dpo/beta": 0.005554028321057558, "fcm_dpo/delta": 0.004314765799790621, "fcm_dpo/margin": 78.83430480957031, "fcm_dpo/q_t": 0.40472283959388733, "grad_norm": 69.29220581054688, "learning_rate": 2.6765705380989432e-09, "logits/chosen": -0.8536108136177063, "logits/rejected": -0.8355450630187988, "logps/chosen": -466.06524658203125, "logps/ref_chosen": -297.0316467285156, "logps/ref_rejected": -276.1112365722656, "logps/rejected": -523.9791259765625, "loss": 4.4573, "margin_dpo/margin_mean": 78.83430480957031, "margin_dpo/margin_std": 125.95325469970703, "step": 458 }, { "epoch": 0.9612565445026178, "fcm_dpo/beta": 0.005654921289533377, "fcm_dpo/delta": 0.04975789785385132, "fcm_dpo/margin": 67.66356658935547, "fcm_dpo/q_t": 0.4175841510295868, "grad_norm": 79.01788330078125, "learning_rate": 2.416026102552732e-09, "logits/chosen": -0.9122970104217529, "logits/rejected": -0.9040276408195496, "logps/chosen": -451.9207763671875, "logps/ref_chosen": -293.5252990722656, "logps/ref_rejected": -289.30126953125, "logps/rejected": -515.3603515625, "loss": 4.6228, "margin_dpo/margin_mean": 67.66356658935547, "margin_dpo/margin_std": 121.35086059570312, "step": 459 }, { "epoch": 0.9633507853403142, "fcm_dpo/beta": 0.005886501632630825, "fcm_dpo/delta": 0.050066620111465454, "fcm_dpo/margin": 66.52505493164062, "fcm_dpo/q_t": 0.4135185778141022, "grad_norm": 120.4760513305664, "learning_rate": 2.168758844148272e-09, "logits/chosen": -0.8763177990913391, "logits/rejected": -0.8776261806488037, "logps/chosen": -480.5386962890625, "logps/ref_chosen": -318.7803649902344, "logps/ref_rejected": -258.7906799316406, "logps/rejected": -487.07403564453125, "loss": 4.5718, "margin_dpo/margin_mean": 66.52505493164062, "margin_dpo/margin_std": 115.43054962158203, "step": 460 }, { "epoch": 0.9654450261780104, "fcm_dpo/beta": 0.006130027584731579, "fcm_dpo/delta": 0.02878350019454956, "fcm_dpo/margin": 76.16785430908203, "fcm_dpo/q_t": 0.4008653461933136, "grad_norm": 97.33731079101562, "learning_rate": 1.9347820230782295e-09, "logits/chosen": -0.8445146083831787, "logits/rejected": -0.8692530989646912, "logps/chosen": -407.8790588378906, "logps/ref_chosen": -243.9099884033203, "logps/ref_rejected": -232.6382293701172, "logps/rejected": -472.7751159667969, "loss": 4.4989, "margin_dpo/margin_mean": 76.16785430908203, "margin_dpo/margin_std": 129.78131103515625, "step": 461 }, { "epoch": 0.9675392670157068, "fcm_dpo/beta": 0.00608763936907053, "fcm_dpo/delta": -0.02736498787999153, "fcm_dpo/margin": 90.83987426757812, "fcm_dpo/q_t": 0.38287225365638733, "grad_norm": 91.66661834716797, "learning_rate": 1.7141081868094209e-09, "logits/chosen": -0.8614755868911743, "logits/rejected": -0.8124670386314392, "logps/chosen": -508.40203857421875, "logps/ref_chosen": -344.09100341796875, "logps/ref_rejected": -252.45037841796875, "logps/rejected": -507.601318359375, "loss": 4.2288, "margin_dpo/margin_mean": 90.83987426757812, "margin_dpo/margin_std": 130.77877807617188, "step": 462 }, { "epoch": 0.9696335078534032, "fcm_dpo/beta": 0.005978195928037167, "fcm_dpo/delta": -0.015503959730267525, "fcm_dpo/margin": 72.70486450195312, "fcm_dpo/q_t": 0.40621206164360046, "grad_norm": 88.91027069091797, "learning_rate": 1.5067491694100153e-09, "logits/chosen": -0.870310366153717, "logits/rejected": -0.8297045230865479, "logps/chosen": -459.2799072265625, "logps/ref_chosen": -297.3134460449219, "logps/ref_rejected": -234.3878936767578, "logps/rejected": -469.05926513671875, "loss": 4.5566, "margin_dpo/margin_mean": 72.70486450195312, "margin_dpo/margin_std": 128.3613739013672, "step": 463 }, { "epoch": 0.9717277486910995, "fcm_dpo/beta": 0.005981272552162409, "fcm_dpo/delta": 0.0026608407497406006, "fcm_dpo/margin": 69.68535614013672, "fcm_dpo/q_t": 0.4095439314842224, "grad_norm": 143.6160888671875, "learning_rate": 1.3127160909147672e-09, "logits/chosen": -0.8514159917831421, "logits/rejected": -0.8731627464294434, "logps/chosen": -441.0743408203125, "logps/ref_chosen": -265.71075439453125, "logps/ref_rejected": -256.4108581542969, "logps/rejected": -501.45977783203125, "loss": 4.5617, "margin_dpo/margin_mean": 69.68534851074219, "margin_dpo/margin_std": 122.15727233886719, "step": 464 }, { "epoch": 0.9738219895287958, "fcm_dpo/beta": 0.00563573744148016, "fcm_dpo/delta": -0.0926453098654747, "fcm_dpo/margin": 94.17436218261719, "fcm_dpo/q_t": 0.3835006058216095, "grad_norm": 84.5207290649414, "learning_rate": 1.1320193567288527e-09, "logits/chosen": -0.9116008877754211, "logits/rejected": -0.8782393336296082, "logps/chosen": -449.47894287109375, "logps/ref_chosen": -293.1527404785156, "logps/ref_rejected": -293.70947265625, "logps/rejected": -544.2099609375, "loss": 4.196, "margin_dpo/margin_mean": 94.17436218261719, "margin_dpo/margin_std": 121.221923828125, "step": 465 }, { "epoch": 0.9759162303664921, "fcm_dpo/beta": 0.005492227151989937, "fcm_dpo/delta": -0.01386126596480608, "fcm_dpo/margin": 96.84224700927734, "fcm_dpo/q_t": 0.38248857855796814, "grad_norm": 73.73484802246094, "learning_rate": 9.64668657069706e-10, "logits/chosen": -0.8305215835571289, "logits/rejected": -0.7785975337028503, "logps/chosen": -410.4551086425781, "logps/ref_chosen": -261.4775695800781, "logps/ref_rejected": -248.36282348632812, "logps/rejected": -494.18255615234375, "loss": 4.0916, "margin_dpo/margin_mean": 96.84223175048828, "margin_dpo/margin_std": 117.41493225097656, "step": 466 }, { "epoch": 0.9780104712041885, "fcm_dpo/beta": 0.005417585372924805, "fcm_dpo/delta": -0.010228976607322693, "fcm_dpo/margin": 71.93285369873047, "fcm_dpo/q_t": 0.4153047800064087, "grad_norm": 81.48310852050781, "learning_rate": 8.106729664475176e-10, "logits/chosen": -0.8108528852462769, "logits/rejected": -0.8057974576950073, "logps/chosen": -431.22412109375, "logps/ref_chosen": -266.354248046875, "logps/ref_rejected": -277.76324462890625, "logps/rejected": -514.5659790039062, "loss": 4.6387, "margin_dpo/margin_mean": 71.93285369873047, "margin_dpo/margin_std": 132.09213256835938, "step": 467 }, { "epoch": 0.9801047120418848, "fcm_dpo/beta": 0.00549811776727438, "fcm_dpo/delta": 0.021085752174258232, "fcm_dpo/margin": 67.23999786376953, "fcm_dpo/q_t": 0.41613680124282837, "grad_norm": 82.15486145019531, "learning_rate": 6.700405431837585e-10, "logits/chosen": -0.9056313037872314, "logits/rejected": -0.8760198354721069, "logps/chosen": -480.73162841796875, "logps/ref_chosen": -317.9631652832031, "logps/ref_rejected": -261.8744201660156, "logps/rejected": -491.88287353515625, "loss": 4.6372, "margin_dpo/margin_mean": 67.23999786376953, "margin_dpo/margin_std": 119.53501892089844, "step": 468 }, { "epoch": 0.9821989528795811, "fcm_dpo/beta": 0.005514613352715969, "fcm_dpo/delta": -0.004768936894834042, "fcm_dpo/margin": 91.36453247070312, "fcm_dpo/q_t": 0.3884834349155426, "grad_norm": 64.80612182617188, "learning_rate": 5.427789289685347e-10, "logits/chosen": -0.8382606506347656, "logits/rejected": -0.8244605660438538, "logps/chosen": -479.63787841796875, "logps/ref_chosen": -324.8868103027344, "logps/ref_rejected": -264.0421447753906, "logps/rejected": -510.1578063964844, "loss": 4.2394, "margin_dpo/margin_mean": 91.36453247070312, "margin_dpo/margin_std": 125.39714050292969, "step": 469 }, { "epoch": 0.9842931937172775, "fcm_dpo/beta": 0.005484522320330143, "fcm_dpo/delta": -0.013818852603435516, "fcm_dpo/margin": 84.76747131347656, "fcm_dpo/q_t": 0.39746540784835815, "grad_norm": 83.80744171142578, "learning_rate": 4.288949484559934e-10, "logits/chosen": -0.8254011869430542, "logits/rejected": -0.8231028318405151, "logps/chosen": -470.1563720703125, "logps/ref_chosen": -314.7042541503906, "logps/ref_rejected": -259.2276611328125, "logps/rejected": -499.447265625, "loss": 4.3306, "margin_dpo/margin_mean": 84.76747131347656, "margin_dpo/margin_std": 120.29107666015625, "step": 470 }, { "epoch": 0.9863874345549738, "fcm_dpo/beta": 0.0054491800256073475, "fcm_dpo/delta": -0.004639061167836189, "fcm_dpo/margin": 78.18816375732422, "fcm_dpo/q_t": 0.41097328066825867, "grad_norm": 89.04488372802734, "learning_rate": 3.2839470889836627e-10, "logits/chosen": -0.8858464360237122, "logits/rejected": -0.8748361468315125, "logps/chosen": -462.7659912109375, "logps/ref_chosen": -292.801513671875, "logps/ref_rejected": -298.979248046875, "logps/rejected": -547.1318359375, "loss": 4.5156, "margin_dpo/margin_mean": 78.18816375732422, "margin_dpo/margin_std": 133.43814086914062, "step": 471 }, { "epoch": 0.9884816753926702, "fcm_dpo/beta": 0.005437402054667473, "fcm_dpo/delta": 0.0063332486897706985, "fcm_dpo/margin": 88.26492309570312, "fcm_dpo/q_t": 0.3961615264415741, "grad_norm": 73.95541381835938, "learning_rate": 2.412835998185092e-10, "logits/chosen": -0.8828275203704834, "logits/rejected": -0.8989793658256531, "logps/chosen": -395.50311279296875, "logps/ref_chosen": -243.37380981445312, "logps/ref_rejected": -251.12109375, "logps/rejected": -491.51531982421875, "loss": 4.277, "margin_dpo/margin_mean": 88.26492309570312, "margin_dpo/margin_std": 122.50725555419922, "step": 472 }, { "epoch": 0.9905759162303664, "fcm_dpo/beta": 0.005580813158303499, "fcm_dpo/delta": 0.042260996997356415, "fcm_dpo/margin": 91.16032409667969, "fcm_dpo/q_t": 0.3865862488746643, "grad_norm": 75.50499725341797, "learning_rate": 1.6756629272085544e-10, "logits/chosen": -0.8339266180992126, "logits/rejected": -0.8364192247390747, "logps/chosen": -444.529052734375, "logps/ref_chosen": -286.3286437988281, "logps/ref_rejected": -258.6535339355469, "logps/rejected": -508.0143127441406, "loss": 4.1634, "margin_dpo/margin_mean": 91.16032409667969, "margin_dpo/margin_std": 114.12368774414062, "step": 473 }, { "epoch": 0.9926701570680628, "fcm_dpo/beta": 0.005527782253921032, "fcm_dpo/delta": -0.028659962117671967, "fcm_dpo/margin": 75.26751708984375, "fcm_dpo/q_t": 0.40586668252944946, "grad_norm": 91.60271453857422, "learning_rate": 1.072467408408384e-10, "logits/chosen": -0.8723991513252258, "logits/rejected": -0.8708733916282654, "logps/chosen": -451.0887756347656, "logps/ref_chosen": -288.08966064453125, "logps/ref_rejected": -266.69696044921875, "logps/rejected": -504.9636535644531, "loss": 4.4347, "margin_dpo/margin_mean": 75.26751708984375, "margin_dpo/margin_std": 108.9689712524414, "step": 474 }, { "epoch": 0.9947643979057592, "fcm_dpo/beta": 0.0054468982852995396, "fcm_dpo/delta": -0.02301694266498089, "fcm_dpo/margin": 70.21611022949219, "fcm_dpo/q_t": 0.41601884365081787, "grad_norm": 91.83534240722656, "learning_rate": 6.032817893297793e-11, "logits/chosen": -0.8479362726211548, "logits/rejected": -0.8634592890739441, "logps/chosen": -409.83917236328125, "logps/ref_chosen": -256.0030517578125, "logps/ref_rejected": -244.50660705566406, "logps/rejected": -468.5588073730469, "loss": 4.5513, "margin_dpo/margin_mean": 70.21611022949219, "margin_dpo/margin_std": 114.1260986328125, "step": 475 }, { "epoch": 0.9968586387434555, "fcm_dpo/beta": 0.005282857920974493, "fcm_dpo/delta": -0.0387037992477417, "fcm_dpo/margin": 80.51129913330078, "fcm_dpo/q_t": 0.40867236256599426, "grad_norm": 65.54981994628906, "learning_rate": 2.6813123097352287e-11, "logits/chosen": -0.9229732751846313, "logits/rejected": -0.8801470398902893, "logps/chosen": -470.89788818359375, "logps/ref_chosen": -321.467529296875, "logps/ref_rejected": -295.0592956542969, "logps/rejected": -525.0009765625, "loss": 4.4865, "margin_dpo/margin_mean": 80.51129150390625, "margin_dpo/margin_std": 127.75419616699219, "step": 476 }, { "epoch": 0.9989528795811519, "fcm_dpo/beta": 0.005212708842009306, "fcm_dpo/delta": -0.02850748598575592, "fcm_dpo/margin": 78.61067199707031, "fcm_dpo/q_t": 0.4106035530567169, "grad_norm": 77.4634780883789, "learning_rate": 6.7033706447061635e-12, "logits/chosen": -0.8011319041252136, "logits/rejected": -0.8102338314056396, "logps/chosen": -445.9722900390625, "logps/ref_chosen": -277.4477233886719, "logps/ref_rejected": -244.70004272460938, "logps/rejected": -491.83526611328125, "loss": 4.6089, "margin_dpo/margin_mean": 78.61067199707031, "margin_dpo/margin_std": 140.86875915527344, "step": 477 }, { "epoch": 0.9989528795811519, "step": 477, "total_flos": 0.0, "train_loss": 4.696330676288725, "train_runtime": 6071.6812, "train_samples_per_second": 10.069, "train_steps_per_second": 0.079 } ], "logging_steps": 1, "max_steps": 477, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }