{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 200, "global_step": 681, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0014684287812041115, "fcm_dpo/beta": 0.1020384356379509, "fcm_dpo/delta": 0.19979780912399292, "fcm_dpo/margin": -0.02287006378173828, "fcm_dpo/q_t": 0.5005706548690796, "grad_norm": 85.29718780517578, "learning_rate": 0.0, "logits/chosen": -0.4974287748336792, "logits/rejected": -0.43299180269241333, "logps/chosen": -50.1435661315918, "logps/ref_chosen": -50.14883804321289, "logps/ref_rejected": -74.1280517578125, "logps/rejected": -74.09991455078125, "loss": 1.389, "margin_dpo/margin_mean": -0.02287048101425171, "margin_dpo/margin_std": 0.41920793056488037, "step": 1 }, { "epoch": 0.002936857562408223, "fcm_dpo/beta": 0.10407686978578568, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.06572261452674866, "fcm_dpo/q_t": 0.501709520816803, "grad_norm": 75.22118377685547, "learning_rate": 7.246376811594203e-09, "logits/chosen": -0.49536412954330444, "logits/rejected": -0.4594460427761078, "logps/chosen": -52.65568923950195, "logps/ref_chosen": -52.620704650878906, "logps/ref_rejected": -75.30413818359375, "logps/rejected": -75.27340698242188, "loss": 1.3935, "margin_dpo/margin_mean": -0.06572240591049194, "margin_dpo/margin_std": 0.35048407316207886, "step": 2 }, { "epoch": 0.004405286343612335, "fcm_dpo/beta": 0.10832422226667404, "fcm_dpo/delta": 0.1999952644109726, "fcm_dpo/margin": -0.004782050848007202, "fcm_dpo/q_t": 0.500129759311676, "grad_norm": 76.9161376953125, "learning_rate": 1.4492753623188406e-08, "logits/chosen": -0.4816010594367981, "logits/rejected": -0.44217073917388916, "logps/chosen": -60.95306396484375, "logps/ref_chosen": -60.981597900390625, "logps/ref_rejected": -68.67259216308594, "logps/rejected": -68.6392822265625, "loss": 1.3872, "margin_dpo/margin_mean": -0.004781663417816162, "margin_dpo/margin_std": 0.33950307965278625, "step": 3 }, { "epoch": 0.005873715124816446, "fcm_dpo/beta": 0.10832422226667404, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.06117314100265503, "fcm_dpo/q_t": 0.501656711101532, "grad_norm": 78.19353485107422, "learning_rate": 2.1739130434782606e-08, "logits/chosen": -0.4680081605911255, "logits/rejected": -0.44041645526885986, "logps/chosen": -56.78364181518555, "logps/ref_chosen": -56.7677116394043, "logps/ref_rejected": -86.64710998535156, "logps/rejected": -86.60186767578125, "loss": 1.3934, "margin_dpo/margin_mean": -0.06117379665374756, "margin_dpo/margin_std": 0.3837040364742279, "step": 4 }, { "epoch": 0.007342143906020558, "fcm_dpo/beta": 0.11499093472957611, "fcm_dpo/delta": 0.3965454697608948, "fcm_dpo/margin": 0.031099945306777954, "fcm_dpo/q_t": 0.49913665652275085, "grad_norm": 103.3542709350586, "learning_rate": 2.898550724637681e-08, "logits/chosen": -0.5145474672317505, "logits/rejected": -0.47077202796936035, "logps/chosen": -53.81591033935547, "logps/ref_chosen": -53.859375, "logps/ref_rejected": -84.14918518066406, "logps/rejected": -84.1368179321289, "loss": 1.3831, "margin_dpo/margin_mean": 0.031100064516067505, "margin_dpo/margin_std": 0.32387232780456543, "step": 5 }, { "epoch": 0.00881057268722467, "fcm_dpo/beta": 0.12198945879936218, "fcm_dpo/delta": 0.19748398661613464, "fcm_dpo/margin": -0.011603772640228271, "fcm_dpo/q_t": 0.5003781318664551, "grad_norm": 112.3940658569336, "learning_rate": 3.6231884057971014e-08, "logits/chosen": -0.5098507404327393, "logits/rejected": -0.4680579900741577, "logps/chosen": -63.016353607177734, "logps/ref_chosen": -63.007484436035156, "logps/ref_rejected": -92.64534759521484, "logps/rejected": -92.64260864257812, "loss": 1.3883, "margin_dpo/margin_mean": -0.011603862047195435, "margin_dpo/margin_std": 0.39160168170928955, "step": 6 }, { "epoch": 0.010279001468428781, "fcm_dpo/beta": 0.12692973017692566, "fcm_dpo/delta": 0.19849498569965363, "fcm_dpo/margin": 0.008680760860443115, "fcm_dpo/q_t": 0.4997381567955017, "grad_norm": 104.92411041259766, "learning_rate": 4.347826086956521e-08, "logits/chosen": -0.49692052602767944, "logits/rejected": -0.4630710482597351, "logps/chosen": -57.742652893066406, "logps/ref_chosen": -57.774818420410156, "logps/ref_rejected": -103.92059326171875, "logps/rejected": -103.89710235595703, "loss": 1.386, "margin_dpo/margin_mean": 0.008680880069732666, "margin_dpo/margin_std": 0.43539559841156006, "step": 7 }, { "epoch": 0.011747430249632892, "fcm_dpo/beta": 0.13201940059661865, "fcm_dpo/delta": 0.19657650589942932, "fcm_dpo/margin": -0.028942912817001343, "fcm_dpo/q_t": 0.5009875297546387, "grad_norm": 103.702880859375, "learning_rate": 5.0724637681159424e-08, "logits/chosen": -0.5114612579345703, "logits/rejected": -0.48591524362564087, "logps/chosen": -58.68933868408203, "logps/ref_chosen": -58.716033935546875, "logps/ref_rejected": -79.3114242553711, "logps/rejected": -79.25579071044922, "loss": 1.3909, "margin_dpo/margin_mean": -0.028942614793777466, "margin_dpo/margin_std": 0.4058513045310974, "step": 8 }, { "epoch": 0.013215859030837005, "fcm_dpo/beta": 0.13730010390281677, "fcm_dpo/delta": 0.1961011439561844, "fcm_dpo/margin": 0.01041179895401001, "fcm_dpo/q_t": 0.4996810555458069, "grad_norm": 117.20587921142578, "learning_rate": 5.797101449275362e-08, "logits/chosen": -0.4901723265647888, "logits/rejected": -0.44332075119018555, "logps/chosen": -69.85332489013672, "logps/ref_chosen": -69.8668441772461, "logps/ref_rejected": -99.6026611328125, "logps/rejected": -99.59955596923828, "loss": 1.3857, "margin_dpo/margin_mean": 0.010411262512207031, "margin_dpo/margin_std": 0.4200562834739685, "step": 9 }, { "epoch": 0.014684287812041116, "fcm_dpo/beta": 0.1400114893913269, "fcm_dpo/delta": 0.1936788260936737, "fcm_dpo/margin": 0.01942703127861023, "fcm_dpo/q_t": 0.49933522939682007, "grad_norm": 99.37556457519531, "learning_rate": 6.521739130434782e-08, "logits/chosen": -0.48142820596694946, "logits/rejected": -0.43749985098838806, "logps/chosen": -48.3685302734375, "logps/ref_chosen": -48.35768508911133, "logps/ref_rejected": -80.37206268310547, "logps/rejected": -80.40232849121094, "loss": 1.3841, "margin_dpo/margin_mean": 0.019427448511123657, "margin_dpo/margin_std": 0.374165415763855, "step": 10 }, { "epoch": 0.016152716593245228, "fcm_dpo/beta": 0.14840584993362427, "fcm_dpo/delta": 0.1952294558286667, "fcm_dpo/margin": 0.02777162194252014, "fcm_dpo/q_t": 0.49901843070983887, "grad_norm": 101.62715911865234, "learning_rate": 7.246376811594203e-08, "logits/chosen": -0.4318368434906006, "logits/rejected": -0.40546509623527527, "logps/chosen": -52.996986389160156, "logps/ref_chosen": -53.01685333251953, "logps/ref_rejected": -87.78038024902344, "logps/rejected": -87.78828430175781, "loss": 1.3829, "margin_dpo/margin_mean": 0.02777191996574402, "margin_dpo/margin_std": 0.362691193819046, "step": 11 }, { "epoch": 0.01762114537444934, "fcm_dpo/beta": 0.15135988593101501, "fcm_dpo/delta": 0.19519127905368805, "fcm_dpo/margin": 0.01589415967464447, "fcm_dpo/q_t": 0.4994090497493744, "grad_norm": 140.9950408935547, "learning_rate": 7.971014492753623e-08, "logits/chosen": -0.5231326818466187, "logits/rejected": -0.486427366733551, "logps/chosen": -61.79826354980469, "logps/ref_chosen": -61.80543518066406, "logps/ref_rejected": -104.8582763671875, "logps/rejected": -104.86698913574219, "loss": 1.3844, "margin_dpo/margin_mean": 0.01589377224445343, "margin_dpo/margin_std": 0.3336790204048157, "step": 12 }, { "epoch": 0.01908957415565345, "fcm_dpo/beta": 0.163808211684227, "fcm_dpo/delta": 0.39621487259864807, "fcm_dpo/margin": 0.023847192525863647, "fcm_dpo/q_t": 0.49905675649642944, "grad_norm": 129.1976318359375, "learning_rate": 8.695652173913042e-08, "logits/chosen": -0.4889492690563202, "logits/rejected": -0.4614258408546448, "logps/chosen": -64.2720718383789, "logps/ref_chosen": -64.2603530883789, "logps/ref_rejected": -87.20307922363281, "logps/rejected": -87.23863983154297, "loss": 1.3831, "margin_dpo/margin_mean": 0.023847103118896484, "margin_dpo/margin_std": 0.31533756852149963, "step": 13 }, { "epoch": 0.020558002936857563, "fcm_dpo/beta": 0.1737682819366455, "fcm_dpo/delta": 0.19745339453220367, "fcm_dpo/margin": 5.2809715270996094e-05, "fcm_dpo/q_t": 0.5000207424163818, "grad_norm": 148.0649871826172, "learning_rate": 9.420289855072464e-08, "logits/chosen": -0.48691490292549133, "logits/rejected": -0.4480026960372925, "logps/chosen": -58.149009704589844, "logps/ref_chosen": -58.11021041870117, "logps/ref_rejected": -104.04708099365234, "logps/rejected": -104.08592224121094, "loss": 1.3876, "margin_dpo/margin_mean": 5.313754081726074e-05, "margin_dpo/margin_std": 0.41946524381637573, "step": 14 }, { "epoch": 0.022026431718061675, "fcm_dpo/beta": 0.1737682819366455, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.01620301604270935, "fcm_dpo/q_t": 0.5006987452507019, "grad_norm": 112.28044891357422, "learning_rate": 1.0144927536231885e-07, "logits/chosen": -0.5082442164421082, "logits/rejected": -0.49048274755477905, "logps/chosen": -57.02097702026367, "logps/ref_chosen": -56.96691131591797, "logps/ref_rejected": -80.80863952636719, "logps/rejected": -80.84651184082031, "loss": 1.3902, "margin_dpo/margin_mean": -0.01620301604270935, "margin_dpo/margin_std": 0.3850763142108917, "step": 15 }, { "epoch": 0.023494860499265784, "fcm_dpo/beta": 0.18400363624095917, "fcm_dpo/delta": 0.3830709159374237, "fcm_dpo/margin": 0.09651938080787659, "fcm_dpo/q_t": 0.4957681894302368, "grad_norm": 152.3389892578125, "learning_rate": 1.0869565217391303e-07, "logits/chosen": -0.5282368659973145, "logits/rejected": -0.4875671863555908, "logps/chosen": -61.703330993652344, "logps/ref_chosen": -61.739891052246094, "logps/ref_rejected": -84.36947631835938, "logps/rejected": -84.42942810058594, "loss": 1.3699, "margin_dpo/margin_mean": 0.09651932120323181, "margin_dpo/margin_std": 0.3430694341659546, "step": 16 }, { "epoch": 0.024963289280469897, "fcm_dpo/beta": 0.19130608439445496, "fcm_dpo/delta": 0.19350671768188477, "fcm_dpo/margin": 0.0113239586353302, "fcm_dpo/q_t": 0.4994708001613617, "grad_norm": 150.90025329589844, "learning_rate": 1.1594202898550725e-07, "logits/chosen": -0.4968636631965637, "logits/rejected": -0.4591953158378601, "logps/chosen": -67.68939208984375, "logps/ref_chosen": -67.71033477783203, "logps/ref_rejected": -85.37865447998047, "logps/rejected": -85.36903381347656, "loss": 1.3851, "margin_dpo/margin_mean": 0.011324524879455566, "margin_dpo/margin_std": 0.35448014736175537, "step": 17 }, { "epoch": 0.02643171806167401, "fcm_dpo/beta": 0.19877119362354279, "fcm_dpo/delta": 0.18936890363693237, "fcm_dpo/margin": 0.026239663362503052, "fcm_dpo/q_t": 0.49871736764907837, "grad_norm": 163.8603515625, "learning_rate": 1.2318840579710146e-07, "logits/chosen": -0.48526573181152344, "logits/rejected": -0.4287734031677246, "logps/chosen": -47.76115417480469, "logps/ref_chosen": -47.7394905090332, "logps/ref_rejected": -75.4722900390625, "logps/rejected": -75.52019500732422, "loss": 1.3818, "margin_dpo/margin_mean": 0.026239246129989624, "margin_dpo/margin_std": 0.30841344594955444, "step": 18 }, { "epoch": 0.027900146842878122, "fcm_dpo/beta": 0.2146751880645752, "fcm_dpo/delta": 0.3876880407333374, "fcm_dpo/margin": 0.05988234281539917, "fcm_dpo/q_t": 0.49692660570144653, "grad_norm": 158.2223663330078, "learning_rate": 1.3043478260869563e-07, "logits/chosen": -0.4940911531448364, "logits/rejected": -0.4445168375968933, "logps/chosen": -70.18385314941406, "logps/ref_chosen": -70.20536041259766, "logps/ref_rejected": -89.7575912475586, "logps/rejected": -89.79597473144531, "loss": 1.3745, "margin_dpo/margin_mean": 0.05988246202468872, "margin_dpo/margin_std": 0.2845911383628845, "step": 19 }, { "epoch": 0.02936857562408223, "fcm_dpo/beta": 0.23141203820705414, "fcm_dpo/delta": 0.3657988905906677, "fcm_dpo/margin": 0.1521126627922058, "fcm_dpo/q_t": 0.4914783835411072, "grad_norm": 166.44786071777344, "learning_rate": 1.3768115942028986e-07, "logits/chosen": -0.5221278667449951, "logits/rejected": -0.4627057909965515, "logps/chosen": -50.78226089477539, "logps/ref_chosen": -50.80324172973633, "logps/ref_rejected": -78.82334899902344, "logps/rejected": -78.9544677734375, "loss": 1.3531, "margin_dpo/margin_mean": 0.15211281180381775, "margin_dpo/margin_std": 0.3802841305732727, "step": 20 }, { "epoch": 0.030837004405286344, "fcm_dpo/beta": 0.24898943305015564, "fcm_dpo/delta": 0.3725013732910156, "fcm_dpo/margin": 0.11505882441997528, "fcm_dpo/q_t": 0.49314409494400024, "grad_norm": 189.82945251464844, "learning_rate": 1.4492753623188405e-07, "logits/chosen": -0.5031737685203552, "logits/rejected": -0.4799532890319824, "logps/chosen": -50.0748291015625, "logps/ref_chosen": -50.063018798828125, "logps/ref_rejected": -77.86878967285156, "logps/rejected": -77.99566650390625, "loss": 1.3604, "margin_dpo/margin_mean": 0.11505846679210663, "margin_dpo/margin_std": 0.3957793712615967, "step": 21 }, { "epoch": 0.032305433186490456, "fcm_dpo/beta": 0.26710399985313416, "fcm_dpo/delta": 0.3357844650745392, "fcm_dpo/margin": 0.24726280570030212, "fcm_dpo/q_t": 0.48401886224746704, "grad_norm": 218.20628356933594, "learning_rate": 1.5217391304347825e-07, "logits/chosen": -0.485355019569397, "logits/rejected": -0.44164812564849854, "logps/chosen": -59.00285339355469, "logps/ref_chosen": -59.05763626098633, "logps/ref_rejected": -97.50466918945312, "logps/rejected": -97.69715118408203, "loss": 1.3248, "margin_dpo/margin_mean": 0.247263103723526, "margin_dpo/margin_std": 0.45322418212890625, "step": 22 }, { "epoch": 0.033773861967694566, "fcm_dpo/beta": 0.2858576774597168, "fcm_dpo/delta": 0.34957826137542725, "fcm_dpo/margin": 0.18317532539367676, "fcm_dpo/q_t": 0.4874514639377594, "grad_norm": 224.35922241210938, "learning_rate": 1.5942028985507245e-07, "logits/chosen": -0.4880913197994232, "logits/rejected": -0.46549102663993835, "logps/chosen": -60.035518646240234, "logps/ref_chosen": -60.07769775390625, "logps/ref_rejected": -81.13955688476562, "logps/rejected": -81.28054809570312, "loss": 1.3377, "margin_dpo/margin_mean": 0.18317526578903198, "margin_dpo/margin_std": 0.37381279468536377, "step": 23 }, { "epoch": 0.03524229074889868, "fcm_dpo/beta": 0.30613917112350464, "fcm_dpo/delta": 0.33740469813346863, "fcm_dpo/margin": 0.21142138540744781, "fcm_dpo/q_t": 0.48438760638237, "grad_norm": 254.505615234375, "learning_rate": 1.6666666666666665e-07, "logits/chosen": -0.5014858245849609, "logits/rejected": -0.4851001501083374, "logps/chosen": -44.279727935791016, "logps/ref_chosen": -44.29103469848633, "logps/ref_rejected": -99.12521362304688, "logps/rejected": -99.32533264160156, "loss": 1.3256, "margin_dpo/margin_mean": 0.21142247319221497, "margin_dpo/margin_std": 0.35174310207366943, "step": 24 }, { "epoch": 0.03671071953010279, "fcm_dpo/beta": 0.3263891637325287, "fcm_dpo/delta": 0.31944698095321655, "fcm_dpo/margin": 0.25548607110977173, "fcm_dpo/q_t": 0.4799500107765198, "grad_norm": 231.55694580078125, "learning_rate": 1.7391304347826085e-07, "logits/chosen": -0.5223193168640137, "logits/rejected": -0.49323010444641113, "logps/chosen": -52.49665069580078, "logps/ref_chosen": -52.537052154541016, "logps/ref_rejected": -89.34219360351562, "logps/rejected": -89.55728149414062, "loss": 1.309, "margin_dpo/margin_mean": 0.2554857134819031, "margin_dpo/margin_std": 0.39272648096084595, "step": 25 }, { "epoch": 0.0381791483113069, "fcm_dpo/beta": 0.3455832004547119, "fcm_dpo/delta": 0.25443094968795776, "fcm_dpo/margin": 0.4299333393573761, "fcm_dpo/q_t": 0.4642295241355896, "grad_norm": 271.15667724609375, "learning_rate": 1.8115942028985507e-07, "logits/chosen": -0.5332764387130737, "logits/rejected": -0.5016044974327087, "logps/chosen": -53.841529846191406, "logps/ref_chosen": -53.92280578613281, "logps/ref_rejected": -103.35971069335938, "logps/rejected": -103.70835876464844, "loss": 1.2531, "margin_dpo/margin_mean": 0.4299335479736328, "margin_dpo/margin_std": 0.5525455474853516, "step": 26 }, { "epoch": 0.039647577092511016, "fcm_dpo/beta": 0.3615337014198303, "fcm_dpo/delta": 0.23555167019367218, "fcm_dpo/margin": 0.4660835266113281, "fcm_dpo/q_t": 0.45947709679603577, "grad_norm": 305.2655944824219, "learning_rate": 1.8840579710144927e-07, "logits/chosen": -0.4840855598449707, "logits/rejected": -0.44715797901153564, "logps/chosen": -42.79102325439453, "logps/ref_chosen": -42.898529052734375, "logps/ref_rejected": -98.72419738769531, "logps/rejected": -99.08277893066406, "loss": 1.2344, "margin_dpo/margin_mean": 0.46608299016952515, "margin_dpo/margin_std": 0.5391252636909485, "step": 27 }, { "epoch": 0.041116005873715125, "fcm_dpo/beta": 0.3825129270553589, "fcm_dpo/delta": 0.2807585895061493, "fcm_dpo/margin": 0.31892046332359314, "fcm_dpo/q_t": 0.4705425500869751, "grad_norm": 266.1156921386719, "learning_rate": 1.9565217391304347e-07, "logits/chosen": -0.49522364139556885, "logits/rejected": -0.4389377236366272, "logps/chosen": -60.54632568359375, "logps/ref_chosen": -60.55650329589844, "logps/ref_rejected": -91.40111541748047, "logps/rejected": -91.7098617553711, "loss": 1.2817, "margin_dpo/margin_mean": 0.3189205825328827, "margin_dpo/margin_std": 0.6154531836509705, "step": 28 }, { "epoch": 0.042584434654919234, "fcm_dpo/beta": 0.39745935797691345, "fcm_dpo/delta": 0.17806152999401093, "fcm_dpo/margin": 0.5686274766921997, "fcm_dpo/q_t": 0.4454106092453003, "grad_norm": 315.0572814941406, "learning_rate": 2.028985507246377e-07, "logits/chosen": -0.5572994947433472, "logits/rejected": -0.5113492608070374, "logps/chosen": -57.70075225830078, "logps/ref_chosen": -57.80778503417969, "logps/ref_rejected": -97.39434814453125, "logps/rejected": -97.85594177246094, "loss": 1.1846, "margin_dpo/margin_mean": 0.5686285495758057, "margin_dpo/margin_std": 0.544101357460022, "step": 29 }, { "epoch": 0.04405286343612335, "fcm_dpo/beta": 0.4080265164375305, "fcm_dpo/delta": 0.11794110387563705, "fcm_dpo/margin": 0.6996808648109436, "fcm_dpo/q_t": 0.4310336709022522, "grad_norm": 297.2781677246094, "learning_rate": 2.1014492753623187e-07, "logits/chosen": -0.5077843070030212, "logits/rejected": -0.4774767756462097, "logps/chosen": -52.4200553894043, "logps/ref_chosen": -52.577369689941406, "logps/ref_rejected": -98.48920440673828, "logps/rejected": -99.03157043457031, "loss": 1.136, "margin_dpo/margin_mean": 0.6996806859970093, "margin_dpo/margin_std": 0.6045821905136108, "step": 30 }, { "epoch": 0.04552129221732746, "fcm_dpo/beta": 0.42595115303993225, "fcm_dpo/delta": 0.2233620285987854, "fcm_dpo/margin": 0.42216721177101135, "fcm_dpo/q_t": 0.45694005489349365, "grad_norm": 259.3175048828125, "learning_rate": 2.1739130434782607e-07, "logits/chosen": -0.5219802856445312, "logits/rejected": -0.4791126251220703, "logps/chosen": -63.74620819091797, "logps/ref_chosen": -63.806922912597656, "logps/ref_rejected": -72.89400482177734, "logps/rejected": -73.25546264648438, "loss": 1.2309, "margin_dpo/margin_mean": 0.4221669137477875, "margin_dpo/margin_std": 0.6024155616760254, "step": 31 }, { "epoch": 0.04698972099853157, "fcm_dpo/beta": 0.4344155192375183, "fcm_dpo/delta": 0.08483266085386276, "fcm_dpo/margin": 0.7318711280822754, "fcm_dpo/q_t": 0.42483460903167725, "grad_norm": 283.7579040527344, "learning_rate": 2.2463768115942027e-07, "logits/chosen": -0.5009861588478088, "logits/rejected": -0.45893198251724243, "logps/chosen": -62.55799865722656, "logps/ref_chosen": -62.739524841308594, "logps/ref_rejected": -89.3175048828125, "logps/rejected": -89.86784362792969, "loss": 1.1257, "margin_dpo/margin_mean": 0.7318712472915649, "margin_dpo/margin_std": 0.8441380262374878, "step": 32 }, { "epoch": 0.048458149779735685, "fcm_dpo/beta": 0.44722574949264526, "fcm_dpo/delta": 0.138888880610466, "fcm_dpo/margin": 0.5908744931221008, "fcm_dpo/q_t": 0.43646401166915894, "grad_norm": 274.2132873535156, "learning_rate": 2.318840579710145e-07, "logits/chosen": -0.5129827260971069, "logits/rejected": -0.48746123909950256, "logps/chosen": -53.16265106201172, "logps/ref_chosen": -53.26097106933594, "logps/ref_rejected": -87.8851318359375, "logps/rejected": -88.377685546875, "loss": 1.1604, "margin_dpo/margin_mean": 0.5908748507499695, "margin_dpo/margin_std": 0.6484410166740417, "step": 33 }, { "epoch": 0.049926578560939794, "fcm_dpo/beta": 0.45357927680015564, "fcm_dpo/delta": 0.06588704884052277, "fcm_dpo/margin": 0.7414969205856323, "fcm_dpo/q_t": 0.4207090139389038, "grad_norm": 277.12481689453125, "learning_rate": 2.391304347826087e-07, "logits/chosen": -0.48127520084381104, "logits/rejected": -0.4638293981552124, "logps/chosen": -50.74750518798828, "logps/ref_chosen": -50.81732940673828, "logps/ref_rejected": -101.92184448242188, "logps/rejected": -102.59352111816406, "loss": 1.113, "margin_dpo/margin_mean": 0.7414963841438293, "margin_dpo/margin_std": 0.8493252992630005, "step": 34 }, { "epoch": 0.0513950073421439, "fcm_dpo/beta": 0.4451371729373932, "fcm_dpo/delta": -0.1301283985376358, "fcm_dpo/margin": 1.1745426654815674, "fcm_dpo/q_t": 0.38051727414131165, "grad_norm": 252.35678100585938, "learning_rate": 2.463768115942029e-07, "logits/chosen": -0.508255124092102, "logits/rejected": -0.4715331196784973, "logps/chosen": -50.89655303955078, "logps/ref_chosen": -51.02449035644531, "logps/ref_rejected": -106.82443237304688, "logps/rejected": -107.87103271484375, "loss": 0.9853, "margin_dpo/margin_mean": 1.174542784690857, "margin_dpo/margin_std": 1.1653451919555664, "step": 35 }, { "epoch": 0.05286343612334802, "fcm_dpo/beta": 0.44032353162765503, "fcm_dpo/delta": -0.08942731469869614, "fcm_dpo/margin": 1.101135015487671, "fcm_dpo/q_t": 0.38899609446525574, "grad_norm": 212.2360382080078, "learning_rate": 2.536231884057971e-07, "logits/chosen": -0.5859663486480713, "logits/rejected": -0.5510756373405457, "logps/chosen": -51.97636795043945, "logps/ref_chosen": -51.991493225097656, "logps/ref_rejected": -86.0406265258789, "logps/rejected": -87.12663269042969, "loss": 1.0312, "margin_dpo/margin_mean": 1.1011340618133545, "margin_dpo/margin_std": 1.233978033065796, "step": 36 }, { "epoch": 0.05433186490455213, "fcm_dpo/beta": 0.4299408495426178, "fcm_dpo/delta": -0.03354019671678543, "fcm_dpo/margin": 1.0015331506729126, "fcm_dpo/q_t": 0.40413135290145874, "grad_norm": 196.89625549316406, "learning_rate": 2.6086956521739126e-07, "logits/chosen": -0.5104124546051025, "logits/rejected": -0.46673229336738586, "logps/chosen": -62.82787322998047, "logps/ref_chosen": -62.807106018066406, "logps/ref_rejected": -77.89507293701172, "logps/rejected": -78.91737365722656, "loss": 1.0822, "margin_dpo/margin_mean": 1.0015329122543335, "margin_dpo/margin_std": 1.3581469058990479, "step": 37 }, { "epoch": 0.055800293685756244, "fcm_dpo/beta": 0.42291873693466187, "fcm_dpo/delta": -0.16829678416252136, "fcm_dpo/margin": 1.3221113681793213, "fcm_dpo/q_t": 0.37941908836364746, "grad_norm": 206.74301147460938, "learning_rate": 2.681159420289855e-07, "logits/chosen": -0.5344926118850708, "logits/rejected": -0.5018054842948914, "logps/chosen": -48.27055358886719, "logps/ref_chosen": -48.39051818847656, "logps/ref_rejected": -97.91244506835938, "logps/rejected": -99.11459350585938, "loss": 1.0001, "margin_dpo/margin_mean": 1.3221120834350586, "margin_dpo/margin_std": 1.6323070526123047, "step": 38 }, { "epoch": 0.05726872246696035, "fcm_dpo/beta": 0.3985295593738556, "fcm_dpo/delta": -0.29377901554107666, "fcm_dpo/margin": 1.6867289543151855, "fcm_dpo/q_t": 0.34562230110168457, "grad_norm": 223.958984375, "learning_rate": 2.753623188405797e-07, "logits/chosen": -0.5842019319534302, "logits/rejected": -0.5461462736129761, "logps/chosen": -50.60572814941406, "logps/ref_chosen": -50.75047302246094, "logps/ref_rejected": -78.56951141357422, "logps/rejected": -80.11149597167969, "loss": 0.8956, "margin_dpo/margin_mean": 1.686728835105896, "margin_dpo/margin_std": 1.4286189079284668, "step": 39 }, { "epoch": 0.05873715124816446, "fcm_dpo/beta": 0.3848017454147339, "fcm_dpo/delta": -0.19280429184436798, "fcm_dpo/margin": 1.5124502182006836, "fcm_dpo/q_t": 0.3725898265838623, "grad_norm": 157.12765502929688, "learning_rate": 2.8260869565217386e-07, "logits/chosen": -0.525189995765686, "logits/rejected": -0.4957452118396759, "logps/chosen": -57.79692077636719, "logps/ref_chosen": -57.985069274902344, "logps/ref_rejected": -74.3000717163086, "logps/rejected": -75.62437438964844, "loss": 0.9784, "margin_dpo/margin_mean": 1.512451171875, "margin_dpo/margin_std": 1.693179965019226, "step": 40 }, { "epoch": 0.06020558002936858, "fcm_dpo/beta": 0.3643730580806732, "fcm_dpo/delta": -0.2775127589702606, "fcm_dpo/margin": 1.809408187866211, "fcm_dpo/q_t": 0.3553549349308014, "grad_norm": 166.80831909179688, "learning_rate": 2.898550724637681e-07, "logits/chosen": -0.5380607843399048, "logits/rejected": -0.5014735460281372, "logps/chosen": -62.65604019165039, "logps/ref_chosen": -62.69581604003906, "logps/ref_rejected": -97.02352905273438, "logps/rejected": -98.79315948486328, "loss": 0.9346, "margin_dpo/margin_mean": 1.8094090223312378, "margin_dpo/margin_std": 1.9210506677627563, "step": 41 }, { "epoch": 0.06167400881057269, "fcm_dpo/beta": 0.33479398488998413, "fcm_dpo/delta": -0.4733234643936157, "fcm_dpo/margin": 2.4904072284698486, "fcm_dpo/q_t": 0.3218887448310852, "grad_norm": 154.84799194335938, "learning_rate": 2.971014492753623e-07, "logits/chosen": -0.5270863771438599, "logits/rejected": -0.4803048372268677, "logps/chosen": -58.758487701416016, "logps/ref_chosen": -58.966426849365234, "logps/ref_rejected": -109.90837097167969, "logps/rejected": -112.19084167480469, "loss": 0.8362, "margin_dpo/margin_mean": 2.4904069900512695, "margin_dpo/margin_std": 2.361166477203369, "step": 42 }, { "epoch": 0.0631424375917768, "fcm_dpo/beta": 0.3071477711200714, "fcm_dpo/delta": -0.3796875774860382, "fcm_dpo/margin": 2.438110113143921, "fcm_dpo/q_t": 0.32926446199417114, "grad_norm": 144.34567260742188, "learning_rate": 3.043478260869565e-07, "logits/chosen": -0.5323761701583862, "logits/rejected": -0.5072122812271118, "logps/chosen": -53.617584228515625, "logps/ref_chosen": -54.15599822998047, "logps/ref_rejected": -96.48019409179688, "logps/rejected": -98.37989044189453, "loss": 0.8428, "margin_dpo/margin_mean": 2.438110113143921, "margin_dpo/margin_std": 1.899155855178833, "step": 43 }, { "epoch": 0.06461086637298091, "fcm_dpo/beta": 0.28413423895835876, "fcm_dpo/delta": -0.40553855895996094, "fcm_dpo/margin": 2.7193827629089355, "fcm_dpo/q_t": 0.3261229395866394, "grad_norm": 146.47552490234375, "learning_rate": 3.115942028985507e-07, "logits/chosen": -0.4545024633407593, "logits/rejected": -0.43494725227355957, "logps/chosen": -49.821990966796875, "logps/ref_chosen": -50.07849884033203, "logps/ref_rejected": -108.78376007080078, "logps/rejected": -111.24664306640625, "loss": 0.836, "margin_dpo/margin_mean": 2.7193822860717773, "margin_dpo/margin_std": 2.205606460571289, "step": 44 }, { "epoch": 0.06607929515418502, "fcm_dpo/beta": 0.27153611183166504, "fcm_dpo/delta": -0.2071065902709961, "fcm_dpo/margin": 2.1919541358947754, "fcm_dpo/q_t": 0.3700290322303772, "grad_norm": 116.2034683227539, "learning_rate": 3.188405797101449e-07, "logits/chosen": -0.5074399709701538, "logits/rejected": -0.49551981687545776, "logps/chosen": -48.25216293334961, "logps/ref_chosen": -48.4149284362793, "logps/ref_rejected": -77.93643188476562, "logps/rejected": -79.96562194824219, "loss": 0.9723, "margin_dpo/margin_mean": 2.1919541358947754, "margin_dpo/margin_std": 2.4712252616882324, "step": 45 }, { "epoch": 0.06754772393538913, "fcm_dpo/beta": 0.2555280327796936, "fcm_dpo/delta": -0.3303987383842468, "fcm_dpo/margin": 2.7688708305358887, "fcm_dpo/q_t": 0.34917011857032776, "grad_norm": 124.21039581298828, "learning_rate": 3.260869565217391e-07, "logits/chosen": -0.5273990631103516, "logits/rejected": -0.4762783646583557, "logps/chosen": -55.79579162597656, "logps/ref_chosen": -55.999427795410156, "logps/ref_rejected": -95.652587890625, "logps/rejected": -98.21781921386719, "loss": 0.922, "margin_dpo/margin_mean": 2.768871307373047, "margin_dpo/margin_std": 3.0828027725219727, "step": 46 }, { "epoch": 0.06901615271659324, "fcm_dpo/beta": 0.24128976464271545, "fcm_dpo/delta": -0.2963497042655945, "fcm_dpo/margin": 2.806663990020752, "fcm_dpo/q_t": 0.3475228548049927, "grad_norm": 116.81612396240234, "learning_rate": 3.333333333333333e-07, "logits/chosen": -0.5757611989974976, "logits/rejected": -0.5232735872268677, "logps/chosen": -57.52677917480469, "logps/ref_chosen": -57.92607879638672, "logps/ref_rejected": -94.67920684814453, "logps/rejected": -97.0865707397461, "loss": 0.9024, "margin_dpo/margin_mean": 2.80666446685791, "margin_dpo/margin_std": 2.5121896266937256, "step": 47 }, { "epoch": 0.07048458149779736, "fcm_dpo/beta": 0.22662241756916046, "fcm_dpo/delta": -0.23767787218093872, "fcm_dpo/margin": 2.7389888763427734, "fcm_dpo/q_t": 0.3588021993637085, "grad_norm": 126.09705352783203, "learning_rate": 3.4057971014492755e-07, "logits/chosen": -0.569898784160614, "logits/rejected": -0.5105218291282654, "logps/chosen": -57.07960510253906, "logps/ref_chosen": -57.188072204589844, "logps/ref_rejected": -88.0166015625, "logps/rejected": -90.64712524414062, "loss": 0.9453, "margin_dpo/margin_mean": 2.7389891147613525, "margin_dpo/margin_std": 2.5488128662109375, "step": 48 }, { "epoch": 0.07195301027900147, "fcm_dpo/beta": 0.21518516540527344, "fcm_dpo/delta": -0.34602996706962585, "fcm_dpo/margin": 3.3560891151428223, "fcm_dpo/q_t": 0.3433716297149658, "grad_norm": 101.85260772705078, "learning_rate": 3.478260869565217e-07, "logits/chosen": -0.5557532906532288, "logits/rejected": -0.49865707755088806, "logps/chosen": -61.317138671875, "logps/ref_chosen": -61.685272216796875, "logps/ref_rejected": -83.76747131347656, "logps/rejected": -86.75543212890625, "loss": 0.8993, "margin_dpo/margin_mean": 3.356088638305664, "margin_dpo/margin_std": 3.3915152549743652, "step": 49 }, { "epoch": 0.07342143906020558, "fcm_dpo/beta": 0.19847270846366882, "fcm_dpo/delta": -0.36405348777770996, "fcm_dpo/margin": 3.707047700881958, "fcm_dpo/q_t": 0.33734625577926636, "grad_norm": 95.96993255615234, "learning_rate": 3.5507246376811595e-07, "logits/chosen": -0.5368717908859253, "logits/rejected": -0.5008732080459595, "logps/chosen": -58.7468147277832, "logps/ref_chosen": -58.72413635253906, "logps/ref_rejected": -96.35814666748047, "logps/rejected": -100.08787536621094, "loss": 0.883, "margin_dpo/margin_mean": 3.707047939300537, "margin_dpo/margin_std": 3.5281009674072266, "step": 50 }, { "epoch": 0.07488986784140969, "fcm_dpo/beta": 0.1852697730064392, "fcm_dpo/delta": -0.3053027391433716, "fcm_dpo/margin": 3.6747231483459473, "fcm_dpo/q_t": 0.3589528203010559, "grad_norm": 75.97203826904297, "learning_rate": 3.6231884057971015e-07, "logits/chosen": -0.514645516872406, "logits/rejected": -0.48158469796180725, "logps/chosen": -61.512142181396484, "logps/ref_chosen": -61.3736686706543, "logps/ref_rejected": -76.00199890136719, "logps/rejected": -79.81520080566406, "loss": 0.9563, "margin_dpo/margin_mean": 3.674722671508789, "margin_dpo/margin_std": 4.535408020019531, "step": 51 }, { "epoch": 0.0763582966226138, "fcm_dpo/beta": 0.16818588972091675, "fcm_dpo/delta": -0.596248984336853, "fcm_dpo/margin": 5.584807395935059, "fcm_dpo/q_t": 0.29704979062080383, "grad_norm": 75.52082824707031, "learning_rate": 3.695652173913043e-07, "logits/chosen": -0.4952273666858673, "logits/rejected": -0.43586522340774536, "logps/chosen": -51.88074493408203, "logps/ref_chosen": -52.33735656738281, "logps/ref_rejected": -79.97391510009766, "logps/rejected": -85.10211181640625, "loss": 0.7698, "margin_dpo/margin_mean": 5.584807872772217, "margin_dpo/margin_std": 4.602443218231201, "step": 52 }, { "epoch": 0.07782672540381791, "fcm_dpo/beta": 0.1531587839126587, "fcm_dpo/delta": -0.5026867389678955, "fcm_dpo/margin": 5.6284685134887695, "fcm_dpo/q_t": 0.32493168115615845, "grad_norm": 77.16937255859375, "learning_rate": 3.7681159420289855e-07, "logits/chosen": -0.6023041009902954, "logits/rejected": -0.5804400444030762, "logps/chosen": -53.40974044799805, "logps/ref_chosen": -53.31465148925781, "logps/ref_rejected": -91.78359985351562, "logps/rejected": -97.50714111328125, "loss": 0.8492, "margin_dpo/margin_mean": 5.6284685134887695, "margin_dpo/margin_std": 5.570339679718018, "step": 53 }, { "epoch": 0.07929515418502203, "fcm_dpo/beta": 0.14113633334636688, "fcm_dpo/delta": -0.3154899477958679, "fcm_dpo/margin": 4.917664527893066, "fcm_dpo/q_t": 0.3461211621761322, "grad_norm": 67.67288970947266, "learning_rate": 3.8405797101449274e-07, "logits/chosen": -0.5780713558197021, "logits/rejected": -0.5238767862319946, "logps/chosen": -50.9090576171875, "logps/ref_chosen": -50.68865966796875, "logps/ref_rejected": -91.71539306640625, "logps/rejected": -96.85345458984375, "loss": 0.8952, "margin_dpo/margin_mean": 4.917665004730225, "margin_dpo/margin_std": 4.633595943450928, "step": 54 }, { "epoch": 0.08076358296622614, "fcm_dpo/beta": 0.13106518983840942, "fcm_dpo/delta": -0.40200865268707275, "fcm_dpo/margin": 5.890453338623047, "fcm_dpo/q_t": 0.33843106031417847, "grad_norm": 64.55609893798828, "learning_rate": 3.9130434782608694e-07, "logits/chosen": -0.6431792974472046, "logits/rejected": -0.5808027982711792, "logps/chosen": -63.28520965576172, "logps/ref_chosen": -62.615234375, "logps/ref_rejected": -88.99349975585938, "logps/rejected": -95.55393981933594, "loss": 0.908, "margin_dpo/margin_mean": 5.890453338623047, "margin_dpo/margin_std": 6.6398115158081055, "step": 55 }, { "epoch": 0.08223201174743025, "fcm_dpo/beta": 0.12227150052785873, "fcm_dpo/delta": -0.30633145570755005, "fcm_dpo/margin": 5.606152057647705, "fcm_dpo/q_t": 0.354331374168396, "grad_norm": 57.52459716796875, "learning_rate": 3.9855072463768114e-07, "logits/chosen": -0.6030697822570801, "logits/rejected": -0.5601364374160767, "logps/chosen": -58.434322357177734, "logps/ref_chosen": -57.9327278137207, "logps/ref_rejected": -94.1744384765625, "logps/rejected": -100.28218078613281, "loss": 0.9514, "margin_dpo/margin_mean": 5.606152534484863, "margin_dpo/margin_std": 6.62627649307251, "step": 56 }, { "epoch": 0.08370044052863436, "fcm_dpo/beta": 0.11491702497005463, "fcm_dpo/delta": -0.34079399704933167, "fcm_dpo/margin": 6.242837905883789, "fcm_dpo/q_t": 0.33905744552612305, "grad_norm": 59.57084655761719, "learning_rate": 4.057971014492754e-07, "logits/chosen": -0.5788109302520752, "logits/rejected": -0.5509734153747559, "logps/chosen": -71.0447998046875, "logps/ref_chosen": -70.49528503417969, "logps/ref_rejected": -95.56546020507812, "logps/rejected": -102.35780334472656, "loss": 0.8891, "margin_dpo/margin_mean": 6.242837905883789, "margin_dpo/margin_std": 5.736446857452393, "step": 57 }, { "epoch": 0.08516886930983847, "fcm_dpo/beta": 0.10662582516670227, "fcm_dpo/delta": -0.38594919443130493, "fcm_dpo/margin": 7.108300685882568, "fcm_dpo/q_t": 0.3379080295562744, "grad_norm": 60.7117805480957, "learning_rate": 4.1304347826086954e-07, "logits/chosen": -0.6119065880775452, "logits/rejected": -0.5379560589790344, "logps/chosen": -62.78474807739258, "logps/ref_chosen": -62.13294219970703, "logps/ref_rejected": -84.61729431152344, "logps/rejected": -92.37741088867188, "loss": 0.8931, "margin_dpo/margin_mean": 7.108301162719727, "margin_dpo/margin_std": 7.328868389129639, "step": 58 }, { "epoch": 0.08663729809104258, "fcm_dpo/beta": 0.0974373072385788, "fcm_dpo/delta": -0.3749150037765503, "fcm_dpo/margin": 7.614223957061768, "fcm_dpo/q_t": 0.33926984667778015, "grad_norm": 55.413719177246094, "learning_rate": 4.2028985507246374e-07, "logits/chosen": -0.6353350877761841, "logits/rejected": -0.5950881242752075, "logps/chosen": -53.027374267578125, "logps/ref_chosen": -51.932525634765625, "logps/ref_rejected": -88.88520050048828, "logps/rejected": -97.59426879882812, "loss": 0.8934, "margin_dpo/margin_mean": 7.614224433898926, "margin_dpo/margin_std": 7.698889255523682, "step": 59 }, { "epoch": 0.0881057268722467, "fcm_dpo/beta": 0.09344291687011719, "fcm_dpo/delta": -0.1885889768600464, "fcm_dpo/margin": 6.171331405639648, "fcm_dpo/q_t": 0.3686023950576782, "grad_norm": 60.355464935302734, "learning_rate": 4.2753623188405794e-07, "logits/chosen": -0.5876541137695312, "logits/rejected": -0.5260422229766846, "logps/chosen": -63.00043869018555, "logps/ref_chosen": -60.94218826293945, "logps/ref_rejected": -85.39340209960938, "logps/rejected": -93.62298583984375, "loss": 0.9854, "margin_dpo/margin_mean": 6.171331405639648, "margin_dpo/margin_std": 6.925416946411133, "step": 60 }, { "epoch": 0.08957415565345081, "fcm_dpo/beta": 0.08880254626274109, "fcm_dpo/delta": -0.2756527066230774, "fcm_dpo/margin": 7.387622833251953, "fcm_dpo/q_t": 0.36539459228515625, "grad_norm": 49.870140075683594, "learning_rate": 4.3478260869565214e-07, "logits/chosen": -0.6084675788879395, "logits/rejected": -0.5737414956092834, "logps/chosen": -61.716583251953125, "logps/ref_chosen": -60.633522033691406, "logps/ref_rejected": -89.85249328613281, "logps/rejected": -98.32318115234375, "loss": 0.9793, "margin_dpo/margin_mean": 7.387622833251953, "margin_dpo/margin_std": 9.976318359375, "step": 61 }, { "epoch": 0.09104258443465492, "fcm_dpo/beta": 0.08695913851261139, "fcm_dpo/delta": -0.10356283187866211, "fcm_dpo/margin": 5.732213973999023, "fcm_dpo/q_t": 0.3890203535556793, "grad_norm": 48.48149871826172, "learning_rate": 4.420289855072464e-07, "logits/chosen": -0.6002976298332214, "logits/rejected": -0.566005289554596, "logps/chosen": -57.36714553833008, "logps/ref_chosen": -56.15077209472656, "logps/ref_rejected": -75.56619262695312, "logps/rejected": -82.51478576660156, "loss": 1.037, "margin_dpo/margin_mean": 5.732213973999023, "margin_dpo/margin_std": 7.161689758300781, "step": 62 }, { "epoch": 0.09251101321585903, "fcm_dpo/beta": 0.08248023688793182, "fcm_dpo/delta": -0.2507448196411133, "fcm_dpo/margin": 7.671149253845215, "fcm_dpo/q_t": 0.3569306433200836, "grad_norm": 49.066436767578125, "learning_rate": 4.4927536231884053e-07, "logits/chosen": -0.5983408689498901, "logits/rejected": -0.5525267720222473, "logps/chosen": -75.25900268554688, "logps/ref_chosen": -73.14739227294922, "logps/ref_rejected": -97.61006164550781, "logps/rejected": -107.39281463623047, "loss": 0.9455, "margin_dpo/margin_mean": 7.671149253845215, "margin_dpo/margin_std": 7.900073528289795, "step": 63 }, { "epoch": 0.09397944199706314, "fcm_dpo/beta": 0.07727767527103424, "fcm_dpo/delta": -0.32136908173561096, "fcm_dpo/margin": 8.976408004760742, "fcm_dpo/q_t": 0.3494236171245575, "grad_norm": 44.7556266784668, "learning_rate": 4.5652173913043473e-07, "logits/chosen": -0.590173602104187, "logits/rejected": -0.5585036873817444, "logps/chosen": -54.6782341003418, "logps/ref_chosen": -53.998600006103516, "logps/ref_rejected": -93.53019714355469, "logps/rejected": -103.18624877929688, "loss": 0.9307, "margin_dpo/margin_mean": 8.976409912109375, "margin_dpo/margin_std": 9.896392822265625, "step": 64 }, { "epoch": 0.09544787077826726, "fcm_dpo/beta": 0.07323503494262695, "fcm_dpo/delta": -0.28665655851364136, "fcm_dpo/margin": 9.088768005371094, "fcm_dpo/q_t": 0.35149580240249634, "grad_norm": 45.25883865356445, "learning_rate": 4.63768115942029e-07, "logits/chosen": -0.6546945571899414, "logits/rejected": -0.6407773494720459, "logps/chosen": -67.44660949707031, "logps/ref_chosen": -64.83599853515625, "logps/ref_rejected": -109.94645690917969, "logps/rejected": -121.64584350585938, "loss": 0.9441, "margin_dpo/margin_mean": 9.088767051696777, "margin_dpo/margin_std": 9.91242790222168, "step": 65 }, { "epoch": 0.09691629955947137, "fcm_dpo/beta": 0.06984806060791016, "fcm_dpo/delta": -0.20510993897914886, "fcm_dpo/margin": 8.45463752746582, "fcm_dpo/q_t": 0.3711079955101013, "grad_norm": 39.93006134033203, "learning_rate": 4.7101449275362313e-07, "logits/chosen": -0.6191996932029724, "logits/rejected": -0.5847162008285522, "logps/chosen": -53.86325454711914, "logps/ref_chosen": -51.44352722167969, "logps/ref_rejected": -75.63629913330078, "logps/rejected": -86.51066589355469, "loss": 0.9827, "margin_dpo/margin_mean": 8.45463752746582, "margin_dpo/margin_std": 10.011677742004395, "step": 66 }, { "epoch": 0.09838472834067548, "fcm_dpo/beta": 0.06784674525260925, "fcm_dpo/delta": -0.19093264639377594, "fcm_dpo/margin": 8.551323890686035, "fcm_dpo/q_t": 0.37226301431655884, "grad_norm": 40.03382110595703, "learning_rate": 4.782608695652174e-07, "logits/chosen": -0.61040198802948, "logits/rejected": -0.5693593621253967, "logps/chosen": -61.51601028442383, "logps/ref_chosen": -59.34080505371094, "logps/ref_rejected": -72.78728485107422, "logps/rejected": -83.5138168334961, "loss": 0.9854, "margin_dpo/margin_mean": 8.551323890686035, "margin_dpo/margin_std": 9.986248016357422, "step": 67 }, { "epoch": 0.09985315712187959, "fcm_dpo/beta": 0.06566619873046875, "fcm_dpo/delta": -0.15436488389968872, "fcm_dpo/margin": 8.31753158569336, "fcm_dpo/q_t": 0.3754774034023285, "grad_norm": 39.015968322753906, "learning_rate": 4.855072463768116e-07, "logits/chosen": -0.6389807462692261, "logits/rejected": -0.5808882713317871, "logps/chosen": -67.62654113769531, "logps/ref_chosen": -65.2058334350586, "logps/ref_rejected": -77.20724487304688, "logps/rejected": -87.94549560546875, "loss": 0.9805, "margin_dpo/margin_mean": 8.31753158569336, "margin_dpo/margin_std": 8.358397483825684, "step": 68 }, { "epoch": 0.1013215859030837, "fcm_dpo/beta": 0.06245996803045273, "fcm_dpo/delta": -0.2367386519908905, "fcm_dpo/margin": 9.93748664855957, "fcm_dpo/q_t": 0.3604923486709595, "grad_norm": 41.36071014404297, "learning_rate": 4.927536231884058e-07, "logits/chosen": -0.5898059606552124, "logits/rejected": -0.5660474896430969, "logps/chosen": -62.712249755859375, "logps/ref_chosen": -59.81924057006836, "logps/ref_rejected": -103.38886260986328, "logps/rejected": -116.2193603515625, "loss": 0.9374, "margin_dpo/margin_mean": 9.937487602233887, "margin_dpo/margin_std": 9.718847274780273, "step": 69 }, { "epoch": 0.1027900146842878, "fcm_dpo/beta": 0.059792205691337585, "fcm_dpo/delta": -0.26161158084869385, "fcm_dpo/margin": 10.791955947875977, "fcm_dpo/q_t": 0.3578525483608246, "grad_norm": 41.59380340576172, "learning_rate": 5e-07, "logits/chosen": -0.6141102313995361, "logits/rejected": -0.5785216093063354, "logps/chosen": -66.17719268798828, "logps/ref_chosen": -61.930641174316406, "logps/ref_rejected": -91.06078338623047, "logps/rejected": -106.09928894042969, "loss": 0.9415, "margin_dpo/margin_mean": 10.791955947875977, "margin_dpo/margin_std": 11.36978530883789, "step": 70 }, { "epoch": 0.10425844346549193, "fcm_dpo/beta": 0.05598774552345276, "fcm_dpo/delta": -0.31813400983810425, "fcm_dpo/margin": 12.418464660644531, "fcm_dpo/q_t": 0.3474087715148926, "grad_norm": 38.49131393432617, "learning_rate": 4.999967061337492e-07, "logits/chosen": -0.613985538482666, "logits/rejected": -0.5706911087036133, "logps/chosen": -65.64326477050781, "logps/ref_chosen": -61.750335693359375, "logps/ref_rejected": -97.33662414550781, "logps/rejected": -113.64801025390625, "loss": 0.9031, "margin_dpo/margin_mean": 12.418464660644531, "margin_dpo/margin_std": 11.948598861694336, "step": 71 }, { "epoch": 0.10572687224669604, "fcm_dpo/beta": 0.05243536829948425, "fcm_dpo/delta": -0.32549160718917847, "fcm_dpo/margin": 13.37977409362793, "fcm_dpo/q_t": 0.3453982472419739, "grad_norm": 38.92274856567383, "learning_rate": 4.999868246217933e-07, "logits/chosen": -0.6485065221786499, "logits/rejected": -0.6123002767562866, "logps/chosen": -70.4666748046875, "logps/ref_chosen": -66.05341339111328, "logps/ref_rejected": -95.2869873046875, "logps/rejected": -113.08002471923828, "loss": 0.9101, "margin_dpo/margin_mean": 13.379773139953613, "margin_dpo/margin_std": 13.722232818603516, "step": 72 }, { "epoch": 0.10719530102790015, "fcm_dpo/beta": 0.04936884716153145, "fcm_dpo/delta": -0.28036195039749146, "fcm_dpo/margin": 13.372041702270508, "fcm_dpo/q_t": 0.36727890372276306, "grad_norm": 37.18900680541992, "learning_rate": 4.999703557245192e-07, "logits/chosen": -0.6953590512275696, "logits/rejected": -0.6547967195510864, "logps/chosen": -72.60985565185547, "logps/ref_chosen": -66.25627136230469, "logps/ref_rejected": -90.45613098144531, "logps/rejected": -110.1817626953125, "loss": 1.0173, "margin_dpo/margin_mean": 13.37204360961914, "margin_dpo/margin_std": 19.566150665283203, "step": 73 }, { "epoch": 0.10866372980910426, "fcm_dpo/beta": 0.046581994742155075, "fcm_dpo/delta": -0.3048560619354248, "fcm_dpo/margin": 14.660598754882812, "fcm_dpo/q_t": 0.3614646792411804, "grad_norm": 38.133087158203125, "learning_rate": 4.999472998758977e-07, "logits/chosen": -0.6480814218521118, "logits/rejected": -0.6374853849411011, "logps/chosen": -60.36994171142578, "logps/ref_chosen": -53.42488098144531, "logps/ref_rejected": -95.94693756103516, "logps/rejected": -117.55259704589844, "loss": 0.9857, "margin_dpo/margin_mean": 14.660598754882812, "margin_dpo/margin_std": 21.60616111755371, "step": 74 }, { "epoch": 0.11013215859030837, "fcm_dpo/beta": 0.04279695451259613, "fcm_dpo/delta": -0.5349281430244446, "fcm_dpo/margin": 20.78795051574707, "fcm_dpo/q_t": 0.31636515259742737, "grad_norm": 33.39282989501953, "learning_rate": 4.999176576834721e-07, "logits/chosen": -0.6751279830932617, "logits/rejected": -0.6657723188400269, "logps/chosen": -58.766300201416016, "logps/ref_chosen": -51.861663818359375, "logps/ref_rejected": -111.25398254394531, "logps/rejected": -138.94656372070312, "loss": 0.8338, "margin_dpo/margin_mean": 20.787948608398438, "margin_dpo/margin_std": 20.532333374023438, "step": 75 }, { "epoch": 0.11160058737151249, "fcm_dpo/beta": 0.04038340225815773, "fcm_dpo/delta": -0.1572856605052948, "fcm_dpo/margin": 13.58674430847168, "fcm_dpo/q_t": 0.37524014711380005, "grad_norm": 33.306888580322266, "learning_rate": 4.998814299283415e-07, "logits/chosen": -0.6934888362884521, "logits/rejected": -0.6505051851272583, "logps/chosen": -61.79083251953125, "logps/ref_chosen": -53.26603698730469, "logps/ref_rejected": -78.21662902832031, "logps/rejected": -100.32815551757812, "loss": 1.006, "margin_dpo/margin_mean": 13.58674430847168, "margin_dpo/margin_std": 16.316781997680664, "step": 76 }, { "epoch": 0.1130690161527166, "fcm_dpo/beta": 0.03723585233092308, "fcm_dpo/delta": -0.4455404579639435, "fcm_dpo/margin": 21.639652252197266, "fcm_dpo/q_t": 0.3260105848312378, "grad_norm": 34.49169158935547, "learning_rate": 4.998386175651409e-07, "logits/chosen": -0.679383397102356, "logits/rejected": -0.6394709348678589, "logps/chosen": -65.87094116210938, "logps/ref_chosen": -58.0966796875, "logps/ref_rejected": -93.77361297607422, "logps/rejected": -123.18753814697266, "loss": 0.8811, "margin_dpo/margin_mean": 21.639652252197266, "margin_dpo/margin_std": 22.59795379638672, "step": 77 }, { "epoch": 0.1145374449339207, "fcm_dpo/beta": 0.03562740236520767, "fcm_dpo/delta": -0.2082417905330658, "fcm_dpo/margin": 16.73519515991211, "fcm_dpo/q_t": 0.3686809539794922, "grad_norm": 31.130857467651367, "learning_rate": 4.997892217220159e-07, "logits/chosen": -0.6434469223022461, "logits/rejected": -0.6172356605529785, "logps/chosen": -63.4884033203125, "logps/ref_chosen": -55.61378479003906, "logps/ref_rejected": -84.93436431884766, "logps/rejected": -109.54417419433594, "loss": 0.9783, "margin_dpo/margin_mean": 16.73519515991211, "margin_dpo/margin_std": 19.067113876342773, "step": 78 }, { "epoch": 0.11600587371512482, "fcm_dpo/beta": 0.03389370068907738, "fcm_dpo/delta": -0.24003317952156067, "fcm_dpo/margin": 18.436786651611328, "fcm_dpo/q_t": 0.3689296245574951, "grad_norm": 27.611059188842773, "learning_rate": 4.997332437005931e-07, "logits/chosen": -0.6699581742286682, "logits/rejected": -0.6403902173042297, "logps/chosen": -63.6839599609375, "logps/ref_chosen": -55.45048522949219, "logps/ref_rejected": -87.64756774902344, "logps/rejected": -114.31782531738281, "loss": 0.9907, "margin_dpo/margin_mean": 18.436786651611328, "margin_dpo/margin_std": 23.604507446289062, "step": 79 }, { "epoch": 0.11747430249632893, "fcm_dpo/beta": 0.032423943281173706, "fcm_dpo/delta": -0.16374123096466064, "fcm_dpo/margin": 17.029020309448242, "fcm_dpo/q_t": 0.3843163251876831, "grad_norm": 29.77735710144043, "learning_rate": 4.996706849759452e-07, "logits/chosen": -0.7156171798706055, "logits/rejected": -0.6716504096984863, "logps/chosen": -69.70584869384766, "logps/ref_chosen": -58.519290924072266, "logps/ref_rejected": -87.54750061035156, "logps/rejected": -115.76307678222656, "loss": 1.0366, "margin_dpo/margin_mean": 17.029020309448242, "margin_dpo/margin_std": 23.282854080200195, "step": 80 }, { "epoch": 0.11894273127753303, "fcm_dpo/beta": 0.030525632202625275, "fcm_dpo/delta": -0.3602490723133087, "fcm_dpo/margin": 23.939817428588867, "fcm_dpo/q_t": 0.3476225733757019, "grad_norm": 30.38198471069336, "learning_rate": 4.996015471965529e-07, "logits/chosen": -0.732462465763092, "logits/rejected": -0.7031528353691101, "logps/chosen": -76.67108154296875, "logps/ref_chosen": -66.44886779785156, "logps/ref_rejected": -129.66270446777344, "logps/rejected": -163.82473754882812, "loss": 0.9326, "margin_dpo/margin_mean": 23.939817428588867, "margin_dpo/margin_std": 28.79480743408203, "step": 81 }, { "epoch": 0.12041116005873716, "fcm_dpo/beta": 0.02971896156668663, "fcm_dpo/delta": -0.17042918503284454, "fcm_dpo/margin": 18.8587703704834, "fcm_dpo/q_t": 0.38334983587265015, "grad_norm": 32.88773727416992, "learning_rate": 4.995258321842611e-07, "logits/chosen": -0.6365201473236084, "logits/rejected": -0.6239097118377686, "logps/chosen": -64.87715148925781, "logps/ref_chosen": -52.232383728027344, "logps/ref_rejected": -90.74325561523438, "logps/rejected": -122.24678802490234, "loss": 1.0784, "margin_dpo/margin_mean": 18.8587703704834, "margin_dpo/margin_std": 30.427291870117188, "step": 82 }, { "epoch": 0.12187958883994127, "fcm_dpo/beta": 0.028013106435537338, "fcm_dpo/delta": -0.24482643604278564, "fcm_dpo/margin": 22.431982040405273, "fcm_dpo/q_t": 0.36464548110961914, "grad_norm": 28.086669921875, "learning_rate": 4.994435419342304e-07, "logits/chosen": -0.6463146209716797, "logits/rejected": -0.6088162660598755, "logps/chosen": -68.94061279296875, "logps/ref_chosen": -55.82738494873047, "logps/ref_rejected": -103.71589660644531, "logps/rejected": -139.2611083984375, "loss": 0.98, "margin_dpo/margin_mean": 22.43198013305664, "margin_dpo/margin_std": 27.435033798217773, "step": 83 }, { "epoch": 0.12334801762114538, "fcm_dpo/beta": 0.027015678584575653, "fcm_dpo/delta": -0.13040480017662048, "fcm_dpo/margin": 19.303150177001953, "fcm_dpo/q_t": 0.37953460216522217, "grad_norm": 27.229429244995117, "learning_rate": 4.993546786148857e-07, "logits/chosen": -0.6810680031776428, "logits/rejected": -0.6438438892364502, "logps/chosen": -79.30827331542969, "logps/ref_chosen": -67.1761703491211, "logps/ref_rejected": -87.29859924316406, "logps/rejected": -118.73384857177734, "loss": 1.0045, "margin_dpo/margin_mean": 19.303150177001953, "margin_dpo/margin_std": 20.458560943603516, "step": 84 }, { "epoch": 0.12481644640234948, "fcm_dpo/beta": 0.02669249102473259, "fcm_dpo/delta": -0.14147168397903442, "fcm_dpo/margin": 19.99712371826172, "fcm_dpo/q_t": 0.3820468783378601, "grad_norm": 27.95656394958496, "learning_rate": 4.992592445678582e-07, "logits/chosen": -0.6734552383422852, "logits/rejected": -0.6434047222137451, "logps/chosen": -71.04620361328125, "logps/ref_chosen": -58.4066162109375, "logps/ref_rejected": -78.63880157470703, "logps/rejected": -111.2755126953125, "loss": 1.0251, "margin_dpo/margin_mean": 19.99712371826172, "margin_dpo/margin_std": 24.61843490600586, "step": 85 }, { "epoch": 0.1262848751835536, "fcm_dpo/beta": 0.025787636637687683, "fcm_dpo/delta": -0.15711811184883118, "fcm_dpo/margin": 21.28069305419922, "fcm_dpo/q_t": 0.39334917068481445, "grad_norm": 30.917089462280273, "learning_rate": 4.991572423079235e-07, "logits/chosen": -0.7074247598648071, "logits/rejected": -0.6953707337379456, "logps/chosen": -72.53890991210938, "logps/ref_chosen": -56.13746643066406, "logps/ref_rejected": -88.12165069580078, "logps/rejected": -125.80378723144531, "loss": 1.1033, "margin_dpo/margin_mean": 21.28069305419922, "margin_dpo/margin_std": 38.22045135498047, "step": 86 }, { "epoch": 0.1277533039647577, "fcm_dpo/beta": 0.024498071521520615, "fcm_dpo/delta": -0.2259608507156372, "fcm_dpo/margin": 24.91492462158203, "fcm_dpo/q_t": 0.3681684136390686, "grad_norm": 26.133350372314453, "learning_rate": 4.990486745229364e-07, "logits/chosen": -0.7258398532867432, "logits/rejected": -0.7005423307418823, "logps/chosen": -71.85640716552734, "logps/ref_chosen": -55.63609313964844, "logps/ref_rejected": -95.46757507324219, "logps/rejected": -136.60281372070312, "loss": 1.014, "margin_dpo/margin_mean": 24.9149227142334, "margin_dpo/margin_std": 33.578163146972656, "step": 87 }, { "epoch": 0.12922173274596183, "fcm_dpo/beta": 0.024026712402701378, "fcm_dpo/delta": -0.052284643054008484, "fcm_dpo/margin": 18.689456939697266, "fcm_dpo/q_t": 0.4039269983768463, "grad_norm": 27.430917739868164, "learning_rate": 4.989335440737586e-07, "logits/chosen": -0.6927683353424072, "logits/rejected": -0.6857916116714478, "logps/chosen": -94.15711975097656, "logps/ref_chosen": -73.67115020751953, "logps/ref_rejected": -106.70849609375, "logps/rejected": -145.88392639160156, "loss": 1.1188, "margin_dpo/margin_mean": 18.689455032348633, "margin_dpo/margin_std": 30.20696258544922, "step": 88 }, { "epoch": 0.13069016152716592, "fcm_dpo/beta": 0.023834798485040665, "fcm_dpo/delta": -0.104669950902462, "fcm_dpo/margin": 20.956254959106445, "fcm_dpo/q_t": 0.3892160654067993, "grad_norm": 24.95569610595703, "learning_rate": 4.988118539941847e-07, "logits/chosen": -0.7409356832504272, "logits/rejected": -0.7092708349227905, "logps/chosen": -73.19313049316406, "logps/ref_chosen": -60.624916076660156, "logps/ref_rejected": -82.08354949951172, "logps/rejected": -115.60802459716797, "loss": 1.0428, "margin_dpo/margin_mean": 20.956254959106445, "margin_dpo/margin_std": 27.846221923828125, "step": 89 }, { "epoch": 0.13215859030837004, "fcm_dpo/beta": 0.022901657968759537, "fcm_dpo/delta": -0.2683162987232208, "fcm_dpo/margin": 28.446239471435547, "fcm_dpo/q_t": 0.3710969090461731, "grad_norm": 26.91438865661621, "learning_rate": 4.986836074908615e-07, "logits/chosen": -0.6682174205780029, "logits/rejected": -0.6794829964637756, "logps/chosen": -69.5387191772461, "logps/ref_chosen": -53.285308837890625, "logps/ref_rejected": -111.54470825195312, "logps/rejected": -156.24435424804688, "loss": 1.0224, "margin_dpo/margin_mean": 28.446239471435547, "margin_dpo/margin_std": 41.36814880371094, "step": 90 }, { "epoch": 0.13362701908957417, "fcm_dpo/beta": 0.022023282945156097, "fcm_dpo/delta": -0.1283145546913147, "fcm_dpo/margin": 23.693279266357422, "fcm_dpo/q_t": 0.38849714398384094, "grad_norm": 25.179744720458984, "learning_rate": 4.985488079432037e-07, "logits/chosen": -0.680759847164154, "logits/rejected": -0.6463046669960022, "logps/chosen": -78.63939666748047, "logps/ref_chosen": -61.802955627441406, "logps/ref_rejected": -87.87395477294922, "logps/rejected": -128.40367126464844, "loss": 1.0583, "margin_dpo/margin_mean": 23.693279266357422, "margin_dpo/margin_std": 34.601070404052734, "step": 91 }, { "epoch": 0.13509544787077826, "fcm_dpo/beta": 0.021527327597141266, "fcm_dpo/delta": -0.12105225771665573, "fcm_dpo/margin": 23.916973114013672, "fcm_dpo/q_t": 0.3887942433357239, "grad_norm": 23.610260009765625, "learning_rate": 4.984074589033043e-07, "logits/chosen": -0.712161660194397, "logits/rejected": -0.6888165473937988, "logps/chosen": -66.98762512207031, "logps/ref_chosen": -51.640769958496094, "logps/ref_rejected": -77.88117980957031, "logps/rejected": -117.14500427246094, "loss": 1.0527, "margin_dpo/margin_mean": 23.916976928710938, "margin_dpo/margin_std": 33.57923889160156, "step": 92 }, { "epoch": 0.13656387665198239, "fcm_dpo/beta": 0.02094947174191475, "fcm_dpo/delta": -0.10999026894569397, "fcm_dpo/margin": 24.082473754882812, "fcm_dpo/q_t": 0.38795292377471924, "grad_norm": 24.15863037109375, "learning_rate": 4.982595640958425e-07, "logits/chosen": -0.7293976545333862, "logits/rejected": -0.6787578463554382, "logps/chosen": -69.72396087646484, "logps/ref_chosen": -52.529239654541016, "logps/ref_rejected": -77.16075134277344, "logps/rejected": -118.43794250488281, "loss": 1.0288, "margin_dpo/margin_mean": 24.08247184753418, "margin_dpo/margin_std": 30.17513656616211, "step": 93 }, { "epoch": 0.13803230543318648, "fcm_dpo/beta": 0.02017746865749359, "fcm_dpo/delta": -0.17116506397724152, "fcm_dpo/margin": 27.756542205810547, "fcm_dpo/q_t": 0.37484580278396606, "grad_norm": 23.89820098876953, "learning_rate": 4.98105127417984e-07, "logits/chosen": -0.6600474119186401, "logits/rejected": -0.6440984010696411, "logps/chosen": -79.61772155761719, "logps/ref_chosen": -61.22261047363281, "logps/ref_rejected": -99.59902954101562, "logps/rejected": -145.75067138671875, "loss": 0.9922, "margin_dpo/margin_mean": 27.756542205810547, "margin_dpo/margin_std": 31.244901657104492, "step": 94 }, { "epoch": 0.1395007342143906, "fcm_dpo/beta": 0.019902190193533897, "fcm_dpo/delta": -0.047649484127759933, "fcm_dpo/margin": 22.366260528564453, "fcm_dpo/q_t": 0.3979894518852234, "grad_norm": 22.373977661132812, "learning_rate": 4.979441529392784e-07, "logits/chosen": -0.7095633149147034, "logits/rejected": -0.6818147301673889, "logps/chosen": -70.20155334472656, "logps/ref_chosen": -52.523643493652344, "logps/ref_rejected": -75.8803482055664, "logps/rejected": -115.92452239990234, "loss": 1.0719, "margin_dpo/margin_mean": 22.366260528564453, "margin_dpo/margin_std": 29.86621856689453, "step": 95 }, { "epoch": 0.14096916299559473, "fcm_dpo/beta": 0.01920231431722641, "fcm_dpo/delta": -0.20027107000350952, "fcm_dpo/margin": 30.528703689575195, "fcm_dpo/q_t": 0.371820330619812, "grad_norm": 22.68387794494629, "learning_rate": 4.977766449015534e-07, "logits/chosen": -0.7119661569595337, "logits/rejected": -0.6831298470497131, "logps/chosen": -79.36348724365234, "logps/ref_chosen": -62.15697479248047, "logps/ref_rejected": -96.59601593017578, "logps/rejected": -144.33123779296875, "loss": 0.9831, "margin_dpo/margin_mean": 30.528701782226562, "margin_dpo/margin_std": 35.82476806640625, "step": 96 }, { "epoch": 0.14243759177679882, "fcm_dpo/beta": 0.019140418618917465, "fcm_dpo/delta": -0.05700864642858505, "fcm_dpo/margin": 23.69298553466797, "fcm_dpo/q_t": 0.3948918282985687, "grad_norm": 23.630054473876953, "learning_rate": 4.976026077188012e-07, "logits/chosen": -0.6437740325927734, "logits/rejected": -0.6002140045166016, "logps/chosen": -73.06685638427734, "logps/ref_chosen": -54.646366119384766, "logps/ref_rejected": -76.96475219726562, "logps/rejected": -119.0782241821289, "loss": 1.0526, "margin_dpo/margin_mean": 23.69298553466797, "margin_dpo/margin_std": 27.06100845336914, "step": 97 }, { "epoch": 0.14390602055800295, "fcm_dpo/beta": 0.018587183207273483, "fcm_dpo/delta": -0.10853452980518341, "fcm_dpo/margin": 27.052547454833984, "fcm_dpo/q_t": 0.38545018434524536, "grad_norm": 24.32488250732422, "learning_rate": 4.974220459770639e-07, "logits/chosen": -0.6696487069129944, "logits/rejected": -0.653130829334259, "logps/chosen": -87.99504089355469, "logps/ref_chosen": -65.25862884521484, "logps/ref_rejected": -96.5274887084961, "logps/rejected": -146.3164520263672, "loss": 1.0556, "margin_dpo/margin_mean": 27.052547454833984, "margin_dpo/margin_std": 37.12006759643555, "step": 98 }, { "epoch": 0.14537444933920704, "fcm_dpo/beta": 0.01791193149983883, "fcm_dpo/delta": -0.20442235469818115, "fcm_dpo/margin": 33.02878189086914, "fcm_dpo/q_t": 0.3736024498939514, "grad_norm": 21.534595489501953, "learning_rate": 4.972349644343108e-07, "logits/chosen": -0.684786319732666, "logits/rejected": -0.6852524876594543, "logps/chosen": -63.73361587524414, "logps/ref_chosen": -45.638484954833984, "logps/ref_rejected": -86.43793487548828, "logps/rejected": -137.5618438720703, "loss": 0.992, "margin_dpo/margin_mean": 33.02878189086914, "margin_dpo/margin_std": 41.72602844238281, "step": 99 }, { "epoch": 0.14684287812041116, "fcm_dpo/beta": 0.01798401214182377, "fcm_dpo/delta": 0.05157166346907616, "fcm_dpo/margin": 19.44617462158203, "fcm_dpo/q_t": 0.4210618734359741, "grad_norm": 23.82549476623535, "learning_rate": 4.970413680203148e-07, "logits/chosen": -0.6766690015792847, "logits/rejected": -0.6355087757110596, "logps/chosen": -77.89352416992188, "logps/ref_chosen": -57.59397888183594, "logps/ref_rejected": -74.06021118164062, "logps/rejected": -113.80592346191406, "loss": 1.1629, "margin_dpo/margin_mean": 19.44617462158203, "margin_dpo/margin_std": 34.781532287597656, "step": 100 }, { "epoch": 0.14831130690161526, "fcm_dpo/beta": 0.017808571457862854, "fcm_dpo/delta": -0.01938944309949875, "fcm_dpo/margin": 23.464597702026367, "fcm_dpo/q_t": 0.4106113314628601, "grad_norm": 23.713424682617188, "learning_rate": 4.968412618365215e-07, "logits/chosen": -0.6706404685974121, "logits/rejected": -0.639204740524292, "logps/chosen": -86.684814453125, "logps/ref_chosen": -61.64885330200195, "logps/ref_rejected": -83.18968200683594, "logps/rejected": -131.69024658203125, "loss": 1.1273, "margin_dpo/margin_mean": 23.464595794677734, "margin_dpo/margin_std": 40.112548828125, "step": 101 }, { "epoch": 0.14977973568281938, "fcm_dpo/beta": 0.018256399780511856, "fcm_dpo/delta": 0.08609801530838013, "fcm_dpo/margin": 17.24917221069336, "fcm_dpo/q_t": 0.4305152893066406, "grad_norm": 27.488197326660156, "learning_rate": 4.966346511559149e-07, "logits/chosen": -0.6962743997573853, "logits/rejected": -0.6518734693527222, "logps/chosen": -91.27113342285156, "logps/ref_chosen": -64.0788803100586, "logps/ref_rejected": -68.18707275390625, "logps/rejected": -112.62849426269531, "loss": 1.2068, "margin_dpo/margin_mean": 17.24917221069336, "margin_dpo/margin_std": 37.05067825317383, "step": 102 }, { "epoch": 0.1512481644640235, "fcm_dpo/beta": 0.017573434859514236, "fcm_dpo/delta": -0.222814679145813, "fcm_dpo/margin": 34.63999938964844, "fcm_dpo/q_t": 0.3685527443885803, "grad_norm": 23.29231071472168, "learning_rate": 4.964215414228785e-07, "logits/chosen": -0.6724139451980591, "logits/rejected": -0.6383209228515625, "logps/chosen": -82.775146484375, "logps/ref_chosen": -61.299278259277344, "logps/ref_rejected": -93.57270812988281, "logps/rejected": -149.6885986328125, "loss": 0.98, "margin_dpo/margin_mean": 34.63999938964844, "margin_dpo/margin_std": 42.09827423095703, "step": 103 }, { "epoch": 0.1527165932452276, "fcm_dpo/beta": 0.017012089490890503, "fcm_dpo/delta": -0.16792970895767212, "fcm_dpo/margin": 32.84601593017578, "fcm_dpo/q_t": 0.3835586905479431, "grad_norm": 22.68368148803711, "learning_rate": 4.96201938253052e-07, "logits/chosen": -0.7026796340942383, "logits/rejected": -0.6724193096160889, "logps/chosen": -77.75988006591797, "logps/ref_chosen": -54.372772216796875, "logps/ref_rejected": -89.5647201538086, "logps/rejected": -145.79783630371094, "loss": 1.0391, "margin_dpo/margin_mean": 32.84601593017578, "margin_dpo/margin_std": 46.943763732910156, "step": 104 }, { "epoch": 0.15418502202643172, "fcm_dpo/beta": 0.0159467663615942, "fcm_dpo/delta": -0.37162917852401733, "fcm_dpo/margin": 46.66958236694336, "fcm_dpo/q_t": 0.33258479833602905, "grad_norm": 23.175701141357422, "learning_rate": 4.959758474331832e-07, "logits/chosen": -0.6862632036209106, "logits/rejected": -0.6642385721206665, "logps/chosen": -76.52045440673828, "logps/ref_chosen": -54.638946533203125, "logps/ref_rejected": -97.97351837158203, "logps/rejected": -166.52459716796875, "loss": 0.867, "margin_dpo/margin_mean": 46.66958236694336, "margin_dpo/margin_std": 41.3289680480957, "step": 105 }, { "epoch": 0.15565345080763582, "fcm_dpo/beta": 0.015470081940293312, "fcm_dpo/delta": -0.05400984361767769, "fcm_dpo/margin": 29.177989959716797, "fcm_dpo/q_t": 0.3962496519088745, "grad_norm": 22.519311904907227, "learning_rate": 4.957432749209755e-07, "logits/chosen": -0.6360474824905396, "logits/rejected": -0.6046931743621826, "logps/chosen": -79.4212875366211, "logps/ref_chosen": -54.83289337158203, "logps/ref_rejected": -85.22461700439453, "logps/rejected": -138.99099731445312, "loss": 1.0559, "margin_dpo/margin_mean": 29.177989959716797, "margin_dpo/margin_std": 35.5566520690918, "step": 106 }, { "epoch": 0.15712187958883994, "fcm_dpo/beta": 0.015153482556343079, "fcm_dpo/delta": -0.10540027171373367, "fcm_dpo/margin": 32.95298767089844, "fcm_dpo/q_t": 0.3870370090007782, "grad_norm": 21.292871475219727, "learning_rate": 4.955042268449307e-07, "logits/chosen": -0.6769276857376099, "logits/rejected": -0.6308863162994385, "logps/chosen": -99.10942840576172, "logps/ref_chosen": -69.70780944824219, "logps/ref_rejected": -94.73950958251953, "logps/rejected": -157.0941162109375, "loss": 1.042, "margin_dpo/margin_mean": 32.95298767089844, "margin_dpo/margin_std": 42.030609130859375, "step": 107 }, { "epoch": 0.15859030837004406, "fcm_dpo/beta": 0.01471179910004139, "fcm_dpo/delta": -0.1934787929058075, "fcm_dpo/margin": 39.57737731933594, "fcm_dpo/q_t": 0.37953275442123413, "grad_norm": 21.4735164642334, "learning_rate": 4.952587095041881e-07, "logits/chosen": -0.6490312814712524, "logits/rejected": -0.6262690424919128, "logps/chosen": -82.56188201904297, "logps/ref_chosen": -56.0098876953125, "logps/ref_rejected": -95.79601287841797, "logps/rejected": -161.92538452148438, "loss": 1.0361, "margin_dpo/margin_mean": 39.57737731933594, "margin_dpo/margin_std": 57.18327331542969, "step": 108 }, { "epoch": 0.16005873715124816, "fcm_dpo/beta": 0.014065122231841087, "fcm_dpo/delta": -0.20978516340255737, "fcm_dpo/margin": 42.39696502685547, "fcm_dpo/q_t": 0.3675564229488373, "grad_norm": 21.875646591186523, "learning_rate": 4.95006729368358e-07, "logits/chosen": -0.595153272151947, "logits/rejected": -0.5731344223022461, "logps/chosen": -87.58088684082031, "logps/ref_chosen": -62.88549041748047, "logps/ref_rejected": -98.68573760986328, "logps/rejected": -165.77810668945312, "loss": 0.9853, "margin_dpo/margin_mean": 42.39696502685547, "margin_dpo/margin_std": 49.910247802734375, "step": 109 }, { "epoch": 0.16152716593245228, "fcm_dpo/beta": 0.013536353595554829, "fcm_dpo/delta": -0.12679250538349152, "fcm_dpo/margin": 38.109649658203125, "fcm_dpo/q_t": 0.38643020391464233, "grad_norm": 19.236122131347656, "learning_rate": 4.947482930773511e-07, "logits/chosen": -0.5986282825469971, "logits/rejected": -0.5608283877372742, "logps/chosen": -84.11024475097656, "logps/ref_chosen": -58.753684997558594, "logps/ref_rejected": -79.75001525878906, "logps/rejected": -143.2162322998047, "loss": 1.0498, "margin_dpo/margin_mean": 38.10965347290039, "margin_dpo/margin_std": 50.650054931640625, "step": 110 }, { "epoch": 0.16299559471365638, "fcm_dpo/beta": 0.013182668015360832, "fcm_dpo/delta": -0.1789449155330658, "fcm_dpo/margin": 43.02890396118164, "fcm_dpo/q_t": 0.3763716518878937, "grad_norm": 21.762737274169922, "learning_rate": 4.944834074412042e-07, "logits/chosen": -0.6486632227897644, "logits/rejected": -0.6258302927017212, "logps/chosen": -97.67506408691406, "logps/ref_chosen": -68.62410736083984, "logps/ref_rejected": -98.42886352539062, "logps/rejected": -170.50872802734375, "loss": 1.031, "margin_dpo/margin_mean": 43.02890396118164, "margin_dpo/margin_std": 58.42748260498047, "step": 111 }, { "epoch": 0.1644640234948605, "fcm_dpo/beta": 0.013155752792954445, "fcm_dpo/delta": 0.03874760866165161, "fcm_dpo/margin": 27.551637649536133, "fcm_dpo/q_t": 0.4168916344642639, "grad_norm": 19.853069305419922, "learning_rate": 4.942120794399002e-07, "logits/chosen": -0.6284303665161133, "logits/rejected": -0.5894876718521118, "logps/chosen": -77.16087341308594, "logps/ref_chosen": -50.24964141845703, "logps/ref_rejected": -64.77442932128906, "logps/rejected": -119.2373046875, "loss": 1.1235, "margin_dpo/margin_mean": 27.551637649536133, "margin_dpo/margin_std": 40.0994873046875, "step": 112 }, { "epoch": 0.16593245227606462, "fcm_dpo/beta": 0.01325266994535923, "fcm_dpo/delta": 0.006955064833164215, "fcm_dpo/margin": 29.678356170654297, "fcm_dpo/q_t": 0.40930402278900146, "grad_norm": 20.103424072265625, "learning_rate": 4.939343162231841e-07, "logits/chosen": -0.6129658818244934, "logits/rejected": -0.5705182552337646, "logps/chosen": -100.33967590332031, "logps/ref_chosen": -66.71295166015625, "logps/ref_rejected": -77.96870422363281, "logps/rejected": -141.2738037109375, "loss": 1.0914, "margin_dpo/margin_mean": 29.678359985351562, "margin_dpo/margin_std": 38.312931060791016, "step": 113 }, { "epoch": 0.16740088105726872, "fcm_dpo/beta": 0.012752614915370941, "fcm_dpo/delta": -0.2160319983959198, "fcm_dpo/margin": 47.115623474121094, "fcm_dpo/q_t": 0.3740980625152588, "grad_norm": 21.467836380004883, "learning_rate": 4.936501251103751e-07, "logits/chosen": -0.6114667654037476, "logits/rejected": -0.5787659287452698, "logps/chosen": -88.59614562988281, "logps/ref_chosen": -57.78507995605469, "logps/ref_rejected": -87.10966491699219, "logps/rejected": -165.03634643554688, "loss": 0.9984, "margin_dpo/margin_mean": 47.115623474121094, "margin_dpo/margin_std": 63.08842086791992, "step": 114 }, { "epoch": 0.16886930983847284, "fcm_dpo/beta": 0.012707412242889404, "fcm_dpo/delta": -0.013142341747879982, "fcm_dpo/margin": 32.45915222167969, "fcm_dpo/q_t": 0.41373395919799805, "grad_norm": 27.934776306152344, "learning_rate": 4.933595135901732e-07, "logits/chosen": -0.6261277198791504, "logits/rejected": -0.6049231290817261, "logps/chosen": -105.82150268554688, "logps/ref_chosen": -65.5826416015625, "logps/ref_rejected": -98.56552124023438, "logps/rejected": -171.2635498046875, "loss": 1.1638, "margin_dpo/margin_mean": 32.45915603637695, "margin_dpo/margin_std": 65.33894348144531, "step": 115 }, { "epoch": 0.17033773861967694, "fcm_dpo/beta": 0.01261107623577118, "fcm_dpo/delta": -0.05706522613763809, "fcm_dpo/margin": 36.02623748779297, "fcm_dpo/q_t": 0.39755555987358093, "grad_norm": 21.839122772216797, "learning_rate": 4.930624893204624e-07, "logits/chosen": -0.6210289001464844, "logits/rejected": -0.6112542152404785, "logps/chosen": -81.75634765625, "logps/ref_chosen": -51.40031433105469, "logps/ref_rejected": -80.5218505859375, "logps/rejected": -146.90414428710938, "loss": 1.0579, "margin_dpo/margin_mean": 36.02623748779297, "margin_dpo/margin_std": 45.45439910888672, "step": 116 }, { "epoch": 0.17180616740088106, "fcm_dpo/beta": 0.01257578656077385, "fcm_dpo/delta": 0.02945420891046524, "fcm_dpo/margin": 29.551578521728516, "fcm_dpo/q_t": 0.41677069664001465, "grad_norm": 28.586557388305664, "learning_rate": 4.927590601281083e-07, "logits/chosen": -0.581455409526825, "logits/rejected": -0.543665885925293, "logps/chosen": -108.22999572753906, "logps/ref_chosen": -69.29840850830078, "logps/ref_rejected": -66.583984375, "logps/rejected": -135.06715393066406, "loss": 1.1416, "margin_dpo/margin_mean": 29.55158042907715, "margin_dpo/margin_std": 50.78779602050781, "step": 117 }, { "epoch": 0.17327459618208516, "fcm_dpo/beta": 0.012499745935201645, "fcm_dpo/delta": -0.05271512269973755, "fcm_dpo/margin": 36.0295524597168, "fcm_dpo/q_t": 0.39876455068588257, "grad_norm": 20.938520431518555, "learning_rate": 4.924492340087524e-07, "logits/chosen": -0.6351233720779419, "logits/rejected": -0.6163256168365479, "logps/chosen": -86.66950988769531, "logps/ref_chosen": -55.6409797668457, "logps/ref_rejected": -75.66905975341797, "logps/rejected": -142.72714233398438, "loss": 1.062, "margin_dpo/margin_mean": 36.0295524597168, "margin_dpo/margin_std": 46.57566833496094, "step": 118 }, { "epoch": 0.17474302496328928, "fcm_dpo/beta": 0.012305308133363724, "fcm_dpo/delta": -0.03963203728199005, "fcm_dpo/margin": 35.49628829956055, "fcm_dpo/q_t": 0.4050098657608032, "grad_norm": 23.495380401611328, "learning_rate": 4.92133019126601e-07, "logits/chosen": -0.6303149461746216, "logits/rejected": -0.6183843016624451, "logps/chosen": -115.98379516601562, "logps/ref_chosen": -73.51019287109375, "logps/ref_rejected": -102.977294921875, "logps/rejected": -180.94717407226562, "loss": 1.1013, "margin_dpo/margin_mean": 35.49628829956055, "margin_dpo/margin_std": 55.193031311035156, "step": 119 }, { "epoch": 0.1762114537444934, "fcm_dpo/beta": 0.011988421902060509, "fcm_dpo/delta": -0.20115892589092255, "fcm_dpo/margin": 49.14187240600586, "fcm_dpo/q_t": 0.370976984500885, "grad_norm": 22.095067977905273, "learning_rate": 4.918104238142103e-07, "logits/chosen": -0.6180063486099243, "logits/rejected": -0.5856211185455322, "logps/chosen": -120.44984436035156, "logps/ref_chosen": -76.78083801269531, "logps/ref_rejected": -108.02374267578125, "logps/rejected": -200.83462524414062, "loss": 0.9929, "margin_dpo/margin_mean": 49.14187240600586, "margin_dpo/margin_std": 59.94157409667969, "step": 120 }, { "epoch": 0.1776798825256975, "fcm_dpo/beta": 0.011408919468522072, "fcm_dpo/delta": -0.2337116003036499, "fcm_dpo/margin": 54.12976837158203, "fcm_dpo/q_t": 0.367302268743515, "grad_norm": 24.336544036865234, "learning_rate": 4.91481456572267e-07, "logits/chosen": -0.5880341529846191, "logits/rejected": -0.5841655731201172, "logps/chosen": -104.06226348876953, "logps/ref_chosen": -61.789894104003906, "logps/ref_rejected": -109.99456787109375, "logps/rejected": -206.39671325683594, "loss": 0.9962, "margin_dpo/margin_mean": 54.12976837158203, "margin_dpo/margin_std": 69.66026306152344, "step": 121 }, { "epoch": 0.17914831130690162, "fcm_dpo/beta": 0.010763179510831833, "fcm_dpo/delta": -0.35151687264442444, "fcm_dpo/margin": 67.46307373046875, "fcm_dpo/q_t": 0.3405070900917053, "grad_norm": 23.621253967285156, "learning_rate": 4.911461260693638e-07, "logits/chosen": -0.5764192938804626, "logits/rejected": -0.5921785831451416, "logps/chosen": -85.21223449707031, "logps/ref_chosen": -46.9022102355957, "logps/ref_rejected": -106.71418762207031, "logps/rejected": -212.48728942871094, "loss": 0.893, "margin_dpo/margin_mean": 67.46307373046875, "margin_dpo/margin_std": 66.18208312988281, "step": 122 }, { "epoch": 0.18061674008810572, "fcm_dpo/beta": 0.010407611727714539, "fcm_dpo/delta": -0.06276418268680573, "fcm_dpo/margin": 44.099815368652344, "fcm_dpo/q_t": 0.4013745188713074, "grad_norm": 20.684696197509766, "learning_rate": 4.908044411417711e-07, "logits/chosen": -0.569983184337616, "logits/rejected": -0.5534219741821289, "logps/chosen": -103.7418212890625, "logps/ref_chosen": -61.33863830566406, "logps/ref_rejected": -87.775390625, "logps/rejected": -174.2783966064453, "loss": 1.1084, "margin_dpo/margin_mean": 44.099815368652344, "margin_dpo/margin_std": 72.8822021484375, "step": 123 }, { "epoch": 0.18208516886930984, "fcm_dpo/beta": 0.009994514286518097, "fcm_dpo/delta": -0.26900696754455566, "fcm_dpo/margin": 65.09678649902344, "fcm_dpo/q_t": 0.36936578154563904, "grad_norm": 22.232603073120117, "learning_rate": 4.904564107932048e-07, "logits/chosen": -0.5371190309524536, "logits/rejected": -0.5390757322311401, "logps/chosen": -119.59730529785156, "logps/ref_chosen": -71.44833374023438, "logps/ref_rejected": -117.58056640625, "logps/rejected": -230.82635498046875, "loss": 1.0148, "margin_dpo/margin_mean": 65.09678649902344, "margin_dpo/margin_std": 93.31256103515625, "step": 124 }, { "epoch": 0.18355359765051396, "fcm_dpo/beta": 0.009659139439463615, "fcm_dpo/delta": -0.16824766993522644, "fcm_dpo/margin": 57.879905700683594, "fcm_dpo/q_t": 0.3799300193786621, "grad_norm": 18.988040924072266, "learning_rate": 4.90102044194588e-07, "logits/chosen": -0.49056124687194824, "logits/rejected": -0.4914134740829468, "logps/chosen": -90.02660369873047, "logps/ref_chosen": -50.136940002441406, "logps/ref_rejected": -83.98861694335938, "logps/rejected": -181.7581787109375, "loss": 1.0264, "margin_dpo/margin_mean": 57.879905700683594, "margin_dpo/margin_std": 77.51099395751953, "step": 125 }, { "epoch": 0.18502202643171806, "fcm_dpo/beta": 0.009380832314491272, "fcm_dpo/delta": -0.11235487461090088, "fcm_dpo/margin": 53.98036193847656, "fcm_dpo/q_t": 0.38861894607543945, "grad_norm": 20.407129287719727, "learning_rate": 4.897413506838102e-07, "logits/chosen": -0.5199521780014038, "logits/rejected": -0.514183759689331, "logps/chosen": -98.78469848632812, "logps/ref_chosen": -55.66706848144531, "logps/ref_rejected": -98.1297607421875, "logps/rejected": -195.22775268554688, "loss": 1.042, "margin_dpo/margin_mean": 53.98036193847656, "margin_dpo/margin_std": 71.43438720703125, "step": 126 }, { "epoch": 0.18649045521292218, "fcm_dpo/beta": 0.009391989558935165, "fcm_dpo/delta": 0.029904596507549286, "fcm_dpo/margin": 39.52136993408203, "fcm_dpo/q_t": 0.4143528640270233, "grad_norm": 21.105072021484375, "learning_rate": 4.89374339765481e-07, "logits/chosen": -0.5297501087188721, "logits/rejected": -0.5099810361862183, "logps/chosen": -98.52586364746094, "logps/ref_chosen": -56.55467987060547, "logps/ref_rejected": -76.7957763671875, "logps/rejected": -158.288330078125, "loss": 1.1295, "margin_dpo/margin_mean": 39.52136993408203, "margin_dpo/margin_std": 62.1949348449707, "step": 127 }, { "epoch": 0.18795888399412627, "fcm_dpo/beta": 0.009450054727494717, "fcm_dpo/delta": 0.006840545684099197, "fcm_dpo/margin": 41.593753814697266, "fcm_dpo/q_t": 0.4126642346382141, "grad_norm": 29.35715103149414, "learning_rate": 4.890010211106795e-07, "logits/chosen": -0.5065322518348694, "logits/rejected": -0.484443336725235, "logps/chosen": -103.02119445800781, "logps/ref_chosen": -58.12095642089844, "logps/ref_rejected": -76.43896484375, "logps/rejected": -162.93295288085938, "loss": 1.1414, "margin_dpo/margin_mean": 41.593753814697266, "margin_dpo/margin_std": 72.77674865722656, "step": 128 }, { "epoch": 0.1894273127753304, "fcm_dpo/beta": 0.009403524920344353, "fcm_dpo/delta": -0.013958234339952469, "fcm_dpo/margin": 43.95970916748047, "fcm_dpo/q_t": 0.4135984778404236, "grad_norm": 20.894304275512695, "learning_rate": 4.88621404556699e-07, "logits/chosen": -0.5332880020141602, "logits/rejected": -0.5224489569664001, "logps/chosen": -121.43072509765625, "logps/ref_chosen": -66.91637420654297, "logps/ref_rejected": -96.6422119140625, "logps/rejected": -195.11627197265625, "loss": 1.1476, "margin_dpo/margin_mean": 43.95970916748047, "margin_dpo/margin_std": 83.69145202636719, "step": 129 }, { "epoch": 0.19089574155653452, "fcm_dpo/beta": 0.009182717651128769, "fcm_dpo/delta": -0.22041718661785126, "fcm_dpo/margin": 66.11294555664062, "fcm_dpo/q_t": 0.36964261531829834, "grad_norm": 21.242704391479492, "learning_rate": 4.882355001067891e-07, "logits/chosen": -0.5126354098320007, "logits/rejected": -0.50765061378479, "logps/chosen": -85.27062225341797, "logps/ref_chosen": -44.66685104370117, "logps/ref_rejected": -82.78165435791016, "logps/rejected": -189.49838256835938, "loss": 0.996, "margin_dpo/margin_mean": 66.11294555664062, "margin_dpo/margin_std": 80.52020263671875, "step": 130 }, { "epoch": 0.19236417033773862, "fcm_dpo/beta": 0.008719469420611858, "fcm_dpo/delta": -0.19158010184764862, "fcm_dpo/margin": 66.538818359375, "fcm_dpo/q_t": 0.36819595098495483, "grad_norm": 28.657150268554688, "learning_rate": 4.878433179298909e-07, "logits/chosen": -0.5120518803596497, "logits/rejected": -0.5190242528915405, "logps/chosen": -81.01126098632812, "logps/ref_chosen": -44.924591064453125, "logps/ref_rejected": -88.44401550292969, "logps/rejected": -191.06948852539062, "loss": 0.9776, "margin_dpo/margin_mean": 66.538818359375, "margin_dpo/margin_std": 72.21876525878906, "step": 131 }, { "epoch": 0.19383259911894274, "fcm_dpo/beta": 0.00854223407804966, "fcm_dpo/delta": -0.06684187799692154, "fcm_dpo/margin": 54.26338577270508, "fcm_dpo/q_t": 0.40112632513046265, "grad_norm": 20.267854690551758, "learning_rate": 4.874448683603694e-07, "logits/chosen": -0.5317084789276123, "logits/rejected": -0.5302582383155823, "logps/chosen": -107.2270278930664, "logps/ref_chosen": -59.00108337402344, "logps/ref_rejected": -87.89215087890625, "logps/rejected": -190.38148498535156, "loss": 1.0888, "margin_dpo/margin_mean": 54.263389587402344, "margin_dpo/margin_std": 85.4618148803711, "step": 132 }, { "epoch": 0.19530102790014683, "fcm_dpo/beta": 0.008498817682266235, "fcm_dpo/delta": -0.012123266234993935, "fcm_dpo/margin": 48.4220085144043, "fcm_dpo/q_t": 0.40967467427253723, "grad_norm": 26.216642379760742, "learning_rate": 4.870401618977415e-07, "logits/chosen": -0.5197868347167969, "logits/rejected": -0.5063532590866089, "logps/chosen": -123.58735656738281, "logps/ref_chosen": -66.60449981689453, "logps/ref_rejected": -96.33355712890625, "logps/rejected": -201.73843383789062, "loss": 1.108, "margin_dpo/margin_mean": 48.4220085144043, "margin_dpo/margin_std": 75.48104858398438, "step": 133 }, { "epoch": 0.19676945668135096, "fcm_dpo/beta": 0.008487870916724205, "fcm_dpo/delta": -0.0391845665872097, "fcm_dpo/margin": 51.5308952331543, "fcm_dpo/q_t": 0.4019153118133545, "grad_norm": 19.05293846130371, "learning_rate": 4.866292092063986e-07, "logits/chosen": -0.4650689363479614, "logits/rejected": -0.45051348209381104, "logps/chosen": -97.44313049316406, "logps/ref_chosen": -52.06925582885742, "logps/ref_rejected": -87.6545181274414, "logps/rejected": -184.55929565429688, "loss": 1.0679, "margin_dpo/margin_mean": 51.5308952331543, "margin_dpo/margin_std": 67.31776428222656, "step": 134 }, { "epoch": 0.19823788546255505, "fcm_dpo/beta": 0.00817069597542286, "fcm_dpo/delta": -0.21977676451206207, "fcm_dpo/margin": 74.21546936035156, "fcm_dpo/q_t": 0.3703498840332031, "grad_norm": 21.739349365234375, "learning_rate": 4.862120211153265e-07, "logits/chosen": -0.4731314182281494, "logits/rejected": -0.5078834295272827, "logps/chosen": -100.25476837158203, "logps/ref_chosen": -50.353858947753906, "logps/ref_rejected": -115.97975158691406, "logps/rejected": -240.0961151123047, "loss": 0.9934, "margin_dpo/margin_mean": 74.21546936035156, "margin_dpo/margin_std": 92.9926528930664, "step": 135 }, { "epoch": 0.19970631424375918, "fcm_dpo/beta": 0.00803595595061779, "fcm_dpo/delta": 0.0032859407365322113, "fcm_dpo/margin": 49.29758834838867, "fcm_dpo/q_t": 0.4189005196094513, "grad_norm": 20.417558670043945, "learning_rate": 4.857886086178193e-07, "logits/chosen": -0.5016771554946899, "logits/rejected": -0.4939349293708801, "logps/chosen": -124.20162963867188, "logps/ref_chosen": -65.072509765625, "logps/ref_rejected": -96.32122802734375, "logps/rejected": -204.7479248046875, "loss": 1.1426, "margin_dpo/margin_mean": 49.297584533691406, "margin_dpo/margin_std": 89.33781433105469, "step": 136 }, { "epoch": 0.2011747430249633, "fcm_dpo/beta": 0.007817087695002556, "fcm_dpo/delta": -0.2199798822402954, "fcm_dpo/margin": 77.56993103027344, "fcm_dpo/q_t": 0.376494824886322, "grad_norm": 18.184268951416016, "learning_rate": 4.853589828711902e-07, "logits/chosen": -0.4158422350883484, "logits/rejected": -0.44304847717285156, "logps/chosen": -105.83258056640625, "logps/ref_chosen": -48.759117126464844, "logps/ref_rejected": -113.86376953125, "logps/rejected": -248.50717163085938, "loss": 1.0242, "margin_dpo/margin_mean": 77.56993103027344, "margin_dpo/margin_std": 111.20265197753906, "step": 137 }, { "epoch": 0.2026431718061674, "fcm_dpo/beta": 0.007688170298933983, "fcm_dpo/delta": -0.05497686192393303, "fcm_dpo/margin": 58.859375, "fcm_dpo/q_t": 0.3964359164237976, "grad_norm": 21.49945831298828, "learning_rate": 4.849231551964771e-07, "logits/chosen": -0.3968755602836609, "logits/rejected": -0.38326364755630493, "logps/chosen": -120.03235626220703, "logps/ref_chosen": -60.519649505615234, "logps/ref_rejected": -93.19694519042969, "logps/rejected": -211.56903076171875, "loss": 1.0554, "margin_dpo/margin_mean": 58.859375, "margin_dpo/margin_std": 72.39787292480469, "step": 138 }, { "epoch": 0.20411160058737152, "fcm_dpo/beta": 0.007515173405408859, "fcm_dpo/delta": -0.13694770634174347, "fcm_dpo/margin": 70.50794982910156, "fcm_dpo/q_t": 0.3826453983783722, "grad_norm": 18.61595916748047, "learning_rate": 4.844811370781446e-07, "logits/chosen": -0.451102077960968, "logits/rejected": -0.4422782063484192, "logps/chosen": -96.82493591308594, "logps/ref_chosen": -46.89138412475586, "logps/ref_rejected": -79.72798156738281, "logps/rejected": -200.16949462890625, "loss": 1.0181, "margin_dpo/margin_mean": 70.50794982910156, "margin_dpo/margin_std": 87.34086608886719, "step": 139 }, { "epoch": 0.2055800293685756, "fcm_dpo/beta": 0.00736122764647007, "fcm_dpo/delta": -0.07254733890295029, "fcm_dpo/margin": 63.72399139404297, "fcm_dpo/q_t": 0.3958495855331421, "grad_norm": 22.667436599731445, "learning_rate": 4.840329401637809e-07, "logits/chosen": -0.4456948935985565, "logits/rejected": -0.43251848220825195, "logps/chosen": -119.22482299804688, "logps/ref_chosen": -58.97471618652344, "logps/ref_rejected": -83.28410339355469, "logps/rejected": -207.25820922851562, "loss": 1.0686, "margin_dpo/margin_mean": 63.72399139404297, "margin_dpo/margin_std": 89.69435119628906, "step": 140 }, { "epoch": 0.20704845814977973, "fcm_dpo/beta": 0.007298792712390423, "fcm_dpo/delta": -0.04338546097278595, "fcm_dpo/margin": 60.48833465576172, "fcm_dpo/q_t": 0.40087050199508667, "grad_norm": 27.13235092163086, "learning_rate": 4.83578576263792e-07, "logits/chosen": -0.4381271302700043, "logits/rejected": -0.42595377564430237, "logps/chosen": -143.43618774414062, "logps/ref_chosen": -75.07566833496094, "logps/ref_rejected": -98.1922607421875, "logps/rejected": -227.04110717773438, "loss": 1.1013, "margin_dpo/margin_mean": 60.48833465576172, "margin_dpo/margin_std": 94.93978881835938, "step": 141 }, { "epoch": 0.20851688693098386, "fcm_dpo/beta": 0.0071950615383684635, "fcm_dpo/delta": -0.10405787825584412, "fcm_dpo/margin": 69.34352111816406, "fcm_dpo/q_t": 0.39234134554862976, "grad_norm": 27.40236473083496, "learning_rate": 4.83118057351089e-07, "logits/chosen": -0.4233921468257904, "logits/rejected": -0.4229241907596588, "logps/chosen": -127.72467803955078, "logps/ref_chosen": -58.027931213378906, "logps/ref_rejected": -94.58222961425781, "logps/rejected": -233.62249755859375, "loss": 1.0828, "margin_dpo/margin_mean": 69.34352111816406, "margin_dpo/margin_std": 106.0972900390625, "step": 142 }, { "epoch": 0.20998531571218795, "fcm_dpo/beta": 0.007188013754785061, "fcm_dpo/delta": 0.08534206449985504, "fcm_dpo/margin": 44.15364074707031, "fcm_dpo/q_t": 0.4322444796562195, "grad_norm": 23.33932876586914, "learning_rate": 4.826513955607734e-07, "logits/chosen": -0.3806174397468567, "logits/rejected": -0.37236979603767395, "logps/chosen": -131.15016174316406, "logps/ref_chosen": -57.59645080566406, "logps/ref_rejected": -78.99957275390625, "logps/rejected": -196.70692443847656, "loss": 1.1967, "margin_dpo/margin_mean": 44.15364074707031, "margin_dpo/margin_std": 92.67884826660156, "step": 143 }, { "epoch": 0.21145374449339208, "fcm_dpo/beta": 0.007249427028000355, "fcm_dpo/delta": 0.007432065438479185, "fcm_dpo/margin": 54.190277099609375, "fcm_dpo/q_t": 0.41100186109542847, "grad_norm": 20.80966567993164, "learning_rate": 4.821786031898176e-07, "logits/chosen": -0.4202612638473511, "logits/rejected": -0.4080207347869873, "logps/chosen": -124.41218566894531, "logps/ref_chosen": -59.90636444091797, "logps/ref_rejected": -82.00025939941406, "logps/rejected": -200.6963653564453, "loss": 1.1065, "margin_dpo/margin_mean": 54.190277099609375, "margin_dpo/margin_std": 78.08949279785156, "step": 144 }, { "epoch": 0.21292217327459617, "fcm_dpo/beta": 0.007232350297272205, "fcm_dpo/delta": -0.020431246608495712, "fcm_dpo/margin": 58.010536193847656, "fcm_dpo/q_t": 0.40479713678359985, "grad_norm": 23.85308837890625, "learning_rate": 4.816996926967401e-07, "logits/chosen": -0.4434972405433655, "logits/rejected": -0.4278140068054199, "logps/chosen": -118.61093139648438, "logps/ref_chosen": -56.60066604614258, "logps/ref_rejected": -77.86631774902344, "logps/rejected": -197.88710021972656, "loss": 1.0888, "margin_dpo/margin_mean": 58.01053237915039, "margin_dpo/margin_std": 80.87408447265625, "step": 145 }, { "epoch": 0.2143906020558003, "fcm_dpo/beta": 0.007291465997695923, "fcm_dpo/delta": 0.07497753947973251, "fcm_dpo/margin": 44.91979217529297, "fcm_dpo/q_t": 0.4256930947303772, "grad_norm": 27.03116226196289, "learning_rate": 4.812146767012779e-07, "logits/chosen": -0.42746251821517944, "logits/rejected": -0.4016566276550293, "logps/chosen": -150.37661743164062, "logps/ref_chosen": -66.00045013427734, "logps/ref_rejected": -81.70278930664062, "logps/rejected": -210.99874877929688, "loss": 1.1828, "margin_dpo/margin_mean": 44.91979217529297, "margin_dpo/margin_std": 87.16963195800781, "step": 146 }, { "epoch": 0.21585903083700442, "fcm_dpo/beta": 0.0072729880921542645, "fcm_dpo/delta": -0.04680505767464638, "fcm_dpo/margin": 61.123626708984375, "fcm_dpo/q_t": 0.40164506435394287, "grad_norm": 19.43584632873535, "learning_rate": 4.807235679840536e-07, "logits/chosen": -0.44083860516548157, "logits/rejected": -0.4199514389038086, "logps/chosen": -115.26583862304688, "logps/ref_chosen": -53.405487060546875, "logps/ref_rejected": -71.39060974121094, "logps/rejected": -194.3745880126953, "loss": 1.0883, "margin_dpo/margin_mean": 61.12362289428711, "margin_dpo/margin_std": 90.59841918945312, "step": 147 }, { "epoch": 0.2173274596182085, "fcm_dpo/beta": 0.007355110719799995, "fcm_dpo/delta": 0.012732595205307007, "fcm_dpo/margin": 52.45569610595703, "fcm_dpo/q_t": 0.41518306732177734, "grad_norm": 19.188884735107422, "learning_rate": 4.802263794862384e-07, "logits/chosen": -0.497209370136261, "logits/rejected": -0.4899882674217224, "logps/chosen": -125.58383178710938, "logps/ref_chosen": -64.93708038330078, "logps/ref_rejected": -103.09384155273438, "logps/rejected": -216.1962890625, "loss": 1.1198, "margin_dpo/margin_mean": 52.45569610595703, "margin_dpo/margin_std": 76.64443969726562, "step": 148 }, { "epoch": 0.21879588839941264, "fcm_dpo/beta": 0.007154976017773151, "fcm_dpo/delta": -0.069991335272789, "fcm_dpo/margin": 64.97525024414062, "fcm_dpo/q_t": 0.3939010500907898, "grad_norm": 18.49083709716797, "learning_rate": 4.797231243092118e-07, "logits/chosen": -0.4779682755470276, "logits/rejected": -0.46220314502716064, "logps/chosen": -116.61399841308594, "logps/ref_chosen": -58.47376251220703, "logps/ref_rejected": -99.31474304199219, "logps/rejected": -222.43020629882812, "loss": 1.0514, "margin_dpo/margin_mean": 64.97525024414062, "margin_dpo/margin_std": 78.06156921386719, "step": 149 }, { "epoch": 0.22026431718061673, "fcm_dpo/beta": 0.007078321650624275, "fcm_dpo/delta": -0.05432058125734329, "fcm_dpo/margin": 63.64070129394531, "fcm_dpo/q_t": 0.40325814485549927, "grad_norm": 18.106704711914062, "learning_rate": 4.792138157142157e-07, "logits/chosen": -0.4446331262588501, "logits/rejected": -0.4476960301399231, "logps/chosen": -96.900146484375, "logps/ref_chosen": -45.705810546875, "logps/ref_rejected": -83.34759521484375, "logps/rejected": -198.18264770507812, "loss": 1.0789, "margin_dpo/margin_mean": 63.64070129394531, "margin_dpo/margin_std": 92.74796295166016, "step": 150 }, { "epoch": 0.22173274596182085, "fcm_dpo/beta": 0.0070512015372514725, "fcm_dpo/delta": -0.053547054529190063, "fcm_dpo/margin": 63.97663879394531, "fcm_dpo/q_t": 0.3967490792274475, "grad_norm": 21.403526306152344, "learning_rate": 4.786984671220053e-07, "logits/chosen": -0.5293259620666504, "logits/rejected": -0.5012092590332031, "logps/chosen": -134.41217041015625, "logps/ref_chosen": -70.57083129882812, "logps/ref_rejected": -100.46382141113281, "logps/rejected": -228.28179931640625, "loss": 1.0576, "margin_dpo/margin_mean": 63.97663879394531, "margin_dpo/margin_std": 79.69630432128906, "step": 151 }, { "epoch": 0.22320117474302498, "fcm_dpo/beta": 0.0069010304287076, "fcm_dpo/delta": -0.14396238327026367, "fcm_dpo/margin": 77.73429870605469, "fcm_dpo/q_t": 0.3808351755142212, "grad_norm": 20.531400680541992, "learning_rate": 4.78177092112495e-07, "logits/chosen": -0.4896411895751953, "logits/rejected": -0.48783737421035767, "logps/chosen": -116.02652740478516, "logps/ref_chosen": -60.16438674926758, "logps/ref_rejected": -106.14045715332031, "logps/rejected": -239.73690795898438, "loss": 1.0136, "margin_dpo/margin_mean": 77.73429870605469, "margin_dpo/margin_std": 92.09419250488281, "step": 152 }, { "epoch": 0.22466960352422907, "fcm_dpo/beta": 0.00679405964910984, "fcm_dpo/delta": -0.05286309868097305, "fcm_dpo/margin": 66.30891418457031, "fcm_dpo/q_t": 0.40227580070495605, "grad_norm": 15.813210487365723, "learning_rate": 4.776497044244016e-07, "logits/chosen": -0.46835315227508545, "logits/rejected": -0.4619212746620178, "logps/chosen": -112.95548248291016, "logps/ref_chosen": -56.315277099609375, "logps/ref_rejected": -85.65583801269531, "logps/rejected": -208.60496520996094, "loss": 1.0864, "margin_dpo/margin_mean": 66.30891418457031, "margin_dpo/margin_std": 100.04182434082031, "step": 153 }, { "epoch": 0.2261380323054332, "fcm_dpo/beta": 0.006759101524949074, "fcm_dpo/delta": -0.03209718316793442, "fcm_dpo/margin": 63.69126510620117, "fcm_dpo/q_t": 0.4056595265865326, "grad_norm": 19.18117332458496, "learning_rate": 4.771163179548808e-07, "logits/chosen": -0.4857712984085083, "logits/rejected": -0.4887011647224426, "logps/chosen": -131.005859375, "logps/ref_chosen": -62.74256896972656, "logps/ref_rejected": -104.24420166015625, "logps/rejected": -236.19874572753906, "loss": 1.1182, "margin_dpo/margin_mean": 63.691261291503906, "margin_dpo/margin_std": 105.09062194824219, "step": 154 }, { "epoch": 0.2276064610866373, "fcm_dpo/beta": 0.0067064836621284485, "fcm_dpo/delta": -0.031813234090805054, "fcm_dpo/margin": 64.17315673828125, "fcm_dpo/q_t": 0.4035380482673645, "grad_norm": 19.415454864501953, "learning_rate": 4.7657694675916247e-07, "logits/chosen": -0.49250417947769165, "logits/rejected": -0.4741542339324951, "logps/chosen": -123.27816772460938, "logps/ref_chosen": -60.65318298339844, "logps/ref_rejected": -77.49220275878906, "logps/rejected": -204.29034423828125, "loss": 1.0929, "margin_dpo/margin_mean": 64.17315673828125, "margin_dpo/margin_std": 94.83734130859375, "step": 155 }, { "epoch": 0.2290748898678414, "fcm_dpo/beta": 0.006815006956458092, "fcm_dpo/delta": 0.15295735001564026, "fcm_dpo/margin": 36.849365234375, "fcm_dpo/q_t": 0.4437222480773926, "grad_norm": 30.32708168029785, "learning_rate": 4.7603160505017893e-07, "logits/chosen": -0.3995016813278198, "logits/rejected": -0.39005500078201294, "logps/chosen": -156.82156372070312, "logps/ref_chosen": -69.49188232421875, "logps/ref_rejected": -77.16929626464844, "logps/rejected": -201.34835815429688, "loss": 1.2742, "margin_dpo/margin_mean": 36.849365234375, "margin_dpo/margin_std": 106.61282348632812, "step": 156 }, { "epoch": 0.2305433186490455, "fcm_dpo/beta": 0.006692226976156235, "fcm_dpo/delta": -0.1519441455602646, "fcm_dpo/margin": 81.02685546875, "fcm_dpo/q_t": 0.3766096234321594, "grad_norm": 24.09219741821289, "learning_rate": 4.7548030719819154e-07, "logits/chosen": -0.3954240083694458, "logits/rejected": -0.402127742767334, "logps/chosen": -140.93927001953125, "logps/ref_chosen": -61.368438720703125, "logps/ref_rejected": -107.64636993408203, "logps/rejected": -268.24407958984375, "loss": 1.025, "margin_dpo/margin_mean": 81.02685546875, "margin_dpo/margin_std": 100.71873474121094, "step": 157 }, { "epoch": 0.23201174743024963, "fcm_dpo/beta": 0.006510759703814983, "fcm_dpo/delta": -0.17182058095932007, "fcm_dpo/margin": 86.34577178955078, "fcm_dpo/q_t": 0.385204017162323, "grad_norm": 20.76033592224121, "learning_rate": 4.7492306773041136e-07, "logits/chosen": -0.38746166229248047, "logits/rejected": -0.4051688611507416, "logps/chosen": -137.71897888183594, "logps/ref_chosen": -57.612918853759766, "logps/ref_rejected": -113.6946792602539, "logps/rejected": -280.146484375, "loss": 1.0522, "margin_dpo/margin_mean": 86.34577178955078, "margin_dpo/margin_std": 131.83157348632812, "step": 158 }, { "epoch": 0.23348017621145375, "fcm_dpo/beta": 0.0064995670691132545, "fcm_dpo/delta": 0.0181589238345623, "fcm_dpo/margin": 58.781009674072266, "fcm_dpo/q_t": 0.41514790058135986, "grad_norm": 24.090534210205078, "learning_rate": 4.743599013306165e-07, "logits/chosen": -0.4077296257019043, "logits/rejected": -0.377045214176178, "logps/chosen": -171.456298828125, "logps/ref_chosen": -81.56034851074219, "logps/ref_rejected": -88.89871215820312, "logps/rejected": -237.57568359375, "loss": 1.1468, "margin_dpo/margin_mean": 58.781009674072266, "margin_dpo/margin_std": 103.61927795410156, "step": 159 }, { "epoch": 0.23494860499265785, "fcm_dpo/beta": 0.006338327657431364, "fcm_dpo/delta": -0.11268580704927444, "fcm_dpo/margin": 79.83772277832031, "fcm_dpo/q_t": 0.39425748586654663, "grad_norm": 24.490720748901367, "learning_rate": 4.737908228387656e-07, "logits/chosen": -0.37448424100875854, "logits/rejected": -0.36534446477890015, "logps/chosen": -157.87139892578125, "logps/ref_chosen": -65.73088073730469, "logps/ref_rejected": -97.21781921386719, "logps/rejected": -269.196044921875, "loss": 1.086, "margin_dpo/margin_mean": 79.83772277832031, "margin_dpo/margin_std": 128.0322265625, "step": 160 }, { "epoch": 0.23641703377386197, "fcm_dpo/beta": 0.006289385724812746, "fcm_dpo/delta": -0.04286719858646393, "fcm_dpo/margin": 70.11194610595703, "fcm_dpo/q_t": 0.4028061032295227, "grad_norm": 21.464141845703125, "learning_rate": 4.7321584725060594e-07, "logits/chosen": -0.38927197456359863, "logits/rejected": -0.38858896493911743, "logps/chosen": -130.46055603027344, "logps/ref_chosen": -52.43647003173828, "logps/ref_rejected": -83.43095397949219, "logps/rejected": -231.56698608398438, "loss": 1.0886, "margin_dpo/margin_mean": 70.11195373535156, "margin_dpo/margin_std": 103.08557891845703, "step": 161 }, { "epoch": 0.23788546255506607, "fcm_dpo/beta": 0.006195507012307644, "fcm_dpo/delta": -0.030592873692512512, "fcm_dpo/margin": 69.01253509521484, "fcm_dpo/q_t": 0.40611007809638977, "grad_norm": 22.719524383544922, "learning_rate": 4.7263498971727905e-07, "logits/chosen": -0.4335670471191406, "logits/rejected": -0.4162771999835968, "logps/chosen": -137.5789337158203, "logps/ref_chosen": -62.6105842590332, "logps/ref_rejected": -89.39057922363281, "logps/rejected": -233.3714599609375, "loss": 1.106, "margin_dpo/margin_mean": 69.01253509521484, "margin_dpo/margin_std": 106.23509216308594, "step": 162 }, { "epoch": 0.2393538913362702, "fcm_dpo/beta": 0.006221453659236431, "fcm_dpo/delta": -0.020094340667128563, "fcm_dpo/margin": 67.38839721679688, "fcm_dpo/q_t": 0.40836769342422485, "grad_norm": 20.796031951904297, "learning_rate": 4.720482655449212e-07, "logits/chosen": -0.35502612590789795, "logits/rejected": -0.33497339487075806, "logps/chosen": -138.83261108398438, "logps/ref_chosen": -55.021629333496094, "logps/ref_rejected": -75.418212890625, "logps/rejected": -226.61758422851562, "loss": 1.1113, "margin_dpo/margin_mean": 67.38839721679688, "margin_dpo/margin_std": 107.830322265625, "step": 163 }, { "epoch": 0.24082232011747431, "fcm_dpo/beta": 0.006072811782360077, "fcm_dpo/delta": -0.10795401781797409, "fcm_dpo/margin": 82.47865295410156, "fcm_dpo/q_t": 0.3873969316482544, "grad_norm": 21.56767463684082, "learning_rate": 4.714556901942599e-07, "logits/chosen": -0.36625775694847107, "logits/rejected": -0.35121026635169983, "logps/chosen": -131.74513244628906, "logps/ref_chosen": -55.64066696166992, "logps/ref_rejected": -79.66463470458984, "logps/rejected": -238.24774169921875, "loss": 1.0342, "margin_dpo/margin_mean": 82.47865295410156, "margin_dpo/margin_std": 101.75411224365234, "step": 164 }, { "epoch": 0.2422907488986784, "fcm_dpo/beta": 0.006134270690381527, "fcm_dpo/delta": 0.07639573514461517, "fcm_dpo/margin": 53.168251037597656, "fcm_dpo/q_t": 0.42690160870552063, "grad_norm": 24.297542572021484, "learning_rate": 4.708572792802069e-07, "logits/chosen": -0.3836957812309265, "logits/rejected": -0.3541428744792938, "logps/chosen": -143.4055633544922, "logps/ref_chosen": -61.310691833496094, "logps/ref_rejected": -73.67060852050781, "logps/rejected": -208.93374633789062, "loss": 1.1731, "margin_dpo/margin_mean": 53.16825866699219, "margin_dpo/margin_std": 98.54109191894531, "step": 165 }, { "epoch": 0.24375917767988253, "fcm_dpo/beta": 0.005966954864561558, "fcm_dpo/delta": -0.19942107796669006, "fcm_dpo/margin": 98.27601623535156, "fcm_dpo/q_t": 0.37992316484451294, "grad_norm": 17.423593521118164, "learning_rate": 4.702530485714461e-07, "logits/chosen": -0.35796594619750977, "logits/rejected": -0.36847564578056335, "logps/chosen": -123.2952880859375, "logps/ref_chosen": -50.98360061645508, "logps/ref_rejected": -98.09512329101562, "logps/rejected": -268.682861328125, "loss": 1.0159, "margin_dpo/margin_mean": 98.27601623535156, "margin_dpo/margin_std": 136.08973693847656, "step": 166 }, { "epoch": 0.24522760646108663, "fcm_dpo/beta": 0.005754595622420311, "fcm_dpo/delta": -0.20378953218460083, "fcm_dpo/margin": 102.81171417236328, "fcm_dpo/q_t": 0.36769425868988037, "grad_norm": 18.515390396118164, "learning_rate": 4.6964301399001877e-07, "logits/chosen": -0.3561704754829407, "logits/rejected": -0.3588705360889435, "logps/chosen": -123.58355712890625, "logps/ref_chosen": -50.424095153808594, "logps/ref_rejected": -96.03042602539062, "logps/rejected": -272.0016174316406, "loss": 0.9723, "margin_dpo/margin_mean": 102.81171417236328, "margin_dpo/margin_std": 112.32765197753906, "step": 167 }, { "epoch": 0.24669603524229075, "fcm_dpo/beta": 0.005659398622810841, "fcm_dpo/delta": -0.030612219125032425, "fcm_dpo/margin": 75.83096313476562, "fcm_dpo/q_t": 0.40368539094924927, "grad_norm": 19.071271896362305, "learning_rate": 4.690271916109034e-07, "logits/chosen": -0.3632907271385193, "logits/rejected": -0.3534013032913208, "logps/chosen": -129.71824645996094, "logps/ref_chosen": -49.462825775146484, "logps/ref_rejected": -75.30855560302734, "logps/rejected": -231.39492797851562, "loss": 1.0785, "margin_dpo/margin_mean": 75.83096313476562, "margin_dpo/margin_std": 102.57954406738281, "step": 168 }, { "epoch": 0.24816446402349487, "fcm_dpo/beta": 0.00563174020498991, "fcm_dpo/delta": 0.021915137767791748, "fcm_dpo/margin": 67.0512466430664, "fcm_dpo/q_t": 0.41945117712020874, "grad_norm": 21.434741973876953, "learning_rate": 4.6840559766159235e-07, "logits/chosen": -0.3872135281562805, "logits/rejected": -0.37111321091651917, "logps/chosen": -142.41534423828125, "logps/ref_chosen": -59.803443908691406, "logps/ref_rejected": -83.34574890136719, "logps/rejected": -233.0089111328125, "loss": 1.1591, "margin_dpo/margin_mean": 67.05125427246094, "margin_dpo/margin_std": 125.73867797851562, "step": 169 }, { "epoch": 0.24963289280469897, "fcm_dpo/beta": 0.005613743327558041, "fcm_dpo/delta": -0.046133168041706085, "fcm_dpo/margin": 78.93022918701172, "fcm_dpo/q_t": 0.3986685574054718, "grad_norm": 17.828161239624023, "learning_rate": 4.6777824852166437e-07, "logits/chosen": -0.3273963928222656, "logits/rejected": -0.31577807664871216, "logps/chosen": -122.65813446044922, "logps/ref_chosen": -49.471771240234375, "logps/ref_rejected": -75.91734313964844, "logps/rejected": -228.033935546875, "loss": 1.0728, "margin_dpo/margin_mean": 78.93023681640625, "margin_dpo/margin_std": 103.84635925292969, "step": 170 }, { "epoch": 0.2511013215859031, "fcm_dpo/beta": 0.005664165131747723, "fcm_dpo/delta": 0.037670087069272995, "fcm_dpo/margin": 64.21273040771484, "fcm_dpo/q_t": 0.4233492612838745, "grad_norm": 29.94133186340332, "learning_rate": 4.6714516072235273e-07, "logits/chosen": -0.3752813935279846, "logits/rejected": -0.3576093912124634, "logps/chosen": -190.8294677734375, "logps/ref_chosen": -84.49931335449219, "logps/ref_rejected": -109.38209533691406, "logps/rejected": -279.92498779296875, "loss": 1.1713, "margin_dpo/margin_mean": 64.21273040771484, "margin_dpo/margin_std": 129.4434051513672, "step": 171 }, { "epoch": 0.2525697503671072, "fcm_dpo/beta": 0.005686759948730469, "fcm_dpo/delta": 0.019866658374667168, "fcm_dpo/margin": 66.97691345214844, "fcm_dpo/q_t": 0.4149053692817688, "grad_norm": 21.02857780456543, "learning_rate": 4.6650635094610966e-07, "logits/chosen": -0.39714205265045166, "logits/rejected": -0.378519207239151, "logps/chosen": -163.3927001953125, "logps/ref_chosen": -68.65391540527344, "logps/ref_rejected": -85.43667602539062, "logps/rejected": -247.15237426757812, "loss": 1.1304, "margin_dpo/margin_mean": 66.97691345214844, "margin_dpo/margin_std": 110.04927062988281, "step": 172 }, { "epoch": 0.2540381791483113, "fcm_dpo/beta": 0.005732472985982895, "fcm_dpo/delta": 0.0141000896692276, "fcm_dpo/margin": 67.36067199707031, "fcm_dpo/q_t": 0.4125649333000183, "grad_norm": 20.02904510498047, "learning_rate": 4.6586183602616687e-07, "logits/chosen": -0.42190614342689514, "logits/rejected": -0.39422181248664856, "logps/chosen": -148.86404418945312, "logps/ref_chosen": -63.050880432128906, "logps/ref_rejected": -78.68392181396484, "logps/rejected": -231.85775756835938, "loss": 1.1066, "margin_dpo/margin_mean": 67.36067199707031, "margin_dpo/margin_std": 95.92362976074219, "step": 173 }, { "epoch": 0.2555066079295154, "fcm_dpo/beta": 0.005698870867490768, "fcm_dpo/delta": -0.06049712002277374, "fcm_dpo/margin": 80.25811004638672, "fcm_dpo/q_t": 0.39966872334480286, "grad_norm": 23.803714752197266, "learning_rate": 4.652116329460919e-07, "logits/chosen": -0.371025949716568, "logits/rejected": -0.3882911801338196, "logps/chosen": -135.3501434326172, "logps/ref_chosen": -53.36296844482422, "logps/ref_rejected": -101.91120910644531, "logps/rejected": -264.156494140625, "loss": 1.0799, "margin_dpo/margin_mean": 80.25811767578125, "margin_dpo/margin_std": 114.95230102539062, "step": 174 }, { "epoch": 0.25697503671071953, "fcm_dpo/beta": 0.005467164795845747, "fcm_dpo/delta": -0.20820683240890503, "fcm_dpo/margin": 108.91580200195312, "fcm_dpo/q_t": 0.3646426200866699, "grad_norm": 27.59210968017578, "learning_rate": 4.645557588393406e-07, "logits/chosen": -0.3253830671310425, "logits/rejected": -0.31133347749710083, "logps/chosen": -121.3033447265625, "logps/ref_chosen": -45.417762756347656, "logps/ref_rejected": -89.50579833984375, "logps/rejected": -274.30718994140625, "loss": 0.954, "margin_dpo/margin_mean": 108.91580200195312, "margin_dpo/margin_std": 108.02481079101562, "step": 175 }, { "epoch": 0.25844346549192365, "fcm_dpo/beta": 0.005350666120648384, "fcm_dpo/delta": -0.09525755047798157, "fcm_dpo/margin": 91.70521545410156, "fcm_dpo/q_t": 0.3924447298049927, "grad_norm": 20.749011993408203, "learning_rate": 4.638942309888058e-07, "logits/chosen": -0.2885698676109314, "logits/rejected": -0.3057425618171692, "logps/chosen": -131.7587890625, "logps/ref_chosen": -50.452842712402344, "logps/ref_rejected": -95.5589599609375, "logps/rejected": -268.57012939453125, "loss": 1.043, "margin_dpo/margin_mean": 91.70521545410156, "margin_dpo/margin_std": 118.48809814453125, "step": 176 }, { "epoch": 0.2599118942731278, "fcm_dpo/beta": 0.005270042456686497, "fcm_dpo/delta": -0.07793374359607697, "fcm_dpo/margin": 89.99525451660156, "fcm_dpo/q_t": 0.3949745297431946, "grad_norm": 29.508962631225586, "learning_rate": 4.6322706682636137e-07, "logits/chosen": -0.3700808584690094, "logits/rejected": -0.36038738489151, "logps/chosen": -155.28253173828125, "logps/ref_chosen": -61.216468811035156, "logps/ref_rejected": -95.89378356933594, "logps/rejected": -279.955078125, "loss": 1.0511, "margin_dpo/margin_mean": 89.99525451660156, "margin_dpo/margin_std": 117.3301773071289, "step": 177 }, { "epoch": 0.26138032305433184, "fcm_dpo/beta": 0.0050674136728048325, "fcm_dpo/delta": -0.18977004289627075, "fcm_dpo/margin": 114.02528381347656, "fcm_dpo/q_t": 0.37513747811317444, "grad_norm": 27.016380310058594, "learning_rate": 4.6255428393240354e-07, "logits/chosen": -0.256081223487854, "logits/rejected": -0.24759094417095184, "logps/chosen": -162.09182739257812, "logps/ref_chosen": -58.26478958129883, "logps/ref_rejected": -105.3653335571289, "logps/rejected": -323.2176513671875, "loss": 0.9993, "margin_dpo/margin_mean": 114.02528381347656, "margin_dpo/margin_std": 142.46255493164062, "step": 178 }, { "epoch": 0.26284875183553597, "fcm_dpo/beta": 0.0050058369524776936, "fcm_dpo/delta": -0.010855477303266525, "fcm_dpo/margin": 81.88908386230469, "fcm_dpo/q_t": 0.409834086894989, "grad_norm": 28.862895965576172, "learning_rate": 4.6187590003538724e-07, "logits/chosen": -0.3041580319404602, "logits/rejected": -0.31187134981155396, "logps/chosen": -166.816162109375, "logps/ref_chosen": -61.05832290649414, "logps/ref_rejected": -90.52782440185547, "logps/rejected": -278.17474365234375, "loss": 1.1298, "margin_dpo/margin_mean": 81.88908386230469, "margin_dpo/margin_std": 138.5677032470703, "step": 179 }, { "epoch": 0.2643171806167401, "fcm_dpo/beta": 0.0049311150796711445, "fcm_dpo/delta": -0.1207597553730011, "fcm_dpo/margin": 104.31163787841797, "fcm_dpo/q_t": 0.3830791115760803, "grad_norm": 20.39897918701172, "learning_rate": 4.611919330113591e-07, "logits/chosen": -0.30937910079956055, "logits/rejected": -0.30372101068496704, "logps/chosen": -145.08279418945312, "logps/ref_chosen": -54.34272003173828, "logps/ref_rejected": -98.21183776855469, "logps/rejected": -293.2635498046875, "loss": 1.0205, "margin_dpo/margin_mean": 104.31163787841797, "margin_dpo/margin_std": 123.76762390136719, "step": 180 }, { "epoch": 0.2657856093979442, "fcm_dpo/beta": 0.004971269518136978, "fcm_dpo/delta": 0.07836050540208817, "fcm_dpo/margin": 65.1822509765625, "fcm_dpo/q_t": 0.42635685205459595, "grad_norm": 16.958599090576172, "learning_rate": 4.605024008834863e-07, "logits/chosen": -0.3443525433540344, "logits/rejected": -0.3196682631969452, "logps/chosen": -133.23472595214844, "logps/ref_chosen": -55.000457763671875, "logps/ref_rejected": -61.656166076660156, "logps/rejected": -205.07269287109375, "loss": 1.1641, "margin_dpo/margin_mean": 65.1822509765625, "margin_dpo/margin_std": 114.510498046875, "step": 181 }, { "epoch": 0.26725403817914833, "fcm_dpo/beta": 0.0048531051725149155, "fcm_dpo/delta": -0.15253815054893494, "fcm_dpo/margin": 111.9610366821289, "fcm_dpo/q_t": 0.37664783000946045, "grad_norm": 17.655406951904297, "learning_rate": 4.598073218215817e-07, "logits/chosen": -0.2682988941669464, "logits/rejected": -0.27632054686546326, "logps/chosen": -116.1263656616211, "logps/ref_chosen": -41.107852935791016, "logps/ref_rejected": -89.5215835571289, "logps/rejected": -276.5011291503906, "loss": 1.0094, "margin_dpo/margin_mean": 111.96102905273438, "margin_dpo/margin_std": 131.97494506835938, "step": 182 }, { "epoch": 0.2687224669603524, "fcm_dpo/beta": 0.004865183494985104, "fcm_dpo/delta": 0.1063179224729538, "fcm_dpo/margin": 60.8377685546875, "fcm_dpo/q_t": 0.4321151673793793, "grad_norm": 19.247581481933594, "learning_rate": 4.5910671414162484e-07, "logits/chosen": -0.31034791469573975, "logits/rejected": -0.29865318536758423, "logps/chosen": -168.7334442138672, "logps/ref_chosen": -57.52456283569336, "logps/ref_rejected": -75.97572326660156, "logps/rejected": -248.02236938476562, "loss": 1.1792, "margin_dpo/margin_mean": 60.8377685546875, "margin_dpo/margin_std": 100.23750305175781, "step": 183 }, { "epoch": 0.2701908957415565, "fcm_dpo/beta": 0.005026969127357006, "fcm_dpo/delta": 0.08225920051336288, "fcm_dpo/margin": 63.5242919921875, "fcm_dpo/q_t": 0.42830824851989746, "grad_norm": 17.908655166625977, "learning_rate": 4.5840059630527985e-07, "logits/chosen": -0.364746630191803, "logits/rejected": -0.3552126884460449, "logps/chosen": -151.75286865234375, "logps/ref_chosen": -58.544952392578125, "logps/ref_rejected": -76.63406372070312, "logps/rejected": -233.36627197265625, "loss": 1.1644, "margin_dpo/margin_mean": 63.524295806884766, "margin_dpo/margin_std": 108.76426696777344, "step": 184 }, { "epoch": 0.27165932452276065, "fcm_dpo/beta": 0.005125709809362888, "fcm_dpo/delta": 0.14241455495357513, "fcm_dpo/margin": 50.99299240112305, "fcm_dpo/q_t": 0.44419747591018677, "grad_norm": 20.483118057250977, "learning_rate": 4.5768898691940836e-07, "logits/chosen": -0.3164641857147217, "logits/rejected": -0.291867733001709, "logps/chosen": -162.77264404296875, "logps/ref_chosen": -62.025848388671875, "logps/ref_rejected": -73.7625961303711, "logps/rejected": -225.5023956298828, "loss": 1.2266, "margin_dpo/margin_mean": 50.99299621582031, "margin_dpo/margin_std": 119.36555480957031, "step": 185 }, { "epoch": 0.27312775330396477, "fcm_dpo/beta": 0.00508568761870265, "fcm_dpo/delta": -0.09505677223205566, "fcm_dpo/margin": 96.39396667480469, "fcm_dpo/q_t": 0.3893883526325226, "grad_norm": 28.187034606933594, "learning_rate": 4.5697190473557947e-07, "logits/chosen": -0.384753942489624, "logits/rejected": -0.36420518159866333, "logps/chosen": -161.88429260253906, "logps/ref_chosen": -69.35346984863281, "logps/ref_rejected": -88.07244873046875, "logps/rejected": -276.99725341796875, "loss": 1.0324, "margin_dpo/margin_mean": 96.39396667480469, "margin_dpo/margin_std": 115.44207763671875, "step": 186 }, { "epoch": 0.2745961820851689, "fcm_dpo/beta": 0.005085780750960112, "fcm_dpo/delta": -0.005826789885759354, "fcm_dpo/margin": 79.68223571777344, "fcm_dpo/q_t": 0.407413512468338, "grad_norm": 24.32848358154297, "learning_rate": 4.5624936864957555e-07, "logits/chosen": -0.3272107243537903, "logits/rejected": -0.3207721710205078, "logps/chosen": -140.29827880859375, "logps/ref_chosen": -52.7564582824707, "logps/ref_rejected": -81.96910095214844, "logps/rejected": -249.19314575195312, "loss": 1.0889, "margin_dpo/margin_mean": 79.68223571777344, "margin_dpo/margin_std": 104.66482543945312, "step": 187 }, { "epoch": 0.27606461086637296, "fcm_dpo/beta": 0.004998504184186459, "fcm_dpo/delta": -0.0912085771560669, "fcm_dpo/margin": 97.39512634277344, "fcm_dpo/q_t": 0.3907063603401184, "grad_norm": 30.731496810913086, "learning_rate": 4.5552139770089454e-07, "logits/chosen": -0.3083413541316986, "logits/rejected": -0.313301146030426, "logps/chosen": -131.1968994140625, "logps/ref_chosen": -49.415489196777344, "logps/ref_rejected": -89.54043579101562, "logps/rejected": -268.71697998046875, "loss": 1.0356, "margin_dpo/margin_mean": 97.39511108398438, "margin_dpo/margin_std": 118.06564331054688, "step": 188 }, { "epoch": 0.2775330396475771, "fcm_dpo/beta": 0.0049751270562410355, "fcm_dpo/delta": 0.0008535268716514111, "fcm_dpo/margin": 80.23283386230469, "fcm_dpo/q_t": 0.4126392900943756, "grad_norm": 23.215417861938477, "learning_rate": 4.5478801107224794e-07, "logits/chosen": -0.35090625286102295, "logits/rejected": -0.3341342806816101, "logps/chosen": -148.1799774169922, "logps/ref_chosen": -52.39896011352539, "logps/ref_rejected": -72.16735076904297, "logps/rejected": -248.1811981201172, "loss": 1.1204, "margin_dpo/margin_mean": 80.23283386230469, "margin_dpo/margin_std": 131.99029541015625, "step": 189 }, { "epoch": 0.2790014684287812, "fcm_dpo/beta": 0.004961079452186823, "fcm_dpo/delta": -0.07475320994853973, "fcm_dpo/margin": 94.80111694335938, "fcm_dpo/q_t": 0.39625459909439087, "grad_norm": 18.946704864501953, "learning_rate": 4.5404922808905543e-07, "logits/chosen": -0.38340574502944946, "logits/rejected": -0.3741362690925598, "logps/chosen": -167.19398498535156, "logps/ref_chosen": -64.68305969238281, "logps/ref_rejected": -102.55052185058594, "logps/rejected": -299.862548828125, "loss": 1.0765, "margin_dpo/margin_mean": 94.80110931396484, "margin_dpo/margin_std": 133.4532012939453, "step": 190 }, { "epoch": 0.28046989720998533, "fcm_dpo/beta": 0.0046934699639678, "fcm_dpo/delta": -0.2578633725643158, "fcm_dpo/margin": 136.34432983398438, "fcm_dpo/q_t": 0.35979998111724854, "grad_norm": 20.45859146118164, "learning_rate": 4.5330506821893565e-07, "logits/chosen": -0.3379257917404175, "logits/rejected": -0.31569015979766846, "logps/chosen": -163.58349609375, "logps/ref_chosen": -68.65887451171875, "logps/ref_rejected": -110.1396713256836, "logps/rejected": -341.40863037109375, "loss": 0.9469, "margin_dpo/margin_mean": 136.34432983398438, "margin_dpo/margin_std": 148.54867553710938, "step": 191 }, { "epoch": 0.28193832599118945, "fcm_dpo/beta": 0.004647374618798494, "fcm_dpo/delta": -0.024048451334238052, "fcm_dpo/margin": 91.00912475585938, "fcm_dpo/q_t": 0.40724360942840576, "grad_norm": 25.17645263671875, "learning_rate": 4.5255555107119336e-07, "logits/chosen": -0.27830517292022705, "logits/rejected": -0.2767331004142761, "logps/chosen": -193.28927612304688, "logps/ref_chosen": -69.72691345214844, "logps/ref_rejected": -103.32135009765625, "logps/rejected": -317.892822265625, "loss": 1.1082, "margin_dpo/margin_mean": 91.00912475585938, "margin_dpo/margin_std": 144.29054260253906, "step": 192 }, { "epoch": 0.2834067547723935, "fcm_dpo/beta": 0.0047539277002215385, "fcm_dpo/delta": 0.14870129525661469, "fcm_dpo/margin": 53.44430923461914, "fcm_dpo/q_t": 0.4405200481414795, "grad_norm": 30.352754592895508, "learning_rate": 4.5180069639630236e-07, "logits/chosen": -0.3066081404685974, "logits/rejected": -0.2960602045059204, "logps/chosen": -183.55735778808594, "logps/ref_chosen": -60.19049835205078, "logps/ref_rejected": -76.40755462646484, "logps/rejected": -253.21871948242188, "loss": 1.2487, "margin_dpo/margin_mean": 53.44430923461914, "margin_dpo/margin_std": 137.12416076660156, "step": 193 }, { "epoch": 0.28487518355359764, "fcm_dpo/beta": 0.004748090170323849, "fcm_dpo/delta": -0.009687615558505058, "fcm_dpo/margin": 86.15937805175781, "fcm_dpo/q_t": 0.4043833017349243, "grad_norm": 17.603483200073242, "learning_rate": 4.510405240853854e-07, "logits/chosen": -0.23391515016555786, "logits/rejected": -0.21777713298797607, "logps/chosen": -114.55517578125, "logps/ref_chosen": -37.84037399291992, "logps/ref_rejected": -60.684783935546875, "logps/rejected": -223.5589599609375, "loss": 1.07, "margin_dpo/margin_mean": 86.15937805175781, "margin_dpo/margin_std": 97.66061401367188, "step": 194 }, { "epoch": 0.28634361233480177, "fcm_dpo/beta": 0.004725456237792969, "fcm_dpo/delta": -0.044938720762729645, "fcm_dpo/margin": 93.73036193847656, "fcm_dpo/q_t": 0.39892733097076416, "grad_norm": 21.71803092956543, "learning_rate": 4.5027505416968985e-07, "logits/chosen": -0.24051514267921448, "logits/rejected": -0.25914958119392395, "logps/chosen": -176.97164916992188, "logps/ref_chosen": -54.891571044921875, "logps/ref_rejected": -96.77095794677734, "logps/rejected": -312.5813903808594, "loss": 1.0613, "margin_dpo/margin_mean": 93.73036193847656, "margin_dpo/margin_std": 116.85511016845703, "step": 195 }, { "epoch": 0.2878120411160059, "fcm_dpo/beta": 0.004625506699085236, "fcm_dpo/delta": -0.09405344724655151, "fcm_dpo/margin": 105.57457733154297, "fcm_dpo/q_t": 0.39183375239372253, "grad_norm": 17.44955062866211, "learning_rate": 4.495043068200599e-07, "logits/chosen": -0.2761349380016327, "logits/rejected": -0.25936779379844666, "logps/chosen": -148.1903076171875, "logps/ref_chosen": -53.245243072509766, "logps/ref_rejected": -76.05294799804688, "logps/rejected": -276.57257080078125, "loss": 1.0531, "margin_dpo/margin_mean": 105.57457733154297, "margin_dpo/margin_std": 137.7983856201172, "step": 196 }, { "epoch": 0.28928046989721, "fcm_dpo/beta": 0.004657561890780926, "fcm_dpo/delta": 0.02050638385117054, "fcm_dpo/margin": 81.59483337402344, "fcm_dpo/q_t": 0.4134349226951599, "grad_norm": 51.03510284423828, "learning_rate": 4.4872830234640493e-07, "logits/chosen": -0.2589200735092163, "logits/rejected": -0.252452552318573, "logps/chosen": -158.63006591796875, "logps/ref_chosen": -60.42033386230469, "logps/ref_rejected": -77.20890808105469, "logps/rejected": -257.01348876953125, "loss": 1.106, "margin_dpo/margin_mean": 81.59483337402344, "margin_dpo/margin_std": 112.34895324707031, "step": 197 }, { "epoch": 0.2907488986784141, "fcm_dpo/beta": 0.004592553712427616, "fcm_dpo/delta": -0.08417719602584839, "fcm_dpo/margin": 104.55335998535156, "fcm_dpo/q_t": 0.39459335803985596, "grad_norm": 21.29139518737793, "learning_rate": 4.479470611971645e-07, "logits/chosen": -0.30762723088264465, "logits/rejected": -0.30751878023147583, "logps/chosen": -167.06329345703125, "logps/ref_chosen": -55.03618621826172, "logps/ref_rejected": -97.24325561523438, "logps/rejected": -313.82373046875, "loss": 1.0534, "margin_dpo/margin_mean": 104.5533676147461, "margin_dpo/margin_std": 141.0715789794922, "step": 198 }, { "epoch": 0.2922173274596182, "fcm_dpo/beta": 0.004494061227887869, "fcm_dpo/delta": -0.07228090614080429, "fcm_dpo/margin": 104.078369140625, "fcm_dpo/q_t": 0.3951132893562317, "grad_norm": 22.24437141418457, "learning_rate": 4.471606039587695e-07, "logits/chosen": -0.3077055811882019, "logits/rejected": -0.2900369167327881, "logps/chosen": -163.30047607421875, "logps/ref_chosen": -56.828826904296875, "logps/ref_rejected": -84.64820861816406, "logps/rejected": -295.1982116699219, "loss": 1.0639, "margin_dpo/margin_mean": 104.078369140625, "margin_dpo/margin_std": 139.16575622558594, "step": 199 }, { "epoch": 0.2936857562408223, "fcm_dpo/beta": 0.004447395913302898, "fcm_dpo/delta": -0.06333615630865097, "fcm_dpo/margin": 103.42639923095703, "fcm_dpo/q_t": 0.39867448806762695, "grad_norm": 21.961410522460938, "learning_rate": 4.4636895135509966e-07, "logits/chosen": -0.25170740485191345, "logits/rejected": -0.23479950428009033, "logps/chosen": -158.19174194335938, "logps/ref_chosen": -53.06706237792969, "logps/ref_rejected": -80.60843658447266, "logps/rejected": -289.1595153808594, "loss": 1.0859, "margin_dpo/margin_mean": 103.4263916015625, "margin_dpo/margin_std": 155.78683471679688, "step": 200 }, { "epoch": 0.2936857562408223, "eval_fcm_dpo/beta": 0.004448407795280218, "eval_logits/chosen": -0.3343876898288727, "eval_logits/rejected": -0.32057368755340576, "eval_logps/chosen": -222.92990112304688, "eval_logps/ref_chosen": -79.05104064941406, "eval_logps/ref_rejected": -86.79793548583984, "eval_logps/rejected": -287.1362609863281, "eval_loss": 0.6270496845245361, "eval_margin_dpo/margin_mean": 56.45948028564453, "eval_margin_dpo/margin_std": 146.72439575195312, "eval_runtime": 39.2985, "eval_samples_per_second": 59.519, "eval_steps_per_second": 1.883, "step": 200 }, { "epoch": 0.29515418502202645, "fcm_dpo/beta": 0.0044220732524991035, "fcm_dpo/delta": -0.05648049712181091, "fcm_dpo/margin": 102.65122985839844, "fcm_dpo/q_t": 0.39846765995025635, "grad_norm": 28.671607971191406, "learning_rate": 4.455721242469372e-07, "logits/chosen": -0.3493998944759369, "logits/rejected": -0.34729981422424316, "logps/chosen": -184.7327880859375, "logps/ref_chosen": -75.4022216796875, "logps/ref_rejected": -114.80821990966797, "logps/rejected": -326.7900390625, "loss": 1.0774, "margin_dpo/margin_mean": 102.65122985839844, "margin_dpo/margin_std": 145.4634246826172, "step": 201 }, { "epoch": 0.2966226138032305, "fcm_dpo/beta": 0.004456365015357733, "fcm_dpo/delta": 0.08349813520908356, "fcm_dpo/margin": 71.60739135742188, "fcm_dpo/q_t": 0.4296954274177551, "grad_norm": 20.462011337280273, "learning_rate": 4.4477014363141755e-07, "logits/chosen": -0.2886938750743866, "logits/rejected": -0.3031277060508728, "logps/chosen": -163.66189575195312, "logps/ref_chosen": -50.101318359375, "logps/ref_rejected": -86.98503112792969, "logps/rejected": -272.15301513671875, "loss": 1.187, "margin_dpo/margin_mean": 71.6073989868164, "margin_dpo/margin_std": 143.59751892089844, "step": 202 }, { "epoch": 0.29809104258443464, "fcm_dpo/beta": 0.004469073843210936, "fcm_dpo/delta": 0.0007087336853146553, "fcm_dpo/margin": 89.34852600097656, "fcm_dpo/q_t": 0.4091683030128479, "grad_norm": 21.502296447753906, "learning_rate": 4.439630306414758e-07, "logits/chosen": -0.34994345903396606, "logits/rejected": -0.34321272373199463, "logps/chosen": -173.15109252929688, "logps/ref_chosen": -60.60969543457031, "logps/ref_rejected": -85.89596557617188, "logps/rejected": -287.785888671875, "loss": 1.0972, "margin_dpo/margin_mean": 89.34852600097656, "margin_dpo/margin_std": 124.40196228027344, "step": 203 }, { "epoch": 0.29955947136563876, "fcm_dpo/beta": 0.00449190940707922, "fcm_dpo/delta": 0.030911792069673538, "fcm_dpo/margin": 82.42413330078125, "fcm_dpo/q_t": 0.41922539472579956, "grad_norm": 25.3990535736084, "learning_rate": 4.431508065452897e-07, "logits/chosen": -0.3936639428138733, "logits/rejected": -0.35490119457244873, "logps/chosen": -204.40565490722656, "logps/ref_chosen": -80.16496276855469, "logps/ref_rejected": -87.69590759277344, "logps/rejected": -294.3607177734375, "loss": 1.1486, "margin_dpo/margin_mean": 82.42413330078125, "margin_dpo/margin_std": 145.6385040283203, "step": 204 }, { "epoch": 0.3010279001468429, "fcm_dpo/beta": 0.004410895984619856, "fcm_dpo/delta": -0.08054696023464203, "fcm_dpo/margin": 107.63643646240234, "fcm_dpo/q_t": 0.3914853632450104, "grad_norm": 22.971195220947266, "learning_rate": 4.4233349274571974e-07, "logits/chosen": -0.3133258819580078, "logits/rejected": -0.2844025492668152, "logps/chosen": -178.8911895751953, "logps/ref_chosen": -59.384735107421875, "logps/ref_rejected": -85.12505340576172, "logps/rejected": -312.2679443359375, "loss": 1.0581, "margin_dpo/margin_mean": 107.63642883300781, "margin_dpo/margin_std": 137.53057861328125, "step": 205 }, { "epoch": 0.302496328928047, "fcm_dpo/beta": 0.004336735233664513, "fcm_dpo/delta": -0.1181631088256836, "fcm_dpo/margin": 117.94447326660156, "fcm_dpo/q_t": 0.38123589754104614, "grad_norm": 24.863079071044922, "learning_rate": 4.415111107797445e-07, "logits/chosen": -0.23951232433319092, "logits/rejected": -0.2434745579957962, "logps/chosen": -153.0740203857422, "logps/ref_chosen": -46.964500427246094, "logps/ref_rejected": -98.9534912109375, "logps/rejected": -323.00750732421875, "loss": 1.0098, "margin_dpo/margin_mean": 117.94447326660156, "margin_dpo/margin_std": 127.08613586425781, "step": 206 }, { "epoch": 0.3039647577092511, "fcm_dpo/beta": 0.004215326625853777, "fcm_dpo/delta": -0.1830846071243286, "fcm_dpo/margin": 135.88345336914062, "fcm_dpo/q_t": 0.3738357424736023, "grad_norm": 23.037128448486328, "learning_rate": 4.4068368231789365e-07, "logits/chosen": -0.36506402492523193, "logits/rejected": -0.34065380692481995, "logps/chosen": -152.6772003173828, "logps/ref_chosen": -56.05625915527344, "logps/ref_rejected": -84.44779968261719, "logps/rejected": -316.95220947265625, "loss": 0.9863, "margin_dpo/margin_mean": 135.88345336914062, "margin_dpo/margin_std": 156.41925048828125, "step": 207 }, { "epoch": 0.3054331864904552, "fcm_dpo/beta": 0.004147009924054146, "fcm_dpo/delta": -0.024765145033597946, "fcm_dpo/margin": 102.14737701416016, "fcm_dpo/q_t": 0.40402868390083313, "grad_norm": 24.734445571899414, "learning_rate": 4.398512291636768e-07, "logits/chosen": -0.3294498026371002, "logits/rejected": -0.31117957830429077, "logps/chosen": -219.45916748046875, "logps/ref_chosen": -67.06761169433594, "logps/ref_rejected": -94.28689575195312, "logps/rejected": -348.8258361816406, "loss": 1.1027, "margin_dpo/margin_mean": 102.14736938476562, "margin_dpo/margin_std": 155.35968017578125, "step": 208 }, { "epoch": 0.3069016152716593, "fcm_dpo/beta": 0.004163610748946667, "fcm_dpo/delta": 0.01959494687616825, "fcm_dpo/margin": 91.5401382446289, "fcm_dpo/q_t": 0.4133692979812622, "grad_norm": 29.469234466552734, "learning_rate": 4.3901377325300857e-07, "logits/chosen": -0.2552475929260254, "logits/rejected": -0.24348849058151245, "logps/chosen": -184.3878631591797, "logps/ref_chosen": -56.18169403076172, "logps/ref_rejected": -80.94152069091797, "logps/rejected": -300.68780517578125, "loss": 1.1275, "margin_dpo/margin_mean": 91.54013061523438, "margin_dpo/margin_std": 143.59213256835938, "step": 209 }, { "epoch": 0.30837004405286345, "fcm_dpo/beta": 0.0041510555893182755, "fcm_dpo/delta": -0.04974536970257759, "fcm_dpo/margin": 107.78252410888672, "fcm_dpo/q_t": 0.39923179149627686, "grad_norm": 23.570310592651367, "learning_rate": 4.381713366536311e-07, "logits/chosen": -0.2864752411842346, "logits/rejected": -0.2775830030441284, "logps/chosen": -160.75347900390625, "logps/ref_chosen": -46.371822357177734, "logps/ref_rejected": -76.68162536621094, "logps/rejected": -298.84576416015625, "loss": 1.0692, "margin_dpo/margin_mean": 107.78252410888672, "margin_dpo/margin_std": 142.31451416015625, "step": 210 }, { "epoch": 0.30983847283406757, "fcm_dpo/beta": 0.004142909776419401, "fcm_dpo/delta": 0.02150268293917179, "fcm_dpo/margin": 91.54325866699219, "fcm_dpo/q_t": 0.41826772689819336, "grad_norm": 31.811269760131836, "learning_rate": 4.373239415645323e-07, "logits/chosen": -0.2951120138168335, "logits/rejected": -0.25315576791763306, "logps/chosen": -244.23867797851562, "logps/ref_chosen": -78.93235778808594, "logps/ref_rejected": -86.82098388671875, "logps/rejected": -343.6705627441406, "loss": 1.1421, "margin_dpo/margin_mean": 91.54325103759766, "margin_dpo/margin_std": 157.68630981445312, "step": 211 }, { "epoch": 0.31130690161527164, "fcm_dpo/beta": 0.004025098867714405, "fcm_dpo/delta": -0.14305897057056427, "fcm_dpo/margin": 132.50680541992188, "fcm_dpo/q_t": 0.38068991899490356, "grad_norm": 27.229854583740234, "learning_rate": 4.3647161031536086e-07, "logits/chosen": -0.2998214364051819, "logits/rejected": -0.2908783257007599, "logps/chosen": -194.6740264892578, "logps/ref_chosen": -58.19701385498047, "logps/ref_rejected": -103.05785369873047, "logps/rejected": -372.04168701171875, "loss": 1.0273, "margin_dpo/margin_mean": 132.50680541992188, "margin_dpo/margin_std": 162.19839477539062, "step": 212 }, { "epoch": 0.31277533039647576, "fcm_dpo/beta": 0.0039492822252213955, "fcm_dpo/delta": -0.1063137948513031, "fcm_dpo/margin": 126.71992492675781, "fcm_dpo/q_t": 0.3866674602031708, "grad_norm": 29.196735382080078, "learning_rate": 4.3561436536583774e-07, "logits/chosen": -0.27108436822891235, "logits/rejected": -0.24033893644809723, "logps/chosen": -197.20233154296875, "logps/ref_chosen": -67.51271057128906, "logps/ref_rejected": -93.91471862792969, "logps/rejected": -350.32427978515625, "loss": 1.0335, "margin_dpo/margin_mean": 126.71992492675781, "margin_dpo/margin_std": 155.05508422851562, "step": 213 }, { "epoch": 0.3142437591776799, "fcm_dpo/beta": 0.0039157988503575325, "fcm_dpo/delta": -0.043942950665950775, "fcm_dpo/margin": 112.87818145751953, "fcm_dpo/q_t": 0.4002673327922821, "grad_norm": 23.29557991027832, "learning_rate": 4.3475222930516473e-07, "logits/chosen": -0.2142457664012909, "logits/rejected": -0.2187519669532776, "logps/chosen": -156.87863159179688, "logps/ref_chosen": -41.604888916015625, "logps/ref_rejected": -77.51741027832031, "logps/rejected": -305.66937255859375, "loss": 1.0694, "margin_dpo/margin_mean": 112.87818908691406, "margin_dpo/margin_std": 148.09786987304688, "step": 214 }, { "epoch": 0.315712187958884, "fcm_dpo/beta": 0.003857589792460203, "fcm_dpo/delta": -0.06539718061685562, "fcm_dpo/margin": 119.77651977539062, "fcm_dpo/q_t": 0.39416319131851196, "grad_norm": 27.0275821685791, "learning_rate": 4.3388522485142885e-07, "logits/chosen": -0.2805659770965576, "logits/rejected": -0.2722279727458954, "logps/chosen": -188.13607788085938, "logps/ref_chosen": -53.279266357421875, "logps/ref_rejected": -89.96464538574219, "logps/rejected": -344.59796142578125, "loss": 1.0434, "margin_dpo/margin_mean": 119.77651977539062, "margin_dpo/margin_std": 138.7313232421875, "step": 215 }, { "epoch": 0.31718061674008813, "fcm_dpo/beta": 0.0038378096651285887, "fcm_dpo/delta": -0.05455287545919418, "fcm_dpo/margin": 117.78301239013672, "fcm_dpo/q_t": 0.39905649423599243, "grad_norm": 26.12622833251953, "learning_rate": 4.330133748510036e-07, "logits/chosen": -0.2915078401565552, "logits/rejected": -0.27635902166366577, "logps/chosen": -185.41506958007812, "logps/ref_chosen": -48.887794494628906, "logps/ref_rejected": -77.19892883300781, "logps/rejected": -331.50921630859375, "loss": 1.08, "margin_dpo/margin_mean": 117.78302001953125, "margin_dpo/margin_std": 167.56219482421875, "step": 216 }, { "epoch": 0.3186490455212922, "fcm_dpo/beta": 0.003737176302820444, "fcm_dpo/delta": -0.12201692909002304, "fcm_dpo/margin": 137.91058349609375, "fcm_dpo/q_t": 0.383506178855896, "grad_norm": 20.442758560180664, "learning_rate": 4.3213670227794757e-07, "logits/chosen": -0.26202258467674255, "logits/rejected": -0.25628846883773804, "logps/chosen": -187.60577392578125, "logps/ref_chosen": -49.845306396484375, "logps/ref_rejected": -100.07832336425781, "logps/rejected": -375.7493896484375, "loss": 1.0117, "margin_dpo/margin_mean": 137.91058349609375, "margin_dpo/margin_std": 156.00509643554688, "step": 217 }, { "epoch": 0.3201174743024963, "fcm_dpo/beta": 0.003717987798154354, "fcm_dpo/delta": 0.004697195254266262, "fcm_dpo/margin": 106.35856628417969, "fcm_dpo/q_t": 0.41124439239501953, "grad_norm": 21.640470504760742, "learning_rate": 4.3125523023339815e-07, "logits/chosen": -0.29033294320106506, "logits/rejected": -0.2837512791156769, "logps/chosen": -200.13418579101562, "logps/ref_chosen": -58.576683044433594, "logps/ref_rejected": -87.84639739990234, "logps/rejected": -335.762451171875, "loss": 1.1106, "margin_dpo/margin_mean": 106.35856628417969, "margin_dpo/margin_std": 158.380859375, "step": 218 }, { "epoch": 0.32158590308370044, "fcm_dpo/beta": 0.0037690873723477125, "fcm_dpo/delta": 0.04747757688164711, "fcm_dpo/margin": 93.80690002441406, "fcm_dpo/q_t": 0.42123115062713623, "grad_norm": 26.6854305267334, "learning_rate": 4.303689819449636e-07, "logits/chosen": -0.3261488974094391, "logits/rejected": -0.318808376789093, "logps/chosen": -212.6107635498047, "logps/ref_chosen": -61.083858489990234, "logps/ref_rejected": -85.83042907714844, "logps/rejected": -331.16424560546875, "loss": 1.1637, "margin_dpo/margin_mean": 93.80690002441406, "margin_dpo/margin_std": 173.55465698242188, "step": 219 }, { "epoch": 0.32305433186490456, "fcm_dpo/beta": 0.003811020404100418, "fcm_dpo/delta": 0.1028871089220047, "fcm_dpo/margin": 78.8044662475586, "fcm_dpo/q_t": 0.42963457107543945, "grad_norm": 24.080263137817383, "learning_rate": 4.2947798076611047e-07, "logits/chosen": -0.29265105724334717, "logits/rejected": -0.26879560947418213, "logps/chosen": -240.15542602539062, "logps/ref_chosen": -70.03128051757812, "logps/ref_rejected": -87.68551635742188, "logps/rejected": -336.6141357421875, "loss": 1.1611, "margin_dpo/margin_mean": 78.8044662475586, "margin_dpo/margin_std": 122.5282974243164, "step": 220 }, { "epoch": 0.3245227606461087, "fcm_dpo/beta": 0.003719739615917206, "fcm_dpo/delta": -0.23312455415725708, "fcm_dpo/margin": 166.45452880859375, "fcm_dpo/q_t": 0.35831981897354126, "grad_norm": 29.536046981811523, "learning_rate": 4.285822501755485e-07, "logits/chosen": -0.2684306204319, "logits/rejected": -0.27492430806159973, "logps/chosen": -199.0157470703125, "logps/ref_chosen": -52.15470886230469, "logps/ref_rejected": -106.46768188476562, "logps/rejected": -419.7832336425781, "loss": 0.9386, "margin_dpo/margin_mean": 166.45452880859375, "margin_dpo/margin_std": 155.8255157470703, "step": 221 }, { "epoch": 0.32599118942731276, "fcm_dpo/beta": 0.003646267345175147, "fcm_dpo/delta": -0.06051616743206978, "fcm_dpo/margin": 125.51266479492188, "fcm_dpo/q_t": 0.3957686424255371, "grad_norm": 22.21294403076172, "learning_rate": 4.276818137766118e-07, "logits/chosen": -0.32068097591400146, "logits/rejected": -0.3213370442390442, "logps/chosen": -214.42459106445312, "logps/ref_chosen": -60.971099853515625, "logps/ref_rejected": -100.00115203857422, "logps/rejected": -378.9673156738281, "loss": 1.056, "margin_dpo/margin_mean": 125.51266479492188, "margin_dpo/margin_std": 156.2869110107422, "step": 222 }, { "epoch": 0.3274596182085169, "fcm_dpo/beta": 0.0036216324660927057, "fcm_dpo/delta": 0.012630530633032322, "fcm_dpo/margin": 107.08901977539062, "fcm_dpo/q_t": 0.412469744682312, "grad_norm": 29.19846534729004, "learning_rate": 4.2677669529663686e-07, "logits/chosen": -0.23973864316940308, "logits/rejected": -0.23426464200019836, "logps/chosen": -216.11801147460938, "logps/ref_chosen": -52.64057540893555, "logps/ref_rejected": -82.82502746582031, "logps/rejected": -353.3914794921875, "loss": 1.1276, "margin_dpo/margin_mean": 107.08901977539062, "margin_dpo/margin_std": 174.32290649414062, "step": 223 }, { "epoch": 0.328928046989721, "fcm_dpo/beta": 0.0035798242315649986, "fcm_dpo/delta": -0.04205327853560448, "fcm_dpo/margin": 122.58447265625, "fcm_dpo/q_t": 0.4028151333332062, "grad_norm": 26.769624710083008, "learning_rate": 4.2586691858633747e-07, "logits/chosen": -0.291404664516449, "logits/rejected": -0.27433890104293823, "logps/chosen": -191.07574462890625, "logps/ref_chosen": -48.59541320800781, "logps/ref_rejected": -77.11648559570312, "logps/rejected": -342.1812744140625, "loss": 1.0876, "margin_dpo/margin_mean": 122.58447265625, "margin_dpo/margin_std": 175.91018676757812, "step": 224 }, { "epoch": 0.3303964757709251, "fcm_dpo/beta": 0.0035359251778572798, "fcm_dpo/delta": -0.11218781769275665, "fcm_dpo/margin": 143.213134765625, "fcm_dpo/q_t": 0.3858293294906616, "grad_norm": 22.56654167175293, "learning_rate": 4.249525076191759e-07, "logits/chosen": -0.32044434547424316, "logits/rejected": -0.312914103269577, "logps/chosen": -222.03887939453125, "logps/ref_chosen": -58.000465393066406, "logps/ref_rejected": -99.90291595458984, "logps/rejected": -407.15447998046875, "loss": 1.0315, "margin_dpo/margin_mean": 143.213134765625, "margin_dpo/margin_std": 177.5218048095703, "step": 225 }, { "epoch": 0.33186490455212925, "fcm_dpo/beta": 0.003493384225293994, "fcm_dpo/delta": -0.009331781417131424, "fcm_dpo/margin": 116.85462188720703, "fcm_dpo/q_t": 0.4088728427886963, "grad_norm": 29.890501022338867, "learning_rate": 4.2403348649073167e-07, "logits/chosen": -0.3462528586387634, "logits/rejected": -0.30755919218063354, "logps/chosen": -192.7152099609375, "logps/ref_chosen": -58.898799896240234, "logps/ref_rejected": -78.68775939941406, "logps/rejected": -329.3587646484375, "loss": 1.0972, "margin_dpo/margin_mean": 116.85462951660156, "margin_dpo/margin_std": 164.87612915039062, "step": 226 }, { "epoch": 0.3333333333333333, "fcm_dpo/beta": 0.0034475913271307945, "fcm_dpo/delta": -0.09939224272966385, "fcm_dpo/margin": 143.24203491210938, "fcm_dpo/q_t": 0.3873962163925171, "grad_norm": 25.0889949798584, "learning_rate": 4.2310987941806615e-07, "logits/chosen": -0.32977473735809326, "logits/rejected": -0.31640344858169556, "logps/chosen": -220.12014770507812, "logps/ref_chosen": -59.072181701660156, "logps/ref_rejected": -99.41236877441406, "logps/rejected": -403.7023620605469, "loss": 1.0294, "margin_dpo/margin_mean": 143.24203491210938, "margin_dpo/margin_std": 169.32302856445312, "step": 227 }, { "epoch": 0.33480176211453744, "fcm_dpo/beta": 0.003472366835922003, "fcm_dpo/delta": 0.04667496308684349, "fcm_dpo/margin": 102.19868469238281, "fcm_dpo/q_t": 0.4186497926712036, "grad_norm": 24.252988815307617, "learning_rate": 4.2218171073908463e-07, "logits/chosen": -0.33692067861557007, "logits/rejected": -0.32044440507888794, "logps/chosen": -228.12362670898438, "logps/ref_chosen": -65.89128875732422, "logps/ref_rejected": -91.04875183105469, "logps/rejected": -355.4797668457031, "loss": 1.1385, "margin_dpo/margin_mean": 102.19868469238281, "margin_dpo/margin_std": 163.4045867919922, "step": 228 }, { "epoch": 0.33627019089574156, "fcm_dpo/beta": 0.003483015578240156, "fcm_dpo/delta": 0.010966208763420582, "fcm_dpo/margin": 111.81390380859375, "fcm_dpo/q_t": 0.41027718782424927, "grad_norm": 29.11672592163086, "learning_rate": 4.212490049118951e-07, "logits/chosen": -0.4054703414440155, "logits/rejected": -0.37607651948928833, "logps/chosen": -232.11636352539062, "logps/ref_chosen": -70.70637512207031, "logps/ref_rejected": -84.52741241455078, "logps/rejected": -357.75128173828125, "loss": 1.1063, "margin_dpo/margin_mean": 111.81390380859375, "margin_dpo/margin_std": 160.0384063720703, "step": 229 }, { "epoch": 0.3377386196769457, "fcm_dpo/beta": 0.003395712934434414, "fcm_dpo/delta": -0.15949219465255737, "fcm_dpo/margin": 162.08526611328125, "fcm_dpo/q_t": 0.37249940633773804, "grad_norm": 26.661436080932617, "learning_rate": 4.203117865141635e-07, "logits/chosen": -0.30987975001335144, "logits/rejected": -0.3130999505519867, "logps/chosen": -165.25253295898438, "logps/ref_chosen": -39.282005310058594, "logps/ref_rejected": -85.62191009521484, "logps/rejected": -373.6777038574219, "loss": 0.9778, "margin_dpo/margin_mean": 162.08526611328125, "margin_dpo/margin_std": 160.76185607910156, "step": 230 }, { "epoch": 0.3392070484581498, "fcm_dpo/beta": 0.003384451847523451, "fcm_dpo/delta": 0.020527083426713943, "fcm_dpo/margin": 112.35234069824219, "fcm_dpo/q_t": 0.4136514365673065, "grad_norm": 32.04772186279297, "learning_rate": 4.1937008024246625e-07, "logits/chosen": -0.37022462487220764, "logits/rejected": -0.3420015573501587, "logps/chosen": -207.71981811523438, "logps/ref_chosen": -63.27644348144531, "logps/ref_rejected": -74.1239013671875, "logps/rejected": -330.91961669921875, "loss": 1.1013, "margin_dpo/margin_mean": 112.35232543945312, "margin_dpo/margin_std": 149.82984924316406, "step": 231 }, { "epoch": 0.3406754772393539, "fcm_dpo/beta": 0.0034229401499032974, "fcm_dpo/delta": 0.08551573753356934, "fcm_dpo/margin": 92.69304656982422, "fcm_dpo/q_t": 0.42926985025405884, "grad_norm": 22.970428466796875, "learning_rate": 4.1842391091163933e-07, "logits/chosen": -0.3794689476490021, "logits/rejected": -0.3578903377056122, "logps/chosen": -247.9998321533203, "logps/ref_chosen": -70.74876403808594, "logps/ref_rejected": -83.97706604003906, "logps/rejected": -353.92120361328125, "loss": 1.1598, "margin_dpo/margin_mean": 92.69305419921875, "margin_dpo/margin_std": 156.01400756835938, "step": 232 }, { "epoch": 0.342143906020558, "fcm_dpo/beta": 0.003400879679247737, "fcm_dpo/delta": -0.09279187768697739, "fcm_dpo/margin": 143.58230590820312, "fcm_dpo/q_t": 0.3939361870288849, "grad_norm": 25.750471115112305, "learning_rate": 4.174733034541245e-07, "logits/chosen": -0.37290647625923157, "logits/rejected": -0.3758310079574585, "logps/chosen": -220.12417602539062, "logps/ref_chosen": -54.8829345703125, "logps/ref_rejected": -107.4800796508789, "logps/rejected": -416.30364990234375, "loss": 1.0704, "margin_dpo/margin_mean": 143.5823211669922, "margin_dpo/margin_std": 209.883056640625, "step": 233 }, { "epoch": 0.3436123348017621, "fcm_dpo/beta": 0.0033045965246856213, "fcm_dpo/delta": -0.10699286311864853, "fcm_dpo/margin": 151.24588012695312, "fcm_dpo/q_t": 0.38530829548835754, "grad_norm": 28.405414581298828, "learning_rate": 4.165182829193126e-07, "logits/chosen": -0.36269479990005493, "logits/rejected": -0.3897593021392822, "logps/chosen": -197.34793090820312, "logps/ref_chosen": -44.094520568847656, "logps/ref_rejected": -100.00663757324219, "logps/rejected": -404.50592041015625, "loss": 1.0153, "margin_dpo/margin_mean": 151.24588012695312, "margin_dpo/margin_std": 159.92330932617188, "step": 234 }, { "epoch": 0.34508076358296624, "fcm_dpo/beta": 0.003356143133714795, "fcm_dpo/delta": 0.07040451467037201, "fcm_dpo/margin": 98.78762817382812, "fcm_dpo/q_t": 0.4242765009403229, "grad_norm": 24.10730743408203, "learning_rate": 4.1555887447288255e-07, "logits/chosen": -0.3934810757637024, "logits/rejected": -0.3729557693004608, "logps/chosen": -247.36497497558594, "logps/ref_chosen": -62.237911224365234, "logps/ref_rejected": -90.39506530761719, "logps/rejected": -374.30975341796875, "loss": 1.1544, "margin_dpo/margin_mean": 98.78762817382812, "margin_dpo/margin_std": 164.12014770507812, "step": 235 }, { "epoch": 0.3465491923641703, "fcm_dpo/beta": 0.0032943575643002987, "fcm_dpo/delta": -0.12876766920089722, "fcm_dpo/margin": 158.4656982421875, "fcm_dpo/q_t": 0.3778604567050934, "grad_norm": 42.23731231689453, "learning_rate": 4.1459510339613946e-07, "logits/chosen": -0.3503900170326233, "logits/rejected": -0.34960126876831055, "logps/chosen": -188.35073852539062, "logps/ref_chosen": -49.34136199951172, "logps/ref_rejected": -103.51162719726562, "logps/rejected": -400.9866943359375, "loss": 0.9814, "margin_dpo/margin_mean": 158.4656982421875, "margin_dpo/margin_std": 142.0621337890625, "step": 236 }, { "epoch": 0.34801762114537443, "fcm_dpo/beta": 0.0032789534889161587, "fcm_dpo/delta": 0.005344166420400143, "fcm_dpo/margin": 120.41532135009766, "fcm_dpo/q_t": 0.41042613983154297, "grad_norm": 30.962125778198242, "learning_rate": 4.136269950853473e-07, "logits/chosen": -0.3941130042076111, "logits/rejected": -0.38884979486465454, "logps/chosen": -242.329345703125, "logps/ref_chosen": -54.168121337890625, "logps/ref_rejected": -94.78036499023438, "logps/rejected": -403.35693359375, "loss": 1.1064, "margin_dpo/margin_mean": 120.41531372070312, "margin_dpo/margin_std": 176.17005920410156, "step": 237 }, { "epoch": 0.34948604992657856, "fcm_dpo/beta": 0.003268222790211439, "fcm_dpo/delta": 0.013299603015184402, "fcm_dpo/margin": 118.37882995605469, "fcm_dpo/q_t": 0.4140799045562744, "grad_norm": 29.681880950927734, "learning_rate": 4.126545750510605e-07, "logits/chosen": -0.3387226164340973, "logits/rejected": -0.35407811403274536, "logps/chosen": -227.864501953125, "logps/ref_chosen": -53.973121643066406, "logps/ref_rejected": -89.41795349121094, "logps/rejected": -381.68817138671875, "loss": 1.1072, "margin_dpo/margin_mean": 118.37882995605469, "margin_dpo/margin_std": 169.83518981933594, "step": 238 }, { "epoch": 0.3509544787077827, "fcm_dpo/beta": 0.00324842007830739, "fcm_dpo/delta": -0.031969405710697174, "fcm_dpo/margin": 132.2473602294922, "fcm_dpo/q_t": 0.40041935443878174, "grad_norm": 30.094566345214844, "learning_rate": 4.116778689174514e-07, "logits/chosen": -0.35974210500717163, "logits/rejected": -0.34793728590011597, "logps/chosen": -243.16796875, "logps/ref_chosen": -58.09782409667969, "logps/ref_rejected": -93.59294128417969, "logps/rejected": -410.91046142578125, "loss": 1.0679, "margin_dpo/margin_mean": 132.2473602294922, "margin_dpo/margin_std": 157.95797729492188, "step": 239 }, { "epoch": 0.3524229074889868, "fcm_dpo/beta": 0.003268931061029434, "fcm_dpo/delta": 0.03347586840391159, "fcm_dpo/margin": 112.4616928100586, "fcm_dpo/q_t": 0.4169153571128845, "grad_norm": 38.88202667236328, "learning_rate": 4.106969024216348e-07, "logits/chosen": -0.390846848487854, "logits/rejected": -0.36975374817848206, "logps/chosen": -257.6044006347656, "logps/ref_chosen": -60.6144905090332, "logps/ref_rejected": -74.1185302734375, "logps/rejected": -383.57012939453125, "loss": 1.1434, "margin_dpo/margin_mean": 112.4616928100586, "margin_dpo/margin_std": 189.4227752685547, "step": 240 }, { "epoch": 0.35389133627019087, "fcm_dpo/beta": 0.0032093606423586607, "fcm_dpo/delta": -0.17607250809669495, "fcm_dpo/margin": 176.48245239257812, "fcm_dpo/q_t": 0.374181866645813, "grad_norm": 26.158462524414062, "learning_rate": 4.097117014129903e-07, "logits/chosen": -0.4116344749927521, "logits/rejected": -0.38839179277420044, "logps/chosen": -227.10543823242188, "logps/ref_chosen": -66.091064453125, "logps/ref_rejected": -88.06088256835938, "logps/rejected": -425.5577087402344, "loss": 0.9888, "margin_dpo/margin_mean": 176.48245239257812, "margin_dpo/margin_std": 196.57537841796875, "step": 241 }, { "epoch": 0.355359765051395, "fcm_dpo/beta": 0.0031865746714174747, "fcm_dpo/delta": 0.0012423545122146606, "fcm_dpo/margin": 125.03507995605469, "fcm_dpo/q_t": 0.4101165533065796, "grad_norm": 29.37732696533203, "learning_rate": 4.087222918524807e-07, "logits/chosen": -0.33866798877716064, "logits/rejected": -0.3151482343673706, "logps/chosen": -251.06314086914062, "logps/ref_chosen": -67.86392974853516, "logps/ref_rejected": -83.36033630371094, "logps/rejected": -391.5946350097656, "loss": 1.0969, "margin_dpo/margin_mean": 125.03507995605469, "margin_dpo/margin_std": 171.787353515625, "step": 242 }, { "epoch": 0.3568281938325991, "fcm_dpo/beta": 0.0031295460648834705, "fcm_dpo/delta": -0.08388511836528778, "fcm_dpo/margin": 153.2886199951172, "fcm_dpo/q_t": 0.39050671458244324, "grad_norm": 22.981002807617188, "learning_rate": 4.07728699811968e-07, "logits/chosen": -0.35215356945991516, "logits/rejected": -0.3224591612815857, "logps/chosen": -251.0262451171875, "logps/ref_chosen": -63.0842399597168, "logps/ref_rejected": -76.33563232421875, "logps/rejected": -417.56622314453125, "loss": 1.0365, "margin_dpo/margin_mean": 153.28863525390625, "margin_dpo/margin_std": 182.36080932617188, "step": 243 }, { "epoch": 0.35829662261380324, "fcm_dpo/beta": 0.0030690422281622887, "fcm_dpo/delta": -0.09600830078125, "fcm_dpo/margin": 159.98179626464844, "fcm_dpo/q_t": 0.38639965653419495, "grad_norm": 30.8914737701416, "learning_rate": 4.067309514735267e-07, "logits/chosen": -0.4122433662414551, "logits/rejected": -0.40430814027786255, "logps/chosen": -223.86654663085938, "logps/ref_chosen": -61.140689849853516, "logps/ref_rejected": -94.89193725585938, "logps/rejected": -417.5995788574219, "loss": 1.0117, "margin_dpo/margin_mean": 159.98179626464844, "margin_dpo/margin_std": 160.8214111328125, "step": 244 }, { "epoch": 0.35976505139500736, "fcm_dpo/beta": 0.0030440008267760277, "fcm_dpo/delta": 0.01792435348033905, "fcm_dpo/margin": 125.35258483886719, "fcm_dpo/q_t": 0.4126400649547577, "grad_norm": 25.37367057800293, "learning_rate": 4.057290731287531e-07, "logits/chosen": -0.3932448625564575, "logits/rejected": -0.36441653966903687, "logps/chosen": -254.88162231445312, "logps/ref_chosen": -67.26228332519531, "logps/ref_rejected": -87.64010620117188, "logps/rejected": -400.61199951171875, "loss": 1.1127, "margin_dpo/margin_mean": 125.35258483886719, "margin_dpo/margin_std": 174.19549560546875, "step": 245 }, { "epoch": 0.36123348017621143, "fcm_dpo/beta": 0.0030658990144729614, "fcm_dpo/delta": 0.0024497676640748978, "fcm_dpo/margin": 129.66812133789062, "fcm_dpo/q_t": 0.4106002449989319, "grad_norm": 28.117544174194336, "learning_rate": 4.047230911780736e-07, "logits/chosen": -0.4509885311126709, "logits/rejected": -0.4132448434829712, "logps/chosen": -250.50326538085938, "logps/ref_chosen": -66.69696807861328, "logps/ref_rejected": -84.34634399414062, "logps/rejected": -397.8207702636719, "loss": 1.0995, "margin_dpo/margin_mean": 129.66812133789062, "margin_dpo/margin_std": 183.7802734375, "step": 246 }, { "epoch": 0.36270190895741555, "fcm_dpo/beta": 0.003005662001669407, "fcm_dpo/delta": -0.14235088229179382, "fcm_dpo/margin": 177.8526611328125, "fcm_dpo/q_t": 0.37811583280563354, "grad_norm": 33.17192840576172, "learning_rate": 4.0371303213004814e-07, "logits/chosen": -0.3711887001991272, "logits/rejected": -0.3687683343887329, "logps/chosen": -278.5263366699219, "logps/ref_chosen": -56.6053466796875, "logps/ref_rejected": -106.29326629638672, "logps/rejected": -506.06695556640625, "loss": 1.0024, "margin_dpo/margin_mean": 177.85267639160156, "margin_dpo/margin_std": 196.65899658203125, "step": 247 }, { "epoch": 0.3641703377386197, "fcm_dpo/beta": 0.002947642235085368, "fcm_dpo/delta": -0.05931021273136139, "fcm_dpo/margin": 154.6879425048828, "fcm_dpo/q_t": 0.39200544357299805, "grad_norm": 25.346040725708008, "learning_rate": 4.0269892260067197e-07, "logits/chosen": -0.3714104890823364, "logits/rejected": -0.38998764753341675, "logps/chosen": -238.24075317382812, "logps/ref_chosen": -44.043216705322266, "logps/ref_rejected": -91.85687255859375, "logps/rejected": -440.7423400878906, "loss": 1.0253, "margin_dpo/margin_mean": 154.6879425048828, "margin_dpo/margin_std": 143.38400268554688, "step": 248 }, { "epoch": 0.3656387665198238, "fcm_dpo/beta": 0.003015869064256549, "fcm_dpo/delta": 0.14630991220474243, "fcm_dpo/margin": 85.38651275634766, "fcm_dpo/q_t": 0.4420652985572815, "grad_norm": 42.6093864440918, "learning_rate": 4.0168078931267426e-07, "logits/chosen": -0.367745578289032, "logits/rejected": -0.33774110674858093, "logps/chosen": -308.61773681640625, "logps/ref_chosen": -62.442352294921875, "logps/ref_rejected": -80.46806335449219, "logps/rejected": -412.02996826171875, "loss": 1.2243, "margin_dpo/margin_mean": 85.38652038574219, "margin_dpo/margin_std": 190.7613983154297, "step": 249 }, { "epoch": 0.3671071953010279, "fcm_dpo/beta": 0.003016442758962512, "fcm_dpo/delta": -0.0814606249332428, "fcm_dpo/margin": 158.3081817626953, "fcm_dpo/q_t": 0.38880687952041626, "grad_norm": 40.93179702758789, "learning_rate": 4.006586590948141e-07, "logits/chosen": -0.39541658759117126, "logits/rejected": -0.33937403559684753, "logps/chosen": -279.10040283203125, "logps/ref_chosen": -65.63668823242188, "logps/ref_rejected": -73.87184143066406, "logps/rejected": -445.64373779296875, "loss": 1.0225, "margin_dpo/margin_mean": 158.30816650390625, "margin_dpo/margin_std": 162.19927978515625, "step": 250 }, { "epoch": 0.368575624082232, "fcm_dpo/beta": 0.003019275376573205, "fcm_dpo/delta": 0.08311907947063446, "fcm_dpo/margin": 105.85592651367188, "fcm_dpo/q_t": 0.4273427128791809, "grad_norm": 30.16434669494629, "learning_rate": 3.9963255888117325e-07, "logits/chosen": -0.38391727209091187, "logits/rejected": -0.35589849948883057, "logps/chosen": -277.97418212890625, "logps/ref_chosen": -57.182716369628906, "logps/ref_rejected": -77.66343688964844, "logps/rejected": -404.31085205078125, "loss": 1.1639, "margin_dpo/margin_mean": 105.85592651367188, "margin_dpo/margin_std": 178.24061584472656, "step": 251 }, { "epoch": 0.3700440528634361, "fcm_dpo/beta": 0.0030162562616169453, "fcm_dpo/delta": -0.037618160247802734, "fcm_dpo/margin": 144.47677612304688, "fcm_dpo/q_t": 0.39547207951545715, "grad_norm": 24.476696014404297, "learning_rate": 3.9860251571044666e-07, "logits/chosen": -0.41118282079696655, "logits/rejected": -0.36917877197265625, "logps/chosen": -278.718994140625, "logps/ref_chosen": -71.68563842773438, "logps/ref_rejected": -84.75799560546875, "logps/rejected": -436.26806640625, "loss": 1.0411, "margin_dpo/margin_mean": 144.47677612304688, "margin_dpo/margin_std": 140.32473754882812, "step": 252 }, { "epoch": 0.37151248164464024, "fcm_dpo/beta": 0.003020418342202902, "fcm_dpo/delta": -0.03368496149778366, "fcm_dpo/margin": 142.9415283203125, "fcm_dpo/q_t": 0.3989100456237793, "grad_norm": 25.34270477294922, "learning_rate": 3.9756855672522986e-07, "logits/chosen": -0.3994063138961792, "logits/rejected": -0.3902568817138672, "logps/chosen": -246.81979370117188, "logps/ref_chosen": -69.1339340209961, "logps/ref_rejected": -98.70252990722656, "logps/rejected": -419.3299255371094, "loss": 1.0651, "margin_dpo/margin_mean": 142.94154357910156, "margin_dpo/margin_std": 168.36923217773438, "step": 253 }, { "epoch": 0.37298091042584436, "fcm_dpo/beta": 0.0030065332539379597, "fcm_dpo/delta": 0.028634043410420418, "fcm_dpo/margin": 123.87564086914062, "fcm_dpo/q_t": 0.4176272749900818, "grad_norm": 23.688716888427734, "learning_rate": 3.965307091713037e-07, "logits/chosen": -0.396982342004776, "logits/rejected": -0.38199833035469055, "logps/chosen": -232.9241943359375, "logps/ref_chosen": -54.154998779296875, "logps/ref_rejected": -90.30764770507812, "logps/rejected": -392.9524841308594, "loss": 1.1322, "margin_dpo/margin_mean": 123.87564086914062, "margin_dpo/margin_std": 203.29437255859375, "step": 254 }, { "epoch": 0.3744493392070485, "fcm_dpo/beta": 0.003004954196512699, "fcm_dpo/delta": 0.002638857811689377, "fcm_dpo/margin": 132.18743896484375, "fcm_dpo/q_t": 0.4083111584186554, "grad_norm": 21.265838623046875, "learning_rate": 3.954890003969163e-07, "logits/chosen": -0.37806227803230286, "logits/rejected": -0.36832255125045776, "logps/chosen": -245.07669067382812, "logps/ref_chosen": -57.14167022705078, "logps/ref_rejected": -90.2085952758789, "logps/rejected": -410.3310546875, "loss": 1.1026, "margin_dpo/margin_mean": 132.18743896484375, "margin_dpo/margin_std": 184.99835205078125, "step": 255 }, { "epoch": 0.37591776798825255, "fcm_dpo/beta": 0.003001492004841566, "fcm_dpo/delta": -0.032948367297649384, "fcm_dpo/margin": 143.77066040039062, "fcm_dpo/q_t": 0.40088456869125366, "grad_norm": 21.909685134887695, "learning_rate": 3.944434578520628e-07, "logits/chosen": -0.3176652789115906, "logits/rejected": -0.3248659372329712, "logps/chosen": -223.44412231445312, "logps/ref_chosen": -55.163490295410156, "logps/ref_rejected": -92.56291961669922, "logps/rejected": -404.61419677734375, "loss": 1.07, "margin_dpo/margin_mean": 143.77066040039062, "margin_dpo/margin_std": 182.72714233398438, "step": 256 }, { "epoch": 0.37738619676945667, "fcm_dpo/beta": 0.0029555135406553745, "fcm_dpo/delta": -0.04563986137509346, "fcm_dpo/margin": 149.67494201660156, "fcm_dpo/q_t": 0.3995245099067688, "grad_norm": 19.213043212890625, "learning_rate": 3.933941090877615e-07, "logits/chosen": -0.33717894554138184, "logits/rejected": -0.32419469952583313, "logps/chosen": -221.68699645996094, "logps/ref_chosen": -49.42369842529297, "logps/ref_rejected": -79.53791809082031, "logps/rejected": -401.47613525390625, "loss": 1.0699, "margin_dpo/margin_mean": 149.67494201660156, "margin_dpo/margin_std": 189.41546630859375, "step": 257 }, { "epoch": 0.3788546255506608, "fcm_dpo/beta": 0.0029583657160401344, "fcm_dpo/delta": -0.03949305787682533, "fcm_dpo/margin": 147.97097778320312, "fcm_dpo/q_t": 0.3986639380455017, "grad_norm": 27.519527435302734, "learning_rate": 3.923409817553284e-07, "logits/chosen": -0.3358303904533386, "logits/rejected": -0.33348649740219116, "logps/chosen": -284.0785827636719, "logps/ref_chosen": -59.384124755859375, "logps/ref_rejected": -95.99010467529297, "logps/rejected": -468.655517578125, "loss": 1.0828, "margin_dpo/margin_mean": 147.97097778320312, "margin_dpo/margin_std": 206.81240844726562, "step": 258 }, { "epoch": 0.3803230543318649, "fcm_dpo/beta": 0.0029501118697226048, "fcm_dpo/delta": 0.03456338495016098, "fcm_dpo/margin": 124.2512435913086, "fcm_dpo/q_t": 0.41663050651550293, "grad_norm": 22.547470092773438, "learning_rate": 3.9128410360564793e-07, "logits/chosen": -0.37859004735946655, "logits/rejected": -0.37319231033325195, "logps/chosen": -262.14630126953125, "logps/ref_chosen": -52.828346252441406, "logps/ref_rejected": -89.191650390625, "logps/rejected": -422.7608642578125, "loss": 1.1244, "margin_dpo/margin_mean": 124.25125122070312, "margin_dpo/margin_std": 185.36854553222656, "step": 259 }, { "epoch": 0.38179148311306904, "fcm_dpo/beta": 0.0029368563555181026, "fcm_dpo/delta": -0.09315244853496552, "fcm_dpo/margin": 166.3863525390625, "fcm_dpo/q_t": 0.3896779716014862, "grad_norm": 29.52651596069336, "learning_rate": 3.9022350248844246e-07, "logits/chosen": -0.37252455949783325, "logits/rejected": -0.38852792978286743, "logps/chosen": -253.25216674804688, "logps/ref_chosen": -47.41767501831055, "logps/ref_rejected": -95.08978271484375, "logps/rejected": -467.31060791015625, "loss": 1.0226, "margin_dpo/margin_mean": 166.3863525390625, "margin_dpo/margin_std": 186.8936004638672, "step": 260 }, { "epoch": 0.3832599118942731, "fcm_dpo/beta": 0.0028830531518906355, "fcm_dpo/delta": -0.047782331705093384, "fcm_dpo/margin": 154.48388671875, "fcm_dpo/q_t": 0.39917880296707153, "grad_norm": 22.343847274780273, "learning_rate": 3.891592063515376e-07, "logits/chosen": -0.33259785175323486, "logits/rejected": -0.33178287744522095, "logps/chosen": -259.1055603027344, "logps/ref_chosen": -53.03137969970703, "logps/ref_rejected": -88.51494598388672, "logps/rejected": -449.072998046875, "loss": 1.0681, "margin_dpo/margin_mean": 154.48391723632812, "margin_dpo/margin_std": 203.65478515625, "step": 261 }, { "epoch": 0.38472834067547723, "fcm_dpo/beta": 0.0029039657674729824, "fcm_dpo/delta": 0.02600773237645626, "fcm_dpo/margin": 129.03204345703125, "fcm_dpo/q_t": 0.41260138154029846, "grad_norm": 23.152450561523438, "learning_rate": 3.880912432401264e-07, "logits/chosen": -0.33915072679519653, "logits/rejected": -0.3117906153202057, "logps/chosen": -296.1290283203125, "logps/ref_chosen": -59.620140075683594, "logps/ref_rejected": -86.41853332519531, "logps/rejected": -451.9594421386719, "loss": 1.0985, "margin_dpo/margin_mean": 129.03204345703125, "margin_dpo/margin_std": 162.029296875, "step": 262 }, { "epoch": 0.38619676945668135, "fcm_dpo/beta": 0.0028360923752188683, "fcm_dpo/delta": -0.1183972954750061, "fcm_dpo/margin": 180.35324096679688, "fcm_dpo/q_t": 0.3831443786621094, "grad_norm": 25.438756942749023, "learning_rate": 3.870196412960302e-07, "logits/chosen": -0.36076992750167847, "logits/rejected": -0.339572936296463, "logps/chosen": -272.611083984375, "logps/ref_chosen": -59.42094421386719, "logps/ref_rejected": -96.85720825195312, "logps/rejected": -490.400634765625, "loss": 1.0191, "margin_dpo/margin_mean": 180.35324096679688, "margin_dpo/margin_std": 207.73788452148438, "step": 263 }, { "epoch": 0.3876651982378855, "fcm_dpo/beta": 0.0027964303735643625, "fcm_dpo/delta": -0.035919900983572006, "fcm_dpo/margin": 154.90243530273438, "fcm_dpo/q_t": 0.4014216661453247, "grad_norm": 25.1867733001709, "learning_rate": 3.8594442875695665e-07, "logits/chosen": -0.37755894660949707, "logits/rejected": -0.3679601550102234, "logps/chosen": -286.45306396484375, "logps/ref_chosen": -62.722084045410156, "logps/ref_rejected": -93.85620880126953, "logps/rejected": -472.4896545410156, "loss": 1.0749, "margin_dpo/margin_mean": 154.90243530273438, "margin_dpo/margin_std": 199.20541381835938, "step": 264 }, { "epoch": 0.3891336270190896, "fcm_dpo/beta": 0.002805937547236681, "fcm_dpo/delta": -0.010240239091217518, "fcm_dpo/margin": 146.05377197265625, "fcm_dpo/q_t": 0.40965187549591064, "grad_norm": 26.549524307250977, "learning_rate": 3.848656339557562e-07, "logits/chosen": -0.3260389268398285, "logits/rejected": -0.3126361668109894, "logps/chosen": -311.82476806640625, "logps/ref_chosen": -61.971466064453125, "logps/ref_rejected": -88.02059936523438, "logps/rejected": -483.92767333984375, "loss": 1.1221, "margin_dpo/margin_mean": 146.05377197265625, "margin_dpo/margin_std": 243.52978515625, "step": 265 }, { "epoch": 0.39060205580029367, "fcm_dpo/beta": 0.002838346641510725, "fcm_dpo/delta": 0.07484833896160126, "fcm_dpo/margin": 115.4014892578125, "fcm_dpo/q_t": 0.425703227519989, "grad_norm": 57.709693908691406, "learning_rate": 3.8378328531967507e-07, "logits/chosen": -0.3606586456298828, "logits/rejected": -0.3276086449623108, "logps/chosen": -325.3542785644531, "logps/ref_chosen": -67.09967041015625, "logps/ref_rejected": -67.97122192382812, "logps/rejected": -441.6273498535156, "loss": 1.1584, "margin_dpo/margin_mean": 115.4014892578125, "margin_dpo/margin_std": 196.9221954345703, "step": 266 }, { "epoch": 0.3920704845814978, "fcm_dpo/beta": 0.0028311798814684153, "fcm_dpo/delta": -0.015104478225111961, "fcm_dpo/margin": 146.29811096191406, "fcm_dpo/q_t": 0.4068409502506256, "grad_norm": 31.23808479309082, "learning_rate": 3.8269741136960646e-07, "logits/chosen": -0.3521597981452942, "logits/rejected": -0.32346588373184204, "logps/chosen": -300.9726257324219, "logps/ref_chosen": -68.97075653076172, "logps/ref_rejected": -90.16844940185547, "logps/rejected": -468.4684143066406, "loss": 1.1043, "margin_dpo/margin_mean": 146.298095703125, "margin_dpo/margin_std": 222.26060485839844, "step": 267 }, { "epoch": 0.3935389133627019, "fcm_dpo/beta": 0.002821648493409157, "fcm_dpo/delta": -0.019557194784283638, "fcm_dpo/margin": 148.31320190429688, "fcm_dpo/q_t": 0.40599367022514343, "grad_norm": 28.47870445251465, "learning_rate": 3.8160804071933894e-07, "logits/chosen": -0.31350523233413696, "logits/rejected": -0.3184083104133606, "logps/chosen": -297.250732421875, "logps/ref_chosen": -55.90031051635742, "logps/ref_rejected": -101.64763641357422, "logps/rejected": -491.311279296875, "loss": 1.098, "margin_dpo/margin_mean": 148.3131866455078, "margin_dpo/margin_std": 220.14576721191406, "step": 268 }, { "epoch": 0.39500734214390604, "fcm_dpo/beta": 0.002787231467664242, "fcm_dpo/delta": -0.10659514367580414, "fcm_dpo/margin": 179.87583923339844, "fcm_dpo/q_t": 0.3871752619743347, "grad_norm": 26.379419326782227, "learning_rate": 3.8051520207480204e-07, "logits/chosen": -0.3805098831653595, "logits/rejected": -0.3652857542037964, "logps/chosen": -320.774658203125, "logps/ref_chosen": -70.03955841064453, "logps/ref_rejected": -107.34937286376953, "logps/rejected": -537.9603271484375, "loss": 1.0468, "margin_dpo/margin_mean": 179.8758544921875, "margin_dpo/margin_std": 237.33578491210938, "step": 269 }, { "epoch": 0.3964757709251101, "fcm_dpo/beta": 0.002790778409689665, "fcm_dpo/delta": 0.036502670496702194, "fcm_dpo/margin": 130.6387939453125, "fcm_dpo/q_t": 0.4163072109222412, "grad_norm": 28.952787399291992, "learning_rate": 3.794189242333106e-07, "logits/chosen": -0.3982747793197632, "logits/rejected": -0.3937312960624695, "logps/chosen": -285.7011413574219, "logps/ref_chosen": -69.53347778320312, "logps/ref_rejected": -109.92864990234375, "logps/rejected": -456.735107421875, "loss": 1.1297, "margin_dpo/margin_mean": 130.6387939453125, "margin_dpo/margin_std": 202.16256713867188, "step": 270 }, { "epoch": 0.39794419970631423, "fcm_dpo/beta": 0.0027565429918468, "fcm_dpo/delta": -0.058672454208135605, "fcm_dpo/margin": 165.3160858154297, "fcm_dpo/q_t": 0.39579975605010986, "grad_norm": 23.75882911682129, "learning_rate": 3.7831923608280514e-07, "logits/chosen": -0.3246617317199707, "logits/rejected": -0.3058730959892273, "logps/chosen": -254.30191040039062, "logps/ref_chosen": -56.76456832885742, "logps/ref_rejected": -92.51383972167969, "logps/rejected": -455.36724853515625, "loss": 1.0457, "margin_dpo/margin_mean": 165.3160858154297, "margin_dpo/margin_std": 191.5840606689453, "step": 271 }, { "epoch": 0.39941262848751835, "fcm_dpo/beta": 0.002703585661947727, "fcm_dpo/delta": -0.15685208141803741, "fcm_dpo/margin": 202.88958740234375, "fcm_dpo/q_t": 0.3729744553565979, "grad_norm": 36.98969268798828, "learning_rate": 3.772161666010912e-07, "logits/chosen": -0.2816220223903656, "logits/rejected": -0.29340463876724243, "logps/chosen": -241.36251831054688, "logps/ref_chosen": -49.497154235839844, "logps/ref_rejected": -105.54279327392578, "logps/rejected": -500.2977600097656, "loss": 0.9761, "margin_dpo/margin_mean": 202.88958740234375, "margin_dpo/margin_std": 193.3899688720703, "step": 272 }, { "epoch": 0.4008810572687225, "fcm_dpo/beta": 0.0026178741827607155, "fcm_dpo/delta": -0.11314442753791809, "fcm_dpo/margin": 193.62213134765625, "fcm_dpo/q_t": 0.3831535577774048, "grad_norm": 28.243759155273438, "learning_rate": 3.761097448550755e-07, "logits/chosen": -0.29644063115119934, "logits/rejected": -0.279215931892395, "logps/chosen": -290.4288330078125, "logps/ref_chosen": -62.97539520263672, "logps/ref_rejected": -92.49858093261719, "logps/rejected": -513.5741577148438, "loss": 1.0121, "margin_dpo/margin_mean": 193.62213134765625, "margin_dpo/margin_std": 209.56016540527344, "step": 273 }, { "epoch": 0.4023494860499266, "fcm_dpo/beta": 0.002625478897243738, "fcm_dpo/delta": 0.020765498280525208, "fcm_dpo/margin": 144.68017578125, "fcm_dpo/q_t": 0.4123363792896271, "grad_norm": 32.35233688354492, "learning_rate": 3.75e-07, "logits/chosen": -0.21601241827011108, "logits/rejected": -0.19805273413658142, "logps/chosen": -323.8441162109375, "logps/ref_chosen": -55.66770935058594, "logps/ref_rejected": -77.33308410644531, "logps/rejected": -490.18963623046875, "loss": 1.1032, "margin_dpo/margin_mean": 144.68017578125, "margin_dpo/margin_std": 193.43875122070312, "step": 274 }, { "epoch": 0.40381791483113066, "fcm_dpo/beta": 0.0026261399034410715, "fcm_dpo/delta": -0.00641494058072567, "fcm_dpo/margin": 154.6136016845703, "fcm_dpo/q_t": 0.4062380790710449, "grad_norm": 42.94489669799805, "learning_rate": 3.738869612786737e-07, "logits/chosen": -0.3160795271396637, "logits/rejected": -0.32304587960243225, "logps/chosen": -257.8700866699219, "logps/ref_chosen": -48.594703674316406, "logps/ref_rejected": -93.30369567871094, "logps/rejected": -457.1926574707031, "loss": 1.0864, "margin_dpo/margin_mean": 154.61358642578125, "margin_dpo/margin_std": 199.8903350830078, "step": 275 }, { "epoch": 0.4052863436123348, "fcm_dpo/beta": 0.0026143963914364576, "fcm_dpo/delta": 0.0011903084814548492, "fcm_dpo/margin": 152.5404815673828, "fcm_dpo/q_t": 0.40908658504486084, "grad_norm": 27.080881118774414, "learning_rate": 3.7277065802070204e-07, "logits/chosen": -0.2598373293876648, "logits/rejected": -0.23683007061481476, "logps/chosen": -282.9354248046875, "logps/ref_chosen": -56.57740783691406, "logps/ref_rejected": -70.36566925048828, "logps/rejected": -449.26416015625, "loss": 1.0986, "margin_dpo/margin_mean": 152.5404815673828, "margin_dpo/margin_std": 212.240966796875, "step": 276 }, { "epoch": 0.4067547723935389, "fcm_dpo/beta": 0.0026140885893255472, "fcm_dpo/delta": -0.024904295802116394, "fcm_dpo/margin": 162.13217163085938, "fcm_dpo/q_t": 0.40205782651901245, "grad_norm": 36.253150939941406, "learning_rate": 3.71651119641714e-07, "logits/chosen": -0.26820749044418335, "logits/rejected": -0.2528313994407654, "logps/chosen": -305.7073669433594, "logps/ref_chosen": -56.27156066894531, "logps/ref_rejected": -92.88127136230469, "logps/rejected": -504.4492492675781, "loss": 1.0753, "margin_dpo/margin_mean": 162.13217163085938, "margin_dpo/margin_std": 206.08795166015625, "step": 277 }, { "epoch": 0.40822320117474303, "fcm_dpo/beta": 0.0025698295794427395, "fcm_dpo/delta": -0.0788620337843895, "fcm_dpo/margin": 184.778076171875, "fcm_dpo/q_t": 0.39111045002937317, "grad_norm": 27.628742218017578, "learning_rate": 3.705283756425872e-07, "logits/chosen": -0.2766546607017517, "logits/rejected": -0.28136929869651794, "logps/chosen": -274.0843811035156, "logps/ref_chosen": -52.94194030761719, "logps/ref_rejected": -91.25357818603516, "logps/rejected": -497.1741027832031, "loss": 1.0325, "margin_dpo/margin_mean": 184.778076171875, "margin_dpo/margin_std": 208.0440673828125, "step": 278 }, { "epoch": 0.40969162995594716, "fcm_dpo/beta": 0.002526093740016222, "fcm_dpo/delta": -0.06635798513889313, "fcm_dpo/margin": 183.02890014648438, "fcm_dpo/q_t": 0.3952983319759369, "grad_norm": 28.49809455871582, "learning_rate": 3.6940245560867e-07, "logits/chosen": -0.2253936529159546, "logits/rejected": -0.22515779733657837, "logps/chosen": -302.7215576171875, "logps/ref_chosen": -48.641319274902344, "logps/ref_rejected": -87.8514404296875, "logps/rejected": -524.9605712890625, "loss": 1.0589, "margin_dpo/margin_mean": 183.02891540527344, "margin_dpo/margin_std": 234.94747924804688, "step": 279 }, { "epoch": 0.4111600587371512, "fcm_dpo/beta": 0.002501129638403654, "fcm_dpo/delta": -0.07484998553991318, "fcm_dpo/margin": 188.41152954101562, "fcm_dpo/q_t": 0.38939058780670166, "grad_norm": 25.34486961364746, "learning_rate": 3.6827338920900253e-07, "logits/chosen": -0.27106496691703796, "logits/rejected": -0.27247706055641174, "logps/chosen": -309.34259033203125, "logps/ref_chosen": -58.797122955322266, "logps/ref_rejected": -98.61885070800781, "logps/rejected": -537.5758056640625, "loss": 1.0243, "margin_dpo/margin_mean": 188.41152954101562, "margin_dpo/margin_std": 193.04116821289062, "step": 280 }, { "epoch": 0.41262848751835535, "fcm_dpo/beta": 0.00248980731703341, "fcm_dpo/delta": -0.030596543103456497, "fcm_dpo/margin": 172.38308715820312, "fcm_dpo/q_t": 0.3983362019062042, "grad_norm": 27.566728591918945, "learning_rate": 3.6714120619553435e-07, "logits/chosen": -0.3100356459617615, "logits/rejected": -0.28896278142929077, "logps/chosen": -283.4266662597656, "logps/ref_chosen": -55.488521575927734, "logps/ref_rejected": -80.88258361816406, "logps/rejected": -481.2038269042969, "loss": 1.0558, "margin_dpo/margin_mean": 172.38308715820312, "margin_dpo/margin_std": 188.12759399414062, "step": 281 }, { "epoch": 0.41409691629955947, "fcm_dpo/beta": 0.0025168233551084995, "fcm_dpo/delta": 0.07814561575651169, "fcm_dpo/margin": 128.67344665527344, "fcm_dpo/q_t": 0.42755842208862305, "grad_norm": 23.409597396850586, "learning_rate": 3.660059364023408e-07, "logits/chosen": -0.3624737560749054, "logits/rejected": -0.3413255214691162, "logps/chosen": -328.95660400390625, "logps/ref_chosen": -73.07014465332031, "logps/ref_rejected": -95.35098266601562, "logps/rejected": -479.910888671875, "loss": 1.1522, "margin_dpo/margin_mean": 128.6734619140625, "margin_dpo/margin_std": 211.4798583984375, "step": 282 }, { "epoch": 0.4155653450807636, "fcm_dpo/beta": 0.0024866703897714615, "fcm_dpo/delta": -0.10790442675352097, "fcm_dpo/margin": 202.11631774902344, "fcm_dpo/q_t": 0.3826720118522644, "grad_norm": 26.5286808013916, "learning_rate": 3.6486760974483685e-07, "logits/chosen": -0.317167729139328, "logits/rejected": -0.3204939663410187, "logps/chosen": -317.53839111328125, "logps/ref_chosen": -61.89844512939453, "logps/ref_rejected": -96.98655700683594, "logps/rejected": -554.7427978515625, "loss": 1.0059, "margin_dpo/margin_mean": 202.11631774902344, "margin_dpo/margin_std": 201.07449340820312, "step": 283 }, { "epoch": 0.4170337738619677, "fcm_dpo/beta": 0.0024385638535022736, "fcm_dpo/delta": -0.06003139913082123, "fcm_dpo/margin": 187.47438049316406, "fcm_dpo/q_t": 0.394646555185318, "grad_norm": 31.048564910888672, "learning_rate": 3.6372625621898863e-07, "logits/chosen": -0.3580009341239929, "logits/rejected": -0.344012975692749, "logps/chosen": -303.29498291015625, "logps/ref_chosen": -58.4355354309082, "logps/ref_rejected": -93.46926879882812, "logps/rejected": -525.8031005859375, "loss": 1.037, "margin_dpo/margin_mean": 187.474365234375, "margin_dpo/margin_std": 205.1959228515625, "step": 284 }, { "epoch": 0.4185022026431718, "fcm_dpo/beta": 0.002448021899908781, "fcm_dpo/delta": 0.003494247794151306, "fcm_dpo/margin": 161.81854248046875, "fcm_dpo/q_t": 0.40617606043815613, "grad_norm": 28.366336822509766, "learning_rate": 3.625819059005228e-07, "logits/chosen": -0.34775876998901367, "logits/rejected": -0.3358742296695709, "logps/chosen": -342.4769287109375, "logps/ref_chosen": -66.23219299316406, "logps/ref_rejected": -99.1268310546875, "logps/rejected": -537.1901245117188, "loss": 1.0806, "margin_dpo/margin_mean": 161.8185272216797, "margin_dpo/margin_std": 185.91748046875, "step": 285 }, { "epoch": 0.4199706314243759, "fcm_dpo/beta": 0.0024225222878158092, "fcm_dpo/delta": -0.04691235348582268, "fcm_dpo/margin": 183.6310577392578, "fcm_dpo/q_t": 0.3971766233444214, "grad_norm": 32.24759292602539, "learning_rate": 3.614345889441346e-07, "logits/chosen": -0.3304685950279236, "logits/rejected": -0.3126610517501831, "logps/chosen": -349.9842224121094, "logps/ref_chosen": -72.95100402832031, "logps/ref_rejected": -88.58845520019531, "logps/rejected": -549.2527465820312, "loss": 1.0587, "margin_dpo/margin_mean": 183.6310577392578, "margin_dpo/margin_std": 223.42913818359375, "step": 286 }, { "epoch": 0.42143906020558003, "fcm_dpo/beta": 0.0024321102537214756, "fcm_dpo/delta": 0.04561718553304672, "fcm_dpo/margin": 146.354736328125, "fcm_dpo/q_t": 0.4179733395576477, "grad_norm": 27.742694854736328, "learning_rate": 3.6028433558269275e-07, "logits/chosen": -0.3509722948074341, "logits/rejected": -0.3353240489959717, "logps/chosen": -324.606201171875, "logps/ref_chosen": -61.54115295410156, "logps/ref_rejected": -77.69607543945312, "logps/rejected": -487.1158447265625, "loss": 1.1162, "margin_dpo/margin_mean": 146.354736328125, "margin_dpo/margin_std": 199.26669311523438, "step": 287 }, { "epoch": 0.42290748898678415, "fcm_dpo/beta": 0.002393337432295084, "fcm_dpo/delta": -0.06459330767393112, "fcm_dpo/margin": 192.16741943359375, "fcm_dpo/q_t": 0.3921490013599396, "grad_norm": 27.812746047973633, "learning_rate": 3.5913117612644327e-07, "logits/chosen": -0.3211832344532013, "logits/rejected": -0.3044850826263428, "logps/chosen": -312.4546813964844, "logps/ref_chosen": -56.661224365234375, "logps/ref_rejected": -87.33570098876953, "logps/rejected": -535.296630859375, "loss": 1.0334, "margin_dpo/margin_mean": 192.1674346923828, "margin_dpo/margin_std": 192.72792053222656, "step": 288 }, { "epoch": 0.4243759177679883, "fcm_dpo/beta": 0.0023707286454737186, "fcm_dpo/delta": -0.11460113525390625, "fcm_dpo/margin": 214.66830444335938, "fcm_dpo/q_t": 0.38350093364715576, "grad_norm": 27.846128463745117, "learning_rate": 3.5797514096221024e-07, "logits/chosen": -0.308102011680603, "logits/rejected": -0.30951741337776184, "logps/chosen": -305.4954833984375, "logps/ref_chosen": -45.23039245605469, "logps/ref_rejected": -87.64266967773438, "logps/rejected": -562.5761108398438, "loss": 1.0098, "margin_dpo/margin_mean": 214.66830444335938, "margin_dpo/margin_std": 229.25790405273438, "step": 289 }, { "epoch": 0.42584434654919234, "fcm_dpo/beta": 0.002311383606866002, "fcm_dpo/delta": -0.10088926553726196, "fcm_dpo/margin": 214.4608917236328, "fcm_dpo/q_t": 0.38838887214660645, "grad_norm": 24.528976440429688, "learning_rate": 3.568162605525952e-07, "logits/chosen": -0.28302210569381714, "logits/rejected": -0.30255717039108276, "logps/chosen": -338.3675231933594, "logps/ref_chosen": -55.47149658203125, "logps/ref_rejected": -116.70857238769531, "logps/rejected": -614.0654296875, "loss": 1.0326, "margin_dpo/margin_mean": 214.46090698242188, "margin_dpo/margin_std": 262.3920593261719, "step": 290 }, { "epoch": 0.42731277533039647, "fcm_dpo/beta": 0.0022962733637541533, "fcm_dpo/delta": -0.051323793828487396, "fcm_dpo/margin": 195.45303344726562, "fcm_dpo/q_t": 0.3957386016845703, "grad_norm": 24.162464141845703, "learning_rate": 3.5565456543517485e-07, "logits/chosen": -0.30087271332740784, "logits/rejected": -0.2888765335083008, "logps/chosen": -307.3430480957031, "logps/ref_chosen": -63.26036834716797, "logps/ref_rejected": -89.29708862304688, "logps/rejected": -528.8328247070312, "loss": 1.0536, "margin_dpo/margin_mean": 195.45303344726562, "margin_dpo/margin_std": 227.96817016601562, "step": 291 }, { "epoch": 0.4287812041116006, "fcm_dpo/beta": 0.002262428868561983, "fcm_dpo/delta": -0.07314444333314896, "fcm_dpo/margin": 207.6175537109375, "fcm_dpo/q_t": 0.392647922039032, "grad_norm": 23.157028198242188, "learning_rate": 3.5449008622169583e-07, "logits/chosen": -0.3102809488773346, "logits/rejected": -0.29813438653945923, "logps/chosen": -320.7266540527344, "logps/ref_chosen": -53.91852951049805, "logps/ref_rejected": -89.96138000488281, "logps/rejected": -564.3870849609375, "loss": 1.056, "margin_dpo/margin_mean": 207.6175537109375, "margin_dpo/margin_std": 268.990234375, "step": 292 }, { "epoch": 0.4302496328928047, "fcm_dpo/beta": 0.002260031644254923, "fcm_dpo/delta": 0.028196241706609726, "fcm_dpo/margin": 164.92308044433594, "fcm_dpo/q_t": 0.41596880555152893, "grad_norm": 51.17985534667969, "learning_rate": 3.5332285359726846e-07, "logits/chosen": -0.3236939013004303, "logits/rejected": -0.3180049955844879, "logps/chosen": -321.3856201171875, "logps/ref_chosen": -60.376033782958984, "logps/ref_rejected": -77.85244750976562, "logps/rejected": -503.78509521484375, "loss": 1.1218, "margin_dpo/margin_mean": 164.92306518554688, "margin_dpo/margin_std": 251.44886779785156, "step": 293 }, { "epoch": 0.43171806167400884, "fcm_dpo/beta": 0.0022536704782396555, "fcm_dpo/delta": -0.0008471310138702393, "fcm_dpo/margin": 177.8052978515625, "fcm_dpo/q_t": 0.408719003200531, "grad_norm": 20.008682250976562, "learning_rate": 3.5215289831955786e-07, "logits/chosen": -0.3001745939254761, "logits/rejected": -0.3048727512359619, "logps/chosen": -286.2076721191406, "logps/ref_chosen": -48.0875358581543, "logps/ref_rejected": -81.89698791503906, "logps/rejected": -497.82244873046875, "loss": 1.0938, "margin_dpo/margin_mean": 177.8052978515625, "margin_dpo/margin_std": 241.23526000976562, "step": 294 }, { "epoch": 0.4331864904552129, "fcm_dpo/beta": 0.002245218027383089, "fcm_dpo/delta": -0.04526704549789429, "fcm_dpo/margin": 197.43405151367188, "fcm_dpo/q_t": 0.39877989888191223, "grad_norm": 25.436016082763672, "learning_rate": 3.509802512179737e-07, "logits/chosen": -0.3277565836906433, "logits/rejected": -0.3330543041229248, "logps/chosen": -333.48773193359375, "logps/ref_chosen": -49.92467498779297, "logps/ref_rejected": -87.45632934570312, "logps/rejected": -568.4534301757812, "loss": 1.0659, "margin_dpo/margin_mean": 197.43405151367188, "margin_dpo/margin_std": 250.5416717529297, "step": 295 }, { "epoch": 0.434654919236417, "fcm_dpo/beta": 0.002252609934657812, "fcm_dpo/delta": 0.08743564784526825, "fcm_dpo/margin": 139.83860778808594, "fcm_dpo/q_t": 0.42798036336898804, "grad_norm": 43.92038345336914, "learning_rate": 3.498049431928577e-07, "logits/chosen": -0.3475581109523773, "logits/rejected": -0.33524176478385925, "logps/chosen": -408.71044921875, "logps/ref_chosen": -65.49124145507812, "logps/ref_rejected": -93.08908081054688, "logps/rejected": -576.1469116210938, "loss": 1.1808, "margin_dpo/margin_mean": 139.83860778808594, "margin_dpo/margin_std": 259.12890625, "step": 296 }, { "epoch": 0.43612334801762115, "fcm_dpo/beta": 0.0022864262573421, "fcm_dpo/delta": 0.03677614405751228, "fcm_dpo/margin": 159.44927978515625, "fcm_dpo/q_t": 0.4151715040206909, "grad_norm": 40.28225326538086, "learning_rate": 3.486270052146694e-07, "logits/chosen": -0.3144751489162445, "logits/rejected": -0.322353720664978, "logps/chosen": -362.0162658691406, "logps/ref_chosen": -56.476951599121094, "logps/ref_rejected": -95.1385498046875, "logps/rejected": -560.1271362304688, "loss": 1.1061, "margin_dpo/margin_mean": 159.44927978515625, "margin_dpo/margin_std": 203.22805786132812, "step": 297 }, { "epoch": 0.43759177679882527, "fcm_dpo/beta": 0.0022769877687096596, "fcm_dpo/delta": -0.07902979105710983, "fcm_dpo/margin": 208.72238159179688, "fcm_dpo/q_t": 0.39558666944503784, "grad_norm": 26.517858505249023, "learning_rate": 3.474464683231698e-07, "logits/chosen": -0.3526347875595093, "logits/rejected": -0.36952221393585205, "logps/chosen": -382.0199890136719, "logps/ref_chosen": -67.32516479492188, "logps/ref_rejected": -116.66217041015625, "logps/rejected": -640.079345703125, "loss": 1.0697, "margin_dpo/margin_mean": 208.72238159179688, "margin_dpo/margin_std": 299.59674072265625, "step": 298 }, { "epoch": 0.4390602055800294, "fcm_dpo/beta": 0.0022661760449409485, "fcm_dpo/delta": -0.010751504451036453, "fcm_dpo/margin": 180.86856079101562, "fcm_dpo/q_t": 0.4070656895637512, "grad_norm": 38.609622955322266, "learning_rate": 3.462633636266041e-07, "logits/chosen": -0.35478705167770386, "logits/rejected": -0.36300161480903625, "logps/chosen": -315.2637939453125, "logps/ref_chosen": -48.96209716796875, "logps/ref_rejected": -84.32823944091797, "logps/rejected": -531.4984741210938, "loss": 1.0927, "margin_dpo/margin_mean": 180.86856079101562, "margin_dpo/margin_std": 247.35357666015625, "step": 299 }, { "epoch": 0.44052863436123346, "fcm_dpo/beta": 0.002229666104540229, "fcm_dpo/delta": -0.09078172594308853, "fcm_dpo/margin": 218.17410278320312, "fcm_dpo/q_t": 0.38910606503486633, "grad_norm": 40.36360549926758, "learning_rate": 3.4507772230088147e-07, "logits/chosen": -0.3490832448005676, "logits/rejected": -0.3574965000152588, "logps/chosen": -383.52117919921875, "logps/ref_chosen": -59.07371139526367, "logps/ref_rejected": -95.9664535522461, "logps/rejected": -638.5880126953125, "loss": 1.049, "margin_dpo/margin_mean": 218.17413330078125, "margin_dpo/margin_std": 278.7861633300781, "step": 300 }, { "epoch": 0.4419970631424376, "fcm_dpo/beta": 0.0021854317747056484, "fcm_dpo/delta": -0.06135018169879913, "fcm_dpo/margin": 209.61598205566406, "fcm_dpo/q_t": 0.3961915373802185, "grad_norm": 23.53122329711914, "learning_rate": 3.4388957558875316e-07, "logits/chosen": -0.3479039669036865, "logits/rejected": -0.3508484363555908, "logps/chosen": -332.4249267578125, "logps/ref_chosen": -57.249366760253906, "logps/ref_rejected": -92.35354614257812, "logps/rejected": -577.1451416015625, "loss": 1.0508, "margin_dpo/margin_mean": 209.615966796875, "margin_dpo/margin_std": 255.20401000976562, "step": 301 }, { "epoch": 0.4434654919236417, "fcm_dpo/beta": 0.0021836140658706427, "fcm_dpo/delta": 0.016016894951462746, "fcm_dpo/margin": 176.0408172607422, "fcm_dpo/q_t": 0.4110547602176666, "grad_norm": 26.080291748046875, "learning_rate": 3.426989547989902e-07, "logits/chosen": -0.3281886875629425, "logits/rejected": -0.3344939947128296, "logps/chosen": -289.31488037109375, "logps/ref_chosen": -51.197994232177734, "logps/ref_rejected": -97.22636413574219, "logps/rejected": -511.38409423828125, "loss": 1.0933, "margin_dpo/margin_mean": 176.04083251953125, "margin_dpo/margin_std": 219.8755645751953, "step": 302 }, { "epoch": 0.44493392070484583, "fcm_dpo/beta": 0.002201726660132408, "fcm_dpo/delta": 0.041237279772758484, "fcm_dpo/margin": 163.615966796875, "fcm_dpo/q_t": 0.4173469543457031, "grad_norm": 32.51033020019531, "learning_rate": 3.4150589130555773e-07, "logits/chosen": -0.3730643093585968, "logits/rejected": -0.3612968325614929, "logps/chosen": -314.7459716796875, "logps/ref_chosen": -66.71394348144531, "logps/ref_rejected": -86.94542694091797, "logps/rejected": -498.59344482421875, "loss": 1.1264, "margin_dpo/margin_mean": 163.615966796875, "margin_dpo/margin_std": 242.33187866210938, "step": 303 }, { "epoch": 0.44640234948604995, "fcm_dpo/beta": 0.0022171237505972385, "fcm_dpo/delta": -0.006592735648155212, "fcm_dpo/margin": 183.16299438476562, "fcm_dpo/q_t": 0.4029558300971985, "grad_norm": 34.26176071166992, "learning_rate": 3.403104165467883e-07, "logits/chosen": -0.40618133544921875, "logits/rejected": -0.398425817489624, "logps/chosen": -288.716796875, "logps/ref_chosen": -71.95069885253906, "logps/ref_rejected": -90.47203063964844, "logps/rejected": -490.401123046875, "loss": 1.0539, "margin_dpo/margin_mean": 183.16299438476562, "margin_dpo/margin_std": 163.05859375, "step": 304 }, { "epoch": 0.447870778267254, "fcm_dpo/beta": 0.0022104752715677023, "fcm_dpo/delta": 0.043227050453424454, "fcm_dpo/margin": 161.9324951171875, "fcm_dpo/q_t": 0.4192023277282715, "grad_norm": 26.293283462524414, "learning_rate": 3.391125620245535e-07, "logits/chosen": -0.3858333230018616, "logits/rejected": -0.37053465843200684, "logps/chosen": -311.5250244140625, "logps/ref_chosen": -66.79523468017578, "logps/ref_rejected": -92.75459289550781, "logps/rejected": -499.4169006347656, "loss": 1.1289, "margin_dpo/margin_mean": 161.93251037597656, "margin_dpo/margin_std": 241.1954345703125, "step": 305 }, { "epoch": 0.44933920704845814, "fcm_dpo/beta": 0.002244081348180771, "fcm_dpo/delta": 0.04034552350640297, "fcm_dpo/margin": 160.8773193359375, "fcm_dpo/q_t": 0.416267991065979, "grad_norm": 44.03425216674805, "learning_rate": 3.3791235930343417e-07, "logits/chosen": -0.3744924068450928, "logits/rejected": -0.35346078872680664, "logps/chosen": -308.999755859375, "logps/ref_chosen": -69.68389892578125, "logps/ref_rejected": -85.15919494628906, "logps/rejected": -485.35235595703125, "loss": 1.1064, "margin_dpo/margin_mean": 160.87730407714844, "margin_dpo/margin_std": 204.9425048828125, "step": 306 }, { "epoch": 0.45080763582966227, "fcm_dpo/beta": 0.0022494769655168056, "fcm_dpo/delta": 0.0235704705119133, "fcm_dpo/margin": 167.72592163085938, "fcm_dpo/q_t": 0.4124801456928253, "grad_norm": 28.95937728881836, "learning_rate": 3.367098400098881e-07, "logits/chosen": -0.3728075623512268, "logits/rejected": -0.3534259498119354, "logps/chosen": -297.20965576171875, "logps/ref_chosen": -70.16542053222656, "logps/ref_rejected": -86.97230529785156, "logps/rejected": -481.74249267578125, "loss": 1.1012, "margin_dpo/margin_mean": 167.72592163085938, "margin_dpo/margin_std": 218.9744415283203, "step": 307 }, { "epoch": 0.4522760646108664, "fcm_dpo/beta": 0.002247368451207876, "fcm_dpo/delta": -0.029369540512561798, "fcm_dpo/margin": 190.49864196777344, "fcm_dpo/q_t": 0.39926615357398987, "grad_norm": 34.80091857910156, "learning_rate": 3.355050358314172e-07, "logits/chosen": -0.347628116607666, "logits/rejected": -0.3359089493751526, "logps/chosen": -292.7979736328125, "logps/ref_chosen": -55.2449951171875, "logps/ref_rejected": -79.37226104736328, "logps/rejected": -507.42388916015625, "loss": 1.0523, "margin_dpo/margin_mean": 190.49862670898438, "margin_dpo/margin_std": 201.00457763671875, "step": 308 }, { "epoch": 0.45374449339207046, "fcm_dpo/beta": 0.002246787305921316, "fcm_dpo/delta": -0.021639183163642883, "fcm_dpo/margin": 187.0961151123047, "fcm_dpo/q_t": 0.40152931213378906, "grad_norm": 34.3463020324707, "learning_rate": 3.3429797851573183e-07, "logits/chosen": -0.336150586605072, "logits/rejected": -0.3299615979194641, "logps/chosen": -293.8416748046875, "logps/ref_chosen": -48.959083557128906, "logps/ref_rejected": -82.34072875976562, "logps/rejected": -514.3194580078125, "loss": 1.0797, "margin_dpo/margin_mean": 187.0961151123047, "margin_dpo/margin_std": 238.40676879882812, "step": 309 }, { "epoch": 0.4552129221732746, "fcm_dpo/beta": 0.002238691318780184, "fcm_dpo/delta": -0.007201097905635834, "fcm_dpo/margin": 181.66790771484375, "fcm_dpo/q_t": 0.404284805059433, "grad_norm": 27.16603660583496, "learning_rate": 3.3308869986991487e-07, "logits/chosen": -0.38059085607528687, "logits/rejected": -0.37059611082077026, "logps/chosen": -346.43719482421875, "logps/ref_chosen": -62.74177932739258, "logps/ref_rejected": -79.9302978515625, "logps/rejected": -545.2936401367188, "loss": 1.0678, "margin_dpo/margin_mean": 181.66787719726562, "margin_dpo/margin_std": 195.15277099609375, "step": 310 }, { "epoch": 0.4566813509544787, "fcm_dpo/beta": 0.0022144997492432594, "fcm_dpo/delta": -0.047252584248781204, "fcm_dpo/margin": 201.01556396484375, "fcm_dpo/q_t": 0.3994860351085663, "grad_norm": 31.022571563720703, "learning_rate": 3.3187723175958346e-07, "logits/chosen": -0.32762807607650757, "logits/rejected": -0.3017122149467468, "logps/chosen": -353.03253173828125, "logps/ref_chosen": -53.02798080444336, "logps/ref_rejected": -77.43820190429688, "logps/rejected": -578.4583129882812, "loss": 1.0672, "margin_dpo/margin_mean": 201.0155487060547, "margin_dpo/margin_std": 262.7830810546875, "step": 311 }, { "epoch": 0.4581497797356828, "fcm_dpo/beta": 0.0022153835743665695, "fcm_dpo/delta": -0.009008888155221939, "fcm_dpo/margin": 184.3203125, "fcm_dpo/q_t": 0.4087637960910797, "grad_norm": 25.93047332763672, "learning_rate": 3.306636061080487e-07, "logits/chosen": -0.2974693775177002, "logits/rejected": -0.28706854581832886, "logps/chosen": -339.06146240234375, "logps/ref_chosen": -49.39221954345703, "logps/ref_rejected": -75.79280853271484, "logps/rejected": -549.7823486328125, "loss": 1.0958, "margin_dpo/margin_mean": 184.32032775878906, "margin_dpo/margin_std": 263.0006103515625, "step": 312 }, { "epoch": 0.45961820851688695, "fcm_dpo/beta": 0.002215869491919875, "fcm_dpo/delta": -0.016454219818115234, "fcm_dpo/margin": 187.22032165527344, "fcm_dpo/q_t": 0.40516579151153564, "grad_norm": 31.79006004333496, "learning_rate": 3.2944785489547537e-07, "logits/chosen": -0.3782232999801636, "logits/rejected": -0.376730352640152, "logps/chosen": -320.020263671875, "logps/ref_chosen": -50.152740478515625, "logps/ref_rejected": -86.40620422363281, "logps/rejected": -543.4940185546875, "loss": 1.0904, "margin_dpo/margin_mean": 187.22030639648438, "margin_dpo/margin_std": 251.69192504882812, "step": 313 }, { "epoch": 0.461086637298091, "fcm_dpo/beta": 0.002191446255892515, "fcm_dpo/delta": 0.009936392307281494, "fcm_dpo/margin": 178.01272583007812, "fcm_dpo/q_t": 0.41321849822998047, "grad_norm": 22.70627784729004, "learning_rate": 3.2823001015803857e-07, "logits/chosen": -0.4097542464733124, "logits/rejected": -0.4117008149623871, "logps/chosen": -346.734375, "logps/ref_chosen": -57.237579345703125, "logps/ref_rejected": -97.5965347290039, "logps/rejected": -565.1060791015625, "loss": 1.1218, "margin_dpo/margin_mean": 178.0127410888672, "margin_dpo/margin_std": 280.118896484375, "step": 314 }, { "epoch": 0.46255506607929514, "fcm_dpo/beta": 0.0022113011218607426, "fcm_dpo/delta": 0.0364176481962204, "fcm_dpo/margin": 165.0262451171875, "fcm_dpo/q_t": 0.4158650040626526, "grad_norm": 23.05976104736328, "learning_rate": 3.270101039870797e-07, "logits/chosen": -0.3035910427570343, "logits/rejected": -0.3061618208885193, "logps/chosen": -304.7342834472656, "logps/ref_chosen": -49.06958770751953, "logps/ref_rejected": -85.68087768554688, "logps/rejected": -506.371826171875, "loss": 1.1088, "margin_dpo/margin_mean": 165.0262451171875, "margin_dpo/margin_std": 218.05349731445312, "step": 315 }, { "epoch": 0.46402349486049926, "fcm_dpo/beta": 0.0021732416935265064, "fcm_dpo/delta": -0.10777918994426727, "fcm_dpo/margin": 230.76441955566406, "fcm_dpo/q_t": 0.3855735957622528, "grad_norm": 25.089046478271484, "learning_rate": 3.2578816852826086e-07, "logits/chosen": -0.3702074885368347, "logits/rejected": -0.3739718794822693, "logps/chosen": -308.93255615234375, "logps/ref_chosen": -54.26074981689453, "logps/ref_rejected": -101.2814712524414, "logps/rejected": -586.7176513671875, "loss": 1.0133, "margin_dpo/margin_mean": 230.764404296875, "margin_dpo/margin_std": 248.62588500976562, "step": 316 }, { "epoch": 0.4654919236417034, "fcm_dpo/beta": 0.002131909830495715, "fcm_dpo/delta": -0.14962507784366608, "fcm_dpo/margin": 254.1263427734375, "fcm_dpo/q_t": 0.3746058940887451, "grad_norm": 29.67884635925293, "learning_rate": 3.2456423598071783e-07, "logits/chosen": -0.3865886330604553, "logits/rejected": -0.37612611055374146, "logps/chosen": -318.9429626464844, "logps/ref_chosen": -56.094207763671875, "logps/ref_rejected": -100.69905090332031, "logps/rejected": -617.6741943359375, "loss": 0.9798, "margin_dpo/margin_mean": 254.1263427734375, "margin_dpo/margin_std": 239.26747131347656, "step": 317 }, { "epoch": 0.4669603524229075, "fcm_dpo/beta": 0.0021128756925463676, "fcm_dpo/delta": -0.0003537740558385849, "fcm_dpo/margin": 189.40443420410156, "fcm_dpo/q_t": 0.4078769087791443, "grad_norm": 26.946083068847656, "learning_rate": 3.233383385962115e-07, "logits/chosen": -0.43728041648864746, "logits/rejected": -0.4106597304344177, "logps/chosen": -352.3509521484375, "logps/ref_chosen": -64.64569854736328, "logps/ref_rejected": -82.76425170898438, "logps/rejected": -559.8739624023438, "loss": 1.0848, "margin_dpo/margin_mean": 189.40443420410156, "margin_dpo/margin_std": 238.53329467773438, "step": 318 }, { "epoch": 0.4684287812041116, "fcm_dpo/beta": 0.002084306674078107, "fcm_dpo/delta": -0.07317540049552917, "fcm_dpo/margin": 225.37155151367188, "fcm_dpo/q_t": 0.3912753760814667, "grad_norm": 24.27452278137207, "learning_rate": 3.2211050867827805e-07, "logits/chosen": -0.37028658390045166, "logits/rejected": -0.3828258514404297, "logps/chosen": -309.1868591308594, "logps/ref_chosen": -49.383758544921875, "logps/ref_rejected": -113.90650939941406, "logps/rejected": -599.0811767578125, "loss": 1.0362, "margin_dpo/margin_mean": 225.37155151367188, "margin_dpo/margin_std": 256.1711120605469, "step": 319 }, { "epoch": 0.4698972099853157, "fcm_dpo/beta": 0.002035951940342784, "fcm_dpo/delta": -0.11059105396270752, "fcm_dpo/margin": 247.78436279296875, "fcm_dpo/q_t": 0.3837531805038452, "grad_norm": 31.604127883911133, "learning_rate": 3.208807785813777e-07, "logits/chosen": -0.3997849225997925, "logits/rejected": -0.4061777591705322, "logps/chosen": -327.56396484375, "logps/ref_chosen": -59.50489044189453, "logps/ref_rejected": -97.66717529296875, "logps/rejected": -613.5106201171875, "loss": 1.0089, "margin_dpo/margin_mean": 247.7843780517578, "margin_dpo/margin_std": 260.9141845703125, "step": 320 }, { "epoch": 0.4713656387665198, "fcm_dpo/beta": 0.002011922188103199, "fcm_dpo/delta": -0.02466902881860733, "fcm_dpo/margin": 210.1709442138672, "fcm_dpo/q_t": 0.4029350280761719, "grad_norm": 25.207433700561523, "learning_rate": 3.1964918071004217e-07, "logits/chosen": -0.3274344205856323, "logits/rejected": -0.3175322413444519, "logps/chosen": -384.12017822265625, "logps/ref_chosen": -61.548683166503906, "logps/ref_rejected": -91.64103698730469, "logps/rejected": -624.3834838867188, "loss": 1.0747, "margin_dpo/margin_mean": 210.17095947265625, "margin_dpo/margin_std": 260.92510986328125, "step": 321 }, { "epoch": 0.47283406754772395, "fcm_dpo/beta": 0.0019989702850580215, "fcm_dpo/delta": -0.05126545578241348, "fcm_dpo/margin": 224.41220092773438, "fcm_dpo/q_t": 0.3951689302921295, "grad_norm": 28.6334285736084, "learning_rate": 3.184157475180207e-07, "logits/chosen": -0.33229005336761475, "logits/rejected": -0.3307497799396515, "logps/chosen": -346.727294921875, "logps/ref_chosen": -57.29003143310547, "logps/ref_rejected": -95.74992370605469, "logps/rejected": -609.599365234375, "loss": 1.0416, "margin_dpo/margin_mean": 224.41220092773438, "margin_dpo/margin_std": 238.20156860351562, "step": 322 }, { "epoch": 0.47430249632892807, "fcm_dpo/beta": 0.002003198955208063, "fcm_dpo/delta": 0.020026560872793198, "fcm_dpo/margin": 190.05551147460938, "fcm_dpo/q_t": 0.41150009632110596, "grad_norm": 39.6695671081543, "learning_rate": 3.171805115074251e-07, "logits/chosen": -0.3865179419517517, "logits/rejected": -0.3845609426498413, "logps/chosen": -364.77532958984375, "logps/ref_chosen": -51.23395919799805, "logps/ref_rejected": -75.06192016601562, "logps/rejected": -578.6588134765625, "loss": 1.1034, "margin_dpo/margin_mean": 190.05551147460938, "margin_dpo/margin_std": 250.55490112304688, "step": 323 }, { "epoch": 0.47577092511013214, "fcm_dpo/beta": 0.0020291549153625965, "fcm_dpo/delta": 0.020745858550071716, "fcm_dpo/margin": 186.71697998046875, "fcm_dpo/q_t": 0.41493675112724304, "grad_norm": 45.34550857543945, "learning_rate": 3.1594350522787295e-07, "logits/chosen": -0.3825080394744873, "logits/rejected": -0.36702072620391846, "logps/chosen": -429.89703369140625, "logps/ref_chosen": -65.13516998291016, "logps/ref_rejected": -86.47750854492188, "logps/rejected": -637.9563598632812, "loss": 1.1259, "margin_dpo/margin_mean": 186.71697998046875, "margin_dpo/margin_std": 281.0679626464844, "step": 324 }, { "epoch": 0.47723935389133626, "fcm_dpo/beta": 0.0020416593179106712, "fcm_dpo/delta": 0.07895328104496002, "fcm_dpo/margin": 158.47682189941406, "fcm_dpo/q_t": 0.4251885414123535, "grad_norm": 28.742250442504883, "learning_rate": 3.147047612756302e-07, "logits/chosen": -0.4396970570087433, "logits/rejected": -0.4218035936355591, "logps/chosen": -342.92401123046875, "logps/ref_chosen": -56.215599060058594, "logps/ref_rejected": -70.08592987060547, "logps/rejected": -515.2711181640625, "loss": 1.1448, "margin_dpo/margin_mean": 158.47682189941406, "margin_dpo/margin_std": 230.89166259765625, "step": 325 }, { "epoch": 0.4787077826725404, "fcm_dpo/beta": 0.0020674504339694977, "fcm_dpo/delta": 0.07334257662296295, "fcm_dpo/margin": 159.19859313964844, "fcm_dpo/q_t": 0.42226988077163696, "grad_norm": 46.8277473449707, "learning_rate": 3.134643122927519e-07, "logits/chosen": -0.446100115776062, "logits/rejected": -0.4265139698982239, "logps/chosen": -398.791748046875, "logps/ref_chosen": -72.72496032714844, "logps/ref_rejected": -79.8467788696289, "logps/rejected": -565.1121826171875, "loss": 1.1294, "margin_dpo/margin_mean": 159.19859313964844, "margin_dpo/margin_std": 203.34844970703125, "step": 326 }, { "epoch": 0.4801762114537445, "fcm_dpo/beta": 0.002041102387011051, "fcm_dpo/delta": -0.1034296378493309, "fcm_dpo/margin": 243.88858032226562, "fcm_dpo/q_t": 0.38400399684906006, "grad_norm": 32.756866455078125, "learning_rate": 3.1222219096622264e-07, "logits/chosen": -0.40883296728134155, "logits/rejected": -0.39531680941581726, "logps/chosen": -362.8526611328125, "logps/ref_chosen": -69.13441467285156, "logps/ref_rejected": -111.93377685546875, "logps/rejected": -649.5405883789062, "loss": 1.0146, "margin_dpo/margin_mean": 243.88858032226562, "margin_dpo/margin_std": 258.3064880371094, "step": 327 }, { "epoch": 0.48164464023494863, "fcm_dpo/beta": 0.0020253488328307867, "fcm_dpo/delta": -0.04178054630756378, "fcm_dpo/margin": 217.22732543945312, "fcm_dpo/q_t": 0.3996826410293579, "grad_norm": 28.734113693237305, "learning_rate": 3.1097843002709427e-07, "logits/chosen": -0.4222732186317444, "logits/rejected": -0.42595934867858887, "logps/chosen": -361.3597412109375, "logps/ref_chosen": -59.68719482421875, "logps/ref_rejected": -90.85499572753906, "logps/rejected": -609.7548828125, "loss": 1.0643, "margin_dpo/margin_mean": 217.22732543945312, "margin_dpo/margin_std": 274.53643798828125, "step": 328 }, { "epoch": 0.4831130690161527, "fcm_dpo/beta": 0.001993193756788969, "fcm_dpo/delta": -0.050994060933589935, "fcm_dpo/margin": 224.6842041015625, "fcm_dpo/q_t": 0.39687401056289673, "grad_norm": 31.805065155029297, "learning_rate": 3.0973306224962437e-07, "logits/chosen": -0.40810489654541016, "logits/rejected": -0.4009937047958374, "logps/chosen": -404.92718505859375, "logps/ref_chosen": -65.2461929321289, "logps/ref_rejected": -100.69770812988281, "logps/rejected": -665.0628662109375, "loss": 1.0694, "margin_dpo/margin_mean": 224.6842041015625, "margin_dpo/margin_std": 286.8136291503906, "step": 329 }, { "epoch": 0.4845814977973568, "fcm_dpo/beta": 0.0019774779211729765, "fcm_dpo/delta": -0.07221639156341553, "fcm_dpo/margin": 237.08126831054688, "fcm_dpo/q_t": 0.3920379877090454, "grad_norm": 28.682498931884766, "learning_rate": 3.084861204504122e-07, "logits/chosen": -0.3676164746284485, "logits/rejected": -0.36775562167167664, "logps/chosen": -336.18048095703125, "logps/ref_chosen": -46.998348236083984, "logps/ref_rejected": -86.87684631347656, "logps/rejected": -613.1402587890625, "loss": 1.0379, "margin_dpo/margin_mean": 237.08126831054688, "margin_dpo/margin_std": 271.5751037597656, "step": 330 }, { "epoch": 0.48604992657856094, "fcm_dpo/beta": 0.001954648643732071, "fcm_dpo/delta": -0.050193920731544495, "fcm_dpo/margin": 229.15296936035156, "fcm_dpo/q_t": 0.39381328225135803, "grad_norm": 30.48107147216797, "learning_rate": 3.072376374875335e-07, "logits/chosen": -0.43283581733703613, "logits/rejected": -0.43236637115478516, "logps/chosen": -361.46075439453125, "logps/ref_chosen": -50.52424621582031, "logps/ref_rejected": -89.01544189453125, "logps/rejected": -629.1049194335938, "loss": 1.0226, "margin_dpo/margin_mean": 229.15298461914062, "margin_dpo/margin_std": 199.23004150390625, "step": 331 }, { "epoch": 0.48751835535976507, "fcm_dpo/beta": 0.0019774874672293663, "fcm_dpo/delta": 0.08132193237543106, "fcm_dpo/margin": 162.39129638671875, "fcm_dpo/q_t": 0.42597857117652893, "grad_norm": 23.958471298217773, "learning_rate": 3.059876462596758e-07, "logits/chosen": -0.4306356608867645, "logits/rejected": -0.41308534145355225, "logps/chosen": -359.36370849609375, "logps/ref_chosen": -49.18028259277344, "logps/ref_rejected": -76.48515319824219, "logps/rejected": -549.0598754882812, "loss": 1.1409, "margin_dpo/margin_mean": 162.39129638671875, "margin_dpo/margin_std": 234.40750122070312, "step": 332 }, { "epoch": 0.4889867841409692, "fcm_dpo/beta": 0.0019611469469964504, "fcm_dpo/delta": -0.03113037347793579, "fcm_dpo/margin": 218.6634521484375, "fcm_dpo/q_t": 0.4022316336631775, "grad_norm": 24.696426391601562, "learning_rate": 3.0473617970527015e-07, "logits/chosen": -0.4549393653869629, "logits/rejected": -0.4510509967803955, "logps/chosen": -389.515869140625, "logps/ref_chosen": -63.75574493408203, "logps/ref_rejected": -95.04411315917969, "logps/rejected": -639.4676513671875, "loss": 1.0836, "margin_dpo/margin_mean": 218.6634521484375, "margin_dpo/margin_std": 293.59735107421875, "step": 333 }, { "epoch": 0.49045521292217326, "fcm_dpo/beta": 0.001973837148398161, "fcm_dpo/delta": 0.018399503082036972, "fcm_dpo/margin": 193.68478393554688, "fcm_dpo/q_t": 0.4129871726036072, "grad_norm": 24.81572151184082, "learning_rate": 3.034832708016243e-07, "logits/chosen": -0.42541375756263733, "logits/rejected": -0.4231783449649811, "logps/chosen": -383.4892272949219, "logps/ref_chosen": -66.97975158691406, "logps/ref_rejected": -95.31692504882812, "logps/rejected": -605.51123046875, "loss": 1.1228, "margin_dpo/margin_mean": 193.68478393554688, "margin_dpo/margin_std": 298.8642272949219, "step": 334 }, { "epoch": 0.4919236417033774, "fcm_dpo/beta": 0.00201216503046453, "fcm_dpo/delta": 0.08145836740732193, "fcm_dpo/margin": 159.1428680419922, "fcm_dpo/q_t": 0.42715874314308167, "grad_norm": 32.46516036987305, "learning_rate": 3.022289525640531e-07, "logits/chosen": -0.46390390396118164, "logits/rejected": -0.44169843196868896, "logps/chosen": -399.4327697753906, "logps/ref_chosen": -62.54248046875, "logps/ref_rejected": -87.61770629882812, "logps/rejected": -583.65087890625, "loss": 1.1609, "margin_dpo/margin_mean": 159.14288330078125, "margin_dpo/margin_std": 264.8077087402344, "step": 335 }, { "epoch": 0.4933920704845815, "fcm_dpo/beta": 0.001989907817915082, "fcm_dpo/delta": -0.08648539334535599, "fcm_dpo/margin": 242.4196319580078, "fcm_dpo/q_t": 0.39155828952789307, "grad_norm": 30.810070037841797, "learning_rate": 3.009732580450086e-07, "logits/chosen": -0.4227680563926697, "logits/rejected": -0.42365506291389465, "logps/chosen": -381.827392578125, "logps/ref_chosen": -54.53115463256836, "logps/ref_rejected": -104.40424346923828, "logps/rejected": -674.1201171875, "loss": 1.0553, "margin_dpo/margin_mean": 242.41964721679688, "margin_dpo/margin_std": 320.0055236816406, "step": 336 }, { "epoch": 0.4948604992657856, "fcm_dpo/beta": 0.001954274019226432, "fcm_dpo/delta": -0.06099873036146164, "fcm_dpo/margin": 234.3765411376953, "fcm_dpo/q_t": 0.39415156841278076, "grad_norm": 37.6779899597168, "learning_rate": 2.9971622033320914e-07, "logits/chosen": -0.4804219603538513, "logits/rejected": -0.47055840492248535, "logps/chosen": -348.1040344238281, "logps/ref_chosen": -65.12869262695312, "logps/ref_rejected": -101.72701263427734, "logps/rejected": -619.078857421875, "loss": 1.0399, "margin_dpo/margin_mean": 234.37652587890625, "margin_dpo/margin_std": 261.6142578125, "step": 337 }, { "epoch": 0.49632892804698975, "fcm_dpo/beta": 0.0019397891592234373, "fcm_dpo/delta": -0.03308578580617905, "fcm_dpo/margin": 222.4942626953125, "fcm_dpo/q_t": 0.3983496129512787, "grad_norm": 51.19593048095703, "learning_rate": 2.984578725527675e-07, "logits/chosen": -0.4641989767551422, "logits/rejected": -0.45939457416534424, "logps/chosen": -313.06396484375, "logps/ref_chosen": -58.422706604003906, "logps/ref_rejected": -89.06854248046875, "logps/rejected": -566.2040405273438, "loss": 1.0441, "margin_dpo/margin_mean": 222.4942626953125, "margin_dpo/margin_std": 221.66220092773438, "step": 338 }, { "epoch": 0.4977973568281938, "fcm_dpo/beta": 0.001947054173797369, "fcm_dpo/delta": -0.027510955929756165, "fcm_dpo/margin": 218.38970947265625, "fcm_dpo/q_t": 0.400276780128479, "grad_norm": 36.372657775878906, "learning_rate": 2.9719824786231796e-07, "logits/chosen": -0.505604088306427, "logits/rejected": -0.492832750082016, "logps/chosen": -339.7549743652344, "logps/ref_chosen": -59.99531555175781, "logps/ref_rejected": -103.9109115600586, "logps/rejected": -602.060302734375, "loss": 1.0564, "margin_dpo/margin_mean": 218.38970947265625, "margin_dpo/margin_std": 221.18817138671875, "step": 339 }, { "epoch": 0.49926578560939794, "fcm_dpo/beta": 0.0019270360935479403, "fcm_dpo/delta": 0.019969457760453224, "fcm_dpo/margin": 197.50909423828125, "fcm_dpo/q_t": 0.41230642795562744, "grad_norm": 23.526317596435547, "learning_rate": 2.959373794541426e-07, "logits/chosen": -0.4352113902568817, "logits/rejected": -0.41571658849716187, "logps/chosen": -356.28045654296875, "logps/ref_chosen": -52.83022689819336, "logps/ref_rejected": -73.10723114013672, "logps/rejected": -574.0665283203125, "loss": 1.1044, "margin_dpo/margin_mean": 197.50909423828125, "margin_dpo/margin_std": 266.762451171875, "step": 340 }, { "epoch": 0.5007342143906021, "fcm_dpo/beta": 0.0019175230991095304, "fcm_dpo/delta": -0.03825069218873978, "fcm_dpo/margin": 227.45579528808594, "fcm_dpo/q_t": 0.39912909269332886, "grad_norm": 26.079498291015625, "learning_rate": 2.946753005532965e-07, "logits/chosen": -0.403905987739563, "logits/rejected": -0.4048531651496887, "logps/chosen": -344.41949462890625, "logps/ref_chosen": -47.899803161621094, "logps/ref_rejected": -101.80987548828125, "logps/rejected": -625.7853393554688, "loss": 1.0547, "margin_dpo/margin_mean": 227.45578002929688, "margin_dpo/margin_std": 254.0438232421875, "step": 341 }, { "epoch": 0.5022026431718062, "fcm_dpo/beta": 0.0019315474200993776, "fcm_dpo/delta": 0.012259891256690025, "fcm_dpo/margin": 200.836181640625, "fcm_dpo/q_t": 0.4102938175201416, "grad_norm": 26.406532287597656, "learning_rate": 2.934120444167326e-07, "logits/chosen": -0.413399875164032, "logits/rejected": -0.3872986137866974, "logps/chosen": -373.72735595703125, "logps/ref_chosen": -71.99664306640625, "logps/ref_rejected": -92.58959197998047, "logps/rejected": -595.156494140625, "loss": 1.0985, "margin_dpo/margin_mean": 200.836181640625, "margin_dpo/margin_std": 267.432861328125, "step": 342 }, { "epoch": 0.5036710719530103, "fcm_dpo/beta": 0.001907234895043075, "fcm_dpo/delta": -0.06225571036338806, "fcm_dpo/margin": 240.85353088378906, "fcm_dpo/q_t": 0.3906010389328003, "grad_norm": 25.300434112548828, "learning_rate": 2.9214764433242476e-07, "logits/chosen": -0.45764094591140747, "logits/rejected": -0.46164631843566895, "logps/chosen": -339.2222595214844, "logps/ref_chosen": -54.405616760253906, "logps/ref_rejected": -111.04142761230469, "logps/rejected": -636.7116088867188, "loss": 1.0151, "margin_dpo/margin_mean": 240.85354614257812, "margin_dpo/margin_std": 200.33447265625, "step": 343 }, { "epoch": 0.5051395007342144, "fcm_dpo/beta": 0.001916981302201748, "fcm_dpo/delta": -0.00981883704662323, "fcm_dpo/margin": 212.7474365234375, "fcm_dpo/q_t": 0.4074634909629822, "grad_norm": 42.644657135009766, "learning_rate": 2.9088213361849126e-07, "logits/chosen": -0.4130118489265442, "logits/rejected": -0.41583961248397827, "logps/chosen": -340.350830078125, "logps/ref_chosen": -53.96466827392578, "logps/ref_rejected": -90.62336730957031, "logps/rejected": -589.7569580078125, "loss": 1.082, "margin_dpo/margin_mean": 212.74746704101562, "margin_dpo/margin_std": 255.60020446777344, "step": 344 }, { "epoch": 0.5066079295154186, "fcm_dpo/beta": 0.001887032762169838, "fcm_dpo/delta": -0.05342705175280571, "fcm_dpo/margin": 239.02159118652344, "fcm_dpo/q_t": 0.395052433013916, "grad_norm": 27.184518814086914, "learning_rate": 2.896155456223163e-07, "logits/chosen": -0.446855753660202, "logits/rejected": -0.44713422656059265, "logps/chosen": -399.9010009765625, "logps/ref_chosen": -61.685699462890625, "logps/ref_rejected": -99.49041748046875, "logps/rejected": -676.727294921875, "loss": 1.0441, "margin_dpo/margin_mean": 239.02159118652344, "margin_dpo/margin_std": 266.046630859375, "step": 345 }, { "epoch": 0.5080763582966226, "fcm_dpo/beta": 0.001873625093139708, "fcm_dpo/delta": -0.01170763373374939, "fcm_dpo/margin": 219.47308349609375, "fcm_dpo/q_t": 0.4032544493675232, "grad_norm": 26.40635871887207, "learning_rate": 2.883479137196714e-07, "logits/chosen": -0.4138724207878113, "logits/rejected": -0.4036678373813629, "logps/chosen": -398.43524169921875, "logps/ref_chosen": -55.256263732910156, "logps/ref_rejected": -77.41532135009766, "logps/rejected": -640.0673828125, "loss": 1.0715, "margin_dpo/margin_mean": 219.4730987548828, "margin_dpo/margin_std": 256.69036865234375, "step": 346 }, { "epoch": 0.5095447870778267, "fcm_dpo/beta": 0.0018691536970436573, "fcm_dpo/delta": -0.01727224886417389, "fcm_dpo/margin": 222.85726928710938, "fcm_dpo/q_t": 0.4037840962409973, "grad_norm": 30.47779083251953, "learning_rate": 2.8707927131383614e-07, "logits/chosen": -0.39017215371131897, "logits/rejected": -0.384768545627594, "logps/chosen": -396.3253173828125, "logps/ref_chosen": -57.56623840332031, "logps/ref_rejected": -92.35509490966797, "logps/rejected": -653.971435546875, "loss": 1.0757, "margin_dpo/margin_mean": 222.85726928710938, "margin_dpo/margin_std": 278.2526550292969, "step": 347 }, { "epoch": 0.5110132158590308, "fcm_dpo/beta": 0.0018709124997258186, "fcm_dpo/delta": 0.05157490074634552, "fcm_dpo/margin": 186.99984741210938, "fcm_dpo/q_t": 0.4193815588951111, "grad_norm": 26.73240089416504, "learning_rate": 2.858096518347179e-07, "logits/chosen": -0.4690071940422058, "logits/rejected": -0.47212427854537964, "logps/chosen": -365.4136657714844, "logps/ref_chosen": -56.31770324707031, "logps/ref_rejected": -89.13836669921875, "logps/rejected": -585.2341918945312, "loss": 1.122, "margin_dpo/margin_mean": 186.99984741210938, "margin_dpo/margin_std": 251.55563354492188, "step": 348 }, { "epoch": 0.5124816446402349, "fcm_dpo/beta": 0.0018943310715258121, "fcm_dpo/delta": -1.317635178565979e-05, "fcm_dpo/margin": 210.99227905273438, "fcm_dpo/q_t": 0.40966325998306274, "grad_norm": 21.820791244506836, "learning_rate": 2.845390887379706e-07, "logits/chosen": -0.4289626181125641, "logits/rejected": -0.4302072525024414, "logps/chosen": -338.00421142578125, "logps/ref_chosen": -58.025516510009766, "logps/ref_rejected": -97.50515747070312, "logps/rejected": -588.4761352539062, "loss": 1.0992, "margin_dpo/margin_mean": 210.99227905273438, "margin_dpo/margin_std": 296.27386474609375, "step": 349 }, { "epoch": 0.5139500734214391, "fcm_dpo/beta": 0.0018844606820493937, "fcm_dpo/delta": 0.020040031522512436, "fcm_dpo/margin": 201.89549255371094, "fcm_dpo/q_t": 0.41158032417297363, "grad_norm": 36.85105895996094, "learning_rate": 2.8326761550411346e-07, "logits/chosen": -0.40707576274871826, "logits/rejected": -0.40998172760009766, "logps/chosen": -374.5255126953125, "logps/ref_chosen": -64.33049011230469, "logps/ref_rejected": -89.87164306640625, "logps/rejected": -601.962158203125, "loss": 1.1092, "margin_dpo/margin_mean": 201.89547729492188, "margin_dpo/margin_std": 281.0362243652344, "step": 350 }, { "epoch": 0.5154185022026432, "fcm_dpo/beta": 0.0018699737265706062, "fcm_dpo/delta": -0.06955541670322418, "fcm_dpo/margin": 249.21286010742188, "fcm_dpo/q_t": 0.3945404291152954, "grad_norm": 31.279190063476562, "learning_rate": 2.819952656376487e-07, "logits/chosen": -0.4381694197654724, "logits/rejected": -0.4381583034992218, "logps/chosen": -339.1927795410156, "logps/ref_chosen": -60.6721305847168, "logps/ref_rejected": -101.5654296875, "logps/rejected": -629.2989501953125, "loss": 1.0495, "margin_dpo/margin_mean": 249.212890625, "margin_dpo/margin_std": 307.7030029296875, "step": 351 }, { "epoch": 0.5168869309838473, "fcm_dpo/beta": 0.0018936812411993742, "fcm_dpo/delta": 0.1119888573884964, "fcm_dpo/margin": 153.8917999267578, "fcm_dpo/q_t": 0.4330083727836609, "grad_norm": 41.63142013549805, "learning_rate": 2.8072207266617854e-07, "logits/chosen": -0.43417030572891235, "logits/rejected": -0.40401673316955566, "logps/chosen": -405.0966491699219, "logps/ref_chosen": -70.9434585571289, "logps/ref_rejected": -76.6419677734375, "logps/rejected": -564.68701171875, "loss": 1.1811, "margin_dpo/margin_mean": 153.8917999267578, "margin_dpo/margin_std": 272.4174499511719, "step": 352 }, { "epoch": 0.5183553597650514, "fcm_dpo/beta": 0.0018986646318808198, "fcm_dpo/delta": -0.004761148244142532, "fcm_dpo/margin": 212.76190185546875, "fcm_dpo/q_t": 0.4080585241317749, "grad_norm": 23.361574172973633, "learning_rate": 2.794480701395219e-07, "logits/chosen": -0.4715597331523895, "logits/rejected": -0.459200382232666, "logps/chosen": -371.7664489746094, "logps/ref_chosen": -58.39533996582031, "logps/ref_rejected": -80.33553314208984, "logps/rejected": -606.468505859375, "loss": 1.094, "margin_dpo/margin_mean": 212.76190185546875, "margin_dpo/margin_std": 286.2218017578125, "step": 353 }, { "epoch": 0.5198237885462555, "fcm_dpo/beta": 0.0018948422512039542, "fcm_dpo/delta": -0.03894488885998726, "fcm_dpo/margin": 230.70608520507812, "fcm_dpo/q_t": 0.3976895213127136, "grad_norm": 43.41948699951172, "learning_rate": 2.781732916288303e-07, "logits/chosen": -0.44769683480262756, "logits/rejected": -0.43864506483078003, "logps/chosen": -324.921875, "logps/ref_chosen": -59.80299377441406, "logps/ref_rejected": -88.75750732421875, "logps/rejected": -584.58251953125, "loss": 1.0434, "margin_dpo/margin_mean": 230.70606994628906, "margin_dpo/margin_std": 237.67138671875, "step": 354 }, { "epoch": 0.5212922173274597, "fcm_dpo/beta": 0.0018797200173139572, "fcm_dpo/delta": -0.03509457781910896, "fcm_dpo/margin": 230.56893920898438, "fcm_dpo/q_t": 0.39808282256126404, "grad_norm": 47.345184326171875, "learning_rate": 2.7689777072570284e-07, "logits/chosen": -0.4880906343460083, "logits/rejected": -0.4761296510696411, "logps/chosen": -330.4738464355469, "logps/ref_chosen": -54.12849807739258, "logps/ref_rejected": -82.40606689453125, "logps/rejected": -589.3203125, "loss": 1.0473, "margin_dpo/margin_mean": 230.5689239501953, "margin_dpo/margin_std": 238.7706298828125, "step": 355 }, { "epoch": 0.5227606461086637, "fcm_dpo/beta": 0.001937782857567072, "fcm_dpo/delta": 0.16766567528247833, "fcm_dpo/margin": 121.26019287109375, "fcm_dpo/q_t": 0.4479098618030548, "grad_norm": 56.13861083984375, "learning_rate": 2.7562154104130176e-07, "logits/chosen": -0.47440847754478455, "logits/rejected": -0.4546169638633728, "logps/chosen": -418.028564453125, "logps/ref_chosen": -64.6738052368164, "logps/ref_rejected": -75.89926147460938, "logps/rejected": -550.5142211914062, "loss": 1.2588, "margin_dpo/margin_mean": 121.26020050048828, "margin_dpo/margin_std": 323.25531005859375, "step": 356 }, { "epoch": 0.5242290748898678, "fcm_dpo/beta": 0.001942659611813724, "fcm_dpo/delta": 0.021397359669208527, "fcm_dpo/margin": 195.1722412109375, "fcm_dpo/q_t": 0.4128842055797577, "grad_norm": 41.46245193481445, "learning_rate": 2.7434463620546594e-07, "logits/chosen": -0.44245290756225586, "logits/rejected": -0.42818719148635864, "logps/chosen": -383.28961181640625, "logps/ref_chosen": -52.725799560546875, "logps/ref_rejected": -86.84115600585938, "logps/rejected": -612.5772705078125, "loss": 1.1159, "margin_dpo/margin_mean": 195.1722412109375, "margin_dpo/margin_std": 283.9158935546875, "step": 357 }, { "epoch": 0.5256975036710719, "fcm_dpo/beta": 0.0019549184944480658, "fcm_dpo/delta": 0.008138120174407959, "fcm_dpo/margin": 200.61288452148438, "fcm_dpo/q_t": 0.4124792516231537, "grad_norm": 31.154674530029297, "learning_rate": 2.730670898658255e-07, "logits/chosen": -0.4751536548137665, "logits/rejected": -0.45662182569503784, "logps/chosen": -362.30657958984375, "logps/ref_chosen": -63.20543670654297, "logps/ref_rejected": -88.373291015625, "logps/rejected": -588.0873413085938, "loss": 1.1075, "margin_dpo/margin_mean": 200.61288452148438, "margin_dpo/margin_std": 293.744384765625, "step": 358 }, { "epoch": 0.527165932452276, "fcm_dpo/beta": 0.0019379984587430954, "fcm_dpo/delta": -0.057486288249492645, "fcm_dpo/margin": 234.68478393554688, "fcm_dpo/q_t": 0.3975210189819336, "grad_norm": 43.98432922363281, "learning_rate": 2.717889356869146e-07, "logits/chosen": -0.4332413673400879, "logits/rejected": -0.41946709156036377, "logps/chosen": -396.8498840332031, "logps/ref_chosen": -56.370216369628906, "logps/ref_rejected": -82.17375183105469, "logps/rejected": -657.3382568359375, "loss": 1.068, "margin_dpo/margin_mean": 234.68478393554688, "margin_dpo/margin_std": 311.1407775878906, "step": 359 }, { "epoch": 0.5286343612334802, "fcm_dpo/beta": 0.0019518618937581778, "fcm_dpo/delta": 0.05515030398964882, "fcm_dpo/margin": 177.61534118652344, "fcm_dpo/q_t": 0.41836071014404297, "grad_norm": 32.58218002319336, "learning_rate": 2.7051020734928443e-07, "logits/chosen": -0.4247097969055176, "logits/rejected": -0.4117840826511383, "logps/chosen": -380.07391357421875, "logps/ref_chosen": -51.460384368896484, "logps/ref_rejected": -69.83892059326172, "logps/rejected": -576.0677490234375, "loss": 1.109, "margin_dpo/margin_mean": 177.61534118652344, "margin_dpo/margin_std": 208.568359375, "step": 360 }, { "epoch": 0.5301027900146843, "fcm_dpo/beta": 0.001977581763640046, "fcm_dpo/delta": 0.06216863542795181, "fcm_dpo/margin": 171.79086303710938, "fcm_dpo/q_t": 0.42134982347488403, "grad_norm": 42.63149642944336, "learning_rate": 2.6923093854861593e-07, "logits/chosen": -0.43728476762771606, "logits/rejected": -0.4339728355407715, "logps/chosen": -395.3648681640625, "logps/ref_chosen": -53.86951446533203, "logps/ref_rejected": -90.7692642211914, "logps/rejected": -604.0554809570312, "loss": 1.1364, "margin_dpo/margin_mean": 171.79086303710938, "margin_dpo/margin_std": 256.6304931640625, "step": 361 }, { "epoch": 0.5315712187958884, "fcm_dpo/beta": 0.00193558179307729, "fcm_dpo/delta": -0.1272476613521576, "fcm_dpo/margin": 268.6226806640625, "fcm_dpo/q_t": 0.3800477683544159, "grad_norm": 41.641483306884766, "learning_rate": 2.679511629948319e-07, "logits/chosen": -0.44722434878349304, "logits/rejected": -0.46128737926483154, "logps/chosen": -343.3209228515625, "logps/ref_chosen": -58.639060974121094, "logps/ref_rejected": -105.58195495605469, "logps/rejected": -658.886474609375, "loss": 0.9893, "margin_dpo/margin_mean": 268.6226806640625, "margin_dpo/margin_std": 259.2852783203125, "step": 362 }, { "epoch": 0.5330396475770925, "fcm_dpo/beta": 0.0018970153760164976, "fcm_dpo/delta": -0.1342625916004181, "fcm_dpo/margin": 278.00701904296875, "fcm_dpo/q_t": 0.3781528174877167, "grad_norm": 37.65498733520508, "learning_rate": 2.6667091441120816e-07, "logits/chosen": -0.4264340400695801, "logits/rejected": -0.42666763067245483, "logps/chosen": -301.8473815917969, "logps/ref_chosen": -44.558380126953125, "logps/ref_rejected": -74.69496154785156, "logps/rejected": -609.990966796875, "loss": 0.9853, "margin_dpo/margin_mean": 278.00701904296875, "margin_dpo/margin_std": 266.7933349609375, "step": 363 }, { "epoch": 0.5345080763582967, "fcm_dpo/beta": 0.0018907999619841576, "fcm_dpo/delta": 0.014427829533815384, "fcm_dpo/margin": 203.99966430664062, "fcm_dpo/q_t": 0.4105517864227295, "grad_norm": 23.780954360961914, "learning_rate": 2.6539022653348575e-07, "logits/chosen": -0.4266408383846283, "logits/rejected": -0.4368516504764557, "logps/chosen": -334.2245788574219, "logps/ref_chosen": -48.894622802734375, "logps/ref_rejected": -91.395751953125, "logps/rejected": -580.725341796875, "loss": 1.101, "margin_dpo/margin_mean": 203.99966430664062, "margin_dpo/margin_std": 271.7967529296875, "step": 364 }, { "epoch": 0.5359765051395007, "fcm_dpo/beta": 0.0018834024667739868, "fcm_dpo/delta": -0.004186911974102259, "fcm_dpo/margin": 214.51487731933594, "fcm_dpo/q_t": 0.40672045946121216, "grad_norm": 26.113784790039062, "learning_rate": 2.641091331089811e-07, "logits/chosen": -0.4105398654937744, "logits/rejected": -0.42130357027053833, "logps/chosen": -327.7710266113281, "logps/ref_chosen": -51.49274444580078, "logps/ref_rejected": -92.70166778564453, "logps/rejected": -583.4948120117188, "loss": 1.0732, "margin_dpo/margin_mean": 214.514892578125, "margin_dpo/margin_std": 248.24508666992188, "step": 365 }, { "epoch": 0.5374449339207048, "fcm_dpo/beta": 0.001870601437985897, "fcm_dpo/delta": -0.01503688097000122, "fcm_dpo/margin": 221.3247833251953, "fcm_dpo/q_t": 0.40477490425109863, "grad_norm": 28.54776954650879, "learning_rate": 2.6282766789569736e-07, "logits/chosen": -0.3827515244483948, "logits/rejected": -0.3970991373062134, "logps/chosen": -305.78424072265625, "logps/ref_chosen": -44.7205696105957, "logps/ref_rejected": -83.31040954589844, "logps/rejected": -565.6988525390625, "loss": 1.079, "margin_dpo/margin_mean": 221.32476806640625, "margin_dpo/margin_std": 278.6839904785156, "step": 366 }, { "epoch": 0.5389133627019089, "fcm_dpo/beta": 0.001894644577987492, "fcm_dpo/delta": 0.050500668585300446, "fcm_dpo/margin": 185.32125854492188, "fcm_dpo/q_t": 0.41730421781539917, "grad_norm": 23.173542022705078, "learning_rate": 2.615458646614349e-07, "logits/chosen": -0.43230247497558594, "logits/rejected": -0.4155634045600891, "logps/chosen": -330.4307861328125, "logps/ref_chosen": -58.405418395996094, "logps/ref_rejected": -76.75132751464844, "logps/rejected": -534.0979614257812, "loss": 1.1118, "margin_dpo/margin_mean": 185.32125854492188, "margin_dpo/margin_std": 230.9281005859375, "step": 367 }, { "epoch": 0.540381791483113, "fcm_dpo/beta": 0.0018566998187452555, "fcm_dpo/delta": -0.14017510414123535, "fcm_dpo/margin": 286.89935302734375, "fcm_dpo/q_t": 0.37319713830947876, "grad_norm": 49.00677490234375, "learning_rate": 2.6026375718290083e-07, "logits/chosen": -0.42150646448135376, "logits/rejected": -0.4286186695098877, "logps/chosen": -302.36474609375, "logps/ref_chosen": -44.452518463134766, "logps/ref_rejected": -98.55526733398438, "logps/rejected": -643.3668212890625, "loss": 0.961, "margin_dpo/margin_mean": 286.89935302734375, "margin_dpo/margin_std": 217.0551300048828, "step": 368 }, { "epoch": 0.5418502202643172, "fcm_dpo/beta": 0.001882010605186224, "fcm_dpo/delta": 0.11852943897247314, "fcm_dpo/margin": 151.08335876464844, "fcm_dpo/q_t": 0.4338718354701996, "grad_norm": 29.52981948852539, "learning_rate": 2.589813792448196e-07, "logits/chosen": -0.4294429123401642, "logits/rejected": -0.4097931981086731, "logps/chosen": -410.28509521484375, "logps/ref_chosen": -71.38150024414062, "logps/ref_rejected": -91.29582214355469, "logps/rejected": -581.2828369140625, "loss": 1.1846, "margin_dpo/margin_mean": 151.08335876464844, "margin_dpo/margin_std": 272.0557861328125, "step": 369 }, { "epoch": 0.5433186490455213, "fcm_dpo/beta": 0.0019265762530267239, "fcm_dpo/delta": 0.12166447192430496, "fcm_dpo/margin": 146.07177734375, "fcm_dpo/q_t": 0.43590569496154785, "grad_norm": 39.32080841064453, "learning_rate": 2.5769876463904263e-07, "logits/chosen": -0.4616813063621521, "logits/rejected": -0.45466917753219604, "logps/chosen": -428.994384765625, "logps/ref_chosen": -71.60749816894531, "logps/ref_rejected": -97.25978088378906, "logps/rejected": -600.718505859375, "loss": 1.1947, "margin_dpo/margin_mean": 146.07177734375, "margin_dpo/margin_std": 281.0660095214844, "step": 370 }, { "epoch": 0.5447870778267254, "fcm_dpo/beta": 0.0019367990316823125, "fcm_dpo/delta": -0.011994550004601479, "fcm_dpo/margin": 212.38925170898438, "fcm_dpo/q_t": 0.4069690704345703, "grad_norm": 33.81187057495117, "learning_rate": 2.5641594716365744e-07, "logits/chosen": -0.5018552541732788, "logits/rejected": -0.4875721335411072, "logps/chosen": -412.5298767089844, "logps/ref_chosen": -69.41448974609375, "logps/ref_rejected": -99.17217254638672, "logps/rejected": -654.6768188476562, "loss": 1.0955, "margin_dpo/margin_mean": 212.3892364501953, "margin_dpo/margin_std": 298.9532775878906, "step": 371 }, { "epoch": 0.5462555066079295, "fcm_dpo/beta": 0.0019036408048123121, "fcm_dpo/delta": -0.08581465482711792, "fcm_dpo/margin": 252.9879608154297, "fcm_dpo/q_t": 0.39388322830200195, "grad_norm": 24.240703582763672, "learning_rate": 2.551329606220976e-07, "logits/chosen": -0.46786242723464966, "logits/rejected": -0.45145729184150696, "logps/chosen": -392.63507080078125, "logps/ref_chosen": -61.8179931640625, "logps/ref_rejected": -78.53948974609375, "logps/rejected": -662.344482421875, "loss": 1.0529, "margin_dpo/margin_mean": 252.9879608154297, "margin_dpo/margin_std": 342.00787353515625, "step": 372 }, { "epoch": 0.5477239353891337, "fcm_dpo/beta": 0.0018930002115666866, "fcm_dpo/delta": -0.05019930750131607, "fcm_dpo/margin": 236.47280883789062, "fcm_dpo/q_t": 0.3964656591415405, "grad_norm": 29.245372772216797, "learning_rate": 2.538498388222517e-07, "logits/chosen": -0.4352779984474182, "logits/rejected": -0.4134613871574402, "logps/chosen": -427.55157470703125, "logps/ref_chosen": -64.21713256835938, "logps/ref_rejected": -85.95960998535156, "logps/rejected": -685.766845703125, "loss": 1.0577, "margin_dpo/margin_mean": 236.47280883789062, "margin_dpo/margin_std": 278.249755859375, "step": 373 }, { "epoch": 0.5491923641703378, "fcm_dpo/beta": 0.0018695429898798466, "fcm_dpo/delta": 0.0013285353779792786, "fcm_dpo/margin": 213.04611206054688, "fcm_dpo/q_t": 0.412605345249176, "grad_norm": 28.108699798583984, "learning_rate": 2.525666155755725e-07, "logits/chosen": -0.5210952162742615, "logits/rejected": -0.5011500120162964, "logps/chosen": -390.86297607421875, "logps/ref_chosen": -70.65018463134766, "logps/ref_rejected": -93.64016723632812, "logps/rejected": -626.8990478515625, "loss": 1.1194, "margin_dpo/margin_mean": 213.04612731933594, "margin_dpo/margin_std": 341.2312316894531, "step": 374 }, { "epoch": 0.5506607929515418, "fcm_dpo/beta": 0.0018667408730834723, "fcm_dpo/delta": -0.00040426477789878845, "fcm_dpo/margin": 214.07928466796875, "fcm_dpo/q_t": 0.4087774455547333, "grad_norm": 31.289676666259766, "learning_rate": 2.512833246961859e-07, "logits/chosen": -0.426000714302063, "logits/rejected": -0.42188286781311035, "logps/chosen": -394.99072265625, "logps/ref_chosen": -60.080223083496094, "logps/ref_rejected": -88.93830871582031, "logps/rejected": -637.9281005859375, "loss": 1.1052, "margin_dpo/margin_mean": 214.07928466796875, "margin_dpo/margin_std": 306.2797546386719, "step": 375 }, { "epoch": 0.5521292217327459, "fcm_dpo/beta": 0.0018509968649595976, "fcm_dpo/delta": -0.08695752173662186, "fcm_dpo/margin": 260.70465087890625, "fcm_dpo/q_t": 0.38965705037117004, "grad_norm": 25.07152557373047, "learning_rate": 2.5e-07, "logits/chosen": -0.4629325866699219, "logits/rejected": -0.4503718614578247, "logps/chosen": -398.86688232421875, "logps/ref_chosen": -62.660308837890625, "logps/ref_rejected": -105.52660369873047, "logps/rejected": -702.4378051757812, "loss": 1.0414, "margin_dpo/margin_mean": 260.70465087890625, "margin_dpo/margin_std": 318.3728942871094, "step": 376 }, { "epoch": 0.55359765051395, "fcm_dpo/beta": 0.0018346365541219711, "fcm_dpo/delta": -0.055273640900850296, "fcm_dpo/margin": 246.80406188964844, "fcm_dpo/q_t": 0.3960615396499634, "grad_norm": 27.58287811279297, "learning_rate": 2.487166753038141e-07, "logits/chosen": -0.4195740818977356, "logits/rejected": -0.4171292185783386, "logps/chosen": -392.3238830566406, "logps/ref_chosen": -54.478736877441406, "logps/ref_rejected": -98.70335388183594, "logps/rejected": -683.3525390625, "loss": 1.0526, "margin_dpo/margin_mean": 246.80404663085938, "margin_dpo/margin_std": 296.20745849609375, "step": 377 }, { "epoch": 0.5550660792951542, "fcm_dpo/beta": 0.0018070295918732882, "fcm_dpo/delta": -0.0666477382183075, "fcm_dpo/margin": 256.5183410644531, "fcm_dpo/q_t": 0.3916090726852417, "grad_norm": 28.932802200317383, "learning_rate": 2.4743338442442754e-07, "logits/chosen": -0.4206734001636505, "logits/rejected": -0.4349641501903534, "logps/chosen": -375.62255859375, "logps/ref_chosen": -45.02053451538086, "logps/ref_rejected": -88.0469741821289, "logps/rejected": -675.1673583984375, "loss": 1.035, "margin_dpo/margin_mean": 256.5183410644531, "margin_dpo/margin_std": 279.6002197265625, "step": 378 }, { "epoch": 0.5565345080763583, "fcm_dpo/beta": 0.0017812212463468313, "fcm_dpo/delta": -0.045191098004579544, "fcm_dpo/margin": 248.47398376464844, "fcm_dpo/q_t": 0.39899513125419617, "grad_norm": 36.7637939453125, "learning_rate": 2.461501611777483e-07, "logits/chosen": -0.37811413407325745, "logits/rejected": -0.39795851707458496, "logps/chosen": -419.84063720703125, "logps/ref_chosen": -53.182098388671875, "logps/ref_rejected": -114.3001708984375, "logps/rejected": -729.4326782226562, "loss": 1.0653, "margin_dpo/margin_mean": 248.47398376464844, "margin_dpo/margin_std": 314.6936340332031, "step": 379 }, { "epoch": 0.5580029368575624, "fcm_dpo/beta": 0.0017588778864592314, "fcm_dpo/delta": -0.10711812973022461, "fcm_dpo/margin": 285.31103515625, "fcm_dpo/q_t": 0.3839528560638428, "grad_norm": 23.18588638305664, "learning_rate": 2.4486703937790243e-07, "logits/chosen": -0.4267119765281677, "logits/rejected": -0.45323461294174194, "logps/chosen": -399.3060607910156, "logps/ref_chosen": -51.3530387878418, "logps/ref_rejected": -104.19169616699219, "logps/rejected": -737.4556884765625, "loss": 1.0213, "margin_dpo/margin_mean": 285.31103515625, "margin_dpo/margin_std": 325.0993347167969, "step": 380 }, { "epoch": 0.5594713656387665, "fcm_dpo/beta": 0.0017572679789736867, "fcm_dpo/delta": 0.04623348265886307, "fcm_dpo/margin": 202.26556396484375, "fcm_dpo/q_t": 0.41859930753707886, "grad_norm": 25.287288665771484, "learning_rate": 2.435840528363426e-07, "logits/chosen": -0.4158741235733032, "logits/rejected": -0.39695611596107483, "logps/chosen": -411.60211181640625, "logps/ref_chosen": -57.80306625366211, "logps/ref_rejected": -79.21940612792969, "logps/rejected": -635.2840576171875, "loss": 1.1416, "margin_dpo/margin_mean": 202.26556396484375, "margin_dpo/margin_std": 330.1795654296875, "step": 381 }, { "epoch": 0.5609397944199707, "fcm_dpo/beta": 0.0017583861481398344, "fcm_dpo/delta": -0.020793016999959946, "fcm_dpo/margin": 238.81103515625, "fcm_dpo/q_t": 0.40172097086906433, "grad_norm": 27.541227340698242, "learning_rate": 2.4230123536095745e-07, "logits/chosen": -0.45488008856773376, "logits/rejected": -0.46144935488700867, "logps/chosen": -410.63665771484375, "logps/ref_chosen": -66.02030181884766, "logps/ref_rejected": -110.71016693115234, "logps/rejected": -694.1375732421875, "loss": 1.0582, "margin_dpo/margin_mean": 238.81101989746094, "margin_dpo/margin_std": 257.6006774902344, "step": 382 }, { "epoch": 0.5624082232011748, "fcm_dpo/beta": 0.0017506459262222052, "fcm_dpo/delta": -0.016186170279979706, "fcm_dpo/margin": 237.33407592773438, "fcm_dpo/q_t": 0.40410494804382324, "grad_norm": 35.84563446044922, "learning_rate": 2.4101862075518037e-07, "logits/chosen": -0.4268774092197418, "logits/rejected": -0.43681007623672485, "logps/chosen": -397.21624755859375, "logps/ref_chosen": -50.39148712158203, "logps/ref_rejected": -93.71589660644531, "logps/rejected": -677.874755859375, "loss": 1.0905, "margin_dpo/margin_mean": 237.33407592773438, "margin_dpo/margin_std": 328.8858947753906, "step": 383 }, { "epoch": 0.5638766519823789, "fcm_dpo/beta": 0.0017733362037688494, "fcm_dpo/delta": 0.06153050810098648, "fcm_dpo/margin": 191.76878356933594, "fcm_dpo/q_t": 0.4192041754722595, "grad_norm": 23.139604568481445, "learning_rate": 2.397362428170992e-07, "logits/chosen": -0.45157086849212646, "logits/rejected": -0.4445483684539795, "logps/chosen": -395.2562255859375, "logps/ref_chosen": -52.046104431152344, "logps/ref_rejected": -85.76089477539062, "logps/rejected": -620.7398681640625, "loss": 1.1117, "margin_dpo/margin_mean": 191.76878356933594, "margin_dpo/margin_std": 217.23977661132812, "step": 384 }, { "epoch": 0.5653450807635829, "fcm_dpo/beta": 0.0017724630888551474, "fcm_dpo/delta": -0.0006713038310408592, "fcm_dpo/margin": 226.03790283203125, "fcm_dpo/q_t": 0.4047047793865204, "grad_norm": 36.076045989990234, "learning_rate": 2.3845413533856514e-07, "logits/chosen": -0.4669855237007141, "logits/rejected": -0.4436225891113281, "logps/chosen": -375.51416015625, "logps/ref_chosen": -65.55215454101562, "logps/ref_rejected": -77.82792663574219, "logps/rejected": -613.8278198242188, "loss": 1.0627, "margin_dpo/margin_mean": 226.03790283203125, "margin_dpo/margin_std": 223.3173065185547, "step": 385 }, { "epoch": 0.566813509544787, "fcm_dpo/beta": 0.0017592008225619793, "fcm_dpo/delta": -0.05203462019562721, "fcm_dpo/margin": 255.63485717773438, "fcm_dpo/q_t": 0.39665845036506653, "grad_norm": 25.189849853515625, "learning_rate": 2.3717233210430254e-07, "logits/chosen": -0.4352476894855499, "logits/rejected": -0.4300195872783661, "logps/chosen": -392.69586181640625, "logps/ref_chosen": -58.22185516357422, "logps/ref_rejected": -92.32742309570312, "logps/rejected": -682.436279296875, "loss": 1.0532, "margin_dpo/margin_mean": 255.63485717773438, "margin_dpo/margin_std": 307.45904541015625, "step": 386 }, { "epoch": 0.5682819383259912, "fcm_dpo/beta": 0.0017553928773850203, "fcm_dpo/delta": 0.03558676689863205, "fcm_dpo/margin": 208.20899963378906, "fcm_dpo/q_t": 0.4137038588523865, "grad_norm": 32.04467010498047, "learning_rate": 2.3589086689101889e-07, "logits/chosen": -0.5002990961074829, "logits/rejected": -0.48599332571029663, "logps/chosen": -418.4845275878906, "logps/ref_chosen": -66.41944885253906, "logps/ref_rejected": -92.16915893554688, "logps/rejected": -652.4432373046875, "loss": 1.096, "margin_dpo/margin_mean": 208.208984375, "margin_dpo/margin_std": 233.26101684570312, "step": 387 }, { "epoch": 0.5697503671071953, "fcm_dpo/beta": 0.001736361999064684, "fcm_dpo/delta": -0.09935353696346283, "fcm_dpo/margin": 284.57513427734375, "fcm_dpo/q_t": 0.38717541098594666, "grad_norm": 25.433643341064453, "learning_rate": 2.3460977346651428e-07, "logits/chosen": -0.43736058473587036, "logits/rejected": -0.44726645946502686, "logps/chosen": -382.73590087890625, "logps/ref_chosen": -50.129459381103516, "logps/ref_rejected": -104.43305969238281, "logps/rejected": -721.6146240234375, "loss": 1.0199, "margin_dpo/margin_mean": 284.57513427734375, "margin_dpo/margin_std": 316.06414794921875, "step": 388 }, { "epoch": 0.5712187958883994, "fcm_dpo/beta": 0.0017261260654777288, "fcm_dpo/delta": -0.010240463539958, "fcm_dpo/margin": 237.38491821289062, "fcm_dpo/q_t": 0.4057249426841736, "grad_norm": 29.678983688354492, "learning_rate": 2.3332908558879177e-07, "logits/chosen": -0.4718496799468994, "logits/rejected": -0.46279120445251465, "logps/chosen": -426.9161071777344, "logps/ref_chosen": -57.906593322753906, "logps/ref_rejected": -77.91454315185547, "logps/rejected": -684.3089599609375, "loss": 1.0856, "margin_dpo/margin_mean": 237.3849334716797, "margin_dpo/margin_std": 311.5970458984375, "step": 389 }, { "epoch": 0.5726872246696035, "fcm_dpo/beta": 0.0017196969129145145, "fcm_dpo/delta": -0.005931627005338669, "fcm_dpo/margin": 235.7211151123047, "fcm_dpo/q_t": 0.4099145531654358, "grad_norm": 25.528303146362305, "learning_rate": 2.320488370051681e-07, "logits/chosen": -0.4342801570892334, "logits/rejected": -0.4257649779319763, "logps/chosen": -403.05682373046875, "logps/ref_chosen": -49.22591781616211, "logps/ref_rejected": -85.5281982421875, "logps/rejected": -675.0802001953125, "loss": 1.1052, "margin_dpo/margin_mean": 235.72113037109375, "margin_dpo/margin_std": 351.66534423828125, "step": 390 }, { "epoch": 0.5741556534508077, "fcm_dpo/beta": 0.0017695992719382048, "fcm_dpo/delta": 0.1603117436170578, "fcm_dpo/margin": 137.55014038085938, "fcm_dpo/q_t": 0.44382068514823914, "grad_norm": 41.52486801147461, "learning_rate": 2.3076906145138405e-07, "logits/chosen": -0.46436670422554016, "logits/rejected": -0.4556189179420471, "logps/chosen": -420.87359619140625, "logps/ref_chosen": -64.32965087890625, "logps/ref_rejected": -86.73820495605469, "logps/rejected": -580.832275390625, "loss": 1.2149, "margin_dpo/margin_mean": 137.55014038085938, "margin_dpo/margin_std": 271.1540222167969, "step": 391 }, { "epoch": 0.5756240822320118, "fcm_dpo/beta": 0.0017650609370321035, "fcm_dpo/delta": -0.08989348262548447, "fcm_dpo/margin": 275.10516357421875, "fcm_dpo/q_t": 0.3868984878063202, "grad_norm": 31.219282150268555, "learning_rate": 2.294897926507156e-07, "logits/chosen": -0.42887139320373535, "logits/rejected": -0.4229578375816345, "logps/chosen": -352.71368408203125, "logps/ref_chosen": -53.50397872924805, "logps/ref_rejected": -102.34584045410156, "logps/rejected": -676.6607055664062, "loss": 1.0083, "margin_dpo/margin_mean": 275.10516357421875, "margin_dpo/margin_std": 260.21282958984375, "step": 392 }, { "epoch": 0.5770925110132159, "fcm_dpo/beta": 0.0017527798190712929, "fcm_dpo/delta": 0.019200202077627182, "fcm_dpo/margin": 217.66339111328125, "fcm_dpo/q_t": 0.41545170545578003, "grad_norm": 25.45195198059082, "learning_rate": 2.2821106431308543e-07, "logits/chosen": -0.4183732867240906, "logits/rejected": -0.41586729884147644, "logps/chosen": -345.04229736328125, "logps/ref_chosen": -46.473915100097656, "logps/ref_rejected": -71.96885681152344, "logps/rejected": -588.20068359375, "loss": 1.1209, "margin_dpo/margin_mean": 217.66339111328125, "margin_dpo/margin_std": 343.6617431640625, "step": 393 }, { "epoch": 0.57856093979442, "fcm_dpo/beta": 0.0017561479471623898, "fcm_dpo/delta": 0.005379532463848591, "fcm_dpo/margin": 224.81240844726562, "fcm_dpo/q_t": 0.40968430042266846, "grad_norm": 26.87594223022461, "learning_rate": 2.2693291013417452e-07, "logits/chosen": -0.416803240776062, "logits/rejected": -0.41659796237945557, "logps/chosen": -388.80706787109375, "logps/ref_chosen": -52.91154861450195, "logps/ref_rejected": -90.8226318359375, "logps/rejected": -651.530517578125, "loss": 1.0913, "margin_dpo/margin_mean": 224.81239318847656, "margin_dpo/margin_std": 293.50146484375, "step": 394 }, { "epoch": 0.580029368575624, "fcm_dpo/beta": 0.0017417933559045196, "fcm_dpo/delta": -0.041999928653240204, "fcm_dpo/margin": 252.47410583496094, "fcm_dpo/q_t": 0.3995997905731201, "grad_norm": 25.122419357299805, "learning_rate": 2.2565536379453404e-07, "logits/chosen": -0.47765952348709106, "logits/rejected": -0.473066508769989, "logps/chosen": -398.39801025390625, "logps/ref_chosen": -62.546112060546875, "logps/ref_rejected": -83.78262329101562, "logps/rejected": -672.108642578125, "loss": 1.0658, "margin_dpo/margin_mean": 252.47409057617188, "margin_dpo/margin_std": 318.75537109375, "step": 395 }, { "epoch": 0.5814977973568282, "fcm_dpo/beta": 0.001741830026730895, "fcm_dpo/delta": 0.00434575229883194, "fcm_dpo/margin": 227.20425415039062, "fcm_dpo/q_t": 0.40757039189338684, "grad_norm": 29.88756561279297, "learning_rate": 2.2437845895869825e-07, "logits/chosen": -0.4663088619709015, "logits/rejected": -0.4470548629760742, "logps/chosen": -417.51031494140625, "logps/ref_chosen": -68.99594116210938, "logps/ref_rejected": -88.64665985107422, "logps/rejected": -664.365234375, "loss": 1.0774, "margin_dpo/margin_mean": 227.20425415039062, "margin_dpo/margin_std": 261.07513427734375, "step": 396 }, { "epoch": 0.5829662261380323, "fcm_dpo/beta": 0.001714893733151257, "fcm_dpo/delta": -0.09588249027729034, "fcm_dpo/margin": 286.07928466796875, "fcm_dpo/q_t": 0.386309951543808, "grad_norm": 33.899620056152344, "learning_rate": 2.2310222927429716e-07, "logits/chosen": -0.412584125995636, "logits/rejected": -0.41607069969177246, "logps/chosen": -393.6998291015625, "logps/ref_chosen": -61.27716827392578, "logps/ref_rejected": -103.11612701416016, "logps/rejected": -721.6180419921875, "loss": 1.0151, "margin_dpo/margin_mean": 286.07928466796875, "margin_dpo/margin_std": 297.44598388671875, "step": 397 }, { "epoch": 0.5844346549192364, "fcm_dpo/beta": 0.0017011422896757722, "fcm_dpo/delta": -0.04757946729660034, "fcm_dpo/margin": 261.866943359375, "fcm_dpo/q_t": 0.3992553949356079, "grad_norm": 21.9180965423584, "learning_rate": 2.2182670837116972e-07, "logits/chosen": -0.49817049503326416, "logits/rejected": -0.49308010935783386, "logps/chosen": -433.2986145019531, "logps/ref_chosen": -68.15155029296875, "logps/ref_rejected": -108.52360534667969, "logps/rejected": -735.53759765625, "loss": 1.0678, "margin_dpo/margin_mean": 261.866943359375, "margin_dpo/margin_std": 345.6563720703125, "step": 398 }, { "epoch": 0.5859030837004405, "fcm_dpo/beta": 0.0016931265126913786, "fcm_dpo/delta": 0.007417585700750351, "fcm_dpo/margin": 231.94464111328125, "fcm_dpo/q_t": 0.4104883074760437, "grad_norm": 35.04402160644531, "learning_rate": 2.2055192986047804e-07, "logits/chosen": -0.44559311866760254, "logits/rejected": -0.4083556532859802, "logps/chosen": -388.63470458984375, "logps/ref_chosen": -60.889801025390625, "logps/ref_rejected": -77.965576171875, "logps/rejected": -637.6551513671875, "loss": 1.1115, "margin_dpo/margin_mean": 231.9446563720703, "margin_dpo/margin_std": 344.48883056640625, "step": 399 }, { "epoch": 0.5873715124816447, "fcm_dpo/beta": 0.0016531790606677532, "fcm_dpo/delta": -0.16823890805244446, "fcm_dpo/margin": 337.75396728515625, "fcm_dpo/q_t": 0.36917316913604736, "grad_norm": 28.63687515258789, "learning_rate": 2.192779273338215e-07, "logits/chosen": -0.4327552914619446, "logits/rejected": -0.4296589195728302, "logps/chosen": -371.40936279296875, "logps/ref_chosen": -63.64359664916992, "logps/ref_rejected": -105.252685546875, "logps/rejected": -750.7724609375, "loss": 0.9641, "margin_dpo/margin_mean": 337.75396728515625, "margin_dpo/margin_std": 308.93011474609375, "step": 400 }, { "epoch": 0.5873715124816447, "eval_fcm_dpo/beta": 0.0016427375376224518, "eval_logits/chosen": -0.5096563100814819, "eval_logits/rejected": -0.49900755286216736, "eval_logps/chosen": -501.3864440917969, "eval_logps/ref_chosen": -79.05104064941406, "eval_logps/ref_rejected": -86.79793548583984, "eval_logps/rejected": -661.2091064453125, "eval_loss": 0.6132233738899231, "eval_margin_dpo/margin_mean": 152.07579040527344, "eval_margin_dpo/margin_std": 334.0150146484375, "eval_runtime": 39.2529, "eval_samples_per_second": 59.588, "eval_steps_per_second": 1.885, "step": 400 }, { "epoch": 0.5888399412628488, "fcm_dpo/beta": 0.001660442678257823, "fcm_dpo/delta": 0.08897262066602707, "fcm_dpo/margin": 189.05294799804688, "fcm_dpo/q_t": 0.43077051639556885, "grad_norm": 31.62966537475586, "learning_rate": 2.1800473436235136e-07, "logits/chosen": -0.46482232213020325, "logits/rejected": -0.4563053250312805, "logps/chosen": -419.8261413574219, "logps/ref_chosen": -57.16303253173828, "logps/ref_rejected": -83.79249572753906, "logps/rejected": -635.508544921875, "loss": 1.1971, "margin_dpo/margin_mean": 189.05294799804688, "margin_dpo/margin_std": 394.0697937011719, "step": 401 }, { "epoch": 0.5903083700440529, "fcm_dpo/beta": 0.0016284910961985588, "fcm_dpo/delta": -0.17874157428741455, "fcm_dpo/margin": 349.1449279785156, "fcm_dpo/q_t": 0.36785176396369934, "grad_norm": 21.58502197265625, "learning_rate": 2.1673238449588665e-07, "logits/chosen": -0.4704708456993103, "logits/rejected": -0.4619377553462982, "logps/chosen": -316.80438232421875, "logps/ref_chosen": -50.74037170410156, "logps/ref_rejected": -81.0460433959961, "logps/rejected": -696.2550048828125, "loss": 0.9548, "margin_dpo/margin_mean": 349.1449279785156, "margin_dpo/margin_std": 313.5357971191406, "step": 402 }, { "epoch": 0.591776798825257, "fcm_dpo/beta": 0.0016079884953796864, "fcm_dpo/delta": -0.022973710671067238, "fcm_dpo/margin": 262.4422607421875, "fcm_dpo/q_t": 0.40218695998191833, "grad_norm": 27.24091339111328, "learning_rate": 2.154609112620295e-07, "logits/chosen": -0.4586551785469055, "logits/rejected": -0.4596661627292633, "logps/chosen": -333.87322998046875, "logps/ref_chosen": -47.14731216430664, "logps/ref_rejected": -77.2666015625, "logps/rejected": -626.4347534179688, "loss": 1.0646, "margin_dpo/margin_mean": 262.4422302246094, "margin_dpo/margin_std": 302.470947265625, "step": 403 }, { "epoch": 0.593245227606461, "fcm_dpo/beta": 0.0016004211502149701, "fcm_dpo/delta": -0.010493889451026917, "fcm_dpo/margin": 256.16156005859375, "fcm_dpo/q_t": 0.40553057193756104, "grad_norm": 30.971912384033203, "learning_rate": 2.1419034816528218e-07, "logits/chosen": -0.44930434226989746, "logits/rejected": -0.44178757071495056, "logps/chosen": -366.42584228515625, "logps/ref_chosen": -47.875274658203125, "logps/ref_rejected": -77.15499877929688, "logps/rejected": -651.8671264648438, "loss": 1.0906, "margin_dpo/margin_mean": 256.16156005859375, "margin_dpo/margin_std": 345.9460144042969, "step": 404 }, { "epoch": 0.5947136563876652, "fcm_dpo/beta": 0.0016291348729282618, "fcm_dpo/delta": 0.05789618194103241, "fcm_dpo/margin": 210.23138427734375, "fcm_dpo/q_t": 0.42367473244667053, "grad_norm": 32.061859130859375, "learning_rate": 2.129207286861638e-07, "logits/chosen": -0.4601576626300812, "logits/rejected": -0.4487999379634857, "logps/chosen": -422.7500305175781, "logps/ref_chosen": -65.16290283203125, "logps/ref_rejected": -87.18678283691406, "logps/rejected": -655.0052490234375, "loss": 1.1669, "margin_dpo/margin_mean": 210.23141479492188, "margin_dpo/margin_std": 378.6610107421875, "step": 405 }, { "epoch": 0.5961820851688693, "fcm_dpo/beta": 0.0016158397775143385, "fcm_dpo/delta": -0.05560196936130524, "fcm_dpo/margin": 280.2799987792969, "fcm_dpo/q_t": 0.39691057801246643, "grad_norm": 28.331180572509766, "learning_rate": 2.1165208628032861e-07, "logits/chosen": -0.4773136377334595, "logits/rejected": -0.4842330813407898, "logps/chosen": -360.078857421875, "logps/ref_chosen": -49.740814208984375, "logps/ref_rejected": -92.07862854003906, "logps/rejected": -682.6966552734375, "loss": 1.0544, "margin_dpo/margin_mean": 280.2799987792969, "margin_dpo/margin_std": 338.8279113769531, "step": 406 }, { "epoch": 0.5976505139500734, "fcm_dpo/beta": 0.001641381997615099, "fcm_dpo/delta": 0.11392003297805786, "fcm_dpo/margin": 175.37591552734375, "fcm_dpo/q_t": 0.43280327320098877, "grad_norm": 32.0561408996582, "learning_rate": 2.1038445437768375e-07, "logits/chosen": -0.5068432092666626, "logits/rejected": -0.4771941900253296, "logps/chosen": -397.24237060546875, "logps/ref_chosen": -56.33069610595703, "logps/ref_rejected": -77.51209259033203, "logps/rejected": -593.7996826171875, "loss": 1.1901, "margin_dpo/margin_mean": 175.37591552734375, "margin_dpo/margin_std": 323.81439208984375, "step": 407 }, { "epoch": 0.5991189427312775, "fcm_dpo/beta": 0.001660007401369512, "fcm_dpo/delta": 0.07628411054611206, "fcm_dpo/margin": 196.483642578125, "fcm_dpo/q_t": 0.42345568537712097, "grad_norm": 31.536413192749023, "learning_rate": 2.0911786638150872e-07, "logits/chosen": -0.48006772994995117, "logits/rejected": -0.4519917368888855, "logps/chosen": -402.90374755859375, "logps/ref_chosen": -69.789306640625, "logps/ref_rejected": -90.09693908691406, "logps/rejected": -619.6949462890625, "loss": 1.1308, "margin_dpo/margin_mean": 196.48365783691406, "margin_dpo/margin_std": 258.8351745605469, "step": 408 }, { "epoch": 0.6005873715124816, "fcm_dpo/beta": 0.001686369301751256, "fcm_dpo/delta": 0.06914930045604706, "fcm_dpo/margin": 197.4263458251953, "fcm_dpo/q_t": 0.4226710796356201, "grad_norm": 30.05813980102539, "learning_rate": 2.0785235566757517e-07, "logits/chosen": -0.5069749355316162, "logits/rejected": -0.48954230546951294, "logps/chosen": -396.84429931640625, "logps/ref_chosen": -67.31744384765625, "logps/ref_rejected": -84.904296875, "logps/rejected": -611.8575439453125, "loss": 1.1345, "margin_dpo/margin_mean": 197.42636108398438, "margin_dpo/margin_std": 280.6907653808594, "step": 409 }, { "epoch": 0.6020558002936858, "fcm_dpo/beta": 0.001684611663222313, "fcm_dpo/delta": -0.007662855088710785, "fcm_dpo/margin": 241.77908325195312, "fcm_dpo/q_t": 0.4040978252887726, "grad_norm": 28.958459854125977, "learning_rate": 2.065879555832674e-07, "logits/chosen": -0.5035703778266907, "logits/rejected": -0.50343918800354, "logps/chosen": -351.52081298828125, "logps/ref_chosen": -51.465354919433594, "logps/ref_rejected": -83.198974609375, "logps/rejected": -625.0335693359375, "loss": 1.0731, "margin_dpo/margin_mean": 241.77908325195312, "margin_dpo/margin_std": 281.2712707519531, "step": 410 }, { "epoch": 0.6035242290748899, "fcm_dpo/beta": 0.001677290303632617, "fcm_dpo/delta": 0.01198473572731018, "fcm_dpo/margin": 231.06283569335938, "fcm_dpo/q_t": 0.41211268305778503, "grad_norm": 26.263025283813477, "learning_rate": 2.0532469944670343e-07, "logits/chosen": -0.5023758411407471, "logits/rejected": -0.5086607933044434, "logps/chosen": -383.20245361328125, "logps/ref_chosen": -52.30727005004883, "logps/ref_rejected": -80.69495391845703, "logps/rejected": -642.6529541015625, "loss": 1.1057, "margin_dpo/margin_mean": 231.06283569335938, "margin_dpo/margin_std": 318.57080078125, "step": 411 }, { "epoch": 0.604992657856094, "fcm_dpo/beta": 0.0016901884227991104, "fcm_dpo/delta": -0.0035414875019341707, "fcm_dpo/margin": 238.66928100585938, "fcm_dpo/q_t": 0.40699994564056396, "grad_norm": 39.12421798706055, "learning_rate": 2.0406262054585738e-07, "logits/chosen": -0.5658432245254517, "logits/rejected": -0.5902704000473022, "logps/chosen": -392.9666748046875, "logps/ref_chosen": -53.144126892089844, "logps/ref_rejected": -100.0608139038086, "logps/rejected": -678.5526123046875, "loss": 1.0903, "margin_dpo/margin_mean": 238.66928100585938, "margin_dpo/margin_std": 317.6526794433594, "step": 412 }, { "epoch": 0.6064610866372981, "fcm_dpo/beta": 0.0016896736342459917, "fcm_dpo/delta": -0.01300879381597042, "fcm_dpo/margin": 244.07077026367188, "fcm_dpo/q_t": 0.40322571992874146, "grad_norm": 35.633907318115234, "learning_rate": 2.0280175213768205e-07, "logits/chosen": -0.5198254585266113, "logits/rejected": -0.5212410688400269, "logps/chosen": -435.4099426269531, "logps/ref_chosen": -61.58196258544922, "logps/ref_rejected": -99.47340393066406, "logps/rejected": -717.3721923828125, "loss": 1.0867, "margin_dpo/margin_mean": 244.07077026367188, "margin_dpo/margin_std": 323.739990234375, "step": 413 }, { "epoch": 0.6079295154185022, "fcm_dpo/beta": 0.0016835236456245184, "fcm_dpo/delta": -0.05097716301679611, "fcm_dpo/margin": 266.1756591796875, "fcm_dpo/q_t": 0.3957219123840332, "grad_norm": 31.402956008911133, "learning_rate": 2.0154212744723247e-07, "logits/chosen": -0.4976515769958496, "logits/rejected": -0.4892701506614685, "logps/chosen": -396.79071044921875, "logps/ref_chosen": -46.63148498535156, "logps/ref_rejected": -87.64653015136719, "logps/rejected": -703.9813842773438, "loss": 1.0572, "margin_dpo/margin_mean": 266.1756286621094, "margin_dpo/margin_std": 315.53411865234375, "step": 414 }, { "epoch": 0.6093979441997063, "fcm_dpo/beta": 0.0016782158054411411, "fcm_dpo/delta": 0.06835382431745529, "fcm_dpo/margin": 198.95372009277344, "fcm_dpo/q_t": 0.42239439487457275, "grad_norm": 32.070098876953125, "learning_rate": 2.002837796667909e-07, "logits/chosen": -0.5548099279403687, "logits/rejected": -0.5496998429298401, "logps/chosen": -458.45635986328125, "logps/ref_chosen": -78.6182861328125, "logps/ref_rejected": -100.47752380371094, "logps/rejected": -679.269287109375, "loss": 1.1424, "margin_dpo/margin_mean": 198.95372009277344, "margin_dpo/margin_std": 302.24237060546875, "step": 415 }, { "epoch": 0.6108663729809104, "fcm_dpo/beta": 0.001665110932663083, "fcm_dpo/delta": -0.119843028485775, "fcm_dpo/margin": 308.59735107421875, "fcm_dpo/q_t": 0.3798748552799225, "grad_norm": 42.64773941040039, "learning_rate": 1.990267419549914e-07, "logits/chosen": -0.5385543704032898, "logits/rejected": -0.5424953699111938, "logps/chosen": -396.99420166015625, "logps/ref_chosen": -58.27912521362305, "logps/ref_rejected": -90.56871795654297, "logps/rejected": -737.881103515625, "loss": 0.9898, "margin_dpo/margin_mean": 308.59735107421875, "margin_dpo/margin_std": 283.72137451171875, "step": 416 }, { "epoch": 0.6123348017621145, "fcm_dpo/beta": 0.0016435376601293683, "fcm_dpo/delta": -0.04060738533735275, "fcm_dpo/margin": 267.00482177734375, "fcm_dpo/q_t": 0.3967912495136261, "grad_norm": 30.187908172607422, "learning_rate": 1.9777104743594686e-07, "logits/chosen": -0.5202246308326721, "logits/rejected": -0.4980872869491577, "logps/chosen": -385.2103271484375, "logps/ref_chosen": -50.1987190246582, "logps/ref_rejected": -68.15184020996094, "logps/rejected": -670.1682739257812, "loss": 1.0417, "margin_dpo/margin_mean": 267.00482177734375, "margin_dpo/margin_std": 270.859375, "step": 417 }, { "epoch": 0.6138032305433186, "fcm_dpo/beta": 0.0016406815266236663, "fcm_dpo/delta": -0.04235214740037918, "fcm_dpo/margin": 267.85089111328125, "fcm_dpo/q_t": 0.40181127190589905, "grad_norm": 28.84935188293457, "learning_rate": 1.965167291983757e-07, "logits/chosen": -0.6032763719558716, "logits/rejected": -0.5830473899841309, "logps/chosen": -446.807373046875, "logps/ref_chosen": -81.97846984863281, "logps/ref_rejected": -104.69148254394531, "logps/rejected": -737.3712768554688, "loss": 1.0814, "margin_dpo/margin_mean": 267.85089111328125, "margin_dpo/margin_std": 362.703125, "step": 418 }, { "epoch": 0.6152716593245228, "fcm_dpo/beta": 0.0016053111758083105, "fcm_dpo/delta": -0.07667610794305801, "fcm_dpo/margin": 294.6860656738281, "fcm_dpo/q_t": 0.3896028399467468, "grad_norm": 32.947139739990234, "learning_rate": 1.9526382029472988e-07, "logits/chosen": -0.5336101055145264, "logits/rejected": -0.5327674150466919, "logps/chosen": -387.23291015625, "logps/ref_chosen": -52.948646545410156, "logps/ref_rejected": -91.58309936523438, "logps/rejected": -720.553466796875, "loss": 1.0288, "margin_dpo/margin_mean": 294.6860656738281, "margin_dpo/margin_std": 316.33544921875, "step": 419 }, { "epoch": 0.6167400881057269, "fcm_dpo/beta": 0.0016269120387732983, "fcm_dpo/delta": 0.11196567863225937, "fcm_dpo/margin": 178.9881591796875, "fcm_dpo/q_t": 0.43498021364212036, "grad_norm": 68.43128967285156, "learning_rate": 1.9401235374032425e-07, "logits/chosen": -0.544563353061676, "logits/rejected": -0.5079815983772278, "logps/chosen": -504.28094482421875, "logps/ref_chosen": -77.7699203491211, "logps/ref_rejected": -69.31985473632812, "logps/rejected": -674.819091796875, "loss": 1.2042, "margin_dpo/margin_mean": 178.9881591796875, "margin_dpo/margin_std": 377.63885498046875, "step": 420 }, { "epoch": 0.618208516886931, "fcm_dpo/beta": 0.0016551846638321877, "fcm_dpo/delta": 0.057633526623249054, "fcm_dpo/margin": 207.74978637695312, "fcm_dpo/q_t": 0.4185563027858734, "grad_norm": 26.919042587280273, "learning_rate": 1.9276236251246653e-07, "logits/chosen": -0.5525974035263062, "logits/rejected": -0.5384413599967957, "logps/chosen": -398.10992431640625, "logps/ref_chosen": -53.765865325927734, "logps/ref_rejected": -89.28144836425781, "logps/rejected": -641.3753051757812, "loss": 1.1259, "margin_dpo/margin_mean": 207.74978637695312, "margin_dpo/margin_std": 281.2756042480469, "step": 421 }, { "epoch": 0.6196769456681351, "fcm_dpo/beta": 0.0016573498724028468, "fcm_dpo/delta": 0.01915598101913929, "fcm_dpo/margin": 230.2337646484375, "fcm_dpo/q_t": 0.41092464327812195, "grad_norm": 43.74554443359375, "learning_rate": 1.9151387954958792e-07, "logits/chosen": -0.5684947371482849, "logits/rejected": -0.5682277083396912, "logps/chosen": -455.68402099609375, "logps/ref_chosen": -68.6337661743164, "logps/ref_rejected": -87.86351013183594, "logps/rejected": -705.1475830078125, "loss": 1.1039, "margin_dpo/margin_mean": 230.2337646484375, "margin_dpo/margin_std": 312.58026123046875, "step": 422 }, { "epoch": 0.6211453744493393, "fcm_dpo/beta": 0.0016523964004591107, "fcm_dpo/delta": -0.024727419018745422, "fcm_dpo/margin": 256.37628173828125, "fcm_dpo/q_t": 0.4017714262008667, "grad_norm": 34.65106201171875, "learning_rate": 1.902669377503756e-07, "logits/chosen": -0.5626628398895264, "logits/rejected": -0.5683019757270813, "logps/chosen": -435.6455993652344, "logps/ref_chosen": -54.99030303955078, "logps/ref_rejected": -86.30654907226562, "logps/rejected": -723.338134765625, "loss": 1.0655, "margin_dpo/margin_mean": 256.3762512207031, "margin_dpo/margin_std": 303.867431640625, "step": 423 }, { "epoch": 0.6226138032305433, "fcm_dpo/beta": 0.0016435494180768728, "fcm_dpo/delta": -0.0081382617354393, "fcm_dpo/margin": 247.92364501953125, "fcm_dpo/q_t": 0.4074801802635193, "grad_norm": 43.08205032348633, "learning_rate": 1.890215699729057e-07, "logits/chosen": -0.5762988328933716, "logits/rejected": -0.5552129745483398, "logps/chosen": -404.3183898925781, "logps/ref_chosen": -56.01192092895508, "logps/ref_rejected": -66.47896575927734, "logps/rejected": -662.7091064453125, "loss": 1.0888, "margin_dpo/margin_mean": 247.92367553710938, "margin_dpo/margin_std": 331.8496398925781, "step": 424 }, { "epoch": 0.6240822320117474, "fcm_dpo/beta": 0.001672400627285242, "fcm_dpo/delta": 0.07351066917181015, "fcm_dpo/margin": 196.439697265625, "fcm_dpo/q_t": 0.42280128598213196, "grad_norm": 42.44888687133789, "learning_rate": 1.8777780903377732e-07, "logits/chosen": -0.5834954977035522, "logits/rejected": -0.5841509103775024, "logps/chosen": -441.07666015625, "logps/ref_chosen": -46.86899948120117, "logps/ref_rejected": -95.92545318603516, "logps/rejected": -686.5728149414062, "loss": 1.1481, "margin_dpo/margin_mean": 196.439697265625, "margin_dpo/margin_std": 305.77667236328125, "step": 425 }, { "epoch": 0.6255506607929515, "fcm_dpo/beta": 0.0016779915895313025, "fcm_dpo/delta": -0.012628093361854553, "fcm_dpo/margin": 245.40122985839844, "fcm_dpo/q_t": 0.4041150212287903, "grad_norm": 32.92577362060547, "learning_rate": 1.8653568770724803e-07, "logits/chosen": -0.5419399738311768, "logits/rejected": -0.5101866722106934, "logps/chosen": -436.7662353515625, "logps/ref_chosen": -76.58354187011719, "logps/ref_rejected": -81.26658630371094, "logps/rejected": -686.8504638671875, "loss": 1.0761, "margin_dpo/margin_mean": 245.4012451171875, "margin_dpo/margin_std": 292.03070068359375, "step": 426 }, { "epoch": 0.6270190895741556, "fcm_dpo/beta": 0.0016908218385651708, "fcm_dpo/delta": 0.09585842490196228, "fcm_dpo/margin": 181.6783447265625, "fcm_dpo/q_t": 0.4288369417190552, "grad_norm": 34.73727798461914, "learning_rate": 1.8529523872436977e-07, "logits/chosen": -0.5694754123687744, "logits/rejected": -0.5520645380020142, "logps/chosen": -403.98638916015625, "logps/ref_chosen": -64.8538818359375, "logps/ref_rejected": -78.5660171508789, "logps/rejected": -599.3768310546875, "loss": 1.1532, "margin_dpo/margin_mean": 181.6783447265625, "margin_dpo/margin_std": 272.23016357421875, "step": 427 }, { "epoch": 0.6284875183553598, "fcm_dpo/beta": 0.0016909840051084757, "fcm_dpo/delta": -0.015947699546813965, "fcm_dpo/margin": 245.3643341064453, "fcm_dpo/q_t": 0.40564045310020447, "grad_norm": 36.74341583251953, "learning_rate": 1.8405649477212697e-07, "logits/chosen": -0.5723360776901245, "logits/rejected": -0.5774627923965454, "logps/chosen": -486.622802734375, "logps/ref_chosen": -62.63666534423828, "logps/ref_rejected": -103.28181457519531, "logps/rejected": -772.63232421875, "loss": 1.1107, "margin_dpo/margin_mean": 245.36434936523438, "margin_dpo/margin_std": 380.26068115234375, "step": 428 }, { "epoch": 0.6299559471365639, "fcm_dpo/beta": 0.0017246659845113754, "fcm_dpo/delta": 0.06474070250988007, "fcm_dpo/margin": 194.86798095703125, "fcm_dpo/q_t": 0.4237772226333618, "grad_norm": 36.05411148071289, "learning_rate": 1.828194884925749e-07, "logits/chosen": -0.5850157737731934, "logits/rejected": -0.5665490627288818, "logps/chosen": -504.0472717285156, "logps/ref_chosen": -81.23401641845703, "logps/ref_rejected": -91.79493713378906, "logps/rejected": -709.4761962890625, "loss": 1.1639, "margin_dpo/margin_mean": 194.8679962158203, "margin_dpo/margin_std": 337.99810791015625, "step": 429 }, { "epoch": 0.631424375917768, "fcm_dpo/beta": 0.0017335449811071157, "fcm_dpo/delta": 0.04493825510144234, "fcm_dpo/margin": 205.6468505859375, "fcm_dpo/q_t": 0.4179939031600952, "grad_norm": 33.19789505004883, "learning_rate": 1.8158425248197928e-07, "logits/chosen": -0.5243804454803467, "logits/rejected": -0.5206818580627441, "logps/chosen": -411.6202392578125, "logps/ref_chosen": -60.920326232910156, "logps/ref_rejected": -104.42280578613281, "logps/rejected": -660.7695922851562, "loss": 1.1167, "margin_dpo/margin_mean": 205.64683532714844, "margin_dpo/margin_std": 281.1217041015625, "step": 430 }, { "epoch": 0.6328928046989721, "fcm_dpo/beta": 0.0017057711957022548, "fcm_dpo/delta": -0.10787712037563324, "fcm_dpo/margin": 294.5368957519531, "fcm_dpo/q_t": 0.3836921751499176, "grad_norm": 37.219364166259766, "learning_rate": 1.8035081928995788e-07, "logits/chosen": -0.5476272106170654, "logits/rejected": -0.5541262626647949, "logps/chosen": -408.8150634765625, "logps/ref_chosen": -57.34874725341797, "logps/ref_rejected": -92.84022521972656, "logps/rejected": -738.8434448242188, "loss": 1.0175, "margin_dpo/margin_mean": 294.5368957519531, "margin_dpo/margin_std": 320.72357177734375, "step": 431 }, { "epoch": 0.6343612334801763, "fcm_dpo/beta": 0.0016846886137500405, "fcm_dpo/delta": -0.09407821297645569, "fcm_dpo/margin": 290.38677978515625, "fcm_dpo/q_t": 0.3872066140174866, "grad_norm": 31.853593826293945, "learning_rate": 1.791192214186223e-07, "logits/chosen": -0.5290106534957886, "logits/rejected": -0.520604133605957, "logps/chosen": -424.4138488769531, "logps/ref_chosen": -71.07479095458984, "logps/ref_rejected": -98.57952880859375, "logps/rejected": -742.3053588867188, "loss": 1.0162, "margin_dpo/margin_mean": 290.38677978515625, "margin_dpo/margin_std": 291.4584045410156, "step": 432 }, { "epoch": 0.6358296622613803, "fcm_dpo/beta": 0.0016858684830367565, "fcm_dpo/delta": 0.07005324959754944, "fcm_dpo/margin": 196.98558044433594, "fcm_dpo/q_t": 0.4214964509010315, "grad_norm": 34.71784591674805, "learning_rate": 1.7788949132172193e-07, "logits/chosen": -0.5854760408401489, "logits/rejected": -0.5759547352790833, "logps/chosen": -507.0865173339844, "logps/ref_chosen": -58.273193359375, "logps/ref_rejected": -95.95089721679688, "logps/rejected": -741.7498168945312, "loss": 1.1623, "margin_dpo/margin_mean": 196.98558044433594, "margin_dpo/margin_std": 344.61151123046875, "step": 433 }, { "epoch": 0.6372980910425844, "fcm_dpo/beta": 0.0016826842911541462, "fcm_dpo/delta": -0.0009639374911785126, "fcm_dpo/margin": 238.1170654296875, "fcm_dpo/q_t": 0.4146474003791809, "grad_norm": 31.148998260498047, "learning_rate": 1.7666166140378853e-07, "logits/chosen": -0.6250817775726318, "logits/rejected": -0.6325300931930542, "logps/chosen": -461.8482971191406, "logps/ref_chosen": -61.97370147705078, "logps/ref_rejected": -78.49861145019531, "logps/rejected": -716.490234375, "loss": 1.1158, "margin_dpo/margin_mean": 238.1170654296875, "margin_dpo/margin_std": 388.4697265625, "step": 434 }, { "epoch": 0.6387665198237885, "fcm_dpo/beta": 0.0016721903812140226, "fcm_dpo/delta": -0.05436144396662712, "fcm_dpo/margin": 270.16436767578125, "fcm_dpo/q_t": 0.3959371745586395, "grad_norm": 27.92201805114746, "learning_rate": 1.7543576401928218e-07, "logits/chosen": -0.6887817978858948, "logits/rejected": -0.684075117111206, "logps/chosen": -414.2963562011719, "logps/ref_chosen": -51.502052307128906, "logps/ref_rejected": -87.56689453125, "logps/rejected": -720.5255737304688, "loss": 1.0586, "margin_dpo/margin_mean": 270.16436767578125, "margin_dpo/margin_std": 331.7671203613281, "step": 435 }, { "epoch": 0.6402349486049926, "fcm_dpo/beta": 0.0016701570712029934, "fcm_dpo/delta": 0.02181386575102806, "fcm_dpo/margin": 226.842529296875, "fcm_dpo/q_t": 0.4125426113605499, "grad_norm": 47.98572540283203, "learning_rate": 1.742118314717391e-07, "logits/chosen": -0.6400831341743469, "logits/rejected": -0.612436056137085, "logps/chosen": -450.94403076171875, "logps/ref_chosen": -71.40371704101562, "logps/ref_rejected": -82.72775268554688, "logps/rejected": -689.110595703125, "loss": 1.1044, "margin_dpo/margin_mean": 226.842529296875, "margin_dpo/margin_std": 305.198486328125, "step": 436 }, { "epoch": 0.6417033773861968, "fcm_dpo/beta": 0.0016808616928756237, "fcm_dpo/delta": 0.017694242298603058, "fcm_dpo/margin": 227.850830078125, "fcm_dpo/q_t": 0.41099801659584045, "grad_norm": 29.38176155090332, "learning_rate": 1.7298989601292036e-07, "logits/chosen": -0.652057945728302, "logits/rejected": -0.6310602426528931, "logps/chosen": -457.568359375, "logps/ref_chosen": -64.7442626953125, "logps/ref_rejected": -82.04356384277344, "logps/rejected": -702.7184448242188, "loss": 1.1034, "margin_dpo/margin_mean": 227.85081481933594, "margin_dpo/margin_std": 309.5552978515625, "step": 437 }, { "epoch": 0.6431718061674009, "fcm_dpo/beta": 0.0016721580177545547, "fcm_dpo/delta": -0.017781764268875122, "fcm_dpo/margin": 249.15879821777344, "fcm_dpo/q_t": 0.4034152030944824, "grad_norm": 37.04447555541992, "learning_rate": 1.7176998984196144e-07, "logits/chosen": -0.6542237997055054, "logits/rejected": -0.6281710863113403, "logps/chosen": -465.63787841796875, "logps/ref_chosen": -59.0186653137207, "logps/ref_rejected": -83.07682800292969, "logps/rejected": -738.8548583984375, "loss": 1.0805, "margin_dpo/margin_mean": 249.1588134765625, "margin_dpo/margin_std": 318.4876403808594, "step": 438 }, { "epoch": 0.644640234948605, "fcm_dpo/beta": 0.0016700313426554203, "fcm_dpo/delta": 0.03846631944179535, "fcm_dpo/margin": 216.2918701171875, "fcm_dpo/q_t": 0.41903701424598694, "grad_norm": 34.50837326049805, "learning_rate": 1.7055214510452458e-07, "logits/chosen": -0.6358094811439514, "logits/rejected": -0.6351268291473389, "logps/chosen": -459.71185302734375, "logps/ref_chosen": -53.78407669067383, "logps/ref_rejected": -83.98545837402344, "logps/rejected": -706.2051391601562, "loss": 1.141, "margin_dpo/margin_mean": 216.2918701171875, "margin_dpo/margin_std": 336.371337890625, "step": 439 }, { "epoch": 0.6461086637298091, "fcm_dpo/beta": 0.0016852959524840117, "fcm_dpo/delta": -0.038592737168073654, "fcm_dpo/margin": 259.22357177734375, "fcm_dpo/q_t": 0.4044986069202423, "grad_norm": 55.87424850463867, "learning_rate": 1.6933639389195134e-07, "logits/chosen": -0.6913414001464844, "logits/rejected": -0.6863530874252319, "logps/chosen": -513.6109008789062, "logps/ref_chosen": -78.56671905517578, "logps/ref_rejected": -96.49775695800781, "logps/rejected": -790.7655029296875, "loss": 1.0805, "margin_dpo/margin_mean": 259.22357177734375, "margin_dpo/margin_std": 368.9686279296875, "step": 440 }, { "epoch": 0.6475770925110133, "fcm_dpo/beta": 0.0016829633386805654, "fcm_dpo/delta": -0.005735956132411957, "fcm_dpo/margin": 240.76620483398438, "fcm_dpo/q_t": 0.41345399618148804, "grad_norm": 47.60608673095703, "learning_rate": 1.681227682404166e-07, "logits/chosen": -0.6709833145141602, "logits/rejected": -0.6541421413421631, "logps/chosen": -547.8490600585938, "logps/ref_chosen": -60.824440002441406, "logps/ref_rejected": -96.47080993652344, "logps/rejected": -824.2615966796875, "loss": 1.144, "margin_dpo/margin_mean": 240.76620483398438, "margin_dpo/margin_std": 431.6340637207031, "step": 441 }, { "epoch": 0.6490455212922174, "fcm_dpo/beta": 0.0016690012998878956, "fcm_dpo/delta": -0.07323877513408661, "fcm_dpo/margin": 281.21954345703125, "fcm_dpo/q_t": 0.3959912061691284, "grad_norm": 31.50290298461914, "learning_rate": 1.669113001300851e-07, "logits/chosen": -0.6310905814170837, "logits/rejected": -0.6183385848999023, "logps/chosen": -455.05615234375, "logps/ref_chosen": -47.01121520996094, "logps/ref_rejected": -76.53926086425781, "logps/rejected": -765.8037719726562, "loss": 1.0605, "margin_dpo/margin_mean": 281.2195739746094, "margin_dpo/margin_std": 366.8897399902344, "step": 442 }, { "epoch": 0.6505139500734214, "fcm_dpo/beta": 0.0016703938599675894, "fcm_dpo/delta": 0.11156397312879562, "fcm_dpo/margin": 174.64288330078125, "fcm_dpo/q_t": 0.43598759174346924, "grad_norm": 41.197444915771484, "learning_rate": 1.6570202148426815e-07, "logits/chosen": -0.6210609674453735, "logits/rejected": -0.5954668521881104, "logps/chosen": -524.9894409179688, "logps/ref_chosen": -71.27301788330078, "logps/ref_rejected": -86.679931640625, "logps/rejected": -715.0391845703125, "loss": 1.2238, "margin_dpo/margin_mean": 174.6428680419922, "margin_dpo/margin_std": 401.8946533203125, "step": 443 }, { "epoch": 0.6519823788546255, "fcm_dpo/beta": 0.0016623300034552813, "fcm_dpo/delta": -0.08805151283740997, "fcm_dpo/margin": 290.83160400390625, "fcm_dpo/q_t": 0.39226892590522766, "grad_norm": 31.508068084716797, "learning_rate": 1.6449496416858282e-07, "logits/chosen": -0.5918526649475098, "logits/rejected": -0.5983752012252808, "logps/chosen": -496.802978515625, "logps/ref_chosen": -57.213706970214844, "logps/ref_rejected": -97.25489807128906, "logps/rejected": -827.67578125, "loss": 1.0517, "margin_dpo/margin_mean": 290.83160400390625, "margin_dpo/margin_std": 387.1565246582031, "step": 444 }, { "epoch": 0.6534508076358296, "fcm_dpo/beta": 0.0016500870697200298, "fcm_dpo/delta": -0.04307527467608452, "fcm_dpo/margin": 267.3776550292969, "fcm_dpo/q_t": 0.3995034098625183, "grad_norm": 31.755836486816406, "learning_rate": 1.6329015999011182e-07, "logits/chosen": -0.6003238558769226, "logits/rejected": -0.5861713290214539, "logps/chosen": -438.2301025390625, "logps/ref_chosen": -67.29979705810547, "logps/ref_rejected": -92.68267059326172, "logps/rejected": -730.9906005859375, "loss": 1.0641, "margin_dpo/margin_mean": 267.3776550292969, "margin_dpo/margin_std": 331.8627014160156, "step": 445 }, { "epoch": 0.6549192364170338, "fcm_dpo/beta": 0.0016383725451305509, "fcm_dpo/delta": -0.07028612494468689, "fcm_dpo/margin": 284.8059997558594, "fcm_dpo/q_t": 0.39093977212905884, "grad_norm": 36.69627380371094, "learning_rate": 1.6208764069656578e-07, "logits/chosen": -0.581207811832428, "logits/rejected": -0.5915842056274414, "logps/chosen": -395.5775146484375, "logps/ref_chosen": -59.098487854003906, "logps/ref_rejected": -101.26419067382812, "logps/rejected": -722.5491943359375, "loss": 1.032, "margin_dpo/margin_mean": 284.80596923828125, "margin_dpo/margin_std": 294.6192626953125, "step": 446 }, { "epoch": 0.6563876651982379, "fcm_dpo/beta": 0.0015993504785001278, "fcm_dpo/delta": -0.07478490471839905, "fcm_dpo/margin": 294.4344482421875, "fcm_dpo/q_t": 0.3939029276371002, "grad_norm": 34.44708251953125, "learning_rate": 1.608874379754465e-07, "logits/chosen": -0.6691935658454895, "logits/rejected": -0.6827399730682373, "logps/chosen": -412.94537353515625, "logps/ref_chosen": -56.07533264160156, "logps/ref_rejected": -98.69475555419922, "logps/rejected": -749.999267578125, "loss": 1.045, "margin_dpo/margin_mean": 294.4344482421875, "margin_dpo/margin_std": 367.2359924316406, "step": 447 }, { "epoch": 0.657856093979442, "fcm_dpo/beta": 0.0015895452816039324, "fcm_dpo/delta": -0.05442797392606735, "fcm_dpo/margin": 284.32830810546875, "fcm_dpo/q_t": 0.3961128890514374, "grad_norm": 47.770294189453125, "learning_rate": 1.5968958345321177e-07, "logits/chosen": -0.553863525390625, "logits/rejected": -0.5608095526695251, "logps/chosen": -462.1229248046875, "logps/ref_chosen": -60.00384521484375, "logps/ref_rejected": -102.26465606689453, "logps/rejected": -788.7120361328125, "loss": 1.0526, "margin_dpo/margin_mean": 284.32830810546875, "margin_dpo/margin_std": 340.556884765625, "step": 448 }, { "epoch": 0.6593245227606461, "fcm_dpo/beta": 0.0015652881702408195, "fcm_dpo/delta": -0.054406676441431046, "fcm_dpo/margin": 288.65020751953125, "fcm_dpo/q_t": 0.39999139308929443, "grad_norm": 29.779434204101562, "learning_rate": 1.584941086944423e-07, "logits/chosen": -0.5949973464012146, "logits/rejected": -0.5889606475830078, "logps/chosen": -468.46368408203125, "logps/ref_chosen": -67.52661895751953, "logps/ref_rejected": -88.59690856933594, "logps/rejected": -778.1842041015625, "loss": 1.0777, "margin_dpo/margin_mean": 288.65020751953125, "margin_dpo/margin_std": 417.9888610839844, "step": 449 }, { "epoch": 0.6607929515418502, "fcm_dpo/beta": 0.0015472873346880078, "fcm_dpo/delta": -0.08019885420799255, "fcm_dpo/margin": 307.92584228515625, "fcm_dpo/q_t": 0.38820013403892517, "grad_norm": 45.0981559753418, "learning_rate": 1.573010452010098e-07, "logits/chosen": -0.6336866617202759, "logits/rejected": -0.6424990892410278, "logps/chosen": -382.67413330078125, "logps/ref_chosen": -57.10811996459961, "logps/ref_rejected": -102.75494384765625, "logps/rejected": -736.246826171875, "loss": 1.0184, "margin_dpo/margin_mean": 307.92584228515625, "margin_dpo/margin_std": 307.9268798828125, "step": 450 }, { "epoch": 0.6622613803230544, "fcm_dpo/beta": 0.0015509811928495765, "fcm_dpo/delta": 0.039534684270620346, "fcm_dpo/margin": 233.21817016601562, "fcm_dpo/q_t": 0.41526713967323303, "grad_norm": 40.15821838378906, "learning_rate": 1.5611042441124687e-07, "logits/chosen": -0.6718031167984009, "logits/rejected": -0.6435151100158691, "logps/chosen": -512.0057983398438, "logps/ref_chosen": -58.46883010864258, "logps/ref_rejected": -72.92941284179688, "logps/rejected": -759.6845703125, "loss": 1.1567, "margin_dpo/margin_mean": 233.21817016601562, "margin_dpo/margin_std": 423.0076904296875, "step": 451 }, { "epoch": 0.6637298091042585, "fcm_dpo/beta": 0.0015399182448163629, "fcm_dpo/delta": -0.035000670701265335, "fcm_dpo/margin": 281.4200439453125, "fcm_dpo/q_t": 0.3974810838699341, "grad_norm": 27.29172706604004, "learning_rate": 1.549222776991186e-07, "logits/chosen": -0.6307343244552612, "logits/rejected": -0.6474366188049316, "logps/chosen": -360.15216064453125, "logps/ref_chosen": -50.39055252075195, "logps/ref_rejected": -97.77142333984375, "logps/rejected": -688.953125, "loss": 1.0431, "margin_dpo/margin_mean": 281.4200439453125, "margin_dpo/margin_std": 278.75592041015625, "step": 452 }, { "epoch": 0.6651982378854625, "fcm_dpo/beta": 0.0015358540695160627, "fcm_dpo/delta": -0.0009381119161844254, "fcm_dpo/margin": 260.94769287109375, "fcm_dpo/q_t": 0.40897369384765625, "grad_norm": 32.27933120727539, "learning_rate": 1.5373663637339584e-07, "logits/chosen": -0.6390811800956726, "logits/rejected": -0.622653603553772, "logps/chosen": -432.13372802734375, "logps/ref_chosen": -57.71485137939453, "logps/ref_rejected": -82.20741271972656, "logps/rejected": -717.573974609375, "loss": 1.0895, "margin_dpo/margin_mean": 260.94769287109375, "margin_dpo/margin_std": 345.93994140625, "step": 453 }, { "epoch": 0.6666666666666666, "fcm_dpo/beta": 0.0015241008950397372, "fcm_dpo/delta": -0.054964885115623474, "fcm_dpo/margin": 296.76806640625, "fcm_dpo/q_t": 0.3962477445602417, "grad_norm": 28.71084213256836, "learning_rate": 1.5255353167683017e-07, "logits/chosen": -0.7012594938278198, "logits/rejected": -0.6893137693405151, "logps/chosen": -514.2677612304688, "logps/ref_chosen": -60.945648193359375, "logps/ref_rejected": -84.95079040527344, "logps/rejected": -835.0409545898438, "loss": 1.0532, "margin_dpo/margin_mean": 296.76806640625, "margin_dpo/margin_std": 363.46282958984375, "step": 454 }, { "epoch": 0.6681350954478708, "fcm_dpo/beta": 0.0015060155419632792, "fcm_dpo/delta": -0.11781027913093567, "fcm_dpo/margin": 339.77191162109375, "fcm_dpo/q_t": 0.3838193118572235, "grad_norm": 38.13735580444336, "learning_rate": 1.5137299478533064e-07, "logits/chosen": -0.6432729363441467, "logits/rejected": -0.6612046957015991, "logps/chosen": -434.3492431640625, "logps/ref_chosen": -44.88671112060547, "logps/ref_rejected": -115.30147552490234, "logps/rejected": -844.535888671875, "loss": 1.0173, "margin_dpo/margin_mean": 339.7718811035156, "margin_dpo/margin_std": 379.62335205078125, "step": 455 }, { "epoch": 0.6696035242290749, "fcm_dpo/beta": 0.001467827707529068, "fcm_dpo/delta": -0.09735056757926941, "fcm_dpo/margin": 335.6260986328125, "fcm_dpo/q_t": 0.3862442374229431, "grad_norm": 28.999698638916016, "learning_rate": 1.5019505680714232e-07, "logits/chosen": -0.6519438624382019, "logits/rejected": -0.6727065443992615, "logps/chosen": -465.74395751953125, "logps/ref_chosen": -57.036781311035156, "logps/ref_rejected": -105.21784210205078, "logps/rejected": -849.5510864257812, "loss": 1.0071, "margin_dpo/margin_mean": 335.6260986328125, "margin_dpo/margin_std": 334.113525390625, "step": 456 }, { "epoch": 0.671071953010279, "fcm_dpo/beta": 0.0014373862650245428, "fcm_dpo/delta": -0.0841878205537796, "fcm_dpo/margin": 333.8603820800781, "fcm_dpo/q_t": 0.3876863121986389, "grad_norm": 37.97410202026367, "learning_rate": 1.4901974878202627e-07, "logits/chosen": -0.6944575905799866, "logits/rejected": -0.6935386061668396, "logps/chosen": -451.5135498046875, "logps/ref_chosen": -54.24253845214844, "logps/ref_rejected": -85.10956573486328, "logps/rejected": -816.240966796875, "loss": 1.0151, "margin_dpo/margin_mean": 333.8603515625, "margin_dpo/margin_std": 328.8755187988281, "step": 457 }, { "epoch": 0.6725403817914831, "fcm_dpo/beta": 0.0014226180501282215, "fcm_dpo/delta": -0.02253449335694313, "fcm_dpo/margin": 295.9541015625, "fcm_dpo/q_t": 0.40318185091018677, "grad_norm": 29.526060104370117, "learning_rate": 1.4784710168044212e-07, "logits/chosen": -0.6689471006393433, "logits/rejected": -0.6608414649963379, "logps/chosen": -469.71185302734375, "logps/ref_chosen": -55.40888214111328, "logps/ref_rejected": -97.68325805664062, "logps/rejected": -807.9403076171875, "loss": 1.0677, "margin_dpo/margin_mean": 295.9541015625, "margin_dpo/margin_std": 346.7279968261719, "step": 458 }, { "epoch": 0.6740088105726872, "fcm_dpo/beta": 0.0014095264486968517, "fcm_dpo/delta": -0.06269839406013489, "fcm_dpo/margin": 325.8946533203125, "fcm_dpo/q_t": 0.3947668671607971, "grad_norm": 31.499923706054688, "learning_rate": 1.466771464027316e-07, "logits/chosen": -0.6561689376831055, "logits/rejected": -0.6712781190872192, "logps/chosen": -505.89019775390625, "logps/ref_chosen": -46.55748748779297, "logps/ref_rejected": -86.16854095458984, "logps/rejected": -871.3959350585938, "loss": 1.0538, "margin_dpo/margin_mean": 325.8946533203125, "margin_dpo/margin_std": 400.7102355957031, "step": 459 }, { "epoch": 0.6754772393538914, "fcm_dpo/beta": 0.0013926841784268618, "fcm_dpo/delta": -0.09006337821483612, "fcm_dpo/margin": 348.79052734375, "fcm_dpo/q_t": 0.3885030150413513, "grad_norm": 40.160850524902344, "learning_rate": 1.4550991377830423e-07, "logits/chosen": -0.679194450378418, "logits/rejected": -0.7077926993370056, "logps/chosen": -545.5593872070312, "logps/ref_chosen": -51.63489532470703, "logps/ref_rejected": -104.11935424804688, "logps/rejected": -946.8343505859375, "loss": 1.0237, "margin_dpo/margin_mean": 348.79052734375, "margin_dpo/margin_std": 384.35693359375, "step": 460 }, { "epoch": 0.6769456681350955, "fcm_dpo/beta": 0.0013905889354646206, "fcm_dpo/delta": 0.008415699005126953, "fcm_dpo/margin": 281.6998291015625, "fcm_dpo/q_t": 0.4110637307167053, "grad_norm": 28.433305740356445, "learning_rate": 1.4434543456482518e-07, "logits/chosen": -0.7387268543243408, "logits/rejected": -0.7501698732376099, "logps/chosen": -583.958984375, "logps/ref_chosen": -55.18195724487305, "logps/ref_rejected": -86.47689819335938, "logps/rejected": -896.9537353515625, "loss": 1.0996, "margin_dpo/margin_mean": 281.6997985839844, "margin_dpo/margin_std": 387.92791748046875, "step": 461 }, { "epoch": 0.6784140969162996, "fcm_dpo/beta": 0.0014029676094651222, "fcm_dpo/delta": 0.08377067744731903, "fcm_dpo/margin": 227.35406494140625, "fcm_dpo/q_t": 0.4282529056072235, "grad_norm": 49.88774108886719, "learning_rate": 1.4318373944740484e-07, "logits/chosen": -0.8244759440422058, "logits/rejected": -0.8147263526916504, "logps/chosen": -598.8399658203125, "logps/ref_chosen": -69.92803192138672, "logps/ref_rejected": -78.84111022949219, "logps/rejected": -835.1070556640625, "loss": 1.1609, "margin_dpo/margin_mean": 227.35406494140625, "margin_dpo/margin_std": 385.42572021484375, "step": 462 }, { "epoch": 0.6798825256975036, "fcm_dpo/beta": 0.0014092556666582823, "fcm_dpo/delta": -0.037594083696603775, "fcm_dpo/margin": 309.1751708984375, "fcm_dpo/q_t": 0.40178006887435913, "grad_norm": 37.86516189575195, "learning_rate": 1.4202485903778976e-07, "logits/chosen": -0.8040248155593872, "logits/rejected": -0.8105688095092773, "logps/chosen": -594.924072265625, "logps/ref_chosen": -55.27437210083008, "logps/ref_rejected": -89.02497863769531, "logps/rejected": -937.849853515625, "loss": 1.0796, "margin_dpo/margin_mean": 309.1751708984375, "margin_dpo/margin_std": 419.88360595703125, "step": 463 }, { "epoch": 0.6813509544787077, "fcm_dpo/beta": 0.0013409138191491365, "fcm_dpo/delta": -0.24509315192699432, "fcm_dpo/margin": 467.78326416015625, "fcm_dpo/q_t": 0.35747382044792175, "grad_norm": 48.06254959106445, "learning_rate": 1.4086882387355658e-07, "logits/chosen": -0.7823467254638672, "logits/rejected": -0.8419663906097412, "logps/chosen": -594.6781005859375, "logps/ref_chosen": -50.91230010986328, "logps/ref_rejected": -102.4893798828125, "logps/rejected": -1114.03857421875, "loss": 0.9356, "margin_dpo/margin_mean": 467.78326416015625, "margin_dpo/margin_std": 453.023681640625, "step": 464 }, { "epoch": 0.6828193832599119, "fcm_dpo/beta": 0.0013181334361433983, "fcm_dpo/delta": -0.09859161078929901, "fcm_dpo/margin": 374.6343994140625, "fcm_dpo/q_t": 0.3844667673110962, "grad_norm": 37.97553634643555, "learning_rate": 1.3971566441730714e-07, "logits/chosen": -0.78519207239151, "logits/rejected": -0.8018290400505066, "logps/chosen": -611.6665649414062, "logps/ref_chosen": -60.116851806640625, "logps/ref_rejected": -113.94602966308594, "logps/rejected": -1040.130126953125, "loss": 1.0469, "margin_dpo/margin_mean": 374.6343994140625, "margin_dpo/margin_std": 480.17120361328125, "step": 465 }, { "epoch": 0.684287812041116, "fcm_dpo/beta": 0.0012953735422343016, "fcm_dpo/delta": -0.02665306255221367, "fcm_dpo/margin": 328.0135803222656, "fcm_dpo/q_t": 0.40215975046157837, "grad_norm": 31.085790634155273, "learning_rate": 1.3856541105586545e-07, "logits/chosen": -0.822087287902832, "logits/rejected": -0.8236336708068848, "logps/chosen": -642.7122802734375, "logps/ref_chosen": -52.920921325683594, "logps/ref_rejected": -90.3154296875, "logps/rejected": -1008.1204223632812, "loss": 1.0834, "margin_dpo/margin_mean": 328.01361083984375, "margin_dpo/margin_std": 438.4411926269531, "step": 466 }, { "epoch": 0.6857562408223201, "fcm_dpo/beta": 0.001278050011023879, "fcm_dpo/delta": -0.055538684129714966, "fcm_dpo/margin": 352.9036865234375, "fcm_dpo/q_t": 0.40140700340270996, "grad_norm": 44.78834915161133, "learning_rate": 1.3741809409947729e-07, "logits/chosen": -0.8997122049331665, "logits/rejected": -0.8935316801071167, "logps/chosen": -827.7107543945312, "logps/ref_chosen": -78.7158203125, "logps/ref_rejected": -102.86019897460938, "logps/rejected": -1204.7587890625, "loss": 1.1325, "margin_dpo/margin_mean": 352.9036865234375, "margin_dpo/margin_std": 619.0111083984375, "step": 467 }, { "epoch": 0.6872246696035242, "fcm_dpo/beta": 0.0012572079431265593, "fcm_dpo/delta": -0.14669275283813477, "fcm_dpo/margin": 428.48065185546875, "fcm_dpo/q_t": 0.3811851441860199, "grad_norm": 51.817596435546875, "learning_rate": 1.362737437810114e-07, "logits/chosen": -0.8791412115097046, "logits/rejected": -0.8881509304046631, "logps/chosen": -655.0909423828125, "logps/ref_chosen": -69.93536376953125, "logps/ref_rejected": -101.02880859375, "logps/rejected": -1114.6650390625, "loss": 1.0157, "margin_dpo/margin_mean": 428.48065185546875, "margin_dpo/margin_std": 536.8350830078125, "step": 468 }, { "epoch": 0.6886930983847284, "fcm_dpo/beta": 0.0012133971322327852, "fcm_dpo/delta": -0.1514207422733307, "fcm_dpo/margin": 446.37615966796875, "fcm_dpo/q_t": 0.3775207996368408, "grad_norm": 46.171669006347656, "learning_rate": 1.351323902551631e-07, "logits/chosen": -0.8811942934989929, "logits/rejected": -0.8931454420089722, "logps/chosen": -704.7970581054688, "logps/ref_chosen": -68.12469482421875, "logps/ref_rejected": -104.78640747070312, "logps/rejected": -1187.8349609375, "loss": 1.0025, "margin_dpo/margin_mean": 446.37615966796875, "margin_dpo/margin_std": 505.142822265625, "step": 469 }, { "epoch": 0.6901615271659325, "fcm_dpo/beta": 0.0012034507235512137, "fcm_dpo/delta": -0.06455770879983902, "fcm_dpo/margin": 383.5496520996094, "fcm_dpo/q_t": 0.3943213224411011, "grad_norm": 33.37154006958008, "learning_rate": 1.339940635976592e-07, "logits/chosen": -0.8440713882446289, "logits/rejected": -0.8514707088470459, "logps/chosen": -594.4234008789062, "logps/ref_chosen": -43.791927337646484, "logps/ref_rejected": -82.70285034179688, "logps/rejected": -1016.8839721679688, "loss": 1.0655, "margin_dpo/margin_mean": 383.5496520996094, "margin_dpo/margin_std": 511.9697570800781, "step": 470 }, { "epoch": 0.6916299559471366, "fcm_dpo/beta": 0.0011930849868804216, "fcm_dpo/delta": 0.018563054502010345, "fcm_dpo/margin": 319.95709228515625, "fcm_dpo/q_t": 0.4155677855014801, "grad_norm": 46.75631332397461, "learning_rate": 1.3285879380446563e-07, "logits/chosen": -0.910805344581604, "logits/rejected": -0.9144266843795776, "logps/chosen": -742.8097534179688, "logps/ref_chosen": -63.33952331542969, "logps/ref_rejected": -83.61048126220703, "logps/rejected": -1083.037841796875, "loss": 1.1267, "margin_dpo/margin_mean": 319.95709228515625, "margin_dpo/margin_std": 512.3135375976562, "step": 471 }, { "epoch": 0.6930983847283406, "fcm_dpo/beta": 0.0011829681461676955, "fcm_dpo/delta": -0.06849831342697144, "fcm_dpo/margin": 392.803466796875, "fcm_dpo/q_t": 0.40142130851745605, "grad_norm": 37.54646301269531, "learning_rate": 1.317266107909975e-07, "logits/chosen": -0.9019899964332581, "logits/rejected": -0.8787474036216736, "logps/chosen": -746.1549682617188, "logps/ref_chosen": -83.66610717773438, "logps/ref_rejected": -117.20919799804688, "logps/rejected": -1172.50146484375, "loss": 1.0996, "margin_dpo/margin_mean": 392.803466796875, "margin_dpo/margin_std": 627.5294189453125, "step": 472 }, { "epoch": 0.6945668135095447, "fcm_dpo/beta": 0.001211212482303381, "fcm_dpo/delta": 0.17383155226707458, "fcm_dpo/margin": 190.3201904296875, "fcm_dpo/q_t": 0.4516882002353668, "grad_norm": 120.86524200439453, "learning_rate": 1.3059754439133002e-07, "logits/chosen": -0.9114212989807129, "logits/rejected": -0.8835663795471191, "logps/chosen": -869.204833984375, "logps/ref_chosen": -63.49696731567383, "logps/ref_rejected": -81.14657592773438, "logps/rejected": -1077.174560546875, "loss": 1.3499, "margin_dpo/margin_mean": 190.32020568847656, "margin_dpo/margin_std": 765.7159423828125, "step": 473 }, { "epoch": 0.6960352422907489, "fcm_dpo/beta": 0.0012354427017271519, "fcm_dpo/delta": -0.011602118611335754, "fcm_dpo/margin": 331.4249572753906, "fcm_dpo/q_t": 0.40766799449920654, "grad_norm": 39.35799789428711, "learning_rate": 1.2947162435741277e-07, "logits/chosen": -0.8279140591621399, "logits/rejected": -0.8301948308944702, "logps/chosen": -675.7288818359375, "logps/ref_chosen": -52.6119384765625, "logps/ref_rejected": -90.08041381835938, "logps/rejected": -1044.622314453125, "loss": 1.1359, "margin_dpo/margin_mean": 331.42498779296875, "margin_dpo/margin_std": 559.75341796875, "step": 474 }, { "epoch": 0.697503671071953, "fcm_dpo/beta": 0.0012009632773697376, "fcm_dpo/delta": -0.10904163122177124, "fcm_dpo/margin": 419.064697265625, "fcm_dpo/q_t": 0.3838295340538025, "grad_norm": 59.33665466308594, "learning_rate": 1.2834888035828596e-07, "logits/chosen": -0.8741481304168701, "logits/rejected": -0.8975253701210022, "logps/chosen": -498.5884094238281, "logps/ref_chosen": -42.49519348144531, "logps/ref_rejected": -90.06294250488281, "logps/rejected": -965.2208251953125, "loss": 1.0072, "margin_dpo/margin_mean": 419.064697265625, "margin_dpo/margin_std": 434.1658935546875, "step": 475 }, { "epoch": 0.6989720998531571, "fcm_dpo/beta": 0.0011981537099927664, "fcm_dpo/delta": 0.014648713171482086, "fcm_dpo/margin": 322.09075927734375, "fcm_dpo/q_t": 0.41181665658950806, "grad_norm": 54.55389404296875, "learning_rate": 1.2722934197929802e-07, "logits/chosen": -0.8839001655578613, "logits/rejected": -0.8944188356399536, "logps/chosen": -600.6002807617188, "logps/ref_chosen": -42.94938278198242, "logps/ref_rejected": -73.71023559570312, "logps/rejected": -953.451904296875, "loss": 1.0989, "margin_dpo/margin_mean": 322.09075927734375, "margin_dpo/margin_std": 432.81268310546875, "step": 476 }, { "epoch": 0.7004405286343612, "fcm_dpo/beta": 0.0012043662136420608, "fcm_dpo/delta": 0.001615665853023529, "fcm_dpo/margin": 330.6528015136719, "fcm_dpo/q_t": 0.4092486798763275, "grad_norm": 37.25672149658203, "learning_rate": 1.2611303872132631e-07, "logits/chosen": -0.8755106925964355, "logits/rejected": -0.8339799642562866, "logps/chosen": -640.450439453125, "logps/ref_chosen": -70.77261352539062, "logps/ref_rejected": -76.13737487792969, "logps/rejected": -976.4679565429688, "loss": 1.123, "margin_dpo/margin_mean": 330.65277099609375, "margin_dpo/margin_std": 534.1469116210938, "step": 477 }, { "epoch": 0.7019089574155654, "fcm_dpo/beta": 0.0011925864964723587, "fcm_dpo/delta": -0.06166646629571915, "fcm_dpo/margin": 384.75927734375, "fcm_dpo/q_t": 0.3951849341392517, "grad_norm": 45.197479248046875, "learning_rate": 1.2500000000000005e-07, "logits/chosen": -0.777691662311554, "logits/rejected": -0.7990972995758057, "logps/chosen": -495.652587890625, "logps/ref_chosen": -41.440513610839844, "logps/ref_rejected": -85.36196899414062, "logps/rejected": -924.3333129882812, "loss": 1.0533, "margin_dpo/margin_mean": 384.75927734375, "margin_dpo/margin_std": 474.02569580078125, "step": 478 }, { "epoch": 0.7033773861967695, "fcm_dpo/beta": 0.001189418020658195, "fcm_dpo/delta": -0.02111241966485977, "fcm_dpo/margin": 352.7756042480469, "fcm_dpo/q_t": 0.40721356868743896, "grad_norm": 32.11994171142578, "learning_rate": 1.2389025514492456e-07, "logits/chosen": -0.8205797076225281, "logits/rejected": -0.8455021381378174, "logps/chosen": -669.05615234375, "logps/ref_chosen": -53.907920837402344, "logps/ref_rejected": -95.1163330078125, "logps/rejected": -1063.040283203125, "loss": 1.1155, "margin_dpo/margin_mean": 352.7756042480469, "margin_dpo/margin_std": 573.1203002929688, "step": 479 }, { "epoch": 0.7048458149779736, "fcm_dpo/beta": 0.0012020855210721493, "fcm_dpo/delta": 0.05400090664625168, "fcm_dpo/margin": 287.3784484863281, "fcm_dpo/q_t": 0.42420148849487305, "grad_norm": 66.96390533447266, "learning_rate": 1.227838333989088e-07, "logits/chosen": -0.8848108053207397, "logits/rejected": -0.8782625198364258, "logps/chosen": -796.4169921875, "logps/ref_chosen": -58.682701110839844, "logps/ref_rejected": -82.93248748779297, "logps/rejected": -1108.045166015625, "loss": 1.1735, "margin_dpo/margin_mean": 287.3784484863281, "margin_dpo/margin_std": 522.4732055664062, "step": 480 }, { "epoch": 0.7063142437591777, "fcm_dpo/beta": 0.0011700207833200693, "fcm_dpo/delta": -0.1395144760608673, "fcm_dpo/margin": 454.84710693359375, "fcm_dpo/q_t": 0.3807649314403534, "grad_norm": 38.5280647277832, "learning_rate": 1.2168076391719489e-07, "logits/chosen": -0.9153322577476501, "logits/rejected": -0.9426373243331909, "logps/chosen": -667.7345581054688, "logps/ref_chosen": -54.964271545410156, "logps/ref_rejected": -92.42044067382812, "logps/rejected": -1160.037841796875, "loss": 1.0112, "margin_dpo/margin_mean": 454.84710693359375, "margin_dpo/margin_std": 539.13134765625, "step": 481 }, { "epoch": 0.7077826725403817, "fcm_dpo/beta": 0.00119025819003582, "fcm_dpo/delta": 0.16345669329166412, "fcm_dpo/margin": 201.93572998046875, "fcm_dpo/q_t": 0.4430094063282013, "grad_norm": 72.33926391601562, "learning_rate": 1.2058107576668938e-07, "logits/chosen": -0.8522086143493652, "logits/rejected": -0.8430407643318176, "logps/chosen": -748.6646728515625, "logps/ref_chosen": -67.553466796875, "logps/ref_rejected": -87.58953857421875, "logps/rejected": -970.6365356445312, "loss": 1.2679, "margin_dpo/margin_mean": 201.93572998046875, "margin_dpo/margin_std": 574.7503051757812, "step": 482 }, { "epoch": 0.7092511013215859, "fcm_dpo/beta": 0.0011707013472914696, "fcm_dpo/delta": -0.15054769814014435, "fcm_dpo/margin": 463.0313415527344, "fcm_dpo/q_t": 0.3800230026245117, "grad_norm": 44.42465591430664, "learning_rate": 1.194847979251979e-07, "logits/chosen": -0.9137969017028809, "logits/rejected": -0.9261190891265869, "logps/chosen": -685.5653076171875, "logps/ref_chosen": -63.32981872558594, "logps/ref_rejected": -95.78697204589844, "logps/rejected": -1181.0537109375, "loss": 1.0094, "margin_dpo/margin_mean": 463.0313415527344, "margin_dpo/margin_std": 563.2684936523438, "step": 483 }, { "epoch": 0.71071953010279, "fcm_dpo/beta": 0.001149929827079177, "fcm_dpo/delta": -0.10865214467048645, "fcm_dpo/margin": 437.65692138671875, "fcm_dpo/q_t": 0.38679593801498413, "grad_norm": 52.580448150634766, "learning_rate": 1.1839195928066101e-07, "logits/chosen": -0.9185287952423096, "logits/rejected": -0.9445685148239136, "logps/chosen": -590.5552368164062, "logps/ref_chosen": -59.13812255859375, "logps/ref_rejected": -84.37144470214844, "logps/rejected": -1053.445556640625, "loss": 1.0252, "margin_dpo/margin_mean": 437.65692138671875, "margin_dpo/margin_std": 514.391357421875, "step": 484 }, { "epoch": 0.7121879588839941, "fcm_dpo/beta": 0.0011327798711135983, "fcm_dpo/delta": -0.053013693541288376, "fcm_dpo/margin": 397.80157470703125, "fcm_dpo/q_t": 0.399851530790329, "grad_norm": 38.13155746459961, "learning_rate": 1.1730258863039347e-07, "logits/chosen": -0.8724352717399597, "logits/rejected": -0.8907235860824585, "logps/chosen": -631.8712158203125, "logps/ref_chosen": -58.849571228027344, "logps/ref_rejected": -103.36408233642578, "logps/rejected": -1074.187255859375, "loss": 1.0791, "margin_dpo/margin_mean": 397.80157470703125, "margin_dpo/margin_std": 566.2918701171875, "step": 485 }, { "epoch": 0.7136563876651982, "fcm_dpo/beta": 0.0011056348448619246, "fcm_dpo/delta": -0.11685362458229065, "fcm_dpo/margin": 461.8829345703125, "fcm_dpo/q_t": 0.3876580595970154, "grad_norm": 40.950435638427734, "learning_rate": 1.1621671468032493e-07, "logits/chosen": -0.9299178123474121, "logits/rejected": -0.945504903793335, "logps/chosen": -695.784423828125, "logps/ref_chosen": -55.25966262817383, "logps/ref_rejected": -92.13936614990234, "logps/rejected": -1194.547119140625, "loss": 1.0587, "margin_dpo/margin_mean": 461.8829345703125, "margin_dpo/margin_std": 665.51806640625, "step": 486 }, { "epoch": 0.7151248164464024, "fcm_dpo/beta": 0.001111747114919126, "fcm_dpo/delta": 0.03676484897732735, "fcm_dpo/margin": 327.4232177734375, "fcm_dpo/q_t": 0.41555002331733704, "grad_norm": 40.30672073364258, "learning_rate": 1.1513436604424378e-07, "logits/chosen": -0.9425885677337646, "logits/rejected": -0.9484249353408813, "logps/chosen": -738.612548828125, "logps/ref_chosen": -53.06330871582031, "logps/ref_rejected": -92.41883087158203, "logps/rejected": -1105.3912353515625, "loss": 1.1361, "margin_dpo/margin_mean": 327.4231872558594, "margin_dpo/margin_std": 529.4686889648438, "step": 487 }, { "epoch": 0.7165932452276065, "fcm_dpo/beta": 0.0011130350176244974, "fcm_dpo/delta": 0.004432627931237221, "fcm_dpo/margin": 355.3664855957031, "fcm_dpo/q_t": 0.40918317437171936, "grad_norm": 37.70393753051758, "learning_rate": 1.1405557124304335e-07, "logits/chosen": -0.8855708241462708, "logits/rejected": -0.8907876014709473, "logps/chosen": -617.8189697265625, "logps/ref_chosen": -52.22815704345703, "logps/ref_rejected": -84.00656127929688, "logps/rejected": -1004.9638671875, "loss": 1.0909, "margin_dpo/margin_mean": 355.366455078125, "margin_dpo/margin_std": 460.8048095703125, "step": 488 }, { "epoch": 0.7180616740088106, "fcm_dpo/beta": 0.0011166043113917112, "fcm_dpo/delta": 0.015580521896481514, "fcm_dpo/margin": 344.5076599121094, "fcm_dpo/q_t": 0.4143439531326294, "grad_norm": 32.79881286621094, "learning_rate": 1.1298035870396985e-07, "logits/chosen": -0.9116396307945251, "logits/rejected": -0.9118118286132812, "logps/chosen": -587.8505859375, "logps/ref_chosen": -55.989627838134766, "logps/ref_rejected": -79.39812469482422, "logps/rejected": -955.7667236328125, "loss": 1.1132, "margin_dpo/margin_mean": 344.5076904296875, "margin_dpo/margin_std": 512.8632202148438, "step": 489 }, { "epoch": 0.7195301027900147, "fcm_dpo/beta": 0.0011167211923748255, "fcm_dpo/delta": 0.0001148320734500885, "fcm_dpo/margin": 357.92462158203125, "fcm_dpo/q_t": 0.4123692512512207, "grad_norm": 48.77171325683594, "learning_rate": 1.1190875675987355e-07, "logits/chosen": -0.9318056106567383, "logits/rejected": -0.9702289700508118, "logps/chosen": -655.4432373046875, "logps/ref_chosen": -52.36639404296875, "logps/ref_rejected": -110.4090576171875, "logps/rejected": -1071.4105224609375, "loss": 1.1441, "margin_dpo/margin_mean": 357.9245910644531, "margin_dpo/margin_std": 644.6651611328125, "step": 490 }, { "epoch": 0.7209985315712188, "fcm_dpo/beta": 0.0011353420559316874, "fcm_dpo/delta": 0.1237509548664093, "fcm_dpo/margin": 246.2215118408203, "fcm_dpo/q_t": 0.4365187883377075, "grad_norm": 30.707971572875977, "learning_rate": 1.1084079364846241e-07, "logits/chosen": -0.9380520582199097, "logits/rejected": -0.9351228475570679, "logps/chosen": -626.2109375, "logps/ref_chosen": -60.11626434326172, "logps/ref_rejected": -73.27278900146484, "logps/rejected": -885.5888671875, "loss": 1.1901, "margin_dpo/margin_mean": 246.22149658203125, "margin_dpo/margin_std": 459.95343017578125, "step": 491 }, { "epoch": 0.7224669603524229, "fcm_dpo/beta": 0.0011589345522224903, "fcm_dpo/delta": 0.11556318402290344, "fcm_dpo/margin": 248.4207763671875, "fcm_dpo/q_t": 0.43614447116851807, "grad_norm": 39.64064407348633, "learning_rate": 1.097764975115576e-07, "logits/chosen": -0.9761683940887451, "logits/rejected": -0.9563957452774048, "logps/chosen": -648.32568359375, "logps/ref_chosen": -53.994178771972656, "logps/ref_rejected": -72.65962219238281, "logps/rejected": -915.411865234375, "loss": 1.2158, "margin_dpo/margin_mean": 248.42079162597656, "margin_dpo/margin_std": 551.939453125, "step": 492 }, { "epoch": 0.723935389133627, "fcm_dpo/beta": 0.0011761472560465336, "fcm_dpo/delta": 0.08842451870441437, "fcm_dpo/margin": 267.0850524902344, "fcm_dpo/q_t": 0.426922470331192, "grad_norm": 36.99184799194336, "learning_rate": 1.0871589639435203e-07, "logits/chosen": -0.9865261316299438, "logits/rejected": -0.9545494318008423, "logps/chosen": -706.1417236328125, "logps/ref_chosen": -75.49723815917969, "logps/ref_rejected": -87.32301330566406, "logps/rejected": -985.0526123046875, "loss": 1.1778, "margin_dpo/margin_mean": 267.0850524902344, "margin_dpo/margin_std": 489.79541015625, "step": 493 }, { "epoch": 0.7254038179148311, "fcm_dpo/beta": 0.0011666524223983288, "fcm_dpo/delta": -0.11960312724113464, "fcm_dpo/margin": 440.0100402832031, "fcm_dpo/q_t": 0.38171452283859253, "grad_norm": 46.9616584777832, "learning_rate": 1.0765901824467166e-07, "logits/chosen": -0.8546612858772278, "logits/rejected": -0.8873025178909302, "logps/chosen": -538.9559326171875, "logps/ref_chosen": -41.35926818847656, "logps/ref_rejected": -86.09136962890625, "logps/rejected": -1023.697998046875, "loss": 1.0058, "margin_dpo/margin_mean": 440.01007080078125, "margin_dpo/margin_std": 467.8382263183594, "step": 494 }, { "epoch": 0.7268722466960352, "fcm_dpo/beta": 0.0011541168205440044, "fcm_dpo/delta": -0.04541929066181183, "fcm_dpo/margin": 384.21527099609375, "fcm_dpo/q_t": 0.40197789669036865, "grad_norm": 42.29792022705078, "learning_rate": 1.0660589091223854e-07, "logits/chosen": -0.9579362869262695, "logits/rejected": -0.9644915461540222, "logps/chosen": -626.9910888671875, "logps/ref_chosen": -63.53507995605469, "logps/ref_rejected": -91.42443084716797, "logps/rejected": -1039.095703125, "loss": 1.0847, "margin_dpo/margin_mean": 384.21527099609375, "margin_dpo/margin_std": 566.3270874023438, "step": 495 }, { "epoch": 0.7283406754772394, "fcm_dpo/beta": 0.0011813519522547722, "fcm_dpo/delta": 0.176091730594635, "fcm_dpo/margin": 192.8607177734375, "fcm_dpo/q_t": 0.4472728371620178, "grad_norm": 79.11801147460938, "learning_rate": 1.0555654214793722e-07, "logits/chosen": -0.9284073710441589, "logits/rejected": -0.8919467926025391, "logps/chosen": -776.5562744140625, "logps/ref_chosen": -72.5919189453125, "logps/ref_rejected": -84.32933807373047, "logps/rejected": -981.1544189453125, "loss": 1.2346, "margin_dpo/margin_mean": 192.86068725585938, "margin_dpo/margin_std": 430.8555908203125, "step": 496 }, { "epoch": 0.7298091042584435, "fcm_dpo/beta": 0.001212080824188888, "fcm_dpo/delta": 0.15942029654979706, "fcm_dpo/margin": 201.83465576171875, "fcm_dpo/q_t": 0.4441620111465454, "grad_norm": 43.26478576660156, "learning_rate": 1.0451099960308374e-07, "logits/chosen": -0.8874781727790833, "logits/rejected": -0.8688886165618896, "logps/chosen": -716.3388671875, "logps/ref_chosen": -58.59397506713867, "logps/ref_rejected": -76.28836822509766, "logps/rejected": -935.867919921875, "loss": 1.2305, "margin_dpo/margin_mean": 201.83465576171875, "margin_dpo/margin_std": 438.799560546875, "step": 497 }, { "epoch": 0.7312775330396476, "fcm_dpo/beta": 0.0012286882847547531, "fcm_dpo/delta": 0.0005217818543314934, "fcm_dpo/margin": 325.12445068359375, "fcm_dpo/q_t": 0.40874072909355164, "grad_norm": 41.114078521728516, "learning_rate": 1.0346929082869641e-07, "logits/chosen": -0.8940218687057495, "logits/rejected": -0.8773443698883057, "logps/chosen": -690.500732421875, "logps/ref_chosen": -71.20565795898438, "logps/ref_rejected": -83.95803833007812, "logps/rejected": -1028.3775634765625, "loss": 1.1315, "margin_dpo/margin_mean": 325.12445068359375, "margin_dpo/margin_std": 548.890380859375, "step": 498 }, { "epoch": 0.7327459618208517, "fcm_dpo/beta": 0.0012098584556952119, "fcm_dpo/delta": -0.1002291664481163, "fcm_dpo/margin": 409.18572998046875, "fcm_dpo/q_t": 0.38879793882369995, "grad_norm": 43.56269454956055, "learning_rate": 1.0243144327477013e-07, "logits/chosen": -0.9197027683258057, "logits/rejected": -0.9493994116783142, "logps/chosen": -586.4039306640625, "logps/ref_chosen": -51.25519561767578, "logps/ref_rejected": -101.07870483398438, "logps/rejected": -1045.4130859375, "loss": 1.0504, "margin_dpo/margin_mean": 409.18572998046875, "margin_dpo/margin_std": 551.0729370117188, "step": 499 }, { "epoch": 0.7342143906020558, "fcm_dpo/beta": 0.001203530584461987, "fcm_dpo/delta": -0.00410887785255909, "fcm_dpo/margin": 335.60675048828125, "fcm_dpo/q_t": 0.4080291986465454, "grad_norm": 34.33218765258789, "learning_rate": 1.0139748428955333e-07, "logits/chosen": -0.8823539018630981, "logits/rejected": -0.9056754112243652, "logps/chosen": -684.366455078125, "logps/ref_chosen": -57.027442932128906, "logps/ref_rejected": -93.93421173095703, "logps/rejected": -1056.8798828125, "loss": 1.116, "margin_dpo/margin_mean": 335.6067810058594, "margin_dpo/margin_std": 522.2293701171875, "step": 500 }, { "epoch": 0.73568281938326, "fcm_dpo/beta": 0.0011992482468485832, "fcm_dpo/delta": -0.04197482019662857, "fcm_dpo/margin": 367.0113525390625, "fcm_dpo/q_t": 0.4015328288078308, "grad_norm": 40.373878479003906, "learning_rate": 1.0036744111882672e-07, "logits/chosen": -0.8434513807296753, "logits/rejected": -0.8268097639083862, "logps/chosen": -608.291748046875, "logps/ref_chosen": -54.359527587890625, "logps/ref_rejected": -80.15670013427734, "logps/rejected": -1001.100341796875, "loss": 1.0956, "margin_dpo/margin_mean": 367.0113830566406, "margin_dpo/margin_std": 555.9649658203125, "step": 501 }, { "epoch": 0.737151248164464, "fcm_dpo/beta": 0.001190928858704865, "fcm_dpo/delta": -0.029523320496082306, "fcm_dpo/margin": 359.60321044921875, "fcm_dpo/q_t": 0.4018331468105316, "grad_norm": 32.51664733886719, "learning_rate": 9.934134090518592e-08, "logits/chosen": -0.7909951210021973, "logits/rejected": -0.7704396843910217, "logps/chosen": -573.842041015625, "logps/ref_chosen": -67.60050964355469, "logps/ref_rejected": -82.94876098632812, "logps/rejected": -948.7935180664062, "loss": 1.0626, "margin_dpo/margin_mean": 359.60321044921875, "margin_dpo/margin_std": 427.0804443359375, "step": 502 }, { "epoch": 0.7386196769456681, "fcm_dpo/beta": 0.001186006236821413, "fcm_dpo/delta": 0.008181419223546982, "fcm_dpo/margin": 330.5501708984375, "fcm_dpo/q_t": 0.4111096262931824, "grad_norm": 25.633560180664062, "learning_rate": 9.831921068732571e-08, "logits/chosen": -0.7634469270706177, "logits/rejected": -0.7438396215438843, "logps/chosen": -551.8092651367188, "logps/ref_chosen": -55.078407287597656, "logps/ref_rejected": -82.50544738769531, "logps/rejected": -909.7864990234375, "loss": 1.0933, "margin_dpo/margin_mean": 330.5502014160156, "margin_dpo/margin_std": 435.82733154296875, "step": 503 }, { "epoch": 0.7400881057268722, "fcm_dpo/beta": 0.0011800960637629032, "fcm_dpo/delta": -0.064161516726017, "fcm_dpo/margin": 390.8580322265625, "fcm_dpo/q_t": 0.39606422185897827, "grad_norm": 30.177688598632812, "learning_rate": 9.730107739932805e-08, "logits/chosen": -0.8217068314552307, "logits/rejected": -0.8377784490585327, "logps/chosen": -611.22216796875, "logps/ref_chosen": -59.96575164794922, "logps/ref_rejected": -103.76212310791016, "logps/rejected": -1045.8765869140625, "loss": 1.0653, "margin_dpo/margin_mean": 390.8580017089844, "margin_dpo/margin_std": 516.3780517578125, "step": 504 }, { "epoch": 0.7415565345080763, "fcm_dpo/beta": 0.0012019076384603977, "fcm_dpo/delta": 0.14581286907196045, "fcm_dpo/margin": 214.4113006591797, "fcm_dpo/q_t": 0.4408836364746094, "grad_norm": 37.63759231567383, "learning_rate": 9.628696786995188e-08, "logits/chosen": -0.852393627166748, "logits/rejected": -0.8234021663665771, "logps/chosen": -684.0704345703125, "logps/ref_chosen": -76.1549072265625, "logps/ref_rejected": -88.58537292480469, "logps/rejected": -910.912109375, "loss": 1.2104, "margin_dpo/margin_mean": 214.41128540039062, "margin_dpo/margin_std": 429.28167724609375, "step": 505 }, { "epoch": 0.7430249632892805, "fcm_dpo/beta": 0.001200593076646328, "fcm_dpo/delta": -0.024187199771404266, "fcm_dpo/margin": 352.2035217285156, "fcm_dpo/q_t": 0.40292614698410034, "grad_norm": 40.91268539428711, "learning_rate": 9.527690882192635e-08, "logits/chosen": -0.8200086355209351, "logits/rejected": -0.8278741836547852, "logps/chosen": -518.744873046875, "logps/ref_chosen": -48.96050262451172, "logps/ref_rejected": -78.41505432128906, "logps/rejected": -900.4028930664062, "loss": 1.0806, "margin_dpo/margin_mean": 352.2035217285156, "margin_dpo/margin_std": 464.32720947265625, "step": 506 }, { "epoch": 0.7444933920704846, "fcm_dpo/beta": 0.0012085672933608294, "fcm_dpo/delta": 0.0354694128036499, "fcm_dpo/margin": 302.70806884765625, "fcm_dpo/q_t": 0.42136380076408386, "grad_norm": 32.80398178100586, "learning_rate": 9.427092687124691e-08, "logits/chosen": -0.8519065380096436, "logits/rejected": -0.8482725620269775, "logps/chosen": -630.7051391601562, "logps/ref_chosen": -66.80149841308594, "logps/ref_rejected": -95.37289428710938, "logps/rejected": -961.984619140625, "loss": 1.1537, "margin_dpo/margin_mean": 302.70806884765625, "margin_dpo/margin_std": 559.1907958984375, "step": 507 }, { "epoch": 0.7459618208516887, "fcm_dpo/beta": 0.001229484099894762, "fcm_dpo/delta": 0.0795753076672554, "fcm_dpo/margin": 262.43280029296875, "fcm_dpo/q_t": 0.42874249815940857, "grad_norm": 36.952003479003906, "learning_rate": 9.326904852647344e-08, "logits/chosen": -0.8085677623748779, "logits/rejected": -0.8026360273361206, "logps/chosen": -644.8947143554688, "logps/ref_chosen": -71.303466796875, "logps/ref_rejected": -95.6275405883789, "logps/rejected": -931.651611328125, "loss": 1.192, "margin_dpo/margin_mean": 262.4328308105469, "margin_dpo/margin_std": 540.2072143554688, "step": 508 }, { "epoch": 0.7474302496328928, "fcm_dpo/beta": 0.0012463298626244068, "fcm_dpo/delta": 0.04969964176416397, "fcm_dpo/margin": 281.92559814453125, "fcm_dpo/q_t": 0.4197409749031067, "grad_norm": 35.611900329589844, "learning_rate": 9.227130018803195e-08, "logits/chosen": -0.7330523729324341, "logits/rejected": -0.7244545221328735, "logps/chosen": -496.95751953125, "logps/ref_chosen": -63.81895065307617, "logps/ref_rejected": -83.25643920898438, "logps/rejected": -798.3206176757812, "loss": 1.132, "margin_dpo/margin_mean": 281.92559814453125, "margin_dpo/margin_std": 420.67022705078125, "step": 509 }, { "epoch": 0.748898678414097, "fcm_dpo/beta": 0.001235937001183629, "fcm_dpo/delta": -0.050036292523145676, "fcm_dpo/margin": 362.3361511230469, "fcm_dpo/q_t": 0.39491382241249084, "grad_norm": 36.38752746582031, "learning_rate": 9.127770814751932e-08, "logits/chosen": -0.7187889218330383, "logits/rejected": -0.7337198257446289, "logps/chosen": -575.311279296875, "logps/ref_chosen": -51.878448486328125, "logps/ref_rejected": -102.7651596069336, "logps/rejected": -988.5341186523438, "loss": 1.0406, "margin_dpo/margin_mean": 362.33612060546875, "margin_dpo/margin_std": 382.94219970703125, "step": 510 }, { "epoch": 0.750367107195301, "fcm_dpo/beta": 0.0012384748551994562, "fcm_dpo/delta": 0.0352516807615757, "fcm_dpo/margin": 295.53057861328125, "fcm_dpo/q_t": 0.4174777865409851, "grad_norm": 43.398075103759766, "learning_rate": 9.028829858700973e-08, "logits/chosen": -0.7895054817199707, "logits/rejected": -0.7914731502532959, "logps/chosen": -560.07421875, "logps/ref_chosen": -60.23811721801758, "logps/ref_rejected": -92.85676574707031, "logps/rejected": -888.2234497070312, "loss": 1.153, "margin_dpo/margin_mean": 295.53057861328125, "margin_dpo/margin_std": 535.6825561523438, "step": 511 }, { "epoch": 0.7518355359765051, "fcm_dpo/beta": 0.0012218713527545333, "fcm_dpo/delta": -0.08188765496015549, "fcm_dpo/margin": 390.9322509765625, "fcm_dpo/q_t": 0.3888099193572998, "grad_norm": 54.90243911743164, "learning_rate": 8.930309757836516e-08, "logits/chosen": -0.7493938207626343, "logits/rejected": -0.7599710822105408, "logps/chosen": -444.6104736328125, "logps/ref_chosen": -54.905494689941406, "logps/ref_rejected": -81.87586975097656, "logps/rejected": -862.5130615234375, "loss": 1.0207, "margin_dpo/margin_mean": 390.9322509765625, "margin_dpo/margin_std": 403.09393310546875, "step": 512 }, { "epoch": 0.7533039647577092, "fcm_dpo/beta": 0.0012233736924827099, "fcm_dpo/delta": 0.05911244451999664, "fcm_dpo/margin": 280.02032470703125, "fcm_dpo/q_t": 0.4215858578681946, "grad_norm": 46.705345153808594, "learning_rate": 8.832213108254863e-08, "logits/chosen": -0.8222278356552124, "logits/rejected": -0.8028245568275452, "logps/chosen": -562.989013671875, "logps/ref_chosen": -64.91644287109375, "logps/ref_rejected": -76.06245422363281, "logps/rejected": -854.1553955078125, "loss": 1.1409, "margin_dpo/margin_mean": 280.02032470703125, "margin_dpo/margin_std": 426.9283447265625, "step": 513 }, { "epoch": 0.7547723935389133, "fcm_dpo/beta": 0.0012440317077562213, "fcm_dpo/delta": 0.044145580381155014, "fcm_dpo/margin": 287.24462890625, "fcm_dpo/q_t": 0.4205576777458191, "grad_norm": 33.267601013183594, "learning_rate": 8.734542494893954e-08, "logits/chosen": -0.76353919506073, "logits/rejected": -0.7479803562164307, "logps/chosen": -612.5877685546875, "logps/ref_chosen": -74.22957611083984, "logps/ref_rejected": -78.945556640625, "logps/rejected": -904.54833984375, "loss": 1.1419, "margin_dpo/margin_mean": 287.24462890625, "margin_dpo/margin_std": 480.07623291015625, "step": 514 }, { "epoch": 0.7562408223201175, "fcm_dpo/beta": 0.0012643520021811128, "fcm_dpo/delta": 0.10326212644577026, "fcm_dpo/margin": 237.156005859375, "fcm_dpo/q_t": 0.42959409952163696, "grad_norm": 43.65932083129883, "learning_rate": 8.637300491465272e-08, "logits/chosen": -0.7629779577255249, "logits/rejected": -0.771415114402771, "logps/chosen": -499.5674743652344, "logps/ref_chosen": -50.40156555175781, "logps/ref_rejected": -87.09774780273438, "logps/rejected": -773.419677734375, "loss": 1.1742, "margin_dpo/margin_mean": 237.156005859375, "margin_dpo/margin_std": 419.44488525390625, "step": 515 }, { "epoch": 0.7577092511013216, "fcm_dpo/beta": 0.0012712322641164064, "fcm_dpo/delta": -0.02079898491501808, "fcm_dpo/margin": 330.2271728515625, "fcm_dpo/q_t": 0.4013640880584717, "grad_norm": 44.21212387084961, "learning_rate": 8.540489660386064e-08, "logits/chosen": -0.8088574409484863, "logits/rejected": -0.8313539028167725, "logps/chosen": -550.5634765625, "logps/ref_chosen": -64.64956665039062, "logps/ref_rejected": -111.72237396240234, "logps/rejected": -927.8634033203125, "loss": 1.0712, "margin_dpo/margin_mean": 330.2271728515625, "margin_dpo/margin_std": 396.8078918457031, "step": 516 }, { "epoch": 0.7591776798825257, "fcm_dpo/beta": 0.0012457960983738303, "fcm_dpo/delta": -0.09264262765645981, "fcm_dpo/margin": 391.58154296875, "fcm_dpo/q_t": 0.3908216953277588, "grad_norm": 32.42768478393555, "learning_rate": 8.444112552711752e-08, "logits/chosen": -0.7810485363006592, "logits/rejected": -0.77211594581604, "logps/chosen": -568.1607666015625, "logps/ref_chosen": -60.913551330566406, "logps/ref_rejected": -89.08308410644531, "logps/rejected": -987.911865234375, "loss": 1.041, "margin_dpo/margin_mean": 391.5815734863281, "margin_dpo/margin_std": 495.0039367675781, "step": 517 }, { "epoch": 0.7606461086637298, "fcm_dpo/beta": 0.0012362590059638023, "fcm_dpo/delta": -0.010039325803518295, "fcm_dpo/margin": 331.1033935546875, "fcm_dpo/q_t": 0.40492960810661316, "grad_norm": 51.907432556152344, "learning_rate": 8.348171708068747e-08, "logits/chosen": -0.8003250360488892, "logits/rejected": -0.8121221661567688, "logps/chosen": -537.2240600585938, "logps/ref_chosen": -57.45589065551758, "logps/ref_rejected": -85.31269836425781, "logps/rejected": -896.184326171875, "loss": 1.082, "margin_dpo/margin_mean": 331.1033935546875, "margin_dpo/margin_std": 417.5572509765625, "step": 518 }, { "epoch": 0.762114537444934, "fcm_dpo/beta": 0.0012659772764891386, "fcm_dpo/delta": 0.11485999822616577, "fcm_dpo/margin": 227.35385131835938, "fcm_dpo/q_t": 0.43359264731407166, "grad_norm": 34.693687438964844, "learning_rate": 8.25266965458755e-08, "logits/chosen": -0.8020866513252258, "logits/rejected": -0.7832698822021484, "logps/chosen": -571.0335693359375, "logps/ref_chosen": -74.06331634521484, "logps/ref_rejected": -104.44416809082031, "logps/rejected": -828.768310546875, "loss": 1.1924, "margin_dpo/margin_mean": 227.35385131835938, "margin_dpo/margin_std": 434.76171875, "step": 519 }, { "epoch": 0.7635829662261381, "fcm_dpo/beta": 0.0012666715774685144, "fcm_dpo/delta": 0.012290934100747108, "fcm_dpo/margin": 306.2880859375, "fcm_dpo/q_t": 0.41312289237976074, "grad_norm": 41.684967041015625, "learning_rate": 8.15760890883607e-08, "logits/chosen": -0.7368162870407104, "logits/rejected": -0.7360740900039673, "logps/chosen": -598.6595458984375, "logps/ref_chosen": -70.2998275756836, "logps/ref_rejected": -99.98133850097656, "logps/rejected": -934.629150390625, "loss": 1.115, "margin_dpo/margin_mean": 306.2880859375, "margin_dpo/margin_std": 450.21893310546875, "step": 520 }, { "epoch": 0.7650513950073421, "fcm_dpo/beta": 0.001273997942917049, "fcm_dpo/delta": -0.03606198728084564, "fcm_dpo/margin": 340.52056884765625, "fcm_dpo/q_t": 0.4016761779785156, "grad_norm": 35.294342041015625, "learning_rate": 8.062991975753378e-08, "logits/chosen": -0.8503248691558838, "logits/rejected": -0.8509985208511353, "logps/chosen": -542.428955078125, "logps/ref_chosen": -58.14292526245117, "logps/ref_rejected": -83.28060913085938, "logps/rejected": -908.0872192382812, "loss": 1.0758, "margin_dpo/margin_mean": 340.5205383300781, "margin_dpo/margin_std": 442.4564208984375, "step": 521 }, { "epoch": 0.7665198237885462, "fcm_dpo/beta": 0.0012649366399273276, "fcm_dpo/delta": 0.010182084515690804, "fcm_dpo/margin": 308.485595703125, "fcm_dpo/q_t": 0.41139504313468933, "grad_norm": 30.87180519104004, "learning_rate": 7.968821348583643e-08, "logits/chosen": -0.854311466217041, "logits/rejected": -0.8545736074447632, "logps/chosen": -605.51220703125, "logps/ref_chosen": -46.54766845703125, "logps/ref_rejected": -66.01388549804688, "logps/rejected": -933.4640502929688, "loss": 1.1183, "margin_dpo/margin_mean": 308.485595703125, "margin_dpo/margin_std": 476.64263916015625, "step": 522 }, { "epoch": 0.7679882525697503, "fcm_dpo/beta": 0.001264127902686596, "fcm_dpo/delta": -0.007558091077953577, "fcm_dpo/margin": 322.1591491699219, "fcm_dpo/q_t": 0.41010820865631104, "grad_norm": 41.77898406982422, "learning_rate": 7.875099508810484e-08, "logits/chosen": -0.9136984348297119, "logits/rejected": -0.9157658815383911, "logps/chosen": -658.3345947265625, "logps/ref_chosen": -61.76960372924805, "logps/ref_rejected": -83.76141357421875, "logps/rejected": -1002.485595703125, "loss": 1.1291, "margin_dpo/margin_mean": 322.1591796875, "margin_dpo/margin_std": 548.837158203125, "step": 523 }, { "epoch": 0.7694566813509545, "fcm_dpo/beta": 0.001250438392162323, "fcm_dpo/delta": -0.03454245999455452, "fcm_dpo/margin": 345.728271484375, "fcm_dpo/q_t": 0.39945122599601746, "grad_norm": 37.57605743408203, "learning_rate": 7.781828926091535e-08, "logits/chosen": -0.9349164366722107, "logits/rejected": -0.9216375350952148, "logps/chosen": -658.25390625, "logps/ref_chosen": -78.0720443725586, "logps/ref_rejected": -81.30198669433594, "logps/rejected": -1007.212158203125, "loss": 1.0951, "margin_dpo/margin_mean": 345.728271484375, "margin_dpo/margin_std": 496.82781982421875, "step": 524 }, { "epoch": 0.7709251101321586, "fcm_dpo/beta": 0.0012223366647958755, "fcm_dpo/delta": -0.14574724435806274, "fcm_dpo/margin": 439.2078857421875, "fcm_dpo/q_t": 0.38136690855026245, "grad_norm": 29.998579025268555, "learning_rate": 7.689012058193384e-08, "logits/chosen": -0.8645678162574768, "logits/rejected": -0.9013247489929199, "logps/chosen": -630.4777221679688, "logps/ref_chosen": -50.827857971191406, "logps/ref_rejected": -100.05294036865234, "logps/rejected": -1118.91064453125, "loss": 1.0154, "margin_dpo/margin_mean": 439.2078857421875, "margin_dpo/margin_std": 538.682373046875, "step": 525 }, { "epoch": 0.7723935389133627, "fcm_dpo/beta": 0.0012013528030365705, "fcm_dpo/delta": -0.12226028740406036, "fcm_dpo/margin": 429.6107177734375, "fcm_dpo/q_t": 0.38352981209754944, "grad_norm": 31.9099178314209, "learning_rate": 7.596651350926836e-08, "logits/chosen": -0.8861783146858215, "logits/rejected": -0.881539523601532, "logps/chosen": -684.8582153320312, "logps/ref_chosen": -63.167236328125, "logps/ref_rejected": -86.30934143066406, "logps/rejected": -1137.611083984375, "loss": 1.048, "margin_dpo/margin_mean": 429.6107177734375, "margin_dpo/margin_std": 579.431396484375, "step": 526 }, { "epoch": 0.7738619676945668, "fcm_dpo/beta": 0.0011940683471038938, "fcm_dpo/delta": 0.04404643923044205, "fcm_dpo/margin": 299.3970947265625, "fcm_dpo/q_t": 0.41738709807395935, "grad_norm": 34.606040954589844, "learning_rate": 7.504749238082414e-08, "logits/chosen": -1.0818817615509033, "logits/rejected": -1.0509649515151978, "logps/chosen": -709.3896484375, "logps/ref_chosen": -71.12867736816406, "logps/ref_rejected": -78.3425521850586, "logps/rejected": -1016.0006713867188, "loss": 1.1256, "margin_dpo/margin_mean": 299.3970947265625, "margin_dpo/margin_std": 437.45355224609375, "step": 527 }, { "epoch": 0.775330396475771, "fcm_dpo/beta": 0.001194945303723216, "fcm_dpo/delta": -0.01856505125761032, "fcm_dpo/margin": 349.5766906738281, "fcm_dpo/q_t": 0.40821221470832825, "grad_norm": 51.902496337890625, "learning_rate": 7.413308141366254e-08, "logits/chosen": -0.9746694564819336, "logits/rejected": -0.9602982997894287, "logps/chosen": -706.31396484375, "logps/ref_chosen": -68.0894546508789, "logps/ref_rejected": -93.91006469726562, "logps/rejected": -1081.711181640625, "loss": 1.1192, "margin_dpo/margin_mean": 349.5766906738281, "margin_dpo/margin_std": 573.882080078125, "step": 528 }, { "epoch": 0.7767988252569751, "fcm_dpo/beta": 0.0012100792955607176, "fcm_dpo/delta": 0.10508871078491211, "fcm_dpo/margin": 246.39369201660156, "fcm_dpo/q_t": 0.4327790141105652, "grad_norm": 48.65058898925781, "learning_rate": 7.322330470336313e-08, "logits/chosen": -0.996649980545044, "logits/rejected": -1.008021593093872, "logps/chosen": -809.325927734375, "logps/ref_chosen": -55.57495880126953, "logps/ref_rejected": -89.20909118652344, "logps/rejected": -1089.353759765625, "loss": 1.2287, "margin_dpo/margin_mean": 246.39369201660156, "margin_dpo/margin_std": 580.08642578125, "step": 529 }, { "epoch": 0.7782672540381792, "fcm_dpo/beta": 0.0012070810189470649, "fcm_dpo/delta": -0.08302216976881027, "fcm_dpo/margin": 396.90985107421875, "fcm_dpo/q_t": 0.3978345990180969, "grad_norm": 57.351463317871094, "learning_rate": 7.231818622338822e-08, "logits/chosen": -0.880218505859375, "logits/rejected": -0.8767000436782837, "logps/chosen": -690.0313720703125, "logps/ref_chosen": -47.601417541503906, "logps/ref_rejected": -87.2845230102539, "logps/rejected": -1126.624267578125, "loss": 1.1142, "margin_dpo/margin_mean": 396.90985107421875, "margin_dpo/margin_std": 693.56298828125, "step": 530 }, { "epoch": 0.7797356828193832, "fcm_dpo/beta": 0.0011991492938250303, "fcm_dpo/delta": -0.024136528372764587, "fcm_dpo/margin": 352.80865478515625, "fcm_dpo/q_t": 0.40578585863113403, "grad_norm": 44.2398681640625, "learning_rate": 7.141774982445147e-08, "logits/chosen": -1.0027759075164795, "logits/rejected": -0.9875552654266357, "logps/chosen": -751.2490234375, "logps/ref_chosen": -55.246063232421875, "logps/ref_rejected": -70.60598754882812, "logps/rejected": -1119.4176025390625, "loss": 1.1017, "margin_dpo/margin_mean": 352.80865478515625, "margin_dpo/margin_std": 537.1859741210938, "step": 531 }, { "epoch": 0.7812041116005873, "fcm_dpo/beta": 0.0011825578985735774, "fcm_dpo/delta": -0.022645261138677597, "fcm_dpo/margin": 355.6239318847656, "fcm_dpo/q_t": 0.40732958912849426, "grad_norm": 74.50859832763672, "learning_rate": 7.052201923388953e-08, "logits/chosen": -0.9509673714637756, "logits/rejected": -0.9269773960113525, "logps/chosen": -783.3470458984375, "logps/ref_chosen": -70.28601837158203, "logps/ref_rejected": -86.5913314819336, "logps/rejected": -1155.2763671875, "loss": 1.1382, "margin_dpo/margin_mean": 355.62396240234375, "margin_dpo/margin_std": 624.2130737304688, "step": 532 }, { "epoch": 0.7826725403817915, "fcm_dpo/beta": 0.001213046140037477, "fcm_dpo/delta": 0.09190007299184799, "fcm_dpo/margin": 255.36468505859375, "fcm_dpo/q_t": 0.4308984875679016, "grad_norm": 47.32761764526367, "learning_rate": 6.963101805503646e-08, "logits/chosen": -0.9791627526283264, "logits/rejected": -0.9598466753959656, "logps/chosen": -701.266357421875, "logps/ref_chosen": -64.8551025390625, "logps/ref_rejected": -76.58805847167969, "logps/rejected": -968.364013671875, "loss": 1.2054, "margin_dpo/margin_mean": 255.3646697998047, "margin_dpo/margin_std": 552.512451171875, "step": 533 }, { "epoch": 0.7841409691629956, "fcm_dpo/beta": 0.001198928919620812, "fcm_dpo/delta": -0.02406427264213562, "fcm_dpo/margin": 351.9185485839844, "fcm_dpo/q_t": 0.40505561232566833, "grad_norm": 48.15425491333008, "learning_rate": 6.874476976660184e-08, "logits/chosen": -0.9442458152770996, "logits/rejected": -0.9411351680755615, "logps/chosen": -707.2823486328125, "logps/ref_chosen": -60.119388580322266, "logps/ref_rejected": -78.54347229003906, "logps/rejected": -1077.625, "loss": 1.0963, "margin_dpo/margin_mean": 351.9185791015625, "margin_dpo/margin_std": 504.8380126953125, "step": 534 }, { "epoch": 0.7856093979441997, "fcm_dpo/beta": 0.0011966589372605085, "fcm_dpo/delta": -0.0962173268198967, "fcm_dpo/margin": 410.5157165527344, "fcm_dpo/q_t": 0.390081524848938, "grad_norm": 31.493331909179688, "learning_rate": 6.786329772205246e-08, "logits/chosen": -0.8576223850250244, "logits/rejected": -0.8609852194786072, "logps/chosen": -605.9679565429688, "logps/ref_chosen": -54.330238342285156, "logps/ref_rejected": -96.30763244628906, "logps/rejected": -1058.4610595703125, "loss": 1.0442, "margin_dpo/margin_mean": 410.51568603515625, "margin_dpo/margin_std": 513.386962890625, "step": 535 }, { "epoch": 0.7870778267254038, "fcm_dpo/beta": 0.001155639300122857, "fcm_dpo/delta": -0.15889059007167816, "fcm_dpo/margin": 476.0905456542969, "fcm_dpo/q_t": 0.38305604457855225, "grad_norm": 43.20456314086914, "learning_rate": 6.698662514899638e-08, "logits/chosen": -0.833030104637146, "logits/rejected": -0.8629981279373169, "logps/chosen": -540.8412475585938, "logps/ref_chosen": -47.08053207397461, "logps/ref_rejected": -89.09783935546875, "logps/rejected": -1058.9490966796875, "loss": 1.0253, "margin_dpo/margin_mean": 476.09051513671875, "margin_dpo/margin_std": 665.188232421875, "step": 536 }, { "epoch": 0.788546255506608, "fcm_dpo/beta": 0.0011549813207238913, "fcm_dpo/delta": 0.018105141818523407, "fcm_dpo/margin": 330.7102355957031, "fcm_dpo/q_t": 0.4120485186576843, "grad_norm": 46.58029556274414, "learning_rate": 6.611477514857114e-08, "logits/chosen": -0.8844733238220215, "logits/rejected": -0.8643313646316528, "logps/chosen": -571.9066162109375, "logps/ref_chosen": -57.747467041015625, "logps/ref_rejected": -70.43838500976562, "logps/rejected": -915.3078002929688, "loss": 1.1346, "margin_dpo/margin_mean": 330.7102355957031, "margin_dpo/margin_std": 539.7521362304688, "step": 537 }, { "epoch": 0.7900146842878121, "fcm_dpo/beta": 0.001142657594755292, "fcm_dpo/delta": -0.025782715529203415, "fcm_dpo/margin": 371.4954833984375, "fcm_dpo/q_t": 0.40331846475601196, "grad_norm": 30.739717483520508, "learning_rate": 6.524777069483525e-08, "logits/chosen": -0.902673065662384, "logits/rejected": -0.8884932994842529, "logps/chosen": -698.6116943359375, "logps/ref_chosen": -66.41594696044922, "logps/ref_rejected": -84.22808837890625, "logps/rejected": -1087.919189453125, "loss": 1.0796, "margin_dpo/margin_mean": 371.4954833984375, "margin_dpo/margin_std": 494.69195556640625, "step": 538 }, { "epoch": 0.7914831130690162, "fcm_dpo/beta": 0.0011477393563836813, "fcm_dpo/delta": 0.007408445701003075, "fcm_dpo/margin": 342.22515869140625, "fcm_dpo/q_t": 0.40988558530807495, "grad_norm": 47.36545944213867, "learning_rate": 6.438563463416221e-08, "logits/chosen": -0.9254465103149414, "logits/rejected": -0.9171432256698608, "logps/chosen": -587.165771484375, "logps/ref_chosen": -58.492855072021484, "logps/ref_rejected": -91.85395050048828, "logps/rejected": -962.7520141601562, "loss": 1.0922, "margin_dpo/margin_mean": 342.22515869140625, "margin_dpo/margin_std": 441.59429931640625, "step": 539 }, { "epoch": 0.7929515418502202, "fcm_dpo/beta": 0.0011303776409476995, "fcm_dpo/delta": -0.10793224722146988, "fcm_dpo/margin": 444.6667785644531, "fcm_dpo/q_t": 0.3898891806602478, "grad_norm": 39.17680740356445, "learning_rate": 6.352838968463919e-08, "logits/chosen": -0.8121837377548218, "logits/rejected": -0.8325707912445068, "logps/chosen": -587.6639404296875, "logps/ref_chosen": -63.482513427734375, "logps/ref_rejected": -116.42999267578125, "logps/rejected": -1085.2781982421875, "loss": 1.0491, "margin_dpo/margin_mean": 444.666748046875, "margin_dpo/margin_std": 597.3258056640625, "step": 540 }, { "epoch": 0.7944199706314243, "fcm_dpo/beta": 0.0011378416093066335, "fcm_dpo/delta": 0.13771183788776398, "fcm_dpo/margin": 233.8116912841797, "fcm_dpo/q_t": 0.4404492974281311, "grad_norm": 58.325618743896484, "learning_rate": 6.267605843546767e-08, "logits/chosen": -0.9590853452682495, "logits/rejected": -0.949677586555481, "logps/chosen": -730.1240234375, "logps/ref_chosen": -78.28036499023438, "logps/ref_rejected": -103.273681640625, "logps/rejected": -988.9290771484375, "loss": 1.2296, "margin_dpo/margin_mean": 233.8116912841797, "margin_dpo/margin_std": 537.7781372070312, "step": 541 }, { "epoch": 0.7958883994126285, "fcm_dpo/beta": 0.0011231580283492804, "fcm_dpo/delta": -0.1151493713259697, "fcm_dpo/margin": 451.2611083984375, "fcm_dpo/q_t": 0.3872736692428589, "grad_norm": 51.69799041748047, "learning_rate": 6.182866334636888e-08, "logits/chosen": -0.9066444635391235, "logits/rejected": -0.9385085105895996, "logps/chosen": -588.926025390625, "logps/ref_chosen": -57.48497009277344, "logps/ref_rejected": -96.47506713867188, "logps/rejected": -1079.17724609375, "loss": 1.0454, "margin_dpo/margin_mean": 451.2610778808594, "margin_dpo/margin_std": 590.90966796875, "step": 542 }, { "epoch": 0.7973568281938326, "fcm_dpo/beta": 0.0011308449320495129, "fcm_dpo/delta": 0.04076213017106056, "fcm_dpo/margin": 318.96612548828125, "fcm_dpo/q_t": 0.42912137508392334, "grad_norm": 38.154632568359375, "learning_rate": 6.098622674699147e-08, "logits/chosen": -0.8999680280685425, "logits/rejected": -0.9285463690757751, "logps/chosen": -678.298095703125, "logps/ref_chosen": -60.61750793457031, "logps/ref_rejected": -105.59896850585938, "logps/rejected": -1042.2457275390625, "loss": 1.189, "margin_dpo/margin_mean": 318.96612548828125, "margin_dpo/margin_std": 700.2628173828125, "step": 543 }, { "epoch": 0.7988252569750367, "fcm_dpo/beta": 0.0011320568155497313, "fcm_dpo/delta": -0.019344709813594818, "fcm_dpo/margin": 369.7061767578125, "fcm_dpo/q_t": 0.40487393736839294, "grad_norm": 33.63976287841797, "learning_rate": 6.01487708363232e-08, "logits/chosen": -0.8758711814880371, "logits/rejected": -0.8889458775520325, "logps/chosen": -673.983154296875, "logps/ref_chosen": -59.642303466796875, "logps/ref_rejected": -100.95469665527344, "logps/rejected": -1085.001708984375, "loss": 1.0941, "margin_dpo/margin_mean": 369.7061767578125, "margin_dpo/margin_std": 533.091064453125, "step": 544 }, { "epoch": 0.8002936857562408, "fcm_dpo/beta": 0.0011182475136592984, "fcm_dpo/delta": -0.08015477657318115, "fcm_dpo/margin": 425.98883056640625, "fcm_dpo/q_t": 0.39286336302757263, "grad_norm": 36.59641647338867, "learning_rate": 5.9316317682106294e-08, "logits/chosen": -0.8046392202377319, "logits/rejected": -0.8314776420593262, "logps/chosen": -634.07080078125, "logps/ref_chosen": -67.64859771728516, "logps/ref_rejected": -95.90800476074219, "logps/rejected": -1088.319091796875, "loss": 1.0483, "margin_dpo/margin_mean": 425.98883056640625, "margin_dpo/margin_std": 539.1649169921875, "step": 545 }, { "epoch": 0.801762114537445, "fcm_dpo/beta": 0.0011238758452236652, "fcm_dpo/delta": 0.06430754065513611, "fcm_dpo/margin": 300.63092041015625, "fcm_dpo/q_t": 0.42007124423980713, "grad_norm": 35.92195510864258, "learning_rate": 5.848888922025552e-08, "logits/chosen": -0.8405758738517761, "logits/rejected": -0.8265654444694519, "logps/chosen": -585.0632934570312, "logps/ref_chosen": -50.744232177734375, "logps/ref_rejected": -81.86622619628906, "logps/rejected": -916.816162109375, "loss": 1.1366, "margin_dpo/margin_mean": 300.63092041015625, "margin_dpo/margin_std": 442.31207275390625, "step": 546 }, { "epoch": 0.8032305433186491, "fcm_dpo/beta": 0.0011259375605732203, "fcm_dpo/delta": -0.025206491351127625, "fcm_dpo/margin": 376.65985107421875, "fcm_dpo/q_t": 0.40438586473464966, "grad_norm": 50.428184509277344, "learning_rate": 5.7666507254280265e-08, "logits/chosen": -0.8404146432876587, "logits/rejected": -0.8504692316055298, "logps/chosen": -630.6488037109375, "logps/ref_chosen": -73.6877212524414, "logps/ref_rejected": -90.76136779785156, "logps/rejected": -1024.38232421875, "loss": 1.0885, "margin_dpo/margin_mean": 376.65985107421875, "margin_dpo/margin_std": 529.802490234375, "step": 547 }, { "epoch": 0.8046989720998532, "fcm_dpo/beta": 0.0011212106328457594, "fcm_dpo/delta": -0.001919570378959179, "fcm_dpo/margin": 358.396728515625, "fcm_dpo/q_t": 0.4131587743759155, "grad_norm": 29.529178619384766, "learning_rate": 5.684919345471029e-08, "logits/chosen": -0.9091461896896362, "logits/rejected": -0.9068449139595032, "logps/chosen": -639.157470703125, "logps/ref_chosen": -65.24634552001953, "logps/ref_rejected": -94.11807250976562, "logps/rejected": -1026.426025390625, "loss": 1.11, "margin_dpo/margin_mean": 358.3966979980469, "margin_dpo/margin_std": 571.4837646484375, "step": 548 }, { "epoch": 0.8061674008810573, "fcm_dpo/beta": 0.0011411058949306607, "fcm_dpo/delta": 0.08808208256959915, "fcm_dpo/margin": 275.1691589355469, "fcm_dpo/q_t": 0.42972275614738464, "grad_norm": 55.16639709472656, "learning_rate": 5.603696935852426e-08, "logits/chosen": -0.9102625846862793, "logits/rejected": -0.8981518745422363, "logps/chosen": -652.5174560546875, "logps/ref_chosen": -49.21235656738281, "logps/ref_rejected": -73.91031646728516, "logps/rejected": -952.3845825195312, "loss": 1.176, "margin_dpo/margin_mean": 275.169189453125, "margin_dpo/margin_std": 505.90155029296875, "step": 549 }, { "epoch": 0.8076358296622613, "fcm_dpo/beta": 0.0011486895382404327, "fcm_dpo/delta": 0.04329656437039375, "fcm_dpo/margin": 311.8872375488281, "fcm_dpo/q_t": 0.4181970953941345, "grad_norm": 39.404815673828125, "learning_rate": 5.5229856368582376e-08, "logits/chosen": -0.8432351350784302, "logits/rejected": -0.8644379377365112, "logps/chosen": -670.8403930664062, "logps/ref_chosen": -56.80695343017578, "logps/ref_rejected": -95.12580871582031, "logps/rejected": -1021.0465087890625, "loss": 1.1318, "margin_dpo/margin_mean": 311.88726806640625, "margin_dpo/margin_std": 489.3165283203125, "step": 550 }, { "epoch": 0.8091042584434655, "fcm_dpo/beta": 0.0011192983947694302, "fcm_dpo/delta": -0.1783682256937027, "fcm_dpo/margin": 507.3398132324219, "fcm_dpo/q_t": 0.3693164587020874, "grad_norm": 55.6103515625, "learning_rate": 5.4427875753062734e-08, "logits/chosen": -0.8312960863113403, "logits/rejected": -0.8840258717536926, "logps/chosen": -585.3047485351562, "logps/ref_chosen": -59.10633087158203, "logps/ref_rejected": -111.67280578613281, "logps/rejected": -1145.2109375, "loss": 0.9652, "margin_dpo/margin_mean": 507.3398132324219, "margin_dpo/margin_std": 490.194580078125, "step": 551 }, { "epoch": 0.8105726872246696, "fcm_dpo/beta": 0.0010634324280545115, "fcm_dpo/delta": -0.23344632983207703, "fcm_dpo/margin": 578.7212524414062, "fcm_dpo/q_t": 0.3663737177848816, "grad_norm": 50.61666488647461, "learning_rate": 5.363104864490034e-08, "logits/chosen": -0.8697335720062256, "logits/rejected": -0.9037412405014038, "logps/chosen": -573.7623291015625, "logps/ref_chosen": -62.35459899902344, "logps/ref_rejected": -104.56210327148438, "logps/rejected": -1194.691162109375, "loss": 0.9718, "margin_dpo/margin_mean": 578.7212524414062, "margin_dpo/margin_std": 671.6409912109375, "step": 552 }, { "epoch": 0.8120411160058737, "fcm_dpo/beta": 0.0010705923195928335, "fcm_dpo/delta": 0.08465807139873505, "fcm_dpo/margin": 297.1253967285156, "fcm_dpo/q_t": 0.4299642741680145, "grad_norm": 28.852155685424805, "learning_rate": 5.2839396041230415e-08, "logits/chosen": -0.8760408759117126, "logits/rejected": -0.8712909817695618, "logps/chosen": -675.16796875, "logps/ref_chosen": -68.25881958007812, "logps/ref_rejected": -98.0971450805664, "logps/rejected": -1002.1317138671875, "loss": 1.1695, "margin_dpo/margin_mean": 297.1253662109375, "margin_dpo/margin_std": 539.8681640625, "step": 553 }, { "epoch": 0.8135095447870778, "fcm_dpo/beta": 0.0010796760907396674, "fcm_dpo/delta": -0.04111691564321518, "fcm_dpo/margin": 406.2252197265625, "fcm_dpo/q_t": 0.40428856015205383, "grad_norm": 116.00951385498047, "learning_rate": 5.205293880283551e-08, "logits/chosen": -0.8844936490058899, "logits/rejected": -0.8602651953697205, "logps/chosen": -693.2430419921875, "logps/ref_chosen": -67.94767761230469, "logps/ref_rejected": -89.78272247314453, "logps/rejected": -1121.3033447265625, "loss": 1.1261, "margin_dpo/margin_mean": 406.2252197265625, "margin_dpo/margin_std": 695.0372314453125, "step": 554 }, { "epoch": 0.8149779735682819, "fcm_dpo/beta": 0.0010527544654905796, "fcm_dpo/delta": -0.11765166372060776, "fcm_dpo/margin": 486.11102294921875, "fcm_dpo/q_t": 0.3908356726169586, "grad_norm": 39.57646560668945, "learning_rate": 5.127169765359515e-08, "logits/chosen": -0.9440624713897705, "logits/rejected": -0.9946380853652954, "logps/chosen": -687.46533203125, "logps/ref_chosen": -53.33049011230469, "logps/ref_rejected": -108.47937774658203, "logps/rejected": -1228.7252197265625, "loss": 1.0654, "margin_dpo/margin_mean": 486.11102294921875, "margin_dpo/margin_std": 729.351318359375, "step": 555 }, { "epoch": 0.8164464023494861, "fcm_dpo/beta": 0.001059158006682992, "fcm_dpo/delta": 0.0916333943605423, "fcm_dpo/margin": 293.83837890625, "fcm_dpo/q_t": 0.4280283451080322, "grad_norm": 39.030269622802734, "learning_rate": 5.049569317994012e-08, "logits/chosen": -0.9093760848045349, "logits/rejected": -0.9025793671607971, "logps/chosen": -666.353759765625, "logps/ref_chosen": -58.64447021484375, "logps/ref_rejected": -101.34040832519531, "logps/rejected": -1002.8881225585938, "loss": 1.148, "margin_dpo/margin_mean": 293.83837890625, "margin_dpo/margin_std": 430.22235107421875, "step": 556 }, { "epoch": 0.8179148311306902, "fcm_dpo/beta": 0.001053366344422102, "fcm_dpo/delta": -0.0524490550160408, "fcm_dpo/margin": 427.0723876953125, "fcm_dpo/q_t": 0.401094913482666, "grad_norm": 53.48329162597656, "learning_rate": 4.9724945830310144e-08, "logits/chosen": -0.9332031011581421, "logits/rejected": -0.9630019664764404, "logps/chosen": -737.6878662109375, "logps/ref_chosen": -67.84066009521484, "logps/ref_rejected": -109.93965911865234, "logps/rejected": -1206.859375, "loss": 1.1001, "margin_dpo/margin_mean": 427.0723876953125, "margin_dpo/margin_std": 670.270751953125, "step": 557 }, { "epoch": 0.8193832599118943, "fcm_dpo/beta": 0.0010221919510513544, "fcm_dpo/delta": -0.2019827663898468, "fcm_dpo/margin": 577.2942504882812, "fcm_dpo/q_t": 0.36431533098220825, "grad_norm": 37.130680084228516, "learning_rate": 4.8959475914614554e-08, "logits/chosen": -1.0133311748504639, "logits/rejected": -1.0322959423065186, "logps/chosen": -656.3045043945312, "logps/ref_chosen": -62.36824035644531, "logps/ref_rejected": -102.16102600097656, "logps/rejected": -1273.3916015625, "loss": 0.9729, "margin_dpo/margin_mean": 577.2942504882812, "margin_dpo/margin_std": 620.1890869140625, "step": 558 }, { "epoch": 0.8208516886930984, "fcm_dpo/beta": 0.0009995660511776805, "fcm_dpo/delta": -0.08984459936618805, "fcm_dpo/margin": 485.7807312011719, "fcm_dpo/q_t": 0.39124971628189087, "grad_norm": 30.684978485107422, "learning_rate": 4.8199303603697614e-08, "logits/chosen": -1.0911483764648438, "logits/rejected": -1.0965559482574463, "logps/chosen": -750.491455078125, "logps/ref_chosen": -60.752323150634766, "logps/ref_rejected": -93.44229125976562, "logps/rejected": -1268.962158203125, "loss": 1.0455, "margin_dpo/margin_mean": 485.7807312011719, "margin_dpo/margin_std": 622.151123046875, "step": 559 }, { "epoch": 0.8223201174743024, "fcm_dpo/beta": 0.000995859270915389, "fcm_dpo/delta": 0.05174366384744644, "fcm_dpo/margin": 351.25054931640625, "fcm_dpo/q_t": 0.4200424551963806, "grad_norm": 32.02336502075195, "learning_rate": 4.7444448928806615e-08, "logits/chosen": -0.8772056102752686, "logits/rejected": -0.8566712141036987, "logps/chosen": -679.903076171875, "logps/ref_chosen": -58.10382080078125, "logps/ref_rejected": -79.99122619628906, "logps/rejected": -1053.041015625, "loss": 1.1385, "margin_dpo/margin_mean": 351.25054931640625, "margin_dpo/margin_std": 544.828857421875, "step": 560 }, { "epoch": 0.8237885462555066, "fcm_dpo/beta": 0.0010213316418230534, "fcm_dpo/delta": 0.09696964174509048, "fcm_dpo/margin": 299.154052734375, "fcm_dpo/q_t": 0.42913612723350525, "grad_norm": 46.0489501953125, "learning_rate": 4.669493178106432e-08, "logits/chosen": -1.0040576457977295, "logits/rejected": -1.0211834907531738, "logps/chosen": -775.0875244140625, "logps/ref_chosen": -50.912879943847656, "logps/ref_rejected": -99.06856536865234, "logps/rejected": -1122.3973388671875, "loss": 1.196, "margin_dpo/margin_mean": 299.154052734375, "margin_dpo/margin_std": 618.0548095703125, "step": 561 }, { "epoch": 0.8252569750367107, "fcm_dpo/beta": 0.0010161999380216002, "fcm_dpo/delta": -0.008913304656744003, "fcm_dpo/margin": 401.3731689453125, "fcm_dpo/q_t": 0.40855729579925537, "grad_norm": 35.42552947998047, "learning_rate": 4.5950771910944596e-08, "logits/chosen": -0.9741950035095215, "logits/rejected": -0.9822410345077515, "logps/chosen": -739.70166015625, "logps/ref_chosen": -59.46440124511719, "logps/ref_rejected": -96.54266357421875, "logps/rejected": -1178.153076171875, "loss": 1.1047, "margin_dpo/margin_mean": 401.3731689453125, "margin_dpo/margin_std": 601.1802368164062, "step": 562 }, { "epoch": 0.8267254038179148, "fcm_dpo/beta": 0.0010444659274071455, "fcm_dpo/delta": 0.08401615172624588, "fcm_dpo/margin": 302.5437927246094, "fcm_dpo/q_t": 0.42215970158576965, "grad_norm": 58.64229202270508, "learning_rate": 4.521198892775202e-08, "logits/chosen": -0.9864065647125244, "logits/rejected": -0.98963463306427, "logps/chosen": -830.58203125, "logps/ref_chosen": -60.60819625854492, "logps/ref_rejected": -94.56770324707031, "logps/rejected": -1167.0853271484375, "loss": 1.2354, "margin_dpo/margin_mean": 302.5437927246094, "margin_dpo/margin_std": 723.0938720703125, "step": 563 }, { "epoch": 0.8281938325991189, "fcm_dpo/beta": 0.0010381749598309398, "fcm_dpo/delta": -0.017018113285303116, "fcm_dpo/margin": 400.96112060546875, "fcm_dpo/q_t": 0.4057755768299103, "grad_norm": 49.656681060791016, "learning_rate": 4.447860229910544e-08, "logits/chosen": -1.0767529010772705, "logits/rejected": -1.0685616731643677, "logps/chosen": -764.374267578125, "logps/ref_chosen": -74.26837921142578, "logps/ref_rejected": -93.23818969726562, "logps/rejected": -1184.30517578125, "loss": 1.0832, "margin_dpo/margin_mean": 400.9611511230469, "margin_dpo/margin_std": 527.44775390625, "step": 564 }, { "epoch": 0.8296622613803231, "fcm_dpo/beta": 0.0010299738496541977, "fcm_dpo/delta": -0.030898885801434517, "fcm_dpo/margin": 417.05035400390625, "fcm_dpo/q_t": 0.40826284885406494, "grad_norm": 58.330814361572266, "learning_rate": 4.375063135042445e-08, "logits/chosen": -0.9842187166213989, "logits/rejected": -0.9827100038528442, "logps/chosen": -795.4959716796875, "logps/ref_chosen": -69.0199203491211, "logps/ref_rejected": -85.7789306640625, "logps/rejected": -1229.3052978515625, "loss": 1.1307, "margin_dpo/margin_mean": 417.05029296875, "margin_dpo/margin_std": 738.177734375, "step": 565 }, { "epoch": 0.8311306901615272, "fcm_dpo/beta": 0.0010237455135211349, "fcm_dpo/delta": -0.07710428535938263, "fcm_dpo/margin": 462.15057373046875, "fcm_dpo/q_t": 0.3955567479133606, "grad_norm": 44.30464553833008, "learning_rate": 4.3028095264420525e-08, "logits/chosen": -0.9977031946182251, "logits/rejected": -1.0206011533737183, "logps/chosen": -761.23193359375, "logps/ref_chosen": -66.5453109741211, "logps/ref_rejected": -103.86932373046875, "logps/rejected": -1260.70654296875, "loss": 1.1004, "margin_dpo/margin_mean": 462.15057373046875, "margin_dpo/margin_std": 732.3837890625, "step": 566 }, { "epoch": 0.8325991189427313, "fcm_dpo/beta": 0.0010178061202168465, "fcm_dpo/delta": 0.03579302877187729, "fcm_dpo/margin": 359.13531494140625, "fcm_dpo/q_t": 0.4146028161048889, "grad_norm": 34.87330627441406, "learning_rate": 4.231101308059165e-08, "logits/chosen": -1.0946989059448242, "logits/rejected": -1.097286343574524, "logps/chosen": -709.2471313476562, "logps/ref_chosen": -52.85829544067383, "logps/ref_rejected": -85.37095642089844, "logps/rejected": -1100.89501953125, "loss": 1.1094, "margin_dpo/margin_mean": 359.1352844238281, "margin_dpo/margin_std": 478.47601318359375, "step": 567 }, { "epoch": 0.8340675477239354, "fcm_dpo/beta": 0.0010078256018459797, "fcm_dpo/delta": -0.077871173620224, "fcm_dpo/margin": 470.42230224609375, "fcm_dpo/q_t": 0.3900975286960602, "grad_norm": 41.226470947265625, "learning_rate": 4.1599403694720145e-08, "logits/chosen": -0.9984632730484009, "logits/rejected": -1.0317835807800293, "logps/chosen": -692.5980224609375, "logps/ref_chosen": -45.1923828125, "logps/ref_rejected": -89.09236907958984, "logps/rejected": -1206.9202880859375, "loss": 1.0342, "margin_dpo/margin_mean": 470.4223327636719, "margin_dpo/margin_std": 534.6771240234375, "step": 568 }, { "epoch": 0.8355359765051396, "fcm_dpo/beta": 0.0010118992067873478, "fcm_dpo/delta": 0.0053036510944366455, "fcm_dpo/margin": 389.440185546875, "fcm_dpo/q_t": 0.411299467086792, "grad_norm": 59.71284103393555, "learning_rate": 4.089328585837512e-08, "logits/chosen": -1.0540335178375244, "logits/rejected": -1.0605683326721191, "logps/chosen": -802.30126953125, "logps/ref_chosen": -63.72056198120117, "logps/ref_rejected": -79.10325622558594, "logps/rejected": -1207.1240234375, "loss": 1.1411, "margin_dpo/margin_mean": 389.440185546875, "margin_dpo/margin_std": 669.8353271484375, "step": 569 }, { "epoch": 0.8370044052863436, "fcm_dpo/beta": 0.0010090538999065757, "fcm_dpo/delta": 0.034501124173402786, "fcm_dpo/margin": 363.46832275390625, "fcm_dpo/q_t": 0.4174221158027649, "grad_norm": 36.75635528564453, "learning_rate": 4.019267817841834e-08, "logits/chosen": -1.1069672107696533, "logits/rejected": -1.0978264808654785, "logps/chosen": -741.89794921875, "logps/ref_chosen": -61.61454391479492, "logps/ref_rejected": -82.14186096191406, "logps/rejected": -1125.8935546875, "loss": 1.1306, "margin_dpo/margin_mean": 363.46832275390625, "margin_dpo/margin_std": 565.5325927734375, "step": 570 }, { "epoch": 0.8384728340675477, "fcm_dpo/beta": 0.001006947597488761, "fcm_dpo/delta": -0.025127392262220383, "fcm_dpo/margin": 421.017333984375, "fcm_dpo/q_t": 0.40483880043029785, "grad_norm": 46.7877197265625, "learning_rate": 3.9497599116513705e-08, "logits/chosen": -0.9774879217147827, "logits/rejected": -0.9870057106018066, "logps/chosen": -736.8277587890625, "logps/ref_chosen": -53.05406188964844, "logps/ref_rejected": -91.33682250976562, "logps/rejected": -1196.1279296875, "loss": 1.1072, "margin_dpo/margin_mean": 421.01739501953125, "margin_dpo/margin_std": 656.8223266601562, "step": 571 }, { "epoch": 0.8399412628487518, "fcm_dpo/beta": 0.0009963458869606256, "fcm_dpo/delta": -0.05837538465857506, "fcm_dpo/margin": 457.11859130859375, "fcm_dpo/q_t": 0.4020523428916931, "grad_norm": 34.478126525878906, "learning_rate": 3.880806698864086e-08, "logits/chosen": -1.0106735229492188, "logits/rejected": -1.0380733013153076, "logps/chosen": -765.731201171875, "logps/ref_chosen": -48.45928955078125, "logps/ref_rejected": -83.55703735351562, "logps/rejected": -1257.947509765625, "loss": 1.101, "margin_dpo/margin_mean": 457.11859130859375, "margin_dpo/margin_std": 743.0745239257812, "step": 572 }, { "epoch": 0.8414096916299559, "fcm_dpo/beta": 0.000998746370896697, "fcm_dpo/delta": 0.01006124448031187, "fcm_dpo/margin": 390.7725830078125, "fcm_dpo/q_t": 0.41236811876296997, "grad_norm": 31.542451858520508, "learning_rate": 3.812409996461275e-08, "logits/chosen": -1.0760366916656494, "logits/rejected": -1.0830974578857422, "logps/chosen": -746.3466186523438, "logps/ref_chosen": -51.62262725830078, "logps/ref_rejected": -85.32499694824219, "logps/rejected": -1170.821533203125, "loss": 1.1013, "margin_dpo/margin_mean": 390.7725830078125, "margin_dpo/margin_std": 552.9385986328125, "step": 573 }, { "epoch": 0.8428781204111601, "fcm_dpo/beta": 0.0009959687013179064, "fcm_dpo/delta": -0.0264823567122221, "fcm_dpo/margin": 427.0505676269531, "fcm_dpo/q_t": 0.40291672945022583, "grad_norm": 34.11531066894531, "learning_rate": 3.74457160675965e-08, "logits/chosen": -1.0604960918426514, "logits/rejected": -1.0845885276794434, "logps/chosen": -673.98388671875, "logps/ref_chosen": -51.04446029663086, "logps/ref_rejected": -92.80640411376953, "logps/rejected": -1142.79638671875, "loss": 1.079, "margin_dpo/margin_mean": 427.0505676269531, "margin_dpo/margin_std": 563.7635498046875, "step": 574 }, { "epoch": 0.8443465491923642, "fcm_dpo/beta": 0.0009870969224721193, "fcm_dpo/delta": 0.015626847743988037, "fcm_dpo/margin": 388.8648681640625, "fcm_dpo/q_t": 0.4126191735267639, "grad_norm": 41.480953216552734, "learning_rate": 3.677293317363864e-08, "logits/chosen": -0.9152381420135498, "logits/rejected": -0.9155479669570923, "logps/chosen": -773.0484619140625, "logps/ref_chosen": -71.7901382446289, "logps/ref_rejected": -95.38619995117188, "logps/rejected": -1185.5093994140625, "loss": 1.1384, "margin_dpo/margin_mean": 388.8648681640625, "margin_dpo/margin_std": 641.7487182617188, "step": 575 }, { "epoch": 0.8458149779735683, "fcm_dpo/beta": 0.0010088002309203148, "fcm_dpo/delta": 0.10819648951292038, "fcm_dpo/margin": 292.5570983886719, "fcm_dpo/q_t": 0.4330289363861084, "grad_norm": 34.14402770996094, "learning_rate": 3.6105769011194224e-08, "logits/chosen": -1.0202168226242065, "logits/rejected": -1.0450081825256348, "logps/chosen": -745.5078735351562, "logps/ref_chosen": -54.262962341308594, "logps/ref_rejected": -100.75428009033203, "logps/rejected": -1084.5562744140625, "loss": 1.1883, "margin_dpo/margin_mean": 292.5570983886719, "margin_dpo/margin_std": 554.0349731445312, "step": 576 }, { "epoch": 0.8472834067547724, "fcm_dpo/beta": 0.0010158491786569357, "fcm_dpo/delta": -0.00041581690311431885, "fcm_dpo/margin": 394.10260009765625, "fcm_dpo/q_t": 0.4098922610282898, "grad_norm": 32.901432037353516, "learning_rate": 3.5444241160659304e-08, "logits/chosen": -0.9910534620285034, "logits/rejected": -0.9746923446655273, "logps/chosen": -663.3876953125, "logps/ref_chosen": -61.909706115722656, "logps/ref_rejected": -84.07069396972656, "logps/rejected": -1079.6512451171875, "loss": 1.1104, "margin_dpo/margin_mean": 394.10260009765625, "margin_dpo/margin_std": 565.6033325195312, "step": 577 }, { "epoch": 0.8487518355359766, "fcm_dpo/beta": 0.0010062268702313304, "fcm_dpo/delta": -0.03685392439365387, "fcm_dpo/margin": 431.7515563964844, "fcm_dpo/q_t": 0.40013357996940613, "grad_norm": 34.37710952758789, "learning_rate": 3.478836705390808e-08, "logits/chosen": -0.8883892297744751, "logits/rejected": -0.9162989854812622, "logps/chosen": -642.0396728515625, "logps/ref_chosen": -49.26368713378906, "logps/ref_rejected": -83.4362564086914, "logps/rejected": -1107.9637451171875, "loss": 1.0655, "margin_dpo/margin_mean": 431.75152587890625, "margin_dpo/margin_std": 524.7294921875, "step": 578 }, { "epoch": 0.8502202643171806, "fcm_dpo/beta": 0.0010275598615407944, "fcm_dpo/delta": 0.1303221881389618, "fcm_dpo/margin": 266.0924072265625, "fcm_dpo/q_t": 0.43752169609069824, "grad_norm": 60.628578186035156, "learning_rate": 3.41381639738331e-08, "logits/chosen": -0.9820126295089722, "logits/rejected": -0.9807819128036499, "logps/chosen": -728.9359130859375, "logps/ref_chosen": -58.88581848144531, "logps/ref_rejected": -94.78762817382812, "logps/rejected": -1030.9300537109375, "loss": 1.212, "margin_dpo/margin_mean": 266.0924072265625, "margin_dpo/margin_std": 565.8082275390625, "step": 579 }, { "epoch": 0.8516886930983847, "fcm_dpo/beta": 0.0010196480434387922, "fcm_dpo/delta": -0.08530791848897934, "fcm_dpo/margin": 471.34722900390625, "fcm_dpo/q_t": 0.39458024501800537, "grad_norm": 42.90260696411133, "learning_rate": 3.349364905389032e-08, "logits/chosen": -0.842185378074646, "logits/rejected": -0.8703323006629944, "logps/chosen": -540.9808349609375, "logps/ref_chosen": -48.70683670043945, "logps/ref_rejected": -81.7583999633789, "logps/rejected": -1045.379638671875, "loss": 1.0552, "margin_dpo/margin_mean": 471.3471984863281, "margin_dpo/margin_std": 646.364013671875, "step": 580 }, { "epoch": 0.8531571218795888, "fcm_dpo/beta": 0.001028277212753892, "fcm_dpo/delta": 0.052076976746320724, "fcm_dpo/margin": 340.040771484375, "fcm_dpo/q_t": 0.4224529564380646, "grad_norm": 42.94197082519531, "learning_rate": 3.285483927764726e-08, "logits/chosen": -1.043858528137207, "logits/rejected": -1.0504353046417236, "logps/chosen": -738.8606567382812, "logps/ref_chosen": -62.22235107421875, "logps/ref_rejected": -91.73568725585938, "logps/rejected": -1108.414794921875, "loss": 1.151, "margin_dpo/margin_mean": 340.040771484375, "margin_dpo/margin_std": 596.381591796875, "step": 581 }, { "epoch": 0.8546255506607929, "fcm_dpo/beta": 0.0010394532000645995, "fcm_dpo/delta": 0.0019838809967041016, "fcm_dpo/margin": 381.43505859375, "fcm_dpo/q_t": 0.4081156849861145, "grad_norm": 32.26409149169922, "learning_rate": 3.222175147833556e-08, "logits/chosen": -0.9781073331832886, "logits/rejected": -0.9989155530929565, "logps/chosen": -653.311279296875, "logps/ref_chosen": -58.228660583496094, "logps/ref_rejected": -110.06959533691406, "logps/rejected": -1086.5872802734375, "loss": 1.1041, "margin_dpo/margin_mean": 381.43505859375, "margin_dpo/margin_std": 514.436279296875, "step": 582 }, { "epoch": 0.856093979441997, "fcm_dpo/beta": 0.001045125536620617, "fcm_dpo/delta": 0.15199461579322815, "fcm_dpo/margin": 240.80845642089844, "fcm_dpo/q_t": 0.4445319175720215, "grad_norm": 61.14152908325195, "learning_rate": 3.159440233840763e-08, "logits/chosen": -0.9267081022262573, "logits/rejected": -0.9212468862533569, "logps/chosen": -741.570068359375, "logps/ref_chosen": -56.86286163330078, "logps/ref_rejected": -88.4039306640625, "logps/rejected": -1013.9195556640625, "loss": 1.2611, "margin_dpo/margin_mean": 240.80844116210938, "margin_dpo/margin_std": 632.1082153320312, "step": 583 }, { "epoch": 0.8575624082232012, "fcm_dpo/beta": 0.001047009602189064, "fcm_dpo/delta": -0.09994575381278992, "fcm_dpo/margin": 472.8240661621094, "fcm_dpo/q_t": 0.38906019926071167, "grad_norm": 33.5833854675293, "learning_rate": 3.0972808389096635e-08, "logits/chosen": -0.9425207376480103, "logits/rejected": -0.9496064186096191, "logps/chosen": -631.0457763671875, "logps/ref_chosen": -56.90068054199219, "logps/ref_rejected": -97.63606262207031, "logps/rejected": -1144.6053466796875, "loss": 1.0321, "margin_dpo/margin_mean": 472.8240966796875, "margin_dpo/margin_std": 567.1585693359375, "step": 584 }, { "epoch": 0.8590308370044053, "fcm_dpo/beta": 0.0010301112197339535, "fcm_dpo/delta": -0.050719231367111206, "fcm_dpo/margin": 435.0365905761719, "fcm_dpo/q_t": 0.4000217318534851, "grad_norm": 38.895267486572266, "learning_rate": 3.035698600998121e-08, "logits/chosen": -0.9933120608329773, "logits/rejected": -1.0140407085418701, "logps/chosen": -725.3422241210938, "logps/ref_chosen": -60.973968505859375, "logps/ref_rejected": -84.16952514648438, "logps/rejected": -1183.5743408203125, "loss": 1.1026, "margin_dpo/margin_mean": 435.03656005859375, "margin_dpo/margin_std": 694.1354370117188, "step": 585 }, { "epoch": 0.8604992657856094, "fcm_dpo/beta": 0.0010434159776195884, "fcm_dpo/delta": 0.09400048106908798, "fcm_dpo/margin": 296.1407775878906, "fcm_dpo/q_t": 0.4296777844429016, "grad_norm": 32.761634826660156, "learning_rate": 2.974695142855388e-08, "logits/chosen": -0.9788910746574402, "logits/rejected": -0.997832715511322, "logps/chosen": -762.8277587890625, "logps/ref_chosen": -56.85559844970703, "logps/ref_rejected": -91.80261993408203, "logps/rejected": -1093.91552734375, "loss": 1.1855, "margin_dpo/margin_mean": 296.1407775878906, "margin_dpo/margin_std": 574.4661865234375, "step": 586 }, { "epoch": 0.8619676945668135, "fcm_dpo/beta": 0.001048217760398984, "fcm_dpo/delta": -0.02521578222513199, "fcm_dpo/margin": 404.6052551269531, "fcm_dpo/q_t": 0.4032083749771118, "grad_norm": 38.702091217041016, "learning_rate": 2.9142720719793122e-08, "logits/chosen": -1.0009737014770508, "logits/rejected": -1.0238654613494873, "logps/chosen": -547.4169921875, "logps/ref_chosen": -44.69159698486328, "logps/ref_rejected": -82.62385559082031, "logps/rejected": -989.9544677734375, "loss": 1.0848, "margin_dpo/margin_mean": 404.6052551269531, "margin_dpo/margin_std": 556.65869140625, "step": 587 }, { "epoch": 0.8634361233480177, "fcm_dpo/beta": 0.0010463828220963478, "fcm_dpo/delta": 0.038819462060928345, "fcm_dpo/margin": 346.3304748535156, "fcm_dpo/q_t": 0.41620907187461853, "grad_norm": 33.679988861083984, "learning_rate": 2.8544309805740018e-08, "logits/chosen": -0.9818781614303589, "logits/rejected": -1.0031819343566895, "logps/chosen": -710.351318359375, "logps/ref_chosen": -50.29494857788086, "logps/ref_rejected": -107.36988067626953, "logps/rejected": -1113.7567138671875, "loss": 1.1241, "margin_dpo/margin_mean": 346.3304748535156, "margin_dpo/margin_std": 503.89788818359375, "step": 588 }, { "epoch": 0.8649045521292217, "fcm_dpo/beta": 0.001043025404214859, "fcm_dpo/delta": -0.07069654762744904, "fcm_dpo/margin": 448.1602783203125, "fcm_dpo/q_t": 0.3938640058040619, "grad_norm": 33.65080261230469, "learning_rate": 2.7951734455078786e-08, "logits/chosen": -0.9495465159416199, "logits/rejected": -0.9597896933555603, "logps/chosen": -717.1591796875, "logps/ref_chosen": -59.929908752441406, "logps/ref_rejected": -111.65534973144531, "logps/rejected": -1217.044921875, "loss": 1.0505, "margin_dpo/margin_mean": 448.1602783203125, "margin_dpo/margin_std": 561.6795654296875, "step": 589 }, { "epoch": 0.8663729809104258, "fcm_dpo/beta": 0.0010281222639605403, "fcm_dpo/delta": -0.06000884622335434, "fcm_dpo/margin": 444.7072448730469, "fcm_dpo/q_t": 0.396173357963562, "grad_norm": 31.502580642700195, "learning_rate": 2.736501028272095e-08, "logits/chosen": -0.9299443364143372, "logits/rejected": -0.9518330097198486, "logps/chosen": -629.3699340820312, "logps/ref_chosen": -55.80979537963867, "logps/ref_rejected": -106.06282043457031, "logps/rejected": -1124.330078125, "loss": 1.0524, "margin_dpo/margin_mean": 444.70721435546875, "margin_dpo/margin_std": 547.791748046875, "step": 590 }, { "epoch": 0.8678414096916299, "fcm_dpo/beta": 0.0010233856737613678, "fcm_dpo/delta": -0.014741834253072739, "fcm_dpo/margin": 404.666748046875, "fcm_dpo/q_t": 0.40434908866882324, "grad_norm": 39.38751983642578, "learning_rate": 2.678415274939408e-08, "logits/chosen": -1.014183521270752, "logits/rejected": -1.0046617984771729, "logps/chosen": -680.9166259765625, "logps/ref_chosen": -56.24061965942383, "logps/ref_rejected": -83.78629302978516, "logps/rejected": -1113.1290283203125, "loss": 1.0914, "margin_dpo/margin_mean": 404.666748046875, "margin_dpo/margin_std": 561.1109619140625, "step": 591 }, { "epoch": 0.869309838472834, "fcm_dpo/beta": 0.0010261686984449625, "fcm_dpo/delta": 0.026942353695631027, "fcm_dpo/margin": 364.5346374511719, "fcm_dpo/q_t": 0.4158400297164917, "grad_norm": 37.25166702270508, "learning_rate": 2.6209177161234442e-08, "logits/chosen": -0.9915554523468018, "logits/rejected": -0.9911011457443237, "logps/chosen": -713.0601806640625, "logps/ref_chosen": -47.94025421142578, "logps/ref_rejected": -75.73287963867188, "logps/rejected": -1105.387451171875, "loss": 1.1667, "margin_dpo/margin_mean": 364.5346374511719, "margin_dpo/margin_std": 686.7486572265625, "step": 592 }, { "epoch": 0.8707782672540382, "fcm_dpo/beta": 0.0010391025571152568, "fcm_dpo/delta": 0.0972876325249672, "fcm_dpo/margin": 294.2417297363281, "fcm_dpo/q_t": 0.43218329548835754, "grad_norm": 51.747840881347656, "learning_rate": 2.564009866938349e-08, "logits/chosen": -0.8582196235656738, "logits/rejected": -0.8465102910995483, "logps/chosen": -675.2666015625, "logps/ref_chosen": -48.690757751464844, "logps/ref_rejected": -60.90800094604492, "logps/rejected": -981.7255859375, "loss": 1.1985, "margin_dpo/margin_mean": 294.2417297363281, "margin_dpo/margin_std": 604.4602661132812, "step": 593 }, { "epoch": 0.8722466960352423, "fcm_dpo/beta": 0.0010585633572191, "fcm_dpo/delta": 0.01568973809480667, "fcm_dpo/margin": 362.5345153808594, "fcm_dpo/q_t": 0.41356098651885986, "grad_norm": 41.935630798339844, "learning_rate": 2.5076932269588708e-08, "logits/chosen": -0.9748561382293701, "logits/rejected": -0.9647470712661743, "logps/chosen": -659.4791870117188, "logps/ref_chosen": -54.93488693237305, "logps/ref_rejected": -86.09967803955078, "logps/rejected": -1053.178466796875, "loss": 1.1296, "margin_dpo/margin_mean": 362.5345153808594, "margin_dpo/margin_std": 570.4609375, "step": 594 }, { "epoch": 0.8737151248164464, "fcm_dpo/beta": 0.001042917836457491, "fcm_dpo/delta": -0.03980486840009689, "fcm_dpo/margin": 419.83013916015625, "fcm_dpo/q_t": 0.4033294916152954, "grad_norm": 47.1993293762207, "learning_rate": 2.451969280180849e-08, "logits/chosen": -0.9675269722938538, "logits/rejected": -0.984915018081665, "logps/chosen": -637.9202880859375, "logps/ref_chosen": -49.4204216003418, "logps/ref_rejected": -80.62731170654297, "logps/rejected": -1088.957275390625, "loss": 1.0748, "margin_dpo/margin_mean": 419.83013916015625, "margin_dpo/margin_std": 574.22412109375, "step": 595 }, { "epoch": 0.8751835535976505, "fcm_dpo/beta": 0.001060024369508028, "fcm_dpo/delta": 0.09763069450855255, "fcm_dpo/margin": 287.9169921875, "fcm_dpo/q_t": 0.4336046576499939, "grad_norm": 63.941131591796875, "learning_rate": 2.396839494982103e-08, "logits/chosen": -0.9581549167633057, "logits/rejected": -0.9236706495285034, "logps/chosen": -733.4730224609375, "logps/ref_chosen": -59.791683197021484, "logps/ref_rejected": -80.09111785888672, "logps/rejected": -1041.689453125, "loss": 1.1978, "margin_dpo/margin_mean": 287.9169921875, "margin_dpo/margin_std": 603.924560546875, "step": 596 }, { "epoch": 0.8766519823788547, "fcm_dpo/beta": 0.0010323208989575505, "fcm_dpo/delta": -0.13189196586608887, "fcm_dpo/margin": 505.18560791015625, "fcm_dpo/q_t": 0.3857056498527527, "grad_norm": 32.748313903808594, "learning_rate": 2.3423053240837514e-08, "logits/chosen": -0.8813081979751587, "logits/rejected": -0.9265748262405396, "logps/chosen": -683.490234375, "logps/ref_chosen": -57.26078796386719, "logps/ref_rejected": -100.6937255859375, "logps/rejected": -1232.1087646484375, "loss": 1.0412, "margin_dpo/margin_mean": 505.18560791015625, "margin_dpo/margin_std": 654.4864501953125, "step": 597 }, { "epoch": 0.8781204111600588, "fcm_dpo/beta": 0.001027698628604412, "fcm_dpo/delta": -0.0025387555360794067, "fcm_dpo/margin": 390.36993408203125, "fcm_dpo/q_t": 0.4076780080795288, "grad_norm": 37.18258285522461, "learning_rate": 2.2883682045119062e-08, "logits/chosen": -1.003603219985962, "logits/rejected": -1.0124932527542114, "logps/chosen": -688.0802001953125, "logps/ref_chosen": -52.51850509643555, "logps/ref_rejected": -89.44385528564453, "logps/rejected": -1115.37548828125, "loss": 1.1075, "margin_dpo/margin_mean": 390.36993408203125, "margin_dpo/margin_std": 558.430908203125, "step": 598 }, { "epoch": 0.8795888399412628, "fcm_dpo/beta": 0.0010358416475355625, "fcm_dpo/delta": 0.05951521918177605, "fcm_dpo/margin": 329.6752624511719, "fcm_dpo/q_t": 0.41848763823509216, "grad_norm": 40.37639236450195, "learning_rate": 2.2350295575598367e-08, "logits/chosen": -0.9107227325439453, "logits/rejected": -0.916115403175354, "logps/chosen": -684.2776489257812, "logps/ref_chosen": -49.802677154541016, "logps/ref_rejected": -82.978515625, "logps/rejected": -1047.1287841796875, "loss": 1.1367, "margin_dpo/margin_mean": 329.6752624511719, "margin_dpo/margin_std": 463.71551513671875, "step": 599 }, { "epoch": 0.8810572687224669, "fcm_dpo/beta": 0.0010582783725112677, "fcm_dpo/delta": 0.07414868474006653, "fcm_dpo/margin": 310.2664794921875, "fcm_dpo/q_t": 0.4272015690803528, "grad_norm": 38.3448371887207, "learning_rate": 2.1822907887504932e-08, "logits/chosen": -1.036280870437622, "logits/rejected": -1.0311980247497559, "logps/chosen": -761.5162963867188, "logps/ref_chosen": -66.43487548828125, "logps/ref_rejected": -85.45649719238281, "logps/rejected": -1090.804443359375, "loss": 1.1787, "margin_dpo/margin_mean": 310.2664794921875, "margin_dpo/margin_std": 604.5630493164062, "step": 600 }, { "epoch": 0.8810572687224669, "eval_fcm_dpo/beta": 0.0010637843515723944, "eval_logits/chosen": -1.0093570947647095, "eval_logits/rejected": -1.006962776184082, "eval_logps/chosen": -820.5049438476562, "eval_logps/ref_chosen": -79.05104064941406, "eval_logps/ref_rejected": -86.79793548583984, "eval_logps/rejected": -1108.1163330078125, "eval_loss": 0.6083120107650757, "eval_margin_dpo/margin_mean": 279.864501953125, "eval_margin_dpo/margin_std": 623.9760131835938, "eval_runtime": 39.2935, "eval_samples_per_second": 59.526, "eval_steps_per_second": 1.883, "step": 600 }, { "epoch": 0.882525697503671, "fcm_dpo/beta": 0.0010579151567071676, "fcm_dpo/delta": -0.02401512674987316, "fcm_dpo/margin": 399.729248046875, "fcm_dpo/q_t": 0.40252023935317993, "grad_norm": 37.17901611328125, "learning_rate": 2.1301532877994742e-08, "logits/chosen": -0.9913003444671631, "logits/rejected": -1.0062311887741089, "logps/chosen": -783.751708984375, "logps/ref_chosen": -59.13361358642578, "logps/ref_rejected": -94.69093322753906, "logps/rejected": -1219.038330078125, "loss": 1.0849, "margin_dpo/margin_mean": 399.7292785644531, "margin_dpo/margin_std": 545.0308227539062, "step": 601 }, { "epoch": 0.8839941262848752, "fcm_dpo/beta": 0.0010464717634022236, "fcm_dpo/delta": -0.12171060591936111, "fcm_dpo/margin": 492.49066162109375, "fcm_dpo/q_t": 0.3832111358642578, "grad_norm": 63.6419677734375, "learning_rate": 2.0786184285784298e-08, "logits/chosen": -0.9910135269165039, "logits/rejected": -1.024593710899353, "logps/chosen": -532.450927734375, "logps/ref_chosen": -48.59352111816406, "logps/ref_rejected": -87.6685562133789, "logps/rejected": -1064.0166015625, "loss": 1.0113, "margin_dpo/margin_mean": 492.4906921386719, "margin_dpo/margin_std": 543.3355102539062, "step": 602 }, { "epoch": 0.8854625550660793, "fcm_dpo/beta": 0.001022031530737877, "fcm_dpo/delta": -0.07094608247280121, "fcm_dpo/margin": 457.523193359375, "fcm_dpo/q_t": 0.3987892270088196, "grad_norm": 45.804847717285156, "learning_rate": 2.0276875690788204e-08, "logits/chosen": -0.9808427095413208, "logits/rejected": -0.9711321592330933, "logps/chosen": -687.8028564453125, "logps/ref_chosen": -70.41461944580078, "logps/ref_rejected": -100.32559967041016, "logps/rejected": -1175.237060546875, "loss": 1.0778, "margin_dpo/margin_mean": 457.523193359375, "margin_dpo/margin_std": 686.7850341796875, "step": 603 }, { "epoch": 0.8869309838472834, "fcm_dpo/beta": 0.0010027764365077019, "fcm_dpo/delta": -0.10522530972957611, "fcm_dpo/margin": 498.571044921875, "fcm_dpo/q_t": 0.3895166218280792, "grad_norm": 45.26606369018555, "learning_rate": 1.977362051376158e-08, "logits/chosen": -0.970876157283783, "logits/rejected": -1.0076401233673096, "logps/chosen": -649.3685302734375, "logps/ref_chosen": -46.45808029174805, "logps/ref_rejected": -91.8544921875, "logps/rejected": -1193.3359375, "loss": 1.0481, "margin_dpo/margin_mean": 498.571044921875, "margin_dpo/margin_std": 668.9893798828125, "step": 604 }, { "epoch": 0.8883994126284875, "fcm_dpo/beta": 0.0010006949305534363, "fcm_dpo/delta": 0.038943853229284286, "fcm_dpo/margin": 362.182373046875, "fcm_dpo/q_t": 0.41993600130081177, "grad_norm": 39.667091369628906, "learning_rate": 1.9276432015946446e-08, "logits/chosen": -0.9392565488815308, "logits/rejected": -0.9526230096817017, "logps/chosen": -716.2191162109375, "logps/ref_chosen": -66.24933624267578, "logps/ref_rejected": -102.30496978759766, "logps/rejected": -1114.45703125, "loss": 1.1385, "margin_dpo/margin_mean": 362.182373046875, "margin_dpo/margin_std": 607.8369140625, "step": 605 }, { "epoch": 0.8898678414096917, "fcm_dpo/beta": 0.001008864026516676, "fcm_dpo/delta": -0.003518037497997284, "fcm_dpo/margin": 399.5196533203125, "fcm_dpo/q_t": 0.40903180837631226, "grad_norm": 28.302597045898438, "learning_rate": 1.8785323298722093e-08, "logits/chosen": -0.9261046051979065, "logits/rejected": -0.9387686848640442, "logps/chosen": -695.829345703125, "logps/ref_chosen": -54.819122314453125, "logps/ref_rejected": -98.37146759033203, "logps/rejected": -1138.9013671875, "loss": 1.0963, "margin_dpo/margin_mean": 399.5196533203125, "margin_dpo/margin_std": 556.862060546875, "step": 606 }, { "epoch": 0.8913362701908958, "fcm_dpo/beta": 0.0010172666516155005, "fcm_dpo/delta": 0.06301670521497726, "fcm_dpo/margin": 332.9754943847656, "fcm_dpo/q_t": 0.4240596890449524, "grad_norm": 30.476581573486328, "learning_rate": 1.8300307303259904e-08, "logits/chosen": -0.918168306350708, "logits/rejected": -0.9049118161201477, "logps/chosen": -733.3533935546875, "logps/ref_chosen": -58.08403778076172, "logps/ref_rejected": -79.777099609375, "logps/rejected": -1088.02197265625, "loss": 1.1554, "margin_dpo/margin_mean": 332.97552490234375, "margin_dpo/margin_std": 572.5631103515625, "step": 607 }, { "epoch": 0.8928046989720999, "fcm_dpo/beta": 0.00101991998963058, "fcm_dpo/delta": 0.0018842313438653946, "fcm_dpo/margin": 390.30804443359375, "fcm_dpo/q_t": 0.4078730344772339, "grad_norm": 31.276063919067383, "learning_rate": 1.7821396810182437e-08, "logits/chosen": -0.9649184942245483, "logits/rejected": -0.9744598865509033, "logps/chosen": -653.48583984375, "logps/ref_chosen": -57.450836181640625, "logps/ref_rejected": -94.77339172363281, "logps/rejected": -1081.116455078125, "loss": 1.0854, "margin_dpo/margin_mean": 390.30804443359375, "margin_dpo/margin_std": 487.88525390625, "step": 608 }, { "epoch": 0.8942731277533039, "fcm_dpo/beta": 0.0010023643262684345, "fcm_dpo/delta": -0.10511058568954468, "fcm_dpo/margin": 498.7186584472656, "fcm_dpo/q_t": 0.3940780460834503, "grad_norm": 31.827770233154297, "learning_rate": 1.7348604439226617e-08, "logits/chosen": -1.0191905498504639, "logits/rejected": -1.0367772579193115, "logps/chosen": -678.4195556640625, "logps/ref_chosen": -58.805355072021484, "logps/ref_rejected": -88.81600952148438, "logps/rejected": -1207.14892578125, "loss": 1.0632, "margin_dpo/margin_mean": 498.7186584472656, "margin_dpo/margin_std": 760.2215576171875, "step": 609 }, { "epoch": 0.895741556534508, "fcm_dpo/beta": 0.0010101648513227701, "fcm_dpo/delta": 0.08314502984285355, "fcm_dpo/margin": 316.170654296875, "fcm_dpo/q_t": 0.426239937543869, "grad_norm": 49.75373458862305, "learning_rate": 1.6881942648911074e-08, "logits/chosen": -0.9572958946228027, "logits/rejected": -0.9347594380378723, "logps/chosen": -688.2482299804688, "logps/ref_chosen": -65.69503784179688, "logps/ref_rejected": -83.40538787841797, "logps/rejected": -1022.1292724609375, "loss": 1.172, "margin_dpo/margin_mean": 316.170654296875, "margin_dpo/margin_std": 579.01513671875, "step": 610 }, { "epoch": 0.8972099853157122, "fcm_dpo/beta": 0.0009934802073985338, "fcm_dpo/delta": -0.13049647212028503, "fcm_dpo/margin": 527.0625, "fcm_dpo/q_t": 0.38698631525039673, "grad_norm": 32.91178894042969, "learning_rate": 1.6421423736208e-08, "logits/chosen": -1.0069807767868042, "logits/rejected": -1.0485907793045044, "logps/chosen": -711.209716796875, "logps/ref_chosen": -52.59946823120117, "logps/ref_rejected": -86.33099365234375, "logps/rejected": -1272.003662109375, "loss": 1.0454, "margin_dpo/margin_mean": 527.0625, "margin_dpo/margin_std": 733.5755615234375, "step": 611 }, { "epoch": 0.8986784140969163, "fcm_dpo/beta": 0.0009895211551338434, "fcm_dpo/delta": -0.007441475987434387, "fcm_dpo/margin": 411.2275085449219, "fcm_dpo/q_t": 0.40736648440361023, "grad_norm": 32.78879165649414, "learning_rate": 1.5967059836219042e-08, "logits/chosen": -1.0194255113601685, "logits/rejected": -1.0259497165679932, "logps/chosen": -765.8218994140625, "logps/ref_chosen": -59.32372283935547, "logps/ref_rejected": -88.31239318847656, "logps/rejected": -1206.0380859375, "loss": 1.0933, "margin_dpo/margin_mean": 411.22747802734375, "margin_dpo/margin_std": 567.0072021484375, "step": 612 }, { "epoch": 0.9001468428781204, "fcm_dpo/beta": 0.0009705990669317544, "fcm_dpo/delta": -0.08382614701986313, "fcm_dpo/margin": 493.93206787109375, "fcm_dpo/q_t": 0.390705943107605, "grad_norm": 35.579498291015625, "learning_rate": 1.551886292185553e-08, "logits/chosen": -0.9657102227210999, "logits/rejected": -1.0158898830413818, "logps/chosen": -682.1220703125, "logps/ref_chosen": -59.72996520996094, "logps/ref_rejected": -105.10752868652344, "logps/rejected": -1221.431640625, "loss": 1.042, "margin_dpo/margin_mean": 493.93206787109375, "margin_dpo/margin_std": 606.7100830078125, "step": 613 }, { "epoch": 0.9016152716593245, "fcm_dpo/beta": 0.0009615451563149691, "fcm_dpo/delta": -0.06227314844727516, "fcm_dpo/margin": 477.830810546875, "fcm_dpo/q_t": 0.3970866799354553, "grad_norm": 54.85789108276367, "learning_rate": 1.507684480352292e-08, "logits/chosen": -0.9549213647842407, "logits/rejected": -1.0247011184692383, "logps/chosen": -726.4387817382812, "logps/ref_chosen": -52.93898010253906, "logps/ref_rejected": -104.67938232421875, "logps/rejected": -1256.010009765625, "loss": 1.0719, "margin_dpo/margin_mean": 477.8308410644531, "margin_dpo/margin_std": 666.865478515625, "step": 614 }, { "epoch": 0.9030837004405287, "fcm_dpo/beta": 0.0009616809547878802, "fcm_dpo/delta": 0.005887992680072784, "fcm_dpo/margin": 409.75201416015625, "fcm_dpo/q_t": 0.41104698181152344, "grad_norm": 30.430543899536133, "learning_rate": 1.4641017128809801e-08, "logits/chosen": -0.9402486681938171, "logits/rejected": -0.9574323892593384, "logps/chosen": -709.1848754882812, "logps/ref_chosen": -65.81727600097656, "logps/ref_rejected": -95.17749786376953, "logps/rejected": -1148.297119140625, "loss": 1.1207, "margin_dpo/margin_mean": 409.75201416015625, "margin_dpo/margin_std": 655.8992919921875, "step": 615 }, { "epoch": 0.9045521292217328, "fcm_dpo/beta": 0.0009705583215691149, "fcm_dpo/delta": 0.08069596439599991, "fcm_dpo/margin": 331.6129455566406, "fcm_dpo/q_t": 0.42673254013061523, "grad_norm": 32.93946075439453, "learning_rate": 1.4211391382180637e-08, "logits/chosen": -1.0074067115783691, "logits/rejected": -0.9894207715988159, "logps/chosen": -825.5562744140625, "logps/ref_chosen": -65.13285827636719, "logps/ref_rejected": -74.70050048828125, "logps/rejected": -1166.73681640625, "loss": 1.161, "margin_dpo/margin_mean": 331.6129455566406, "margin_dpo/margin_std": 562.9627685546875, "step": 616 }, { "epoch": 0.9060205580029369, "fcm_dpo/beta": 0.000999385491013527, "fcm_dpo/delta": 0.16457805037498474, "fcm_dpo/margin": 239.29647827148438, "fcm_dpo/q_t": 0.4457574784755707, "grad_norm": 54.66185760498047, "learning_rate": 1.378797888467345e-08, "logits/chosen": -0.9095033407211304, "logits/rejected": -0.8759035468101501, "logps/chosen": -777.98486328125, "logps/ref_chosen": -63.005550384521484, "logps/ref_rejected": -64.234130859375, "logps/rejected": -1018.5098876953125, "loss": 1.2344, "margin_dpo/margin_mean": 239.29647827148438, "margin_dpo/margin_std": 552.6876220703125, "step": 617 }, { "epoch": 0.9074889867841409, "fcm_dpo/beta": 0.0009954373817890882, "fcm_dpo/delta": -0.11912906914949417, "fcm_dpo/margin": 515.2799072265625, "fcm_dpo/q_t": 0.3897508680820465, "grad_norm": 41.893890380859375, "learning_rate": 1.3370790793601371e-08, "logits/chosen": -0.9436015486717224, "logits/rejected": -0.9779185652732849, "logps/chosen": -797.0797119140625, "logps/ref_chosen": -67.10134887695312, "logps/ref_rejected": -92.15340423583984, "logps/rejected": -1337.41162109375, "loss": 1.0795, "margin_dpo/margin_mean": 515.2799072265625, "margin_dpo/margin_std": 803.7247314453125, "step": 618 }, { "epoch": 0.908957415565345, "fcm_dpo/beta": 0.0009861327707767487, "fcm_dpo/delta": 0.018695583567023277, "fcm_dpo/margin": 387.3755798339844, "fcm_dpo/q_t": 0.42096269130706787, "grad_norm": 51.36298370361328, "learning_rate": 1.2959838102258535e-08, "logits/chosen": -0.9847129583358765, "logits/rejected": -0.9953620433807373, "logps/chosen": -796.674072265625, "logps/ref_chosen": -55.978233337402344, "logps/ref_rejected": -93.1854019165039, "logps/rejected": -1221.2568359375, "loss": 1.1758, "margin_dpo/margin_mean": 387.3755798339844, "margin_dpo/margin_std": 792.19189453125, "step": 619 }, { "epoch": 0.9104258443465492, "fcm_dpo/beta": 0.0009894105605781078, "fcm_dpo/delta": 0.04409804195165634, "fcm_dpo/margin": 361.19091796875, "fcm_dpo/q_t": 0.420096218585968, "grad_norm": 38.462669372558594, "learning_rate": 1.2555131639630567e-08, "logits/chosen": -1.0069198608398438, "logits/rejected": -1.0124790668487549, "logps/chosen": -751.1500244140625, "logps/ref_chosen": -59.79750061035156, "logps/ref_rejected": -78.41075134277344, "logps/rejected": -1130.9541015625, "loss": 1.147, "margin_dpo/margin_mean": 361.19091796875, "margin_dpo/margin_std": 616.5042724609375, "step": 620 }, { "epoch": 0.9118942731277533, "fcm_dpo/beta": 0.0009799831314012408, "fcm_dpo/delta": -0.15620173513889313, "fcm_dpo/margin": 558.8348388671875, "fcm_dpo/q_t": 0.3770964741706848, "grad_norm": 40.64413070678711, "learning_rate": 1.2156682070109086e-08, "logits/chosen": -1.0363627672195435, "logits/rejected": -1.0835423469543457, "logps/chosen": -709.837646484375, "logps/ref_chosen": -53.93375778198242, "logps/ref_rejected": -88.36951446533203, "logps/rejected": -1303.1083984375, "loss": 1.0241, "margin_dpo/margin_mean": 558.8348388671875, "margin_dpo/margin_std": 708.3245849609375, "step": 621 }, { "epoch": 0.9133627019089574, "fcm_dpo/beta": 0.0009631599532440305, "fcm_dpo/delta": 0.02442977949976921, "fcm_dpo/margin": 390.4576721191406, "fcm_dpo/q_t": 0.4159480631351471, "grad_norm": 40.2901611328125, "learning_rate": 1.1764499893210878e-08, "logits/chosen": -0.8899050951004028, "logits/rejected": -0.8752150535583496, "logps/chosen": -711.6151123046875, "logps/ref_chosen": -60.28582000732422, "logps/ref_rejected": -85.51873779296875, "logps/rejected": -1127.3056640625, "loss": 1.1281, "margin_dpo/margin_mean": 390.45770263671875, "margin_dpo/margin_std": 627.16259765625, "step": 622 }, { "epoch": 0.9148311306901615, "fcm_dpo/beta": 0.0009855421958491206, "fcm_dpo/delta": 0.10792680084705353, "fcm_dpo/margin": 299.546630859375, "fcm_dpo/q_t": 0.43511348962783813, "grad_norm": 37.03680419921875, "learning_rate": 1.1378595443300998e-08, "logits/chosen": -1.0057023763656616, "logits/rejected": -1.003598928451538, "logps/chosen": -770.2930297851562, "logps/ref_chosen": -64.1569595336914, "logps/ref_rejected": -85.08304595947266, "logps/rejected": -1090.765869140625, "loss": 1.1935, "margin_dpo/margin_mean": 299.5466613769531, "margin_dpo/margin_std": 606.3578491210938, "step": 623 }, { "epoch": 0.9162995594713657, "fcm_dpo/beta": 0.0009818391408771276, "fcm_dpo/delta": -0.0640857070684433, "fcm_dpo/margin": 469.6931457519531, "fcm_dpo/q_t": 0.3927311301231384, "grad_norm": 33.05645751953125, "learning_rate": 1.0998978889320582e-08, "logits/chosen": -1.034135103225708, "logits/rejected": -1.032621145248413, "logps/chosen": -779.2677612304688, "logps/ref_chosen": -71.91862487792969, "logps/ref_rejected": -97.13203430175781, "logps/rejected": -1274.17431640625, "loss": 1.0546, "margin_dpo/margin_mean": 469.6931457519531, "margin_dpo/margin_std": 583.8242797851562, "step": 624 }, { "epoch": 0.9177679882525698, "fcm_dpo/beta": 0.0009644476231187582, "fcm_dpo/delta": -0.11376336961984634, "fcm_dpo/margin": 526.8627319335938, "fcm_dpo/q_t": 0.3830944895744324, "grad_norm": 67.42720794677734, "learning_rate": 1.0625660234518913e-08, "logits/chosen": -0.9315009117126465, "logits/rejected": -0.9569610953330994, "logps/chosen": -696.3958740234375, "logps/ref_chosen": -58.342071533203125, "logps/ref_rejected": -86.09038543701172, "logps/rejected": -1251.0068359375, "loss": 1.0001, "margin_dpo/margin_mean": 526.8627319335938, "margin_dpo/margin_std": 528.9290771484375, "step": 625 }, { "epoch": 0.9192364170337739, "fcm_dpo/beta": 0.0009729490848258138, "fcm_dpo/delta": 0.1208110824227333, "fcm_dpo/margin": 290.5083923339844, "fcm_dpo/q_t": 0.43388864398002625, "grad_norm": 42.13291931152344, "learning_rate": 1.0258649316189721e-08, "logits/chosen": -0.9432566165924072, "logits/rejected": -0.9342271685600281, "logps/chosen": -887.2332763671875, "logps/ref_chosen": -75.11260986328125, "logps/ref_rejected": -99.188720703125, "logps/rejected": -1201.81787109375, "loss": 1.218, "margin_dpo/margin_mean": 290.5083923339844, "margin_dpo/margin_std": 636.7235107421875, "step": 626 }, { "epoch": 0.920704845814978, "fcm_dpo/beta": 0.0009612108115106821, "fcm_dpo/delta": -0.1750987321138382, "fcm_dpo/margin": 588.1961669921875, "fcm_dpo/q_t": 0.3845459520816803, "grad_norm": 32.053462982177734, "learning_rate": 9.897955805412e-09, "logits/chosen": -0.8771834373474121, "logits/rejected": -0.9516627192497253, "logps/chosen": -608.0906982421875, "logps/ref_chosen": -47.74314880371094, "logps/ref_rejected": -106.75448608398438, "logps/rejected": -1255.29833984375, "loss": 1.0301, "margin_dpo/margin_mean": 588.1962280273438, "margin_dpo/margin_std": 823.0296630859375, "step": 627 }, { "epoch": 0.922173274596182, "fcm_dpo/beta": 0.0009421667200513184, "fcm_dpo/delta": -0.013625391758978367, "fcm_dpo/margin": 438.33990478515625, "fcm_dpo/q_t": 0.4074496626853943, "grad_norm": 35.12033462524414, "learning_rate": 9.543589206795238e-09, "logits/chosen": -0.9957294464111328, "logits/rejected": -1.0113835334777832, "logps/chosen": -786.462646484375, "logps/ref_chosen": -60.182945251464844, "logps/ref_rejected": -101.55467224121094, "logps/rejected": -1266.17431640625, "loss": 1.1037, "margin_dpo/margin_mean": 438.33990478515625, "margin_dpo/margin_std": 655.6448974609375, "step": 628 }, { "epoch": 0.9236417033773862, "fcm_dpo/beta": 0.0009447969496250153, "fcm_dpo/delta": 0.008085597306489944, "fcm_dpo/margin": 415.1349792480469, "fcm_dpo/q_t": 0.4088371992111206, "grad_norm": 39.81538772583008, "learning_rate": 9.19555885822887e-09, "logits/chosen": -0.9972596168518066, "logits/rejected": -1.0130850076675415, "logps/chosen": -760.9837646484375, "logps/ref_chosen": -64.21354675292969, "logps/ref_rejected": -91.65367126464844, "logps/rejected": -1203.5589599609375, "loss": 1.0959, "margin_dpo/margin_mean": 415.1349792480469, "margin_dpo/margin_std": 550.6982421875, "step": 629 }, { "epoch": 0.9251101321585903, "fcm_dpo/beta": 0.0009731564205139875, "fcm_dpo/delta": 0.17090709507465363, "fcm_dpo/margin": 238.38504028320312, "fcm_dpo/q_t": 0.45254063606262207, "grad_norm": 57.21560287475586, "learning_rate": 8.85387393063622e-09, "logits/chosen": -0.9586611390113831, "logits/rejected": -0.929883599281311, "logps/chosen": -707.209716796875, "logps/ref_chosen": -59.29100036621094, "logps/ref_rejected": -83.59829711914062, "logps/rejected": -969.902099609375, "loss": 1.2624, "margin_dpo/margin_mean": 238.38504028320312, "margin_dpo/margin_std": 663.2811279296875, "step": 630 }, { "epoch": 0.9265785609397944, "fcm_dpo/beta": 0.0009837900288403034, "fcm_dpo/delta": 0.034737687557935715, "fcm_dpo/margin": 372.49517822265625, "fcm_dpo/q_t": 0.4164769649505615, "grad_norm": 31.798059463500977, "learning_rate": 8.518543427732949e-09, "logits/chosen": -1.0678491592407227, "logits/rejected": -1.0738096237182617, "logps/chosen": -826.5518798828125, "logps/ref_chosen": -59.45360565185547, "logps/ref_rejected": -80.95156860351562, "logps/rejected": -1220.544921875, "loss": 1.1496, "margin_dpo/margin_mean": 372.4952087402344, "margin_dpo/margin_std": 653.4166870117188, "step": 631 }, { "epoch": 0.9280469897209985, "fcm_dpo/beta": 0.0009830878116190434, "fcm_dpo/delta": 0.02477090060710907, "fcm_dpo/margin": 382.27484130859375, "fcm_dpo/q_t": 0.41463130712509155, "grad_norm": 43.760009765625, "learning_rate": 8.189576185789637e-09, "logits/chosen": -1.0130560398101807, "logits/rejected": -1.014772891998291, "logps/chosen": -740.15673828125, "logps/ref_chosen": -61.35155487060547, "logps/ref_rejected": -86.16017150878906, "logps/rejected": -1147.240234375, "loss": 1.1373, "margin_dpo/margin_mean": 382.27484130859375, "margin_dpo/margin_std": 631.7064819335938, "step": 632 }, { "epoch": 0.9295154185022027, "fcm_dpo/beta": 0.0010105203837156296, "fcm_dpo/delta": 0.12257733941078186, "fcm_dpo/margin": 277.3817138671875, "fcm_dpo/q_t": 0.43608927726745605, "grad_norm": 44.86662673950195, "learning_rate": 7.866980873399015e-09, "logits/chosen": -1.0408048629760742, "logits/rejected": -1.048929214477539, "logps/chosen": -789.4072265625, "logps/ref_chosen": -57.27816390991211, "logps/ref_rejected": -91.58395385742188, "logps/rejected": -1101.0947265625, "loss": 1.206, "margin_dpo/margin_mean": 277.3817443847656, "margin_dpo/margin_std": 575.353515625, "step": 633 }, { "epoch": 0.9309838472834068, "fcm_dpo/beta": 0.0010330864461138844, "fcm_dpo/delta": 0.1345616579055786, "fcm_dpo/margin": 260.56390380859375, "fcm_dpo/q_t": 0.4413578510284424, "grad_norm": 40.64011001586914, "learning_rate": 7.550765991247654e-09, "logits/chosen": -0.8922678828239441, "logits/rejected": -0.8835877180099487, "logps/chosen": -892.26904296875, "logps/ref_chosen": -66.61896514892578, "logps/ref_rejected": -107.12564849853516, "logps/rejected": -1193.339599609375, "loss": 1.2209, "margin_dpo/margin_mean": 260.56390380859375, "margin_dpo/margin_std": 591.0201416015625, "step": 634 }, { "epoch": 0.9324522760646109, "fcm_dpo/beta": 0.0010454216971993446, "fcm_dpo/delta": 0.03238925710320473, "fcm_dpo/margin": 352.79302978515625, "fcm_dpo/q_t": 0.4197089672088623, "grad_norm": 44.7509765625, "learning_rate": 7.240939871891699e-09, "logits/chosen": -1.0014543533325195, "logits/rejected": -0.9792279005050659, "logps/chosen": -778.3882446289062, "logps/ref_chosen": -73.95551300048828, "logps/ref_rejected": -82.50045776367188, "logps/rejected": -1139.726318359375, "loss": 1.1468, "margin_dpo/margin_mean": 352.79302978515625, "margin_dpo/margin_std": 629.5213623046875, "step": 635 }, { "epoch": 0.933920704845815, "fcm_dpo/beta": 0.0010495057795196772, "fcm_dpo/delta": -0.0377667136490345, "fcm_dpo/margin": 414.8923034667969, "fcm_dpo/q_t": 0.40556490421295166, "grad_norm": 32.477806091308594, "learning_rate": 6.937510679537628e-09, "logits/chosen": -0.9295854568481445, "logits/rejected": -0.9285162091255188, "logps/chosen": -739.5404052734375, "logps/ref_chosen": -59.628910064697266, "logps/ref_rejected": -81.97883605957031, "logps/rejected": -1176.78271484375, "loss": 1.0913, "margin_dpo/margin_mean": 414.8923034667969, "margin_dpo/margin_std": 634.6422729492188, "step": 636 }, { "epoch": 0.9353891336270191, "fcm_dpo/beta": 0.0010253810323774815, "fcm_dpo/delta": -0.06474175304174423, "fcm_dpo/margin": 449.619384765625, "fcm_dpo/q_t": 0.39714446663856506, "grad_norm": 31.273347854614258, "learning_rate": 6.640486409826785e-09, "logits/chosen": -1.0574856996536255, "logits/rejected": -1.1046029329299927, "logps/chosen": -716.3837890625, "logps/ref_chosen": -49.652687072753906, "logps/ref_rejected": -98.40513610839844, "logps/rejected": -1214.755615234375, "loss": 1.0641, "margin_dpo/margin_mean": 449.619384765625, "margin_dpo/margin_std": 604.8092651367188, "step": 637 }, { "epoch": 0.9368575624082232, "fcm_dpo/beta": 0.0010413650888949633, "fcm_dpo/delta": 0.01695757359266281, "fcm_dpo/margin": 365.78363037109375, "fcm_dpo/q_t": 0.40809565782546997, "grad_norm": 42.46715545654297, "learning_rate": 6.349874889624962e-09, "logits/chosen": -0.9286128878593445, "logits/rejected": -0.9132605791091919, "logps/chosen": -708.2297973632812, "logps/ref_chosen": -58.156639099121094, "logps/ref_rejected": -79.3014907836914, "logps/rejected": -1095.158203125, "loss": 1.1521, "margin_dpo/margin_mean": 365.7836608886719, "margin_dpo/margin_std": 640.359130859375, "step": 638 }, { "epoch": 0.9383259911894273, "fcm_dpo/beta": 0.0010672295466065407, "fcm_dpo/delta": 0.23513709008693695, "fcm_dpo/margin": 158.20433044433594, "fcm_dpo/q_t": 0.46294790506362915, "grad_norm": 126.83493041992188, "learning_rate": 6.065683776815933e-09, "logits/chosen": -0.9369577169418335, "logits/rejected": -0.8788477182388306, "logps/chosen": -971.4729614257812, "logps/ref_chosen": -72.32319641113281, "logps/ref_rejected": -74.2749252319336, "logps/rejected": -1131.6290283203125, "loss": 1.3492, "margin_dpo/margin_mean": 158.20433044433594, "margin_dpo/margin_std": 680.5343017578125, "step": 639 }, { "epoch": 0.9397944199706314, "fcm_dpo/beta": 0.0010533035965636373, "fcm_dpo/delta": -0.20259231328964233, "fcm_dpo/margin": 561.17333984375, "fcm_dpo/q_t": 0.3751782476902008, "grad_norm": 42.19377136230469, "learning_rate": 5.7879205600998296e-09, "logits/chosen": -0.9074388742446899, "logits/rejected": -0.9390972852706909, "logps/chosen": -740.1415405273438, "logps/ref_chosen": -56.13436508178711, "logps/ref_rejected": -108.60014343261719, "logps/rejected": -1353.78076171875, "loss": 1.0146, "margin_dpo/margin_mean": 561.17333984375, "margin_dpo/margin_std": 757.92333984375, "step": 640 }, { "epoch": 0.9412628487518355, "fcm_dpo/beta": 0.0010397237492725253, "fcm_dpo/delta": 0.030443288385868073, "fcm_dpo/margin": 356.50567626953125, "fcm_dpo/q_t": 0.4193439483642578, "grad_norm": 39.22941207885742, "learning_rate": 5.516592558795746e-09, "logits/chosen": -0.9675936102867126, "logits/rejected": -0.9748636484146118, "logps/chosen": -830.5360107421875, "logps/ref_chosen": -64.99689483642578, "logps/ref_rejected": -86.99232482910156, "logps/rejected": -1209.037109375, "loss": 1.1667, "margin_dpo/margin_mean": 356.50567626953125, "margin_dpo/margin_std": 696.1326293945312, "step": 641 }, { "epoch": 0.9427312775330396, "fcm_dpo/beta": 0.0010383711196482182, "fcm_dpo/delta": -0.061861053109169006, "fcm_dpo/margin": 441.9483337402344, "fcm_dpo/q_t": 0.40887451171875, "grad_norm": 49.41666793823242, "learning_rate": 5.251706922648868e-09, "logits/chosen": -0.9242057800292969, "logits/rejected": -0.9570740461349487, "logps/chosen": -824.4931640625, "logps/ref_chosen": -65.68924713134766, "logps/ref_rejected": -110.24205017089844, "logps/rejected": -1310.9942626953125, "loss": 1.1366, "margin_dpo/margin_mean": 441.9483337402344, "margin_dpo/margin_std": 868.3840942382812, "step": 642 }, { "epoch": 0.9441997063142438, "fcm_dpo/beta": 0.0010298211127519608, "fcm_dpo/delta": 0.047456324100494385, "fcm_dpo/margin": 343.14263916015625, "fcm_dpo/q_t": 0.41927698254585266, "grad_norm": 44.1935920715332, "learning_rate": 4.993270631642038e-09, "logits/chosen": -1.0508840084075928, "logits/rejected": -1.0492839813232422, "logps/chosen": -730.5237426757812, "logps/ref_chosen": -51.94999694824219, "logps/ref_rejected": -87.46833801269531, "logps/rejected": -1109.1846923828125, "loss": 1.1361, "margin_dpo/margin_mean": 343.14263916015625, "margin_dpo/margin_std": 513.9169311523438, "step": 643 }, { "epoch": 0.9456681350954479, "fcm_dpo/beta": 0.0010451602283865213, "fcm_dpo/delta": 0.03396453708410263, "fcm_dpo/margin": 351.423583984375, "fcm_dpo/q_t": 0.42045366764068604, "grad_norm": 67.2789306640625, "learning_rate": 4.741290495811873e-09, "logits/chosen": -0.9379393458366394, "logits/rejected": -0.9444572329521179, "logps/chosen": -730.1318359375, "logps/ref_chosen": -59.017662048339844, "logps/ref_rejected": -87.13668823242188, "logps/rejected": -1109.67431640625, "loss": 1.1793, "margin_dpo/margin_mean": 351.4235534667969, "margin_dpo/margin_std": 714.7095947265625, "step": 644 }, { "epoch": 0.947136563876652, "fcm_dpo/beta": 0.0010768567444756627, "fcm_dpo/delta": 0.21616601943969727, "fcm_dpo/margin": 175.01731872558594, "fcm_dpo/q_t": 0.46020662784576416, "grad_norm": 97.86798858642578, "learning_rate": 4.495773155069299e-09, "logits/chosen": -0.9184206128120422, "logits/rejected": -0.9053988456726074, "logps/chosen": -743.8544921875, "logps/ref_chosen": -55.87602233886719, "logps/ref_rejected": -97.78080749511719, "logps/rejected": -960.776611328125, "loss": 1.3232, "margin_dpo/margin_mean": 175.017333984375, "margin_dpo/margin_std": 630.8798828125, "step": 645 }, { "epoch": 0.9486049926578561, "fcm_dpo/beta": 0.0011009529698640108, "fcm_dpo/delta": 0.07241631299257278, "fcm_dpo/margin": 299.66253662109375, "fcm_dpo/q_t": 0.4266805648803711, "grad_norm": 62.45351791381836, "learning_rate": 4.256725079024553e-09, "logits/chosen": -0.9712661504745483, "logits/rejected": -0.9501047730445862, "logps/chosen": -719.6700439453125, "logps/ref_chosen": -61.275787353515625, "logps/ref_rejected": -77.50580596923828, "logps/rejected": -1035.5626220703125, "loss": 1.1724, "margin_dpo/margin_mean": 299.66253662109375, "margin_dpo/margin_std": 550.3168334960938, "step": 646 }, { "epoch": 0.9500734214390602, "fcm_dpo/beta": 0.0011067369487136602, "fcm_dpo/delta": -0.017161661759018898, "fcm_dpo/margin": 376.28173828125, "fcm_dpo/q_t": 0.4051622450351715, "grad_norm": 33.93653106689453, "learning_rate": 4.024152566816791e-09, "logits/chosen": -0.8960561752319336, "logits/rejected": -0.9237020015716553, "logps/chosen": -640.0201416015625, "logps/ref_chosen": -54.8524169921875, "logps/ref_rejected": -93.5194091796875, "logps/rejected": -1054.9688720703125, "loss": 1.0902, "margin_dpo/margin_mean": 376.28173828125, "margin_dpo/margin_std": 518.373779296875, "step": 647 }, { "epoch": 0.9515418502202643, "fcm_dpo/beta": 0.001069669146090746, "fcm_dpo/delta": -0.2267727255821228, "fcm_dpo/margin": 573.0113525390625, "fcm_dpo/q_t": 0.3707248568534851, "grad_norm": 32.7856559753418, "learning_rate": 3.798061746947995e-09, "logits/chosen": -0.9845846891403198, "logits/rejected": -1.0428485870361328, "logps/chosen": -663.8997802734375, "logps/ref_chosen": -54.17146682739258, "logps/ref_rejected": -98.7127914428711, "logps/rejected": -1281.452392578125, "loss": 0.996, "margin_dpo/margin_mean": 573.0113525390625, "margin_dpo/margin_std": 760.2608032226562, "step": 648 }, { "epoch": 0.9530102790014684, "fcm_dpo/beta": 0.0010775276459753513, "fcm_dpo/delta": 0.11305912584066391, "fcm_dpo/margin": 268.9691162109375, "fcm_dpo/q_t": 0.43929412961006165, "grad_norm": 36.04080581665039, "learning_rate": 3.5784585771215235e-09, "logits/chosen": -0.995445191860199, "logits/rejected": -0.9858365654945374, "logps/chosen": -715.2255859375, "logps/ref_chosen": -62.480350494384766, "logps/ref_rejected": -80.07717895507812, "logps/rejected": -1001.79150390625, "loss": 1.2242, "margin_dpo/margin_mean": 268.9690856933594, "margin_dpo/margin_std": 619.8480224609375, "step": 649 }, { "epoch": 0.9544787077826725, "fcm_dpo/beta": 0.0010704685701057315, "fcm_dpo/delta": -0.05206456780433655, "fcm_dpo/margin": 419.9454345703125, "fcm_dpo/q_t": 0.40182268619537354, "grad_norm": 39.28509521484375, "learning_rate": 3.3653488440851253e-09, "logits/chosen": -0.9378179311752319, "logits/rejected": -0.9557840824127197, "logps/chosen": -739.037109375, "logps/ref_chosen": -56.09281921386719, "logps/ref_rejected": -98.26483917236328, "logps/rejected": -1201.1546630859375, "loss": 1.1067, "margin_dpo/margin_mean": 419.9454345703125, "margin_dpo/margin_std": 684.6904296875, "step": 650 }, { "epoch": 0.9559471365638766, "fcm_dpo/beta": 0.001042212126776576, "fcm_dpo/delta": -0.16722121834754944, "fcm_dpo/margin": 535.0650634765625, "fcm_dpo/q_t": 0.3747968077659607, "grad_norm": 39.4487419128418, "learning_rate": 3.158738163478475e-09, "logits/chosen": -0.9745798110961914, "logits/rejected": -1.02805757522583, "logps/chosen": -506.2451171875, "logps/ref_chosen": -43.42544937133789, "logps/ref_rejected": -99.95791625976562, "logps/rejected": -1097.842529296875, "loss": 0.985, "margin_dpo/margin_mean": 535.0650634765625, "margin_dpo/margin_std": 584.2362060546875, "step": 651 }, { "epoch": 0.9574155653450808, "fcm_dpo/beta": 0.0010319831781089306, "fcm_dpo/delta": -0.022920312359929085, "fcm_dpo/margin": 408.8804626464844, "fcm_dpo/q_t": 0.4074384272098541, "grad_norm": 37.125572204589844, "learning_rate": 2.9586319796851555e-09, "logits/chosen": -0.9512811303138733, "logits/rejected": -0.9735857248306274, "logps/chosen": -662.72412109375, "logps/ref_chosen": -62.57680892944336, "logps/ref_rejected": -111.76779174804688, "logps/rejected": -1120.7955322265625, "loss": 1.1193, "margin_dpo/margin_mean": 408.88043212890625, "margin_dpo/margin_std": 690.8095092773438, "step": 652 }, { "epoch": 0.9588839941262849, "fcm_dpo/beta": 0.001033414271660149, "fcm_dpo/delta": 0.01614570990204811, "fcm_dpo/margin": 372.00970458984375, "fcm_dpo/q_t": 0.41556644439697266, "grad_norm": 43.73779296875, "learning_rate": 2.7650355656892166e-09, "logits/chosen": -1.027014970779419, "logits/rejected": -1.0478019714355469, "logps/chosen": -775.9241943359375, "logps/ref_chosen": -61.11295700073242, "logps/ref_rejected": -103.24960327148438, "logps/rejected": -1190.070556640625, "loss": 1.1355, "margin_dpo/margin_mean": 372.00970458984375, "margin_dpo/margin_std": 633.2871704101562, "step": 653 }, { "epoch": 0.960352422907489, "fcm_dpo/beta": 0.0010394034907221794, "fcm_dpo/delta": 0.044013820588588715, "fcm_dpo/margin": 344.02301025390625, "fcm_dpo/q_t": 0.42064782977104187, "grad_norm": 36.298988342285156, "learning_rate": 2.577954022936174e-09, "logits/chosen": -1.0113834142684937, "logits/rejected": -1.0238120555877686, "logps/chosen": -716.1892700195312, "logps/ref_chosen": -61.7281379699707, "logps/ref_rejected": -98.7738037109375, "logps/rejected": -1097.2579345703125, "loss": 1.1403, "margin_dpo/margin_mean": 344.02301025390625, "margin_dpo/margin_std": 565.0518798828125, "step": 654 }, { "epoch": 0.9618208516886931, "fcm_dpo/beta": 0.001045609824359417, "fcm_dpo/delta": 0.020625807344913483, "fcm_dpo/margin": 363.5751647949219, "fcm_dpo/q_t": 0.41563743352890015, "grad_norm": 32.227134704589844, "learning_rate": 2.397392281198729e-09, "logits/chosen": -0.9684833288192749, "logits/rejected": -1.0073820352554321, "logps/chosen": -658.065673828125, "logps/ref_chosen": -49.576812744140625, "logps/ref_rejected": -98.29183197021484, "logps/rejected": -1070.3558349609375, "loss": 1.1311, "margin_dpo/margin_mean": 363.57513427734375, "margin_dpo/margin_std": 596.8692626953125, "step": 655 }, { "epoch": 0.9632892804698973, "fcm_dpo/beta": 0.0010109146824106574, "fcm_dpo/delta": -0.2578379213809967, "fcm_dpo/margin": 634.94873046875, "fcm_dpo/q_t": 0.3600732684135437, "grad_norm": 76.16907501220703, "learning_rate": 2.223355098446622e-09, "logits/chosen": -0.8919037580490112, "logits/rejected": -0.9556556344032288, "logps/chosen": -714.67626953125, "logps/ref_chosen": -52.54943084716797, "logps/ref_rejected": -113.67464447021484, "logps/rejected": -1410.750244140625, "loss": 0.9459, "margin_dpo/margin_mean": 634.9487915039062, "margin_dpo/margin_std": 687.5736083984375, "step": 656 }, { "epoch": 0.9647577092511013, "fcm_dpo/beta": 0.0009771925397217274, "fcm_dpo/delta": -0.10524962842464447, "fcm_dpo/margin": 511.46331787109375, "fcm_dpo/q_t": 0.3879910707473755, "grad_norm": 39.690528869628906, "learning_rate": 2.055847060721566e-09, "logits/chosen": -1.0206704139709473, "logits/rejected": -1.0610435009002686, "logps/chosen": -666.410888671875, "logps/ref_chosen": -46.700538635253906, "logps/ref_rejected": -97.91487121582031, "logps/rejected": -1229.0885009765625, "loss": 1.0394, "margin_dpo/margin_mean": 511.46331787109375, "margin_dpo/margin_std": 661.5406494140625, "step": 657 }, { "epoch": 0.9662261380323054, "fcm_dpo/beta": 0.0009689409052953124, "fcm_dpo/delta": 0.006780218333005905, "fcm_dpo/margin": 405.5053405761719, "fcm_dpo/q_t": 0.409820556640625, "grad_norm": 38.81819534301758, "learning_rate": 1.8948725820160662e-09, "logits/chosen": -0.9818142652511597, "logits/rejected": -1.0077033042907715, "logps/chosen": -744.4568481445312, "logps/ref_chosen": -60.95820999145508, "logps/ref_rejected": -95.93949127197266, "logps/rejected": -1184.943359375, "loss": 1.1084, "margin_dpo/margin_mean": 405.5053405761719, "margin_dpo/margin_std": 579.2117919921875, "step": 658 }, { "epoch": 0.9676945668135095, "fcm_dpo/beta": 0.0009741444955579937, "fcm_dpo/delta": 0.01644733175635338, "fcm_dpo/margin": 394.2469482421875, "fcm_dpo/q_t": 0.41280660033226013, "grad_norm": 38.62456512451172, "learning_rate": 1.7404359041573723e-09, "logits/chosen": -0.9040530920028687, "logits/rejected": -0.8700916767120361, "logps/chosen": -667.4215698242188, "logps/ref_chosen": -76.74298095703125, "logps/ref_rejected": -87.4709701538086, "logps/rejected": -1072.396484375, "loss": 1.1053, "margin_dpo/margin_mean": 394.2469482421875, "margin_dpo/margin_std": 549.975830078125, "step": 659 }, { "epoch": 0.9691629955947136, "fcm_dpo/beta": 0.0009657872142270207, "fcm_dpo/delta": -0.09469971060752869, "fcm_dpo/margin": 507.5157470703125, "fcm_dpo/q_t": 0.3888697624206543, "grad_norm": 44.33787536621094, "learning_rate": 1.592541096695571e-09, "logits/chosen": -0.9871212840080261, "logits/rejected": -1.0068674087524414, "logps/chosen": -680.8031005859375, "logps/ref_chosen": -59.04788589477539, "logps/ref_rejected": -75.96005249023438, "logps/rejected": -1205.23095703125, "loss": 1.0367, "margin_dpo/margin_mean": 507.5157470703125, "margin_dpo/margin_std": 621.5484619140625, "step": 660 }, { "epoch": 0.9706314243759178, "fcm_dpo/beta": 0.0009580702753737569, "fcm_dpo/delta": -0.03481549024581909, "fcm_dpo/margin": 452.1288757324219, "fcm_dpo/q_t": 0.40379828214645386, "grad_norm": 48.46449279785156, "learning_rate": 1.4511920567963908e-09, "logits/chosen": -0.9898974895477295, "logits/rejected": -1.0011284351348877, "logps/chosen": -606.3502197265625, "logps/ref_chosen": -50.673973083496094, "logps/ref_rejected": -86.00569152832031, "logps/rejected": -1093.810791015625, "loss": 1.0762, "margin_dpo/margin_mean": 452.12884521484375, "margin_dpo/margin_std": 627.0391235351562, "step": 661 }, { "epoch": 0.9720998531571219, "fcm_dpo/beta": 0.000960524077527225, "fcm_dpo/delta": 0.052273206412792206, "fcm_dpo/margin": 363.91009521484375, "fcm_dpo/q_t": 0.42221707105636597, "grad_norm": 28.411582946777344, "learning_rate": 1.3163925091384532e-09, "logits/chosen": -0.9227521419525146, "logits/rejected": -0.9219968318939209, "logps/chosen": -726.12939453125, "logps/ref_chosen": -69.26106262207031, "logps/ref_rejected": -89.05593872070312, "logps/rejected": -1109.834228515625, "loss": 1.1608, "margin_dpo/margin_mean": 363.9100341796875, "margin_dpo/margin_std": 666.9813232421875, "step": 662 }, { "epoch": 0.973568281938326, "fcm_dpo/beta": 0.0009597113821655512, "fcm_dpo/delta": -0.011007752269506454, "fcm_dpo/margin": 427.71435546875, "fcm_dpo/q_t": 0.41027143597602844, "grad_norm": 28.180540084838867, "learning_rate": 1.1881460058152382e-09, "logits/chosen": -1.013875961303711, "logits/rejected": -1.0345901250839233, "logps/chosen": -687.0326538085938, "logps/ref_chosen": -64.87890625, "logps/ref_rejected": -113.92536926269531, "logps/rejected": -1163.79345703125, "loss": 1.1218, "margin_dpo/margin_mean": 427.7143859863281, "margin_dpo/margin_std": 725.150634765625, "step": 663 }, { "epoch": 0.9750367107195301, "fcm_dpo/beta": 0.000947619671933353, "fcm_dpo/delta": -0.06448528170585632, "fcm_dpo/margin": 486.3853759765625, "fcm_dpo/q_t": 0.3969351351261139, "grad_norm": 32.270606994628906, "learning_rate": 1.066455926241383e-09, "logits/chosen": -0.9712049961090088, "logits/rejected": -1.0025453567504883, "logps/chosen": -725.9244384765625, "logps/ref_chosen": -60.88847351074219, "logps/ref_rejected": -105.521728515625, "logps/rejected": -1256.943115234375, "loss": 1.0631, "margin_dpo/margin_mean": 486.3853454589844, "margin_dpo/margin_std": 648.6405639648438, "step": 664 }, { "epoch": 0.9765051395007343, "fcm_dpo/beta": 0.0009507514769211411, "fcm_dpo/delta": 0.013285509310662746, "fcm_dpo/margin": 407.2781677246094, "fcm_dpo/q_t": 0.41020679473876953, "grad_norm": 47.90570831298828, "learning_rate": 9.513254770636137e-10, "logits/chosen": -1.0707690715789795, "logits/rejected": -1.0900096893310547, "logps/chosen": -645.716552734375, "logps/ref_chosen": -60.56413269042969, "logps/ref_rejected": -84.80882263183594, "logps/rejected": -1077.239501953125, "loss": 1.0876, "margin_dpo/margin_mean": 407.2781677246094, "margin_dpo/margin_std": 489.22955322265625, "step": 665 }, { "epoch": 0.9779735682819384, "fcm_dpo/beta": 0.0009521855972707272, "fcm_dpo/delta": 0.014441363513469696, "fcm_dpo/margin": 405.50140380859375, "fcm_dpo/q_t": 0.412578284740448, "grad_norm": 29.04341697692871, "learning_rate": 8.427576920763956e-10, "logits/chosen": -0.878127932548523, "logits/rejected": -0.8861696124076843, "logps/chosen": -688.6695556640625, "logps/ref_chosen": -64.41996002197266, "logps/ref_rejected": -95.8916244506836, "logps/rejected": -1125.642578125, "loss": 1.1069, "margin_dpo/margin_mean": 405.50140380859375, "margin_dpo/margin_std": 567.3914184570312, "step": 666 }, { "epoch": 0.9794419970631424, "fcm_dpo/beta": 0.0009428112534806132, "fcm_dpo/delta": -0.06218276545405388, "fcm_dpo/margin": 486.9377136230469, "fcm_dpo/q_t": 0.3959430158138275, "grad_norm": 45.73838424682617, "learning_rate": 7.407554321417764e-10, "logits/chosen": -0.9214434623718262, "logits/rejected": -0.9194406867027283, "logps/chosen": -773.194091796875, "logps/ref_chosen": -69.27702331542969, "logps/ref_rejected": -87.83549499511719, "logps/rejected": -1278.6903076171875, "loss": 1.0586, "margin_dpo/margin_mean": 486.937744140625, "margin_dpo/margin_std": 627.3309326171875, "step": 667 }, { "epoch": 0.9809104258443465, "fcm_dpo/beta": 0.0009598816395737231, "fcm_dpo/delta": 0.09079144895076752, "fcm_dpo/margin": 324.0137939453125, "fcm_dpo/q_t": 0.4328286647796631, "grad_norm": 54.188987731933594, "learning_rate": 6.453213851142225e-10, "logits/chosen": -0.9863263964653015, "logits/rejected": -0.9880591630935669, "logps/chosen": -829.204345703125, "logps/ref_chosen": -72.60400390625, "logps/ref_rejected": -103.73905944824219, "logps/rejected": -1184.353271484375, "loss": 1.2095, "margin_dpo/margin_mean": 324.0137634277344, "margin_dpo/margin_std": 717.3749389648438, "step": 668 }, { "epoch": 0.9823788546255506, "fcm_dpo/beta": 0.0009502802276983857, "fcm_dpo/delta": -0.07527793198823929, "fcm_dpo/margin": 496.47698974609375, "fcm_dpo/q_t": 0.3927857577800751, "grad_norm": 28.336467742919922, "learning_rate": 5.564580657695939e-10, "logits/chosen": -0.9510325193405151, "logits/rejected": -0.9615212678909302, "logps/chosen": -596.826416015625, "logps/ref_chosen": -46.116416931152344, "logps/ref_rejected": -77.92434692382812, "logps/rejected": -1125.111328125, "loss": 1.0523, "margin_dpo/margin_mean": 496.47698974609375, "margin_dpo/margin_std": 634.5511474609375, "step": 669 }, { "epoch": 0.9838472834067548, "fcm_dpo/beta": 0.0009398977854289114, "fcm_dpo/delta": -0.06452971696853638, "fcm_dpo/margin": 490.96636962890625, "fcm_dpo/q_t": 0.3954104483127594, "grad_norm": 30.532726287841797, "learning_rate": 4.741678157389739e-10, "logits/chosen": -0.8932855129241943, "logits/rejected": -0.9082292318344116, "logps/chosen": -611.8515625, "logps/ref_chosen": -62.34575271606445, "logps/ref_rejected": -96.9405517578125, "logps/rejected": -1137.412841796875, "loss": 1.0638, "margin_dpo/margin_mean": 490.96636962890625, "margin_dpo/margin_std": 644.054931640625, "step": 670 }, { "epoch": 0.9853157121879589, "fcm_dpo/beta": 0.0009400290437042713, "fcm_dpo/delta": 0.02662300132215023, "fcm_dpo/margin": 397.8563232421875, "fcm_dpo/q_t": 0.4140617549419403, "grad_norm": 37.733253479003906, "learning_rate": 3.9845280344705245e-10, "logits/chosen": -1.0189416408538818, "logits/rejected": -1.0476462841033936, "logps/chosen": -758.807373046875, "logps/ref_chosen": -48.00010681152344, "logps/ref_rejected": -83.81932067871094, "logps/rejected": -1192.48291015625, "loss": 1.1326, "margin_dpo/margin_mean": 397.8563232421875, "margin_dpo/margin_std": 642.0556640625, "step": 671 }, { "epoch": 0.986784140969163, "fcm_dpo/beta": 0.0009393775835633278, "fcm_dpo/delta": 0.019410815089941025, "fcm_dpo/margin": 405.93304443359375, "fcm_dpo/q_t": 0.4160517454147339, "grad_norm": 52.242919921875, "learning_rate": 3.293150240547549e-10, "logits/chosen": -1.0485416650772095, "logits/rejected": -1.0544018745422363, "logps/chosen": -830.88720703125, "logps/ref_chosen": -58.58328628540039, "logps/ref_rejected": -93.14015197753906, "logps/rejected": -1271.3770751953125, "loss": 1.145, "margin_dpo/margin_mean": 405.9330139160156, "margin_dpo/margin_std": 711.9027099609375, "step": 672 }, { "epoch": 0.9882525697503671, "fcm_dpo/beta": 0.0009472850942984223, "fcm_dpo/delta": 0.04647299647331238, "fcm_dpo/margin": 374.96588134765625, "fcm_dpo/q_t": 0.41997867822647095, "grad_norm": 32.77516555786133, "learning_rate": 2.6675629940689504e-10, "logits/chosen": -0.9867175817489624, "logits/rejected": -0.9888732433319092, "logps/chosen": -728.9349365234375, "logps/ref_chosen": -46.72320556640625, "logps/ref_rejected": -85.29623413085938, "logps/rejected": -1142.473876953125, "loss": 1.1382, "margin_dpo/margin_mean": 374.96588134765625, "margin_dpo/margin_std": 617.45556640625, "step": 673 }, { "epoch": 0.9897209985315712, "fcm_dpo/beta": 0.0009443633025512099, "fcm_dpo/delta": -0.0580584779381752, "fcm_dpo/margin": 482.2635498046875, "fcm_dpo/q_t": 0.39911842346191406, "grad_norm": 40.93299865722656, "learning_rate": 2.1077827798404725e-10, "logits/chosen": -0.9459275007247925, "logits/rejected": -0.9650304913520813, "logps/chosen": -617.8692016601562, "logps/ref_chosen": -45.445526123046875, "logps/ref_rejected": -70.04593658447266, "logps/rejected": -1124.733154296875, "loss": 1.0626, "margin_dpo/margin_mean": 482.26348876953125, "margin_dpo/margin_std": 641.1407470703125, "step": 674 }, { "epoch": 0.9911894273127754, "fcm_dpo/beta": 0.0009247527341358364, "fcm_dpo/delta": -0.05846470221877098, "fcm_dpo/margin": 491.4967956542969, "fcm_dpo/q_t": 0.39873215556144714, "grad_norm": 28.94685935974121, "learning_rate": 1.6138243485910863e-10, "logits/chosen": -0.9571169018745422, "logits/rejected": -0.9678725004196167, "logps/chosen": -673.1968994140625, "logps/ref_chosen": -44.17628479003906, "logps/ref_rejected": -74.09197998046875, "logps/rejected": -1194.609375, "loss": 1.0628, "margin_dpo/margin_mean": 491.4967956542969, "margin_dpo/margin_std": 623.8955078125, "step": 675 }, { "epoch": 0.9926578560939795, "fcm_dpo/beta": 0.0009249091381207108, "fcm_dpo/delta": -0.03686067834496498, "fcm_dpo/margin": 470.5931091308594, "fcm_dpo/q_t": 0.40108269453048706, "grad_norm": 26.681962966918945, "learning_rate": 1.1857007165852472e-10, "logits/chosen": -0.9203785061836243, "logits/rejected": -0.9312641620635986, "logps/chosen": -757.725830078125, "logps/ref_chosen": -71.39852905273438, "logps/ref_rejected": -88.3587646484375, "logps/rejected": -1245.279052734375, "loss": 1.0637, "margin_dpo/margin_mean": 470.59307861328125, "margin_dpo/margin_std": 574.8800048828125, "step": 676 }, { "epoch": 0.9941262848751835, "fcm_dpo/beta": 0.0009184239897876978, "fcm_dpo/delta": -0.015428077429533005, "fcm_dpo/margin": 451.59100341796875, "fcm_dpo/q_t": 0.4104122519493103, "grad_norm": 28.66457176208496, "learning_rate": 8.23423165278725e-11, "logits/chosen": -0.9908117055892944, "logits/rejected": -0.9794431328773499, "logps/chosen": -754.1339111328125, "logps/ref_chosen": -56.527435302734375, "logps/ref_rejected": -78.22654724121094, "logps/rejected": -1227.424072265625, "loss": 1.1016, "margin_dpo/margin_mean": 451.59100341796875, "margin_dpo/margin_std": 696.70068359375, "step": 677 }, { "epoch": 0.9955947136563876, "fcm_dpo/beta": 0.0009117278386838734, "fcm_dpo/delta": -0.07986919581890106, "fcm_dpo/margin": 522.123779296875, "fcm_dpo/q_t": 0.39320850372314453, "grad_norm": 41.66019821166992, "learning_rate": 5.270012410216185e-11, "logits/chosen": -0.915215015411377, "logits/rejected": -0.947140097618103, "logps/chosen": -621.6987915039062, "logps/ref_chosen": -46.13447570800781, "logps/ref_rejected": -80.60462951660156, "logps/rejected": -1178.292724609375, "loss": 1.0567, "margin_dpo/margin_mean": 522.123779296875, "margin_dpo/margin_std": 695.4920654296875, "step": 678 }, { "epoch": 0.9970631424375918, "fcm_dpo/beta": 0.0009118493180721998, "fcm_dpo/delta": 0.064061738550663, "fcm_dpo/margin": 370.836669921875, "fcm_dpo/q_t": 0.42350929975509644, "grad_norm": 31.21086883544922, "learning_rate": 2.9644275480772416e-11, "logits/chosen": -0.9314343929290771, "logits/rejected": -0.9189698696136475, "logps/chosen": -707.2667236328125, "logps/ref_chosen": -50.294921875, "logps/ref_rejected": -76.59813690185547, "logps/rejected": -1104.4066162109375, "loss": 1.1437, "margin_dpo/margin_mean": 370.836669921875, "margin_dpo/margin_std": 591.001220703125, "step": 679 }, { "epoch": 0.9985315712187959, "fcm_dpo/beta": 0.0009041996672749519, "fcm_dpo/delta": -0.052669595927000046, "fcm_dpo/margin": 497.2313537597656, "fcm_dpo/q_t": 0.39771518111228943, "grad_norm": 35.252685546875, "learning_rate": 1.31753782067201e-11, "logits/chosen": -0.9416483640670776, "logits/rejected": -0.9650506973266602, "logps/chosen": -763.13818359375, "logps/ref_chosen": -76.91569519042969, "logps/ref_rejected": -112.384765625, "logps/rejected": -1295.838623046875, "loss": 1.0844, "margin_dpo/margin_mean": 497.2313537597656, "margin_dpo/margin_std": 722.805419921875, "step": 680 }, { "epoch": 1.0, "fcm_dpo/beta": 0.000918180332519114, "fcm_dpo/delta": 0.040153294801712036, "fcm_dpo/margin": 392.08013916015625, "fcm_dpo/q_t": 0.4184055030345917, "grad_norm": 39.58386993408203, "learning_rate": 3.2938662507808745e-12, "logits/chosen": -1.003857135772705, "logits/rejected": -1.0180325508117676, "logps/chosen": -744.2374267578125, "logps/ref_chosen": -60.957279205322266, "logps/ref_rejected": -88.55797576904297, "logps/rejected": -1163.918212890625, "loss": 1.1386, "margin_dpo/margin_mean": 392.08013916015625, "margin_dpo/margin_std": 609.6609497070312, "step": 681 }, { "epoch": 1.0, "step": 681, "total_flos": 0.0, "train_loss": 1.0910053594522013, "train_runtime": 1869.3984, "train_samples_per_second": 23.322, "train_steps_per_second": 0.364 } ], "logging_steps": 1, "max_steps": 681, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }