{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9989528795811519, "eval_steps": 200, "global_step": 477, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0020942408376963353, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.02793481945991516, "fcm_dpo/q_t": 0.500069797039032, "grad_norm": 28.592390060424805, "learning_rate": 0.0, "logits/chosen": -0.5898098945617676, "logits/rejected": -0.604260265827179, "logps/chosen": -275.28570556640625, "logps/ref_chosen": -275.2312927246094, "logps/ref_rejected": -222.9380340576172, "logps/rejected": -222.96453857421875, "loss": 5.5463, "margin_dpo/margin_mean": -0.02793477475643158, "margin_dpo/margin_std": 0.5724214911460876, "step": 1 }, { "epoch": 0.004188481675392671, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.014312177896499634, "fcm_dpo/q_t": 0.4999642074108124, "grad_norm": 27.881120681762695, "learning_rate": 1.0416666666666666e-08, "logits/chosen": -0.6574729681015015, "logits/rejected": -0.6464410424232483, "logps/chosen": -264.7165222167969, "logps/ref_chosen": -264.7611083984375, "logps/ref_rejected": -242.5597686767578, "logps/rejected": -242.52951049804688, "loss": 5.5446, "margin_dpo/margin_mean": 0.014312252402305603, "margin_dpo/margin_std": 0.6423971652984619, "step": 2 }, { "epoch": 0.0062827225130890054, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.029146358370780945, "fcm_dpo/q_t": 0.4999271333217621, "grad_norm": 25.850038528442383, "learning_rate": 2.083333333333333e-08, "logits/chosen": -0.6840031743049622, "logits/rejected": -0.7351865172386169, "logps/chosen": -274.1335754394531, "logps/ref_chosen": -274.1018981933594, "logps/ref_rejected": -286.5882568359375, "logps/rejected": -286.64910888671875, "loss": 5.5441, "margin_dpo/margin_mean": 0.02914564311504364, "margin_dpo/margin_std": 0.7203992605209351, "step": 3 }, { "epoch": 0.008376963350785341, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.10328760743141174, "fcm_dpo/q_t": 0.499741792678833, "grad_norm": 31.70708656311035, "learning_rate": 3.125e-08, "logits/chosen": -0.6172086000442505, "logits/rejected": -0.6114800572395325, "logps/chosen": -329.83612060546875, "logps/ref_chosen": -329.8382568359375, "logps/ref_rejected": -303.2850646972656, "logps/rejected": -303.3861999511719, "loss": 5.5411, "margin_dpo/margin_mean": 0.10328748822212219, "margin_dpo/margin_std": 0.8034393787384033, "step": 4 }, { "epoch": 0.010471204188481676, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.01281556487083435, "fcm_dpo/q_t": 0.4999679923057556, "grad_norm": 29.54966163635254, "learning_rate": 4.166666666666666e-08, "logits/chosen": -0.5715648531913757, "logits/rejected": -0.587770938873291, "logps/chosen": -301.7329406738281, "logps/ref_chosen": -301.7389221191406, "logps/ref_rejected": -274.7654724121094, "logps/rejected": -274.77227783203125, "loss": 5.5447, "margin_dpo/margin_mean": 0.012814819812774658, "margin_dpo/margin_std": 0.8004137277603149, "step": 5 }, { "epoch": 0.012565445026178011, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.05144025385379791, "fcm_dpo/q_t": 0.4998714029788971, "grad_norm": 28.23720932006836, "learning_rate": 5.208333333333333e-08, "logits/chosen": -0.6801129579544067, "logits/rejected": -0.6429607272148132, "logps/chosen": -285.62481689453125, "logps/ref_chosen": -285.6946716308594, "logps/ref_rejected": -245.8200225830078, "logps/rejected": -245.80160522460938, "loss": 5.5432, "margin_dpo/margin_mean": 0.051440998911857605, "margin_dpo/margin_std": 0.691977858543396, "step": 6 }, { "epoch": 0.014659685863874346, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.057578980922698975, "fcm_dpo/q_t": 0.5001440048217773, "grad_norm": 28.544734954833984, "learning_rate": 6.25e-08, "logits/chosen": -0.5832664966583252, "logits/rejected": -0.6165621280670166, "logps/chosen": -264.64544677734375, "logps/ref_chosen": -264.65545654296875, "logps/ref_rejected": -253.10305786132812, "logps/rejected": -253.03549194335938, "loss": 5.5475, "margin_dpo/margin_mean": -0.05757877230644226, "margin_dpo/margin_std": 0.6711597442626953, "step": 7 }, { "epoch": 0.016753926701570682, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.09753617644309998, "fcm_dpo/q_t": 0.4997561573982239, "grad_norm": 30.755247116088867, "learning_rate": 7.291666666666667e-08, "logits/chosen": -0.6714497804641724, "logits/rejected": -0.6773282885551453, "logps/chosen": -354.1408996582031, "logps/ref_chosen": -354.1887512207031, "logps/ref_rejected": -282.9112243652344, "logps/rejected": -282.96087646484375, "loss": 5.5413, "margin_dpo/margin_mean": 0.097537100315094, "margin_dpo/margin_std": 0.7466810345649719, "step": 8 }, { "epoch": 0.018848167539267015, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.026623502373695374, "fcm_dpo/q_t": 0.5000665783882141, "grad_norm": 27.906946182250977, "learning_rate": 8.333333333333333e-08, "logits/chosen": -0.6355319023132324, "logits/rejected": -0.6535608768463135, "logps/chosen": -285.5481872558594, "logps/ref_chosen": -285.5502014160156, "logps/ref_rejected": -267.99664306640625, "logps/rejected": -267.9679870605469, "loss": 5.5463, "margin_dpo/margin_mean": -0.0266236811876297, "margin_dpo/margin_std": 0.6391922831535339, "step": 9 }, { "epoch": 0.020942408376963352, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.023563116788864136, "fcm_dpo/q_t": 0.4999410808086395, "grad_norm": 26.76718521118164, "learning_rate": 9.375e-08, "logits/chosen": -0.6935949325561523, "logits/rejected": -0.6888067722320557, "logps/chosen": -251.90386962890625, "logps/ref_chosen": -251.91238403320312, "logps/ref_rejected": -226.45260620117188, "logps/rejected": -226.46763610839844, "loss": 5.5443, "margin_dpo/margin_mean": 0.0235632061958313, "margin_dpo/margin_std": 0.7389193177223206, "step": 10 }, { "epoch": 0.023036649214659685, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.03727307915687561, "fcm_dpo/q_t": 0.5000931620597839, "grad_norm": 28.944982528686523, "learning_rate": 1.0416666666666667e-07, "logits/chosen": -0.5943973660469055, "logits/rejected": -0.6493593454360962, "logps/chosen": -301.0625, "logps/ref_chosen": -301.08343505859375, "logps/ref_rejected": -259.546630859375, "logps/rejected": -259.4883728027344, "loss": 5.5467, "margin_dpo/margin_mean": -0.037272870540618896, "margin_dpo/margin_std": 0.7176087498664856, "step": 11 }, { "epoch": 0.025130890052356022, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.10183002054691315, "fcm_dpo/q_t": 0.4997454583644867, "grad_norm": 30.07319450378418, "learning_rate": 1.1458333333333332e-07, "logits/chosen": -0.5795747637748718, "logits/rejected": -0.5394208431243896, "logps/chosen": -287.56854248046875, "logps/ref_chosen": -287.548095703125, "logps/ref_rejected": -277.37945556640625, "logps/rejected": -277.5017395019531, "loss": 5.5412, "margin_dpo/margin_mean": 0.10182976722717285, "margin_dpo/margin_std": 0.6723535060882568, "step": 12 }, { "epoch": 0.027225130890052355, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.15790200233459473, "fcm_dpo/q_t": 0.4996052384376526, "grad_norm": 27.2218017578125, "learning_rate": 1.25e-07, "logits/chosen": -0.6672236323356628, "logits/rejected": -0.6754846572875977, "logps/chosen": -270.6041564941406, "logps/ref_chosen": -270.6664123535156, "logps/ref_rejected": -274.6546936035156, "logps/rejected": -274.7503662109375, "loss": 5.5389, "margin_dpo/margin_mean": 0.1579025536775589, "margin_dpo/margin_std": 0.6908207535743713, "step": 13 }, { "epoch": 0.02931937172774869, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.01676630973815918, "fcm_dpo/q_t": 0.4999580979347229, "grad_norm": 28.227462768554688, "learning_rate": 1.3541666666666666e-07, "logits/chosen": -0.623089611530304, "logits/rejected": -0.6518293619155884, "logps/chosen": -281.58538818359375, "logps/ref_chosen": -281.59320068359375, "logps/ref_rejected": -263.52215576171875, "logps/rejected": -263.53106689453125, "loss": 5.5446, "margin_dpo/margin_mean": 0.016765296459197998, "margin_dpo/margin_std": 0.6453270316123962, "step": 14 }, { "epoch": 0.031413612565445025, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.08115784823894501, "fcm_dpo/q_t": 0.49979710578918457, "grad_norm": 30.37261390686035, "learning_rate": 1.4583333333333335e-07, "logits/chosen": -0.6461591720581055, "logits/rejected": -0.6596108675003052, "logps/chosen": -298.36016845703125, "logps/ref_chosen": -298.45343017578125, "logps/ref_rejected": -227.17832946777344, "logps/rejected": -227.16622924804688, "loss": 5.542, "margin_dpo/margin_mean": 0.08115695416927338, "margin_dpo/margin_std": 0.6280770301818848, "step": 15 }, { "epoch": 0.033507853403141365, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.1199236512184143, "fcm_dpo/q_t": 0.4997002184391022, "grad_norm": 30.1366024017334, "learning_rate": 1.5624999999999999e-07, "logits/chosen": -0.6011725068092346, "logits/rejected": -0.5981835722923279, "logps/chosen": -293.8862609863281, "logps/ref_chosen": -293.96661376953125, "logps/ref_rejected": -250.78443908691406, "logps/rejected": -250.82400512695312, "loss": 5.5404, "margin_dpo/margin_mean": 0.11992333829402924, "margin_dpo/margin_std": 0.720985472202301, "step": 16 }, { "epoch": 0.0356020942408377, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.06940680742263794, "fcm_dpo/q_t": 0.49982649087905884, "grad_norm": 27.645227432250977, "learning_rate": 1.6666666666666665e-07, "logits/chosen": -0.558211624622345, "logits/rejected": -0.5835133194923401, "logps/chosen": -262.30767822265625, "logps/ref_chosen": -262.39398193359375, "logps/ref_rejected": -248.500244140625, "logps/rejected": -248.48330688476562, "loss": 5.5424, "margin_dpo/margin_mean": 0.06940683722496033, "margin_dpo/margin_std": 0.6322791576385498, "step": 17 }, { "epoch": 0.03769633507853403, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.00013044476509094238, "fcm_dpo/q_t": 0.5000002980232239, "grad_norm": 29.71380043029785, "learning_rate": 1.7708333333333334e-07, "logits/chosen": -0.612942636013031, "logits/rejected": -0.619144856929779, "logps/chosen": -293.71783447265625, "logps/ref_chosen": -293.709228515625, "logps/ref_rejected": -274.5875244140625, "logps/rejected": -274.5960388183594, "loss": 5.5453, "margin_dpo/margin_mean": -0.00012956559658050537, "margin_dpo/margin_std": 0.7896002531051636, "step": 18 }, { "epoch": 0.039790575916230364, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.02526429295539856, "fcm_dpo/q_t": 0.4999368190765381, "grad_norm": 28.150474548339844, "learning_rate": 1.875e-07, "logits/chosen": -0.6289379000663757, "logits/rejected": -0.6254291534423828, "logps/chosen": -280.205322265625, "logps/ref_chosen": -280.26568603515625, "logps/ref_rejected": -259.9742736816406, "logps/rejected": -259.93914794921875, "loss": 5.5442, "margin_dpo/margin_mean": 0.025263652205467224, "margin_dpo/margin_std": 0.7644654512405396, "step": 19 }, { "epoch": 0.041884816753926704, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.10411535203456879, "fcm_dpo/q_t": 0.49973970651626587, "grad_norm": 29.686153411865234, "learning_rate": 1.9791666666666664e-07, "logits/chosen": -0.622660219669342, "logits/rejected": -0.6548238396644592, "logps/chosen": -303.71466064453125, "logps/ref_chosen": -303.8954162597656, "logps/ref_rejected": -260.214599609375, "logps/rejected": -260.13800048828125, "loss": 5.5411, "margin_dpo/margin_mean": 0.10411512851715088, "margin_dpo/margin_std": 0.766339898109436, "step": 20 }, { "epoch": 0.04397905759162304, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.04293261468410492, "fcm_dpo/q_t": 0.4998926520347595, "grad_norm": 35.22480392456055, "learning_rate": 2.0833333333333333e-07, "logits/chosen": -0.6408384442329407, "logits/rejected": -0.6681733131408691, "logps/chosen": -301.4923095703125, "logps/ref_chosen": -301.5334777832031, "logps/ref_rejected": -280.28900146484375, "logps/rejected": -280.2907409667969, "loss": 5.5435, "margin_dpo/margin_mean": 0.04293195903301239, "margin_dpo/margin_std": 0.8277014493942261, "step": 21 }, { "epoch": 0.04607329842931937, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.030557870864868164, "fcm_dpo/q_t": 0.500076413154602, "grad_norm": 25.291522979736328, "learning_rate": 2.1875e-07, "logits/chosen": -0.6586352586746216, "logits/rejected": -0.6604381799697876, "logps/chosen": -259.9430236816406, "logps/ref_chosen": -259.9951477050781, "logps/ref_rejected": -243.0721435546875, "logps/rejected": -242.98948669433594, "loss": 5.5465, "margin_dpo/margin_mean": -0.030558019876480103, "margin_dpo/margin_std": 0.7162632346153259, "step": 22 }, { "epoch": 0.048167539267015703, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.09941744804382324, "fcm_dpo/q_t": 0.4997514486312866, "grad_norm": 27.887392044067383, "learning_rate": 2.2916666666666663e-07, "logits/chosen": -0.6176055669784546, "logits/rejected": -0.6513772010803223, "logps/chosen": -282.0886535644531, "logps/ref_chosen": -282.1807556152344, "logps/ref_rejected": -265.0758056640625, "logps/rejected": -265.0830993652344, "loss": 5.5413, "margin_dpo/margin_mean": 0.09941692650318146, "margin_dpo/margin_std": 0.7664570808410645, "step": 23 }, { "epoch": 0.050261780104712044, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.24128368496894836, "fcm_dpo/q_t": 0.49939680099487305, "grad_norm": 29.68800163269043, "learning_rate": 2.3958333333333335e-07, "logits/chosen": -0.6591615676879883, "logits/rejected": -0.5714296102523804, "logps/chosen": -300.9408874511719, "logps/ref_chosen": -301.17962646484375, "logps/ref_rejected": -302.12786865234375, "logps/rejected": -302.1304016113281, "loss": 5.5356, "margin_dpo/margin_mean": 0.24128423631191254, "margin_dpo/margin_std": 0.7133185267448425, "step": 24 }, { "epoch": 0.05235602094240838, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.16697196662425995, "fcm_dpo/q_t": 0.49958258867263794, "grad_norm": 26.460615158081055, "learning_rate": 2.5e-07, "logits/chosen": -0.6041996479034424, "logits/rejected": -0.6127534508705139, "logps/chosen": -246.56582641601562, "logps/ref_chosen": -246.74649047851562, "logps/ref_rejected": -235.55638122558594, "logps/rejected": -235.54269409179688, "loss": 5.5386, "margin_dpo/margin_mean": 0.16697131097316742, "margin_dpo/margin_std": 0.7036500573158264, "step": 25 }, { "epoch": 0.05445026178010471, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.08371715247631073, "fcm_dpo/q_t": 0.499790757894516, "grad_norm": 28.732345581054688, "learning_rate": 2.604166666666667e-07, "logits/chosen": -0.6599952578544617, "logits/rejected": -0.6750520467758179, "logps/chosen": -281.9931335449219, "logps/ref_chosen": -282.1955871582031, "logps/ref_rejected": -235.3135528564453, "logps/rejected": -235.19482421875, "loss": 5.5419, "margin_dpo/margin_mean": 0.08371736109256744, "margin_dpo/margin_std": 0.8501687049865723, "step": 26 }, { "epoch": 0.05654450261780105, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.1340600550174713, "fcm_dpo/q_t": 0.4996648132801056, "grad_norm": 27.74333953857422, "learning_rate": 2.708333333333333e-07, "logits/chosen": -0.6518189907073975, "logits/rejected": -0.6709730625152588, "logps/chosen": -323.5457763671875, "logps/ref_chosen": -323.8563537597656, "logps/ref_rejected": -245.968017578125, "logps/rejected": -245.7915496826172, "loss": 5.5399, "margin_dpo/margin_mean": 0.13406014442443848, "margin_dpo/margin_std": 0.925900936126709, "step": 27 }, { "epoch": 0.05863874345549738, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.1435232013463974, "fcm_dpo/q_t": 0.49964118003845215, "grad_norm": 26.3175106048584, "learning_rate": 2.8125e-07, "logits/chosen": -0.622589111328125, "logits/rejected": -0.6318536400794983, "logps/chosen": -247.97296142578125, "logps/ref_chosen": -248.24673461914062, "logps/ref_rejected": -240.0382080078125, "logps/rejected": -239.90797424316406, "loss": 5.5395, "margin_dpo/margin_mean": 0.1435234248638153, "margin_dpo/margin_std": 0.7288922071456909, "step": 28 }, { "epoch": 0.060732984293193716, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.14979667961597443, "fcm_dpo/q_t": 0.4996255040168762, "grad_norm": 29.79783821105957, "learning_rate": 2.916666666666667e-07, "logits/chosen": -0.599511444568634, "logits/rejected": -0.6228891015052795, "logps/chosen": -317.9765319824219, "logps/ref_chosen": -318.2564392089844, "logps/ref_rejected": -286.75848388671875, "logps/rejected": -286.62841796875, "loss": 5.5393, "margin_dpo/margin_mean": 0.1497972011566162, "margin_dpo/margin_std": 0.8029959201812744, "step": 29 }, { "epoch": 0.06282722513089005, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.2501027584075928, "fcm_dpo/q_t": 0.49937474727630615, "grad_norm": 28.9178524017334, "learning_rate": 3.020833333333333e-07, "logits/chosen": -0.5949351191520691, "logits/rejected": -0.6119610071182251, "logps/chosen": -252.69354248046875, "logps/ref_chosen": -253.0491485595703, "logps/ref_rejected": -261.30029296875, "logps/rejected": -261.19482421875, "loss": 5.5353, "margin_dpo/margin_mean": 0.25010228157043457, "margin_dpo/margin_std": 0.975698709487915, "step": 30 }, { "epoch": 0.06492146596858639, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.2504439949989319, "fcm_dpo/q_t": 0.4993739426136017, "grad_norm": 25.57700538635254, "learning_rate": 3.1249999999999997e-07, "logits/chosen": -0.655745804309845, "logits/rejected": -0.690646767616272, "logps/chosen": -247.7589569091797, "logps/ref_chosen": -248.15301513671875, "logps/ref_rejected": -203.17703247070312, "logps/rejected": -203.03338623046875, "loss": 5.5353, "margin_dpo/margin_mean": 0.25044363737106323, "margin_dpo/margin_std": 1.0158027410507202, "step": 31 }, { "epoch": 0.06701570680628273, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.33064448833465576, "fcm_dpo/q_t": 0.4991734027862549, "grad_norm": 29.667165756225586, "learning_rate": 3.2291666666666666e-07, "logits/chosen": -0.6167346835136414, "logits/rejected": -0.622105062007904, "logps/chosen": -304.966796875, "logps/ref_chosen": -305.5399475097656, "logps/ref_rejected": -267.6527099609375, "logps/rejected": -267.4101867675781, "loss": 5.5321, "margin_dpo/margin_mean": 0.3306446075439453, "margin_dpo/margin_std": 0.9724135994911194, "step": 32 }, { "epoch": 0.06910994764397906, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.39138373732566833, "fcm_dpo/q_t": 0.49902158975601196, "grad_norm": 28.317340850830078, "learning_rate": 3.333333333333333e-07, "logits/chosen": -0.6441166400909424, "logits/rejected": -0.6569768190383911, "logps/chosen": -285.66668701171875, "logps/ref_chosen": -286.2335205078125, "logps/ref_rejected": -255.38748168945312, "logps/rejected": -255.2120361328125, "loss": 5.5297, "margin_dpo/margin_mean": 0.39138340950012207, "margin_dpo/margin_std": 1.0941178798675537, "step": 33 }, { "epoch": 0.0712041884816754, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.8573173880577087, "fcm_dpo/q_t": 0.4978567957878113, "grad_norm": 31.08516502380371, "learning_rate": 3.4375e-07, "logits/chosen": -0.619086503982544, "logits/rejected": -0.6280518770217896, "logps/chosen": -340.7860107421875, "logps/ref_chosen": -341.5920104980469, "logps/ref_rejected": -278.8866882324219, "logps/rejected": -278.93798828125, "loss": 5.5111, "margin_dpo/margin_mean": 0.8573174476623535, "margin_dpo/margin_std": 1.1765947341918945, "step": 34 }, { "epoch": 0.07329842931937172, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.3640906810760498, "fcm_dpo/q_t": 0.499089777469635, "grad_norm": 26.597396850585938, "learning_rate": 3.541666666666667e-07, "logits/chosen": -0.6332607269287109, "logits/rejected": -0.653661847114563, "logps/chosen": -264.4308166503906, "logps/ref_chosen": -265.0795593261719, "logps/ref_rejected": -264.4876708984375, "logps/rejected": -264.20306396484375, "loss": 5.5308, "margin_dpo/margin_mean": 0.3640906810760498, "margin_dpo/margin_std": 1.240203619003296, "step": 35 }, { "epoch": 0.07539267015706806, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.6718266606330872, "fcm_dpo/q_t": 0.4983205497264862, "grad_norm": 31.872516632080078, "learning_rate": 3.645833333333333e-07, "logits/chosen": -0.6042373180389404, "logits/rejected": -0.6219602823257446, "logps/chosen": -296.499755859375, "logps/ref_chosen": -297.3261413574219, "logps/ref_rejected": -282.09515380859375, "logps/rejected": -281.94061279296875, "loss": 5.5186, "margin_dpo/margin_mean": 0.6718263030052185, "margin_dpo/margin_std": 1.3965107202529907, "step": 36 }, { "epoch": 0.0774869109947644, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.5291406512260437, "fcm_dpo/q_t": 0.49867722392082214, "grad_norm": 30.831968307495117, "learning_rate": 3.75e-07, "logits/chosen": -0.6052833795547485, "logits/rejected": -0.6201093196868896, "logps/chosen": -313.28765869140625, "logps/ref_chosen": -314.0340270996094, "logps/ref_rejected": -299.3437805175781, "logps/rejected": -299.1265563964844, "loss": 5.5243, "margin_dpo/margin_mean": 0.5291397571563721, "margin_dpo/margin_std": 1.5934827327728271, "step": 37 }, { "epoch": 0.07958115183246073, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.6104308366775513, "fcm_dpo/q_t": 0.49847403168678284, "grad_norm": 28.433164596557617, "learning_rate": 3.8541666666666665e-07, "logits/chosen": -0.640455424785614, "logits/rejected": -0.6528275012969971, "logps/chosen": -281.47015380859375, "logps/ref_chosen": -282.54119873046875, "logps/ref_rejected": -269.7773132324219, "logps/rejected": -269.3166809082031, "loss": 5.5211, "margin_dpo/margin_mean": 0.610431969165802, "margin_dpo/margin_std": 1.5897610187530518, "step": 38 }, { "epoch": 0.08167539267015707, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.2843832969665527, "fcm_dpo/q_t": 0.49678951501846313, "grad_norm": 29.44791603088379, "learning_rate": 3.958333333333333e-07, "logits/chosen": -0.6190811991691589, "logits/rejected": -0.6332811713218689, "logps/chosen": -275.41473388671875, "logps/ref_chosen": -276.7729187011719, "logps/ref_rejected": -249.95889282226562, "logps/rejected": -249.88507080078125, "loss": 5.4944, "margin_dpo/margin_mean": 1.2843828201293945, "margin_dpo/margin_std": 1.8405652046203613, "step": 39 }, { "epoch": 0.08376963350785341, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.8125598430633545, "fcm_dpo/q_t": 0.49796897172927856, "grad_norm": 27.380224227905273, "learning_rate": 4.0625e-07, "logits/chosen": -0.6235227584838867, "logits/rejected": -0.6593804359436035, "logps/chosen": -283.1932678222656, "logps/ref_chosen": -284.30706787109375, "logps/ref_rejected": -244.4459991455078, "logps/rejected": -244.14476013183594, "loss": 5.5131, "margin_dpo/margin_mean": 0.8125599026679993, "margin_dpo/margin_std": 1.823110580444336, "step": 40 }, { "epoch": 0.08586387434554973, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.8139923810958862, "fcm_dpo/q_t": 0.4979651868343353, "grad_norm": 30.188688278198242, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -0.6231560707092285, "logits/rejected": -0.6478135585784912, "logps/chosen": -292.701171875, "logps/ref_chosen": -293.8151550292969, "logps/ref_rejected": -252.16815185546875, "logps/rejected": -251.86814880371094, "loss": 5.513, "margin_dpo/margin_mean": 0.8139930963516235, "margin_dpo/margin_std": 1.6932668685913086, "step": 41 }, { "epoch": 0.08795811518324607, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.8815785050392151, "fcm_dpo/q_t": 0.49779632687568665, "grad_norm": 27.49101448059082, "learning_rate": 4.270833333333333e-07, "logits/chosen": -0.6369996666908264, "logits/rejected": -0.6549193859100342, "logps/chosen": -251.56045532226562, "logps/ref_chosen": -252.76023864746094, "logps/ref_rejected": -261.0414733886719, "logps/rejected": -260.7232666015625, "loss": 5.5106, "margin_dpo/margin_mean": 0.881578803062439, "margin_dpo/margin_std": 2.2027502059936523, "step": 42 }, { "epoch": 0.09005235602094241, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.269668459892273, "fcm_dpo/q_t": 0.4968262314796448, "grad_norm": 29.89678955078125, "learning_rate": 4.375e-07, "logits/chosen": -0.5942052602767944, "logits/rejected": -0.6096649169921875, "logps/chosen": -315.5320129394531, "logps/ref_chosen": -316.8347473144531, "logps/ref_rejected": -273.7649230957031, "logps/rejected": -273.73187255859375, "loss": 5.4951, "margin_dpo/margin_mean": 1.2696670293807983, "margin_dpo/margin_std": 2.1477150917053223, "step": 43 }, { "epoch": 0.09214659685863874, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.6501388549804688, "fcm_dpo/q_t": 0.49587562680244446, "grad_norm": 30.845321655273438, "learning_rate": 4.479166666666667e-07, "logits/chosen": -0.5972121953964233, "logits/rejected": -0.5959709882736206, "logps/chosen": -285.3184509277344, "logps/ref_chosen": -286.8757019042969, "logps/ref_rejected": -282.4681396484375, "logps/rejected": -282.5610656738281, "loss": 5.4804, "margin_dpo/margin_mean": 1.6501388549804688, "margin_dpo/margin_std": 2.9746947288513184, "step": 44 }, { "epoch": 0.09424083769633508, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.1927553415298462, "fcm_dpo/q_t": 0.4970191717147827, "grad_norm": 28.909330368041992, "learning_rate": 4.5833333333333327e-07, "logits/chosen": -0.6916259527206421, "logits/rejected": -0.715716540813446, "logps/chosen": -322.6328125, "logps/ref_chosen": -324.2633972167969, "logps/ref_rejected": -293.09466552734375, "logps/rejected": -292.6568298339844, "loss": 5.4985, "margin_dpo/margin_mean": 1.192754864692688, "margin_dpo/margin_std": 2.8390185832977295, "step": 45 }, { "epoch": 0.09633507853403141, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.5148533582687378, "fcm_dpo/q_t": 0.4962137043476105, "grad_norm": 30.279727935791016, "learning_rate": 4.6874999999999996e-07, "logits/chosen": -0.6287131309509277, "logits/rejected": -0.6423863768577576, "logps/chosen": -296.6163635253906, "logps/ref_chosen": -298.3357238769531, "logps/ref_rejected": -267.66204833984375, "logps/rejected": -267.45751953125, "loss": 5.4855, "margin_dpo/margin_mean": 1.5148537158966064, "margin_dpo/margin_std": 2.574115753173828, "step": 46 }, { "epoch": 0.09842931937172775, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.9817731976509094, "fcm_dpo/q_t": 0.49754610657691956, "grad_norm": 26.394506454467773, "learning_rate": 4.791666666666667e-07, "logits/chosen": -0.590155303478241, "logits/rejected": -0.6099727153778076, "logps/chosen": -261.077392578125, "logps/ref_chosen": -262.5669250488281, "logps/ref_rejected": -258.70989990234375, "logps/rejected": -258.20208740234375, "loss": 5.5072, "margin_dpo/margin_mean": 0.9817725419998169, "margin_dpo/margin_std": 3.2907633781433105, "step": 47 }, { "epoch": 0.10052356020942409, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.5850274562835693, "fcm_dpo/q_t": 0.4960397481918335, "grad_norm": 27.51393699645996, "learning_rate": 4.895833333333333e-07, "logits/chosen": -0.625287652015686, "logits/rejected": -0.6512780785560608, "logps/chosen": -267.6490173339844, "logps/ref_chosen": -269.4932556152344, "logps/ref_rejected": -241.888916015625, "logps/rejected": -241.6297149658203, "loss": 5.4831, "margin_dpo/margin_mean": 1.5850276947021484, "margin_dpo/margin_std": 3.051654100418091, "step": 48 }, { "epoch": 0.10261780104712041, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.7378376722335815, "fcm_dpo/q_t": 0.4956568479537964, "grad_norm": 27.689477920532227, "learning_rate": 5e-07, "logits/chosen": -0.6677048802375793, "logits/rejected": -0.6521282196044922, "logps/chosen": -255.6532745361328, "logps/ref_chosen": -257.8844909667969, "logps/ref_rejected": -256.8912048339844, "logps/rejected": -256.3978271484375, "loss": 5.4772, "margin_dpo/margin_mean": 1.7378380298614502, "margin_dpo/margin_std": 3.387692928314209, "step": 49 }, { "epoch": 0.10471204188481675, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.1869100332260132, "fcm_dpo/q_t": 0.49703431129455566, "grad_norm": 27.907745361328125, "learning_rate": 4.999932966293553e-07, "logits/chosen": -0.6318182945251465, "logits/rejected": -0.6537318229675293, "logps/chosen": -299.6126403808594, "logps/ref_chosen": -301.62884521484375, "logps/ref_rejected": -298.2716064453125, "logps/rejected": -297.4422912597656, "loss": 5.4995, "margin_dpo/margin_mean": 1.1869091987609863, "margin_dpo/margin_std": 3.856821298599243, "step": 50 }, { "epoch": 0.1068062827225131, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 2.0278406143188477, "fcm_dpo/q_t": 0.49493393301963806, "grad_norm": 29.1589298248291, "learning_rate": 4.999731868769026e-07, "logits/chosen": -0.60748690366745, "logits/rejected": -0.6017611026763916, "logps/chosen": -267.2236022949219, "logps/ref_chosen": -269.37237548828125, "logps/ref_rejected": -297.0167541503906, "logps/rejected": -296.89581298828125, "loss": 5.4671, "margin_dpo/margin_mean": 2.0278408527374268, "margin_dpo/margin_std": 4.648531436920166, "step": 51 }, { "epoch": 0.10890052356020942, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.078564167022705, "fcm_dpo/q_t": 0.4923100769519806, "grad_norm": 30.302101135253906, "learning_rate": 4.99939671821067e-07, "logits/chosen": -0.6508050560951233, "logits/rejected": -0.6571372151374817, "logps/chosen": -304.177978515625, "logps/ref_chosen": -306.9028015136719, "logps/ref_rejected": -281.24737548828125, "logps/rejected": -281.6011047363281, "loss": 5.4254, "margin_dpo/margin_mean": 3.078564167022705, "margin_dpo/margin_std": 4.690369606018066, "step": 52 }, { "epoch": 0.11099476439790576, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 2.22479248046875, "fcm_dpo/q_t": 0.4944427013397217, "grad_norm": 31.287256240844727, "learning_rate": 4.998927532591591e-07, "logits/chosen": -0.6520200371742249, "logits/rejected": -0.6926702260971069, "logps/chosen": -283.11590576171875, "logps/ref_chosen": -285.9759521484375, "logps/ref_rejected": -273.9073486328125, "logps/rejected": -273.2720947265625, "loss": 5.4597, "margin_dpo/margin_mean": 2.22479248046875, "margin_dpo/margin_std": 5.078397274017334, "step": 53 }, { "epoch": 0.1130890052356021, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.866455316543579, "fcm_dpo/q_t": 0.49533912539482117, "grad_norm": 26.340913772583008, "learning_rate": 4.998324337072792e-07, "logits/chosen": -0.68650883436203, "logits/rejected": -0.6943265795707703, "logps/chosen": -303.7992858886719, "logps/ref_chosen": -306.504638671875, "logps/ref_rejected": -272.67431640625, "logps/rejected": -271.8354187011719, "loss": 5.4741, "margin_dpo/margin_mean": 1.866454839706421, "margin_dpo/margin_std": 5.443723678588867, "step": 54 }, { "epoch": 0.11518324607329843, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 2.2475740909576416, "fcm_dpo/q_t": 0.494386225938797, "grad_norm": 24.877641677856445, "learning_rate": 4.997587164001815e-07, "logits/chosen": -0.6414747834205627, "logits/rejected": -0.6457206606864929, "logps/chosen": -220.563720703125, "logps/ref_chosen": -222.33013916015625, "logps/ref_rejected": -206.59571838378906, "logps/rejected": -207.07687377929688, "loss": 5.4583, "margin_dpo/margin_mean": 2.2475738525390625, "margin_dpo/margin_std": 4.910269737243652, "step": 55 }, { "epoch": 0.11727748691099477, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.1674323081970215, "fcm_dpo/q_t": 0.49209311604499817, "grad_norm": 27.445068359375, "learning_rate": 4.996716052911017e-07, "logits/chosen": -0.6101264357566833, "logits/rejected": -0.6247260570526123, "logps/chosen": -247.60667419433594, "logps/ref_chosen": -250.47816467285156, "logps/ref_rejected": -228.25848388671875, "logps/rejected": -228.55442810058594, "loss": 5.4237, "margin_dpo/margin_mean": 3.167431354522705, "margin_dpo/margin_std": 5.867389678955078, "step": 56 }, { "epoch": 0.1193717277486911, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 4.167306423187256, "fcm_dpo/q_t": 0.48959389328956604, "grad_norm": 30.918928146362305, "learning_rate": 4.99571105051544e-07, "logits/chosen": -0.7010935544967651, "logits/rejected": -0.6720656156539917, "logps/chosen": -311.27130126953125, "logps/ref_chosen": -315.1195373535156, "logps/ref_rejected": -272.755615234375, "logps/rejected": -273.0746765136719, "loss": 5.3836, "margin_dpo/margin_mean": 4.167305946350098, "margin_dpo/margin_std": 5.622750759124756, "step": 57 }, { "epoch": 0.12146596858638743, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 2.323967933654785, "fcm_dpo/q_t": 0.49419358372688293, "grad_norm": 27.555803298950195, "learning_rate": 4.994572210710314e-07, "logits/chosen": -0.6160457730293274, "logits/rejected": -0.6402078866958618, "logps/chosen": -262.7194519042969, "logps/ref_chosen": -265.1816711425781, "logps/ref_rejected": -268.2203369140625, "logps/rejected": -268.0820617675781, "loss": 5.4563, "margin_dpo/margin_mean": 2.323967933654785, "margin_dpo/margin_std": 5.637367248535156, "step": 58 }, { "epoch": 0.12356020942408377, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.5584101676940918, "fcm_dpo/q_t": 0.4961104989051819, "grad_norm": 29.654539108276367, "learning_rate": 4.993299594568162e-07, "logits/chosen": -0.5985250473022461, "logits/rejected": -0.5907694697380066, "logps/chosen": -284.25274658203125, "logps/ref_chosen": -286.35394287109375, "logps/ref_rejected": -260.6757507324219, "logps/rejected": -260.1329345703125, "loss": 5.4889, "margin_dpo/margin_mean": 1.5584099292755127, "margin_dpo/margin_std": 7.0632781982421875, "step": 59 }, { "epoch": 0.1256544502617801, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.4080166816711426, "fcm_dpo/q_t": 0.4914897680282593, "grad_norm": 28.067386627197266, "learning_rate": 4.991893270335525e-07, "logits/chosen": -0.6754345297813416, "logits/rejected": -0.699802577495575, "logps/chosen": -255.8909912109375, "logps/ref_chosen": -258.74859619140625, "logps/ref_rejected": -255.04893493652344, "logps/rejected": -255.59933471679688, "loss": 5.4155, "margin_dpo/margin_mean": 3.4080190658569336, "margin_dpo/margin_std": 7.094330787658691, "step": 60 }, { "epoch": 0.12774869109947645, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.202296257019043, "fcm_dpo/q_t": 0.49200791120529175, "grad_norm": 29.841184616088867, "learning_rate": 4.990353313429303e-07, "logits/chosen": -0.6437735557556152, "logits/rejected": -0.6602544784545898, "logps/chosen": -275.47747802734375, "logps/ref_chosen": -278.4678955078125, "logps/ref_rejected": -252.02720642089844, "logps/rejected": -252.23904418945312, "loss": 5.4247, "margin_dpo/margin_mean": 3.202296257019043, "margin_dpo/margin_std": 7.591219425201416, "step": 61 }, { "epoch": 0.12984293193717278, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.3970751762390137, "fcm_dpo/q_t": 0.4915270209312439, "grad_norm": 26.655916213989258, "learning_rate": 4.988679806432711e-07, "logits/chosen": -0.6097227334976196, "logits/rejected": -0.6514406800270081, "logps/chosen": -268.9474182128906, "logps/ref_chosen": -272.92431640625, "logps/ref_rejected": -260.7935485839844, "logps/rejected": -260.2137145996094, "loss": 5.4163, "margin_dpo/margin_mean": 3.39707612991333, "margin_dpo/margin_std": 7.437541961669922, "step": 62 }, { "epoch": 0.1319371727748691, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.588202476501465, "fcm_dpo/q_t": 0.4910445213317871, "grad_norm": 28.286346435546875, "learning_rate": 4.986872839090852e-07, "logits/chosen": -0.6595807671546936, "logits/rejected": -0.6659517288208008, "logps/chosen": -273.69244384765625, "logps/ref_chosen": -277.0889892578125, "logps/ref_rejected": -273.3413391113281, "logps/rejected": -273.532958984375, "loss": 5.4086, "margin_dpo/margin_mean": 3.588200807571411, "margin_dpo/margin_std": 7.262460708618164, "step": 63 }, { "epoch": 0.13403141361256546, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 4.24724006652832, "fcm_dpo/q_t": 0.48940467834472656, "grad_norm": 28.308141708374023, "learning_rate": 4.9849325083059e-07, "logits/chosen": -0.628346860408783, "logits/rejected": -0.6231892704963684, "logps/chosen": -279.7200927734375, "logps/ref_chosen": -283.8244934082031, "logps/ref_rejected": -263.29351806640625, "logps/rejected": -263.4363708496094, "loss": 5.3847, "margin_dpo/margin_mean": 4.24724006652832, "margin_dpo/margin_std": 8.381464958190918, "step": 64 }, { "epoch": 0.13612565445026178, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.2838730812072754, "fcm_dpo/q_t": 0.49180322885513306, "grad_norm": 27.914520263671875, "learning_rate": 4.982858918131906e-07, "logits/chosen": -0.6988512277603149, "logits/rejected": -0.6669014692306519, "logps/chosen": -261.4900817871094, "logps/ref_chosen": -264.8699645996094, "logps/ref_rejected": -268.5076904296875, "logps/rejected": -268.4117431640625, "loss": 5.4206, "margin_dpo/margin_mean": 3.2838728427886963, "margin_dpo/margin_std": 7.35109281539917, "step": 65 }, { "epoch": 0.1382198952879581, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 4.0454559326171875, "fcm_dpo/q_t": 0.48991909623146057, "grad_norm": 27.6617488861084, "learning_rate": 4.980652179769217e-07, "logits/chosen": -0.6764880418777466, "logits/rejected": -0.696363091468811, "logps/chosen": -269.8972473144531, "logps/ref_chosen": -272.9283142089844, "logps/ref_rejected": -280.94696044921875, "logps/rejected": -281.96136474609375, "loss": 5.3953, "margin_dpo/margin_mean": 4.045454978942871, "margin_dpo/margin_std": 9.999269485473633, "step": 66 }, { "epoch": 0.14031413612565444, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.675020694732666, "fcm_dpo/q_t": 0.4908318519592285, "grad_norm": 25.536836624145508, "learning_rate": 4.978312411558517e-07, "logits/chosen": -0.6852215528488159, "logits/rejected": -0.7170518636703491, "logps/chosen": -262.1640319824219, "logps/ref_chosen": -266.18695068359375, "logps/ref_rejected": -250.17405700683594, "logps/rejected": -249.82615661621094, "loss": 5.4077, "margin_dpo/margin_mean": 3.6750199794769287, "margin_dpo/margin_std": 8.747812271118164, "step": 67 }, { "epoch": 0.1424083769633508, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 4.583087921142578, "fcm_dpo/q_t": 0.48859941959381104, "grad_norm": 28.211336135864258, "learning_rate": 4.975839738974473e-07, "logits/chosen": -0.6900507211685181, "logits/rejected": -0.7039142847061157, "logps/chosen": -294.9899597167969, "logps/ref_chosen": -297.9385986328125, "logps/ref_rejected": -261.5141296386719, "logps/rejected": -263.14862060546875, "loss": 5.3759, "margin_dpo/margin_mean": 4.583088397979736, "margin_dpo/margin_std": 10.535322189331055, "step": 68 }, { "epoch": 0.14450261780104712, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 6.044898986816406, "fcm_dpo/q_t": 0.48493677377700806, "grad_norm": 28.641454696655273, "learning_rate": 4.97323429461901e-07, "logits/chosen": -0.6796502470970154, "logits/rejected": -0.7097989320755005, "logps/chosen": -261.7384338378906, "logps/ref_chosen": -265.6175231933594, "logps/ref_rejected": -236.8287353515625, "logps/rejected": -238.99456787109375, "loss": 5.3173, "margin_dpo/margin_mean": 6.044898509979248, "margin_dpo/margin_std": 9.810757637023926, "step": 69 }, { "epoch": 0.14659685863874344, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 5.822492599487305, "fcm_dpo/q_t": 0.4854995310306549, "grad_norm": 28.677330017089844, "learning_rate": 4.970496218214204e-07, "logits/chosen": -0.6750044822692871, "logits/rejected": -0.7083183526992798, "logps/chosen": -291.96441650390625, "logps/ref_chosen": -296.2259216308594, "logps/ref_rejected": -254.68496704101562, "logps/rejected": -256.2459411621094, "loss": 5.3286, "margin_dpo/margin_mean": 5.822491645812988, "margin_dpo/margin_std": 10.934935569763184, "step": 70 }, { "epoch": 0.1486910994764398, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 5.134796142578125, "fcm_dpo/q_t": 0.4872013032436371, "grad_norm": 28.424619674682617, "learning_rate": 4.967625656594781e-07, "logits/chosen": -0.6523040533065796, "logits/rejected": -0.6413918733596802, "logps/chosen": -283.8147277832031, "logps/ref_chosen": -288.92724609375, "logps/ref_rejected": -278.6405334472656, "logps/rejected": -278.662841796875, "loss": 5.3592, "margin_dpo/margin_mean": 5.134795188903809, "margin_dpo/margin_std": 12.370285034179688, "step": 71 }, { "epoch": 0.15078534031413612, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 5.098928928375244, "fcm_dpo/q_t": 0.4872835576534271, "grad_norm": 28.138423919677734, "learning_rate": 4.964622763700252e-07, "logits/chosen": -0.6939007639884949, "logits/rejected": -0.705129861831665, "logps/chosen": -233.71646118164062, "logps/ref_chosen": -237.0452880859375, "logps/ref_rejected": -252.7946319580078, "logps/rejected": -254.56471252441406, "loss": 5.3564, "margin_dpo/margin_mean": 5.098929405212402, "margin_dpo/margin_std": 10.444880485534668, "step": 72 }, { "epoch": 0.15287958115183245, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 4.748826503753662, "fcm_dpo/q_t": 0.48815372586250305, "grad_norm": 27.864391326904297, "learning_rate": 4.961487700566646e-07, "logits/chosen": -0.659065306186676, "logits/rejected": -0.6768229603767395, "logps/chosen": -268.7459411621094, "logps/ref_chosen": -273.0531005859375, "logps/ref_rejected": -246.8330841064453, "logps/rejected": -247.2747802734375, "loss": 5.3737, "margin_dpo/margin_mean": 4.748826503753662, "margin_dpo/margin_std": 12.207172393798828, "step": 73 }, { "epoch": 0.1549738219895288, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 4.507737159729004, "fcm_dpo/q_t": 0.48877474665641785, "grad_norm": 30.305334091186523, "learning_rate": 4.958220635317885e-07, "logits/chosen": -0.7256600260734558, "logits/rejected": -0.7039333581924438, "logps/chosen": -338.9497985839844, "logps/ref_chosen": -342.2818908691406, "logps/ref_rejected": -330.0293884277344, "logps/rejected": -331.2049865722656, "loss": 5.3817, "margin_dpo/margin_mean": 4.5077362060546875, "margin_dpo/margin_std": 11.664762496948242, "step": 74 }, { "epoch": 0.15706806282722513, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 6.5859527587890625, "fcm_dpo/q_t": 0.48358994722366333, "grad_norm": 29.470287322998047, "learning_rate": 4.954821743156767e-07, "logits/chosen": -0.6431756615638733, "logits/rejected": -0.6447348594665527, "logps/chosen": -262.26544189453125, "logps/ref_chosen": -266.8641662597656, "logps/ref_rejected": -276.8699951171875, "logps/rejected": -278.8572692871094, "loss": 5.2993, "margin_dpo/margin_mean": 6.585953712463379, "margin_dpo/margin_std": 10.910937309265137, "step": 75 }, { "epoch": 0.15916230366492146, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 7.311924457550049, "fcm_dpo/q_t": 0.4818291962146759, "grad_norm": 29.281173706054688, "learning_rate": 4.951291206355559e-07, "logits/chosen": -0.7205427289009094, "logits/rejected": -0.7283482551574707, "logps/chosen": -277.0059814453125, "logps/ref_chosen": -281.174560546875, "logps/ref_rejected": -263.6067199707031, "logps/rejected": -266.7500305175781, "loss": 5.2755, "margin_dpo/margin_mean": 7.311923980712891, "margin_dpo/margin_std": 12.705620765686035, "step": 76 }, { "epoch": 0.1612565445026178, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 5.805636405944824, "fcm_dpo/q_t": 0.4855879247188568, "grad_norm": 33.04362106323242, "learning_rate": 4.947629214246236e-07, "logits/chosen": -0.5541229248046875, "logits/rejected": -0.5619992017745972, "logps/chosen": -302.3905944824219, "logps/ref_chosen": -306.09527587890625, "logps/ref_rejected": -253.49569702148438, "logps/rejected": -255.5966339111328, "loss": 5.3378, "margin_dpo/margin_mean": 5.805635452270508, "margin_dpo/margin_std": 14.091662406921387, "step": 77 }, { "epoch": 0.16335078534031414, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 8.697511672973633, "fcm_dpo/q_t": 0.47841718792915344, "grad_norm": 29.72622299194336, "learning_rate": 4.943835963210323e-07, "logits/chosen": -0.6819251179695129, "logits/rejected": -0.6768004298210144, "logps/chosen": -253.04547119140625, "logps/ref_chosen": -256.90234375, "logps/ref_rejected": -211.57154846191406, "logps/rejected": -216.41221618652344, "loss": 5.2264, "margin_dpo/margin_mean": 8.697509765625, "margin_dpo/margin_std": 14.436126708984375, "step": 78 }, { "epoch": 0.16544502617801046, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 8.278536796569824, "fcm_dpo/q_t": 0.479459285736084, "grad_norm": 29.883098602294922, "learning_rate": 4.939911656668361e-07, "logits/chosen": -0.6628604531288147, "logits/rejected": -0.6839243173599243, "logps/chosen": -263.1708068847656, "logps/ref_chosen": -266.2735595703125, "logps/ref_rejected": -251.57257080078125, "logps/rejected": -256.74835205078125, "loss": 5.2449, "margin_dpo/margin_mean": 8.278536796569824, "margin_dpo/margin_std": 14.98855972290039, "step": 79 }, { "epoch": 0.16753926701570682, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 6.632655143737793, "fcm_dpo/q_t": 0.48353880643844604, "grad_norm": 28.937639236450195, "learning_rate": 4.935856505068998e-07, "logits/chosen": -0.6736690998077393, "logits/rejected": -0.7038200497627258, "logps/chosen": -285.98919677734375, "logps/ref_chosen": -287.8509826660156, "logps/ref_rejected": -256.0766296386719, "logps/rejected": -260.8474426269531, "loss": 5.3041, "margin_dpo/margin_mean": 6.632654190063477, "margin_dpo/margin_std": 13.19876480102539, "step": 80 }, { "epoch": 0.16963350785340314, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 7.253467559814453, "fcm_dpo/q_t": 0.4820139408111572, "grad_norm": 28.075214385986328, "learning_rate": 4.93167072587771e-07, "logits/chosen": -0.6420468091964722, "logits/rejected": -0.6412660479545593, "logps/chosen": -266.10321044921875, "logps/ref_chosen": -268.5232238769531, "logps/ref_rejected": -237.81137084960938, "logps/rejected": -242.6448211669922, "loss": 5.2899, "margin_dpo/margin_mean": 7.253467559814453, "margin_dpo/margin_std": 16.714815139770508, "step": 81 }, { "epoch": 0.17172774869109947, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 7.97769021987915, "fcm_dpo/q_t": 0.48026588559150696, "grad_norm": 27.7528018951416, "learning_rate": 4.92735454356513e-07, "logits/chosen": -0.7254935503005981, "logits/rejected": -0.7326993346214294, "logps/chosen": -276.9371032714844, "logps/ref_chosen": -279.36395263671875, "logps/ref_rejected": -236.51365661621094, "logps/rejected": -242.0644989013672, "loss": 5.2605, "margin_dpo/margin_mean": 7.977689743041992, "margin_dpo/margin_std": 15.65487289428711, "step": 82 }, { "epoch": 0.17382198952879582, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 8.64671516418457, "fcm_dpo/q_t": 0.4785246253013611, "grad_norm": 30.814836502075195, "learning_rate": 4.922908189595017e-07, "logits/chosen": -0.6886410713195801, "logits/rejected": -0.6722111105918884, "logps/chosen": -273.9360046386719, "logps/ref_chosen": -274.21923828125, "logps/ref_rejected": -276.2212219238281, "logps/rejected": -284.584716796875, "loss": 5.243, "margin_dpo/margin_mean": 8.646713256835938, "margin_dpo/margin_std": 18.04184913635254, "step": 83 }, { "epoch": 0.17591623036649215, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 7.324193477630615, "fcm_dpo/q_t": 0.48183199763298035, "grad_norm": 29.859872817993164, "learning_rate": 4.918331902411841e-07, "logits/chosen": -0.7265677452087402, "logits/rejected": -0.7404079437255859, "logps/chosen": -293.82232666015625, "logps/ref_chosen": -294.3975524902344, "logps/ref_rejected": -279.81884765625, "logps/rejected": -286.56781005859375, "loss": 5.2888, "margin_dpo/margin_mean": 7.324193477630615, "margin_dpo/margin_std": 16.89883804321289, "step": 84 }, { "epoch": 0.17801047120418848, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 5.70173454284668, "fcm_dpo/q_t": 0.4858514070510864, "grad_norm": 29.388431549072266, "learning_rate": 4.913625927427995e-07, "logits/chosen": -0.6746452450752258, "logits/rejected": -0.6829299330711365, "logps/chosen": -245.21981811523438, "logps/ref_chosen": -243.66220092773438, "logps/ref_rejected": -263.9421691894531, "logps/rejected": -271.2015380859375, "loss": 5.3473, "margin_dpo/margin_mean": 5.701735019683838, "margin_dpo/margin_std": 15.451016426086426, "step": 85 }, { "epoch": 0.18010471204188483, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 8.923068046569824, "fcm_dpo/q_t": 0.4778454601764679, "grad_norm": 34.907039642333984, "learning_rate": 4.908790517010636e-07, "logits/chosen": -0.6926656365394592, "logits/rejected": -0.6865877509117126, "logps/chosen": -308.2077941894531, "logps/ref_chosen": -309.4306945800781, "logps/ref_rejected": -290.91278076171875, "logps/rejected": -298.6129455566406, "loss": 5.2276, "margin_dpo/margin_mean": 8.923067092895508, "margin_dpo/margin_std": 17.267658233642578, "step": 86 }, { "epoch": 0.18219895287958116, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 10.27783203125, "fcm_dpo/q_t": 0.47462230920791626, "grad_norm": 29.65764617919922, "learning_rate": 4.903825930468148e-07, "logits/chosen": -0.755806028842926, "logits/rejected": -0.7502421736717224, "logps/chosen": -278.2044677734375, "logps/ref_chosen": -278.0277099609375, "logps/ref_rejected": -245.70123291015625, "logps/rejected": -256.15582275390625, "loss": 5.1833, "margin_dpo/margin_mean": 10.27783203125, "margin_dpo/margin_std": 18.962289810180664, "step": 87 }, { "epoch": 0.18429319371727748, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 8.847644805908203, "fcm_dpo/q_t": 0.4781361222267151, "grad_norm": 28.80191421508789, "learning_rate": 4.898732434036243e-07, "logits/chosen": -0.7761508822441101, "logits/rejected": -0.7929233312606812, "logps/chosen": -268.6051025390625, "logps/ref_chosen": -266.5148010253906, "logps/ref_rejected": -265.90081787109375, "logps/rejected": -276.8387756347656, "loss": 5.2374, "margin_dpo/margin_mean": 8.847643852233887, "margin_dpo/margin_std": 19.153247833251953, "step": 88 }, { "epoch": 0.18638743455497384, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 9.27825927734375, "fcm_dpo/q_t": 0.4770185947418213, "grad_norm": 30.52988624572754, "learning_rate": 4.893510300863676e-07, "logits/chosen": -0.7448249459266663, "logits/rejected": -0.7356829643249512, "logps/chosen": -265.67352294921875, "logps/ref_chosen": -265.6893005371094, "logps/ref_rejected": -251.49314880371094, "logps/rejected": -260.7556457519531, "loss": 5.2198, "margin_dpo/margin_mean": 9.27825927734375, "margin_dpo/margin_std": 18.18901824951172, "step": 89 }, { "epoch": 0.18848167539267016, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 8.289998054504395, "fcm_dpo/q_t": 0.4794497489929199, "grad_norm": 29.910764694213867, "learning_rate": 4.8881598109976e-07, "logits/chosen": -0.7497580051422119, "logits/rejected": -0.7592126131057739, "logps/chosen": -308.5013122558594, "logps/ref_chosen": -307.4250183105469, "logps/ref_rejected": -265.7172546386719, "logps/rejected": -275.0835266113281, "loss": 5.2544, "margin_dpo/margin_mean": 8.289999008178711, "margin_dpo/margin_std": 17.661346435546875, "step": 90 }, { "epoch": 0.1905759162303665, "fcm_dpo/beta": 0.010252725332975388, "fcm_dpo/delta": 0.04954978823661804, "fcm_dpo/margin": 9.483511924743652, "fcm_dpo/q_t": 0.47634202241897583, "grad_norm": 33.50828552246094, "learning_rate": 4.882681251368548e-07, "logits/chosen": -0.6733120679855347, "logits/rejected": -0.6901057958602905, "logps/chosen": -237.88088989257812, "logps/ref_chosen": -235.74098205566406, "logps/ref_rejected": -226.6428985595703, "logps/rejected": -238.2663116455078, "loss": 5.2111, "margin_dpo/margin_mean": 9.483511924743652, "margin_dpo/margin_std": 19.319496154785156, "step": 91 }, { "epoch": 0.19267015706806281, "fcm_dpo/beta": 0.010404359549283981, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 10.150311470031738, "fcm_dpo/q_t": 0.47393330931663513, "grad_norm": 34.50823974609375, "learning_rate": 4.877074915775048e-07, "logits/chosen": -0.7354683876037598, "logits/rejected": -0.7188453674316406, "logps/chosen": -286.5132751464844, "logps/ref_chosen": -283.4475402832031, "logps/ref_rejected": -273.134033203125, "logps/rejected": -286.35009765625, "loss": 5.1857, "margin_dpo/margin_mean": 10.150311470031738, "margin_dpo/margin_std": 21.28767967224121, "step": 92 }, { "epoch": 0.19476439790575917, "fcm_dpo/beta": 0.010404359549283981, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 9.547552108764648, "fcm_dpo/q_t": 0.47551578283309937, "grad_norm": 29.792530059814453, "learning_rate": 4.871341104867864e-07, "logits/chosen": -0.7289955019950867, "logits/rejected": -0.7523810267448425, "logps/chosen": -235.75485229492188, "logps/ref_chosen": -233.33714294433594, "logps/ref_rejected": -230.54273986816406, "logps/rejected": -242.5079803466797, "loss": 5.2018, "margin_dpo/margin_mean": 9.547552108764648, "margin_dpo/margin_std": 19.47620391845703, "step": 93 }, { "epoch": 0.1968586387434555, "fcm_dpo/beta": 0.010404359549283981, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 9.37955093383789, "fcm_dpo/q_t": 0.47586768865585327, "grad_norm": 32.49482727050781, "learning_rate": 4.865480126133871e-07, "logits/chosen": -0.6883825659751892, "logits/rejected": -0.7099732160568237, "logps/chosen": -297.0543212890625, "logps/ref_chosen": -294.6528015136719, "logps/ref_rejected": -283.657958984375, "logps/rejected": -295.4390563964844, "loss": 5.2195, "margin_dpo/margin_mean": 9.379551887512207, "margin_dpo/margin_std": 21.819246292114258, "step": 94 }, { "epoch": 0.19895287958115182, "fcm_dpo/beta": 0.010404359549283981, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 10.186941146850586, "fcm_dpo/q_t": 0.4739447236061096, "grad_norm": 34.7429313659668, "learning_rate": 4.859492293879573e-07, "logits/chosen": -0.7289009094238281, "logits/rejected": -0.7504929304122925, "logps/chosen": -314.9253845214844, "logps/ref_chosen": -311.6697082519531, "logps/ref_rejected": -262.7471923828125, "logps/rejected": -276.1898193359375, "loss": 5.1947, "margin_dpo/margin_mean": 10.18694019317627, "margin_dpo/margin_std": 22.561256408691406, "step": 95 }, { "epoch": 0.20104712041884817, "fcm_dpo/beta": 0.010404359549283981, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 10.215592384338379, "fcm_dpo/q_t": 0.4738875925540924, "grad_norm": 36.46210479736328, "learning_rate": 4.853377929214243e-07, "logits/chosen": -0.7038691639900208, "logits/rejected": -0.7164921760559082, "logps/chosen": -287.2462158203125, "logps/ref_chosen": -282.55596923828125, "logps/ref_rejected": -242.71588134765625, "logps/rejected": -257.6216735839844, "loss": 5.1946, "margin_dpo/margin_mean": 10.215592384338379, "margin_dpo/margin_std": 23.375957489013672, "step": 96 }, { "epoch": 0.2031413612565445, "fcm_dpo/beta": 0.010404359549283981, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 12.028409957885742, "fcm_dpo/q_t": 0.4692074954509735, "grad_norm": 33.21619415283203, "learning_rate": 4.847137360032699e-07, "logits/chosen": -0.7515384554862976, "logits/rejected": -0.7390632629394531, "logps/chosen": -307.90765380859375, "logps/ref_chosen": -303.57781982421875, "logps/ref_rejected": -264.22491455078125, "logps/rejected": -280.58319091796875, "loss": 5.1173, "margin_dpo/margin_mean": 12.028410911560059, "margin_dpo/margin_std": 22.326217651367188, "step": 97 }, { "epoch": 0.20523560209424083, "fcm_dpo/beta": 0.010807948186993599, "fcm_dpo/delta": 0.08449017256498337, "fcm_dpo/margin": 12.213380813598633, "fcm_dpo/q_t": 0.4684543013572693, "grad_norm": 37.011268615722656, "learning_rate": 4.84077092099773e-07, "logits/chosen": -0.7741104364395142, "logits/rejected": -0.7865383625030518, "logps/chosen": -291.7223815917969, "logps/ref_chosen": -286.8303527832031, "logps/ref_rejected": -278.08331298828125, "logps/rejected": -295.1886901855469, "loss": 5.1132, "margin_dpo/margin_mean": 12.213380813598633, "margin_dpo/margin_std": 22.774032592773438, "step": 98 }, { "epoch": 0.20732984293193718, "fcm_dpo/beta": 0.011437967419624329, "fcm_dpo/delta": 0.09145952761173248, "fcm_dpo/margin": 12.564851760864258, "fcm_dpo/q_t": 0.46524322032928467, "grad_norm": 37.08080291748047, "learning_rate": 4.834278953522137e-07, "logits/chosen": -0.7426201701164246, "logits/rejected": -0.756097137928009, "logps/chosen": -285.139404296875, "logps/ref_chosen": -279.92120361328125, "logps/ref_rejected": -250.3365478515625, "logps/rejected": -268.11956787109375, "loss": 5.0901, "margin_dpo/margin_mean": 12.564850807189941, "margin_dpo/margin_std": 27.0224666595459, "step": 99 }, { "epoch": 0.2094240837696335, "fcm_dpo/beta": 0.012557308189570904, "fcm_dpo/delta": 0.07896663248538971, "fcm_dpo/margin": 12.57419490814209, "fcm_dpo/q_t": 0.46233466267585754, "grad_norm": 43.29024887084961, "learning_rate": 4.827661805750437e-07, "logits/chosen": -0.7817738056182861, "logits/rejected": -0.7950529456138611, "logps/chosen": -304.5354309082031, "logps/ref_chosen": -296.8276672363281, "logps/ref_rejected": -275.56146240234375, "logps/rejected": -295.8433837890625, "loss": 5.04, "margin_dpo/margin_mean": 12.574195861816406, "margin_dpo/margin_std": 24.096710205078125, "step": 100 }, { "epoch": 0.21151832460732983, "fcm_dpo/beta": 0.013111630454659462, "fcm_dpo/delta": 0.07151152938604355, "fcm_dpo/margin": 14.857452392578125, "fcm_dpo/q_t": 0.45359134674072266, "grad_norm": 41.36968231201172, "learning_rate": 4.820919832540181e-07, "logits/chosen": -0.770940363407135, "logits/rejected": -0.7847775816917419, "logps/chosen": -257.88330078125, "logps/ref_chosen": -252.74203491210938, "logps/ref_rejected": -276.4185485839844, "logps/rejected": -296.41729736328125, "loss": 4.9313, "margin_dpo/margin_mean": 14.857452392578125, "margin_dpo/margin_std": 26.29358673095703, "step": 101 }, { "epoch": 0.2136125654450262, "fcm_dpo/beta": 0.014527034014463425, "fcm_dpo/delta": 0.15123134851455688, "fcm_dpo/margin": 14.755053520202637, "fcm_dpo/q_t": 0.4487529695034027, "grad_norm": 44.943565368652344, "learning_rate": 4.814053395442932e-07, "logits/chosen": -0.7487014532089233, "logits/rejected": -0.7447975873947144, "logps/chosen": -224.57212829589844, "logps/ref_chosen": -219.5537109375, "logps/ref_rejected": -231.90853881835938, "logps/rejected": -251.68197631835938, "loss": 4.8727, "margin_dpo/margin_mean": 14.75505256652832, "margin_dpo/margin_std": 24.941452026367188, "step": 102 }, { "epoch": 0.2157068062827225, "fcm_dpo/beta": 0.016830556094646454, "fcm_dpo/delta": 0.15375208854675293, "fcm_dpo/margin": 13.61697769165039, "fcm_dpo/q_t": 0.44760948419570923, "grad_norm": 53.148414611816406, "learning_rate": 4.807062862684873e-07, "logits/chosen": -0.7735249996185303, "logits/rejected": -0.770460307598114, "logps/chosen": -264.299560546875, "logps/ref_chosen": -259.6750793457031, "logps/ref_rejected": -278.7400817871094, "logps/rejected": -296.9815368652344, "loss": 4.8764, "margin_dpo/margin_mean": 13.61697769165039, "margin_dpo/margin_std": 25.619842529296875, "step": 103 }, { "epoch": 0.21780104712041884, "fcm_dpo/beta": 0.01824803464114666, "fcm_dpo/delta": 0.12172321230173111, "fcm_dpo/margin": 10.087403297424316, "fcm_dpo/q_t": 0.45723575353622437, "grad_norm": 59.135841369628906, "learning_rate": 4.799948609147061e-07, "logits/chosen": -0.7728451490402222, "logits/rejected": -0.7799044251441956, "logps/chosen": -276.86041259765625, "logps/ref_chosen": -267.9741516113281, "logps/ref_rejected": -230.5306396484375, "logps/rejected": -249.5042724609375, "loss": 5.0674, "margin_dpo/margin_mean": 10.087403297424316, "margin_dpo/margin_std": 26.16142463684082, "step": 104 }, { "epoch": 0.2198952879581152, "fcm_dpo/beta": 0.019626103341579437, "fcm_dpo/delta": 0.09441255033016205, "fcm_dpo/margin": 20.40988540649414, "fcm_dpo/q_t": 0.40757566690444946, "grad_norm": 62.410152435302734, "learning_rate": 4.792711016345321e-07, "logits/chosen": -0.7623639106750488, "logits/rejected": -0.7740727066993713, "logps/chosen": -327.2814025878906, "logps/ref_chosen": -322.25482177734375, "logps/ref_rejected": -279.02978515625, "logps/rejected": -304.46624755859375, "loss": 4.3973, "margin_dpo/margin_mean": 20.409887313842773, "margin_dpo/margin_std": 26.728302001953125, "step": 105 }, { "epoch": 0.22198952879581152, "fcm_dpo/beta": 0.021983552724123, "fcm_dpo/delta": 0.10915235430002213, "fcm_dpo/margin": 12.417057991027832, "fcm_dpo/q_t": 0.4385029673576355, "grad_norm": 80.47908020019531, "learning_rate": 4.785350472409791e-07, "logits/chosen": -0.7452399730682373, "logits/rejected": -0.782451868057251, "logps/chosen": -308.17291259765625, "logps/ref_chosen": -296.15777587890625, "logps/ref_rejected": -266.2691650390625, "logps/rejected": -290.70135498046875, "loss": 4.9373, "margin_dpo/margin_mean": 12.4170560836792, "margin_dpo/margin_std": 29.023073196411133, "step": 106 }, { "epoch": 0.22408376963350785, "fcm_dpo/beta": 0.024012316018342972, "fcm_dpo/delta": 0.14064227044582367, "fcm_dpo/margin": 19.3704833984375, "fcm_dpo/q_t": 0.3971790373325348, "grad_norm": 77.79216766357422, "learning_rate": 4.777867372064105e-07, "logits/chosen": -0.78067547082901, "logits/rejected": -0.7740224599838257, "logps/chosen": -310.7627868652344, "logps/ref_chosen": -306.996337890625, "logps/ref_rejected": -296.79412841796875, "logps/rejected": -319.9310302734375, "loss": 4.3062, "margin_dpo/margin_mean": 19.370481491088867, "margin_dpo/margin_std": 27.15206527709961, "step": 107 }, { "epoch": 0.2261780104712042, "fcm_dpo/beta": 0.025636808946728706, "fcm_dpo/delta": 0.09028993546962738, "fcm_dpo/margin": 17.937637329101562, "fcm_dpo/q_t": 0.4029965102672577, "grad_norm": 286.3813781738281, "learning_rate": 4.770262116604223e-07, "logits/chosen": -0.7616235017776489, "logits/rejected": -0.7734853625297546, "logps/chosen": -299.8006286621094, "logps/ref_chosen": -295.1526794433594, "logps/ref_rejected": -235.974853515625, "logps/rejected": -258.5604553222656, "loss": 4.4937, "margin_dpo/margin_mean": 17.937637329101562, "margin_dpo/margin_std": 29.53498649597168, "step": 108 }, { "epoch": 0.22827225130890053, "fcm_dpo/beta": 0.02691740356385708, "fcm_dpo/delta": 0.008004628121852875, "fcm_dpo/margin": 19.709096908569336, "fcm_dpo/q_t": 0.3899995982646942, "grad_norm": 89.63356018066406, "learning_rate": 4.7625351138769166e-07, "logits/chosen": -0.7978358268737793, "logits/rejected": -0.796513020992279, "logps/chosen": -333.0800476074219, "logps/ref_chosen": -325.9248046875, "logps/ref_rejected": -279.15423583984375, "logps/rejected": -306.0185852050781, "loss": 4.3147, "margin_dpo/margin_mean": 19.709096908569336, "margin_dpo/margin_std": 30.691986083984375, "step": 109 }, { "epoch": 0.23036649214659685, "fcm_dpo/beta": 0.028422407805919647, "fcm_dpo/delta": 0.0776296854019165, "fcm_dpo/margin": 18.461952209472656, "fcm_dpo/q_t": 0.392780601978302, "grad_norm": 86.87859344482422, "learning_rate": 4.75468677825789e-07, "logits/chosen": -0.7915902137756348, "logits/rejected": -0.780044674873352, "logps/chosen": -281.5834655761719, "logps/ref_chosen": -274.439208984375, "logps/ref_rejected": -260.0552062988281, "logps/rejected": -285.6614074707031, "loss": 4.4874, "margin_dpo/margin_mean": 18.46194839477539, "margin_dpo/margin_std": 32.64317321777344, "step": 110 }, { "epoch": 0.2324607329842932, "fcm_dpo/beta": 0.029570797458291054, "fcm_dpo/delta": 0.029479999095201492, "fcm_dpo/margin": 19.259639739990234, "fcm_dpo/q_t": 0.38573166728019714, "grad_norm": 94.41033935546875, "learning_rate": 4.7467175306295647e-07, "logits/chosen": -0.8291243314743042, "logits/rejected": -0.8090481162071228, "logps/chosen": -336.6731262207031, "logps/ref_chosen": -329.2361755371094, "logps/ref_rejected": -287.82830810546875, "logps/rejected": -314.52490234375, "loss": 4.4329, "margin_dpo/margin_mean": 19.259639739990234, "margin_dpo/margin_std": 32.591739654541016, "step": 111 }, { "epoch": 0.23455497382198953, "fcm_dpo/beta": 0.028911547735333443, "fcm_dpo/delta": -0.026821672916412354, "fcm_dpo/margin": 12.475048065185547, "fcm_dpo/q_t": 0.4273641109466553, "grad_norm": 117.3414077758789, "learning_rate": 4.7386277983585053e-07, "logits/chosen": -0.7319104671478271, "logits/rejected": -0.7623211145401001, "logps/chosen": -269.155517578125, "logps/ref_chosen": -257.0593566894531, "logps/ref_rejected": -272.9595031738281, "logps/rejected": -297.53070068359375, "loss": 5.1793, "margin_dpo/margin_mean": 12.475048065185547, "margin_dpo/margin_std": 33.321533203125, "step": 112 }, { "epoch": 0.23664921465968586, "fcm_dpo/beta": 0.02749396488070488, "fcm_dpo/delta": -0.030711829662322998, "fcm_dpo/margin": 22.709571838378906, "fcm_dpo/q_t": 0.3759039044380188, "grad_norm": 89.33954620361328, "learning_rate": 4.7304180152725024e-07, "logits/chosen": -0.7936510443687439, "logits/rejected": -0.7988536953926086, "logps/chosen": -294.9634094238281, "logps/ref_chosen": -286.0416564941406, "logps/ref_rejected": -270.374267578125, "logps/rejected": -302.00555419921875, "loss": 4.2854, "margin_dpo/margin_mean": 22.709571838378906, "margin_dpo/margin_std": 36.09437942504883, "step": 113 }, { "epoch": 0.2387434554973822, "fcm_dpo/beta": 0.029794633388519287, "fcm_dpo/delta": 0.07771297544240952, "fcm_dpo/margin": 12.448982238769531, "fcm_dpo/q_t": 0.42108646035194397, "grad_norm": 107.05913543701172, "learning_rate": 4.7220886216373085e-07, "logits/chosen": -0.83903968334198, "logits/rejected": -0.841633677482605, "logps/chosen": -271.00335693359375, "logps/ref_chosen": -260.0084533691406, "logps/ref_rejected": -246.67190551757812, "logps/rejected": -270.11578369140625, "loss": 5.0306, "margin_dpo/margin_mean": 12.448982238769531, "margin_dpo/margin_std": 31.104576110839844, "step": 114 }, { "epoch": 0.24083769633507854, "fcm_dpo/beta": 0.029507935047149658, "fcm_dpo/delta": 0.016908658668398857, "fcm_dpo/margin": 12.143805503845215, "fcm_dpo/q_t": 0.4331102967262268, "grad_norm": 120.61966705322266, "learning_rate": 4.7136400641330245e-07, "logits/chosen": -0.8338419795036316, "logits/rejected": -0.7967959642410278, "logps/chosen": -310.32476806640625, "logps/ref_chosen": -299.4229736328125, "logps/ref_rejected": -272.1186828613281, "logps/rejected": -295.1643371582031, "loss": 5.1519, "margin_dpo/margin_mean": 12.143804550170898, "margin_dpo/margin_std": 34.1319465637207, "step": 115 }, { "epoch": 0.24293193717277486, "fcm_dpo/beta": 0.03158475458621979, "fcm_dpo/delta": 0.08304879814386368, "fcm_dpo/margin": 16.471763610839844, "fcm_dpo/q_t": 0.39313048124313354, "grad_norm": 96.66343688964844, "learning_rate": 4.70507279583015e-07, "logits/chosen": -0.8481428027153015, "logits/rejected": -0.8136316537857056, "logps/chosen": -284.63897705078125, "logps/ref_chosen": -279.263916015625, "logps/ref_rejected": -253.6192169189453, "logps/rejected": -275.46600341796875, "loss": 4.5386, "margin_dpo/margin_mean": 16.471763610839844, "margin_dpo/margin_std": 29.921730041503906, "step": 116 }, { "epoch": 0.2450261780104712, "fcm_dpo/beta": 0.03184635192155838, "fcm_dpo/delta": -0.10544593632221222, "fcm_dpo/margin": 17.663230895996094, "fcm_dpo/q_t": 0.3876641094684601, "grad_norm": 113.3866958618164, "learning_rate": 4.6963872761652834e-07, "logits/chosen": -0.7901442646980286, "logits/rejected": -0.7954122424125671, "logps/chosen": -266.06890869140625, "logps/ref_chosen": -259.2248840332031, "logps/ref_rejected": -229.3042755126953, "logps/rejected": -253.81155395507812, "loss": 4.4728, "margin_dpo/margin_mean": 17.663230895996094, "margin_dpo/margin_std": 28.427824020385742, "step": 117 }, { "epoch": 0.24712041884816754, "fcm_dpo/beta": 0.029734350740909576, "fcm_dpo/delta": -0.03558747097849846, "fcm_dpo/margin": 19.60186004638672, "fcm_dpo/q_t": 0.38436776399612427, "grad_norm": 113.9665756225586, "learning_rate": 4.687583970916486e-07, "logits/chosen": -0.7948500514030457, "logits/rejected": -0.7873266935348511, "logps/chosen": -276.48236083984375, "logps/ref_chosen": -267.0707092285156, "logps/ref_rejected": -272.7322082519531, "logps/rejected": -301.7456970214844, "loss": 4.4767, "margin_dpo/margin_mean": 19.601858139038086, "margin_dpo/margin_std": 34.46326446533203, "step": 118 }, { "epoch": 0.24921465968586387, "fcm_dpo/beta": 0.029399575665593147, "fcm_dpo/delta": -0.0034819915890693665, "fcm_dpo/margin": 15.70901870727539, "fcm_dpo/q_t": 0.4097801744937897, "grad_norm": 116.46439361572266, "learning_rate": 4.6786633521783005e-07, "logits/chosen": -0.8555842638015747, "logits/rejected": -0.8587056398391724, "logps/chosen": -336.5263671875, "logps/ref_chosen": -324.6766357421875, "logps/ref_rejected": -306.0322265625, "logps/rejected": -333.5909423828125, "loss": 4.8796, "margin_dpo/margin_mean": 15.709016799926758, "margin_dpo/margin_std": 34.084205627441406, "step": 119 }, { "epoch": 0.2513089005235602, "fcm_dpo/beta": 0.029722902923822403, "fcm_dpo/delta": 0.015680911019444466, "fcm_dpo/margin": 15.187647819519043, "fcm_dpo/q_t": 0.41209471225738525, "grad_norm": 98.18533325195312, "learning_rate": 4.669625898336438e-07, "logits/chosen": -0.8075263500213623, "logits/rejected": -0.8280918598175049, "logps/chosen": -324.54205322265625, "logps/ref_chosen": -315.2617492675781, "logps/ref_rejected": -265.32501220703125, "logps/rejected": -289.79296875, "loss": 4.8908, "margin_dpo/margin_mean": 15.187647819519043, "margin_dpo/margin_std": 33.20510482788086, "step": 120 }, { "epoch": 0.2534031413612565, "fcm_dpo/beta": 0.03124306909739971, "fcm_dpo/delta": 0.14657826721668243, "fcm_dpo/margin": 12.737668991088867, "fcm_dpo/q_t": 0.42548656463623047, "grad_norm": 110.32199096679688, "learning_rate": 4.6604720940421207e-07, "logits/chosen": -0.8259115815162659, "logits/rejected": -0.8430719971656799, "logps/chosen": -235.69189453125, "logps/ref_chosen": -222.99609375, "logps/ref_rejected": -226.92860412597656, "logps/rejected": -252.36209106445312, "loss": 5.0129, "margin_dpo/margin_mean": 12.737669944763184, "margin_dpo/margin_std": 32.302677154541016, "step": 121 }, { "epoch": 0.2554973821989529, "fcm_dpo/beta": 0.032185669988393784, "fcm_dpo/delta": -0.019368404522538185, "fcm_dpo/margin": 15.059699058532715, "fcm_dpo/q_t": 0.40549296140670776, "grad_norm": 117.572509765625, "learning_rate": 4.651202430186092e-07, "logits/chosen": -0.8742939829826355, "logits/rejected": -0.8380413055419922, "logps/chosen": -288.3365783691406, "logps/ref_chosen": -276.02630615234375, "logps/ref_rejected": -277.97418212890625, "logps/rejected": -305.3441162109375, "loss": 4.9239, "margin_dpo/margin_mean": 15.059700012207031, "margin_dpo/margin_std": 34.5055046081543, "step": 122 }, { "epoch": 0.25759162303664923, "fcm_dpo/beta": 0.03192441910505295, "fcm_dpo/delta": -0.06708841025829315, "fcm_dpo/margin": 20.746381759643555, "fcm_dpo/q_t": 0.3659403324127197, "grad_norm": 114.86331176757812, "learning_rate": 4.6418174038722924e-07, "logits/chosen": -0.7943709492683411, "logits/rejected": -0.794708788394928, "logps/chosen": -334.998291015625, "logps/ref_chosen": -328.1546325683594, "logps/ref_rejected": -280.6911315917969, "logps/rejected": -308.28118896484375, "loss": 4.2403, "margin_dpo/margin_mean": 20.746379852294922, "margin_dpo/margin_std": 32.13544464111328, "step": 123 }, { "epoch": 0.25968586387434556, "fcm_dpo/beta": 0.030819490551948547, "fcm_dpo/delta": 0.03179997205734253, "fcm_dpo/margin": 16.770048141479492, "fcm_dpo/q_t": 0.39261382818222046, "grad_norm": 99.67831420898438, "learning_rate": 4.6323175183912023e-07, "logits/chosen": -0.8295610547065735, "logits/rejected": -0.8021270036697388, "logps/chosen": -285.3762512207031, "logps/ref_chosen": -275.6961975097656, "logps/ref_rejected": -225.361572265625, "logps/rejected": -251.8116455078125, "loss": 4.5914, "margin_dpo/margin_mean": 16.770048141479492, "margin_dpo/margin_std": 29.99167251586914, "step": 124 }, { "epoch": 0.2617801047120419, "fcm_dpo/beta": 0.03114517405629158, "fcm_dpo/delta": -0.03549438342452049, "fcm_dpo/margin": 16.479698181152344, "fcm_dpo/q_t": 0.40479522943496704, "grad_norm": 124.4884033203125, "learning_rate": 4.6227032831928483e-07, "logits/chosen": -0.7944302558898926, "logits/rejected": -0.7552446722984314, "logps/chosen": -288.5038757324219, "logps/ref_chosen": -278.06976318359375, "logps/ref_rejected": -265.63873291015625, "logps/rejected": -292.5525207519531, "loss": 4.8872, "margin_dpo/margin_mean": 16.479698181152344, "margin_dpo/margin_std": 36.26911544799805, "step": 125 }, { "epoch": 0.2638743455497382, "fcm_dpo/beta": 0.03014766052365303, "fcm_dpo/delta": -0.020349113270640373, "fcm_dpo/margin": 18.311885833740234, "fcm_dpo/q_t": 0.38548335433006287, "grad_norm": 109.14166259765625, "learning_rate": 4.612975213859487e-07, "logits/chosen": -0.8047983646392822, "logits/rejected": -0.8239343166351318, "logps/chosen": -329.813232421875, "logps/ref_chosen": -321.3960876464844, "logps/ref_rejected": -285.37664794921875, "logps/rejected": -312.10565185546875, "loss": 4.4441, "margin_dpo/margin_mean": 18.311885833740234, "margin_dpo/margin_std": 30.9145450592041, "step": 126 }, { "epoch": 0.26596858638743454, "fcm_dpo/beta": 0.030198298394680023, "fcm_dpo/delta": -0.08730512112379074, "fcm_dpo/margin": 20.74812889099121, "fcm_dpo/q_t": 0.37573808431625366, "grad_norm": 107.66555786132812, "learning_rate": 4.603133832077953e-07, "logits/chosen": -0.87255859375, "logits/rejected": -0.8262636661529541, "logps/chosen": -313.08575439453125, "logps/ref_chosen": -306.55877685546875, "logps/ref_rejected": -274.8651428222656, "logps/rejected": -302.1402587890625, "loss": 4.3108, "margin_dpo/margin_mean": 20.748130798339844, "margin_dpo/margin_std": 31.776979446411133, "step": 127 }, { "epoch": 0.2680628272251309, "fcm_dpo/beta": 0.027010329067707062, "fcm_dpo/delta": -0.06802891194820404, "fcm_dpo/margin": 22.768661499023438, "fcm_dpo/q_t": 0.37576210498809814, "grad_norm": 87.82177734375, "learning_rate": 4.5931796656116837e-07, "logits/chosen": -0.7685502171516418, "logits/rejected": -0.7739553451538086, "logps/chosen": -268.2386779785156, "logps/ref_chosen": -265.3973693847656, "logps/ref_rejected": -250.9737548828125, "logps/rejected": -276.5837097167969, "loss": 4.2397, "margin_dpo/margin_mean": 22.768665313720703, "margin_dpo/margin_std": 34.84334945678711, "step": 128 }, { "epoch": 0.27015706806282724, "fcm_dpo/beta": 0.027978552505373955, "fcm_dpo/delta": 0.057149242609739304, "fcm_dpo/margin": 19.382301330566406, "fcm_dpo/q_t": 0.39152759313583374, "grad_norm": 95.93099212646484, "learning_rate": 4.5831132482724193e-07, "logits/chosen": -0.7922682166099548, "logits/rejected": -0.795950710773468, "logps/chosen": -307.2889404296875, "logps/ref_chosen": -303.158447265625, "logps/ref_rejected": -275.9891052246094, "logps/rejected": -299.50189208984375, "loss": 4.457, "margin_dpo/margin_mean": 19.38229751586914, "margin_dpo/margin_std": 33.825496673583984, "step": 129 }, { "epoch": 0.27225130890052357, "fcm_dpo/beta": 0.028778987005352974, "fcm_dpo/delta": 0.07279841601848602, "fcm_dpo/margin": 16.71417999267578, "fcm_dpo/q_t": 0.401623010635376, "grad_norm": 103.26705932617188, "learning_rate": 4.5729351198915705e-07, "logits/chosen": -0.7664986848831177, "logits/rejected": -0.8091428875923157, "logps/chosen": -292.35394287109375, "logps/ref_chosen": -286.4073486328125, "logps/ref_rejected": -294.38665771484375, "logps/rejected": -317.0474853515625, "loss": 4.6181, "margin_dpo/margin_mean": 16.714181900024414, "margin_dpo/margin_std": 32.14228820800781, "step": 130 }, { "epoch": 0.2743455497382199, "fcm_dpo/beta": 0.03138697147369385, "fcm_dpo/delta": 0.12776511907577515, "fcm_dpo/margin": 15.206416130065918, "fcm_dpo/q_t": 0.40508803725242615, "grad_norm": 126.76692199707031, "learning_rate": 4.5626458262912735e-07, "logits/chosen": -0.8393828868865967, "logits/rejected": -0.7898960113525391, "logps/chosen": -317.5235290527344, "logps/ref_chosen": -311.5650634765625, "logps/ref_rejected": -291.62432861328125, "logps/rejected": -312.7891845703125, "loss": 4.7479, "margin_dpo/margin_mean": 15.206417083740234, "margin_dpo/margin_std": 31.15882110595703, "step": 131 }, { "epoch": 0.2764397905759162, "fcm_dpo/beta": 0.03294968605041504, "fcm_dpo/delta": -0.09555768966674805, "fcm_dpo/margin": 20.942249298095703, "fcm_dpo/q_t": 0.37162911891937256, "grad_norm": 136.78445434570312, "learning_rate": 4.5522459192551166e-07, "logits/chosen": -0.8077597618103027, "logits/rejected": -0.7918823957443237, "logps/chosen": -272.0758972167969, "logps/ref_chosen": -270.0818176269531, "logps/ref_rejected": -284.3084411621094, "logps/rejected": -307.24481201171875, "loss": 4.3352, "margin_dpo/margin_mean": 20.942249298095703, "margin_dpo/margin_std": 33.96846389770508, "step": 132 }, { "epoch": 0.27853403141361255, "fcm_dpo/beta": 0.030396468937397003, "fcm_dpo/delta": -0.021258918568491936, "fcm_dpo/margin": 18.287437438964844, "fcm_dpo/q_t": 0.38483449816703796, "grad_norm": 97.30946350097656, "learning_rate": 4.541735956498554e-07, "logits/chosen": -0.8339589834213257, "logits/rejected": -0.841139018535614, "logps/chosen": -287.4236145019531, "logps/ref_chosen": -285.6213684082031, "logps/ref_rejected": -251.19386291503906, "logps/rejected": -271.2835693359375, "loss": 4.4624, "margin_dpo/margin_mean": 18.287437438964844, "margin_dpo/margin_std": 30.66234588623047, "step": 133 }, { "epoch": 0.2806282722513089, "fcm_dpo/beta": 0.03080589883029461, "fcm_dpo/delta": 0.050978198647499084, "fcm_dpo/margin": 15.381253242492676, "fcm_dpo/q_t": 0.40036991238594055, "grad_norm": 106.52774810791016, "learning_rate": 4.5311165016389914e-07, "logits/chosen": -0.8478070497512817, "logits/rejected": -0.8514746427536011, "logps/chosen": -328.1652526855469, "logps/ref_chosen": -318.92083740234375, "logps/ref_rejected": -293.1894836425781, "logps/rejected": -317.8151550292969, "loss": 4.662, "margin_dpo/margin_mean": 15.381254196166992, "margin_dpo/margin_std": 29.16480255126953, "step": 134 }, { "epoch": 0.28272251308900526, "fcm_dpo/beta": 0.031100064516067505, "fcm_dpo/delta": -0.0012684855610132217, "fcm_dpo/margin": 17.726974487304688, "fcm_dpo/q_t": 0.38483473658561707, "grad_norm": 133.8107147216797, "learning_rate": 4.520388124165564e-07, "logits/chosen": -0.7306185364723206, "logits/rejected": -0.7757068872451782, "logps/chosen": -296.7521667480469, "logps/ref_chosen": -292.8217468261719, "logps/ref_rejected": -269.2896728515625, "logps/rejected": -290.9470520019531, "loss": 4.365, "margin_dpo/margin_mean": 17.726974487304688, "margin_dpo/margin_std": 27.901412963867188, "step": 135 }, { "epoch": 0.2848167539267016, "fcm_dpo/beta": 0.03128836303949356, "fcm_dpo/delta": 0.02984962984919548, "fcm_dpo/margin": 16.618209838867188, "fcm_dpo/q_t": 0.3998725414276123, "grad_norm": 131.5796356201172, "learning_rate": 4.5095513994085974e-07, "logits/chosen": -0.7901206612586975, "logits/rejected": -0.7873492240905762, "logps/chosen": -278.5729064941406, "logps/ref_chosen": -272.8525390625, "logps/ref_rejected": -252.68202209472656, "logps/rejected": -275.0205993652344, "loss": 4.711, "margin_dpo/margin_mean": 16.618209838867188, "margin_dpo/margin_std": 32.85752487182617, "step": 136 }, { "epoch": 0.2869109947643979, "fcm_dpo/beta": 0.03243596479296684, "fcm_dpo/delta": 0.0307313185185194, "fcm_dpo/margin": 15.30479621887207, "fcm_dpo/q_t": 0.4031601846218109, "grad_norm": 127.34578704833984, "learning_rate": 4.498606908508753e-07, "logits/chosen": -0.8465839624404907, "logits/rejected": -0.8333037495613098, "logps/chosen": -308.864013671875, "logps/ref_chosen": -300.7522277832031, "logps/ref_rejected": -286.1935119628906, "logps/rejected": -309.6100769042969, "loss": 4.7322, "margin_dpo/margin_mean": 15.30479621887207, "margin_dpo/margin_std": 30.956771850585938, "step": 137 }, { "epoch": 0.28900523560209423, "fcm_dpo/beta": 0.032454121857881546, "fcm_dpo/delta": 0.005412563681602478, "fcm_dpo/margin": 18.291305541992188, "fcm_dpo/q_t": 0.3903680145740509, "grad_norm": 106.79438781738281, "learning_rate": 4.487555238385862e-07, "logits/chosen": -0.7613782286643982, "logits/rejected": -0.7434461712837219, "logps/chosen": -294.6986083984375, "logps/ref_chosen": -288.9369812011719, "logps/ref_rejected": -263.7076416015625, "logps/rejected": -287.7606201171875, "loss": 4.5596, "margin_dpo/margin_mean": 18.291303634643555, "margin_dpo/margin_std": 34.35835266113281, "step": 138 }, { "epoch": 0.29109947643979056, "fcm_dpo/beta": 0.03308243677020073, "fcm_dpo/delta": 0.03304573893547058, "fcm_dpo/margin": 13.005290985107422, "fcm_dpo/q_t": 0.41861557960510254, "grad_norm": 116.88390350341797, "learning_rate": 4.476396981707453e-07, "logits/chosen": -0.7813708782196045, "logits/rejected": -0.8129632472991943, "logps/chosen": -274.0767517089844, "logps/ref_chosen": -270.0443115234375, "logps/ref_rejected": -267.3226013183594, "logps/rejected": -284.3603210449219, "loss": 4.901, "margin_dpo/margin_mean": 13.005290031433105, "margin_dpo/margin_std": 29.456113815307617, "step": 139 }, { "epoch": 0.2931937172774869, "fcm_dpo/beta": 0.03421860188245773, "fcm_dpo/delta": 0.0003454945981502533, "fcm_dpo/margin": 17.4409236907959, "fcm_dpo/q_t": 0.3806909918785095, "grad_norm": 129.08346557617188, "learning_rate": 4.4651327368569684e-07, "logits/chosen": -0.8671438694000244, "logits/rejected": -0.841330349445343, "logps/chosen": -287.3354187011719, "logps/ref_chosen": -282.9555969238281, "logps/ref_rejected": -251.17181396484375, "logps/rejected": -272.9925537109375, "loss": 4.2991, "margin_dpo/margin_mean": 17.440921783447266, "margin_dpo/margin_std": 27.346405029296875, "step": 140 }, { "epoch": 0.29528795811518327, "fcm_dpo/beta": 0.03223487734794617, "fcm_dpo/delta": -0.05671250820159912, "fcm_dpo/margin": 20.16065216064453, "fcm_dpo/q_t": 0.36774590611457825, "grad_norm": 107.1982192993164, "learning_rate": 4.453763107901675e-07, "logits/chosen": -0.7901620268821716, "logits/rejected": -0.7895568013191223, "logps/chosen": -298.8914794921875, "logps/ref_chosen": -296.3001708984375, "logps/ref_rejected": -279.8486633300781, "logps/rejected": -302.6006164550781, "loss": 4.2532, "margin_dpo/margin_mean": 20.160648345947266, "margin_dpo/margin_std": 31.275304794311523, "step": 141 }, { "epoch": 0.2973821989528796, "fcm_dpo/beta": 0.031261567026376724, "fcm_dpo/delta": -0.0013711625942960382, "fcm_dpo/margin": 16.695674896240234, "fcm_dpo/q_t": 0.40020960569381714, "grad_norm": 104.9339370727539, "learning_rate": 4.4422887045602674e-07, "logits/chosen": -0.8087915778160095, "logits/rejected": -0.8152974843978882, "logps/chosen": -304.1774597167969, "logps/ref_chosen": -300.56585693359375, "logps/ref_rejected": -231.43316650390625, "logps/rejected": -251.74044799804688, "loss": 4.7019, "margin_dpo/margin_mean": 16.695674896240234, "margin_dpo/margin_std": 33.27724075317383, "step": 142 }, { "epoch": 0.2994764397905759, "fcm_dpo/beta": 0.03194243088364601, "fcm_dpo/delta": -0.005147319287061691, "fcm_dpo/margin": 18.84752655029297, "fcm_dpo/q_t": 0.37769022583961487, "grad_norm": 109.19286346435547, "learning_rate": 4.4307101421701755e-07, "logits/chosen": -0.7999674677848816, "logits/rejected": -0.7869732975959778, "logps/chosen": -300.0097961425781, "logps/ref_chosen": -296.73236083984375, "logps/ref_rejected": -266.45257568359375, "logps/rejected": -288.5776062011719, "loss": 4.3005, "margin_dpo/margin_mean": 18.847524642944336, "margin_dpo/margin_std": 29.313934326171875, "step": 143 }, { "epoch": 0.30157068062827225, "fcm_dpo/beta": 0.03147399052977562, "fcm_dpo/delta": 0.05607675388455391, "fcm_dpo/margin": 16.06841278076172, "fcm_dpo/q_t": 0.4018367528915405, "grad_norm": 109.13096618652344, "learning_rate": 4.419028041654559e-07, "logits/chosen": -0.8504543304443359, "logits/rejected": -0.8398086428642273, "logps/chosen": -302.9305419921875, "logps/ref_chosen": -298.843994140625, "logps/ref_rejected": -266.120849609375, "logps/rejected": -286.2757873535156, "loss": 4.6688, "margin_dpo/margin_mean": 16.06841278076172, "margin_dpo/margin_std": 32.15247344970703, "step": 144 }, { "epoch": 0.3036649214659686, "fcm_dpo/beta": 0.03136536106467247, "fcm_dpo/delta": -0.10698030889034271, "fcm_dpo/margin": 20.321487426757812, "fcm_dpo/q_t": 0.36860162019729614, "grad_norm": 104.23075103759766, "learning_rate": 4.4072430294890166e-07, "logits/chosen": -0.8474912047386169, "logits/rejected": -0.8559509515762329, "logps/chosen": -278.58154296875, "logps/ref_chosen": -275.7528381347656, "logps/ref_rejected": -214.74807739257812, "logps/rejected": -237.8982391357422, "loss": 4.1589, "margin_dpo/margin_mean": 20.321487426757812, "margin_dpo/margin_std": 28.55498504638672, "step": 145 }, { "epoch": 0.3057591623036649, "fcm_dpo/beta": 0.030392833054065704, "fcm_dpo/delta": 0.009405029937624931, "fcm_dpo/margin": 19.343936920166016, "fcm_dpo/q_t": 0.38136640191078186, "grad_norm": 100.81139373779297, "learning_rate": 4.395355737667985e-07, "logits/chosen": -0.817609429359436, "logits/rejected": -0.8185821771621704, "logps/chosen": -284.92779541015625, "logps/ref_chosen": -277.09820556640625, "logps/ref_rejected": -265.41046142578125, "logps/rejected": -292.58392333984375, "loss": 4.2804, "margin_dpo/margin_mean": 19.343936920166016, "margin_dpo/margin_std": 29.182607650756836, "step": 146 }, { "epoch": 0.3078534031413613, "fcm_dpo/beta": 0.03225337713956833, "fcm_dpo/delta": 0.03318355232477188, "fcm_dpo/margin": 15.443235397338867, "fcm_dpo/q_t": 0.40001511573791504, "grad_norm": 107.14227294921875, "learning_rate": 4.3833668036708483e-07, "logits/chosen": -0.8150308132171631, "logits/rejected": -0.8176466822624207, "logps/chosen": -299.32708740234375, "logps/ref_chosen": -291.4185791015625, "logps/ref_rejected": -253.43051147460938, "logps/rejected": -276.7822265625, "loss": 4.7888, "margin_dpo/margin_mean": 15.443236351013184, "margin_dpo/margin_std": 31.842870712280273, "step": 147 }, { "epoch": 0.3099476439790576, "fcm_dpo/beta": 0.03284765034914017, "fcm_dpo/delta": 0.06887248158454895, "fcm_dpo/margin": 15.128622055053711, "fcm_dpo/q_t": 0.4055444300174713, "grad_norm": 105.72512817382812, "learning_rate": 4.3712768704277524e-07, "logits/chosen": -0.8757432699203491, "logits/rejected": -0.8821508288383484, "logps/chosen": -244.3941650390625, "logps/ref_chosen": -236.74850463867188, "logps/ref_rejected": -231.4674072265625, "logps/rejected": -254.24166870117188, "loss": 4.7548, "margin_dpo/margin_mean": 15.128622055053711, "margin_dpo/margin_std": 31.366443634033203, "step": 148 }, { "epoch": 0.31204188481675393, "fcm_dpo/beta": 0.032370131462812424, "fcm_dpo/delta": -0.04777521640062332, "fcm_dpo/margin": 19.857650756835938, "fcm_dpo/q_t": 0.3684397339820862, "grad_norm": 107.77214813232422, "learning_rate": 4.3590865862851263e-07, "logits/chosen": -0.8252199292182922, "logits/rejected": -0.8139665126800537, "logps/chosen": -326.12774658203125, "logps/ref_chosen": -319.9284973144531, "logps/ref_rejected": -308.20233154296875, "logps/rejected": -334.2592468261719, "loss": 4.0675, "margin_dpo/margin_mean": 19.857654571533203, "margin_dpo/margin_std": 27.37247085571289, "step": 149 }, { "epoch": 0.31413612565445026, "fcm_dpo/beta": 0.032013505697250366, "fcm_dpo/delta": 0.011951310560107231, "fcm_dpo/margin": 18.362552642822266, "fcm_dpo/q_t": 0.38145214319229126, "grad_norm": 108.20628356933594, "learning_rate": 4.346796604970912e-07, "logits/chosen": -0.8032433390617371, "logits/rejected": -0.7947119474411011, "logps/chosen": -286.0317077636719, "logps/ref_chosen": -276.3182373046875, "logps/ref_rejected": -273.02215576171875, "logps/rejected": -301.0981750488281, "loss": 4.3501, "margin_dpo/margin_mean": 18.362550735473633, "margin_dpo/margin_std": 29.839893341064453, "step": 150 }, { "epoch": 0.3162303664921466, "fcm_dpo/beta": 0.029401123523712158, "fcm_dpo/delta": -0.19104339182376862, "fcm_dpo/margin": 26.312572479248047, "fcm_dpo/q_t": 0.34187808632850647, "grad_norm": 90.58390045166016, "learning_rate": 4.3344075855595097e-07, "logits/chosen": -0.8197271823883057, "logits/rejected": -0.8265554308891296, "logps/chosen": -304.98236083984375, "logps/ref_chosen": -297.31280517578125, "logps/ref_rejected": -266.1003723144531, "logps/rejected": -300.0824890136719, "loss": 3.7524, "margin_dpo/margin_mean": 26.312572479248047, "margin_dpo/margin_std": 30.86597442626953, "step": 151 }, { "epoch": 0.3183246073298429, "fcm_dpo/beta": 0.027028188109397888, "fcm_dpo/delta": -0.029568390920758247, "fcm_dpo/margin": 20.28197479248047, "fcm_dpo/q_t": 0.38840028643608093, "grad_norm": 95.14047241210938, "learning_rate": 4.3219201924364323e-07, "logits/chosen": -0.8347331285476685, "logits/rejected": -0.8374426364898682, "logps/chosen": -276.0089416503906, "logps/ref_chosen": -270.2470397949219, "logps/ref_rejected": -269.7749328613281, "logps/rejected": -295.8188781738281, "loss": 4.3761, "margin_dpo/margin_mean": 20.28197479248047, "margin_dpo/margin_std": 31.723121643066406, "step": 152 }, { "epoch": 0.3204188481675393, "fcm_dpo/beta": 0.0251263827085495, "fcm_dpo/delta": -0.1380881667137146, "fcm_dpo/margin": 28.840253829956055, "fcm_dpo/q_t": 0.34343641996383667, "grad_norm": 84.2206039428711, "learning_rate": 4.309335095262675e-07, "logits/chosen": -0.8285923004150391, "logits/rejected": -0.8218899369239807, "logps/chosen": -283.19036865234375, "logps/ref_chosen": -273.779052734375, "logps/ref_rejected": -280.9530944824219, "logps/rejected": -319.20465087890625, "loss": 3.6856, "margin_dpo/margin_mean": 28.840253829956055, "margin_dpo/margin_std": 30.283130645751953, "step": 153 }, { "epoch": 0.3225130890052356, "fcm_dpo/beta": 0.024389155209064484, "fcm_dpo/delta": -0.017212260514497757, "fcm_dpo/margin": 20.448200225830078, "fcm_dpo/q_t": 0.3947216272354126, "grad_norm": 89.34386444091797, "learning_rate": 4.2966529689388064e-07, "logits/chosen": -0.8547238707542419, "logits/rejected": -0.841791033744812, "logps/chosen": -301.56524658203125, "logps/ref_chosen": -289.9031982421875, "logps/ref_rejected": -261.5166320800781, "logps/rejected": -293.62689208984375, "loss": 4.4855, "margin_dpo/margin_mean": 20.448200225830078, "margin_dpo/margin_std": 34.4425048828125, "step": 154 }, { "epoch": 0.32460732984293195, "fcm_dpo/beta": 0.0245128832757473, "fcm_dpo/delta": 0.04135804995894432, "fcm_dpo/margin": 20.464195251464844, "fcm_dpo/q_t": 0.3978845477104187, "grad_norm": 100.74219512939453, "learning_rate": 4.2838744935687716e-07, "logits/chosen": -0.7908228635787964, "logits/rejected": -0.7928870916366577, "logps/chosen": -299.3333435058594, "logps/ref_chosen": -285.8612060546875, "logps/ref_rejected": -300.1272888183594, "logps/rejected": -334.0636291503906, "loss": 4.4532, "margin_dpo/margin_mean": 20.464195251464844, "margin_dpo/margin_std": 34.932029724121094, "step": 155 }, { "epoch": 0.3267015706806283, "fcm_dpo/beta": 0.024317309260368347, "fcm_dpo/delta": -0.13548636436462402, "fcm_dpo/margin": 29.958675384521484, "fcm_dpo/q_t": 0.3527216911315918, "grad_norm": 76.70926666259766, "learning_rate": 4.271000354423425e-07, "logits/chosen": -0.8232815265655518, "logits/rejected": -0.8277627825737, "logps/chosen": -291.4639587402344, "logps/ref_chosen": -279.0354919433594, "logps/ref_rejected": -244.2198486328125, "logps/rejected": -286.6070251464844, "loss": 3.9783, "margin_dpo/margin_mean": 29.95867919921875, "margin_dpo/margin_std": 39.6899299621582, "step": 156 }, { "epoch": 0.3287958115183246, "fcm_dpo/beta": 0.0227323267608881, "fcm_dpo/delta": 0.0647030621767044, "fcm_dpo/margin": 21.048620223999023, "fcm_dpo/q_t": 0.39951539039611816, "grad_norm": 85.27225494384766, "learning_rate": 4.258031241903777e-07, "logits/chosen": -0.8875189423561096, "logits/rejected": -0.8885977864265442, "logps/chosen": -287.203125, "logps/ref_chosen": -270.830322265625, "logps/ref_rejected": -259.08319091796875, "logps/rejected": -296.504638671875, "loss": 4.4747, "margin_dpo/margin_mean": 21.048620223999023, "margin_dpo/margin_std": 34.90514373779297, "step": 157 }, { "epoch": 0.3308900523560209, "fcm_dpo/beta": 0.02372920699417591, "fcm_dpo/delta": -0.022162986919283867, "fcm_dpo/margin": 23.346778869628906, "fcm_dpo/q_t": 0.38233768939971924, "grad_norm": 88.78839874267578, "learning_rate": 4.2449678515039743e-07, "logits/chosen": -0.8333015441894531, "logits/rejected": -0.822943389415741, "logps/chosen": -306.6914367675781, "logps/ref_chosen": -289.9663391113281, "logps/ref_rejected": -271.335693359375, "logps/rejected": -311.4075927734375, "loss": 4.2801, "margin_dpo/margin_mean": 23.346778869628906, "margin_dpo/margin_std": 34.549774169921875, "step": 158 }, { "epoch": 0.33298429319371725, "fcm_dpo/beta": 0.023554343730211258, "fcm_dpo/delta": 0.04829606041312218, "fcm_dpo/margin": 18.543855667114258, "fcm_dpo/q_t": 0.4130256772041321, "grad_norm": 94.4974136352539, "learning_rate": 4.2318108837739986e-07, "logits/chosen": -0.9109346270561218, "logits/rejected": -0.8718158602714539, "logps/chosen": -340.6222839355469, "logps/ref_chosen": -321.37835693359375, "logps/ref_rejected": -250.45652770996094, "logps/rejected": -288.24432373046875, "loss": 4.8111, "margin_dpo/margin_mean": 18.54385757446289, "margin_dpo/margin_std": 39.477230072021484, "step": 159 }, { "epoch": 0.33507853403141363, "fcm_dpo/beta": 0.023043226450681686, "fcm_dpo/delta": -0.07298657298088074, "fcm_dpo/margin": 28.90218734741211, "fcm_dpo/q_t": 0.3582006096839905, "grad_norm": 87.2422866821289, "learning_rate": 4.218561044282098e-07, "logits/chosen": -0.8484607338905334, "logits/rejected": -0.8577648401260376, "logps/chosen": -291.74420166015625, "logps/ref_chosen": -276.28350830078125, "logps/ref_rejected": -262.7477722167969, "logps/rejected": -307.1106262207031, "loss": 3.88, "margin_dpo/margin_mean": 28.90218734741211, "margin_dpo/margin_std": 33.74877166748047, "step": 160 }, { "epoch": 0.33717277486910996, "fcm_dpo/beta": 0.02312248945236206, "fcm_dpo/delta": -0.00832156278192997, "fcm_dpo/margin": 26.23219108581543, "fcm_dpo/q_t": 0.3745940327644348, "grad_norm": 87.6370620727539, "learning_rate": 4.2052190435769554e-07, "logits/chosen": -0.8637784719467163, "logits/rejected": -0.8568350076675415, "logps/chosen": -329.4377746582031, "logps/ref_chosen": -310.4927978515625, "logps/ref_rejected": -250.25347900390625, "logps/rejected": -295.4306335449219, "loss": 4.238, "margin_dpo/margin_mean": 26.23219108581543, "margin_dpo/margin_std": 39.27847671508789, "step": 161 }, { "epoch": 0.3392670157068063, "fcm_dpo/beta": 0.02204562909901142, "fcm_dpo/delta": -0.00742918998003006, "fcm_dpo/margin": 22.998626708984375, "fcm_dpo/q_t": 0.3925955891609192, "grad_norm": 84.41416931152344, "learning_rate": 4.1917855971495763e-07, "logits/chosen": -0.845470130443573, "logits/rejected": -0.8392305374145508, "logps/chosen": -313.5657653808594, "logps/ref_chosen": -296.1105041503906, "logps/ref_rejected": -253.4247589111328, "logps/rejected": -293.8786926269531, "loss": 4.4313, "margin_dpo/margin_mean": 22.998626708984375, "margin_dpo/margin_std": 37.05625534057617, "step": 162 }, { "epoch": 0.3413612565445026, "fcm_dpo/beta": 0.023259364068508148, "fcm_dpo/delta": 0.029428036883473396, "fcm_dpo/margin": 24.4699764251709, "fcm_dpo/q_t": 0.37805965542793274, "grad_norm": 105.51744842529297, "learning_rate": 4.1782614253949255e-07, "logits/chosen": -0.8878765106201172, "logits/rejected": -0.8931166529655457, "logps/chosen": -313.3696594238281, "logps/ref_chosen": -293.4999084472656, "logps/ref_rejected": -266.7116394042969, "logps/rejected": -311.0514221191406, "loss": 4.2088, "margin_dpo/margin_mean": 24.4699764251709, "margin_dpo/margin_std": 34.383941650390625, "step": 163 }, { "epoch": 0.34345549738219894, "fcm_dpo/beta": 0.02358204685151577, "fcm_dpo/delta": -0.005520589649677277, "fcm_dpo/margin": 25.577049255371094, "fcm_dpo/q_t": 0.3782970905303955, "grad_norm": 93.9336929321289, "learning_rate": 4.164647253573289e-07, "logits/chosen": -0.8413535356521606, "logits/rejected": -0.8617441654205322, "logps/chosen": -291.18902587890625, "logps/ref_chosen": -267.04949951171875, "logps/ref_rejected": -215.9768829345703, "logps/rejected": -265.6934509277344, "loss": 4.2881, "margin_dpo/margin_mean": 25.577049255371094, "margin_dpo/margin_std": 39.729583740234375, "step": 164 }, { "epoch": 0.34554973821989526, "fcm_dpo/beta": 0.023120472207665443, "fcm_dpo/delta": 0.014161716215312481, "fcm_dpo/margin": 20.357099533081055, "fcm_dpo/q_t": 0.40322345495224, "grad_norm": 96.0807113647461, "learning_rate": 4.1509438117713863e-07, "logits/chosen": -0.8848339319229126, "logits/rejected": -0.8598626255989075, "logps/chosen": -296.1021728515625, "logps/ref_chosen": -278.06146240234375, "logps/ref_rejected": -260.4288635253906, "logps/rejected": -298.82666015625, "loss": 4.5232, "margin_dpo/margin_mean": 20.357099533081055, "margin_dpo/margin_std": 35.15179443359375, "step": 165 }, { "epoch": 0.34764397905759165, "fcm_dpo/beta": 0.02365921624004841, "fcm_dpo/delta": 0.07884444296360016, "fcm_dpo/margin": 22.211210250854492, "fcm_dpo/q_t": 0.3956853151321411, "grad_norm": 100.91581726074219, "learning_rate": 4.137151834863213e-07, "logits/chosen": -0.8429009914398193, "logits/rejected": -0.8110395669937134, "logps/chosen": -292.6982727050781, "logps/ref_chosen": -275.9490661621094, "logps/ref_rejected": -232.13473510742188, "logps/rejected": -271.09515380859375, "loss": 4.4726, "margin_dpo/margin_mean": 22.211214065551758, "margin_dpo/margin_std": 38.937843322753906, "step": 166 }, { "epoch": 0.34973821989528797, "fcm_dpo/beta": 0.025131061673164368, "fcm_dpo/delta": 0.01401679590344429, "fcm_dpo/margin": 23.275146484375, "fcm_dpo/q_t": 0.382481187582016, "grad_norm": 97.39994049072266, "learning_rate": 4.123272062470633e-07, "logits/chosen": -0.8488789796829224, "logits/rejected": -0.8377172946929932, "logps/chosen": -299.45098876953125, "logps/ref_chosen": -280.5514221191406, "logps/ref_rejected": -255.2896728515625, "logps/rejected": -297.46441650390625, "loss": 4.4208, "margin_dpo/margin_mean": 23.275146484375, "margin_dpo/margin_std": 39.44821548461914, "step": 167 }, { "epoch": 0.3518324607329843, "fcm_dpo/beta": 0.023984873667359352, "fcm_dpo/delta": -0.06481810659170151, "fcm_dpo/margin": 25.343101501464844, "fcm_dpo/q_t": 0.3719956874847412, "grad_norm": 296.59173583984375, "learning_rate": 4.1093052389237174e-07, "logits/chosen": -0.8263663649559021, "logits/rejected": -0.8027467727661133, "logps/chosen": -334.6053771972656, "logps/ref_chosen": -315.7982177734375, "logps/ref_rejected": -291.48406982421875, "logps/rejected": -335.63433837890625, "loss": 4.4031, "margin_dpo/margin_mean": 25.34310531616211, "margin_dpo/margin_std": 42.009727478027344, "step": 168 }, { "epoch": 0.3539267015706806, "fcm_dpo/beta": 0.022055521607398987, "fcm_dpo/delta": -0.17693692445755005, "fcm_dpo/margin": 34.57185363769531, "fcm_dpo/q_t": 0.34355735778808594, "grad_norm": 79.44908905029297, "learning_rate": 4.0952521132208267e-07, "logits/chosen": -0.8222439885139465, "logits/rejected": -0.8391299843788147, "logps/chosen": -275.7101745605469, "logps/ref_chosen": -261.06427001953125, "logps/ref_rejected": -235.40663146972656, "logps/rejected": -284.6243896484375, "loss": 3.6944, "margin_dpo/margin_mean": 34.57185363769531, "margin_dpo/margin_std": 37.923160552978516, "step": 169 }, { "epoch": 0.35602094240837695, "fcm_dpo/beta": 0.02063800022006035, "fcm_dpo/delta": 0.05438760668039322, "fcm_dpo/margin": 26.59862518310547, "fcm_dpo/q_t": 0.3886667490005493, "grad_norm": 96.43052673339844, "learning_rate": 4.081113438988443e-07, "logits/chosen": -0.7964289784431458, "logits/rejected": -0.7981937527656555, "logps/chosen": -324.3586120605469, "logps/ref_chosen": -308.96722412109375, "logps/ref_rejected": -263.8466796875, "logps/rejected": -305.836669921875, "loss": 4.3947, "margin_dpo/margin_mean": 26.598623275756836, "margin_dpo/margin_std": 45.38837432861328, "step": 170 }, { "epoch": 0.3581151832460733, "fcm_dpo/beta": 0.020479857921600342, "fcm_dpo/delta": -0.09359031170606613, "fcm_dpo/margin": 30.383586883544922, "fcm_dpo/q_t": 0.36607781052589417, "grad_norm": 93.717529296875, "learning_rate": 4.0668899744407567e-07, "logits/chosen": -0.8218968510627747, "logits/rejected": -0.8354977369308472, "logps/chosen": -269.9103698730469, "logps/ref_chosen": -258.8890380859375, "logps/ref_rejected": -262.19140625, "logps/rejected": -303.5963439941406, "loss": 3.9434, "margin_dpo/margin_mean": 30.383586883544922, "margin_dpo/margin_std": 35.17938995361328, "step": 171 }, { "epoch": 0.36020942408376966, "fcm_dpo/beta": 0.021176544949412346, "fcm_dpo/delta": 0.16231057047843933, "fcm_dpo/margin": 16.054502487182617, "fcm_dpo/q_t": 0.42630359530448914, "grad_norm": 96.79520416259766, "learning_rate": 4.0525824823390043e-07, "logits/chosen": -0.8345335125923157, "logits/rejected": -0.853988766670227, "logps/chosen": -352.2255554199219, "logps/ref_chosen": -339.0223388671875, "logps/ref_rejected": -295.78759765625, "logps/rejected": -325.0453186035156, "loss": 4.8841, "margin_dpo/margin_mean": 16.054502487182617, "margin_dpo/margin_std": 36.764705657958984, "step": 172 }, { "epoch": 0.362303664921466, "fcm_dpo/beta": 0.023571645841002464, "fcm_dpo/delta": 0.06804777681827545, "fcm_dpo/margin": 22.532241821289062, "fcm_dpo/q_t": 0.3934495151042938, "grad_norm": 84.94215393066406, "learning_rate": 4.0381917299505686e-07, "logits/chosen": -0.8429185748100281, "logits/rejected": -0.8447529077529907, "logps/chosen": -313.66534423828125, "logps/ref_chosen": -300.1114501953125, "logps/ref_rejected": -273.78460693359375, "logps/rejected": -309.87078857421875, "loss": 4.4559, "margin_dpo/margin_mean": 22.532241821289062, "margin_dpo/margin_std": 38.33403015136719, "step": 173 }, { "epoch": 0.3643979057591623, "fcm_dpo/beta": 0.023526517674326897, "fcm_dpo/delta": -0.038947440683841705, "fcm_dpo/margin": 27.008258819580078, "fcm_dpo/q_t": 0.3678101897239685, "grad_norm": 109.56539154052734, "learning_rate": 4.0237184890078243e-07, "logits/chosen": -0.8134390711784363, "logits/rejected": -0.8019281625747681, "logps/chosen": -348.16650390625, "logps/ref_chosen": -335.0538635253906, "logps/ref_rejected": -257.4646911621094, "logps/rejected": -297.5855407714844, "loss": 4.0733, "margin_dpo/margin_mean": 27.008256912231445, "margin_dpo/margin_std": 36.92762756347656, "step": 174 }, { "epoch": 0.36649214659685864, "fcm_dpo/beta": 0.023315949365496635, "fcm_dpo/delta": -0.02021496742963791, "fcm_dpo/margin": 26.327842712402344, "fcm_dpo/q_t": 0.3810538947582245, "grad_norm": 105.11174011230469, "learning_rate": 4.00916353566676e-07, "logits/chosen": -0.8290956616401672, "logits/rejected": -0.8322280645370483, "logps/chosen": -303.4194030761719, "logps/ref_chosen": -284.39556884765625, "logps/ref_rejected": -283.3876647949219, "logps/rejected": -328.7392883300781, "loss": 4.3747, "margin_dpo/margin_mean": 26.327844619750977, "margin_dpo/margin_std": 42.5020637512207, "step": 175 }, { "epoch": 0.36858638743455496, "fcm_dpo/beta": 0.023749521002173424, "fcm_dpo/delta": 0.04947128891944885, "fcm_dpo/margin": 20.36212158203125, "fcm_dpo/q_t": 0.40329134464263916, "grad_norm": 95.4178695678711, "learning_rate": 3.994527650465352e-07, "logits/chosen": -0.7997909784317017, "logits/rejected": -0.8140876293182373, "logps/chosen": -271.2232360839844, "logps/ref_chosen": -251.81280517578125, "logps/ref_rejected": -242.05328369140625, "logps/rejected": -281.8258361816406, "loss": 4.8359, "margin_dpo/margin_mean": 20.36212158203125, "margin_dpo/margin_std": 43.5911750793457, "step": 176 }, { "epoch": 0.3706806282722513, "fcm_dpo/beta": 0.023227877914905548, "fcm_dpo/delta": -0.04320107400417328, "fcm_dpo/margin": 20.517908096313477, "fcm_dpo/q_t": 0.40150418877601624, "grad_norm": 95.16880798339844, "learning_rate": 3.979811618281705e-07, "logits/chosen": -0.8828033804893494, "logits/rejected": -0.8596282005310059, "logps/chosen": -318.2162780761719, "logps/ref_chosen": -298.6463928222656, "logps/ref_rejected": -295.66534423828125, "logps/rejected": -335.75311279296875, "loss": 4.7767, "margin_dpo/margin_mean": 20.517908096313477, "margin_dpo/margin_std": 41.196895599365234, "step": 177 }, { "epoch": 0.37277486910994767, "fcm_dpo/beta": 0.02242261730134487, "fcm_dpo/delta": -0.029734821990132332, "fcm_dpo/margin": 27.962230682373047, "fcm_dpo/q_t": 0.3739369809627533, "grad_norm": 87.00016021728516, "learning_rate": 3.9650162282919654e-07, "logits/chosen": -0.7981923222541809, "logits/rejected": -0.7972285747528076, "logps/chosen": -301.7319641113281, "logps/ref_chosen": -286.2576599121094, "logps/ref_rejected": -243.97491455078125, "logps/rejected": -287.41143798828125, "loss": 4.1371, "margin_dpo/margin_mean": 27.962230682373047, "margin_dpo/margin_std": 40.20293426513672, "step": 178 }, { "epoch": 0.374869109947644, "fcm_dpo/beta": 0.021783435717225075, "fcm_dpo/delta": -0.04039537161588669, "fcm_dpo/margin": 23.8038272857666, "fcm_dpo/q_t": 0.392859548330307, "grad_norm": 93.1056137084961, "learning_rate": 3.9501422739279953e-07, "logits/chosen": -0.7682486772537231, "logits/rejected": -0.7733548283576965, "logps/chosen": -276.9896240234375, "logps/ref_chosen": -259.737060546875, "logps/ref_rejected": -277.8813171386719, "logps/rejected": -318.9377136230469, "loss": 4.5235, "margin_dpo/margin_mean": 23.80382537841797, "margin_dpo/margin_std": 41.399452209472656, "step": 179 }, { "epoch": 0.3769633507853403, "fcm_dpo/beta": 0.02132536470890045, "fcm_dpo/delta": -0.050105344504117966, "fcm_dpo/margin": 28.025800704956055, "fcm_dpo/q_t": 0.378864049911499, "grad_norm": 80.81954956054688, "learning_rate": 3.935190552834828e-07, "logits/chosen": -0.8179333209991455, "logits/rejected": -0.8522875905036926, "logps/chosen": -284.9200744628906, "logps/ref_chosen": -267.30889892578125, "logps/ref_rejected": -230.4376983642578, "logps/rejected": -276.0746765136719, "loss": 4.1849, "margin_dpo/margin_mean": 28.025800704956055, "margin_dpo/margin_std": 40.71231460571289, "step": 180 }, { "epoch": 0.37905759162303665, "fcm_dpo/beta": 0.021433616057038307, "fcm_dpo/delta": 0.09068157523870468, "fcm_dpo/margin": 23.975404739379883, "fcm_dpo/q_t": 0.39596718549728394, "grad_norm": 105.24143981933594, "learning_rate": 3.920161866827889e-07, "logits/chosen": -0.8095259666442871, "logits/rejected": -0.8213891386985779, "logps/chosen": -321.52716064453125, "logps/ref_chosen": -300.49139404296875, "logps/ref_rejected": -278.98284912109375, "logps/rejected": -323.9939880371094, "loss": 4.5794, "margin_dpo/margin_mean": 23.97540283203125, "margin_dpo/margin_std": 44.497955322265625, "step": 181 }, { "epoch": 0.381151832460733, "fcm_dpo/beta": 0.02187720127403736, "fcm_dpo/delta": -0.12704817950725555, "fcm_dpo/margin": 32.94600296020508, "fcm_dpo/q_t": 0.350864440202713, "grad_norm": 94.21673583984375, "learning_rate": 3.90505702185e-07, "logits/chosen": -0.7871803045272827, "logits/rejected": -0.8218678832054138, "logps/chosen": -297.6783142089844, "logps/ref_chosen": -279.4981689453125, "logps/ref_rejected": -263.6926574707031, "logps/rejected": -314.8188171386719, "loss": 3.8389, "margin_dpo/margin_mean": 32.94600296020508, "margin_dpo/margin_std": 39.00600051879883, "step": 182 }, { "epoch": 0.3832460732984293, "fcm_dpo/beta": 0.020077742636203766, "fcm_dpo/delta": 0.016617465764284134, "fcm_dpo/margin": 29.090024948120117, "fcm_dpo/q_t": 0.38179779052734375, "grad_norm": 83.8680191040039, "learning_rate": 3.889876827928156e-07, "logits/chosen": -0.842463493347168, "logits/rejected": -0.8533914685249329, "logps/chosen": -289.95166015625, "logps/ref_chosen": -271.2057189941406, "logps/ref_rejected": -243.91549682617188, "logps/rejected": -291.75146484375, "loss": 4.2765, "margin_dpo/margin_mean": 29.090024948120117, "margin_dpo/margin_std": 45.345638275146484, "step": 183 }, { "epoch": 0.38534031413612563, "fcm_dpo/beta": 0.018667876720428467, "fcm_dpo/delta": -0.12012484669685364, "fcm_dpo/margin": 37.77571487426758, "fcm_dpo/q_t": 0.35291537642478943, "grad_norm": 92.6821060180664, "learning_rate": 3.874622099130087e-07, "logits/chosen": -0.8658108711242676, "logits/rejected": -0.8556749820709229, "logps/chosen": -331.8802185058594, "logps/ref_chosen": -318.4457702636719, "logps/ref_rejected": -266.640869140625, "logps/rejected": -317.8509826660156, "loss": 3.8875, "margin_dpo/margin_mean": 37.77571487426758, "margin_dpo/margin_std": 46.467491149902344, "step": 184 }, { "epoch": 0.387434554973822, "fcm_dpo/beta": 0.018280260264873505, "fcm_dpo/delta": -0.017129220068454742, "fcm_dpo/margin": 31.00004768371582, "fcm_dpo/q_t": 0.382385790348053, "grad_norm": 80.60724639892578, "learning_rate": 3.859293653520604e-07, "logits/chosen": -0.854312539100647, "logits/rejected": -0.8550869226455688, "logps/chosen": -296.79412841796875, "logps/ref_chosen": -274.308837890625, "logps/ref_rejected": -260.7274169921875, "logps/rejected": -314.2127685546875, "loss": 4.2308, "margin_dpo/margin_mean": 31.000051498413086, "margin_dpo/margin_std": 45.35227584838867, "step": 185 }, { "epoch": 0.38952879581151834, "fcm_dpo/beta": 0.018405750393867493, "fcm_dpo/delta": 0.0071517787873744965, "fcm_dpo/margin": 29.293418884277344, "fcm_dpo/q_t": 0.3844657838344574, "grad_norm": 82.40447998046875, "learning_rate": 3.8438923131177237e-07, "logits/chosen": -0.8595123291015625, "logits/rejected": -0.8700802326202393, "logps/chosen": -321.4861145019531, "logps/ref_chosen": -299.00537109375, "logps/ref_rejected": -274.4014587402344, "logps/rejected": -326.1756286621094, "loss": 4.2669, "margin_dpo/margin_mean": 29.293418884277344, "margin_dpo/margin_std": 41.822120666503906, "step": 186 }, { "epoch": 0.39162303664921466, "fcm_dpo/beta": 0.01949167065322399, "fcm_dpo/delta": 0.11492104828357697, "fcm_dpo/margin": 25.117904663085938, "fcm_dpo/q_t": 0.39704573154449463, "grad_norm": 108.40086364746094, "learning_rate": 3.828418903848593e-07, "logits/chosen": -0.8057087659835815, "logits/rejected": -0.800156831741333, "logps/chosen": -356.62225341796875, "logps/ref_chosen": -329.8253173828125, "logps/ref_rejected": -263.73175048828125, "logps/rejected": -315.6466064453125, "loss": 4.6723, "margin_dpo/margin_mean": 25.117904663085938, "margin_dpo/margin_std": 48.73664093017578, "step": 187 }, { "epoch": 0.393717277486911, "fcm_dpo/beta": 0.01967058703303337, "fcm_dpo/delta": -0.03304888680577278, "fcm_dpo/margin": 29.974576950073242, "fcm_dpo/q_t": 0.38121888041496277, "grad_norm": 85.1061019897461, "learning_rate": 3.812874255505191e-07, "logits/chosen": -0.8419395089149475, "logits/rejected": -0.8400317430496216, "logps/chosen": -289.6829528808594, "logps/ref_chosen": -263.005615234375, "logps/ref_rejected": -247.08668518066406, "logps/rejected": -303.7385559082031, "loss": 4.4389, "margin_dpo/margin_mean": 29.974576950073242, "margin_dpo/margin_std": 50.47289276123047, "step": 188 }, { "epoch": 0.3958115183246073, "fcm_dpo/beta": 0.018633361905813217, "fcm_dpo/delta": -0.060549549758434296, "fcm_dpo/margin": 35.01060104370117, "fcm_dpo/q_t": 0.3630969822406769, "grad_norm": 82.80532836914062, "learning_rate": 3.797259201699833e-07, "logits/chosen": -0.859175443649292, "logits/rejected": -0.8690008521080017, "logps/chosen": -291.63153076171875, "logps/ref_chosen": -272.96038818359375, "logps/ref_rejected": -275.13238525390625, "logps/rejected": -328.81414794921875, "loss": 3.9104, "margin_dpo/margin_mean": 35.01060104370117, "margin_dpo/margin_std": 41.501155853271484, "step": 189 }, { "epoch": 0.39790575916230364, "fcm_dpo/beta": 0.018668456003069878, "fcm_dpo/delta": 0.0034092608839273453, "fcm_dpo/margin": 31.924047470092773, "fcm_dpo/q_t": 0.3739194869995117, "grad_norm": 86.3962173461914, "learning_rate": 3.781574579820464e-07, "logits/chosen": -0.8613168597221375, "logits/rejected": -0.8277738094329834, "logps/chosen": -275.919677734375, "logps/ref_chosen": -257.79754638671875, "logps/ref_rejected": -225.2164306640625, "logps/rejected": -275.2625732421875, "loss": 4.0864, "margin_dpo/margin_mean": 31.924047470092773, "margin_dpo/margin_std": 42.496273040771484, "step": 190 }, { "epoch": 0.4, "fcm_dpo/beta": 0.018610456958413124, "fcm_dpo/delta": -0.014111967757344246, "fcm_dpo/margin": 31.048202514648438, "fcm_dpo/q_t": 0.3805280923843384, "grad_norm": 87.75660705566406, "learning_rate": 3.765821230985757e-07, "logits/chosen": -0.8736047148704529, "logits/rejected": -0.8768740296363831, "logps/chosen": -260.75518798828125, "logps/ref_chosen": -243.8585205078125, "logps/ref_rejected": -245.12136840820312, "logps/rejected": -293.0662536621094, "loss": 4.2548, "margin_dpo/margin_mean": 31.048202514648438, "margin_dpo/margin_std": 46.76060104370117, "step": 191 }, { "epoch": 0.40209424083769635, "fcm_dpo/beta": 0.018671073019504547, "fcm_dpo/delta": 0.009804993867874146, "fcm_dpo/margin": 25.756927490234375, "fcm_dpo/q_t": 0.39886969327926636, "grad_norm": 83.8148193359375, "learning_rate": 3.75e-07, "logits/chosen": -0.825681746006012, "logits/rejected": -0.8136826157569885, "logps/chosen": -289.8357238769531, "logps/ref_chosen": -266.9799499511719, "logps/ref_rejected": -260.1697082519531, "logps/rejected": -308.78240966796875, "loss": 4.5612, "margin_dpo/margin_mean": 25.756927490234375, "margin_dpo/margin_std": 45.346221923828125, "step": 192 }, { "epoch": 0.4041884816753927, "fcm_dpo/beta": 0.018258847296237946, "fcm_dpo/delta": -0.021077796816825867, "fcm_dpo/margin": 30.8725643157959, "fcm_dpo/q_t": 0.38373884558677673, "grad_norm": 91.01241302490234, "learning_rate": 3.734111735307796e-07, "logits/chosen": -0.8784509897232056, "logits/rejected": -0.8553139567375183, "logps/chosen": -308.2591247558594, "logps/ref_chosen": -280.25323486328125, "logps/ref_rejected": -291.0348815917969, "logps/rejected": -349.9133605957031, "loss": 4.3134, "margin_dpo/margin_mean": 30.872562408447266, "margin_dpo/margin_std": 47.93418884277344, "step": 193 }, { "epoch": 0.406282722513089, "fcm_dpo/beta": 0.019326101988554, "fcm_dpo/delta": 0.08527359366416931, "fcm_dpo/margin": 23.81899070739746, "fcm_dpo/q_t": 0.4056922197341919, "grad_norm": 106.6082992553711, "learning_rate": 3.7181572889485623e-07, "logits/chosen": -0.8528724908828735, "logits/rejected": -0.8473402261734009, "logps/chosen": -318.2233581542969, "logps/ref_chosen": -288.13946533203125, "logps/ref_rejected": -251.31529235839844, "logps/rejected": -305.2181701660156, "loss": 4.5547, "margin_dpo/margin_mean": 23.818988800048828, "margin_dpo/margin_std": 42.86112594604492, "step": 194 }, { "epoch": 0.4083769633507853, "fcm_dpo/beta": 0.020995743572711945, "fcm_dpo/delta": 0.09806863218545914, "fcm_dpo/margin": 21.186416625976562, "fcm_dpo/q_t": 0.41219669580459595, "grad_norm": 105.31787872314453, "learning_rate": 3.7021375165108377e-07, "logits/chosen": -0.8652254343032837, "logits/rejected": -0.8719401359558105, "logps/chosen": -305.6102600097656, "logps/ref_chosen": -274.0006408691406, "logps/ref_rejected": -280.22723388671875, "logps/rejected": -333.0232849121094, "loss": 4.659, "margin_dpo/margin_mean": 21.186416625976562, "margin_dpo/margin_std": 41.24464797973633, "step": 195 }, { "epoch": 0.41047120418848165, "fcm_dpo/beta": 0.021107617765665054, "fcm_dpo/delta": -0.02604127675294876, "fcm_dpo/margin": 29.49114227294922, "fcm_dpo/q_t": 0.37593233585357666, "grad_norm": 109.97003173828125, "learning_rate": 3.6860532770864005e-07, "logits/chosen": -0.8447614908218384, "logits/rejected": -0.8570613861083984, "logps/chosen": -298.1605529785156, "logps/ref_chosen": -274.90069580078125, "logps/ref_rejected": -248.7281951904297, "logps/rejected": -301.47918701171875, "loss": 4.2574, "margin_dpo/margin_mean": 29.49114227294922, "margin_dpo/margin_std": 46.1149787902832, "step": 196 }, { "epoch": 0.41256544502617803, "fcm_dpo/beta": 0.02037704363465309, "fcm_dpo/delta": -0.11450602114200592, "fcm_dpo/margin": 34.678550720214844, "fcm_dpo/q_t": 0.35536617040634155, "grad_norm": 116.41548156738281, "learning_rate": 3.6699054332241985e-07, "logits/chosen": -0.8692039847373962, "logits/rejected": -0.8588843941688538, "logps/chosen": -335.359375, "logps/ref_chosen": -309.5348205566406, "logps/ref_rejected": -264.3179931640625, "logps/rejected": -324.8210754394531, "loss": 3.9332, "margin_dpo/margin_mean": 34.678550720214844, "margin_dpo/margin_std": 43.28546142578125, "step": 197 }, { "epoch": 0.41465968586387436, "fcm_dpo/beta": 0.0187942273914814, "fcm_dpo/delta": -0.014170356094837189, "fcm_dpo/margin": 32.54724884033203, "fcm_dpo/q_t": 0.3783041536808014, "grad_norm": 99.18403625488281, "learning_rate": 3.653694850884091e-07, "logits/chosen": -0.8634573221206665, "logits/rejected": -0.841856062412262, "logps/chosen": -326.5914306640625, "logps/ref_chosen": -301.0134582519531, "logps/ref_rejected": -292.84185791015625, "logps/rejected": -350.9670715332031, "loss": 4.31, "margin_dpo/margin_mean": 32.5472526550293, "margin_dpo/margin_std": 51.99414825439453, "step": 198 }, { "epoch": 0.4167539267015707, "fcm_dpo/beta": 0.01868726871907711, "fcm_dpo/delta": -0.040653832256793976, "fcm_dpo/margin": 31.534244537353516, "fcm_dpo/q_t": 0.3784925043582916, "grad_norm": 91.59637451171875, "learning_rate": 3.6374223993904124e-07, "logits/chosen": -0.8504621982574463, "logits/rejected": -0.8154540061950684, "logps/chosen": -290.4877014160156, "logps/ref_chosen": -264.6058654785156, "logps/ref_rejected": -214.9014892578125, "logps/rejected": -272.31756591796875, "loss": 4.1757, "margin_dpo/margin_mean": 31.534244537353516, "margin_dpo/margin_std": 45.6278190612793, "step": 199 }, { "epoch": 0.418848167539267, "fcm_dpo/beta": 0.018357042223215103, "fcm_dpo/delta": 0.04033544659614563, "fcm_dpo/margin": 28.317873001098633, "fcm_dpo/q_t": 0.39727315306663513, "grad_norm": 104.42108917236328, "learning_rate": 3.621088951385353e-07, "logits/chosen": -0.8921913504600525, "logits/rejected": -0.8735958337783813, "logps/chosen": -352.2391662597656, "logps/ref_chosen": -324.1588134765625, "logps/ref_rejected": -277.80218505859375, "logps/rejected": -334.200439453125, "loss": 4.6087, "margin_dpo/margin_mean": 28.31787872314453, "margin_dpo/margin_std": 53.46382522583008, "step": 200 }, { "epoch": 0.418848167539267, "eval_fcm_dpo/beta": 0.018857382237911224, "eval_logits/chosen": -0.8679316639900208, "eval_logits/rejected": -0.8609716296195984, "eval_logps/chosen": -320.89276123046875, "eval_logps/ref_chosen": -287.8267517089844, "eval_logps/ref_rejected": -266.9313659667969, "eval_logps/rejected": -329.564697265625, "eval_loss": 0.5497193336486816, "eval_margin_dpo/margin_mean": 29.56734848022461, "eval_margin_dpo/margin_std": 48.380184173583984, "eval_runtime": 81.4797, "eval_samples_per_second": 24.546, "eval_steps_per_second": 1.534, "step": 200 }, { "epoch": 0.42094240837696334, "fcm_dpo/beta": 0.019249822944402695, "fcm_dpo/delta": -0.007784634828567505, "fcm_dpo/margin": 31.416568756103516, "fcm_dpo/q_t": 0.3744759261608124, "grad_norm": 98.83305358886719, "learning_rate": 3.604695382782159e-07, "logits/chosen": -0.8689364194869995, "logits/rejected": -0.8637883067131042, "logps/chosen": -304.6473388671875, "logps/ref_chosen": -271.49566650390625, "logps/ref_rejected": -245.71414184570312, "logps/rejected": -310.2823791503906, "loss": 4.2192, "margin_dpo/margin_mean": 31.416568756103516, "margin_dpo/margin_std": 46.150325775146484, "step": 201 }, { "epoch": 0.42303664921465967, "fcm_dpo/beta": 0.018503909930586815, "fcm_dpo/delta": -0.041275542229413986, "fcm_dpo/margin": 31.27553939819336, "fcm_dpo/q_t": 0.3803809881210327, "grad_norm": 98.9993667602539, "learning_rate": 3.588242572718162e-07, "logits/chosen": -0.8732012510299683, "logits/rejected": -0.8661995530128479, "logps/chosen": -304.1346435546875, "logps/ref_chosen": -272.0979309082031, "logps/ref_rejected": -235.94805908203125, "logps/rejected": -299.2603454589844, "loss": 4.3415, "margin_dpo/margin_mean": 31.27553939819336, "margin_dpo/margin_std": 48.76369094848633, "step": 202 }, { "epoch": 0.42513089005235605, "fcm_dpo/beta": 0.01837236061692238, "fcm_dpo/delta": 0.05643375590443611, "fcm_dpo/margin": 24.213150024414062, "fcm_dpo/q_t": 0.40584272146224976, "grad_norm": 100.67874145507812, "learning_rate": 3.571731403507635e-07, "logits/chosen": -0.850642204284668, "logits/rejected": -0.8625622391700745, "logps/chosen": -318.47943115234375, "logps/ref_chosen": -280.2221374511719, "logps/ref_rejected": -251.79798889160156, "logps/rejected": -314.2684326171875, "loss": 4.5736, "margin_dpo/margin_mean": 24.213150024414062, "margin_dpo/margin_std": 43.66739273071289, "step": 203 }, { "epoch": 0.4272251308900524, "fcm_dpo/beta": 0.018126487731933594, "fcm_dpo/delta": -0.06936343759298325, "fcm_dpo/margin": 36.58906173706055, "fcm_dpo/q_t": 0.3620806634426117, "grad_norm": 95.67644500732422, "learning_rate": 3.5551627605944746e-07, "logits/chosen": -0.8942813277244568, "logits/rejected": -0.8736305236816406, "logps/chosen": -348.34130859375, "logps/ref_chosen": -318.7960510253906, "logps/ref_rejected": -269.69921875, "logps/rejected": -335.83355712890625, "loss": 3.9389, "margin_dpo/margin_mean": 36.58906173706055, "margin_dpo/margin_std": 46.73650360107422, "step": 204 }, { "epoch": 0.4293193717277487, "fcm_dpo/beta": 0.01768399402499199, "fcm_dpo/delta": -0.04656511917710304, "fcm_dpo/margin": 36.3321418762207, "fcm_dpo/q_t": 0.36954307556152344, "grad_norm": 89.59551239013672, "learning_rate": 3.5385375325047163e-07, "logits/chosen": -0.8388100862503052, "logits/rejected": -0.8084380626678467, "logps/chosen": -316.36187744140625, "logps/ref_chosen": -283.7620544433594, "logps/ref_rejected": -297.69439697265625, "logps/rejected": -366.6263732910156, "loss": 4.04, "margin_dpo/margin_mean": 36.3321418762207, "margin_dpo/margin_std": 49.33777618408203, "step": 205 }, { "epoch": 0.431413612565445, "fcm_dpo/beta": 0.017556358128786087, "fcm_dpo/delta": 0.019014529883861542, "fcm_dpo/margin": 30.180171966552734, "fcm_dpo/q_t": 0.3905620276927948, "grad_norm": 98.87091827392578, "learning_rate": 3.5218566107988867e-07, "logits/chosen": -0.8614488840103149, "logits/rejected": -0.8843433260917664, "logps/chosen": -330.2857971191406, "logps/ref_chosen": -293.66387939453125, "logps/ref_rejected": -291.3056640625, "logps/rejected": -358.1078186035156, "loss": 4.453, "margin_dpo/margin_mean": 30.18017578125, "margin_dpo/margin_std": 50.2305908203125, "step": 206 }, { "epoch": 0.43350785340314135, "fcm_dpo/beta": 0.017863312736153603, "fcm_dpo/delta": 0.02159544639289379, "fcm_dpo/margin": 29.29644775390625, "fcm_dpo/q_t": 0.39205509424209595, "grad_norm": 100.55647277832031, "learning_rate": 3.505120890024195e-07, "logits/chosen": -0.8120275735855103, "logits/rejected": -0.8208277821540833, "logps/chosen": -303.7646484375, "logps/ref_chosen": -270.5350646972656, "logps/ref_rejected": -278.7747497558594, "logps/rejected": -341.30072021484375, "loss": 4.5997, "margin_dpo/margin_mean": 29.296445846557617, "margin_dpo/margin_std": 55.331058502197266, "step": 207 }, { "epoch": 0.4356020942408377, "fcm_dpo/beta": 0.01773180440068245, "fcm_dpo/delta": -0.038446761667728424, "fcm_dpo/margin": 35.76897430419922, "fcm_dpo/q_t": 0.37097251415252686, "grad_norm": 86.00871276855469, "learning_rate": 3.4883312676665534e-07, "logits/chosen": -0.8688828945159912, "logits/rejected": -0.8223684430122375, "logps/chosen": -317.2559509277344, "logps/ref_chosen": -279.582763671875, "logps/ref_rejected": -290.041015625, "logps/rejected": -363.483154296875, "loss": 4.108, "margin_dpo/margin_mean": 35.76897430419922, "margin_dpo/margin_std": 50.78927993774414, "step": 208 }, { "epoch": 0.437696335078534, "fcm_dpo/beta": 0.018073974177241325, "fcm_dpo/delta": 0.07754447311162949, "fcm_dpo/margin": 26.4322566986084, "fcm_dpo/q_t": 0.40094897150993347, "grad_norm": 106.75226593017578, "learning_rate": 3.4714886441024573e-07, "logits/chosen": -0.7833099365234375, "logits/rejected": -0.7856354713439941, "logps/chosen": -360.97906494140625, "logps/ref_chosen": -318.8725280761719, "logps/ref_rejected": -270.64324951171875, "logps/rejected": -339.18206787109375, "loss": 4.7071, "margin_dpo/margin_mean": 26.4322566986084, "margin_dpo/margin_std": 52.98542785644531, "step": 209 }, { "epoch": 0.4397905759162304, "fcm_dpo/beta": 0.018200790509581566, "fcm_dpo/delta": 0.0208455678075552, "fcm_dpo/margin": 31.805618286132812, "fcm_dpo/q_t": 0.3811089098453522, "grad_norm": 105.60123443603516, "learning_rate": 3.454593922550693e-07, "logits/chosen": -0.8259727358818054, "logits/rejected": -0.8135036826133728, "logps/chosen": -320.53704833984375, "logps/ref_chosen": -283.14031982421875, "logps/ref_rejected": -287.2986755371094, "logps/rejected": -356.50103759765625, "loss": 4.3226, "margin_dpo/margin_mean": 31.805618286132812, "margin_dpo/margin_std": 50.25780487060547, "step": 210 }, { "epoch": 0.4418848167539267, "fcm_dpo/beta": 0.01751658506691456, "fcm_dpo/delta": -0.1294037252664566, "fcm_dpo/margin": 40.80807876586914, "fcm_dpo/q_t": 0.34755995869636536, "grad_norm": 86.59803771972656, "learning_rate": 3.4376480090239047e-07, "logits/chosen": -0.8490579128265381, "logits/rejected": -0.829590916633606, "logps/chosen": -310.4613342285156, "logps/ref_chosen": -276.4228515625, "logps/ref_rejected": -252.40603637695312, "logps/rejected": -327.25262451171875, "loss": 3.7428, "margin_dpo/margin_mean": 40.80807876586914, "margin_dpo/margin_std": 43.20057678222656, "step": 211 }, { "epoch": 0.44397905759162304, "fcm_dpo/beta": 0.01712076924741268, "fcm_dpo/delta": 0.03554587438702583, "fcm_dpo/margin": 28.673137664794922, "fcm_dpo/q_t": 0.39428529143333435, "grad_norm": 94.82775115966797, "learning_rate": 3.4206518122800055e-07, "logits/chosen": -0.8299760818481445, "logits/rejected": -0.8329156041145325, "logps/chosen": -309.0224914550781, "logps/ref_chosen": -271.7055358886719, "logps/ref_rejected": -241.18511962890625, "logps/rejected": -307.17529296875, "loss": 4.4766, "margin_dpo/margin_mean": 28.673141479492188, "margin_dpo/margin_std": 47.071434020996094, "step": 212 }, { "epoch": 0.44607329842931936, "fcm_dpo/beta": 0.017721228301525116, "fcm_dpo/delta": 0.03447887301445007, "fcm_dpo/margin": 29.419530868530273, "fcm_dpo/q_t": 0.3976650834083557, "grad_norm": 103.67435455322266, "learning_rate": 3.403606243773448e-07, "logits/chosen": -0.824676513671875, "logits/rejected": -0.8418750762939453, "logps/chosen": -341.2528076171875, "logps/ref_chosen": -302.2976379394531, "logps/ref_rejected": -303.6202087402344, "logps/rejected": -371.9948425292969, "loss": 4.5048, "margin_dpo/margin_mean": 29.41952896118164, "margin_dpo/margin_std": 53.222564697265625, "step": 213 }, { "epoch": 0.4481675392670157, "fcm_dpo/beta": 0.017746904864907265, "fcm_dpo/delta": -0.004792161285877228, "fcm_dpo/margin": 33.86843490600586, "fcm_dpo/q_t": 0.3744812309741974, "grad_norm": 106.49107360839844, "learning_rate": 3.3865122176063385e-07, "logits/chosen": -0.830028772354126, "logits/rejected": -0.8322975635528564, "logps/chosen": -319.8704528808594, "logps/ref_chosen": -272.13262939453125, "logps/ref_rejected": -294.82354736328125, "logps/rejected": -376.4298095703125, "loss": 4.0953, "margin_dpo/margin_mean": 33.86843490600586, "margin_dpo/margin_std": 43.68943405151367, "step": 214 }, { "epoch": 0.450261780104712, "fcm_dpo/beta": 0.017707258462905884, "fcm_dpo/delta": 0.005800800397992134, "fcm_dpo/margin": 26.85211944580078, "fcm_dpo/q_t": 0.40805721282958984, "grad_norm": 100.26293182373047, "learning_rate": 3.3693706504794243e-07, "logits/chosen": -0.8703227043151855, "logits/rejected": -0.8574371933937073, "logps/chosen": -335.0187072753906, "logps/ref_chosen": -291.3782958984375, "logps/ref_rejected": -261.05792236328125, "logps/rejected": -331.5504150390625, "loss": 4.6754, "margin_dpo/margin_mean": 26.85211944580078, "margin_dpo/margin_std": 53.00439453125, "step": 215 }, { "epoch": 0.4523560209424084, "fcm_dpo/beta": 0.017213810235261917, "fcm_dpo/delta": -0.00978805497288704, "fcm_dpo/margin": 35.283409118652344, "fcm_dpo/q_t": 0.3760201334953308, "grad_norm": 95.87169647216797, "learning_rate": 3.3521824616429284e-07, "logits/chosen": -0.8963602185249329, "logits/rejected": -0.8926108479499817, "logps/chosen": -375.2848815917969, "logps/ref_chosen": -338.50543212890625, "logps/ref_rejected": -305.76104736328125, "logps/rejected": -377.8238830566406, "loss": 4.2837, "margin_dpo/margin_mean": 35.283409118652344, "margin_dpo/margin_std": 54.5643196105957, "step": 216 }, { "epoch": 0.4544502617801047, "fcm_dpo/beta": 0.01671535335481167, "fcm_dpo/delta": -0.13403168320655823, "fcm_dpo/margin": 43.277740478515625, "fcm_dpo/q_t": 0.3528442978858948, "grad_norm": 85.40447235107422, "learning_rate": 3.334948572847253e-07, "logits/chosen": -0.7879663109779358, "logits/rejected": -0.7589735388755798, "logps/chosen": -332.1008605957031, "logps/ref_chosen": -293.5498046875, "logps/ref_rejected": -256.7830810546875, "logps/rejected": -338.6118469238281, "loss": 3.9244, "margin_dpo/margin_mean": 43.277740478515625, "margin_dpo/margin_std": 55.66615295410156, "step": 217 }, { "epoch": 0.45654450261780105, "fcm_dpo/beta": 0.015897490084171295, "fcm_dpo/delta": 0.003095601685345173, "fcm_dpo/margin": 37.482078552246094, "fcm_dpo/q_t": 0.3739127516746521, "grad_norm": 89.20011901855469, "learning_rate": 3.317669908293554e-07, "logits/chosen": -0.8181397914886475, "logits/rejected": -0.8422555923461914, "logps/chosen": -357.48828125, "logps/ref_chosen": -320.579345703125, "logps/ref_rejected": -294.0381164550781, "logps/rejected": -368.42913818359375, "loss": 4.0961, "margin_dpo/margin_mean": 37.482078552246094, "margin_dpo/margin_std": 50.897701263427734, "step": 218 }, { "epoch": 0.4586387434554974, "fcm_dpo/beta": 0.015867143869400024, "fcm_dpo/delta": -0.031162606552243233, "fcm_dpo/margin": 39.53302001953125, "fcm_dpo/q_t": 0.36894065141677856, "grad_norm": 85.81663513183594, "learning_rate": 3.300347394584172e-07, "logits/chosen": -0.8200687170028687, "logits/rejected": -0.846379280090332, "logps/chosen": -301.2198486328125, "logps/ref_chosen": -268.4186096191406, "logps/ref_rejected": -265.7808837890625, "logps/rejected": -338.1151428222656, "loss": 4.1022, "margin_dpo/margin_mean": 39.53302001953125, "margin_dpo/margin_std": 54.08649826049805, "step": 219 }, { "epoch": 0.4607329842931937, "fcm_dpo/beta": 0.015527862124145031, "fcm_dpo/delta": 0.009382149204611778, "fcm_dpo/margin": 38.0103759765625, "fcm_dpo/q_t": 0.3744858205318451, "grad_norm": 86.28771209716797, "learning_rate": 3.2829819606729477e-07, "logits/chosen": -0.8505481481552124, "logits/rejected": -0.8325619697570801, "logps/chosen": -347.1203918457031, "logps/ref_chosen": -312.8864440917969, "logps/ref_rejected": -259.5191955566406, "logps/rejected": -331.7634582519531, "loss": 4.1899, "margin_dpo/margin_mean": 38.0103759765625, "margin_dpo/margin_std": 54.48101043701172, "step": 220 }, { "epoch": 0.46282722513089003, "fcm_dpo/beta": 0.016174497082829475, "fcm_dpo/delta": 0.0048094987869262695, "fcm_dpo/margin": 30.234722137451172, "fcm_dpo/q_t": 0.4027414321899414, "grad_norm": 90.07968139648438, "learning_rate": 3.265574537815398e-07, "logits/chosen": -0.7801198363304138, "logits/rejected": -0.79371577501297, "logps/chosen": -337.3284606933594, "logps/ref_chosen": -300.32586669921875, "logps/ref_rejected": -286.312255859375, "logps/rejected": -353.549560546875, "loss": 4.5684, "margin_dpo/margin_mean": 30.234722137451172, "margin_dpo/margin_std": 54.877281188964844, "step": 221 }, { "epoch": 0.4649214659685864, "fcm_dpo/beta": 0.015347619540989399, "fcm_dpo/delta": -0.009789157658815384, "fcm_dpo/margin": 36.647762298583984, "fcm_dpo/q_t": 0.3810715973377228, "grad_norm": 95.45844268798828, "learning_rate": 3.248126059518784e-07, "logits/chosen": -0.8610984086990356, "logits/rejected": -0.8496800661087036, "logps/chosen": -329.9424743652344, "logps/ref_chosen": -297.1113586425781, "logps/ref_rejected": -235.53146362304688, "logps/rejected": -305.0103454589844, "loss": 4.2022, "margin_dpo/margin_mean": 36.64776611328125, "margin_dpo/margin_std": 50.83029556274414, "step": 222 }, { "epoch": 0.46701570680628274, "fcm_dpo/beta": 0.015580544248223305, "fcm_dpo/delta": -0.005719708278775215, "fcm_dpo/margin": 38.75231170654297, "fcm_dpo/q_t": 0.37368282675743103, "grad_norm": 83.94607543945312, "learning_rate": 3.230637461492043e-07, "logits/chosen": -0.8233493566513062, "logits/rejected": -0.7984543442726135, "logps/chosen": -322.42913818359375, "logps/ref_chosen": -286.41510009765625, "logps/ref_rejected": -241.1181640625, "logps/rejected": -315.884521484375, "loss": 4.139, "margin_dpo/margin_mean": 38.75231170654297, "margin_dpo/margin_std": 53.93544006347656, "step": 223 }, { "epoch": 0.46910994764397906, "fcm_dpo/beta": 0.015308534726500511, "fcm_dpo/delta": -0.07908003032207489, "fcm_dpo/margin": 41.015872955322266, "fcm_dpo/q_t": 0.36839425563812256, "grad_norm": 83.50463104248047, "learning_rate": 3.213109681595612e-07, "logits/chosen": -0.7854145765304565, "logits/rejected": -0.8054001927375793, "logps/chosen": -282.39862060546875, "logps/ref_chosen": -249.49234008789062, "logps/ref_rejected": -233.10752868652344, "logps/rejected": -307.02972412109375, "loss": 3.9926, "margin_dpo/margin_mean": 41.015872955322266, "margin_dpo/margin_std": 51.277225494384766, "step": 224 }, { "epoch": 0.4712041884816754, "fcm_dpo/beta": 0.01455092616379261, "fcm_dpo/delta": 0.04796172305941582, "fcm_dpo/margin": 38.115787506103516, "fcm_dpo/q_t": 0.3868432939052582, "grad_norm": 94.90240478515625, "learning_rate": 3.1955436597911315e-07, "logits/chosen": -0.8136327266693115, "logits/rejected": -0.7935799360275269, "logps/chosen": -353.4432067871094, "logps/ref_chosen": -311.8583679199219, "logps/ref_rejected": -336.8523864746094, "logps/rejected": -416.5530090332031, "loss": 4.3046, "margin_dpo/margin_mean": 38.11579132080078, "margin_dpo/margin_std": 58.1151237487793, "step": 225 }, { "epoch": 0.4732984293193717, "fcm_dpo/beta": 0.015586531721055508, "fcm_dpo/delta": 0.07917778939008713, "fcm_dpo/margin": 33.66019821166992, "fcm_dpo/q_t": 0.3920612037181854, "grad_norm": 80.37389373779297, "learning_rate": 3.1779403380910425e-07, "logits/chosen": -0.8555701971054077, "logits/rejected": -0.8487062454223633, "logps/chosen": -290.4698486328125, "logps/ref_chosen": -252.20123291015625, "logps/ref_rejected": -254.41162109375, "logps/rejected": -326.3404541015625, "loss": 4.3932, "margin_dpo/margin_mean": 33.66019821166992, "margin_dpo/margin_std": 55.53483581542969, "step": 226 }, { "epoch": 0.47539267015706804, "fcm_dpo/beta": 0.01585298217833042, "fcm_dpo/delta": -0.0467713437974453, "fcm_dpo/margin": 40.5180549621582, "fcm_dpo/q_t": 0.36852991580963135, "grad_norm": 112.51945495605469, "learning_rate": 3.160300660508064e-07, "logits/chosen": -0.8035961985588074, "logits/rejected": -0.8008553385734558, "logps/chosen": -324.879150390625, "logps/ref_chosen": -285.25946044921875, "logps/ref_rejected": -261.3220520019531, "logps/rejected": -341.4598083496094, "loss": 4.2243, "margin_dpo/margin_mean": 40.5180549621582, "margin_dpo/margin_std": 60.73136901855469, "step": 227 }, { "epoch": 0.4774869109947644, "fcm_dpo/beta": 0.015510935336351395, "fcm_dpo/delta": -0.051342956721782684, "fcm_dpo/margin": 41.696563720703125, "fcm_dpo/q_t": 0.3670775890350342, "grad_norm": 85.83709716796875, "learning_rate": 3.1426255730045695e-07, "logits/chosen": -0.8358519077301025, "logits/rejected": -0.8068508505821228, "logps/chosen": -348.1343078613281, "logps/ref_chosen": -313.81878662109375, "logps/ref_rejected": -258.07061767578125, "logps/rejected": -334.08270263671875, "loss": 4.0336, "margin_dpo/margin_mean": 41.696563720703125, "margin_dpo/margin_std": 54.898597717285156, "step": 228 }, { "epoch": 0.47958115183246075, "fcm_dpo/beta": 0.014525864273309708, "fcm_dpo/delta": -0.08014161139726639, "fcm_dpo/margin": 46.30763244628906, "fcm_dpo/q_t": 0.3581668734550476, "grad_norm": 171.63238525390625, "learning_rate": 3.1249160234418644e-07, "logits/chosen": -0.8062803149223328, "logits/rejected": -0.8233762979507446, "logps/chosen": -334.2206726074219, "logps/ref_chosen": -291.9707946777344, "logps/ref_rejected": -263.42059326171875, "logps/rejected": -351.9781494140625, "loss": 3.9764, "margin_dpo/margin_mean": 46.30763244628906, "margin_dpo/margin_std": 58.0003662109375, "step": 229 }, { "epoch": 0.4816753926701571, "fcm_dpo/beta": 0.013805052265524864, "fcm_dpo/delta": -0.005476825870573521, "fcm_dpo/margin": 43.70093536376953, "fcm_dpo/q_t": 0.37367361783981323, "grad_norm": 79.83263397216797, "learning_rate": 3.1071729615293424e-07, "logits/chosen": -0.8613869547843933, "logits/rejected": -0.8628825545310974, "logps/chosen": -272.9879150390625, "logps/ref_chosen": -233.2601318359375, "logps/ref_rejected": -238.922119140625, "logps/rejected": -322.3508605957031, "loss": 4.134, "margin_dpo/margin_mean": 43.70093536376953, "margin_dpo/margin_std": 60.809654235839844, "step": 230 }, { "epoch": 0.4837696335078534, "fcm_dpo/beta": 0.014280532486736774, "fcm_dpo/delta": 0.054186657071113586, "fcm_dpo/margin": 34.104496002197266, "fcm_dpo/q_t": 0.39545977115631104, "grad_norm": 89.10991668701172, "learning_rate": 3.0893973387735683e-07, "logits/chosen": -0.8317367434501648, "logits/rejected": -0.8229210376739502, "logps/chosen": -370.91632080078125, "logps/ref_chosen": -322.1551818847656, "logps/ref_rejected": -280.97613525390625, "logps/rejected": -363.8418273925781, "loss": 4.4358, "margin_dpo/margin_mean": 34.10449981689453, "margin_dpo/margin_std": 54.3597412109375, "step": 231 }, { "epoch": 0.48586387434554973, "fcm_dpo/beta": 0.01439041830599308, "fcm_dpo/delta": -0.028247211128473282, "fcm_dpo/margin": 37.74383544921875, "fcm_dpo/q_t": 0.38723382353782654, "grad_norm": 111.32173919677734, "learning_rate": 3.071590108427243e-07, "logits/chosen": -0.8064876198768616, "logits/rejected": -0.7893252372741699, "logps/chosen": -321.2291564941406, "logps/ref_chosen": -271.7437744140625, "logps/ref_rejected": -249.94981384277344, "logps/rejected": -337.1790466308594, "loss": 4.4283, "margin_dpo/margin_mean": 37.74383544921875, "margin_dpo/margin_std": 60.81903839111328, "step": 232 }, { "epoch": 0.48795811518324606, "fcm_dpo/beta": 0.013932683505117893, "fcm_dpo/delta": -0.07837289571762085, "fcm_dpo/margin": 41.53171157836914, "fcm_dpo/q_t": 0.37816399335861206, "grad_norm": 93.736328125, "learning_rate": 3.05375222543809e-07, "logits/chosen": -0.8585054278373718, "logits/rejected": -0.8508076071739197, "logps/chosen": -335.4866943359375, "logps/ref_chosen": -285.3423156738281, "logps/ref_rejected": -266.34320068359375, "logps/rejected": -358.0192565917969, "loss": 4.2142, "margin_dpo/margin_mean": 41.53171157836914, "margin_dpo/margin_std": 59.24362564086914, "step": 233 }, { "epoch": 0.4900523560209424, "fcm_dpo/beta": 0.01374006737023592, "fcm_dpo/delta": 0.036976464092731476, "fcm_dpo/margin": 41.03116226196289, "fcm_dpo/q_t": 0.3843136429786682, "grad_norm": 78.69235229492188, "learning_rate": 3.035884646397637e-07, "logits/chosen": -0.829176664352417, "logits/rejected": -0.812563419342041, "logps/chosen": -345.6146545410156, "logps/ref_chosen": -294.9057312011719, "logps/ref_rejected": -299.37054443359375, "logps/rejected": -391.11065673828125, "loss": 4.4275, "margin_dpo/margin_mean": 41.03116226196289, "margin_dpo/margin_std": 68.48192596435547, "step": 234 }, { "epoch": 0.49214659685863876, "fcm_dpo/beta": 0.01411922462284565, "fcm_dpo/delta": 0.003345828503370285, "fcm_dpo/margin": 42.21276092529297, "fcm_dpo/q_t": 0.37557002902030945, "grad_norm": 109.72699737548828, "learning_rate": 3.017988329489923e-07, "logits/chosen": -0.8408417701721191, "logits/rejected": -0.8409253358840942, "logps/chosen": -343.94256591796875, "logps/ref_chosen": -289.49755859375, "logps/ref_rejected": -247.55076599121094, "logps/rejected": -344.20849609375, "loss": 4.2826, "margin_dpo/margin_mean": 42.2127571105957, "margin_dpo/margin_std": 65.22442626953125, "step": 235 }, { "epoch": 0.4942408376963351, "fcm_dpo/beta": 0.013934805057942867, "fcm_dpo/delta": -0.03408358246088028, "fcm_dpo/margin": 42.292049407958984, "fcm_dpo/q_t": 0.3777884840965271, "grad_norm": 81.88858032226562, "learning_rate": 3.000064234440111e-07, "logits/chosen": -0.8615151643753052, "logits/rejected": -0.8628526926040649, "logps/chosen": -339.2417297363281, "logps/ref_chosen": -288.8846435546875, "logps/ref_rejected": -242.0452880859375, "logps/rejected": -334.6944274902344, "loss": 4.2538, "margin_dpo/margin_mean": 42.292049407958984, "margin_dpo/margin_std": 62.85895538330078, "step": 236 }, { "epoch": 0.4963350785340314, "fcm_dpo/beta": 0.013490064069628716, "fcm_dpo/delta": -0.031569261103868484, "fcm_dpo/margin": 42.74472427368164, "fcm_dpo/q_t": 0.3792114853858948, "grad_norm": 85.20064544677734, "learning_rate": 2.9821133224630223e-07, "logits/chosen": -0.8437389731407166, "logits/rejected": -0.8258963227272034, "logps/chosen": -320.6917419433594, "logps/ref_chosen": -265.47869873046875, "logps/ref_rejected": -267.9891357421875, "logps/rejected": -365.94683837890625, "loss": 4.2127, "margin_dpo/margin_mean": 42.74472427368164, "margin_dpo/margin_std": 61.919334411621094, "step": 237 }, { "epoch": 0.49842931937172774, "fcm_dpo/beta": 0.013200972229242325, "fcm_dpo/delta": 0.01033791620284319, "fcm_dpo/margin": 40.789093017578125, "fcm_dpo/q_t": 0.38993343710899353, "grad_norm": 93.29105377197266, "learning_rate": 2.964136556211588e-07, "logits/chosen": -0.8295376300811768, "logits/rejected": -0.8033552169799805, "logps/chosen": -369.406982421875, "logps/ref_chosen": -312.0026550292969, "logps/ref_rejected": -270.0257263183594, "logps/rejected": -368.21917724609375, "loss": 4.327, "margin_dpo/margin_mean": 40.789093017578125, "margin_dpo/margin_std": 64.44735717773438, "step": 238 }, { "epoch": 0.5005235602094241, "fcm_dpo/beta": 0.013887631706893444, "fcm_dpo/delta": 0.09677696973085403, "fcm_dpo/margin": 36.4874382019043, "fcm_dpo/q_t": 0.3990153670310974, "grad_norm": 100.5346908569336, "learning_rate": 2.946134899725226e-07, "logits/chosen": -0.8349162936210632, "logits/rejected": -0.8748633861541748, "logps/chosen": -320.4220275878906, "logps/ref_chosen": -267.167236328125, "logps/ref_rejected": -275.99468994140625, "logps/rejected": -365.7369384765625, "loss": 4.6325, "margin_dpo/margin_mean": 36.4874382019043, "margin_dpo/margin_std": 70.55658721923828, "step": 239 }, { "epoch": 0.5026178010471204, "fcm_dpo/beta": 0.013791955076158047, "fcm_dpo/delta": -0.048899125307798386, "fcm_dpo/margin": 46.676414489746094, "fcm_dpo/q_t": 0.3687818944454193, "grad_norm": 117.41996765136719, "learning_rate": 2.9281093183781403e-07, "logits/chosen": -0.8881155848503113, "logits/rejected": -0.8836052417755127, "logps/chosen": -337.365478515625, "logps/ref_chosen": -285.9796142578125, "logps/ref_rejected": -256.8258056640625, "logps/rejected": -354.8880615234375, "loss": 4.0805, "margin_dpo/margin_mean": 46.67641830444336, "margin_dpo/margin_std": 65.10855102539062, "step": 240 }, { "epoch": 0.5047120418848168, "fcm_dpo/beta": 0.013698762282729149, "fcm_dpo/delta": 0.03592575713992119, "fcm_dpo/margin": 37.19944381713867, "fcm_dpo/q_t": 0.3960975408554077, "grad_norm": 95.49946594238281, "learning_rate": 2.910060778827554e-07, "logits/chosen": -0.7951388359069824, "logits/rejected": -0.7752350568771362, "logps/chosen": -321.134033203125, "logps/ref_chosen": -261.516845703125, "logps/ref_rejected": -250.2250518798828, "logps/rejected": -347.0416564941406, "loss": 4.529, "margin_dpo/margin_mean": 37.19944381713867, "margin_dpo/margin_std": 65.24166107177734, "step": 241 }, { "epoch": 0.506806282722513, "fcm_dpo/beta": 0.014109227806329727, "fcm_dpo/delta": -0.023093625903129578, "fcm_dpo/margin": 43.914390563964844, "fcm_dpo/q_t": 0.3734211027622223, "grad_norm": 97.55506134033203, "learning_rate": 2.891990248961871e-07, "logits/chosen": -0.8705978393554688, "logits/rejected": -0.8577161431312561, "logps/chosen": -322.5037536621094, "logps/ref_chosen": -270.51397705078125, "logps/ref_rejected": -244.8560791015625, "logps/rejected": -340.76025390625, "loss": 4.1074, "margin_dpo/margin_mean": 43.91438674926758, "margin_dpo/margin_std": 60.707244873046875, "step": 242 }, { "epoch": 0.5089005235602094, "fcm_dpo/beta": 0.013829024508595467, "fcm_dpo/delta": -0.07154600322246552, "fcm_dpo/margin": 48.14585876464844, "fcm_dpo/q_t": 0.36528927087783813, "grad_norm": 109.1782455444336, "learning_rate": 2.873898697848762e-07, "logits/chosen": -0.8485463857650757, "logits/rejected": -0.8369187116622925, "logps/chosen": -370.865234375, "logps/ref_chosen": -324.68206787109375, "logps/ref_rejected": -307.1111755371094, "logps/rejected": -401.440185546875, "loss": 4.0443, "margin_dpo/margin_mean": 48.14585876464844, "margin_dpo/margin_std": 65.65919494628906, "step": 243 }, { "epoch": 0.5109947643979058, "fcm_dpo/beta": 0.012862252071499825, "fcm_dpo/delta": -0.007831787690520287, "fcm_dpo/margin": 47.06397247314453, "fcm_dpo/q_t": 0.3703567385673523, "grad_norm": 87.85368347167969, "learning_rate": 2.8557870956832133e-07, "logits/chosen": -0.8476990461349487, "logits/rejected": -0.8005751967430115, "logps/chosen": -365.91729736328125, "logps/ref_chosen": -318.979248046875, "logps/ref_rejected": -269.67572021484375, "logps/rejected": -363.677734375, "loss": 4.0768, "margin_dpo/margin_mean": 47.06397247314453, "margin_dpo/margin_std": 60.87822723388672, "step": 244 }, { "epoch": 0.5130890052356021, "fcm_dpo/beta": 0.012774711474776268, "fcm_dpo/delta": -0.07881193608045578, "fcm_dpo/margin": 47.673194885253906, "fcm_dpo/q_t": 0.3689280152320862, "grad_norm": 81.23341369628906, "learning_rate": 2.837656413735479e-07, "logits/chosen": -0.8486171960830688, "logits/rejected": -0.8539371490478516, "logps/chosen": -338.697265625, "logps/ref_chosen": -294.8980712890625, "logps/ref_rejected": -239.8111114501953, "logps/rejected": -331.2834777832031, "loss": 4.0503, "margin_dpo/margin_mean": 47.67319869995117, "margin_dpo/margin_std": 59.50359344482422, "step": 245 }, { "epoch": 0.5151832460732985, "fcm_dpo/beta": 0.012836070731282234, "fcm_dpo/delta": 0.08966440707445145, "fcm_dpo/margin": 35.876922607421875, "fcm_dpo/q_t": 0.4043683707714081, "grad_norm": 97.06179809570312, "learning_rate": 2.8195076242990116e-07, "logits/chosen": -0.823259711265564, "logits/rejected": -0.8320043087005615, "logps/chosen": -336.7489318847656, "logps/ref_chosen": -280.6854248046875, "logps/ref_rejected": -253.65382385253906, "logps/rejected": -345.59423828125, "loss": 4.5646, "margin_dpo/margin_mean": 35.876922607421875, "margin_dpo/margin_std": 64.8729248046875, "step": 246 }, { "epoch": 0.5172774869109947, "fcm_dpo/beta": 0.01340182963758707, "fcm_dpo/delta": 0.010741522535681725, "fcm_dpo/margin": 40.165985107421875, "fcm_dpo/q_t": 0.38809463381767273, "grad_norm": 82.3198013305664, "learning_rate": 2.801341700638307e-07, "logits/chosen": -0.8334712386131287, "logits/rejected": -0.8363280296325684, "logps/chosen": -332.05615234375, "logps/ref_chosen": -281.1091003417969, "logps/ref_rejected": -260.3700866699219, "logps/rejected": -351.4831237792969, "loss": 4.295, "margin_dpo/margin_mean": 40.165985107421875, "margin_dpo/margin_std": 59.34774398803711, "step": 247 }, { "epoch": 0.5193717277486911, "fcm_dpo/beta": 0.013335911557078362, "fcm_dpo/delta": 0.03803172707557678, "fcm_dpo/margin": 37.84646224975586, "fcm_dpo/q_t": 0.39170122146606445, "grad_norm": 96.70375061035156, "learning_rate": 2.7831596169367227e-07, "logits/chosen": -0.795592188835144, "logits/rejected": -0.8106747269630432, "logps/chosen": -320.57391357421875, "logps/ref_chosen": -270.318359375, "logps/ref_rejected": -233.46778869628906, "logps/rejected": -321.56982421875, "loss": 4.3839, "margin_dpo/margin_mean": 37.84646224975586, "margin_dpo/margin_std": 58.59114074707031, "step": 248 }, { "epoch": 0.5214659685863874, "fcm_dpo/beta": 0.013947556726634502, "fcm_dpo/delta": 0.03363037109375, "fcm_dpo/margin": 36.132991790771484, "fcm_dpo/q_t": 0.3958445191383362, "grad_norm": 102.5847396850586, "learning_rate": 2.7649623482442274e-07, "logits/chosen": -0.8209048509597778, "logits/rejected": -0.8001272082328796, "logps/chosen": -337.85406494140625, "logps/ref_chosen": -275.8088684082031, "logps/ref_rejected": -243.45138549804688, "logps/rejected": -341.6295471191406, "loss": 4.566, "margin_dpo/margin_mean": 36.132991790771484, "margin_dpo/margin_std": 66.05538940429688, "step": 249 }, { "epoch": 0.5235602094240838, "fcm_dpo/beta": 0.013355924747884274, "fcm_dpo/delta": -0.04608849063515663, "fcm_dpo/margin": 47.88051223754883, "fcm_dpo/q_t": 0.36725619435310364, "grad_norm": 95.6384048461914, "learning_rate": 2.7467508704251135e-07, "logits/chosen": -0.829230010509491, "logits/rejected": -0.8328065872192383, "logps/chosen": -355.0364990234375, "logps/ref_chosen": -292.4945373535156, "logps/ref_rejected": -284.2869567871094, "logps/rejected": -394.7093811035156, "loss": 4.1352, "margin_dpo/margin_mean": 47.88051223754883, "margin_dpo/margin_std": 67.44532012939453, "step": 250 }, { "epoch": 0.5256544502617801, "fcm_dpo/beta": 0.013813665136694908, "fcm_dpo/delta": -0.007170406170189381, "fcm_dpo/margin": 43.663360595703125, "fcm_dpo/q_t": 0.3814099431037903, "grad_norm": 100.86103820800781, "learning_rate": 2.7285261601056697e-07, "logits/chosen": -0.8296136856079102, "logits/rejected": -0.8152703046798706, "logps/chosen": -336.88873291015625, "logps/ref_chosen": -281.736572265625, "logps/ref_rejected": -255.9419708251953, "logps/rejected": -354.75750732421875, "loss": 4.1787, "margin_dpo/margin_mean": 43.66335678100586, "margin_dpo/margin_std": 63.43466567993164, "step": 251 }, { "epoch": 0.5277486910994764, "fcm_dpo/beta": 0.013664179481565952, "fcm_dpo/delta": 0.0335024930536747, "fcm_dpo/margin": 41.402076721191406, "fcm_dpo/q_t": 0.3806874752044678, "grad_norm": 102.68427276611328, "learning_rate": 2.7102891946217994e-07, "logits/chosen": -0.8773578405380249, "logits/rejected": -0.854051411151886, "logps/chosen": -360.0166931152344, "logps/ref_chosen": -295.9674072265625, "logps/ref_rejected": -280.111572265625, "logps/rejected": -385.56292724609375, "loss": 4.3841, "margin_dpo/margin_mean": 41.40208053588867, "margin_dpo/margin_std": 66.1944580078125, "step": 252 }, { "epoch": 0.5298429319371728, "fcm_dpo/beta": 0.013652501627802849, "fcm_dpo/delta": -0.021744156256318092, "fcm_dpo/margin": 41.56562805175781, "fcm_dpo/q_t": 0.38615942001342773, "grad_norm": 96.21172332763672, "learning_rate": 2.692040951966617e-07, "logits/chosen": -0.8553462624549866, "logits/rejected": -0.848787248134613, "logps/chosen": -346.29815673828125, "logps/ref_chosen": -277.072265625, "logps/ref_rejected": -247.31643676757812, "logps/rejected": -358.10791015625, "loss": 4.3891, "margin_dpo/margin_mean": 41.56563186645508, "margin_dpo/margin_std": 68.40611267089844, "step": 253 }, { "epoch": 0.5319371727748691, "fcm_dpo/beta": 0.01416382659226656, "fcm_dpo/delta": -0.016133006662130356, "fcm_dpo/margin": 43.26961898803711, "fcm_dpo/q_t": 0.37535524368286133, "grad_norm": 99.73017120361328, "learning_rate": 2.6737824107379947e-07, "logits/chosen": -0.7875509858131409, "logits/rejected": -0.7763053774833679, "logps/chosen": -334.57989501953125, "logps/ref_chosen": -269.9478454589844, "logps/ref_rejected": -249.45005798339844, "logps/rejected": -357.3516845703125, "loss": 4.18, "margin_dpo/margin_mean": 43.26961898803711, "margin_dpo/margin_std": 61.28417205810547, "step": 254 }, { "epoch": 0.5340314136125655, "fcm_dpo/beta": 0.013342966325581074, "fcm_dpo/delta": -0.06489241868257523, "fcm_dpo/margin": 49.52783966064453, "fcm_dpo/q_t": 0.36613547801971436, "grad_norm": 90.38292694091797, "learning_rate": 2.655514550086086e-07, "logits/chosen": -0.8106395602226257, "logits/rejected": -0.7797207832336426, "logps/chosen": -370.4023742675781, "logps/ref_chosen": -306.6552734375, "logps/ref_rejected": -254.47528076171875, "logps/rejected": -367.7502136230469, "loss": 4.1532, "margin_dpo/margin_mean": 49.52783966064453, "margin_dpo/margin_std": 72.60646057128906, "step": 255 }, { "epoch": 0.5361256544502618, "fcm_dpo/beta": 0.012888522818684578, "fcm_dpo/delta": -0.017480649054050446, "fcm_dpo/margin": 47.27513122558594, "fcm_dpo/q_t": 0.3648688495159149, "grad_norm": 255.97872924804688, "learning_rate": 2.6372383496608186e-07, "logits/chosen": -0.8314058184623718, "logits/rejected": -0.827141523361206, "logps/chosen": -388.43408203125, "logps/ref_chosen": -323.7181701660156, "logps/ref_rejected": -254.1871337890625, "logps/rejected": -366.1781311035156, "loss": 4.5363, "margin_dpo/margin_mean": 47.2751350402832, "margin_dpo/margin_std": 78.98124694824219, "step": 256 }, { "epoch": 0.5382198952879581, "fcm_dpo/beta": 0.012398256920278072, "fcm_dpo/delta": -0.015751376748085022, "fcm_dpo/margin": 49.54781723022461, "fcm_dpo/q_t": 0.3713992238044739, "grad_norm": 97.32785034179688, "learning_rate": 2.618954789559356e-07, "logits/chosen": -0.8290724158287048, "logits/rejected": -0.8196491003036499, "logps/chosen": -331.4079895019531, "logps/ref_chosen": -267.21209716796875, "logps/ref_rejected": -249.12579345703125, "logps/rejected": -362.8694763183594, "loss": 4.039, "margin_dpo/margin_mean": 49.54781723022461, "margin_dpo/margin_std": 66.0081558227539, "step": 257 }, { "epoch": 0.5403141361256545, "fcm_dpo/beta": 0.011941884644329548, "fcm_dpo/delta": -0.021545007824897766, "fcm_dpo/margin": 51.69853210449219, "fcm_dpo/q_t": 0.36628904938697815, "grad_norm": 81.3831787109375, "learning_rate": 2.600664850273538e-07, "logits/chosen": -0.8486968278884888, "logits/rejected": -0.8191419243812561, "logps/chosen": -345.801025390625, "logps/ref_chosen": -277.6827392578125, "logps/ref_rejected": -250.73385620117188, "logps/rejected": -370.5506591796875, "loss": 3.9976, "margin_dpo/margin_mean": 51.69853591918945, "margin_dpo/margin_std": 62.97686004638672, "step": 258 }, { "epoch": 0.5424083769633508, "fcm_dpo/beta": 0.01243941206485033, "fcm_dpo/delta": 0.02084418572485447, "fcm_dpo/margin": 46.594276428222656, "fcm_dpo/q_t": 0.3780772387981415, "grad_norm": 86.16590118408203, "learning_rate": 2.582369512637302e-07, "logits/chosen": -0.8632500171661377, "logits/rejected": -0.8614512085914612, "logps/chosen": -354.69976806640625, "logps/ref_chosen": -294.6099853515625, "logps/ref_rejected": -272.2725830078125, "logps/rejected": -378.9566345214844, "loss": 4.1194, "margin_dpo/margin_mean": 46.594268798828125, "margin_dpo/margin_std": 63.69516372680664, "step": 259 }, { "epoch": 0.5445026178010471, "fcm_dpo/beta": 0.013677787035703659, "fcm_dpo/delta": 0.19107326865196228, "fcm_dpo/margin": 22.353225708007812, "fcm_dpo/q_t": 0.43973931670188904, "grad_norm": 113.30580139160156, "learning_rate": 2.5640697577740815e-07, "logits/chosen": -0.8496757745742798, "logits/rejected": -0.8480501174926758, "logps/chosen": -357.86456298828125, "logps/ref_chosen": -290.85711669921875, "logps/ref_rejected": -277.5970153808594, "logps/rejected": -366.95770263671875, "loss": 5.1521, "margin_dpo/margin_mean": 22.353225708007812, "margin_dpo/margin_std": 64.10260772705078, "step": 260 }, { "epoch": 0.5465968586387434, "fcm_dpo/beta": 0.014235386624932289, "fcm_dpo/delta": -0.05504711717367172, "fcm_dpo/margin": 37.57318115234375, "fcm_dpo/q_t": 0.39546385407447815, "grad_norm": 130.93417358398438, "learning_rate": 2.5457665670441937e-07, "logits/chosen": -0.733401358127594, "logits/rejected": -0.7483704090118408, "logps/chosen": -322.2603454589844, "logps/ref_chosen": -251.13223266601562, "logps/ref_rejected": -244.76016235351562, "logps/rejected": -353.4614562988281, "loss": 4.6495, "margin_dpo/margin_mean": 37.57318115234375, "margin_dpo/margin_std": 71.4426040649414, "step": 261 }, { "epoch": 0.5486910994764398, "fcm_dpo/beta": 0.013635975308716297, "fcm_dpo/delta": -0.06950134038925171, "fcm_dpo/margin": 48.76420211791992, "fcm_dpo/q_t": 0.365522176027298, "grad_norm": 102.4128646850586, "learning_rate": 2.527460921992209e-07, "logits/chosen": -0.7756036520004272, "logits/rejected": -0.7701444625854492, "logps/chosen": -363.00665283203125, "logps/ref_chosen": -299.7217712402344, "logps/ref_rejected": -277.0969543457031, "logps/rejected": -389.14605712890625, "loss": 4.0242, "margin_dpo/margin_mean": 48.76420211791992, "margin_dpo/margin_std": 65.38024139404297, "step": 262 }, { "epoch": 0.5507853403141362, "fcm_dpo/beta": 0.013076528906822205, "fcm_dpo/delta": -0.0265921950340271, "fcm_dpo/margin": 40.98373794555664, "fcm_dpo/q_t": 0.38731229305267334, "grad_norm": 84.20980072021484, "learning_rate": 2.509153804294318e-07, "logits/chosen": -0.7757068276405334, "logits/rejected": -0.7594835758209229, "logps/chosen": -350.37353515625, "logps/ref_chosen": -279.95257568359375, "logps/ref_rejected": -256.5327453613281, "logps/rejected": -367.9373779296875, "loss": 4.4423, "margin_dpo/margin_mean": 40.983741760253906, "margin_dpo/margin_std": 67.5083236694336, "step": 263 }, { "epoch": 0.5528795811518324, "fcm_dpo/beta": 0.012376993894577026, "fcm_dpo/delta": -0.06797336786985397, "fcm_dpo/margin": 49.21453857421875, "fcm_dpo/q_t": 0.37006676197052, "grad_norm": 106.1910400390625, "learning_rate": 2.4908461957056825e-07, "logits/chosen": -0.7897322177886963, "logits/rejected": -0.7906150817871094, "logps/chosen": -323.62689208984375, "logps/ref_chosen": -260.53509521484375, "logps/ref_rejected": -255.53799438476562, "logps/rejected": -367.8443298339844, "loss": 4.0584, "margin_dpo/margin_mean": 49.21453857421875, "margin_dpo/margin_std": 64.37642669677734, "step": 264 }, { "epoch": 0.5549738219895288, "fcm_dpo/beta": 0.011801987886428833, "fcm_dpo/delta": -0.024343391880393028, "fcm_dpo/margin": 52.564517974853516, "fcm_dpo/q_t": 0.36950555443763733, "grad_norm": 83.8652114868164, "learning_rate": 2.4725390780077905e-07, "logits/chosen": -0.8621577024459839, "logits/rejected": -0.8714127540588379, "logps/chosen": -347.90740966796875, "logps/ref_chosen": -283.7130432128906, "logps/ref_rejected": -270.3209533691406, "logps/rejected": -387.079833984375, "loss": 4.1157, "margin_dpo/margin_mean": 52.564517974853516, "margin_dpo/margin_std": 71.61198425292969, "step": 265 }, { "epoch": 0.5570680628272251, "fcm_dpo/beta": 0.01190432533621788, "fcm_dpo/delta": -0.02119002863764763, "fcm_dpo/margin": 51.92703628540039, "fcm_dpo/q_t": 0.36642715334892273, "grad_norm": 75.40443420410156, "learning_rate": 2.454233432955807e-07, "logits/chosen": -0.8703705072402954, "logits/rejected": -0.8404238224029541, "logps/chosen": -333.9207458496094, "logps/ref_chosen": -278.09930419921875, "logps/ref_rejected": -260.6734619140625, "logps/rejected": -368.4219665527344, "loss": 3.9296, "margin_dpo/margin_mean": 51.92703628540039, "margin_dpo/margin_std": 59.450191497802734, "step": 266 }, { "epoch": 0.5591623036649215, "fcm_dpo/beta": 0.011871559545397758, "fcm_dpo/delta": 0.046678848564624786, "fcm_dpo/margin": 42.63209533691406, "fcm_dpo/q_t": 0.39243483543395996, "grad_norm": 101.08575439453125, "learning_rate": 2.435930242225919e-07, "logits/chosen": -0.8200643658638, "logits/rejected": -0.834830105304718, "logps/chosen": -349.9026794433594, "logps/ref_chosen": -280.33319091796875, "logps/ref_rejected": -247.78099060058594, "logps/rejected": -359.9825744628906, "loss": 4.3181, "margin_dpo/margin_mean": 42.63209533691406, "margin_dpo/margin_std": 63.018951416015625, "step": 267 }, { "epoch": 0.5612565445026177, "fcm_dpo/beta": 0.012035196647047997, "fcm_dpo/delta": -0.05291684344410896, "fcm_dpo/margin": 53.979488372802734, "fcm_dpo/q_t": 0.36334070563316345, "grad_norm": 90.82762145996094, "learning_rate": 2.4176304873626984e-07, "logits/chosen": -0.7756884098052979, "logits/rejected": -0.7557308673858643, "logps/chosen": -370.0815734863281, "logps/ref_chosen": -304.1787109375, "logps/ref_rejected": -272.80316162109375, "logps/rejected": -392.685546875, "loss": 3.968, "margin_dpo/margin_mean": 53.979488372802734, "margin_dpo/margin_std": 67.94827270507812, "step": 268 }, { "epoch": 0.5633507853403141, "fcm_dpo/beta": 0.012536915019154549, "fcm_dpo/delta": 0.11777209490537643, "fcm_dpo/margin": 38.674407958984375, "fcm_dpo/q_t": 0.39720258116722107, "grad_norm": 126.63288116455078, "learning_rate": 2.399335149726463e-07, "logits/chosen": -0.8296777606010437, "logits/rejected": -0.8268716931343079, "logps/chosen": -321.98870849609375, "logps/ref_chosen": -249.84512329101562, "logps/ref_rejected": -223.37356567382812, "logps/rejected": -334.19158935546875, "loss": 4.5857, "margin_dpo/margin_mean": 38.67441177368164, "margin_dpo/margin_std": 72.29447174072266, "step": 269 }, { "epoch": 0.5654450261780105, "fcm_dpo/beta": 0.012732122093439102, "fcm_dpo/delta": 0.004775438457727432, "fcm_dpo/margin": 46.69505310058594, "fcm_dpo/q_t": 0.3786012828350067, "grad_norm": 100.78535461425781, "learning_rate": 2.381045210440644e-07, "logits/chosen": -0.8925029635429382, "logits/rejected": -0.9094992876052856, "logps/chosen": -395.2410888671875, "logps/ref_chosen": -318.5623779296875, "logps/ref_rejected": -281.1880798339844, "logps/rejected": -404.5618591308594, "loss": 4.3373, "margin_dpo/margin_mean": 46.69505310058594, "margin_dpo/margin_std": 75.69879150390625, "step": 270 }, { "epoch": 0.5675392670157068, "fcm_dpo/beta": 0.013183288276195526, "fcm_dpo/delta": -0.0032483600080013275, "fcm_dpo/margin": 45.56004333496094, "fcm_dpo/q_t": 0.3815266489982605, "grad_norm": 102.04464721679688, "learning_rate": 2.3627616503391812e-07, "logits/chosen": -0.7499503493309021, "logits/rejected": -0.7473767995834351, "logps/chosen": -358.6291198730469, "logps/ref_chosen": -284.104736328125, "logps/ref_rejected": -253.9580535888672, "logps/rejected": -374.0425720214844, "loss": 4.2719, "margin_dpo/margin_mean": 45.5600471496582, "margin_dpo/margin_std": 70.25496673583984, "step": 271 }, { "epoch": 0.5696335078534032, "fcm_dpo/beta": 0.01266053318977356, "fcm_dpo/delta": -0.029928136616945267, "fcm_dpo/margin": 49.448184967041016, "fcm_dpo/q_t": 0.37268373370170593, "grad_norm": 87.62028503417969, "learning_rate": 2.344485449913914e-07, "logits/chosen": -0.862612247467041, "logits/rejected": -0.8508659601211548, "logps/chosen": -367.183349609375, "logps/ref_chosen": -297.3590087890625, "logps/ref_rejected": -279.20196533203125, "logps/rejected": -398.4744873046875, "loss": 4.303, "margin_dpo/margin_mean": 49.448184967041016, "margin_dpo/margin_std": 78.49717712402344, "step": 272 }, { "epoch": 0.5717277486910994, "fcm_dpo/beta": 0.012191718444228172, "fcm_dpo/delta": -0.02252171002328396, "fcm_dpo/margin": 50.77750778198242, "fcm_dpo/q_t": 0.3730708956718445, "grad_norm": 96.84104919433594, "learning_rate": 2.3262175892620062e-07, "logits/chosen": -0.829898476600647, "logits/rejected": -0.8430629968643188, "logps/chosen": -365.34466552734375, "logps/ref_chosen": -293.20574951171875, "logps/ref_rejected": -274.7646789550781, "logps/rejected": -397.68109130859375, "loss": 4.2016, "margin_dpo/margin_mean": 50.77750778198242, "margin_dpo/margin_std": 74.58787536621094, "step": 273 }, { "epoch": 0.5738219895287958, "fcm_dpo/beta": 0.011691069230437279, "fcm_dpo/delta": -0.11797457188367844, "fcm_dpo/margin": 60.72999954223633, "fcm_dpo/q_t": 0.3517453372478485, "grad_norm": 88.58201599121094, "learning_rate": 2.3079590480333827e-07, "logits/chosen": -0.7908748388290405, "logits/rejected": -0.7618493437767029, "logps/chosen": -342.7866516113281, "logps/ref_chosen": -270.55865478515625, "logps/ref_rejected": -239.47048950195312, "logps/rejected": -372.428466796875, "loss": 3.8295, "margin_dpo/margin_mean": 60.72999572753906, "margin_dpo/margin_std": 72.17181396484375, "step": 274 }, { "epoch": 0.5759162303664922, "fcm_dpo/beta": 0.010889939963817596, "fcm_dpo/delta": -0.05324774980545044, "fcm_dpo/margin": 59.565277099609375, "fcm_dpo/q_t": 0.3633711040019989, "grad_norm": 70.90768432617188, "learning_rate": 2.2897108053782e-07, "logits/chosen": -0.8442721962928772, "logits/rejected": -0.8297668099403381, "logps/chosen": -315.0128173828125, "logps/ref_chosen": -250.31922912597656, "logps/ref_rejected": -249.3187255859375, "logps/rejected": -373.57757568359375, "loss": 3.905, "margin_dpo/margin_mean": 59.56527328491211, "margin_dpo/margin_std": 71.25727844238281, "step": 275 }, { "epoch": 0.5780104712041885, "fcm_dpo/beta": 0.010673362761735916, "fcm_dpo/delta": 0.0510733537375927, "fcm_dpo/margin": 51.6572265625, "fcm_dpo/q_t": 0.38332486152648926, "grad_norm": 80.28546905517578, "learning_rate": 2.2714738398943308e-07, "logits/chosen": -0.910760223865509, "logits/rejected": -0.8880026340484619, "logps/chosen": -372.0062561035156, "logps/ref_chosen": -297.6310729980469, "logps/ref_rejected": -295.225830078125, "logps/rejected": -421.25823974609375, "loss": 4.2562, "margin_dpo/margin_mean": 51.6572265625, "margin_dpo/margin_std": 74.79243469238281, "step": 276 }, { "epoch": 0.5801047120418849, "fcm_dpo/beta": 0.01158787589520216, "fcm_dpo/delta": 0.07226106524467468, "fcm_dpo/margin": 45.672664642333984, "fcm_dpo/q_t": 0.38873162865638733, "grad_norm": 104.22013092041016, "learning_rate": 2.2532491295748865e-07, "logits/chosen": -0.8400160074234009, "logits/rejected": -0.8426806330680847, "logps/chosen": -344.77178955078125, "logps/ref_chosen": -266.3604736328125, "logps/ref_rejected": -253.36767578125, "logps/rejected": -377.45166015625, "loss": 4.4179, "margin_dpo/margin_mean": 45.672664642333984, "margin_dpo/margin_std": 74.9579849243164, "step": 277 }, { "epoch": 0.5821989528795811, "fcm_dpo/beta": 0.0121334008872509, "fcm_dpo/delta": 0.04825280234217644, "fcm_dpo/margin": 33.917503356933594, "fcm_dpo/q_t": 0.420282781124115, "grad_norm": 115.2516860961914, "learning_rate": 2.2350376517557726e-07, "logits/chosen": -0.8667393326759338, "logits/rejected": -0.8342878222465515, "logps/chosen": -357.53857421875, "logps/ref_chosen": -267.40728759765625, "logps/ref_rejected": -229.5758514404297, "logps/rejected": -353.6246337890625, "loss": 4.9917, "margin_dpo/margin_mean": 33.917503356933594, "margin_dpo/margin_std": 81.22914123535156, "step": 278 }, { "epoch": 0.5842931937172775, "fcm_dpo/beta": 0.011737332679331303, "fcm_dpo/delta": -0.12058336287736893, "fcm_dpo/margin": 55.69242858886719, "fcm_dpo/q_t": 0.3652134835720062, "grad_norm": 112.99444580078125, "learning_rate": 2.2168403830632769e-07, "logits/chosen": -0.781296968460083, "logits/rejected": -0.7669795751571655, "logps/chosen": -393.08892822265625, "logps/ref_chosen": -313.3677978515625, "logps/ref_rejected": -299.1744384765625, "logps/rejected": -434.5880126953125, "loss": 4.1187, "margin_dpo/margin_mean": 55.69242477416992, "margin_dpo/margin_std": 78.3192138671875, "step": 279 }, { "epoch": 0.5863874345549738, "fcm_dpo/beta": 0.011380909010767937, "fcm_dpo/delta": 0.04981427267193794, "fcm_dpo/margin": 48.567710876464844, "fcm_dpo/q_t": 0.3854876160621643, "grad_norm": 81.46392059326172, "learning_rate": 2.1986582993616925e-07, "logits/chosen": -0.8543354272842407, "logits/rejected": -0.8661242127418518, "logps/chosen": -334.360595703125, "logps/ref_chosen": -265.5558166503906, "logps/ref_rejected": -247.1573944091797, "logps/rejected": -364.5298767089844, "loss": 4.3441, "margin_dpo/margin_mean": 48.567710876464844, "margin_dpo/margin_std": 78.15949249267578, "step": 280 }, { "epoch": 0.5884816753926702, "fcm_dpo/beta": 0.011563065461814404, "fcm_dpo/delta": 0.030172260478138924, "fcm_dpo/margin": 49.29734802246094, "fcm_dpo/q_t": 0.3839731514453888, "grad_norm": 101.9212875366211, "learning_rate": 2.1804923757009882e-07, "logits/chosen": -0.8250092267990112, "logits/rejected": -0.8347154855728149, "logps/chosen": -380.861328125, "logps/ref_chosen": -295.2995910644531, "logps/ref_rejected": -293.80877685546875, "logps/rejected": -428.6678161621094, "loss": 4.2825, "margin_dpo/margin_mean": 49.29734420776367, "margin_dpo/margin_std": 74.99603271484375, "step": 281 }, { "epoch": 0.5905759162303665, "fcm_dpo/beta": 0.011664286255836487, "fcm_dpo/delta": -0.011024218052625656, "fcm_dpo/margin": 52.18028259277344, "fcm_dpo/q_t": 0.3756002187728882, "grad_norm": 89.68161010742188, "learning_rate": 2.1623435862645205e-07, "logits/chosen": -0.8206506967544556, "logits/rejected": -0.8235145211219788, "logps/chosen": -391.75213623046875, "logps/ref_chosen": -318.63714599609375, "logps/ref_rejected": -273.5943603515625, "logps/rejected": -398.88958740234375, "loss": 4.2229, "margin_dpo/margin_mean": 52.18027877807617, "margin_dpo/margin_std": 77.2578353881836, "step": 282 }, { "epoch": 0.5926701570680628, "fcm_dpo/beta": 0.012338871136307716, "fcm_dpo/delta": 0.04455633834004402, "fcm_dpo/margin": 44.96173858642578, "fcm_dpo/q_t": 0.3896506428718567, "grad_norm": 90.43144989013672, "learning_rate": 2.1442129043167873e-07, "logits/chosen": -0.8331937193870544, "logits/rejected": -0.8291042447090149, "logps/chosen": -333.6814270019531, "logps/ref_chosen": -254.66053771972656, "logps/ref_rejected": -236.8627166748047, "logps/rejected": -360.8453369140625, "loss": 4.3718, "margin_dpo/margin_mean": 44.96173858642578, "margin_dpo/margin_std": 73.44696044921875, "step": 283 }, { "epoch": 0.5947643979057592, "fcm_dpo/beta": 0.011935784481465816, "fcm_dpo/delta": -0.03917480632662773, "fcm_dpo/margin": 53.04762649536133, "fcm_dpo/q_t": 0.37296316027641296, "grad_norm": 118.94564819335938, "learning_rate": 2.1261013021512378e-07, "logits/chosen": -0.8013940453529358, "logits/rejected": -0.7824323773384094, "logps/chosen": -353.3193359375, "logps/ref_chosen": -273.355224609375, "logps/ref_rejected": -259.84759521484375, "logps/rejected": -392.8592834472656, "loss": 4.2865, "margin_dpo/margin_mean": 53.047630310058594, "margin_dpo/margin_std": 81.07416534423828, "step": 284 }, { "epoch": 0.5968586387434555, "fcm_dpo/beta": 0.012555155903100967, "fcm_dpo/delta": 0.06368312239646912, "fcm_dpo/margin": 36.739646911621094, "fcm_dpo/q_t": 0.40852105617523193, "grad_norm": 148.76206970214844, "learning_rate": 2.1080097510381294e-07, "logits/chosen": -0.8108698725700378, "logits/rejected": -0.8106898069381714, "logps/chosen": -394.7806701660156, "logps/ref_chosen": -309.8022155761719, "logps/ref_rejected": -279.11846923828125, "logps/rejected": -400.8365478515625, "loss": 4.7712, "margin_dpo/margin_mean": 36.739646911621094, "margin_dpo/margin_std": 75.5753402709961, "step": 285 }, { "epoch": 0.5989528795811518, "fcm_dpo/beta": 0.012427356094121933, "fcm_dpo/delta": 0.030025284737348557, "fcm_dpo/margin": 45.97602844238281, "fcm_dpo/q_t": 0.38827937841415405, "grad_norm": 124.07766723632812, "learning_rate": 2.089939221172446e-07, "logits/chosen": -0.8039661049842834, "logits/rejected": -0.7928801774978638, "logps/chosen": -349.1395568847656, "logps/ref_chosen": -271.4655456542969, "logps/ref_rejected": -279.531494140625, "logps/rejected": -403.1815185546875, "loss": 4.4449, "margin_dpo/margin_mean": 45.97602844238281, "margin_dpo/margin_std": 79.96269989013672, "step": 286 }, { "epoch": 0.6010471204188481, "fcm_dpo/beta": 0.012432662770152092, "fcm_dpo/delta": -0.020902253687381744, "fcm_dpo/margin": 49.71929931640625, "fcm_dpo/q_t": 0.37481507658958435, "grad_norm": 98.54369354248047, "learning_rate": 2.0718906816218595e-07, "logits/chosen": -0.8174068331718445, "logits/rejected": -0.8055183291435242, "logps/chosen": -350.4097595214844, "logps/ref_chosen": -277.0932312011719, "logps/ref_rejected": -233.55599975585938, "logps/rejected": -356.591796875, "loss": 4.3139, "margin_dpo/margin_mean": 49.71929931640625, "margin_dpo/margin_std": 79.30457305908203, "step": 287 }, { "epoch": 0.6031413612565445, "fcm_dpo/beta": 0.01297105010598898, "fcm_dpo/delta": -0.004447203129529953, "fcm_dpo/margin": 46.434669494628906, "fcm_dpo/q_t": 0.3777006268501282, "grad_norm": 120.47964477539062, "learning_rate": 2.053865100274774e-07, "logits/chosen": -0.8263804316520691, "logits/rejected": -0.8423773050308228, "logps/chosen": -362.74114990234375, "logps/ref_chosen": -293.1681823730469, "logps/ref_rejected": -263.4059143066406, "logps/rejected": -379.41351318359375, "loss": 4.2667, "margin_dpo/margin_mean": 46.43466567993164, "margin_dpo/margin_std": 71.86286926269531, "step": 288 }, { "epoch": 0.6052356020942409, "fcm_dpo/beta": 0.013167420402169228, "fcm_dpo/delta": 0.1251918226480484, "fcm_dpo/margin": 32.636474609375, "fcm_dpo/q_t": 0.41369497776031494, "grad_norm": 108.58908081054688, "learning_rate": 2.035863443788411e-07, "logits/chosen": -0.8092857599258423, "logits/rejected": -0.7957339882850647, "logps/chosen": -412.3819580078125, "logps/ref_chosen": -329.9574279785156, "logps/ref_rejected": -276.7565002441406, "logps/rejected": -391.8175048828125, "loss": 4.8066, "margin_dpo/margin_mean": 32.636474609375, "margin_dpo/margin_std": 70.96094512939453, "step": 289 }, { "epoch": 0.6073298429319371, "fcm_dpo/beta": 0.012977060861885548, "fcm_dpo/delta": -0.08185821771621704, "fcm_dpo/margin": 44.748985290527344, "fcm_dpo/q_t": 0.38607901334762573, "grad_norm": 140.78160095214844, "learning_rate": 2.0178866775369774e-07, "logits/chosen": -0.8182957172393799, "logits/rejected": -0.7599232196807861, "logps/chosen": -399.31103515625, "logps/ref_chosen": -324.6690673828125, "logps/ref_rejected": -311.8439636230469, "logps/rejected": -431.23492431640625, "loss": 4.4597, "margin_dpo/margin_mean": 44.74897766113281, "margin_dpo/margin_std": 74.92218780517578, "step": 290 }, { "epoch": 0.6094240837696335, "fcm_dpo/beta": 0.012201309204101562, "fcm_dpo/delta": -0.08980172872543335, "fcm_dpo/margin": 55.73931121826172, "fcm_dpo/q_t": 0.3614313304424286, "grad_norm": 100.59260559082031, "learning_rate": 1.9999357655598891e-07, "logits/chosen": -0.7959886193275452, "logits/rejected": -0.789124608039856, "logps/chosen": -342.9815673828125, "logps/ref_chosen": -274.1440734863281, "logps/ref_rejected": -278.07208251953125, "logps/rejected": -402.6488342285156, "loss": 3.9778, "margin_dpo/margin_mean": 55.73931121826172, "margin_dpo/margin_std": 71.53327941894531, "step": 291 }, { "epoch": 0.6115183246073298, "fcm_dpo/beta": 0.012481886893510818, "fcm_dpo/delta": 0.098934106528759, "fcm_dpo/margin": 40.45347213745117, "fcm_dpo/q_t": 0.39631906151771545, "grad_norm": 105.07350158691406, "learning_rate": 1.9820116705100775e-07, "logits/chosen": -0.7960292100906372, "logits/rejected": -0.7907694578170776, "logps/chosen": -324.7724914550781, "logps/ref_chosen": -259.3636779785156, "logps/ref_rejected": -279.30218505859375, "logps/rejected": -385.16448974609375, "loss": 4.5424, "margin_dpo/margin_mean": 40.45347213745117, "margin_dpo/margin_std": 71.65106964111328, "step": 292 }, { "epoch": 0.6136125654450262, "fcm_dpo/beta": 0.012850621715188026, "fcm_dpo/delta": -0.047699183225631714, "fcm_dpo/margin": 50.01988983154297, "fcm_dpo/q_t": 0.3677240014076233, "grad_norm": 105.40121459960938, "learning_rate": 1.9641153536023642e-07, "logits/chosen": -0.8889198899269104, "logits/rejected": -0.8521823287010193, "logps/chosen": -376.5350646972656, "logps/ref_chosen": -303.77081298828125, "logps/ref_rejected": -270.07513427734375, "logps/rejected": -392.8592224121094, "loss": 4.0238, "margin_dpo/margin_mean": 50.01988220214844, "margin_dpo/margin_std": 65.39283752441406, "step": 293 }, { "epoch": 0.6157068062827226, "fcm_dpo/beta": 0.012599381618201733, "fcm_dpo/delta": -0.005292973015457392, "fcm_dpo/margin": 47.930747985839844, "fcm_dpo/q_t": 0.3788926601409912, "grad_norm": 105.63341522216797, "learning_rate": 1.9462477745619106e-07, "logits/chosen": -0.795003354549408, "logits/rejected": -0.8052266240119934, "logps/chosen": -302.889892578125, "logps/ref_chosen": -240.23831176757812, "logps/ref_rejected": -229.187744140625, "logps/rejected": -339.77008056640625, "loss": 4.1926, "margin_dpo/margin_mean": 47.93075180053711, "margin_dpo/margin_std": 71.22593688964844, "step": 294 }, { "epoch": 0.6178010471204188, "fcm_dpo/beta": 0.012672440148890018, "fcm_dpo/delta": 0.043098170310258865, "fcm_dpo/margin": 44.029075622558594, "fcm_dpo/q_t": 0.38533294200897217, "grad_norm": 89.81253814697266, "learning_rate": 1.928409891572757e-07, "logits/chosen": -0.7766979932785034, "logits/rejected": -0.7932155728340149, "logps/chosen": -319.8603210449219, "logps/ref_chosen": -251.00970458984375, "logps/ref_rejected": -244.15142822265625, "logps/rejected": -357.0310974121094, "loss": 4.3008, "margin_dpo/margin_mean": 44.029075622558594, "margin_dpo/margin_std": 67.06430053710938, "step": 295 }, { "epoch": 0.6198952879581152, "fcm_dpo/beta": 0.012121832929551601, "fcm_dpo/delta": -0.1331343948841095, "fcm_dpo/margin": 59.48346710205078, "fcm_dpo/q_t": 0.3515579402446747, "grad_norm": 86.63916015625, "learning_rate": 1.9106026612264315e-07, "logits/chosen": -0.7734822034835815, "logits/rejected": -0.7490954995155334, "logps/chosen": -363.75494384765625, "logps/ref_chosen": -293.880615234375, "logps/ref_rejected": -283.4175720214844, "logps/rejected": -412.775390625, "loss": 3.9594, "margin_dpo/margin_mean": 59.48346710205078, "margin_dpo/margin_std": 77.31208801269531, "step": 296 }, { "epoch": 0.6219895287958115, "fcm_dpo/beta": 0.011517000384628773, "fcm_dpo/delta": 0.023262428119778633, "fcm_dpo/margin": 41.557411193847656, "fcm_dpo/q_t": 0.39879322052001953, "grad_norm": 90.58515930175781, "learning_rate": 1.8928270384706582e-07, "logits/chosen": -0.8670139312744141, "logits/rejected": -0.8624626994132996, "logps/chosen": -358.82000732421875, "logps/ref_chosen": -289.4600830078125, "logps/ref_rejected": -283.69110107421875, "logps/rejected": -394.60845947265625, "loss": 4.4825, "margin_dpo/margin_mean": 41.557411193847656, "margin_dpo/margin_std": 69.22006225585938, "step": 297 }, { "epoch": 0.6240837696335079, "fcm_dpo/beta": 0.01156252808868885, "fcm_dpo/delta": -0.06845314055681229, "fcm_dpo/margin": 48.89856719970703, "fcm_dpo/q_t": 0.38479888439178467, "grad_norm": 105.2696533203125, "learning_rate": 1.875083976558136e-07, "logits/chosen": -0.7988805770874023, "logits/rejected": -0.7908245921134949, "logps/chosen": -369.48431396484375, "logps/ref_chosen": -306.5150146484375, "logps/ref_rejected": -280.6969909667969, "logps/rejected": -392.5648193359375, "loss": 4.3527, "margin_dpo/margin_mean": 48.89856719970703, "margin_dpo/margin_std": 77.35851287841797, "step": 298 }, { "epoch": 0.6261780104712041, "fcm_dpo/beta": 0.01131986640393734, "fcm_dpo/delta": 0.04895632341504097, "fcm_dpo/margin": 43.01826477050781, "fcm_dpo/q_t": 0.39540231227874756, "grad_norm": 94.2397232055664, "learning_rate": 1.8573744269954297e-07, "logits/chosen": -0.7741419076919556, "logits/rejected": -0.7654407024383545, "logps/chosen": -358.94085693359375, "logps/ref_chosen": -281.36376953125, "logps/ref_rejected": -270.39508056640625, "logps/rejected": -390.9903869628906, "loss": 4.399, "margin_dpo/margin_mean": 43.01826477050781, "margin_dpo/margin_std": 66.27005767822266, "step": 299 }, { "epoch": 0.6282722513089005, "fcm_dpo/beta": 0.012323617935180664, "fcm_dpo/delta": 0.09153569489717484, "fcm_dpo/margin": 41.455352783203125, "fcm_dpo/q_t": 0.39375266432762146, "grad_norm": 146.0800323486328, "learning_rate": 1.839699339491937e-07, "logits/chosen": -0.81211256980896, "logits/rejected": -0.788737952709198, "logps/chosen": -392.5552978515625, "logps/ref_chosen": -314.83575439453125, "logps/ref_rejected": -269.1154479980469, "logps/rejected": -388.29034423828125, "loss": 4.4804, "margin_dpo/margin_mean": 41.45535659790039, "margin_dpo/margin_std": 71.92596435546875, "step": 300 }, { "epoch": 0.6303664921465969, "fcm_dpo/beta": 0.0128701226785779, "fcm_dpo/delta": 0.04166974872350693, "fcm_dpo/margin": 43.445735931396484, "fcm_dpo/q_t": 0.3868556618690491, "grad_norm": 91.15668487548828, "learning_rate": 1.8220596619089573e-07, "logits/chosen": -0.814331591129303, "logits/rejected": -0.8293969035148621, "logps/chosen": -353.07177734375, "logps/ref_chosen": -279.89453125, "logps/ref_rejected": -271.6694641113281, "logps/rejected": -388.2925109863281, "loss": 4.3454, "margin_dpo/margin_mean": 43.44573974609375, "margin_dpo/margin_std": 68.72089385986328, "step": 301 }, { "epoch": 0.6324607329842932, "fcm_dpo/beta": 0.012415561825037003, "fcm_dpo/delta": -0.08762803673744202, "fcm_dpo/margin": 54.83326721191406, "fcm_dpo/q_t": 0.3612514138221741, "grad_norm": 117.01398468017578, "learning_rate": 1.8044563402088682e-07, "logits/chosen": -0.7894245386123657, "logits/rejected": -0.7746908068656921, "logps/chosen": -341.82904052734375, "logps/ref_chosen": -271.3318176269531, "logps/ref_rejected": -256.5587158203125, "logps/rejected": -381.88922119140625, "loss": 4.0128, "margin_dpo/margin_mean": 54.83326721191406, "margin_dpo/margin_std": 74.75352478027344, "step": 302 }, { "epoch": 0.6345549738219896, "fcm_dpo/beta": 0.012025467120110989, "fcm_dpo/delta": -0.04249938949942589, "fcm_dpo/margin": 48.98454666137695, "fcm_dpo/q_t": 0.3781017065048218, "grad_norm": 116.30992126464844, "learning_rate": 1.7868903184043885e-07, "logits/chosen": -0.7718071937561035, "logits/rejected": -0.7559300661087036, "logps/chosen": -381.57781982421875, "logps/ref_chosen": -304.88104248046875, "logps/ref_rejected": -269.063720703125, "logps/rejected": -394.7451171875, "loss": 4.2912, "margin_dpo/margin_mean": 48.98455047607422, "margin_dpo/margin_std": 75.46881103515625, "step": 303 }, { "epoch": 0.6366492146596858, "fcm_dpo/beta": 0.011567480862140656, "fcm_dpo/delta": -0.02077743411064148, "fcm_dpo/margin": 53.529052734375, "fcm_dpo/q_t": 0.37393832206726074, "grad_norm": 108.21533966064453, "learning_rate": 1.7693625385079574e-07, "logits/chosen": -0.7794772982597351, "logits/rejected": -0.7964142560958862, "logps/chosen": -375.22418212890625, "logps/ref_chosen": -290.7109680175781, "logps/ref_rejected": -237.6885986328125, "logps/rejected": -375.7308654785156, "loss": 4.1337, "margin_dpo/margin_mean": 53.529056549072266, "margin_dpo/margin_std": 77.61477661132812, "step": 304 }, { "epoch": 0.6387434554973822, "fcm_dpo/beta": 0.010576148517429829, "fcm_dpo/delta": -0.1733783483505249, "fcm_dpo/margin": 71.53874206542969, "fcm_dpo/q_t": 0.3409091830253601, "grad_norm": 89.68358612060547, "learning_rate": 1.7518739404812155e-07, "logits/chosen": -0.8426798582077026, "logits/rejected": -0.8134666085243225, "logps/chosen": -331.08544921875, "logps/ref_chosen": -256.4839782714844, "logps/ref_rejected": -266.4063415527344, "logps/rejected": -412.5465087890625, "loss": 3.7185, "margin_dpo/margin_mean": 71.53873443603516, "margin_dpo/margin_std": 78.6550064086914, "step": 305 }, { "epoch": 0.6408376963350786, "fcm_dpo/beta": 0.010183380916714668, "fcm_dpo/delta": 0.026483479887247086, "fcm_dpo/margin": 45.928916931152344, "fcm_dpo/q_t": 0.3991745114326477, "grad_norm": 85.03260803222656, "learning_rate": 1.7344254621846017e-07, "logits/chosen": -0.8300163745880127, "logits/rejected": -0.8189243078231812, "logps/chosen": -402.5002746582031, "logps/ref_chosen": -320.6492004394531, "logps/ref_rejected": -273.36773681640625, "logps/rejected": -401.1476745605469, "loss": 4.3841, "margin_dpo/margin_mean": 45.92892074584961, "margin_dpo/margin_std": 69.21966552734375, "step": 306 }, { "epoch": 0.6429319371727749, "fcm_dpo/beta": 0.010290293022990227, "fcm_dpo/delta": -0.004134609363973141, "fcm_dpo/margin": 51.01483154296875, "fcm_dpo/q_t": 0.38504621386528015, "grad_norm": 133.7060546875, "learning_rate": 1.717018039327053e-07, "logits/chosen": -0.7672021389007568, "logits/rejected": -0.8132136464118958, "logps/chosen": -379.48583984375, "logps/ref_chosen": -279.4541931152344, "logps/ref_rejected": -240.3796844482422, "logps/rejected": -391.4261474609375, "loss": 4.2074, "margin_dpo/margin_mean": 51.01482391357422, "margin_dpo/margin_std": 66.68528747558594, "step": 307 }, { "epoch": 0.6450261780104712, "fcm_dpo/beta": 0.010651452466845512, "fcm_dpo/delta": 0.11725203692913055, "fcm_dpo/margin": 41.120540618896484, "fcm_dpo/q_t": 0.4069848656654358, "grad_norm": 93.3102035522461, "learning_rate": 1.699652605415828e-07, "logits/chosen": -0.8160425424575806, "logits/rejected": -0.8356633186340332, "logps/chosen": -400.3565673828125, "logps/ref_chosen": -296.598388671875, "logps/ref_rejected": -258.6953430175781, "logps/rejected": -403.57403564453125, "loss": 4.584, "margin_dpo/margin_mean": 41.120540618896484, "margin_dpo/margin_std": 74.20211791992188, "step": 308 }, { "epoch": 0.6471204188481675, "fcm_dpo/beta": 0.011160111054778099, "fcm_dpo/delta": -0.028207721188664436, "fcm_dpo/margin": 56.09125518798828, "fcm_dpo/q_t": 0.3682219386100769, "grad_norm": 90.17739868164062, "learning_rate": 1.6823300917064458e-07, "logits/chosen": -0.8190463781356812, "logits/rejected": -0.832842230796814, "logps/chosen": -382.3316955566406, "logps/ref_chosen": -281.3881530761719, "logps/ref_rejected": -262.458740234375, "logps/rejected": -419.4935302734375, "loss": 4.0162, "margin_dpo/margin_mean": 56.09125518798828, "margin_dpo/margin_std": 73.06241607666016, "step": 309 }, { "epoch": 0.6492146596858639, "fcm_dpo/beta": 0.011178172193467617, "fcm_dpo/delta": 0.008381815627217293, "fcm_dpo/margin": 52.827457427978516, "fcm_dpo/q_t": 0.3756198287010193, "grad_norm": 120.2673568725586, "learning_rate": 1.6650514271527465e-07, "logits/chosen": -0.8169420957565308, "logits/rejected": -0.7943635582923889, "logps/chosen": -377.6767578125, "logps/ref_chosen": -279.1872863769531, "logps/ref_rejected": -261.8279724121094, "logps/rejected": -413.1448974609375, "loss": 4.1525, "margin_dpo/margin_mean": 52.82746124267578, "margin_dpo/margin_std": 73.31507873535156, "step": 310 }, { "epoch": 0.6513089005235602, "fcm_dpo/beta": 0.011097338050603867, "fcm_dpo/delta": 0.006144438870251179, "fcm_dpo/margin": 53.27901840209961, "fcm_dpo/q_t": 0.37421154975891113, "grad_norm": 124.52708435058594, "learning_rate": 1.647817538357072e-07, "logits/chosen": -0.8149024844169617, "logits/rejected": -0.7987397909164429, "logps/chosen": -371.7431335449219, "logps/ref_chosen": -271.39813232421875, "logps/ref_rejected": -266.12701416015625, "logps/rejected": -419.75103759765625, "loss": 4.2385, "margin_dpo/margin_mean": 53.279022216796875, "margin_dpo/margin_std": 77.10747528076172, "step": 311 }, { "epoch": 0.6534031413612565, "fcm_dpo/beta": 0.011262207292020321, "fcm_dpo/delta": 0.04970509931445122, "fcm_dpo/margin": 48.75176239013672, "fcm_dpo/q_t": 0.3897601068019867, "grad_norm": 105.6718521118164, "learning_rate": 1.6306293495205755e-07, "logits/chosen": -0.8199286460876465, "logits/rejected": -0.8051372766494751, "logps/chosen": -381.46502685546875, "logps/ref_chosen": -282.3850402832031, "logps/ref_rejected": -246.35389709472656, "logps/rejected": -394.1856689453125, "loss": 4.5143, "margin_dpo/margin_mean": 48.75176239013672, "margin_dpo/margin_std": 85.32847595214844, "step": 312 }, { "epoch": 0.6554973821989529, "fcm_dpo/beta": 0.011512380093336105, "fcm_dpo/delta": -0.049063071608543396, "fcm_dpo/margin": 51.31576156616211, "fcm_dpo/q_t": 0.3808843493461609, "grad_norm": 92.33170318603516, "learning_rate": 1.6134877823936607e-07, "logits/chosen": -0.8606759309768677, "logits/rejected": -0.8551607131958008, "logps/chosen": -401.7897033691406, "logps/ref_chosen": -303.630859375, "logps/ref_rejected": -273.1156921386719, "logps/rejected": -422.5903015136719, "loss": 4.3579, "margin_dpo/margin_mean": 51.31576156616211, "margin_dpo/margin_std": 80.45419311523438, "step": 313 }, { "epoch": 0.6575916230366492, "fcm_dpo/beta": 0.011472068727016449, "fcm_dpo/delta": 0.017066676169633865, "fcm_dpo/margin": 50.73048782348633, "fcm_dpo/q_t": 0.3782009482383728, "grad_norm": 96.01194763183594, "learning_rate": 1.5963937562265522e-07, "logits/chosen": -0.8795362114906311, "logits/rejected": -0.8655129671096802, "logps/chosen": -394.734619140625, "logps/ref_chosen": -302.3042907714844, "logps/ref_rejected": -273.6416015625, "logps/rejected": -416.8023681640625, "loss": 4.2074, "margin_dpo/margin_mean": 50.73048782348633, "margin_dpo/margin_std": 72.94244384765625, "step": 314 }, { "epoch": 0.6596858638743456, "fcm_dpo/beta": 0.011077978648245335, "fcm_dpo/delta": -0.06065046787261963, "fcm_dpo/margin": 59.16315841674805, "fcm_dpo/q_t": 0.3630554676055908, "grad_norm": 93.33431243896484, "learning_rate": 1.5793481877199943e-07, "logits/chosen": -0.8478763103485107, "logits/rejected": -0.834101140499115, "logps/chosen": -394.13946533203125, "logps/ref_chosen": -302.729248046875, "logps/ref_rejected": -270.26910400390625, "logps/rejected": -420.8424377441406, "loss": 4.0009, "margin_dpo/margin_mean": 59.16315841674805, "margin_dpo/margin_std": 75.37049102783203, "step": 315 }, { "epoch": 0.6617801047120419, "fcm_dpo/beta": 0.010612869635224342, "fcm_dpo/delta": -0.011615972965955734, "fcm_dpo/margin": 57.42588806152344, "fcm_dpo/q_t": 0.37472862005233765, "grad_norm": 80.97152709960938, "learning_rate": 1.562351990976095e-07, "logits/chosen": -0.8666278123855591, "logits/rejected": -0.8581745624542236, "logps/chosen": -398.5106506347656, "logps/ref_chosen": -310.5706481933594, "logps/ref_rejected": -272.9354553222656, "logps/rejected": -418.3013610839844, "loss": 4.1489, "margin_dpo/margin_mean": 57.42588806152344, "margin_dpo/margin_std": 81.77729797363281, "step": 316 }, { "epoch": 0.6638743455497382, "fcm_dpo/beta": 0.010676562786102295, "fcm_dpo/delta": 0.02444746345281601, "fcm_dpo/margin": 54.00886535644531, "fcm_dpo/q_t": 0.3743758201599121, "grad_norm": 83.4330825805664, "learning_rate": 1.5454060774493065e-07, "logits/chosen": -0.8651271462440491, "logits/rejected": -0.8354383111000061, "logps/chosen": -327.0922546386719, "logps/ref_chosen": -253.90036010742188, "logps/ref_rejected": -218.74078369140625, "logps/rejected": -345.9415283203125, "loss": 4.0552, "margin_dpo/margin_mean": 54.00886535644531, "margin_dpo/margin_std": 67.14466094970703, "step": 317 }, { "epoch": 0.6659685863874345, "fcm_dpo/beta": 0.010473274625837803, "fcm_dpo/delta": -0.02130315639078617, "fcm_dpo/margin": 58.93336486816406, "fcm_dpo/q_t": 0.36683323979377747, "grad_norm": 77.39559173583984, "learning_rate": 1.5285113558975427e-07, "logits/chosen": -0.883613646030426, "logits/rejected": -0.8504911065101624, "logps/chosen": -352.9744873046875, "logps/ref_chosen": -270.8228759765625, "logps/ref_rejected": -255.30972290039062, "logps/rejected": -396.39471435546875, "loss": 3.9828, "margin_dpo/margin_mean": 58.93336486816406, "margin_dpo/margin_std": 70.93016052246094, "step": 318 }, { "epoch": 0.6680628272251309, "fcm_dpo/beta": 0.010347644798457623, "fcm_dpo/delta": 0.004265286028385162, "fcm_dpo/margin": 57.502281188964844, "fcm_dpo/q_t": 0.3711587190628052, "grad_norm": 106.93011474609375, "learning_rate": 1.5116687323334464e-07, "logits/chosen": -0.8568066358566284, "logits/rejected": -0.8343677520751953, "logps/chosen": -389.6893310546875, "logps/ref_chosen": -301.0028076171875, "logps/ref_rejected": -242.39002990722656, "logps/rejected": -388.5788269042969, "loss": 4.0047, "margin_dpo/margin_mean": 57.50227737426758, "margin_dpo/margin_std": 70.3616714477539, "step": 319 }, { "epoch": 0.6701570680628273, "fcm_dpo/beta": 0.010691437870264053, "fcm_dpo/delta": 0.03203843906521797, "fcm_dpo/margin": 53.148414611816406, "fcm_dpo/q_t": 0.38301903009414673, "grad_norm": 128.77078247070312, "learning_rate": 1.4948791099758052e-07, "logits/chosen": -0.823917806148529, "logits/rejected": -0.8286012411117554, "logps/chosen": -385.59344482421875, "logps/ref_chosen": -303.6225891113281, "logps/ref_rejected": -280.85174560546875, "logps/rejected": -415.97100830078125, "loss": 4.3482, "margin_dpo/margin_mean": 53.148414611816406, "margin_dpo/margin_std": 85.3245849609375, "step": 320 }, { "epoch": 0.6722513089005235, "fcm_dpo/beta": 0.011223748326301575, "fcm_dpo/delta": 0.034036025404930115, "fcm_dpo/margin": 40.71882629394531, "fcm_dpo/q_t": 0.4071481227874756, "grad_norm": 100.50800323486328, "learning_rate": 1.478143389201113e-07, "logits/chosen": -0.8584508299827576, "logits/rejected": -0.8295794725418091, "logps/chosen": -380.6787109375, "logps/ref_chosen": -288.98583984375, "logps/ref_rejected": -241.1822052001953, "logps/rejected": -373.59393310546875, "loss": 4.6134, "margin_dpo/margin_mean": 40.71883010864258, "margin_dpo/margin_std": 77.2356948852539, "step": 321 }, { "epoch": 0.6743455497382199, "fcm_dpo/beta": 0.011451047845184803, "fcm_dpo/delta": -0.009086892008781433, "fcm_dpo/margin": 52.995948791503906, "fcm_dpo/q_t": 0.37670472264289856, "grad_norm": 84.11673736572266, "learning_rate": 1.461462467495284e-07, "logits/chosen": -0.9030950665473938, "logits/rejected": -0.8643764853477478, "logps/chosen": -400.96453857421875, "logps/ref_chosen": -308.54345703125, "logps/ref_rejected": -269.7995910644531, "logps/rejected": -415.2165832519531, "loss": 4.1867, "margin_dpo/margin_mean": 52.99595260620117, "margin_dpo/margin_std": 78.33987426757812, "step": 322 }, { "epoch": 0.6764397905759162, "fcm_dpo/beta": 0.011756744235754013, "fcm_dpo/delta": 0.1289975494146347, "fcm_dpo/margin": 36.21906661987305, "fcm_dpo/q_t": 0.41570037603378296, "grad_norm": 103.57855987548828, "learning_rate": 1.4448372394055246e-07, "logits/chosen": -0.8654804229736328, "logits/rejected": -0.8599724173545837, "logps/chosen": -372.8677978515625, "logps/ref_chosen": -282.49365234375, "logps/ref_rejected": -227.7105255126953, "logps/rejected": -354.30377197265625, "loss": 4.8564, "margin_dpo/margin_mean": 36.21906280517578, "margin_dpo/margin_std": 81.94871520996094, "step": 323 }, { "epoch": 0.6785340314136126, "fcm_dpo/beta": 0.011317353695631027, "fcm_dpo/delta": -0.13536657392978668, "fcm_dpo/margin": 63.58089065551758, "fcm_dpo/q_t": 0.34825509786605835, "grad_norm": 99.381103515625, "learning_rate": 1.428268596492364e-07, "logits/chosen": -0.8112601637840271, "logits/rejected": -0.8100103139877319, "logps/chosen": -317.1282958984375, "logps/ref_chosen": -239.33836364746094, "logps/ref_rejected": -230.53775024414062, "logps/rejected": -371.9085388183594, "loss": 3.756, "margin_dpo/margin_mean": 63.58089065551758, "margin_dpo/margin_std": 70.64297485351562, "step": 324 }, { "epoch": 0.680628272251309, "fcm_dpo/beta": 0.011040986515581608, "fcm_dpo/delta": -0.02803659997880459, "fcm_dpo/margin": 52.0232048034668, "fcm_dpo/q_t": 0.38475099205970764, "grad_norm": 113.279296875, "learning_rate": 1.4117574272818386e-07, "logits/chosen": -0.8139001131057739, "logits/rejected": -0.7986257076263428, "logps/chosen": -370.62933349609375, "logps/ref_chosen": -280.62896728515625, "logps/ref_rejected": -270.5085754394531, "logps/rejected": -412.5321350097656, "loss": 4.3798, "margin_dpo/margin_mean": 52.0232048034668, "margin_dpo/margin_std": 84.57878112792969, "step": 325 }, { "epoch": 0.6827225130890052, "fcm_dpo/beta": 0.011147797107696533, "fcm_dpo/delta": 0.03236812353134155, "fcm_dpo/margin": 51.00239562988281, "fcm_dpo/q_t": 0.38107889890670776, "grad_norm": 111.60675048828125, "learning_rate": 1.3953046172178413e-07, "logits/chosen": -0.9249294996261597, "logits/rejected": -0.9140468239784241, "logps/chosen": -322.252685546875, "logps/ref_chosen": -240.9871368408203, "logps/ref_rejected": -261.0238342285156, "logps/rejected": -393.291748046875, "loss": 4.2602, "margin_dpo/margin_mean": 51.00239562988281, "margin_dpo/margin_std": 76.8713607788086, "step": 326 }, { "epoch": 0.6848167539267016, "fcm_dpo/beta": 0.010913331992924213, "fcm_dpo/delta": -0.048158351331949234, "fcm_dpo/margin": 58.91179275512695, "fcm_dpo/q_t": 0.36473149061203003, "grad_norm": 80.92235565185547, "learning_rate": 1.3789110486146468e-07, "logits/chosen": -0.8732993006706238, "logits/rejected": -0.8531113862991333, "logps/chosen": -352.11798095703125, "logps/ref_chosen": -279.52001953125, "logps/ref_rejected": -269.51824951171875, "logps/rejected": -401.02801513671875, "loss": 3.9857, "margin_dpo/margin_mean": 58.91179275512695, "margin_dpo/margin_std": 74.66950225830078, "step": 327 }, { "epoch": 0.6869109947643979, "fcm_dpo/beta": 0.010607855394482613, "fcm_dpo/delta": 0.03731272369623184, "fcm_dpo/margin": 53.156368255615234, "fcm_dpo/q_t": 0.38036584854125977, "grad_norm": 105.32549285888672, "learning_rate": 1.362577600609588e-07, "logits/chosen": -0.8312807083129883, "logits/rejected": -0.8335475325584412, "logps/chosen": -384.21630859375, "logps/ref_chosen": -301.033447265625, "logps/ref_rejected": -284.2101135253906, "logps/rejected": -420.5493469238281, "loss": 4.1238, "margin_dpo/margin_mean": 53.156368255615234, "margin_dpo/margin_std": 68.87464141845703, "step": 328 }, { "epoch": 0.6890052356020943, "fcm_dpo/beta": 0.011003939434885979, "fcm_dpo/delta": -0.0023946845903992653, "fcm_dpo/margin": 54.590736389160156, "fcm_dpo/q_t": 0.3825053870677948, "grad_norm": 104.08448791503906, "learning_rate": 1.3463051491159093e-07, "logits/chosen": -0.8463307619094849, "logits/rejected": -0.8228050470352173, "logps/chosen": -409.5216369628906, "logps/ref_chosen": -319.9888610839844, "logps/ref_rejected": -307.5588684082031, "logps/rejected": -451.6824035644531, "loss": 4.2917, "margin_dpo/margin_mean": 54.590736389160156, "margin_dpo/margin_std": 86.76227569580078, "step": 329 }, { "epoch": 0.6910994764397905, "fcm_dpo/beta": 0.011315654963254929, "fcm_dpo/delta": 0.03985806554555893, "fcm_dpo/margin": 49.51683807373047, "fcm_dpo/q_t": 0.3813677728176117, "grad_norm": 110.84941864013672, "learning_rate": 1.3300945667758012e-07, "logits/chosen": -0.8280748128890991, "logits/rejected": -0.8414457440376282, "logps/chosen": -388.23834228515625, "logps/ref_chosen": -301.11474609375, "logps/ref_rejected": -299.673095703125, "logps/rejected": -436.3134765625, "loss": 4.1843, "margin_dpo/margin_mean": 49.51683807373047, "margin_dpo/margin_std": 68.95203399658203, "step": 330 }, { "epoch": 0.6931937172774869, "fcm_dpo/beta": 0.011244787834584713, "fcm_dpo/delta": 0.009060085751116276, "fcm_dpo/margin": 52.50252151489258, "fcm_dpo/q_t": 0.38197970390319824, "grad_norm": 180.15573120117188, "learning_rate": 1.3139467229135998e-07, "logits/chosen": -0.8683615922927856, "logits/rejected": -0.8551488518714905, "logps/chosen": -356.96160888671875, "logps/ref_chosen": -277.59149169921875, "logps/ref_rejected": -256.025634765625, "logps/rejected": -387.8982849121094, "loss": 4.3397, "margin_dpo/margin_mean": 52.50252151489258, "margin_dpo/margin_std": 86.5692138671875, "step": 331 }, { "epoch": 0.6952879581151833, "fcm_dpo/beta": 0.011165878735482693, "fcm_dpo/delta": -0.0032293088734149933, "fcm_dpo/margin": 53.855308532714844, "fcm_dpo/q_t": 0.37883564829826355, "grad_norm": 115.64824676513672, "learning_rate": 1.2978624834891626e-07, "logits/chosen": -0.865576982498169, "logits/rejected": -0.8442394137382507, "logps/chosen": -352.7916259765625, "logps/ref_chosen": -269.97369384765625, "logps/ref_rejected": -235.03164672851562, "logps/rejected": -371.70489501953125, "loss": 4.2568, "margin_dpo/margin_mean": 53.855308532714844, "margin_dpo/margin_std": 82.26235961914062, "step": 332 }, { "epoch": 0.6973821989528796, "fcm_dpo/beta": 0.011527864262461662, "fcm_dpo/delta": 0.004975374788045883, "fcm_dpo/margin": 47.826690673828125, "fcm_dpo/q_t": 0.38533908128738403, "grad_norm": 103.64472198486328, "learning_rate": 1.281842711051438e-07, "logits/chosen": -0.9276981949806213, "logits/rejected": -0.8942596316337585, "logps/chosen": -381.27313232421875, "logps/ref_chosen": -296.76300048828125, "logps/ref_rejected": -265.97991943359375, "logps/rejected": -398.31671142578125, "loss": 4.226, "margin_dpo/margin_mean": 47.826690673828125, "margin_dpo/margin_std": 69.83184051513672, "step": 333 }, { "epoch": 0.6994764397905759, "fcm_dpo/beta": 0.011430593207478523, "fcm_dpo/delta": -0.059879280626773834, "fcm_dpo/margin": 57.263526916503906, "fcm_dpo/q_t": 0.3644816279411316, "grad_norm": 102.11288452148438, "learning_rate": 1.2658882646922033e-07, "logits/chosen": -0.839641809463501, "logits/rejected": -0.8142789602279663, "logps/chosen": -379.44012451171875, "logps/ref_chosen": -301.0367431640625, "logps/ref_rejected": -268.87652587890625, "logps/rejected": -404.54339599609375, "loss": 4.0678, "margin_dpo/margin_mean": 57.263526916503906, "margin_dpo/margin_std": 76.02151489257812, "step": 334 }, { "epoch": 0.7015706806282722, "fcm_dpo/beta": 0.010619346983730793, "fcm_dpo/delta": -7.020309567451477e-05, "fcm_dpo/margin": 56.28835678100586, "fcm_dpo/q_t": 0.377109557390213, "grad_norm": 112.79352569580078, "learning_rate": 1.2500000000000005e-07, "logits/chosen": -0.8311583399772644, "logits/rejected": -0.827835738658905, "logps/chosen": -365.9309997558594, "logps/ref_chosen": -276.13275146484375, "logps/ref_rejected": -243.44203186035156, "logps/rejected": -389.5285949707031, "loss": 4.245, "margin_dpo/margin_mean": 56.28835678100586, "margin_dpo/margin_std": 83.26809692382812, "step": 335 }, { "epoch": 0.7036649214659686, "fcm_dpo/beta": 0.010861254297196865, "fcm_dpo/delta": -0.004219849593937397, "fcm_dpo/margin": 50.423587799072266, "fcm_dpo/q_t": 0.3916303515434265, "grad_norm": 112.85342407226562, "learning_rate": 1.2341787690142435e-07, "logits/chosen": -0.839414119720459, "logits/rejected": -0.7747617959976196, "logps/chosen": -337.7890319824219, "logps/ref_chosen": -246.2626495361328, "logps/ref_rejected": -261.0617980957031, "logps/rejected": -403.0118408203125, "loss": 4.3563, "margin_dpo/margin_mean": 50.423587799072266, "margin_dpo/margin_std": 81.8228988647461, "step": 336 }, { "epoch": 0.7057591623036649, "fcm_dpo/beta": 0.010828062891960144, "fcm_dpo/delta": -0.06099002808332443, "fcm_dpo/margin": 60.63716125488281, "fcm_dpo/q_t": 0.3647434711456299, "grad_norm": 89.56388854980469, "learning_rate": 1.2184254201795363e-07, "logits/chosen": -0.8643673062324524, "logits/rejected": -0.8361295461654663, "logps/chosen": -350.9415283203125, "logps/ref_chosen": -266.9937744140625, "logps/ref_rejected": -253.015625, "logps/rejected": -397.60052490234375, "loss": 3.9831, "margin_dpo/margin_mean": 60.63715362548828, "margin_dpo/margin_std": 78.05126953125, "step": 337 }, { "epoch": 0.7078534031413612, "fcm_dpo/beta": 0.010671587660908699, "fcm_dpo/delta": 0.035113610327243805, "fcm_dpo/margin": 52.91345977783203, "fcm_dpo/q_t": 0.38380661606788635, "grad_norm": 123.61051177978516, "learning_rate": 1.202740798300168e-07, "logits/chosen": -0.8847794532775879, "logits/rejected": -0.867152214050293, "logps/chosen": -357.63946533203125, "logps/ref_chosen": -276.5925598144531, "logps/ref_rejected": -233.979248046875, "logps/rejected": -367.9396057128906, "loss": 4.264, "margin_dpo/margin_mean": 52.9134521484375, "margin_dpo/margin_std": 80.15204620361328, "step": 338 }, { "epoch": 0.7099476439790576, "fcm_dpo/beta": 0.010630465112626553, "fcm_dpo/delta": -0.030053602531552315, "fcm_dpo/margin": 58.987159729003906, "fcm_dpo/q_t": 0.36951741576194763, "grad_norm": 107.49327087402344, "learning_rate": 1.1871257444948096e-07, "logits/chosen": -0.8885621428489685, "logits/rejected": -0.8790793418884277, "logps/chosen": -392.2843933105469, "logps/ref_chosen": -303.5277404785156, "logps/ref_rejected": -283.11676025390625, "logps/rejected": -430.860595703125, "loss": 4.1489, "margin_dpo/margin_mean": 58.987159729003906, "margin_dpo/margin_std": 83.707763671875, "step": 339 }, { "epoch": 0.7120418848167539, "fcm_dpo/beta": 0.010451890528202057, "fcm_dpo/delta": -0.00904519110918045, "fcm_dpo/margin": 53.435546875, "fcm_dpo/q_t": 0.3880341053009033, "grad_norm": 126.61689758300781, "learning_rate": 1.1715810961514072e-07, "logits/chosen": -0.8415927886962891, "logits/rejected": -0.8401827812194824, "logps/chosen": -354.4100341796875, "logps/ref_chosen": -261.5257568359375, "logps/ref_rejected": -259.39862060546875, "logps/rejected": -405.7184143066406, "loss": 4.4717, "margin_dpo/margin_mean": 53.435546875, "margin_dpo/margin_std": 92.72853088378906, "step": 340 }, { "epoch": 0.7141361256544503, "fcm_dpo/beta": 0.010697471909224987, "fcm_dpo/delta": 0.08240307867527008, "fcm_dpo/margin": 40.59882354736328, "fcm_dpo/q_t": 0.4121650159358978, "grad_norm": 146.87213134765625, "learning_rate": 1.1561076868822755e-07, "logits/chosen": -0.86383056640625, "logits/rejected": -0.8326124548912048, "logps/chosen": -426.71337890625, "logps/ref_chosen": -315.903564453125, "logps/ref_rejected": -308.02392578125, "logps/rejected": -459.4324951171875, "loss": 4.9048, "margin_dpo/margin_mean": 40.59882354736328, "margin_dpo/margin_std": 90.46697235107422, "step": 341 }, { "epoch": 0.7162303664921466, "fcm_dpo/beta": 0.011284704320132732, "fcm_dpo/delta": -0.020554201677441597, "fcm_dpo/margin": 54.822906494140625, "fcm_dpo/q_t": 0.3675943613052368, "grad_norm": 101.63287353515625, "learning_rate": 1.1407063464793965e-07, "logits/chosen": -0.8525506258010864, "logits/rejected": -0.8500516414642334, "logps/chosen": -356.83026123046875, "logps/ref_chosen": -269.17864990234375, "logps/ref_rejected": -260.8977355957031, "logps/rejected": -403.3722229003906, "loss": 4.0639, "margin_dpo/margin_mean": 54.822906494140625, "margin_dpo/margin_std": 71.24502563476562, "step": 342 }, { "epoch": 0.7183246073298429, "fcm_dpo/beta": 0.01099632028490305, "fcm_dpo/delta": 0.047995634377002716, "fcm_dpo/margin": 50.445194244384766, "fcm_dpo/q_t": 0.3869495093822479, "grad_norm": 110.69547271728516, "learning_rate": 1.125377900869913e-07, "logits/chosen": -0.8448514938354492, "logits/rejected": -0.8279154896736145, "logps/chosen": -402.746826171875, "logps/ref_chosen": -310.719970703125, "logps/ref_rejected": -263.5224914550781, "logps/rejected": -405.9945373535156, "loss": 4.3061, "margin_dpo/margin_mean": 50.445194244384766, "margin_dpo/margin_std": 79.09446716308594, "step": 343 }, { "epoch": 0.7204188481675393, "fcm_dpo/beta": 0.011513441801071167, "fcm_dpo/delta": -0.023599928244948387, "fcm_dpo/margin": 53.812904357910156, "fcm_dpo/q_t": 0.3725927770137787, "grad_norm": 115.21056365966797, "learning_rate": 1.110123172071844e-07, "logits/chosen": -0.8441615104675293, "logits/rejected": -0.8278071880340576, "logps/chosen": -395.754150390625, "logps/ref_chosen": -301.7999267578125, "logps/ref_rejected": -257.9061584472656, "logps/rejected": -405.67327880859375, "loss": 4.225, "margin_dpo/margin_mean": 53.812904357910156, "margin_dpo/margin_std": 79.05390930175781, "step": 344 }, { "epoch": 0.7225130890052356, "fcm_dpo/beta": 0.011425694450736046, "fcm_dpo/delta": 0.05544426292181015, "fcm_dpo/margin": 47.61176300048828, "fcm_dpo/q_t": 0.387326180934906, "grad_norm": 137.6807098388672, "learning_rate": 1.09494297815e-07, "logits/chosen": -0.842475175857544, "logits/rejected": -0.842012345790863, "logps/chosen": -375.22137451171875, "logps/ref_chosen": -283.0184326171875, "logps/ref_rejected": -266.8457336425781, "logps/rejected": -406.660400390625, "loss": 4.2707, "margin_dpo/margin_mean": 47.61176300048828, "margin_dpo/margin_std": 67.73701477050781, "step": 345 }, { "epoch": 0.724607329842932, "fcm_dpo/beta": 0.011425861157476902, "fcm_dpo/delta": -0.07231096923351288, "fcm_dpo/margin": 58.19831848144531, "fcm_dpo/q_t": 0.36130863428115845, "grad_norm": 90.98426055908203, "learning_rate": 1.0798381331721107e-07, "logits/chosen": -0.9370063543319702, "logits/rejected": -0.8892075419425964, "logps/chosen": -366.372802734375, "logps/ref_chosen": -268.44122314453125, "logps/ref_rejected": -227.8225860595703, "logps/rejected": -383.952392578125, "loss": 4.1066, "margin_dpo/margin_mean": 58.19831848144531, "margin_dpo/margin_std": 78.29531860351562, "step": 346 }, { "epoch": 0.7267015706806282, "fcm_dpo/beta": 0.010861432179808617, "fcm_dpo/delta": -0.010385667905211449, "fcm_dpo/margin": 51.505802154541016, "fcm_dpo/q_t": 0.380726158618927, "grad_norm": 98.91201782226562, "learning_rate": 1.0648094471651722e-07, "logits/chosen": -0.7857590913772583, "logits/rejected": -0.8135133981704712, "logps/chosen": -364.33441162109375, "logps/ref_chosen": -273.70355224609375, "logps/ref_rejected": -243.65521240234375, "logps/rejected": -385.7918395996094, "loss": 4.2412, "margin_dpo/margin_mean": 51.505802154541016, "margin_dpo/margin_std": 73.8308334350586, "step": 347 }, { "epoch": 0.7287958115183246, "fcm_dpo/beta": 0.011517523787915707, "fcm_dpo/delta": 0.09734243154525757, "fcm_dpo/margin": 43.836822509765625, "fcm_dpo/q_t": 0.40036576986312866, "grad_norm": 90.68925476074219, "learning_rate": 1.0498577260720048e-07, "logits/chosen": -0.882627010345459, "logits/rejected": -0.8689060807228088, "logps/chosen": -374.64874267578125, "logps/ref_chosen": -285.64141845703125, "logps/ref_rejected": -265.6270446777344, "logps/rejected": -398.47119140625, "loss": 4.4704, "margin_dpo/margin_mean": 43.836822509765625, "margin_dpo/margin_std": 75.92671966552734, "step": 348 }, { "epoch": 0.7308900523560209, "fcm_dpo/beta": 0.011294200085103512, "fcm_dpo/delta": -0.10136254876852036, "fcm_dpo/margin": 61.32181930541992, "fcm_dpo/q_t": 0.36070722341537476, "grad_norm": 167.52288818359375, "learning_rate": 1.0349837717080347e-07, "logits/chosen": -0.8177285194396973, "logits/rejected": -0.8121789693832397, "logps/chosen": -418.723876953125, "logps/ref_chosen": -328.3175048828125, "logps/ref_rejected": -292.37872314453125, "logps/rejected": -444.10687255859375, "loss": 4.0752, "margin_dpo/margin_mean": 61.32181930541992, "margin_dpo/margin_std": 85.29824829101562, "step": 349 }, { "epoch": 0.7329842931937173, "fcm_dpo/beta": 0.011082770302891731, "fcm_dpo/delta": 0.0001247054897248745, "fcm_dpo/margin": 49.612892150878906, "fcm_dpo/q_t": 0.38704627752304077, "grad_norm": 104.57560729980469, "learning_rate": 1.0201883817182949e-07, "logits/chosen": -0.8255881071090698, "logits/rejected": -0.8411324620246887, "logps/chosen": -392.31982421875, "logps/ref_chosen": -292.8046569824219, "logps/ref_rejected": -250.35504150390625, "logps/rejected": -399.4831237792969, "loss": 4.4145, "margin_dpo/margin_mean": 49.61289978027344, "margin_dpo/margin_std": 82.11981201171875, "step": 350 }, { "epoch": 0.7350785340314137, "fcm_dpo/beta": 0.011276098899543285, "fcm_dpo/delta": 0.053270816802978516, "fcm_dpo/margin": 36.07026290893555, "fcm_dpo/q_t": 0.4199068546295166, "grad_norm": 156.5530242919922, "learning_rate": 1.0054723495346482e-07, "logits/chosen": -0.8887529373168945, "logits/rejected": -0.8778947591781616, "logps/chosen": -404.2864074707031, "logps/ref_chosen": -311.8890380859375, "logps/ref_rejected": -263.59033203125, "logps/rejected": -392.0579833984375, "loss": 4.9768, "margin_dpo/margin_mean": 36.07026290893555, "margin_dpo/margin_std": 87.96586608886719, "step": 351 }, { "epoch": 0.7371727748691099, "fcm_dpo/beta": 0.010786263272166252, "fcm_dpo/delta": -0.10786393284797668, "fcm_dpo/margin": 64.68121337890625, "fcm_dpo/q_t": 0.3563269376754761, "grad_norm": 107.21913146972656, "learning_rate": 9.908364643332398e-08, "logits/chosen": -0.8178911805152893, "logits/rejected": -0.7894106507301331, "logps/chosen": -341.0050354003906, "logps/ref_chosen": -254.9078826904297, "logps/ref_rejected": -257.1688232421875, "logps/rejected": -407.94720458984375, "loss": 3.9965, "margin_dpo/margin_mean": 64.68122100830078, "margin_dpo/margin_std": 83.1053466796875, "step": 352 }, { "epoch": 0.7392670157068063, "fcm_dpo/beta": 0.01031852513551712, "fcm_dpo/delta": 0.012895338237285614, "fcm_dpo/margin": 50.85631561279297, "fcm_dpo/q_t": 0.3920055627822876, "grad_norm": 121.84527587890625, "learning_rate": 9.76281510992176e-08, "logits/chosen": -0.836536169052124, "logits/rejected": -0.8306083679199219, "logps/chosen": -365.62139892578125, "logps/ref_chosen": -270.3760681152344, "logps/ref_rejected": -264.65234375, "logps/rejected": -410.7539367675781, "loss": 4.4158, "margin_dpo/margin_mean": 50.85631561279297, "margin_dpo/margin_std": 82.86323547363281, "step": 353 }, { "epoch": 0.7413612565445026, "fcm_dpo/beta": 0.01093815453350544, "fcm_dpo/delta": 0.10080662369728088, "fcm_dpo/margin": 37.49406433105469, "fcm_dpo/q_t": 0.4171503186225891, "grad_norm": 136.33518981933594, "learning_rate": 9.618082700494318e-08, "logits/chosen": -0.8385964632034302, "logits/rejected": -0.8738152384757996, "logps/chosen": -354.9613037109375, "logps/ref_chosen": -257.6485595703125, "logps/ref_rejected": -246.94203186035156, "logps/rejected": -381.74884033203125, "loss": 4.8602, "margin_dpo/margin_mean": 37.49407196044922, "margin_dpo/margin_std": 83.91649627685547, "step": 354 }, { "epoch": 0.743455497382199, "fcm_dpo/beta": 0.010349645279347897, "fcm_dpo/delta": -0.12334014475345612, "fcm_dpo/margin": 62.511470794677734, "fcm_dpo/q_t": 0.36480429768562317, "grad_norm": 95.06636047363281, "learning_rate": 9.474175176609956e-08, "logits/chosen": -0.8743699193000793, "logits/rejected": -0.875370979309082, "logps/chosen": -384.0447692871094, "logps/ref_chosen": -293.35333251953125, "logps/ref_rejected": -275.6051940917969, "logps/rejected": -428.80804443359375, "loss": 4.1536, "margin_dpo/margin_mean": 62.511470794677734, "margin_dpo/margin_std": 87.62345886230469, "step": 355 }, { "epoch": 0.7455497382198953, "fcm_dpo/beta": 0.01060514897108078, "fcm_dpo/delta": 0.06647256016731262, "fcm_dpo/margin": 40.10423278808594, "fcm_dpo/q_t": 0.4083283841609955, "grad_norm": 88.60588836669922, "learning_rate": 9.331100255592436e-08, "logits/chosen": -0.796362042427063, "logits/rejected": -0.8256345391273499, "logps/chosen": -293.0960388183594, "logps/ref_chosen": -204.25550842285156, "logps/ref_rejected": -213.467529296875, "logps/rejected": -342.41229248046875, "loss": 4.548, "margin_dpo/margin_mean": 40.10423278808594, "margin_dpo/margin_std": 67.21572875976562, "step": 356 }, { "epoch": 0.7476439790575916, "fcm_dpo/beta": 0.010486958548426628, "fcm_dpo/delta": -0.07790210843086243, "fcm_dpo/margin": 58.63288879394531, "fcm_dpo/q_t": 0.37552568316459656, "grad_norm": 98.0359115600586, "learning_rate": 9.18886561011557e-08, "logits/chosen": -0.7627823352813721, "logits/rejected": -0.7633357048034668, "logps/chosen": -362.4690246582031, "logps/ref_chosen": -266.3705749511719, "logps/ref_rejected": -239.04490661621094, "logps/rejected": -393.7762451171875, "loss": 4.2021, "margin_dpo/margin_mean": 58.63289260864258, "margin_dpo/margin_std": 85.52519226074219, "step": 357 }, { "epoch": 0.749738219895288, "fcm_dpo/beta": 0.009996353648602962, "fcm_dpo/delta": -0.06562351435422897, "fcm_dpo/margin": 66.09445190429688, "fcm_dpo/q_t": 0.3610166311264038, "grad_norm": 88.50709533691406, "learning_rate": 9.047478867791731e-08, "logits/chosen": -0.8669772148132324, "logits/rejected": -0.8496595621109009, "logps/chosen": -382.9401550292969, "logps/ref_chosen": -299.1474609375, "logps/ref_rejected": -257.2531433105469, "logps/rejected": -407.1402587890625, "loss": 4.0085, "margin_dpo/margin_mean": 66.09444427490234, "margin_dpo/margin_std": 85.0299072265625, "step": 358 }, { "epoch": 0.7518324607329843, "fcm_dpo/beta": 0.010169594548642635, "fcm_dpo/delta": 0.03530079498887062, "fcm_dpo/margin": 55.34699249267578, "fcm_dpo/q_t": 0.3793519139289856, "grad_norm": 106.86293029785156, "learning_rate": 8.906947610762825e-08, "logits/chosen": -0.8287184238433838, "logits/rejected": -0.8446385860443115, "logps/chosen": -390.8289794921875, "logps/ref_chosen": -302.99786376953125, "logps/ref_rejected": -260.4137268066406, "logps/rejected": -403.5918273925781, "loss": 4.1275, "margin_dpo/margin_mean": 55.34699249267578, "margin_dpo/margin_std": 72.260009765625, "step": 359 }, { "epoch": 0.7539267015706806, "fcm_dpo/beta": 0.010196023620665073, "fcm_dpo/delta": 0.05344226956367493, "fcm_dpo/margin": 48.37961196899414, "fcm_dpo/q_t": 0.39275315403938293, "grad_norm": 115.53006744384766, "learning_rate": 8.76727937529367e-08, "logits/chosen": -0.8422183394432068, "logits/rejected": -0.8362429141998291, "logps/chosen": -404.19610595703125, "logps/ref_chosen": -309.6114501953125, "logps/ref_rejected": -256.64031982421875, "logps/rejected": -399.6045837402344, "loss": 4.4924, "margin_dpo/margin_mean": 48.37961196899414, "margin_dpo/margin_std": 80.97713470458984, "step": 360 }, { "epoch": 0.7560209424083769, "fcm_dpo/beta": 0.010226656682789326, "fcm_dpo/delta": -0.05834663659334183, "fcm_dpo/margin": 64.01289367675781, "fcm_dpo/q_t": 0.36634212732315063, "grad_norm": 100.24212646484375, "learning_rate": 8.628481651367875e-08, "logits/chosen": -0.798484742641449, "logits/rejected": -0.7778838276863098, "logps/chosen": -340.5452575683594, "logps/ref_chosen": -263.3797607421875, "logps/ref_rejected": -271.18157958984375, "logps/rejected": -412.3599548339844, "loss": 4.1178, "margin_dpo/margin_mean": 64.01289367675781, "margin_dpo/margin_std": 91.16770935058594, "step": 361 }, { "epoch": 0.7581151832460733, "fcm_dpo/beta": 0.009908447042107582, "fcm_dpo/delta": 0.03887121379375458, "fcm_dpo/margin": 56.83973693847656, "fcm_dpo/q_t": 0.3782539367675781, "grad_norm": 90.40959930419922, "learning_rate": 8.490561882286135e-08, "logits/chosen": -0.8111223578453064, "logits/rejected": -0.8046758770942688, "logps/chosen": -389.388916015625, "logps/ref_chosen": -303.2583923339844, "logps/ref_rejected": -243.22891235351562, "logps/rejected": -386.1991271972656, "loss": 4.0886, "margin_dpo/margin_mean": 56.83973693847656, "margin_dpo/margin_std": 72.43896484375, "step": 362 }, { "epoch": 0.7602094240837697, "fcm_dpo/beta": 0.010420668870210648, "fcm_dpo/delta": 0.029644204303622246, "fcm_dpo/margin": 54.629852294921875, "fcm_dpo/q_t": 0.38353899121284485, "grad_norm": 97.74794006347656, "learning_rate": 8.353527464267104e-08, "logits/chosen": -0.8362611532211304, "logits/rejected": -0.7902975678443909, "logps/chosen": -395.0887451171875, "logps/ref_chosen": -303.34722900390625, "logps/ref_rejected": -262.05419921875, "logps/rejected": -408.4255676269531, "loss": 4.3097, "margin_dpo/margin_mean": 54.62985610961914, "margin_dpo/margin_std": 84.8177261352539, "step": 363 }, { "epoch": 0.762303664921466, "fcm_dpo/beta": 0.010761250741779804, "fcm_dpo/delta": 0.1065862700343132, "fcm_dpo/margin": 46.43891143798828, "fcm_dpo/q_t": 0.3989133834838867, "grad_norm": 97.99282836914062, "learning_rate": 8.217385746050742e-08, "logits/chosen": -0.806189239025116, "logits/rejected": -0.8205310702323914, "logps/chosen": -395.6390075683594, "logps/ref_chosen": -285.54376220703125, "logps/ref_rejected": -284.84619140625, "logps/rejected": -441.38031005859375, "loss": 4.6575, "margin_dpo/margin_mean": 46.43891143798828, "margin_dpo/margin_std": 89.2318115234375, "step": 364 }, { "epoch": 0.7643979057591623, "fcm_dpo/beta": 0.011114663444459438, "fcm_dpo/delta": -0.062224358320236206, "fcm_dpo/margin": 54.65919876098633, "fcm_dpo/q_t": 0.37894606590270996, "grad_norm": 99.08690643310547, "learning_rate": 8.082144028504231e-08, "logits/chosen": -0.8273904323577881, "logits/rejected": -0.8326528668403625, "logps/chosen": -370.6837158203125, "logps/ref_chosen": -274.7878112792969, "logps/ref_rejected": -256.5738220214844, "logps/rejected": -407.1288757324219, "loss": 4.2368, "margin_dpo/margin_mean": 54.6591911315918, "margin_dpo/margin_std": 82.35843658447266, "step": 365 }, { "epoch": 0.7664921465968586, "fcm_dpo/beta": 0.010609567165374756, "fcm_dpo/delta": -0.053685709834098816, "fcm_dpo/margin": 61.24087905883789, "fcm_dpo/q_t": 0.3654269874095917, "grad_norm": 92.26556396484375, "learning_rate": 7.947809564230445e-08, "logits/chosen": -0.7945237159729004, "logits/rejected": -0.8086446523666382, "logps/chosen": -376.56878662109375, "logps/ref_chosen": -286.6496276855469, "logps/ref_rejected": -251.97140502929688, "logps/rejected": -403.1314697265625, "loss": 4.0641, "margin_dpo/margin_mean": 61.24087905883789, "margin_dpo/margin_std": 84.4163818359375, "step": 366 }, { "epoch": 0.768586387434555, "fcm_dpo/beta": 0.009982116520404816, "fcm_dpo/delta": -0.006225086748600006, "fcm_dpo/margin": 60.51777648925781, "fcm_dpo/q_t": 0.3717145025730133, "grad_norm": 107.34187316894531, "learning_rate": 7.814389557179016e-08, "logits/chosen": -0.7962571382522583, "logits/rejected": -0.7791531085968018, "logps/chosen": -392.7152099609375, "logps/ref_chosen": -301.9449768066406, "logps/ref_rejected": -265.5677185058594, "logps/rejected": -416.85565185546875, "loss": 4.0549, "margin_dpo/margin_mean": 60.51777648925781, "margin_dpo/margin_std": 78.20709228515625, "step": 367 }, { "epoch": 0.7706806282722513, "fcm_dpo/beta": 0.009898173622786999, "fcm_dpo/delta": -0.10373516380786896, "fcm_dpo/margin": 70.26689147949219, "fcm_dpo/q_t": 0.3502688705921173, "grad_norm": 72.27176666259766, "learning_rate": 7.681891162260015e-08, "logits/chosen": -0.7817418575286865, "logits/rejected": -0.7951399683952332, "logps/chosen": -379.85211181640625, "logps/ref_chosen": -294.62652587890625, "logps/ref_rejected": -258.7628479003906, "logps/rejected": -414.2553405761719, "loss": 3.7321, "margin_dpo/margin_mean": 70.26689910888672, "margin_dpo/margin_std": 73.98336791992188, "step": 368 }, { "epoch": 0.7727748691099476, "fcm_dpo/beta": 0.009732791222631931, "fcm_dpo/delta": 0.07320413738489151, "fcm_dpo/margin": 54.49217987060547, "fcm_dpo/q_t": 0.38609111309051514, "grad_norm": 93.12295532226562, "learning_rate": 7.550321484960251e-08, "logits/chosen": -0.8595657348632812, "logits/rejected": -0.8424580097198486, "logps/chosen": -375.50518798828125, "logps/ref_chosen": -282.5057373046875, "logps/ref_rejected": -266.41607666015625, "logps/rejected": -413.9076843261719, "loss": 4.2228, "margin_dpo/margin_mean": 54.49217987060547, "margin_dpo/margin_std": 75.60814666748047, "step": 369 }, { "epoch": 0.774869109947644, "fcm_dpo/beta": 0.009743990376591682, "fcm_dpo/delta": -0.03128061443567276, "fcm_dpo/margin": 64.41328430175781, "fcm_dpo/q_t": 0.36713510751724243, "grad_norm": 76.50689697265625, "learning_rate": 7.419687580962222e-08, "logits/chosen": -0.8467559218406677, "logits/rejected": -0.8696060180664062, "logps/chosen": -336.12493896484375, "logps/ref_chosen": -251.00640869140625, "logps/ref_rejected": -238.12542724609375, "logps/rejected": -387.6571960449219, "loss": 4.0569, "margin_dpo/margin_mean": 64.41327667236328, "margin_dpo/margin_std": 86.21026611328125, "step": 370 }, { "epoch": 0.7769633507853403, "fcm_dpo/beta": 0.010253066197037697, "fcm_dpo/delta": 0.0842670351266861, "fcm_dpo/margin": 50.67913818359375, "fcm_dpo/q_t": 0.3899438977241516, "grad_norm": 123.32413482666016, "learning_rate": 7.289996455765748e-08, "logits/chosen": -0.7954655885696411, "logits/rejected": -0.7937295436859131, "logps/chosen": -393.655029296875, "logps/ref_chosen": -296.6591491699219, "logps/ref_rejected": -251.14675903320312, "logps/rejected": -398.8217468261719, "loss": 4.3229, "margin_dpo/margin_mean": 50.679134368896484, "margin_dpo/margin_std": 76.93683624267578, "step": 371 }, { "epoch": 0.7790575916230367, "fcm_dpo/beta": 0.010081680491566658, "fcm_dpo/delta": -0.051262035965919495, "fcm_dpo/margin": 63.9815788269043, "fcm_dpo/q_t": 0.365226149559021, "grad_norm": 83.84464263916016, "learning_rate": 7.161255064312283e-08, "logits/chosen": -0.7702327370643616, "logits/rejected": -0.7675243020057678, "logps/chosen": -424.36273193359375, "logps/ref_chosen": -331.3714599609375, "logps/ref_rejected": -285.56805419921875, "logps/rejected": -442.5409240722656, "loss": 4.0537, "margin_dpo/margin_mean": 63.98158645629883, "margin_dpo/margin_std": 84.29653930664062, "step": 372 }, { "epoch": 0.7811518324607329, "fcm_dpo/beta": 0.009867929853498936, "fcm_dpo/delta": -0.005473626311868429, "fcm_dpo/margin": 61.17123031616211, "fcm_dpo/q_t": 0.3673017919063568, "grad_norm": 91.18738555908203, "learning_rate": 7.033470310611945e-08, "logits/chosen": -0.8663382530212402, "logits/rejected": -0.843439519405365, "logps/chosen": -405.7951354980469, "logps/ref_chosen": -321.9429931640625, "logps/ref_rejected": -271.2288513183594, "logps/rejected": -416.25225830078125, "loss": 4.0013, "margin_dpo/margin_mean": 61.171226501464844, "margin_dpo/margin_std": 72.36864471435547, "step": 373 }, { "epoch": 0.7832460732984293, "fcm_dpo/beta": 0.010404913686215878, "fcm_dpo/delta": 0.08060853183269501, "fcm_dpo/margin": 50.20440673828125, "fcm_dpo/q_t": 0.39187973737716675, "grad_norm": 74.0364990234375, "learning_rate": 6.906649047373245e-08, "logits/chosen": -0.8531290292739868, "logits/rejected": -0.8525895476341248, "logps/chosen": -410.4658203125, "logps/ref_chosen": -319.1685485839844, "logps/ref_rejected": -284.6263732910156, "logps/rejected": -426.1280212402344, "loss": 4.3572, "margin_dpo/margin_mean": 50.204410552978516, "margin_dpo/margin_std": 79.54742431640625, "step": 374 }, { "epoch": 0.7853403141361257, "fcm_dpo/beta": 0.010893258266150951, "fcm_dpo/delta": 0.029163816943764687, "fcm_dpo/margin": 47.64557647705078, "fcm_dpo/q_t": 0.39366090297698975, "grad_norm": 104.41280364990234, "learning_rate": 6.780798075635675e-08, "logits/chosen": -0.8502262830734253, "logits/rejected": -0.8328761458396912, "logps/chosen": -412.74224853515625, "logps/ref_chosen": -314.87579345703125, "logps/ref_rejected": -259.1965026855469, "logps/rejected": -404.70849609375, "loss": 4.457, "margin_dpo/margin_mean": 47.64557647705078, "margin_dpo/margin_std": 81.04498291015625, "step": 375 }, { "epoch": 0.787434554973822, "fcm_dpo/beta": 0.010992622934281826, "fcm_dpo/delta": -0.005566142499446869, "fcm_dpo/margin": 54.897132873535156, "fcm_dpo/q_t": 0.3781394064426422, "grad_norm": 112.78710174560547, "learning_rate": 6.655924144404906e-08, "logits/chosen": -0.8241918087005615, "logits/rejected": -0.832420825958252, "logps/chosen": -385.7311096191406, "logps/ref_chosen": -287.6732482910156, "logps/ref_rejected": -256.6697082519531, "logps/rejected": -409.6247253417969, "loss": 4.2815, "margin_dpo/margin_mean": 54.897132873535156, "margin_dpo/margin_std": 85.11792755126953, "step": 376 }, { "epoch": 0.7895287958115184, "fcm_dpo/beta": 0.01128990575671196, "fcm_dpo/delta": 0.05416107177734375, "fcm_dpo/margin": 38.42709732055664, "fcm_dpo/q_t": 0.41005009412765503, "grad_norm": 113.04798889160156, "learning_rate": 6.532033950290885e-08, "logits/chosen": -0.8132824897766113, "logits/rejected": -0.8157401084899902, "logps/chosen": -409.5943298339844, "logps/ref_chosen": -305.261474609375, "logps/ref_rejected": -271.8887023925781, "logps/rejected": -414.6486511230469, "loss": 4.8146, "margin_dpo/margin_mean": 38.427101135253906, "margin_dpo/margin_std": 82.6573486328125, "step": 377 }, { "epoch": 0.7916230366492146, "fcm_dpo/beta": 0.011431505903601646, "fcm_dpo/delta": 0.02444280870258808, "fcm_dpo/margin": 46.53923416137695, "fcm_dpo/q_t": 0.39048752188682556, "grad_norm": 110.65091705322266, "learning_rate": 6.409134137148736e-08, "logits/chosen": -0.8158414363861084, "logits/rejected": -0.8029335737228394, "logps/chosen": -378.9710388183594, "logps/ref_chosen": -281.5295715332031, "logps/ref_rejected": -296.980224609375, "logps/rejected": -440.9609069824219, "loss": 4.3736, "margin_dpo/margin_mean": 46.53923416137695, "margin_dpo/margin_std": 74.37672424316406, "step": 378 }, { "epoch": 0.793717277486911, "fcm_dpo/beta": 0.011517210863530636, "fcm_dpo/delta": 0.008636513724923134, "fcm_dpo/margin": 51.24773406982422, "fcm_dpo/q_t": 0.38045018911361694, "grad_norm": 115.9316635131836, "learning_rate": 6.28723129572247e-08, "logits/chosen": -0.8725168704986572, "logits/rejected": -0.8533939123153687, "logps/chosen": -355.72607421875, "logps/ref_chosen": -265.0807800292969, "logps/ref_rejected": -230.58932495117188, "logps/rejected": -372.4823303222656, "loss": 4.336, "margin_dpo/margin_mean": 51.247737884521484, "margin_dpo/margin_std": 82.44475555419922, "step": 379 }, { "epoch": 0.7958115183246073, "fcm_dpo/beta": 0.011387725360691547, "fcm_dpo/delta": -0.07844444364309311, "fcm_dpo/margin": 53.415035247802734, "fcm_dpo/q_t": 0.37550121545791626, "grad_norm": 118.78459167480469, "learning_rate": 6.166331963291519e-08, "logits/chosen": -0.8518512845039368, "logits/rejected": -0.8342669010162354, "logps/chosen": -403.8897399902344, "logps/ref_chosen": -305.90838623046875, "logps/ref_rejected": -286.5906677246094, "logps/rejected": -437.987060546875, "loss": 4.2305, "margin_dpo/margin_mean": 53.4150390625, "margin_dpo/margin_std": 78.6050033569336, "step": 380 }, { "epoch": 0.7979057591623037, "fcm_dpo/beta": 0.011148151010274887, "fcm_dpo/delta": -0.023326825350522995, "fcm_dpo/margin": 55.73320388793945, "fcm_dpo/q_t": 0.3738594651222229, "grad_norm": 100.60095977783203, "learning_rate": 6.046442623320145e-08, "logits/chosen": -0.8115476369857788, "logits/rejected": -0.7750450372695923, "logps/chosen": -346.8482666015625, "logps/ref_chosen": -252.87066650390625, "logps/ref_rejected": -261.1927490234375, "logps/rejected": -410.9035339355469, "loss": 4.1363, "margin_dpo/margin_mean": 55.73320007324219, "margin_dpo/margin_std": 79.52117919921875, "step": 381 }, { "epoch": 0.8, "fcm_dpo/beta": 0.010607222095131874, "fcm_dpo/delta": -0.08224906027317047, "fcm_dpo/margin": 63.71092987060547, "fcm_dpo/q_t": 0.35713696479797363, "grad_norm": 90.26844024658203, "learning_rate": 5.9275697051098275e-08, "logits/chosen": -0.8479326963424683, "logits/rejected": -0.8441295623779297, "logps/chosen": -379.1060791015625, "logps/ref_chosen": -289.2114562988281, "logps/ref_rejected": -278.45751953125, "logps/rejected": -432.06298828125, "loss": 3.9126, "margin_dpo/margin_mean": 63.71092224121094, "margin_dpo/margin_std": 76.458740234375, "step": 382 }, { "epoch": 0.8020942408376963, "fcm_dpo/beta": 0.010085361078381538, "fcm_dpo/delta": -0.03180404752492905, "fcm_dpo/margin": 57.359859466552734, "fcm_dpo/q_t": 0.37756213545799255, "grad_norm": 110.38035583496094, "learning_rate": 5.809719583454414e-08, "logits/chosen": -0.8326891660690308, "logits/rejected": -0.8139215111732483, "logps/chosen": -362.66534423828125, "logps/ref_chosen": -273.630859375, "logps/ref_rejected": -261.44024658203125, "logps/rejected": -407.83465576171875, "loss": 4.187, "margin_dpo/margin_mean": 57.359867095947266, "margin_dpo/margin_std": 80.45777893066406, "step": 383 }, { "epoch": 0.8041884816753927, "fcm_dpo/beta": 0.010192757472395897, "fcm_dpo/delta": 0.057503946125507355, "fcm_dpo/margin": 48.359901428222656, "fcm_dpo/q_t": 0.3959079384803772, "grad_norm": 82.36161804199219, "learning_rate": 5.6928985782982524e-08, "logits/chosen": -0.8383417725563049, "logits/rejected": -0.8363715410232544, "logps/chosen": -369.3538818359375, "logps/ref_chosen": -274.5699462890625, "logps/ref_rejected": -285.8253479003906, "logps/rejected": -428.9691467285156, "loss": 4.4124, "margin_dpo/margin_mean": 48.35989761352539, "margin_dpo/margin_std": 78.35002899169922, "step": 384 }, { "epoch": 0.806282722513089, "fcm_dpo/beta": 0.010394207201898098, "fcm_dpo/delta": 0.002360312268137932, "fcm_dpo/margin": 52.75077819824219, "fcm_dpo/q_t": 0.3835112154483795, "grad_norm": 88.78260803222656, "learning_rate": 5.57711295439732e-08, "logits/chosen": -0.7930533289909363, "logits/rejected": -0.794459342956543, "logps/chosen": -380.3506774902344, "logps/ref_chosen": -284.150634765625, "logps/ref_rejected": -244.87921142578125, "logps/rejected": -393.8300476074219, "loss": 4.204, "margin_dpo/margin_mean": 52.75077819824219, "margin_dpo/margin_std": 73.82457733154297, "step": 385 }, { "epoch": 0.8083769633507853, "fcm_dpo/beta": 0.009621590375900269, "fcm_dpo/delta": -0.12054447084665298, "fcm_dpo/margin": 67.64607238769531, "fcm_dpo/q_t": 0.35985732078552246, "grad_norm": 86.24301147460938, "learning_rate": 5.4623689209832484e-08, "logits/chosen": -0.785068929195404, "logits/rejected": -0.7856448888778687, "logps/chosen": -407.9813537597656, "logps/ref_chosen": -320.1762390136719, "logps/ref_rejected": -302.05023193359375, "logps/rejected": -457.50140380859375, "loss": 3.8964, "margin_dpo/margin_mean": 67.64607238769531, "margin_dpo/margin_std": 75.94105529785156, "step": 386 }, { "epoch": 0.8104712041884817, "fcm_dpo/beta": 0.009473450481891632, "fcm_dpo/delta": 0.00044431351125240326, "fcm_dpo/margin": 57.35693359375, "fcm_dpo/q_t": 0.3816065788269043, "grad_norm": 81.3988037109375, "learning_rate": 5.3486726314303175e-08, "logits/chosen": -0.8247092962265015, "logits/rejected": -0.8297352194786072, "logps/chosen": -366.90478515625, "logps/ref_chosen": -272.2801513671875, "logps/ref_rejected": -265.1615905761719, "logps/rejected": -417.14312744140625, "loss": 4.2071, "margin_dpo/margin_mean": 57.356929779052734, "margin_dpo/margin_std": 78.60884094238281, "step": 387 }, { "epoch": 0.812565445026178, "fcm_dpo/beta": 0.009732890874147415, "fcm_dpo/delta": 0.09739725291728973, "fcm_dpo/margin": 41.571998596191406, "fcm_dpo/q_t": 0.41511738300323486, "grad_norm": 105.03797149658203, "learning_rate": 5.2360301829254745e-08, "logits/chosen": -0.8036607503890991, "logits/rejected": -0.7966702580451965, "logps/chosen": -378.40643310546875, "logps/ref_chosen": -272.5313415527344, "logps/ref_rejected": -239.55735778808594, "logps/rejected": -387.00445556640625, "loss": 4.7304, "margin_dpo/margin_mean": 41.571998596191406, "margin_dpo/margin_std": 82.30770874023438, "step": 388 }, { "epoch": 0.8146596858638744, "fcm_dpo/beta": 0.009908687323331833, "fcm_dpo/delta": -0.03899161145091057, "fcm_dpo/margin": 52.967166900634766, "fcm_dpo/q_t": 0.3908618688583374, "grad_norm": 86.0737075805664, "learning_rate": 5.1244476161413806e-08, "logits/chosen": -0.8428322076797485, "logits/rejected": -0.8418431878089905, "logps/chosen": -380.26837158203125, "logps/ref_chosen": -281.0892639160156, "logps/ref_rejected": -246.50045776367188, "logps/rejected": -398.646728515625, "loss": 4.3874, "margin_dpo/margin_mean": 52.96717071533203, "margin_dpo/margin_std": 83.29864501953125, "step": 389 }, { "epoch": 0.8167539267015707, "fcm_dpo/beta": 0.010237889364361763, "fcm_dpo/delta": 0.0414692722260952, "fcm_dpo/margin": 54.53743362426758, "fcm_dpo/q_t": 0.3818510174751282, "grad_norm": 83.15040588378906, "learning_rate": 5.013930914912476e-08, "logits/chosen": -0.852079451084137, "logits/rejected": -0.8583500981330872, "logps/chosen": -382.2542419433594, "logps/ref_chosen": -283.98748779296875, "logps/ref_rejected": -283.465087890625, "logps/rejected": -436.26922607421875, "loss": 4.2404, "margin_dpo/margin_mean": 54.53743362426758, "margin_dpo/margin_std": 78.8434066772461, "step": 390 }, { "epoch": 0.818848167539267, "fcm_dpo/beta": 0.009870692156255245, "fcm_dpo/delta": -0.02596093714237213, "fcm_dpo/margin": 57.44294738769531, "fcm_dpo/q_t": 0.38027477264404297, "grad_norm": 101.38391876220703, "learning_rate": 4.904486005914027e-08, "logits/chosen": -0.7972782850265503, "logits/rejected": -0.7920839190483093, "logps/chosen": -389.5296325683594, "logps/ref_chosen": -283.86138916015625, "logps/ref_rejected": -263.5093688964844, "logps/rejected": -426.6205749511719, "loss": 4.1996, "margin_dpo/margin_mean": 57.44294357299805, "margin_dpo/margin_std": 80.77877807617188, "step": 391 }, { "epoch": 0.8209424083769633, "fcm_dpo/beta": 0.009380877017974854, "fcm_dpo/delta": -0.06458516418933868, "fcm_dpo/margin": 69.83071899414062, "fcm_dpo/q_t": 0.3584578335285187, "grad_norm": 85.31986236572266, "learning_rate": 4.796118758344353e-08, "logits/chosen": -0.7884517312049866, "logits/rejected": -0.8135141730308533, "logps/chosen": -403.9764099121094, "logps/ref_chosen": -310.070068359375, "logps/ref_rejected": -252.89817810058594, "logps/rejected": -416.63519287109375, "loss": 3.8778, "margin_dpo/margin_mean": 69.83071899414062, "margin_dpo/margin_std": 76.77499389648438, "step": 392 }, { "epoch": 0.8230366492146597, "fcm_dpo/beta": 0.010007185861468315, "fcm_dpo/delta": 0.061848465353250504, "fcm_dpo/margin": 53.800540924072266, "fcm_dpo/q_t": 0.3861052393913269, "grad_norm": 114.60466766357422, "learning_rate": 4.688834983610082e-08, "logits/chosen": -0.8373547792434692, "logits/rejected": -0.8279107213020325, "logps/chosen": -378.2579040527344, "logps/ref_chosen": -286.7156677246094, "logps/ref_rejected": -230.00357055664062, "logps/rejected": -375.34637451171875, "loss": 4.2635, "margin_dpo/margin_mean": 53.800537109375, "margin_dpo/margin_std": 79.83255767822266, "step": 393 }, { "epoch": 0.8251308900523561, "fcm_dpo/beta": 0.010027028620243073, "fcm_dpo/delta": 0.04232503101229668, "fcm_dpo/margin": 49.44554901123047, "fcm_dpo/q_t": 0.39943477511405945, "grad_norm": 78.94566345214844, "learning_rate": 4.582640435014459e-08, "logits/chosen": -0.8648529052734375, "logits/rejected": -0.8650112152099609, "logps/chosen": -419.1053161621094, "logps/ref_chosen": -325.9934387207031, "logps/ref_rejected": -317.42706298828125, "logps/rejected": -459.9844970703125, "loss": 4.4582, "margin_dpo/margin_mean": 49.4455451965332, "margin_dpo/margin_std": 82.50776672363281, "step": 394 }, { "epoch": 0.8272251308900523, "fcm_dpo/beta": 0.010306437499821186, "fcm_dpo/delta": -0.029076773673295975, "fcm_dpo/margin": 60.80992889404297, "fcm_dpo/q_t": 0.37129712104797363, "grad_norm": 75.36946868896484, "learning_rate": 4.477540807448832e-08, "logits/chosen": -0.8019086122512817, "logits/rejected": -0.8114342093467712, "logps/chosen": -360.0174560546875, "logps/ref_chosen": -268.90081787109375, "logps/ref_rejected": -272.85809326171875, "logps/rejected": -424.7846984863281, "loss": 4.0261, "margin_dpo/margin_mean": 60.8099250793457, "margin_dpo/margin_std": 79.55804443359375, "step": 395 }, { "epoch": 0.8293193717277487, "fcm_dpo/beta": 0.010009893216192722, "fcm_dpo/delta": -0.016297191381454468, "fcm_dpo/margin": 55.63848114013672, "fcm_dpo/q_t": 0.3808242976665497, "grad_norm": 90.71900177001953, "learning_rate": 4.373541737087263e-08, "logits/chosen": -0.8296109437942505, "logits/rejected": -0.8163138628005981, "logps/chosen": -384.39410400390625, "logps/ref_chosen": -291.19830322265625, "logps/ref_rejected": -253.2803955078125, "logps/rejected": -402.11468505859375, "loss": 4.1991, "margin_dpo/margin_mean": 55.63848114013672, "margin_dpo/margin_std": 76.70056915283203, "step": 396 }, { "epoch": 0.831413612565445, "fcm_dpo/beta": 0.009893465787172318, "fcm_dpo/delta": -0.023841019719839096, "fcm_dpo/margin": 49.24290466308594, "fcm_dpo/q_t": 0.39738088846206665, "grad_norm": 90.27240753173828, "learning_rate": 4.270648801084295e-08, "logits/chosen": -0.8341606259346008, "logits/rejected": -0.8116894960403442, "logps/chosen": -400.8775939941406, "logps/ref_chosen": -309.8224182128906, "logps/ref_rejected": -291.9057922363281, "logps/rejected": -432.20391845703125, "loss": 4.5174, "margin_dpo/margin_mean": 49.24290466308594, "margin_dpo/margin_std": 83.20286560058594, "step": 397 }, { "epoch": 0.8335078534031414, "fcm_dpo/beta": 0.009832684881985188, "fcm_dpo/delta": 0.07155661284923553, "fcm_dpo/margin": 46.61724853515625, "fcm_dpo/q_t": 0.40298062562942505, "grad_norm": 107.44989776611328, "learning_rate": 4.168867517275806e-08, "logits/chosen": -0.7414498925209045, "logits/rejected": -0.7821962833404541, "logps/chosen": -398.432861328125, "logps/ref_chosen": -297.8135070800781, "logps/ref_rejected": -270.5025634765625, "logps/rejected": -417.7391662597656, "loss": 4.726, "margin_dpo/margin_mean": 46.61724853515625, "margin_dpo/margin_std": 91.47262573242188, "step": 398 }, { "epoch": 0.8356020942408376, "fcm_dpo/beta": 0.010510783642530441, "fcm_dpo/delta": 0.05873828008770943, "fcm_dpo/margin": 51.76239776611328, "fcm_dpo/q_t": 0.38723278045654297, "grad_norm": 91.62894439697266, "learning_rate": 4.0682033438831584e-08, "logits/chosen": -0.8432673811912537, "logits/rejected": -0.80589359998703, "logps/chosen": -392.64324951171875, "logps/ref_chosen": -292.8467712402344, "logps/ref_rejected": -268.3638916015625, "logps/rejected": -419.9228210449219, "loss": 4.3291, "margin_dpo/margin_mean": 51.76239776611328, "margin_dpo/margin_std": 81.01602172851562, "step": 399 }, { "epoch": 0.837696335078534, "fcm_dpo/beta": 0.010746605694293976, "fcm_dpo/delta": 0.025092536583542824, "fcm_dpo/margin": 53.3397331237793, "fcm_dpo/q_t": 0.3807898461818695, "grad_norm": 134.78067016601562, "learning_rate": 3.968661679220467e-08, "logits/chosen": -0.8801178932189941, "logits/rejected": -0.8791629672050476, "logps/chosen": -358.38555908203125, "logps/ref_chosen": -263.6763916015625, "logps/ref_rejected": -258.67266845703125, "logps/rejected": -406.7215881347656, "loss": 4.3167, "margin_dpo/margin_mean": 53.3397331237793, "margin_dpo/margin_std": 79.3423080444336, "step": 400 }, { "epoch": 0.837696335078534, "eval_fcm_dpo/beta": 0.011007222346961498, "eval_logits/chosen": -0.840668797492981, "eval_logits/rejected": -0.8345889449119568, "eval_logps/chosen": -383.9891357421875, "eval_logps/ref_chosen": -287.8267517089844, "eval_logps/ref_rejected": -266.9313659667969, "eval_logps/rejected": -417.3312072753906, "eval_loss": 0.5351805090904236, "eval_margin_dpo/margin_mean": 54.237510681152344, "eval_margin_dpo/margin_std": 83.07901763916016, "eval_runtime": 81.6128, "eval_samples_per_second": 24.506, "eval_steps_per_second": 1.532, "step": 400 }, { "epoch": 0.8397905759162304, "fcm_dpo/beta": 0.01083466224372387, "fcm_dpo/delta": -0.03301737830042839, "fcm_dpo/margin": 58.17015075683594, "fcm_dpo/q_t": 0.3694632053375244, "grad_norm": 130.7917022705078, "learning_rate": 3.8702478614051345e-08, "logits/chosen": -0.8163310289382935, "logits/rejected": -0.8166416883468628, "logps/chosen": -411.48193359375, "logps/ref_chosen": -318.2853088378906, "logps/ref_rejected": -293.75225830078125, "logps/rejected": -445.11895751953125, "loss": 4.0959, "margin_dpo/margin_mean": 58.1701545715332, "margin_dpo/margin_std": 81.16681671142578, "step": 401 }, { "epoch": 0.8418848167539267, "fcm_dpo/beta": 0.010784516111016273, "fcm_dpo/delta": 0.002740806434303522, "fcm_dpo/margin": 55.34971237182617, "fcm_dpo/q_t": 0.37727218866348267, "grad_norm": 109.63217163085938, "learning_rate": 3.772967168071517e-08, "logits/chosen": -0.8767110705375671, "logits/rejected": -0.8513585329055786, "logps/chosen": -398.0880126953125, "logps/ref_chosen": -309.4278564453125, "logps/ref_rejected": -282.0279846191406, "logps/rejected": -426.037841796875, "loss": 4.1861, "margin_dpo/margin_mean": 55.34970474243164, "margin_dpo/margin_std": 82.14326477050781, "step": 402 }, { "epoch": 0.8439790575916231, "fcm_dpo/beta": 0.010111565701663494, "fcm_dpo/delta": -0.1565774530172348, "fcm_dpo/margin": 73.72638702392578, "fcm_dpo/q_t": 0.34308868646621704, "grad_norm": 77.10204315185547, "learning_rate": 3.676824816087978e-08, "logits/chosen": -0.8601398468017578, "logits/rejected": -0.8417026996612549, "logps/chosen": -399.95440673828125, "logps/ref_chosen": -309.0284729003906, "logps/ref_rejected": -272.9622497558594, "logps/rejected": -437.61456298828125, "loss": 3.6889, "margin_dpo/margin_mean": 73.72638702392578, "margin_dpo/margin_std": 79.83676147460938, "step": 403 }, { "epoch": 0.8460732984293193, "fcm_dpo/beta": 0.009832248091697693, "fcm_dpo/delta": 0.06779766827821732, "fcm_dpo/margin": 54.37714385986328, "fcm_dpo/q_t": 0.3864296078681946, "grad_norm": 93.5862045288086, "learning_rate": 3.581825961277074e-08, "logits/chosen": -0.88753741979599, "logits/rejected": -0.8670026063919067, "logps/chosen": -398.1080627441406, "logps/ref_chosen": -297.2837219238281, "logps/ref_rejected": -256.99041748046875, "logps/rejected": -412.1919250488281, "loss": 4.3263, "margin_dpo/margin_mean": 54.377140045166016, "margin_dpo/margin_std": 83.19239044189453, "step": 404 }, { "epoch": 0.8481675392670157, "fcm_dpo/beta": 0.010070566087961197, "fcm_dpo/delta": -0.0018516681157052517, "fcm_dpo/margin": 59.72123718261719, "fcm_dpo/q_t": 0.37372201681137085, "grad_norm": 72.33039093017578, "learning_rate": 3.487975698139084e-08, "logits/chosen": -0.7841629385948181, "logits/rejected": -0.7917266488075256, "logps/chosen": -349.7501220703125, "logps/ref_chosen": -257.96533203125, "logps/ref_rejected": -255.811279296875, "logps/rejected": -407.3173522949219, "loss": 4.0942, "margin_dpo/margin_mean": 59.72124099731445, "margin_dpo/margin_std": 81.7040786743164, "step": 405 }, { "epoch": 0.8502617801047121, "fcm_dpo/beta": 0.010788071900606155, "fcm_dpo/delta": 0.11286494135856628, "fcm_dpo/margin": 45.265289306640625, "fcm_dpo/q_t": 0.3963577449321747, "grad_norm": 122.36735534667969, "learning_rate": 3.3952790595787986e-08, "logits/chosen": -0.8172638416290283, "logits/rejected": -0.7938133478164673, "logps/chosen": -388.5911865234375, "logps/ref_chosen": -285.1810607910156, "logps/ref_rejected": -264.41351318359375, "logps/rejected": -413.0889892578125, "loss": 4.4481, "margin_dpo/margin_mean": 45.265289306640625, "margin_dpo/margin_std": 74.5055160522461, "step": 406 }, { "epoch": 0.8523560209424084, "fcm_dpo/beta": 0.010678643360733986, "fcm_dpo/delta": -0.038571376353502274, "fcm_dpo/margin": 59.46702575683594, "fcm_dpo/q_t": 0.3713955879211426, "grad_norm": 115.4225082397461, "learning_rate": 3.303741016635614e-08, "logits/chosen": -0.8237298130989075, "logits/rejected": -0.8525005578994751, "logps/chosen": -370.30657958984375, "logps/ref_chosen": -265.23809814453125, "logps/ref_rejected": -219.0631561279297, "logps/rejected": -383.59869384765625, "loss": 4.1242, "margin_dpo/margin_mean": 59.46702575683594, "margin_dpo/margin_std": 84.35752868652344, "step": 407 }, { "epoch": 0.8544502617801047, "fcm_dpo/beta": 0.010710010305047035, "fcm_dpo/delta": -0.021216176450252533, "fcm_dpo/margin": 57.599510192871094, "fcm_dpo/q_t": 0.3745374381542206, "grad_norm": 82.85662841796875, "learning_rate": 3.2133664782169944e-08, "logits/chosen": -0.853847861289978, "logits/rejected": -0.8488500118255615, "logps/chosen": -388.76116943359375, "logps/ref_chosen": -296.9726257324219, "logps/ref_rejected": -295.4786376953125, "logps/rejected": -444.86669921875, "loss": 4.1418, "margin_dpo/margin_mean": 57.599510192871094, "margin_dpo/margin_std": 79.41829681396484, "step": 408 }, { "epoch": 0.856544502617801, "fcm_dpo/beta": 0.010306498035788536, "fcm_dpo/delta": -0.026171572506427765, "fcm_dpo/margin": 55.8836669921875, "fcm_dpo/q_t": 0.38002270460128784, "grad_norm": 89.53182220458984, "learning_rate": 3.12416029083514e-08, "logits/chosen": -0.8308712244033813, "logits/rejected": -0.8197529315948486, "logps/chosen": -387.86822509765625, "logps/ref_chosen": -287.37933349609375, "logps/ref_rejected": -275.80291748046875, "logps/rejected": -432.1754150390625, "loss": 4.3765, "margin_dpo/margin_mean": 55.8836669921875, "margin_dpo/margin_std": 91.40107727050781, "step": 409 }, { "epoch": 0.8586387434554974, "fcm_dpo/beta": 0.010539250448346138, "fcm_dpo/delta": 0.05248191952705383, "fcm_dpo/margin": 52.03704833984375, "fcm_dpo/q_t": 0.38655808568000793, "grad_norm": 104.86951446533203, "learning_rate": 3.036127238347164e-08, "logits/chosen": -0.8444973826408386, "logits/rejected": -0.8523566722869873, "logps/chosen": -379.1742248535156, "logps/ref_chosen": -281.7801818847656, "logps/ref_rejected": -266.7550354003906, "logps/rejected": -416.18609619140625, "loss": 4.3978, "margin_dpo/margin_mean": 52.03704833984375, "margin_dpo/margin_std": 85.04647827148438, "step": 410 }, { "epoch": 0.8607329842931937, "fcm_dpo/beta": 0.010161810554564, "fcm_dpo/delta": -0.07766594737768173, "fcm_dpo/margin": 65.88399505615234, "fcm_dpo/q_t": 0.35847824811935425, "grad_norm": 82.89816284179688, "learning_rate": 2.9492720416985e-08, "logits/chosen": -0.8404784798622131, "logits/rejected": -0.8063231706619263, "logps/chosen": -373.0326843261719, "logps/ref_chosen": -281.5872497558594, "logps/ref_rejected": -254.78916931152344, "logps/rejected": -412.1186218261719, "loss": 3.8683, "margin_dpo/margin_mean": 65.88399505615234, "margin_dpo/margin_std": 77.45508575439453, "step": 411 }, { "epoch": 0.86282722513089, "fcm_dpo/beta": 0.009724740870296955, "fcm_dpo/delta": -0.014092553406953812, "fcm_dpo/margin": 49.745506286621094, "fcm_dpo/q_t": 0.39898359775543213, "grad_norm": 88.29672241210938, "learning_rate": 2.863599358669755e-08, "logits/chosen": -0.8222418427467346, "logits/rejected": -0.8297065496444702, "logps/chosen": -382.5504455566406, "logps/ref_chosen": -276.5341796875, "logps/ref_rejected": -273.8751220703125, "logps/rejected": -429.636962890625, "loss": 4.4639, "margin_dpo/margin_mean": 49.745506286621094, "margin_dpo/margin_std": 82.65047454833984, "step": 412 }, { "epoch": 0.8649214659685864, "fcm_dpo/beta": 0.01038271188735962, "fcm_dpo/delta": 0.1108207255601883, "fcm_dpo/margin": 47.65922927856445, "fcm_dpo/q_t": 0.3976641297340393, "grad_norm": 122.68524169921875, "learning_rate": 2.7791137836269158e-08, "logits/chosen": -0.8296762704849243, "logits/rejected": -0.8280857801437378, "logps/chosen": -370.8761901855469, "logps/ref_chosen": -271.2745666503906, "logps/ref_rejected": -270.16912841796875, "logps/rejected": -417.4300231933594, "loss": 4.4352, "margin_dpo/margin_mean": 47.659236907958984, "margin_dpo/margin_std": 79.33601379394531, "step": 413 }, { "epoch": 0.8670157068062827, "fcm_dpo/beta": 0.010576970875263214, "fcm_dpo/delta": -0.07696720957756042, "fcm_dpo/margin": 63.35627746582031, "fcm_dpo/q_t": 0.3609466850757599, "grad_norm": 91.79287719726562, "learning_rate": 2.6958198472749717e-08, "logits/chosen": -0.8634947538375854, "logits/rejected": -0.8709216117858887, "logps/chosen": -394.9339904785156, "logps/ref_chosen": -297.11505126953125, "logps/ref_rejected": -271.7034606933594, "logps/rejected": -432.8786926269531, "loss": 3.9873, "margin_dpo/margin_mean": 63.35627746582031, "margin_dpo/margin_std": 80.128173828125, "step": 414 }, { "epoch": 0.8691099476439791, "fcm_dpo/beta": 0.010473713278770447, "fcm_dpo/delta": 0.03152439743280411, "fcm_dpo/margin": 54.287376403808594, "fcm_dpo/q_t": 0.37784260511398315, "grad_norm": 88.63931274414062, "learning_rate": 2.613722016414943e-08, "logits/chosen": -0.8671582937240601, "logits/rejected": -0.8537446856498718, "logps/chosen": -394.3064880371094, "logps/ref_chosen": -297.6926574707031, "logps/ref_rejected": -279.0503234863281, "logps/rejected": -429.9515380859375, "loss": 4.1391, "margin_dpo/margin_mean": 54.287376403808594, "margin_dpo/margin_std": 73.61995697021484, "step": 415 }, { "epoch": 0.8712041884816754, "fcm_dpo/beta": 0.010003462433815002, "fcm_dpo/delta": -0.06168883666396141, "fcm_dpo/margin": 65.28002166748047, "fcm_dpo/q_t": 0.3622613847255707, "grad_norm": 75.9556655883789, "learning_rate": 2.5328246937043525e-08, "logits/chosen": -0.8746985197067261, "logits/rejected": -0.8851325511932373, "logps/chosen": -402.248046875, "logps/ref_chosen": -311.8255615234375, "logps/ref_rejected": -268.6170654296875, "logps/rejected": -424.3195495605469, "loss": 4.0131, "margin_dpo/margin_mean": 65.28001403808594, "margin_dpo/margin_std": 82.11227416992188, "step": 416 }, { "epoch": 0.8732984293193717, "fcm_dpo/beta": 0.009751483798027039, "fcm_dpo/delta": -0.026388350874185562, "fcm_dpo/margin": 57.194740295410156, "fcm_dpo/q_t": 0.38324517011642456, "grad_norm": 92.91184997558594, "learning_rate": 2.4531322174210973e-08, "logits/chosen": -0.8104668259620667, "logits/rejected": -0.8152583837509155, "logps/chosen": -410.4869384765625, "logps/ref_chosen": -310.43682861328125, "logps/ref_rejected": -277.15283203125, "logps/rejected": -434.3976745605469, "loss": 4.2952, "margin_dpo/margin_mean": 57.194740295410156, "margin_dpo/margin_std": 84.70370483398438, "step": 417 }, { "epoch": 0.875392670157068, "fcm_dpo/beta": 0.009737811051309109, "fcm_dpo/delta": -0.03723875805735588, "fcm_dpo/margin": 54.46815490722656, "fcm_dpo/q_t": 0.38777798414230347, "grad_norm": 96.64009094238281, "learning_rate": 2.3746488612308295e-08, "logits/chosen": -0.8096103072166443, "logits/rejected": -0.7874211668968201, "logps/chosen": -387.23211669921875, "logps/ref_chosen": -278.49591064453125, "logps/ref_rejected": -276.56671142578125, "logps/rejected": -439.77105712890625, "loss": 4.3411, "margin_dpo/margin_mean": 54.46815490722656, "margin_dpo/margin_std": 80.28997802734375, "step": 418 }, { "epoch": 0.8774869109947644, "fcm_dpo/beta": 0.00948832742869854, "fcm_dpo/delta": 0.0006105322390794754, "fcm_dpo/margin": 62.94663619995117, "fcm_dpo/q_t": 0.3723425269126892, "grad_norm": 94.83244323730469, "learning_rate": 2.297378833957761e-08, "logits/chosen": -0.8623223304748535, "logits/rejected": -0.841428816318512, "logps/chosen": -406.687744140625, "logps/ref_chosen": -298.9002380371094, "logps/ref_rejected": -246.1540985107422, "logps/rejected": -416.88824462890625, "loss": 4.1616, "margin_dpo/margin_mean": 62.946632385253906, "margin_dpo/margin_std": 87.8830337524414, "step": 419 }, { "epoch": 0.8795811518324608, "fcm_dpo/beta": 0.009303269907832146, "fcm_dpo/delta": -0.02820839360356331, "fcm_dpo/margin": 67.16934967041016, "fcm_dpo/q_t": 0.3698027729988098, "grad_norm": 119.92971801757812, "learning_rate": 2.2213262793589482e-08, "logits/chosen": -0.8005006909370422, "logits/rejected": -0.7742573618888855, "logps/chosen": -369.0135498046875, "logps/ref_chosen": -264.5608825683594, "logps/ref_rejected": -245.67031860351562, "logps/rejected": -417.29229736328125, "loss": 4.1315, "margin_dpo/margin_mean": 67.16934967041016, "margin_dpo/margin_std": 95.13330078125, "step": 420 }, { "epoch": 0.881675392670157, "fcm_dpo/beta": 0.009417861700057983, "fcm_dpo/delta": 0.0553901270031929, "fcm_dpo/margin": 58.053810119628906, "fcm_dpo/q_t": 0.3806764483451843, "grad_norm": 95.21514129638672, "learning_rate": 2.1464952759020856e-08, "logits/chosen": -0.87691730260849, "logits/rejected": -0.8619418144226074, "logps/chosen": -393.3926086425781, "logps/ref_chosen": -297.70501708984375, "logps/ref_rejected": -243.74771118164062, "logps/rejected": -397.4891052246094, "loss": 4.1826, "margin_dpo/margin_mean": 58.053810119628906, "margin_dpo/margin_std": 79.26122283935547, "step": 421 }, { "epoch": 0.8837696335078534, "fcm_dpo/beta": 0.009782630950212479, "fcm_dpo/delta": 0.011894671246409416, "fcm_dpo/margin": 60.101036071777344, "fcm_dpo/q_t": 0.378351628780365, "grad_norm": 73.66893768310547, "learning_rate": 2.07288983654679e-08, "logits/chosen": -0.7312873601913452, "logits/rejected": -0.7808342576026917, "logps/chosen": -388.74200439453125, "logps/ref_chosen": -288.3587646484375, "logps/ref_rejected": -256.4377746582031, "logps/rejected": -416.9220886230469, "loss": 4.2494, "margin_dpo/margin_mean": 60.101036071777344, "margin_dpo/margin_std": 90.90327453613281, "step": 422 }, { "epoch": 0.8858638743455497, "fcm_dpo/beta": 0.009724876843392849, "fcm_dpo/delta": -0.009197833016514778, "fcm_dpo/margin": 62.44007873535156, "fcm_dpo/q_t": 0.3724360466003418, "grad_norm": 106.5963134765625, "learning_rate": 2.0005139085293942e-08, "logits/chosen": -0.8626726269721985, "logits/rejected": -0.847291886806488, "logps/chosen": -398.4643859863281, "logps/ref_chosen": -296.00701904296875, "logps/ref_rejected": -261.3480529785156, "logps/rejected": -426.2454833984375, "loss": 4.104, "margin_dpo/margin_mean": 62.44007873535156, "margin_dpo/margin_std": 85.05935668945312, "step": 423 }, { "epoch": 0.8879581151832461, "fcm_dpo/beta": 0.00962867308408022, "fcm_dpo/delta": -0.012859391048550606, "fcm_dpo/margin": 63.36909103393555, "fcm_dpo/q_t": 0.36758118867874146, "grad_norm": 92.60458374023438, "learning_rate": 1.9293713731512673e-08, "logits/chosen": -0.8467947840690613, "logits/rejected": -0.8503403663635254, "logps/chosen": -404.1105041503906, "logps/ref_chosen": -309.421875, "logps/ref_rejected": -249.14886474609375, "logps/rejected": -407.2065734863281, "loss": 3.9839, "margin_dpo/margin_mean": 63.36909103393555, "margin_dpo/margin_std": 75.94871520996094, "step": 424 }, { "epoch": 0.8900523560209425, "fcm_dpo/beta": 0.009777205064892769, "fcm_dpo/delta": 0.03504558652639389, "fcm_dpo/margin": 50.999881744384766, "fcm_dpo/q_t": 0.3966800570487976, "grad_norm": 110.89618682861328, "learning_rate": 1.8594660455706763e-08, "logits/chosen": -0.82796710729599, "logits/rejected": -0.8337902426719666, "logps/chosen": -382.77001953125, "logps/ref_chosen": -280.50909423828125, "logps/ref_rejected": -276.8252258300781, "logps/rejected": -430.08599853515625, "loss": 4.5049, "margin_dpo/margin_mean": 50.99988555908203, "margin_dpo/margin_std": 87.4363784790039, "step": 425 }, { "epoch": 0.8921465968586387, "fcm_dpo/beta": 0.009954184293746948, "fcm_dpo/delta": 0.007840080186724663, "fcm_dpo/margin": 59.37195587158203, "fcm_dpo/q_t": 0.37472671270370483, "grad_norm": 97.03230285644531, "learning_rate": 1.7908016745981856e-08, "logits/chosen": -0.852469801902771, "logits/rejected": -0.839727520942688, "logps/chosen": -397.4804992675781, "logps/ref_chosen": -292.78521728515625, "logps/ref_rejected": -255.62698364257812, "logps/rejected": -419.69415283203125, "loss": 4.1268, "margin_dpo/margin_mean": 59.37195587158203, "margin_dpo/margin_std": 79.54149627685547, "step": 426 }, { "epoch": 0.8942408376963351, "fcm_dpo/beta": 0.009696273133158684, "fcm_dpo/delta": -0.15165768563747406, "fcm_dpo/margin": 76.89569091796875, "fcm_dpo/q_t": 0.34561559557914734, "grad_norm": 90.61172485351562, "learning_rate": 1.7233819424956247e-08, "logits/chosen": -0.8438408374786377, "logits/rejected": -0.8143002390861511, "logps/chosen": -388.6639099121094, "logps/ref_chosen": -288.7687072753906, "logps/ref_rejected": -268.4986572265625, "logps/rejected": -445.2895812988281, "loss": 3.8296, "margin_dpo/margin_mean": 76.89569091796875, "margin_dpo/margin_std": 89.48991394042969, "step": 427 }, { "epoch": 0.8963350785340314, "fcm_dpo/beta": 0.008838072419166565, "fcm_dpo/delta": -0.012139791622757912, "fcm_dpo/margin": 69.09696197509766, "fcm_dpo/q_t": 0.36812734603881836, "grad_norm": 81.80809783935547, "learning_rate": 1.6572104647786245e-08, "logits/chosen": -0.79007887840271, "logits/rejected": -0.8174630999565125, "logps/chosen": -407.681640625, "logps/ref_chosen": -295.5209655761719, "logps/ref_rejected": -275.71026611328125, "logps/rejected": -456.96795654296875, "loss": 4.0523, "margin_dpo/margin_mean": 69.09696197509766, "margin_dpo/margin_std": 90.01223754882812, "step": 428 }, { "epoch": 0.8984293193717278, "fcm_dpo/beta": 0.00869191437959671, "fcm_dpo/delta": -0.019767988473176956, "fcm_dpo/margin": 62.65784454345703, "fcm_dpo/q_t": 0.3779388666152954, "grad_norm": 152.89610290527344, "learning_rate": 1.5922907900227017e-08, "logits/chosen": -0.8012307286262512, "logits/rejected": -0.8117492198944092, "logps/chosen": -377.2466735839844, "logps/ref_chosen": -274.392333984375, "logps/ref_rejected": -258.574462890625, "logps/rejected": -424.086669921875, "loss": 4.3069, "margin_dpo/margin_mean": 62.65784454345703, "margin_dpo/margin_std": 93.02066802978516, "step": 429 }, { "epoch": 0.900523560209424, "fcm_dpo/beta": 0.008904652670025826, "fcm_dpo/delta": 0.03332711011171341, "fcm_dpo/margin": 52.0653076171875, "fcm_dpo/q_t": 0.400870144367218, "grad_norm": 87.17733001708984, "learning_rate": 1.5286263996730026e-08, "logits/chosen": -0.8745531439781189, "logits/rejected": -0.8473076224327087, "logps/chosen": -389.5596923828125, "logps/ref_chosen": -288.7391357421875, "logps/ref_rejected": -268.6106262207031, "logps/rejected": -421.49652099609375, "loss": 4.4476, "margin_dpo/margin_mean": 52.06529998779297, "margin_dpo/margin_std": 83.41305541992188, "step": 430 }, { "epoch": 0.9026178010471204, "fcm_dpo/beta": 0.009530465118587017, "fcm_dpo/delta": 0.10892680287361145, "fcm_dpo/margin": 46.10047912597656, "fcm_dpo/q_t": 0.4076777696609497, "grad_norm": 102.02164459228516, "learning_rate": 1.4662207078575684e-08, "logits/chosen": -0.8528344631195068, "logits/rejected": -0.8196998238563538, "logps/chosen": -378.56854248046875, "logps/ref_chosen": -275.7247314453125, "logps/ref_rejected": -268.91729736328125, "logps/rejected": -417.8615417480469, "loss": 4.5373, "margin_dpo/margin_mean": 46.10047912597656, "margin_dpo/margin_std": 79.43936157226562, "step": 431 }, { "epoch": 0.9047120418848168, "fcm_dpo/beta": 0.009441605769097805, "fcm_dpo/delta": 0.008324447087943554, "fcm_dpo/margin": 62.6112174987793, "fcm_dpo/q_t": 0.3774021565914154, "grad_norm": 76.50348663330078, "learning_rate": 1.40507706120426e-08, "logits/chosen": -0.8662209510803223, "logits/rejected": -0.8538703918457031, "logps/chosen": -387.158203125, "logps/ref_chosen": -291.42010498046875, "logps/ref_rejected": -255.48202514648438, "logps/rejected": -413.8314208984375, "loss": 4.1391, "margin_dpo/margin_mean": 62.6112174987793, "margin_dpo/margin_std": 86.902587890625, "step": 432 }, { "epoch": 0.9068062827225131, "fcm_dpo/beta": 0.009825142100453377, "fcm_dpo/delta": 0.0619993582367897, "fcm_dpo/margin": 55.066165924072266, "fcm_dpo/q_t": 0.3830508589744568, "grad_norm": 89.54483795166016, "learning_rate": 1.345198738661285e-08, "logits/chosen": -0.8318926692008972, "logits/rejected": -0.8286322951316833, "logps/chosen": -353.9482116699219, "logps/ref_chosen": -246.2268829345703, "logps/ref_rejected": -253.65924072265625, "logps/rejected": -416.4466552734375, "loss": 4.2477, "margin_dpo/margin_mean": 55.066165924072266, "margin_dpo/margin_std": 80.00988006591797, "step": 433 }, { "epoch": 0.9089005235602095, "fcm_dpo/beta": 0.01034282986074686, "fcm_dpo/delta": 0.03874684125185013, "fcm_dpo/margin": 54.308753967285156, "fcm_dpo/q_t": 0.3828258812427521, "grad_norm": 85.50724029541016, "learning_rate": 1.2865889513213628e-08, "logits/chosen": -0.8225914239883423, "logits/rejected": -0.8342767953872681, "logps/chosen": -406.2464294433594, "logps/ref_chosen": -295.4618225097656, "logps/ref_rejected": -256.2254333496094, "logps/rejected": -421.31878662109375, "loss": 4.2621, "margin_dpo/margin_mean": 54.308753967285156, "margin_dpo/margin_std": 81.47319030761719, "step": 434 }, { "epoch": 0.9109947643979057, "fcm_dpo/beta": 0.010198265314102173, "fcm_dpo/delta": -0.014171771705150604, "fcm_dpo/margin": 59.84620666503906, "fcm_dpo/q_t": 0.37271490693092346, "grad_norm": 118.80712890625, "learning_rate": 1.2292508422495157e-08, "logits/chosen": -0.8360690474510193, "logits/rejected": -0.8230299949645996, "logps/chosen": -361.0164489746094, "logps/ref_chosen": -260.7384033203125, "logps/ref_rejected": -248.5688018798828, "logps/rejected": -408.69305419921875, "loss": 4.0566, "margin_dpo/margin_mean": 59.84620666503906, "margin_dpo/margin_std": 77.26177978515625, "step": 435 }, { "epoch": 0.9130890052356021, "fcm_dpo/beta": 0.010558899492025375, "fcm_dpo/delta": 0.0584358386695385, "fcm_dpo/margin": 51.58165740966797, "fcm_dpo/q_t": 0.38922375440597534, "grad_norm": 111.06973266601562, "learning_rate": 1.1731874863145142e-08, "logits/chosen": -0.8108433485031128, "logits/rejected": -0.8116201162338257, "logps/chosen": -426.4559326171875, "logps/ref_chosen": -319.3224792480469, "logps/ref_rejected": -299.30322265625, "logps/rejected": -458.01837158203125, "loss": 4.3549, "margin_dpo/margin_mean": 51.58165740966797, "margin_dpo/margin_std": 84.18111419677734, "step": 436 }, { "epoch": 0.9151832460732985, "fcm_dpo/beta": 0.010173209011554718, "fcm_dpo/delta": -0.1447606235742569, "fcm_dpo/margin": 67.71795654296875, "fcm_dpo/q_t": 0.35751214623451233, "grad_norm": 94.34660339355469, "learning_rate": 1.118401890024001e-08, "logits/chosen": -0.844616174697876, "logits/rejected": -0.8318252563476562, "logps/chosen": -377.5007019042969, "logps/ref_chosen": -278.82879638671875, "logps/ref_rejected": -272.55303955078125, "logps/rejected": -438.94293212890625, "loss": 3.9626, "margin_dpo/margin_mean": 67.71794891357422, "margin_dpo/margin_std": 85.74967956542969, "step": 437 }, { "epoch": 0.9172774869109948, "fcm_dpo/beta": 0.009965099394321442, "fcm_dpo/delta": 0.061277735978364944, "fcm_dpo/margin": 36.24957275390625, "fcm_dpo/q_t": 0.42433011531829834, "grad_norm": 114.82047271728516, "learning_rate": 1.06489699136324e-08, "logits/chosen": -0.81844162940979, "logits/rejected": -0.842022716999054, "logps/chosen": -362.96392822265625, "logps/ref_chosen": -259.31903076171875, "logps/ref_rejected": -240.99581909179688, "logps/rejected": -380.8902282714844, "loss": 4.9148, "margin_dpo/margin_mean": 36.24957275390625, "margin_dpo/margin_std": 83.63807678222656, "step": 438 }, { "epoch": 0.9193717277486911, "fcm_dpo/beta": 0.010149678215384483, "fcm_dpo/delta": 0.01735379360616207, "fcm_dpo/margin": 57.44043731689453, "fcm_dpo/q_t": 0.3788164556026459, "grad_norm": 111.58253479003906, "learning_rate": 1.0126756596375685e-08, "logits/chosen": -0.8203510046005249, "logits/rejected": -0.8303657174110413, "logps/chosen": -361.6324462890625, "logps/ref_chosen": -257.1243896484375, "logps/ref_rejected": -243.20416259765625, "logps/rejected": -405.1526184082031, "loss": 4.1973, "margin_dpo/margin_mean": 57.44043731689453, "margin_dpo/margin_std": 83.50491333007812, "step": 439 }, { "epoch": 0.9214659685863874, "fcm_dpo/beta": 0.01079685427248478, "fcm_dpo/delta": 0.07901112735271454, "fcm_dpo/margin": 43.94521713256836, "fcm_dpo/q_t": 0.39794009923934937, "grad_norm": 109.37852478027344, "learning_rate": 9.617406953185136e-09, "logits/chosen": -0.8688513040542603, "logits/rejected": -0.8636762499809265, "logps/chosen": -421.8122863769531, "logps/ref_chosen": -307.5315246582031, "logps/ref_rejected": -264.3540954589844, "logps/rejected": -422.580078125, "loss": 4.5155, "margin_dpo/margin_mean": 43.94521713256836, "margin_dpo/margin_std": 75.19562530517578, "step": 440 }, { "epoch": 0.9235602094240838, "fcm_dpo/beta": 0.01067368034273386, "fcm_dpo/delta": -0.08818989247083664, "fcm_dpo/margin": 63.886962890625, "fcm_dpo/q_t": 0.3568004071712494, "grad_norm": 96.44715881347656, "learning_rate": 9.12094829893642e-09, "logits/chosen": -0.820861279964447, "logits/rejected": -0.8048292994499207, "logps/chosen": -411.4300537109375, "logps/ref_chosen": -309.9819641113281, "logps/ref_rejected": -297.4968566894531, "logps/rejected": -462.8319091796875, "loss": 3.9218, "margin_dpo/margin_mean": 63.88697052001953, "margin_dpo/margin_std": 77.22992706298828, "step": 441 }, { "epoch": 0.9256544502617801, "fcm_dpo/beta": 0.010254503227770329, "fcm_dpo/delta": 0.07677368074655533, "fcm_dpo/margin": 51.52596664428711, "fcm_dpo/q_t": 0.3904913365840912, "grad_norm": 98.88241577148438, "learning_rate": 8.637407257200496e-09, "logits/chosen": -0.8967298865318298, "logits/rejected": -0.8527672290802002, "logps/chosen": -388.2881774902344, "logps/ref_chosen": -278.9791564941406, "logps/ref_rejected": -242.87310791015625, "logps/rejected": -403.7081298828125, "loss": 4.4585, "margin_dpo/margin_mean": 51.525962829589844, "margin_dpo/margin_std": 85.75384521484375, "step": 442 }, { "epoch": 0.9277486910994764, "fcm_dpo/beta": 0.010872803628444672, "fcm_dpo/delta": -0.026267580687999725, "fcm_dpo/margin": 57.026920318603516, "fcm_dpo/q_t": 0.3696047067642212, "grad_norm": 103.62533569335938, "learning_rate": 8.166809758815895e-09, "logits/chosen": -0.7956724166870117, "logits/rejected": -0.8195681571960449, "logps/chosen": -375.178955078125, "logps/ref_chosen": -273.5590515136719, "logps/ref_rejected": -264.0199279785156, "logps/rejected": -422.6667785644531, "loss": 4.1475, "margin_dpo/margin_mean": 57.026920318603516, "margin_dpo/margin_std": 78.09822082519531, "step": 443 }, { "epoch": 0.9298429319371728, "fcm_dpo/beta": 0.010260455310344696, "fcm_dpo/delta": -0.04022660851478577, "fcm_dpo/margin": 61.927947998046875, "fcm_dpo/q_t": 0.3720618486404419, "grad_norm": 100.3301773071289, "learning_rate": 7.709181040498253e-09, "logits/chosen": -0.807881772518158, "logits/rejected": -0.7976375818252563, "logps/chosen": -399.5924377441406, "logps/ref_chosen": -298.1441955566406, "logps/ref_rejected": -268.0572814941406, "logps/rejected": -431.4334716796875, "loss": 4.209, "margin_dpo/margin_mean": 61.927947998046875, "margin_dpo/margin_std": 93.60353088378906, "step": 444 }, { "epoch": 0.9319371727748691, "fcm_dpo/beta": 0.009982587769627571, "fcm_dpo/delta": -0.0828336626291275, "fcm_dpo/margin": 50.54986572265625, "fcm_dpo/q_t": 0.39341387152671814, "grad_norm": 95.27164459228516, "learning_rate": 7.2645456434869965e-09, "logits/chosen": -0.8636192679405212, "logits/rejected": -0.8787074685096741, "logps/chosen": -358.3545837402344, "logps/ref_chosen": -254.54067993164062, "logps/ref_rejected": -264.2445983886719, "logps/rejected": -418.6083679199219, "loss": 4.4375, "margin_dpo/margin_mean": 50.54986572265625, "margin_dpo/margin_std": 77.68645477294922, "step": 445 }, { "epoch": 0.9340314136125655, "fcm_dpo/beta": 0.009562542662024498, "fcm_dpo/delta": 0.023602399975061417, "fcm_dpo/margin": 60.29633331298828, "fcm_dpo/q_t": 0.3763912618160248, "grad_norm": 82.18879699707031, "learning_rate": 6.832927412229017e-09, "logits/chosen": -0.8063375949859619, "logits/rejected": -0.8075209856033325, "logps/chosen": -404.150634765625, "logps/ref_chosen": -306.72247314453125, "logps/ref_rejected": -266.3735656738281, "logps/rejected": -424.0980224609375, "loss": 4.179, "margin_dpo/margin_mean": 60.29633331298828, "margin_dpo/margin_std": 84.00418853759766, "step": 446 }, { "epoch": 0.9361256544502617, "fcm_dpo/beta": 0.009372793138027191, "fcm_dpo/delta": -0.06785252690315247, "fcm_dpo/margin": 65.36695098876953, "fcm_dpo/q_t": 0.36677664518356323, "grad_norm": 81.27397155761719, "learning_rate": 6.414349493100129e-09, "logits/chosen": -0.8006303906440735, "logits/rejected": -0.8021730184555054, "logps/chosen": -357.7692565917969, "logps/ref_chosen": -260.51727294921875, "logps/ref_rejected": -236.47061157226562, "logps/rejected": -399.0894775390625, "loss": 3.948, "margin_dpo/margin_mean": 65.36695098876953, "margin_dpo/margin_std": 76.74752044677734, "step": 447 }, { "epoch": 0.9382198952879581, "fcm_dpo/beta": 0.009358673356473446, "fcm_dpo/delta": 0.04219186305999756, "fcm_dpo/margin": 59.74993133544922, "fcm_dpo/q_t": 0.38221871852874756, "grad_norm": 101.68222045898438, "learning_rate": 6.0088343331638756e-09, "logits/chosen": -0.8103606104850769, "logits/rejected": -0.8069367408752441, "logps/chosen": -372.63238525390625, "logps/ref_chosen": -268.78704833984375, "logps/ref_rejected": -262.1703796386719, "logps/rejected": -425.76568603515625, "loss": 4.17, "margin_dpo/margin_mean": 59.74993896484375, "margin_dpo/margin_std": 81.6711654663086, "step": 448 }, { "epoch": 0.9403141361256544, "fcm_dpo/beta": 0.00959862396121025, "fcm_dpo/delta": -0.02114713191986084, "fcm_dpo/margin": 64.47396850585938, "fcm_dpo/q_t": 0.36557552218437195, "grad_norm": 131.04855346679688, "learning_rate": 5.616403678967624e-09, "logits/chosen": -0.893824577331543, "logits/rejected": -0.8799617290496826, "logps/chosen": -422.57275390625, "logps/ref_chosen": -330.9514465332031, "logps/ref_rejected": -239.76974487304688, "logps/rejected": -395.8650207519531, "loss": 4.0123, "margin_dpo/margin_mean": 64.47396850585938, "margin_dpo/margin_std": 80.27033233642578, "step": 449 }, { "epoch": 0.9424083769633508, "fcm_dpo/beta": 0.009571806527674198, "fcm_dpo/delta": 0.05357804149389267, "fcm_dpo/margin": 52.08460235595703, "fcm_dpo/q_t": 0.3920726478099823, "grad_norm": 97.84994506835938, "learning_rate": 5.2370785753763356e-09, "logits/chosen": -0.784131646156311, "logits/rejected": -0.7929754257202148, "logps/chosen": -395.12738037109375, "logps/ref_chosen": -284.26544189453125, "logps/ref_rejected": -250.5401611328125, "logps/rejected": -413.4866943359375, "loss": 4.2733, "margin_dpo/margin_mean": 52.08460235595703, "margin_dpo/margin_std": 72.87914276123047, "step": 450 }, { "epoch": 0.9445026178010472, "fcm_dpo/beta": 0.009548072703182697, "fcm_dpo/delta": -0.01880437321960926, "fcm_dpo/margin": 52.78652572631836, "fcm_dpo/q_t": 0.3943302035331726, "grad_norm": 102.84935760498047, "learning_rate": 4.8708793644441086e-09, "logits/chosen": -0.8045037984848022, "logits/rejected": -0.777286171913147, "logps/chosen": -414.0666809082031, "logps/ref_chosen": -302.3209228515625, "logps/ref_rejected": -254.09747314453125, "logps/rejected": -418.62969970703125, "loss": 4.419, "margin_dpo/margin_mean": 52.78652572631836, "margin_dpo/margin_std": 83.30145263671875, "step": 451 }, { "epoch": 0.9465968586387434, "fcm_dpo/beta": 0.009476564824581146, "fcm_dpo/delta": 0.00583769753575325, "fcm_dpo/margin": 57.515316009521484, "fcm_dpo/q_t": 0.3849991261959076, "grad_norm": 92.07205963134766, "learning_rate": 4.517825684323323e-09, "logits/chosen": -0.8670358061790466, "logits/rejected": -0.8449291586875916, "logps/chosen": -398.6337890625, "logps/ref_chosen": -299.39215087890625, "logps/ref_rejected": -284.3475036621094, "logps/rejected": -441.1044616699219, "loss": 4.2481, "margin_dpo/margin_mean": 57.51531219482422, "margin_dpo/margin_std": 82.63733673095703, "step": 452 }, { "epoch": 0.9486910994764398, "fcm_dpo/beta": 0.009641487151384354, "fcm_dpo/delta": -0.013517485931515694, "fcm_dpo/margin": 63.50697326660156, "fcm_dpo/q_t": 0.3711569905281067, "grad_norm": 95.07856750488281, "learning_rate": 4.1779364682113794e-09, "logits/chosen": -0.8013238310813904, "logits/rejected": -0.7985789179801941, "logps/chosen": -429.9082336425781, "logps/ref_chosen": -324.6517028808594, "logps/ref_rejected": -304.1527099609375, "logps/rejected": -472.91619873046875, "loss": 4.0431, "margin_dpo/margin_mean": 63.50697326660156, "margin_dpo/margin_std": 84.71268463134766, "step": 453 }, { "epoch": 0.9507853403141361, "fcm_dpo/beta": 0.009615411050617695, "fcm_dpo/delta": -0.0042562056332826614, "fcm_dpo/margin": 62.60576248168945, "fcm_dpo/q_t": 0.3715764582157135, "grad_norm": 76.87505340576172, "learning_rate": 3.851229943335393e-09, "logits/chosen": -0.8534815907478333, "logits/rejected": -0.8655160665512085, "logps/chosen": -401.67681884765625, "logps/ref_chosen": -299.6117248535156, "logps/ref_rejected": -303.74224853515625, "logps/rejected": -468.4130554199219, "loss": 4.1252, "margin_dpo/margin_mean": 62.60576248168945, "margin_dpo/margin_std": 85.04026794433594, "step": 454 }, { "epoch": 0.9528795811518325, "fcm_dpo/beta": 0.010230256244540215, "fcm_dpo/delta": 0.13277457654476166, "fcm_dpo/margin": 46.14201736450195, "fcm_dpo/q_t": 0.402716726064682, "grad_norm": 95.32615661621094, "learning_rate": 3.5377236299748147e-09, "logits/chosen": -0.807562530040741, "logits/rejected": -0.8190088272094727, "logps/chosen": -374.5747985839844, "logps/ref_chosen": -273.6116943359375, "logps/ref_rejected": -274.4293518066406, "logps/rejected": -421.5345458984375, "loss": 4.5798, "margin_dpo/margin_mean": 46.14202117919922, "margin_dpo/margin_std": 85.73726654052734, "step": 455 }, { "epoch": 0.9549738219895288, "fcm_dpo/beta": 0.010185835883021355, "fcm_dpo/delta": -0.10059641301631927, "fcm_dpo/margin": 63.664310455322266, "fcm_dpo/q_t": 0.3761172890663147, "grad_norm": 98.11104583740234, "learning_rate": 3.2374343405217884e-09, "logits/chosen": -0.7371918559074402, "logits/rejected": -0.7502031326293945, "logps/chosen": -438.56854248046875, "logps/ref_chosen": -322.17193603515625, "logps/ref_rejected": -294.54461669921875, "logps/rejected": -474.6055603027344, "loss": 4.3393, "margin_dpo/margin_mean": 63.664310455322266, "margin_dpo/margin_std": 105.6181411743164, "step": 456 }, { "epoch": 0.9570680628272251, "fcm_dpo/beta": 0.009687078185379505, "fcm_dpo/delta": -0.013452993705868721, "fcm_dpo/margin": 63.15357971191406, "fcm_dpo/q_t": 0.3667003810405731, "grad_norm": 85.27921295166016, "learning_rate": 2.9503781785795713e-09, "logits/chosen": -0.7975083589553833, "logits/rejected": -0.8043266534805298, "logps/chosen": -416.79241943359375, "logps/ref_chosen": -307.7962341308594, "logps/ref_rejected": -274.5501403808594, "logps/rejected": -446.69989013671875, "loss": 4.1568, "margin_dpo/margin_mean": 63.15358352661133, "margin_dpo/margin_std": 89.4337158203125, "step": 457 }, { "epoch": 0.9591623036649215, "fcm_dpo/beta": 0.010026252828538418, "fcm_dpo/delta": 0.03702447563409805, "fcm_dpo/margin": 56.15598678588867, "fcm_dpo/q_t": 0.3839564621448517, "grad_norm": 83.73405456542969, "learning_rate": 2.6765705380989432e-09, "logits/chosen": -0.822134256362915, "logits/rejected": -0.8091610670089722, "logps/chosen": -403.46807861328125, "logps/ref_chosen": -297.0316467285156, "logps/ref_rejected": -276.1112365722656, "logps/rejected": -438.7036437988281, "loss": 4.318, "margin_dpo/margin_mean": 56.15598678588867, "margin_dpo/margin_std": 87.26219940185547, "step": 458 }, { "epoch": 0.9612565445026178, "fcm_dpo/beta": 0.010189807042479515, "fcm_dpo/delta": 0.022624600678682327, "fcm_dpo/margin": 51.38506317138672, "fcm_dpo/q_t": 0.39040350914001465, "grad_norm": 119.48713684082031, "learning_rate": 2.416026102552732e-09, "logits/chosen": -0.8736098408699036, "logits/rejected": -0.8673666715621948, "logps/chosen": -394.96923828125, "logps/ref_chosen": -293.5252990722656, "logps/ref_rejected": -289.30126953125, "logps/rejected": -442.1302490234375, "loss": 4.3828, "margin_dpo/margin_mean": 51.385066986083984, "margin_dpo/margin_std": 80.866455078125, "step": 459 }, { "epoch": 0.9633507853403142, "fcm_dpo/beta": 0.010447122156620026, "fcm_dpo/delta": 0.01566571742296219, "fcm_dpo/margin": 50.86057662963867, "fcm_dpo/q_t": 0.38575083017349243, "grad_norm": 106.79894256591797, "learning_rate": 2.168758844148272e-09, "logits/chosen": -0.8482452034950256, "logits/rejected": -0.8550105094909668, "logps/chosen": -422.16461181640625, "logps/ref_chosen": -318.7803649902344, "logps/ref_rejected": -258.7906799316406, "logps/rejected": -413.0354919433594, "loss": 4.335, "margin_dpo/margin_mean": 50.86057662963867, "margin_dpo/margin_std": 78.11962890625, "step": 460 }, { "epoch": 0.9654450261780104, "fcm_dpo/beta": 0.010231072083115578, "fcm_dpo/delta": -0.023067938163876534, "fcm_dpo/margin": 56.03329849243164, "fcm_dpo/q_t": 0.3828889727592468, "grad_norm": 106.28280639648438, "learning_rate": 1.9347820230782295e-09, "logits/chosen": -0.8239161372184753, "logits/rejected": -0.8513062596321106, "logps/chosen": -346.3656311035156, "logps/ref_chosen": -243.9099884033203, "logps/ref_rejected": -232.6382293701172, "logps/rejected": -391.1271667480469, "loss": 4.351, "margin_dpo/margin_mean": 56.03329849243164, "margin_dpo/margin_std": 89.30838775634766, "step": 461 }, { "epoch": 0.9675392670157068, "fcm_dpo/beta": 0.00970435980707407, "fcm_dpo/delta": -0.0686081126332283, "fcm_dpo/margin": 68.12385559082031, "fcm_dpo/q_t": 0.3644864857196808, "grad_norm": 94.2146987915039, "learning_rate": 1.7141081868094209e-09, "logits/chosen": -0.8403683304786682, "logits/rejected": -0.7992677092552185, "logps/chosen": -448.24212646484375, "logps/ref_chosen": -344.09100341796875, "logps/ref_rejected": -252.45037841796875, "logps/rejected": -424.72540283203125, "loss": 4.0783, "margin_dpo/margin_mean": 68.12385559082031, "margin_dpo/margin_std": 93.93057250976562, "step": 462 }, { "epoch": 0.9696335078534032, "fcm_dpo/beta": 0.010029610246419907, "fcm_dpo/delta": 0.08034525066614151, "fcm_dpo/margin": 51.993682861328125, "fcm_dpo/q_t": 0.39002859592437744, "grad_norm": 99.33654022216797, "learning_rate": 1.5067491694100153e-09, "logits/chosen": -0.8565876483917236, "logits/rejected": -0.8210662603378296, "logps/chosen": -397.64654541015625, "logps/ref_chosen": -297.1424560546875, "logps/ref_rejected": -234.0208282470703, "logps/rejected": -386.5185852050781, "loss": 4.4017, "margin_dpo/margin_mean": 51.99367904663086, "margin_dpo/margin_std": 84.37198638916016, "step": 463 }, { "epoch": 0.9717277486910995, "fcm_dpo/beta": 0.01041481550782919, "fcm_dpo/delta": 0.04956157132983208, "fcm_dpo/margin": 52.998783111572266, "fcm_dpo/q_t": 0.3862907886505127, "grad_norm": 133.43360900878906, "learning_rate": 1.3127160909147672e-09, "logits/chosen": -0.8275717496871948, "logits/rejected": -0.8531575202941895, "logps/chosen": -378.3173522949219, "logps/ref_chosen": -265.71075439453125, "logps/ref_rejected": -256.4108581542969, "logps/rejected": -422.01617431640625, "loss": 4.3843, "margin_dpo/margin_mean": 52.998779296875, "margin_dpo/margin_std": 86.16059112548828, "step": 464 }, { "epoch": 0.9738219895287958, "fcm_dpo/beta": 0.009905759245157242, "fcm_dpo/delta": -0.13921670615673065, "fcm_dpo/margin": 65.77295684814453, "fcm_dpo/q_t": 0.36359280347824097, "grad_norm": 68.23556518554688, "learning_rate": 1.1320193567288527e-09, "logits/chosen": -0.8828303217887878, "logits/rejected": -0.8572342395782471, "logps/chosen": -391.30364990234375, "logps/ref_chosen": -293.1527404785156, "logps/ref_rejected": -293.70947265625, "logps/rejected": -457.63336181640625, "loss": 4.0356, "margin_dpo/margin_mean": 65.77295684814453, "margin_dpo/margin_std": 82.50149536132812, "step": 465 }, { "epoch": 0.9759162303664921, "fcm_dpo/beta": 0.009194673970341682, "fcm_dpo/delta": -0.052108634263277054, "fcm_dpo/margin": 70.25971221923828, "fcm_dpo/q_t": 0.36073338985443115, "grad_norm": 77.50592803955078, "learning_rate": 9.64668657069706e-10, "logits/chosen": -0.8009305000305176, "logits/rejected": -0.7544541954994202, "logps/chosen": -353.8938293457031, "logps/ref_chosen": -261.4775695800781, "logps/ref_rejected": -248.36282348632812, "logps/rejected": -411.038818359375, "loss": 3.8645, "margin_dpo/margin_mean": 70.25971221923828, "margin_dpo/margin_std": 77.51724243164062, "step": 466 }, { "epoch": 0.9780104712041885, "fcm_dpo/beta": 0.009051669389009476, "fcm_dpo/delta": 0.01738828979432583, "fcm_dpo/margin": 50.43449401855469, "fcm_dpo/q_t": 0.40313076972961426, "grad_norm": 100.67909240722656, "learning_rate": 8.106729664475176e-10, "logits/chosen": -0.7964373230934143, "logits/rejected": -0.7927530407905579, "logps/chosen": -372.4566955566406, "logps/ref_chosen": -266.354248046875, "logps/ref_rejected": -277.76324462890625, "logps/rejected": -434.3001708984375, "loss": 4.5803, "margin_dpo/margin_mean": 50.43449401855469, "margin_dpo/margin_std": 88.83377075195312, "step": 467 }, { "epoch": 0.9801047120418848, "fcm_dpo/beta": 0.009555336087942123, "fcm_dpo/delta": 0.04439329728484154, "fcm_dpo/margin": 52.71699523925781, "fcm_dpo/q_t": 0.39236387610435486, "grad_norm": 95.95642852783203, "learning_rate": 6.700405431837585e-10, "logits/chosen": -0.8729247450828552, "logits/rejected": -0.847733736038208, "logps/chosen": -419.738525390625, "logps/ref_chosen": -317.9631652832031, "logps/ref_rejected": -261.8744201660156, "logps/rejected": -416.3667297363281, "loss": 4.3829, "margin_dpo/margin_mean": 52.71699523925781, "margin_dpo/margin_std": 82.52155303955078, "step": 468 }, { "epoch": 0.9821989528795811, "fcm_dpo/beta": 0.009460176341235638, "fcm_dpo/delta": -0.016594115644693375, "fcm_dpo/margin": 64.84768676757812, "fcm_dpo/q_t": 0.37026524543762207, "grad_norm": 80.08511352539062, "learning_rate": 5.427789289685347e-10, "logits/chosen": -0.813917875289917, "logits/rejected": -0.8034530282020569, "logps/chosen": -421.9589538574219, "logps/ref_chosen": -324.8868103027344, "logps/ref_rejected": -264.0421447753906, "logps/rejected": -425.9620056152344, "loss": 4.1315, "margin_dpo/margin_mean": 64.84768676757812, "margin_dpo/margin_std": 89.04574584960938, "step": 469 }, { "epoch": 0.9842931937172775, "fcm_dpo/beta": 0.009737596847116947, "fcm_dpo/delta": -0.01096111536026001, "fcm_dpo/margin": 62.489173889160156, "fcm_dpo/q_t": 0.3725891411304474, "grad_norm": 75.43241119384766, "learning_rate": 4.288949484559934e-10, "logits/chosen": -0.8106139898300171, "logits/rejected": -0.8112368583679199, "logps/chosen": -408.78070068359375, "logps/ref_chosen": -314.7042541503906, "logps/ref_rejected": -259.2276611328125, "logps/rejected": -415.7933044433594, "loss": 4.1024, "margin_dpo/margin_mean": 62.489173889160156, "margin_dpo/margin_std": 83.64241027832031, "step": 470 }, { "epoch": 0.9863874345549738, "fcm_dpo/beta": 0.009809708222746849, "fcm_dpo/delta": 0.014611058868467808, "fcm_dpo/margin": 54.49514389038086, "fcm_dpo/q_t": 0.3918081820011139, "grad_norm": 100.71548461914062, "learning_rate": 3.2839470889836627e-10, "logits/chosen": -0.8629408478736877, "logits/rejected": -0.8535081148147583, "logps/chosen": -400.7052917480469, "logps/ref_chosen": -292.5748291015625, "logps/ref_rejected": -298.7525329589844, "logps/rejected": -461.37811279296875, "loss": 4.3804, "margin_dpo/margin_mean": 54.49514389038086, "margin_dpo/margin_std": 88.37163543701172, "step": 471 }, { "epoch": 0.9884816753926702, "fcm_dpo/beta": 0.009502904489636421, "fcm_dpo/delta": -0.0033622095361351967, "fcm_dpo/margin": 63.385826110839844, "fcm_dpo/q_t": 0.3735297918319702, "grad_norm": 83.12667083740234, "learning_rate": 2.412835998185092e-10, "logits/chosen": -0.8532136082649231, "logits/rejected": -0.8691096901893616, "logps/chosen": -336.4410400390625, "logps/ref_chosen": -243.37380981445312, "logps/ref_rejected": -251.12109375, "logps/rejected": -407.5741271972656, "loss": 4.0089, "margin_dpo/margin_mean": 63.385826110839844, "margin_dpo/margin_std": 79.39556884765625, "step": 472 }, { "epoch": 0.9905759162303664, "fcm_dpo/beta": 0.00958459172397852, "fcm_dpo/delta": -0.033373601734638214, "fcm_dpo/margin": 65.87982177734375, "fcm_dpo/q_t": 0.36491870880126953, "grad_norm": 100.53154754638672, "learning_rate": 1.6756629272085544e-10, "logits/chosen": -0.8084653615951538, "logits/rejected": -0.8148469924926758, "logps/chosen": -385.27142333984375, "logps/ref_chosen": -286.3286437988281, "logps/ref_rejected": -258.6535339355469, "logps/rejected": -423.4761962890625, "loss": 3.9915, "margin_dpo/margin_mean": 65.87982177734375, "margin_dpo/margin_std": 82.26141357421875, "step": 473 }, { "epoch": 0.9926701570680628, "fcm_dpo/beta": 0.009304332546889782, "fcm_dpo/delta": 0.059272147715091705, "fcm_dpo/margin": 51.1921272277832, "fcm_dpo/q_t": 0.3959723114967346, "grad_norm": 109.58087921142578, "learning_rate": 1.072467408408384e-10, "logits/chosen": -0.839458167552948, "logits/rejected": -0.8423305749893188, "logps/chosen": -393.1019287109375, "logps/ref_chosen": -288.08966064453125, "logps/ref_rejected": -266.69696044921875, "logps/rejected": -422.9013977050781, "loss": 4.3602, "margin_dpo/margin_mean": 51.1921272277832, "margin_dpo/margin_std": 72.16545104980469, "step": 474 }, { "epoch": 0.9947643979057592, "fcm_dpo/beta": 0.009782197885215282, "fcm_dpo/delta": 0.013262166641652584, "fcm_dpo/margin": 53.376014709472656, "fcm_dpo/q_t": 0.3906119465827942, "grad_norm": 89.71319580078125, "learning_rate": 6.032817893297793e-11, "logits/chosen": -0.812603771686554, "logits/rejected": -0.8350270390510559, "logps/chosen": -350.7452087402344, "logps/ref_chosen": -256.0030517578125, "logps/ref_rejected": -244.50660705566406, "logps/rejected": -392.624755859375, "loss": 4.3125, "margin_dpo/margin_mean": 53.376007080078125, "margin_dpo/margin_std": 78.27056884765625, "step": 475 }, { "epoch": 0.9968586387434555, "fcm_dpo/beta": 0.01014248188585043, "fcm_dpo/delta": 0.01945674978196621, "fcm_dpo/margin": 56.881683349609375, "fcm_dpo/q_t": 0.3829057216644287, "grad_norm": 124.16419982910156, "learning_rate": 2.6813123097352287e-11, "logits/chosen": -0.8888995051383972, "logits/rejected": -0.8492950797080994, "logps/chosen": -414.1214904785156, "logps/ref_chosen": -321.467529296875, "logps/ref_rejected": -295.0592956542969, "logps/rejected": -444.594970703125, "loss": 4.316, "margin_dpo/margin_mean": 56.881683349609375, "margin_dpo/margin_std": 87.3014907836914, "step": 476 }, { "epoch": 0.9989528795811519, "fcm_dpo/beta": 0.010023507289588451, "fcm_dpo/delta": -0.04792780801653862, "fcm_dpo/margin": 59.55145263671875, "fcm_dpo/q_t": 0.38020825386047363, "grad_norm": 126.22605895996094, "learning_rate": 6.7033706447061635e-12, "logits/chosen": -0.7799222469329834, "logits/rejected": -0.792705774307251, "logps/chosen": -385.03021240234375, "logps/ref_chosen": -276.7939758300781, "logps/ref_rejected": -244.82919311523438, "logps/rejected": -412.61688232421875, "loss": 4.3806, "margin_dpo/margin_mean": 59.55145263671875, "margin_dpo/margin_std": 96.72840881347656, "step": 477 }, { "epoch": 0.9989528795811519, "step": 477, "total_flos": 0.0, "train_loss": 4.542374380479568, "train_runtime": 6039.2377, "train_samples_per_second": 10.123, "train_steps_per_second": 0.079 } ], "logging_steps": 1, "max_steps": 477, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }