Model: jackf857/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.85 Source: Original Platform
13034 lines
480 KiB
JSON
13034 lines
480 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 200,
|
|
"global_step": 681,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0014684287812041115,
|
|
"fcm_dpo/beta": 0.10443365573883057,
|
|
"fcm_dpo/delta": 0.42479780316352844,
|
|
"fcm_dpo/margin": -0.02287006378173828,
|
|
"fcm_dpo/q_t": 0.5005706548690796,
|
|
"grad_norm": 87.35734558105469,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": -0.4974287748336792,
|
|
"logits/rejected": -0.43299180269241333,
|
|
"logps/chosen": -50.1435661315918,
|
|
"logps/ref_chosen": -50.14883804321289,
|
|
"logps/ref_rejected": -74.1280517578125,
|
|
"logps/rejected": -74.09991455078125,
|
|
"loss": 1.389,
|
|
"margin_dpo/margin_mean": -0.02287048101425171,
|
|
"margin_dpo/margin_std": 0.41920793056488037,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.002936857562408223,
|
|
"fcm_dpo/beta": 0.10886730253696442,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.06572261452674866,
|
|
"fcm_dpo/q_t": 0.5017880797386169,
|
|
"grad_norm": 78.55793762207031,
|
|
"learning_rate": 7.246376811594203e-09,
|
|
"logits/chosen": -0.49536412954330444,
|
|
"logits/rejected": -0.4594460427761078,
|
|
"logps/chosen": -52.65568923950195,
|
|
"logps/ref_chosen": -52.620704650878906,
|
|
"logps/ref_rejected": -75.30413818359375,
|
|
"logps/rejected": -75.27340698242188,
|
|
"loss": 1.3938,
|
|
"margin_dpo/margin_mean": -0.06572240591049194,
|
|
"margin_dpo/margin_std": 0.35048407316207886,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.004405286343612335,
|
|
"fcm_dpo/beta": 0.11366599053144455,
|
|
"fcm_dpo/delta": 0.4224254786968231,
|
|
"fcm_dpo/margin": 0.009388208389282227,
|
|
"fcm_dpo/q_t": 0.4997447431087494,
|
|
"grad_norm": 80.87034606933594,
|
|
"learning_rate": 1.4492753623188406e-08,
|
|
"logits/chosen": -0.48167288303375244,
|
|
"logits/rejected": -0.44224029779434204,
|
|
"logps/chosen": -60.9686279296875,
|
|
"logps/ref_chosen": -60.981597900390625,
|
|
"logps/ref_rejected": -68.67259216308594,
|
|
"logps/rejected": -68.66902160644531,
|
|
"loss": 1.3853,
|
|
"margin_dpo/margin_mean": 0.009388208389282227,
|
|
"margin_dpo/margin_std": 0.282347708940506,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.005873715124816446,
|
|
"fcm_dpo/beta": 0.11846467852592468,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.04822826385498047,
|
|
"fcm_dpo/q_t": 0.5014264583587646,
|
|
"grad_norm": 85.6677474975586,
|
|
"learning_rate": 2.1739130434782606e-08,
|
|
"logits/chosen": -0.471701055765152,
|
|
"logits/rejected": -0.44420742988586426,
|
|
"logps/chosen": -56.783233642578125,
|
|
"logps/ref_chosen": -56.7677116394043,
|
|
"logps/ref_rejected": -86.64710998535156,
|
|
"logps/rejected": -86.61439514160156,
|
|
"loss": 1.3927,
|
|
"margin_dpo/margin_mean": -0.04822826385498047,
|
|
"margin_dpo/margin_std": 0.4498218595981598,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.007342143906020558,
|
|
"fcm_dpo/beta": 0.12882092595100403,
|
|
"fcm_dpo/delta": 0.41904187202453613,
|
|
"fcm_dpo/margin": 0.016975760459899902,
|
|
"fcm_dpo/q_t": 0.4995834231376648,
|
|
"grad_norm": 115.2800064086914,
|
|
"learning_rate": 2.898550724637681e-08,
|
|
"logits/chosen": -0.5144484043121338,
|
|
"logits/rejected": -0.47074341773986816,
|
|
"logps/chosen": -53.837677001953125,
|
|
"logps/ref_chosen": -53.859375,
|
|
"logps/ref_rejected": -84.14918518066406,
|
|
"logps/rejected": -84.14445495605469,
|
|
"loss": 1.3846,
|
|
"margin_dpo/margin_mean": 0.01697593927383423,
|
|
"margin_dpo/margin_std": 0.3396869897842407,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.00881057268722467,
|
|
"fcm_dpo/beta": 0.13447463512420654,
|
|
"fcm_dpo/delta": 0.42067742347717285,
|
|
"fcm_dpo/margin": 0.010400176048278809,
|
|
"fcm_dpo/q_t": 0.49966543912887573,
|
|
"grad_norm": 123.6097640991211,
|
|
"learning_rate": 3.6231884057971014e-08,
|
|
"logits/chosen": -0.5179563760757446,
|
|
"logits/rejected": -0.4767148494720459,
|
|
"logps/chosen": -63.031429290771484,
|
|
"logps/ref_chosen": -63.007484436035156,
|
|
"logps/ref_rejected": -92.64534759521484,
|
|
"logps/rejected": -92.6796875,
|
|
"loss": 1.3852,
|
|
"margin_dpo/margin_mean": 0.010399848222732544,
|
|
"margin_dpo/margin_std": 0.3767673969268799,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.010279001468428781,
|
|
"fcm_dpo/beta": 0.1462460160255432,
|
|
"fcm_dpo/delta": 0.418557733297348,
|
|
"fcm_dpo/margin": 0.0423530638217926,
|
|
"fcm_dpo/q_t": 0.4985184967517853,
|
|
"grad_norm": 120.26276397705078,
|
|
"learning_rate": 4.347826086956521e-08,
|
|
"logits/chosen": -0.5139331817626953,
|
|
"logits/rejected": -0.4799633026123047,
|
|
"logps/chosen": -57.72328186035156,
|
|
"logps/ref_chosen": -57.774818420410156,
|
|
"logps/ref_rejected": -103.92059326171875,
|
|
"logps/rejected": -103.91140747070312,
|
|
"loss": 1.3807,
|
|
"margin_dpo/margin_mean": 0.042353659868240356,
|
|
"margin_dpo/margin_std": 0.41376739740371704,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.011747430249632892,
|
|
"fcm_dpo/beta": 0.16579358279705048,
|
|
"fcm_dpo/delta": 0.42236655950546265,
|
|
"fcm_dpo/margin": -0.051773250102996826,
|
|
"fcm_dpo/q_t": 0.5021986961364746,
|
|
"grad_norm": 130.60256958007812,
|
|
"learning_rate": 5.0724637681159424e-08,
|
|
"logits/chosen": -0.502518892288208,
|
|
"logits/rejected": -0.47607821226119995,
|
|
"logps/chosen": -58.71660614013672,
|
|
"logps/ref_chosen": -58.716033935546875,
|
|
"logps/ref_rejected": -79.3114242553711,
|
|
"logps/rejected": -79.26022338867188,
|
|
"loss": 1.396,
|
|
"margin_dpo/margin_mean": -0.05177316069602966,
|
|
"margin_dpo/margin_std": 0.37757202982902527,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.013215859030837005,
|
|
"fcm_dpo/beta": 0.18036609888076782,
|
|
"fcm_dpo/delta": 0.42122572660446167,
|
|
"fcm_dpo/margin": -0.041811347007751465,
|
|
"fcm_dpo/q_t": 0.5019670724868774,
|
|
"grad_norm": 154.1094970703125,
|
|
"learning_rate": 5.797101449275362e-08,
|
|
"logits/chosen": -0.4861254096031189,
|
|
"logits/rejected": -0.4390403628349304,
|
|
"logps/chosen": -69.8838882446289,
|
|
"logps/ref_chosen": -69.8668441772461,
|
|
"logps/ref_rejected": -99.6026611328125,
|
|
"logps/rejected": -99.57789611816406,
|
|
"loss": 1.3954,
|
|
"margin_dpo/margin_mean": -0.041812002658843994,
|
|
"margin_dpo/margin_std": 0.42343536019325256,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.014684287812041116,
|
|
"fcm_dpo/beta": 0.20495112240314484,
|
|
"fcm_dpo/delta": 0.8467953205108643,
|
|
"fcm_dpo/margin": 0.017669588327407837,
|
|
"fcm_dpo/q_t": 0.4991891384124756,
|
|
"grad_norm": 147.3089141845703,
|
|
"learning_rate": 6.521739130434782e-08,
|
|
"logits/chosen": -0.4706907868385315,
|
|
"logits/rejected": -0.4263506531715393,
|
|
"logps/chosen": -48.373626708984375,
|
|
"logps/ref_chosen": -48.35768508911133,
|
|
"logps/ref_rejected": -80.37206268310547,
|
|
"logps/rejected": -80.40567016601562,
|
|
"loss": 1.3847,
|
|
"margin_dpo/margin_mean": 0.017669767141342163,
|
|
"margin_dpo/margin_std": 0.4244072437286377,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.016152716593245228,
|
|
"fcm_dpo/beta": 0.24274222552776337,
|
|
"fcm_dpo/delta": 0.8439900875091553,
|
|
"fcm_dpo/margin": 0.026336967945098877,
|
|
"fcm_dpo/q_t": 0.49850064516067505,
|
|
"grad_norm": 164.48699951171875,
|
|
"learning_rate": 7.246376811594203e-08,
|
|
"logits/chosen": -0.4563661217689514,
|
|
"logits/rejected": -0.4311213493347168,
|
|
"logps/chosen": -53.00157928466797,
|
|
"logps/ref_chosen": -53.01685333251953,
|
|
"logps/ref_rejected": -87.78038024902344,
|
|
"logps/rejected": -87.79144287109375,
|
|
"loss": 1.3814,
|
|
"margin_dpo/margin_mean": 0.026336759328842163,
|
|
"margin_dpo/margin_std": 0.3369394540786743,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.01762114537444934,
|
|
"fcm_dpo/beta": 0.26402783393859863,
|
|
"fcm_dpo/delta": 0.42025870084762573,
|
|
"fcm_dpo/margin": 0.01722872257232666,
|
|
"fcm_dpo/q_t": 0.4989096224308014,
|
|
"grad_norm": 249.8050079345703,
|
|
"learning_rate": 7.971014492753623e-08,
|
|
"logits/chosen": -0.5214487314224243,
|
|
"logits/rejected": -0.48471778631210327,
|
|
"logps/chosen": -61.81374740600586,
|
|
"logps/ref_chosen": -61.80543518066406,
|
|
"logps/ref_rejected": -104.8582763671875,
|
|
"logps/rejected": -104.8838119506836,
|
|
"loss": 1.3842,
|
|
"margin_dpo/margin_mean": 0.01722833514213562,
|
|
"margin_dpo/margin_std": 0.39332982897758484,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.01908957415565345,
|
|
"fcm_dpo/beta": 0.31265515089035034,
|
|
"fcm_dpo/delta": 0.8470354080200195,
|
|
"fcm_dpo/margin": 0.010488659143447876,
|
|
"fcm_dpo/q_t": 0.499286413192749,
|
|
"grad_norm": 244.24612426757812,
|
|
"learning_rate": 8.695652173913042e-08,
|
|
"logits/chosen": -0.4612182378768921,
|
|
"logits/rejected": -0.4315667748451233,
|
|
"logps/chosen": -64.26567840576172,
|
|
"logps/ref_chosen": -64.2603530883789,
|
|
"logps/ref_rejected": -87.20307922363281,
|
|
"logps/rejected": -87.21888732910156,
|
|
"loss": 1.3867,
|
|
"margin_dpo/margin_mean": 0.010488033294677734,
|
|
"margin_dpo/margin_std": 0.37841933965682983,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.020558002936857563,
|
|
"fcm_dpo/beta": 0.3400227725505829,
|
|
"fcm_dpo/delta": 0.4154511094093323,
|
|
"fcm_dpo/margin": 0.02040991187095642,
|
|
"fcm_dpo/q_t": 0.4983346462249756,
|
|
"grad_norm": 287.9231262207031,
|
|
"learning_rate": 9.420289855072464e-08,
|
|
"logits/chosen": -0.4573858082294464,
|
|
"logits/rejected": -0.41736847162246704,
|
|
"logps/chosen": -58.149322509765625,
|
|
"logps/ref_chosen": -58.11021041870117,
|
|
"logps/ref_rejected": -104.04708099365234,
|
|
"logps/rejected": -104.10659790039062,
|
|
"loss": 1.3835,
|
|
"margin_dpo/margin_mean": 0.020410001277923584,
|
|
"margin_dpo/margin_std": 0.3933468461036682,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.022026431718061675,
|
|
"fcm_dpo/beta": 0.3845078647136688,
|
|
"fcm_dpo/delta": 0.4113464057445526,
|
|
"fcm_dpo/margin": 0.021445751190185547,
|
|
"fcm_dpo/q_t": 0.4981946051120758,
|
|
"grad_norm": 246.7677764892578,
|
|
"learning_rate": 1.0144927536231885e-07,
|
|
"logits/chosen": -0.4819180965423584,
|
|
"logits/rejected": -0.46261194348335266,
|
|
"logps/chosen": -56.989723205566406,
|
|
"logps/ref_chosen": -56.96691131591797,
|
|
"logps/ref_rejected": -80.80863952636719,
|
|
"logps/rejected": -80.8529052734375,
|
|
"loss": 1.3822,
|
|
"margin_dpo/margin_mean": 0.02144584059715271,
|
|
"margin_dpo/margin_std": 0.3318941593170166,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.023494860499265784,
|
|
"fcm_dpo/beta": 0.4179382026195526,
|
|
"fcm_dpo/delta": 0.4168465733528137,
|
|
"fcm_dpo/margin": 0.020205020904541016,
|
|
"fcm_dpo/q_t": 0.49806952476501465,
|
|
"grad_norm": 354.9513244628906,
|
|
"learning_rate": 1.0869565217391303e-07,
|
|
"logits/chosen": -0.5375505089759827,
|
|
"logits/rejected": -0.49767178297042847,
|
|
"logps/chosen": -61.75188446044922,
|
|
"logps/ref_chosen": -61.739891052246094,
|
|
"logps/ref_rejected": -84.36947631835938,
|
|
"logps/rejected": -84.40167236328125,
|
|
"loss": 1.384,
|
|
"margin_dpo/margin_mean": 0.020204484462738037,
|
|
"margin_dpo/margin_std": 0.374006986618042,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.024963289280469897,
|
|
"fcm_dpo/beta": 0.47306138277053833,
|
|
"fcm_dpo/delta": 0.8088477849960327,
|
|
"fcm_dpo/margin": 0.0909477174282074,
|
|
"fcm_dpo/q_t": 0.4897420406341553,
|
|
"grad_norm": 359.2418212890625,
|
|
"learning_rate": 1.1594202898550725e-07,
|
|
"logits/chosen": -0.49428272247314453,
|
|
"logits/rejected": -0.4564746022224426,
|
|
"logps/chosen": -67.70065307617188,
|
|
"logps/ref_chosen": -67.71033477783203,
|
|
"logps/ref_rejected": -85.37865447998047,
|
|
"logps/rejected": -85.45992279052734,
|
|
"loss": 1.3526,
|
|
"margin_dpo/margin_mean": 0.09094801545143127,
|
|
"margin_dpo/margin_std": 0.41880565881729126,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.02643171806167401,
|
|
"fcm_dpo/beta": 0.49132367968559265,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.024234741926193237,
|
|
"fcm_dpo/q_t": 0.5029305219650269,
|
|
"grad_norm": 411.5271911621094,
|
|
"learning_rate": 1.2318840579710146e-07,
|
|
"logits/chosen": -0.5078892707824707,
|
|
"logits/rejected": -0.45399364829063416,
|
|
"logps/chosen": -47.75193786621094,
|
|
"logps/ref_chosen": -47.7394905090332,
|
|
"logps/ref_rejected": -75.4722900390625,
|
|
"logps/rejected": -75.46050262451172,
|
|
"loss": 1.4031,
|
|
"margin_dpo/margin_mean": -0.02423509955406189,
|
|
"margin_dpo/margin_std": 0.28412166237831116,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.027900146842878122,
|
|
"fcm_dpo/beta": 0.5553874969482422,
|
|
"fcm_dpo/delta": 0.800998330116272,
|
|
"fcm_dpo/margin": 0.09300807118415833,
|
|
"fcm_dpo/q_t": 0.48788678646087646,
|
|
"grad_norm": 400.787109375,
|
|
"learning_rate": 1.3043478260869563e-07,
|
|
"logits/chosen": -0.48504626750946045,
|
|
"logits/rejected": -0.43480148911476135,
|
|
"logps/chosen": -70.17445373535156,
|
|
"logps/ref_chosen": -70.20536041259766,
|
|
"logps/ref_rejected": -89.7575912475586,
|
|
"logps/rejected": -89.81969451904297,
|
|
"loss": 1.3413,
|
|
"margin_dpo/margin_mean": 0.09300819039344788,
|
|
"margin_dpo/margin_std": 0.2757360339164734,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.02936857562408223,
|
|
"fcm_dpo/beta": 0.649980366230011,
|
|
"fcm_dpo/delta": 0.7827930450439453,
|
|
"fcm_dpo/margin": 0.10994091629981995,
|
|
"fcm_dpo/q_t": 0.48339736461639404,
|
|
"grad_norm": 451.47674560546875,
|
|
"learning_rate": 1.3768115942028986e-07,
|
|
"logits/chosen": -0.5489249229431152,
|
|
"logits/rejected": -0.49235957860946655,
|
|
"logps/chosen": -50.79475784301758,
|
|
"logps/ref_chosen": -50.80324172973633,
|
|
"logps/ref_rejected": -78.82334899902344,
|
|
"logps/rejected": -78.9248046875,
|
|
"loss": 1.3281,
|
|
"margin_dpo/margin_mean": 0.10994073748588562,
|
|
"margin_dpo/margin_std": 0.3477732241153717,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.030837004405286344,
|
|
"fcm_dpo/beta": 0.7579153776168823,
|
|
"fcm_dpo/delta": 0.7742947340011597,
|
|
"fcm_dpo/margin": 0.1082988828420639,
|
|
"fcm_dpo/q_t": 0.48166587948799133,
|
|
"grad_norm": 542.7781982421875,
|
|
"learning_rate": 1.4492753623188405e-07,
|
|
"logits/chosen": -0.48926520347595215,
|
|
"logits/rejected": -0.4650164246559143,
|
|
"logps/chosen": -50.03901672363281,
|
|
"logps/ref_chosen": -50.063018798828125,
|
|
"logps/ref_rejected": -77.86878967285156,
|
|
"logps/rejected": -77.95309448242188,
|
|
"loss": 1.3293,
|
|
"margin_dpo/margin_mean": 0.10829953849315643,
|
|
"margin_dpo/margin_std": 0.39389097690582275,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.032305433186490456,
|
|
"fcm_dpo/beta": 0.8798083066940308,
|
|
"fcm_dpo/delta": 0.7186980247497559,
|
|
"fcm_dpo/margin": 0.15809443593025208,
|
|
"fcm_dpo/q_t": 0.46808773279190063,
|
|
"grad_norm": 664.1295776367188,
|
|
"learning_rate": 1.5217391304347825e-07,
|
|
"logits/chosen": -0.5107871294021606,
|
|
"logits/rejected": -0.4682098627090454,
|
|
"logps/chosen": -59.02461242675781,
|
|
"logps/ref_chosen": -59.05763626098633,
|
|
"logps/ref_rejected": -97.50466918945312,
|
|
"logps/rejected": -97.62974548339844,
|
|
"loss": 1.2817,
|
|
"margin_dpo/margin_mean": 0.15809422731399536,
|
|
"margin_dpo/margin_std": 0.4001656174659729,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.033773861967694566,
|
|
"fcm_dpo/beta": 1.0026525259017944,
|
|
"fcm_dpo/delta": 0.6618263721466064,
|
|
"fcm_dpo/margin": 0.20202824473381042,
|
|
"fcm_dpo/q_t": 0.4552893042564392,
|
|
"grad_norm": 672.5951538085938,
|
|
"learning_rate": 1.5942028985507245e-07,
|
|
"logits/chosen": -0.5025913119316101,
|
|
"logits/rejected": -0.4807313084602356,
|
|
"logps/chosen": -60.07830810546875,
|
|
"logps/ref_chosen": -60.07769775390625,
|
|
"logps/ref_rejected": -81.13955688476562,
|
|
"logps/rejected": -81.34219360351562,
|
|
"loss": 1.231,
|
|
"margin_dpo/margin_mean": 0.20202842354774475,
|
|
"margin_dpo/margin_std": 0.3648604154586792,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.03524229074889868,
|
|
"fcm_dpo/beta": 1.1451796293258667,
|
|
"fcm_dpo/delta": 0.6144854426383972,
|
|
"fcm_dpo/margin": 0.21480031311511993,
|
|
"fcm_dpo/q_t": 0.4435741901397705,
|
|
"grad_norm": 806.4075317382812,
|
|
"learning_rate": 1.6666666666666665e-07,
|
|
"logits/chosen": -0.4962637424468994,
|
|
"logits/rejected": -0.4799681305885315,
|
|
"logps/chosen": -44.290122985839844,
|
|
"logps/ref_chosen": -44.29103469848633,
|
|
"logps/ref_rejected": -99.12521362304688,
|
|
"logps/rejected": -99.33910369873047,
|
|
"loss": 1.1955,
|
|
"margin_dpo/margin_mean": 0.21480104327201843,
|
|
"margin_dpo/margin_std": 0.34698599576950073,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.03671071953010279,
|
|
"fcm_dpo/beta": 1.2869091033935547,
|
|
"fcm_dpo/delta": 0.5991071462631226,
|
|
"fcm_dpo/margin": 0.20611488819122314,
|
|
"fcm_dpo/q_t": 0.44144344329833984,
|
|
"grad_norm": 790.001953125,
|
|
"learning_rate": 1.7391304347826085e-07,
|
|
"logits/chosen": -0.5114257335662842,
|
|
"logits/rejected": -0.48204663395881653,
|
|
"logps/chosen": -52.55467987060547,
|
|
"logps/ref_chosen": -52.537052154541016,
|
|
"logps/ref_rejected": -89.34219360351562,
|
|
"logps/rejected": -89.56593322753906,
|
|
"loss": 1.2231,
|
|
"margin_dpo/margin_mean": 0.2061150074005127,
|
|
"margin_dpo/margin_std": 0.4720209240913391,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.0381791483113069,
|
|
"fcm_dpo/beta": 1.4049187898635864,
|
|
"fcm_dpo/delta": 0.3693993091583252,
|
|
"fcm_dpo/margin": 0.3534828722476959,
|
|
"fcm_dpo/q_t": 0.4011384844779968,
|
|
"grad_norm": 799.3506469726562,
|
|
"learning_rate": 1.8115942028985507e-07,
|
|
"logits/chosen": -0.5231348872184753,
|
|
"logits/rejected": -0.4912256896495819,
|
|
"logps/chosen": -53.86709976196289,
|
|
"logps/ref_chosen": -53.92280578613281,
|
|
"logps/ref_rejected": -103.35971069335938,
|
|
"logps/rejected": -103.65748596191406,
|
|
"loss": 1.0901,
|
|
"margin_dpo/margin_mean": 0.3534834384918213,
|
|
"margin_dpo/margin_std": 0.576106071472168,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.039647577092511016,
|
|
"fcm_dpo/beta": 1.4509608745574951,
|
|
"fcm_dpo/delta": 0.16326822340488434,
|
|
"fcm_dpo/margin": 0.4813149869441986,
|
|
"fcm_dpo/q_t": 0.3541679382324219,
|
|
"grad_norm": 837.9793701171875,
|
|
"learning_rate": 1.8840579710144927e-07,
|
|
"logits/chosen": -0.5251492857933044,
|
|
"logits/rejected": -0.4889242649078369,
|
|
"logps/chosen": -42.80194854736328,
|
|
"logps/ref_chosen": -42.898529052734375,
|
|
"logps/ref_rejected": -98.72419738769531,
|
|
"logps/rejected": -99.10894012451172,
|
|
"loss": 0.9227,
|
|
"margin_dpo/margin_mean": 0.4813147783279419,
|
|
"margin_dpo/margin_std": 0.5135878324508667,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.041116005873715125,
|
|
"fcm_dpo/beta": 1.5709292888641357,
|
|
"fcm_dpo/delta": 0.3916556239128113,
|
|
"fcm_dpo/margin": 0.3012993633747101,
|
|
"fcm_dpo/q_t": 0.4064401686191559,
|
|
"grad_norm": 838.3126831054688,
|
|
"learning_rate": 1.9565217391304347e-07,
|
|
"logits/chosen": -0.5148231983184814,
|
|
"logits/rejected": -0.4603886008262634,
|
|
"logps/chosen": -60.546630859375,
|
|
"logps/ref_chosen": -60.55650329589844,
|
|
"logps/ref_rejected": -91.40111541748047,
|
|
"logps/rejected": -91.69254302978516,
|
|
"loss": 1.1735,
|
|
"margin_dpo/margin_mean": 0.301299124956131,
|
|
"margin_dpo/margin_std": 0.6170388460159302,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.042584434654919234,
|
|
"fcm_dpo/beta": 1.6120238304138184,
|
|
"fcm_dpo/delta": 0.06577476114034653,
|
|
"fcm_dpo/margin": 0.4894725978374481,
|
|
"fcm_dpo/q_t": 0.33954018354415894,
|
|
"grad_norm": 836.0704345703125,
|
|
"learning_rate": 2.028985507246377e-07,
|
|
"logits/chosen": -0.5353908538818359,
|
|
"logits/rejected": -0.4876295328140259,
|
|
"logps/chosen": -57.748104095458984,
|
|
"logps/ref_chosen": -57.80778503417969,
|
|
"logps/ref_rejected": -97.39434814453125,
|
|
"logps/rejected": -97.82414245605469,
|
|
"loss": 0.8943,
|
|
"margin_dpo/margin_mean": 0.48947349190711975,
|
|
"margin_dpo/margin_std": 0.5322099328041077,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.04405286343612335,
|
|
"fcm_dpo/beta": 1.5611000061035156,
|
|
"fcm_dpo/delta": -0.16747647523880005,
|
|
"fcm_dpo/margin": 0.6395827531814575,
|
|
"fcm_dpo/q_t": 0.2977033853530884,
|
|
"grad_norm": 659.2262573242188,
|
|
"learning_rate": 2.1014492753623187e-07,
|
|
"logits/chosen": -0.47992774844169617,
|
|
"logits/rejected": -0.44857877492904663,
|
|
"logps/chosen": -52.428070068359375,
|
|
"logps/ref_chosen": -52.577369689941406,
|
|
"logps/ref_rejected": -98.48920440673828,
|
|
"logps/rejected": -98.97948455810547,
|
|
"loss": 0.7823,
|
|
"margin_dpo/margin_mean": 0.6395825147628784,
|
|
"margin_dpo/margin_std": 0.5751562118530273,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.04552129221732746,
|
|
"fcm_dpo/beta": 1.6095309257507324,
|
|
"fcm_dpo/delta": 0.14502495527267456,
|
|
"fcm_dpo/margin": 0.44233372807502747,
|
|
"fcm_dpo/q_t": 0.36425676941871643,
|
|
"grad_norm": 802.032470703125,
|
|
"learning_rate": 2.1739130434782607e-07,
|
|
"logits/chosen": -0.4975692331790924,
|
|
"logits/rejected": -0.45189863443374634,
|
|
"logps/chosen": -63.71034240722656,
|
|
"logps/ref_chosen": -63.806922912597656,
|
|
"logps/ref_rejected": -72.89400482177734,
|
|
"logps/rejected": -73.23976135253906,
|
|
"loss": 1.0302,
|
|
"margin_dpo/margin_mean": 0.4423332214355469,
|
|
"margin_dpo/margin_std": 0.6822764873504639,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.04698972099853157,
|
|
"fcm_dpo/beta": 1.5345242023468018,
|
|
"fcm_dpo/delta": -0.2706429362297058,
|
|
"fcm_dpo/margin": 0.7098908424377441,
|
|
"fcm_dpo/q_t": 0.3092593550682068,
|
|
"grad_norm": 571.6924438476562,
|
|
"learning_rate": 2.2463768115942027e-07,
|
|
"logits/chosen": -0.5193390250205994,
|
|
"logits/rejected": -0.4783915877342224,
|
|
"logps/chosen": -62.573360443115234,
|
|
"logps/ref_chosen": -62.739524841308594,
|
|
"logps/ref_rejected": -89.3175048828125,
|
|
"logps/rejected": -89.86123657226562,
|
|
"loss": 0.8387,
|
|
"margin_dpo/margin_mean": 0.7098907232284546,
|
|
"margin_dpo/margin_std": 0.8541468977928162,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.048458149779735685,
|
|
"fcm_dpo/beta": 1.5268961191177368,
|
|
"fcm_dpo/delta": -0.031100787222385406,
|
|
"fcm_dpo/margin": 0.5745024681091309,
|
|
"fcm_dpo/q_t": 0.3211558759212494,
|
|
"grad_norm": 612.5335083007812,
|
|
"learning_rate": 2.318840579710145e-07,
|
|
"logits/chosen": -0.4916543960571289,
|
|
"logits/rejected": -0.46555328369140625,
|
|
"logps/chosen": -53.1591682434082,
|
|
"logps/ref_chosen": -53.26097106933594,
|
|
"logps/ref_rejected": -87.8851318359375,
|
|
"logps/rejected": -88.35783386230469,
|
|
"loss": 0.8401,
|
|
"margin_dpo/margin_mean": 0.5745028257369995,
|
|
"margin_dpo/margin_std": 0.5594164133071899,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.049926578560939794,
|
|
"fcm_dpo/beta": 1.4713330268859863,
|
|
"fcm_dpo/delta": -0.17497381567955017,
|
|
"fcm_dpo/margin": 0.6845256090164185,
|
|
"fcm_dpo/q_t": 0.3139664828777313,
|
|
"grad_norm": 577.4857177734375,
|
|
"learning_rate": 2.391304347826087e-07,
|
|
"logits/chosen": -0.5157313346862793,
|
|
"logits/rejected": -0.4982021450996399,
|
|
"logps/chosen": -50.735015869140625,
|
|
"logps/ref_chosen": -50.81732940673828,
|
|
"logps/ref_rejected": -101.92184448242188,
|
|
"logps/rejected": -102.52406311035156,
|
|
"loss": 0.842,
|
|
"margin_dpo/margin_mean": 0.6845252513885498,
|
|
"margin_dpo/margin_std": 0.780299723148346,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.0513950073421439,
|
|
"fcm_dpo/beta": 1.321608066558838,
|
|
"fcm_dpo/delta": -0.6159881353378296,
|
|
"fcm_dpo/margin": 1.0402474403381348,
|
|
"fcm_dpo/q_t": 0.25291207432746887,
|
|
"grad_norm": 438.6875305175781,
|
|
"learning_rate": 2.463768115942029e-07,
|
|
"logits/chosen": -0.5221713185310364,
|
|
"logits/rejected": -0.4853857457637787,
|
|
"logps/chosen": -50.92929458618164,
|
|
"logps/ref_chosen": -51.02449035644531,
|
|
"logps/ref_rejected": -106.82443237304688,
|
|
"logps/rejected": -107.76948547363281,
|
|
"loss": 0.665,
|
|
"margin_dpo/margin_mean": 1.0402469635009766,
|
|
"margin_dpo/margin_std": 1.0385708808898926,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.05286343612334802,
|
|
"fcm_dpo/beta": 1.225428581237793,
|
|
"fcm_dpo/delta": -0.465666800737381,
|
|
"fcm_dpo/margin": 1.0272314548492432,
|
|
"fcm_dpo/q_t": 0.2840074300765991,
|
|
"grad_norm": 377.8875427246094,
|
|
"learning_rate": 2.536231884057971e-07,
|
|
"logits/chosen": -0.5505253672599792,
|
|
"logits/rejected": -0.5139901041984558,
|
|
"logps/chosen": -51.958282470703125,
|
|
"logps/ref_chosen": -51.991493225097656,
|
|
"logps/ref_rejected": -86.0406265258789,
|
|
"logps/rejected": -87.03465270996094,
|
|
"loss": 0.8189,
|
|
"margin_dpo/margin_mean": 1.0272307395935059,
|
|
"margin_dpo/margin_std": 1.1449289321899414,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.05433186490455213,
|
|
"fcm_dpo/beta": 1.1077017784118652,
|
|
"fcm_dpo/delta": -0.27637869119644165,
|
|
"fcm_dpo/margin": 0.9796682596206665,
|
|
"fcm_dpo/q_t": 0.3130841851234436,
|
|
"grad_norm": 408.67047119140625,
|
|
"learning_rate": 2.6086956521739126e-07,
|
|
"logits/chosen": -0.5258996486663818,
|
|
"logits/rejected": -0.4838007390499115,
|
|
"logps/chosen": -62.77622985839844,
|
|
"logps/ref_chosen": -62.807106018066406,
|
|
"logps/ref_rejected": -77.89507293701172,
|
|
"logps/rejected": -78.84385681152344,
|
|
"loss": 0.8676,
|
|
"margin_dpo/margin_mean": 0.979668140411377,
|
|
"margin_dpo/margin_std": 1.212325096130371,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.055800293685756244,
|
|
"fcm_dpo/beta": 1.0282557010650635,
|
|
"fcm_dpo/delta": -0.5638899803161621,
|
|
"fcm_dpo/margin": 1.3030188083648682,
|
|
"fcm_dpo/q_t": 0.2778816223144531,
|
|
"grad_norm": 352.4019470214844,
|
|
"learning_rate": 2.681159420289855e-07,
|
|
"logits/chosen": -0.5080125331878662,
|
|
"logits/rejected": -0.4751642942428589,
|
|
"logps/chosen": -48.19729995727539,
|
|
"logps/ref_chosen": -48.39051818847656,
|
|
"logps/ref_rejected": -97.91244506835938,
|
|
"logps/rejected": -99.0222396850586,
|
|
"loss": 0.7363,
|
|
"margin_dpo/margin_mean": 1.3030191659927368,
|
|
"margin_dpo/margin_std": 1.503594160079956,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.05726872246696035,
|
|
"fcm_dpo/beta": 0.9014551639556885,
|
|
"fcm_dpo/delta": -0.5705999732017517,
|
|
"fcm_dpo/margin": 1.47984778881073,
|
|
"fcm_dpo/q_t": 0.24894997477531433,
|
|
"grad_norm": 300.4083557128906,
|
|
"learning_rate": 2.753623188405797e-07,
|
|
"logits/chosen": -0.5485913753509521,
|
|
"logits/rejected": -0.5085880756378174,
|
|
"logps/chosen": -50.66078567504883,
|
|
"logps/ref_chosen": -50.75047302246094,
|
|
"logps/ref_rejected": -78.56951141357422,
|
|
"logps/rejected": -79.95967102050781,
|
|
"loss": 0.6737,
|
|
"margin_dpo/margin_mean": 1.4798476696014404,
|
|
"margin_dpo/margin_std": 1.253267765045166,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.05873715124816446,
|
|
"fcm_dpo/beta": 0.8415946960449219,
|
|
"fcm_dpo/delta": -0.3833761215209961,
|
|
"fcm_dpo/margin": 1.4126371145248413,
|
|
"fcm_dpo/q_t": 0.28941503167152405,
|
|
"grad_norm": 265.9231872558594,
|
|
"learning_rate": 2.8260869565217386e-07,
|
|
"logits/chosen": -0.5170412659645081,
|
|
"logits/rejected": -0.48721107840538025,
|
|
"logps/chosen": -57.76616668701172,
|
|
"logps/ref_chosen": -57.985069274902344,
|
|
"logps/ref_rejected": -74.3000717163086,
|
|
"logps/rejected": -75.49380493164062,
|
|
"loss": 0.7687,
|
|
"margin_dpo/margin_mean": 1.4126379489898682,
|
|
"margin_dpo/margin_std": 1.5114383697509766,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.06020558002936858,
|
|
"fcm_dpo/beta": 0.7668871879577637,
|
|
"fcm_dpo/delta": -0.47359752655029297,
|
|
"fcm_dpo/margin": 1.648345947265625,
|
|
"fcm_dpo/q_t": 0.2783043086528778,
|
|
"grad_norm": 277.421875,
|
|
"learning_rate": 2.898550724637681e-07,
|
|
"logits/chosen": -0.5229369401931763,
|
|
"logits/rejected": -0.48551464080810547,
|
|
"logps/chosen": -62.65602111816406,
|
|
"logps/ref_chosen": -62.69581604003906,
|
|
"logps/ref_rejected": -97.02352905273438,
|
|
"logps/rejected": -98.632080078125,
|
|
"loss": 0.7602,
|
|
"margin_dpo/margin_mean": 1.6483460664749146,
|
|
"margin_dpo/margin_std": 1.7497031688690186,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.06167400881057269,
|
|
"fcm_dpo/beta": 0.6656206846237183,
|
|
"fcm_dpo/delta": -0.7854231595993042,
|
|
"fcm_dpo/margin": 2.276841640472412,
|
|
"fcm_dpo/q_t": 0.23248319327831268,
|
|
"grad_norm": 194.76841735839844,
|
|
"learning_rate": 2.971014492753623e-07,
|
|
"logits/chosen": -0.5424157381057739,
|
|
"logits/rejected": -0.4958062767982483,
|
|
"logps/chosen": -58.732154846191406,
|
|
"logps/ref_chosen": -58.966426849365234,
|
|
"logps/ref_rejected": -109.90837097167969,
|
|
"logps/rejected": -111.95094299316406,
|
|
"loss": 0.6171,
|
|
"margin_dpo/margin_mean": 2.276841640472412,
|
|
"margin_dpo/margin_std": 2.000026226043701,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.0631424375917768,
|
|
"fcm_dpo/beta": 0.5898722410202026,
|
|
"fcm_dpo/delta": -0.4751095175743103,
|
|
"fcm_dpo/margin": 2.1357011795043945,
|
|
"fcm_dpo/q_t": 0.25335824489593506,
|
|
"grad_norm": 190.30642700195312,
|
|
"learning_rate": 3.043478260869565e-07,
|
|
"logits/chosen": -0.5324772596359253,
|
|
"logits/rejected": -0.5075618028640747,
|
|
"logps/chosen": -53.679229736328125,
|
|
"logps/ref_chosen": -54.15599822998047,
|
|
"logps/ref_rejected": -96.48019409179688,
|
|
"logps/rejected": -98.13912963867188,
|
|
"loss": 0.6504,
|
|
"margin_dpo/margin_mean": 2.1357011795043945,
|
|
"margin_dpo/margin_std": 1.690740942955017,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.06461086637298091,
|
|
"fcm_dpo/beta": 0.5374739170074463,
|
|
"fcm_dpo/delta": -0.4890367388725281,
|
|
"fcm_dpo/margin": 2.3702030181884766,
|
|
"fcm_dpo/q_t": 0.2507287263870239,
|
|
"grad_norm": 187.52468872070312,
|
|
"learning_rate": 3.115942028985507e-07,
|
|
"logits/chosen": -0.4761464595794678,
|
|
"logits/rejected": -0.4557410180568695,
|
|
"logps/chosen": -49.860252380371094,
|
|
"logps/ref_chosen": -50.07849884033203,
|
|
"logps/ref_rejected": -108.78376007080078,
|
|
"logps/rejected": -110.93571472167969,
|
|
"loss": 0.6411,
|
|
"margin_dpo/margin_mean": 2.370201587677002,
|
|
"margin_dpo/margin_std": 1.8710044622421265,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.06607929515418502,
|
|
"fcm_dpo/beta": 0.5190870761871338,
|
|
"fcm_dpo/delta": -0.12950783967971802,
|
|
"fcm_dpo/margin": 1.863038420677185,
|
|
"fcm_dpo/q_t": 0.3222770094871521,
|
|
"grad_norm": 173.381591796875,
|
|
"learning_rate": 3.188405797101449e-07,
|
|
"logits/chosen": -0.4617079496383667,
|
|
"logits/rejected": -0.44868040084838867,
|
|
"logps/chosen": -48.283241271972656,
|
|
"logps/ref_chosen": -48.4149284362793,
|
|
"logps/ref_rejected": -77.93643188476562,
|
|
"logps/rejected": -79.66777038574219,
|
|
"loss": 0.8516,
|
|
"margin_dpo/margin_mean": 1.8630379438400269,
|
|
"margin_dpo/margin_std": 2.1737613677978516,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.06754772393538913,
|
|
"fcm_dpo/beta": 0.4841097593307495,
|
|
"fcm_dpo/delta": -0.41270509362220764,
|
|
"fcm_dpo/margin": 2.5065417289733887,
|
|
"fcm_dpo/q_t": 0.28057238459587097,
|
|
"grad_norm": 169.7103271484375,
|
|
"learning_rate": 3.260869565217391e-07,
|
|
"logits/chosen": -0.5448036193847656,
|
|
"logits/rejected": -0.49467170238494873,
|
|
"logps/chosen": -55.76255798339844,
|
|
"logps/ref_chosen": -55.999427795410156,
|
|
"logps/ref_rejected": -95.652587890625,
|
|
"logps/rejected": -97.92225646972656,
|
|
"loss": 0.7446,
|
|
"margin_dpo/margin_mean": 2.5065412521362305,
|
|
"margin_dpo/margin_std": 2.575545072555542,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.06901615271659324,
|
|
"fcm_dpo/beta": 0.4579503536224365,
|
|
"fcm_dpo/delta": -0.2642349898815155,
|
|
"fcm_dpo/margin": 2.3719077110290527,
|
|
"fcm_dpo/q_t": 0.28744781017303467,
|
|
"grad_norm": 176.13291931152344,
|
|
"learning_rate": 3.333333333333333e-07,
|
|
"logits/chosen": -0.5746684074401855,
|
|
"logits/rejected": -0.5221650004386902,
|
|
"logps/chosen": -57.57725524902344,
|
|
"logps/ref_chosen": -57.92607879638672,
|
|
"logps/ref_rejected": -94.67920684814453,
|
|
"logps/rejected": -96.7022933959961,
|
|
"loss": 0.7478,
|
|
"margin_dpo/margin_mean": 2.371908664703369,
|
|
"margin_dpo/margin_std": 2.1393775939941406,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.07048458149779736,
|
|
"fcm_dpo/beta": 0.43097007274627686,
|
|
"fcm_dpo/delta": -0.18657633662223816,
|
|
"fcm_dpo/margin": 2.351602554321289,
|
|
"fcm_dpo/q_t": 0.3005352318286896,
|
|
"grad_norm": 185.70761108398438,
|
|
"learning_rate": 3.4057971014492755e-07,
|
|
"logits/chosen": -0.5795384645462036,
|
|
"logits/rejected": -0.5214025378227234,
|
|
"logps/chosen": -57.07599639892578,
|
|
"logps/ref_chosen": -57.188072204589844,
|
|
"logps/ref_rejected": -88.0166015625,
|
|
"logps/rejected": -90.25611877441406,
|
|
"loss": 0.7955,
|
|
"margin_dpo/margin_mean": 2.3516030311584473,
|
|
"margin_dpo/margin_std": 2.1867825984954834,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.07195301027900147,
|
|
"fcm_dpo/beta": 0.4181467294692993,
|
|
"fcm_dpo/delta": -0.271043062210083,
|
|
"fcm_dpo/margin": 2.6118998527526855,
|
|
"fcm_dpo/q_t": 0.29078733921051025,
|
|
"grad_norm": 488.25555419921875,
|
|
"learning_rate": 3.478260869565217e-07,
|
|
"logits/chosen": -0.5379482507705688,
|
|
"logits/rejected": -0.4796925187110901,
|
|
"logps/chosen": -61.225616455078125,
|
|
"logps/ref_chosen": -61.685272216796875,
|
|
"logps/ref_rejected": -83.76747131347656,
|
|
"logps/rejected": -85.91972351074219,
|
|
"loss": 0.7584,
|
|
"margin_dpo/margin_mean": 2.6118998527526855,
|
|
"margin_dpo/margin_std": 2.511692523956299,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.07342143906020558,
|
|
"fcm_dpo/beta": 0.3943514823913574,
|
|
"fcm_dpo/delta": -0.21341389417648315,
|
|
"fcm_dpo/margin": 2.6372203826904297,
|
|
"fcm_dpo/q_t": 0.29721564054489136,
|
|
"grad_norm": 156.1033935546875,
|
|
"learning_rate": 3.5507246376811595e-07,
|
|
"logits/chosen": -0.5170506834983826,
|
|
"logits/rejected": -0.48113885521888733,
|
|
"logps/chosen": -58.57736587524414,
|
|
"logps/ref_chosen": -58.72413635253906,
|
|
"logps/ref_rejected": -96.35814666748047,
|
|
"logps/rejected": -98.84859466552734,
|
|
"loss": 0.7786,
|
|
"margin_dpo/margin_mean": 2.6372203826904297,
|
|
"margin_dpo/margin_std": 2.533358097076416,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.07488986784140969,
|
|
"fcm_dpo/beta": 0.37727904319763184,
|
|
"fcm_dpo/delta": -0.17445003986358643,
|
|
"fcm_dpo/margin": 2.6572084426879883,
|
|
"fcm_dpo/q_t": 0.317116379737854,
|
|
"grad_norm": 127.68961334228516,
|
|
"learning_rate": 3.6231884057971015e-07,
|
|
"logits/chosen": -0.49076852202415466,
|
|
"logits/rejected": -0.45782989263534546,
|
|
"logps/chosen": -61.265625,
|
|
"logps/ref_chosen": -61.3736686706543,
|
|
"logps/ref_rejected": -76.00199890136719,
|
|
"logps/rejected": -78.55116271972656,
|
|
"loss": 0.847,
|
|
"margin_dpo/margin_mean": 2.6572086811065674,
|
|
"margin_dpo/margin_std": 3.0995936393737793,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.0763582966226138,
|
|
"fcm_dpo/beta": 0.34055250883102417,
|
|
"fcm_dpo/delta": -0.6958788633346558,
|
|
"fcm_dpo/margin": 4.237062931060791,
|
|
"fcm_dpo/q_t": 0.2313612997531891,
|
|
"grad_norm": 108.22584533691406,
|
|
"learning_rate": 3.695652173913043e-07,
|
|
"logits/chosen": -0.5450096130371094,
|
|
"logits/rejected": -0.49051666259765625,
|
|
"logps/chosen": -51.71245574951172,
|
|
"logps/ref_chosen": -52.33735656738281,
|
|
"logps/ref_rejected": -79.97391510009766,
|
|
"logps/rejected": -83.58607482910156,
|
|
"loss": 0.6041,
|
|
"margin_dpo/margin_mean": 4.237063407897949,
|
|
"margin_dpo/margin_std": 3.3426589965820312,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.07782672540381791,
|
|
"fcm_dpo/beta": 0.3099779486656189,
|
|
"fcm_dpo/delta": -0.5153226256370544,
|
|
"fcm_dpo/margin": 4.196630477905273,
|
|
"fcm_dpo/q_t": 0.26962772011756897,
|
|
"grad_norm": 118.41160583496094,
|
|
"learning_rate": 3.7681159420289855e-07,
|
|
"logits/chosen": -0.5767320394515991,
|
|
"logits/rejected": -0.554436206817627,
|
|
"logps/chosen": -53.15708541870117,
|
|
"logps/ref_chosen": -53.31465148925781,
|
|
"logps/ref_rejected": -91.78359985351562,
|
|
"logps/rejected": -95.82264709472656,
|
|
"loss": 0.6986,
|
|
"margin_dpo/margin_mean": 4.196630477905273,
|
|
"margin_dpo/margin_std": 3.9024457931518555,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.07929515418502203,
|
|
"fcm_dpo/beta": 0.284817636013031,
|
|
"fcm_dpo/delta": -0.3066610097885132,
|
|
"fcm_dpo/margin": 3.9426732063293457,
|
|
"fcm_dpo/q_t": 0.2827852964401245,
|
|
"grad_norm": 96.0743408203125,
|
|
"learning_rate": 3.8405797101449274e-07,
|
|
"logits/chosen": -0.5662685632705688,
|
|
"logits/rejected": -0.511724591255188,
|
|
"logps/chosen": -50.537540435791016,
|
|
"logps/ref_chosen": -50.68865966796875,
|
|
"logps/ref_rejected": -91.71539306640625,
|
|
"logps/rejected": -95.50694274902344,
|
|
"loss": 0.7202,
|
|
"margin_dpo/margin_mean": 3.9426727294921875,
|
|
"margin_dpo/margin_std": 3.563432216644287,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.08076358296622614,
|
|
"fcm_dpo/beta": 0.2634790539741516,
|
|
"fcm_dpo/delta": -0.4253869354724884,
|
|
"fcm_dpo/margin": 4.647494792938232,
|
|
"fcm_dpo/q_t": 0.2821628153324127,
|
|
"grad_norm": 104.46064758300781,
|
|
"learning_rate": 3.9130434782608694e-07,
|
|
"logits/chosen": -0.6163734197616577,
|
|
"logits/rejected": -0.5525354146957397,
|
|
"logps/chosen": -62.8038330078125,
|
|
"logps/ref_chosen": -62.615234375,
|
|
"logps/ref_rejected": -88.99349975585938,
|
|
"logps/rejected": -93.82960510253906,
|
|
"loss": 0.7725,
|
|
"margin_dpo/margin_mean": 4.647494316101074,
|
|
"margin_dpo/margin_std": 4.973275184631348,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.08223201174743025,
|
|
"fcm_dpo/beta": 0.24703602492809296,
|
|
"fcm_dpo/delta": -0.26557958126068115,
|
|
"fcm_dpo/margin": 4.4003424644470215,
|
|
"fcm_dpo/q_t": 0.2998681366443634,
|
|
"grad_norm": 82.96701049804688,
|
|
"learning_rate": 3.9855072463768114e-07,
|
|
"logits/chosen": -0.5948492288589478,
|
|
"logits/rejected": -0.5514296293258667,
|
|
"logps/chosen": -57.949623107910156,
|
|
"logps/ref_chosen": -57.9327278137207,
|
|
"logps/ref_rejected": -94.1744384765625,
|
|
"logps/rejected": -98.5916748046875,
|
|
"loss": 0.8196,
|
|
"margin_dpo/margin_mean": 4.4003424644470215,
|
|
"margin_dpo/margin_std": 4.814118385314941,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.08370044052863436,
|
|
"fcm_dpo/beta": 0.2332727015018463,
|
|
"fcm_dpo/delta": -0.33255159854888916,
|
|
"fcm_dpo/margin": 4.910983085632324,
|
|
"fcm_dpo/q_t": 0.2743523120880127,
|
|
"grad_norm": 88.67000579833984,
|
|
"learning_rate": 4.057971014492754e-07,
|
|
"logits/chosen": -0.5736434459686279,
|
|
"logits/rejected": -0.5462496280670166,
|
|
"logps/chosen": -70.63133239746094,
|
|
"logps/ref_chosen": -70.49528503417969,
|
|
"logps/ref_rejected": -95.56546020507812,
|
|
"logps/rejected": -100.61249542236328,
|
|
"loss": 0.7252,
|
|
"margin_dpo/margin_mean": 4.910983085632324,
|
|
"margin_dpo/margin_std": 4.256160259246826,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.08516886930983847,
|
|
"fcm_dpo/beta": 0.21495123207569122,
|
|
"fcm_dpo/delta": -0.4436984658241272,
|
|
"fcm_dpo/margin": 5.7713303565979,
|
|
"fcm_dpo/q_t": 0.27471083402633667,
|
|
"grad_norm": 93.05653381347656,
|
|
"learning_rate": 4.1304347826086954e-07,
|
|
"logits/chosen": -0.5842028856277466,
|
|
"logits/rejected": -0.5084757804870605,
|
|
"logps/chosen": -62.119361877441406,
|
|
"logps/ref_chosen": -62.13294219970703,
|
|
"logps/ref_rejected": -84.61729431152344,
|
|
"logps/rejected": -90.37504577636719,
|
|
"loss": 0.7351,
|
|
"margin_dpo/margin_mean": 5.771330833435059,
|
|
"margin_dpo/margin_std": 5.583444595336914,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.08663729809104258,
|
|
"fcm_dpo/beta": 0.19481170177459717,
|
|
"fcm_dpo/delta": -0.34936845302581787,
|
|
"fcm_dpo/margin": 5.904120445251465,
|
|
"fcm_dpo/q_t": 0.2848263382911682,
|
|
"grad_norm": 84.72233581542969,
|
|
"learning_rate": 4.2028985507246374e-07,
|
|
"logits/chosen": -0.608730673789978,
|
|
"logits/rejected": -0.5659762620925903,
|
|
"logps/chosen": -52.416080474853516,
|
|
"logps/ref_chosen": -51.932525634765625,
|
|
"logps/ref_rejected": -88.88520050048828,
|
|
"logps/rejected": -95.27287292480469,
|
|
"loss": 0.755,
|
|
"margin_dpo/margin_mean": 5.904120922088623,
|
|
"margin_dpo/margin_std": 5.746884822845459,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.0881057268722467,
|
|
"fcm_dpo/beta": 0.18991079926490784,
|
|
"fcm_dpo/delta": -0.10931669175624847,
|
|
"fcm_dpo/margin": 4.990189075469971,
|
|
"fcm_dpo/q_t": 0.30916330218315125,
|
|
"grad_norm": 100.4244384765625,
|
|
"learning_rate": 4.2753623188405794e-07,
|
|
"logits/chosen": -0.573798418045044,
|
|
"logits/rejected": -0.51241534948349,
|
|
"logps/chosen": -62.01226806640625,
|
|
"logps/ref_chosen": -60.94218826293945,
|
|
"logps/ref_rejected": -85.39340209960938,
|
|
"logps/rejected": -91.45367431640625,
|
|
"loss": 0.8378,
|
|
"margin_dpo/margin_mean": 4.9901885986328125,
|
|
"margin_dpo/margin_std": 5.157164096832275,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.08957415565345081,
|
|
"fcm_dpo/beta": 0.1817345768213272,
|
|
"fcm_dpo/delta": -0.2543361485004425,
|
|
"fcm_dpo/margin": 5.912901401519775,
|
|
"fcm_dpo/q_t": 0.3096533417701721,
|
|
"grad_norm": 76.46479797363281,
|
|
"learning_rate": 4.3478260869565214e-07,
|
|
"logits/chosen": -0.5715320110321045,
|
|
"logits/rejected": -0.5351488590240479,
|
|
"logps/chosen": -60.89082336425781,
|
|
"logps/ref_chosen": -60.633522033691406,
|
|
"logps/ref_rejected": -89.85249328613281,
|
|
"logps/rejected": -96.02268981933594,
|
|
"loss": 0.8382,
|
|
"margin_dpo/margin_mean": 5.912901401519775,
|
|
"margin_dpo/margin_std": 7.353564262390137,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.09104258443465492,
|
|
"fcm_dpo/beta": 0.18260258436203003,
|
|
"fcm_dpo/delta": 0.0254787877202034,
|
|
"fcm_dpo/margin": 4.519835948944092,
|
|
"fcm_dpo/q_t": 0.33831268548965454,
|
|
"grad_norm": 84.54349517822266,
|
|
"learning_rate": 4.420289855072464e-07,
|
|
"logits/chosen": -0.5869123935699463,
|
|
"logits/rejected": -0.5528802871704102,
|
|
"logps/chosen": -56.695011138916016,
|
|
"logps/ref_chosen": -56.15077209472656,
|
|
"logps/ref_rejected": -75.56619262695312,
|
|
"logps/rejected": -80.6302719116211,
|
|
"loss": 0.9,
|
|
"margin_dpo/margin_mean": 4.51983642578125,
|
|
"margin_dpo/margin_std": 5.208561420440674,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.09251101321585903,
|
|
"fcm_dpo/beta": 0.17352905869483948,
|
|
"fcm_dpo/delta": -0.23942901194095612,
|
|
"fcm_dpo/margin": 6.1104888916015625,
|
|
"fcm_dpo/q_t": 0.29208725690841675,
|
|
"grad_norm": 78.32554626464844,
|
|
"learning_rate": 4.4927536231884053e-07,
|
|
"logits/chosen": -0.5659127235412598,
|
|
"logits/rejected": -0.5172122716903687,
|
|
"logps/chosen": -74.04725646972656,
|
|
"logps/ref_chosen": -73.14739227294922,
|
|
"logps/ref_rejected": -97.61006164550781,
|
|
"logps/rejected": -104.62040710449219,
|
|
"loss": 0.7887,
|
|
"margin_dpo/margin_mean": 6.1104888916015625,
|
|
"margin_dpo/margin_std": 6.0076375007629395,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.09397944199706314,
|
|
"fcm_dpo/beta": 0.1625455915927887,
|
|
"fcm_dpo/delta": -0.30817508697509766,
|
|
"fcm_dpo/margin": 6.8527960777282715,
|
|
"fcm_dpo/q_t": 0.29219383001327515,
|
|
"grad_norm": 72.54730987548828,
|
|
"learning_rate": 4.5652173913043473e-07,
|
|
"logits/chosen": -0.540681004524231,
|
|
"logits/rejected": -0.5078280568122864,
|
|
"logps/chosen": -54.01070022583008,
|
|
"logps/ref_chosen": -53.998600006103516,
|
|
"logps/ref_rejected": -93.53019714355469,
|
|
"logps/rejected": -100.39509582519531,
|
|
"loss": 0.793,
|
|
"margin_dpo/margin_mean": 6.8527960777282715,
|
|
"margin_dpo/margin_std": 7.245349884033203,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.09544787077826726,
|
|
"fcm_dpo/beta": 0.15612056851387024,
|
|
"fcm_dpo/delta": -0.23279529809951782,
|
|
"fcm_dpo/margin": 6.764092445373535,
|
|
"fcm_dpo/q_t": 0.2945548892021179,
|
|
"grad_norm": 71.34774780273438,
|
|
"learning_rate": 4.63768115942029e-07,
|
|
"logits/chosen": -0.6489189267158508,
|
|
"logits/rejected": -0.6330606937408447,
|
|
"logps/chosen": -66.12747192382812,
|
|
"logps/ref_chosen": -64.83599853515625,
|
|
"logps/ref_rejected": -109.94645690917969,
|
|
"logps/rejected": -118.00201416015625,
|
|
"loss": 0.8093,
|
|
"margin_dpo/margin_mean": 6.764091968536377,
|
|
"margin_dpo/margin_std": 6.935907363891602,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.09691629955947137,
|
|
"fcm_dpo/beta": 0.15022742748260498,
|
|
"fcm_dpo/delta": -0.14811818301677704,
|
|
"fcm_dpo/margin": 6.522555828094482,
|
|
"fcm_dpo/q_t": 0.31489285826683044,
|
|
"grad_norm": 69.30376434326172,
|
|
"learning_rate": 4.7101449275362313e-07,
|
|
"logits/chosen": -0.6159607768058777,
|
|
"logits/rejected": -0.5809246301651001,
|
|
"logps/chosen": -52.61880874633789,
|
|
"logps/ref_chosen": -51.44352722167969,
|
|
"logps/ref_rejected": -75.63629913330078,
|
|
"logps/rejected": -83.33413696289062,
|
|
"loss": 0.834,
|
|
"margin_dpo/margin_mean": 6.522555351257324,
|
|
"margin_dpo/margin_std": 7.281524658203125,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.09838472834067548,
|
|
"fcm_dpo/beta": 0.14798378944396973,
|
|
"fcm_dpo/delta": -0.15271174907684326,
|
|
"fcm_dpo/margin": 6.6755170822143555,
|
|
"fcm_dpo/q_t": 0.3114186227321625,
|
|
"grad_norm": 61.069061279296875,
|
|
"learning_rate": 4.782608695652174e-07,
|
|
"logits/chosen": -0.5994589328765869,
|
|
"logits/rejected": -0.5600971579551697,
|
|
"logps/chosen": -60.29442596435547,
|
|
"logps/ref_chosen": -59.34080505371094,
|
|
"logps/ref_rejected": -72.78728485107422,
|
|
"logps/rejected": -80.41642761230469,
|
|
"loss": 0.8244,
|
|
"margin_dpo/margin_mean": 6.6755170822143555,
|
|
"margin_dpo/margin_std": 7.188636779785156,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.09985315712187959,
|
|
"fcm_dpo/beta": 0.1445780098438263,
|
|
"fcm_dpo/delta": -0.11112671345472336,
|
|
"fcm_dpo/margin": 6.57466983795166,
|
|
"fcm_dpo/q_t": 0.3090569078922272,
|
|
"grad_norm": 61.32022476196289,
|
|
"learning_rate": 4.855072463768116e-07,
|
|
"logits/chosen": -0.5993345975875854,
|
|
"logits/rejected": -0.5429648160934448,
|
|
"logps/chosen": -66.08113861083984,
|
|
"logps/ref_chosen": -65.2058334350586,
|
|
"logps/ref_rejected": -77.20724487304688,
|
|
"logps/rejected": -84.6572265625,
|
|
"loss": 0.795,
|
|
"margin_dpo/margin_mean": 6.57466983795166,
|
|
"margin_dpo/margin_std": 6.135951042175293,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.1013215859030837,
|
|
"fcm_dpo/beta": 0.1381378173828125,
|
|
"fcm_dpo/delta": -0.18166013062000275,
|
|
"fcm_dpo/margin": 7.318739891052246,
|
|
"fcm_dpo/q_t": 0.2995903491973877,
|
|
"grad_norm": 63.2430419921875,
|
|
"learning_rate": 4.927536231884058e-07,
|
|
"logits/chosen": -0.560828685760498,
|
|
"logits/rejected": -0.5357776880264282,
|
|
"logps/chosen": -61.07904052734375,
|
|
"logps/ref_chosen": -59.81924057006836,
|
|
"logps/ref_rejected": -103.38886260986328,
|
|
"logps/rejected": -111.9674072265625,
|
|
"loss": 0.7665,
|
|
"margin_dpo/margin_mean": 7.3187408447265625,
|
|
"margin_dpo/margin_std": 6.837162017822266,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.1027900146842878,
|
|
"fcm_dpo/beta": 0.13328900933265686,
|
|
"fcm_dpo/delta": -0.2611111104488373,
|
|
"fcm_dpo/margin": 8.128886222839355,
|
|
"fcm_dpo/q_t": 0.295282244682312,
|
|
"grad_norm": 66.4734878540039,
|
|
"learning_rate": 5e-07,
|
|
"logits/chosen": -0.5893011689186096,
|
|
"logits/rejected": -0.5513046383857727,
|
|
"logps/chosen": -63.8671875,
|
|
"logps/ref_chosen": -61.930641174316406,
|
|
"logps/ref_rejected": -91.06078338623047,
|
|
"logps/rejected": -101.12621307373047,
|
|
"loss": 0.7692,
|
|
"margin_dpo/margin_mean": 8.128886222839355,
|
|
"margin_dpo/margin_std": 8.00094223022461,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.10425844346549193,
|
|
"fcm_dpo/beta": 0.12505091726779938,
|
|
"fcm_dpo/delta": -0.3048865795135498,
|
|
"fcm_dpo/margin": 8.960905075073242,
|
|
"fcm_dpo/q_t": 0.2825569808483124,
|
|
"grad_norm": 55.4888916015625,
|
|
"learning_rate": 4.999967061337492e-07,
|
|
"logits/chosen": -0.6552280187606812,
|
|
"logits/rejected": -0.612133800983429,
|
|
"logps/chosen": -63.16753005981445,
|
|
"logps/ref_chosen": -61.750335693359375,
|
|
"logps/ref_rejected": -97.33662414550781,
|
|
"logps/rejected": -107.71471405029297,
|
|
"loss": 0.7315,
|
|
"margin_dpo/margin_mean": 8.960905075073242,
|
|
"margin_dpo/margin_std": 8.088345527648926,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.10572687224669604,
|
|
"fcm_dpo/beta": 0.11747831106185913,
|
|
"fcm_dpo/delta": -0.285390704870224,
|
|
"fcm_dpo/margin": 9.382757186889648,
|
|
"fcm_dpo/q_t": 0.2830852270126343,
|
|
"grad_norm": 57.36223220825195,
|
|
"learning_rate": 4.999868246217933e-07,
|
|
"logits/chosen": -0.5827009677886963,
|
|
"logits/rejected": -0.5448130369186401,
|
|
"logps/chosen": -67.41600036621094,
|
|
"logps/ref_chosen": -66.05341339111328,
|
|
"logps/ref_rejected": -95.2869873046875,
|
|
"logps/rejected": -106.03233337402344,
|
|
"loss": 0.7334,
|
|
"margin_dpo/margin_mean": 9.382757186889648,
|
|
"margin_dpo/margin_std": 8.36812686920166,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.10719530102790015,
|
|
"fcm_dpo/beta": 0.11263042688369751,
|
|
"fcm_dpo/delta": -0.17705786228179932,
|
|
"fcm_dpo/margin": 8.946794509887695,
|
|
"fcm_dpo/q_t": 0.3216911852359772,
|
|
"grad_norm": 58.959449768066406,
|
|
"learning_rate": 4.999703557245192e-07,
|
|
"logits/chosen": -0.6371798515319824,
|
|
"logits/rejected": -0.5916515588760376,
|
|
"logps/chosen": -68.6202163696289,
|
|
"logps/ref_chosen": -66.25627136230469,
|
|
"logps/ref_rejected": -90.45613098144531,
|
|
"logps/rejected": -101.76687622070312,
|
|
"loss": 0.8956,
|
|
"margin_dpo/margin_mean": 8.946794509887695,
|
|
"margin_dpo/margin_std": 11.44607162475586,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.10866372980910426,
|
|
"fcm_dpo/beta": 0.10893216729164124,
|
|
"fcm_dpo/delta": -0.17043882608413696,
|
|
"fcm_dpo/margin": 9.200440406799316,
|
|
"fcm_dpo/q_t": 0.31555283069610596,
|
|
"grad_norm": 60.90262985229492,
|
|
"learning_rate": 4.999472998758977e-07,
|
|
"logits/chosen": -0.5984748601913452,
|
|
"logits/rejected": -0.5802183151245117,
|
|
"logps/chosen": -56.02476501464844,
|
|
"logps/ref_chosen": -53.42488098144531,
|
|
"logps/ref_rejected": -95.94693756103516,
|
|
"logps/rejected": -107.74725341796875,
|
|
"loss": 0.8747,
|
|
"margin_dpo/margin_mean": 9.200439453125,
|
|
"margin_dpo/margin_std": 12.053499221801758,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.11013215859030837,
|
|
"fcm_dpo/beta": 0.10171889513731003,
|
|
"fcm_dpo/delta": -0.4963496923446655,
|
|
"fcm_dpo/margin": 12.63138484954834,
|
|
"fcm_dpo/q_t": 0.26315972208976746,
|
|
"grad_norm": 49.437259674072266,
|
|
"learning_rate": 4.999176576834721e-07,
|
|
"logits/chosen": -0.6258114576339722,
|
|
"logits/rejected": -0.6118018627166748,
|
|
"logps/chosen": -54.22317886352539,
|
|
"logps/ref_chosen": -51.861663818359375,
|
|
"logps/ref_rejected": -111.25398254394531,
|
|
"logps/rejected": -126.24687957763672,
|
|
"loss": 0.6895,
|
|
"margin_dpo/margin_mean": 12.631382942199707,
|
|
"margin_dpo/margin_std": 11.423229217529297,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.11160058737151249,
|
|
"fcm_dpo/beta": 0.09800372272729874,
|
|
"fcm_dpo/delta": -0.010731710121035576,
|
|
"fcm_dpo/margin": 8.773109436035156,
|
|
"fcm_dpo/q_t": 0.32228267192840576,
|
|
"grad_norm": 49.78368377685547,
|
|
"learning_rate": 4.998814299283415e-07,
|
|
"logits/chosen": -0.6634424924850464,
|
|
"logits/rejected": -0.6184474229812622,
|
|
"logps/chosen": -56.04176330566406,
|
|
"logps/ref_chosen": -53.26603698730469,
|
|
"logps/ref_rejected": -78.21662902832031,
|
|
"logps/rejected": -89.76544952392578,
|
|
"loss": 0.8528,
|
|
"margin_dpo/margin_mean": 8.773109436035156,
|
|
"margin_dpo/margin_std": 9.061115264892578,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.1130690161527166,
|
|
"fcm_dpo/beta": 0.09110400080680847,
|
|
"fcm_dpo/delta": -0.42337319254875183,
|
|
"fcm_dpo/margin": 13.34306526184082,
|
|
"fcm_dpo/q_t": 0.2679826617240906,
|
|
"grad_norm": 54.54016876220703,
|
|
"learning_rate": 4.998386175651409e-07,
|
|
"logits/chosen": -0.6144517660140991,
|
|
"logits/rejected": -0.5647158026695251,
|
|
"logps/chosen": -60.33265686035156,
|
|
"logps/ref_chosen": -58.0966796875,
|
|
"logps/ref_rejected": -93.77361297607422,
|
|
"logps/rejected": -109.3526611328125,
|
|
"loss": 0.7215,
|
|
"margin_dpo/margin_mean": 13.34306526184082,
|
|
"margin_dpo/margin_std": 12.149935722351074,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.1145374449339207,
|
|
"fcm_dpo/beta": 0.08875822275876999,
|
|
"fcm_dpo/delta": -0.14152249693870544,
|
|
"fcm_dpo/margin": 11.015653610229492,
|
|
"fcm_dpo/q_t": 0.30442285537719727,
|
|
"grad_norm": 50.21555709838867,
|
|
"learning_rate": 4.997892217220159e-07,
|
|
"logits/chosen": -0.6342002153396606,
|
|
"logits/rejected": -0.6031632423400879,
|
|
"logps/chosen": -57.952491760253906,
|
|
"logps/ref_chosen": -55.61378479003906,
|
|
"logps/ref_rejected": -84.93436431884766,
|
|
"logps/rejected": -98.28872680664062,
|
|
"loss": 0.7928,
|
|
"margin_dpo/margin_mean": 11.015655517578125,
|
|
"margin_dpo/margin_std": 10.317201614379883,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.11600587371512482,
|
|
"fcm_dpo/beta": 0.08457382023334503,
|
|
"fcm_dpo/delta": -0.2228245735168457,
|
|
"fcm_dpo/margin": 12.404184341430664,
|
|
"fcm_dpo/q_t": 0.3089568018913269,
|
|
"grad_norm": 40.83265686035156,
|
|
"learning_rate": 4.997332437005931e-07,
|
|
"logits/chosen": -0.619921088218689,
|
|
"logits/rejected": -0.5860031247138977,
|
|
"logps/chosen": -57.76416778564453,
|
|
"logps/ref_chosen": -55.45048522949219,
|
|
"logps/ref_rejected": -87.64756774902344,
|
|
"logps/rejected": -102.36543273925781,
|
|
"loss": 0.8187,
|
|
"margin_dpo/margin_mean": 12.404184341430664,
|
|
"margin_dpo/margin_std": 13.576568603515625,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.11747430249632893,
|
|
"fcm_dpo/beta": 0.08190235495567322,
|
|
"fcm_dpo/delta": -0.06250029802322388,
|
|
"fcm_dpo/margin": 11.025638580322266,
|
|
"fcm_dpo/q_t": 0.3301146924495697,
|
|
"grad_norm": 44.95631790161133,
|
|
"learning_rate": 4.996706849759452e-07,
|
|
"logits/chosen": -0.6934123039245605,
|
|
"logits/rejected": -0.643015444278717,
|
|
"logps/chosen": -62.62284469604492,
|
|
"logps/ref_chosen": -58.519290924072266,
|
|
"logps/ref_rejected": -87.54750061035156,
|
|
"logps/rejected": -102.67669677734375,
|
|
"loss": 0.8811,
|
|
"margin_dpo/margin_mean": 11.025639533996582,
|
|
"margin_dpo/margin_std": 12.54986572265625,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.11894273127753303,
|
|
"fcm_dpo/beta": 0.07723959535360336,
|
|
"fcm_dpo/delta": -0.3877883851528168,
|
|
"fcm_dpo/margin": 15.350555419921875,
|
|
"fcm_dpo/q_t": 0.28344881534576416,
|
|
"grad_norm": 48.92912292480469,
|
|
"learning_rate": 4.996015471965529e-07,
|
|
"logits/chosen": -0.7091237306594849,
|
|
"logits/rejected": -0.6722517013549805,
|
|
"logps/chosen": -69.73745727539062,
|
|
"logps/ref_chosen": -66.44886779785156,
|
|
"logps/ref_rejected": -129.66270446777344,
|
|
"logps/rejected": -148.30184936523438,
|
|
"loss": 0.7526,
|
|
"margin_dpo/margin_mean": 15.350555419921875,
|
|
"margin_dpo/margin_std": 15.928838729858398,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.12041116005873716,
|
|
"fcm_dpo/beta": 0.0763830617070198,
|
|
"fcm_dpo/delta": -0.12376299500465393,
|
|
"fcm_dpo/margin": 12.550471305847168,
|
|
"fcm_dpo/q_t": 0.32355159521102905,
|
|
"grad_norm": 56.68275451660156,
|
|
"learning_rate": 4.995258321842611e-07,
|
|
"logits/chosen": -0.6178134679794312,
|
|
"logits/rejected": -0.5973782539367676,
|
|
"logps/chosen": -57.490257263183594,
|
|
"logps/ref_chosen": -52.232383728027344,
|
|
"logps/ref_rejected": -90.74325561523438,
|
|
"logps/rejected": -108.55159759521484,
|
|
"loss": 0.9461,
|
|
"margin_dpo/margin_mean": 12.550471305847168,
|
|
"margin_dpo/margin_std": 17.051355361938477,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.12187958883994127,
|
|
"fcm_dpo/beta": 0.07206125557422638,
|
|
"fcm_dpo/delta": -0.21825863420963287,
|
|
"fcm_dpo/margin": 14.49725341796875,
|
|
"fcm_dpo/q_t": 0.30366265773773193,
|
|
"grad_norm": 49.30983352661133,
|
|
"learning_rate": 4.994435419342304e-07,
|
|
"logits/chosen": -0.6791856288909912,
|
|
"logits/rejected": -0.6300394535064697,
|
|
"logps/chosen": -61.20024108886719,
|
|
"logps/ref_chosen": -55.82738494873047,
|
|
"logps/ref_rejected": -103.71589660644531,
|
|
"logps/rejected": -123.58601379394531,
|
|
"loss": 0.8149,
|
|
"margin_dpo/margin_mean": 14.497251510620117,
|
|
"margin_dpo/margin_std": 15.496784210205078,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.12334801762114538,
|
|
"fcm_dpo/beta": 0.06941388547420502,
|
|
"fcm_dpo/delta": -0.12336910516023636,
|
|
"fcm_dpo/margin": 13.803592681884766,
|
|
"fcm_dpo/q_t": 0.3031036853790283,
|
|
"grad_norm": 42.30681228637695,
|
|
"learning_rate": 4.993546786148857e-07,
|
|
"logits/chosen": -0.6525845527648926,
|
|
"logits/rejected": -0.6165010333061218,
|
|
"logps/chosen": -71.6080093383789,
|
|
"logps/ref_chosen": -67.1761703491211,
|
|
"logps/ref_rejected": -87.29859924316406,
|
|
"logps/rejected": -105.53402709960938,
|
|
"loss": 0.7964,
|
|
"margin_dpo/margin_mean": 13.803592681884766,
|
|
"margin_dpo/margin_std": 12.420629501342773,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.12481644640234948,
|
|
"fcm_dpo/beta": 0.06950154155492783,
|
|
"fcm_dpo/delta": -0.10908734798431396,
|
|
"fcm_dpo/margin": 13.623420715332031,
|
|
"fcm_dpo/q_t": 0.32009202241897583,
|
|
"grad_norm": 48.14596176147461,
|
|
"learning_rate": 4.992592445678582e-07,
|
|
"logits/chosen": -0.6232889890670776,
|
|
"logits/rejected": -0.591201663017273,
|
|
"logps/chosen": -63.80423355102539,
|
|
"logps/ref_chosen": -58.4066162109375,
|
|
"logps/ref_rejected": -78.63880157470703,
|
|
"logps/rejected": -97.65983581542969,
|
|
"loss": 0.856,
|
|
"margin_dpo/margin_mean": 13.623420715332031,
|
|
"margin_dpo/margin_std": 14.695003509521484,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.1262848751835536,
|
|
"fcm_dpo/beta": 0.06705532968044281,
|
|
"fcm_dpo/delta": -0.1516844481229782,
|
|
"fcm_dpo/margin": 14.717806816101074,
|
|
"fcm_dpo/q_t": 0.3345668911933899,
|
|
"grad_norm": 52.9031867980957,
|
|
"learning_rate": 4.991572423079235e-07,
|
|
"logits/chosen": -0.6772565841674805,
|
|
"logits/rejected": -0.6597760915756226,
|
|
"logps/chosen": -63.28825378417969,
|
|
"logps/ref_chosen": -56.13746643066406,
|
|
"logps/ref_rejected": -88.12165069580078,
|
|
"logps/rejected": -109.99024200439453,
|
|
"loss": 0.9488,
|
|
"margin_dpo/margin_mean": 14.717806816101074,
|
|
"margin_dpo/margin_std": 21.25366973876953,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.1277533039647577,
|
|
"fcm_dpo/beta": 0.06366388499736786,
|
|
"fcm_dpo/delta": -0.20331230759620667,
|
|
"fcm_dpo/margin": 16.169401168823242,
|
|
"fcm_dpo/q_t": 0.30775928497314453,
|
|
"grad_norm": 43.02919387817383,
|
|
"learning_rate": 4.990486745229364e-07,
|
|
"logits/chosen": -0.6946601867675781,
|
|
"logits/rejected": -0.6573343276977539,
|
|
"logps/chosen": -63.13631057739258,
|
|
"logps/ref_chosen": -55.63609313964844,
|
|
"logps/ref_rejected": -95.46757507324219,
|
|
"logps/rejected": -119.13719940185547,
|
|
"loss": 0.8612,
|
|
"margin_dpo/margin_mean": 16.16939926147461,
|
|
"margin_dpo/margin_std": 18.847665786743164,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.12922173274596183,
|
|
"fcm_dpo/beta": 0.0635436624288559,
|
|
"fcm_dpo/delta": 0.05298266187310219,
|
|
"fcm_dpo/margin": 12.59765625,
|
|
"fcm_dpo/q_t": 0.34781643748283386,
|
|
"grad_norm": 48.83399200439453,
|
|
"learning_rate": 4.989335440737586e-07,
|
|
"logits/chosen": -0.6725143194198608,
|
|
"logits/rejected": -0.6565263271331787,
|
|
"logps/chosen": -83.05477905273438,
|
|
"logps/ref_chosen": -73.67115020751953,
|
|
"logps/ref_rejected": -106.70849609375,
|
|
"logps/rejected": -128.68978881835938,
|
|
"loss": 0.979,
|
|
"margin_dpo/margin_mean": 12.597654342651367,
|
|
"margin_dpo/margin_std": 16.687740325927734,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.13069016152716592,
|
|
"fcm_dpo/beta": 0.06436465680599213,
|
|
"fcm_dpo/delta": -0.04542829841375351,
|
|
"fcm_dpo/margin": 13.830251693725586,
|
|
"fcm_dpo/q_t": 0.32423245906829834,
|
|
"grad_norm": 41.47353744506836,
|
|
"learning_rate": 4.988118539941847e-07,
|
|
"logits/chosen": -0.6947389841079712,
|
|
"logits/rejected": -0.6542242765426636,
|
|
"logps/chosen": -66.13823699951172,
|
|
"logps/ref_chosen": -60.624916076660156,
|
|
"logps/ref_rejected": -82.08354949951172,
|
|
"logps/rejected": -101.4271240234375,
|
|
"loss": 0.8568,
|
|
"margin_dpo/margin_mean": 13.83025074005127,
|
|
"margin_dpo/margin_std": 15.321671485900879,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.13215859030837004,
|
|
"fcm_dpo/beta": 0.06196070462465286,
|
|
"fcm_dpo/delta": -0.2721518874168396,
|
|
"fcm_dpo/margin": 17.629770278930664,
|
|
"fcm_dpo/q_t": 0.31731653213500977,
|
|
"grad_norm": 45.96269607543945,
|
|
"learning_rate": 4.986836074908615e-07,
|
|
"logits/chosen": -0.6709829568862915,
|
|
"logits/rejected": -0.6653557419776917,
|
|
"logps/chosen": -60.85920715332031,
|
|
"logps/ref_chosen": -53.285308837890625,
|
|
"logps/ref_rejected": -111.54470825195312,
|
|
"logps/rejected": -136.74838256835938,
|
|
"loss": 0.8919,
|
|
"margin_dpo/margin_mean": 17.629772186279297,
|
|
"margin_dpo/margin_std": 22.947355270385742,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.13362701908957417,
|
|
"fcm_dpo/beta": 0.0593516007065773,
|
|
"fcm_dpo/delta": -0.15248210728168488,
|
|
"fcm_dpo/margin": 16.636859893798828,
|
|
"fcm_dpo/q_t": 0.3205872178077698,
|
|
"grad_norm": 45.15717697143555,
|
|
"learning_rate": 4.985488079432037e-07,
|
|
"logits/chosen": -0.6791576147079468,
|
|
"logits/rejected": -0.6390055418014526,
|
|
"logps/chosen": -69.1212158203125,
|
|
"logps/ref_chosen": -61.802955627441406,
|
|
"logps/ref_rejected": -87.87395477294922,
|
|
"logps/rejected": -111.82907104492188,
|
|
"loss": 0.8799,
|
|
"margin_dpo/margin_mean": 16.636859893798828,
|
|
"margin_dpo/margin_std": 19.837177276611328,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.13509544787077826,
|
|
"fcm_dpo/beta": 0.05815107375383377,
|
|
"fcm_dpo/delta": -0.09225773066282272,
|
|
"fcm_dpo/margin": 16.04425811767578,
|
|
"fcm_dpo/q_t": 0.3281884789466858,
|
|
"grad_norm": 41.01671600341797,
|
|
"learning_rate": 4.984074589033043e-07,
|
|
"logits/chosen": -0.7218481302261353,
|
|
"logits/rejected": -0.6939176321029663,
|
|
"logps/chosen": -58.69712829589844,
|
|
"logps/ref_chosen": -51.640769958496094,
|
|
"logps/ref_rejected": -77.88117980957031,
|
|
"logps/rejected": -100.98180389404297,
|
|
"loss": 0.9017,
|
|
"margin_dpo/margin_mean": 16.04425811767578,
|
|
"margin_dpo/margin_std": 19.757722854614258,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.13656387665198239,
|
|
"fcm_dpo/beta": 0.0565192773938179,
|
|
"fcm_dpo/delta": -0.09390807151794434,
|
|
"fcm_dpo/margin": 16.542964935302734,
|
|
"fcm_dpo/q_t": 0.3172317147254944,
|
|
"grad_norm": 36.77476501464844,
|
|
"learning_rate": 4.982595640958425e-07,
|
|
"logits/chosen": -0.7294448614120483,
|
|
"logits/rejected": -0.6686340570449829,
|
|
"logps/chosen": -60.201622009277344,
|
|
"logps/ref_chosen": -52.529239654541016,
|
|
"logps/ref_rejected": -77.16075134277344,
|
|
"logps/rejected": -101.37608337402344,
|
|
"loss": 0.832,
|
|
"margin_dpo/margin_mean": 16.5429630279541,
|
|
"margin_dpo/margin_std": 17.85883903503418,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.13803230543318648,
|
|
"fcm_dpo/beta": 0.05466047674417496,
|
|
"fcm_dpo/delta": -0.14541637897491455,
|
|
"fcm_dpo/margin": 17.906766891479492,
|
|
"fcm_dpo/q_t": 0.3086274564266205,
|
|
"grad_norm": 38.767738342285156,
|
|
"learning_rate": 4.98105127417984e-07,
|
|
"logits/chosen": -0.689984917640686,
|
|
"logits/rejected": -0.6643059253692627,
|
|
"logps/chosen": -69.84527587890625,
|
|
"logps/ref_chosen": -61.22261047363281,
|
|
"logps/ref_rejected": -99.59902954101562,
|
|
"logps/rejected": -126.12846374511719,
|
|
"loss": 0.809,
|
|
"margin_dpo/margin_mean": 17.906766891479492,
|
|
"margin_dpo/margin_std": 17.686237335205078,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.1395007342143906,
|
|
"fcm_dpo/beta": 0.054808467626571655,
|
|
"fcm_dpo/delta": 0.03201872110366821,
|
|
"fcm_dpo/margin": 14.971220016479492,
|
|
"fcm_dpo/q_t": 0.3315742611885071,
|
|
"grad_norm": 39.22067642211914,
|
|
"learning_rate": 4.979441529392784e-07,
|
|
"logits/chosen": -0.7354916334152222,
|
|
"logits/rejected": -0.7008249759674072,
|
|
"logps/chosen": -59.91139221191406,
|
|
"logps/ref_chosen": -52.523643493652344,
|
|
"logps/ref_rejected": -75.8803482055664,
|
|
"logps/rejected": -98.23931884765625,
|
|
"loss": 0.8765,
|
|
"margin_dpo/margin_mean": 14.971220016479492,
|
|
"margin_dpo/margin_std": 15.818527221679688,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.14096916299559473,
|
|
"fcm_dpo/beta": 0.0527215413749218,
|
|
"fcm_dpo/delta": -0.2318594753742218,
|
|
"fcm_dpo/margin": 19.9919490814209,
|
|
"fcm_dpo/q_t": 0.2983996868133545,
|
|
"grad_norm": 36.0137939453125,
|
|
"learning_rate": 4.977766449015534e-07,
|
|
"logits/chosen": -0.6940563917160034,
|
|
"logits/rejected": -0.6533340811729431,
|
|
"logps/chosen": -68.70280456542969,
|
|
"logps/ref_chosen": -62.15697479248047,
|
|
"logps/ref_rejected": -96.59601593017578,
|
|
"logps/rejected": -123.13380432128906,
|
|
"loss": 0.7806,
|
|
"margin_dpo/margin_mean": 19.9919490814209,
|
|
"margin_dpo/margin_std": 20.55471420288086,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.14243759177679882,
|
|
"fcm_dpo/beta": 0.0531981959939003,
|
|
"fcm_dpo/delta": -0.009700343012809753,
|
|
"fcm_dpo/margin": 16.080947875976562,
|
|
"fcm_dpo/q_t": 0.3233848214149475,
|
|
"grad_norm": 39.30295944213867,
|
|
"learning_rate": 4.976026077188012e-07,
|
|
"logits/chosen": -0.658359169960022,
|
|
"logits/rejected": -0.6041878461837769,
|
|
"logps/chosen": -62.353187561035156,
|
|
"logps/ref_chosen": -54.646366119384766,
|
|
"logps/ref_rejected": -76.96475219726562,
|
|
"logps/rejected": -100.75251770019531,
|
|
"loss": 0.8526,
|
|
"margin_dpo/margin_mean": 16.080947875976562,
|
|
"margin_dpo/margin_std": 15.41889762878418,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.14390602055800295,
|
|
"fcm_dpo/beta": 0.051846109330654144,
|
|
"fcm_dpo/delta": -0.06934039294719696,
|
|
"fcm_dpo/margin": 17.607675552368164,
|
|
"fcm_dpo/q_t": 0.31782758235931396,
|
|
"grad_norm": 39.470882415771484,
|
|
"learning_rate": 4.974220459770639e-07,
|
|
"logits/chosen": -0.6812083125114441,
|
|
"logits/rejected": -0.6582015752792358,
|
|
"logps/chosen": -74.72683715820312,
|
|
"logps/ref_chosen": -65.25862884521484,
|
|
"logps/ref_rejected": -96.5274887084961,
|
|
"logps/rejected": -123.60337829589844,
|
|
"loss": 0.8784,
|
|
"margin_dpo/margin_mean": 17.607677459716797,
|
|
"margin_dpo/margin_std": 19.72555923461914,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.14537444933920704,
|
|
"fcm_dpo/beta": 0.05020611733198166,
|
|
"fcm_dpo/delta": -0.19243989884853363,
|
|
"fcm_dpo/margin": 20.36504364013672,
|
|
"fcm_dpo/q_t": 0.3088953495025635,
|
|
"grad_norm": 34.95133590698242,
|
|
"learning_rate": 4.972349644343108e-07,
|
|
"logits/chosen": -0.6733574271202087,
|
|
"logits/rejected": -0.6646283864974976,
|
|
"logps/chosen": -53.47751998901367,
|
|
"logps/ref_chosen": -45.638484954833984,
|
|
"logps/ref_rejected": -86.43793487548828,
|
|
"logps/rejected": -114.64201354980469,
|
|
"loss": 0.8047,
|
|
"margin_dpo/margin_mean": 20.36504364013672,
|
|
"margin_dpo/margin_std": 22.298368453979492,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.14684287812041116,
|
|
"fcm_dpo/beta": 0.05120418965816498,
|
|
"fcm_dpo/delta": 0.1581817865371704,
|
|
"fcm_dpo/margin": 13.70488166809082,
|
|
"fcm_dpo/q_t": 0.3617163896560669,
|
|
"grad_norm": 41.91087341308594,
|
|
"learning_rate": 4.970413680203148e-07,
|
|
"logits/chosen": -0.6871071457862854,
|
|
"logits/rejected": -0.6425069570541382,
|
|
"logps/chosen": -66.13288879394531,
|
|
"logps/ref_chosen": -57.59397888183594,
|
|
"logps/ref_rejected": -74.06021118164062,
|
|
"logps/rejected": -96.30400085449219,
|
|
"loss": 1.0066,
|
|
"margin_dpo/margin_mean": 13.70488166809082,
|
|
"margin_dpo/margin_std": 19.342105865478516,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.14831130690161526,
|
|
"fcm_dpo/beta": 0.051217399537563324,
|
|
"fcm_dpo/delta": 0.037239253520965576,
|
|
"fcm_dpo/margin": 15.905902862548828,
|
|
"fcm_dpo/q_t": 0.3492443561553955,
|
|
"grad_norm": 41.5073127746582,
|
|
"learning_rate": 4.968412618365215e-07,
|
|
"logits/chosen": -0.7009834051132202,
|
|
"logits/rejected": -0.6622995734214783,
|
|
"logps/chosen": -71.91978454589844,
|
|
"logps/ref_chosen": -61.64885330200195,
|
|
"logps/ref_rejected": -83.18968200683594,
|
|
"logps/rejected": -109.36653137207031,
|
|
"loss": 0.951,
|
|
"margin_dpo/margin_mean": 15.905902862548828,
|
|
"margin_dpo/margin_std": 21.403085708618164,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.14977973568281938,
|
|
"fcm_dpo/beta": 0.05395771190524101,
|
|
"fcm_dpo/delta": 0.20888186991214752,
|
|
"fcm_dpo/margin": 12.05476188659668,
|
|
"fcm_dpo/q_t": 0.37573665380477905,
|
|
"grad_norm": 50.882755279541016,
|
|
"learning_rate": 4.966346511559149e-07,
|
|
"logits/chosen": -0.7070550918579102,
|
|
"logits/rejected": -0.6569492220878601,
|
|
"logps/chosen": -76.05787658691406,
|
|
"logps/ref_chosen": -64.0788803100586,
|
|
"logps/ref_rejected": -68.18707275390625,
|
|
"logps/rejected": -92.2208251953125,
|
|
"loss": 1.0619,
|
|
"margin_dpo/margin_mean": 12.05476188659668,
|
|
"margin_dpo/margin_std": 19.289947509765625,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.1512481644640235,
|
|
"fcm_dpo/beta": 0.05126365274190903,
|
|
"fcm_dpo/delta": -0.3250649571418762,
|
|
"fcm_dpo/margin": 22.175342559814453,
|
|
"fcm_dpo/q_t": 0.28338393568992615,
|
|
"grad_norm": 35.19338607788086,
|
|
"learning_rate": 4.964215414228785e-07,
|
|
"logits/chosen": -0.7068288326263428,
|
|
"logits/rejected": -0.6695182919502258,
|
|
"logps/chosen": -69.05276489257812,
|
|
"logps/ref_chosen": -61.299278259277344,
|
|
"logps/ref_rejected": -93.57270812988281,
|
|
"logps/rejected": -123.50154876708984,
|
|
"loss": 0.7378,
|
|
"margin_dpo/margin_mean": 22.175342559814453,
|
|
"margin_dpo/margin_std": 20.998464584350586,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.1527165932452276,
|
|
"fcm_dpo/beta": 0.049339231103658676,
|
|
"fcm_dpo/delta": -0.19892087578773499,
|
|
"fcm_dpo/margin": 20.853076934814453,
|
|
"fcm_dpo/q_t": 0.3135518431663513,
|
|
"grad_norm": 38.35378646850586,
|
|
"learning_rate": 4.96201938253052e-07,
|
|
"logits/chosen": -0.713874101638794,
|
|
"logits/rejected": -0.6700358390808105,
|
|
"logps/chosen": -63.76753234863281,
|
|
"logps/ref_chosen": -54.372772216796875,
|
|
"logps/ref_rejected": -89.5647201538086,
|
|
"logps/rejected": -119.81255340576172,
|
|
"loss": 0.844,
|
|
"margin_dpo/margin_mean": 20.853076934814453,
|
|
"margin_dpo/margin_std": 23.93612289428711,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.15418502202643172,
|
|
"fcm_dpo/beta": 0.04497908800840378,
|
|
"fcm_dpo/delta": -0.5303850769996643,
|
|
"fcm_dpo/margin": 29.143280029296875,
|
|
"fcm_dpo/q_t": 0.24480026960372925,
|
|
"grad_norm": 31.636995315551758,
|
|
"learning_rate": 4.959758474331832e-07,
|
|
"logits/chosen": -0.7144509553909302,
|
|
"logits/rejected": -0.678684651851654,
|
|
"logps/chosen": -62.63848876953125,
|
|
"logps/ref_chosen": -54.638946533203125,
|
|
"logps/ref_rejected": -97.97351837158203,
|
|
"logps/rejected": -135.11634826660156,
|
|
"loss": 0.6311,
|
|
"margin_dpo/margin_mean": 29.143281936645508,
|
|
"margin_dpo/margin_std": 22.500640869140625,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.15565345080763582,
|
|
"fcm_dpo/beta": 0.04392065852880478,
|
|
"fcm_dpo/delta": 0.03618063032627106,
|
|
"fcm_dpo/margin": 18.596851348876953,
|
|
"fcm_dpo/q_t": 0.33291202783584595,
|
|
"grad_norm": 33.444847106933594,
|
|
"learning_rate": 4.957432749209755e-07,
|
|
"logits/chosen": -0.6787040829658508,
|
|
"logits/rejected": -0.6291109323501587,
|
|
"logps/chosen": -64.25106811523438,
|
|
"logps/ref_chosen": -54.83289337158203,
|
|
"logps/ref_rejected": -85.22461700439453,
|
|
"logps/rejected": -113.2396469116211,
|
|
"loss": 0.873,
|
|
"margin_dpo/margin_mean": 18.596851348876953,
|
|
"margin_dpo/margin_std": 19.459993362426758,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.15712187958883994,
|
|
"fcm_dpo/beta": 0.043349266052246094,
|
|
"fcm_dpo/delta": -0.07450622320175171,
|
|
"fcm_dpo/margin": 21.153640747070312,
|
|
"fcm_dpo/q_t": 0.31638288497924805,
|
|
"grad_norm": 32.62092208862305,
|
|
"learning_rate": 4.955042268449307e-07,
|
|
"logits/chosen": -0.737189531326294,
|
|
"logits/rejected": -0.6847895383834839,
|
|
"logps/chosen": -81.13639831542969,
|
|
"logps/ref_chosen": -69.70780944824219,
|
|
"logps/ref_rejected": -94.73950958251953,
|
|
"logps/rejected": -127.32173156738281,
|
|
"loss": 0.8405,
|
|
"margin_dpo/margin_mean": 21.153640747070312,
|
|
"margin_dpo/margin_std": 21.512563705444336,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.15859030837004406,
|
|
"fcm_dpo/beta": 0.042633771896362305,
|
|
"fcm_dpo/delta": -0.11348213255405426,
|
|
"fcm_dpo/margin": 22.344249725341797,
|
|
"fcm_dpo/q_t": 0.32922905683517456,
|
|
"grad_norm": 39.67808532714844,
|
|
"learning_rate": 4.952587095041881e-07,
|
|
"logits/chosen": -0.7291974425315857,
|
|
"logits/rejected": -0.6880236864089966,
|
|
"logps/chosen": -67.06340789794922,
|
|
"logps/ref_chosen": -56.0098876953125,
|
|
"logps/ref_rejected": -95.79601287841797,
|
|
"logps/rejected": -129.19378662109375,
|
|
"loss": 0.905,
|
|
"margin_dpo/margin_mean": 22.344249725341797,
|
|
"margin_dpo/margin_std": 28.051963806152344,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.16005873715124816,
|
|
"fcm_dpo/beta": 0.04097578674554825,
|
|
"fcm_dpo/delta": -0.20452482998371124,
|
|
"fcm_dpo/margin": 25.194259643554688,
|
|
"fcm_dpo/q_t": 0.29648420214653015,
|
|
"grad_norm": 33.704471588134766,
|
|
"learning_rate": 4.95006729368358e-07,
|
|
"logits/chosen": -0.643449604511261,
|
|
"logits/rejected": -0.6156653165817261,
|
|
"logps/chosen": -72.79986572265625,
|
|
"logps/ref_chosen": -62.88549041748047,
|
|
"logps/ref_rejected": -98.68573760986328,
|
|
"logps/rejected": -133.79437255859375,
|
|
"loss": 0.7753,
|
|
"margin_dpo/margin_mean": 25.194259643554688,
|
|
"margin_dpo/margin_std": 23.329872131347656,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.16152716593245228,
|
|
"fcm_dpo/beta": 0.03958093747496605,
|
|
"fcm_dpo/delta": -0.08387066423892975,
|
|
"fcm_dpo/margin": 23.25214958190918,
|
|
"fcm_dpo/q_t": 0.3206620514392853,
|
|
"grad_norm": 30.748167037963867,
|
|
"learning_rate": 4.947482930773511e-07,
|
|
"logits/chosen": -0.6758570671081543,
|
|
"logits/rejected": -0.6276640295982361,
|
|
"logps/chosen": -67.95677185058594,
|
|
"logps/ref_chosen": -58.753684997558594,
|
|
"logps/ref_rejected": -79.75001525878906,
|
|
"logps/rejected": -112.20524597167969,
|
|
"loss": 0.8678,
|
|
"margin_dpo/margin_mean": 23.25214958190918,
|
|
"margin_dpo/margin_std": 25.55893898010254,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.16299559471365638,
|
|
"fcm_dpo/beta": 0.038935232907533646,
|
|
"fcm_dpo/delta": -0.1342032104730606,
|
|
"fcm_dpo/margin": 24.890220642089844,
|
|
"fcm_dpo/q_t": 0.3145398795604706,
|
|
"grad_norm": 32.24641418457031,
|
|
"learning_rate": 4.944834074412042e-07,
|
|
"logits/chosen": -0.6879921555519104,
|
|
"logits/rejected": -0.659111738204956,
|
|
"logps/chosen": -80.65528869628906,
|
|
"logps/ref_chosen": -68.62410736083984,
|
|
"logps/ref_rejected": -98.42886352539062,
|
|
"logps/rejected": -135.35025024414062,
|
|
"loss": 0.8545,
|
|
"margin_dpo/margin_mean": 24.890220642089844,
|
|
"margin_dpo/margin_std": 27.572715759277344,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.1644640234948605,
|
|
"fcm_dpo/beta": 0.039760373532772064,
|
|
"fcm_dpo/delta": 0.18148502707481384,
|
|
"fcm_dpo/margin": 17.13097381591797,
|
|
"fcm_dpo/q_t": 0.3622838854789734,
|
|
"grad_norm": 33.34195327758789,
|
|
"learning_rate": 4.942120794399002e-07,
|
|
"logits/chosen": -0.7036277055740356,
|
|
"logits/rejected": -0.6543942093849182,
|
|
"logps/chosen": -61.78867721557617,
|
|
"logps/ref_chosen": -50.24964141845703,
|
|
"logps/ref_rejected": -64.77442932128906,
|
|
"logps/rejected": -93.44444274902344,
|
|
"loss": 0.9724,
|
|
"margin_dpo/margin_mean": 17.130971908569336,
|
|
"margin_dpo/margin_std": 21.436376571655273,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.16593245227606462,
|
|
"fcm_dpo/beta": 0.04075583070516586,
|
|
"fcm_dpo/delta": 0.062437187880277634,
|
|
"fcm_dpo/margin": 19.445987701416016,
|
|
"fcm_dpo/q_t": 0.3351534307003021,
|
|
"grad_norm": 36.214141845703125,
|
|
"learning_rate": 4.939343162231841e-07,
|
|
"logits/chosen": -0.6719074249267578,
|
|
"logits/rejected": -0.6214045882225037,
|
|
"logps/chosen": -79.45092010498047,
|
|
"logps/ref_chosen": -66.71295166015625,
|
|
"logps/ref_rejected": -77.96870422363281,
|
|
"logps/rejected": -110.15266418457031,
|
|
"loss": 0.8755,
|
|
"margin_dpo/margin_mean": 19.445987701416016,
|
|
"margin_dpo/margin_std": 19.488388061523438,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.16740088105726872,
|
|
"fcm_dpo/beta": 0.03883285075426102,
|
|
"fcm_dpo/delta": -0.27601903676986694,
|
|
"fcm_dpo/margin": 28.083696365356445,
|
|
"fcm_dpo/q_t": 0.29987218976020813,
|
|
"grad_norm": 30.65591812133789,
|
|
"learning_rate": 4.936501251103751e-07,
|
|
"logits/chosen": -0.6819378137588501,
|
|
"logits/rejected": -0.6380999684333801,
|
|
"logps/chosen": -69.460205078125,
|
|
"logps/ref_chosen": -57.78507995605469,
|
|
"logps/ref_rejected": -87.10966491699219,
|
|
"logps/rejected": -126.86849212646484,
|
|
"loss": 0.7933,
|
|
"margin_dpo/margin_mean": 28.083698272705078,
|
|
"margin_dpo/margin_std": 31.275924682617188,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.16886930983847284,
|
|
"fcm_dpo/beta": 0.039183393120765686,
|
|
"fcm_dpo/delta": 0.049480877816677094,
|
|
"fcm_dpo/margin": 20.51511001586914,
|
|
"fcm_dpo/q_t": 0.35244330763816833,
|
|
"grad_norm": 45.887428283691406,
|
|
"learning_rate": 4.933595135901732e-07,
|
|
"logits/chosen": -0.6870021820068359,
|
|
"logits/rejected": -0.6467078924179077,
|
|
"logps/chosen": -81.95085906982422,
|
|
"logps/ref_chosen": -65.5826416015625,
|
|
"logps/ref_rejected": -98.56552124023438,
|
|
"logps/rejected": -135.4488525390625,
|
|
"loss": 0.9938,
|
|
"margin_dpo/margin_mean": 20.51511001586914,
|
|
"margin_dpo/margin_std": 30.72865867614746,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.17033773861967694,
|
|
"fcm_dpo/beta": 0.039394035935401917,
|
|
"fcm_dpo/delta": 0.01498110219836235,
|
|
"fcm_dpo/margin": 21.212890625,
|
|
"fcm_dpo/q_t": 0.33000385761260986,
|
|
"grad_norm": 34.129329681396484,
|
|
"learning_rate": 4.930624893204624e-07,
|
|
"logits/chosen": -0.7020711898803711,
|
|
"logits/rejected": -0.6741104125976562,
|
|
"logps/chosen": -63.63768005371094,
|
|
"logps/ref_chosen": -51.40031433105469,
|
|
"logps/ref_rejected": -80.5218505859375,
|
|
"logps/rejected": -113.97211456298828,
|
|
"loss": 0.8508,
|
|
"margin_dpo/margin_mean": 21.212890625,
|
|
"margin_dpo/margin_std": 21.642147064208984,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.17180616740088106,
|
|
"fcm_dpo/beta": 0.03966808319091797,
|
|
"fcm_dpo/delta": 0.07746738195419312,
|
|
"fcm_dpo/margin": 19.62699317932129,
|
|
"fcm_dpo/q_t": 0.345183789730072,
|
|
"grad_norm": 41.55912780761719,
|
|
"learning_rate": 4.927590601281083e-07,
|
|
"logits/chosen": -0.6694446802139282,
|
|
"logits/rejected": -0.6278887987136841,
|
|
"logps/chosen": -83.67584228515625,
|
|
"logps/ref_chosen": -69.29840850830078,
|
|
"logps/ref_rejected": -66.583984375,
|
|
"logps/rejected": -100.58841705322266,
|
|
"loss": 0.9269,
|
|
"margin_dpo/margin_mean": 19.62699317932129,
|
|
"margin_dpo/margin_std": 24.160232543945312,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.17327459618208516,
|
|
"fcm_dpo/beta": 0.04006565362215042,
|
|
"fcm_dpo/delta": 0.035930898040533066,
|
|
"fcm_dpo/margin": 20.39095115661621,
|
|
"fcm_dpo/q_t": 0.3354613184928894,
|
|
"grad_norm": 34.89082336425781,
|
|
"learning_rate": 4.924492340087524e-07,
|
|
"logits/chosen": -0.7189027070999146,
|
|
"logits/rejected": -0.6873916387557983,
|
|
"logps/chosen": -68.96400451660156,
|
|
"logps/ref_chosen": -55.6409797668457,
|
|
"logps/ref_rejected": -75.66905975341797,
|
|
"logps/rejected": -109.3830337524414,
|
|
"loss": 0.8748,
|
|
"margin_dpo/margin_mean": 20.390953063964844,
|
|
"margin_dpo/margin_std": 21.79241943359375,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.17474302496328928,
|
|
"fcm_dpo/beta": 0.03989461809396744,
|
|
"fcm_dpo/delta": 0.016687780618667603,
|
|
"fcm_dpo/margin": 20.894739151000977,
|
|
"fcm_dpo/q_t": 0.3389769196510315,
|
|
"grad_norm": 36.64957046508789,
|
|
"learning_rate": 4.92133019126601e-07,
|
|
"logits/chosen": -0.6799700260162354,
|
|
"logits/rejected": -0.6564502716064453,
|
|
"logps/chosen": -89.88623046875,
|
|
"logps/ref_chosen": -73.51019287109375,
|
|
"logps/ref_rejected": -102.977294921875,
|
|
"logps/rejected": -140.24806213378906,
|
|
"loss": 0.9122,
|
|
"margin_dpo/margin_mean": 20.894737243652344,
|
|
"margin_dpo/margin_std": 25.40456199645996,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.1762114537444934,
|
|
"fcm_dpo/beta": 0.038859717547893524,
|
|
"fcm_dpo/delta": -0.2279738485813141,
|
|
"fcm_dpo/margin": 27.11212921142578,
|
|
"fcm_dpo/q_t": 0.29994580149650574,
|
|
"grad_norm": 35.182918548583984,
|
|
"learning_rate": 4.918104238142103e-07,
|
|
"logits/chosen": -0.6844409704208374,
|
|
"logits/rejected": -0.6399496793746948,
|
|
"logps/chosen": -94.61766052246094,
|
|
"logps/ref_chosen": -76.78083801269531,
|
|
"logps/ref_rejected": -108.02374267578125,
|
|
"logps/rejected": -152.9727020263672,
|
|
"loss": 0.8006,
|
|
"margin_dpo/margin_mean": 27.11212921142578,
|
|
"margin_dpo/margin_std": 28.035802841186523,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.1776798825256975,
|
|
"fcm_dpo/beta": 0.036688677966594696,
|
|
"fcm_dpo/delta": -0.2756182849407196,
|
|
"fcm_dpo/margin": 29.79346466064453,
|
|
"fcm_dpo/q_t": 0.2999020218849182,
|
|
"grad_norm": 33.34456253051758,
|
|
"learning_rate": 4.91481456572267e-07,
|
|
"logits/chosen": -0.6548373699188232,
|
|
"logits/rejected": -0.6413829326629639,
|
|
"logps/chosen": -78.59416961669922,
|
|
"logps/ref_chosen": -61.789894104003906,
|
|
"logps/ref_rejected": -109.99456787109375,
|
|
"logps/rejected": -156.59231567382812,
|
|
"loss": 0.803,
|
|
"margin_dpo/margin_mean": 29.79346466064453,
|
|
"margin_dpo/margin_std": 31.758745193481445,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.17914831130690162,
|
|
"fcm_dpo/beta": 0.0338733084499836,
|
|
"fcm_dpo/delta": -0.47818467020988464,
|
|
"fcm_dpo/margin": 37.396575927734375,
|
|
"fcm_dpo/q_t": 0.25546765327453613,
|
|
"grad_norm": 32.44083023071289,
|
|
"learning_rate": 4.911461260693638e-07,
|
|
"logits/chosen": -0.6689621806144714,
|
|
"logits/rejected": -0.6719322204589844,
|
|
"logps/chosen": -62.280059814453125,
|
|
"logps/ref_chosen": -46.9022102355957,
|
|
"logps/ref_rejected": -106.71418762207031,
|
|
"logps/rejected": -159.48861694335938,
|
|
"loss": 0.6623,
|
|
"margin_dpo/margin_mean": 37.396568298339844,
|
|
"margin_dpo/margin_std": 30.360065460205078,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.18061674008810572,
|
|
"fcm_dpo/beta": 0.033041030168533325,
|
|
"fcm_dpo/delta": 0.02030833438038826,
|
|
"fcm_dpo/margin": 25.154926300048828,
|
|
"fcm_dpo/q_t": 0.34362679719924927,
|
|
"grad_norm": 34.55007553100586,
|
|
"learning_rate": 4.908044411417711e-07,
|
|
"logits/chosen": -0.6678287982940674,
|
|
"logits/rejected": -0.63847815990448,
|
|
"logps/chosen": -77.76958465576172,
|
|
"logps/ref_chosen": -61.33863830566406,
|
|
"logps/ref_rejected": -87.775390625,
|
|
"logps/rejected": -129.36126708984375,
|
|
"loss": 0.9621,
|
|
"margin_dpo/margin_mean": 25.154926300048828,
|
|
"margin_dpo/margin_std": 34.486244201660156,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.18208516886930984,
|
|
"fcm_dpo/beta": 0.03184504061937332,
|
|
"fcm_dpo/delta": -0.2603537142276764,
|
|
"fcm_dpo/margin": 33.94874572753906,
|
|
"fcm_dpo/q_t": 0.3141557276248932,
|
|
"grad_norm": 35.607940673828125,
|
|
"learning_rate": 4.904564107932048e-07,
|
|
"logits/chosen": -0.6511549353599548,
|
|
"logits/rejected": -0.6457709074020386,
|
|
"logps/chosen": -89.34744262695312,
|
|
"logps/ref_chosen": -71.44833374023438,
|
|
"logps/ref_rejected": -117.58056640625,
|
|
"logps/rejected": -169.42843627929688,
|
|
"loss": 0.865,
|
|
"margin_dpo/margin_mean": 33.94874954223633,
|
|
"margin_dpo/margin_std": 43.01670455932617,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.18355359765051396,
|
|
"fcm_dpo/beta": 0.03098900616168976,
|
|
"fcm_dpo/delta": -0.136797696352005,
|
|
"fcm_dpo/margin": 31.42009735107422,
|
|
"fcm_dpo/q_t": 0.31026577949523926,
|
|
"grad_norm": 29.541427612304688,
|
|
"learning_rate": 4.90102044194588e-07,
|
|
"logits/chosen": -0.6053882241249084,
|
|
"logits/rejected": -0.5895368456840515,
|
|
"logps/chosen": -64.38691711425781,
|
|
"logps/ref_chosen": -50.136940002441406,
|
|
"logps/ref_rejected": -83.98861694335938,
|
|
"logps/rejected": -129.65869140625,
|
|
"loss": 0.8086,
|
|
"margin_dpo/margin_mean": 31.42009735107422,
|
|
"margin_dpo/margin_std": 31.38010025024414,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.18502202643171806,
|
|
"fcm_dpo/beta": 0.030372876673936844,
|
|
"fcm_dpo/delta": -0.059251464903354645,
|
|
"fcm_dpo/margin": 29.75729751586914,
|
|
"fcm_dpo/q_t": 0.32057902216911316,
|
|
"grad_norm": 31.237802505493164,
|
|
"learning_rate": 4.897413506838102e-07,
|
|
"logits/chosen": -0.6320816874504089,
|
|
"logits/rejected": -0.6053131222724915,
|
|
"logps/chosen": -71.67118072509766,
|
|
"logps/ref_chosen": -55.66706848144531,
|
|
"logps/ref_rejected": -98.1297607421875,
|
|
"logps/rejected": -143.89117431640625,
|
|
"loss": 0.8432,
|
|
"margin_dpo/margin_mean": 29.75729751586914,
|
|
"margin_dpo/margin_std": 31.18754768371582,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.18649045521292218,
|
|
"fcm_dpo/beta": 0.030883219093084335,
|
|
"fcm_dpo/delta": 0.10523413866758347,
|
|
"fcm_dpo/margin": 24.356143951416016,
|
|
"fcm_dpo/q_t": 0.3438740372657776,
|
|
"grad_norm": 31.248865127563477,
|
|
"learning_rate": 4.89374339765481e-07,
|
|
"logits/chosen": -0.64985591173172,
|
|
"logits/rejected": -0.6203486919403076,
|
|
"logps/chosen": -71.13253021240234,
|
|
"logps/ref_chosen": -56.55467987060547,
|
|
"logps/ref_rejected": -76.7957763671875,
|
|
"logps/rejected": -115.72977447509766,
|
|
"loss": 0.9098,
|
|
"margin_dpo/margin_mean": 24.356143951416016,
|
|
"margin_dpo/margin_std": 26.364276885986328,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.18795888399412627,
|
|
"fcm_dpo/beta": 0.03142295777797699,
|
|
"fcm_dpo/delta": 0.059074968099594116,
|
|
"fcm_dpo/margin": 25.285329818725586,
|
|
"fcm_dpo/q_t": 0.34666118025779724,
|
|
"grad_norm": 36.50014877319336,
|
|
"learning_rate": 4.890010211106795e-07,
|
|
"logits/chosen": -0.6584875583648682,
|
|
"logits/rejected": -0.6175848245620728,
|
|
"logps/chosen": -74.28919219970703,
|
|
"logps/ref_chosen": -58.12095642089844,
|
|
"logps/ref_rejected": -76.43896484375,
|
|
"logps/rejected": -117.89253234863281,
|
|
"loss": 0.9482,
|
|
"margin_dpo/margin_mean": 25.285327911376953,
|
|
"margin_dpo/margin_std": 32.572845458984375,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.1894273127753304,
|
|
"fcm_dpo/beta": 0.031564533710479736,
|
|
"fcm_dpo/delta": 0.029005445539951324,
|
|
"fcm_dpo/margin": 26.0760498046875,
|
|
"fcm_dpo/q_t": 0.34760773181915283,
|
|
"grad_norm": 37.66703414916992,
|
|
"learning_rate": 4.88621404556699e-07,
|
|
"logits/chosen": -0.6664289236068726,
|
|
"logits/rejected": -0.642329216003418,
|
|
"logps/chosen": -88.56261444091797,
|
|
"logps/ref_chosen": -66.91637420654297,
|
|
"logps/ref_rejected": -96.6422119140625,
|
|
"logps/rejected": -144.364501953125,
|
|
"loss": 0.9722,
|
|
"margin_dpo/margin_mean": 26.0760498046875,
|
|
"margin_dpo/margin_std": 36.57023620605469,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.19089574155653452,
|
|
"fcm_dpo/beta": 0.03058389388024807,
|
|
"fcm_dpo/delta": -0.32343757152557373,
|
|
"fcm_dpo/margin": 37.14537048339844,
|
|
"fcm_dpo/q_t": 0.2883310616016388,
|
|
"grad_norm": 29.162261962890625,
|
|
"learning_rate": 4.882355001067891e-07,
|
|
"logits/chosen": -0.6344603300094604,
|
|
"logits/rejected": -0.6262820959091187,
|
|
"logps/chosen": -60.690242767333984,
|
|
"logps/ref_chosen": -44.66685104370117,
|
|
"logps/ref_rejected": -82.78165435791016,
|
|
"logps/rejected": -135.95042419433594,
|
|
"loss": 0.7976,
|
|
"margin_dpo/margin_mean": 37.14537048339844,
|
|
"margin_dpo/margin_std": 36.782554626464844,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.19236417033773862,
|
|
"fcm_dpo/beta": 0.02876003086566925,
|
|
"fcm_dpo/delta": -0.16914135217666626,
|
|
"fcm_dpo/margin": 34.836463928222656,
|
|
"fcm_dpo/q_t": 0.29792845249176025,
|
|
"grad_norm": 29.51772117614746,
|
|
"learning_rate": 4.878433179298909e-07,
|
|
"logits/chosen": -0.6310759782791138,
|
|
"logits/rejected": -0.627743661403656,
|
|
"logps/chosen": -57.54106903076172,
|
|
"logps/ref_chosen": -44.924591064453125,
|
|
"logps/ref_rejected": -88.44401550292969,
|
|
"logps/rejected": -135.89695739746094,
|
|
"loss": 0.7855,
|
|
"margin_dpo/margin_mean": 34.836463928222656,
|
|
"margin_dpo/margin_std": 32.454898834228516,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.19383259911894274,
|
|
"fcm_dpo/beta": 0.028265126049518585,
|
|
"fcm_dpo/delta": -0.045259568840265274,
|
|
"fcm_dpo/margin": 31.518386840820312,
|
|
"fcm_dpo/q_t": 0.3281964957714081,
|
|
"grad_norm": 31.3057918548584,
|
|
"learning_rate": 4.874448683603694e-07,
|
|
"logits/chosen": -0.6486563682556152,
|
|
"logits/rejected": -0.6290041208267212,
|
|
"logps/chosen": -77.1704330444336,
|
|
"logps/ref_chosen": -59.00108337402344,
|
|
"logps/ref_rejected": -87.89215087890625,
|
|
"logps/rejected": -137.57989501953125,
|
|
"loss": 0.8694,
|
|
"margin_dpo/margin_mean": 31.518390655517578,
|
|
"margin_dpo/margin_std": 36.52690124511719,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.19530102790014683,
|
|
"fcm_dpo/beta": 0.028410259634256363,
|
|
"fcm_dpo/delta": 0.037730347365140915,
|
|
"fcm_dpo/margin": 28.69933319091797,
|
|
"fcm_dpo/q_t": 0.3451124429702759,
|
|
"grad_norm": 30.7010440826416,
|
|
"learning_rate": 4.870401618977415e-07,
|
|
"logits/chosen": -0.6556867361068726,
|
|
"logits/rejected": -0.6370849609375,
|
|
"logps/chosen": -87.88372802734375,
|
|
"logps/ref_chosen": -66.60449981689453,
|
|
"logps/ref_rejected": -96.33355712890625,
|
|
"logps/rejected": -146.3121337890625,
|
|
"loss": 0.9262,
|
|
"margin_dpo/margin_mean": 28.69933319091797,
|
|
"margin_dpo/margin_std": 36.48450469970703,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.19676945668135096,
|
|
"fcm_dpo/beta": 0.028403986245393753,
|
|
"fcm_dpo/delta": -0.05411171913146973,
|
|
"fcm_dpo/margin": 31.654129028320312,
|
|
"fcm_dpo/q_t": 0.31937503814697266,
|
|
"grad_norm": 30.17385482788086,
|
|
"learning_rate": 4.866292092063986e-07,
|
|
"logits/chosen": -0.6521985530853271,
|
|
"logits/rejected": -0.6217156648635864,
|
|
"logps/chosen": -68.50302124023438,
|
|
"logps/ref_chosen": -52.06925582885742,
|
|
"logps/ref_rejected": -87.6545181274414,
|
|
"logps/rejected": -135.74241638183594,
|
|
"loss": 0.8195,
|
|
"margin_dpo/margin_mean": 31.654129028320312,
|
|
"margin_dpo/margin_std": 30.899707794189453,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.19823788546255505,
|
|
"fcm_dpo/beta": 0.027399450540542603,
|
|
"fcm_dpo/delta": -0.2010505199432373,
|
|
"fcm_dpo/margin": 37.6085319519043,
|
|
"fcm_dpo/q_t": 0.3078111410140991,
|
|
"grad_norm": 30.048168182373047,
|
|
"learning_rate": 4.862120211153265e-07,
|
|
"logits/chosen": -0.6633049249649048,
|
|
"logits/rejected": -0.6791324019432068,
|
|
"logps/chosen": -70.54566192626953,
|
|
"logps/ref_chosen": -50.353858947753906,
|
|
"logps/ref_rejected": -115.97975158691406,
|
|
"logps/rejected": -173.7800750732422,
|
|
"loss": 0.8237,
|
|
"margin_dpo/margin_mean": 37.6085319519043,
|
|
"margin_dpo/margin_std": 40.70466613769531,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.19970631424375918,
|
|
"fcm_dpo/beta": 0.02726733312010765,
|
|
"fcm_dpo/delta": 0.10746175795793533,
|
|
"fcm_dpo/margin": 27.498376846313477,
|
|
"fcm_dpo/q_t": 0.3627038598060608,
|
|
"grad_norm": 32.92823028564453,
|
|
"learning_rate": 4.857886086178193e-07,
|
|
"logits/chosen": -0.6476492881774902,
|
|
"logits/rejected": -0.6248095035552979,
|
|
"logps/chosen": -88.18641662597656,
|
|
"logps/ref_chosen": -65.072509765625,
|
|
"logps/ref_rejected": -96.32122802734375,
|
|
"logps/rejected": -146.9335174560547,
|
|
"loss": 0.9683,
|
|
"margin_dpo/margin_mean": 27.498376846313477,
|
|
"margin_dpo/margin_std": 38.72801208496094,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.2011747430249633,
|
|
"fcm_dpo/beta": 0.026446528732776642,
|
|
"fcm_dpo/delta": -0.28031742572784424,
|
|
"fcm_dpo/margin": 41.52157211303711,
|
|
"fcm_dpo/q_t": 0.30795353651046753,
|
|
"grad_norm": 29.9466609954834,
|
|
"learning_rate": 4.853589828711902e-07,
|
|
"logits/chosen": -0.6132858991622925,
|
|
"logits/rejected": -0.6249020099639893,
|
|
"logps/chosen": -73.17112731933594,
|
|
"logps/ref_chosen": -48.759117126464844,
|
|
"logps/ref_rejected": -113.86376953125,
|
|
"logps/rejected": -179.79736328125,
|
|
"loss": 0.8465,
|
|
"margin_dpo/margin_mean": 41.52157211303711,
|
|
"margin_dpo/margin_std": 49.937278747558594,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.2026431718061674,
|
|
"fcm_dpo/beta": 0.02638918161392212,
|
|
"fcm_dpo/delta": 0.0248483307659626,
|
|
"fcm_dpo/margin": 31.32628631591797,
|
|
"fcm_dpo/q_t": 0.33315446972846985,
|
|
"grad_norm": 33.18700408935547,
|
|
"learning_rate": 4.849231551964771e-07,
|
|
"logits/chosen": -0.6230736374855042,
|
|
"logits/rejected": -0.603124737739563,
|
|
"logps/chosen": -86.28077697753906,
|
|
"logps/ref_chosen": -60.519649505615234,
|
|
"logps/ref_rejected": -93.19694519042969,
|
|
"logps/rejected": -150.28436279296875,
|
|
"loss": 0.8956,
|
|
"margin_dpo/margin_mean": 31.32628631591797,
|
|
"margin_dpo/margin_std": 35.16431427001953,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.20411160058737152,
|
|
"fcm_dpo/beta": 0.025706283748149872,
|
|
"fcm_dpo/delta": -0.17333011329174042,
|
|
"fcm_dpo/margin": 39.14105987548828,
|
|
"fcm_dpo/q_t": 0.30358999967575073,
|
|
"grad_norm": 28.363384246826172,
|
|
"learning_rate": 4.844811370781446e-07,
|
|
"logits/chosen": -0.6366190910339355,
|
|
"logits/rejected": -0.6161980032920837,
|
|
"logps/chosen": -66.5361099243164,
|
|
"logps/ref_chosen": -46.89138412475586,
|
|
"logps/ref_rejected": -79.72798156738281,
|
|
"logps/rejected": -138.51376342773438,
|
|
"loss": 0.7925,
|
|
"margin_dpo/margin_mean": 39.14105987548828,
|
|
"margin_dpo/margin_std": 38.770042419433594,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.2055800293685756,
|
|
"fcm_dpo/beta": 0.02510003000497818,
|
|
"fcm_dpo/delta": -0.06664561480283737,
|
|
"fcm_dpo/margin": 36.26915740966797,
|
|
"fcm_dpo/q_t": 0.3291313052177429,
|
|
"grad_norm": 30.79945945739746,
|
|
"learning_rate": 4.840329401637809e-07,
|
|
"logits/chosen": -0.6232326030731201,
|
|
"logits/rejected": -0.6018444299697876,
|
|
"logps/chosen": -82.54205322265625,
|
|
"logps/ref_chosen": -58.97471618652344,
|
|
"logps/ref_rejected": -83.28410339355469,
|
|
"logps/rejected": -143.12060546875,
|
|
"loss": 0.8953,
|
|
"margin_dpo/margin_mean": 36.26915740966797,
|
|
"margin_dpo/margin_std": 43.71975326538086,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.20704845814977973,
|
|
"fcm_dpo/beta": 0.02491670660674572,
|
|
"fcm_dpo/delta": -0.04375480115413666,
|
|
"fcm_dpo/margin": 35.71399688720703,
|
|
"fcm_dpo/q_t": 0.3214990496635437,
|
|
"grad_norm": 33.90923309326172,
|
|
"learning_rate": 4.83578576263792e-07,
|
|
"logits/chosen": -0.6241617202758789,
|
|
"logits/rejected": -0.6002821326255798,
|
|
"logps/chosen": -98.45579528808594,
|
|
"logps/ref_chosen": -75.07566833496094,
|
|
"logps/ref_rejected": -98.1922607421875,
|
|
"logps/rejected": -157.286376953125,
|
|
"loss": 0.8779,
|
|
"margin_dpo/margin_mean": 35.71399688720703,
|
|
"margin_dpo/margin_std": 39.75990295410156,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.20851688693098386,
|
|
"fcm_dpo/beta": 0.024470651522278786,
|
|
"fcm_dpo/delta": -0.15550553798675537,
|
|
"fcm_dpo/margin": 40.46052551269531,
|
|
"fcm_dpo/q_t": 0.3215620219707489,
|
|
"grad_norm": 33.35566329956055,
|
|
"learning_rate": 4.83118057351089e-07,
|
|
"logits/chosen": -0.5816446542739868,
|
|
"logits/rejected": -0.5725831985473633,
|
|
"logps/chosen": -86.0451889038086,
|
|
"logps/ref_chosen": -58.027931213378906,
|
|
"logps/ref_rejected": -94.58222961425781,
|
|
"logps/rejected": -163.06002807617188,
|
|
"loss": 0.9117,
|
|
"margin_dpo/margin_mean": 40.46052551269531,
|
|
"margin_dpo/margin_std": 50.63873291015625,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.20998531571218795,
|
|
"fcm_dpo/beta": 0.0247478224337101,
|
|
"fcm_dpo/delta": 0.21386152505874634,
|
|
"fcm_dpo/margin": 26.287752151489258,
|
|
"fcm_dpo/q_t": 0.37550124526023865,
|
|
"grad_norm": 38.79267120361328,
|
|
"learning_rate": 4.826513955607734e-07,
|
|
"logits/chosen": -0.6034103631973267,
|
|
"logits/rejected": -0.5768376588821411,
|
|
"logps/chosen": -87.43092346191406,
|
|
"logps/ref_chosen": -57.59645080566406,
|
|
"logps/ref_rejected": -78.99957275390625,
|
|
"logps/rejected": -135.12179565429688,
|
|
"loss": 1.0509,
|
|
"margin_dpo/margin_mean": 26.287750244140625,
|
|
"margin_dpo/margin_std": 41.523109436035156,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.21145374449339208,
|
|
"fcm_dpo/beta": 0.02499964088201523,
|
|
"fcm_dpo/delta": -0.029541175812482834,
|
|
"fcm_dpo/margin": 35.07600784301758,
|
|
"fcm_dpo/q_t": 0.3219374418258667,
|
|
"grad_norm": 29.825363159179688,
|
|
"learning_rate": 4.821786031898176e-07,
|
|
"logits/chosen": -0.5954188704490662,
|
|
"logits/rejected": -0.5570698380470276,
|
|
"logps/chosen": -84.0816650390625,
|
|
"logps/ref_chosen": -59.90636444091797,
|
|
"logps/ref_rejected": -82.00025939941406,
|
|
"logps/rejected": -141.25157165527344,
|
|
"loss": 0.8468,
|
|
"margin_dpo/margin_mean": 35.076011657714844,
|
|
"margin_dpo/margin_std": 35.606632232666016,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.21292217327459617,
|
|
"fcm_dpo/beta": 0.024706725031137466,
|
|
"fcm_dpo/delta": -0.06008060276508331,
|
|
"fcm_dpo/margin": 36.604347229003906,
|
|
"fcm_dpo/q_t": 0.324362576007843,
|
|
"grad_norm": 35.410675048828125,
|
|
"learning_rate": 4.816996926967401e-07,
|
|
"logits/chosen": -0.5916780233383179,
|
|
"logits/rejected": -0.55748450756073,
|
|
"logps/chosen": -83.62374877929688,
|
|
"logps/ref_chosen": -56.60066604614258,
|
|
"logps/ref_rejected": -77.86631774902344,
|
|
"logps/rejected": -141.49374389648438,
|
|
"loss": 0.8636,
|
|
"margin_dpo/margin_mean": 36.604347229003906,
|
|
"margin_dpo/margin_std": 40.430137634277344,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.2143906020558003,
|
|
"fcm_dpo/beta": 0.025057196617126465,
|
|
"fcm_dpo/delta": 0.12556242942810059,
|
|
"fcm_dpo/margin": 29.2852783203125,
|
|
"fcm_dpo/q_t": 0.3575107455253601,
|
|
"grad_norm": 46.84541702270508,
|
|
"learning_rate": 4.812146767012779e-07,
|
|
"logits/chosen": -0.5719867944717407,
|
|
"logits/rejected": -0.5233458280563354,
|
|
"logps/chosen": -103.90762329101562,
|
|
"logps/ref_chosen": -66.00045013427734,
|
|
"logps/ref_rejected": -81.70278930664062,
|
|
"logps/rejected": -148.89523315429688,
|
|
"loss": 1.0217,
|
|
"margin_dpo/margin_mean": 29.2852783203125,
|
|
"margin_dpo/margin_std": 43.19415283203125,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.21585903083700442,
|
|
"fcm_dpo/beta": 0.02465198189020157,
|
|
"fcm_dpo/delta": -0.14972899854183197,
|
|
"fcm_dpo/margin": 39.91291046142578,
|
|
"fcm_dpo/q_t": 0.3194273114204407,
|
|
"grad_norm": 39.194644927978516,
|
|
"learning_rate": 4.807235679840536e-07,
|
|
"logits/chosen": -0.5805500149726868,
|
|
"logits/rejected": -0.5455455183982849,
|
|
"logps/chosen": -82.01380157470703,
|
|
"logps/ref_chosen": -53.405487060546875,
|
|
"logps/ref_rejected": -71.39060974121094,
|
|
"logps/rejected": -139.91183471679688,
|
|
"loss": 0.8708,
|
|
"margin_dpo/margin_mean": 39.91291046142578,
|
|
"margin_dpo/margin_std": 47.64521789550781,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.2173274596182085,
|
|
"fcm_dpo/beta": 0.02513396367430687,
|
|
"fcm_dpo/delta": 0.046579986810684204,
|
|
"fcm_dpo/margin": 31.903329849243164,
|
|
"fcm_dpo/q_t": 0.3463394045829773,
|
|
"grad_norm": 36.274044036865234,
|
|
"learning_rate": 4.802263794862384e-07,
|
|
"logits/chosen": -0.6228268146514893,
|
|
"logits/rejected": -0.6043528318405151,
|
|
"logps/chosen": -91.84988403320312,
|
|
"logps/ref_chosen": -64.93708038330078,
|
|
"logps/ref_rejected": -103.09384155273438,
|
|
"logps/rejected": -161.90997314453125,
|
|
"loss": 0.9264,
|
|
"margin_dpo/margin_mean": 31.903329849243164,
|
|
"margin_dpo/margin_std": 37.86564636230469,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.21879588839941264,
|
|
"fcm_dpo/beta": 0.023908747360110283,
|
|
"fcm_dpo/delta": -0.15772250294685364,
|
|
"fcm_dpo/margin": 41.27139663696289,
|
|
"fcm_dpo/q_t": 0.30605483055114746,
|
|
"grad_norm": 32.16698455810547,
|
|
"learning_rate": 4.797231243092118e-07,
|
|
"logits/chosen": -0.6210156083106995,
|
|
"logits/rejected": -0.5976389050483704,
|
|
"logps/chosen": -85.54385375976562,
|
|
"logps/ref_chosen": -58.47376251220703,
|
|
"logps/ref_rejected": -99.31474304199219,
|
|
"logps/rejected": -167.65623474121094,
|
|
"loss": 0.8122,
|
|
"margin_dpo/margin_mean": 41.271400451660156,
|
|
"margin_dpo/margin_std": 41.392826080322266,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.22026431718061673,
|
|
"fcm_dpo/beta": 0.02343493327498436,
|
|
"fcm_dpo/delta": -0.06840753555297852,
|
|
"fcm_dpo/margin": 38.6958122253418,
|
|
"fcm_dpo/q_t": 0.33513975143432617,
|
|
"grad_norm": 32.42837142944336,
|
|
"learning_rate": 4.792138157142157e-07,
|
|
"logits/chosen": -0.5992769002914429,
|
|
"logits/rejected": -0.5919263362884521,
|
|
"logps/chosen": -71.05235290527344,
|
|
"logps/ref_chosen": -45.705810546875,
|
|
"logps/ref_rejected": -83.34759521484375,
|
|
"logps/rejected": -147.38995361328125,
|
|
"loss": 0.9001,
|
|
"margin_dpo/margin_mean": 38.6958122253418,
|
|
"margin_dpo/margin_std": 49.43400573730469,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.22173274596182085,
|
|
"fcm_dpo/beta": 0.023284468799829483,
|
|
"fcm_dpo/delta": -0.12595298886299133,
|
|
"fcm_dpo/margin": 41.395328521728516,
|
|
"fcm_dpo/q_t": 0.30875810980796814,
|
|
"grad_norm": 31.666311264038086,
|
|
"learning_rate": 4.786984671220053e-07,
|
|
"logits/chosen": -0.6710444688796997,
|
|
"logits/rejected": -0.6334470510482788,
|
|
"logps/chosen": -98.85562133789062,
|
|
"logps/ref_chosen": -70.57083129882812,
|
|
"logps/ref_rejected": -100.46382141113281,
|
|
"logps/rejected": -170.14393615722656,
|
|
"loss": 0.8177,
|
|
"margin_dpo/margin_mean": 41.39532470703125,
|
|
"margin_dpo/margin_std": 41.780487060546875,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.22320117474302498,
|
|
"fcm_dpo/beta": 0.022631559520959854,
|
|
"fcm_dpo/delta": -0.1879671961069107,
|
|
"fcm_dpo/margin": 45.03105926513672,
|
|
"fcm_dpo/q_t": 0.3069019615650177,
|
|
"grad_norm": 28.960357666015625,
|
|
"learning_rate": 4.78177092112495e-07,
|
|
"logits/chosen": -0.6386080980300903,
|
|
"logits/rejected": -0.6261081099510193,
|
|
"logps/chosen": -84.44389343261719,
|
|
"logps/ref_chosen": -60.16438674926758,
|
|
"logps/ref_rejected": -106.14045715332031,
|
|
"logps/rejected": -175.45101928710938,
|
|
"loss": 0.8142,
|
|
"margin_dpo/margin_mean": 45.03105926513672,
|
|
"margin_dpo/margin_std": 46.33378601074219,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.22466960352422907,
|
|
"fcm_dpo/beta": 0.022312451153993607,
|
|
"fcm_dpo/delta": -0.0069070253521203995,
|
|
"fcm_dpo/margin": 38.37080001831055,
|
|
"fcm_dpo/q_t": 0.3373130261898041,
|
|
"grad_norm": 27.920183181762695,
|
|
"learning_rate": 4.776497044244016e-07,
|
|
"logits/chosen": -0.6307264566421509,
|
|
"logits/rejected": -0.6185749769210815,
|
|
"logps/chosen": -80.81060028076172,
|
|
"logps/ref_chosen": -56.315277099609375,
|
|
"logps/ref_rejected": -85.65583801269531,
|
|
"logps/rejected": -148.52195739746094,
|
|
"loss": 0.9136,
|
|
"margin_dpo/margin_mean": 38.37080001831055,
|
|
"margin_dpo/margin_std": 48.248252868652344,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.2261380323054332,
|
|
"fcm_dpo/beta": 0.022488757967948914,
|
|
"fcm_dpo/delta": 0.013789080083370209,
|
|
"fcm_dpo/margin": 37.155738830566406,
|
|
"fcm_dpo/q_t": 0.34336578845977783,
|
|
"grad_norm": 33.870460510253906,
|
|
"learning_rate": 4.771163179548808e-07,
|
|
"logits/chosen": -0.613184928894043,
|
|
"logits/rejected": -0.6004446744918823,
|
|
"logps/chosen": -91.80575561523438,
|
|
"logps/ref_chosen": -62.74256896972656,
|
|
"logps/ref_rejected": -104.24420166015625,
|
|
"logps/rejected": -170.463134765625,
|
|
"loss": 0.958,
|
|
"margin_dpo/margin_mean": 37.15574264526367,
|
|
"margin_dpo/margin_std": 48.641082763671875,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.2276064610866373,
|
|
"fcm_dpo/beta": 0.0222769845277071,
|
|
"fcm_dpo/delta": -0.047925353050231934,
|
|
"fcm_dpo/margin": 40.09435272216797,
|
|
"fcm_dpo/q_t": 0.3238127827644348,
|
|
"grad_norm": 28.904926300048828,
|
|
"learning_rate": 4.7657694675916247e-07,
|
|
"logits/chosen": -0.6429010033607483,
|
|
"logits/rejected": -0.615513265132904,
|
|
"logps/chosen": -84.72270965576172,
|
|
"logps/ref_chosen": -60.65318298339844,
|
|
"logps/ref_rejected": -77.49220275878906,
|
|
"logps/rejected": -141.6560821533203,
|
|
"loss": 0.8665,
|
|
"margin_dpo/margin_mean": 40.09435272216797,
|
|
"margin_dpo/margin_std": 44.73542022705078,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.2290748898678414,
|
|
"fcm_dpo/beta": 0.02297946810722351,
|
|
"fcm_dpo/delta": 0.26264724135398865,
|
|
"fcm_dpo/margin": 26.240877151489258,
|
|
"fcm_dpo/q_t": 0.382540762424469,
|
|
"grad_norm": 47.13714599609375,
|
|
"learning_rate": 4.7603160505017893e-07,
|
|
"logits/chosen": -0.609096109867096,
|
|
"logits/rejected": -0.5791852474212646,
|
|
"logps/chosen": -104.53880310058594,
|
|
"logps/ref_chosen": -69.49188232421875,
|
|
"logps/ref_rejected": -77.16929626464844,
|
|
"logps/rejected": -138.45709228515625,
|
|
"loss": 1.1578,
|
|
"margin_dpo/margin_mean": 26.240875244140625,
|
|
"margin_dpo/margin_std": 51.68781280517578,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.2305433186490455,
|
|
"fcm_dpo/beta": 0.02237515337765217,
|
|
"fcm_dpo/delta": -0.21659667789936066,
|
|
"fcm_dpo/margin": 46.5479736328125,
|
|
"fcm_dpo/q_t": 0.29945075511932373,
|
|
"grad_norm": 34.71981430053711,
|
|
"learning_rate": 4.7548030719819154e-07,
|
|
"logits/chosen": -0.6424323320388794,
|
|
"logits/rejected": -0.6342293620109558,
|
|
"logps/chosen": -94.13262939453125,
|
|
"logps/ref_chosen": -61.368438720703125,
|
|
"logps/ref_rejected": -107.64636993408203,
|
|
"logps/rejected": -186.9585418701172,
|
|
"loss": 0.8133,
|
|
"margin_dpo/margin_mean": 46.5479736328125,
|
|
"margin_dpo/margin_std": 47.89894104003906,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.23201174743024963,
|
|
"fcm_dpo/beta": 0.0216847974807024,
|
|
"fcm_dpo/delta": -0.17651230096817017,
|
|
"fcm_dpo/margin": 46.50477600097656,
|
|
"fcm_dpo/q_t": 0.32549408078193665,
|
|
"grad_norm": 27.118051528930664,
|
|
"learning_rate": 4.7492306773041136e-07,
|
|
"logits/chosen": -0.6172913312911987,
|
|
"logits/rejected": -0.6179910898208618,
|
|
"logps/chosen": -87.16582489013672,
|
|
"logps/ref_chosen": -57.612918853759766,
|
|
"logps/ref_rejected": -113.6946792602539,
|
|
"logps/rejected": -189.75234985351562,
|
|
"loss": 0.8785,
|
|
"margin_dpo/margin_mean": 46.50477600097656,
|
|
"margin_dpo/margin_std": 59.50420379638672,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.23348017621145375,
|
|
"fcm_dpo/beta": 0.021733447909355164,
|
|
"fcm_dpo/delta": 0.028508372604846954,
|
|
"fcm_dpo/margin": 37.868499755859375,
|
|
"fcm_dpo/q_t": 0.3453836739063263,
|
|
"grad_norm": 54.266422271728516,
|
|
"learning_rate": 4.743599013306165e-07,
|
|
"logits/chosen": -0.6216092705726624,
|
|
"logits/rejected": -0.5767144560813904,
|
|
"logps/chosen": -115.16629028320312,
|
|
"logps/ref_chosen": -81.56034851074219,
|
|
"logps/ref_rejected": -88.89871215820312,
|
|
"logps/rejected": -160.37315368652344,
|
|
"loss": 0.946,
|
|
"margin_dpo/margin_mean": 37.868499755859375,
|
|
"margin_dpo/margin_std": 50.3989143371582,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.23494860499265785,
|
|
"fcm_dpo/beta": 0.02132713794708252,
|
|
"fcm_dpo/delta": -0.06508233398199081,
|
|
"fcm_dpo/margin": 42.595428466796875,
|
|
"fcm_dpo/q_t": 0.3389905095100403,
|
|
"grad_norm": 37.15032196044922,
|
|
"learning_rate": 4.737908228387656e-07,
|
|
"logits/chosen": -0.6360121965408325,
|
|
"logits/rejected": -0.6134800910949707,
|
|
"logps/chosen": -99.04662322998047,
|
|
"logps/ref_chosen": -65.73088073730469,
|
|
"logps/ref_rejected": -97.21781921386719,
|
|
"logps/rejected": -173.12899780273438,
|
|
"loss": 0.9272,
|
|
"margin_dpo/margin_mean": 42.59543228149414,
|
|
"margin_dpo/margin_std": 56.86052703857422,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.23641703377386197,
|
|
"fcm_dpo/beta": 0.021413713693618774,
|
|
"fcm_dpo/delta": -0.0029074866324663162,
|
|
"fcm_dpo/margin": 39.81043243408203,
|
|
"fcm_dpo/q_t": 0.3397424519062042,
|
|
"grad_norm": 32.6351432800293,
|
|
"learning_rate": 4.7321584725060594e-07,
|
|
"logits/chosen": -0.6320241689682007,
|
|
"logits/rejected": -0.614162802696228,
|
|
"logps/chosen": -82.55064392089844,
|
|
"logps/ref_chosen": -52.43647003173828,
|
|
"logps/ref_rejected": -83.43095397949219,
|
|
"logps/rejected": -153.3555450439453,
|
|
"loss": 0.9246,
|
|
"margin_dpo/margin_mean": 39.81043243408203,
|
|
"margin_dpo/margin_std": 50.22443389892578,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.23788546255506607,
|
|
"fcm_dpo/beta": 0.020959045737981796,
|
|
"fcm_dpo/delta": -0.020670533180236816,
|
|
"fcm_dpo/margin": 41.22890090942383,
|
|
"fcm_dpo/q_t": 0.3389648199081421,
|
|
"grad_norm": 26.661542892456055,
|
|
"learning_rate": 4.7263498971727905e-07,
|
|
"logits/chosen": -0.6230508685112,
|
|
"logits/rejected": -0.5922760367393494,
|
|
"logps/chosen": -90.76399993896484,
|
|
"logps/ref_chosen": -62.6105842590332,
|
|
"logps/ref_rejected": -89.39057922363281,
|
|
"logps/rejected": -158.77288818359375,
|
|
"loss": 0.9148,
|
|
"margin_dpo/margin_mean": 41.22889709472656,
|
|
"margin_dpo/margin_std": 52.275760650634766,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.2393538913362702,
|
|
"fcm_dpo/beta": 0.021323660388588905,
|
|
"fcm_dpo/delta": 0.011209924705326557,
|
|
"fcm_dpo/margin": 39.377601623535156,
|
|
"fcm_dpo/q_t": 0.33567047119140625,
|
|
"grad_norm": 28.12457847595215,
|
|
"learning_rate": 4.720482655449212e-07,
|
|
"logits/chosen": -0.6243945956230164,
|
|
"logits/rejected": -0.5960164070129395,
|
|
"logps/chosen": -83.83268737792969,
|
|
"logps/ref_chosen": -55.021629333496094,
|
|
"logps/ref_rejected": -75.418212890625,
|
|
"logps/rejected": -143.60687255859375,
|
|
"loss": 0.8906,
|
|
"margin_dpo/margin_mean": 39.377601623535156,
|
|
"margin_dpo/margin_std": 45.302734375,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.24082232011747431,
|
|
"fcm_dpo/beta": 0.020698610693216324,
|
|
"fcm_dpo/delta": -0.11274396628141403,
|
|
"fcm_dpo/margin": 45.797706604003906,
|
|
"fcm_dpo/q_t": 0.311231791973114,
|
|
"grad_norm": 29.976978302001953,
|
|
"learning_rate": 4.714556901942599e-07,
|
|
"logits/chosen": -0.6112655401229858,
|
|
"logits/rejected": -0.5806326866149902,
|
|
"logps/chosen": -83.19902038574219,
|
|
"logps/ref_chosen": -55.64066696166992,
|
|
"logps/ref_rejected": -79.66463470458984,
|
|
"logps/rejected": -153.02069091796875,
|
|
"loss": 0.8155,
|
|
"margin_dpo/margin_mean": 45.79771041870117,
|
|
"margin_dpo/margin_std": 45.62419128417969,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.2422907488986784,
|
|
"fcm_dpo/beta": 0.021473418921232224,
|
|
"fcm_dpo/delta": 0.20934727787971497,
|
|
"fcm_dpo/margin": 30.47848129272461,
|
|
"fcm_dpo/q_t": 0.36805397272109985,
|
|
"grad_norm": 38.93865203857422,
|
|
"learning_rate": 4.708572792802069e-07,
|
|
"logits/chosen": -0.6419720649719238,
|
|
"logits/rejected": -0.5996274948120117,
|
|
"logps/chosen": -92.79157257080078,
|
|
"logps/ref_chosen": -61.310691833496094,
|
|
"logps/ref_rejected": -73.67060852050781,
|
|
"logps/rejected": -135.62997436523438,
|
|
"loss": 1.0007,
|
|
"margin_dpo/margin_mean": 30.47848129272461,
|
|
"margin_dpo/margin_std": 41.282371520996094,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.24375917767988253,
|
|
"fcm_dpo/beta": 0.020592810586094856,
|
|
"fcm_dpo/delta": -0.3078145384788513,
|
|
"fcm_dpo/margin": 54.38330841064453,
|
|
"fcm_dpo/q_t": 0.30514246225357056,
|
|
"grad_norm": 27.210079193115234,
|
|
"learning_rate": 4.702530485714461e-07,
|
|
"logits/chosen": -0.5745774507522583,
|
|
"logits/rejected": -0.580484926700592,
|
|
"logps/chosen": -80.91297912597656,
|
|
"logps/ref_chosen": -50.98360061645508,
|
|
"logps/ref_rejected": -98.09512329101562,
|
|
"logps/rejected": -182.40780639648438,
|
|
"loss": 0.8056,
|
|
"margin_dpo/margin_mean": 54.38330841064453,
|
|
"margin_dpo/margin_std": 63.627532958984375,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.24522760646108663,
|
|
"fcm_dpo/beta": 0.0195004865527153,
|
|
"fcm_dpo/delta": -0.295537531375885,
|
|
"fcm_dpo/margin": 57.01265335083008,
|
|
"fcm_dpo/q_t": 0.28371232748031616,
|
|
"grad_norm": 26.915447235107422,
|
|
"learning_rate": 4.6964301399001877e-07,
|
|
"logits/chosen": -0.5345897674560547,
|
|
"logits/rejected": -0.5327332615852356,
|
|
"logps/chosen": -81.28956604003906,
|
|
"logps/ref_chosen": -50.424095153808594,
|
|
"logps/ref_rejected": -96.03042602539062,
|
|
"logps/rejected": -183.90853881835938,
|
|
"loss": 0.7552,
|
|
"margin_dpo/margin_mean": 57.01264953613281,
|
|
"margin_dpo/margin_std": 52.89997863769531,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.24669603524229075,
|
|
"fcm_dpo/beta": 0.019154991954565048,
|
|
"fcm_dpo/delta": -0.011862130835652351,
|
|
"fcm_dpo/margin": 44.928321838378906,
|
|
"fcm_dpo/q_t": 0.3287070691585541,
|
|
"grad_norm": 30.181053161621094,
|
|
"learning_rate": 4.690271916109034e-07,
|
|
"logits/chosen": -0.5706725716590881,
|
|
"logits/rejected": -0.5486756563186646,
|
|
"logps/chosen": -83.01795959472656,
|
|
"logps/ref_chosen": -49.462825775146484,
|
|
"logps/ref_rejected": -75.30855560302734,
|
|
"logps/rejected": -153.79202270507812,
|
|
"loss": 0.8619,
|
|
"margin_dpo/margin_mean": 44.928321838378906,
|
|
"margin_dpo/margin_std": 48.069007873535156,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.24816446402349487,
|
|
"fcm_dpo/beta": 0.019270140677690506,
|
|
"fcm_dpo/delta": 0.09090002626180649,
|
|
"fcm_dpo/margin": 39.69366455078125,
|
|
"fcm_dpo/q_t": 0.36279886960983276,
|
|
"grad_norm": 31.3607234954834,
|
|
"learning_rate": 4.6840559766159235e-07,
|
|
"logits/chosen": -0.5877891778945923,
|
|
"logits/rejected": -0.564651370048523,
|
|
"logps/chosen": -92.62950134277344,
|
|
"logps/ref_chosen": -59.803443908691406,
|
|
"logps/ref_rejected": -83.34574890136719,
|
|
"logps/rejected": -155.86546325683594,
|
|
"loss": 1.004,
|
|
"margin_dpo/margin_mean": 39.69366455078125,
|
|
"margin_dpo/margin_std": 61.303009033203125,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.24963289280469897,
|
|
"fcm_dpo/beta": 0.019043531268835068,
|
|
"fcm_dpo/delta": -0.11414046585559845,
|
|
"fcm_dpo/margin": 49.9086799621582,
|
|
"fcm_dpo/q_t": 0.3118130564689636,
|
|
"grad_norm": 29.482023239135742,
|
|
"learning_rate": 4.6777824852166437e-07,
|
|
"logits/chosen": -0.5438283681869507,
|
|
"logits/rejected": -0.5250686407089233,
|
|
"logps/chosen": -79.19264221191406,
|
|
"logps/ref_chosen": -49.471771240234375,
|
|
"logps/ref_rejected": -75.91734313964844,
|
|
"logps/rejected": -155.546875,
|
|
"loss": 0.8341,
|
|
"margin_dpo/margin_mean": 49.9086799621582,
|
|
"margin_dpo/margin_std": 50.66975402832031,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.2511013215859031,
|
|
"fcm_dpo/beta": 0.019304830580949783,
|
|
"fcm_dpo/delta": 0.06730199605226517,
|
|
"fcm_dpo/margin": 40.81938171386719,
|
|
"fcm_dpo/q_t": 0.3554888367652893,
|
|
"grad_norm": 40.410728454589844,
|
|
"learning_rate": 4.6714516072235273e-07,
|
|
"logits/chosen": -0.591011643409729,
|
|
"logits/rejected": -0.5511142015457153,
|
|
"logps/chosen": -129.15768432617188,
|
|
"logps/ref_chosen": -84.49931335449219,
|
|
"logps/ref_rejected": -109.38209533691406,
|
|
"logps/rejected": -194.85986328125,
|
|
"loss": 0.9864,
|
|
"margin_dpo/margin_mean": 40.81937789916992,
|
|
"margin_dpo/margin_std": 60.447486877441406,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.2525697503671072,
|
|
"fcm_dpo/beta": 0.01938115805387497,
|
|
"fcm_dpo/delta": 0.0339435450732708,
|
|
"fcm_dpo/margin": 42.23426055908203,
|
|
"fcm_dpo/q_t": 0.34325116872787476,
|
|
"grad_norm": 37.05482864379883,
|
|
"learning_rate": 4.6650635094610966e-07,
|
|
"logits/chosen": -0.5794566869735718,
|
|
"logits/rejected": -0.5384765863418579,
|
|
"logps/chosen": -112.9964599609375,
|
|
"logps/ref_chosen": -68.65391540527344,
|
|
"logps/ref_rejected": -85.43667602539062,
|
|
"logps/rejected": -172.0134735107422,
|
|
"loss": 0.9452,
|
|
"margin_dpo/margin_mean": 42.23426055908203,
|
|
"margin_dpo/margin_std": 55.385589599609375,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.2540381791483113,
|
|
"fcm_dpo/beta": 0.01962945982813835,
|
|
"fcm_dpo/delta": 0.012805420905351639,
|
|
"fcm_dpo/margin": 42.6837158203125,
|
|
"fcm_dpo/q_t": 0.33354562520980835,
|
|
"grad_norm": 35.34359359741211,
|
|
"learning_rate": 4.6586183602616687e-07,
|
|
"logits/chosen": -0.579889714717865,
|
|
"logits/rejected": -0.5342808961868286,
|
|
"logps/chosen": -101.8213119506836,
|
|
"logps/ref_chosen": -63.050880432128906,
|
|
"logps/ref_rejected": -78.68392181396484,
|
|
"logps/rejected": -160.1380615234375,
|
|
"loss": 0.8791,
|
|
"margin_dpo/margin_mean": 42.6837158203125,
|
|
"margin_dpo/margin_std": 47.47747039794922,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.2555066079295154,
|
|
"fcm_dpo/beta": 0.019577287137508392,
|
|
"fcm_dpo/delta": -0.051362376660108566,
|
|
"fcm_dpo/margin": 45.75571060180664,
|
|
"fcm_dpo/q_t": 0.3332166075706482,
|
|
"grad_norm": 41.42070770263672,
|
|
"learning_rate": 4.652116329460919e-07,
|
|
"logits/chosen": -0.5348306894302368,
|
|
"logits/rejected": -0.5489102602005005,
|
|
"logps/chosen": -93.36699676513672,
|
|
"logps/ref_chosen": -53.36296844482422,
|
|
"logps/ref_rejected": -101.91120910644531,
|
|
"logps/rejected": -187.67095947265625,
|
|
"loss": 0.9046,
|
|
"margin_dpo/margin_mean": 45.755714416503906,
|
|
"margin_dpo/margin_std": 55.97746276855469,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.25697503671071953,
|
|
"fcm_dpo/beta": 0.018269890919327736,
|
|
"fcm_dpo/delta": -0.3679980933666229,
|
|
"fcm_dpo/margin": 64.21873474121094,
|
|
"fcm_dpo/q_t": 0.2768920063972473,
|
|
"grad_norm": 34.53947830200195,
|
|
"learning_rate": 4.645557588393406e-07,
|
|
"logits/chosen": -0.45041948556900024,
|
|
"logits/rejected": -0.4352826476097107,
|
|
"logps/chosen": -84.18891906738281,
|
|
"logps/ref_chosen": -45.417762756347656,
|
|
"logps/ref_rejected": -89.50579833984375,
|
|
"logps/rejected": -192.49569702148438,
|
|
"loss": 0.7209,
|
|
"margin_dpo/margin_mean": 64.21873474121094,
|
|
"margin_dpo/margin_std": 58.910743713378906,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.25844346549192365,
|
|
"fcm_dpo/beta": 0.01778358779847622,
|
|
"fcm_dpo/delta": -0.09820236265659332,
|
|
"fcm_dpo/margin": 52.805381774902344,
|
|
"fcm_dpo/q_t": 0.3222000002861023,
|
|
"grad_norm": 47.036460876464844,
|
|
"learning_rate": 4.638942309888058e-07,
|
|
"logits/chosen": -0.5136569738388062,
|
|
"logits/rejected": -0.5245624780654907,
|
|
"logps/chosen": -90.69015502929688,
|
|
"logps/ref_chosen": -50.452842712402344,
|
|
"logps/ref_rejected": -95.5589599609375,
|
|
"logps/rejected": -188.60165405273438,
|
|
"loss": 0.8453,
|
|
"margin_dpo/margin_mean": 52.80537796020508,
|
|
"margin_dpo/margin_std": 58.76519012451172,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.2599118942731278,
|
|
"fcm_dpo/beta": 0.01743006333708763,
|
|
"fcm_dpo/delta": -0.10580591857433319,
|
|
"fcm_dpo/margin": 54.26726531982422,
|
|
"fcm_dpo/q_t": 0.3224429488182068,
|
|
"grad_norm": 32.768924713134766,
|
|
"learning_rate": 4.6322706682636137e-07,
|
|
"logits/chosen": -0.5340214967727661,
|
|
"logits/rejected": -0.5136857032775879,
|
|
"logps/chosen": -106.60789489746094,
|
|
"logps/ref_chosen": -61.216468811035156,
|
|
"logps/ref_rejected": -95.89378356933594,
|
|
"logps/rejected": -195.55247497558594,
|
|
"loss": 0.855,
|
|
"margin_dpo/margin_mean": 54.26726150512695,
|
|
"margin_dpo/margin_std": 61.632484436035156,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.26138032305433184,
|
|
"fcm_dpo/beta": 0.016440367326140404,
|
|
"fcm_dpo/delta": -0.292092502117157,
|
|
"fcm_dpo/margin": 67.36396789550781,
|
|
"fcm_dpo/q_t": 0.29801633954048157,
|
|
"grad_norm": 42.785648345947266,
|
|
"learning_rate": 4.6255428393240354e-07,
|
|
"logits/chosen": -0.4795859456062317,
|
|
"logits/rejected": -0.48533114790916443,
|
|
"logps/chosen": -108.70709228515625,
|
|
"logps/ref_chosen": -58.26478958129883,
|
|
"logps/ref_rejected": -105.3653335571289,
|
|
"logps/rejected": -223.17160034179688,
|
|
"loss": 0.8,
|
|
"margin_dpo/margin_mean": 67.36396026611328,
|
|
"margin_dpo/margin_std": 74.31996154785156,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.26284875183553597,
|
|
"fcm_dpo/beta": 0.01604972966015339,
|
|
"fcm_dpo/delta": -0.04819926619529724,
|
|
"fcm_dpo/margin": 55.60845184326172,
|
|
"fcm_dpo/q_t": 0.33276423811912537,
|
|
"grad_norm": 37.59048080444336,
|
|
"learning_rate": 4.6187590003538724e-07,
|
|
"logits/chosen": -0.49698591232299805,
|
|
"logits/rejected": -0.48694515228271484,
|
|
"logps/chosen": -108.184814453125,
|
|
"logps/ref_chosen": -61.05832290649414,
|
|
"logps/ref_rejected": -90.52782440185547,
|
|
"logps/rejected": -193.26278686523438,
|
|
"loss": 0.9377,
|
|
"margin_dpo/margin_mean": 55.60845184326172,
|
|
"margin_dpo/margin_std": 71.06735229492188,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.2643171806167401,
|
|
"fcm_dpo/beta": 0.01576114445924759,
|
|
"fcm_dpo/delta": -0.13302375376224518,
|
|
"fcm_dpo/margin": 61.51070785522461,
|
|
"fcm_dpo/q_t": 0.30989372730255127,
|
|
"grad_norm": 25.751888275146484,
|
|
"learning_rate": 4.611919330113591e-07,
|
|
"logits/chosen": -0.5168595314025879,
|
|
"logits/rejected": -0.5018938183784485,
|
|
"logps/chosen": -93.7746810913086,
|
|
"logps/ref_chosen": -54.34272003173828,
|
|
"logps/ref_rejected": -98.21183776855469,
|
|
"logps/rejected": -199.15451049804688,
|
|
"loss": 0.8218,
|
|
"margin_dpo/margin_mean": 61.510704040527344,
|
|
"margin_dpo/margin_std": 62.60074996948242,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.2657856093979442,
|
|
"fcm_dpo/beta": 0.016240477561950684,
|
|
"fcm_dpo/delta": 0.2024461030960083,
|
|
"fcm_dpo/margin": 40.64165496826172,
|
|
"fcm_dpo/q_t": 0.3688216805458069,
|
|
"grad_norm": 28.267780303955078,
|
|
"learning_rate": 4.605024008834863e-07,
|
|
"logits/chosen": -0.5522741079330444,
|
|
"logits/rejected": -0.5164051055908203,
|
|
"logps/chosen": -89.09597778320312,
|
|
"logps/ref_chosen": -55.000457763671875,
|
|
"logps/ref_rejected": -61.656166076660156,
|
|
"logps/rejected": -136.39334106445312,
|
|
"loss": 1.0021,
|
|
"margin_dpo/margin_mean": 40.64165496826172,
|
|
"margin_dpo/margin_std": 56.71002197265625,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.26725403817914833,
|
|
"fcm_dpo/beta": 0.015759017318487167,
|
|
"fcm_dpo/delta": -0.20672228932380676,
|
|
"fcm_dpo/margin": 65.56771850585938,
|
|
"fcm_dpo/q_t": 0.30198216438293457,
|
|
"grad_norm": 35.510986328125,
|
|
"learning_rate": 4.598073218215817e-07,
|
|
"logits/chosen": -0.5237354636192322,
|
|
"logits/rejected": -0.5221885442733765,
|
|
"logps/chosen": -75.54692077636719,
|
|
"logps/ref_chosen": -41.107852935791016,
|
|
"logps/ref_rejected": -89.5215835571289,
|
|
"logps/rejected": -189.52838134765625,
|
|
"loss": 0.8296,
|
|
"margin_dpo/margin_mean": 65.56771850585938,
|
|
"margin_dpo/margin_std": 69.53907775878906,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.2687224669603524,
|
|
"fcm_dpo/beta": 0.015870921313762665,
|
|
"fcm_dpo/delta": 0.18142208456993103,
|
|
"fcm_dpo/margin": 42.763145446777344,
|
|
"fcm_dpo/q_t": 0.363423615694046,
|
|
"grad_norm": 36.854557037353516,
|
|
"learning_rate": 4.5910671414162484e-07,
|
|
"logits/chosen": -0.5377739667892456,
|
|
"logits/rejected": -0.5063446760177612,
|
|
"logps/chosen": -108.02732849121094,
|
|
"logps/ref_chosen": -57.52456283569336,
|
|
"logps/ref_rejected": -75.97572326660156,
|
|
"logps/rejected": -169.24163818359375,
|
|
"loss": 0.9778,
|
|
"margin_dpo/margin_mean": 42.763145446777344,
|
|
"margin_dpo/margin_std": 50.845008850097656,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.2701908957415565,
|
|
"fcm_dpo/beta": 0.016876371577382088,
|
|
"fcm_dpo/delta": 0.18617403507232666,
|
|
"fcm_dpo/margin": 39.896549224853516,
|
|
"fcm_dpo/q_t": 0.36848223209381104,
|
|
"grad_norm": 28.983440399169922,
|
|
"learning_rate": 4.5840059630527985e-07,
|
|
"logits/chosen": -0.5860706567764282,
|
|
"logits/rejected": -0.5714048743247986,
|
|
"logps/chosen": -97.67625427246094,
|
|
"logps/ref_chosen": -58.544952392578125,
|
|
"logps/ref_rejected": -76.63406372070312,
|
|
"logps/rejected": -155.66189575195312,
|
|
"loss": 0.9873,
|
|
"margin_dpo/margin_mean": 39.89654541015625,
|
|
"margin_dpo/margin_std": 53.60821533203125,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.27165932452276065,
|
|
"fcm_dpo/beta": 0.017562009394168854,
|
|
"fcm_dpo/delta": 0.273959755897522,
|
|
"fcm_dpo/margin": 33.74722671508789,
|
|
"fcm_dpo/q_t": 0.3876458406448364,
|
|
"grad_norm": 37.04210662841797,
|
|
"learning_rate": 4.5768898691940836e-07,
|
|
"logits/chosen": -0.5385079383850098,
|
|
"logits/rejected": -0.4919096827507019,
|
|
"logps/chosen": -107.01687622070312,
|
|
"logps/ref_chosen": -62.025848388671875,
|
|
"logps/ref_rejected": -73.7625961303711,
|
|
"logps/rejected": -152.5008544921875,
|
|
"loss": 1.0766,
|
|
"margin_dpo/margin_mean": 33.74722671508789,
|
|
"margin_dpo/margin_std": 56.945701599121094,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.27312775330396477,
|
|
"fcm_dpo/beta": 0.017246492207050323,
|
|
"fcm_dpo/delta": -0.20956075191497803,
|
|
"fcm_dpo/margin": 60.13277816772461,
|
|
"fcm_dpo/q_t": 0.29886382818222046,
|
|
"grad_norm": 54.477352142333984,
|
|
"learning_rate": 4.5697190473557947e-07,
|
|
"logits/chosen": -0.5471476912498474,
|
|
"logits/rejected": -0.5003129839897156,
|
|
"logps/chosen": -112.1868667602539,
|
|
"logps/ref_chosen": -69.35346984863281,
|
|
"logps/ref_rejected": -88.07244873046875,
|
|
"logps/rejected": -191.03863525390625,
|
|
"loss": 0.7824,
|
|
"margin_dpo/margin_mean": 60.132774353027344,
|
|
"margin_dpo/margin_std": 59.29591369628906,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.2745961820851689,
|
|
"fcm_dpo/beta": 0.01733904704451561,
|
|
"fcm_dpo/delta": 0.010633766651153564,
|
|
"fcm_dpo/margin": 48.30116271972656,
|
|
"fcm_dpo/q_t": 0.3396226167678833,
|
|
"grad_norm": 53.08622741699219,
|
|
"learning_rate": 4.5624936864957555e-07,
|
|
"logits/chosen": -0.5102699398994446,
|
|
"logits/rejected": -0.5049658417701721,
|
|
"logps/chosen": -97.04034423828125,
|
|
"logps/ref_chosen": -52.7564582824707,
|
|
"logps/ref_rejected": -81.96910095214844,
|
|
"logps/rejected": -174.5541534423828,
|
|
"loss": 0.9272,
|
|
"margin_dpo/margin_mean": 48.30116271972656,
|
|
"margin_dpo/margin_std": 60.002471923828125,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.27606461086637296,
|
|
"fcm_dpo/beta": 0.016646115109324455,
|
|
"fcm_dpo/delta": -0.19869595766067505,
|
|
"fcm_dpo/margin": 61.76872253417969,
|
|
"fcm_dpo/q_t": 0.3067232370376587,
|
|
"grad_norm": 65.48516082763672,
|
|
"learning_rate": 4.5552139770089454e-07,
|
|
"logits/chosen": -0.4937124252319336,
|
|
"logits/rejected": -0.4901246428489685,
|
|
"logps/chosen": -89.49577331542969,
|
|
"logps/ref_chosen": -49.415489196777344,
|
|
"logps/ref_rejected": -89.54043579101562,
|
|
"logps/rejected": -191.3894500732422,
|
|
"loss": 0.817,
|
|
"margin_dpo/margin_mean": 61.76872253417969,
|
|
"margin_dpo/margin_std": 66.22196960449219,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.2775330396475771,
|
|
"fcm_dpo/beta": 0.016446605324745178,
|
|
"fcm_dpo/delta": -0.023531386628746986,
|
|
"fcm_dpo/margin": 52.98753356933594,
|
|
"fcm_dpo/q_t": 0.33847445249557495,
|
|
"grad_norm": 57.1566276550293,
|
|
"learning_rate": 4.5478801107224794e-07,
|
|
"logits/chosen": -0.4816792607307434,
|
|
"logits/rejected": -0.4451908469200134,
|
|
"logps/chosen": -101.60316467285156,
|
|
"logps/ref_chosen": -52.39896011352539,
|
|
"logps/ref_rejected": -72.16735076904297,
|
|
"logps/rejected": -174.3590850830078,
|
|
"loss": 0.9291,
|
|
"margin_dpo/margin_mean": 52.98753356933594,
|
|
"margin_dpo/margin_std": 70.50366973876953,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.2790014684287812,
|
|
"fcm_dpo/beta": 0.01647689938545227,
|
|
"fcm_dpo/delta": -0.08300600945949554,
|
|
"fcm_dpo/margin": 55.913509368896484,
|
|
"fcm_dpo/q_t": 0.3302146792411804,
|
|
"grad_norm": 43.29951477050781,
|
|
"learning_rate": 4.5404922808905543e-07,
|
|
"logits/chosen": -0.5044703483581543,
|
|
"logits/rejected": -0.47580382227897644,
|
|
"logps/chosen": -114.02642822265625,
|
|
"logps/ref_chosen": -64.68305969238281,
|
|
"logps/ref_rejected": -102.55052185058594,
|
|
"logps/rejected": -207.80740356445312,
|
|
"loss": 0.9189,
|
|
"margin_dpo/margin_mean": 55.913509368896484,
|
|
"margin_dpo/margin_std": 68.61431884765625,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.28046989720998533,
|
|
"fcm_dpo/beta": 0.015079159289598465,
|
|
"fcm_dpo/delta": -0.40096384286880493,
|
|
"fcm_dpo/margin": 79.48197174072266,
|
|
"fcm_dpo/q_t": 0.2806900441646576,
|
|
"grad_norm": 30.056840896606445,
|
|
"learning_rate": 4.5330506821893565e-07,
|
|
"logits/chosen": -0.4775508940219879,
|
|
"logits/rejected": -0.46088314056396484,
|
|
"logps/chosen": -111.59598541259766,
|
|
"logps/ref_chosen": -68.65887451171875,
|
|
"logps/ref_rejected": -110.1396713256836,
|
|
"logps/rejected": -232.55877685546875,
|
|
"loss": 0.7341,
|
|
"margin_dpo/margin_mean": 79.48197174072266,
|
|
"margin_dpo/margin_std": 80.22380065917969,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.28193832599118945,
|
|
"fcm_dpo/beta": 0.014918088912963867,
|
|
"fcm_dpo/delta": -0.015953052788972855,
|
|
"fcm_dpo/margin": 57.933658599853516,
|
|
"fcm_dpo/q_t": 0.33742040395736694,
|
|
"grad_norm": 35.163917541503906,
|
|
"learning_rate": 4.5255555107119336e-07,
|
|
"logits/chosen": -0.46824973821640015,
|
|
"logits/rejected": -0.458457350730896,
|
|
"logps/chosen": -126.23261260986328,
|
|
"logps/ref_chosen": -69.72691345214844,
|
|
"logps/ref_rejected": -103.32135009765625,
|
|
"logps/rejected": -217.7606964111328,
|
|
"loss": 0.9085,
|
|
"margin_dpo/margin_mean": 57.93366241455078,
|
|
"margin_dpo/margin_std": 71.61734008789062,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.2834067547723935,
|
|
"fcm_dpo/beta": 0.015718581154942513,
|
|
"fcm_dpo/delta": 0.3292880654335022,
|
|
"fcm_dpo/margin": 33.98414611816406,
|
|
"fcm_dpo/q_t": 0.3883090615272522,
|
|
"grad_norm": 56.01692199707031,
|
|
"learning_rate": 4.5180069639630236e-07,
|
|
"logits/chosen": -0.49984651803970337,
|
|
"logits/rejected": -0.4666469395160675,
|
|
"logps/chosen": -117.69091796875,
|
|
"logps/ref_chosen": -60.19049835205078,
|
|
"logps/ref_rejected": -76.40755462646484,
|
|
"logps/rejected": -167.89212036132812,
|
|
"loss": 1.1352,
|
|
"margin_dpo/margin_mean": 33.98414611816406,
|
|
"margin_dpo/margin_std": 63.35324478149414,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.28487518355359764,
|
|
"fcm_dpo/beta": 0.01578504592180252,
|
|
"fcm_dpo/delta": -0.01547658909112215,
|
|
"fcm_dpo/margin": 54.73874282836914,
|
|
"fcm_dpo/q_t": 0.3235979378223419,
|
|
"grad_norm": 30.953447341918945,
|
|
"learning_rate": 4.510405240853854e-07,
|
|
"logits/chosen": -0.4425453841686249,
|
|
"logits/rejected": -0.4208832383155823,
|
|
"logps/chosen": -73.02734375,
|
|
"logps/ref_chosen": -37.84037399291992,
|
|
"logps/ref_rejected": -60.684783935546875,
|
|
"logps/rejected": -150.61050415039062,
|
|
"loss": 0.8382,
|
|
"margin_dpo/margin_mean": 54.73874282836914,
|
|
"margin_dpo/margin_std": 53.040504455566406,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.28634361233480177,
|
|
"fcm_dpo/beta": 0.015666445717215538,
|
|
"fcm_dpo/delta": -0.03931724652647972,
|
|
"fcm_dpo/margin": 56.528350830078125,
|
|
"fcm_dpo/q_t": 0.3216269016265869,
|
|
"grad_norm": 33.33697509765625,
|
|
"learning_rate": 4.5027505416968985e-07,
|
|
"logits/chosen": -0.43572598695755005,
|
|
"logits/rejected": -0.448942631483078,
|
|
"logps/chosen": -111.7900390625,
|
|
"logps/ref_chosen": -54.891571044921875,
|
|
"logps/ref_rejected": -96.77095794677734,
|
|
"logps/rejected": -210.19778442382812,
|
|
"loss": 0.8393,
|
|
"margin_dpo/margin_mean": 56.528350830078125,
|
|
"margin_dpo/margin_std": 57.825653076171875,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.2878120411160059,
|
|
"fcm_dpo/beta": 0.015181340277194977,
|
|
"fcm_dpo/delta": -0.17460601031780243,
|
|
"fcm_dpo/margin": 66.21810913085938,
|
|
"fcm_dpo/q_t": 0.3101344108581543,
|
|
"grad_norm": 34.602691650390625,
|
|
"learning_rate": 4.495043068200599e-07,
|
|
"logits/chosen": -0.4735764265060425,
|
|
"logits/rejected": -0.4467168152332306,
|
|
"logps/chosen": -97.20225524902344,
|
|
"logps/ref_chosen": -53.245243072509766,
|
|
"logps/ref_rejected": -76.05294799804688,
|
|
"logps/rejected": -186.2280731201172,
|
|
"loss": 0.8288,
|
|
"margin_dpo/margin_mean": 66.21810913085938,
|
|
"margin_dpo/margin_std": 70.18364715576172,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.28928046989721,
|
|
"fcm_dpo/beta": 0.015351845882833004,
|
|
"fcm_dpo/delta": 0.05590180307626724,
|
|
"fcm_dpo/margin": 51.957035064697266,
|
|
"fcm_dpo/q_t": 0.3400971293449402,
|
|
"grad_norm": 36.760929107666016,
|
|
"learning_rate": 4.4872830234640493e-07,
|
|
"logits/chosen": -0.4230024814605713,
|
|
"logits/rejected": -0.4091572165489197,
|
|
"logps/chosen": -105.07384490966797,
|
|
"logps/ref_chosen": -60.42033386230469,
|
|
"logps/ref_rejected": -77.20890808105469,
|
|
"logps/rejected": -173.8194580078125,
|
|
"loss": 0.8925,
|
|
"margin_dpo/margin_mean": 51.95703125,
|
|
"margin_dpo/margin_std": 57.920127868652344,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.2907488986784141,
|
|
"fcm_dpo/beta": 0.015172794461250305,
|
|
"fcm_dpo/delta": -0.09196484833955765,
|
|
"fcm_dpo/margin": 61.515472412109375,
|
|
"fcm_dpo/q_t": 0.3227071166038513,
|
|
"grad_norm": 36.14360046386719,
|
|
"learning_rate": 4.479470611971645e-07,
|
|
"logits/chosen": -0.47960442304611206,
|
|
"logits/rejected": -0.47948503494262695,
|
|
"logps/chosen": -102.31593322753906,
|
|
"logps/ref_chosen": -55.03618621826172,
|
|
"logps/ref_rejected": -97.24325561523438,
|
|
"logps/rejected": -206.03848266601562,
|
|
"loss": 0.8429,
|
|
"margin_dpo/margin_mean": 61.515472412109375,
|
|
"margin_dpo/margin_std": 70.07635498046875,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.2922173274596182,
|
|
"fcm_dpo/beta": 0.014803415164351463,
|
|
"fcm_dpo/delta": -0.04618511348962784,
|
|
"fcm_dpo/margin": 60.14692687988281,
|
|
"fcm_dpo/q_t": 0.32798993587493896,
|
|
"grad_norm": 36.83704376220703,
|
|
"learning_rate": 4.471606039587695e-07,
|
|
"logits/chosen": -0.45688751339912415,
|
|
"logits/rejected": -0.44097527861595154,
|
|
"logps/chosen": -105.17231750488281,
|
|
"logps/ref_chosen": -56.828826904296875,
|
|
"logps/ref_rejected": -84.64820861816406,
|
|
"logps/rejected": -193.1386260986328,
|
|
"loss": 0.8872,
|
|
"margin_dpo/margin_mean": 60.14692687988281,
|
|
"margin_dpo/margin_std": 69.28252410888672,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.2936857562408223,
|
|
"fcm_dpo/beta": 0.014579104259610176,
|
|
"fcm_dpo/delta": -0.10652126371860504,
|
|
"fcm_dpo/margin": 64.84251403808594,
|
|
"fcm_dpo/q_t": 0.3242376148700714,
|
|
"grad_norm": 37.86602783203125,
|
|
"learning_rate": 4.4636895135509966e-07,
|
|
"logits/chosen": -0.4370264708995819,
|
|
"logits/rejected": -0.41041016578674316,
|
|
"logps/chosen": -102.37236022949219,
|
|
"logps/ref_chosen": -53.06706237792969,
|
|
"logps/ref_rejected": -80.60843658447266,
|
|
"logps/rejected": -194.75624084472656,
|
|
"loss": 0.8901,
|
|
"margin_dpo/margin_mean": 64.84251403808594,
|
|
"margin_dpo/margin_std": 79.6728515625,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.2936857562408223,
|
|
"eval_fcm_dpo/beta": 0.014577739872038364,
|
|
"eval_logits/chosen": -0.48076131939888,
|
|
"eval_logits/rejected": -0.46049416065216064,
|
|
"eval_logps/chosen": -146.9453125,
|
|
"eval_logps/ref_chosen": -79.05104064941406,
|
|
"eval_logps/ref_rejected": -86.79793548583984,
|
|
"eval_logps/rejected": -191.740234375,
|
|
"eval_loss": 0.5793219804763794,
|
|
"eval_margin_dpo/margin_mean": 37.048030853271484,
|
|
"eval_margin_dpo/margin_std": 73.52235412597656,
|
|
"eval_runtime": 39.2951,
|
|
"eval_samples_per_second": 59.524,
|
|
"eval_steps_per_second": 1.883,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.29515418502202645,
|
|
"fcm_dpo/beta": 0.014311077073216438,
|
|
"fcm_dpo/delta": -0.11738879978656769,
|
|
"fcm_dpo/margin": 66.80262756347656,
|
|
"fcm_dpo/q_t": 0.31848424673080444,
|
|
"grad_norm": 38.43661880493164,
|
|
"learning_rate": 4.455721242469372e-07,
|
|
"logits/chosen": -0.4631321132183075,
|
|
"logits/rejected": -0.4501439929008484,
|
|
"logps/chosen": -125.53224182128906,
|
|
"logps/ref_chosen": -75.4022216796875,
|
|
"logps/ref_rejected": -114.80821990966797,
|
|
"logps/rejected": -231.74085998535156,
|
|
"loss": 0.8657,
|
|
"margin_dpo/margin_mean": 66.80262756347656,
|
|
"margin_dpo/margin_std": 77.89523315429688,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.2966226138032305,
|
|
"fcm_dpo/beta": 0.014638571999967098,
|
|
"fcm_dpo/delta": 0.17033323645591736,
|
|
"fcm_dpo/margin": 47.20063018798828,
|
|
"fcm_dpo/q_t": 0.3694015145301819,
|
|
"grad_norm": 47.15175247192383,
|
|
"learning_rate": 4.4477014363141755e-07,
|
|
"logits/chosen": -0.41728901863098145,
|
|
"logits/rejected": -0.4195215702056885,
|
|
"logps/chosen": -106.94139099121094,
|
|
"logps/ref_chosen": -50.101318359375,
|
|
"logps/ref_rejected": -86.98503112792969,
|
|
"logps/rejected": -191.0257568359375,
|
|
"loss": 1.0404,
|
|
"margin_dpo/margin_mean": 47.20063400268555,
|
|
"margin_dpo/margin_std": 73.57470703125,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.29809104258443464,
|
|
"fcm_dpo/beta": 0.01473167072981596,
|
|
"fcm_dpo/delta": 0.005034193862229586,
|
|
"fcm_dpo/margin": 57.385345458984375,
|
|
"fcm_dpo/q_t": 0.33291637897491455,
|
|
"grad_norm": 36.741172790527344,
|
|
"learning_rate": 4.439630306414758e-07,
|
|
"logits/chosen": -0.4235112965106964,
|
|
"logits/rejected": -0.4039536118507385,
|
|
"logps/chosen": -114.47615051269531,
|
|
"logps/ref_chosen": -60.60969543457031,
|
|
"logps/ref_rejected": -85.89596557617188,
|
|
"logps/rejected": -197.14776611328125,
|
|
"loss": 0.8768,
|
|
"margin_dpo/margin_mean": 57.385345458984375,
|
|
"margin_dpo/margin_std": 63.92000961303711,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.29955947136563876,
|
|
"fcm_dpo/beta": 0.01482885517179966,
|
|
"fcm_dpo/delta": 0.04627120867371559,
|
|
"fcm_dpo/margin": 54.453792572021484,
|
|
"fcm_dpo/q_t": 0.3538212776184082,
|
|
"grad_norm": 52.60590744018555,
|
|
"learning_rate": 4.431508065452897e-07,
|
|
"logits/chosen": -0.4854148030281067,
|
|
"logits/rejected": -0.44390296936035156,
|
|
"logps/chosen": -144.6455078125,
|
|
"logps/ref_chosen": -80.16496276855469,
|
|
"logps/ref_rejected": -87.69590759277344,
|
|
"logps/rejected": -206.6302490234375,
|
|
"loss": 0.9524,
|
|
"margin_dpo/margin_mean": 54.453792572021484,
|
|
"margin_dpo/margin_std": 75.84136199951172,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.3010279001468429,
|
|
"fcm_dpo/beta": 0.014350036159157753,
|
|
"fcm_dpo/delta": -0.1424178183078766,
|
|
"fcm_dpo/margin": 67.729736328125,
|
|
"fcm_dpo/q_t": 0.3128374218940735,
|
|
"grad_norm": 36.14482116699219,
|
|
"learning_rate": 4.4233349274571974e-07,
|
|
"logits/chosen": -0.37768441438674927,
|
|
"logits/rejected": -0.33807796239852905,
|
|
"logps/chosen": -124.49932861328125,
|
|
"logps/ref_chosen": -59.384735107421875,
|
|
"logps/ref_rejected": -85.12505340576172,
|
|
"logps/rejected": -217.96939086914062,
|
|
"loss": 0.8763,
|
|
"margin_dpo/margin_mean": 67.729736328125,
|
|
"margin_dpo/margin_std": 74.6842269897461,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.302496328928047,
|
|
"fcm_dpo/beta": 0.013974905014038086,
|
|
"fcm_dpo/delta": -0.1962062567472458,
|
|
"fcm_dpo/margin": 73.30876159667969,
|
|
"fcm_dpo/q_t": 0.3037898540496826,
|
|
"grad_norm": 53.73223876953125,
|
|
"learning_rate": 4.415111107797445e-07,
|
|
"logits/chosen": -0.370413213968277,
|
|
"logits/rejected": -0.37365084886550903,
|
|
"logps/chosen": -109.06692504882812,
|
|
"logps/ref_chosen": -46.964500427246094,
|
|
"logps/ref_rejected": -98.9534912109375,
|
|
"logps/rejected": -234.36468505859375,
|
|
"loss": 0.8314,
|
|
"margin_dpo/margin_mean": 73.30876159667969,
|
|
"margin_dpo/margin_std": 78.49857330322266,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.3039647577092511,
|
|
"fcm_dpo/beta": 0.013306405395269394,
|
|
"fcm_dpo/delta": -0.31155675649642944,
|
|
"fcm_dpo/margin": 84.67915344238281,
|
|
"fcm_dpo/q_t": 0.2934865355491638,
|
|
"grad_norm": 29.86716651916504,
|
|
"learning_rate": 4.4068368231789365e-07,
|
|
"logits/chosen": -0.4337068796157837,
|
|
"logits/rejected": -0.39963075518608093,
|
|
"logps/chosen": -102.7784194946289,
|
|
"logps/ref_chosen": -56.05625915527344,
|
|
"logps/ref_rejected": -84.44779968261719,
|
|
"logps/rejected": -215.84910583496094,
|
|
"loss": 0.7677,
|
|
"margin_dpo/margin_mean": 84.67914581298828,
|
|
"margin_dpo/margin_std": 87.53607177734375,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.3054331864904552,
|
|
"fcm_dpo/beta": 0.01302929688245058,
|
|
"fcm_dpo/delta": -0.000983063131570816,
|
|
"fcm_dpo/margin": 65.29347229003906,
|
|
"fcm_dpo/q_t": 0.33387383818626404,
|
|
"grad_norm": 40.39805221557617,
|
|
"learning_rate": 4.398512291636768e-07,
|
|
"logits/chosen": -0.5328778028488159,
|
|
"logits/rejected": -0.5141773819923401,
|
|
"logps/chosen": -137.27557373046875,
|
|
"logps/ref_chosen": -67.06761169433594,
|
|
"logps/ref_rejected": -94.28689575195312,
|
|
"logps/rejected": -229.78834533691406,
|
|
"loss": 0.9107,
|
|
"margin_dpo/margin_mean": 65.29347229003906,
|
|
"margin_dpo/margin_std": 79.34541320800781,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.3069016152716593,
|
|
"fcm_dpo/beta": 0.013248629868030548,
|
|
"fcm_dpo/delta": 0.10473860800266266,
|
|
"fcm_dpo/margin": 56.855796813964844,
|
|
"fcm_dpo/q_t": 0.3515823483467102,
|
|
"grad_norm": 38.002723693847656,
|
|
"learning_rate": 4.3901377325300857e-07,
|
|
"logits/chosen": -0.43297621607780457,
|
|
"logits/rejected": -0.41614365577697754,
|
|
"logps/chosen": -114.31430053710938,
|
|
"logps/ref_chosen": -56.18169403076172,
|
|
"logps/ref_rejected": -80.94152069091797,
|
|
"logps/rejected": -195.929931640625,
|
|
"loss": 0.9608,
|
|
"margin_dpo/margin_mean": 56.85579299926758,
|
|
"margin_dpo/margin_std": 72.0236587524414,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.30837004405286345,
|
|
"fcm_dpo/beta": 0.013421540148556232,
|
|
"fcm_dpo/delta": 0.023709196597337723,
|
|
"fcm_dpo/margin": 61.6995735168457,
|
|
"fcm_dpo/q_t": 0.3359639644622803,
|
|
"grad_norm": 31.023426055908203,
|
|
"learning_rate": 4.381713366536311e-07,
|
|
"logits/chosen": -0.4762294888496399,
|
|
"logits/rejected": -0.4587492346763611,
|
|
"logps/chosen": -98.13655090332031,
|
|
"logps/ref_chosen": -46.371822357177734,
|
|
"logps/ref_rejected": -76.68162536621094,
|
|
"logps/rejected": -190.1459197998047,
|
|
"loss": 0.8813,
|
|
"margin_dpo/margin_mean": 61.69956970214844,
|
|
"margin_dpo/margin_std": 67.59247589111328,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.30983847283406757,
|
|
"fcm_dpo/beta": 0.013691660948097706,
|
|
"fcm_dpo/delta": 0.142348051071167,
|
|
"fcm_dpo/margin": 52.43247985839844,
|
|
"fcm_dpo/q_t": 0.36083507537841797,
|
|
"grad_norm": 44.8113899230957,
|
|
"learning_rate": 4.373239415645323e-07,
|
|
"logits/chosen": -0.49992769956588745,
|
|
"logits/rejected": -0.4475579857826233,
|
|
"logps/chosen": -151.08291625976562,
|
|
"logps/ref_chosen": -78.93235778808594,
|
|
"logps/ref_rejected": -86.82098388671875,
|
|
"logps/rejected": -211.40402221679688,
|
|
"loss": 0.9757,
|
|
"margin_dpo/margin_mean": 52.432472229003906,
|
|
"margin_dpo/margin_std": 69.52543640136719,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.31130690161527164,
|
|
"fcm_dpo/beta": 0.0132517721503973,
|
|
"fcm_dpo/delta": -0.173972487449646,
|
|
"fcm_dpo/margin": 75.48409271240234,
|
|
"fcm_dpo/q_t": 0.31110864877700806,
|
|
"grad_norm": 29.362991333007812,
|
|
"learning_rate": 4.3647161031536086e-07,
|
|
"logits/chosen": -0.5245535373687744,
|
|
"logits/rejected": -0.51213139295578,
|
|
"logps/chosen": -115.64869689941406,
|
|
"logps/ref_chosen": -58.19701385498047,
|
|
"logps/ref_rejected": -103.05785369873047,
|
|
"logps/rejected": -235.99363708496094,
|
|
"loss": 0.8399,
|
|
"margin_dpo/margin_mean": 75.48409271240234,
|
|
"margin_dpo/margin_std": 78.64497375488281,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.31277533039647576,
|
|
"fcm_dpo/beta": 0.012959420680999756,
|
|
"fcm_dpo/delta": -0.1306145340204239,
|
|
"fcm_dpo/margin": 74.51495361328125,
|
|
"fcm_dpo/q_t": 0.3106541037559509,
|
|
"grad_norm": 33.29969787597656,
|
|
"learning_rate": 4.3561436536583774e-07,
|
|
"logits/chosen": -0.49456068873405457,
|
|
"logits/rejected": -0.4497288167476654,
|
|
"logps/chosen": -117.433349609375,
|
|
"logps/ref_chosen": -67.51271057128906,
|
|
"logps/ref_rejected": -93.91471862792969,
|
|
"logps/rejected": -218.35031127929688,
|
|
"loss": 0.8229,
|
|
"margin_dpo/margin_mean": 74.51496124267578,
|
|
"margin_dpo/margin_std": 75.42293548583984,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.3142437591776799,
|
|
"fcm_dpo/beta": 0.013114094734191895,
|
|
"fcm_dpo/delta": 0.05126992613077164,
|
|
"fcm_dpo/margin": 61.19904327392578,
|
|
"fcm_dpo/q_t": 0.3447904884815216,
|
|
"grad_norm": 33.585716247558594,
|
|
"learning_rate": 4.3475222930516473e-07,
|
|
"logits/chosen": -0.4499906897544861,
|
|
"logits/rejected": -0.4545586109161377,
|
|
"logps/chosen": -91.10122680664062,
|
|
"logps/ref_chosen": -41.604888916015625,
|
|
"logps/ref_rejected": -77.51741027832031,
|
|
"logps/rejected": -188.21279907226562,
|
|
"loss": 0.9278,
|
|
"margin_dpo/margin_mean": 61.19904327392578,
|
|
"margin_dpo/margin_std": 76.41812896728516,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.315712187958884,
|
|
"fcm_dpo/beta": 0.013036966323852539,
|
|
"fcm_dpo/delta": -0.00907570868730545,
|
|
"fcm_dpo/margin": 65.80323791503906,
|
|
"fcm_dpo/q_t": 0.3300132751464844,
|
|
"grad_norm": 35.7015380859375,
|
|
"learning_rate": 4.3388522485142885e-07,
|
|
"logits/chosen": -0.5079599022865295,
|
|
"logits/rejected": -0.49217790365219116,
|
|
"logps/chosen": -107.33973693847656,
|
|
"logps/ref_chosen": -53.279266357421875,
|
|
"logps/ref_rejected": -89.96464538574219,
|
|
"logps/rejected": -209.82835388183594,
|
|
"loss": 0.8646,
|
|
"margin_dpo/margin_mean": 65.80323791503906,
|
|
"margin_dpo/margin_std": 71.56494140625,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.31718061674008813,
|
|
"fcm_dpo/beta": 0.013059217482805252,
|
|
"fcm_dpo/delta": -0.06660346686840057,
|
|
"fcm_dpo/margin": 69.67720794677734,
|
|
"fcm_dpo/q_t": 0.32552677392959595,
|
|
"grad_norm": 29.45123863220215,
|
|
"learning_rate": 4.330133748510036e-07,
|
|
"logits/chosen": -0.4530643820762634,
|
|
"logits/rejected": -0.4277815818786621,
|
|
"logps/chosen": -105.15579223632812,
|
|
"logps/ref_chosen": -48.887794494628906,
|
|
"logps/ref_rejected": -77.19892883300781,
|
|
"logps/rejected": -203.14413452148438,
|
|
"loss": 0.8853,
|
|
"margin_dpo/margin_mean": 69.67721557617188,
|
|
"margin_dpo/margin_std": 79.81889343261719,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.3186490455212922,
|
|
"fcm_dpo/beta": 0.012638933025300503,
|
|
"fcm_dpo/delta": -0.13730451464653015,
|
|
"fcm_dpo/margin": 77.040771484375,
|
|
"fcm_dpo/q_t": 0.30401867628097534,
|
|
"grad_norm": 30.868886947631836,
|
|
"learning_rate": 4.3213670227794757e-07,
|
|
"logits/chosen": -0.49128514528274536,
|
|
"logits/rejected": -0.47628241777420044,
|
|
"logps/chosen": -102.47686767578125,
|
|
"logps/ref_chosen": -49.845306396484375,
|
|
"logps/ref_rejected": -100.07832336425781,
|
|
"logps/rejected": -229.75064086914062,
|
|
"loss": 0.7844,
|
|
"margin_dpo/margin_mean": 77.040771484375,
|
|
"margin_dpo/margin_std": 70.88652801513672,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.3201174743024963,
|
|
"fcm_dpo/beta": 0.012668648734688759,
|
|
"fcm_dpo/delta": 0.0585474818944931,
|
|
"fcm_dpo/margin": 62.8431282043457,
|
|
"fcm_dpo/q_t": 0.3426221013069153,
|
|
"grad_norm": 26.22398567199707,
|
|
"learning_rate": 4.3125523023339815e-07,
|
|
"logits/chosen": -0.49315348267555237,
|
|
"logits/rejected": -0.47417598962783813,
|
|
"logps/chosen": -115.05643463134766,
|
|
"logps/ref_chosen": -58.576683044433594,
|
|
"logps/ref_rejected": -87.84639739990234,
|
|
"logps/rejected": -207.16928100585938,
|
|
"loss": 0.9079,
|
|
"margin_dpo/margin_mean": 62.84312438964844,
|
|
"margin_dpo/margin_std": 73.27041625976562,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.32158590308370044,
|
|
"fcm_dpo/beta": 0.01306487713009119,
|
|
"fcm_dpo/delta": 0.1448213905096054,
|
|
"fcm_dpo/margin": 54.6587028503418,
|
|
"fcm_dpo/q_t": 0.3576211929321289,
|
|
"grad_norm": 34.506874084472656,
|
|
"learning_rate": 4.303689819449636e-07,
|
|
"logits/chosen": -0.4658815264701843,
|
|
"logits/rejected": -0.44217032194137573,
|
|
"logps/chosen": -120.91069030761719,
|
|
"logps/ref_chosen": -61.083858489990234,
|
|
"logps/ref_rejected": -85.83042907714844,
|
|
"logps/rejected": -200.3159637451172,
|
|
"loss": 0.9701,
|
|
"margin_dpo/margin_mean": 54.6587028503418,
|
|
"margin_dpo/margin_std": 71.40689849853516,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.32305433186490456,
|
|
"fcm_dpo/beta": 0.013401372358202934,
|
|
"fcm_dpo/delta": 0.20168578624725342,
|
|
"fcm_dpo/margin": 49.403968811035156,
|
|
"fcm_dpo/q_t": 0.3585790991783142,
|
|
"grad_norm": 34.5196418762207,
|
|
"learning_rate": 4.2947798076611047e-07,
|
|
"logits/chosen": -0.4772907495498657,
|
|
"logits/rejected": -0.44704103469848633,
|
|
"logps/chosen": -136.77674865722656,
|
|
"logps/ref_chosen": -70.03128051757812,
|
|
"logps/ref_rejected": -87.68551635742188,
|
|
"logps/rejected": -203.83493041992188,
|
|
"loss": 0.9412,
|
|
"margin_dpo/margin_mean": 49.403968811035156,
|
|
"margin_dpo/margin_std": 51.8128662109375,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.3245227606461087,
|
|
"fcm_dpo/beta": 0.01300697959959507,
|
|
"fcm_dpo/delta": -0.36966100335121155,
|
|
"fcm_dpo/margin": 90.53009033203125,
|
|
"fcm_dpo/q_t": 0.2712687849998474,
|
|
"grad_norm": 39.37263107299805,
|
|
"learning_rate": 4.285822501755485e-07,
|
|
"logits/chosen": -0.41888752579689026,
|
|
"logits/rejected": -0.42424139380455017,
|
|
"logps/chosen": -122.67888641357422,
|
|
"logps/ref_chosen": -52.15470886230469,
|
|
"logps/ref_rejected": -106.46768188476562,
|
|
"logps/rejected": -267.52197265625,
|
|
"loss": 0.7145,
|
|
"margin_dpo/margin_mean": 90.53008270263672,
|
|
"margin_dpo/margin_std": 76.58544921875,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.32599118942731276,
|
|
"fcm_dpo/beta": 0.012651478871703148,
|
|
"fcm_dpo/delta": -0.035454705357551575,
|
|
"fcm_dpo/margin": 69.71733856201172,
|
|
"fcm_dpo/q_t": 0.3251156210899353,
|
|
"grad_norm": 42.33450698852539,
|
|
"learning_rate": 4.276818137766118e-07,
|
|
"logits/chosen": -0.4270946979522705,
|
|
"logits/rejected": -0.40993452072143555,
|
|
"logps/chosen": -134.14321899414062,
|
|
"logps/ref_chosen": -60.971099853515625,
|
|
"logps/ref_rejected": -100.00115203857422,
|
|
"logps/rejected": -242.89060974121094,
|
|
"loss": 0.8663,
|
|
"margin_dpo/margin_mean": 69.71733856201172,
|
|
"margin_dpo/margin_std": 75.56416320800781,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.3274596182085169,
|
|
"fcm_dpo/beta": 0.01269703172147274,
|
|
"fcm_dpo/delta": 0.07957568764686584,
|
|
"fcm_dpo/margin": 61.16942596435547,
|
|
"fcm_dpo/q_t": 0.34925732016563416,
|
|
"grad_norm": 45.7380485534668,
|
|
"learning_rate": 4.2677669529663686e-07,
|
|
"logits/chosen": -0.3831120729446411,
|
|
"logits/rejected": -0.361503005027771,
|
|
"logps/chosen": -134.04067993164062,
|
|
"logps/ref_chosen": -52.64057540893555,
|
|
"logps/ref_rejected": -82.82502746582031,
|
|
"logps/rejected": -225.39456176757812,
|
|
"loss": 1.0058,
|
|
"margin_dpo/margin_mean": 61.16942596435547,
|
|
"margin_dpo/margin_std": 87.82357025146484,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.328928046989721,
|
|
"fcm_dpo/beta": 0.012370316311717033,
|
|
"fcm_dpo/delta": -0.12362442910671234,
|
|
"fcm_dpo/margin": 77.3526611328125,
|
|
"fcm_dpo/q_t": 0.3199881315231323,
|
|
"grad_norm": 43.84239196777344,
|
|
"learning_rate": 4.2586691858633747e-07,
|
|
"logits/chosen": -0.4017111659049988,
|
|
"logits/rejected": -0.3758423328399658,
|
|
"logps/chosen": -117.4913330078125,
|
|
"logps/ref_chosen": -48.59541320800781,
|
|
"logps/ref_rejected": -77.11648559570312,
|
|
"logps/rejected": -223.36508178710938,
|
|
"loss": 0.863,
|
|
"margin_dpo/margin_mean": 77.3526611328125,
|
|
"margin_dpo/margin_std": 87.10986328125,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.3303964757709251,
|
|
"fcm_dpo/beta": 0.011899469420313835,
|
|
"fcm_dpo/delta": -0.2886032462120056,
|
|
"fcm_dpo/margin": 92.91471862792969,
|
|
"fcm_dpo/q_t": 0.29158496856689453,
|
|
"grad_norm": 33.034423828125,
|
|
"learning_rate": 4.249525076191759e-07,
|
|
"logits/chosen": -0.39564913511276245,
|
|
"logits/rejected": -0.3767988085746765,
|
|
"logps/chosen": -139.4900665283203,
|
|
"logps/ref_chosen": -58.000465393066406,
|
|
"logps/ref_rejected": -99.90291595458984,
|
|
"logps/rejected": -274.3072204589844,
|
|
"loss": 0.784,
|
|
"margin_dpo/margin_mean": 92.91471862792969,
|
|
"margin_dpo/margin_std": 94.30024719238281,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.33186490455212925,
|
|
"fcm_dpo/beta": 0.01162062305957079,
|
|
"fcm_dpo/delta": 0.011304348707199097,
|
|
"fcm_dpo/margin": 71.96773529052734,
|
|
"fcm_dpo/q_t": 0.34078845381736755,
|
|
"grad_norm": 36.67145538330078,
|
|
"learning_rate": 4.2403348649073167e-07,
|
|
"logits/chosen": -0.4350174069404602,
|
|
"logits/rejected": -0.3871588110923767,
|
|
"logps/chosen": -128.597900390625,
|
|
"logps/ref_chosen": -58.898799896240234,
|
|
"logps/ref_rejected": -78.68775939941406,
|
|
"logps/rejected": -220.35458374023438,
|
|
"loss": 0.9178,
|
|
"margin_dpo/margin_mean": 71.96773529052734,
|
|
"margin_dpo/margin_std": 87.11494445800781,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.3333333333333333,
|
|
"fcm_dpo/beta": 0.011324774473905563,
|
|
"fcm_dpo/delta": -0.20764365792274475,
|
|
"fcm_dpo/margin": 91.20738983154297,
|
|
"fcm_dpo/q_t": 0.2983647286891937,
|
|
"grad_norm": 33.768497467041016,
|
|
"learning_rate": 4.2310987941806615e-07,
|
|
"logits/chosen": -0.3924615979194641,
|
|
"logits/rejected": -0.37529265880584717,
|
|
"logps/chosen": -140.55740356445312,
|
|
"logps/ref_chosen": -59.072181701660156,
|
|
"logps/ref_rejected": -99.41236877441406,
|
|
"logps/rejected": -272.10498046875,
|
|
"loss": 0.8177,
|
|
"margin_dpo/margin_mean": 91.20738220214844,
|
|
"margin_dpo/margin_std": 95.2232666015625,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.33480176211453744,
|
|
"fcm_dpo/beta": 0.011514011770486832,
|
|
"fcm_dpo/delta": 0.1022706851363182,
|
|
"fcm_dpo/margin": 65.56757354736328,
|
|
"fcm_dpo/q_t": 0.349528431892395,
|
|
"grad_norm": 39.931148529052734,
|
|
"learning_rate": 4.2218171073908463e-07,
|
|
"logits/chosen": -0.3640768527984619,
|
|
"logits/rejected": -0.340065598487854,
|
|
"logps/chosen": -146.19509887695312,
|
|
"logps/ref_chosen": -65.89128875732422,
|
|
"logps/ref_rejected": -91.04875183105469,
|
|
"logps/rejected": -236.92013549804688,
|
|
"loss": 0.9489,
|
|
"margin_dpo/margin_mean": 65.56758117675781,
|
|
"margin_dpo/margin_std": 82.21015930175781,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.33627019089574156,
|
|
"fcm_dpo/beta": 0.011491687968373299,
|
|
"fcm_dpo/delta": -0.03256909176707268,
|
|
"fcm_dpo/margin": 76.55232238769531,
|
|
"fcm_dpo/q_t": 0.3270722031593323,
|
|
"grad_norm": 46.43395233154297,
|
|
"learning_rate": 4.212490049118951e-07,
|
|
"logits/chosen": -0.4288903772830963,
|
|
"logits/rejected": -0.3912368714809418,
|
|
"logps/chosen": -151.40249633789062,
|
|
"logps/ref_chosen": -70.70637512207031,
|
|
"logps/ref_rejected": -84.52741241455078,
|
|
"logps/rejected": -241.77587890625,
|
|
"loss": 0.8898,
|
|
"margin_dpo/margin_mean": 76.55232238769531,
|
|
"margin_dpo/margin_std": 88.84889221191406,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.3377386196769457,
|
|
"fcm_dpo/beta": 0.010963954031467438,
|
|
"fcm_dpo/delta": -0.26099899411201477,
|
|
"fcm_dpo/margin": 98.563720703125,
|
|
"fcm_dpo/q_t": 0.28947991132736206,
|
|
"grad_norm": 47.571136474609375,
|
|
"learning_rate": 4.203117865141635e-07,
|
|
"logits/chosen": -0.33261680603027344,
|
|
"logits/rejected": -0.33441460132598877,
|
|
"logps/chosen": -108.84280395507812,
|
|
"logps/ref_chosen": -39.282005310058594,
|
|
"logps/ref_rejected": -85.62191009521484,
|
|
"logps/rejected": -253.74642944335938,
|
|
"loss": 0.7737,
|
|
"margin_dpo/margin_mean": 98.563720703125,
|
|
"margin_dpo/margin_std": 94.94647216796875,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.3392070484581498,
|
|
"fcm_dpo/beta": 0.010946284979581833,
|
|
"fcm_dpo/delta": 0.0592944361269474,
|
|
"fcm_dpo/margin": 72.66645812988281,
|
|
"fcm_dpo/q_t": 0.3454594314098358,
|
|
"grad_norm": 39.91952896118164,
|
|
"learning_rate": 4.1937008024246625e-07,
|
|
"logits/chosen": -0.3895813822746277,
|
|
"logits/rejected": -0.3568507432937622,
|
|
"logps/chosen": -135.733642578125,
|
|
"logps/ref_chosen": -63.27644348144531,
|
|
"logps/ref_rejected": -74.1239013671875,
|
|
"logps/rejected": -219.24757385253906,
|
|
"loss": 0.9076,
|
|
"margin_dpo/margin_mean": 72.66645812988281,
|
|
"margin_dpo/margin_std": 87.05967712402344,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.3406754772393539,
|
|
"fcm_dpo/beta": 0.011238744482398033,
|
|
"fcm_dpo/delta": 0.17818310856819153,
|
|
"fcm_dpo/margin": 60.888694763183594,
|
|
"fcm_dpo/q_t": 0.3727785050868988,
|
|
"grad_norm": 45.982215881347656,
|
|
"learning_rate": 4.1842391091163933e-07,
|
|
"logits/chosen": -0.40042543411254883,
|
|
"logits/rejected": -0.3696829676628113,
|
|
"logps/chosen": -156.334228515625,
|
|
"logps/ref_chosen": -70.74876403808594,
|
|
"logps/ref_rejected": -83.97706604003906,
|
|
"logps/rejected": -230.45123291015625,
|
|
"loss": 1.0139,
|
|
"margin_dpo/margin_mean": 60.888694763183594,
|
|
"margin_dpo/margin_std": 91.31798553466797,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.342143906020558,
|
|
"fcm_dpo/beta": 0.011158278211951256,
|
|
"fcm_dpo/delta": -0.15974467992782593,
|
|
"fcm_dpo/margin": 89.08323669433594,
|
|
"fcm_dpo/q_t": 0.3230384588241577,
|
|
"grad_norm": 36.23651885986328,
|
|
"learning_rate": 4.174733034541245e-07,
|
|
"logits/chosen": -0.3902140259742737,
|
|
"logits/rejected": -0.3841424584388733,
|
|
"logps/chosen": -128.5865478515625,
|
|
"logps/ref_chosen": -54.8829345703125,
|
|
"logps/ref_rejected": -107.4800796508789,
|
|
"logps/rejected": -270.26690673828125,
|
|
"loss": 0.8862,
|
|
"margin_dpo/margin_mean": 89.08323669433594,
|
|
"margin_dpo/margin_std": 109.74813079833984,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.3436123348017621,
|
|
"fcm_dpo/beta": 0.010733449831604958,
|
|
"fcm_dpo/delta": -0.12230968475341797,
|
|
"fcm_dpo/margin": 89.16376495361328,
|
|
"fcm_dpo/q_t": 0.3087769150733948,
|
|
"grad_norm": 52.80620193481445,
|
|
"learning_rate": 4.165182829193126e-07,
|
|
"logits/chosen": -0.3361210823059082,
|
|
"logits/rejected": -0.36225271224975586,
|
|
"logps/chosen": -117.68397521972656,
|
|
"logps/ref_chosen": -44.094520568847656,
|
|
"logps/ref_rejected": -100.00663757324219,
|
|
"logps/rejected": -262.7598571777344,
|
|
"loss": 0.8077,
|
|
"margin_dpo/margin_mean": 89.16377258300781,
|
|
"margin_dpo/margin_std": 85.72807312011719,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.34508076358296624,
|
|
"fcm_dpo/beta": 0.011142631061375141,
|
|
"fcm_dpo/delta": 0.19422681629657745,
|
|
"fcm_dpo/margin": 59.925628662109375,
|
|
"fcm_dpo/q_t": 0.366345077753067,
|
|
"grad_norm": 39.07419204711914,
|
|
"learning_rate": 4.1555887447288255e-07,
|
|
"logits/chosen": -0.3953653573989868,
|
|
"logits/rejected": -0.3707156777381897,
|
|
"logps/chosen": -151.11203002929688,
|
|
"logps/ref_chosen": -62.237911224365234,
|
|
"logps/ref_rejected": -90.39506530761719,
|
|
"logps/rejected": -239.19482421875,
|
|
"loss": 0.9945,
|
|
"margin_dpo/margin_mean": 59.925628662109375,
|
|
"margin_dpo/margin_std": 81.4887466430664,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.3465491923641703,
|
|
"fcm_dpo/beta": 0.011004526168107986,
|
|
"fcm_dpo/delta": -0.1253395676612854,
|
|
"fcm_dpo/margin": 87.53258514404297,
|
|
"fcm_dpo/q_t": 0.3034972548484802,
|
|
"grad_norm": 45.433135986328125,
|
|
"learning_rate": 4.1459510339613946e-07,
|
|
"logits/chosen": -0.36071985960006714,
|
|
"logits/rejected": -0.3635398745536804,
|
|
"logps/chosen": -114.3785400390625,
|
|
"logps/ref_chosen": -49.34136199951172,
|
|
"logps/ref_rejected": -103.51162719726562,
|
|
"logps/rejected": -256.0813903808594,
|
|
"loss": 0.7776,
|
|
"margin_dpo/margin_mean": 87.5325927734375,
|
|
"margin_dpo/margin_std": 76.69570922851562,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.34801762114537443,
|
|
"fcm_dpo/beta": 0.010965939611196518,
|
|
"fcm_dpo/delta": 0.021763239055871964,
|
|
"fcm_dpo/margin": 75.69317626953125,
|
|
"fcm_dpo/q_t": 0.3360444903373718,
|
|
"grad_norm": 35.225894927978516,
|
|
"learning_rate": 4.136269950853473e-07,
|
|
"logits/chosen": -0.3720950484275818,
|
|
"logits/rejected": -0.36715224385261536,
|
|
"logps/chosen": -138.06900024414062,
|
|
"logps/ref_chosen": -54.168121337890625,
|
|
"logps/ref_rejected": -94.78036499023438,
|
|
"logps/rejected": -254.3744354248047,
|
|
"loss": 0.9023,
|
|
"margin_dpo/margin_mean": 75.69317626953125,
|
|
"margin_dpo/margin_std": 88.33975219726562,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.34948604992657856,
|
|
"fcm_dpo/beta": 0.011054374277591705,
|
|
"fcm_dpo/delta": 0.08588720113039017,
|
|
"fcm_dpo/margin": 69.70079040527344,
|
|
"fcm_dpo/q_t": 0.34868574142456055,
|
|
"grad_norm": 44.55629348754883,
|
|
"learning_rate": 4.126545750510605e-07,
|
|
"logits/chosen": -0.36067309975624084,
|
|
"logits/rejected": -0.3768489360809326,
|
|
"logps/chosen": -133.702880859375,
|
|
"logps/ref_chosen": -53.973121643066406,
|
|
"logps/ref_rejected": -89.41795349121094,
|
|
"logps/rejected": -238.84848022460938,
|
|
"loss": 0.917,
|
|
"margin_dpo/margin_mean": 69.70079040527344,
|
|
"margin_dpo/margin_std": 83.06000518798828,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.3509544787077827,
|
|
"fcm_dpo/beta": 0.010967254638671875,
|
|
"fcm_dpo/delta": -0.034537769854068756,
|
|
"fcm_dpo/margin": 80.02545166015625,
|
|
"fcm_dpo/q_t": 0.32616370916366577,
|
|
"grad_norm": 34.387508392333984,
|
|
"learning_rate": 4.116778689174514e-07,
|
|
"logits/chosen": -0.34563374519348145,
|
|
"logits/rejected": -0.3208710551261902,
|
|
"logps/chosen": -145.20004272460938,
|
|
"logps/ref_chosen": -58.09782409667969,
|
|
"logps/ref_rejected": -93.59294128417969,
|
|
"logps/rejected": -260.7206115722656,
|
|
"loss": 0.8603,
|
|
"margin_dpo/margin_mean": 80.02545166015625,
|
|
"margin_dpo/margin_std": 81.53392028808594,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.3524229074889868,
|
|
"fcm_dpo/beta": 0.011072592809796333,
|
|
"fcm_dpo/delta": 0.026514161378145218,
|
|
"fcm_dpo/margin": 74.52180480957031,
|
|
"fcm_dpo/q_t": 0.3355349600315094,
|
|
"grad_norm": 43.181861877441406,
|
|
"learning_rate": 4.106969024216348e-07,
|
|
"logits/chosen": -0.39122214913368225,
|
|
"logits/rejected": -0.3691304326057434,
|
|
"logps/chosen": -150.86891174316406,
|
|
"logps/ref_chosen": -60.6144905090332,
|
|
"logps/ref_rejected": -74.1185302734375,
|
|
"logps/rejected": -238.894775390625,
|
|
"loss": 0.9114,
|
|
"margin_dpo/margin_mean": 74.52180480957031,
|
|
"margin_dpo/margin_std": 85.87225341796875,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.35389133627019087,
|
|
"fcm_dpo/beta": 0.010732254013419151,
|
|
"fcm_dpo/delta": -0.28711360692977905,
|
|
"fcm_dpo/margin": 103.05683898925781,
|
|
"fcm_dpo/q_t": 0.2914666533470154,
|
|
"grad_norm": 35.48265838623047,
|
|
"learning_rate": 4.097117014129903e-07,
|
|
"logits/chosen": -0.4265785217285156,
|
|
"logits/rejected": -0.39695611596107483,
|
|
"logps/chosen": -144.91229248046875,
|
|
"logps/ref_chosen": -66.091064453125,
|
|
"logps/ref_rejected": -88.06088256835938,
|
|
"logps/rejected": -269.93896484375,
|
|
"loss": 0.7766,
|
|
"margin_dpo/margin_mean": 103.05683898925781,
|
|
"margin_dpo/margin_std": 102.60064697265625,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.355359765051395,
|
|
"fcm_dpo/beta": 0.01074141077697277,
|
|
"fcm_dpo/delta": 0.087952621281147,
|
|
"fcm_dpo/margin": 71.43954467773438,
|
|
"fcm_dpo/q_t": 0.34943699836730957,
|
|
"grad_norm": 43.884765625,
|
|
"learning_rate": 4.087222918524807e-07,
|
|
"logits/chosen": -0.31767386198043823,
|
|
"logits/rejected": -0.2909330725669861,
|
|
"logps/chosen": -160.47384643554688,
|
|
"logps/ref_chosen": -67.86392974853516,
|
|
"logps/ref_rejected": -83.36033630371094,
|
|
"logps/rejected": -247.4097900390625,
|
|
"loss": 0.9196,
|
|
"margin_dpo/margin_mean": 71.43954467773438,
|
|
"margin_dpo/margin_std": 86.29911804199219,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.3568281938325991,
|
|
"fcm_dpo/beta": 0.01043357141315937,
|
|
"fcm_dpo/delta": -0.16880205273628235,
|
|
"fcm_dpo/margin": 95.98949432373047,
|
|
"fcm_dpo/q_t": 0.3014954924583435,
|
|
"grad_norm": 38.88534927368164,
|
|
"learning_rate": 4.07728699811968e-07,
|
|
"logits/chosen": -0.33565038442611694,
|
|
"logits/rejected": -0.28926849365234375,
|
|
"logps/chosen": -157.85023498535156,
|
|
"logps/ref_chosen": -63.0842399597168,
|
|
"logps/ref_rejected": -76.33563232421875,
|
|
"logps/rejected": -267.09112548828125,
|
|
"loss": 0.7836,
|
|
"margin_dpo/margin_mean": 95.98949432373047,
|
|
"margin_dpo/margin_std": 91.47665405273438,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.35829662261380324,
|
|
"fcm_dpo/beta": 0.010181013494729996,
|
|
"fcm_dpo/delta": -0.10228382050991058,
|
|
"fcm_dpo/margin": 92.55367279052734,
|
|
"fcm_dpo/q_t": 0.30694684386253357,
|
|
"grad_norm": 43.90488052368164,
|
|
"learning_rate": 4.067309514735267e-07,
|
|
"logits/chosen": -0.39476436376571655,
|
|
"logits/rejected": -0.3839525282382965,
|
|
"logps/chosen": -143.63925170898438,
|
|
"logps/ref_chosen": -61.140689849853516,
|
|
"logps/ref_rejected": -94.89193725585938,
|
|
"logps/rejected": -269.9441833496094,
|
|
"loss": 0.7917,
|
|
"margin_dpo/margin_mean": 92.55368041992188,
|
|
"margin_dpo/margin_std": 81.99246215820312,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.35976505139500736,
|
|
"fcm_dpo/beta": 0.010019933804869652,
|
|
"fcm_dpo/delta": 0.03025764226913452,
|
|
"fcm_dpo/margin": 81.50281524658203,
|
|
"fcm_dpo/q_t": 0.34738224744796753,
|
|
"grad_norm": 61.226661682128906,
|
|
"learning_rate": 4.057290731287531e-07,
|
|
"logits/chosen": -0.40170228481292725,
|
|
"logits/rejected": -0.3602461814880371,
|
|
"logps/chosen": -163.54153442382812,
|
|
"logps/ref_chosen": -67.26228332519531,
|
|
"logps/ref_rejected": -87.64010620117188,
|
|
"logps/rejected": -265.4221496582031,
|
|
"loss": 0.947,
|
|
"margin_dpo/margin_mean": 81.50281524658203,
|
|
"margin_dpo/margin_std": 102.8226318359375,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.36123348017621143,
|
|
"fcm_dpo/beta": 0.010238472372293472,
|
|
"fcm_dpo/delta": 0.03806059807538986,
|
|
"fcm_dpo/margin": 79.59729766845703,
|
|
"fcm_dpo/q_t": 0.3448247015476227,
|
|
"grad_norm": 45.083065032958984,
|
|
"learning_rate": 4.047230911780736e-07,
|
|
"logits/chosen": -0.4234234392642975,
|
|
"logits/rejected": -0.38395100831985474,
|
|
"logps/chosen": -156.4099884033203,
|
|
"logps/ref_chosen": -66.69696807861328,
|
|
"logps/ref_rejected": -84.34634399414062,
|
|
"logps/rejected": -253.65667724609375,
|
|
"loss": 0.9226,
|
|
"margin_dpo/margin_mean": 79.59729766845703,
|
|
"margin_dpo/margin_std": 100.47013854980469,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.36270190895741555,
|
|
"fcm_dpo/beta": 0.009807577356696129,
|
|
"fcm_dpo/delta": -0.2844681143760681,
|
|
"fcm_dpo/margin": 112.23686218261719,
|
|
"fcm_dpo/q_t": 0.29242298007011414,
|
|
"grad_norm": 48.097572326660156,
|
|
"learning_rate": 4.0371303213004814e-07,
|
|
"logits/chosen": -0.33529412746429443,
|
|
"logits/rejected": -0.3306733965873718,
|
|
"logps/chosen": -160.50535583496094,
|
|
"logps/ref_chosen": -56.6053466796875,
|
|
"logps/ref_rejected": -106.29326629638672,
|
|
"logps/rejected": -322.43017578125,
|
|
"loss": 0.7889,
|
|
"margin_dpo/margin_mean": 112.23686218261719,
|
|
"margin_dpo/margin_std": 113.30484008789062,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.3641703377386197,
|
|
"fcm_dpo/beta": 0.00968283973634243,
|
|
"fcm_dpo/delta": -0.006413431838154793,
|
|
"fcm_dpo/margin": 88.3683090209961,
|
|
"fcm_dpo/q_t": 0.319698691368103,
|
|
"grad_norm": 42.922454833984375,
|
|
"learning_rate": 4.0269892260067197e-07,
|
|
"logits/chosen": -0.34435319900512695,
|
|
"logits/rejected": -0.3552175462245941,
|
|
"logps/chosen": -127.05135345458984,
|
|
"logps/ref_chosen": -44.043216705322266,
|
|
"logps/ref_rejected": -91.85687255859375,
|
|
"logps/rejected": -263.23333740234375,
|
|
"loss": 0.8132,
|
|
"margin_dpo/margin_mean": 88.36830139160156,
|
|
"margin_dpo/margin_std": 75.46261596679688,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.3656387665198238,
|
|
"fcm_dpo/beta": 0.01001214049756527,
|
|
"fcm_dpo/delta": 0.22601774334907532,
|
|
"fcm_dpo/margin": 63.81077194213867,
|
|
"fcm_dpo/q_t": 0.37422841787338257,
|
|
"grad_norm": 45.26923751831055,
|
|
"learning_rate": 4.0168078931267426e-07,
|
|
"logits/chosen": -0.3437703251838684,
|
|
"logits/rejected": -0.31204476952552795,
|
|
"logps/chosen": -162.9739227294922,
|
|
"logps/ref_chosen": -62.442352294921875,
|
|
"logps/ref_rejected": -80.46806335449219,
|
|
"logps/rejected": -244.8104248046875,
|
|
"loss": 1.0405,
|
|
"margin_dpo/margin_mean": 63.81077575683594,
|
|
"margin_dpo/margin_std": 95.67174530029297,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.3671071953010279,
|
|
"fcm_dpo/beta": 0.010107604786753654,
|
|
"fcm_dpo/delta": -0.07880331575870514,
|
|
"fcm_dpo/margin": 91.13977813720703,
|
|
"fcm_dpo/q_t": 0.3046584725379944,
|
|
"grad_norm": 43.14041519165039,
|
|
"learning_rate": 4.006586590948141e-07,
|
|
"logits/chosen": -0.3341864347457886,
|
|
"logits/rejected": -0.2635290324687958,
|
|
"logps/chosen": -150.46229553222656,
|
|
"logps/ref_chosen": -65.63668823242188,
|
|
"logps/ref_rejected": -73.87184143066406,
|
|
"logps/rejected": -249.83721923828125,
|
|
"loss": 0.7832,
|
|
"margin_dpo/margin_mean": 91.13977813720703,
|
|
"margin_dpo/margin_std": 72.58334350585938,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.368575624082232,
|
|
"fcm_dpo/beta": 0.010061986744403839,
|
|
"fcm_dpo/delta": 0.08627409487962723,
|
|
"fcm_dpo/margin": 76.52996826171875,
|
|
"fcm_dpo/q_t": 0.3464193344116211,
|
|
"grad_norm": 42.19496154785156,
|
|
"learning_rate": 3.9963255888117325e-07,
|
|
"logits/chosen": -0.34372633695602417,
|
|
"logits/rejected": -0.307092547416687,
|
|
"logps/chosen": -154.13720703125,
|
|
"logps/ref_chosen": -57.182716369628906,
|
|
"logps/ref_rejected": -77.66343688964844,
|
|
"logps/rejected": -251.14788818359375,
|
|
"loss": 0.9374,
|
|
"margin_dpo/margin_mean": 76.52996826171875,
|
|
"margin_dpo/margin_std": 91.52153015136719,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.3700440528634361,
|
|
"fcm_dpo/beta": 0.00997251458466053,
|
|
"fcm_dpo/delta": -0.07944738864898682,
|
|
"fcm_dpo/margin": 92.27542114257812,
|
|
"fcm_dpo/q_t": 0.300639808177948,
|
|
"grad_norm": 44.86310958862305,
|
|
"learning_rate": 3.9860251571044666e-07,
|
|
"logits/chosen": -0.3977884352207184,
|
|
"logits/rejected": -0.3488880395889282,
|
|
"logps/chosen": -165.05645751953125,
|
|
"logps/ref_chosen": -71.68563842773438,
|
|
"logps/ref_rejected": -84.75799560546875,
|
|
"logps/rejected": -270.40423583984375,
|
|
"loss": 0.7912,
|
|
"margin_dpo/margin_mean": 92.27542114257812,
|
|
"margin_dpo/margin_std": 75.70187377929688,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.37151248164464024,
|
|
"fcm_dpo/beta": 0.010129924863576889,
|
|
"fcm_dpo/delta": 0.004893705248832703,
|
|
"fcm_dpo/margin": 83.24456024169922,
|
|
"fcm_dpo/q_t": 0.3302081227302551,
|
|
"grad_norm": 47.6492805480957,
|
|
"learning_rate": 3.9756855672522986e-07,
|
|
"logits/chosen": -0.3927938640117645,
|
|
"logits/rejected": -0.38195687532424927,
|
|
"logps/chosen": -155.09674072265625,
|
|
"logps/ref_chosen": -69.1339340209961,
|
|
"logps/ref_rejected": -98.70252990722656,
|
|
"logps/rejected": -267.909912109375,
|
|
"loss": 0.9012,
|
|
"margin_dpo/margin_mean": 83.24456024169922,
|
|
"margin_dpo/margin_std": 93.68569946289062,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.37298091042584436,
|
|
"fcm_dpo/beta": 0.010117759928107262,
|
|
"fcm_dpo/delta": 0.06923794001340866,
|
|
"fcm_dpo/margin": 77.70970153808594,
|
|
"fcm_dpo/q_t": 0.3566807806491852,
|
|
"grad_norm": 47.6483154296875,
|
|
"learning_rate": 3.965307091713037e-07,
|
|
"logits/chosen": -0.3602520823478699,
|
|
"logits/rejected": -0.33540549874305725,
|
|
"logps/chosen": -154.7190704345703,
|
|
"logps/ref_chosen": -54.154998779296875,
|
|
"logps/ref_rejected": -90.30764770507812,
|
|
"logps/rejected": -268.5814208984375,
|
|
"loss": 0.9903,
|
|
"margin_dpo/margin_mean": 77.70970153808594,
|
|
"margin_dpo/margin_std": 112.6717758178711,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.3744493392070485,
|
|
"fcm_dpo/beta": 0.010053331032395363,
|
|
"fcm_dpo/delta": -0.01586991548538208,
|
|
"fcm_dpo/margin": 85.83883666992188,
|
|
"fcm_dpo/q_t": 0.3369311988353729,
|
|
"grad_norm": 55.98381423950195,
|
|
"learning_rate": 3.954890003969163e-07,
|
|
"logits/chosen": -0.2981446087360382,
|
|
"logits/rejected": -0.28794023394584656,
|
|
"logps/chosen": -166.04107666015625,
|
|
"logps/ref_chosen": -57.14167022705078,
|
|
"logps/ref_rejected": -90.2085952758789,
|
|
"logps/rejected": -284.94683837890625,
|
|
"loss": 0.9489,
|
|
"margin_dpo/margin_mean": 85.8388442993164,
|
|
"margin_dpo/margin_std": 111.47303771972656,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.37591776798825255,
|
|
"fcm_dpo/beta": 0.009983371943235397,
|
|
"fcm_dpo/delta": -0.07811053097248077,
|
|
"fcm_dpo/margin": 92.20353698730469,
|
|
"fcm_dpo/q_t": 0.3229755163192749,
|
|
"grad_norm": 58.64165115356445,
|
|
"learning_rate": 3.944434578520628e-07,
|
|
"logits/chosen": -0.29079824686050415,
|
|
"logits/rejected": -0.2855415940284729,
|
|
"logps/chosen": -154.9361572265625,
|
|
"logps/ref_chosen": -55.163490295410156,
|
|
"logps/ref_rejected": -92.56291961669922,
|
|
"logps/rejected": -284.53912353515625,
|
|
"loss": 0.8674,
|
|
"margin_dpo/margin_mean": 92.20353698730469,
|
|
"margin_dpo/margin_std": 104.3232192993164,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.37738619676945667,
|
|
"fcm_dpo/beta": 0.009662920609116554,
|
|
"fcm_dpo/delta": -0.09995823353528976,
|
|
"fcm_dpo/margin": 96.6123275756836,
|
|
"fcm_dpo/q_t": 0.3234117329120636,
|
|
"grad_norm": 42.73604202270508,
|
|
"learning_rate": 3.933941090877615e-07,
|
|
"logits/chosen": -0.29401636123657227,
|
|
"logits/rejected": -0.2769302725791931,
|
|
"logps/chosen": -144.01602172851562,
|
|
"logps/ref_chosen": -49.42369842529297,
|
|
"logps/ref_rejected": -79.53791809082031,
|
|
"logps/rejected": -270.7425537109375,
|
|
"loss": 0.8724,
|
|
"margin_dpo/margin_mean": 96.6123275756836,
|
|
"margin_dpo/margin_std": 106.85414123535156,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.3788546255506608,
|
|
"fcm_dpo/beta": 0.00979782734066248,
|
|
"fcm_dpo/delta": -0.00526980496942997,
|
|
"fcm_dpo/margin": 87.2305908203125,
|
|
"fcm_dpo/q_t": 0.32909661531448364,
|
|
"grad_norm": 49.11513900756836,
|
|
"learning_rate": 3.923409817553284e-07,
|
|
"logits/chosen": -0.2832171320915222,
|
|
"logits/rejected": -0.2739581763744354,
|
|
"logps/chosen": -169.74600219726562,
|
|
"logps/ref_chosen": -59.384124755859375,
|
|
"logps/ref_rejected": -95.99010467529297,
|
|
"logps/rejected": -293.58258056640625,
|
|
"loss": 0.9349,
|
|
"margin_dpo/margin_mean": 87.23057556152344,
|
|
"margin_dpo/margin_std": 109.7698974609375,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.3803230543318649,
|
|
"fcm_dpo/beta": 0.009825894609093666,
|
|
"fcm_dpo/delta": 0.08960148692131042,
|
|
"fcm_dpo/margin": 78.04417419433594,
|
|
"fcm_dpo/q_t": 0.34981006383895874,
|
|
"grad_norm": 45.765663146972656,
|
|
"learning_rate": 3.9128410360564793e-07,
|
|
"logits/chosen": -0.3537431061267853,
|
|
"logits/rejected": -0.3431965112686157,
|
|
"logps/chosen": -151.65065002441406,
|
|
"logps/ref_chosen": -52.828346252441406,
|
|
"logps/ref_rejected": -89.191650390625,
|
|
"logps/rejected": -266.05816650390625,
|
|
"loss": 0.9498,
|
|
"margin_dpo/margin_mean": 78.04417419433594,
|
|
"margin_dpo/margin_std": 97.46719360351562,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.38179148311306904,
|
|
"fcm_dpo/beta": 0.009930366650223732,
|
|
"fcm_dpo/delta": -0.03357648104429245,
|
|
"fcm_dpo/margin": 88.66500091552734,
|
|
"fcm_dpo/q_t": 0.3270108103752136,
|
|
"grad_norm": 43.459815979003906,
|
|
"learning_rate": 3.9022350248844246e-07,
|
|
"logits/chosen": -0.3347843885421753,
|
|
"logits/rejected": -0.35157203674316406,
|
|
"logps/chosen": -141.62643432617188,
|
|
"logps/ref_chosen": -47.41767501831055,
|
|
"logps/ref_rejected": -95.08978271484375,
|
|
"logps/rejected": -277.96356201171875,
|
|
"loss": 0.8549,
|
|
"margin_dpo/margin_mean": 88.66499328613281,
|
|
"margin_dpo/margin_std": 95.96063995361328,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.3832599118942731,
|
|
"fcm_dpo/beta": 0.009731737896800041,
|
|
"fcm_dpo/delta": -0.07620503008365631,
|
|
"fcm_dpo/margin": 94.40481567382812,
|
|
"fcm_dpo/q_t": 0.3174518346786499,
|
|
"grad_norm": 46.79330825805664,
|
|
"learning_rate": 3.891592063515376e-07,
|
|
"logits/chosen": -0.31741079688072205,
|
|
"logits/rejected": -0.30679404735565186,
|
|
"logps/chosen": -132.945068359375,
|
|
"logps/ref_chosen": -53.03137969970703,
|
|
"logps/ref_rejected": -88.51494598388672,
|
|
"logps/rejected": -262.8334655761719,
|
|
"loss": 0.8336,
|
|
"margin_dpo/margin_mean": 94.40481567382812,
|
|
"margin_dpo/margin_std": 97.66380310058594,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.38472834067547723,
|
|
"fcm_dpo/beta": 0.009914107620716095,
|
|
"fcm_dpo/delta": 0.10215713828802109,
|
|
"fcm_dpo/margin": 76.14309692382812,
|
|
"fcm_dpo/q_t": 0.33958059549331665,
|
|
"grad_norm": 38.953277587890625,
|
|
"learning_rate": 3.880912432401264e-07,
|
|
"logits/chosen": -0.28771814703941345,
|
|
"logits/rejected": -0.2547649145126343,
|
|
"logps/chosen": -152.20550537109375,
|
|
"logps/ref_chosen": -59.620140075683594,
|
|
"logps/ref_rejected": -86.41853332519531,
|
|
"logps/rejected": -255.14697265625,
|
|
"loss": 0.8977,
|
|
"margin_dpo/margin_mean": 76.14309692382812,
|
|
"margin_dpo/margin_std": 78.52725219726562,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.38619676945668135,
|
|
"fcm_dpo/beta": 0.009619778022170067,
|
|
"fcm_dpo/delta": -0.17337682843208313,
|
|
"fcm_dpo/margin": 104.40455627441406,
|
|
"fcm_dpo/q_t": 0.2986921966075897,
|
|
"grad_norm": 40.7275390625,
|
|
"learning_rate": 3.870196412960302e-07,
|
|
"logits/chosen": -0.38451990485191345,
|
|
"logits/rejected": -0.35206347703933716,
|
|
"logps/chosen": -142.514892578125,
|
|
"logps/ref_chosen": -59.42094421386719,
|
|
"logps/ref_rejected": -96.85720825195312,
|
|
"logps/rejected": -284.355712890625,
|
|
"loss": 0.7864,
|
|
"margin_dpo/margin_mean": 104.40455627441406,
|
|
"margin_dpo/margin_std": 96.11820983886719,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.3876651982378855,
|
|
"fcm_dpo/beta": 0.009586556814610958,
|
|
"fcm_dpo/delta": 0.04767933860421181,
|
|
"fcm_dpo/margin": 84.01654052734375,
|
|
"fcm_dpo/q_t": 0.33650076389312744,
|
|
"grad_norm": 62.09348678588867,
|
|
"learning_rate": 3.8594442875695665e-07,
|
|
"logits/chosen": -0.3532578945159912,
|
|
"logits/rejected": -0.337126761674881,
|
|
"logps/chosen": -148.93775939941406,
|
|
"logps/ref_chosen": -62.722084045410156,
|
|
"logps/ref_rejected": -93.85620880126953,
|
|
"logps/rejected": -264.0884094238281,
|
|
"loss": 0.8848,
|
|
"margin_dpo/margin_mean": 84.01654052734375,
|
|
"margin_dpo/margin_std": 89.25030517578125,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.3891336270190896,
|
|
"fcm_dpo/beta": 0.009736664593219757,
|
|
"fcm_dpo/delta": 0.019567368552088737,
|
|
"fcm_dpo/margin": 85.43596649169922,
|
|
"fcm_dpo/q_t": 0.34142905473709106,
|
|
"grad_norm": 50.95734786987305,
|
|
"learning_rate": 3.848656339557562e-07,
|
|
"logits/chosen": -0.27792888879776,
|
|
"logits/rejected": -0.2561708092689514,
|
|
"logps/chosen": -161.2020721435547,
|
|
"logps/ref_chosen": -61.971466064453125,
|
|
"logps/ref_rejected": -88.02059936523438,
|
|
"logps/rejected": -272.6871643066406,
|
|
"loss": 0.9227,
|
|
"margin_dpo/margin_mean": 85.43595886230469,
|
|
"margin_dpo/margin_std": 107.73658752441406,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.39060205580029367,
|
|
"fcm_dpo/beta": 0.009648610837757587,
|
|
"fcm_dpo/delta": -0.01776065304875374,
|
|
"fcm_dpo/margin": 89.72529602050781,
|
|
"fcm_dpo/q_t": 0.3276539444923401,
|
|
"grad_norm": 39.50182342529297,
|
|
"learning_rate": 3.8378328531967507e-07,
|
|
"logits/chosen": -0.33635252714157104,
|
|
"logits/rejected": -0.27758198976516724,
|
|
"logps/chosen": -164.45074462890625,
|
|
"logps/ref_chosen": -67.09967041015625,
|
|
"logps/ref_rejected": -67.97122192382812,
|
|
"logps/rejected": -255.047607421875,
|
|
"loss": 0.8672,
|
|
"margin_dpo/margin_mean": 89.72528839111328,
|
|
"margin_dpo/margin_std": 96.26741027832031,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.3920704845814978,
|
|
"fcm_dpo/beta": 0.009558433666825294,
|
|
"fcm_dpo/delta": -0.05971603840589523,
|
|
"fcm_dpo/margin": 94.54733276367188,
|
|
"fcm_dpo/q_t": 0.31543660163879395,
|
|
"grad_norm": 42.246028900146484,
|
|
"learning_rate": 3.8269741136960646e-07,
|
|
"logits/chosen": -0.28289055824279785,
|
|
"logits/rejected": -0.2381281852722168,
|
|
"logps/chosen": -171.30865478515625,
|
|
"logps/ref_chosen": -68.97075653076172,
|
|
"logps/ref_rejected": -90.16844940185547,
|
|
"logps/rejected": -287.0536804199219,
|
|
"loss": 0.8204,
|
|
"margin_dpo/margin_mean": 94.54733276367188,
|
|
"margin_dpo/margin_std": 91.51085662841797,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.3935389133627019,
|
|
"fcm_dpo/beta": 0.00957027729600668,
|
|
"fcm_dpo/delta": 0.02075892500579357,
|
|
"fcm_dpo/margin": 86.81520080566406,
|
|
"fcm_dpo/q_t": 0.33144694566726685,
|
|
"grad_norm": 69.94318389892578,
|
|
"learning_rate": 3.8160804071933894e-07,
|
|
"logits/chosen": -0.23065903782844543,
|
|
"logits/rejected": -0.23640581965446472,
|
|
"logps/chosen": -172.29135131835938,
|
|
"logps/ref_chosen": -55.90031051635742,
|
|
"logps/ref_rejected": -101.64763641357422,
|
|
"logps/rejected": -304.8538818359375,
|
|
"loss": 0.88,
|
|
"margin_dpo/margin_mean": 86.81520080566406,
|
|
"margin_dpo/margin_std": 94.50149536132812,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.39500734214390604,
|
|
"fcm_dpo/beta": 0.009249732829630375,
|
|
"fcm_dpo/delta": -0.2376570701599121,
|
|
"fcm_dpo/margin": 114.76429748535156,
|
|
"fcm_dpo/q_t": 0.29645225405693054,
|
|
"grad_norm": 83.19200134277344,
|
|
"learning_rate": 3.8051520207480204e-07,
|
|
"logits/chosen": -0.2872861623764038,
|
|
"logits/rejected": -0.2640247941017151,
|
|
"logps/chosen": -198.46896362304688,
|
|
"logps/ref_chosen": -70.03955841064453,
|
|
"logps/ref_rejected": -107.34937286376953,
|
|
"logps/rejected": -350.5430908203125,
|
|
"loss": 0.7972,
|
|
"margin_dpo/margin_mean": 114.7643051147461,
|
|
"margin_dpo/margin_std": 115.25680541992188,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.3964757709251101,
|
|
"fcm_dpo/beta": 0.009293366223573685,
|
|
"fcm_dpo/delta": 0.07112763822078705,
|
|
"fcm_dpo/margin": 84.35902404785156,
|
|
"fcm_dpo/q_t": 0.34950098395347595,
|
|
"grad_norm": 53.65988540649414,
|
|
"learning_rate": 3.794189242333106e-07,
|
|
"logits/chosen": -0.28146451711654663,
|
|
"logits/rejected": -0.2732328176498413,
|
|
"logps/chosen": -189.49839782714844,
|
|
"logps/ref_chosen": -69.53347778320312,
|
|
"logps/ref_rejected": -109.92864990234375,
|
|
"logps/rejected": -314.25262451171875,
|
|
"loss": 0.948,
|
|
"margin_dpo/margin_mean": 84.35902404785156,
|
|
"margin_dpo/margin_std": 109.4448471069336,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.39794419970631423,
|
|
"fcm_dpo/beta": 0.009072428569197655,
|
|
"fcm_dpo/delta": -0.13254715502262115,
|
|
"fcm_dpo/margin": 106.77680969238281,
|
|
"fcm_dpo/q_t": 0.31320929527282715,
|
|
"grad_norm": 39.882137298583984,
|
|
"learning_rate": 3.7831923608280514e-07,
|
|
"logits/chosen": -0.21552950143814087,
|
|
"logits/rejected": -0.18930025398731232,
|
|
"logps/chosen": -177.8402099609375,
|
|
"logps/ref_chosen": -56.76456832885742,
|
|
"logps/ref_rejected": -92.51383972167969,
|
|
"logps/rejected": -320.36627197265625,
|
|
"loss": 0.8185,
|
|
"margin_dpo/margin_mean": 106.77680969238281,
|
|
"margin_dpo/margin_std": 110.38920593261719,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.39941262848751835,
|
|
"fcm_dpo/beta": 0.008728357963263988,
|
|
"fcm_dpo/delta": -0.29591071605682373,
|
|
"fcm_dpo/margin": 127.61502075195312,
|
|
"fcm_dpo/q_t": 0.28334927558898926,
|
|
"grad_norm": 33.21427917480469,
|
|
"learning_rate": 3.772161666010912e-07,
|
|
"logits/chosen": -0.18609288334846497,
|
|
"logits/rejected": -0.19840866327285767,
|
|
"logps/chosen": -163.57485961914062,
|
|
"logps/ref_chosen": -49.497154235839844,
|
|
"logps/ref_rejected": -105.54279327392578,
|
|
"logps/rejected": -347.23553466796875,
|
|
"loss": 0.755,
|
|
"margin_dpo/margin_mean": 127.61502075195312,
|
|
"margin_dpo/margin_std": 117.34019470214844,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.4008810572687225,
|
|
"fcm_dpo/beta": 0.008227063342928886,
|
|
"fcm_dpo/delta": -0.1768997311592102,
|
|
"fcm_dpo/margin": 122.21438598632812,
|
|
"fcm_dpo/q_t": 0.30270469188690186,
|
|
"grad_norm": 33.36330032348633,
|
|
"learning_rate": 3.761097448550755e-07,
|
|
"logits/chosen": -0.22537121176719666,
|
|
"logits/rejected": -0.2015794813632965,
|
|
"logps/chosen": -183.01629638671875,
|
|
"logps/ref_chosen": -62.97539520263672,
|
|
"logps/ref_rejected": -92.49858093261719,
|
|
"logps/rejected": -334.75384521484375,
|
|
"loss": 0.7921,
|
|
"margin_dpo/margin_mean": 122.21438598632812,
|
|
"margin_dpo/margin_std": 115.96615600585938,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.4023494860499266,
|
|
"fcm_dpo/beta": 0.008325198665261269,
|
|
"fcm_dpo/delta": 0.05208263173699379,
|
|
"fcm_dpo/margin": 96.30369567871094,
|
|
"fcm_dpo/q_t": 0.3377155065536499,
|
|
"grad_norm": 50.85749435424805,
|
|
"learning_rate": 3.75e-07,
|
|
"logits/chosen": -0.19594232738018036,
|
|
"logits/rejected": -0.17438998818397522,
|
|
"logps/chosen": -197.13754272460938,
|
|
"logps/ref_chosen": -55.66770935058594,
|
|
"logps/ref_rejected": -77.33308410644531,
|
|
"logps/rejected": -315.10662841796875,
|
|
"loss": 0.9015,
|
|
"margin_dpo/margin_mean": 96.30369567871094,
|
|
"margin_dpo/margin_std": 107.05192565917969,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.40381791483113066,
|
|
"fcm_dpo/beta": 0.008365076035261154,
|
|
"fcm_dpo/delta": -0.002092186361551285,
|
|
"fcm_dpo/margin": 101.77294158935547,
|
|
"fcm_dpo/q_t": 0.3336763381958008,
|
|
"grad_norm": 45.55134201049805,
|
|
"learning_rate": 3.738869612786737e-07,
|
|
"logits/chosen": -0.22732514142990112,
|
|
"logits/rejected": -0.2275867611169815,
|
|
"logps/chosen": -155.5432891845703,
|
|
"logps/ref_chosen": -48.594703674316406,
|
|
"logps/ref_rejected": -93.30369567871094,
|
|
"logps/rejected": -302.02520751953125,
|
|
"loss": 0.9015,
|
|
"margin_dpo/margin_mean": 101.77294158935547,
|
|
"margin_dpo/margin_std": 118.06379699707031,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.4052863436123348,
|
|
"fcm_dpo/beta": 0.008263555355370045,
|
|
"fcm_dpo/delta": -0.0206641536206007,
|
|
"fcm_dpo/margin": 105.10835266113281,
|
|
"fcm_dpo/q_t": 0.33232614398002625,
|
|
"grad_norm": 45.553375244140625,
|
|
"learning_rate": 3.7277065802070204e-07,
|
|
"logits/chosen": -0.2473652958869934,
|
|
"logits/rejected": -0.21824447810649872,
|
|
"logps/chosen": -171.64883422851562,
|
|
"logps/ref_chosen": -56.57740783691406,
|
|
"logps/ref_rejected": -70.36566925048828,
|
|
"logps/rejected": -290.5454406738281,
|
|
"loss": 0.8898,
|
|
"margin_dpo/margin_mean": 105.10834503173828,
|
|
"margin_dpo/margin_std": 122.83244323730469,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.4067547723935389,
|
|
"fcm_dpo/beta": 0.0082924272865057,
|
|
"fcm_dpo/delta": -0.02089064195752144,
|
|
"fcm_dpo/margin": 104.77349090576172,
|
|
"fcm_dpo/q_t": 0.32611536979675293,
|
|
"grad_norm": 36.01552963256836,
|
|
"learning_rate": 3.71651119641714e-07,
|
|
"logits/chosen": -0.20939847826957703,
|
|
"logits/rejected": -0.19383195042610168,
|
|
"logps/chosen": -180.56362915039062,
|
|
"logps/ref_chosen": -56.27156066894531,
|
|
"logps/ref_rejected": -92.88127136230469,
|
|
"logps/rejected": -321.94683837890625,
|
|
"loss": 0.8521,
|
|
"margin_dpo/margin_mean": 104.77348327636719,
|
|
"margin_dpo/margin_std": 107.48456573486328,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.40822320117474303,
|
|
"fcm_dpo/beta": 0.008100062608718872,
|
|
"fcm_dpo/delta": -0.09823326766490936,
|
|
"fcm_dpo/margin": 115.85311126708984,
|
|
"fcm_dpo/q_t": 0.3185454308986664,
|
|
"grad_norm": 34.42502212524414,
|
|
"learning_rate": 3.705283756425872e-07,
|
|
"logits/chosen": -0.20616397261619568,
|
|
"logits/rejected": -0.20798000693321228,
|
|
"logps/chosen": -170.531982421875,
|
|
"logps/ref_chosen": -52.94194030761719,
|
|
"logps/ref_rejected": -91.25357818603516,
|
|
"logps/rejected": -324.69671630859375,
|
|
"loss": 0.8401,
|
|
"margin_dpo/margin_mean": 115.85311126708984,
|
|
"margin_dpo/margin_std": 122.75349426269531,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.40969162995594716,
|
|
"fcm_dpo/beta": 0.007804838474839926,
|
|
"fcm_dpo/delta": -0.16851356625556946,
|
|
"fcm_dpo/margin": 127.84455871582031,
|
|
"fcm_dpo/q_t": 0.31053388118743896,
|
|
"grad_norm": 41.10043716430664,
|
|
"learning_rate": 3.6940245560867e-07,
|
|
"logits/chosen": -0.04199061542749405,
|
|
"logits/rejected": -0.032916110008955,
|
|
"logps/chosen": -192.05714416503906,
|
|
"logps/ref_chosen": -48.641319274902344,
|
|
"logps/ref_rejected": -87.8514404296875,
|
|
"logps/rejected": -359.11181640625,
|
|
"loss": 0.8453,
|
|
"margin_dpo/margin_mean": 127.84455871582031,
|
|
"margin_dpo/margin_std": 138.51828002929688,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.4111600587371512,
|
|
"fcm_dpo/beta": 0.0076360213570296764,
|
|
"fcm_dpo/delta": -0.13522008061408997,
|
|
"fcm_dpo/margin": 127.201171875,
|
|
"fcm_dpo/q_t": 0.2989714741706848,
|
|
"grad_norm": 48.5097541809082,
|
|
"learning_rate": 3.6827338920900253e-07,
|
|
"logits/chosen": -0.10075664520263672,
|
|
"logits/rejected": -0.09791351854801178,
|
|
"logps/chosen": -199.2039031982422,
|
|
"logps/ref_chosen": -58.797122955322266,
|
|
"logps/ref_rejected": -98.61885070800781,
|
|
"logps/rejected": -366.226806640625,
|
|
"loss": 0.7771,
|
|
"margin_dpo/margin_mean": 127.201171875,
|
|
"margin_dpo/margin_std": 109.58746337890625,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.41262848751835535,
|
|
"fcm_dpo/beta": 0.007574355695396662,
|
|
"fcm_dpo/delta": -0.06523635238409042,
|
|
"fcm_dpo/margin": 120.03223419189453,
|
|
"fcm_dpo/q_t": 0.3079676628112793,
|
|
"grad_norm": 41.980621337890625,
|
|
"learning_rate": 3.6714120619553435e-07,
|
|
"logits/chosen": -0.1654478907585144,
|
|
"logits/rejected": -0.13742399215698242,
|
|
"logps/chosen": -192.7757568359375,
|
|
"logps/ref_chosen": -55.488521575927734,
|
|
"logps/ref_rejected": -80.88258361816406,
|
|
"logps/rejected": -338.2020568847656,
|
|
"loss": 0.8205,
|
|
"margin_dpo/margin_mean": 120.03223419189453,
|
|
"margin_dpo/margin_std": 112.45529174804688,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.41409691629955947,
|
|
"fcm_dpo/beta": 0.007696120534092188,
|
|
"fcm_dpo/delta": 0.11213569343090057,
|
|
"fcm_dpo/margin": 96.7197036743164,
|
|
"fcm_dpo/q_t": 0.3515746593475342,
|
|
"grad_norm": 70.64493560791016,
|
|
"learning_rate": 3.660059364023408e-07,
|
|
"logits/chosen": -0.18230430781841278,
|
|
"logits/rejected": -0.14879056811332703,
|
|
"logps/chosen": -227.57904052734375,
|
|
"logps/ref_chosen": -73.07014465332031,
|
|
"logps/ref_rejected": -95.35098266601562,
|
|
"logps/rejected": -346.57958984375,
|
|
"loss": 0.9293,
|
|
"margin_dpo/margin_mean": 96.71971130371094,
|
|
"margin_dpo/margin_std": 116.51522827148438,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.4155653450807636,
|
|
"fcm_dpo/beta": 0.007496010046452284,
|
|
"fcm_dpo/delta": -0.20360887050628662,
|
|
"fcm_dpo/margin": 137.80856323242188,
|
|
"fcm_dpo/q_t": 0.294839084148407,
|
|
"grad_norm": 55.21201705932617,
|
|
"learning_rate": 3.6486760974483685e-07,
|
|
"logits/chosen": -0.0822724774479866,
|
|
"logits/rejected": -0.07609227299690247,
|
|
"logps/chosen": -223.06536865234375,
|
|
"logps/ref_chosen": -61.89844512939453,
|
|
"logps/ref_rejected": -96.98655700683594,
|
|
"logps/rejected": -395.9620361328125,
|
|
"loss": 0.784,
|
|
"margin_dpo/margin_mean": 137.80856323242188,
|
|
"margin_dpo/margin_std": 129.84677124023438,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.4170337738619677,
|
|
"fcm_dpo/beta": 0.007216787431389093,
|
|
"fcm_dpo/delta": -0.14114126563072205,
|
|
"fcm_dpo/margin": 135.40316772460938,
|
|
"fcm_dpo/q_t": 0.3051418662071228,
|
|
"grad_norm": 40.07844924926758,
|
|
"learning_rate": 3.6372625621898863e-07,
|
|
"logits/chosen": -0.1315004974603653,
|
|
"logits/rejected": -0.11729149520397186,
|
|
"logps/chosen": -218.79432678222656,
|
|
"logps/ref_chosen": -58.4355354309082,
|
|
"logps/ref_rejected": -93.46926879882812,
|
|
"logps/rejected": -389.2312316894531,
|
|
"loss": 0.784,
|
|
"margin_dpo/margin_mean": 135.40316772460938,
|
|
"margin_dpo/margin_std": 128.7941131591797,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.4185022026431718,
|
|
"fcm_dpo/beta": 0.007242846768349409,
|
|
"fcm_dpo/delta": 0.008408665657043457,
|
|
"fcm_dpo/margin": 116.0887680053711,
|
|
"fcm_dpo/q_t": 0.323802649974823,
|
|
"grad_norm": 40.012691497802734,
|
|
"learning_rate": 3.625819059005228e-07,
|
|
"logits/chosen": -0.14234209060668945,
|
|
"logits/rejected": -0.12387385219335556,
|
|
"logps/chosen": -239.96295166015625,
|
|
"logps/ref_chosen": -66.23219299316406,
|
|
"logps/ref_rejected": -99.1268310546875,
|
|
"logps/rejected": -388.94635009765625,
|
|
"loss": 0.8585,
|
|
"margin_dpo/margin_mean": 116.08877563476562,
|
|
"margin_dpo/margin_std": 113.36100769042969,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.4199706314243759,
|
|
"fcm_dpo/beta": 0.007111635059118271,
|
|
"fcm_dpo/delta": -0.06945645064115524,
|
|
"fcm_dpo/margin": 128.3986053466797,
|
|
"fcm_dpo/q_t": 0.3180208206176758,
|
|
"grad_norm": 50.79453659057617,
|
|
"learning_rate": 3.614345889441346e-07,
|
|
"logits/chosen": -0.20035997033119202,
|
|
"logits/rejected": -0.1742473840713501,
|
|
"logps/chosen": -238.18199157714844,
|
|
"logps/ref_chosen": -72.95100402832031,
|
|
"logps/ref_rejected": -88.58845520019531,
|
|
"logps/rejected": -382.218017578125,
|
|
"loss": 0.8479,
|
|
"margin_dpo/margin_mean": 128.3986053466797,
|
|
"margin_dpo/margin_std": 134.0906982421875,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.42143906020558003,
|
|
"fcm_dpo/beta": 0.007145530544221401,
|
|
"fcm_dpo/delta": 0.0370517298579216,
|
|
"fcm_dpo/margin": 114.15289306640625,
|
|
"fcm_dpo/q_t": 0.3385983407497406,
|
|
"grad_norm": 34.708343505859375,
|
|
"learning_rate": 3.6028433558269275e-07,
|
|
"logits/chosen": -0.11621429771184921,
|
|
"logits/rejected": -0.09012611210346222,
|
|
"logps/chosen": -222.60751342773438,
|
|
"logps/ref_chosen": -61.54115295410156,
|
|
"logps/ref_rejected": -77.69607543945312,
|
|
"logps/rejected": -352.91534423828125,
|
|
"loss": 0.9046,
|
|
"margin_dpo/margin_mean": 114.15290069580078,
|
|
"margin_dpo/margin_std": 134.21795654296875,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.42290748898678415,
|
|
"fcm_dpo/beta": 0.00695967860519886,
|
|
"fcm_dpo/delta": -0.07556534558534622,
|
|
"fcm_dpo/margin": 131.31358337402344,
|
|
"fcm_dpo/q_t": 0.3097105920314789,
|
|
"grad_norm": 35.59653091430664,
|
|
"learning_rate": 3.5913117612644327e-07,
|
|
"logits/chosen": -0.08861184120178223,
|
|
"logits/rejected": -0.06481438130140305,
|
|
"logps/chosen": -225.13528442382812,
|
|
"logps/ref_chosen": -56.661224365234375,
|
|
"logps/ref_rejected": -87.33570098876953,
|
|
"logps/rejected": -387.1233215332031,
|
|
"loss": 0.8115,
|
|
"margin_dpo/margin_mean": 131.31358337402344,
|
|
"margin_dpo/margin_std": 114.73504638671875,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.4243759177679883,
|
|
"fcm_dpo/beta": 0.0068605877459049225,
|
|
"fcm_dpo/delta": -0.19726723432540894,
|
|
"fcm_dpo/margin": 149.75587463378906,
|
|
"fcm_dpo/q_t": 0.3031596541404724,
|
|
"grad_norm": 43.76935958862305,
|
|
"learning_rate": 3.5797514096221024e-07,
|
|
"logits/chosen": -0.011087626218795776,
|
|
"logits/rejected": -0.0034407819621264935,
|
|
"logps/chosen": -216.83993530273438,
|
|
"logps/ref_chosen": -45.23039245605469,
|
|
"logps/ref_rejected": -87.64266967773438,
|
|
"logps/rejected": -409.008056640625,
|
|
"loss": 0.7988,
|
|
"margin_dpo/margin_mean": 149.75587463378906,
|
|
"margin_dpo/margin_std": 149.0826873779297,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.42584434654919234,
|
|
"fcm_dpo/beta": 0.006617412902414799,
|
|
"fcm_dpo/delta": -0.13556578755378723,
|
|
"fcm_dpo/margin": 146.91989135742188,
|
|
"fcm_dpo/q_t": 0.31431177258491516,
|
|
"grad_norm": 37.676513671875,
|
|
"learning_rate": 3.568162605525952e-07,
|
|
"logits/chosen": -0.017872124910354614,
|
|
"logits/rejected": -0.041775476187467575,
|
|
"logps/chosen": -242.56007385253906,
|
|
"logps/ref_chosen": -55.47149658203125,
|
|
"logps/ref_rejected": -116.70857238769531,
|
|
"logps/rejected": -450.717041015625,
|
|
"loss": 0.8339,
|
|
"margin_dpo/margin_mean": 146.91989135742188,
|
|
"margin_dpo/margin_std": 160.46890258789062,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.42731277533039647,
|
|
"fcm_dpo/beta": 0.006590387783944607,
|
|
"fcm_dpo/delta": -0.04984688758850098,
|
|
"fcm_dpo/margin": 135.67874145507812,
|
|
"fcm_dpo/q_t": 0.3153119683265686,
|
|
"grad_norm": 41.912757873535156,
|
|
"learning_rate": 3.5565456543517485e-07,
|
|
"logits/chosen": -0.08200214803218842,
|
|
"logits/rejected": -0.06535626202821732,
|
|
"logps/chosen": -219.4686279296875,
|
|
"logps/ref_chosen": -63.26036834716797,
|
|
"logps/ref_rejected": -89.29708862304688,
|
|
"logps/rejected": -381.1841125488281,
|
|
"loss": 0.8297,
|
|
"margin_dpo/margin_mean": 135.67874145507812,
|
|
"margin_dpo/margin_std": 126.33541870117188,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.4287812041116006,
|
|
"fcm_dpo/beta": 0.006525726057589054,
|
|
"fcm_dpo/delta": -0.022788774222135544,
|
|
"fcm_dpo/margin": 133.373291015625,
|
|
"fcm_dpo/q_t": 0.3237302601337433,
|
|
"grad_norm": 60.781497955322266,
|
|
"learning_rate": 3.5449008622169583e-07,
|
|
"logits/chosen": -0.07108621299266815,
|
|
"logits/rejected": -0.05422316491603851,
|
|
"logps/chosen": -232.05926513671875,
|
|
"logps/ref_chosen": -53.91852951049805,
|
|
"logps/ref_rejected": -89.96138000488281,
|
|
"logps/rejected": -401.47540283203125,
|
|
"loss": 0.9028,
|
|
"margin_dpo/margin_mean": 133.37327575683594,
|
|
"margin_dpo/margin_std": 155.2228240966797,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.4302496328928047,
|
|
"fcm_dpo/beta": 0.00664651021361351,
|
|
"fcm_dpo/delta": 0.1351955235004425,
|
|
"fcm_dpo/margin": 108.85354614257812,
|
|
"fcm_dpo/q_t": 0.3587031066417694,
|
|
"grad_norm": 39.997802734375,
|
|
"learning_rate": 3.5332285359726846e-07,
|
|
"logits/chosen": -0.08526704460382462,
|
|
"logits/rejected": -0.07327552884817123,
|
|
"logps/chosen": -225.83401489257812,
|
|
"logps/ref_chosen": -60.376033782958984,
|
|
"logps/ref_rejected": -77.85244750976562,
|
|
"logps/rejected": -352.1639709472656,
|
|
"loss": 0.9827,
|
|
"margin_dpo/margin_mean": 108.85354614257812,
|
|
"margin_dpo/margin_std": 145.9722442626953,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.43171806167400884,
|
|
"fcm_dpo/beta": 0.006676151417195797,
|
|
"fcm_dpo/delta": 0.04422283172607422,
|
|
"fcm_dpo/margin": 121.20598602294922,
|
|
"fcm_dpo/q_t": 0.3434596061706543,
|
|
"grad_norm": 39.22120666503906,
|
|
"learning_rate": 3.5215289831955786e-07,
|
|
"logits/chosen": -0.07479946315288544,
|
|
"logits/rejected": -0.07693535089492798,
|
|
"logps/chosen": -197.52316284179688,
|
|
"logps/ref_chosen": -48.0875358581543,
|
|
"logps/ref_rejected": -81.89698791503906,
|
|
"logps/rejected": -352.53857421875,
|
|
"loss": 0.9206,
|
|
"margin_dpo/margin_mean": 121.20599365234375,
|
|
"margin_dpo/margin_std": 148.40878295898438,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.4331864904552129,
|
|
"fcm_dpo/beta": 0.0067435759119689465,
|
|
"fcm_dpo/delta": 0.031169120222330093,
|
|
"fcm_dpo/margin": 121.80484008789062,
|
|
"fcm_dpo/q_t": 0.3364499807357788,
|
|
"grad_norm": 40.891448974609375,
|
|
"learning_rate": 3.509802512179737e-07,
|
|
"logits/chosen": -0.11838773638010025,
|
|
"logits/rejected": -0.11896628886461258,
|
|
"logps/chosen": -223.58511352539062,
|
|
"logps/ref_chosen": -49.92467498779297,
|
|
"logps/ref_rejected": -87.45632934570312,
|
|
"logps/rejected": -382.9216003417969,
|
|
"loss": 0.8978,
|
|
"margin_dpo/margin_mean": 121.80484008789062,
|
|
"margin_dpo/margin_std": 137.68096923828125,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.434654919236417,
|
|
"fcm_dpo/beta": 0.006936186458915472,
|
|
"fcm_dpo/delta": 0.2200748324394226,
|
|
"fcm_dpo/margin": 92.93003845214844,
|
|
"fcm_dpo/q_t": 0.3718491196632385,
|
|
"grad_norm": 62.75762939453125,
|
|
"learning_rate": 3.498049431928577e-07,
|
|
"logits/chosen": -0.12827003002166748,
|
|
"logits/rejected": -0.10901685059070587,
|
|
"logps/chosen": -262.9681396484375,
|
|
"logps/ref_chosen": -65.49124145507812,
|
|
"logps/ref_rejected": -93.08908081054688,
|
|
"logps/rejected": -383.49603271484375,
|
|
"loss": 1.0533,
|
|
"margin_dpo/margin_mean": 92.93003845214844,
|
|
"margin_dpo/margin_std": 143.3027801513672,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.43612334801762115,
|
|
"fcm_dpo/beta": 0.007118170149624348,
|
|
"fcm_dpo/delta": 0.05691620334982872,
|
|
"fcm_dpo/margin": 112.05933380126953,
|
|
"fcm_dpo/q_t": 0.33294984698295593,
|
|
"grad_norm": 44.155250549316406,
|
|
"learning_rate": 3.486270052146694e-07,
|
|
"logits/chosen": -0.14343738555908203,
|
|
"logits/rejected": -0.14404615759849548,
|
|
"logps/chosen": -233.68067932128906,
|
|
"logps/ref_chosen": -56.476951599121094,
|
|
"logps/ref_rejected": -95.1385498046875,
|
|
"logps/rejected": -384.401611328125,
|
|
"loss": 0.8644,
|
|
"margin_dpo/margin_mean": 112.05933380126953,
|
|
"margin_dpo/margin_std": 108.87353515625,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.43759177679882527,
|
|
"fcm_dpo/beta": 0.007042343728244305,
|
|
"fcm_dpo/delta": -0.10975862294435501,
|
|
"fcm_dpo/margin": 134.819091796875,
|
|
"fcm_dpo/q_t": 0.32128387689590454,
|
|
"grad_norm": 55.54197692871094,
|
|
"learning_rate": 3.474464683231698e-07,
|
|
"logits/chosen": -0.12262261658906937,
|
|
"logits/rejected": -0.14369015395641327,
|
|
"logps/chosen": -240.33999633789062,
|
|
"logps/ref_chosen": -67.32516479492188,
|
|
"logps/ref_rejected": -116.66217041015625,
|
|
"logps/rejected": -424.49609375,
|
|
"loss": 0.8518,
|
|
"margin_dpo/margin_mean": 134.819091796875,
|
|
"margin_dpo/margin_std": 153.47982788085938,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.4390602055800294,
|
|
"fcm_dpo/beta": 0.007004514802247286,
|
|
"fcm_dpo/delta": -0.062288179993629456,
|
|
"fcm_dpo/margin": 129.19944763183594,
|
|
"fcm_dpo/q_t": 0.32569801807403564,
|
|
"grad_norm": 44.929222106933594,
|
|
"learning_rate": 3.462633636266041e-07,
|
|
"logits/chosen": -0.1638999879360199,
|
|
"logits/rejected": -0.15911725163459778,
|
|
"logps/chosen": -198.703369140625,
|
|
"logps/ref_chosen": -48.96209716796875,
|
|
"logps/ref_rejected": -84.32823944091797,
|
|
"logps/rejected": -363.26898193359375,
|
|
"loss": 0.8819,
|
|
"margin_dpo/margin_mean": 129.19944763183594,
|
|
"margin_dpo/margin_std": 146.23391723632812,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.44052863436123346,
|
|
"fcm_dpo/beta": 0.006798220798373222,
|
|
"fcm_dpo/delta": -0.12738555669784546,
|
|
"fcm_dpo/margin": 141.98765563964844,
|
|
"fcm_dpo/q_t": 0.31477490067481995,
|
|
"grad_norm": 67.45763397216797,
|
|
"learning_rate": 3.4507772230088147e-07,
|
|
"logits/chosen": -0.1142905056476593,
|
|
"logits/rejected": -0.10768699645996094,
|
|
"logps/chosen": -250.97042846679688,
|
|
"logps/ref_chosen": -59.07371139526367,
|
|
"logps/ref_rejected": -95.9664535522461,
|
|
"logps/rejected": -429.85076904296875,
|
|
"loss": 0.8901,
|
|
"margin_dpo/margin_mean": 141.98765563964844,
|
|
"margin_dpo/margin_std": 166.83888244628906,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.4419970631424376,
|
|
"fcm_dpo/beta": 0.006596191320568323,
|
|
"fcm_dpo/delta": -0.09547975659370422,
|
|
"fcm_dpo/margin": 141.79217529296875,
|
|
"fcm_dpo/q_t": 0.31292590498924255,
|
|
"grad_norm": 43.63452911376953,
|
|
"learning_rate": 3.4388957558875316e-07,
|
|
"logits/chosen": -0.1395409107208252,
|
|
"logits/rejected": -0.1367347240447998,
|
|
"logps/chosen": -224.599853515625,
|
|
"logps/ref_chosen": -57.249366760253906,
|
|
"logps/ref_rejected": -92.35354614257812,
|
|
"logps/rejected": -401.4962158203125,
|
|
"loss": 0.8159,
|
|
"margin_dpo/margin_mean": 141.79217529296875,
|
|
"margin_dpo/margin_std": 137.7713165283203,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.4434654919236417,
|
|
"fcm_dpo/beta": 0.00664362870156765,
|
|
"fcm_dpo/delta": 0.07332435995340347,
|
|
"fcm_dpo/margin": 117.74930572509766,
|
|
"fcm_dpo/q_t": 0.34151118993759155,
|
|
"grad_norm": 47.059879302978516,
|
|
"learning_rate": 3.426989547989902e-07,
|
|
"logits/chosen": -0.14914287626743317,
|
|
"logits/rejected": -0.15093174576759338,
|
|
"logps/chosen": -200.8439483642578,
|
|
"logps/ref_chosen": -51.197994232177734,
|
|
"logps/ref_rejected": -97.22636413574219,
|
|
"logps/rejected": -364.62164306640625,
|
|
"loss": 0.904,
|
|
"margin_dpo/margin_mean": 117.74931335449219,
|
|
"margin_dpo/margin_std": 131.7711639404297,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.44493392070484583,
|
|
"fcm_dpo/beta": 0.006741734221577644,
|
|
"fcm_dpo/delta": 0.05900689214468002,
|
|
"fcm_dpo/margin": 118.02484893798828,
|
|
"fcm_dpo/q_t": 0.3473062813282013,
|
|
"grad_norm": 72.38304901123047,
|
|
"learning_rate": 3.4150589130555773e-07,
|
|
"logits/chosen": -0.1549404263496399,
|
|
"logits/rejected": -0.13192573189735413,
|
|
"logps/chosen": -220.18551635742188,
|
|
"logps/ref_chosen": -66.71394348144531,
|
|
"logps/ref_rejected": -86.94542694091797,
|
|
"logps/rejected": -358.44183349609375,
|
|
"loss": 0.9342,
|
|
"margin_dpo/margin_mean": 118.02484893798828,
|
|
"margin_dpo/margin_std": 147.0444793701172,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.44640234948604995,
|
|
"fcm_dpo/beta": 0.006808813661336899,
|
|
"fcm_dpo/delta": -0.0030750595033168793,
|
|
"fcm_dpo/margin": 125.19535827636719,
|
|
"fcm_dpo/q_t": 0.3163961172103882,
|
|
"grad_norm": 48.08484649658203,
|
|
"learning_rate": 3.403104165467883e-07,
|
|
"logits/chosen": -0.16453275084495544,
|
|
"logits/rejected": -0.14551252126693726,
|
|
"logps/chosen": -214.55648803710938,
|
|
"logps/ref_chosen": -71.95069885253906,
|
|
"logps/ref_rejected": -90.47203063964844,
|
|
"logps/rejected": -358.273193359375,
|
|
"loss": 0.811,
|
|
"margin_dpo/margin_mean": 125.19536590576172,
|
|
"margin_dpo/margin_std": 101.456298828125,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.447870778267254,
|
|
"fcm_dpo/beta": 0.006762784440070391,
|
|
"fcm_dpo/delta": 0.04657618701457977,
|
|
"fcm_dpo/margin": 119.18153381347656,
|
|
"fcm_dpo/q_t": 0.3422441780567169,
|
|
"grad_norm": 46.43743896484375,
|
|
"learning_rate": 3.391125620245535e-07,
|
|
"logits/chosen": -0.18260565400123596,
|
|
"logits/rejected": -0.15914301574230194,
|
|
"logps/chosen": -220.98190307617188,
|
|
"logps/ref_chosen": -66.79523468017578,
|
|
"logps/ref_rejected": -92.75459289550781,
|
|
"logps/rejected": -366.122802734375,
|
|
"loss": 0.9052,
|
|
"margin_dpo/margin_mean": 119.18153381347656,
|
|
"margin_dpo/margin_std": 139.260986328125,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.44933920704845814,
|
|
"fcm_dpo/beta": 0.006842019967734814,
|
|
"fcm_dpo/delta": 0.010429871268570423,
|
|
"fcm_dpo/margin": 122.83320617675781,
|
|
"fcm_dpo/q_t": 0.32789498567581177,
|
|
"grad_norm": 54.641883850097656,
|
|
"learning_rate": 3.3791235930343417e-07,
|
|
"logits/chosen": -0.14581012725830078,
|
|
"logits/rejected": -0.11724609136581421,
|
|
"logps/chosen": -221.95294189453125,
|
|
"logps/ref_chosen": -69.68389892578125,
|
|
"logps/ref_rejected": -85.15919494628906,
|
|
"logps/rejected": -360.2614440917969,
|
|
"loss": 0.8568,
|
|
"margin_dpo/margin_mean": 122.83321380615234,
|
|
"margin_dpo/margin_std": 124.98741149902344,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.45080763582966227,
|
|
"fcm_dpo/beta": 0.00686724903061986,
|
|
"fcm_dpo/delta": 0.04468066990375519,
|
|
"fcm_dpo/margin": 117.76041412353516,
|
|
"fcm_dpo/q_t": 0.3355296850204468,
|
|
"grad_norm": 43.83578109741211,
|
|
"learning_rate": 3.367098400098881e-07,
|
|
"logits/chosen": -0.12014691531658173,
|
|
"logits/rejected": -0.09900471568107605,
|
|
"logps/chosen": -214.15711975097656,
|
|
"logps/ref_chosen": -70.16542053222656,
|
|
"logps/ref_rejected": -86.97230529785156,
|
|
"logps/rejected": -348.7243957519531,
|
|
"loss": 0.9003,
|
|
"margin_dpo/margin_mean": 117.76041412353516,
|
|
"margin_dpo/margin_std": 131.61854553222656,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.4522760646108664,
|
|
"fcm_dpo/beta": 0.00685135880485177,
|
|
"fcm_dpo/delta": -0.05890195071697235,
|
|
"fcm_dpo/margin": 131.8763885498047,
|
|
"fcm_dpo/q_t": 0.3151218891143799,
|
|
"grad_norm": 77.11981201171875,
|
|
"learning_rate": 3.355050358314172e-07,
|
|
"logits/chosen": -0.07158000022172928,
|
|
"logits/rejected": -0.048265837132930756,
|
|
"logps/chosen": -199.24583435058594,
|
|
"logps/ref_chosen": -55.2449951171875,
|
|
"logps/ref_rejected": -79.37226104736328,
|
|
"logps/rejected": -355.2494812011719,
|
|
"loss": 0.824,
|
|
"margin_dpo/margin_mean": 131.8763885498047,
|
|
"margin_dpo/margin_std": 125.1200942993164,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.45374449339207046,
|
|
"fcm_dpo/beta": 0.006839036010205746,
|
|
"fcm_dpo/delta": -0.05211016535758972,
|
|
"fcm_dpo/margin": 131.0915069580078,
|
|
"fcm_dpo/q_t": 0.3178969919681549,
|
|
"grad_norm": 51.315860748291016,
|
|
"learning_rate": 3.3429797851573183e-07,
|
|
"logits/chosen": -0.036357562988996506,
|
|
"logits/rejected": -0.011226684786379337,
|
|
"logps/chosen": -194.12411499023438,
|
|
"logps/ref_chosen": -48.959083557128906,
|
|
"logps/ref_rejected": -82.34072875976562,
|
|
"logps/rejected": -358.5972900390625,
|
|
"loss": 0.8367,
|
|
"margin_dpo/margin_mean": 131.0915069580078,
|
|
"margin_dpo/margin_std": 127.46534729003906,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.4552129221732746,
|
|
"fcm_dpo/beta": 0.006814408116042614,
|
|
"fcm_dpo/delta": 0.012445982545614243,
|
|
"fcm_dpo/margin": 122.96669006347656,
|
|
"fcm_dpo/q_t": 0.321883887052536,
|
|
"grad_norm": 40.05405044555664,
|
|
"learning_rate": 3.3308869986991487e-07,
|
|
"logits/chosen": -0.0659695714712143,
|
|
"logits/rejected": -0.039075713604688644,
|
|
"logps/chosen": -229.92140197753906,
|
|
"logps/ref_chosen": -62.74177932739258,
|
|
"logps/ref_rejected": -79.9302978515625,
|
|
"logps/rejected": -370.07659912109375,
|
|
"loss": 0.8222,
|
|
"margin_dpo/margin_mean": 122.96668243408203,
|
|
"margin_dpo/margin_std": 104.34278106689453,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.4566813509544787,
|
|
"fcm_dpo/beta": 0.006636098027229309,
|
|
"fcm_dpo/delta": -0.12000095099210739,
|
|
"fcm_dpo/margin": 144.33042907714844,
|
|
"fcm_dpo/q_t": 0.31533536314964294,
|
|
"grad_norm": 39.31062698364258,
|
|
"learning_rate": 3.3187723175958346e-07,
|
|
"logits/chosen": -0.06826113164424896,
|
|
"logits/rejected": -0.042183272540569305,
|
|
"logps/chosen": -235.5238494873047,
|
|
"logps/ref_chosen": -53.02798080444336,
|
|
"logps/ref_rejected": -77.43820190429688,
|
|
"logps/rejected": -404.2644958496094,
|
|
"loss": 0.8325,
|
|
"margin_dpo/margin_mean": 144.33042907714844,
|
|
"margin_dpo/margin_std": 152.86956787109375,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.4581497797356828,
|
|
"fcm_dpo/beta": 0.006625012028962374,
|
|
"fcm_dpo/delta": -0.02862781658768654,
|
|
"fcm_dpo/margin": 132.19378662109375,
|
|
"fcm_dpo/q_t": 0.32340696454048157,
|
|
"grad_norm": 56.44401550292969,
|
|
"learning_rate": 3.306636061080487e-07,
|
|
"logits/chosen": 0.06051143631339073,
|
|
"logits/rejected": 0.07712406665086746,
|
|
"logps/chosen": -231.40371704101562,
|
|
"logps/ref_chosen": -49.39221954345703,
|
|
"logps/ref_rejected": -75.79280853271484,
|
|
"logps/rejected": -389.9980773925781,
|
|
"loss": 0.8657,
|
|
"margin_dpo/margin_mean": 132.19378662109375,
|
|
"margin_dpo/margin_std": 143.23089599609375,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.45961820851688695,
|
|
"fcm_dpo/beta": 0.006699627265334129,
|
|
"fcm_dpo/delta": 0.05388227105140686,
|
|
"fcm_dpo/margin": 119.13378143310547,
|
|
"fcm_dpo/q_t": 0.34248650074005127,
|
|
"grad_norm": 41.009490966796875,
|
|
"learning_rate": 3.2944785489547537e-07,
|
|
"logits/chosen": 0.02956494688987732,
|
|
"logits/rejected": 0.03531932458281517,
|
|
"logps/chosen": -225.14703369140625,
|
|
"logps/ref_chosen": -50.152740478515625,
|
|
"logps/ref_rejected": -86.40620422363281,
|
|
"logps/rejected": -380.5343017578125,
|
|
"loss": 0.9434,
|
|
"margin_dpo/margin_mean": 119.13378143310547,
|
|
"margin_dpo/margin_std": 149.03701782226562,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.461086637298091,
|
|
"fcm_dpo/beta": 0.006667881738394499,
|
|
"fcm_dpo/delta": 0.05602387711405754,
|
|
"fcm_dpo/margin": 119.68208312988281,
|
|
"fcm_dpo/q_t": 0.3448660671710968,
|
|
"grad_norm": 40.57001495361328,
|
|
"learning_rate": 3.2823001015803857e-07,
|
|
"logits/chosen": -0.01754029095172882,
|
|
"logits/rejected": -0.010302603244781494,
|
|
"logps/chosen": -255.24281311035156,
|
|
"logps/ref_chosen": -57.237579345703125,
|
|
"logps/ref_rejected": -97.5965347290039,
|
|
"logps/rejected": -415.2838439941406,
|
|
"loss": 0.9455,
|
|
"margin_dpo/margin_mean": 119.68208312988281,
|
|
"margin_dpo/margin_std": 152.344482421875,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.46255506607929514,
|
|
"fcm_dpo/beta": 0.006808501668274403,
|
|
"fcm_dpo/delta": 0.08605809509754181,
|
|
"fcm_dpo/margin": 113.18682861328125,
|
|
"fcm_dpo/q_t": 0.3449942171573639,
|
|
"grad_norm": 55.45918273925781,
|
|
"learning_rate": 3.270101039870797e-07,
|
|
"logits/chosen": 0.13534711301326752,
|
|
"logits/rejected": 0.13563254475593567,
|
|
"logps/chosen": -255.06546020507812,
|
|
"logps/ref_chosen": -49.06958770751953,
|
|
"logps/ref_rejected": -85.68087768554688,
|
|
"logps/rejected": -404.86358642578125,
|
|
"loss": 0.9118,
|
|
"margin_dpo/margin_mean": 113.18682861328125,
|
|
"margin_dpo/margin_std": 130.35165405273438,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.46402349486049926,
|
|
"fcm_dpo/beta": 0.006600411608815193,
|
|
"fcm_dpo/delta": -0.19463670253753662,
|
|
"fcm_dpo/margin": 154.8727569580078,
|
|
"fcm_dpo/q_t": 0.3011382222175598,
|
|
"grad_norm": 46.55292892456055,
|
|
"learning_rate": 3.2578816852826086e-07,
|
|
"logits/chosen": 0.06141500920057297,
|
|
"logits/rejected": 0.06320170313119888,
|
|
"logps/chosen": -257.7305908203125,
|
|
"logps/ref_chosen": -54.26074981689453,
|
|
"logps/ref_rejected": -101.2814712524414,
|
|
"logps/rejected": -459.6240539550781,
|
|
"loss": 0.7816,
|
|
"margin_dpo/margin_mean": 154.87277221679688,
|
|
"margin_dpo/margin_std": 149.25509643554688,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.4654919236417034,
|
|
"fcm_dpo/beta": 0.006331765092909336,
|
|
"fcm_dpo/delta": -0.276128351688385,
|
|
"fcm_dpo/margin": 173.1220703125,
|
|
"fcm_dpo/q_t": 0.27989763021469116,
|
|
"grad_norm": 38.22316360473633,
|
|
"learning_rate": 3.2456423598071783e-07,
|
|
"logits/chosen": 0.021085377782583237,
|
|
"logits/rejected": 0.03478589653968811,
|
|
"logps/chosen": -270.232666015625,
|
|
"logps/ref_chosen": -56.094207763671875,
|
|
"logps/ref_rejected": -100.69905090332031,
|
|
"logps/rejected": -487.9595947265625,
|
|
"loss": 0.726,
|
|
"margin_dpo/margin_mean": 173.1220703125,
|
|
"margin_dpo/margin_std": 143.30935668945312,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.4669603524229075,
|
|
"fcm_dpo/beta": 0.0061859143897891045,
|
|
"fcm_dpo/delta": -0.0931069478392601,
|
|
"fcm_dpo/margin": 151.012939453125,
|
|
"fcm_dpo/q_t": 0.30796754360198975,
|
|
"grad_norm": 49.025367736816406,
|
|
"learning_rate": 3.233383385962115e-07,
|
|
"logits/chosen": -0.04002639278769493,
|
|
"logits/rejected": -0.00955579336732626,
|
|
"logps/chosen": -275.85675048828125,
|
|
"logps/ref_chosen": -64.64569854736328,
|
|
"logps/ref_rejected": -82.76425170898438,
|
|
"logps/rejected": -444.98822021484375,
|
|
"loss": 0.801,
|
|
"margin_dpo/margin_mean": 151.012939453125,
|
|
"margin_dpo/margin_std": 136.198486328125,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.4684287812041116,
|
|
"fcm_dpo/beta": 0.006093823350965977,
|
|
"fcm_dpo/delta": -0.023943927139043808,
|
|
"fcm_dpo/margin": 143.07208251953125,
|
|
"fcm_dpo/q_t": 0.3199493885040283,
|
|
"grad_norm": 36.52496337890625,
|
|
"learning_rate": 3.2211050867827805e-07,
|
|
"logits/chosen": 0.015176716260612011,
|
|
"logits/rejected": 0.0014225305058062077,
|
|
"logps/chosen": -254.9478302001953,
|
|
"logps/ref_chosen": -49.383758544921875,
|
|
"logps/ref_rejected": -113.90650939941406,
|
|
"logps/rejected": -462.54266357421875,
|
|
"loss": 0.8406,
|
|
"margin_dpo/margin_mean": 143.07208251953125,
|
|
"margin_dpo/margin_std": 138.97381591796875,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.4698972099853157,
|
|
"fcm_dpo/beta": 0.005952038802206516,
|
|
"fcm_dpo/delta": -0.11902501434087753,
|
|
"fcm_dpo/margin": 160.73431396484375,
|
|
"fcm_dpo/q_t": 0.30818331241607666,
|
|
"grad_norm": 37.61782455444336,
|
|
"learning_rate": 3.208807785813777e-07,
|
|
"logits/chosen": -0.004372608382254839,
|
|
"logits/rejected": -0.003199207130819559,
|
|
"logps/chosen": -259.04400634765625,
|
|
"logps/ref_chosen": -59.50489044189453,
|
|
"logps/ref_rejected": -97.66717529296875,
|
|
"logps/rejected": -457.94061279296875,
|
|
"loss": 0.7985,
|
|
"margin_dpo/margin_mean": 160.73431396484375,
|
|
"margin_dpo/margin_std": 153.39141845703125,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.4713656387665198,
|
|
"fcm_dpo/beta": 0.005892496090382338,
|
|
"fcm_dpo/delta": 0.027914032340049744,
|
|
"fcm_dpo/margin": 139.42694091796875,
|
|
"fcm_dpo/q_t": 0.3354644179344177,
|
|
"grad_norm": 50.34774398803711,
|
|
"learning_rate": 3.1964918071004217e-07,
|
|
"logits/chosen": 0.11003893613815308,
|
|
"logits/rejected": 0.12183515727519989,
|
|
"logps/chosen": -296.3280029296875,
|
|
"logps/ref_chosen": -61.548683166503906,
|
|
"logps/ref_rejected": -91.64103698730469,
|
|
"logps/rejected": -465.8472900390625,
|
|
"loss": 0.9028,
|
|
"margin_dpo/margin_mean": 139.42694091796875,
|
|
"margin_dpo/margin_std": 153.59918212890625,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.47283406754772395,
|
|
"fcm_dpo/beta": 0.005919340066611767,
|
|
"fcm_dpo/delta": -0.02661764994263649,
|
|
"fcm_dpo/margin": 147.57391357421875,
|
|
"fcm_dpo/q_t": 0.3203071355819702,
|
|
"grad_norm": 56.95943069458008,
|
|
"learning_rate": 3.184157475180207e-07,
|
|
"logits/chosen": 0.08124169707298279,
|
|
"logits/rejected": 0.08749310672283173,
|
|
"logps/chosen": -270.20855712890625,
|
|
"logps/ref_chosen": -57.29003143310547,
|
|
"logps/ref_rejected": -95.74992370605469,
|
|
"logps/rejected": -456.24237060546875,
|
|
"loss": 0.841,
|
|
"margin_dpo/margin_mean": 147.5738983154297,
|
|
"margin_dpo/margin_std": 142.39199829101562,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.47430249632892807,
|
|
"fcm_dpo/beta": 0.005923746153712273,
|
|
"fcm_dpo/delta": 0.007014013826847076,
|
|
"fcm_dpo/margin": 142.34339904785156,
|
|
"fcm_dpo/q_t": 0.32728075981140137,
|
|
"grad_norm": 70.69721984863281,
|
|
"learning_rate": 3.171805115074251e-07,
|
|
"logits/chosen": 0.04329624027013779,
|
|
"logits/rejected": 0.046803638339042664,
|
|
"logps/chosen": -281.20391845703125,
|
|
"logps/ref_chosen": -51.23395919799805,
|
|
"logps/ref_rejected": -75.06192016601562,
|
|
"logps/rejected": -447.37530517578125,
|
|
"loss": 0.8642,
|
|
"margin_dpo/margin_mean": 142.34339904785156,
|
|
"margin_dpo/margin_std": 142.3323211669922,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.47577092511013214,
|
|
"fcm_dpo/beta": 0.00593388918787241,
|
|
"fcm_dpo/delta": -0.08985871076583862,
|
|
"fcm_dpo/margin": 156.79869079589844,
|
|
"fcm_dpo/q_t": 0.31534451246261597,
|
|
"grad_norm": 74.83609008789062,
|
|
"learning_rate": 3.1594350522787295e-07,
|
|
"logits/chosen": 0.022530967369675636,
|
|
"logits/rejected": 0.047397322952747345,
|
|
"logps/chosen": -317.4718933105469,
|
|
"logps/ref_chosen": -65.13516998291016,
|
|
"logps/ref_rejected": -86.47750854492188,
|
|
"logps/rejected": -495.6129150390625,
|
|
"loss": 0.8564,
|
|
"margin_dpo/margin_mean": 156.79869079589844,
|
|
"margin_dpo/margin_std": 162.0347900390625,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.47723935389133626,
|
|
"fcm_dpo/beta": 0.005914532113820314,
|
|
"fcm_dpo/delta": 0.08749772608280182,
|
|
"fcm_dpo/margin": 130.07130432128906,
|
|
"fcm_dpo/q_t": 0.340700626373291,
|
|
"grad_norm": 51.47658157348633,
|
|
"learning_rate": 3.147047612756302e-07,
|
|
"logits/chosen": -0.0764886736869812,
|
|
"logits/rejected": -0.04481898248195648,
|
|
"logps/chosen": -261.2364807128906,
|
|
"logps/ref_chosen": -56.215599060058594,
|
|
"logps/ref_rejected": -70.08592987060547,
|
|
"logps/rejected": -405.1781311035156,
|
|
"loss": 0.9054,
|
|
"margin_dpo/margin_mean": 130.07130432128906,
|
|
"margin_dpo/margin_std": 138.21401977539062,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.4787077826725404,
|
|
"fcm_dpo/beta": 0.006035645958036184,
|
|
"fcm_dpo/delta": 0.10336153209209442,
|
|
"fcm_dpo/margin": 125.01016235351562,
|
|
"fcm_dpo/q_t": 0.34127509593963623,
|
|
"grad_norm": 60.38080978393555,
|
|
"learning_rate": 3.134643122927519e-07,
|
|
"logits/chosen": -0.05668081343173981,
|
|
"logits/rejected": -0.019624464213848114,
|
|
"logps/chosen": -288.8016357421875,
|
|
"logps/ref_chosen": -72.72496032714844,
|
|
"logps/ref_rejected": -79.8467788696289,
|
|
"logps/rejected": -420.93359375,
|
|
"loss": 0.8786,
|
|
"margin_dpo/margin_mean": 125.0101547241211,
|
|
"margin_dpo/margin_std": 122.0008773803711,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.4801762114537445,
|
|
"fcm_dpo/beta": 0.0058260331861674786,
|
|
"fcm_dpo/delta": -0.22583037614822388,
|
|
"fcm_dpo/margin": 180.06192016601562,
|
|
"fcm_dpo/q_t": 0.2888588607311249,
|
|
"grad_norm": 49.314125061035156,
|
|
"learning_rate": 3.1222219096622264e-07,
|
|
"logits/chosen": -0.04947715997695923,
|
|
"logits/rejected": -0.02368859574198723,
|
|
"logps/chosen": -264.2978515625,
|
|
"logps/ref_chosen": -69.13441467285156,
|
|
"logps/ref_rejected": -111.93377685546875,
|
|
"logps/rejected": -487.15911865234375,
|
|
"loss": 0.7669,
|
|
"margin_dpo/margin_mean": 180.06192016601562,
|
|
"margin_dpo/margin_std": 160.97418212890625,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.48164464023494863,
|
|
"fcm_dpo/beta": 0.005800084210932255,
|
|
"fcm_dpo/delta": -0.018652496859431267,
|
|
"fcm_dpo/margin": 149.482177734375,
|
|
"fcm_dpo/q_t": 0.3247663974761963,
|
|
"grad_norm": 56.6755485534668,
|
|
"learning_rate": 3.1097843002709427e-07,
|
|
"logits/chosen": -0.004470609128475189,
|
|
"logits/rejected": 0.00039456598460674286,
|
|
"logps/chosen": -270.512451171875,
|
|
"logps/ref_chosen": -59.68719482421875,
|
|
"logps/ref_rejected": -90.85499572753906,
|
|
"logps/rejected": -451.16241455078125,
|
|
"loss": 0.8476,
|
|
"margin_dpo/margin_mean": 149.482177734375,
|
|
"margin_dpo/margin_std": 151.96820068359375,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.4831130690161527,
|
|
"fcm_dpo/beta": 0.00566395279020071,
|
|
"fcm_dpo/delta": -0.09518752992153168,
|
|
"fcm_dpo/margin": 165.03314208984375,
|
|
"fcm_dpo/q_t": 0.31437113881111145,
|
|
"grad_norm": 45.39422607421875,
|
|
"learning_rate": 3.0973306224962437e-07,
|
|
"logits/chosen": 0.044825874269008636,
|
|
"logits/rejected": 0.059470441192388535,
|
|
"logps/chosen": -300.3798828125,
|
|
"logps/ref_chosen": -65.2461929321289,
|
|
"logps/ref_rejected": -100.69770812988281,
|
|
"logps/rejected": -500.8645324707031,
|
|
"loss": 0.8432,
|
|
"margin_dpo/margin_mean": 165.03314208984375,
|
|
"margin_dpo/margin_std": 168.14520263671875,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.4845814977973568,
|
|
"fcm_dpo/beta": 0.005626134108752012,
|
|
"fcm_dpo/delta": -0.04790551960468292,
|
|
"fcm_dpo/margin": 158.8123321533203,
|
|
"fcm_dpo/q_t": 0.31789863109588623,
|
|
"grad_norm": 45.673545837402344,
|
|
"learning_rate": 3.084861204504122e-07,
|
|
"logits/chosen": 0.09089761972427368,
|
|
"logits/rejected": 0.08711989969015121,
|
|
"logps/chosen": -265.07464599609375,
|
|
"logps/ref_chosen": -46.998348236083984,
|
|
"logps/ref_rejected": -86.87684631347656,
|
|
"logps/rejected": -463.76544189453125,
|
|
"loss": 0.8397,
|
|
"margin_dpo/margin_mean": 158.8123321533203,
|
|
"margin_dpo/margin_std": 159.0528106689453,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.48604992657856094,
|
|
"fcm_dpo/beta": 0.005541889928281307,
|
|
"fcm_dpo/delta": -0.0821513682603836,
|
|
"fcm_dpo/margin": 166.78671264648438,
|
|
"fcm_dpo/q_t": 0.30144935846328735,
|
|
"grad_norm": 50.89043426513672,
|
|
"learning_rate": 3.072376374875335e-07,
|
|
"logits/chosen": 0.1645202934741974,
|
|
"logits/rejected": 0.18161094188690186,
|
|
"logps/chosen": -286.89776611328125,
|
|
"logps/ref_chosen": -50.52424621582031,
|
|
"logps/ref_rejected": -89.01544189453125,
|
|
"logps/rejected": -492.1756896972656,
|
|
"loss": 0.7535,
|
|
"margin_dpo/margin_mean": 166.78672790527344,
|
|
"margin_dpo/margin_std": 120.23434448242188,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.48751835535976507,
|
|
"fcm_dpo/beta": 0.005678609013557434,
|
|
"fcm_dpo/delta": 0.16144299507141113,
|
|
"fcm_dpo/margin": 123.15026092529297,
|
|
"fcm_dpo/q_t": 0.35300952196121216,
|
|
"grad_norm": 50.86631393432617,
|
|
"learning_rate": 3.059876462596758e-07,
|
|
"logits/chosen": 0.11794328689575195,
|
|
"logits/rejected": 0.1290109008550644,
|
|
"logps/chosen": -287.29925537109375,
|
|
"logps/ref_chosen": -49.18028259277344,
|
|
"logps/ref_rejected": -76.48515319824219,
|
|
"logps/rejected": -437.75439453125,
|
|
"loss": 0.9257,
|
|
"margin_dpo/margin_mean": 123.15026092529297,
|
|
"margin_dpo/margin_std": 133.58187866210938,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.4889867841409692,
|
|
"fcm_dpo/beta": 0.005601478740572929,
|
|
"fcm_dpo/delta": -0.06622818112373352,
|
|
"fcm_dpo/margin": 162.07449340820312,
|
|
"fcm_dpo/q_t": 0.321658194065094,
|
|
"grad_norm": 54.75845718383789,
|
|
"learning_rate": 3.0473617970527015e-07,
|
|
"logits/chosen": 0.036008186638355255,
|
|
"logits/rejected": 0.044931698590517044,
|
|
"logps/chosen": -301.5813903808594,
|
|
"logps/ref_chosen": -63.75574493408203,
|
|
"logps/ref_rejected": -95.04411315917969,
|
|
"logps/rejected": -494.9442443847656,
|
|
"loss": 0.8579,
|
|
"margin_dpo/margin_mean": 162.07449340820312,
|
|
"margin_dpo/margin_std": 169.41943359375,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.49045521292217326,
|
|
"fcm_dpo/beta": 0.00569639727473259,
|
|
"fcm_dpo/delta": 0.07207376509904861,
|
|
"fcm_dpo/margin": 137.55638122558594,
|
|
"fcm_dpo/q_t": 0.3387569487094879,
|
|
"grad_norm": 55.08982467651367,
|
|
"learning_rate": 3.034832708016243e-07,
|
|
"logits/chosen": -0.03774498403072357,
|
|
"logits/rejected": -0.032692015171051025,
|
|
"logps/chosen": -284.002685546875,
|
|
"logps/ref_chosen": -66.97975158691406,
|
|
"logps/ref_rejected": -95.31692504882812,
|
|
"logps/rejected": -449.896240234375,
|
|
"loss": 0.9061,
|
|
"margin_dpo/margin_mean": 137.55636596679688,
|
|
"margin_dpo/margin_std": 151.70651245117188,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.4919236417033774,
|
|
"fcm_dpo/beta": 0.00587943522259593,
|
|
"fcm_dpo/delta": 0.1511404812335968,
|
|
"fcm_dpo/margin": 120.45762634277344,
|
|
"fcm_dpo/q_t": 0.3560691773891449,
|
|
"grad_norm": 64.96064758300781,
|
|
"learning_rate": 3.022289525640531e-07,
|
|
"logits/chosen": 0.07412372529506683,
|
|
"logits/rejected": 0.09628117084503174,
|
|
"logps/chosen": -290.11248779296875,
|
|
"logps/ref_chosen": -62.54248046875,
|
|
"logps/ref_rejected": -87.61770629882812,
|
|
"logps/rejected": -435.64532470703125,
|
|
"loss": 0.9656,
|
|
"margin_dpo/margin_mean": 120.4576187133789,
|
|
"margin_dpo/margin_std": 151.005615234375,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.4933920704845815,
|
|
"fcm_dpo/beta": 0.005829405039548874,
|
|
"fcm_dpo/delta": -0.06654589623212814,
|
|
"fcm_dpo/margin": 156.17337036132812,
|
|
"fcm_dpo/q_t": 0.3145473599433899,
|
|
"grad_norm": 35.54839324951172,
|
|
"learning_rate": 3.009732580450086e-07,
|
|
"logits/chosen": 0.029192904010415077,
|
|
"logits/rejected": 0.03370156139135361,
|
|
"logps/chosen": -260.6049499511719,
|
|
"logps/ref_chosen": -54.53115463256836,
|
|
"logps/ref_rejected": -104.40424346923828,
|
|
"logps/rejected": -466.65142822265625,
|
|
"loss": 0.8382,
|
|
"margin_dpo/margin_mean": 156.17337036132812,
|
|
"margin_dpo/margin_std": 156.29110717773438,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.4948604992657856,
|
|
"fcm_dpo/beta": 0.0057077473029494286,
|
|
"fcm_dpo/delta": -0.0889483243227005,
|
|
"fcm_dpo/margin": 162.87271118164062,
|
|
"fcm_dpo/q_t": 0.3076227307319641,
|
|
"grad_norm": 49.6478385925293,
|
|
"learning_rate": 2.9971622033320914e-07,
|
|
"logits/chosen": -0.03223178908228874,
|
|
"logits/rejected": -0.024181274697184563,
|
|
"logps/chosen": -233.42138671875,
|
|
"logps/ref_chosen": -65.12869262695312,
|
|
"logps/ref_rejected": -101.72701263427734,
|
|
"logps/rejected": -432.89239501953125,
|
|
"loss": 0.7977,
|
|
"margin_dpo/margin_mean": 162.87271118164062,
|
|
"margin_dpo/margin_std": 144.59527587890625,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.49632892804698975,
|
|
"fcm_dpo/beta": 0.005662037990987301,
|
|
"fcm_dpo/delta": -0.05141569674015045,
|
|
"fcm_dpo/margin": 158.37158203125,
|
|
"fcm_dpo/q_t": 0.31238317489624023,
|
|
"grad_norm": 50.83082580566406,
|
|
"learning_rate": 2.984578725527675e-07,
|
|
"logits/chosen": -0.012493651360273361,
|
|
"logits/rejected": 0.010093292221426964,
|
|
"logps/chosen": -225.07749938964844,
|
|
"logps/ref_chosen": -58.422706604003906,
|
|
"logps/ref_rejected": -89.06854248046875,
|
|
"logps/rejected": -414.09490966796875,
|
|
"loss": 0.8022,
|
|
"margin_dpo/margin_mean": 158.37158203125,
|
|
"margin_dpo/margin_std": 136.95310974121094,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.4977973568281938,
|
|
"fcm_dpo/beta": 0.005685793701559305,
|
|
"fcm_dpo/delta": -0.012543223798274994,
|
|
"fcm_dpo/margin": 151.34521484375,
|
|
"fcm_dpo/q_t": 0.3188544511795044,
|
|
"grad_norm": 43.71391677856445,
|
|
"learning_rate": 2.9719824786231796e-07,
|
|
"logits/chosen": -0.0183430016040802,
|
|
"logits/rejected": -0.007623875513672829,
|
|
"logps/chosen": -240.96888732910156,
|
|
"logps/ref_chosen": -59.99531555175781,
|
|
"logps/ref_rejected": -103.9109115600586,
|
|
"logps/rejected": -436.2297058105469,
|
|
"loss": 0.8153,
|
|
"margin_dpo/margin_mean": 151.34521484375,
|
|
"margin_dpo/margin_std": 130.1288604736328,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.49926578560939794,
|
|
"fcm_dpo/beta": 0.0056188758462667465,
|
|
"fcm_dpo/delta": 0.01368454098701477,
|
|
"fcm_dpo/margin": 148.94017028808594,
|
|
"fcm_dpo/q_t": 0.3267067074775696,
|
|
"grad_norm": 61.01662063598633,
|
|
"learning_rate": 2.959373794541426e-07,
|
|
"logits/chosen": 0.02150595560669899,
|
|
"logits/rejected": 0.04815651848912239,
|
|
"logps/chosen": -254.2559356689453,
|
|
"logps/ref_chosen": -52.83022689819336,
|
|
"logps/ref_rejected": -73.10723114013672,
|
|
"logps/rejected": -423.4731140136719,
|
|
"loss": 0.856,
|
|
"margin_dpo/margin_mean": 148.940185546875,
|
|
"margin_dpo/margin_std": 147.6375732421875,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.5007342143906021,
|
|
"fcm_dpo/beta": 0.0055615827441215515,
|
|
"fcm_dpo/delta": -0.07392589747905731,
|
|
"fcm_dpo/margin": 164.7185821533203,
|
|
"fcm_dpo/q_t": 0.3134787678718567,
|
|
"grad_norm": 54.293521881103516,
|
|
"learning_rate": 2.946753005532965e-07,
|
|
"logits/chosen": 0.03781311213970184,
|
|
"logits/rejected": 0.034403085708618164,
|
|
"logps/chosen": -265.510986328125,
|
|
"logps/ref_chosen": -47.899803161621094,
|
|
"logps/ref_rejected": -101.80987548828125,
|
|
"logps/rejected": -484.1396484375,
|
|
"loss": 0.8249,
|
|
"margin_dpo/margin_mean": 164.71856689453125,
|
|
"margin_dpo/margin_std": 157.4544677734375,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.5022026431718062,
|
|
"fcm_dpo/beta": 0.00556070264428854,
|
|
"fcm_dpo/delta": -0.04328972473740578,
|
|
"fcm_dpo/margin": 159.92022705078125,
|
|
"fcm_dpo/q_t": 0.3194407820701599,
|
|
"grad_norm": 41.392662048339844,
|
|
"learning_rate": 2.934120444167326e-07,
|
|
"logits/chosen": -0.04628030210733414,
|
|
"logits/rejected": -0.02110264264047146,
|
|
"logps/chosen": -283.7849426269531,
|
|
"logps/ref_chosen": -71.99664306640625,
|
|
"logps/ref_rejected": -92.58959197998047,
|
|
"logps/rejected": -464.2981262207031,
|
|
"loss": 0.8333,
|
|
"margin_dpo/margin_mean": 159.92022705078125,
|
|
"margin_dpo/margin_std": 158.2020263671875,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.5036710719530103,
|
|
"fcm_dpo/beta": 0.005446239374577999,
|
|
"fcm_dpo/delta": -0.08976535499095917,
|
|
"fcm_dpo/margin": 170.99798583984375,
|
|
"fcm_dpo/q_t": 0.3034150302410126,
|
|
"grad_norm": 50.54157257080078,
|
|
"learning_rate": 2.9214764433242476e-07,
|
|
"logits/chosen": 0.11037549376487732,
|
|
"logits/rejected": 0.11976627260446548,
|
|
"logps/chosen": -277.7355041503906,
|
|
"logps/ref_chosen": -54.405616760253906,
|
|
"logps/ref_rejected": -111.04142761230469,
|
|
"logps/rejected": -505.3692626953125,
|
|
"loss": 0.7733,
|
|
"margin_dpo/margin_mean": 170.99798583984375,
|
|
"margin_dpo/margin_std": 136.3294219970703,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.5051395007342144,
|
|
"fcm_dpo/beta": 0.005502544809132814,
|
|
"fcm_dpo/delta": 0.005939602851867676,
|
|
"fcm_dpo/margin": 152.88720703125,
|
|
"fcm_dpo/q_t": 0.3289456069469452,
|
|
"grad_norm": 63.03850173950195,
|
|
"learning_rate": 2.9088213361849126e-07,
|
|
"logits/chosen": 0.09415749460458755,
|
|
"logits/rejected": 0.09810211509466171,
|
|
"logps/chosen": -292.4935302734375,
|
|
"logps/ref_chosen": -53.96466827392578,
|
|
"logps/ref_rejected": -90.62336730957031,
|
|
"logps/rejected": -482.0394287109375,
|
|
"loss": 0.859,
|
|
"margin_dpo/margin_mean": 152.88720703125,
|
|
"margin_dpo/margin_std": 154.0537109375,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.5066079295154186,
|
|
"fcm_dpo/beta": 0.005438483320176601,
|
|
"fcm_dpo/delta": -0.030558232218027115,
|
|
"fcm_dpo/margin": 161.34080505371094,
|
|
"fcm_dpo/q_t": 0.3203575611114502,
|
|
"grad_norm": 54.166324615478516,
|
|
"learning_rate": 2.896155456223163e-07,
|
|
"logits/chosen": 0.0032112393528223038,
|
|
"logits/rejected": 0.020324693992733955,
|
|
"logps/chosen": -325.7508544921875,
|
|
"logps/ref_chosen": -61.685699462890625,
|
|
"logps/ref_rejected": -99.49041748046875,
|
|
"logps/rejected": -524.8964233398438,
|
|
"loss": 0.8401,
|
|
"margin_dpo/margin_mean": 161.3408203125,
|
|
"margin_dpo/margin_std": 158.29058837890625,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.5080763582966226,
|
|
"fcm_dpo/beta": 0.0054172007367014885,
|
|
"fcm_dpo/delta": 0.03349316492676735,
|
|
"fcm_dpo/margin": 151.2305145263672,
|
|
"fcm_dpo/q_t": 0.3277038335800171,
|
|
"grad_norm": 49.29082489013672,
|
|
"learning_rate": 2.883479137196714e-07,
|
|
"logits/chosen": 0.19713056087493896,
|
|
"logits/rejected": 0.2153664231300354,
|
|
"logps/chosen": -339.09173583984375,
|
|
"logps/ref_chosen": -55.256263732910156,
|
|
"logps/ref_rejected": -77.41532135009766,
|
|
"logps/rejected": -512.4812622070312,
|
|
"loss": 0.8599,
|
|
"margin_dpo/margin_mean": 151.2305145263672,
|
|
"margin_dpo/margin_std": 147.90621948242188,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.5095447870778267,
|
|
"fcm_dpo/beta": 0.00541126262396574,
|
|
"fcm_dpo/delta": -0.013398189097642899,
|
|
"fcm_dpo/margin": 159.3125,
|
|
"fcm_dpo/q_t": 0.32410484552383423,
|
|
"grad_norm": 47.86494827270508,
|
|
"learning_rate": 2.8707927131383614e-07,
|
|
"logits/chosen": 0.14842018485069275,
|
|
"logits/rejected": 0.16279613971710205,
|
|
"logps/chosen": -331.56488037109375,
|
|
"logps/ref_chosen": -57.56623840332031,
|
|
"logps/ref_rejected": -92.35509490966797,
|
|
"logps/rejected": -525.6661987304688,
|
|
"loss": 0.8662,
|
|
"margin_dpo/margin_mean": 159.3125,
|
|
"margin_dpo/margin_std": 166.56910705566406,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.5110132158590308,
|
|
"fcm_dpo/beta": 0.005429641343653202,
|
|
"fcm_dpo/delta": 0.07389134168624878,
|
|
"fcm_dpo/margin": 143.79965209960938,
|
|
"fcm_dpo/q_t": 0.3401951193809509,
|
|
"grad_norm": 47.6584587097168,
|
|
"learning_rate": 2.858096518347179e-07,
|
|
"logits/chosen": 0.09554639458656311,
|
|
"logits/rejected": 0.10207704454660416,
|
|
"logps/chosen": -311.5487060546875,
|
|
"logps/ref_chosen": -56.31770324707031,
|
|
"logps/ref_rejected": -89.13836669921875,
|
|
"logps/rejected": -488.16900634765625,
|
|
"loss": 0.8958,
|
|
"margin_dpo/margin_mean": 143.79965209960938,
|
|
"margin_dpo/margin_std": 152.26095581054688,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.5124816446402349,
|
|
"fcm_dpo/beta": 0.005589696578681469,
|
|
"fcm_dpo/delta": 0.036192864179611206,
|
|
"fcm_dpo/margin": 145.68653869628906,
|
|
"fcm_dpo/q_t": 0.3400714099407196,
|
|
"grad_norm": 44.34540939331055,
|
|
"learning_rate": 2.845390887379706e-07,
|
|
"logits/chosen": 0.1389956921339035,
|
|
"logits/rejected": 0.1430295705795288,
|
|
"logps/chosen": -307.2108154296875,
|
|
"logps/ref_chosen": -58.025516510009766,
|
|
"logps/ref_rejected": -97.50515747070312,
|
|
"logps/rejected": -492.3769836425781,
|
|
"loss": 0.9291,
|
|
"margin_dpo/margin_mean": 145.68653869628906,
|
|
"margin_dpo/margin_std": 177.18255615234375,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.5139500734214391,
|
|
"fcm_dpo/beta": 0.005505557172000408,
|
|
"fcm_dpo/delta": 0.0005776733160018921,
|
|
"fcm_dpo/margin": 154.13563537597656,
|
|
"fcm_dpo/q_t": 0.32515567541122437,
|
|
"grad_norm": 74.10888671875,
|
|
"learning_rate": 2.8326761550411346e-07,
|
|
"logits/chosen": 0.05175316706299782,
|
|
"logits/rejected": 0.061344295740127563,
|
|
"logps/chosen": -309.2527770996094,
|
|
"logps/ref_chosen": -64.33049011230469,
|
|
"logps/ref_rejected": -89.87164306640625,
|
|
"logps/rejected": -488.9295654296875,
|
|
"loss": 0.869,
|
|
"margin_dpo/margin_mean": 154.13565063476562,
|
|
"margin_dpo/margin_std": 157.90634155273438,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.5154185022026432,
|
|
"fcm_dpo/beta": 0.005446532741189003,
|
|
"fcm_dpo/delta": -0.13678933680057526,
|
|
"fcm_dpo/margin": 178.76748657226562,
|
|
"fcm_dpo/q_t": 0.305521160364151,
|
|
"grad_norm": 50.48676681518555,
|
|
"learning_rate": 2.819952656376487e-07,
|
|
"logits/chosen": -0.02746611461043358,
|
|
"logits/rejected": -0.01481974683701992,
|
|
"logps/chosen": -298.5670166015625,
|
|
"logps/ref_chosen": -60.6721305847168,
|
|
"logps/ref_rejected": -101.5654296875,
|
|
"logps/rejected": -518.227783203125,
|
|
"loss": 0.8216,
|
|
"margin_dpo/margin_mean": 178.76748657226562,
|
|
"margin_dpo/margin_std": 179.7300567626953,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.5168869309838473,
|
|
"fcm_dpo/beta": 0.005514339543879032,
|
|
"fcm_dpo/delta": 0.19816651940345764,
|
|
"fcm_dpo/margin": 120.65501403808594,
|
|
"fcm_dpo/q_t": 0.36773788928985596,
|
|
"grad_norm": 75.69171905517578,
|
|
"learning_rate": 2.8072207266617854e-07,
|
|
"logits/chosen": 0.0787711963057518,
|
|
"logits/rejected": 0.11258897185325623,
|
|
"logps/chosen": -345.6042175292969,
|
|
"logps/ref_chosen": -70.9434585571289,
|
|
"logps/ref_rejected": -76.6419677734375,
|
|
"logps/rejected": -471.957763671875,
|
|
"loss": 1.0103,
|
|
"margin_dpo/margin_mean": 120.65501403808594,
|
|
"margin_dpo/margin_std": 167.7642364501953,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.5183553597650514,
|
|
"fcm_dpo/beta": 0.005604561883956194,
|
|
"fcm_dpo/delta": 0.042658913880586624,
|
|
"fcm_dpo/margin": 144.52413940429688,
|
|
"fcm_dpo/q_t": 0.3393586277961731,
|
|
"grad_norm": 46.874942779541016,
|
|
"learning_rate": 2.794480701395219e-07,
|
|
"logits/chosen": 0.08481930941343307,
|
|
"logits/rejected": 0.10338996350765228,
|
|
"logps/chosen": -321.98089599609375,
|
|
"logps/ref_chosen": -58.39533996582031,
|
|
"logps/ref_rejected": -80.33553314208984,
|
|
"logps/rejected": -488.4452209472656,
|
|
"loss": 0.9148,
|
|
"margin_dpo/margin_mean": 144.52413940429688,
|
|
"margin_dpo/margin_std": 167.46084594726562,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.5198237885462555,
|
|
"fcm_dpo/beta": 0.005544104613363743,
|
|
"fcm_dpo/delta": -0.1198074221611023,
|
|
"fcm_dpo/margin": 172.76959228515625,
|
|
"fcm_dpo/q_t": 0.29841989278793335,
|
|
"grad_norm": 36.41807556152344,
|
|
"learning_rate": 2.781732916288303e-07,
|
|
"logits/chosen": 0.0665828287601471,
|
|
"logits/rejected": 0.08835647255182266,
|
|
"logps/chosen": -291.6112365722656,
|
|
"logps/ref_chosen": -59.80299377441406,
|
|
"logps/ref_rejected": -88.75750732421875,
|
|
"logps/rejected": -493.3353271484375,
|
|
"loss": 0.7587,
|
|
"margin_dpo/margin_mean": 172.76959228515625,
|
|
"margin_dpo/margin_std": 136.2689208984375,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.5212922173274597,
|
|
"fcm_dpo/beta": 0.005405407398939133,
|
|
"fcm_dpo/delta": -0.11095987260341644,
|
|
"fcm_dpo/margin": 175.61483764648438,
|
|
"fcm_dpo/q_t": 0.30244117975234985,
|
|
"grad_norm": 57.12250518798828,
|
|
"learning_rate": 2.7689777072570284e-07,
|
|
"logits/chosen": 0.06327119469642639,
|
|
"logits/rejected": 0.09310467541217804,
|
|
"logps/chosen": -288.7947692871094,
|
|
"logps/ref_chosen": -54.12849807739258,
|
|
"logps/ref_rejected": -82.40606689453125,
|
|
"logps/rejected": -492.68719482421875,
|
|
"loss": 0.7733,
|
|
"margin_dpo/margin_mean": 175.61483764648438,
|
|
"margin_dpo/margin_std": 145.6189422607422,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.5227606461086637,
|
|
"fcm_dpo/beta": 0.0056666238233447075,
|
|
"fcm_dpo/delta": 0.23844945430755615,
|
|
"fcm_dpo/margin": 109.56277465820312,
|
|
"fcm_dpo/q_t": 0.38272541761398315,
|
|
"grad_norm": 58.59632110595703,
|
|
"learning_rate": 2.7562154104130176e-07,
|
|
"logits/chosen": 0.0763608068227768,
|
|
"logits/rejected": 0.09287572652101517,
|
|
"logps/chosen": -345.8939208984375,
|
|
"logps/ref_chosen": -64.6738052368164,
|
|
"logps/ref_rejected": -75.89926147460938,
|
|
"logps/rejected": -466.6821594238281,
|
|
"loss": 1.1128,
|
|
"margin_dpo/margin_mean": 109.56277465820312,
|
|
"margin_dpo/margin_std": 190.751708984375,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.5242290748898678,
|
|
"fcm_dpo/beta": 0.005567336454987526,
|
|
"fcm_dpo/delta": -0.07430887222290039,
|
|
"fcm_dpo/margin": 164.54934692382812,
|
|
"fcm_dpo/q_t": 0.31005075573921204,
|
|
"grad_norm": 52.44282531738281,
|
|
"learning_rate": 2.7434463620546594e-07,
|
|
"logits/chosen": 0.09520265460014343,
|
|
"logits/rejected": 0.11559751629829407,
|
|
"logps/chosen": -318.9443359375,
|
|
"logps/ref_chosen": -52.725799560546875,
|
|
"logps/ref_rejected": -86.84115600585938,
|
|
"logps/rejected": -517.6090087890625,
|
|
"loss": 0.8028,
|
|
"margin_dpo/margin_mean": 164.54934692382812,
|
|
"margin_dpo/margin_std": 146.5643768310547,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.5256975036710719,
|
|
"fcm_dpo/beta": 0.005649616941809654,
|
|
"fcm_dpo/delta": 0.07324951887130737,
|
|
"fcm_dpo/margin": 138.49087524414062,
|
|
"fcm_dpo/q_t": 0.34356987476348877,
|
|
"grad_norm": 37.71842956542969,
|
|
"learning_rate": 2.730670898658255e-07,
|
|
"logits/chosen": 0.0035172095522284508,
|
|
"logits/rejected": 0.023625530302524567,
|
|
"logps/chosen": -319.94830322265625,
|
|
"logps/ref_chosen": -63.20543670654297,
|
|
"logps/ref_rejected": -88.373291015625,
|
|
"logps/rejected": -483.60699462890625,
|
|
"loss": 0.9045,
|
|
"margin_dpo/margin_mean": 138.49087524414062,
|
|
"margin_dpo/margin_std": 159.35696411132812,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.527165932452276,
|
|
"fcm_dpo/beta": 0.005610806867480278,
|
|
"fcm_dpo/delta": -0.041874960064888,
|
|
"fcm_dpo/margin": 158.2200927734375,
|
|
"fcm_dpo/q_t": 0.3280462622642517,
|
|
"grad_norm": 69.10176086425781,
|
|
"learning_rate": 2.717889356869146e-07,
|
|
"logits/chosen": 0.1252935528755188,
|
|
"logits/rejected": 0.1476270705461502,
|
|
"logps/chosen": -356.76495361328125,
|
|
"logps/ref_chosen": -56.370216369628906,
|
|
"logps/ref_rejected": -82.17375183105469,
|
|
"logps/rejected": -540.78857421875,
|
|
"loss": 0.8751,
|
|
"margin_dpo/margin_mean": 158.2200927734375,
|
|
"margin_dpo/margin_std": 177.02398681640625,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.5286343612334802,
|
|
"fcm_dpo/beta": 0.005699576810002327,
|
|
"fcm_dpo/delta": 0.0714188739657402,
|
|
"fcm_dpo/margin": 137.50169372558594,
|
|
"fcm_dpo/q_t": 0.33583831787109375,
|
|
"grad_norm": 87.95269775390625,
|
|
"learning_rate": 2.7051020734928443e-07,
|
|
"logits/chosen": 0.1796594262123108,
|
|
"logits/rejected": 0.19369468092918396,
|
|
"logps/chosen": -336.75543212890625,
|
|
"logps/ref_chosen": -51.460384368896484,
|
|
"logps/ref_rejected": -69.83892059326172,
|
|
"logps/rejected": -492.6356506347656,
|
|
"loss": 0.8694,
|
|
"margin_dpo/margin_mean": 137.50169372558594,
|
|
"margin_dpo/margin_std": 132.57077026367188,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.5301027900146843,
|
|
"fcm_dpo/beta": 0.005806830711662769,
|
|
"fcm_dpo/delta": 0.09413307905197144,
|
|
"fcm_dpo/margin": 131.26763916015625,
|
|
"fcm_dpo/q_t": 0.34591707587242126,
|
|
"grad_norm": 43.504676818847656,
|
|
"learning_rate": 2.6923093854861593e-07,
|
|
"logits/chosen": 0.10329671949148178,
|
|
"logits/rejected": 0.1067652627825737,
|
|
"logps/chosen": -333.27484130859375,
|
|
"logps/ref_chosen": -53.86951446533203,
|
|
"logps/ref_rejected": -90.7692642211914,
|
|
"logps/rejected": -501.4422607421875,
|
|
"loss": 0.9286,
|
|
"margin_dpo/margin_mean": 131.2676239013672,
|
|
"margin_dpo/margin_std": 154.85382080078125,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.5315712187958884,
|
|
"fcm_dpo/beta": 0.005605396814644337,
|
|
"fcm_dpo/delta": -0.21299204230308533,
|
|
"fcm_dpo/margin": 185.47732543945312,
|
|
"fcm_dpo/q_t": 0.28802040219306946,
|
|
"grad_norm": 42.840492248535156,
|
|
"learning_rate": 2.679511629948319e-07,
|
|
"logits/chosen": 0.09067463874816895,
|
|
"logits/rejected": 0.08635248243808746,
|
|
"logps/chosen": -302.639404296875,
|
|
"logps/ref_chosen": -58.639060974121094,
|
|
"logps/ref_rejected": -105.58195495605469,
|
|
"logps/rejected": -535.0595703125,
|
|
"loss": 0.7337,
|
|
"margin_dpo/margin_mean": 185.47732543945312,
|
|
"margin_dpo/margin_std": 153.58267211914062,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.5330396475770925,
|
|
"fcm_dpo/beta": 0.005374937318265438,
|
|
"fcm_dpo/delta": -0.2472880780696869,
|
|
"fcm_dpo/margin": 199.2676239013672,
|
|
"fcm_dpo/q_t": 0.28192996978759766,
|
|
"grad_norm": 39.75864028930664,
|
|
"learning_rate": 2.6667091441120816e-07,
|
|
"logits/chosen": 0.1431807279586792,
|
|
"logits/rejected": 0.1652299463748932,
|
|
"logps/chosen": -266.96466064453125,
|
|
"logps/ref_chosen": -44.558380126953125,
|
|
"logps/ref_rejected": -74.69496154785156,
|
|
"logps/rejected": -496.3688659667969,
|
|
"loss": 0.73,
|
|
"margin_dpo/margin_mean": 199.26763916015625,
|
|
"margin_dpo/margin_std": 164.5372772216797,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.5345080763582967,
|
|
"fcm_dpo/beta": 0.005400581285357475,
|
|
"fcm_dpo/delta": 0.07649517804384232,
|
|
"fcm_dpo/margin": 144.00555419921875,
|
|
"fcm_dpo/q_t": 0.33972108364105225,
|
|
"grad_norm": 45.31873321533203,
|
|
"learning_rate": 2.6539022653348575e-07,
|
|
"logits/chosen": 0.071674644947052,
|
|
"logits/rejected": 0.07084225863218307,
|
|
"logps/chosen": -291.6654052734375,
|
|
"logps/ref_chosen": -48.894622802734375,
|
|
"logps/ref_rejected": -91.395751953125,
|
|
"logps/rejected": -478.17205810546875,
|
|
"loss": 0.9032,
|
|
"margin_dpo/margin_mean": 144.00555419921875,
|
|
"margin_dpo/margin_std": 157.2249755859375,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.5359765051395007,
|
|
"fcm_dpo/beta": 0.005438569001853466,
|
|
"fcm_dpo/delta": 0.050673648715019226,
|
|
"fcm_dpo/margin": 147.65203857421875,
|
|
"fcm_dpo/q_t": 0.3388054668903351,
|
|
"grad_norm": 49.04033279418945,
|
|
"learning_rate": 2.641091331089811e-07,
|
|
"logits/chosen": 0.05171611160039902,
|
|
"logits/rejected": 0.04313598573207855,
|
|
"logps/chosen": -280.9577331542969,
|
|
"logps/ref_chosen": -51.49274444580078,
|
|
"logps/ref_rejected": -92.70166778564453,
|
|
"logps/rejected": -469.8186950683594,
|
|
"loss": 0.8784,
|
|
"margin_dpo/margin_mean": 147.65203857421875,
|
|
"margin_dpo/margin_std": 159.82666015625,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.5374449339207048,
|
|
"fcm_dpo/beta": 0.0054289670661091805,
|
|
"fcm_dpo/delta": 0.009159095585346222,
|
|
"fcm_dpo/margin": 155.0063018798828,
|
|
"fcm_dpo/q_t": 0.3273472189903259,
|
|
"grad_norm": 38.92621994018555,
|
|
"learning_rate": 2.6282766789569736e-07,
|
|
"logits/chosen": 0.14211732149124146,
|
|
"logits/rejected": 0.14219552278518677,
|
|
"logps/chosen": -267.73809814453125,
|
|
"logps/ref_chosen": -44.7205696105957,
|
|
"logps/ref_rejected": -83.31040954589844,
|
|
"logps/rejected": -461.3342590332031,
|
|
"loss": 0.8501,
|
|
"margin_dpo/margin_mean": 155.00631713867188,
|
|
"margin_dpo/margin_std": 154.13015747070312,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.5389133627019089,
|
|
"fcm_dpo/beta": 0.005595469381660223,
|
|
"fcm_dpo/delta": 0.18162932991981506,
|
|
"fcm_dpo/margin": 121.64521789550781,
|
|
"fcm_dpo/q_t": 0.355856716632843,
|
|
"grad_norm": 38.51764678955078,
|
|
"learning_rate": 2.615458646614349e-07,
|
|
"logits/chosen": 0.06456591933965683,
|
|
"logits/rejected": 0.0903320163488388,
|
|
"logps/chosen": -279.4564208984375,
|
|
"logps/ref_chosen": -58.405418395996094,
|
|
"logps/ref_rejected": -76.75132751464844,
|
|
"logps/rejected": -419.447509765625,
|
|
"loss": 0.9315,
|
|
"margin_dpo/margin_mean": 121.64521789550781,
|
|
"margin_dpo/margin_std": 129.7523956298828,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.540381791483113,
|
|
"fcm_dpo/beta": 0.005565423984080553,
|
|
"fcm_dpo/delta": -0.12588441371917725,
|
|
"fcm_dpo/margin": 173.19337463378906,
|
|
"fcm_dpo/q_t": 0.2921599745750427,
|
|
"grad_norm": 65.85102081298828,
|
|
"learning_rate": 2.6026375718290083e-07,
|
|
"logits/chosen": 0.03500935807824135,
|
|
"logits/rejected": 0.04446982964873314,
|
|
"logps/chosen": -250.3231964111328,
|
|
"logps/ref_chosen": -44.452518463134766,
|
|
"logps/ref_rejected": -98.55526733398438,
|
|
"logps/rejected": -477.61932373046875,
|
|
"loss": 0.7302,
|
|
"margin_dpo/margin_mean": 173.19337463378906,
|
|
"margin_dpo/margin_std": 117.90557861328125,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.5418502202643172,
|
|
"fcm_dpo/beta": 0.005647559650242329,
|
|
"fcm_dpo/delta": 0.13559746742248535,
|
|
"fcm_dpo/margin": 128.01370239257812,
|
|
"fcm_dpo/q_t": 0.3497123420238495,
|
|
"grad_norm": 69.40166473388672,
|
|
"learning_rate": 2.589813792448196e-07,
|
|
"logits/chosen": 0.015510164201259613,
|
|
"logits/rejected": 0.03292276710271835,
|
|
"logps/chosen": -309.7448425292969,
|
|
"logps/ref_chosen": -71.38150024414062,
|
|
"logps/ref_rejected": -91.29582214355469,
|
|
"logps/rejected": -457.6728515625,
|
|
"loss": 0.9356,
|
|
"margin_dpo/margin_mean": 128.01370239257812,
|
|
"margin_dpo/margin_std": 145.52603149414062,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.5433186490455213,
|
|
"fcm_dpo/beta": 0.005865996703505516,
|
|
"fcm_dpo/delta": 0.22589412331581116,
|
|
"fcm_dpo/margin": 108.72503662109375,
|
|
"fcm_dpo/q_t": 0.36666351556777954,
|
|
"grad_norm": 48.482791900634766,
|
|
"learning_rate": 2.5769876463904263e-07,
|
|
"logits/chosen": 0.0964912623167038,
|
|
"logits/rejected": 0.10018481314182281,
|
|
"logps/chosen": -338.19866943359375,
|
|
"logps/ref_chosen": -71.60749816894531,
|
|
"logps/ref_rejected": -97.25978088378906,
|
|
"logps/rejected": -472.57598876953125,
|
|
"loss": 0.9842,
|
|
"margin_dpo/margin_mean": 108.72503662109375,
|
|
"margin_dpo/margin_std": 135.08151245117188,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.5447870778267254,
|
|
"fcm_dpo/beta": 0.005965778138488531,
|
|
"fcm_dpo/delta": 0.03497808426618576,
|
|
"fcm_dpo/margin": 137.05462646484375,
|
|
"fcm_dpo/q_t": 0.3335404396057129,
|
|
"grad_norm": 47.60246658325195,
|
|
"learning_rate": 2.5641594716365744e-07,
|
|
"logits/chosen": 0.08590900897979736,
|
|
"logits/rejected": 0.10128141194581985,
|
|
"logps/chosen": -339.6130065917969,
|
|
"logps/ref_chosen": -69.41448974609375,
|
|
"logps/ref_rejected": -99.17217254638672,
|
|
"logps/rejected": -506.42529296875,
|
|
"loss": 0.8969,
|
|
"margin_dpo/margin_mean": 137.05462646484375,
|
|
"margin_dpo/margin_std": 152.06521606445312,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.5462555066079295,
|
|
"fcm_dpo/beta": 0.005722453817725182,
|
|
"fcm_dpo/delta": -0.25100451707839966,
|
|
"fcm_dpo/margin": 187.50184631347656,
|
|
"fcm_dpo/q_t": 0.29311686754226685,
|
|
"grad_norm": 39.004390716552734,
|
|
"learning_rate": 2.551329606220976e-07,
|
|
"logits/chosen": 0.16508790850639343,
|
|
"logits/rejected": 0.20037144422531128,
|
|
"logps/chosen": -342.9236755371094,
|
|
"logps/ref_chosen": -61.8179931640625,
|
|
"logps/ref_rejected": -78.53948974609375,
|
|
"logps/rejected": -547.1470336914062,
|
|
"loss": 0.775,
|
|
"margin_dpo/margin_mean": 187.50184631347656,
|
|
"margin_dpo/margin_std": 180.50228881835938,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.5477239353891337,
|
|
"fcm_dpo/beta": 0.005600764416158199,
|
|
"fcm_dpo/delta": -0.13974212110042572,
|
|
"fcm_dpo/margin": 174.2254180908203,
|
|
"fcm_dpo/q_t": 0.3003678619861603,
|
|
"grad_norm": 49.6671257019043,
|
|
"learning_rate": 2.538498388222517e-07,
|
|
"logits/chosen": 0.20778214931488037,
|
|
"logits/rejected": 0.24689488112926483,
|
|
"logps/chosen": -361.0768127441406,
|
|
"logps/ref_chosen": -64.21713256835938,
|
|
"logps/ref_rejected": -85.95960998535156,
|
|
"logps/rejected": -557.044677734375,
|
|
"loss": 0.792,
|
|
"margin_dpo/margin_mean": 174.22540283203125,
|
|
"margin_dpo/margin_std": 154.6001434326172,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.5491923641703378,
|
|
"fcm_dpo/beta": 0.005461122840642929,
|
|
"fcm_dpo/delta": -0.006661958992481232,
|
|
"fcm_dpo/margin": 156.51187133789062,
|
|
"fcm_dpo/q_t": 0.3341420888900757,
|
|
"grad_norm": 39.634254455566406,
|
|
"learning_rate": 2.525666155755725e-07,
|
|
"logits/chosen": 0.042114850133657455,
|
|
"logits/rejected": 0.07254066318273544,
|
|
"logps/chosen": -347.50177001953125,
|
|
"logps/ref_chosen": -70.65018463134766,
|
|
"logps/ref_rejected": -93.64016723632812,
|
|
"logps/rejected": -527.0036010742188,
|
|
"loss": 0.8785,
|
|
"margin_dpo/margin_mean": 156.51187133789062,
|
|
"margin_dpo/margin_std": 173.5503692626953,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.5506607929515418,
|
|
"fcm_dpo/beta": 0.005447746254503727,
|
|
"fcm_dpo/delta": 0.027071624994277954,
|
|
"fcm_dpo/margin": 150.794677734375,
|
|
"fcm_dpo/q_t": 0.33665597438812256,
|
|
"grad_norm": 71.83676147460938,
|
|
"learning_rate": 2.512833246961859e-07,
|
|
"logits/chosen": 0.09779814630746841,
|
|
"logits/rejected": 0.10543471574783325,
|
|
"logps/chosen": -369.1651611328125,
|
|
"logps/ref_chosen": -60.080223083496094,
|
|
"logps/ref_rejected": -88.93830871582031,
|
|
"logps/rejected": -548.81787109375,
|
|
"loss": 0.9248,
|
|
"margin_dpo/margin_mean": 150.794677734375,
|
|
"margin_dpo/margin_std": 173.79681396484375,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.5521292217327459,
|
|
"fcm_dpo/beta": 0.005424397066235542,
|
|
"fcm_dpo/delta": -0.13509052991867065,
|
|
"fcm_dpo/margin": 179.1793670654297,
|
|
"fcm_dpo/q_t": 0.3073885142803192,
|
|
"grad_norm": 41.7952880859375,
|
|
"learning_rate": 2.5e-07,
|
|
"logits/chosen": 0.09893445670604706,
|
|
"logits/rejected": 0.11599358916282654,
|
|
"logps/chosen": -363.12640380859375,
|
|
"logps/ref_chosen": -62.660308837890625,
|
|
"logps/ref_rejected": -105.52660369873047,
|
|
"logps/rejected": -585.1720581054688,
|
|
"loss": 0.8182,
|
|
"margin_dpo/margin_mean": 179.1793670654297,
|
|
"margin_dpo/margin_std": 180.31005859375,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.55359765051395,
|
|
"fcm_dpo/beta": 0.005333360284566879,
|
|
"fcm_dpo/delta": -0.07344111055135727,
|
|
"fcm_dpo/margin": 171.89007568359375,
|
|
"fcm_dpo/q_t": 0.3136495351791382,
|
|
"grad_norm": 46.397911071777344,
|
|
"learning_rate": 2.487166753038141e-07,
|
|
"logits/chosen": 0.19385088980197906,
|
|
"logits/rejected": 0.20475682616233826,
|
|
"logps/chosen": -379.18011474609375,
|
|
"logps/ref_chosen": -54.478736877441406,
|
|
"logps/ref_rejected": -98.70335388183594,
|
|
"logps/rejected": -595.2947998046875,
|
|
"loss": 0.8312,
|
|
"margin_dpo/margin_mean": 171.89007568359375,
|
|
"margin_dpo/margin_std": 168.72779846191406,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.5550660792951542,
|
|
"fcm_dpo/beta": 0.005202522035688162,
|
|
"fcm_dpo/delta": -0.10398915410041809,
|
|
"fcm_dpo/margin": 181.31509399414062,
|
|
"fcm_dpo/q_t": 0.30345821380615234,
|
|
"grad_norm": 75.51764678955078,
|
|
"learning_rate": 2.4743338442442754e-07,
|
|
"logits/chosen": 0.3088299334049225,
|
|
"logits/rejected": 0.31884270906448364,
|
|
"logps/chosen": -373.27655029296875,
|
|
"logps/ref_chosen": -45.02053451538086,
|
|
"logps/ref_rejected": -88.0469741821289,
|
|
"logps/rejected": -597.6180419921875,
|
|
"loss": 0.7906,
|
|
"margin_dpo/margin_mean": 181.31509399414062,
|
|
"margin_dpo/margin_std": 157.98651123046875,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.5565345080763583,
|
|
"fcm_dpo/beta": 0.005089180544018745,
|
|
"fcm_dpo/delta": -0.08599420636892319,
|
|
"fcm_dpo/margin": 181.9398651123047,
|
|
"fcm_dpo/q_t": 0.31637871265411377,
|
|
"grad_norm": 59.954917907714844,
|
|
"learning_rate": 2.461501611777483e-07,
|
|
"logits/chosen": 0.23598089814186096,
|
|
"logits/rejected": 0.2249039113521576,
|
|
"logps/chosen": -385.88226318359375,
|
|
"logps/ref_chosen": -53.182098388671875,
|
|
"logps/ref_rejected": -114.3001708984375,
|
|
"logps/rejected": -628.940185546875,
|
|
"loss": 0.8413,
|
|
"margin_dpo/margin_mean": 181.93988037109375,
|
|
"margin_dpo/margin_std": 186.93975830078125,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.5580029368575624,
|
|
"fcm_dpo/beta": 0.005020034499466419,
|
|
"fcm_dpo/delta": -0.168114572763443,
|
|
"fcm_dpo/margin": 199.48626708984375,
|
|
"fcm_dpo/q_t": 0.298037052154541,
|
|
"grad_norm": 41.02925491333008,
|
|
"learning_rate": 2.4486703937790243e-07,
|
|
"logits/chosen": 0.17431041598320007,
|
|
"logits/rejected": 0.15981586277484894,
|
|
"logps/chosen": -365.4420471191406,
|
|
"logps/ref_chosen": -51.3530387878418,
|
|
"logps/ref_rejected": -104.19169616699219,
|
|
"logps/rejected": -617.7669677734375,
|
|
"loss": 0.8056,
|
|
"margin_dpo/margin_mean": 199.48626708984375,
|
|
"margin_dpo/margin_std": 189.1451873779297,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.5594713656387665,
|
|
"fcm_dpo/beta": 0.004953205585479736,
|
|
"fcm_dpo/delta": 0.021930061280727386,
|
|
"fcm_dpo/margin": 167.54666137695312,
|
|
"fcm_dpo/q_t": 0.3372423052787781,
|
|
"grad_norm": 67.10491180419922,
|
|
"learning_rate": 2.435840528363426e-07,
|
|
"logits/chosen": 0.19572949409484863,
|
|
"logits/rejected": 0.21503984928131104,
|
|
"logps/chosen": -375.8944091796875,
|
|
"logps/ref_chosen": -57.80306625366211,
|
|
"logps/ref_rejected": -79.21940612792969,
|
|
"logps/rejected": -564.857421875,
|
|
"loss": 0.9236,
|
|
"margin_dpo/margin_mean": 167.54666137695312,
|
|
"margin_dpo/margin_std": 204.34890747070312,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.5609397944199707,
|
|
"fcm_dpo/beta": 0.004994013346731663,
|
|
"fcm_dpo/delta": 0.05573827028274536,
|
|
"fcm_dpo/margin": 159.93917846679688,
|
|
"fcm_dpo/q_t": 0.3334108591079712,
|
|
"grad_norm": 50.57058334350586,
|
|
"learning_rate": 2.4230123536095745e-07,
|
|
"logits/chosen": 0.12441153824329376,
|
|
"logits/rejected": 0.13224869966506958,
|
|
"logps/chosen": -371.9028625488281,
|
|
"logps/ref_chosen": -66.02030181884766,
|
|
"logps/ref_rejected": -110.71016693115234,
|
|
"logps/rejected": -576.5319213867188,
|
|
"loss": 0.8654,
|
|
"margin_dpo/margin_mean": 159.93917846679688,
|
|
"margin_dpo/margin_std": 155.31463623046875,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.5624082232011748,
|
|
"fcm_dpo/beta": 0.004986027255654335,
|
|
"fcm_dpo/delta": -0.02076072059571743,
|
|
"fcm_dpo/margin": 174.21910095214844,
|
|
"fcm_dpo/q_t": 0.3251308500766754,
|
|
"grad_norm": 40.4781494140625,
|
|
"learning_rate": 2.4101862075518037e-07,
|
|
"logits/chosen": 0.18596576154232025,
|
|
"logits/rejected": 0.1823549121618271,
|
|
"logps/chosen": -351.5233154296875,
|
|
"logps/ref_chosen": -50.39148712158203,
|
|
"logps/ref_rejected": -93.71589660644531,
|
|
"logps/rejected": -569.06689453125,
|
|
"loss": 0.8845,
|
|
"margin_dpo/margin_mean": 174.2191162109375,
|
|
"margin_dpo/margin_std": 194.69053649902344,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.5638766519823789,
|
|
"fcm_dpo/beta": 0.005112666171044111,
|
|
"fcm_dpo/delta": 0.09205277264118195,
|
|
"fcm_dpo/margin": 149.1619873046875,
|
|
"fcm_dpo/q_t": 0.3353465795516968,
|
|
"grad_norm": 38.69187927246094,
|
|
"learning_rate": 2.397362428170992e-07,
|
|
"logits/chosen": 0.04655902460217476,
|
|
"logits/rejected": 0.05491742864251137,
|
|
"logps/chosen": -335.37225341796875,
|
|
"logps/ref_chosen": -52.046104431152344,
|
|
"logps/ref_rejected": -85.76089477539062,
|
|
"logps/rejected": -518.2490234375,
|
|
"loss": 0.8667,
|
|
"margin_dpo/margin_mean": 149.1619873046875,
|
|
"margin_dpo/margin_std": 133.10096740722656,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.5653450807635829,
|
|
"fcm_dpo/beta": 0.005061786156147718,
|
|
"fcm_dpo/delta": -0.047479018568992615,
|
|
"fcm_dpo/margin": 176.4716796875,
|
|
"fcm_dpo/q_t": 0.30865800380706787,
|
|
"grad_norm": 36.45566177368164,
|
|
"learning_rate": 2.3845413533856514e-07,
|
|
"logits/chosen": 0.12907695770263672,
|
|
"logits/rejected": 0.16897349059581757,
|
|
"logps/chosen": -322.817626953125,
|
|
"logps/ref_chosen": -65.55215454101562,
|
|
"logps/ref_rejected": -77.82792663574219,
|
|
"logps/rejected": -511.5650634765625,
|
|
"loss": 0.785,
|
|
"margin_dpo/margin_mean": 176.47166442871094,
|
|
"margin_dpo/margin_std": 139.1245574951172,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.566813509544787,
|
|
"fcm_dpo/beta": 0.005007788073271513,
|
|
"fcm_dpo/delta": -0.07901398092508316,
|
|
"fcm_dpo/margin": 184.04327392578125,
|
|
"fcm_dpo/q_t": 0.3119524121284485,
|
|
"grad_norm": 47.43567657470703,
|
|
"learning_rate": 2.3717233210430254e-07,
|
|
"logits/chosen": 0.04530417546629906,
|
|
"logits/rejected": 0.06010700762271881,
|
|
"logps/chosen": -312.85601806640625,
|
|
"logps/ref_chosen": -58.22185516357422,
|
|
"logps/ref_rejected": -92.32742309570312,
|
|
"logps/rejected": -531.0048828125,
|
|
"loss": 0.8184,
|
|
"margin_dpo/margin_mean": 184.04327392578125,
|
|
"margin_dpo/margin_std": 175.82864379882812,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.5682819383259912,
|
|
"fcm_dpo/beta": 0.005064930766820908,
|
|
"fcm_dpo/delta": 0.16430482268333435,
|
|
"fcm_dpo/margin": 137.70285034179688,
|
|
"fcm_dpo/q_t": 0.34966596961021423,
|
|
"grad_norm": 46.66494369506836,
|
|
"learning_rate": 2.3589086689101889e-07,
|
|
"logits/chosen": 0.02450702153146267,
|
|
"logits/rejected": 0.05837446078658104,
|
|
"logps/chosen": -343.4558410644531,
|
|
"logps/ref_chosen": -66.41944885253906,
|
|
"logps/ref_rejected": -92.16915893554688,
|
|
"logps/rejected": -506.90838623046875,
|
|
"loss": 0.9043,
|
|
"margin_dpo/margin_mean": 137.70285034179688,
|
|
"margin_dpo/margin_std": 132.16102600097656,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.5697503671071953,
|
|
"fcm_dpo/beta": 0.005017186980694532,
|
|
"fcm_dpo/delta": -0.10396112501621246,
|
|
"fcm_dpo/margin": 187.8830108642578,
|
|
"fcm_dpo/q_t": 0.30914634466171265,
|
|
"grad_norm": 44.683433532714844,
|
|
"learning_rate": 2.3460977346651428e-07,
|
|
"logits/chosen": 0.11675406992435455,
|
|
"logits/rejected": 0.10898333787918091,
|
|
"logps/chosen": -318.9471740722656,
|
|
"logps/ref_chosen": -50.129459381103516,
|
|
"logps/ref_rejected": -104.43305969238281,
|
|
"logps/rejected": -561.1337890625,
|
|
"loss": 0.8084,
|
|
"margin_dpo/margin_mean": 187.8830108642578,
|
|
"margin_dpo/margin_std": 178.2823486328125,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.5712187958883994,
|
|
"fcm_dpo/beta": 0.0049585141241550446,
|
|
"fcm_dpo/delta": -0.07406939566135406,
|
|
"fcm_dpo/margin": 184.94287109375,
|
|
"fcm_dpo/q_t": 0.3153570890426636,
|
|
"grad_norm": 38.42765808105469,
|
|
"learning_rate": 2.3332908558879177e-07,
|
|
"logits/chosen": 0.13295167684555054,
|
|
"logits/rejected": 0.1595715433359146,
|
|
"logps/chosen": -336.7831726074219,
|
|
"logps/ref_chosen": -57.906593322753906,
|
|
"logps/ref_rejected": -77.91454315185547,
|
|
"logps/rejected": -541.7340087890625,
|
|
"loss": 0.8249,
|
|
"margin_dpo/margin_mean": 184.94285583496094,
|
|
"margin_dpo/margin_std": 180.59262084960938,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.5726872246696035,
|
|
"fcm_dpo/beta": 0.004955414682626724,
|
|
"fcm_dpo/delta": 0.04264906048774719,
|
|
"fcm_dpo/margin": 163.55552673339844,
|
|
"fcm_dpo/q_t": 0.3355684280395508,
|
|
"grad_norm": 64.1507339477539,
|
|
"learning_rate": 2.320488370051681e-07,
|
|
"logits/chosen": 0.10355956852436066,
|
|
"logits/rejected": 0.11937595903873444,
|
|
"logps/chosen": -331.3822326660156,
|
|
"logps/ref_chosen": -49.22591781616211,
|
|
"logps/ref_rejected": -85.5281982421875,
|
|
"logps/rejected": -531.239990234375,
|
|
"loss": 0.9176,
|
|
"margin_dpo/margin_mean": 163.55551147460938,
|
|
"margin_dpo/margin_std": 192.38583374023438,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.5741556534508077,
|
|
"fcm_dpo/beta": 0.005186218768358231,
|
|
"fcm_dpo/delta": 0.2533876895904541,
|
|
"fcm_dpo/margin": 118.00922393798828,
|
|
"fcm_dpo/q_t": 0.3784768879413605,
|
|
"grad_norm": 48.2935905456543,
|
|
"learning_rate": 2.3076906145138405e-07,
|
|
"logits/chosen": 0.08619362860918045,
|
|
"logits/rejected": 0.08983877301216125,
|
|
"logps/chosen": -342.7335205078125,
|
|
"logps/ref_chosen": -64.32965087890625,
|
|
"logps/ref_rejected": -86.73820495605469,
|
|
"logps/rejected": -483.15130615234375,
|
|
"loss": 1.0202,
|
|
"margin_dpo/margin_mean": 118.00923156738281,
|
|
"margin_dpo/margin_std": 167.4725341796875,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.5756240822320118,
|
|
"fcm_dpo/beta": 0.005176296457648277,
|
|
"fcm_dpo/delta": -0.12520991265773773,
|
|
"fcm_dpo/margin": 186.098388671875,
|
|
"fcm_dpo/q_t": 0.3001660406589508,
|
|
"grad_norm": 46.61800003051758,
|
|
"learning_rate": 2.294897926507156e-07,
|
|
"logits/chosen": 0.054711125791072845,
|
|
"logits/rejected": 0.06594612449407578,
|
|
"logps/chosen": -293.49102783203125,
|
|
"logps/ref_chosen": -53.50397872924805,
|
|
"logps/ref_rejected": -102.34584045410156,
|
|
"logps/rejected": -528.4312744140625,
|
|
"loss": 0.7649,
|
|
"margin_dpo/margin_mean": 186.098388671875,
|
|
"margin_dpo/margin_std": 153.24871826171875,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.5770925110132159,
|
|
"fcm_dpo/beta": 0.005192288197577,
|
|
"fcm_dpo/delta": 0.10276921838521957,
|
|
"fcm_dpo/margin": 145.42605590820312,
|
|
"fcm_dpo/q_t": 0.35600078105926514,
|
|
"grad_norm": 46.10240936279297,
|
|
"learning_rate": 2.2821106431308543e-07,
|
|
"logits/chosen": 0.13289561867713928,
|
|
"logits/rejected": 0.14119097590446472,
|
|
"logps/chosen": -310.223388671875,
|
|
"logps/ref_chosen": -46.473915100097656,
|
|
"logps/ref_rejected": -71.96885681152344,
|
|
"logps/rejected": -481.1444091796875,
|
|
"loss": 0.9668,
|
|
"margin_dpo/margin_mean": 145.42605590820312,
|
|
"margin_dpo/margin_std": 197.1509552001953,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.57856093979442,
|
|
"fcm_dpo/beta": 0.00520074088126421,
|
|
"fcm_dpo/delta": -0.019946470856666565,
|
|
"fcm_dpo/margin": 166.911865234375,
|
|
"fcm_dpo/q_t": 0.3283039927482605,
|
|
"grad_norm": 37.210731506347656,
|
|
"learning_rate": 2.2693291013417452e-07,
|
|
"logits/chosen": 0.10127435624599457,
|
|
"logits/rejected": 0.10776053369045258,
|
|
"logps/chosen": -332.1357421875,
|
|
"logps/ref_chosen": -52.91154861450195,
|
|
"logps/ref_rejected": -90.8226318359375,
|
|
"logps/rejected": -536.958740234375,
|
|
"loss": 0.8841,
|
|
"margin_dpo/margin_mean": 166.911865234375,
|
|
"margin_dpo/margin_std": 186.99267578125,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.580029368575624,
|
|
"fcm_dpo/beta": 0.005109791178256273,
|
|
"fcm_dpo/delta": -0.09205502271652222,
|
|
"fcm_dpo/margin": 182.4871826171875,
|
|
"fcm_dpo/q_t": 0.3203425407409668,
|
|
"grad_norm": 76.74991607666016,
|
|
"learning_rate": 2.2565536379453404e-07,
|
|
"logits/chosen": 0.12516123056411743,
|
|
"logits/rejected": 0.13688474893569946,
|
|
"logps/chosen": -341.91693115234375,
|
|
"logps/ref_chosen": -62.546112060546875,
|
|
"logps/ref_rejected": -83.78262329101562,
|
|
"logps/rejected": -545.640625,
|
|
"loss": 0.8685,
|
|
"margin_dpo/margin_mean": 182.4871826171875,
|
|
"margin_dpo/margin_std": 205.46337890625,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.5814977973568282,
|
|
"fcm_dpo/beta": 0.00510101392865181,
|
|
"fcm_dpo/delta": 0.0003374125808477402,
|
|
"fcm_dpo/margin": 166.55291748046875,
|
|
"fcm_dpo/q_t": 0.32344305515289307,
|
|
"grad_norm": 42.463134765625,
|
|
"learning_rate": 2.2437845895869825e-07,
|
|
"logits/chosen": 0.07252983748912811,
|
|
"logits/rejected": 0.09801161289215088,
|
|
"logps/chosen": -347.77801513671875,
|
|
"logps/ref_chosen": -68.99594116210938,
|
|
"logps/ref_rejected": -88.64665985107422,
|
|
"logps/rejected": -533.981689453125,
|
|
"loss": 0.8382,
|
|
"margin_dpo/margin_mean": 166.55291748046875,
|
|
"margin_dpo/margin_std": 159.85546875,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.5829662261380323,
|
|
"fcm_dpo/beta": 0.004994560033082962,
|
|
"fcm_dpo/delta": -0.10879573971033096,
|
|
"fcm_dpo/margin": 189.552734375,
|
|
"fcm_dpo/q_t": 0.30495208501815796,
|
|
"grad_norm": 57.0134391784668,
|
|
"learning_rate": 2.2310222927429716e-07,
|
|
"logits/chosen": 0.1951860934495926,
|
|
"logits/rejected": 0.20993033051490784,
|
|
"logps/chosen": -337.2643737792969,
|
|
"logps/ref_chosen": -61.27716827392578,
|
|
"logps/ref_rejected": -103.11612701416016,
|
|
"logps/rejected": -568.656005859375,
|
|
"loss": 0.7919,
|
|
"margin_dpo/margin_mean": 189.552734375,
|
|
"margin_dpo/margin_std": 167.1475372314453,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.5844346549192364,
|
|
"fcm_dpo/beta": 0.0049682073295116425,
|
|
"fcm_dpo/delta": -0.03307997062802315,
|
|
"fcm_dpo/margin": 177.1311798095703,
|
|
"fcm_dpo/q_t": 0.3241208791732788,
|
|
"grad_norm": 74.6159896850586,
|
|
"learning_rate": 2.2182670837116972e-07,
|
|
"logits/chosen": 0.004022831097245216,
|
|
"logits/rejected": 0.014983203262090683,
|
|
"logps/chosen": -374.131103515625,
|
|
"logps/ref_chosen": -68.15155029296875,
|
|
"logps/ref_rejected": -108.52360534667969,
|
|
"logps/rejected": -591.6343994140625,
|
|
"loss": 0.853,
|
|
"margin_dpo/margin_mean": 177.13116455078125,
|
|
"margin_dpo/margin_std": 186.1813201904297,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.5859030837004405,
|
|
"fcm_dpo/beta": 0.004931504372507334,
|
|
"fcm_dpo/delta": -0.030671503394842148,
|
|
"fcm_dpo/margin": 177.97613525390625,
|
|
"fcm_dpo/q_t": 0.3222649097442627,
|
|
"grad_norm": 54.23942947387695,
|
|
"learning_rate": 2.2055192986047804e-07,
|
|
"logits/chosen": 0.11953572928905487,
|
|
"logits/rejected": 0.16603019833564758,
|
|
"logps/chosen": -341.5679931640625,
|
|
"logps/ref_chosen": -60.889801025390625,
|
|
"logps/ref_rejected": -77.965576171875,
|
|
"logps/rejected": -536.619873046875,
|
|
"loss": 0.8604,
|
|
"margin_dpo/margin_mean": 177.97613525390625,
|
|
"margin_dpo/margin_std": 184.6171112060547,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.5873715124816447,
|
|
"fcm_dpo/beta": 0.004671948961913586,
|
|
"fcm_dpo/delta": -0.326324462890625,
|
|
"fcm_dpo/margin": 243.12725830078125,
|
|
"fcm_dpo/q_t": 0.27325916290283203,
|
|
"grad_norm": 30.489665985107422,
|
|
"learning_rate": 2.192779273338215e-07,
|
|
"logits/chosen": 0.15955214202404022,
|
|
"logits/rejected": 0.17605489492416382,
|
|
"logps/chosen": -354.65338134765625,
|
|
"logps/ref_chosen": -63.64359664916992,
|
|
"logps/ref_rejected": -105.252685546875,
|
|
"logps/rejected": -639.3897094726562,
|
|
"loss": 0.7153,
|
|
"margin_dpo/margin_mean": 243.12725830078125,
|
|
"margin_dpo/margin_std": 202.89080810546875,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.5873715124816447,
|
|
"eval_fcm_dpo/beta": 0.0046274252235889435,
|
|
"eval_logits/chosen": 0.12993372976779938,
|
|
"eval_logits/rejected": 0.15252064168453217,
|
|
"eval_logps/chosen": -440.1346435546875,
|
|
"eval_logps/ref_chosen": -79.05104064941406,
|
|
"eval_logps/ref_rejected": -86.79793548583984,
|
|
"eval_logps/rejected": -563.7314453125,
|
|
"eval_loss": 0.544421374797821,
|
|
"eval_margin_dpo/margin_mean": 115.849853515625,
|
|
"eval_margin_dpo/margin_std": 187.16053771972656,
|
|
"eval_runtime": 39.2673,
|
|
"eval_samples_per_second": 59.566,
|
|
"eval_steps_per_second": 1.885,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.5888399412628488,
|
|
"fcm_dpo/beta": 0.004744245205074549,
|
|
"fcm_dpo/delta": 0.1801946461200714,
|
|
"fcm_dpo/margin": 143.82565307617188,
|
|
"fcm_dpo/q_t": 0.3702501058578491,
|
|
"grad_norm": 62.11568069458008,
|
|
"learning_rate": 2.1800473436235136e-07,
|
|
"logits/chosen": 0.17814716696739197,
|
|
"logits/rejected": 0.18520209193229675,
|
|
"logps/chosen": -379.033447265625,
|
|
"logps/ref_chosen": -57.16303253173828,
|
|
"logps/ref_rejected": -83.79249572753906,
|
|
"logps/rejected": -549.488525390625,
|
|
"loss": 1.0428,
|
|
"margin_dpo/margin_mean": 143.8256378173828,
|
|
"margin_dpo/margin_std": 222.36459350585938,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.5903083700440529,
|
|
"fcm_dpo/beta": 0.004563698545098305,
|
|
"fcm_dpo/delta": -0.3387846052646637,
|
|
"fcm_dpo/margin": 251.93418884277344,
|
|
"fcm_dpo/q_t": 0.2641059160232544,
|
|
"grad_norm": 33.36614990234375,
|
|
"learning_rate": 2.1673238449588665e-07,
|
|
"logits/chosen": 0.15705952048301697,
|
|
"logits/rejected": 0.17883501946926117,
|
|
"logps/chosen": -324.14556884765625,
|
|
"logps/ref_chosen": -50.74037170410156,
|
|
"logps/ref_rejected": -81.0460433959961,
|
|
"logps/rejected": -606.385498046875,
|
|
"loss": 0.6684,
|
|
"margin_dpo/margin_mean": 251.9342041015625,
|
|
"margin_dpo/margin_std": 182.95848083496094,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.591776798825257,
|
|
"fcm_dpo/beta": 0.0044688875786960125,
|
|
"fcm_dpo/delta": 0.0052043236792087555,
|
|
"fcm_dpo/margin": 189.0760498046875,
|
|
"fcm_dpo/q_t": 0.3240288197994232,
|
|
"grad_norm": 42.3422966003418,
|
|
"learning_rate": 2.154609112620295e-07,
|
|
"logits/chosen": 0.2680968940258026,
|
|
"logits/rejected": 0.27872592210769653,
|
|
"logps/chosen": -372.03997802734375,
|
|
"logps/ref_chosen": -47.14731216430664,
|
|
"logps/ref_rejected": -77.2666015625,
|
|
"logps/rejected": -591.2353515625,
|
|
"loss": 0.8459,
|
|
"margin_dpo/margin_mean": 189.0760498046875,
|
|
"margin_dpo/margin_std": 177.94534301757812,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.593245227606461,
|
|
"fcm_dpo/beta": 0.004463577643036842,
|
|
"fcm_dpo/delta": -0.029174722731113434,
|
|
"fcm_dpo/margin": 196.38226318359375,
|
|
"fcm_dpo/q_t": 0.32246384024620056,
|
|
"grad_norm": 48.92930603027344,
|
|
"learning_rate": 2.1419034816528218e-07,
|
|
"logits/chosen": 0.3021692931652069,
|
|
"logits/rejected": 0.3231460452079773,
|
|
"logps/chosen": -387.607666015625,
|
|
"logps/ref_chosen": -47.875274658203125,
|
|
"logps/ref_rejected": -77.15499877929688,
|
|
"logps/rejected": -613.2696533203125,
|
|
"loss": 0.8722,
|
|
"margin_dpo/margin_mean": 196.38226318359375,
|
|
"margin_dpo/margin_std": 209.30274963378906,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.5947136563876652,
|
|
"fcm_dpo/beta": 0.0045960224233567715,
|
|
"fcm_dpo/delta": 0.11465553939342499,
|
|
"fcm_dpo/margin": 160.8705596923828,
|
|
"fcm_dpo/q_t": 0.3535424470901489,
|
|
"grad_norm": 59.457088470458984,
|
|
"learning_rate": 2.129207286861638e-07,
|
|
"logits/chosen": 0.16498160362243652,
|
|
"logits/rejected": 0.17874008417129517,
|
|
"logps/chosen": -429.4807434082031,
|
|
"logps/ref_chosen": -65.16290283203125,
|
|
"logps/ref_rejected": -87.18678283691406,
|
|
"logps/rejected": -612.375244140625,
|
|
"loss": 0.9583,
|
|
"margin_dpo/margin_mean": 160.8705596923828,
|
|
"margin_dpo/margin_std": 197.54196166992188,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.5961820851688693,
|
|
"fcm_dpo/beta": 0.004600249230861664,
|
|
"fcm_dpo/delta": -0.017502881586551666,
|
|
"fcm_dpo/margin": 187.85006713867188,
|
|
"fcm_dpo/q_t": 0.3287266790866852,
|
|
"grad_norm": 47.70918655395508,
|
|
"learning_rate": 2.1165208628032861e-07,
|
|
"logits/chosen": 0.23692449927330017,
|
|
"logits/rejected": 0.25910329818725586,
|
|
"logps/chosen": -408.302001953125,
|
|
"logps/ref_chosen": -49.740814208984375,
|
|
"logps/ref_rejected": -92.07862854003906,
|
|
"logps/rejected": -638.4898681640625,
|
|
"loss": 0.8766,
|
|
"margin_dpo/margin_mean": 187.85006713867188,
|
|
"margin_dpo/margin_std": 203.72189331054688,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.5976505139500734,
|
|
"fcm_dpo/beta": 0.004692500457167625,
|
|
"fcm_dpo/delta": 0.1510598063468933,
|
|
"fcm_dpo/margin": 150.65615844726562,
|
|
"fcm_dpo/q_t": 0.3553357720375061,
|
|
"grad_norm": 61.100006103515625,
|
|
"learning_rate": 2.1038445437768375e-07,
|
|
"logits/chosen": 0.3105884790420532,
|
|
"logits/rejected": 0.3434818387031555,
|
|
"logps/chosen": -443.04608154296875,
|
|
"logps/ref_chosen": -56.33069610595703,
|
|
"logps/ref_rejected": -77.51209259033203,
|
|
"logps/rejected": -614.8836669921875,
|
|
"loss": 0.979,
|
|
"margin_dpo/margin_mean": 150.65615844726562,
|
|
"margin_dpo/margin_std": 192.8316650390625,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.5991189427312775,
|
|
"fcm_dpo/beta": 0.00478686299175024,
|
|
"fcm_dpo/delta": 0.14895686507225037,
|
|
"fcm_dpo/margin": 148.69833374023438,
|
|
"fcm_dpo/q_t": 0.3522757291793823,
|
|
"grad_norm": 55.596675872802734,
|
|
"learning_rate": 2.0911786638150872e-07,
|
|
"logits/chosen": 0.2758781313896179,
|
|
"logits/rejected": 0.3151041269302368,
|
|
"logps/chosen": -463.2239990234375,
|
|
"logps/ref_chosen": -69.789306640625,
|
|
"logps/ref_rejected": -90.09693908691406,
|
|
"logps/rejected": -632.2299194335938,
|
|
"loss": 0.9397,
|
|
"margin_dpo/margin_mean": 148.69833374023438,
|
|
"margin_dpo/margin_std": 172.4322967529297,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.6005873715124816,
|
|
"fcm_dpo/beta": 0.004894952289760113,
|
|
"fcm_dpo/delta": 0.08498449623584747,
|
|
"fcm_dpo/margin": 157.61297607421875,
|
|
"fcm_dpo/q_t": 0.3446779251098633,
|
|
"grad_norm": 94.63024139404297,
|
|
"learning_rate": 2.0785235566757517e-07,
|
|
"logits/chosen": 0.1406092494726181,
|
|
"logits/rejected": 0.15482883155345917,
|
|
"logps/chosen": -447.18670654296875,
|
|
"logps/ref_chosen": -67.31744384765625,
|
|
"logps/ref_rejected": -84.904296875,
|
|
"logps/rejected": -622.386474609375,
|
|
"loss": 0.907,
|
|
"margin_dpo/margin_mean": 157.61297607421875,
|
|
"margin_dpo/margin_std": 178.1060333251953,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.6020558002936858,
|
|
"fcm_dpo/beta": 0.004944339860230684,
|
|
"fcm_dpo/delta": 0.0544782392680645,
|
|
"fcm_dpo/margin": 161.7826690673828,
|
|
"fcm_dpo/q_t": 0.3345832824707031,
|
|
"grad_norm": 47.36720657348633,
|
|
"learning_rate": 2.065879555832674e-07,
|
|
"logits/chosen": 0.228413388133049,
|
|
"logits/rejected": 0.23824666440486908,
|
|
"logps/chosen": -425.039306640625,
|
|
"logps/ref_chosen": -51.465354919433594,
|
|
"logps/ref_rejected": -83.198974609375,
|
|
"logps/rejected": -618.5556030273438,
|
|
"loss": 0.9026,
|
|
"margin_dpo/margin_mean": 161.78265380859375,
|
|
"margin_dpo/margin_std": 179.0279541015625,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.6035242290748899,
|
|
"fcm_dpo/beta": 0.004880721680819988,
|
|
"fcm_dpo/delta": -0.02468109130859375,
|
|
"fcm_dpo/margin": 177.94451904296875,
|
|
"fcm_dpo/q_t": 0.32889145612716675,
|
|
"grad_norm": 57.21861267089844,
|
|
"learning_rate": 2.0532469944670343e-07,
|
|
"logits/chosen": 0.35708197951316833,
|
|
"logits/rejected": 0.3739537000656128,
|
|
"logps/chosen": -432.9054870605469,
|
|
"logps/ref_chosen": -52.30727005004883,
|
|
"logps/ref_rejected": -80.69495391845703,
|
|
"logps/rejected": -639.2376708984375,
|
|
"loss": 0.8709,
|
|
"margin_dpo/margin_mean": 177.9445037841797,
|
|
"margin_dpo/margin_std": 190.1000213623047,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.604992657856094,
|
|
"fcm_dpo/beta": 0.004977867007255554,
|
|
"fcm_dpo/delta": 0.04423771798610687,
|
|
"fcm_dpo/margin": 162.58218383789062,
|
|
"fcm_dpo/q_t": 0.335659384727478,
|
|
"grad_norm": 44.3292236328125,
|
|
"learning_rate": 2.0406262054585738e-07,
|
|
"logits/chosen": 0.23362818360328674,
|
|
"logits/rejected": 0.22613993287086487,
|
|
"logps/chosen": -432.71868896484375,
|
|
"logps/ref_chosen": -53.144126892089844,
|
|
"logps/ref_rejected": -100.0608139038086,
|
|
"logps/rejected": -642.2175903320312,
|
|
"loss": 0.9013,
|
|
"margin_dpo/margin_mean": 162.5821990966797,
|
|
"margin_dpo/margin_std": 181.373779296875,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.6064610866372981,
|
|
"fcm_dpo/beta": 0.005048343911767006,
|
|
"fcm_dpo/delta": 0.06283044815063477,
|
|
"fcm_dpo/margin": 156.84176635742188,
|
|
"fcm_dpo/q_t": 0.3339266777038574,
|
|
"grad_norm": 74.36189270019531,
|
|
"learning_rate": 2.0280175213768205e-07,
|
|
"logits/chosen": 0.10526064038276672,
|
|
"logits/rejected": 0.12061142921447754,
|
|
"logps/chosen": -434.7674255371094,
|
|
"logps/ref_chosen": -61.58196258544922,
|
|
"logps/ref_rejected": -99.47340393066406,
|
|
"logps/rejected": -629.5006103515625,
|
|
"loss": 0.8827,
|
|
"margin_dpo/margin_mean": 156.84176635742188,
|
|
"margin_dpo/margin_std": 160.830322265625,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.6079295154185022,
|
|
"fcm_dpo/beta": 0.0049922592006623745,
|
|
"fcm_dpo/delta": -0.13861587643623352,
|
|
"fcm_dpo/margin": 195.24468994140625,
|
|
"fcm_dpo/q_t": 0.2993001341819763,
|
|
"grad_norm": 50.09534454345703,
|
|
"learning_rate": 2.0154212744723247e-07,
|
|
"logits/chosen": 0.2176135778427124,
|
|
"logits/rejected": 0.23482248187065125,
|
|
"logps/chosen": -374.04327392578125,
|
|
"logps/ref_chosen": -46.63148498535156,
|
|
"logps/ref_rejected": -87.64653015136719,
|
|
"logps/rejected": -610.302978515625,
|
|
"loss": 0.7763,
|
|
"margin_dpo/margin_mean": 195.24468994140625,
|
|
"margin_dpo/margin_std": 165.15785217285156,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.6093979441997063,
|
|
"fcm_dpo/beta": 0.004968525841832161,
|
|
"fcm_dpo/delta": 0.11446189880371094,
|
|
"fcm_dpo/margin": 149.75445556640625,
|
|
"fcm_dpo/q_t": 0.34472692012786865,
|
|
"grad_norm": 49.72021484375,
|
|
"learning_rate": 2.002837796667909e-07,
|
|
"logits/chosen": 0.085420161485672,
|
|
"logits/rejected": 0.1004190742969513,
|
|
"logps/chosen": -415.4371337890625,
|
|
"logps/ref_chosen": -78.6182861328125,
|
|
"logps/ref_rejected": -100.47752380371094,
|
|
"logps/rejected": -587.05078125,
|
|
"loss": 0.914,
|
|
"margin_dpo/margin_mean": 149.75445556640625,
|
|
"margin_dpo/margin_std": 160.1766357421875,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.6108663729809104,
|
|
"fcm_dpo/beta": 0.004934309050440788,
|
|
"fcm_dpo/delta": -0.17341846227645874,
|
|
"fcm_dpo/margin": 203.90164184570312,
|
|
"fcm_dpo/q_t": 0.28993839025497437,
|
|
"grad_norm": 51.852142333984375,
|
|
"learning_rate": 1.990267419549914e-07,
|
|
"logits/chosen": 0.10152135044336319,
|
|
"logits/rejected": 0.1114257276058197,
|
|
"logps/chosen": -357.18414306640625,
|
|
"logps/ref_chosen": -58.27912521362305,
|
|
"logps/ref_rejected": -90.56871795654297,
|
|
"logps/rejected": -593.3753662109375,
|
|
"loss": 0.7466,
|
|
"margin_dpo/margin_mean": 203.90164184570312,
|
|
"margin_dpo/margin_std": 158.02981567382812,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.6123348017621145,
|
|
"fcm_dpo/beta": 0.004816145170480013,
|
|
"fcm_dpo/delta": -0.07927640527486801,
|
|
"fcm_dpo/margin": 191.4312744140625,
|
|
"fcm_dpo/q_t": 0.3030052185058594,
|
|
"grad_norm": 42.43336486816406,
|
|
"learning_rate": 1.9777104743594686e-07,
|
|
"logits/chosen": 0.23988962173461914,
|
|
"logits/rejected": 0.2814761996269226,
|
|
"logps/chosen": -353.5125732421875,
|
|
"logps/ref_chosen": -50.1987190246582,
|
|
"logps/ref_rejected": -68.15184020996094,
|
|
"logps/rejected": -562.89697265625,
|
|
"loss": 0.7679,
|
|
"margin_dpo/margin_mean": 191.4312744140625,
|
|
"margin_dpo/margin_std": 146.1686553955078,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.6138032305433186,
|
|
"fcm_dpo/beta": 0.004821361042559147,
|
|
"fcm_dpo/delta": -0.0207485631108284,
|
|
"fcm_dpo/margin": 179.69488525390625,
|
|
"fcm_dpo/q_t": 0.32698148488998413,
|
|
"grad_norm": 37.9013786315918,
|
|
"learning_rate": 1.965167291983757e-07,
|
|
"logits/chosen": 0.0310177244246006,
|
|
"logits/rejected": 0.06632434576749802,
|
|
"logps/chosen": -372.91217041015625,
|
|
"logps/ref_chosen": -81.97846984863281,
|
|
"logps/ref_rejected": -104.69148254394531,
|
|
"logps/rejected": -575.320068359375,
|
|
"loss": 0.8727,
|
|
"margin_dpo/margin_mean": 179.69488525390625,
|
|
"margin_dpo/margin_std": 190.57337951660156,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.6152716593245228,
|
|
"fcm_dpo/beta": 0.004708138294517994,
|
|
"fcm_dpo/delta": -0.051440898329019547,
|
|
"fcm_dpo/margin": 190.4224090576172,
|
|
"fcm_dpo/q_t": 0.3137531876564026,
|
|
"grad_norm": 40.84404373168945,
|
|
"learning_rate": 1.9526382029472988e-07,
|
|
"logits/chosen": -0.034985870122909546,
|
|
"logits/rejected": -0.023904774338006973,
|
|
"logps/chosen": -312.14996337890625,
|
|
"logps/ref_chosen": -52.948646545410156,
|
|
"logps/ref_rejected": -91.58309936523438,
|
|
"logps/rejected": -541.2068481445312,
|
|
"loss": 0.8186,
|
|
"margin_dpo/margin_mean": 190.4224090576172,
|
|
"margin_dpo/margin_std": 174.2207794189453,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.6167400881057269,
|
|
"fcm_dpo/beta": 0.004817042499780655,
|
|
"fcm_dpo/delta": 0.1355183720588684,
|
|
"fcm_dpo/margin": 150.2655029296875,
|
|
"fcm_dpo/q_t": 0.3537171483039856,
|
|
"grad_norm": 53.41609191894531,
|
|
"learning_rate": 1.9401235374032425e-07,
|
|
"logits/chosen": 0.02854771912097931,
|
|
"logits/rejected": 0.07809595763683319,
|
|
"logps/chosen": -342.8812255859375,
|
|
"logps/ref_chosen": -77.7699203491211,
|
|
"logps/ref_rejected": -69.31985473632812,
|
|
"logps/rejected": -484.6966857910156,
|
|
"loss": 0.9521,
|
|
"margin_dpo/margin_mean": 150.2655029296875,
|
|
"margin_dpo/margin_std": 187.6026611328125,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.618208516886931,
|
|
"fcm_dpo/beta": 0.004980705678462982,
|
|
"fcm_dpo/delta": 0.17126131057739258,
|
|
"fcm_dpo/margin": 138.48049926757812,
|
|
"fcm_dpo/q_t": 0.35512322187423706,
|
|
"grad_norm": 50.58280563354492,
|
|
"learning_rate": 1.9276236251246653e-07,
|
|
"logits/chosen": -0.004294605925679207,
|
|
"logits/rejected": 0.012868620455265045,
|
|
"logps/chosen": -289.5254211425781,
|
|
"logps/ref_chosen": -53.765865325927734,
|
|
"logps/ref_rejected": -89.28144836425781,
|
|
"logps/rejected": -463.521484375,
|
|
"loss": 0.9536,
|
|
"margin_dpo/margin_mean": 138.48049926757812,
|
|
"margin_dpo/margin_std": 160.81411743164062,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.6196769456681351,
|
|
"fcm_dpo/beta": 0.005074765998870134,
|
|
"fcm_dpo/delta": 0.07042841613292694,
|
|
"fcm_dpo/margin": 154.63316345214844,
|
|
"fcm_dpo/q_t": 0.3365428149700165,
|
|
"grad_norm": 43.70001983642578,
|
|
"learning_rate": 1.9151387954958792e-07,
|
|
"logits/chosen": 0.05924776941537857,
|
|
"logits/rejected": 0.0849265307188034,
|
|
"logps/chosen": -328.7698974609375,
|
|
"logps/ref_chosen": -68.6337661743164,
|
|
"logps/ref_rejected": -87.86351013183594,
|
|
"logps/rejected": -502.6328125,
|
|
"loss": 0.9044,
|
|
"margin_dpo/margin_mean": 154.6331787109375,
|
|
"margin_dpo/margin_std": 169.25784301757812,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.6211453744493393,
|
|
"fcm_dpo/beta": 0.005103256553411484,
|
|
"fcm_dpo/delta": 0.04598752781748772,
|
|
"fcm_dpo/margin": 158.27920532226562,
|
|
"fcm_dpo/q_t": 0.3329399824142456,
|
|
"grad_norm": 52.14820098876953,
|
|
"learning_rate": 1.902669377503756e-07,
|
|
"logits/chosen": 0.04696730524301529,
|
|
"logits/rejected": 0.05033382400870323,
|
|
"logps/chosen": -299.9007873535156,
|
|
"logps/ref_chosen": -54.99030303955078,
|
|
"logps/ref_rejected": -86.30654907226562,
|
|
"logps/rejected": -489.4962463378906,
|
|
"loss": 0.8829,
|
|
"margin_dpo/margin_mean": 158.27920532226562,
|
|
"margin_dpo/margin_std": 166.1439971923828,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.6226138032305433,
|
|
"fcm_dpo/beta": 0.005140416789799929,
|
|
"fcm_dpo/delta": 0.04995416849851608,
|
|
"fcm_dpo/margin": 156.386474609375,
|
|
"fcm_dpo/q_t": 0.3389360010623932,
|
|
"grad_norm": 62.656551361083984,
|
|
"learning_rate": 1.890215699729057e-07,
|
|
"logits/chosen": -0.046080753207206726,
|
|
"logits/rejected": -0.017214350402355194,
|
|
"logps/chosen": -261.4268798828125,
|
|
"logps/ref_chosen": -56.01192092895508,
|
|
"logps/ref_rejected": -66.47896575927734,
|
|
"logps/rejected": -428.2803955078125,
|
|
"loss": 0.9007,
|
|
"margin_dpo/margin_mean": 156.38648986816406,
|
|
"margin_dpo/margin_std": 177.43490600585938,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.6240822320117474,
|
|
"fcm_dpo/beta": 0.005301492288708687,
|
|
"fcm_dpo/delta": 0.1069888174533844,
|
|
"fcm_dpo/margin": 141.27867126464844,
|
|
"fcm_dpo/q_t": 0.3437122702598572,
|
|
"grad_norm": 44.34437942504883,
|
|
"learning_rate": 1.8777780903377732e-07,
|
|
"logits/chosen": 0.0495096892118454,
|
|
"logits/rejected": 0.04999320209026337,
|
|
"logps/chosen": -264.53546142578125,
|
|
"logps/ref_chosen": -46.86899948120117,
|
|
"logps/ref_rejected": -95.92545318603516,
|
|
"logps/rejected": -454.87060546875,
|
|
"loss": 0.9226,
|
|
"margin_dpo/margin_mean": 141.2786865234375,
|
|
"margin_dpo/margin_std": 155.26663208007812,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.6255506607929515,
|
|
"fcm_dpo/beta": 0.005260363221168518,
|
|
"fcm_dpo/delta": -0.09204407781362534,
|
|
"fcm_dpo/margin": 177.3383331298828,
|
|
"fcm_dpo/q_t": 0.3091350197792053,
|
|
"grad_norm": 76.20099639892578,
|
|
"learning_rate": 1.8653568770724803e-07,
|
|
"logits/chosen": -0.04422163963317871,
|
|
"logits/rejected": 0.00882166437804699,
|
|
"logps/chosen": -279.64617919921875,
|
|
"logps/ref_chosen": -76.58354187011719,
|
|
"logps/ref_rejected": -81.26658630371094,
|
|
"logps/rejected": -461.66754150390625,
|
|
"loss": 0.8229,
|
|
"margin_dpo/margin_mean": 177.33831787109375,
|
|
"margin_dpo/margin_std": 168.63809204101562,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.6270190895741556,
|
|
"fcm_dpo/beta": 0.005375740118324757,
|
|
"fcm_dpo/delta": 0.23773564398288727,
|
|
"fcm_dpo/margin": 116.69024658203125,
|
|
"fcm_dpo/q_t": 0.36856818199157715,
|
|
"grad_norm": 53.63347244262695,
|
|
"learning_rate": 1.8529523872436977e-07,
|
|
"logits/chosen": -0.021857187151908875,
|
|
"logits/rejected": 0.0070018284022808075,
|
|
"logps/chosen": -265.6365966796875,
|
|
"logps/ref_chosen": -64.8538818359375,
|
|
"logps/ref_rejected": -78.5660171508789,
|
|
"logps/rejected": -396.03900146484375,
|
|
"loss": 0.9804,
|
|
"margin_dpo/margin_mean": 116.69024658203125,
|
|
"margin_dpo/margin_std": 143.55154418945312,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.6284875183553598,
|
|
"fcm_dpo/beta": 0.00542007852345705,
|
|
"fcm_dpo/delta": -0.032713260501623154,
|
|
"fcm_dpo/margin": 162.3301239013672,
|
|
"fcm_dpo/q_t": 0.3234860599040985,
|
|
"grad_norm": 43.0552978515625,
|
|
"learning_rate": 1.8405649477212697e-07,
|
|
"logits/chosen": -0.028629587963223457,
|
|
"logits/rejected": -0.013756831176578999,
|
|
"logps/chosen": -317.6119384765625,
|
|
"logps/ref_chosen": -62.63666534423828,
|
|
"logps/ref_rejected": -103.28181457519531,
|
|
"logps/rejected": -520.5872192382812,
|
|
"loss": 0.8812,
|
|
"margin_dpo/margin_mean": 162.3301239013672,
|
|
"margin_dpo/margin_std": 179.6146240234375,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.6299559471365639,
|
|
"fcm_dpo/beta": 0.005543345585465431,
|
|
"fcm_dpo/delta": 0.10069851577281952,
|
|
"fcm_dpo/margin": 136.0623779296875,
|
|
"fcm_dpo/q_t": 0.34946388006210327,
|
|
"grad_norm": 80.5544662475586,
|
|
"learning_rate": 1.828194884925749e-07,
|
|
"logits/chosen": 0.011677625589072704,
|
|
"logits/rejected": 0.06275086104869843,
|
|
"logps/chosen": -339.78070068359375,
|
|
"logps/ref_chosen": -81.23401641845703,
|
|
"logps/ref_rejected": -91.79493713378906,
|
|
"logps/rejected": -486.40399169921875,
|
|
"loss": 0.9663,
|
|
"margin_dpo/margin_mean": 136.0623779296875,
|
|
"margin_dpo/margin_std": 171.47412109375,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.631424375917768,
|
|
"fcm_dpo/beta": 0.005538782104849815,
|
|
"fcm_dpo/delta": -0.005278989672660828,
|
|
"fcm_dpo/margin": 154.27493286132812,
|
|
"fcm_dpo/q_t": 0.3279253840446472,
|
|
"grad_norm": 38.59894561767578,
|
|
"learning_rate": 1.8158425248197928e-07,
|
|
"logits/chosen": -0.07191786915063858,
|
|
"logits/rejected": -0.06347661465406418,
|
|
"logps/chosen": -284.88165283203125,
|
|
"logps/ref_chosen": -60.920326232910156,
|
|
"logps/ref_rejected": -104.42280578613281,
|
|
"logps/rejected": -482.6590576171875,
|
|
"loss": 0.8471,
|
|
"margin_dpo/margin_mean": 154.27493286132812,
|
|
"margin_dpo/margin_std": 157.42654418945312,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.6328928046989721,
|
|
"fcm_dpo/beta": 0.005310466047376394,
|
|
"fcm_dpo/delta": -0.21516066789627075,
|
|
"fcm_dpo/margin": 196.02615356445312,
|
|
"fcm_dpo/q_t": 0.28844383358955383,
|
|
"grad_norm": 46.57950210571289,
|
|
"learning_rate": 1.8035081928995788e-07,
|
|
"logits/chosen": 0.06265595555305481,
|
|
"logits/rejected": 0.0806708112359047,
|
|
"logps/chosen": -286.49560546875,
|
|
"logps/ref_chosen": -57.34874725341797,
|
|
"logps/ref_rejected": -92.84022521972656,
|
|
"logps/rejected": -518.0132446289062,
|
|
"loss": 0.7499,
|
|
"margin_dpo/margin_mean": 196.0261688232422,
|
|
"margin_dpo/margin_std": 165.00961303710938,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.6343612334801763,
|
|
"fcm_dpo/beta": 0.005262886174023151,
|
|
"fcm_dpo/delta": -0.11242011934518814,
|
|
"fcm_dpo/margin": 180.4857635498047,
|
|
"fcm_dpo/q_t": 0.3041759133338928,
|
|
"grad_norm": 33.68878173828125,
|
|
"learning_rate": 1.791192214186223e-07,
|
|
"logits/chosen": -0.06168440729379654,
|
|
"logits/rejected": -0.031240500509738922,
|
|
"logps/chosen": -296.9165954589844,
|
|
"logps/ref_chosen": -71.07479095458984,
|
|
"logps/ref_rejected": -98.57952880859375,
|
|
"logps/rejected": -504.9071044921875,
|
|
"loss": 0.7839,
|
|
"margin_dpo/margin_mean": 180.4857635498047,
|
|
"margin_dpo/margin_std": 153.55856323242188,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.6358296622613803,
|
|
"fcm_dpo/beta": 0.0052696773782372475,
|
|
"fcm_dpo/delta": 0.12958739697933197,
|
|
"fcm_dpo/margin": 138.4744415283203,
|
|
"fcm_dpo/q_t": 0.35025665163993835,
|
|
"grad_norm": 49.32050323486328,
|
|
"learning_rate": 1.7788949132172193e-07,
|
|
"logits/chosen": 0.043396446853876114,
|
|
"logits/rejected": 0.06199165806174278,
|
|
"logps/chosen": -338.40191650390625,
|
|
"logps/ref_chosen": -58.273193359375,
|
|
"logps/ref_rejected": -95.95089721679688,
|
|
"logps/rejected": -514.5540771484375,
|
|
"loss": 0.9753,
|
|
"margin_dpo/margin_mean": 138.4744415283203,
|
|
"margin_dpo/margin_std": 176.79898071289062,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.6372980910425844,
|
|
"fcm_dpo/beta": 0.005316519178450108,
|
|
"fcm_dpo/delta": 0.03797819837927818,
|
|
"fcm_dpo/margin": 153.3128204345703,
|
|
"fcm_dpo/q_t": 0.3362678289413452,
|
|
"grad_norm": 42.75183868408203,
|
|
"learning_rate": 1.7666166140378853e-07,
|
|
"logits/chosen": 0.09770476818084717,
|
|
"logits/rejected": 0.12834317982196808,
|
|
"logps/chosen": -315.55572509765625,
|
|
"logps/ref_chosen": -61.97370147705078,
|
|
"logps/ref_rejected": -78.49861145019531,
|
|
"logps/rejected": -485.3934631347656,
|
|
"loss": 0.8842,
|
|
"margin_dpo/margin_mean": 153.31283569335938,
|
|
"margin_dpo/margin_std": 169.85568237304688,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.6387665198237885,
|
|
"fcm_dpo/beta": 0.00526207871735096,
|
|
"fcm_dpo/delta": -0.09070765972137451,
|
|
"fcm_dpo/margin": 177.1383056640625,
|
|
"fcm_dpo/q_t": 0.3105207681655884,
|
|
"grad_norm": 38.45564270019531,
|
|
"learning_rate": 1.7543576401928218e-07,
|
|
"logits/chosen": 0.17958560585975647,
|
|
"logits/rejected": 0.21033020317554474,
|
|
"logps/chosen": -298.01806640625,
|
|
"logps/ref_chosen": -51.502052307128906,
|
|
"logps/ref_rejected": -87.56689453125,
|
|
"logps/rejected": -511.22119140625,
|
|
"loss": 0.8444,
|
|
"margin_dpo/margin_mean": 177.13829040527344,
|
|
"margin_dpo/margin_std": 178.4004669189453,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.6402349486049926,
|
|
"fcm_dpo/beta": 0.005174779333174229,
|
|
"fcm_dpo/delta": -0.07778394967317581,
|
|
"fcm_dpo/margin": 177.8702392578125,
|
|
"fcm_dpo/q_t": 0.31033855676651,
|
|
"grad_norm": 66.37271118164062,
|
|
"learning_rate": 1.742118314717391e-07,
|
|
"logits/chosen": -0.01764148473739624,
|
|
"logits/rejected": 0.029303671792149544,
|
|
"logps/chosen": -323.1204833984375,
|
|
"logps/ref_chosen": -71.40371704101562,
|
|
"logps/ref_rejected": -82.72775268554688,
|
|
"logps/rejected": -512.3147583007812,
|
|
"loss": 0.8009,
|
|
"margin_dpo/margin_mean": 177.8702392578125,
|
|
"margin_dpo/margin_std": 161.47940063476562,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.6417033773861968,
|
|
"fcm_dpo/beta": 0.005187171511352062,
|
|
"fcm_dpo/delta": 0.008135082200169563,
|
|
"fcm_dpo/margin": 162.37954711914062,
|
|
"fcm_dpo/q_t": 0.32683050632476807,
|
|
"grad_norm": 44.145938873291016,
|
|
"learning_rate": 1.7298989601292036e-07,
|
|
"logits/chosen": 0.0989217460155487,
|
|
"logits/rejected": 0.1424962878227234,
|
|
"logps/chosen": -336.16766357421875,
|
|
"logps/ref_chosen": -64.7442626953125,
|
|
"logps/ref_rejected": -82.04356384277344,
|
|
"logps/rejected": -515.8464965820312,
|
|
"loss": 0.8751,
|
|
"margin_dpo/margin_mean": 162.37954711914062,
|
|
"margin_dpo/margin_std": 169.3526611328125,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.6431718061674009,
|
|
"fcm_dpo/beta": 0.005006927065551281,
|
|
"fcm_dpo/delta": -0.15296681225299835,
|
|
"fcm_dpo/margin": 196.67837524414062,
|
|
"fcm_dpo/q_t": 0.29662394523620605,
|
|
"grad_norm": 44.88563537597656,
|
|
"learning_rate": 1.7176998984196144e-07,
|
|
"logits/chosen": 0.14420339465141296,
|
|
"logits/rejected": 0.1939929574728012,
|
|
"logps/chosen": -323.58074951171875,
|
|
"logps/ref_chosen": -59.0186653137207,
|
|
"logps/ref_rejected": -83.07682800292969,
|
|
"logps/rejected": -544.3172607421875,
|
|
"loss": 0.7626,
|
|
"margin_dpo/margin_mean": 196.67837524414062,
|
|
"margin_dpo/margin_std": 161.43214416503906,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.644640234948605,
|
|
"fcm_dpo/beta": 0.005011453293263912,
|
|
"fcm_dpo/delta": 0.08284294605255127,
|
|
"fcm_dpo/margin": 153.88717651367188,
|
|
"fcm_dpo/q_t": 0.34648653864860535,
|
|
"grad_norm": 50.955806732177734,
|
|
"learning_rate": 1.7055214510452458e-07,
|
|
"logits/chosen": 0.1353827714920044,
|
|
"logits/rejected": 0.14328114688396454,
|
|
"logps/chosen": -347.3162536621094,
|
|
"logps/ref_chosen": -53.78407669067383,
|
|
"logps/ref_rejected": -83.98545837402344,
|
|
"logps/rejected": -531.40478515625,
|
|
"loss": 0.9245,
|
|
"margin_dpo/margin_mean": 153.88717651367188,
|
|
"margin_dpo/margin_std": 174.40078735351562,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.6461086637298091,
|
|
"fcm_dpo/beta": 0.0051966458559036255,
|
|
"fcm_dpo/delta": 0.10422302782535553,
|
|
"fcm_dpo/margin": 144.91168212890625,
|
|
"fcm_dpo/q_t": 0.3527141511440277,
|
|
"grad_norm": 75.77989196777344,
|
|
"learning_rate": 1.6933639389195134e-07,
|
|
"logits/chosen": 0.07095863670110703,
|
|
"logits/rejected": 0.11175039410591125,
|
|
"logps/chosen": -385.72686767578125,
|
|
"logps/ref_chosen": -78.56671905517578,
|
|
"logps/ref_rejected": -96.49775695800781,
|
|
"logps/rejected": -548.569580078125,
|
|
"loss": 0.9497,
|
|
"margin_dpo/margin_mean": 144.91168212890625,
|
|
"margin_dpo/margin_std": 183.58206176757812,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.6475770925110133,
|
|
"fcm_dpo/beta": 0.005188990384340286,
|
|
"fcm_dpo/delta": -0.04949396103620529,
|
|
"fcm_dpo/margin": 172.47109985351562,
|
|
"fcm_dpo/q_t": 0.3268754184246063,
|
|
"grad_norm": 39.17652893066406,
|
|
"learning_rate": 1.681227682404166e-07,
|
|
"logits/chosen": 0.097470723092556,
|
|
"logits/rejected": 0.1150805875658989,
|
|
"logps/chosen": -390.5365295410156,
|
|
"logps/ref_chosen": -60.824440002441406,
|
|
"logps/ref_rejected": -96.47080993652344,
|
|
"logps/rejected": -598.6539916992188,
|
|
"loss": 0.9063,
|
|
"margin_dpo/margin_mean": 172.47109985351562,
|
|
"margin_dpo/margin_std": 205.70474243164062,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.6490455212922174,
|
|
"fcm_dpo/beta": 0.005070436745882034,
|
|
"fcm_dpo/delta": -0.18530398607254028,
|
|
"fcm_dpo/margin": 200.40408325195312,
|
|
"fcm_dpo/q_t": 0.3012937307357788,
|
|
"grad_norm": 38.1731071472168,
|
|
"learning_rate": 1.669113001300851e-07,
|
|
"logits/chosen": 0.060047049075365067,
|
|
"logits/rejected": 0.07558459043502808,
|
|
"logps/chosen": -342.12982177734375,
|
|
"logps/ref_chosen": -47.01121520996094,
|
|
"logps/ref_rejected": -76.53926086425781,
|
|
"logps/rejected": -572.0619506835938,
|
|
"loss": 0.788,
|
|
"margin_dpo/margin_mean": 200.40408325195312,
|
|
"margin_dpo/margin_std": 188.1474151611328,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.6505139500734214,
|
|
"fcm_dpo/beta": 0.0050366222858428955,
|
|
"fcm_dpo/delta": 0.1421085149049759,
|
|
"fcm_dpo/margin": 142.5586700439453,
|
|
"fcm_dpo/q_t": 0.35665544867515564,
|
|
"grad_norm": 53.045677185058594,
|
|
"learning_rate": 1.6570202148426815e-07,
|
|
"logits/chosen": 0.09138190746307373,
|
|
"logits/rejected": 0.10863150656223297,
|
|
"logps/chosen": -402.77294921875,
|
|
"logps/ref_chosen": -71.27301788330078,
|
|
"logps/ref_rejected": -86.679931640625,
|
|
"logps/rejected": -560.738525390625,
|
|
"loss": 0.9873,
|
|
"margin_dpo/margin_mean": 142.55868530273438,
|
|
"margin_dpo/margin_std": 193.21839904785156,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.6519823788546255,
|
|
"fcm_dpo/beta": 0.004996387287974358,
|
|
"fcm_dpo/delta": -0.14721286296844482,
|
|
"fcm_dpo/margin": 196.6611785888672,
|
|
"fcm_dpo/q_t": 0.3072533905506134,
|
|
"grad_norm": 48.08612823486328,
|
|
"learning_rate": 1.6449496416858282e-07,
|
|
"logits/chosen": 0.18914419412612915,
|
|
"logits/rejected": 0.20519858598709106,
|
|
"logps/chosen": -396.1791076660156,
|
|
"logps/ref_chosen": -57.213706970214844,
|
|
"logps/ref_rejected": -97.25489807128906,
|
|
"logps/rejected": -632.8814697265625,
|
|
"loss": 0.8182,
|
|
"margin_dpo/margin_mean": 196.66119384765625,
|
|
"margin_dpo/margin_std": 203.6089630126953,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.6534508076358296,
|
|
"fcm_dpo/beta": 0.0048555051907896996,
|
|
"fcm_dpo/delta": -0.1426890343427658,
|
|
"fcm_dpo/margin": 201.54562377929688,
|
|
"fcm_dpo/q_t": 0.3039454221725464,
|
|
"grad_norm": 74.20765686035156,
|
|
"learning_rate": 1.6329015999011182e-07,
|
|
"logits/chosen": 0.11190081387758255,
|
|
"logits/rejected": 0.13852503895759583,
|
|
"logps/chosen": -388.1588134765625,
|
|
"logps/ref_chosen": -67.29979705810547,
|
|
"logps/ref_rejected": -92.68267059326172,
|
|
"logps/rejected": -615.0872802734375,
|
|
"loss": 0.7866,
|
|
"margin_dpo/margin_mean": 201.54562377929688,
|
|
"margin_dpo/margin_std": 183.888427734375,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.6549192364170338,
|
|
"fcm_dpo/beta": 0.004782508127391338,
|
|
"fcm_dpo/delta": -0.11369301378726959,
|
|
"fcm_dpo/margin": 199.1475830078125,
|
|
"fcm_dpo/q_t": 0.30260688066482544,
|
|
"grad_norm": 52.3062858581543,
|
|
"learning_rate": 1.6208764069656578e-07,
|
|
"logits/chosen": 0.16620711982250214,
|
|
"logits/rejected": 0.1732136458158493,
|
|
"logps/chosen": -367.4357604980469,
|
|
"logps/ref_chosen": -59.098487854003906,
|
|
"logps/ref_rejected": -101.26419067382812,
|
|
"logps/rejected": -608.7490844726562,
|
|
"loss": 0.7721,
|
|
"margin_dpo/margin_mean": 199.1475830078125,
|
|
"margin_dpo/margin_std": 163.44342041015625,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.6563876651982379,
|
|
"fcm_dpo/beta": 0.004636620171368122,
|
|
"fcm_dpo/delta": -0.07215973734855652,
|
|
"fcm_dpo/margin": 197.22711181640625,
|
|
"fcm_dpo/q_t": 0.3173784613609314,
|
|
"grad_norm": 45.591400146484375,
|
|
"learning_rate": 1.608874379754465e-07,
|
|
"logits/chosen": 0.1511530876159668,
|
|
"logits/rejected": 0.15351176261901855,
|
|
"logps/chosen": -388.9348449707031,
|
|
"logps/ref_chosen": -56.07533264160156,
|
|
"logps/ref_rejected": -98.69475555419922,
|
|
"logps/rejected": -628.7813720703125,
|
|
"loss": 0.8258,
|
|
"margin_dpo/margin_mean": 197.22711181640625,
|
|
"margin_dpo/margin_std": 203.49270629882812,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.657856093979442,
|
|
"fcm_dpo/beta": 0.004568365402519703,
|
|
"fcm_dpo/delta": -0.14031019806861877,
|
|
"fcm_dpo/margin": 213.8214874267578,
|
|
"fcm_dpo/q_t": 0.3027946352958679,
|
|
"grad_norm": 43.747013092041016,
|
|
"learning_rate": 1.5968958345321177e-07,
|
|
"logits/chosen": 0.1592235565185547,
|
|
"logits/rejected": 0.17409782111644745,
|
|
"logps/chosen": -417.6317138671875,
|
|
"logps/ref_chosen": -60.00384521484375,
|
|
"logps/ref_rejected": -102.26465606689453,
|
|
"logps/rejected": -673.7140502929688,
|
|
"loss": 0.7801,
|
|
"margin_dpo/margin_mean": 213.82147216796875,
|
|
"margin_dpo/margin_std": 191.90713500976562,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.6593245227606461,
|
|
"fcm_dpo/beta": 0.00449769850820303,
|
|
"fcm_dpo/delta": -0.014355389401316643,
|
|
"fcm_dpo/margin": 191.867919921875,
|
|
"fcm_dpo/q_t": 0.3324083089828491,
|
|
"grad_norm": 48.77326202392578,
|
|
"learning_rate": 1.584941086944423e-07,
|
|
"logits/chosen": 0.10339124500751495,
|
|
"logits/rejected": 0.13415296375751495,
|
|
"logps/chosen": -414.3675537109375,
|
|
"logps/ref_chosen": -67.52661895751953,
|
|
"logps/ref_rejected": -88.59690856933594,
|
|
"logps/rejected": -627.3057861328125,
|
|
"loss": 0.9054,
|
|
"margin_dpo/margin_mean": 191.867919921875,
|
|
"margin_dpo/margin_std": 233.46246337890625,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.6607929515418502,
|
|
"fcm_dpo/beta": 0.004431567154824734,
|
|
"fcm_dpo/delta": -0.14691829681396484,
|
|
"fcm_dpo/margin": 221.7427520751953,
|
|
"fcm_dpo/q_t": 0.2959328293800354,
|
|
"grad_norm": 35.59035873413086,
|
|
"learning_rate": 1.573010452010098e-07,
|
|
"logits/chosen": 0.13303320109844208,
|
|
"logits/rejected": 0.14206843078136444,
|
|
"logps/chosen": -363.13812255859375,
|
|
"logps/ref_chosen": -57.10811996459961,
|
|
"logps/ref_rejected": -102.75494384765625,
|
|
"logps/rejected": -630.5277099609375,
|
|
"loss": 0.7723,
|
|
"margin_dpo/margin_mean": 221.74276733398438,
|
|
"margin_dpo/margin_std": 192.90106201171875,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.6622613803230544,
|
|
"fcm_dpo/beta": 0.004432865884155035,
|
|
"fcm_dpo/delta": 0.047808244824409485,
|
|
"fcm_dpo/margin": 181.61512756347656,
|
|
"fcm_dpo/q_t": 0.3292296528816223,
|
|
"grad_norm": 51.70342254638672,
|
|
"learning_rate": 1.5611042441124687e-07,
|
|
"logits/chosen": 0.31042128801345825,
|
|
"logits/rejected": 0.3536532521247864,
|
|
"logps/chosen": -417.8612365722656,
|
|
"logps/ref_chosen": -58.46883010864258,
|
|
"logps/ref_rejected": -72.92941284179688,
|
|
"logps/rejected": -613.9369506835938,
|
|
"loss": 0.9141,
|
|
"margin_dpo/margin_mean": 181.61514282226562,
|
|
"margin_dpo/margin_std": 208.4745330810547,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.6637298091042585,
|
|
"fcm_dpo/beta": 0.004379277117550373,
|
|
"fcm_dpo/delta": -0.04874721169471741,
|
|
"fcm_dpo/margin": 204.20883178710938,
|
|
"fcm_dpo/q_t": 0.3148379921913147,
|
|
"grad_norm": 42.03379440307617,
|
|
"learning_rate": 1.549222776991186e-07,
|
|
"logits/chosen": 0.17657610774040222,
|
|
"logits/rejected": 0.17502948641777039,
|
|
"logps/chosen": -340.4559326171875,
|
|
"logps/ref_chosen": -50.39055252075195,
|
|
"logps/ref_rejected": -97.77142333984375,
|
|
"logps/rejected": -592.045654296875,
|
|
"loss": 0.8076,
|
|
"margin_dpo/margin_mean": 204.20883178710938,
|
|
"margin_dpo/margin_std": 183.80862426757812,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.6651982378854625,
|
|
"fcm_dpo/beta": 0.004408560693264008,
|
|
"fcm_dpo/delta": 0.09181352704763412,
|
|
"fcm_dpo/margin": 173.53721618652344,
|
|
"fcm_dpo/q_t": 0.34526383876800537,
|
|
"grad_norm": 42.1302490234375,
|
|
"learning_rate": 1.5373663637339584e-07,
|
|
"logits/chosen": 0.09675667434930801,
|
|
"logits/rejected": 0.1360025852918625,
|
|
"logps/chosen": -382.1043701171875,
|
|
"logps/ref_chosen": -57.71485137939453,
|
|
"logps/ref_rejected": -82.20741271972656,
|
|
"logps/rejected": -580.1341552734375,
|
|
"loss": 0.9098,
|
|
"margin_dpo/margin_mean": 173.53721618652344,
|
|
"margin_dpo/margin_std": 197.82093811035156,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.6666666666666666,
|
|
"fcm_dpo/beta": 0.004366679582744837,
|
|
"fcm_dpo/delta": -0.10246938467025757,
|
|
"fcm_dpo/margin": 215.6974334716797,
|
|
"fcm_dpo/q_t": 0.3123183846473694,
|
|
"grad_norm": 42.43370819091797,
|
|
"learning_rate": 1.5255353167683017e-07,
|
|
"logits/chosen": 0.2050754278898239,
|
|
"logits/rejected": 0.22980359196662903,
|
|
"logps/chosen": -414.22125244140625,
|
|
"logps/ref_chosen": -60.945648193359375,
|
|
"logps/ref_rejected": -84.95079040527344,
|
|
"logps/rejected": -653.923828125,
|
|
"loss": 0.8234,
|
|
"margin_dpo/margin_mean": 215.6974334716797,
|
|
"margin_dpo/margin_std": 221.3548583984375,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.6681350954478708,
|
|
"fcm_dpo/beta": 0.0042896876111626625,
|
|
"fcm_dpo/delta": -0.17381757497787476,
|
|
"fcm_dpo/margin": 234.59808349609375,
|
|
"fcm_dpo/q_t": 0.29876023530960083,
|
|
"grad_norm": 34.73609924316406,
|
|
"learning_rate": 1.5137299478533064e-07,
|
|
"logits/chosen": 0.09607726335525513,
|
|
"logits/rejected": 0.10077625513076782,
|
|
"logps/chosen": -362.557861328125,
|
|
"logps/ref_chosen": -44.88671112060547,
|
|
"logps/ref_rejected": -115.30147552490234,
|
|
"logps/rejected": -667.5707397460938,
|
|
"loss": 0.778,
|
|
"margin_dpo/margin_mean": 234.59808349609375,
|
|
"margin_dpo/margin_std": 209.17662048339844,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.6696035242290749,
|
|
"fcm_dpo/beta": 0.0041549326851964,
|
|
"fcm_dpo/delta": -0.10536178946495056,
|
|
"fcm_dpo/margin": 227.55477905273438,
|
|
"fcm_dpo/q_t": 0.3024177551269531,
|
|
"grad_norm": 29.933616638183594,
|
|
"learning_rate": 1.5019505680714232e-07,
|
|
"logits/chosen": 0.11028112471103668,
|
|
"logits/rejected": 0.1089896708726883,
|
|
"logps/chosen": -378.89727783203125,
|
|
"logps/ref_chosen": -57.036781311035156,
|
|
"logps/ref_rejected": -105.21784210205078,
|
|
"logps/rejected": -654.633056640625,
|
|
"loss": 0.7617,
|
|
"margin_dpo/margin_mean": 227.55477905273438,
|
|
"margin_dpo/margin_std": 182.1309051513672,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.671071953010279,
|
|
"fcm_dpo/beta": 0.0040961261838674545,
|
|
"fcm_dpo/delta": -0.016104087233543396,
|
|
"fcm_dpo/margin": 210.951171875,
|
|
"fcm_dpo/q_t": 0.31706666946411133,
|
|
"grad_norm": 50.435768127441406,
|
|
"learning_rate": 1.4901974878202627e-07,
|
|
"logits/chosen": 0.11340844631195068,
|
|
"logits/rejected": 0.15247830748558044,
|
|
"logps/chosen": -358.2364501953125,
|
|
"logps/ref_chosen": -54.24253845214844,
|
|
"logps/ref_rejected": -85.10956573486328,
|
|
"logps/rejected": -600.0546264648438,
|
|
"loss": 0.8288,
|
|
"margin_dpo/margin_mean": 210.951171875,
|
|
"margin_dpo/margin_std": 189.23641967773438,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.6725403817914831,
|
|
"fcm_dpo/beta": 0.004061573650687933,
|
|
"fcm_dpo/delta": -0.05085879936814308,
|
|
"fcm_dpo/margin": 220.47543334960938,
|
|
"fcm_dpo/q_t": 0.3158029317855835,
|
|
"grad_norm": 33.672279357910156,
|
|
"learning_rate": 1.4784710168044212e-07,
|
|
"logits/chosen": 0.1747414469718933,
|
|
"logits/rejected": 0.1942700743675232,
|
|
"logps/chosen": -371.38616943359375,
|
|
"logps/ref_chosen": -55.40888214111328,
|
|
"logps/ref_rejected": -97.68325805664062,
|
|
"logps/rejected": -634.135986328125,
|
|
"loss": 0.819,
|
|
"margin_dpo/margin_mean": 220.47544860839844,
|
|
"margin_dpo/margin_std": 210.21676635742188,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.6740088105726872,
|
|
"fcm_dpo/beta": 0.004079152829945087,
|
|
"fcm_dpo/delta": 0.02714114636182785,
|
|
"fcm_dpo/margin": 202.2575225830078,
|
|
"fcm_dpo/q_t": 0.328235924243927,
|
|
"grad_norm": 34.5672607421875,
|
|
"learning_rate": 1.466771464027316e-07,
|
|
"logits/chosen": 0.1831570863723755,
|
|
"logits/rejected": 0.19782592356204987,
|
|
"logps/chosen": -396.4834289550781,
|
|
"logps/ref_chosen": -46.55748748779297,
|
|
"logps/ref_rejected": -86.16854095458984,
|
|
"logps/rejected": -638.3519897460938,
|
|
"loss": 0.8566,
|
|
"margin_dpo/margin_mean": 202.2575225830078,
|
|
"margin_dpo/margin_std": 194.767578125,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.6754772393538914,
|
|
"fcm_dpo/beta": 0.004061030223965645,
|
|
"fcm_dpo/delta": -0.057284481823444366,
|
|
"fcm_dpo/margin": 222.14083862304688,
|
|
"fcm_dpo/q_t": 0.3124066889286041,
|
|
"grad_norm": 40.84368133544922,
|
|
"learning_rate": 1.4550991377830423e-07,
|
|
"logits/chosen": 0.12749359011650085,
|
|
"logits/rejected": 0.12204212695360184,
|
|
"logps/chosen": -421.6571044921875,
|
|
"logps/ref_chosen": -51.63489532470703,
|
|
"logps/ref_rejected": -104.11935424804688,
|
|
"logps/rejected": -696.2824096679688,
|
|
"loss": 0.809,
|
|
"margin_dpo/margin_mean": 222.14083862304688,
|
|
"margin_dpo/margin_std": 199.95516967773438,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.6769456681350955,
|
|
"fcm_dpo/beta": 0.004167584702372551,
|
|
"fcm_dpo/delta": 0.1559598743915558,
|
|
"fcm_dpo/margin": 168.80178833007812,
|
|
"fcm_dpo/q_t": 0.35687270760536194,
|
|
"grad_norm": 46.724700927734375,
|
|
"learning_rate": 1.4434543456482518e-07,
|
|
"logits/chosen": 0.15208810567855835,
|
|
"logits/rejected": 0.16312140226364136,
|
|
"logps/chosen": -449.5841369628906,
|
|
"logps/ref_chosen": -55.18195724487305,
|
|
"logps/ref_rejected": -86.47689819335938,
|
|
"logps/rejected": -649.680908203125,
|
|
"loss": 0.9531,
|
|
"margin_dpo/margin_mean": 168.80178833007812,
|
|
"margin_dpo/margin_std": 204.61175537109375,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.6784140969162996,
|
|
"fcm_dpo/beta": 0.004273770377039909,
|
|
"fcm_dpo/delta": 0.17702484130859375,
|
|
"fcm_dpo/margin": 160.3841552734375,
|
|
"fcm_dpo/q_t": 0.35806363821029663,
|
|
"grad_norm": 62.64442825317383,
|
|
"learning_rate": 1.4318373944740484e-07,
|
|
"logits/chosen": 0.17464204132556915,
|
|
"logits/rejected": 0.1923956274986267,
|
|
"logps/chosen": -459.37158203125,
|
|
"logps/ref_chosen": -69.92803192138672,
|
|
"logps/ref_rejected": -78.84111022949219,
|
|
"logps/rejected": -628.6688232421875,
|
|
"loss": 0.9393,
|
|
"margin_dpo/margin_mean": 160.3841552734375,
|
|
"margin_dpo/margin_std": 182.3723602294922,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.6798825256975036,
|
|
"fcm_dpo/beta": 0.004382624290883541,
|
|
"fcm_dpo/delta": 0.026647619903087616,
|
|
"fcm_dpo/margin": 187.98629760742188,
|
|
"fcm_dpo/q_t": 0.3361142873764038,
|
|
"grad_norm": 109.82402801513672,
|
|
"learning_rate": 1.4202485903778976e-07,
|
|
"logits/chosen": 0.15215685963630676,
|
|
"logits/rejected": 0.16740122437477112,
|
|
"logps/chosen": -449.4383850097656,
|
|
"logps/ref_chosen": -55.27437210083008,
|
|
"logps/ref_rejected": -89.02497863769531,
|
|
"logps/rejected": -671.17529296875,
|
|
"loss": 0.8995,
|
|
"margin_dpo/margin_mean": 187.98629760742188,
|
|
"margin_dpo/margin_std": 210.312255859375,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.6813509544787077,
|
|
"fcm_dpo/beta": 0.004141380079090595,
|
|
"fcm_dpo/delta": -0.27502915263175964,
|
|
"fcm_dpo/margin": 263.5333557128906,
|
|
"fcm_dpo/q_t": 0.2828797698020935,
|
|
"grad_norm": 56.10831832885742,
|
|
"learning_rate": 1.4086882387355658e-07,
|
|
"logits/chosen": 0.16000524163246155,
|
|
"logits/rejected": 0.16301248967647552,
|
|
"logps/chosen": -435.59814453125,
|
|
"logps/ref_chosen": -50.91230010986328,
|
|
"logps/ref_rejected": -102.4893798828125,
|
|
"logps/rejected": -750.7085571289062,
|
|
"loss": 0.7456,
|
|
"margin_dpo/margin_mean": 263.5333557128906,
|
|
"margin_dpo/margin_std": 231.73806762695312,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.6828193832599119,
|
|
"fcm_dpo/beta": 0.0040384442545473576,
|
|
"fcm_dpo/delta": -0.1352299153804779,
|
|
"fcm_dpo/margin": 240.72128295898438,
|
|
"fcm_dpo/q_t": 0.2990262508392334,
|
|
"grad_norm": 49.06411361694336,
|
|
"learning_rate": 1.3971566441730714e-07,
|
|
"logits/chosen": 0.1766095906496048,
|
|
"logits/rejected": 0.183375284075737,
|
|
"logps/chosen": -446.39813232421875,
|
|
"logps/ref_chosen": -60.116851806640625,
|
|
"logps/ref_rejected": -113.94602966308594,
|
|
"logps/rejected": -740.9486083984375,
|
|
"loss": 0.7733,
|
|
"margin_dpo/margin_mean": 240.72129821777344,
|
|
"margin_dpo/margin_std": 206.07766723632812,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.684287812041116,
|
|
"fcm_dpo/beta": 0.004008342977613211,
|
|
"fcm_dpo/delta": 0.03794410452246666,
|
|
"fcm_dpo/margin": 203.26609802246094,
|
|
"fcm_dpo/q_t": 0.33106571435928345,
|
|
"grad_norm": 64.88581848144531,
|
|
"learning_rate": 1.3856541105586545e-07,
|
|
"logits/chosen": 0.15662892162799835,
|
|
"logits/rejected": 0.16130861639976501,
|
|
"logps/chosen": -469.90032958984375,
|
|
"logps/ref_chosen": -52.920921325683594,
|
|
"logps/ref_rejected": -90.3154296875,
|
|
"logps/rejected": -710.5609130859375,
|
|
"loss": 0.8857,
|
|
"margin_dpo/margin_mean": 203.26611328125,
|
|
"margin_dpo/margin_std": 214.98695373535156,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.6857562408223201,
|
|
"fcm_dpo/beta": 0.003955831751227379,
|
|
"fcm_dpo/delta": -0.06096991151571274,
|
|
"fcm_dpo/margin": 228.0748291015625,
|
|
"fcm_dpo/q_t": 0.32432740926742554,
|
|
"grad_norm": 67.74291229248047,
|
|
"learning_rate": 1.3741809409947729e-07,
|
|
"logits/chosen": 0.20276635885238647,
|
|
"logits/rejected": 0.22354087233543396,
|
|
"logps/chosen": -561.0784912109375,
|
|
"logps/ref_chosen": -78.7158203125,
|
|
"logps/ref_rejected": -102.86019897460938,
|
|
"logps/rejected": -813.2977294921875,
|
|
"loss": 0.8992,
|
|
"margin_dpo/margin_mean": 228.0748291015625,
|
|
"margin_dpo/margin_std": 262.29058837890625,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.6872246696035242,
|
|
"fcm_dpo/beta": 0.003934096544981003,
|
|
"fcm_dpo/delta": -0.05663018673658371,
|
|
"fcm_dpo/margin": 228.93344116210938,
|
|
"fcm_dpo/q_t": 0.3182372748851776,
|
|
"grad_norm": 49.3443717956543,
|
|
"learning_rate": 1.362737437810114e-07,
|
|
"logits/chosen": 0.13819798827171326,
|
|
"logits/rejected": 0.17619337141513824,
|
|
"logps/chosen": -467.938720703125,
|
|
"logps/ref_chosen": -69.93536376953125,
|
|
"logps/ref_rejected": -101.02880859375,
|
|
"logps/rejected": -727.965576171875,
|
|
"loss": 0.8409,
|
|
"margin_dpo/margin_mean": 228.93344116210938,
|
|
"margin_dpo/margin_std": 230.77337646484375,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.6886930983847284,
|
|
"fcm_dpo/beta": 0.0038513573817908764,
|
|
"fcm_dpo/delta": -0.11148861795663834,
|
|
"fcm_dpo/margin": 246.43438720703125,
|
|
"fcm_dpo/q_t": 0.30608806014060974,
|
|
"grad_norm": 49.65244674682617,
|
|
"learning_rate": 1.351323902551631e-07,
|
|
"logits/chosen": 0.12890158593654633,
|
|
"logits/rejected": 0.16653740406036377,
|
|
"logps/chosen": -514.1097412109375,
|
|
"logps/ref_chosen": -68.12469482421875,
|
|
"logps/ref_rejected": -104.78640747070312,
|
|
"logps/rejected": -797.205810546875,
|
|
"loss": 0.7908,
|
|
"margin_dpo/margin_mean": 246.43438720703125,
|
|
"margin_dpo/margin_std": 220.98977661132812,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.6901615271659325,
|
|
"fcm_dpo/beta": 0.0038081035017967224,
|
|
"fcm_dpo/delta": -0.07959811389446259,
|
|
"fcm_dpo/margin": 242.1552734375,
|
|
"fcm_dpo/q_t": 0.3072653114795685,
|
|
"grad_norm": 37.78285217285156,
|
|
"learning_rate": 1.339940635976592e-07,
|
|
"logits/chosen": 0.19035938382148743,
|
|
"logits/rejected": 0.20564529299736023,
|
|
"logps/chosen": -424.4998779296875,
|
|
"logps/ref_chosen": -43.791927337646484,
|
|
"logps/ref_rejected": -82.70285034179688,
|
|
"logps/rejected": -705.5660400390625,
|
|
"loss": 0.8099,
|
|
"margin_dpo/margin_mean": 242.15525817871094,
|
|
"margin_dpo/margin_std": 222.6582489013672,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.6916299559471366,
|
|
"fcm_dpo/beta": 0.003821744117885828,
|
|
"fcm_dpo/delta": 0.09349526464939117,
|
|
"fcm_dpo/margin": 199.71197509765625,
|
|
"fcm_dpo/q_t": 0.3398565649986267,
|
|
"grad_norm": 52.308223724365234,
|
|
"learning_rate": 1.3285879380446563e-07,
|
|
"logits/chosen": 0.16461865603923798,
|
|
"logits/rejected": 0.1858828216791153,
|
|
"logps/chosen": -481.19781494140625,
|
|
"logps/ref_chosen": -63.33952331542969,
|
|
"logps/ref_rejected": -83.61048126220703,
|
|
"logps/rejected": -701.1807250976562,
|
|
"loss": 0.8859,
|
|
"margin_dpo/margin_mean": 199.71197509765625,
|
|
"margin_dpo/margin_std": 196.94680786132812,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.6930983847283406,
|
|
"fcm_dpo/beta": 0.003807269735261798,
|
|
"fcm_dpo/delta": -0.10628817975521088,
|
|
"fcm_dpo/margin": 248.5273895263672,
|
|
"fcm_dpo/q_t": 0.3150752782821655,
|
|
"grad_norm": 34.88620376586914,
|
|
"learning_rate": 1.317266107909975e-07,
|
|
"logits/chosen": -0.0007737856358289719,
|
|
"logits/rejected": 0.03774175047874451,
|
|
"logps/chosen": -490.617919921875,
|
|
"logps/ref_chosen": -83.66610717773438,
|
|
"logps/ref_rejected": -117.20919799804688,
|
|
"logps/rejected": -772.6884155273438,
|
|
"loss": 0.8165,
|
|
"margin_dpo/margin_mean": 248.5273895263672,
|
|
"margin_dpo/margin_std": 252.5367431640625,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.6945668135095447,
|
|
"fcm_dpo/beta": 0.0038944047410041094,
|
|
"fcm_dpo/delta": 0.18922805786132812,
|
|
"fcm_dpo/margin": 172.9485321044922,
|
|
"fcm_dpo/q_t": 0.362824410200119,
|
|
"grad_norm": 64.09027099609375,
|
|
"learning_rate": 1.3059754439133002e-07,
|
|
"logits/chosen": 0.20194607973098755,
|
|
"logits/rejected": 0.22875045239925385,
|
|
"logps/chosen": -531.016845703125,
|
|
"logps/ref_chosen": -63.49696731567383,
|
|
"logps/ref_rejected": -81.14657592773438,
|
|
"logps/rejected": -721.614990234375,
|
|
"loss": 1.0217,
|
|
"margin_dpo/margin_mean": 172.9485321044922,
|
|
"margin_dpo/margin_std": 254.22259521484375,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.6960352422907489,
|
|
"fcm_dpo/beta": 0.0040451884269714355,
|
|
"fcm_dpo/delta": 0.13111956417560577,
|
|
"fcm_dpo/margin": 179.43878173828125,
|
|
"fcm_dpo/q_t": 0.3505546450614929,
|
|
"grad_norm": 61.069889068603516,
|
|
"learning_rate": 1.2947162435741277e-07,
|
|
"logits/chosen": 0.12781840562820435,
|
|
"logits/rejected": 0.14249897003173828,
|
|
"logps/chosen": -453.209228515625,
|
|
"logps/ref_chosen": -52.6119384765625,
|
|
"logps/ref_rejected": -90.08041381835938,
|
|
"logps/rejected": -670.116455078125,
|
|
"loss": 0.9636,
|
|
"margin_dpo/margin_mean": 179.43878173828125,
|
|
"margin_dpo/margin_std": 223.81166076660156,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.697503671071953,
|
|
"fcm_dpo/beta": 0.003963591530919075,
|
|
"fcm_dpo/delta": -0.11174008250236511,
|
|
"fcm_dpo/margin": 239.89425659179688,
|
|
"fcm_dpo/q_t": 0.303996205329895,
|
|
"grad_norm": 83.10292053222656,
|
|
"learning_rate": 1.2834888035828596e-07,
|
|
"logits/chosen": 0.24623063206672668,
|
|
"logits/rejected": 0.2509077191352844,
|
|
"logps/chosen": -415.0943298339844,
|
|
"logps/ref_chosen": -42.49519348144531,
|
|
"logps/ref_rejected": -90.06294250488281,
|
|
"logps/rejected": -702.556396484375,
|
|
"loss": 0.7845,
|
|
"margin_dpo/margin_mean": 239.89425659179688,
|
|
"margin_dpo/margin_std": 210.8565673828125,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.6989720998531571,
|
|
"fcm_dpo/beta": 0.003959137946367264,
|
|
"fcm_dpo/delta": 0.025470474734902382,
|
|
"fcm_dpo/margin": 208.79278564453125,
|
|
"fcm_dpo/q_t": 0.32757434248924255,
|
|
"grad_norm": 55.3691520690918,
|
|
"learning_rate": 1.2722934197929802e-07,
|
|
"logits/chosen": 0.18537373840808868,
|
|
"logits/rejected": 0.20475903153419495,
|
|
"logps/chosen": -407.0469970703125,
|
|
"logps/ref_chosen": -42.94938278198242,
|
|
"logps/ref_rejected": -73.71023559570312,
|
|
"logps/rejected": -646.6005859375,
|
|
"loss": 0.847,
|
|
"margin_dpo/margin_mean": 208.7927703857422,
|
|
"margin_dpo/margin_std": 197.02679443359375,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.7004405286343612,
|
|
"fcm_dpo/beta": 0.004016070161014795,
|
|
"fcm_dpo/delta": 0.07374633848667145,
|
|
"fcm_dpo/margin": 194.69967651367188,
|
|
"fcm_dpo/q_t": 0.3365083634853363,
|
|
"grad_norm": 47.38435363769531,
|
|
"learning_rate": 1.2611303872132631e-07,
|
|
"logits/chosen": 0.12200789898633957,
|
|
"logits/rejected": 0.1759755164384842,
|
|
"logps/chosen": -456.8595275878906,
|
|
"logps/ref_chosen": -70.77261352539062,
|
|
"logps/ref_rejected": -76.13737487792969,
|
|
"logps/rejected": -656.9239501953125,
|
|
"loss": 0.903,
|
|
"margin_dpo/margin_mean": 194.69967651367188,
|
|
"margin_dpo/margin_std": 213.22210693359375,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.7019089574155654,
|
|
"fcm_dpo/beta": 0.004027483984827995,
|
|
"fcm_dpo/delta": -0.03790191560983658,
|
|
"fcm_dpo/margin": 219.52687072753906,
|
|
"fcm_dpo/q_t": 0.31887826323509216,
|
|
"grad_norm": 45.00698471069336,
|
|
"learning_rate": 1.2500000000000005e-07,
|
|
"logits/chosen": 0.14754655957221985,
|
|
"logits/rejected": 0.16361096501350403,
|
|
"logps/chosen": -366.9229736328125,
|
|
"logps/ref_chosen": -41.440513610839844,
|
|
"logps/ref_rejected": -85.36196899414062,
|
|
"logps/rejected": -630.3712768554688,
|
|
"loss": 0.8133,
|
|
"margin_dpo/margin_mean": 219.52688598632812,
|
|
"margin_dpo/margin_std": 199.22171020507812,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.7033773861967695,
|
|
"fcm_dpo/beta": 0.004113093018531799,
|
|
"fcm_dpo/delta": 0.10552472621202469,
|
|
"fcm_dpo/margin": 181.81784057617188,
|
|
"fcm_dpo/q_t": 0.339580774307251,
|
|
"grad_norm": 50.76175308227539,
|
|
"learning_rate": 1.2389025514492456e-07,
|
|
"logits/chosen": 0.15951776504516602,
|
|
"logits/rejected": 0.168975830078125,
|
|
"logps/chosen": -449.15814208984375,
|
|
"logps/ref_chosen": -53.907920837402344,
|
|
"logps/ref_rejected": -95.1163330078125,
|
|
"logps/rejected": -672.1844482421875,
|
|
"loss": 0.9043,
|
|
"margin_dpo/margin_mean": 181.81784057617188,
|
|
"margin_dpo/margin_std": 180.60443115234375,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.7048458149779736,
|
|
"fcm_dpo/beta": 0.0042091310024261475,
|
|
"fcm_dpo/delta": 0.14673718810081482,
|
|
"fcm_dpo/margin": 168.90711975097656,
|
|
"fcm_dpo/q_t": 0.35500940680503845,
|
|
"grad_norm": 72.191650390625,
|
|
"learning_rate": 1.227838333989088e-07,
|
|
"logits/chosen": 0.20184411108493805,
|
|
"logits/rejected": 0.23821671307086945,
|
|
"logps/chosen": -465.4715576171875,
|
|
"logps/ref_chosen": -58.682701110839844,
|
|
"logps/ref_rejected": -82.93248748779297,
|
|
"logps/rejected": -658.6284790039062,
|
|
"loss": 0.9739,
|
|
"margin_dpo/margin_mean": 168.90713500976562,
|
|
"margin_dpo/margin_std": 211.50201416015625,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.7063142437591777,
|
|
"fcm_dpo/beta": 0.0041699884459376335,
|
|
"fcm_dpo/delta": -0.032555945217609406,
|
|
"fcm_dpo/margin": 210.88694763183594,
|
|
"fcm_dpo/q_t": 0.3185991644859314,
|
|
"grad_norm": 53.58448028564453,
|
|
"learning_rate": 1.2168076391719489e-07,
|
|
"logits/chosen": 0.14901934564113617,
|
|
"logits/rejected": 0.1714628040790558,
|
|
"logps/chosen": -417.12506103515625,
|
|
"logps/ref_chosen": -54.964271545410156,
|
|
"logps/ref_rejected": -92.42044067382812,
|
|
"logps/rejected": -665.4682006835938,
|
|
"loss": 0.8391,
|
|
"margin_dpo/margin_mean": 210.886962890625,
|
|
"margin_dpo/margin_std": 205.45460510253906,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.7077826725403817,
|
|
"fcm_dpo/beta": 0.004300164058804512,
|
|
"fcm_dpo/delta": 0.2011958360671997,
|
|
"fcm_dpo/margin": 154.00543212890625,
|
|
"fcm_dpo/q_t": 0.3627600073814392,
|
|
"grad_norm": 81.67280578613281,
|
|
"learning_rate": 1.2058107576668938e-07,
|
|
"logits/chosen": 0.04958938807249069,
|
|
"logits/rejected": 0.0661940723657608,
|
|
"logps/chosen": -464.51654052734375,
|
|
"logps/ref_chosen": -67.553466796875,
|
|
"logps/ref_rejected": -87.58953857421875,
|
|
"logps/rejected": -638.5580444335938,
|
|
"loss": 1.0015,
|
|
"margin_dpo/margin_mean": 154.0054473876953,
|
|
"margin_dpo/margin_std": 211.11434936523438,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.7092511013215859,
|
|
"fcm_dpo/beta": 0.004210878163576126,
|
|
"fcm_dpo/delta": -0.2048397660255432,
|
|
"fcm_dpo/margin": 245.37893676757812,
|
|
"fcm_dpo/q_t": 0.2938799262046814,
|
|
"grad_norm": 45.401424407958984,
|
|
"learning_rate": 1.194847979251979e-07,
|
|
"logits/chosen": 0.1060781329870224,
|
|
"logits/rejected": 0.14212235808372498,
|
|
"logps/chosen": -432.8665771484375,
|
|
"logps/ref_chosen": -63.32981872558594,
|
|
"logps/ref_rejected": -95.78697204589844,
|
|
"logps/rejected": -710.70263671875,
|
|
"loss": 0.7583,
|
|
"margin_dpo/margin_mean": 245.37890625,
|
|
"margin_dpo/margin_std": 219.05149841308594,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.71071953010279,
|
|
"fcm_dpo/beta": 0.004176275804638863,
|
|
"fcm_dpo/delta": -0.05583025887608528,
|
|
"fcm_dpo/margin": 215.44418334960938,
|
|
"fcm_dpo/q_t": 0.313819944858551,
|
|
"grad_norm": 44.620079040527344,
|
|
"learning_rate": 1.1839195928066101e-07,
|
|
"logits/chosen": 0.1926443874835968,
|
|
"logits/rejected": 0.22298608720302582,
|
|
"logps/chosen": -402.5946350097656,
|
|
"logps/ref_chosen": -59.13812255859375,
|
|
"logps/ref_rejected": -84.37144470214844,
|
|
"logps/rejected": -643.2721557617188,
|
|
"loss": 0.7975,
|
|
"margin_dpo/margin_mean": 215.44418334960938,
|
|
"margin_dpo/margin_std": 184.650146484375,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.7121879588839941,
|
|
"fcm_dpo/beta": 0.0040985457599163055,
|
|
"fcm_dpo/delta": -0.07049298286437988,
|
|
"fcm_dpo/margin": 223.0164031982422,
|
|
"fcm_dpo/q_t": 0.31292468309402466,
|
|
"grad_norm": 58.52872848510742,
|
|
"learning_rate": 1.1730258863039347e-07,
|
|
"logits/chosen": 0.12118280678987503,
|
|
"logits/rejected": 0.13599839806556702,
|
|
"logps/chosen": -386.78875732421875,
|
|
"logps/ref_chosen": -58.849571228027344,
|
|
"logps/ref_rejected": -103.36408233642578,
|
|
"logps/rejected": -654.3197021484375,
|
|
"loss": 0.825,
|
|
"margin_dpo/margin_mean": 223.0164031982422,
|
|
"margin_dpo/margin_std": 212.99270629882812,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.7136563876651982,
|
|
"fcm_dpo/beta": 0.004043485503643751,
|
|
"fcm_dpo/delta": -0.08242425322532654,
|
|
"fcm_dpo/margin": 228.66433715820312,
|
|
"fcm_dpo/q_t": 0.31584662199020386,
|
|
"grad_norm": 74.97518157958984,
|
|
"learning_rate": 1.1621671468032493e-07,
|
|
"logits/chosen": 0.17448630928993225,
|
|
"logits/rejected": 0.20201058685779572,
|
|
"logps/chosen": -424.857177734375,
|
|
"logps/ref_chosen": -55.25966262817383,
|
|
"logps/ref_rejected": -92.13936614990234,
|
|
"logps/rejected": -690.4011840820312,
|
|
"loss": 0.8548,
|
|
"margin_dpo/margin_mean": 228.66432189941406,
|
|
"margin_dpo/margin_std": 240.98989868164062,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.7151248164464024,
|
|
"fcm_dpo/beta": 0.004021212458610535,
|
|
"fcm_dpo/delta": 0.06276712566614151,
|
|
"fcm_dpo/margin": 196.9682159423828,
|
|
"fcm_dpo/q_t": 0.32838380336761475,
|
|
"grad_norm": 51.10014724731445,
|
|
"learning_rate": 1.1513436604424378e-07,
|
|
"logits/chosen": 0.13462239503860474,
|
|
"logits/rejected": 0.15736320614814758,
|
|
"logps/chosen": -429.49041748046875,
|
|
"logps/ref_chosen": -53.06330871582031,
|
|
"logps/ref_rejected": -92.41883087158203,
|
|
"logps/rejected": -665.8141479492188,
|
|
"loss": 0.857,
|
|
"margin_dpo/margin_mean": 196.9682159423828,
|
|
"margin_dpo/margin_std": 176.92190551757812,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.7165932452276065,
|
|
"fcm_dpo/beta": 0.004063923843204975,
|
|
"fcm_dpo/delta": 0.014564893208444118,
|
|
"fcm_dpo/margin": 205.87074279785156,
|
|
"fcm_dpo/q_t": 0.31959283351898193,
|
|
"grad_norm": 45.07450485229492,
|
|
"learning_rate": 1.1405557124304335e-07,
|
|
"logits/chosen": 0.1483457088470459,
|
|
"logits/rejected": 0.18104881048202515,
|
|
"logps/chosen": -406.4317932128906,
|
|
"logps/ref_chosen": -52.22815704345703,
|
|
"logps/ref_rejected": -84.00656127929688,
|
|
"logps/rejected": -644.0809326171875,
|
|
"loss": 0.8086,
|
|
"margin_dpo/margin_mean": 205.87075805664062,
|
|
"margin_dpo/margin_std": 163.66893005371094,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.7180616740088106,
|
|
"fcm_dpo/beta": 0.00414654053747654,
|
|
"fcm_dpo/delta": 0.13501515984535217,
|
|
"fcm_dpo/margin": 174.794677734375,
|
|
"fcm_dpo/q_t": 0.35005754232406616,
|
|
"grad_norm": 48.02519607543945,
|
|
"learning_rate": 1.1298035870396985e-07,
|
|
"logits/chosen": 0.07817474007606506,
|
|
"logits/rejected": 0.10687027126550674,
|
|
"logps/chosen": -400.2724914550781,
|
|
"logps/ref_chosen": -55.989627838134766,
|
|
"logps/ref_rejected": -79.39812469482422,
|
|
"logps/rejected": -598.4757080078125,
|
|
"loss": 0.9059,
|
|
"margin_dpo/margin_mean": 174.794677734375,
|
|
"margin_dpo/margin_std": 185.54888916015625,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.7195301027900147,
|
|
"fcm_dpo/beta": 0.004186190664768219,
|
|
"fcm_dpo/delta": -0.00793827697634697,
|
|
"fcm_dpo/margin": 204.7318878173828,
|
|
"fcm_dpo/q_t": 0.3310641646385193,
|
|
"grad_norm": 46.17793273925781,
|
|
"learning_rate": 1.1190875675987355e-07,
|
|
"logits/chosen": 0.06553199887275696,
|
|
"logits/rejected": 0.07176955789327621,
|
|
"logps/chosen": -421.15704345703125,
|
|
"logps/ref_chosen": -52.36639404296875,
|
|
"logps/ref_rejected": -110.4090576171875,
|
|
"logps/rejected": -683.9315795898438,
|
|
"loss": 0.8888,
|
|
"margin_dpo/margin_mean": 204.73190307617188,
|
|
"margin_dpo/margin_std": 232.12014770507812,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.7209985315712188,
|
|
"fcm_dpo/beta": 0.00430173147469759,
|
|
"fcm_dpo/delta": 0.19360697269439697,
|
|
"fcm_dpo/margin": 155.4693603515625,
|
|
"fcm_dpo/q_t": 0.3650928735733032,
|
|
"grad_norm": 54.773380279541016,
|
|
"learning_rate": 1.1084079364846241e-07,
|
|
"logits/chosen": 0.20436616241931915,
|
|
"logits/rejected": 0.23191238939762115,
|
|
"logps/chosen": -448.61712646484375,
|
|
"logps/ref_chosen": -60.11626434326172,
|
|
"logps/ref_rejected": -73.27278900146484,
|
|
"logps/rejected": -617.2430419921875,
|
|
"loss": 0.9805,
|
|
"margin_dpo/margin_mean": 155.46937561035156,
|
|
"margin_dpo/margin_std": 202.46267700195312,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.7224669603524229,
|
|
"fcm_dpo/beta": 0.00451195752248168,
|
|
"fcm_dpo/delta": 0.2712585926055908,
|
|
"fcm_dpo/margin": 131.85690307617188,
|
|
"fcm_dpo/q_t": 0.3848981261253357,
|
|
"grad_norm": 64.95230102539062,
|
|
"learning_rate": 1.097764975115576e-07,
|
|
"logits/chosen": 0.2151469886302948,
|
|
"logits/rejected": 0.23940692842006683,
|
|
"logps/chosen": -462.7021789550781,
|
|
"logps/ref_chosen": -53.994178771972656,
|
|
"logps/ref_rejected": -72.65962219238281,
|
|
"logps/rejected": -613.2244873046875,
|
|
"loss": 1.12,
|
|
"margin_dpo/margin_mean": 131.85690307617188,
|
|
"margin_dpo/margin_std": 238.486328125,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.723935389133627,
|
|
"fcm_dpo/beta": 0.004592692479491234,
|
|
"fcm_dpo/delta": 0.06759007275104523,
|
|
"fcm_dpo/margin": 171.4335174560547,
|
|
"fcm_dpo/q_t": 0.33537688851356506,
|
|
"grad_norm": 69.35541534423828,
|
|
"learning_rate": 1.0871589639435203e-07,
|
|
"logits/chosen": 0.1804664134979248,
|
|
"logits/rejected": 0.21824055910110474,
|
|
"logps/chosen": -479.1470642089844,
|
|
"logps/ref_chosen": -75.49723815917969,
|
|
"logps/ref_rejected": -87.32301330566406,
|
|
"logps/rejected": -662.4063720703125,
|
|
"loss": 0.8862,
|
|
"margin_dpo/margin_mean": 171.4335174560547,
|
|
"margin_dpo/margin_std": 175.6283416748047,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.7254038179148311,
|
|
"fcm_dpo/beta": 0.004511960782110691,
|
|
"fcm_dpo/delta": -0.20742294192314148,
|
|
"fcm_dpo/margin": 229.67926025390625,
|
|
"fcm_dpo/q_t": 0.285893052816391,
|
|
"grad_norm": 53.599117279052734,
|
|
"learning_rate": 1.0765901824467166e-07,
|
|
"logits/chosen": 0.17816877365112305,
|
|
"logits/rejected": 0.18146733939647675,
|
|
"logps/chosen": -398.7049865722656,
|
|
"logps/ref_chosen": -41.35926818847656,
|
|
"logps/ref_rejected": -86.09136962890625,
|
|
"logps/rejected": -673.1163330078125,
|
|
"loss": 0.753,
|
|
"margin_dpo/margin_mean": 229.6792449951172,
|
|
"margin_dpo/margin_std": 192.52398681640625,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.7268722466960352,
|
|
"fcm_dpo/beta": 0.004417981021106243,
|
|
"fcm_dpo/delta": -0.08530034869909286,
|
|
"fcm_dpo/margin": 209.88265991210938,
|
|
"fcm_dpo/q_t": 0.31514835357666016,
|
|
"grad_norm": 48.33843994140625,
|
|
"learning_rate": 1.0660589091223854e-07,
|
|
"logits/chosen": 0.09450630843639374,
|
|
"logits/rejected": 0.11175034940242767,
|
|
"logps/chosen": -458.7616271972656,
|
|
"logps/ref_chosen": -63.53507995605469,
|
|
"logps/ref_rejected": -91.42443084716797,
|
|
"logps/rejected": -696.5336303710938,
|
|
"loss": 0.8294,
|
|
"margin_dpo/margin_mean": 209.88265991210938,
|
|
"margin_dpo/margin_std": 212.19480895996094,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.7283406754772394,
|
|
"fcm_dpo/beta": 0.004461340140551329,
|
|
"fcm_dpo/delta": 0.14957168698310852,
|
|
"fcm_dpo/margin": 159.43557739257812,
|
|
"fcm_dpo/q_t": 0.3518192172050476,
|
|
"grad_norm": 72.45964813232422,
|
|
"learning_rate": 1.0555654214793722e-07,
|
|
"logits/chosen": 0.12275470048189163,
|
|
"logits/rejected": 0.15248063206672668,
|
|
"logps/chosen": -506.01953125,
|
|
"logps/ref_chosen": -72.5919189453125,
|
|
"logps/ref_rejected": -84.32933807373047,
|
|
"logps/rejected": -677.1925048828125,
|
|
"loss": 0.9246,
|
|
"margin_dpo/margin_mean": 159.43557739257812,
|
|
"margin_dpo/margin_std": 172.95962524414062,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.7298091042584435,
|
|
"fcm_dpo/beta": 0.004558074288070202,
|
|
"fcm_dpo/delta": 0.1612219661474228,
|
|
"fcm_dpo/margin": 153.30006408691406,
|
|
"fcm_dpo/q_t": 0.35529017448425293,
|
|
"grad_norm": 54.61373519897461,
|
|
"learning_rate": 1.0451099960308374e-07,
|
|
"logits/chosen": 0.1395212858915329,
|
|
"logits/rejected": 0.1674782931804657,
|
|
"logps/chosen": -488.2263488769531,
|
|
"logps/ref_chosen": -58.59397506713867,
|
|
"logps/ref_rejected": -76.28836822509766,
|
|
"logps/rejected": -659.2208251953125,
|
|
"loss": 0.9637,
|
|
"margin_dpo/margin_mean": 153.30006408691406,
|
|
"margin_dpo/margin_std": 177.5799560546875,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.7312775330396476,
|
|
"fcm_dpo/beta": 0.004678555764257908,
|
|
"fcm_dpo/delta": 0.03473177179694176,
|
|
"fcm_dpo/margin": 174.86441040039062,
|
|
"fcm_dpo/q_t": 0.3393765687942505,
|
|
"grad_norm": 51.891902923583984,
|
|
"learning_rate": 1.0346929082869641e-07,
|
|
"logits/chosen": 0.16660353541374207,
|
|
"logits/rejected": 0.19526565074920654,
|
|
"logps/chosen": -492.501220703125,
|
|
"logps/ref_chosen": -71.20565795898438,
|
|
"logps/ref_rejected": -83.95803833007812,
|
|
"logps/rejected": -680.1179809570312,
|
|
"loss": 0.9475,
|
|
"margin_dpo/margin_mean": 174.86439514160156,
|
|
"margin_dpo/margin_std": 226.5364990234375,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.7327459618208517,
|
|
"fcm_dpo/beta": 0.004574180580675602,
|
|
"fcm_dpo/delta": -0.16882994771003723,
|
|
"fcm_dpo/margin": 219.0482940673828,
|
|
"fcm_dpo/q_t": 0.2998501658439636,
|
|
"grad_norm": 52.29130935668945,
|
|
"learning_rate": 1.0243144327477013e-07,
|
|
"logits/chosen": 0.23509922623634338,
|
|
"logits/rejected": 0.2455160915851593,
|
|
"logps/chosen": -449.7971496582031,
|
|
"logps/ref_chosen": -51.25519561767578,
|
|
"logps/ref_rejected": -101.07870483398438,
|
|
"logps/rejected": -718.6689453125,
|
|
"loss": 0.8221,
|
|
"margin_dpo/margin_mean": 219.04830932617188,
|
|
"margin_dpo/margin_std": 219.20545959472656,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.7342143906020558,
|
|
"fcm_dpo/beta": 0.004520269110798836,
|
|
"fcm_dpo/delta": -0.008800679817795753,
|
|
"fcm_dpo/margin": 189.812744140625,
|
|
"fcm_dpo/q_t": 0.3284400701522827,
|
|
"grad_norm": 70.3599853515625,
|
|
"learning_rate": 1.0139748428955333e-07,
|
|
"logits/chosen": 0.20507007837295532,
|
|
"logits/rejected": 0.20711365342140198,
|
|
"logps/chosen": -493.9044189453125,
|
|
"logps/ref_chosen": -57.027442932128906,
|
|
"logps/ref_rejected": -93.93421173095703,
|
|
"logps/rejected": -720.6239013671875,
|
|
"loss": 0.8791,
|
|
"margin_dpo/margin_mean": 189.812744140625,
|
|
"margin_dpo/margin_std": 210.09674072265625,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.73568281938326,
|
|
"fcm_dpo/beta": 0.004478934220969677,
|
|
"fcm_dpo/delta": -0.10455437749624252,
|
|
"fcm_dpo/margin": 210.9066162109375,
|
|
"fcm_dpo/q_t": 0.30981889367103577,
|
|
"grad_norm": 52.43331527709961,
|
|
"learning_rate": 1.0036744111882672e-07,
|
|
"logits/chosen": 0.19089315831661224,
|
|
"logits/rejected": 0.22942286729812622,
|
|
"logps/chosen": -450.37811279296875,
|
|
"logps/ref_chosen": -54.359527587890625,
|
|
"logps/ref_rejected": -80.15670013427734,
|
|
"logps/rejected": -687.0819091796875,
|
|
"loss": 0.8183,
|
|
"margin_dpo/margin_mean": 210.9066162109375,
|
|
"margin_dpo/margin_std": 205.54580688476562,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.737151248164464,
|
|
"fcm_dpo/beta": 0.004374990239739418,
|
|
"fcm_dpo/delta": -0.14604619145393372,
|
|
"fcm_dpo/margin": 224.31927490234375,
|
|
"fcm_dpo/q_t": 0.28903883695602417,
|
|
"grad_norm": 47.08027267456055,
|
|
"learning_rate": 9.934134090518592e-08,
|
|
"logits/chosen": 0.09788864850997925,
|
|
"logits/rejected": 0.13583651185035706,
|
|
"logps/chosen": -435.0950012207031,
|
|
"logps/ref_chosen": -67.60050964355469,
|
|
"logps/ref_rejected": -82.94876098632812,
|
|
"logps/rejected": -674.7625122070312,
|
|
"loss": 0.7296,
|
|
"margin_dpo/margin_mean": 224.31927490234375,
|
|
"margin_dpo/margin_std": 149.10888671875,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.7386196769456681,
|
|
"fcm_dpo/beta": 0.0042848363518714905,
|
|
"fcm_dpo/delta": 0.005043823271989822,
|
|
"fcm_dpo/margin": 197.19277954101562,
|
|
"fcm_dpo/q_t": 0.3233737349510193,
|
|
"grad_norm": 42.94404983520508,
|
|
"learning_rate": 9.831921068732571e-08,
|
|
"logits/chosen": 0.20290768146514893,
|
|
"logits/rejected": 0.2336656153202057,
|
|
"logps/chosen": -442.6397399902344,
|
|
"logps/ref_chosen": -55.078407287597656,
|
|
"logps/ref_rejected": -82.50544738769531,
|
|
"logps/rejected": -667.259521484375,
|
|
"loss": 0.8323,
|
|
"margin_dpo/margin_mean": 197.19277954101562,
|
|
"margin_dpo/margin_std": 179.80767822265625,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.7400881057268722,
|
|
"fcm_dpo/beta": 0.0042025502771139145,
|
|
"fcm_dpo/delta": -0.16033101081848145,
|
|
"fcm_dpo/margin": 236.57833862304688,
|
|
"fcm_dpo/q_t": 0.2951487600803375,
|
|
"grad_norm": 39.35810470581055,
|
|
"learning_rate": 9.730107739932805e-08,
|
|
"logits/chosen": 0.18868711590766907,
|
|
"logits/rejected": 0.19342690706253052,
|
|
"logps/chosen": -456.39227294921875,
|
|
"logps/ref_chosen": -59.96575164794922,
|
|
"logps/ref_rejected": -103.76212310791016,
|
|
"logps/rejected": -736.7669677734375,
|
|
"loss": 0.761,
|
|
"margin_dpo/margin_mean": 236.57835388183594,
|
|
"margin_dpo/margin_std": 194.76629638671875,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.7415565345080763,
|
|
"fcm_dpo/beta": 0.004324901849031448,
|
|
"fcm_dpo/delta": 0.23218274116516113,
|
|
"fcm_dpo/margin": 146.14663696289062,
|
|
"fcm_dpo/q_t": 0.37902575731277466,
|
|
"grad_norm": 94.34353637695312,
|
|
"learning_rate": 9.628696786995188e-08,
|
|
"logits/chosen": 0.12490086257457733,
|
|
"logits/rejected": 0.15079885721206665,
|
|
"logps/chosen": -492.49853515625,
|
|
"logps/ref_chosen": -76.1549072265625,
|
|
"logps/ref_rejected": -88.58537292480469,
|
|
"logps/rejected": -651.0756225585938,
|
|
"loss": 1.0587,
|
|
"margin_dpo/margin_mean": 146.14663696289062,
|
|
"margin_dpo/margin_std": 232.16616821289062,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.7430249632892805,
|
|
"fcm_dpo/beta": 0.004307071678340435,
|
|
"fcm_dpo/delta": -0.050437767058610916,
|
|
"fcm_dpo/margin": 207.72500610351562,
|
|
"fcm_dpo/q_t": 0.3202528655529022,
|
|
"grad_norm": 46.971092224121094,
|
|
"learning_rate": 9.527690882192635e-08,
|
|
"logits/chosen": 0.22537869215011597,
|
|
"logits/rejected": 0.24729499220848083,
|
|
"logps/chosen": -421.39666748046875,
|
|
"logps/ref_chosen": -48.96050262451172,
|
|
"logps/ref_rejected": -78.41505432128906,
|
|
"logps/rejected": -658.5762329101562,
|
|
"loss": 0.8507,
|
|
"margin_dpo/margin_mean": 207.72500610351562,
|
|
"margin_dpo/margin_std": 212.94607543945312,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.7444933920704846,
|
|
"fcm_dpo/beta": 0.004366847686469555,
|
|
"fcm_dpo/delta": 0.05252450332045555,
|
|
"fcm_dpo/margin": 183.55238342285156,
|
|
"fcm_dpo/q_t": 0.33828064799308777,
|
|
"grad_norm": 39.45782470703125,
|
|
"learning_rate": 9.427092687124691e-08,
|
|
"logits/chosen": 0.2182435542345047,
|
|
"logits/rejected": 0.23851092159748077,
|
|
"logps/chosen": -477.6820068359375,
|
|
"logps/ref_chosen": -66.80149841308594,
|
|
"logps/ref_rejected": -95.37289428710938,
|
|
"logps/rejected": -689.8057861328125,
|
|
"loss": 0.8826,
|
|
"margin_dpo/margin_mean": 183.55239868164062,
|
|
"margin_dpo/margin_std": 199.75308227539062,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.7459618208516887,
|
|
"fcm_dpo/beta": 0.004443483427166939,
|
|
"fcm_dpo/delta": 0.08964800834655762,
|
|
"fcm_dpo/margin": 172.5631561279297,
|
|
"fcm_dpo/q_t": 0.34734398126602173,
|
|
"grad_norm": 52.880027770996094,
|
|
"learning_rate": 9.326904852647344e-08,
|
|
"logits/chosen": 0.19013988971710205,
|
|
"logits/rejected": 0.20654146373271942,
|
|
"logps/chosen": -461.3097229003906,
|
|
"logps/ref_chosen": -71.303466796875,
|
|
"logps/ref_rejected": -95.6275405883789,
|
|
"logps/rejected": -658.1969604492188,
|
|
"loss": 0.9348,
|
|
"margin_dpo/margin_mean": 172.5631561279297,
|
|
"margin_dpo/margin_std": 208.7895965576172,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.7474302496328928,
|
|
"fcm_dpo/beta": 0.004504289943724871,
|
|
"fcm_dpo/delta": 0.054536715149879456,
|
|
"fcm_dpo/margin": 177.45721435546875,
|
|
"fcm_dpo/q_t": 0.3389536142349243,
|
|
"grad_norm": 45.22426223754883,
|
|
"learning_rate": 9.227130018803195e-08,
|
|
"logits/chosen": 0.03568051755428314,
|
|
"logits/rejected": 0.04966065660119057,
|
|
"logps/chosen": -405.90570068359375,
|
|
"logps/ref_chosen": -63.81895065307617,
|
|
"logps/ref_rejected": -83.25643920898438,
|
|
"logps/rejected": -602.8004150390625,
|
|
"loss": 0.8893,
|
|
"margin_dpo/margin_mean": 177.45721435546875,
|
|
"margin_dpo/margin_std": 193.94020080566406,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.748898678414097,
|
|
"fcm_dpo/beta": 0.0044273133389651775,
|
|
"fcm_dpo/delta": -0.12143722176551819,
|
|
"fcm_dpo/margin": 216.8166961669922,
|
|
"fcm_dpo/q_t": 0.3008081316947937,
|
|
"grad_norm": 44.7673225402832,
|
|
"learning_rate": 9.127770814751932e-08,
|
|
"logits/chosen": 0.1561957597732544,
|
|
"logits/rejected": 0.15737219154834747,
|
|
"logps/chosen": -436.9364013671875,
|
|
"logps/ref_chosen": -51.878448486328125,
|
|
"logps/ref_rejected": -102.7651596069336,
|
|
"logps/rejected": -704.6397705078125,
|
|
"loss": 0.7805,
|
|
"margin_dpo/margin_mean": 216.8166961669922,
|
|
"margin_dpo/margin_std": 185.16363525390625,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.750367107195301,
|
|
"fcm_dpo/beta": 0.004378630314022303,
|
|
"fcm_dpo/delta": -0.055393002927303314,
|
|
"fcm_dpo/margin": 205.57594299316406,
|
|
"fcm_dpo/q_t": 0.32261648774147034,
|
|
"grad_norm": 46.155799865722656,
|
|
"learning_rate": 9.028829858700973e-08,
|
|
"logits/chosen": 0.18099631369113922,
|
|
"logits/rejected": 0.1909976303577423,
|
|
"logps/chosen": -429.7141418457031,
|
|
"logps/ref_chosen": -60.23811721801758,
|
|
"logps/ref_rejected": -92.85676574707031,
|
|
"logps/rejected": -667.9087524414062,
|
|
"loss": 0.8824,
|
|
"margin_dpo/margin_mean": 205.57594299316406,
|
|
"margin_dpo/margin_std": 232.60919189453125,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.7518355359765051,
|
|
"fcm_dpo/beta": 0.00426853634417057,
|
|
"fcm_dpo/delta": -0.1081642284989357,
|
|
"fcm_dpo/margin": 222.03216552734375,
|
|
"fcm_dpo/q_t": 0.3019694983959198,
|
|
"grad_norm": 37.78676223754883,
|
|
"learning_rate": 8.930309757836516e-08,
|
|
"logits/chosen": 0.26449891924858093,
|
|
"logits/rejected": 0.2904709279537201,
|
|
"logps/chosen": -410.3562927246094,
|
|
"logps/ref_chosen": -54.905494689941406,
|
|
"logps/ref_rejected": -81.87586975097656,
|
|
"logps/rejected": -659.35888671875,
|
|
"loss": 0.7736,
|
|
"margin_dpo/margin_mean": 222.03216552734375,
|
|
"margin_dpo/margin_std": 183.77151489257812,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.7533039647577092,
|
|
"fcm_dpo/beta": 0.004307272844016552,
|
|
"fcm_dpo/delta": 0.13749617338180542,
|
|
"fcm_dpo/margin": 167.71078491210938,
|
|
"fcm_dpo/q_t": 0.35119718313217163,
|
|
"grad_norm": 74.993896484375,
|
|
"learning_rate": 8.832213108254863e-08,
|
|
"logits/chosen": 0.2815062999725342,
|
|
"logits/rejected": 0.31944650411605835,
|
|
"logps/chosen": -453.9299621582031,
|
|
"logps/ref_chosen": -64.91644287109375,
|
|
"logps/ref_rejected": -76.06245422363281,
|
|
"logps/rejected": -632.7867431640625,
|
|
"loss": 0.9587,
|
|
"margin_dpo/margin_mean": 167.71078491210938,
|
|
"margin_dpo/margin_std": 203.91494750976562,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.7547723935389133,
|
|
"fcm_dpo/beta": 0.004421452060341835,
|
|
"fcm_dpo/delta": 0.08648448437452316,
|
|
"fcm_dpo/margin": 174.20184326171875,
|
|
"fcm_dpo/q_t": 0.3494170308113098,
|
|
"grad_norm": 63.60978698730469,
|
|
"learning_rate": 8.734542494893954e-08,
|
|
"logits/chosen": 0.15184932947158813,
|
|
"logits/rejected": 0.18162909150123596,
|
|
"logps/chosen": -454.2314453125,
|
|
"logps/ref_chosen": -74.22957611083984,
|
|
"logps/ref_rejected": -78.945556640625,
|
|
"logps/rejected": -633.1492919921875,
|
|
"loss": 0.9468,
|
|
"margin_dpo/margin_mean": 174.20184326171875,
|
|
"margin_dpo/margin_std": 221.69790649414062,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.7562408223201175,
|
|
"fcm_dpo/beta": 0.004536722786724567,
|
|
"fcm_dpo/delta": 0.15707728266716003,
|
|
"fcm_dpo/margin": 155.23159790039062,
|
|
"fcm_dpo/q_t": 0.3546082079410553,
|
|
"grad_norm": 68.81385803222656,
|
|
"learning_rate": 8.637300491465272e-08,
|
|
"logits/chosen": 0.09542754292488098,
|
|
"logits/rejected": 0.10104284435510635,
|
|
"logps/chosen": -383.7822570800781,
|
|
"logps/ref_chosen": -50.40156555175781,
|
|
"logps/ref_rejected": -87.09774780273438,
|
|
"logps/rejected": -575.7100219726562,
|
|
"loss": 0.9591,
|
|
"margin_dpo/margin_mean": 155.23159790039062,
|
|
"margin_dpo/margin_std": 189.18768310546875,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.7577092511013216,
|
|
"fcm_dpo/beta": 0.004510688595473766,
|
|
"fcm_dpo/delta": -0.14710889756679535,
|
|
"fcm_dpo/margin": 217.89169311523438,
|
|
"fcm_dpo/q_t": 0.2980697453022003,
|
|
"grad_norm": 47.67902755737305,
|
|
"learning_rate": 8.540489660386064e-08,
|
|
"logits/chosen": 0.25269433856010437,
|
|
"logits/rejected": 0.25404471158981323,
|
|
"logps/chosen": -409.7538757324219,
|
|
"logps/ref_chosen": -64.64956665039062,
|
|
"logps/ref_rejected": -111.72237396240234,
|
|
"logps/rejected": -674.7183837890625,
|
|
"loss": 0.7673,
|
|
"margin_dpo/margin_mean": 217.89169311523438,
|
|
"margin_dpo/margin_std": 184.21299743652344,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.7591776798825257,
|
|
"fcm_dpo/beta": 0.004363768734037876,
|
|
"fcm_dpo/delta": -0.09877770394086838,
|
|
"fcm_dpo/margin": 215.03372192382812,
|
|
"fcm_dpo/q_t": 0.31161239743232727,
|
|
"grad_norm": 51.91146469116211,
|
|
"learning_rate": 8.444112552711752e-08,
|
|
"logits/chosen": 0.12666018307209015,
|
|
"logits/rejected": 0.15497204661369324,
|
|
"logps/chosen": -419.92864990234375,
|
|
"logps/ref_chosen": -60.913551330566406,
|
|
"logps/ref_rejected": -89.08308410644531,
|
|
"logps/rejected": -663.1319580078125,
|
|
"loss": 0.8144,
|
|
"margin_dpo/margin_mean": 215.03372192382812,
|
|
"margin_dpo/margin_std": 210.22381591796875,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.7606461086637298,
|
|
"fcm_dpo/beta": 0.004400432575494051,
|
|
"fcm_dpo/delta": 0.06045955419540405,
|
|
"fcm_dpo/margin": 180.52365112304688,
|
|
"fcm_dpo/q_t": 0.3386451005935669,
|
|
"grad_norm": 47.44013595581055,
|
|
"learning_rate": 8.348171708068747e-08,
|
|
"logits/chosen": 0.14773935079574585,
|
|
"logits/rejected": 0.1560356616973877,
|
|
"logps/chosen": -411.3404541015625,
|
|
"logps/ref_chosen": -57.45589065551758,
|
|
"logps/ref_rejected": -85.31269836425781,
|
|
"logps/rejected": -619.7208862304688,
|
|
"loss": 0.9154,
|
|
"margin_dpo/margin_mean": 180.52365112304688,
|
|
"margin_dpo/margin_std": 208.49659729003906,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.762114537444934,
|
|
"fcm_dpo/beta": 0.004505423828959465,
|
|
"fcm_dpo/delta": 0.12294422835111618,
|
|
"fcm_dpo/margin": 163.3507080078125,
|
|
"fcm_dpo/q_t": 0.35014083981513977,
|
|
"grad_norm": 41.567039489746094,
|
|
"learning_rate": 8.25266965458755e-08,
|
|
"logits/chosen": 0.07905039191246033,
|
|
"logits/rejected": 0.09712234139442444,
|
|
"logps/chosen": -403.34033203125,
|
|
"logps/ref_chosen": -74.06331634521484,
|
|
"logps/ref_rejected": -104.44416809082031,
|
|
"logps/rejected": -597.0718994140625,
|
|
"loss": 0.9354,
|
|
"margin_dpo/margin_mean": 163.3507080078125,
|
|
"margin_dpo/margin_std": 193.86541748046875,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.7635829662261381,
|
|
"fcm_dpo/beta": 0.00451300572603941,
|
|
"fcm_dpo/delta": 0.04465455561876297,
|
|
"fcm_dpo/margin": 178.88897705078125,
|
|
"fcm_dpo/q_t": 0.3343246579170227,
|
|
"grad_norm": 39.79988479614258,
|
|
"learning_rate": 8.15760890883607e-08,
|
|
"logits/chosen": 0.14485794305801392,
|
|
"logits/rejected": 0.15455442667007446,
|
|
"logps/chosen": -402.969482421875,
|
|
"logps/ref_chosen": -70.2998275756836,
|
|
"logps/ref_rejected": -99.98133850097656,
|
|
"logps/rejected": -611.5400390625,
|
|
"loss": 0.8643,
|
|
"margin_dpo/margin_mean": 178.88897705078125,
|
|
"margin_dpo/margin_std": 172.38330078125,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.7650513950073421,
|
|
"fcm_dpo/beta": 0.0046501983888447285,
|
|
"fcm_dpo/delta": 0.020647704601287842,
|
|
"fcm_dpo/margin": 177.8216552734375,
|
|
"fcm_dpo/q_t": 0.3344711661338806,
|
|
"grad_norm": 54.50603103637695,
|
|
"learning_rate": 8.062991975753378e-08,
|
|
"logits/chosen": 0.19291797280311584,
|
|
"logits/rejected": 0.2164836972951889,
|
|
"logps/chosen": -377.03448486328125,
|
|
"logps/ref_chosen": -58.14292526245117,
|
|
"logps/ref_rejected": -83.28060913085938,
|
|
"logps/rejected": -579.9937744140625,
|
|
"loss": 0.8861,
|
|
"margin_dpo/margin_mean": 177.82164001464844,
|
|
"margin_dpo/margin_std": 186.47982788085938,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.7665198237885462,
|
|
"fcm_dpo/beta": 0.004662246443331242,
|
|
"fcm_dpo/delta": 0.09469278156757355,
|
|
"fcm_dpo/margin": 163.50579833984375,
|
|
"fcm_dpo/q_t": 0.3395306468009949,
|
|
"grad_norm": 50.51918029785156,
|
|
"learning_rate": 7.968821348583643e-08,
|
|
"logits/chosen": 0.21042799949645996,
|
|
"logits/rejected": 0.22489967942237854,
|
|
"logps/chosen": -402.761474609375,
|
|
"logps/ref_chosen": -46.54766845703125,
|
|
"logps/ref_rejected": -66.01388549804688,
|
|
"logps/rejected": -585.7335205078125,
|
|
"loss": 0.9021,
|
|
"margin_dpo/margin_mean": 163.5058135986328,
|
|
"margin_dpo/margin_std": 173.2716522216797,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.7679882525697503,
|
|
"fcm_dpo/beta": 0.0046957386657595634,
|
|
"fcm_dpo/delta": 0.042239535599946976,
|
|
"fcm_dpo/margin": 172.7479248046875,
|
|
"fcm_dpo/q_t": 0.32918986678123474,
|
|
"grad_norm": 57.94752502441406,
|
|
"learning_rate": 7.875099508810484e-08,
|
|
"logits/chosen": 0.1128598004579544,
|
|
"logits/rejected": 0.1486237496137619,
|
|
"logps/chosen": -405.51531982421875,
|
|
"logps/ref_chosen": -61.76960372924805,
|
|
"logps/ref_rejected": -83.76141357421875,
|
|
"logps/rejected": -600.2550048828125,
|
|
"loss": 0.8846,
|
|
"margin_dpo/margin_mean": 172.74790954589844,
|
|
"margin_dpo/margin_std": 180.76382446289062,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.7694566813509545,
|
|
"fcm_dpo/beta": 0.004721796605736017,
|
|
"fcm_dpo/delta": 0.032916050404310226,
|
|
"fcm_dpo/margin": 173.58346557617188,
|
|
"fcm_dpo/q_t": 0.32910919189453125,
|
|
"grad_norm": 65.28778076171875,
|
|
"learning_rate": 7.781828926091535e-08,
|
|
"logits/chosen": 0.13729023933410645,
|
|
"logits/rejected": 0.17816482484340668,
|
|
"logps/chosen": -419.248779296875,
|
|
"logps/ref_chosen": -78.0720443725586,
|
|
"logps/ref_rejected": -81.30198669433594,
|
|
"logps/rejected": -596.0621948242188,
|
|
"loss": 0.8804,
|
|
"margin_dpo/margin_mean": 173.58346557617188,
|
|
"margin_dpo/margin_std": 179.30389404296875,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.7709251101321586,
|
|
"fcm_dpo/beta": 0.0045774709433317184,
|
|
"fcm_dpo/delta": -0.18320885300636292,
|
|
"fcm_dpo/margin": 220.95278930664062,
|
|
"fcm_dpo/q_t": 0.2898215353488922,
|
|
"grad_norm": 56.061824798583984,
|
|
"learning_rate": 7.689012058193384e-08,
|
|
"logits/chosen": 0.2006542831659317,
|
|
"logits/rejected": 0.2043372541666031,
|
|
"logps/chosen": -392.4875183105469,
|
|
"logps/ref_chosen": -50.827857971191406,
|
|
"logps/ref_rejected": -100.05294036865234,
|
|
"logps/rejected": -662.6654052734375,
|
|
"loss": 0.747,
|
|
"margin_dpo/margin_mean": 220.95277404785156,
|
|
"margin_dpo/margin_std": 174.82464599609375,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.7723935389133627,
|
|
"fcm_dpo/beta": 0.0045228018425405025,
|
|
"fcm_dpo/delta": -0.1400771588087082,
|
|
"fcm_dpo/margin": 215.81175231933594,
|
|
"fcm_dpo/q_t": 0.30008208751678467,
|
|
"grad_norm": 49.047428131103516,
|
|
"learning_rate": 7.596651350926836e-08,
|
|
"logits/chosen": 0.07574430853128433,
|
|
"logits/rejected": 0.10800427943468094,
|
|
"logps/chosen": -406.01776123046875,
|
|
"logps/ref_chosen": -63.167236328125,
|
|
"logps/ref_rejected": -86.30934143066406,
|
|
"logps/rejected": -644.9716186523438,
|
|
"loss": 0.7778,
|
|
"margin_dpo/margin_mean": 215.81175231933594,
|
|
"margin_dpo/margin_std": 185.41110229492188,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.7738619676945668,
|
|
"fcm_dpo/beta": 0.0044424207881093025,
|
|
"fcm_dpo/delta": 0.03708699345588684,
|
|
"fcm_dpo/margin": 183.49468994140625,
|
|
"fcm_dpo/q_t": 0.33004334568977356,
|
|
"grad_norm": 53.18086624145508,
|
|
"learning_rate": 7.504749238082414e-08,
|
|
"logits/chosen": 0.27492955327033997,
|
|
"logits/rejected": 0.317468523979187,
|
|
"logps/chosen": -429.421142578125,
|
|
"logps/ref_chosen": -71.12867736816406,
|
|
"logps/ref_rejected": -78.3425521850586,
|
|
"logps/rejected": -620.1297607421875,
|
|
"loss": 0.854,
|
|
"margin_dpo/margin_mean": 183.49468994140625,
|
|
"margin_dpo/margin_std": 172.89886474609375,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.775330396475771,
|
|
"fcm_dpo/beta": 0.0044939653016626835,
|
|
"fcm_dpo/delta": 0.03126390278339386,
|
|
"fcm_dpo/margin": 182.74923706054688,
|
|
"fcm_dpo/q_t": 0.33725494146347046,
|
|
"grad_norm": 43.81291198730469,
|
|
"learning_rate": 7.413308141366254e-08,
|
|
"logits/chosen": 0.19857966899871826,
|
|
"logits/rejected": 0.22464889287948608,
|
|
"logps/chosen": -438.86474609375,
|
|
"logps/ref_chosen": -68.0894546508789,
|
|
"logps/ref_rejected": -93.91006469726562,
|
|
"logps/rejected": -647.4345703125,
|
|
"loss": 0.8966,
|
|
"margin_dpo/margin_mean": 182.74923706054688,
|
|
"margin_dpo/margin_std": 207.980224609375,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.7767988252569751,
|
|
"fcm_dpo/beta": 0.004637697245925665,
|
|
"fcm_dpo/delta": 0.18158438801765442,
|
|
"fcm_dpo/margin": 146.76773071289062,
|
|
"fcm_dpo/q_t": 0.3623617887496948,
|
|
"grad_norm": 63.04121017456055,
|
|
"learning_rate": 7.322330470336313e-08,
|
|
"logits/chosen": 0.2721368074417114,
|
|
"logits/rejected": 0.27319759130477905,
|
|
"logps/chosen": -482.9656982421875,
|
|
"logps/ref_chosen": -55.57495880126953,
|
|
"logps/ref_rejected": -89.20909118652344,
|
|
"logps/rejected": -663.3675537109375,
|
|
"loss": 0.9968,
|
|
"margin_dpo/margin_mean": 146.76773071289062,
|
|
"margin_dpo/margin_std": 192.93203735351562,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.7782672540381792,
|
|
"fcm_dpo/beta": 0.0045324950478971004,
|
|
"fcm_dpo/delta": -0.1806645691394806,
|
|
"fcm_dpo/margin": 223.07403564453125,
|
|
"fcm_dpo/q_t": 0.29591965675354004,
|
|
"grad_norm": 33.03998565673828,
|
|
"learning_rate": 7.231818622338822e-08,
|
|
"logits/chosen": 0.19959846138954163,
|
|
"logits/rejected": 0.21465358138084412,
|
|
"logps/chosen": -394.14892578125,
|
|
"logps/ref_chosen": -47.601417541503906,
|
|
"logps/ref_rejected": -87.2845230102539,
|
|
"logps/rejected": -656.906005859375,
|
|
"loss": 0.7904,
|
|
"margin_dpo/margin_mean": 223.07403564453125,
|
|
"margin_dpo/margin_std": 207.3885955810547,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.7797356828193832,
|
|
"fcm_dpo/beta": 0.004460107535123825,
|
|
"fcm_dpo/delta": -0.07478072494268417,
|
|
"fcm_dpo/margin": 205.7947235107422,
|
|
"fcm_dpo/q_t": 0.31518983840942383,
|
|
"grad_norm": 38.86375045776367,
|
|
"learning_rate": 7.141774982445147e-08,
|
|
"logits/chosen": 0.20167914032936096,
|
|
"logits/rejected": 0.2316320836544037,
|
|
"logps/chosen": -419.4433898925781,
|
|
"logps/ref_chosen": -55.246063232421875,
|
|
"logps/ref_rejected": -70.60598754882812,
|
|
"logps/rejected": -640.5980224609375,
|
|
"loss": 0.8204,
|
|
"margin_dpo/margin_mean": 205.79470825195312,
|
|
"margin_dpo/margin_std": 199.72259521484375,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.7812041116005873,
|
|
"fcm_dpo/beta": 0.004327258560806513,
|
|
"fcm_dpo/delta": -0.08652237057685852,
|
|
"fcm_dpo/margin": 213.4281005859375,
|
|
"fcm_dpo/q_t": 0.3172354996204376,
|
|
"grad_norm": 60.72142791748047,
|
|
"learning_rate": 7.052201923388953e-08,
|
|
"logits/chosen": 0.1827022284269333,
|
|
"logits/rejected": 0.21410122513771057,
|
|
"logps/chosen": -462.40985107421875,
|
|
"logps/ref_chosen": -70.28601837158203,
|
|
"logps/ref_rejected": -86.5913314819336,
|
|
"logps/rejected": -692.1432495117188,
|
|
"loss": 0.8703,
|
|
"margin_dpo/margin_mean": 213.4281005859375,
|
|
"margin_dpo/margin_std": 228.88232421875,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.7826725403817915,
|
|
"fcm_dpo/beta": 0.004470104351639748,
|
|
"fcm_dpo/delta": 0.0903887152671814,
|
|
"fcm_dpo/margin": 170.91561889648438,
|
|
"fcm_dpo/q_t": 0.34557852149009705,
|
|
"grad_norm": 68.10647583007812,
|
|
"learning_rate": 6.963101805503646e-08,
|
|
"logits/chosen": 0.28478050231933594,
|
|
"logits/rejected": 0.3137226700782776,
|
|
"logps/chosen": -441.4239196777344,
|
|
"logps/ref_chosen": -64.8551025390625,
|
|
"logps/ref_rejected": -76.58805847167969,
|
|
"logps/rejected": -624.072509765625,
|
|
"loss": 0.9315,
|
|
"margin_dpo/margin_mean": 170.91561889648438,
|
|
"margin_dpo/margin_std": 198.55360412597656,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.7841409691629956,
|
|
"fcm_dpo/beta": 0.00448148837313056,
|
|
"fcm_dpo/delta": 0.044943343847990036,
|
|
"fcm_dpo/margin": 180.44354248046875,
|
|
"fcm_dpo/q_t": 0.32874345779418945,
|
|
"grad_norm": 65.2886962890625,
|
|
"learning_rate": 6.874476976660184e-08,
|
|
"logits/chosen": 0.23872573673725128,
|
|
"logits/rejected": 0.2578517496585846,
|
|
"logps/chosen": -446.6659240722656,
|
|
"logps/ref_chosen": -60.119388580322266,
|
|
"logps/ref_rejected": -78.54347229003906,
|
|
"logps/rejected": -645.5335083007812,
|
|
"loss": 0.8705,
|
|
"margin_dpo/margin_mean": 180.4435577392578,
|
|
"margin_dpo/margin_std": 180.7930450439453,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.7856093979441997,
|
|
"fcm_dpo/beta": 0.004449739120900631,
|
|
"fcm_dpo/delta": -0.12627378106117249,
|
|
"fcm_dpo/margin": 216.5131072998047,
|
|
"fcm_dpo/q_t": 0.3024979829788208,
|
|
"grad_norm": 58.6319694519043,
|
|
"learning_rate": 6.786329772205246e-08,
|
|
"logits/chosen": 0.19479429721832275,
|
|
"logits/rejected": 0.21016496419906616,
|
|
"logps/chosen": -411.903076171875,
|
|
"logps/ref_chosen": -54.330238342285156,
|
|
"logps/ref_rejected": -96.30763244628906,
|
|
"logps/rejected": -670.3935546875,
|
|
"loss": 0.7944,
|
|
"margin_dpo/margin_mean": 216.51309204101562,
|
|
"margin_dpo/margin_std": 189.78607177734375,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.7870778267254038,
|
|
"fcm_dpo/beta": 0.004233510233461857,
|
|
"fcm_dpo/delta": -0.2273169904947281,
|
|
"fcm_dpo/margin": 248.81976318359375,
|
|
"fcm_dpo/q_t": 0.29112428426742554,
|
|
"grad_norm": 39.82522964477539,
|
|
"learning_rate": 6.698662514899638e-08,
|
|
"logits/chosen": 0.24402159452438354,
|
|
"logits/rejected": 0.2668285071849823,
|
|
"logps/chosen": -386.976806640625,
|
|
"logps/ref_chosen": -47.08053207397461,
|
|
"logps/ref_rejected": -89.09783935546875,
|
|
"logps/rejected": -677.8138427734375,
|
|
"loss": 0.7483,
|
|
"margin_dpo/margin_mean": 248.81976318359375,
|
|
"margin_dpo/margin_std": 229.91015625,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.788546255506608,
|
|
"fcm_dpo/beta": 0.0041783712804317474,
|
|
"fcm_dpo/delta": -0.026768144220113754,
|
|
"fcm_dpo/margin": 209.25230407714844,
|
|
"fcm_dpo/q_t": 0.3225961923599243,
|
|
"grad_norm": 42.43117904663086,
|
|
"learning_rate": 6.611477514857114e-08,
|
|
"logits/chosen": 0.30811983346939087,
|
|
"logits/rejected": 0.3353317379951477,
|
|
"logps/chosen": -426.273681640625,
|
|
"logps/ref_chosen": -57.747467041015625,
|
|
"logps/ref_rejected": -70.43838500976562,
|
|
"logps/rejected": -648.2169189453125,
|
|
"loss": 0.8527,
|
|
"margin_dpo/margin_mean": 209.25230407714844,
|
|
"margin_dpo/margin_std": 213.77560424804688,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.7900146842878121,
|
|
"fcm_dpo/beta": 0.004137400537729263,
|
|
"fcm_dpo/delta": -0.022371456027030945,
|
|
"fcm_dpo/margin": 210.346435546875,
|
|
"fcm_dpo/q_t": 0.31828272342681885,
|
|
"grad_norm": 72.39550018310547,
|
|
"learning_rate": 6.524777069483525e-08,
|
|
"logits/chosen": 0.17623773217201233,
|
|
"logits/rejected": 0.20072564482688904,
|
|
"logps/chosen": -463.2252197265625,
|
|
"logps/ref_chosen": -66.41594696044922,
|
|
"logps/ref_rejected": -84.22808837890625,
|
|
"logps/rejected": -691.3837890625,
|
|
"loss": 0.8202,
|
|
"margin_dpo/margin_mean": 210.346435546875,
|
|
"margin_dpo/margin_std": 190.68777465820312,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.7914831130690162,
|
|
"fcm_dpo/beta": 0.004174415022134781,
|
|
"fcm_dpo/delta": 0.07315509021282196,
|
|
"fcm_dpo/margin": 187.47557067871094,
|
|
"fcm_dpo/q_t": 0.3407885730266571,
|
|
"grad_norm": 45.78681564331055,
|
|
"learning_rate": 6.438563463416221e-08,
|
|
"logits/chosen": 0.27691328525543213,
|
|
"logits/rejected": 0.316150963306427,
|
|
"logps/chosen": -448.6224670410156,
|
|
"logps/ref_chosen": -58.492855072021484,
|
|
"logps/ref_rejected": -91.85395050048828,
|
|
"logps/rejected": -669.4591064453125,
|
|
"loss": 0.9127,
|
|
"margin_dpo/margin_mean": 187.4755859375,
|
|
"margin_dpo/margin_std": 210.38111877441406,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.7929515418502202,
|
|
"fcm_dpo/beta": 0.004112423397600651,
|
|
"fcm_dpo/delta": -0.17697307467460632,
|
|
"fcm_dpo/margin": 245.46176147460938,
|
|
"fcm_dpo/q_t": 0.29645735025405884,
|
|
"grad_norm": 56.68818664550781,
|
|
"learning_rate": 6.352838968463919e-08,
|
|
"logits/chosen": 0.1551690399646759,
|
|
"logits/rejected": 0.17202973365783691,
|
|
"logps/chosen": -429.8592834472656,
|
|
"logps/ref_chosen": -63.482513427734375,
|
|
"logps/ref_rejected": -116.42999267578125,
|
|
"logps/rejected": -728.2685546875,
|
|
"loss": 0.7798,
|
|
"margin_dpo/margin_mean": 245.46176147460938,
|
|
"margin_dpo/margin_std": 219.45751953125,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.7944199706314243,
|
|
"fcm_dpo/beta": 0.004118548706173897,
|
|
"fcm_dpo/delta": 0.16333335638046265,
|
|
"fcm_dpo/margin": 169.4357452392578,
|
|
"fcm_dpo/q_t": 0.3581434488296509,
|
|
"grad_norm": 41.786102294921875,
|
|
"learning_rate": 6.267605843546767e-08,
|
|
"logits/chosen": 0.24253280460834503,
|
|
"logits/rejected": 0.25958988070487976,
|
|
"logps/chosen": -488.85546875,
|
|
"logps/ref_chosen": -78.28036499023438,
|
|
"logps/ref_rejected": -103.273681640625,
|
|
"logps/rejected": -683.2845458984375,
|
|
"loss": 0.9505,
|
|
"margin_dpo/margin_mean": 169.43572998046875,
|
|
"margin_dpo/margin_std": 202.7386474609375,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.7958883994126285,
|
|
"fcm_dpo/beta": 0.004145963117480278,
|
|
"fcm_dpo/delta": -0.03485635668039322,
|
|
"fcm_dpo/margin": 212.4905548095703,
|
|
"fcm_dpo/q_t": 0.3207229971885681,
|
|
"grad_norm": 45.65351867675781,
|
|
"learning_rate": 6.182866334636888e-08,
|
|
"logits/chosen": 0.2975878119468689,
|
|
"logits/rejected": 0.3112480044364929,
|
|
"logps/chosen": -468.6758728027344,
|
|
"logps/ref_chosen": -57.48497009277344,
|
|
"logps/ref_rejected": -96.47506713867188,
|
|
"logps/rejected": -720.156494140625,
|
|
"loss": 0.855,
|
|
"margin_dpo/margin_mean": 212.4905548095703,
|
|
"margin_dpo/margin_std": 215.06793212890625,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.7973568281938326,
|
|
"fcm_dpo/beta": 0.004305819049477577,
|
|
"fcm_dpo/delta": 0.2336502969264984,
|
|
"fcm_dpo/margin": 146.6068878173828,
|
|
"fcm_dpo/q_t": 0.37988683581352234,
|
|
"grad_norm": 68.22380065917969,
|
|
"learning_rate": 6.098622674699147e-08,
|
|
"logits/chosen": 0.2173738032579422,
|
|
"logits/rejected": 0.22196441888809204,
|
|
"logps/chosen": -473.72698974609375,
|
|
"logps/ref_chosen": -60.61750793457031,
|
|
"logps/ref_rejected": -105.59896850585938,
|
|
"logps/rejected": -665.3153076171875,
|
|
"loss": 1.053,
|
|
"margin_dpo/margin_mean": 146.6068878173828,
|
|
"margin_dpo/margin_std": 232.6273956298828,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.7988252569750367,
|
|
"fcm_dpo/beta": 0.004282426554709673,
|
|
"fcm_dpo/delta": -0.11429713666439056,
|
|
"fcm_dpo/margin": 222.5892333984375,
|
|
"fcm_dpo/q_t": 0.3054772913455963,
|
|
"grad_norm": 75.6998062133789,
|
|
"learning_rate": 6.01487708363232e-08,
|
|
"logits/chosen": 0.2191922664642334,
|
|
"logits/rejected": 0.2349478006362915,
|
|
"logps/chosen": -479.7855224609375,
|
|
"logps/ref_chosen": -59.642303466796875,
|
|
"logps/ref_rejected": -100.95469665527344,
|
|
"logps/rejected": -743.6871337890625,
|
|
"loss": 0.7888,
|
|
"margin_dpo/margin_mean": 222.5892333984375,
|
|
"margin_dpo/margin_std": 208.547119140625,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.8002936857562408,
|
|
"fcm_dpo/beta": 0.0042346809059381485,
|
|
"fcm_dpo/delta": -0.06286168098449707,
|
|
"fcm_dpo/margin": 214.2252197265625,
|
|
"fcm_dpo/q_t": 0.3142136335372925,
|
|
"grad_norm": 44.67675018310547,
|
|
"learning_rate": 5.9316317682106294e-08,
|
|
"logits/chosen": 0.19240760803222656,
|
|
"logits/rejected": 0.21735535562038422,
|
|
"logps/chosen": -476.2900390625,
|
|
"logps/ref_chosen": -67.64859771728516,
|
|
"logps/ref_rejected": -95.90800476074219,
|
|
"logps/rejected": -718.774658203125,
|
|
"loss": 0.8253,
|
|
"margin_dpo/margin_mean": 214.2252197265625,
|
|
"margin_dpo/margin_std": 203.46847534179688,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.801762114537445,
|
|
"fcm_dpo/beta": 0.0042076813988387585,
|
|
"fcm_dpo/delta": 0.02801503613591194,
|
|
"fcm_dpo/margin": 195.83470153808594,
|
|
"fcm_dpo/q_t": 0.323789119720459,
|
|
"grad_norm": 63.804405212402344,
|
|
"learning_rate": 5.848888922025552e-08,
|
|
"logits/chosen": 0.2083980143070221,
|
|
"logits/rejected": 0.21705862879753113,
|
|
"logps/chosen": -414.2108154296875,
|
|
"logps/ref_chosen": -50.744232177734375,
|
|
"logps/ref_rejected": -81.86622619628906,
|
|
"logps/rejected": -641.16748046875,
|
|
"loss": 0.8426,
|
|
"margin_dpo/margin_mean": 195.83468627929688,
|
|
"margin_dpo/margin_std": 175.96954345703125,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.8032305433186491,
|
|
"fcm_dpo/beta": 0.004326602444052696,
|
|
"fcm_dpo/delta": 0.07258103787899017,
|
|
"fcm_dpo/margin": 180.27406311035156,
|
|
"fcm_dpo/q_t": 0.34270453453063965,
|
|
"grad_norm": 57.07929992675781,
|
|
"learning_rate": 5.7666507254280265e-08,
|
|
"logits/chosen": 0.15857240557670593,
|
|
"logits/rejected": 0.18794408440589905,
|
|
"logps/chosen": -483.2398376464844,
|
|
"logps/ref_chosen": -73.6877212524414,
|
|
"logps/ref_rejected": -90.76136779785156,
|
|
"logps/rejected": -680.5875244140625,
|
|
"loss": 0.9307,
|
|
"margin_dpo/margin_mean": 180.2740478515625,
|
|
"margin_dpo/margin_std": 210.82923889160156,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.8046989720998532,
|
|
"fcm_dpo/beta": 0.004292323254048824,
|
|
"fcm_dpo/delta": -0.011337485164403915,
|
|
"fcm_dpo/margin": 200.43960571289062,
|
|
"fcm_dpo/q_t": 0.32825782895088196,
|
|
"grad_norm": 38.38809585571289,
|
|
"learning_rate": 5.684919345471029e-08,
|
|
"logits/chosen": 0.2047964632511139,
|
|
"logits/rejected": 0.23223000764846802,
|
|
"logps/chosen": -470.2217102050781,
|
|
"logps/ref_chosen": -65.24634552001953,
|
|
"logps/ref_rejected": -94.11807250976562,
|
|
"logps/rejected": -699.5330810546875,
|
|
"loss": 0.8781,
|
|
"margin_dpo/margin_mean": 200.43960571289062,
|
|
"margin_dpo/margin_std": 224.59344482421875,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.8061674008810573,
|
|
"fcm_dpo/beta": 0.004320558160543442,
|
|
"fcm_dpo/delta": 0.03653667867183685,
|
|
"fcm_dpo/margin": 188.9108428955078,
|
|
"fcm_dpo/q_t": 0.33160555362701416,
|
|
"grad_norm": 45.373291015625,
|
|
"learning_rate": 5.603696935852426e-08,
|
|
"logits/chosen": 0.3711293637752533,
|
|
"logits/rejected": 0.3961469829082489,
|
|
"logps/chosen": -453.705322265625,
|
|
"logps/ref_chosen": -49.21235656738281,
|
|
"logps/ref_rejected": -73.91031646728516,
|
|
"logps/rejected": -667.3141479492188,
|
|
"loss": 0.8724,
|
|
"margin_dpo/margin_mean": 188.9108428955078,
|
|
"margin_dpo/margin_std": 194.69717407226562,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.8076358296622613,
|
|
"fcm_dpo/beta": 0.004383495077490807,
|
|
"fcm_dpo/delta": 0.12022919952869415,
|
|
"fcm_dpo/margin": 168.54380798339844,
|
|
"fcm_dpo/q_t": 0.3434358239173889,
|
|
"grad_norm": 69.41998291015625,
|
|
"learning_rate": 5.5229856368582376e-08,
|
|
"logits/chosen": 0.2264268547296524,
|
|
"logits/rejected": 0.23777663707733154,
|
|
"logps/chosen": -461.681640625,
|
|
"logps/ref_chosen": -56.80695343017578,
|
|
"logps/ref_rejected": -95.12580871582031,
|
|
"logps/rejected": -668.5443115234375,
|
|
"loss": 0.9071,
|
|
"margin_dpo/margin_mean": 168.5438232421875,
|
|
"margin_dpo/margin_std": 176.12771606445312,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.8091042584434655,
|
|
"fcm_dpo/beta": 0.004263910930603743,
|
|
"fcm_dpo/delta": -0.23163707554340363,
|
|
"fcm_dpo/margin": 247.79818725585938,
|
|
"fcm_dpo/q_t": 0.282936692237854,
|
|
"grad_norm": 62.00813674926758,
|
|
"learning_rate": 5.4427875753062734e-08,
|
|
"logits/chosen": 0.2144840955734253,
|
|
"logits/rejected": 0.22916750609874725,
|
|
"logps/chosen": -457.0981750488281,
|
|
"logps/ref_chosen": -59.10633087158203,
|
|
"logps/ref_rejected": -111.67280578613281,
|
|
"logps/rejected": -757.4628295898438,
|
|
"loss": 0.7384,
|
|
"margin_dpo/margin_mean": 247.79818725585938,
|
|
"margin_dpo/margin_std": 201.80621337890625,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.8105726872246696,
|
|
"fcm_dpo/beta": 0.0039656441658735275,
|
|
"fcm_dpo/delta": -0.325361430644989,
|
|
"fcm_dpo/margin": 285.0164794921875,
|
|
"fcm_dpo/q_t": 0.2748231887817383,
|
|
"grad_norm": 48.39642333984375,
|
|
"learning_rate": 5.363104864490034e-08,
|
|
"logits/chosen": 0.27102088928222656,
|
|
"logits/rejected": 0.3110225796699524,
|
|
"logps/chosen": -439.73016357421875,
|
|
"logps/ref_chosen": -62.35459899902344,
|
|
"logps/ref_rejected": -104.56210327148438,
|
|
"logps/rejected": -766.9541625976562,
|
|
"loss": 0.7144,
|
|
"margin_dpo/margin_mean": 285.0164794921875,
|
|
"margin_dpo/margin_std": 232.68673706054688,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.8120411160058737,
|
|
"fcm_dpo/beta": 0.004009841941297054,
|
|
"fcm_dpo/delta": 0.11692027747631073,
|
|
"fcm_dpo/margin": 185.01705932617188,
|
|
"fcm_dpo/q_t": 0.349956750869751,
|
|
"grad_norm": 53.67270278930664,
|
|
"learning_rate": 5.2839396041230415e-08,
|
|
"logits/chosen": 0.2673987150192261,
|
|
"logits/rejected": 0.27861133217811584,
|
|
"logps/chosen": -479.7618408203125,
|
|
"logps/ref_chosen": -68.25881958007812,
|
|
"logps/ref_rejected": -98.0971450805664,
|
|
"logps/rejected": -694.6171875,
|
|
"loss": 0.9325,
|
|
"margin_dpo/margin_mean": 185.01705932617188,
|
|
"margin_dpo/margin_std": 215.61569213867188,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.8135095447870778,
|
|
"fcm_dpo/beta": 0.004004964604973793,
|
|
"fcm_dpo/delta": -0.13882309198379517,
|
|
"fcm_dpo/margin": 243.39794921875,
|
|
"fcm_dpo/q_t": 0.3023296296596527,
|
|
"grad_norm": 40.74170684814453,
|
|
"learning_rate": 5.205293880283551e-08,
|
|
"logits/chosen": 0.21930161118507385,
|
|
"logits/rejected": 0.2681744694709778,
|
|
"logps/chosen": -468.79296875,
|
|
"logps/ref_chosen": -67.94767761230469,
|
|
"logps/ref_rejected": -89.78272247314453,
|
|
"logps/rejected": -734.0260009765625,
|
|
"loss": 0.8201,
|
|
"margin_dpo/margin_mean": 243.39793395996094,
|
|
"margin_dpo/margin_std": 235.294677734375,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.8149779735682819,
|
|
"fcm_dpo/beta": 0.0038568670861423016,
|
|
"fcm_dpo/delta": -0.09782232344150543,
|
|
"fcm_dpo/margin": 243.01727294921875,
|
|
"fcm_dpo/q_t": 0.31605011224746704,
|
|
"grad_norm": 46.52155685424805,
|
|
"learning_rate": 5.127169765359515e-08,
|
|
"logits/chosen": 0.1832244098186493,
|
|
"logits/rejected": 0.19693729281425476,
|
|
"logps/chosen": -459.95538330078125,
|
|
"logps/ref_chosen": -53.33049011230469,
|
|
"logps/ref_rejected": -108.47937774658203,
|
|
"logps/rejected": -758.1215209960938,
|
|
"loss": 0.8346,
|
|
"margin_dpo/margin_mean": 243.01727294921875,
|
|
"margin_dpo/margin_std": 251.5319366455078,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.8164464023494861,
|
|
"fcm_dpo/beta": 0.003947093617171049,
|
|
"fcm_dpo/delta": 0.1339261531829834,
|
|
"fcm_dpo/margin": 183.7998046875,
|
|
"fcm_dpo/q_t": 0.3540470004081726,
|
|
"grad_norm": 52.72172927856445,
|
|
"learning_rate": 5.049569317994012e-08,
|
|
"logits/chosen": 0.33074843883514404,
|
|
"logits/rejected": 0.3498724699020386,
|
|
"logps/chosen": -489.8416748046875,
|
|
"logps/ref_chosen": -58.64447021484375,
|
|
"logps/ref_rejected": -101.34040832519531,
|
|
"logps/rejected": -716.33740234375,
|
|
"loss": 0.9402,
|
|
"margin_dpo/margin_mean": 183.7998046875,
|
|
"margin_dpo/margin_std": 221.2926025390625,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.8179148311306902,
|
|
"fcm_dpo/beta": 0.0039650630205869675,
|
|
"fcm_dpo/delta": 0.025837931782007217,
|
|
"fcm_dpo/margin": 208.3311309814453,
|
|
"fcm_dpo/q_t": 0.3323240578174591,
|
|
"grad_norm": 60.84648895263672,
|
|
"learning_rate": 4.9724945830310144e-08,
|
|
"logits/chosen": 0.28956037759780884,
|
|
"logits/rejected": 0.3096484839916229,
|
|
"logps/chosen": -509.36187744140625,
|
|
"logps/ref_chosen": -67.84066009521484,
|
|
"logps/ref_rejected": -109.93965911865234,
|
|
"logps/rejected": -759.7919921875,
|
|
"loss": 0.8753,
|
|
"margin_dpo/margin_mean": 208.3311309814453,
|
|
"margin_dpo/margin_std": 217.27667236328125,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.8193832599118943,
|
|
"fcm_dpo/beta": 0.0037960303016006947,
|
|
"fcm_dpo/delta": -0.2908962368965149,
|
|
"fcm_dpo/margin": 291.4281005859375,
|
|
"fcm_dpo/q_t": 0.2752188742160797,
|
|
"grad_norm": 40.26952362060547,
|
|
"learning_rate": 4.8959475914614554e-08,
|
|
"logits/chosen": 0.3146205544471741,
|
|
"logits/rejected": 0.34363412857055664,
|
|
"logps/chosen": -474.06671142578125,
|
|
"logps/ref_chosen": -62.36824035644531,
|
|
"logps/ref_rejected": -102.16102600097656,
|
|
"logps/rejected": -805.28759765625,
|
|
"loss": 0.7158,
|
|
"margin_dpo/margin_mean": 291.4281005859375,
|
|
"margin_dpo/margin_std": 233.851806640625,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.8208516886930984,
|
|
"fcm_dpo/beta": 0.0037619564682245255,
|
|
"fcm_dpo/delta": 0.016046149656176567,
|
|
"fcm_dpo/margin": 222.0172576904297,
|
|
"fcm_dpo/q_t": 0.32790541648864746,
|
|
"grad_norm": 42.88578414916992,
|
|
"learning_rate": 4.8199303603697614e-08,
|
|
"logits/chosen": 0.36911362409591675,
|
|
"logits/rejected": 0.41761040687561035,
|
|
"logps/chosen": -506.20428466796875,
|
|
"logps/ref_chosen": -60.752323150634766,
|
|
"logps/ref_rejected": -93.44229125976562,
|
|
"logps/rejected": -760.9114990234375,
|
|
"loss": 0.8556,
|
|
"margin_dpo/margin_mean": 222.01727294921875,
|
|
"margin_dpo/margin_std": 218.80276489257812,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.8223201174743024,
|
|
"fcm_dpo/beta": 0.003879477269947529,
|
|
"fcm_dpo/delta": 0.20861107110977173,
|
|
"fcm_dpo/margin": 168.96829223632812,
|
|
"fcm_dpo/q_t": 0.3588427007198334,
|
|
"grad_norm": 42.48268127441406,
|
|
"learning_rate": 4.7444448928806615e-08,
|
|
"logits/chosen": 0.1362684667110443,
|
|
"logits/rejected": 0.17207416892051697,
|
|
"logps/chosen": -461.98980712890625,
|
|
"logps/ref_chosen": -58.10382080078125,
|
|
"logps/ref_rejected": -79.99122619628906,
|
|
"logps/rejected": -652.8455200195312,
|
|
"loss": 0.9334,
|
|
"margin_dpo/margin_mean": 168.96829223632812,
|
|
"margin_dpo/margin_std": 171.30262756347656,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.8237885462555066,
|
|
"fcm_dpo/beta": 0.004035362042486668,
|
|
"fcm_dpo/delta": 0.13644452393054962,
|
|
"fcm_dpo/margin": 178.99203491210938,
|
|
"fcm_dpo/q_t": 0.34845227003097534,
|
|
"grad_norm": 50.79757308959961,
|
|
"learning_rate": 4.669493178106432e-08,
|
|
"logits/chosen": 0.3586878478527069,
|
|
"logits/rejected": 0.3700917065143585,
|
|
"logps/chosen": -518.5108032226562,
|
|
"logps/ref_chosen": -50.912879943847656,
|
|
"logps/ref_rejected": -99.06856536865234,
|
|
"logps/rejected": -745.6585083007812,
|
|
"loss": 0.919,
|
|
"margin_dpo/margin_mean": 178.99203491210938,
|
|
"margin_dpo/margin_std": 194.42037963867188,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.8252569750367107,
|
|
"fcm_dpo/beta": 0.003967948257923126,
|
|
"fcm_dpo/delta": -0.0841178148984909,
|
|
"fcm_dpo/margin": 233.1253204345703,
|
|
"fcm_dpo/q_t": 0.31131601333618164,
|
|
"grad_norm": 41.71147155761719,
|
|
"learning_rate": 4.5950771910944596e-08,
|
|
"logits/chosen": 0.29285359382629395,
|
|
"logits/rejected": 0.3221287131309509,
|
|
"logps/chosen": -477.37384033203125,
|
|
"logps/ref_chosen": -59.46440124511719,
|
|
"logps/ref_rejected": -96.54266357421875,
|
|
"logps/rejected": -747.577392578125,
|
|
"loss": 0.7998,
|
|
"margin_dpo/margin_mean": 233.1253204345703,
|
|
"margin_dpo/margin_std": 215.35162353515625,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.8267254038179148,
|
|
"fcm_dpo/beta": 0.004087609238922596,
|
|
"fcm_dpo/delta": 0.11210109293460846,
|
|
"fcm_dpo/margin": 181.70925903320312,
|
|
"fcm_dpo/q_t": 0.3462071716785431,
|
|
"grad_norm": 52.469322204589844,
|
|
"learning_rate": 4.521198892775202e-08,
|
|
"logits/chosen": 0.39729082584381104,
|
|
"logits/rejected": 0.40474557876586914,
|
|
"logps/chosen": -518.0103149414062,
|
|
"logps/ref_chosen": -60.60819625854492,
|
|
"logps/ref_rejected": -94.56770324707031,
|
|
"logps/rejected": -733.6790771484375,
|
|
"loss": 0.9472,
|
|
"margin_dpo/margin_mean": 181.70925903320312,
|
|
"margin_dpo/margin_std": 211.15899658203125,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.8281938325991189,
|
|
"fcm_dpo/beta": 0.004054870456457138,
|
|
"fcm_dpo/delta": -0.04862257093191147,
|
|
"fcm_dpo/margin": 220.50332641601562,
|
|
"fcm_dpo/q_t": 0.30795860290527344,
|
|
"grad_norm": 54.09901428222656,
|
|
"learning_rate": 4.447860229910544e-08,
|
|
"logits/chosen": 0.2921653985977173,
|
|
"logits/rejected": 0.34295654296875,
|
|
"logps/chosen": -511.1554870605469,
|
|
"logps/ref_chosen": -74.26837921142578,
|
|
"logps/ref_rejected": -93.23818969726562,
|
|
"logps/rejected": -750.628662109375,
|
|
"loss": 0.7805,
|
|
"margin_dpo/margin_mean": 220.50332641601562,
|
|
"margin_dpo/margin_std": 169.0012664794922,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.8296622613803231,
|
|
"fcm_dpo/beta": 0.003968285396695137,
|
|
"fcm_dpo/delta": -0.06328395754098892,
|
|
"fcm_dpo/margin": 228.4828338623047,
|
|
"fcm_dpo/q_t": 0.32925814390182495,
|
|
"grad_norm": 44.953067779541016,
|
|
"learning_rate": 4.375063135042445e-08,
|
|
"logits/chosen": 0.26385927200317383,
|
|
"logits/rejected": 0.29121464490890503,
|
|
"logps/chosen": -519.4959716796875,
|
|
"logps/ref_chosen": -69.0199203491211,
|
|
"logps/ref_rejected": -85.7789306640625,
|
|
"logps/rejected": -764.73779296875,
|
|
"loss": 0.8788,
|
|
"margin_dpo/margin_mean": 228.48284912109375,
|
|
"margin_dpo/margin_std": 268.53692626953125,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.8311306901615272,
|
|
"fcm_dpo/beta": 0.0039923894219100475,
|
|
"fcm_dpo/delta": -0.0426069051027298,
|
|
"fcm_dpo/margin": 222.23052978515625,
|
|
"fcm_dpo/q_t": 0.3280484676361084,
|
|
"grad_norm": 52.76844024658203,
|
|
"learning_rate": 4.3028095264420525e-08,
|
|
"logits/chosen": 0.29818111658096313,
|
|
"logits/rejected": 0.3079987168312073,
|
|
"logps/chosen": -506.3726806640625,
|
|
"logps/ref_chosen": -66.5453109741211,
|
|
"logps/ref_rejected": -103.86932373046875,
|
|
"logps/rejected": -765.9271850585938,
|
|
"loss": 0.9018,
|
|
"margin_dpo/margin_mean": 222.2305145263672,
|
|
"margin_dpo/margin_std": 253.78787231445312,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.8325991189427313,
|
|
"fcm_dpo/beta": 0.00396195612847805,
|
|
"fcm_dpo/delta": 0.037429843097925186,
|
|
"fcm_dpo/margin": 205.86737060546875,
|
|
"fcm_dpo/q_t": 0.3339134156703949,
|
|
"grad_norm": 55.71341323852539,
|
|
"learning_rate": 4.231101308059165e-08,
|
|
"logits/chosen": 0.41299670934677124,
|
|
"logits/rejected": 0.4278637766838074,
|
|
"logps/chosen": -497.1105651855469,
|
|
"logps/ref_chosen": -52.85829544067383,
|
|
"logps/ref_rejected": -85.37095642089844,
|
|
"logps/rejected": -735.4906005859375,
|
|
"loss": 0.891,
|
|
"margin_dpo/margin_mean": 205.86737060546875,
|
|
"margin_dpo/margin_std": 222.82290649414062,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.8340675477239354,
|
|
"fcm_dpo/beta": 0.003875305177643895,
|
|
"fcm_dpo/delta": -0.12368164956569672,
|
|
"fcm_dpo/margin": 247.7938232421875,
|
|
"fcm_dpo/q_t": 0.2994388937950134,
|
|
"grad_norm": 42.331607818603516,
|
|
"learning_rate": 4.1599403694720145e-08,
|
|
"logits/chosen": 0.39330175518989563,
|
|
"logits/rejected": 0.40852218866348267,
|
|
"logps/chosen": -468.718505859375,
|
|
"logps/ref_chosen": -45.1923828125,
|
|
"logps/ref_rejected": -89.09236907958984,
|
|
"logps/rejected": -760.412353515625,
|
|
"loss": 0.7856,
|
|
"margin_dpo/margin_mean": 247.79380798339844,
|
|
"margin_dpo/margin_std": 211.7144012451172,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.8355359765051396,
|
|
"fcm_dpo/beta": 0.003913109190762043,
|
|
"fcm_dpo/delta": 0.01810070499777794,
|
|
"fcm_dpo/margin": 212.70703125,
|
|
"fcm_dpo/q_t": 0.3307640552520752,
|
|
"grad_norm": 61.88147735595703,
|
|
"learning_rate": 4.089328585837512e-08,
|
|
"logits/chosen": 0.3647564649581909,
|
|
"logits/rejected": 0.3922520875930786,
|
|
"logps/chosen": -534.0196533203125,
|
|
"logps/ref_chosen": -63.72056198120117,
|
|
"logps/ref_rejected": -79.10325622558594,
|
|
"logps/rejected": -762.109375,
|
|
"loss": 0.8971,
|
|
"margin_dpo/margin_mean": 212.70703125,
|
|
"margin_dpo/margin_std": 238.76895141601562,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.8370044052863436,
|
|
"fcm_dpo/beta": 0.0038719356525689363,
|
|
"fcm_dpo/delta": -0.008450102061033249,
|
|
"fcm_dpo/margin": 221.46524047851562,
|
|
"fcm_dpo/q_t": 0.3211510479450226,
|
|
"grad_norm": 38.58411407470703,
|
|
"learning_rate": 4.019267817841834e-08,
|
|
"logits/chosen": 0.3776249885559082,
|
|
"logits/rejected": 0.4106351137161255,
|
|
"logps/chosen": -510.22760009765625,
|
|
"logps/ref_chosen": -61.61454391479492,
|
|
"logps/ref_rejected": -82.14186096191406,
|
|
"logps/rejected": -752.2201538085938,
|
|
"loss": 0.8246,
|
|
"margin_dpo/margin_mean": 221.46524047851562,
|
|
"margin_dpo/margin_std": 198.52322387695312,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.8384728340675477,
|
|
"fcm_dpo/beta": 0.0038212304934859276,
|
|
"fcm_dpo/delta": -0.06161798536777496,
|
|
"fcm_dpo/margin": 236.76951599121094,
|
|
"fcm_dpo/q_t": 0.31742697954177856,
|
|
"grad_norm": 43.7412109375,
|
|
"learning_rate": 3.9497599116513705e-08,
|
|
"logits/chosen": 0.35367658734321594,
|
|
"logits/rejected": 0.3578590452671051,
|
|
"logps/chosen": -511.2191162109375,
|
|
"logps/ref_chosen": -53.05406188964844,
|
|
"logps/ref_rejected": -91.33682250976562,
|
|
"logps/rejected": -786.2713623046875,
|
|
"loss": 0.8589,
|
|
"margin_dpo/margin_mean": 236.76953125,
|
|
"margin_dpo/margin_std": 248.20687866210938,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.8399412628487518,
|
|
"fcm_dpo/beta": 0.003810744732618332,
|
|
"fcm_dpo/delta": -0.016613949090242386,
|
|
"fcm_dpo/margin": 226.94198608398438,
|
|
"fcm_dpo/q_t": 0.33660879731178284,
|
|
"grad_norm": 51.98419952392578,
|
|
"learning_rate": 3.880806698864086e-08,
|
|
"logits/chosen": 0.4193460941314697,
|
|
"logits/rejected": 0.4366481304168701,
|
|
"logps/chosen": -525.885986328125,
|
|
"logps/ref_chosen": -48.45928955078125,
|
|
"logps/ref_rejected": -83.55703735351562,
|
|
"logps/rejected": -787.92578125,
|
|
"loss": 0.9044,
|
|
"margin_dpo/margin_mean": 226.9420166015625,
|
|
"margin_dpo/margin_std": 278.49652099609375,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.8414096916299559,
|
|
"fcm_dpo/beta": 0.0038648974150419235,
|
|
"fcm_dpo/delta": -0.006211414933204651,
|
|
"fcm_dpo/margin": 220.71739196777344,
|
|
"fcm_dpo/q_t": 0.325981080532074,
|
|
"grad_norm": 35.73110580444336,
|
|
"learning_rate": 3.812409996461275e-08,
|
|
"logits/chosen": 0.40158382058143616,
|
|
"logits/rejected": 0.42190420627593994,
|
|
"logps/chosen": -503.1387634277344,
|
|
"logps/ref_chosen": -51.62262725830078,
|
|
"logps/ref_rejected": -85.32499694824219,
|
|
"logps/rejected": -757.55859375,
|
|
"loss": 0.859,
|
|
"margin_dpo/margin_mean": 220.7174072265625,
|
|
"margin_dpo/margin_std": 216.8104248046875,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.8428781204111601,
|
|
"fcm_dpo/beta": 0.0037943366914987564,
|
|
"fcm_dpo/delta": -0.05984480306506157,
|
|
"fcm_dpo/margin": 238.36622619628906,
|
|
"fcm_dpo/q_t": 0.3168014883995056,
|
|
"grad_norm": 39.142181396484375,
|
|
"learning_rate": 3.74457160675965e-08,
|
|
"logits/chosen": 0.27881181240081787,
|
|
"logits/rejected": 0.29145705699920654,
|
|
"logps/chosen": -482.63153076171875,
|
|
"logps/ref_chosen": -51.04446029663086,
|
|
"logps/ref_rejected": -92.80640411376953,
|
|
"logps/rejected": -762.759765625,
|
|
"loss": 0.8247,
|
|
"margin_dpo/margin_mean": 238.36622619628906,
|
|
"margin_dpo/margin_std": 230.02125549316406,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.8443465491923642,
|
|
"fcm_dpo/beta": 0.0037506907247006893,
|
|
"fcm_dpo/delta": 0.04211752861738205,
|
|
"fcm_dpo/margin": 215.6813507080078,
|
|
"fcm_dpo/q_t": 0.3343145251274109,
|
|
"grad_norm": 64.44995880126953,
|
|
"learning_rate": 3.677293317363864e-08,
|
|
"logits/chosen": 0.3192945718765259,
|
|
"logits/rejected": 0.3290221691131592,
|
|
"logps/chosen": -558.0654296875,
|
|
"logps/ref_chosen": -71.7901382446289,
|
|
"logps/ref_rejected": -95.38619995117188,
|
|
"logps/rejected": -797.3428955078125,
|
|
"loss": 0.9433,
|
|
"margin_dpo/margin_mean": 215.6813507080078,
|
|
"margin_dpo/margin_std": 254.57664489746094,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.8458149779735683,
|
|
"fcm_dpo/beta": 0.0038080730009824038,
|
|
"fcm_dpo/delta": 0.0639888197183609,
|
|
"fcm_dpo/margin": 207.49903869628906,
|
|
"fcm_dpo/q_t": 0.34286561608314514,
|
|
"grad_norm": 62.07902908325195,
|
|
"learning_rate": 3.6105769011194224e-08,
|
|
"logits/chosen": 0.39104163646698,
|
|
"logits/rejected": 0.39094260334968567,
|
|
"logps/chosen": -515.055419921875,
|
|
"logps/ref_chosen": -54.262962341308594,
|
|
"logps/ref_rejected": -100.75428009033203,
|
|
"logps/rejected": -769.0457763671875,
|
|
"loss": 0.94,
|
|
"margin_dpo/margin_mean": 207.49903869628906,
|
|
"margin_dpo/margin_std": 248.49119567871094,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.8472834067547724,
|
|
"fcm_dpo/beta": 0.0038475426845252514,
|
|
"fcm_dpo/delta": -0.0008432501927018166,
|
|
"fcm_dpo/margin": 221.11129760742188,
|
|
"fcm_dpo/q_t": 0.32097095251083374,
|
|
"grad_norm": 62.19895935058594,
|
|
"learning_rate": 3.5444241160659304e-08,
|
|
"logits/chosen": 0.3040631413459778,
|
|
"logits/rejected": 0.3278041481971741,
|
|
"logps/chosen": -475.20794677734375,
|
|
"logps/ref_chosen": -61.909706115722656,
|
|
"logps/ref_rejected": -84.07069396972656,
|
|
"logps/rejected": -718.4802856445312,
|
|
"loss": 0.8523,
|
|
"margin_dpo/margin_mean": 221.11129760742188,
|
|
"margin_dpo/margin_std": 212.4851837158203,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.8487518355359766,
|
|
"fcm_dpo/beta": 0.0038197203539311886,
|
|
"fcm_dpo/delta": 0.012510977685451508,
|
|
"fcm_dpo/margin": 219.00831604003906,
|
|
"fcm_dpo/q_t": 0.32471764087677,
|
|
"grad_norm": 44.588985443115234,
|
|
"learning_rate": 3.478836705390808e-08,
|
|
"logits/chosen": 0.26734110713005066,
|
|
"logits/rejected": 0.2847135066986084,
|
|
"logps/chosen": -472.98272705078125,
|
|
"logps/ref_chosen": -49.26368713378906,
|
|
"logps/ref_rejected": -83.4362564086914,
|
|
"logps/rejected": -726.1636352539062,
|
|
"loss": 0.8381,
|
|
"margin_dpo/margin_mean": 219.00831604003906,
|
|
"margin_dpo/margin_std": 195.05392456054688,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.8502202643171806,
|
|
"fcm_dpo/beta": 0.003959665074944496,
|
|
"fcm_dpo/delta": 0.16658331453800201,
|
|
"fcm_dpo/margin": 175.51071166992188,
|
|
"fcm_dpo/q_t": 0.35233074426651,
|
|
"grad_norm": 41.42998123168945,
|
|
"learning_rate": 3.41381639738331e-08,
|
|
"logits/chosen": 0.30626869201660156,
|
|
"logits/rejected": 0.3083363175392151,
|
|
"logps/chosen": -489.62347412109375,
|
|
"logps/ref_chosen": -58.88581848144531,
|
|
"logps/ref_rejected": -94.78762817382812,
|
|
"logps/rejected": -701.0360107421875,
|
|
"loss": 0.9361,
|
|
"margin_dpo/margin_mean": 175.51071166992188,
|
|
"margin_dpo/margin_std": 193.71348571777344,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.8516886930983847,
|
|
"fcm_dpo/beta": 0.0038715798873454332,
|
|
"fcm_dpo/delta": -0.14521247148513794,
|
|
"fcm_dpo/margin": 252.62350463867188,
|
|
"fcm_dpo/q_t": 0.3019111752510071,
|
|
"grad_norm": 31.885099411010742,
|
|
"learning_rate": 3.349364905389032e-08,
|
|
"logits/chosen": 0.17919771373271942,
|
|
"logits/rejected": 0.19062325358390808,
|
|
"logps/chosen": -409.65533447265625,
|
|
"logps/ref_chosen": -48.70683670043945,
|
|
"logps/ref_rejected": -81.7583999633789,
|
|
"logps/rejected": -695.3304443359375,
|
|
"loss": 0.8033,
|
|
"margin_dpo/margin_mean": 252.62350463867188,
|
|
"margin_dpo/margin_std": 242.10226440429688,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.8531571218795888,
|
|
"fcm_dpo/beta": 0.003961851820349693,
|
|
"fcm_dpo/delta": 0.13293930888175964,
|
|
"fcm_dpo/margin": 183.37216186523438,
|
|
"fcm_dpo/q_t": 0.351911723613739,
|
|
"grad_norm": 58.27201843261719,
|
|
"learning_rate": 3.285483927764726e-08,
|
|
"logits/chosen": 0.3543952703475952,
|
|
"logits/rejected": 0.37118589878082275,
|
|
"logps/chosen": -507.3199462890625,
|
|
"logps/ref_chosen": -62.22235107421875,
|
|
"logps/ref_rejected": -91.73568725585938,
|
|
"logps/rejected": -720.2054443359375,
|
|
"loss": 0.9357,
|
|
"margin_dpo/margin_mean": 183.37216186523438,
|
|
"margin_dpo/margin_std": 216.66363525390625,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.8546255506607929,
|
|
"fcm_dpo/beta": 0.003983854316174984,
|
|
"fcm_dpo/delta": -0.07189857959747314,
|
|
"fcm_dpo/margin": 229.39111328125,
|
|
"fcm_dpo/q_t": 0.31426823139190674,
|
|
"grad_norm": 43.69565963745117,
|
|
"learning_rate": 3.222175147833556e-08,
|
|
"logits/chosen": 0.32797807455062866,
|
|
"logits/rejected": 0.3196754455566406,
|
|
"logps/chosen": -459.9640197753906,
|
|
"logps/ref_chosen": -58.228660583496094,
|
|
"logps/ref_rejected": -110.06959533691406,
|
|
"logps/rejected": -741.196044921875,
|
|
"loss": 0.8473,
|
|
"margin_dpo/margin_mean": 229.39111328125,
|
|
"margin_dpo/margin_std": 227.16668701171875,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.856093979441997,
|
|
"fcm_dpo/beta": 0.004001200199127197,
|
|
"fcm_dpo/delta": 0.17536485195159912,
|
|
"fcm_dpo/margin": 171.66009521484375,
|
|
"fcm_dpo/q_t": 0.36176609992980957,
|
|
"grad_norm": 77.05546569824219,
|
|
"learning_rate": 3.159440233840763e-08,
|
|
"logits/chosen": 0.2569863796234131,
|
|
"logits/rejected": 0.27533864974975586,
|
|
"logps/chosen": -492.8332824707031,
|
|
"logps/ref_chosen": -56.86286163330078,
|
|
"logps/ref_rejected": -88.4039306640625,
|
|
"logps/rejected": -696.034423828125,
|
|
"loss": 0.9734,
|
|
"margin_dpo/margin_mean": 171.66009521484375,
|
|
"margin_dpo/margin_std": 217.009033203125,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.8575624082232012,
|
|
"fcm_dpo/beta": 0.0039225309155881405,
|
|
"fcm_dpo/delta": -0.24615153670310974,
|
|
"fcm_dpo/margin": 272.740966796875,
|
|
"fcm_dpo/q_t": 0.2808777689933777,
|
|
"grad_norm": 36.94783020019531,
|
|
"learning_rate": 3.0972808389096635e-08,
|
|
"logits/chosen": 0.3280525207519531,
|
|
"logits/rejected": 0.35432863235473633,
|
|
"logps/chosen": -447.52764892578125,
|
|
"logps/ref_chosen": -56.90068054199219,
|
|
"logps/ref_rejected": -97.63606262207031,
|
|
"logps/rejected": -761.0040283203125,
|
|
"loss": 0.7226,
|
|
"margin_dpo/margin_mean": 272.740966796875,
|
|
"margin_dpo/margin_std": 216.50282287597656,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.8590308370044053,
|
|
"fcm_dpo/beta": 0.0038455924950540066,
|
|
"fcm_dpo/delta": -0.048364780843257904,
|
|
"fcm_dpo/margin": 232.48892211914062,
|
|
"fcm_dpo/q_t": 0.32074883580207825,
|
|
"grad_norm": 63.80207443237305,
|
|
"learning_rate": 3.035698600998121e-08,
|
|
"logits/chosen": 0.3095451295375824,
|
|
"logits/rejected": 0.34348541498184204,
|
|
"logps/chosen": -490.47076416015625,
|
|
"logps/ref_chosen": -60.973968505859375,
|
|
"logps/ref_rejected": -84.16952514648438,
|
|
"logps/rejected": -746.1552734375,
|
|
"loss": 0.8478,
|
|
"margin_dpo/margin_mean": 232.4889373779297,
|
|
"margin_dpo/margin_std": 235.10165405273438,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.8604992657856094,
|
|
"fcm_dpo/beta": 0.003913631662726402,
|
|
"fcm_dpo/delta": 0.12130466103553772,
|
|
"fcm_dpo/margin": 188.37155151367188,
|
|
"fcm_dpo/q_t": 0.3504549264907837,
|
|
"grad_norm": 61.881404876708984,
|
|
"learning_rate": 2.974695142855388e-08,
|
|
"logits/chosen": 0.37644636631011963,
|
|
"logits/rejected": 0.3861965537071228,
|
|
"logps/chosen": -507.32269287109375,
|
|
"logps/ref_chosen": -56.85559844970703,
|
|
"logps/ref_rejected": -91.80261993408203,
|
|
"logps/rejected": -730.6412353515625,
|
|
"loss": 0.9639,
|
|
"margin_dpo/margin_mean": 188.37155151367188,
|
|
"margin_dpo/margin_std": 239.3880157470703,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.8619676945668135,
|
|
"fcm_dpo/beta": 0.003928901627659798,
|
|
"fcm_dpo/delta": -0.005717615596950054,
|
|
"fcm_dpo/margin": 217.67236328125,
|
|
"fcm_dpo/q_t": 0.3260841369628906,
|
|
"grad_norm": 38.709537506103516,
|
|
"learning_rate": 2.9142720719793122e-08,
|
|
"logits/chosen": 0.42554670572280884,
|
|
"logits/rejected": 0.42748117446899414,
|
|
"logps/chosen": -425.38934326171875,
|
|
"logps/ref_chosen": -44.69159698486328,
|
|
"logps/ref_rejected": -82.62385559082031,
|
|
"logps/rejected": -680.9939575195312,
|
|
"loss": 0.8644,
|
|
"margin_dpo/margin_mean": 217.67236328125,
|
|
"margin_dpo/margin_std": 225.70684814453125,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.8634361233480177,
|
|
"fcm_dpo/beta": 0.003943789284676313,
|
|
"fcm_dpo/delta": 0.06734154373407364,
|
|
"fcm_dpo/margin": 199.75949096679688,
|
|
"fcm_dpo/q_t": 0.33906006813049316,
|
|
"grad_norm": 51.019248962402344,
|
|
"learning_rate": 2.8544309805740018e-08,
|
|
"logits/chosen": 0.32632678747177124,
|
|
"logits/rejected": 0.32503804564476013,
|
|
"logps/chosen": -493.21112060546875,
|
|
"logps/ref_chosen": -50.29494857788086,
|
|
"logps/ref_rejected": -107.36988067626953,
|
|
"logps/rejected": -750.0455322265625,
|
|
"loss": 0.8986,
|
|
"margin_dpo/margin_mean": 199.75949096679688,
|
|
"margin_dpo/margin_std": 217.92599487304688,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.8649045521292217,
|
|
"fcm_dpo/beta": 0.0038381319027394056,
|
|
"fcm_dpo/delta": -0.22184929251670837,
|
|
"fcm_dpo/margin": 273.01702880859375,
|
|
"fcm_dpo/q_t": 0.2866641879081726,
|
|
"grad_norm": 51.86333084106445,
|
|
"learning_rate": 2.7951734455078786e-08,
|
|
"logits/chosen": 0.3192276358604431,
|
|
"logits/rejected": 0.3345522880554199,
|
|
"logps/chosen": -472.3736572265625,
|
|
"logps/ref_chosen": -59.929908752441406,
|
|
"logps/ref_rejected": -111.65534973144531,
|
|
"logps/rejected": -797.1161499023438,
|
|
"loss": 0.7354,
|
|
"margin_dpo/margin_mean": 273.01702880859375,
|
|
"margin_dpo/margin_std": 222.23690795898438,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.8663729809104258,
|
|
"fcm_dpo/beta": 0.003773223143070936,
|
|
"fcm_dpo/delta": -0.03967685252428055,
|
|
"fcm_dpo/margin": 234.80557250976562,
|
|
"fcm_dpo/q_t": 0.3133876919746399,
|
|
"grad_norm": 35.085304260253906,
|
|
"learning_rate": 2.736501028272095e-08,
|
|
"logits/chosen": 0.2434614598751068,
|
|
"logits/rejected": 0.23982518911361694,
|
|
"logps/chosen": -463.9925842285156,
|
|
"logps/ref_chosen": -55.80979537963867,
|
|
"logps/ref_rejected": -106.06282043457031,
|
|
"logps/rejected": -749.0511474609375,
|
|
"loss": 0.8136,
|
|
"margin_dpo/margin_mean": 234.80555725097656,
|
|
"margin_dpo/margin_std": 206.89405822753906,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.8678414096916299,
|
|
"fcm_dpo/beta": 0.003768111113458872,
|
|
"fcm_dpo/delta": -0.02017318457365036,
|
|
"fcm_dpo/margin": 230.463134765625,
|
|
"fcm_dpo/q_t": 0.3158026337623596,
|
|
"grad_norm": 53.08750534057617,
|
|
"learning_rate": 2.678415274939408e-08,
|
|
"logits/chosen": 0.4338451623916626,
|
|
"logits/rejected": 0.47315138578414917,
|
|
"logps/chosen": -498.3539733886719,
|
|
"logps/ref_chosen": -56.24061965942383,
|
|
"logps/ref_rejected": -83.78629302978516,
|
|
"logps/rejected": -756.36279296875,
|
|
"loss": 0.8215,
|
|
"margin_dpo/margin_mean": 230.463134765625,
|
|
"margin_dpo/margin_std": 205.03244018554688,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.869309838472834,
|
|
"fcm_dpo/beta": 0.0037732375785708427,
|
|
"fcm_dpo/delta": 0.052767086774110794,
|
|
"fcm_dpo/margin": 212.37567138671875,
|
|
"fcm_dpo/q_t": 0.3374939262866974,
|
|
"grad_norm": 74.47343444824219,
|
|
"learning_rate": 2.6209177161234442e-08,
|
|
"logits/chosen": 0.47971880435943604,
|
|
"logits/rejected": 0.4983103573322296,
|
|
"logps/chosen": -526.7408447265625,
|
|
"logps/ref_chosen": -47.94025421142578,
|
|
"logps/ref_rejected": -75.73287963867188,
|
|
"logps/rejected": -766.9091186523438,
|
|
"loss": 0.9714,
|
|
"margin_dpo/margin_mean": 212.3756561279297,
|
|
"margin_dpo/margin_std": 278.6368713378906,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.8707782672540382,
|
|
"fcm_dpo/beta": 0.003901700722053647,
|
|
"fcm_dpo/delta": 0.1894446462392807,
|
|
"fcm_dpo/margin": 172.6243896484375,
|
|
"fcm_dpo/q_t": 0.36253821849823,
|
|
"grad_norm": 71.90731811523438,
|
|
"learning_rate": 2.564009866938349e-08,
|
|
"logits/chosen": 0.2871604561805725,
|
|
"logits/rejected": 0.30711573362350464,
|
|
"logps/chosen": -473.5516357421875,
|
|
"logps/ref_chosen": -48.690757751464844,
|
|
"logps/ref_rejected": -60.90800094604492,
|
|
"logps/rejected": -658.393310546875,
|
|
"loss": 0.9842,
|
|
"margin_dpo/margin_mean": 172.6243896484375,
|
|
"margin_dpo/margin_std": 221.938232421875,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.8722466960352423,
|
|
"fcm_dpo/beta": 0.0039725289680063725,
|
|
"fcm_dpo/delta": -0.07375653088092804,
|
|
"fcm_dpo/margin": 229.670654296875,
|
|
"fcm_dpo/q_t": 0.3251326084136963,
|
|
"grad_norm": 57.134742736816406,
|
|
"learning_rate": 2.5076932269588708e-08,
|
|
"logits/chosen": 0.32449811697006226,
|
|
"logits/rejected": 0.34925204515457153,
|
|
"logps/chosen": -467.6834411621094,
|
|
"logps/ref_chosen": -54.93488693237305,
|
|
"logps/ref_rejected": -86.09967803955078,
|
|
"logps/rejected": -728.5189208984375,
|
|
"loss": 0.8996,
|
|
"margin_dpo/margin_mean": 229.67066955566406,
|
|
"margin_dpo/margin_std": 259.094482421875,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.8737151248164464,
|
|
"fcm_dpo/beta": 0.0038999663665890694,
|
|
"fcm_dpo/delta": 0.03644701838493347,
|
|
"fcm_dpo/margin": 209.3623504638672,
|
|
"fcm_dpo/q_t": 0.33476635813713074,
|
|
"grad_norm": 41.2265510559082,
|
|
"learning_rate": 2.451969280180849e-08,
|
|
"logits/chosen": 0.40078675746917725,
|
|
"logits/rejected": 0.42494046688079834,
|
|
"logps/chosen": -484.77764892578125,
|
|
"logps/ref_chosen": -49.4204216003418,
|
|
"logps/ref_rejected": -80.62731170654297,
|
|
"logps/rejected": -725.346923828125,
|
|
"loss": 0.8717,
|
|
"margin_dpo/margin_mean": 209.3623504638672,
|
|
"margin_dpo/margin_std": 223.2598419189453,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.8751835535976505,
|
|
"fcm_dpo/beta": 0.003989151678979397,
|
|
"fcm_dpo/delta": 0.09338672459125519,
|
|
"fcm_dpo/margin": 191.2503204345703,
|
|
"fcm_dpo/q_t": 0.3463008403778076,
|
|
"grad_norm": 54.039161682128906,
|
|
"learning_rate": 2.396839494982103e-08,
|
|
"logits/chosen": 0.3313291668891907,
|
|
"logits/rejected": 0.35179513692855835,
|
|
"logps/chosen": -501.94403076171875,
|
|
"logps/ref_chosen": -59.791683197021484,
|
|
"logps/ref_rejected": -80.09111785888672,
|
|
"logps/rejected": -713.4937744140625,
|
|
"loss": 0.9414,
|
|
"margin_dpo/margin_mean": 191.2503204345703,
|
|
"margin_dpo/margin_std": 234.5662841796875,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.8766519823788547,
|
|
"fcm_dpo/beta": 0.0038905441761016846,
|
|
"fcm_dpo/delta": -0.08888904005289078,
|
|
"fcm_dpo/margin": 238.23074340820312,
|
|
"fcm_dpo/q_t": 0.3173202574253082,
|
|
"grad_norm": 49.979923248291016,
|
|
"learning_rate": 2.3423053240837514e-08,
|
|
"logits/chosen": 0.3219120502471924,
|
|
"logits/rejected": 0.3208301365375519,
|
|
"logps/chosen": -515.6228637695312,
|
|
"logps/ref_chosen": -57.26078796386719,
|
|
"logps/ref_rejected": -100.6937255859375,
|
|
"logps/rejected": -797.2865600585938,
|
|
"loss": 0.849,
|
|
"margin_dpo/margin_mean": 238.23074340820312,
|
|
"margin_dpo/margin_std": 249.84375,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.8781204111600588,
|
|
"fcm_dpo/beta": 0.003907772246748209,
|
|
"fcm_dpo/delta": 0.07058212906122208,
|
|
"fcm_dpo/margin": 200.17642211914062,
|
|
"fcm_dpo/q_t": 0.3359318971633911,
|
|
"grad_norm": 63.312461853027344,
|
|
"learning_rate": 2.2883682045119062e-08,
|
|
"logits/chosen": 0.4510478973388672,
|
|
"logits/rejected": 0.47181278467178345,
|
|
"logps/chosen": -533.74072265625,
|
|
"logps/ref_chosen": -52.51850509643555,
|
|
"logps/ref_rejected": -89.44385528564453,
|
|
"logps/rejected": -770.842529296875,
|
|
"loss": 0.9574,
|
|
"margin_dpo/margin_mean": 200.17642211914062,
|
|
"margin_dpo/margin_std": 237.43214416503906,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.8795888399412628,
|
|
"fcm_dpo/beta": 0.003973391838371754,
|
|
"fcm_dpo/delta": 0.02901943027973175,
|
|
"fcm_dpo/margin": 207.14028930664062,
|
|
"fcm_dpo/q_t": 0.32341066002845764,
|
|
"grad_norm": 49.349613189697266,
|
|
"learning_rate": 2.2350295575598367e-08,
|
|
"logits/chosen": 0.3739485740661621,
|
|
"logits/rejected": 0.3845040798187256,
|
|
"logps/chosen": -481.701171875,
|
|
"logps/ref_chosen": -49.802677154541016,
|
|
"logps/ref_rejected": -82.978515625,
|
|
"logps/rejected": -722.017333984375,
|
|
"loss": 0.8359,
|
|
"margin_dpo/margin_mean": 207.14028930664062,
|
|
"margin_dpo/margin_std": 179.18177795410156,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.8810572687224669,
|
|
"fcm_dpo/beta": 0.00399825070053339,
|
|
"fcm_dpo/delta": 0.015949290245771408,
|
|
"fcm_dpo/margin": 208.917236328125,
|
|
"fcm_dpo/q_t": 0.33174365758895874,
|
|
"grad_norm": 52.51283645629883,
|
|
"learning_rate": 2.1822907887504932e-08,
|
|
"logits/chosen": 0.4077937602996826,
|
|
"logits/rejected": 0.42742469906806946,
|
|
"logps/chosen": -556.25830078125,
|
|
"logps/ref_chosen": -66.43487548828125,
|
|
"logps/ref_rejected": -85.45649719238281,
|
|
"logps/rejected": -784.1971435546875,
|
|
"loss": 0.8902,
|
|
"margin_dpo/margin_mean": 208.917236328125,
|
|
"margin_dpo/margin_std": 230.8261260986328,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.8810572687224669,
|
|
"eval_fcm_dpo/beta": 0.004011388868093491,
|
|
"eval_logits/chosen": 0.3258829414844513,
|
|
"eval_logits/rejected": 0.3477090895175934,
|
|
"eval_logps/chosen": -579.7042236328125,
|
|
"eval_logps/ref_chosen": -79.05104064941406,
|
|
"eval_logps/ref_rejected": -86.79793548583984,
|
|
"eval_logps/rejected": -736.6627807617188,
|
|
"eval_loss": 0.531234085559845,
|
|
"eval_margin_dpo/margin_mean": 149.2117156982422,
|
|
"eval_margin_dpo/margin_std": 231.603759765625,
|
|
"eval_runtime": 39.2746,
|
|
"eval_samples_per_second": 59.555,
|
|
"eval_steps_per_second": 1.884,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.882525697503671,
|
|
"fcm_dpo/beta": 0.003966460935771465,
|
|
"fcm_dpo/delta": -0.039181776344776154,
|
|
"fcm_dpo/margin": 223.1067352294922,
|
|
"fcm_dpo/q_t": 0.32117369771003723,
|
|
"grad_norm": 48.35001754760742,
|
|
"learning_rate": 2.1301532877994742e-08,
|
|
"logits/chosen": 0.3656480312347412,
|
|
"logits/rejected": 0.38585516810417175,
|
|
"logps/chosen": -557.62841796875,
|
|
"logps/ref_chosen": -59.13361358642578,
|
|
"logps/ref_rejected": -94.69093322753906,
|
|
"logps/rejected": -816.29248046875,
|
|
"loss": 0.8571,
|
|
"margin_dpo/margin_mean": 223.10675048828125,
|
|
"margin_dpo/margin_std": 232.555908203125,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.8839941262848752,
|
|
"fcm_dpo/beta": 0.003887756960466504,
|
|
"fcm_dpo/delta": -0.1840885877609253,
|
|
"fcm_dpo/margin": 261.26611328125,
|
|
"fcm_dpo/q_t": 0.29467278718948364,
|
|
"grad_norm": 46.679325103759766,
|
|
"learning_rate": 2.0786184285784298e-08,
|
|
"logits/chosen": 0.35937434434890747,
|
|
"logits/rejected": 0.3779703676700592,
|
|
"logps/chosen": -445.2738037109375,
|
|
"logps/ref_chosen": -48.59352111816406,
|
|
"logps/ref_rejected": -87.6685562133789,
|
|
"logps/rejected": -745.614990234375,
|
|
"loss": 0.7682,
|
|
"margin_dpo/margin_mean": 261.26611328125,
|
|
"margin_dpo/margin_std": 232.40969848632812,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.8854625550660793,
|
|
"fcm_dpo/beta": 0.003834263887256384,
|
|
"fcm_dpo/delta": -0.023736726492643356,
|
|
"fcm_dpo/margin": 227.3087158203125,
|
|
"fcm_dpo/q_t": 0.3306729197502136,
|
|
"grad_norm": 67.39020538330078,
|
|
"learning_rate": 2.0276875690788204e-08,
|
|
"logits/chosen": 0.29140499234199524,
|
|
"logits/rejected": 0.31848928332328796,
|
|
"logps/chosen": -495.6649169921875,
|
|
"logps/ref_chosen": -70.41461944580078,
|
|
"logps/ref_rejected": -100.32559967041016,
|
|
"logps/rejected": -752.8846435546875,
|
|
"loss": 0.8786,
|
|
"margin_dpo/margin_mean": 227.3087158203125,
|
|
"margin_dpo/margin_std": 258.9732971191406,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.8869309838472834,
|
|
"fcm_dpo/beta": 0.0037519652396440506,
|
|
"fcm_dpo/delta": -0.11160653829574585,
|
|
"fcm_dpo/margin": 253.43344116210938,
|
|
"fcm_dpo/q_t": 0.3031109571456909,
|
|
"grad_norm": 40.32583236694336,
|
|
"learning_rate": 1.977362051376158e-08,
|
|
"logits/chosen": 0.343853235244751,
|
|
"logits/rejected": 0.3555789291858673,
|
|
"logps/chosen": -452.9412841796875,
|
|
"logps/ref_chosen": -46.45808029174805,
|
|
"logps/ref_rejected": -91.8544921875,
|
|
"logps/rejected": -751.7711181640625,
|
|
"loss": 0.8107,
|
|
"margin_dpo/margin_mean": 253.43344116210938,
|
|
"margin_dpo/margin_std": 235.12753295898438,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.8883994126284875,
|
|
"fcm_dpo/beta": 0.003726169466972351,
|
|
"fcm_dpo/delta": 0.026752449572086334,
|
|
"fcm_dpo/margin": 221.3822479248047,
|
|
"fcm_dpo/q_t": 0.3330296277999878,
|
|
"grad_norm": 40.421016693115234,
|
|
"learning_rate": 1.9276432015946446e-08,
|
|
"logits/chosen": 0.3358033299446106,
|
|
"logits/rejected": 0.3515666723251343,
|
|
"logps/chosen": -529.6724853515625,
|
|
"logps/ref_chosen": -66.24933624267578,
|
|
"logps/ref_rejected": -102.30496978759766,
|
|
"logps/rejected": -787.1104125976562,
|
|
"loss": 0.8714,
|
|
"margin_dpo/margin_mean": 221.3822479248047,
|
|
"margin_dpo/margin_std": 240.65045166015625,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.8898678414096917,
|
|
"fcm_dpo/beta": 0.003759493585675955,
|
|
"fcm_dpo/delta": -0.06783300638198853,
|
|
"fcm_dpo/margin": 242.02098083496094,
|
|
"fcm_dpo/q_t": 0.3137454688549042,
|
|
"grad_norm": 37.28118896484375,
|
|
"learning_rate": 1.8785323298722093e-08,
|
|
"logits/chosen": 0.39390993118286133,
|
|
"logits/rejected": 0.40036922693252563,
|
|
"logps/chosen": -500.85211181640625,
|
|
"logps/ref_chosen": -54.819122314453125,
|
|
"logps/ref_rejected": -98.37146759033203,
|
|
"logps/rejected": -786.4254760742188,
|
|
"loss": 0.8139,
|
|
"margin_dpo/margin_mean": 242.02099609375,
|
|
"margin_dpo/margin_std": 221.83334350585938,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.8913362701908958,
|
|
"fcm_dpo/beta": 0.0037815175019204617,
|
|
"fcm_dpo/delta": 0.13359740376472473,
|
|
"fcm_dpo/margin": 191.9698944091797,
|
|
"fcm_dpo/q_t": 0.34541046619415283,
|
|
"grad_norm": 50.32596206665039,
|
|
"learning_rate": 1.8300307303259904e-08,
|
|
"logits/chosen": 0.2882560193538666,
|
|
"logits/rejected": 0.30261147022247314,
|
|
"logps/chosen": -519.4744262695312,
|
|
"logps/ref_chosen": -58.08403778076172,
|
|
"logps/ref_rejected": -79.777099609375,
|
|
"logps/rejected": -733.137451171875,
|
|
"loss": 0.9034,
|
|
"margin_dpo/margin_mean": 191.96990966796875,
|
|
"margin_dpo/margin_std": 189.93313598632812,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.8928046989720999,
|
|
"fcm_dpo/beta": 0.0038187121972441673,
|
|
"fcm_dpo/delta": 0.02122838981449604,
|
|
"fcm_dpo/margin": 217.48269653320312,
|
|
"fcm_dpo/q_t": 0.32343119382858276,
|
|
"grad_norm": 38.28920364379883,
|
|
"learning_rate": 1.7821396810182437e-08,
|
|
"logits/chosen": 0.33918851613998413,
|
|
"logits/rejected": 0.37244075536727905,
|
|
"logps/chosen": -508.8272399902344,
|
|
"logps/ref_chosen": -57.450836181640625,
|
|
"logps/ref_rejected": -94.77339172363281,
|
|
"logps/rejected": -763.6325073242188,
|
|
"loss": 0.8291,
|
|
"margin_dpo/margin_mean": 217.4827117919922,
|
|
"margin_dpo/margin_std": 190.2075958251953,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.8942731277533039,
|
|
"fcm_dpo/beta": 0.0037732236087322235,
|
|
"fcm_dpo/delta": -0.0848221629858017,
|
|
"fcm_dpo/margin": 245.67294311523438,
|
|
"fcm_dpo/q_t": 0.3045893907546997,
|
|
"grad_norm": 58.272987365722656,
|
|
"learning_rate": 1.7348604439226617e-08,
|
|
"logits/chosen": 0.3716045022010803,
|
|
"logits/rejected": 0.3981201648712158,
|
|
"logps/chosen": -489.30975341796875,
|
|
"logps/ref_chosen": -58.805355072021484,
|
|
"logps/ref_rejected": -88.81600952148438,
|
|
"logps/rejected": -764.9933471679688,
|
|
"loss": 0.7859,
|
|
"margin_dpo/margin_mean": 245.67294311523438,
|
|
"margin_dpo/margin_std": 212.65243530273438,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.895741556534508,
|
|
"fcm_dpo/beta": 0.003800982143729925,
|
|
"fcm_dpo/delta": 0.07397213578224182,
|
|
"fcm_dpo/margin": 205.66021728515625,
|
|
"fcm_dpo/q_t": 0.3356453776359558,
|
|
"grad_norm": 50.508609771728516,
|
|
"learning_rate": 1.6881942648911074e-08,
|
|
"logits/chosen": 0.3072008490562439,
|
|
"logits/rejected": 0.3390934467315674,
|
|
"logps/chosen": -503.4935302734375,
|
|
"logps/ref_chosen": -65.69503784179688,
|
|
"logps/ref_rejected": -83.40538787841797,
|
|
"logps/rejected": -726.8641357421875,
|
|
"loss": 0.8856,
|
|
"margin_dpo/margin_mean": 205.66021728515625,
|
|
"margin_dpo/margin_std": 207.76187133789062,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.8972099853157122,
|
|
"fcm_dpo/beta": 0.003831944428384304,
|
|
"fcm_dpo/delta": -0.020623497664928436,
|
|
"fcm_dpo/margin": 226.4250946044922,
|
|
"fcm_dpo/q_t": 0.3246605396270752,
|
|
"grad_norm": 47.413475036621094,
|
|
"learning_rate": 1.6421423736208e-08,
|
|
"logits/chosen": 0.37874239683151245,
|
|
"logits/rejected": 0.39478302001953125,
|
|
"logps/chosen": -521.7601318359375,
|
|
"logps/ref_chosen": -52.59946823120117,
|
|
"logps/ref_rejected": -86.33099365234375,
|
|
"logps/rejected": -781.916748046875,
|
|
"loss": 0.864,
|
|
"margin_dpo/margin_mean": 226.42507934570312,
|
|
"margin_dpo/margin_std": 232.66787719726562,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.8986784140969163,
|
|
"fcm_dpo/beta": 0.0037551377899944782,
|
|
"fcm_dpo/delta": -0.0866103395819664,
|
|
"fcm_dpo/margin": 247.30072021484375,
|
|
"fcm_dpo/q_t": 0.3015454411506653,
|
|
"grad_norm": 33.60853958129883,
|
|
"learning_rate": 1.5967059836219042e-08,
|
|
"logits/chosen": 0.4164635241031647,
|
|
"logits/rejected": 0.4415166974067688,
|
|
"logps/chosen": -524.9508056640625,
|
|
"logps/ref_chosen": -59.32372283935547,
|
|
"logps/ref_rejected": -88.31239318847656,
|
|
"logps/rejected": -801.240234375,
|
|
"loss": 0.7631,
|
|
"margin_dpo/margin_mean": 247.30072021484375,
|
|
"margin_dpo/margin_std": 186.49124145507812,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.9001468428781204,
|
|
"fcm_dpo/beta": 0.0037209526635706425,
|
|
"fcm_dpo/delta": -0.020325224846601486,
|
|
"fcm_dpo/margin": 233.4252471923828,
|
|
"fcm_dpo/q_t": 0.3151775300502777,
|
|
"grad_norm": 41.69904708862305,
|
|
"learning_rate": 1.551886292185553e-08,
|
|
"logits/chosen": 0.2626866400241852,
|
|
"logits/rejected": 0.2640881836414337,
|
|
"logps/chosen": -483.8834228515625,
|
|
"logps/ref_chosen": -59.72996520996094,
|
|
"logps/ref_rejected": -105.10752868652344,
|
|
"logps/rejected": -762.6862182617188,
|
|
"loss": 0.7969,
|
|
"margin_dpo/margin_mean": 233.42526245117188,
|
|
"margin_dpo/margin_std": 188.75119018554688,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.9016152716593245,
|
|
"fcm_dpo/beta": 0.003673199564218521,
|
|
"fcm_dpo/delta": -0.06576398015022278,
|
|
"fcm_dpo/margin": 247.63795471191406,
|
|
"fcm_dpo/q_t": 0.3121992349624634,
|
|
"grad_norm": 54.678871154785156,
|
|
"learning_rate": 1.507684480352292e-08,
|
|
"logits/chosen": 0.45456773042678833,
|
|
"logits/rejected": 0.461800754070282,
|
|
"logps/chosen": -518.4677124023438,
|
|
"logps/ref_chosen": -52.93898010253906,
|
|
"logps/ref_rejected": -104.67938232421875,
|
|
"logps/rejected": -817.8460693359375,
|
|
"loss": 0.8123,
|
|
"margin_dpo/margin_mean": 247.63795471191406,
|
|
"margin_dpo/margin_std": 225.6822052001953,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.9030837004405287,
|
|
"fcm_dpo/beta": 0.0037553110159933567,
|
|
"fcm_dpo/delta": 0.12973493337631226,
|
|
"fcm_dpo/margin": 194.0587158203125,
|
|
"fcm_dpo/q_t": 0.34725838899612427,
|
|
"grad_norm": 37.38533020019531,
|
|
"learning_rate": 1.4641017128809801e-08,
|
|
"logits/chosen": 0.2697402536869049,
|
|
"logits/rejected": 0.28964436054229736,
|
|
"logps/chosen": -507.75042724609375,
|
|
"logps/ref_chosen": -65.81727600097656,
|
|
"logps/ref_rejected": -95.17749786376953,
|
|
"logps/rejected": -731.1693115234375,
|
|
"loss": 0.8965,
|
|
"margin_dpo/margin_mean": 194.0587158203125,
|
|
"margin_dpo/margin_std": 199.11935424804688,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.9045521292217328,
|
|
"fcm_dpo/beta": 0.003917084541171789,
|
|
"fcm_dpo/delta": 0.2312663495540619,
|
|
"fcm_dpo/margin": 161.20529174804688,
|
|
"fcm_dpo/q_t": 0.3703739643096924,
|
|
"grad_norm": 56.471641540527344,
|
|
"learning_rate": 1.4211391382180637e-08,
|
|
"logits/chosen": 0.3983857035636902,
|
|
"logits/rejected": 0.4274812340736389,
|
|
"logps/chosen": -579.7181396484375,
|
|
"logps/ref_chosen": -65.13285827636719,
|
|
"logps/ref_rejected": -74.70050048828125,
|
|
"logps/rejected": -750.4910888671875,
|
|
"loss": 0.9979,
|
|
"margin_dpo/margin_mean": 161.20529174804688,
|
|
"margin_dpo/margin_std": 210.05697631835938,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.9060205580029369,
|
|
"fcm_dpo/beta": 0.004059099592268467,
|
|
"fcm_dpo/delta": 0.15998278558254242,
|
|
"fcm_dpo/margin": 172.38507080078125,
|
|
"fcm_dpo/q_t": 0.3537973463535309,
|
|
"grad_norm": 66.96424865722656,
|
|
"learning_rate": 1.378797888467345e-08,
|
|
"logits/chosen": 0.3297281563282013,
|
|
"logits/rejected": 0.3546701967716217,
|
|
"logps/chosen": -535.5614013671875,
|
|
"logps/ref_chosen": -63.005550384521484,
|
|
"logps/ref_rejected": -64.234130859375,
|
|
"logps/rejected": -709.175048828125,
|
|
"loss": 0.932,
|
|
"margin_dpo/margin_mean": 172.38510131835938,
|
|
"margin_dpo/margin_std": 191.4333953857422,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.9074889867841409,
|
|
"fcm_dpo/beta": 0.004026922397315502,
|
|
"fcm_dpo/delta": -0.12715700268745422,
|
|
"fcm_dpo/margin": 239.53054809570312,
|
|
"fcm_dpo/q_t": 0.3070314824581146,
|
|
"grad_norm": 62.99699020385742,
|
|
"learning_rate": 1.3370790793601371e-08,
|
|
"logits/chosen": 0.33015531301498413,
|
|
"logits/rejected": 0.37518373131752014,
|
|
"logps/chosen": -553.628173828125,
|
|
"logps/ref_chosen": -67.10134887695312,
|
|
"logps/ref_rejected": -92.15340423583984,
|
|
"logps/rejected": -818.2107543945312,
|
|
"loss": 0.823,
|
|
"margin_dpo/margin_mean": 239.53053283691406,
|
|
"margin_dpo/margin_std": 232.60379028320312,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.908957415565345,
|
|
"fcm_dpo/beta": 0.00403250940144062,
|
|
"fcm_dpo/delta": 0.08001011610031128,
|
|
"fcm_dpo/margin": 192.30813598632812,
|
|
"fcm_dpo/q_t": 0.35367268323898315,
|
|
"grad_norm": 59.05690383911133,
|
|
"learning_rate": 1.2959838102258535e-08,
|
|
"logits/chosen": 0.38170328736305237,
|
|
"logits/rejected": 0.4015880227088928,
|
|
"logps/chosen": -535.88916015625,
|
|
"logps/ref_chosen": -55.978233337402344,
|
|
"logps/ref_rejected": -93.1854019165039,
|
|
"logps/rejected": -765.404541015625,
|
|
"loss": 0.982,
|
|
"margin_dpo/margin_mean": 192.30813598632812,
|
|
"margin_dpo/margin_std": 265.18170166015625,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.9104258443465492,
|
|
"fcm_dpo/beta": 0.004014415666460991,
|
|
"fcm_dpo/delta": 0.02012522518634796,
|
|
"fcm_dpo/margin": 206.92474365234375,
|
|
"fcm_dpo/q_t": 0.3321415185928345,
|
|
"grad_norm": 58.67335891723633,
|
|
"learning_rate": 1.2555131639630567e-08,
|
|
"logits/chosen": 0.38881373405456543,
|
|
"logits/rejected": 0.41763806343078613,
|
|
"logps/chosen": -512.077880859375,
|
|
"logps/ref_chosen": -59.79750061035156,
|
|
"logps/ref_rejected": -78.41075134277344,
|
|
"logps/rejected": -737.6159057617188,
|
|
"loss": 0.8791,
|
|
"margin_dpo/margin_mean": 206.92474365234375,
|
|
"margin_dpo/margin_std": 222.4522705078125,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.9118942731277533,
|
|
"fcm_dpo/beta": 0.003981994464993477,
|
|
"fcm_dpo/delta": -0.167287215590477,
|
|
"fcm_dpo/margin": 251.25936889648438,
|
|
"fcm_dpo/q_t": 0.2956671714782715,
|
|
"grad_norm": 49.28880310058594,
|
|
"learning_rate": 1.2156682070109086e-08,
|
|
"logits/chosen": 0.4875348210334778,
|
|
"logits/rejected": 0.504463255405426,
|
|
"logps/chosen": -502.497802734375,
|
|
"logps/ref_chosen": -53.93375778198242,
|
|
"logps/ref_rejected": -88.36951446533203,
|
|
"logps/rejected": -788.1929321289062,
|
|
"loss": 0.7763,
|
|
"margin_dpo/margin_mean": 251.25936889648438,
|
|
"margin_dpo/margin_std": 220.83163452148438,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.9133627019089574,
|
|
"fcm_dpo/beta": 0.0038708075881004333,
|
|
"fcm_dpo/delta": -0.01944926381111145,
|
|
"fcm_dpo/margin": 223.79840087890625,
|
|
"fcm_dpo/q_t": 0.3231251835823059,
|
|
"grad_norm": 44.725032806396484,
|
|
"learning_rate": 1.1764499893210878e-08,
|
|
"logits/chosen": 0.297042578458786,
|
|
"logits/rejected": 0.3068755269050598,
|
|
"logps/chosen": -501.72003173828125,
|
|
"logps/ref_chosen": -60.28582000732422,
|
|
"logps/ref_rejected": -85.51873779296875,
|
|
"logps/rejected": -750.7514038085938,
|
|
"loss": 0.8368,
|
|
"margin_dpo/margin_mean": 223.79840087890625,
|
|
"margin_dpo/margin_std": 217.63043212890625,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.9148311306901615,
|
|
"fcm_dpo/beta": 0.003980166278779507,
|
|
"fcm_dpo/delta": 0.1460477113723755,
|
|
"fcm_dpo/margin": 179.51904296875,
|
|
"fcm_dpo/q_t": 0.3587492108345032,
|
|
"grad_norm": 75.94093322753906,
|
|
"learning_rate": 1.1378595443300998e-08,
|
|
"logits/chosen": 0.4989446997642517,
|
|
"logits/rejected": 0.5313464403152466,
|
|
"logps/chosen": -542.784423828125,
|
|
"logps/ref_chosen": -64.1569595336914,
|
|
"logps/ref_rejected": -85.08304595947266,
|
|
"logps/rejected": -743.2296142578125,
|
|
"loss": 0.9751,
|
|
"margin_dpo/margin_mean": 179.51904296875,
|
|
"margin_dpo/margin_std": 242.48092651367188,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.9162995594713657,
|
|
"fcm_dpo/beta": 0.0039426954463124275,
|
|
"fcm_dpo/delta": -0.11975458264350891,
|
|
"fcm_dpo/margin": 243.0377655029297,
|
|
"fcm_dpo/q_t": 0.30364617705345154,
|
|
"grad_norm": 53.13152313232422,
|
|
"learning_rate": 1.0998978889320582e-08,
|
|
"logits/chosen": 0.29805538058280945,
|
|
"logits/rejected": 0.33957502245903015,
|
|
"logps/chosen": -545.13232421875,
|
|
"logps/ref_chosen": -71.91862487792969,
|
|
"logps/ref_rejected": -97.13203430175781,
|
|
"logps/rejected": -813.383544921875,
|
|
"loss": 0.8084,
|
|
"margin_dpo/margin_mean": 243.03778076171875,
|
|
"margin_dpo/margin_std": 226.8072509765625,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.9177679882525698,
|
|
"fcm_dpo/beta": 0.003897000104188919,
|
|
"fcm_dpo/delta": -0.03251214325428009,
|
|
"fcm_dpo/margin": 225.71258544921875,
|
|
"fcm_dpo/q_t": 0.3202013373374939,
|
|
"grad_norm": 51.93879699707031,
|
|
"learning_rate": 1.0625660234518913e-08,
|
|
"logits/chosen": 0.36836007237434387,
|
|
"logits/rejected": 0.4131643772125244,
|
|
"logps/chosen": -496.9722900390625,
|
|
"logps/ref_chosen": -58.342071533203125,
|
|
"logps/ref_rejected": -86.09038543701172,
|
|
"logps/rejected": -750.4332275390625,
|
|
"loss": 0.8224,
|
|
"margin_dpo/margin_mean": 225.71258544921875,
|
|
"margin_dpo/margin_std": 214.71905517578125,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.9192364170337739,
|
|
"fcm_dpo/beta": 0.004009204916656017,
|
|
"fcm_dpo/delta": 0.19411587715148926,
|
|
"fcm_dpo/margin": 166.82199096679688,
|
|
"fcm_dpo/q_t": 0.36621803045272827,
|
|
"grad_norm": 52.73582458496094,
|
|
"learning_rate": 1.0258649316189721e-08,
|
|
"logits/chosen": 0.229852557182312,
|
|
"logits/rejected": 0.24690240621566772,
|
|
"logps/chosen": -562.8541259765625,
|
|
"logps/ref_chosen": -75.11260986328125,
|
|
"logps/ref_rejected": -99.188720703125,
|
|
"logps/rejected": -753.752197265625,
|
|
"loss": 0.9865,
|
|
"margin_dpo/margin_mean": 166.82199096679688,
|
|
"margin_dpo/margin_std": 216.57113647460938,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.920704845814978,
|
|
"fcm_dpo/beta": 0.003970067948102951,
|
|
"fcm_dpo/delta": -0.1275482475757599,
|
|
"fcm_dpo/margin": 243.10336303710938,
|
|
"fcm_dpo/q_t": 0.31875503063201904,
|
|
"grad_norm": 54.12873077392578,
|
|
"learning_rate": 9.897955805412e-09,
|
|
"logits/chosen": 0.2737880349159241,
|
|
"logits/rejected": 0.2716522216796875,
|
|
"logps/chosen": -457.3752136230469,
|
|
"logps/ref_chosen": -47.74314880371094,
|
|
"logps/ref_rejected": -106.75448608398438,
|
|
"logps/rejected": -759.4898681640625,
|
|
"loss": 0.8554,
|
|
"margin_dpo/margin_mean": 243.1033477783203,
|
|
"margin_dpo/margin_std": 278.228759765625,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.922173274596182,
|
|
"fcm_dpo/beta": 0.003842130536213517,
|
|
"fcm_dpo/delta": -0.12872882187366486,
|
|
"fcm_dpo/margin": 250.9643096923828,
|
|
"fcm_dpo/q_t": 0.30476510524749756,
|
|
"grad_norm": 57.179996490478516,
|
|
"learning_rate": 9.543589206795238e-09,
|
|
"logits/chosen": 0.40602755546569824,
|
|
"logits/rejected": 0.43877482414245605,
|
|
"logps/chosen": -525.9154663085938,
|
|
"logps/ref_chosen": -60.182945251464844,
|
|
"logps/ref_rejected": -101.55467224121094,
|
|
"logps/rejected": -818.25146484375,
|
|
"loss": 0.7991,
|
|
"margin_dpo/margin_mean": 250.9643096923828,
|
|
"margin_dpo/margin_std": 231.17425537109375,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.9236417033773862,
|
|
"fcm_dpo/beta": 0.0038544987328350544,
|
|
"fcm_dpo/delta": 0.02374306507408619,
|
|
"fcm_dpo/margin": 214.87026977539062,
|
|
"fcm_dpo/q_t": 0.3250948488712311,
|
|
"grad_norm": 44.98953628540039,
|
|
"learning_rate": 9.19555885822887e-09,
|
|
"logits/chosen": 0.38332074880599976,
|
|
"logits/rejected": 0.4074392020702362,
|
|
"logps/chosen": -532.5030517578125,
|
|
"logps/ref_chosen": -64.21354675292969,
|
|
"logps/ref_rejected": -91.65367126464844,
|
|
"logps/rejected": -774.8133544921875,
|
|
"loss": 0.8342,
|
|
"margin_dpo/margin_mean": 214.87026977539062,
|
|
"margin_dpo/margin_std": 191.0750732421875,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.9251101321585903,
|
|
"fcm_dpo/beta": 0.004004511050879955,
|
|
"fcm_dpo/delta": 0.21656584739685059,
|
|
"fcm_dpo/margin": 161.44676208496094,
|
|
"fcm_dpo/q_t": 0.37145158648490906,
|
|
"grad_norm": 80.25105285644531,
|
|
"learning_rate": 8.85387393063622e-09,
|
|
"logits/chosen": 0.24440869688987732,
|
|
"logits/rejected": 0.2650578022003174,
|
|
"logps/chosen": -476.164306640625,
|
|
"logps/ref_chosen": -59.29100036621094,
|
|
"logps/ref_rejected": -83.59829711914062,
|
|
"logps/rejected": -661.9183349609375,
|
|
"loss": 1.0023,
|
|
"margin_dpo/margin_mean": 161.44677734375,
|
|
"margin_dpo/margin_std": 225.14974975585938,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.9265785609397944,
|
|
"fcm_dpo/beta": 0.004093495197594166,
|
|
"fcm_dpo/delta": 0.07495088130235672,
|
|
"fcm_dpo/margin": 190.66612243652344,
|
|
"fcm_dpo/q_t": 0.345422625541687,
|
|
"grad_norm": 56.53142166137695,
|
|
"learning_rate": 8.518543427732949e-09,
|
|
"logits/chosen": 0.4773872494697571,
|
|
"logits/rejected": 0.5107876658439636,
|
|
"logps/chosen": -547.016845703125,
|
|
"logps/ref_chosen": -59.45360565185547,
|
|
"logps/ref_rejected": -80.95156860351562,
|
|
"logps/rejected": -759.1809692382812,
|
|
"loss": 0.9594,
|
|
"margin_dpo/margin_mean": 190.6661376953125,
|
|
"margin_dpo/margin_std": 245.64004516601562,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.9280469897209985,
|
|
"fcm_dpo/beta": 0.004062248859554529,
|
|
"fcm_dpo/delta": -0.014684807509183884,
|
|
"fcm_dpo/margin": 212.34161376953125,
|
|
"fcm_dpo/q_t": 0.3262500762939453,
|
|
"grad_norm": 50.996089935302734,
|
|
"learning_rate": 8.189576185789637e-09,
|
|
"logits/chosen": 0.4748516082763672,
|
|
"logits/rejected": 0.5130486488342285,
|
|
"logps/chosen": -536.0592041015625,
|
|
"logps/ref_chosen": -61.35155487060547,
|
|
"logps/ref_rejected": -86.16017150878906,
|
|
"logps/rejected": -773.20947265625,
|
|
"loss": 0.9309,
|
|
"margin_dpo/margin_mean": 212.34161376953125,
|
|
"margin_dpo/margin_std": 257.6206359863281,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.9295154185022027,
|
|
"fcm_dpo/beta": 0.004250052385032177,
|
|
"fcm_dpo/delta": 0.20158423483371735,
|
|
"fcm_dpo/margin": 154.8126678466797,
|
|
"fcm_dpo/q_t": 0.3648519217967987,
|
|
"grad_norm": 59.050899505615234,
|
|
"learning_rate": 7.866980873399015e-09,
|
|
"logits/chosen": 0.40178602933883667,
|
|
"logits/rejected": 0.40316498279571533,
|
|
"logps/chosen": -537.0020751953125,
|
|
"logps/ref_chosen": -57.27816390991211,
|
|
"logps/ref_rejected": -91.58395385742188,
|
|
"logps/rejected": -726.1204833984375,
|
|
"loss": 1.0244,
|
|
"margin_dpo/margin_mean": 154.8126678466797,
|
|
"margin_dpo/margin_std": 218.823486328125,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.9309838472834068,
|
|
"fcm_dpo/beta": 0.004316710866987705,
|
|
"fcm_dpo/delta": 0.12352225929498672,
|
|
"fcm_dpo/margin": 170.43814086914062,
|
|
"fcm_dpo/q_t": 0.35788172483444214,
|
|
"grad_norm": 69.04047393798828,
|
|
"learning_rate": 7.550765991247654e-09,
|
|
"logits/chosen": 0.33094990253448486,
|
|
"logits/rejected": 0.3353114426136017,
|
|
"logps/chosen": -551.9486694335938,
|
|
"logps/ref_chosen": -66.61896514892578,
|
|
"logps/ref_rejected": -107.12564849853516,
|
|
"logps/rejected": -762.8934936523438,
|
|
"loss": 0.9679,
|
|
"margin_dpo/margin_mean": 170.43814086914062,
|
|
"margin_dpo/margin_std": 223.65484619140625,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.9324522760646109,
|
|
"fcm_dpo/beta": 0.00439867377281189,
|
|
"fcm_dpo/delta": 0.05664564669132233,
|
|
"fcm_dpo/margin": 181.37939453125,
|
|
"fcm_dpo/q_t": 0.34000444412231445,
|
|
"grad_norm": 53.52330017089844,
|
|
"learning_rate": 7.240939871891699e-09,
|
|
"logits/chosen": 0.2910565435886383,
|
|
"logits/rejected": 0.3432241678237915,
|
|
"logps/chosen": -531.345703125,
|
|
"logps/ref_chosen": -73.95551300048828,
|
|
"logps/ref_rejected": -82.50045776367188,
|
|
"logps/rejected": -721.2700805664062,
|
|
"loss": 0.8902,
|
|
"margin_dpo/margin_mean": 181.37940979003906,
|
|
"margin_dpo/margin_std": 200.13851928710938,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.933920704845815,
|
|
"fcm_dpo/beta": 0.004396742209792137,
|
|
"fcm_dpo/delta": -0.09724476933479309,
|
|
"fcm_dpo/margin": 213.02749633789062,
|
|
"fcm_dpo/q_t": 0.31033921241760254,
|
|
"grad_norm": 61.685367584228516,
|
|
"learning_rate": 6.937510679537628e-09,
|
|
"logits/chosen": 0.26561087369918823,
|
|
"logits/rejected": 0.2920706868171692,
|
|
"logps/chosen": -503.4400329589844,
|
|
"logps/ref_chosen": -59.628910064697266,
|
|
"logps/ref_rejected": -81.97883605957031,
|
|
"logps/rejected": -738.8173828125,
|
|
"loss": 0.8165,
|
|
"margin_dpo/margin_mean": 213.02749633789062,
|
|
"margin_dpo/margin_std": 203.03334045410156,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.9353891336270191,
|
|
"fcm_dpo/beta": 0.00424953643232584,
|
|
"fcm_dpo/delta": -0.104090616106987,
|
|
"fcm_dpo/margin": 222.14358520507812,
|
|
"fcm_dpo/q_t": 0.31200093030929565,
|
|
"grad_norm": 56.29432678222656,
|
|
"learning_rate": 6.640486409826785e-09,
|
|
"logits/chosen": 0.47558510303497314,
|
|
"logits/rejected": 0.4967935085296631,
|
|
"logps/chosen": -525.0879516601562,
|
|
"logps/ref_chosen": -49.652687072753906,
|
|
"logps/ref_rejected": -98.40513610839844,
|
|
"logps/rejected": -795.9840087890625,
|
|
"loss": 0.8226,
|
|
"margin_dpo/margin_mean": 222.14358520507812,
|
|
"margin_dpo/margin_std": 222.4608154296875,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.9368575624082232,
|
|
"fcm_dpo/beta": 0.004241650924086571,
|
|
"fcm_dpo/delta": -0.08875752985477448,
|
|
"fcm_dpo/margin": 218.70045471191406,
|
|
"fcm_dpo/q_t": 0.30789196491241455,
|
|
"grad_norm": 49.7493896484375,
|
|
"learning_rate": 6.349874889624962e-09,
|
|
"logits/chosen": 0.3231136202812195,
|
|
"logits/rejected": 0.34823527932167053,
|
|
"logps/chosen": -474.68023681640625,
|
|
"logps/ref_chosen": -58.156639099121094,
|
|
"logps/ref_rejected": -79.3014907836914,
|
|
"logps/rejected": -714.5255737304688,
|
|
"loss": 0.7903,
|
|
"margin_dpo/margin_mean": 218.700439453125,
|
|
"margin_dpo/margin_std": 176.45599365234375,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.9383259911894273,
|
|
"fcm_dpo/beta": 0.004274230450391769,
|
|
"fcm_dpo/delta": 0.15763217210769653,
|
|
"fcm_dpo/margin": 164.3897247314453,
|
|
"fcm_dpo/q_t": 0.3588542342185974,
|
|
"grad_norm": 71.79363250732422,
|
|
"learning_rate": 6.065683776815933e-09,
|
|
"logits/chosen": 0.2641649842262268,
|
|
"logits/rejected": 0.2947526276111603,
|
|
"logps/chosen": -589.9560546875,
|
|
"logps/ref_chosen": -72.32319641113281,
|
|
"logps/ref_rejected": -74.2749252319336,
|
|
"logps/rejected": -756.2974853515625,
|
|
"loss": 0.9909,
|
|
"margin_dpo/margin_mean": 164.38970947265625,
|
|
"margin_dpo/margin_std": 221.13119506835938,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.9397944199706314,
|
|
"fcm_dpo/beta": 0.004170107655227184,
|
|
"fcm_dpo/delta": -0.2509539723396301,
|
|
"fcm_dpo/margin": 257.6911926269531,
|
|
"fcm_dpo/q_t": 0.2941976487636566,
|
|
"grad_norm": 45.10141372680664,
|
|
"learning_rate": 5.7879205600998296e-09,
|
|
"logits/chosen": 0.2849113941192627,
|
|
"logits/rejected": 0.3171135187149048,
|
|
"logps/chosen": -502.7310485839844,
|
|
"logps/ref_chosen": -56.13436508178711,
|
|
"logps/ref_rejected": -108.60014343261719,
|
|
"logps/rejected": -812.8880004882812,
|
|
"loss": 0.773,
|
|
"margin_dpo/margin_mean": 257.691162109375,
|
|
"margin_dpo/margin_std": 253.909423828125,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.9412628487518355,
|
|
"fcm_dpo/beta": 0.004141503944993019,
|
|
"fcm_dpo/delta": 0.13046443462371826,
|
|
"fcm_dpo/margin": 176.06033325195312,
|
|
"fcm_dpo/q_t": 0.3567558825016022,
|
|
"grad_norm": 63.07670593261719,
|
|
"learning_rate": 5.516592558795746e-09,
|
|
"logits/chosen": 0.3889944553375244,
|
|
"logits/rejected": 0.41144323348999023,
|
|
"logps/chosen": -580.721923828125,
|
|
"logps/ref_chosen": -64.99689483642578,
|
|
"logps/ref_rejected": -86.99232482910156,
|
|
"logps/rejected": -778.7777099609375,
|
|
"loss": 0.975,
|
|
"margin_dpo/margin_mean": 176.06033325195312,
|
|
"margin_dpo/margin_std": 239.46539306640625,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.9427312775330396,
|
|
"fcm_dpo/beta": 0.004168546758592129,
|
|
"fcm_dpo/delta": -0.10531463474035263,
|
|
"fcm_dpo/margin": 226.55740356445312,
|
|
"fcm_dpo/q_t": 0.3239438533782959,
|
|
"grad_norm": 55.98142623901367,
|
|
"learning_rate": 5.251706922648868e-09,
|
|
"logits/chosen": 0.3034379482269287,
|
|
"logits/rejected": 0.32045918703079224,
|
|
"logps/chosen": -527.4705810546875,
|
|
"logps/ref_chosen": -65.68924713134766,
|
|
"logps/ref_rejected": -110.24205017089844,
|
|
"logps/rejected": -798.580810546875,
|
|
"loss": 0.8795,
|
|
"margin_dpo/margin_mean": 226.5574188232422,
|
|
"margin_dpo/margin_std": 269.38568115234375,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.9441997063142438,
|
|
"fcm_dpo/beta": 0.0040768347680568695,
|
|
"fcm_dpo/delta": 0.026233568787574768,
|
|
"fcm_dpo/margin": 202.13023376464844,
|
|
"fcm_dpo/q_t": 0.3279660940170288,
|
|
"grad_norm": 47.93162536621094,
|
|
"learning_rate": 4.993270631642038e-09,
|
|
"logits/chosen": 0.41052716970443726,
|
|
"logits/rejected": 0.4136253595352173,
|
|
"logps/chosen": -503.70599365234375,
|
|
"logps/ref_chosen": -51.94999694824219,
|
|
"logps/ref_rejected": -87.46833801269531,
|
|
"logps/rejected": -741.3546142578125,
|
|
"loss": 0.8582,
|
|
"margin_dpo/margin_mean": 202.1302490234375,
|
|
"margin_dpo/margin_std": 190.30067443847656,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.9456681350954479,
|
|
"fcm_dpo/beta": 0.004156759940087795,
|
|
"fcm_dpo/delta": 0.03570423275232315,
|
|
"fcm_dpo/margin": 196.5560302734375,
|
|
"fcm_dpo/q_t": 0.33971697092056274,
|
|
"grad_norm": 55.22520446777344,
|
|
"learning_rate": 4.741290495811873e-09,
|
|
"logits/chosen": 0.30671143531799316,
|
|
"logits/rejected": 0.3282683193683624,
|
|
"logps/chosen": -509.8187255859375,
|
|
"logps/ref_chosen": -59.017662048339844,
|
|
"logps/ref_rejected": -87.13668823242188,
|
|
"logps/rejected": -734.4937744140625,
|
|
"loss": 0.9303,
|
|
"margin_dpo/margin_mean": 196.55601501464844,
|
|
"margin_dpo/margin_std": 242.78518676757812,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.947136563876652,
|
|
"fcm_dpo/beta": 0.004250319674611092,
|
|
"fcm_dpo/delta": 0.20495975017547607,
|
|
"fcm_dpo/margin": 155.01222229003906,
|
|
"fcm_dpo/q_t": 0.37516582012176514,
|
|
"grad_norm": 72.1839828491211,
|
|
"learning_rate": 4.495773155069299e-09,
|
|
"logits/chosen": 0.2767280340194702,
|
|
"logits/rejected": 0.2697181701660156,
|
|
"logps/chosen": -518.177978515625,
|
|
"logps/ref_chosen": -55.87602233886719,
|
|
"logps/ref_rejected": -97.78080749511719,
|
|
"logps/rejected": -715.094970703125,
|
|
"loss": 1.0398,
|
|
"margin_dpo/margin_mean": 155.01222229003906,
|
|
"margin_dpo/margin_std": 236.4263153076172,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.9486049926578561,
|
|
"fcm_dpo/beta": 0.004299083724617958,
|
|
"fcm_dpo/delta": 0.007297724485397339,
|
|
"fcm_dpo/margin": 195.88906860351562,
|
|
"fcm_dpo/q_t": 0.32827213406562805,
|
|
"grad_norm": 65.47455596923828,
|
|
"learning_rate": 4.256725079024553e-09,
|
|
"logits/chosen": 0.431162565946579,
|
|
"logits/rejected": 0.451549768447876,
|
|
"logps/chosen": -525.3079223632812,
|
|
"logps/ref_chosen": -61.275787353515625,
|
|
"logps/ref_rejected": -77.50580596923828,
|
|
"logps/rejected": -737.427001953125,
|
|
"loss": 0.8659,
|
|
"margin_dpo/margin_mean": 195.88906860351562,
|
|
"margin_dpo/margin_std": 201.49288940429688,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.9500734214390602,
|
|
"fcm_dpo/beta": 0.004347026348114014,
|
|
"fcm_dpo/delta": 0.009064847603440285,
|
|
"fcm_dpo/margin": 193.61483764648438,
|
|
"fcm_dpo/q_t": 0.33186376094818115,
|
|
"grad_norm": 48.380271911621094,
|
|
"learning_rate": 4.024152566816791e-09,
|
|
"logits/chosen": 0.3102216124534607,
|
|
"logits/rejected": 0.3185359835624695,
|
|
"logps/chosen": -478.88702392578125,
|
|
"logps/ref_chosen": -54.8524169921875,
|
|
"logps/ref_rejected": -93.5194091796875,
|
|
"logps/rejected": -711.1688232421875,
|
|
"loss": 0.902,
|
|
"margin_dpo/margin_mean": 193.61483764648438,
|
|
"margin_dpo/margin_std": 220.6673583984375,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.9515418502202643,
|
|
"fcm_dpo/beta": 0.004186989739537239,
|
|
"fcm_dpo/delta": -0.2561682164669037,
|
|
"fcm_dpo/margin": 257.642578125,
|
|
"fcm_dpo/q_t": 0.30367809534072876,
|
|
"grad_norm": 43.26372528076172,
|
|
"learning_rate": 3.798061746947995e-09,
|
|
"logits/chosen": 0.40941837430000305,
|
|
"logits/rejected": 0.4284679889678955,
|
|
"logps/chosen": -514.1665649414062,
|
|
"logps/ref_chosen": -54.17146682739258,
|
|
"logps/ref_rejected": -98.7127914428711,
|
|
"logps/rejected": -816.3504638671875,
|
|
"loss": 0.8326,
|
|
"margin_dpo/margin_mean": 257.642578125,
|
|
"margin_dpo/margin_std": 296.09918212890625,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.9530102790014684,
|
|
"fcm_dpo/beta": 0.004244859330356121,
|
|
"fcm_dpo/delta": 0.16276229918003082,
|
|
"fcm_dpo/margin": 164.3397979736328,
|
|
"fcm_dpo/q_t": 0.36515751481056213,
|
|
"grad_norm": 71.0632553100586,
|
|
"learning_rate": 3.5784585771215235e-09,
|
|
"logits/chosen": 0.400471031665802,
|
|
"logits/rejected": 0.4290485382080078,
|
|
"logps/chosen": -526.2083740234375,
|
|
"logps/ref_chosen": -62.480350494384766,
|
|
"logps/ref_rejected": -80.07717895507812,
|
|
"logps/rejected": -708.14501953125,
|
|
"loss": 0.9944,
|
|
"margin_dpo/margin_mean": 164.33978271484375,
|
|
"margin_dpo/margin_std": 230.75076293945312,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.9544787077826725,
|
|
"fcm_dpo/beta": 0.004142679274082184,
|
|
"fcm_dpo/delta": -0.1436581313610077,
|
|
"fcm_dpo/margin": 235.9899139404297,
|
|
"fcm_dpo/q_t": 0.31016072630882263,
|
|
"grad_norm": 54.92049789428711,
|
|
"learning_rate": 3.3653488440851253e-09,
|
|
"logits/chosen": 0.4193887710571289,
|
|
"logits/rejected": 0.43305301666259766,
|
|
"logps/chosen": -511.2436828613281,
|
|
"logps/ref_chosen": -56.09281921386719,
|
|
"logps/ref_rejected": -98.26483917236328,
|
|
"logps/rejected": -789.4055786132812,
|
|
"loss": 0.8367,
|
|
"margin_dpo/margin_mean": 235.98989868164062,
|
|
"margin_dpo/margin_std": 247.71554565429688,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.9559471365638766,
|
|
"fcm_dpo/beta": 0.004029122181236744,
|
|
"fcm_dpo/delta": -0.15634776651859283,
|
|
"fcm_dpo/margin": 245.6698455810547,
|
|
"fcm_dpo/q_t": 0.29818981885910034,
|
|
"grad_norm": 70.62873077392578,
|
|
"learning_rate": 3.158738163478475e-09,
|
|
"logits/chosen": 0.38377705216407776,
|
|
"logits/rejected": 0.3832590878009796,
|
|
"logps/chosen": -448.92266845703125,
|
|
"logps/ref_chosen": -43.42544937133789,
|
|
"logps/ref_rejected": -99.95791625976562,
|
|
"logps/rejected": -751.1248779296875,
|
|
"loss": 0.7724,
|
|
"margin_dpo/margin_mean": 245.66983032226562,
|
|
"margin_dpo/margin_std": 212.33193969726562,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 0.9574155653450808,
|
|
"fcm_dpo/beta": 0.004000368528068066,
|
|
"fcm_dpo/delta": -0.031687185168266296,
|
|
"fcm_dpo/margin": 219.70718383789062,
|
|
"fcm_dpo/q_t": 0.32615405321121216,
|
|
"grad_norm": 50.73948287963867,
|
|
"learning_rate": 2.9586319796851555e-09,
|
|
"logits/chosen": 0.328296959400177,
|
|
"logits/rejected": 0.3477246165275574,
|
|
"logps/chosen": -495.43768310546875,
|
|
"logps/ref_chosen": -62.57680892944336,
|
|
"logps/ref_rejected": -111.76779174804688,
|
|
"logps/rejected": -764.3358154296875,
|
|
"loss": 0.8531,
|
|
"margin_dpo/margin_mean": 219.70718383789062,
|
|
"margin_dpo/margin_std": 232.84384155273438,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 0.9588839941262849,
|
|
"fcm_dpo/beta": 0.003977050073444843,
|
|
"fcm_dpo/delta": -0.02044288069009781,
|
|
"fcm_dpo/margin": 218.42001342773438,
|
|
"fcm_dpo/q_t": 0.3240339756011963,
|
|
"grad_norm": 41.00118637084961,
|
|
"learning_rate": 2.7650355656892166e-09,
|
|
"logits/chosen": 0.3624820113182068,
|
|
"logits/rejected": 0.36829936504364014,
|
|
"logps/chosen": -525.9445190429688,
|
|
"logps/ref_chosen": -61.11295700073242,
|
|
"logps/ref_rejected": -103.24960327148438,
|
|
"logps/rejected": -786.501220703125,
|
|
"loss": 0.8455,
|
|
"margin_dpo/margin_mean": 218.42002868652344,
|
|
"margin_dpo/margin_std": 219.990966796875,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 0.960352422907489,
|
|
"fcm_dpo/beta": 0.004003086127340794,
|
|
"fcm_dpo/delta": 0.099447101354599,
|
|
"fcm_dpo/margin": 189.29266357421875,
|
|
"fcm_dpo/q_t": 0.35448741912841797,
|
|
"grad_norm": 52.74611282348633,
|
|
"learning_rate": 2.577954022936174e-09,
|
|
"logits/chosen": 0.3803136944770813,
|
|
"logits/rejected": 0.3972058892250061,
|
|
"logps/chosen": -546.88037109375,
|
|
"logps/ref_chosen": -61.7281379699707,
|
|
"logps/ref_rejected": -98.7738037109375,
|
|
"logps/rejected": -773.2186889648438,
|
|
"loss": 0.9898,
|
|
"margin_dpo/margin_mean": 189.29266357421875,
|
|
"margin_dpo/margin_std": 264.4116516113281,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 0.9618208516886931,
|
|
"fcm_dpo/beta": 0.0040547847747802734,
|
|
"fcm_dpo/delta": 0.04648479074239731,
|
|
"fcm_dpo/margin": 198.96510314941406,
|
|
"fcm_dpo/q_t": 0.34709632396698,
|
|
"grad_norm": 67.65664672851562,
|
|
"learning_rate": 2.397392281198729e-09,
|
|
"logits/chosen": 0.45737969875335693,
|
|
"logits/rejected": 0.46436363458633423,
|
|
"logps/chosen": -495.4986572265625,
|
|
"logps/ref_chosen": -49.576812744140625,
|
|
"logps/ref_rejected": -98.29183197021484,
|
|
"logps/rejected": -743.1787719726562,
|
|
"loss": 0.9368,
|
|
"margin_dpo/margin_mean": 198.96511840820312,
|
|
"margin_dpo/margin_std": 253.21934509277344,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.9632892804698973,
|
|
"fcm_dpo/beta": 0.003890886902809143,
|
|
"fcm_dpo/delta": -0.3531903326511383,
|
|
"fcm_dpo/margin": 298.80157470703125,
|
|
"fcm_dpo/q_t": 0.26101160049438477,
|
|
"grad_norm": 74.69690704345703,
|
|
"learning_rate": 2.223355098446622e-09,
|
|
"logits/chosen": 0.3172380328178406,
|
|
"logits/rejected": 0.3397536277770996,
|
|
"logps/chosen": -491.11737060546875,
|
|
"logps/ref_chosen": -52.54943084716797,
|
|
"logps/ref_rejected": -113.67464447021484,
|
|
"logps/rejected": -851.044189453125,
|
|
"loss": 0.6594,
|
|
"margin_dpo/margin_mean": 298.80157470703125,
|
|
"margin_dpo/margin_std": 208.3951416015625,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 0.9647577092511013,
|
|
"fcm_dpo/beta": 0.0036947480402886868,
|
|
"fcm_dpo/delta": -0.17029432952404022,
|
|
"fcm_dpo/margin": 271.0969543457031,
|
|
"fcm_dpo/q_t": 0.2951889634132385,
|
|
"grad_norm": 44.1884880065918,
|
|
"learning_rate": 2.055847060721566e-09,
|
|
"logits/chosen": 0.4047412574291229,
|
|
"logits/rejected": 0.41397354006767273,
|
|
"logps/chosen": -476.5009460449219,
|
|
"logps/ref_chosen": -46.700538635253906,
|
|
"logps/ref_rejected": -97.91487121582031,
|
|
"logps/rejected": -798.812255859375,
|
|
"loss": 0.7586,
|
|
"margin_dpo/margin_mean": 271.0969543457031,
|
|
"margin_dpo/margin_std": 232.99600219726562,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 0.9662261380323054,
|
|
"fcm_dpo/beta": 0.0036933058872818947,
|
|
"fcm_dpo/delta": 0.026351526379585266,
|
|
"fcm_dpo/margin": 223.59996032714844,
|
|
"fcm_dpo/q_t": 0.32264280319213867,
|
|
"grad_norm": 44.01875305175781,
|
|
"learning_rate": 1.8948725820160662e-09,
|
|
"logits/chosen": 0.4098268747329712,
|
|
"logits/rejected": 0.4359194040298462,
|
|
"logps/chosen": -537.1385498046875,
|
|
"logps/ref_chosen": -60.95820999145508,
|
|
"logps/ref_rejected": -95.93949127197266,
|
|
"logps/rejected": -795.7197875976562,
|
|
"loss": 0.8373,
|
|
"margin_dpo/margin_mean": 223.59996032714844,
|
|
"margin_dpo/margin_std": 197.29067993164062,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 0.9676945668135095,
|
|
"fcm_dpo/beta": 0.00366408284753561,
|
|
"fcm_dpo/delta": -0.029130063951015472,
|
|
"fcm_dpo/margin": 238.9795379638672,
|
|
"fcm_dpo/q_t": 0.31922781467437744,
|
|
"grad_norm": 37.774574279785156,
|
|
"learning_rate": 1.7404359041573723e-09,
|
|
"logits/chosen": 0.2552376389503479,
|
|
"logits/rejected": 0.3021022081375122,
|
|
"logps/chosen": -518.0164794921875,
|
|
"logps/ref_chosen": -76.74298095703125,
|
|
"logps/ref_rejected": -87.4709701538086,
|
|
"logps/rejected": -767.7239990234375,
|
|
"loss": 0.8311,
|
|
"margin_dpo/margin_mean": 238.97952270507812,
|
|
"margin_dpo/margin_std": 225.7298583984375,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 0.9691629955947136,
|
|
"fcm_dpo/beta": 0.0036529982462525368,
|
|
"fcm_dpo/delta": -0.053744181990623474,
|
|
"fcm_dpo/margin": 246.07659912109375,
|
|
"fcm_dpo/q_t": 0.3094879984855652,
|
|
"grad_norm": 46.95237731933594,
|
|
"learning_rate": 1.592541096695571e-09,
|
|
"logits/chosen": 0.4749673008918762,
|
|
"logits/rejected": 0.4998527765274048,
|
|
"logps/chosen": -520.5059814453125,
|
|
"logps/ref_chosen": -59.04788589477539,
|
|
"logps/ref_rejected": -75.96005249023438,
|
|
"logps/rejected": -783.4947509765625,
|
|
"loss": 0.7949,
|
|
"margin_dpo/margin_mean": 246.07659912109375,
|
|
"margin_dpo/margin_std": 207.9267578125,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.9706314243759178,
|
|
"fcm_dpo/beta": 0.0036703585647046566,
|
|
"fcm_dpo/delta": -0.015187568962574005,
|
|
"fcm_dpo/margin": 234.8988037109375,
|
|
"fcm_dpo/q_t": 0.3215470314025879,
|
|
"grad_norm": 45.32716751098633,
|
|
"learning_rate": 1.4511920567963908e-09,
|
|
"logits/chosen": 0.3362319767475128,
|
|
"logits/rejected": 0.3641771674156189,
|
|
"logps/chosen": -475.05731201171875,
|
|
"logps/ref_chosen": -50.673973083496094,
|
|
"logps/ref_rejected": -86.00569152832031,
|
|
"logps/rejected": -745.287841796875,
|
|
"loss": 0.8449,
|
|
"margin_dpo/margin_mean": 234.89878845214844,
|
|
"margin_dpo/margin_std": 223.83566284179688,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.9720998531571219,
|
|
"fcm_dpo/beta": 0.0037050643004477024,
|
|
"fcm_dpo/delta": 0.12543530762195587,
|
|
"fcm_dpo/margin": 197.98480224609375,
|
|
"fcm_dpo/q_t": 0.34611234068870544,
|
|
"grad_norm": 41.252899169921875,
|
|
"learning_rate": 1.3163925091384532e-09,
|
|
"logits/chosen": 0.2565455436706543,
|
|
"logits/rejected": 0.2876202464103699,
|
|
"logps/chosen": -525.746337890625,
|
|
"logps/ref_chosen": -69.26106262207031,
|
|
"logps/ref_rejected": -89.05593872070312,
|
|
"logps/rejected": -743.5260009765625,
|
|
"loss": 0.9211,
|
|
"margin_dpo/margin_mean": 197.98480224609375,
|
|
"margin_dpo/margin_std": 217.1225128173828,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 0.973568281938326,
|
|
"fcm_dpo/beta": 0.0036978046409785748,
|
|
"fcm_dpo/delta": -0.027755947783589363,
|
|
"fcm_dpo/margin": 236.65377807617188,
|
|
"fcm_dpo/q_t": 0.325141966342926,
|
|
"grad_norm": 35.571556091308594,
|
|
"learning_rate": 1.1881460058152382e-09,
|
|
"logits/chosen": 0.26951783895492554,
|
|
"logits/rejected": 0.2777261435985565,
|
|
"logps/chosen": -485.85986328125,
|
|
"logps/ref_chosen": -64.87890625,
|
|
"logps/ref_rejected": -113.92536926269531,
|
|
"logps/rejected": -771.56005859375,
|
|
"loss": 0.8532,
|
|
"margin_dpo/margin_mean": 236.65377807617188,
|
|
"margin_dpo/margin_std": 255.65853881835938,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 0.9750367107195301,
|
|
"fcm_dpo/beta": 0.0036702845245599747,
|
|
"fcm_dpo/delta": -0.040541913360357285,
|
|
"fcm_dpo/margin": 241.583984375,
|
|
"fcm_dpo/q_t": 0.31657952070236206,
|
|
"grad_norm": 57.33827209472656,
|
|
"learning_rate": 1.066455926241383e-09,
|
|
"logits/chosen": 0.39595216512680054,
|
|
"logits/rejected": 0.41890090703964233,
|
|
"logps/chosen": -528.522705078125,
|
|
"logps/ref_chosen": -60.88847351074219,
|
|
"logps/ref_rejected": -105.521728515625,
|
|
"logps/rejected": -814.739990234375,
|
|
"loss": 0.856,
|
|
"margin_dpo/margin_mean": 241.583984375,
|
|
"margin_dpo/margin_std": 250.49679565429688,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 0.9765051395007343,
|
|
"fcm_dpo/beta": 0.0036611277610063553,
|
|
"fcm_dpo/delta": -0.006559427827596664,
|
|
"fcm_dpo/margin": 233.7760009765625,
|
|
"fcm_dpo/q_t": 0.321765661239624,
|
|
"grad_norm": 42.60844039916992,
|
|
"learning_rate": 9.513254770636137e-10,
|
|
"logits/chosen": 0.516608715057373,
|
|
"logits/rejected": 0.5561769604682922,
|
|
"logps/chosen": -504.7494201660156,
|
|
"logps/ref_chosen": -60.56413269042969,
|
|
"logps/ref_rejected": -84.80882263183594,
|
|
"logps/rejected": -762.77001953125,
|
|
"loss": 0.8261,
|
|
"margin_dpo/margin_mean": 233.77598571777344,
|
|
"margin_dpo/margin_std": 211.45077514648438,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.9779735682819384,
|
|
"fcm_dpo/beta": 0.0036832215264439583,
|
|
"fcm_dpo/delta": 0.041882507503032684,
|
|
"fcm_dpo/margin": 220.32644653320312,
|
|
"fcm_dpo/q_t": 0.3285210132598877,
|
|
"grad_norm": 48.15813064575195,
|
|
"learning_rate": 8.427576920763956e-10,
|
|
"logits/chosen": 0.287090539932251,
|
|
"logits/rejected": 0.313105046749115,
|
|
"logps/chosen": -517.513671875,
|
|
"logps/ref_chosen": -64.41996002197266,
|
|
"logps/ref_rejected": -95.8916244506836,
|
|
"logps/rejected": -769.311767578125,
|
|
"loss": 0.8545,
|
|
"margin_dpo/margin_mean": 220.32644653320312,
|
|
"margin_dpo/margin_std": 204.0716094970703,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 0.9794419970631424,
|
|
"fcm_dpo/beta": 0.0037040242459625006,
|
|
"fcm_dpo/delta": 0.003546607680618763,
|
|
"fcm_dpo/margin": 228.5996856689453,
|
|
"fcm_dpo/q_t": 0.32051968574523926,
|
|
"grad_norm": 49.475914001464844,
|
|
"learning_rate": 7.407554321417764e-10,
|
|
"logits/chosen": 0.30403217673301697,
|
|
"logits/rejected": 0.3480105996131897,
|
|
"logps/chosen": -546.994873046875,
|
|
"logps/ref_chosen": -69.27702331542969,
|
|
"logps/ref_rejected": -87.83549499511719,
|
|
"logps/rejected": -794.1530151367188,
|
|
"loss": 0.8222,
|
|
"margin_dpo/margin_mean": 228.5996856689453,
|
|
"margin_dpo/margin_std": 200.08045959472656,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 0.9809104258443465,
|
|
"fcm_dpo/beta": 0.0037916412111371756,
|
|
"fcm_dpo/delta": 0.11322961747646332,
|
|
"fcm_dpo/margin": 196.06912231445312,
|
|
"fcm_dpo/q_t": 0.35045015811920166,
|
|
"grad_norm": 81.19744110107422,
|
|
"learning_rate": 6.453213851142225e-10,
|
|
"logits/chosen": 0.3269076347351074,
|
|
"logits/rejected": 0.3389139771461487,
|
|
"logps/chosen": -557.981689453125,
|
|
"logps/ref_chosen": -72.60400390625,
|
|
"logps/ref_rejected": -103.73905944824219,
|
|
"logps/rejected": -785.1858520507812,
|
|
"loss": 0.9587,
|
|
"margin_dpo/margin_mean": 196.06912231445312,
|
|
"margin_dpo/margin_std": 250.09097290039062,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 0.9823788546255506,
|
|
"fcm_dpo/beta": 0.003718376625329256,
|
|
"fcm_dpo/delta": -0.11621291935443878,
|
|
"fcm_dpo/margin": 256.8287048339844,
|
|
"fcm_dpo/q_t": 0.30499178171157837,
|
|
"grad_norm": 45.1661491394043,
|
|
"learning_rate": 5.564580657695939e-10,
|
|
"logits/chosen": 0.3824569880962372,
|
|
"logits/rejected": 0.4118229150772095,
|
|
"logps/chosen": -461.65716552734375,
|
|
"logps/ref_chosen": -46.116416931152344,
|
|
"logps/ref_rejected": -77.92434692382812,
|
|
"logps/rejected": -750.2938232421875,
|
|
"loss": 0.8171,
|
|
"margin_dpo/margin_mean": 256.82867431640625,
|
|
"margin_dpo/margin_std": 251.70738220214844,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 0.9838472834067548,
|
|
"fcm_dpo/beta": 0.0036435904912650585,
|
|
"fcm_dpo/delta": -0.1457192599773407,
|
|
"fcm_dpo/margin": 269.37896728515625,
|
|
"fcm_dpo/q_t": 0.30129629373550415,
|
|
"grad_norm": 36.26799774169922,
|
|
"learning_rate": 4.741678157389739e-10,
|
|
"logits/chosen": 0.3282250761985779,
|
|
"logits/rejected": 0.3458458185195923,
|
|
"logps/chosen": -467.46923828125,
|
|
"logps/ref_chosen": -62.34575271606445,
|
|
"logps/ref_rejected": -96.9405517578125,
|
|
"logps/rejected": -771.4429931640625,
|
|
"loss": 0.7877,
|
|
"margin_dpo/margin_mean": 269.3789367675781,
|
|
"margin_dpo/margin_std": 242.67819213867188,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.9853157121879589,
|
|
"fcm_dpo/beta": 0.0036506270989775658,
|
|
"fcm_dpo/delta": 0.05353014916181564,
|
|
"fcm_dpo/margin": 218.90988159179688,
|
|
"fcm_dpo/q_t": 0.3327978253364563,
|
|
"grad_norm": 48.93035888671875,
|
|
"learning_rate": 3.9845280344705245e-10,
|
|
"logits/chosen": 0.42166364192962646,
|
|
"logits/rejected": 0.4403586983680725,
|
|
"logps/chosen": -525.71435546875,
|
|
"logps/ref_chosen": -48.00010681152344,
|
|
"logps/ref_rejected": -83.81932067871094,
|
|
"logps/rejected": -780.4434814453125,
|
|
"loss": 0.8728,
|
|
"margin_dpo/margin_mean": 218.90988159179688,
|
|
"margin_dpo/margin_std": 214.47088623046875,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 0.986784140969163,
|
|
"fcm_dpo/beta": 0.003691216232255101,
|
|
"fcm_dpo/delta": 0.11230534315109253,
|
|
"fcm_dpo/margin": 202.13328552246094,
|
|
"fcm_dpo/q_t": 0.3466678857803345,
|
|
"grad_norm": 54.89302444458008,
|
|
"learning_rate": 3.293150240547549e-10,
|
|
"logits/chosen": 0.3996291756629944,
|
|
"logits/rejected": 0.4267166554927826,
|
|
"logps/chosen": -560.9033813476562,
|
|
"logps/ref_chosen": -58.58328628540039,
|
|
"logps/ref_rejected": -93.14015197753906,
|
|
"logps/rejected": -797.593505859375,
|
|
"loss": 0.926,
|
|
"margin_dpo/margin_mean": 202.13328552246094,
|
|
"margin_dpo/margin_std": 228.80947875976562,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 0.9882525697503671,
|
|
"fcm_dpo/beta": 0.0036685578525066376,
|
|
"fcm_dpo/delta": -0.07623796164989471,
|
|
"fcm_dpo/margin": 250.54046630859375,
|
|
"fcm_dpo/q_t": 0.3029751181602478,
|
|
"grad_norm": 41.40584945678711,
|
|
"learning_rate": 2.6675629940689504e-10,
|
|
"logits/chosen": 0.4694506525993347,
|
|
"logits/rejected": 0.4862295389175415,
|
|
"logps/chosen": -493.67596435546875,
|
|
"logps/ref_chosen": -46.72320556640625,
|
|
"logps/ref_rejected": -85.29623413085938,
|
|
"logps/rejected": -782.7894287109375,
|
|
"loss": 0.7762,
|
|
"margin_dpo/margin_mean": 250.54046630859375,
|
|
"margin_dpo/margin_std": 197.22250366210938,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 0.9897209985315712,
|
|
"fcm_dpo/beta": 0.003621612675487995,
|
|
"fcm_dpo/delta": -0.09082603454589844,
|
|
"fcm_dpo/margin": 257.4602355957031,
|
|
"fcm_dpo/q_t": 0.30964556336402893,
|
|
"grad_norm": 42.31052017211914,
|
|
"learning_rate": 2.1077827798404725e-10,
|
|
"logits/chosen": 0.41855841875076294,
|
|
"logits/rejected": 0.4410872459411621,
|
|
"logps/chosen": -469.55859375,
|
|
"logps/ref_chosen": -45.445526123046875,
|
|
"logps/ref_rejected": -70.04593658447266,
|
|
"logps/rejected": -751.6192626953125,
|
|
"loss": 0.7966,
|
|
"margin_dpo/margin_mean": 257.46026611328125,
|
|
"margin_dpo/margin_std": 237.116455078125,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 0.9911894273127754,
|
|
"fcm_dpo/beta": 0.0034906486980617046,
|
|
"fcm_dpo/delta": -0.15879766643047333,
|
|
"fcm_dpo/margin": 284.1156005859375,
|
|
"fcm_dpo/q_t": 0.2941362261772156,
|
|
"grad_norm": 45.08475875854492,
|
|
"learning_rate": 1.6138243485910863e-10,
|
|
"logits/chosen": 0.38219112157821655,
|
|
"logits/rejected": 0.4025418162345886,
|
|
"logps/chosen": -472.37396240234375,
|
|
"logps/ref_chosen": -44.17628479003906,
|
|
"logps/ref_rejected": -74.09197998046875,
|
|
"logps/rejected": -786.4052734375,
|
|
"loss": 0.7469,
|
|
"margin_dpo/margin_mean": 284.1156005859375,
|
|
"margin_dpo/margin_std": 227.85464477539062,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.9926578560939795,
|
|
"fcm_dpo/beta": 0.003486907808110118,
|
|
"fcm_dpo/delta": 0.027279622852802277,
|
|
"fcm_dpo/margin": 236.58204650878906,
|
|
"fcm_dpo/q_t": 0.3229207992553711,
|
|
"grad_norm": 32.578102111816406,
|
|
"learning_rate": 1.1857007165852472e-10,
|
|
"logits/chosen": 0.24134598672389984,
|
|
"logits/rejected": 0.27230340242385864,
|
|
"logps/chosen": -521.6865234375,
|
|
"logps/ref_chosen": -71.39852905273438,
|
|
"logps/ref_rejected": -88.3587646484375,
|
|
"logps/rejected": -775.2288208007812,
|
|
"loss": 0.8219,
|
|
"margin_dpo/margin_mean": 236.58204650878906,
|
|
"margin_dpo/margin_std": 195.95883178710938,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 0.9941262848751835,
|
|
"fcm_dpo/beta": 0.0034925828222185373,
|
|
"fcm_dpo/delta": -0.01529073528945446,
|
|
"fcm_dpo/margin": 247.37342834472656,
|
|
"fcm_dpo/q_t": 0.3151392936706543,
|
|
"grad_norm": 44.7586669921875,
|
|
"learning_rate": 8.23423165278725e-11,
|
|
"logits/chosen": 0.48236775398254395,
|
|
"logits/rejected": 0.522483229637146,
|
|
"logps/chosen": -513.5480346679688,
|
|
"logps/ref_chosen": -56.527435302734375,
|
|
"logps/ref_rejected": -78.22654724121094,
|
|
"logps/rejected": -782.62060546875,
|
|
"loss": 0.8038,
|
|
"margin_dpo/margin_mean": 247.37344360351562,
|
|
"margin_dpo/margin_std": 200.0,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 0.9955947136563876,
|
|
"fcm_dpo/beta": 0.0034883874468505383,
|
|
"fcm_dpo/delta": -0.0267241969704628,
|
|
"fcm_dpo/margin": 250.60003662109375,
|
|
"fcm_dpo/q_t": 0.32206088304519653,
|
|
"grad_norm": 45.41565704345703,
|
|
"learning_rate": 5.270012410216185e-11,
|
|
"logits/chosen": 0.40671655535697937,
|
|
"logits/rejected": 0.4284959137439728,
|
|
"logps/chosen": -485.13336181640625,
|
|
"logps/ref_chosen": -46.13447570800781,
|
|
"logps/ref_rejected": -80.60462951660156,
|
|
"logps/rejected": -770.2034912109375,
|
|
"loss": 0.8466,
|
|
"margin_dpo/margin_mean": 250.60003662109375,
|
|
"margin_dpo/margin_std": 250.03765869140625,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 0.9970631424375918,
|
|
"fcm_dpo/beta": 0.0035341887269169092,
|
|
"fcm_dpo/delta": 0.12530364096164703,
|
|
"fcm_dpo/margin": 207.66168212890625,
|
|
"fcm_dpo/q_t": 0.3447020351886749,
|
|
"grad_norm": 40.24350357055664,
|
|
"learning_rate": 2.9644275480772416e-11,
|
|
"logits/chosen": 0.3600369691848755,
|
|
"logits/rejected": 0.37837547063827515,
|
|
"logps/chosen": -492.26739501953125,
|
|
"logps/ref_chosen": -50.294921875,
|
|
"logps/ref_rejected": -76.59813690185547,
|
|
"logps/rejected": -726.2322998046875,
|
|
"loss": 0.8974,
|
|
"margin_dpo/margin_mean": 207.66168212890625,
|
|
"margin_dpo/margin_std": 209.05560302734375,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 0.9985315712187959,
|
|
"fcm_dpo/beta": 0.0035505383275449276,
|
|
"fcm_dpo/delta": -0.02245294116437435,
|
|
"fcm_dpo/margin": 245.17251586914062,
|
|
"fcm_dpo/q_t": 0.3214852809906006,
|
|
"grad_norm": 37.78384780883789,
|
|
"learning_rate": 1.31753782067201e-11,
|
|
"logits/chosen": 0.29616981744766235,
|
|
"logits/rejected": 0.32709792256355286,
|
|
"logps/chosen": -534.2918090820312,
|
|
"logps/ref_chosen": -76.91569519042969,
|
|
"logps/ref_rejected": -112.384765625,
|
|
"logps/rejected": -814.933349609375,
|
|
"loss": 0.8629,
|
|
"margin_dpo/margin_mean": 245.17251586914062,
|
|
"margin_dpo/margin_std": 256.5103759765625,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"fcm_dpo/beta": 0.003594727721065283,
|
|
"fcm_dpo/delta": 0.04884641245007515,
|
|
"fcm_dpo/margin": 223.56224060058594,
|
|
"fcm_dpo/q_t": 0.334045946598053,
|
|
"grad_norm": 73.19285583496094,
|
|
"learning_rate": 3.2938662507808745e-12,
|
|
"logits/chosen": 0.3444863557815552,
|
|
"logits/rejected": 0.3690025806427002,
|
|
"logps/chosen": -525.255615234375,
|
|
"logps/ref_chosen": -60.957279205322266,
|
|
"logps/ref_rejected": -88.55797576904297,
|
|
"logps/rejected": -776.4185791015625,
|
|
"loss": 0.8767,
|
|
"margin_dpo/margin_mean": 223.562255859375,
|
|
"margin_dpo/margin_std": 224.77215576171875,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"step": 681,
|
|
"total_flos": 0.0,
|
|
"train_loss": 0.8810622134572784,
|
|
"train_runtime": 1867.9067,
|
|
"train_samples_per_second": 23.341,
|
|
"train_steps_per_second": 0.365
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 681,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 50,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|