Model: jackf857/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-q_t-0.5-s_star-0.4 Source: Original Platform
13034 lines
480 KiB
JSON
13034 lines
480 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 200,
|
|
"global_step": 681,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0014684287812041115,
|
|
"fcm_dpo/beta": 0.1020384356379509,
|
|
"fcm_dpo/delta": 0.19979780912399292,
|
|
"fcm_dpo/margin": -0.02287006378173828,
|
|
"fcm_dpo/q_t": 0.5005706548690796,
|
|
"grad_norm": 85.29718780517578,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": -0.4974287748336792,
|
|
"logits/rejected": -0.43299180269241333,
|
|
"logps/chosen": -50.1435661315918,
|
|
"logps/ref_chosen": -50.14883804321289,
|
|
"logps/ref_rejected": -74.1280517578125,
|
|
"logps/rejected": -74.09991455078125,
|
|
"loss": 1.389,
|
|
"margin_dpo/margin_mean": -0.02287048101425171,
|
|
"margin_dpo/margin_std": 0.41920793056488037,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.002936857562408223,
|
|
"fcm_dpo/beta": 0.10407686978578568,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.06572261452674866,
|
|
"fcm_dpo/q_t": 0.501709520816803,
|
|
"grad_norm": 75.22118377685547,
|
|
"learning_rate": 7.246376811594203e-09,
|
|
"logits/chosen": -0.49536412954330444,
|
|
"logits/rejected": -0.4594460427761078,
|
|
"logps/chosen": -52.65568923950195,
|
|
"logps/ref_chosen": -52.620704650878906,
|
|
"logps/ref_rejected": -75.30413818359375,
|
|
"logps/rejected": -75.27340698242188,
|
|
"loss": 1.3935,
|
|
"margin_dpo/margin_mean": -0.06572240591049194,
|
|
"margin_dpo/margin_std": 0.35048407316207886,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.004405286343612335,
|
|
"fcm_dpo/beta": 0.10832422226667404,
|
|
"fcm_dpo/delta": 0.1999952644109726,
|
|
"fcm_dpo/margin": -0.004782050848007202,
|
|
"fcm_dpo/q_t": 0.500129759311676,
|
|
"grad_norm": 76.9161376953125,
|
|
"learning_rate": 1.4492753623188406e-08,
|
|
"logits/chosen": -0.4816010594367981,
|
|
"logits/rejected": -0.44217073917388916,
|
|
"logps/chosen": -60.95306396484375,
|
|
"logps/ref_chosen": -60.981597900390625,
|
|
"logps/ref_rejected": -68.67259216308594,
|
|
"logps/rejected": -68.6392822265625,
|
|
"loss": 1.3872,
|
|
"margin_dpo/margin_mean": -0.004781663417816162,
|
|
"margin_dpo/margin_std": 0.33950307965278625,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.005873715124816446,
|
|
"fcm_dpo/beta": 0.10832422226667404,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.06117314100265503,
|
|
"fcm_dpo/q_t": 0.501656711101532,
|
|
"grad_norm": 78.19353485107422,
|
|
"learning_rate": 2.1739130434782606e-08,
|
|
"logits/chosen": -0.4680081605911255,
|
|
"logits/rejected": -0.44041645526885986,
|
|
"logps/chosen": -56.78364181518555,
|
|
"logps/ref_chosen": -56.7677116394043,
|
|
"logps/ref_rejected": -86.64710998535156,
|
|
"logps/rejected": -86.60186767578125,
|
|
"loss": 1.3934,
|
|
"margin_dpo/margin_mean": -0.06117379665374756,
|
|
"margin_dpo/margin_std": 0.3837040364742279,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.007342143906020558,
|
|
"fcm_dpo/beta": 0.11499093472957611,
|
|
"fcm_dpo/delta": 0.3965454697608948,
|
|
"fcm_dpo/margin": 0.031099945306777954,
|
|
"fcm_dpo/q_t": 0.49913665652275085,
|
|
"grad_norm": 103.3542709350586,
|
|
"learning_rate": 2.898550724637681e-08,
|
|
"logits/chosen": -0.5145474672317505,
|
|
"logits/rejected": -0.47077202796936035,
|
|
"logps/chosen": -53.81591033935547,
|
|
"logps/ref_chosen": -53.859375,
|
|
"logps/ref_rejected": -84.14918518066406,
|
|
"logps/rejected": -84.1368179321289,
|
|
"loss": 1.3831,
|
|
"margin_dpo/margin_mean": 0.031100064516067505,
|
|
"margin_dpo/margin_std": 0.32387232780456543,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.00881057268722467,
|
|
"fcm_dpo/beta": 0.12198945879936218,
|
|
"fcm_dpo/delta": 0.19748398661613464,
|
|
"fcm_dpo/margin": -0.011603772640228271,
|
|
"fcm_dpo/q_t": 0.5003781318664551,
|
|
"grad_norm": 112.3940658569336,
|
|
"learning_rate": 3.6231884057971014e-08,
|
|
"logits/chosen": -0.5098507404327393,
|
|
"logits/rejected": -0.4680579900741577,
|
|
"logps/chosen": -63.016353607177734,
|
|
"logps/ref_chosen": -63.007484436035156,
|
|
"logps/ref_rejected": -92.64534759521484,
|
|
"logps/rejected": -92.64260864257812,
|
|
"loss": 1.3883,
|
|
"margin_dpo/margin_mean": -0.011603862047195435,
|
|
"margin_dpo/margin_std": 0.39160168170928955,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.010279001468428781,
|
|
"fcm_dpo/beta": 0.12692973017692566,
|
|
"fcm_dpo/delta": 0.19849498569965363,
|
|
"fcm_dpo/margin": 0.008680760860443115,
|
|
"fcm_dpo/q_t": 0.4997381567955017,
|
|
"grad_norm": 104.92411041259766,
|
|
"learning_rate": 4.347826086956521e-08,
|
|
"logits/chosen": -0.49692052602767944,
|
|
"logits/rejected": -0.4630710482597351,
|
|
"logps/chosen": -57.742652893066406,
|
|
"logps/ref_chosen": -57.774818420410156,
|
|
"logps/ref_rejected": -103.92059326171875,
|
|
"logps/rejected": -103.89710235595703,
|
|
"loss": 1.386,
|
|
"margin_dpo/margin_mean": 0.008680880069732666,
|
|
"margin_dpo/margin_std": 0.43539559841156006,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.011747430249632892,
|
|
"fcm_dpo/beta": 0.13201940059661865,
|
|
"fcm_dpo/delta": 0.19657650589942932,
|
|
"fcm_dpo/margin": -0.028942912817001343,
|
|
"fcm_dpo/q_t": 0.5009875297546387,
|
|
"grad_norm": 103.702880859375,
|
|
"learning_rate": 5.0724637681159424e-08,
|
|
"logits/chosen": -0.5114612579345703,
|
|
"logits/rejected": -0.48591524362564087,
|
|
"logps/chosen": -58.68933868408203,
|
|
"logps/ref_chosen": -58.716033935546875,
|
|
"logps/ref_rejected": -79.3114242553711,
|
|
"logps/rejected": -79.25579071044922,
|
|
"loss": 1.3909,
|
|
"margin_dpo/margin_mean": -0.028942614793777466,
|
|
"margin_dpo/margin_std": 0.4058513045310974,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.013215859030837005,
|
|
"fcm_dpo/beta": 0.13730010390281677,
|
|
"fcm_dpo/delta": 0.1961011439561844,
|
|
"fcm_dpo/margin": 0.01041179895401001,
|
|
"fcm_dpo/q_t": 0.4996810555458069,
|
|
"grad_norm": 117.20587921142578,
|
|
"learning_rate": 5.797101449275362e-08,
|
|
"logits/chosen": -0.4901723265647888,
|
|
"logits/rejected": -0.44332075119018555,
|
|
"logps/chosen": -69.85332489013672,
|
|
"logps/ref_chosen": -69.8668441772461,
|
|
"logps/ref_rejected": -99.6026611328125,
|
|
"logps/rejected": -99.59955596923828,
|
|
"loss": 1.3857,
|
|
"margin_dpo/margin_mean": 0.010411262512207031,
|
|
"margin_dpo/margin_std": 0.4200562834739685,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.014684287812041116,
|
|
"fcm_dpo/beta": 0.1400114893913269,
|
|
"fcm_dpo/delta": 0.1936788260936737,
|
|
"fcm_dpo/margin": 0.01942703127861023,
|
|
"fcm_dpo/q_t": 0.49933522939682007,
|
|
"grad_norm": 99.37556457519531,
|
|
"learning_rate": 6.521739130434782e-08,
|
|
"logits/chosen": -0.48142820596694946,
|
|
"logits/rejected": -0.43749985098838806,
|
|
"logps/chosen": -48.3685302734375,
|
|
"logps/ref_chosen": -48.35768508911133,
|
|
"logps/ref_rejected": -80.37206268310547,
|
|
"logps/rejected": -80.40232849121094,
|
|
"loss": 1.3841,
|
|
"margin_dpo/margin_mean": 0.019427448511123657,
|
|
"margin_dpo/margin_std": 0.374165415763855,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.016152716593245228,
|
|
"fcm_dpo/beta": 0.14840584993362427,
|
|
"fcm_dpo/delta": 0.1952294558286667,
|
|
"fcm_dpo/margin": 0.02777162194252014,
|
|
"fcm_dpo/q_t": 0.49901843070983887,
|
|
"grad_norm": 101.62715911865234,
|
|
"learning_rate": 7.246376811594203e-08,
|
|
"logits/chosen": -0.4318368434906006,
|
|
"logits/rejected": -0.40546509623527527,
|
|
"logps/chosen": -52.996986389160156,
|
|
"logps/ref_chosen": -53.01685333251953,
|
|
"logps/ref_rejected": -87.78038024902344,
|
|
"logps/rejected": -87.78828430175781,
|
|
"loss": 1.3829,
|
|
"margin_dpo/margin_mean": 0.02777191996574402,
|
|
"margin_dpo/margin_std": 0.362691193819046,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.01762114537444934,
|
|
"fcm_dpo/beta": 0.15135988593101501,
|
|
"fcm_dpo/delta": 0.19519127905368805,
|
|
"fcm_dpo/margin": 0.01589415967464447,
|
|
"fcm_dpo/q_t": 0.4994090497493744,
|
|
"grad_norm": 140.9950408935547,
|
|
"learning_rate": 7.971014492753623e-08,
|
|
"logits/chosen": -0.5231326818466187,
|
|
"logits/rejected": -0.486427366733551,
|
|
"logps/chosen": -61.79826354980469,
|
|
"logps/ref_chosen": -61.80543518066406,
|
|
"logps/ref_rejected": -104.8582763671875,
|
|
"logps/rejected": -104.86698913574219,
|
|
"loss": 1.3844,
|
|
"margin_dpo/margin_mean": 0.01589377224445343,
|
|
"margin_dpo/margin_std": 0.3336790204048157,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.01908957415565345,
|
|
"fcm_dpo/beta": 0.163808211684227,
|
|
"fcm_dpo/delta": 0.39621487259864807,
|
|
"fcm_dpo/margin": 0.023847192525863647,
|
|
"fcm_dpo/q_t": 0.49905675649642944,
|
|
"grad_norm": 129.1976318359375,
|
|
"learning_rate": 8.695652173913042e-08,
|
|
"logits/chosen": -0.4889492690563202,
|
|
"logits/rejected": -0.4614258408546448,
|
|
"logps/chosen": -64.2720718383789,
|
|
"logps/ref_chosen": -64.2603530883789,
|
|
"logps/ref_rejected": -87.20307922363281,
|
|
"logps/rejected": -87.23863983154297,
|
|
"loss": 1.3831,
|
|
"margin_dpo/margin_mean": 0.023847103118896484,
|
|
"margin_dpo/margin_std": 0.31533756852149963,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.020558002936857563,
|
|
"fcm_dpo/beta": 0.1737682819366455,
|
|
"fcm_dpo/delta": 0.19745339453220367,
|
|
"fcm_dpo/margin": 5.2809715270996094e-05,
|
|
"fcm_dpo/q_t": 0.5000207424163818,
|
|
"grad_norm": 148.0649871826172,
|
|
"learning_rate": 9.420289855072464e-08,
|
|
"logits/chosen": -0.48691490292549133,
|
|
"logits/rejected": -0.4480026960372925,
|
|
"logps/chosen": -58.149009704589844,
|
|
"logps/ref_chosen": -58.11021041870117,
|
|
"logps/ref_rejected": -104.04708099365234,
|
|
"logps/rejected": -104.08592224121094,
|
|
"loss": 1.3876,
|
|
"margin_dpo/margin_mean": 5.313754081726074e-05,
|
|
"margin_dpo/margin_std": 0.41946524381637573,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.022026431718061675,
|
|
"fcm_dpo/beta": 0.1737682819366455,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.01620301604270935,
|
|
"fcm_dpo/q_t": 0.5006987452507019,
|
|
"grad_norm": 112.28044891357422,
|
|
"learning_rate": 1.0144927536231885e-07,
|
|
"logits/chosen": -0.5082442164421082,
|
|
"logits/rejected": -0.49048274755477905,
|
|
"logps/chosen": -57.02097702026367,
|
|
"logps/ref_chosen": -56.96691131591797,
|
|
"logps/ref_rejected": -80.80863952636719,
|
|
"logps/rejected": -80.84651184082031,
|
|
"loss": 1.3902,
|
|
"margin_dpo/margin_mean": -0.01620301604270935,
|
|
"margin_dpo/margin_std": 0.3850763142108917,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.023494860499265784,
|
|
"fcm_dpo/beta": 0.18400363624095917,
|
|
"fcm_dpo/delta": 0.3830709159374237,
|
|
"fcm_dpo/margin": 0.09651938080787659,
|
|
"fcm_dpo/q_t": 0.4957681894302368,
|
|
"grad_norm": 152.3389892578125,
|
|
"learning_rate": 1.0869565217391303e-07,
|
|
"logits/chosen": -0.5282368659973145,
|
|
"logits/rejected": -0.4875671863555908,
|
|
"logps/chosen": -61.703330993652344,
|
|
"logps/ref_chosen": -61.739891052246094,
|
|
"logps/ref_rejected": -84.36947631835938,
|
|
"logps/rejected": -84.42942810058594,
|
|
"loss": 1.3699,
|
|
"margin_dpo/margin_mean": 0.09651932120323181,
|
|
"margin_dpo/margin_std": 0.3430694341659546,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.024963289280469897,
|
|
"fcm_dpo/beta": 0.19130608439445496,
|
|
"fcm_dpo/delta": 0.19350671768188477,
|
|
"fcm_dpo/margin": 0.0113239586353302,
|
|
"fcm_dpo/q_t": 0.4994708001613617,
|
|
"grad_norm": 150.90025329589844,
|
|
"learning_rate": 1.1594202898550725e-07,
|
|
"logits/chosen": -0.4968636631965637,
|
|
"logits/rejected": -0.4591953158378601,
|
|
"logps/chosen": -67.68939208984375,
|
|
"logps/ref_chosen": -67.71033477783203,
|
|
"logps/ref_rejected": -85.37865447998047,
|
|
"logps/rejected": -85.36903381347656,
|
|
"loss": 1.3851,
|
|
"margin_dpo/margin_mean": 0.011324524879455566,
|
|
"margin_dpo/margin_std": 0.35448014736175537,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.02643171806167401,
|
|
"fcm_dpo/beta": 0.19877119362354279,
|
|
"fcm_dpo/delta": 0.18936890363693237,
|
|
"fcm_dpo/margin": 0.026239663362503052,
|
|
"fcm_dpo/q_t": 0.49871736764907837,
|
|
"grad_norm": 163.8603515625,
|
|
"learning_rate": 1.2318840579710146e-07,
|
|
"logits/chosen": -0.48526573181152344,
|
|
"logits/rejected": -0.4287734031677246,
|
|
"logps/chosen": -47.76115417480469,
|
|
"logps/ref_chosen": -47.7394905090332,
|
|
"logps/ref_rejected": -75.4722900390625,
|
|
"logps/rejected": -75.52019500732422,
|
|
"loss": 1.3818,
|
|
"margin_dpo/margin_mean": 0.026239246129989624,
|
|
"margin_dpo/margin_std": 0.30841344594955444,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.027900146842878122,
|
|
"fcm_dpo/beta": 0.2146751880645752,
|
|
"fcm_dpo/delta": 0.3876880407333374,
|
|
"fcm_dpo/margin": 0.05988234281539917,
|
|
"fcm_dpo/q_t": 0.49692660570144653,
|
|
"grad_norm": 158.2223663330078,
|
|
"learning_rate": 1.3043478260869563e-07,
|
|
"logits/chosen": -0.4940911531448364,
|
|
"logits/rejected": -0.4445168375968933,
|
|
"logps/chosen": -70.18385314941406,
|
|
"logps/ref_chosen": -70.20536041259766,
|
|
"logps/ref_rejected": -89.7575912475586,
|
|
"logps/rejected": -89.79597473144531,
|
|
"loss": 1.3745,
|
|
"margin_dpo/margin_mean": 0.05988246202468872,
|
|
"margin_dpo/margin_std": 0.2845911383628845,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.02936857562408223,
|
|
"fcm_dpo/beta": 0.23141203820705414,
|
|
"fcm_dpo/delta": 0.3657988905906677,
|
|
"fcm_dpo/margin": 0.1521126627922058,
|
|
"fcm_dpo/q_t": 0.4914783835411072,
|
|
"grad_norm": 166.44786071777344,
|
|
"learning_rate": 1.3768115942028986e-07,
|
|
"logits/chosen": -0.5221278667449951,
|
|
"logits/rejected": -0.4627057909965515,
|
|
"logps/chosen": -50.78226089477539,
|
|
"logps/ref_chosen": -50.80324172973633,
|
|
"logps/ref_rejected": -78.82334899902344,
|
|
"logps/rejected": -78.9544677734375,
|
|
"loss": 1.3531,
|
|
"margin_dpo/margin_mean": 0.15211281180381775,
|
|
"margin_dpo/margin_std": 0.3802841305732727,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.030837004405286344,
|
|
"fcm_dpo/beta": 0.24898943305015564,
|
|
"fcm_dpo/delta": 0.3725013732910156,
|
|
"fcm_dpo/margin": 0.11505882441997528,
|
|
"fcm_dpo/q_t": 0.49314409494400024,
|
|
"grad_norm": 189.82945251464844,
|
|
"learning_rate": 1.4492753623188405e-07,
|
|
"logits/chosen": -0.5031737685203552,
|
|
"logits/rejected": -0.4799532890319824,
|
|
"logps/chosen": -50.0748291015625,
|
|
"logps/ref_chosen": -50.063018798828125,
|
|
"logps/ref_rejected": -77.86878967285156,
|
|
"logps/rejected": -77.99566650390625,
|
|
"loss": 1.3604,
|
|
"margin_dpo/margin_mean": 0.11505846679210663,
|
|
"margin_dpo/margin_std": 0.3957793712615967,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.032305433186490456,
|
|
"fcm_dpo/beta": 0.26710399985313416,
|
|
"fcm_dpo/delta": 0.3357844650745392,
|
|
"fcm_dpo/margin": 0.24726280570030212,
|
|
"fcm_dpo/q_t": 0.48401886224746704,
|
|
"grad_norm": 218.20628356933594,
|
|
"learning_rate": 1.5217391304347825e-07,
|
|
"logits/chosen": -0.485355019569397,
|
|
"logits/rejected": -0.44164812564849854,
|
|
"logps/chosen": -59.00285339355469,
|
|
"logps/ref_chosen": -59.05763626098633,
|
|
"logps/ref_rejected": -97.50466918945312,
|
|
"logps/rejected": -97.69715118408203,
|
|
"loss": 1.3248,
|
|
"margin_dpo/margin_mean": 0.247263103723526,
|
|
"margin_dpo/margin_std": 0.45322418212890625,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.033773861967694566,
|
|
"fcm_dpo/beta": 0.2858576774597168,
|
|
"fcm_dpo/delta": 0.34957826137542725,
|
|
"fcm_dpo/margin": 0.18317532539367676,
|
|
"fcm_dpo/q_t": 0.4874514639377594,
|
|
"grad_norm": 224.35922241210938,
|
|
"learning_rate": 1.5942028985507245e-07,
|
|
"logits/chosen": -0.4880913197994232,
|
|
"logits/rejected": -0.46549102663993835,
|
|
"logps/chosen": -60.035518646240234,
|
|
"logps/ref_chosen": -60.07769775390625,
|
|
"logps/ref_rejected": -81.13955688476562,
|
|
"logps/rejected": -81.28054809570312,
|
|
"loss": 1.3377,
|
|
"margin_dpo/margin_mean": 0.18317526578903198,
|
|
"margin_dpo/margin_std": 0.37381279468536377,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.03524229074889868,
|
|
"fcm_dpo/beta": 0.30613917112350464,
|
|
"fcm_dpo/delta": 0.33740469813346863,
|
|
"fcm_dpo/margin": 0.21142138540744781,
|
|
"fcm_dpo/q_t": 0.48438760638237,
|
|
"grad_norm": 254.505615234375,
|
|
"learning_rate": 1.6666666666666665e-07,
|
|
"logits/chosen": -0.5014858245849609,
|
|
"logits/rejected": -0.4851001501083374,
|
|
"logps/chosen": -44.279727935791016,
|
|
"logps/ref_chosen": -44.29103469848633,
|
|
"logps/ref_rejected": -99.12521362304688,
|
|
"logps/rejected": -99.32533264160156,
|
|
"loss": 1.3256,
|
|
"margin_dpo/margin_mean": 0.21142247319221497,
|
|
"margin_dpo/margin_std": 0.35174310207366943,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.03671071953010279,
|
|
"fcm_dpo/beta": 0.3263891637325287,
|
|
"fcm_dpo/delta": 0.31944698095321655,
|
|
"fcm_dpo/margin": 0.25548607110977173,
|
|
"fcm_dpo/q_t": 0.4799500107765198,
|
|
"grad_norm": 231.55694580078125,
|
|
"learning_rate": 1.7391304347826085e-07,
|
|
"logits/chosen": -0.5223193168640137,
|
|
"logits/rejected": -0.49323010444641113,
|
|
"logps/chosen": -52.49665069580078,
|
|
"logps/ref_chosen": -52.537052154541016,
|
|
"logps/ref_rejected": -89.34219360351562,
|
|
"logps/rejected": -89.55728149414062,
|
|
"loss": 1.309,
|
|
"margin_dpo/margin_mean": 0.2554857134819031,
|
|
"margin_dpo/margin_std": 0.39272648096084595,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.0381791483113069,
|
|
"fcm_dpo/beta": 0.3455832004547119,
|
|
"fcm_dpo/delta": 0.25443094968795776,
|
|
"fcm_dpo/margin": 0.4299333393573761,
|
|
"fcm_dpo/q_t": 0.4642295241355896,
|
|
"grad_norm": 271.15667724609375,
|
|
"learning_rate": 1.8115942028985507e-07,
|
|
"logits/chosen": -0.5332764387130737,
|
|
"logits/rejected": -0.5016044974327087,
|
|
"logps/chosen": -53.841529846191406,
|
|
"logps/ref_chosen": -53.92280578613281,
|
|
"logps/ref_rejected": -103.35971069335938,
|
|
"logps/rejected": -103.70835876464844,
|
|
"loss": 1.2531,
|
|
"margin_dpo/margin_mean": 0.4299335479736328,
|
|
"margin_dpo/margin_std": 0.5525455474853516,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.039647577092511016,
|
|
"fcm_dpo/beta": 0.3615337014198303,
|
|
"fcm_dpo/delta": 0.23555167019367218,
|
|
"fcm_dpo/margin": 0.4660835266113281,
|
|
"fcm_dpo/q_t": 0.45947709679603577,
|
|
"grad_norm": 305.2655944824219,
|
|
"learning_rate": 1.8840579710144927e-07,
|
|
"logits/chosen": -0.4840855598449707,
|
|
"logits/rejected": -0.44715797901153564,
|
|
"logps/chosen": -42.79102325439453,
|
|
"logps/ref_chosen": -42.898529052734375,
|
|
"logps/ref_rejected": -98.72419738769531,
|
|
"logps/rejected": -99.08277893066406,
|
|
"loss": 1.2344,
|
|
"margin_dpo/margin_mean": 0.46608299016952515,
|
|
"margin_dpo/margin_std": 0.5391252636909485,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.041116005873715125,
|
|
"fcm_dpo/beta": 0.3825129270553589,
|
|
"fcm_dpo/delta": 0.2807585895061493,
|
|
"fcm_dpo/margin": 0.31892046332359314,
|
|
"fcm_dpo/q_t": 0.4705425500869751,
|
|
"grad_norm": 266.1156921386719,
|
|
"learning_rate": 1.9565217391304347e-07,
|
|
"logits/chosen": -0.49522364139556885,
|
|
"logits/rejected": -0.4389377236366272,
|
|
"logps/chosen": -60.54632568359375,
|
|
"logps/ref_chosen": -60.55650329589844,
|
|
"logps/ref_rejected": -91.40111541748047,
|
|
"logps/rejected": -91.7098617553711,
|
|
"loss": 1.2817,
|
|
"margin_dpo/margin_mean": 0.3189205825328827,
|
|
"margin_dpo/margin_std": 0.6154531836509705,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.042584434654919234,
|
|
"fcm_dpo/beta": 0.39745935797691345,
|
|
"fcm_dpo/delta": 0.17806152999401093,
|
|
"fcm_dpo/margin": 0.5686274766921997,
|
|
"fcm_dpo/q_t": 0.4454106092453003,
|
|
"grad_norm": 315.0572814941406,
|
|
"learning_rate": 2.028985507246377e-07,
|
|
"logits/chosen": -0.5572994947433472,
|
|
"logits/rejected": -0.5113492608070374,
|
|
"logps/chosen": -57.70075225830078,
|
|
"logps/ref_chosen": -57.80778503417969,
|
|
"logps/ref_rejected": -97.39434814453125,
|
|
"logps/rejected": -97.85594177246094,
|
|
"loss": 1.1846,
|
|
"margin_dpo/margin_mean": 0.5686285495758057,
|
|
"margin_dpo/margin_std": 0.544101357460022,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.04405286343612335,
|
|
"fcm_dpo/beta": 0.4080265164375305,
|
|
"fcm_dpo/delta": 0.11794110387563705,
|
|
"fcm_dpo/margin": 0.6996808648109436,
|
|
"fcm_dpo/q_t": 0.4310336709022522,
|
|
"grad_norm": 297.2781677246094,
|
|
"learning_rate": 2.1014492753623187e-07,
|
|
"logits/chosen": -0.5077843070030212,
|
|
"logits/rejected": -0.4774767756462097,
|
|
"logps/chosen": -52.4200553894043,
|
|
"logps/ref_chosen": -52.577369689941406,
|
|
"logps/ref_rejected": -98.48920440673828,
|
|
"logps/rejected": -99.03157043457031,
|
|
"loss": 1.136,
|
|
"margin_dpo/margin_mean": 0.6996806859970093,
|
|
"margin_dpo/margin_std": 0.6045821905136108,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.04552129221732746,
|
|
"fcm_dpo/beta": 0.42595115303993225,
|
|
"fcm_dpo/delta": 0.2233620285987854,
|
|
"fcm_dpo/margin": 0.42216721177101135,
|
|
"fcm_dpo/q_t": 0.45694005489349365,
|
|
"grad_norm": 259.3175048828125,
|
|
"learning_rate": 2.1739130434782607e-07,
|
|
"logits/chosen": -0.5219802856445312,
|
|
"logits/rejected": -0.4791126251220703,
|
|
"logps/chosen": -63.74620819091797,
|
|
"logps/ref_chosen": -63.806922912597656,
|
|
"logps/ref_rejected": -72.89400482177734,
|
|
"logps/rejected": -73.25546264648438,
|
|
"loss": 1.2309,
|
|
"margin_dpo/margin_mean": 0.4221669137477875,
|
|
"margin_dpo/margin_std": 0.6024155616760254,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.04698972099853157,
|
|
"fcm_dpo/beta": 0.4344155192375183,
|
|
"fcm_dpo/delta": 0.08483266085386276,
|
|
"fcm_dpo/margin": 0.7318711280822754,
|
|
"fcm_dpo/q_t": 0.42483460903167725,
|
|
"grad_norm": 283.7579040527344,
|
|
"learning_rate": 2.2463768115942027e-07,
|
|
"logits/chosen": -0.5009861588478088,
|
|
"logits/rejected": -0.45893198251724243,
|
|
"logps/chosen": -62.55799865722656,
|
|
"logps/ref_chosen": -62.739524841308594,
|
|
"logps/ref_rejected": -89.3175048828125,
|
|
"logps/rejected": -89.86784362792969,
|
|
"loss": 1.1257,
|
|
"margin_dpo/margin_mean": 0.7318712472915649,
|
|
"margin_dpo/margin_std": 0.8441380262374878,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.048458149779735685,
|
|
"fcm_dpo/beta": 0.44722574949264526,
|
|
"fcm_dpo/delta": 0.138888880610466,
|
|
"fcm_dpo/margin": 0.5908744931221008,
|
|
"fcm_dpo/q_t": 0.43646401166915894,
|
|
"grad_norm": 274.2132873535156,
|
|
"learning_rate": 2.318840579710145e-07,
|
|
"logits/chosen": -0.5129827260971069,
|
|
"logits/rejected": -0.48746123909950256,
|
|
"logps/chosen": -53.16265106201172,
|
|
"logps/ref_chosen": -53.26097106933594,
|
|
"logps/ref_rejected": -87.8851318359375,
|
|
"logps/rejected": -88.377685546875,
|
|
"loss": 1.1604,
|
|
"margin_dpo/margin_mean": 0.5908748507499695,
|
|
"margin_dpo/margin_std": 0.6484410166740417,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.049926578560939794,
|
|
"fcm_dpo/beta": 0.45357927680015564,
|
|
"fcm_dpo/delta": 0.06588704884052277,
|
|
"fcm_dpo/margin": 0.7414969205856323,
|
|
"fcm_dpo/q_t": 0.4207090139389038,
|
|
"grad_norm": 277.12481689453125,
|
|
"learning_rate": 2.391304347826087e-07,
|
|
"logits/chosen": -0.48127520084381104,
|
|
"logits/rejected": -0.4638293981552124,
|
|
"logps/chosen": -50.74750518798828,
|
|
"logps/ref_chosen": -50.81732940673828,
|
|
"logps/ref_rejected": -101.92184448242188,
|
|
"logps/rejected": -102.59352111816406,
|
|
"loss": 1.113,
|
|
"margin_dpo/margin_mean": 0.7414963841438293,
|
|
"margin_dpo/margin_std": 0.8493252992630005,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.0513950073421439,
|
|
"fcm_dpo/beta": 0.4451371729373932,
|
|
"fcm_dpo/delta": -0.1301283985376358,
|
|
"fcm_dpo/margin": 1.1745426654815674,
|
|
"fcm_dpo/q_t": 0.38051727414131165,
|
|
"grad_norm": 252.35678100585938,
|
|
"learning_rate": 2.463768115942029e-07,
|
|
"logits/chosen": -0.508255124092102,
|
|
"logits/rejected": -0.4715331196784973,
|
|
"logps/chosen": -50.89655303955078,
|
|
"logps/ref_chosen": -51.02449035644531,
|
|
"logps/ref_rejected": -106.82443237304688,
|
|
"logps/rejected": -107.87103271484375,
|
|
"loss": 0.9853,
|
|
"margin_dpo/margin_mean": 1.174542784690857,
|
|
"margin_dpo/margin_std": 1.1653451919555664,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.05286343612334802,
|
|
"fcm_dpo/beta": 0.44032353162765503,
|
|
"fcm_dpo/delta": -0.08942731469869614,
|
|
"fcm_dpo/margin": 1.101135015487671,
|
|
"fcm_dpo/q_t": 0.38899609446525574,
|
|
"grad_norm": 212.2360382080078,
|
|
"learning_rate": 2.536231884057971e-07,
|
|
"logits/chosen": -0.5859663486480713,
|
|
"logits/rejected": -0.5510756373405457,
|
|
"logps/chosen": -51.97636795043945,
|
|
"logps/ref_chosen": -51.991493225097656,
|
|
"logps/ref_rejected": -86.0406265258789,
|
|
"logps/rejected": -87.12663269042969,
|
|
"loss": 1.0312,
|
|
"margin_dpo/margin_mean": 1.1011340618133545,
|
|
"margin_dpo/margin_std": 1.233978033065796,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.05433186490455213,
|
|
"fcm_dpo/beta": 0.4299408495426178,
|
|
"fcm_dpo/delta": -0.03354019671678543,
|
|
"fcm_dpo/margin": 1.0015331506729126,
|
|
"fcm_dpo/q_t": 0.40413135290145874,
|
|
"grad_norm": 196.89625549316406,
|
|
"learning_rate": 2.6086956521739126e-07,
|
|
"logits/chosen": -0.5104124546051025,
|
|
"logits/rejected": -0.46673229336738586,
|
|
"logps/chosen": -62.82787322998047,
|
|
"logps/ref_chosen": -62.807106018066406,
|
|
"logps/ref_rejected": -77.89507293701172,
|
|
"logps/rejected": -78.91737365722656,
|
|
"loss": 1.0822,
|
|
"margin_dpo/margin_mean": 1.0015329122543335,
|
|
"margin_dpo/margin_std": 1.3581469058990479,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.055800293685756244,
|
|
"fcm_dpo/beta": 0.42291873693466187,
|
|
"fcm_dpo/delta": -0.16829678416252136,
|
|
"fcm_dpo/margin": 1.3221113681793213,
|
|
"fcm_dpo/q_t": 0.37941908836364746,
|
|
"grad_norm": 206.74301147460938,
|
|
"learning_rate": 2.681159420289855e-07,
|
|
"logits/chosen": -0.5344926118850708,
|
|
"logits/rejected": -0.5018054842948914,
|
|
"logps/chosen": -48.27055358886719,
|
|
"logps/ref_chosen": -48.39051818847656,
|
|
"logps/ref_rejected": -97.91244506835938,
|
|
"logps/rejected": -99.11459350585938,
|
|
"loss": 1.0001,
|
|
"margin_dpo/margin_mean": 1.3221120834350586,
|
|
"margin_dpo/margin_std": 1.6323070526123047,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.05726872246696035,
|
|
"fcm_dpo/beta": 0.3985295593738556,
|
|
"fcm_dpo/delta": -0.29377901554107666,
|
|
"fcm_dpo/margin": 1.6867289543151855,
|
|
"fcm_dpo/q_t": 0.34562230110168457,
|
|
"grad_norm": 223.958984375,
|
|
"learning_rate": 2.753623188405797e-07,
|
|
"logits/chosen": -0.5842019319534302,
|
|
"logits/rejected": -0.5461462736129761,
|
|
"logps/chosen": -50.60572814941406,
|
|
"logps/ref_chosen": -50.75047302246094,
|
|
"logps/ref_rejected": -78.56951141357422,
|
|
"logps/rejected": -80.11149597167969,
|
|
"loss": 0.8956,
|
|
"margin_dpo/margin_mean": 1.686728835105896,
|
|
"margin_dpo/margin_std": 1.4286189079284668,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.05873715124816446,
|
|
"fcm_dpo/beta": 0.3848017454147339,
|
|
"fcm_dpo/delta": -0.19280429184436798,
|
|
"fcm_dpo/margin": 1.5124502182006836,
|
|
"fcm_dpo/q_t": 0.3725898265838623,
|
|
"grad_norm": 157.12765502929688,
|
|
"learning_rate": 2.8260869565217386e-07,
|
|
"logits/chosen": -0.525189995765686,
|
|
"logits/rejected": -0.4957452118396759,
|
|
"logps/chosen": -57.79692077636719,
|
|
"logps/ref_chosen": -57.985069274902344,
|
|
"logps/ref_rejected": -74.3000717163086,
|
|
"logps/rejected": -75.62437438964844,
|
|
"loss": 0.9784,
|
|
"margin_dpo/margin_mean": 1.512451171875,
|
|
"margin_dpo/margin_std": 1.693179965019226,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.06020558002936858,
|
|
"fcm_dpo/beta": 0.3643730580806732,
|
|
"fcm_dpo/delta": -0.2775127589702606,
|
|
"fcm_dpo/margin": 1.809408187866211,
|
|
"fcm_dpo/q_t": 0.3553549349308014,
|
|
"grad_norm": 166.80831909179688,
|
|
"learning_rate": 2.898550724637681e-07,
|
|
"logits/chosen": -0.5380607843399048,
|
|
"logits/rejected": -0.5014735460281372,
|
|
"logps/chosen": -62.65604019165039,
|
|
"logps/ref_chosen": -62.69581604003906,
|
|
"logps/ref_rejected": -97.02352905273438,
|
|
"logps/rejected": -98.79315948486328,
|
|
"loss": 0.9346,
|
|
"margin_dpo/margin_mean": 1.8094090223312378,
|
|
"margin_dpo/margin_std": 1.9210506677627563,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.06167400881057269,
|
|
"fcm_dpo/beta": 0.33479398488998413,
|
|
"fcm_dpo/delta": -0.4733234643936157,
|
|
"fcm_dpo/margin": 2.4904072284698486,
|
|
"fcm_dpo/q_t": 0.3218887448310852,
|
|
"grad_norm": 154.84799194335938,
|
|
"learning_rate": 2.971014492753623e-07,
|
|
"logits/chosen": -0.5270863771438599,
|
|
"logits/rejected": -0.4803048372268677,
|
|
"logps/chosen": -58.758487701416016,
|
|
"logps/ref_chosen": -58.966426849365234,
|
|
"logps/ref_rejected": -109.90837097167969,
|
|
"logps/rejected": -112.19084167480469,
|
|
"loss": 0.8362,
|
|
"margin_dpo/margin_mean": 2.4904069900512695,
|
|
"margin_dpo/margin_std": 2.361166477203369,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.0631424375917768,
|
|
"fcm_dpo/beta": 0.3071477711200714,
|
|
"fcm_dpo/delta": -0.3796875774860382,
|
|
"fcm_dpo/margin": 2.438110113143921,
|
|
"fcm_dpo/q_t": 0.32926446199417114,
|
|
"grad_norm": 144.34567260742188,
|
|
"learning_rate": 3.043478260869565e-07,
|
|
"logits/chosen": -0.5323761701583862,
|
|
"logits/rejected": -0.5072122812271118,
|
|
"logps/chosen": -53.617584228515625,
|
|
"logps/ref_chosen": -54.15599822998047,
|
|
"logps/ref_rejected": -96.48019409179688,
|
|
"logps/rejected": -98.37989044189453,
|
|
"loss": 0.8428,
|
|
"margin_dpo/margin_mean": 2.438110113143921,
|
|
"margin_dpo/margin_std": 1.899155855178833,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.06461086637298091,
|
|
"fcm_dpo/beta": 0.28413423895835876,
|
|
"fcm_dpo/delta": -0.40553855895996094,
|
|
"fcm_dpo/margin": 2.7193827629089355,
|
|
"fcm_dpo/q_t": 0.3261229395866394,
|
|
"grad_norm": 146.47552490234375,
|
|
"learning_rate": 3.115942028985507e-07,
|
|
"logits/chosen": -0.4545024633407593,
|
|
"logits/rejected": -0.43494725227355957,
|
|
"logps/chosen": -49.821990966796875,
|
|
"logps/ref_chosen": -50.07849884033203,
|
|
"logps/ref_rejected": -108.78376007080078,
|
|
"logps/rejected": -111.24664306640625,
|
|
"loss": 0.836,
|
|
"margin_dpo/margin_mean": 2.7193822860717773,
|
|
"margin_dpo/margin_std": 2.205606460571289,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.06607929515418502,
|
|
"fcm_dpo/beta": 0.27153611183166504,
|
|
"fcm_dpo/delta": -0.2071065902709961,
|
|
"fcm_dpo/margin": 2.1919541358947754,
|
|
"fcm_dpo/q_t": 0.3700290322303772,
|
|
"grad_norm": 116.2034683227539,
|
|
"learning_rate": 3.188405797101449e-07,
|
|
"logits/chosen": -0.5074399709701538,
|
|
"logits/rejected": -0.49551981687545776,
|
|
"logps/chosen": -48.25216293334961,
|
|
"logps/ref_chosen": -48.4149284362793,
|
|
"logps/ref_rejected": -77.93643188476562,
|
|
"logps/rejected": -79.96562194824219,
|
|
"loss": 0.9723,
|
|
"margin_dpo/margin_mean": 2.1919541358947754,
|
|
"margin_dpo/margin_std": 2.4712252616882324,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.06754772393538913,
|
|
"fcm_dpo/beta": 0.2555280327796936,
|
|
"fcm_dpo/delta": -0.3303987383842468,
|
|
"fcm_dpo/margin": 2.7688708305358887,
|
|
"fcm_dpo/q_t": 0.34917011857032776,
|
|
"grad_norm": 124.21039581298828,
|
|
"learning_rate": 3.260869565217391e-07,
|
|
"logits/chosen": -0.5273990631103516,
|
|
"logits/rejected": -0.4762783646583557,
|
|
"logps/chosen": -55.79579162597656,
|
|
"logps/ref_chosen": -55.999427795410156,
|
|
"logps/ref_rejected": -95.652587890625,
|
|
"logps/rejected": -98.21781921386719,
|
|
"loss": 0.922,
|
|
"margin_dpo/margin_mean": 2.768871307373047,
|
|
"margin_dpo/margin_std": 3.0828027725219727,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.06901615271659324,
|
|
"fcm_dpo/beta": 0.24128976464271545,
|
|
"fcm_dpo/delta": -0.2963497042655945,
|
|
"fcm_dpo/margin": 2.806663990020752,
|
|
"fcm_dpo/q_t": 0.3475228548049927,
|
|
"grad_norm": 116.81612396240234,
|
|
"learning_rate": 3.333333333333333e-07,
|
|
"logits/chosen": -0.5757611989974976,
|
|
"logits/rejected": -0.5232735872268677,
|
|
"logps/chosen": -57.52677917480469,
|
|
"logps/ref_chosen": -57.92607879638672,
|
|
"logps/ref_rejected": -94.67920684814453,
|
|
"logps/rejected": -97.0865707397461,
|
|
"loss": 0.9024,
|
|
"margin_dpo/margin_mean": 2.80666446685791,
|
|
"margin_dpo/margin_std": 2.5121896266937256,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.07048458149779736,
|
|
"fcm_dpo/beta": 0.22662241756916046,
|
|
"fcm_dpo/delta": -0.23767787218093872,
|
|
"fcm_dpo/margin": 2.7389888763427734,
|
|
"fcm_dpo/q_t": 0.3588021993637085,
|
|
"grad_norm": 126.09705352783203,
|
|
"learning_rate": 3.4057971014492755e-07,
|
|
"logits/chosen": -0.569898784160614,
|
|
"logits/rejected": -0.5105218291282654,
|
|
"logps/chosen": -57.07960510253906,
|
|
"logps/ref_chosen": -57.188072204589844,
|
|
"logps/ref_rejected": -88.0166015625,
|
|
"logps/rejected": -90.64712524414062,
|
|
"loss": 0.9453,
|
|
"margin_dpo/margin_mean": 2.7389891147613525,
|
|
"margin_dpo/margin_std": 2.5488128662109375,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.07195301027900147,
|
|
"fcm_dpo/beta": 0.21518516540527344,
|
|
"fcm_dpo/delta": -0.34602996706962585,
|
|
"fcm_dpo/margin": 3.3560891151428223,
|
|
"fcm_dpo/q_t": 0.3433716297149658,
|
|
"grad_norm": 101.85260772705078,
|
|
"learning_rate": 3.478260869565217e-07,
|
|
"logits/chosen": -0.5557532906532288,
|
|
"logits/rejected": -0.49865707755088806,
|
|
"logps/chosen": -61.317138671875,
|
|
"logps/ref_chosen": -61.685272216796875,
|
|
"logps/ref_rejected": -83.76747131347656,
|
|
"logps/rejected": -86.75543212890625,
|
|
"loss": 0.8993,
|
|
"margin_dpo/margin_mean": 3.356088638305664,
|
|
"margin_dpo/margin_std": 3.3915152549743652,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.07342143906020558,
|
|
"fcm_dpo/beta": 0.19847270846366882,
|
|
"fcm_dpo/delta": -0.36405348777770996,
|
|
"fcm_dpo/margin": 3.707047700881958,
|
|
"fcm_dpo/q_t": 0.33734625577926636,
|
|
"grad_norm": 95.96993255615234,
|
|
"learning_rate": 3.5507246376811595e-07,
|
|
"logits/chosen": -0.5368717908859253,
|
|
"logits/rejected": -0.5008732080459595,
|
|
"logps/chosen": -58.7468147277832,
|
|
"logps/ref_chosen": -58.72413635253906,
|
|
"logps/ref_rejected": -96.35814666748047,
|
|
"logps/rejected": -100.08787536621094,
|
|
"loss": 0.883,
|
|
"margin_dpo/margin_mean": 3.707047939300537,
|
|
"margin_dpo/margin_std": 3.5281009674072266,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.07488986784140969,
|
|
"fcm_dpo/beta": 0.1852697730064392,
|
|
"fcm_dpo/delta": -0.3053027391433716,
|
|
"fcm_dpo/margin": 3.6747231483459473,
|
|
"fcm_dpo/q_t": 0.3589528203010559,
|
|
"grad_norm": 75.97203826904297,
|
|
"learning_rate": 3.6231884057971015e-07,
|
|
"logits/chosen": -0.514645516872406,
|
|
"logits/rejected": -0.48158469796180725,
|
|
"logps/chosen": -61.512142181396484,
|
|
"logps/ref_chosen": -61.3736686706543,
|
|
"logps/ref_rejected": -76.00199890136719,
|
|
"logps/rejected": -79.81520080566406,
|
|
"loss": 0.9563,
|
|
"margin_dpo/margin_mean": 3.674722671508789,
|
|
"margin_dpo/margin_std": 4.535408020019531,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.0763582966226138,
|
|
"fcm_dpo/beta": 0.16818588972091675,
|
|
"fcm_dpo/delta": -0.596248984336853,
|
|
"fcm_dpo/margin": 5.584807395935059,
|
|
"fcm_dpo/q_t": 0.29704979062080383,
|
|
"grad_norm": 75.52082824707031,
|
|
"learning_rate": 3.695652173913043e-07,
|
|
"logits/chosen": -0.4952273666858673,
|
|
"logits/rejected": -0.43586522340774536,
|
|
"logps/chosen": -51.88074493408203,
|
|
"logps/ref_chosen": -52.33735656738281,
|
|
"logps/ref_rejected": -79.97391510009766,
|
|
"logps/rejected": -85.10211181640625,
|
|
"loss": 0.7698,
|
|
"margin_dpo/margin_mean": 5.584807872772217,
|
|
"margin_dpo/margin_std": 4.602443218231201,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.07782672540381791,
|
|
"fcm_dpo/beta": 0.1531587839126587,
|
|
"fcm_dpo/delta": -0.5026867389678955,
|
|
"fcm_dpo/margin": 5.6284685134887695,
|
|
"fcm_dpo/q_t": 0.32493168115615845,
|
|
"grad_norm": 77.16937255859375,
|
|
"learning_rate": 3.7681159420289855e-07,
|
|
"logits/chosen": -0.6023041009902954,
|
|
"logits/rejected": -0.5804400444030762,
|
|
"logps/chosen": -53.40974044799805,
|
|
"logps/ref_chosen": -53.31465148925781,
|
|
"logps/ref_rejected": -91.78359985351562,
|
|
"logps/rejected": -97.50714111328125,
|
|
"loss": 0.8492,
|
|
"margin_dpo/margin_mean": 5.6284685134887695,
|
|
"margin_dpo/margin_std": 5.570339679718018,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.07929515418502203,
|
|
"fcm_dpo/beta": 0.14113633334636688,
|
|
"fcm_dpo/delta": -0.3154899477958679,
|
|
"fcm_dpo/margin": 4.917664527893066,
|
|
"fcm_dpo/q_t": 0.3461211621761322,
|
|
"grad_norm": 67.67288970947266,
|
|
"learning_rate": 3.8405797101449274e-07,
|
|
"logits/chosen": -0.5780713558197021,
|
|
"logits/rejected": -0.5238767862319946,
|
|
"logps/chosen": -50.9090576171875,
|
|
"logps/ref_chosen": -50.68865966796875,
|
|
"logps/ref_rejected": -91.71539306640625,
|
|
"logps/rejected": -96.85345458984375,
|
|
"loss": 0.8952,
|
|
"margin_dpo/margin_mean": 4.917665004730225,
|
|
"margin_dpo/margin_std": 4.633595943450928,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.08076358296622614,
|
|
"fcm_dpo/beta": 0.13106518983840942,
|
|
"fcm_dpo/delta": -0.40200865268707275,
|
|
"fcm_dpo/margin": 5.890453338623047,
|
|
"fcm_dpo/q_t": 0.33843106031417847,
|
|
"grad_norm": 64.55609893798828,
|
|
"learning_rate": 3.9130434782608694e-07,
|
|
"logits/chosen": -0.6431792974472046,
|
|
"logits/rejected": -0.5808027982711792,
|
|
"logps/chosen": -63.28520965576172,
|
|
"logps/ref_chosen": -62.615234375,
|
|
"logps/ref_rejected": -88.99349975585938,
|
|
"logps/rejected": -95.55393981933594,
|
|
"loss": 0.908,
|
|
"margin_dpo/margin_mean": 5.890453338623047,
|
|
"margin_dpo/margin_std": 6.6398115158081055,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.08223201174743025,
|
|
"fcm_dpo/beta": 0.12227150052785873,
|
|
"fcm_dpo/delta": -0.30633145570755005,
|
|
"fcm_dpo/margin": 5.606152057647705,
|
|
"fcm_dpo/q_t": 0.354331374168396,
|
|
"grad_norm": 57.52459716796875,
|
|
"learning_rate": 3.9855072463768114e-07,
|
|
"logits/chosen": -0.6030697822570801,
|
|
"logits/rejected": -0.5601364374160767,
|
|
"logps/chosen": -58.434322357177734,
|
|
"logps/ref_chosen": -57.9327278137207,
|
|
"logps/ref_rejected": -94.1744384765625,
|
|
"logps/rejected": -100.28218078613281,
|
|
"loss": 0.9514,
|
|
"margin_dpo/margin_mean": 5.606152534484863,
|
|
"margin_dpo/margin_std": 6.62627649307251,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.08370044052863436,
|
|
"fcm_dpo/beta": 0.11491702497005463,
|
|
"fcm_dpo/delta": -0.34079399704933167,
|
|
"fcm_dpo/margin": 6.242837905883789,
|
|
"fcm_dpo/q_t": 0.33905744552612305,
|
|
"grad_norm": 59.57084655761719,
|
|
"learning_rate": 4.057971014492754e-07,
|
|
"logits/chosen": -0.5788109302520752,
|
|
"logits/rejected": -0.5509734153747559,
|
|
"logps/chosen": -71.0447998046875,
|
|
"logps/ref_chosen": -70.49528503417969,
|
|
"logps/ref_rejected": -95.56546020507812,
|
|
"logps/rejected": -102.35780334472656,
|
|
"loss": 0.8891,
|
|
"margin_dpo/margin_mean": 6.242837905883789,
|
|
"margin_dpo/margin_std": 5.736446857452393,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.08516886930983847,
|
|
"fcm_dpo/beta": 0.10662582516670227,
|
|
"fcm_dpo/delta": -0.38594919443130493,
|
|
"fcm_dpo/margin": 7.108300685882568,
|
|
"fcm_dpo/q_t": 0.3379080295562744,
|
|
"grad_norm": 60.7117805480957,
|
|
"learning_rate": 4.1304347826086954e-07,
|
|
"logits/chosen": -0.6119065880775452,
|
|
"logits/rejected": -0.5379560589790344,
|
|
"logps/chosen": -62.78474807739258,
|
|
"logps/ref_chosen": -62.13294219970703,
|
|
"logps/ref_rejected": -84.61729431152344,
|
|
"logps/rejected": -92.37741088867188,
|
|
"loss": 0.8931,
|
|
"margin_dpo/margin_mean": 7.108301162719727,
|
|
"margin_dpo/margin_std": 7.328868389129639,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.08663729809104258,
|
|
"fcm_dpo/beta": 0.0974373072385788,
|
|
"fcm_dpo/delta": -0.3749150037765503,
|
|
"fcm_dpo/margin": 7.614223957061768,
|
|
"fcm_dpo/q_t": 0.33926984667778015,
|
|
"grad_norm": 55.413719177246094,
|
|
"learning_rate": 4.2028985507246374e-07,
|
|
"logits/chosen": -0.6353350877761841,
|
|
"logits/rejected": -0.5950881242752075,
|
|
"logps/chosen": -53.027374267578125,
|
|
"logps/ref_chosen": -51.932525634765625,
|
|
"logps/ref_rejected": -88.88520050048828,
|
|
"logps/rejected": -97.59426879882812,
|
|
"loss": 0.8934,
|
|
"margin_dpo/margin_mean": 7.614224433898926,
|
|
"margin_dpo/margin_std": 7.698889255523682,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.0881057268722467,
|
|
"fcm_dpo/beta": 0.09344291687011719,
|
|
"fcm_dpo/delta": -0.1885889768600464,
|
|
"fcm_dpo/margin": 6.171331405639648,
|
|
"fcm_dpo/q_t": 0.3686023950576782,
|
|
"grad_norm": 60.355464935302734,
|
|
"learning_rate": 4.2753623188405794e-07,
|
|
"logits/chosen": -0.5876541137695312,
|
|
"logits/rejected": -0.5260422229766846,
|
|
"logps/chosen": -63.00043869018555,
|
|
"logps/ref_chosen": -60.94218826293945,
|
|
"logps/ref_rejected": -85.39340209960938,
|
|
"logps/rejected": -93.62298583984375,
|
|
"loss": 0.9854,
|
|
"margin_dpo/margin_mean": 6.171331405639648,
|
|
"margin_dpo/margin_std": 6.925416946411133,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.08957415565345081,
|
|
"fcm_dpo/beta": 0.08880254626274109,
|
|
"fcm_dpo/delta": -0.2756527066230774,
|
|
"fcm_dpo/margin": 7.387622833251953,
|
|
"fcm_dpo/q_t": 0.36539459228515625,
|
|
"grad_norm": 49.870140075683594,
|
|
"learning_rate": 4.3478260869565214e-07,
|
|
"logits/chosen": -0.6084675788879395,
|
|
"logits/rejected": -0.5737414956092834,
|
|
"logps/chosen": -61.716583251953125,
|
|
"logps/ref_chosen": -60.633522033691406,
|
|
"logps/ref_rejected": -89.85249328613281,
|
|
"logps/rejected": -98.32318115234375,
|
|
"loss": 0.9793,
|
|
"margin_dpo/margin_mean": 7.387622833251953,
|
|
"margin_dpo/margin_std": 9.976318359375,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.09104258443465492,
|
|
"fcm_dpo/beta": 0.08695913851261139,
|
|
"fcm_dpo/delta": -0.10356283187866211,
|
|
"fcm_dpo/margin": 5.732213973999023,
|
|
"fcm_dpo/q_t": 0.3890203535556793,
|
|
"grad_norm": 48.48149871826172,
|
|
"learning_rate": 4.420289855072464e-07,
|
|
"logits/chosen": -0.6002976298332214,
|
|
"logits/rejected": -0.566005289554596,
|
|
"logps/chosen": -57.36714553833008,
|
|
"logps/ref_chosen": -56.15077209472656,
|
|
"logps/ref_rejected": -75.56619262695312,
|
|
"logps/rejected": -82.51478576660156,
|
|
"loss": 1.037,
|
|
"margin_dpo/margin_mean": 5.732213973999023,
|
|
"margin_dpo/margin_std": 7.161689758300781,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.09251101321585903,
|
|
"fcm_dpo/beta": 0.08248023688793182,
|
|
"fcm_dpo/delta": -0.2507448196411133,
|
|
"fcm_dpo/margin": 7.671149253845215,
|
|
"fcm_dpo/q_t": 0.3569306433200836,
|
|
"grad_norm": 49.066436767578125,
|
|
"learning_rate": 4.4927536231884053e-07,
|
|
"logits/chosen": -0.5983408689498901,
|
|
"logits/rejected": -0.5525267720222473,
|
|
"logps/chosen": -75.25900268554688,
|
|
"logps/ref_chosen": -73.14739227294922,
|
|
"logps/ref_rejected": -97.61006164550781,
|
|
"logps/rejected": -107.39281463623047,
|
|
"loss": 0.9455,
|
|
"margin_dpo/margin_mean": 7.671149253845215,
|
|
"margin_dpo/margin_std": 7.900073528289795,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.09397944199706314,
|
|
"fcm_dpo/beta": 0.07727767527103424,
|
|
"fcm_dpo/delta": -0.32136908173561096,
|
|
"fcm_dpo/margin": 8.976408004760742,
|
|
"fcm_dpo/q_t": 0.3494236171245575,
|
|
"grad_norm": 44.7556266784668,
|
|
"learning_rate": 4.5652173913043473e-07,
|
|
"logits/chosen": -0.590173602104187,
|
|
"logits/rejected": -0.5585036873817444,
|
|
"logps/chosen": -54.6782341003418,
|
|
"logps/ref_chosen": -53.998600006103516,
|
|
"logps/ref_rejected": -93.53019714355469,
|
|
"logps/rejected": -103.18624877929688,
|
|
"loss": 0.9307,
|
|
"margin_dpo/margin_mean": 8.976409912109375,
|
|
"margin_dpo/margin_std": 9.896392822265625,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.09544787077826726,
|
|
"fcm_dpo/beta": 0.07323503494262695,
|
|
"fcm_dpo/delta": -0.28665655851364136,
|
|
"fcm_dpo/margin": 9.088768005371094,
|
|
"fcm_dpo/q_t": 0.35149580240249634,
|
|
"grad_norm": 45.25883865356445,
|
|
"learning_rate": 4.63768115942029e-07,
|
|
"logits/chosen": -0.6546945571899414,
|
|
"logits/rejected": -0.6407773494720459,
|
|
"logps/chosen": -67.44660949707031,
|
|
"logps/ref_chosen": -64.83599853515625,
|
|
"logps/ref_rejected": -109.94645690917969,
|
|
"logps/rejected": -121.64584350585938,
|
|
"loss": 0.9441,
|
|
"margin_dpo/margin_mean": 9.088767051696777,
|
|
"margin_dpo/margin_std": 9.91242790222168,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.09691629955947137,
|
|
"fcm_dpo/beta": 0.06984806060791016,
|
|
"fcm_dpo/delta": -0.20510993897914886,
|
|
"fcm_dpo/margin": 8.45463752746582,
|
|
"fcm_dpo/q_t": 0.3711079955101013,
|
|
"grad_norm": 39.93006134033203,
|
|
"learning_rate": 4.7101449275362313e-07,
|
|
"logits/chosen": -0.6191996932029724,
|
|
"logits/rejected": -0.5847162008285522,
|
|
"logps/chosen": -53.86325454711914,
|
|
"logps/ref_chosen": -51.44352722167969,
|
|
"logps/ref_rejected": -75.63629913330078,
|
|
"logps/rejected": -86.51066589355469,
|
|
"loss": 0.9827,
|
|
"margin_dpo/margin_mean": 8.45463752746582,
|
|
"margin_dpo/margin_std": 10.011677742004395,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.09838472834067548,
|
|
"fcm_dpo/beta": 0.06784674525260925,
|
|
"fcm_dpo/delta": -0.19093264639377594,
|
|
"fcm_dpo/margin": 8.551323890686035,
|
|
"fcm_dpo/q_t": 0.37226301431655884,
|
|
"grad_norm": 40.03382110595703,
|
|
"learning_rate": 4.782608695652174e-07,
|
|
"logits/chosen": -0.61040198802948,
|
|
"logits/rejected": -0.5693593621253967,
|
|
"logps/chosen": -61.51601028442383,
|
|
"logps/ref_chosen": -59.34080505371094,
|
|
"logps/ref_rejected": -72.78728485107422,
|
|
"logps/rejected": -83.5138168334961,
|
|
"loss": 0.9854,
|
|
"margin_dpo/margin_mean": 8.551323890686035,
|
|
"margin_dpo/margin_std": 9.986248016357422,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.09985315712187959,
|
|
"fcm_dpo/beta": 0.06566619873046875,
|
|
"fcm_dpo/delta": -0.15436488389968872,
|
|
"fcm_dpo/margin": 8.31753158569336,
|
|
"fcm_dpo/q_t": 0.3754774034023285,
|
|
"grad_norm": 39.015968322753906,
|
|
"learning_rate": 4.855072463768116e-07,
|
|
"logits/chosen": -0.6389807462692261,
|
|
"logits/rejected": -0.5808882713317871,
|
|
"logps/chosen": -67.62654113769531,
|
|
"logps/ref_chosen": -65.2058334350586,
|
|
"logps/ref_rejected": -77.20724487304688,
|
|
"logps/rejected": -87.94549560546875,
|
|
"loss": 0.9805,
|
|
"margin_dpo/margin_mean": 8.31753158569336,
|
|
"margin_dpo/margin_std": 8.358397483825684,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.1013215859030837,
|
|
"fcm_dpo/beta": 0.06245996803045273,
|
|
"fcm_dpo/delta": -0.2367386519908905,
|
|
"fcm_dpo/margin": 9.93748664855957,
|
|
"fcm_dpo/q_t": 0.3604923486709595,
|
|
"grad_norm": 41.36071014404297,
|
|
"learning_rate": 4.927536231884058e-07,
|
|
"logits/chosen": -0.5898059606552124,
|
|
"logits/rejected": -0.5660474896430969,
|
|
"logps/chosen": -62.712249755859375,
|
|
"logps/ref_chosen": -59.81924057006836,
|
|
"logps/ref_rejected": -103.38886260986328,
|
|
"logps/rejected": -116.2193603515625,
|
|
"loss": 0.9374,
|
|
"margin_dpo/margin_mean": 9.937487602233887,
|
|
"margin_dpo/margin_std": 9.718847274780273,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.1027900146842878,
|
|
"fcm_dpo/beta": 0.059792205691337585,
|
|
"fcm_dpo/delta": -0.26161158084869385,
|
|
"fcm_dpo/margin": 10.791955947875977,
|
|
"fcm_dpo/q_t": 0.3578525483608246,
|
|
"grad_norm": 41.59380340576172,
|
|
"learning_rate": 5e-07,
|
|
"logits/chosen": -0.6141102313995361,
|
|
"logits/rejected": -0.5785216093063354,
|
|
"logps/chosen": -66.17719268798828,
|
|
"logps/ref_chosen": -61.930641174316406,
|
|
"logps/ref_rejected": -91.06078338623047,
|
|
"logps/rejected": -106.09928894042969,
|
|
"loss": 0.9415,
|
|
"margin_dpo/margin_mean": 10.791955947875977,
|
|
"margin_dpo/margin_std": 11.36978530883789,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.10425844346549193,
|
|
"fcm_dpo/beta": 0.05598774552345276,
|
|
"fcm_dpo/delta": -0.31813400983810425,
|
|
"fcm_dpo/margin": 12.418464660644531,
|
|
"fcm_dpo/q_t": 0.3474087715148926,
|
|
"grad_norm": 38.49131393432617,
|
|
"learning_rate": 4.999967061337492e-07,
|
|
"logits/chosen": -0.613985538482666,
|
|
"logits/rejected": -0.5706911087036133,
|
|
"logps/chosen": -65.64326477050781,
|
|
"logps/ref_chosen": -61.750335693359375,
|
|
"logps/ref_rejected": -97.33662414550781,
|
|
"logps/rejected": -113.64801025390625,
|
|
"loss": 0.9031,
|
|
"margin_dpo/margin_mean": 12.418464660644531,
|
|
"margin_dpo/margin_std": 11.948598861694336,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.10572687224669604,
|
|
"fcm_dpo/beta": 0.05243536829948425,
|
|
"fcm_dpo/delta": -0.32549160718917847,
|
|
"fcm_dpo/margin": 13.37977409362793,
|
|
"fcm_dpo/q_t": 0.3453982472419739,
|
|
"grad_norm": 38.92274856567383,
|
|
"learning_rate": 4.999868246217933e-07,
|
|
"logits/chosen": -0.6485065221786499,
|
|
"logits/rejected": -0.6123002767562866,
|
|
"logps/chosen": -70.4666748046875,
|
|
"logps/ref_chosen": -66.05341339111328,
|
|
"logps/ref_rejected": -95.2869873046875,
|
|
"logps/rejected": -113.08002471923828,
|
|
"loss": 0.9101,
|
|
"margin_dpo/margin_mean": 13.379773139953613,
|
|
"margin_dpo/margin_std": 13.722232818603516,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.10719530102790015,
|
|
"fcm_dpo/beta": 0.04936884716153145,
|
|
"fcm_dpo/delta": -0.28036195039749146,
|
|
"fcm_dpo/margin": 13.372041702270508,
|
|
"fcm_dpo/q_t": 0.36727890372276306,
|
|
"grad_norm": 37.18900680541992,
|
|
"learning_rate": 4.999703557245192e-07,
|
|
"logits/chosen": -0.6953590512275696,
|
|
"logits/rejected": -0.6547967195510864,
|
|
"logps/chosen": -72.60985565185547,
|
|
"logps/ref_chosen": -66.25627136230469,
|
|
"logps/ref_rejected": -90.45613098144531,
|
|
"logps/rejected": -110.1817626953125,
|
|
"loss": 1.0173,
|
|
"margin_dpo/margin_mean": 13.37204360961914,
|
|
"margin_dpo/margin_std": 19.566150665283203,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.10866372980910426,
|
|
"fcm_dpo/beta": 0.046581994742155075,
|
|
"fcm_dpo/delta": -0.3048560619354248,
|
|
"fcm_dpo/margin": 14.660598754882812,
|
|
"fcm_dpo/q_t": 0.3614646792411804,
|
|
"grad_norm": 38.133087158203125,
|
|
"learning_rate": 4.999472998758977e-07,
|
|
"logits/chosen": -0.6480814218521118,
|
|
"logits/rejected": -0.6374853849411011,
|
|
"logps/chosen": -60.36994171142578,
|
|
"logps/ref_chosen": -53.42488098144531,
|
|
"logps/ref_rejected": -95.94693756103516,
|
|
"logps/rejected": -117.55259704589844,
|
|
"loss": 0.9857,
|
|
"margin_dpo/margin_mean": 14.660598754882812,
|
|
"margin_dpo/margin_std": 21.60616111755371,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.11013215859030837,
|
|
"fcm_dpo/beta": 0.04279695451259613,
|
|
"fcm_dpo/delta": -0.5349281430244446,
|
|
"fcm_dpo/margin": 20.78795051574707,
|
|
"fcm_dpo/q_t": 0.31636515259742737,
|
|
"grad_norm": 33.39282989501953,
|
|
"learning_rate": 4.999176576834721e-07,
|
|
"logits/chosen": -0.6751279830932617,
|
|
"logits/rejected": -0.6657723188400269,
|
|
"logps/chosen": -58.766300201416016,
|
|
"logps/ref_chosen": -51.861663818359375,
|
|
"logps/ref_rejected": -111.25398254394531,
|
|
"logps/rejected": -138.94656372070312,
|
|
"loss": 0.8338,
|
|
"margin_dpo/margin_mean": 20.787948608398438,
|
|
"margin_dpo/margin_std": 20.532333374023438,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.11160058737151249,
|
|
"fcm_dpo/beta": 0.04038340225815773,
|
|
"fcm_dpo/delta": -0.1572856605052948,
|
|
"fcm_dpo/margin": 13.58674430847168,
|
|
"fcm_dpo/q_t": 0.37524014711380005,
|
|
"grad_norm": 33.306888580322266,
|
|
"learning_rate": 4.998814299283415e-07,
|
|
"logits/chosen": -0.6934888362884521,
|
|
"logits/rejected": -0.6505051851272583,
|
|
"logps/chosen": -61.79083251953125,
|
|
"logps/ref_chosen": -53.26603698730469,
|
|
"logps/ref_rejected": -78.21662902832031,
|
|
"logps/rejected": -100.32815551757812,
|
|
"loss": 1.006,
|
|
"margin_dpo/margin_mean": 13.58674430847168,
|
|
"margin_dpo/margin_std": 16.316781997680664,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.1130690161527166,
|
|
"fcm_dpo/beta": 0.03723585233092308,
|
|
"fcm_dpo/delta": -0.4455404579639435,
|
|
"fcm_dpo/margin": 21.639652252197266,
|
|
"fcm_dpo/q_t": 0.3260105848312378,
|
|
"grad_norm": 34.49169158935547,
|
|
"learning_rate": 4.998386175651409e-07,
|
|
"logits/chosen": -0.679383397102356,
|
|
"logits/rejected": -0.6394709348678589,
|
|
"logps/chosen": -65.87094116210938,
|
|
"logps/ref_chosen": -58.0966796875,
|
|
"logps/ref_rejected": -93.77361297607422,
|
|
"logps/rejected": -123.18753814697266,
|
|
"loss": 0.8811,
|
|
"margin_dpo/margin_mean": 21.639652252197266,
|
|
"margin_dpo/margin_std": 22.59795379638672,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.1145374449339207,
|
|
"fcm_dpo/beta": 0.03562740236520767,
|
|
"fcm_dpo/delta": -0.2082417905330658,
|
|
"fcm_dpo/margin": 16.73519515991211,
|
|
"fcm_dpo/q_t": 0.3686809539794922,
|
|
"grad_norm": 31.130857467651367,
|
|
"learning_rate": 4.997892217220159e-07,
|
|
"logits/chosen": -0.6434469223022461,
|
|
"logits/rejected": -0.6172356605529785,
|
|
"logps/chosen": -63.4884033203125,
|
|
"logps/ref_chosen": -55.61378479003906,
|
|
"logps/ref_rejected": -84.93436431884766,
|
|
"logps/rejected": -109.54417419433594,
|
|
"loss": 0.9783,
|
|
"margin_dpo/margin_mean": 16.73519515991211,
|
|
"margin_dpo/margin_std": 19.067113876342773,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.11600587371512482,
|
|
"fcm_dpo/beta": 0.03389370068907738,
|
|
"fcm_dpo/delta": -0.24003317952156067,
|
|
"fcm_dpo/margin": 18.436786651611328,
|
|
"fcm_dpo/q_t": 0.3689296245574951,
|
|
"grad_norm": 27.611059188842773,
|
|
"learning_rate": 4.997332437005931e-07,
|
|
"logits/chosen": -0.6699581742286682,
|
|
"logits/rejected": -0.6403902173042297,
|
|
"logps/chosen": -63.6839599609375,
|
|
"logps/ref_chosen": -55.45048522949219,
|
|
"logps/ref_rejected": -87.64756774902344,
|
|
"logps/rejected": -114.31782531738281,
|
|
"loss": 0.9907,
|
|
"margin_dpo/margin_mean": 18.436786651611328,
|
|
"margin_dpo/margin_std": 23.604507446289062,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.11747430249632893,
|
|
"fcm_dpo/beta": 0.032423943281173706,
|
|
"fcm_dpo/delta": -0.16374123096466064,
|
|
"fcm_dpo/margin": 17.029020309448242,
|
|
"fcm_dpo/q_t": 0.3843163251876831,
|
|
"grad_norm": 29.77735710144043,
|
|
"learning_rate": 4.996706849759452e-07,
|
|
"logits/chosen": -0.7156171798706055,
|
|
"logits/rejected": -0.6716504096984863,
|
|
"logps/chosen": -69.70584869384766,
|
|
"logps/ref_chosen": -58.519290924072266,
|
|
"logps/ref_rejected": -87.54750061035156,
|
|
"logps/rejected": -115.76307678222656,
|
|
"loss": 1.0366,
|
|
"margin_dpo/margin_mean": 17.029020309448242,
|
|
"margin_dpo/margin_std": 23.282854080200195,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.11894273127753303,
|
|
"fcm_dpo/beta": 0.030525632202625275,
|
|
"fcm_dpo/delta": -0.3602490723133087,
|
|
"fcm_dpo/margin": 23.939817428588867,
|
|
"fcm_dpo/q_t": 0.3476225733757019,
|
|
"grad_norm": 30.38198471069336,
|
|
"learning_rate": 4.996015471965529e-07,
|
|
"logits/chosen": -0.732462465763092,
|
|
"logits/rejected": -0.7031528353691101,
|
|
"logps/chosen": -76.67108154296875,
|
|
"logps/ref_chosen": -66.44886779785156,
|
|
"logps/ref_rejected": -129.66270446777344,
|
|
"logps/rejected": -163.82473754882812,
|
|
"loss": 0.9326,
|
|
"margin_dpo/margin_mean": 23.939817428588867,
|
|
"margin_dpo/margin_std": 28.79480743408203,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.12041116005873716,
|
|
"fcm_dpo/beta": 0.02971896156668663,
|
|
"fcm_dpo/delta": -0.17042918503284454,
|
|
"fcm_dpo/margin": 18.8587703704834,
|
|
"fcm_dpo/q_t": 0.38334983587265015,
|
|
"grad_norm": 32.88773727416992,
|
|
"learning_rate": 4.995258321842611e-07,
|
|
"logits/chosen": -0.6365201473236084,
|
|
"logits/rejected": -0.6239097118377686,
|
|
"logps/chosen": -64.87715148925781,
|
|
"logps/ref_chosen": -52.232383728027344,
|
|
"logps/ref_rejected": -90.74325561523438,
|
|
"logps/rejected": -122.24678802490234,
|
|
"loss": 1.0784,
|
|
"margin_dpo/margin_mean": 18.8587703704834,
|
|
"margin_dpo/margin_std": 30.427291870117188,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.12187958883994127,
|
|
"fcm_dpo/beta": 0.028013106435537338,
|
|
"fcm_dpo/delta": -0.24482643604278564,
|
|
"fcm_dpo/margin": 22.431982040405273,
|
|
"fcm_dpo/q_t": 0.36464548110961914,
|
|
"grad_norm": 28.086669921875,
|
|
"learning_rate": 4.994435419342304e-07,
|
|
"logits/chosen": -0.6463146209716797,
|
|
"logits/rejected": -0.6088162660598755,
|
|
"logps/chosen": -68.94061279296875,
|
|
"logps/ref_chosen": -55.82738494873047,
|
|
"logps/ref_rejected": -103.71589660644531,
|
|
"logps/rejected": -139.2611083984375,
|
|
"loss": 0.98,
|
|
"margin_dpo/margin_mean": 22.43198013305664,
|
|
"margin_dpo/margin_std": 27.435033798217773,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.12334801762114538,
|
|
"fcm_dpo/beta": 0.027015678584575653,
|
|
"fcm_dpo/delta": -0.13040480017662048,
|
|
"fcm_dpo/margin": 19.303150177001953,
|
|
"fcm_dpo/q_t": 0.37953460216522217,
|
|
"grad_norm": 27.229429244995117,
|
|
"learning_rate": 4.993546786148857e-07,
|
|
"logits/chosen": -0.6810680031776428,
|
|
"logits/rejected": -0.6438438892364502,
|
|
"logps/chosen": -79.30827331542969,
|
|
"logps/ref_chosen": -67.1761703491211,
|
|
"logps/ref_rejected": -87.29859924316406,
|
|
"logps/rejected": -118.73384857177734,
|
|
"loss": 1.0045,
|
|
"margin_dpo/margin_mean": 19.303150177001953,
|
|
"margin_dpo/margin_std": 20.458560943603516,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.12481644640234948,
|
|
"fcm_dpo/beta": 0.02669249102473259,
|
|
"fcm_dpo/delta": -0.14147168397903442,
|
|
"fcm_dpo/margin": 19.99712371826172,
|
|
"fcm_dpo/q_t": 0.3820468783378601,
|
|
"grad_norm": 27.95656394958496,
|
|
"learning_rate": 4.992592445678582e-07,
|
|
"logits/chosen": -0.6734552383422852,
|
|
"logits/rejected": -0.6434047222137451,
|
|
"logps/chosen": -71.04620361328125,
|
|
"logps/ref_chosen": -58.4066162109375,
|
|
"logps/ref_rejected": -78.63880157470703,
|
|
"logps/rejected": -111.2755126953125,
|
|
"loss": 1.0251,
|
|
"margin_dpo/margin_mean": 19.99712371826172,
|
|
"margin_dpo/margin_std": 24.61843490600586,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.1262848751835536,
|
|
"fcm_dpo/beta": 0.025787636637687683,
|
|
"fcm_dpo/delta": -0.15711811184883118,
|
|
"fcm_dpo/margin": 21.28069305419922,
|
|
"fcm_dpo/q_t": 0.39334917068481445,
|
|
"grad_norm": 30.917089462280273,
|
|
"learning_rate": 4.991572423079235e-07,
|
|
"logits/chosen": -0.7074247598648071,
|
|
"logits/rejected": -0.6953707337379456,
|
|
"logps/chosen": -72.53890991210938,
|
|
"logps/ref_chosen": -56.13746643066406,
|
|
"logps/ref_rejected": -88.12165069580078,
|
|
"logps/rejected": -125.80378723144531,
|
|
"loss": 1.1033,
|
|
"margin_dpo/margin_mean": 21.28069305419922,
|
|
"margin_dpo/margin_std": 38.22045135498047,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.1277533039647577,
|
|
"fcm_dpo/beta": 0.024498071521520615,
|
|
"fcm_dpo/delta": -0.2259608507156372,
|
|
"fcm_dpo/margin": 24.91492462158203,
|
|
"fcm_dpo/q_t": 0.3681684136390686,
|
|
"grad_norm": 26.133350372314453,
|
|
"learning_rate": 4.990486745229364e-07,
|
|
"logits/chosen": -0.7258398532867432,
|
|
"logits/rejected": -0.7005423307418823,
|
|
"logps/chosen": -71.85640716552734,
|
|
"logps/ref_chosen": -55.63609313964844,
|
|
"logps/ref_rejected": -95.46757507324219,
|
|
"logps/rejected": -136.60281372070312,
|
|
"loss": 1.014,
|
|
"margin_dpo/margin_mean": 24.9149227142334,
|
|
"margin_dpo/margin_std": 33.578163146972656,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.12922173274596183,
|
|
"fcm_dpo/beta": 0.024026712402701378,
|
|
"fcm_dpo/delta": -0.052284643054008484,
|
|
"fcm_dpo/margin": 18.689456939697266,
|
|
"fcm_dpo/q_t": 0.4039269983768463,
|
|
"grad_norm": 27.430917739868164,
|
|
"learning_rate": 4.989335440737586e-07,
|
|
"logits/chosen": -0.6927683353424072,
|
|
"logits/rejected": -0.6857916116714478,
|
|
"logps/chosen": -94.15711975097656,
|
|
"logps/ref_chosen": -73.67115020751953,
|
|
"logps/ref_rejected": -106.70849609375,
|
|
"logps/rejected": -145.88392639160156,
|
|
"loss": 1.1188,
|
|
"margin_dpo/margin_mean": 18.689455032348633,
|
|
"margin_dpo/margin_std": 30.20696258544922,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.13069016152716592,
|
|
"fcm_dpo/beta": 0.023834798485040665,
|
|
"fcm_dpo/delta": -0.104669950902462,
|
|
"fcm_dpo/margin": 20.956254959106445,
|
|
"fcm_dpo/q_t": 0.3892160654067993,
|
|
"grad_norm": 24.95569610595703,
|
|
"learning_rate": 4.988118539941847e-07,
|
|
"logits/chosen": -0.7409356832504272,
|
|
"logits/rejected": -0.7092708349227905,
|
|
"logps/chosen": -73.19313049316406,
|
|
"logps/ref_chosen": -60.624916076660156,
|
|
"logps/ref_rejected": -82.08354949951172,
|
|
"logps/rejected": -115.60802459716797,
|
|
"loss": 1.0428,
|
|
"margin_dpo/margin_mean": 20.956254959106445,
|
|
"margin_dpo/margin_std": 27.846221923828125,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.13215859030837004,
|
|
"fcm_dpo/beta": 0.022901657968759537,
|
|
"fcm_dpo/delta": -0.2683162987232208,
|
|
"fcm_dpo/margin": 28.446239471435547,
|
|
"fcm_dpo/q_t": 0.3710969090461731,
|
|
"grad_norm": 26.91438865661621,
|
|
"learning_rate": 4.986836074908615e-07,
|
|
"logits/chosen": -0.6682174205780029,
|
|
"logits/rejected": -0.6794829964637756,
|
|
"logps/chosen": -69.5387191772461,
|
|
"logps/ref_chosen": -53.285308837890625,
|
|
"logps/ref_rejected": -111.54470825195312,
|
|
"logps/rejected": -156.24435424804688,
|
|
"loss": 1.0224,
|
|
"margin_dpo/margin_mean": 28.446239471435547,
|
|
"margin_dpo/margin_std": 41.36814880371094,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.13362701908957417,
|
|
"fcm_dpo/beta": 0.022023282945156097,
|
|
"fcm_dpo/delta": -0.1283145546913147,
|
|
"fcm_dpo/margin": 23.693279266357422,
|
|
"fcm_dpo/q_t": 0.38849714398384094,
|
|
"grad_norm": 25.179744720458984,
|
|
"learning_rate": 4.985488079432037e-07,
|
|
"logits/chosen": -0.680759847164154,
|
|
"logits/rejected": -0.6463046669960022,
|
|
"logps/chosen": -78.63939666748047,
|
|
"logps/ref_chosen": -61.802955627441406,
|
|
"logps/ref_rejected": -87.87395477294922,
|
|
"logps/rejected": -128.40367126464844,
|
|
"loss": 1.0583,
|
|
"margin_dpo/margin_mean": 23.693279266357422,
|
|
"margin_dpo/margin_std": 34.601070404052734,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.13509544787077826,
|
|
"fcm_dpo/beta": 0.021527327597141266,
|
|
"fcm_dpo/delta": -0.12105225771665573,
|
|
"fcm_dpo/margin": 23.916973114013672,
|
|
"fcm_dpo/q_t": 0.3887942433357239,
|
|
"grad_norm": 23.610260009765625,
|
|
"learning_rate": 4.984074589033043e-07,
|
|
"logits/chosen": -0.712161660194397,
|
|
"logits/rejected": -0.6888165473937988,
|
|
"logps/chosen": -66.98762512207031,
|
|
"logps/ref_chosen": -51.640769958496094,
|
|
"logps/ref_rejected": -77.88117980957031,
|
|
"logps/rejected": -117.14500427246094,
|
|
"loss": 1.0527,
|
|
"margin_dpo/margin_mean": 23.916976928710938,
|
|
"margin_dpo/margin_std": 33.57923889160156,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.13656387665198239,
|
|
"fcm_dpo/beta": 0.02094947174191475,
|
|
"fcm_dpo/delta": -0.10999026894569397,
|
|
"fcm_dpo/margin": 24.082473754882812,
|
|
"fcm_dpo/q_t": 0.38795292377471924,
|
|
"grad_norm": 24.15863037109375,
|
|
"learning_rate": 4.982595640958425e-07,
|
|
"logits/chosen": -0.7293976545333862,
|
|
"logits/rejected": -0.6787578463554382,
|
|
"logps/chosen": -69.72396087646484,
|
|
"logps/ref_chosen": -52.529239654541016,
|
|
"logps/ref_rejected": -77.16075134277344,
|
|
"logps/rejected": -118.43794250488281,
|
|
"loss": 1.0288,
|
|
"margin_dpo/margin_mean": 24.08247184753418,
|
|
"margin_dpo/margin_std": 30.17513656616211,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.13803230543318648,
|
|
"fcm_dpo/beta": 0.02017746865749359,
|
|
"fcm_dpo/delta": -0.17116506397724152,
|
|
"fcm_dpo/margin": 27.756542205810547,
|
|
"fcm_dpo/q_t": 0.37484580278396606,
|
|
"grad_norm": 23.89820098876953,
|
|
"learning_rate": 4.98105127417984e-07,
|
|
"logits/chosen": -0.6600474119186401,
|
|
"logits/rejected": -0.6440984010696411,
|
|
"logps/chosen": -79.61772155761719,
|
|
"logps/ref_chosen": -61.22261047363281,
|
|
"logps/ref_rejected": -99.59902954101562,
|
|
"logps/rejected": -145.75067138671875,
|
|
"loss": 0.9922,
|
|
"margin_dpo/margin_mean": 27.756542205810547,
|
|
"margin_dpo/margin_std": 31.244901657104492,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.1395007342143906,
|
|
"fcm_dpo/beta": 0.019902190193533897,
|
|
"fcm_dpo/delta": -0.047649484127759933,
|
|
"fcm_dpo/margin": 22.366260528564453,
|
|
"fcm_dpo/q_t": 0.3979894518852234,
|
|
"grad_norm": 22.373977661132812,
|
|
"learning_rate": 4.979441529392784e-07,
|
|
"logits/chosen": -0.7095633149147034,
|
|
"logits/rejected": -0.6818147301673889,
|
|
"logps/chosen": -70.20155334472656,
|
|
"logps/ref_chosen": -52.523643493652344,
|
|
"logps/ref_rejected": -75.8803482055664,
|
|
"logps/rejected": -115.92452239990234,
|
|
"loss": 1.0719,
|
|
"margin_dpo/margin_mean": 22.366260528564453,
|
|
"margin_dpo/margin_std": 29.86621856689453,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.14096916299559473,
|
|
"fcm_dpo/beta": 0.01920231431722641,
|
|
"fcm_dpo/delta": -0.20027107000350952,
|
|
"fcm_dpo/margin": 30.528703689575195,
|
|
"fcm_dpo/q_t": 0.371820330619812,
|
|
"grad_norm": 22.68387794494629,
|
|
"learning_rate": 4.977766449015534e-07,
|
|
"logits/chosen": -0.7119661569595337,
|
|
"logits/rejected": -0.6831298470497131,
|
|
"logps/chosen": -79.36348724365234,
|
|
"logps/ref_chosen": -62.15697479248047,
|
|
"logps/ref_rejected": -96.59601593017578,
|
|
"logps/rejected": -144.33123779296875,
|
|
"loss": 0.9831,
|
|
"margin_dpo/margin_mean": 30.528701782226562,
|
|
"margin_dpo/margin_std": 35.82476806640625,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.14243759177679882,
|
|
"fcm_dpo/beta": 0.019140418618917465,
|
|
"fcm_dpo/delta": -0.05700864642858505,
|
|
"fcm_dpo/margin": 23.69298553466797,
|
|
"fcm_dpo/q_t": 0.3948918282985687,
|
|
"grad_norm": 23.630054473876953,
|
|
"learning_rate": 4.976026077188012e-07,
|
|
"logits/chosen": -0.6437740325927734,
|
|
"logits/rejected": -0.6002140045166016,
|
|
"logps/chosen": -73.06685638427734,
|
|
"logps/ref_chosen": -54.646366119384766,
|
|
"logps/ref_rejected": -76.96475219726562,
|
|
"logps/rejected": -119.0782241821289,
|
|
"loss": 1.0526,
|
|
"margin_dpo/margin_mean": 23.69298553466797,
|
|
"margin_dpo/margin_std": 27.06100845336914,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.14390602055800295,
|
|
"fcm_dpo/beta": 0.018587183207273483,
|
|
"fcm_dpo/delta": -0.10853452980518341,
|
|
"fcm_dpo/margin": 27.052547454833984,
|
|
"fcm_dpo/q_t": 0.38545018434524536,
|
|
"grad_norm": 24.32488250732422,
|
|
"learning_rate": 4.974220459770639e-07,
|
|
"logits/chosen": -0.6696487069129944,
|
|
"logits/rejected": -0.653130829334259,
|
|
"logps/chosen": -87.99504089355469,
|
|
"logps/ref_chosen": -65.25862884521484,
|
|
"logps/ref_rejected": -96.5274887084961,
|
|
"logps/rejected": -146.3164520263672,
|
|
"loss": 1.0556,
|
|
"margin_dpo/margin_mean": 27.052547454833984,
|
|
"margin_dpo/margin_std": 37.12006759643555,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.14537444933920704,
|
|
"fcm_dpo/beta": 0.01791193149983883,
|
|
"fcm_dpo/delta": -0.20442235469818115,
|
|
"fcm_dpo/margin": 33.02878189086914,
|
|
"fcm_dpo/q_t": 0.3736024498939514,
|
|
"grad_norm": 21.534595489501953,
|
|
"learning_rate": 4.972349644343108e-07,
|
|
"logits/chosen": -0.684786319732666,
|
|
"logits/rejected": -0.6852524876594543,
|
|
"logps/chosen": -63.73361587524414,
|
|
"logps/ref_chosen": -45.638484954833984,
|
|
"logps/ref_rejected": -86.43793487548828,
|
|
"logps/rejected": -137.5618438720703,
|
|
"loss": 0.992,
|
|
"margin_dpo/margin_mean": 33.02878189086914,
|
|
"margin_dpo/margin_std": 41.72602844238281,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.14684287812041116,
|
|
"fcm_dpo/beta": 0.01798401214182377,
|
|
"fcm_dpo/delta": 0.05157166346907616,
|
|
"fcm_dpo/margin": 19.44617462158203,
|
|
"fcm_dpo/q_t": 0.4210618734359741,
|
|
"grad_norm": 23.82549476623535,
|
|
"learning_rate": 4.970413680203148e-07,
|
|
"logits/chosen": -0.6766690015792847,
|
|
"logits/rejected": -0.6355087757110596,
|
|
"logps/chosen": -77.89352416992188,
|
|
"logps/ref_chosen": -57.59397888183594,
|
|
"logps/ref_rejected": -74.06021118164062,
|
|
"logps/rejected": -113.80592346191406,
|
|
"loss": 1.1629,
|
|
"margin_dpo/margin_mean": 19.44617462158203,
|
|
"margin_dpo/margin_std": 34.781532287597656,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.14831130690161526,
|
|
"fcm_dpo/beta": 0.017808571457862854,
|
|
"fcm_dpo/delta": -0.01938944309949875,
|
|
"fcm_dpo/margin": 23.464597702026367,
|
|
"fcm_dpo/q_t": 0.4106113314628601,
|
|
"grad_norm": 23.713424682617188,
|
|
"learning_rate": 4.968412618365215e-07,
|
|
"logits/chosen": -0.6706404685974121,
|
|
"logits/rejected": -0.639204740524292,
|
|
"logps/chosen": -86.684814453125,
|
|
"logps/ref_chosen": -61.64885330200195,
|
|
"logps/ref_rejected": -83.18968200683594,
|
|
"logps/rejected": -131.69024658203125,
|
|
"loss": 1.1273,
|
|
"margin_dpo/margin_mean": 23.464595794677734,
|
|
"margin_dpo/margin_std": 40.112548828125,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.14977973568281938,
|
|
"fcm_dpo/beta": 0.018256399780511856,
|
|
"fcm_dpo/delta": 0.08609801530838013,
|
|
"fcm_dpo/margin": 17.24917221069336,
|
|
"fcm_dpo/q_t": 0.4305152893066406,
|
|
"grad_norm": 27.488197326660156,
|
|
"learning_rate": 4.966346511559149e-07,
|
|
"logits/chosen": -0.6962743997573853,
|
|
"logits/rejected": -0.6518734693527222,
|
|
"logps/chosen": -91.27113342285156,
|
|
"logps/ref_chosen": -64.0788803100586,
|
|
"logps/ref_rejected": -68.18707275390625,
|
|
"logps/rejected": -112.62849426269531,
|
|
"loss": 1.2068,
|
|
"margin_dpo/margin_mean": 17.24917221069336,
|
|
"margin_dpo/margin_std": 37.05067825317383,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.1512481644640235,
|
|
"fcm_dpo/beta": 0.017573434859514236,
|
|
"fcm_dpo/delta": -0.222814679145813,
|
|
"fcm_dpo/margin": 34.63999938964844,
|
|
"fcm_dpo/q_t": 0.3685527443885803,
|
|
"grad_norm": 23.29231071472168,
|
|
"learning_rate": 4.964215414228785e-07,
|
|
"logits/chosen": -0.6724139451980591,
|
|
"logits/rejected": -0.6383209228515625,
|
|
"logps/chosen": -82.775146484375,
|
|
"logps/ref_chosen": -61.299278259277344,
|
|
"logps/ref_rejected": -93.57270812988281,
|
|
"logps/rejected": -149.6885986328125,
|
|
"loss": 0.98,
|
|
"margin_dpo/margin_mean": 34.63999938964844,
|
|
"margin_dpo/margin_std": 42.09827423095703,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.1527165932452276,
|
|
"fcm_dpo/beta": 0.017012089490890503,
|
|
"fcm_dpo/delta": -0.16792970895767212,
|
|
"fcm_dpo/margin": 32.84601593017578,
|
|
"fcm_dpo/q_t": 0.3835586905479431,
|
|
"grad_norm": 22.68368148803711,
|
|
"learning_rate": 4.96201938253052e-07,
|
|
"logits/chosen": -0.7026796340942383,
|
|
"logits/rejected": -0.6724193096160889,
|
|
"logps/chosen": -77.75988006591797,
|
|
"logps/ref_chosen": -54.372772216796875,
|
|
"logps/ref_rejected": -89.5647201538086,
|
|
"logps/rejected": -145.79783630371094,
|
|
"loss": 1.0391,
|
|
"margin_dpo/margin_mean": 32.84601593017578,
|
|
"margin_dpo/margin_std": 46.943763732910156,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.15418502202643172,
|
|
"fcm_dpo/beta": 0.0159467663615942,
|
|
"fcm_dpo/delta": -0.37162917852401733,
|
|
"fcm_dpo/margin": 46.66958236694336,
|
|
"fcm_dpo/q_t": 0.33258479833602905,
|
|
"grad_norm": 23.175701141357422,
|
|
"learning_rate": 4.959758474331832e-07,
|
|
"logits/chosen": -0.6862632036209106,
|
|
"logits/rejected": -0.6642385721206665,
|
|
"logps/chosen": -76.52045440673828,
|
|
"logps/ref_chosen": -54.638946533203125,
|
|
"logps/ref_rejected": -97.97351837158203,
|
|
"logps/rejected": -166.52459716796875,
|
|
"loss": 0.867,
|
|
"margin_dpo/margin_mean": 46.66958236694336,
|
|
"margin_dpo/margin_std": 41.3289680480957,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.15565345080763582,
|
|
"fcm_dpo/beta": 0.015470081940293312,
|
|
"fcm_dpo/delta": -0.05400984361767769,
|
|
"fcm_dpo/margin": 29.177989959716797,
|
|
"fcm_dpo/q_t": 0.3962496519088745,
|
|
"grad_norm": 22.519311904907227,
|
|
"learning_rate": 4.957432749209755e-07,
|
|
"logits/chosen": -0.6360474824905396,
|
|
"logits/rejected": -0.6046931743621826,
|
|
"logps/chosen": -79.4212875366211,
|
|
"logps/ref_chosen": -54.83289337158203,
|
|
"logps/ref_rejected": -85.22461700439453,
|
|
"logps/rejected": -138.99099731445312,
|
|
"loss": 1.0559,
|
|
"margin_dpo/margin_mean": 29.177989959716797,
|
|
"margin_dpo/margin_std": 35.5566520690918,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.15712187958883994,
|
|
"fcm_dpo/beta": 0.015153482556343079,
|
|
"fcm_dpo/delta": -0.10540027171373367,
|
|
"fcm_dpo/margin": 32.95298767089844,
|
|
"fcm_dpo/q_t": 0.3870370090007782,
|
|
"grad_norm": 21.292871475219727,
|
|
"learning_rate": 4.955042268449307e-07,
|
|
"logits/chosen": -0.6769276857376099,
|
|
"logits/rejected": -0.6308863162994385,
|
|
"logps/chosen": -99.10942840576172,
|
|
"logps/ref_chosen": -69.70780944824219,
|
|
"logps/ref_rejected": -94.73950958251953,
|
|
"logps/rejected": -157.0941162109375,
|
|
"loss": 1.042,
|
|
"margin_dpo/margin_mean": 32.95298767089844,
|
|
"margin_dpo/margin_std": 42.030609130859375,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.15859030837004406,
|
|
"fcm_dpo/beta": 0.01471179910004139,
|
|
"fcm_dpo/delta": -0.1934787929058075,
|
|
"fcm_dpo/margin": 39.57737731933594,
|
|
"fcm_dpo/q_t": 0.37953275442123413,
|
|
"grad_norm": 21.4735164642334,
|
|
"learning_rate": 4.952587095041881e-07,
|
|
"logits/chosen": -0.6490312814712524,
|
|
"logits/rejected": -0.6262690424919128,
|
|
"logps/chosen": -82.56188201904297,
|
|
"logps/ref_chosen": -56.0098876953125,
|
|
"logps/ref_rejected": -95.79601287841797,
|
|
"logps/rejected": -161.92538452148438,
|
|
"loss": 1.0361,
|
|
"margin_dpo/margin_mean": 39.57737731933594,
|
|
"margin_dpo/margin_std": 57.18327331542969,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.16005873715124816,
|
|
"fcm_dpo/beta": 0.014065122231841087,
|
|
"fcm_dpo/delta": -0.20978516340255737,
|
|
"fcm_dpo/margin": 42.39696502685547,
|
|
"fcm_dpo/q_t": 0.3675564229488373,
|
|
"grad_norm": 21.875646591186523,
|
|
"learning_rate": 4.95006729368358e-07,
|
|
"logits/chosen": -0.595153272151947,
|
|
"logits/rejected": -0.5731344223022461,
|
|
"logps/chosen": -87.58088684082031,
|
|
"logps/ref_chosen": -62.88549041748047,
|
|
"logps/ref_rejected": -98.68573760986328,
|
|
"logps/rejected": -165.77810668945312,
|
|
"loss": 0.9853,
|
|
"margin_dpo/margin_mean": 42.39696502685547,
|
|
"margin_dpo/margin_std": 49.910247802734375,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.16152716593245228,
|
|
"fcm_dpo/beta": 0.013536353595554829,
|
|
"fcm_dpo/delta": -0.12679250538349152,
|
|
"fcm_dpo/margin": 38.109649658203125,
|
|
"fcm_dpo/q_t": 0.38643020391464233,
|
|
"grad_norm": 19.236122131347656,
|
|
"learning_rate": 4.947482930773511e-07,
|
|
"logits/chosen": -0.5986282825469971,
|
|
"logits/rejected": -0.5608283877372742,
|
|
"logps/chosen": -84.11024475097656,
|
|
"logps/ref_chosen": -58.753684997558594,
|
|
"logps/ref_rejected": -79.75001525878906,
|
|
"logps/rejected": -143.2162322998047,
|
|
"loss": 1.0498,
|
|
"margin_dpo/margin_mean": 38.10965347290039,
|
|
"margin_dpo/margin_std": 50.650054931640625,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.16299559471365638,
|
|
"fcm_dpo/beta": 0.013182668015360832,
|
|
"fcm_dpo/delta": -0.1789449155330658,
|
|
"fcm_dpo/margin": 43.02890396118164,
|
|
"fcm_dpo/q_t": 0.3763716518878937,
|
|
"grad_norm": 21.762737274169922,
|
|
"learning_rate": 4.944834074412042e-07,
|
|
"logits/chosen": -0.6486632227897644,
|
|
"logits/rejected": -0.6258302927017212,
|
|
"logps/chosen": -97.67506408691406,
|
|
"logps/ref_chosen": -68.62410736083984,
|
|
"logps/ref_rejected": -98.42886352539062,
|
|
"logps/rejected": -170.50872802734375,
|
|
"loss": 1.031,
|
|
"margin_dpo/margin_mean": 43.02890396118164,
|
|
"margin_dpo/margin_std": 58.42748260498047,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.1644640234948605,
|
|
"fcm_dpo/beta": 0.013155752792954445,
|
|
"fcm_dpo/delta": 0.03874760866165161,
|
|
"fcm_dpo/margin": 27.551637649536133,
|
|
"fcm_dpo/q_t": 0.4168916344642639,
|
|
"grad_norm": 19.853069305419922,
|
|
"learning_rate": 4.942120794399002e-07,
|
|
"logits/chosen": -0.6284303665161133,
|
|
"logits/rejected": -0.5894876718521118,
|
|
"logps/chosen": -77.16087341308594,
|
|
"logps/ref_chosen": -50.24964141845703,
|
|
"logps/ref_rejected": -64.77442932128906,
|
|
"logps/rejected": -119.2373046875,
|
|
"loss": 1.1235,
|
|
"margin_dpo/margin_mean": 27.551637649536133,
|
|
"margin_dpo/margin_std": 40.0994873046875,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.16593245227606462,
|
|
"fcm_dpo/beta": 0.01325266994535923,
|
|
"fcm_dpo/delta": 0.006955064833164215,
|
|
"fcm_dpo/margin": 29.678356170654297,
|
|
"fcm_dpo/q_t": 0.40930402278900146,
|
|
"grad_norm": 20.103424072265625,
|
|
"learning_rate": 4.939343162231841e-07,
|
|
"logits/chosen": -0.6129658818244934,
|
|
"logits/rejected": -0.5705182552337646,
|
|
"logps/chosen": -100.33967590332031,
|
|
"logps/ref_chosen": -66.71295166015625,
|
|
"logps/ref_rejected": -77.96870422363281,
|
|
"logps/rejected": -141.2738037109375,
|
|
"loss": 1.0914,
|
|
"margin_dpo/margin_mean": 29.678359985351562,
|
|
"margin_dpo/margin_std": 38.312931060791016,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.16740088105726872,
|
|
"fcm_dpo/beta": 0.012752614915370941,
|
|
"fcm_dpo/delta": -0.2160319983959198,
|
|
"fcm_dpo/margin": 47.115623474121094,
|
|
"fcm_dpo/q_t": 0.3740980625152588,
|
|
"grad_norm": 21.467836380004883,
|
|
"learning_rate": 4.936501251103751e-07,
|
|
"logits/chosen": -0.6114667654037476,
|
|
"logits/rejected": -0.5787659287452698,
|
|
"logps/chosen": -88.59614562988281,
|
|
"logps/ref_chosen": -57.78507995605469,
|
|
"logps/ref_rejected": -87.10966491699219,
|
|
"logps/rejected": -165.03634643554688,
|
|
"loss": 0.9984,
|
|
"margin_dpo/margin_mean": 47.115623474121094,
|
|
"margin_dpo/margin_std": 63.08842086791992,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.16886930983847284,
|
|
"fcm_dpo/beta": 0.012707412242889404,
|
|
"fcm_dpo/delta": -0.013142341747879982,
|
|
"fcm_dpo/margin": 32.45915222167969,
|
|
"fcm_dpo/q_t": 0.41373395919799805,
|
|
"grad_norm": 27.934776306152344,
|
|
"learning_rate": 4.933595135901732e-07,
|
|
"logits/chosen": -0.6261277198791504,
|
|
"logits/rejected": -0.6049231290817261,
|
|
"logps/chosen": -105.82150268554688,
|
|
"logps/ref_chosen": -65.5826416015625,
|
|
"logps/ref_rejected": -98.56552124023438,
|
|
"logps/rejected": -171.2635498046875,
|
|
"loss": 1.1638,
|
|
"margin_dpo/margin_mean": 32.45915603637695,
|
|
"margin_dpo/margin_std": 65.33894348144531,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.17033773861967694,
|
|
"fcm_dpo/beta": 0.01261107623577118,
|
|
"fcm_dpo/delta": -0.05706522613763809,
|
|
"fcm_dpo/margin": 36.02623748779297,
|
|
"fcm_dpo/q_t": 0.39755555987358093,
|
|
"grad_norm": 21.839122772216797,
|
|
"learning_rate": 4.930624893204624e-07,
|
|
"logits/chosen": -0.6210289001464844,
|
|
"logits/rejected": -0.6112542152404785,
|
|
"logps/chosen": -81.75634765625,
|
|
"logps/ref_chosen": -51.40031433105469,
|
|
"logps/ref_rejected": -80.5218505859375,
|
|
"logps/rejected": -146.90414428710938,
|
|
"loss": 1.0579,
|
|
"margin_dpo/margin_mean": 36.02623748779297,
|
|
"margin_dpo/margin_std": 45.45439910888672,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.17180616740088106,
|
|
"fcm_dpo/beta": 0.01257578656077385,
|
|
"fcm_dpo/delta": 0.02945420891046524,
|
|
"fcm_dpo/margin": 29.551578521728516,
|
|
"fcm_dpo/q_t": 0.41677069664001465,
|
|
"grad_norm": 28.586557388305664,
|
|
"learning_rate": 4.927590601281083e-07,
|
|
"logits/chosen": -0.581455409526825,
|
|
"logits/rejected": -0.543665885925293,
|
|
"logps/chosen": -108.22999572753906,
|
|
"logps/ref_chosen": -69.29840850830078,
|
|
"logps/ref_rejected": -66.583984375,
|
|
"logps/rejected": -135.06715393066406,
|
|
"loss": 1.1416,
|
|
"margin_dpo/margin_mean": 29.55158042907715,
|
|
"margin_dpo/margin_std": 50.78779602050781,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.17327459618208516,
|
|
"fcm_dpo/beta": 0.012499745935201645,
|
|
"fcm_dpo/delta": -0.05271512269973755,
|
|
"fcm_dpo/margin": 36.0295524597168,
|
|
"fcm_dpo/q_t": 0.39876455068588257,
|
|
"grad_norm": 20.938520431518555,
|
|
"learning_rate": 4.924492340087524e-07,
|
|
"logits/chosen": -0.6351233720779419,
|
|
"logits/rejected": -0.6163256168365479,
|
|
"logps/chosen": -86.66950988769531,
|
|
"logps/ref_chosen": -55.6409797668457,
|
|
"logps/ref_rejected": -75.66905975341797,
|
|
"logps/rejected": -142.72714233398438,
|
|
"loss": 1.062,
|
|
"margin_dpo/margin_mean": 36.0295524597168,
|
|
"margin_dpo/margin_std": 46.57566833496094,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.17474302496328928,
|
|
"fcm_dpo/beta": 0.012305308133363724,
|
|
"fcm_dpo/delta": -0.03963203728199005,
|
|
"fcm_dpo/margin": 35.49628829956055,
|
|
"fcm_dpo/q_t": 0.4050098657608032,
|
|
"grad_norm": 23.495380401611328,
|
|
"learning_rate": 4.92133019126601e-07,
|
|
"logits/chosen": -0.6303149461746216,
|
|
"logits/rejected": -0.6183843016624451,
|
|
"logps/chosen": -115.98379516601562,
|
|
"logps/ref_chosen": -73.51019287109375,
|
|
"logps/ref_rejected": -102.977294921875,
|
|
"logps/rejected": -180.94717407226562,
|
|
"loss": 1.1013,
|
|
"margin_dpo/margin_mean": 35.49628829956055,
|
|
"margin_dpo/margin_std": 55.193031311035156,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.1762114537444934,
|
|
"fcm_dpo/beta": 0.011988421902060509,
|
|
"fcm_dpo/delta": -0.20115892589092255,
|
|
"fcm_dpo/margin": 49.14187240600586,
|
|
"fcm_dpo/q_t": 0.370976984500885,
|
|
"grad_norm": 22.095067977905273,
|
|
"learning_rate": 4.918104238142103e-07,
|
|
"logits/chosen": -0.6180063486099243,
|
|
"logits/rejected": -0.5856211185455322,
|
|
"logps/chosen": -120.44984436035156,
|
|
"logps/ref_chosen": -76.78083801269531,
|
|
"logps/ref_rejected": -108.02374267578125,
|
|
"logps/rejected": -200.83462524414062,
|
|
"loss": 0.9929,
|
|
"margin_dpo/margin_mean": 49.14187240600586,
|
|
"margin_dpo/margin_std": 59.94157409667969,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.1776798825256975,
|
|
"fcm_dpo/beta": 0.011408919468522072,
|
|
"fcm_dpo/delta": -0.2337116003036499,
|
|
"fcm_dpo/margin": 54.12976837158203,
|
|
"fcm_dpo/q_t": 0.367302268743515,
|
|
"grad_norm": 24.336544036865234,
|
|
"learning_rate": 4.91481456572267e-07,
|
|
"logits/chosen": -0.5880341529846191,
|
|
"logits/rejected": -0.5841655731201172,
|
|
"logps/chosen": -104.06226348876953,
|
|
"logps/ref_chosen": -61.789894104003906,
|
|
"logps/ref_rejected": -109.99456787109375,
|
|
"logps/rejected": -206.39671325683594,
|
|
"loss": 0.9962,
|
|
"margin_dpo/margin_mean": 54.12976837158203,
|
|
"margin_dpo/margin_std": 69.66026306152344,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.17914831130690162,
|
|
"fcm_dpo/beta": 0.010763179510831833,
|
|
"fcm_dpo/delta": -0.35151687264442444,
|
|
"fcm_dpo/margin": 67.46307373046875,
|
|
"fcm_dpo/q_t": 0.3405070900917053,
|
|
"grad_norm": 23.621253967285156,
|
|
"learning_rate": 4.911461260693638e-07,
|
|
"logits/chosen": -0.5764192938804626,
|
|
"logits/rejected": -0.5921785831451416,
|
|
"logps/chosen": -85.21223449707031,
|
|
"logps/ref_chosen": -46.9022102355957,
|
|
"logps/ref_rejected": -106.71418762207031,
|
|
"logps/rejected": -212.48728942871094,
|
|
"loss": 0.893,
|
|
"margin_dpo/margin_mean": 67.46307373046875,
|
|
"margin_dpo/margin_std": 66.18208312988281,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.18061674008810572,
|
|
"fcm_dpo/beta": 0.010407611727714539,
|
|
"fcm_dpo/delta": -0.06276418268680573,
|
|
"fcm_dpo/margin": 44.099815368652344,
|
|
"fcm_dpo/q_t": 0.4013745188713074,
|
|
"grad_norm": 20.684696197509766,
|
|
"learning_rate": 4.908044411417711e-07,
|
|
"logits/chosen": -0.569983184337616,
|
|
"logits/rejected": -0.5534219741821289,
|
|
"logps/chosen": -103.7418212890625,
|
|
"logps/ref_chosen": -61.33863830566406,
|
|
"logps/ref_rejected": -87.775390625,
|
|
"logps/rejected": -174.2783966064453,
|
|
"loss": 1.1084,
|
|
"margin_dpo/margin_mean": 44.099815368652344,
|
|
"margin_dpo/margin_std": 72.8822021484375,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.18208516886930984,
|
|
"fcm_dpo/beta": 0.009994514286518097,
|
|
"fcm_dpo/delta": -0.26900696754455566,
|
|
"fcm_dpo/margin": 65.09678649902344,
|
|
"fcm_dpo/q_t": 0.36936578154563904,
|
|
"grad_norm": 22.232603073120117,
|
|
"learning_rate": 4.904564107932048e-07,
|
|
"logits/chosen": -0.5371190309524536,
|
|
"logits/rejected": -0.5390757322311401,
|
|
"logps/chosen": -119.59730529785156,
|
|
"logps/ref_chosen": -71.44833374023438,
|
|
"logps/ref_rejected": -117.58056640625,
|
|
"logps/rejected": -230.82635498046875,
|
|
"loss": 1.0148,
|
|
"margin_dpo/margin_mean": 65.09678649902344,
|
|
"margin_dpo/margin_std": 93.31256103515625,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.18355359765051396,
|
|
"fcm_dpo/beta": 0.009659139439463615,
|
|
"fcm_dpo/delta": -0.16824766993522644,
|
|
"fcm_dpo/margin": 57.879905700683594,
|
|
"fcm_dpo/q_t": 0.3799300193786621,
|
|
"grad_norm": 18.988040924072266,
|
|
"learning_rate": 4.90102044194588e-07,
|
|
"logits/chosen": -0.49056124687194824,
|
|
"logits/rejected": -0.4914134740829468,
|
|
"logps/chosen": -90.02660369873047,
|
|
"logps/ref_chosen": -50.136940002441406,
|
|
"logps/ref_rejected": -83.98861694335938,
|
|
"logps/rejected": -181.7581787109375,
|
|
"loss": 1.0264,
|
|
"margin_dpo/margin_mean": 57.879905700683594,
|
|
"margin_dpo/margin_std": 77.51099395751953,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.18502202643171806,
|
|
"fcm_dpo/beta": 0.009380832314491272,
|
|
"fcm_dpo/delta": -0.11235487461090088,
|
|
"fcm_dpo/margin": 53.98036193847656,
|
|
"fcm_dpo/q_t": 0.38861894607543945,
|
|
"grad_norm": 20.407129287719727,
|
|
"learning_rate": 4.897413506838102e-07,
|
|
"logits/chosen": -0.5199521780014038,
|
|
"logits/rejected": -0.514183759689331,
|
|
"logps/chosen": -98.78469848632812,
|
|
"logps/ref_chosen": -55.66706848144531,
|
|
"logps/ref_rejected": -98.1297607421875,
|
|
"logps/rejected": -195.22775268554688,
|
|
"loss": 1.042,
|
|
"margin_dpo/margin_mean": 53.98036193847656,
|
|
"margin_dpo/margin_std": 71.43438720703125,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.18649045521292218,
|
|
"fcm_dpo/beta": 0.009391989558935165,
|
|
"fcm_dpo/delta": 0.029904596507549286,
|
|
"fcm_dpo/margin": 39.52136993408203,
|
|
"fcm_dpo/q_t": 0.4143528640270233,
|
|
"grad_norm": 21.105072021484375,
|
|
"learning_rate": 4.89374339765481e-07,
|
|
"logits/chosen": -0.5297501087188721,
|
|
"logits/rejected": -0.5099810361862183,
|
|
"logps/chosen": -98.52586364746094,
|
|
"logps/ref_chosen": -56.55467987060547,
|
|
"logps/ref_rejected": -76.7957763671875,
|
|
"logps/rejected": -158.288330078125,
|
|
"loss": 1.1295,
|
|
"margin_dpo/margin_mean": 39.52136993408203,
|
|
"margin_dpo/margin_std": 62.1949348449707,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.18795888399412627,
|
|
"fcm_dpo/beta": 0.009450054727494717,
|
|
"fcm_dpo/delta": 0.006840545684099197,
|
|
"fcm_dpo/margin": 41.593753814697266,
|
|
"fcm_dpo/q_t": 0.4126642346382141,
|
|
"grad_norm": 29.35715103149414,
|
|
"learning_rate": 4.890010211106795e-07,
|
|
"logits/chosen": -0.5065322518348694,
|
|
"logits/rejected": -0.484443336725235,
|
|
"logps/chosen": -103.02119445800781,
|
|
"logps/ref_chosen": -58.12095642089844,
|
|
"logps/ref_rejected": -76.43896484375,
|
|
"logps/rejected": -162.93295288085938,
|
|
"loss": 1.1414,
|
|
"margin_dpo/margin_mean": 41.593753814697266,
|
|
"margin_dpo/margin_std": 72.77674865722656,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.1894273127753304,
|
|
"fcm_dpo/beta": 0.009403524920344353,
|
|
"fcm_dpo/delta": -0.013958234339952469,
|
|
"fcm_dpo/margin": 43.95970916748047,
|
|
"fcm_dpo/q_t": 0.4135984778404236,
|
|
"grad_norm": 20.894304275512695,
|
|
"learning_rate": 4.88621404556699e-07,
|
|
"logits/chosen": -0.5332880020141602,
|
|
"logits/rejected": -0.5224489569664001,
|
|
"logps/chosen": -121.43072509765625,
|
|
"logps/ref_chosen": -66.91637420654297,
|
|
"logps/ref_rejected": -96.6422119140625,
|
|
"logps/rejected": -195.11627197265625,
|
|
"loss": 1.1476,
|
|
"margin_dpo/margin_mean": 43.95970916748047,
|
|
"margin_dpo/margin_std": 83.69145202636719,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.19089574155653452,
|
|
"fcm_dpo/beta": 0.009182717651128769,
|
|
"fcm_dpo/delta": -0.22041718661785126,
|
|
"fcm_dpo/margin": 66.11294555664062,
|
|
"fcm_dpo/q_t": 0.36964261531829834,
|
|
"grad_norm": 21.242704391479492,
|
|
"learning_rate": 4.882355001067891e-07,
|
|
"logits/chosen": -0.5126354098320007,
|
|
"logits/rejected": -0.50765061378479,
|
|
"logps/chosen": -85.27062225341797,
|
|
"logps/ref_chosen": -44.66685104370117,
|
|
"logps/ref_rejected": -82.78165435791016,
|
|
"logps/rejected": -189.49838256835938,
|
|
"loss": 0.996,
|
|
"margin_dpo/margin_mean": 66.11294555664062,
|
|
"margin_dpo/margin_std": 80.52020263671875,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.19236417033773862,
|
|
"fcm_dpo/beta": 0.008719469420611858,
|
|
"fcm_dpo/delta": -0.19158010184764862,
|
|
"fcm_dpo/margin": 66.538818359375,
|
|
"fcm_dpo/q_t": 0.36819595098495483,
|
|
"grad_norm": 28.657150268554688,
|
|
"learning_rate": 4.878433179298909e-07,
|
|
"logits/chosen": -0.5120518803596497,
|
|
"logits/rejected": -0.5190242528915405,
|
|
"logps/chosen": -81.01126098632812,
|
|
"logps/ref_chosen": -44.924591064453125,
|
|
"logps/ref_rejected": -88.44401550292969,
|
|
"logps/rejected": -191.06948852539062,
|
|
"loss": 0.9776,
|
|
"margin_dpo/margin_mean": 66.538818359375,
|
|
"margin_dpo/margin_std": 72.21876525878906,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.19383259911894274,
|
|
"fcm_dpo/beta": 0.00854223407804966,
|
|
"fcm_dpo/delta": -0.06684187799692154,
|
|
"fcm_dpo/margin": 54.26338577270508,
|
|
"fcm_dpo/q_t": 0.40112632513046265,
|
|
"grad_norm": 20.267854690551758,
|
|
"learning_rate": 4.874448683603694e-07,
|
|
"logits/chosen": -0.5317084789276123,
|
|
"logits/rejected": -0.5302582383155823,
|
|
"logps/chosen": -107.2270278930664,
|
|
"logps/ref_chosen": -59.00108337402344,
|
|
"logps/ref_rejected": -87.89215087890625,
|
|
"logps/rejected": -190.38148498535156,
|
|
"loss": 1.0888,
|
|
"margin_dpo/margin_mean": 54.263389587402344,
|
|
"margin_dpo/margin_std": 85.4618148803711,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.19530102790014683,
|
|
"fcm_dpo/beta": 0.008498817682266235,
|
|
"fcm_dpo/delta": -0.012123266234993935,
|
|
"fcm_dpo/margin": 48.4220085144043,
|
|
"fcm_dpo/q_t": 0.40967467427253723,
|
|
"grad_norm": 26.216642379760742,
|
|
"learning_rate": 4.870401618977415e-07,
|
|
"logits/chosen": -0.5197868347167969,
|
|
"logits/rejected": -0.5063532590866089,
|
|
"logps/chosen": -123.58735656738281,
|
|
"logps/ref_chosen": -66.60449981689453,
|
|
"logps/ref_rejected": -96.33355712890625,
|
|
"logps/rejected": -201.73843383789062,
|
|
"loss": 1.108,
|
|
"margin_dpo/margin_mean": 48.4220085144043,
|
|
"margin_dpo/margin_std": 75.48104858398438,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.19676945668135096,
|
|
"fcm_dpo/beta": 0.008487870916724205,
|
|
"fcm_dpo/delta": -0.0391845665872097,
|
|
"fcm_dpo/margin": 51.5308952331543,
|
|
"fcm_dpo/q_t": 0.4019153118133545,
|
|
"grad_norm": 19.05293846130371,
|
|
"learning_rate": 4.866292092063986e-07,
|
|
"logits/chosen": -0.4650689363479614,
|
|
"logits/rejected": -0.45051348209381104,
|
|
"logps/chosen": -97.44313049316406,
|
|
"logps/ref_chosen": -52.06925582885742,
|
|
"logps/ref_rejected": -87.6545181274414,
|
|
"logps/rejected": -184.55929565429688,
|
|
"loss": 1.0679,
|
|
"margin_dpo/margin_mean": 51.5308952331543,
|
|
"margin_dpo/margin_std": 67.31776428222656,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.19823788546255505,
|
|
"fcm_dpo/beta": 0.00817069597542286,
|
|
"fcm_dpo/delta": -0.21977676451206207,
|
|
"fcm_dpo/margin": 74.21546936035156,
|
|
"fcm_dpo/q_t": 0.3703498840332031,
|
|
"grad_norm": 21.739349365234375,
|
|
"learning_rate": 4.862120211153265e-07,
|
|
"logits/chosen": -0.4731314182281494,
|
|
"logits/rejected": -0.5078834295272827,
|
|
"logps/chosen": -100.25476837158203,
|
|
"logps/ref_chosen": -50.353858947753906,
|
|
"logps/ref_rejected": -115.97975158691406,
|
|
"logps/rejected": -240.0961151123047,
|
|
"loss": 0.9934,
|
|
"margin_dpo/margin_mean": 74.21546936035156,
|
|
"margin_dpo/margin_std": 92.9926528930664,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.19970631424375918,
|
|
"fcm_dpo/beta": 0.00803595595061779,
|
|
"fcm_dpo/delta": 0.0032859407365322113,
|
|
"fcm_dpo/margin": 49.29758834838867,
|
|
"fcm_dpo/q_t": 0.4189005196094513,
|
|
"grad_norm": 20.417558670043945,
|
|
"learning_rate": 4.857886086178193e-07,
|
|
"logits/chosen": -0.5016771554946899,
|
|
"logits/rejected": -0.4939349293708801,
|
|
"logps/chosen": -124.20162963867188,
|
|
"logps/ref_chosen": -65.072509765625,
|
|
"logps/ref_rejected": -96.32122802734375,
|
|
"logps/rejected": -204.7479248046875,
|
|
"loss": 1.1426,
|
|
"margin_dpo/margin_mean": 49.297584533691406,
|
|
"margin_dpo/margin_std": 89.33781433105469,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.2011747430249633,
|
|
"fcm_dpo/beta": 0.007817087695002556,
|
|
"fcm_dpo/delta": -0.2199798822402954,
|
|
"fcm_dpo/margin": 77.56993103027344,
|
|
"fcm_dpo/q_t": 0.376494824886322,
|
|
"grad_norm": 18.184268951416016,
|
|
"learning_rate": 4.853589828711902e-07,
|
|
"logits/chosen": -0.4158422350883484,
|
|
"logits/rejected": -0.44304847717285156,
|
|
"logps/chosen": -105.83258056640625,
|
|
"logps/ref_chosen": -48.759117126464844,
|
|
"logps/ref_rejected": -113.86376953125,
|
|
"logps/rejected": -248.50717163085938,
|
|
"loss": 1.0242,
|
|
"margin_dpo/margin_mean": 77.56993103027344,
|
|
"margin_dpo/margin_std": 111.20265197753906,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.2026431718061674,
|
|
"fcm_dpo/beta": 0.007688170298933983,
|
|
"fcm_dpo/delta": -0.05497686192393303,
|
|
"fcm_dpo/margin": 58.859375,
|
|
"fcm_dpo/q_t": 0.3964359164237976,
|
|
"grad_norm": 21.49945831298828,
|
|
"learning_rate": 4.849231551964771e-07,
|
|
"logits/chosen": -0.3968755602836609,
|
|
"logits/rejected": -0.38326364755630493,
|
|
"logps/chosen": -120.03235626220703,
|
|
"logps/ref_chosen": -60.519649505615234,
|
|
"logps/ref_rejected": -93.19694519042969,
|
|
"logps/rejected": -211.56903076171875,
|
|
"loss": 1.0554,
|
|
"margin_dpo/margin_mean": 58.859375,
|
|
"margin_dpo/margin_std": 72.39787292480469,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.20411160058737152,
|
|
"fcm_dpo/beta": 0.007515173405408859,
|
|
"fcm_dpo/delta": -0.13694770634174347,
|
|
"fcm_dpo/margin": 70.50794982910156,
|
|
"fcm_dpo/q_t": 0.3826453983783722,
|
|
"grad_norm": 18.61595916748047,
|
|
"learning_rate": 4.844811370781446e-07,
|
|
"logits/chosen": -0.451102077960968,
|
|
"logits/rejected": -0.4422782063484192,
|
|
"logps/chosen": -96.82493591308594,
|
|
"logps/ref_chosen": -46.89138412475586,
|
|
"logps/ref_rejected": -79.72798156738281,
|
|
"logps/rejected": -200.16949462890625,
|
|
"loss": 1.0181,
|
|
"margin_dpo/margin_mean": 70.50794982910156,
|
|
"margin_dpo/margin_std": 87.34086608886719,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.2055800293685756,
|
|
"fcm_dpo/beta": 0.00736122764647007,
|
|
"fcm_dpo/delta": -0.07254733890295029,
|
|
"fcm_dpo/margin": 63.72399139404297,
|
|
"fcm_dpo/q_t": 0.3958495855331421,
|
|
"grad_norm": 22.667436599731445,
|
|
"learning_rate": 4.840329401637809e-07,
|
|
"logits/chosen": -0.4456948935985565,
|
|
"logits/rejected": -0.43251848220825195,
|
|
"logps/chosen": -119.22482299804688,
|
|
"logps/ref_chosen": -58.97471618652344,
|
|
"logps/ref_rejected": -83.28410339355469,
|
|
"logps/rejected": -207.25820922851562,
|
|
"loss": 1.0686,
|
|
"margin_dpo/margin_mean": 63.72399139404297,
|
|
"margin_dpo/margin_std": 89.69435119628906,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.20704845814977973,
|
|
"fcm_dpo/beta": 0.007298792712390423,
|
|
"fcm_dpo/delta": -0.04338546097278595,
|
|
"fcm_dpo/margin": 60.48833465576172,
|
|
"fcm_dpo/q_t": 0.40087050199508667,
|
|
"grad_norm": 27.13235092163086,
|
|
"learning_rate": 4.83578576263792e-07,
|
|
"logits/chosen": -0.4381271302700043,
|
|
"logits/rejected": -0.42595377564430237,
|
|
"logps/chosen": -143.43618774414062,
|
|
"logps/ref_chosen": -75.07566833496094,
|
|
"logps/ref_rejected": -98.1922607421875,
|
|
"logps/rejected": -227.04110717773438,
|
|
"loss": 1.1013,
|
|
"margin_dpo/margin_mean": 60.48833465576172,
|
|
"margin_dpo/margin_std": 94.93978881835938,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.20851688693098386,
|
|
"fcm_dpo/beta": 0.0071950615383684635,
|
|
"fcm_dpo/delta": -0.10405787825584412,
|
|
"fcm_dpo/margin": 69.34352111816406,
|
|
"fcm_dpo/q_t": 0.39234134554862976,
|
|
"grad_norm": 27.40236473083496,
|
|
"learning_rate": 4.83118057351089e-07,
|
|
"logits/chosen": -0.4233921468257904,
|
|
"logits/rejected": -0.4229241907596588,
|
|
"logps/chosen": -127.72467803955078,
|
|
"logps/ref_chosen": -58.027931213378906,
|
|
"logps/ref_rejected": -94.58222961425781,
|
|
"logps/rejected": -233.62249755859375,
|
|
"loss": 1.0828,
|
|
"margin_dpo/margin_mean": 69.34352111816406,
|
|
"margin_dpo/margin_std": 106.0972900390625,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.20998531571218795,
|
|
"fcm_dpo/beta": 0.007188013754785061,
|
|
"fcm_dpo/delta": 0.08534206449985504,
|
|
"fcm_dpo/margin": 44.15364074707031,
|
|
"fcm_dpo/q_t": 0.4322444796562195,
|
|
"grad_norm": 23.33932876586914,
|
|
"learning_rate": 4.826513955607734e-07,
|
|
"logits/chosen": -0.3806174397468567,
|
|
"logits/rejected": -0.37236979603767395,
|
|
"logps/chosen": -131.15016174316406,
|
|
"logps/ref_chosen": -57.59645080566406,
|
|
"logps/ref_rejected": -78.99957275390625,
|
|
"logps/rejected": -196.70692443847656,
|
|
"loss": 1.1967,
|
|
"margin_dpo/margin_mean": 44.15364074707031,
|
|
"margin_dpo/margin_std": 92.67884826660156,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.21145374449339208,
|
|
"fcm_dpo/beta": 0.007249427028000355,
|
|
"fcm_dpo/delta": 0.007432065438479185,
|
|
"fcm_dpo/margin": 54.190277099609375,
|
|
"fcm_dpo/q_t": 0.41100186109542847,
|
|
"grad_norm": 20.80966567993164,
|
|
"learning_rate": 4.821786031898176e-07,
|
|
"logits/chosen": -0.4202612638473511,
|
|
"logits/rejected": -0.4080207347869873,
|
|
"logps/chosen": -124.41218566894531,
|
|
"logps/ref_chosen": -59.90636444091797,
|
|
"logps/ref_rejected": -82.00025939941406,
|
|
"logps/rejected": -200.6963653564453,
|
|
"loss": 1.1065,
|
|
"margin_dpo/margin_mean": 54.190277099609375,
|
|
"margin_dpo/margin_std": 78.08949279785156,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.21292217327459617,
|
|
"fcm_dpo/beta": 0.007232350297272205,
|
|
"fcm_dpo/delta": -0.020431246608495712,
|
|
"fcm_dpo/margin": 58.010536193847656,
|
|
"fcm_dpo/q_t": 0.40479713678359985,
|
|
"grad_norm": 23.85308837890625,
|
|
"learning_rate": 4.816996926967401e-07,
|
|
"logits/chosen": -0.4434972405433655,
|
|
"logits/rejected": -0.4278140068054199,
|
|
"logps/chosen": -118.61093139648438,
|
|
"logps/ref_chosen": -56.60066604614258,
|
|
"logps/ref_rejected": -77.86631774902344,
|
|
"logps/rejected": -197.88710021972656,
|
|
"loss": 1.0888,
|
|
"margin_dpo/margin_mean": 58.01053237915039,
|
|
"margin_dpo/margin_std": 80.87408447265625,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.2143906020558003,
|
|
"fcm_dpo/beta": 0.007291465997695923,
|
|
"fcm_dpo/delta": 0.07497753947973251,
|
|
"fcm_dpo/margin": 44.91979217529297,
|
|
"fcm_dpo/q_t": 0.4256930947303772,
|
|
"grad_norm": 27.03116226196289,
|
|
"learning_rate": 4.812146767012779e-07,
|
|
"logits/chosen": -0.42746251821517944,
|
|
"logits/rejected": -0.4016566276550293,
|
|
"logps/chosen": -150.37661743164062,
|
|
"logps/ref_chosen": -66.00045013427734,
|
|
"logps/ref_rejected": -81.70278930664062,
|
|
"logps/rejected": -210.99874877929688,
|
|
"loss": 1.1828,
|
|
"margin_dpo/margin_mean": 44.91979217529297,
|
|
"margin_dpo/margin_std": 87.16963195800781,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.21585903083700442,
|
|
"fcm_dpo/beta": 0.0072729880921542645,
|
|
"fcm_dpo/delta": -0.04680505767464638,
|
|
"fcm_dpo/margin": 61.123626708984375,
|
|
"fcm_dpo/q_t": 0.40164506435394287,
|
|
"grad_norm": 19.43584632873535,
|
|
"learning_rate": 4.807235679840536e-07,
|
|
"logits/chosen": -0.44083860516548157,
|
|
"logits/rejected": -0.4199514389038086,
|
|
"logps/chosen": -115.26583862304688,
|
|
"logps/ref_chosen": -53.405487060546875,
|
|
"logps/ref_rejected": -71.39060974121094,
|
|
"logps/rejected": -194.3745880126953,
|
|
"loss": 1.0883,
|
|
"margin_dpo/margin_mean": 61.12362289428711,
|
|
"margin_dpo/margin_std": 90.59841918945312,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.2173274596182085,
|
|
"fcm_dpo/beta": 0.007355110719799995,
|
|
"fcm_dpo/delta": 0.012732595205307007,
|
|
"fcm_dpo/margin": 52.45569610595703,
|
|
"fcm_dpo/q_t": 0.41518306732177734,
|
|
"grad_norm": 19.188884735107422,
|
|
"learning_rate": 4.802263794862384e-07,
|
|
"logits/chosen": -0.497209370136261,
|
|
"logits/rejected": -0.4899882674217224,
|
|
"logps/chosen": -125.58383178710938,
|
|
"logps/ref_chosen": -64.93708038330078,
|
|
"logps/ref_rejected": -103.09384155273438,
|
|
"logps/rejected": -216.1962890625,
|
|
"loss": 1.1198,
|
|
"margin_dpo/margin_mean": 52.45569610595703,
|
|
"margin_dpo/margin_std": 76.64443969726562,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.21879588839941264,
|
|
"fcm_dpo/beta": 0.007154976017773151,
|
|
"fcm_dpo/delta": -0.069991335272789,
|
|
"fcm_dpo/margin": 64.97525024414062,
|
|
"fcm_dpo/q_t": 0.3939010500907898,
|
|
"grad_norm": 18.49083709716797,
|
|
"learning_rate": 4.797231243092118e-07,
|
|
"logits/chosen": -0.4779682755470276,
|
|
"logits/rejected": -0.46220314502716064,
|
|
"logps/chosen": -116.61399841308594,
|
|
"logps/ref_chosen": -58.47376251220703,
|
|
"logps/ref_rejected": -99.31474304199219,
|
|
"logps/rejected": -222.43020629882812,
|
|
"loss": 1.0514,
|
|
"margin_dpo/margin_mean": 64.97525024414062,
|
|
"margin_dpo/margin_std": 78.06156921386719,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.22026431718061673,
|
|
"fcm_dpo/beta": 0.007078321650624275,
|
|
"fcm_dpo/delta": -0.05432058125734329,
|
|
"fcm_dpo/margin": 63.64070129394531,
|
|
"fcm_dpo/q_t": 0.40325814485549927,
|
|
"grad_norm": 18.106704711914062,
|
|
"learning_rate": 4.792138157142157e-07,
|
|
"logits/chosen": -0.4446331262588501,
|
|
"logits/rejected": -0.4476960301399231,
|
|
"logps/chosen": -96.900146484375,
|
|
"logps/ref_chosen": -45.705810546875,
|
|
"logps/ref_rejected": -83.34759521484375,
|
|
"logps/rejected": -198.18264770507812,
|
|
"loss": 1.0789,
|
|
"margin_dpo/margin_mean": 63.64070129394531,
|
|
"margin_dpo/margin_std": 92.74796295166016,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.22173274596182085,
|
|
"fcm_dpo/beta": 0.0070512015372514725,
|
|
"fcm_dpo/delta": -0.053547054529190063,
|
|
"fcm_dpo/margin": 63.97663879394531,
|
|
"fcm_dpo/q_t": 0.3967490792274475,
|
|
"grad_norm": 21.403526306152344,
|
|
"learning_rate": 4.786984671220053e-07,
|
|
"logits/chosen": -0.5293259620666504,
|
|
"logits/rejected": -0.5012092590332031,
|
|
"logps/chosen": -134.41217041015625,
|
|
"logps/ref_chosen": -70.57083129882812,
|
|
"logps/ref_rejected": -100.46382141113281,
|
|
"logps/rejected": -228.28179931640625,
|
|
"loss": 1.0576,
|
|
"margin_dpo/margin_mean": 63.97663879394531,
|
|
"margin_dpo/margin_std": 79.69630432128906,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.22320117474302498,
|
|
"fcm_dpo/beta": 0.0069010304287076,
|
|
"fcm_dpo/delta": -0.14396238327026367,
|
|
"fcm_dpo/margin": 77.73429870605469,
|
|
"fcm_dpo/q_t": 0.3808351755142212,
|
|
"grad_norm": 20.531400680541992,
|
|
"learning_rate": 4.78177092112495e-07,
|
|
"logits/chosen": -0.4896411895751953,
|
|
"logits/rejected": -0.48783737421035767,
|
|
"logps/chosen": -116.02652740478516,
|
|
"logps/ref_chosen": -60.16438674926758,
|
|
"logps/ref_rejected": -106.14045715332031,
|
|
"logps/rejected": -239.73690795898438,
|
|
"loss": 1.0136,
|
|
"margin_dpo/margin_mean": 77.73429870605469,
|
|
"margin_dpo/margin_std": 92.09419250488281,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.22466960352422907,
|
|
"fcm_dpo/beta": 0.00679405964910984,
|
|
"fcm_dpo/delta": -0.05286309868097305,
|
|
"fcm_dpo/margin": 66.30891418457031,
|
|
"fcm_dpo/q_t": 0.40227580070495605,
|
|
"grad_norm": 15.813210487365723,
|
|
"learning_rate": 4.776497044244016e-07,
|
|
"logits/chosen": -0.46835315227508545,
|
|
"logits/rejected": -0.4619212746620178,
|
|
"logps/chosen": -112.95548248291016,
|
|
"logps/ref_chosen": -56.315277099609375,
|
|
"logps/ref_rejected": -85.65583801269531,
|
|
"logps/rejected": -208.60496520996094,
|
|
"loss": 1.0864,
|
|
"margin_dpo/margin_mean": 66.30891418457031,
|
|
"margin_dpo/margin_std": 100.04182434082031,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.2261380323054332,
|
|
"fcm_dpo/beta": 0.006759101524949074,
|
|
"fcm_dpo/delta": -0.03209718316793442,
|
|
"fcm_dpo/margin": 63.69126510620117,
|
|
"fcm_dpo/q_t": 0.4056595265865326,
|
|
"grad_norm": 19.18117332458496,
|
|
"learning_rate": 4.771163179548808e-07,
|
|
"logits/chosen": -0.4857712984085083,
|
|
"logits/rejected": -0.4887011647224426,
|
|
"logps/chosen": -131.005859375,
|
|
"logps/ref_chosen": -62.74256896972656,
|
|
"logps/ref_rejected": -104.24420166015625,
|
|
"logps/rejected": -236.19874572753906,
|
|
"loss": 1.1182,
|
|
"margin_dpo/margin_mean": 63.691261291503906,
|
|
"margin_dpo/margin_std": 105.09062194824219,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.2276064610866373,
|
|
"fcm_dpo/beta": 0.0067064836621284485,
|
|
"fcm_dpo/delta": -0.031813234090805054,
|
|
"fcm_dpo/margin": 64.17315673828125,
|
|
"fcm_dpo/q_t": 0.4035380482673645,
|
|
"grad_norm": 19.415454864501953,
|
|
"learning_rate": 4.7657694675916247e-07,
|
|
"logits/chosen": -0.49250417947769165,
|
|
"logits/rejected": -0.4741542339324951,
|
|
"logps/chosen": -123.27816772460938,
|
|
"logps/ref_chosen": -60.65318298339844,
|
|
"logps/ref_rejected": -77.49220275878906,
|
|
"logps/rejected": -204.29034423828125,
|
|
"loss": 1.0929,
|
|
"margin_dpo/margin_mean": 64.17315673828125,
|
|
"margin_dpo/margin_std": 94.83734130859375,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.2290748898678414,
|
|
"fcm_dpo/beta": 0.006815006956458092,
|
|
"fcm_dpo/delta": 0.15295735001564026,
|
|
"fcm_dpo/margin": 36.849365234375,
|
|
"fcm_dpo/q_t": 0.4437222480773926,
|
|
"grad_norm": 30.32708168029785,
|
|
"learning_rate": 4.7603160505017893e-07,
|
|
"logits/chosen": -0.3995016813278198,
|
|
"logits/rejected": -0.39005500078201294,
|
|
"logps/chosen": -156.82156372070312,
|
|
"logps/ref_chosen": -69.49188232421875,
|
|
"logps/ref_rejected": -77.16929626464844,
|
|
"logps/rejected": -201.34835815429688,
|
|
"loss": 1.2742,
|
|
"margin_dpo/margin_mean": 36.849365234375,
|
|
"margin_dpo/margin_std": 106.61282348632812,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.2305433186490455,
|
|
"fcm_dpo/beta": 0.006692226976156235,
|
|
"fcm_dpo/delta": -0.1519441455602646,
|
|
"fcm_dpo/margin": 81.02685546875,
|
|
"fcm_dpo/q_t": 0.3766096234321594,
|
|
"grad_norm": 24.09219741821289,
|
|
"learning_rate": 4.7548030719819154e-07,
|
|
"logits/chosen": -0.3954240083694458,
|
|
"logits/rejected": -0.402127742767334,
|
|
"logps/chosen": -140.93927001953125,
|
|
"logps/ref_chosen": -61.368438720703125,
|
|
"logps/ref_rejected": -107.64636993408203,
|
|
"logps/rejected": -268.24407958984375,
|
|
"loss": 1.025,
|
|
"margin_dpo/margin_mean": 81.02685546875,
|
|
"margin_dpo/margin_std": 100.71873474121094,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.23201174743024963,
|
|
"fcm_dpo/beta": 0.006510759703814983,
|
|
"fcm_dpo/delta": -0.17182058095932007,
|
|
"fcm_dpo/margin": 86.34577178955078,
|
|
"fcm_dpo/q_t": 0.385204017162323,
|
|
"grad_norm": 20.76033592224121,
|
|
"learning_rate": 4.7492306773041136e-07,
|
|
"logits/chosen": -0.38746166229248047,
|
|
"logits/rejected": -0.4051688611507416,
|
|
"logps/chosen": -137.71897888183594,
|
|
"logps/ref_chosen": -57.612918853759766,
|
|
"logps/ref_rejected": -113.6946792602539,
|
|
"logps/rejected": -280.146484375,
|
|
"loss": 1.0522,
|
|
"margin_dpo/margin_mean": 86.34577178955078,
|
|
"margin_dpo/margin_std": 131.83157348632812,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.23348017621145375,
|
|
"fcm_dpo/beta": 0.0064995670691132545,
|
|
"fcm_dpo/delta": 0.0181589238345623,
|
|
"fcm_dpo/margin": 58.781009674072266,
|
|
"fcm_dpo/q_t": 0.41514790058135986,
|
|
"grad_norm": 24.090534210205078,
|
|
"learning_rate": 4.743599013306165e-07,
|
|
"logits/chosen": -0.4077296257019043,
|
|
"logits/rejected": -0.377045214176178,
|
|
"logps/chosen": -171.456298828125,
|
|
"logps/ref_chosen": -81.56034851074219,
|
|
"logps/ref_rejected": -88.89871215820312,
|
|
"logps/rejected": -237.57568359375,
|
|
"loss": 1.1468,
|
|
"margin_dpo/margin_mean": 58.781009674072266,
|
|
"margin_dpo/margin_std": 103.61927795410156,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.23494860499265785,
|
|
"fcm_dpo/beta": 0.006338327657431364,
|
|
"fcm_dpo/delta": -0.11268580704927444,
|
|
"fcm_dpo/margin": 79.83772277832031,
|
|
"fcm_dpo/q_t": 0.39425748586654663,
|
|
"grad_norm": 24.490720748901367,
|
|
"learning_rate": 4.737908228387656e-07,
|
|
"logits/chosen": -0.37448424100875854,
|
|
"logits/rejected": -0.36534446477890015,
|
|
"logps/chosen": -157.87139892578125,
|
|
"logps/ref_chosen": -65.73088073730469,
|
|
"logps/ref_rejected": -97.21781921386719,
|
|
"logps/rejected": -269.196044921875,
|
|
"loss": 1.086,
|
|
"margin_dpo/margin_mean": 79.83772277832031,
|
|
"margin_dpo/margin_std": 128.0322265625,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.23641703377386197,
|
|
"fcm_dpo/beta": 0.006289385724812746,
|
|
"fcm_dpo/delta": -0.04286719858646393,
|
|
"fcm_dpo/margin": 70.11194610595703,
|
|
"fcm_dpo/q_t": 0.4028061032295227,
|
|
"grad_norm": 21.464141845703125,
|
|
"learning_rate": 4.7321584725060594e-07,
|
|
"logits/chosen": -0.38927197456359863,
|
|
"logits/rejected": -0.38858896493911743,
|
|
"logps/chosen": -130.46055603027344,
|
|
"logps/ref_chosen": -52.43647003173828,
|
|
"logps/ref_rejected": -83.43095397949219,
|
|
"logps/rejected": -231.56698608398438,
|
|
"loss": 1.0886,
|
|
"margin_dpo/margin_mean": 70.11195373535156,
|
|
"margin_dpo/margin_std": 103.08557891845703,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.23788546255506607,
|
|
"fcm_dpo/beta": 0.006195507012307644,
|
|
"fcm_dpo/delta": -0.030592873692512512,
|
|
"fcm_dpo/margin": 69.01253509521484,
|
|
"fcm_dpo/q_t": 0.40611007809638977,
|
|
"grad_norm": 22.719524383544922,
|
|
"learning_rate": 4.7263498971727905e-07,
|
|
"logits/chosen": -0.4335670471191406,
|
|
"logits/rejected": -0.4162771999835968,
|
|
"logps/chosen": -137.5789337158203,
|
|
"logps/ref_chosen": -62.6105842590332,
|
|
"logps/ref_rejected": -89.39057922363281,
|
|
"logps/rejected": -233.3714599609375,
|
|
"loss": 1.106,
|
|
"margin_dpo/margin_mean": 69.01253509521484,
|
|
"margin_dpo/margin_std": 106.23509216308594,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.2393538913362702,
|
|
"fcm_dpo/beta": 0.006221453659236431,
|
|
"fcm_dpo/delta": -0.020094340667128563,
|
|
"fcm_dpo/margin": 67.38839721679688,
|
|
"fcm_dpo/q_t": 0.40836769342422485,
|
|
"grad_norm": 20.796031951904297,
|
|
"learning_rate": 4.720482655449212e-07,
|
|
"logits/chosen": -0.35502612590789795,
|
|
"logits/rejected": -0.33497339487075806,
|
|
"logps/chosen": -138.83261108398438,
|
|
"logps/ref_chosen": -55.021629333496094,
|
|
"logps/ref_rejected": -75.418212890625,
|
|
"logps/rejected": -226.61758422851562,
|
|
"loss": 1.1113,
|
|
"margin_dpo/margin_mean": 67.38839721679688,
|
|
"margin_dpo/margin_std": 107.830322265625,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.24082232011747431,
|
|
"fcm_dpo/beta": 0.006072811782360077,
|
|
"fcm_dpo/delta": -0.10795401781797409,
|
|
"fcm_dpo/margin": 82.47865295410156,
|
|
"fcm_dpo/q_t": 0.3873969316482544,
|
|
"grad_norm": 21.56767463684082,
|
|
"learning_rate": 4.714556901942599e-07,
|
|
"logits/chosen": -0.36625775694847107,
|
|
"logits/rejected": -0.35121026635169983,
|
|
"logps/chosen": -131.74513244628906,
|
|
"logps/ref_chosen": -55.64066696166992,
|
|
"logps/ref_rejected": -79.66463470458984,
|
|
"logps/rejected": -238.24774169921875,
|
|
"loss": 1.0342,
|
|
"margin_dpo/margin_mean": 82.47865295410156,
|
|
"margin_dpo/margin_std": 101.75411224365234,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.2422907488986784,
|
|
"fcm_dpo/beta": 0.006134270690381527,
|
|
"fcm_dpo/delta": 0.07639573514461517,
|
|
"fcm_dpo/margin": 53.168251037597656,
|
|
"fcm_dpo/q_t": 0.42690160870552063,
|
|
"grad_norm": 24.297542572021484,
|
|
"learning_rate": 4.708572792802069e-07,
|
|
"logits/chosen": -0.3836957812309265,
|
|
"logits/rejected": -0.3541428744792938,
|
|
"logps/chosen": -143.4055633544922,
|
|
"logps/ref_chosen": -61.310691833496094,
|
|
"logps/ref_rejected": -73.67060852050781,
|
|
"logps/rejected": -208.93374633789062,
|
|
"loss": 1.1731,
|
|
"margin_dpo/margin_mean": 53.16825866699219,
|
|
"margin_dpo/margin_std": 98.54109191894531,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.24375917767988253,
|
|
"fcm_dpo/beta": 0.005966954864561558,
|
|
"fcm_dpo/delta": -0.19942107796669006,
|
|
"fcm_dpo/margin": 98.27601623535156,
|
|
"fcm_dpo/q_t": 0.37992316484451294,
|
|
"grad_norm": 17.423593521118164,
|
|
"learning_rate": 4.702530485714461e-07,
|
|
"logits/chosen": -0.35796594619750977,
|
|
"logits/rejected": -0.36847564578056335,
|
|
"logps/chosen": -123.2952880859375,
|
|
"logps/ref_chosen": -50.98360061645508,
|
|
"logps/ref_rejected": -98.09512329101562,
|
|
"logps/rejected": -268.682861328125,
|
|
"loss": 1.0159,
|
|
"margin_dpo/margin_mean": 98.27601623535156,
|
|
"margin_dpo/margin_std": 136.08973693847656,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.24522760646108663,
|
|
"fcm_dpo/beta": 0.005754595622420311,
|
|
"fcm_dpo/delta": -0.20378953218460083,
|
|
"fcm_dpo/margin": 102.81171417236328,
|
|
"fcm_dpo/q_t": 0.36769425868988037,
|
|
"grad_norm": 18.515390396118164,
|
|
"learning_rate": 4.6964301399001877e-07,
|
|
"logits/chosen": -0.3561704754829407,
|
|
"logits/rejected": -0.3588705360889435,
|
|
"logps/chosen": -123.58355712890625,
|
|
"logps/ref_chosen": -50.424095153808594,
|
|
"logps/ref_rejected": -96.03042602539062,
|
|
"logps/rejected": -272.0016174316406,
|
|
"loss": 0.9723,
|
|
"margin_dpo/margin_mean": 102.81171417236328,
|
|
"margin_dpo/margin_std": 112.32765197753906,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.24669603524229075,
|
|
"fcm_dpo/beta": 0.005659398622810841,
|
|
"fcm_dpo/delta": -0.030612219125032425,
|
|
"fcm_dpo/margin": 75.83096313476562,
|
|
"fcm_dpo/q_t": 0.40368539094924927,
|
|
"grad_norm": 19.071271896362305,
|
|
"learning_rate": 4.690271916109034e-07,
|
|
"logits/chosen": -0.3632907271385193,
|
|
"logits/rejected": -0.3534013032913208,
|
|
"logps/chosen": -129.71824645996094,
|
|
"logps/ref_chosen": -49.462825775146484,
|
|
"logps/ref_rejected": -75.30855560302734,
|
|
"logps/rejected": -231.39492797851562,
|
|
"loss": 1.0785,
|
|
"margin_dpo/margin_mean": 75.83096313476562,
|
|
"margin_dpo/margin_std": 102.57954406738281,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.24816446402349487,
|
|
"fcm_dpo/beta": 0.00563174020498991,
|
|
"fcm_dpo/delta": 0.021915137767791748,
|
|
"fcm_dpo/margin": 67.0512466430664,
|
|
"fcm_dpo/q_t": 0.41945117712020874,
|
|
"grad_norm": 21.434741973876953,
|
|
"learning_rate": 4.6840559766159235e-07,
|
|
"logits/chosen": -0.3872135281562805,
|
|
"logits/rejected": -0.37111321091651917,
|
|
"logps/chosen": -142.41534423828125,
|
|
"logps/ref_chosen": -59.803443908691406,
|
|
"logps/ref_rejected": -83.34574890136719,
|
|
"logps/rejected": -233.0089111328125,
|
|
"loss": 1.1591,
|
|
"margin_dpo/margin_mean": 67.05125427246094,
|
|
"margin_dpo/margin_std": 125.73867797851562,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.24963289280469897,
|
|
"fcm_dpo/beta": 0.005613743327558041,
|
|
"fcm_dpo/delta": -0.046133168041706085,
|
|
"fcm_dpo/margin": 78.93022918701172,
|
|
"fcm_dpo/q_t": 0.3986685574054718,
|
|
"grad_norm": 17.828161239624023,
|
|
"learning_rate": 4.6777824852166437e-07,
|
|
"logits/chosen": -0.3273963928222656,
|
|
"logits/rejected": -0.31577807664871216,
|
|
"logps/chosen": -122.65813446044922,
|
|
"logps/ref_chosen": -49.471771240234375,
|
|
"logps/ref_rejected": -75.91734313964844,
|
|
"logps/rejected": -228.033935546875,
|
|
"loss": 1.0728,
|
|
"margin_dpo/margin_mean": 78.93023681640625,
|
|
"margin_dpo/margin_std": 103.84635925292969,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.2511013215859031,
|
|
"fcm_dpo/beta": 0.005664165131747723,
|
|
"fcm_dpo/delta": 0.037670087069272995,
|
|
"fcm_dpo/margin": 64.21273040771484,
|
|
"fcm_dpo/q_t": 0.4233492612838745,
|
|
"grad_norm": 29.94133186340332,
|
|
"learning_rate": 4.6714516072235273e-07,
|
|
"logits/chosen": -0.3752813935279846,
|
|
"logits/rejected": -0.3576093912124634,
|
|
"logps/chosen": -190.8294677734375,
|
|
"logps/ref_chosen": -84.49931335449219,
|
|
"logps/ref_rejected": -109.38209533691406,
|
|
"logps/rejected": -279.92498779296875,
|
|
"loss": 1.1713,
|
|
"margin_dpo/margin_mean": 64.21273040771484,
|
|
"margin_dpo/margin_std": 129.4434051513672,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.2525697503671072,
|
|
"fcm_dpo/beta": 0.005686759948730469,
|
|
"fcm_dpo/delta": 0.019866658374667168,
|
|
"fcm_dpo/margin": 66.97691345214844,
|
|
"fcm_dpo/q_t": 0.4149053692817688,
|
|
"grad_norm": 21.02857780456543,
|
|
"learning_rate": 4.6650635094610966e-07,
|
|
"logits/chosen": -0.39714205265045166,
|
|
"logits/rejected": -0.378519207239151,
|
|
"logps/chosen": -163.3927001953125,
|
|
"logps/ref_chosen": -68.65391540527344,
|
|
"logps/ref_rejected": -85.43667602539062,
|
|
"logps/rejected": -247.15237426757812,
|
|
"loss": 1.1304,
|
|
"margin_dpo/margin_mean": 66.97691345214844,
|
|
"margin_dpo/margin_std": 110.04927062988281,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.2540381791483113,
|
|
"fcm_dpo/beta": 0.005732472985982895,
|
|
"fcm_dpo/delta": 0.0141000896692276,
|
|
"fcm_dpo/margin": 67.36067199707031,
|
|
"fcm_dpo/q_t": 0.4125649333000183,
|
|
"grad_norm": 20.02904510498047,
|
|
"learning_rate": 4.6586183602616687e-07,
|
|
"logits/chosen": -0.42190614342689514,
|
|
"logits/rejected": -0.39422181248664856,
|
|
"logps/chosen": -148.86404418945312,
|
|
"logps/ref_chosen": -63.050880432128906,
|
|
"logps/ref_rejected": -78.68392181396484,
|
|
"logps/rejected": -231.85775756835938,
|
|
"loss": 1.1066,
|
|
"margin_dpo/margin_mean": 67.36067199707031,
|
|
"margin_dpo/margin_std": 95.92362976074219,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.2555066079295154,
|
|
"fcm_dpo/beta": 0.005698870867490768,
|
|
"fcm_dpo/delta": -0.06049712002277374,
|
|
"fcm_dpo/margin": 80.25811004638672,
|
|
"fcm_dpo/q_t": 0.39966872334480286,
|
|
"grad_norm": 23.803714752197266,
|
|
"learning_rate": 4.652116329460919e-07,
|
|
"logits/chosen": -0.371025949716568,
|
|
"logits/rejected": -0.3882911801338196,
|
|
"logps/chosen": -135.3501434326172,
|
|
"logps/ref_chosen": -53.36296844482422,
|
|
"logps/ref_rejected": -101.91120910644531,
|
|
"logps/rejected": -264.156494140625,
|
|
"loss": 1.0799,
|
|
"margin_dpo/margin_mean": 80.25811767578125,
|
|
"margin_dpo/margin_std": 114.95230102539062,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.25697503671071953,
|
|
"fcm_dpo/beta": 0.005467164795845747,
|
|
"fcm_dpo/delta": -0.20820683240890503,
|
|
"fcm_dpo/margin": 108.91580200195312,
|
|
"fcm_dpo/q_t": 0.3646426200866699,
|
|
"grad_norm": 27.59210968017578,
|
|
"learning_rate": 4.645557588393406e-07,
|
|
"logits/chosen": -0.3253830671310425,
|
|
"logits/rejected": -0.31133347749710083,
|
|
"logps/chosen": -121.3033447265625,
|
|
"logps/ref_chosen": -45.417762756347656,
|
|
"logps/ref_rejected": -89.50579833984375,
|
|
"logps/rejected": -274.30718994140625,
|
|
"loss": 0.954,
|
|
"margin_dpo/margin_mean": 108.91580200195312,
|
|
"margin_dpo/margin_std": 108.02481079101562,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.25844346549192365,
|
|
"fcm_dpo/beta": 0.005350666120648384,
|
|
"fcm_dpo/delta": -0.09525755047798157,
|
|
"fcm_dpo/margin": 91.70521545410156,
|
|
"fcm_dpo/q_t": 0.3924447298049927,
|
|
"grad_norm": 20.749011993408203,
|
|
"learning_rate": 4.638942309888058e-07,
|
|
"logits/chosen": -0.2885698676109314,
|
|
"logits/rejected": -0.3057425618171692,
|
|
"logps/chosen": -131.7587890625,
|
|
"logps/ref_chosen": -50.452842712402344,
|
|
"logps/ref_rejected": -95.5589599609375,
|
|
"logps/rejected": -268.57012939453125,
|
|
"loss": 1.043,
|
|
"margin_dpo/margin_mean": 91.70521545410156,
|
|
"margin_dpo/margin_std": 118.48809814453125,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.2599118942731278,
|
|
"fcm_dpo/beta": 0.005270042456686497,
|
|
"fcm_dpo/delta": -0.07793374359607697,
|
|
"fcm_dpo/margin": 89.99525451660156,
|
|
"fcm_dpo/q_t": 0.3949745297431946,
|
|
"grad_norm": 29.508962631225586,
|
|
"learning_rate": 4.6322706682636137e-07,
|
|
"logits/chosen": -0.3700808584690094,
|
|
"logits/rejected": -0.36038738489151,
|
|
"logps/chosen": -155.28253173828125,
|
|
"logps/ref_chosen": -61.216468811035156,
|
|
"logps/ref_rejected": -95.89378356933594,
|
|
"logps/rejected": -279.955078125,
|
|
"loss": 1.0511,
|
|
"margin_dpo/margin_mean": 89.99525451660156,
|
|
"margin_dpo/margin_std": 117.3301773071289,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.26138032305433184,
|
|
"fcm_dpo/beta": 0.0050674136728048325,
|
|
"fcm_dpo/delta": -0.18977004289627075,
|
|
"fcm_dpo/margin": 114.02528381347656,
|
|
"fcm_dpo/q_t": 0.37513747811317444,
|
|
"grad_norm": 27.016380310058594,
|
|
"learning_rate": 4.6255428393240354e-07,
|
|
"logits/chosen": -0.256081223487854,
|
|
"logits/rejected": -0.24759094417095184,
|
|
"logps/chosen": -162.09182739257812,
|
|
"logps/ref_chosen": -58.26478958129883,
|
|
"logps/ref_rejected": -105.3653335571289,
|
|
"logps/rejected": -323.2176513671875,
|
|
"loss": 0.9993,
|
|
"margin_dpo/margin_mean": 114.02528381347656,
|
|
"margin_dpo/margin_std": 142.46255493164062,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.26284875183553597,
|
|
"fcm_dpo/beta": 0.0050058369524776936,
|
|
"fcm_dpo/delta": -0.010855477303266525,
|
|
"fcm_dpo/margin": 81.88908386230469,
|
|
"fcm_dpo/q_t": 0.409834086894989,
|
|
"grad_norm": 28.862895965576172,
|
|
"learning_rate": 4.6187590003538724e-07,
|
|
"logits/chosen": -0.3041580319404602,
|
|
"logits/rejected": -0.31187134981155396,
|
|
"logps/chosen": -166.816162109375,
|
|
"logps/ref_chosen": -61.05832290649414,
|
|
"logps/ref_rejected": -90.52782440185547,
|
|
"logps/rejected": -278.17474365234375,
|
|
"loss": 1.1298,
|
|
"margin_dpo/margin_mean": 81.88908386230469,
|
|
"margin_dpo/margin_std": 138.5677032470703,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.2643171806167401,
|
|
"fcm_dpo/beta": 0.0049311150796711445,
|
|
"fcm_dpo/delta": -0.1207597553730011,
|
|
"fcm_dpo/margin": 104.31163787841797,
|
|
"fcm_dpo/q_t": 0.3830791115760803,
|
|
"grad_norm": 20.39897918701172,
|
|
"learning_rate": 4.611919330113591e-07,
|
|
"logits/chosen": -0.30937910079956055,
|
|
"logits/rejected": -0.30372101068496704,
|
|
"logps/chosen": -145.08279418945312,
|
|
"logps/ref_chosen": -54.34272003173828,
|
|
"logps/ref_rejected": -98.21183776855469,
|
|
"logps/rejected": -293.2635498046875,
|
|
"loss": 1.0205,
|
|
"margin_dpo/margin_mean": 104.31163787841797,
|
|
"margin_dpo/margin_std": 123.76762390136719,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.2657856093979442,
|
|
"fcm_dpo/beta": 0.004971269518136978,
|
|
"fcm_dpo/delta": 0.07836050540208817,
|
|
"fcm_dpo/margin": 65.1822509765625,
|
|
"fcm_dpo/q_t": 0.42635685205459595,
|
|
"grad_norm": 16.958599090576172,
|
|
"learning_rate": 4.605024008834863e-07,
|
|
"logits/chosen": -0.3443525433540344,
|
|
"logits/rejected": -0.3196682631969452,
|
|
"logps/chosen": -133.23472595214844,
|
|
"logps/ref_chosen": -55.000457763671875,
|
|
"logps/ref_rejected": -61.656166076660156,
|
|
"logps/rejected": -205.07269287109375,
|
|
"loss": 1.1641,
|
|
"margin_dpo/margin_mean": 65.1822509765625,
|
|
"margin_dpo/margin_std": 114.510498046875,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.26725403817914833,
|
|
"fcm_dpo/beta": 0.0048531051725149155,
|
|
"fcm_dpo/delta": -0.15253815054893494,
|
|
"fcm_dpo/margin": 111.9610366821289,
|
|
"fcm_dpo/q_t": 0.37664783000946045,
|
|
"grad_norm": 17.655406951904297,
|
|
"learning_rate": 4.598073218215817e-07,
|
|
"logits/chosen": -0.2682988941669464,
|
|
"logits/rejected": -0.27632054686546326,
|
|
"logps/chosen": -116.1263656616211,
|
|
"logps/ref_chosen": -41.107852935791016,
|
|
"logps/ref_rejected": -89.5215835571289,
|
|
"logps/rejected": -276.5011291503906,
|
|
"loss": 1.0094,
|
|
"margin_dpo/margin_mean": 111.96102905273438,
|
|
"margin_dpo/margin_std": 131.97494506835938,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.2687224669603524,
|
|
"fcm_dpo/beta": 0.004865183494985104,
|
|
"fcm_dpo/delta": 0.1063179224729538,
|
|
"fcm_dpo/margin": 60.8377685546875,
|
|
"fcm_dpo/q_t": 0.4321151673793793,
|
|
"grad_norm": 19.247581481933594,
|
|
"learning_rate": 4.5910671414162484e-07,
|
|
"logits/chosen": -0.31034791469573975,
|
|
"logits/rejected": -0.29865318536758423,
|
|
"logps/chosen": -168.7334442138672,
|
|
"logps/ref_chosen": -57.52456283569336,
|
|
"logps/ref_rejected": -75.97572326660156,
|
|
"logps/rejected": -248.02236938476562,
|
|
"loss": 1.1792,
|
|
"margin_dpo/margin_mean": 60.8377685546875,
|
|
"margin_dpo/margin_std": 100.23750305175781,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.2701908957415565,
|
|
"fcm_dpo/beta": 0.005026969127357006,
|
|
"fcm_dpo/delta": 0.08225920051336288,
|
|
"fcm_dpo/margin": 63.5242919921875,
|
|
"fcm_dpo/q_t": 0.42830824851989746,
|
|
"grad_norm": 17.908655166625977,
|
|
"learning_rate": 4.5840059630527985e-07,
|
|
"logits/chosen": -0.364746630191803,
|
|
"logits/rejected": -0.3552126884460449,
|
|
"logps/chosen": -151.75286865234375,
|
|
"logps/ref_chosen": -58.544952392578125,
|
|
"logps/ref_rejected": -76.63406372070312,
|
|
"logps/rejected": -233.36627197265625,
|
|
"loss": 1.1644,
|
|
"margin_dpo/margin_mean": 63.524295806884766,
|
|
"margin_dpo/margin_std": 108.76426696777344,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.27165932452276065,
|
|
"fcm_dpo/beta": 0.005125709809362888,
|
|
"fcm_dpo/delta": 0.14241455495357513,
|
|
"fcm_dpo/margin": 50.99299240112305,
|
|
"fcm_dpo/q_t": 0.44419747591018677,
|
|
"grad_norm": 20.483118057250977,
|
|
"learning_rate": 4.5768898691940836e-07,
|
|
"logits/chosen": -0.3164641857147217,
|
|
"logits/rejected": -0.291867733001709,
|
|
"logps/chosen": -162.77264404296875,
|
|
"logps/ref_chosen": -62.025848388671875,
|
|
"logps/ref_rejected": -73.7625961303711,
|
|
"logps/rejected": -225.5023956298828,
|
|
"loss": 1.2266,
|
|
"margin_dpo/margin_mean": 50.99299621582031,
|
|
"margin_dpo/margin_std": 119.36555480957031,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.27312775330396477,
|
|
"fcm_dpo/beta": 0.00508568761870265,
|
|
"fcm_dpo/delta": -0.09505677223205566,
|
|
"fcm_dpo/margin": 96.39396667480469,
|
|
"fcm_dpo/q_t": 0.3893883526325226,
|
|
"grad_norm": 28.187034606933594,
|
|
"learning_rate": 4.5697190473557947e-07,
|
|
"logits/chosen": -0.384753942489624,
|
|
"logits/rejected": -0.36420518159866333,
|
|
"logps/chosen": -161.88429260253906,
|
|
"logps/ref_chosen": -69.35346984863281,
|
|
"logps/ref_rejected": -88.07244873046875,
|
|
"logps/rejected": -276.99725341796875,
|
|
"loss": 1.0324,
|
|
"margin_dpo/margin_mean": 96.39396667480469,
|
|
"margin_dpo/margin_std": 115.44207763671875,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.2745961820851689,
|
|
"fcm_dpo/beta": 0.005085780750960112,
|
|
"fcm_dpo/delta": -0.005826789885759354,
|
|
"fcm_dpo/margin": 79.68223571777344,
|
|
"fcm_dpo/q_t": 0.407413512468338,
|
|
"grad_norm": 24.32848358154297,
|
|
"learning_rate": 4.5624936864957555e-07,
|
|
"logits/chosen": -0.3272107243537903,
|
|
"logits/rejected": -0.3207721710205078,
|
|
"logps/chosen": -140.29827880859375,
|
|
"logps/ref_chosen": -52.7564582824707,
|
|
"logps/ref_rejected": -81.96910095214844,
|
|
"logps/rejected": -249.19314575195312,
|
|
"loss": 1.0889,
|
|
"margin_dpo/margin_mean": 79.68223571777344,
|
|
"margin_dpo/margin_std": 104.66482543945312,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.27606461086637296,
|
|
"fcm_dpo/beta": 0.004998504184186459,
|
|
"fcm_dpo/delta": -0.0912085771560669,
|
|
"fcm_dpo/margin": 97.39512634277344,
|
|
"fcm_dpo/q_t": 0.3907063603401184,
|
|
"grad_norm": 30.731496810913086,
|
|
"learning_rate": 4.5552139770089454e-07,
|
|
"logits/chosen": -0.3083413541316986,
|
|
"logits/rejected": -0.313301146030426,
|
|
"logps/chosen": -131.1968994140625,
|
|
"logps/ref_chosen": -49.415489196777344,
|
|
"logps/ref_rejected": -89.54043579101562,
|
|
"logps/rejected": -268.71697998046875,
|
|
"loss": 1.0356,
|
|
"margin_dpo/margin_mean": 97.39511108398438,
|
|
"margin_dpo/margin_std": 118.06564331054688,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.2775330396475771,
|
|
"fcm_dpo/beta": 0.0049751270562410355,
|
|
"fcm_dpo/delta": 0.0008535268716514111,
|
|
"fcm_dpo/margin": 80.23283386230469,
|
|
"fcm_dpo/q_t": 0.4126392900943756,
|
|
"grad_norm": 23.215417861938477,
|
|
"learning_rate": 4.5478801107224794e-07,
|
|
"logits/chosen": -0.35090625286102295,
|
|
"logits/rejected": -0.3341342806816101,
|
|
"logps/chosen": -148.1799774169922,
|
|
"logps/ref_chosen": -52.39896011352539,
|
|
"logps/ref_rejected": -72.16735076904297,
|
|
"logps/rejected": -248.1811981201172,
|
|
"loss": 1.1204,
|
|
"margin_dpo/margin_mean": 80.23283386230469,
|
|
"margin_dpo/margin_std": 131.99029541015625,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.2790014684287812,
|
|
"fcm_dpo/beta": 0.004961079452186823,
|
|
"fcm_dpo/delta": -0.07475320994853973,
|
|
"fcm_dpo/margin": 94.80111694335938,
|
|
"fcm_dpo/q_t": 0.39625459909439087,
|
|
"grad_norm": 18.946704864501953,
|
|
"learning_rate": 4.5404922808905543e-07,
|
|
"logits/chosen": -0.38340574502944946,
|
|
"logits/rejected": -0.3741362690925598,
|
|
"logps/chosen": -167.19398498535156,
|
|
"logps/ref_chosen": -64.68305969238281,
|
|
"logps/ref_rejected": -102.55052185058594,
|
|
"logps/rejected": -299.862548828125,
|
|
"loss": 1.0765,
|
|
"margin_dpo/margin_mean": 94.80110931396484,
|
|
"margin_dpo/margin_std": 133.4532012939453,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.28046989720998533,
|
|
"fcm_dpo/beta": 0.0046934699639678,
|
|
"fcm_dpo/delta": -0.2578633725643158,
|
|
"fcm_dpo/margin": 136.34432983398438,
|
|
"fcm_dpo/q_t": 0.35979998111724854,
|
|
"grad_norm": 20.45859146118164,
|
|
"learning_rate": 4.5330506821893565e-07,
|
|
"logits/chosen": -0.3379257917404175,
|
|
"logits/rejected": -0.31569015979766846,
|
|
"logps/chosen": -163.58349609375,
|
|
"logps/ref_chosen": -68.65887451171875,
|
|
"logps/ref_rejected": -110.1396713256836,
|
|
"logps/rejected": -341.40863037109375,
|
|
"loss": 0.9469,
|
|
"margin_dpo/margin_mean": 136.34432983398438,
|
|
"margin_dpo/margin_std": 148.54867553710938,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.28193832599118945,
|
|
"fcm_dpo/beta": 0.004647374618798494,
|
|
"fcm_dpo/delta": -0.024048451334238052,
|
|
"fcm_dpo/margin": 91.00912475585938,
|
|
"fcm_dpo/q_t": 0.40724360942840576,
|
|
"grad_norm": 25.17645263671875,
|
|
"learning_rate": 4.5255555107119336e-07,
|
|
"logits/chosen": -0.27830517292022705,
|
|
"logits/rejected": -0.2767331004142761,
|
|
"logps/chosen": -193.28927612304688,
|
|
"logps/ref_chosen": -69.72691345214844,
|
|
"logps/ref_rejected": -103.32135009765625,
|
|
"logps/rejected": -317.892822265625,
|
|
"loss": 1.1082,
|
|
"margin_dpo/margin_mean": 91.00912475585938,
|
|
"margin_dpo/margin_std": 144.29054260253906,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.2834067547723935,
|
|
"fcm_dpo/beta": 0.0047539277002215385,
|
|
"fcm_dpo/delta": 0.14870129525661469,
|
|
"fcm_dpo/margin": 53.44430923461914,
|
|
"fcm_dpo/q_t": 0.4405200481414795,
|
|
"grad_norm": 30.352754592895508,
|
|
"learning_rate": 4.5180069639630236e-07,
|
|
"logits/chosen": -0.3066081404685974,
|
|
"logits/rejected": -0.2960602045059204,
|
|
"logps/chosen": -183.55735778808594,
|
|
"logps/ref_chosen": -60.19049835205078,
|
|
"logps/ref_rejected": -76.40755462646484,
|
|
"logps/rejected": -253.21871948242188,
|
|
"loss": 1.2487,
|
|
"margin_dpo/margin_mean": 53.44430923461914,
|
|
"margin_dpo/margin_std": 137.12416076660156,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.28487518355359764,
|
|
"fcm_dpo/beta": 0.004748090170323849,
|
|
"fcm_dpo/delta": -0.009687615558505058,
|
|
"fcm_dpo/margin": 86.15937805175781,
|
|
"fcm_dpo/q_t": 0.4043833017349243,
|
|
"grad_norm": 17.603483200073242,
|
|
"learning_rate": 4.510405240853854e-07,
|
|
"logits/chosen": -0.23391515016555786,
|
|
"logits/rejected": -0.21777713298797607,
|
|
"logps/chosen": -114.55517578125,
|
|
"logps/ref_chosen": -37.84037399291992,
|
|
"logps/ref_rejected": -60.684783935546875,
|
|
"logps/rejected": -223.5589599609375,
|
|
"loss": 1.07,
|
|
"margin_dpo/margin_mean": 86.15937805175781,
|
|
"margin_dpo/margin_std": 97.66061401367188,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.28634361233480177,
|
|
"fcm_dpo/beta": 0.004725456237792969,
|
|
"fcm_dpo/delta": -0.044938720762729645,
|
|
"fcm_dpo/margin": 93.73036193847656,
|
|
"fcm_dpo/q_t": 0.39892733097076416,
|
|
"grad_norm": 21.71803092956543,
|
|
"learning_rate": 4.5027505416968985e-07,
|
|
"logits/chosen": -0.24051514267921448,
|
|
"logits/rejected": -0.25914958119392395,
|
|
"logps/chosen": -176.97164916992188,
|
|
"logps/ref_chosen": -54.891571044921875,
|
|
"logps/ref_rejected": -96.77095794677734,
|
|
"logps/rejected": -312.5813903808594,
|
|
"loss": 1.0613,
|
|
"margin_dpo/margin_mean": 93.73036193847656,
|
|
"margin_dpo/margin_std": 116.85511016845703,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.2878120411160059,
|
|
"fcm_dpo/beta": 0.004625506699085236,
|
|
"fcm_dpo/delta": -0.09405344724655151,
|
|
"fcm_dpo/margin": 105.57457733154297,
|
|
"fcm_dpo/q_t": 0.39183375239372253,
|
|
"grad_norm": 17.44955062866211,
|
|
"learning_rate": 4.495043068200599e-07,
|
|
"logits/chosen": -0.2761349380016327,
|
|
"logits/rejected": -0.25936779379844666,
|
|
"logps/chosen": -148.1903076171875,
|
|
"logps/ref_chosen": -53.245243072509766,
|
|
"logps/ref_rejected": -76.05294799804688,
|
|
"logps/rejected": -276.57257080078125,
|
|
"loss": 1.0531,
|
|
"margin_dpo/margin_mean": 105.57457733154297,
|
|
"margin_dpo/margin_std": 137.7983856201172,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.28928046989721,
|
|
"fcm_dpo/beta": 0.004657561890780926,
|
|
"fcm_dpo/delta": 0.02050638385117054,
|
|
"fcm_dpo/margin": 81.59483337402344,
|
|
"fcm_dpo/q_t": 0.4134349226951599,
|
|
"grad_norm": 51.03510284423828,
|
|
"learning_rate": 4.4872830234640493e-07,
|
|
"logits/chosen": -0.2589200735092163,
|
|
"logits/rejected": -0.252452552318573,
|
|
"logps/chosen": -158.63006591796875,
|
|
"logps/ref_chosen": -60.42033386230469,
|
|
"logps/ref_rejected": -77.20890808105469,
|
|
"logps/rejected": -257.01348876953125,
|
|
"loss": 1.106,
|
|
"margin_dpo/margin_mean": 81.59483337402344,
|
|
"margin_dpo/margin_std": 112.34895324707031,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.2907488986784141,
|
|
"fcm_dpo/beta": 0.004592553712427616,
|
|
"fcm_dpo/delta": -0.08417719602584839,
|
|
"fcm_dpo/margin": 104.55335998535156,
|
|
"fcm_dpo/q_t": 0.39459335803985596,
|
|
"grad_norm": 21.29139518737793,
|
|
"learning_rate": 4.479470611971645e-07,
|
|
"logits/chosen": -0.30762723088264465,
|
|
"logits/rejected": -0.30751878023147583,
|
|
"logps/chosen": -167.06329345703125,
|
|
"logps/ref_chosen": -55.03618621826172,
|
|
"logps/ref_rejected": -97.24325561523438,
|
|
"logps/rejected": -313.82373046875,
|
|
"loss": 1.0534,
|
|
"margin_dpo/margin_mean": 104.5533676147461,
|
|
"margin_dpo/margin_std": 141.0715789794922,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.2922173274596182,
|
|
"fcm_dpo/beta": 0.004494061227887869,
|
|
"fcm_dpo/delta": -0.07228090614080429,
|
|
"fcm_dpo/margin": 104.078369140625,
|
|
"fcm_dpo/q_t": 0.3951132893562317,
|
|
"grad_norm": 22.24437141418457,
|
|
"learning_rate": 4.471606039587695e-07,
|
|
"logits/chosen": -0.3077055811882019,
|
|
"logits/rejected": -0.2900369167327881,
|
|
"logps/chosen": -163.30047607421875,
|
|
"logps/ref_chosen": -56.828826904296875,
|
|
"logps/ref_rejected": -84.64820861816406,
|
|
"logps/rejected": -295.1982116699219,
|
|
"loss": 1.0639,
|
|
"margin_dpo/margin_mean": 104.078369140625,
|
|
"margin_dpo/margin_std": 139.16575622558594,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.2936857562408223,
|
|
"fcm_dpo/beta": 0.004447395913302898,
|
|
"fcm_dpo/delta": -0.06333615630865097,
|
|
"fcm_dpo/margin": 103.42639923095703,
|
|
"fcm_dpo/q_t": 0.39867448806762695,
|
|
"grad_norm": 21.961410522460938,
|
|
"learning_rate": 4.4636895135509966e-07,
|
|
"logits/chosen": -0.25170740485191345,
|
|
"logits/rejected": -0.23479950428009033,
|
|
"logps/chosen": -158.19174194335938,
|
|
"logps/ref_chosen": -53.06706237792969,
|
|
"logps/ref_rejected": -80.60843658447266,
|
|
"logps/rejected": -289.1595153808594,
|
|
"loss": 1.0859,
|
|
"margin_dpo/margin_mean": 103.4263916015625,
|
|
"margin_dpo/margin_std": 155.78683471679688,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.2936857562408223,
|
|
"eval_fcm_dpo/beta": 0.004448407795280218,
|
|
"eval_logits/chosen": -0.3343876898288727,
|
|
"eval_logits/rejected": -0.32057368755340576,
|
|
"eval_logps/chosen": -222.92990112304688,
|
|
"eval_logps/ref_chosen": -79.05104064941406,
|
|
"eval_logps/ref_rejected": -86.79793548583984,
|
|
"eval_logps/rejected": -287.1362609863281,
|
|
"eval_loss": 0.6270496845245361,
|
|
"eval_margin_dpo/margin_mean": 56.45948028564453,
|
|
"eval_margin_dpo/margin_std": 146.72439575195312,
|
|
"eval_runtime": 39.2985,
|
|
"eval_samples_per_second": 59.519,
|
|
"eval_steps_per_second": 1.883,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.29515418502202645,
|
|
"fcm_dpo/beta": 0.0044220732524991035,
|
|
"fcm_dpo/delta": -0.05648049712181091,
|
|
"fcm_dpo/margin": 102.65122985839844,
|
|
"fcm_dpo/q_t": 0.39846765995025635,
|
|
"grad_norm": 28.671607971191406,
|
|
"learning_rate": 4.455721242469372e-07,
|
|
"logits/chosen": -0.3493998944759369,
|
|
"logits/rejected": -0.34729981422424316,
|
|
"logps/chosen": -184.7327880859375,
|
|
"logps/ref_chosen": -75.4022216796875,
|
|
"logps/ref_rejected": -114.80821990966797,
|
|
"logps/rejected": -326.7900390625,
|
|
"loss": 1.0774,
|
|
"margin_dpo/margin_mean": 102.65122985839844,
|
|
"margin_dpo/margin_std": 145.4634246826172,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.2966226138032305,
|
|
"fcm_dpo/beta": 0.004456365015357733,
|
|
"fcm_dpo/delta": 0.08349813520908356,
|
|
"fcm_dpo/margin": 71.60739135742188,
|
|
"fcm_dpo/q_t": 0.4296954274177551,
|
|
"grad_norm": 20.462011337280273,
|
|
"learning_rate": 4.4477014363141755e-07,
|
|
"logits/chosen": -0.2886938750743866,
|
|
"logits/rejected": -0.3031277060508728,
|
|
"logps/chosen": -163.66189575195312,
|
|
"logps/ref_chosen": -50.101318359375,
|
|
"logps/ref_rejected": -86.98503112792969,
|
|
"logps/rejected": -272.15301513671875,
|
|
"loss": 1.187,
|
|
"margin_dpo/margin_mean": 71.6073989868164,
|
|
"margin_dpo/margin_std": 143.59751892089844,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.29809104258443464,
|
|
"fcm_dpo/beta": 0.004469073843210936,
|
|
"fcm_dpo/delta": 0.0007087336853146553,
|
|
"fcm_dpo/margin": 89.34852600097656,
|
|
"fcm_dpo/q_t": 0.4091683030128479,
|
|
"grad_norm": 21.502296447753906,
|
|
"learning_rate": 4.439630306414758e-07,
|
|
"logits/chosen": -0.34994345903396606,
|
|
"logits/rejected": -0.34321272373199463,
|
|
"logps/chosen": -173.15109252929688,
|
|
"logps/ref_chosen": -60.60969543457031,
|
|
"logps/ref_rejected": -85.89596557617188,
|
|
"logps/rejected": -287.785888671875,
|
|
"loss": 1.0972,
|
|
"margin_dpo/margin_mean": 89.34852600097656,
|
|
"margin_dpo/margin_std": 124.40196228027344,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.29955947136563876,
|
|
"fcm_dpo/beta": 0.00449190940707922,
|
|
"fcm_dpo/delta": 0.030911792069673538,
|
|
"fcm_dpo/margin": 82.42413330078125,
|
|
"fcm_dpo/q_t": 0.41922539472579956,
|
|
"grad_norm": 25.3990535736084,
|
|
"learning_rate": 4.431508065452897e-07,
|
|
"logits/chosen": -0.3936639428138733,
|
|
"logits/rejected": -0.35490119457244873,
|
|
"logps/chosen": -204.40565490722656,
|
|
"logps/ref_chosen": -80.16496276855469,
|
|
"logps/ref_rejected": -87.69590759277344,
|
|
"logps/rejected": -294.3607177734375,
|
|
"loss": 1.1486,
|
|
"margin_dpo/margin_mean": 82.42413330078125,
|
|
"margin_dpo/margin_std": 145.6385040283203,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.3010279001468429,
|
|
"fcm_dpo/beta": 0.004410895984619856,
|
|
"fcm_dpo/delta": -0.08054696023464203,
|
|
"fcm_dpo/margin": 107.63643646240234,
|
|
"fcm_dpo/q_t": 0.3914853632450104,
|
|
"grad_norm": 22.971195220947266,
|
|
"learning_rate": 4.4233349274571974e-07,
|
|
"logits/chosen": -0.3133258819580078,
|
|
"logits/rejected": -0.2844025492668152,
|
|
"logps/chosen": -178.8911895751953,
|
|
"logps/ref_chosen": -59.384735107421875,
|
|
"logps/ref_rejected": -85.12505340576172,
|
|
"logps/rejected": -312.2679443359375,
|
|
"loss": 1.0581,
|
|
"margin_dpo/margin_mean": 107.63642883300781,
|
|
"margin_dpo/margin_std": 137.53057861328125,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.302496328928047,
|
|
"fcm_dpo/beta": 0.004336735233664513,
|
|
"fcm_dpo/delta": -0.1181631088256836,
|
|
"fcm_dpo/margin": 117.94447326660156,
|
|
"fcm_dpo/q_t": 0.38123589754104614,
|
|
"grad_norm": 24.863079071044922,
|
|
"learning_rate": 4.415111107797445e-07,
|
|
"logits/chosen": -0.23951232433319092,
|
|
"logits/rejected": -0.2434745579957962,
|
|
"logps/chosen": -153.0740203857422,
|
|
"logps/ref_chosen": -46.964500427246094,
|
|
"logps/ref_rejected": -98.9534912109375,
|
|
"logps/rejected": -323.00750732421875,
|
|
"loss": 1.0098,
|
|
"margin_dpo/margin_mean": 117.94447326660156,
|
|
"margin_dpo/margin_std": 127.08613586425781,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.3039647577092511,
|
|
"fcm_dpo/beta": 0.004215326625853777,
|
|
"fcm_dpo/delta": -0.1830846071243286,
|
|
"fcm_dpo/margin": 135.88345336914062,
|
|
"fcm_dpo/q_t": 0.3738357424736023,
|
|
"grad_norm": 23.037128448486328,
|
|
"learning_rate": 4.4068368231789365e-07,
|
|
"logits/chosen": -0.36506402492523193,
|
|
"logits/rejected": -0.34065380692481995,
|
|
"logps/chosen": -152.6772003173828,
|
|
"logps/ref_chosen": -56.05625915527344,
|
|
"logps/ref_rejected": -84.44779968261719,
|
|
"logps/rejected": -316.95220947265625,
|
|
"loss": 0.9863,
|
|
"margin_dpo/margin_mean": 135.88345336914062,
|
|
"margin_dpo/margin_std": 156.41925048828125,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.3054331864904552,
|
|
"fcm_dpo/beta": 0.004147009924054146,
|
|
"fcm_dpo/delta": -0.024765145033597946,
|
|
"fcm_dpo/margin": 102.14737701416016,
|
|
"fcm_dpo/q_t": 0.40402868390083313,
|
|
"grad_norm": 24.734445571899414,
|
|
"learning_rate": 4.398512291636768e-07,
|
|
"logits/chosen": -0.3294498026371002,
|
|
"logits/rejected": -0.31117957830429077,
|
|
"logps/chosen": -219.45916748046875,
|
|
"logps/ref_chosen": -67.06761169433594,
|
|
"logps/ref_rejected": -94.28689575195312,
|
|
"logps/rejected": -348.8258361816406,
|
|
"loss": 1.1027,
|
|
"margin_dpo/margin_mean": 102.14736938476562,
|
|
"margin_dpo/margin_std": 155.35968017578125,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.3069016152716593,
|
|
"fcm_dpo/beta": 0.004163610748946667,
|
|
"fcm_dpo/delta": 0.01959494687616825,
|
|
"fcm_dpo/margin": 91.5401382446289,
|
|
"fcm_dpo/q_t": 0.4133692979812622,
|
|
"grad_norm": 29.469234466552734,
|
|
"learning_rate": 4.3901377325300857e-07,
|
|
"logits/chosen": -0.2552475929260254,
|
|
"logits/rejected": -0.24348849058151245,
|
|
"logps/chosen": -184.3878631591797,
|
|
"logps/ref_chosen": -56.18169403076172,
|
|
"logps/ref_rejected": -80.94152069091797,
|
|
"logps/rejected": -300.68780517578125,
|
|
"loss": 1.1275,
|
|
"margin_dpo/margin_mean": 91.54013061523438,
|
|
"margin_dpo/margin_std": 143.59213256835938,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.30837004405286345,
|
|
"fcm_dpo/beta": 0.0041510555893182755,
|
|
"fcm_dpo/delta": -0.04974536970257759,
|
|
"fcm_dpo/margin": 107.78252410888672,
|
|
"fcm_dpo/q_t": 0.39923179149627686,
|
|
"grad_norm": 23.570310592651367,
|
|
"learning_rate": 4.381713366536311e-07,
|
|
"logits/chosen": -0.2864752411842346,
|
|
"logits/rejected": -0.2775830030441284,
|
|
"logps/chosen": -160.75347900390625,
|
|
"logps/ref_chosen": -46.371822357177734,
|
|
"logps/ref_rejected": -76.68162536621094,
|
|
"logps/rejected": -298.84576416015625,
|
|
"loss": 1.0692,
|
|
"margin_dpo/margin_mean": 107.78252410888672,
|
|
"margin_dpo/margin_std": 142.31451416015625,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.30983847283406757,
|
|
"fcm_dpo/beta": 0.004142909776419401,
|
|
"fcm_dpo/delta": 0.02150268293917179,
|
|
"fcm_dpo/margin": 91.54325866699219,
|
|
"fcm_dpo/q_t": 0.41826772689819336,
|
|
"grad_norm": 31.811269760131836,
|
|
"learning_rate": 4.373239415645323e-07,
|
|
"logits/chosen": -0.2951120138168335,
|
|
"logits/rejected": -0.25315576791763306,
|
|
"logps/chosen": -244.23867797851562,
|
|
"logps/ref_chosen": -78.93235778808594,
|
|
"logps/ref_rejected": -86.82098388671875,
|
|
"logps/rejected": -343.6705627441406,
|
|
"loss": 1.1421,
|
|
"margin_dpo/margin_mean": 91.54325103759766,
|
|
"margin_dpo/margin_std": 157.68630981445312,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.31130690161527164,
|
|
"fcm_dpo/beta": 0.004025098867714405,
|
|
"fcm_dpo/delta": -0.14305897057056427,
|
|
"fcm_dpo/margin": 132.50680541992188,
|
|
"fcm_dpo/q_t": 0.38068991899490356,
|
|
"grad_norm": 27.229854583740234,
|
|
"learning_rate": 4.3647161031536086e-07,
|
|
"logits/chosen": -0.2998214364051819,
|
|
"logits/rejected": -0.2908783257007599,
|
|
"logps/chosen": -194.6740264892578,
|
|
"logps/ref_chosen": -58.19701385498047,
|
|
"logps/ref_rejected": -103.05785369873047,
|
|
"logps/rejected": -372.04168701171875,
|
|
"loss": 1.0273,
|
|
"margin_dpo/margin_mean": 132.50680541992188,
|
|
"margin_dpo/margin_std": 162.19839477539062,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.31277533039647576,
|
|
"fcm_dpo/beta": 0.0039492822252213955,
|
|
"fcm_dpo/delta": -0.1063137948513031,
|
|
"fcm_dpo/margin": 126.71992492675781,
|
|
"fcm_dpo/q_t": 0.3866674602031708,
|
|
"grad_norm": 29.196735382080078,
|
|
"learning_rate": 4.3561436536583774e-07,
|
|
"logits/chosen": -0.27108436822891235,
|
|
"logits/rejected": -0.24033893644809723,
|
|
"logps/chosen": -197.20233154296875,
|
|
"logps/ref_chosen": -67.51271057128906,
|
|
"logps/ref_rejected": -93.91471862792969,
|
|
"logps/rejected": -350.32427978515625,
|
|
"loss": 1.0335,
|
|
"margin_dpo/margin_mean": 126.71992492675781,
|
|
"margin_dpo/margin_std": 155.05508422851562,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.3142437591776799,
|
|
"fcm_dpo/beta": 0.0039157988503575325,
|
|
"fcm_dpo/delta": -0.043942950665950775,
|
|
"fcm_dpo/margin": 112.87818145751953,
|
|
"fcm_dpo/q_t": 0.4002673327922821,
|
|
"grad_norm": 23.29557991027832,
|
|
"learning_rate": 4.3475222930516473e-07,
|
|
"logits/chosen": -0.2142457664012909,
|
|
"logits/rejected": -0.2187519669532776,
|
|
"logps/chosen": -156.87863159179688,
|
|
"logps/ref_chosen": -41.604888916015625,
|
|
"logps/ref_rejected": -77.51741027832031,
|
|
"logps/rejected": -305.66937255859375,
|
|
"loss": 1.0694,
|
|
"margin_dpo/margin_mean": 112.87818908691406,
|
|
"margin_dpo/margin_std": 148.09786987304688,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.315712187958884,
|
|
"fcm_dpo/beta": 0.003857589792460203,
|
|
"fcm_dpo/delta": -0.06539718061685562,
|
|
"fcm_dpo/margin": 119.77651977539062,
|
|
"fcm_dpo/q_t": 0.39416319131851196,
|
|
"grad_norm": 27.0275821685791,
|
|
"learning_rate": 4.3388522485142885e-07,
|
|
"logits/chosen": -0.2805659770965576,
|
|
"logits/rejected": -0.2722279727458954,
|
|
"logps/chosen": -188.13607788085938,
|
|
"logps/ref_chosen": -53.279266357421875,
|
|
"logps/ref_rejected": -89.96464538574219,
|
|
"logps/rejected": -344.59796142578125,
|
|
"loss": 1.0434,
|
|
"margin_dpo/margin_mean": 119.77651977539062,
|
|
"margin_dpo/margin_std": 138.7313232421875,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.31718061674008813,
|
|
"fcm_dpo/beta": 0.0038378096651285887,
|
|
"fcm_dpo/delta": -0.05455287545919418,
|
|
"fcm_dpo/margin": 117.78301239013672,
|
|
"fcm_dpo/q_t": 0.39905649423599243,
|
|
"grad_norm": 26.12622833251953,
|
|
"learning_rate": 4.330133748510036e-07,
|
|
"logits/chosen": -0.2915078401565552,
|
|
"logits/rejected": -0.27635902166366577,
|
|
"logps/chosen": -185.41506958007812,
|
|
"logps/ref_chosen": -48.887794494628906,
|
|
"logps/ref_rejected": -77.19892883300781,
|
|
"logps/rejected": -331.50921630859375,
|
|
"loss": 1.08,
|
|
"margin_dpo/margin_mean": 117.78302001953125,
|
|
"margin_dpo/margin_std": 167.56219482421875,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.3186490455212922,
|
|
"fcm_dpo/beta": 0.003737176302820444,
|
|
"fcm_dpo/delta": -0.12201692909002304,
|
|
"fcm_dpo/margin": 137.91058349609375,
|
|
"fcm_dpo/q_t": 0.383506178855896,
|
|
"grad_norm": 20.442758560180664,
|
|
"learning_rate": 4.3213670227794757e-07,
|
|
"logits/chosen": -0.26202258467674255,
|
|
"logits/rejected": -0.25628846883773804,
|
|
"logps/chosen": -187.60577392578125,
|
|
"logps/ref_chosen": -49.845306396484375,
|
|
"logps/ref_rejected": -100.07832336425781,
|
|
"logps/rejected": -375.7493896484375,
|
|
"loss": 1.0117,
|
|
"margin_dpo/margin_mean": 137.91058349609375,
|
|
"margin_dpo/margin_std": 156.00509643554688,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.3201174743024963,
|
|
"fcm_dpo/beta": 0.003717987798154354,
|
|
"fcm_dpo/delta": 0.004697195254266262,
|
|
"fcm_dpo/margin": 106.35856628417969,
|
|
"fcm_dpo/q_t": 0.41124439239501953,
|
|
"grad_norm": 21.640470504760742,
|
|
"learning_rate": 4.3125523023339815e-07,
|
|
"logits/chosen": -0.29033294320106506,
|
|
"logits/rejected": -0.2837512791156769,
|
|
"logps/chosen": -200.13418579101562,
|
|
"logps/ref_chosen": -58.576683044433594,
|
|
"logps/ref_rejected": -87.84639739990234,
|
|
"logps/rejected": -335.762451171875,
|
|
"loss": 1.1106,
|
|
"margin_dpo/margin_mean": 106.35856628417969,
|
|
"margin_dpo/margin_std": 158.380859375,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.32158590308370044,
|
|
"fcm_dpo/beta": 0.0037690873723477125,
|
|
"fcm_dpo/delta": 0.04747757688164711,
|
|
"fcm_dpo/margin": 93.80690002441406,
|
|
"fcm_dpo/q_t": 0.42123115062713623,
|
|
"grad_norm": 26.6854305267334,
|
|
"learning_rate": 4.303689819449636e-07,
|
|
"logits/chosen": -0.3261488974094391,
|
|
"logits/rejected": -0.318808376789093,
|
|
"logps/chosen": -212.6107635498047,
|
|
"logps/ref_chosen": -61.083858489990234,
|
|
"logps/ref_rejected": -85.83042907714844,
|
|
"logps/rejected": -331.16424560546875,
|
|
"loss": 1.1637,
|
|
"margin_dpo/margin_mean": 93.80690002441406,
|
|
"margin_dpo/margin_std": 173.55465698242188,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.32305433186490456,
|
|
"fcm_dpo/beta": 0.003811020404100418,
|
|
"fcm_dpo/delta": 0.1028871089220047,
|
|
"fcm_dpo/margin": 78.8044662475586,
|
|
"fcm_dpo/q_t": 0.42963457107543945,
|
|
"grad_norm": 24.080263137817383,
|
|
"learning_rate": 4.2947798076611047e-07,
|
|
"logits/chosen": -0.29265105724334717,
|
|
"logits/rejected": -0.26879560947418213,
|
|
"logps/chosen": -240.15542602539062,
|
|
"logps/ref_chosen": -70.03128051757812,
|
|
"logps/ref_rejected": -87.68551635742188,
|
|
"logps/rejected": -336.6141357421875,
|
|
"loss": 1.1611,
|
|
"margin_dpo/margin_mean": 78.8044662475586,
|
|
"margin_dpo/margin_std": 122.5282974243164,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.3245227606461087,
|
|
"fcm_dpo/beta": 0.003719739615917206,
|
|
"fcm_dpo/delta": -0.23312455415725708,
|
|
"fcm_dpo/margin": 166.45452880859375,
|
|
"fcm_dpo/q_t": 0.35831981897354126,
|
|
"grad_norm": 29.536046981811523,
|
|
"learning_rate": 4.285822501755485e-07,
|
|
"logits/chosen": -0.2684306204319,
|
|
"logits/rejected": -0.27492430806159973,
|
|
"logps/chosen": -199.0157470703125,
|
|
"logps/ref_chosen": -52.15470886230469,
|
|
"logps/ref_rejected": -106.46768188476562,
|
|
"logps/rejected": -419.7832336425781,
|
|
"loss": 0.9386,
|
|
"margin_dpo/margin_mean": 166.45452880859375,
|
|
"margin_dpo/margin_std": 155.8255157470703,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.32599118942731276,
|
|
"fcm_dpo/beta": 0.003646267345175147,
|
|
"fcm_dpo/delta": -0.06051616743206978,
|
|
"fcm_dpo/margin": 125.51266479492188,
|
|
"fcm_dpo/q_t": 0.3957686424255371,
|
|
"grad_norm": 22.21294403076172,
|
|
"learning_rate": 4.276818137766118e-07,
|
|
"logits/chosen": -0.32068097591400146,
|
|
"logits/rejected": -0.3213370442390442,
|
|
"logps/chosen": -214.42459106445312,
|
|
"logps/ref_chosen": -60.971099853515625,
|
|
"logps/ref_rejected": -100.00115203857422,
|
|
"logps/rejected": -378.9673156738281,
|
|
"loss": 1.056,
|
|
"margin_dpo/margin_mean": 125.51266479492188,
|
|
"margin_dpo/margin_std": 156.2869110107422,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.3274596182085169,
|
|
"fcm_dpo/beta": 0.0036216324660927057,
|
|
"fcm_dpo/delta": 0.012630530633032322,
|
|
"fcm_dpo/margin": 107.08901977539062,
|
|
"fcm_dpo/q_t": 0.412469744682312,
|
|
"grad_norm": 29.19846534729004,
|
|
"learning_rate": 4.2677669529663686e-07,
|
|
"logits/chosen": -0.23973864316940308,
|
|
"logits/rejected": -0.23426464200019836,
|
|
"logps/chosen": -216.11801147460938,
|
|
"logps/ref_chosen": -52.64057540893555,
|
|
"logps/ref_rejected": -82.82502746582031,
|
|
"logps/rejected": -353.3914794921875,
|
|
"loss": 1.1276,
|
|
"margin_dpo/margin_mean": 107.08901977539062,
|
|
"margin_dpo/margin_std": 174.32290649414062,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.328928046989721,
|
|
"fcm_dpo/beta": 0.0035798242315649986,
|
|
"fcm_dpo/delta": -0.04205327853560448,
|
|
"fcm_dpo/margin": 122.58447265625,
|
|
"fcm_dpo/q_t": 0.4028151333332062,
|
|
"grad_norm": 26.769624710083008,
|
|
"learning_rate": 4.2586691858633747e-07,
|
|
"logits/chosen": -0.291404664516449,
|
|
"logits/rejected": -0.27433890104293823,
|
|
"logps/chosen": -191.07574462890625,
|
|
"logps/ref_chosen": -48.59541320800781,
|
|
"logps/ref_rejected": -77.11648559570312,
|
|
"logps/rejected": -342.1812744140625,
|
|
"loss": 1.0876,
|
|
"margin_dpo/margin_mean": 122.58447265625,
|
|
"margin_dpo/margin_std": 175.91018676757812,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.3303964757709251,
|
|
"fcm_dpo/beta": 0.0035359251778572798,
|
|
"fcm_dpo/delta": -0.11218781769275665,
|
|
"fcm_dpo/margin": 143.213134765625,
|
|
"fcm_dpo/q_t": 0.3858293294906616,
|
|
"grad_norm": 22.56654167175293,
|
|
"learning_rate": 4.249525076191759e-07,
|
|
"logits/chosen": -0.32044434547424316,
|
|
"logits/rejected": -0.312914103269577,
|
|
"logps/chosen": -222.03887939453125,
|
|
"logps/ref_chosen": -58.000465393066406,
|
|
"logps/ref_rejected": -99.90291595458984,
|
|
"logps/rejected": -407.15447998046875,
|
|
"loss": 1.0315,
|
|
"margin_dpo/margin_mean": 143.213134765625,
|
|
"margin_dpo/margin_std": 177.5218048095703,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.33186490455212925,
|
|
"fcm_dpo/beta": 0.003493384225293994,
|
|
"fcm_dpo/delta": -0.009331781417131424,
|
|
"fcm_dpo/margin": 116.85462188720703,
|
|
"fcm_dpo/q_t": 0.4088728427886963,
|
|
"grad_norm": 29.890501022338867,
|
|
"learning_rate": 4.2403348649073167e-07,
|
|
"logits/chosen": -0.3462528586387634,
|
|
"logits/rejected": -0.30755919218063354,
|
|
"logps/chosen": -192.7152099609375,
|
|
"logps/ref_chosen": -58.898799896240234,
|
|
"logps/ref_rejected": -78.68775939941406,
|
|
"logps/rejected": -329.3587646484375,
|
|
"loss": 1.0972,
|
|
"margin_dpo/margin_mean": 116.85462951660156,
|
|
"margin_dpo/margin_std": 164.87612915039062,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.3333333333333333,
|
|
"fcm_dpo/beta": 0.0034475913271307945,
|
|
"fcm_dpo/delta": -0.09939224272966385,
|
|
"fcm_dpo/margin": 143.24203491210938,
|
|
"fcm_dpo/q_t": 0.3873962163925171,
|
|
"grad_norm": 25.0889949798584,
|
|
"learning_rate": 4.2310987941806615e-07,
|
|
"logits/chosen": -0.32977473735809326,
|
|
"logits/rejected": -0.31640344858169556,
|
|
"logps/chosen": -220.12014770507812,
|
|
"logps/ref_chosen": -59.072181701660156,
|
|
"logps/ref_rejected": -99.41236877441406,
|
|
"logps/rejected": -403.7023620605469,
|
|
"loss": 1.0294,
|
|
"margin_dpo/margin_mean": 143.24203491210938,
|
|
"margin_dpo/margin_std": 169.32302856445312,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.33480176211453744,
|
|
"fcm_dpo/beta": 0.003472366835922003,
|
|
"fcm_dpo/delta": 0.04667496308684349,
|
|
"fcm_dpo/margin": 102.19868469238281,
|
|
"fcm_dpo/q_t": 0.4186497926712036,
|
|
"grad_norm": 24.252988815307617,
|
|
"learning_rate": 4.2218171073908463e-07,
|
|
"logits/chosen": -0.33692067861557007,
|
|
"logits/rejected": -0.32044440507888794,
|
|
"logps/chosen": -228.12362670898438,
|
|
"logps/ref_chosen": -65.89128875732422,
|
|
"logps/ref_rejected": -91.04875183105469,
|
|
"logps/rejected": -355.4797668457031,
|
|
"loss": 1.1385,
|
|
"margin_dpo/margin_mean": 102.19868469238281,
|
|
"margin_dpo/margin_std": 163.4045867919922,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.33627019089574156,
|
|
"fcm_dpo/beta": 0.003483015578240156,
|
|
"fcm_dpo/delta": 0.010966208763420582,
|
|
"fcm_dpo/margin": 111.81390380859375,
|
|
"fcm_dpo/q_t": 0.41027718782424927,
|
|
"grad_norm": 29.11672592163086,
|
|
"learning_rate": 4.212490049118951e-07,
|
|
"logits/chosen": -0.4054703414440155,
|
|
"logits/rejected": -0.37607651948928833,
|
|
"logps/chosen": -232.11636352539062,
|
|
"logps/ref_chosen": -70.70637512207031,
|
|
"logps/ref_rejected": -84.52741241455078,
|
|
"logps/rejected": -357.75128173828125,
|
|
"loss": 1.1063,
|
|
"margin_dpo/margin_mean": 111.81390380859375,
|
|
"margin_dpo/margin_std": 160.0384063720703,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.3377386196769457,
|
|
"fcm_dpo/beta": 0.003395712934434414,
|
|
"fcm_dpo/delta": -0.15949219465255737,
|
|
"fcm_dpo/margin": 162.08526611328125,
|
|
"fcm_dpo/q_t": 0.37249940633773804,
|
|
"grad_norm": 26.661436080932617,
|
|
"learning_rate": 4.203117865141635e-07,
|
|
"logits/chosen": -0.30987975001335144,
|
|
"logits/rejected": -0.3130999505519867,
|
|
"logps/chosen": -165.25253295898438,
|
|
"logps/ref_chosen": -39.282005310058594,
|
|
"logps/ref_rejected": -85.62191009521484,
|
|
"logps/rejected": -373.6777038574219,
|
|
"loss": 0.9778,
|
|
"margin_dpo/margin_mean": 162.08526611328125,
|
|
"margin_dpo/margin_std": 160.76185607910156,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.3392070484581498,
|
|
"fcm_dpo/beta": 0.003384451847523451,
|
|
"fcm_dpo/delta": 0.020527083426713943,
|
|
"fcm_dpo/margin": 112.35234069824219,
|
|
"fcm_dpo/q_t": 0.4136514365673065,
|
|
"grad_norm": 32.04772186279297,
|
|
"learning_rate": 4.1937008024246625e-07,
|
|
"logits/chosen": -0.37022462487220764,
|
|
"logits/rejected": -0.3420015573501587,
|
|
"logps/chosen": -207.71981811523438,
|
|
"logps/ref_chosen": -63.27644348144531,
|
|
"logps/ref_rejected": -74.1239013671875,
|
|
"logps/rejected": -330.91961669921875,
|
|
"loss": 1.1013,
|
|
"margin_dpo/margin_mean": 112.35232543945312,
|
|
"margin_dpo/margin_std": 149.82984924316406,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.3406754772393539,
|
|
"fcm_dpo/beta": 0.0034229401499032974,
|
|
"fcm_dpo/delta": 0.08551573753356934,
|
|
"fcm_dpo/margin": 92.69304656982422,
|
|
"fcm_dpo/q_t": 0.42926985025405884,
|
|
"grad_norm": 22.970428466796875,
|
|
"learning_rate": 4.1842391091163933e-07,
|
|
"logits/chosen": -0.3794689476490021,
|
|
"logits/rejected": -0.3578903377056122,
|
|
"logps/chosen": -247.9998321533203,
|
|
"logps/ref_chosen": -70.74876403808594,
|
|
"logps/ref_rejected": -83.97706604003906,
|
|
"logps/rejected": -353.92120361328125,
|
|
"loss": 1.1598,
|
|
"margin_dpo/margin_mean": 92.69305419921875,
|
|
"margin_dpo/margin_std": 156.01400756835938,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.342143906020558,
|
|
"fcm_dpo/beta": 0.003400879679247737,
|
|
"fcm_dpo/delta": -0.09279187768697739,
|
|
"fcm_dpo/margin": 143.58230590820312,
|
|
"fcm_dpo/q_t": 0.3939361870288849,
|
|
"grad_norm": 25.750471115112305,
|
|
"learning_rate": 4.174733034541245e-07,
|
|
"logits/chosen": -0.37290647625923157,
|
|
"logits/rejected": -0.3758310079574585,
|
|
"logps/chosen": -220.12417602539062,
|
|
"logps/ref_chosen": -54.8829345703125,
|
|
"logps/ref_rejected": -107.4800796508789,
|
|
"logps/rejected": -416.30364990234375,
|
|
"loss": 1.0704,
|
|
"margin_dpo/margin_mean": 143.5823211669922,
|
|
"margin_dpo/margin_std": 209.883056640625,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.3436123348017621,
|
|
"fcm_dpo/beta": 0.0033045965246856213,
|
|
"fcm_dpo/delta": -0.10699286311864853,
|
|
"fcm_dpo/margin": 151.24588012695312,
|
|
"fcm_dpo/q_t": 0.38530829548835754,
|
|
"grad_norm": 28.405414581298828,
|
|
"learning_rate": 4.165182829193126e-07,
|
|
"logits/chosen": -0.36269479990005493,
|
|
"logits/rejected": -0.3897593021392822,
|
|
"logps/chosen": -197.34793090820312,
|
|
"logps/ref_chosen": -44.094520568847656,
|
|
"logps/ref_rejected": -100.00663757324219,
|
|
"logps/rejected": -404.50592041015625,
|
|
"loss": 1.0153,
|
|
"margin_dpo/margin_mean": 151.24588012695312,
|
|
"margin_dpo/margin_std": 159.92330932617188,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.34508076358296624,
|
|
"fcm_dpo/beta": 0.003356143133714795,
|
|
"fcm_dpo/delta": 0.07040451467037201,
|
|
"fcm_dpo/margin": 98.78762817382812,
|
|
"fcm_dpo/q_t": 0.4242765009403229,
|
|
"grad_norm": 24.10730743408203,
|
|
"learning_rate": 4.1555887447288255e-07,
|
|
"logits/chosen": -0.3934810757637024,
|
|
"logits/rejected": -0.3729557693004608,
|
|
"logps/chosen": -247.36497497558594,
|
|
"logps/ref_chosen": -62.237911224365234,
|
|
"logps/ref_rejected": -90.39506530761719,
|
|
"logps/rejected": -374.30975341796875,
|
|
"loss": 1.1544,
|
|
"margin_dpo/margin_mean": 98.78762817382812,
|
|
"margin_dpo/margin_std": 164.12014770507812,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.3465491923641703,
|
|
"fcm_dpo/beta": 0.0032943575643002987,
|
|
"fcm_dpo/delta": -0.12876766920089722,
|
|
"fcm_dpo/margin": 158.4656982421875,
|
|
"fcm_dpo/q_t": 0.3778604567050934,
|
|
"grad_norm": 42.23731231689453,
|
|
"learning_rate": 4.1459510339613946e-07,
|
|
"logits/chosen": -0.3503900170326233,
|
|
"logits/rejected": -0.34960126876831055,
|
|
"logps/chosen": -188.35073852539062,
|
|
"logps/ref_chosen": -49.34136199951172,
|
|
"logps/ref_rejected": -103.51162719726562,
|
|
"logps/rejected": -400.9866943359375,
|
|
"loss": 0.9814,
|
|
"margin_dpo/margin_mean": 158.4656982421875,
|
|
"margin_dpo/margin_std": 142.0621337890625,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.34801762114537443,
|
|
"fcm_dpo/beta": 0.0032789534889161587,
|
|
"fcm_dpo/delta": 0.005344166420400143,
|
|
"fcm_dpo/margin": 120.41532135009766,
|
|
"fcm_dpo/q_t": 0.41042613983154297,
|
|
"grad_norm": 30.962125778198242,
|
|
"learning_rate": 4.136269950853473e-07,
|
|
"logits/chosen": -0.3941130042076111,
|
|
"logits/rejected": -0.38884979486465454,
|
|
"logps/chosen": -242.329345703125,
|
|
"logps/ref_chosen": -54.168121337890625,
|
|
"logps/ref_rejected": -94.78036499023438,
|
|
"logps/rejected": -403.35693359375,
|
|
"loss": 1.1064,
|
|
"margin_dpo/margin_mean": 120.41531372070312,
|
|
"margin_dpo/margin_std": 176.17005920410156,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.34948604992657856,
|
|
"fcm_dpo/beta": 0.003268222790211439,
|
|
"fcm_dpo/delta": 0.013299603015184402,
|
|
"fcm_dpo/margin": 118.37882995605469,
|
|
"fcm_dpo/q_t": 0.4140799045562744,
|
|
"grad_norm": 29.681880950927734,
|
|
"learning_rate": 4.126545750510605e-07,
|
|
"logits/chosen": -0.3387226164340973,
|
|
"logits/rejected": -0.35407811403274536,
|
|
"logps/chosen": -227.864501953125,
|
|
"logps/ref_chosen": -53.973121643066406,
|
|
"logps/ref_rejected": -89.41795349121094,
|
|
"logps/rejected": -381.68817138671875,
|
|
"loss": 1.1072,
|
|
"margin_dpo/margin_mean": 118.37882995605469,
|
|
"margin_dpo/margin_std": 169.83518981933594,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.3509544787077827,
|
|
"fcm_dpo/beta": 0.00324842007830739,
|
|
"fcm_dpo/delta": -0.031969405710697174,
|
|
"fcm_dpo/margin": 132.2473602294922,
|
|
"fcm_dpo/q_t": 0.40041935443878174,
|
|
"grad_norm": 30.094566345214844,
|
|
"learning_rate": 4.116778689174514e-07,
|
|
"logits/chosen": -0.35974210500717163,
|
|
"logits/rejected": -0.34793728590011597,
|
|
"logps/chosen": -243.16796875,
|
|
"logps/ref_chosen": -58.09782409667969,
|
|
"logps/ref_rejected": -93.59294128417969,
|
|
"logps/rejected": -410.91046142578125,
|
|
"loss": 1.0679,
|
|
"margin_dpo/margin_mean": 132.2473602294922,
|
|
"margin_dpo/margin_std": 157.95797729492188,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.3524229074889868,
|
|
"fcm_dpo/beta": 0.003268931061029434,
|
|
"fcm_dpo/delta": 0.03347586840391159,
|
|
"fcm_dpo/margin": 112.4616928100586,
|
|
"fcm_dpo/q_t": 0.4169153571128845,
|
|
"grad_norm": 38.88202667236328,
|
|
"learning_rate": 4.106969024216348e-07,
|
|
"logits/chosen": -0.390846848487854,
|
|
"logits/rejected": -0.36975374817848206,
|
|
"logps/chosen": -257.6044006347656,
|
|
"logps/ref_chosen": -60.6144905090332,
|
|
"logps/ref_rejected": -74.1185302734375,
|
|
"logps/rejected": -383.57012939453125,
|
|
"loss": 1.1434,
|
|
"margin_dpo/margin_mean": 112.4616928100586,
|
|
"margin_dpo/margin_std": 189.4227752685547,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.35389133627019087,
|
|
"fcm_dpo/beta": 0.0032093606423586607,
|
|
"fcm_dpo/delta": -0.17607250809669495,
|
|
"fcm_dpo/margin": 176.48245239257812,
|
|
"fcm_dpo/q_t": 0.374181866645813,
|
|
"grad_norm": 26.158462524414062,
|
|
"learning_rate": 4.097117014129903e-07,
|
|
"logits/chosen": -0.4116344749927521,
|
|
"logits/rejected": -0.38839179277420044,
|
|
"logps/chosen": -227.10543823242188,
|
|
"logps/ref_chosen": -66.091064453125,
|
|
"logps/ref_rejected": -88.06088256835938,
|
|
"logps/rejected": -425.5577087402344,
|
|
"loss": 0.9888,
|
|
"margin_dpo/margin_mean": 176.48245239257812,
|
|
"margin_dpo/margin_std": 196.57537841796875,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.355359765051395,
|
|
"fcm_dpo/beta": 0.0031865746714174747,
|
|
"fcm_dpo/delta": 0.0012423545122146606,
|
|
"fcm_dpo/margin": 125.03507995605469,
|
|
"fcm_dpo/q_t": 0.4101165533065796,
|
|
"grad_norm": 29.37732696533203,
|
|
"learning_rate": 4.087222918524807e-07,
|
|
"logits/chosen": -0.33866798877716064,
|
|
"logits/rejected": -0.3151482343673706,
|
|
"logps/chosen": -251.06314086914062,
|
|
"logps/ref_chosen": -67.86392974853516,
|
|
"logps/ref_rejected": -83.36033630371094,
|
|
"logps/rejected": -391.5946350097656,
|
|
"loss": 1.0969,
|
|
"margin_dpo/margin_mean": 125.03507995605469,
|
|
"margin_dpo/margin_std": 171.787353515625,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.3568281938325991,
|
|
"fcm_dpo/beta": 0.0031295460648834705,
|
|
"fcm_dpo/delta": -0.08388511836528778,
|
|
"fcm_dpo/margin": 153.2886199951172,
|
|
"fcm_dpo/q_t": 0.39050671458244324,
|
|
"grad_norm": 22.981002807617188,
|
|
"learning_rate": 4.07728699811968e-07,
|
|
"logits/chosen": -0.35215356945991516,
|
|
"logits/rejected": -0.3224591612815857,
|
|
"logps/chosen": -251.0262451171875,
|
|
"logps/ref_chosen": -63.0842399597168,
|
|
"logps/ref_rejected": -76.33563232421875,
|
|
"logps/rejected": -417.56622314453125,
|
|
"loss": 1.0365,
|
|
"margin_dpo/margin_mean": 153.28863525390625,
|
|
"margin_dpo/margin_std": 182.36080932617188,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.35829662261380324,
|
|
"fcm_dpo/beta": 0.0030690422281622887,
|
|
"fcm_dpo/delta": -0.09600830078125,
|
|
"fcm_dpo/margin": 159.98179626464844,
|
|
"fcm_dpo/q_t": 0.38639965653419495,
|
|
"grad_norm": 30.8914737701416,
|
|
"learning_rate": 4.067309514735267e-07,
|
|
"logits/chosen": -0.4122433662414551,
|
|
"logits/rejected": -0.40430814027786255,
|
|
"logps/chosen": -223.86654663085938,
|
|
"logps/ref_chosen": -61.140689849853516,
|
|
"logps/ref_rejected": -94.89193725585938,
|
|
"logps/rejected": -417.5995788574219,
|
|
"loss": 1.0117,
|
|
"margin_dpo/margin_mean": 159.98179626464844,
|
|
"margin_dpo/margin_std": 160.8214111328125,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.35976505139500736,
|
|
"fcm_dpo/beta": 0.0030440008267760277,
|
|
"fcm_dpo/delta": 0.01792435348033905,
|
|
"fcm_dpo/margin": 125.35258483886719,
|
|
"fcm_dpo/q_t": 0.4126400649547577,
|
|
"grad_norm": 25.37367057800293,
|
|
"learning_rate": 4.057290731287531e-07,
|
|
"logits/chosen": -0.3932448625564575,
|
|
"logits/rejected": -0.36441653966903687,
|
|
"logps/chosen": -254.88162231445312,
|
|
"logps/ref_chosen": -67.26228332519531,
|
|
"logps/ref_rejected": -87.64010620117188,
|
|
"logps/rejected": -400.61199951171875,
|
|
"loss": 1.1127,
|
|
"margin_dpo/margin_mean": 125.35258483886719,
|
|
"margin_dpo/margin_std": 174.19549560546875,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.36123348017621143,
|
|
"fcm_dpo/beta": 0.0030658990144729614,
|
|
"fcm_dpo/delta": 0.0024497676640748978,
|
|
"fcm_dpo/margin": 129.66812133789062,
|
|
"fcm_dpo/q_t": 0.4106002449989319,
|
|
"grad_norm": 28.117544174194336,
|
|
"learning_rate": 4.047230911780736e-07,
|
|
"logits/chosen": -0.4509885311126709,
|
|
"logits/rejected": -0.4132448434829712,
|
|
"logps/chosen": -250.50326538085938,
|
|
"logps/ref_chosen": -66.69696807861328,
|
|
"logps/ref_rejected": -84.34634399414062,
|
|
"logps/rejected": -397.8207702636719,
|
|
"loss": 1.0995,
|
|
"margin_dpo/margin_mean": 129.66812133789062,
|
|
"margin_dpo/margin_std": 183.7802734375,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.36270190895741555,
|
|
"fcm_dpo/beta": 0.003005662001669407,
|
|
"fcm_dpo/delta": -0.14235088229179382,
|
|
"fcm_dpo/margin": 177.8526611328125,
|
|
"fcm_dpo/q_t": 0.37811583280563354,
|
|
"grad_norm": 33.17192840576172,
|
|
"learning_rate": 4.0371303213004814e-07,
|
|
"logits/chosen": -0.3711887001991272,
|
|
"logits/rejected": -0.3687683343887329,
|
|
"logps/chosen": -278.5263366699219,
|
|
"logps/ref_chosen": -56.6053466796875,
|
|
"logps/ref_rejected": -106.29326629638672,
|
|
"logps/rejected": -506.06695556640625,
|
|
"loss": 1.0024,
|
|
"margin_dpo/margin_mean": 177.85267639160156,
|
|
"margin_dpo/margin_std": 196.65899658203125,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.3641703377386197,
|
|
"fcm_dpo/beta": 0.002947642235085368,
|
|
"fcm_dpo/delta": -0.05931021273136139,
|
|
"fcm_dpo/margin": 154.6879425048828,
|
|
"fcm_dpo/q_t": 0.39200544357299805,
|
|
"grad_norm": 25.346040725708008,
|
|
"learning_rate": 4.0269892260067197e-07,
|
|
"logits/chosen": -0.3714104890823364,
|
|
"logits/rejected": -0.38998764753341675,
|
|
"logps/chosen": -238.24075317382812,
|
|
"logps/ref_chosen": -44.043216705322266,
|
|
"logps/ref_rejected": -91.85687255859375,
|
|
"logps/rejected": -440.7423400878906,
|
|
"loss": 1.0253,
|
|
"margin_dpo/margin_mean": 154.6879425048828,
|
|
"margin_dpo/margin_std": 143.38400268554688,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.3656387665198238,
|
|
"fcm_dpo/beta": 0.003015869064256549,
|
|
"fcm_dpo/delta": 0.14630991220474243,
|
|
"fcm_dpo/margin": 85.38651275634766,
|
|
"fcm_dpo/q_t": 0.4420652985572815,
|
|
"grad_norm": 42.6093864440918,
|
|
"learning_rate": 4.0168078931267426e-07,
|
|
"logits/chosen": -0.367745578289032,
|
|
"logits/rejected": -0.33774110674858093,
|
|
"logps/chosen": -308.61773681640625,
|
|
"logps/ref_chosen": -62.442352294921875,
|
|
"logps/ref_rejected": -80.46806335449219,
|
|
"logps/rejected": -412.02996826171875,
|
|
"loss": 1.2243,
|
|
"margin_dpo/margin_mean": 85.38652038574219,
|
|
"margin_dpo/margin_std": 190.7613983154297,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.3671071953010279,
|
|
"fcm_dpo/beta": 0.003016442758962512,
|
|
"fcm_dpo/delta": -0.0814606249332428,
|
|
"fcm_dpo/margin": 158.3081817626953,
|
|
"fcm_dpo/q_t": 0.38880687952041626,
|
|
"grad_norm": 40.93179702758789,
|
|
"learning_rate": 4.006586590948141e-07,
|
|
"logits/chosen": -0.39541658759117126,
|
|
"logits/rejected": -0.33937403559684753,
|
|
"logps/chosen": -279.10040283203125,
|
|
"logps/ref_chosen": -65.63668823242188,
|
|
"logps/ref_rejected": -73.87184143066406,
|
|
"logps/rejected": -445.64373779296875,
|
|
"loss": 1.0225,
|
|
"margin_dpo/margin_mean": 158.30816650390625,
|
|
"margin_dpo/margin_std": 162.19927978515625,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.368575624082232,
|
|
"fcm_dpo/beta": 0.003019275376573205,
|
|
"fcm_dpo/delta": 0.08311907947063446,
|
|
"fcm_dpo/margin": 105.85592651367188,
|
|
"fcm_dpo/q_t": 0.4273427128791809,
|
|
"grad_norm": 30.16434669494629,
|
|
"learning_rate": 3.9963255888117325e-07,
|
|
"logits/chosen": -0.38391727209091187,
|
|
"logits/rejected": -0.35589849948883057,
|
|
"logps/chosen": -277.97418212890625,
|
|
"logps/ref_chosen": -57.182716369628906,
|
|
"logps/ref_rejected": -77.66343688964844,
|
|
"logps/rejected": -404.31085205078125,
|
|
"loss": 1.1639,
|
|
"margin_dpo/margin_mean": 105.85592651367188,
|
|
"margin_dpo/margin_std": 178.24061584472656,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.3700440528634361,
|
|
"fcm_dpo/beta": 0.0030162562616169453,
|
|
"fcm_dpo/delta": -0.037618160247802734,
|
|
"fcm_dpo/margin": 144.47677612304688,
|
|
"fcm_dpo/q_t": 0.39547207951545715,
|
|
"grad_norm": 24.476696014404297,
|
|
"learning_rate": 3.9860251571044666e-07,
|
|
"logits/chosen": -0.41118282079696655,
|
|
"logits/rejected": -0.36917877197265625,
|
|
"logps/chosen": -278.718994140625,
|
|
"logps/ref_chosen": -71.68563842773438,
|
|
"logps/ref_rejected": -84.75799560546875,
|
|
"logps/rejected": -436.26806640625,
|
|
"loss": 1.0411,
|
|
"margin_dpo/margin_mean": 144.47677612304688,
|
|
"margin_dpo/margin_std": 140.32473754882812,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.37151248164464024,
|
|
"fcm_dpo/beta": 0.003020418342202902,
|
|
"fcm_dpo/delta": -0.03368496149778366,
|
|
"fcm_dpo/margin": 142.9415283203125,
|
|
"fcm_dpo/q_t": 0.3989100456237793,
|
|
"grad_norm": 25.34270477294922,
|
|
"learning_rate": 3.9756855672522986e-07,
|
|
"logits/chosen": -0.3994063138961792,
|
|
"logits/rejected": -0.3902568817138672,
|
|
"logps/chosen": -246.81979370117188,
|
|
"logps/ref_chosen": -69.1339340209961,
|
|
"logps/ref_rejected": -98.70252990722656,
|
|
"logps/rejected": -419.3299255371094,
|
|
"loss": 1.0651,
|
|
"margin_dpo/margin_mean": 142.94154357910156,
|
|
"margin_dpo/margin_std": 168.36923217773438,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.37298091042584436,
|
|
"fcm_dpo/beta": 0.0030065332539379597,
|
|
"fcm_dpo/delta": 0.028634043410420418,
|
|
"fcm_dpo/margin": 123.87564086914062,
|
|
"fcm_dpo/q_t": 0.4176272749900818,
|
|
"grad_norm": 23.688716888427734,
|
|
"learning_rate": 3.965307091713037e-07,
|
|
"logits/chosen": -0.396982342004776,
|
|
"logits/rejected": -0.38199833035469055,
|
|
"logps/chosen": -232.9241943359375,
|
|
"logps/ref_chosen": -54.154998779296875,
|
|
"logps/ref_rejected": -90.30764770507812,
|
|
"logps/rejected": -392.9524841308594,
|
|
"loss": 1.1322,
|
|
"margin_dpo/margin_mean": 123.87564086914062,
|
|
"margin_dpo/margin_std": 203.29437255859375,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.3744493392070485,
|
|
"fcm_dpo/beta": 0.003004954196512699,
|
|
"fcm_dpo/delta": 0.002638857811689377,
|
|
"fcm_dpo/margin": 132.18743896484375,
|
|
"fcm_dpo/q_t": 0.4083111584186554,
|
|
"grad_norm": 21.265838623046875,
|
|
"learning_rate": 3.954890003969163e-07,
|
|
"logits/chosen": -0.37806227803230286,
|
|
"logits/rejected": -0.36832255125045776,
|
|
"logps/chosen": -245.07669067382812,
|
|
"logps/ref_chosen": -57.14167022705078,
|
|
"logps/ref_rejected": -90.2085952758789,
|
|
"logps/rejected": -410.3310546875,
|
|
"loss": 1.1026,
|
|
"margin_dpo/margin_mean": 132.18743896484375,
|
|
"margin_dpo/margin_std": 184.99835205078125,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.37591776798825255,
|
|
"fcm_dpo/beta": 0.003001492004841566,
|
|
"fcm_dpo/delta": -0.032948367297649384,
|
|
"fcm_dpo/margin": 143.77066040039062,
|
|
"fcm_dpo/q_t": 0.40088456869125366,
|
|
"grad_norm": 21.909685134887695,
|
|
"learning_rate": 3.944434578520628e-07,
|
|
"logits/chosen": -0.3176652789115906,
|
|
"logits/rejected": -0.3248659372329712,
|
|
"logps/chosen": -223.44412231445312,
|
|
"logps/ref_chosen": -55.163490295410156,
|
|
"logps/ref_rejected": -92.56291961669922,
|
|
"logps/rejected": -404.61419677734375,
|
|
"loss": 1.07,
|
|
"margin_dpo/margin_mean": 143.77066040039062,
|
|
"margin_dpo/margin_std": 182.72714233398438,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.37738619676945667,
|
|
"fcm_dpo/beta": 0.0029555135406553745,
|
|
"fcm_dpo/delta": -0.04563986137509346,
|
|
"fcm_dpo/margin": 149.67494201660156,
|
|
"fcm_dpo/q_t": 0.3995245099067688,
|
|
"grad_norm": 19.213043212890625,
|
|
"learning_rate": 3.933941090877615e-07,
|
|
"logits/chosen": -0.33717894554138184,
|
|
"logits/rejected": -0.32419469952583313,
|
|
"logps/chosen": -221.68699645996094,
|
|
"logps/ref_chosen": -49.42369842529297,
|
|
"logps/ref_rejected": -79.53791809082031,
|
|
"logps/rejected": -401.47613525390625,
|
|
"loss": 1.0699,
|
|
"margin_dpo/margin_mean": 149.67494201660156,
|
|
"margin_dpo/margin_std": 189.41546630859375,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.3788546255506608,
|
|
"fcm_dpo/beta": 0.0029583657160401344,
|
|
"fcm_dpo/delta": -0.03949305787682533,
|
|
"fcm_dpo/margin": 147.97097778320312,
|
|
"fcm_dpo/q_t": 0.3986639380455017,
|
|
"grad_norm": 27.519527435302734,
|
|
"learning_rate": 3.923409817553284e-07,
|
|
"logits/chosen": -0.3358303904533386,
|
|
"logits/rejected": -0.33348649740219116,
|
|
"logps/chosen": -284.0785827636719,
|
|
"logps/ref_chosen": -59.384124755859375,
|
|
"logps/ref_rejected": -95.99010467529297,
|
|
"logps/rejected": -468.655517578125,
|
|
"loss": 1.0828,
|
|
"margin_dpo/margin_mean": 147.97097778320312,
|
|
"margin_dpo/margin_std": 206.81240844726562,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.3803230543318649,
|
|
"fcm_dpo/beta": 0.0029501118697226048,
|
|
"fcm_dpo/delta": 0.03456338495016098,
|
|
"fcm_dpo/margin": 124.2512435913086,
|
|
"fcm_dpo/q_t": 0.41663050651550293,
|
|
"grad_norm": 22.547470092773438,
|
|
"learning_rate": 3.9128410360564793e-07,
|
|
"logits/chosen": -0.37859004735946655,
|
|
"logits/rejected": -0.37319231033325195,
|
|
"logps/chosen": -262.14630126953125,
|
|
"logps/ref_chosen": -52.828346252441406,
|
|
"logps/ref_rejected": -89.191650390625,
|
|
"logps/rejected": -422.7608642578125,
|
|
"loss": 1.1244,
|
|
"margin_dpo/margin_mean": 124.25125122070312,
|
|
"margin_dpo/margin_std": 185.36854553222656,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.38179148311306904,
|
|
"fcm_dpo/beta": 0.0029368563555181026,
|
|
"fcm_dpo/delta": -0.09315244853496552,
|
|
"fcm_dpo/margin": 166.3863525390625,
|
|
"fcm_dpo/q_t": 0.3896779716014862,
|
|
"grad_norm": 29.52651596069336,
|
|
"learning_rate": 3.9022350248844246e-07,
|
|
"logits/chosen": -0.37252455949783325,
|
|
"logits/rejected": -0.38852792978286743,
|
|
"logps/chosen": -253.25216674804688,
|
|
"logps/ref_chosen": -47.41767501831055,
|
|
"logps/ref_rejected": -95.08978271484375,
|
|
"logps/rejected": -467.31060791015625,
|
|
"loss": 1.0226,
|
|
"margin_dpo/margin_mean": 166.3863525390625,
|
|
"margin_dpo/margin_std": 186.8936004638672,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.3832599118942731,
|
|
"fcm_dpo/beta": 0.0028830531518906355,
|
|
"fcm_dpo/delta": -0.047782331705093384,
|
|
"fcm_dpo/margin": 154.48388671875,
|
|
"fcm_dpo/q_t": 0.39917880296707153,
|
|
"grad_norm": 22.343847274780273,
|
|
"learning_rate": 3.891592063515376e-07,
|
|
"logits/chosen": -0.33259785175323486,
|
|
"logits/rejected": -0.33178287744522095,
|
|
"logps/chosen": -259.1055603027344,
|
|
"logps/ref_chosen": -53.03137969970703,
|
|
"logps/ref_rejected": -88.51494598388672,
|
|
"logps/rejected": -449.072998046875,
|
|
"loss": 1.0681,
|
|
"margin_dpo/margin_mean": 154.48391723632812,
|
|
"margin_dpo/margin_std": 203.65478515625,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.38472834067547723,
|
|
"fcm_dpo/beta": 0.0029039657674729824,
|
|
"fcm_dpo/delta": 0.02600773237645626,
|
|
"fcm_dpo/margin": 129.03204345703125,
|
|
"fcm_dpo/q_t": 0.41260138154029846,
|
|
"grad_norm": 23.152450561523438,
|
|
"learning_rate": 3.880912432401264e-07,
|
|
"logits/chosen": -0.33915072679519653,
|
|
"logits/rejected": -0.3117906153202057,
|
|
"logps/chosen": -296.1290283203125,
|
|
"logps/ref_chosen": -59.620140075683594,
|
|
"logps/ref_rejected": -86.41853332519531,
|
|
"logps/rejected": -451.9594421386719,
|
|
"loss": 1.0985,
|
|
"margin_dpo/margin_mean": 129.03204345703125,
|
|
"margin_dpo/margin_std": 162.029296875,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.38619676945668135,
|
|
"fcm_dpo/beta": 0.0028360923752188683,
|
|
"fcm_dpo/delta": -0.1183972954750061,
|
|
"fcm_dpo/margin": 180.35324096679688,
|
|
"fcm_dpo/q_t": 0.3831443786621094,
|
|
"grad_norm": 25.438756942749023,
|
|
"learning_rate": 3.870196412960302e-07,
|
|
"logits/chosen": -0.36076992750167847,
|
|
"logits/rejected": -0.339572936296463,
|
|
"logps/chosen": -272.611083984375,
|
|
"logps/ref_chosen": -59.42094421386719,
|
|
"logps/ref_rejected": -96.85720825195312,
|
|
"logps/rejected": -490.400634765625,
|
|
"loss": 1.0191,
|
|
"margin_dpo/margin_mean": 180.35324096679688,
|
|
"margin_dpo/margin_std": 207.73788452148438,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.3876651982378855,
|
|
"fcm_dpo/beta": 0.0027964303735643625,
|
|
"fcm_dpo/delta": -0.035919900983572006,
|
|
"fcm_dpo/margin": 154.90243530273438,
|
|
"fcm_dpo/q_t": 0.4014216661453247,
|
|
"grad_norm": 25.1867733001709,
|
|
"learning_rate": 3.8594442875695665e-07,
|
|
"logits/chosen": -0.37755894660949707,
|
|
"logits/rejected": -0.3679601550102234,
|
|
"logps/chosen": -286.45306396484375,
|
|
"logps/ref_chosen": -62.722084045410156,
|
|
"logps/ref_rejected": -93.85620880126953,
|
|
"logps/rejected": -472.4896545410156,
|
|
"loss": 1.0749,
|
|
"margin_dpo/margin_mean": 154.90243530273438,
|
|
"margin_dpo/margin_std": 199.20541381835938,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.3891336270190896,
|
|
"fcm_dpo/beta": 0.002805937547236681,
|
|
"fcm_dpo/delta": -0.010240239091217518,
|
|
"fcm_dpo/margin": 146.05377197265625,
|
|
"fcm_dpo/q_t": 0.40965187549591064,
|
|
"grad_norm": 26.549524307250977,
|
|
"learning_rate": 3.848656339557562e-07,
|
|
"logits/chosen": -0.3260389268398285,
|
|
"logits/rejected": -0.3126361668109894,
|
|
"logps/chosen": -311.82476806640625,
|
|
"logps/ref_chosen": -61.971466064453125,
|
|
"logps/ref_rejected": -88.02059936523438,
|
|
"logps/rejected": -483.92767333984375,
|
|
"loss": 1.1221,
|
|
"margin_dpo/margin_mean": 146.05377197265625,
|
|
"margin_dpo/margin_std": 243.52978515625,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.39060205580029367,
|
|
"fcm_dpo/beta": 0.002838346641510725,
|
|
"fcm_dpo/delta": 0.07484833896160126,
|
|
"fcm_dpo/margin": 115.4014892578125,
|
|
"fcm_dpo/q_t": 0.425703227519989,
|
|
"grad_norm": 57.709693908691406,
|
|
"learning_rate": 3.8378328531967507e-07,
|
|
"logits/chosen": -0.3606586456298828,
|
|
"logits/rejected": -0.3276086449623108,
|
|
"logps/chosen": -325.3542785644531,
|
|
"logps/ref_chosen": -67.09967041015625,
|
|
"logps/ref_rejected": -67.97122192382812,
|
|
"logps/rejected": -441.6273498535156,
|
|
"loss": 1.1584,
|
|
"margin_dpo/margin_mean": 115.4014892578125,
|
|
"margin_dpo/margin_std": 196.9221954345703,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.3920704845814978,
|
|
"fcm_dpo/beta": 0.0028311798814684153,
|
|
"fcm_dpo/delta": -0.015104478225111961,
|
|
"fcm_dpo/margin": 146.29811096191406,
|
|
"fcm_dpo/q_t": 0.4068409502506256,
|
|
"grad_norm": 31.23808479309082,
|
|
"learning_rate": 3.8269741136960646e-07,
|
|
"logits/chosen": -0.3521597981452942,
|
|
"logits/rejected": -0.32346588373184204,
|
|
"logps/chosen": -300.9726257324219,
|
|
"logps/ref_chosen": -68.97075653076172,
|
|
"logps/ref_rejected": -90.16844940185547,
|
|
"logps/rejected": -468.4684143066406,
|
|
"loss": 1.1043,
|
|
"margin_dpo/margin_mean": 146.298095703125,
|
|
"margin_dpo/margin_std": 222.26060485839844,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.3935389133627019,
|
|
"fcm_dpo/beta": 0.002821648493409157,
|
|
"fcm_dpo/delta": -0.019557194784283638,
|
|
"fcm_dpo/margin": 148.31320190429688,
|
|
"fcm_dpo/q_t": 0.40599367022514343,
|
|
"grad_norm": 28.47870445251465,
|
|
"learning_rate": 3.8160804071933894e-07,
|
|
"logits/chosen": -0.31350523233413696,
|
|
"logits/rejected": -0.3184083104133606,
|
|
"logps/chosen": -297.250732421875,
|
|
"logps/ref_chosen": -55.90031051635742,
|
|
"logps/ref_rejected": -101.64763641357422,
|
|
"logps/rejected": -491.311279296875,
|
|
"loss": 1.098,
|
|
"margin_dpo/margin_mean": 148.3131866455078,
|
|
"margin_dpo/margin_std": 220.14576721191406,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.39500734214390604,
|
|
"fcm_dpo/beta": 0.002787231467664242,
|
|
"fcm_dpo/delta": -0.10659514367580414,
|
|
"fcm_dpo/margin": 179.87583923339844,
|
|
"fcm_dpo/q_t": 0.3871752619743347,
|
|
"grad_norm": 26.379419326782227,
|
|
"learning_rate": 3.8051520207480204e-07,
|
|
"logits/chosen": -0.3805098831653595,
|
|
"logits/rejected": -0.3652857542037964,
|
|
"logps/chosen": -320.774658203125,
|
|
"logps/ref_chosen": -70.03955841064453,
|
|
"logps/ref_rejected": -107.34937286376953,
|
|
"logps/rejected": -537.9603271484375,
|
|
"loss": 1.0468,
|
|
"margin_dpo/margin_mean": 179.8758544921875,
|
|
"margin_dpo/margin_std": 237.33578491210938,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.3964757709251101,
|
|
"fcm_dpo/beta": 0.002790778409689665,
|
|
"fcm_dpo/delta": 0.036502670496702194,
|
|
"fcm_dpo/margin": 130.6387939453125,
|
|
"fcm_dpo/q_t": 0.4163072109222412,
|
|
"grad_norm": 28.952787399291992,
|
|
"learning_rate": 3.794189242333106e-07,
|
|
"logits/chosen": -0.3982747793197632,
|
|
"logits/rejected": -0.3937312960624695,
|
|
"logps/chosen": -285.7011413574219,
|
|
"logps/ref_chosen": -69.53347778320312,
|
|
"logps/ref_rejected": -109.92864990234375,
|
|
"logps/rejected": -456.735107421875,
|
|
"loss": 1.1297,
|
|
"margin_dpo/margin_mean": 130.6387939453125,
|
|
"margin_dpo/margin_std": 202.16256713867188,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.39794419970631423,
|
|
"fcm_dpo/beta": 0.0027565429918468,
|
|
"fcm_dpo/delta": -0.058672454208135605,
|
|
"fcm_dpo/margin": 165.3160858154297,
|
|
"fcm_dpo/q_t": 0.39579975605010986,
|
|
"grad_norm": 23.75882911682129,
|
|
"learning_rate": 3.7831923608280514e-07,
|
|
"logits/chosen": -0.3246617317199707,
|
|
"logits/rejected": -0.3058730959892273,
|
|
"logps/chosen": -254.30191040039062,
|
|
"logps/ref_chosen": -56.76456832885742,
|
|
"logps/ref_rejected": -92.51383972167969,
|
|
"logps/rejected": -455.36724853515625,
|
|
"loss": 1.0457,
|
|
"margin_dpo/margin_mean": 165.3160858154297,
|
|
"margin_dpo/margin_std": 191.5840606689453,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.39941262848751835,
|
|
"fcm_dpo/beta": 0.002703585661947727,
|
|
"fcm_dpo/delta": -0.15685208141803741,
|
|
"fcm_dpo/margin": 202.88958740234375,
|
|
"fcm_dpo/q_t": 0.3729744553565979,
|
|
"grad_norm": 36.98969268798828,
|
|
"learning_rate": 3.772161666010912e-07,
|
|
"logits/chosen": -0.2816220223903656,
|
|
"logits/rejected": -0.29340463876724243,
|
|
"logps/chosen": -241.36251831054688,
|
|
"logps/ref_chosen": -49.497154235839844,
|
|
"logps/ref_rejected": -105.54279327392578,
|
|
"logps/rejected": -500.2977600097656,
|
|
"loss": 0.9761,
|
|
"margin_dpo/margin_mean": 202.88958740234375,
|
|
"margin_dpo/margin_std": 193.3899688720703,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.4008810572687225,
|
|
"fcm_dpo/beta": 0.0026178741827607155,
|
|
"fcm_dpo/delta": -0.11314442753791809,
|
|
"fcm_dpo/margin": 193.62213134765625,
|
|
"fcm_dpo/q_t": 0.3831535577774048,
|
|
"grad_norm": 28.243759155273438,
|
|
"learning_rate": 3.761097448550755e-07,
|
|
"logits/chosen": -0.29644063115119934,
|
|
"logits/rejected": -0.279215931892395,
|
|
"logps/chosen": -290.4288330078125,
|
|
"logps/ref_chosen": -62.97539520263672,
|
|
"logps/ref_rejected": -92.49858093261719,
|
|
"logps/rejected": -513.5741577148438,
|
|
"loss": 1.0121,
|
|
"margin_dpo/margin_mean": 193.62213134765625,
|
|
"margin_dpo/margin_std": 209.56016540527344,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.4023494860499266,
|
|
"fcm_dpo/beta": 0.002625478897243738,
|
|
"fcm_dpo/delta": 0.020765498280525208,
|
|
"fcm_dpo/margin": 144.68017578125,
|
|
"fcm_dpo/q_t": 0.4123363792896271,
|
|
"grad_norm": 32.35233688354492,
|
|
"learning_rate": 3.75e-07,
|
|
"logits/chosen": -0.21601241827011108,
|
|
"logits/rejected": -0.19805273413658142,
|
|
"logps/chosen": -323.8441162109375,
|
|
"logps/ref_chosen": -55.66770935058594,
|
|
"logps/ref_rejected": -77.33308410644531,
|
|
"logps/rejected": -490.18963623046875,
|
|
"loss": 1.1032,
|
|
"margin_dpo/margin_mean": 144.68017578125,
|
|
"margin_dpo/margin_std": 193.43875122070312,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.40381791483113066,
|
|
"fcm_dpo/beta": 0.0026261399034410715,
|
|
"fcm_dpo/delta": -0.00641494058072567,
|
|
"fcm_dpo/margin": 154.6136016845703,
|
|
"fcm_dpo/q_t": 0.4062380790710449,
|
|
"grad_norm": 42.94489669799805,
|
|
"learning_rate": 3.738869612786737e-07,
|
|
"logits/chosen": -0.3160795271396637,
|
|
"logits/rejected": -0.32304587960243225,
|
|
"logps/chosen": -257.8700866699219,
|
|
"logps/ref_chosen": -48.594703674316406,
|
|
"logps/ref_rejected": -93.30369567871094,
|
|
"logps/rejected": -457.1926574707031,
|
|
"loss": 1.0864,
|
|
"margin_dpo/margin_mean": 154.61358642578125,
|
|
"margin_dpo/margin_std": 199.8903350830078,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.4052863436123348,
|
|
"fcm_dpo/beta": 0.0026143963914364576,
|
|
"fcm_dpo/delta": 0.0011903084814548492,
|
|
"fcm_dpo/margin": 152.5404815673828,
|
|
"fcm_dpo/q_t": 0.40908658504486084,
|
|
"grad_norm": 27.080881118774414,
|
|
"learning_rate": 3.7277065802070204e-07,
|
|
"logits/chosen": -0.2598373293876648,
|
|
"logits/rejected": -0.23683007061481476,
|
|
"logps/chosen": -282.9354248046875,
|
|
"logps/ref_chosen": -56.57740783691406,
|
|
"logps/ref_rejected": -70.36566925048828,
|
|
"logps/rejected": -449.26416015625,
|
|
"loss": 1.0986,
|
|
"margin_dpo/margin_mean": 152.5404815673828,
|
|
"margin_dpo/margin_std": 212.240966796875,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.4067547723935389,
|
|
"fcm_dpo/beta": 0.0026140885893255472,
|
|
"fcm_dpo/delta": -0.024904295802116394,
|
|
"fcm_dpo/margin": 162.13217163085938,
|
|
"fcm_dpo/q_t": 0.40205782651901245,
|
|
"grad_norm": 36.253150939941406,
|
|
"learning_rate": 3.71651119641714e-07,
|
|
"logits/chosen": -0.26820749044418335,
|
|
"logits/rejected": -0.2528313994407654,
|
|
"logps/chosen": -305.7073669433594,
|
|
"logps/ref_chosen": -56.27156066894531,
|
|
"logps/ref_rejected": -92.88127136230469,
|
|
"logps/rejected": -504.4492492675781,
|
|
"loss": 1.0753,
|
|
"margin_dpo/margin_mean": 162.13217163085938,
|
|
"margin_dpo/margin_std": 206.08795166015625,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.40822320117474303,
|
|
"fcm_dpo/beta": 0.0025698295794427395,
|
|
"fcm_dpo/delta": -0.0788620337843895,
|
|
"fcm_dpo/margin": 184.778076171875,
|
|
"fcm_dpo/q_t": 0.39111045002937317,
|
|
"grad_norm": 27.628742218017578,
|
|
"learning_rate": 3.705283756425872e-07,
|
|
"logits/chosen": -0.2766546607017517,
|
|
"logits/rejected": -0.28136929869651794,
|
|
"logps/chosen": -274.0843811035156,
|
|
"logps/ref_chosen": -52.94194030761719,
|
|
"logps/ref_rejected": -91.25357818603516,
|
|
"logps/rejected": -497.1741027832031,
|
|
"loss": 1.0325,
|
|
"margin_dpo/margin_mean": 184.778076171875,
|
|
"margin_dpo/margin_std": 208.0440673828125,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.40969162995594716,
|
|
"fcm_dpo/beta": 0.002526093740016222,
|
|
"fcm_dpo/delta": -0.06635798513889313,
|
|
"fcm_dpo/margin": 183.02890014648438,
|
|
"fcm_dpo/q_t": 0.3952983319759369,
|
|
"grad_norm": 28.49809455871582,
|
|
"learning_rate": 3.6940245560867e-07,
|
|
"logits/chosen": -0.2253936529159546,
|
|
"logits/rejected": -0.22515779733657837,
|
|
"logps/chosen": -302.7215576171875,
|
|
"logps/ref_chosen": -48.641319274902344,
|
|
"logps/ref_rejected": -87.8514404296875,
|
|
"logps/rejected": -524.9605712890625,
|
|
"loss": 1.0589,
|
|
"margin_dpo/margin_mean": 183.02891540527344,
|
|
"margin_dpo/margin_std": 234.94747924804688,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.4111600587371512,
|
|
"fcm_dpo/beta": 0.002501129638403654,
|
|
"fcm_dpo/delta": -0.07484998553991318,
|
|
"fcm_dpo/margin": 188.41152954101562,
|
|
"fcm_dpo/q_t": 0.38939058780670166,
|
|
"grad_norm": 25.34486961364746,
|
|
"learning_rate": 3.6827338920900253e-07,
|
|
"logits/chosen": -0.27106496691703796,
|
|
"logits/rejected": -0.27247706055641174,
|
|
"logps/chosen": -309.34259033203125,
|
|
"logps/ref_chosen": -58.797122955322266,
|
|
"logps/ref_rejected": -98.61885070800781,
|
|
"logps/rejected": -537.5758056640625,
|
|
"loss": 1.0243,
|
|
"margin_dpo/margin_mean": 188.41152954101562,
|
|
"margin_dpo/margin_std": 193.04116821289062,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.41262848751835535,
|
|
"fcm_dpo/beta": 0.00248980731703341,
|
|
"fcm_dpo/delta": -0.030596543103456497,
|
|
"fcm_dpo/margin": 172.38308715820312,
|
|
"fcm_dpo/q_t": 0.3983362019062042,
|
|
"grad_norm": 27.566728591918945,
|
|
"learning_rate": 3.6714120619553435e-07,
|
|
"logits/chosen": -0.3100356459617615,
|
|
"logits/rejected": -0.28896278142929077,
|
|
"logps/chosen": -283.4266662597656,
|
|
"logps/ref_chosen": -55.488521575927734,
|
|
"logps/ref_rejected": -80.88258361816406,
|
|
"logps/rejected": -481.2038269042969,
|
|
"loss": 1.0558,
|
|
"margin_dpo/margin_mean": 172.38308715820312,
|
|
"margin_dpo/margin_std": 188.12759399414062,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.41409691629955947,
|
|
"fcm_dpo/beta": 0.0025168233551084995,
|
|
"fcm_dpo/delta": 0.07814561575651169,
|
|
"fcm_dpo/margin": 128.67344665527344,
|
|
"fcm_dpo/q_t": 0.42755842208862305,
|
|
"grad_norm": 23.409597396850586,
|
|
"learning_rate": 3.660059364023408e-07,
|
|
"logits/chosen": -0.3624737560749054,
|
|
"logits/rejected": -0.3413255214691162,
|
|
"logps/chosen": -328.95660400390625,
|
|
"logps/ref_chosen": -73.07014465332031,
|
|
"logps/ref_rejected": -95.35098266601562,
|
|
"logps/rejected": -479.910888671875,
|
|
"loss": 1.1522,
|
|
"margin_dpo/margin_mean": 128.6734619140625,
|
|
"margin_dpo/margin_std": 211.4798583984375,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.4155653450807636,
|
|
"fcm_dpo/beta": 0.0024866703897714615,
|
|
"fcm_dpo/delta": -0.10790442675352097,
|
|
"fcm_dpo/margin": 202.11631774902344,
|
|
"fcm_dpo/q_t": 0.3826720118522644,
|
|
"grad_norm": 26.5286808013916,
|
|
"learning_rate": 3.6486760974483685e-07,
|
|
"logits/chosen": -0.317167729139328,
|
|
"logits/rejected": -0.3204939663410187,
|
|
"logps/chosen": -317.53839111328125,
|
|
"logps/ref_chosen": -61.89844512939453,
|
|
"logps/ref_rejected": -96.98655700683594,
|
|
"logps/rejected": -554.7427978515625,
|
|
"loss": 1.0059,
|
|
"margin_dpo/margin_mean": 202.11631774902344,
|
|
"margin_dpo/margin_std": 201.07449340820312,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.4170337738619677,
|
|
"fcm_dpo/beta": 0.0024385638535022736,
|
|
"fcm_dpo/delta": -0.06003139913082123,
|
|
"fcm_dpo/margin": 187.47438049316406,
|
|
"fcm_dpo/q_t": 0.394646555185318,
|
|
"grad_norm": 31.048564910888672,
|
|
"learning_rate": 3.6372625621898863e-07,
|
|
"logits/chosen": -0.3580009341239929,
|
|
"logits/rejected": -0.344012975692749,
|
|
"logps/chosen": -303.29498291015625,
|
|
"logps/ref_chosen": -58.4355354309082,
|
|
"logps/ref_rejected": -93.46926879882812,
|
|
"logps/rejected": -525.8031005859375,
|
|
"loss": 1.037,
|
|
"margin_dpo/margin_mean": 187.474365234375,
|
|
"margin_dpo/margin_std": 205.1959228515625,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.4185022026431718,
|
|
"fcm_dpo/beta": 0.002448021899908781,
|
|
"fcm_dpo/delta": 0.003494247794151306,
|
|
"fcm_dpo/margin": 161.81854248046875,
|
|
"fcm_dpo/q_t": 0.40617606043815613,
|
|
"grad_norm": 28.366336822509766,
|
|
"learning_rate": 3.625819059005228e-07,
|
|
"logits/chosen": -0.34775876998901367,
|
|
"logits/rejected": -0.3358742296695709,
|
|
"logps/chosen": -342.4769287109375,
|
|
"logps/ref_chosen": -66.23219299316406,
|
|
"logps/ref_rejected": -99.1268310546875,
|
|
"logps/rejected": -537.1901245117188,
|
|
"loss": 1.0806,
|
|
"margin_dpo/margin_mean": 161.8185272216797,
|
|
"margin_dpo/margin_std": 185.91748046875,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.4199706314243759,
|
|
"fcm_dpo/beta": 0.0024225222878158092,
|
|
"fcm_dpo/delta": -0.04691235348582268,
|
|
"fcm_dpo/margin": 183.6310577392578,
|
|
"fcm_dpo/q_t": 0.3971766233444214,
|
|
"grad_norm": 32.24759292602539,
|
|
"learning_rate": 3.614345889441346e-07,
|
|
"logits/chosen": -0.3304685950279236,
|
|
"logits/rejected": -0.3126610517501831,
|
|
"logps/chosen": -349.9842224121094,
|
|
"logps/ref_chosen": -72.95100402832031,
|
|
"logps/ref_rejected": -88.58845520019531,
|
|
"logps/rejected": -549.2527465820312,
|
|
"loss": 1.0587,
|
|
"margin_dpo/margin_mean": 183.6310577392578,
|
|
"margin_dpo/margin_std": 223.42913818359375,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.42143906020558003,
|
|
"fcm_dpo/beta": 0.0024321102537214756,
|
|
"fcm_dpo/delta": 0.04561718553304672,
|
|
"fcm_dpo/margin": 146.354736328125,
|
|
"fcm_dpo/q_t": 0.4179733395576477,
|
|
"grad_norm": 27.742694854736328,
|
|
"learning_rate": 3.6028433558269275e-07,
|
|
"logits/chosen": -0.3509722948074341,
|
|
"logits/rejected": -0.3353240489959717,
|
|
"logps/chosen": -324.606201171875,
|
|
"logps/ref_chosen": -61.54115295410156,
|
|
"logps/ref_rejected": -77.69607543945312,
|
|
"logps/rejected": -487.1158447265625,
|
|
"loss": 1.1162,
|
|
"margin_dpo/margin_mean": 146.354736328125,
|
|
"margin_dpo/margin_std": 199.26669311523438,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.42290748898678415,
|
|
"fcm_dpo/beta": 0.002393337432295084,
|
|
"fcm_dpo/delta": -0.06459330767393112,
|
|
"fcm_dpo/margin": 192.16741943359375,
|
|
"fcm_dpo/q_t": 0.3921490013599396,
|
|
"grad_norm": 27.812746047973633,
|
|
"learning_rate": 3.5913117612644327e-07,
|
|
"logits/chosen": -0.3211832344532013,
|
|
"logits/rejected": -0.3044850826263428,
|
|
"logps/chosen": -312.4546813964844,
|
|
"logps/ref_chosen": -56.661224365234375,
|
|
"logps/ref_rejected": -87.33570098876953,
|
|
"logps/rejected": -535.296630859375,
|
|
"loss": 1.0334,
|
|
"margin_dpo/margin_mean": 192.1674346923828,
|
|
"margin_dpo/margin_std": 192.72792053222656,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.4243759177679883,
|
|
"fcm_dpo/beta": 0.0023707286454737186,
|
|
"fcm_dpo/delta": -0.11460113525390625,
|
|
"fcm_dpo/margin": 214.66830444335938,
|
|
"fcm_dpo/q_t": 0.38350093364715576,
|
|
"grad_norm": 27.846128463745117,
|
|
"learning_rate": 3.5797514096221024e-07,
|
|
"logits/chosen": -0.308102011680603,
|
|
"logits/rejected": -0.30951741337776184,
|
|
"logps/chosen": -305.4954833984375,
|
|
"logps/ref_chosen": -45.23039245605469,
|
|
"logps/ref_rejected": -87.64266967773438,
|
|
"logps/rejected": -562.5761108398438,
|
|
"loss": 1.0098,
|
|
"margin_dpo/margin_mean": 214.66830444335938,
|
|
"margin_dpo/margin_std": 229.25790405273438,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.42584434654919234,
|
|
"fcm_dpo/beta": 0.002311383606866002,
|
|
"fcm_dpo/delta": -0.10088926553726196,
|
|
"fcm_dpo/margin": 214.4608917236328,
|
|
"fcm_dpo/q_t": 0.38838887214660645,
|
|
"grad_norm": 24.528976440429688,
|
|
"learning_rate": 3.568162605525952e-07,
|
|
"logits/chosen": -0.28302210569381714,
|
|
"logits/rejected": -0.30255717039108276,
|
|
"logps/chosen": -338.3675231933594,
|
|
"logps/ref_chosen": -55.47149658203125,
|
|
"logps/ref_rejected": -116.70857238769531,
|
|
"logps/rejected": -614.0654296875,
|
|
"loss": 1.0326,
|
|
"margin_dpo/margin_mean": 214.46090698242188,
|
|
"margin_dpo/margin_std": 262.3920593261719,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.42731277533039647,
|
|
"fcm_dpo/beta": 0.0022962733637541533,
|
|
"fcm_dpo/delta": -0.051323793828487396,
|
|
"fcm_dpo/margin": 195.45303344726562,
|
|
"fcm_dpo/q_t": 0.3957386016845703,
|
|
"grad_norm": 24.162464141845703,
|
|
"learning_rate": 3.5565456543517485e-07,
|
|
"logits/chosen": -0.30087271332740784,
|
|
"logits/rejected": -0.2888765335083008,
|
|
"logps/chosen": -307.3430480957031,
|
|
"logps/ref_chosen": -63.26036834716797,
|
|
"logps/ref_rejected": -89.29708862304688,
|
|
"logps/rejected": -528.8328247070312,
|
|
"loss": 1.0536,
|
|
"margin_dpo/margin_mean": 195.45303344726562,
|
|
"margin_dpo/margin_std": 227.96817016601562,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.4287812041116006,
|
|
"fcm_dpo/beta": 0.002262428868561983,
|
|
"fcm_dpo/delta": -0.07314444333314896,
|
|
"fcm_dpo/margin": 207.6175537109375,
|
|
"fcm_dpo/q_t": 0.392647922039032,
|
|
"grad_norm": 23.157028198242188,
|
|
"learning_rate": 3.5449008622169583e-07,
|
|
"logits/chosen": -0.3102809488773346,
|
|
"logits/rejected": -0.29813438653945923,
|
|
"logps/chosen": -320.7266540527344,
|
|
"logps/ref_chosen": -53.91852951049805,
|
|
"logps/ref_rejected": -89.96138000488281,
|
|
"logps/rejected": -564.3870849609375,
|
|
"loss": 1.056,
|
|
"margin_dpo/margin_mean": 207.6175537109375,
|
|
"margin_dpo/margin_std": 268.990234375,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.4302496328928047,
|
|
"fcm_dpo/beta": 0.002260031644254923,
|
|
"fcm_dpo/delta": 0.028196241706609726,
|
|
"fcm_dpo/margin": 164.92308044433594,
|
|
"fcm_dpo/q_t": 0.41596880555152893,
|
|
"grad_norm": 51.17985534667969,
|
|
"learning_rate": 3.5332285359726846e-07,
|
|
"logits/chosen": -0.3236939013004303,
|
|
"logits/rejected": -0.3180049955844879,
|
|
"logps/chosen": -321.3856201171875,
|
|
"logps/ref_chosen": -60.376033782958984,
|
|
"logps/ref_rejected": -77.85244750976562,
|
|
"logps/rejected": -503.78509521484375,
|
|
"loss": 1.1218,
|
|
"margin_dpo/margin_mean": 164.92306518554688,
|
|
"margin_dpo/margin_std": 251.44886779785156,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.43171806167400884,
|
|
"fcm_dpo/beta": 0.0022536704782396555,
|
|
"fcm_dpo/delta": -0.0008471310138702393,
|
|
"fcm_dpo/margin": 177.8052978515625,
|
|
"fcm_dpo/q_t": 0.408719003200531,
|
|
"grad_norm": 20.008682250976562,
|
|
"learning_rate": 3.5215289831955786e-07,
|
|
"logits/chosen": -0.3001745939254761,
|
|
"logits/rejected": -0.3048727512359619,
|
|
"logps/chosen": -286.2076721191406,
|
|
"logps/ref_chosen": -48.0875358581543,
|
|
"logps/ref_rejected": -81.89698791503906,
|
|
"logps/rejected": -497.82244873046875,
|
|
"loss": 1.0938,
|
|
"margin_dpo/margin_mean": 177.8052978515625,
|
|
"margin_dpo/margin_std": 241.23526000976562,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.4331864904552129,
|
|
"fcm_dpo/beta": 0.002245218027383089,
|
|
"fcm_dpo/delta": -0.04526704549789429,
|
|
"fcm_dpo/margin": 197.43405151367188,
|
|
"fcm_dpo/q_t": 0.39877989888191223,
|
|
"grad_norm": 25.436016082763672,
|
|
"learning_rate": 3.509802512179737e-07,
|
|
"logits/chosen": -0.3277565836906433,
|
|
"logits/rejected": -0.3330543041229248,
|
|
"logps/chosen": -333.48773193359375,
|
|
"logps/ref_chosen": -49.92467498779297,
|
|
"logps/ref_rejected": -87.45632934570312,
|
|
"logps/rejected": -568.4534301757812,
|
|
"loss": 1.0659,
|
|
"margin_dpo/margin_mean": 197.43405151367188,
|
|
"margin_dpo/margin_std": 250.5416717529297,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.434654919236417,
|
|
"fcm_dpo/beta": 0.002252609934657812,
|
|
"fcm_dpo/delta": 0.08743564784526825,
|
|
"fcm_dpo/margin": 139.83860778808594,
|
|
"fcm_dpo/q_t": 0.42798036336898804,
|
|
"grad_norm": 43.92038345336914,
|
|
"learning_rate": 3.498049431928577e-07,
|
|
"logits/chosen": -0.3475581109523773,
|
|
"logits/rejected": -0.33524176478385925,
|
|
"logps/chosen": -408.71044921875,
|
|
"logps/ref_chosen": -65.49124145507812,
|
|
"logps/ref_rejected": -93.08908081054688,
|
|
"logps/rejected": -576.1469116210938,
|
|
"loss": 1.1808,
|
|
"margin_dpo/margin_mean": 139.83860778808594,
|
|
"margin_dpo/margin_std": 259.12890625,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.43612334801762115,
|
|
"fcm_dpo/beta": 0.0022864262573421,
|
|
"fcm_dpo/delta": 0.03677614405751228,
|
|
"fcm_dpo/margin": 159.44927978515625,
|
|
"fcm_dpo/q_t": 0.4151715040206909,
|
|
"grad_norm": 40.28225326538086,
|
|
"learning_rate": 3.486270052146694e-07,
|
|
"logits/chosen": -0.3144751489162445,
|
|
"logits/rejected": -0.322353720664978,
|
|
"logps/chosen": -362.0162658691406,
|
|
"logps/ref_chosen": -56.476951599121094,
|
|
"logps/ref_rejected": -95.1385498046875,
|
|
"logps/rejected": -560.1271362304688,
|
|
"loss": 1.1061,
|
|
"margin_dpo/margin_mean": 159.44927978515625,
|
|
"margin_dpo/margin_std": 203.22805786132812,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.43759177679882527,
|
|
"fcm_dpo/beta": 0.0022769877687096596,
|
|
"fcm_dpo/delta": -0.07902979105710983,
|
|
"fcm_dpo/margin": 208.72238159179688,
|
|
"fcm_dpo/q_t": 0.39558666944503784,
|
|
"grad_norm": 26.517858505249023,
|
|
"learning_rate": 3.474464683231698e-07,
|
|
"logits/chosen": -0.3526347875595093,
|
|
"logits/rejected": -0.36952221393585205,
|
|
"logps/chosen": -382.0199890136719,
|
|
"logps/ref_chosen": -67.32516479492188,
|
|
"logps/ref_rejected": -116.66217041015625,
|
|
"logps/rejected": -640.079345703125,
|
|
"loss": 1.0697,
|
|
"margin_dpo/margin_mean": 208.72238159179688,
|
|
"margin_dpo/margin_std": 299.59674072265625,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.4390602055800294,
|
|
"fcm_dpo/beta": 0.0022661760449409485,
|
|
"fcm_dpo/delta": -0.010751504451036453,
|
|
"fcm_dpo/margin": 180.86856079101562,
|
|
"fcm_dpo/q_t": 0.4070656895637512,
|
|
"grad_norm": 38.609622955322266,
|
|
"learning_rate": 3.462633636266041e-07,
|
|
"logits/chosen": -0.35478705167770386,
|
|
"logits/rejected": -0.36300161480903625,
|
|
"logps/chosen": -315.2637939453125,
|
|
"logps/ref_chosen": -48.96209716796875,
|
|
"logps/ref_rejected": -84.32823944091797,
|
|
"logps/rejected": -531.4984741210938,
|
|
"loss": 1.0927,
|
|
"margin_dpo/margin_mean": 180.86856079101562,
|
|
"margin_dpo/margin_std": 247.35357666015625,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.44052863436123346,
|
|
"fcm_dpo/beta": 0.002229666104540229,
|
|
"fcm_dpo/delta": -0.09078172594308853,
|
|
"fcm_dpo/margin": 218.17410278320312,
|
|
"fcm_dpo/q_t": 0.38910606503486633,
|
|
"grad_norm": 40.36360549926758,
|
|
"learning_rate": 3.4507772230088147e-07,
|
|
"logits/chosen": -0.3490832448005676,
|
|
"logits/rejected": -0.3574965000152588,
|
|
"logps/chosen": -383.52117919921875,
|
|
"logps/ref_chosen": -59.07371139526367,
|
|
"logps/ref_rejected": -95.9664535522461,
|
|
"logps/rejected": -638.5880126953125,
|
|
"loss": 1.049,
|
|
"margin_dpo/margin_mean": 218.17413330078125,
|
|
"margin_dpo/margin_std": 278.7861633300781,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.4419970631424376,
|
|
"fcm_dpo/beta": 0.0021854317747056484,
|
|
"fcm_dpo/delta": -0.06135018169879913,
|
|
"fcm_dpo/margin": 209.61598205566406,
|
|
"fcm_dpo/q_t": 0.3961915373802185,
|
|
"grad_norm": 23.53122329711914,
|
|
"learning_rate": 3.4388957558875316e-07,
|
|
"logits/chosen": -0.3479039669036865,
|
|
"logits/rejected": -0.3508484363555908,
|
|
"logps/chosen": -332.4249267578125,
|
|
"logps/ref_chosen": -57.249366760253906,
|
|
"logps/ref_rejected": -92.35354614257812,
|
|
"logps/rejected": -577.1451416015625,
|
|
"loss": 1.0508,
|
|
"margin_dpo/margin_mean": 209.615966796875,
|
|
"margin_dpo/margin_std": 255.20401000976562,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.4434654919236417,
|
|
"fcm_dpo/beta": 0.0021836140658706427,
|
|
"fcm_dpo/delta": 0.016016894951462746,
|
|
"fcm_dpo/margin": 176.0408172607422,
|
|
"fcm_dpo/q_t": 0.4110547602176666,
|
|
"grad_norm": 26.080291748046875,
|
|
"learning_rate": 3.426989547989902e-07,
|
|
"logits/chosen": -0.3281886875629425,
|
|
"logits/rejected": -0.3344939947128296,
|
|
"logps/chosen": -289.31488037109375,
|
|
"logps/ref_chosen": -51.197994232177734,
|
|
"logps/ref_rejected": -97.22636413574219,
|
|
"logps/rejected": -511.38409423828125,
|
|
"loss": 1.0933,
|
|
"margin_dpo/margin_mean": 176.04083251953125,
|
|
"margin_dpo/margin_std": 219.8755645751953,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.44493392070484583,
|
|
"fcm_dpo/beta": 0.002201726660132408,
|
|
"fcm_dpo/delta": 0.041237279772758484,
|
|
"fcm_dpo/margin": 163.615966796875,
|
|
"fcm_dpo/q_t": 0.4173469543457031,
|
|
"grad_norm": 32.51033020019531,
|
|
"learning_rate": 3.4150589130555773e-07,
|
|
"logits/chosen": -0.3730643093585968,
|
|
"logits/rejected": -0.3612968325614929,
|
|
"logps/chosen": -314.7459716796875,
|
|
"logps/ref_chosen": -66.71394348144531,
|
|
"logps/ref_rejected": -86.94542694091797,
|
|
"logps/rejected": -498.59344482421875,
|
|
"loss": 1.1264,
|
|
"margin_dpo/margin_mean": 163.615966796875,
|
|
"margin_dpo/margin_std": 242.33187866210938,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.44640234948604995,
|
|
"fcm_dpo/beta": 0.0022171237505972385,
|
|
"fcm_dpo/delta": -0.006592735648155212,
|
|
"fcm_dpo/margin": 183.16299438476562,
|
|
"fcm_dpo/q_t": 0.4029558300971985,
|
|
"grad_norm": 34.26176071166992,
|
|
"learning_rate": 3.403104165467883e-07,
|
|
"logits/chosen": -0.40618133544921875,
|
|
"logits/rejected": -0.398425817489624,
|
|
"logps/chosen": -288.716796875,
|
|
"logps/ref_chosen": -71.95069885253906,
|
|
"logps/ref_rejected": -90.47203063964844,
|
|
"logps/rejected": -490.401123046875,
|
|
"loss": 1.0539,
|
|
"margin_dpo/margin_mean": 183.16299438476562,
|
|
"margin_dpo/margin_std": 163.05859375,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.447870778267254,
|
|
"fcm_dpo/beta": 0.0022104752715677023,
|
|
"fcm_dpo/delta": 0.043227050453424454,
|
|
"fcm_dpo/margin": 161.9324951171875,
|
|
"fcm_dpo/q_t": 0.4192023277282715,
|
|
"grad_norm": 26.293283462524414,
|
|
"learning_rate": 3.391125620245535e-07,
|
|
"logits/chosen": -0.3858333230018616,
|
|
"logits/rejected": -0.37053465843200684,
|
|
"logps/chosen": -311.5250244140625,
|
|
"logps/ref_chosen": -66.79523468017578,
|
|
"logps/ref_rejected": -92.75459289550781,
|
|
"logps/rejected": -499.4169006347656,
|
|
"loss": 1.1289,
|
|
"margin_dpo/margin_mean": 161.93251037597656,
|
|
"margin_dpo/margin_std": 241.1954345703125,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.44933920704845814,
|
|
"fcm_dpo/beta": 0.002244081348180771,
|
|
"fcm_dpo/delta": 0.04034552350640297,
|
|
"fcm_dpo/margin": 160.8773193359375,
|
|
"fcm_dpo/q_t": 0.416267991065979,
|
|
"grad_norm": 44.03425216674805,
|
|
"learning_rate": 3.3791235930343417e-07,
|
|
"logits/chosen": -0.3744924068450928,
|
|
"logits/rejected": -0.35346078872680664,
|
|
"logps/chosen": -308.999755859375,
|
|
"logps/ref_chosen": -69.68389892578125,
|
|
"logps/ref_rejected": -85.15919494628906,
|
|
"logps/rejected": -485.35235595703125,
|
|
"loss": 1.1064,
|
|
"margin_dpo/margin_mean": 160.87730407714844,
|
|
"margin_dpo/margin_std": 204.9425048828125,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.45080763582966227,
|
|
"fcm_dpo/beta": 0.0022494769655168056,
|
|
"fcm_dpo/delta": 0.0235704705119133,
|
|
"fcm_dpo/margin": 167.72592163085938,
|
|
"fcm_dpo/q_t": 0.4124801456928253,
|
|
"grad_norm": 28.95937728881836,
|
|
"learning_rate": 3.367098400098881e-07,
|
|
"logits/chosen": -0.3728075623512268,
|
|
"logits/rejected": -0.3534259498119354,
|
|
"logps/chosen": -297.20965576171875,
|
|
"logps/ref_chosen": -70.16542053222656,
|
|
"logps/ref_rejected": -86.97230529785156,
|
|
"logps/rejected": -481.74249267578125,
|
|
"loss": 1.1012,
|
|
"margin_dpo/margin_mean": 167.72592163085938,
|
|
"margin_dpo/margin_std": 218.9744415283203,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.4522760646108664,
|
|
"fcm_dpo/beta": 0.002247368451207876,
|
|
"fcm_dpo/delta": -0.029369540512561798,
|
|
"fcm_dpo/margin": 190.49864196777344,
|
|
"fcm_dpo/q_t": 0.39926615357398987,
|
|
"grad_norm": 34.80091857910156,
|
|
"learning_rate": 3.355050358314172e-07,
|
|
"logits/chosen": -0.347628116607666,
|
|
"logits/rejected": -0.3359089493751526,
|
|
"logps/chosen": -292.7979736328125,
|
|
"logps/ref_chosen": -55.2449951171875,
|
|
"logps/ref_rejected": -79.37226104736328,
|
|
"logps/rejected": -507.42388916015625,
|
|
"loss": 1.0523,
|
|
"margin_dpo/margin_mean": 190.49862670898438,
|
|
"margin_dpo/margin_std": 201.00457763671875,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.45374449339207046,
|
|
"fcm_dpo/beta": 0.002246787305921316,
|
|
"fcm_dpo/delta": -0.021639183163642883,
|
|
"fcm_dpo/margin": 187.0961151123047,
|
|
"fcm_dpo/q_t": 0.40152931213378906,
|
|
"grad_norm": 34.3463020324707,
|
|
"learning_rate": 3.3429797851573183e-07,
|
|
"logits/chosen": -0.336150586605072,
|
|
"logits/rejected": -0.3299615979194641,
|
|
"logps/chosen": -293.8416748046875,
|
|
"logps/ref_chosen": -48.959083557128906,
|
|
"logps/ref_rejected": -82.34072875976562,
|
|
"logps/rejected": -514.3194580078125,
|
|
"loss": 1.0797,
|
|
"margin_dpo/margin_mean": 187.0961151123047,
|
|
"margin_dpo/margin_std": 238.40676879882812,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.4552129221732746,
|
|
"fcm_dpo/beta": 0.002238691318780184,
|
|
"fcm_dpo/delta": -0.007201097905635834,
|
|
"fcm_dpo/margin": 181.66790771484375,
|
|
"fcm_dpo/q_t": 0.404284805059433,
|
|
"grad_norm": 27.16603660583496,
|
|
"learning_rate": 3.3308869986991487e-07,
|
|
"logits/chosen": -0.38059085607528687,
|
|
"logits/rejected": -0.37059611082077026,
|
|
"logps/chosen": -346.43719482421875,
|
|
"logps/ref_chosen": -62.74177932739258,
|
|
"logps/ref_rejected": -79.9302978515625,
|
|
"logps/rejected": -545.2936401367188,
|
|
"loss": 1.0678,
|
|
"margin_dpo/margin_mean": 181.66787719726562,
|
|
"margin_dpo/margin_std": 195.15277099609375,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.4566813509544787,
|
|
"fcm_dpo/beta": 0.0022144997492432594,
|
|
"fcm_dpo/delta": -0.047252584248781204,
|
|
"fcm_dpo/margin": 201.01556396484375,
|
|
"fcm_dpo/q_t": 0.3994860351085663,
|
|
"grad_norm": 31.022571563720703,
|
|
"learning_rate": 3.3187723175958346e-07,
|
|
"logits/chosen": -0.32762807607650757,
|
|
"logits/rejected": -0.3017122149467468,
|
|
"logps/chosen": -353.03253173828125,
|
|
"logps/ref_chosen": -53.02798080444336,
|
|
"logps/ref_rejected": -77.43820190429688,
|
|
"logps/rejected": -578.4583129882812,
|
|
"loss": 1.0672,
|
|
"margin_dpo/margin_mean": 201.0155487060547,
|
|
"margin_dpo/margin_std": 262.7830810546875,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.4581497797356828,
|
|
"fcm_dpo/beta": 0.0022153835743665695,
|
|
"fcm_dpo/delta": -0.009008888155221939,
|
|
"fcm_dpo/margin": 184.3203125,
|
|
"fcm_dpo/q_t": 0.4087637960910797,
|
|
"grad_norm": 25.93047332763672,
|
|
"learning_rate": 3.306636061080487e-07,
|
|
"logits/chosen": -0.2974693775177002,
|
|
"logits/rejected": -0.28706854581832886,
|
|
"logps/chosen": -339.06146240234375,
|
|
"logps/ref_chosen": -49.39221954345703,
|
|
"logps/ref_rejected": -75.79280853271484,
|
|
"logps/rejected": -549.7823486328125,
|
|
"loss": 1.0958,
|
|
"margin_dpo/margin_mean": 184.32032775878906,
|
|
"margin_dpo/margin_std": 263.0006103515625,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.45961820851688695,
|
|
"fcm_dpo/beta": 0.002215869491919875,
|
|
"fcm_dpo/delta": -0.016454219818115234,
|
|
"fcm_dpo/margin": 187.22032165527344,
|
|
"fcm_dpo/q_t": 0.40516579151153564,
|
|
"grad_norm": 31.79006004333496,
|
|
"learning_rate": 3.2944785489547537e-07,
|
|
"logits/chosen": -0.3782232999801636,
|
|
"logits/rejected": -0.376730352640152,
|
|
"logps/chosen": -320.020263671875,
|
|
"logps/ref_chosen": -50.152740478515625,
|
|
"logps/ref_rejected": -86.40620422363281,
|
|
"logps/rejected": -543.4940185546875,
|
|
"loss": 1.0904,
|
|
"margin_dpo/margin_mean": 187.22030639648438,
|
|
"margin_dpo/margin_std": 251.69192504882812,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.461086637298091,
|
|
"fcm_dpo/beta": 0.002191446255892515,
|
|
"fcm_dpo/delta": 0.009936392307281494,
|
|
"fcm_dpo/margin": 178.01272583007812,
|
|
"fcm_dpo/q_t": 0.41321849822998047,
|
|
"grad_norm": 22.70627784729004,
|
|
"learning_rate": 3.2823001015803857e-07,
|
|
"logits/chosen": -0.4097542464733124,
|
|
"logits/rejected": -0.4117008149623871,
|
|
"logps/chosen": -346.734375,
|
|
"logps/ref_chosen": -57.237579345703125,
|
|
"logps/ref_rejected": -97.5965347290039,
|
|
"logps/rejected": -565.1060791015625,
|
|
"loss": 1.1218,
|
|
"margin_dpo/margin_mean": 178.0127410888672,
|
|
"margin_dpo/margin_std": 280.118896484375,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.46255506607929514,
|
|
"fcm_dpo/beta": 0.0022113011218607426,
|
|
"fcm_dpo/delta": 0.0364176481962204,
|
|
"fcm_dpo/margin": 165.0262451171875,
|
|
"fcm_dpo/q_t": 0.4158650040626526,
|
|
"grad_norm": 23.05976104736328,
|
|
"learning_rate": 3.270101039870797e-07,
|
|
"logits/chosen": -0.3035910427570343,
|
|
"logits/rejected": -0.3061618208885193,
|
|
"logps/chosen": -304.7342834472656,
|
|
"logps/ref_chosen": -49.06958770751953,
|
|
"logps/ref_rejected": -85.68087768554688,
|
|
"logps/rejected": -506.371826171875,
|
|
"loss": 1.1088,
|
|
"margin_dpo/margin_mean": 165.0262451171875,
|
|
"margin_dpo/margin_std": 218.05349731445312,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.46402349486049926,
|
|
"fcm_dpo/beta": 0.0021732416935265064,
|
|
"fcm_dpo/delta": -0.10777918994426727,
|
|
"fcm_dpo/margin": 230.76441955566406,
|
|
"fcm_dpo/q_t": 0.3855735957622528,
|
|
"grad_norm": 25.089046478271484,
|
|
"learning_rate": 3.2578816852826086e-07,
|
|
"logits/chosen": -0.3702074885368347,
|
|
"logits/rejected": -0.3739718794822693,
|
|
"logps/chosen": -308.93255615234375,
|
|
"logps/ref_chosen": -54.26074981689453,
|
|
"logps/ref_rejected": -101.2814712524414,
|
|
"logps/rejected": -586.7176513671875,
|
|
"loss": 1.0133,
|
|
"margin_dpo/margin_mean": 230.764404296875,
|
|
"margin_dpo/margin_std": 248.62588500976562,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.4654919236417034,
|
|
"fcm_dpo/beta": 0.002131909830495715,
|
|
"fcm_dpo/delta": -0.14962507784366608,
|
|
"fcm_dpo/margin": 254.1263427734375,
|
|
"fcm_dpo/q_t": 0.3746058940887451,
|
|
"grad_norm": 29.67884635925293,
|
|
"learning_rate": 3.2456423598071783e-07,
|
|
"logits/chosen": -0.3865886330604553,
|
|
"logits/rejected": -0.37612611055374146,
|
|
"logps/chosen": -318.9429626464844,
|
|
"logps/ref_chosen": -56.094207763671875,
|
|
"logps/ref_rejected": -100.69905090332031,
|
|
"logps/rejected": -617.6741943359375,
|
|
"loss": 0.9798,
|
|
"margin_dpo/margin_mean": 254.1263427734375,
|
|
"margin_dpo/margin_std": 239.26747131347656,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.4669603524229075,
|
|
"fcm_dpo/beta": 0.0021128756925463676,
|
|
"fcm_dpo/delta": -0.0003537740558385849,
|
|
"fcm_dpo/margin": 189.40443420410156,
|
|
"fcm_dpo/q_t": 0.4078769087791443,
|
|
"grad_norm": 26.946083068847656,
|
|
"learning_rate": 3.233383385962115e-07,
|
|
"logits/chosen": -0.43728041648864746,
|
|
"logits/rejected": -0.4106597304344177,
|
|
"logps/chosen": -352.3509521484375,
|
|
"logps/ref_chosen": -64.64569854736328,
|
|
"logps/ref_rejected": -82.76425170898438,
|
|
"logps/rejected": -559.8739624023438,
|
|
"loss": 1.0848,
|
|
"margin_dpo/margin_mean": 189.40443420410156,
|
|
"margin_dpo/margin_std": 238.53329467773438,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.4684287812041116,
|
|
"fcm_dpo/beta": 0.002084306674078107,
|
|
"fcm_dpo/delta": -0.07317540049552917,
|
|
"fcm_dpo/margin": 225.37155151367188,
|
|
"fcm_dpo/q_t": 0.3912753760814667,
|
|
"grad_norm": 24.27452278137207,
|
|
"learning_rate": 3.2211050867827805e-07,
|
|
"logits/chosen": -0.37028658390045166,
|
|
"logits/rejected": -0.3828258514404297,
|
|
"logps/chosen": -309.1868591308594,
|
|
"logps/ref_chosen": -49.383758544921875,
|
|
"logps/ref_rejected": -113.90650939941406,
|
|
"logps/rejected": -599.0811767578125,
|
|
"loss": 1.0362,
|
|
"margin_dpo/margin_mean": 225.37155151367188,
|
|
"margin_dpo/margin_std": 256.1711120605469,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.4698972099853157,
|
|
"fcm_dpo/beta": 0.002035951940342784,
|
|
"fcm_dpo/delta": -0.11059105396270752,
|
|
"fcm_dpo/margin": 247.78436279296875,
|
|
"fcm_dpo/q_t": 0.3837531805038452,
|
|
"grad_norm": 31.604127883911133,
|
|
"learning_rate": 3.208807785813777e-07,
|
|
"logits/chosen": -0.3997849225997925,
|
|
"logits/rejected": -0.4061777591705322,
|
|
"logps/chosen": -327.56396484375,
|
|
"logps/ref_chosen": -59.50489044189453,
|
|
"logps/ref_rejected": -97.66717529296875,
|
|
"logps/rejected": -613.5106201171875,
|
|
"loss": 1.0089,
|
|
"margin_dpo/margin_mean": 247.7843780517578,
|
|
"margin_dpo/margin_std": 260.9141845703125,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.4713656387665198,
|
|
"fcm_dpo/beta": 0.002011922188103199,
|
|
"fcm_dpo/delta": -0.02466902881860733,
|
|
"fcm_dpo/margin": 210.1709442138672,
|
|
"fcm_dpo/q_t": 0.4029350280761719,
|
|
"grad_norm": 25.207433700561523,
|
|
"learning_rate": 3.1964918071004217e-07,
|
|
"logits/chosen": -0.3274344205856323,
|
|
"logits/rejected": -0.3175322413444519,
|
|
"logps/chosen": -384.12017822265625,
|
|
"logps/ref_chosen": -61.548683166503906,
|
|
"logps/ref_rejected": -91.64103698730469,
|
|
"logps/rejected": -624.3834838867188,
|
|
"loss": 1.0747,
|
|
"margin_dpo/margin_mean": 210.17095947265625,
|
|
"margin_dpo/margin_std": 260.92510986328125,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.47283406754772395,
|
|
"fcm_dpo/beta": 0.0019989702850580215,
|
|
"fcm_dpo/delta": -0.05126545578241348,
|
|
"fcm_dpo/margin": 224.41220092773438,
|
|
"fcm_dpo/q_t": 0.3951689302921295,
|
|
"grad_norm": 28.6334285736084,
|
|
"learning_rate": 3.184157475180207e-07,
|
|
"logits/chosen": -0.33229005336761475,
|
|
"logits/rejected": -0.3307497799396515,
|
|
"logps/chosen": -346.727294921875,
|
|
"logps/ref_chosen": -57.29003143310547,
|
|
"logps/ref_rejected": -95.74992370605469,
|
|
"logps/rejected": -609.599365234375,
|
|
"loss": 1.0416,
|
|
"margin_dpo/margin_mean": 224.41220092773438,
|
|
"margin_dpo/margin_std": 238.20156860351562,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.47430249632892807,
|
|
"fcm_dpo/beta": 0.002003198955208063,
|
|
"fcm_dpo/delta": 0.020026560872793198,
|
|
"fcm_dpo/margin": 190.05551147460938,
|
|
"fcm_dpo/q_t": 0.41150009632110596,
|
|
"grad_norm": 39.6695671081543,
|
|
"learning_rate": 3.171805115074251e-07,
|
|
"logits/chosen": -0.3865179419517517,
|
|
"logits/rejected": -0.3845609426498413,
|
|
"logps/chosen": -364.77532958984375,
|
|
"logps/ref_chosen": -51.23395919799805,
|
|
"logps/ref_rejected": -75.06192016601562,
|
|
"logps/rejected": -578.6588134765625,
|
|
"loss": 1.1034,
|
|
"margin_dpo/margin_mean": 190.05551147460938,
|
|
"margin_dpo/margin_std": 250.55490112304688,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.47577092511013214,
|
|
"fcm_dpo/beta": 0.0020291549153625965,
|
|
"fcm_dpo/delta": 0.020745858550071716,
|
|
"fcm_dpo/margin": 186.71697998046875,
|
|
"fcm_dpo/q_t": 0.41493675112724304,
|
|
"grad_norm": 45.34550857543945,
|
|
"learning_rate": 3.1594350522787295e-07,
|
|
"logits/chosen": -0.3825080394744873,
|
|
"logits/rejected": -0.36702072620391846,
|
|
"logps/chosen": -429.89703369140625,
|
|
"logps/ref_chosen": -65.13516998291016,
|
|
"logps/ref_rejected": -86.47750854492188,
|
|
"logps/rejected": -637.9563598632812,
|
|
"loss": 1.1259,
|
|
"margin_dpo/margin_mean": 186.71697998046875,
|
|
"margin_dpo/margin_std": 281.0679626464844,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.47723935389133626,
|
|
"fcm_dpo/beta": 0.0020416593179106712,
|
|
"fcm_dpo/delta": 0.07895328104496002,
|
|
"fcm_dpo/margin": 158.47682189941406,
|
|
"fcm_dpo/q_t": 0.4251885414123535,
|
|
"grad_norm": 28.742250442504883,
|
|
"learning_rate": 3.147047612756302e-07,
|
|
"logits/chosen": -0.4396970570087433,
|
|
"logits/rejected": -0.4218035936355591,
|
|
"logps/chosen": -342.92401123046875,
|
|
"logps/ref_chosen": -56.215599060058594,
|
|
"logps/ref_rejected": -70.08592987060547,
|
|
"logps/rejected": -515.2711181640625,
|
|
"loss": 1.1448,
|
|
"margin_dpo/margin_mean": 158.47682189941406,
|
|
"margin_dpo/margin_std": 230.89166259765625,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.4787077826725404,
|
|
"fcm_dpo/beta": 0.0020674504339694977,
|
|
"fcm_dpo/delta": 0.07334257662296295,
|
|
"fcm_dpo/margin": 159.19859313964844,
|
|
"fcm_dpo/q_t": 0.42226988077163696,
|
|
"grad_norm": 46.8277473449707,
|
|
"learning_rate": 3.134643122927519e-07,
|
|
"logits/chosen": -0.446100115776062,
|
|
"logits/rejected": -0.4265139698982239,
|
|
"logps/chosen": -398.791748046875,
|
|
"logps/ref_chosen": -72.72496032714844,
|
|
"logps/ref_rejected": -79.8467788696289,
|
|
"logps/rejected": -565.1121826171875,
|
|
"loss": 1.1294,
|
|
"margin_dpo/margin_mean": 159.19859313964844,
|
|
"margin_dpo/margin_std": 203.34844970703125,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.4801762114537445,
|
|
"fcm_dpo/beta": 0.002041102387011051,
|
|
"fcm_dpo/delta": -0.1034296378493309,
|
|
"fcm_dpo/margin": 243.88858032226562,
|
|
"fcm_dpo/q_t": 0.38400399684906006,
|
|
"grad_norm": 32.756866455078125,
|
|
"learning_rate": 3.1222219096622264e-07,
|
|
"logits/chosen": -0.40883296728134155,
|
|
"logits/rejected": -0.39531680941581726,
|
|
"logps/chosen": -362.8526611328125,
|
|
"logps/ref_chosen": -69.13441467285156,
|
|
"logps/ref_rejected": -111.93377685546875,
|
|
"logps/rejected": -649.5405883789062,
|
|
"loss": 1.0146,
|
|
"margin_dpo/margin_mean": 243.88858032226562,
|
|
"margin_dpo/margin_std": 258.3064880371094,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.48164464023494863,
|
|
"fcm_dpo/beta": 0.0020253488328307867,
|
|
"fcm_dpo/delta": -0.04178054630756378,
|
|
"fcm_dpo/margin": 217.22732543945312,
|
|
"fcm_dpo/q_t": 0.3996826410293579,
|
|
"grad_norm": 28.734113693237305,
|
|
"learning_rate": 3.1097843002709427e-07,
|
|
"logits/chosen": -0.4222732186317444,
|
|
"logits/rejected": -0.42595934867858887,
|
|
"logps/chosen": -361.3597412109375,
|
|
"logps/ref_chosen": -59.68719482421875,
|
|
"logps/ref_rejected": -90.85499572753906,
|
|
"logps/rejected": -609.7548828125,
|
|
"loss": 1.0643,
|
|
"margin_dpo/margin_mean": 217.22732543945312,
|
|
"margin_dpo/margin_std": 274.53643798828125,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.4831130690161527,
|
|
"fcm_dpo/beta": 0.001993193756788969,
|
|
"fcm_dpo/delta": -0.050994060933589935,
|
|
"fcm_dpo/margin": 224.6842041015625,
|
|
"fcm_dpo/q_t": 0.39687401056289673,
|
|
"grad_norm": 31.805065155029297,
|
|
"learning_rate": 3.0973306224962437e-07,
|
|
"logits/chosen": -0.40810489654541016,
|
|
"logits/rejected": -0.4009937047958374,
|
|
"logps/chosen": -404.92718505859375,
|
|
"logps/ref_chosen": -65.2461929321289,
|
|
"logps/ref_rejected": -100.69770812988281,
|
|
"logps/rejected": -665.0628662109375,
|
|
"loss": 1.0694,
|
|
"margin_dpo/margin_mean": 224.6842041015625,
|
|
"margin_dpo/margin_std": 286.8136291503906,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.4845814977973568,
|
|
"fcm_dpo/beta": 0.0019774779211729765,
|
|
"fcm_dpo/delta": -0.07221639156341553,
|
|
"fcm_dpo/margin": 237.08126831054688,
|
|
"fcm_dpo/q_t": 0.3920379877090454,
|
|
"grad_norm": 28.682498931884766,
|
|
"learning_rate": 3.084861204504122e-07,
|
|
"logits/chosen": -0.3676164746284485,
|
|
"logits/rejected": -0.36775562167167664,
|
|
"logps/chosen": -336.18048095703125,
|
|
"logps/ref_chosen": -46.998348236083984,
|
|
"logps/ref_rejected": -86.87684631347656,
|
|
"logps/rejected": -613.1402587890625,
|
|
"loss": 1.0379,
|
|
"margin_dpo/margin_mean": 237.08126831054688,
|
|
"margin_dpo/margin_std": 271.5751037597656,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.48604992657856094,
|
|
"fcm_dpo/beta": 0.001954648643732071,
|
|
"fcm_dpo/delta": -0.050193920731544495,
|
|
"fcm_dpo/margin": 229.15296936035156,
|
|
"fcm_dpo/q_t": 0.39381328225135803,
|
|
"grad_norm": 30.48107147216797,
|
|
"learning_rate": 3.072376374875335e-07,
|
|
"logits/chosen": -0.43283581733703613,
|
|
"logits/rejected": -0.43236637115478516,
|
|
"logps/chosen": -361.46075439453125,
|
|
"logps/ref_chosen": -50.52424621582031,
|
|
"logps/ref_rejected": -89.01544189453125,
|
|
"logps/rejected": -629.1049194335938,
|
|
"loss": 1.0226,
|
|
"margin_dpo/margin_mean": 229.15298461914062,
|
|
"margin_dpo/margin_std": 199.23004150390625,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.48751835535976507,
|
|
"fcm_dpo/beta": 0.0019774874672293663,
|
|
"fcm_dpo/delta": 0.08132193237543106,
|
|
"fcm_dpo/margin": 162.39129638671875,
|
|
"fcm_dpo/q_t": 0.42597857117652893,
|
|
"grad_norm": 23.958471298217773,
|
|
"learning_rate": 3.059876462596758e-07,
|
|
"logits/chosen": -0.4306356608867645,
|
|
"logits/rejected": -0.41308534145355225,
|
|
"logps/chosen": -359.36370849609375,
|
|
"logps/ref_chosen": -49.18028259277344,
|
|
"logps/ref_rejected": -76.48515319824219,
|
|
"logps/rejected": -549.0598754882812,
|
|
"loss": 1.1409,
|
|
"margin_dpo/margin_mean": 162.39129638671875,
|
|
"margin_dpo/margin_std": 234.40750122070312,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.4889867841409692,
|
|
"fcm_dpo/beta": 0.0019611469469964504,
|
|
"fcm_dpo/delta": -0.03113037347793579,
|
|
"fcm_dpo/margin": 218.6634521484375,
|
|
"fcm_dpo/q_t": 0.4022316336631775,
|
|
"grad_norm": 24.696426391601562,
|
|
"learning_rate": 3.0473617970527015e-07,
|
|
"logits/chosen": -0.4549393653869629,
|
|
"logits/rejected": -0.4510509967803955,
|
|
"logps/chosen": -389.515869140625,
|
|
"logps/ref_chosen": -63.75574493408203,
|
|
"logps/ref_rejected": -95.04411315917969,
|
|
"logps/rejected": -639.4676513671875,
|
|
"loss": 1.0836,
|
|
"margin_dpo/margin_mean": 218.6634521484375,
|
|
"margin_dpo/margin_std": 293.59735107421875,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.49045521292217326,
|
|
"fcm_dpo/beta": 0.001973837148398161,
|
|
"fcm_dpo/delta": 0.018399503082036972,
|
|
"fcm_dpo/margin": 193.68478393554688,
|
|
"fcm_dpo/q_t": 0.4129871726036072,
|
|
"grad_norm": 24.81572151184082,
|
|
"learning_rate": 3.034832708016243e-07,
|
|
"logits/chosen": -0.42541375756263733,
|
|
"logits/rejected": -0.4231783449649811,
|
|
"logps/chosen": -383.4892272949219,
|
|
"logps/ref_chosen": -66.97975158691406,
|
|
"logps/ref_rejected": -95.31692504882812,
|
|
"logps/rejected": -605.51123046875,
|
|
"loss": 1.1228,
|
|
"margin_dpo/margin_mean": 193.68478393554688,
|
|
"margin_dpo/margin_std": 298.8642272949219,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.4919236417033774,
|
|
"fcm_dpo/beta": 0.00201216503046453,
|
|
"fcm_dpo/delta": 0.08145836740732193,
|
|
"fcm_dpo/margin": 159.1428680419922,
|
|
"fcm_dpo/q_t": 0.42715874314308167,
|
|
"grad_norm": 32.46516036987305,
|
|
"learning_rate": 3.022289525640531e-07,
|
|
"logits/chosen": -0.46390390396118164,
|
|
"logits/rejected": -0.44169843196868896,
|
|
"logps/chosen": -399.4327697753906,
|
|
"logps/ref_chosen": -62.54248046875,
|
|
"logps/ref_rejected": -87.61770629882812,
|
|
"logps/rejected": -583.65087890625,
|
|
"loss": 1.1609,
|
|
"margin_dpo/margin_mean": 159.14288330078125,
|
|
"margin_dpo/margin_std": 264.8077087402344,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.4933920704845815,
|
|
"fcm_dpo/beta": 0.001989907817915082,
|
|
"fcm_dpo/delta": -0.08648539334535599,
|
|
"fcm_dpo/margin": 242.4196319580078,
|
|
"fcm_dpo/q_t": 0.39155828952789307,
|
|
"grad_norm": 30.810070037841797,
|
|
"learning_rate": 3.009732580450086e-07,
|
|
"logits/chosen": -0.4227680563926697,
|
|
"logits/rejected": -0.42365506291389465,
|
|
"logps/chosen": -381.827392578125,
|
|
"logps/ref_chosen": -54.53115463256836,
|
|
"logps/ref_rejected": -104.40424346923828,
|
|
"logps/rejected": -674.1201171875,
|
|
"loss": 1.0553,
|
|
"margin_dpo/margin_mean": 242.41964721679688,
|
|
"margin_dpo/margin_std": 320.0055236816406,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.4948604992657856,
|
|
"fcm_dpo/beta": 0.001954274019226432,
|
|
"fcm_dpo/delta": -0.06099873036146164,
|
|
"fcm_dpo/margin": 234.3765411376953,
|
|
"fcm_dpo/q_t": 0.39415156841278076,
|
|
"grad_norm": 37.6779899597168,
|
|
"learning_rate": 2.9971622033320914e-07,
|
|
"logits/chosen": -0.4804219603538513,
|
|
"logits/rejected": -0.47055840492248535,
|
|
"logps/chosen": -348.1040344238281,
|
|
"logps/ref_chosen": -65.12869262695312,
|
|
"logps/ref_rejected": -101.72701263427734,
|
|
"logps/rejected": -619.078857421875,
|
|
"loss": 1.0399,
|
|
"margin_dpo/margin_mean": 234.37652587890625,
|
|
"margin_dpo/margin_std": 261.6142578125,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.49632892804698975,
|
|
"fcm_dpo/beta": 0.0019397891592234373,
|
|
"fcm_dpo/delta": -0.03308578580617905,
|
|
"fcm_dpo/margin": 222.4942626953125,
|
|
"fcm_dpo/q_t": 0.3983496129512787,
|
|
"grad_norm": 51.19593048095703,
|
|
"learning_rate": 2.984578725527675e-07,
|
|
"logits/chosen": -0.4641989767551422,
|
|
"logits/rejected": -0.45939457416534424,
|
|
"logps/chosen": -313.06396484375,
|
|
"logps/ref_chosen": -58.422706604003906,
|
|
"logps/ref_rejected": -89.06854248046875,
|
|
"logps/rejected": -566.2040405273438,
|
|
"loss": 1.0441,
|
|
"margin_dpo/margin_mean": 222.4942626953125,
|
|
"margin_dpo/margin_std": 221.66220092773438,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.4977973568281938,
|
|
"fcm_dpo/beta": 0.001947054173797369,
|
|
"fcm_dpo/delta": -0.027510955929756165,
|
|
"fcm_dpo/margin": 218.38970947265625,
|
|
"fcm_dpo/q_t": 0.400276780128479,
|
|
"grad_norm": 36.372657775878906,
|
|
"learning_rate": 2.9719824786231796e-07,
|
|
"logits/chosen": -0.505604088306427,
|
|
"logits/rejected": -0.492832750082016,
|
|
"logps/chosen": -339.7549743652344,
|
|
"logps/ref_chosen": -59.99531555175781,
|
|
"logps/ref_rejected": -103.9109115600586,
|
|
"logps/rejected": -602.060302734375,
|
|
"loss": 1.0564,
|
|
"margin_dpo/margin_mean": 218.38970947265625,
|
|
"margin_dpo/margin_std": 221.18817138671875,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.49926578560939794,
|
|
"fcm_dpo/beta": 0.0019270360935479403,
|
|
"fcm_dpo/delta": 0.019969457760453224,
|
|
"fcm_dpo/margin": 197.50909423828125,
|
|
"fcm_dpo/q_t": 0.41230642795562744,
|
|
"grad_norm": 23.526317596435547,
|
|
"learning_rate": 2.959373794541426e-07,
|
|
"logits/chosen": -0.4352113902568817,
|
|
"logits/rejected": -0.41571658849716187,
|
|
"logps/chosen": -356.28045654296875,
|
|
"logps/ref_chosen": -52.83022689819336,
|
|
"logps/ref_rejected": -73.10723114013672,
|
|
"logps/rejected": -574.0665283203125,
|
|
"loss": 1.1044,
|
|
"margin_dpo/margin_mean": 197.50909423828125,
|
|
"margin_dpo/margin_std": 266.762451171875,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.5007342143906021,
|
|
"fcm_dpo/beta": 0.0019175230991095304,
|
|
"fcm_dpo/delta": -0.03825069218873978,
|
|
"fcm_dpo/margin": 227.45579528808594,
|
|
"fcm_dpo/q_t": 0.39912909269332886,
|
|
"grad_norm": 26.079498291015625,
|
|
"learning_rate": 2.946753005532965e-07,
|
|
"logits/chosen": -0.403905987739563,
|
|
"logits/rejected": -0.4048531651496887,
|
|
"logps/chosen": -344.41949462890625,
|
|
"logps/ref_chosen": -47.899803161621094,
|
|
"logps/ref_rejected": -101.80987548828125,
|
|
"logps/rejected": -625.7853393554688,
|
|
"loss": 1.0547,
|
|
"margin_dpo/margin_mean": 227.45578002929688,
|
|
"margin_dpo/margin_std": 254.0438232421875,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.5022026431718062,
|
|
"fcm_dpo/beta": 0.0019315474200993776,
|
|
"fcm_dpo/delta": 0.012259891256690025,
|
|
"fcm_dpo/margin": 200.836181640625,
|
|
"fcm_dpo/q_t": 0.4102938175201416,
|
|
"grad_norm": 26.406532287597656,
|
|
"learning_rate": 2.934120444167326e-07,
|
|
"logits/chosen": -0.413399875164032,
|
|
"logits/rejected": -0.3872986137866974,
|
|
"logps/chosen": -373.72735595703125,
|
|
"logps/ref_chosen": -71.99664306640625,
|
|
"logps/ref_rejected": -92.58959197998047,
|
|
"logps/rejected": -595.156494140625,
|
|
"loss": 1.0985,
|
|
"margin_dpo/margin_mean": 200.836181640625,
|
|
"margin_dpo/margin_std": 267.432861328125,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.5036710719530103,
|
|
"fcm_dpo/beta": 0.001907234895043075,
|
|
"fcm_dpo/delta": -0.06225571036338806,
|
|
"fcm_dpo/margin": 240.85353088378906,
|
|
"fcm_dpo/q_t": 0.3906010389328003,
|
|
"grad_norm": 25.300434112548828,
|
|
"learning_rate": 2.9214764433242476e-07,
|
|
"logits/chosen": -0.45764094591140747,
|
|
"logits/rejected": -0.46164631843566895,
|
|
"logps/chosen": -339.2222595214844,
|
|
"logps/ref_chosen": -54.405616760253906,
|
|
"logps/ref_rejected": -111.04142761230469,
|
|
"logps/rejected": -636.7116088867188,
|
|
"loss": 1.0151,
|
|
"margin_dpo/margin_mean": 240.85354614257812,
|
|
"margin_dpo/margin_std": 200.33447265625,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.5051395007342144,
|
|
"fcm_dpo/beta": 0.001916981302201748,
|
|
"fcm_dpo/delta": -0.00981883704662323,
|
|
"fcm_dpo/margin": 212.7474365234375,
|
|
"fcm_dpo/q_t": 0.4074634909629822,
|
|
"grad_norm": 42.644657135009766,
|
|
"learning_rate": 2.9088213361849126e-07,
|
|
"logits/chosen": -0.4130118489265442,
|
|
"logits/rejected": -0.41583961248397827,
|
|
"logps/chosen": -340.350830078125,
|
|
"logps/ref_chosen": -53.96466827392578,
|
|
"logps/ref_rejected": -90.62336730957031,
|
|
"logps/rejected": -589.7569580078125,
|
|
"loss": 1.082,
|
|
"margin_dpo/margin_mean": 212.74746704101562,
|
|
"margin_dpo/margin_std": 255.60020446777344,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.5066079295154186,
|
|
"fcm_dpo/beta": 0.001887032762169838,
|
|
"fcm_dpo/delta": -0.05342705175280571,
|
|
"fcm_dpo/margin": 239.02159118652344,
|
|
"fcm_dpo/q_t": 0.395052433013916,
|
|
"grad_norm": 27.184518814086914,
|
|
"learning_rate": 2.896155456223163e-07,
|
|
"logits/chosen": -0.446855753660202,
|
|
"logits/rejected": -0.44713422656059265,
|
|
"logps/chosen": -399.9010009765625,
|
|
"logps/ref_chosen": -61.685699462890625,
|
|
"logps/ref_rejected": -99.49041748046875,
|
|
"logps/rejected": -676.727294921875,
|
|
"loss": 1.0441,
|
|
"margin_dpo/margin_mean": 239.02159118652344,
|
|
"margin_dpo/margin_std": 266.046630859375,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.5080763582966226,
|
|
"fcm_dpo/beta": 0.001873625093139708,
|
|
"fcm_dpo/delta": -0.01170763373374939,
|
|
"fcm_dpo/margin": 219.47308349609375,
|
|
"fcm_dpo/q_t": 0.4032544493675232,
|
|
"grad_norm": 26.40635871887207,
|
|
"learning_rate": 2.883479137196714e-07,
|
|
"logits/chosen": -0.4138724207878113,
|
|
"logits/rejected": -0.4036678373813629,
|
|
"logps/chosen": -398.43524169921875,
|
|
"logps/ref_chosen": -55.256263732910156,
|
|
"logps/ref_rejected": -77.41532135009766,
|
|
"logps/rejected": -640.0673828125,
|
|
"loss": 1.0715,
|
|
"margin_dpo/margin_mean": 219.4730987548828,
|
|
"margin_dpo/margin_std": 256.69036865234375,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.5095447870778267,
|
|
"fcm_dpo/beta": 0.0018691536970436573,
|
|
"fcm_dpo/delta": -0.01727224886417389,
|
|
"fcm_dpo/margin": 222.85726928710938,
|
|
"fcm_dpo/q_t": 0.4037840962409973,
|
|
"grad_norm": 30.47779083251953,
|
|
"learning_rate": 2.8707927131383614e-07,
|
|
"logits/chosen": -0.39017215371131897,
|
|
"logits/rejected": -0.384768545627594,
|
|
"logps/chosen": -396.3253173828125,
|
|
"logps/ref_chosen": -57.56623840332031,
|
|
"logps/ref_rejected": -92.35509490966797,
|
|
"logps/rejected": -653.971435546875,
|
|
"loss": 1.0757,
|
|
"margin_dpo/margin_mean": 222.85726928710938,
|
|
"margin_dpo/margin_std": 278.2526550292969,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.5110132158590308,
|
|
"fcm_dpo/beta": 0.0018709124997258186,
|
|
"fcm_dpo/delta": 0.05157490074634552,
|
|
"fcm_dpo/margin": 186.99984741210938,
|
|
"fcm_dpo/q_t": 0.4193815588951111,
|
|
"grad_norm": 26.73240089416504,
|
|
"learning_rate": 2.858096518347179e-07,
|
|
"logits/chosen": -0.4690071940422058,
|
|
"logits/rejected": -0.47212427854537964,
|
|
"logps/chosen": -365.4136657714844,
|
|
"logps/ref_chosen": -56.31770324707031,
|
|
"logps/ref_rejected": -89.13836669921875,
|
|
"logps/rejected": -585.2341918945312,
|
|
"loss": 1.122,
|
|
"margin_dpo/margin_mean": 186.99984741210938,
|
|
"margin_dpo/margin_std": 251.55563354492188,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.5124816446402349,
|
|
"fcm_dpo/beta": 0.0018943310715258121,
|
|
"fcm_dpo/delta": -1.317635178565979e-05,
|
|
"fcm_dpo/margin": 210.99227905273438,
|
|
"fcm_dpo/q_t": 0.40966325998306274,
|
|
"grad_norm": 21.820791244506836,
|
|
"learning_rate": 2.845390887379706e-07,
|
|
"logits/chosen": -0.4289626181125641,
|
|
"logits/rejected": -0.4302072525024414,
|
|
"logps/chosen": -338.00421142578125,
|
|
"logps/ref_chosen": -58.025516510009766,
|
|
"logps/ref_rejected": -97.50515747070312,
|
|
"logps/rejected": -588.4761352539062,
|
|
"loss": 1.0992,
|
|
"margin_dpo/margin_mean": 210.99227905273438,
|
|
"margin_dpo/margin_std": 296.27386474609375,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.5139500734214391,
|
|
"fcm_dpo/beta": 0.0018844606820493937,
|
|
"fcm_dpo/delta": 0.020040031522512436,
|
|
"fcm_dpo/margin": 201.89549255371094,
|
|
"fcm_dpo/q_t": 0.41158032417297363,
|
|
"grad_norm": 36.85105895996094,
|
|
"learning_rate": 2.8326761550411346e-07,
|
|
"logits/chosen": -0.40707576274871826,
|
|
"logits/rejected": -0.40998172760009766,
|
|
"logps/chosen": -374.5255126953125,
|
|
"logps/ref_chosen": -64.33049011230469,
|
|
"logps/ref_rejected": -89.87164306640625,
|
|
"logps/rejected": -601.962158203125,
|
|
"loss": 1.1092,
|
|
"margin_dpo/margin_mean": 201.89547729492188,
|
|
"margin_dpo/margin_std": 281.0362243652344,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.5154185022026432,
|
|
"fcm_dpo/beta": 0.0018699737265706062,
|
|
"fcm_dpo/delta": -0.06955541670322418,
|
|
"fcm_dpo/margin": 249.21286010742188,
|
|
"fcm_dpo/q_t": 0.3945404291152954,
|
|
"grad_norm": 31.279190063476562,
|
|
"learning_rate": 2.819952656376487e-07,
|
|
"logits/chosen": -0.4381694197654724,
|
|
"logits/rejected": -0.4381583034992218,
|
|
"logps/chosen": -339.1927795410156,
|
|
"logps/ref_chosen": -60.6721305847168,
|
|
"logps/ref_rejected": -101.5654296875,
|
|
"logps/rejected": -629.2989501953125,
|
|
"loss": 1.0495,
|
|
"margin_dpo/margin_mean": 249.212890625,
|
|
"margin_dpo/margin_std": 307.7030029296875,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.5168869309838473,
|
|
"fcm_dpo/beta": 0.0018936812411993742,
|
|
"fcm_dpo/delta": 0.1119888573884964,
|
|
"fcm_dpo/margin": 153.8917999267578,
|
|
"fcm_dpo/q_t": 0.4330083727836609,
|
|
"grad_norm": 41.63142013549805,
|
|
"learning_rate": 2.8072207266617854e-07,
|
|
"logits/chosen": -0.43417030572891235,
|
|
"logits/rejected": -0.40401673316955566,
|
|
"logps/chosen": -405.0966491699219,
|
|
"logps/ref_chosen": -70.9434585571289,
|
|
"logps/ref_rejected": -76.6419677734375,
|
|
"logps/rejected": -564.68701171875,
|
|
"loss": 1.1811,
|
|
"margin_dpo/margin_mean": 153.8917999267578,
|
|
"margin_dpo/margin_std": 272.4174499511719,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.5183553597650514,
|
|
"fcm_dpo/beta": 0.0018986646318808198,
|
|
"fcm_dpo/delta": -0.004761148244142532,
|
|
"fcm_dpo/margin": 212.76190185546875,
|
|
"fcm_dpo/q_t": 0.4080585241317749,
|
|
"grad_norm": 23.361574172973633,
|
|
"learning_rate": 2.794480701395219e-07,
|
|
"logits/chosen": -0.4715597331523895,
|
|
"logits/rejected": -0.459200382232666,
|
|
"logps/chosen": -371.7664489746094,
|
|
"logps/ref_chosen": -58.39533996582031,
|
|
"logps/ref_rejected": -80.33553314208984,
|
|
"logps/rejected": -606.468505859375,
|
|
"loss": 1.094,
|
|
"margin_dpo/margin_mean": 212.76190185546875,
|
|
"margin_dpo/margin_std": 286.2218017578125,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.5198237885462555,
|
|
"fcm_dpo/beta": 0.0018948422512039542,
|
|
"fcm_dpo/delta": -0.03894488885998726,
|
|
"fcm_dpo/margin": 230.70608520507812,
|
|
"fcm_dpo/q_t": 0.3976895213127136,
|
|
"grad_norm": 43.41948699951172,
|
|
"learning_rate": 2.781732916288303e-07,
|
|
"logits/chosen": -0.44769683480262756,
|
|
"logits/rejected": -0.43864506483078003,
|
|
"logps/chosen": -324.921875,
|
|
"logps/ref_chosen": -59.80299377441406,
|
|
"logps/ref_rejected": -88.75750732421875,
|
|
"logps/rejected": -584.58251953125,
|
|
"loss": 1.0434,
|
|
"margin_dpo/margin_mean": 230.70606994628906,
|
|
"margin_dpo/margin_std": 237.67138671875,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.5212922173274597,
|
|
"fcm_dpo/beta": 0.0018797200173139572,
|
|
"fcm_dpo/delta": -0.03509457781910896,
|
|
"fcm_dpo/margin": 230.56893920898438,
|
|
"fcm_dpo/q_t": 0.39808282256126404,
|
|
"grad_norm": 47.345184326171875,
|
|
"learning_rate": 2.7689777072570284e-07,
|
|
"logits/chosen": -0.4880906343460083,
|
|
"logits/rejected": -0.4761296510696411,
|
|
"logps/chosen": -330.4738464355469,
|
|
"logps/ref_chosen": -54.12849807739258,
|
|
"logps/ref_rejected": -82.40606689453125,
|
|
"logps/rejected": -589.3203125,
|
|
"loss": 1.0473,
|
|
"margin_dpo/margin_mean": 230.5689239501953,
|
|
"margin_dpo/margin_std": 238.7706298828125,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.5227606461086637,
|
|
"fcm_dpo/beta": 0.001937782857567072,
|
|
"fcm_dpo/delta": 0.16766567528247833,
|
|
"fcm_dpo/margin": 121.26019287109375,
|
|
"fcm_dpo/q_t": 0.4479098618030548,
|
|
"grad_norm": 56.13861083984375,
|
|
"learning_rate": 2.7562154104130176e-07,
|
|
"logits/chosen": -0.47440847754478455,
|
|
"logits/rejected": -0.4546169638633728,
|
|
"logps/chosen": -418.028564453125,
|
|
"logps/ref_chosen": -64.6738052368164,
|
|
"logps/ref_rejected": -75.89926147460938,
|
|
"logps/rejected": -550.5142211914062,
|
|
"loss": 1.2588,
|
|
"margin_dpo/margin_mean": 121.26020050048828,
|
|
"margin_dpo/margin_std": 323.25531005859375,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.5242290748898678,
|
|
"fcm_dpo/beta": 0.001942659611813724,
|
|
"fcm_dpo/delta": 0.021397359669208527,
|
|
"fcm_dpo/margin": 195.1722412109375,
|
|
"fcm_dpo/q_t": 0.4128842055797577,
|
|
"grad_norm": 41.46245193481445,
|
|
"learning_rate": 2.7434463620546594e-07,
|
|
"logits/chosen": -0.44245290756225586,
|
|
"logits/rejected": -0.42818719148635864,
|
|
"logps/chosen": -383.28961181640625,
|
|
"logps/ref_chosen": -52.725799560546875,
|
|
"logps/ref_rejected": -86.84115600585938,
|
|
"logps/rejected": -612.5772705078125,
|
|
"loss": 1.1159,
|
|
"margin_dpo/margin_mean": 195.1722412109375,
|
|
"margin_dpo/margin_std": 283.9158935546875,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.5256975036710719,
|
|
"fcm_dpo/beta": 0.0019549184944480658,
|
|
"fcm_dpo/delta": 0.008138120174407959,
|
|
"fcm_dpo/margin": 200.61288452148438,
|
|
"fcm_dpo/q_t": 0.4124792516231537,
|
|
"grad_norm": 31.154674530029297,
|
|
"learning_rate": 2.730670898658255e-07,
|
|
"logits/chosen": -0.4751536548137665,
|
|
"logits/rejected": -0.45662182569503784,
|
|
"logps/chosen": -362.30657958984375,
|
|
"logps/ref_chosen": -63.20543670654297,
|
|
"logps/ref_rejected": -88.373291015625,
|
|
"logps/rejected": -588.0873413085938,
|
|
"loss": 1.1075,
|
|
"margin_dpo/margin_mean": 200.61288452148438,
|
|
"margin_dpo/margin_std": 293.744384765625,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.527165932452276,
|
|
"fcm_dpo/beta": 0.0019379984587430954,
|
|
"fcm_dpo/delta": -0.057486288249492645,
|
|
"fcm_dpo/margin": 234.68478393554688,
|
|
"fcm_dpo/q_t": 0.3975210189819336,
|
|
"grad_norm": 43.98432922363281,
|
|
"learning_rate": 2.717889356869146e-07,
|
|
"logits/chosen": -0.4332413673400879,
|
|
"logits/rejected": -0.41946709156036377,
|
|
"logps/chosen": -396.8498840332031,
|
|
"logps/ref_chosen": -56.370216369628906,
|
|
"logps/ref_rejected": -82.17375183105469,
|
|
"logps/rejected": -657.3382568359375,
|
|
"loss": 1.068,
|
|
"margin_dpo/margin_mean": 234.68478393554688,
|
|
"margin_dpo/margin_std": 311.1407775878906,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.5286343612334802,
|
|
"fcm_dpo/beta": 0.0019518618937581778,
|
|
"fcm_dpo/delta": 0.05515030398964882,
|
|
"fcm_dpo/margin": 177.61534118652344,
|
|
"fcm_dpo/q_t": 0.41836071014404297,
|
|
"grad_norm": 32.58218002319336,
|
|
"learning_rate": 2.7051020734928443e-07,
|
|
"logits/chosen": -0.4247097969055176,
|
|
"logits/rejected": -0.4117840826511383,
|
|
"logps/chosen": -380.07391357421875,
|
|
"logps/ref_chosen": -51.460384368896484,
|
|
"logps/ref_rejected": -69.83892059326172,
|
|
"logps/rejected": -576.0677490234375,
|
|
"loss": 1.109,
|
|
"margin_dpo/margin_mean": 177.61534118652344,
|
|
"margin_dpo/margin_std": 208.568359375,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.5301027900146843,
|
|
"fcm_dpo/beta": 0.001977581763640046,
|
|
"fcm_dpo/delta": 0.06216863542795181,
|
|
"fcm_dpo/margin": 171.79086303710938,
|
|
"fcm_dpo/q_t": 0.42134982347488403,
|
|
"grad_norm": 42.63149642944336,
|
|
"learning_rate": 2.6923093854861593e-07,
|
|
"logits/chosen": -0.43728476762771606,
|
|
"logits/rejected": -0.4339728355407715,
|
|
"logps/chosen": -395.3648681640625,
|
|
"logps/ref_chosen": -53.86951446533203,
|
|
"logps/ref_rejected": -90.7692642211914,
|
|
"logps/rejected": -604.0554809570312,
|
|
"loss": 1.1364,
|
|
"margin_dpo/margin_mean": 171.79086303710938,
|
|
"margin_dpo/margin_std": 256.6304931640625,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.5315712187958884,
|
|
"fcm_dpo/beta": 0.00193558179307729,
|
|
"fcm_dpo/delta": -0.1272476613521576,
|
|
"fcm_dpo/margin": 268.6226806640625,
|
|
"fcm_dpo/q_t": 0.3800477683544159,
|
|
"grad_norm": 41.641483306884766,
|
|
"learning_rate": 2.679511629948319e-07,
|
|
"logits/chosen": -0.44722434878349304,
|
|
"logits/rejected": -0.46128737926483154,
|
|
"logps/chosen": -343.3209228515625,
|
|
"logps/ref_chosen": -58.639060974121094,
|
|
"logps/ref_rejected": -105.58195495605469,
|
|
"logps/rejected": -658.886474609375,
|
|
"loss": 0.9893,
|
|
"margin_dpo/margin_mean": 268.6226806640625,
|
|
"margin_dpo/margin_std": 259.2852783203125,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.5330396475770925,
|
|
"fcm_dpo/beta": 0.0018970153760164976,
|
|
"fcm_dpo/delta": -0.1342625916004181,
|
|
"fcm_dpo/margin": 278.00701904296875,
|
|
"fcm_dpo/q_t": 0.3781528174877167,
|
|
"grad_norm": 37.65498733520508,
|
|
"learning_rate": 2.6667091441120816e-07,
|
|
"logits/chosen": -0.4264340400695801,
|
|
"logits/rejected": -0.42666763067245483,
|
|
"logps/chosen": -301.8473815917969,
|
|
"logps/ref_chosen": -44.558380126953125,
|
|
"logps/ref_rejected": -74.69496154785156,
|
|
"logps/rejected": -609.990966796875,
|
|
"loss": 0.9853,
|
|
"margin_dpo/margin_mean": 278.00701904296875,
|
|
"margin_dpo/margin_std": 266.7933349609375,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.5345080763582967,
|
|
"fcm_dpo/beta": 0.0018907999619841576,
|
|
"fcm_dpo/delta": 0.014427829533815384,
|
|
"fcm_dpo/margin": 203.99966430664062,
|
|
"fcm_dpo/q_t": 0.4105517864227295,
|
|
"grad_norm": 23.780954360961914,
|
|
"learning_rate": 2.6539022653348575e-07,
|
|
"logits/chosen": -0.4266408383846283,
|
|
"logits/rejected": -0.4368516504764557,
|
|
"logps/chosen": -334.2245788574219,
|
|
"logps/ref_chosen": -48.894622802734375,
|
|
"logps/ref_rejected": -91.395751953125,
|
|
"logps/rejected": -580.725341796875,
|
|
"loss": 1.101,
|
|
"margin_dpo/margin_mean": 203.99966430664062,
|
|
"margin_dpo/margin_std": 271.7967529296875,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.5359765051395007,
|
|
"fcm_dpo/beta": 0.0018834024667739868,
|
|
"fcm_dpo/delta": -0.004186911974102259,
|
|
"fcm_dpo/margin": 214.51487731933594,
|
|
"fcm_dpo/q_t": 0.40672045946121216,
|
|
"grad_norm": 26.113784790039062,
|
|
"learning_rate": 2.641091331089811e-07,
|
|
"logits/chosen": -0.4105398654937744,
|
|
"logits/rejected": -0.42130357027053833,
|
|
"logps/chosen": -327.7710266113281,
|
|
"logps/ref_chosen": -51.49274444580078,
|
|
"logps/ref_rejected": -92.70166778564453,
|
|
"logps/rejected": -583.4948120117188,
|
|
"loss": 1.0732,
|
|
"margin_dpo/margin_mean": 214.514892578125,
|
|
"margin_dpo/margin_std": 248.24508666992188,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.5374449339207048,
|
|
"fcm_dpo/beta": 0.001870601437985897,
|
|
"fcm_dpo/delta": -0.01503688097000122,
|
|
"fcm_dpo/margin": 221.3247833251953,
|
|
"fcm_dpo/q_t": 0.40477490425109863,
|
|
"grad_norm": 28.54776954650879,
|
|
"learning_rate": 2.6282766789569736e-07,
|
|
"logits/chosen": -0.3827515244483948,
|
|
"logits/rejected": -0.3970991373062134,
|
|
"logps/chosen": -305.78424072265625,
|
|
"logps/ref_chosen": -44.7205696105957,
|
|
"logps/ref_rejected": -83.31040954589844,
|
|
"logps/rejected": -565.6988525390625,
|
|
"loss": 1.079,
|
|
"margin_dpo/margin_mean": 221.32476806640625,
|
|
"margin_dpo/margin_std": 278.6839904785156,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.5389133627019089,
|
|
"fcm_dpo/beta": 0.001894644577987492,
|
|
"fcm_dpo/delta": 0.050500668585300446,
|
|
"fcm_dpo/margin": 185.32125854492188,
|
|
"fcm_dpo/q_t": 0.41730421781539917,
|
|
"grad_norm": 23.173542022705078,
|
|
"learning_rate": 2.615458646614349e-07,
|
|
"logits/chosen": -0.43230247497558594,
|
|
"logits/rejected": -0.4155634045600891,
|
|
"logps/chosen": -330.4307861328125,
|
|
"logps/ref_chosen": -58.405418395996094,
|
|
"logps/ref_rejected": -76.75132751464844,
|
|
"logps/rejected": -534.0979614257812,
|
|
"loss": 1.1118,
|
|
"margin_dpo/margin_mean": 185.32125854492188,
|
|
"margin_dpo/margin_std": 230.9281005859375,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.540381791483113,
|
|
"fcm_dpo/beta": 0.0018566998187452555,
|
|
"fcm_dpo/delta": -0.14017510414123535,
|
|
"fcm_dpo/margin": 286.89935302734375,
|
|
"fcm_dpo/q_t": 0.37319713830947876,
|
|
"grad_norm": 49.00677490234375,
|
|
"learning_rate": 2.6026375718290083e-07,
|
|
"logits/chosen": -0.42150646448135376,
|
|
"logits/rejected": -0.4286186695098877,
|
|
"logps/chosen": -302.36474609375,
|
|
"logps/ref_chosen": -44.452518463134766,
|
|
"logps/ref_rejected": -98.55526733398438,
|
|
"logps/rejected": -643.3668212890625,
|
|
"loss": 0.961,
|
|
"margin_dpo/margin_mean": 286.89935302734375,
|
|
"margin_dpo/margin_std": 217.0551300048828,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.5418502202643172,
|
|
"fcm_dpo/beta": 0.001882010605186224,
|
|
"fcm_dpo/delta": 0.11852943897247314,
|
|
"fcm_dpo/margin": 151.08335876464844,
|
|
"fcm_dpo/q_t": 0.4338718354701996,
|
|
"grad_norm": 29.52981948852539,
|
|
"learning_rate": 2.589813792448196e-07,
|
|
"logits/chosen": -0.4294429123401642,
|
|
"logits/rejected": -0.4097931981086731,
|
|
"logps/chosen": -410.28509521484375,
|
|
"logps/ref_chosen": -71.38150024414062,
|
|
"logps/ref_rejected": -91.29582214355469,
|
|
"logps/rejected": -581.2828369140625,
|
|
"loss": 1.1846,
|
|
"margin_dpo/margin_mean": 151.08335876464844,
|
|
"margin_dpo/margin_std": 272.0557861328125,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.5433186490455213,
|
|
"fcm_dpo/beta": 0.0019265762530267239,
|
|
"fcm_dpo/delta": 0.12166447192430496,
|
|
"fcm_dpo/margin": 146.07177734375,
|
|
"fcm_dpo/q_t": 0.43590569496154785,
|
|
"grad_norm": 39.32080841064453,
|
|
"learning_rate": 2.5769876463904263e-07,
|
|
"logits/chosen": -0.4616813063621521,
|
|
"logits/rejected": -0.45466917753219604,
|
|
"logps/chosen": -428.994384765625,
|
|
"logps/ref_chosen": -71.60749816894531,
|
|
"logps/ref_rejected": -97.25978088378906,
|
|
"logps/rejected": -600.718505859375,
|
|
"loss": 1.1947,
|
|
"margin_dpo/margin_mean": 146.07177734375,
|
|
"margin_dpo/margin_std": 281.0660095214844,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.5447870778267254,
|
|
"fcm_dpo/beta": 0.0019367990316823125,
|
|
"fcm_dpo/delta": -0.011994550004601479,
|
|
"fcm_dpo/margin": 212.38925170898438,
|
|
"fcm_dpo/q_t": 0.4069690704345703,
|
|
"grad_norm": 33.81187057495117,
|
|
"learning_rate": 2.5641594716365744e-07,
|
|
"logits/chosen": -0.5018552541732788,
|
|
"logits/rejected": -0.4875721335411072,
|
|
"logps/chosen": -412.5298767089844,
|
|
"logps/ref_chosen": -69.41448974609375,
|
|
"logps/ref_rejected": -99.17217254638672,
|
|
"logps/rejected": -654.6768188476562,
|
|
"loss": 1.0955,
|
|
"margin_dpo/margin_mean": 212.3892364501953,
|
|
"margin_dpo/margin_std": 298.9532775878906,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.5462555066079295,
|
|
"fcm_dpo/beta": 0.0019036408048123121,
|
|
"fcm_dpo/delta": -0.08581465482711792,
|
|
"fcm_dpo/margin": 252.9879608154297,
|
|
"fcm_dpo/q_t": 0.39388322830200195,
|
|
"grad_norm": 24.240703582763672,
|
|
"learning_rate": 2.551329606220976e-07,
|
|
"logits/chosen": -0.46786242723464966,
|
|
"logits/rejected": -0.45145729184150696,
|
|
"logps/chosen": -392.63507080078125,
|
|
"logps/ref_chosen": -61.8179931640625,
|
|
"logps/ref_rejected": -78.53948974609375,
|
|
"logps/rejected": -662.344482421875,
|
|
"loss": 1.0529,
|
|
"margin_dpo/margin_mean": 252.9879608154297,
|
|
"margin_dpo/margin_std": 342.00787353515625,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.5477239353891337,
|
|
"fcm_dpo/beta": 0.0018930002115666866,
|
|
"fcm_dpo/delta": -0.05019930750131607,
|
|
"fcm_dpo/margin": 236.47280883789062,
|
|
"fcm_dpo/q_t": 0.3964656591415405,
|
|
"grad_norm": 29.245372772216797,
|
|
"learning_rate": 2.538498388222517e-07,
|
|
"logits/chosen": -0.4352779984474182,
|
|
"logits/rejected": -0.4134613871574402,
|
|
"logps/chosen": -427.55157470703125,
|
|
"logps/ref_chosen": -64.21713256835938,
|
|
"logps/ref_rejected": -85.95960998535156,
|
|
"logps/rejected": -685.766845703125,
|
|
"loss": 1.0577,
|
|
"margin_dpo/margin_mean": 236.47280883789062,
|
|
"margin_dpo/margin_std": 278.249755859375,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.5491923641703378,
|
|
"fcm_dpo/beta": 0.0018695429898798466,
|
|
"fcm_dpo/delta": 0.0013285353779792786,
|
|
"fcm_dpo/margin": 213.04611206054688,
|
|
"fcm_dpo/q_t": 0.412605345249176,
|
|
"grad_norm": 28.108699798583984,
|
|
"learning_rate": 2.525666155755725e-07,
|
|
"logits/chosen": -0.5210952162742615,
|
|
"logits/rejected": -0.5011500120162964,
|
|
"logps/chosen": -390.86297607421875,
|
|
"logps/ref_chosen": -70.65018463134766,
|
|
"logps/ref_rejected": -93.64016723632812,
|
|
"logps/rejected": -626.8990478515625,
|
|
"loss": 1.1194,
|
|
"margin_dpo/margin_mean": 213.04612731933594,
|
|
"margin_dpo/margin_std": 341.2312316894531,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.5506607929515418,
|
|
"fcm_dpo/beta": 0.0018667408730834723,
|
|
"fcm_dpo/delta": -0.00040426477789878845,
|
|
"fcm_dpo/margin": 214.07928466796875,
|
|
"fcm_dpo/q_t": 0.4087774455547333,
|
|
"grad_norm": 31.289676666259766,
|
|
"learning_rate": 2.512833246961859e-07,
|
|
"logits/chosen": -0.426000714302063,
|
|
"logits/rejected": -0.42188286781311035,
|
|
"logps/chosen": -394.99072265625,
|
|
"logps/ref_chosen": -60.080223083496094,
|
|
"logps/ref_rejected": -88.93830871582031,
|
|
"logps/rejected": -637.9281005859375,
|
|
"loss": 1.1052,
|
|
"margin_dpo/margin_mean": 214.07928466796875,
|
|
"margin_dpo/margin_std": 306.2797546386719,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.5521292217327459,
|
|
"fcm_dpo/beta": 0.0018509968649595976,
|
|
"fcm_dpo/delta": -0.08695752173662186,
|
|
"fcm_dpo/margin": 260.70465087890625,
|
|
"fcm_dpo/q_t": 0.38965705037117004,
|
|
"grad_norm": 25.07152557373047,
|
|
"learning_rate": 2.5e-07,
|
|
"logits/chosen": -0.4629325866699219,
|
|
"logits/rejected": -0.4503718614578247,
|
|
"logps/chosen": -398.86688232421875,
|
|
"logps/ref_chosen": -62.660308837890625,
|
|
"logps/ref_rejected": -105.52660369873047,
|
|
"logps/rejected": -702.4378051757812,
|
|
"loss": 1.0414,
|
|
"margin_dpo/margin_mean": 260.70465087890625,
|
|
"margin_dpo/margin_std": 318.3728942871094,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.55359765051395,
|
|
"fcm_dpo/beta": 0.0018346365541219711,
|
|
"fcm_dpo/delta": -0.055273640900850296,
|
|
"fcm_dpo/margin": 246.80406188964844,
|
|
"fcm_dpo/q_t": 0.3960615396499634,
|
|
"grad_norm": 27.58287811279297,
|
|
"learning_rate": 2.487166753038141e-07,
|
|
"logits/chosen": -0.4195740818977356,
|
|
"logits/rejected": -0.4171292185783386,
|
|
"logps/chosen": -392.3238830566406,
|
|
"logps/ref_chosen": -54.478736877441406,
|
|
"logps/ref_rejected": -98.70335388183594,
|
|
"logps/rejected": -683.3525390625,
|
|
"loss": 1.0526,
|
|
"margin_dpo/margin_mean": 246.80404663085938,
|
|
"margin_dpo/margin_std": 296.20745849609375,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.5550660792951542,
|
|
"fcm_dpo/beta": 0.0018070295918732882,
|
|
"fcm_dpo/delta": -0.0666477382183075,
|
|
"fcm_dpo/margin": 256.5183410644531,
|
|
"fcm_dpo/q_t": 0.3916090726852417,
|
|
"grad_norm": 28.932802200317383,
|
|
"learning_rate": 2.4743338442442754e-07,
|
|
"logits/chosen": -0.4206734001636505,
|
|
"logits/rejected": -0.4349641501903534,
|
|
"logps/chosen": -375.62255859375,
|
|
"logps/ref_chosen": -45.02053451538086,
|
|
"logps/ref_rejected": -88.0469741821289,
|
|
"logps/rejected": -675.1673583984375,
|
|
"loss": 1.035,
|
|
"margin_dpo/margin_mean": 256.5183410644531,
|
|
"margin_dpo/margin_std": 279.6002197265625,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.5565345080763583,
|
|
"fcm_dpo/beta": 0.0017812212463468313,
|
|
"fcm_dpo/delta": -0.045191098004579544,
|
|
"fcm_dpo/margin": 248.47398376464844,
|
|
"fcm_dpo/q_t": 0.39899513125419617,
|
|
"grad_norm": 36.7637939453125,
|
|
"learning_rate": 2.461501611777483e-07,
|
|
"logits/chosen": -0.37811413407325745,
|
|
"logits/rejected": -0.39795851707458496,
|
|
"logps/chosen": -419.84063720703125,
|
|
"logps/ref_chosen": -53.182098388671875,
|
|
"logps/ref_rejected": -114.3001708984375,
|
|
"logps/rejected": -729.4326782226562,
|
|
"loss": 1.0653,
|
|
"margin_dpo/margin_mean": 248.47398376464844,
|
|
"margin_dpo/margin_std": 314.6936340332031,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.5580029368575624,
|
|
"fcm_dpo/beta": 0.0017588778864592314,
|
|
"fcm_dpo/delta": -0.10711812973022461,
|
|
"fcm_dpo/margin": 285.31103515625,
|
|
"fcm_dpo/q_t": 0.3839528560638428,
|
|
"grad_norm": 23.18588638305664,
|
|
"learning_rate": 2.4486703937790243e-07,
|
|
"logits/chosen": -0.4267119765281677,
|
|
"logits/rejected": -0.45323461294174194,
|
|
"logps/chosen": -399.3060607910156,
|
|
"logps/ref_chosen": -51.3530387878418,
|
|
"logps/ref_rejected": -104.19169616699219,
|
|
"logps/rejected": -737.4556884765625,
|
|
"loss": 1.0213,
|
|
"margin_dpo/margin_mean": 285.31103515625,
|
|
"margin_dpo/margin_std": 325.0993347167969,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.5594713656387665,
|
|
"fcm_dpo/beta": 0.0017572679789736867,
|
|
"fcm_dpo/delta": 0.04623348265886307,
|
|
"fcm_dpo/margin": 202.26556396484375,
|
|
"fcm_dpo/q_t": 0.41859930753707886,
|
|
"grad_norm": 25.287288665771484,
|
|
"learning_rate": 2.435840528363426e-07,
|
|
"logits/chosen": -0.4158741235733032,
|
|
"logits/rejected": -0.39695611596107483,
|
|
"logps/chosen": -411.60211181640625,
|
|
"logps/ref_chosen": -57.80306625366211,
|
|
"logps/ref_rejected": -79.21940612792969,
|
|
"logps/rejected": -635.2840576171875,
|
|
"loss": 1.1416,
|
|
"margin_dpo/margin_mean": 202.26556396484375,
|
|
"margin_dpo/margin_std": 330.1795654296875,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.5609397944199707,
|
|
"fcm_dpo/beta": 0.0017583861481398344,
|
|
"fcm_dpo/delta": -0.020793016999959946,
|
|
"fcm_dpo/margin": 238.81103515625,
|
|
"fcm_dpo/q_t": 0.40172097086906433,
|
|
"grad_norm": 27.541227340698242,
|
|
"learning_rate": 2.4230123536095745e-07,
|
|
"logits/chosen": -0.45488008856773376,
|
|
"logits/rejected": -0.46144935488700867,
|
|
"logps/chosen": -410.63665771484375,
|
|
"logps/ref_chosen": -66.02030181884766,
|
|
"logps/ref_rejected": -110.71016693115234,
|
|
"logps/rejected": -694.1375732421875,
|
|
"loss": 1.0582,
|
|
"margin_dpo/margin_mean": 238.81101989746094,
|
|
"margin_dpo/margin_std": 257.6006774902344,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.5624082232011748,
|
|
"fcm_dpo/beta": 0.0017506459262222052,
|
|
"fcm_dpo/delta": -0.016186170279979706,
|
|
"fcm_dpo/margin": 237.33407592773438,
|
|
"fcm_dpo/q_t": 0.40410494804382324,
|
|
"grad_norm": 35.84563446044922,
|
|
"learning_rate": 2.4101862075518037e-07,
|
|
"logits/chosen": -0.4268774092197418,
|
|
"logits/rejected": -0.43681007623672485,
|
|
"logps/chosen": -397.21624755859375,
|
|
"logps/ref_chosen": -50.39148712158203,
|
|
"logps/ref_rejected": -93.71589660644531,
|
|
"logps/rejected": -677.874755859375,
|
|
"loss": 1.0905,
|
|
"margin_dpo/margin_mean": 237.33407592773438,
|
|
"margin_dpo/margin_std": 328.8858947753906,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.5638766519823789,
|
|
"fcm_dpo/beta": 0.0017733362037688494,
|
|
"fcm_dpo/delta": 0.06153050810098648,
|
|
"fcm_dpo/margin": 191.76878356933594,
|
|
"fcm_dpo/q_t": 0.4192041754722595,
|
|
"grad_norm": 23.139604568481445,
|
|
"learning_rate": 2.397362428170992e-07,
|
|
"logits/chosen": -0.45157086849212646,
|
|
"logits/rejected": -0.4445483684539795,
|
|
"logps/chosen": -395.2562255859375,
|
|
"logps/ref_chosen": -52.046104431152344,
|
|
"logps/ref_rejected": -85.76089477539062,
|
|
"logps/rejected": -620.7398681640625,
|
|
"loss": 1.1117,
|
|
"margin_dpo/margin_mean": 191.76878356933594,
|
|
"margin_dpo/margin_std": 217.23977661132812,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.5653450807635829,
|
|
"fcm_dpo/beta": 0.0017724630888551474,
|
|
"fcm_dpo/delta": -0.0006713038310408592,
|
|
"fcm_dpo/margin": 226.03790283203125,
|
|
"fcm_dpo/q_t": 0.4047047793865204,
|
|
"grad_norm": 36.076045989990234,
|
|
"learning_rate": 2.3845413533856514e-07,
|
|
"logits/chosen": -0.4669855237007141,
|
|
"logits/rejected": -0.4436225891113281,
|
|
"logps/chosen": -375.51416015625,
|
|
"logps/ref_chosen": -65.55215454101562,
|
|
"logps/ref_rejected": -77.82792663574219,
|
|
"logps/rejected": -613.8278198242188,
|
|
"loss": 1.0627,
|
|
"margin_dpo/margin_mean": 226.03790283203125,
|
|
"margin_dpo/margin_std": 223.3173065185547,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.566813509544787,
|
|
"fcm_dpo/beta": 0.0017592008225619793,
|
|
"fcm_dpo/delta": -0.05203462019562721,
|
|
"fcm_dpo/margin": 255.63485717773438,
|
|
"fcm_dpo/q_t": 0.39665845036506653,
|
|
"grad_norm": 25.189849853515625,
|
|
"learning_rate": 2.3717233210430254e-07,
|
|
"logits/chosen": -0.4352476894855499,
|
|
"logits/rejected": -0.4300195872783661,
|
|
"logps/chosen": -392.69586181640625,
|
|
"logps/ref_chosen": -58.22185516357422,
|
|
"logps/ref_rejected": -92.32742309570312,
|
|
"logps/rejected": -682.436279296875,
|
|
"loss": 1.0532,
|
|
"margin_dpo/margin_mean": 255.63485717773438,
|
|
"margin_dpo/margin_std": 307.45904541015625,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.5682819383259912,
|
|
"fcm_dpo/beta": 0.0017553928773850203,
|
|
"fcm_dpo/delta": 0.03558676689863205,
|
|
"fcm_dpo/margin": 208.20899963378906,
|
|
"fcm_dpo/q_t": 0.4137038588523865,
|
|
"grad_norm": 32.04467010498047,
|
|
"learning_rate": 2.3589086689101889e-07,
|
|
"logits/chosen": -0.5002990961074829,
|
|
"logits/rejected": -0.48599332571029663,
|
|
"logps/chosen": -418.4845275878906,
|
|
"logps/ref_chosen": -66.41944885253906,
|
|
"logps/ref_rejected": -92.16915893554688,
|
|
"logps/rejected": -652.4432373046875,
|
|
"loss": 1.096,
|
|
"margin_dpo/margin_mean": 208.208984375,
|
|
"margin_dpo/margin_std": 233.26101684570312,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.5697503671071953,
|
|
"fcm_dpo/beta": 0.001736361999064684,
|
|
"fcm_dpo/delta": -0.09935353696346283,
|
|
"fcm_dpo/margin": 284.57513427734375,
|
|
"fcm_dpo/q_t": 0.38717541098594666,
|
|
"grad_norm": 25.433643341064453,
|
|
"learning_rate": 2.3460977346651428e-07,
|
|
"logits/chosen": -0.43736058473587036,
|
|
"logits/rejected": -0.44726645946502686,
|
|
"logps/chosen": -382.73590087890625,
|
|
"logps/ref_chosen": -50.129459381103516,
|
|
"logps/ref_rejected": -104.43305969238281,
|
|
"logps/rejected": -721.6146240234375,
|
|
"loss": 1.0199,
|
|
"margin_dpo/margin_mean": 284.57513427734375,
|
|
"margin_dpo/margin_std": 316.06414794921875,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.5712187958883994,
|
|
"fcm_dpo/beta": 0.0017261260654777288,
|
|
"fcm_dpo/delta": -0.010240463539958,
|
|
"fcm_dpo/margin": 237.38491821289062,
|
|
"fcm_dpo/q_t": 0.4057249426841736,
|
|
"grad_norm": 29.678983688354492,
|
|
"learning_rate": 2.3332908558879177e-07,
|
|
"logits/chosen": -0.4718496799468994,
|
|
"logits/rejected": -0.46279120445251465,
|
|
"logps/chosen": -426.9161071777344,
|
|
"logps/ref_chosen": -57.906593322753906,
|
|
"logps/ref_rejected": -77.91454315185547,
|
|
"logps/rejected": -684.3089599609375,
|
|
"loss": 1.0856,
|
|
"margin_dpo/margin_mean": 237.3849334716797,
|
|
"margin_dpo/margin_std": 311.5970458984375,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.5726872246696035,
|
|
"fcm_dpo/beta": 0.0017196969129145145,
|
|
"fcm_dpo/delta": -0.005931627005338669,
|
|
"fcm_dpo/margin": 235.7211151123047,
|
|
"fcm_dpo/q_t": 0.4099145531654358,
|
|
"grad_norm": 25.528303146362305,
|
|
"learning_rate": 2.320488370051681e-07,
|
|
"logits/chosen": -0.4342801570892334,
|
|
"logits/rejected": -0.4257649779319763,
|
|
"logps/chosen": -403.05682373046875,
|
|
"logps/ref_chosen": -49.22591781616211,
|
|
"logps/ref_rejected": -85.5281982421875,
|
|
"logps/rejected": -675.0802001953125,
|
|
"loss": 1.1052,
|
|
"margin_dpo/margin_mean": 235.72113037109375,
|
|
"margin_dpo/margin_std": 351.66534423828125,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.5741556534508077,
|
|
"fcm_dpo/beta": 0.0017695992719382048,
|
|
"fcm_dpo/delta": 0.1603117436170578,
|
|
"fcm_dpo/margin": 137.55014038085938,
|
|
"fcm_dpo/q_t": 0.44382068514823914,
|
|
"grad_norm": 41.52486801147461,
|
|
"learning_rate": 2.3076906145138405e-07,
|
|
"logits/chosen": -0.46436670422554016,
|
|
"logits/rejected": -0.4556189179420471,
|
|
"logps/chosen": -420.87359619140625,
|
|
"logps/ref_chosen": -64.32965087890625,
|
|
"logps/ref_rejected": -86.73820495605469,
|
|
"logps/rejected": -580.832275390625,
|
|
"loss": 1.2149,
|
|
"margin_dpo/margin_mean": 137.55014038085938,
|
|
"margin_dpo/margin_std": 271.1540222167969,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.5756240822320118,
|
|
"fcm_dpo/beta": 0.0017650609370321035,
|
|
"fcm_dpo/delta": -0.08989348262548447,
|
|
"fcm_dpo/margin": 275.10516357421875,
|
|
"fcm_dpo/q_t": 0.3868984878063202,
|
|
"grad_norm": 31.219282150268555,
|
|
"learning_rate": 2.294897926507156e-07,
|
|
"logits/chosen": -0.42887139320373535,
|
|
"logits/rejected": -0.4229578375816345,
|
|
"logps/chosen": -352.71368408203125,
|
|
"logps/ref_chosen": -53.50397872924805,
|
|
"logps/ref_rejected": -102.34584045410156,
|
|
"logps/rejected": -676.6607055664062,
|
|
"loss": 1.0083,
|
|
"margin_dpo/margin_mean": 275.10516357421875,
|
|
"margin_dpo/margin_std": 260.21282958984375,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.5770925110132159,
|
|
"fcm_dpo/beta": 0.0017527798190712929,
|
|
"fcm_dpo/delta": 0.019200202077627182,
|
|
"fcm_dpo/margin": 217.66339111328125,
|
|
"fcm_dpo/q_t": 0.41545170545578003,
|
|
"grad_norm": 25.45195198059082,
|
|
"learning_rate": 2.2821106431308543e-07,
|
|
"logits/chosen": -0.4183732867240906,
|
|
"logits/rejected": -0.41586729884147644,
|
|
"logps/chosen": -345.04229736328125,
|
|
"logps/ref_chosen": -46.473915100097656,
|
|
"logps/ref_rejected": -71.96885681152344,
|
|
"logps/rejected": -588.20068359375,
|
|
"loss": 1.1209,
|
|
"margin_dpo/margin_mean": 217.66339111328125,
|
|
"margin_dpo/margin_std": 343.6617431640625,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.57856093979442,
|
|
"fcm_dpo/beta": 0.0017561479471623898,
|
|
"fcm_dpo/delta": 0.005379532463848591,
|
|
"fcm_dpo/margin": 224.81240844726562,
|
|
"fcm_dpo/q_t": 0.40968430042266846,
|
|
"grad_norm": 26.87594223022461,
|
|
"learning_rate": 2.2693291013417452e-07,
|
|
"logits/chosen": -0.416803240776062,
|
|
"logits/rejected": -0.41659796237945557,
|
|
"logps/chosen": -388.80706787109375,
|
|
"logps/ref_chosen": -52.91154861450195,
|
|
"logps/ref_rejected": -90.8226318359375,
|
|
"logps/rejected": -651.530517578125,
|
|
"loss": 1.0913,
|
|
"margin_dpo/margin_mean": 224.81239318847656,
|
|
"margin_dpo/margin_std": 293.50146484375,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.580029368575624,
|
|
"fcm_dpo/beta": 0.0017417933559045196,
|
|
"fcm_dpo/delta": -0.041999928653240204,
|
|
"fcm_dpo/margin": 252.47410583496094,
|
|
"fcm_dpo/q_t": 0.3995997905731201,
|
|
"grad_norm": 25.122419357299805,
|
|
"learning_rate": 2.2565536379453404e-07,
|
|
"logits/chosen": -0.47765952348709106,
|
|
"logits/rejected": -0.473066508769989,
|
|
"logps/chosen": -398.39801025390625,
|
|
"logps/ref_chosen": -62.546112060546875,
|
|
"logps/ref_rejected": -83.78262329101562,
|
|
"logps/rejected": -672.108642578125,
|
|
"loss": 1.0658,
|
|
"margin_dpo/margin_mean": 252.47409057617188,
|
|
"margin_dpo/margin_std": 318.75537109375,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.5814977973568282,
|
|
"fcm_dpo/beta": 0.001741830026730895,
|
|
"fcm_dpo/delta": 0.00434575229883194,
|
|
"fcm_dpo/margin": 227.20425415039062,
|
|
"fcm_dpo/q_t": 0.40757039189338684,
|
|
"grad_norm": 29.88756561279297,
|
|
"learning_rate": 2.2437845895869825e-07,
|
|
"logits/chosen": -0.4663088619709015,
|
|
"logits/rejected": -0.4470548629760742,
|
|
"logps/chosen": -417.51031494140625,
|
|
"logps/ref_chosen": -68.99594116210938,
|
|
"logps/ref_rejected": -88.64665985107422,
|
|
"logps/rejected": -664.365234375,
|
|
"loss": 1.0774,
|
|
"margin_dpo/margin_mean": 227.20425415039062,
|
|
"margin_dpo/margin_std": 261.07513427734375,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.5829662261380323,
|
|
"fcm_dpo/beta": 0.001714893733151257,
|
|
"fcm_dpo/delta": -0.09588249027729034,
|
|
"fcm_dpo/margin": 286.07928466796875,
|
|
"fcm_dpo/q_t": 0.386309951543808,
|
|
"grad_norm": 33.899620056152344,
|
|
"learning_rate": 2.2310222927429716e-07,
|
|
"logits/chosen": -0.412584125995636,
|
|
"logits/rejected": -0.41607069969177246,
|
|
"logps/chosen": -393.6998291015625,
|
|
"logps/ref_chosen": -61.27716827392578,
|
|
"logps/ref_rejected": -103.11612701416016,
|
|
"logps/rejected": -721.6180419921875,
|
|
"loss": 1.0151,
|
|
"margin_dpo/margin_mean": 286.07928466796875,
|
|
"margin_dpo/margin_std": 297.44598388671875,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.5844346549192364,
|
|
"fcm_dpo/beta": 0.0017011422896757722,
|
|
"fcm_dpo/delta": -0.04757946729660034,
|
|
"fcm_dpo/margin": 261.866943359375,
|
|
"fcm_dpo/q_t": 0.3992553949356079,
|
|
"grad_norm": 21.9180965423584,
|
|
"learning_rate": 2.2182670837116972e-07,
|
|
"logits/chosen": -0.49817049503326416,
|
|
"logits/rejected": -0.49308010935783386,
|
|
"logps/chosen": -433.2986145019531,
|
|
"logps/ref_chosen": -68.15155029296875,
|
|
"logps/ref_rejected": -108.52360534667969,
|
|
"logps/rejected": -735.53759765625,
|
|
"loss": 1.0678,
|
|
"margin_dpo/margin_mean": 261.866943359375,
|
|
"margin_dpo/margin_std": 345.6563720703125,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.5859030837004405,
|
|
"fcm_dpo/beta": 0.0016931265126913786,
|
|
"fcm_dpo/delta": 0.007417585700750351,
|
|
"fcm_dpo/margin": 231.94464111328125,
|
|
"fcm_dpo/q_t": 0.4104883074760437,
|
|
"grad_norm": 35.04402160644531,
|
|
"learning_rate": 2.2055192986047804e-07,
|
|
"logits/chosen": -0.44559311866760254,
|
|
"logits/rejected": -0.4083556532859802,
|
|
"logps/chosen": -388.63470458984375,
|
|
"logps/ref_chosen": -60.889801025390625,
|
|
"logps/ref_rejected": -77.965576171875,
|
|
"logps/rejected": -637.6551513671875,
|
|
"loss": 1.1115,
|
|
"margin_dpo/margin_mean": 231.9446563720703,
|
|
"margin_dpo/margin_std": 344.48883056640625,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.5873715124816447,
|
|
"fcm_dpo/beta": 0.0016531790606677532,
|
|
"fcm_dpo/delta": -0.16823890805244446,
|
|
"fcm_dpo/margin": 337.75396728515625,
|
|
"fcm_dpo/q_t": 0.36917316913604736,
|
|
"grad_norm": 28.63687515258789,
|
|
"learning_rate": 2.192779273338215e-07,
|
|
"logits/chosen": -0.4327552914619446,
|
|
"logits/rejected": -0.4296589195728302,
|
|
"logps/chosen": -371.40936279296875,
|
|
"logps/ref_chosen": -63.64359664916992,
|
|
"logps/ref_rejected": -105.252685546875,
|
|
"logps/rejected": -750.7724609375,
|
|
"loss": 0.9641,
|
|
"margin_dpo/margin_mean": 337.75396728515625,
|
|
"margin_dpo/margin_std": 308.93011474609375,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.5873715124816447,
|
|
"eval_fcm_dpo/beta": 0.0016427375376224518,
|
|
"eval_logits/chosen": -0.5096563100814819,
|
|
"eval_logits/rejected": -0.49900755286216736,
|
|
"eval_logps/chosen": -501.3864440917969,
|
|
"eval_logps/ref_chosen": -79.05104064941406,
|
|
"eval_logps/ref_rejected": -86.79793548583984,
|
|
"eval_logps/rejected": -661.2091064453125,
|
|
"eval_loss": 0.6132233738899231,
|
|
"eval_margin_dpo/margin_mean": 152.07579040527344,
|
|
"eval_margin_dpo/margin_std": 334.0150146484375,
|
|
"eval_runtime": 39.2529,
|
|
"eval_samples_per_second": 59.588,
|
|
"eval_steps_per_second": 1.885,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.5888399412628488,
|
|
"fcm_dpo/beta": 0.001660442678257823,
|
|
"fcm_dpo/delta": 0.08897262066602707,
|
|
"fcm_dpo/margin": 189.05294799804688,
|
|
"fcm_dpo/q_t": 0.43077051639556885,
|
|
"grad_norm": 31.62966537475586,
|
|
"learning_rate": 2.1800473436235136e-07,
|
|
"logits/chosen": -0.46482232213020325,
|
|
"logits/rejected": -0.4563053250312805,
|
|
"logps/chosen": -419.8261413574219,
|
|
"logps/ref_chosen": -57.16303253173828,
|
|
"logps/ref_rejected": -83.79249572753906,
|
|
"logps/rejected": -635.508544921875,
|
|
"loss": 1.1971,
|
|
"margin_dpo/margin_mean": 189.05294799804688,
|
|
"margin_dpo/margin_std": 394.0697937011719,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.5903083700440529,
|
|
"fcm_dpo/beta": 0.0016284910961985588,
|
|
"fcm_dpo/delta": -0.17874157428741455,
|
|
"fcm_dpo/margin": 349.1449279785156,
|
|
"fcm_dpo/q_t": 0.36785176396369934,
|
|
"grad_norm": 21.58502197265625,
|
|
"learning_rate": 2.1673238449588665e-07,
|
|
"logits/chosen": -0.4704708456993103,
|
|
"logits/rejected": -0.4619377553462982,
|
|
"logps/chosen": -316.80438232421875,
|
|
"logps/ref_chosen": -50.74037170410156,
|
|
"logps/ref_rejected": -81.0460433959961,
|
|
"logps/rejected": -696.2550048828125,
|
|
"loss": 0.9548,
|
|
"margin_dpo/margin_mean": 349.1449279785156,
|
|
"margin_dpo/margin_std": 313.5357971191406,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.591776798825257,
|
|
"fcm_dpo/beta": 0.0016079884953796864,
|
|
"fcm_dpo/delta": -0.022973710671067238,
|
|
"fcm_dpo/margin": 262.4422607421875,
|
|
"fcm_dpo/q_t": 0.40218695998191833,
|
|
"grad_norm": 27.24091339111328,
|
|
"learning_rate": 2.154609112620295e-07,
|
|
"logits/chosen": -0.4586551785469055,
|
|
"logits/rejected": -0.4596661627292633,
|
|
"logps/chosen": -333.87322998046875,
|
|
"logps/ref_chosen": -47.14731216430664,
|
|
"logps/ref_rejected": -77.2666015625,
|
|
"logps/rejected": -626.4347534179688,
|
|
"loss": 1.0646,
|
|
"margin_dpo/margin_mean": 262.4422302246094,
|
|
"margin_dpo/margin_std": 302.470947265625,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.593245227606461,
|
|
"fcm_dpo/beta": 0.0016004211502149701,
|
|
"fcm_dpo/delta": -0.010493889451026917,
|
|
"fcm_dpo/margin": 256.16156005859375,
|
|
"fcm_dpo/q_t": 0.40553057193756104,
|
|
"grad_norm": 30.971912384033203,
|
|
"learning_rate": 2.1419034816528218e-07,
|
|
"logits/chosen": -0.44930434226989746,
|
|
"logits/rejected": -0.44178757071495056,
|
|
"logps/chosen": -366.42584228515625,
|
|
"logps/ref_chosen": -47.875274658203125,
|
|
"logps/ref_rejected": -77.15499877929688,
|
|
"logps/rejected": -651.8671264648438,
|
|
"loss": 1.0906,
|
|
"margin_dpo/margin_mean": 256.16156005859375,
|
|
"margin_dpo/margin_std": 345.9460144042969,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.5947136563876652,
|
|
"fcm_dpo/beta": 0.0016291348729282618,
|
|
"fcm_dpo/delta": 0.05789618194103241,
|
|
"fcm_dpo/margin": 210.23138427734375,
|
|
"fcm_dpo/q_t": 0.42367473244667053,
|
|
"grad_norm": 32.061859130859375,
|
|
"learning_rate": 2.129207286861638e-07,
|
|
"logits/chosen": -0.4601576626300812,
|
|
"logits/rejected": -0.4487999379634857,
|
|
"logps/chosen": -422.7500305175781,
|
|
"logps/ref_chosen": -65.16290283203125,
|
|
"logps/ref_rejected": -87.18678283691406,
|
|
"logps/rejected": -655.0052490234375,
|
|
"loss": 1.1669,
|
|
"margin_dpo/margin_mean": 210.23141479492188,
|
|
"margin_dpo/margin_std": 378.6610107421875,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.5961820851688693,
|
|
"fcm_dpo/beta": 0.0016158397775143385,
|
|
"fcm_dpo/delta": -0.05560196936130524,
|
|
"fcm_dpo/margin": 280.2799987792969,
|
|
"fcm_dpo/q_t": 0.39691057801246643,
|
|
"grad_norm": 28.331180572509766,
|
|
"learning_rate": 2.1165208628032861e-07,
|
|
"logits/chosen": -0.4773136377334595,
|
|
"logits/rejected": -0.4842330813407898,
|
|
"logps/chosen": -360.078857421875,
|
|
"logps/ref_chosen": -49.740814208984375,
|
|
"logps/ref_rejected": -92.07862854003906,
|
|
"logps/rejected": -682.6966552734375,
|
|
"loss": 1.0544,
|
|
"margin_dpo/margin_mean": 280.2799987792969,
|
|
"margin_dpo/margin_std": 338.8279113769531,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.5976505139500734,
|
|
"fcm_dpo/beta": 0.001641381997615099,
|
|
"fcm_dpo/delta": 0.11392003297805786,
|
|
"fcm_dpo/margin": 175.37591552734375,
|
|
"fcm_dpo/q_t": 0.43280327320098877,
|
|
"grad_norm": 32.0561408996582,
|
|
"learning_rate": 2.1038445437768375e-07,
|
|
"logits/chosen": -0.5068432092666626,
|
|
"logits/rejected": -0.4771941900253296,
|
|
"logps/chosen": -397.24237060546875,
|
|
"logps/ref_chosen": -56.33069610595703,
|
|
"logps/ref_rejected": -77.51209259033203,
|
|
"logps/rejected": -593.7996826171875,
|
|
"loss": 1.1901,
|
|
"margin_dpo/margin_mean": 175.37591552734375,
|
|
"margin_dpo/margin_std": 323.81439208984375,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.5991189427312775,
|
|
"fcm_dpo/beta": 0.001660007401369512,
|
|
"fcm_dpo/delta": 0.07628411054611206,
|
|
"fcm_dpo/margin": 196.483642578125,
|
|
"fcm_dpo/q_t": 0.42345568537712097,
|
|
"grad_norm": 31.536413192749023,
|
|
"learning_rate": 2.0911786638150872e-07,
|
|
"logits/chosen": -0.48006772994995117,
|
|
"logits/rejected": -0.4519917368888855,
|
|
"logps/chosen": -402.90374755859375,
|
|
"logps/ref_chosen": -69.789306640625,
|
|
"logps/ref_rejected": -90.09693908691406,
|
|
"logps/rejected": -619.6949462890625,
|
|
"loss": 1.1308,
|
|
"margin_dpo/margin_mean": 196.48365783691406,
|
|
"margin_dpo/margin_std": 258.8351745605469,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.6005873715124816,
|
|
"fcm_dpo/beta": 0.001686369301751256,
|
|
"fcm_dpo/delta": 0.06914930045604706,
|
|
"fcm_dpo/margin": 197.4263458251953,
|
|
"fcm_dpo/q_t": 0.4226710796356201,
|
|
"grad_norm": 30.05813980102539,
|
|
"learning_rate": 2.0785235566757517e-07,
|
|
"logits/chosen": -0.5069749355316162,
|
|
"logits/rejected": -0.48954230546951294,
|
|
"logps/chosen": -396.84429931640625,
|
|
"logps/ref_chosen": -67.31744384765625,
|
|
"logps/ref_rejected": -84.904296875,
|
|
"logps/rejected": -611.8575439453125,
|
|
"loss": 1.1345,
|
|
"margin_dpo/margin_mean": 197.42636108398438,
|
|
"margin_dpo/margin_std": 280.6907653808594,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.6020558002936858,
|
|
"fcm_dpo/beta": 0.001684611663222313,
|
|
"fcm_dpo/delta": -0.007662855088710785,
|
|
"fcm_dpo/margin": 241.77908325195312,
|
|
"fcm_dpo/q_t": 0.4040978252887726,
|
|
"grad_norm": 28.958459854125977,
|
|
"learning_rate": 2.065879555832674e-07,
|
|
"logits/chosen": -0.5035703778266907,
|
|
"logits/rejected": -0.50343918800354,
|
|
"logps/chosen": -351.52081298828125,
|
|
"logps/ref_chosen": -51.465354919433594,
|
|
"logps/ref_rejected": -83.198974609375,
|
|
"logps/rejected": -625.0335693359375,
|
|
"loss": 1.0731,
|
|
"margin_dpo/margin_mean": 241.77908325195312,
|
|
"margin_dpo/margin_std": 281.2712707519531,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.6035242290748899,
|
|
"fcm_dpo/beta": 0.001677290303632617,
|
|
"fcm_dpo/delta": 0.01198473572731018,
|
|
"fcm_dpo/margin": 231.06283569335938,
|
|
"fcm_dpo/q_t": 0.41211268305778503,
|
|
"grad_norm": 26.263025283813477,
|
|
"learning_rate": 2.0532469944670343e-07,
|
|
"logits/chosen": -0.5023758411407471,
|
|
"logits/rejected": -0.5086607933044434,
|
|
"logps/chosen": -383.20245361328125,
|
|
"logps/ref_chosen": -52.30727005004883,
|
|
"logps/ref_rejected": -80.69495391845703,
|
|
"logps/rejected": -642.6529541015625,
|
|
"loss": 1.1057,
|
|
"margin_dpo/margin_mean": 231.06283569335938,
|
|
"margin_dpo/margin_std": 318.57080078125,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.604992657856094,
|
|
"fcm_dpo/beta": 0.0016901884227991104,
|
|
"fcm_dpo/delta": -0.0035414875019341707,
|
|
"fcm_dpo/margin": 238.66928100585938,
|
|
"fcm_dpo/q_t": 0.40699994564056396,
|
|
"grad_norm": 39.12421798706055,
|
|
"learning_rate": 2.0406262054585738e-07,
|
|
"logits/chosen": -0.5658432245254517,
|
|
"logits/rejected": -0.5902704000473022,
|
|
"logps/chosen": -392.9666748046875,
|
|
"logps/ref_chosen": -53.144126892089844,
|
|
"logps/ref_rejected": -100.0608139038086,
|
|
"logps/rejected": -678.5526123046875,
|
|
"loss": 1.0903,
|
|
"margin_dpo/margin_mean": 238.66928100585938,
|
|
"margin_dpo/margin_std": 317.6526794433594,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.6064610866372981,
|
|
"fcm_dpo/beta": 0.0016896736342459917,
|
|
"fcm_dpo/delta": -0.01300879381597042,
|
|
"fcm_dpo/margin": 244.07077026367188,
|
|
"fcm_dpo/q_t": 0.40322571992874146,
|
|
"grad_norm": 35.633907318115234,
|
|
"learning_rate": 2.0280175213768205e-07,
|
|
"logits/chosen": -0.5198254585266113,
|
|
"logits/rejected": -0.5212410688400269,
|
|
"logps/chosen": -435.4099426269531,
|
|
"logps/ref_chosen": -61.58196258544922,
|
|
"logps/ref_rejected": -99.47340393066406,
|
|
"logps/rejected": -717.3721923828125,
|
|
"loss": 1.0867,
|
|
"margin_dpo/margin_mean": 244.07077026367188,
|
|
"margin_dpo/margin_std": 323.739990234375,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.6079295154185022,
|
|
"fcm_dpo/beta": 0.0016835236456245184,
|
|
"fcm_dpo/delta": -0.05097716301679611,
|
|
"fcm_dpo/margin": 266.1756591796875,
|
|
"fcm_dpo/q_t": 0.3957219123840332,
|
|
"grad_norm": 31.402956008911133,
|
|
"learning_rate": 2.0154212744723247e-07,
|
|
"logits/chosen": -0.4976515769958496,
|
|
"logits/rejected": -0.4892701506614685,
|
|
"logps/chosen": -396.79071044921875,
|
|
"logps/ref_chosen": -46.63148498535156,
|
|
"logps/ref_rejected": -87.64653015136719,
|
|
"logps/rejected": -703.9813842773438,
|
|
"loss": 1.0572,
|
|
"margin_dpo/margin_mean": 266.1756286621094,
|
|
"margin_dpo/margin_std": 315.53411865234375,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.6093979441997063,
|
|
"fcm_dpo/beta": 0.0016782158054411411,
|
|
"fcm_dpo/delta": 0.06835382431745529,
|
|
"fcm_dpo/margin": 198.95372009277344,
|
|
"fcm_dpo/q_t": 0.42239439487457275,
|
|
"grad_norm": 32.070098876953125,
|
|
"learning_rate": 2.002837796667909e-07,
|
|
"logits/chosen": -0.5548099279403687,
|
|
"logits/rejected": -0.5496998429298401,
|
|
"logps/chosen": -458.45635986328125,
|
|
"logps/ref_chosen": -78.6182861328125,
|
|
"logps/ref_rejected": -100.47752380371094,
|
|
"logps/rejected": -679.269287109375,
|
|
"loss": 1.1424,
|
|
"margin_dpo/margin_mean": 198.95372009277344,
|
|
"margin_dpo/margin_std": 302.24237060546875,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.6108663729809104,
|
|
"fcm_dpo/beta": 0.001665110932663083,
|
|
"fcm_dpo/delta": -0.119843028485775,
|
|
"fcm_dpo/margin": 308.59735107421875,
|
|
"fcm_dpo/q_t": 0.3798748552799225,
|
|
"grad_norm": 42.64773941040039,
|
|
"learning_rate": 1.990267419549914e-07,
|
|
"logits/chosen": -0.5385543704032898,
|
|
"logits/rejected": -0.5424953699111938,
|
|
"logps/chosen": -396.99420166015625,
|
|
"logps/ref_chosen": -58.27912521362305,
|
|
"logps/ref_rejected": -90.56871795654297,
|
|
"logps/rejected": -737.881103515625,
|
|
"loss": 0.9898,
|
|
"margin_dpo/margin_mean": 308.59735107421875,
|
|
"margin_dpo/margin_std": 283.72137451171875,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.6123348017621145,
|
|
"fcm_dpo/beta": 0.0016435376601293683,
|
|
"fcm_dpo/delta": -0.04060738533735275,
|
|
"fcm_dpo/margin": 267.00482177734375,
|
|
"fcm_dpo/q_t": 0.3967912495136261,
|
|
"grad_norm": 30.187908172607422,
|
|
"learning_rate": 1.9777104743594686e-07,
|
|
"logits/chosen": -0.5202246308326721,
|
|
"logits/rejected": -0.4980872869491577,
|
|
"logps/chosen": -385.2103271484375,
|
|
"logps/ref_chosen": -50.1987190246582,
|
|
"logps/ref_rejected": -68.15184020996094,
|
|
"logps/rejected": -670.1682739257812,
|
|
"loss": 1.0417,
|
|
"margin_dpo/margin_mean": 267.00482177734375,
|
|
"margin_dpo/margin_std": 270.859375,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.6138032305433186,
|
|
"fcm_dpo/beta": 0.0016406815266236663,
|
|
"fcm_dpo/delta": -0.04235214740037918,
|
|
"fcm_dpo/margin": 267.85089111328125,
|
|
"fcm_dpo/q_t": 0.40181127190589905,
|
|
"grad_norm": 28.84935188293457,
|
|
"learning_rate": 1.965167291983757e-07,
|
|
"logits/chosen": -0.6032763719558716,
|
|
"logits/rejected": -0.5830473899841309,
|
|
"logps/chosen": -446.807373046875,
|
|
"logps/ref_chosen": -81.97846984863281,
|
|
"logps/ref_rejected": -104.69148254394531,
|
|
"logps/rejected": -737.3712768554688,
|
|
"loss": 1.0814,
|
|
"margin_dpo/margin_mean": 267.85089111328125,
|
|
"margin_dpo/margin_std": 362.703125,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.6152716593245228,
|
|
"fcm_dpo/beta": 0.0016053111758083105,
|
|
"fcm_dpo/delta": -0.07667610794305801,
|
|
"fcm_dpo/margin": 294.6860656738281,
|
|
"fcm_dpo/q_t": 0.3896028399467468,
|
|
"grad_norm": 32.947139739990234,
|
|
"learning_rate": 1.9526382029472988e-07,
|
|
"logits/chosen": -0.5336101055145264,
|
|
"logits/rejected": -0.5327674150466919,
|
|
"logps/chosen": -387.23291015625,
|
|
"logps/ref_chosen": -52.948646545410156,
|
|
"logps/ref_rejected": -91.58309936523438,
|
|
"logps/rejected": -720.553466796875,
|
|
"loss": 1.0288,
|
|
"margin_dpo/margin_mean": 294.6860656738281,
|
|
"margin_dpo/margin_std": 316.33544921875,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.6167400881057269,
|
|
"fcm_dpo/beta": 0.0016269120387732983,
|
|
"fcm_dpo/delta": 0.11196567863225937,
|
|
"fcm_dpo/margin": 178.9881591796875,
|
|
"fcm_dpo/q_t": 0.43498021364212036,
|
|
"grad_norm": 68.43128967285156,
|
|
"learning_rate": 1.9401235374032425e-07,
|
|
"logits/chosen": -0.544563353061676,
|
|
"logits/rejected": -0.5079815983772278,
|
|
"logps/chosen": -504.28094482421875,
|
|
"logps/ref_chosen": -77.7699203491211,
|
|
"logps/ref_rejected": -69.31985473632812,
|
|
"logps/rejected": -674.819091796875,
|
|
"loss": 1.2042,
|
|
"margin_dpo/margin_mean": 178.9881591796875,
|
|
"margin_dpo/margin_std": 377.63885498046875,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.618208516886931,
|
|
"fcm_dpo/beta": 0.0016551846638321877,
|
|
"fcm_dpo/delta": 0.057633526623249054,
|
|
"fcm_dpo/margin": 207.74978637695312,
|
|
"fcm_dpo/q_t": 0.4185563027858734,
|
|
"grad_norm": 26.919042587280273,
|
|
"learning_rate": 1.9276236251246653e-07,
|
|
"logits/chosen": -0.5525974035263062,
|
|
"logits/rejected": -0.5384413599967957,
|
|
"logps/chosen": -398.10992431640625,
|
|
"logps/ref_chosen": -53.765865325927734,
|
|
"logps/ref_rejected": -89.28144836425781,
|
|
"logps/rejected": -641.3753051757812,
|
|
"loss": 1.1259,
|
|
"margin_dpo/margin_mean": 207.74978637695312,
|
|
"margin_dpo/margin_std": 281.2756042480469,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.6196769456681351,
|
|
"fcm_dpo/beta": 0.0016573498724028468,
|
|
"fcm_dpo/delta": 0.01915598101913929,
|
|
"fcm_dpo/margin": 230.2337646484375,
|
|
"fcm_dpo/q_t": 0.41092464327812195,
|
|
"grad_norm": 43.74554443359375,
|
|
"learning_rate": 1.9151387954958792e-07,
|
|
"logits/chosen": -0.5684947371482849,
|
|
"logits/rejected": -0.5682277083396912,
|
|
"logps/chosen": -455.68402099609375,
|
|
"logps/ref_chosen": -68.6337661743164,
|
|
"logps/ref_rejected": -87.86351013183594,
|
|
"logps/rejected": -705.1475830078125,
|
|
"loss": 1.1039,
|
|
"margin_dpo/margin_mean": 230.2337646484375,
|
|
"margin_dpo/margin_std": 312.58026123046875,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.6211453744493393,
|
|
"fcm_dpo/beta": 0.0016523964004591107,
|
|
"fcm_dpo/delta": -0.024727419018745422,
|
|
"fcm_dpo/margin": 256.37628173828125,
|
|
"fcm_dpo/q_t": 0.4017714262008667,
|
|
"grad_norm": 34.65106201171875,
|
|
"learning_rate": 1.902669377503756e-07,
|
|
"logits/chosen": -0.5626628398895264,
|
|
"logits/rejected": -0.5683019757270813,
|
|
"logps/chosen": -435.6455993652344,
|
|
"logps/ref_chosen": -54.99030303955078,
|
|
"logps/ref_rejected": -86.30654907226562,
|
|
"logps/rejected": -723.338134765625,
|
|
"loss": 1.0655,
|
|
"margin_dpo/margin_mean": 256.3762512207031,
|
|
"margin_dpo/margin_std": 303.867431640625,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.6226138032305433,
|
|
"fcm_dpo/beta": 0.0016435494180768728,
|
|
"fcm_dpo/delta": -0.0081382617354393,
|
|
"fcm_dpo/margin": 247.92364501953125,
|
|
"fcm_dpo/q_t": 0.4074801802635193,
|
|
"grad_norm": 43.08205032348633,
|
|
"learning_rate": 1.890215699729057e-07,
|
|
"logits/chosen": -0.5762988328933716,
|
|
"logits/rejected": -0.5552129745483398,
|
|
"logps/chosen": -404.3183898925781,
|
|
"logps/ref_chosen": -56.01192092895508,
|
|
"logps/ref_rejected": -66.47896575927734,
|
|
"logps/rejected": -662.7091064453125,
|
|
"loss": 1.0888,
|
|
"margin_dpo/margin_mean": 247.92367553710938,
|
|
"margin_dpo/margin_std": 331.8496398925781,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.6240822320117474,
|
|
"fcm_dpo/beta": 0.001672400627285242,
|
|
"fcm_dpo/delta": 0.07351066917181015,
|
|
"fcm_dpo/margin": 196.439697265625,
|
|
"fcm_dpo/q_t": 0.42280128598213196,
|
|
"grad_norm": 42.44888687133789,
|
|
"learning_rate": 1.8777780903377732e-07,
|
|
"logits/chosen": -0.5834954977035522,
|
|
"logits/rejected": -0.5841509103775024,
|
|
"logps/chosen": -441.07666015625,
|
|
"logps/ref_chosen": -46.86899948120117,
|
|
"logps/ref_rejected": -95.92545318603516,
|
|
"logps/rejected": -686.5728149414062,
|
|
"loss": 1.1481,
|
|
"margin_dpo/margin_mean": 196.439697265625,
|
|
"margin_dpo/margin_std": 305.77667236328125,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.6255506607929515,
|
|
"fcm_dpo/beta": 0.0016779915895313025,
|
|
"fcm_dpo/delta": -0.012628093361854553,
|
|
"fcm_dpo/margin": 245.40122985839844,
|
|
"fcm_dpo/q_t": 0.4041150212287903,
|
|
"grad_norm": 32.92577362060547,
|
|
"learning_rate": 1.8653568770724803e-07,
|
|
"logits/chosen": -0.5419399738311768,
|
|
"logits/rejected": -0.5101866722106934,
|
|
"logps/chosen": -436.7662353515625,
|
|
"logps/ref_chosen": -76.58354187011719,
|
|
"logps/ref_rejected": -81.26658630371094,
|
|
"logps/rejected": -686.8504638671875,
|
|
"loss": 1.0761,
|
|
"margin_dpo/margin_mean": 245.4012451171875,
|
|
"margin_dpo/margin_std": 292.03070068359375,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.6270190895741556,
|
|
"fcm_dpo/beta": 0.0016908218385651708,
|
|
"fcm_dpo/delta": 0.09585842490196228,
|
|
"fcm_dpo/margin": 181.6783447265625,
|
|
"fcm_dpo/q_t": 0.4288369417190552,
|
|
"grad_norm": 34.73727798461914,
|
|
"learning_rate": 1.8529523872436977e-07,
|
|
"logits/chosen": -0.5694754123687744,
|
|
"logits/rejected": -0.5520645380020142,
|
|
"logps/chosen": -403.98638916015625,
|
|
"logps/ref_chosen": -64.8538818359375,
|
|
"logps/ref_rejected": -78.5660171508789,
|
|
"logps/rejected": -599.3768310546875,
|
|
"loss": 1.1532,
|
|
"margin_dpo/margin_mean": 181.6783447265625,
|
|
"margin_dpo/margin_std": 272.23016357421875,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.6284875183553598,
|
|
"fcm_dpo/beta": 0.0016909840051084757,
|
|
"fcm_dpo/delta": -0.015947699546813965,
|
|
"fcm_dpo/margin": 245.3643341064453,
|
|
"fcm_dpo/q_t": 0.40564045310020447,
|
|
"grad_norm": 36.74341583251953,
|
|
"learning_rate": 1.8405649477212697e-07,
|
|
"logits/chosen": -0.5723360776901245,
|
|
"logits/rejected": -0.5774627923965454,
|
|
"logps/chosen": -486.622802734375,
|
|
"logps/ref_chosen": -62.63666534423828,
|
|
"logps/ref_rejected": -103.28181457519531,
|
|
"logps/rejected": -772.63232421875,
|
|
"loss": 1.1107,
|
|
"margin_dpo/margin_mean": 245.36434936523438,
|
|
"margin_dpo/margin_std": 380.26068115234375,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.6299559471365639,
|
|
"fcm_dpo/beta": 0.0017246659845113754,
|
|
"fcm_dpo/delta": 0.06474070250988007,
|
|
"fcm_dpo/margin": 194.86798095703125,
|
|
"fcm_dpo/q_t": 0.4237772226333618,
|
|
"grad_norm": 36.05411148071289,
|
|
"learning_rate": 1.828194884925749e-07,
|
|
"logits/chosen": -0.5850157737731934,
|
|
"logits/rejected": -0.5665490627288818,
|
|
"logps/chosen": -504.0472717285156,
|
|
"logps/ref_chosen": -81.23401641845703,
|
|
"logps/ref_rejected": -91.79493713378906,
|
|
"logps/rejected": -709.4761962890625,
|
|
"loss": 1.1639,
|
|
"margin_dpo/margin_mean": 194.8679962158203,
|
|
"margin_dpo/margin_std": 337.99810791015625,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.631424375917768,
|
|
"fcm_dpo/beta": 0.0017335449811071157,
|
|
"fcm_dpo/delta": 0.04493825510144234,
|
|
"fcm_dpo/margin": 205.6468505859375,
|
|
"fcm_dpo/q_t": 0.4179939031600952,
|
|
"grad_norm": 33.19789505004883,
|
|
"learning_rate": 1.8158425248197928e-07,
|
|
"logits/chosen": -0.5243804454803467,
|
|
"logits/rejected": -0.5206818580627441,
|
|
"logps/chosen": -411.6202392578125,
|
|
"logps/ref_chosen": -60.920326232910156,
|
|
"logps/ref_rejected": -104.42280578613281,
|
|
"logps/rejected": -660.7695922851562,
|
|
"loss": 1.1167,
|
|
"margin_dpo/margin_mean": 205.64683532714844,
|
|
"margin_dpo/margin_std": 281.1217041015625,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.6328928046989721,
|
|
"fcm_dpo/beta": 0.0017057711957022548,
|
|
"fcm_dpo/delta": -0.10787712037563324,
|
|
"fcm_dpo/margin": 294.5368957519531,
|
|
"fcm_dpo/q_t": 0.3836921751499176,
|
|
"grad_norm": 37.219364166259766,
|
|
"learning_rate": 1.8035081928995788e-07,
|
|
"logits/chosen": -0.5476272106170654,
|
|
"logits/rejected": -0.5541262626647949,
|
|
"logps/chosen": -408.8150634765625,
|
|
"logps/ref_chosen": -57.34874725341797,
|
|
"logps/ref_rejected": -92.84022521972656,
|
|
"logps/rejected": -738.8434448242188,
|
|
"loss": 1.0175,
|
|
"margin_dpo/margin_mean": 294.5368957519531,
|
|
"margin_dpo/margin_std": 320.72357177734375,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.6343612334801763,
|
|
"fcm_dpo/beta": 0.0016846886137500405,
|
|
"fcm_dpo/delta": -0.09407821297645569,
|
|
"fcm_dpo/margin": 290.38677978515625,
|
|
"fcm_dpo/q_t": 0.3872066140174866,
|
|
"grad_norm": 31.853593826293945,
|
|
"learning_rate": 1.791192214186223e-07,
|
|
"logits/chosen": -0.5290106534957886,
|
|
"logits/rejected": -0.520604133605957,
|
|
"logps/chosen": -424.4138488769531,
|
|
"logps/ref_chosen": -71.07479095458984,
|
|
"logps/ref_rejected": -98.57952880859375,
|
|
"logps/rejected": -742.3053588867188,
|
|
"loss": 1.0162,
|
|
"margin_dpo/margin_mean": 290.38677978515625,
|
|
"margin_dpo/margin_std": 291.4584045410156,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.6358296622613803,
|
|
"fcm_dpo/beta": 0.0016858684830367565,
|
|
"fcm_dpo/delta": 0.07005324959754944,
|
|
"fcm_dpo/margin": 196.98558044433594,
|
|
"fcm_dpo/q_t": 0.4214964509010315,
|
|
"grad_norm": 34.71784591674805,
|
|
"learning_rate": 1.7788949132172193e-07,
|
|
"logits/chosen": -0.5854760408401489,
|
|
"logits/rejected": -0.5759547352790833,
|
|
"logps/chosen": -507.0865173339844,
|
|
"logps/ref_chosen": -58.273193359375,
|
|
"logps/ref_rejected": -95.95089721679688,
|
|
"logps/rejected": -741.7498168945312,
|
|
"loss": 1.1623,
|
|
"margin_dpo/margin_mean": 196.98558044433594,
|
|
"margin_dpo/margin_std": 344.61151123046875,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.6372980910425844,
|
|
"fcm_dpo/beta": 0.0016826842911541462,
|
|
"fcm_dpo/delta": -0.0009639374911785126,
|
|
"fcm_dpo/margin": 238.1170654296875,
|
|
"fcm_dpo/q_t": 0.4146474003791809,
|
|
"grad_norm": 31.148998260498047,
|
|
"learning_rate": 1.7666166140378853e-07,
|
|
"logits/chosen": -0.6250817775726318,
|
|
"logits/rejected": -0.6325300931930542,
|
|
"logps/chosen": -461.8482971191406,
|
|
"logps/ref_chosen": -61.97370147705078,
|
|
"logps/ref_rejected": -78.49861145019531,
|
|
"logps/rejected": -716.490234375,
|
|
"loss": 1.1158,
|
|
"margin_dpo/margin_mean": 238.1170654296875,
|
|
"margin_dpo/margin_std": 388.4697265625,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.6387665198237885,
|
|
"fcm_dpo/beta": 0.0016721903812140226,
|
|
"fcm_dpo/delta": -0.05436144396662712,
|
|
"fcm_dpo/margin": 270.16436767578125,
|
|
"fcm_dpo/q_t": 0.3959371745586395,
|
|
"grad_norm": 27.92201805114746,
|
|
"learning_rate": 1.7543576401928218e-07,
|
|
"logits/chosen": -0.6887817978858948,
|
|
"logits/rejected": -0.684075117111206,
|
|
"logps/chosen": -414.2963562011719,
|
|
"logps/ref_chosen": -51.502052307128906,
|
|
"logps/ref_rejected": -87.56689453125,
|
|
"logps/rejected": -720.5255737304688,
|
|
"loss": 1.0586,
|
|
"margin_dpo/margin_mean": 270.16436767578125,
|
|
"margin_dpo/margin_std": 331.7671203613281,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.6402349486049926,
|
|
"fcm_dpo/beta": 0.0016701570712029934,
|
|
"fcm_dpo/delta": 0.02181386575102806,
|
|
"fcm_dpo/margin": 226.842529296875,
|
|
"fcm_dpo/q_t": 0.4125426113605499,
|
|
"grad_norm": 47.98572540283203,
|
|
"learning_rate": 1.742118314717391e-07,
|
|
"logits/chosen": -0.6400831341743469,
|
|
"logits/rejected": -0.612436056137085,
|
|
"logps/chosen": -450.94403076171875,
|
|
"logps/ref_chosen": -71.40371704101562,
|
|
"logps/ref_rejected": -82.72775268554688,
|
|
"logps/rejected": -689.110595703125,
|
|
"loss": 1.1044,
|
|
"margin_dpo/margin_mean": 226.842529296875,
|
|
"margin_dpo/margin_std": 305.198486328125,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.6417033773861968,
|
|
"fcm_dpo/beta": 0.0016808616928756237,
|
|
"fcm_dpo/delta": 0.017694242298603058,
|
|
"fcm_dpo/margin": 227.850830078125,
|
|
"fcm_dpo/q_t": 0.41099801659584045,
|
|
"grad_norm": 29.38176155090332,
|
|
"learning_rate": 1.7298989601292036e-07,
|
|
"logits/chosen": -0.652057945728302,
|
|
"logits/rejected": -0.6310602426528931,
|
|
"logps/chosen": -457.568359375,
|
|
"logps/ref_chosen": -64.7442626953125,
|
|
"logps/ref_rejected": -82.04356384277344,
|
|
"logps/rejected": -702.7184448242188,
|
|
"loss": 1.1034,
|
|
"margin_dpo/margin_mean": 227.85081481933594,
|
|
"margin_dpo/margin_std": 309.5552978515625,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.6431718061674009,
|
|
"fcm_dpo/beta": 0.0016721580177545547,
|
|
"fcm_dpo/delta": -0.017781764268875122,
|
|
"fcm_dpo/margin": 249.15879821777344,
|
|
"fcm_dpo/q_t": 0.4034152030944824,
|
|
"grad_norm": 37.04447555541992,
|
|
"learning_rate": 1.7176998984196144e-07,
|
|
"logits/chosen": -0.6542237997055054,
|
|
"logits/rejected": -0.6281710863113403,
|
|
"logps/chosen": -465.63787841796875,
|
|
"logps/ref_chosen": -59.0186653137207,
|
|
"logps/ref_rejected": -83.07682800292969,
|
|
"logps/rejected": -738.8548583984375,
|
|
"loss": 1.0805,
|
|
"margin_dpo/margin_mean": 249.1588134765625,
|
|
"margin_dpo/margin_std": 318.4876403808594,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.644640234948605,
|
|
"fcm_dpo/beta": 0.0016700313426554203,
|
|
"fcm_dpo/delta": 0.03846631944179535,
|
|
"fcm_dpo/margin": 216.2918701171875,
|
|
"fcm_dpo/q_t": 0.41903701424598694,
|
|
"grad_norm": 34.50837326049805,
|
|
"learning_rate": 1.7055214510452458e-07,
|
|
"logits/chosen": -0.6358094811439514,
|
|
"logits/rejected": -0.6351268291473389,
|
|
"logps/chosen": -459.71185302734375,
|
|
"logps/ref_chosen": -53.78407669067383,
|
|
"logps/ref_rejected": -83.98545837402344,
|
|
"logps/rejected": -706.2051391601562,
|
|
"loss": 1.141,
|
|
"margin_dpo/margin_mean": 216.2918701171875,
|
|
"margin_dpo/margin_std": 336.371337890625,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.6461086637298091,
|
|
"fcm_dpo/beta": 0.0016852959524840117,
|
|
"fcm_dpo/delta": -0.038592737168073654,
|
|
"fcm_dpo/margin": 259.22357177734375,
|
|
"fcm_dpo/q_t": 0.4044986069202423,
|
|
"grad_norm": 55.87424850463867,
|
|
"learning_rate": 1.6933639389195134e-07,
|
|
"logits/chosen": -0.6913414001464844,
|
|
"logits/rejected": -0.6863530874252319,
|
|
"logps/chosen": -513.6109008789062,
|
|
"logps/ref_chosen": -78.56671905517578,
|
|
"logps/ref_rejected": -96.49775695800781,
|
|
"logps/rejected": -790.7655029296875,
|
|
"loss": 1.0805,
|
|
"margin_dpo/margin_mean": 259.22357177734375,
|
|
"margin_dpo/margin_std": 368.9686279296875,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.6475770925110133,
|
|
"fcm_dpo/beta": 0.0016829633386805654,
|
|
"fcm_dpo/delta": -0.005735956132411957,
|
|
"fcm_dpo/margin": 240.76620483398438,
|
|
"fcm_dpo/q_t": 0.41345399618148804,
|
|
"grad_norm": 47.60608673095703,
|
|
"learning_rate": 1.681227682404166e-07,
|
|
"logits/chosen": -0.6709833145141602,
|
|
"logits/rejected": -0.6541421413421631,
|
|
"logps/chosen": -547.8490600585938,
|
|
"logps/ref_chosen": -60.824440002441406,
|
|
"logps/ref_rejected": -96.47080993652344,
|
|
"logps/rejected": -824.2615966796875,
|
|
"loss": 1.144,
|
|
"margin_dpo/margin_mean": 240.76620483398438,
|
|
"margin_dpo/margin_std": 431.6340637207031,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.6490455212922174,
|
|
"fcm_dpo/beta": 0.0016690012998878956,
|
|
"fcm_dpo/delta": -0.07323877513408661,
|
|
"fcm_dpo/margin": 281.21954345703125,
|
|
"fcm_dpo/q_t": 0.3959912061691284,
|
|
"grad_norm": 31.50290298461914,
|
|
"learning_rate": 1.669113001300851e-07,
|
|
"logits/chosen": -0.6310905814170837,
|
|
"logits/rejected": -0.6183385848999023,
|
|
"logps/chosen": -455.05615234375,
|
|
"logps/ref_chosen": -47.01121520996094,
|
|
"logps/ref_rejected": -76.53926086425781,
|
|
"logps/rejected": -765.8037719726562,
|
|
"loss": 1.0605,
|
|
"margin_dpo/margin_mean": 281.2195739746094,
|
|
"margin_dpo/margin_std": 366.8897399902344,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.6505139500734214,
|
|
"fcm_dpo/beta": 0.0016703938599675894,
|
|
"fcm_dpo/delta": 0.11156397312879562,
|
|
"fcm_dpo/margin": 174.64288330078125,
|
|
"fcm_dpo/q_t": 0.43598759174346924,
|
|
"grad_norm": 41.197444915771484,
|
|
"learning_rate": 1.6570202148426815e-07,
|
|
"logits/chosen": -0.6210609674453735,
|
|
"logits/rejected": -0.5954668521881104,
|
|
"logps/chosen": -524.9894409179688,
|
|
"logps/ref_chosen": -71.27301788330078,
|
|
"logps/ref_rejected": -86.679931640625,
|
|
"logps/rejected": -715.0391845703125,
|
|
"loss": 1.2238,
|
|
"margin_dpo/margin_mean": 174.6428680419922,
|
|
"margin_dpo/margin_std": 401.8946533203125,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.6519823788546255,
|
|
"fcm_dpo/beta": 0.0016623300034552813,
|
|
"fcm_dpo/delta": -0.08805151283740997,
|
|
"fcm_dpo/margin": 290.83160400390625,
|
|
"fcm_dpo/q_t": 0.39226892590522766,
|
|
"grad_norm": 31.508068084716797,
|
|
"learning_rate": 1.6449496416858282e-07,
|
|
"logits/chosen": -0.5918526649475098,
|
|
"logits/rejected": -0.5983752012252808,
|
|
"logps/chosen": -496.802978515625,
|
|
"logps/ref_chosen": -57.213706970214844,
|
|
"logps/ref_rejected": -97.25489807128906,
|
|
"logps/rejected": -827.67578125,
|
|
"loss": 1.0517,
|
|
"margin_dpo/margin_mean": 290.83160400390625,
|
|
"margin_dpo/margin_std": 387.1565246582031,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.6534508076358296,
|
|
"fcm_dpo/beta": 0.0016500870697200298,
|
|
"fcm_dpo/delta": -0.04307527467608452,
|
|
"fcm_dpo/margin": 267.3776550292969,
|
|
"fcm_dpo/q_t": 0.3995034098625183,
|
|
"grad_norm": 31.755836486816406,
|
|
"learning_rate": 1.6329015999011182e-07,
|
|
"logits/chosen": -0.6003238558769226,
|
|
"logits/rejected": -0.5861713290214539,
|
|
"logps/chosen": -438.2301025390625,
|
|
"logps/ref_chosen": -67.29979705810547,
|
|
"logps/ref_rejected": -92.68267059326172,
|
|
"logps/rejected": -730.9906005859375,
|
|
"loss": 1.0641,
|
|
"margin_dpo/margin_mean": 267.3776550292969,
|
|
"margin_dpo/margin_std": 331.8627014160156,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.6549192364170338,
|
|
"fcm_dpo/beta": 0.0016383725451305509,
|
|
"fcm_dpo/delta": -0.07028612494468689,
|
|
"fcm_dpo/margin": 284.8059997558594,
|
|
"fcm_dpo/q_t": 0.39093977212905884,
|
|
"grad_norm": 36.69627380371094,
|
|
"learning_rate": 1.6208764069656578e-07,
|
|
"logits/chosen": -0.581207811832428,
|
|
"logits/rejected": -0.5915842056274414,
|
|
"logps/chosen": -395.5775146484375,
|
|
"logps/ref_chosen": -59.098487854003906,
|
|
"logps/ref_rejected": -101.26419067382812,
|
|
"logps/rejected": -722.5491943359375,
|
|
"loss": 1.032,
|
|
"margin_dpo/margin_mean": 284.80596923828125,
|
|
"margin_dpo/margin_std": 294.6192626953125,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.6563876651982379,
|
|
"fcm_dpo/beta": 0.0015993504785001278,
|
|
"fcm_dpo/delta": -0.07478490471839905,
|
|
"fcm_dpo/margin": 294.4344482421875,
|
|
"fcm_dpo/q_t": 0.3939029276371002,
|
|
"grad_norm": 34.44708251953125,
|
|
"learning_rate": 1.608874379754465e-07,
|
|
"logits/chosen": -0.6691935658454895,
|
|
"logits/rejected": -0.6827399730682373,
|
|
"logps/chosen": -412.94537353515625,
|
|
"logps/ref_chosen": -56.07533264160156,
|
|
"logps/ref_rejected": -98.69475555419922,
|
|
"logps/rejected": -749.999267578125,
|
|
"loss": 1.045,
|
|
"margin_dpo/margin_mean": 294.4344482421875,
|
|
"margin_dpo/margin_std": 367.2359924316406,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.657856093979442,
|
|
"fcm_dpo/beta": 0.0015895452816039324,
|
|
"fcm_dpo/delta": -0.05442797392606735,
|
|
"fcm_dpo/margin": 284.32830810546875,
|
|
"fcm_dpo/q_t": 0.3961128890514374,
|
|
"grad_norm": 47.770294189453125,
|
|
"learning_rate": 1.5968958345321177e-07,
|
|
"logits/chosen": -0.553863525390625,
|
|
"logits/rejected": -0.5608095526695251,
|
|
"logps/chosen": -462.1229248046875,
|
|
"logps/ref_chosen": -60.00384521484375,
|
|
"logps/ref_rejected": -102.26465606689453,
|
|
"logps/rejected": -788.7120361328125,
|
|
"loss": 1.0526,
|
|
"margin_dpo/margin_mean": 284.32830810546875,
|
|
"margin_dpo/margin_std": 340.556884765625,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.6593245227606461,
|
|
"fcm_dpo/beta": 0.0015652881702408195,
|
|
"fcm_dpo/delta": -0.054406676441431046,
|
|
"fcm_dpo/margin": 288.65020751953125,
|
|
"fcm_dpo/q_t": 0.39999139308929443,
|
|
"grad_norm": 29.779434204101562,
|
|
"learning_rate": 1.584941086944423e-07,
|
|
"logits/chosen": -0.5949973464012146,
|
|
"logits/rejected": -0.5889606475830078,
|
|
"logps/chosen": -468.46368408203125,
|
|
"logps/ref_chosen": -67.52661895751953,
|
|
"logps/ref_rejected": -88.59690856933594,
|
|
"logps/rejected": -778.1842041015625,
|
|
"loss": 1.0777,
|
|
"margin_dpo/margin_mean": 288.65020751953125,
|
|
"margin_dpo/margin_std": 417.9888610839844,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.6607929515418502,
|
|
"fcm_dpo/beta": 0.0015472873346880078,
|
|
"fcm_dpo/delta": -0.08019885420799255,
|
|
"fcm_dpo/margin": 307.92584228515625,
|
|
"fcm_dpo/q_t": 0.38820013403892517,
|
|
"grad_norm": 45.0981559753418,
|
|
"learning_rate": 1.573010452010098e-07,
|
|
"logits/chosen": -0.6336866617202759,
|
|
"logits/rejected": -0.6424990892410278,
|
|
"logps/chosen": -382.67413330078125,
|
|
"logps/ref_chosen": -57.10811996459961,
|
|
"logps/ref_rejected": -102.75494384765625,
|
|
"logps/rejected": -736.246826171875,
|
|
"loss": 1.0184,
|
|
"margin_dpo/margin_mean": 307.92584228515625,
|
|
"margin_dpo/margin_std": 307.9268798828125,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.6622613803230544,
|
|
"fcm_dpo/beta": 0.0015509811928495765,
|
|
"fcm_dpo/delta": 0.039534684270620346,
|
|
"fcm_dpo/margin": 233.21817016601562,
|
|
"fcm_dpo/q_t": 0.41526713967323303,
|
|
"grad_norm": 40.15821838378906,
|
|
"learning_rate": 1.5611042441124687e-07,
|
|
"logits/chosen": -0.6718031167984009,
|
|
"logits/rejected": -0.6435151100158691,
|
|
"logps/chosen": -512.0057983398438,
|
|
"logps/ref_chosen": -58.46883010864258,
|
|
"logps/ref_rejected": -72.92941284179688,
|
|
"logps/rejected": -759.6845703125,
|
|
"loss": 1.1567,
|
|
"margin_dpo/margin_mean": 233.21817016601562,
|
|
"margin_dpo/margin_std": 423.0076904296875,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.6637298091042585,
|
|
"fcm_dpo/beta": 0.0015399182448163629,
|
|
"fcm_dpo/delta": -0.035000670701265335,
|
|
"fcm_dpo/margin": 281.4200439453125,
|
|
"fcm_dpo/q_t": 0.3974810838699341,
|
|
"grad_norm": 27.29172706604004,
|
|
"learning_rate": 1.549222776991186e-07,
|
|
"logits/chosen": -0.6307343244552612,
|
|
"logits/rejected": -0.6474366188049316,
|
|
"logps/chosen": -360.15216064453125,
|
|
"logps/ref_chosen": -50.39055252075195,
|
|
"logps/ref_rejected": -97.77142333984375,
|
|
"logps/rejected": -688.953125,
|
|
"loss": 1.0431,
|
|
"margin_dpo/margin_mean": 281.4200439453125,
|
|
"margin_dpo/margin_std": 278.75592041015625,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.6651982378854625,
|
|
"fcm_dpo/beta": 0.0015358540695160627,
|
|
"fcm_dpo/delta": -0.0009381119161844254,
|
|
"fcm_dpo/margin": 260.94769287109375,
|
|
"fcm_dpo/q_t": 0.40897369384765625,
|
|
"grad_norm": 32.27933120727539,
|
|
"learning_rate": 1.5373663637339584e-07,
|
|
"logits/chosen": -0.6390811800956726,
|
|
"logits/rejected": -0.622653603553772,
|
|
"logps/chosen": -432.13372802734375,
|
|
"logps/ref_chosen": -57.71485137939453,
|
|
"logps/ref_rejected": -82.20741271972656,
|
|
"logps/rejected": -717.573974609375,
|
|
"loss": 1.0895,
|
|
"margin_dpo/margin_mean": 260.94769287109375,
|
|
"margin_dpo/margin_std": 345.93994140625,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.6666666666666666,
|
|
"fcm_dpo/beta": 0.0015241008950397372,
|
|
"fcm_dpo/delta": -0.054964885115623474,
|
|
"fcm_dpo/margin": 296.76806640625,
|
|
"fcm_dpo/q_t": 0.3962477445602417,
|
|
"grad_norm": 28.71084213256836,
|
|
"learning_rate": 1.5255353167683017e-07,
|
|
"logits/chosen": -0.7012594938278198,
|
|
"logits/rejected": -0.6893137693405151,
|
|
"logps/chosen": -514.2677612304688,
|
|
"logps/ref_chosen": -60.945648193359375,
|
|
"logps/ref_rejected": -84.95079040527344,
|
|
"logps/rejected": -835.0409545898438,
|
|
"loss": 1.0532,
|
|
"margin_dpo/margin_mean": 296.76806640625,
|
|
"margin_dpo/margin_std": 363.46282958984375,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.6681350954478708,
|
|
"fcm_dpo/beta": 0.0015060155419632792,
|
|
"fcm_dpo/delta": -0.11781027913093567,
|
|
"fcm_dpo/margin": 339.77191162109375,
|
|
"fcm_dpo/q_t": 0.3838193118572235,
|
|
"grad_norm": 38.13735580444336,
|
|
"learning_rate": 1.5137299478533064e-07,
|
|
"logits/chosen": -0.6432729363441467,
|
|
"logits/rejected": -0.6612046957015991,
|
|
"logps/chosen": -434.3492431640625,
|
|
"logps/ref_chosen": -44.88671112060547,
|
|
"logps/ref_rejected": -115.30147552490234,
|
|
"logps/rejected": -844.535888671875,
|
|
"loss": 1.0173,
|
|
"margin_dpo/margin_mean": 339.7718811035156,
|
|
"margin_dpo/margin_std": 379.62335205078125,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.6696035242290749,
|
|
"fcm_dpo/beta": 0.001467827707529068,
|
|
"fcm_dpo/delta": -0.09735056757926941,
|
|
"fcm_dpo/margin": 335.6260986328125,
|
|
"fcm_dpo/q_t": 0.3862442374229431,
|
|
"grad_norm": 28.999698638916016,
|
|
"learning_rate": 1.5019505680714232e-07,
|
|
"logits/chosen": -0.6519438624382019,
|
|
"logits/rejected": -0.6727065443992615,
|
|
"logps/chosen": -465.74395751953125,
|
|
"logps/ref_chosen": -57.036781311035156,
|
|
"logps/ref_rejected": -105.21784210205078,
|
|
"logps/rejected": -849.5510864257812,
|
|
"loss": 1.0071,
|
|
"margin_dpo/margin_mean": 335.6260986328125,
|
|
"margin_dpo/margin_std": 334.113525390625,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.671071953010279,
|
|
"fcm_dpo/beta": 0.0014373862650245428,
|
|
"fcm_dpo/delta": -0.0841878205537796,
|
|
"fcm_dpo/margin": 333.8603820800781,
|
|
"fcm_dpo/q_t": 0.3876863121986389,
|
|
"grad_norm": 37.97410202026367,
|
|
"learning_rate": 1.4901974878202627e-07,
|
|
"logits/chosen": -0.6944575905799866,
|
|
"logits/rejected": -0.6935386061668396,
|
|
"logps/chosen": -451.5135498046875,
|
|
"logps/ref_chosen": -54.24253845214844,
|
|
"logps/ref_rejected": -85.10956573486328,
|
|
"logps/rejected": -816.240966796875,
|
|
"loss": 1.0151,
|
|
"margin_dpo/margin_mean": 333.8603515625,
|
|
"margin_dpo/margin_std": 328.8755187988281,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.6725403817914831,
|
|
"fcm_dpo/beta": 0.0014226180501282215,
|
|
"fcm_dpo/delta": -0.02253449335694313,
|
|
"fcm_dpo/margin": 295.9541015625,
|
|
"fcm_dpo/q_t": 0.40318185091018677,
|
|
"grad_norm": 29.526060104370117,
|
|
"learning_rate": 1.4784710168044212e-07,
|
|
"logits/chosen": -0.6689471006393433,
|
|
"logits/rejected": -0.6608414649963379,
|
|
"logps/chosen": -469.71185302734375,
|
|
"logps/ref_chosen": -55.40888214111328,
|
|
"logps/ref_rejected": -97.68325805664062,
|
|
"logps/rejected": -807.9403076171875,
|
|
"loss": 1.0677,
|
|
"margin_dpo/margin_mean": 295.9541015625,
|
|
"margin_dpo/margin_std": 346.7279968261719,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.6740088105726872,
|
|
"fcm_dpo/beta": 0.0014095264486968517,
|
|
"fcm_dpo/delta": -0.06269839406013489,
|
|
"fcm_dpo/margin": 325.8946533203125,
|
|
"fcm_dpo/q_t": 0.3947668671607971,
|
|
"grad_norm": 31.499923706054688,
|
|
"learning_rate": 1.466771464027316e-07,
|
|
"logits/chosen": -0.6561689376831055,
|
|
"logits/rejected": -0.6712781190872192,
|
|
"logps/chosen": -505.89019775390625,
|
|
"logps/ref_chosen": -46.55748748779297,
|
|
"logps/ref_rejected": -86.16854095458984,
|
|
"logps/rejected": -871.3959350585938,
|
|
"loss": 1.0538,
|
|
"margin_dpo/margin_mean": 325.8946533203125,
|
|
"margin_dpo/margin_std": 400.7102355957031,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.6754772393538914,
|
|
"fcm_dpo/beta": 0.0013926841784268618,
|
|
"fcm_dpo/delta": -0.09006337821483612,
|
|
"fcm_dpo/margin": 348.79052734375,
|
|
"fcm_dpo/q_t": 0.3885030150413513,
|
|
"grad_norm": 40.160850524902344,
|
|
"learning_rate": 1.4550991377830423e-07,
|
|
"logits/chosen": -0.679194450378418,
|
|
"logits/rejected": -0.7077926993370056,
|
|
"logps/chosen": -545.5593872070312,
|
|
"logps/ref_chosen": -51.63489532470703,
|
|
"logps/ref_rejected": -104.11935424804688,
|
|
"logps/rejected": -946.8343505859375,
|
|
"loss": 1.0237,
|
|
"margin_dpo/margin_mean": 348.79052734375,
|
|
"margin_dpo/margin_std": 384.35693359375,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.6769456681350955,
|
|
"fcm_dpo/beta": 0.0013905889354646206,
|
|
"fcm_dpo/delta": 0.008415699005126953,
|
|
"fcm_dpo/margin": 281.6998291015625,
|
|
"fcm_dpo/q_t": 0.4110637307167053,
|
|
"grad_norm": 28.433305740356445,
|
|
"learning_rate": 1.4434543456482518e-07,
|
|
"logits/chosen": -0.7387268543243408,
|
|
"logits/rejected": -0.7501698732376099,
|
|
"logps/chosen": -583.958984375,
|
|
"logps/ref_chosen": -55.18195724487305,
|
|
"logps/ref_rejected": -86.47689819335938,
|
|
"logps/rejected": -896.9537353515625,
|
|
"loss": 1.0996,
|
|
"margin_dpo/margin_mean": 281.6997985839844,
|
|
"margin_dpo/margin_std": 387.92791748046875,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.6784140969162996,
|
|
"fcm_dpo/beta": 0.0014029676094651222,
|
|
"fcm_dpo/delta": 0.08377067744731903,
|
|
"fcm_dpo/margin": 227.35406494140625,
|
|
"fcm_dpo/q_t": 0.4282529056072235,
|
|
"grad_norm": 49.88774108886719,
|
|
"learning_rate": 1.4318373944740484e-07,
|
|
"logits/chosen": -0.8244759440422058,
|
|
"logits/rejected": -0.8147263526916504,
|
|
"logps/chosen": -598.8399658203125,
|
|
"logps/ref_chosen": -69.92803192138672,
|
|
"logps/ref_rejected": -78.84111022949219,
|
|
"logps/rejected": -835.1070556640625,
|
|
"loss": 1.1609,
|
|
"margin_dpo/margin_mean": 227.35406494140625,
|
|
"margin_dpo/margin_std": 385.42572021484375,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.6798825256975036,
|
|
"fcm_dpo/beta": 0.0014092556666582823,
|
|
"fcm_dpo/delta": -0.037594083696603775,
|
|
"fcm_dpo/margin": 309.1751708984375,
|
|
"fcm_dpo/q_t": 0.40178006887435913,
|
|
"grad_norm": 37.86516189575195,
|
|
"learning_rate": 1.4202485903778976e-07,
|
|
"logits/chosen": -0.8040248155593872,
|
|
"logits/rejected": -0.8105688095092773,
|
|
"logps/chosen": -594.924072265625,
|
|
"logps/ref_chosen": -55.27437210083008,
|
|
"logps/ref_rejected": -89.02497863769531,
|
|
"logps/rejected": -937.849853515625,
|
|
"loss": 1.0796,
|
|
"margin_dpo/margin_mean": 309.1751708984375,
|
|
"margin_dpo/margin_std": 419.88360595703125,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.6813509544787077,
|
|
"fcm_dpo/beta": 0.0013409138191491365,
|
|
"fcm_dpo/delta": -0.24509315192699432,
|
|
"fcm_dpo/margin": 467.78326416015625,
|
|
"fcm_dpo/q_t": 0.35747382044792175,
|
|
"grad_norm": 48.06254959106445,
|
|
"learning_rate": 1.4086882387355658e-07,
|
|
"logits/chosen": -0.7823467254638672,
|
|
"logits/rejected": -0.8419663906097412,
|
|
"logps/chosen": -594.6781005859375,
|
|
"logps/ref_chosen": -50.91230010986328,
|
|
"logps/ref_rejected": -102.4893798828125,
|
|
"logps/rejected": -1114.03857421875,
|
|
"loss": 0.9356,
|
|
"margin_dpo/margin_mean": 467.78326416015625,
|
|
"margin_dpo/margin_std": 453.023681640625,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.6828193832599119,
|
|
"fcm_dpo/beta": 0.0013181334361433983,
|
|
"fcm_dpo/delta": -0.09859161078929901,
|
|
"fcm_dpo/margin": 374.6343994140625,
|
|
"fcm_dpo/q_t": 0.3844667673110962,
|
|
"grad_norm": 37.97553634643555,
|
|
"learning_rate": 1.3971566441730714e-07,
|
|
"logits/chosen": -0.78519207239151,
|
|
"logits/rejected": -0.8018290400505066,
|
|
"logps/chosen": -611.6665649414062,
|
|
"logps/ref_chosen": -60.116851806640625,
|
|
"logps/ref_rejected": -113.94602966308594,
|
|
"logps/rejected": -1040.130126953125,
|
|
"loss": 1.0469,
|
|
"margin_dpo/margin_mean": 374.6343994140625,
|
|
"margin_dpo/margin_std": 480.17120361328125,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.684287812041116,
|
|
"fcm_dpo/beta": 0.0012953735422343016,
|
|
"fcm_dpo/delta": -0.02665306255221367,
|
|
"fcm_dpo/margin": 328.0135803222656,
|
|
"fcm_dpo/q_t": 0.40215975046157837,
|
|
"grad_norm": 31.085790634155273,
|
|
"learning_rate": 1.3856541105586545e-07,
|
|
"logits/chosen": -0.822087287902832,
|
|
"logits/rejected": -0.8236336708068848,
|
|
"logps/chosen": -642.7122802734375,
|
|
"logps/ref_chosen": -52.920921325683594,
|
|
"logps/ref_rejected": -90.3154296875,
|
|
"logps/rejected": -1008.1204223632812,
|
|
"loss": 1.0834,
|
|
"margin_dpo/margin_mean": 328.01361083984375,
|
|
"margin_dpo/margin_std": 438.4411926269531,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.6857562408223201,
|
|
"fcm_dpo/beta": 0.001278050011023879,
|
|
"fcm_dpo/delta": -0.055538684129714966,
|
|
"fcm_dpo/margin": 352.9036865234375,
|
|
"fcm_dpo/q_t": 0.40140700340270996,
|
|
"grad_norm": 44.78834915161133,
|
|
"learning_rate": 1.3741809409947729e-07,
|
|
"logits/chosen": -0.8997122049331665,
|
|
"logits/rejected": -0.8935316801071167,
|
|
"logps/chosen": -827.7107543945312,
|
|
"logps/ref_chosen": -78.7158203125,
|
|
"logps/ref_rejected": -102.86019897460938,
|
|
"logps/rejected": -1204.7587890625,
|
|
"loss": 1.1325,
|
|
"margin_dpo/margin_mean": 352.9036865234375,
|
|
"margin_dpo/margin_std": 619.0111083984375,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.6872246696035242,
|
|
"fcm_dpo/beta": 0.0012572079431265593,
|
|
"fcm_dpo/delta": -0.14669275283813477,
|
|
"fcm_dpo/margin": 428.48065185546875,
|
|
"fcm_dpo/q_t": 0.3811851441860199,
|
|
"grad_norm": 51.817596435546875,
|
|
"learning_rate": 1.362737437810114e-07,
|
|
"logits/chosen": -0.8791412115097046,
|
|
"logits/rejected": -0.8881509304046631,
|
|
"logps/chosen": -655.0909423828125,
|
|
"logps/ref_chosen": -69.93536376953125,
|
|
"logps/ref_rejected": -101.02880859375,
|
|
"logps/rejected": -1114.6650390625,
|
|
"loss": 1.0157,
|
|
"margin_dpo/margin_mean": 428.48065185546875,
|
|
"margin_dpo/margin_std": 536.8350830078125,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.6886930983847284,
|
|
"fcm_dpo/beta": 0.0012133971322327852,
|
|
"fcm_dpo/delta": -0.1514207422733307,
|
|
"fcm_dpo/margin": 446.37615966796875,
|
|
"fcm_dpo/q_t": 0.3775207996368408,
|
|
"grad_norm": 46.171669006347656,
|
|
"learning_rate": 1.351323902551631e-07,
|
|
"logits/chosen": -0.8811942934989929,
|
|
"logits/rejected": -0.8931454420089722,
|
|
"logps/chosen": -704.7970581054688,
|
|
"logps/ref_chosen": -68.12469482421875,
|
|
"logps/ref_rejected": -104.78640747070312,
|
|
"logps/rejected": -1187.8349609375,
|
|
"loss": 1.0025,
|
|
"margin_dpo/margin_mean": 446.37615966796875,
|
|
"margin_dpo/margin_std": 505.142822265625,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.6901615271659325,
|
|
"fcm_dpo/beta": 0.0012034507235512137,
|
|
"fcm_dpo/delta": -0.06455770879983902,
|
|
"fcm_dpo/margin": 383.5496520996094,
|
|
"fcm_dpo/q_t": 0.3943213224411011,
|
|
"grad_norm": 33.37154006958008,
|
|
"learning_rate": 1.339940635976592e-07,
|
|
"logits/chosen": -0.8440713882446289,
|
|
"logits/rejected": -0.8514707088470459,
|
|
"logps/chosen": -594.4234008789062,
|
|
"logps/ref_chosen": -43.791927337646484,
|
|
"logps/ref_rejected": -82.70285034179688,
|
|
"logps/rejected": -1016.8839721679688,
|
|
"loss": 1.0655,
|
|
"margin_dpo/margin_mean": 383.5496520996094,
|
|
"margin_dpo/margin_std": 511.9697570800781,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.6916299559471366,
|
|
"fcm_dpo/beta": 0.0011930849868804216,
|
|
"fcm_dpo/delta": 0.018563054502010345,
|
|
"fcm_dpo/margin": 319.95709228515625,
|
|
"fcm_dpo/q_t": 0.4155677855014801,
|
|
"grad_norm": 46.75631332397461,
|
|
"learning_rate": 1.3285879380446563e-07,
|
|
"logits/chosen": -0.910805344581604,
|
|
"logits/rejected": -0.9144266843795776,
|
|
"logps/chosen": -742.8097534179688,
|
|
"logps/ref_chosen": -63.33952331542969,
|
|
"logps/ref_rejected": -83.61048126220703,
|
|
"logps/rejected": -1083.037841796875,
|
|
"loss": 1.1267,
|
|
"margin_dpo/margin_mean": 319.95709228515625,
|
|
"margin_dpo/margin_std": 512.3135375976562,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.6930983847283406,
|
|
"fcm_dpo/beta": 0.0011829681461676955,
|
|
"fcm_dpo/delta": -0.06849831342697144,
|
|
"fcm_dpo/margin": 392.803466796875,
|
|
"fcm_dpo/q_t": 0.40142130851745605,
|
|
"grad_norm": 37.54646301269531,
|
|
"learning_rate": 1.317266107909975e-07,
|
|
"logits/chosen": -0.9019899964332581,
|
|
"logits/rejected": -0.8787474036216736,
|
|
"logps/chosen": -746.1549682617188,
|
|
"logps/ref_chosen": -83.66610717773438,
|
|
"logps/ref_rejected": -117.20919799804688,
|
|
"logps/rejected": -1172.50146484375,
|
|
"loss": 1.0996,
|
|
"margin_dpo/margin_mean": 392.803466796875,
|
|
"margin_dpo/margin_std": 627.5294189453125,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.6945668135095447,
|
|
"fcm_dpo/beta": 0.001211212482303381,
|
|
"fcm_dpo/delta": 0.17383155226707458,
|
|
"fcm_dpo/margin": 190.3201904296875,
|
|
"fcm_dpo/q_t": 0.4516882002353668,
|
|
"grad_norm": 120.86524200439453,
|
|
"learning_rate": 1.3059754439133002e-07,
|
|
"logits/chosen": -0.9114212989807129,
|
|
"logits/rejected": -0.8835663795471191,
|
|
"logps/chosen": -869.204833984375,
|
|
"logps/ref_chosen": -63.49696731567383,
|
|
"logps/ref_rejected": -81.14657592773438,
|
|
"logps/rejected": -1077.174560546875,
|
|
"loss": 1.3499,
|
|
"margin_dpo/margin_mean": 190.32020568847656,
|
|
"margin_dpo/margin_std": 765.7159423828125,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.6960352422907489,
|
|
"fcm_dpo/beta": 0.0012354427017271519,
|
|
"fcm_dpo/delta": -0.011602118611335754,
|
|
"fcm_dpo/margin": 331.4249572753906,
|
|
"fcm_dpo/q_t": 0.40766799449920654,
|
|
"grad_norm": 39.35799789428711,
|
|
"learning_rate": 1.2947162435741277e-07,
|
|
"logits/chosen": -0.8279140591621399,
|
|
"logits/rejected": -0.8301948308944702,
|
|
"logps/chosen": -675.7288818359375,
|
|
"logps/ref_chosen": -52.6119384765625,
|
|
"logps/ref_rejected": -90.08041381835938,
|
|
"logps/rejected": -1044.622314453125,
|
|
"loss": 1.1359,
|
|
"margin_dpo/margin_mean": 331.42498779296875,
|
|
"margin_dpo/margin_std": 559.75341796875,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.697503671071953,
|
|
"fcm_dpo/beta": 0.0012009632773697376,
|
|
"fcm_dpo/delta": -0.10904163122177124,
|
|
"fcm_dpo/margin": 419.064697265625,
|
|
"fcm_dpo/q_t": 0.3838295340538025,
|
|
"grad_norm": 59.33665466308594,
|
|
"learning_rate": 1.2834888035828596e-07,
|
|
"logits/chosen": -0.8741481304168701,
|
|
"logits/rejected": -0.8975253701210022,
|
|
"logps/chosen": -498.5884094238281,
|
|
"logps/ref_chosen": -42.49519348144531,
|
|
"logps/ref_rejected": -90.06294250488281,
|
|
"logps/rejected": -965.2208251953125,
|
|
"loss": 1.0072,
|
|
"margin_dpo/margin_mean": 419.064697265625,
|
|
"margin_dpo/margin_std": 434.1658935546875,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.6989720998531571,
|
|
"fcm_dpo/beta": 0.0011981537099927664,
|
|
"fcm_dpo/delta": 0.014648713171482086,
|
|
"fcm_dpo/margin": 322.09075927734375,
|
|
"fcm_dpo/q_t": 0.41181665658950806,
|
|
"grad_norm": 54.55389404296875,
|
|
"learning_rate": 1.2722934197929802e-07,
|
|
"logits/chosen": -0.8839001655578613,
|
|
"logits/rejected": -0.8944188356399536,
|
|
"logps/chosen": -600.6002807617188,
|
|
"logps/ref_chosen": -42.94938278198242,
|
|
"logps/ref_rejected": -73.71023559570312,
|
|
"logps/rejected": -953.451904296875,
|
|
"loss": 1.0989,
|
|
"margin_dpo/margin_mean": 322.09075927734375,
|
|
"margin_dpo/margin_std": 432.81268310546875,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.7004405286343612,
|
|
"fcm_dpo/beta": 0.0012043662136420608,
|
|
"fcm_dpo/delta": 0.001615665853023529,
|
|
"fcm_dpo/margin": 330.6528015136719,
|
|
"fcm_dpo/q_t": 0.4092486798763275,
|
|
"grad_norm": 37.25672149658203,
|
|
"learning_rate": 1.2611303872132631e-07,
|
|
"logits/chosen": -0.8755106925964355,
|
|
"logits/rejected": -0.8339799642562866,
|
|
"logps/chosen": -640.450439453125,
|
|
"logps/ref_chosen": -70.77261352539062,
|
|
"logps/ref_rejected": -76.13737487792969,
|
|
"logps/rejected": -976.4679565429688,
|
|
"loss": 1.123,
|
|
"margin_dpo/margin_mean": 330.65277099609375,
|
|
"margin_dpo/margin_std": 534.1469116210938,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.7019089574155654,
|
|
"fcm_dpo/beta": 0.0011925864964723587,
|
|
"fcm_dpo/delta": -0.06166646629571915,
|
|
"fcm_dpo/margin": 384.75927734375,
|
|
"fcm_dpo/q_t": 0.3951849341392517,
|
|
"grad_norm": 45.197479248046875,
|
|
"learning_rate": 1.2500000000000005e-07,
|
|
"logits/chosen": -0.777691662311554,
|
|
"logits/rejected": -0.7990972995758057,
|
|
"logps/chosen": -495.652587890625,
|
|
"logps/ref_chosen": -41.440513610839844,
|
|
"logps/ref_rejected": -85.36196899414062,
|
|
"logps/rejected": -924.3333129882812,
|
|
"loss": 1.0533,
|
|
"margin_dpo/margin_mean": 384.75927734375,
|
|
"margin_dpo/margin_std": 474.02569580078125,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.7033773861967695,
|
|
"fcm_dpo/beta": 0.001189418020658195,
|
|
"fcm_dpo/delta": -0.02111241966485977,
|
|
"fcm_dpo/margin": 352.7756042480469,
|
|
"fcm_dpo/q_t": 0.40721356868743896,
|
|
"grad_norm": 32.11994171142578,
|
|
"learning_rate": 1.2389025514492456e-07,
|
|
"logits/chosen": -0.8205797076225281,
|
|
"logits/rejected": -0.8455021381378174,
|
|
"logps/chosen": -669.05615234375,
|
|
"logps/ref_chosen": -53.907920837402344,
|
|
"logps/ref_rejected": -95.1163330078125,
|
|
"logps/rejected": -1063.040283203125,
|
|
"loss": 1.1155,
|
|
"margin_dpo/margin_mean": 352.7756042480469,
|
|
"margin_dpo/margin_std": 573.1203002929688,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.7048458149779736,
|
|
"fcm_dpo/beta": 0.0012020855210721493,
|
|
"fcm_dpo/delta": 0.05400090664625168,
|
|
"fcm_dpo/margin": 287.3784484863281,
|
|
"fcm_dpo/q_t": 0.42420148849487305,
|
|
"grad_norm": 66.96390533447266,
|
|
"learning_rate": 1.227838333989088e-07,
|
|
"logits/chosen": -0.8848108053207397,
|
|
"logits/rejected": -0.8782625198364258,
|
|
"logps/chosen": -796.4169921875,
|
|
"logps/ref_chosen": -58.682701110839844,
|
|
"logps/ref_rejected": -82.93248748779297,
|
|
"logps/rejected": -1108.045166015625,
|
|
"loss": 1.1735,
|
|
"margin_dpo/margin_mean": 287.3784484863281,
|
|
"margin_dpo/margin_std": 522.4732055664062,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.7063142437591777,
|
|
"fcm_dpo/beta": 0.0011700207833200693,
|
|
"fcm_dpo/delta": -0.1395144760608673,
|
|
"fcm_dpo/margin": 454.84710693359375,
|
|
"fcm_dpo/q_t": 0.3807649314403534,
|
|
"grad_norm": 38.5280647277832,
|
|
"learning_rate": 1.2168076391719489e-07,
|
|
"logits/chosen": -0.9153322577476501,
|
|
"logits/rejected": -0.9426373243331909,
|
|
"logps/chosen": -667.7345581054688,
|
|
"logps/ref_chosen": -54.964271545410156,
|
|
"logps/ref_rejected": -92.42044067382812,
|
|
"logps/rejected": -1160.037841796875,
|
|
"loss": 1.0112,
|
|
"margin_dpo/margin_mean": 454.84710693359375,
|
|
"margin_dpo/margin_std": 539.13134765625,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.7077826725403817,
|
|
"fcm_dpo/beta": 0.00119025819003582,
|
|
"fcm_dpo/delta": 0.16345669329166412,
|
|
"fcm_dpo/margin": 201.93572998046875,
|
|
"fcm_dpo/q_t": 0.4430094063282013,
|
|
"grad_norm": 72.33926391601562,
|
|
"learning_rate": 1.2058107576668938e-07,
|
|
"logits/chosen": -0.8522086143493652,
|
|
"logits/rejected": -0.8430407643318176,
|
|
"logps/chosen": -748.6646728515625,
|
|
"logps/ref_chosen": -67.553466796875,
|
|
"logps/ref_rejected": -87.58953857421875,
|
|
"logps/rejected": -970.6365356445312,
|
|
"loss": 1.2679,
|
|
"margin_dpo/margin_mean": 201.93572998046875,
|
|
"margin_dpo/margin_std": 574.7503051757812,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.7092511013215859,
|
|
"fcm_dpo/beta": 0.0011707013472914696,
|
|
"fcm_dpo/delta": -0.15054769814014435,
|
|
"fcm_dpo/margin": 463.0313415527344,
|
|
"fcm_dpo/q_t": 0.3800230026245117,
|
|
"grad_norm": 44.42465591430664,
|
|
"learning_rate": 1.194847979251979e-07,
|
|
"logits/chosen": -0.9137969017028809,
|
|
"logits/rejected": -0.9261190891265869,
|
|
"logps/chosen": -685.5653076171875,
|
|
"logps/ref_chosen": -63.32981872558594,
|
|
"logps/ref_rejected": -95.78697204589844,
|
|
"logps/rejected": -1181.0537109375,
|
|
"loss": 1.0094,
|
|
"margin_dpo/margin_mean": 463.0313415527344,
|
|
"margin_dpo/margin_std": 563.2684936523438,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.71071953010279,
|
|
"fcm_dpo/beta": 0.001149929827079177,
|
|
"fcm_dpo/delta": -0.10865214467048645,
|
|
"fcm_dpo/margin": 437.65692138671875,
|
|
"fcm_dpo/q_t": 0.38679593801498413,
|
|
"grad_norm": 52.580448150634766,
|
|
"learning_rate": 1.1839195928066101e-07,
|
|
"logits/chosen": -0.9185287952423096,
|
|
"logits/rejected": -0.9445685148239136,
|
|
"logps/chosen": -590.5552368164062,
|
|
"logps/ref_chosen": -59.13812255859375,
|
|
"logps/ref_rejected": -84.37144470214844,
|
|
"logps/rejected": -1053.445556640625,
|
|
"loss": 1.0252,
|
|
"margin_dpo/margin_mean": 437.65692138671875,
|
|
"margin_dpo/margin_std": 514.391357421875,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.7121879588839941,
|
|
"fcm_dpo/beta": 0.0011327798711135983,
|
|
"fcm_dpo/delta": -0.053013693541288376,
|
|
"fcm_dpo/margin": 397.80157470703125,
|
|
"fcm_dpo/q_t": 0.399851530790329,
|
|
"grad_norm": 38.13155746459961,
|
|
"learning_rate": 1.1730258863039347e-07,
|
|
"logits/chosen": -0.8724352717399597,
|
|
"logits/rejected": -0.8907235860824585,
|
|
"logps/chosen": -631.8712158203125,
|
|
"logps/ref_chosen": -58.849571228027344,
|
|
"logps/ref_rejected": -103.36408233642578,
|
|
"logps/rejected": -1074.187255859375,
|
|
"loss": 1.0791,
|
|
"margin_dpo/margin_mean": 397.80157470703125,
|
|
"margin_dpo/margin_std": 566.2918701171875,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.7136563876651982,
|
|
"fcm_dpo/beta": 0.0011056348448619246,
|
|
"fcm_dpo/delta": -0.11685362458229065,
|
|
"fcm_dpo/margin": 461.8829345703125,
|
|
"fcm_dpo/q_t": 0.3876580595970154,
|
|
"grad_norm": 40.950435638427734,
|
|
"learning_rate": 1.1621671468032493e-07,
|
|
"logits/chosen": -0.9299178123474121,
|
|
"logits/rejected": -0.945504903793335,
|
|
"logps/chosen": -695.784423828125,
|
|
"logps/ref_chosen": -55.25966262817383,
|
|
"logps/ref_rejected": -92.13936614990234,
|
|
"logps/rejected": -1194.547119140625,
|
|
"loss": 1.0587,
|
|
"margin_dpo/margin_mean": 461.8829345703125,
|
|
"margin_dpo/margin_std": 665.51806640625,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.7151248164464024,
|
|
"fcm_dpo/beta": 0.001111747114919126,
|
|
"fcm_dpo/delta": 0.03676484897732735,
|
|
"fcm_dpo/margin": 327.4232177734375,
|
|
"fcm_dpo/q_t": 0.41555002331733704,
|
|
"grad_norm": 40.30672073364258,
|
|
"learning_rate": 1.1513436604424378e-07,
|
|
"logits/chosen": -0.9425885677337646,
|
|
"logits/rejected": -0.9484249353408813,
|
|
"logps/chosen": -738.612548828125,
|
|
"logps/ref_chosen": -53.06330871582031,
|
|
"logps/ref_rejected": -92.41883087158203,
|
|
"logps/rejected": -1105.3912353515625,
|
|
"loss": 1.1361,
|
|
"margin_dpo/margin_mean": 327.4231872558594,
|
|
"margin_dpo/margin_std": 529.4686889648438,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.7165932452276065,
|
|
"fcm_dpo/beta": 0.0011130350176244974,
|
|
"fcm_dpo/delta": 0.004432627931237221,
|
|
"fcm_dpo/margin": 355.3664855957031,
|
|
"fcm_dpo/q_t": 0.40918317437171936,
|
|
"grad_norm": 37.70393753051758,
|
|
"learning_rate": 1.1405557124304335e-07,
|
|
"logits/chosen": -0.8855708241462708,
|
|
"logits/rejected": -0.8907876014709473,
|
|
"logps/chosen": -617.8189697265625,
|
|
"logps/ref_chosen": -52.22815704345703,
|
|
"logps/ref_rejected": -84.00656127929688,
|
|
"logps/rejected": -1004.9638671875,
|
|
"loss": 1.0909,
|
|
"margin_dpo/margin_mean": 355.366455078125,
|
|
"margin_dpo/margin_std": 460.8048095703125,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.7180616740088106,
|
|
"fcm_dpo/beta": 0.0011166043113917112,
|
|
"fcm_dpo/delta": 0.015580521896481514,
|
|
"fcm_dpo/margin": 344.5076599121094,
|
|
"fcm_dpo/q_t": 0.4143439531326294,
|
|
"grad_norm": 32.79881286621094,
|
|
"learning_rate": 1.1298035870396985e-07,
|
|
"logits/chosen": -0.9116396307945251,
|
|
"logits/rejected": -0.9118118286132812,
|
|
"logps/chosen": -587.8505859375,
|
|
"logps/ref_chosen": -55.989627838134766,
|
|
"logps/ref_rejected": -79.39812469482422,
|
|
"logps/rejected": -955.7667236328125,
|
|
"loss": 1.1132,
|
|
"margin_dpo/margin_mean": 344.5076904296875,
|
|
"margin_dpo/margin_std": 512.8632202148438,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.7195301027900147,
|
|
"fcm_dpo/beta": 0.0011167211923748255,
|
|
"fcm_dpo/delta": 0.0001148320734500885,
|
|
"fcm_dpo/margin": 357.92462158203125,
|
|
"fcm_dpo/q_t": 0.4123692512512207,
|
|
"grad_norm": 48.77171325683594,
|
|
"learning_rate": 1.1190875675987355e-07,
|
|
"logits/chosen": -0.9318056106567383,
|
|
"logits/rejected": -0.9702289700508118,
|
|
"logps/chosen": -655.4432373046875,
|
|
"logps/ref_chosen": -52.36639404296875,
|
|
"logps/ref_rejected": -110.4090576171875,
|
|
"logps/rejected": -1071.4105224609375,
|
|
"loss": 1.1441,
|
|
"margin_dpo/margin_mean": 357.9245910644531,
|
|
"margin_dpo/margin_std": 644.6651611328125,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.7209985315712188,
|
|
"fcm_dpo/beta": 0.0011353420559316874,
|
|
"fcm_dpo/delta": 0.1237509548664093,
|
|
"fcm_dpo/margin": 246.2215118408203,
|
|
"fcm_dpo/q_t": 0.4365187883377075,
|
|
"grad_norm": 30.707971572875977,
|
|
"learning_rate": 1.1084079364846241e-07,
|
|
"logits/chosen": -0.9380520582199097,
|
|
"logits/rejected": -0.9351228475570679,
|
|
"logps/chosen": -626.2109375,
|
|
"logps/ref_chosen": -60.11626434326172,
|
|
"logps/ref_rejected": -73.27278900146484,
|
|
"logps/rejected": -885.5888671875,
|
|
"loss": 1.1901,
|
|
"margin_dpo/margin_mean": 246.22149658203125,
|
|
"margin_dpo/margin_std": 459.95343017578125,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.7224669603524229,
|
|
"fcm_dpo/beta": 0.0011589345522224903,
|
|
"fcm_dpo/delta": 0.11556318402290344,
|
|
"fcm_dpo/margin": 248.4207763671875,
|
|
"fcm_dpo/q_t": 0.43614447116851807,
|
|
"grad_norm": 39.64064407348633,
|
|
"learning_rate": 1.097764975115576e-07,
|
|
"logits/chosen": -0.9761683940887451,
|
|
"logits/rejected": -0.9563957452774048,
|
|
"logps/chosen": -648.32568359375,
|
|
"logps/ref_chosen": -53.994178771972656,
|
|
"logps/ref_rejected": -72.65962219238281,
|
|
"logps/rejected": -915.411865234375,
|
|
"loss": 1.2158,
|
|
"margin_dpo/margin_mean": 248.42079162597656,
|
|
"margin_dpo/margin_std": 551.939453125,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.723935389133627,
|
|
"fcm_dpo/beta": 0.0011761472560465336,
|
|
"fcm_dpo/delta": 0.08842451870441437,
|
|
"fcm_dpo/margin": 267.0850524902344,
|
|
"fcm_dpo/q_t": 0.426922470331192,
|
|
"grad_norm": 36.99184799194336,
|
|
"learning_rate": 1.0871589639435203e-07,
|
|
"logits/chosen": -0.9865261316299438,
|
|
"logits/rejected": -0.9545494318008423,
|
|
"logps/chosen": -706.1417236328125,
|
|
"logps/ref_chosen": -75.49723815917969,
|
|
"logps/ref_rejected": -87.32301330566406,
|
|
"logps/rejected": -985.0526123046875,
|
|
"loss": 1.1778,
|
|
"margin_dpo/margin_mean": 267.0850524902344,
|
|
"margin_dpo/margin_std": 489.79541015625,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.7254038179148311,
|
|
"fcm_dpo/beta": 0.0011666524223983288,
|
|
"fcm_dpo/delta": -0.11960312724113464,
|
|
"fcm_dpo/margin": 440.0100402832031,
|
|
"fcm_dpo/q_t": 0.38171452283859253,
|
|
"grad_norm": 46.9616584777832,
|
|
"learning_rate": 1.0765901824467166e-07,
|
|
"logits/chosen": -0.8546612858772278,
|
|
"logits/rejected": -0.8873025178909302,
|
|
"logps/chosen": -538.9559326171875,
|
|
"logps/ref_chosen": -41.35926818847656,
|
|
"logps/ref_rejected": -86.09136962890625,
|
|
"logps/rejected": -1023.697998046875,
|
|
"loss": 1.0058,
|
|
"margin_dpo/margin_mean": 440.01007080078125,
|
|
"margin_dpo/margin_std": 467.8382263183594,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.7268722466960352,
|
|
"fcm_dpo/beta": 0.0011541168205440044,
|
|
"fcm_dpo/delta": -0.04541929066181183,
|
|
"fcm_dpo/margin": 384.21527099609375,
|
|
"fcm_dpo/q_t": 0.40197789669036865,
|
|
"grad_norm": 42.29792022705078,
|
|
"learning_rate": 1.0660589091223854e-07,
|
|
"logits/chosen": -0.9579362869262695,
|
|
"logits/rejected": -0.9644915461540222,
|
|
"logps/chosen": -626.9910888671875,
|
|
"logps/ref_chosen": -63.53507995605469,
|
|
"logps/ref_rejected": -91.42443084716797,
|
|
"logps/rejected": -1039.095703125,
|
|
"loss": 1.0847,
|
|
"margin_dpo/margin_mean": 384.21527099609375,
|
|
"margin_dpo/margin_std": 566.3270874023438,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.7283406754772394,
|
|
"fcm_dpo/beta": 0.0011813519522547722,
|
|
"fcm_dpo/delta": 0.176091730594635,
|
|
"fcm_dpo/margin": 192.8607177734375,
|
|
"fcm_dpo/q_t": 0.4472728371620178,
|
|
"grad_norm": 79.11801147460938,
|
|
"learning_rate": 1.0555654214793722e-07,
|
|
"logits/chosen": -0.9284073710441589,
|
|
"logits/rejected": -0.8919467926025391,
|
|
"logps/chosen": -776.5562744140625,
|
|
"logps/ref_chosen": -72.5919189453125,
|
|
"logps/ref_rejected": -84.32933807373047,
|
|
"logps/rejected": -981.1544189453125,
|
|
"loss": 1.2346,
|
|
"margin_dpo/margin_mean": 192.86068725585938,
|
|
"margin_dpo/margin_std": 430.8555908203125,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.7298091042584435,
|
|
"fcm_dpo/beta": 0.001212080824188888,
|
|
"fcm_dpo/delta": 0.15942029654979706,
|
|
"fcm_dpo/margin": 201.83465576171875,
|
|
"fcm_dpo/q_t": 0.4441620111465454,
|
|
"grad_norm": 43.26478576660156,
|
|
"learning_rate": 1.0451099960308374e-07,
|
|
"logits/chosen": -0.8874781727790833,
|
|
"logits/rejected": -0.8688886165618896,
|
|
"logps/chosen": -716.3388671875,
|
|
"logps/ref_chosen": -58.59397506713867,
|
|
"logps/ref_rejected": -76.28836822509766,
|
|
"logps/rejected": -935.867919921875,
|
|
"loss": 1.2305,
|
|
"margin_dpo/margin_mean": 201.83465576171875,
|
|
"margin_dpo/margin_std": 438.799560546875,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.7312775330396476,
|
|
"fcm_dpo/beta": 0.0012286882847547531,
|
|
"fcm_dpo/delta": 0.0005217818543314934,
|
|
"fcm_dpo/margin": 325.12445068359375,
|
|
"fcm_dpo/q_t": 0.40874072909355164,
|
|
"grad_norm": 41.114078521728516,
|
|
"learning_rate": 1.0346929082869641e-07,
|
|
"logits/chosen": -0.8940218687057495,
|
|
"logits/rejected": -0.8773443698883057,
|
|
"logps/chosen": -690.500732421875,
|
|
"logps/ref_chosen": -71.20565795898438,
|
|
"logps/ref_rejected": -83.95803833007812,
|
|
"logps/rejected": -1028.3775634765625,
|
|
"loss": 1.1315,
|
|
"margin_dpo/margin_mean": 325.12445068359375,
|
|
"margin_dpo/margin_std": 548.890380859375,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.7327459618208517,
|
|
"fcm_dpo/beta": 0.0012098584556952119,
|
|
"fcm_dpo/delta": -0.1002291664481163,
|
|
"fcm_dpo/margin": 409.18572998046875,
|
|
"fcm_dpo/q_t": 0.38879793882369995,
|
|
"grad_norm": 43.56269454956055,
|
|
"learning_rate": 1.0243144327477013e-07,
|
|
"logits/chosen": -0.9197027683258057,
|
|
"logits/rejected": -0.9493994116783142,
|
|
"logps/chosen": -586.4039306640625,
|
|
"logps/ref_chosen": -51.25519561767578,
|
|
"logps/ref_rejected": -101.07870483398438,
|
|
"logps/rejected": -1045.4130859375,
|
|
"loss": 1.0504,
|
|
"margin_dpo/margin_mean": 409.18572998046875,
|
|
"margin_dpo/margin_std": 551.0729370117188,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.7342143906020558,
|
|
"fcm_dpo/beta": 0.001203530584461987,
|
|
"fcm_dpo/delta": -0.00410887785255909,
|
|
"fcm_dpo/margin": 335.60675048828125,
|
|
"fcm_dpo/q_t": 0.4080291986465454,
|
|
"grad_norm": 34.33218765258789,
|
|
"learning_rate": 1.0139748428955333e-07,
|
|
"logits/chosen": -0.8823539018630981,
|
|
"logits/rejected": -0.9056754112243652,
|
|
"logps/chosen": -684.366455078125,
|
|
"logps/ref_chosen": -57.027442932128906,
|
|
"logps/ref_rejected": -93.93421173095703,
|
|
"logps/rejected": -1056.8798828125,
|
|
"loss": 1.116,
|
|
"margin_dpo/margin_mean": 335.6067810058594,
|
|
"margin_dpo/margin_std": 522.2293701171875,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.73568281938326,
|
|
"fcm_dpo/beta": 0.0011992482468485832,
|
|
"fcm_dpo/delta": -0.04197482019662857,
|
|
"fcm_dpo/margin": 367.0113525390625,
|
|
"fcm_dpo/q_t": 0.4015328288078308,
|
|
"grad_norm": 40.373878479003906,
|
|
"learning_rate": 1.0036744111882672e-07,
|
|
"logits/chosen": -0.8434513807296753,
|
|
"logits/rejected": -0.8268097639083862,
|
|
"logps/chosen": -608.291748046875,
|
|
"logps/ref_chosen": -54.359527587890625,
|
|
"logps/ref_rejected": -80.15670013427734,
|
|
"logps/rejected": -1001.100341796875,
|
|
"loss": 1.0956,
|
|
"margin_dpo/margin_mean": 367.0113830566406,
|
|
"margin_dpo/margin_std": 555.9649658203125,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.737151248164464,
|
|
"fcm_dpo/beta": 0.001190928858704865,
|
|
"fcm_dpo/delta": -0.029523320496082306,
|
|
"fcm_dpo/margin": 359.60321044921875,
|
|
"fcm_dpo/q_t": 0.4018331468105316,
|
|
"grad_norm": 32.51664733886719,
|
|
"learning_rate": 9.934134090518592e-08,
|
|
"logits/chosen": -0.7909951210021973,
|
|
"logits/rejected": -0.7704396843910217,
|
|
"logps/chosen": -573.842041015625,
|
|
"logps/ref_chosen": -67.60050964355469,
|
|
"logps/ref_rejected": -82.94876098632812,
|
|
"logps/rejected": -948.7935180664062,
|
|
"loss": 1.0626,
|
|
"margin_dpo/margin_mean": 359.60321044921875,
|
|
"margin_dpo/margin_std": 427.0804443359375,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.7386196769456681,
|
|
"fcm_dpo/beta": 0.001186006236821413,
|
|
"fcm_dpo/delta": 0.008181419223546982,
|
|
"fcm_dpo/margin": 330.5501708984375,
|
|
"fcm_dpo/q_t": 0.4111096262931824,
|
|
"grad_norm": 25.633560180664062,
|
|
"learning_rate": 9.831921068732571e-08,
|
|
"logits/chosen": -0.7634469270706177,
|
|
"logits/rejected": -0.7438396215438843,
|
|
"logps/chosen": -551.8092651367188,
|
|
"logps/ref_chosen": -55.078407287597656,
|
|
"logps/ref_rejected": -82.50544738769531,
|
|
"logps/rejected": -909.7864990234375,
|
|
"loss": 1.0933,
|
|
"margin_dpo/margin_mean": 330.5502014160156,
|
|
"margin_dpo/margin_std": 435.82733154296875,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.7400881057268722,
|
|
"fcm_dpo/beta": 0.0011800960637629032,
|
|
"fcm_dpo/delta": -0.064161516726017,
|
|
"fcm_dpo/margin": 390.8580322265625,
|
|
"fcm_dpo/q_t": 0.39606422185897827,
|
|
"grad_norm": 30.177688598632812,
|
|
"learning_rate": 9.730107739932805e-08,
|
|
"logits/chosen": -0.8217068314552307,
|
|
"logits/rejected": -0.8377784490585327,
|
|
"logps/chosen": -611.22216796875,
|
|
"logps/ref_chosen": -59.96575164794922,
|
|
"logps/ref_rejected": -103.76212310791016,
|
|
"logps/rejected": -1045.8765869140625,
|
|
"loss": 1.0653,
|
|
"margin_dpo/margin_mean": 390.8580017089844,
|
|
"margin_dpo/margin_std": 516.3780517578125,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.7415565345080763,
|
|
"fcm_dpo/beta": 0.0012019076384603977,
|
|
"fcm_dpo/delta": 0.14581286907196045,
|
|
"fcm_dpo/margin": 214.4113006591797,
|
|
"fcm_dpo/q_t": 0.4408836364746094,
|
|
"grad_norm": 37.63759231567383,
|
|
"learning_rate": 9.628696786995188e-08,
|
|
"logits/chosen": -0.852393627166748,
|
|
"logits/rejected": -0.8234021663665771,
|
|
"logps/chosen": -684.0704345703125,
|
|
"logps/ref_chosen": -76.1549072265625,
|
|
"logps/ref_rejected": -88.58537292480469,
|
|
"logps/rejected": -910.912109375,
|
|
"loss": 1.2104,
|
|
"margin_dpo/margin_mean": 214.41128540039062,
|
|
"margin_dpo/margin_std": 429.28167724609375,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.7430249632892805,
|
|
"fcm_dpo/beta": 0.001200593076646328,
|
|
"fcm_dpo/delta": -0.024187199771404266,
|
|
"fcm_dpo/margin": 352.2035217285156,
|
|
"fcm_dpo/q_t": 0.40292614698410034,
|
|
"grad_norm": 40.91268539428711,
|
|
"learning_rate": 9.527690882192635e-08,
|
|
"logits/chosen": -0.8200086355209351,
|
|
"logits/rejected": -0.8278741836547852,
|
|
"logps/chosen": -518.744873046875,
|
|
"logps/ref_chosen": -48.96050262451172,
|
|
"logps/ref_rejected": -78.41505432128906,
|
|
"logps/rejected": -900.4028930664062,
|
|
"loss": 1.0806,
|
|
"margin_dpo/margin_mean": 352.2035217285156,
|
|
"margin_dpo/margin_std": 464.32720947265625,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.7444933920704846,
|
|
"fcm_dpo/beta": 0.0012085672933608294,
|
|
"fcm_dpo/delta": 0.0354694128036499,
|
|
"fcm_dpo/margin": 302.70806884765625,
|
|
"fcm_dpo/q_t": 0.42136380076408386,
|
|
"grad_norm": 32.80398178100586,
|
|
"learning_rate": 9.427092687124691e-08,
|
|
"logits/chosen": -0.8519065380096436,
|
|
"logits/rejected": -0.8482725620269775,
|
|
"logps/chosen": -630.7051391601562,
|
|
"logps/ref_chosen": -66.80149841308594,
|
|
"logps/ref_rejected": -95.37289428710938,
|
|
"logps/rejected": -961.984619140625,
|
|
"loss": 1.1537,
|
|
"margin_dpo/margin_mean": 302.70806884765625,
|
|
"margin_dpo/margin_std": 559.1907958984375,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.7459618208516887,
|
|
"fcm_dpo/beta": 0.001229484099894762,
|
|
"fcm_dpo/delta": 0.0795753076672554,
|
|
"fcm_dpo/margin": 262.43280029296875,
|
|
"fcm_dpo/q_t": 0.42874249815940857,
|
|
"grad_norm": 36.952003479003906,
|
|
"learning_rate": 9.326904852647344e-08,
|
|
"logits/chosen": -0.8085677623748779,
|
|
"logits/rejected": -0.8026360273361206,
|
|
"logps/chosen": -644.8947143554688,
|
|
"logps/ref_chosen": -71.303466796875,
|
|
"logps/ref_rejected": -95.6275405883789,
|
|
"logps/rejected": -931.651611328125,
|
|
"loss": 1.192,
|
|
"margin_dpo/margin_mean": 262.4328308105469,
|
|
"margin_dpo/margin_std": 540.2072143554688,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.7474302496328928,
|
|
"fcm_dpo/beta": 0.0012463298626244068,
|
|
"fcm_dpo/delta": 0.04969964176416397,
|
|
"fcm_dpo/margin": 281.92559814453125,
|
|
"fcm_dpo/q_t": 0.4197409749031067,
|
|
"grad_norm": 35.611900329589844,
|
|
"learning_rate": 9.227130018803195e-08,
|
|
"logits/chosen": -0.7330523729324341,
|
|
"logits/rejected": -0.7244545221328735,
|
|
"logps/chosen": -496.95751953125,
|
|
"logps/ref_chosen": -63.81895065307617,
|
|
"logps/ref_rejected": -83.25643920898438,
|
|
"logps/rejected": -798.3206176757812,
|
|
"loss": 1.132,
|
|
"margin_dpo/margin_mean": 281.92559814453125,
|
|
"margin_dpo/margin_std": 420.67022705078125,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.748898678414097,
|
|
"fcm_dpo/beta": 0.001235937001183629,
|
|
"fcm_dpo/delta": -0.050036292523145676,
|
|
"fcm_dpo/margin": 362.3361511230469,
|
|
"fcm_dpo/q_t": 0.39491382241249084,
|
|
"grad_norm": 36.38752746582031,
|
|
"learning_rate": 9.127770814751932e-08,
|
|
"logits/chosen": -0.7187889218330383,
|
|
"logits/rejected": -0.7337198257446289,
|
|
"logps/chosen": -575.311279296875,
|
|
"logps/ref_chosen": -51.878448486328125,
|
|
"logps/ref_rejected": -102.7651596069336,
|
|
"logps/rejected": -988.5341186523438,
|
|
"loss": 1.0406,
|
|
"margin_dpo/margin_mean": 362.33612060546875,
|
|
"margin_dpo/margin_std": 382.94219970703125,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.750367107195301,
|
|
"fcm_dpo/beta": 0.0012384748551994562,
|
|
"fcm_dpo/delta": 0.0352516807615757,
|
|
"fcm_dpo/margin": 295.53057861328125,
|
|
"fcm_dpo/q_t": 0.4174777865409851,
|
|
"grad_norm": 43.398075103759766,
|
|
"learning_rate": 9.028829858700973e-08,
|
|
"logits/chosen": -0.7895054817199707,
|
|
"logits/rejected": -0.7914731502532959,
|
|
"logps/chosen": -560.07421875,
|
|
"logps/ref_chosen": -60.23811721801758,
|
|
"logps/ref_rejected": -92.85676574707031,
|
|
"logps/rejected": -888.2234497070312,
|
|
"loss": 1.153,
|
|
"margin_dpo/margin_mean": 295.53057861328125,
|
|
"margin_dpo/margin_std": 535.6825561523438,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.7518355359765051,
|
|
"fcm_dpo/beta": 0.0012218713527545333,
|
|
"fcm_dpo/delta": -0.08188765496015549,
|
|
"fcm_dpo/margin": 390.9322509765625,
|
|
"fcm_dpo/q_t": 0.3888099193572998,
|
|
"grad_norm": 54.90243911743164,
|
|
"learning_rate": 8.930309757836516e-08,
|
|
"logits/chosen": -0.7493938207626343,
|
|
"logits/rejected": -0.7599710822105408,
|
|
"logps/chosen": -444.6104736328125,
|
|
"logps/ref_chosen": -54.905494689941406,
|
|
"logps/ref_rejected": -81.87586975097656,
|
|
"logps/rejected": -862.5130615234375,
|
|
"loss": 1.0207,
|
|
"margin_dpo/margin_mean": 390.9322509765625,
|
|
"margin_dpo/margin_std": 403.09393310546875,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.7533039647577092,
|
|
"fcm_dpo/beta": 0.0012233736924827099,
|
|
"fcm_dpo/delta": 0.05911244451999664,
|
|
"fcm_dpo/margin": 280.02032470703125,
|
|
"fcm_dpo/q_t": 0.4215858578681946,
|
|
"grad_norm": 46.705345153808594,
|
|
"learning_rate": 8.832213108254863e-08,
|
|
"logits/chosen": -0.8222278356552124,
|
|
"logits/rejected": -0.8028245568275452,
|
|
"logps/chosen": -562.989013671875,
|
|
"logps/ref_chosen": -64.91644287109375,
|
|
"logps/ref_rejected": -76.06245422363281,
|
|
"logps/rejected": -854.1553955078125,
|
|
"loss": 1.1409,
|
|
"margin_dpo/margin_mean": 280.02032470703125,
|
|
"margin_dpo/margin_std": 426.9283447265625,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.7547723935389133,
|
|
"fcm_dpo/beta": 0.0012440317077562213,
|
|
"fcm_dpo/delta": 0.044145580381155014,
|
|
"fcm_dpo/margin": 287.24462890625,
|
|
"fcm_dpo/q_t": 0.4205576777458191,
|
|
"grad_norm": 33.267601013183594,
|
|
"learning_rate": 8.734542494893954e-08,
|
|
"logits/chosen": -0.76353919506073,
|
|
"logits/rejected": -0.7479803562164307,
|
|
"logps/chosen": -612.5877685546875,
|
|
"logps/ref_chosen": -74.22957611083984,
|
|
"logps/ref_rejected": -78.945556640625,
|
|
"logps/rejected": -904.54833984375,
|
|
"loss": 1.1419,
|
|
"margin_dpo/margin_mean": 287.24462890625,
|
|
"margin_dpo/margin_std": 480.07623291015625,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.7562408223201175,
|
|
"fcm_dpo/beta": 0.0012643520021811128,
|
|
"fcm_dpo/delta": 0.10326212644577026,
|
|
"fcm_dpo/margin": 237.156005859375,
|
|
"fcm_dpo/q_t": 0.42959409952163696,
|
|
"grad_norm": 43.65932083129883,
|
|
"learning_rate": 8.637300491465272e-08,
|
|
"logits/chosen": -0.7629779577255249,
|
|
"logits/rejected": -0.771415114402771,
|
|
"logps/chosen": -499.5674743652344,
|
|
"logps/ref_chosen": -50.40156555175781,
|
|
"logps/ref_rejected": -87.09774780273438,
|
|
"logps/rejected": -773.419677734375,
|
|
"loss": 1.1742,
|
|
"margin_dpo/margin_mean": 237.156005859375,
|
|
"margin_dpo/margin_std": 419.44488525390625,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.7577092511013216,
|
|
"fcm_dpo/beta": 0.0012712322641164064,
|
|
"fcm_dpo/delta": -0.02079898491501808,
|
|
"fcm_dpo/margin": 330.2271728515625,
|
|
"fcm_dpo/q_t": 0.4013640880584717,
|
|
"grad_norm": 44.21212387084961,
|
|
"learning_rate": 8.540489660386064e-08,
|
|
"logits/chosen": -0.8088574409484863,
|
|
"logits/rejected": -0.8313539028167725,
|
|
"logps/chosen": -550.5634765625,
|
|
"logps/ref_chosen": -64.64956665039062,
|
|
"logps/ref_rejected": -111.72237396240234,
|
|
"logps/rejected": -927.8634033203125,
|
|
"loss": 1.0712,
|
|
"margin_dpo/margin_mean": 330.2271728515625,
|
|
"margin_dpo/margin_std": 396.8078918457031,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.7591776798825257,
|
|
"fcm_dpo/beta": 0.0012457960983738303,
|
|
"fcm_dpo/delta": -0.09264262765645981,
|
|
"fcm_dpo/margin": 391.58154296875,
|
|
"fcm_dpo/q_t": 0.3908216953277588,
|
|
"grad_norm": 32.42768478393555,
|
|
"learning_rate": 8.444112552711752e-08,
|
|
"logits/chosen": -0.7810485363006592,
|
|
"logits/rejected": -0.77211594581604,
|
|
"logps/chosen": -568.1607666015625,
|
|
"logps/ref_chosen": -60.913551330566406,
|
|
"logps/ref_rejected": -89.08308410644531,
|
|
"logps/rejected": -987.911865234375,
|
|
"loss": 1.041,
|
|
"margin_dpo/margin_mean": 391.5815734863281,
|
|
"margin_dpo/margin_std": 495.0039367675781,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.7606461086637298,
|
|
"fcm_dpo/beta": 0.0012362590059638023,
|
|
"fcm_dpo/delta": -0.010039325803518295,
|
|
"fcm_dpo/margin": 331.1033935546875,
|
|
"fcm_dpo/q_t": 0.40492960810661316,
|
|
"grad_norm": 51.907432556152344,
|
|
"learning_rate": 8.348171708068747e-08,
|
|
"logits/chosen": -0.8003250360488892,
|
|
"logits/rejected": -0.8121221661567688,
|
|
"logps/chosen": -537.2240600585938,
|
|
"logps/ref_chosen": -57.45589065551758,
|
|
"logps/ref_rejected": -85.31269836425781,
|
|
"logps/rejected": -896.184326171875,
|
|
"loss": 1.082,
|
|
"margin_dpo/margin_mean": 331.1033935546875,
|
|
"margin_dpo/margin_std": 417.5572509765625,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.762114537444934,
|
|
"fcm_dpo/beta": 0.0012659772764891386,
|
|
"fcm_dpo/delta": 0.11485999822616577,
|
|
"fcm_dpo/margin": 227.35385131835938,
|
|
"fcm_dpo/q_t": 0.43359264731407166,
|
|
"grad_norm": 34.693687438964844,
|
|
"learning_rate": 8.25266965458755e-08,
|
|
"logits/chosen": -0.8020866513252258,
|
|
"logits/rejected": -0.7832698822021484,
|
|
"logps/chosen": -571.0335693359375,
|
|
"logps/ref_chosen": -74.06331634521484,
|
|
"logps/ref_rejected": -104.44416809082031,
|
|
"logps/rejected": -828.768310546875,
|
|
"loss": 1.1924,
|
|
"margin_dpo/margin_mean": 227.35385131835938,
|
|
"margin_dpo/margin_std": 434.76171875,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.7635829662261381,
|
|
"fcm_dpo/beta": 0.0012666715774685144,
|
|
"fcm_dpo/delta": 0.012290934100747108,
|
|
"fcm_dpo/margin": 306.2880859375,
|
|
"fcm_dpo/q_t": 0.41312289237976074,
|
|
"grad_norm": 41.684967041015625,
|
|
"learning_rate": 8.15760890883607e-08,
|
|
"logits/chosen": -0.7368162870407104,
|
|
"logits/rejected": -0.7360740900039673,
|
|
"logps/chosen": -598.6595458984375,
|
|
"logps/ref_chosen": -70.2998275756836,
|
|
"logps/ref_rejected": -99.98133850097656,
|
|
"logps/rejected": -934.629150390625,
|
|
"loss": 1.115,
|
|
"margin_dpo/margin_mean": 306.2880859375,
|
|
"margin_dpo/margin_std": 450.21893310546875,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.7650513950073421,
|
|
"fcm_dpo/beta": 0.001273997942917049,
|
|
"fcm_dpo/delta": -0.03606198728084564,
|
|
"fcm_dpo/margin": 340.52056884765625,
|
|
"fcm_dpo/q_t": 0.4016761779785156,
|
|
"grad_norm": 35.294342041015625,
|
|
"learning_rate": 8.062991975753378e-08,
|
|
"logits/chosen": -0.8503248691558838,
|
|
"logits/rejected": -0.8509985208511353,
|
|
"logps/chosen": -542.428955078125,
|
|
"logps/ref_chosen": -58.14292526245117,
|
|
"logps/ref_rejected": -83.28060913085938,
|
|
"logps/rejected": -908.0872192382812,
|
|
"loss": 1.0758,
|
|
"margin_dpo/margin_mean": 340.5205383300781,
|
|
"margin_dpo/margin_std": 442.4564208984375,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.7665198237885462,
|
|
"fcm_dpo/beta": 0.0012649366399273276,
|
|
"fcm_dpo/delta": 0.010182084515690804,
|
|
"fcm_dpo/margin": 308.485595703125,
|
|
"fcm_dpo/q_t": 0.41139504313468933,
|
|
"grad_norm": 30.87180519104004,
|
|
"learning_rate": 7.968821348583643e-08,
|
|
"logits/chosen": -0.854311466217041,
|
|
"logits/rejected": -0.8545736074447632,
|
|
"logps/chosen": -605.51220703125,
|
|
"logps/ref_chosen": -46.54766845703125,
|
|
"logps/ref_rejected": -66.01388549804688,
|
|
"logps/rejected": -933.4640502929688,
|
|
"loss": 1.1183,
|
|
"margin_dpo/margin_mean": 308.485595703125,
|
|
"margin_dpo/margin_std": 476.64263916015625,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.7679882525697503,
|
|
"fcm_dpo/beta": 0.001264127902686596,
|
|
"fcm_dpo/delta": -0.007558091077953577,
|
|
"fcm_dpo/margin": 322.1591491699219,
|
|
"fcm_dpo/q_t": 0.41010820865631104,
|
|
"grad_norm": 41.77898406982422,
|
|
"learning_rate": 7.875099508810484e-08,
|
|
"logits/chosen": -0.9136984348297119,
|
|
"logits/rejected": -0.9157658815383911,
|
|
"logps/chosen": -658.3345947265625,
|
|
"logps/ref_chosen": -61.76960372924805,
|
|
"logps/ref_rejected": -83.76141357421875,
|
|
"logps/rejected": -1002.485595703125,
|
|
"loss": 1.1291,
|
|
"margin_dpo/margin_mean": 322.1591796875,
|
|
"margin_dpo/margin_std": 548.837158203125,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.7694566813509545,
|
|
"fcm_dpo/beta": 0.001250438392162323,
|
|
"fcm_dpo/delta": -0.03454245999455452,
|
|
"fcm_dpo/margin": 345.728271484375,
|
|
"fcm_dpo/q_t": 0.39945122599601746,
|
|
"grad_norm": 37.57605743408203,
|
|
"learning_rate": 7.781828926091535e-08,
|
|
"logits/chosen": -0.9349164366722107,
|
|
"logits/rejected": -0.9216375350952148,
|
|
"logps/chosen": -658.25390625,
|
|
"logps/ref_chosen": -78.0720443725586,
|
|
"logps/ref_rejected": -81.30198669433594,
|
|
"logps/rejected": -1007.212158203125,
|
|
"loss": 1.0951,
|
|
"margin_dpo/margin_mean": 345.728271484375,
|
|
"margin_dpo/margin_std": 496.82781982421875,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.7709251101321586,
|
|
"fcm_dpo/beta": 0.0012223366647958755,
|
|
"fcm_dpo/delta": -0.14574724435806274,
|
|
"fcm_dpo/margin": 439.2078857421875,
|
|
"fcm_dpo/q_t": 0.38136690855026245,
|
|
"grad_norm": 29.998579025268555,
|
|
"learning_rate": 7.689012058193384e-08,
|
|
"logits/chosen": -0.8645678162574768,
|
|
"logits/rejected": -0.9013247489929199,
|
|
"logps/chosen": -630.4777221679688,
|
|
"logps/ref_chosen": -50.827857971191406,
|
|
"logps/ref_rejected": -100.05294036865234,
|
|
"logps/rejected": -1118.91064453125,
|
|
"loss": 1.0154,
|
|
"margin_dpo/margin_mean": 439.2078857421875,
|
|
"margin_dpo/margin_std": 538.682373046875,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.7723935389133627,
|
|
"fcm_dpo/beta": 0.0012013528030365705,
|
|
"fcm_dpo/delta": -0.12226028740406036,
|
|
"fcm_dpo/margin": 429.6107177734375,
|
|
"fcm_dpo/q_t": 0.38352981209754944,
|
|
"grad_norm": 31.9099178314209,
|
|
"learning_rate": 7.596651350926836e-08,
|
|
"logits/chosen": -0.8861783146858215,
|
|
"logits/rejected": -0.881539523601532,
|
|
"logps/chosen": -684.8582153320312,
|
|
"logps/ref_chosen": -63.167236328125,
|
|
"logps/ref_rejected": -86.30934143066406,
|
|
"logps/rejected": -1137.611083984375,
|
|
"loss": 1.048,
|
|
"margin_dpo/margin_mean": 429.6107177734375,
|
|
"margin_dpo/margin_std": 579.431396484375,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.7738619676945668,
|
|
"fcm_dpo/beta": 0.0011940683471038938,
|
|
"fcm_dpo/delta": 0.04404643923044205,
|
|
"fcm_dpo/margin": 299.3970947265625,
|
|
"fcm_dpo/q_t": 0.41738709807395935,
|
|
"grad_norm": 34.606040954589844,
|
|
"learning_rate": 7.504749238082414e-08,
|
|
"logits/chosen": -1.0818817615509033,
|
|
"logits/rejected": -1.0509649515151978,
|
|
"logps/chosen": -709.3896484375,
|
|
"logps/ref_chosen": -71.12867736816406,
|
|
"logps/ref_rejected": -78.3425521850586,
|
|
"logps/rejected": -1016.0006713867188,
|
|
"loss": 1.1256,
|
|
"margin_dpo/margin_mean": 299.3970947265625,
|
|
"margin_dpo/margin_std": 437.45355224609375,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.775330396475771,
|
|
"fcm_dpo/beta": 0.001194945303723216,
|
|
"fcm_dpo/delta": -0.01856505125761032,
|
|
"fcm_dpo/margin": 349.5766906738281,
|
|
"fcm_dpo/q_t": 0.40821221470832825,
|
|
"grad_norm": 51.902496337890625,
|
|
"learning_rate": 7.413308141366254e-08,
|
|
"logits/chosen": -0.9746694564819336,
|
|
"logits/rejected": -0.9602982997894287,
|
|
"logps/chosen": -706.31396484375,
|
|
"logps/ref_chosen": -68.0894546508789,
|
|
"logps/ref_rejected": -93.91006469726562,
|
|
"logps/rejected": -1081.711181640625,
|
|
"loss": 1.1192,
|
|
"margin_dpo/margin_mean": 349.5766906738281,
|
|
"margin_dpo/margin_std": 573.882080078125,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.7767988252569751,
|
|
"fcm_dpo/beta": 0.0012100792955607176,
|
|
"fcm_dpo/delta": 0.10508871078491211,
|
|
"fcm_dpo/margin": 246.39369201660156,
|
|
"fcm_dpo/q_t": 0.4327790141105652,
|
|
"grad_norm": 48.65058898925781,
|
|
"learning_rate": 7.322330470336313e-08,
|
|
"logits/chosen": -0.996649980545044,
|
|
"logits/rejected": -1.008021593093872,
|
|
"logps/chosen": -809.325927734375,
|
|
"logps/ref_chosen": -55.57495880126953,
|
|
"logps/ref_rejected": -89.20909118652344,
|
|
"logps/rejected": -1089.353759765625,
|
|
"loss": 1.2287,
|
|
"margin_dpo/margin_mean": 246.39369201660156,
|
|
"margin_dpo/margin_std": 580.08642578125,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.7782672540381792,
|
|
"fcm_dpo/beta": 0.0012070810189470649,
|
|
"fcm_dpo/delta": -0.08302216976881027,
|
|
"fcm_dpo/margin": 396.90985107421875,
|
|
"fcm_dpo/q_t": 0.3978345990180969,
|
|
"grad_norm": 57.351463317871094,
|
|
"learning_rate": 7.231818622338822e-08,
|
|
"logits/chosen": -0.880218505859375,
|
|
"logits/rejected": -0.8767000436782837,
|
|
"logps/chosen": -690.0313720703125,
|
|
"logps/ref_chosen": -47.601417541503906,
|
|
"logps/ref_rejected": -87.2845230102539,
|
|
"logps/rejected": -1126.624267578125,
|
|
"loss": 1.1142,
|
|
"margin_dpo/margin_mean": 396.90985107421875,
|
|
"margin_dpo/margin_std": 693.56298828125,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.7797356828193832,
|
|
"fcm_dpo/beta": 0.0011991492938250303,
|
|
"fcm_dpo/delta": -0.024136528372764587,
|
|
"fcm_dpo/margin": 352.80865478515625,
|
|
"fcm_dpo/q_t": 0.40578585863113403,
|
|
"grad_norm": 44.2398681640625,
|
|
"learning_rate": 7.141774982445147e-08,
|
|
"logits/chosen": -1.0027759075164795,
|
|
"logits/rejected": -0.9875552654266357,
|
|
"logps/chosen": -751.2490234375,
|
|
"logps/ref_chosen": -55.246063232421875,
|
|
"logps/ref_rejected": -70.60598754882812,
|
|
"logps/rejected": -1119.4176025390625,
|
|
"loss": 1.1017,
|
|
"margin_dpo/margin_mean": 352.80865478515625,
|
|
"margin_dpo/margin_std": 537.1859741210938,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.7812041116005873,
|
|
"fcm_dpo/beta": 0.0011825578985735774,
|
|
"fcm_dpo/delta": -0.022645261138677597,
|
|
"fcm_dpo/margin": 355.6239318847656,
|
|
"fcm_dpo/q_t": 0.40732958912849426,
|
|
"grad_norm": 74.50859832763672,
|
|
"learning_rate": 7.052201923388953e-08,
|
|
"logits/chosen": -0.9509673714637756,
|
|
"logits/rejected": -0.9269773960113525,
|
|
"logps/chosen": -783.3470458984375,
|
|
"logps/ref_chosen": -70.28601837158203,
|
|
"logps/ref_rejected": -86.5913314819336,
|
|
"logps/rejected": -1155.2763671875,
|
|
"loss": 1.1382,
|
|
"margin_dpo/margin_mean": 355.62396240234375,
|
|
"margin_dpo/margin_std": 624.2130737304688,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.7826725403817915,
|
|
"fcm_dpo/beta": 0.001213046140037477,
|
|
"fcm_dpo/delta": 0.09190007299184799,
|
|
"fcm_dpo/margin": 255.36468505859375,
|
|
"fcm_dpo/q_t": 0.4308984875679016,
|
|
"grad_norm": 47.32761764526367,
|
|
"learning_rate": 6.963101805503646e-08,
|
|
"logits/chosen": -0.9791627526283264,
|
|
"logits/rejected": -0.9598466753959656,
|
|
"logps/chosen": -701.266357421875,
|
|
"logps/ref_chosen": -64.8551025390625,
|
|
"logps/ref_rejected": -76.58805847167969,
|
|
"logps/rejected": -968.364013671875,
|
|
"loss": 1.2054,
|
|
"margin_dpo/margin_mean": 255.3646697998047,
|
|
"margin_dpo/margin_std": 552.512451171875,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.7841409691629956,
|
|
"fcm_dpo/beta": 0.001198928919620812,
|
|
"fcm_dpo/delta": -0.02406427264213562,
|
|
"fcm_dpo/margin": 351.9185485839844,
|
|
"fcm_dpo/q_t": 0.40505561232566833,
|
|
"grad_norm": 48.15425491333008,
|
|
"learning_rate": 6.874476976660184e-08,
|
|
"logits/chosen": -0.9442458152770996,
|
|
"logits/rejected": -0.9411351680755615,
|
|
"logps/chosen": -707.2823486328125,
|
|
"logps/ref_chosen": -60.119388580322266,
|
|
"logps/ref_rejected": -78.54347229003906,
|
|
"logps/rejected": -1077.625,
|
|
"loss": 1.0963,
|
|
"margin_dpo/margin_mean": 351.9185791015625,
|
|
"margin_dpo/margin_std": 504.8380126953125,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.7856093979441997,
|
|
"fcm_dpo/beta": 0.0011966589372605085,
|
|
"fcm_dpo/delta": -0.0962173268198967,
|
|
"fcm_dpo/margin": 410.5157165527344,
|
|
"fcm_dpo/q_t": 0.390081524848938,
|
|
"grad_norm": 31.493331909179688,
|
|
"learning_rate": 6.786329772205246e-08,
|
|
"logits/chosen": -0.8576223850250244,
|
|
"logits/rejected": -0.8609852194786072,
|
|
"logps/chosen": -605.9679565429688,
|
|
"logps/ref_chosen": -54.330238342285156,
|
|
"logps/ref_rejected": -96.30763244628906,
|
|
"logps/rejected": -1058.4610595703125,
|
|
"loss": 1.0442,
|
|
"margin_dpo/margin_mean": 410.51568603515625,
|
|
"margin_dpo/margin_std": 513.386962890625,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.7870778267254038,
|
|
"fcm_dpo/beta": 0.001155639300122857,
|
|
"fcm_dpo/delta": -0.15889059007167816,
|
|
"fcm_dpo/margin": 476.0905456542969,
|
|
"fcm_dpo/q_t": 0.38305604457855225,
|
|
"grad_norm": 43.20456314086914,
|
|
"learning_rate": 6.698662514899638e-08,
|
|
"logits/chosen": -0.833030104637146,
|
|
"logits/rejected": -0.8629981279373169,
|
|
"logps/chosen": -540.8412475585938,
|
|
"logps/ref_chosen": -47.08053207397461,
|
|
"logps/ref_rejected": -89.09783935546875,
|
|
"logps/rejected": -1058.9490966796875,
|
|
"loss": 1.0253,
|
|
"margin_dpo/margin_mean": 476.09051513671875,
|
|
"margin_dpo/margin_std": 665.188232421875,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.788546255506608,
|
|
"fcm_dpo/beta": 0.0011549813207238913,
|
|
"fcm_dpo/delta": 0.018105141818523407,
|
|
"fcm_dpo/margin": 330.7102355957031,
|
|
"fcm_dpo/q_t": 0.4120485186576843,
|
|
"grad_norm": 46.58029556274414,
|
|
"learning_rate": 6.611477514857114e-08,
|
|
"logits/chosen": -0.8844733238220215,
|
|
"logits/rejected": -0.8643313646316528,
|
|
"logps/chosen": -571.9066162109375,
|
|
"logps/ref_chosen": -57.747467041015625,
|
|
"logps/ref_rejected": -70.43838500976562,
|
|
"logps/rejected": -915.3078002929688,
|
|
"loss": 1.1346,
|
|
"margin_dpo/margin_mean": 330.7102355957031,
|
|
"margin_dpo/margin_std": 539.7521362304688,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.7900146842878121,
|
|
"fcm_dpo/beta": 0.001142657594755292,
|
|
"fcm_dpo/delta": -0.025782715529203415,
|
|
"fcm_dpo/margin": 371.4954833984375,
|
|
"fcm_dpo/q_t": 0.40331846475601196,
|
|
"grad_norm": 30.739717483520508,
|
|
"learning_rate": 6.524777069483525e-08,
|
|
"logits/chosen": -0.902673065662384,
|
|
"logits/rejected": -0.8884932994842529,
|
|
"logps/chosen": -698.6116943359375,
|
|
"logps/ref_chosen": -66.41594696044922,
|
|
"logps/ref_rejected": -84.22808837890625,
|
|
"logps/rejected": -1087.919189453125,
|
|
"loss": 1.0796,
|
|
"margin_dpo/margin_mean": 371.4954833984375,
|
|
"margin_dpo/margin_std": 494.69195556640625,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.7914831130690162,
|
|
"fcm_dpo/beta": 0.0011477393563836813,
|
|
"fcm_dpo/delta": 0.007408445701003075,
|
|
"fcm_dpo/margin": 342.22515869140625,
|
|
"fcm_dpo/q_t": 0.40988558530807495,
|
|
"grad_norm": 47.36545944213867,
|
|
"learning_rate": 6.438563463416221e-08,
|
|
"logits/chosen": -0.9254465103149414,
|
|
"logits/rejected": -0.9171432256698608,
|
|
"logps/chosen": -587.165771484375,
|
|
"logps/ref_chosen": -58.492855072021484,
|
|
"logps/ref_rejected": -91.85395050048828,
|
|
"logps/rejected": -962.7520141601562,
|
|
"loss": 1.0922,
|
|
"margin_dpo/margin_mean": 342.22515869140625,
|
|
"margin_dpo/margin_std": 441.59429931640625,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.7929515418502202,
|
|
"fcm_dpo/beta": 0.0011303776409476995,
|
|
"fcm_dpo/delta": -0.10793224722146988,
|
|
"fcm_dpo/margin": 444.6667785644531,
|
|
"fcm_dpo/q_t": 0.3898891806602478,
|
|
"grad_norm": 39.17680740356445,
|
|
"learning_rate": 6.352838968463919e-08,
|
|
"logits/chosen": -0.8121837377548218,
|
|
"logits/rejected": -0.8325707912445068,
|
|
"logps/chosen": -587.6639404296875,
|
|
"logps/ref_chosen": -63.482513427734375,
|
|
"logps/ref_rejected": -116.42999267578125,
|
|
"logps/rejected": -1085.2781982421875,
|
|
"loss": 1.0491,
|
|
"margin_dpo/margin_mean": 444.666748046875,
|
|
"margin_dpo/margin_std": 597.3258056640625,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.7944199706314243,
|
|
"fcm_dpo/beta": 0.0011378416093066335,
|
|
"fcm_dpo/delta": 0.13771183788776398,
|
|
"fcm_dpo/margin": 233.8116912841797,
|
|
"fcm_dpo/q_t": 0.4404492974281311,
|
|
"grad_norm": 58.325618743896484,
|
|
"learning_rate": 6.267605843546767e-08,
|
|
"logits/chosen": -0.9590853452682495,
|
|
"logits/rejected": -0.949677586555481,
|
|
"logps/chosen": -730.1240234375,
|
|
"logps/ref_chosen": -78.28036499023438,
|
|
"logps/ref_rejected": -103.273681640625,
|
|
"logps/rejected": -988.9290771484375,
|
|
"loss": 1.2296,
|
|
"margin_dpo/margin_mean": 233.8116912841797,
|
|
"margin_dpo/margin_std": 537.7781372070312,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.7958883994126285,
|
|
"fcm_dpo/beta": 0.0011231580283492804,
|
|
"fcm_dpo/delta": -0.1151493713259697,
|
|
"fcm_dpo/margin": 451.2611083984375,
|
|
"fcm_dpo/q_t": 0.3872736692428589,
|
|
"grad_norm": 51.69799041748047,
|
|
"learning_rate": 6.182866334636888e-08,
|
|
"logits/chosen": -0.9066444635391235,
|
|
"logits/rejected": -0.9385085105895996,
|
|
"logps/chosen": -588.926025390625,
|
|
"logps/ref_chosen": -57.48497009277344,
|
|
"logps/ref_rejected": -96.47506713867188,
|
|
"logps/rejected": -1079.17724609375,
|
|
"loss": 1.0454,
|
|
"margin_dpo/margin_mean": 451.2610778808594,
|
|
"margin_dpo/margin_std": 590.90966796875,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.7973568281938326,
|
|
"fcm_dpo/beta": 0.0011308449320495129,
|
|
"fcm_dpo/delta": 0.04076213017106056,
|
|
"fcm_dpo/margin": 318.96612548828125,
|
|
"fcm_dpo/q_t": 0.42912137508392334,
|
|
"grad_norm": 38.154632568359375,
|
|
"learning_rate": 6.098622674699147e-08,
|
|
"logits/chosen": -0.8999680280685425,
|
|
"logits/rejected": -0.9285463690757751,
|
|
"logps/chosen": -678.298095703125,
|
|
"logps/ref_chosen": -60.61750793457031,
|
|
"logps/ref_rejected": -105.59896850585938,
|
|
"logps/rejected": -1042.2457275390625,
|
|
"loss": 1.189,
|
|
"margin_dpo/margin_mean": 318.96612548828125,
|
|
"margin_dpo/margin_std": 700.2628173828125,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.7988252569750367,
|
|
"fcm_dpo/beta": 0.0011320568155497313,
|
|
"fcm_dpo/delta": -0.019344709813594818,
|
|
"fcm_dpo/margin": 369.7061767578125,
|
|
"fcm_dpo/q_t": 0.40487393736839294,
|
|
"grad_norm": 33.63976287841797,
|
|
"learning_rate": 6.01487708363232e-08,
|
|
"logits/chosen": -0.8758711814880371,
|
|
"logits/rejected": -0.8889458775520325,
|
|
"logps/chosen": -673.983154296875,
|
|
"logps/ref_chosen": -59.642303466796875,
|
|
"logps/ref_rejected": -100.95469665527344,
|
|
"logps/rejected": -1085.001708984375,
|
|
"loss": 1.0941,
|
|
"margin_dpo/margin_mean": 369.7061767578125,
|
|
"margin_dpo/margin_std": 533.091064453125,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.8002936857562408,
|
|
"fcm_dpo/beta": 0.0011182475136592984,
|
|
"fcm_dpo/delta": -0.08015477657318115,
|
|
"fcm_dpo/margin": 425.98883056640625,
|
|
"fcm_dpo/q_t": 0.39286336302757263,
|
|
"grad_norm": 36.59641647338867,
|
|
"learning_rate": 5.9316317682106294e-08,
|
|
"logits/chosen": -0.8046392202377319,
|
|
"logits/rejected": -0.8314776420593262,
|
|
"logps/chosen": -634.07080078125,
|
|
"logps/ref_chosen": -67.64859771728516,
|
|
"logps/ref_rejected": -95.90800476074219,
|
|
"logps/rejected": -1088.319091796875,
|
|
"loss": 1.0483,
|
|
"margin_dpo/margin_mean": 425.98883056640625,
|
|
"margin_dpo/margin_std": 539.1649169921875,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.801762114537445,
|
|
"fcm_dpo/beta": 0.0011238758452236652,
|
|
"fcm_dpo/delta": 0.06430754065513611,
|
|
"fcm_dpo/margin": 300.63092041015625,
|
|
"fcm_dpo/q_t": 0.42007124423980713,
|
|
"grad_norm": 35.92195510864258,
|
|
"learning_rate": 5.848888922025552e-08,
|
|
"logits/chosen": -0.8405758738517761,
|
|
"logits/rejected": -0.8265654444694519,
|
|
"logps/chosen": -585.0632934570312,
|
|
"logps/ref_chosen": -50.744232177734375,
|
|
"logps/ref_rejected": -81.86622619628906,
|
|
"logps/rejected": -916.816162109375,
|
|
"loss": 1.1366,
|
|
"margin_dpo/margin_mean": 300.63092041015625,
|
|
"margin_dpo/margin_std": 442.31207275390625,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.8032305433186491,
|
|
"fcm_dpo/beta": 0.0011259375605732203,
|
|
"fcm_dpo/delta": -0.025206491351127625,
|
|
"fcm_dpo/margin": 376.65985107421875,
|
|
"fcm_dpo/q_t": 0.40438586473464966,
|
|
"grad_norm": 50.428184509277344,
|
|
"learning_rate": 5.7666507254280265e-08,
|
|
"logits/chosen": -0.8404146432876587,
|
|
"logits/rejected": -0.8504692316055298,
|
|
"logps/chosen": -630.6488037109375,
|
|
"logps/ref_chosen": -73.6877212524414,
|
|
"logps/ref_rejected": -90.76136779785156,
|
|
"logps/rejected": -1024.38232421875,
|
|
"loss": 1.0885,
|
|
"margin_dpo/margin_mean": 376.65985107421875,
|
|
"margin_dpo/margin_std": 529.802490234375,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.8046989720998532,
|
|
"fcm_dpo/beta": 0.0011212106328457594,
|
|
"fcm_dpo/delta": -0.001919570378959179,
|
|
"fcm_dpo/margin": 358.396728515625,
|
|
"fcm_dpo/q_t": 0.4131587743759155,
|
|
"grad_norm": 29.529178619384766,
|
|
"learning_rate": 5.684919345471029e-08,
|
|
"logits/chosen": -0.9091461896896362,
|
|
"logits/rejected": -0.9068449139595032,
|
|
"logps/chosen": -639.157470703125,
|
|
"logps/ref_chosen": -65.24634552001953,
|
|
"logps/ref_rejected": -94.11807250976562,
|
|
"logps/rejected": -1026.426025390625,
|
|
"loss": 1.11,
|
|
"margin_dpo/margin_mean": 358.3966979980469,
|
|
"margin_dpo/margin_std": 571.4837646484375,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.8061674008810573,
|
|
"fcm_dpo/beta": 0.0011411058949306607,
|
|
"fcm_dpo/delta": 0.08808208256959915,
|
|
"fcm_dpo/margin": 275.1691589355469,
|
|
"fcm_dpo/q_t": 0.42972275614738464,
|
|
"grad_norm": 55.16639709472656,
|
|
"learning_rate": 5.603696935852426e-08,
|
|
"logits/chosen": -0.9102625846862793,
|
|
"logits/rejected": -0.8981518745422363,
|
|
"logps/chosen": -652.5174560546875,
|
|
"logps/ref_chosen": -49.21235656738281,
|
|
"logps/ref_rejected": -73.91031646728516,
|
|
"logps/rejected": -952.3845825195312,
|
|
"loss": 1.176,
|
|
"margin_dpo/margin_mean": 275.169189453125,
|
|
"margin_dpo/margin_std": 505.90155029296875,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.8076358296622613,
|
|
"fcm_dpo/beta": 0.0011486895382404327,
|
|
"fcm_dpo/delta": 0.04329656437039375,
|
|
"fcm_dpo/margin": 311.8872375488281,
|
|
"fcm_dpo/q_t": 0.4181970953941345,
|
|
"grad_norm": 39.404815673828125,
|
|
"learning_rate": 5.5229856368582376e-08,
|
|
"logits/chosen": -0.8432351350784302,
|
|
"logits/rejected": -0.8644379377365112,
|
|
"logps/chosen": -670.8403930664062,
|
|
"logps/ref_chosen": -56.80695343017578,
|
|
"logps/ref_rejected": -95.12580871582031,
|
|
"logps/rejected": -1021.0465087890625,
|
|
"loss": 1.1318,
|
|
"margin_dpo/margin_mean": 311.88726806640625,
|
|
"margin_dpo/margin_std": 489.3165283203125,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.8091042584434655,
|
|
"fcm_dpo/beta": 0.0011192983947694302,
|
|
"fcm_dpo/delta": -0.1783682256937027,
|
|
"fcm_dpo/margin": 507.3398132324219,
|
|
"fcm_dpo/q_t": 0.3693164587020874,
|
|
"grad_norm": 55.6103515625,
|
|
"learning_rate": 5.4427875753062734e-08,
|
|
"logits/chosen": -0.8312960863113403,
|
|
"logits/rejected": -0.8840258717536926,
|
|
"logps/chosen": -585.3047485351562,
|
|
"logps/ref_chosen": -59.10633087158203,
|
|
"logps/ref_rejected": -111.67280578613281,
|
|
"logps/rejected": -1145.2109375,
|
|
"loss": 0.9652,
|
|
"margin_dpo/margin_mean": 507.3398132324219,
|
|
"margin_dpo/margin_std": 490.194580078125,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.8105726872246696,
|
|
"fcm_dpo/beta": 0.0010634324280545115,
|
|
"fcm_dpo/delta": -0.23344632983207703,
|
|
"fcm_dpo/margin": 578.7212524414062,
|
|
"fcm_dpo/q_t": 0.3663737177848816,
|
|
"grad_norm": 50.61666488647461,
|
|
"learning_rate": 5.363104864490034e-08,
|
|
"logits/chosen": -0.8697335720062256,
|
|
"logits/rejected": -0.9037412405014038,
|
|
"logps/chosen": -573.7623291015625,
|
|
"logps/ref_chosen": -62.35459899902344,
|
|
"logps/ref_rejected": -104.56210327148438,
|
|
"logps/rejected": -1194.691162109375,
|
|
"loss": 0.9718,
|
|
"margin_dpo/margin_mean": 578.7212524414062,
|
|
"margin_dpo/margin_std": 671.6409912109375,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.8120411160058737,
|
|
"fcm_dpo/beta": 0.0010705923195928335,
|
|
"fcm_dpo/delta": 0.08465807139873505,
|
|
"fcm_dpo/margin": 297.1253967285156,
|
|
"fcm_dpo/q_t": 0.4299642741680145,
|
|
"grad_norm": 28.852155685424805,
|
|
"learning_rate": 5.2839396041230415e-08,
|
|
"logits/chosen": -0.8760408759117126,
|
|
"logits/rejected": -0.8712909817695618,
|
|
"logps/chosen": -675.16796875,
|
|
"logps/ref_chosen": -68.25881958007812,
|
|
"logps/ref_rejected": -98.0971450805664,
|
|
"logps/rejected": -1002.1317138671875,
|
|
"loss": 1.1695,
|
|
"margin_dpo/margin_mean": 297.1253662109375,
|
|
"margin_dpo/margin_std": 539.8681640625,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.8135095447870778,
|
|
"fcm_dpo/beta": 0.0010796760907396674,
|
|
"fcm_dpo/delta": -0.04111691564321518,
|
|
"fcm_dpo/margin": 406.2252197265625,
|
|
"fcm_dpo/q_t": 0.40428856015205383,
|
|
"grad_norm": 116.00951385498047,
|
|
"learning_rate": 5.205293880283551e-08,
|
|
"logits/chosen": -0.8844936490058899,
|
|
"logits/rejected": -0.8602651953697205,
|
|
"logps/chosen": -693.2430419921875,
|
|
"logps/ref_chosen": -67.94767761230469,
|
|
"logps/ref_rejected": -89.78272247314453,
|
|
"logps/rejected": -1121.3033447265625,
|
|
"loss": 1.1261,
|
|
"margin_dpo/margin_mean": 406.2252197265625,
|
|
"margin_dpo/margin_std": 695.0372314453125,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.8149779735682819,
|
|
"fcm_dpo/beta": 0.0010527544654905796,
|
|
"fcm_dpo/delta": -0.11765166372060776,
|
|
"fcm_dpo/margin": 486.11102294921875,
|
|
"fcm_dpo/q_t": 0.3908356726169586,
|
|
"grad_norm": 39.57646560668945,
|
|
"learning_rate": 5.127169765359515e-08,
|
|
"logits/chosen": -0.9440624713897705,
|
|
"logits/rejected": -0.9946380853652954,
|
|
"logps/chosen": -687.46533203125,
|
|
"logps/ref_chosen": -53.33049011230469,
|
|
"logps/ref_rejected": -108.47937774658203,
|
|
"logps/rejected": -1228.7252197265625,
|
|
"loss": 1.0654,
|
|
"margin_dpo/margin_mean": 486.11102294921875,
|
|
"margin_dpo/margin_std": 729.351318359375,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.8164464023494861,
|
|
"fcm_dpo/beta": 0.001059158006682992,
|
|
"fcm_dpo/delta": 0.0916333943605423,
|
|
"fcm_dpo/margin": 293.83837890625,
|
|
"fcm_dpo/q_t": 0.4280283451080322,
|
|
"grad_norm": 39.030269622802734,
|
|
"learning_rate": 5.049569317994012e-08,
|
|
"logits/chosen": -0.9093760848045349,
|
|
"logits/rejected": -0.9025793671607971,
|
|
"logps/chosen": -666.353759765625,
|
|
"logps/ref_chosen": -58.64447021484375,
|
|
"logps/ref_rejected": -101.34040832519531,
|
|
"logps/rejected": -1002.8881225585938,
|
|
"loss": 1.148,
|
|
"margin_dpo/margin_mean": 293.83837890625,
|
|
"margin_dpo/margin_std": 430.22235107421875,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.8179148311306902,
|
|
"fcm_dpo/beta": 0.001053366344422102,
|
|
"fcm_dpo/delta": -0.0524490550160408,
|
|
"fcm_dpo/margin": 427.0723876953125,
|
|
"fcm_dpo/q_t": 0.401094913482666,
|
|
"grad_norm": 53.48329162597656,
|
|
"learning_rate": 4.9724945830310144e-08,
|
|
"logits/chosen": -0.9332031011581421,
|
|
"logits/rejected": -0.9630019664764404,
|
|
"logps/chosen": -737.6878662109375,
|
|
"logps/ref_chosen": -67.84066009521484,
|
|
"logps/ref_rejected": -109.93965911865234,
|
|
"logps/rejected": -1206.859375,
|
|
"loss": 1.1001,
|
|
"margin_dpo/margin_mean": 427.0723876953125,
|
|
"margin_dpo/margin_std": 670.270751953125,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.8193832599118943,
|
|
"fcm_dpo/beta": 0.0010221919510513544,
|
|
"fcm_dpo/delta": -0.2019827663898468,
|
|
"fcm_dpo/margin": 577.2942504882812,
|
|
"fcm_dpo/q_t": 0.36431533098220825,
|
|
"grad_norm": 37.130680084228516,
|
|
"learning_rate": 4.8959475914614554e-08,
|
|
"logits/chosen": -1.0133311748504639,
|
|
"logits/rejected": -1.0322959423065186,
|
|
"logps/chosen": -656.3045043945312,
|
|
"logps/ref_chosen": -62.36824035644531,
|
|
"logps/ref_rejected": -102.16102600097656,
|
|
"logps/rejected": -1273.3916015625,
|
|
"loss": 0.9729,
|
|
"margin_dpo/margin_mean": 577.2942504882812,
|
|
"margin_dpo/margin_std": 620.1890869140625,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.8208516886930984,
|
|
"fcm_dpo/beta": 0.0009995660511776805,
|
|
"fcm_dpo/delta": -0.08984459936618805,
|
|
"fcm_dpo/margin": 485.7807312011719,
|
|
"fcm_dpo/q_t": 0.39124971628189087,
|
|
"grad_norm": 30.684978485107422,
|
|
"learning_rate": 4.8199303603697614e-08,
|
|
"logits/chosen": -1.0911483764648438,
|
|
"logits/rejected": -1.0965559482574463,
|
|
"logps/chosen": -750.491455078125,
|
|
"logps/ref_chosen": -60.752323150634766,
|
|
"logps/ref_rejected": -93.44229125976562,
|
|
"logps/rejected": -1268.962158203125,
|
|
"loss": 1.0455,
|
|
"margin_dpo/margin_mean": 485.7807312011719,
|
|
"margin_dpo/margin_std": 622.151123046875,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.8223201174743024,
|
|
"fcm_dpo/beta": 0.000995859270915389,
|
|
"fcm_dpo/delta": 0.05174366384744644,
|
|
"fcm_dpo/margin": 351.25054931640625,
|
|
"fcm_dpo/q_t": 0.4200424551963806,
|
|
"grad_norm": 32.02336502075195,
|
|
"learning_rate": 4.7444448928806615e-08,
|
|
"logits/chosen": -0.8772056102752686,
|
|
"logits/rejected": -0.8566712141036987,
|
|
"logps/chosen": -679.903076171875,
|
|
"logps/ref_chosen": -58.10382080078125,
|
|
"logps/ref_rejected": -79.99122619628906,
|
|
"logps/rejected": -1053.041015625,
|
|
"loss": 1.1385,
|
|
"margin_dpo/margin_mean": 351.25054931640625,
|
|
"margin_dpo/margin_std": 544.828857421875,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.8237885462555066,
|
|
"fcm_dpo/beta": 0.0010213316418230534,
|
|
"fcm_dpo/delta": 0.09696964174509048,
|
|
"fcm_dpo/margin": 299.154052734375,
|
|
"fcm_dpo/q_t": 0.42913612723350525,
|
|
"grad_norm": 46.0489501953125,
|
|
"learning_rate": 4.669493178106432e-08,
|
|
"logits/chosen": -1.0040576457977295,
|
|
"logits/rejected": -1.0211834907531738,
|
|
"logps/chosen": -775.0875244140625,
|
|
"logps/ref_chosen": -50.912879943847656,
|
|
"logps/ref_rejected": -99.06856536865234,
|
|
"logps/rejected": -1122.3973388671875,
|
|
"loss": 1.196,
|
|
"margin_dpo/margin_mean": 299.154052734375,
|
|
"margin_dpo/margin_std": 618.0548095703125,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.8252569750367107,
|
|
"fcm_dpo/beta": 0.0010161999380216002,
|
|
"fcm_dpo/delta": -0.008913304656744003,
|
|
"fcm_dpo/margin": 401.3731689453125,
|
|
"fcm_dpo/q_t": 0.40855729579925537,
|
|
"grad_norm": 35.42552947998047,
|
|
"learning_rate": 4.5950771910944596e-08,
|
|
"logits/chosen": -0.9741950035095215,
|
|
"logits/rejected": -0.9822410345077515,
|
|
"logps/chosen": -739.70166015625,
|
|
"logps/ref_chosen": -59.46440124511719,
|
|
"logps/ref_rejected": -96.54266357421875,
|
|
"logps/rejected": -1178.153076171875,
|
|
"loss": 1.1047,
|
|
"margin_dpo/margin_mean": 401.3731689453125,
|
|
"margin_dpo/margin_std": 601.1802368164062,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.8267254038179148,
|
|
"fcm_dpo/beta": 0.0010444659274071455,
|
|
"fcm_dpo/delta": 0.08401615172624588,
|
|
"fcm_dpo/margin": 302.5437927246094,
|
|
"fcm_dpo/q_t": 0.42215970158576965,
|
|
"grad_norm": 58.64229202270508,
|
|
"learning_rate": 4.521198892775202e-08,
|
|
"logits/chosen": -0.9864065647125244,
|
|
"logits/rejected": -0.98963463306427,
|
|
"logps/chosen": -830.58203125,
|
|
"logps/ref_chosen": -60.60819625854492,
|
|
"logps/ref_rejected": -94.56770324707031,
|
|
"logps/rejected": -1167.0853271484375,
|
|
"loss": 1.2354,
|
|
"margin_dpo/margin_mean": 302.5437927246094,
|
|
"margin_dpo/margin_std": 723.0938720703125,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.8281938325991189,
|
|
"fcm_dpo/beta": 0.0010381749598309398,
|
|
"fcm_dpo/delta": -0.017018113285303116,
|
|
"fcm_dpo/margin": 400.96112060546875,
|
|
"fcm_dpo/q_t": 0.4057755768299103,
|
|
"grad_norm": 49.656681060791016,
|
|
"learning_rate": 4.447860229910544e-08,
|
|
"logits/chosen": -1.0767529010772705,
|
|
"logits/rejected": -1.0685616731643677,
|
|
"logps/chosen": -764.374267578125,
|
|
"logps/ref_chosen": -74.26837921142578,
|
|
"logps/ref_rejected": -93.23818969726562,
|
|
"logps/rejected": -1184.30517578125,
|
|
"loss": 1.0832,
|
|
"margin_dpo/margin_mean": 400.9611511230469,
|
|
"margin_dpo/margin_std": 527.44775390625,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.8296622613803231,
|
|
"fcm_dpo/beta": 0.0010299738496541977,
|
|
"fcm_dpo/delta": -0.030898885801434517,
|
|
"fcm_dpo/margin": 417.05035400390625,
|
|
"fcm_dpo/q_t": 0.40826284885406494,
|
|
"grad_norm": 58.330814361572266,
|
|
"learning_rate": 4.375063135042445e-08,
|
|
"logits/chosen": -0.9842187166213989,
|
|
"logits/rejected": -0.9827100038528442,
|
|
"logps/chosen": -795.4959716796875,
|
|
"logps/ref_chosen": -69.0199203491211,
|
|
"logps/ref_rejected": -85.7789306640625,
|
|
"logps/rejected": -1229.3052978515625,
|
|
"loss": 1.1307,
|
|
"margin_dpo/margin_mean": 417.05029296875,
|
|
"margin_dpo/margin_std": 738.177734375,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.8311306901615272,
|
|
"fcm_dpo/beta": 0.0010237455135211349,
|
|
"fcm_dpo/delta": -0.07710428535938263,
|
|
"fcm_dpo/margin": 462.15057373046875,
|
|
"fcm_dpo/q_t": 0.3955567479133606,
|
|
"grad_norm": 44.30464553833008,
|
|
"learning_rate": 4.3028095264420525e-08,
|
|
"logits/chosen": -0.9977031946182251,
|
|
"logits/rejected": -1.0206011533737183,
|
|
"logps/chosen": -761.23193359375,
|
|
"logps/ref_chosen": -66.5453109741211,
|
|
"logps/ref_rejected": -103.86932373046875,
|
|
"logps/rejected": -1260.70654296875,
|
|
"loss": 1.1004,
|
|
"margin_dpo/margin_mean": 462.15057373046875,
|
|
"margin_dpo/margin_std": 732.3837890625,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.8325991189427313,
|
|
"fcm_dpo/beta": 0.0010178061202168465,
|
|
"fcm_dpo/delta": 0.03579302877187729,
|
|
"fcm_dpo/margin": 359.13531494140625,
|
|
"fcm_dpo/q_t": 0.4146028161048889,
|
|
"grad_norm": 34.87330627441406,
|
|
"learning_rate": 4.231101308059165e-08,
|
|
"logits/chosen": -1.0946989059448242,
|
|
"logits/rejected": -1.097286343574524,
|
|
"logps/chosen": -709.2471313476562,
|
|
"logps/ref_chosen": -52.85829544067383,
|
|
"logps/ref_rejected": -85.37095642089844,
|
|
"logps/rejected": -1100.89501953125,
|
|
"loss": 1.1094,
|
|
"margin_dpo/margin_mean": 359.1352844238281,
|
|
"margin_dpo/margin_std": 478.47601318359375,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.8340675477239354,
|
|
"fcm_dpo/beta": 0.0010078256018459797,
|
|
"fcm_dpo/delta": -0.077871173620224,
|
|
"fcm_dpo/margin": 470.42230224609375,
|
|
"fcm_dpo/q_t": 0.3900975286960602,
|
|
"grad_norm": 41.226470947265625,
|
|
"learning_rate": 4.1599403694720145e-08,
|
|
"logits/chosen": -0.9984632730484009,
|
|
"logits/rejected": -1.0317835807800293,
|
|
"logps/chosen": -692.5980224609375,
|
|
"logps/ref_chosen": -45.1923828125,
|
|
"logps/ref_rejected": -89.09236907958984,
|
|
"logps/rejected": -1206.9202880859375,
|
|
"loss": 1.0342,
|
|
"margin_dpo/margin_mean": 470.4223327636719,
|
|
"margin_dpo/margin_std": 534.6771240234375,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.8355359765051396,
|
|
"fcm_dpo/beta": 0.0010118992067873478,
|
|
"fcm_dpo/delta": 0.0053036510944366455,
|
|
"fcm_dpo/margin": 389.440185546875,
|
|
"fcm_dpo/q_t": 0.411299467086792,
|
|
"grad_norm": 59.71284103393555,
|
|
"learning_rate": 4.089328585837512e-08,
|
|
"logits/chosen": -1.0540335178375244,
|
|
"logits/rejected": -1.0605683326721191,
|
|
"logps/chosen": -802.30126953125,
|
|
"logps/ref_chosen": -63.72056198120117,
|
|
"logps/ref_rejected": -79.10325622558594,
|
|
"logps/rejected": -1207.1240234375,
|
|
"loss": 1.1411,
|
|
"margin_dpo/margin_mean": 389.440185546875,
|
|
"margin_dpo/margin_std": 669.8353271484375,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.8370044052863436,
|
|
"fcm_dpo/beta": 0.0010090538999065757,
|
|
"fcm_dpo/delta": 0.034501124173402786,
|
|
"fcm_dpo/margin": 363.46832275390625,
|
|
"fcm_dpo/q_t": 0.4174221158027649,
|
|
"grad_norm": 36.75635528564453,
|
|
"learning_rate": 4.019267817841834e-08,
|
|
"logits/chosen": -1.1069672107696533,
|
|
"logits/rejected": -1.0978264808654785,
|
|
"logps/chosen": -741.89794921875,
|
|
"logps/ref_chosen": -61.61454391479492,
|
|
"logps/ref_rejected": -82.14186096191406,
|
|
"logps/rejected": -1125.8935546875,
|
|
"loss": 1.1306,
|
|
"margin_dpo/margin_mean": 363.46832275390625,
|
|
"margin_dpo/margin_std": 565.5325927734375,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.8384728340675477,
|
|
"fcm_dpo/beta": 0.001006947597488761,
|
|
"fcm_dpo/delta": -0.025127392262220383,
|
|
"fcm_dpo/margin": 421.017333984375,
|
|
"fcm_dpo/q_t": 0.40483880043029785,
|
|
"grad_norm": 46.7877197265625,
|
|
"learning_rate": 3.9497599116513705e-08,
|
|
"logits/chosen": -0.9774879217147827,
|
|
"logits/rejected": -0.9870057106018066,
|
|
"logps/chosen": -736.8277587890625,
|
|
"logps/ref_chosen": -53.05406188964844,
|
|
"logps/ref_rejected": -91.33682250976562,
|
|
"logps/rejected": -1196.1279296875,
|
|
"loss": 1.1072,
|
|
"margin_dpo/margin_mean": 421.01739501953125,
|
|
"margin_dpo/margin_std": 656.8223266601562,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.8399412628487518,
|
|
"fcm_dpo/beta": 0.0009963458869606256,
|
|
"fcm_dpo/delta": -0.05837538465857506,
|
|
"fcm_dpo/margin": 457.11859130859375,
|
|
"fcm_dpo/q_t": 0.4020523428916931,
|
|
"grad_norm": 34.478126525878906,
|
|
"learning_rate": 3.880806698864086e-08,
|
|
"logits/chosen": -1.0106735229492188,
|
|
"logits/rejected": -1.0380733013153076,
|
|
"logps/chosen": -765.731201171875,
|
|
"logps/ref_chosen": -48.45928955078125,
|
|
"logps/ref_rejected": -83.55703735351562,
|
|
"logps/rejected": -1257.947509765625,
|
|
"loss": 1.101,
|
|
"margin_dpo/margin_mean": 457.11859130859375,
|
|
"margin_dpo/margin_std": 743.0745239257812,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.8414096916299559,
|
|
"fcm_dpo/beta": 0.000998746370896697,
|
|
"fcm_dpo/delta": 0.01006124448031187,
|
|
"fcm_dpo/margin": 390.7725830078125,
|
|
"fcm_dpo/q_t": 0.41236811876296997,
|
|
"grad_norm": 31.542451858520508,
|
|
"learning_rate": 3.812409996461275e-08,
|
|
"logits/chosen": -1.0760366916656494,
|
|
"logits/rejected": -1.0830974578857422,
|
|
"logps/chosen": -746.3466186523438,
|
|
"logps/ref_chosen": -51.62262725830078,
|
|
"logps/ref_rejected": -85.32499694824219,
|
|
"logps/rejected": -1170.821533203125,
|
|
"loss": 1.1013,
|
|
"margin_dpo/margin_mean": 390.7725830078125,
|
|
"margin_dpo/margin_std": 552.9385986328125,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.8428781204111601,
|
|
"fcm_dpo/beta": 0.0009959687013179064,
|
|
"fcm_dpo/delta": -0.0264823567122221,
|
|
"fcm_dpo/margin": 427.0505676269531,
|
|
"fcm_dpo/q_t": 0.40291672945022583,
|
|
"grad_norm": 34.11531066894531,
|
|
"learning_rate": 3.74457160675965e-08,
|
|
"logits/chosen": -1.0604960918426514,
|
|
"logits/rejected": -1.0845885276794434,
|
|
"logps/chosen": -673.98388671875,
|
|
"logps/ref_chosen": -51.04446029663086,
|
|
"logps/ref_rejected": -92.80640411376953,
|
|
"logps/rejected": -1142.79638671875,
|
|
"loss": 1.079,
|
|
"margin_dpo/margin_mean": 427.0505676269531,
|
|
"margin_dpo/margin_std": 563.7635498046875,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.8443465491923642,
|
|
"fcm_dpo/beta": 0.0009870969224721193,
|
|
"fcm_dpo/delta": 0.015626847743988037,
|
|
"fcm_dpo/margin": 388.8648681640625,
|
|
"fcm_dpo/q_t": 0.4126191735267639,
|
|
"grad_norm": 41.480953216552734,
|
|
"learning_rate": 3.677293317363864e-08,
|
|
"logits/chosen": -0.9152381420135498,
|
|
"logits/rejected": -0.9155479669570923,
|
|
"logps/chosen": -773.0484619140625,
|
|
"logps/ref_chosen": -71.7901382446289,
|
|
"logps/ref_rejected": -95.38619995117188,
|
|
"logps/rejected": -1185.5093994140625,
|
|
"loss": 1.1384,
|
|
"margin_dpo/margin_mean": 388.8648681640625,
|
|
"margin_dpo/margin_std": 641.7487182617188,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.8458149779735683,
|
|
"fcm_dpo/beta": 0.0010088002309203148,
|
|
"fcm_dpo/delta": 0.10819648951292038,
|
|
"fcm_dpo/margin": 292.5570983886719,
|
|
"fcm_dpo/q_t": 0.4330289363861084,
|
|
"grad_norm": 34.14402770996094,
|
|
"learning_rate": 3.6105769011194224e-08,
|
|
"logits/chosen": -1.0202168226242065,
|
|
"logits/rejected": -1.0450081825256348,
|
|
"logps/chosen": -745.5078735351562,
|
|
"logps/ref_chosen": -54.262962341308594,
|
|
"logps/ref_rejected": -100.75428009033203,
|
|
"logps/rejected": -1084.5562744140625,
|
|
"loss": 1.1883,
|
|
"margin_dpo/margin_mean": 292.5570983886719,
|
|
"margin_dpo/margin_std": 554.0349731445312,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.8472834067547724,
|
|
"fcm_dpo/beta": 0.0010158491786569357,
|
|
"fcm_dpo/delta": -0.00041581690311431885,
|
|
"fcm_dpo/margin": 394.10260009765625,
|
|
"fcm_dpo/q_t": 0.4098922610282898,
|
|
"grad_norm": 32.901432037353516,
|
|
"learning_rate": 3.5444241160659304e-08,
|
|
"logits/chosen": -0.9910534620285034,
|
|
"logits/rejected": -0.9746923446655273,
|
|
"logps/chosen": -663.3876953125,
|
|
"logps/ref_chosen": -61.909706115722656,
|
|
"logps/ref_rejected": -84.07069396972656,
|
|
"logps/rejected": -1079.6512451171875,
|
|
"loss": 1.1104,
|
|
"margin_dpo/margin_mean": 394.10260009765625,
|
|
"margin_dpo/margin_std": 565.6033325195312,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.8487518355359766,
|
|
"fcm_dpo/beta": 0.0010062268702313304,
|
|
"fcm_dpo/delta": -0.03685392439365387,
|
|
"fcm_dpo/margin": 431.7515563964844,
|
|
"fcm_dpo/q_t": 0.40013357996940613,
|
|
"grad_norm": 34.37710952758789,
|
|
"learning_rate": 3.478836705390808e-08,
|
|
"logits/chosen": -0.8883892297744751,
|
|
"logits/rejected": -0.9162989854812622,
|
|
"logps/chosen": -642.0396728515625,
|
|
"logps/ref_chosen": -49.26368713378906,
|
|
"logps/ref_rejected": -83.4362564086914,
|
|
"logps/rejected": -1107.9637451171875,
|
|
"loss": 1.0655,
|
|
"margin_dpo/margin_mean": 431.75152587890625,
|
|
"margin_dpo/margin_std": 524.7294921875,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.8502202643171806,
|
|
"fcm_dpo/beta": 0.0010275598615407944,
|
|
"fcm_dpo/delta": 0.1303221881389618,
|
|
"fcm_dpo/margin": 266.0924072265625,
|
|
"fcm_dpo/q_t": 0.43752169609069824,
|
|
"grad_norm": 60.628578186035156,
|
|
"learning_rate": 3.41381639738331e-08,
|
|
"logits/chosen": -0.9820126295089722,
|
|
"logits/rejected": -0.9807819128036499,
|
|
"logps/chosen": -728.9359130859375,
|
|
"logps/ref_chosen": -58.88581848144531,
|
|
"logps/ref_rejected": -94.78762817382812,
|
|
"logps/rejected": -1030.9300537109375,
|
|
"loss": 1.212,
|
|
"margin_dpo/margin_mean": 266.0924072265625,
|
|
"margin_dpo/margin_std": 565.8082275390625,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.8516886930983847,
|
|
"fcm_dpo/beta": 0.0010196480434387922,
|
|
"fcm_dpo/delta": -0.08530791848897934,
|
|
"fcm_dpo/margin": 471.34722900390625,
|
|
"fcm_dpo/q_t": 0.39458024501800537,
|
|
"grad_norm": 42.90260696411133,
|
|
"learning_rate": 3.349364905389032e-08,
|
|
"logits/chosen": -0.842185378074646,
|
|
"logits/rejected": -0.8703323006629944,
|
|
"logps/chosen": -540.9808349609375,
|
|
"logps/ref_chosen": -48.70683670043945,
|
|
"logps/ref_rejected": -81.7583999633789,
|
|
"logps/rejected": -1045.379638671875,
|
|
"loss": 1.0552,
|
|
"margin_dpo/margin_mean": 471.3471984863281,
|
|
"margin_dpo/margin_std": 646.364013671875,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.8531571218795888,
|
|
"fcm_dpo/beta": 0.001028277212753892,
|
|
"fcm_dpo/delta": 0.052076976746320724,
|
|
"fcm_dpo/margin": 340.040771484375,
|
|
"fcm_dpo/q_t": 0.4224529564380646,
|
|
"grad_norm": 42.94197082519531,
|
|
"learning_rate": 3.285483927764726e-08,
|
|
"logits/chosen": -1.043858528137207,
|
|
"logits/rejected": -1.0504353046417236,
|
|
"logps/chosen": -738.8606567382812,
|
|
"logps/ref_chosen": -62.22235107421875,
|
|
"logps/ref_rejected": -91.73568725585938,
|
|
"logps/rejected": -1108.414794921875,
|
|
"loss": 1.151,
|
|
"margin_dpo/margin_mean": 340.040771484375,
|
|
"margin_dpo/margin_std": 596.381591796875,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.8546255506607929,
|
|
"fcm_dpo/beta": 0.0010394532000645995,
|
|
"fcm_dpo/delta": 0.0019838809967041016,
|
|
"fcm_dpo/margin": 381.43505859375,
|
|
"fcm_dpo/q_t": 0.4081156849861145,
|
|
"grad_norm": 32.26409149169922,
|
|
"learning_rate": 3.222175147833556e-08,
|
|
"logits/chosen": -0.9781073331832886,
|
|
"logits/rejected": -0.9989155530929565,
|
|
"logps/chosen": -653.311279296875,
|
|
"logps/ref_chosen": -58.228660583496094,
|
|
"logps/ref_rejected": -110.06959533691406,
|
|
"logps/rejected": -1086.5872802734375,
|
|
"loss": 1.1041,
|
|
"margin_dpo/margin_mean": 381.43505859375,
|
|
"margin_dpo/margin_std": 514.436279296875,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.856093979441997,
|
|
"fcm_dpo/beta": 0.001045125536620617,
|
|
"fcm_dpo/delta": 0.15199461579322815,
|
|
"fcm_dpo/margin": 240.80845642089844,
|
|
"fcm_dpo/q_t": 0.4445319175720215,
|
|
"grad_norm": 61.14152908325195,
|
|
"learning_rate": 3.159440233840763e-08,
|
|
"logits/chosen": -0.9267081022262573,
|
|
"logits/rejected": -0.9212468862533569,
|
|
"logps/chosen": -741.570068359375,
|
|
"logps/ref_chosen": -56.86286163330078,
|
|
"logps/ref_rejected": -88.4039306640625,
|
|
"logps/rejected": -1013.9195556640625,
|
|
"loss": 1.2611,
|
|
"margin_dpo/margin_mean": 240.80844116210938,
|
|
"margin_dpo/margin_std": 632.1082153320312,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.8575624082232012,
|
|
"fcm_dpo/beta": 0.001047009602189064,
|
|
"fcm_dpo/delta": -0.09994575381278992,
|
|
"fcm_dpo/margin": 472.8240661621094,
|
|
"fcm_dpo/q_t": 0.38906019926071167,
|
|
"grad_norm": 33.5833854675293,
|
|
"learning_rate": 3.0972808389096635e-08,
|
|
"logits/chosen": -0.9425207376480103,
|
|
"logits/rejected": -0.9496064186096191,
|
|
"logps/chosen": -631.0457763671875,
|
|
"logps/ref_chosen": -56.90068054199219,
|
|
"logps/ref_rejected": -97.63606262207031,
|
|
"logps/rejected": -1144.6053466796875,
|
|
"loss": 1.0321,
|
|
"margin_dpo/margin_mean": 472.8240966796875,
|
|
"margin_dpo/margin_std": 567.1585693359375,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.8590308370044053,
|
|
"fcm_dpo/beta": 0.0010301112197339535,
|
|
"fcm_dpo/delta": -0.050719231367111206,
|
|
"fcm_dpo/margin": 435.0365905761719,
|
|
"fcm_dpo/q_t": 0.4000217318534851,
|
|
"grad_norm": 38.895267486572266,
|
|
"learning_rate": 3.035698600998121e-08,
|
|
"logits/chosen": -0.9933120608329773,
|
|
"logits/rejected": -1.0140407085418701,
|
|
"logps/chosen": -725.3422241210938,
|
|
"logps/ref_chosen": -60.973968505859375,
|
|
"logps/ref_rejected": -84.16952514648438,
|
|
"logps/rejected": -1183.5743408203125,
|
|
"loss": 1.1026,
|
|
"margin_dpo/margin_mean": 435.03656005859375,
|
|
"margin_dpo/margin_std": 694.1354370117188,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.8604992657856094,
|
|
"fcm_dpo/beta": 0.0010434159776195884,
|
|
"fcm_dpo/delta": 0.09400048106908798,
|
|
"fcm_dpo/margin": 296.1407775878906,
|
|
"fcm_dpo/q_t": 0.4296777844429016,
|
|
"grad_norm": 32.761634826660156,
|
|
"learning_rate": 2.974695142855388e-08,
|
|
"logits/chosen": -0.9788910746574402,
|
|
"logits/rejected": -0.997832715511322,
|
|
"logps/chosen": -762.8277587890625,
|
|
"logps/ref_chosen": -56.85559844970703,
|
|
"logps/ref_rejected": -91.80261993408203,
|
|
"logps/rejected": -1093.91552734375,
|
|
"loss": 1.1855,
|
|
"margin_dpo/margin_mean": 296.1407775878906,
|
|
"margin_dpo/margin_std": 574.4661865234375,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.8619676945668135,
|
|
"fcm_dpo/beta": 0.001048217760398984,
|
|
"fcm_dpo/delta": -0.02521578222513199,
|
|
"fcm_dpo/margin": 404.6052551269531,
|
|
"fcm_dpo/q_t": 0.4032083749771118,
|
|
"grad_norm": 38.702091217041016,
|
|
"learning_rate": 2.9142720719793122e-08,
|
|
"logits/chosen": -1.0009737014770508,
|
|
"logits/rejected": -1.0238654613494873,
|
|
"logps/chosen": -547.4169921875,
|
|
"logps/ref_chosen": -44.69159698486328,
|
|
"logps/ref_rejected": -82.62385559082031,
|
|
"logps/rejected": -989.9544677734375,
|
|
"loss": 1.0848,
|
|
"margin_dpo/margin_mean": 404.6052551269531,
|
|
"margin_dpo/margin_std": 556.65869140625,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.8634361233480177,
|
|
"fcm_dpo/beta": 0.0010463828220963478,
|
|
"fcm_dpo/delta": 0.038819462060928345,
|
|
"fcm_dpo/margin": 346.3304748535156,
|
|
"fcm_dpo/q_t": 0.41620907187461853,
|
|
"grad_norm": 33.679988861083984,
|
|
"learning_rate": 2.8544309805740018e-08,
|
|
"logits/chosen": -0.9818781614303589,
|
|
"logits/rejected": -1.0031819343566895,
|
|
"logps/chosen": -710.351318359375,
|
|
"logps/ref_chosen": -50.29494857788086,
|
|
"logps/ref_rejected": -107.36988067626953,
|
|
"logps/rejected": -1113.7567138671875,
|
|
"loss": 1.1241,
|
|
"margin_dpo/margin_mean": 346.3304748535156,
|
|
"margin_dpo/margin_std": 503.89788818359375,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.8649045521292217,
|
|
"fcm_dpo/beta": 0.001043025404214859,
|
|
"fcm_dpo/delta": -0.07069654762744904,
|
|
"fcm_dpo/margin": 448.1602783203125,
|
|
"fcm_dpo/q_t": 0.3938640058040619,
|
|
"grad_norm": 33.65080261230469,
|
|
"learning_rate": 2.7951734455078786e-08,
|
|
"logits/chosen": -0.9495465159416199,
|
|
"logits/rejected": -0.9597896933555603,
|
|
"logps/chosen": -717.1591796875,
|
|
"logps/ref_chosen": -59.929908752441406,
|
|
"logps/ref_rejected": -111.65534973144531,
|
|
"logps/rejected": -1217.044921875,
|
|
"loss": 1.0505,
|
|
"margin_dpo/margin_mean": 448.1602783203125,
|
|
"margin_dpo/margin_std": 561.6795654296875,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.8663729809104258,
|
|
"fcm_dpo/beta": 0.0010281222639605403,
|
|
"fcm_dpo/delta": -0.06000884622335434,
|
|
"fcm_dpo/margin": 444.7072448730469,
|
|
"fcm_dpo/q_t": 0.396173357963562,
|
|
"grad_norm": 31.502580642700195,
|
|
"learning_rate": 2.736501028272095e-08,
|
|
"logits/chosen": -0.9299443364143372,
|
|
"logits/rejected": -0.9518330097198486,
|
|
"logps/chosen": -629.3699340820312,
|
|
"logps/ref_chosen": -55.80979537963867,
|
|
"logps/ref_rejected": -106.06282043457031,
|
|
"logps/rejected": -1124.330078125,
|
|
"loss": 1.0524,
|
|
"margin_dpo/margin_mean": 444.70721435546875,
|
|
"margin_dpo/margin_std": 547.791748046875,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.8678414096916299,
|
|
"fcm_dpo/beta": 0.0010233856737613678,
|
|
"fcm_dpo/delta": -0.014741834253072739,
|
|
"fcm_dpo/margin": 404.666748046875,
|
|
"fcm_dpo/q_t": 0.40434908866882324,
|
|
"grad_norm": 39.38751983642578,
|
|
"learning_rate": 2.678415274939408e-08,
|
|
"logits/chosen": -1.014183521270752,
|
|
"logits/rejected": -1.0046617984771729,
|
|
"logps/chosen": -680.9166259765625,
|
|
"logps/ref_chosen": -56.24061965942383,
|
|
"logps/ref_rejected": -83.78629302978516,
|
|
"logps/rejected": -1113.1290283203125,
|
|
"loss": 1.0914,
|
|
"margin_dpo/margin_mean": 404.666748046875,
|
|
"margin_dpo/margin_std": 561.1109619140625,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.869309838472834,
|
|
"fcm_dpo/beta": 0.0010261686984449625,
|
|
"fcm_dpo/delta": 0.026942353695631027,
|
|
"fcm_dpo/margin": 364.5346374511719,
|
|
"fcm_dpo/q_t": 0.4158400297164917,
|
|
"grad_norm": 37.25166702270508,
|
|
"learning_rate": 2.6209177161234442e-08,
|
|
"logits/chosen": -0.9915554523468018,
|
|
"logits/rejected": -0.9911011457443237,
|
|
"logps/chosen": -713.0601806640625,
|
|
"logps/ref_chosen": -47.94025421142578,
|
|
"logps/ref_rejected": -75.73287963867188,
|
|
"logps/rejected": -1105.387451171875,
|
|
"loss": 1.1667,
|
|
"margin_dpo/margin_mean": 364.5346374511719,
|
|
"margin_dpo/margin_std": 686.7486572265625,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.8707782672540382,
|
|
"fcm_dpo/beta": 0.0010391025571152568,
|
|
"fcm_dpo/delta": 0.0972876325249672,
|
|
"fcm_dpo/margin": 294.2417297363281,
|
|
"fcm_dpo/q_t": 0.43218329548835754,
|
|
"grad_norm": 51.747840881347656,
|
|
"learning_rate": 2.564009866938349e-08,
|
|
"logits/chosen": -0.8582196235656738,
|
|
"logits/rejected": -0.8465102910995483,
|
|
"logps/chosen": -675.2666015625,
|
|
"logps/ref_chosen": -48.690757751464844,
|
|
"logps/ref_rejected": -60.90800094604492,
|
|
"logps/rejected": -981.7255859375,
|
|
"loss": 1.1985,
|
|
"margin_dpo/margin_mean": 294.2417297363281,
|
|
"margin_dpo/margin_std": 604.4602661132812,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.8722466960352423,
|
|
"fcm_dpo/beta": 0.0010585633572191,
|
|
"fcm_dpo/delta": 0.01568973809480667,
|
|
"fcm_dpo/margin": 362.5345153808594,
|
|
"fcm_dpo/q_t": 0.41356098651885986,
|
|
"grad_norm": 41.935630798339844,
|
|
"learning_rate": 2.5076932269588708e-08,
|
|
"logits/chosen": -0.9748561382293701,
|
|
"logits/rejected": -0.9647470712661743,
|
|
"logps/chosen": -659.4791870117188,
|
|
"logps/ref_chosen": -54.93488693237305,
|
|
"logps/ref_rejected": -86.09967803955078,
|
|
"logps/rejected": -1053.178466796875,
|
|
"loss": 1.1296,
|
|
"margin_dpo/margin_mean": 362.5345153808594,
|
|
"margin_dpo/margin_std": 570.4609375,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.8737151248164464,
|
|
"fcm_dpo/beta": 0.001042917836457491,
|
|
"fcm_dpo/delta": -0.03980486840009689,
|
|
"fcm_dpo/margin": 419.83013916015625,
|
|
"fcm_dpo/q_t": 0.4033294916152954,
|
|
"grad_norm": 47.1993293762207,
|
|
"learning_rate": 2.451969280180849e-08,
|
|
"logits/chosen": -0.9675269722938538,
|
|
"logits/rejected": -0.984915018081665,
|
|
"logps/chosen": -637.9202880859375,
|
|
"logps/ref_chosen": -49.4204216003418,
|
|
"logps/ref_rejected": -80.62731170654297,
|
|
"logps/rejected": -1088.957275390625,
|
|
"loss": 1.0748,
|
|
"margin_dpo/margin_mean": 419.83013916015625,
|
|
"margin_dpo/margin_std": 574.22412109375,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.8751835535976505,
|
|
"fcm_dpo/beta": 0.001060024369508028,
|
|
"fcm_dpo/delta": 0.09763069450855255,
|
|
"fcm_dpo/margin": 287.9169921875,
|
|
"fcm_dpo/q_t": 0.4336046576499939,
|
|
"grad_norm": 63.941131591796875,
|
|
"learning_rate": 2.396839494982103e-08,
|
|
"logits/chosen": -0.9581549167633057,
|
|
"logits/rejected": -0.9236706495285034,
|
|
"logps/chosen": -733.4730224609375,
|
|
"logps/ref_chosen": -59.791683197021484,
|
|
"logps/ref_rejected": -80.09111785888672,
|
|
"logps/rejected": -1041.689453125,
|
|
"loss": 1.1978,
|
|
"margin_dpo/margin_mean": 287.9169921875,
|
|
"margin_dpo/margin_std": 603.924560546875,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.8766519823788547,
|
|
"fcm_dpo/beta": 0.0010323208989575505,
|
|
"fcm_dpo/delta": -0.13189196586608887,
|
|
"fcm_dpo/margin": 505.18560791015625,
|
|
"fcm_dpo/q_t": 0.3857056498527527,
|
|
"grad_norm": 32.748313903808594,
|
|
"learning_rate": 2.3423053240837514e-08,
|
|
"logits/chosen": -0.8813081979751587,
|
|
"logits/rejected": -0.9265748262405396,
|
|
"logps/chosen": -683.490234375,
|
|
"logps/ref_chosen": -57.26078796386719,
|
|
"logps/ref_rejected": -100.6937255859375,
|
|
"logps/rejected": -1232.1087646484375,
|
|
"loss": 1.0412,
|
|
"margin_dpo/margin_mean": 505.18560791015625,
|
|
"margin_dpo/margin_std": 654.4864501953125,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.8781204111600588,
|
|
"fcm_dpo/beta": 0.001027698628604412,
|
|
"fcm_dpo/delta": -0.0025387555360794067,
|
|
"fcm_dpo/margin": 390.36993408203125,
|
|
"fcm_dpo/q_t": 0.4076780080795288,
|
|
"grad_norm": 37.18258285522461,
|
|
"learning_rate": 2.2883682045119062e-08,
|
|
"logits/chosen": -1.003603219985962,
|
|
"logits/rejected": -1.0124932527542114,
|
|
"logps/chosen": -688.0802001953125,
|
|
"logps/ref_chosen": -52.51850509643555,
|
|
"logps/ref_rejected": -89.44385528564453,
|
|
"logps/rejected": -1115.37548828125,
|
|
"loss": 1.1075,
|
|
"margin_dpo/margin_mean": 390.36993408203125,
|
|
"margin_dpo/margin_std": 558.430908203125,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.8795888399412628,
|
|
"fcm_dpo/beta": 0.0010358416475355625,
|
|
"fcm_dpo/delta": 0.05951521918177605,
|
|
"fcm_dpo/margin": 329.6752624511719,
|
|
"fcm_dpo/q_t": 0.41848763823509216,
|
|
"grad_norm": 40.37639236450195,
|
|
"learning_rate": 2.2350295575598367e-08,
|
|
"logits/chosen": -0.9107227325439453,
|
|
"logits/rejected": -0.916115403175354,
|
|
"logps/chosen": -684.2776489257812,
|
|
"logps/ref_chosen": -49.802677154541016,
|
|
"logps/ref_rejected": -82.978515625,
|
|
"logps/rejected": -1047.1287841796875,
|
|
"loss": 1.1367,
|
|
"margin_dpo/margin_mean": 329.6752624511719,
|
|
"margin_dpo/margin_std": 463.71551513671875,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.8810572687224669,
|
|
"fcm_dpo/beta": 0.0010582783725112677,
|
|
"fcm_dpo/delta": 0.07414868474006653,
|
|
"fcm_dpo/margin": 310.2664794921875,
|
|
"fcm_dpo/q_t": 0.4272015690803528,
|
|
"grad_norm": 38.3448371887207,
|
|
"learning_rate": 2.1822907887504932e-08,
|
|
"logits/chosen": -1.036280870437622,
|
|
"logits/rejected": -1.0311980247497559,
|
|
"logps/chosen": -761.5162963867188,
|
|
"logps/ref_chosen": -66.43487548828125,
|
|
"logps/ref_rejected": -85.45649719238281,
|
|
"logps/rejected": -1090.804443359375,
|
|
"loss": 1.1787,
|
|
"margin_dpo/margin_mean": 310.2664794921875,
|
|
"margin_dpo/margin_std": 604.5630493164062,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.8810572687224669,
|
|
"eval_fcm_dpo/beta": 0.0010637843515723944,
|
|
"eval_logits/chosen": -1.0093570947647095,
|
|
"eval_logits/rejected": -1.006962776184082,
|
|
"eval_logps/chosen": -820.5049438476562,
|
|
"eval_logps/ref_chosen": -79.05104064941406,
|
|
"eval_logps/ref_rejected": -86.79793548583984,
|
|
"eval_logps/rejected": -1108.1163330078125,
|
|
"eval_loss": 0.6083120107650757,
|
|
"eval_margin_dpo/margin_mean": 279.864501953125,
|
|
"eval_margin_dpo/margin_std": 623.9760131835938,
|
|
"eval_runtime": 39.2935,
|
|
"eval_samples_per_second": 59.526,
|
|
"eval_steps_per_second": 1.883,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.882525697503671,
|
|
"fcm_dpo/beta": 0.0010579151567071676,
|
|
"fcm_dpo/delta": -0.02401512674987316,
|
|
"fcm_dpo/margin": 399.729248046875,
|
|
"fcm_dpo/q_t": 0.40252023935317993,
|
|
"grad_norm": 37.17901611328125,
|
|
"learning_rate": 2.1301532877994742e-08,
|
|
"logits/chosen": -0.9913003444671631,
|
|
"logits/rejected": -1.0062311887741089,
|
|
"logps/chosen": -783.751708984375,
|
|
"logps/ref_chosen": -59.13361358642578,
|
|
"logps/ref_rejected": -94.69093322753906,
|
|
"logps/rejected": -1219.038330078125,
|
|
"loss": 1.0849,
|
|
"margin_dpo/margin_mean": 399.7292785644531,
|
|
"margin_dpo/margin_std": 545.0308227539062,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.8839941262848752,
|
|
"fcm_dpo/beta": 0.0010464717634022236,
|
|
"fcm_dpo/delta": -0.12171060591936111,
|
|
"fcm_dpo/margin": 492.49066162109375,
|
|
"fcm_dpo/q_t": 0.3832111358642578,
|
|
"grad_norm": 63.6419677734375,
|
|
"learning_rate": 2.0786184285784298e-08,
|
|
"logits/chosen": -0.9910135269165039,
|
|
"logits/rejected": -1.024593710899353,
|
|
"logps/chosen": -532.450927734375,
|
|
"logps/ref_chosen": -48.59352111816406,
|
|
"logps/ref_rejected": -87.6685562133789,
|
|
"logps/rejected": -1064.0166015625,
|
|
"loss": 1.0113,
|
|
"margin_dpo/margin_mean": 492.4906921386719,
|
|
"margin_dpo/margin_std": 543.3355102539062,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.8854625550660793,
|
|
"fcm_dpo/beta": 0.001022031530737877,
|
|
"fcm_dpo/delta": -0.07094608247280121,
|
|
"fcm_dpo/margin": 457.523193359375,
|
|
"fcm_dpo/q_t": 0.3987892270088196,
|
|
"grad_norm": 45.804847717285156,
|
|
"learning_rate": 2.0276875690788204e-08,
|
|
"logits/chosen": -0.9808427095413208,
|
|
"logits/rejected": -0.9711321592330933,
|
|
"logps/chosen": -687.8028564453125,
|
|
"logps/ref_chosen": -70.41461944580078,
|
|
"logps/ref_rejected": -100.32559967041016,
|
|
"logps/rejected": -1175.237060546875,
|
|
"loss": 1.0778,
|
|
"margin_dpo/margin_mean": 457.523193359375,
|
|
"margin_dpo/margin_std": 686.7850341796875,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.8869309838472834,
|
|
"fcm_dpo/beta": 0.0010027764365077019,
|
|
"fcm_dpo/delta": -0.10522530972957611,
|
|
"fcm_dpo/margin": 498.571044921875,
|
|
"fcm_dpo/q_t": 0.3895166218280792,
|
|
"grad_norm": 45.26606369018555,
|
|
"learning_rate": 1.977362051376158e-08,
|
|
"logits/chosen": -0.970876157283783,
|
|
"logits/rejected": -1.0076401233673096,
|
|
"logps/chosen": -649.3685302734375,
|
|
"logps/ref_chosen": -46.45808029174805,
|
|
"logps/ref_rejected": -91.8544921875,
|
|
"logps/rejected": -1193.3359375,
|
|
"loss": 1.0481,
|
|
"margin_dpo/margin_mean": 498.571044921875,
|
|
"margin_dpo/margin_std": 668.9893798828125,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.8883994126284875,
|
|
"fcm_dpo/beta": 0.0010006949305534363,
|
|
"fcm_dpo/delta": 0.038943853229284286,
|
|
"fcm_dpo/margin": 362.182373046875,
|
|
"fcm_dpo/q_t": 0.41993600130081177,
|
|
"grad_norm": 39.667091369628906,
|
|
"learning_rate": 1.9276432015946446e-08,
|
|
"logits/chosen": -0.9392565488815308,
|
|
"logits/rejected": -0.9526230096817017,
|
|
"logps/chosen": -716.2191162109375,
|
|
"logps/ref_chosen": -66.24933624267578,
|
|
"logps/ref_rejected": -102.30496978759766,
|
|
"logps/rejected": -1114.45703125,
|
|
"loss": 1.1385,
|
|
"margin_dpo/margin_mean": 362.182373046875,
|
|
"margin_dpo/margin_std": 607.8369140625,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.8898678414096917,
|
|
"fcm_dpo/beta": 0.001008864026516676,
|
|
"fcm_dpo/delta": -0.003518037497997284,
|
|
"fcm_dpo/margin": 399.5196533203125,
|
|
"fcm_dpo/q_t": 0.40903180837631226,
|
|
"grad_norm": 28.302597045898438,
|
|
"learning_rate": 1.8785323298722093e-08,
|
|
"logits/chosen": -0.9261046051979065,
|
|
"logits/rejected": -0.9387686848640442,
|
|
"logps/chosen": -695.829345703125,
|
|
"logps/ref_chosen": -54.819122314453125,
|
|
"logps/ref_rejected": -98.37146759033203,
|
|
"logps/rejected": -1138.9013671875,
|
|
"loss": 1.0963,
|
|
"margin_dpo/margin_mean": 399.5196533203125,
|
|
"margin_dpo/margin_std": 556.862060546875,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.8913362701908958,
|
|
"fcm_dpo/beta": 0.0010172666516155005,
|
|
"fcm_dpo/delta": 0.06301670521497726,
|
|
"fcm_dpo/margin": 332.9754943847656,
|
|
"fcm_dpo/q_t": 0.4240596890449524,
|
|
"grad_norm": 30.476581573486328,
|
|
"learning_rate": 1.8300307303259904e-08,
|
|
"logits/chosen": -0.918168306350708,
|
|
"logits/rejected": -0.9049118161201477,
|
|
"logps/chosen": -733.3533935546875,
|
|
"logps/ref_chosen": -58.08403778076172,
|
|
"logps/ref_rejected": -79.777099609375,
|
|
"logps/rejected": -1088.02197265625,
|
|
"loss": 1.1554,
|
|
"margin_dpo/margin_mean": 332.97552490234375,
|
|
"margin_dpo/margin_std": 572.5631103515625,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.8928046989720999,
|
|
"fcm_dpo/beta": 0.00101991998963058,
|
|
"fcm_dpo/delta": 0.0018842313438653946,
|
|
"fcm_dpo/margin": 390.30804443359375,
|
|
"fcm_dpo/q_t": 0.4078730344772339,
|
|
"grad_norm": 31.276063919067383,
|
|
"learning_rate": 1.7821396810182437e-08,
|
|
"logits/chosen": -0.9649184942245483,
|
|
"logits/rejected": -0.9744598865509033,
|
|
"logps/chosen": -653.48583984375,
|
|
"logps/ref_chosen": -57.450836181640625,
|
|
"logps/ref_rejected": -94.77339172363281,
|
|
"logps/rejected": -1081.116455078125,
|
|
"loss": 1.0854,
|
|
"margin_dpo/margin_mean": 390.30804443359375,
|
|
"margin_dpo/margin_std": 487.88525390625,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.8942731277533039,
|
|
"fcm_dpo/beta": 0.0010023643262684345,
|
|
"fcm_dpo/delta": -0.10511058568954468,
|
|
"fcm_dpo/margin": 498.7186584472656,
|
|
"fcm_dpo/q_t": 0.3940780460834503,
|
|
"grad_norm": 31.827770233154297,
|
|
"learning_rate": 1.7348604439226617e-08,
|
|
"logits/chosen": -1.0191905498504639,
|
|
"logits/rejected": -1.0367772579193115,
|
|
"logps/chosen": -678.4195556640625,
|
|
"logps/ref_chosen": -58.805355072021484,
|
|
"logps/ref_rejected": -88.81600952148438,
|
|
"logps/rejected": -1207.14892578125,
|
|
"loss": 1.0632,
|
|
"margin_dpo/margin_mean": 498.7186584472656,
|
|
"margin_dpo/margin_std": 760.2215576171875,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.895741556534508,
|
|
"fcm_dpo/beta": 0.0010101648513227701,
|
|
"fcm_dpo/delta": 0.08314502984285355,
|
|
"fcm_dpo/margin": 316.170654296875,
|
|
"fcm_dpo/q_t": 0.426239937543869,
|
|
"grad_norm": 49.75373458862305,
|
|
"learning_rate": 1.6881942648911074e-08,
|
|
"logits/chosen": -0.9572958946228027,
|
|
"logits/rejected": -0.9347594380378723,
|
|
"logps/chosen": -688.2482299804688,
|
|
"logps/ref_chosen": -65.69503784179688,
|
|
"logps/ref_rejected": -83.40538787841797,
|
|
"logps/rejected": -1022.1292724609375,
|
|
"loss": 1.172,
|
|
"margin_dpo/margin_mean": 316.170654296875,
|
|
"margin_dpo/margin_std": 579.01513671875,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.8972099853157122,
|
|
"fcm_dpo/beta": 0.0009934802073985338,
|
|
"fcm_dpo/delta": -0.13049647212028503,
|
|
"fcm_dpo/margin": 527.0625,
|
|
"fcm_dpo/q_t": 0.38698631525039673,
|
|
"grad_norm": 32.91178894042969,
|
|
"learning_rate": 1.6421423736208e-08,
|
|
"logits/chosen": -1.0069807767868042,
|
|
"logits/rejected": -1.0485907793045044,
|
|
"logps/chosen": -711.209716796875,
|
|
"logps/ref_chosen": -52.59946823120117,
|
|
"logps/ref_rejected": -86.33099365234375,
|
|
"logps/rejected": -1272.003662109375,
|
|
"loss": 1.0454,
|
|
"margin_dpo/margin_mean": 527.0625,
|
|
"margin_dpo/margin_std": 733.5755615234375,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.8986784140969163,
|
|
"fcm_dpo/beta": 0.0009895211551338434,
|
|
"fcm_dpo/delta": -0.007441475987434387,
|
|
"fcm_dpo/margin": 411.2275085449219,
|
|
"fcm_dpo/q_t": 0.40736648440361023,
|
|
"grad_norm": 32.78879165649414,
|
|
"learning_rate": 1.5967059836219042e-08,
|
|
"logits/chosen": -1.0194255113601685,
|
|
"logits/rejected": -1.0259497165679932,
|
|
"logps/chosen": -765.8218994140625,
|
|
"logps/ref_chosen": -59.32372283935547,
|
|
"logps/ref_rejected": -88.31239318847656,
|
|
"logps/rejected": -1206.0380859375,
|
|
"loss": 1.0933,
|
|
"margin_dpo/margin_mean": 411.22747802734375,
|
|
"margin_dpo/margin_std": 567.0072021484375,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.9001468428781204,
|
|
"fcm_dpo/beta": 0.0009705990669317544,
|
|
"fcm_dpo/delta": -0.08382614701986313,
|
|
"fcm_dpo/margin": 493.93206787109375,
|
|
"fcm_dpo/q_t": 0.390705943107605,
|
|
"grad_norm": 35.579498291015625,
|
|
"learning_rate": 1.551886292185553e-08,
|
|
"logits/chosen": -0.9657102227210999,
|
|
"logits/rejected": -1.0158898830413818,
|
|
"logps/chosen": -682.1220703125,
|
|
"logps/ref_chosen": -59.72996520996094,
|
|
"logps/ref_rejected": -105.10752868652344,
|
|
"logps/rejected": -1221.431640625,
|
|
"loss": 1.042,
|
|
"margin_dpo/margin_mean": 493.93206787109375,
|
|
"margin_dpo/margin_std": 606.7100830078125,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.9016152716593245,
|
|
"fcm_dpo/beta": 0.0009615451563149691,
|
|
"fcm_dpo/delta": -0.06227314844727516,
|
|
"fcm_dpo/margin": 477.830810546875,
|
|
"fcm_dpo/q_t": 0.3970866799354553,
|
|
"grad_norm": 54.85789108276367,
|
|
"learning_rate": 1.507684480352292e-08,
|
|
"logits/chosen": -0.9549213647842407,
|
|
"logits/rejected": -1.0247011184692383,
|
|
"logps/chosen": -726.4387817382812,
|
|
"logps/ref_chosen": -52.93898010253906,
|
|
"logps/ref_rejected": -104.67938232421875,
|
|
"logps/rejected": -1256.010009765625,
|
|
"loss": 1.0719,
|
|
"margin_dpo/margin_mean": 477.8308410644531,
|
|
"margin_dpo/margin_std": 666.865478515625,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.9030837004405287,
|
|
"fcm_dpo/beta": 0.0009616809547878802,
|
|
"fcm_dpo/delta": 0.005887992680072784,
|
|
"fcm_dpo/margin": 409.75201416015625,
|
|
"fcm_dpo/q_t": 0.41104698181152344,
|
|
"grad_norm": 30.430543899536133,
|
|
"learning_rate": 1.4641017128809801e-08,
|
|
"logits/chosen": -0.9402486681938171,
|
|
"logits/rejected": -0.9574323892593384,
|
|
"logps/chosen": -709.1848754882812,
|
|
"logps/ref_chosen": -65.81727600097656,
|
|
"logps/ref_rejected": -95.17749786376953,
|
|
"logps/rejected": -1148.297119140625,
|
|
"loss": 1.1207,
|
|
"margin_dpo/margin_mean": 409.75201416015625,
|
|
"margin_dpo/margin_std": 655.8992919921875,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.9045521292217328,
|
|
"fcm_dpo/beta": 0.0009705583215691149,
|
|
"fcm_dpo/delta": 0.08069596439599991,
|
|
"fcm_dpo/margin": 331.6129455566406,
|
|
"fcm_dpo/q_t": 0.42673254013061523,
|
|
"grad_norm": 32.93946075439453,
|
|
"learning_rate": 1.4211391382180637e-08,
|
|
"logits/chosen": -1.0074067115783691,
|
|
"logits/rejected": -0.9894207715988159,
|
|
"logps/chosen": -825.5562744140625,
|
|
"logps/ref_chosen": -65.13285827636719,
|
|
"logps/ref_rejected": -74.70050048828125,
|
|
"logps/rejected": -1166.73681640625,
|
|
"loss": 1.161,
|
|
"margin_dpo/margin_mean": 331.6129455566406,
|
|
"margin_dpo/margin_std": 562.9627685546875,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.9060205580029369,
|
|
"fcm_dpo/beta": 0.000999385491013527,
|
|
"fcm_dpo/delta": 0.16457805037498474,
|
|
"fcm_dpo/margin": 239.29647827148438,
|
|
"fcm_dpo/q_t": 0.4457574784755707,
|
|
"grad_norm": 54.66185760498047,
|
|
"learning_rate": 1.378797888467345e-08,
|
|
"logits/chosen": -0.9095033407211304,
|
|
"logits/rejected": -0.8759035468101501,
|
|
"logps/chosen": -777.98486328125,
|
|
"logps/ref_chosen": -63.005550384521484,
|
|
"logps/ref_rejected": -64.234130859375,
|
|
"logps/rejected": -1018.5098876953125,
|
|
"loss": 1.2344,
|
|
"margin_dpo/margin_mean": 239.29647827148438,
|
|
"margin_dpo/margin_std": 552.6876220703125,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.9074889867841409,
|
|
"fcm_dpo/beta": 0.0009954373817890882,
|
|
"fcm_dpo/delta": -0.11912906914949417,
|
|
"fcm_dpo/margin": 515.2799072265625,
|
|
"fcm_dpo/q_t": 0.3897508680820465,
|
|
"grad_norm": 41.893890380859375,
|
|
"learning_rate": 1.3370790793601371e-08,
|
|
"logits/chosen": -0.9436015486717224,
|
|
"logits/rejected": -0.9779185652732849,
|
|
"logps/chosen": -797.0797119140625,
|
|
"logps/ref_chosen": -67.10134887695312,
|
|
"logps/ref_rejected": -92.15340423583984,
|
|
"logps/rejected": -1337.41162109375,
|
|
"loss": 1.0795,
|
|
"margin_dpo/margin_mean": 515.2799072265625,
|
|
"margin_dpo/margin_std": 803.7247314453125,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.908957415565345,
|
|
"fcm_dpo/beta": 0.0009861327707767487,
|
|
"fcm_dpo/delta": 0.018695583567023277,
|
|
"fcm_dpo/margin": 387.3755798339844,
|
|
"fcm_dpo/q_t": 0.42096269130706787,
|
|
"grad_norm": 51.36298370361328,
|
|
"learning_rate": 1.2959838102258535e-08,
|
|
"logits/chosen": -0.9847129583358765,
|
|
"logits/rejected": -0.9953620433807373,
|
|
"logps/chosen": -796.674072265625,
|
|
"logps/ref_chosen": -55.978233337402344,
|
|
"logps/ref_rejected": -93.1854019165039,
|
|
"logps/rejected": -1221.2568359375,
|
|
"loss": 1.1758,
|
|
"margin_dpo/margin_mean": 387.3755798339844,
|
|
"margin_dpo/margin_std": 792.19189453125,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.9104258443465492,
|
|
"fcm_dpo/beta": 0.0009894105605781078,
|
|
"fcm_dpo/delta": 0.04409804195165634,
|
|
"fcm_dpo/margin": 361.19091796875,
|
|
"fcm_dpo/q_t": 0.420096218585968,
|
|
"grad_norm": 38.462669372558594,
|
|
"learning_rate": 1.2555131639630567e-08,
|
|
"logits/chosen": -1.0069198608398438,
|
|
"logits/rejected": -1.0124790668487549,
|
|
"logps/chosen": -751.1500244140625,
|
|
"logps/ref_chosen": -59.79750061035156,
|
|
"logps/ref_rejected": -78.41075134277344,
|
|
"logps/rejected": -1130.9541015625,
|
|
"loss": 1.147,
|
|
"margin_dpo/margin_mean": 361.19091796875,
|
|
"margin_dpo/margin_std": 616.5042724609375,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.9118942731277533,
|
|
"fcm_dpo/beta": 0.0009799831314012408,
|
|
"fcm_dpo/delta": -0.15620173513889313,
|
|
"fcm_dpo/margin": 558.8348388671875,
|
|
"fcm_dpo/q_t": 0.3770964741706848,
|
|
"grad_norm": 40.64413070678711,
|
|
"learning_rate": 1.2156682070109086e-08,
|
|
"logits/chosen": -1.0363627672195435,
|
|
"logits/rejected": -1.0835423469543457,
|
|
"logps/chosen": -709.837646484375,
|
|
"logps/ref_chosen": -53.93375778198242,
|
|
"logps/ref_rejected": -88.36951446533203,
|
|
"logps/rejected": -1303.1083984375,
|
|
"loss": 1.0241,
|
|
"margin_dpo/margin_mean": 558.8348388671875,
|
|
"margin_dpo/margin_std": 708.3245849609375,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.9133627019089574,
|
|
"fcm_dpo/beta": 0.0009631599532440305,
|
|
"fcm_dpo/delta": 0.02442977949976921,
|
|
"fcm_dpo/margin": 390.4576721191406,
|
|
"fcm_dpo/q_t": 0.4159480631351471,
|
|
"grad_norm": 40.2901611328125,
|
|
"learning_rate": 1.1764499893210878e-08,
|
|
"logits/chosen": -0.8899050951004028,
|
|
"logits/rejected": -0.8752150535583496,
|
|
"logps/chosen": -711.6151123046875,
|
|
"logps/ref_chosen": -60.28582000732422,
|
|
"logps/ref_rejected": -85.51873779296875,
|
|
"logps/rejected": -1127.3056640625,
|
|
"loss": 1.1281,
|
|
"margin_dpo/margin_mean": 390.45770263671875,
|
|
"margin_dpo/margin_std": 627.16259765625,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.9148311306901615,
|
|
"fcm_dpo/beta": 0.0009855421958491206,
|
|
"fcm_dpo/delta": 0.10792680084705353,
|
|
"fcm_dpo/margin": 299.546630859375,
|
|
"fcm_dpo/q_t": 0.43511348962783813,
|
|
"grad_norm": 37.03680419921875,
|
|
"learning_rate": 1.1378595443300998e-08,
|
|
"logits/chosen": -1.0057023763656616,
|
|
"logits/rejected": -1.003598928451538,
|
|
"logps/chosen": -770.2930297851562,
|
|
"logps/ref_chosen": -64.1569595336914,
|
|
"logps/ref_rejected": -85.08304595947266,
|
|
"logps/rejected": -1090.765869140625,
|
|
"loss": 1.1935,
|
|
"margin_dpo/margin_mean": 299.5466613769531,
|
|
"margin_dpo/margin_std": 606.3578491210938,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.9162995594713657,
|
|
"fcm_dpo/beta": 0.0009818391408771276,
|
|
"fcm_dpo/delta": -0.0640857070684433,
|
|
"fcm_dpo/margin": 469.6931457519531,
|
|
"fcm_dpo/q_t": 0.3927311301231384,
|
|
"grad_norm": 33.05645751953125,
|
|
"learning_rate": 1.0998978889320582e-08,
|
|
"logits/chosen": -1.034135103225708,
|
|
"logits/rejected": -1.032621145248413,
|
|
"logps/chosen": -779.2677612304688,
|
|
"logps/ref_chosen": -71.91862487792969,
|
|
"logps/ref_rejected": -97.13203430175781,
|
|
"logps/rejected": -1274.17431640625,
|
|
"loss": 1.0546,
|
|
"margin_dpo/margin_mean": 469.6931457519531,
|
|
"margin_dpo/margin_std": 583.8242797851562,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.9177679882525698,
|
|
"fcm_dpo/beta": 0.0009644476231187582,
|
|
"fcm_dpo/delta": -0.11376336961984634,
|
|
"fcm_dpo/margin": 526.8627319335938,
|
|
"fcm_dpo/q_t": 0.3830944895744324,
|
|
"grad_norm": 67.42720794677734,
|
|
"learning_rate": 1.0625660234518913e-08,
|
|
"logits/chosen": -0.9315009117126465,
|
|
"logits/rejected": -0.9569610953330994,
|
|
"logps/chosen": -696.3958740234375,
|
|
"logps/ref_chosen": -58.342071533203125,
|
|
"logps/ref_rejected": -86.09038543701172,
|
|
"logps/rejected": -1251.0068359375,
|
|
"loss": 1.0001,
|
|
"margin_dpo/margin_mean": 526.8627319335938,
|
|
"margin_dpo/margin_std": 528.9290771484375,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.9192364170337739,
|
|
"fcm_dpo/beta": 0.0009729490848258138,
|
|
"fcm_dpo/delta": 0.1208110824227333,
|
|
"fcm_dpo/margin": 290.5083923339844,
|
|
"fcm_dpo/q_t": 0.43388864398002625,
|
|
"grad_norm": 42.13291931152344,
|
|
"learning_rate": 1.0258649316189721e-08,
|
|
"logits/chosen": -0.9432566165924072,
|
|
"logits/rejected": -0.9342271685600281,
|
|
"logps/chosen": -887.2332763671875,
|
|
"logps/ref_chosen": -75.11260986328125,
|
|
"logps/ref_rejected": -99.188720703125,
|
|
"logps/rejected": -1201.81787109375,
|
|
"loss": 1.218,
|
|
"margin_dpo/margin_mean": 290.5083923339844,
|
|
"margin_dpo/margin_std": 636.7235107421875,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.920704845814978,
|
|
"fcm_dpo/beta": 0.0009612108115106821,
|
|
"fcm_dpo/delta": -0.1750987321138382,
|
|
"fcm_dpo/margin": 588.1961669921875,
|
|
"fcm_dpo/q_t": 0.3845459520816803,
|
|
"grad_norm": 32.053462982177734,
|
|
"learning_rate": 9.897955805412e-09,
|
|
"logits/chosen": -0.8771834373474121,
|
|
"logits/rejected": -0.9516627192497253,
|
|
"logps/chosen": -608.0906982421875,
|
|
"logps/ref_chosen": -47.74314880371094,
|
|
"logps/ref_rejected": -106.75448608398438,
|
|
"logps/rejected": -1255.29833984375,
|
|
"loss": 1.0301,
|
|
"margin_dpo/margin_mean": 588.1962280273438,
|
|
"margin_dpo/margin_std": 823.0296630859375,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.922173274596182,
|
|
"fcm_dpo/beta": 0.0009421667200513184,
|
|
"fcm_dpo/delta": -0.013625391758978367,
|
|
"fcm_dpo/margin": 438.33990478515625,
|
|
"fcm_dpo/q_t": 0.4074496626853943,
|
|
"grad_norm": 35.12033462524414,
|
|
"learning_rate": 9.543589206795238e-09,
|
|
"logits/chosen": -0.9957294464111328,
|
|
"logits/rejected": -1.0113835334777832,
|
|
"logps/chosen": -786.462646484375,
|
|
"logps/ref_chosen": -60.182945251464844,
|
|
"logps/ref_rejected": -101.55467224121094,
|
|
"logps/rejected": -1266.17431640625,
|
|
"loss": 1.1037,
|
|
"margin_dpo/margin_mean": 438.33990478515625,
|
|
"margin_dpo/margin_std": 655.6448974609375,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.9236417033773862,
|
|
"fcm_dpo/beta": 0.0009447969496250153,
|
|
"fcm_dpo/delta": 0.008085597306489944,
|
|
"fcm_dpo/margin": 415.1349792480469,
|
|
"fcm_dpo/q_t": 0.4088371992111206,
|
|
"grad_norm": 39.81538772583008,
|
|
"learning_rate": 9.19555885822887e-09,
|
|
"logits/chosen": -0.9972596168518066,
|
|
"logits/rejected": -1.0130850076675415,
|
|
"logps/chosen": -760.9837646484375,
|
|
"logps/ref_chosen": -64.21354675292969,
|
|
"logps/ref_rejected": -91.65367126464844,
|
|
"logps/rejected": -1203.5589599609375,
|
|
"loss": 1.0959,
|
|
"margin_dpo/margin_mean": 415.1349792480469,
|
|
"margin_dpo/margin_std": 550.6982421875,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.9251101321585903,
|
|
"fcm_dpo/beta": 0.0009731564205139875,
|
|
"fcm_dpo/delta": 0.17090709507465363,
|
|
"fcm_dpo/margin": 238.38504028320312,
|
|
"fcm_dpo/q_t": 0.45254063606262207,
|
|
"grad_norm": 57.21560287475586,
|
|
"learning_rate": 8.85387393063622e-09,
|
|
"logits/chosen": -0.9586611390113831,
|
|
"logits/rejected": -0.929883599281311,
|
|
"logps/chosen": -707.209716796875,
|
|
"logps/ref_chosen": -59.29100036621094,
|
|
"logps/ref_rejected": -83.59829711914062,
|
|
"logps/rejected": -969.902099609375,
|
|
"loss": 1.2624,
|
|
"margin_dpo/margin_mean": 238.38504028320312,
|
|
"margin_dpo/margin_std": 663.2811279296875,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.9265785609397944,
|
|
"fcm_dpo/beta": 0.0009837900288403034,
|
|
"fcm_dpo/delta": 0.034737687557935715,
|
|
"fcm_dpo/margin": 372.49517822265625,
|
|
"fcm_dpo/q_t": 0.4164769649505615,
|
|
"grad_norm": 31.798059463500977,
|
|
"learning_rate": 8.518543427732949e-09,
|
|
"logits/chosen": -1.0678491592407227,
|
|
"logits/rejected": -1.0738096237182617,
|
|
"logps/chosen": -826.5518798828125,
|
|
"logps/ref_chosen": -59.45360565185547,
|
|
"logps/ref_rejected": -80.95156860351562,
|
|
"logps/rejected": -1220.544921875,
|
|
"loss": 1.1496,
|
|
"margin_dpo/margin_mean": 372.4952087402344,
|
|
"margin_dpo/margin_std": 653.4166870117188,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.9280469897209985,
|
|
"fcm_dpo/beta": 0.0009830878116190434,
|
|
"fcm_dpo/delta": 0.02477090060710907,
|
|
"fcm_dpo/margin": 382.27484130859375,
|
|
"fcm_dpo/q_t": 0.41463130712509155,
|
|
"grad_norm": 43.760009765625,
|
|
"learning_rate": 8.189576185789637e-09,
|
|
"logits/chosen": -1.0130560398101807,
|
|
"logits/rejected": -1.014772891998291,
|
|
"logps/chosen": -740.15673828125,
|
|
"logps/ref_chosen": -61.35155487060547,
|
|
"logps/ref_rejected": -86.16017150878906,
|
|
"logps/rejected": -1147.240234375,
|
|
"loss": 1.1373,
|
|
"margin_dpo/margin_mean": 382.27484130859375,
|
|
"margin_dpo/margin_std": 631.7064819335938,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.9295154185022027,
|
|
"fcm_dpo/beta": 0.0010105203837156296,
|
|
"fcm_dpo/delta": 0.12257733941078186,
|
|
"fcm_dpo/margin": 277.3817138671875,
|
|
"fcm_dpo/q_t": 0.43608927726745605,
|
|
"grad_norm": 44.86662673950195,
|
|
"learning_rate": 7.866980873399015e-09,
|
|
"logits/chosen": -1.0408048629760742,
|
|
"logits/rejected": -1.048929214477539,
|
|
"logps/chosen": -789.4072265625,
|
|
"logps/ref_chosen": -57.27816390991211,
|
|
"logps/ref_rejected": -91.58395385742188,
|
|
"logps/rejected": -1101.0947265625,
|
|
"loss": 1.206,
|
|
"margin_dpo/margin_mean": 277.3817443847656,
|
|
"margin_dpo/margin_std": 575.353515625,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.9309838472834068,
|
|
"fcm_dpo/beta": 0.0010330864461138844,
|
|
"fcm_dpo/delta": 0.1345616579055786,
|
|
"fcm_dpo/margin": 260.56390380859375,
|
|
"fcm_dpo/q_t": 0.4413578510284424,
|
|
"grad_norm": 40.64011001586914,
|
|
"learning_rate": 7.550765991247654e-09,
|
|
"logits/chosen": -0.8922678828239441,
|
|
"logits/rejected": -0.8835877180099487,
|
|
"logps/chosen": -892.26904296875,
|
|
"logps/ref_chosen": -66.61896514892578,
|
|
"logps/ref_rejected": -107.12564849853516,
|
|
"logps/rejected": -1193.339599609375,
|
|
"loss": 1.2209,
|
|
"margin_dpo/margin_mean": 260.56390380859375,
|
|
"margin_dpo/margin_std": 591.0201416015625,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.9324522760646109,
|
|
"fcm_dpo/beta": 0.0010454216971993446,
|
|
"fcm_dpo/delta": 0.03238925710320473,
|
|
"fcm_dpo/margin": 352.79302978515625,
|
|
"fcm_dpo/q_t": 0.4197089672088623,
|
|
"grad_norm": 44.7509765625,
|
|
"learning_rate": 7.240939871891699e-09,
|
|
"logits/chosen": -1.0014543533325195,
|
|
"logits/rejected": -0.9792279005050659,
|
|
"logps/chosen": -778.3882446289062,
|
|
"logps/ref_chosen": -73.95551300048828,
|
|
"logps/ref_rejected": -82.50045776367188,
|
|
"logps/rejected": -1139.726318359375,
|
|
"loss": 1.1468,
|
|
"margin_dpo/margin_mean": 352.79302978515625,
|
|
"margin_dpo/margin_std": 629.5213623046875,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.933920704845815,
|
|
"fcm_dpo/beta": 0.0010495057795196772,
|
|
"fcm_dpo/delta": -0.0377667136490345,
|
|
"fcm_dpo/margin": 414.8923034667969,
|
|
"fcm_dpo/q_t": 0.40556490421295166,
|
|
"grad_norm": 32.477806091308594,
|
|
"learning_rate": 6.937510679537628e-09,
|
|
"logits/chosen": -0.9295854568481445,
|
|
"logits/rejected": -0.9285162091255188,
|
|
"logps/chosen": -739.5404052734375,
|
|
"logps/ref_chosen": -59.628910064697266,
|
|
"logps/ref_rejected": -81.97883605957031,
|
|
"logps/rejected": -1176.78271484375,
|
|
"loss": 1.0913,
|
|
"margin_dpo/margin_mean": 414.8923034667969,
|
|
"margin_dpo/margin_std": 634.6422729492188,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.9353891336270191,
|
|
"fcm_dpo/beta": 0.0010253810323774815,
|
|
"fcm_dpo/delta": -0.06474175304174423,
|
|
"fcm_dpo/margin": 449.619384765625,
|
|
"fcm_dpo/q_t": 0.39714446663856506,
|
|
"grad_norm": 31.273347854614258,
|
|
"learning_rate": 6.640486409826785e-09,
|
|
"logits/chosen": -1.0574856996536255,
|
|
"logits/rejected": -1.1046029329299927,
|
|
"logps/chosen": -716.3837890625,
|
|
"logps/ref_chosen": -49.652687072753906,
|
|
"logps/ref_rejected": -98.40513610839844,
|
|
"logps/rejected": -1214.755615234375,
|
|
"loss": 1.0641,
|
|
"margin_dpo/margin_mean": 449.619384765625,
|
|
"margin_dpo/margin_std": 604.8092651367188,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.9368575624082232,
|
|
"fcm_dpo/beta": 0.0010413650888949633,
|
|
"fcm_dpo/delta": 0.01695757359266281,
|
|
"fcm_dpo/margin": 365.78363037109375,
|
|
"fcm_dpo/q_t": 0.40809565782546997,
|
|
"grad_norm": 42.46715545654297,
|
|
"learning_rate": 6.349874889624962e-09,
|
|
"logits/chosen": -0.9286128878593445,
|
|
"logits/rejected": -0.9132605791091919,
|
|
"logps/chosen": -708.2297973632812,
|
|
"logps/ref_chosen": -58.156639099121094,
|
|
"logps/ref_rejected": -79.3014907836914,
|
|
"logps/rejected": -1095.158203125,
|
|
"loss": 1.1521,
|
|
"margin_dpo/margin_mean": 365.7836608886719,
|
|
"margin_dpo/margin_std": 640.359130859375,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.9383259911894273,
|
|
"fcm_dpo/beta": 0.0010672295466065407,
|
|
"fcm_dpo/delta": 0.23513709008693695,
|
|
"fcm_dpo/margin": 158.20433044433594,
|
|
"fcm_dpo/q_t": 0.46294790506362915,
|
|
"grad_norm": 126.83493041992188,
|
|
"learning_rate": 6.065683776815933e-09,
|
|
"logits/chosen": -0.9369577169418335,
|
|
"logits/rejected": -0.8788477182388306,
|
|
"logps/chosen": -971.4729614257812,
|
|
"logps/ref_chosen": -72.32319641113281,
|
|
"logps/ref_rejected": -74.2749252319336,
|
|
"logps/rejected": -1131.6290283203125,
|
|
"loss": 1.3492,
|
|
"margin_dpo/margin_mean": 158.20433044433594,
|
|
"margin_dpo/margin_std": 680.5343017578125,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.9397944199706314,
|
|
"fcm_dpo/beta": 0.0010533035965636373,
|
|
"fcm_dpo/delta": -0.20259231328964233,
|
|
"fcm_dpo/margin": 561.17333984375,
|
|
"fcm_dpo/q_t": 0.3751782476902008,
|
|
"grad_norm": 42.19377136230469,
|
|
"learning_rate": 5.7879205600998296e-09,
|
|
"logits/chosen": -0.9074388742446899,
|
|
"logits/rejected": -0.9390972852706909,
|
|
"logps/chosen": -740.1415405273438,
|
|
"logps/ref_chosen": -56.13436508178711,
|
|
"logps/ref_rejected": -108.60014343261719,
|
|
"logps/rejected": -1353.78076171875,
|
|
"loss": 1.0146,
|
|
"margin_dpo/margin_mean": 561.17333984375,
|
|
"margin_dpo/margin_std": 757.92333984375,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.9412628487518355,
|
|
"fcm_dpo/beta": 0.0010397237492725253,
|
|
"fcm_dpo/delta": 0.030443288385868073,
|
|
"fcm_dpo/margin": 356.50567626953125,
|
|
"fcm_dpo/q_t": 0.4193439483642578,
|
|
"grad_norm": 39.22941207885742,
|
|
"learning_rate": 5.516592558795746e-09,
|
|
"logits/chosen": -0.9675936102867126,
|
|
"logits/rejected": -0.9748636484146118,
|
|
"logps/chosen": -830.5360107421875,
|
|
"logps/ref_chosen": -64.99689483642578,
|
|
"logps/ref_rejected": -86.99232482910156,
|
|
"logps/rejected": -1209.037109375,
|
|
"loss": 1.1667,
|
|
"margin_dpo/margin_mean": 356.50567626953125,
|
|
"margin_dpo/margin_std": 696.1326293945312,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.9427312775330396,
|
|
"fcm_dpo/beta": 0.0010383711196482182,
|
|
"fcm_dpo/delta": -0.061861053109169006,
|
|
"fcm_dpo/margin": 441.9483337402344,
|
|
"fcm_dpo/q_t": 0.40887451171875,
|
|
"grad_norm": 49.41666793823242,
|
|
"learning_rate": 5.251706922648868e-09,
|
|
"logits/chosen": -0.9242057800292969,
|
|
"logits/rejected": -0.9570740461349487,
|
|
"logps/chosen": -824.4931640625,
|
|
"logps/ref_chosen": -65.68924713134766,
|
|
"logps/ref_rejected": -110.24205017089844,
|
|
"logps/rejected": -1310.9942626953125,
|
|
"loss": 1.1366,
|
|
"margin_dpo/margin_mean": 441.9483337402344,
|
|
"margin_dpo/margin_std": 868.3840942382812,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.9441997063142438,
|
|
"fcm_dpo/beta": 0.0010298211127519608,
|
|
"fcm_dpo/delta": 0.047456324100494385,
|
|
"fcm_dpo/margin": 343.14263916015625,
|
|
"fcm_dpo/q_t": 0.41927698254585266,
|
|
"grad_norm": 44.1935920715332,
|
|
"learning_rate": 4.993270631642038e-09,
|
|
"logits/chosen": -1.0508840084075928,
|
|
"logits/rejected": -1.0492839813232422,
|
|
"logps/chosen": -730.5237426757812,
|
|
"logps/ref_chosen": -51.94999694824219,
|
|
"logps/ref_rejected": -87.46833801269531,
|
|
"logps/rejected": -1109.1846923828125,
|
|
"loss": 1.1361,
|
|
"margin_dpo/margin_mean": 343.14263916015625,
|
|
"margin_dpo/margin_std": 513.9169311523438,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.9456681350954479,
|
|
"fcm_dpo/beta": 0.0010451602283865213,
|
|
"fcm_dpo/delta": 0.03396453708410263,
|
|
"fcm_dpo/margin": 351.423583984375,
|
|
"fcm_dpo/q_t": 0.42045366764068604,
|
|
"grad_norm": 67.2789306640625,
|
|
"learning_rate": 4.741290495811873e-09,
|
|
"logits/chosen": -0.9379393458366394,
|
|
"logits/rejected": -0.9444572329521179,
|
|
"logps/chosen": -730.1318359375,
|
|
"logps/ref_chosen": -59.017662048339844,
|
|
"logps/ref_rejected": -87.13668823242188,
|
|
"logps/rejected": -1109.67431640625,
|
|
"loss": 1.1793,
|
|
"margin_dpo/margin_mean": 351.4235534667969,
|
|
"margin_dpo/margin_std": 714.7095947265625,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.947136563876652,
|
|
"fcm_dpo/beta": 0.0010768567444756627,
|
|
"fcm_dpo/delta": 0.21616601943969727,
|
|
"fcm_dpo/margin": 175.01731872558594,
|
|
"fcm_dpo/q_t": 0.46020662784576416,
|
|
"grad_norm": 97.86798858642578,
|
|
"learning_rate": 4.495773155069299e-09,
|
|
"logits/chosen": -0.9184206128120422,
|
|
"logits/rejected": -0.9053988456726074,
|
|
"logps/chosen": -743.8544921875,
|
|
"logps/ref_chosen": -55.87602233886719,
|
|
"logps/ref_rejected": -97.78080749511719,
|
|
"logps/rejected": -960.776611328125,
|
|
"loss": 1.3232,
|
|
"margin_dpo/margin_mean": 175.017333984375,
|
|
"margin_dpo/margin_std": 630.8798828125,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.9486049926578561,
|
|
"fcm_dpo/beta": 0.0011009529698640108,
|
|
"fcm_dpo/delta": 0.07241631299257278,
|
|
"fcm_dpo/margin": 299.66253662109375,
|
|
"fcm_dpo/q_t": 0.4266805648803711,
|
|
"grad_norm": 62.45351791381836,
|
|
"learning_rate": 4.256725079024553e-09,
|
|
"logits/chosen": -0.9712661504745483,
|
|
"logits/rejected": -0.9501047730445862,
|
|
"logps/chosen": -719.6700439453125,
|
|
"logps/ref_chosen": -61.275787353515625,
|
|
"logps/ref_rejected": -77.50580596923828,
|
|
"logps/rejected": -1035.5626220703125,
|
|
"loss": 1.1724,
|
|
"margin_dpo/margin_mean": 299.66253662109375,
|
|
"margin_dpo/margin_std": 550.3168334960938,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.9500734214390602,
|
|
"fcm_dpo/beta": 0.0011067369487136602,
|
|
"fcm_dpo/delta": -0.017161661759018898,
|
|
"fcm_dpo/margin": 376.28173828125,
|
|
"fcm_dpo/q_t": 0.4051622450351715,
|
|
"grad_norm": 33.93653106689453,
|
|
"learning_rate": 4.024152566816791e-09,
|
|
"logits/chosen": -0.8960561752319336,
|
|
"logits/rejected": -0.9237020015716553,
|
|
"logps/chosen": -640.0201416015625,
|
|
"logps/ref_chosen": -54.8524169921875,
|
|
"logps/ref_rejected": -93.5194091796875,
|
|
"logps/rejected": -1054.9688720703125,
|
|
"loss": 1.0902,
|
|
"margin_dpo/margin_mean": 376.28173828125,
|
|
"margin_dpo/margin_std": 518.373779296875,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.9515418502202643,
|
|
"fcm_dpo/beta": 0.001069669146090746,
|
|
"fcm_dpo/delta": -0.2267727255821228,
|
|
"fcm_dpo/margin": 573.0113525390625,
|
|
"fcm_dpo/q_t": 0.3707248568534851,
|
|
"grad_norm": 32.7856559753418,
|
|
"learning_rate": 3.798061746947995e-09,
|
|
"logits/chosen": -0.9845846891403198,
|
|
"logits/rejected": -1.0428485870361328,
|
|
"logps/chosen": -663.8997802734375,
|
|
"logps/ref_chosen": -54.17146682739258,
|
|
"logps/ref_rejected": -98.7127914428711,
|
|
"logps/rejected": -1281.452392578125,
|
|
"loss": 0.996,
|
|
"margin_dpo/margin_mean": 573.0113525390625,
|
|
"margin_dpo/margin_std": 760.2608032226562,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.9530102790014684,
|
|
"fcm_dpo/beta": 0.0010775276459753513,
|
|
"fcm_dpo/delta": 0.11305912584066391,
|
|
"fcm_dpo/margin": 268.9691162109375,
|
|
"fcm_dpo/q_t": 0.43929412961006165,
|
|
"grad_norm": 36.04080581665039,
|
|
"learning_rate": 3.5784585771215235e-09,
|
|
"logits/chosen": -0.995445191860199,
|
|
"logits/rejected": -0.9858365654945374,
|
|
"logps/chosen": -715.2255859375,
|
|
"logps/ref_chosen": -62.480350494384766,
|
|
"logps/ref_rejected": -80.07717895507812,
|
|
"logps/rejected": -1001.79150390625,
|
|
"loss": 1.2242,
|
|
"margin_dpo/margin_mean": 268.9690856933594,
|
|
"margin_dpo/margin_std": 619.8480224609375,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.9544787077826725,
|
|
"fcm_dpo/beta": 0.0010704685701057315,
|
|
"fcm_dpo/delta": -0.05206456780433655,
|
|
"fcm_dpo/margin": 419.9454345703125,
|
|
"fcm_dpo/q_t": 0.40182268619537354,
|
|
"grad_norm": 39.28509521484375,
|
|
"learning_rate": 3.3653488440851253e-09,
|
|
"logits/chosen": -0.9378179311752319,
|
|
"logits/rejected": -0.9557840824127197,
|
|
"logps/chosen": -739.037109375,
|
|
"logps/ref_chosen": -56.09281921386719,
|
|
"logps/ref_rejected": -98.26483917236328,
|
|
"logps/rejected": -1201.1546630859375,
|
|
"loss": 1.1067,
|
|
"margin_dpo/margin_mean": 419.9454345703125,
|
|
"margin_dpo/margin_std": 684.6904296875,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.9559471365638766,
|
|
"fcm_dpo/beta": 0.001042212126776576,
|
|
"fcm_dpo/delta": -0.16722121834754944,
|
|
"fcm_dpo/margin": 535.0650634765625,
|
|
"fcm_dpo/q_t": 0.3747968077659607,
|
|
"grad_norm": 39.4487419128418,
|
|
"learning_rate": 3.158738163478475e-09,
|
|
"logits/chosen": -0.9745798110961914,
|
|
"logits/rejected": -1.02805757522583,
|
|
"logps/chosen": -506.2451171875,
|
|
"logps/ref_chosen": -43.42544937133789,
|
|
"logps/ref_rejected": -99.95791625976562,
|
|
"logps/rejected": -1097.842529296875,
|
|
"loss": 0.985,
|
|
"margin_dpo/margin_mean": 535.0650634765625,
|
|
"margin_dpo/margin_std": 584.2362060546875,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 0.9574155653450808,
|
|
"fcm_dpo/beta": 0.0010319831781089306,
|
|
"fcm_dpo/delta": -0.022920312359929085,
|
|
"fcm_dpo/margin": 408.8804626464844,
|
|
"fcm_dpo/q_t": 0.4074384272098541,
|
|
"grad_norm": 37.125572204589844,
|
|
"learning_rate": 2.9586319796851555e-09,
|
|
"logits/chosen": -0.9512811303138733,
|
|
"logits/rejected": -0.9735857248306274,
|
|
"logps/chosen": -662.72412109375,
|
|
"logps/ref_chosen": -62.57680892944336,
|
|
"logps/ref_rejected": -111.76779174804688,
|
|
"logps/rejected": -1120.7955322265625,
|
|
"loss": 1.1193,
|
|
"margin_dpo/margin_mean": 408.88043212890625,
|
|
"margin_dpo/margin_std": 690.8095092773438,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 0.9588839941262849,
|
|
"fcm_dpo/beta": 0.001033414271660149,
|
|
"fcm_dpo/delta": 0.01614570990204811,
|
|
"fcm_dpo/margin": 372.00970458984375,
|
|
"fcm_dpo/q_t": 0.41556644439697266,
|
|
"grad_norm": 43.73779296875,
|
|
"learning_rate": 2.7650355656892166e-09,
|
|
"logits/chosen": -1.027014970779419,
|
|
"logits/rejected": -1.0478019714355469,
|
|
"logps/chosen": -775.9241943359375,
|
|
"logps/ref_chosen": -61.11295700073242,
|
|
"logps/ref_rejected": -103.24960327148438,
|
|
"logps/rejected": -1190.070556640625,
|
|
"loss": 1.1355,
|
|
"margin_dpo/margin_mean": 372.00970458984375,
|
|
"margin_dpo/margin_std": 633.2871704101562,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 0.960352422907489,
|
|
"fcm_dpo/beta": 0.0010394034907221794,
|
|
"fcm_dpo/delta": 0.044013820588588715,
|
|
"fcm_dpo/margin": 344.02301025390625,
|
|
"fcm_dpo/q_t": 0.42064782977104187,
|
|
"grad_norm": 36.298988342285156,
|
|
"learning_rate": 2.577954022936174e-09,
|
|
"logits/chosen": -1.0113834142684937,
|
|
"logits/rejected": -1.0238120555877686,
|
|
"logps/chosen": -716.1892700195312,
|
|
"logps/ref_chosen": -61.7281379699707,
|
|
"logps/ref_rejected": -98.7738037109375,
|
|
"logps/rejected": -1097.2579345703125,
|
|
"loss": 1.1403,
|
|
"margin_dpo/margin_mean": 344.02301025390625,
|
|
"margin_dpo/margin_std": 565.0518798828125,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 0.9618208516886931,
|
|
"fcm_dpo/beta": 0.001045609824359417,
|
|
"fcm_dpo/delta": 0.020625807344913483,
|
|
"fcm_dpo/margin": 363.5751647949219,
|
|
"fcm_dpo/q_t": 0.41563743352890015,
|
|
"grad_norm": 32.227134704589844,
|
|
"learning_rate": 2.397392281198729e-09,
|
|
"logits/chosen": -0.9684833288192749,
|
|
"logits/rejected": -1.0073820352554321,
|
|
"logps/chosen": -658.065673828125,
|
|
"logps/ref_chosen": -49.576812744140625,
|
|
"logps/ref_rejected": -98.29183197021484,
|
|
"logps/rejected": -1070.3558349609375,
|
|
"loss": 1.1311,
|
|
"margin_dpo/margin_mean": 363.57513427734375,
|
|
"margin_dpo/margin_std": 596.8692626953125,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.9632892804698973,
|
|
"fcm_dpo/beta": 0.0010109146824106574,
|
|
"fcm_dpo/delta": -0.2578379213809967,
|
|
"fcm_dpo/margin": 634.94873046875,
|
|
"fcm_dpo/q_t": 0.3600732684135437,
|
|
"grad_norm": 76.16907501220703,
|
|
"learning_rate": 2.223355098446622e-09,
|
|
"logits/chosen": -0.8919037580490112,
|
|
"logits/rejected": -0.9556556344032288,
|
|
"logps/chosen": -714.67626953125,
|
|
"logps/ref_chosen": -52.54943084716797,
|
|
"logps/ref_rejected": -113.67464447021484,
|
|
"logps/rejected": -1410.750244140625,
|
|
"loss": 0.9459,
|
|
"margin_dpo/margin_mean": 634.9487915039062,
|
|
"margin_dpo/margin_std": 687.5736083984375,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 0.9647577092511013,
|
|
"fcm_dpo/beta": 0.0009771925397217274,
|
|
"fcm_dpo/delta": -0.10524962842464447,
|
|
"fcm_dpo/margin": 511.46331787109375,
|
|
"fcm_dpo/q_t": 0.3879910707473755,
|
|
"grad_norm": 39.690528869628906,
|
|
"learning_rate": 2.055847060721566e-09,
|
|
"logits/chosen": -1.0206704139709473,
|
|
"logits/rejected": -1.0610435009002686,
|
|
"logps/chosen": -666.410888671875,
|
|
"logps/ref_chosen": -46.700538635253906,
|
|
"logps/ref_rejected": -97.91487121582031,
|
|
"logps/rejected": -1229.0885009765625,
|
|
"loss": 1.0394,
|
|
"margin_dpo/margin_mean": 511.46331787109375,
|
|
"margin_dpo/margin_std": 661.5406494140625,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 0.9662261380323054,
|
|
"fcm_dpo/beta": 0.0009689409052953124,
|
|
"fcm_dpo/delta": 0.006780218333005905,
|
|
"fcm_dpo/margin": 405.5053405761719,
|
|
"fcm_dpo/q_t": 0.409820556640625,
|
|
"grad_norm": 38.81819534301758,
|
|
"learning_rate": 1.8948725820160662e-09,
|
|
"logits/chosen": -0.9818142652511597,
|
|
"logits/rejected": -1.0077033042907715,
|
|
"logps/chosen": -744.4568481445312,
|
|
"logps/ref_chosen": -60.95820999145508,
|
|
"logps/ref_rejected": -95.93949127197266,
|
|
"logps/rejected": -1184.943359375,
|
|
"loss": 1.1084,
|
|
"margin_dpo/margin_mean": 405.5053405761719,
|
|
"margin_dpo/margin_std": 579.2117919921875,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 0.9676945668135095,
|
|
"fcm_dpo/beta": 0.0009741444955579937,
|
|
"fcm_dpo/delta": 0.01644733175635338,
|
|
"fcm_dpo/margin": 394.2469482421875,
|
|
"fcm_dpo/q_t": 0.41280660033226013,
|
|
"grad_norm": 38.62456512451172,
|
|
"learning_rate": 1.7404359041573723e-09,
|
|
"logits/chosen": -0.9040530920028687,
|
|
"logits/rejected": -0.8700916767120361,
|
|
"logps/chosen": -667.4215698242188,
|
|
"logps/ref_chosen": -76.74298095703125,
|
|
"logps/ref_rejected": -87.4709701538086,
|
|
"logps/rejected": -1072.396484375,
|
|
"loss": 1.1053,
|
|
"margin_dpo/margin_mean": 394.2469482421875,
|
|
"margin_dpo/margin_std": 549.975830078125,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 0.9691629955947136,
|
|
"fcm_dpo/beta": 0.0009657872142270207,
|
|
"fcm_dpo/delta": -0.09469971060752869,
|
|
"fcm_dpo/margin": 507.5157470703125,
|
|
"fcm_dpo/q_t": 0.3888697624206543,
|
|
"grad_norm": 44.33787536621094,
|
|
"learning_rate": 1.592541096695571e-09,
|
|
"logits/chosen": -0.9871212840080261,
|
|
"logits/rejected": -1.0068674087524414,
|
|
"logps/chosen": -680.8031005859375,
|
|
"logps/ref_chosen": -59.04788589477539,
|
|
"logps/ref_rejected": -75.96005249023438,
|
|
"logps/rejected": -1205.23095703125,
|
|
"loss": 1.0367,
|
|
"margin_dpo/margin_mean": 507.5157470703125,
|
|
"margin_dpo/margin_std": 621.5484619140625,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.9706314243759178,
|
|
"fcm_dpo/beta": 0.0009580702753737569,
|
|
"fcm_dpo/delta": -0.03481549024581909,
|
|
"fcm_dpo/margin": 452.1288757324219,
|
|
"fcm_dpo/q_t": 0.40379828214645386,
|
|
"grad_norm": 48.46449279785156,
|
|
"learning_rate": 1.4511920567963908e-09,
|
|
"logits/chosen": -0.9898974895477295,
|
|
"logits/rejected": -1.0011284351348877,
|
|
"logps/chosen": -606.3502197265625,
|
|
"logps/ref_chosen": -50.673973083496094,
|
|
"logps/ref_rejected": -86.00569152832031,
|
|
"logps/rejected": -1093.810791015625,
|
|
"loss": 1.0762,
|
|
"margin_dpo/margin_mean": 452.12884521484375,
|
|
"margin_dpo/margin_std": 627.0391235351562,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.9720998531571219,
|
|
"fcm_dpo/beta": 0.000960524077527225,
|
|
"fcm_dpo/delta": 0.052273206412792206,
|
|
"fcm_dpo/margin": 363.91009521484375,
|
|
"fcm_dpo/q_t": 0.42221707105636597,
|
|
"grad_norm": 28.411582946777344,
|
|
"learning_rate": 1.3163925091384532e-09,
|
|
"logits/chosen": -0.9227521419525146,
|
|
"logits/rejected": -0.9219968318939209,
|
|
"logps/chosen": -726.12939453125,
|
|
"logps/ref_chosen": -69.26106262207031,
|
|
"logps/ref_rejected": -89.05593872070312,
|
|
"logps/rejected": -1109.834228515625,
|
|
"loss": 1.1608,
|
|
"margin_dpo/margin_mean": 363.9100341796875,
|
|
"margin_dpo/margin_std": 666.9813232421875,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 0.973568281938326,
|
|
"fcm_dpo/beta": 0.0009597113821655512,
|
|
"fcm_dpo/delta": -0.011007752269506454,
|
|
"fcm_dpo/margin": 427.71435546875,
|
|
"fcm_dpo/q_t": 0.41027143597602844,
|
|
"grad_norm": 28.180540084838867,
|
|
"learning_rate": 1.1881460058152382e-09,
|
|
"logits/chosen": -1.013875961303711,
|
|
"logits/rejected": -1.0345901250839233,
|
|
"logps/chosen": -687.0326538085938,
|
|
"logps/ref_chosen": -64.87890625,
|
|
"logps/ref_rejected": -113.92536926269531,
|
|
"logps/rejected": -1163.79345703125,
|
|
"loss": 1.1218,
|
|
"margin_dpo/margin_mean": 427.7143859863281,
|
|
"margin_dpo/margin_std": 725.150634765625,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 0.9750367107195301,
|
|
"fcm_dpo/beta": 0.000947619671933353,
|
|
"fcm_dpo/delta": -0.06448528170585632,
|
|
"fcm_dpo/margin": 486.3853759765625,
|
|
"fcm_dpo/q_t": 0.3969351351261139,
|
|
"grad_norm": 32.270606994628906,
|
|
"learning_rate": 1.066455926241383e-09,
|
|
"logits/chosen": -0.9712049961090088,
|
|
"logits/rejected": -1.0025453567504883,
|
|
"logps/chosen": -725.9244384765625,
|
|
"logps/ref_chosen": -60.88847351074219,
|
|
"logps/ref_rejected": -105.521728515625,
|
|
"logps/rejected": -1256.943115234375,
|
|
"loss": 1.0631,
|
|
"margin_dpo/margin_mean": 486.3853454589844,
|
|
"margin_dpo/margin_std": 648.6405639648438,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 0.9765051395007343,
|
|
"fcm_dpo/beta": 0.0009507514769211411,
|
|
"fcm_dpo/delta": 0.013285509310662746,
|
|
"fcm_dpo/margin": 407.2781677246094,
|
|
"fcm_dpo/q_t": 0.41020679473876953,
|
|
"grad_norm": 47.90570831298828,
|
|
"learning_rate": 9.513254770636137e-10,
|
|
"logits/chosen": -1.0707690715789795,
|
|
"logits/rejected": -1.0900096893310547,
|
|
"logps/chosen": -645.716552734375,
|
|
"logps/ref_chosen": -60.56413269042969,
|
|
"logps/ref_rejected": -84.80882263183594,
|
|
"logps/rejected": -1077.239501953125,
|
|
"loss": 1.0876,
|
|
"margin_dpo/margin_mean": 407.2781677246094,
|
|
"margin_dpo/margin_std": 489.22955322265625,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.9779735682819384,
|
|
"fcm_dpo/beta": 0.0009521855972707272,
|
|
"fcm_dpo/delta": 0.014441363513469696,
|
|
"fcm_dpo/margin": 405.50140380859375,
|
|
"fcm_dpo/q_t": 0.412578284740448,
|
|
"grad_norm": 29.04341697692871,
|
|
"learning_rate": 8.427576920763956e-10,
|
|
"logits/chosen": -0.878127932548523,
|
|
"logits/rejected": -0.8861696124076843,
|
|
"logps/chosen": -688.6695556640625,
|
|
"logps/ref_chosen": -64.41996002197266,
|
|
"logps/ref_rejected": -95.8916244506836,
|
|
"logps/rejected": -1125.642578125,
|
|
"loss": 1.1069,
|
|
"margin_dpo/margin_mean": 405.50140380859375,
|
|
"margin_dpo/margin_std": 567.3914184570312,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 0.9794419970631424,
|
|
"fcm_dpo/beta": 0.0009428112534806132,
|
|
"fcm_dpo/delta": -0.06218276545405388,
|
|
"fcm_dpo/margin": 486.9377136230469,
|
|
"fcm_dpo/q_t": 0.3959430158138275,
|
|
"grad_norm": 45.73838424682617,
|
|
"learning_rate": 7.407554321417764e-10,
|
|
"logits/chosen": -0.9214434623718262,
|
|
"logits/rejected": -0.9194406867027283,
|
|
"logps/chosen": -773.194091796875,
|
|
"logps/ref_chosen": -69.27702331542969,
|
|
"logps/ref_rejected": -87.83549499511719,
|
|
"logps/rejected": -1278.6903076171875,
|
|
"loss": 1.0586,
|
|
"margin_dpo/margin_mean": 486.937744140625,
|
|
"margin_dpo/margin_std": 627.3309326171875,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 0.9809104258443465,
|
|
"fcm_dpo/beta": 0.0009598816395737231,
|
|
"fcm_dpo/delta": 0.09079144895076752,
|
|
"fcm_dpo/margin": 324.0137939453125,
|
|
"fcm_dpo/q_t": 0.4328286647796631,
|
|
"grad_norm": 54.188987731933594,
|
|
"learning_rate": 6.453213851142225e-10,
|
|
"logits/chosen": -0.9863263964653015,
|
|
"logits/rejected": -0.9880591630935669,
|
|
"logps/chosen": -829.204345703125,
|
|
"logps/ref_chosen": -72.60400390625,
|
|
"logps/ref_rejected": -103.73905944824219,
|
|
"logps/rejected": -1184.353271484375,
|
|
"loss": 1.2095,
|
|
"margin_dpo/margin_mean": 324.0137634277344,
|
|
"margin_dpo/margin_std": 717.3749389648438,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 0.9823788546255506,
|
|
"fcm_dpo/beta": 0.0009502802276983857,
|
|
"fcm_dpo/delta": -0.07527793198823929,
|
|
"fcm_dpo/margin": 496.47698974609375,
|
|
"fcm_dpo/q_t": 0.3927857577800751,
|
|
"grad_norm": 28.336467742919922,
|
|
"learning_rate": 5.564580657695939e-10,
|
|
"logits/chosen": -0.9510325193405151,
|
|
"logits/rejected": -0.9615212678909302,
|
|
"logps/chosen": -596.826416015625,
|
|
"logps/ref_chosen": -46.116416931152344,
|
|
"logps/ref_rejected": -77.92434692382812,
|
|
"logps/rejected": -1125.111328125,
|
|
"loss": 1.0523,
|
|
"margin_dpo/margin_mean": 496.47698974609375,
|
|
"margin_dpo/margin_std": 634.5511474609375,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 0.9838472834067548,
|
|
"fcm_dpo/beta": 0.0009398977854289114,
|
|
"fcm_dpo/delta": -0.06452971696853638,
|
|
"fcm_dpo/margin": 490.96636962890625,
|
|
"fcm_dpo/q_t": 0.3954104483127594,
|
|
"grad_norm": 30.532726287841797,
|
|
"learning_rate": 4.741678157389739e-10,
|
|
"logits/chosen": -0.8932855129241943,
|
|
"logits/rejected": -0.9082292318344116,
|
|
"logps/chosen": -611.8515625,
|
|
"logps/ref_chosen": -62.34575271606445,
|
|
"logps/ref_rejected": -96.9405517578125,
|
|
"logps/rejected": -1137.412841796875,
|
|
"loss": 1.0638,
|
|
"margin_dpo/margin_mean": 490.96636962890625,
|
|
"margin_dpo/margin_std": 644.054931640625,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.9853157121879589,
|
|
"fcm_dpo/beta": 0.0009400290437042713,
|
|
"fcm_dpo/delta": 0.02662300132215023,
|
|
"fcm_dpo/margin": 397.8563232421875,
|
|
"fcm_dpo/q_t": 0.4140617549419403,
|
|
"grad_norm": 37.733253479003906,
|
|
"learning_rate": 3.9845280344705245e-10,
|
|
"logits/chosen": -1.0189416408538818,
|
|
"logits/rejected": -1.0476462841033936,
|
|
"logps/chosen": -758.807373046875,
|
|
"logps/ref_chosen": -48.00010681152344,
|
|
"logps/ref_rejected": -83.81932067871094,
|
|
"logps/rejected": -1192.48291015625,
|
|
"loss": 1.1326,
|
|
"margin_dpo/margin_mean": 397.8563232421875,
|
|
"margin_dpo/margin_std": 642.0556640625,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 0.986784140969163,
|
|
"fcm_dpo/beta": 0.0009393775835633278,
|
|
"fcm_dpo/delta": 0.019410815089941025,
|
|
"fcm_dpo/margin": 405.93304443359375,
|
|
"fcm_dpo/q_t": 0.4160517454147339,
|
|
"grad_norm": 52.242919921875,
|
|
"learning_rate": 3.293150240547549e-10,
|
|
"logits/chosen": -1.0485416650772095,
|
|
"logits/rejected": -1.0544018745422363,
|
|
"logps/chosen": -830.88720703125,
|
|
"logps/ref_chosen": -58.58328628540039,
|
|
"logps/ref_rejected": -93.14015197753906,
|
|
"logps/rejected": -1271.3770751953125,
|
|
"loss": 1.145,
|
|
"margin_dpo/margin_mean": 405.9330139160156,
|
|
"margin_dpo/margin_std": 711.9027099609375,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 0.9882525697503671,
|
|
"fcm_dpo/beta": 0.0009472850942984223,
|
|
"fcm_dpo/delta": 0.04647299647331238,
|
|
"fcm_dpo/margin": 374.96588134765625,
|
|
"fcm_dpo/q_t": 0.41997867822647095,
|
|
"grad_norm": 32.77516555786133,
|
|
"learning_rate": 2.6675629940689504e-10,
|
|
"logits/chosen": -0.9867175817489624,
|
|
"logits/rejected": -0.9888732433319092,
|
|
"logps/chosen": -728.9349365234375,
|
|
"logps/ref_chosen": -46.72320556640625,
|
|
"logps/ref_rejected": -85.29623413085938,
|
|
"logps/rejected": -1142.473876953125,
|
|
"loss": 1.1382,
|
|
"margin_dpo/margin_mean": 374.96588134765625,
|
|
"margin_dpo/margin_std": 617.45556640625,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 0.9897209985315712,
|
|
"fcm_dpo/beta": 0.0009443633025512099,
|
|
"fcm_dpo/delta": -0.0580584779381752,
|
|
"fcm_dpo/margin": 482.2635498046875,
|
|
"fcm_dpo/q_t": 0.39911842346191406,
|
|
"grad_norm": 40.93299865722656,
|
|
"learning_rate": 2.1077827798404725e-10,
|
|
"logits/chosen": -0.9459275007247925,
|
|
"logits/rejected": -0.9650304913520813,
|
|
"logps/chosen": -617.8692016601562,
|
|
"logps/ref_chosen": -45.445526123046875,
|
|
"logps/ref_rejected": -70.04593658447266,
|
|
"logps/rejected": -1124.733154296875,
|
|
"loss": 1.0626,
|
|
"margin_dpo/margin_mean": 482.26348876953125,
|
|
"margin_dpo/margin_std": 641.1407470703125,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 0.9911894273127754,
|
|
"fcm_dpo/beta": 0.0009247527341358364,
|
|
"fcm_dpo/delta": -0.05846470221877098,
|
|
"fcm_dpo/margin": 491.4967956542969,
|
|
"fcm_dpo/q_t": 0.39873215556144714,
|
|
"grad_norm": 28.94685935974121,
|
|
"learning_rate": 1.6138243485910863e-10,
|
|
"logits/chosen": -0.9571169018745422,
|
|
"logits/rejected": -0.9678725004196167,
|
|
"logps/chosen": -673.1968994140625,
|
|
"logps/ref_chosen": -44.17628479003906,
|
|
"logps/ref_rejected": -74.09197998046875,
|
|
"logps/rejected": -1194.609375,
|
|
"loss": 1.0628,
|
|
"margin_dpo/margin_mean": 491.4967956542969,
|
|
"margin_dpo/margin_std": 623.8955078125,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.9926578560939795,
|
|
"fcm_dpo/beta": 0.0009249091381207108,
|
|
"fcm_dpo/delta": -0.03686067834496498,
|
|
"fcm_dpo/margin": 470.5931091308594,
|
|
"fcm_dpo/q_t": 0.40108269453048706,
|
|
"grad_norm": 26.681962966918945,
|
|
"learning_rate": 1.1857007165852472e-10,
|
|
"logits/chosen": -0.9203785061836243,
|
|
"logits/rejected": -0.9312641620635986,
|
|
"logps/chosen": -757.725830078125,
|
|
"logps/ref_chosen": -71.39852905273438,
|
|
"logps/ref_rejected": -88.3587646484375,
|
|
"logps/rejected": -1245.279052734375,
|
|
"loss": 1.0637,
|
|
"margin_dpo/margin_mean": 470.59307861328125,
|
|
"margin_dpo/margin_std": 574.8800048828125,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 0.9941262848751835,
|
|
"fcm_dpo/beta": 0.0009184239897876978,
|
|
"fcm_dpo/delta": -0.015428077429533005,
|
|
"fcm_dpo/margin": 451.59100341796875,
|
|
"fcm_dpo/q_t": 0.4104122519493103,
|
|
"grad_norm": 28.66457176208496,
|
|
"learning_rate": 8.23423165278725e-11,
|
|
"logits/chosen": -0.9908117055892944,
|
|
"logits/rejected": -0.9794431328773499,
|
|
"logps/chosen": -754.1339111328125,
|
|
"logps/ref_chosen": -56.527435302734375,
|
|
"logps/ref_rejected": -78.22654724121094,
|
|
"logps/rejected": -1227.424072265625,
|
|
"loss": 1.1016,
|
|
"margin_dpo/margin_mean": 451.59100341796875,
|
|
"margin_dpo/margin_std": 696.70068359375,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 0.9955947136563876,
|
|
"fcm_dpo/beta": 0.0009117278386838734,
|
|
"fcm_dpo/delta": -0.07986919581890106,
|
|
"fcm_dpo/margin": 522.123779296875,
|
|
"fcm_dpo/q_t": 0.39320850372314453,
|
|
"grad_norm": 41.66019821166992,
|
|
"learning_rate": 5.270012410216185e-11,
|
|
"logits/chosen": -0.915215015411377,
|
|
"logits/rejected": -0.947140097618103,
|
|
"logps/chosen": -621.6987915039062,
|
|
"logps/ref_chosen": -46.13447570800781,
|
|
"logps/ref_rejected": -80.60462951660156,
|
|
"logps/rejected": -1178.292724609375,
|
|
"loss": 1.0567,
|
|
"margin_dpo/margin_mean": 522.123779296875,
|
|
"margin_dpo/margin_std": 695.4920654296875,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 0.9970631424375918,
|
|
"fcm_dpo/beta": 0.0009118493180721998,
|
|
"fcm_dpo/delta": 0.064061738550663,
|
|
"fcm_dpo/margin": 370.836669921875,
|
|
"fcm_dpo/q_t": 0.42350929975509644,
|
|
"grad_norm": 31.21086883544922,
|
|
"learning_rate": 2.9644275480772416e-11,
|
|
"logits/chosen": -0.9314343929290771,
|
|
"logits/rejected": -0.9189698696136475,
|
|
"logps/chosen": -707.2667236328125,
|
|
"logps/ref_chosen": -50.294921875,
|
|
"logps/ref_rejected": -76.59813690185547,
|
|
"logps/rejected": -1104.4066162109375,
|
|
"loss": 1.1437,
|
|
"margin_dpo/margin_mean": 370.836669921875,
|
|
"margin_dpo/margin_std": 591.001220703125,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 0.9985315712187959,
|
|
"fcm_dpo/beta": 0.0009041996672749519,
|
|
"fcm_dpo/delta": -0.052669595927000046,
|
|
"fcm_dpo/margin": 497.2313537597656,
|
|
"fcm_dpo/q_t": 0.39771518111228943,
|
|
"grad_norm": 35.252685546875,
|
|
"learning_rate": 1.31753782067201e-11,
|
|
"logits/chosen": -0.9416483640670776,
|
|
"logits/rejected": -0.9650506973266602,
|
|
"logps/chosen": -763.13818359375,
|
|
"logps/ref_chosen": -76.91569519042969,
|
|
"logps/ref_rejected": -112.384765625,
|
|
"logps/rejected": -1295.838623046875,
|
|
"loss": 1.0844,
|
|
"margin_dpo/margin_mean": 497.2313537597656,
|
|
"margin_dpo/margin_std": 722.805419921875,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"fcm_dpo/beta": 0.000918180332519114,
|
|
"fcm_dpo/delta": 0.040153294801712036,
|
|
"fcm_dpo/margin": 392.08013916015625,
|
|
"fcm_dpo/q_t": 0.4184055030345917,
|
|
"grad_norm": 39.58386993408203,
|
|
"learning_rate": 3.2938662507808745e-12,
|
|
"logits/chosen": -1.003857135772705,
|
|
"logits/rejected": -1.0180325508117676,
|
|
"logps/chosen": -744.2374267578125,
|
|
"logps/ref_chosen": -60.957279205322266,
|
|
"logps/ref_rejected": -88.55797576904297,
|
|
"logps/rejected": -1163.918212890625,
|
|
"loss": 1.1386,
|
|
"margin_dpo/margin_mean": 392.08013916015625,
|
|
"margin_dpo/margin_std": 609.6609497070312,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"step": 681,
|
|
"total_flos": 0.0,
|
|
"train_loss": 1.0910053594522013,
|
|
"train_runtime": 1869.3984,
|
|
"train_samples_per_second": 23.322,
|
|
"train_steps_per_second": 0.364
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 681,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 50,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|