Model: W-61/llama3-8b-base-new-method-s_star0.6-20260425-180936 Source: Original Platform
9141 lines
335 KiB
JSON
9141 lines
335 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9989528795811519,
|
|
"eval_steps": 200,
|
|
"global_step": 477,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0020942408376963353,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.02793481945991516,
|
|
"fcm_dpo/q_t": 0.500069797039032,
|
|
"grad_norm": 28.592390060424805,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": -0.5898098945617676,
|
|
"logits/rejected": -0.604260265827179,
|
|
"logps/chosen": -275.28570556640625,
|
|
"logps/ref_chosen": -275.2312927246094,
|
|
"logps/ref_rejected": -222.9380340576172,
|
|
"logps/rejected": -222.96453857421875,
|
|
"loss": 5.5463,
|
|
"margin_dpo/margin_mean": -0.02793477475643158,
|
|
"margin_dpo/margin_std": 0.5724214911460876,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.004188481675392671,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.014312177896499634,
|
|
"fcm_dpo/q_t": 0.4999642074108124,
|
|
"grad_norm": 27.881120681762695,
|
|
"learning_rate": 1.0416666666666666e-08,
|
|
"logits/chosen": -0.6574729681015015,
|
|
"logits/rejected": -0.6464410424232483,
|
|
"logps/chosen": -264.7165222167969,
|
|
"logps/ref_chosen": -264.7611083984375,
|
|
"logps/ref_rejected": -242.5597686767578,
|
|
"logps/rejected": -242.52951049804688,
|
|
"loss": 5.5446,
|
|
"margin_dpo/margin_mean": 0.014312252402305603,
|
|
"margin_dpo/margin_std": 0.6423971652984619,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.0062827225130890054,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.029146358370780945,
|
|
"fcm_dpo/q_t": 0.4999271333217621,
|
|
"grad_norm": 25.850038528442383,
|
|
"learning_rate": 2.083333333333333e-08,
|
|
"logits/chosen": -0.6840031743049622,
|
|
"logits/rejected": -0.7351865172386169,
|
|
"logps/chosen": -274.1335754394531,
|
|
"logps/ref_chosen": -274.1018981933594,
|
|
"logps/ref_rejected": -286.5882568359375,
|
|
"logps/rejected": -286.64910888671875,
|
|
"loss": 5.5441,
|
|
"margin_dpo/margin_mean": 0.02914564311504364,
|
|
"margin_dpo/margin_std": 0.7203992605209351,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.008376963350785341,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.10328760743141174,
|
|
"fcm_dpo/q_t": 0.499741792678833,
|
|
"grad_norm": 31.70708656311035,
|
|
"learning_rate": 3.125e-08,
|
|
"logits/chosen": -0.6172086000442505,
|
|
"logits/rejected": -0.6114800572395325,
|
|
"logps/chosen": -329.83612060546875,
|
|
"logps/ref_chosen": -329.8382568359375,
|
|
"logps/ref_rejected": -303.2850646972656,
|
|
"logps/rejected": -303.3861999511719,
|
|
"loss": 5.5411,
|
|
"margin_dpo/margin_mean": 0.10328748822212219,
|
|
"margin_dpo/margin_std": 0.8034393787384033,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.010471204188481676,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.01281556487083435,
|
|
"fcm_dpo/q_t": 0.4999679923057556,
|
|
"grad_norm": 29.54966163635254,
|
|
"learning_rate": 4.166666666666666e-08,
|
|
"logits/chosen": -0.5715648531913757,
|
|
"logits/rejected": -0.587770938873291,
|
|
"logps/chosen": -301.7329406738281,
|
|
"logps/ref_chosen": -301.7389221191406,
|
|
"logps/ref_rejected": -274.7654724121094,
|
|
"logps/rejected": -274.77227783203125,
|
|
"loss": 5.5447,
|
|
"margin_dpo/margin_mean": 0.012814819812774658,
|
|
"margin_dpo/margin_std": 0.8004137277603149,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.012565445026178011,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.05144025385379791,
|
|
"fcm_dpo/q_t": 0.4998714029788971,
|
|
"grad_norm": 28.23720932006836,
|
|
"learning_rate": 5.208333333333333e-08,
|
|
"logits/chosen": -0.6801129579544067,
|
|
"logits/rejected": -0.6429607272148132,
|
|
"logps/chosen": -285.62481689453125,
|
|
"logps/ref_chosen": -285.6946716308594,
|
|
"logps/ref_rejected": -245.8200225830078,
|
|
"logps/rejected": -245.80160522460938,
|
|
"loss": 5.5432,
|
|
"margin_dpo/margin_mean": 0.051440998911857605,
|
|
"margin_dpo/margin_std": 0.691977858543396,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.014659685863874346,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.057578980922698975,
|
|
"fcm_dpo/q_t": 0.5001440048217773,
|
|
"grad_norm": 28.544734954833984,
|
|
"learning_rate": 6.25e-08,
|
|
"logits/chosen": -0.5832664966583252,
|
|
"logits/rejected": -0.6165621280670166,
|
|
"logps/chosen": -264.64544677734375,
|
|
"logps/ref_chosen": -264.65545654296875,
|
|
"logps/ref_rejected": -253.10305786132812,
|
|
"logps/rejected": -253.03549194335938,
|
|
"loss": 5.5475,
|
|
"margin_dpo/margin_mean": -0.05757877230644226,
|
|
"margin_dpo/margin_std": 0.6711597442626953,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.016753926701570682,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.09753617644309998,
|
|
"fcm_dpo/q_t": 0.4997561573982239,
|
|
"grad_norm": 30.755247116088867,
|
|
"learning_rate": 7.291666666666667e-08,
|
|
"logits/chosen": -0.6714497804641724,
|
|
"logits/rejected": -0.6773282885551453,
|
|
"logps/chosen": -354.1408996582031,
|
|
"logps/ref_chosen": -354.1887512207031,
|
|
"logps/ref_rejected": -282.9112243652344,
|
|
"logps/rejected": -282.96087646484375,
|
|
"loss": 5.5413,
|
|
"margin_dpo/margin_mean": 0.097537100315094,
|
|
"margin_dpo/margin_std": 0.7466810345649719,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.018848167539267015,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.026623502373695374,
|
|
"fcm_dpo/q_t": 0.5000665783882141,
|
|
"grad_norm": 27.906946182250977,
|
|
"learning_rate": 8.333333333333333e-08,
|
|
"logits/chosen": -0.6355319023132324,
|
|
"logits/rejected": -0.6535608768463135,
|
|
"logps/chosen": -285.5481872558594,
|
|
"logps/ref_chosen": -285.5502014160156,
|
|
"logps/ref_rejected": -267.99664306640625,
|
|
"logps/rejected": -267.9679870605469,
|
|
"loss": 5.5463,
|
|
"margin_dpo/margin_mean": -0.0266236811876297,
|
|
"margin_dpo/margin_std": 0.6391922831535339,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.020942408376963352,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.023563116788864136,
|
|
"fcm_dpo/q_t": 0.4999410808086395,
|
|
"grad_norm": 26.76718521118164,
|
|
"learning_rate": 9.375e-08,
|
|
"logits/chosen": -0.6935949325561523,
|
|
"logits/rejected": -0.6888067722320557,
|
|
"logps/chosen": -251.90386962890625,
|
|
"logps/ref_chosen": -251.91238403320312,
|
|
"logps/ref_rejected": -226.45260620117188,
|
|
"logps/rejected": -226.46763610839844,
|
|
"loss": 5.5443,
|
|
"margin_dpo/margin_mean": 0.0235632061958313,
|
|
"margin_dpo/margin_std": 0.7389193177223206,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.023036649214659685,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.03727307915687561,
|
|
"fcm_dpo/q_t": 0.5000931620597839,
|
|
"grad_norm": 28.944982528686523,
|
|
"learning_rate": 1.0416666666666667e-07,
|
|
"logits/chosen": -0.5943973660469055,
|
|
"logits/rejected": -0.6493593454360962,
|
|
"logps/chosen": -301.0625,
|
|
"logps/ref_chosen": -301.08343505859375,
|
|
"logps/ref_rejected": -259.546630859375,
|
|
"logps/rejected": -259.4883728027344,
|
|
"loss": 5.5467,
|
|
"margin_dpo/margin_mean": -0.037272870540618896,
|
|
"margin_dpo/margin_std": 0.7176087498664856,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.025130890052356022,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.10183002054691315,
|
|
"fcm_dpo/q_t": 0.4997454583644867,
|
|
"grad_norm": 30.07319450378418,
|
|
"learning_rate": 1.1458333333333332e-07,
|
|
"logits/chosen": -0.5795747637748718,
|
|
"logits/rejected": -0.5394208431243896,
|
|
"logps/chosen": -287.56854248046875,
|
|
"logps/ref_chosen": -287.548095703125,
|
|
"logps/ref_rejected": -277.37945556640625,
|
|
"logps/rejected": -277.5017395019531,
|
|
"loss": 5.5412,
|
|
"margin_dpo/margin_mean": 0.10182976722717285,
|
|
"margin_dpo/margin_std": 0.6723535060882568,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.027225130890052355,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.15790200233459473,
|
|
"fcm_dpo/q_t": 0.4996052384376526,
|
|
"grad_norm": 27.2218017578125,
|
|
"learning_rate": 1.25e-07,
|
|
"logits/chosen": -0.6672236323356628,
|
|
"logits/rejected": -0.6754846572875977,
|
|
"logps/chosen": -270.6041564941406,
|
|
"logps/ref_chosen": -270.6664123535156,
|
|
"logps/ref_rejected": -274.6546936035156,
|
|
"logps/rejected": -274.7503662109375,
|
|
"loss": 5.5389,
|
|
"margin_dpo/margin_mean": 0.1579025536775589,
|
|
"margin_dpo/margin_std": 0.6908207535743713,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.02931937172774869,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.01676630973815918,
|
|
"fcm_dpo/q_t": 0.4999580979347229,
|
|
"grad_norm": 28.227462768554688,
|
|
"learning_rate": 1.3541666666666666e-07,
|
|
"logits/chosen": -0.623089611530304,
|
|
"logits/rejected": -0.6518293619155884,
|
|
"logps/chosen": -281.58538818359375,
|
|
"logps/ref_chosen": -281.59320068359375,
|
|
"logps/ref_rejected": -263.52215576171875,
|
|
"logps/rejected": -263.53106689453125,
|
|
"loss": 5.5446,
|
|
"margin_dpo/margin_mean": 0.016765296459197998,
|
|
"margin_dpo/margin_std": 0.6453270316123962,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.031413612565445025,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.08115784823894501,
|
|
"fcm_dpo/q_t": 0.49979710578918457,
|
|
"grad_norm": 30.37261390686035,
|
|
"learning_rate": 1.4583333333333335e-07,
|
|
"logits/chosen": -0.6461591720581055,
|
|
"logits/rejected": -0.6596108675003052,
|
|
"logps/chosen": -298.36016845703125,
|
|
"logps/ref_chosen": -298.45343017578125,
|
|
"logps/ref_rejected": -227.17832946777344,
|
|
"logps/rejected": -227.16622924804688,
|
|
"loss": 5.542,
|
|
"margin_dpo/margin_mean": 0.08115695416927338,
|
|
"margin_dpo/margin_std": 0.6280770301818848,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.033507853403141365,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.1199236512184143,
|
|
"fcm_dpo/q_t": 0.4997002184391022,
|
|
"grad_norm": 30.1366024017334,
|
|
"learning_rate": 1.5624999999999999e-07,
|
|
"logits/chosen": -0.6011725068092346,
|
|
"logits/rejected": -0.5981835722923279,
|
|
"logps/chosen": -293.8862609863281,
|
|
"logps/ref_chosen": -293.96661376953125,
|
|
"logps/ref_rejected": -250.78443908691406,
|
|
"logps/rejected": -250.82400512695312,
|
|
"loss": 5.5404,
|
|
"margin_dpo/margin_mean": 0.11992333829402924,
|
|
"margin_dpo/margin_std": 0.720985472202301,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.0356020942408377,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.06940680742263794,
|
|
"fcm_dpo/q_t": 0.49982649087905884,
|
|
"grad_norm": 27.645227432250977,
|
|
"learning_rate": 1.6666666666666665e-07,
|
|
"logits/chosen": -0.558211624622345,
|
|
"logits/rejected": -0.5835133194923401,
|
|
"logps/chosen": -262.30767822265625,
|
|
"logps/ref_chosen": -262.39398193359375,
|
|
"logps/ref_rejected": -248.500244140625,
|
|
"logps/rejected": -248.48330688476562,
|
|
"loss": 5.5424,
|
|
"margin_dpo/margin_mean": 0.06940683722496033,
|
|
"margin_dpo/margin_std": 0.6322791576385498,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.03769633507853403,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.00013044476509094238,
|
|
"fcm_dpo/q_t": 0.5000002980232239,
|
|
"grad_norm": 29.71380043029785,
|
|
"learning_rate": 1.7708333333333334e-07,
|
|
"logits/chosen": -0.612942636013031,
|
|
"logits/rejected": -0.619144856929779,
|
|
"logps/chosen": -293.71783447265625,
|
|
"logps/ref_chosen": -293.709228515625,
|
|
"logps/ref_rejected": -274.5875244140625,
|
|
"logps/rejected": -274.5960388183594,
|
|
"loss": 5.5453,
|
|
"margin_dpo/margin_mean": -0.00012956559658050537,
|
|
"margin_dpo/margin_std": 0.7896002531051636,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.039790575916230364,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.02526429295539856,
|
|
"fcm_dpo/q_t": 0.4999368190765381,
|
|
"grad_norm": 28.150474548339844,
|
|
"learning_rate": 1.875e-07,
|
|
"logits/chosen": -0.6289379000663757,
|
|
"logits/rejected": -0.6254291534423828,
|
|
"logps/chosen": -280.205322265625,
|
|
"logps/ref_chosen": -280.26568603515625,
|
|
"logps/ref_rejected": -259.9742736816406,
|
|
"logps/rejected": -259.93914794921875,
|
|
"loss": 5.5442,
|
|
"margin_dpo/margin_mean": 0.025263652205467224,
|
|
"margin_dpo/margin_std": 0.7644654512405396,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.041884816753926704,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.10411535203456879,
|
|
"fcm_dpo/q_t": 0.49973970651626587,
|
|
"grad_norm": 29.686153411865234,
|
|
"learning_rate": 1.9791666666666664e-07,
|
|
"logits/chosen": -0.622660219669342,
|
|
"logits/rejected": -0.6548238396644592,
|
|
"logps/chosen": -303.71466064453125,
|
|
"logps/ref_chosen": -303.8954162597656,
|
|
"logps/ref_rejected": -260.214599609375,
|
|
"logps/rejected": -260.13800048828125,
|
|
"loss": 5.5411,
|
|
"margin_dpo/margin_mean": 0.10411512851715088,
|
|
"margin_dpo/margin_std": 0.766339898109436,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.04397905759162304,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.04293261468410492,
|
|
"fcm_dpo/q_t": 0.4998926520347595,
|
|
"grad_norm": 35.22480392456055,
|
|
"learning_rate": 2.0833333333333333e-07,
|
|
"logits/chosen": -0.6408384442329407,
|
|
"logits/rejected": -0.6681733131408691,
|
|
"logps/chosen": -301.4923095703125,
|
|
"logps/ref_chosen": -301.5334777832031,
|
|
"logps/ref_rejected": -280.28900146484375,
|
|
"logps/rejected": -280.2907409667969,
|
|
"loss": 5.5435,
|
|
"margin_dpo/margin_mean": 0.04293195903301239,
|
|
"margin_dpo/margin_std": 0.8277014493942261,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.04607329842931937,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.030557870864868164,
|
|
"fcm_dpo/q_t": 0.500076413154602,
|
|
"grad_norm": 25.291522979736328,
|
|
"learning_rate": 2.1875e-07,
|
|
"logits/chosen": -0.6586352586746216,
|
|
"logits/rejected": -0.6604381799697876,
|
|
"logps/chosen": -259.9430236816406,
|
|
"logps/ref_chosen": -259.9951477050781,
|
|
"logps/ref_rejected": -243.0721435546875,
|
|
"logps/rejected": -242.98948669433594,
|
|
"loss": 5.5465,
|
|
"margin_dpo/margin_mean": -0.030558019876480103,
|
|
"margin_dpo/margin_std": 0.7162632346153259,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.048167539267015703,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.09941744804382324,
|
|
"fcm_dpo/q_t": 0.4997514486312866,
|
|
"grad_norm": 27.887392044067383,
|
|
"learning_rate": 2.2916666666666663e-07,
|
|
"logits/chosen": -0.6176055669784546,
|
|
"logits/rejected": -0.6513772010803223,
|
|
"logps/chosen": -282.0886535644531,
|
|
"logps/ref_chosen": -282.1807556152344,
|
|
"logps/ref_rejected": -265.0758056640625,
|
|
"logps/rejected": -265.0830993652344,
|
|
"loss": 5.5413,
|
|
"margin_dpo/margin_mean": 0.09941692650318146,
|
|
"margin_dpo/margin_std": 0.7664570808410645,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.050261780104712044,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.24128368496894836,
|
|
"fcm_dpo/q_t": 0.49939680099487305,
|
|
"grad_norm": 29.68800163269043,
|
|
"learning_rate": 2.3958333333333335e-07,
|
|
"logits/chosen": -0.6591615676879883,
|
|
"logits/rejected": -0.5714296102523804,
|
|
"logps/chosen": -300.9408874511719,
|
|
"logps/ref_chosen": -301.17962646484375,
|
|
"logps/ref_rejected": -302.12786865234375,
|
|
"logps/rejected": -302.1304016113281,
|
|
"loss": 5.5356,
|
|
"margin_dpo/margin_mean": 0.24128423631191254,
|
|
"margin_dpo/margin_std": 0.7133185267448425,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.05235602094240838,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.16697196662425995,
|
|
"fcm_dpo/q_t": 0.49958258867263794,
|
|
"grad_norm": 26.460615158081055,
|
|
"learning_rate": 2.5e-07,
|
|
"logits/chosen": -0.6041996479034424,
|
|
"logits/rejected": -0.6127534508705139,
|
|
"logps/chosen": -246.56582641601562,
|
|
"logps/ref_chosen": -246.74649047851562,
|
|
"logps/ref_rejected": -235.55638122558594,
|
|
"logps/rejected": -235.54269409179688,
|
|
"loss": 5.5386,
|
|
"margin_dpo/margin_mean": 0.16697131097316742,
|
|
"margin_dpo/margin_std": 0.7036500573158264,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.05445026178010471,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.08371715247631073,
|
|
"fcm_dpo/q_t": 0.499790757894516,
|
|
"grad_norm": 28.732345581054688,
|
|
"learning_rate": 2.604166666666667e-07,
|
|
"logits/chosen": -0.6599952578544617,
|
|
"logits/rejected": -0.6750520467758179,
|
|
"logps/chosen": -281.9931335449219,
|
|
"logps/ref_chosen": -282.1955871582031,
|
|
"logps/ref_rejected": -235.3135528564453,
|
|
"logps/rejected": -235.19482421875,
|
|
"loss": 5.5419,
|
|
"margin_dpo/margin_mean": 0.08371736109256744,
|
|
"margin_dpo/margin_std": 0.8501687049865723,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.05654450261780105,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.1340600550174713,
|
|
"fcm_dpo/q_t": 0.4996648132801056,
|
|
"grad_norm": 27.74333953857422,
|
|
"learning_rate": 2.708333333333333e-07,
|
|
"logits/chosen": -0.6518189907073975,
|
|
"logits/rejected": -0.6709730625152588,
|
|
"logps/chosen": -323.5457763671875,
|
|
"logps/ref_chosen": -323.8563537597656,
|
|
"logps/ref_rejected": -245.968017578125,
|
|
"logps/rejected": -245.7915496826172,
|
|
"loss": 5.5399,
|
|
"margin_dpo/margin_mean": 0.13406014442443848,
|
|
"margin_dpo/margin_std": 0.925900936126709,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.05863874345549738,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.1435232013463974,
|
|
"fcm_dpo/q_t": 0.49964118003845215,
|
|
"grad_norm": 26.3175106048584,
|
|
"learning_rate": 2.8125e-07,
|
|
"logits/chosen": -0.622589111328125,
|
|
"logits/rejected": -0.6318536400794983,
|
|
"logps/chosen": -247.97296142578125,
|
|
"logps/ref_chosen": -248.24673461914062,
|
|
"logps/ref_rejected": -240.0382080078125,
|
|
"logps/rejected": -239.90797424316406,
|
|
"loss": 5.5395,
|
|
"margin_dpo/margin_mean": 0.1435234248638153,
|
|
"margin_dpo/margin_std": 0.7288922071456909,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.060732984293193716,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.14979667961597443,
|
|
"fcm_dpo/q_t": 0.4996255040168762,
|
|
"grad_norm": 29.79783821105957,
|
|
"learning_rate": 2.916666666666667e-07,
|
|
"logits/chosen": -0.599511444568634,
|
|
"logits/rejected": -0.6228891015052795,
|
|
"logps/chosen": -317.9765319824219,
|
|
"logps/ref_chosen": -318.2564392089844,
|
|
"logps/ref_rejected": -286.75848388671875,
|
|
"logps/rejected": -286.62841796875,
|
|
"loss": 5.5393,
|
|
"margin_dpo/margin_mean": 0.1497972011566162,
|
|
"margin_dpo/margin_std": 0.8029959201812744,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.06282722513089005,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.2501027584075928,
|
|
"fcm_dpo/q_t": 0.49937474727630615,
|
|
"grad_norm": 28.9178524017334,
|
|
"learning_rate": 3.020833333333333e-07,
|
|
"logits/chosen": -0.5949351191520691,
|
|
"logits/rejected": -0.6119610071182251,
|
|
"logps/chosen": -252.69354248046875,
|
|
"logps/ref_chosen": -253.0491485595703,
|
|
"logps/ref_rejected": -261.30029296875,
|
|
"logps/rejected": -261.19482421875,
|
|
"loss": 5.5353,
|
|
"margin_dpo/margin_mean": 0.25010228157043457,
|
|
"margin_dpo/margin_std": 0.975698709487915,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.06492146596858639,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.2504439949989319,
|
|
"fcm_dpo/q_t": 0.4993739426136017,
|
|
"grad_norm": 25.57700538635254,
|
|
"learning_rate": 3.1249999999999997e-07,
|
|
"logits/chosen": -0.655745804309845,
|
|
"logits/rejected": -0.690646767616272,
|
|
"logps/chosen": -247.7589569091797,
|
|
"logps/ref_chosen": -248.15301513671875,
|
|
"logps/ref_rejected": -203.17703247070312,
|
|
"logps/rejected": -203.03338623046875,
|
|
"loss": 5.5353,
|
|
"margin_dpo/margin_mean": 0.25044363737106323,
|
|
"margin_dpo/margin_std": 1.0158027410507202,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.06701570680628273,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.33064448833465576,
|
|
"fcm_dpo/q_t": 0.4991734027862549,
|
|
"grad_norm": 29.667165756225586,
|
|
"learning_rate": 3.2291666666666666e-07,
|
|
"logits/chosen": -0.6167346835136414,
|
|
"logits/rejected": -0.622105062007904,
|
|
"logps/chosen": -304.966796875,
|
|
"logps/ref_chosen": -305.5399475097656,
|
|
"logps/ref_rejected": -267.6527099609375,
|
|
"logps/rejected": -267.4101867675781,
|
|
"loss": 5.5321,
|
|
"margin_dpo/margin_mean": 0.3306446075439453,
|
|
"margin_dpo/margin_std": 0.9724135994911194,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.06910994764397906,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.39138373732566833,
|
|
"fcm_dpo/q_t": 0.49902158975601196,
|
|
"grad_norm": 28.317340850830078,
|
|
"learning_rate": 3.333333333333333e-07,
|
|
"logits/chosen": -0.6441166400909424,
|
|
"logits/rejected": -0.6569768190383911,
|
|
"logps/chosen": -285.66668701171875,
|
|
"logps/ref_chosen": -286.2335205078125,
|
|
"logps/ref_rejected": -255.38748168945312,
|
|
"logps/rejected": -255.2120361328125,
|
|
"loss": 5.5297,
|
|
"margin_dpo/margin_mean": 0.39138340950012207,
|
|
"margin_dpo/margin_std": 1.0941178798675537,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.0712041884816754,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.8573173880577087,
|
|
"fcm_dpo/q_t": 0.4978567957878113,
|
|
"grad_norm": 31.08516502380371,
|
|
"learning_rate": 3.4375e-07,
|
|
"logits/chosen": -0.619086503982544,
|
|
"logits/rejected": -0.6280518770217896,
|
|
"logps/chosen": -340.7860107421875,
|
|
"logps/ref_chosen": -341.5920104980469,
|
|
"logps/ref_rejected": -278.8866882324219,
|
|
"logps/rejected": -278.93798828125,
|
|
"loss": 5.5111,
|
|
"margin_dpo/margin_mean": 0.8573174476623535,
|
|
"margin_dpo/margin_std": 1.1765947341918945,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.07329842931937172,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.3640906810760498,
|
|
"fcm_dpo/q_t": 0.499089777469635,
|
|
"grad_norm": 26.597396850585938,
|
|
"learning_rate": 3.541666666666667e-07,
|
|
"logits/chosen": -0.6332607269287109,
|
|
"logits/rejected": -0.653661847114563,
|
|
"logps/chosen": -264.4308166503906,
|
|
"logps/ref_chosen": -265.0795593261719,
|
|
"logps/ref_rejected": -264.4876708984375,
|
|
"logps/rejected": -264.20306396484375,
|
|
"loss": 5.5308,
|
|
"margin_dpo/margin_mean": 0.3640906810760498,
|
|
"margin_dpo/margin_std": 1.240203619003296,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.07539267015706806,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.6718266606330872,
|
|
"fcm_dpo/q_t": 0.4983205497264862,
|
|
"grad_norm": 31.872516632080078,
|
|
"learning_rate": 3.645833333333333e-07,
|
|
"logits/chosen": -0.6042373180389404,
|
|
"logits/rejected": -0.6219602823257446,
|
|
"logps/chosen": -296.499755859375,
|
|
"logps/ref_chosen": -297.3261413574219,
|
|
"logps/ref_rejected": -282.09515380859375,
|
|
"logps/rejected": -281.94061279296875,
|
|
"loss": 5.5186,
|
|
"margin_dpo/margin_mean": 0.6718263030052185,
|
|
"margin_dpo/margin_std": 1.3965107202529907,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.0774869109947644,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.5291406512260437,
|
|
"fcm_dpo/q_t": 0.49867722392082214,
|
|
"grad_norm": 30.831968307495117,
|
|
"learning_rate": 3.75e-07,
|
|
"logits/chosen": -0.6052833795547485,
|
|
"logits/rejected": -0.6201093196868896,
|
|
"logps/chosen": -313.28765869140625,
|
|
"logps/ref_chosen": -314.0340270996094,
|
|
"logps/ref_rejected": -299.3437805175781,
|
|
"logps/rejected": -299.1265563964844,
|
|
"loss": 5.5243,
|
|
"margin_dpo/margin_mean": 0.5291397571563721,
|
|
"margin_dpo/margin_std": 1.5934827327728271,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.07958115183246073,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.6104308366775513,
|
|
"fcm_dpo/q_t": 0.49847403168678284,
|
|
"grad_norm": 28.433164596557617,
|
|
"learning_rate": 3.8541666666666665e-07,
|
|
"logits/chosen": -0.640455424785614,
|
|
"logits/rejected": -0.6528275012969971,
|
|
"logps/chosen": -281.47015380859375,
|
|
"logps/ref_chosen": -282.54119873046875,
|
|
"logps/ref_rejected": -269.7773132324219,
|
|
"logps/rejected": -269.3166809082031,
|
|
"loss": 5.5211,
|
|
"margin_dpo/margin_mean": 0.610431969165802,
|
|
"margin_dpo/margin_std": 1.5897610187530518,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.08167539267015707,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.2843832969665527,
|
|
"fcm_dpo/q_t": 0.49678951501846313,
|
|
"grad_norm": 29.44791603088379,
|
|
"learning_rate": 3.958333333333333e-07,
|
|
"logits/chosen": -0.6190811991691589,
|
|
"logits/rejected": -0.6332811713218689,
|
|
"logps/chosen": -275.41473388671875,
|
|
"logps/ref_chosen": -276.7729187011719,
|
|
"logps/ref_rejected": -249.95889282226562,
|
|
"logps/rejected": -249.88507080078125,
|
|
"loss": 5.4944,
|
|
"margin_dpo/margin_mean": 1.2843828201293945,
|
|
"margin_dpo/margin_std": 1.8405652046203613,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.08376963350785341,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.8125598430633545,
|
|
"fcm_dpo/q_t": 0.49796897172927856,
|
|
"grad_norm": 27.380224227905273,
|
|
"learning_rate": 4.0625e-07,
|
|
"logits/chosen": -0.6235227584838867,
|
|
"logits/rejected": -0.6593804359436035,
|
|
"logps/chosen": -283.1932678222656,
|
|
"logps/ref_chosen": -284.30706787109375,
|
|
"logps/ref_rejected": -244.4459991455078,
|
|
"logps/rejected": -244.14476013183594,
|
|
"loss": 5.5131,
|
|
"margin_dpo/margin_mean": 0.8125599026679993,
|
|
"margin_dpo/margin_std": 1.823110580444336,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.08586387434554973,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.8139923810958862,
|
|
"fcm_dpo/q_t": 0.4979651868343353,
|
|
"grad_norm": 30.188688278198242,
|
|
"learning_rate": 4.1666666666666667e-07,
|
|
"logits/chosen": -0.6231560707092285,
|
|
"logits/rejected": -0.6478135585784912,
|
|
"logps/chosen": -292.701171875,
|
|
"logps/ref_chosen": -293.8151550292969,
|
|
"logps/ref_rejected": -252.16815185546875,
|
|
"logps/rejected": -251.86814880371094,
|
|
"loss": 5.513,
|
|
"margin_dpo/margin_mean": 0.8139930963516235,
|
|
"margin_dpo/margin_std": 1.6932668685913086,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.08795811518324607,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.8815785050392151,
|
|
"fcm_dpo/q_t": 0.49779632687568665,
|
|
"grad_norm": 27.49101448059082,
|
|
"learning_rate": 4.270833333333333e-07,
|
|
"logits/chosen": -0.6369996666908264,
|
|
"logits/rejected": -0.6549193859100342,
|
|
"logps/chosen": -251.56045532226562,
|
|
"logps/ref_chosen": -252.76023864746094,
|
|
"logps/ref_rejected": -261.0414733886719,
|
|
"logps/rejected": -260.7232666015625,
|
|
"loss": 5.5106,
|
|
"margin_dpo/margin_mean": 0.881578803062439,
|
|
"margin_dpo/margin_std": 2.2027502059936523,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.09005235602094241,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.269668459892273,
|
|
"fcm_dpo/q_t": 0.4968262314796448,
|
|
"grad_norm": 29.89678955078125,
|
|
"learning_rate": 4.375e-07,
|
|
"logits/chosen": -0.5942052602767944,
|
|
"logits/rejected": -0.6096649169921875,
|
|
"logps/chosen": -315.5320129394531,
|
|
"logps/ref_chosen": -316.8347473144531,
|
|
"logps/ref_rejected": -273.7649230957031,
|
|
"logps/rejected": -273.73187255859375,
|
|
"loss": 5.4951,
|
|
"margin_dpo/margin_mean": 1.2696670293807983,
|
|
"margin_dpo/margin_std": 2.1477150917053223,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.09214659685863874,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.6501388549804688,
|
|
"fcm_dpo/q_t": 0.49587562680244446,
|
|
"grad_norm": 30.845321655273438,
|
|
"learning_rate": 4.479166666666667e-07,
|
|
"logits/chosen": -0.5972121953964233,
|
|
"logits/rejected": -0.5959709882736206,
|
|
"logps/chosen": -285.3184509277344,
|
|
"logps/ref_chosen": -286.8757019042969,
|
|
"logps/ref_rejected": -282.4681396484375,
|
|
"logps/rejected": -282.5610656738281,
|
|
"loss": 5.4804,
|
|
"margin_dpo/margin_mean": 1.6501388549804688,
|
|
"margin_dpo/margin_std": 2.9746947288513184,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.09424083769633508,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.1927553415298462,
|
|
"fcm_dpo/q_t": 0.4970191717147827,
|
|
"grad_norm": 28.909330368041992,
|
|
"learning_rate": 4.5833333333333327e-07,
|
|
"logits/chosen": -0.6916259527206421,
|
|
"logits/rejected": -0.715716540813446,
|
|
"logps/chosen": -322.6328125,
|
|
"logps/ref_chosen": -324.2633972167969,
|
|
"logps/ref_rejected": -293.09466552734375,
|
|
"logps/rejected": -292.6568298339844,
|
|
"loss": 5.4985,
|
|
"margin_dpo/margin_mean": 1.192754864692688,
|
|
"margin_dpo/margin_std": 2.8390185832977295,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.09633507853403141,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.5148533582687378,
|
|
"fcm_dpo/q_t": 0.4962137043476105,
|
|
"grad_norm": 30.279727935791016,
|
|
"learning_rate": 4.6874999999999996e-07,
|
|
"logits/chosen": -0.6287131309509277,
|
|
"logits/rejected": -0.6423863768577576,
|
|
"logps/chosen": -296.6163635253906,
|
|
"logps/ref_chosen": -298.3357238769531,
|
|
"logps/ref_rejected": -267.66204833984375,
|
|
"logps/rejected": -267.45751953125,
|
|
"loss": 5.4855,
|
|
"margin_dpo/margin_mean": 1.5148537158966064,
|
|
"margin_dpo/margin_std": 2.574115753173828,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.09842931937172775,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.9817731976509094,
|
|
"fcm_dpo/q_t": 0.49754610657691956,
|
|
"grad_norm": 26.394506454467773,
|
|
"learning_rate": 4.791666666666667e-07,
|
|
"logits/chosen": -0.590155303478241,
|
|
"logits/rejected": -0.6099727153778076,
|
|
"logps/chosen": -261.077392578125,
|
|
"logps/ref_chosen": -262.5669250488281,
|
|
"logps/ref_rejected": -258.70989990234375,
|
|
"logps/rejected": -258.20208740234375,
|
|
"loss": 5.5072,
|
|
"margin_dpo/margin_mean": 0.9817725419998169,
|
|
"margin_dpo/margin_std": 3.2907633781433105,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.10052356020942409,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.5850274562835693,
|
|
"fcm_dpo/q_t": 0.4960397481918335,
|
|
"grad_norm": 27.51393699645996,
|
|
"learning_rate": 4.895833333333333e-07,
|
|
"logits/chosen": -0.625287652015686,
|
|
"logits/rejected": -0.6512780785560608,
|
|
"logps/chosen": -267.6490173339844,
|
|
"logps/ref_chosen": -269.4932556152344,
|
|
"logps/ref_rejected": -241.888916015625,
|
|
"logps/rejected": -241.6297149658203,
|
|
"loss": 5.4831,
|
|
"margin_dpo/margin_mean": 1.5850276947021484,
|
|
"margin_dpo/margin_std": 3.051654100418091,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.10261780104712041,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.7378376722335815,
|
|
"fcm_dpo/q_t": 0.4956568479537964,
|
|
"grad_norm": 27.689477920532227,
|
|
"learning_rate": 5e-07,
|
|
"logits/chosen": -0.6677048802375793,
|
|
"logits/rejected": -0.6521282196044922,
|
|
"logps/chosen": -255.6532745361328,
|
|
"logps/ref_chosen": -257.8844909667969,
|
|
"logps/ref_rejected": -256.8912048339844,
|
|
"logps/rejected": -256.3978271484375,
|
|
"loss": 5.4772,
|
|
"margin_dpo/margin_mean": 1.7378380298614502,
|
|
"margin_dpo/margin_std": 3.387692928314209,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.10471204188481675,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.1869100332260132,
|
|
"fcm_dpo/q_t": 0.49703431129455566,
|
|
"grad_norm": 27.907745361328125,
|
|
"learning_rate": 4.999932966293553e-07,
|
|
"logits/chosen": -0.6318182945251465,
|
|
"logits/rejected": -0.6537318229675293,
|
|
"logps/chosen": -299.6126403808594,
|
|
"logps/ref_chosen": -301.62884521484375,
|
|
"logps/ref_rejected": -298.2716064453125,
|
|
"logps/rejected": -297.4422912597656,
|
|
"loss": 5.4995,
|
|
"margin_dpo/margin_mean": 1.1869091987609863,
|
|
"margin_dpo/margin_std": 3.856821298599243,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.1068062827225131,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 2.0278406143188477,
|
|
"fcm_dpo/q_t": 0.49493393301963806,
|
|
"grad_norm": 29.1589298248291,
|
|
"learning_rate": 4.999731868769026e-07,
|
|
"logits/chosen": -0.60748690366745,
|
|
"logits/rejected": -0.6017611026763916,
|
|
"logps/chosen": -267.2236022949219,
|
|
"logps/ref_chosen": -269.37237548828125,
|
|
"logps/ref_rejected": -297.0167541503906,
|
|
"logps/rejected": -296.89581298828125,
|
|
"loss": 5.4671,
|
|
"margin_dpo/margin_mean": 2.0278408527374268,
|
|
"margin_dpo/margin_std": 4.648531436920166,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.10890052356020942,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 3.078564167022705,
|
|
"fcm_dpo/q_t": 0.4923100769519806,
|
|
"grad_norm": 30.302101135253906,
|
|
"learning_rate": 4.99939671821067e-07,
|
|
"logits/chosen": -0.6508050560951233,
|
|
"logits/rejected": -0.6571372151374817,
|
|
"logps/chosen": -304.177978515625,
|
|
"logps/ref_chosen": -306.9028015136719,
|
|
"logps/ref_rejected": -281.24737548828125,
|
|
"logps/rejected": -281.6011047363281,
|
|
"loss": 5.4254,
|
|
"margin_dpo/margin_mean": 3.078564167022705,
|
|
"margin_dpo/margin_std": 4.690369606018066,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.11099476439790576,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 2.22479248046875,
|
|
"fcm_dpo/q_t": 0.4944427013397217,
|
|
"grad_norm": 31.287256240844727,
|
|
"learning_rate": 4.998927532591591e-07,
|
|
"logits/chosen": -0.6520200371742249,
|
|
"logits/rejected": -0.6926702260971069,
|
|
"logps/chosen": -283.11590576171875,
|
|
"logps/ref_chosen": -285.9759521484375,
|
|
"logps/ref_rejected": -273.9073486328125,
|
|
"logps/rejected": -273.2720947265625,
|
|
"loss": 5.4597,
|
|
"margin_dpo/margin_mean": 2.22479248046875,
|
|
"margin_dpo/margin_std": 5.078397274017334,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.1130890052356021,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.866455316543579,
|
|
"fcm_dpo/q_t": 0.49533912539482117,
|
|
"grad_norm": 26.340913772583008,
|
|
"learning_rate": 4.998324337072792e-07,
|
|
"logits/chosen": -0.68650883436203,
|
|
"logits/rejected": -0.6943265795707703,
|
|
"logps/chosen": -303.7992858886719,
|
|
"logps/ref_chosen": -306.504638671875,
|
|
"logps/ref_rejected": -272.67431640625,
|
|
"logps/rejected": -271.8354187011719,
|
|
"loss": 5.4741,
|
|
"margin_dpo/margin_mean": 1.866454839706421,
|
|
"margin_dpo/margin_std": 5.443723678588867,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.11518324607329843,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 2.2475740909576416,
|
|
"fcm_dpo/q_t": 0.494386225938797,
|
|
"grad_norm": 24.877641677856445,
|
|
"learning_rate": 4.997587164001815e-07,
|
|
"logits/chosen": -0.6414747834205627,
|
|
"logits/rejected": -0.6457206606864929,
|
|
"logps/chosen": -220.563720703125,
|
|
"logps/ref_chosen": -222.33013916015625,
|
|
"logps/ref_rejected": -206.59571838378906,
|
|
"logps/rejected": -207.07687377929688,
|
|
"loss": 5.4583,
|
|
"margin_dpo/margin_mean": 2.2475738525390625,
|
|
"margin_dpo/margin_std": 4.910269737243652,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.11727748691099477,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 3.1674323081970215,
|
|
"fcm_dpo/q_t": 0.49209311604499817,
|
|
"grad_norm": 27.445068359375,
|
|
"learning_rate": 4.996716052911017e-07,
|
|
"logits/chosen": -0.6101264357566833,
|
|
"logits/rejected": -0.6247260570526123,
|
|
"logps/chosen": -247.60667419433594,
|
|
"logps/ref_chosen": -250.47816467285156,
|
|
"logps/ref_rejected": -228.25848388671875,
|
|
"logps/rejected": -228.55442810058594,
|
|
"loss": 5.4237,
|
|
"margin_dpo/margin_mean": 3.167431354522705,
|
|
"margin_dpo/margin_std": 5.867389678955078,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.1193717277486911,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 4.167306423187256,
|
|
"fcm_dpo/q_t": 0.48959389328956604,
|
|
"grad_norm": 30.918928146362305,
|
|
"learning_rate": 4.99571105051544e-07,
|
|
"logits/chosen": -0.7010935544967651,
|
|
"logits/rejected": -0.6720656156539917,
|
|
"logps/chosen": -311.27130126953125,
|
|
"logps/ref_chosen": -315.1195373535156,
|
|
"logps/ref_rejected": -272.755615234375,
|
|
"logps/rejected": -273.0746765136719,
|
|
"loss": 5.3836,
|
|
"margin_dpo/margin_mean": 4.167305946350098,
|
|
"margin_dpo/margin_std": 5.622750759124756,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.12146596858638743,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 2.323967933654785,
|
|
"fcm_dpo/q_t": 0.49419358372688293,
|
|
"grad_norm": 27.555803298950195,
|
|
"learning_rate": 4.994572210710314e-07,
|
|
"logits/chosen": -0.6160457730293274,
|
|
"logits/rejected": -0.6402078866958618,
|
|
"logps/chosen": -262.7194519042969,
|
|
"logps/ref_chosen": -265.1816711425781,
|
|
"logps/ref_rejected": -268.2203369140625,
|
|
"logps/rejected": -268.0820617675781,
|
|
"loss": 5.4563,
|
|
"margin_dpo/margin_mean": 2.323967933654785,
|
|
"margin_dpo/margin_std": 5.637367248535156,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.12356020942408377,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.5584101676940918,
|
|
"fcm_dpo/q_t": 0.4961104989051819,
|
|
"grad_norm": 29.654539108276367,
|
|
"learning_rate": 4.993299594568162e-07,
|
|
"logits/chosen": -0.5985250473022461,
|
|
"logits/rejected": -0.5907694697380066,
|
|
"logps/chosen": -284.25274658203125,
|
|
"logps/ref_chosen": -286.35394287109375,
|
|
"logps/ref_rejected": -260.6757507324219,
|
|
"logps/rejected": -260.1329345703125,
|
|
"loss": 5.4889,
|
|
"margin_dpo/margin_mean": 1.5584099292755127,
|
|
"margin_dpo/margin_std": 7.0632781982421875,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.1256544502617801,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 3.4080166816711426,
|
|
"fcm_dpo/q_t": 0.4914897680282593,
|
|
"grad_norm": 28.067386627197266,
|
|
"learning_rate": 4.991893270335525e-07,
|
|
"logits/chosen": -0.6754345297813416,
|
|
"logits/rejected": -0.699802577495575,
|
|
"logps/chosen": -255.8909912109375,
|
|
"logps/ref_chosen": -258.74859619140625,
|
|
"logps/ref_rejected": -255.04893493652344,
|
|
"logps/rejected": -255.59933471679688,
|
|
"loss": 5.4155,
|
|
"margin_dpo/margin_mean": 3.4080190658569336,
|
|
"margin_dpo/margin_std": 7.094330787658691,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.12774869109947645,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 3.202296257019043,
|
|
"fcm_dpo/q_t": 0.49200791120529175,
|
|
"grad_norm": 29.841184616088867,
|
|
"learning_rate": 4.990353313429303e-07,
|
|
"logits/chosen": -0.6437735557556152,
|
|
"logits/rejected": -0.6602544784545898,
|
|
"logps/chosen": -275.47747802734375,
|
|
"logps/ref_chosen": -278.4678955078125,
|
|
"logps/ref_rejected": -252.02720642089844,
|
|
"logps/rejected": -252.23904418945312,
|
|
"loss": 5.4247,
|
|
"margin_dpo/margin_mean": 3.202296257019043,
|
|
"margin_dpo/margin_std": 7.591219425201416,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.12984293193717278,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 3.3970751762390137,
|
|
"fcm_dpo/q_t": 0.4915270209312439,
|
|
"grad_norm": 26.655916213989258,
|
|
"learning_rate": 4.988679806432711e-07,
|
|
"logits/chosen": -0.6097227334976196,
|
|
"logits/rejected": -0.6514406800270081,
|
|
"logps/chosen": -268.9474182128906,
|
|
"logps/ref_chosen": -272.92431640625,
|
|
"logps/ref_rejected": -260.7935485839844,
|
|
"logps/rejected": -260.2137145996094,
|
|
"loss": 5.4163,
|
|
"margin_dpo/margin_mean": 3.39707612991333,
|
|
"margin_dpo/margin_std": 7.437541961669922,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.1319371727748691,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 3.588202476501465,
|
|
"fcm_dpo/q_t": 0.4910445213317871,
|
|
"grad_norm": 28.286346435546875,
|
|
"learning_rate": 4.986872839090852e-07,
|
|
"logits/chosen": -0.6595807671546936,
|
|
"logits/rejected": -0.6659517288208008,
|
|
"logps/chosen": -273.69244384765625,
|
|
"logps/ref_chosen": -277.0889892578125,
|
|
"logps/ref_rejected": -273.3413391113281,
|
|
"logps/rejected": -273.532958984375,
|
|
"loss": 5.4086,
|
|
"margin_dpo/margin_mean": 3.588200807571411,
|
|
"margin_dpo/margin_std": 7.262460708618164,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.13403141361256546,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 4.24724006652832,
|
|
"fcm_dpo/q_t": 0.48940467834472656,
|
|
"grad_norm": 28.308141708374023,
|
|
"learning_rate": 4.9849325083059e-07,
|
|
"logits/chosen": -0.628346860408783,
|
|
"logits/rejected": -0.6231892704963684,
|
|
"logps/chosen": -279.7200927734375,
|
|
"logps/ref_chosen": -283.8244934082031,
|
|
"logps/ref_rejected": -263.29351806640625,
|
|
"logps/rejected": -263.4363708496094,
|
|
"loss": 5.3847,
|
|
"margin_dpo/margin_mean": 4.24724006652832,
|
|
"margin_dpo/margin_std": 8.381464958190918,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.13612565445026178,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 3.2838730812072754,
|
|
"fcm_dpo/q_t": 0.49180322885513306,
|
|
"grad_norm": 27.914520263671875,
|
|
"learning_rate": 4.982858918131906e-07,
|
|
"logits/chosen": -0.6988512277603149,
|
|
"logits/rejected": -0.6669014692306519,
|
|
"logps/chosen": -261.4900817871094,
|
|
"logps/ref_chosen": -264.8699645996094,
|
|
"logps/ref_rejected": -268.5076904296875,
|
|
"logps/rejected": -268.4117431640625,
|
|
"loss": 5.4206,
|
|
"margin_dpo/margin_mean": 3.2838728427886963,
|
|
"margin_dpo/margin_std": 7.35109281539917,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.1382198952879581,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 4.0454559326171875,
|
|
"fcm_dpo/q_t": 0.48991909623146057,
|
|
"grad_norm": 27.6617488861084,
|
|
"learning_rate": 4.980652179769217e-07,
|
|
"logits/chosen": -0.6764880418777466,
|
|
"logits/rejected": -0.696363091468811,
|
|
"logps/chosen": -269.8972473144531,
|
|
"logps/ref_chosen": -272.9283142089844,
|
|
"logps/ref_rejected": -280.94696044921875,
|
|
"logps/rejected": -281.96136474609375,
|
|
"loss": 5.3953,
|
|
"margin_dpo/margin_mean": 4.045454978942871,
|
|
"margin_dpo/margin_std": 9.999269485473633,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.14031413612565444,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 3.675020694732666,
|
|
"fcm_dpo/q_t": 0.4908318519592285,
|
|
"grad_norm": 25.536836624145508,
|
|
"learning_rate": 4.978312411558517e-07,
|
|
"logits/chosen": -0.6852215528488159,
|
|
"logits/rejected": -0.7170518636703491,
|
|
"logps/chosen": -262.1640319824219,
|
|
"logps/ref_chosen": -266.18695068359375,
|
|
"logps/ref_rejected": -250.17405700683594,
|
|
"logps/rejected": -249.82615661621094,
|
|
"loss": 5.4077,
|
|
"margin_dpo/margin_mean": 3.6750199794769287,
|
|
"margin_dpo/margin_std": 8.747812271118164,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.1424083769633508,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 4.583087921142578,
|
|
"fcm_dpo/q_t": 0.48859941959381104,
|
|
"grad_norm": 28.211336135864258,
|
|
"learning_rate": 4.975839738974473e-07,
|
|
"logits/chosen": -0.6900507211685181,
|
|
"logits/rejected": -0.7039142847061157,
|
|
"logps/chosen": -294.9899597167969,
|
|
"logps/ref_chosen": -297.9385986328125,
|
|
"logps/ref_rejected": -261.5141296386719,
|
|
"logps/rejected": -263.14862060546875,
|
|
"loss": 5.3759,
|
|
"margin_dpo/margin_mean": 4.583088397979736,
|
|
"margin_dpo/margin_std": 10.535322189331055,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.14450261780104712,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 6.044898986816406,
|
|
"fcm_dpo/q_t": 0.48493677377700806,
|
|
"grad_norm": 28.641454696655273,
|
|
"learning_rate": 4.97323429461901e-07,
|
|
"logits/chosen": -0.6796502470970154,
|
|
"logits/rejected": -0.7097989320755005,
|
|
"logps/chosen": -261.7384338378906,
|
|
"logps/ref_chosen": -265.6175231933594,
|
|
"logps/ref_rejected": -236.8287353515625,
|
|
"logps/rejected": -238.99456787109375,
|
|
"loss": 5.3173,
|
|
"margin_dpo/margin_mean": 6.044898509979248,
|
|
"margin_dpo/margin_std": 9.810757637023926,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.14659685863874344,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 5.822492599487305,
|
|
"fcm_dpo/q_t": 0.4854995310306549,
|
|
"grad_norm": 28.677330017089844,
|
|
"learning_rate": 4.970496218214204e-07,
|
|
"logits/chosen": -0.6750044822692871,
|
|
"logits/rejected": -0.7083183526992798,
|
|
"logps/chosen": -291.96441650390625,
|
|
"logps/ref_chosen": -296.2259216308594,
|
|
"logps/ref_rejected": -254.68496704101562,
|
|
"logps/rejected": -256.2459411621094,
|
|
"loss": 5.3286,
|
|
"margin_dpo/margin_mean": 5.822491645812988,
|
|
"margin_dpo/margin_std": 10.934935569763184,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.1486910994764398,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 5.134796142578125,
|
|
"fcm_dpo/q_t": 0.4872013032436371,
|
|
"grad_norm": 28.424619674682617,
|
|
"learning_rate": 4.967625656594781e-07,
|
|
"logits/chosen": -0.6523040533065796,
|
|
"logits/rejected": -0.6413918733596802,
|
|
"logps/chosen": -283.8147277832031,
|
|
"logps/ref_chosen": -288.92724609375,
|
|
"logps/ref_rejected": -278.6405334472656,
|
|
"logps/rejected": -278.662841796875,
|
|
"loss": 5.3592,
|
|
"margin_dpo/margin_mean": 5.134795188903809,
|
|
"margin_dpo/margin_std": 12.370285034179688,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.15078534031413612,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 5.098928928375244,
|
|
"fcm_dpo/q_t": 0.4872835576534271,
|
|
"grad_norm": 28.138423919677734,
|
|
"learning_rate": 4.964622763700252e-07,
|
|
"logits/chosen": -0.6939007639884949,
|
|
"logits/rejected": -0.705129861831665,
|
|
"logps/chosen": -233.71646118164062,
|
|
"logps/ref_chosen": -237.0452880859375,
|
|
"logps/ref_rejected": -252.7946319580078,
|
|
"logps/rejected": -254.56471252441406,
|
|
"loss": 5.3564,
|
|
"margin_dpo/margin_mean": 5.098929405212402,
|
|
"margin_dpo/margin_std": 10.444880485534668,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.15287958115183245,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 4.748826503753662,
|
|
"fcm_dpo/q_t": 0.48815372586250305,
|
|
"grad_norm": 27.864391326904297,
|
|
"learning_rate": 4.961487700566646e-07,
|
|
"logits/chosen": -0.659065306186676,
|
|
"logits/rejected": -0.6768229603767395,
|
|
"logps/chosen": -268.7459411621094,
|
|
"logps/ref_chosen": -273.0531005859375,
|
|
"logps/ref_rejected": -246.8330841064453,
|
|
"logps/rejected": -247.2747802734375,
|
|
"loss": 5.3737,
|
|
"margin_dpo/margin_mean": 4.748826503753662,
|
|
"margin_dpo/margin_std": 12.207172393798828,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.1549738219895288,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 4.507737159729004,
|
|
"fcm_dpo/q_t": 0.48877474665641785,
|
|
"grad_norm": 30.305334091186523,
|
|
"learning_rate": 4.958220635317885e-07,
|
|
"logits/chosen": -0.7256600260734558,
|
|
"logits/rejected": -0.7039333581924438,
|
|
"logps/chosen": -338.9497985839844,
|
|
"logps/ref_chosen": -342.2818908691406,
|
|
"logps/ref_rejected": -330.0293884277344,
|
|
"logps/rejected": -331.2049865722656,
|
|
"loss": 5.3817,
|
|
"margin_dpo/margin_mean": 4.5077362060546875,
|
|
"margin_dpo/margin_std": 11.664762496948242,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.15706806282722513,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 6.5859527587890625,
|
|
"fcm_dpo/q_t": 0.48358994722366333,
|
|
"grad_norm": 29.470287322998047,
|
|
"learning_rate": 4.954821743156767e-07,
|
|
"logits/chosen": -0.6431756615638733,
|
|
"logits/rejected": -0.6447348594665527,
|
|
"logps/chosen": -262.26544189453125,
|
|
"logps/ref_chosen": -266.8641662597656,
|
|
"logps/ref_rejected": -276.8699951171875,
|
|
"logps/rejected": -278.8572692871094,
|
|
"loss": 5.2993,
|
|
"margin_dpo/margin_mean": 6.585953712463379,
|
|
"margin_dpo/margin_std": 10.910937309265137,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.15916230366492146,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 7.311924457550049,
|
|
"fcm_dpo/q_t": 0.4818291962146759,
|
|
"grad_norm": 29.281173706054688,
|
|
"learning_rate": 4.951291206355559e-07,
|
|
"logits/chosen": -0.7205427289009094,
|
|
"logits/rejected": -0.7283482551574707,
|
|
"logps/chosen": -277.0059814453125,
|
|
"logps/ref_chosen": -281.174560546875,
|
|
"logps/ref_rejected": -263.6067199707031,
|
|
"logps/rejected": -266.7500305175781,
|
|
"loss": 5.2755,
|
|
"margin_dpo/margin_mean": 7.311923980712891,
|
|
"margin_dpo/margin_std": 12.705620765686035,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.1612565445026178,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 5.805636405944824,
|
|
"fcm_dpo/q_t": 0.4855879247188568,
|
|
"grad_norm": 33.04362106323242,
|
|
"learning_rate": 4.947629214246236e-07,
|
|
"logits/chosen": -0.5541229248046875,
|
|
"logits/rejected": -0.5619992017745972,
|
|
"logps/chosen": -302.3905944824219,
|
|
"logps/ref_chosen": -306.09527587890625,
|
|
"logps/ref_rejected": -253.49569702148438,
|
|
"logps/rejected": -255.5966339111328,
|
|
"loss": 5.3378,
|
|
"margin_dpo/margin_mean": 5.805635452270508,
|
|
"margin_dpo/margin_std": 14.091662406921387,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.16335078534031414,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 8.697511672973633,
|
|
"fcm_dpo/q_t": 0.47841718792915344,
|
|
"grad_norm": 29.72622299194336,
|
|
"learning_rate": 4.943835963210323e-07,
|
|
"logits/chosen": -0.6819251179695129,
|
|
"logits/rejected": -0.6768004298210144,
|
|
"logps/chosen": -253.04547119140625,
|
|
"logps/ref_chosen": -256.90234375,
|
|
"logps/ref_rejected": -211.57154846191406,
|
|
"logps/rejected": -216.41221618652344,
|
|
"loss": 5.2264,
|
|
"margin_dpo/margin_mean": 8.697509765625,
|
|
"margin_dpo/margin_std": 14.436126708984375,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.16544502617801046,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 8.278536796569824,
|
|
"fcm_dpo/q_t": 0.479459285736084,
|
|
"grad_norm": 29.883098602294922,
|
|
"learning_rate": 4.939911656668361e-07,
|
|
"logits/chosen": -0.6628604531288147,
|
|
"logits/rejected": -0.6839243173599243,
|
|
"logps/chosen": -263.1708068847656,
|
|
"logps/ref_chosen": -266.2735595703125,
|
|
"logps/ref_rejected": -251.57257080078125,
|
|
"logps/rejected": -256.74835205078125,
|
|
"loss": 5.2449,
|
|
"margin_dpo/margin_mean": 8.278536796569824,
|
|
"margin_dpo/margin_std": 14.98855972290039,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.16753926701570682,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 6.632655143737793,
|
|
"fcm_dpo/q_t": 0.48353880643844604,
|
|
"grad_norm": 28.937639236450195,
|
|
"learning_rate": 4.935856505068998e-07,
|
|
"logits/chosen": -0.6736690998077393,
|
|
"logits/rejected": -0.7038200497627258,
|
|
"logps/chosen": -285.98919677734375,
|
|
"logps/ref_chosen": -287.8509826660156,
|
|
"logps/ref_rejected": -256.0766296386719,
|
|
"logps/rejected": -260.8474426269531,
|
|
"loss": 5.3041,
|
|
"margin_dpo/margin_mean": 6.632654190063477,
|
|
"margin_dpo/margin_std": 13.19876480102539,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.16963350785340314,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 7.253467559814453,
|
|
"fcm_dpo/q_t": 0.4820139408111572,
|
|
"grad_norm": 28.075214385986328,
|
|
"learning_rate": 4.93167072587771e-07,
|
|
"logits/chosen": -0.6420468091964722,
|
|
"logits/rejected": -0.6412660479545593,
|
|
"logps/chosen": -266.10321044921875,
|
|
"logps/ref_chosen": -268.5232238769531,
|
|
"logps/ref_rejected": -237.81137084960938,
|
|
"logps/rejected": -242.6448211669922,
|
|
"loss": 5.2899,
|
|
"margin_dpo/margin_mean": 7.253467559814453,
|
|
"margin_dpo/margin_std": 16.714815139770508,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.17172774869109947,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 7.97769021987915,
|
|
"fcm_dpo/q_t": 0.48026588559150696,
|
|
"grad_norm": 27.7528018951416,
|
|
"learning_rate": 4.92735454356513e-07,
|
|
"logits/chosen": -0.7254935503005981,
|
|
"logits/rejected": -0.7326993346214294,
|
|
"logps/chosen": -276.9371032714844,
|
|
"logps/ref_chosen": -279.36395263671875,
|
|
"logps/ref_rejected": -236.51365661621094,
|
|
"logps/rejected": -242.0644989013672,
|
|
"loss": 5.2605,
|
|
"margin_dpo/margin_mean": 7.977689743041992,
|
|
"margin_dpo/margin_std": 15.65487289428711,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.17382198952879582,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 8.64671516418457,
|
|
"fcm_dpo/q_t": 0.4785246253013611,
|
|
"grad_norm": 30.814836502075195,
|
|
"learning_rate": 4.922908189595017e-07,
|
|
"logits/chosen": -0.6886410713195801,
|
|
"logits/rejected": -0.6722111105918884,
|
|
"logps/chosen": -273.9360046386719,
|
|
"logps/ref_chosen": -274.21923828125,
|
|
"logps/ref_rejected": -276.2212219238281,
|
|
"logps/rejected": -284.584716796875,
|
|
"loss": 5.243,
|
|
"margin_dpo/margin_mean": 8.646713256835938,
|
|
"margin_dpo/margin_std": 18.04184913635254,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.17591623036649215,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 7.324193477630615,
|
|
"fcm_dpo/q_t": 0.48183199763298035,
|
|
"grad_norm": 29.859872817993164,
|
|
"learning_rate": 4.918331902411841e-07,
|
|
"logits/chosen": -0.7265677452087402,
|
|
"logits/rejected": -0.7404079437255859,
|
|
"logps/chosen": -293.82232666015625,
|
|
"logps/ref_chosen": -294.3975524902344,
|
|
"logps/ref_rejected": -279.81884765625,
|
|
"logps/rejected": -286.56781005859375,
|
|
"loss": 5.2888,
|
|
"margin_dpo/margin_mean": 7.324193477630615,
|
|
"margin_dpo/margin_std": 16.89883804321289,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.17801047120418848,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 5.70173454284668,
|
|
"fcm_dpo/q_t": 0.4858514070510864,
|
|
"grad_norm": 29.388431549072266,
|
|
"learning_rate": 4.913625927427995e-07,
|
|
"logits/chosen": -0.6746452450752258,
|
|
"logits/rejected": -0.6829299330711365,
|
|
"logps/chosen": -245.21981811523438,
|
|
"logps/ref_chosen": -243.66220092773438,
|
|
"logps/ref_rejected": -263.9421691894531,
|
|
"logps/rejected": -271.2015380859375,
|
|
"loss": 5.3473,
|
|
"margin_dpo/margin_mean": 5.701735019683838,
|
|
"margin_dpo/margin_std": 15.451016426086426,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.18010471204188483,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 8.923068046569824,
|
|
"fcm_dpo/q_t": 0.4778454601764679,
|
|
"grad_norm": 34.907039642333984,
|
|
"learning_rate": 4.908790517010636e-07,
|
|
"logits/chosen": -0.6926656365394592,
|
|
"logits/rejected": -0.6865877509117126,
|
|
"logps/chosen": -308.2077941894531,
|
|
"logps/ref_chosen": -309.4306945800781,
|
|
"logps/ref_rejected": -290.91278076171875,
|
|
"logps/rejected": -298.6129455566406,
|
|
"loss": 5.2276,
|
|
"margin_dpo/margin_mean": 8.923067092895508,
|
|
"margin_dpo/margin_std": 17.267658233642578,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.18219895287958116,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 10.27783203125,
|
|
"fcm_dpo/q_t": 0.47462230920791626,
|
|
"grad_norm": 29.65764617919922,
|
|
"learning_rate": 4.903825930468148e-07,
|
|
"logits/chosen": -0.755806028842926,
|
|
"logits/rejected": -0.7502421736717224,
|
|
"logps/chosen": -278.2044677734375,
|
|
"logps/ref_chosen": -278.0277099609375,
|
|
"logps/ref_rejected": -245.70123291015625,
|
|
"logps/rejected": -256.15582275390625,
|
|
"loss": 5.1833,
|
|
"margin_dpo/margin_mean": 10.27783203125,
|
|
"margin_dpo/margin_std": 18.962289810180664,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.18429319371727748,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 8.847644805908203,
|
|
"fcm_dpo/q_t": 0.4781361222267151,
|
|
"grad_norm": 28.80191421508789,
|
|
"learning_rate": 4.898732434036243e-07,
|
|
"logits/chosen": -0.7761508822441101,
|
|
"logits/rejected": -0.7929233312606812,
|
|
"logps/chosen": -268.6051025390625,
|
|
"logps/ref_chosen": -266.5148010253906,
|
|
"logps/ref_rejected": -265.90081787109375,
|
|
"logps/rejected": -276.8387756347656,
|
|
"loss": 5.2374,
|
|
"margin_dpo/margin_mean": 8.847643852233887,
|
|
"margin_dpo/margin_std": 19.153247833251953,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.18638743455497384,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 9.27825927734375,
|
|
"fcm_dpo/q_t": 0.4770185947418213,
|
|
"grad_norm": 30.52988624572754,
|
|
"learning_rate": 4.893510300863676e-07,
|
|
"logits/chosen": -0.7448249459266663,
|
|
"logits/rejected": -0.7356829643249512,
|
|
"logps/chosen": -265.67352294921875,
|
|
"logps/ref_chosen": -265.6893005371094,
|
|
"logps/ref_rejected": -251.49314880371094,
|
|
"logps/rejected": -260.7556457519531,
|
|
"loss": 5.2198,
|
|
"margin_dpo/margin_mean": 9.27825927734375,
|
|
"margin_dpo/margin_std": 18.18901824951172,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.18848167539267016,
|
|
"fcm_dpo/beta": 0.009999998845160007,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 8.289998054504395,
|
|
"fcm_dpo/q_t": 0.4794497489929199,
|
|
"grad_norm": 29.910764694213867,
|
|
"learning_rate": 4.8881598109976e-07,
|
|
"logits/chosen": -0.7497580051422119,
|
|
"logits/rejected": -0.7592126131057739,
|
|
"logps/chosen": -308.5013122558594,
|
|
"logps/ref_chosen": -307.4250183105469,
|
|
"logps/ref_rejected": -265.7172546386719,
|
|
"logps/rejected": -275.0835266113281,
|
|
"loss": 5.2544,
|
|
"margin_dpo/margin_mean": 8.289999008178711,
|
|
"margin_dpo/margin_std": 17.661346435546875,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.1905759162303665,
|
|
"fcm_dpo/beta": 0.010252725332975388,
|
|
"fcm_dpo/delta": 0.04954978823661804,
|
|
"fcm_dpo/margin": 9.483511924743652,
|
|
"fcm_dpo/q_t": 0.47634202241897583,
|
|
"grad_norm": 33.50828552246094,
|
|
"learning_rate": 4.882681251368548e-07,
|
|
"logits/chosen": -0.6733120679855347,
|
|
"logits/rejected": -0.6901057958602905,
|
|
"logps/chosen": -237.88088989257812,
|
|
"logps/ref_chosen": -235.74098205566406,
|
|
"logps/ref_rejected": -226.6428985595703,
|
|
"logps/rejected": -238.2663116455078,
|
|
"loss": 5.2111,
|
|
"margin_dpo/margin_mean": 9.483511924743652,
|
|
"margin_dpo/margin_std": 19.319496154785156,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.19267015706806281,
|
|
"fcm_dpo/beta": 0.010404359549283981,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 10.150311470031738,
|
|
"fcm_dpo/q_t": 0.47393330931663513,
|
|
"grad_norm": 34.50823974609375,
|
|
"learning_rate": 4.877074915775048e-07,
|
|
"logits/chosen": -0.7354683876037598,
|
|
"logits/rejected": -0.7188453674316406,
|
|
"logps/chosen": -286.5132751464844,
|
|
"logps/ref_chosen": -283.4475402832031,
|
|
"logps/ref_rejected": -273.134033203125,
|
|
"logps/rejected": -286.35009765625,
|
|
"loss": 5.1857,
|
|
"margin_dpo/margin_mean": 10.150311470031738,
|
|
"margin_dpo/margin_std": 21.28767967224121,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.19476439790575917,
|
|
"fcm_dpo/beta": 0.010404359549283981,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 9.547552108764648,
|
|
"fcm_dpo/q_t": 0.47551578283309937,
|
|
"grad_norm": 29.792530059814453,
|
|
"learning_rate": 4.871341104867864e-07,
|
|
"logits/chosen": -0.7289955019950867,
|
|
"logits/rejected": -0.7523810267448425,
|
|
"logps/chosen": -235.75485229492188,
|
|
"logps/ref_chosen": -233.33714294433594,
|
|
"logps/ref_rejected": -230.54273986816406,
|
|
"logps/rejected": -242.5079803466797,
|
|
"loss": 5.2018,
|
|
"margin_dpo/margin_mean": 9.547552108764648,
|
|
"margin_dpo/margin_std": 19.47620391845703,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.1968586387434555,
|
|
"fcm_dpo/beta": 0.010404359549283981,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 9.37955093383789,
|
|
"fcm_dpo/q_t": 0.47586768865585327,
|
|
"grad_norm": 32.49482727050781,
|
|
"learning_rate": 4.865480126133871e-07,
|
|
"logits/chosen": -0.6883825659751892,
|
|
"logits/rejected": -0.7099732160568237,
|
|
"logps/chosen": -297.0543212890625,
|
|
"logps/ref_chosen": -294.6528015136719,
|
|
"logps/ref_rejected": -283.657958984375,
|
|
"logps/rejected": -295.4390563964844,
|
|
"loss": 5.2195,
|
|
"margin_dpo/margin_mean": 9.379551887512207,
|
|
"margin_dpo/margin_std": 21.819246292114258,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.19895287958115182,
|
|
"fcm_dpo/beta": 0.010404359549283981,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 10.186941146850586,
|
|
"fcm_dpo/q_t": 0.4739447236061096,
|
|
"grad_norm": 34.7429313659668,
|
|
"learning_rate": 4.859492293879573e-07,
|
|
"logits/chosen": -0.7289009094238281,
|
|
"logits/rejected": -0.7504929304122925,
|
|
"logps/chosen": -314.9253845214844,
|
|
"logps/ref_chosen": -311.6697082519531,
|
|
"logps/ref_rejected": -262.7471923828125,
|
|
"logps/rejected": -276.1898193359375,
|
|
"loss": 5.1947,
|
|
"margin_dpo/margin_mean": 10.18694019317627,
|
|
"margin_dpo/margin_std": 22.561256408691406,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.20104712041884817,
|
|
"fcm_dpo/beta": 0.010404359549283981,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 10.215592384338379,
|
|
"fcm_dpo/q_t": 0.4738875925540924,
|
|
"grad_norm": 36.46210479736328,
|
|
"learning_rate": 4.853377929214243e-07,
|
|
"logits/chosen": -0.7038691639900208,
|
|
"logits/rejected": -0.7164921760559082,
|
|
"logps/chosen": -287.2462158203125,
|
|
"logps/ref_chosen": -282.55596923828125,
|
|
"logps/ref_rejected": -242.71588134765625,
|
|
"logps/rejected": -257.6216735839844,
|
|
"loss": 5.1946,
|
|
"margin_dpo/margin_mean": 10.215592384338379,
|
|
"margin_dpo/margin_std": 23.375957489013672,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.2031413612565445,
|
|
"fcm_dpo/beta": 0.010404359549283981,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 12.028409957885742,
|
|
"fcm_dpo/q_t": 0.4692074954509735,
|
|
"grad_norm": 33.21619415283203,
|
|
"learning_rate": 4.847137360032699e-07,
|
|
"logits/chosen": -0.7515384554862976,
|
|
"logits/rejected": -0.7390632629394531,
|
|
"logps/chosen": -307.90765380859375,
|
|
"logps/ref_chosen": -303.57781982421875,
|
|
"logps/ref_rejected": -264.22491455078125,
|
|
"logps/rejected": -280.58319091796875,
|
|
"loss": 5.1173,
|
|
"margin_dpo/margin_mean": 12.028410911560059,
|
|
"margin_dpo/margin_std": 22.326217651367188,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.20523560209424083,
|
|
"fcm_dpo/beta": 0.010807948186993599,
|
|
"fcm_dpo/delta": 0.08449017256498337,
|
|
"fcm_dpo/margin": 12.213380813598633,
|
|
"fcm_dpo/q_t": 0.4684543013572693,
|
|
"grad_norm": 37.011268615722656,
|
|
"learning_rate": 4.84077092099773e-07,
|
|
"logits/chosen": -0.7741104364395142,
|
|
"logits/rejected": -0.7865383625030518,
|
|
"logps/chosen": -291.7223815917969,
|
|
"logps/ref_chosen": -286.8303527832031,
|
|
"logps/ref_rejected": -278.08331298828125,
|
|
"logps/rejected": -295.1886901855469,
|
|
"loss": 5.1132,
|
|
"margin_dpo/margin_mean": 12.213380813598633,
|
|
"margin_dpo/margin_std": 22.774032592773438,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.20732984293193718,
|
|
"fcm_dpo/beta": 0.011437967419624329,
|
|
"fcm_dpo/delta": 0.09145952761173248,
|
|
"fcm_dpo/margin": 12.564851760864258,
|
|
"fcm_dpo/q_t": 0.46524322032928467,
|
|
"grad_norm": 37.08080291748047,
|
|
"learning_rate": 4.834278953522137e-07,
|
|
"logits/chosen": -0.7426201701164246,
|
|
"logits/rejected": -0.756097137928009,
|
|
"logps/chosen": -285.139404296875,
|
|
"logps/ref_chosen": -279.92120361328125,
|
|
"logps/ref_rejected": -250.3365478515625,
|
|
"logps/rejected": -268.11956787109375,
|
|
"loss": 5.0901,
|
|
"margin_dpo/margin_mean": 12.564850807189941,
|
|
"margin_dpo/margin_std": 27.0224666595459,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.2094240837696335,
|
|
"fcm_dpo/beta": 0.012557308189570904,
|
|
"fcm_dpo/delta": 0.07896663248538971,
|
|
"fcm_dpo/margin": 12.57419490814209,
|
|
"fcm_dpo/q_t": 0.46233466267585754,
|
|
"grad_norm": 43.29024887084961,
|
|
"learning_rate": 4.827661805750437e-07,
|
|
"logits/chosen": -0.7817738056182861,
|
|
"logits/rejected": -0.7950529456138611,
|
|
"logps/chosen": -304.5354309082031,
|
|
"logps/ref_chosen": -296.8276672363281,
|
|
"logps/ref_rejected": -275.56146240234375,
|
|
"logps/rejected": -295.8433837890625,
|
|
"loss": 5.04,
|
|
"margin_dpo/margin_mean": 12.574195861816406,
|
|
"margin_dpo/margin_std": 24.096710205078125,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.21151832460732983,
|
|
"fcm_dpo/beta": 0.013111630454659462,
|
|
"fcm_dpo/delta": 0.07151152938604355,
|
|
"fcm_dpo/margin": 14.857452392578125,
|
|
"fcm_dpo/q_t": 0.45359134674072266,
|
|
"grad_norm": 41.36968231201172,
|
|
"learning_rate": 4.820919832540181e-07,
|
|
"logits/chosen": -0.770940363407135,
|
|
"logits/rejected": -0.7847775816917419,
|
|
"logps/chosen": -257.88330078125,
|
|
"logps/ref_chosen": -252.74203491210938,
|
|
"logps/ref_rejected": -276.4185485839844,
|
|
"logps/rejected": -296.41729736328125,
|
|
"loss": 4.9313,
|
|
"margin_dpo/margin_mean": 14.857452392578125,
|
|
"margin_dpo/margin_std": 26.29358673095703,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.2136125654450262,
|
|
"fcm_dpo/beta": 0.014527034014463425,
|
|
"fcm_dpo/delta": 0.15123134851455688,
|
|
"fcm_dpo/margin": 14.755053520202637,
|
|
"fcm_dpo/q_t": 0.4487529695034027,
|
|
"grad_norm": 44.943565368652344,
|
|
"learning_rate": 4.814053395442932e-07,
|
|
"logits/chosen": -0.7487014532089233,
|
|
"logits/rejected": -0.7447975873947144,
|
|
"logps/chosen": -224.57212829589844,
|
|
"logps/ref_chosen": -219.5537109375,
|
|
"logps/ref_rejected": -231.90853881835938,
|
|
"logps/rejected": -251.68197631835938,
|
|
"loss": 4.8727,
|
|
"margin_dpo/margin_mean": 14.75505256652832,
|
|
"margin_dpo/margin_std": 24.941452026367188,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.2157068062827225,
|
|
"fcm_dpo/beta": 0.016830556094646454,
|
|
"fcm_dpo/delta": 0.15375208854675293,
|
|
"fcm_dpo/margin": 13.61697769165039,
|
|
"fcm_dpo/q_t": 0.44760948419570923,
|
|
"grad_norm": 53.148414611816406,
|
|
"learning_rate": 4.807062862684873e-07,
|
|
"logits/chosen": -0.7735249996185303,
|
|
"logits/rejected": -0.770460307598114,
|
|
"logps/chosen": -264.299560546875,
|
|
"logps/ref_chosen": -259.6750793457031,
|
|
"logps/ref_rejected": -278.7400817871094,
|
|
"logps/rejected": -296.9815368652344,
|
|
"loss": 4.8764,
|
|
"margin_dpo/margin_mean": 13.61697769165039,
|
|
"margin_dpo/margin_std": 25.619842529296875,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.21780104712041884,
|
|
"fcm_dpo/beta": 0.01824803464114666,
|
|
"fcm_dpo/delta": 0.12172321230173111,
|
|
"fcm_dpo/margin": 10.087403297424316,
|
|
"fcm_dpo/q_t": 0.45723575353622437,
|
|
"grad_norm": 59.135841369628906,
|
|
"learning_rate": 4.799948609147061e-07,
|
|
"logits/chosen": -0.7728451490402222,
|
|
"logits/rejected": -0.7799044251441956,
|
|
"logps/chosen": -276.86041259765625,
|
|
"logps/ref_chosen": -267.9741516113281,
|
|
"logps/ref_rejected": -230.5306396484375,
|
|
"logps/rejected": -249.5042724609375,
|
|
"loss": 5.0674,
|
|
"margin_dpo/margin_mean": 10.087403297424316,
|
|
"margin_dpo/margin_std": 26.16142463684082,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.2198952879581152,
|
|
"fcm_dpo/beta": 0.019626103341579437,
|
|
"fcm_dpo/delta": 0.09441255033016205,
|
|
"fcm_dpo/margin": 20.40988540649414,
|
|
"fcm_dpo/q_t": 0.40757566690444946,
|
|
"grad_norm": 62.410152435302734,
|
|
"learning_rate": 4.792711016345321e-07,
|
|
"logits/chosen": -0.7623639106750488,
|
|
"logits/rejected": -0.7740727066993713,
|
|
"logps/chosen": -327.2814025878906,
|
|
"logps/ref_chosen": -322.25482177734375,
|
|
"logps/ref_rejected": -279.02978515625,
|
|
"logps/rejected": -304.46624755859375,
|
|
"loss": 4.3973,
|
|
"margin_dpo/margin_mean": 20.409887313842773,
|
|
"margin_dpo/margin_std": 26.728302001953125,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.22198952879581152,
|
|
"fcm_dpo/beta": 0.021983552724123,
|
|
"fcm_dpo/delta": 0.10915235430002213,
|
|
"fcm_dpo/margin": 12.417057991027832,
|
|
"fcm_dpo/q_t": 0.4385029673576355,
|
|
"grad_norm": 80.47908020019531,
|
|
"learning_rate": 4.785350472409791e-07,
|
|
"logits/chosen": -0.7452399730682373,
|
|
"logits/rejected": -0.782451868057251,
|
|
"logps/chosen": -308.17291259765625,
|
|
"logps/ref_chosen": -296.15777587890625,
|
|
"logps/ref_rejected": -266.2691650390625,
|
|
"logps/rejected": -290.70135498046875,
|
|
"loss": 4.9373,
|
|
"margin_dpo/margin_mean": 12.4170560836792,
|
|
"margin_dpo/margin_std": 29.023073196411133,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.22408376963350785,
|
|
"fcm_dpo/beta": 0.024012316018342972,
|
|
"fcm_dpo/delta": 0.14064227044582367,
|
|
"fcm_dpo/margin": 19.3704833984375,
|
|
"fcm_dpo/q_t": 0.3971790373325348,
|
|
"grad_norm": 77.79216766357422,
|
|
"learning_rate": 4.777867372064105e-07,
|
|
"logits/chosen": -0.78067547082901,
|
|
"logits/rejected": -0.7740224599838257,
|
|
"logps/chosen": -310.7627868652344,
|
|
"logps/ref_chosen": -306.996337890625,
|
|
"logps/ref_rejected": -296.79412841796875,
|
|
"logps/rejected": -319.9310302734375,
|
|
"loss": 4.3062,
|
|
"margin_dpo/margin_mean": 19.370481491088867,
|
|
"margin_dpo/margin_std": 27.15206527709961,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.2261780104712042,
|
|
"fcm_dpo/beta": 0.025636808946728706,
|
|
"fcm_dpo/delta": 0.09028993546962738,
|
|
"fcm_dpo/margin": 17.937637329101562,
|
|
"fcm_dpo/q_t": 0.4029965102672577,
|
|
"grad_norm": 286.3813781738281,
|
|
"learning_rate": 4.770262116604223e-07,
|
|
"logits/chosen": -0.7616235017776489,
|
|
"logits/rejected": -0.7734853625297546,
|
|
"logps/chosen": -299.8006286621094,
|
|
"logps/ref_chosen": -295.1526794433594,
|
|
"logps/ref_rejected": -235.974853515625,
|
|
"logps/rejected": -258.5604553222656,
|
|
"loss": 4.4937,
|
|
"margin_dpo/margin_mean": 17.937637329101562,
|
|
"margin_dpo/margin_std": 29.53498649597168,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.22827225130890053,
|
|
"fcm_dpo/beta": 0.02691740356385708,
|
|
"fcm_dpo/delta": 0.008004628121852875,
|
|
"fcm_dpo/margin": 19.709096908569336,
|
|
"fcm_dpo/q_t": 0.3899995982646942,
|
|
"grad_norm": 89.63356018066406,
|
|
"learning_rate": 4.7625351138769166e-07,
|
|
"logits/chosen": -0.7978358268737793,
|
|
"logits/rejected": -0.796513020992279,
|
|
"logps/chosen": -333.0800476074219,
|
|
"logps/ref_chosen": -325.9248046875,
|
|
"logps/ref_rejected": -279.15423583984375,
|
|
"logps/rejected": -306.0185852050781,
|
|
"loss": 4.3147,
|
|
"margin_dpo/margin_mean": 19.709096908569336,
|
|
"margin_dpo/margin_std": 30.691986083984375,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.23036649214659685,
|
|
"fcm_dpo/beta": 0.028422407805919647,
|
|
"fcm_dpo/delta": 0.0776296854019165,
|
|
"fcm_dpo/margin": 18.461952209472656,
|
|
"fcm_dpo/q_t": 0.392780601978302,
|
|
"grad_norm": 86.87859344482422,
|
|
"learning_rate": 4.75468677825789e-07,
|
|
"logits/chosen": -0.7915902137756348,
|
|
"logits/rejected": -0.780044674873352,
|
|
"logps/chosen": -281.5834655761719,
|
|
"logps/ref_chosen": -274.439208984375,
|
|
"logps/ref_rejected": -260.0552062988281,
|
|
"logps/rejected": -285.6614074707031,
|
|
"loss": 4.4874,
|
|
"margin_dpo/margin_mean": 18.46194839477539,
|
|
"margin_dpo/margin_std": 32.64317321777344,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.2324607329842932,
|
|
"fcm_dpo/beta": 0.029570797458291054,
|
|
"fcm_dpo/delta": 0.029479999095201492,
|
|
"fcm_dpo/margin": 19.259639739990234,
|
|
"fcm_dpo/q_t": 0.38573166728019714,
|
|
"grad_norm": 94.41033935546875,
|
|
"learning_rate": 4.7467175306295647e-07,
|
|
"logits/chosen": -0.8291243314743042,
|
|
"logits/rejected": -0.8090481162071228,
|
|
"logps/chosen": -336.6731262207031,
|
|
"logps/ref_chosen": -329.2361755371094,
|
|
"logps/ref_rejected": -287.82830810546875,
|
|
"logps/rejected": -314.52490234375,
|
|
"loss": 4.4329,
|
|
"margin_dpo/margin_mean": 19.259639739990234,
|
|
"margin_dpo/margin_std": 32.591739654541016,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.23455497382198953,
|
|
"fcm_dpo/beta": 0.028911547735333443,
|
|
"fcm_dpo/delta": -0.026821672916412354,
|
|
"fcm_dpo/margin": 12.475048065185547,
|
|
"fcm_dpo/q_t": 0.4273641109466553,
|
|
"grad_norm": 117.3414077758789,
|
|
"learning_rate": 4.7386277983585053e-07,
|
|
"logits/chosen": -0.7319104671478271,
|
|
"logits/rejected": -0.7623211145401001,
|
|
"logps/chosen": -269.155517578125,
|
|
"logps/ref_chosen": -257.0593566894531,
|
|
"logps/ref_rejected": -272.9595031738281,
|
|
"logps/rejected": -297.53070068359375,
|
|
"loss": 5.1793,
|
|
"margin_dpo/margin_mean": 12.475048065185547,
|
|
"margin_dpo/margin_std": 33.321533203125,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.23664921465968586,
|
|
"fcm_dpo/beta": 0.02749396488070488,
|
|
"fcm_dpo/delta": -0.030711829662322998,
|
|
"fcm_dpo/margin": 22.709571838378906,
|
|
"fcm_dpo/q_t": 0.3759039044380188,
|
|
"grad_norm": 89.33954620361328,
|
|
"learning_rate": 4.7304180152725024e-07,
|
|
"logits/chosen": -0.7936510443687439,
|
|
"logits/rejected": -0.7988536953926086,
|
|
"logps/chosen": -294.9634094238281,
|
|
"logps/ref_chosen": -286.0416564941406,
|
|
"logps/ref_rejected": -270.374267578125,
|
|
"logps/rejected": -302.00555419921875,
|
|
"loss": 4.2854,
|
|
"margin_dpo/margin_mean": 22.709571838378906,
|
|
"margin_dpo/margin_std": 36.09437942504883,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.2387434554973822,
|
|
"fcm_dpo/beta": 0.029794633388519287,
|
|
"fcm_dpo/delta": 0.07771297544240952,
|
|
"fcm_dpo/margin": 12.448982238769531,
|
|
"fcm_dpo/q_t": 0.42108646035194397,
|
|
"grad_norm": 107.05913543701172,
|
|
"learning_rate": 4.7220886216373085e-07,
|
|
"logits/chosen": -0.83903968334198,
|
|
"logits/rejected": -0.841633677482605,
|
|
"logps/chosen": -271.00335693359375,
|
|
"logps/ref_chosen": -260.0084533691406,
|
|
"logps/ref_rejected": -246.67190551757812,
|
|
"logps/rejected": -270.11578369140625,
|
|
"loss": 5.0306,
|
|
"margin_dpo/margin_mean": 12.448982238769531,
|
|
"margin_dpo/margin_std": 31.104576110839844,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.24083769633507854,
|
|
"fcm_dpo/beta": 0.029507935047149658,
|
|
"fcm_dpo/delta": 0.016908658668398857,
|
|
"fcm_dpo/margin": 12.143805503845215,
|
|
"fcm_dpo/q_t": 0.4331102967262268,
|
|
"grad_norm": 120.61966705322266,
|
|
"learning_rate": 4.7136400641330245e-07,
|
|
"logits/chosen": -0.8338419795036316,
|
|
"logits/rejected": -0.7967959642410278,
|
|
"logps/chosen": -310.32476806640625,
|
|
"logps/ref_chosen": -299.4229736328125,
|
|
"logps/ref_rejected": -272.1186828613281,
|
|
"logps/rejected": -295.1643371582031,
|
|
"loss": 5.1519,
|
|
"margin_dpo/margin_mean": 12.143804550170898,
|
|
"margin_dpo/margin_std": 34.1319465637207,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.24293193717277486,
|
|
"fcm_dpo/beta": 0.03158475458621979,
|
|
"fcm_dpo/delta": 0.08304879814386368,
|
|
"fcm_dpo/margin": 16.471763610839844,
|
|
"fcm_dpo/q_t": 0.39313048124313354,
|
|
"grad_norm": 96.66343688964844,
|
|
"learning_rate": 4.70507279583015e-07,
|
|
"logits/chosen": -0.8481428027153015,
|
|
"logits/rejected": -0.8136316537857056,
|
|
"logps/chosen": -284.63897705078125,
|
|
"logps/ref_chosen": -279.263916015625,
|
|
"logps/ref_rejected": -253.6192169189453,
|
|
"logps/rejected": -275.46600341796875,
|
|
"loss": 4.5386,
|
|
"margin_dpo/margin_mean": 16.471763610839844,
|
|
"margin_dpo/margin_std": 29.921730041503906,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.2450261780104712,
|
|
"fcm_dpo/beta": 0.03184635192155838,
|
|
"fcm_dpo/delta": -0.10544593632221222,
|
|
"fcm_dpo/margin": 17.663230895996094,
|
|
"fcm_dpo/q_t": 0.3876641094684601,
|
|
"grad_norm": 113.3866958618164,
|
|
"learning_rate": 4.6963872761652834e-07,
|
|
"logits/chosen": -0.7901442646980286,
|
|
"logits/rejected": -0.7954122424125671,
|
|
"logps/chosen": -266.06890869140625,
|
|
"logps/ref_chosen": -259.2248840332031,
|
|
"logps/ref_rejected": -229.3042755126953,
|
|
"logps/rejected": -253.81155395507812,
|
|
"loss": 4.4728,
|
|
"margin_dpo/margin_mean": 17.663230895996094,
|
|
"margin_dpo/margin_std": 28.427824020385742,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.24712041884816754,
|
|
"fcm_dpo/beta": 0.029734350740909576,
|
|
"fcm_dpo/delta": -0.03558747097849846,
|
|
"fcm_dpo/margin": 19.60186004638672,
|
|
"fcm_dpo/q_t": 0.38436776399612427,
|
|
"grad_norm": 113.9665756225586,
|
|
"learning_rate": 4.687583970916486e-07,
|
|
"logits/chosen": -0.7948500514030457,
|
|
"logits/rejected": -0.7873266935348511,
|
|
"logps/chosen": -276.48236083984375,
|
|
"logps/ref_chosen": -267.0707092285156,
|
|
"logps/ref_rejected": -272.7322082519531,
|
|
"logps/rejected": -301.7456970214844,
|
|
"loss": 4.4767,
|
|
"margin_dpo/margin_mean": 19.601858139038086,
|
|
"margin_dpo/margin_std": 34.46326446533203,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.24921465968586387,
|
|
"fcm_dpo/beta": 0.029399575665593147,
|
|
"fcm_dpo/delta": -0.0034819915890693665,
|
|
"fcm_dpo/margin": 15.70901870727539,
|
|
"fcm_dpo/q_t": 0.4097801744937897,
|
|
"grad_norm": 116.46439361572266,
|
|
"learning_rate": 4.6786633521783005e-07,
|
|
"logits/chosen": -0.8555842638015747,
|
|
"logits/rejected": -0.8587056398391724,
|
|
"logps/chosen": -336.5263671875,
|
|
"logps/ref_chosen": -324.6766357421875,
|
|
"logps/ref_rejected": -306.0322265625,
|
|
"logps/rejected": -333.5909423828125,
|
|
"loss": 4.8796,
|
|
"margin_dpo/margin_mean": 15.709016799926758,
|
|
"margin_dpo/margin_std": 34.084205627441406,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.2513089005235602,
|
|
"fcm_dpo/beta": 0.029722902923822403,
|
|
"fcm_dpo/delta": 0.015680911019444466,
|
|
"fcm_dpo/margin": 15.187647819519043,
|
|
"fcm_dpo/q_t": 0.41209471225738525,
|
|
"grad_norm": 98.18533325195312,
|
|
"learning_rate": 4.669625898336438e-07,
|
|
"logits/chosen": -0.8075263500213623,
|
|
"logits/rejected": -0.8280918598175049,
|
|
"logps/chosen": -324.54205322265625,
|
|
"logps/ref_chosen": -315.2617492675781,
|
|
"logps/ref_rejected": -265.32501220703125,
|
|
"logps/rejected": -289.79296875,
|
|
"loss": 4.8908,
|
|
"margin_dpo/margin_mean": 15.187647819519043,
|
|
"margin_dpo/margin_std": 33.20510482788086,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.2534031413612565,
|
|
"fcm_dpo/beta": 0.03124306909739971,
|
|
"fcm_dpo/delta": 0.14657826721668243,
|
|
"fcm_dpo/margin": 12.737668991088867,
|
|
"fcm_dpo/q_t": 0.42548656463623047,
|
|
"grad_norm": 110.32199096679688,
|
|
"learning_rate": 4.6604720940421207e-07,
|
|
"logits/chosen": -0.8259115815162659,
|
|
"logits/rejected": -0.8430719971656799,
|
|
"logps/chosen": -235.69189453125,
|
|
"logps/ref_chosen": -222.99609375,
|
|
"logps/ref_rejected": -226.92860412597656,
|
|
"logps/rejected": -252.36209106445312,
|
|
"loss": 5.0129,
|
|
"margin_dpo/margin_mean": 12.737669944763184,
|
|
"margin_dpo/margin_std": 32.302677154541016,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.2554973821989529,
|
|
"fcm_dpo/beta": 0.032185669988393784,
|
|
"fcm_dpo/delta": -0.019368404522538185,
|
|
"fcm_dpo/margin": 15.059699058532715,
|
|
"fcm_dpo/q_t": 0.40549296140670776,
|
|
"grad_norm": 117.572509765625,
|
|
"learning_rate": 4.651202430186092e-07,
|
|
"logits/chosen": -0.8742939829826355,
|
|
"logits/rejected": -0.8380413055419922,
|
|
"logps/chosen": -288.3365783691406,
|
|
"logps/ref_chosen": -276.02630615234375,
|
|
"logps/ref_rejected": -277.97418212890625,
|
|
"logps/rejected": -305.3441162109375,
|
|
"loss": 4.9239,
|
|
"margin_dpo/margin_mean": 15.059700012207031,
|
|
"margin_dpo/margin_std": 34.5055046081543,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.25759162303664923,
|
|
"fcm_dpo/beta": 0.03192441910505295,
|
|
"fcm_dpo/delta": -0.06708841025829315,
|
|
"fcm_dpo/margin": 20.746381759643555,
|
|
"fcm_dpo/q_t": 0.3659403324127197,
|
|
"grad_norm": 114.86331176757812,
|
|
"learning_rate": 4.6418174038722924e-07,
|
|
"logits/chosen": -0.7943709492683411,
|
|
"logits/rejected": -0.794708788394928,
|
|
"logps/chosen": -334.998291015625,
|
|
"logps/ref_chosen": -328.1546325683594,
|
|
"logps/ref_rejected": -280.6911315917969,
|
|
"logps/rejected": -308.28118896484375,
|
|
"loss": 4.2403,
|
|
"margin_dpo/margin_mean": 20.746379852294922,
|
|
"margin_dpo/margin_std": 32.13544464111328,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.25968586387434556,
|
|
"fcm_dpo/beta": 0.030819490551948547,
|
|
"fcm_dpo/delta": 0.03179997205734253,
|
|
"fcm_dpo/margin": 16.770048141479492,
|
|
"fcm_dpo/q_t": 0.39261382818222046,
|
|
"grad_norm": 99.67831420898438,
|
|
"learning_rate": 4.6323175183912023e-07,
|
|
"logits/chosen": -0.8295610547065735,
|
|
"logits/rejected": -0.8021270036697388,
|
|
"logps/chosen": -285.3762512207031,
|
|
"logps/ref_chosen": -275.6961975097656,
|
|
"logps/ref_rejected": -225.361572265625,
|
|
"logps/rejected": -251.8116455078125,
|
|
"loss": 4.5914,
|
|
"margin_dpo/margin_mean": 16.770048141479492,
|
|
"margin_dpo/margin_std": 29.99167251586914,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.2617801047120419,
|
|
"fcm_dpo/beta": 0.03114517405629158,
|
|
"fcm_dpo/delta": -0.03549438342452049,
|
|
"fcm_dpo/margin": 16.479698181152344,
|
|
"fcm_dpo/q_t": 0.40479522943496704,
|
|
"grad_norm": 124.4884033203125,
|
|
"learning_rate": 4.6227032831928483e-07,
|
|
"logits/chosen": -0.7944302558898926,
|
|
"logits/rejected": -0.7552446722984314,
|
|
"logps/chosen": -288.5038757324219,
|
|
"logps/ref_chosen": -278.06976318359375,
|
|
"logps/ref_rejected": -265.63873291015625,
|
|
"logps/rejected": -292.5525207519531,
|
|
"loss": 4.8872,
|
|
"margin_dpo/margin_mean": 16.479698181152344,
|
|
"margin_dpo/margin_std": 36.26911544799805,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.2638743455497382,
|
|
"fcm_dpo/beta": 0.03014766052365303,
|
|
"fcm_dpo/delta": -0.020349113270640373,
|
|
"fcm_dpo/margin": 18.311885833740234,
|
|
"fcm_dpo/q_t": 0.38548335433006287,
|
|
"grad_norm": 109.14166259765625,
|
|
"learning_rate": 4.612975213859487e-07,
|
|
"logits/chosen": -0.8047983646392822,
|
|
"logits/rejected": -0.8239343166351318,
|
|
"logps/chosen": -329.813232421875,
|
|
"logps/ref_chosen": -321.3960876464844,
|
|
"logps/ref_rejected": -285.37664794921875,
|
|
"logps/rejected": -312.10565185546875,
|
|
"loss": 4.4441,
|
|
"margin_dpo/margin_mean": 18.311885833740234,
|
|
"margin_dpo/margin_std": 30.9145450592041,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.26596858638743454,
|
|
"fcm_dpo/beta": 0.030198298394680023,
|
|
"fcm_dpo/delta": -0.08730512112379074,
|
|
"fcm_dpo/margin": 20.74812889099121,
|
|
"fcm_dpo/q_t": 0.37573808431625366,
|
|
"grad_norm": 107.66555786132812,
|
|
"learning_rate": 4.603133832077953e-07,
|
|
"logits/chosen": -0.87255859375,
|
|
"logits/rejected": -0.8262636661529541,
|
|
"logps/chosen": -313.08575439453125,
|
|
"logps/ref_chosen": -306.55877685546875,
|
|
"logps/ref_rejected": -274.8651428222656,
|
|
"logps/rejected": -302.1402587890625,
|
|
"loss": 4.3108,
|
|
"margin_dpo/margin_mean": 20.748130798339844,
|
|
"margin_dpo/margin_std": 31.776979446411133,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.2680628272251309,
|
|
"fcm_dpo/beta": 0.027010329067707062,
|
|
"fcm_dpo/delta": -0.06802891194820404,
|
|
"fcm_dpo/margin": 22.768661499023438,
|
|
"fcm_dpo/q_t": 0.37576210498809814,
|
|
"grad_norm": 87.82177734375,
|
|
"learning_rate": 4.5931796656116837e-07,
|
|
"logits/chosen": -0.7685502171516418,
|
|
"logits/rejected": -0.7739553451538086,
|
|
"logps/chosen": -268.2386779785156,
|
|
"logps/ref_chosen": -265.3973693847656,
|
|
"logps/ref_rejected": -250.9737548828125,
|
|
"logps/rejected": -276.5837097167969,
|
|
"loss": 4.2397,
|
|
"margin_dpo/margin_mean": 22.768665313720703,
|
|
"margin_dpo/margin_std": 34.84334945678711,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.27015706806282724,
|
|
"fcm_dpo/beta": 0.027978552505373955,
|
|
"fcm_dpo/delta": 0.057149242609739304,
|
|
"fcm_dpo/margin": 19.382301330566406,
|
|
"fcm_dpo/q_t": 0.39152759313583374,
|
|
"grad_norm": 95.93099212646484,
|
|
"learning_rate": 4.5831132482724193e-07,
|
|
"logits/chosen": -0.7922682166099548,
|
|
"logits/rejected": -0.795950710773468,
|
|
"logps/chosen": -307.2889404296875,
|
|
"logps/ref_chosen": -303.158447265625,
|
|
"logps/ref_rejected": -275.9891052246094,
|
|
"logps/rejected": -299.50189208984375,
|
|
"loss": 4.457,
|
|
"margin_dpo/margin_mean": 19.38229751586914,
|
|
"margin_dpo/margin_std": 33.825496673583984,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.27225130890052357,
|
|
"fcm_dpo/beta": 0.028778987005352974,
|
|
"fcm_dpo/delta": 0.07279841601848602,
|
|
"fcm_dpo/margin": 16.71417999267578,
|
|
"fcm_dpo/q_t": 0.401623010635376,
|
|
"grad_norm": 103.26705932617188,
|
|
"learning_rate": 4.5729351198915705e-07,
|
|
"logits/chosen": -0.7664986848831177,
|
|
"logits/rejected": -0.8091428875923157,
|
|
"logps/chosen": -292.35394287109375,
|
|
"logps/ref_chosen": -286.4073486328125,
|
|
"logps/ref_rejected": -294.38665771484375,
|
|
"logps/rejected": -317.0474853515625,
|
|
"loss": 4.6181,
|
|
"margin_dpo/margin_mean": 16.714181900024414,
|
|
"margin_dpo/margin_std": 32.14228820800781,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.2743455497382199,
|
|
"fcm_dpo/beta": 0.03138697147369385,
|
|
"fcm_dpo/delta": 0.12776511907577515,
|
|
"fcm_dpo/margin": 15.206416130065918,
|
|
"fcm_dpo/q_t": 0.40508803725242615,
|
|
"grad_norm": 126.76692199707031,
|
|
"learning_rate": 4.5626458262912735e-07,
|
|
"logits/chosen": -0.8393828868865967,
|
|
"logits/rejected": -0.7898960113525391,
|
|
"logps/chosen": -317.5235290527344,
|
|
"logps/ref_chosen": -311.5650634765625,
|
|
"logps/ref_rejected": -291.62432861328125,
|
|
"logps/rejected": -312.7891845703125,
|
|
"loss": 4.7479,
|
|
"margin_dpo/margin_mean": 15.206417083740234,
|
|
"margin_dpo/margin_std": 31.15882110595703,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.2764397905759162,
|
|
"fcm_dpo/beta": 0.03294968605041504,
|
|
"fcm_dpo/delta": -0.09555768966674805,
|
|
"fcm_dpo/margin": 20.942249298095703,
|
|
"fcm_dpo/q_t": 0.37162911891937256,
|
|
"grad_norm": 136.78445434570312,
|
|
"learning_rate": 4.5522459192551166e-07,
|
|
"logits/chosen": -0.8077597618103027,
|
|
"logits/rejected": -0.7918823957443237,
|
|
"logps/chosen": -272.0758972167969,
|
|
"logps/ref_chosen": -270.0818176269531,
|
|
"logps/ref_rejected": -284.3084411621094,
|
|
"logps/rejected": -307.24481201171875,
|
|
"loss": 4.3352,
|
|
"margin_dpo/margin_mean": 20.942249298095703,
|
|
"margin_dpo/margin_std": 33.96846389770508,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.27853403141361255,
|
|
"fcm_dpo/beta": 0.030396468937397003,
|
|
"fcm_dpo/delta": -0.021258918568491936,
|
|
"fcm_dpo/margin": 18.287437438964844,
|
|
"fcm_dpo/q_t": 0.38483449816703796,
|
|
"grad_norm": 97.30946350097656,
|
|
"learning_rate": 4.541735956498554e-07,
|
|
"logits/chosen": -0.8339589834213257,
|
|
"logits/rejected": -0.841139018535614,
|
|
"logps/chosen": -287.4236145019531,
|
|
"logps/ref_chosen": -285.6213684082031,
|
|
"logps/ref_rejected": -251.19386291503906,
|
|
"logps/rejected": -271.2835693359375,
|
|
"loss": 4.4624,
|
|
"margin_dpo/margin_mean": 18.287437438964844,
|
|
"margin_dpo/margin_std": 30.66234588623047,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.2806282722513089,
|
|
"fcm_dpo/beta": 0.03080589883029461,
|
|
"fcm_dpo/delta": 0.050978198647499084,
|
|
"fcm_dpo/margin": 15.381253242492676,
|
|
"fcm_dpo/q_t": 0.40036991238594055,
|
|
"grad_norm": 106.52774810791016,
|
|
"learning_rate": 4.5311165016389914e-07,
|
|
"logits/chosen": -0.8478070497512817,
|
|
"logits/rejected": -0.8514746427536011,
|
|
"logps/chosen": -328.1652526855469,
|
|
"logps/ref_chosen": -318.92083740234375,
|
|
"logps/ref_rejected": -293.1894836425781,
|
|
"logps/rejected": -317.8151550292969,
|
|
"loss": 4.662,
|
|
"margin_dpo/margin_mean": 15.381254196166992,
|
|
"margin_dpo/margin_std": 29.16480255126953,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.28272251308900526,
|
|
"fcm_dpo/beta": 0.031100064516067505,
|
|
"fcm_dpo/delta": -0.0012684855610132217,
|
|
"fcm_dpo/margin": 17.726974487304688,
|
|
"fcm_dpo/q_t": 0.38483473658561707,
|
|
"grad_norm": 133.8107147216797,
|
|
"learning_rate": 4.520388124165564e-07,
|
|
"logits/chosen": -0.7306185364723206,
|
|
"logits/rejected": -0.7757068872451782,
|
|
"logps/chosen": -296.7521667480469,
|
|
"logps/ref_chosen": -292.8217468261719,
|
|
"logps/ref_rejected": -269.2896728515625,
|
|
"logps/rejected": -290.9470520019531,
|
|
"loss": 4.365,
|
|
"margin_dpo/margin_mean": 17.726974487304688,
|
|
"margin_dpo/margin_std": 27.901412963867188,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.2848167539267016,
|
|
"fcm_dpo/beta": 0.03128836303949356,
|
|
"fcm_dpo/delta": 0.02984962984919548,
|
|
"fcm_dpo/margin": 16.618209838867188,
|
|
"fcm_dpo/q_t": 0.3998725414276123,
|
|
"grad_norm": 131.5796356201172,
|
|
"learning_rate": 4.5095513994085974e-07,
|
|
"logits/chosen": -0.7901206612586975,
|
|
"logits/rejected": -0.7873492240905762,
|
|
"logps/chosen": -278.5729064941406,
|
|
"logps/ref_chosen": -272.8525390625,
|
|
"logps/ref_rejected": -252.68202209472656,
|
|
"logps/rejected": -275.0205993652344,
|
|
"loss": 4.711,
|
|
"margin_dpo/margin_mean": 16.618209838867188,
|
|
"margin_dpo/margin_std": 32.85752487182617,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.2869109947643979,
|
|
"fcm_dpo/beta": 0.03243596479296684,
|
|
"fcm_dpo/delta": 0.0307313185185194,
|
|
"fcm_dpo/margin": 15.30479621887207,
|
|
"fcm_dpo/q_t": 0.4031601846218109,
|
|
"grad_norm": 127.34578704833984,
|
|
"learning_rate": 4.498606908508753e-07,
|
|
"logits/chosen": -0.8465839624404907,
|
|
"logits/rejected": -0.8333037495613098,
|
|
"logps/chosen": -308.864013671875,
|
|
"logps/ref_chosen": -300.7522277832031,
|
|
"logps/ref_rejected": -286.1935119628906,
|
|
"logps/rejected": -309.6100769042969,
|
|
"loss": 4.7322,
|
|
"margin_dpo/margin_mean": 15.30479621887207,
|
|
"margin_dpo/margin_std": 30.956771850585938,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.28900523560209423,
|
|
"fcm_dpo/beta": 0.032454121857881546,
|
|
"fcm_dpo/delta": 0.005412563681602478,
|
|
"fcm_dpo/margin": 18.291305541992188,
|
|
"fcm_dpo/q_t": 0.3903680145740509,
|
|
"grad_norm": 106.79438781738281,
|
|
"learning_rate": 4.487555238385862e-07,
|
|
"logits/chosen": -0.7613782286643982,
|
|
"logits/rejected": -0.7434461712837219,
|
|
"logps/chosen": -294.6986083984375,
|
|
"logps/ref_chosen": -288.9369812011719,
|
|
"logps/ref_rejected": -263.7076416015625,
|
|
"logps/rejected": -287.7606201171875,
|
|
"loss": 4.5596,
|
|
"margin_dpo/margin_mean": 18.291303634643555,
|
|
"margin_dpo/margin_std": 34.35835266113281,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.29109947643979056,
|
|
"fcm_dpo/beta": 0.03308243677020073,
|
|
"fcm_dpo/delta": 0.03304573893547058,
|
|
"fcm_dpo/margin": 13.005290985107422,
|
|
"fcm_dpo/q_t": 0.41861557960510254,
|
|
"grad_norm": 116.88390350341797,
|
|
"learning_rate": 4.476396981707453e-07,
|
|
"logits/chosen": -0.7813708782196045,
|
|
"logits/rejected": -0.8129632472991943,
|
|
"logps/chosen": -274.0767517089844,
|
|
"logps/ref_chosen": -270.0443115234375,
|
|
"logps/ref_rejected": -267.3226013183594,
|
|
"logps/rejected": -284.3603210449219,
|
|
"loss": 4.901,
|
|
"margin_dpo/margin_mean": 13.005290031433105,
|
|
"margin_dpo/margin_std": 29.456113815307617,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.2931937172774869,
|
|
"fcm_dpo/beta": 0.03421860188245773,
|
|
"fcm_dpo/delta": 0.0003454945981502533,
|
|
"fcm_dpo/margin": 17.4409236907959,
|
|
"fcm_dpo/q_t": 0.3806909918785095,
|
|
"grad_norm": 129.08346557617188,
|
|
"learning_rate": 4.4651327368569684e-07,
|
|
"logits/chosen": -0.8671438694000244,
|
|
"logits/rejected": -0.841330349445343,
|
|
"logps/chosen": -287.3354187011719,
|
|
"logps/ref_chosen": -282.9555969238281,
|
|
"logps/ref_rejected": -251.17181396484375,
|
|
"logps/rejected": -272.9925537109375,
|
|
"loss": 4.2991,
|
|
"margin_dpo/margin_mean": 17.440921783447266,
|
|
"margin_dpo/margin_std": 27.346405029296875,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.29528795811518327,
|
|
"fcm_dpo/beta": 0.03223487734794617,
|
|
"fcm_dpo/delta": -0.05671250820159912,
|
|
"fcm_dpo/margin": 20.16065216064453,
|
|
"fcm_dpo/q_t": 0.36774590611457825,
|
|
"grad_norm": 107.1982192993164,
|
|
"learning_rate": 4.453763107901675e-07,
|
|
"logits/chosen": -0.7901620268821716,
|
|
"logits/rejected": -0.7895568013191223,
|
|
"logps/chosen": -298.8914794921875,
|
|
"logps/ref_chosen": -296.3001708984375,
|
|
"logps/ref_rejected": -279.8486633300781,
|
|
"logps/rejected": -302.6006164550781,
|
|
"loss": 4.2532,
|
|
"margin_dpo/margin_mean": 20.160648345947266,
|
|
"margin_dpo/margin_std": 31.275304794311523,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.2973821989528796,
|
|
"fcm_dpo/beta": 0.031261567026376724,
|
|
"fcm_dpo/delta": -0.0013711625942960382,
|
|
"fcm_dpo/margin": 16.695674896240234,
|
|
"fcm_dpo/q_t": 0.40020960569381714,
|
|
"grad_norm": 104.9339370727539,
|
|
"learning_rate": 4.4422887045602674e-07,
|
|
"logits/chosen": -0.8087915778160095,
|
|
"logits/rejected": -0.8152974843978882,
|
|
"logps/chosen": -304.1774597167969,
|
|
"logps/ref_chosen": -300.56585693359375,
|
|
"logps/ref_rejected": -231.43316650390625,
|
|
"logps/rejected": -251.74044799804688,
|
|
"loss": 4.7019,
|
|
"margin_dpo/margin_mean": 16.695674896240234,
|
|
"margin_dpo/margin_std": 33.27724075317383,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.2994764397905759,
|
|
"fcm_dpo/beta": 0.03194243088364601,
|
|
"fcm_dpo/delta": -0.005147319287061691,
|
|
"fcm_dpo/margin": 18.84752655029297,
|
|
"fcm_dpo/q_t": 0.37769022583961487,
|
|
"grad_norm": 109.19286346435547,
|
|
"learning_rate": 4.4307101421701755e-07,
|
|
"logits/chosen": -0.7999674677848816,
|
|
"logits/rejected": -0.7869732975959778,
|
|
"logps/chosen": -300.0097961425781,
|
|
"logps/ref_chosen": -296.73236083984375,
|
|
"logps/ref_rejected": -266.45257568359375,
|
|
"logps/rejected": -288.5776062011719,
|
|
"loss": 4.3005,
|
|
"margin_dpo/margin_mean": 18.847524642944336,
|
|
"margin_dpo/margin_std": 29.313934326171875,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.30157068062827225,
|
|
"fcm_dpo/beta": 0.03147399052977562,
|
|
"fcm_dpo/delta": 0.05607675388455391,
|
|
"fcm_dpo/margin": 16.06841278076172,
|
|
"fcm_dpo/q_t": 0.4018367528915405,
|
|
"grad_norm": 109.13096618652344,
|
|
"learning_rate": 4.419028041654559e-07,
|
|
"logits/chosen": -0.8504543304443359,
|
|
"logits/rejected": -0.8398086428642273,
|
|
"logps/chosen": -302.9305419921875,
|
|
"logps/ref_chosen": -298.843994140625,
|
|
"logps/ref_rejected": -266.120849609375,
|
|
"logps/rejected": -286.2757873535156,
|
|
"loss": 4.6688,
|
|
"margin_dpo/margin_mean": 16.06841278076172,
|
|
"margin_dpo/margin_std": 32.15247344970703,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.3036649214659686,
|
|
"fcm_dpo/beta": 0.03136536106467247,
|
|
"fcm_dpo/delta": -0.10698030889034271,
|
|
"fcm_dpo/margin": 20.321487426757812,
|
|
"fcm_dpo/q_t": 0.36860162019729614,
|
|
"grad_norm": 104.23075103759766,
|
|
"learning_rate": 4.4072430294890166e-07,
|
|
"logits/chosen": -0.8474912047386169,
|
|
"logits/rejected": -0.8559509515762329,
|
|
"logps/chosen": -278.58154296875,
|
|
"logps/ref_chosen": -275.7528381347656,
|
|
"logps/ref_rejected": -214.74807739257812,
|
|
"logps/rejected": -237.8982391357422,
|
|
"loss": 4.1589,
|
|
"margin_dpo/margin_mean": 20.321487426757812,
|
|
"margin_dpo/margin_std": 28.55498504638672,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.3057591623036649,
|
|
"fcm_dpo/beta": 0.030392833054065704,
|
|
"fcm_dpo/delta": 0.009405029937624931,
|
|
"fcm_dpo/margin": 19.343936920166016,
|
|
"fcm_dpo/q_t": 0.38136640191078186,
|
|
"grad_norm": 100.81139373779297,
|
|
"learning_rate": 4.395355737667985e-07,
|
|
"logits/chosen": -0.817609429359436,
|
|
"logits/rejected": -0.8185821771621704,
|
|
"logps/chosen": -284.92779541015625,
|
|
"logps/ref_chosen": -277.09820556640625,
|
|
"logps/ref_rejected": -265.41046142578125,
|
|
"logps/rejected": -292.58392333984375,
|
|
"loss": 4.2804,
|
|
"margin_dpo/margin_mean": 19.343936920166016,
|
|
"margin_dpo/margin_std": 29.182607650756836,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.3078534031413613,
|
|
"fcm_dpo/beta": 0.03225337713956833,
|
|
"fcm_dpo/delta": 0.03318355232477188,
|
|
"fcm_dpo/margin": 15.443235397338867,
|
|
"fcm_dpo/q_t": 0.40001511573791504,
|
|
"grad_norm": 107.14227294921875,
|
|
"learning_rate": 4.3833668036708483e-07,
|
|
"logits/chosen": -0.8150308132171631,
|
|
"logits/rejected": -0.8176466822624207,
|
|
"logps/chosen": -299.32708740234375,
|
|
"logps/ref_chosen": -291.4185791015625,
|
|
"logps/ref_rejected": -253.43051147460938,
|
|
"logps/rejected": -276.7822265625,
|
|
"loss": 4.7888,
|
|
"margin_dpo/margin_mean": 15.443236351013184,
|
|
"margin_dpo/margin_std": 31.842870712280273,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.3099476439790576,
|
|
"fcm_dpo/beta": 0.03284765034914017,
|
|
"fcm_dpo/delta": 0.06887248158454895,
|
|
"fcm_dpo/margin": 15.128622055053711,
|
|
"fcm_dpo/q_t": 0.4055444300174713,
|
|
"grad_norm": 105.72512817382812,
|
|
"learning_rate": 4.3712768704277524e-07,
|
|
"logits/chosen": -0.8757432699203491,
|
|
"logits/rejected": -0.8821508288383484,
|
|
"logps/chosen": -244.3941650390625,
|
|
"logps/ref_chosen": -236.74850463867188,
|
|
"logps/ref_rejected": -231.4674072265625,
|
|
"logps/rejected": -254.24166870117188,
|
|
"loss": 4.7548,
|
|
"margin_dpo/margin_mean": 15.128622055053711,
|
|
"margin_dpo/margin_std": 31.366443634033203,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.31204188481675393,
|
|
"fcm_dpo/beta": 0.032370131462812424,
|
|
"fcm_dpo/delta": -0.04777521640062332,
|
|
"fcm_dpo/margin": 19.857650756835938,
|
|
"fcm_dpo/q_t": 0.3684397339820862,
|
|
"grad_norm": 107.77214813232422,
|
|
"learning_rate": 4.3590865862851263e-07,
|
|
"logits/chosen": -0.8252199292182922,
|
|
"logits/rejected": -0.8139665126800537,
|
|
"logps/chosen": -326.12774658203125,
|
|
"logps/ref_chosen": -319.9284973144531,
|
|
"logps/ref_rejected": -308.20233154296875,
|
|
"logps/rejected": -334.2592468261719,
|
|
"loss": 4.0675,
|
|
"margin_dpo/margin_mean": 19.857654571533203,
|
|
"margin_dpo/margin_std": 27.37247085571289,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.31413612565445026,
|
|
"fcm_dpo/beta": 0.032013505697250366,
|
|
"fcm_dpo/delta": 0.011951310560107231,
|
|
"fcm_dpo/margin": 18.362552642822266,
|
|
"fcm_dpo/q_t": 0.38145214319229126,
|
|
"grad_norm": 108.20628356933594,
|
|
"learning_rate": 4.346796604970912e-07,
|
|
"logits/chosen": -0.8032433390617371,
|
|
"logits/rejected": -0.7947119474411011,
|
|
"logps/chosen": -286.0317077636719,
|
|
"logps/ref_chosen": -276.3182373046875,
|
|
"logps/ref_rejected": -273.02215576171875,
|
|
"logps/rejected": -301.0981750488281,
|
|
"loss": 4.3501,
|
|
"margin_dpo/margin_mean": 18.362550735473633,
|
|
"margin_dpo/margin_std": 29.839893341064453,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.3162303664921466,
|
|
"fcm_dpo/beta": 0.029401123523712158,
|
|
"fcm_dpo/delta": -0.19104339182376862,
|
|
"fcm_dpo/margin": 26.312572479248047,
|
|
"fcm_dpo/q_t": 0.34187808632850647,
|
|
"grad_norm": 90.58390045166016,
|
|
"learning_rate": 4.3344075855595097e-07,
|
|
"logits/chosen": -0.8197271823883057,
|
|
"logits/rejected": -0.8265554308891296,
|
|
"logps/chosen": -304.98236083984375,
|
|
"logps/ref_chosen": -297.31280517578125,
|
|
"logps/ref_rejected": -266.1003723144531,
|
|
"logps/rejected": -300.0824890136719,
|
|
"loss": 3.7524,
|
|
"margin_dpo/margin_mean": 26.312572479248047,
|
|
"margin_dpo/margin_std": 30.86597442626953,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.3183246073298429,
|
|
"fcm_dpo/beta": 0.027028188109397888,
|
|
"fcm_dpo/delta": -0.029568390920758247,
|
|
"fcm_dpo/margin": 20.28197479248047,
|
|
"fcm_dpo/q_t": 0.38840028643608093,
|
|
"grad_norm": 95.14047241210938,
|
|
"learning_rate": 4.3219201924364323e-07,
|
|
"logits/chosen": -0.8347331285476685,
|
|
"logits/rejected": -0.8374426364898682,
|
|
"logps/chosen": -276.0089416503906,
|
|
"logps/ref_chosen": -270.2470397949219,
|
|
"logps/ref_rejected": -269.7749328613281,
|
|
"logps/rejected": -295.8188781738281,
|
|
"loss": 4.3761,
|
|
"margin_dpo/margin_mean": 20.28197479248047,
|
|
"margin_dpo/margin_std": 31.723121643066406,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.3204188481675393,
|
|
"fcm_dpo/beta": 0.0251263827085495,
|
|
"fcm_dpo/delta": -0.1380881667137146,
|
|
"fcm_dpo/margin": 28.840253829956055,
|
|
"fcm_dpo/q_t": 0.34343641996383667,
|
|
"grad_norm": 84.2206039428711,
|
|
"learning_rate": 4.309335095262675e-07,
|
|
"logits/chosen": -0.8285923004150391,
|
|
"logits/rejected": -0.8218899369239807,
|
|
"logps/chosen": -283.19036865234375,
|
|
"logps/ref_chosen": -273.779052734375,
|
|
"logps/ref_rejected": -280.9530944824219,
|
|
"logps/rejected": -319.20465087890625,
|
|
"loss": 3.6856,
|
|
"margin_dpo/margin_mean": 28.840253829956055,
|
|
"margin_dpo/margin_std": 30.283130645751953,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.3225130890052356,
|
|
"fcm_dpo/beta": 0.024389155209064484,
|
|
"fcm_dpo/delta": -0.017212260514497757,
|
|
"fcm_dpo/margin": 20.448200225830078,
|
|
"fcm_dpo/q_t": 0.3947216272354126,
|
|
"grad_norm": 89.34386444091797,
|
|
"learning_rate": 4.2966529689388064e-07,
|
|
"logits/chosen": -0.8547238707542419,
|
|
"logits/rejected": -0.841791033744812,
|
|
"logps/chosen": -301.56524658203125,
|
|
"logps/ref_chosen": -289.9031982421875,
|
|
"logps/ref_rejected": -261.5166320800781,
|
|
"logps/rejected": -293.62689208984375,
|
|
"loss": 4.4855,
|
|
"margin_dpo/margin_mean": 20.448200225830078,
|
|
"margin_dpo/margin_std": 34.4425048828125,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.32460732984293195,
|
|
"fcm_dpo/beta": 0.0245128832757473,
|
|
"fcm_dpo/delta": 0.04135804995894432,
|
|
"fcm_dpo/margin": 20.464195251464844,
|
|
"fcm_dpo/q_t": 0.3978845477104187,
|
|
"grad_norm": 100.74219512939453,
|
|
"learning_rate": 4.2838744935687716e-07,
|
|
"logits/chosen": -0.7908228635787964,
|
|
"logits/rejected": -0.7928870916366577,
|
|
"logps/chosen": -299.3333435058594,
|
|
"logps/ref_chosen": -285.8612060546875,
|
|
"logps/ref_rejected": -300.1272888183594,
|
|
"logps/rejected": -334.0636291503906,
|
|
"loss": 4.4532,
|
|
"margin_dpo/margin_mean": 20.464195251464844,
|
|
"margin_dpo/margin_std": 34.932029724121094,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.3267015706806283,
|
|
"fcm_dpo/beta": 0.024317309260368347,
|
|
"fcm_dpo/delta": -0.13548636436462402,
|
|
"fcm_dpo/margin": 29.958675384521484,
|
|
"fcm_dpo/q_t": 0.3527216911315918,
|
|
"grad_norm": 76.70926666259766,
|
|
"learning_rate": 4.271000354423425e-07,
|
|
"logits/chosen": -0.8232815265655518,
|
|
"logits/rejected": -0.8277627825737,
|
|
"logps/chosen": -291.4639587402344,
|
|
"logps/ref_chosen": -279.0354919433594,
|
|
"logps/ref_rejected": -244.2198486328125,
|
|
"logps/rejected": -286.6070251464844,
|
|
"loss": 3.9783,
|
|
"margin_dpo/margin_mean": 29.95867919921875,
|
|
"margin_dpo/margin_std": 39.6899299621582,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.3287958115183246,
|
|
"fcm_dpo/beta": 0.0227323267608881,
|
|
"fcm_dpo/delta": 0.0647030621767044,
|
|
"fcm_dpo/margin": 21.048620223999023,
|
|
"fcm_dpo/q_t": 0.39951539039611816,
|
|
"grad_norm": 85.27225494384766,
|
|
"learning_rate": 4.258031241903777e-07,
|
|
"logits/chosen": -0.8875189423561096,
|
|
"logits/rejected": -0.8885977864265442,
|
|
"logps/chosen": -287.203125,
|
|
"logps/ref_chosen": -270.830322265625,
|
|
"logps/ref_rejected": -259.08319091796875,
|
|
"logps/rejected": -296.504638671875,
|
|
"loss": 4.4747,
|
|
"margin_dpo/margin_mean": 21.048620223999023,
|
|
"margin_dpo/margin_std": 34.90514373779297,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.3308900523560209,
|
|
"fcm_dpo/beta": 0.02372920699417591,
|
|
"fcm_dpo/delta": -0.022162986919283867,
|
|
"fcm_dpo/margin": 23.346778869628906,
|
|
"fcm_dpo/q_t": 0.38233768939971924,
|
|
"grad_norm": 88.78839874267578,
|
|
"learning_rate": 4.2449678515039743e-07,
|
|
"logits/chosen": -0.8333015441894531,
|
|
"logits/rejected": -0.822943389415741,
|
|
"logps/chosen": -306.6914367675781,
|
|
"logps/ref_chosen": -289.9663391113281,
|
|
"logps/ref_rejected": -271.335693359375,
|
|
"logps/rejected": -311.4075927734375,
|
|
"loss": 4.2801,
|
|
"margin_dpo/margin_mean": 23.346778869628906,
|
|
"margin_dpo/margin_std": 34.549774169921875,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.33298429319371725,
|
|
"fcm_dpo/beta": 0.023554343730211258,
|
|
"fcm_dpo/delta": 0.04829606041312218,
|
|
"fcm_dpo/margin": 18.543855667114258,
|
|
"fcm_dpo/q_t": 0.4130256772041321,
|
|
"grad_norm": 94.4974136352539,
|
|
"learning_rate": 4.2318108837739986e-07,
|
|
"logits/chosen": -0.9109346270561218,
|
|
"logits/rejected": -0.8718158602714539,
|
|
"logps/chosen": -340.6222839355469,
|
|
"logps/ref_chosen": -321.37835693359375,
|
|
"logps/ref_rejected": -250.45652770996094,
|
|
"logps/rejected": -288.24432373046875,
|
|
"loss": 4.8111,
|
|
"margin_dpo/margin_mean": 18.54385757446289,
|
|
"margin_dpo/margin_std": 39.477230072021484,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.33507853403141363,
|
|
"fcm_dpo/beta": 0.023043226450681686,
|
|
"fcm_dpo/delta": -0.07298657298088074,
|
|
"fcm_dpo/margin": 28.90218734741211,
|
|
"fcm_dpo/q_t": 0.3582006096839905,
|
|
"grad_norm": 87.2422866821289,
|
|
"learning_rate": 4.218561044282098e-07,
|
|
"logits/chosen": -0.8484607338905334,
|
|
"logits/rejected": -0.8577648401260376,
|
|
"logps/chosen": -291.74420166015625,
|
|
"logps/ref_chosen": -276.28350830078125,
|
|
"logps/ref_rejected": -262.7477722167969,
|
|
"logps/rejected": -307.1106262207031,
|
|
"loss": 3.88,
|
|
"margin_dpo/margin_mean": 28.90218734741211,
|
|
"margin_dpo/margin_std": 33.74877166748047,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.33717277486910996,
|
|
"fcm_dpo/beta": 0.02312248945236206,
|
|
"fcm_dpo/delta": -0.00832156278192997,
|
|
"fcm_dpo/margin": 26.23219108581543,
|
|
"fcm_dpo/q_t": 0.3745940327644348,
|
|
"grad_norm": 87.6370620727539,
|
|
"learning_rate": 4.2052190435769554e-07,
|
|
"logits/chosen": -0.8637784719467163,
|
|
"logits/rejected": -0.8568350076675415,
|
|
"logps/chosen": -329.4377746582031,
|
|
"logps/ref_chosen": -310.4927978515625,
|
|
"logps/ref_rejected": -250.25347900390625,
|
|
"logps/rejected": -295.4306335449219,
|
|
"loss": 4.238,
|
|
"margin_dpo/margin_mean": 26.23219108581543,
|
|
"margin_dpo/margin_std": 39.27847671508789,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.3392670157068063,
|
|
"fcm_dpo/beta": 0.02204562909901142,
|
|
"fcm_dpo/delta": -0.00742918998003006,
|
|
"fcm_dpo/margin": 22.998626708984375,
|
|
"fcm_dpo/q_t": 0.3925955891609192,
|
|
"grad_norm": 84.41416931152344,
|
|
"learning_rate": 4.1917855971495763e-07,
|
|
"logits/chosen": -0.845470130443573,
|
|
"logits/rejected": -0.8392305374145508,
|
|
"logps/chosen": -313.5657653808594,
|
|
"logps/ref_chosen": -296.1105041503906,
|
|
"logps/ref_rejected": -253.4247589111328,
|
|
"logps/rejected": -293.8786926269531,
|
|
"loss": 4.4313,
|
|
"margin_dpo/margin_mean": 22.998626708984375,
|
|
"margin_dpo/margin_std": 37.05625534057617,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.3413612565445026,
|
|
"fcm_dpo/beta": 0.023259364068508148,
|
|
"fcm_dpo/delta": 0.029428036883473396,
|
|
"fcm_dpo/margin": 24.4699764251709,
|
|
"fcm_dpo/q_t": 0.37805965542793274,
|
|
"grad_norm": 105.51744842529297,
|
|
"learning_rate": 4.1782614253949255e-07,
|
|
"logits/chosen": -0.8878765106201172,
|
|
"logits/rejected": -0.8931166529655457,
|
|
"logps/chosen": -313.3696594238281,
|
|
"logps/ref_chosen": -293.4999084472656,
|
|
"logps/ref_rejected": -266.7116394042969,
|
|
"logps/rejected": -311.0514221191406,
|
|
"loss": 4.2088,
|
|
"margin_dpo/margin_mean": 24.4699764251709,
|
|
"margin_dpo/margin_std": 34.383941650390625,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.34345549738219894,
|
|
"fcm_dpo/beta": 0.02358204685151577,
|
|
"fcm_dpo/delta": -0.005520589649677277,
|
|
"fcm_dpo/margin": 25.577049255371094,
|
|
"fcm_dpo/q_t": 0.3782970905303955,
|
|
"grad_norm": 93.9336929321289,
|
|
"learning_rate": 4.164647253573289e-07,
|
|
"logits/chosen": -0.8413535356521606,
|
|
"logits/rejected": -0.8617441654205322,
|
|
"logps/chosen": -291.18902587890625,
|
|
"logps/ref_chosen": -267.04949951171875,
|
|
"logps/ref_rejected": -215.9768829345703,
|
|
"logps/rejected": -265.6934509277344,
|
|
"loss": 4.2881,
|
|
"margin_dpo/margin_mean": 25.577049255371094,
|
|
"margin_dpo/margin_std": 39.729583740234375,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.34554973821989526,
|
|
"fcm_dpo/beta": 0.023120472207665443,
|
|
"fcm_dpo/delta": 0.014161716215312481,
|
|
"fcm_dpo/margin": 20.357099533081055,
|
|
"fcm_dpo/q_t": 0.40322345495224,
|
|
"grad_norm": 96.0807113647461,
|
|
"learning_rate": 4.1509438117713863e-07,
|
|
"logits/chosen": -0.8848339319229126,
|
|
"logits/rejected": -0.8598626255989075,
|
|
"logps/chosen": -296.1021728515625,
|
|
"logps/ref_chosen": -278.06146240234375,
|
|
"logps/ref_rejected": -260.4288635253906,
|
|
"logps/rejected": -298.82666015625,
|
|
"loss": 4.5232,
|
|
"margin_dpo/margin_mean": 20.357099533081055,
|
|
"margin_dpo/margin_std": 35.15179443359375,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.34764397905759165,
|
|
"fcm_dpo/beta": 0.02365921624004841,
|
|
"fcm_dpo/delta": 0.07884444296360016,
|
|
"fcm_dpo/margin": 22.211210250854492,
|
|
"fcm_dpo/q_t": 0.3956853151321411,
|
|
"grad_norm": 100.91581726074219,
|
|
"learning_rate": 4.137151834863213e-07,
|
|
"logits/chosen": -0.8429009914398193,
|
|
"logits/rejected": -0.8110395669937134,
|
|
"logps/chosen": -292.6982727050781,
|
|
"logps/ref_chosen": -275.9490661621094,
|
|
"logps/ref_rejected": -232.13473510742188,
|
|
"logps/rejected": -271.09515380859375,
|
|
"loss": 4.4726,
|
|
"margin_dpo/margin_mean": 22.211214065551758,
|
|
"margin_dpo/margin_std": 38.937843322753906,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.34973821989528797,
|
|
"fcm_dpo/beta": 0.025131061673164368,
|
|
"fcm_dpo/delta": 0.01401679590344429,
|
|
"fcm_dpo/margin": 23.275146484375,
|
|
"fcm_dpo/q_t": 0.382481187582016,
|
|
"grad_norm": 97.39994049072266,
|
|
"learning_rate": 4.123272062470633e-07,
|
|
"logits/chosen": -0.8488789796829224,
|
|
"logits/rejected": -0.8377172946929932,
|
|
"logps/chosen": -299.45098876953125,
|
|
"logps/ref_chosen": -280.5514221191406,
|
|
"logps/ref_rejected": -255.2896728515625,
|
|
"logps/rejected": -297.46441650390625,
|
|
"loss": 4.4208,
|
|
"margin_dpo/margin_mean": 23.275146484375,
|
|
"margin_dpo/margin_std": 39.44821548461914,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.3518324607329843,
|
|
"fcm_dpo/beta": 0.023984873667359352,
|
|
"fcm_dpo/delta": -0.06481810659170151,
|
|
"fcm_dpo/margin": 25.343101501464844,
|
|
"fcm_dpo/q_t": 0.3719956874847412,
|
|
"grad_norm": 296.59173583984375,
|
|
"learning_rate": 4.1093052389237174e-07,
|
|
"logits/chosen": -0.8263663649559021,
|
|
"logits/rejected": -0.8027467727661133,
|
|
"logps/chosen": -334.6053771972656,
|
|
"logps/ref_chosen": -315.7982177734375,
|
|
"logps/ref_rejected": -291.48406982421875,
|
|
"logps/rejected": -335.63433837890625,
|
|
"loss": 4.4031,
|
|
"margin_dpo/margin_mean": 25.34310531616211,
|
|
"margin_dpo/margin_std": 42.009727478027344,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.3539267015706806,
|
|
"fcm_dpo/beta": 0.022055521607398987,
|
|
"fcm_dpo/delta": -0.17693692445755005,
|
|
"fcm_dpo/margin": 34.57185363769531,
|
|
"fcm_dpo/q_t": 0.34355735778808594,
|
|
"grad_norm": 79.44908905029297,
|
|
"learning_rate": 4.0952521132208267e-07,
|
|
"logits/chosen": -0.8222439885139465,
|
|
"logits/rejected": -0.8391299843788147,
|
|
"logps/chosen": -275.7101745605469,
|
|
"logps/ref_chosen": -261.06427001953125,
|
|
"logps/ref_rejected": -235.40663146972656,
|
|
"logps/rejected": -284.6243896484375,
|
|
"loss": 3.6944,
|
|
"margin_dpo/margin_mean": 34.57185363769531,
|
|
"margin_dpo/margin_std": 37.923160552978516,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.35602094240837695,
|
|
"fcm_dpo/beta": 0.02063800022006035,
|
|
"fcm_dpo/delta": 0.05438760668039322,
|
|
"fcm_dpo/margin": 26.59862518310547,
|
|
"fcm_dpo/q_t": 0.3886667490005493,
|
|
"grad_norm": 96.43052673339844,
|
|
"learning_rate": 4.081113438988443e-07,
|
|
"logits/chosen": -0.7964289784431458,
|
|
"logits/rejected": -0.7981937527656555,
|
|
"logps/chosen": -324.3586120605469,
|
|
"logps/ref_chosen": -308.96722412109375,
|
|
"logps/ref_rejected": -263.8466796875,
|
|
"logps/rejected": -305.836669921875,
|
|
"loss": 4.3947,
|
|
"margin_dpo/margin_mean": 26.598623275756836,
|
|
"margin_dpo/margin_std": 45.38837432861328,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.3581151832460733,
|
|
"fcm_dpo/beta": 0.020479857921600342,
|
|
"fcm_dpo/delta": -0.09359031170606613,
|
|
"fcm_dpo/margin": 30.383586883544922,
|
|
"fcm_dpo/q_t": 0.36607781052589417,
|
|
"grad_norm": 93.717529296875,
|
|
"learning_rate": 4.0668899744407567e-07,
|
|
"logits/chosen": -0.8218968510627747,
|
|
"logits/rejected": -0.8354977369308472,
|
|
"logps/chosen": -269.9103698730469,
|
|
"logps/ref_chosen": -258.8890380859375,
|
|
"logps/ref_rejected": -262.19140625,
|
|
"logps/rejected": -303.5963439941406,
|
|
"loss": 3.9434,
|
|
"margin_dpo/margin_mean": 30.383586883544922,
|
|
"margin_dpo/margin_std": 35.17938995361328,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.36020942408376966,
|
|
"fcm_dpo/beta": 0.021176544949412346,
|
|
"fcm_dpo/delta": 0.16231057047843933,
|
|
"fcm_dpo/margin": 16.054502487182617,
|
|
"fcm_dpo/q_t": 0.42630359530448914,
|
|
"grad_norm": 96.79520416259766,
|
|
"learning_rate": 4.0525824823390043e-07,
|
|
"logits/chosen": -0.8345335125923157,
|
|
"logits/rejected": -0.853988766670227,
|
|
"logps/chosen": -352.2255554199219,
|
|
"logps/ref_chosen": -339.0223388671875,
|
|
"logps/ref_rejected": -295.78759765625,
|
|
"logps/rejected": -325.0453186035156,
|
|
"loss": 4.8841,
|
|
"margin_dpo/margin_mean": 16.054502487182617,
|
|
"margin_dpo/margin_std": 36.764705657958984,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.362303664921466,
|
|
"fcm_dpo/beta": 0.023571645841002464,
|
|
"fcm_dpo/delta": 0.06804777681827545,
|
|
"fcm_dpo/margin": 22.532241821289062,
|
|
"fcm_dpo/q_t": 0.3934495151042938,
|
|
"grad_norm": 84.94215393066406,
|
|
"learning_rate": 4.0381917299505686e-07,
|
|
"logits/chosen": -0.8429185748100281,
|
|
"logits/rejected": -0.8447529077529907,
|
|
"logps/chosen": -313.66534423828125,
|
|
"logps/ref_chosen": -300.1114501953125,
|
|
"logps/ref_rejected": -273.78460693359375,
|
|
"logps/rejected": -309.87078857421875,
|
|
"loss": 4.4559,
|
|
"margin_dpo/margin_mean": 22.532241821289062,
|
|
"margin_dpo/margin_std": 38.33403015136719,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.3643979057591623,
|
|
"fcm_dpo/beta": 0.023526517674326897,
|
|
"fcm_dpo/delta": -0.038947440683841705,
|
|
"fcm_dpo/margin": 27.008258819580078,
|
|
"fcm_dpo/q_t": 0.3678101897239685,
|
|
"grad_norm": 109.56539154052734,
|
|
"learning_rate": 4.0237184890078243e-07,
|
|
"logits/chosen": -0.8134390711784363,
|
|
"logits/rejected": -0.8019281625747681,
|
|
"logps/chosen": -348.16650390625,
|
|
"logps/ref_chosen": -335.0538635253906,
|
|
"logps/ref_rejected": -257.4646911621094,
|
|
"logps/rejected": -297.5855407714844,
|
|
"loss": 4.0733,
|
|
"margin_dpo/margin_mean": 27.008256912231445,
|
|
"margin_dpo/margin_std": 36.92762756347656,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.36649214659685864,
|
|
"fcm_dpo/beta": 0.023315949365496635,
|
|
"fcm_dpo/delta": -0.02021496742963791,
|
|
"fcm_dpo/margin": 26.327842712402344,
|
|
"fcm_dpo/q_t": 0.3810538947582245,
|
|
"grad_norm": 105.11174011230469,
|
|
"learning_rate": 4.00916353566676e-07,
|
|
"logits/chosen": -0.8290956616401672,
|
|
"logits/rejected": -0.8322280645370483,
|
|
"logps/chosen": -303.4194030761719,
|
|
"logps/ref_chosen": -284.39556884765625,
|
|
"logps/ref_rejected": -283.3876647949219,
|
|
"logps/rejected": -328.7392883300781,
|
|
"loss": 4.3747,
|
|
"margin_dpo/margin_mean": 26.327844619750977,
|
|
"margin_dpo/margin_std": 42.5020637512207,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.36858638743455496,
|
|
"fcm_dpo/beta": 0.023749521002173424,
|
|
"fcm_dpo/delta": 0.04947128891944885,
|
|
"fcm_dpo/margin": 20.36212158203125,
|
|
"fcm_dpo/q_t": 0.40329134464263916,
|
|
"grad_norm": 95.4178695678711,
|
|
"learning_rate": 3.994527650465352e-07,
|
|
"logits/chosen": -0.7997909784317017,
|
|
"logits/rejected": -0.8140876293182373,
|
|
"logps/chosen": -271.2232360839844,
|
|
"logps/ref_chosen": -251.81280517578125,
|
|
"logps/ref_rejected": -242.05328369140625,
|
|
"logps/rejected": -281.8258361816406,
|
|
"loss": 4.8359,
|
|
"margin_dpo/margin_mean": 20.36212158203125,
|
|
"margin_dpo/margin_std": 43.5911750793457,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.3706806282722513,
|
|
"fcm_dpo/beta": 0.023227877914905548,
|
|
"fcm_dpo/delta": -0.04320107400417328,
|
|
"fcm_dpo/margin": 20.517908096313477,
|
|
"fcm_dpo/q_t": 0.40150418877601624,
|
|
"grad_norm": 95.16880798339844,
|
|
"learning_rate": 3.979811618281705e-07,
|
|
"logits/chosen": -0.8828033804893494,
|
|
"logits/rejected": -0.8596282005310059,
|
|
"logps/chosen": -318.2162780761719,
|
|
"logps/ref_chosen": -298.6463928222656,
|
|
"logps/ref_rejected": -295.66534423828125,
|
|
"logps/rejected": -335.75311279296875,
|
|
"loss": 4.7767,
|
|
"margin_dpo/margin_mean": 20.517908096313477,
|
|
"margin_dpo/margin_std": 41.196895599365234,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.37277486910994767,
|
|
"fcm_dpo/beta": 0.02242261730134487,
|
|
"fcm_dpo/delta": -0.029734821990132332,
|
|
"fcm_dpo/margin": 27.962230682373047,
|
|
"fcm_dpo/q_t": 0.3739369809627533,
|
|
"grad_norm": 87.00016021728516,
|
|
"learning_rate": 3.9650162282919654e-07,
|
|
"logits/chosen": -0.7981923222541809,
|
|
"logits/rejected": -0.7972285747528076,
|
|
"logps/chosen": -301.7319641113281,
|
|
"logps/ref_chosen": -286.2576599121094,
|
|
"logps/ref_rejected": -243.97491455078125,
|
|
"logps/rejected": -287.41143798828125,
|
|
"loss": 4.1371,
|
|
"margin_dpo/margin_mean": 27.962230682373047,
|
|
"margin_dpo/margin_std": 40.20293426513672,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.374869109947644,
|
|
"fcm_dpo/beta": 0.021783435717225075,
|
|
"fcm_dpo/delta": -0.04039537161588669,
|
|
"fcm_dpo/margin": 23.8038272857666,
|
|
"fcm_dpo/q_t": 0.392859548330307,
|
|
"grad_norm": 93.1056137084961,
|
|
"learning_rate": 3.9501422739279953e-07,
|
|
"logits/chosen": -0.7682486772537231,
|
|
"logits/rejected": -0.7733548283576965,
|
|
"logps/chosen": -276.9896240234375,
|
|
"logps/ref_chosen": -259.737060546875,
|
|
"logps/ref_rejected": -277.8813171386719,
|
|
"logps/rejected": -318.9377136230469,
|
|
"loss": 4.5235,
|
|
"margin_dpo/margin_mean": 23.80382537841797,
|
|
"margin_dpo/margin_std": 41.399452209472656,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.3769633507853403,
|
|
"fcm_dpo/beta": 0.02132536470890045,
|
|
"fcm_dpo/delta": -0.050105344504117966,
|
|
"fcm_dpo/margin": 28.025800704956055,
|
|
"fcm_dpo/q_t": 0.378864049911499,
|
|
"grad_norm": 80.81954956054688,
|
|
"learning_rate": 3.935190552834828e-07,
|
|
"logits/chosen": -0.8179333209991455,
|
|
"logits/rejected": -0.8522875905036926,
|
|
"logps/chosen": -284.9200744628906,
|
|
"logps/ref_chosen": -267.30889892578125,
|
|
"logps/ref_rejected": -230.4376983642578,
|
|
"logps/rejected": -276.0746765136719,
|
|
"loss": 4.1849,
|
|
"margin_dpo/margin_mean": 28.025800704956055,
|
|
"margin_dpo/margin_std": 40.71231460571289,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.37905759162303665,
|
|
"fcm_dpo/beta": 0.021433616057038307,
|
|
"fcm_dpo/delta": 0.09068157523870468,
|
|
"fcm_dpo/margin": 23.975404739379883,
|
|
"fcm_dpo/q_t": 0.39596718549728394,
|
|
"grad_norm": 105.24143981933594,
|
|
"learning_rate": 3.920161866827889e-07,
|
|
"logits/chosen": -0.8095259666442871,
|
|
"logits/rejected": -0.8213891386985779,
|
|
"logps/chosen": -321.52716064453125,
|
|
"logps/ref_chosen": -300.49139404296875,
|
|
"logps/ref_rejected": -278.98284912109375,
|
|
"logps/rejected": -323.9939880371094,
|
|
"loss": 4.5794,
|
|
"margin_dpo/margin_mean": 23.97540283203125,
|
|
"margin_dpo/margin_std": 44.497955322265625,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.381151832460733,
|
|
"fcm_dpo/beta": 0.02187720127403736,
|
|
"fcm_dpo/delta": -0.12704817950725555,
|
|
"fcm_dpo/margin": 32.94600296020508,
|
|
"fcm_dpo/q_t": 0.350864440202713,
|
|
"grad_norm": 94.21673583984375,
|
|
"learning_rate": 3.90505702185e-07,
|
|
"logits/chosen": -0.7871803045272827,
|
|
"logits/rejected": -0.8218678832054138,
|
|
"logps/chosen": -297.6783142089844,
|
|
"logps/ref_chosen": -279.4981689453125,
|
|
"logps/ref_rejected": -263.6926574707031,
|
|
"logps/rejected": -314.8188171386719,
|
|
"loss": 3.8389,
|
|
"margin_dpo/margin_mean": 32.94600296020508,
|
|
"margin_dpo/margin_std": 39.00600051879883,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.3832460732984293,
|
|
"fcm_dpo/beta": 0.020077742636203766,
|
|
"fcm_dpo/delta": 0.016617465764284134,
|
|
"fcm_dpo/margin": 29.090024948120117,
|
|
"fcm_dpo/q_t": 0.38179779052734375,
|
|
"grad_norm": 83.8680191040039,
|
|
"learning_rate": 3.889876827928156e-07,
|
|
"logits/chosen": -0.842463493347168,
|
|
"logits/rejected": -0.8533914685249329,
|
|
"logps/chosen": -289.95166015625,
|
|
"logps/ref_chosen": -271.2057189941406,
|
|
"logps/ref_rejected": -243.91549682617188,
|
|
"logps/rejected": -291.75146484375,
|
|
"loss": 4.2765,
|
|
"margin_dpo/margin_mean": 29.090024948120117,
|
|
"margin_dpo/margin_std": 45.345638275146484,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.38534031413612563,
|
|
"fcm_dpo/beta": 0.018667876720428467,
|
|
"fcm_dpo/delta": -0.12012484669685364,
|
|
"fcm_dpo/margin": 37.77571487426758,
|
|
"fcm_dpo/q_t": 0.35291537642478943,
|
|
"grad_norm": 92.6821060180664,
|
|
"learning_rate": 3.874622099130087e-07,
|
|
"logits/chosen": -0.8658108711242676,
|
|
"logits/rejected": -0.8556749820709229,
|
|
"logps/chosen": -331.8802185058594,
|
|
"logps/ref_chosen": -318.4457702636719,
|
|
"logps/ref_rejected": -266.640869140625,
|
|
"logps/rejected": -317.8509826660156,
|
|
"loss": 3.8875,
|
|
"margin_dpo/margin_mean": 37.77571487426758,
|
|
"margin_dpo/margin_std": 46.467491149902344,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.387434554973822,
|
|
"fcm_dpo/beta": 0.018280260264873505,
|
|
"fcm_dpo/delta": -0.017129220068454742,
|
|
"fcm_dpo/margin": 31.00004768371582,
|
|
"fcm_dpo/q_t": 0.382385790348053,
|
|
"grad_norm": 80.60724639892578,
|
|
"learning_rate": 3.859293653520604e-07,
|
|
"logits/chosen": -0.854312539100647,
|
|
"logits/rejected": -0.8550869226455688,
|
|
"logps/chosen": -296.79412841796875,
|
|
"logps/ref_chosen": -274.308837890625,
|
|
"logps/ref_rejected": -260.7274169921875,
|
|
"logps/rejected": -314.2127685546875,
|
|
"loss": 4.2308,
|
|
"margin_dpo/margin_mean": 31.000051498413086,
|
|
"margin_dpo/margin_std": 45.35227584838867,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.38952879581151834,
|
|
"fcm_dpo/beta": 0.018405750393867493,
|
|
"fcm_dpo/delta": 0.0071517787873744965,
|
|
"fcm_dpo/margin": 29.293418884277344,
|
|
"fcm_dpo/q_t": 0.3844657838344574,
|
|
"grad_norm": 82.40447998046875,
|
|
"learning_rate": 3.8438923131177237e-07,
|
|
"logits/chosen": -0.8595123291015625,
|
|
"logits/rejected": -0.8700802326202393,
|
|
"logps/chosen": -321.4861145019531,
|
|
"logps/ref_chosen": -299.00537109375,
|
|
"logps/ref_rejected": -274.4014587402344,
|
|
"logps/rejected": -326.1756286621094,
|
|
"loss": 4.2669,
|
|
"margin_dpo/margin_mean": 29.293418884277344,
|
|
"margin_dpo/margin_std": 41.822120666503906,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.39162303664921466,
|
|
"fcm_dpo/beta": 0.01949167065322399,
|
|
"fcm_dpo/delta": 0.11492104828357697,
|
|
"fcm_dpo/margin": 25.117904663085938,
|
|
"fcm_dpo/q_t": 0.39704573154449463,
|
|
"grad_norm": 108.40086364746094,
|
|
"learning_rate": 3.828418903848593e-07,
|
|
"logits/chosen": -0.8057087659835815,
|
|
"logits/rejected": -0.800156831741333,
|
|
"logps/chosen": -356.62225341796875,
|
|
"logps/ref_chosen": -329.8253173828125,
|
|
"logps/ref_rejected": -263.73175048828125,
|
|
"logps/rejected": -315.6466064453125,
|
|
"loss": 4.6723,
|
|
"margin_dpo/margin_mean": 25.117904663085938,
|
|
"margin_dpo/margin_std": 48.73664093017578,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.393717277486911,
|
|
"fcm_dpo/beta": 0.01967058703303337,
|
|
"fcm_dpo/delta": -0.03304888680577278,
|
|
"fcm_dpo/margin": 29.974576950073242,
|
|
"fcm_dpo/q_t": 0.38121888041496277,
|
|
"grad_norm": 85.1061019897461,
|
|
"learning_rate": 3.812874255505191e-07,
|
|
"logits/chosen": -0.8419395089149475,
|
|
"logits/rejected": -0.8400317430496216,
|
|
"logps/chosen": -289.6829528808594,
|
|
"logps/ref_chosen": -263.005615234375,
|
|
"logps/ref_rejected": -247.08668518066406,
|
|
"logps/rejected": -303.7385559082031,
|
|
"loss": 4.4389,
|
|
"margin_dpo/margin_mean": 29.974576950073242,
|
|
"margin_dpo/margin_std": 50.47289276123047,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.3958115183246073,
|
|
"fcm_dpo/beta": 0.018633361905813217,
|
|
"fcm_dpo/delta": -0.060549549758434296,
|
|
"fcm_dpo/margin": 35.01060104370117,
|
|
"fcm_dpo/q_t": 0.3630969822406769,
|
|
"grad_norm": 82.80532836914062,
|
|
"learning_rate": 3.797259201699833e-07,
|
|
"logits/chosen": -0.859175443649292,
|
|
"logits/rejected": -0.8690008521080017,
|
|
"logps/chosen": -291.63153076171875,
|
|
"logps/ref_chosen": -272.96038818359375,
|
|
"logps/ref_rejected": -275.13238525390625,
|
|
"logps/rejected": -328.81414794921875,
|
|
"loss": 3.9104,
|
|
"margin_dpo/margin_mean": 35.01060104370117,
|
|
"margin_dpo/margin_std": 41.501155853271484,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.39790575916230364,
|
|
"fcm_dpo/beta": 0.018668456003069878,
|
|
"fcm_dpo/delta": 0.0034092608839273453,
|
|
"fcm_dpo/margin": 31.924047470092773,
|
|
"fcm_dpo/q_t": 0.3739194869995117,
|
|
"grad_norm": 86.3962173461914,
|
|
"learning_rate": 3.781574579820464e-07,
|
|
"logits/chosen": -0.8613168597221375,
|
|
"logits/rejected": -0.8277738094329834,
|
|
"logps/chosen": -275.919677734375,
|
|
"logps/ref_chosen": -257.79754638671875,
|
|
"logps/ref_rejected": -225.2164306640625,
|
|
"logps/rejected": -275.2625732421875,
|
|
"loss": 4.0864,
|
|
"margin_dpo/margin_mean": 31.924047470092773,
|
|
"margin_dpo/margin_std": 42.496273040771484,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"fcm_dpo/beta": 0.018610456958413124,
|
|
"fcm_dpo/delta": -0.014111967757344246,
|
|
"fcm_dpo/margin": 31.048202514648438,
|
|
"fcm_dpo/q_t": 0.3805280923843384,
|
|
"grad_norm": 87.75660705566406,
|
|
"learning_rate": 3.765821230985757e-07,
|
|
"logits/chosen": -0.8736047148704529,
|
|
"logits/rejected": -0.8768740296363831,
|
|
"logps/chosen": -260.75518798828125,
|
|
"logps/ref_chosen": -243.8585205078125,
|
|
"logps/ref_rejected": -245.12136840820312,
|
|
"logps/rejected": -293.0662536621094,
|
|
"loss": 4.2548,
|
|
"margin_dpo/margin_mean": 31.048202514648438,
|
|
"margin_dpo/margin_std": 46.76060104370117,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.40209424083769635,
|
|
"fcm_dpo/beta": 0.018671073019504547,
|
|
"fcm_dpo/delta": 0.009804993867874146,
|
|
"fcm_dpo/margin": 25.756927490234375,
|
|
"fcm_dpo/q_t": 0.39886969327926636,
|
|
"grad_norm": 83.8148193359375,
|
|
"learning_rate": 3.75e-07,
|
|
"logits/chosen": -0.825681746006012,
|
|
"logits/rejected": -0.8136826157569885,
|
|
"logps/chosen": -289.8357238769531,
|
|
"logps/ref_chosen": -266.9799499511719,
|
|
"logps/ref_rejected": -260.1697082519531,
|
|
"logps/rejected": -308.78240966796875,
|
|
"loss": 4.5612,
|
|
"margin_dpo/margin_mean": 25.756927490234375,
|
|
"margin_dpo/margin_std": 45.346221923828125,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.4041884816753927,
|
|
"fcm_dpo/beta": 0.018258847296237946,
|
|
"fcm_dpo/delta": -0.021077796816825867,
|
|
"fcm_dpo/margin": 30.8725643157959,
|
|
"fcm_dpo/q_t": 0.38373884558677673,
|
|
"grad_norm": 91.01241302490234,
|
|
"learning_rate": 3.734111735307796e-07,
|
|
"logits/chosen": -0.8784509897232056,
|
|
"logits/rejected": -0.8553139567375183,
|
|
"logps/chosen": -308.2591247558594,
|
|
"logps/ref_chosen": -280.25323486328125,
|
|
"logps/ref_rejected": -291.0348815917969,
|
|
"logps/rejected": -349.9133605957031,
|
|
"loss": 4.3134,
|
|
"margin_dpo/margin_mean": 30.872562408447266,
|
|
"margin_dpo/margin_std": 47.93418884277344,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.406282722513089,
|
|
"fcm_dpo/beta": 0.019326101988554,
|
|
"fcm_dpo/delta": 0.08527359366416931,
|
|
"fcm_dpo/margin": 23.81899070739746,
|
|
"fcm_dpo/q_t": 0.4056922197341919,
|
|
"grad_norm": 106.6082992553711,
|
|
"learning_rate": 3.7181572889485623e-07,
|
|
"logits/chosen": -0.8528724908828735,
|
|
"logits/rejected": -0.8473402261734009,
|
|
"logps/chosen": -318.2233581542969,
|
|
"logps/ref_chosen": -288.13946533203125,
|
|
"logps/ref_rejected": -251.31529235839844,
|
|
"logps/rejected": -305.2181701660156,
|
|
"loss": 4.5547,
|
|
"margin_dpo/margin_mean": 23.818988800048828,
|
|
"margin_dpo/margin_std": 42.86112594604492,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.4083769633507853,
|
|
"fcm_dpo/beta": 0.020995743572711945,
|
|
"fcm_dpo/delta": 0.09806863218545914,
|
|
"fcm_dpo/margin": 21.186416625976562,
|
|
"fcm_dpo/q_t": 0.41219669580459595,
|
|
"grad_norm": 105.31787872314453,
|
|
"learning_rate": 3.7021375165108377e-07,
|
|
"logits/chosen": -0.8652254343032837,
|
|
"logits/rejected": -0.8719401359558105,
|
|
"logps/chosen": -305.6102600097656,
|
|
"logps/ref_chosen": -274.0006408691406,
|
|
"logps/ref_rejected": -280.22723388671875,
|
|
"logps/rejected": -333.0232849121094,
|
|
"loss": 4.659,
|
|
"margin_dpo/margin_mean": 21.186416625976562,
|
|
"margin_dpo/margin_std": 41.24464797973633,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.41047120418848165,
|
|
"fcm_dpo/beta": 0.021107617765665054,
|
|
"fcm_dpo/delta": -0.02604127675294876,
|
|
"fcm_dpo/margin": 29.49114227294922,
|
|
"fcm_dpo/q_t": 0.37593233585357666,
|
|
"grad_norm": 109.97003173828125,
|
|
"learning_rate": 3.6860532770864005e-07,
|
|
"logits/chosen": -0.8447614908218384,
|
|
"logits/rejected": -0.8570613861083984,
|
|
"logps/chosen": -298.1605529785156,
|
|
"logps/ref_chosen": -274.90069580078125,
|
|
"logps/ref_rejected": -248.7281951904297,
|
|
"logps/rejected": -301.47918701171875,
|
|
"loss": 4.2574,
|
|
"margin_dpo/margin_mean": 29.49114227294922,
|
|
"margin_dpo/margin_std": 46.1149787902832,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.41256544502617803,
|
|
"fcm_dpo/beta": 0.02037704363465309,
|
|
"fcm_dpo/delta": -0.11450602114200592,
|
|
"fcm_dpo/margin": 34.678550720214844,
|
|
"fcm_dpo/q_t": 0.35536617040634155,
|
|
"grad_norm": 116.41548156738281,
|
|
"learning_rate": 3.6699054332241985e-07,
|
|
"logits/chosen": -0.8692039847373962,
|
|
"logits/rejected": -0.8588843941688538,
|
|
"logps/chosen": -335.359375,
|
|
"logps/ref_chosen": -309.5348205566406,
|
|
"logps/ref_rejected": -264.3179931640625,
|
|
"logps/rejected": -324.8210754394531,
|
|
"loss": 3.9332,
|
|
"margin_dpo/margin_mean": 34.678550720214844,
|
|
"margin_dpo/margin_std": 43.28546142578125,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.41465968586387436,
|
|
"fcm_dpo/beta": 0.0187942273914814,
|
|
"fcm_dpo/delta": -0.014170356094837189,
|
|
"fcm_dpo/margin": 32.54724884033203,
|
|
"fcm_dpo/q_t": 0.3783041536808014,
|
|
"grad_norm": 99.18403625488281,
|
|
"learning_rate": 3.653694850884091e-07,
|
|
"logits/chosen": -0.8634573221206665,
|
|
"logits/rejected": -0.841856062412262,
|
|
"logps/chosen": -326.5914306640625,
|
|
"logps/ref_chosen": -301.0134582519531,
|
|
"logps/ref_rejected": -292.84185791015625,
|
|
"logps/rejected": -350.9670715332031,
|
|
"loss": 4.31,
|
|
"margin_dpo/margin_mean": 32.5472526550293,
|
|
"margin_dpo/margin_std": 51.99414825439453,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.4167539267015707,
|
|
"fcm_dpo/beta": 0.01868726871907711,
|
|
"fcm_dpo/delta": -0.040653832256793976,
|
|
"fcm_dpo/margin": 31.534244537353516,
|
|
"fcm_dpo/q_t": 0.3784925043582916,
|
|
"grad_norm": 91.59637451171875,
|
|
"learning_rate": 3.6374223993904124e-07,
|
|
"logits/chosen": -0.8504621982574463,
|
|
"logits/rejected": -0.8154540061950684,
|
|
"logps/chosen": -290.4877014160156,
|
|
"logps/ref_chosen": -264.6058654785156,
|
|
"logps/ref_rejected": -214.9014892578125,
|
|
"logps/rejected": -272.31756591796875,
|
|
"loss": 4.1757,
|
|
"margin_dpo/margin_mean": 31.534244537353516,
|
|
"margin_dpo/margin_std": 45.6278190612793,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.418848167539267,
|
|
"fcm_dpo/beta": 0.018357042223215103,
|
|
"fcm_dpo/delta": 0.04033544659614563,
|
|
"fcm_dpo/margin": 28.317873001098633,
|
|
"fcm_dpo/q_t": 0.39727315306663513,
|
|
"grad_norm": 104.42108917236328,
|
|
"learning_rate": 3.621088951385353e-07,
|
|
"logits/chosen": -0.8921913504600525,
|
|
"logits/rejected": -0.8735958337783813,
|
|
"logps/chosen": -352.2391662597656,
|
|
"logps/ref_chosen": -324.1588134765625,
|
|
"logps/ref_rejected": -277.80218505859375,
|
|
"logps/rejected": -334.200439453125,
|
|
"loss": 4.6087,
|
|
"margin_dpo/margin_mean": 28.31787872314453,
|
|
"margin_dpo/margin_std": 53.46382522583008,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.418848167539267,
|
|
"eval_fcm_dpo/beta": 0.018857382237911224,
|
|
"eval_logits/chosen": -0.8679316639900208,
|
|
"eval_logits/rejected": -0.8609716296195984,
|
|
"eval_logps/chosen": -320.89276123046875,
|
|
"eval_logps/ref_chosen": -287.8267517089844,
|
|
"eval_logps/ref_rejected": -266.9313659667969,
|
|
"eval_logps/rejected": -329.564697265625,
|
|
"eval_loss": 0.5497193336486816,
|
|
"eval_margin_dpo/margin_mean": 29.56734848022461,
|
|
"eval_margin_dpo/margin_std": 48.380184173583984,
|
|
"eval_runtime": 81.4797,
|
|
"eval_samples_per_second": 24.546,
|
|
"eval_steps_per_second": 1.534,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.42094240837696334,
|
|
"fcm_dpo/beta": 0.019249822944402695,
|
|
"fcm_dpo/delta": -0.007784634828567505,
|
|
"fcm_dpo/margin": 31.416568756103516,
|
|
"fcm_dpo/q_t": 0.3744759261608124,
|
|
"grad_norm": 98.83305358886719,
|
|
"learning_rate": 3.604695382782159e-07,
|
|
"logits/chosen": -0.8689364194869995,
|
|
"logits/rejected": -0.8637883067131042,
|
|
"logps/chosen": -304.6473388671875,
|
|
"logps/ref_chosen": -271.49566650390625,
|
|
"logps/ref_rejected": -245.71414184570312,
|
|
"logps/rejected": -310.2823791503906,
|
|
"loss": 4.2192,
|
|
"margin_dpo/margin_mean": 31.416568756103516,
|
|
"margin_dpo/margin_std": 46.150325775146484,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.42303664921465967,
|
|
"fcm_dpo/beta": 0.018503909930586815,
|
|
"fcm_dpo/delta": -0.041275542229413986,
|
|
"fcm_dpo/margin": 31.27553939819336,
|
|
"fcm_dpo/q_t": 0.3803809881210327,
|
|
"grad_norm": 98.9993667602539,
|
|
"learning_rate": 3.588242572718162e-07,
|
|
"logits/chosen": -0.8732012510299683,
|
|
"logits/rejected": -0.8661995530128479,
|
|
"logps/chosen": -304.1346435546875,
|
|
"logps/ref_chosen": -272.0979309082031,
|
|
"logps/ref_rejected": -235.94805908203125,
|
|
"logps/rejected": -299.2603454589844,
|
|
"loss": 4.3415,
|
|
"margin_dpo/margin_mean": 31.27553939819336,
|
|
"margin_dpo/margin_std": 48.76369094848633,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.42513089005235605,
|
|
"fcm_dpo/beta": 0.01837236061692238,
|
|
"fcm_dpo/delta": 0.05643375590443611,
|
|
"fcm_dpo/margin": 24.213150024414062,
|
|
"fcm_dpo/q_t": 0.40584272146224976,
|
|
"grad_norm": 100.67874145507812,
|
|
"learning_rate": 3.571731403507635e-07,
|
|
"logits/chosen": -0.850642204284668,
|
|
"logits/rejected": -0.8625622391700745,
|
|
"logps/chosen": -318.47943115234375,
|
|
"logps/ref_chosen": -280.2221374511719,
|
|
"logps/ref_rejected": -251.79798889160156,
|
|
"logps/rejected": -314.2684326171875,
|
|
"loss": 4.5736,
|
|
"margin_dpo/margin_mean": 24.213150024414062,
|
|
"margin_dpo/margin_std": 43.66739273071289,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.4272251308900524,
|
|
"fcm_dpo/beta": 0.018126487731933594,
|
|
"fcm_dpo/delta": -0.06936343759298325,
|
|
"fcm_dpo/margin": 36.58906173706055,
|
|
"fcm_dpo/q_t": 0.3620806634426117,
|
|
"grad_norm": 95.67644500732422,
|
|
"learning_rate": 3.5551627605944746e-07,
|
|
"logits/chosen": -0.8942813277244568,
|
|
"logits/rejected": -0.8736305236816406,
|
|
"logps/chosen": -348.34130859375,
|
|
"logps/ref_chosen": -318.7960510253906,
|
|
"logps/ref_rejected": -269.69921875,
|
|
"logps/rejected": -335.83355712890625,
|
|
"loss": 3.9389,
|
|
"margin_dpo/margin_mean": 36.58906173706055,
|
|
"margin_dpo/margin_std": 46.73650360107422,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.4293193717277487,
|
|
"fcm_dpo/beta": 0.01768399402499199,
|
|
"fcm_dpo/delta": -0.04656511917710304,
|
|
"fcm_dpo/margin": 36.3321418762207,
|
|
"fcm_dpo/q_t": 0.36954307556152344,
|
|
"grad_norm": 89.59551239013672,
|
|
"learning_rate": 3.5385375325047163e-07,
|
|
"logits/chosen": -0.8388100862503052,
|
|
"logits/rejected": -0.8084380626678467,
|
|
"logps/chosen": -316.36187744140625,
|
|
"logps/ref_chosen": -283.7620544433594,
|
|
"logps/ref_rejected": -297.69439697265625,
|
|
"logps/rejected": -366.6263732910156,
|
|
"loss": 4.04,
|
|
"margin_dpo/margin_mean": 36.3321418762207,
|
|
"margin_dpo/margin_std": 49.33777618408203,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.431413612565445,
|
|
"fcm_dpo/beta": 0.017556358128786087,
|
|
"fcm_dpo/delta": 0.019014529883861542,
|
|
"fcm_dpo/margin": 30.180171966552734,
|
|
"fcm_dpo/q_t": 0.3905620276927948,
|
|
"grad_norm": 98.87091827392578,
|
|
"learning_rate": 3.5218566107988867e-07,
|
|
"logits/chosen": -0.8614488840103149,
|
|
"logits/rejected": -0.8843433260917664,
|
|
"logps/chosen": -330.2857971191406,
|
|
"logps/ref_chosen": -293.66387939453125,
|
|
"logps/ref_rejected": -291.3056640625,
|
|
"logps/rejected": -358.1078186035156,
|
|
"loss": 4.453,
|
|
"margin_dpo/margin_mean": 30.18017578125,
|
|
"margin_dpo/margin_std": 50.2305908203125,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.43350785340314135,
|
|
"fcm_dpo/beta": 0.017863312736153603,
|
|
"fcm_dpo/delta": 0.02159544639289379,
|
|
"fcm_dpo/margin": 29.29644775390625,
|
|
"fcm_dpo/q_t": 0.39205509424209595,
|
|
"grad_norm": 100.55647277832031,
|
|
"learning_rate": 3.505120890024195e-07,
|
|
"logits/chosen": -0.8120275735855103,
|
|
"logits/rejected": -0.8208277821540833,
|
|
"logps/chosen": -303.7646484375,
|
|
"logps/ref_chosen": -270.5350646972656,
|
|
"logps/ref_rejected": -278.7747497558594,
|
|
"logps/rejected": -341.30072021484375,
|
|
"loss": 4.5997,
|
|
"margin_dpo/margin_mean": 29.296445846557617,
|
|
"margin_dpo/margin_std": 55.331058502197266,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.4356020942408377,
|
|
"fcm_dpo/beta": 0.01773180440068245,
|
|
"fcm_dpo/delta": -0.038446761667728424,
|
|
"fcm_dpo/margin": 35.76897430419922,
|
|
"fcm_dpo/q_t": 0.37097251415252686,
|
|
"grad_norm": 86.00871276855469,
|
|
"learning_rate": 3.4883312676665534e-07,
|
|
"logits/chosen": -0.8688828945159912,
|
|
"logits/rejected": -0.8223684430122375,
|
|
"logps/chosen": -317.2559509277344,
|
|
"logps/ref_chosen": -279.582763671875,
|
|
"logps/ref_rejected": -290.041015625,
|
|
"logps/rejected": -363.483154296875,
|
|
"loss": 4.108,
|
|
"margin_dpo/margin_mean": 35.76897430419922,
|
|
"margin_dpo/margin_std": 50.78927993774414,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.437696335078534,
|
|
"fcm_dpo/beta": 0.018073974177241325,
|
|
"fcm_dpo/delta": 0.07754447311162949,
|
|
"fcm_dpo/margin": 26.4322566986084,
|
|
"fcm_dpo/q_t": 0.40094897150993347,
|
|
"grad_norm": 106.75226593017578,
|
|
"learning_rate": 3.4714886441024573e-07,
|
|
"logits/chosen": -0.7833099365234375,
|
|
"logits/rejected": -0.7856354713439941,
|
|
"logps/chosen": -360.97906494140625,
|
|
"logps/ref_chosen": -318.8725280761719,
|
|
"logps/ref_rejected": -270.64324951171875,
|
|
"logps/rejected": -339.18206787109375,
|
|
"loss": 4.7071,
|
|
"margin_dpo/margin_mean": 26.4322566986084,
|
|
"margin_dpo/margin_std": 52.98542785644531,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.4397905759162304,
|
|
"fcm_dpo/beta": 0.018200790509581566,
|
|
"fcm_dpo/delta": 0.0208455678075552,
|
|
"fcm_dpo/margin": 31.805618286132812,
|
|
"fcm_dpo/q_t": 0.3811089098453522,
|
|
"grad_norm": 105.60123443603516,
|
|
"learning_rate": 3.454593922550693e-07,
|
|
"logits/chosen": -0.8259727358818054,
|
|
"logits/rejected": -0.8135036826133728,
|
|
"logps/chosen": -320.53704833984375,
|
|
"logps/ref_chosen": -283.14031982421875,
|
|
"logps/ref_rejected": -287.2986755371094,
|
|
"logps/rejected": -356.50103759765625,
|
|
"loss": 4.3226,
|
|
"margin_dpo/margin_mean": 31.805618286132812,
|
|
"margin_dpo/margin_std": 50.25780487060547,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.4418848167539267,
|
|
"fcm_dpo/beta": 0.01751658506691456,
|
|
"fcm_dpo/delta": -0.1294037252664566,
|
|
"fcm_dpo/margin": 40.80807876586914,
|
|
"fcm_dpo/q_t": 0.34755995869636536,
|
|
"grad_norm": 86.59803771972656,
|
|
"learning_rate": 3.4376480090239047e-07,
|
|
"logits/chosen": -0.8490579128265381,
|
|
"logits/rejected": -0.829590916633606,
|
|
"logps/chosen": -310.4613342285156,
|
|
"logps/ref_chosen": -276.4228515625,
|
|
"logps/ref_rejected": -252.40603637695312,
|
|
"logps/rejected": -327.25262451171875,
|
|
"loss": 3.7428,
|
|
"margin_dpo/margin_mean": 40.80807876586914,
|
|
"margin_dpo/margin_std": 43.20057678222656,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.44397905759162304,
|
|
"fcm_dpo/beta": 0.01712076924741268,
|
|
"fcm_dpo/delta": 0.03554587438702583,
|
|
"fcm_dpo/margin": 28.673137664794922,
|
|
"fcm_dpo/q_t": 0.39428529143333435,
|
|
"grad_norm": 94.82775115966797,
|
|
"learning_rate": 3.4206518122800055e-07,
|
|
"logits/chosen": -0.8299760818481445,
|
|
"logits/rejected": -0.8329156041145325,
|
|
"logps/chosen": -309.0224914550781,
|
|
"logps/ref_chosen": -271.7055358886719,
|
|
"logps/ref_rejected": -241.18511962890625,
|
|
"logps/rejected": -307.17529296875,
|
|
"loss": 4.4766,
|
|
"margin_dpo/margin_mean": 28.673141479492188,
|
|
"margin_dpo/margin_std": 47.071434020996094,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.44607329842931936,
|
|
"fcm_dpo/beta": 0.017721228301525116,
|
|
"fcm_dpo/delta": 0.03447887301445007,
|
|
"fcm_dpo/margin": 29.419530868530273,
|
|
"fcm_dpo/q_t": 0.3976650834083557,
|
|
"grad_norm": 103.67435455322266,
|
|
"learning_rate": 3.403606243773448e-07,
|
|
"logits/chosen": -0.824676513671875,
|
|
"logits/rejected": -0.8418750762939453,
|
|
"logps/chosen": -341.2528076171875,
|
|
"logps/ref_chosen": -302.2976379394531,
|
|
"logps/ref_rejected": -303.6202087402344,
|
|
"logps/rejected": -371.9948425292969,
|
|
"loss": 4.5048,
|
|
"margin_dpo/margin_mean": 29.41952896118164,
|
|
"margin_dpo/margin_std": 53.222564697265625,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.4481675392670157,
|
|
"fcm_dpo/beta": 0.017746904864907265,
|
|
"fcm_dpo/delta": -0.004792161285877228,
|
|
"fcm_dpo/margin": 33.86843490600586,
|
|
"fcm_dpo/q_t": 0.3744812309741974,
|
|
"grad_norm": 106.49107360839844,
|
|
"learning_rate": 3.3865122176063385e-07,
|
|
"logits/chosen": -0.830028772354126,
|
|
"logits/rejected": -0.8322975635528564,
|
|
"logps/chosen": -319.8704528808594,
|
|
"logps/ref_chosen": -272.13262939453125,
|
|
"logps/ref_rejected": -294.82354736328125,
|
|
"logps/rejected": -376.4298095703125,
|
|
"loss": 4.0953,
|
|
"margin_dpo/margin_mean": 33.86843490600586,
|
|
"margin_dpo/margin_std": 43.68943405151367,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.450261780104712,
|
|
"fcm_dpo/beta": 0.017707258462905884,
|
|
"fcm_dpo/delta": 0.005800800397992134,
|
|
"fcm_dpo/margin": 26.85211944580078,
|
|
"fcm_dpo/q_t": 0.40805721282958984,
|
|
"grad_norm": 100.26293182373047,
|
|
"learning_rate": 3.3693706504794243e-07,
|
|
"logits/chosen": -0.8703227043151855,
|
|
"logits/rejected": -0.8574371933937073,
|
|
"logps/chosen": -335.0187072753906,
|
|
"logps/ref_chosen": -291.3782958984375,
|
|
"logps/ref_rejected": -261.05792236328125,
|
|
"logps/rejected": -331.5504150390625,
|
|
"loss": 4.6754,
|
|
"margin_dpo/margin_mean": 26.85211944580078,
|
|
"margin_dpo/margin_std": 53.00439453125,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.4523560209424084,
|
|
"fcm_dpo/beta": 0.017213810235261917,
|
|
"fcm_dpo/delta": -0.00978805497288704,
|
|
"fcm_dpo/margin": 35.283409118652344,
|
|
"fcm_dpo/q_t": 0.3760201334953308,
|
|
"grad_norm": 95.87169647216797,
|
|
"learning_rate": 3.3521824616429284e-07,
|
|
"logits/chosen": -0.8963602185249329,
|
|
"logits/rejected": -0.8926108479499817,
|
|
"logps/chosen": -375.2848815917969,
|
|
"logps/ref_chosen": -338.50543212890625,
|
|
"logps/ref_rejected": -305.76104736328125,
|
|
"logps/rejected": -377.8238830566406,
|
|
"loss": 4.2837,
|
|
"margin_dpo/margin_mean": 35.283409118652344,
|
|
"margin_dpo/margin_std": 54.5643196105957,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.4544502617801047,
|
|
"fcm_dpo/beta": 0.01671535335481167,
|
|
"fcm_dpo/delta": -0.13403168320655823,
|
|
"fcm_dpo/margin": 43.277740478515625,
|
|
"fcm_dpo/q_t": 0.3528442978858948,
|
|
"grad_norm": 85.40447235107422,
|
|
"learning_rate": 3.334948572847253e-07,
|
|
"logits/chosen": -0.7879663109779358,
|
|
"logits/rejected": -0.7589735388755798,
|
|
"logps/chosen": -332.1008605957031,
|
|
"logps/ref_chosen": -293.5498046875,
|
|
"logps/ref_rejected": -256.7830810546875,
|
|
"logps/rejected": -338.6118469238281,
|
|
"loss": 3.9244,
|
|
"margin_dpo/margin_mean": 43.277740478515625,
|
|
"margin_dpo/margin_std": 55.66615295410156,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.45654450261780105,
|
|
"fcm_dpo/beta": 0.015897490084171295,
|
|
"fcm_dpo/delta": 0.003095601685345173,
|
|
"fcm_dpo/margin": 37.482078552246094,
|
|
"fcm_dpo/q_t": 0.3739127516746521,
|
|
"grad_norm": 89.20011901855469,
|
|
"learning_rate": 3.317669908293554e-07,
|
|
"logits/chosen": -0.8181397914886475,
|
|
"logits/rejected": -0.8422555923461914,
|
|
"logps/chosen": -357.48828125,
|
|
"logps/ref_chosen": -320.579345703125,
|
|
"logps/ref_rejected": -294.0381164550781,
|
|
"logps/rejected": -368.42913818359375,
|
|
"loss": 4.0961,
|
|
"margin_dpo/margin_mean": 37.482078552246094,
|
|
"margin_dpo/margin_std": 50.897701263427734,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.4586387434554974,
|
|
"fcm_dpo/beta": 0.015867143869400024,
|
|
"fcm_dpo/delta": -0.031162606552243233,
|
|
"fcm_dpo/margin": 39.53302001953125,
|
|
"fcm_dpo/q_t": 0.36894065141677856,
|
|
"grad_norm": 85.81663513183594,
|
|
"learning_rate": 3.300347394584172e-07,
|
|
"logits/chosen": -0.8200687170028687,
|
|
"logits/rejected": -0.846379280090332,
|
|
"logps/chosen": -301.2198486328125,
|
|
"logps/ref_chosen": -268.4186096191406,
|
|
"logps/ref_rejected": -265.7808837890625,
|
|
"logps/rejected": -338.1151428222656,
|
|
"loss": 4.1022,
|
|
"margin_dpo/margin_mean": 39.53302001953125,
|
|
"margin_dpo/margin_std": 54.08649826049805,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.4607329842931937,
|
|
"fcm_dpo/beta": 0.015527862124145031,
|
|
"fcm_dpo/delta": 0.009382149204611778,
|
|
"fcm_dpo/margin": 38.0103759765625,
|
|
"fcm_dpo/q_t": 0.3744858205318451,
|
|
"grad_norm": 86.28771209716797,
|
|
"learning_rate": 3.2829819606729477e-07,
|
|
"logits/chosen": -0.8505481481552124,
|
|
"logits/rejected": -0.8325619697570801,
|
|
"logps/chosen": -347.1203918457031,
|
|
"logps/ref_chosen": -312.8864440917969,
|
|
"logps/ref_rejected": -259.5191955566406,
|
|
"logps/rejected": -331.7634582519531,
|
|
"loss": 4.1899,
|
|
"margin_dpo/margin_mean": 38.0103759765625,
|
|
"margin_dpo/margin_std": 54.48101043701172,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.46282722513089003,
|
|
"fcm_dpo/beta": 0.016174497082829475,
|
|
"fcm_dpo/delta": 0.0048094987869262695,
|
|
"fcm_dpo/margin": 30.234722137451172,
|
|
"fcm_dpo/q_t": 0.4027414321899414,
|
|
"grad_norm": 90.07968139648438,
|
|
"learning_rate": 3.265574537815398e-07,
|
|
"logits/chosen": -0.7801198363304138,
|
|
"logits/rejected": -0.79371577501297,
|
|
"logps/chosen": -337.3284606933594,
|
|
"logps/ref_chosen": -300.32586669921875,
|
|
"logps/ref_rejected": -286.312255859375,
|
|
"logps/rejected": -353.549560546875,
|
|
"loss": 4.5684,
|
|
"margin_dpo/margin_mean": 30.234722137451172,
|
|
"margin_dpo/margin_std": 54.877281188964844,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.4649214659685864,
|
|
"fcm_dpo/beta": 0.015347619540989399,
|
|
"fcm_dpo/delta": -0.009789157658815384,
|
|
"fcm_dpo/margin": 36.647762298583984,
|
|
"fcm_dpo/q_t": 0.3810715973377228,
|
|
"grad_norm": 95.45844268798828,
|
|
"learning_rate": 3.248126059518784e-07,
|
|
"logits/chosen": -0.8610984086990356,
|
|
"logits/rejected": -0.8496800661087036,
|
|
"logps/chosen": -329.9424743652344,
|
|
"logps/ref_chosen": -297.1113586425781,
|
|
"logps/ref_rejected": -235.53146362304688,
|
|
"logps/rejected": -305.0103454589844,
|
|
"loss": 4.2022,
|
|
"margin_dpo/margin_mean": 36.64776611328125,
|
|
"margin_dpo/margin_std": 50.83029556274414,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.46701570680628274,
|
|
"fcm_dpo/beta": 0.015580544248223305,
|
|
"fcm_dpo/delta": -0.005719708278775215,
|
|
"fcm_dpo/margin": 38.75231170654297,
|
|
"fcm_dpo/q_t": 0.37368282675743103,
|
|
"grad_norm": 83.94607543945312,
|
|
"learning_rate": 3.230637461492043e-07,
|
|
"logits/chosen": -0.8233493566513062,
|
|
"logits/rejected": -0.7984543442726135,
|
|
"logps/chosen": -322.42913818359375,
|
|
"logps/ref_chosen": -286.41510009765625,
|
|
"logps/ref_rejected": -241.1181640625,
|
|
"logps/rejected": -315.884521484375,
|
|
"loss": 4.139,
|
|
"margin_dpo/margin_mean": 38.75231170654297,
|
|
"margin_dpo/margin_std": 53.93544006347656,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.46910994764397906,
|
|
"fcm_dpo/beta": 0.015308534726500511,
|
|
"fcm_dpo/delta": -0.07908003032207489,
|
|
"fcm_dpo/margin": 41.015872955322266,
|
|
"fcm_dpo/q_t": 0.36839425563812256,
|
|
"grad_norm": 83.50463104248047,
|
|
"learning_rate": 3.213109681595612e-07,
|
|
"logits/chosen": -0.7854145765304565,
|
|
"logits/rejected": -0.8054001927375793,
|
|
"logps/chosen": -282.39862060546875,
|
|
"logps/ref_chosen": -249.49234008789062,
|
|
"logps/ref_rejected": -233.10752868652344,
|
|
"logps/rejected": -307.02972412109375,
|
|
"loss": 3.9926,
|
|
"margin_dpo/margin_mean": 41.015872955322266,
|
|
"margin_dpo/margin_std": 51.277225494384766,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.4712041884816754,
|
|
"fcm_dpo/beta": 0.01455092616379261,
|
|
"fcm_dpo/delta": 0.04796172305941582,
|
|
"fcm_dpo/margin": 38.115787506103516,
|
|
"fcm_dpo/q_t": 0.3868432939052582,
|
|
"grad_norm": 94.90240478515625,
|
|
"learning_rate": 3.1955436597911315e-07,
|
|
"logits/chosen": -0.8136327266693115,
|
|
"logits/rejected": -0.7935799360275269,
|
|
"logps/chosen": -353.4432067871094,
|
|
"logps/ref_chosen": -311.8583679199219,
|
|
"logps/ref_rejected": -336.8523864746094,
|
|
"logps/rejected": -416.5530090332031,
|
|
"loss": 4.3046,
|
|
"margin_dpo/margin_mean": 38.11579132080078,
|
|
"margin_dpo/margin_std": 58.1151237487793,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.4732984293193717,
|
|
"fcm_dpo/beta": 0.015586531721055508,
|
|
"fcm_dpo/delta": 0.07917778939008713,
|
|
"fcm_dpo/margin": 33.66019821166992,
|
|
"fcm_dpo/q_t": 0.3920612037181854,
|
|
"grad_norm": 80.37389373779297,
|
|
"learning_rate": 3.1779403380910425e-07,
|
|
"logits/chosen": -0.8555701971054077,
|
|
"logits/rejected": -0.8487062454223633,
|
|
"logps/chosen": -290.4698486328125,
|
|
"logps/ref_chosen": -252.20123291015625,
|
|
"logps/ref_rejected": -254.41162109375,
|
|
"logps/rejected": -326.3404541015625,
|
|
"loss": 4.3932,
|
|
"margin_dpo/margin_mean": 33.66019821166992,
|
|
"margin_dpo/margin_std": 55.53483581542969,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.47539267015706804,
|
|
"fcm_dpo/beta": 0.01585298217833042,
|
|
"fcm_dpo/delta": -0.0467713437974453,
|
|
"fcm_dpo/margin": 40.5180549621582,
|
|
"fcm_dpo/q_t": 0.36852991580963135,
|
|
"grad_norm": 112.51945495605469,
|
|
"learning_rate": 3.160300660508064e-07,
|
|
"logits/chosen": -0.8035961985588074,
|
|
"logits/rejected": -0.8008553385734558,
|
|
"logps/chosen": -324.879150390625,
|
|
"logps/ref_chosen": -285.25946044921875,
|
|
"logps/ref_rejected": -261.3220520019531,
|
|
"logps/rejected": -341.4598083496094,
|
|
"loss": 4.2243,
|
|
"margin_dpo/margin_mean": 40.5180549621582,
|
|
"margin_dpo/margin_std": 60.73136901855469,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.4774869109947644,
|
|
"fcm_dpo/beta": 0.015510935336351395,
|
|
"fcm_dpo/delta": -0.051342956721782684,
|
|
"fcm_dpo/margin": 41.696563720703125,
|
|
"fcm_dpo/q_t": 0.3670775890350342,
|
|
"grad_norm": 85.83709716796875,
|
|
"learning_rate": 3.1426255730045695e-07,
|
|
"logits/chosen": -0.8358519077301025,
|
|
"logits/rejected": -0.8068508505821228,
|
|
"logps/chosen": -348.1343078613281,
|
|
"logps/ref_chosen": -313.81878662109375,
|
|
"logps/ref_rejected": -258.07061767578125,
|
|
"logps/rejected": -334.08270263671875,
|
|
"loss": 4.0336,
|
|
"margin_dpo/margin_mean": 41.696563720703125,
|
|
"margin_dpo/margin_std": 54.898597717285156,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.47958115183246075,
|
|
"fcm_dpo/beta": 0.014525864273309708,
|
|
"fcm_dpo/delta": -0.08014161139726639,
|
|
"fcm_dpo/margin": 46.30763244628906,
|
|
"fcm_dpo/q_t": 0.3581668734550476,
|
|
"grad_norm": 171.63238525390625,
|
|
"learning_rate": 3.1249160234418644e-07,
|
|
"logits/chosen": -0.8062803149223328,
|
|
"logits/rejected": -0.8233762979507446,
|
|
"logps/chosen": -334.2206726074219,
|
|
"logps/ref_chosen": -291.9707946777344,
|
|
"logps/ref_rejected": -263.42059326171875,
|
|
"logps/rejected": -351.9781494140625,
|
|
"loss": 3.9764,
|
|
"margin_dpo/margin_mean": 46.30763244628906,
|
|
"margin_dpo/margin_std": 58.0003662109375,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.4816753926701571,
|
|
"fcm_dpo/beta": 0.013805052265524864,
|
|
"fcm_dpo/delta": -0.005476825870573521,
|
|
"fcm_dpo/margin": 43.70093536376953,
|
|
"fcm_dpo/q_t": 0.37367361783981323,
|
|
"grad_norm": 79.83263397216797,
|
|
"learning_rate": 3.1071729615293424e-07,
|
|
"logits/chosen": -0.8613869547843933,
|
|
"logits/rejected": -0.8628825545310974,
|
|
"logps/chosen": -272.9879150390625,
|
|
"logps/ref_chosen": -233.2601318359375,
|
|
"logps/ref_rejected": -238.922119140625,
|
|
"logps/rejected": -322.3508605957031,
|
|
"loss": 4.134,
|
|
"margin_dpo/margin_mean": 43.70093536376953,
|
|
"margin_dpo/margin_std": 60.809654235839844,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.4837696335078534,
|
|
"fcm_dpo/beta": 0.014280532486736774,
|
|
"fcm_dpo/delta": 0.054186657071113586,
|
|
"fcm_dpo/margin": 34.104496002197266,
|
|
"fcm_dpo/q_t": 0.39545977115631104,
|
|
"grad_norm": 89.10991668701172,
|
|
"learning_rate": 3.0893973387735683e-07,
|
|
"logits/chosen": -0.8317367434501648,
|
|
"logits/rejected": -0.8229210376739502,
|
|
"logps/chosen": -370.91632080078125,
|
|
"logps/ref_chosen": -322.1551818847656,
|
|
"logps/ref_rejected": -280.97613525390625,
|
|
"logps/rejected": -363.8418273925781,
|
|
"loss": 4.4358,
|
|
"margin_dpo/margin_mean": 34.10449981689453,
|
|
"margin_dpo/margin_std": 54.3597412109375,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.48586387434554973,
|
|
"fcm_dpo/beta": 0.01439041830599308,
|
|
"fcm_dpo/delta": -0.028247211128473282,
|
|
"fcm_dpo/margin": 37.74383544921875,
|
|
"fcm_dpo/q_t": 0.38723382353782654,
|
|
"grad_norm": 111.32173919677734,
|
|
"learning_rate": 3.071590108427243e-07,
|
|
"logits/chosen": -0.8064876198768616,
|
|
"logits/rejected": -0.7893252372741699,
|
|
"logps/chosen": -321.2291564941406,
|
|
"logps/ref_chosen": -271.7437744140625,
|
|
"logps/ref_rejected": -249.94981384277344,
|
|
"logps/rejected": -337.1790466308594,
|
|
"loss": 4.4283,
|
|
"margin_dpo/margin_mean": 37.74383544921875,
|
|
"margin_dpo/margin_std": 60.81903839111328,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.48795811518324606,
|
|
"fcm_dpo/beta": 0.013932683505117893,
|
|
"fcm_dpo/delta": -0.07837289571762085,
|
|
"fcm_dpo/margin": 41.53171157836914,
|
|
"fcm_dpo/q_t": 0.37816399335861206,
|
|
"grad_norm": 93.736328125,
|
|
"learning_rate": 3.05375222543809e-07,
|
|
"logits/chosen": -0.8585054278373718,
|
|
"logits/rejected": -0.8508076071739197,
|
|
"logps/chosen": -335.4866943359375,
|
|
"logps/ref_chosen": -285.3423156738281,
|
|
"logps/ref_rejected": -266.34320068359375,
|
|
"logps/rejected": -358.0192565917969,
|
|
"loss": 4.2142,
|
|
"margin_dpo/margin_mean": 41.53171157836914,
|
|
"margin_dpo/margin_std": 59.24362564086914,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.4900523560209424,
|
|
"fcm_dpo/beta": 0.01374006737023592,
|
|
"fcm_dpo/delta": 0.036976464092731476,
|
|
"fcm_dpo/margin": 41.03116226196289,
|
|
"fcm_dpo/q_t": 0.3843136429786682,
|
|
"grad_norm": 78.69235229492188,
|
|
"learning_rate": 3.035884646397637e-07,
|
|
"logits/chosen": -0.829176664352417,
|
|
"logits/rejected": -0.812563419342041,
|
|
"logps/chosen": -345.6146545410156,
|
|
"logps/ref_chosen": -294.9057312011719,
|
|
"logps/ref_rejected": -299.37054443359375,
|
|
"logps/rejected": -391.11065673828125,
|
|
"loss": 4.4275,
|
|
"margin_dpo/margin_mean": 41.03116226196289,
|
|
"margin_dpo/margin_std": 68.48192596435547,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.49214659685863876,
|
|
"fcm_dpo/beta": 0.01411922462284565,
|
|
"fcm_dpo/delta": 0.003345828503370285,
|
|
"fcm_dpo/margin": 42.21276092529297,
|
|
"fcm_dpo/q_t": 0.37557002902030945,
|
|
"grad_norm": 109.72699737548828,
|
|
"learning_rate": 3.017988329489923e-07,
|
|
"logits/chosen": -0.8408417701721191,
|
|
"logits/rejected": -0.8409253358840942,
|
|
"logps/chosen": -343.94256591796875,
|
|
"logps/ref_chosen": -289.49755859375,
|
|
"logps/ref_rejected": -247.55076599121094,
|
|
"logps/rejected": -344.20849609375,
|
|
"loss": 4.2826,
|
|
"margin_dpo/margin_mean": 42.2127571105957,
|
|
"margin_dpo/margin_std": 65.22442626953125,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.4942408376963351,
|
|
"fcm_dpo/beta": 0.013934805057942867,
|
|
"fcm_dpo/delta": -0.03408358246088028,
|
|
"fcm_dpo/margin": 42.292049407958984,
|
|
"fcm_dpo/q_t": 0.3777884840965271,
|
|
"grad_norm": 81.88858032226562,
|
|
"learning_rate": 3.000064234440111e-07,
|
|
"logits/chosen": -0.8615151643753052,
|
|
"logits/rejected": -0.8628526926040649,
|
|
"logps/chosen": -339.2417297363281,
|
|
"logps/ref_chosen": -288.8846435546875,
|
|
"logps/ref_rejected": -242.0452880859375,
|
|
"logps/rejected": -334.6944274902344,
|
|
"loss": 4.2538,
|
|
"margin_dpo/margin_mean": 42.292049407958984,
|
|
"margin_dpo/margin_std": 62.85895538330078,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.4963350785340314,
|
|
"fcm_dpo/beta": 0.013490064069628716,
|
|
"fcm_dpo/delta": -0.031569261103868484,
|
|
"fcm_dpo/margin": 42.74472427368164,
|
|
"fcm_dpo/q_t": 0.3792114853858948,
|
|
"grad_norm": 85.20064544677734,
|
|
"learning_rate": 2.9821133224630223e-07,
|
|
"logits/chosen": -0.8437389731407166,
|
|
"logits/rejected": -0.8258963227272034,
|
|
"logps/chosen": -320.6917419433594,
|
|
"logps/ref_chosen": -265.47869873046875,
|
|
"logps/ref_rejected": -267.9891357421875,
|
|
"logps/rejected": -365.94683837890625,
|
|
"loss": 4.2127,
|
|
"margin_dpo/margin_mean": 42.74472427368164,
|
|
"margin_dpo/margin_std": 61.919334411621094,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.49842931937172774,
|
|
"fcm_dpo/beta": 0.013200972229242325,
|
|
"fcm_dpo/delta": 0.01033791620284319,
|
|
"fcm_dpo/margin": 40.789093017578125,
|
|
"fcm_dpo/q_t": 0.38993343710899353,
|
|
"grad_norm": 93.29105377197266,
|
|
"learning_rate": 2.964136556211588e-07,
|
|
"logits/chosen": -0.8295376300811768,
|
|
"logits/rejected": -0.8033552169799805,
|
|
"logps/chosen": -369.406982421875,
|
|
"logps/ref_chosen": -312.0026550292969,
|
|
"logps/ref_rejected": -270.0257263183594,
|
|
"logps/rejected": -368.21917724609375,
|
|
"loss": 4.327,
|
|
"margin_dpo/margin_mean": 40.789093017578125,
|
|
"margin_dpo/margin_std": 64.44735717773438,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.5005235602094241,
|
|
"fcm_dpo/beta": 0.013887631706893444,
|
|
"fcm_dpo/delta": 0.09677696973085403,
|
|
"fcm_dpo/margin": 36.4874382019043,
|
|
"fcm_dpo/q_t": 0.3990153670310974,
|
|
"grad_norm": 100.5346908569336,
|
|
"learning_rate": 2.946134899725226e-07,
|
|
"logits/chosen": -0.8349162936210632,
|
|
"logits/rejected": -0.8748633861541748,
|
|
"logps/chosen": -320.4220275878906,
|
|
"logps/ref_chosen": -267.167236328125,
|
|
"logps/ref_rejected": -275.99468994140625,
|
|
"logps/rejected": -365.7369384765625,
|
|
"loss": 4.6325,
|
|
"margin_dpo/margin_mean": 36.4874382019043,
|
|
"margin_dpo/margin_std": 70.55658721923828,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.5026178010471204,
|
|
"fcm_dpo/beta": 0.013791955076158047,
|
|
"fcm_dpo/delta": -0.048899125307798386,
|
|
"fcm_dpo/margin": 46.676414489746094,
|
|
"fcm_dpo/q_t": 0.3687818944454193,
|
|
"grad_norm": 117.41996765136719,
|
|
"learning_rate": 2.9281093183781403e-07,
|
|
"logits/chosen": -0.8881155848503113,
|
|
"logits/rejected": -0.8836052417755127,
|
|
"logps/chosen": -337.365478515625,
|
|
"logps/ref_chosen": -285.9796142578125,
|
|
"logps/ref_rejected": -256.8258056640625,
|
|
"logps/rejected": -354.8880615234375,
|
|
"loss": 4.0805,
|
|
"margin_dpo/margin_mean": 46.67641830444336,
|
|
"margin_dpo/margin_std": 65.10855102539062,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.5047120418848168,
|
|
"fcm_dpo/beta": 0.013698762282729149,
|
|
"fcm_dpo/delta": 0.03592575713992119,
|
|
"fcm_dpo/margin": 37.19944381713867,
|
|
"fcm_dpo/q_t": 0.3960975408554077,
|
|
"grad_norm": 95.49946594238281,
|
|
"learning_rate": 2.910060778827554e-07,
|
|
"logits/chosen": -0.7951388359069824,
|
|
"logits/rejected": -0.7752350568771362,
|
|
"logps/chosen": -321.134033203125,
|
|
"logps/ref_chosen": -261.516845703125,
|
|
"logps/ref_rejected": -250.2250518798828,
|
|
"logps/rejected": -347.0416564941406,
|
|
"loss": 4.529,
|
|
"margin_dpo/margin_mean": 37.19944381713867,
|
|
"margin_dpo/margin_std": 65.24166107177734,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.506806282722513,
|
|
"fcm_dpo/beta": 0.014109227806329727,
|
|
"fcm_dpo/delta": -0.023093625903129578,
|
|
"fcm_dpo/margin": 43.914390563964844,
|
|
"fcm_dpo/q_t": 0.3734211027622223,
|
|
"grad_norm": 97.55506134033203,
|
|
"learning_rate": 2.891990248961871e-07,
|
|
"logits/chosen": -0.8705978393554688,
|
|
"logits/rejected": -0.8577161431312561,
|
|
"logps/chosen": -322.5037536621094,
|
|
"logps/ref_chosen": -270.51397705078125,
|
|
"logps/ref_rejected": -244.8560791015625,
|
|
"logps/rejected": -340.76025390625,
|
|
"loss": 4.1074,
|
|
"margin_dpo/margin_mean": 43.91438674926758,
|
|
"margin_dpo/margin_std": 60.707244873046875,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.5089005235602094,
|
|
"fcm_dpo/beta": 0.013829024508595467,
|
|
"fcm_dpo/delta": -0.07154600322246552,
|
|
"fcm_dpo/margin": 48.14585876464844,
|
|
"fcm_dpo/q_t": 0.36528927087783813,
|
|
"grad_norm": 109.1782455444336,
|
|
"learning_rate": 2.873898697848762e-07,
|
|
"logits/chosen": -0.8485463857650757,
|
|
"logits/rejected": -0.8369187116622925,
|
|
"logps/chosen": -370.865234375,
|
|
"logps/ref_chosen": -324.68206787109375,
|
|
"logps/ref_rejected": -307.1111755371094,
|
|
"logps/rejected": -401.440185546875,
|
|
"loss": 4.0443,
|
|
"margin_dpo/margin_mean": 48.14585876464844,
|
|
"margin_dpo/margin_std": 65.65919494628906,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.5109947643979058,
|
|
"fcm_dpo/beta": 0.012862252071499825,
|
|
"fcm_dpo/delta": -0.007831787690520287,
|
|
"fcm_dpo/margin": 47.06397247314453,
|
|
"fcm_dpo/q_t": 0.3703567385673523,
|
|
"grad_norm": 87.85368347167969,
|
|
"learning_rate": 2.8557870956832133e-07,
|
|
"logits/chosen": -0.8476990461349487,
|
|
"logits/rejected": -0.8005751967430115,
|
|
"logps/chosen": -365.91729736328125,
|
|
"logps/ref_chosen": -318.979248046875,
|
|
"logps/ref_rejected": -269.67572021484375,
|
|
"logps/rejected": -363.677734375,
|
|
"loss": 4.0768,
|
|
"margin_dpo/margin_mean": 47.06397247314453,
|
|
"margin_dpo/margin_std": 60.87822723388672,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.5130890052356021,
|
|
"fcm_dpo/beta": 0.012774711474776268,
|
|
"fcm_dpo/delta": -0.07881193608045578,
|
|
"fcm_dpo/margin": 47.673194885253906,
|
|
"fcm_dpo/q_t": 0.3689280152320862,
|
|
"grad_norm": 81.23341369628906,
|
|
"learning_rate": 2.837656413735479e-07,
|
|
"logits/chosen": -0.8486171960830688,
|
|
"logits/rejected": -0.8539371490478516,
|
|
"logps/chosen": -338.697265625,
|
|
"logps/ref_chosen": -294.8980712890625,
|
|
"logps/ref_rejected": -239.8111114501953,
|
|
"logps/rejected": -331.2834777832031,
|
|
"loss": 4.0503,
|
|
"margin_dpo/margin_mean": 47.67319869995117,
|
|
"margin_dpo/margin_std": 59.50359344482422,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.5151832460732985,
|
|
"fcm_dpo/beta": 0.012836070731282234,
|
|
"fcm_dpo/delta": 0.08966440707445145,
|
|
"fcm_dpo/margin": 35.876922607421875,
|
|
"fcm_dpo/q_t": 0.4043683707714081,
|
|
"grad_norm": 97.06179809570312,
|
|
"learning_rate": 2.8195076242990116e-07,
|
|
"logits/chosen": -0.823259711265564,
|
|
"logits/rejected": -0.8320043087005615,
|
|
"logps/chosen": -336.7489318847656,
|
|
"logps/ref_chosen": -280.6854248046875,
|
|
"logps/ref_rejected": -253.65382385253906,
|
|
"logps/rejected": -345.59423828125,
|
|
"loss": 4.5646,
|
|
"margin_dpo/margin_mean": 35.876922607421875,
|
|
"margin_dpo/margin_std": 64.8729248046875,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.5172774869109947,
|
|
"fcm_dpo/beta": 0.01340182963758707,
|
|
"fcm_dpo/delta": 0.010741522535681725,
|
|
"fcm_dpo/margin": 40.165985107421875,
|
|
"fcm_dpo/q_t": 0.38809463381767273,
|
|
"grad_norm": 82.3198013305664,
|
|
"learning_rate": 2.801341700638307e-07,
|
|
"logits/chosen": -0.8334712386131287,
|
|
"logits/rejected": -0.8363280296325684,
|
|
"logps/chosen": -332.05615234375,
|
|
"logps/ref_chosen": -281.1091003417969,
|
|
"logps/ref_rejected": -260.3700866699219,
|
|
"logps/rejected": -351.4831237792969,
|
|
"loss": 4.295,
|
|
"margin_dpo/margin_mean": 40.165985107421875,
|
|
"margin_dpo/margin_std": 59.34774398803711,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.5193717277486911,
|
|
"fcm_dpo/beta": 0.013335911557078362,
|
|
"fcm_dpo/delta": 0.03803172707557678,
|
|
"fcm_dpo/margin": 37.84646224975586,
|
|
"fcm_dpo/q_t": 0.39170122146606445,
|
|
"grad_norm": 96.70375061035156,
|
|
"learning_rate": 2.7831596169367227e-07,
|
|
"logits/chosen": -0.795592188835144,
|
|
"logits/rejected": -0.8106747269630432,
|
|
"logps/chosen": -320.57391357421875,
|
|
"logps/ref_chosen": -270.318359375,
|
|
"logps/ref_rejected": -233.46778869628906,
|
|
"logps/rejected": -321.56982421875,
|
|
"loss": 4.3839,
|
|
"margin_dpo/margin_mean": 37.84646224975586,
|
|
"margin_dpo/margin_std": 58.59114074707031,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.5214659685863874,
|
|
"fcm_dpo/beta": 0.013947556726634502,
|
|
"fcm_dpo/delta": 0.03363037109375,
|
|
"fcm_dpo/margin": 36.132991790771484,
|
|
"fcm_dpo/q_t": 0.3958445191383362,
|
|
"grad_norm": 102.5847396850586,
|
|
"learning_rate": 2.7649623482442274e-07,
|
|
"logits/chosen": -0.8209048509597778,
|
|
"logits/rejected": -0.8001272082328796,
|
|
"logps/chosen": -337.85406494140625,
|
|
"logps/ref_chosen": -275.8088684082031,
|
|
"logps/ref_rejected": -243.45138549804688,
|
|
"logps/rejected": -341.6295471191406,
|
|
"loss": 4.566,
|
|
"margin_dpo/margin_mean": 36.132991790771484,
|
|
"margin_dpo/margin_std": 66.05538940429688,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.5235602094240838,
|
|
"fcm_dpo/beta": 0.013355924747884274,
|
|
"fcm_dpo/delta": -0.04608849063515663,
|
|
"fcm_dpo/margin": 47.88051223754883,
|
|
"fcm_dpo/q_t": 0.36725619435310364,
|
|
"grad_norm": 95.6384048461914,
|
|
"learning_rate": 2.7467508704251135e-07,
|
|
"logits/chosen": -0.829230010509491,
|
|
"logits/rejected": -0.8328065872192383,
|
|
"logps/chosen": -355.0364990234375,
|
|
"logps/ref_chosen": -292.4945373535156,
|
|
"logps/ref_rejected": -284.2869567871094,
|
|
"logps/rejected": -394.7093811035156,
|
|
"loss": 4.1352,
|
|
"margin_dpo/margin_mean": 47.88051223754883,
|
|
"margin_dpo/margin_std": 67.44532012939453,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.5256544502617801,
|
|
"fcm_dpo/beta": 0.013813665136694908,
|
|
"fcm_dpo/delta": -0.007170406170189381,
|
|
"fcm_dpo/margin": 43.663360595703125,
|
|
"fcm_dpo/q_t": 0.3814099431037903,
|
|
"grad_norm": 100.86103820800781,
|
|
"learning_rate": 2.7285261601056697e-07,
|
|
"logits/chosen": -0.8296136856079102,
|
|
"logits/rejected": -0.8152703046798706,
|
|
"logps/chosen": -336.88873291015625,
|
|
"logps/ref_chosen": -281.736572265625,
|
|
"logps/ref_rejected": -255.9419708251953,
|
|
"logps/rejected": -354.75750732421875,
|
|
"loss": 4.1787,
|
|
"margin_dpo/margin_mean": 43.66335678100586,
|
|
"margin_dpo/margin_std": 63.43466567993164,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.5277486910994764,
|
|
"fcm_dpo/beta": 0.013664179481565952,
|
|
"fcm_dpo/delta": 0.0335024930536747,
|
|
"fcm_dpo/margin": 41.402076721191406,
|
|
"fcm_dpo/q_t": 0.3806874752044678,
|
|
"grad_norm": 102.68427276611328,
|
|
"learning_rate": 2.7102891946217994e-07,
|
|
"logits/chosen": -0.8773578405380249,
|
|
"logits/rejected": -0.854051411151886,
|
|
"logps/chosen": -360.0166931152344,
|
|
"logps/ref_chosen": -295.9674072265625,
|
|
"logps/ref_rejected": -280.111572265625,
|
|
"logps/rejected": -385.56292724609375,
|
|
"loss": 4.3841,
|
|
"margin_dpo/margin_mean": 41.40208053588867,
|
|
"margin_dpo/margin_std": 66.1944580078125,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.5298429319371728,
|
|
"fcm_dpo/beta": 0.013652501627802849,
|
|
"fcm_dpo/delta": -0.021744156256318092,
|
|
"fcm_dpo/margin": 41.56562805175781,
|
|
"fcm_dpo/q_t": 0.38615942001342773,
|
|
"grad_norm": 96.21172332763672,
|
|
"learning_rate": 2.692040951966617e-07,
|
|
"logits/chosen": -0.8553462624549866,
|
|
"logits/rejected": -0.848787248134613,
|
|
"logps/chosen": -346.29815673828125,
|
|
"logps/ref_chosen": -277.072265625,
|
|
"logps/ref_rejected": -247.31643676757812,
|
|
"logps/rejected": -358.10791015625,
|
|
"loss": 4.3891,
|
|
"margin_dpo/margin_mean": 41.56563186645508,
|
|
"margin_dpo/margin_std": 68.40611267089844,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.5319371727748691,
|
|
"fcm_dpo/beta": 0.01416382659226656,
|
|
"fcm_dpo/delta": -0.016133006662130356,
|
|
"fcm_dpo/margin": 43.26961898803711,
|
|
"fcm_dpo/q_t": 0.37535524368286133,
|
|
"grad_norm": 99.73017120361328,
|
|
"learning_rate": 2.6737824107379947e-07,
|
|
"logits/chosen": -0.7875509858131409,
|
|
"logits/rejected": -0.7763053774833679,
|
|
"logps/chosen": -334.57989501953125,
|
|
"logps/ref_chosen": -269.9478454589844,
|
|
"logps/ref_rejected": -249.45005798339844,
|
|
"logps/rejected": -357.3516845703125,
|
|
"loss": 4.18,
|
|
"margin_dpo/margin_mean": 43.26961898803711,
|
|
"margin_dpo/margin_std": 61.28417205810547,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.5340314136125655,
|
|
"fcm_dpo/beta": 0.013342966325581074,
|
|
"fcm_dpo/delta": -0.06489241868257523,
|
|
"fcm_dpo/margin": 49.52783966064453,
|
|
"fcm_dpo/q_t": 0.36613547801971436,
|
|
"grad_norm": 90.38292694091797,
|
|
"learning_rate": 2.655514550086086e-07,
|
|
"logits/chosen": -0.8106395602226257,
|
|
"logits/rejected": -0.7797207832336426,
|
|
"logps/chosen": -370.4023742675781,
|
|
"logps/ref_chosen": -306.6552734375,
|
|
"logps/ref_rejected": -254.47528076171875,
|
|
"logps/rejected": -367.7502136230469,
|
|
"loss": 4.1532,
|
|
"margin_dpo/margin_mean": 49.52783966064453,
|
|
"margin_dpo/margin_std": 72.60646057128906,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.5361256544502618,
|
|
"fcm_dpo/beta": 0.012888522818684578,
|
|
"fcm_dpo/delta": -0.017480649054050446,
|
|
"fcm_dpo/margin": 47.27513122558594,
|
|
"fcm_dpo/q_t": 0.3648688495159149,
|
|
"grad_norm": 255.97872924804688,
|
|
"learning_rate": 2.6372383496608186e-07,
|
|
"logits/chosen": -0.8314058184623718,
|
|
"logits/rejected": -0.827141523361206,
|
|
"logps/chosen": -388.43408203125,
|
|
"logps/ref_chosen": -323.7181701660156,
|
|
"logps/ref_rejected": -254.1871337890625,
|
|
"logps/rejected": -366.1781311035156,
|
|
"loss": 4.5363,
|
|
"margin_dpo/margin_mean": 47.2751350402832,
|
|
"margin_dpo/margin_std": 78.98124694824219,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.5382198952879581,
|
|
"fcm_dpo/beta": 0.012398256920278072,
|
|
"fcm_dpo/delta": -0.015751376748085022,
|
|
"fcm_dpo/margin": 49.54781723022461,
|
|
"fcm_dpo/q_t": 0.3713992238044739,
|
|
"grad_norm": 97.32785034179688,
|
|
"learning_rate": 2.618954789559356e-07,
|
|
"logits/chosen": -0.8290724158287048,
|
|
"logits/rejected": -0.8196491003036499,
|
|
"logps/chosen": -331.4079895019531,
|
|
"logps/ref_chosen": -267.21209716796875,
|
|
"logps/ref_rejected": -249.12579345703125,
|
|
"logps/rejected": -362.8694763183594,
|
|
"loss": 4.039,
|
|
"margin_dpo/margin_mean": 49.54781723022461,
|
|
"margin_dpo/margin_std": 66.0081558227539,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.5403141361256545,
|
|
"fcm_dpo/beta": 0.011941884644329548,
|
|
"fcm_dpo/delta": -0.021545007824897766,
|
|
"fcm_dpo/margin": 51.69853210449219,
|
|
"fcm_dpo/q_t": 0.36628904938697815,
|
|
"grad_norm": 81.3831787109375,
|
|
"learning_rate": 2.600664850273538e-07,
|
|
"logits/chosen": -0.8486968278884888,
|
|
"logits/rejected": -0.8191419243812561,
|
|
"logps/chosen": -345.801025390625,
|
|
"logps/ref_chosen": -277.6827392578125,
|
|
"logps/ref_rejected": -250.73385620117188,
|
|
"logps/rejected": -370.5506591796875,
|
|
"loss": 3.9976,
|
|
"margin_dpo/margin_mean": 51.69853591918945,
|
|
"margin_dpo/margin_std": 62.97686004638672,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.5424083769633508,
|
|
"fcm_dpo/beta": 0.01243941206485033,
|
|
"fcm_dpo/delta": 0.02084418572485447,
|
|
"fcm_dpo/margin": 46.594276428222656,
|
|
"fcm_dpo/q_t": 0.3780772387981415,
|
|
"grad_norm": 86.16590118408203,
|
|
"learning_rate": 2.582369512637302e-07,
|
|
"logits/chosen": -0.8632500171661377,
|
|
"logits/rejected": -0.8614512085914612,
|
|
"logps/chosen": -354.69976806640625,
|
|
"logps/ref_chosen": -294.6099853515625,
|
|
"logps/ref_rejected": -272.2725830078125,
|
|
"logps/rejected": -378.9566345214844,
|
|
"loss": 4.1194,
|
|
"margin_dpo/margin_mean": 46.594268798828125,
|
|
"margin_dpo/margin_std": 63.69516372680664,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.5445026178010471,
|
|
"fcm_dpo/beta": 0.013677787035703659,
|
|
"fcm_dpo/delta": 0.19107326865196228,
|
|
"fcm_dpo/margin": 22.353225708007812,
|
|
"fcm_dpo/q_t": 0.43973931670188904,
|
|
"grad_norm": 113.30580139160156,
|
|
"learning_rate": 2.5640697577740815e-07,
|
|
"logits/chosen": -0.8496757745742798,
|
|
"logits/rejected": -0.8480501174926758,
|
|
"logps/chosen": -357.86456298828125,
|
|
"logps/ref_chosen": -290.85711669921875,
|
|
"logps/ref_rejected": -277.5970153808594,
|
|
"logps/rejected": -366.95770263671875,
|
|
"loss": 5.1521,
|
|
"margin_dpo/margin_mean": 22.353225708007812,
|
|
"margin_dpo/margin_std": 64.10260772705078,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.5465968586387434,
|
|
"fcm_dpo/beta": 0.014235386624932289,
|
|
"fcm_dpo/delta": -0.05504711717367172,
|
|
"fcm_dpo/margin": 37.57318115234375,
|
|
"fcm_dpo/q_t": 0.39546385407447815,
|
|
"grad_norm": 130.93417358398438,
|
|
"learning_rate": 2.5457665670441937e-07,
|
|
"logits/chosen": -0.733401358127594,
|
|
"logits/rejected": -0.7483704090118408,
|
|
"logps/chosen": -322.2603454589844,
|
|
"logps/ref_chosen": -251.13223266601562,
|
|
"logps/ref_rejected": -244.76016235351562,
|
|
"logps/rejected": -353.4614562988281,
|
|
"loss": 4.6495,
|
|
"margin_dpo/margin_mean": 37.57318115234375,
|
|
"margin_dpo/margin_std": 71.4426040649414,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.5486910994764398,
|
|
"fcm_dpo/beta": 0.013635975308716297,
|
|
"fcm_dpo/delta": -0.06950134038925171,
|
|
"fcm_dpo/margin": 48.76420211791992,
|
|
"fcm_dpo/q_t": 0.365522176027298,
|
|
"grad_norm": 102.4128646850586,
|
|
"learning_rate": 2.527460921992209e-07,
|
|
"logits/chosen": -0.7756036520004272,
|
|
"logits/rejected": -0.7701444625854492,
|
|
"logps/chosen": -363.00665283203125,
|
|
"logps/ref_chosen": -299.7217712402344,
|
|
"logps/ref_rejected": -277.0969543457031,
|
|
"logps/rejected": -389.14605712890625,
|
|
"loss": 4.0242,
|
|
"margin_dpo/margin_mean": 48.76420211791992,
|
|
"margin_dpo/margin_std": 65.38024139404297,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.5507853403141362,
|
|
"fcm_dpo/beta": 0.013076528906822205,
|
|
"fcm_dpo/delta": -0.0265921950340271,
|
|
"fcm_dpo/margin": 40.98373794555664,
|
|
"fcm_dpo/q_t": 0.38731229305267334,
|
|
"grad_norm": 84.20980072021484,
|
|
"learning_rate": 2.509153804294318e-07,
|
|
"logits/chosen": -0.7757068276405334,
|
|
"logits/rejected": -0.7594835758209229,
|
|
"logps/chosen": -350.37353515625,
|
|
"logps/ref_chosen": -279.95257568359375,
|
|
"logps/ref_rejected": -256.5327453613281,
|
|
"logps/rejected": -367.9373779296875,
|
|
"loss": 4.4423,
|
|
"margin_dpo/margin_mean": 40.983741760253906,
|
|
"margin_dpo/margin_std": 67.5083236694336,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.5528795811518324,
|
|
"fcm_dpo/beta": 0.012376993894577026,
|
|
"fcm_dpo/delta": -0.06797336786985397,
|
|
"fcm_dpo/margin": 49.21453857421875,
|
|
"fcm_dpo/q_t": 0.37006676197052,
|
|
"grad_norm": 106.1910400390625,
|
|
"learning_rate": 2.4908461957056825e-07,
|
|
"logits/chosen": -0.7897322177886963,
|
|
"logits/rejected": -0.7906150817871094,
|
|
"logps/chosen": -323.62689208984375,
|
|
"logps/ref_chosen": -260.53509521484375,
|
|
"logps/ref_rejected": -255.53799438476562,
|
|
"logps/rejected": -367.8443298339844,
|
|
"loss": 4.0584,
|
|
"margin_dpo/margin_mean": 49.21453857421875,
|
|
"margin_dpo/margin_std": 64.37642669677734,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.5549738219895288,
|
|
"fcm_dpo/beta": 0.011801987886428833,
|
|
"fcm_dpo/delta": -0.024343391880393028,
|
|
"fcm_dpo/margin": 52.564517974853516,
|
|
"fcm_dpo/q_t": 0.36950555443763733,
|
|
"grad_norm": 83.8652114868164,
|
|
"learning_rate": 2.4725390780077905e-07,
|
|
"logits/chosen": -0.8621577024459839,
|
|
"logits/rejected": -0.8714127540588379,
|
|
"logps/chosen": -347.90740966796875,
|
|
"logps/ref_chosen": -283.7130432128906,
|
|
"logps/ref_rejected": -270.3209533691406,
|
|
"logps/rejected": -387.079833984375,
|
|
"loss": 4.1157,
|
|
"margin_dpo/margin_mean": 52.564517974853516,
|
|
"margin_dpo/margin_std": 71.61198425292969,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.5570680628272251,
|
|
"fcm_dpo/beta": 0.01190432533621788,
|
|
"fcm_dpo/delta": -0.02119002863764763,
|
|
"fcm_dpo/margin": 51.92703628540039,
|
|
"fcm_dpo/q_t": 0.36642715334892273,
|
|
"grad_norm": 75.40443420410156,
|
|
"learning_rate": 2.454233432955807e-07,
|
|
"logits/chosen": -0.8703705072402954,
|
|
"logits/rejected": -0.8404238224029541,
|
|
"logps/chosen": -333.9207458496094,
|
|
"logps/ref_chosen": -278.09930419921875,
|
|
"logps/ref_rejected": -260.6734619140625,
|
|
"logps/rejected": -368.4219665527344,
|
|
"loss": 3.9296,
|
|
"margin_dpo/margin_mean": 51.92703628540039,
|
|
"margin_dpo/margin_std": 59.450191497802734,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.5591623036649215,
|
|
"fcm_dpo/beta": 0.011871559545397758,
|
|
"fcm_dpo/delta": 0.046678848564624786,
|
|
"fcm_dpo/margin": 42.63209533691406,
|
|
"fcm_dpo/q_t": 0.39243483543395996,
|
|
"grad_norm": 101.08575439453125,
|
|
"learning_rate": 2.435930242225919e-07,
|
|
"logits/chosen": -0.8200643658638,
|
|
"logits/rejected": -0.834830105304718,
|
|
"logps/chosen": -349.9026794433594,
|
|
"logps/ref_chosen": -280.33319091796875,
|
|
"logps/ref_rejected": -247.78099060058594,
|
|
"logps/rejected": -359.9825744628906,
|
|
"loss": 4.3181,
|
|
"margin_dpo/margin_mean": 42.63209533691406,
|
|
"margin_dpo/margin_std": 63.018951416015625,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.5612565445026177,
|
|
"fcm_dpo/beta": 0.012035196647047997,
|
|
"fcm_dpo/delta": -0.05291684344410896,
|
|
"fcm_dpo/margin": 53.979488372802734,
|
|
"fcm_dpo/q_t": 0.36334070563316345,
|
|
"grad_norm": 90.82762145996094,
|
|
"learning_rate": 2.4176304873626984e-07,
|
|
"logits/chosen": -0.7756884098052979,
|
|
"logits/rejected": -0.7557308673858643,
|
|
"logps/chosen": -370.0815734863281,
|
|
"logps/ref_chosen": -304.1787109375,
|
|
"logps/ref_rejected": -272.80316162109375,
|
|
"logps/rejected": -392.685546875,
|
|
"loss": 3.968,
|
|
"margin_dpo/margin_mean": 53.979488372802734,
|
|
"margin_dpo/margin_std": 67.94827270507812,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.5633507853403141,
|
|
"fcm_dpo/beta": 0.012536915019154549,
|
|
"fcm_dpo/delta": 0.11777209490537643,
|
|
"fcm_dpo/margin": 38.674407958984375,
|
|
"fcm_dpo/q_t": 0.39720258116722107,
|
|
"grad_norm": 126.63288116455078,
|
|
"learning_rate": 2.399335149726463e-07,
|
|
"logits/chosen": -0.8296777606010437,
|
|
"logits/rejected": -0.8268716931343079,
|
|
"logps/chosen": -321.98870849609375,
|
|
"logps/ref_chosen": -249.84512329101562,
|
|
"logps/ref_rejected": -223.37356567382812,
|
|
"logps/rejected": -334.19158935546875,
|
|
"loss": 4.5857,
|
|
"margin_dpo/margin_mean": 38.67441177368164,
|
|
"margin_dpo/margin_std": 72.29447174072266,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.5654450261780105,
|
|
"fcm_dpo/beta": 0.012732122093439102,
|
|
"fcm_dpo/delta": 0.004775438457727432,
|
|
"fcm_dpo/margin": 46.69505310058594,
|
|
"fcm_dpo/q_t": 0.3786012828350067,
|
|
"grad_norm": 100.78535461425781,
|
|
"learning_rate": 2.381045210440644e-07,
|
|
"logits/chosen": -0.8925029635429382,
|
|
"logits/rejected": -0.9094992876052856,
|
|
"logps/chosen": -395.2410888671875,
|
|
"logps/ref_chosen": -318.5623779296875,
|
|
"logps/ref_rejected": -281.1880798339844,
|
|
"logps/rejected": -404.5618591308594,
|
|
"loss": 4.3373,
|
|
"margin_dpo/margin_mean": 46.69505310058594,
|
|
"margin_dpo/margin_std": 75.69879150390625,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.5675392670157068,
|
|
"fcm_dpo/beta": 0.013183288276195526,
|
|
"fcm_dpo/delta": -0.0032483600080013275,
|
|
"fcm_dpo/margin": 45.56004333496094,
|
|
"fcm_dpo/q_t": 0.3815266489982605,
|
|
"grad_norm": 102.04464721679688,
|
|
"learning_rate": 2.3627616503391812e-07,
|
|
"logits/chosen": -0.7499503493309021,
|
|
"logits/rejected": -0.7473767995834351,
|
|
"logps/chosen": -358.6291198730469,
|
|
"logps/ref_chosen": -284.104736328125,
|
|
"logps/ref_rejected": -253.9580535888672,
|
|
"logps/rejected": -374.0425720214844,
|
|
"loss": 4.2719,
|
|
"margin_dpo/margin_mean": 45.5600471496582,
|
|
"margin_dpo/margin_std": 70.25496673583984,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.5696335078534032,
|
|
"fcm_dpo/beta": 0.01266053318977356,
|
|
"fcm_dpo/delta": -0.029928136616945267,
|
|
"fcm_dpo/margin": 49.448184967041016,
|
|
"fcm_dpo/q_t": 0.37268373370170593,
|
|
"grad_norm": 87.62028503417969,
|
|
"learning_rate": 2.344485449913914e-07,
|
|
"logits/chosen": -0.862612247467041,
|
|
"logits/rejected": -0.8508659601211548,
|
|
"logps/chosen": -367.183349609375,
|
|
"logps/ref_chosen": -297.3590087890625,
|
|
"logps/ref_rejected": -279.20196533203125,
|
|
"logps/rejected": -398.4744873046875,
|
|
"loss": 4.303,
|
|
"margin_dpo/margin_mean": 49.448184967041016,
|
|
"margin_dpo/margin_std": 78.49717712402344,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.5717277486910994,
|
|
"fcm_dpo/beta": 0.012191718444228172,
|
|
"fcm_dpo/delta": -0.02252171002328396,
|
|
"fcm_dpo/margin": 50.77750778198242,
|
|
"fcm_dpo/q_t": 0.3730708956718445,
|
|
"grad_norm": 96.84104919433594,
|
|
"learning_rate": 2.3262175892620062e-07,
|
|
"logits/chosen": -0.829898476600647,
|
|
"logits/rejected": -0.8430629968643188,
|
|
"logps/chosen": -365.34466552734375,
|
|
"logps/ref_chosen": -293.20574951171875,
|
|
"logps/ref_rejected": -274.7646789550781,
|
|
"logps/rejected": -397.68109130859375,
|
|
"loss": 4.2016,
|
|
"margin_dpo/margin_mean": 50.77750778198242,
|
|
"margin_dpo/margin_std": 74.58787536621094,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.5738219895287958,
|
|
"fcm_dpo/beta": 0.011691069230437279,
|
|
"fcm_dpo/delta": -0.11797457188367844,
|
|
"fcm_dpo/margin": 60.72999954223633,
|
|
"fcm_dpo/q_t": 0.3517453372478485,
|
|
"grad_norm": 88.58201599121094,
|
|
"learning_rate": 2.3079590480333827e-07,
|
|
"logits/chosen": -0.7908748388290405,
|
|
"logits/rejected": -0.7618493437767029,
|
|
"logps/chosen": -342.7866516113281,
|
|
"logps/ref_chosen": -270.55865478515625,
|
|
"logps/ref_rejected": -239.47048950195312,
|
|
"logps/rejected": -372.428466796875,
|
|
"loss": 3.8295,
|
|
"margin_dpo/margin_mean": 60.72999572753906,
|
|
"margin_dpo/margin_std": 72.17181396484375,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.5759162303664922,
|
|
"fcm_dpo/beta": 0.010889939963817596,
|
|
"fcm_dpo/delta": -0.05324774980545044,
|
|
"fcm_dpo/margin": 59.565277099609375,
|
|
"fcm_dpo/q_t": 0.3633711040019989,
|
|
"grad_norm": 70.90768432617188,
|
|
"learning_rate": 2.2897108053782e-07,
|
|
"logits/chosen": -0.8442721962928772,
|
|
"logits/rejected": -0.8297668099403381,
|
|
"logps/chosen": -315.0128173828125,
|
|
"logps/ref_chosen": -250.31922912597656,
|
|
"logps/ref_rejected": -249.3187255859375,
|
|
"logps/rejected": -373.57757568359375,
|
|
"loss": 3.905,
|
|
"margin_dpo/margin_mean": 59.56527328491211,
|
|
"margin_dpo/margin_std": 71.25727844238281,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.5780104712041885,
|
|
"fcm_dpo/beta": 0.010673362761735916,
|
|
"fcm_dpo/delta": 0.0510733537375927,
|
|
"fcm_dpo/margin": 51.6572265625,
|
|
"fcm_dpo/q_t": 0.38332486152648926,
|
|
"grad_norm": 80.28546905517578,
|
|
"learning_rate": 2.2714738398943308e-07,
|
|
"logits/chosen": -0.910760223865509,
|
|
"logits/rejected": -0.8880026340484619,
|
|
"logps/chosen": -372.0062561035156,
|
|
"logps/ref_chosen": -297.6310729980469,
|
|
"logps/ref_rejected": -295.225830078125,
|
|
"logps/rejected": -421.25823974609375,
|
|
"loss": 4.2562,
|
|
"margin_dpo/margin_mean": 51.6572265625,
|
|
"margin_dpo/margin_std": 74.79243469238281,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.5801047120418849,
|
|
"fcm_dpo/beta": 0.01158787589520216,
|
|
"fcm_dpo/delta": 0.07226106524467468,
|
|
"fcm_dpo/margin": 45.672664642333984,
|
|
"fcm_dpo/q_t": 0.38873162865638733,
|
|
"grad_norm": 104.22013092041016,
|
|
"learning_rate": 2.2532491295748865e-07,
|
|
"logits/chosen": -0.8400160074234009,
|
|
"logits/rejected": -0.8426806330680847,
|
|
"logps/chosen": -344.77178955078125,
|
|
"logps/ref_chosen": -266.3604736328125,
|
|
"logps/ref_rejected": -253.36767578125,
|
|
"logps/rejected": -377.45166015625,
|
|
"loss": 4.4179,
|
|
"margin_dpo/margin_mean": 45.672664642333984,
|
|
"margin_dpo/margin_std": 74.9579849243164,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.5821989528795811,
|
|
"fcm_dpo/beta": 0.0121334008872509,
|
|
"fcm_dpo/delta": 0.04825280234217644,
|
|
"fcm_dpo/margin": 33.917503356933594,
|
|
"fcm_dpo/q_t": 0.420282781124115,
|
|
"grad_norm": 115.2516860961914,
|
|
"learning_rate": 2.2350376517557726e-07,
|
|
"logits/chosen": -0.8667393326759338,
|
|
"logits/rejected": -0.8342878222465515,
|
|
"logps/chosen": -357.53857421875,
|
|
"logps/ref_chosen": -267.40728759765625,
|
|
"logps/ref_rejected": -229.5758514404297,
|
|
"logps/rejected": -353.6246337890625,
|
|
"loss": 4.9917,
|
|
"margin_dpo/margin_mean": 33.917503356933594,
|
|
"margin_dpo/margin_std": 81.22914123535156,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.5842931937172775,
|
|
"fcm_dpo/beta": 0.011737332679331303,
|
|
"fcm_dpo/delta": -0.12058336287736893,
|
|
"fcm_dpo/margin": 55.69242858886719,
|
|
"fcm_dpo/q_t": 0.3652134835720062,
|
|
"grad_norm": 112.99444580078125,
|
|
"learning_rate": 2.2168403830632769e-07,
|
|
"logits/chosen": -0.781296968460083,
|
|
"logits/rejected": -0.7669795751571655,
|
|
"logps/chosen": -393.08892822265625,
|
|
"logps/ref_chosen": -313.3677978515625,
|
|
"logps/ref_rejected": -299.1744384765625,
|
|
"logps/rejected": -434.5880126953125,
|
|
"loss": 4.1187,
|
|
"margin_dpo/margin_mean": 55.69242477416992,
|
|
"margin_dpo/margin_std": 78.3192138671875,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.5863874345549738,
|
|
"fcm_dpo/beta": 0.011380909010767937,
|
|
"fcm_dpo/delta": 0.04981427267193794,
|
|
"fcm_dpo/margin": 48.567710876464844,
|
|
"fcm_dpo/q_t": 0.3854876160621643,
|
|
"grad_norm": 81.46392059326172,
|
|
"learning_rate": 2.1986582993616925e-07,
|
|
"logits/chosen": -0.8543354272842407,
|
|
"logits/rejected": -0.8661242127418518,
|
|
"logps/chosen": -334.360595703125,
|
|
"logps/ref_chosen": -265.5558166503906,
|
|
"logps/ref_rejected": -247.1573944091797,
|
|
"logps/rejected": -364.5298767089844,
|
|
"loss": 4.3441,
|
|
"margin_dpo/margin_mean": 48.567710876464844,
|
|
"margin_dpo/margin_std": 78.15949249267578,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.5884816753926702,
|
|
"fcm_dpo/beta": 0.011563065461814404,
|
|
"fcm_dpo/delta": 0.030172260478138924,
|
|
"fcm_dpo/margin": 49.29734802246094,
|
|
"fcm_dpo/q_t": 0.3839731514453888,
|
|
"grad_norm": 101.9212875366211,
|
|
"learning_rate": 2.1804923757009882e-07,
|
|
"logits/chosen": -0.8250092267990112,
|
|
"logits/rejected": -0.8347154855728149,
|
|
"logps/chosen": -380.861328125,
|
|
"logps/ref_chosen": -295.2995910644531,
|
|
"logps/ref_rejected": -293.80877685546875,
|
|
"logps/rejected": -428.6678161621094,
|
|
"loss": 4.2825,
|
|
"margin_dpo/margin_mean": 49.29734420776367,
|
|
"margin_dpo/margin_std": 74.99603271484375,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.5905759162303665,
|
|
"fcm_dpo/beta": 0.011664286255836487,
|
|
"fcm_dpo/delta": -0.011024218052625656,
|
|
"fcm_dpo/margin": 52.18028259277344,
|
|
"fcm_dpo/q_t": 0.3756002187728882,
|
|
"grad_norm": 89.68161010742188,
|
|
"learning_rate": 2.1623435862645205e-07,
|
|
"logits/chosen": -0.8206506967544556,
|
|
"logits/rejected": -0.8235145211219788,
|
|
"logps/chosen": -391.75213623046875,
|
|
"logps/ref_chosen": -318.63714599609375,
|
|
"logps/ref_rejected": -273.5943603515625,
|
|
"logps/rejected": -398.88958740234375,
|
|
"loss": 4.2229,
|
|
"margin_dpo/margin_mean": 52.18027877807617,
|
|
"margin_dpo/margin_std": 77.2578353881836,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.5926701570680628,
|
|
"fcm_dpo/beta": 0.012338871136307716,
|
|
"fcm_dpo/delta": 0.04455633834004402,
|
|
"fcm_dpo/margin": 44.96173858642578,
|
|
"fcm_dpo/q_t": 0.3896506428718567,
|
|
"grad_norm": 90.43144989013672,
|
|
"learning_rate": 2.1442129043167873e-07,
|
|
"logits/chosen": -0.8331937193870544,
|
|
"logits/rejected": -0.8291042447090149,
|
|
"logps/chosen": -333.6814270019531,
|
|
"logps/ref_chosen": -254.66053771972656,
|
|
"logps/ref_rejected": -236.8627166748047,
|
|
"logps/rejected": -360.8453369140625,
|
|
"loss": 4.3718,
|
|
"margin_dpo/margin_mean": 44.96173858642578,
|
|
"margin_dpo/margin_std": 73.44696044921875,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.5947643979057592,
|
|
"fcm_dpo/beta": 0.011935784481465816,
|
|
"fcm_dpo/delta": -0.03917480632662773,
|
|
"fcm_dpo/margin": 53.04762649536133,
|
|
"fcm_dpo/q_t": 0.37296316027641296,
|
|
"grad_norm": 118.94564819335938,
|
|
"learning_rate": 2.1261013021512378e-07,
|
|
"logits/chosen": -0.8013940453529358,
|
|
"logits/rejected": -0.7824323773384094,
|
|
"logps/chosen": -353.3193359375,
|
|
"logps/ref_chosen": -273.355224609375,
|
|
"logps/ref_rejected": -259.84759521484375,
|
|
"logps/rejected": -392.8592834472656,
|
|
"loss": 4.2865,
|
|
"margin_dpo/margin_mean": 53.047630310058594,
|
|
"margin_dpo/margin_std": 81.07416534423828,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.5968586387434555,
|
|
"fcm_dpo/beta": 0.012555155903100967,
|
|
"fcm_dpo/delta": 0.06368312239646912,
|
|
"fcm_dpo/margin": 36.739646911621094,
|
|
"fcm_dpo/q_t": 0.40852105617523193,
|
|
"grad_norm": 148.76206970214844,
|
|
"learning_rate": 2.1080097510381294e-07,
|
|
"logits/chosen": -0.8108698725700378,
|
|
"logits/rejected": -0.8106898069381714,
|
|
"logps/chosen": -394.7806701660156,
|
|
"logps/ref_chosen": -309.8022155761719,
|
|
"logps/ref_rejected": -279.11846923828125,
|
|
"logps/rejected": -400.8365478515625,
|
|
"loss": 4.7712,
|
|
"margin_dpo/margin_mean": 36.739646911621094,
|
|
"margin_dpo/margin_std": 75.5753402709961,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.5989528795811518,
|
|
"fcm_dpo/beta": 0.012427356094121933,
|
|
"fcm_dpo/delta": 0.030025284737348557,
|
|
"fcm_dpo/margin": 45.97602844238281,
|
|
"fcm_dpo/q_t": 0.38827937841415405,
|
|
"grad_norm": 124.07766723632812,
|
|
"learning_rate": 2.089939221172446e-07,
|
|
"logits/chosen": -0.8039661049842834,
|
|
"logits/rejected": -0.7928801774978638,
|
|
"logps/chosen": -349.1395568847656,
|
|
"logps/ref_chosen": -271.4655456542969,
|
|
"logps/ref_rejected": -279.531494140625,
|
|
"logps/rejected": -403.1815185546875,
|
|
"loss": 4.4449,
|
|
"margin_dpo/margin_mean": 45.97602844238281,
|
|
"margin_dpo/margin_std": 79.96269989013672,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.6010471204188481,
|
|
"fcm_dpo/beta": 0.012432662770152092,
|
|
"fcm_dpo/delta": -0.020902253687381744,
|
|
"fcm_dpo/margin": 49.71929931640625,
|
|
"fcm_dpo/q_t": 0.37481507658958435,
|
|
"grad_norm": 98.54369354248047,
|
|
"learning_rate": 2.0718906816218595e-07,
|
|
"logits/chosen": -0.8174068331718445,
|
|
"logits/rejected": -0.8055183291435242,
|
|
"logps/chosen": -350.4097595214844,
|
|
"logps/ref_chosen": -277.0932312011719,
|
|
"logps/ref_rejected": -233.55599975585938,
|
|
"logps/rejected": -356.591796875,
|
|
"loss": 4.3139,
|
|
"margin_dpo/margin_mean": 49.71929931640625,
|
|
"margin_dpo/margin_std": 79.30457305908203,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.6031413612565445,
|
|
"fcm_dpo/beta": 0.01297105010598898,
|
|
"fcm_dpo/delta": -0.004447203129529953,
|
|
"fcm_dpo/margin": 46.434669494628906,
|
|
"fcm_dpo/q_t": 0.3777006268501282,
|
|
"grad_norm": 120.47964477539062,
|
|
"learning_rate": 2.053865100274774e-07,
|
|
"logits/chosen": -0.8263804316520691,
|
|
"logits/rejected": -0.8423773050308228,
|
|
"logps/chosen": -362.74114990234375,
|
|
"logps/ref_chosen": -293.1681823730469,
|
|
"logps/ref_rejected": -263.4059143066406,
|
|
"logps/rejected": -379.41351318359375,
|
|
"loss": 4.2667,
|
|
"margin_dpo/margin_mean": 46.43466567993164,
|
|
"margin_dpo/margin_std": 71.86286926269531,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.6052356020942409,
|
|
"fcm_dpo/beta": 0.013167420402169228,
|
|
"fcm_dpo/delta": 0.1251918226480484,
|
|
"fcm_dpo/margin": 32.636474609375,
|
|
"fcm_dpo/q_t": 0.41369497776031494,
|
|
"grad_norm": 108.58908081054688,
|
|
"learning_rate": 2.035863443788411e-07,
|
|
"logits/chosen": -0.8092857599258423,
|
|
"logits/rejected": -0.7957339882850647,
|
|
"logps/chosen": -412.3819580078125,
|
|
"logps/ref_chosen": -329.9574279785156,
|
|
"logps/ref_rejected": -276.7565002441406,
|
|
"logps/rejected": -391.8175048828125,
|
|
"loss": 4.8066,
|
|
"margin_dpo/margin_mean": 32.636474609375,
|
|
"margin_dpo/margin_std": 70.96094512939453,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.6073298429319371,
|
|
"fcm_dpo/beta": 0.012977060861885548,
|
|
"fcm_dpo/delta": -0.08185821771621704,
|
|
"fcm_dpo/margin": 44.748985290527344,
|
|
"fcm_dpo/q_t": 0.38607901334762573,
|
|
"grad_norm": 140.78160095214844,
|
|
"learning_rate": 2.0178866775369774e-07,
|
|
"logits/chosen": -0.8182957172393799,
|
|
"logits/rejected": -0.7599232196807861,
|
|
"logps/chosen": -399.31103515625,
|
|
"logps/ref_chosen": -324.6690673828125,
|
|
"logps/ref_rejected": -311.8439636230469,
|
|
"logps/rejected": -431.23492431640625,
|
|
"loss": 4.4597,
|
|
"margin_dpo/margin_mean": 44.74897766113281,
|
|
"margin_dpo/margin_std": 74.92218780517578,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.6094240837696335,
|
|
"fcm_dpo/beta": 0.012201309204101562,
|
|
"fcm_dpo/delta": -0.08980172872543335,
|
|
"fcm_dpo/margin": 55.73931121826172,
|
|
"fcm_dpo/q_t": 0.3614313304424286,
|
|
"grad_norm": 100.59260559082031,
|
|
"learning_rate": 1.9999357655598891e-07,
|
|
"logits/chosen": -0.7959886193275452,
|
|
"logits/rejected": -0.789124608039856,
|
|
"logps/chosen": -342.9815673828125,
|
|
"logps/ref_chosen": -274.1440734863281,
|
|
"logps/ref_rejected": -278.07208251953125,
|
|
"logps/rejected": -402.6488342285156,
|
|
"loss": 3.9778,
|
|
"margin_dpo/margin_mean": 55.73931121826172,
|
|
"margin_dpo/margin_std": 71.53327941894531,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.6115183246073298,
|
|
"fcm_dpo/beta": 0.012481886893510818,
|
|
"fcm_dpo/delta": 0.098934106528759,
|
|
"fcm_dpo/margin": 40.45347213745117,
|
|
"fcm_dpo/q_t": 0.39631906151771545,
|
|
"grad_norm": 105.07350158691406,
|
|
"learning_rate": 1.9820116705100775e-07,
|
|
"logits/chosen": -0.7960292100906372,
|
|
"logits/rejected": -0.7907694578170776,
|
|
"logps/chosen": -324.7724914550781,
|
|
"logps/ref_chosen": -259.3636779785156,
|
|
"logps/ref_rejected": -279.30218505859375,
|
|
"logps/rejected": -385.16448974609375,
|
|
"loss": 4.5424,
|
|
"margin_dpo/margin_mean": 40.45347213745117,
|
|
"margin_dpo/margin_std": 71.65106964111328,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.6136125654450262,
|
|
"fcm_dpo/beta": 0.012850621715188026,
|
|
"fcm_dpo/delta": -0.047699183225631714,
|
|
"fcm_dpo/margin": 50.01988983154297,
|
|
"fcm_dpo/q_t": 0.3677240014076233,
|
|
"grad_norm": 105.40121459960938,
|
|
"learning_rate": 1.9641153536023642e-07,
|
|
"logits/chosen": -0.8889198899269104,
|
|
"logits/rejected": -0.8521823287010193,
|
|
"logps/chosen": -376.5350646972656,
|
|
"logps/ref_chosen": -303.77081298828125,
|
|
"logps/ref_rejected": -270.07513427734375,
|
|
"logps/rejected": -392.8592224121094,
|
|
"loss": 4.0238,
|
|
"margin_dpo/margin_mean": 50.01988220214844,
|
|
"margin_dpo/margin_std": 65.39283752441406,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.6157068062827226,
|
|
"fcm_dpo/beta": 0.012599381618201733,
|
|
"fcm_dpo/delta": -0.005292973015457392,
|
|
"fcm_dpo/margin": 47.930747985839844,
|
|
"fcm_dpo/q_t": 0.3788926601409912,
|
|
"grad_norm": 105.63341522216797,
|
|
"learning_rate": 1.9462477745619106e-07,
|
|
"logits/chosen": -0.795003354549408,
|
|
"logits/rejected": -0.8052266240119934,
|
|
"logps/chosen": -302.889892578125,
|
|
"logps/ref_chosen": -240.23831176757812,
|
|
"logps/ref_rejected": -229.187744140625,
|
|
"logps/rejected": -339.77008056640625,
|
|
"loss": 4.1926,
|
|
"margin_dpo/margin_mean": 47.93075180053711,
|
|
"margin_dpo/margin_std": 71.22593688964844,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.6178010471204188,
|
|
"fcm_dpo/beta": 0.012672440148890018,
|
|
"fcm_dpo/delta": 0.043098170310258865,
|
|
"fcm_dpo/margin": 44.029075622558594,
|
|
"fcm_dpo/q_t": 0.38533294200897217,
|
|
"grad_norm": 89.81253814697266,
|
|
"learning_rate": 1.928409891572757e-07,
|
|
"logits/chosen": -0.7766979932785034,
|
|
"logits/rejected": -0.7932155728340149,
|
|
"logps/chosen": -319.8603210449219,
|
|
"logps/ref_chosen": -251.00970458984375,
|
|
"logps/ref_rejected": -244.15142822265625,
|
|
"logps/rejected": -357.0310974121094,
|
|
"loss": 4.3008,
|
|
"margin_dpo/margin_mean": 44.029075622558594,
|
|
"margin_dpo/margin_std": 67.06430053710938,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.6198952879581152,
|
|
"fcm_dpo/beta": 0.012121832929551601,
|
|
"fcm_dpo/delta": -0.1331343948841095,
|
|
"fcm_dpo/margin": 59.48346710205078,
|
|
"fcm_dpo/q_t": 0.3515579402446747,
|
|
"grad_norm": 86.63916015625,
|
|
"learning_rate": 1.9106026612264315e-07,
|
|
"logits/chosen": -0.7734822034835815,
|
|
"logits/rejected": -0.7490954995155334,
|
|
"logps/chosen": -363.75494384765625,
|
|
"logps/ref_chosen": -293.880615234375,
|
|
"logps/ref_rejected": -283.4175720214844,
|
|
"logps/rejected": -412.775390625,
|
|
"loss": 3.9594,
|
|
"margin_dpo/margin_mean": 59.48346710205078,
|
|
"margin_dpo/margin_std": 77.31208801269531,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.6219895287958115,
|
|
"fcm_dpo/beta": 0.011517000384628773,
|
|
"fcm_dpo/delta": 0.023262428119778633,
|
|
"fcm_dpo/margin": 41.557411193847656,
|
|
"fcm_dpo/q_t": 0.39879322052001953,
|
|
"grad_norm": 90.58515930175781,
|
|
"learning_rate": 1.8928270384706582e-07,
|
|
"logits/chosen": -0.8670139312744141,
|
|
"logits/rejected": -0.8624626994132996,
|
|
"logps/chosen": -358.82000732421875,
|
|
"logps/ref_chosen": -289.4600830078125,
|
|
"logps/ref_rejected": -283.69110107421875,
|
|
"logps/rejected": -394.60845947265625,
|
|
"loss": 4.4825,
|
|
"margin_dpo/margin_mean": 41.557411193847656,
|
|
"margin_dpo/margin_std": 69.22006225585938,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.6240837696335079,
|
|
"fcm_dpo/beta": 0.01156252808868885,
|
|
"fcm_dpo/delta": -0.06845314055681229,
|
|
"fcm_dpo/margin": 48.89856719970703,
|
|
"fcm_dpo/q_t": 0.38479888439178467,
|
|
"grad_norm": 105.2696533203125,
|
|
"learning_rate": 1.875083976558136e-07,
|
|
"logits/chosen": -0.7988805770874023,
|
|
"logits/rejected": -0.7908245921134949,
|
|
"logps/chosen": -369.48431396484375,
|
|
"logps/ref_chosen": -306.5150146484375,
|
|
"logps/ref_rejected": -280.6969909667969,
|
|
"logps/rejected": -392.5648193359375,
|
|
"loss": 4.3527,
|
|
"margin_dpo/margin_mean": 48.89856719970703,
|
|
"margin_dpo/margin_std": 77.35851287841797,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.6261780104712041,
|
|
"fcm_dpo/beta": 0.01131986640393734,
|
|
"fcm_dpo/delta": 0.04895632341504097,
|
|
"fcm_dpo/margin": 43.01826477050781,
|
|
"fcm_dpo/q_t": 0.39540231227874756,
|
|
"grad_norm": 94.2397232055664,
|
|
"learning_rate": 1.8573744269954297e-07,
|
|
"logits/chosen": -0.7741419076919556,
|
|
"logits/rejected": -0.7654407024383545,
|
|
"logps/chosen": -358.94085693359375,
|
|
"logps/ref_chosen": -281.36376953125,
|
|
"logps/ref_rejected": -270.39508056640625,
|
|
"logps/rejected": -390.9903869628906,
|
|
"loss": 4.399,
|
|
"margin_dpo/margin_mean": 43.01826477050781,
|
|
"margin_dpo/margin_std": 66.27005767822266,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.6282722513089005,
|
|
"fcm_dpo/beta": 0.012323617935180664,
|
|
"fcm_dpo/delta": 0.09153569489717484,
|
|
"fcm_dpo/margin": 41.455352783203125,
|
|
"fcm_dpo/q_t": 0.39375266432762146,
|
|
"grad_norm": 146.0800323486328,
|
|
"learning_rate": 1.839699339491937e-07,
|
|
"logits/chosen": -0.81211256980896,
|
|
"logits/rejected": -0.788737952709198,
|
|
"logps/chosen": -392.5552978515625,
|
|
"logps/ref_chosen": -314.83575439453125,
|
|
"logps/ref_rejected": -269.1154479980469,
|
|
"logps/rejected": -388.29034423828125,
|
|
"loss": 4.4804,
|
|
"margin_dpo/margin_mean": 41.45535659790039,
|
|
"margin_dpo/margin_std": 71.92596435546875,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.6303664921465969,
|
|
"fcm_dpo/beta": 0.0128701226785779,
|
|
"fcm_dpo/delta": 0.04166974872350693,
|
|
"fcm_dpo/margin": 43.445735931396484,
|
|
"fcm_dpo/q_t": 0.3868556618690491,
|
|
"grad_norm": 91.15668487548828,
|
|
"learning_rate": 1.8220596619089573e-07,
|
|
"logits/chosen": -0.814331591129303,
|
|
"logits/rejected": -0.8293969035148621,
|
|
"logps/chosen": -353.07177734375,
|
|
"logps/ref_chosen": -279.89453125,
|
|
"logps/ref_rejected": -271.6694641113281,
|
|
"logps/rejected": -388.2925109863281,
|
|
"loss": 4.3454,
|
|
"margin_dpo/margin_mean": 43.44573974609375,
|
|
"margin_dpo/margin_std": 68.72089385986328,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.6324607329842932,
|
|
"fcm_dpo/beta": 0.012415561825037003,
|
|
"fcm_dpo/delta": -0.08762803673744202,
|
|
"fcm_dpo/margin": 54.83326721191406,
|
|
"fcm_dpo/q_t": 0.3612514138221741,
|
|
"grad_norm": 117.01398468017578,
|
|
"learning_rate": 1.8044563402088682e-07,
|
|
"logits/chosen": -0.7894245386123657,
|
|
"logits/rejected": -0.7746908068656921,
|
|
"logps/chosen": -341.82904052734375,
|
|
"logps/ref_chosen": -271.3318176269531,
|
|
"logps/ref_rejected": -256.5587158203125,
|
|
"logps/rejected": -381.88922119140625,
|
|
"loss": 4.0128,
|
|
"margin_dpo/margin_mean": 54.83326721191406,
|
|
"margin_dpo/margin_std": 74.75352478027344,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.6345549738219896,
|
|
"fcm_dpo/beta": 0.012025467120110989,
|
|
"fcm_dpo/delta": -0.04249938949942589,
|
|
"fcm_dpo/margin": 48.98454666137695,
|
|
"fcm_dpo/q_t": 0.3781017065048218,
|
|
"grad_norm": 116.30992126464844,
|
|
"learning_rate": 1.7868903184043885e-07,
|
|
"logits/chosen": -0.7718071937561035,
|
|
"logits/rejected": -0.7559300661087036,
|
|
"logps/chosen": -381.57781982421875,
|
|
"logps/ref_chosen": -304.88104248046875,
|
|
"logps/ref_rejected": -269.063720703125,
|
|
"logps/rejected": -394.7451171875,
|
|
"loss": 4.2912,
|
|
"margin_dpo/margin_mean": 48.98455047607422,
|
|
"margin_dpo/margin_std": 75.46881103515625,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.6366492146596858,
|
|
"fcm_dpo/beta": 0.011567480862140656,
|
|
"fcm_dpo/delta": -0.02077743411064148,
|
|
"fcm_dpo/margin": 53.529052734375,
|
|
"fcm_dpo/q_t": 0.37393832206726074,
|
|
"grad_norm": 108.21533966064453,
|
|
"learning_rate": 1.7693625385079574e-07,
|
|
"logits/chosen": -0.7794772982597351,
|
|
"logits/rejected": -0.7964142560958862,
|
|
"logps/chosen": -375.22418212890625,
|
|
"logps/ref_chosen": -290.7109680175781,
|
|
"logps/ref_rejected": -237.6885986328125,
|
|
"logps/rejected": -375.7308654785156,
|
|
"loss": 4.1337,
|
|
"margin_dpo/margin_mean": 53.529056549072266,
|
|
"margin_dpo/margin_std": 77.61477661132812,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.6387434554973822,
|
|
"fcm_dpo/beta": 0.010576148517429829,
|
|
"fcm_dpo/delta": -0.1733783483505249,
|
|
"fcm_dpo/margin": 71.53874206542969,
|
|
"fcm_dpo/q_t": 0.3409091830253601,
|
|
"grad_norm": 89.68358612060547,
|
|
"learning_rate": 1.7518739404812155e-07,
|
|
"logits/chosen": -0.8426798582077026,
|
|
"logits/rejected": -0.8134666085243225,
|
|
"logps/chosen": -331.08544921875,
|
|
"logps/ref_chosen": -256.4839782714844,
|
|
"logps/ref_rejected": -266.4063415527344,
|
|
"logps/rejected": -412.5465087890625,
|
|
"loss": 3.7185,
|
|
"margin_dpo/margin_mean": 71.53873443603516,
|
|
"margin_dpo/margin_std": 78.6550064086914,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.6408376963350786,
|
|
"fcm_dpo/beta": 0.010183380916714668,
|
|
"fcm_dpo/delta": 0.026483479887247086,
|
|
"fcm_dpo/margin": 45.928916931152344,
|
|
"fcm_dpo/q_t": 0.3991745114326477,
|
|
"grad_norm": 85.03260803222656,
|
|
"learning_rate": 1.7344254621846017e-07,
|
|
"logits/chosen": -0.8300163745880127,
|
|
"logits/rejected": -0.8189243078231812,
|
|
"logps/chosen": -402.5002746582031,
|
|
"logps/ref_chosen": -320.6492004394531,
|
|
"logps/ref_rejected": -273.36773681640625,
|
|
"logps/rejected": -401.1476745605469,
|
|
"loss": 4.3841,
|
|
"margin_dpo/margin_mean": 45.92892074584961,
|
|
"margin_dpo/margin_std": 69.21966552734375,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.6429319371727749,
|
|
"fcm_dpo/beta": 0.010290293022990227,
|
|
"fcm_dpo/delta": -0.004134609363973141,
|
|
"fcm_dpo/margin": 51.01483154296875,
|
|
"fcm_dpo/q_t": 0.38504621386528015,
|
|
"grad_norm": 133.7060546875,
|
|
"learning_rate": 1.717018039327053e-07,
|
|
"logits/chosen": -0.7672021389007568,
|
|
"logits/rejected": -0.8132136464118958,
|
|
"logps/chosen": -379.48583984375,
|
|
"logps/ref_chosen": -279.4541931152344,
|
|
"logps/ref_rejected": -240.3796844482422,
|
|
"logps/rejected": -391.4261474609375,
|
|
"loss": 4.2074,
|
|
"margin_dpo/margin_mean": 51.01482391357422,
|
|
"margin_dpo/margin_std": 66.68528747558594,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.6450261780104712,
|
|
"fcm_dpo/beta": 0.010651452466845512,
|
|
"fcm_dpo/delta": 0.11725203692913055,
|
|
"fcm_dpo/margin": 41.120540618896484,
|
|
"fcm_dpo/q_t": 0.4069848656654358,
|
|
"grad_norm": 93.3102035522461,
|
|
"learning_rate": 1.699652605415828e-07,
|
|
"logits/chosen": -0.8160425424575806,
|
|
"logits/rejected": -0.8356633186340332,
|
|
"logps/chosen": -400.3565673828125,
|
|
"logps/ref_chosen": -296.598388671875,
|
|
"logps/ref_rejected": -258.6953430175781,
|
|
"logps/rejected": -403.57403564453125,
|
|
"loss": 4.584,
|
|
"margin_dpo/margin_mean": 41.120540618896484,
|
|
"margin_dpo/margin_std": 74.20211791992188,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.6471204188481675,
|
|
"fcm_dpo/beta": 0.011160111054778099,
|
|
"fcm_dpo/delta": -0.028207721188664436,
|
|
"fcm_dpo/margin": 56.09125518798828,
|
|
"fcm_dpo/q_t": 0.3682219386100769,
|
|
"grad_norm": 90.17739868164062,
|
|
"learning_rate": 1.6823300917064458e-07,
|
|
"logits/chosen": -0.8190463781356812,
|
|
"logits/rejected": -0.832842230796814,
|
|
"logps/chosen": -382.3316955566406,
|
|
"logps/ref_chosen": -281.3881530761719,
|
|
"logps/ref_rejected": -262.458740234375,
|
|
"logps/rejected": -419.4935302734375,
|
|
"loss": 4.0162,
|
|
"margin_dpo/margin_mean": 56.09125518798828,
|
|
"margin_dpo/margin_std": 73.06241607666016,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.6492146596858639,
|
|
"fcm_dpo/beta": 0.011178172193467617,
|
|
"fcm_dpo/delta": 0.008381815627217293,
|
|
"fcm_dpo/margin": 52.827457427978516,
|
|
"fcm_dpo/q_t": 0.3756198287010193,
|
|
"grad_norm": 120.2673568725586,
|
|
"learning_rate": 1.6650514271527465e-07,
|
|
"logits/chosen": -0.8169420957565308,
|
|
"logits/rejected": -0.7943635582923889,
|
|
"logps/chosen": -377.6767578125,
|
|
"logps/ref_chosen": -279.1872863769531,
|
|
"logps/ref_rejected": -261.8279724121094,
|
|
"logps/rejected": -413.1448974609375,
|
|
"loss": 4.1525,
|
|
"margin_dpo/margin_mean": 52.82746124267578,
|
|
"margin_dpo/margin_std": 73.31507873535156,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.6513089005235602,
|
|
"fcm_dpo/beta": 0.011097338050603867,
|
|
"fcm_dpo/delta": 0.006144438870251179,
|
|
"fcm_dpo/margin": 53.27901840209961,
|
|
"fcm_dpo/q_t": 0.37421154975891113,
|
|
"grad_norm": 124.52708435058594,
|
|
"learning_rate": 1.647817538357072e-07,
|
|
"logits/chosen": -0.8149024844169617,
|
|
"logits/rejected": -0.7987397909164429,
|
|
"logps/chosen": -371.7431335449219,
|
|
"logps/ref_chosen": -271.39813232421875,
|
|
"logps/ref_rejected": -266.12701416015625,
|
|
"logps/rejected": -419.75103759765625,
|
|
"loss": 4.2385,
|
|
"margin_dpo/margin_mean": 53.279022216796875,
|
|
"margin_dpo/margin_std": 77.10747528076172,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.6534031413612565,
|
|
"fcm_dpo/beta": 0.011262207292020321,
|
|
"fcm_dpo/delta": 0.04970509931445122,
|
|
"fcm_dpo/margin": 48.75176239013672,
|
|
"fcm_dpo/q_t": 0.3897601068019867,
|
|
"grad_norm": 105.6718521118164,
|
|
"learning_rate": 1.6306293495205755e-07,
|
|
"logits/chosen": -0.8199286460876465,
|
|
"logits/rejected": -0.8051372766494751,
|
|
"logps/chosen": -381.46502685546875,
|
|
"logps/ref_chosen": -282.3850402832031,
|
|
"logps/ref_rejected": -246.35389709472656,
|
|
"logps/rejected": -394.1856689453125,
|
|
"loss": 4.5143,
|
|
"margin_dpo/margin_mean": 48.75176239013672,
|
|
"margin_dpo/margin_std": 85.32847595214844,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.6554973821989529,
|
|
"fcm_dpo/beta": 0.011512380093336105,
|
|
"fcm_dpo/delta": -0.049063071608543396,
|
|
"fcm_dpo/margin": 51.31576156616211,
|
|
"fcm_dpo/q_t": 0.3808843493461609,
|
|
"grad_norm": 92.33170318603516,
|
|
"learning_rate": 1.6134877823936607e-07,
|
|
"logits/chosen": -0.8606759309768677,
|
|
"logits/rejected": -0.8551607131958008,
|
|
"logps/chosen": -401.7897033691406,
|
|
"logps/ref_chosen": -303.630859375,
|
|
"logps/ref_rejected": -273.1156921386719,
|
|
"logps/rejected": -422.5903015136719,
|
|
"loss": 4.3579,
|
|
"margin_dpo/margin_mean": 51.31576156616211,
|
|
"margin_dpo/margin_std": 80.45419311523438,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.6575916230366492,
|
|
"fcm_dpo/beta": 0.011472068727016449,
|
|
"fcm_dpo/delta": 0.017066676169633865,
|
|
"fcm_dpo/margin": 50.73048782348633,
|
|
"fcm_dpo/q_t": 0.3782009482383728,
|
|
"grad_norm": 96.01194763183594,
|
|
"learning_rate": 1.5963937562265522e-07,
|
|
"logits/chosen": -0.8795362114906311,
|
|
"logits/rejected": -0.8655129671096802,
|
|
"logps/chosen": -394.734619140625,
|
|
"logps/ref_chosen": -302.3042907714844,
|
|
"logps/ref_rejected": -273.6416015625,
|
|
"logps/rejected": -416.8023681640625,
|
|
"loss": 4.2074,
|
|
"margin_dpo/margin_mean": 50.73048782348633,
|
|
"margin_dpo/margin_std": 72.94244384765625,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.6596858638743456,
|
|
"fcm_dpo/beta": 0.011077978648245335,
|
|
"fcm_dpo/delta": -0.06065046787261963,
|
|
"fcm_dpo/margin": 59.16315841674805,
|
|
"fcm_dpo/q_t": 0.3630554676055908,
|
|
"grad_norm": 93.33431243896484,
|
|
"learning_rate": 1.5793481877199943e-07,
|
|
"logits/chosen": -0.8478763103485107,
|
|
"logits/rejected": -0.834101140499115,
|
|
"logps/chosen": -394.13946533203125,
|
|
"logps/ref_chosen": -302.729248046875,
|
|
"logps/ref_rejected": -270.26910400390625,
|
|
"logps/rejected": -420.8424377441406,
|
|
"loss": 4.0009,
|
|
"margin_dpo/margin_mean": 59.16315841674805,
|
|
"margin_dpo/margin_std": 75.37049102783203,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.6617801047120419,
|
|
"fcm_dpo/beta": 0.010612869635224342,
|
|
"fcm_dpo/delta": -0.011615972965955734,
|
|
"fcm_dpo/margin": 57.42588806152344,
|
|
"fcm_dpo/q_t": 0.37472862005233765,
|
|
"grad_norm": 80.97152709960938,
|
|
"learning_rate": 1.562351990976095e-07,
|
|
"logits/chosen": -0.8666278123855591,
|
|
"logits/rejected": -0.8581745624542236,
|
|
"logps/chosen": -398.5106506347656,
|
|
"logps/ref_chosen": -310.5706481933594,
|
|
"logps/ref_rejected": -272.9354553222656,
|
|
"logps/rejected": -418.3013610839844,
|
|
"loss": 4.1489,
|
|
"margin_dpo/margin_mean": 57.42588806152344,
|
|
"margin_dpo/margin_std": 81.77729797363281,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.6638743455497382,
|
|
"fcm_dpo/beta": 0.010676562786102295,
|
|
"fcm_dpo/delta": 0.02444746345281601,
|
|
"fcm_dpo/margin": 54.00886535644531,
|
|
"fcm_dpo/q_t": 0.3743758201599121,
|
|
"grad_norm": 83.4330825805664,
|
|
"learning_rate": 1.5454060774493065e-07,
|
|
"logits/chosen": -0.8651271462440491,
|
|
"logits/rejected": -0.8354383111000061,
|
|
"logps/chosen": -327.0922546386719,
|
|
"logps/ref_chosen": -253.90036010742188,
|
|
"logps/ref_rejected": -218.74078369140625,
|
|
"logps/rejected": -345.9415283203125,
|
|
"loss": 4.0552,
|
|
"margin_dpo/margin_mean": 54.00886535644531,
|
|
"margin_dpo/margin_std": 67.14466094970703,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.6659685863874345,
|
|
"fcm_dpo/beta": 0.010473274625837803,
|
|
"fcm_dpo/delta": -0.02130315639078617,
|
|
"fcm_dpo/margin": 58.93336486816406,
|
|
"fcm_dpo/q_t": 0.36683323979377747,
|
|
"grad_norm": 77.39559173583984,
|
|
"learning_rate": 1.5285113558975427e-07,
|
|
"logits/chosen": -0.883613646030426,
|
|
"logits/rejected": -0.8504911065101624,
|
|
"logps/chosen": -352.9744873046875,
|
|
"logps/ref_chosen": -270.8228759765625,
|
|
"logps/ref_rejected": -255.30972290039062,
|
|
"logps/rejected": -396.39471435546875,
|
|
"loss": 3.9828,
|
|
"margin_dpo/margin_mean": 58.93336486816406,
|
|
"margin_dpo/margin_std": 70.93016052246094,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.6680628272251309,
|
|
"fcm_dpo/beta": 0.010347644798457623,
|
|
"fcm_dpo/delta": 0.004265286028385162,
|
|
"fcm_dpo/margin": 57.502281188964844,
|
|
"fcm_dpo/q_t": 0.3711587190628052,
|
|
"grad_norm": 106.93011474609375,
|
|
"learning_rate": 1.5116687323334464e-07,
|
|
"logits/chosen": -0.8568066358566284,
|
|
"logits/rejected": -0.8343677520751953,
|
|
"logps/chosen": -389.6893310546875,
|
|
"logps/ref_chosen": -301.0028076171875,
|
|
"logps/ref_rejected": -242.39002990722656,
|
|
"logps/rejected": -388.5788269042969,
|
|
"loss": 4.0047,
|
|
"margin_dpo/margin_mean": 57.50227737426758,
|
|
"margin_dpo/margin_std": 70.3616714477539,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.6701570680628273,
|
|
"fcm_dpo/beta": 0.010691437870264053,
|
|
"fcm_dpo/delta": 0.03203843906521797,
|
|
"fcm_dpo/margin": 53.148414611816406,
|
|
"fcm_dpo/q_t": 0.38301903009414673,
|
|
"grad_norm": 128.77078247070312,
|
|
"learning_rate": 1.4948791099758052e-07,
|
|
"logits/chosen": -0.823917806148529,
|
|
"logits/rejected": -0.8286012411117554,
|
|
"logps/chosen": -385.59344482421875,
|
|
"logps/ref_chosen": -303.6225891113281,
|
|
"logps/ref_rejected": -280.85174560546875,
|
|
"logps/rejected": -415.97100830078125,
|
|
"loss": 4.3482,
|
|
"margin_dpo/margin_mean": 53.148414611816406,
|
|
"margin_dpo/margin_std": 85.3245849609375,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.6722513089005235,
|
|
"fcm_dpo/beta": 0.011223748326301575,
|
|
"fcm_dpo/delta": 0.034036025404930115,
|
|
"fcm_dpo/margin": 40.71882629394531,
|
|
"fcm_dpo/q_t": 0.4071481227874756,
|
|
"grad_norm": 100.50800323486328,
|
|
"learning_rate": 1.478143389201113e-07,
|
|
"logits/chosen": -0.8584508299827576,
|
|
"logits/rejected": -0.8295794725418091,
|
|
"logps/chosen": -380.6787109375,
|
|
"logps/ref_chosen": -288.98583984375,
|
|
"logps/ref_rejected": -241.1822052001953,
|
|
"logps/rejected": -373.59393310546875,
|
|
"loss": 4.6134,
|
|
"margin_dpo/margin_mean": 40.71883010864258,
|
|
"margin_dpo/margin_std": 77.2356948852539,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.6743455497382199,
|
|
"fcm_dpo/beta": 0.011451047845184803,
|
|
"fcm_dpo/delta": -0.009086892008781433,
|
|
"fcm_dpo/margin": 52.995948791503906,
|
|
"fcm_dpo/q_t": 0.37670472264289856,
|
|
"grad_norm": 84.11673736572266,
|
|
"learning_rate": 1.461462467495284e-07,
|
|
"logits/chosen": -0.9030950665473938,
|
|
"logits/rejected": -0.8643764853477478,
|
|
"logps/chosen": -400.96453857421875,
|
|
"logps/ref_chosen": -308.54345703125,
|
|
"logps/ref_rejected": -269.7995910644531,
|
|
"logps/rejected": -415.2165832519531,
|
|
"loss": 4.1867,
|
|
"margin_dpo/margin_mean": 52.99595260620117,
|
|
"margin_dpo/margin_std": 78.33987426757812,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.6764397905759162,
|
|
"fcm_dpo/beta": 0.011756744235754013,
|
|
"fcm_dpo/delta": 0.1289975494146347,
|
|
"fcm_dpo/margin": 36.21906661987305,
|
|
"fcm_dpo/q_t": 0.41570037603378296,
|
|
"grad_norm": 103.57855987548828,
|
|
"learning_rate": 1.4448372394055246e-07,
|
|
"logits/chosen": -0.8654804229736328,
|
|
"logits/rejected": -0.8599724173545837,
|
|
"logps/chosen": -372.8677978515625,
|
|
"logps/ref_chosen": -282.49365234375,
|
|
"logps/ref_rejected": -227.7105255126953,
|
|
"logps/rejected": -354.30377197265625,
|
|
"loss": 4.8564,
|
|
"margin_dpo/margin_mean": 36.21906280517578,
|
|
"margin_dpo/margin_std": 81.94871520996094,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.6785340314136126,
|
|
"fcm_dpo/beta": 0.011317353695631027,
|
|
"fcm_dpo/delta": -0.13536657392978668,
|
|
"fcm_dpo/margin": 63.58089065551758,
|
|
"fcm_dpo/q_t": 0.34825509786605835,
|
|
"grad_norm": 99.381103515625,
|
|
"learning_rate": 1.428268596492364e-07,
|
|
"logits/chosen": -0.8112601637840271,
|
|
"logits/rejected": -0.8100103139877319,
|
|
"logps/chosen": -317.1282958984375,
|
|
"logps/ref_chosen": -239.33836364746094,
|
|
"logps/ref_rejected": -230.53775024414062,
|
|
"logps/rejected": -371.9085388183594,
|
|
"loss": 3.756,
|
|
"margin_dpo/margin_mean": 63.58089065551758,
|
|
"margin_dpo/margin_std": 70.64297485351562,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.680628272251309,
|
|
"fcm_dpo/beta": 0.011040986515581608,
|
|
"fcm_dpo/delta": -0.02803659997880459,
|
|
"fcm_dpo/margin": 52.0232048034668,
|
|
"fcm_dpo/q_t": 0.38475099205970764,
|
|
"grad_norm": 113.279296875,
|
|
"learning_rate": 1.4117574272818386e-07,
|
|
"logits/chosen": -0.8139001131057739,
|
|
"logits/rejected": -0.7986257076263428,
|
|
"logps/chosen": -370.62933349609375,
|
|
"logps/ref_chosen": -280.62896728515625,
|
|
"logps/ref_rejected": -270.5085754394531,
|
|
"logps/rejected": -412.5321350097656,
|
|
"loss": 4.3798,
|
|
"margin_dpo/margin_mean": 52.0232048034668,
|
|
"margin_dpo/margin_std": 84.57878112792969,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.6827225130890052,
|
|
"fcm_dpo/beta": 0.011147797107696533,
|
|
"fcm_dpo/delta": 0.03236812353134155,
|
|
"fcm_dpo/margin": 51.00239562988281,
|
|
"fcm_dpo/q_t": 0.38107889890670776,
|
|
"grad_norm": 111.60675048828125,
|
|
"learning_rate": 1.3953046172178413e-07,
|
|
"logits/chosen": -0.9249294996261597,
|
|
"logits/rejected": -0.9140468239784241,
|
|
"logps/chosen": -322.252685546875,
|
|
"logps/ref_chosen": -240.9871368408203,
|
|
"logps/ref_rejected": -261.0238342285156,
|
|
"logps/rejected": -393.291748046875,
|
|
"loss": 4.2602,
|
|
"margin_dpo/margin_mean": 51.00239562988281,
|
|
"margin_dpo/margin_std": 76.8713607788086,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.6848167539267016,
|
|
"fcm_dpo/beta": 0.010913331992924213,
|
|
"fcm_dpo/delta": -0.048158351331949234,
|
|
"fcm_dpo/margin": 58.91179275512695,
|
|
"fcm_dpo/q_t": 0.36473149061203003,
|
|
"grad_norm": 80.92235565185547,
|
|
"learning_rate": 1.3789110486146468e-07,
|
|
"logits/chosen": -0.8732993006706238,
|
|
"logits/rejected": -0.8531113862991333,
|
|
"logps/chosen": -352.11798095703125,
|
|
"logps/ref_chosen": -279.52001953125,
|
|
"logps/ref_rejected": -269.51824951171875,
|
|
"logps/rejected": -401.02801513671875,
|
|
"loss": 3.9857,
|
|
"margin_dpo/margin_mean": 58.91179275512695,
|
|
"margin_dpo/margin_std": 74.66950225830078,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.6869109947643979,
|
|
"fcm_dpo/beta": 0.010607855394482613,
|
|
"fcm_dpo/delta": 0.03731272369623184,
|
|
"fcm_dpo/margin": 53.156368255615234,
|
|
"fcm_dpo/q_t": 0.38036584854125977,
|
|
"grad_norm": 105.32549285888672,
|
|
"learning_rate": 1.362577600609588e-07,
|
|
"logits/chosen": -0.8312807083129883,
|
|
"logits/rejected": -0.8335475325584412,
|
|
"logps/chosen": -384.21630859375,
|
|
"logps/ref_chosen": -301.033447265625,
|
|
"logps/ref_rejected": -284.2101135253906,
|
|
"logps/rejected": -420.5493469238281,
|
|
"loss": 4.1238,
|
|
"margin_dpo/margin_mean": 53.156368255615234,
|
|
"margin_dpo/margin_std": 68.87464141845703,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.6890052356020943,
|
|
"fcm_dpo/beta": 0.011003939434885979,
|
|
"fcm_dpo/delta": -0.0023946845903992653,
|
|
"fcm_dpo/margin": 54.590736389160156,
|
|
"fcm_dpo/q_t": 0.3825053870677948,
|
|
"grad_norm": 104.08448791503906,
|
|
"learning_rate": 1.3463051491159093e-07,
|
|
"logits/chosen": -0.8463307619094849,
|
|
"logits/rejected": -0.8228050470352173,
|
|
"logps/chosen": -409.5216369628906,
|
|
"logps/ref_chosen": -319.9888610839844,
|
|
"logps/ref_rejected": -307.5588684082031,
|
|
"logps/rejected": -451.6824035644531,
|
|
"loss": 4.2917,
|
|
"margin_dpo/margin_mean": 54.590736389160156,
|
|
"margin_dpo/margin_std": 86.76227569580078,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.6910994764397905,
|
|
"fcm_dpo/beta": 0.011315654963254929,
|
|
"fcm_dpo/delta": 0.03985806554555893,
|
|
"fcm_dpo/margin": 49.51683807373047,
|
|
"fcm_dpo/q_t": 0.3813677728176117,
|
|
"grad_norm": 110.84941864013672,
|
|
"learning_rate": 1.3300945667758012e-07,
|
|
"logits/chosen": -0.8280748128890991,
|
|
"logits/rejected": -0.8414457440376282,
|
|
"logps/chosen": -388.23834228515625,
|
|
"logps/ref_chosen": -301.11474609375,
|
|
"logps/ref_rejected": -299.673095703125,
|
|
"logps/rejected": -436.3134765625,
|
|
"loss": 4.1843,
|
|
"margin_dpo/margin_mean": 49.51683807373047,
|
|
"margin_dpo/margin_std": 68.95203399658203,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.6931937172774869,
|
|
"fcm_dpo/beta": 0.011244787834584713,
|
|
"fcm_dpo/delta": 0.009060085751116276,
|
|
"fcm_dpo/margin": 52.50252151489258,
|
|
"fcm_dpo/q_t": 0.38197970390319824,
|
|
"grad_norm": 180.15573120117188,
|
|
"learning_rate": 1.3139467229135998e-07,
|
|
"logits/chosen": -0.8683615922927856,
|
|
"logits/rejected": -0.8551488518714905,
|
|
"logps/chosen": -356.96160888671875,
|
|
"logps/ref_chosen": -277.59149169921875,
|
|
"logps/ref_rejected": -256.025634765625,
|
|
"logps/rejected": -387.8982849121094,
|
|
"loss": 4.3397,
|
|
"margin_dpo/margin_mean": 52.50252151489258,
|
|
"margin_dpo/margin_std": 86.5692138671875,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.6952879581151833,
|
|
"fcm_dpo/beta": 0.011165878735482693,
|
|
"fcm_dpo/delta": -0.0032293088734149933,
|
|
"fcm_dpo/margin": 53.855308532714844,
|
|
"fcm_dpo/q_t": 0.37883564829826355,
|
|
"grad_norm": 115.64824676513672,
|
|
"learning_rate": 1.2978624834891626e-07,
|
|
"logits/chosen": -0.865576982498169,
|
|
"logits/rejected": -0.8442394137382507,
|
|
"logps/chosen": -352.7916259765625,
|
|
"logps/ref_chosen": -269.97369384765625,
|
|
"logps/ref_rejected": -235.03164672851562,
|
|
"logps/rejected": -371.70489501953125,
|
|
"loss": 4.2568,
|
|
"margin_dpo/margin_mean": 53.855308532714844,
|
|
"margin_dpo/margin_std": 82.26235961914062,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.6973821989528796,
|
|
"fcm_dpo/beta": 0.011527864262461662,
|
|
"fcm_dpo/delta": 0.004975374788045883,
|
|
"fcm_dpo/margin": 47.826690673828125,
|
|
"fcm_dpo/q_t": 0.38533908128738403,
|
|
"grad_norm": 103.64472198486328,
|
|
"learning_rate": 1.281842711051438e-07,
|
|
"logits/chosen": -0.9276981949806213,
|
|
"logits/rejected": -0.8942596316337585,
|
|
"logps/chosen": -381.27313232421875,
|
|
"logps/ref_chosen": -296.76300048828125,
|
|
"logps/ref_rejected": -265.97991943359375,
|
|
"logps/rejected": -398.31671142578125,
|
|
"loss": 4.226,
|
|
"margin_dpo/margin_mean": 47.826690673828125,
|
|
"margin_dpo/margin_std": 69.83184051513672,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.6994764397905759,
|
|
"fcm_dpo/beta": 0.011430593207478523,
|
|
"fcm_dpo/delta": -0.059879280626773834,
|
|
"fcm_dpo/margin": 57.263526916503906,
|
|
"fcm_dpo/q_t": 0.3644816279411316,
|
|
"grad_norm": 102.11288452148438,
|
|
"learning_rate": 1.2658882646922033e-07,
|
|
"logits/chosen": -0.839641809463501,
|
|
"logits/rejected": -0.8142789602279663,
|
|
"logps/chosen": -379.44012451171875,
|
|
"logps/ref_chosen": -301.0367431640625,
|
|
"logps/ref_rejected": -268.87652587890625,
|
|
"logps/rejected": -404.54339599609375,
|
|
"loss": 4.0678,
|
|
"margin_dpo/margin_mean": 57.263526916503906,
|
|
"margin_dpo/margin_std": 76.02151489257812,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.7015706806282722,
|
|
"fcm_dpo/beta": 0.010619346983730793,
|
|
"fcm_dpo/delta": -7.020309567451477e-05,
|
|
"fcm_dpo/margin": 56.28835678100586,
|
|
"fcm_dpo/q_t": 0.377109557390213,
|
|
"grad_norm": 112.79352569580078,
|
|
"learning_rate": 1.2500000000000005e-07,
|
|
"logits/chosen": -0.8311583399772644,
|
|
"logits/rejected": -0.827835738658905,
|
|
"logps/chosen": -365.9309997558594,
|
|
"logps/ref_chosen": -276.13275146484375,
|
|
"logps/ref_rejected": -243.44203186035156,
|
|
"logps/rejected": -389.5285949707031,
|
|
"loss": 4.245,
|
|
"margin_dpo/margin_mean": 56.28835678100586,
|
|
"margin_dpo/margin_std": 83.26809692382812,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.7036649214659686,
|
|
"fcm_dpo/beta": 0.010861254297196865,
|
|
"fcm_dpo/delta": -0.004219849593937397,
|
|
"fcm_dpo/margin": 50.423587799072266,
|
|
"fcm_dpo/q_t": 0.3916303515434265,
|
|
"grad_norm": 112.85342407226562,
|
|
"learning_rate": 1.2341787690142435e-07,
|
|
"logits/chosen": -0.839414119720459,
|
|
"logits/rejected": -0.7747617959976196,
|
|
"logps/chosen": -337.7890319824219,
|
|
"logps/ref_chosen": -246.2626495361328,
|
|
"logps/ref_rejected": -261.0617980957031,
|
|
"logps/rejected": -403.0118408203125,
|
|
"loss": 4.3563,
|
|
"margin_dpo/margin_mean": 50.423587799072266,
|
|
"margin_dpo/margin_std": 81.8228988647461,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.7057591623036649,
|
|
"fcm_dpo/beta": 0.010828062891960144,
|
|
"fcm_dpo/delta": -0.06099002808332443,
|
|
"fcm_dpo/margin": 60.63716125488281,
|
|
"fcm_dpo/q_t": 0.3647434711456299,
|
|
"grad_norm": 89.56388854980469,
|
|
"learning_rate": 1.2184254201795363e-07,
|
|
"logits/chosen": -0.8643673062324524,
|
|
"logits/rejected": -0.8361295461654663,
|
|
"logps/chosen": -350.9415283203125,
|
|
"logps/ref_chosen": -266.9937744140625,
|
|
"logps/ref_rejected": -253.015625,
|
|
"logps/rejected": -397.60052490234375,
|
|
"loss": 3.9831,
|
|
"margin_dpo/margin_mean": 60.63715362548828,
|
|
"margin_dpo/margin_std": 78.05126953125,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.7078534031413612,
|
|
"fcm_dpo/beta": 0.010671587660908699,
|
|
"fcm_dpo/delta": 0.035113610327243805,
|
|
"fcm_dpo/margin": 52.91345977783203,
|
|
"fcm_dpo/q_t": 0.38380661606788635,
|
|
"grad_norm": 123.61051177978516,
|
|
"learning_rate": 1.202740798300168e-07,
|
|
"logits/chosen": -0.8847794532775879,
|
|
"logits/rejected": -0.867152214050293,
|
|
"logps/chosen": -357.63946533203125,
|
|
"logps/ref_chosen": -276.5925598144531,
|
|
"logps/ref_rejected": -233.979248046875,
|
|
"logps/rejected": -367.9396057128906,
|
|
"loss": 4.264,
|
|
"margin_dpo/margin_mean": 52.9134521484375,
|
|
"margin_dpo/margin_std": 80.15204620361328,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.7099476439790576,
|
|
"fcm_dpo/beta": 0.010630465112626553,
|
|
"fcm_dpo/delta": -0.030053602531552315,
|
|
"fcm_dpo/margin": 58.987159729003906,
|
|
"fcm_dpo/q_t": 0.36951741576194763,
|
|
"grad_norm": 107.49327087402344,
|
|
"learning_rate": 1.1871257444948096e-07,
|
|
"logits/chosen": -0.8885621428489685,
|
|
"logits/rejected": -0.8790793418884277,
|
|
"logps/chosen": -392.2843933105469,
|
|
"logps/ref_chosen": -303.5277404785156,
|
|
"logps/ref_rejected": -283.11676025390625,
|
|
"logps/rejected": -430.860595703125,
|
|
"loss": 4.1489,
|
|
"margin_dpo/margin_mean": 58.987159729003906,
|
|
"margin_dpo/margin_std": 83.707763671875,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.7120418848167539,
|
|
"fcm_dpo/beta": 0.010451890528202057,
|
|
"fcm_dpo/delta": -0.00904519110918045,
|
|
"fcm_dpo/margin": 53.435546875,
|
|
"fcm_dpo/q_t": 0.3880341053009033,
|
|
"grad_norm": 126.61689758300781,
|
|
"learning_rate": 1.1715810961514072e-07,
|
|
"logits/chosen": -0.8415927886962891,
|
|
"logits/rejected": -0.8401827812194824,
|
|
"logps/chosen": -354.4100341796875,
|
|
"logps/ref_chosen": -261.5257568359375,
|
|
"logps/ref_rejected": -259.39862060546875,
|
|
"logps/rejected": -405.7184143066406,
|
|
"loss": 4.4717,
|
|
"margin_dpo/margin_mean": 53.435546875,
|
|
"margin_dpo/margin_std": 92.72853088378906,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.7141361256544503,
|
|
"fcm_dpo/beta": 0.010697471909224987,
|
|
"fcm_dpo/delta": 0.08240307867527008,
|
|
"fcm_dpo/margin": 40.59882354736328,
|
|
"fcm_dpo/q_t": 0.4121650159358978,
|
|
"grad_norm": 146.87213134765625,
|
|
"learning_rate": 1.1561076868822755e-07,
|
|
"logits/chosen": -0.86383056640625,
|
|
"logits/rejected": -0.8326124548912048,
|
|
"logps/chosen": -426.71337890625,
|
|
"logps/ref_chosen": -315.903564453125,
|
|
"logps/ref_rejected": -308.02392578125,
|
|
"logps/rejected": -459.4324951171875,
|
|
"loss": 4.9048,
|
|
"margin_dpo/margin_mean": 40.59882354736328,
|
|
"margin_dpo/margin_std": 90.46697235107422,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.7162303664921466,
|
|
"fcm_dpo/beta": 0.011284704320132732,
|
|
"fcm_dpo/delta": -0.020554201677441597,
|
|
"fcm_dpo/margin": 54.822906494140625,
|
|
"fcm_dpo/q_t": 0.3675943613052368,
|
|
"grad_norm": 101.63287353515625,
|
|
"learning_rate": 1.1407063464793965e-07,
|
|
"logits/chosen": -0.8525506258010864,
|
|
"logits/rejected": -0.8500516414642334,
|
|
"logps/chosen": -356.83026123046875,
|
|
"logps/ref_chosen": -269.17864990234375,
|
|
"logps/ref_rejected": -260.8977355957031,
|
|
"logps/rejected": -403.3722229003906,
|
|
"loss": 4.0639,
|
|
"margin_dpo/margin_mean": 54.822906494140625,
|
|
"margin_dpo/margin_std": 71.24502563476562,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.7183246073298429,
|
|
"fcm_dpo/beta": 0.01099632028490305,
|
|
"fcm_dpo/delta": 0.047995634377002716,
|
|
"fcm_dpo/margin": 50.445194244384766,
|
|
"fcm_dpo/q_t": 0.3869495093822479,
|
|
"grad_norm": 110.69547271728516,
|
|
"learning_rate": 1.125377900869913e-07,
|
|
"logits/chosen": -0.8448514938354492,
|
|
"logits/rejected": -0.8279154896736145,
|
|
"logps/chosen": -402.746826171875,
|
|
"logps/ref_chosen": -310.719970703125,
|
|
"logps/ref_rejected": -263.5224914550781,
|
|
"logps/rejected": -405.9945373535156,
|
|
"loss": 4.3061,
|
|
"margin_dpo/margin_mean": 50.445194244384766,
|
|
"margin_dpo/margin_std": 79.09446716308594,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.7204188481675393,
|
|
"fcm_dpo/beta": 0.011513441801071167,
|
|
"fcm_dpo/delta": -0.023599928244948387,
|
|
"fcm_dpo/margin": 53.812904357910156,
|
|
"fcm_dpo/q_t": 0.3725927770137787,
|
|
"grad_norm": 115.21056365966797,
|
|
"learning_rate": 1.110123172071844e-07,
|
|
"logits/chosen": -0.8441615104675293,
|
|
"logits/rejected": -0.8278071880340576,
|
|
"logps/chosen": -395.754150390625,
|
|
"logps/ref_chosen": -301.7999267578125,
|
|
"logps/ref_rejected": -257.9061584472656,
|
|
"logps/rejected": -405.67327880859375,
|
|
"loss": 4.225,
|
|
"margin_dpo/margin_mean": 53.812904357910156,
|
|
"margin_dpo/margin_std": 79.05390930175781,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.7225130890052356,
|
|
"fcm_dpo/beta": 0.011425694450736046,
|
|
"fcm_dpo/delta": 0.05544426292181015,
|
|
"fcm_dpo/margin": 47.61176300048828,
|
|
"fcm_dpo/q_t": 0.387326180934906,
|
|
"grad_norm": 137.6807098388672,
|
|
"learning_rate": 1.09494297815e-07,
|
|
"logits/chosen": -0.842475175857544,
|
|
"logits/rejected": -0.842012345790863,
|
|
"logps/chosen": -375.22137451171875,
|
|
"logps/ref_chosen": -283.0184326171875,
|
|
"logps/ref_rejected": -266.8457336425781,
|
|
"logps/rejected": -406.660400390625,
|
|
"loss": 4.2707,
|
|
"margin_dpo/margin_mean": 47.61176300048828,
|
|
"margin_dpo/margin_std": 67.73701477050781,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.724607329842932,
|
|
"fcm_dpo/beta": 0.011425861157476902,
|
|
"fcm_dpo/delta": -0.07231096923351288,
|
|
"fcm_dpo/margin": 58.19831848144531,
|
|
"fcm_dpo/q_t": 0.36130863428115845,
|
|
"grad_norm": 90.98426055908203,
|
|
"learning_rate": 1.0798381331721107e-07,
|
|
"logits/chosen": -0.9370063543319702,
|
|
"logits/rejected": -0.8892075419425964,
|
|
"logps/chosen": -366.372802734375,
|
|
"logps/ref_chosen": -268.44122314453125,
|
|
"logps/ref_rejected": -227.8225860595703,
|
|
"logps/rejected": -383.952392578125,
|
|
"loss": 4.1066,
|
|
"margin_dpo/margin_mean": 58.19831848144531,
|
|
"margin_dpo/margin_std": 78.29531860351562,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.7267015706806282,
|
|
"fcm_dpo/beta": 0.010861432179808617,
|
|
"fcm_dpo/delta": -0.010385667905211449,
|
|
"fcm_dpo/margin": 51.505802154541016,
|
|
"fcm_dpo/q_t": 0.380726158618927,
|
|
"grad_norm": 98.91201782226562,
|
|
"learning_rate": 1.0648094471651722e-07,
|
|
"logits/chosen": -0.7857590913772583,
|
|
"logits/rejected": -0.8135133981704712,
|
|
"logps/chosen": -364.33441162109375,
|
|
"logps/ref_chosen": -273.70355224609375,
|
|
"logps/ref_rejected": -243.65521240234375,
|
|
"logps/rejected": -385.7918395996094,
|
|
"loss": 4.2412,
|
|
"margin_dpo/margin_mean": 51.505802154541016,
|
|
"margin_dpo/margin_std": 73.8308334350586,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.7287958115183246,
|
|
"fcm_dpo/beta": 0.011517523787915707,
|
|
"fcm_dpo/delta": 0.09734243154525757,
|
|
"fcm_dpo/margin": 43.836822509765625,
|
|
"fcm_dpo/q_t": 0.40036576986312866,
|
|
"grad_norm": 90.68925476074219,
|
|
"learning_rate": 1.0498577260720048e-07,
|
|
"logits/chosen": -0.882627010345459,
|
|
"logits/rejected": -0.8689060807228088,
|
|
"logps/chosen": -374.64874267578125,
|
|
"logps/ref_chosen": -285.64141845703125,
|
|
"logps/ref_rejected": -265.6270446777344,
|
|
"logps/rejected": -398.47119140625,
|
|
"loss": 4.4704,
|
|
"margin_dpo/margin_mean": 43.836822509765625,
|
|
"margin_dpo/margin_std": 75.92671966552734,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.7308900523560209,
|
|
"fcm_dpo/beta": 0.011294200085103512,
|
|
"fcm_dpo/delta": -0.10136254876852036,
|
|
"fcm_dpo/margin": 61.32181930541992,
|
|
"fcm_dpo/q_t": 0.36070722341537476,
|
|
"grad_norm": 167.52288818359375,
|
|
"learning_rate": 1.0349837717080347e-07,
|
|
"logits/chosen": -0.8177285194396973,
|
|
"logits/rejected": -0.8121789693832397,
|
|
"logps/chosen": -418.723876953125,
|
|
"logps/ref_chosen": -328.3175048828125,
|
|
"logps/ref_rejected": -292.37872314453125,
|
|
"logps/rejected": -444.10687255859375,
|
|
"loss": 4.0752,
|
|
"margin_dpo/margin_mean": 61.32181930541992,
|
|
"margin_dpo/margin_std": 85.29824829101562,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.7329842931937173,
|
|
"fcm_dpo/beta": 0.011082770302891731,
|
|
"fcm_dpo/delta": 0.0001247054897248745,
|
|
"fcm_dpo/margin": 49.612892150878906,
|
|
"fcm_dpo/q_t": 0.38704627752304077,
|
|
"grad_norm": 104.57560729980469,
|
|
"learning_rate": 1.0201883817182949e-07,
|
|
"logits/chosen": -0.8255881071090698,
|
|
"logits/rejected": -0.8411324620246887,
|
|
"logps/chosen": -392.31982421875,
|
|
"logps/ref_chosen": -292.8046569824219,
|
|
"logps/ref_rejected": -250.35504150390625,
|
|
"logps/rejected": -399.4831237792969,
|
|
"loss": 4.4145,
|
|
"margin_dpo/margin_mean": 49.61289978027344,
|
|
"margin_dpo/margin_std": 82.11981201171875,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.7350785340314137,
|
|
"fcm_dpo/beta": 0.011276098899543285,
|
|
"fcm_dpo/delta": 0.053270816802978516,
|
|
"fcm_dpo/margin": 36.07026290893555,
|
|
"fcm_dpo/q_t": 0.4199068546295166,
|
|
"grad_norm": 156.5530242919922,
|
|
"learning_rate": 1.0054723495346482e-07,
|
|
"logits/chosen": -0.8887529373168945,
|
|
"logits/rejected": -0.8778947591781616,
|
|
"logps/chosen": -404.2864074707031,
|
|
"logps/ref_chosen": -311.8890380859375,
|
|
"logps/ref_rejected": -263.59033203125,
|
|
"logps/rejected": -392.0579833984375,
|
|
"loss": 4.9768,
|
|
"margin_dpo/margin_mean": 36.07026290893555,
|
|
"margin_dpo/margin_std": 87.96586608886719,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.7371727748691099,
|
|
"fcm_dpo/beta": 0.010786263272166252,
|
|
"fcm_dpo/delta": -0.10786393284797668,
|
|
"fcm_dpo/margin": 64.68121337890625,
|
|
"fcm_dpo/q_t": 0.3563269376754761,
|
|
"grad_norm": 107.21913146972656,
|
|
"learning_rate": 9.908364643332398e-08,
|
|
"logits/chosen": -0.8178911805152893,
|
|
"logits/rejected": -0.7894106507301331,
|
|
"logps/chosen": -341.0050354003906,
|
|
"logps/ref_chosen": -254.9078826904297,
|
|
"logps/ref_rejected": -257.1688232421875,
|
|
"logps/rejected": -407.94720458984375,
|
|
"loss": 3.9965,
|
|
"margin_dpo/margin_mean": 64.68122100830078,
|
|
"margin_dpo/margin_std": 83.1053466796875,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.7392670157068063,
|
|
"fcm_dpo/beta": 0.01031852513551712,
|
|
"fcm_dpo/delta": 0.012895338237285614,
|
|
"fcm_dpo/margin": 50.85631561279297,
|
|
"fcm_dpo/q_t": 0.3920055627822876,
|
|
"grad_norm": 121.84527587890625,
|
|
"learning_rate": 9.76281510992176e-08,
|
|
"logits/chosen": -0.836536169052124,
|
|
"logits/rejected": -0.8306083679199219,
|
|
"logps/chosen": -365.62139892578125,
|
|
"logps/ref_chosen": -270.3760681152344,
|
|
"logps/ref_rejected": -264.65234375,
|
|
"logps/rejected": -410.7539367675781,
|
|
"loss": 4.4158,
|
|
"margin_dpo/margin_mean": 50.85631561279297,
|
|
"margin_dpo/margin_std": 82.86323547363281,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.7413612565445026,
|
|
"fcm_dpo/beta": 0.01093815453350544,
|
|
"fcm_dpo/delta": 0.10080662369728088,
|
|
"fcm_dpo/margin": 37.49406433105469,
|
|
"fcm_dpo/q_t": 0.4171503186225891,
|
|
"grad_norm": 136.33518981933594,
|
|
"learning_rate": 9.618082700494318e-08,
|
|
"logits/chosen": -0.8385964632034302,
|
|
"logits/rejected": -0.8738152384757996,
|
|
"logps/chosen": -354.9613037109375,
|
|
"logps/ref_chosen": -257.6485595703125,
|
|
"logps/ref_rejected": -246.94203186035156,
|
|
"logps/rejected": -381.74884033203125,
|
|
"loss": 4.8602,
|
|
"margin_dpo/margin_mean": 37.49407196044922,
|
|
"margin_dpo/margin_std": 83.91649627685547,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.743455497382199,
|
|
"fcm_dpo/beta": 0.010349645279347897,
|
|
"fcm_dpo/delta": -0.12334014475345612,
|
|
"fcm_dpo/margin": 62.511470794677734,
|
|
"fcm_dpo/q_t": 0.36480429768562317,
|
|
"grad_norm": 95.06636047363281,
|
|
"learning_rate": 9.474175176609956e-08,
|
|
"logits/chosen": -0.8743699193000793,
|
|
"logits/rejected": -0.875370979309082,
|
|
"logps/chosen": -384.0447692871094,
|
|
"logps/ref_chosen": -293.35333251953125,
|
|
"logps/ref_rejected": -275.6051940917969,
|
|
"logps/rejected": -428.80804443359375,
|
|
"loss": 4.1536,
|
|
"margin_dpo/margin_mean": 62.511470794677734,
|
|
"margin_dpo/margin_std": 87.62345886230469,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.7455497382198953,
|
|
"fcm_dpo/beta": 0.01060514897108078,
|
|
"fcm_dpo/delta": 0.06647256016731262,
|
|
"fcm_dpo/margin": 40.10423278808594,
|
|
"fcm_dpo/q_t": 0.4083283841609955,
|
|
"grad_norm": 88.60588836669922,
|
|
"learning_rate": 9.331100255592436e-08,
|
|
"logits/chosen": -0.796362042427063,
|
|
"logits/rejected": -0.8256345391273499,
|
|
"logps/chosen": -293.0960388183594,
|
|
"logps/ref_chosen": -204.25550842285156,
|
|
"logps/ref_rejected": -213.467529296875,
|
|
"logps/rejected": -342.41229248046875,
|
|
"loss": 4.548,
|
|
"margin_dpo/margin_mean": 40.10423278808594,
|
|
"margin_dpo/margin_std": 67.21572875976562,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.7476439790575916,
|
|
"fcm_dpo/beta": 0.010486958548426628,
|
|
"fcm_dpo/delta": -0.07790210843086243,
|
|
"fcm_dpo/margin": 58.63288879394531,
|
|
"fcm_dpo/q_t": 0.37552568316459656,
|
|
"grad_norm": 98.0359115600586,
|
|
"learning_rate": 9.18886561011557e-08,
|
|
"logits/chosen": -0.7627823352813721,
|
|
"logits/rejected": -0.7633357048034668,
|
|
"logps/chosen": -362.4690246582031,
|
|
"logps/ref_chosen": -266.3705749511719,
|
|
"logps/ref_rejected": -239.04490661621094,
|
|
"logps/rejected": -393.7762451171875,
|
|
"loss": 4.2021,
|
|
"margin_dpo/margin_mean": 58.63289260864258,
|
|
"margin_dpo/margin_std": 85.52519226074219,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.749738219895288,
|
|
"fcm_dpo/beta": 0.009996353648602962,
|
|
"fcm_dpo/delta": -0.06562351435422897,
|
|
"fcm_dpo/margin": 66.09445190429688,
|
|
"fcm_dpo/q_t": 0.3610166311264038,
|
|
"grad_norm": 88.50709533691406,
|
|
"learning_rate": 9.047478867791731e-08,
|
|
"logits/chosen": -0.8669772148132324,
|
|
"logits/rejected": -0.8496595621109009,
|
|
"logps/chosen": -382.9401550292969,
|
|
"logps/ref_chosen": -299.1474609375,
|
|
"logps/ref_rejected": -257.2531433105469,
|
|
"logps/rejected": -407.1402587890625,
|
|
"loss": 4.0085,
|
|
"margin_dpo/margin_mean": 66.09444427490234,
|
|
"margin_dpo/margin_std": 85.0299072265625,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.7518324607329843,
|
|
"fcm_dpo/beta": 0.010169594548642635,
|
|
"fcm_dpo/delta": 0.03530079498887062,
|
|
"fcm_dpo/margin": 55.34699249267578,
|
|
"fcm_dpo/q_t": 0.3793519139289856,
|
|
"grad_norm": 106.86293029785156,
|
|
"learning_rate": 8.906947610762825e-08,
|
|
"logits/chosen": -0.8287184238433838,
|
|
"logits/rejected": -0.8446385860443115,
|
|
"logps/chosen": -390.8289794921875,
|
|
"logps/ref_chosen": -302.99786376953125,
|
|
"logps/ref_rejected": -260.4137268066406,
|
|
"logps/rejected": -403.5918273925781,
|
|
"loss": 4.1275,
|
|
"margin_dpo/margin_mean": 55.34699249267578,
|
|
"margin_dpo/margin_std": 72.260009765625,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.7539267015706806,
|
|
"fcm_dpo/beta": 0.010196023620665073,
|
|
"fcm_dpo/delta": 0.05344226956367493,
|
|
"fcm_dpo/margin": 48.37961196899414,
|
|
"fcm_dpo/q_t": 0.39275315403938293,
|
|
"grad_norm": 115.53006744384766,
|
|
"learning_rate": 8.76727937529367e-08,
|
|
"logits/chosen": -0.8422183394432068,
|
|
"logits/rejected": -0.8362429141998291,
|
|
"logps/chosen": -404.19610595703125,
|
|
"logps/ref_chosen": -309.6114501953125,
|
|
"logps/ref_rejected": -256.64031982421875,
|
|
"logps/rejected": -399.6045837402344,
|
|
"loss": 4.4924,
|
|
"margin_dpo/margin_mean": 48.37961196899414,
|
|
"margin_dpo/margin_std": 80.97713470458984,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.7560209424083769,
|
|
"fcm_dpo/beta": 0.010226656682789326,
|
|
"fcm_dpo/delta": -0.05834663659334183,
|
|
"fcm_dpo/margin": 64.01289367675781,
|
|
"fcm_dpo/q_t": 0.36634212732315063,
|
|
"grad_norm": 100.24212646484375,
|
|
"learning_rate": 8.628481651367875e-08,
|
|
"logits/chosen": -0.798484742641449,
|
|
"logits/rejected": -0.7778838276863098,
|
|
"logps/chosen": -340.5452575683594,
|
|
"logps/ref_chosen": -263.3797607421875,
|
|
"logps/ref_rejected": -271.18157958984375,
|
|
"logps/rejected": -412.3599548339844,
|
|
"loss": 4.1178,
|
|
"margin_dpo/margin_mean": 64.01289367675781,
|
|
"margin_dpo/margin_std": 91.16770935058594,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.7581151832460733,
|
|
"fcm_dpo/beta": 0.009908447042107582,
|
|
"fcm_dpo/delta": 0.03887121379375458,
|
|
"fcm_dpo/margin": 56.83973693847656,
|
|
"fcm_dpo/q_t": 0.3782539367675781,
|
|
"grad_norm": 90.40959930419922,
|
|
"learning_rate": 8.490561882286135e-08,
|
|
"logits/chosen": -0.8111223578453064,
|
|
"logits/rejected": -0.8046758770942688,
|
|
"logps/chosen": -389.388916015625,
|
|
"logps/ref_chosen": -303.2583923339844,
|
|
"logps/ref_rejected": -243.22891235351562,
|
|
"logps/rejected": -386.1991271972656,
|
|
"loss": 4.0886,
|
|
"margin_dpo/margin_mean": 56.83973693847656,
|
|
"margin_dpo/margin_std": 72.43896484375,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.7602094240837697,
|
|
"fcm_dpo/beta": 0.010420668870210648,
|
|
"fcm_dpo/delta": 0.029644204303622246,
|
|
"fcm_dpo/margin": 54.629852294921875,
|
|
"fcm_dpo/q_t": 0.38353899121284485,
|
|
"grad_norm": 97.74794006347656,
|
|
"learning_rate": 8.353527464267104e-08,
|
|
"logits/chosen": -0.8362611532211304,
|
|
"logits/rejected": -0.7902975678443909,
|
|
"logps/chosen": -395.0887451171875,
|
|
"logps/ref_chosen": -303.34722900390625,
|
|
"logps/ref_rejected": -262.05419921875,
|
|
"logps/rejected": -408.4255676269531,
|
|
"loss": 4.3097,
|
|
"margin_dpo/margin_mean": 54.62985610961914,
|
|
"margin_dpo/margin_std": 84.8177261352539,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.762303664921466,
|
|
"fcm_dpo/beta": 0.010761250741779804,
|
|
"fcm_dpo/delta": 0.1065862700343132,
|
|
"fcm_dpo/margin": 46.43891143798828,
|
|
"fcm_dpo/q_t": 0.3989133834838867,
|
|
"grad_norm": 97.99282836914062,
|
|
"learning_rate": 8.217385746050742e-08,
|
|
"logits/chosen": -0.806189239025116,
|
|
"logits/rejected": -0.8205310702323914,
|
|
"logps/chosen": -395.6390075683594,
|
|
"logps/ref_chosen": -285.54376220703125,
|
|
"logps/ref_rejected": -284.84619140625,
|
|
"logps/rejected": -441.38031005859375,
|
|
"loss": 4.6575,
|
|
"margin_dpo/margin_mean": 46.43891143798828,
|
|
"margin_dpo/margin_std": 89.2318115234375,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.7643979057591623,
|
|
"fcm_dpo/beta": 0.011114663444459438,
|
|
"fcm_dpo/delta": -0.062224358320236206,
|
|
"fcm_dpo/margin": 54.65919876098633,
|
|
"fcm_dpo/q_t": 0.37894606590270996,
|
|
"grad_norm": 99.08690643310547,
|
|
"learning_rate": 8.082144028504231e-08,
|
|
"logits/chosen": -0.8273904323577881,
|
|
"logits/rejected": -0.8326528668403625,
|
|
"logps/chosen": -370.6837158203125,
|
|
"logps/ref_chosen": -274.7878112792969,
|
|
"logps/ref_rejected": -256.5738220214844,
|
|
"logps/rejected": -407.1288757324219,
|
|
"loss": 4.2368,
|
|
"margin_dpo/margin_mean": 54.6591911315918,
|
|
"margin_dpo/margin_std": 82.35843658447266,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.7664921465968586,
|
|
"fcm_dpo/beta": 0.010609567165374756,
|
|
"fcm_dpo/delta": -0.053685709834098816,
|
|
"fcm_dpo/margin": 61.24087905883789,
|
|
"fcm_dpo/q_t": 0.3654269874095917,
|
|
"grad_norm": 92.26556396484375,
|
|
"learning_rate": 7.947809564230445e-08,
|
|
"logits/chosen": -0.7945237159729004,
|
|
"logits/rejected": -0.8086446523666382,
|
|
"logps/chosen": -376.56878662109375,
|
|
"logps/ref_chosen": -286.6496276855469,
|
|
"logps/ref_rejected": -251.97140502929688,
|
|
"logps/rejected": -403.1314697265625,
|
|
"loss": 4.0641,
|
|
"margin_dpo/margin_mean": 61.24087905883789,
|
|
"margin_dpo/margin_std": 84.4163818359375,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.768586387434555,
|
|
"fcm_dpo/beta": 0.009982116520404816,
|
|
"fcm_dpo/delta": -0.006225086748600006,
|
|
"fcm_dpo/margin": 60.51777648925781,
|
|
"fcm_dpo/q_t": 0.3717145025730133,
|
|
"grad_norm": 107.34187316894531,
|
|
"learning_rate": 7.814389557179016e-08,
|
|
"logits/chosen": -0.7962571382522583,
|
|
"logits/rejected": -0.7791531085968018,
|
|
"logps/chosen": -392.7152099609375,
|
|
"logps/ref_chosen": -301.9449768066406,
|
|
"logps/ref_rejected": -265.5677185058594,
|
|
"logps/rejected": -416.85565185546875,
|
|
"loss": 4.0549,
|
|
"margin_dpo/margin_mean": 60.51777648925781,
|
|
"margin_dpo/margin_std": 78.20709228515625,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.7706806282722513,
|
|
"fcm_dpo/beta": 0.009898173622786999,
|
|
"fcm_dpo/delta": -0.10373516380786896,
|
|
"fcm_dpo/margin": 70.26689147949219,
|
|
"fcm_dpo/q_t": 0.3502688705921173,
|
|
"grad_norm": 72.27176666259766,
|
|
"learning_rate": 7.681891162260015e-08,
|
|
"logits/chosen": -0.7817418575286865,
|
|
"logits/rejected": -0.7951399683952332,
|
|
"logps/chosen": -379.85211181640625,
|
|
"logps/ref_chosen": -294.62652587890625,
|
|
"logps/ref_rejected": -258.7628479003906,
|
|
"logps/rejected": -414.2553405761719,
|
|
"loss": 3.7321,
|
|
"margin_dpo/margin_mean": 70.26689910888672,
|
|
"margin_dpo/margin_std": 73.98336791992188,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.7727748691099476,
|
|
"fcm_dpo/beta": 0.009732791222631931,
|
|
"fcm_dpo/delta": 0.07320413738489151,
|
|
"fcm_dpo/margin": 54.49217987060547,
|
|
"fcm_dpo/q_t": 0.38609111309051514,
|
|
"grad_norm": 93.12295532226562,
|
|
"learning_rate": 7.550321484960251e-08,
|
|
"logits/chosen": -0.8595657348632812,
|
|
"logits/rejected": -0.8424580097198486,
|
|
"logps/chosen": -375.50518798828125,
|
|
"logps/ref_chosen": -282.5057373046875,
|
|
"logps/ref_rejected": -266.41607666015625,
|
|
"logps/rejected": -413.9076843261719,
|
|
"loss": 4.2228,
|
|
"margin_dpo/margin_mean": 54.49217987060547,
|
|
"margin_dpo/margin_std": 75.60814666748047,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.774869109947644,
|
|
"fcm_dpo/beta": 0.009743990376591682,
|
|
"fcm_dpo/delta": -0.03128061443567276,
|
|
"fcm_dpo/margin": 64.41328430175781,
|
|
"fcm_dpo/q_t": 0.36713510751724243,
|
|
"grad_norm": 76.50689697265625,
|
|
"learning_rate": 7.419687580962222e-08,
|
|
"logits/chosen": -0.8467559218406677,
|
|
"logits/rejected": -0.8696060180664062,
|
|
"logps/chosen": -336.12493896484375,
|
|
"logps/ref_chosen": -251.00640869140625,
|
|
"logps/ref_rejected": -238.12542724609375,
|
|
"logps/rejected": -387.6571960449219,
|
|
"loss": 4.0569,
|
|
"margin_dpo/margin_mean": 64.41327667236328,
|
|
"margin_dpo/margin_std": 86.21026611328125,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.7769633507853403,
|
|
"fcm_dpo/beta": 0.010253066197037697,
|
|
"fcm_dpo/delta": 0.0842670351266861,
|
|
"fcm_dpo/margin": 50.67913818359375,
|
|
"fcm_dpo/q_t": 0.3899438977241516,
|
|
"grad_norm": 123.32413482666016,
|
|
"learning_rate": 7.289996455765748e-08,
|
|
"logits/chosen": -0.7954655885696411,
|
|
"logits/rejected": -0.7937295436859131,
|
|
"logps/chosen": -393.655029296875,
|
|
"logps/ref_chosen": -296.6591491699219,
|
|
"logps/ref_rejected": -251.14675903320312,
|
|
"logps/rejected": -398.8217468261719,
|
|
"loss": 4.3229,
|
|
"margin_dpo/margin_mean": 50.679134368896484,
|
|
"margin_dpo/margin_std": 76.93683624267578,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.7790575916230367,
|
|
"fcm_dpo/beta": 0.010081680491566658,
|
|
"fcm_dpo/delta": -0.051262035965919495,
|
|
"fcm_dpo/margin": 63.9815788269043,
|
|
"fcm_dpo/q_t": 0.365226149559021,
|
|
"grad_norm": 83.84464263916016,
|
|
"learning_rate": 7.161255064312283e-08,
|
|
"logits/chosen": -0.7702327370643616,
|
|
"logits/rejected": -0.7675243020057678,
|
|
"logps/chosen": -424.36273193359375,
|
|
"logps/ref_chosen": -331.3714599609375,
|
|
"logps/ref_rejected": -285.56805419921875,
|
|
"logps/rejected": -442.5409240722656,
|
|
"loss": 4.0537,
|
|
"margin_dpo/margin_mean": 63.98158645629883,
|
|
"margin_dpo/margin_std": 84.29653930664062,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.7811518324607329,
|
|
"fcm_dpo/beta": 0.009867929853498936,
|
|
"fcm_dpo/delta": -0.005473626311868429,
|
|
"fcm_dpo/margin": 61.17123031616211,
|
|
"fcm_dpo/q_t": 0.3673017919063568,
|
|
"grad_norm": 91.18738555908203,
|
|
"learning_rate": 7.033470310611945e-08,
|
|
"logits/chosen": -0.8663382530212402,
|
|
"logits/rejected": -0.843439519405365,
|
|
"logps/chosen": -405.7951354980469,
|
|
"logps/ref_chosen": -321.9429931640625,
|
|
"logps/ref_rejected": -271.2288513183594,
|
|
"logps/rejected": -416.25225830078125,
|
|
"loss": 4.0013,
|
|
"margin_dpo/margin_mean": 61.171226501464844,
|
|
"margin_dpo/margin_std": 72.36864471435547,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.7832460732984293,
|
|
"fcm_dpo/beta": 0.010404913686215878,
|
|
"fcm_dpo/delta": 0.08060853183269501,
|
|
"fcm_dpo/margin": 50.20440673828125,
|
|
"fcm_dpo/q_t": 0.39187973737716675,
|
|
"grad_norm": 74.0364990234375,
|
|
"learning_rate": 6.906649047373245e-08,
|
|
"logits/chosen": -0.8531290292739868,
|
|
"logits/rejected": -0.8525895476341248,
|
|
"logps/chosen": -410.4658203125,
|
|
"logps/ref_chosen": -319.1685485839844,
|
|
"logps/ref_rejected": -284.6263732910156,
|
|
"logps/rejected": -426.1280212402344,
|
|
"loss": 4.3572,
|
|
"margin_dpo/margin_mean": 50.204410552978516,
|
|
"margin_dpo/margin_std": 79.54742431640625,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.7853403141361257,
|
|
"fcm_dpo/beta": 0.010893258266150951,
|
|
"fcm_dpo/delta": 0.029163816943764687,
|
|
"fcm_dpo/margin": 47.64557647705078,
|
|
"fcm_dpo/q_t": 0.39366090297698975,
|
|
"grad_norm": 104.41280364990234,
|
|
"learning_rate": 6.780798075635675e-08,
|
|
"logits/chosen": -0.8502262830734253,
|
|
"logits/rejected": -0.8328761458396912,
|
|
"logps/chosen": -412.74224853515625,
|
|
"logps/ref_chosen": -314.87579345703125,
|
|
"logps/ref_rejected": -259.1965026855469,
|
|
"logps/rejected": -404.70849609375,
|
|
"loss": 4.457,
|
|
"margin_dpo/margin_mean": 47.64557647705078,
|
|
"margin_dpo/margin_std": 81.04498291015625,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.787434554973822,
|
|
"fcm_dpo/beta": 0.010992622934281826,
|
|
"fcm_dpo/delta": -0.005566142499446869,
|
|
"fcm_dpo/margin": 54.897132873535156,
|
|
"fcm_dpo/q_t": 0.3781394064426422,
|
|
"grad_norm": 112.78710174560547,
|
|
"learning_rate": 6.655924144404906e-08,
|
|
"logits/chosen": -0.8241918087005615,
|
|
"logits/rejected": -0.832420825958252,
|
|
"logps/chosen": -385.7311096191406,
|
|
"logps/ref_chosen": -287.6732482910156,
|
|
"logps/ref_rejected": -256.6697082519531,
|
|
"logps/rejected": -409.6247253417969,
|
|
"loss": 4.2815,
|
|
"margin_dpo/margin_mean": 54.897132873535156,
|
|
"margin_dpo/margin_std": 85.11792755126953,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.7895287958115184,
|
|
"fcm_dpo/beta": 0.01128990575671196,
|
|
"fcm_dpo/delta": 0.05416107177734375,
|
|
"fcm_dpo/margin": 38.42709732055664,
|
|
"fcm_dpo/q_t": 0.41005009412765503,
|
|
"grad_norm": 113.04798889160156,
|
|
"learning_rate": 6.532033950290885e-08,
|
|
"logits/chosen": -0.8132824897766113,
|
|
"logits/rejected": -0.8157401084899902,
|
|
"logps/chosen": -409.5943298339844,
|
|
"logps/ref_chosen": -305.261474609375,
|
|
"logps/ref_rejected": -271.8887023925781,
|
|
"logps/rejected": -414.6486511230469,
|
|
"loss": 4.8146,
|
|
"margin_dpo/margin_mean": 38.427101135253906,
|
|
"margin_dpo/margin_std": 82.6573486328125,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.7916230366492146,
|
|
"fcm_dpo/beta": 0.011431505903601646,
|
|
"fcm_dpo/delta": 0.02444280870258808,
|
|
"fcm_dpo/margin": 46.53923416137695,
|
|
"fcm_dpo/q_t": 0.39048752188682556,
|
|
"grad_norm": 110.65091705322266,
|
|
"learning_rate": 6.409134137148736e-08,
|
|
"logits/chosen": -0.8158414363861084,
|
|
"logits/rejected": -0.8029335737228394,
|
|
"logps/chosen": -378.9710388183594,
|
|
"logps/ref_chosen": -281.5295715332031,
|
|
"logps/ref_rejected": -296.980224609375,
|
|
"logps/rejected": -440.9609069824219,
|
|
"loss": 4.3736,
|
|
"margin_dpo/margin_mean": 46.53923416137695,
|
|
"margin_dpo/margin_std": 74.37672424316406,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.793717277486911,
|
|
"fcm_dpo/beta": 0.011517210863530636,
|
|
"fcm_dpo/delta": 0.008636513724923134,
|
|
"fcm_dpo/margin": 51.24773406982422,
|
|
"fcm_dpo/q_t": 0.38045018911361694,
|
|
"grad_norm": 115.9316635131836,
|
|
"learning_rate": 6.28723129572247e-08,
|
|
"logits/chosen": -0.8725168704986572,
|
|
"logits/rejected": -0.8533939123153687,
|
|
"logps/chosen": -355.72607421875,
|
|
"logps/ref_chosen": -265.0807800292969,
|
|
"logps/ref_rejected": -230.58932495117188,
|
|
"logps/rejected": -372.4823303222656,
|
|
"loss": 4.336,
|
|
"margin_dpo/margin_mean": 51.247737884521484,
|
|
"margin_dpo/margin_std": 82.44475555419922,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.7958115183246073,
|
|
"fcm_dpo/beta": 0.011387725360691547,
|
|
"fcm_dpo/delta": -0.07844444364309311,
|
|
"fcm_dpo/margin": 53.415035247802734,
|
|
"fcm_dpo/q_t": 0.37550121545791626,
|
|
"grad_norm": 118.78459167480469,
|
|
"learning_rate": 6.166331963291519e-08,
|
|
"logits/chosen": -0.8518512845039368,
|
|
"logits/rejected": -0.8342669010162354,
|
|
"logps/chosen": -403.8897399902344,
|
|
"logps/ref_chosen": -305.90838623046875,
|
|
"logps/ref_rejected": -286.5906677246094,
|
|
"logps/rejected": -437.987060546875,
|
|
"loss": 4.2305,
|
|
"margin_dpo/margin_mean": 53.4150390625,
|
|
"margin_dpo/margin_std": 78.6050033569336,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.7979057591623037,
|
|
"fcm_dpo/beta": 0.011148151010274887,
|
|
"fcm_dpo/delta": -0.023326825350522995,
|
|
"fcm_dpo/margin": 55.73320388793945,
|
|
"fcm_dpo/q_t": 0.3738594651222229,
|
|
"grad_norm": 100.60095977783203,
|
|
"learning_rate": 6.046442623320145e-08,
|
|
"logits/chosen": -0.8115476369857788,
|
|
"logits/rejected": -0.7750450372695923,
|
|
"logps/chosen": -346.8482666015625,
|
|
"logps/ref_chosen": -252.87066650390625,
|
|
"logps/ref_rejected": -261.1927490234375,
|
|
"logps/rejected": -410.9035339355469,
|
|
"loss": 4.1363,
|
|
"margin_dpo/margin_mean": 55.73320007324219,
|
|
"margin_dpo/margin_std": 79.52117919921875,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"fcm_dpo/beta": 0.010607222095131874,
|
|
"fcm_dpo/delta": -0.08224906027317047,
|
|
"fcm_dpo/margin": 63.71092987060547,
|
|
"fcm_dpo/q_t": 0.35713696479797363,
|
|
"grad_norm": 90.26844024658203,
|
|
"learning_rate": 5.9275697051098275e-08,
|
|
"logits/chosen": -0.8479326963424683,
|
|
"logits/rejected": -0.8441295623779297,
|
|
"logps/chosen": -379.1060791015625,
|
|
"logps/ref_chosen": -289.2114562988281,
|
|
"logps/ref_rejected": -278.45751953125,
|
|
"logps/rejected": -432.06298828125,
|
|
"loss": 3.9126,
|
|
"margin_dpo/margin_mean": 63.71092224121094,
|
|
"margin_dpo/margin_std": 76.458740234375,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.8020942408376963,
|
|
"fcm_dpo/beta": 0.010085361078381538,
|
|
"fcm_dpo/delta": -0.03180404752492905,
|
|
"fcm_dpo/margin": 57.359859466552734,
|
|
"fcm_dpo/q_t": 0.37756213545799255,
|
|
"grad_norm": 110.38035583496094,
|
|
"learning_rate": 5.809719583454414e-08,
|
|
"logits/chosen": -0.8326891660690308,
|
|
"logits/rejected": -0.8139215111732483,
|
|
"logps/chosen": -362.66534423828125,
|
|
"logps/ref_chosen": -273.630859375,
|
|
"logps/ref_rejected": -261.44024658203125,
|
|
"logps/rejected": -407.83465576171875,
|
|
"loss": 4.187,
|
|
"margin_dpo/margin_mean": 57.359867095947266,
|
|
"margin_dpo/margin_std": 80.45777893066406,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.8041884816753927,
|
|
"fcm_dpo/beta": 0.010192757472395897,
|
|
"fcm_dpo/delta": 0.057503946125507355,
|
|
"fcm_dpo/margin": 48.359901428222656,
|
|
"fcm_dpo/q_t": 0.3959079384803772,
|
|
"grad_norm": 82.36161804199219,
|
|
"learning_rate": 5.6928985782982524e-08,
|
|
"logits/chosen": -0.8383417725563049,
|
|
"logits/rejected": -0.8363715410232544,
|
|
"logps/chosen": -369.3538818359375,
|
|
"logps/ref_chosen": -274.5699462890625,
|
|
"logps/ref_rejected": -285.8253479003906,
|
|
"logps/rejected": -428.9691467285156,
|
|
"loss": 4.4124,
|
|
"margin_dpo/margin_mean": 48.35989761352539,
|
|
"margin_dpo/margin_std": 78.35002899169922,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.806282722513089,
|
|
"fcm_dpo/beta": 0.010394207201898098,
|
|
"fcm_dpo/delta": 0.002360312268137932,
|
|
"fcm_dpo/margin": 52.75077819824219,
|
|
"fcm_dpo/q_t": 0.3835112154483795,
|
|
"grad_norm": 88.78260803222656,
|
|
"learning_rate": 5.57711295439732e-08,
|
|
"logits/chosen": -0.7930533289909363,
|
|
"logits/rejected": -0.794459342956543,
|
|
"logps/chosen": -380.3506774902344,
|
|
"logps/ref_chosen": -284.150634765625,
|
|
"logps/ref_rejected": -244.87921142578125,
|
|
"logps/rejected": -393.8300476074219,
|
|
"loss": 4.204,
|
|
"margin_dpo/margin_mean": 52.75077819824219,
|
|
"margin_dpo/margin_std": 73.82457733154297,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.8083769633507853,
|
|
"fcm_dpo/beta": 0.009621590375900269,
|
|
"fcm_dpo/delta": -0.12054447084665298,
|
|
"fcm_dpo/margin": 67.64607238769531,
|
|
"fcm_dpo/q_t": 0.35985732078552246,
|
|
"grad_norm": 86.24301147460938,
|
|
"learning_rate": 5.4623689209832484e-08,
|
|
"logits/chosen": -0.785068929195404,
|
|
"logits/rejected": -0.7856448888778687,
|
|
"logps/chosen": -407.9813537597656,
|
|
"logps/ref_chosen": -320.1762390136719,
|
|
"logps/ref_rejected": -302.05023193359375,
|
|
"logps/rejected": -457.50140380859375,
|
|
"loss": 3.8964,
|
|
"margin_dpo/margin_mean": 67.64607238769531,
|
|
"margin_dpo/margin_std": 75.94105529785156,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.8104712041884817,
|
|
"fcm_dpo/beta": 0.009473450481891632,
|
|
"fcm_dpo/delta": 0.00044431351125240326,
|
|
"fcm_dpo/margin": 57.35693359375,
|
|
"fcm_dpo/q_t": 0.3816065788269043,
|
|
"grad_norm": 81.3988037109375,
|
|
"learning_rate": 5.3486726314303175e-08,
|
|
"logits/chosen": -0.8247092962265015,
|
|
"logits/rejected": -0.8297352194786072,
|
|
"logps/chosen": -366.90478515625,
|
|
"logps/ref_chosen": -272.2801513671875,
|
|
"logps/ref_rejected": -265.1615905761719,
|
|
"logps/rejected": -417.14312744140625,
|
|
"loss": 4.2071,
|
|
"margin_dpo/margin_mean": 57.356929779052734,
|
|
"margin_dpo/margin_std": 78.60884094238281,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.812565445026178,
|
|
"fcm_dpo/beta": 0.009732890874147415,
|
|
"fcm_dpo/delta": 0.09739725291728973,
|
|
"fcm_dpo/margin": 41.571998596191406,
|
|
"fcm_dpo/q_t": 0.41511738300323486,
|
|
"grad_norm": 105.03797149658203,
|
|
"learning_rate": 5.2360301829254745e-08,
|
|
"logits/chosen": -0.8036607503890991,
|
|
"logits/rejected": -0.7966702580451965,
|
|
"logps/chosen": -378.40643310546875,
|
|
"logps/ref_chosen": -272.5313415527344,
|
|
"logps/ref_rejected": -239.55735778808594,
|
|
"logps/rejected": -387.00445556640625,
|
|
"loss": 4.7304,
|
|
"margin_dpo/margin_mean": 41.571998596191406,
|
|
"margin_dpo/margin_std": 82.30770874023438,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.8146596858638744,
|
|
"fcm_dpo/beta": 0.009908687323331833,
|
|
"fcm_dpo/delta": -0.03899161145091057,
|
|
"fcm_dpo/margin": 52.967166900634766,
|
|
"fcm_dpo/q_t": 0.3908618688583374,
|
|
"grad_norm": 86.0737075805664,
|
|
"learning_rate": 5.1244476161413806e-08,
|
|
"logits/chosen": -0.8428322076797485,
|
|
"logits/rejected": -0.8418431878089905,
|
|
"logps/chosen": -380.26837158203125,
|
|
"logps/ref_chosen": -281.0892639160156,
|
|
"logps/ref_rejected": -246.50045776367188,
|
|
"logps/rejected": -398.646728515625,
|
|
"loss": 4.3874,
|
|
"margin_dpo/margin_mean": 52.96717071533203,
|
|
"margin_dpo/margin_std": 83.29864501953125,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.8167539267015707,
|
|
"fcm_dpo/beta": 0.010237889364361763,
|
|
"fcm_dpo/delta": 0.0414692722260952,
|
|
"fcm_dpo/margin": 54.53743362426758,
|
|
"fcm_dpo/q_t": 0.3818510174751282,
|
|
"grad_norm": 83.15040588378906,
|
|
"learning_rate": 5.013930914912476e-08,
|
|
"logits/chosen": -0.852079451084137,
|
|
"logits/rejected": -0.8583500981330872,
|
|
"logps/chosen": -382.2542419433594,
|
|
"logps/ref_chosen": -283.98748779296875,
|
|
"logps/ref_rejected": -283.465087890625,
|
|
"logps/rejected": -436.26922607421875,
|
|
"loss": 4.2404,
|
|
"margin_dpo/margin_mean": 54.53743362426758,
|
|
"margin_dpo/margin_std": 78.8434066772461,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.818848167539267,
|
|
"fcm_dpo/beta": 0.009870692156255245,
|
|
"fcm_dpo/delta": -0.02596093714237213,
|
|
"fcm_dpo/margin": 57.44294738769531,
|
|
"fcm_dpo/q_t": 0.38027477264404297,
|
|
"grad_norm": 101.38391876220703,
|
|
"learning_rate": 4.904486005914027e-08,
|
|
"logits/chosen": -0.7972782850265503,
|
|
"logits/rejected": -0.7920839190483093,
|
|
"logps/chosen": -389.5296325683594,
|
|
"logps/ref_chosen": -283.86138916015625,
|
|
"logps/ref_rejected": -263.5093688964844,
|
|
"logps/rejected": -426.6205749511719,
|
|
"loss": 4.1996,
|
|
"margin_dpo/margin_mean": 57.44294357299805,
|
|
"margin_dpo/margin_std": 80.77877807617188,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.8209424083769633,
|
|
"fcm_dpo/beta": 0.009380877017974854,
|
|
"fcm_dpo/delta": -0.06458516418933868,
|
|
"fcm_dpo/margin": 69.83071899414062,
|
|
"fcm_dpo/q_t": 0.3584578335285187,
|
|
"grad_norm": 85.31986236572266,
|
|
"learning_rate": 4.796118758344353e-08,
|
|
"logits/chosen": -0.7884517312049866,
|
|
"logits/rejected": -0.8135141730308533,
|
|
"logps/chosen": -403.9764099121094,
|
|
"logps/ref_chosen": -310.070068359375,
|
|
"logps/ref_rejected": -252.89817810058594,
|
|
"logps/rejected": -416.63519287109375,
|
|
"loss": 3.8778,
|
|
"margin_dpo/margin_mean": 69.83071899414062,
|
|
"margin_dpo/margin_std": 76.77499389648438,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.8230366492146597,
|
|
"fcm_dpo/beta": 0.010007185861468315,
|
|
"fcm_dpo/delta": 0.061848465353250504,
|
|
"fcm_dpo/margin": 53.800540924072266,
|
|
"fcm_dpo/q_t": 0.3861052393913269,
|
|
"grad_norm": 114.60466766357422,
|
|
"learning_rate": 4.688834983610082e-08,
|
|
"logits/chosen": -0.8373547792434692,
|
|
"logits/rejected": -0.8279107213020325,
|
|
"logps/chosen": -378.2579040527344,
|
|
"logps/ref_chosen": -286.7156677246094,
|
|
"logps/ref_rejected": -230.00357055664062,
|
|
"logps/rejected": -375.34637451171875,
|
|
"loss": 4.2635,
|
|
"margin_dpo/margin_mean": 53.800537109375,
|
|
"margin_dpo/margin_std": 79.83255767822266,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.8251308900523561,
|
|
"fcm_dpo/beta": 0.010027028620243073,
|
|
"fcm_dpo/delta": 0.04232503101229668,
|
|
"fcm_dpo/margin": 49.44554901123047,
|
|
"fcm_dpo/q_t": 0.39943477511405945,
|
|
"grad_norm": 78.94566345214844,
|
|
"learning_rate": 4.582640435014459e-08,
|
|
"logits/chosen": -0.8648529052734375,
|
|
"logits/rejected": -0.8650112152099609,
|
|
"logps/chosen": -419.1053161621094,
|
|
"logps/ref_chosen": -325.9934387207031,
|
|
"logps/ref_rejected": -317.42706298828125,
|
|
"logps/rejected": -459.9844970703125,
|
|
"loss": 4.4582,
|
|
"margin_dpo/margin_mean": 49.4455451965332,
|
|
"margin_dpo/margin_std": 82.50776672363281,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.8272251308900523,
|
|
"fcm_dpo/beta": 0.010306437499821186,
|
|
"fcm_dpo/delta": -0.029076773673295975,
|
|
"fcm_dpo/margin": 60.80992889404297,
|
|
"fcm_dpo/q_t": 0.37129712104797363,
|
|
"grad_norm": 75.36946868896484,
|
|
"learning_rate": 4.477540807448832e-08,
|
|
"logits/chosen": -0.8019086122512817,
|
|
"logits/rejected": -0.8114342093467712,
|
|
"logps/chosen": -360.0174560546875,
|
|
"logps/ref_chosen": -268.90081787109375,
|
|
"logps/ref_rejected": -272.85809326171875,
|
|
"logps/rejected": -424.7846984863281,
|
|
"loss": 4.0261,
|
|
"margin_dpo/margin_mean": 60.8099250793457,
|
|
"margin_dpo/margin_std": 79.55804443359375,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.8293193717277487,
|
|
"fcm_dpo/beta": 0.010009893216192722,
|
|
"fcm_dpo/delta": -0.016297191381454468,
|
|
"fcm_dpo/margin": 55.63848114013672,
|
|
"fcm_dpo/q_t": 0.3808242976665497,
|
|
"grad_norm": 90.71900177001953,
|
|
"learning_rate": 4.373541737087263e-08,
|
|
"logits/chosen": -0.8296109437942505,
|
|
"logits/rejected": -0.8163138628005981,
|
|
"logps/chosen": -384.39410400390625,
|
|
"logps/ref_chosen": -291.19830322265625,
|
|
"logps/ref_rejected": -253.2803955078125,
|
|
"logps/rejected": -402.11468505859375,
|
|
"loss": 4.1991,
|
|
"margin_dpo/margin_mean": 55.63848114013672,
|
|
"margin_dpo/margin_std": 76.70056915283203,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.831413612565445,
|
|
"fcm_dpo/beta": 0.009893465787172318,
|
|
"fcm_dpo/delta": -0.023841019719839096,
|
|
"fcm_dpo/margin": 49.24290466308594,
|
|
"fcm_dpo/q_t": 0.39738088846206665,
|
|
"grad_norm": 90.27240753173828,
|
|
"learning_rate": 4.270648801084295e-08,
|
|
"logits/chosen": -0.8341606259346008,
|
|
"logits/rejected": -0.8116894960403442,
|
|
"logps/chosen": -400.8775939941406,
|
|
"logps/ref_chosen": -309.8224182128906,
|
|
"logps/ref_rejected": -291.9057922363281,
|
|
"logps/rejected": -432.20391845703125,
|
|
"loss": 4.5174,
|
|
"margin_dpo/margin_mean": 49.24290466308594,
|
|
"margin_dpo/margin_std": 83.20286560058594,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.8335078534031414,
|
|
"fcm_dpo/beta": 0.009832684881985188,
|
|
"fcm_dpo/delta": 0.07155661284923553,
|
|
"fcm_dpo/margin": 46.61724853515625,
|
|
"fcm_dpo/q_t": 0.40298062562942505,
|
|
"grad_norm": 107.44989776611328,
|
|
"learning_rate": 4.168867517275806e-08,
|
|
"logits/chosen": -0.7414498925209045,
|
|
"logits/rejected": -0.7821962833404541,
|
|
"logps/chosen": -398.432861328125,
|
|
"logps/ref_chosen": -297.8135070800781,
|
|
"logps/ref_rejected": -270.5025634765625,
|
|
"logps/rejected": -417.7391662597656,
|
|
"loss": 4.726,
|
|
"margin_dpo/margin_mean": 46.61724853515625,
|
|
"margin_dpo/margin_std": 91.47262573242188,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.8356020942408376,
|
|
"fcm_dpo/beta": 0.010510783642530441,
|
|
"fcm_dpo/delta": 0.05873828008770943,
|
|
"fcm_dpo/margin": 51.76239776611328,
|
|
"fcm_dpo/q_t": 0.38723278045654297,
|
|
"grad_norm": 91.62894439697266,
|
|
"learning_rate": 4.0682033438831584e-08,
|
|
"logits/chosen": -0.8432673811912537,
|
|
"logits/rejected": -0.80589359998703,
|
|
"logps/chosen": -392.64324951171875,
|
|
"logps/ref_chosen": -292.8467712402344,
|
|
"logps/ref_rejected": -268.3638916015625,
|
|
"logps/rejected": -419.9228210449219,
|
|
"loss": 4.3291,
|
|
"margin_dpo/margin_mean": 51.76239776611328,
|
|
"margin_dpo/margin_std": 81.01602172851562,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.837696335078534,
|
|
"fcm_dpo/beta": 0.010746605694293976,
|
|
"fcm_dpo/delta": 0.025092536583542824,
|
|
"fcm_dpo/margin": 53.3397331237793,
|
|
"fcm_dpo/q_t": 0.3807898461818695,
|
|
"grad_norm": 134.78067016601562,
|
|
"learning_rate": 3.968661679220467e-08,
|
|
"logits/chosen": -0.8801178932189941,
|
|
"logits/rejected": -0.8791629672050476,
|
|
"logps/chosen": -358.38555908203125,
|
|
"logps/ref_chosen": -263.6763916015625,
|
|
"logps/ref_rejected": -258.67266845703125,
|
|
"logps/rejected": -406.7215881347656,
|
|
"loss": 4.3167,
|
|
"margin_dpo/margin_mean": 53.3397331237793,
|
|
"margin_dpo/margin_std": 79.3423080444336,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.837696335078534,
|
|
"eval_fcm_dpo/beta": 0.011007222346961498,
|
|
"eval_logits/chosen": -0.840668797492981,
|
|
"eval_logits/rejected": -0.8345889449119568,
|
|
"eval_logps/chosen": -383.9891357421875,
|
|
"eval_logps/ref_chosen": -287.8267517089844,
|
|
"eval_logps/ref_rejected": -266.9313659667969,
|
|
"eval_logps/rejected": -417.3312072753906,
|
|
"eval_loss": 0.5351805090904236,
|
|
"eval_margin_dpo/margin_mean": 54.237510681152344,
|
|
"eval_margin_dpo/margin_std": 83.07901763916016,
|
|
"eval_runtime": 81.6128,
|
|
"eval_samples_per_second": 24.506,
|
|
"eval_steps_per_second": 1.532,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.8397905759162304,
|
|
"fcm_dpo/beta": 0.01083466224372387,
|
|
"fcm_dpo/delta": -0.03301737830042839,
|
|
"fcm_dpo/margin": 58.17015075683594,
|
|
"fcm_dpo/q_t": 0.3694632053375244,
|
|
"grad_norm": 130.7917022705078,
|
|
"learning_rate": 3.8702478614051345e-08,
|
|
"logits/chosen": -0.8163310289382935,
|
|
"logits/rejected": -0.8166416883468628,
|
|
"logps/chosen": -411.48193359375,
|
|
"logps/ref_chosen": -318.2853088378906,
|
|
"logps/ref_rejected": -293.75225830078125,
|
|
"logps/rejected": -445.11895751953125,
|
|
"loss": 4.0959,
|
|
"margin_dpo/margin_mean": 58.1701545715332,
|
|
"margin_dpo/margin_std": 81.16681671142578,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.8418848167539267,
|
|
"fcm_dpo/beta": 0.010784516111016273,
|
|
"fcm_dpo/delta": 0.002740806434303522,
|
|
"fcm_dpo/margin": 55.34971237182617,
|
|
"fcm_dpo/q_t": 0.37727218866348267,
|
|
"grad_norm": 109.63217163085938,
|
|
"learning_rate": 3.772967168071517e-08,
|
|
"logits/chosen": -0.8767110705375671,
|
|
"logits/rejected": -0.8513585329055786,
|
|
"logps/chosen": -398.0880126953125,
|
|
"logps/ref_chosen": -309.4278564453125,
|
|
"logps/ref_rejected": -282.0279846191406,
|
|
"logps/rejected": -426.037841796875,
|
|
"loss": 4.1861,
|
|
"margin_dpo/margin_mean": 55.34970474243164,
|
|
"margin_dpo/margin_std": 82.14326477050781,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.8439790575916231,
|
|
"fcm_dpo/beta": 0.010111565701663494,
|
|
"fcm_dpo/delta": -0.1565774530172348,
|
|
"fcm_dpo/margin": 73.72638702392578,
|
|
"fcm_dpo/q_t": 0.34308868646621704,
|
|
"grad_norm": 77.10204315185547,
|
|
"learning_rate": 3.676824816087978e-08,
|
|
"logits/chosen": -0.8601398468017578,
|
|
"logits/rejected": -0.8417026996612549,
|
|
"logps/chosen": -399.95440673828125,
|
|
"logps/ref_chosen": -309.0284729003906,
|
|
"logps/ref_rejected": -272.9622497558594,
|
|
"logps/rejected": -437.61456298828125,
|
|
"loss": 3.6889,
|
|
"margin_dpo/margin_mean": 73.72638702392578,
|
|
"margin_dpo/margin_std": 79.83676147460938,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.8460732984293193,
|
|
"fcm_dpo/beta": 0.009832248091697693,
|
|
"fcm_dpo/delta": 0.06779766827821732,
|
|
"fcm_dpo/margin": 54.37714385986328,
|
|
"fcm_dpo/q_t": 0.3864296078681946,
|
|
"grad_norm": 93.5862045288086,
|
|
"learning_rate": 3.581825961277074e-08,
|
|
"logits/chosen": -0.88753741979599,
|
|
"logits/rejected": -0.8670026063919067,
|
|
"logps/chosen": -398.1080627441406,
|
|
"logps/ref_chosen": -297.2837219238281,
|
|
"logps/ref_rejected": -256.99041748046875,
|
|
"logps/rejected": -412.1919250488281,
|
|
"loss": 4.3263,
|
|
"margin_dpo/margin_mean": 54.377140045166016,
|
|
"margin_dpo/margin_std": 83.19239044189453,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.8481675392670157,
|
|
"fcm_dpo/beta": 0.010070566087961197,
|
|
"fcm_dpo/delta": -0.0018516681157052517,
|
|
"fcm_dpo/margin": 59.72123718261719,
|
|
"fcm_dpo/q_t": 0.37372201681137085,
|
|
"grad_norm": 72.33039093017578,
|
|
"learning_rate": 3.487975698139084e-08,
|
|
"logits/chosen": -0.7841629385948181,
|
|
"logits/rejected": -0.7917266488075256,
|
|
"logps/chosen": -349.7501220703125,
|
|
"logps/ref_chosen": -257.96533203125,
|
|
"logps/ref_rejected": -255.811279296875,
|
|
"logps/rejected": -407.3173522949219,
|
|
"loss": 4.0942,
|
|
"margin_dpo/margin_mean": 59.72124099731445,
|
|
"margin_dpo/margin_std": 81.7040786743164,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.8502617801047121,
|
|
"fcm_dpo/beta": 0.010788071900606155,
|
|
"fcm_dpo/delta": 0.11286494135856628,
|
|
"fcm_dpo/margin": 45.265289306640625,
|
|
"fcm_dpo/q_t": 0.3963577449321747,
|
|
"grad_norm": 122.36735534667969,
|
|
"learning_rate": 3.3952790595787986e-08,
|
|
"logits/chosen": -0.8172638416290283,
|
|
"logits/rejected": -0.7938133478164673,
|
|
"logps/chosen": -388.5911865234375,
|
|
"logps/ref_chosen": -285.1810607910156,
|
|
"logps/ref_rejected": -264.41351318359375,
|
|
"logps/rejected": -413.0889892578125,
|
|
"loss": 4.4481,
|
|
"margin_dpo/margin_mean": 45.265289306640625,
|
|
"margin_dpo/margin_std": 74.5055160522461,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.8523560209424084,
|
|
"fcm_dpo/beta": 0.010678643360733986,
|
|
"fcm_dpo/delta": -0.038571376353502274,
|
|
"fcm_dpo/margin": 59.46702575683594,
|
|
"fcm_dpo/q_t": 0.3713955879211426,
|
|
"grad_norm": 115.4225082397461,
|
|
"learning_rate": 3.303741016635614e-08,
|
|
"logits/chosen": -0.8237298130989075,
|
|
"logits/rejected": -0.8525005578994751,
|
|
"logps/chosen": -370.30657958984375,
|
|
"logps/ref_chosen": -265.23809814453125,
|
|
"logps/ref_rejected": -219.0631561279297,
|
|
"logps/rejected": -383.59869384765625,
|
|
"loss": 4.1242,
|
|
"margin_dpo/margin_mean": 59.46702575683594,
|
|
"margin_dpo/margin_std": 84.35752868652344,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.8544502617801047,
|
|
"fcm_dpo/beta": 0.010710010305047035,
|
|
"fcm_dpo/delta": -0.021216176450252533,
|
|
"fcm_dpo/margin": 57.599510192871094,
|
|
"fcm_dpo/q_t": 0.3745374381542206,
|
|
"grad_norm": 82.85662841796875,
|
|
"learning_rate": 3.2133664782169944e-08,
|
|
"logits/chosen": -0.853847861289978,
|
|
"logits/rejected": -0.8488500118255615,
|
|
"logps/chosen": -388.76116943359375,
|
|
"logps/ref_chosen": -296.9726257324219,
|
|
"logps/ref_rejected": -295.4786376953125,
|
|
"logps/rejected": -444.86669921875,
|
|
"loss": 4.1418,
|
|
"margin_dpo/margin_mean": 57.599510192871094,
|
|
"margin_dpo/margin_std": 79.41829681396484,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.856544502617801,
|
|
"fcm_dpo/beta": 0.010306498035788536,
|
|
"fcm_dpo/delta": -0.026171572506427765,
|
|
"fcm_dpo/margin": 55.8836669921875,
|
|
"fcm_dpo/q_t": 0.38002270460128784,
|
|
"grad_norm": 89.53182220458984,
|
|
"learning_rate": 3.12416029083514e-08,
|
|
"logits/chosen": -0.8308712244033813,
|
|
"logits/rejected": -0.8197529315948486,
|
|
"logps/chosen": -387.86822509765625,
|
|
"logps/ref_chosen": -287.37933349609375,
|
|
"logps/ref_rejected": -275.80291748046875,
|
|
"logps/rejected": -432.1754150390625,
|
|
"loss": 4.3765,
|
|
"margin_dpo/margin_mean": 55.8836669921875,
|
|
"margin_dpo/margin_std": 91.40107727050781,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.8586387434554974,
|
|
"fcm_dpo/beta": 0.010539250448346138,
|
|
"fcm_dpo/delta": 0.05248191952705383,
|
|
"fcm_dpo/margin": 52.03704833984375,
|
|
"fcm_dpo/q_t": 0.38655808568000793,
|
|
"grad_norm": 104.86951446533203,
|
|
"learning_rate": 3.036127238347164e-08,
|
|
"logits/chosen": -0.8444973826408386,
|
|
"logits/rejected": -0.8523566722869873,
|
|
"logps/chosen": -379.1742248535156,
|
|
"logps/ref_chosen": -281.7801818847656,
|
|
"logps/ref_rejected": -266.7550354003906,
|
|
"logps/rejected": -416.18609619140625,
|
|
"loss": 4.3978,
|
|
"margin_dpo/margin_mean": 52.03704833984375,
|
|
"margin_dpo/margin_std": 85.04647827148438,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.8607329842931937,
|
|
"fcm_dpo/beta": 0.010161810554564,
|
|
"fcm_dpo/delta": -0.07766594737768173,
|
|
"fcm_dpo/margin": 65.88399505615234,
|
|
"fcm_dpo/q_t": 0.35847824811935425,
|
|
"grad_norm": 82.89816284179688,
|
|
"learning_rate": 2.9492720416985e-08,
|
|
"logits/chosen": -0.8404784798622131,
|
|
"logits/rejected": -0.8063231706619263,
|
|
"logps/chosen": -373.0326843261719,
|
|
"logps/ref_chosen": -281.5872497558594,
|
|
"logps/ref_rejected": -254.78916931152344,
|
|
"logps/rejected": -412.1186218261719,
|
|
"loss": 3.8683,
|
|
"margin_dpo/margin_mean": 65.88399505615234,
|
|
"margin_dpo/margin_std": 77.45508575439453,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.86282722513089,
|
|
"fcm_dpo/beta": 0.009724740870296955,
|
|
"fcm_dpo/delta": -0.014092553406953812,
|
|
"fcm_dpo/margin": 49.745506286621094,
|
|
"fcm_dpo/q_t": 0.39898359775543213,
|
|
"grad_norm": 88.29672241210938,
|
|
"learning_rate": 2.863599358669755e-08,
|
|
"logits/chosen": -0.8222418427467346,
|
|
"logits/rejected": -0.8297065496444702,
|
|
"logps/chosen": -382.5504455566406,
|
|
"logps/ref_chosen": -276.5341796875,
|
|
"logps/ref_rejected": -273.8751220703125,
|
|
"logps/rejected": -429.636962890625,
|
|
"loss": 4.4639,
|
|
"margin_dpo/margin_mean": 49.745506286621094,
|
|
"margin_dpo/margin_std": 82.65047454833984,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.8649214659685864,
|
|
"fcm_dpo/beta": 0.01038271188735962,
|
|
"fcm_dpo/delta": 0.1108207255601883,
|
|
"fcm_dpo/margin": 47.65922927856445,
|
|
"fcm_dpo/q_t": 0.3976641297340393,
|
|
"grad_norm": 122.68524169921875,
|
|
"learning_rate": 2.7791137836269158e-08,
|
|
"logits/chosen": -0.8296762704849243,
|
|
"logits/rejected": -0.8280857801437378,
|
|
"logps/chosen": -370.8761901855469,
|
|
"logps/ref_chosen": -271.2745666503906,
|
|
"logps/ref_rejected": -270.16912841796875,
|
|
"logps/rejected": -417.4300231933594,
|
|
"loss": 4.4352,
|
|
"margin_dpo/margin_mean": 47.659236907958984,
|
|
"margin_dpo/margin_std": 79.33601379394531,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.8670157068062827,
|
|
"fcm_dpo/beta": 0.010576970875263214,
|
|
"fcm_dpo/delta": -0.07696720957756042,
|
|
"fcm_dpo/margin": 63.35627746582031,
|
|
"fcm_dpo/q_t": 0.3609466850757599,
|
|
"grad_norm": 91.79287719726562,
|
|
"learning_rate": 2.6958198472749717e-08,
|
|
"logits/chosen": -0.8634947538375854,
|
|
"logits/rejected": -0.8709216117858887,
|
|
"logps/chosen": -394.9339904785156,
|
|
"logps/ref_chosen": -297.11505126953125,
|
|
"logps/ref_rejected": -271.7034606933594,
|
|
"logps/rejected": -432.8786926269531,
|
|
"loss": 3.9873,
|
|
"margin_dpo/margin_mean": 63.35627746582031,
|
|
"margin_dpo/margin_std": 80.128173828125,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.8691099476439791,
|
|
"fcm_dpo/beta": 0.010473713278770447,
|
|
"fcm_dpo/delta": 0.03152439743280411,
|
|
"fcm_dpo/margin": 54.287376403808594,
|
|
"fcm_dpo/q_t": 0.37784260511398315,
|
|
"grad_norm": 88.63931274414062,
|
|
"learning_rate": 2.613722016414943e-08,
|
|
"logits/chosen": -0.8671582937240601,
|
|
"logits/rejected": -0.8537446856498718,
|
|
"logps/chosen": -394.3064880371094,
|
|
"logps/ref_chosen": -297.6926574707031,
|
|
"logps/ref_rejected": -279.0503234863281,
|
|
"logps/rejected": -429.9515380859375,
|
|
"loss": 4.1391,
|
|
"margin_dpo/margin_mean": 54.287376403808594,
|
|
"margin_dpo/margin_std": 73.61995697021484,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.8712041884816754,
|
|
"fcm_dpo/beta": 0.010003462433815002,
|
|
"fcm_dpo/delta": -0.06168883666396141,
|
|
"fcm_dpo/margin": 65.28002166748047,
|
|
"fcm_dpo/q_t": 0.3622613847255707,
|
|
"grad_norm": 75.9556655883789,
|
|
"learning_rate": 2.5328246937043525e-08,
|
|
"logits/chosen": -0.8746985197067261,
|
|
"logits/rejected": -0.8851325511932373,
|
|
"logps/chosen": -402.248046875,
|
|
"logps/ref_chosen": -311.8255615234375,
|
|
"logps/ref_rejected": -268.6170654296875,
|
|
"logps/rejected": -424.3195495605469,
|
|
"loss": 4.0131,
|
|
"margin_dpo/margin_mean": 65.28001403808594,
|
|
"margin_dpo/margin_std": 82.11227416992188,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.8732984293193717,
|
|
"fcm_dpo/beta": 0.009751483798027039,
|
|
"fcm_dpo/delta": -0.026388350874185562,
|
|
"fcm_dpo/margin": 57.194740295410156,
|
|
"fcm_dpo/q_t": 0.38324517011642456,
|
|
"grad_norm": 92.91184997558594,
|
|
"learning_rate": 2.4531322174210973e-08,
|
|
"logits/chosen": -0.8104668259620667,
|
|
"logits/rejected": -0.8152583837509155,
|
|
"logps/chosen": -410.4869384765625,
|
|
"logps/ref_chosen": -310.43682861328125,
|
|
"logps/ref_rejected": -277.15283203125,
|
|
"logps/rejected": -434.3976745605469,
|
|
"loss": 4.2952,
|
|
"margin_dpo/margin_mean": 57.194740295410156,
|
|
"margin_dpo/margin_std": 84.70370483398438,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.875392670157068,
|
|
"fcm_dpo/beta": 0.009737811051309109,
|
|
"fcm_dpo/delta": -0.03723875805735588,
|
|
"fcm_dpo/margin": 54.46815490722656,
|
|
"fcm_dpo/q_t": 0.38777798414230347,
|
|
"grad_norm": 96.64009094238281,
|
|
"learning_rate": 2.3746488612308295e-08,
|
|
"logits/chosen": -0.8096103072166443,
|
|
"logits/rejected": -0.7874211668968201,
|
|
"logps/chosen": -387.23211669921875,
|
|
"logps/ref_chosen": -278.49591064453125,
|
|
"logps/ref_rejected": -276.56671142578125,
|
|
"logps/rejected": -439.77105712890625,
|
|
"loss": 4.3411,
|
|
"margin_dpo/margin_mean": 54.46815490722656,
|
|
"margin_dpo/margin_std": 80.28997802734375,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.8774869109947644,
|
|
"fcm_dpo/beta": 0.00948832742869854,
|
|
"fcm_dpo/delta": 0.0006105322390794754,
|
|
"fcm_dpo/margin": 62.94663619995117,
|
|
"fcm_dpo/q_t": 0.3723425269126892,
|
|
"grad_norm": 94.83244323730469,
|
|
"learning_rate": 2.297378833957761e-08,
|
|
"logits/chosen": -0.8623223304748535,
|
|
"logits/rejected": -0.841428816318512,
|
|
"logps/chosen": -406.687744140625,
|
|
"logps/ref_chosen": -298.9002380371094,
|
|
"logps/ref_rejected": -246.1540985107422,
|
|
"logps/rejected": -416.88824462890625,
|
|
"loss": 4.1616,
|
|
"margin_dpo/margin_mean": 62.946632385253906,
|
|
"margin_dpo/margin_std": 87.8830337524414,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.8795811518324608,
|
|
"fcm_dpo/beta": 0.009303269907832146,
|
|
"fcm_dpo/delta": -0.02820839360356331,
|
|
"fcm_dpo/margin": 67.16934967041016,
|
|
"fcm_dpo/q_t": 0.3698027729988098,
|
|
"grad_norm": 119.92971801757812,
|
|
"learning_rate": 2.2213262793589482e-08,
|
|
"logits/chosen": -0.8005006909370422,
|
|
"logits/rejected": -0.7742573618888855,
|
|
"logps/chosen": -369.0135498046875,
|
|
"logps/ref_chosen": -264.5608825683594,
|
|
"logps/ref_rejected": -245.67031860351562,
|
|
"logps/rejected": -417.29229736328125,
|
|
"loss": 4.1315,
|
|
"margin_dpo/margin_mean": 67.16934967041016,
|
|
"margin_dpo/margin_std": 95.13330078125,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.881675392670157,
|
|
"fcm_dpo/beta": 0.009417861700057983,
|
|
"fcm_dpo/delta": 0.0553901270031929,
|
|
"fcm_dpo/margin": 58.053810119628906,
|
|
"fcm_dpo/q_t": 0.3806764483451843,
|
|
"grad_norm": 95.21514129638672,
|
|
"learning_rate": 2.1464952759020856e-08,
|
|
"logits/chosen": -0.87691730260849,
|
|
"logits/rejected": -0.8619418144226074,
|
|
"logps/chosen": -393.3926086425781,
|
|
"logps/ref_chosen": -297.70501708984375,
|
|
"logps/ref_rejected": -243.74771118164062,
|
|
"logps/rejected": -397.4891052246094,
|
|
"loss": 4.1826,
|
|
"margin_dpo/margin_mean": 58.053810119628906,
|
|
"margin_dpo/margin_std": 79.26122283935547,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.8837696335078534,
|
|
"fcm_dpo/beta": 0.009782630950212479,
|
|
"fcm_dpo/delta": 0.011894671246409416,
|
|
"fcm_dpo/margin": 60.101036071777344,
|
|
"fcm_dpo/q_t": 0.378351628780365,
|
|
"grad_norm": 73.66893768310547,
|
|
"learning_rate": 2.07288983654679e-08,
|
|
"logits/chosen": -0.7312873601913452,
|
|
"logits/rejected": -0.7808342576026917,
|
|
"logps/chosen": -388.74200439453125,
|
|
"logps/ref_chosen": -288.3587646484375,
|
|
"logps/ref_rejected": -256.4377746582031,
|
|
"logps/rejected": -416.9220886230469,
|
|
"loss": 4.2494,
|
|
"margin_dpo/margin_mean": 60.101036071777344,
|
|
"margin_dpo/margin_std": 90.90327453613281,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.8858638743455497,
|
|
"fcm_dpo/beta": 0.009724876843392849,
|
|
"fcm_dpo/delta": -0.009197833016514778,
|
|
"fcm_dpo/margin": 62.44007873535156,
|
|
"fcm_dpo/q_t": 0.3724360466003418,
|
|
"grad_norm": 106.5963134765625,
|
|
"learning_rate": 2.0005139085293942e-08,
|
|
"logits/chosen": -0.8626726269721985,
|
|
"logits/rejected": -0.847291886806488,
|
|
"logps/chosen": -398.4643859863281,
|
|
"logps/ref_chosen": -296.00701904296875,
|
|
"logps/ref_rejected": -261.3480529785156,
|
|
"logps/rejected": -426.2454833984375,
|
|
"loss": 4.104,
|
|
"margin_dpo/margin_mean": 62.44007873535156,
|
|
"margin_dpo/margin_std": 85.05935668945312,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.8879581151832461,
|
|
"fcm_dpo/beta": 0.00962867308408022,
|
|
"fcm_dpo/delta": -0.012859391048550606,
|
|
"fcm_dpo/margin": 63.36909103393555,
|
|
"fcm_dpo/q_t": 0.36758118867874146,
|
|
"grad_norm": 92.60458374023438,
|
|
"learning_rate": 1.9293713731512673e-08,
|
|
"logits/chosen": -0.8467947840690613,
|
|
"logits/rejected": -0.8503403663635254,
|
|
"logps/chosen": -404.1105041503906,
|
|
"logps/ref_chosen": -309.421875,
|
|
"logps/ref_rejected": -249.14886474609375,
|
|
"logps/rejected": -407.2065734863281,
|
|
"loss": 3.9839,
|
|
"margin_dpo/margin_mean": 63.36909103393555,
|
|
"margin_dpo/margin_std": 75.94871520996094,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.8900523560209425,
|
|
"fcm_dpo/beta": 0.009777205064892769,
|
|
"fcm_dpo/delta": 0.03504558652639389,
|
|
"fcm_dpo/margin": 50.999881744384766,
|
|
"fcm_dpo/q_t": 0.3966800570487976,
|
|
"grad_norm": 110.89618682861328,
|
|
"learning_rate": 1.8594660455706763e-08,
|
|
"logits/chosen": -0.82796710729599,
|
|
"logits/rejected": -0.8337902426719666,
|
|
"logps/chosen": -382.77001953125,
|
|
"logps/ref_chosen": -280.50909423828125,
|
|
"logps/ref_rejected": -276.8252258300781,
|
|
"logps/rejected": -430.08599853515625,
|
|
"loss": 4.5049,
|
|
"margin_dpo/margin_mean": 50.99988555908203,
|
|
"margin_dpo/margin_std": 87.4363784790039,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.8921465968586387,
|
|
"fcm_dpo/beta": 0.009954184293746948,
|
|
"fcm_dpo/delta": 0.007840080186724663,
|
|
"fcm_dpo/margin": 59.37195587158203,
|
|
"fcm_dpo/q_t": 0.37472671270370483,
|
|
"grad_norm": 97.03230285644531,
|
|
"learning_rate": 1.7908016745981856e-08,
|
|
"logits/chosen": -0.852469801902771,
|
|
"logits/rejected": -0.839727520942688,
|
|
"logps/chosen": -397.4804992675781,
|
|
"logps/ref_chosen": -292.78521728515625,
|
|
"logps/ref_rejected": -255.62698364257812,
|
|
"logps/rejected": -419.69415283203125,
|
|
"loss": 4.1268,
|
|
"margin_dpo/margin_mean": 59.37195587158203,
|
|
"margin_dpo/margin_std": 79.54149627685547,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.8942408376963351,
|
|
"fcm_dpo/beta": 0.009696273133158684,
|
|
"fcm_dpo/delta": -0.15165768563747406,
|
|
"fcm_dpo/margin": 76.89569091796875,
|
|
"fcm_dpo/q_t": 0.34561559557914734,
|
|
"grad_norm": 90.61172485351562,
|
|
"learning_rate": 1.7233819424956247e-08,
|
|
"logits/chosen": -0.8438408374786377,
|
|
"logits/rejected": -0.8143002390861511,
|
|
"logps/chosen": -388.6639099121094,
|
|
"logps/ref_chosen": -288.7687072753906,
|
|
"logps/ref_rejected": -268.4986572265625,
|
|
"logps/rejected": -445.2895812988281,
|
|
"loss": 3.8296,
|
|
"margin_dpo/margin_mean": 76.89569091796875,
|
|
"margin_dpo/margin_std": 89.48991394042969,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.8963350785340314,
|
|
"fcm_dpo/beta": 0.008838072419166565,
|
|
"fcm_dpo/delta": -0.012139791622757912,
|
|
"fcm_dpo/margin": 69.09696197509766,
|
|
"fcm_dpo/q_t": 0.36812734603881836,
|
|
"grad_norm": 81.80809783935547,
|
|
"learning_rate": 1.6572104647786245e-08,
|
|
"logits/chosen": -0.79007887840271,
|
|
"logits/rejected": -0.8174630999565125,
|
|
"logps/chosen": -407.681640625,
|
|
"logps/ref_chosen": -295.5209655761719,
|
|
"logps/ref_rejected": -275.71026611328125,
|
|
"logps/rejected": -456.96795654296875,
|
|
"loss": 4.0523,
|
|
"margin_dpo/margin_mean": 69.09696197509766,
|
|
"margin_dpo/margin_std": 90.01223754882812,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.8984293193717278,
|
|
"fcm_dpo/beta": 0.00869191437959671,
|
|
"fcm_dpo/delta": -0.019767988473176956,
|
|
"fcm_dpo/margin": 62.65784454345703,
|
|
"fcm_dpo/q_t": 0.3779388666152954,
|
|
"grad_norm": 152.89610290527344,
|
|
"learning_rate": 1.5922907900227017e-08,
|
|
"logits/chosen": -0.8012307286262512,
|
|
"logits/rejected": -0.8117492198944092,
|
|
"logps/chosen": -377.2466735839844,
|
|
"logps/ref_chosen": -274.392333984375,
|
|
"logps/ref_rejected": -258.574462890625,
|
|
"logps/rejected": -424.086669921875,
|
|
"loss": 4.3069,
|
|
"margin_dpo/margin_mean": 62.65784454345703,
|
|
"margin_dpo/margin_std": 93.02066802978516,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.900523560209424,
|
|
"fcm_dpo/beta": 0.008904652670025826,
|
|
"fcm_dpo/delta": 0.03332711011171341,
|
|
"fcm_dpo/margin": 52.0653076171875,
|
|
"fcm_dpo/q_t": 0.400870144367218,
|
|
"grad_norm": 87.17733001708984,
|
|
"learning_rate": 1.5286263996730026e-08,
|
|
"logits/chosen": -0.8745531439781189,
|
|
"logits/rejected": -0.8473076224327087,
|
|
"logps/chosen": -389.5596923828125,
|
|
"logps/ref_chosen": -288.7391357421875,
|
|
"logps/ref_rejected": -268.6106262207031,
|
|
"logps/rejected": -421.49652099609375,
|
|
"loss": 4.4476,
|
|
"margin_dpo/margin_mean": 52.06529998779297,
|
|
"margin_dpo/margin_std": 83.41305541992188,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.9026178010471204,
|
|
"fcm_dpo/beta": 0.009530465118587017,
|
|
"fcm_dpo/delta": 0.10892680287361145,
|
|
"fcm_dpo/margin": 46.10047912597656,
|
|
"fcm_dpo/q_t": 0.4076777696609497,
|
|
"grad_norm": 102.02164459228516,
|
|
"learning_rate": 1.4662207078575684e-08,
|
|
"logits/chosen": -0.8528344631195068,
|
|
"logits/rejected": -0.8196998238563538,
|
|
"logps/chosen": -378.56854248046875,
|
|
"logps/ref_chosen": -275.7247314453125,
|
|
"logps/ref_rejected": -268.91729736328125,
|
|
"logps/rejected": -417.8615417480469,
|
|
"loss": 4.5373,
|
|
"margin_dpo/margin_mean": 46.10047912597656,
|
|
"margin_dpo/margin_std": 79.43936157226562,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.9047120418848168,
|
|
"fcm_dpo/beta": 0.009441605769097805,
|
|
"fcm_dpo/delta": 0.008324447087943554,
|
|
"fcm_dpo/margin": 62.6112174987793,
|
|
"fcm_dpo/q_t": 0.3774021565914154,
|
|
"grad_norm": 76.50348663330078,
|
|
"learning_rate": 1.40507706120426e-08,
|
|
"logits/chosen": -0.8662209510803223,
|
|
"logits/rejected": -0.8538703918457031,
|
|
"logps/chosen": -387.158203125,
|
|
"logps/ref_chosen": -291.42010498046875,
|
|
"logps/ref_rejected": -255.48202514648438,
|
|
"logps/rejected": -413.8314208984375,
|
|
"loss": 4.1391,
|
|
"margin_dpo/margin_mean": 62.6112174987793,
|
|
"margin_dpo/margin_std": 86.902587890625,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.9068062827225131,
|
|
"fcm_dpo/beta": 0.009825142100453377,
|
|
"fcm_dpo/delta": 0.0619993582367897,
|
|
"fcm_dpo/margin": 55.066165924072266,
|
|
"fcm_dpo/q_t": 0.3830508589744568,
|
|
"grad_norm": 89.54483795166016,
|
|
"learning_rate": 1.345198738661285e-08,
|
|
"logits/chosen": -0.8318926692008972,
|
|
"logits/rejected": -0.8286322951316833,
|
|
"logps/chosen": -353.9482116699219,
|
|
"logps/ref_chosen": -246.2268829345703,
|
|
"logps/ref_rejected": -253.65924072265625,
|
|
"logps/rejected": -416.4466552734375,
|
|
"loss": 4.2477,
|
|
"margin_dpo/margin_mean": 55.066165924072266,
|
|
"margin_dpo/margin_std": 80.00988006591797,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.9089005235602095,
|
|
"fcm_dpo/beta": 0.01034282986074686,
|
|
"fcm_dpo/delta": 0.03874684125185013,
|
|
"fcm_dpo/margin": 54.308753967285156,
|
|
"fcm_dpo/q_t": 0.3828258812427521,
|
|
"grad_norm": 85.50724029541016,
|
|
"learning_rate": 1.2865889513213628e-08,
|
|
"logits/chosen": -0.8225914239883423,
|
|
"logits/rejected": -0.8342767953872681,
|
|
"logps/chosen": -406.2464294433594,
|
|
"logps/ref_chosen": -295.4618225097656,
|
|
"logps/ref_rejected": -256.2254333496094,
|
|
"logps/rejected": -421.31878662109375,
|
|
"loss": 4.2621,
|
|
"margin_dpo/margin_mean": 54.308753967285156,
|
|
"margin_dpo/margin_std": 81.47319030761719,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.9109947643979057,
|
|
"fcm_dpo/beta": 0.010198265314102173,
|
|
"fcm_dpo/delta": -0.014171771705150604,
|
|
"fcm_dpo/margin": 59.84620666503906,
|
|
"fcm_dpo/q_t": 0.37271490693092346,
|
|
"grad_norm": 118.80712890625,
|
|
"learning_rate": 1.2292508422495157e-08,
|
|
"logits/chosen": -0.8360690474510193,
|
|
"logits/rejected": -0.8230299949645996,
|
|
"logps/chosen": -361.0164489746094,
|
|
"logps/ref_chosen": -260.7384033203125,
|
|
"logps/ref_rejected": -248.5688018798828,
|
|
"logps/rejected": -408.69305419921875,
|
|
"loss": 4.0566,
|
|
"margin_dpo/margin_mean": 59.84620666503906,
|
|
"margin_dpo/margin_std": 77.26177978515625,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.9130890052356021,
|
|
"fcm_dpo/beta": 0.010558899492025375,
|
|
"fcm_dpo/delta": 0.0584358386695385,
|
|
"fcm_dpo/margin": 51.58165740966797,
|
|
"fcm_dpo/q_t": 0.38922375440597534,
|
|
"grad_norm": 111.06973266601562,
|
|
"learning_rate": 1.1731874863145142e-08,
|
|
"logits/chosen": -0.8108433485031128,
|
|
"logits/rejected": -0.8116201162338257,
|
|
"logps/chosen": -426.4559326171875,
|
|
"logps/ref_chosen": -319.3224792480469,
|
|
"logps/ref_rejected": -299.30322265625,
|
|
"logps/rejected": -458.01837158203125,
|
|
"loss": 4.3549,
|
|
"margin_dpo/margin_mean": 51.58165740966797,
|
|
"margin_dpo/margin_std": 84.18111419677734,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.9151832460732985,
|
|
"fcm_dpo/beta": 0.010173209011554718,
|
|
"fcm_dpo/delta": -0.1447606235742569,
|
|
"fcm_dpo/margin": 67.71795654296875,
|
|
"fcm_dpo/q_t": 0.35751214623451233,
|
|
"grad_norm": 94.34660339355469,
|
|
"learning_rate": 1.118401890024001e-08,
|
|
"logits/chosen": -0.844616174697876,
|
|
"logits/rejected": -0.8318252563476562,
|
|
"logps/chosen": -377.5007019042969,
|
|
"logps/ref_chosen": -278.82879638671875,
|
|
"logps/ref_rejected": -272.55303955078125,
|
|
"logps/rejected": -438.94293212890625,
|
|
"loss": 3.9626,
|
|
"margin_dpo/margin_mean": 67.71794891357422,
|
|
"margin_dpo/margin_std": 85.74967956542969,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.9172774869109948,
|
|
"fcm_dpo/beta": 0.009965099394321442,
|
|
"fcm_dpo/delta": 0.061277735978364944,
|
|
"fcm_dpo/margin": 36.24957275390625,
|
|
"fcm_dpo/q_t": 0.42433011531829834,
|
|
"grad_norm": 114.82047271728516,
|
|
"learning_rate": 1.06489699136324e-08,
|
|
"logits/chosen": -0.81844162940979,
|
|
"logits/rejected": -0.842022716999054,
|
|
"logps/chosen": -362.96392822265625,
|
|
"logps/ref_chosen": -259.31903076171875,
|
|
"logps/ref_rejected": -240.99581909179688,
|
|
"logps/rejected": -380.8902282714844,
|
|
"loss": 4.9148,
|
|
"margin_dpo/margin_mean": 36.24957275390625,
|
|
"margin_dpo/margin_std": 83.63807678222656,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.9193717277486911,
|
|
"fcm_dpo/beta": 0.010149678215384483,
|
|
"fcm_dpo/delta": 0.01735379360616207,
|
|
"fcm_dpo/margin": 57.44043731689453,
|
|
"fcm_dpo/q_t": 0.3788164556026459,
|
|
"grad_norm": 111.58253479003906,
|
|
"learning_rate": 1.0126756596375685e-08,
|
|
"logits/chosen": -0.8203510046005249,
|
|
"logits/rejected": -0.8303657174110413,
|
|
"logps/chosen": -361.6324462890625,
|
|
"logps/ref_chosen": -257.1243896484375,
|
|
"logps/ref_rejected": -243.20416259765625,
|
|
"logps/rejected": -405.1526184082031,
|
|
"loss": 4.1973,
|
|
"margin_dpo/margin_mean": 57.44043731689453,
|
|
"margin_dpo/margin_std": 83.50491333007812,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.9214659685863874,
|
|
"fcm_dpo/beta": 0.01079685427248478,
|
|
"fcm_dpo/delta": 0.07901112735271454,
|
|
"fcm_dpo/margin": 43.94521713256836,
|
|
"fcm_dpo/q_t": 0.39794009923934937,
|
|
"grad_norm": 109.37852478027344,
|
|
"learning_rate": 9.617406953185136e-09,
|
|
"logits/chosen": -0.8688513040542603,
|
|
"logits/rejected": -0.8636762499809265,
|
|
"logps/chosen": -421.8122863769531,
|
|
"logps/ref_chosen": -307.5315246582031,
|
|
"logps/ref_rejected": -264.3540954589844,
|
|
"logps/rejected": -422.580078125,
|
|
"loss": 4.5155,
|
|
"margin_dpo/margin_mean": 43.94521713256836,
|
|
"margin_dpo/margin_std": 75.19562530517578,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.9235602094240838,
|
|
"fcm_dpo/beta": 0.01067368034273386,
|
|
"fcm_dpo/delta": -0.08818989247083664,
|
|
"fcm_dpo/margin": 63.886962890625,
|
|
"fcm_dpo/q_t": 0.3568004071712494,
|
|
"grad_norm": 96.44715881347656,
|
|
"learning_rate": 9.12094829893642e-09,
|
|
"logits/chosen": -0.820861279964447,
|
|
"logits/rejected": -0.8048292994499207,
|
|
"logps/chosen": -411.4300537109375,
|
|
"logps/ref_chosen": -309.9819641113281,
|
|
"logps/ref_rejected": -297.4968566894531,
|
|
"logps/rejected": -462.8319091796875,
|
|
"loss": 3.9218,
|
|
"margin_dpo/margin_mean": 63.88697052001953,
|
|
"margin_dpo/margin_std": 77.22992706298828,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.9256544502617801,
|
|
"fcm_dpo/beta": 0.010254503227770329,
|
|
"fcm_dpo/delta": 0.07677368074655533,
|
|
"fcm_dpo/margin": 51.52596664428711,
|
|
"fcm_dpo/q_t": 0.3904913365840912,
|
|
"grad_norm": 98.88241577148438,
|
|
"learning_rate": 8.637407257200496e-09,
|
|
"logits/chosen": -0.8967298865318298,
|
|
"logits/rejected": -0.8527672290802002,
|
|
"logps/chosen": -388.2881774902344,
|
|
"logps/ref_chosen": -278.9791564941406,
|
|
"logps/ref_rejected": -242.87310791015625,
|
|
"logps/rejected": -403.7081298828125,
|
|
"loss": 4.4585,
|
|
"margin_dpo/margin_mean": 51.525962829589844,
|
|
"margin_dpo/margin_std": 85.75384521484375,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.9277486910994764,
|
|
"fcm_dpo/beta": 0.010872803628444672,
|
|
"fcm_dpo/delta": -0.026267580687999725,
|
|
"fcm_dpo/margin": 57.026920318603516,
|
|
"fcm_dpo/q_t": 0.3696047067642212,
|
|
"grad_norm": 103.62533569335938,
|
|
"learning_rate": 8.166809758815895e-09,
|
|
"logits/chosen": -0.7956724166870117,
|
|
"logits/rejected": -0.8195681571960449,
|
|
"logps/chosen": -375.178955078125,
|
|
"logps/ref_chosen": -273.5590515136719,
|
|
"logps/ref_rejected": -264.0199279785156,
|
|
"logps/rejected": -422.6667785644531,
|
|
"loss": 4.1475,
|
|
"margin_dpo/margin_mean": 57.026920318603516,
|
|
"margin_dpo/margin_std": 78.09822082519531,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.9298429319371728,
|
|
"fcm_dpo/beta": 0.010260455310344696,
|
|
"fcm_dpo/delta": -0.04022660851478577,
|
|
"fcm_dpo/margin": 61.927947998046875,
|
|
"fcm_dpo/q_t": 0.3720618486404419,
|
|
"grad_norm": 100.3301773071289,
|
|
"learning_rate": 7.709181040498253e-09,
|
|
"logits/chosen": -0.807881772518158,
|
|
"logits/rejected": -0.7976375818252563,
|
|
"logps/chosen": -399.5924377441406,
|
|
"logps/ref_chosen": -298.1441955566406,
|
|
"logps/ref_rejected": -268.0572814941406,
|
|
"logps/rejected": -431.4334716796875,
|
|
"loss": 4.209,
|
|
"margin_dpo/margin_mean": 61.927947998046875,
|
|
"margin_dpo/margin_std": 93.60353088378906,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.9319371727748691,
|
|
"fcm_dpo/beta": 0.009982587769627571,
|
|
"fcm_dpo/delta": -0.0828336626291275,
|
|
"fcm_dpo/margin": 50.54986572265625,
|
|
"fcm_dpo/q_t": 0.39341387152671814,
|
|
"grad_norm": 95.27164459228516,
|
|
"learning_rate": 7.2645456434869965e-09,
|
|
"logits/chosen": -0.8636192679405212,
|
|
"logits/rejected": -0.8787074685096741,
|
|
"logps/chosen": -358.3545837402344,
|
|
"logps/ref_chosen": -254.54067993164062,
|
|
"logps/ref_rejected": -264.2445983886719,
|
|
"logps/rejected": -418.6083679199219,
|
|
"loss": 4.4375,
|
|
"margin_dpo/margin_mean": 50.54986572265625,
|
|
"margin_dpo/margin_std": 77.68645477294922,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.9340314136125655,
|
|
"fcm_dpo/beta": 0.009562542662024498,
|
|
"fcm_dpo/delta": 0.023602399975061417,
|
|
"fcm_dpo/margin": 60.29633331298828,
|
|
"fcm_dpo/q_t": 0.3763912618160248,
|
|
"grad_norm": 82.18879699707031,
|
|
"learning_rate": 6.832927412229017e-09,
|
|
"logits/chosen": -0.8063375949859619,
|
|
"logits/rejected": -0.8075209856033325,
|
|
"logps/chosen": -404.150634765625,
|
|
"logps/ref_chosen": -306.72247314453125,
|
|
"logps/ref_rejected": -266.3735656738281,
|
|
"logps/rejected": -424.0980224609375,
|
|
"loss": 4.179,
|
|
"margin_dpo/margin_mean": 60.29633331298828,
|
|
"margin_dpo/margin_std": 84.00418853759766,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.9361256544502617,
|
|
"fcm_dpo/beta": 0.009372793138027191,
|
|
"fcm_dpo/delta": -0.06785252690315247,
|
|
"fcm_dpo/margin": 65.36695098876953,
|
|
"fcm_dpo/q_t": 0.36677664518356323,
|
|
"grad_norm": 81.27397155761719,
|
|
"learning_rate": 6.414349493100129e-09,
|
|
"logits/chosen": -0.8006303906440735,
|
|
"logits/rejected": -0.8021730184555054,
|
|
"logps/chosen": -357.7692565917969,
|
|
"logps/ref_chosen": -260.51727294921875,
|
|
"logps/ref_rejected": -236.47061157226562,
|
|
"logps/rejected": -399.0894775390625,
|
|
"loss": 3.948,
|
|
"margin_dpo/margin_mean": 65.36695098876953,
|
|
"margin_dpo/margin_std": 76.74752044677734,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.9382198952879581,
|
|
"fcm_dpo/beta": 0.009358673356473446,
|
|
"fcm_dpo/delta": 0.04219186305999756,
|
|
"fcm_dpo/margin": 59.74993133544922,
|
|
"fcm_dpo/q_t": 0.38221871852874756,
|
|
"grad_norm": 101.68222045898438,
|
|
"learning_rate": 6.0088343331638756e-09,
|
|
"logits/chosen": -0.8103606104850769,
|
|
"logits/rejected": -0.8069367408752441,
|
|
"logps/chosen": -372.63238525390625,
|
|
"logps/ref_chosen": -268.78704833984375,
|
|
"logps/ref_rejected": -262.1703796386719,
|
|
"logps/rejected": -425.76568603515625,
|
|
"loss": 4.17,
|
|
"margin_dpo/margin_mean": 59.74993896484375,
|
|
"margin_dpo/margin_std": 81.6711654663086,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.9403141361256544,
|
|
"fcm_dpo/beta": 0.00959862396121025,
|
|
"fcm_dpo/delta": -0.02114713191986084,
|
|
"fcm_dpo/margin": 64.47396850585938,
|
|
"fcm_dpo/q_t": 0.36557552218437195,
|
|
"grad_norm": 131.04855346679688,
|
|
"learning_rate": 5.616403678967624e-09,
|
|
"logits/chosen": -0.893824577331543,
|
|
"logits/rejected": -0.8799617290496826,
|
|
"logps/chosen": -422.57275390625,
|
|
"logps/ref_chosen": -330.9514465332031,
|
|
"logps/ref_rejected": -239.76974487304688,
|
|
"logps/rejected": -395.8650207519531,
|
|
"loss": 4.0123,
|
|
"margin_dpo/margin_mean": 64.47396850585938,
|
|
"margin_dpo/margin_std": 80.27033233642578,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.9424083769633508,
|
|
"fcm_dpo/beta": 0.009571806527674198,
|
|
"fcm_dpo/delta": 0.05357804149389267,
|
|
"fcm_dpo/margin": 52.08460235595703,
|
|
"fcm_dpo/q_t": 0.3920726478099823,
|
|
"grad_norm": 97.84994506835938,
|
|
"learning_rate": 5.2370785753763356e-09,
|
|
"logits/chosen": -0.784131646156311,
|
|
"logits/rejected": -0.7929754257202148,
|
|
"logps/chosen": -395.12738037109375,
|
|
"logps/ref_chosen": -284.26544189453125,
|
|
"logps/ref_rejected": -250.5401611328125,
|
|
"logps/rejected": -413.4866943359375,
|
|
"loss": 4.2733,
|
|
"margin_dpo/margin_mean": 52.08460235595703,
|
|
"margin_dpo/margin_std": 72.87914276123047,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.9445026178010472,
|
|
"fcm_dpo/beta": 0.009548072703182697,
|
|
"fcm_dpo/delta": -0.01880437321960926,
|
|
"fcm_dpo/margin": 52.78652572631836,
|
|
"fcm_dpo/q_t": 0.3943302035331726,
|
|
"grad_norm": 102.84935760498047,
|
|
"learning_rate": 4.8708793644441086e-09,
|
|
"logits/chosen": -0.8045037984848022,
|
|
"logits/rejected": -0.777286171913147,
|
|
"logps/chosen": -414.0666809082031,
|
|
"logps/ref_chosen": -302.3209228515625,
|
|
"logps/ref_rejected": -254.09747314453125,
|
|
"logps/rejected": -418.62969970703125,
|
|
"loss": 4.419,
|
|
"margin_dpo/margin_mean": 52.78652572631836,
|
|
"margin_dpo/margin_std": 83.30145263671875,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.9465968586387434,
|
|
"fcm_dpo/beta": 0.009476564824581146,
|
|
"fcm_dpo/delta": 0.00583769753575325,
|
|
"fcm_dpo/margin": 57.515316009521484,
|
|
"fcm_dpo/q_t": 0.3849991261959076,
|
|
"grad_norm": 92.07205963134766,
|
|
"learning_rate": 4.517825684323323e-09,
|
|
"logits/chosen": -0.8670358061790466,
|
|
"logits/rejected": -0.8449291586875916,
|
|
"logps/chosen": -398.6337890625,
|
|
"logps/ref_chosen": -299.39215087890625,
|
|
"logps/ref_rejected": -284.3475036621094,
|
|
"logps/rejected": -441.1044616699219,
|
|
"loss": 4.2481,
|
|
"margin_dpo/margin_mean": 57.51531219482422,
|
|
"margin_dpo/margin_std": 82.63733673095703,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.9486910994764398,
|
|
"fcm_dpo/beta": 0.009641487151384354,
|
|
"fcm_dpo/delta": -0.013517485931515694,
|
|
"fcm_dpo/margin": 63.50697326660156,
|
|
"fcm_dpo/q_t": 0.3711569905281067,
|
|
"grad_norm": 95.07856750488281,
|
|
"learning_rate": 4.1779364682113794e-09,
|
|
"logits/chosen": -0.8013238310813904,
|
|
"logits/rejected": -0.7985789179801941,
|
|
"logps/chosen": -429.9082336425781,
|
|
"logps/ref_chosen": -324.6517028808594,
|
|
"logps/ref_rejected": -304.1527099609375,
|
|
"logps/rejected": -472.91619873046875,
|
|
"loss": 4.0431,
|
|
"margin_dpo/margin_mean": 63.50697326660156,
|
|
"margin_dpo/margin_std": 84.71268463134766,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.9507853403141361,
|
|
"fcm_dpo/beta": 0.009615411050617695,
|
|
"fcm_dpo/delta": -0.0042562056332826614,
|
|
"fcm_dpo/margin": 62.60576248168945,
|
|
"fcm_dpo/q_t": 0.3715764582157135,
|
|
"grad_norm": 76.87505340576172,
|
|
"learning_rate": 3.851229943335393e-09,
|
|
"logits/chosen": -0.8534815907478333,
|
|
"logits/rejected": -0.8655160665512085,
|
|
"logps/chosen": -401.67681884765625,
|
|
"logps/ref_chosen": -299.6117248535156,
|
|
"logps/ref_rejected": -303.74224853515625,
|
|
"logps/rejected": -468.4130554199219,
|
|
"loss": 4.1252,
|
|
"margin_dpo/margin_mean": 62.60576248168945,
|
|
"margin_dpo/margin_std": 85.04026794433594,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.9528795811518325,
|
|
"fcm_dpo/beta": 0.010230256244540215,
|
|
"fcm_dpo/delta": 0.13277457654476166,
|
|
"fcm_dpo/margin": 46.14201736450195,
|
|
"fcm_dpo/q_t": 0.402716726064682,
|
|
"grad_norm": 95.32615661621094,
|
|
"learning_rate": 3.5377236299748147e-09,
|
|
"logits/chosen": -0.807562530040741,
|
|
"logits/rejected": -0.8190088272094727,
|
|
"logps/chosen": -374.5747985839844,
|
|
"logps/ref_chosen": -273.6116943359375,
|
|
"logps/ref_rejected": -274.4293518066406,
|
|
"logps/rejected": -421.5345458984375,
|
|
"loss": 4.5798,
|
|
"margin_dpo/margin_mean": 46.14202117919922,
|
|
"margin_dpo/margin_std": 85.73726654052734,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.9549738219895288,
|
|
"fcm_dpo/beta": 0.010185835883021355,
|
|
"fcm_dpo/delta": -0.10059641301631927,
|
|
"fcm_dpo/margin": 63.664310455322266,
|
|
"fcm_dpo/q_t": 0.3761172890663147,
|
|
"grad_norm": 98.11104583740234,
|
|
"learning_rate": 3.2374343405217884e-09,
|
|
"logits/chosen": -0.7371918559074402,
|
|
"logits/rejected": -0.7502031326293945,
|
|
"logps/chosen": -438.56854248046875,
|
|
"logps/ref_chosen": -322.17193603515625,
|
|
"logps/ref_rejected": -294.54461669921875,
|
|
"logps/rejected": -474.6055603027344,
|
|
"loss": 4.3393,
|
|
"margin_dpo/margin_mean": 63.664310455322266,
|
|
"margin_dpo/margin_std": 105.6181411743164,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.9570680628272251,
|
|
"fcm_dpo/beta": 0.009687078185379505,
|
|
"fcm_dpo/delta": -0.013452993705868721,
|
|
"fcm_dpo/margin": 63.15357971191406,
|
|
"fcm_dpo/q_t": 0.3667003810405731,
|
|
"grad_norm": 85.27921295166016,
|
|
"learning_rate": 2.9503781785795713e-09,
|
|
"logits/chosen": -0.7975083589553833,
|
|
"logits/rejected": -0.8043266534805298,
|
|
"logps/chosen": -416.79241943359375,
|
|
"logps/ref_chosen": -307.7962341308594,
|
|
"logps/ref_rejected": -274.5501403808594,
|
|
"logps/rejected": -446.69989013671875,
|
|
"loss": 4.1568,
|
|
"margin_dpo/margin_mean": 63.15358352661133,
|
|
"margin_dpo/margin_std": 89.4337158203125,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.9591623036649215,
|
|
"fcm_dpo/beta": 0.010026252828538418,
|
|
"fcm_dpo/delta": 0.03702447563409805,
|
|
"fcm_dpo/margin": 56.15598678588867,
|
|
"fcm_dpo/q_t": 0.3839564621448517,
|
|
"grad_norm": 83.73405456542969,
|
|
"learning_rate": 2.6765705380989432e-09,
|
|
"logits/chosen": -0.822134256362915,
|
|
"logits/rejected": -0.8091610670089722,
|
|
"logps/chosen": -403.46807861328125,
|
|
"logps/ref_chosen": -297.0316467285156,
|
|
"logps/ref_rejected": -276.1112365722656,
|
|
"logps/rejected": -438.7036437988281,
|
|
"loss": 4.318,
|
|
"margin_dpo/margin_mean": 56.15598678588867,
|
|
"margin_dpo/margin_std": 87.26219940185547,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.9612565445026178,
|
|
"fcm_dpo/beta": 0.010189807042479515,
|
|
"fcm_dpo/delta": 0.022624600678682327,
|
|
"fcm_dpo/margin": 51.38506317138672,
|
|
"fcm_dpo/q_t": 0.39040350914001465,
|
|
"grad_norm": 119.48713684082031,
|
|
"learning_rate": 2.416026102552732e-09,
|
|
"logits/chosen": -0.8736098408699036,
|
|
"logits/rejected": -0.8673666715621948,
|
|
"logps/chosen": -394.96923828125,
|
|
"logps/ref_chosen": -293.5252990722656,
|
|
"logps/ref_rejected": -289.30126953125,
|
|
"logps/rejected": -442.1302490234375,
|
|
"loss": 4.3828,
|
|
"margin_dpo/margin_mean": 51.385066986083984,
|
|
"margin_dpo/margin_std": 80.866455078125,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.9633507853403142,
|
|
"fcm_dpo/beta": 0.010447122156620026,
|
|
"fcm_dpo/delta": 0.01566571742296219,
|
|
"fcm_dpo/margin": 50.86057662963867,
|
|
"fcm_dpo/q_t": 0.38575083017349243,
|
|
"grad_norm": 106.79894256591797,
|
|
"learning_rate": 2.168758844148272e-09,
|
|
"logits/chosen": -0.8482452034950256,
|
|
"logits/rejected": -0.8550105094909668,
|
|
"logps/chosen": -422.16461181640625,
|
|
"logps/ref_chosen": -318.7803649902344,
|
|
"logps/ref_rejected": -258.7906799316406,
|
|
"logps/rejected": -413.0354919433594,
|
|
"loss": 4.335,
|
|
"margin_dpo/margin_mean": 50.86057662963867,
|
|
"margin_dpo/margin_std": 78.11962890625,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.9654450261780104,
|
|
"fcm_dpo/beta": 0.010231072083115578,
|
|
"fcm_dpo/delta": -0.023067938163876534,
|
|
"fcm_dpo/margin": 56.03329849243164,
|
|
"fcm_dpo/q_t": 0.3828889727592468,
|
|
"grad_norm": 106.28280639648438,
|
|
"learning_rate": 1.9347820230782295e-09,
|
|
"logits/chosen": -0.8239161372184753,
|
|
"logits/rejected": -0.8513062596321106,
|
|
"logps/chosen": -346.3656311035156,
|
|
"logps/ref_chosen": -243.9099884033203,
|
|
"logps/ref_rejected": -232.6382293701172,
|
|
"logps/rejected": -391.1271667480469,
|
|
"loss": 4.351,
|
|
"margin_dpo/margin_mean": 56.03329849243164,
|
|
"margin_dpo/margin_std": 89.30838775634766,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.9675392670157068,
|
|
"fcm_dpo/beta": 0.00970435980707407,
|
|
"fcm_dpo/delta": -0.0686081126332283,
|
|
"fcm_dpo/margin": 68.12385559082031,
|
|
"fcm_dpo/q_t": 0.3644864857196808,
|
|
"grad_norm": 94.2146987915039,
|
|
"learning_rate": 1.7141081868094209e-09,
|
|
"logits/chosen": -0.8403683304786682,
|
|
"logits/rejected": -0.7992677092552185,
|
|
"logps/chosen": -448.24212646484375,
|
|
"logps/ref_chosen": -344.09100341796875,
|
|
"logps/ref_rejected": -252.45037841796875,
|
|
"logps/rejected": -424.72540283203125,
|
|
"loss": 4.0783,
|
|
"margin_dpo/margin_mean": 68.12385559082031,
|
|
"margin_dpo/margin_std": 93.93057250976562,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.9696335078534032,
|
|
"fcm_dpo/beta": 0.010029610246419907,
|
|
"fcm_dpo/delta": 0.08034525066614151,
|
|
"fcm_dpo/margin": 51.993682861328125,
|
|
"fcm_dpo/q_t": 0.39002859592437744,
|
|
"grad_norm": 99.33654022216797,
|
|
"learning_rate": 1.5067491694100153e-09,
|
|
"logits/chosen": -0.8565876483917236,
|
|
"logits/rejected": -0.8210662603378296,
|
|
"logps/chosen": -397.64654541015625,
|
|
"logps/ref_chosen": -297.1424560546875,
|
|
"logps/ref_rejected": -234.0208282470703,
|
|
"logps/rejected": -386.5185852050781,
|
|
"loss": 4.4017,
|
|
"margin_dpo/margin_mean": 51.99367904663086,
|
|
"margin_dpo/margin_std": 84.37198638916016,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.9717277486910995,
|
|
"fcm_dpo/beta": 0.01041481550782919,
|
|
"fcm_dpo/delta": 0.04956157132983208,
|
|
"fcm_dpo/margin": 52.998783111572266,
|
|
"fcm_dpo/q_t": 0.3862907886505127,
|
|
"grad_norm": 133.43360900878906,
|
|
"learning_rate": 1.3127160909147672e-09,
|
|
"logits/chosen": -0.8275717496871948,
|
|
"logits/rejected": -0.8531575202941895,
|
|
"logps/chosen": -378.3173522949219,
|
|
"logps/ref_chosen": -265.71075439453125,
|
|
"logps/ref_rejected": -256.4108581542969,
|
|
"logps/rejected": -422.01617431640625,
|
|
"loss": 4.3843,
|
|
"margin_dpo/margin_mean": 52.998779296875,
|
|
"margin_dpo/margin_std": 86.16059112548828,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.9738219895287958,
|
|
"fcm_dpo/beta": 0.009905759245157242,
|
|
"fcm_dpo/delta": -0.13921670615673065,
|
|
"fcm_dpo/margin": 65.77295684814453,
|
|
"fcm_dpo/q_t": 0.36359280347824097,
|
|
"grad_norm": 68.23556518554688,
|
|
"learning_rate": 1.1320193567288527e-09,
|
|
"logits/chosen": -0.8828303217887878,
|
|
"logits/rejected": -0.8572342395782471,
|
|
"logps/chosen": -391.30364990234375,
|
|
"logps/ref_chosen": -293.1527404785156,
|
|
"logps/ref_rejected": -293.70947265625,
|
|
"logps/rejected": -457.63336181640625,
|
|
"loss": 4.0356,
|
|
"margin_dpo/margin_mean": 65.77295684814453,
|
|
"margin_dpo/margin_std": 82.50149536132812,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.9759162303664921,
|
|
"fcm_dpo/beta": 0.009194673970341682,
|
|
"fcm_dpo/delta": -0.052108634263277054,
|
|
"fcm_dpo/margin": 70.25971221923828,
|
|
"fcm_dpo/q_t": 0.36073338985443115,
|
|
"grad_norm": 77.50592803955078,
|
|
"learning_rate": 9.64668657069706e-10,
|
|
"logits/chosen": -0.8009305000305176,
|
|
"logits/rejected": -0.7544541954994202,
|
|
"logps/chosen": -353.8938293457031,
|
|
"logps/ref_chosen": -261.4775695800781,
|
|
"logps/ref_rejected": -248.36282348632812,
|
|
"logps/rejected": -411.038818359375,
|
|
"loss": 3.8645,
|
|
"margin_dpo/margin_mean": 70.25971221923828,
|
|
"margin_dpo/margin_std": 77.51724243164062,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.9780104712041885,
|
|
"fcm_dpo/beta": 0.009051669389009476,
|
|
"fcm_dpo/delta": 0.01738828979432583,
|
|
"fcm_dpo/margin": 50.43449401855469,
|
|
"fcm_dpo/q_t": 0.40313076972961426,
|
|
"grad_norm": 100.67909240722656,
|
|
"learning_rate": 8.106729664475176e-10,
|
|
"logits/chosen": -0.7964373230934143,
|
|
"logits/rejected": -0.7927530407905579,
|
|
"logps/chosen": -372.4566955566406,
|
|
"logps/ref_chosen": -266.354248046875,
|
|
"logps/ref_rejected": -277.76324462890625,
|
|
"logps/rejected": -434.3001708984375,
|
|
"loss": 4.5803,
|
|
"margin_dpo/margin_mean": 50.43449401855469,
|
|
"margin_dpo/margin_std": 88.83377075195312,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.9801047120418848,
|
|
"fcm_dpo/beta": 0.009555336087942123,
|
|
"fcm_dpo/delta": 0.04439329728484154,
|
|
"fcm_dpo/margin": 52.71699523925781,
|
|
"fcm_dpo/q_t": 0.39236387610435486,
|
|
"grad_norm": 95.95642852783203,
|
|
"learning_rate": 6.700405431837585e-10,
|
|
"logits/chosen": -0.8729247450828552,
|
|
"logits/rejected": -0.847733736038208,
|
|
"logps/chosen": -419.738525390625,
|
|
"logps/ref_chosen": -317.9631652832031,
|
|
"logps/ref_rejected": -261.8744201660156,
|
|
"logps/rejected": -416.3667297363281,
|
|
"loss": 4.3829,
|
|
"margin_dpo/margin_mean": 52.71699523925781,
|
|
"margin_dpo/margin_std": 82.52155303955078,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.9821989528795811,
|
|
"fcm_dpo/beta": 0.009460176341235638,
|
|
"fcm_dpo/delta": -0.016594115644693375,
|
|
"fcm_dpo/margin": 64.84768676757812,
|
|
"fcm_dpo/q_t": 0.37026524543762207,
|
|
"grad_norm": 80.08511352539062,
|
|
"learning_rate": 5.427789289685347e-10,
|
|
"logits/chosen": -0.813917875289917,
|
|
"logits/rejected": -0.8034530282020569,
|
|
"logps/chosen": -421.9589538574219,
|
|
"logps/ref_chosen": -324.8868103027344,
|
|
"logps/ref_rejected": -264.0421447753906,
|
|
"logps/rejected": -425.9620056152344,
|
|
"loss": 4.1315,
|
|
"margin_dpo/margin_mean": 64.84768676757812,
|
|
"margin_dpo/margin_std": 89.04574584960938,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.9842931937172775,
|
|
"fcm_dpo/beta": 0.009737596847116947,
|
|
"fcm_dpo/delta": -0.01096111536026001,
|
|
"fcm_dpo/margin": 62.489173889160156,
|
|
"fcm_dpo/q_t": 0.3725891411304474,
|
|
"grad_norm": 75.43241119384766,
|
|
"learning_rate": 4.288949484559934e-10,
|
|
"logits/chosen": -0.8106139898300171,
|
|
"logits/rejected": -0.8112368583679199,
|
|
"logps/chosen": -408.78070068359375,
|
|
"logps/ref_chosen": -314.7042541503906,
|
|
"logps/ref_rejected": -259.2276611328125,
|
|
"logps/rejected": -415.7933044433594,
|
|
"loss": 4.1024,
|
|
"margin_dpo/margin_mean": 62.489173889160156,
|
|
"margin_dpo/margin_std": 83.64241027832031,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.9863874345549738,
|
|
"fcm_dpo/beta": 0.009809708222746849,
|
|
"fcm_dpo/delta": 0.014611058868467808,
|
|
"fcm_dpo/margin": 54.49514389038086,
|
|
"fcm_dpo/q_t": 0.3918081820011139,
|
|
"grad_norm": 100.71548461914062,
|
|
"learning_rate": 3.2839470889836627e-10,
|
|
"logits/chosen": -0.8629408478736877,
|
|
"logits/rejected": -0.8535081148147583,
|
|
"logps/chosen": -400.7052917480469,
|
|
"logps/ref_chosen": -292.5748291015625,
|
|
"logps/ref_rejected": -298.7525329589844,
|
|
"logps/rejected": -461.37811279296875,
|
|
"loss": 4.3804,
|
|
"margin_dpo/margin_mean": 54.49514389038086,
|
|
"margin_dpo/margin_std": 88.37163543701172,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.9884816753926702,
|
|
"fcm_dpo/beta": 0.009502904489636421,
|
|
"fcm_dpo/delta": -0.0033622095361351967,
|
|
"fcm_dpo/margin": 63.385826110839844,
|
|
"fcm_dpo/q_t": 0.3735297918319702,
|
|
"grad_norm": 83.12667083740234,
|
|
"learning_rate": 2.412835998185092e-10,
|
|
"logits/chosen": -0.8532136082649231,
|
|
"logits/rejected": -0.8691096901893616,
|
|
"logps/chosen": -336.4410400390625,
|
|
"logps/ref_chosen": -243.37380981445312,
|
|
"logps/ref_rejected": -251.12109375,
|
|
"logps/rejected": -407.5741271972656,
|
|
"loss": 4.0089,
|
|
"margin_dpo/margin_mean": 63.385826110839844,
|
|
"margin_dpo/margin_std": 79.39556884765625,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.9905759162303664,
|
|
"fcm_dpo/beta": 0.00958459172397852,
|
|
"fcm_dpo/delta": -0.033373601734638214,
|
|
"fcm_dpo/margin": 65.87982177734375,
|
|
"fcm_dpo/q_t": 0.36491870880126953,
|
|
"grad_norm": 100.53154754638672,
|
|
"learning_rate": 1.6756629272085544e-10,
|
|
"logits/chosen": -0.8084653615951538,
|
|
"logits/rejected": -0.8148469924926758,
|
|
"logps/chosen": -385.27142333984375,
|
|
"logps/ref_chosen": -286.3286437988281,
|
|
"logps/ref_rejected": -258.6535339355469,
|
|
"logps/rejected": -423.4761962890625,
|
|
"loss": 3.9915,
|
|
"margin_dpo/margin_mean": 65.87982177734375,
|
|
"margin_dpo/margin_std": 82.26141357421875,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.9926701570680628,
|
|
"fcm_dpo/beta": 0.009304332546889782,
|
|
"fcm_dpo/delta": 0.059272147715091705,
|
|
"fcm_dpo/margin": 51.1921272277832,
|
|
"fcm_dpo/q_t": 0.3959723114967346,
|
|
"grad_norm": 109.58087921142578,
|
|
"learning_rate": 1.072467408408384e-10,
|
|
"logits/chosen": -0.839458167552948,
|
|
"logits/rejected": -0.8423305749893188,
|
|
"logps/chosen": -393.1019287109375,
|
|
"logps/ref_chosen": -288.08966064453125,
|
|
"logps/ref_rejected": -266.69696044921875,
|
|
"logps/rejected": -422.9013977050781,
|
|
"loss": 4.3602,
|
|
"margin_dpo/margin_mean": 51.1921272277832,
|
|
"margin_dpo/margin_std": 72.16545104980469,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.9947643979057592,
|
|
"fcm_dpo/beta": 0.009782197885215282,
|
|
"fcm_dpo/delta": 0.013262166641652584,
|
|
"fcm_dpo/margin": 53.376014709472656,
|
|
"fcm_dpo/q_t": 0.3906119465827942,
|
|
"grad_norm": 89.71319580078125,
|
|
"learning_rate": 6.032817893297793e-11,
|
|
"logits/chosen": -0.812603771686554,
|
|
"logits/rejected": -0.8350270390510559,
|
|
"logps/chosen": -350.7452087402344,
|
|
"logps/ref_chosen": -256.0030517578125,
|
|
"logps/ref_rejected": -244.50660705566406,
|
|
"logps/rejected": -392.624755859375,
|
|
"loss": 4.3125,
|
|
"margin_dpo/margin_mean": 53.376007080078125,
|
|
"margin_dpo/margin_std": 78.27056884765625,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.9968586387434555,
|
|
"fcm_dpo/beta": 0.01014248188585043,
|
|
"fcm_dpo/delta": 0.01945674978196621,
|
|
"fcm_dpo/margin": 56.881683349609375,
|
|
"fcm_dpo/q_t": 0.3829057216644287,
|
|
"grad_norm": 124.16419982910156,
|
|
"learning_rate": 2.6813123097352287e-11,
|
|
"logits/chosen": -0.8888995051383972,
|
|
"logits/rejected": -0.8492950797080994,
|
|
"logps/chosen": -414.1214904785156,
|
|
"logps/ref_chosen": -321.467529296875,
|
|
"logps/ref_rejected": -295.0592956542969,
|
|
"logps/rejected": -444.594970703125,
|
|
"loss": 4.316,
|
|
"margin_dpo/margin_mean": 56.881683349609375,
|
|
"margin_dpo/margin_std": 87.3014907836914,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.9989528795811519,
|
|
"fcm_dpo/beta": 0.010023507289588451,
|
|
"fcm_dpo/delta": -0.04792780801653862,
|
|
"fcm_dpo/margin": 59.55145263671875,
|
|
"fcm_dpo/q_t": 0.38020825386047363,
|
|
"grad_norm": 126.22605895996094,
|
|
"learning_rate": 6.7033706447061635e-12,
|
|
"logits/chosen": -0.7799222469329834,
|
|
"logits/rejected": -0.792705774307251,
|
|
"logps/chosen": -385.03021240234375,
|
|
"logps/ref_chosen": -276.7939758300781,
|
|
"logps/ref_rejected": -244.82919311523438,
|
|
"logps/rejected": -412.61688232421875,
|
|
"loss": 4.3806,
|
|
"margin_dpo/margin_mean": 59.55145263671875,
|
|
"margin_dpo/margin_std": 96.72840881347656,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.9989528795811519,
|
|
"step": 477,
|
|
"total_flos": 0.0,
|
|
"train_loss": 4.542374380479568,
|
|
"train_runtime": 6039.2377,
|
|
"train_samples_per_second": 10.123,
|
|
"train_steps_per_second": 0.079
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 477,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 50,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|