Model: W-61/qwen3-8b-base-beta-dpo-ultrafeedback-4xh200-batch-128-20260423-040315 Source: Original Platform
6752 lines
233 KiB
JSON
6752 lines
233 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9989528795811519,
|
|
"eval_steps": 200,
|
|
"global_step": 477,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"beta_dpo/beta_used": 0.010316052474081516,
|
|
"beta_dpo/beta_used_raw": 0.010316052474081516,
|
|
"beta_dpo/gap_mean": -0.0030604612547904253,
|
|
"beta_dpo/gap_std": 0.273499995470047,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.0020942408376963353,
|
|
"grad_norm": 15.496143341064453,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": 2.203179359436035,
|
|
"logits/rejected": 2.035616397857666,
|
|
"loss": 5.5428,
|
|
"step": 1
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009904756210744381,
|
|
"beta_dpo/beta_used_raw": 0.009904756210744381,
|
|
"beta_dpo/gap_mean": 0.0473581925034523,
|
|
"beta_dpo/gap_std": 0.6410814523696899,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.004188481675392671,
|
|
"grad_norm": 15.881836891174316,
|
|
"learning_rate": 1.0416666666666666e-08,
|
|
"logits/chosen": 2.1704792976379395,
|
|
"logits/rejected": 2.0754430294036865,
|
|
"loss": 5.5442,
|
|
"step": 2
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010276634246110916,
|
|
"beta_dpo/beta_used_raw": 0.010276634246110916,
|
|
"beta_dpo/gap_mean": 0.040970198810100555,
|
|
"beta_dpo/gap_std": 0.7673041224479675,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.0062827225130890054,
|
|
"grad_norm": 16.63137435913086,
|
|
"learning_rate": 2.083333333333333e-08,
|
|
"logits/chosen": 2.4686079025268555,
|
|
"logits/rejected": 2.464277505874634,
|
|
"loss": 5.5428,
|
|
"step": 3
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01017595175653696,
|
|
"beta_dpo/beta_used_raw": 0.01017595175653696,
|
|
"beta_dpo/gap_mean": 0.06479164212942123,
|
|
"beta_dpo/gap_std": 0.8090450763702393,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.008376963350785341,
|
|
"grad_norm": 19.53766632080078,
|
|
"learning_rate": 3.125e-08,
|
|
"logits/chosen": 1.7211281061172485,
|
|
"logits/rejected": 1.5812376737594604,
|
|
"loss": 5.5403,
|
|
"step": 4
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009877461940050125,
|
|
"beta_dpo/beta_used_raw": 0.009877461940050125,
|
|
"beta_dpo/gap_mean": 0.03874587640166283,
|
|
"beta_dpo/gap_std": 0.8403902649879456,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.010471204188481676,
|
|
"grad_norm": 17.47425651550293,
|
|
"learning_rate": 4.166666666666666e-08,
|
|
"logits/chosen": 1.8391205072402954,
|
|
"logits/rejected": 1.8945659399032593,
|
|
"loss": 5.5435,
|
|
"step": 5
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009602357633411884,
|
|
"beta_dpo/beta_used_raw": 0.009602357633411884,
|
|
"beta_dpo/gap_mean": 0.013125958852469921,
|
|
"beta_dpo/gap_std": 0.8970670700073242,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.012565445026178011,
|
|
"grad_norm": 17.965578079223633,
|
|
"learning_rate": 5.208333333333333e-08,
|
|
"logits/chosen": 1.8753392696380615,
|
|
"logits/rejected": 1.806428074836731,
|
|
"loss": 5.546,
|
|
"step": 6
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010046536102890968,
|
|
"beta_dpo/beta_used_raw": 0.010046536102890968,
|
|
"beta_dpo/gap_mean": 0.00752235297113657,
|
|
"beta_dpo/gap_std": 0.9090036153793335,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.014659685863874346,
|
|
"grad_norm": 18.481788635253906,
|
|
"learning_rate": 6.25e-08,
|
|
"logits/chosen": 2.1977810859680176,
|
|
"logits/rejected": 2.027773141860962,
|
|
"loss": 5.543,
|
|
"step": 7
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009285343810915947,
|
|
"beta_dpo/beta_used_raw": 0.009285343810915947,
|
|
"beta_dpo/gap_mean": -0.0737709105014801,
|
|
"beta_dpo/gap_std": 0.9767862558364868,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.016753926701570682,
|
|
"grad_norm": 17.283451080322266,
|
|
"learning_rate": 7.291666666666667e-08,
|
|
"logits/chosen": 2.3551371097564697,
|
|
"logits/rejected": 2.089672088623047,
|
|
"loss": 5.5522,
|
|
"step": 8
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010606064461171627,
|
|
"beta_dpo/beta_used_raw": 0.010606064461171627,
|
|
"beta_dpo/gap_mean": -0.04680243134498596,
|
|
"beta_dpo/gap_std": 0.9687216281890869,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.018848167539267015,
|
|
"grad_norm": 16.163658142089844,
|
|
"learning_rate": 8.333333333333333e-08,
|
|
"logits/chosen": 2.1110918521881104,
|
|
"logits/rejected": 2.0067708492279053,
|
|
"loss": 5.5433,
|
|
"step": 9
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.00987918209284544,
|
|
"beta_dpo/beta_used_raw": 0.00987918209284544,
|
|
"beta_dpo/gap_mean": -0.03316927328705788,
|
|
"beta_dpo/gap_std": 0.8964071273803711,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.020942408376963352,
|
|
"grad_norm": 15.014591217041016,
|
|
"learning_rate": 9.375e-08,
|
|
"logits/chosen": 1.858559012413025,
|
|
"logits/rejected": 2.0337729454040527,
|
|
"loss": 5.5481,
|
|
"step": 10
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010337094776332378,
|
|
"beta_dpo/beta_used_raw": 0.010337094776332378,
|
|
"beta_dpo/gap_mean": 0.03589403256773949,
|
|
"beta_dpo/gap_std": 0.8406289219856262,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.023036649214659685,
|
|
"grad_norm": 18.00157356262207,
|
|
"learning_rate": 1.0416666666666667e-07,
|
|
"logits/chosen": 1.893631100654602,
|
|
"logits/rejected": 1.8213893175125122,
|
|
"loss": 5.5413,
|
|
"step": 11
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009809032082557678,
|
|
"beta_dpo/beta_used_raw": 0.009809032082557678,
|
|
"beta_dpo/gap_mean": 0.031110307201743126,
|
|
"beta_dpo/gap_std": 0.8743820190429688,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.025130890052356022,
|
|
"grad_norm": 16.61766815185547,
|
|
"learning_rate": 1.1458333333333332e-07,
|
|
"logits/chosen": 1.5167274475097656,
|
|
"logits/rejected": 1.6536264419555664,
|
|
"loss": 5.5438,
|
|
"step": 12
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009467006660997868,
|
|
"beta_dpo/beta_used_raw": 0.009467006660997868,
|
|
"beta_dpo/gap_mean": -9.547406807541847e-05,
|
|
"beta_dpo/gap_std": 0.9159330725669861,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.027225130890052355,
|
|
"grad_norm": 18.662208557128906,
|
|
"learning_rate": 1.25e-07,
|
|
"logits/chosen": 1.8461039066314697,
|
|
"logits/rejected": 1.8939508199691772,
|
|
"loss": 5.5481,
|
|
"step": 13
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009789557196199894,
|
|
"beta_dpo/beta_used_raw": 0.009789557196199894,
|
|
"beta_dpo/gap_mean": -0.035510119050741196,
|
|
"beta_dpo/gap_std": 0.8479209542274475,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.02931937172774869,
|
|
"grad_norm": 15.506324768066406,
|
|
"learning_rate": 1.3541666666666666e-07,
|
|
"logits/chosen": 1.8386187553405762,
|
|
"logits/rejected": 1.5979816913604736,
|
|
"loss": 5.5477,
|
|
"step": 14
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010104680433869362,
|
|
"beta_dpo/beta_used_raw": 0.010104680433869362,
|
|
"beta_dpo/gap_mean": -0.05601261928677559,
|
|
"beta_dpo/gap_std": 0.8992904424667358,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.031413612565445025,
|
|
"grad_norm": 17.449304580688477,
|
|
"learning_rate": 1.4583333333333335e-07,
|
|
"logits/chosen": 1.9075326919555664,
|
|
"logits/rejected": 1.7650988101959229,
|
|
"loss": 5.5445,
|
|
"step": 15
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010083270259201527,
|
|
"beta_dpo/beta_used_raw": 0.010083270259201527,
|
|
"beta_dpo/gap_mean": -0.037581950426101685,
|
|
"beta_dpo/gap_std": 0.9426290988922119,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.033507853403141365,
|
|
"grad_norm": 18.769243240356445,
|
|
"learning_rate": 1.5624999999999999e-07,
|
|
"logits/chosen": 2.0930874347686768,
|
|
"logits/rejected": 1.8253268003463745,
|
|
"loss": 5.5458,
|
|
"step": 16
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009928649291396141,
|
|
"beta_dpo/beta_used_raw": 0.009928649291396141,
|
|
"beta_dpo/gap_mean": -0.03386215493083,
|
|
"beta_dpo/gap_std": 0.9212523102760315,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.0356020942408377,
|
|
"grad_norm": 20.794923782348633,
|
|
"learning_rate": 1.6666666666666665e-07,
|
|
"logits/chosen": 1.769667387008667,
|
|
"logits/rejected": 1.7814725637435913,
|
|
"loss": 5.5484,
|
|
"step": 17
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01007060892879963,
|
|
"beta_dpo/beta_used_raw": 0.01007060892879963,
|
|
"beta_dpo/gap_mean": -0.01796822063624859,
|
|
"beta_dpo/gap_std": 0.8694018721580505,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.03769633507853403,
|
|
"grad_norm": 16.827281951904297,
|
|
"learning_rate": 1.7708333333333334e-07,
|
|
"logits/chosen": 1.7808014154434204,
|
|
"logits/rejected": 1.7646872997283936,
|
|
"loss": 5.5437,
|
|
"step": 18
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009850156493484974,
|
|
"beta_dpo/beta_used_raw": 0.009850156493484974,
|
|
"beta_dpo/gap_mean": -0.04470803216099739,
|
|
"beta_dpo/gap_std": 0.8516724705696106,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.039790575916230364,
|
|
"grad_norm": 16.883514404296875,
|
|
"learning_rate": 1.875e-07,
|
|
"logits/chosen": 2.054273843765259,
|
|
"logits/rejected": 2.0647222995758057,
|
|
"loss": 5.5483,
|
|
"step": 19
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009869220666587353,
|
|
"beta_dpo/beta_used_raw": 0.009869220666587353,
|
|
"beta_dpo/gap_mean": -0.02124340645968914,
|
|
"beta_dpo/gap_std": 0.8342310190200806,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.041884816753926704,
|
|
"grad_norm": 17.35634994506836,
|
|
"learning_rate": 1.9791666666666664e-07,
|
|
"logits/chosen": 2.368907928466797,
|
|
"logits/rejected": 2.167264223098755,
|
|
"loss": 5.5473,
|
|
"step": 20
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009426544420421124,
|
|
"beta_dpo/beta_used_raw": 0.009426544420421124,
|
|
"beta_dpo/gap_mean": -0.017612561583518982,
|
|
"beta_dpo/gap_std": 0.8350470066070557,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.04397905759162304,
|
|
"grad_norm": 15.612009048461914,
|
|
"learning_rate": 2.0833333333333333e-07,
|
|
"logits/chosen": 2.1447153091430664,
|
|
"logits/rejected": 2.121504545211792,
|
|
"loss": 5.5489,
|
|
"step": 21
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01062285527586937,
|
|
"beta_dpo/beta_used_raw": 0.01062285527586937,
|
|
"beta_dpo/gap_mean": 0.06357374787330627,
|
|
"beta_dpo/gap_std": 0.8492311835289001,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.04607329842931937,
|
|
"grad_norm": 17.105073928833008,
|
|
"learning_rate": 2.1875e-07,
|
|
"logits/chosen": 1.6775203943252563,
|
|
"logits/rejected": 1.841507911682129,
|
|
"loss": 5.5386,
|
|
"step": 22
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009609552100300789,
|
|
"beta_dpo/beta_used_raw": 0.009609552100300789,
|
|
"beta_dpo/gap_mean": 0.09488284587860107,
|
|
"beta_dpo/gap_std": 0.7845069169998169,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.048167539267015703,
|
|
"grad_norm": 17.074167251586914,
|
|
"learning_rate": 2.2916666666666663e-07,
|
|
"logits/chosen": 2.0019335746765137,
|
|
"logits/rejected": 1.876702070236206,
|
|
"loss": 5.5427,
|
|
"step": 23
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009548784233629704,
|
|
"beta_dpo/beta_used_raw": 0.009548784233629704,
|
|
"beta_dpo/gap_mean": 0.01768093928694725,
|
|
"beta_dpo/gap_std": 0.821352481842041,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.050261780104712044,
|
|
"grad_norm": 16.67466163635254,
|
|
"learning_rate": 2.3958333333333335e-07,
|
|
"logits/chosen": 2.0418663024902344,
|
|
"logits/rejected": 1.9522861242294312,
|
|
"loss": 5.5466,
|
|
"step": 24
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010621692053973675,
|
|
"beta_dpo/beta_used_raw": 0.010621692053973675,
|
|
"beta_dpo/gap_mean": 0.02274535596370697,
|
|
"beta_dpo/gap_std": 0.7953328490257263,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.05235602094240838,
|
|
"grad_norm": 18.33420753479004,
|
|
"learning_rate": 2.5e-07,
|
|
"logits/chosen": 1.807928204536438,
|
|
"logits/rejected": 1.8295968770980835,
|
|
"loss": 5.5401,
|
|
"step": 25
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009963510558009148,
|
|
"beta_dpo/beta_used_raw": 0.009963510558009148,
|
|
"beta_dpo/gap_mean": 0.053856804966926575,
|
|
"beta_dpo/gap_std": 0.7753854990005493,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.05445026178010471,
|
|
"grad_norm": 17.823503494262695,
|
|
"learning_rate": 2.604166666666667e-07,
|
|
"logits/chosen": 1.6102561950683594,
|
|
"logits/rejected": 1.5492463111877441,
|
|
"loss": 5.5438,
|
|
"step": 26
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009892760775983334,
|
|
"beta_dpo/beta_used_raw": 0.009892760775983334,
|
|
"beta_dpo/gap_mean": 0.035262782126665115,
|
|
"beta_dpo/gap_std": 0.7987048625946045,
|
|
"beta_dpo/mask_keep_frac": 0.90625,
|
|
"epoch": 0.05654450261780105,
|
|
"grad_norm": 17.028757095336914,
|
|
"learning_rate": 2.708333333333333e-07,
|
|
"logits/chosen": 2.1599764823913574,
|
|
"logits/rejected": 1.9214812517166138,
|
|
"loss": 5.5447,
|
|
"step": 27
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010526652447879314,
|
|
"beta_dpo/beta_used_raw": 0.010526652447879314,
|
|
"beta_dpo/gap_mean": 0.05413653701543808,
|
|
"beta_dpo/gap_std": 0.794916033744812,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.05863874345549738,
|
|
"grad_norm": 19.700441360473633,
|
|
"learning_rate": 2.8125e-07,
|
|
"logits/chosen": 1.9106848239898682,
|
|
"logits/rejected": 2.0312745571136475,
|
|
"loss": 5.5371,
|
|
"step": 28
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010448331013321877,
|
|
"beta_dpo/beta_used_raw": 0.010448331013321877,
|
|
"beta_dpo/gap_mean": 0.02559659071266651,
|
|
"beta_dpo/gap_std": 0.8567264080047607,
|
|
"beta_dpo/mask_keep_frac": 0.625,
|
|
"epoch": 0.060732984293193716,
|
|
"grad_norm": 16.468107223510742,
|
|
"learning_rate": 2.916666666666667e-07,
|
|
"logits/chosen": 2.2274394035339355,
|
|
"logits/rejected": 1.952311635017395,
|
|
"loss": 5.5416,
|
|
"step": 29
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009916335344314575,
|
|
"beta_dpo/beta_used_raw": 0.009916335344314575,
|
|
"beta_dpo/gap_mean": 0.04508252441883087,
|
|
"beta_dpo/gap_std": 0.8601223826408386,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.06282722513089005,
|
|
"grad_norm": 16.325408935546875,
|
|
"learning_rate": 3.020833333333333e-07,
|
|
"logits/chosen": 1.463683843612671,
|
|
"logits/rejected": 1.4335768222808838,
|
|
"loss": 5.5426,
|
|
"step": 30
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010172335430979729,
|
|
"beta_dpo/beta_used_raw": 0.010172335430979729,
|
|
"beta_dpo/gap_mean": 0.06362677365541458,
|
|
"beta_dpo/gap_std": 0.7783647775650024,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.06492146596858639,
|
|
"grad_norm": 15.478079795837402,
|
|
"learning_rate": 3.1249999999999997e-07,
|
|
"logits/chosen": 1.877584457397461,
|
|
"logits/rejected": 1.7691612243652344,
|
|
"loss": 5.5409,
|
|
"step": 31
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010151976719498634,
|
|
"beta_dpo/beta_used_raw": 0.010151976719498634,
|
|
"beta_dpo/gap_mean": 0.06375724077224731,
|
|
"beta_dpo/gap_std": 0.8205698728561401,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.06701570680628273,
|
|
"grad_norm": 16.919126510620117,
|
|
"learning_rate": 3.2291666666666666e-07,
|
|
"logits/chosen": 1.713607668876648,
|
|
"logits/rejected": 1.5853075981140137,
|
|
"loss": 5.5403,
|
|
"step": 32
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010386324487626553,
|
|
"beta_dpo/beta_used_raw": 0.010386324487626553,
|
|
"beta_dpo/gap_mean": 0.08595895767211914,
|
|
"beta_dpo/gap_std": 0.9470534324645996,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.06910994764397906,
|
|
"grad_norm": 18.542863845825195,
|
|
"learning_rate": 3.333333333333333e-07,
|
|
"logits/chosen": 1.8243309259414673,
|
|
"logits/rejected": 1.729980230331421,
|
|
"loss": 5.5374,
|
|
"step": 33
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009925332851707935,
|
|
"beta_dpo/beta_used_raw": 0.009925332851707935,
|
|
"beta_dpo/gap_mean": 0.09634880721569061,
|
|
"beta_dpo/gap_std": 0.9391544461250305,
|
|
"beta_dpo/mask_keep_frac": 0.625,
|
|
"epoch": 0.0712041884816754,
|
|
"grad_norm": 20.447566986083984,
|
|
"learning_rate": 3.4375e-07,
|
|
"logits/chosen": 2.0654332637786865,
|
|
"logits/rejected": 2.0050528049468994,
|
|
"loss": 5.5405,
|
|
"step": 34
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009798412211239338,
|
|
"beta_dpo/beta_used_raw": 0.009798412211239338,
|
|
"beta_dpo/gap_mean": 0.09882716089487076,
|
|
"beta_dpo/gap_std": 0.9505617022514343,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.07329842931937172,
|
|
"grad_norm": 15.859660148620605,
|
|
"learning_rate": 3.541666666666667e-07,
|
|
"logits/chosen": 1.4941397905349731,
|
|
"logits/rejected": 1.6851754188537598,
|
|
"loss": 5.5409,
|
|
"step": 35
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010313436388969421,
|
|
"beta_dpo/beta_used_raw": 0.010313436388969421,
|
|
"beta_dpo/gap_mean": 0.12937475740909576,
|
|
"beta_dpo/gap_std": 0.9316422939300537,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.07539267015706806,
|
|
"grad_norm": 17.933530807495117,
|
|
"learning_rate": 3.645833333333333e-07,
|
|
"logits/chosen": 1.7557207345962524,
|
|
"logits/rejected": 1.8125189542770386,
|
|
"loss": 5.5377,
|
|
"step": 36
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009959274902939796,
|
|
"beta_dpo/beta_used_raw": 0.009959274902939796,
|
|
"beta_dpo/gap_mean": 0.13312453031539917,
|
|
"beta_dpo/gap_std": 0.9395788908004761,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.0774869109947644,
|
|
"grad_norm": 18.94852638244629,
|
|
"learning_rate": 3.75e-07,
|
|
"logits/chosen": 2.1051876544952393,
|
|
"logits/rejected": 2.0780932903289795,
|
|
"loss": 5.5388,
|
|
"step": 37
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009908566251397133,
|
|
"beta_dpo/beta_used_raw": 0.009908566251397133,
|
|
"beta_dpo/gap_mean": 0.16690538823604584,
|
|
"beta_dpo/gap_std": 0.9445586800575256,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.07958115183246073,
|
|
"grad_norm": 16.41166114807129,
|
|
"learning_rate": 3.8541666666666665e-07,
|
|
"logits/chosen": 2.1622610092163086,
|
|
"logits/rejected": 2.414966344833374,
|
|
"loss": 5.5385,
|
|
"step": 38
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009442973881959915,
|
|
"beta_dpo/beta_used_raw": 0.009442973881959915,
|
|
"beta_dpo/gap_mean": 0.2755042314529419,
|
|
"beta_dpo/gap_std": 0.9882732629776001,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.08167539267015707,
|
|
"grad_norm": 16.65612030029297,
|
|
"learning_rate": 3.958333333333333e-07,
|
|
"logits/chosen": 2.00819730758667,
|
|
"logits/rejected": 2.0810117721557617,
|
|
"loss": 5.5383,
|
|
"step": 39
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.00932924635708332,
|
|
"beta_dpo/beta_used_raw": 0.00932924635708332,
|
|
"beta_dpo/gap_mean": 0.2719506323337555,
|
|
"beta_dpo/gap_std": 1.0504027605056763,
|
|
"beta_dpo/mask_keep_frac": 0.625,
|
|
"epoch": 0.08376963350785341,
|
|
"grad_norm": 14.621367454528809,
|
|
"learning_rate": 4.0625e-07,
|
|
"logits/chosen": 1.8936258554458618,
|
|
"logits/rejected": 1.895420789718628,
|
|
"loss": 5.5403,
|
|
"step": 40
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009584764949977398,
|
|
"beta_dpo/beta_used_raw": 0.009584764949977398,
|
|
"beta_dpo/gap_mean": 0.19441170990467072,
|
|
"beta_dpo/gap_std": 1.045138955116272,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.08586387434554973,
|
|
"grad_norm": 19.228687286376953,
|
|
"learning_rate": 4.1666666666666667e-07,
|
|
"logits/chosen": 1.925986647605896,
|
|
"logits/rejected": 1.7834522724151611,
|
|
"loss": 5.539,
|
|
"step": 41
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01015196181833744,
|
|
"beta_dpo/beta_used_raw": 0.01015196181833744,
|
|
"beta_dpo/gap_mean": 0.273733526468277,
|
|
"beta_dpo/gap_std": 1.0639562606811523,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.08795811518324607,
|
|
"grad_norm": 16.421497344970703,
|
|
"learning_rate": 4.270833333333333e-07,
|
|
"logits/chosen": 2.446347236633301,
|
|
"logits/rejected": 2.493040084838867,
|
|
"loss": 5.533,
|
|
"step": 42
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010610947385430336,
|
|
"beta_dpo/beta_used_raw": 0.010610947385430336,
|
|
"beta_dpo/gap_mean": 0.32640647888183594,
|
|
"beta_dpo/gap_std": 1.1364136934280396,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.09005235602094241,
|
|
"grad_norm": 17.893566131591797,
|
|
"learning_rate": 4.375e-07,
|
|
"logits/chosen": 1.920936107635498,
|
|
"logits/rejected": 1.9038302898406982,
|
|
"loss": 5.5271,
|
|
"step": 43
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01078065950423479,
|
|
"beta_dpo/beta_used_raw": 0.01078065950423479,
|
|
"beta_dpo/gap_mean": 0.3758638799190521,
|
|
"beta_dpo/gap_std": 1.1031302213668823,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.09214659685863874,
|
|
"grad_norm": 20.40181541442871,
|
|
"learning_rate": 4.479166666666667e-07,
|
|
"logits/chosen": 1.7042187452316284,
|
|
"logits/rejected": 1.6264781951904297,
|
|
"loss": 5.5232,
|
|
"step": 44
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009485357441008091,
|
|
"beta_dpo/beta_used_raw": 0.009485357441008091,
|
|
"beta_dpo/gap_mean": 0.4286791682243347,
|
|
"beta_dpo/gap_std": 1.1151459217071533,
|
|
"beta_dpo/mask_keep_frac": 0.9375,
|
|
"epoch": 0.09424083769633508,
|
|
"grad_norm": 18.97907829284668,
|
|
"learning_rate": 4.5833333333333327e-07,
|
|
"logits/chosen": 2.0053882598876953,
|
|
"logits/rejected": 1.8914456367492676,
|
|
"loss": 5.5308,
|
|
"step": 45
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009595800191164017,
|
|
"beta_dpo/beta_used_raw": 0.009595800191164017,
|
|
"beta_dpo/gap_mean": 0.4576748311519623,
|
|
"beta_dpo/gap_std": 1.219599723815918,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.09633507853403141,
|
|
"grad_norm": 18.820371627807617,
|
|
"learning_rate": 4.6874999999999996e-07,
|
|
"logits/chosen": 1.9573893547058105,
|
|
"logits/rejected": 2.0128352642059326,
|
|
"loss": 5.5285,
|
|
"step": 46
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.00906536914408207,
|
|
"beta_dpo/beta_used_raw": 0.00906536914408207,
|
|
"beta_dpo/gap_mean": 0.4006018042564392,
|
|
"beta_dpo/gap_std": 1.2177817821502686,
|
|
"beta_dpo/mask_keep_frac": 0.625,
|
|
"epoch": 0.09842931937172775,
|
|
"grad_norm": 14.672569274902344,
|
|
"learning_rate": 4.791666666666667e-07,
|
|
"logits/chosen": 1.6562869548797607,
|
|
"logits/rejected": 2.0256872177124023,
|
|
"loss": 5.5352,
|
|
"step": 47
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009402711875736713,
|
|
"beta_dpo/beta_used_raw": 0.009402711875736713,
|
|
"beta_dpo/gap_mean": 0.41205257177352905,
|
|
"beta_dpo/gap_std": 1.2531991004943848,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.10052356020942409,
|
|
"grad_norm": 14.840012550354004,
|
|
"learning_rate": 4.895833333333333e-07,
|
|
"logits/chosen": 2.0920979976654053,
|
|
"logits/rejected": 2.0639383792877197,
|
|
"loss": 5.531,
|
|
"step": 48
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.008774153888225555,
|
|
"beta_dpo/beta_used_raw": 0.008774153888225555,
|
|
"beta_dpo/gap_mean": 0.4781131148338318,
|
|
"beta_dpo/gap_std": 1.356748342514038,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.10261780104712041,
|
|
"grad_norm": 13.876286506652832,
|
|
"learning_rate": 5e-07,
|
|
"logits/chosen": 1.855541706085205,
|
|
"logits/rejected": 1.988050937652588,
|
|
"loss": 5.5333,
|
|
"step": 49
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009108037687838078,
|
|
"beta_dpo/beta_used_raw": 0.009108037687838078,
|
|
"beta_dpo/gap_mean": 0.38943564891815186,
|
|
"beta_dpo/gap_std": 1.4389784336090088,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.10471204188481675,
|
|
"grad_norm": 15.5634126663208,
|
|
"learning_rate": 4.999932966293553e-07,
|
|
"logits/chosen": 2.0256078243255615,
|
|
"logits/rejected": 2.1688108444213867,
|
|
"loss": 5.5343,
|
|
"step": 50
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01051395758986473,
|
|
"beta_dpo/beta_used_raw": 0.01051395758986473,
|
|
"beta_dpo/gap_mean": 0.49393463134765625,
|
|
"beta_dpo/gap_std": 1.5790597200393677,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.1068062827225131,
|
|
"grad_norm": 20.477724075317383,
|
|
"learning_rate": 4.999731868769026e-07,
|
|
"logits/chosen": 1.561262607574463,
|
|
"logits/rejected": 1.7091399431228638,
|
|
"loss": 5.5166,
|
|
"step": 51
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010623252019286156,
|
|
"beta_dpo/beta_used_raw": 0.010623252019286156,
|
|
"beta_dpo/gap_mean": 0.6119964122772217,
|
|
"beta_dpo/gap_std": 1.613837480545044,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.10890052356020942,
|
|
"grad_norm": 21.817190170288086,
|
|
"learning_rate": 4.99939671821067e-07,
|
|
"logits/chosen": 2.082730531692505,
|
|
"logits/rejected": 2.247464656829834,
|
|
"loss": 5.5081,
|
|
"step": 52
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01156248152256012,
|
|
"beta_dpo/beta_used_raw": 0.01156248152256012,
|
|
"beta_dpo/gap_mean": 0.5816015601158142,
|
|
"beta_dpo/gap_std": 1.6215416193008423,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.11099476439790576,
|
|
"grad_norm": 22.167213439941406,
|
|
"learning_rate": 4.998927532591591e-07,
|
|
"logits/chosen": 2.082489013671875,
|
|
"logits/rejected": 2.072319984436035,
|
|
"loss": 5.497,
|
|
"step": 53
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.008477726019918919,
|
|
"beta_dpo/beta_used_raw": 0.008477726019918919,
|
|
"beta_dpo/gap_mean": 0.7841604948043823,
|
|
"beta_dpo/gap_std": 1.7853457927703857,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.1130890052356021,
|
|
"grad_norm": 14.047541618347168,
|
|
"learning_rate": 4.998324337072792e-07,
|
|
"logits/chosen": 1.5058391094207764,
|
|
"logits/rejected": 1.5753705501556396,
|
|
"loss": 5.5252,
|
|
"step": 54
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009478636085987091,
|
|
"beta_dpo/beta_used_raw": 0.009478636085987091,
|
|
"beta_dpo/gap_mean": 0.5571960210800171,
|
|
"beta_dpo/gap_std": 1.6621750593185425,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.11518324607329843,
|
|
"grad_norm": 14.583319664001465,
|
|
"learning_rate": 4.997587164001815e-07,
|
|
"logits/chosen": 2.003282308578491,
|
|
"logits/rejected": 2.013611316680908,
|
|
"loss": 5.5249,
|
|
"step": 55
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009290758520364761,
|
|
"beta_dpo/beta_used_raw": 0.009290758520364761,
|
|
"beta_dpo/gap_mean": 0.638902485370636,
|
|
"beta_dpo/gap_std": 1.8342792987823486,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.11727748691099477,
|
|
"grad_norm": 15.21347713470459,
|
|
"learning_rate": 4.996716052911017e-07,
|
|
"logits/chosen": 2.15181565284729,
|
|
"logits/rejected": 2.135338306427002,
|
|
"loss": 5.5226,
|
|
"step": 56
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009111498482525349,
|
|
"beta_dpo/beta_used_raw": 0.009111498482525349,
|
|
"beta_dpo/gap_mean": 0.9660211801528931,
|
|
"beta_dpo/gap_std": 1.9951261281967163,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.1193717277486911,
|
|
"grad_norm": 16.580799102783203,
|
|
"learning_rate": 4.99571105051544e-07,
|
|
"logits/chosen": 2.130098581314087,
|
|
"logits/rejected": 1.8486499786376953,
|
|
"loss": 5.5134,
|
|
"step": 57
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.008915345184504986,
|
|
"beta_dpo/beta_used_raw": 0.008915345184504986,
|
|
"beta_dpo/gap_mean": 0.9618982076644897,
|
|
"beta_dpo/gap_std": 1.7987135648727417,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.12146596858638743,
|
|
"grad_norm": 15.42608642578125,
|
|
"learning_rate": 4.994572210710314e-07,
|
|
"logits/chosen": 1.6894437074661255,
|
|
"logits/rejected": 1.699744462966919,
|
|
"loss": 5.5158,
|
|
"step": 58
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009204288944602013,
|
|
"beta_dpo/beta_used_raw": 0.009204288944602013,
|
|
"beta_dpo/gap_mean": 0.8019428253173828,
|
|
"beta_dpo/gap_std": 2.0088188648223877,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.12356020942408377,
|
|
"grad_norm": 15.577202796936035,
|
|
"learning_rate": 4.993299594568162e-07,
|
|
"logits/chosen": 1.5538208484649658,
|
|
"logits/rejected": 1.6072800159454346,
|
|
"loss": 5.52,
|
|
"step": 59
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009918388910591602,
|
|
"beta_dpo/beta_used_raw": 0.009918388910591602,
|
|
"beta_dpo/gap_mean": 0.847707986831665,
|
|
"beta_dpo/gap_std": 2.123305320739746,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.1256544502617801,
|
|
"grad_norm": 14.793850898742676,
|
|
"learning_rate": 4.991893270335525e-07,
|
|
"logits/chosen": 2.0483858585357666,
|
|
"logits/rejected": 1.8020352125167847,
|
|
"loss": 5.5111,
|
|
"step": 60
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009820302948355675,
|
|
"beta_dpo/beta_used_raw": 0.009820302948355675,
|
|
"beta_dpo/gap_mean": 0.9802277684211731,
|
|
"beta_dpo/gap_std": 2.0959830284118652,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.12774869109947645,
|
|
"grad_norm": 16.083724975585938,
|
|
"learning_rate": 4.990353313429303e-07,
|
|
"logits/chosen": 1.9473985433578491,
|
|
"logits/rejected": 1.9882135391235352,
|
|
"loss": 5.5041,
|
|
"step": 61
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010419272817671299,
|
|
"beta_dpo/beta_used_raw": 0.010419272817671299,
|
|
"beta_dpo/gap_mean": 0.979004442691803,
|
|
"beta_dpo/gap_std": 2.1615118980407715,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.12984293193717278,
|
|
"grad_norm": 18.826759338378906,
|
|
"learning_rate": 4.988679806432711e-07,
|
|
"logits/chosen": 1.872680902481079,
|
|
"logits/rejected": 1.8009073734283447,
|
|
"loss": 5.5007,
|
|
"step": 62
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.00935581885278225,
|
|
"beta_dpo/beta_used_raw": 0.00935581885278225,
|
|
"beta_dpo/gap_mean": 1.0244998931884766,
|
|
"beta_dpo/gap_std": 2.4170455932617188,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.1319371727748691,
|
|
"grad_norm": 15.110966682434082,
|
|
"learning_rate": 4.986872839090852e-07,
|
|
"logits/chosen": 1.9980614185333252,
|
|
"logits/rejected": 2.105093002319336,
|
|
"loss": 5.5107,
|
|
"step": 63
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010298279114067554,
|
|
"beta_dpo/beta_used_raw": 0.010298279114067554,
|
|
"beta_dpo/gap_mean": 1.1149272918701172,
|
|
"beta_dpo/gap_std": 2.4519460201263428,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.13403141361256546,
|
|
"grad_norm": 20.477684020996094,
|
|
"learning_rate": 4.9849325083059e-07,
|
|
"logits/chosen": 1.7054760456085205,
|
|
"logits/rejected": 1.951492428779602,
|
|
"loss": 5.4844,
|
|
"step": 64
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009701458737254143,
|
|
"beta_dpo/beta_used_raw": 0.009701458737254143,
|
|
"beta_dpo/gap_mean": 1.1075406074523926,
|
|
"beta_dpo/gap_std": 2.5126233100891113,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.13612565445026178,
|
|
"grad_norm": 16.23882484436035,
|
|
"learning_rate": 4.982858918131906e-07,
|
|
"logits/chosen": 1.9961862564086914,
|
|
"logits/rejected": 2.0398294925689697,
|
|
"loss": 5.502,
|
|
"step": 65
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010468224063515663,
|
|
"beta_dpo/beta_used_raw": 0.010468224063515663,
|
|
"beta_dpo/gap_mean": 1.0450140237808228,
|
|
"beta_dpo/gap_std": 2.6909701824188232,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.1382198952879581,
|
|
"grad_norm": 18.444570541381836,
|
|
"learning_rate": 4.980652179769217e-07,
|
|
"logits/chosen": 1.6719987392425537,
|
|
"logits/rejected": 1.881594181060791,
|
|
"loss": 5.4931,
|
|
"step": 66
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010425317101180553,
|
|
"beta_dpo/beta_used_raw": 0.010425317101180553,
|
|
"beta_dpo/gap_mean": 1.015570878982544,
|
|
"beta_dpo/gap_std": 2.8450400829315186,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.14031413612565444,
|
|
"grad_norm": 17.675512313842773,
|
|
"learning_rate": 4.978312411558517e-07,
|
|
"logits/chosen": 2.0440990924835205,
|
|
"logits/rejected": 2.0636091232299805,
|
|
"loss": 5.4964,
|
|
"step": 67
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009568197652697563,
|
|
"beta_dpo/beta_used_raw": 0.009568197652697563,
|
|
"beta_dpo/gap_mean": 1.0808396339416504,
|
|
"beta_dpo/gap_std": 3.0677380561828613,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.1424083769633508,
|
|
"grad_norm": 15.759063720703125,
|
|
"learning_rate": 4.975839738974473e-07,
|
|
"logits/chosen": 1.5441210269927979,
|
|
"logits/rejected": 1.3784618377685547,
|
|
"loss": 5.5019,
|
|
"step": 68
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.011599601246416569,
|
|
"beta_dpo/beta_used_raw": 0.011599601246416569,
|
|
"beta_dpo/gap_mean": 1.4021799564361572,
|
|
"beta_dpo/gap_std": 3.188746213912964,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.14450261780104712,
|
|
"grad_norm": 22.499710083007812,
|
|
"learning_rate": 4.97323429461901e-07,
|
|
"logits/chosen": 1.9849984645843506,
|
|
"logits/rejected": 1.8482412099838257,
|
|
"loss": 5.4511,
|
|
"step": 69
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009116853587329388,
|
|
"beta_dpo/beta_used_raw": 0.009116853587329388,
|
|
"beta_dpo/gap_mean": 1.547209620475769,
|
|
"beta_dpo/gap_std": 3.23995304107666,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.14659685863874344,
|
|
"grad_norm": 15.340811729431152,
|
|
"learning_rate": 4.970496218214204e-07,
|
|
"logits/chosen": 2.295590400695801,
|
|
"logits/rejected": 2.3875482082366943,
|
|
"loss": 5.4922,
|
|
"step": 70
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.012471513822674751,
|
|
"beta_dpo/beta_used_raw": 0.012471513822674751,
|
|
"beta_dpo/gap_mean": 1.582148551940918,
|
|
"beta_dpo/gap_std": 3.453483819961548,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.1486910994764398,
|
|
"grad_norm": 22.772903442382812,
|
|
"learning_rate": 4.967625656594781e-07,
|
|
"logits/chosen": 1.8594659566879272,
|
|
"logits/rejected": 1.9157780408859253,
|
|
"loss": 5.4328,
|
|
"step": 71
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010649541392922401,
|
|
"beta_dpo/beta_used_raw": 0.009485064074397087,
|
|
"beta_dpo/gap_mean": 1.6831897497177124,
|
|
"beta_dpo/gap_std": 3.4016518592834473,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.15078534031413612,
|
|
"grad_norm": 19.1998348236084,
|
|
"learning_rate": 4.964622763700252e-07,
|
|
"logits/chosen": 1.8293884992599487,
|
|
"logits/rejected": 1.892337679862976,
|
|
"loss": 5.4513,
|
|
"step": 72
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.011073922738432884,
|
|
"beta_dpo/beta_used_raw": 0.010616803541779518,
|
|
"beta_dpo/gap_mean": 1.6742221117019653,
|
|
"beta_dpo/gap_std": 3.5703773498535156,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.15287958115183245,
|
|
"grad_norm": 20.304622650146484,
|
|
"learning_rate": 4.961487700566646e-07,
|
|
"logits/chosen": 2.2375855445861816,
|
|
"logits/rejected": 2.2253012657165527,
|
|
"loss": 5.4517,
|
|
"step": 73
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009733829647302628,
|
|
"beta_dpo/beta_used_raw": 0.009427759796380997,
|
|
"beta_dpo/gap_mean": 1.560795783996582,
|
|
"beta_dpo/gap_std": 3.745507001876831,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.1549738219895288,
|
|
"grad_norm": 20.791471481323242,
|
|
"learning_rate": 4.958220635317885e-07,
|
|
"logits/chosen": 1.8168758153915405,
|
|
"logits/rejected": 1.7319445610046387,
|
|
"loss": 5.4672,
|
|
"step": 74
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.013009906746447086,
|
|
"beta_dpo/beta_used_raw": 0.013009906746447086,
|
|
"beta_dpo/gap_mean": 1.750954031944275,
|
|
"beta_dpo/gap_std": 3.6934804916381836,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.15706806282722513,
|
|
"grad_norm": 23.435768127441406,
|
|
"learning_rate": 4.954821743156767e-07,
|
|
"logits/chosen": 1.8880510330200195,
|
|
"logits/rejected": 1.9295786619186401,
|
|
"loss": 5.4219,
|
|
"step": 75
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.007087262813001871,
|
|
"beta_dpo/beta_used_raw": 0.007036793977022171,
|
|
"beta_dpo/gap_mean": 2.2242462635040283,
|
|
"beta_dpo/gap_std": 3.95930814743042,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.15916230366492146,
|
|
"grad_norm": 17.705944061279297,
|
|
"learning_rate": 4.951291206355559e-07,
|
|
"logits/chosen": 2.0245938301086426,
|
|
"logits/rejected": 1.793765902519226,
|
|
"loss": 5.4853,
|
|
"step": 76
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.008353885263204575,
|
|
"beta_dpo/beta_used_raw": 0.008143781684339046,
|
|
"beta_dpo/gap_mean": 2.021268844604492,
|
|
"beta_dpo/gap_std": 4.135770797729492,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.1612565445026178,
|
|
"grad_norm": 16.503738403320312,
|
|
"learning_rate": 4.947629214246236e-07,
|
|
"logits/chosen": 2.1751253604888916,
|
|
"logits/rejected": 2.1524720191955566,
|
|
"loss": 5.4772,
|
|
"step": 77
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.011361459270119667,
|
|
"beta_dpo/beta_used_raw": 0.011361459270119667,
|
|
"beta_dpo/gap_mean": 2.2091753482818604,
|
|
"beta_dpo/gap_std": 4.447847843170166,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.16335078534031414,
|
|
"grad_norm": 19.320999145507812,
|
|
"learning_rate": 4.943835963210323e-07,
|
|
"logits/chosen": 1.7212610244750977,
|
|
"logits/rejected": 1.7153496742248535,
|
|
"loss": 5.4244,
|
|
"step": 78
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009612835012376308,
|
|
"beta_dpo/beta_used_raw": 0.00958208180963993,
|
|
"beta_dpo/gap_mean": 2.3663156032562256,
|
|
"beta_dpo/gap_std": 4.715466022491455,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.16544502617801046,
|
|
"grad_norm": 19.817529678344727,
|
|
"learning_rate": 4.939911656668361e-07,
|
|
"logits/chosen": 1.9014170169830322,
|
|
"logits/rejected": 2.1795027256011963,
|
|
"loss": 5.4218,
|
|
"step": 79
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009250715374946594,
|
|
"beta_dpo/beta_used_raw": 0.008980360813438892,
|
|
"beta_dpo/gap_mean": 2.0934667587280273,
|
|
"beta_dpo/gap_std": 5.130978584289551,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.16753926701570682,
|
|
"grad_norm": 18.116151809692383,
|
|
"learning_rate": 4.935856505068998e-07,
|
|
"logits/chosen": 1.5658341646194458,
|
|
"logits/rejected": 1.7265154123306274,
|
|
"loss": 5.4494,
|
|
"step": 80
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009883089922368526,
|
|
"beta_dpo/beta_used_raw": 0.009712887927889824,
|
|
"beta_dpo/gap_mean": 2.5941665172576904,
|
|
"beta_dpo/gap_std": 5.163574695587158,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.16963350785340314,
|
|
"grad_norm": 22.016693115234375,
|
|
"learning_rate": 4.93167072587771e-07,
|
|
"logits/chosen": 1.742193579673767,
|
|
"logits/rejected": 1.9251035451889038,
|
|
"loss": 5.2421,
|
|
"step": 81
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009547875262796879,
|
|
"beta_dpo/beta_used_raw": 0.009547875262796879,
|
|
"beta_dpo/gap_mean": 2.4227218627929688,
|
|
"beta_dpo/gap_std": 5.073668956756592,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.17172774869109947,
|
|
"grad_norm": 17.96396255493164,
|
|
"learning_rate": 4.92735454356513e-07,
|
|
"logits/chosen": 1.9680440425872803,
|
|
"logits/rejected": 1.9148989915847778,
|
|
"loss": 5.4469,
|
|
"step": 82
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010678245685994625,
|
|
"beta_dpo/beta_used_raw": 0.009905948303639889,
|
|
"beta_dpo/gap_mean": 2.5397074222564697,
|
|
"beta_dpo/gap_std": 5.242867469787598,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.17382198952879582,
|
|
"grad_norm": 23.018129348754883,
|
|
"learning_rate": 4.922908189595017e-07,
|
|
"logits/chosen": 1.5621941089630127,
|
|
"logits/rejected": 1.5305424928665161,
|
|
"loss": 5.3852,
|
|
"step": 83
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.006417885888367891,
|
|
"beta_dpo/beta_used_raw": 0.006086358800530434,
|
|
"beta_dpo/gap_mean": 2.7024130821228027,
|
|
"beta_dpo/gap_std": 5.565805435180664,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.17591623036649215,
|
|
"grad_norm": 13.125260353088379,
|
|
"learning_rate": 4.918331902411841e-07,
|
|
"logits/chosen": 2.024345874786377,
|
|
"logits/rejected": 1.9076447486877441,
|
|
"loss": 5.4801,
|
|
"step": 84
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009424247778952122,
|
|
"beta_dpo/beta_used_raw": 0.008895869366824627,
|
|
"beta_dpo/gap_mean": 2.2540838718414307,
|
|
"beta_dpo/gap_std": 5.414524555206299,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.17801047120418848,
|
|
"grad_norm": 18.945358276367188,
|
|
"learning_rate": 4.913625927427995e-07,
|
|
"logits/chosen": 1.51369047164917,
|
|
"logits/rejected": 1.6780593395233154,
|
|
"loss": 5.4333,
|
|
"step": 85
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.013801836408674717,
|
|
"beta_dpo/beta_used_raw": 0.013801836408674717,
|
|
"beta_dpo/gap_mean": 2.4163331985473633,
|
|
"beta_dpo/gap_std": 5.740031719207764,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.18010471204188483,
|
|
"grad_norm": 25.516857147216797,
|
|
"learning_rate": 4.908790517010636e-07,
|
|
"logits/chosen": 1.8556016683578491,
|
|
"logits/rejected": 1.872323751449585,
|
|
"loss": 5.3655,
|
|
"step": 86
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.008744290098547935,
|
|
"beta_dpo/beta_used_raw": 0.008744290098547935,
|
|
"beta_dpo/gap_mean": 2.9491662979125977,
|
|
"beta_dpo/gap_std": 5.92836856842041,
|
|
"beta_dpo/mask_keep_frac": 0.625,
|
|
"epoch": 0.18219895287958116,
|
|
"grad_norm": 20.971223831176758,
|
|
"learning_rate": 4.903825930468148e-07,
|
|
"logits/chosen": 1.6977579593658447,
|
|
"logits/rejected": 1.6770415306091309,
|
|
"loss": 5.4258,
|
|
"step": 87
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.007864508777856827,
|
|
"beta_dpo/beta_used_raw": 0.007664060685783625,
|
|
"beta_dpo/gap_mean": 3.0257012844085693,
|
|
"beta_dpo/gap_std": 5.952022552490234,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.18429319371727748,
|
|
"grad_norm": 15.7445068359375,
|
|
"learning_rate": 4.898732434036243e-07,
|
|
"logits/chosen": 1.5104684829711914,
|
|
"logits/rejected": 1.357150912284851,
|
|
"loss": 5.4513,
|
|
"step": 88
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010421731509268284,
|
|
"beta_dpo/beta_used_raw": 0.01035550981760025,
|
|
"beta_dpo/gap_mean": 2.823183536529541,
|
|
"beta_dpo/gap_std": 6.035218238830566,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.18638743455497384,
|
|
"grad_norm": 21.113414764404297,
|
|
"learning_rate": 4.893510300863676e-07,
|
|
"logits/chosen": 1.9621143341064453,
|
|
"logits/rejected": 1.8874907493591309,
|
|
"loss": 5.402,
|
|
"step": 89
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.012045778334140778,
|
|
"beta_dpo/beta_used_raw": 0.010188662447035313,
|
|
"beta_dpo/gap_mean": 2.964503288269043,
|
|
"beta_dpo/gap_std": 5.843700408935547,
|
|
"beta_dpo/mask_keep_frac": 0.625,
|
|
"epoch": 0.18848167539267016,
|
|
"grad_norm": 30.074321746826172,
|
|
"learning_rate": 4.8881598109976e-07,
|
|
"logits/chosen": 2.1660492420196533,
|
|
"logits/rejected": 2.0563719272613525,
|
|
"loss": 5.3301,
|
|
"step": 90
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.007549135014414787,
|
|
"beta_dpo/beta_used_raw": 0.005311334040015936,
|
|
"beta_dpo/gap_mean": 2.952354669570923,
|
|
"beta_dpo/gap_std": 6.251888751983643,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.1905759162303665,
|
|
"grad_norm": 17.551513671875,
|
|
"learning_rate": 4.882681251368548e-07,
|
|
"logits/chosen": 1.2380826473236084,
|
|
"logits/rejected": 1.557425618171692,
|
|
"loss": 5.2785,
|
|
"step": 91
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.015128381550312042,
|
|
"beta_dpo/beta_used_raw": 0.014704037457704544,
|
|
"beta_dpo/gap_mean": 2.930189847946167,
|
|
"beta_dpo/gap_std": 6.301963806152344,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.19267015706806281,
|
|
"grad_norm": 30.674835205078125,
|
|
"learning_rate": 4.877074915775048e-07,
|
|
"logits/chosen": 1.6860748529434204,
|
|
"logits/rejected": 1.4988112449645996,
|
|
"loss": 5.2723,
|
|
"step": 92
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.006954543758183718,
|
|
"beta_dpo/beta_used_raw": 0.0063597094267606735,
|
|
"beta_dpo/gap_mean": 3.009707450866699,
|
|
"beta_dpo/gap_std": 6.455717086791992,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.19476439790575917,
|
|
"grad_norm": 12.83521556854248,
|
|
"learning_rate": 4.871341104867864e-07,
|
|
"logits/chosen": 1.9297364950180054,
|
|
"logits/rejected": 1.8627700805664062,
|
|
"loss": 5.4614,
|
|
"step": 93
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0072138672694563866,
|
|
"beta_dpo/beta_used_raw": 0.005733566824346781,
|
|
"beta_dpo/gap_mean": 3.3237226009368896,
|
|
"beta_dpo/gap_std": 6.866450786590576,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.1968586387434555,
|
|
"grad_norm": 15.222475051879883,
|
|
"learning_rate": 4.865480126133871e-07,
|
|
"logits/chosen": 1.5820927619934082,
|
|
"logits/rejected": 1.6416268348693848,
|
|
"loss": 5.444,
|
|
"step": 94
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.008435830473899841,
|
|
"beta_dpo/beta_used_raw": 0.007779551669955254,
|
|
"beta_dpo/gap_mean": 3.4265336990356445,
|
|
"beta_dpo/gap_std": 7.192251205444336,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.19895287958115182,
|
|
"grad_norm": 17.31826400756836,
|
|
"learning_rate": 4.859492293879573e-07,
|
|
"logits/chosen": 1.7770836353302002,
|
|
"logits/rejected": 1.5319178104400635,
|
|
"loss": 5.4109,
|
|
"step": 95
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010932082310318947,
|
|
"beta_dpo/beta_used_raw": 0.00794284138828516,
|
|
"beta_dpo/gap_mean": 3.5308783054351807,
|
|
"beta_dpo/gap_std": 7.482184886932373,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.20104712041884817,
|
|
"grad_norm": 24.832975387573242,
|
|
"learning_rate": 4.853377929214243e-07,
|
|
"logits/chosen": 1.4598766565322876,
|
|
"logits/rejected": 1.3611279726028442,
|
|
"loss": 5.3563,
|
|
"step": 96
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010159716010093689,
|
|
"beta_dpo/beta_used_raw": 0.010018959641456604,
|
|
"beta_dpo/gap_mean": 3.793192148208618,
|
|
"beta_dpo/gap_std": 7.78098201751709,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.2031413612565445,
|
|
"grad_norm": 22.265670776367188,
|
|
"learning_rate": 4.847137360032699e-07,
|
|
"logits/chosen": 1.5520637035369873,
|
|
"logits/rejected": 1.644052505493164,
|
|
"loss": 5.3533,
|
|
"step": 97
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01027124933898449,
|
|
"beta_dpo/beta_used_raw": 0.009908015839755535,
|
|
"beta_dpo/gap_mean": 3.9612808227539062,
|
|
"beta_dpo/gap_std": 7.822225093841553,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.20523560209424083,
|
|
"grad_norm": 21.846027374267578,
|
|
"learning_rate": 4.84077092099773e-07,
|
|
"logits/chosen": 2.0662131309509277,
|
|
"logits/rejected": 2.265798807144165,
|
|
"loss": 5.3616,
|
|
"step": 98
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01488437969237566,
|
|
"beta_dpo/beta_used_raw": 0.01488437969237566,
|
|
"beta_dpo/gap_mean": 3.7299928665161133,
|
|
"beta_dpo/gap_std": 8.350497245788574,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.20732984293193718,
|
|
"grad_norm": 27.384540557861328,
|
|
"learning_rate": 4.834278953522137e-07,
|
|
"logits/chosen": 1.9069733619689941,
|
|
"logits/rejected": 1.8735466003417969,
|
|
"loss": 5.251,
|
|
"step": 99
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.005023906007409096,
|
|
"beta_dpo/beta_used_raw": 0.002925662323832512,
|
|
"beta_dpo/gap_mean": 4.102505207061768,
|
|
"beta_dpo/gap_std": 8.151671409606934,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.2094240837696335,
|
|
"grad_norm": 16.64201545715332,
|
|
"learning_rate": 4.827661805750437e-07,
|
|
"logits/chosen": 1.9069080352783203,
|
|
"logits/rejected": 1.840613842010498,
|
|
"loss": 5.4547,
|
|
"step": 100
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.013062715530395508,
|
|
"beta_dpo/beta_used_raw": 0.013062715530395508,
|
|
"beta_dpo/gap_mean": 3.8761510848999023,
|
|
"beta_dpo/gap_std": 8.57790756225586,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.21151832460732983,
|
|
"grad_norm": 25.09943389892578,
|
|
"learning_rate": 4.820919832540181e-07,
|
|
"logits/chosen": 1.3813724517822266,
|
|
"logits/rejected": 1.6055908203125,
|
|
"loss": 5.3,
|
|
"step": 101
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.014250491745769978,
|
|
"beta_dpo/beta_used_raw": 0.011926423758268356,
|
|
"beta_dpo/gap_mean": 4.320952892303467,
|
|
"beta_dpo/gap_std": 8.283108711242676,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.2136125654450262,
|
|
"grad_norm": 24.078140258789062,
|
|
"learning_rate": 4.814053395442932e-07,
|
|
"logits/chosen": 1.7069716453552246,
|
|
"logits/rejected": 1.822311520576477,
|
|
"loss": 5.2401,
|
|
"step": 102
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.006543359719216824,
|
|
"beta_dpo/beta_used_raw": 0.003442541928961873,
|
|
"beta_dpo/gap_mean": 4.461350917816162,
|
|
"beta_dpo/gap_std": 8.508588790893555,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.2157068062827225,
|
|
"grad_norm": 14.553688049316406,
|
|
"learning_rate": 4.807062862684873e-07,
|
|
"logits/chosen": 2.264915943145752,
|
|
"logits/rejected": 2.3848659992218018,
|
|
"loss": 5.4299,
|
|
"step": 103
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.007411661557853222,
|
|
"beta_dpo/beta_used_raw": 0.006676441989839077,
|
|
"beta_dpo/gap_mean": 3.8371684551239014,
|
|
"beta_dpo/gap_std": 9.153058052062988,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.21780104712041884,
|
|
"grad_norm": 15.451645851135254,
|
|
"learning_rate": 4.799948609147061e-07,
|
|
"logits/chosen": 1.8409569263458252,
|
|
"logits/rejected": 1.6925066709518433,
|
|
"loss": 5.4174,
|
|
"step": 104
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01699206791818142,
|
|
"beta_dpo/beta_used_raw": 0.016010824590921402,
|
|
"beta_dpo/gap_mean": 4.9135966300964355,
|
|
"beta_dpo/gap_std": 8.913808822631836,
|
|
"beta_dpo/mask_keep_frac": 0.625,
|
|
"epoch": 0.2198952879581152,
|
|
"grad_norm": 37.052093505859375,
|
|
"learning_rate": 4.792711016345321e-07,
|
|
"logits/chosen": 1.8707003593444824,
|
|
"logits/rejected": 1.7518517971038818,
|
|
"loss": 5.0469,
|
|
"step": 105
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.00902323704212904,
|
|
"beta_dpo/beta_used_raw": 0.007059420458972454,
|
|
"beta_dpo/gap_mean": 4.627331256866455,
|
|
"beta_dpo/gap_std": 9.46343994140625,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.22198952879581152,
|
|
"grad_norm": 24.8957576751709,
|
|
"learning_rate": 4.785350472409791e-07,
|
|
"logits/chosen": 1.8109657764434814,
|
|
"logits/rejected": 2.01666522026062,
|
|
"loss": 5.3041,
|
|
"step": 106
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009695657528936863,
|
|
"beta_dpo/beta_used_raw": 0.007410034071654081,
|
|
"beta_dpo/gap_mean": 5.23702335357666,
|
|
"beta_dpo/gap_std": 9.842565536499023,
|
|
"beta_dpo/mask_keep_frac": 0.90625,
|
|
"epoch": 0.22408376963350785,
|
|
"grad_norm": 23.953954696655273,
|
|
"learning_rate": 4.777867372064105e-07,
|
|
"logits/chosen": 1.8471797704696655,
|
|
"logits/rejected": 1.797261357307434,
|
|
"loss": 5.2947,
|
|
"step": 107
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01452508196234703,
|
|
"beta_dpo/beta_used_raw": 0.014417744241654873,
|
|
"beta_dpo/gap_mean": 5.950323581695557,
|
|
"beta_dpo/gap_std": 9.602670669555664,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.2261780104712042,
|
|
"grad_norm": 28.78997039794922,
|
|
"learning_rate": 4.770262116604223e-07,
|
|
"logits/chosen": 1.781799077987671,
|
|
"logits/rejected": 1.9572784900665283,
|
|
"loss": 5.1167,
|
|
"step": 108
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0066536241210997105,
|
|
"beta_dpo/beta_used_raw": 0.004698293283581734,
|
|
"beta_dpo/gap_mean": 6.354887962341309,
|
|
"beta_dpo/gap_std": 10.01487922668457,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.22827225130890053,
|
|
"grad_norm": 13.27180004119873,
|
|
"learning_rate": 4.7625351138769166e-07,
|
|
"logits/chosen": 1.9066269397735596,
|
|
"logits/rejected": 1.9160687923431396,
|
|
"loss": 5.3954,
|
|
"step": 109
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010541049763560295,
|
|
"beta_dpo/beta_used_raw": 0.009356118738651276,
|
|
"beta_dpo/gap_mean": 6.276027202606201,
|
|
"beta_dpo/gap_std": 11.113080978393555,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.23036649214659685,
|
|
"grad_norm": 22.47654151916504,
|
|
"learning_rate": 4.75468677825789e-07,
|
|
"logits/chosen": 1.6488604545593262,
|
|
"logits/rejected": 1.6686369180679321,
|
|
"loss": 5.2445,
|
|
"step": 110
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010099717415869236,
|
|
"beta_dpo/beta_used_raw": 0.01006684172898531,
|
|
"beta_dpo/gap_mean": 6.5077595710754395,
|
|
"beta_dpo/gap_std": 11.19198989868164,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.2324607329842932,
|
|
"grad_norm": 23.18780517578125,
|
|
"learning_rate": 4.7467175306295647e-07,
|
|
"logits/chosen": 1.820462942123413,
|
|
"logits/rejected": 1.9046530723571777,
|
|
"loss": 5.2581,
|
|
"step": 111
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.008607706055045128,
|
|
"beta_dpo/beta_used_raw": 0.003359769470989704,
|
|
"beta_dpo/gap_mean": 5.609295845031738,
|
|
"beta_dpo/gap_std": 11.112923622131348,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.23455497382198953,
|
|
"grad_norm": 20.108413696289062,
|
|
"learning_rate": 4.7386277983585053e-07,
|
|
"logits/chosen": 1.7908120155334473,
|
|
"logits/rejected": 1.8937515020370483,
|
|
"loss": 5.3333,
|
|
"step": 112
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.016104042530059814,
|
|
"beta_dpo/beta_used_raw": 0.013628358021378517,
|
|
"beta_dpo/gap_mean": 6.332000255584717,
|
|
"beta_dpo/gap_std": 11.891839981079102,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.23664921465968586,
|
|
"grad_norm": 32.54283142089844,
|
|
"learning_rate": 4.7304180152725024e-07,
|
|
"logits/chosen": 1.49177885055542,
|
|
"logits/rejected": 1.6306943893432617,
|
|
"loss": 4.845,
|
|
"step": 113
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.007905099540948868,
|
|
"beta_dpo/beta_used_raw": 0.0018536364659667015,
|
|
"beta_dpo/gap_mean": 5.829183101654053,
|
|
"beta_dpo/gap_std": 12.018501281738281,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.2387434554973822,
|
|
"grad_norm": 22.07866859436035,
|
|
"learning_rate": 4.7220886216373085e-07,
|
|
"logits/chosen": 1.4004794359207153,
|
|
"logits/rejected": 1.3008323907852173,
|
|
"loss": 5.3357,
|
|
"step": 114
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009187846444547176,
|
|
"beta_dpo/beta_used_raw": 0.003049051621928811,
|
|
"beta_dpo/gap_mean": 4.952703952789307,
|
|
"beta_dpo/gap_std": 11.791646957397461,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.24083769633507854,
|
|
"grad_norm": 19.946712493896484,
|
|
"learning_rate": 4.7136400641330245e-07,
|
|
"logits/chosen": 1.9727150201797485,
|
|
"logits/rejected": 1.7037996053695679,
|
|
"loss": 5.325,
|
|
"step": 115
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.011235121637582779,
|
|
"beta_dpo/beta_used_raw": 0.010504303500056267,
|
|
"beta_dpo/gap_mean": 5.199014663696289,
|
|
"beta_dpo/gap_std": 11.840551376342773,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.24293193717277486,
|
|
"grad_norm": 25.633787155151367,
|
|
"learning_rate": 4.70507279583015e-07,
|
|
"logits/chosen": 1.7236762046813965,
|
|
"logits/rejected": 1.8275989294052124,
|
|
"loss": 5.2519,
|
|
"step": 116
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.015477584674954414,
|
|
"beta_dpo/beta_used_raw": 0.011517820879817009,
|
|
"beta_dpo/gap_mean": 5.6072611808776855,
|
|
"beta_dpo/gap_std": 11.469279289245605,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.2450261780104712,
|
|
"grad_norm": 34.2017936706543,
|
|
"learning_rate": 4.6963872761652834e-07,
|
|
"logits/chosen": 1.724921703338623,
|
|
"logits/rejected": 1.5013651847839355,
|
|
"loss": 5.0554,
|
|
"step": 117
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01235922146588564,
|
|
"beta_dpo/beta_used_raw": 0.007801849860697985,
|
|
"beta_dpo/gap_mean": 6.555847644805908,
|
|
"beta_dpo/gap_std": 11.524944305419922,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.24712041884816754,
|
|
"grad_norm": 32.83587646484375,
|
|
"learning_rate": 4.687583970916486e-07,
|
|
"logits/chosen": 1.7096357345581055,
|
|
"logits/rejected": 1.7614951133728027,
|
|
"loss": 5.103,
|
|
"step": 118
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.007942959666252136,
|
|
"beta_dpo/beta_used_raw": 0.0018032464431598783,
|
|
"beta_dpo/gap_mean": 6.371241569519043,
|
|
"beta_dpo/gap_std": 12.957239151000977,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.24921465968586387,
|
|
"grad_norm": 19.372495651245117,
|
|
"learning_rate": 4.6786633521783005e-07,
|
|
"logits/chosen": 1.8338923454284668,
|
|
"logits/rejected": 1.9390045404434204,
|
|
"loss": 5.3143,
|
|
"step": 119
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009077337570488453,
|
|
"beta_dpo/beta_used_raw": 0.006467485800385475,
|
|
"beta_dpo/gap_mean": 6.747334003448486,
|
|
"beta_dpo/gap_std": 13.51995849609375,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.2513089005235602,
|
|
"grad_norm": 25.99736785888672,
|
|
"learning_rate": 4.669625898336438e-07,
|
|
"logits/chosen": 1.904350757598877,
|
|
"logits/rejected": 1.7881104946136475,
|
|
"loss": 5.2818,
|
|
"step": 120
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.004188536666333675,
|
|
"beta_dpo/beta_used_raw": -0.0010998877696692944,
|
|
"beta_dpo/gap_mean": 5.929210662841797,
|
|
"beta_dpo/gap_std": 12.944700241088867,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.2534031413612565,
|
|
"grad_norm": 11.07016372680664,
|
|
"learning_rate": 4.6604720940421207e-07,
|
|
"logits/chosen": 1.178752064704895,
|
|
"logits/rejected": 1.4918150901794434,
|
|
"loss": 5.4539,
|
|
"step": 121
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.013262229040265083,
|
|
"beta_dpo/beta_used_raw": 0.010668408125638962,
|
|
"beta_dpo/gap_mean": 6.120506286621094,
|
|
"beta_dpo/gap_std": 13.898996353149414,
|
|
"beta_dpo/mask_keep_frac": 0.59375,
|
|
"epoch": 0.2554973821989529,
|
|
"grad_norm": 27.32390785217285,
|
|
"learning_rate": 4.651202430186092e-07,
|
|
"logits/chosen": 1.6907187700271606,
|
|
"logits/rejected": 2.047647714614868,
|
|
"loss": 5.1692,
|
|
"step": 122
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02133483625948429,
|
|
"beta_dpo/beta_used_raw": 0.01663101837038994,
|
|
"beta_dpo/gap_mean": 6.646225929260254,
|
|
"beta_dpo/gap_std": 14.434886932373047,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.25759162303664923,
|
|
"grad_norm": 43.08592987060547,
|
|
"learning_rate": 4.6418174038722924e-07,
|
|
"logits/chosen": 1.7687194347381592,
|
|
"logits/rejected": 1.6279195547103882,
|
|
"loss": 4.832,
|
|
"step": 123
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010348731651902199,
|
|
"beta_dpo/beta_used_raw": 0.00513859186321497,
|
|
"beta_dpo/gap_mean": 7.770158290863037,
|
|
"beta_dpo/gap_std": 14.987278938293457,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.25968586387434556,
|
|
"grad_norm": 27.494583129882812,
|
|
"learning_rate": 4.6323175183912023e-07,
|
|
"logits/chosen": 1.4107732772827148,
|
|
"logits/rejected": 1.5245213508605957,
|
|
"loss": 5.1637,
|
|
"step": 124
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01517592091113329,
|
|
"beta_dpo/beta_used_raw": 0.009712353348731995,
|
|
"beta_dpo/gap_mean": 7.219732284545898,
|
|
"beta_dpo/gap_std": 14.99057388305664,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.2617801047120419,
|
|
"grad_norm": 35.12456130981445,
|
|
"learning_rate": 4.6227032831928483e-07,
|
|
"logits/chosen": 1.578749418258667,
|
|
"logits/rejected": 1.5831409692764282,
|
|
"loss": 5.0641,
|
|
"step": 125
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.014101858250796795,
|
|
"beta_dpo/beta_used_raw": 0.006124613806605339,
|
|
"beta_dpo/gap_mean": 8.286654472351074,
|
|
"beta_dpo/gap_std": 15.358405113220215,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.2638743455497382,
|
|
"grad_norm": 30.269481658935547,
|
|
"learning_rate": 4.612975213859487e-07,
|
|
"logits/chosen": 2.050217866897583,
|
|
"logits/rejected": 2.2472779750823975,
|
|
"loss": 4.999,
|
|
"step": 126
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01756615750491619,
|
|
"beta_dpo/beta_used_raw": 0.016226252540946007,
|
|
"beta_dpo/gap_mean": 8.151988983154297,
|
|
"beta_dpo/gap_std": 15.966252326965332,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.26596858638743454,
|
|
"grad_norm": 35.137901306152344,
|
|
"learning_rate": 4.603133832077953e-07,
|
|
"logits/chosen": 1.3663495779037476,
|
|
"logits/rejected": 1.3178493976593018,
|
|
"loss": 4.8628,
|
|
"step": 127
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.012780067510902882,
|
|
"beta_dpo/beta_used_raw": 0.012262159027159214,
|
|
"beta_dpo/gap_mean": 9.522705078125,
|
|
"beta_dpo/gap_std": 15.977328300476074,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.2680628272251309,
|
|
"grad_norm": 26.821195602416992,
|
|
"learning_rate": 4.5931796656116837e-07,
|
|
"logits/chosen": 1.4073151350021362,
|
|
"logits/rejected": 1.3889837265014648,
|
|
"loss": 5.0196,
|
|
"step": 128
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.013786004856228828,
|
|
"beta_dpo/beta_used_raw": 0.006445377133786678,
|
|
"beta_dpo/gap_mean": 9.859175682067871,
|
|
"beta_dpo/gap_std": 16.836477279663086,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.27015706806282724,
|
|
"grad_norm": 30.80266761779785,
|
|
"learning_rate": 4.5831132482724193e-07,
|
|
"logits/chosen": 1.4743335247039795,
|
|
"logits/rejected": 1.6113927364349365,
|
|
"loss": 4.9661,
|
|
"step": 129
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.013385320082306862,
|
|
"beta_dpo/beta_used_raw": 0.007840610109269619,
|
|
"beta_dpo/gap_mean": 9.868795394897461,
|
|
"beta_dpo/gap_std": 16.45522117614746,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.27225130890052357,
|
|
"grad_norm": 31.25225067138672,
|
|
"learning_rate": 4.5729351198915705e-07,
|
|
"logits/chosen": 1.6542197465896606,
|
|
"logits/rejected": 1.9003280401229858,
|
|
"loss": 4.9968,
|
|
"step": 130
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.011940027587115765,
|
|
"beta_dpo/beta_used_raw": 0.007653850130736828,
|
|
"beta_dpo/gap_mean": 8.79969596862793,
|
|
"beta_dpo/gap_std": 16.86931037902832,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.2743455497382199,
|
|
"grad_norm": 26.647390365600586,
|
|
"learning_rate": 4.5626458262912735e-07,
|
|
"logits/chosen": 1.2588789463043213,
|
|
"logits/rejected": 1.1883281469345093,
|
|
"loss": 5.1746,
|
|
"step": 131
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.020324911922216415,
|
|
"beta_dpo/beta_used_raw": 0.009501131251454353,
|
|
"beta_dpo/gap_mean": 8.400039672851562,
|
|
"beta_dpo/gap_std": 17.63036346435547,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.2764397905759162,
|
|
"grad_norm": 42.30400085449219,
|
|
"learning_rate": 4.5522459192551166e-07,
|
|
"logits/chosen": 1.688079833984375,
|
|
"logits/rejected": 1.7301361560821533,
|
|
"loss": 4.7249,
|
|
"step": 132
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.027873020619153976,
|
|
"beta_dpo/beta_used_raw": 0.021997135132551193,
|
|
"beta_dpo/gap_mean": 10.159527778625488,
|
|
"beta_dpo/gap_std": 18.784109115600586,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.27853403141361255,
|
|
"grad_norm": 44.038734436035156,
|
|
"learning_rate": 4.541735956498554e-07,
|
|
"logits/chosen": 1.7651947736740112,
|
|
"logits/rejected": 1.7092256546020508,
|
|
"loss": 4.3015,
|
|
"step": 133
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009234755299985409,
|
|
"beta_dpo/beta_used_raw": 0.00529387965798378,
|
|
"beta_dpo/gap_mean": 9.394585609436035,
|
|
"beta_dpo/gap_std": 17.975656509399414,
|
|
"beta_dpo/mask_keep_frac": 0.90625,
|
|
"epoch": 0.2806282722513089,
|
|
"grad_norm": 26.31687355041504,
|
|
"learning_rate": 4.5311165016389914e-07,
|
|
"logits/chosen": 2.0336687564849854,
|
|
"logits/rejected": 2.0945892333984375,
|
|
"loss": 5.1635,
|
|
"step": 134
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.018918566405773163,
|
|
"beta_dpo/beta_used_raw": 0.01316812727600336,
|
|
"beta_dpo/gap_mean": 9.845601081848145,
|
|
"beta_dpo/gap_std": 16.85881805419922,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.28272251308900526,
|
|
"grad_norm": 37.51054382324219,
|
|
"learning_rate": 4.520388124165564e-07,
|
|
"logits/chosen": 1.188499927520752,
|
|
"logits/rejected": 0.9699570536613464,
|
|
"loss": 4.7303,
|
|
"step": 135
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009488210082054138,
|
|
"beta_dpo/beta_used_raw": 0.004158595576882362,
|
|
"beta_dpo/gap_mean": 10.281312942504883,
|
|
"beta_dpo/gap_std": 17.496814727783203,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.2848167539267016,
|
|
"grad_norm": 22.803377151489258,
|
|
"learning_rate": 4.5095513994085974e-07,
|
|
"logits/chosen": 1.2178832292556763,
|
|
"logits/rejected": 1.4434417486190796,
|
|
"loss": 5.1349,
|
|
"step": 136
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01097769383341074,
|
|
"beta_dpo/beta_used_raw": 0.00757699366658926,
|
|
"beta_dpo/gap_mean": 9.332605361938477,
|
|
"beta_dpo/gap_std": 18.028961181640625,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.2869109947643979,
|
|
"grad_norm": 26.255542755126953,
|
|
"learning_rate": 4.498606908508753e-07,
|
|
"logits/chosen": 1.823258876800537,
|
|
"logits/rejected": 1.6405431032180786,
|
|
"loss": 5.0908,
|
|
"step": 137
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.008300930261611938,
|
|
"beta_dpo/beta_used_raw": -0.002543874317780137,
|
|
"beta_dpo/gap_mean": 9.568643569946289,
|
|
"beta_dpo/gap_std": 18.384599685668945,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.28900523560209423,
|
|
"grad_norm": 22.109146118164062,
|
|
"learning_rate": 4.487555238385862e-07,
|
|
"logits/chosen": 1.5952801704406738,
|
|
"logits/rejected": 1.5838592052459717,
|
|
"loss": 5.217,
|
|
"step": 138
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.011962666176259518,
|
|
"beta_dpo/beta_used_raw": 0.004802809562534094,
|
|
"beta_dpo/gap_mean": 7.955426216125488,
|
|
"beta_dpo/gap_std": 19.22389793395996,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.29109947643979056,
|
|
"grad_norm": 25.960317611694336,
|
|
"learning_rate": 4.476396981707453e-07,
|
|
"logits/chosen": 1.4421442747116089,
|
|
"logits/rejected": 1.5927166938781738,
|
|
"loss": 5.1662,
|
|
"step": 139
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02191855013370514,
|
|
"beta_dpo/beta_used_raw": 0.017846662551164627,
|
|
"beta_dpo/gap_mean": 8.63882064819336,
|
|
"beta_dpo/gap_std": 19.29082679748535,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.2931937172774869,
|
|
"grad_norm": 49.216670989990234,
|
|
"learning_rate": 4.4651327368569684e-07,
|
|
"logits/chosen": 1.5183682441711426,
|
|
"logits/rejected": 1.5717380046844482,
|
|
"loss": 4.6103,
|
|
"step": 140
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.014003738760948181,
|
|
"beta_dpo/beta_used_raw": 0.006822553928941488,
|
|
"beta_dpo/gap_mean": 10.601947784423828,
|
|
"beta_dpo/gap_std": 19.063888549804688,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.29528795811518327,
|
|
"grad_norm": 43.43609619140625,
|
|
"learning_rate": 4.453763107901675e-07,
|
|
"logits/chosen": 1.4985511302947998,
|
|
"logits/rejected": 1.5825482606887817,
|
|
"loss": 4.9577,
|
|
"step": 141
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.022530585527420044,
|
|
"beta_dpo/beta_used_raw": 0.0178590789437294,
|
|
"beta_dpo/gap_mean": 10.459101676940918,
|
|
"beta_dpo/gap_std": 20.326278686523438,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.2973821989528796,
|
|
"grad_norm": 39.85797882080078,
|
|
"learning_rate": 4.4422887045602674e-07,
|
|
"logits/chosen": 2.03916335105896,
|
|
"logits/rejected": 1.778942584991455,
|
|
"loss": 4.5715,
|
|
"step": 142
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.016412286087870598,
|
|
"beta_dpo/beta_used_raw": 0.009996837005019188,
|
|
"beta_dpo/gap_mean": 11.491534233093262,
|
|
"beta_dpo/gap_std": 21.220121383666992,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.2994764397905759,
|
|
"grad_norm": 45.14045333862305,
|
|
"learning_rate": 4.4307101421701755e-07,
|
|
"logits/chosen": 1.4854329824447632,
|
|
"logits/rejected": 1.3263810873031616,
|
|
"loss": 4.7507,
|
|
"step": 143
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009764298796653748,
|
|
"beta_dpo/beta_used_raw": -0.003037895541638136,
|
|
"beta_dpo/gap_mean": 12.049786567687988,
|
|
"beta_dpo/gap_std": 21.212291717529297,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.30157068062827225,
|
|
"grad_norm": 27.531478881835938,
|
|
"learning_rate": 4.419028041654559e-07,
|
|
"logits/chosen": 1.4089610576629639,
|
|
"logits/rejected": 1.3612356185913086,
|
|
"loss": 5.076,
|
|
"step": 144
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.015810877084732056,
|
|
"beta_dpo/beta_used_raw": 0.0027779447846114635,
|
|
"beta_dpo/gap_mean": 12.608784675598145,
|
|
"beta_dpo/gap_std": 21.368688583374023,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.3036649214659686,
|
|
"grad_norm": 30.809558868408203,
|
|
"learning_rate": 4.4072430294890166e-07,
|
|
"logits/chosen": 1.900479793548584,
|
|
"logits/rejected": 1.9564039707183838,
|
|
"loss": 4.7396,
|
|
"step": 145
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.005745714530348778,
|
|
"beta_dpo/beta_used_raw": -0.005364367738366127,
|
|
"beta_dpo/gap_mean": 11.638813018798828,
|
|
"beta_dpo/gap_std": 20.36126708984375,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.3057591623036649,
|
|
"grad_norm": 19.06623077392578,
|
|
"learning_rate": 4.395355737667985e-07,
|
|
"logits/chosen": 1.4604260921478271,
|
|
"logits/rejected": 1.6670466661453247,
|
|
"loss": 5.2938,
|
|
"step": 146
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01017170213162899,
|
|
"beta_dpo/beta_used_raw": -0.009537998586893082,
|
|
"beta_dpo/gap_mean": 10.316466331481934,
|
|
"beta_dpo/gap_std": 20.63652992248535,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.3078534031413613,
|
|
"grad_norm": 28.077390670776367,
|
|
"learning_rate": 4.3833668036708483e-07,
|
|
"logits/chosen": 1.601604700088501,
|
|
"logits/rejected": 1.5656179189682007,
|
|
"loss": 5.1406,
|
|
"step": 147
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.013707359321415424,
|
|
"beta_dpo/beta_used_raw": 0.004445759579539299,
|
|
"beta_dpo/gap_mean": 10.2113618850708,
|
|
"beta_dpo/gap_std": 21.985990524291992,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.3099476439790576,
|
|
"grad_norm": 33.60511016845703,
|
|
"learning_rate": 4.3712768704277524e-07,
|
|
"logits/chosen": 1.3828201293945312,
|
|
"logits/rejected": 1.3478338718414307,
|
|
"loss": 5.0395,
|
|
"step": 148
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.008657879196107388,
|
|
"beta_dpo/beta_used_raw": 0.0010744923492893577,
|
|
"beta_dpo/gap_mean": 11.151147842407227,
|
|
"beta_dpo/gap_std": 20.73192024230957,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.31204188481675393,
|
|
"grad_norm": 28.912668228149414,
|
|
"learning_rate": 4.3590865862851263e-07,
|
|
"logits/chosen": 2.108185291290283,
|
|
"logits/rejected": 1.9332281351089478,
|
|
"loss": 5.2272,
|
|
"step": 149
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01538037694990635,
|
|
"beta_dpo/beta_used_raw": 0.010280387476086617,
|
|
"beta_dpo/gap_mean": 11.221325874328613,
|
|
"beta_dpo/gap_std": 20.35310173034668,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.31413612565445026,
|
|
"grad_norm": 42.4512939453125,
|
|
"learning_rate": 4.346796604970912e-07,
|
|
"logits/chosen": 1.8120979070663452,
|
|
"logits/rejected": 1.7387409210205078,
|
|
"loss": 4.8116,
|
|
"step": 150
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.028699517250061035,
|
|
"beta_dpo/beta_used_raw": 0.02786320261657238,
|
|
"beta_dpo/gap_mean": 12.776216506958008,
|
|
"beta_dpo/gap_std": 21.87693977355957,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.3162303664921466,
|
|
"grad_norm": 46.40315628051758,
|
|
"learning_rate": 4.3344075855595097e-07,
|
|
"logits/chosen": 1.5828508138656616,
|
|
"logits/rejected": 1.6035374402999878,
|
|
"loss": 4.197,
|
|
"step": 151
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.014542932622134686,
|
|
"beta_dpo/beta_used_raw": 9.965314529836178e-05,
|
|
"beta_dpo/gap_mean": 13.169672966003418,
|
|
"beta_dpo/gap_std": 21.826007843017578,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.3183246073298429,
|
|
"grad_norm": 33.306884765625,
|
|
"learning_rate": 4.3219201924364323e-07,
|
|
"logits/chosen": 1.3182780742645264,
|
|
"logits/rejected": 1.7138738632202148,
|
|
"loss": 4.8325,
|
|
"step": 152
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02487529069185257,
|
|
"beta_dpo/beta_used_raw": 0.022432954981923103,
|
|
"beta_dpo/gap_mean": 15.099176406860352,
|
|
"beta_dpo/gap_std": 21.7235050201416,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.3204188481675393,
|
|
"grad_norm": 43.83867263793945,
|
|
"learning_rate": 4.309335095262675e-07,
|
|
"logits/chosen": 1.5923478603363037,
|
|
"logits/rejected": 1.5436244010925293,
|
|
"loss": 4.2459,
|
|
"step": 153
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.019335608929395676,
|
|
"beta_dpo/beta_used_raw": 0.007598421536386013,
|
|
"beta_dpo/gap_mean": 15.192681312561035,
|
|
"beta_dpo/gap_std": 23.77366828918457,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.3225130890052356,
|
|
"grad_norm": 33.363792419433594,
|
|
"learning_rate": 4.2966529689388064e-07,
|
|
"logits/chosen": 1.4466509819030762,
|
|
"logits/rejected": 1.4517470598220825,
|
|
"loss": 4.5467,
|
|
"step": 154
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.018129050731658936,
|
|
"beta_dpo/beta_used_raw": 0.009811091236770153,
|
|
"beta_dpo/gap_mean": 13.158918380737305,
|
|
"beta_dpo/gap_std": 22.92918586730957,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.32460732984293195,
|
|
"grad_norm": 34.454673767089844,
|
|
"learning_rate": 4.2838744935687716e-07,
|
|
"logits/chosen": 1.3940773010253906,
|
|
"logits/rejected": 1.3722490072250366,
|
|
"loss": 4.5952,
|
|
"step": 155
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.022653408348560333,
|
|
"beta_dpo/beta_used_raw": 0.017730802297592163,
|
|
"beta_dpo/gap_mean": 13.508572578430176,
|
|
"beta_dpo/gap_std": 24.86406135559082,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.3267015706806283,
|
|
"grad_norm": 49.66926574707031,
|
|
"learning_rate": 4.271000354423425e-07,
|
|
"logits/chosen": 1.7816330194473267,
|
|
"logits/rejected": 1.626936912536621,
|
|
"loss": 4.4733,
|
|
"step": 156
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.00911460816860199,
|
|
"beta_dpo/beta_used_raw": -0.00761047936975956,
|
|
"beta_dpo/gap_mean": 13.641767501831055,
|
|
"beta_dpo/gap_std": 25.110754013061523,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.3287958115183246,
|
|
"grad_norm": 28.902727127075195,
|
|
"learning_rate": 4.258031241903777e-07,
|
|
"logits/chosen": 1.8037209510803223,
|
|
"logits/rejected": 1.9432283639907837,
|
|
"loss": 5.0592,
|
|
"step": 157
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.022043395787477493,
|
|
"beta_dpo/beta_used_raw": 0.013519931584596634,
|
|
"beta_dpo/gap_mean": 12.658366203308105,
|
|
"beta_dpo/gap_std": 24.050304412841797,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.3308900523560209,
|
|
"grad_norm": 49.30216979980469,
|
|
"learning_rate": 4.2449678515039743e-07,
|
|
"logits/chosen": 1.9826464653015137,
|
|
"logits/rejected": 2.0838711261749268,
|
|
"loss": 4.5538,
|
|
"step": 158
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.013070004992187023,
|
|
"beta_dpo/beta_used_raw": 0.00028916902374476194,
|
|
"beta_dpo/gap_mean": 11.834725379943848,
|
|
"beta_dpo/gap_std": 25.340810775756836,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.33298429319371725,
|
|
"grad_norm": 54.22214126586914,
|
|
"learning_rate": 4.2318108837739986e-07,
|
|
"logits/chosen": 1.4999477863311768,
|
|
"logits/rejected": 1.369155764579773,
|
|
"loss": 5.1162,
|
|
"step": 159
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.024159716442227364,
|
|
"beta_dpo/beta_used_raw": 0.009181090630590916,
|
|
"beta_dpo/gap_mean": 13.555554389953613,
|
|
"beta_dpo/gap_std": 24.396202087402344,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.33507853403141363,
|
|
"grad_norm": 49.655757904052734,
|
|
"learning_rate": 4.218561044282098e-07,
|
|
"logits/chosen": 1.9025671482086182,
|
|
"logits/rejected": 1.5475167036056519,
|
|
"loss": 4.3937,
|
|
"step": 160
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.026369977742433548,
|
|
"beta_dpo/beta_used_raw": 0.018488148227334023,
|
|
"beta_dpo/gap_mean": 14.321226119995117,
|
|
"beta_dpo/gap_std": 25.79440689086914,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.33717277486910996,
|
|
"grad_norm": 56.462730407714844,
|
|
"learning_rate": 4.2052190435769554e-07,
|
|
"logits/chosen": 1.4102540016174316,
|
|
"logits/rejected": 1.2628462314605713,
|
|
"loss": 4.4445,
|
|
"step": 161
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.026266392320394516,
|
|
"beta_dpo/beta_used_raw": 0.02344740927219391,
|
|
"beta_dpo/gap_mean": 15.67480182647705,
|
|
"beta_dpo/gap_std": 26.169410705566406,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.3392670157068063,
|
|
"grad_norm": 64.92961120605469,
|
|
"learning_rate": 4.1917855971495763e-07,
|
|
"logits/chosen": 1.5759161710739136,
|
|
"logits/rejected": 1.4259589910507202,
|
|
"loss": 4.3731,
|
|
"step": 162
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010873702354729176,
|
|
"beta_dpo/beta_used_raw": -0.004741042852401733,
|
|
"beta_dpo/gap_mean": 15.373876571655273,
|
|
"beta_dpo/gap_std": 24.578004837036133,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.3413612565445026,
|
|
"grad_norm": 35.616493225097656,
|
|
"learning_rate": 4.1782614253949255e-07,
|
|
"logits/chosen": 1.7332031726837158,
|
|
"logits/rejected": 1.7425578832626343,
|
|
"loss": 4.9473,
|
|
"step": 163
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.025458887219429016,
|
|
"beta_dpo/beta_used_raw": 0.015234654769301414,
|
|
"beta_dpo/gap_mean": 14.888280868530273,
|
|
"beta_dpo/gap_std": 24.105310440063477,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.34345549738219894,
|
|
"grad_norm": 43.64400863647461,
|
|
"learning_rate": 4.164647253573289e-07,
|
|
"logits/chosen": 1.4580204486846924,
|
|
"logits/rejected": 1.6339696645736694,
|
|
"loss": 4.1504,
|
|
"step": 164
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010639484040439129,
|
|
"beta_dpo/beta_used_raw": -0.005685774143785238,
|
|
"beta_dpo/gap_mean": 14.408177375793457,
|
|
"beta_dpo/gap_std": 23.938827514648438,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.34554973821989526,
|
|
"grad_norm": 29.951501846313477,
|
|
"learning_rate": 4.1509438117713863e-07,
|
|
"logits/chosen": 2.0571203231811523,
|
|
"logits/rejected": 2.0520873069763184,
|
|
"loss": 4.9575,
|
|
"step": 165
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.013327265158295631,
|
|
"beta_dpo/beta_used_raw": 0.0013559209182858467,
|
|
"beta_dpo/gap_mean": 12.96614933013916,
|
|
"beta_dpo/gap_std": 25.120412826538086,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.34764397905759165,
|
|
"grad_norm": 36.66180419921875,
|
|
"learning_rate": 4.137151834863213e-07,
|
|
"logits/chosen": 1.6311808824539185,
|
|
"logits/rejected": 1.59664785861969,
|
|
"loss": 4.864,
|
|
"step": 166
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03245996683835983,
|
|
"beta_dpo/beta_used_raw": 0.031837042421102524,
|
|
"beta_dpo/gap_mean": 12.544686317443848,
|
|
"beta_dpo/gap_std": 25.848405838012695,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.34973821989528797,
|
|
"grad_norm": 92.97169494628906,
|
|
"learning_rate": 4.123272062470633e-07,
|
|
"logits/chosen": 1.7561887502670288,
|
|
"logits/rejected": 1.5244758129119873,
|
|
"loss": 4.5144,
|
|
"step": 167
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.029823748394846916,
|
|
"beta_dpo/beta_used_raw": 0.022257408127188683,
|
|
"beta_dpo/gap_mean": 15.493486404418945,
|
|
"beta_dpo/gap_std": 25.659543991088867,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.3518324607329843,
|
|
"grad_norm": 77.38569641113281,
|
|
"learning_rate": 4.1093052389237174e-07,
|
|
"logits/chosen": 1.3179136514663696,
|
|
"logits/rejected": 1.1715956926345825,
|
|
"loss": 4.0093,
|
|
"step": 168
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01944730058312416,
|
|
"beta_dpo/beta_used_raw": 0.01372382789850235,
|
|
"beta_dpo/gap_mean": 16.43326187133789,
|
|
"beta_dpo/gap_std": 25.575986862182617,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.3539267015706806,
|
|
"grad_norm": 51.683170318603516,
|
|
"learning_rate": 4.0952521132208267e-07,
|
|
"logits/chosen": 1.7002696990966797,
|
|
"logits/rejected": 1.8345009088516235,
|
|
"loss": 4.4362,
|
|
"step": 169
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.003313018474727869,
|
|
"beta_dpo/beta_used_raw": -0.008983142673969269,
|
|
"beta_dpo/gap_mean": 18.35196304321289,
|
|
"beta_dpo/gap_std": 25.07719612121582,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.35602094240837695,
|
|
"grad_norm": 15.952840805053711,
|
|
"learning_rate": 4.081113438988443e-07,
|
|
"logits/chosen": 1.7776952981948853,
|
|
"logits/rejected": 1.684997797012329,
|
|
"loss": 5.3302,
|
|
"step": 170
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.015446186996996403,
|
|
"beta_dpo/beta_used_raw": -0.0005397915374487638,
|
|
"beta_dpo/gap_mean": 17.90646743774414,
|
|
"beta_dpo/gap_std": 25.070568084716797,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.3581151832460733,
|
|
"grad_norm": 43.073421478271484,
|
|
"learning_rate": 4.0668899744407567e-07,
|
|
"logits/chosen": 1.6446658372879028,
|
|
"logits/rejected": 1.5069741010665894,
|
|
"loss": 4.6088,
|
|
"step": 171
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.009129172191023827,
|
|
"beta_dpo/beta_used_raw": -0.007493500132113695,
|
|
"beta_dpo/gap_mean": 15.301614761352539,
|
|
"beta_dpo/gap_std": 25.80316925048828,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.36020942408376966,
|
|
"grad_norm": 37.880577087402344,
|
|
"learning_rate": 4.0525824823390043e-07,
|
|
"logits/chosen": 1.5476915836334229,
|
|
"logits/rejected": 1.720083236694336,
|
|
"loss": 5.1515,
|
|
"step": 172
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.022744204849004745,
|
|
"beta_dpo/beta_used_raw": 0.012280027382075787,
|
|
"beta_dpo/gap_mean": 14.178143501281738,
|
|
"beta_dpo/gap_std": 26.050079345703125,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.362303664921466,
|
|
"grad_norm": 45.3817024230957,
|
|
"learning_rate": 4.0381917299505686e-07,
|
|
"logits/chosen": 1.6695926189422607,
|
|
"logits/rejected": 1.337355136871338,
|
|
"loss": 4.4431,
|
|
"step": 173
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0274057500064373,
|
|
"beta_dpo/beta_used_raw": 0.015672199428081512,
|
|
"beta_dpo/gap_mean": 16.109161376953125,
|
|
"beta_dpo/gap_std": 25.606597900390625,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.3643979057591623,
|
|
"grad_norm": 50.70249557495117,
|
|
"learning_rate": 4.0237184890078243e-07,
|
|
"logits/chosen": 2.1374263763427734,
|
|
"logits/rejected": 1.9051423072814941,
|
|
"loss": 4.1047,
|
|
"step": 174
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.022395484149456024,
|
|
"beta_dpo/beta_used_raw": 0.020498108118772507,
|
|
"beta_dpo/gap_mean": 16.25571632385254,
|
|
"beta_dpo/gap_std": 25.667404174804688,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.36649214659685864,
|
|
"grad_norm": 41.90084457397461,
|
|
"learning_rate": 4.00916353566676e-07,
|
|
"logits/chosen": 1.5944123268127441,
|
|
"logits/rejected": 1.6246697902679443,
|
|
"loss": 4.3686,
|
|
"step": 175
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0224157627671957,
|
|
"beta_dpo/beta_used_raw": 0.011731607839465141,
|
|
"beta_dpo/gap_mean": 13.99099349975586,
|
|
"beta_dpo/gap_std": 27.471248626708984,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.36858638743455496,
|
|
"grad_norm": 63.70330047607422,
|
|
"learning_rate": 3.994527650465352e-07,
|
|
"logits/chosen": 1.1375683546066284,
|
|
"logits/rejected": 1.2096847295761108,
|
|
"loss": 4.6342,
|
|
"step": 176
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01675129495561123,
|
|
"beta_dpo/beta_used_raw": 0.0006211861036717892,
|
|
"beta_dpo/gap_mean": 11.935150146484375,
|
|
"beta_dpo/gap_std": 28.26276397705078,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.3706806282722513,
|
|
"grad_norm": 45.043968200683594,
|
|
"learning_rate": 3.979811618281705e-07,
|
|
"logits/chosen": 1.7941234111785889,
|
|
"logits/rejected": 1.5880272388458252,
|
|
"loss": 4.8643,
|
|
"step": 177
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.025348788127303123,
|
|
"beta_dpo/beta_used_raw": 0.017674200236797333,
|
|
"beta_dpo/gap_mean": 14.811019897460938,
|
|
"beta_dpo/gap_std": 28.847448348999023,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.37277486910994767,
|
|
"grad_norm": 72.1207046508789,
|
|
"learning_rate": 3.9650162282919654e-07,
|
|
"logits/chosen": 1.5051298141479492,
|
|
"logits/rejected": 1.527164101600647,
|
|
"loss": 4.3474,
|
|
"step": 178
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.024870071560144424,
|
|
"beta_dpo/beta_used_raw": 0.0016013816930353642,
|
|
"beta_dpo/gap_mean": 15.476740837097168,
|
|
"beta_dpo/gap_std": 27.874025344848633,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.374869109947644,
|
|
"grad_norm": 51.20316696166992,
|
|
"learning_rate": 3.9501422739279953e-07,
|
|
"logits/chosen": 1.2467797994613647,
|
|
"logits/rejected": 1.2580769062042236,
|
|
"loss": 4.336,
|
|
"step": 179
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03025144338607788,
|
|
"beta_dpo/beta_used_raw": 0.0288880355656147,
|
|
"beta_dpo/gap_mean": 15.403278350830078,
|
|
"beta_dpo/gap_std": 27.956090927124023,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.3769633507853403,
|
|
"grad_norm": 47.122596740722656,
|
|
"learning_rate": 3.935190552834828e-07,
|
|
"logits/chosen": 1.592002034187317,
|
|
"logits/rejected": 1.4925694465637207,
|
|
"loss": 4.0441,
|
|
"step": 180
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.020815353840589523,
|
|
"beta_dpo/beta_used_raw": 0.016023779287934303,
|
|
"beta_dpo/gap_mean": 16.58497428894043,
|
|
"beta_dpo/gap_std": 27.86528205871582,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.37905759162303665,
|
|
"grad_norm": 44.79503631591797,
|
|
"learning_rate": 3.920161866827889e-07,
|
|
"logits/chosen": 1.3529762029647827,
|
|
"logits/rejected": 1.3037437200546265,
|
|
"loss": 4.3222,
|
|
"step": 181
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.023737944662570953,
|
|
"beta_dpo/beta_used_raw": 0.017001213505864143,
|
|
"beta_dpo/gap_mean": 17.020750045776367,
|
|
"beta_dpo/gap_std": 27.084413528442383,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.381151832460733,
|
|
"grad_norm": 51.928287506103516,
|
|
"learning_rate": 3.90505702185e-07,
|
|
"logits/chosen": 1.4569286108016968,
|
|
"logits/rejected": 1.4212331771850586,
|
|
"loss": 4.1784,
|
|
"step": 182
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01689002849161625,
|
|
"beta_dpo/beta_used_raw": 0.011375264264643192,
|
|
"beta_dpo/gap_mean": 18.06576919555664,
|
|
"beta_dpo/gap_std": 28.06887435913086,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.3832460732984293,
|
|
"grad_norm": 32.73753356933594,
|
|
"learning_rate": 3.889876827928156e-07,
|
|
"logits/chosen": 1.1345239877700806,
|
|
"logits/rejected": 1.2237826585769653,
|
|
"loss": 4.4976,
|
|
"step": 183
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02527700364589691,
|
|
"beta_dpo/beta_used_raw": 0.02064402773976326,
|
|
"beta_dpo/gap_mean": 20.417850494384766,
|
|
"beta_dpo/gap_std": 29.51577377319336,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.38534031413612563,
|
|
"grad_norm": 52.284393310546875,
|
|
"learning_rate": 3.874622099130087e-07,
|
|
"logits/chosen": 1.6561375856399536,
|
|
"logits/rejected": 1.639233946800232,
|
|
"loss": 4.2447,
|
|
"step": 184
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.006265235599130392,
|
|
"beta_dpo/beta_used_raw": -0.01659151166677475,
|
|
"beta_dpo/gap_mean": 20.119701385498047,
|
|
"beta_dpo/gap_std": 30.129091262817383,
|
|
"beta_dpo/mask_keep_frac": 0.625,
|
|
"epoch": 0.387434554973822,
|
|
"grad_norm": 34.30731201171875,
|
|
"learning_rate": 3.859293653520604e-07,
|
|
"logits/chosen": 1.819935917854309,
|
|
"logits/rejected": 1.873971939086914,
|
|
"loss": 5.1,
|
|
"step": 185
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.016711510717868805,
|
|
"beta_dpo/beta_used_raw": 0.00014704966451972723,
|
|
"beta_dpo/gap_mean": 17.954086303710938,
|
|
"beta_dpo/gap_std": 29.141178131103516,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.38952879581151834,
|
|
"grad_norm": 43.99291229248047,
|
|
"learning_rate": 3.8438923131177237e-07,
|
|
"logits/chosen": 1.7304484844207764,
|
|
"logits/rejected": 1.6357572078704834,
|
|
"loss": 4.5823,
|
|
"step": 186
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010872665792703629,
|
|
"beta_dpo/beta_used_raw": -0.0031126337125897408,
|
|
"beta_dpo/gap_mean": 16.949188232421875,
|
|
"beta_dpo/gap_std": 30.313583374023438,
|
|
"beta_dpo/mask_keep_frac": 0.59375,
|
|
"epoch": 0.39162303664921466,
|
|
"grad_norm": 29.05012321472168,
|
|
"learning_rate": 3.828418903848593e-07,
|
|
"logits/chosen": 1.5062894821166992,
|
|
"logits/rejected": 1.626598834991455,
|
|
"loss": 4.9126,
|
|
"step": 187
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.021171947941184044,
|
|
"beta_dpo/beta_used_raw": 0.010028712451457977,
|
|
"beta_dpo/gap_mean": 16.50074577331543,
|
|
"beta_dpo/gap_std": 30.938051223754883,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.393717277486911,
|
|
"grad_norm": 45.898658752441406,
|
|
"learning_rate": 3.812874255505191e-07,
|
|
"logits/chosen": 1.5269906520843506,
|
|
"logits/rejected": 1.3458209037780762,
|
|
"loss": 4.5338,
|
|
"step": 188
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03484039008617401,
|
|
"beta_dpo/beta_used_raw": 0.022049371153116226,
|
|
"beta_dpo/gap_mean": 17.477540969848633,
|
|
"beta_dpo/gap_std": 29.527908325195312,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.3958115183246073,
|
|
"grad_norm": 70.90847778320312,
|
|
"learning_rate": 3.797259201699833e-07,
|
|
"logits/chosen": 1.5551743507385254,
|
|
"logits/rejected": 1.6014527082443237,
|
|
"loss": 3.8358,
|
|
"step": 189
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0200703926384449,
|
|
"beta_dpo/beta_used_raw": 0.012458120472729206,
|
|
"beta_dpo/gap_mean": 18.339256286621094,
|
|
"beta_dpo/gap_std": 28.938512802124023,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.39790575916230364,
|
|
"grad_norm": 56.81261444091797,
|
|
"learning_rate": 3.781574579820464e-07,
|
|
"logits/chosen": 0.9362454414367676,
|
|
"logits/rejected": 0.9899096488952637,
|
|
"loss": 4.414,
|
|
"step": 190
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01700519025325775,
|
|
"beta_dpo/beta_used_raw": -0.005064443219453096,
|
|
"beta_dpo/gap_mean": 18.290935516357422,
|
|
"beta_dpo/gap_std": 30.99585723876953,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.4,
|
|
"grad_norm": 56.54753494262695,
|
|
"learning_rate": 3.765821230985757e-07,
|
|
"logits/chosen": 1.404714822769165,
|
|
"logits/rejected": 1.5215625762939453,
|
|
"loss": 4.8064,
|
|
"step": 191
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.031894296407699585,
|
|
"beta_dpo/beta_used_raw": 0.020558489486575127,
|
|
"beta_dpo/gap_mean": 16.527379989624023,
|
|
"beta_dpo/gap_std": 31.373319625854492,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.40209424083769635,
|
|
"grad_norm": 70.80916595458984,
|
|
"learning_rate": 3.75e-07,
|
|
"logits/chosen": 1.5337512493133545,
|
|
"logits/rejected": 1.7164283990859985,
|
|
"loss": 4.1842,
|
|
"step": 192
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01636136882007122,
|
|
"beta_dpo/beta_used_raw": 0.005046369507908821,
|
|
"beta_dpo/gap_mean": 15.377167701721191,
|
|
"beta_dpo/gap_std": 31.938879013061523,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.4041884816753927,
|
|
"grad_norm": 55.80176544189453,
|
|
"learning_rate": 3.734111735307796e-07,
|
|
"logits/chosen": 1.7271709442138672,
|
|
"logits/rejected": 1.558451533317566,
|
|
"loss": 4.6877,
|
|
"step": 193
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.007611277513206005,
|
|
"beta_dpo/beta_used_raw": -0.01890621893107891,
|
|
"beta_dpo/gap_mean": 14.705482482910156,
|
|
"beta_dpo/gap_std": 30.904098510742188,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.406282722513089,
|
|
"grad_norm": 22.856273651123047,
|
|
"learning_rate": 3.7181572889485623e-07,
|
|
"logits/chosen": 1.3973853588104248,
|
|
"logits/rejected": 1.4764728546142578,
|
|
"loss": 5.1599,
|
|
"step": 194
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.031484756618738174,
|
|
"beta_dpo/beta_used_raw": 0.0071922894567251205,
|
|
"beta_dpo/gap_mean": 13.331430435180664,
|
|
"beta_dpo/gap_std": 30.900182723999023,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.4083769633507853,
|
|
"grad_norm": 66.02396392822266,
|
|
"learning_rate": 3.7021375165108377e-07,
|
|
"logits/chosen": 1.272679328918457,
|
|
"logits/rejected": 1.2566474676132202,
|
|
"loss": 4.3933,
|
|
"step": 195
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03322502225637436,
|
|
"beta_dpo/beta_used_raw": 0.02791755273938179,
|
|
"beta_dpo/gap_mean": 14.905118942260742,
|
|
"beta_dpo/gap_std": 30.485837936401367,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.41047120418848165,
|
|
"grad_norm": 79.28156280517578,
|
|
"learning_rate": 3.6860532770864005e-07,
|
|
"logits/chosen": 1.275534749031067,
|
|
"logits/rejected": 1.4435292482376099,
|
|
"loss": 4.0546,
|
|
"step": 196
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04939180985093117,
|
|
"beta_dpo/beta_used_raw": 0.043553970754146576,
|
|
"beta_dpo/gap_mean": 18.400535583496094,
|
|
"beta_dpo/gap_std": 30.686927795410156,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.41256544502617803,
|
|
"grad_norm": 67.8173828125,
|
|
"learning_rate": 3.6699054332241985e-07,
|
|
"logits/chosen": 1.38494873046875,
|
|
"logits/rejected": 1.254716157913208,
|
|
"loss": 3.338,
|
|
"step": 197
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.022166196256875992,
|
|
"beta_dpo/beta_used_raw": 0.007883399724960327,
|
|
"beta_dpo/gap_mean": 20.444957733154297,
|
|
"beta_dpo/gap_std": 32.35297393798828,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.41465968586387436,
|
|
"grad_norm": 60.04436492919922,
|
|
"learning_rate": 3.653694850884091e-07,
|
|
"logits/chosen": 1.9333720207214355,
|
|
"logits/rejected": 2.020900011062622,
|
|
"loss": 4.4855,
|
|
"step": 198
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02409629337489605,
|
|
"beta_dpo/beta_used_raw": 0.021340614184737206,
|
|
"beta_dpo/gap_mean": 19.7289981842041,
|
|
"beta_dpo/gap_std": 33.021812438964844,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.4167539267015707,
|
|
"grad_norm": 47.563514709472656,
|
|
"learning_rate": 3.6374223993904124e-07,
|
|
"logits/chosen": 0.8853669762611389,
|
|
"logits/rejected": 0.8789573907852173,
|
|
"loss": 4.2058,
|
|
"step": 199
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.019850242882966995,
|
|
"beta_dpo/beta_used_raw": 0.0021575437858700752,
|
|
"beta_dpo/gap_mean": 18.26460075378418,
|
|
"beta_dpo/gap_std": 35.18665313720703,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.418848167539267,
|
|
"grad_norm": 75.98006439208984,
|
|
"learning_rate": 3.621088951385353e-07,
|
|
"logits/chosen": 1.4998607635498047,
|
|
"logits/rejected": 1.4999333620071411,
|
|
"loss": 4.6811,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.418848167539267,
|
|
"eval_beta_dpo/beta_used": 0.027261212468147278,
|
|
"eval_beta_dpo/beta_used_raw": 0.011497409082949162,
|
|
"eval_beta_dpo/gap_mean": 17.349489212036133,
|
|
"eval_beta_dpo/gap_std": 36.29584884643555,
|
|
"eval_beta_dpo/mask_keep_frac": 1.0,
|
|
"eval_logits/chosen": 1.4600857496261597,
|
|
"eval_logits/rejected": 1.4735403060913086,
|
|
"eval_loss": 0.582222044467926,
|
|
"eval_runtime": 93.942,
|
|
"eval_samples_per_second": 21.29,
|
|
"eval_steps_per_second": 1.331,
|
|
"step": 200
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.015480000525712967,
|
|
"beta_dpo/beta_used_raw": 0.0017268508672714233,
|
|
"beta_dpo/gap_mean": 16.916603088378906,
|
|
"beta_dpo/gap_std": 34.051475524902344,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.42094240837696334,
|
|
"grad_norm": 41.899105072021484,
|
|
"learning_rate": 3.604695382782159e-07,
|
|
"logits/chosen": 1.3517783880233765,
|
|
"logits/rejected": 1.4856456518173218,
|
|
"loss": 4.8087,
|
|
"step": 201
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03667040914297104,
|
|
"beta_dpo/beta_used_raw": 0.02494371309876442,
|
|
"beta_dpo/gap_mean": 18.696678161621094,
|
|
"beta_dpo/gap_std": 34.44628143310547,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.42303664921465967,
|
|
"grad_norm": 94.09333038330078,
|
|
"learning_rate": 3.588242572718162e-07,
|
|
"logits/chosen": 1.9142837524414062,
|
|
"logits/rejected": 1.8261678218841553,
|
|
"loss": 4.2233,
|
|
"step": 202
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.017151907086372375,
|
|
"beta_dpo/beta_used_raw": 0.00911116972565651,
|
|
"beta_dpo/gap_mean": 16.54568862915039,
|
|
"beta_dpo/gap_std": 32.38970184326172,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.42513089005235605,
|
|
"grad_norm": 44.56381607055664,
|
|
"learning_rate": 3.571731403507635e-07,
|
|
"logits/chosen": 1.4302637577056885,
|
|
"logits/rejected": 1.2982755899429321,
|
|
"loss": 4.5763,
|
|
"step": 203
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.034039054065942764,
|
|
"beta_dpo/beta_used_raw": 0.0323847234249115,
|
|
"beta_dpo/gap_mean": 18.076196670532227,
|
|
"beta_dpo/gap_std": 31.370433807373047,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.4272251308900524,
|
|
"grad_norm": 71.95513153076172,
|
|
"learning_rate": 3.5551627605944746e-07,
|
|
"logits/chosen": 2.1505026817321777,
|
|
"logits/rejected": 2.025639772415161,
|
|
"loss": 3.8071,
|
|
"step": 204
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.027348071336746216,
|
|
"beta_dpo/beta_used_raw": 0.006836746819317341,
|
|
"beta_dpo/gap_mean": 18.946754455566406,
|
|
"beta_dpo/gap_std": 31.32244110107422,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.4293193717277487,
|
|
"grad_norm": 45.75480651855469,
|
|
"learning_rate": 3.5385375325047163e-07,
|
|
"logits/chosen": 1.419930100440979,
|
|
"logits/rejected": 1.7142930030822754,
|
|
"loss": 4.1724,
|
|
"step": 205
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.016552381217479706,
|
|
"beta_dpo/beta_used_raw": -0.0049156793393194675,
|
|
"beta_dpo/gap_mean": 19.863826751708984,
|
|
"beta_dpo/gap_std": 30.71218490600586,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.431413612565445,
|
|
"grad_norm": 41.705875396728516,
|
|
"learning_rate": 3.5218566107988867e-07,
|
|
"logits/chosen": 1.124336838722229,
|
|
"logits/rejected": 1.3756214380264282,
|
|
"loss": 4.77,
|
|
"step": 206
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.015663469210267067,
|
|
"beta_dpo/beta_used_raw": 0.0052419002167880535,
|
|
"beta_dpo/gap_mean": 17.88925552368164,
|
|
"beta_dpo/gap_std": 31.518335342407227,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.43350785340314135,
|
|
"grad_norm": 69.29541015625,
|
|
"learning_rate": 3.505120890024195e-07,
|
|
"logits/chosen": 1.4753804206848145,
|
|
"logits/rejected": 1.621216058731079,
|
|
"loss": 4.8643,
|
|
"step": 207
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02139691449701786,
|
|
"beta_dpo/beta_used_raw": 0.005481313914060593,
|
|
"beta_dpo/gap_mean": 16.749000549316406,
|
|
"beta_dpo/gap_std": 32.0452880859375,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.4356020942408377,
|
|
"grad_norm": 42.13248825073242,
|
|
"learning_rate": 3.4883312676665534e-07,
|
|
"logits/chosen": 1.683328628540039,
|
|
"logits/rejected": 1.6666276454925537,
|
|
"loss": 4.4627,
|
|
"step": 208
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.022991986945271492,
|
|
"beta_dpo/beta_used_raw": 0.0012511502718552947,
|
|
"beta_dpo/gap_mean": 16.64447784423828,
|
|
"beta_dpo/gap_std": 31.43779945373535,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.437696335078534,
|
|
"grad_norm": 47.651954650878906,
|
|
"learning_rate": 3.4714886441024573e-07,
|
|
"logits/chosen": 1.4982630014419556,
|
|
"logits/rejected": 1.2422916889190674,
|
|
"loss": 4.65,
|
|
"step": 209
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.023505035787820816,
|
|
"beta_dpo/beta_used_raw": 0.01351526565849781,
|
|
"beta_dpo/gap_mean": 16.755630493164062,
|
|
"beta_dpo/gap_std": 30.364093780517578,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.4397905759162304,
|
|
"grad_norm": 40.224185943603516,
|
|
"learning_rate": 3.454593922550693e-07,
|
|
"logits/chosen": 1.622258186340332,
|
|
"logits/rejected": 1.7734078168869019,
|
|
"loss": 4.4717,
|
|
"step": 210
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.017476221546530724,
|
|
"beta_dpo/beta_used_raw": 0.009140146896243095,
|
|
"beta_dpo/gap_mean": 18.972339630126953,
|
|
"beta_dpo/gap_std": 29.722959518432617,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.4418848167539267,
|
|
"grad_norm": 31.73094940185547,
|
|
"learning_rate": 3.4376480090239047e-07,
|
|
"logits/chosen": 1.476683497428894,
|
|
"logits/rejected": 1.5253487825393677,
|
|
"loss": 4.7,
|
|
"step": 211
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.019394179806113243,
|
|
"beta_dpo/beta_used_raw": 0.015454288572072983,
|
|
"beta_dpo/gap_mean": 19.75035858154297,
|
|
"beta_dpo/gap_std": 29.714906692504883,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.44397905759162304,
|
|
"grad_norm": 41.0455322265625,
|
|
"learning_rate": 3.4206518122800055e-07,
|
|
"logits/chosen": 1.2970361709594727,
|
|
"logits/rejected": 1.37529456615448,
|
|
"loss": 4.3472,
|
|
"step": 212
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.011834348551928997,
|
|
"beta_dpo/beta_used_raw": -0.017926108092069626,
|
|
"beta_dpo/gap_mean": 17.426942825317383,
|
|
"beta_dpo/gap_std": 29.695297241210938,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.44607329842931936,
|
|
"grad_norm": 33.91777038574219,
|
|
"learning_rate": 3.403606243773448e-07,
|
|
"logits/chosen": 1.5579262971878052,
|
|
"logits/rejected": 1.68187415599823,
|
|
"loss": 4.9313,
|
|
"step": 213
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01894894242286682,
|
|
"beta_dpo/beta_used_raw": 0.013838745653629303,
|
|
"beta_dpo/gap_mean": 15.725707054138184,
|
|
"beta_dpo/gap_std": 30.105939865112305,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.4481675392670157,
|
|
"grad_norm": 39.40108871459961,
|
|
"learning_rate": 3.3865122176063385e-07,
|
|
"logits/chosen": 1.7685400247573853,
|
|
"logits/rejected": 1.8661746978759766,
|
|
"loss": 4.5791,
|
|
"step": 214
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010954681783914566,
|
|
"beta_dpo/beta_used_raw": -0.014796811155974865,
|
|
"beta_dpo/gap_mean": 16.314573287963867,
|
|
"beta_dpo/gap_std": 32.43828201293945,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.450261780104712,
|
|
"grad_norm": 35.527313232421875,
|
|
"learning_rate": 3.3693706504794243e-07,
|
|
"logits/chosen": 2.244570732116699,
|
|
"logits/rejected": 2.2803215980529785,
|
|
"loss": 4.9752,
|
|
"step": 215
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03374152258038521,
|
|
"beta_dpo/beta_used_raw": 0.027103085070848465,
|
|
"beta_dpo/gap_mean": 17.088348388671875,
|
|
"beta_dpo/gap_std": 31.838451385498047,
|
|
"beta_dpo/mask_keep_frac": 0.625,
|
|
"epoch": 0.4523560209424084,
|
|
"grad_norm": 109.9549331665039,
|
|
"learning_rate": 3.3521824616429284e-07,
|
|
"logits/chosen": 1.6181087493896484,
|
|
"logits/rejected": 1.51048743724823,
|
|
"loss": 4.0202,
|
|
"step": 216
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02223392203450203,
|
|
"beta_dpo/beta_used_raw": 0.01797131821513176,
|
|
"beta_dpo/gap_mean": 18.80224609375,
|
|
"beta_dpo/gap_std": 33.52192306518555,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.4544502617801047,
|
|
"grad_norm": 46.53620910644531,
|
|
"learning_rate": 3.334948572847253e-07,
|
|
"logits/chosen": 1.5461317300796509,
|
|
"logits/rejected": 1.6692814826965332,
|
|
"loss": 4.1595,
|
|
"step": 217
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0231946911662817,
|
|
"beta_dpo/beta_used_raw": 0.0011494825594127178,
|
|
"beta_dpo/gap_mean": 20.61969757080078,
|
|
"beta_dpo/gap_std": 33.30976486206055,
|
|
"beta_dpo/mask_keep_frac": 0.59375,
|
|
"epoch": 0.45654450261780105,
|
|
"grad_norm": 93.0323715209961,
|
|
"learning_rate": 3.317669908293554e-07,
|
|
"logits/chosen": 1.7362779378890991,
|
|
"logits/rejected": 1.9851727485656738,
|
|
"loss": 4.5681,
|
|
"step": 218
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01022251509130001,
|
|
"beta_dpo/beta_used_raw": -0.0047258916310966015,
|
|
"beta_dpo/gap_mean": 20.800567626953125,
|
|
"beta_dpo/gap_std": 31.367717742919922,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.4586387434554974,
|
|
"grad_norm": 25.090171813964844,
|
|
"learning_rate": 3.300347394584172e-07,
|
|
"logits/chosen": 1.3783564567565918,
|
|
"logits/rejected": 1.4508250951766968,
|
|
"loss": 4.8685,
|
|
"step": 219
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010799276642501354,
|
|
"beta_dpo/beta_used_raw": -0.0034070992842316628,
|
|
"beta_dpo/gap_mean": 21.666975021362305,
|
|
"beta_dpo/gap_std": 31.608016967773438,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.4607329842931937,
|
|
"grad_norm": 27.500185012817383,
|
|
"learning_rate": 3.2829819606729477e-07,
|
|
"logits/chosen": 2.0254147052764893,
|
|
"logits/rejected": 1.8281564712524414,
|
|
"loss": 4.886,
|
|
"step": 220
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.014519060961902142,
|
|
"beta_dpo/beta_used_raw": -0.009499384090304375,
|
|
"beta_dpo/gap_mean": 19.58493423461914,
|
|
"beta_dpo/gap_std": 32.41563415527344,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.46282722513089003,
|
|
"grad_norm": 48.36823272705078,
|
|
"learning_rate": 3.265574537815398e-07,
|
|
"logits/chosen": 1.2991694211959839,
|
|
"logits/rejected": 1.4876360893249512,
|
|
"loss": 4.8269,
|
|
"step": 221
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.015203127637505531,
|
|
"beta_dpo/beta_used_raw": -0.0034404161851853132,
|
|
"beta_dpo/gap_mean": 19.56608009338379,
|
|
"beta_dpo/gap_std": 32.176658630371094,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.4649214659685864,
|
|
"grad_norm": 48.740413665771484,
|
|
"learning_rate": 3.248126059518784e-07,
|
|
"logits/chosen": 1.5680516958236694,
|
|
"logits/rejected": 1.4788140058517456,
|
|
"loss": 4.7036,
|
|
"step": 222
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04362927004694939,
|
|
"beta_dpo/beta_used_raw": 0.04143592342734337,
|
|
"beta_dpo/gap_mean": 20.544513702392578,
|
|
"beta_dpo/gap_std": 32.305206298828125,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.46701570680628274,
|
|
"grad_norm": 91.45673370361328,
|
|
"learning_rate": 3.230637461492043e-07,
|
|
"logits/chosen": 1.3730167150497437,
|
|
"logits/rejected": 1.3536475896835327,
|
|
"loss": 3.6045,
|
|
"step": 223
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.027421563863754272,
|
|
"beta_dpo/beta_used_raw": 0.016309306025505066,
|
|
"beta_dpo/gap_mean": 20.78533935546875,
|
|
"beta_dpo/gap_std": 32.98493957519531,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.46910994764397906,
|
|
"grad_norm": 50.12267303466797,
|
|
"learning_rate": 3.213109681595612e-07,
|
|
"logits/chosen": 1.4133144617080688,
|
|
"logits/rejected": 1.5317778587341309,
|
|
"loss": 4.1259,
|
|
"step": 224
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01402560155838728,
|
|
"beta_dpo/beta_used_raw": -0.016284221783280373,
|
|
"beta_dpo/gap_mean": 21.371601104736328,
|
|
"beta_dpo/gap_std": 34.09131622314453,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.4712041884816754,
|
|
"grad_norm": 37.89866256713867,
|
|
"learning_rate": 3.1955436597911315e-07,
|
|
"logits/chosen": 1.8815144300460815,
|
|
"logits/rejected": 1.992702603340149,
|
|
"loss": 4.6298,
|
|
"step": 225
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.022889500483870506,
|
|
"beta_dpo/beta_used_raw": 0.017042387276887894,
|
|
"beta_dpo/gap_mean": 17.320327758789062,
|
|
"beta_dpo/gap_std": 35.05849075317383,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.4732984293193717,
|
|
"grad_norm": 60.14391326904297,
|
|
"learning_rate": 3.1779403380910425e-07,
|
|
"logits/chosen": 1.0302306413650513,
|
|
"logits/rejected": 1.2303485870361328,
|
|
"loss": 4.3797,
|
|
"step": 226
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04840033873915672,
|
|
"beta_dpo/beta_used_raw": 0.047016169875860214,
|
|
"beta_dpo/gap_mean": 18.82254409790039,
|
|
"beta_dpo/gap_std": 34.905059814453125,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.47539267015706804,
|
|
"grad_norm": 90.10204315185547,
|
|
"learning_rate": 3.160300660508064e-07,
|
|
"logits/chosen": 1.6820147037506104,
|
|
"logits/rejected": 1.8873445987701416,
|
|
"loss": 3.4083,
|
|
"step": 227
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02424338273704052,
|
|
"beta_dpo/beta_used_raw": 0.009696955792605877,
|
|
"beta_dpo/gap_mean": 21.77010726928711,
|
|
"beta_dpo/gap_std": 34.2744140625,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.4774869109947644,
|
|
"grad_norm": 56.25212860107422,
|
|
"learning_rate": 3.1426255730045695e-07,
|
|
"logits/chosen": 1.5530939102172852,
|
|
"logits/rejected": 1.6357148885726929,
|
|
"loss": 4.4004,
|
|
"step": 228
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03371588513255119,
|
|
"beta_dpo/beta_used_raw": 0.027584807947278023,
|
|
"beta_dpo/gap_mean": 25.558032989501953,
|
|
"beta_dpo/gap_std": 33.908870697021484,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.47958115183246075,
|
|
"grad_norm": 96.84803771972656,
|
|
"learning_rate": 3.1249160234418644e-07,
|
|
"logits/chosen": 1.348872184753418,
|
|
"logits/rejected": 1.2927398681640625,
|
|
"loss": 3.7788,
|
|
"step": 229
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.008082384243607521,
|
|
"beta_dpo/beta_used_raw": -0.00950661115348339,
|
|
"beta_dpo/gap_mean": 25.10620880126953,
|
|
"beta_dpo/gap_std": 34.92431640625,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.4816753926701571,
|
|
"grad_norm": 20.94957160949707,
|
|
"learning_rate": 3.1071729615293424e-07,
|
|
"logits/chosen": 1.1827516555786133,
|
|
"logits/rejected": 1.1730360984802246,
|
|
"loss": 4.9364,
|
|
"step": 230
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.005764795932918787,
|
|
"beta_dpo/beta_used_raw": -0.024570820853114128,
|
|
"beta_dpo/gap_mean": 22.6708927154541,
|
|
"beta_dpo/gap_std": 34.03562927246094,
|
|
"beta_dpo/mask_keep_frac": 0.625,
|
|
"epoch": 0.4837696335078534,
|
|
"grad_norm": 38.76413345336914,
|
|
"learning_rate": 3.0893973387735683e-07,
|
|
"logits/chosen": 1.2684296369552612,
|
|
"logits/rejected": 1.329715609550476,
|
|
"loss": 5.1608,
|
|
"step": 231
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.021905038505792618,
|
|
"beta_dpo/beta_used_raw": -0.017752759158611298,
|
|
"beta_dpo/gap_mean": 20.692659378051758,
|
|
"beta_dpo/gap_std": 33.874855041503906,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.48586387434554973,
|
|
"grad_norm": 112.87725067138672,
|
|
"learning_rate": 3.071590108427243e-07,
|
|
"logits/chosen": 1.426222562789917,
|
|
"logits/rejected": 1.5956566333770752,
|
|
"loss": 4.6128,
|
|
"step": 232
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.039701350033283234,
|
|
"beta_dpo/beta_used_raw": 0.035932619124650955,
|
|
"beta_dpo/gap_mean": 21.13761329650879,
|
|
"beta_dpo/gap_std": 34.44068908691406,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.48795811518324606,
|
|
"grad_norm": 89.65320587158203,
|
|
"learning_rate": 3.05375222543809e-07,
|
|
"logits/chosen": 1.137376070022583,
|
|
"logits/rejected": 1.239527940750122,
|
|
"loss": 3.6241,
|
|
"step": 233
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.020853759720921516,
|
|
"beta_dpo/beta_used_raw": 0.006785106845200062,
|
|
"beta_dpo/gap_mean": 22.310590744018555,
|
|
"beta_dpo/gap_std": 36.559181213378906,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.4900523560209424,
|
|
"grad_norm": 152.07374572753906,
|
|
"learning_rate": 3.035884646397637e-07,
|
|
"logits/chosen": 1.3747183084487915,
|
|
"logits/rejected": 1.4081201553344727,
|
|
"loss": 4.5591,
|
|
"step": 234
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.022356968373060226,
|
|
"beta_dpo/beta_used_raw": 0.017687149345874786,
|
|
"beta_dpo/gap_mean": 21.469078063964844,
|
|
"beta_dpo/gap_std": 38.99213790893555,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.49214659685863876,
|
|
"grad_norm": 49.92569351196289,
|
|
"learning_rate": 3.017988329489923e-07,
|
|
"logits/chosen": 1.6978657245635986,
|
|
"logits/rejected": 1.6188864707946777,
|
|
"loss": 4.497,
|
|
"step": 235
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.027202440425753593,
|
|
"beta_dpo/beta_used_raw": 0.013716357760131359,
|
|
"beta_dpo/gap_mean": 21.86897087097168,
|
|
"beta_dpo/gap_std": 38.970787048339844,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.4942408376963351,
|
|
"grad_norm": 51.87431335449219,
|
|
"learning_rate": 3.000064234440111e-07,
|
|
"logits/chosen": 1.4140355587005615,
|
|
"logits/rejected": 1.421186923980713,
|
|
"loss": 4.3147,
|
|
"step": 236
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.026827599853277206,
|
|
"beta_dpo/beta_used_raw": 0.002097531221807003,
|
|
"beta_dpo/gap_mean": 21.94005584716797,
|
|
"beta_dpo/gap_std": 36.81498718261719,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.4963350785340314,
|
|
"grad_norm": 54.699974060058594,
|
|
"learning_rate": 2.9821133224630223e-07,
|
|
"logits/chosen": 1.4084728956222534,
|
|
"logits/rejected": 1.6357187032699585,
|
|
"loss": 4.0251,
|
|
"step": 237
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.013866505585610867,
|
|
"beta_dpo/beta_used_raw": -0.01890200935304165,
|
|
"beta_dpo/gap_mean": 23.559459686279297,
|
|
"beta_dpo/gap_std": 35.92485427856445,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.49842931937172774,
|
|
"grad_norm": 52.4506950378418,
|
|
"learning_rate": 2.964136556211588e-07,
|
|
"logits/chosen": 1.2949869632720947,
|
|
"logits/rejected": 1.249887228012085,
|
|
"loss": 4.7275,
|
|
"step": 238
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02205376699566841,
|
|
"beta_dpo/beta_used_raw": 0.0070870416238904,
|
|
"beta_dpo/gap_mean": 21.201807022094727,
|
|
"beta_dpo/gap_std": 37.64961624145508,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.5005235602094241,
|
|
"grad_norm": 65.6231918334961,
|
|
"learning_rate": 2.946134899725226e-07,
|
|
"logits/chosen": 1.542831540107727,
|
|
"logits/rejected": 1.6906412839889526,
|
|
"loss": 4.8225,
|
|
"step": 239
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.016875216737389565,
|
|
"beta_dpo/beta_used_raw": 0.013624901883304119,
|
|
"beta_dpo/gap_mean": 22.050025939941406,
|
|
"beta_dpo/gap_std": 35.68221664428711,
|
|
"beta_dpo/mask_keep_frac": 0.9375,
|
|
"epoch": 0.5026178010471204,
|
|
"grad_norm": 71.22918701171875,
|
|
"learning_rate": 2.9281093183781403e-07,
|
|
"logits/chosen": 1.3054808378219604,
|
|
"logits/rejected": 1.2251484394073486,
|
|
"loss": 4.3994,
|
|
"step": 240
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010428352281451225,
|
|
"beta_dpo/beta_used_raw": -0.009664381854236126,
|
|
"beta_dpo/gap_mean": 20.70039176940918,
|
|
"beta_dpo/gap_std": 36.04539108276367,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.5047120418848168,
|
|
"grad_norm": 35.78901290893555,
|
|
"learning_rate": 2.910060778827554e-07,
|
|
"logits/chosen": 1.4216902256011963,
|
|
"logits/rejected": 1.5455743074417114,
|
|
"loss": 5.0779,
|
|
"step": 241
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03037761151790619,
|
|
"beta_dpo/beta_used_raw": 0.01391815859824419,
|
|
"beta_dpo/gap_mean": 21.673847198486328,
|
|
"beta_dpo/gap_std": 35.858516693115234,
|
|
"beta_dpo/mask_keep_frac": 0.625,
|
|
"epoch": 0.506806282722513,
|
|
"grad_norm": 76.41270446777344,
|
|
"learning_rate": 2.891990248961871e-07,
|
|
"logits/chosen": 1.8587148189544678,
|
|
"logits/rejected": 1.6864495277404785,
|
|
"loss": 4.2734,
|
|
"step": 242
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.035171203315258026,
|
|
"beta_dpo/beta_used_raw": 0.02459963783621788,
|
|
"beta_dpo/gap_mean": 23.178098678588867,
|
|
"beta_dpo/gap_std": 35.096439361572266,
|
|
"beta_dpo/mask_keep_frac": 0.625,
|
|
"epoch": 0.5089005235602094,
|
|
"grad_norm": 77.48847198486328,
|
|
"learning_rate": 2.873898697848762e-07,
|
|
"logits/chosen": 1.6573126316070557,
|
|
"logits/rejected": 1.6771302223205566,
|
|
"loss": 3.6758,
|
|
"step": 243
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02781713753938675,
|
|
"beta_dpo/beta_used_raw": 0.016341306269168854,
|
|
"beta_dpo/gap_mean": 26.562307357788086,
|
|
"beta_dpo/gap_std": 36.088531494140625,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.5109947643979058,
|
|
"grad_norm": 90.8469009399414,
|
|
"learning_rate": 2.8557870956832133e-07,
|
|
"logits/chosen": 1.3104796409606934,
|
|
"logits/rejected": 1.1022838354110718,
|
|
"loss": 4.1908,
|
|
"step": 244
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.023681480437517166,
|
|
"beta_dpo/beta_used_raw": 0.007075564004480839,
|
|
"beta_dpo/gap_mean": 26.648090362548828,
|
|
"beta_dpo/gap_std": 35.5743522644043,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.5130890052356021,
|
|
"grad_norm": 49.45195007324219,
|
|
"learning_rate": 2.837656413735479e-07,
|
|
"logits/chosen": 1.8954524993896484,
|
|
"logits/rejected": 1.5859884023666382,
|
|
"loss": 4.2197,
|
|
"step": 245
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.007673209998756647,
|
|
"beta_dpo/beta_used_raw": -0.03442414849996567,
|
|
"beta_dpo/gap_mean": 23.275249481201172,
|
|
"beta_dpo/gap_std": 37.69624328613281,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.5151832460732985,
|
|
"grad_norm": 34.83370590209961,
|
|
"learning_rate": 2.8195076242990116e-07,
|
|
"logits/chosen": 1.075661301612854,
|
|
"logits/rejected": 0.9320878982543945,
|
|
"loss": 5.0958,
|
|
"step": 246
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02569355070590973,
|
|
"beta_dpo/beta_used_raw": 0.011104905046522617,
|
|
"beta_dpo/gap_mean": 21.302507400512695,
|
|
"beta_dpo/gap_std": 37.52021789550781,
|
|
"beta_dpo/mask_keep_frac": 0.59375,
|
|
"epoch": 0.5172774869109947,
|
|
"grad_norm": 60.37564468383789,
|
|
"learning_rate": 2.801341700638307e-07,
|
|
"logits/chosen": 1.1491472721099854,
|
|
"logits/rejected": 0.9154660105705261,
|
|
"loss": 4.1681,
|
|
"step": 247
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0077388836070895195,
|
|
"beta_dpo/beta_used_raw": -0.01996331661939621,
|
|
"beta_dpo/gap_mean": 21.50804328918457,
|
|
"beta_dpo/gap_std": 37.68701934814453,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.5193717277486911,
|
|
"grad_norm": 42.47214126586914,
|
|
"learning_rate": 2.7831596169367227e-07,
|
|
"logits/chosen": 1.0854613780975342,
|
|
"logits/rejected": 1.1457273960113525,
|
|
"loss": 5.0609,
|
|
"step": 248
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.00828784704208374,
|
|
"beta_dpo/beta_used_raw": -0.007833743467926979,
|
|
"beta_dpo/gap_mean": 20.011716842651367,
|
|
"beta_dpo/gap_std": 37.14725875854492,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.5214659685863874,
|
|
"grad_norm": 41.61552810668945,
|
|
"learning_rate": 2.7649623482442274e-07,
|
|
"logits/chosen": 1.2434636354446411,
|
|
"logits/rejected": 1.2950477600097656,
|
|
"loss": 5.0897,
|
|
"step": 249
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.028967518359422684,
|
|
"beta_dpo/beta_used_raw": 0.017602279782295227,
|
|
"beta_dpo/gap_mean": 21.15532112121582,
|
|
"beta_dpo/gap_std": 36.99894714355469,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.5235602094240838,
|
|
"grad_norm": 111.2298583984375,
|
|
"learning_rate": 2.7467508704251135e-07,
|
|
"logits/chosen": 1.5354533195495605,
|
|
"logits/rejected": 1.6301560401916504,
|
|
"loss": 4.2794,
|
|
"step": 250
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02736206352710724,
|
|
"beta_dpo/beta_used_raw": 0.006099463440477848,
|
|
"beta_dpo/gap_mean": 20.01749038696289,
|
|
"beta_dpo/gap_std": 37.12480926513672,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.5256544502617801,
|
|
"grad_norm": 79.339599609375,
|
|
"learning_rate": 2.7285261601056697e-07,
|
|
"logits/chosen": 1.3763610124588013,
|
|
"logits/rejected": 1.155696988105774,
|
|
"loss": 4.4696,
|
|
"step": 251
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03472306579351425,
|
|
"beta_dpo/beta_used_raw": 0.02851836569607258,
|
|
"beta_dpo/gap_mean": 22.56066131591797,
|
|
"beta_dpo/gap_std": 38.38005065917969,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.5277486910994764,
|
|
"grad_norm": 57.71303939819336,
|
|
"learning_rate": 2.7102891946217994e-07,
|
|
"logits/chosen": 1.829942226409912,
|
|
"logits/rejected": 1.845513105392456,
|
|
"loss": 3.7725,
|
|
"step": 252
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.030697450041770935,
|
|
"beta_dpo/beta_used_raw": 0.01660301722586155,
|
|
"beta_dpo/gap_mean": 19.772396087646484,
|
|
"beta_dpo/gap_std": 39.422203063964844,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.5298429319371728,
|
|
"grad_norm": 70.57060241699219,
|
|
"learning_rate": 2.692040951966617e-07,
|
|
"logits/chosen": 1.419633388519287,
|
|
"logits/rejected": 1.3010826110839844,
|
|
"loss": 4.158,
|
|
"step": 253
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03239889442920685,
|
|
"beta_dpo/beta_used_raw": 0.021261408925056458,
|
|
"beta_dpo/gap_mean": 19.49216079711914,
|
|
"beta_dpo/gap_std": 36.011436462402344,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.5319371727748691,
|
|
"grad_norm": 85.16039276123047,
|
|
"learning_rate": 2.6737824107379947e-07,
|
|
"logits/chosen": 1.652917504310608,
|
|
"logits/rejected": 1.5930885076522827,
|
|
"loss": 4.1323,
|
|
"step": 254
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.07058847695589066,
|
|
"beta_dpo/beta_used_raw": 0.0682307779788971,
|
|
"beta_dpo/gap_mean": 22.544225692749023,
|
|
"beta_dpo/gap_std": 38.23542022705078,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.5340314136125655,
|
|
"grad_norm": 126.1849365234375,
|
|
"learning_rate": 2.655514550086086e-07,
|
|
"logits/chosen": 1.4259027242660522,
|
|
"logits/rejected": 1.4180747270584106,
|
|
"loss": 2.8543,
|
|
"step": 255
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.035115234553813934,
|
|
"beta_dpo/beta_used_raw": 0.01770986244082451,
|
|
"beta_dpo/gap_mean": 25.101337432861328,
|
|
"beta_dpo/gap_std": 40.27662658691406,
|
|
"beta_dpo/mask_keep_frac": 0.625,
|
|
"epoch": 0.5361256544502618,
|
|
"grad_norm": 74.76777648925781,
|
|
"learning_rate": 2.6372383496608186e-07,
|
|
"logits/chosen": 1.584543228149414,
|
|
"logits/rejected": 1.6146832704544067,
|
|
"loss": 4.0922,
|
|
"step": 256
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02713741734623909,
|
|
"beta_dpo/beta_used_raw": 0.0023514775093644857,
|
|
"beta_dpo/gap_mean": 26.48859977722168,
|
|
"beta_dpo/gap_std": 40.16349792480469,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.5382198952879581,
|
|
"grad_norm": 87.41287231445312,
|
|
"learning_rate": 2.618954789559356e-07,
|
|
"logits/chosen": 1.334143042564392,
|
|
"logits/rejected": 1.4390063285827637,
|
|
"loss": 4.1405,
|
|
"step": 257
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.024670587852597237,
|
|
"beta_dpo/beta_used_raw": 0.006600758992135525,
|
|
"beta_dpo/gap_mean": 24.859146118164062,
|
|
"beta_dpo/gap_std": 38.38996505737305,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.5403141361256545,
|
|
"grad_norm": 78.93328094482422,
|
|
"learning_rate": 2.600664850273538e-07,
|
|
"logits/chosen": 1.2462736368179321,
|
|
"logits/rejected": 1.4119253158569336,
|
|
"loss": 4.1682,
|
|
"step": 258
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.026468459516763687,
|
|
"beta_dpo/beta_used_raw": 0.009973703883588314,
|
|
"beta_dpo/gap_mean": 22.97103500366211,
|
|
"beta_dpo/gap_std": 37.827335357666016,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.5424083769633508,
|
|
"grad_norm": 62.47282409667969,
|
|
"learning_rate": 2.582369512637302e-07,
|
|
"logits/chosen": 1.400333285331726,
|
|
"logits/rejected": 1.3363168239593506,
|
|
"loss": 4.2019,
|
|
"step": 259
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0057443841360509396,
|
|
"beta_dpo/beta_used_raw": -0.03509850427508354,
|
|
"beta_dpo/gap_mean": 19.301353454589844,
|
|
"beta_dpo/gap_std": 37.98316192626953,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.5445026178010471,
|
|
"grad_norm": 29.450904846191406,
|
|
"learning_rate": 2.5640697577740815e-07,
|
|
"logits/chosen": 1.2627638578414917,
|
|
"logits/rejected": 1.3713899850845337,
|
|
"loss": 5.2443,
|
|
"step": 260
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02370859682559967,
|
|
"beta_dpo/beta_used_raw": 0.009769135154783726,
|
|
"beta_dpo/gap_mean": 17.073835372924805,
|
|
"beta_dpo/gap_std": 38.706729888916016,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.5465968586387434,
|
|
"grad_norm": 119.15771484375,
|
|
"learning_rate": 2.5457665670441937e-07,
|
|
"logits/chosen": 0.9551135301589966,
|
|
"logits/rejected": 0.7918010354042053,
|
|
"loss": 4.8051,
|
|
"step": 261
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01725778356194496,
|
|
"beta_dpo/beta_used_raw": 0.007985102012753487,
|
|
"beta_dpo/gap_mean": 19.15559959411621,
|
|
"beta_dpo/gap_std": 37.25046920776367,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.5486910994764398,
|
|
"grad_norm": 42.16154479980469,
|
|
"learning_rate": 2.527460921992209e-07,
|
|
"logits/chosen": 1.7428507804870605,
|
|
"logits/rejected": 1.745199203491211,
|
|
"loss": 4.7292,
|
|
"step": 262
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.027581116184592247,
|
|
"beta_dpo/beta_used_raw": 0.0017390409484505653,
|
|
"beta_dpo/gap_mean": 21.374671936035156,
|
|
"beta_dpo/gap_std": 36.47187805175781,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.5507853403141362,
|
|
"grad_norm": 72.0134506225586,
|
|
"learning_rate": 2.509153804294318e-07,
|
|
"logits/chosen": 1.3248748779296875,
|
|
"logits/rejected": 1.480365514755249,
|
|
"loss": 4.2062,
|
|
"step": 263
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.015040460973978043,
|
|
"beta_dpo/beta_used_raw": -0.002720870077610016,
|
|
"beta_dpo/gap_mean": 22.537841796875,
|
|
"beta_dpo/gap_std": 36.9581298828125,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.5528795811518324,
|
|
"grad_norm": 53.91576385498047,
|
|
"learning_rate": 2.4908461957056825e-07,
|
|
"logits/chosen": 1.3922407627105713,
|
|
"logits/rejected": 1.1616618633270264,
|
|
"loss": 4.7735,
|
|
"step": 264
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04024341329932213,
|
|
"beta_dpo/beta_used_raw": 0.02337898127734661,
|
|
"beta_dpo/gap_mean": 23.94507598876953,
|
|
"beta_dpo/gap_std": 36.818138122558594,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.5549738219895288,
|
|
"grad_norm": 190.59609985351562,
|
|
"learning_rate": 2.4725390780077905e-07,
|
|
"logits/chosen": 1.6322290897369385,
|
|
"logits/rejected": 1.6508582830429077,
|
|
"loss": 4.2363,
|
|
"step": 265
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.026812460273504257,
|
|
"beta_dpo/beta_used_raw": 0.015981679782271385,
|
|
"beta_dpo/gap_mean": 23.17593002319336,
|
|
"beta_dpo/gap_std": 35.23807907104492,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.5570680628272251,
|
|
"grad_norm": 115.75420379638672,
|
|
"learning_rate": 2.454233432955807e-07,
|
|
"logits/chosen": 1.3934905529022217,
|
|
"logits/rejected": 1.4551239013671875,
|
|
"loss": 4.156,
|
|
"step": 266
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.014945639297366142,
|
|
"beta_dpo/beta_used_raw": -0.003206442343071103,
|
|
"beta_dpo/gap_mean": 22.777759552001953,
|
|
"beta_dpo/gap_std": 35.72869873046875,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.5591623036649215,
|
|
"grad_norm": 42.64310073852539,
|
|
"learning_rate": 2.435930242225919e-07,
|
|
"logits/chosen": 1.5525813102722168,
|
|
"logits/rejected": 1.673789143562317,
|
|
"loss": 4.8052,
|
|
"step": 267
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.030046723783016205,
|
|
"beta_dpo/beta_used_raw": 0.024244606494903564,
|
|
"beta_dpo/gap_mean": 21.284276962280273,
|
|
"beta_dpo/gap_std": 36.792415618896484,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.5612565445026177,
|
|
"grad_norm": 85.80408477783203,
|
|
"learning_rate": 2.4176304873626984e-07,
|
|
"logits/chosen": 1.1172372102737427,
|
|
"logits/rejected": 1.1572062969207764,
|
|
"loss": 4.0405,
|
|
"step": 268
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.016361307352781296,
|
|
"beta_dpo/beta_used_raw": -0.008380460552871227,
|
|
"beta_dpo/gap_mean": 21.142919540405273,
|
|
"beta_dpo/gap_std": 36.69437789916992,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.5633507853403141,
|
|
"grad_norm": 30.00682258605957,
|
|
"learning_rate": 2.399335149726463e-07,
|
|
"logits/chosen": 1.3953180313110352,
|
|
"logits/rejected": 1.582595944404602,
|
|
"loss": 4.8939,
|
|
"step": 269
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.024136360734701157,
|
|
"beta_dpo/beta_used_raw": 0.01455269567668438,
|
|
"beta_dpo/gap_mean": 20.730382919311523,
|
|
"beta_dpo/gap_std": 38.18457794189453,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.5654450261780105,
|
|
"grad_norm": 104.796630859375,
|
|
"learning_rate": 2.381045210440644e-07,
|
|
"logits/chosen": 1.706362009048462,
|
|
"logits/rejected": 1.9905970096588135,
|
|
"loss": 4.8619,
|
|
"step": 270
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.015366212464869022,
|
|
"beta_dpo/beta_used_raw": -0.010098990984261036,
|
|
"beta_dpo/gap_mean": 20.525156021118164,
|
|
"beta_dpo/gap_std": 36.195465087890625,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.5675392670157068,
|
|
"grad_norm": 27.481109619140625,
|
|
"learning_rate": 2.3627616503391812e-07,
|
|
"logits/chosen": 1.2522549629211426,
|
|
"logits/rejected": 1.3000314235687256,
|
|
"loss": 4.6612,
|
|
"step": 271
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02246049977838993,
|
|
"beta_dpo/beta_used_raw": 0.002398681826889515,
|
|
"beta_dpo/gap_mean": 20.4349365234375,
|
|
"beta_dpo/gap_std": 35.98146438598633,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.5696335078534032,
|
|
"grad_norm": 223.59896850585938,
|
|
"learning_rate": 2.344485449913914e-07,
|
|
"logits/chosen": 1.606691837310791,
|
|
"logits/rejected": 1.451743483543396,
|
|
"loss": 4.7041,
|
|
"step": 272
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.025656994432210922,
|
|
"beta_dpo/beta_used_raw": 0.001691313460469246,
|
|
"beta_dpo/gap_mean": 21.252532958984375,
|
|
"beta_dpo/gap_std": 34.84130096435547,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.5717277486910994,
|
|
"grad_norm": 60.19879913330078,
|
|
"learning_rate": 2.3262175892620062e-07,
|
|
"logits/chosen": 1.5752846002578735,
|
|
"logits/rejected": 1.6109840869903564,
|
|
"loss": 4.3398,
|
|
"step": 273
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.024387702345848083,
|
|
"beta_dpo/beta_used_raw": 0.01869470439851284,
|
|
"beta_dpo/gap_mean": 22.542556762695312,
|
|
"beta_dpo/gap_std": 35.69194030761719,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.5738219895287958,
|
|
"grad_norm": 37.60686492919922,
|
|
"learning_rate": 2.3079590480333827e-07,
|
|
"logits/chosen": 1.6102871894836426,
|
|
"logits/rejected": 1.7174773216247559,
|
|
"loss": 4.1491,
|
|
"step": 274
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.043057817965745926,
|
|
"beta_dpo/beta_used_raw": 0.04157021641731262,
|
|
"beta_dpo/gap_mean": 24.984006881713867,
|
|
"beta_dpo/gap_std": 35.83733367919922,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.5759162303664922,
|
|
"grad_norm": 96.29705810546875,
|
|
"learning_rate": 2.2897108053782e-07,
|
|
"logits/chosen": 1.1287708282470703,
|
|
"logits/rejected": 1.208784818649292,
|
|
"loss": 3.0424,
|
|
"step": 275
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0038480497896671295,
|
|
"beta_dpo/beta_used_raw": -0.015348054468631744,
|
|
"beta_dpo/gap_mean": 25.66550064086914,
|
|
"beta_dpo/gap_std": 33.74402618408203,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.5780104712041885,
|
|
"grad_norm": 24.424198150634766,
|
|
"learning_rate": 2.2714738398943308e-07,
|
|
"logits/chosen": 1.8258295059204102,
|
|
"logits/rejected": 1.6733819246292114,
|
|
"loss": 5.2051,
|
|
"step": 276
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.017688903957605362,
|
|
"beta_dpo/beta_used_raw": -0.011028681881725788,
|
|
"beta_dpo/gap_mean": 22.55120086669922,
|
|
"beta_dpo/gap_std": 35.05712890625,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.5801047120418849,
|
|
"grad_norm": 33.83370590209961,
|
|
"learning_rate": 2.2532491295748865e-07,
|
|
"logits/chosen": 1.1561347246170044,
|
|
"logits/rejected": 1.3503713607788086,
|
|
"loss": 4.532,
|
|
"step": 277
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.019777359440922737,
|
|
"beta_dpo/beta_used_raw": -0.004533551167696714,
|
|
"beta_dpo/gap_mean": 19.028533935546875,
|
|
"beta_dpo/gap_std": 36.112735748291016,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.5821989528795811,
|
|
"grad_norm": 42.212650299072266,
|
|
"learning_rate": 2.2350376517557726e-07,
|
|
"logits/chosen": 1.0686261653900146,
|
|
"logits/rejected": 1.0221307277679443,
|
|
"loss": 4.6354,
|
|
"step": 278
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02981048822402954,
|
|
"beta_dpo/beta_used_raw": 0.028192678466439247,
|
|
"beta_dpo/gap_mean": 19.808574676513672,
|
|
"beta_dpo/gap_std": 35.35283660888672,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.5842931937172775,
|
|
"grad_norm": 53.312747955322266,
|
|
"learning_rate": 2.2168403830632769e-07,
|
|
"logits/chosen": 1.2553820610046387,
|
|
"logits/rejected": 1.2719086408615112,
|
|
"loss": 3.9651,
|
|
"step": 279
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0026543322019279003,
|
|
"beta_dpo/beta_used_raw": -0.015082788653671741,
|
|
"beta_dpo/gap_mean": 21.008886337280273,
|
|
"beta_dpo/gap_std": 34.17639923095703,
|
|
"beta_dpo/mask_keep_frac": 0.59375,
|
|
"epoch": 0.5863874345549738,
|
|
"grad_norm": 11.17526912689209,
|
|
"learning_rate": 2.1986582993616925e-07,
|
|
"logits/chosen": 1.5121065378189087,
|
|
"logits/rejected": 1.5147109031677246,
|
|
"loss": 5.2115,
|
|
"step": 280
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.015546365641057491,
|
|
"beta_dpo/beta_used_raw": -0.014291130006313324,
|
|
"beta_dpo/gap_mean": 20.403629302978516,
|
|
"beta_dpo/gap_std": 34.77376174926758,
|
|
"beta_dpo/mask_keep_frac": 0.625,
|
|
"epoch": 0.5884816753926702,
|
|
"grad_norm": 57.08203125,
|
|
"learning_rate": 2.1804923757009882e-07,
|
|
"logits/chosen": 1.4907077550888062,
|
|
"logits/rejected": 1.448096513748169,
|
|
"loss": 4.8509,
|
|
"step": 281
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.013758410699665546,
|
|
"beta_dpo/beta_used_raw": -0.0017688155639916658,
|
|
"beta_dpo/gap_mean": 20.669015884399414,
|
|
"beta_dpo/gap_std": 35.69584274291992,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.5905759162303665,
|
|
"grad_norm": 28.517318725585938,
|
|
"learning_rate": 2.1623435862645205e-07,
|
|
"logits/chosen": 1.7699363231658936,
|
|
"logits/rejected": 1.8309452533721924,
|
|
"loss": 5.0077,
|
|
"step": 282
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.028719400987029076,
|
|
"beta_dpo/beta_used_raw": 0.018162164837121964,
|
|
"beta_dpo/gap_mean": 20.43427276611328,
|
|
"beta_dpo/gap_std": 35.05901336669922,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.5926701570680628,
|
|
"grad_norm": 74.51838684082031,
|
|
"learning_rate": 2.1442129043167873e-07,
|
|
"logits/chosen": 1.243952751159668,
|
|
"logits/rejected": 1.4681645631790161,
|
|
"loss": 4.1383,
|
|
"step": 283
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.022418132051825523,
|
|
"beta_dpo/beta_used_raw": -0.00897371955215931,
|
|
"beta_dpo/gap_mean": 20.829967498779297,
|
|
"beta_dpo/gap_std": 37.05330276489258,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.5947643979057592,
|
|
"grad_norm": 53.890785217285156,
|
|
"learning_rate": 2.1261013021512378e-07,
|
|
"logits/chosen": 1.3836698532104492,
|
|
"logits/rejected": 1.3280866146087646,
|
|
"loss": 4.7208,
|
|
"step": 284
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0186537504196167,
|
|
"beta_dpo/beta_used_raw": -0.003015751950442791,
|
|
"beta_dpo/gap_mean": 18.022796630859375,
|
|
"beta_dpo/gap_std": 36.89912414550781,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.5968586387434555,
|
|
"grad_norm": 28.00040626525879,
|
|
"learning_rate": 2.1080097510381294e-07,
|
|
"logits/chosen": 1.706050157546997,
|
|
"logits/rejected": 1.584727168083191,
|
|
"loss": 4.632,
|
|
"step": 285
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.013129707425832748,
|
|
"beta_dpo/beta_used_raw": -0.00048280227929353714,
|
|
"beta_dpo/gap_mean": 19.448501586914062,
|
|
"beta_dpo/gap_std": 36.36820983886719,
|
|
"beta_dpo/mask_keep_frac": 0.90625,
|
|
"epoch": 0.5989528795811518,
|
|
"grad_norm": 51.00930404663086,
|
|
"learning_rate": 2.089939221172446e-07,
|
|
"logits/chosen": 1.2181655168533325,
|
|
"logits/rejected": 1.2918510437011719,
|
|
"loss": 4.8983,
|
|
"step": 286
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0334862619638443,
|
|
"beta_dpo/beta_used_raw": 0.031023263931274414,
|
|
"beta_dpo/gap_mean": 20.484294891357422,
|
|
"beta_dpo/gap_std": 38.072418212890625,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.6010471204188481,
|
|
"grad_norm": 68.44963073730469,
|
|
"learning_rate": 2.0718906816218595e-07,
|
|
"logits/chosen": 1.4797168970108032,
|
|
"logits/rejected": 1.5804214477539062,
|
|
"loss": 4.3089,
|
|
"step": 287
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.031299516558647156,
|
|
"beta_dpo/beta_used_raw": 0.020958131179213524,
|
|
"beta_dpo/gap_mean": 19.536659240722656,
|
|
"beta_dpo/gap_std": 37.194252014160156,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.6031413612565445,
|
|
"grad_norm": 125.42591857910156,
|
|
"learning_rate": 2.053865100274774e-07,
|
|
"logits/chosen": 1.6277129650115967,
|
|
"logits/rejected": 1.4404486417770386,
|
|
"loss": 4.2485,
|
|
"step": 288
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.013463410548865795,
|
|
"beta_dpo/beta_used_raw": -0.002038992242887616,
|
|
"beta_dpo/gap_mean": 18.123918533325195,
|
|
"beta_dpo/gap_std": 37.70576477050781,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.6052356020942409,
|
|
"grad_norm": 50.54543685913086,
|
|
"learning_rate": 2.035863443788411e-07,
|
|
"logits/chosen": 1.6278074979782104,
|
|
"logits/rejected": 1.5724064111709595,
|
|
"loss": 4.813,
|
|
"step": 289
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.013063677586615086,
|
|
"beta_dpo/beta_used_raw": -0.02197786420583725,
|
|
"beta_dpo/gap_mean": 19.04131317138672,
|
|
"beta_dpo/gap_std": 35.90309524536133,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.6073298429319371,
|
|
"grad_norm": 41.749141693115234,
|
|
"learning_rate": 2.0178866775369774e-07,
|
|
"logits/chosen": 1.3940989971160889,
|
|
"logits/rejected": 1.3121880292892456,
|
|
"loss": 4.8478,
|
|
"step": 290
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03433792293071747,
|
|
"beta_dpo/beta_used_raw": 0.0014921380206942558,
|
|
"beta_dpo/gap_mean": 20.723804473876953,
|
|
"beta_dpo/gap_std": 36.17911148071289,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.6094240837696335,
|
|
"grad_norm": 95.32479095458984,
|
|
"learning_rate": 1.9999357655598891e-07,
|
|
"logits/chosen": 1.084555983543396,
|
|
"logits/rejected": 1.1702072620391846,
|
|
"loss": 4.7487,
|
|
"step": 291
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03228276968002319,
|
|
"beta_dpo/beta_used_raw": 0.018787425011396408,
|
|
"beta_dpo/gap_mean": 20.76034164428711,
|
|
"beta_dpo/gap_std": 37.097103118896484,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.6115183246073298,
|
|
"grad_norm": 67.72441864013672,
|
|
"learning_rate": 1.9820116705100775e-07,
|
|
"logits/chosen": 1.160035252571106,
|
|
"logits/rejected": 1.1472792625427246,
|
|
"loss": 3.9976,
|
|
"step": 292
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02482818439602852,
|
|
"beta_dpo/beta_used_raw": -0.0007117787608876824,
|
|
"beta_dpo/gap_mean": 20.157255172729492,
|
|
"beta_dpo/gap_std": 39.040748596191406,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.6136125654450262,
|
|
"grad_norm": 244.3824462890625,
|
|
"learning_rate": 1.9641153536023642e-07,
|
|
"logits/chosen": 2.0036768913269043,
|
|
"logits/rejected": 1.8342108726501465,
|
|
"loss": 4.5759,
|
|
"step": 293
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02537180297076702,
|
|
"beta_dpo/beta_used_raw": 0.0016407333314418793,
|
|
"beta_dpo/gap_mean": 21.209617614746094,
|
|
"beta_dpo/gap_std": 38.50959777832031,
|
|
"beta_dpo/mask_keep_frac": 0.90625,
|
|
"epoch": 0.6157068062827226,
|
|
"grad_norm": 76.85967254638672,
|
|
"learning_rate": 1.9462477745619106e-07,
|
|
"logits/chosen": 1.4297269582748413,
|
|
"logits/rejected": 1.5640549659729004,
|
|
"loss": 4.6346,
|
|
"step": 294
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03157725930213928,
|
|
"beta_dpo/beta_used_raw": 0.02505682222545147,
|
|
"beta_dpo/gap_mean": 21.574724197387695,
|
|
"beta_dpo/gap_std": 39.374446868896484,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.6178010471204188,
|
|
"grad_norm": 86.37284851074219,
|
|
"learning_rate": 1.928409891572757e-07,
|
|
"logits/chosen": 1.1579641103744507,
|
|
"logits/rejected": 1.1256705522537231,
|
|
"loss": 4.4772,
|
|
"step": 295
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03921440243721008,
|
|
"beta_dpo/beta_used_raw": 0.030128249898552895,
|
|
"beta_dpo/gap_mean": 26.082651138305664,
|
|
"beta_dpo/gap_std": 39.295570373535156,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.6198952879581152,
|
|
"grad_norm": 129.71774291992188,
|
|
"learning_rate": 1.9106026612264315e-07,
|
|
"logits/chosen": 1.5179616212844849,
|
|
"logits/rejected": 1.6978120803833008,
|
|
"loss": 3.8258,
|
|
"step": 296
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.029375022277235985,
|
|
"beta_dpo/beta_used_raw": 0.011093353852629662,
|
|
"beta_dpo/gap_mean": 27.48119354248047,
|
|
"beta_dpo/gap_std": 39.495452880859375,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.6219895287958115,
|
|
"grad_norm": 118.33712768554688,
|
|
"learning_rate": 1.8928270384706582e-07,
|
|
"logits/chosen": 1.495194435119629,
|
|
"logits/rejected": 1.649183988571167,
|
|
"loss": 4.2477,
|
|
"step": 297
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03636423125863075,
|
|
"beta_dpo/beta_used_raw": 0.028217561542987823,
|
|
"beta_dpo/gap_mean": 26.37271499633789,
|
|
"beta_dpo/gap_std": 39.67487335205078,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.6240837696335079,
|
|
"grad_norm": 85.49053955078125,
|
|
"learning_rate": 1.875083976558136e-07,
|
|
"logits/chosen": 1.4574960470199585,
|
|
"logits/rejected": 1.3186194896697998,
|
|
"loss": 3.8972,
|
|
"step": 298
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02849549427628517,
|
|
"beta_dpo/beta_used_raw": -0.0016860419418662786,
|
|
"beta_dpo/gap_mean": 24.45018196105957,
|
|
"beta_dpo/gap_std": 39.10914993286133,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.6261780104712041,
|
|
"grad_norm": 41.191104888916016,
|
|
"learning_rate": 1.8573744269954297e-07,
|
|
"logits/chosen": 1.6376529932022095,
|
|
"logits/rejected": 1.6397225856781006,
|
|
"loss": 3.844,
|
|
"step": 299
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02068179100751877,
|
|
"beta_dpo/beta_used_raw": 0.005061999429017305,
|
|
"beta_dpo/gap_mean": 22.947368621826172,
|
|
"beta_dpo/gap_std": 38.15463638305664,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.6282722513089005,
|
|
"grad_norm": 54.7095947265625,
|
|
"learning_rate": 1.839699339491937e-07,
|
|
"logits/chosen": 1.2076692581176758,
|
|
"logits/rejected": 1.2860641479492188,
|
|
"loss": 4.3858,
|
|
"step": 300
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03375673294067383,
|
|
"beta_dpo/beta_used_raw": 0.011599482968449593,
|
|
"beta_dpo/gap_mean": 21.817138671875,
|
|
"beta_dpo/gap_std": 40.71202850341797,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.6303664921465969,
|
|
"grad_norm": 67.1680908203125,
|
|
"learning_rate": 1.8220596619089573e-07,
|
|
"logits/chosen": 1.5903642177581787,
|
|
"logits/rejected": 1.5883557796478271,
|
|
"loss": 3.9801,
|
|
"step": 301
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.031289342790842056,
|
|
"beta_dpo/beta_used_raw": 0.02020403742790222,
|
|
"beta_dpo/gap_mean": 22.630334854125977,
|
|
"beta_dpo/gap_std": 39.44662094116211,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.6324607329842932,
|
|
"grad_norm": 73.67294311523438,
|
|
"learning_rate": 1.8044563402088682e-07,
|
|
"logits/chosen": 1.4647196531295776,
|
|
"logits/rejected": 1.6538636684417725,
|
|
"loss": 3.8922,
|
|
"step": 302
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.026227440685033798,
|
|
"beta_dpo/beta_used_raw": 0.01093749888241291,
|
|
"beta_dpo/gap_mean": 21.381053924560547,
|
|
"beta_dpo/gap_std": 40.288665771484375,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.6345549738219896,
|
|
"grad_norm": 75.99285888671875,
|
|
"learning_rate": 1.7868903184043885e-07,
|
|
"logits/chosen": 1.345954179763794,
|
|
"logits/rejected": 1.4914484024047852,
|
|
"loss": 4.3761,
|
|
"step": 303
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.024651650339365005,
|
|
"beta_dpo/beta_used_raw": 0.010574829764664173,
|
|
"beta_dpo/gap_mean": 21.974733352661133,
|
|
"beta_dpo/gap_std": 38.83090591430664,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.6366492146596858,
|
|
"grad_norm": 230.4051513671875,
|
|
"learning_rate": 1.7693625385079574e-07,
|
|
"logits/chosen": 1.2385737895965576,
|
|
"logits/rejected": 1.2572718858718872,
|
|
"loss": 4.7737,
|
|
"step": 304
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.023414814844727516,
|
|
"beta_dpo/beta_used_raw": 0.013659648597240448,
|
|
"beta_dpo/gap_mean": 24.257299423217773,
|
|
"beta_dpo/gap_std": 38.524078369140625,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.6387434554973822,
|
|
"grad_norm": 46.621604919433594,
|
|
"learning_rate": 1.7518739404812155e-07,
|
|
"logits/chosen": 1.235711932182312,
|
|
"logits/rejected": 1.2289034128189087,
|
|
"loss": 4.3571,
|
|
"step": 305
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0353800505399704,
|
|
"beta_dpo/beta_used_raw": -0.00411562342196703,
|
|
"beta_dpo/gap_mean": 26.567459106445312,
|
|
"beta_dpo/gap_std": 40.30250549316406,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.6408376963350786,
|
|
"grad_norm": 185.1968536376953,
|
|
"learning_rate": 1.7344254621846017e-07,
|
|
"logits/chosen": 1.48641836643219,
|
|
"logits/rejected": 1.3792299032211304,
|
|
"loss": 4.523,
|
|
"step": 306
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.057250961661338806,
|
|
"beta_dpo/beta_used_raw": 0.05049164220690727,
|
|
"beta_dpo/gap_mean": 26.73577117919922,
|
|
"beta_dpo/gap_std": 40.15787124633789,
|
|
"beta_dpo/mask_keep_frac": 0.625,
|
|
"epoch": 0.6429319371727749,
|
|
"grad_norm": 112.26713562011719,
|
|
"learning_rate": 1.717018039327053e-07,
|
|
"logits/chosen": 1.2322039604187012,
|
|
"logits/rejected": 1.3177506923675537,
|
|
"loss": 2.6335,
|
|
"step": 307
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.012129316106438637,
|
|
"beta_dpo/beta_used_raw": -0.021789539605379105,
|
|
"beta_dpo/gap_mean": 25.66850471496582,
|
|
"beta_dpo/gap_std": 39.91798400878906,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.6450261780104712,
|
|
"grad_norm": 49.086910247802734,
|
|
"learning_rate": 1.699652605415828e-07,
|
|
"logits/chosen": 1.3670289516448975,
|
|
"logits/rejected": 1.3430283069610596,
|
|
"loss": 4.7575,
|
|
"step": 308
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04727376997470856,
|
|
"beta_dpo/beta_used_raw": 0.04426693171262741,
|
|
"beta_dpo/gap_mean": 24.053421020507812,
|
|
"beta_dpo/gap_std": 41.2784309387207,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.6471204188481675,
|
|
"grad_norm": 212.80130004882812,
|
|
"learning_rate": 1.6823300917064458e-07,
|
|
"logits/chosen": 1.8778178691864014,
|
|
"logits/rejected": 1.6358754634857178,
|
|
"loss": 4.2193,
|
|
"step": 309
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.019439999014139175,
|
|
"beta_dpo/beta_used_raw": -0.004033832810819149,
|
|
"beta_dpo/gap_mean": 24.703720092773438,
|
|
"beta_dpo/gap_std": 41.20947265625,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.6492146596858639,
|
|
"grad_norm": 48.24752426147461,
|
|
"learning_rate": 1.6650514271527465e-07,
|
|
"logits/chosen": 1.368004560470581,
|
|
"logits/rejected": 1.6040199995040894,
|
|
"loss": 4.5291,
|
|
"step": 310
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.015120752155780792,
|
|
"beta_dpo/beta_used_raw": -0.0021106062922626734,
|
|
"beta_dpo/gap_mean": 23.902956008911133,
|
|
"beta_dpo/gap_std": 41.10802459716797,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.6513089005235602,
|
|
"grad_norm": 83.40555572509766,
|
|
"learning_rate": 1.647817538357072e-07,
|
|
"logits/chosen": 1.4084839820861816,
|
|
"logits/rejected": 1.5573794841766357,
|
|
"loss": 5.0402,
|
|
"step": 311
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03297141566872597,
|
|
"beta_dpo/beta_used_raw": 0.015015541575849056,
|
|
"beta_dpo/gap_mean": 25.408002853393555,
|
|
"beta_dpo/gap_std": 40.86416244506836,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.6534031413612565,
|
|
"grad_norm": 73.86492156982422,
|
|
"learning_rate": 1.6306293495205755e-07,
|
|
"logits/chosen": 1.538864016532898,
|
|
"logits/rejected": 1.5750356912612915,
|
|
"loss": 4.1733,
|
|
"step": 312
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.026556478813290596,
|
|
"beta_dpo/beta_used_raw": 0.014360915869474411,
|
|
"beta_dpo/gap_mean": 22.71212387084961,
|
|
"beta_dpo/gap_std": 41.899532318115234,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.6554973821989529,
|
|
"grad_norm": 43.307254791259766,
|
|
"learning_rate": 1.6134877823936607e-07,
|
|
"logits/chosen": 1.4833365678787231,
|
|
"logits/rejected": 1.5087875127792358,
|
|
"loss": 4.3745,
|
|
"step": 313
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04993228241801262,
|
|
"beta_dpo/beta_used_raw": 0.04400447756052017,
|
|
"beta_dpo/gap_mean": 23.01084327697754,
|
|
"beta_dpo/gap_std": 41.7484245300293,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.6575916230366492,
|
|
"grad_norm": 185.30311584472656,
|
|
"learning_rate": 1.5963937562265522e-07,
|
|
"logits/chosen": 1.5994868278503418,
|
|
"logits/rejected": 1.6039897203445435,
|
|
"loss": 3.9626,
|
|
"step": 314
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.019904792308807373,
|
|
"beta_dpo/beta_used_raw": 0.0038104329723864794,
|
|
"beta_dpo/gap_mean": 24.50067710876465,
|
|
"beta_dpo/gap_std": 41.975162506103516,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.6596858638743456,
|
|
"grad_norm": 60.188743591308594,
|
|
"learning_rate": 1.5793481877199943e-07,
|
|
"logits/chosen": 1.8757685422897339,
|
|
"logits/rejected": 1.802669644355774,
|
|
"loss": 4.3242,
|
|
"step": 315
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.011747484095394611,
|
|
"beta_dpo/beta_used_raw": -0.011036318726837635,
|
|
"beta_dpo/gap_mean": 25.946598052978516,
|
|
"beta_dpo/gap_std": 41.94285583496094,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.6617801047120419,
|
|
"grad_norm": 96.64191436767578,
|
|
"learning_rate": 1.562351990976095e-07,
|
|
"logits/chosen": 1.2265623807907104,
|
|
"logits/rejected": 1.3494703769683838,
|
|
"loss": 4.9269,
|
|
"step": 316
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01594529114663601,
|
|
"beta_dpo/beta_used_raw": -0.011434204876422882,
|
|
"beta_dpo/gap_mean": 25.075801849365234,
|
|
"beta_dpo/gap_std": 42.253684997558594,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.6638743455497382,
|
|
"grad_norm": 66.5694580078125,
|
|
"learning_rate": 1.5454060774493065e-07,
|
|
"logits/chosen": 1.4082281589508057,
|
|
"logits/rejected": 1.4196900129318237,
|
|
"loss": 4.7001,
|
|
"step": 317
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04018227756023407,
|
|
"beta_dpo/beta_used_raw": 0.030732491984963417,
|
|
"beta_dpo/gap_mean": 24.28862953186035,
|
|
"beta_dpo/gap_std": 38.98953628540039,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.6659685863874345,
|
|
"grad_norm": 67.83879852294922,
|
|
"learning_rate": 1.5285113558975427e-07,
|
|
"logits/chosen": 1.5352228879928589,
|
|
"logits/rejected": 1.7299730777740479,
|
|
"loss": 3.609,
|
|
"step": 318
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.021217646077275276,
|
|
"beta_dpo/beta_used_raw": 0.006635315250605345,
|
|
"beta_dpo/gap_mean": 25.60199737548828,
|
|
"beta_dpo/gap_std": 38.8849983215332,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.6680628272251309,
|
|
"grad_norm": 51.74640655517578,
|
|
"learning_rate": 1.5116687323334464e-07,
|
|
"logits/chosen": 1.2286893129348755,
|
|
"logits/rejected": 1.462414026260376,
|
|
"loss": 4.2736,
|
|
"step": 319
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.015490580350160599,
|
|
"beta_dpo/beta_used_raw": -0.004652615636587143,
|
|
"beta_dpo/gap_mean": 25.7495059967041,
|
|
"beta_dpo/gap_std": 39.36385726928711,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.6701570680628273,
|
|
"grad_norm": 66.98085021972656,
|
|
"learning_rate": 1.4948791099758052e-07,
|
|
"logits/chosen": 1.9294114112854004,
|
|
"logits/rejected": 1.8916367292404175,
|
|
"loss": 4.5611,
|
|
"step": 320
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02924424409866333,
|
|
"beta_dpo/beta_used_raw": 0.012194283306598663,
|
|
"beta_dpo/gap_mean": 23.17910385131836,
|
|
"beta_dpo/gap_std": 40.0921745300293,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.6722513089005235,
|
|
"grad_norm": 75.77815246582031,
|
|
"learning_rate": 1.478143389201113e-07,
|
|
"logits/chosen": 1.6986711025238037,
|
|
"logits/rejected": 1.4788739681243896,
|
|
"loss": 3.9257,
|
|
"step": 321
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01892891526222229,
|
|
"beta_dpo/beta_used_raw": 0.0015440168790519238,
|
|
"beta_dpo/gap_mean": 23.837888717651367,
|
|
"beta_dpo/gap_std": 39.51669692993164,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.6743455497382199,
|
|
"grad_norm": 61.61996841430664,
|
|
"learning_rate": 1.461462467495284e-07,
|
|
"logits/chosen": 1.2796248197555542,
|
|
"logits/rejected": 1.2974272966384888,
|
|
"loss": 4.6315,
|
|
"step": 322
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02444988675415516,
|
|
"beta_dpo/beta_used_raw": -0.004915682598948479,
|
|
"beta_dpo/gap_mean": 22.961061477661133,
|
|
"beta_dpo/gap_std": 40.85033416748047,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.6764397905759162,
|
|
"grad_norm": 61.971153259277344,
|
|
"learning_rate": 1.4448372394055246e-07,
|
|
"logits/chosen": 1.2066650390625,
|
|
"logits/rejected": 0.9574912190437317,
|
|
"loss": 4.1271,
|
|
"step": 323
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04017874598503113,
|
|
"beta_dpo/beta_used_raw": 0.02891341596841812,
|
|
"beta_dpo/gap_mean": 23.883920669555664,
|
|
"beta_dpo/gap_std": 40.295066833496094,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.6785340314136126,
|
|
"grad_norm": 67.87089538574219,
|
|
"learning_rate": 1.428268596492364e-07,
|
|
"logits/chosen": 1.6108598709106445,
|
|
"logits/rejected": 1.5994318723678589,
|
|
"loss": 3.8856,
|
|
"step": 324
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04682011157274246,
|
|
"beta_dpo/beta_used_raw": 0.020984284579753876,
|
|
"beta_dpo/gap_mean": 25.911354064941406,
|
|
"beta_dpo/gap_std": 41.97956085205078,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.680628272251309,
|
|
"grad_norm": 139.75146484375,
|
|
"learning_rate": 1.4117574272818386e-07,
|
|
"logits/chosen": 1.6725175380706787,
|
|
"logits/rejected": 1.797964096069336,
|
|
"loss": 4.4611,
|
|
"step": 325
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01575140468776226,
|
|
"beta_dpo/beta_used_raw": -0.0014644484035670757,
|
|
"beta_dpo/gap_mean": 23.560775756835938,
|
|
"beta_dpo/gap_std": 44.54059982299805,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.6827225130890052,
|
|
"grad_norm": 56.50615310668945,
|
|
"learning_rate": 1.3953046172178413e-07,
|
|
"logits/chosen": 1.166620135307312,
|
|
"logits/rejected": 1.4378832578659058,
|
|
"loss": 4.8138,
|
|
"step": 326
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.060598503798246384,
|
|
"beta_dpo/beta_used_raw": 0.054446715861558914,
|
|
"beta_dpo/gap_mean": 25.692852020263672,
|
|
"beta_dpo/gap_std": 43.64955520629883,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.6848167539267016,
|
|
"grad_norm": 121.72166442871094,
|
|
"learning_rate": 1.3789110486146468e-07,
|
|
"logits/chosen": 1.5548646450042725,
|
|
"logits/rejected": 1.4554078578948975,
|
|
"loss": 3.1471,
|
|
"step": 327
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.015800345689058304,
|
|
"beta_dpo/beta_used_raw": -0.010136552155017853,
|
|
"beta_dpo/gap_mean": 27.02881622314453,
|
|
"beta_dpo/gap_std": 41.867454528808594,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.6869109947643979,
|
|
"grad_norm": 41.46779251098633,
|
|
"learning_rate": 1.362577600609588e-07,
|
|
"logits/chosen": 1.3131914138793945,
|
|
"logits/rejected": 1.3917593955993652,
|
|
"loss": 4.4447,
|
|
"step": 328
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.013338714838027954,
|
|
"beta_dpo/beta_used_raw": -0.004739915020763874,
|
|
"beta_dpo/gap_mean": 25.284814834594727,
|
|
"beta_dpo/gap_std": 41.969566345214844,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.6890052356020943,
|
|
"grad_norm": 60.99818420410156,
|
|
"learning_rate": 1.3463051491159093e-07,
|
|
"logits/chosen": 1.4903924465179443,
|
|
"logits/rejected": 1.814817190170288,
|
|
"loss": 5.1013,
|
|
"step": 329
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.028788069263100624,
|
|
"beta_dpo/beta_used_raw": 0.005851927679032087,
|
|
"beta_dpo/gap_mean": 22.452590942382812,
|
|
"beta_dpo/gap_std": 44.61354064941406,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.6910994764397905,
|
|
"grad_norm": 179.97225952148438,
|
|
"learning_rate": 1.3300945667758012e-07,
|
|
"logits/chosen": 1.6997681856155396,
|
|
"logits/rejected": 1.6331228017807007,
|
|
"loss": 4.3589,
|
|
"step": 330
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.028657177463173866,
|
|
"beta_dpo/beta_used_raw": 0.019459933042526245,
|
|
"beta_dpo/gap_mean": 23.764484405517578,
|
|
"beta_dpo/gap_std": 42.601539611816406,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.6931937172774869,
|
|
"grad_norm": 48.68358612060547,
|
|
"learning_rate": 1.3139467229135998e-07,
|
|
"logits/chosen": 1.441627860069275,
|
|
"logits/rejected": 1.3355118036270142,
|
|
"loss": 4.303,
|
|
"step": 331
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04341350123286247,
|
|
"beta_dpo/beta_used_raw": 0.038200560957193375,
|
|
"beta_dpo/gap_mean": 26.584733963012695,
|
|
"beta_dpo/gap_std": 41.82080078125,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.6952879581151833,
|
|
"grad_norm": 73.14544677734375,
|
|
"learning_rate": 1.2978624834891626e-07,
|
|
"logits/chosen": 1.2019636631011963,
|
|
"logits/rejected": 1.203635334968567,
|
|
"loss": 3.8045,
|
|
"step": 332
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.011696412228047848,
|
|
"beta_dpo/beta_used_raw": -0.014751153066754341,
|
|
"beta_dpo/gap_mean": 23.98305892944336,
|
|
"beta_dpo/gap_std": 42.328861236572266,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.6973821989528796,
|
|
"grad_norm": 47.275943756103516,
|
|
"learning_rate": 1.281842711051438e-07,
|
|
"logits/chosen": 1.2524588108062744,
|
|
"logits/rejected": 1.1359145641326904,
|
|
"loss": 4.9502,
|
|
"step": 333
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.034421779215335846,
|
|
"beta_dpo/beta_used_raw": 0.018691357225179672,
|
|
"beta_dpo/gap_mean": 22.934709548950195,
|
|
"beta_dpo/gap_std": 41.71361541748047,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.6994764397905759,
|
|
"grad_norm": 63.18965530395508,
|
|
"learning_rate": 1.2658882646922033e-07,
|
|
"logits/chosen": 1.3189448118209839,
|
|
"logits/rejected": 1.3639788627624512,
|
|
"loss": 3.9628,
|
|
"step": 334
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02628299593925476,
|
|
"beta_dpo/beta_used_raw": -0.008556408807635307,
|
|
"beta_dpo/gap_mean": 23.939117431640625,
|
|
"beta_dpo/gap_std": 43.04575729370117,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.7015706806282722,
|
|
"grad_norm": 158.49334716796875,
|
|
"learning_rate": 1.2500000000000005e-07,
|
|
"logits/chosen": 1.460978627204895,
|
|
"logits/rejected": 1.5252642631530762,
|
|
"loss": 4.7371,
|
|
"step": 335
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.013360177166759968,
|
|
"beta_dpo/beta_used_raw": -0.006929943338036537,
|
|
"beta_dpo/gap_mean": 21.377792358398438,
|
|
"beta_dpo/gap_std": 43.017784118652344,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.7036649214659686,
|
|
"grad_norm": 49.986663818359375,
|
|
"learning_rate": 1.2341787690142435e-07,
|
|
"logits/chosen": 1.5372939109802246,
|
|
"logits/rejected": 1.7963600158691406,
|
|
"loss": 4.9661,
|
|
"step": 336
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03531493619084358,
|
|
"beta_dpo/beta_used_raw": 0.009031134657561779,
|
|
"beta_dpo/gap_mean": 21.560890197753906,
|
|
"beta_dpo/gap_std": 42.4267578125,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.7057591623036649,
|
|
"grad_norm": 93.49922943115234,
|
|
"learning_rate": 1.2184254201795363e-07,
|
|
"logits/chosen": 1.0734624862670898,
|
|
"logits/rejected": 0.9902403950691223,
|
|
"loss": 4.5839,
|
|
"step": 337
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.026785733178257942,
|
|
"beta_dpo/beta_used_raw": 0.008016789332032204,
|
|
"beta_dpo/gap_mean": 24.554834365844727,
|
|
"beta_dpo/gap_std": 42.207237243652344,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.7078534031413612,
|
|
"grad_norm": 270.1446533203125,
|
|
"learning_rate": 1.202740798300168e-07,
|
|
"logits/chosen": 1.5387308597564697,
|
|
"logits/rejected": 1.5395488739013672,
|
|
"loss": 4.6984,
|
|
"step": 338
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03279449790716171,
|
|
"beta_dpo/beta_used_raw": 0.020053986459970474,
|
|
"beta_dpo/gap_mean": 27.445066452026367,
|
|
"beta_dpo/gap_std": 43.14484405517578,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.7099476439790576,
|
|
"grad_norm": 70.26140594482422,
|
|
"learning_rate": 1.1871257444948096e-07,
|
|
"logits/chosen": 1.5849591493606567,
|
|
"logits/rejected": 1.5081734657287598,
|
|
"loss": 4.0688,
|
|
"step": 339
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.013446008786559105,
|
|
"beta_dpo/beta_used_raw": -0.02429656684398651,
|
|
"beta_dpo/gap_mean": 26.41143226623535,
|
|
"beta_dpo/gap_std": 44.58018493652344,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.7120418848167539,
|
|
"grad_norm": 44.72693634033203,
|
|
"learning_rate": 1.1715810961514072e-07,
|
|
"logits/chosen": 0.8878348469734192,
|
|
"logits/rejected": 1.03843355178833,
|
|
"loss": 4.9074,
|
|
"step": 340
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02605244144797325,
|
|
"beta_dpo/beta_used_raw": -0.017769023776054382,
|
|
"beta_dpo/gap_mean": 21.7451114654541,
|
|
"beta_dpo/gap_std": 44.111759185791016,
|
|
"beta_dpo/mask_keep_frac": 0.90625,
|
|
"epoch": 0.7141361256544503,
|
|
"grad_norm": 71.31874084472656,
|
|
"learning_rate": 1.1561076868822755e-07,
|
|
"logits/chosen": 1.4821139574050903,
|
|
"logits/rejected": 1.688697338104248,
|
|
"loss": 4.741,
|
|
"step": 341
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.039557162672281265,
|
|
"beta_dpo/beta_used_raw": 0.024851929396390915,
|
|
"beta_dpo/gap_mean": 22.442163467407227,
|
|
"beta_dpo/gap_std": 42.288307189941406,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.7162303664921466,
|
|
"grad_norm": 90.50724029541016,
|
|
"learning_rate": 1.1407063464793965e-07,
|
|
"logits/chosen": 1.515696406364441,
|
|
"logits/rejected": 1.6636167764663696,
|
|
"loss": 3.8821,
|
|
"step": 342
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.028740962967276573,
|
|
"beta_dpo/beta_used_raw": 0.023837603628635406,
|
|
"beta_dpo/gap_mean": 22.93502426147461,
|
|
"beta_dpo/gap_std": 41.14816665649414,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.7183246073298429,
|
|
"grad_norm": 84.98859405517578,
|
|
"learning_rate": 1.125377900869913e-07,
|
|
"logits/chosen": 1.6616275310516357,
|
|
"logits/rejected": 1.49526846408844,
|
|
"loss": 4.1559,
|
|
"step": 343
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03510721027851105,
|
|
"beta_dpo/beta_used_raw": 0.019166965037584305,
|
|
"beta_dpo/gap_mean": 22.779037475585938,
|
|
"beta_dpo/gap_std": 41.92900085449219,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.7204188481675393,
|
|
"grad_norm": 148.84140014648438,
|
|
"learning_rate": 1.110123172071844e-07,
|
|
"logits/chosen": 1.341618537902832,
|
|
"logits/rejected": 1.4202890396118164,
|
|
"loss": 4.5051,
|
|
"step": 344
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02456255815923214,
|
|
"beta_dpo/beta_used_raw": -0.002841557841747999,
|
|
"beta_dpo/gap_mean": 23.927555084228516,
|
|
"beta_dpo/gap_std": 41.32786560058594,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.7225130890052356,
|
|
"grad_norm": 71.29635620117188,
|
|
"learning_rate": 1.09494297815e-07,
|
|
"logits/chosen": 1.6482702493667603,
|
|
"logits/rejected": 1.768045425415039,
|
|
"loss": 4.6483,
|
|
"step": 345
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03388482332229614,
|
|
"beta_dpo/beta_used_raw": 0.01795162260532379,
|
|
"beta_dpo/gap_mean": 24.46042251586914,
|
|
"beta_dpo/gap_std": 38.79722595214844,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.724607329842932,
|
|
"grad_norm": 70.42410278320312,
|
|
"learning_rate": 1.0798381331721107e-07,
|
|
"logits/chosen": 1.0491037368774414,
|
|
"logits/rejected": 1.1440801620483398,
|
|
"loss": 4.0215,
|
|
"step": 346
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.028078395873308182,
|
|
"beta_dpo/beta_used_raw": 0.014503560960292816,
|
|
"beta_dpo/gap_mean": 25.07908058166504,
|
|
"beta_dpo/gap_std": 40.29609680175781,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.7267015706806282,
|
|
"grad_norm": 71.0637435913086,
|
|
"learning_rate": 1.0648094471651722e-07,
|
|
"logits/chosen": 1.4748269319534302,
|
|
"logits/rejected": 1.4847553968429565,
|
|
"loss": 4.2448,
|
|
"step": 347
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.014106756076216698,
|
|
"beta_dpo/beta_used_raw": -0.01745045930147171,
|
|
"beta_dpo/gap_mean": 20.426612854003906,
|
|
"beta_dpo/gap_std": 37.750858306884766,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.7287958115183246,
|
|
"grad_norm": 41.9898681640625,
|
|
"learning_rate": 1.0498577260720048e-07,
|
|
"logits/chosen": 1.4606678485870361,
|
|
"logits/rejected": 1.539605736732483,
|
|
"loss": 4.7306,
|
|
"step": 348
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03836182504892349,
|
|
"beta_dpo/beta_used_raw": 0.027038609609007835,
|
|
"beta_dpo/gap_mean": 23.00733757019043,
|
|
"beta_dpo/gap_std": 40.6578369140625,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.7308900523560209,
|
|
"grad_norm": 116.7526626586914,
|
|
"learning_rate": 1.0349837717080347e-07,
|
|
"logits/chosen": 1.5413777828216553,
|
|
"logits/rejected": 1.6035332679748535,
|
|
"loss": 4.5157,
|
|
"step": 349
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03894190117716789,
|
|
"beta_dpo/beta_used_raw": 0.026062268763780594,
|
|
"beta_dpo/gap_mean": 24.171770095825195,
|
|
"beta_dpo/gap_std": 41.29063415527344,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.7329842931937173,
|
|
"grad_norm": 110.67535400390625,
|
|
"learning_rate": 1.0201883817182949e-07,
|
|
"logits/chosen": 1.7762742042541504,
|
|
"logits/rejected": 1.5685731172561646,
|
|
"loss": 3.9019,
|
|
"step": 350
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.005233833100646734,
|
|
"beta_dpo/beta_used_raw": -0.016874097287654877,
|
|
"beta_dpo/gap_mean": 21.94039535522461,
|
|
"beta_dpo/gap_std": 42.503211975097656,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.7350785340314137,
|
|
"grad_norm": 17.592376708984375,
|
|
"learning_rate": 1.0054723495346482e-07,
|
|
"logits/chosen": 1.4498162269592285,
|
|
"logits/rejected": 1.4771305322647095,
|
|
"loss": 5.2076,
|
|
"step": 351
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.05153050646185875,
|
|
"beta_dpo/beta_used_raw": 0.0483248271048069,
|
|
"beta_dpo/gap_mean": 23.78329086303711,
|
|
"beta_dpo/gap_std": 43.25350570678711,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.7371727748691099,
|
|
"grad_norm": 260.2582092285156,
|
|
"learning_rate": 9.908364643332398e-08,
|
|
"logits/chosen": 1.537024974822998,
|
|
"logits/rejected": 1.781685471534729,
|
|
"loss": 3.9455,
|
|
"step": 352
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.030592329800128937,
|
|
"beta_dpo/beta_used_raw": 0.014342766255140305,
|
|
"beta_dpo/gap_mean": 25.760425567626953,
|
|
"beta_dpo/gap_std": 40.68629455566406,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.7392670157068063,
|
|
"grad_norm": 90.93749237060547,
|
|
"learning_rate": 9.76281510992176e-08,
|
|
"logits/chosen": 1.2568163871765137,
|
|
"logits/rejected": 1.252407193183899,
|
|
"loss": 4.1275,
|
|
"step": 353
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.012342535890638828,
|
|
"beta_dpo/beta_used_raw": -0.01871517114341259,
|
|
"beta_dpo/gap_mean": 23.74026870727539,
|
|
"beta_dpo/gap_std": 42.1845703125,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.7413612565445026,
|
|
"grad_norm": 94.49176025390625,
|
|
"learning_rate": 9.618082700494318e-08,
|
|
"logits/chosen": 1.3868615627288818,
|
|
"logits/rejected": 1.4805989265441895,
|
|
"loss": 6.0723,
|
|
"step": 354
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04293268173933029,
|
|
"beta_dpo/beta_used_raw": 0.03582005202770233,
|
|
"beta_dpo/gap_mean": 23.41856575012207,
|
|
"beta_dpo/gap_std": 43.963043212890625,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.743455497382199,
|
|
"grad_norm": 109.3790054321289,
|
|
"learning_rate": 9.474175176609956e-08,
|
|
"logits/chosen": 1.5852292776107788,
|
|
"logits/rejected": 1.7418677806854248,
|
|
"loss": 4.0902,
|
|
"step": 355
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.017568301409482956,
|
|
"beta_dpo/beta_used_raw": -0.009104796685278416,
|
|
"beta_dpo/gap_mean": 22.803916931152344,
|
|
"beta_dpo/gap_std": 39.86484909057617,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.7455497382198953,
|
|
"grad_norm": 80.7624282836914,
|
|
"learning_rate": 9.331100255592436e-08,
|
|
"logits/chosen": 1.3812074661254883,
|
|
"logits/rejected": 1.4987109899520874,
|
|
"loss": 4.7965,
|
|
"step": 356
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.028205767273902893,
|
|
"beta_dpo/beta_used_raw": 0.009551008231937885,
|
|
"beta_dpo/gap_mean": 21.426677703857422,
|
|
"beta_dpo/gap_std": 41.5255012512207,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.7476439790575916,
|
|
"grad_norm": 198.40061950683594,
|
|
"learning_rate": 9.18886561011557e-08,
|
|
"logits/chosen": 1.535756230354309,
|
|
"logits/rejected": 1.5348542928695679,
|
|
"loss": 4.8634,
|
|
"step": 357
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03555550426244736,
|
|
"beta_dpo/beta_used_raw": 0.02786700241267681,
|
|
"beta_dpo/gap_mean": 24.894935607910156,
|
|
"beta_dpo/gap_std": 42.7304801940918,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.749738219895288,
|
|
"grad_norm": 100.15424346923828,
|
|
"learning_rate": 9.047478867791731e-08,
|
|
"logits/chosen": 1.3941529989242554,
|
|
"logits/rejected": 1.3515270948410034,
|
|
"loss": 4.5553,
|
|
"step": 358
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.022278830409049988,
|
|
"beta_dpo/beta_used_raw": 0.012576328590512276,
|
|
"beta_dpo/gap_mean": 25.73493194580078,
|
|
"beta_dpo/gap_std": 42.311771392822266,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.7518324607329843,
|
|
"grad_norm": 63.86215591430664,
|
|
"learning_rate": 8.906947610762825e-08,
|
|
"logits/chosen": 1.4539521932601929,
|
|
"logits/rejected": 1.5561376810073853,
|
|
"loss": 4.4114,
|
|
"step": 359
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.011337094008922577,
|
|
"beta_dpo/beta_used_raw": -0.001419117208570242,
|
|
"beta_dpo/gap_mean": 25.067241668701172,
|
|
"beta_dpo/gap_std": 41.38372802734375,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.7539267015706806,
|
|
"grad_norm": 33.68746566772461,
|
|
"learning_rate": 8.76727937529367e-08,
|
|
"logits/chosen": 1.602333664894104,
|
|
"logits/rejected": 1.5335873365402222,
|
|
"loss": 4.7231,
|
|
"step": 360
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.05738076567649841,
|
|
"beta_dpo/beta_used_raw": 0.053437668830156326,
|
|
"beta_dpo/gap_mean": 26.05853271484375,
|
|
"beta_dpo/gap_std": 42.002994537353516,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.7560209424083769,
|
|
"grad_norm": 172.0302734375,
|
|
"learning_rate": 8.628481651367875e-08,
|
|
"logits/chosen": 1.2185293436050415,
|
|
"logits/rejected": 1.4148153066635132,
|
|
"loss": 3.4371,
|
|
"step": 361
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04557962343096733,
|
|
"beta_dpo/beta_used_raw": 0.027200574055314064,
|
|
"beta_dpo/gap_mean": 26.162132263183594,
|
|
"beta_dpo/gap_std": 42.437416076660156,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.7581151832460733,
|
|
"grad_norm": 90.24806213378906,
|
|
"learning_rate": 8.490561882286135e-08,
|
|
"logits/chosen": 1.3487976789474487,
|
|
"logits/rejected": 1.3411986827850342,
|
|
"loss": 3.4565,
|
|
"step": 362
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03907949849963188,
|
|
"beta_dpo/beta_used_raw": 0.0354890413582325,
|
|
"beta_dpo/gap_mean": 25.548728942871094,
|
|
"beta_dpo/gap_std": 42.264503479003906,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.7602094240837697,
|
|
"grad_norm": 101.23867797851562,
|
|
"learning_rate": 8.353527464267104e-08,
|
|
"logits/chosen": 1.5559055805206299,
|
|
"logits/rejected": 1.4353469610214233,
|
|
"loss": 3.6541,
|
|
"step": 363
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.019362712278962135,
|
|
"beta_dpo/beta_used_raw": -0.005188856739550829,
|
|
"beta_dpo/gap_mean": 24.893081665039062,
|
|
"beta_dpo/gap_std": 41.87436294555664,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.762303664921466,
|
|
"grad_norm": 84.14205932617188,
|
|
"learning_rate": 8.217385746050742e-08,
|
|
"logits/chosen": 1.8355655670166016,
|
|
"logits/rejected": 1.5974853038787842,
|
|
"loss": 4.7009,
|
|
"step": 364
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02898905798792839,
|
|
"beta_dpo/beta_used_raw": 0.02243414893746376,
|
|
"beta_dpo/gap_mean": 23.674781799316406,
|
|
"beta_dpo/gap_std": 41.810665130615234,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.7643979057591623,
|
|
"grad_norm": 77.57154083251953,
|
|
"learning_rate": 8.082144028504231e-08,
|
|
"logits/chosen": 1.512800693511963,
|
|
"logits/rejected": 1.7196999788284302,
|
|
"loss": 4.3814,
|
|
"step": 365
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.030239790678024292,
|
|
"beta_dpo/beta_used_raw": 0.0004999339580535889,
|
|
"beta_dpo/gap_mean": 25.40928840637207,
|
|
"beta_dpo/gap_std": 41.03025817871094,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.7664921465968586,
|
|
"grad_norm": 41.87646484375,
|
|
"learning_rate": 7.947809564230445e-08,
|
|
"logits/chosen": 1.4762005805969238,
|
|
"logits/rejected": 1.3744585514068604,
|
|
"loss": 4.3359,
|
|
"step": 366
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.028317891061306,
|
|
"beta_dpo/beta_used_raw": 0.008798494935035706,
|
|
"beta_dpo/gap_mean": 25.357412338256836,
|
|
"beta_dpo/gap_std": 39.42461013793945,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.768586387434555,
|
|
"grad_norm": 70.21991729736328,
|
|
"learning_rate": 7.814389557179016e-08,
|
|
"logits/chosen": 1.8320472240447998,
|
|
"logits/rejected": 1.5733611583709717,
|
|
"loss": 3.8554,
|
|
"step": 367
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.040644265711307526,
|
|
"beta_dpo/beta_used_raw": 0.029841335490345955,
|
|
"beta_dpo/gap_mean": 27.69914436340332,
|
|
"beta_dpo/gap_std": 39.52192687988281,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.7706806282722513,
|
|
"grad_norm": 51.4883918762207,
|
|
"learning_rate": 7.681891162260015e-08,
|
|
"logits/chosen": 1.7997376918792725,
|
|
"logits/rejected": 1.644882321357727,
|
|
"loss": 3.7779,
|
|
"step": 368
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.007025650702416897,
|
|
"beta_dpo/beta_used_raw": -0.022717807441949844,
|
|
"beta_dpo/gap_mean": 26.59383201599121,
|
|
"beta_dpo/gap_std": 39.74239730834961,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.7727748691099476,
|
|
"grad_norm": 31.33010482788086,
|
|
"learning_rate": 7.550321484960251e-08,
|
|
"logits/chosen": 1.567758560180664,
|
|
"logits/rejected": 1.5652072429656982,
|
|
"loss": 5.0706,
|
|
"step": 369
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03189126402139664,
|
|
"beta_dpo/beta_used_raw": -0.0016455072909593582,
|
|
"beta_dpo/gap_mean": 25.960403442382812,
|
|
"beta_dpo/gap_std": 41.779354095458984,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.774869109947644,
|
|
"grad_norm": 61.498207092285156,
|
|
"learning_rate": 7.419687580962222e-08,
|
|
"logits/chosen": 1.4514704942703247,
|
|
"logits/rejected": 1.6543275117874146,
|
|
"loss": 4.0113,
|
|
"step": 370
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02217245101928711,
|
|
"beta_dpo/beta_used_raw": 0.0033044693991541862,
|
|
"beta_dpo/gap_mean": 22.760690689086914,
|
|
"beta_dpo/gap_std": 41.05923080444336,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.7769633507853403,
|
|
"grad_norm": 52.41913604736328,
|
|
"learning_rate": 7.289996455765748e-08,
|
|
"logits/chosen": 0.8454320430755615,
|
|
"logits/rejected": 1.0241940021514893,
|
|
"loss": 4.3701,
|
|
"step": 371
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.06150563433766365,
|
|
"beta_dpo/beta_used_raw": 0.060376305133104324,
|
|
"beta_dpo/gap_mean": 26.305227279663086,
|
|
"beta_dpo/gap_std": 40.897579193115234,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.7790575916230367,
|
|
"grad_norm": 95.63087463378906,
|
|
"learning_rate": 7.161255064312283e-08,
|
|
"logits/chosen": 1.3337714672088623,
|
|
"logits/rejected": 1.200531244277954,
|
|
"loss": 3.4199,
|
|
"step": 372
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.017467252910137177,
|
|
"beta_dpo/beta_used_raw": -0.0006841365247964859,
|
|
"beta_dpo/gap_mean": 27.40023422241211,
|
|
"beta_dpo/gap_std": 41.40983963012695,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.7811518324607329,
|
|
"grad_norm": 65.07406616210938,
|
|
"learning_rate": 7.033470310611945e-08,
|
|
"logits/chosen": 1.5559697151184082,
|
|
"logits/rejected": 1.267425537109375,
|
|
"loss": 4.8366,
|
|
"step": 373
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.020001672208309174,
|
|
"beta_dpo/beta_used_raw": -0.006743720732629299,
|
|
"beta_dpo/gap_mean": 25.95492172241211,
|
|
"beta_dpo/gap_std": 42.976318359375,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.7832460732984293,
|
|
"grad_norm": 43.096229553222656,
|
|
"learning_rate": 6.906649047373245e-08,
|
|
"logits/chosen": 1.5863916873931885,
|
|
"logits/rejected": 1.7011443376541138,
|
|
"loss": 4.5082,
|
|
"step": 374
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.012499826960265636,
|
|
"beta_dpo/beta_used_raw": -0.00970209576189518,
|
|
"beta_dpo/gap_mean": 23.713022232055664,
|
|
"beta_dpo/gap_std": 42.88922119140625,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.7853403141361257,
|
|
"grad_norm": 35.47541427612305,
|
|
"learning_rate": 6.780798075635675e-08,
|
|
"logits/chosen": 1.4474728107452393,
|
|
"logits/rejected": 1.3061145544052124,
|
|
"loss": 4.878,
|
|
"step": 375
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.032169777899980545,
|
|
"beta_dpo/beta_used_raw": 0.023137152194976807,
|
|
"beta_dpo/gap_mean": 23.426164627075195,
|
|
"beta_dpo/gap_std": 42.51594924926758,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.787434554973822,
|
|
"grad_norm": 95.04769897460938,
|
|
"learning_rate": 6.655924144404906e-08,
|
|
"logits/chosen": 1.573278546333313,
|
|
"logits/rejected": 1.815221905708313,
|
|
"loss": 4.1144,
|
|
"step": 376
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.030707208439707756,
|
|
"beta_dpo/beta_used_raw": 0.005986468866467476,
|
|
"beta_dpo/gap_mean": 23.08704376220703,
|
|
"beta_dpo/gap_std": 41.96858596801758,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.7895287958115184,
|
|
"grad_norm": 90.52848052978516,
|
|
"learning_rate": 6.532033950290885e-08,
|
|
"logits/chosen": 1.5606698989868164,
|
|
"logits/rejected": 1.6266758441925049,
|
|
"loss": 4.5857,
|
|
"step": 377
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0293353870511055,
|
|
"beta_dpo/beta_used_raw": 0.019241416826844215,
|
|
"beta_dpo/gap_mean": 21.17989730834961,
|
|
"beta_dpo/gap_std": 42.731689453125,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.7916230366492146,
|
|
"grad_norm": 168.0338897705078,
|
|
"learning_rate": 6.409134137148736e-08,
|
|
"logits/chosen": 1.567497968673706,
|
|
"logits/rejected": 1.6306406259536743,
|
|
"loss": 4.6972,
|
|
"step": 378
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.021104762330651283,
|
|
"beta_dpo/beta_used_raw": -0.0026983979623764753,
|
|
"beta_dpo/gap_mean": 22.86931610107422,
|
|
"beta_dpo/gap_std": 42.299137115478516,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.793717277486911,
|
|
"grad_norm": 53.448760986328125,
|
|
"learning_rate": 6.28723129572247e-08,
|
|
"logits/chosen": 1.6663786172866821,
|
|
"logits/rejected": 1.593047022819519,
|
|
"loss": 4.8597,
|
|
"step": 379
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.014543892815709114,
|
|
"beta_dpo/beta_used_raw": 0.004879960790276527,
|
|
"beta_dpo/gap_mean": 23.742534637451172,
|
|
"beta_dpo/gap_std": 42.56512451171875,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.7958115183246073,
|
|
"grad_norm": 51.2754020690918,
|
|
"learning_rate": 6.166331963291519e-08,
|
|
"logits/chosen": 1.9557546377182007,
|
|
"logits/rejected": 1.7796638011932373,
|
|
"loss": 4.7633,
|
|
"step": 380
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0125275244936347,
|
|
"beta_dpo/beta_used_raw": -0.0011020167730748653,
|
|
"beta_dpo/gap_mean": 24.683391571044922,
|
|
"beta_dpo/gap_std": 41.60409927368164,
|
|
"beta_dpo/mask_keep_frac": 0.625,
|
|
"epoch": 0.7979057591623037,
|
|
"grad_norm": 35.46774673461914,
|
|
"learning_rate": 6.046442623320145e-08,
|
|
"logits/chosen": 1.191896677017212,
|
|
"logits/rejected": 1.2276725769042969,
|
|
"loss": 5.1082,
|
|
"step": 381
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03925769403576851,
|
|
"beta_dpo/beta_used_raw": 0.03246406838297844,
|
|
"beta_dpo/gap_mean": 26.57273292541504,
|
|
"beta_dpo/gap_std": 40.347042083740234,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.8,
|
|
"grad_norm": 91.7632064819336,
|
|
"learning_rate": 5.9275697051098275e-08,
|
|
"logits/chosen": 1.5332963466644287,
|
|
"logits/rejected": 1.5386418104171753,
|
|
"loss": 3.9613,
|
|
"step": 382
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.026574671268463135,
|
|
"beta_dpo/beta_used_raw": 0.0041369106620550156,
|
|
"beta_dpo/gap_mean": 27.20392608642578,
|
|
"beta_dpo/gap_std": 41.187217712402344,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.8020942408376963,
|
|
"grad_norm": 90.72322082519531,
|
|
"learning_rate": 5.809719583454414e-08,
|
|
"logits/chosen": 1.213146448135376,
|
|
"logits/rejected": 1.4346027374267578,
|
|
"loss": 4.2591,
|
|
"step": 383
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.018568674102425575,
|
|
"beta_dpo/beta_used_raw": -0.005661527160555124,
|
|
"beta_dpo/gap_mean": 23.266735076904297,
|
|
"beta_dpo/gap_std": 40.896419525146484,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.8041884816753927,
|
|
"grad_norm": 97.89303588867188,
|
|
"learning_rate": 5.6928985782982524e-08,
|
|
"logits/chosen": 1.4912177324295044,
|
|
"logits/rejected": 1.8480693101882935,
|
|
"loss": 4.8446,
|
|
"step": 384
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.025455057621002197,
|
|
"beta_dpo/beta_used_raw": 0.020301831886172295,
|
|
"beta_dpo/gap_mean": 22.282352447509766,
|
|
"beta_dpo/gap_std": 40.13404846191406,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.806282722513089,
|
|
"grad_norm": 87.30133056640625,
|
|
"learning_rate": 5.57711295439732e-08,
|
|
"logits/chosen": 1.6445767879486084,
|
|
"logits/rejected": 1.6937466859817505,
|
|
"loss": 4.6559,
|
|
"step": 385
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.07003487646579742,
|
|
"beta_dpo/beta_used_raw": 0.05945579335093498,
|
|
"beta_dpo/gap_mean": 25.810016632080078,
|
|
"beta_dpo/gap_std": 40.25865936279297,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.8083769633507853,
|
|
"grad_norm": 74.50102233886719,
|
|
"learning_rate": 5.4623689209832484e-08,
|
|
"logits/chosen": 1.644815444946289,
|
|
"logits/rejected": 1.745370864868164,
|
|
"loss": 3.0306,
|
|
"step": 386
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02559298276901245,
|
|
"beta_dpo/beta_used_raw": -0.001606471836566925,
|
|
"beta_dpo/gap_mean": 25.212953567504883,
|
|
"beta_dpo/gap_std": 42.34771728515625,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.8104712041884817,
|
|
"grad_norm": 97.23846435546875,
|
|
"learning_rate": 5.3486726314303175e-08,
|
|
"logits/chosen": 1.5523253679275513,
|
|
"logits/rejected": 1.617262363433838,
|
|
"loss": 4.3258,
|
|
"step": 387
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.019050609320402145,
|
|
"beta_dpo/beta_used_raw": -0.013357133604586124,
|
|
"beta_dpo/gap_mean": 24.462581634521484,
|
|
"beta_dpo/gap_std": 42.33854675292969,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.812565445026178,
|
|
"grad_norm": 115.40874481201172,
|
|
"learning_rate": 5.2360301829254745e-08,
|
|
"logits/chosen": 1.898555040359497,
|
|
"logits/rejected": 1.8352364301681519,
|
|
"loss": 4.8619,
|
|
"step": 388
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.031168397516012192,
|
|
"beta_dpo/beta_used_raw": 0.01580439880490303,
|
|
"beta_dpo/gap_mean": 24.110021591186523,
|
|
"beta_dpo/gap_std": 41.419647216796875,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.8146596858638744,
|
|
"grad_norm": 114.02845001220703,
|
|
"learning_rate": 5.1244476161413806e-08,
|
|
"logits/chosen": 1.7501044273376465,
|
|
"logits/rejected": 1.5219378471374512,
|
|
"loss": 4.4305,
|
|
"step": 389
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.037911996245384216,
|
|
"beta_dpo/beta_used_raw": 0.03245529904961586,
|
|
"beta_dpo/gap_mean": 24.70856475830078,
|
|
"beta_dpo/gap_std": 42.322147369384766,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.8167539267015707,
|
|
"grad_norm": 107.86334228515625,
|
|
"learning_rate": 5.013930914912476e-08,
|
|
"logits/chosen": 1.4109928607940674,
|
|
"logits/rejected": 1.5585747957229614,
|
|
"loss": 3.9697,
|
|
"step": 390
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01184625644236803,
|
|
"beta_dpo/beta_used_raw": -0.0196970384567976,
|
|
"beta_dpo/gap_mean": 25.497241973876953,
|
|
"beta_dpo/gap_std": 39.925994873046875,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.818848167539267,
|
|
"grad_norm": 36.966331481933594,
|
|
"learning_rate": 4.904486005914027e-08,
|
|
"logits/chosen": 1.4992268085479736,
|
|
"logits/rejected": 1.4016600847244263,
|
|
"loss": 4.8753,
|
|
"step": 391
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.031304676085710526,
|
|
"beta_dpo/beta_used_raw": 0.019566738978028297,
|
|
"beta_dpo/gap_mean": 29.37858772277832,
|
|
"beta_dpo/gap_std": 39.760597229003906,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.8209424083769633,
|
|
"grad_norm": 57.252769470214844,
|
|
"learning_rate": 4.796118758344353e-08,
|
|
"logits/chosen": 1.1666127443313599,
|
|
"logits/rejected": 1.1494946479797363,
|
|
"loss": 3.3712,
|
|
"step": 392
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02446107193827629,
|
|
"beta_dpo/beta_used_raw": 0.00717612449079752,
|
|
"beta_dpo/gap_mean": 27.458255767822266,
|
|
"beta_dpo/gap_std": 40.529483795166016,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.8230366492146597,
|
|
"grad_norm": 41.9975700378418,
|
|
"learning_rate": 4.688834983610082e-08,
|
|
"logits/chosen": 1.3543047904968262,
|
|
"logits/rejected": 1.1334538459777832,
|
|
"loss": 4.502,
|
|
"step": 393
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.013751739636063576,
|
|
"beta_dpo/beta_used_raw": -0.013827711343765259,
|
|
"beta_dpo/gap_mean": 25.792306900024414,
|
|
"beta_dpo/gap_std": 41.532981872558594,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.8251308900523561,
|
|
"grad_norm": 38.37825012207031,
|
|
"learning_rate": 4.582640435014459e-08,
|
|
"logits/chosen": 1.755271077156067,
|
|
"logits/rejected": 1.836128830909729,
|
|
"loss": 4.8139,
|
|
"step": 394
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03642860800027847,
|
|
"beta_dpo/beta_used_raw": 0.02762317843735218,
|
|
"beta_dpo/gap_mean": 22.787147521972656,
|
|
"beta_dpo/gap_std": 39.04203414916992,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.8272251308900523,
|
|
"grad_norm": 76.76990509033203,
|
|
"learning_rate": 4.477540807448832e-08,
|
|
"logits/chosen": 1.3757838010787964,
|
|
"logits/rejected": 1.4005060195922852,
|
|
"loss": 3.6736,
|
|
"step": 395
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.016622822731733322,
|
|
"beta_dpo/beta_used_raw": -0.0027820090763270855,
|
|
"beta_dpo/gap_mean": 23.37274932861328,
|
|
"beta_dpo/gap_std": 39.84015655517578,
|
|
"beta_dpo/mask_keep_frac": 0.625,
|
|
"epoch": 0.8293193717277487,
|
|
"grad_norm": 105.81222534179688,
|
|
"learning_rate": 4.373541737087263e-08,
|
|
"logits/chosen": 1.650363802909851,
|
|
"logits/rejected": 1.6201927661895752,
|
|
"loss": 5.2625,
|
|
"step": 396
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.022990621626377106,
|
|
"beta_dpo/beta_used_raw": -0.0033985301852226257,
|
|
"beta_dpo/gap_mean": 23.020658493041992,
|
|
"beta_dpo/gap_std": 39.6679573059082,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.831413612565445,
|
|
"grad_norm": 91.26580047607422,
|
|
"learning_rate": 4.270648801084295e-08,
|
|
"logits/chosen": 1.4977787733078003,
|
|
"logits/rejected": 1.5780669450759888,
|
|
"loss": 4.5482,
|
|
"step": 397
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02246342971920967,
|
|
"beta_dpo/beta_used_raw": 0.007876865565776825,
|
|
"beta_dpo/gap_mean": 21.515539169311523,
|
|
"beta_dpo/gap_std": 42.26047134399414,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.8335078534031414,
|
|
"grad_norm": 80.77655029296875,
|
|
"learning_rate": 4.168867517275806e-08,
|
|
"logits/chosen": 1.3882070779800415,
|
|
"logits/rejected": 1.648177146911621,
|
|
"loss": 4.6146,
|
|
"step": 398
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.030784644186496735,
|
|
"beta_dpo/beta_used_raw": 0.016542304307222366,
|
|
"beta_dpo/gap_mean": 22.006698608398438,
|
|
"beta_dpo/gap_std": 42.646385192871094,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.8356020942408376,
|
|
"grad_norm": 157.0540313720703,
|
|
"learning_rate": 4.0682033438831584e-08,
|
|
"logits/chosen": 1.6338375806808472,
|
|
"logits/rejected": 1.731345772743225,
|
|
"loss": 4.4317,
|
|
"step": 399
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04295587167143822,
|
|
"beta_dpo/beta_used_raw": 0.029314618557691574,
|
|
"beta_dpo/gap_mean": 21.83963394165039,
|
|
"beta_dpo/gap_std": 39.70830154418945,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.837696335078534,
|
|
"grad_norm": 134.598388671875,
|
|
"learning_rate": 3.968661679220467e-08,
|
|
"logits/chosen": 1.497736930847168,
|
|
"logits/rejected": 1.427824854850769,
|
|
"loss": 4.2926,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.837696335078534,
|
|
"eval_beta_dpo/beta_used": 0.03352755680680275,
|
|
"eval_beta_dpo/beta_used_raw": 0.014615737833082676,
|
|
"eval_beta_dpo/gap_mean": 23.013574600219727,
|
|
"eval_beta_dpo/gap_std": 39.912696838378906,
|
|
"eval_beta_dpo/mask_keep_frac": 1.0,
|
|
"eval_logits/chosen": 1.5097905397415161,
|
|
"eval_logits/rejected": 1.546280860900879,
|
|
"eval_loss": 0.5896762609481812,
|
|
"eval_runtime": 92.7086,
|
|
"eval_samples_per_second": 21.573,
|
|
"eval_steps_per_second": 1.348,
|
|
"step": 400
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.031112950295209885,
|
|
"beta_dpo/beta_used_raw": 0.018789593130350113,
|
|
"beta_dpo/gap_mean": 25.120380401611328,
|
|
"beta_dpo/gap_std": 39.081172943115234,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.8397905759162304,
|
|
"grad_norm": 94.05326843261719,
|
|
"learning_rate": 3.8702478614051345e-08,
|
|
"logits/chosen": 1.4719927310943604,
|
|
"logits/rejected": 1.6373367309570312,
|
|
"loss": 4.188,
|
|
"step": 401
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02975967340171337,
|
|
"beta_dpo/beta_used_raw": 0.020481513813138008,
|
|
"beta_dpo/gap_mean": 25.850921630859375,
|
|
"beta_dpo/gap_std": 40.83582305908203,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.8418848167539267,
|
|
"grad_norm": 68.26434326171875,
|
|
"learning_rate": 3.772967168071517e-08,
|
|
"logits/chosen": 1.4517847299575806,
|
|
"logits/rejected": 1.3798197507858276,
|
|
"loss": 4.0377,
|
|
"step": 402
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.033130984753370285,
|
|
"beta_dpo/beta_used_raw": 0.026949459686875343,
|
|
"beta_dpo/gap_mean": 27.959623336791992,
|
|
"beta_dpo/gap_std": 38.593902587890625,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.8439790575916231,
|
|
"grad_norm": 56.32769012451172,
|
|
"learning_rate": 3.676824816087978e-08,
|
|
"logits/chosen": 1.6041405200958252,
|
|
"logits/rejected": 1.634192705154419,
|
|
"loss": 3.6404,
|
|
"step": 403
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.014831377193331718,
|
|
"beta_dpo/beta_used_raw": -0.013218341395258904,
|
|
"beta_dpo/gap_mean": 29.18805694580078,
|
|
"beta_dpo/gap_std": 39.73085021972656,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.8460732984293193,
|
|
"grad_norm": 27.067461013793945,
|
|
"learning_rate": 3.581825961277074e-08,
|
|
"logits/chosen": 1.493395209312439,
|
|
"logits/rejected": 1.3758317232131958,
|
|
"loss": 4.6703,
|
|
"step": 404
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03309793025255203,
|
|
"beta_dpo/beta_used_raw": 0.011897753924131393,
|
|
"beta_dpo/gap_mean": 26.401506423950195,
|
|
"beta_dpo/gap_std": 40.610694885253906,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.8481675392670157,
|
|
"grad_norm": 67.01002502441406,
|
|
"learning_rate": 3.487975698139084e-08,
|
|
"logits/chosen": 1.5461680889129639,
|
|
"logits/rejected": 1.6689039468765259,
|
|
"loss": 3.8802,
|
|
"step": 405
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.007973221130669117,
|
|
"beta_dpo/beta_used_raw": -0.02517438679933548,
|
|
"beta_dpo/gap_mean": 23.499588012695312,
|
|
"beta_dpo/gap_std": 41.003013610839844,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.8502617801047121,
|
|
"grad_norm": 30.062997817993164,
|
|
"learning_rate": 3.3952790595787986e-08,
|
|
"logits/chosen": 1.3487330675125122,
|
|
"logits/rejected": 1.2552706003189087,
|
|
"loss": 5.0999,
|
|
"step": 406
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.018556706607341766,
|
|
"beta_dpo/beta_used_raw": 0.006646966561675072,
|
|
"beta_dpo/gap_mean": 23.741344451904297,
|
|
"beta_dpo/gap_std": 42.31064987182617,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.8523560209424084,
|
|
"grad_norm": 77.14202880859375,
|
|
"learning_rate": 3.303741016635614e-08,
|
|
"logits/chosen": 1.38568913936615,
|
|
"logits/rejected": 1.1631001234054565,
|
|
"loss": 4.6002,
|
|
"step": 407
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04947693645954132,
|
|
"beta_dpo/beta_used_raw": 0.024193253368139267,
|
|
"beta_dpo/gap_mean": 23.99530029296875,
|
|
"beta_dpo/gap_std": 40.86692810058594,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.8544502617801047,
|
|
"grad_norm": 144.13487243652344,
|
|
"learning_rate": 3.2133664782169944e-08,
|
|
"logits/chosen": 1.0143358707427979,
|
|
"logits/rejected": 1.08698308467865,
|
|
"loss": 4.4916,
|
|
"step": 408
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01759941130876541,
|
|
"beta_dpo/beta_used_raw": -0.006128270179033279,
|
|
"beta_dpo/gap_mean": 25.6751708984375,
|
|
"beta_dpo/gap_std": 40.675594329833984,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.856544502617801,
|
|
"grad_norm": 66.57832336425781,
|
|
"learning_rate": 3.12416029083514e-08,
|
|
"logits/chosen": 1.6948835849761963,
|
|
"logits/rejected": 1.8402390480041504,
|
|
"loss": 4.5883,
|
|
"step": 409
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.032623328268527985,
|
|
"beta_dpo/beta_used_raw": 0.020593255758285522,
|
|
"beta_dpo/gap_mean": 23.831777572631836,
|
|
"beta_dpo/gap_std": 41.50251770019531,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.8586387434554974,
|
|
"grad_norm": 108.39352416992188,
|
|
"learning_rate": 3.036127238347164e-08,
|
|
"logits/chosen": 1.7509747743606567,
|
|
"logits/rejected": 1.7223472595214844,
|
|
"loss": 4.1702,
|
|
"step": 410
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.058568619191646576,
|
|
"beta_dpo/beta_used_raw": 0.03209678828716278,
|
|
"beta_dpo/gap_mean": 26.16048812866211,
|
|
"beta_dpo/gap_std": 41.54467010498047,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.8607329842931937,
|
|
"grad_norm": 156.947265625,
|
|
"learning_rate": 2.9492720416985e-08,
|
|
"logits/chosen": 1.5110323429107666,
|
|
"logits/rejected": 1.5965254306793213,
|
|
"loss": 3.4559,
|
|
"step": 411
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.023946017026901245,
|
|
"beta_dpo/beta_used_raw": 0.0037475526332855225,
|
|
"beta_dpo/gap_mean": 25.7176456451416,
|
|
"beta_dpo/gap_std": 42.220760345458984,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.86282722513089,
|
|
"grad_norm": 45.27512741088867,
|
|
"learning_rate": 2.863599358669755e-08,
|
|
"logits/chosen": 1.275376796722412,
|
|
"logits/rejected": 1.481441855430603,
|
|
"loss": 4.4762,
|
|
"step": 412
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.034958455711603165,
|
|
"beta_dpo/beta_used_raw": 0.017024677246809006,
|
|
"beta_dpo/gap_mean": 23.186616897583008,
|
|
"beta_dpo/gap_std": 41.46014404296875,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.8649214659685864,
|
|
"grad_norm": 124.6803970336914,
|
|
"learning_rate": 2.7791137836269158e-08,
|
|
"logits/chosen": 1.6735713481903076,
|
|
"logits/rejected": 1.6593836545944214,
|
|
"loss": 4.0813,
|
|
"step": 413
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.026967719197273254,
|
|
"beta_dpo/beta_used_raw": -0.0016478030011057854,
|
|
"beta_dpo/gap_mean": 23.66002655029297,
|
|
"beta_dpo/gap_std": 41.970882415771484,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.8670157068062827,
|
|
"grad_norm": 153.2272491455078,
|
|
"learning_rate": 2.6958198472749717e-08,
|
|
"logits/chosen": 1.6639155149459839,
|
|
"logits/rejected": 1.536154866218567,
|
|
"loss": 4.332,
|
|
"step": 414
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04223136603832245,
|
|
"beta_dpo/beta_used_raw": 0.038000062108039856,
|
|
"beta_dpo/gap_mean": 25.557510375976562,
|
|
"beta_dpo/gap_std": 42.886444091796875,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.8691099476439791,
|
|
"grad_norm": 165.83090209960938,
|
|
"learning_rate": 2.613722016414943e-08,
|
|
"logits/chosen": 1.1066584587097168,
|
|
"logits/rejected": 1.1601117849349976,
|
|
"loss": 4.1273,
|
|
"step": 415
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.028374191373586655,
|
|
"beta_dpo/beta_used_raw": 0.01894223876297474,
|
|
"beta_dpo/gap_mean": 28.670167922973633,
|
|
"beta_dpo/gap_std": 42.47052001953125,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.8712041884816754,
|
|
"grad_norm": 66.93905639648438,
|
|
"learning_rate": 2.5328246937043525e-08,
|
|
"logits/chosen": 1.5560580492019653,
|
|
"logits/rejected": 1.6145976781845093,
|
|
"loss": 3.9753,
|
|
"step": 416
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.043879032135009766,
|
|
"beta_dpo/beta_used_raw": 0.02182396501302719,
|
|
"beta_dpo/gap_mean": 26.690717697143555,
|
|
"beta_dpo/gap_std": 41.90580368041992,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.8732984293193717,
|
|
"grad_norm": 92.42415618896484,
|
|
"learning_rate": 2.4531322174210973e-08,
|
|
"logits/chosen": 1.2475701570510864,
|
|
"logits/rejected": 1.3210117816925049,
|
|
"loss": 4.1788,
|
|
"step": 417
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03364454209804535,
|
|
"beta_dpo/beta_used_raw": 0.005448690615594387,
|
|
"beta_dpo/gap_mean": 25.629501342773438,
|
|
"beta_dpo/gap_std": 40.84889602661133,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.875392670157068,
|
|
"grad_norm": 60.8049430847168,
|
|
"learning_rate": 2.3746488612308295e-08,
|
|
"logits/chosen": 1.3086042404174805,
|
|
"logits/rejected": 1.1799873113632202,
|
|
"loss": 3.8843,
|
|
"step": 418
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.040316130965948105,
|
|
"beta_dpo/beta_used_raw": 0.024059785529971123,
|
|
"beta_dpo/gap_mean": 29.127347946166992,
|
|
"beta_dpo/gap_std": 42.379608154296875,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.8774869109947644,
|
|
"grad_norm": 65.74553680419922,
|
|
"learning_rate": 2.297378833957761e-08,
|
|
"logits/chosen": 1.9729444980621338,
|
|
"logits/rejected": 1.894222617149353,
|
|
"loss": 3.9346,
|
|
"step": 419
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.030392050743103027,
|
|
"beta_dpo/beta_used_raw": 0.015165509656071663,
|
|
"beta_dpo/gap_mean": 29.2987060546875,
|
|
"beta_dpo/gap_std": 43.514549255371094,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.8795811518324608,
|
|
"grad_norm": 112.77594757080078,
|
|
"learning_rate": 2.2213262793589482e-08,
|
|
"logits/chosen": 1.2061651945114136,
|
|
"logits/rejected": 1.2414170503616333,
|
|
"loss": 4.1674,
|
|
"step": 420
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.037078239023685455,
|
|
"beta_dpo/beta_used_raw": 0.006579352542757988,
|
|
"beta_dpo/gap_mean": 30.2874698638916,
|
|
"beta_dpo/gap_std": 41.12751007080078,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.881675392670157,
|
|
"grad_norm": 50.55178451538086,
|
|
"learning_rate": 2.1464952759020856e-08,
|
|
"logits/chosen": 1.381372332572937,
|
|
"logits/rejected": 1.1805065870285034,
|
|
"loss": 3.5599,
|
|
"step": 421
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.027763448655605316,
|
|
"beta_dpo/beta_used_raw": 0.0037402785383164883,
|
|
"beta_dpo/gap_mean": 26.626432418823242,
|
|
"beta_dpo/gap_std": 42.52971649169922,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.8837696335078534,
|
|
"grad_norm": 80.29391479492188,
|
|
"learning_rate": 2.07288983654679e-08,
|
|
"logits/chosen": 1.6077336072921753,
|
|
"logits/rejected": 1.651180624961853,
|
|
"loss": 4.4944,
|
|
"step": 422
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03539786487817764,
|
|
"beta_dpo/beta_used_raw": 0.004768058191984892,
|
|
"beta_dpo/gap_mean": 26.751209259033203,
|
|
"beta_dpo/gap_std": 42.32147979736328,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.8858638743455497,
|
|
"grad_norm": 90.14205169677734,
|
|
"learning_rate": 2.0005139085293942e-08,
|
|
"logits/chosen": 1.4197824001312256,
|
|
"logits/rejected": 1.5385533571243286,
|
|
"loss": 4.5795,
|
|
"step": 423
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01704780012369156,
|
|
"beta_dpo/beta_used_raw": 0.012394540943205357,
|
|
"beta_dpo/gap_mean": 27.506437301635742,
|
|
"beta_dpo/gap_std": 42.84564208984375,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.8879581151832461,
|
|
"grad_norm": 52.7910041809082,
|
|
"learning_rate": 1.9293713731512673e-08,
|
|
"logits/chosen": 1.3633639812469482,
|
|
"logits/rejected": 1.1960315704345703,
|
|
"loss": 4.4306,
|
|
"step": 424
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0063092270866036415,
|
|
"beta_dpo/beta_used_raw": -0.041274845600128174,
|
|
"beta_dpo/gap_mean": 27.02210807800293,
|
|
"beta_dpo/gap_std": 40.46715545654297,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.8900523560209425,
|
|
"grad_norm": 22.821779251098633,
|
|
"learning_rate": 1.8594660455706763e-08,
|
|
"logits/chosen": 1.476675033569336,
|
|
"logits/rejected": 1.6865489482879639,
|
|
"loss": 4.8895,
|
|
"step": 425
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.033457279205322266,
|
|
"beta_dpo/beta_used_raw": 0.02916746772825718,
|
|
"beta_dpo/gap_mean": 24.161306381225586,
|
|
"beta_dpo/gap_std": 39.77753448486328,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.8921465968586387,
|
|
"grad_norm": 88.06718444824219,
|
|
"learning_rate": 1.7908016745981856e-08,
|
|
"logits/chosen": 1.2509461641311646,
|
|
"logits/rejected": 1.4100229740142822,
|
|
"loss": 3.9195,
|
|
"step": 426
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04828907176852226,
|
|
"beta_dpo/beta_used_raw": 0.03954368457198143,
|
|
"beta_dpo/gap_mean": 27.65555191040039,
|
|
"beta_dpo/gap_std": 40.21341323852539,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.8942408376963351,
|
|
"grad_norm": 90.2916030883789,
|
|
"learning_rate": 1.7233819424956247e-08,
|
|
"logits/chosen": 1.3937939405441284,
|
|
"logits/rejected": 1.3810914754867554,
|
|
"loss": 3.5748,
|
|
"step": 427
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04655870795249939,
|
|
"beta_dpo/beta_used_raw": 0.024588048458099365,
|
|
"beta_dpo/gap_mean": 32.9439582824707,
|
|
"beta_dpo/gap_std": 39.263301849365234,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.8963350785340314,
|
|
"grad_norm": 68.10398864746094,
|
|
"learning_rate": 1.6572104647786245e-08,
|
|
"logits/chosen": 1.752288818359375,
|
|
"logits/rejected": 1.9130034446716309,
|
|
"loss": 3.5059,
|
|
"step": 428
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02215776965022087,
|
|
"beta_dpo/beta_used_raw": -0.016678031533956528,
|
|
"beta_dpo/gap_mean": 31.625703811645508,
|
|
"beta_dpo/gap_std": 43.56167984008789,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.8984293193717278,
|
|
"grad_norm": 60.726661682128906,
|
|
"learning_rate": 1.5922907900227017e-08,
|
|
"logits/chosen": 1.458854079246521,
|
|
"logits/rejected": 1.4256439208984375,
|
|
"loss": 4.6525,
|
|
"step": 429
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02091900259256363,
|
|
"beta_dpo/beta_used_raw": 0.0070409020408988,
|
|
"beta_dpo/gap_mean": 27.687143325805664,
|
|
"beta_dpo/gap_std": 44.989070892333984,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.900523560209424,
|
|
"grad_norm": 140.8614959716797,
|
|
"learning_rate": 1.5286263996730026e-08,
|
|
"logits/chosen": 1.4701473712921143,
|
|
"logits/rejected": 1.5857133865356445,
|
|
"loss": 4.5919,
|
|
"step": 430
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.014125513844192028,
|
|
"beta_dpo/beta_used_raw": -0.018212314695119858,
|
|
"beta_dpo/gap_mean": 24.097793579101562,
|
|
"beta_dpo/gap_std": 43.06412124633789,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.9026178010471204,
|
|
"grad_norm": 114.83171081542969,
|
|
"learning_rate": 1.4662207078575684e-08,
|
|
"logits/chosen": 1.7383248805999756,
|
|
"logits/rejected": 1.805346965789795,
|
|
"loss": 4.965,
|
|
"step": 431
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.032197486609220505,
|
|
"beta_dpo/beta_used_raw": 0.023590974509716034,
|
|
"beta_dpo/gap_mean": 26.495365142822266,
|
|
"beta_dpo/gap_std": 43.16999435424805,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.9047120418848168,
|
|
"grad_norm": 93.71367645263672,
|
|
"learning_rate": 1.40507706120426e-08,
|
|
"logits/chosen": 1.4706007242202759,
|
|
"logits/rejected": 1.6791198253631592,
|
|
"loss": 4.1943,
|
|
"step": 432
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.029206298291683197,
|
|
"beta_dpo/beta_used_raw": 0.02880963124334812,
|
|
"beta_dpo/gap_mean": 24.425756454467773,
|
|
"beta_dpo/gap_std": 42.32783889770508,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.9068062827225131,
|
|
"grad_norm": 79.65400695800781,
|
|
"learning_rate": 1.345198738661285e-08,
|
|
"logits/chosen": 1.5126326084136963,
|
|
"logits/rejected": 1.4506518840789795,
|
|
"loss": 4.3461,
|
|
"step": 433
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04482489451766014,
|
|
"beta_dpo/beta_used_raw": 0.019631531089544296,
|
|
"beta_dpo/gap_mean": 23.61885643005371,
|
|
"beta_dpo/gap_std": 41.121665954589844,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.9089005235602095,
|
|
"grad_norm": 76.20455169677734,
|
|
"learning_rate": 1.2865889513213628e-08,
|
|
"logits/chosen": 1.9426430463790894,
|
|
"logits/rejected": 1.9414358139038086,
|
|
"loss": 3.626,
|
|
"step": 434
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.024887006729841232,
|
|
"beta_dpo/beta_used_raw": 0.01590941660106182,
|
|
"beta_dpo/gap_mean": 23.983257293701172,
|
|
"beta_dpo/gap_std": 40.91677474975586,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.9109947643979057,
|
|
"grad_norm": 117.16897583007812,
|
|
"learning_rate": 1.2292508422495157e-08,
|
|
"logits/chosen": 1.6585721969604492,
|
|
"logits/rejected": 1.773654580116272,
|
|
"loss": 4.7233,
|
|
"step": 435
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.022081829607486725,
|
|
"beta_dpo/beta_used_raw": -0.0030337003991007805,
|
|
"beta_dpo/gap_mean": 21.94788932800293,
|
|
"beta_dpo/gap_std": 40.543338775634766,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.9130890052356021,
|
|
"grad_norm": 41.44011688232422,
|
|
"learning_rate": 1.1731874863145142e-08,
|
|
"logits/chosen": 1.3716554641723633,
|
|
"logits/rejected": 1.4048748016357422,
|
|
"loss": 4.5878,
|
|
"step": 436
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03096182271838188,
|
|
"beta_dpo/beta_used_raw": 0.029562827199697495,
|
|
"beta_dpo/gap_mean": 23.157291412353516,
|
|
"beta_dpo/gap_std": 40.46465301513672,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.9151832460732985,
|
|
"grad_norm": 81.40292358398438,
|
|
"learning_rate": 1.118401890024001e-08,
|
|
"logits/chosen": 1.6667184829711914,
|
|
"logits/rejected": 1.8092567920684814,
|
|
"loss": 4.1753,
|
|
"step": 437
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.00928124412894249,
|
|
"beta_dpo/beta_used_raw": -0.024261336773633957,
|
|
"beta_dpo/gap_mean": 20.033138275146484,
|
|
"beta_dpo/gap_std": 41.23052215576172,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.9172774869109948,
|
|
"grad_norm": 50.23611068725586,
|
|
"learning_rate": 1.06489699136324e-08,
|
|
"logits/chosen": 1.3478763103485107,
|
|
"logits/rejected": 1.4908018112182617,
|
|
"loss": 5.1895,
|
|
"step": 438
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04591372609138489,
|
|
"beta_dpo/beta_used_raw": 0.04151216149330139,
|
|
"beta_dpo/gap_mean": 19.75481414794922,
|
|
"beta_dpo/gap_std": 41.36615753173828,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.9193717277486911,
|
|
"grad_norm": 145.0548095703125,
|
|
"learning_rate": 1.0126756596375685e-08,
|
|
"logits/chosen": 1.5163558721542358,
|
|
"logits/rejected": 1.5085352659225464,
|
|
"loss": 3.9243,
|
|
"step": 439
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.014184126630425453,
|
|
"beta_dpo/beta_used_raw": -0.008172026835381985,
|
|
"beta_dpo/gap_mean": 20.215518951416016,
|
|
"beta_dpo/gap_std": 39.6240119934082,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.9214659685863874,
|
|
"grad_norm": 50.933837890625,
|
|
"learning_rate": 9.617406953185136e-09,
|
|
"logits/chosen": 1.4577587842941284,
|
|
"logits/rejected": 1.234389305114746,
|
|
"loss": 4.9376,
|
|
"step": 440
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.048469383269548416,
|
|
"beta_dpo/beta_used_raw": 0.0424063466489315,
|
|
"beta_dpo/gap_mean": 22.741992950439453,
|
|
"beta_dpo/gap_std": 39.93981170654297,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.9235602094240838,
|
|
"grad_norm": 101.18359375,
|
|
"learning_rate": 9.12094829893642e-09,
|
|
"logits/chosen": 1.7504223585128784,
|
|
"logits/rejected": 1.9641519784927368,
|
|
"loss": 4.1214,
|
|
"step": 441
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03708556294441223,
|
|
"beta_dpo/beta_used_raw": 0.02750963345170021,
|
|
"beta_dpo/gap_mean": 24.97802734375,
|
|
"beta_dpo/gap_std": 41.040199279785156,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.9256544502617801,
|
|
"grad_norm": 100.34196472167969,
|
|
"learning_rate": 8.637407257200496e-09,
|
|
"logits/chosen": 1.3330552577972412,
|
|
"logits/rejected": 1.4373996257781982,
|
|
"loss": 4.1536,
|
|
"step": 442
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04446953535079956,
|
|
"beta_dpo/beta_used_raw": 0.022015634924173355,
|
|
"beta_dpo/gap_mean": 22.627042770385742,
|
|
"beta_dpo/gap_std": 41.79437255859375,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.9277486910994764,
|
|
"grad_norm": 65.15979766845703,
|
|
"learning_rate": 8.166809758815895e-09,
|
|
"logits/chosen": 1.2715387344360352,
|
|
"logits/rejected": 1.2342997789382935,
|
|
"loss": 3.7321,
|
|
"step": 443
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.014596132561564445,
|
|
"beta_dpo/beta_used_raw": -0.007604743354022503,
|
|
"beta_dpo/gap_mean": 24.320253372192383,
|
|
"beta_dpo/gap_std": 41.13831329345703,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.9298429319371728,
|
|
"grad_norm": 47.09414291381836,
|
|
"learning_rate": 7.709181040498253e-09,
|
|
"logits/chosen": 1.0621271133422852,
|
|
"logits/rejected": 1.241407871246338,
|
|
"loss": 4.861,
|
|
"step": 444
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02850104495882988,
|
|
"beta_dpo/beta_used_raw": 0.016521329060196877,
|
|
"beta_dpo/gap_mean": 22.053783416748047,
|
|
"beta_dpo/gap_std": 42.03921890258789,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.9319371727748691,
|
|
"grad_norm": 111.25325775146484,
|
|
"learning_rate": 7.2645456434869965e-09,
|
|
"logits/chosen": 1.5844391584396362,
|
|
"logits/rejected": 1.637407898902893,
|
|
"loss": 4.4202,
|
|
"step": 445
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.029594026505947113,
|
|
"beta_dpo/beta_used_raw": 0.0185114536434412,
|
|
"beta_dpo/gap_mean": 24.767749786376953,
|
|
"beta_dpo/gap_std": 41.35893249511719,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.9340314136125655,
|
|
"grad_norm": 41.6215705871582,
|
|
"learning_rate": 6.832927412229017e-09,
|
|
"logits/chosen": 1.4550718069076538,
|
|
"logits/rejected": 1.433241367340088,
|
|
"loss": 4.1939,
|
|
"step": 446
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03615984693169594,
|
|
"beta_dpo/beta_used_raw": 0.03083086758852005,
|
|
"beta_dpo/gap_mean": 28.0212345123291,
|
|
"beta_dpo/gap_std": 39.88979721069336,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.9361256544502617,
|
|
"grad_norm": 43.32276153564453,
|
|
"learning_rate": 6.414349493100129e-09,
|
|
"logits/chosen": 1.5409138202667236,
|
|
"logits/rejected": 1.6101213693618774,
|
|
"loss": 3.7742,
|
|
"step": 447
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.031161731109023094,
|
|
"beta_dpo/beta_used_raw": 0.012630118057131767,
|
|
"beta_dpo/gap_mean": 27.05018424987793,
|
|
"beta_dpo/gap_std": 40.15449905395508,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.9382198952879581,
|
|
"grad_norm": 235.60301208496094,
|
|
"learning_rate": 6.0088343331638756e-09,
|
|
"logits/chosen": 1.854709506034851,
|
|
"logits/rejected": 1.8700783252716064,
|
|
"loss": 4.4011,
|
|
"step": 448
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.035951972007751465,
|
|
"beta_dpo/beta_used_raw": 0.021076416596770287,
|
|
"beta_dpo/gap_mean": 26.136516571044922,
|
|
"beta_dpo/gap_std": 39.963043212890625,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.9403141361256544,
|
|
"grad_norm": 100.77395629882812,
|
|
"learning_rate": 5.616403678967624e-09,
|
|
"logits/chosen": 2.0368571281433105,
|
|
"logits/rejected": 1.7351016998291016,
|
|
"loss": 3.8561,
|
|
"step": 449
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01658363826572895,
|
|
"beta_dpo/beta_used_raw": -0.019273536279797554,
|
|
"beta_dpo/gap_mean": 25.731136322021484,
|
|
"beta_dpo/gap_std": 40.702030181884766,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.9424083769633508,
|
|
"grad_norm": 41.71562957763672,
|
|
"learning_rate": 5.2370785753763356e-09,
|
|
"logits/chosen": 1.7945507764816284,
|
|
"logits/rejected": 1.5377925634384155,
|
|
"loss": 4.7532,
|
|
"step": 450
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.030548732727766037,
|
|
"beta_dpo/beta_used_raw": 0.022728927433490753,
|
|
"beta_dpo/gap_mean": 24.457050323486328,
|
|
"beta_dpo/gap_std": 39.438201904296875,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.9445026178010472,
|
|
"grad_norm": 84.55509948730469,
|
|
"learning_rate": 4.8708793644441086e-09,
|
|
"logits/chosen": 1.5343233346939087,
|
|
"logits/rejected": 1.6422300338745117,
|
|
"loss": 4.0291,
|
|
"step": 451
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.022664647549390793,
|
|
"beta_dpo/beta_used_raw": -0.009222008287906647,
|
|
"beta_dpo/gap_mean": 25.828996658325195,
|
|
"beta_dpo/gap_std": 41.49300003051758,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.9465968586387434,
|
|
"grad_norm": 116.09254455566406,
|
|
"learning_rate": 4.517825684323323e-09,
|
|
"logits/chosen": 1.4695273637771606,
|
|
"logits/rejected": 1.6382958889007568,
|
|
"loss": 4.6374,
|
|
"step": 452
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.026332221925258636,
|
|
"beta_dpo/beta_used_raw": 0.0258626826107502,
|
|
"beta_dpo/gap_mean": 24.971637725830078,
|
|
"beta_dpo/gap_std": 39.16703414916992,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.9486910994764398,
|
|
"grad_norm": 85.11585998535156,
|
|
"learning_rate": 4.1779364682113794e-09,
|
|
"logits/chosen": 1.7189387083053589,
|
|
"logits/rejected": 1.8478630781173706,
|
|
"loss": 4.0201,
|
|
"step": 453
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02039419114589691,
|
|
"beta_dpo/beta_used_raw": 0.003133818507194519,
|
|
"beta_dpo/gap_mean": 25.356918334960938,
|
|
"beta_dpo/gap_std": 39.97523498535156,
|
|
"beta_dpo/mask_keep_frac": 0.6875,
|
|
"epoch": 0.9507853403141361,
|
|
"grad_norm": 47.996421813964844,
|
|
"learning_rate": 3.851229943335393e-09,
|
|
"logits/chosen": 2.0254852771759033,
|
|
"logits/rejected": 1.9557225704193115,
|
|
"loss": 4.2785,
|
|
"step": 454
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.014043524861335754,
|
|
"beta_dpo/beta_used_raw": -0.01924164779484272,
|
|
"beta_dpo/gap_mean": 24.39451789855957,
|
|
"beta_dpo/gap_std": 40.95219039916992,
|
|
"beta_dpo/mask_keep_frac": 0.59375,
|
|
"epoch": 0.9528795811518325,
|
|
"grad_norm": 79.08866882324219,
|
|
"learning_rate": 3.5377236299748147e-09,
|
|
"logits/chosen": 1.5097756385803223,
|
|
"logits/rejected": 1.603163242340088,
|
|
"loss": 4.8423,
|
|
"step": 455
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.05385340750217438,
|
|
"beta_dpo/beta_used_raw": 0.029043981805443764,
|
|
"beta_dpo/gap_mean": 25.243539810180664,
|
|
"beta_dpo/gap_std": 42.33509063720703,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.9549738219895288,
|
|
"grad_norm": 108.52057647705078,
|
|
"learning_rate": 3.2374343405217884e-09,
|
|
"logits/chosen": 1.6896770000457764,
|
|
"logits/rejected": 1.829254150390625,
|
|
"loss": 4.0501,
|
|
"step": 456
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04148964211344719,
|
|
"beta_dpo/beta_used_raw": 0.018075397238135338,
|
|
"beta_dpo/gap_mean": 27.367046356201172,
|
|
"beta_dpo/gap_std": 43.94456100463867,
|
|
"beta_dpo/mask_keep_frac": 0.59375,
|
|
"epoch": 0.9570680628272251,
|
|
"grad_norm": 287.26763916015625,
|
|
"learning_rate": 2.9503781785795713e-09,
|
|
"logits/chosen": 1.5245857238769531,
|
|
"logits/rejected": 1.4000697135925293,
|
|
"loss": 4.0654,
|
|
"step": 457
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.011880859732627869,
|
|
"beta_dpo/beta_used_raw": -0.01639743149280548,
|
|
"beta_dpo/gap_mean": 25.620864868164062,
|
|
"beta_dpo/gap_std": 41.843963623046875,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.9591623036649215,
|
|
"grad_norm": 45.80873107910156,
|
|
"learning_rate": 2.6765705380989432e-09,
|
|
"logits/chosen": 1.518320918083191,
|
|
"logits/rejected": 1.3533384799957275,
|
|
"loss": 4.969,
|
|
"step": 458
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.052308086305856705,
|
|
"beta_dpo/beta_used_raw": 0.024883100762963295,
|
|
"beta_dpo/gap_mean": 23.730758666992188,
|
|
"beta_dpo/gap_std": 41.868125915527344,
|
|
"beta_dpo/mask_keep_frac": 0.90625,
|
|
"epoch": 0.9612565445026178,
|
|
"grad_norm": 121.35041809082031,
|
|
"learning_rate": 2.416026102552732e-09,
|
|
"logits/chosen": 1.4219530820846558,
|
|
"logits/rejected": 1.2508901357650757,
|
|
"loss": 3.466,
|
|
"step": 459
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.017205236479640007,
|
|
"beta_dpo/beta_used_raw": -0.0033456708770245314,
|
|
"beta_dpo/gap_mean": 22.880821228027344,
|
|
"beta_dpo/gap_std": 45.12669372558594,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.9633507853403142,
|
|
"grad_norm": 73.71929931640625,
|
|
"learning_rate": 2.168758844148272e-09,
|
|
"logits/chosen": 1.3608553409576416,
|
|
"logits/rejected": 1.3055371046066284,
|
|
"loss": 5.0311,
|
|
"step": 460
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04041147232055664,
|
|
"beta_dpo/beta_used_raw": 0.029882332310080528,
|
|
"beta_dpo/gap_mean": 22.926301956176758,
|
|
"beta_dpo/gap_std": 44.20081329345703,
|
|
"beta_dpo/mask_keep_frac": 0.65625,
|
|
"epoch": 0.9654450261780104,
|
|
"grad_norm": 86.9037094116211,
|
|
"learning_rate": 1.9347820230782295e-09,
|
|
"logits/chosen": 1.735243797302246,
|
|
"logits/rejected": 1.66280996799469,
|
|
"loss": 3.8386,
|
|
"step": 461
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03556675463914871,
|
|
"beta_dpo/beta_used_raw": 0.027323313057422638,
|
|
"beta_dpo/gap_mean": 25.144573211669922,
|
|
"beta_dpo/gap_std": 43.731327056884766,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.9675392670157068,
|
|
"grad_norm": 159.07362365722656,
|
|
"learning_rate": 1.7141081868094209e-09,
|
|
"logits/chosen": 1.5209287405014038,
|
|
"logits/rejected": 1.4356799125671387,
|
|
"loss": 4.4081,
|
|
"step": 462
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03522716090083122,
|
|
"beta_dpo/beta_used_raw": 0.009717161767184734,
|
|
"beta_dpo/gap_mean": 25.50450325012207,
|
|
"beta_dpo/gap_std": 42.545188903808594,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.9696335078534032,
|
|
"grad_norm": 87.98490142822266,
|
|
"learning_rate": 1.5067491694100153e-09,
|
|
"logits/chosen": 1.5676113367080688,
|
|
"logits/rejected": 1.6250090599060059,
|
|
"loss": 3.8654,
|
|
"step": 463
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.021527249366044998,
|
|
"beta_dpo/beta_used_raw": 0.011957229115068913,
|
|
"beta_dpo/gap_mean": 24.842899322509766,
|
|
"beta_dpo/gap_std": 42.1388053894043,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.9717277486910995,
|
|
"grad_norm": 79.93045043945312,
|
|
"learning_rate": 1.3127160909147672e-09,
|
|
"logits/chosen": 1.8131260871887207,
|
|
"logits/rejected": 1.744214653968811,
|
|
"loss": 4.3541,
|
|
"step": 464
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03722041845321655,
|
|
"beta_dpo/beta_used_raw": 0.016623277217149734,
|
|
"beta_dpo/gap_mean": 26.415016174316406,
|
|
"beta_dpo/gap_std": 41.290672302246094,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.9738219895287958,
|
|
"grad_norm": 76.404541015625,
|
|
"learning_rate": 1.1320193567288527e-09,
|
|
"logits/chosen": 1.4614487886428833,
|
|
"logits/rejected": 1.4553896188735962,
|
|
"loss": 4.0587,
|
|
"step": 465
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0460047721862793,
|
|
"beta_dpo/beta_used_raw": 0.03773031011223793,
|
|
"beta_dpo/gap_mean": 28.092792510986328,
|
|
"beta_dpo/gap_std": 40.66791534423828,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.9759162303664921,
|
|
"grad_norm": 89.3587417602539,
|
|
"learning_rate": 9.64668657069706e-10,
|
|
"logits/chosen": 1.3052603006362915,
|
|
"logits/rejected": 1.347874641418457,
|
|
"loss": 3.221,
|
|
"step": 466
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02018456533551216,
|
|
"beta_dpo/beta_used_raw": -0.0011910395696759224,
|
|
"beta_dpo/gap_mean": 25.53974151611328,
|
|
"beta_dpo/gap_std": 40.64295196533203,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.9780104712041885,
|
|
"grad_norm": 181.21160888671875,
|
|
"learning_rate": 8.106729664475176e-10,
|
|
"logits/chosen": 0.9222959876060486,
|
|
"logits/rejected": 1.1561161279678345,
|
|
"loss": 5.0134,
|
|
"step": 467
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.015501348301768303,
|
|
"beta_dpo/beta_used_raw": -0.027839092537760735,
|
|
"beta_dpo/gap_mean": 24.239940643310547,
|
|
"beta_dpo/gap_std": 40.417659759521484,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.9801047120418848,
|
|
"grad_norm": 32.802974700927734,
|
|
"learning_rate": 6.700405431837585e-10,
|
|
"logits/chosen": 1.68427312374115,
|
|
"logits/rejected": 1.4638608694076538,
|
|
"loss": 4.8736,
|
|
"step": 468
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.043932512402534485,
|
|
"beta_dpo/beta_used_raw": 0.02907262183725834,
|
|
"beta_dpo/gap_mean": 23.42894744873047,
|
|
"beta_dpo/gap_std": 40.17053985595703,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.9821989528795811,
|
|
"grad_norm": 74.9969482421875,
|
|
"learning_rate": 5.427789289685347e-10,
|
|
"logits/chosen": 1.6762428283691406,
|
|
"logits/rejected": 1.6395068168640137,
|
|
"loss": 3.908,
|
|
"step": 469
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.05348680168390274,
|
|
"beta_dpo/beta_used_raw": 0.047200098633766174,
|
|
"beta_dpo/gap_mean": 26.360820770263672,
|
|
"beta_dpo/gap_std": 41.91456985473633,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.9842931937172775,
|
|
"grad_norm": 92.29603576660156,
|
|
"learning_rate": 4.288949484559934e-10,
|
|
"logits/chosen": 0.9740282297134399,
|
|
"logits/rejected": 0.9412952065467834,
|
|
"loss": 3.2812,
|
|
"step": 470
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.011362526565790176,
|
|
"beta_dpo/beta_used_raw": -0.002826599171385169,
|
|
"beta_dpo/gap_mean": 26.84084701538086,
|
|
"beta_dpo/gap_std": 42.06930160522461,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.9863874345549738,
|
|
"grad_norm": 39.85667419433594,
|
|
"learning_rate": 3.2839470889836627e-10,
|
|
"logits/chosen": 1.6476500034332275,
|
|
"logits/rejected": 1.6063101291656494,
|
|
"loss": 4.6968,
|
|
"step": 471
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04099735617637634,
|
|
"beta_dpo/beta_used_raw": 0.03528433293104172,
|
|
"beta_dpo/gap_mean": 27.509807586669922,
|
|
"beta_dpo/gap_std": 42.573822021484375,
|
|
"beta_dpo/mask_keep_frac": 0.78125,
|
|
"epoch": 0.9884816753926702,
|
|
"grad_norm": 135.5849609375,
|
|
"learning_rate": 2.412835998185092e-10,
|
|
"logits/chosen": 1.3469210863113403,
|
|
"logits/rejected": 1.4127790927886963,
|
|
"loss": 3.8637,
|
|
"step": 472
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.020598269999027252,
|
|
"beta_dpo/beta_used_raw": 0.011284598149359226,
|
|
"beta_dpo/gap_mean": 26.61202621459961,
|
|
"beta_dpo/gap_std": 42.61575698852539,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.9905759162303664,
|
|
"grad_norm": 39.22035598754883,
|
|
"learning_rate": 1.6756629272085544e-10,
|
|
"logits/chosen": 1.4856796264648438,
|
|
"logits/rejected": 1.2598925828933716,
|
|
"loss": 4.3459,
|
|
"step": 473
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.015516398474574089,
|
|
"beta_dpo/beta_used_raw": -0.019677024334669113,
|
|
"beta_dpo/gap_mean": 27.40287971496582,
|
|
"beta_dpo/gap_std": 42.025856018066406,
|
|
"beta_dpo/mask_keep_frac": 0.71875,
|
|
"epoch": 0.9926701570680628,
|
|
"grad_norm": 34.569698333740234,
|
|
"learning_rate": 1.072467408408384e-10,
|
|
"logits/chosen": 1.5088553428649902,
|
|
"logits/rejected": 1.615687370300293,
|
|
"loss": 4.6458,
|
|
"step": 474
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.01646936498582363,
|
|
"beta_dpo/beta_used_raw": -0.006308557000011206,
|
|
"beta_dpo/gap_mean": 22.261816024780273,
|
|
"beta_dpo/gap_std": 39.92071533203125,
|
|
"beta_dpo/mask_keep_frac": 0.5,
|
|
"epoch": 0.9947643979057592,
|
|
"grad_norm": 33.08564758300781,
|
|
"learning_rate": 6.032817893297793e-11,
|
|
"logits/chosen": 1.1749279499053955,
|
|
"logits/rejected": 1.2055437564849854,
|
|
"loss": 4.2627,
|
|
"step": 475
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.027492396533489227,
|
|
"beta_dpo/beta_used_raw": 0.008521707728505135,
|
|
"beta_dpo/gap_mean": 23.523109436035156,
|
|
"beta_dpo/gap_std": 40.176387786865234,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.9968586387434555,
|
|
"grad_norm": 60.566287994384766,
|
|
"learning_rate": 2.6813123097352287e-11,
|
|
"logits/chosen": 1.3323711156845093,
|
|
"logits/rejected": 1.4667065143585205,
|
|
"loss": 4.2655,
|
|
"step": 476
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.032702527940273285,
|
|
"beta_dpo/beta_used_raw": 0.020156463608145714,
|
|
"beta_dpo/gap_mean": 24.190080642700195,
|
|
"beta_dpo/gap_std": 42.31235885620117,
|
|
"beta_dpo/mask_keep_frac": 0.84375,
|
|
"epoch": 0.9989528795811519,
|
|
"grad_norm": 115.37435150146484,
|
|
"learning_rate": 6.7033706447061635e-12,
|
|
"logits/chosen": 1.080468773841858,
|
|
"logits/rejected": 1.1553194522857666,
|
|
"loss": 4.2411,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.9989528795811519,
|
|
"step": 477,
|
|
"total_flos": 0.0,
|
|
"train_loss": 4.692083022879355,
|
|
"train_runtime": 7712.5154,
|
|
"train_samples_per_second": 7.927,
|
|
"train_steps_per_second": 0.062
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 477,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 200,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|