Files
qwen3-8b-base-beta-dpo-ultr…/trainer_state.json
ModelHub XC 5d807c939e 初始化项目,由ModelHub XC社区提供模型
Model: W-61/qwen3-8b-base-beta-dpo-ultrafeedback-4xh200-batch-128-20260423-040315
Source: Original Platform
2026-05-17 10:51:02 +08:00

6752 lines
233 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9989528795811519,
"eval_steps": 200,
"global_step": 477,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"beta_dpo/beta_used": 0.010316052474081516,
"beta_dpo/beta_used_raw": 0.010316052474081516,
"beta_dpo/gap_mean": -0.0030604612547904253,
"beta_dpo/gap_std": 0.273499995470047,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.0020942408376963353,
"grad_norm": 15.496143341064453,
"learning_rate": 0.0,
"logits/chosen": 2.203179359436035,
"logits/rejected": 2.035616397857666,
"loss": 5.5428,
"step": 1
},
{
"beta_dpo/beta_used": 0.009904756210744381,
"beta_dpo/beta_used_raw": 0.009904756210744381,
"beta_dpo/gap_mean": 0.0473581925034523,
"beta_dpo/gap_std": 0.6410814523696899,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.004188481675392671,
"grad_norm": 15.881836891174316,
"learning_rate": 1.0416666666666666e-08,
"logits/chosen": 2.1704792976379395,
"logits/rejected": 2.0754430294036865,
"loss": 5.5442,
"step": 2
},
{
"beta_dpo/beta_used": 0.010276634246110916,
"beta_dpo/beta_used_raw": 0.010276634246110916,
"beta_dpo/gap_mean": 0.040970198810100555,
"beta_dpo/gap_std": 0.7673041224479675,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.0062827225130890054,
"grad_norm": 16.63137435913086,
"learning_rate": 2.083333333333333e-08,
"logits/chosen": 2.4686079025268555,
"logits/rejected": 2.464277505874634,
"loss": 5.5428,
"step": 3
},
{
"beta_dpo/beta_used": 0.01017595175653696,
"beta_dpo/beta_used_raw": 0.01017595175653696,
"beta_dpo/gap_mean": 0.06479164212942123,
"beta_dpo/gap_std": 0.8090450763702393,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.008376963350785341,
"grad_norm": 19.53766632080078,
"learning_rate": 3.125e-08,
"logits/chosen": 1.7211281061172485,
"logits/rejected": 1.5812376737594604,
"loss": 5.5403,
"step": 4
},
{
"beta_dpo/beta_used": 0.009877461940050125,
"beta_dpo/beta_used_raw": 0.009877461940050125,
"beta_dpo/gap_mean": 0.03874587640166283,
"beta_dpo/gap_std": 0.8403902649879456,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.010471204188481676,
"grad_norm": 17.47425651550293,
"learning_rate": 4.166666666666666e-08,
"logits/chosen": 1.8391205072402954,
"logits/rejected": 1.8945659399032593,
"loss": 5.5435,
"step": 5
},
{
"beta_dpo/beta_used": 0.009602357633411884,
"beta_dpo/beta_used_raw": 0.009602357633411884,
"beta_dpo/gap_mean": 0.013125958852469921,
"beta_dpo/gap_std": 0.8970670700073242,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.012565445026178011,
"grad_norm": 17.965578079223633,
"learning_rate": 5.208333333333333e-08,
"logits/chosen": 1.8753392696380615,
"logits/rejected": 1.806428074836731,
"loss": 5.546,
"step": 6
},
{
"beta_dpo/beta_used": 0.010046536102890968,
"beta_dpo/beta_used_raw": 0.010046536102890968,
"beta_dpo/gap_mean": 0.00752235297113657,
"beta_dpo/gap_std": 0.9090036153793335,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.014659685863874346,
"grad_norm": 18.481788635253906,
"learning_rate": 6.25e-08,
"logits/chosen": 2.1977810859680176,
"logits/rejected": 2.027773141860962,
"loss": 5.543,
"step": 7
},
{
"beta_dpo/beta_used": 0.009285343810915947,
"beta_dpo/beta_used_raw": 0.009285343810915947,
"beta_dpo/gap_mean": -0.0737709105014801,
"beta_dpo/gap_std": 0.9767862558364868,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.016753926701570682,
"grad_norm": 17.283451080322266,
"learning_rate": 7.291666666666667e-08,
"logits/chosen": 2.3551371097564697,
"logits/rejected": 2.089672088623047,
"loss": 5.5522,
"step": 8
},
{
"beta_dpo/beta_used": 0.010606064461171627,
"beta_dpo/beta_used_raw": 0.010606064461171627,
"beta_dpo/gap_mean": -0.04680243134498596,
"beta_dpo/gap_std": 0.9687216281890869,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.018848167539267015,
"grad_norm": 16.163658142089844,
"learning_rate": 8.333333333333333e-08,
"logits/chosen": 2.1110918521881104,
"logits/rejected": 2.0067708492279053,
"loss": 5.5433,
"step": 9
},
{
"beta_dpo/beta_used": 0.00987918209284544,
"beta_dpo/beta_used_raw": 0.00987918209284544,
"beta_dpo/gap_mean": -0.03316927328705788,
"beta_dpo/gap_std": 0.8964071273803711,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.020942408376963352,
"grad_norm": 15.014591217041016,
"learning_rate": 9.375e-08,
"logits/chosen": 1.858559012413025,
"logits/rejected": 2.0337729454040527,
"loss": 5.5481,
"step": 10
},
{
"beta_dpo/beta_used": 0.010337094776332378,
"beta_dpo/beta_used_raw": 0.010337094776332378,
"beta_dpo/gap_mean": 0.03589403256773949,
"beta_dpo/gap_std": 0.8406289219856262,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.023036649214659685,
"grad_norm": 18.00157356262207,
"learning_rate": 1.0416666666666667e-07,
"logits/chosen": 1.893631100654602,
"logits/rejected": 1.8213893175125122,
"loss": 5.5413,
"step": 11
},
{
"beta_dpo/beta_used": 0.009809032082557678,
"beta_dpo/beta_used_raw": 0.009809032082557678,
"beta_dpo/gap_mean": 0.031110307201743126,
"beta_dpo/gap_std": 0.8743820190429688,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.025130890052356022,
"grad_norm": 16.61766815185547,
"learning_rate": 1.1458333333333332e-07,
"logits/chosen": 1.5167274475097656,
"logits/rejected": 1.6536264419555664,
"loss": 5.5438,
"step": 12
},
{
"beta_dpo/beta_used": 0.009467006660997868,
"beta_dpo/beta_used_raw": 0.009467006660997868,
"beta_dpo/gap_mean": -9.547406807541847e-05,
"beta_dpo/gap_std": 0.9159330725669861,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.027225130890052355,
"grad_norm": 18.662208557128906,
"learning_rate": 1.25e-07,
"logits/chosen": 1.8461039066314697,
"logits/rejected": 1.8939508199691772,
"loss": 5.5481,
"step": 13
},
{
"beta_dpo/beta_used": 0.009789557196199894,
"beta_dpo/beta_used_raw": 0.009789557196199894,
"beta_dpo/gap_mean": -0.035510119050741196,
"beta_dpo/gap_std": 0.8479209542274475,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.02931937172774869,
"grad_norm": 15.506324768066406,
"learning_rate": 1.3541666666666666e-07,
"logits/chosen": 1.8386187553405762,
"logits/rejected": 1.5979816913604736,
"loss": 5.5477,
"step": 14
},
{
"beta_dpo/beta_used": 0.010104680433869362,
"beta_dpo/beta_used_raw": 0.010104680433869362,
"beta_dpo/gap_mean": -0.05601261928677559,
"beta_dpo/gap_std": 0.8992904424667358,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.031413612565445025,
"grad_norm": 17.449304580688477,
"learning_rate": 1.4583333333333335e-07,
"logits/chosen": 1.9075326919555664,
"logits/rejected": 1.7650988101959229,
"loss": 5.5445,
"step": 15
},
{
"beta_dpo/beta_used": 0.010083270259201527,
"beta_dpo/beta_used_raw": 0.010083270259201527,
"beta_dpo/gap_mean": -0.037581950426101685,
"beta_dpo/gap_std": 0.9426290988922119,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.033507853403141365,
"grad_norm": 18.769243240356445,
"learning_rate": 1.5624999999999999e-07,
"logits/chosen": 2.0930874347686768,
"logits/rejected": 1.8253268003463745,
"loss": 5.5458,
"step": 16
},
{
"beta_dpo/beta_used": 0.009928649291396141,
"beta_dpo/beta_used_raw": 0.009928649291396141,
"beta_dpo/gap_mean": -0.03386215493083,
"beta_dpo/gap_std": 0.9212523102760315,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.0356020942408377,
"grad_norm": 20.794923782348633,
"learning_rate": 1.6666666666666665e-07,
"logits/chosen": 1.769667387008667,
"logits/rejected": 1.7814725637435913,
"loss": 5.5484,
"step": 17
},
{
"beta_dpo/beta_used": 0.01007060892879963,
"beta_dpo/beta_used_raw": 0.01007060892879963,
"beta_dpo/gap_mean": -0.01796822063624859,
"beta_dpo/gap_std": 0.8694018721580505,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.03769633507853403,
"grad_norm": 16.827281951904297,
"learning_rate": 1.7708333333333334e-07,
"logits/chosen": 1.7808014154434204,
"logits/rejected": 1.7646872997283936,
"loss": 5.5437,
"step": 18
},
{
"beta_dpo/beta_used": 0.009850156493484974,
"beta_dpo/beta_used_raw": 0.009850156493484974,
"beta_dpo/gap_mean": -0.04470803216099739,
"beta_dpo/gap_std": 0.8516724705696106,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.039790575916230364,
"grad_norm": 16.883514404296875,
"learning_rate": 1.875e-07,
"logits/chosen": 2.054273843765259,
"logits/rejected": 2.0647222995758057,
"loss": 5.5483,
"step": 19
},
{
"beta_dpo/beta_used": 0.009869220666587353,
"beta_dpo/beta_used_raw": 0.009869220666587353,
"beta_dpo/gap_mean": -0.02124340645968914,
"beta_dpo/gap_std": 0.8342310190200806,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.041884816753926704,
"grad_norm": 17.35634994506836,
"learning_rate": 1.9791666666666664e-07,
"logits/chosen": 2.368907928466797,
"logits/rejected": 2.167264223098755,
"loss": 5.5473,
"step": 20
},
{
"beta_dpo/beta_used": 0.009426544420421124,
"beta_dpo/beta_used_raw": 0.009426544420421124,
"beta_dpo/gap_mean": -0.017612561583518982,
"beta_dpo/gap_std": 0.8350470066070557,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.04397905759162304,
"grad_norm": 15.612009048461914,
"learning_rate": 2.0833333333333333e-07,
"logits/chosen": 2.1447153091430664,
"logits/rejected": 2.121504545211792,
"loss": 5.5489,
"step": 21
},
{
"beta_dpo/beta_used": 0.01062285527586937,
"beta_dpo/beta_used_raw": 0.01062285527586937,
"beta_dpo/gap_mean": 0.06357374787330627,
"beta_dpo/gap_std": 0.8492311835289001,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.04607329842931937,
"grad_norm": 17.105073928833008,
"learning_rate": 2.1875e-07,
"logits/chosen": 1.6775203943252563,
"logits/rejected": 1.841507911682129,
"loss": 5.5386,
"step": 22
},
{
"beta_dpo/beta_used": 0.009609552100300789,
"beta_dpo/beta_used_raw": 0.009609552100300789,
"beta_dpo/gap_mean": 0.09488284587860107,
"beta_dpo/gap_std": 0.7845069169998169,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.048167539267015703,
"grad_norm": 17.074167251586914,
"learning_rate": 2.2916666666666663e-07,
"logits/chosen": 2.0019335746765137,
"logits/rejected": 1.876702070236206,
"loss": 5.5427,
"step": 23
},
{
"beta_dpo/beta_used": 0.009548784233629704,
"beta_dpo/beta_used_raw": 0.009548784233629704,
"beta_dpo/gap_mean": 0.01768093928694725,
"beta_dpo/gap_std": 0.821352481842041,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.050261780104712044,
"grad_norm": 16.67466163635254,
"learning_rate": 2.3958333333333335e-07,
"logits/chosen": 2.0418663024902344,
"logits/rejected": 1.9522861242294312,
"loss": 5.5466,
"step": 24
},
{
"beta_dpo/beta_used": 0.010621692053973675,
"beta_dpo/beta_used_raw": 0.010621692053973675,
"beta_dpo/gap_mean": 0.02274535596370697,
"beta_dpo/gap_std": 0.7953328490257263,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.05235602094240838,
"grad_norm": 18.33420753479004,
"learning_rate": 2.5e-07,
"logits/chosen": 1.807928204536438,
"logits/rejected": 1.8295968770980835,
"loss": 5.5401,
"step": 25
},
{
"beta_dpo/beta_used": 0.009963510558009148,
"beta_dpo/beta_used_raw": 0.009963510558009148,
"beta_dpo/gap_mean": 0.053856804966926575,
"beta_dpo/gap_std": 0.7753854990005493,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.05445026178010471,
"grad_norm": 17.823503494262695,
"learning_rate": 2.604166666666667e-07,
"logits/chosen": 1.6102561950683594,
"logits/rejected": 1.5492463111877441,
"loss": 5.5438,
"step": 26
},
{
"beta_dpo/beta_used": 0.009892760775983334,
"beta_dpo/beta_used_raw": 0.009892760775983334,
"beta_dpo/gap_mean": 0.035262782126665115,
"beta_dpo/gap_std": 0.7987048625946045,
"beta_dpo/mask_keep_frac": 0.90625,
"epoch": 0.05654450261780105,
"grad_norm": 17.028757095336914,
"learning_rate": 2.708333333333333e-07,
"logits/chosen": 2.1599764823913574,
"logits/rejected": 1.9214812517166138,
"loss": 5.5447,
"step": 27
},
{
"beta_dpo/beta_used": 0.010526652447879314,
"beta_dpo/beta_used_raw": 0.010526652447879314,
"beta_dpo/gap_mean": 0.05413653701543808,
"beta_dpo/gap_std": 0.794916033744812,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.05863874345549738,
"grad_norm": 19.700441360473633,
"learning_rate": 2.8125e-07,
"logits/chosen": 1.9106848239898682,
"logits/rejected": 2.0312745571136475,
"loss": 5.5371,
"step": 28
},
{
"beta_dpo/beta_used": 0.010448331013321877,
"beta_dpo/beta_used_raw": 0.010448331013321877,
"beta_dpo/gap_mean": 0.02559659071266651,
"beta_dpo/gap_std": 0.8567264080047607,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.060732984293193716,
"grad_norm": 16.468107223510742,
"learning_rate": 2.916666666666667e-07,
"logits/chosen": 2.2274394035339355,
"logits/rejected": 1.952311635017395,
"loss": 5.5416,
"step": 29
},
{
"beta_dpo/beta_used": 0.009916335344314575,
"beta_dpo/beta_used_raw": 0.009916335344314575,
"beta_dpo/gap_mean": 0.04508252441883087,
"beta_dpo/gap_std": 0.8601223826408386,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.06282722513089005,
"grad_norm": 16.325408935546875,
"learning_rate": 3.020833333333333e-07,
"logits/chosen": 1.463683843612671,
"logits/rejected": 1.4335768222808838,
"loss": 5.5426,
"step": 30
},
{
"beta_dpo/beta_used": 0.010172335430979729,
"beta_dpo/beta_used_raw": 0.010172335430979729,
"beta_dpo/gap_mean": 0.06362677365541458,
"beta_dpo/gap_std": 0.7783647775650024,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.06492146596858639,
"grad_norm": 15.478079795837402,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": 1.877584457397461,
"logits/rejected": 1.7691612243652344,
"loss": 5.5409,
"step": 31
},
{
"beta_dpo/beta_used": 0.010151976719498634,
"beta_dpo/beta_used_raw": 0.010151976719498634,
"beta_dpo/gap_mean": 0.06375724077224731,
"beta_dpo/gap_std": 0.8205698728561401,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.06701570680628273,
"grad_norm": 16.919126510620117,
"learning_rate": 3.2291666666666666e-07,
"logits/chosen": 1.713607668876648,
"logits/rejected": 1.5853075981140137,
"loss": 5.5403,
"step": 32
},
{
"beta_dpo/beta_used": 0.010386324487626553,
"beta_dpo/beta_used_raw": 0.010386324487626553,
"beta_dpo/gap_mean": 0.08595895767211914,
"beta_dpo/gap_std": 0.9470534324645996,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.06910994764397906,
"grad_norm": 18.542863845825195,
"learning_rate": 3.333333333333333e-07,
"logits/chosen": 1.8243309259414673,
"logits/rejected": 1.729980230331421,
"loss": 5.5374,
"step": 33
},
{
"beta_dpo/beta_used": 0.009925332851707935,
"beta_dpo/beta_used_raw": 0.009925332851707935,
"beta_dpo/gap_mean": 0.09634880721569061,
"beta_dpo/gap_std": 0.9391544461250305,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.0712041884816754,
"grad_norm": 20.447566986083984,
"learning_rate": 3.4375e-07,
"logits/chosen": 2.0654332637786865,
"logits/rejected": 2.0050528049468994,
"loss": 5.5405,
"step": 34
},
{
"beta_dpo/beta_used": 0.009798412211239338,
"beta_dpo/beta_used_raw": 0.009798412211239338,
"beta_dpo/gap_mean": 0.09882716089487076,
"beta_dpo/gap_std": 0.9505617022514343,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.07329842931937172,
"grad_norm": 15.859660148620605,
"learning_rate": 3.541666666666667e-07,
"logits/chosen": 1.4941397905349731,
"logits/rejected": 1.6851754188537598,
"loss": 5.5409,
"step": 35
},
{
"beta_dpo/beta_used": 0.010313436388969421,
"beta_dpo/beta_used_raw": 0.010313436388969421,
"beta_dpo/gap_mean": 0.12937475740909576,
"beta_dpo/gap_std": 0.9316422939300537,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.07539267015706806,
"grad_norm": 17.933530807495117,
"learning_rate": 3.645833333333333e-07,
"logits/chosen": 1.7557207345962524,
"logits/rejected": 1.8125189542770386,
"loss": 5.5377,
"step": 36
},
{
"beta_dpo/beta_used": 0.009959274902939796,
"beta_dpo/beta_used_raw": 0.009959274902939796,
"beta_dpo/gap_mean": 0.13312453031539917,
"beta_dpo/gap_std": 0.9395788908004761,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.0774869109947644,
"grad_norm": 18.94852638244629,
"learning_rate": 3.75e-07,
"logits/chosen": 2.1051876544952393,
"logits/rejected": 2.0780932903289795,
"loss": 5.5388,
"step": 37
},
{
"beta_dpo/beta_used": 0.009908566251397133,
"beta_dpo/beta_used_raw": 0.009908566251397133,
"beta_dpo/gap_mean": 0.16690538823604584,
"beta_dpo/gap_std": 0.9445586800575256,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.07958115183246073,
"grad_norm": 16.41166114807129,
"learning_rate": 3.8541666666666665e-07,
"logits/chosen": 2.1622610092163086,
"logits/rejected": 2.414966344833374,
"loss": 5.5385,
"step": 38
},
{
"beta_dpo/beta_used": 0.009442973881959915,
"beta_dpo/beta_used_raw": 0.009442973881959915,
"beta_dpo/gap_mean": 0.2755042314529419,
"beta_dpo/gap_std": 0.9882732629776001,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.08167539267015707,
"grad_norm": 16.65612030029297,
"learning_rate": 3.958333333333333e-07,
"logits/chosen": 2.00819730758667,
"logits/rejected": 2.0810117721557617,
"loss": 5.5383,
"step": 39
},
{
"beta_dpo/beta_used": 0.00932924635708332,
"beta_dpo/beta_used_raw": 0.00932924635708332,
"beta_dpo/gap_mean": 0.2719506323337555,
"beta_dpo/gap_std": 1.0504027605056763,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.08376963350785341,
"grad_norm": 14.621367454528809,
"learning_rate": 4.0625e-07,
"logits/chosen": 1.8936258554458618,
"logits/rejected": 1.895420789718628,
"loss": 5.5403,
"step": 40
},
{
"beta_dpo/beta_used": 0.009584764949977398,
"beta_dpo/beta_used_raw": 0.009584764949977398,
"beta_dpo/gap_mean": 0.19441170990467072,
"beta_dpo/gap_std": 1.045138955116272,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.08586387434554973,
"grad_norm": 19.228687286376953,
"learning_rate": 4.1666666666666667e-07,
"logits/chosen": 1.925986647605896,
"logits/rejected": 1.7834522724151611,
"loss": 5.539,
"step": 41
},
{
"beta_dpo/beta_used": 0.01015196181833744,
"beta_dpo/beta_used_raw": 0.01015196181833744,
"beta_dpo/gap_mean": 0.273733526468277,
"beta_dpo/gap_std": 1.0639562606811523,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.08795811518324607,
"grad_norm": 16.421497344970703,
"learning_rate": 4.270833333333333e-07,
"logits/chosen": 2.446347236633301,
"logits/rejected": 2.493040084838867,
"loss": 5.533,
"step": 42
},
{
"beta_dpo/beta_used": 0.010610947385430336,
"beta_dpo/beta_used_raw": 0.010610947385430336,
"beta_dpo/gap_mean": 0.32640647888183594,
"beta_dpo/gap_std": 1.1364136934280396,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.09005235602094241,
"grad_norm": 17.893566131591797,
"learning_rate": 4.375e-07,
"logits/chosen": 1.920936107635498,
"logits/rejected": 1.9038302898406982,
"loss": 5.5271,
"step": 43
},
{
"beta_dpo/beta_used": 0.01078065950423479,
"beta_dpo/beta_used_raw": 0.01078065950423479,
"beta_dpo/gap_mean": 0.3758638799190521,
"beta_dpo/gap_std": 1.1031302213668823,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.09214659685863874,
"grad_norm": 20.40181541442871,
"learning_rate": 4.479166666666667e-07,
"logits/chosen": 1.7042187452316284,
"logits/rejected": 1.6264781951904297,
"loss": 5.5232,
"step": 44
},
{
"beta_dpo/beta_used": 0.009485357441008091,
"beta_dpo/beta_used_raw": 0.009485357441008091,
"beta_dpo/gap_mean": 0.4286791682243347,
"beta_dpo/gap_std": 1.1151459217071533,
"beta_dpo/mask_keep_frac": 0.9375,
"epoch": 0.09424083769633508,
"grad_norm": 18.97907829284668,
"learning_rate": 4.5833333333333327e-07,
"logits/chosen": 2.0053882598876953,
"logits/rejected": 1.8914456367492676,
"loss": 5.5308,
"step": 45
},
{
"beta_dpo/beta_used": 0.009595800191164017,
"beta_dpo/beta_used_raw": 0.009595800191164017,
"beta_dpo/gap_mean": 0.4576748311519623,
"beta_dpo/gap_std": 1.219599723815918,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.09633507853403141,
"grad_norm": 18.820371627807617,
"learning_rate": 4.6874999999999996e-07,
"logits/chosen": 1.9573893547058105,
"logits/rejected": 2.0128352642059326,
"loss": 5.5285,
"step": 46
},
{
"beta_dpo/beta_used": 0.00906536914408207,
"beta_dpo/beta_used_raw": 0.00906536914408207,
"beta_dpo/gap_mean": 0.4006018042564392,
"beta_dpo/gap_std": 1.2177817821502686,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.09842931937172775,
"grad_norm": 14.672569274902344,
"learning_rate": 4.791666666666667e-07,
"logits/chosen": 1.6562869548797607,
"logits/rejected": 2.0256872177124023,
"loss": 5.5352,
"step": 47
},
{
"beta_dpo/beta_used": 0.009402711875736713,
"beta_dpo/beta_used_raw": 0.009402711875736713,
"beta_dpo/gap_mean": 0.41205257177352905,
"beta_dpo/gap_std": 1.2531991004943848,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.10052356020942409,
"grad_norm": 14.840012550354004,
"learning_rate": 4.895833333333333e-07,
"logits/chosen": 2.0920979976654053,
"logits/rejected": 2.0639383792877197,
"loss": 5.531,
"step": 48
},
{
"beta_dpo/beta_used": 0.008774153888225555,
"beta_dpo/beta_used_raw": 0.008774153888225555,
"beta_dpo/gap_mean": 0.4781131148338318,
"beta_dpo/gap_std": 1.356748342514038,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.10261780104712041,
"grad_norm": 13.876286506652832,
"learning_rate": 5e-07,
"logits/chosen": 1.855541706085205,
"logits/rejected": 1.988050937652588,
"loss": 5.5333,
"step": 49
},
{
"beta_dpo/beta_used": 0.009108037687838078,
"beta_dpo/beta_used_raw": 0.009108037687838078,
"beta_dpo/gap_mean": 0.38943564891815186,
"beta_dpo/gap_std": 1.4389784336090088,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.10471204188481675,
"grad_norm": 15.5634126663208,
"learning_rate": 4.999932966293553e-07,
"logits/chosen": 2.0256078243255615,
"logits/rejected": 2.1688108444213867,
"loss": 5.5343,
"step": 50
},
{
"beta_dpo/beta_used": 0.01051395758986473,
"beta_dpo/beta_used_raw": 0.01051395758986473,
"beta_dpo/gap_mean": 0.49393463134765625,
"beta_dpo/gap_std": 1.5790597200393677,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.1068062827225131,
"grad_norm": 20.477724075317383,
"learning_rate": 4.999731868769026e-07,
"logits/chosen": 1.561262607574463,
"logits/rejected": 1.7091399431228638,
"loss": 5.5166,
"step": 51
},
{
"beta_dpo/beta_used": 0.010623252019286156,
"beta_dpo/beta_used_raw": 0.010623252019286156,
"beta_dpo/gap_mean": 0.6119964122772217,
"beta_dpo/gap_std": 1.613837480545044,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.10890052356020942,
"grad_norm": 21.817190170288086,
"learning_rate": 4.99939671821067e-07,
"logits/chosen": 2.082730531692505,
"logits/rejected": 2.247464656829834,
"loss": 5.5081,
"step": 52
},
{
"beta_dpo/beta_used": 0.01156248152256012,
"beta_dpo/beta_used_raw": 0.01156248152256012,
"beta_dpo/gap_mean": 0.5816015601158142,
"beta_dpo/gap_std": 1.6215416193008423,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.11099476439790576,
"grad_norm": 22.167213439941406,
"learning_rate": 4.998927532591591e-07,
"logits/chosen": 2.082489013671875,
"logits/rejected": 2.072319984436035,
"loss": 5.497,
"step": 53
},
{
"beta_dpo/beta_used": 0.008477726019918919,
"beta_dpo/beta_used_raw": 0.008477726019918919,
"beta_dpo/gap_mean": 0.7841604948043823,
"beta_dpo/gap_std": 1.7853457927703857,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.1130890052356021,
"grad_norm": 14.047541618347168,
"learning_rate": 4.998324337072792e-07,
"logits/chosen": 1.5058391094207764,
"logits/rejected": 1.5753705501556396,
"loss": 5.5252,
"step": 54
},
{
"beta_dpo/beta_used": 0.009478636085987091,
"beta_dpo/beta_used_raw": 0.009478636085987091,
"beta_dpo/gap_mean": 0.5571960210800171,
"beta_dpo/gap_std": 1.6621750593185425,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.11518324607329843,
"grad_norm": 14.583319664001465,
"learning_rate": 4.997587164001815e-07,
"logits/chosen": 2.003282308578491,
"logits/rejected": 2.013611316680908,
"loss": 5.5249,
"step": 55
},
{
"beta_dpo/beta_used": 0.009290758520364761,
"beta_dpo/beta_used_raw": 0.009290758520364761,
"beta_dpo/gap_mean": 0.638902485370636,
"beta_dpo/gap_std": 1.8342792987823486,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.11727748691099477,
"grad_norm": 15.21347713470459,
"learning_rate": 4.996716052911017e-07,
"logits/chosen": 2.15181565284729,
"logits/rejected": 2.135338306427002,
"loss": 5.5226,
"step": 56
},
{
"beta_dpo/beta_used": 0.009111498482525349,
"beta_dpo/beta_used_raw": 0.009111498482525349,
"beta_dpo/gap_mean": 0.9660211801528931,
"beta_dpo/gap_std": 1.9951261281967163,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.1193717277486911,
"grad_norm": 16.580799102783203,
"learning_rate": 4.99571105051544e-07,
"logits/chosen": 2.130098581314087,
"logits/rejected": 1.8486499786376953,
"loss": 5.5134,
"step": 57
},
{
"beta_dpo/beta_used": 0.008915345184504986,
"beta_dpo/beta_used_raw": 0.008915345184504986,
"beta_dpo/gap_mean": 0.9618982076644897,
"beta_dpo/gap_std": 1.7987135648727417,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.12146596858638743,
"grad_norm": 15.42608642578125,
"learning_rate": 4.994572210710314e-07,
"logits/chosen": 1.6894437074661255,
"logits/rejected": 1.699744462966919,
"loss": 5.5158,
"step": 58
},
{
"beta_dpo/beta_used": 0.009204288944602013,
"beta_dpo/beta_used_raw": 0.009204288944602013,
"beta_dpo/gap_mean": 0.8019428253173828,
"beta_dpo/gap_std": 2.0088188648223877,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.12356020942408377,
"grad_norm": 15.577202796936035,
"learning_rate": 4.993299594568162e-07,
"logits/chosen": 1.5538208484649658,
"logits/rejected": 1.6072800159454346,
"loss": 5.52,
"step": 59
},
{
"beta_dpo/beta_used": 0.009918388910591602,
"beta_dpo/beta_used_raw": 0.009918388910591602,
"beta_dpo/gap_mean": 0.847707986831665,
"beta_dpo/gap_std": 2.123305320739746,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.1256544502617801,
"grad_norm": 14.793850898742676,
"learning_rate": 4.991893270335525e-07,
"logits/chosen": 2.0483858585357666,
"logits/rejected": 1.8020352125167847,
"loss": 5.5111,
"step": 60
},
{
"beta_dpo/beta_used": 0.009820302948355675,
"beta_dpo/beta_used_raw": 0.009820302948355675,
"beta_dpo/gap_mean": 0.9802277684211731,
"beta_dpo/gap_std": 2.0959830284118652,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.12774869109947645,
"grad_norm": 16.083724975585938,
"learning_rate": 4.990353313429303e-07,
"logits/chosen": 1.9473985433578491,
"logits/rejected": 1.9882135391235352,
"loss": 5.5041,
"step": 61
},
{
"beta_dpo/beta_used": 0.010419272817671299,
"beta_dpo/beta_used_raw": 0.010419272817671299,
"beta_dpo/gap_mean": 0.979004442691803,
"beta_dpo/gap_std": 2.1615118980407715,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.12984293193717278,
"grad_norm": 18.826759338378906,
"learning_rate": 4.988679806432711e-07,
"logits/chosen": 1.872680902481079,
"logits/rejected": 1.8009073734283447,
"loss": 5.5007,
"step": 62
},
{
"beta_dpo/beta_used": 0.00935581885278225,
"beta_dpo/beta_used_raw": 0.00935581885278225,
"beta_dpo/gap_mean": 1.0244998931884766,
"beta_dpo/gap_std": 2.4170455932617188,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.1319371727748691,
"grad_norm": 15.110966682434082,
"learning_rate": 4.986872839090852e-07,
"logits/chosen": 1.9980614185333252,
"logits/rejected": 2.105093002319336,
"loss": 5.5107,
"step": 63
},
{
"beta_dpo/beta_used": 0.010298279114067554,
"beta_dpo/beta_used_raw": 0.010298279114067554,
"beta_dpo/gap_mean": 1.1149272918701172,
"beta_dpo/gap_std": 2.4519460201263428,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.13403141361256546,
"grad_norm": 20.477684020996094,
"learning_rate": 4.9849325083059e-07,
"logits/chosen": 1.7054760456085205,
"logits/rejected": 1.951492428779602,
"loss": 5.4844,
"step": 64
},
{
"beta_dpo/beta_used": 0.009701458737254143,
"beta_dpo/beta_used_raw": 0.009701458737254143,
"beta_dpo/gap_mean": 1.1075406074523926,
"beta_dpo/gap_std": 2.5126233100891113,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.13612565445026178,
"grad_norm": 16.23882484436035,
"learning_rate": 4.982858918131906e-07,
"logits/chosen": 1.9961862564086914,
"logits/rejected": 2.0398294925689697,
"loss": 5.502,
"step": 65
},
{
"beta_dpo/beta_used": 0.010468224063515663,
"beta_dpo/beta_used_raw": 0.010468224063515663,
"beta_dpo/gap_mean": 1.0450140237808228,
"beta_dpo/gap_std": 2.6909701824188232,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.1382198952879581,
"grad_norm": 18.444570541381836,
"learning_rate": 4.980652179769217e-07,
"logits/chosen": 1.6719987392425537,
"logits/rejected": 1.881594181060791,
"loss": 5.4931,
"step": 66
},
{
"beta_dpo/beta_used": 0.010425317101180553,
"beta_dpo/beta_used_raw": 0.010425317101180553,
"beta_dpo/gap_mean": 1.015570878982544,
"beta_dpo/gap_std": 2.8450400829315186,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.14031413612565444,
"grad_norm": 17.675512313842773,
"learning_rate": 4.978312411558517e-07,
"logits/chosen": 2.0440990924835205,
"logits/rejected": 2.0636091232299805,
"loss": 5.4964,
"step": 67
},
{
"beta_dpo/beta_used": 0.009568197652697563,
"beta_dpo/beta_used_raw": 0.009568197652697563,
"beta_dpo/gap_mean": 1.0808396339416504,
"beta_dpo/gap_std": 3.0677380561828613,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1424083769633508,
"grad_norm": 15.759063720703125,
"learning_rate": 4.975839738974473e-07,
"logits/chosen": 1.5441210269927979,
"logits/rejected": 1.3784618377685547,
"loss": 5.5019,
"step": 68
},
{
"beta_dpo/beta_used": 0.011599601246416569,
"beta_dpo/beta_used_raw": 0.011599601246416569,
"beta_dpo/gap_mean": 1.4021799564361572,
"beta_dpo/gap_std": 3.188746213912964,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.14450261780104712,
"grad_norm": 22.499710083007812,
"learning_rate": 4.97323429461901e-07,
"logits/chosen": 1.9849984645843506,
"logits/rejected": 1.8482412099838257,
"loss": 5.4511,
"step": 69
},
{
"beta_dpo/beta_used": 0.009116853587329388,
"beta_dpo/beta_used_raw": 0.009116853587329388,
"beta_dpo/gap_mean": 1.547209620475769,
"beta_dpo/gap_std": 3.23995304107666,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.14659685863874344,
"grad_norm": 15.340811729431152,
"learning_rate": 4.970496218214204e-07,
"logits/chosen": 2.295590400695801,
"logits/rejected": 2.3875482082366943,
"loss": 5.4922,
"step": 70
},
{
"beta_dpo/beta_used": 0.012471513822674751,
"beta_dpo/beta_used_raw": 0.012471513822674751,
"beta_dpo/gap_mean": 1.582148551940918,
"beta_dpo/gap_std": 3.453483819961548,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1486910994764398,
"grad_norm": 22.772903442382812,
"learning_rate": 4.967625656594781e-07,
"logits/chosen": 1.8594659566879272,
"logits/rejected": 1.9157780408859253,
"loss": 5.4328,
"step": 71
},
{
"beta_dpo/beta_used": 0.010649541392922401,
"beta_dpo/beta_used_raw": 0.009485064074397087,
"beta_dpo/gap_mean": 1.6831897497177124,
"beta_dpo/gap_std": 3.4016518592834473,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.15078534031413612,
"grad_norm": 19.1998348236084,
"learning_rate": 4.964622763700252e-07,
"logits/chosen": 1.8293884992599487,
"logits/rejected": 1.892337679862976,
"loss": 5.4513,
"step": 72
},
{
"beta_dpo/beta_used": 0.011073922738432884,
"beta_dpo/beta_used_raw": 0.010616803541779518,
"beta_dpo/gap_mean": 1.6742221117019653,
"beta_dpo/gap_std": 3.5703773498535156,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.15287958115183245,
"grad_norm": 20.304622650146484,
"learning_rate": 4.961487700566646e-07,
"logits/chosen": 2.2375855445861816,
"logits/rejected": 2.2253012657165527,
"loss": 5.4517,
"step": 73
},
{
"beta_dpo/beta_used": 0.009733829647302628,
"beta_dpo/beta_used_raw": 0.009427759796380997,
"beta_dpo/gap_mean": 1.560795783996582,
"beta_dpo/gap_std": 3.745507001876831,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.1549738219895288,
"grad_norm": 20.791471481323242,
"learning_rate": 4.958220635317885e-07,
"logits/chosen": 1.8168758153915405,
"logits/rejected": 1.7319445610046387,
"loss": 5.4672,
"step": 74
},
{
"beta_dpo/beta_used": 0.013009906746447086,
"beta_dpo/beta_used_raw": 0.013009906746447086,
"beta_dpo/gap_mean": 1.750954031944275,
"beta_dpo/gap_std": 3.6934804916381836,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.15706806282722513,
"grad_norm": 23.435768127441406,
"learning_rate": 4.954821743156767e-07,
"logits/chosen": 1.8880510330200195,
"logits/rejected": 1.9295786619186401,
"loss": 5.4219,
"step": 75
},
{
"beta_dpo/beta_used": 0.007087262813001871,
"beta_dpo/beta_used_raw": 0.007036793977022171,
"beta_dpo/gap_mean": 2.2242462635040283,
"beta_dpo/gap_std": 3.95930814743042,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.15916230366492146,
"grad_norm": 17.705944061279297,
"learning_rate": 4.951291206355559e-07,
"logits/chosen": 2.0245938301086426,
"logits/rejected": 1.793765902519226,
"loss": 5.4853,
"step": 76
},
{
"beta_dpo/beta_used": 0.008353885263204575,
"beta_dpo/beta_used_raw": 0.008143781684339046,
"beta_dpo/gap_mean": 2.021268844604492,
"beta_dpo/gap_std": 4.135770797729492,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.1612565445026178,
"grad_norm": 16.503738403320312,
"learning_rate": 4.947629214246236e-07,
"logits/chosen": 2.1751253604888916,
"logits/rejected": 2.1524720191955566,
"loss": 5.4772,
"step": 77
},
{
"beta_dpo/beta_used": 0.011361459270119667,
"beta_dpo/beta_used_raw": 0.011361459270119667,
"beta_dpo/gap_mean": 2.2091753482818604,
"beta_dpo/gap_std": 4.447847843170166,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.16335078534031414,
"grad_norm": 19.320999145507812,
"learning_rate": 4.943835963210323e-07,
"logits/chosen": 1.7212610244750977,
"logits/rejected": 1.7153496742248535,
"loss": 5.4244,
"step": 78
},
{
"beta_dpo/beta_used": 0.009612835012376308,
"beta_dpo/beta_used_raw": 0.00958208180963993,
"beta_dpo/gap_mean": 2.3663156032562256,
"beta_dpo/gap_std": 4.715466022491455,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.16544502617801046,
"grad_norm": 19.817529678344727,
"learning_rate": 4.939911656668361e-07,
"logits/chosen": 1.9014170169830322,
"logits/rejected": 2.1795027256011963,
"loss": 5.4218,
"step": 79
},
{
"beta_dpo/beta_used": 0.009250715374946594,
"beta_dpo/beta_used_raw": 0.008980360813438892,
"beta_dpo/gap_mean": 2.0934667587280273,
"beta_dpo/gap_std": 5.130978584289551,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.16753926701570682,
"grad_norm": 18.116151809692383,
"learning_rate": 4.935856505068998e-07,
"logits/chosen": 1.5658341646194458,
"logits/rejected": 1.7265154123306274,
"loss": 5.4494,
"step": 80
},
{
"beta_dpo/beta_used": 0.009883089922368526,
"beta_dpo/beta_used_raw": 0.009712887927889824,
"beta_dpo/gap_mean": 2.5941665172576904,
"beta_dpo/gap_std": 5.163574695587158,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.16963350785340314,
"grad_norm": 22.016693115234375,
"learning_rate": 4.93167072587771e-07,
"logits/chosen": 1.742193579673767,
"logits/rejected": 1.9251035451889038,
"loss": 5.2421,
"step": 81
},
{
"beta_dpo/beta_used": 0.009547875262796879,
"beta_dpo/beta_used_raw": 0.009547875262796879,
"beta_dpo/gap_mean": 2.4227218627929688,
"beta_dpo/gap_std": 5.073668956756592,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.17172774869109947,
"grad_norm": 17.96396255493164,
"learning_rate": 4.92735454356513e-07,
"logits/chosen": 1.9680440425872803,
"logits/rejected": 1.9148989915847778,
"loss": 5.4469,
"step": 82
},
{
"beta_dpo/beta_used": 0.010678245685994625,
"beta_dpo/beta_used_raw": 0.009905948303639889,
"beta_dpo/gap_mean": 2.5397074222564697,
"beta_dpo/gap_std": 5.242867469787598,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.17382198952879582,
"grad_norm": 23.018129348754883,
"learning_rate": 4.922908189595017e-07,
"logits/chosen": 1.5621941089630127,
"logits/rejected": 1.5305424928665161,
"loss": 5.3852,
"step": 83
},
{
"beta_dpo/beta_used": 0.006417885888367891,
"beta_dpo/beta_used_raw": 0.006086358800530434,
"beta_dpo/gap_mean": 2.7024130821228027,
"beta_dpo/gap_std": 5.565805435180664,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.17591623036649215,
"grad_norm": 13.125260353088379,
"learning_rate": 4.918331902411841e-07,
"logits/chosen": 2.024345874786377,
"logits/rejected": 1.9076447486877441,
"loss": 5.4801,
"step": 84
},
{
"beta_dpo/beta_used": 0.009424247778952122,
"beta_dpo/beta_used_raw": 0.008895869366824627,
"beta_dpo/gap_mean": 2.2540838718414307,
"beta_dpo/gap_std": 5.414524555206299,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.17801047120418848,
"grad_norm": 18.945358276367188,
"learning_rate": 4.913625927427995e-07,
"logits/chosen": 1.51369047164917,
"logits/rejected": 1.6780593395233154,
"loss": 5.4333,
"step": 85
},
{
"beta_dpo/beta_used": 0.013801836408674717,
"beta_dpo/beta_used_raw": 0.013801836408674717,
"beta_dpo/gap_mean": 2.4163331985473633,
"beta_dpo/gap_std": 5.740031719207764,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.18010471204188483,
"grad_norm": 25.516857147216797,
"learning_rate": 4.908790517010636e-07,
"logits/chosen": 1.8556016683578491,
"logits/rejected": 1.872323751449585,
"loss": 5.3655,
"step": 86
},
{
"beta_dpo/beta_used": 0.008744290098547935,
"beta_dpo/beta_used_raw": 0.008744290098547935,
"beta_dpo/gap_mean": 2.9491662979125977,
"beta_dpo/gap_std": 5.92836856842041,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.18219895287958116,
"grad_norm": 20.971223831176758,
"learning_rate": 4.903825930468148e-07,
"logits/chosen": 1.6977579593658447,
"logits/rejected": 1.6770415306091309,
"loss": 5.4258,
"step": 87
},
{
"beta_dpo/beta_used": 0.007864508777856827,
"beta_dpo/beta_used_raw": 0.007664060685783625,
"beta_dpo/gap_mean": 3.0257012844085693,
"beta_dpo/gap_std": 5.952022552490234,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.18429319371727748,
"grad_norm": 15.7445068359375,
"learning_rate": 4.898732434036243e-07,
"logits/chosen": 1.5104684829711914,
"logits/rejected": 1.357150912284851,
"loss": 5.4513,
"step": 88
},
{
"beta_dpo/beta_used": 0.010421731509268284,
"beta_dpo/beta_used_raw": 0.01035550981760025,
"beta_dpo/gap_mean": 2.823183536529541,
"beta_dpo/gap_std": 6.035218238830566,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.18638743455497384,
"grad_norm": 21.113414764404297,
"learning_rate": 4.893510300863676e-07,
"logits/chosen": 1.9621143341064453,
"logits/rejected": 1.8874907493591309,
"loss": 5.402,
"step": 89
},
{
"beta_dpo/beta_used": 0.012045778334140778,
"beta_dpo/beta_used_raw": 0.010188662447035313,
"beta_dpo/gap_mean": 2.964503288269043,
"beta_dpo/gap_std": 5.843700408935547,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.18848167539267016,
"grad_norm": 30.074321746826172,
"learning_rate": 4.8881598109976e-07,
"logits/chosen": 2.1660492420196533,
"logits/rejected": 2.0563719272613525,
"loss": 5.3301,
"step": 90
},
{
"beta_dpo/beta_used": 0.007549135014414787,
"beta_dpo/beta_used_raw": 0.005311334040015936,
"beta_dpo/gap_mean": 2.952354669570923,
"beta_dpo/gap_std": 6.251888751983643,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1905759162303665,
"grad_norm": 17.551513671875,
"learning_rate": 4.882681251368548e-07,
"logits/chosen": 1.2380826473236084,
"logits/rejected": 1.557425618171692,
"loss": 5.2785,
"step": 91
},
{
"beta_dpo/beta_used": 0.015128381550312042,
"beta_dpo/beta_used_raw": 0.014704037457704544,
"beta_dpo/gap_mean": 2.930189847946167,
"beta_dpo/gap_std": 6.301963806152344,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.19267015706806281,
"grad_norm": 30.674835205078125,
"learning_rate": 4.877074915775048e-07,
"logits/chosen": 1.6860748529434204,
"logits/rejected": 1.4988112449645996,
"loss": 5.2723,
"step": 92
},
{
"beta_dpo/beta_used": 0.006954543758183718,
"beta_dpo/beta_used_raw": 0.0063597094267606735,
"beta_dpo/gap_mean": 3.009707450866699,
"beta_dpo/gap_std": 6.455717086791992,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.19476439790575917,
"grad_norm": 12.83521556854248,
"learning_rate": 4.871341104867864e-07,
"logits/chosen": 1.9297364950180054,
"logits/rejected": 1.8627700805664062,
"loss": 5.4614,
"step": 93
},
{
"beta_dpo/beta_used": 0.0072138672694563866,
"beta_dpo/beta_used_raw": 0.005733566824346781,
"beta_dpo/gap_mean": 3.3237226009368896,
"beta_dpo/gap_std": 6.866450786590576,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.1968586387434555,
"grad_norm": 15.222475051879883,
"learning_rate": 4.865480126133871e-07,
"logits/chosen": 1.5820927619934082,
"logits/rejected": 1.6416268348693848,
"loss": 5.444,
"step": 94
},
{
"beta_dpo/beta_used": 0.008435830473899841,
"beta_dpo/beta_used_raw": 0.007779551669955254,
"beta_dpo/gap_mean": 3.4265336990356445,
"beta_dpo/gap_std": 7.192251205444336,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.19895287958115182,
"grad_norm": 17.31826400756836,
"learning_rate": 4.859492293879573e-07,
"logits/chosen": 1.7770836353302002,
"logits/rejected": 1.5319178104400635,
"loss": 5.4109,
"step": 95
},
{
"beta_dpo/beta_used": 0.010932082310318947,
"beta_dpo/beta_used_raw": 0.00794284138828516,
"beta_dpo/gap_mean": 3.5308783054351807,
"beta_dpo/gap_std": 7.482184886932373,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.20104712041884817,
"grad_norm": 24.832975387573242,
"learning_rate": 4.853377929214243e-07,
"logits/chosen": 1.4598766565322876,
"logits/rejected": 1.3611279726028442,
"loss": 5.3563,
"step": 96
},
{
"beta_dpo/beta_used": 0.010159716010093689,
"beta_dpo/beta_used_raw": 0.010018959641456604,
"beta_dpo/gap_mean": 3.793192148208618,
"beta_dpo/gap_std": 7.78098201751709,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.2031413612565445,
"grad_norm": 22.265670776367188,
"learning_rate": 4.847137360032699e-07,
"logits/chosen": 1.5520637035369873,
"logits/rejected": 1.644052505493164,
"loss": 5.3533,
"step": 97
},
{
"beta_dpo/beta_used": 0.01027124933898449,
"beta_dpo/beta_used_raw": 0.009908015839755535,
"beta_dpo/gap_mean": 3.9612808227539062,
"beta_dpo/gap_std": 7.822225093841553,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.20523560209424083,
"grad_norm": 21.846027374267578,
"learning_rate": 4.84077092099773e-07,
"logits/chosen": 2.0662131309509277,
"logits/rejected": 2.265798807144165,
"loss": 5.3616,
"step": 98
},
{
"beta_dpo/beta_used": 0.01488437969237566,
"beta_dpo/beta_used_raw": 0.01488437969237566,
"beta_dpo/gap_mean": 3.7299928665161133,
"beta_dpo/gap_std": 8.350497245788574,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.20732984293193718,
"grad_norm": 27.384540557861328,
"learning_rate": 4.834278953522137e-07,
"logits/chosen": 1.9069733619689941,
"logits/rejected": 1.8735466003417969,
"loss": 5.251,
"step": 99
},
{
"beta_dpo/beta_used": 0.005023906007409096,
"beta_dpo/beta_used_raw": 0.002925662323832512,
"beta_dpo/gap_mean": 4.102505207061768,
"beta_dpo/gap_std": 8.151671409606934,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.2094240837696335,
"grad_norm": 16.64201545715332,
"learning_rate": 4.827661805750437e-07,
"logits/chosen": 1.9069080352783203,
"logits/rejected": 1.840613842010498,
"loss": 5.4547,
"step": 100
},
{
"beta_dpo/beta_used": 0.013062715530395508,
"beta_dpo/beta_used_raw": 0.013062715530395508,
"beta_dpo/gap_mean": 3.8761510848999023,
"beta_dpo/gap_std": 8.57790756225586,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.21151832460732983,
"grad_norm": 25.09943389892578,
"learning_rate": 4.820919832540181e-07,
"logits/chosen": 1.3813724517822266,
"logits/rejected": 1.6055908203125,
"loss": 5.3,
"step": 101
},
{
"beta_dpo/beta_used": 0.014250491745769978,
"beta_dpo/beta_used_raw": 0.011926423758268356,
"beta_dpo/gap_mean": 4.320952892303467,
"beta_dpo/gap_std": 8.283108711242676,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.2136125654450262,
"grad_norm": 24.078140258789062,
"learning_rate": 4.814053395442932e-07,
"logits/chosen": 1.7069716453552246,
"logits/rejected": 1.822311520576477,
"loss": 5.2401,
"step": 102
},
{
"beta_dpo/beta_used": 0.006543359719216824,
"beta_dpo/beta_used_raw": 0.003442541928961873,
"beta_dpo/gap_mean": 4.461350917816162,
"beta_dpo/gap_std": 8.508588790893555,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2157068062827225,
"grad_norm": 14.553688049316406,
"learning_rate": 4.807062862684873e-07,
"logits/chosen": 2.264915943145752,
"logits/rejected": 2.3848659992218018,
"loss": 5.4299,
"step": 103
},
{
"beta_dpo/beta_used": 0.007411661557853222,
"beta_dpo/beta_used_raw": 0.006676441989839077,
"beta_dpo/gap_mean": 3.8371684551239014,
"beta_dpo/gap_std": 9.153058052062988,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.21780104712041884,
"grad_norm": 15.451645851135254,
"learning_rate": 4.799948609147061e-07,
"logits/chosen": 1.8409569263458252,
"logits/rejected": 1.6925066709518433,
"loss": 5.4174,
"step": 104
},
{
"beta_dpo/beta_used": 0.01699206791818142,
"beta_dpo/beta_used_raw": 0.016010824590921402,
"beta_dpo/gap_mean": 4.9135966300964355,
"beta_dpo/gap_std": 8.913808822631836,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.2198952879581152,
"grad_norm": 37.052093505859375,
"learning_rate": 4.792711016345321e-07,
"logits/chosen": 1.8707003593444824,
"logits/rejected": 1.7518517971038818,
"loss": 5.0469,
"step": 105
},
{
"beta_dpo/beta_used": 0.00902323704212904,
"beta_dpo/beta_used_raw": 0.007059420458972454,
"beta_dpo/gap_mean": 4.627331256866455,
"beta_dpo/gap_std": 9.46343994140625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.22198952879581152,
"grad_norm": 24.8957576751709,
"learning_rate": 4.785350472409791e-07,
"logits/chosen": 1.8109657764434814,
"logits/rejected": 2.01666522026062,
"loss": 5.3041,
"step": 106
},
{
"beta_dpo/beta_used": 0.009695657528936863,
"beta_dpo/beta_used_raw": 0.007410034071654081,
"beta_dpo/gap_mean": 5.23702335357666,
"beta_dpo/gap_std": 9.842565536499023,
"beta_dpo/mask_keep_frac": 0.90625,
"epoch": 0.22408376963350785,
"grad_norm": 23.953954696655273,
"learning_rate": 4.777867372064105e-07,
"logits/chosen": 1.8471797704696655,
"logits/rejected": 1.797261357307434,
"loss": 5.2947,
"step": 107
},
{
"beta_dpo/beta_used": 0.01452508196234703,
"beta_dpo/beta_used_raw": 0.014417744241654873,
"beta_dpo/gap_mean": 5.950323581695557,
"beta_dpo/gap_std": 9.602670669555664,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.2261780104712042,
"grad_norm": 28.78997039794922,
"learning_rate": 4.770262116604223e-07,
"logits/chosen": 1.781799077987671,
"logits/rejected": 1.9572784900665283,
"loss": 5.1167,
"step": 108
},
{
"beta_dpo/beta_used": 0.0066536241210997105,
"beta_dpo/beta_used_raw": 0.004698293283581734,
"beta_dpo/gap_mean": 6.354887962341309,
"beta_dpo/gap_std": 10.01487922668457,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.22827225130890053,
"grad_norm": 13.27180004119873,
"learning_rate": 4.7625351138769166e-07,
"logits/chosen": 1.9066269397735596,
"logits/rejected": 1.9160687923431396,
"loss": 5.3954,
"step": 109
},
{
"beta_dpo/beta_used": 0.010541049763560295,
"beta_dpo/beta_used_raw": 0.009356118738651276,
"beta_dpo/gap_mean": 6.276027202606201,
"beta_dpo/gap_std": 11.113080978393555,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.23036649214659685,
"grad_norm": 22.47654151916504,
"learning_rate": 4.75468677825789e-07,
"logits/chosen": 1.6488604545593262,
"logits/rejected": 1.6686369180679321,
"loss": 5.2445,
"step": 110
},
{
"beta_dpo/beta_used": 0.010099717415869236,
"beta_dpo/beta_used_raw": 0.01006684172898531,
"beta_dpo/gap_mean": 6.5077595710754395,
"beta_dpo/gap_std": 11.19198989868164,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.2324607329842932,
"grad_norm": 23.18780517578125,
"learning_rate": 4.7467175306295647e-07,
"logits/chosen": 1.820462942123413,
"logits/rejected": 1.9046530723571777,
"loss": 5.2581,
"step": 111
},
{
"beta_dpo/beta_used": 0.008607706055045128,
"beta_dpo/beta_used_raw": 0.003359769470989704,
"beta_dpo/gap_mean": 5.609295845031738,
"beta_dpo/gap_std": 11.112923622131348,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.23455497382198953,
"grad_norm": 20.108413696289062,
"learning_rate": 4.7386277983585053e-07,
"logits/chosen": 1.7908120155334473,
"logits/rejected": 1.8937515020370483,
"loss": 5.3333,
"step": 112
},
{
"beta_dpo/beta_used": 0.016104042530059814,
"beta_dpo/beta_used_raw": 0.013628358021378517,
"beta_dpo/gap_mean": 6.332000255584717,
"beta_dpo/gap_std": 11.891839981079102,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.23664921465968586,
"grad_norm": 32.54283142089844,
"learning_rate": 4.7304180152725024e-07,
"logits/chosen": 1.49177885055542,
"logits/rejected": 1.6306943893432617,
"loss": 4.845,
"step": 113
},
{
"beta_dpo/beta_used": 0.007905099540948868,
"beta_dpo/beta_used_raw": 0.0018536364659667015,
"beta_dpo/gap_mean": 5.829183101654053,
"beta_dpo/gap_std": 12.018501281738281,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.2387434554973822,
"grad_norm": 22.07866859436035,
"learning_rate": 4.7220886216373085e-07,
"logits/chosen": 1.4004794359207153,
"logits/rejected": 1.3008323907852173,
"loss": 5.3357,
"step": 114
},
{
"beta_dpo/beta_used": 0.009187846444547176,
"beta_dpo/beta_used_raw": 0.003049051621928811,
"beta_dpo/gap_mean": 4.952703952789307,
"beta_dpo/gap_std": 11.791646957397461,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.24083769633507854,
"grad_norm": 19.946712493896484,
"learning_rate": 4.7136400641330245e-07,
"logits/chosen": 1.9727150201797485,
"logits/rejected": 1.7037996053695679,
"loss": 5.325,
"step": 115
},
{
"beta_dpo/beta_used": 0.011235121637582779,
"beta_dpo/beta_used_raw": 0.010504303500056267,
"beta_dpo/gap_mean": 5.199014663696289,
"beta_dpo/gap_std": 11.840551376342773,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.24293193717277486,
"grad_norm": 25.633787155151367,
"learning_rate": 4.70507279583015e-07,
"logits/chosen": 1.7236762046813965,
"logits/rejected": 1.8275989294052124,
"loss": 5.2519,
"step": 116
},
{
"beta_dpo/beta_used": 0.015477584674954414,
"beta_dpo/beta_used_raw": 0.011517820879817009,
"beta_dpo/gap_mean": 5.6072611808776855,
"beta_dpo/gap_std": 11.469279289245605,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.2450261780104712,
"grad_norm": 34.2017936706543,
"learning_rate": 4.6963872761652834e-07,
"logits/chosen": 1.724921703338623,
"logits/rejected": 1.5013651847839355,
"loss": 5.0554,
"step": 117
},
{
"beta_dpo/beta_used": 0.01235922146588564,
"beta_dpo/beta_used_raw": 0.007801849860697985,
"beta_dpo/gap_mean": 6.555847644805908,
"beta_dpo/gap_std": 11.524944305419922,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.24712041884816754,
"grad_norm": 32.83587646484375,
"learning_rate": 4.687583970916486e-07,
"logits/chosen": 1.7096357345581055,
"logits/rejected": 1.7614951133728027,
"loss": 5.103,
"step": 118
},
{
"beta_dpo/beta_used": 0.007942959666252136,
"beta_dpo/beta_used_raw": 0.0018032464431598783,
"beta_dpo/gap_mean": 6.371241569519043,
"beta_dpo/gap_std": 12.957239151000977,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.24921465968586387,
"grad_norm": 19.372495651245117,
"learning_rate": 4.6786633521783005e-07,
"logits/chosen": 1.8338923454284668,
"logits/rejected": 1.9390045404434204,
"loss": 5.3143,
"step": 119
},
{
"beta_dpo/beta_used": 0.009077337570488453,
"beta_dpo/beta_used_raw": 0.006467485800385475,
"beta_dpo/gap_mean": 6.747334003448486,
"beta_dpo/gap_std": 13.51995849609375,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.2513089005235602,
"grad_norm": 25.99736785888672,
"learning_rate": 4.669625898336438e-07,
"logits/chosen": 1.904350757598877,
"logits/rejected": 1.7881104946136475,
"loss": 5.2818,
"step": 120
},
{
"beta_dpo/beta_used": 0.004188536666333675,
"beta_dpo/beta_used_raw": -0.0010998877696692944,
"beta_dpo/gap_mean": 5.929210662841797,
"beta_dpo/gap_std": 12.944700241088867,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.2534031413612565,
"grad_norm": 11.07016372680664,
"learning_rate": 4.6604720940421207e-07,
"logits/chosen": 1.178752064704895,
"logits/rejected": 1.4918150901794434,
"loss": 5.4539,
"step": 121
},
{
"beta_dpo/beta_used": 0.013262229040265083,
"beta_dpo/beta_used_raw": 0.010668408125638962,
"beta_dpo/gap_mean": 6.120506286621094,
"beta_dpo/gap_std": 13.898996353149414,
"beta_dpo/mask_keep_frac": 0.59375,
"epoch": 0.2554973821989529,
"grad_norm": 27.32390785217285,
"learning_rate": 4.651202430186092e-07,
"logits/chosen": 1.6907187700271606,
"logits/rejected": 2.047647714614868,
"loss": 5.1692,
"step": 122
},
{
"beta_dpo/beta_used": 0.02133483625948429,
"beta_dpo/beta_used_raw": 0.01663101837038994,
"beta_dpo/gap_mean": 6.646225929260254,
"beta_dpo/gap_std": 14.434886932373047,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.25759162303664923,
"grad_norm": 43.08592987060547,
"learning_rate": 4.6418174038722924e-07,
"logits/chosen": 1.7687194347381592,
"logits/rejected": 1.6279195547103882,
"loss": 4.832,
"step": 123
},
{
"beta_dpo/beta_used": 0.010348731651902199,
"beta_dpo/beta_used_raw": 0.00513859186321497,
"beta_dpo/gap_mean": 7.770158290863037,
"beta_dpo/gap_std": 14.987278938293457,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.25968586387434556,
"grad_norm": 27.494583129882812,
"learning_rate": 4.6323175183912023e-07,
"logits/chosen": 1.4107732772827148,
"logits/rejected": 1.5245213508605957,
"loss": 5.1637,
"step": 124
},
{
"beta_dpo/beta_used": 0.01517592091113329,
"beta_dpo/beta_used_raw": 0.009712353348731995,
"beta_dpo/gap_mean": 7.219732284545898,
"beta_dpo/gap_std": 14.99057388305664,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2617801047120419,
"grad_norm": 35.12456130981445,
"learning_rate": 4.6227032831928483e-07,
"logits/chosen": 1.578749418258667,
"logits/rejected": 1.5831409692764282,
"loss": 5.0641,
"step": 125
},
{
"beta_dpo/beta_used": 0.014101858250796795,
"beta_dpo/beta_used_raw": 0.006124613806605339,
"beta_dpo/gap_mean": 8.286654472351074,
"beta_dpo/gap_std": 15.358405113220215,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.2638743455497382,
"grad_norm": 30.269481658935547,
"learning_rate": 4.612975213859487e-07,
"logits/chosen": 2.050217866897583,
"logits/rejected": 2.2472779750823975,
"loss": 4.999,
"step": 126
},
{
"beta_dpo/beta_used": 0.01756615750491619,
"beta_dpo/beta_used_raw": 0.016226252540946007,
"beta_dpo/gap_mean": 8.151988983154297,
"beta_dpo/gap_std": 15.966252326965332,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.26596858638743454,
"grad_norm": 35.137901306152344,
"learning_rate": 4.603133832077953e-07,
"logits/chosen": 1.3663495779037476,
"logits/rejected": 1.3178493976593018,
"loss": 4.8628,
"step": 127
},
{
"beta_dpo/beta_used": 0.012780067510902882,
"beta_dpo/beta_used_raw": 0.012262159027159214,
"beta_dpo/gap_mean": 9.522705078125,
"beta_dpo/gap_std": 15.977328300476074,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.2680628272251309,
"grad_norm": 26.821195602416992,
"learning_rate": 4.5931796656116837e-07,
"logits/chosen": 1.4073151350021362,
"logits/rejected": 1.3889837265014648,
"loss": 5.0196,
"step": 128
},
{
"beta_dpo/beta_used": 0.013786004856228828,
"beta_dpo/beta_used_raw": 0.006445377133786678,
"beta_dpo/gap_mean": 9.859175682067871,
"beta_dpo/gap_std": 16.836477279663086,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.27015706806282724,
"grad_norm": 30.80266761779785,
"learning_rate": 4.5831132482724193e-07,
"logits/chosen": 1.4743335247039795,
"logits/rejected": 1.6113927364349365,
"loss": 4.9661,
"step": 129
},
{
"beta_dpo/beta_used": 0.013385320082306862,
"beta_dpo/beta_used_raw": 0.007840610109269619,
"beta_dpo/gap_mean": 9.868795394897461,
"beta_dpo/gap_std": 16.45522117614746,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.27225130890052357,
"grad_norm": 31.25225067138672,
"learning_rate": 4.5729351198915705e-07,
"logits/chosen": 1.6542197465896606,
"logits/rejected": 1.9003280401229858,
"loss": 4.9968,
"step": 130
},
{
"beta_dpo/beta_used": 0.011940027587115765,
"beta_dpo/beta_used_raw": 0.007653850130736828,
"beta_dpo/gap_mean": 8.79969596862793,
"beta_dpo/gap_std": 16.86931037902832,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.2743455497382199,
"grad_norm": 26.647390365600586,
"learning_rate": 4.5626458262912735e-07,
"logits/chosen": 1.2588789463043213,
"logits/rejected": 1.1883281469345093,
"loss": 5.1746,
"step": 131
},
{
"beta_dpo/beta_used": 0.020324911922216415,
"beta_dpo/beta_used_raw": 0.009501131251454353,
"beta_dpo/gap_mean": 8.400039672851562,
"beta_dpo/gap_std": 17.63036346435547,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2764397905759162,
"grad_norm": 42.30400085449219,
"learning_rate": 4.5522459192551166e-07,
"logits/chosen": 1.688079833984375,
"logits/rejected": 1.7301361560821533,
"loss": 4.7249,
"step": 132
},
{
"beta_dpo/beta_used": 0.027873020619153976,
"beta_dpo/beta_used_raw": 0.021997135132551193,
"beta_dpo/gap_mean": 10.159527778625488,
"beta_dpo/gap_std": 18.784109115600586,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.27853403141361255,
"grad_norm": 44.038734436035156,
"learning_rate": 4.541735956498554e-07,
"logits/chosen": 1.7651947736740112,
"logits/rejected": 1.7092256546020508,
"loss": 4.3015,
"step": 133
},
{
"beta_dpo/beta_used": 0.009234755299985409,
"beta_dpo/beta_used_raw": 0.00529387965798378,
"beta_dpo/gap_mean": 9.394585609436035,
"beta_dpo/gap_std": 17.975656509399414,
"beta_dpo/mask_keep_frac": 0.90625,
"epoch": 0.2806282722513089,
"grad_norm": 26.31687355041504,
"learning_rate": 4.5311165016389914e-07,
"logits/chosen": 2.0336687564849854,
"logits/rejected": 2.0945892333984375,
"loss": 5.1635,
"step": 134
},
{
"beta_dpo/beta_used": 0.018918566405773163,
"beta_dpo/beta_used_raw": 0.01316812727600336,
"beta_dpo/gap_mean": 9.845601081848145,
"beta_dpo/gap_std": 16.85881805419922,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.28272251308900526,
"grad_norm": 37.51054382324219,
"learning_rate": 4.520388124165564e-07,
"logits/chosen": 1.188499927520752,
"logits/rejected": 0.9699570536613464,
"loss": 4.7303,
"step": 135
},
{
"beta_dpo/beta_used": 0.009488210082054138,
"beta_dpo/beta_used_raw": 0.004158595576882362,
"beta_dpo/gap_mean": 10.281312942504883,
"beta_dpo/gap_std": 17.496814727783203,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.2848167539267016,
"grad_norm": 22.803377151489258,
"learning_rate": 4.5095513994085974e-07,
"logits/chosen": 1.2178832292556763,
"logits/rejected": 1.4434417486190796,
"loss": 5.1349,
"step": 136
},
{
"beta_dpo/beta_used": 0.01097769383341074,
"beta_dpo/beta_used_raw": 0.00757699366658926,
"beta_dpo/gap_mean": 9.332605361938477,
"beta_dpo/gap_std": 18.028961181640625,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.2869109947643979,
"grad_norm": 26.255542755126953,
"learning_rate": 4.498606908508753e-07,
"logits/chosen": 1.823258876800537,
"logits/rejected": 1.6405431032180786,
"loss": 5.0908,
"step": 137
},
{
"beta_dpo/beta_used": 0.008300930261611938,
"beta_dpo/beta_used_raw": -0.002543874317780137,
"beta_dpo/gap_mean": 9.568643569946289,
"beta_dpo/gap_std": 18.384599685668945,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.28900523560209423,
"grad_norm": 22.109146118164062,
"learning_rate": 4.487555238385862e-07,
"logits/chosen": 1.5952801704406738,
"logits/rejected": 1.5838592052459717,
"loss": 5.217,
"step": 138
},
{
"beta_dpo/beta_used": 0.011962666176259518,
"beta_dpo/beta_used_raw": 0.004802809562534094,
"beta_dpo/gap_mean": 7.955426216125488,
"beta_dpo/gap_std": 19.22389793395996,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.29109947643979056,
"grad_norm": 25.960317611694336,
"learning_rate": 4.476396981707453e-07,
"logits/chosen": 1.4421442747116089,
"logits/rejected": 1.5927166938781738,
"loss": 5.1662,
"step": 139
},
{
"beta_dpo/beta_used": 0.02191855013370514,
"beta_dpo/beta_used_raw": 0.017846662551164627,
"beta_dpo/gap_mean": 8.63882064819336,
"beta_dpo/gap_std": 19.29082679748535,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.2931937172774869,
"grad_norm": 49.216670989990234,
"learning_rate": 4.4651327368569684e-07,
"logits/chosen": 1.5183682441711426,
"logits/rejected": 1.5717380046844482,
"loss": 4.6103,
"step": 140
},
{
"beta_dpo/beta_used": 0.014003738760948181,
"beta_dpo/beta_used_raw": 0.006822553928941488,
"beta_dpo/gap_mean": 10.601947784423828,
"beta_dpo/gap_std": 19.063888549804688,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.29528795811518327,
"grad_norm": 43.43609619140625,
"learning_rate": 4.453763107901675e-07,
"logits/chosen": 1.4985511302947998,
"logits/rejected": 1.5825482606887817,
"loss": 4.9577,
"step": 141
},
{
"beta_dpo/beta_used": 0.022530585527420044,
"beta_dpo/beta_used_raw": 0.0178590789437294,
"beta_dpo/gap_mean": 10.459101676940918,
"beta_dpo/gap_std": 20.326278686523438,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.2973821989528796,
"grad_norm": 39.85797882080078,
"learning_rate": 4.4422887045602674e-07,
"logits/chosen": 2.03916335105896,
"logits/rejected": 1.778942584991455,
"loss": 4.5715,
"step": 142
},
{
"beta_dpo/beta_used": 0.016412286087870598,
"beta_dpo/beta_used_raw": 0.009996837005019188,
"beta_dpo/gap_mean": 11.491534233093262,
"beta_dpo/gap_std": 21.220121383666992,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.2994764397905759,
"grad_norm": 45.14045333862305,
"learning_rate": 4.4307101421701755e-07,
"logits/chosen": 1.4854329824447632,
"logits/rejected": 1.3263810873031616,
"loss": 4.7507,
"step": 143
},
{
"beta_dpo/beta_used": 0.009764298796653748,
"beta_dpo/beta_used_raw": -0.003037895541638136,
"beta_dpo/gap_mean": 12.049786567687988,
"beta_dpo/gap_std": 21.212291717529297,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.30157068062827225,
"grad_norm": 27.531478881835938,
"learning_rate": 4.419028041654559e-07,
"logits/chosen": 1.4089610576629639,
"logits/rejected": 1.3612356185913086,
"loss": 5.076,
"step": 144
},
{
"beta_dpo/beta_used": 0.015810877084732056,
"beta_dpo/beta_used_raw": 0.0027779447846114635,
"beta_dpo/gap_mean": 12.608784675598145,
"beta_dpo/gap_std": 21.368688583374023,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.3036649214659686,
"grad_norm": 30.809558868408203,
"learning_rate": 4.4072430294890166e-07,
"logits/chosen": 1.900479793548584,
"logits/rejected": 1.9564039707183838,
"loss": 4.7396,
"step": 145
},
{
"beta_dpo/beta_used": 0.005745714530348778,
"beta_dpo/beta_used_raw": -0.005364367738366127,
"beta_dpo/gap_mean": 11.638813018798828,
"beta_dpo/gap_std": 20.36126708984375,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.3057591623036649,
"grad_norm": 19.06623077392578,
"learning_rate": 4.395355737667985e-07,
"logits/chosen": 1.4604260921478271,
"logits/rejected": 1.6670466661453247,
"loss": 5.2938,
"step": 146
},
{
"beta_dpo/beta_used": 0.01017170213162899,
"beta_dpo/beta_used_raw": -0.009537998586893082,
"beta_dpo/gap_mean": 10.316466331481934,
"beta_dpo/gap_std": 20.63652992248535,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3078534031413613,
"grad_norm": 28.077390670776367,
"learning_rate": 4.3833668036708483e-07,
"logits/chosen": 1.601604700088501,
"logits/rejected": 1.5656179189682007,
"loss": 5.1406,
"step": 147
},
{
"beta_dpo/beta_used": 0.013707359321415424,
"beta_dpo/beta_used_raw": 0.004445759579539299,
"beta_dpo/gap_mean": 10.2113618850708,
"beta_dpo/gap_std": 21.985990524291992,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.3099476439790576,
"grad_norm": 33.60511016845703,
"learning_rate": 4.3712768704277524e-07,
"logits/chosen": 1.3828201293945312,
"logits/rejected": 1.3478338718414307,
"loss": 5.0395,
"step": 148
},
{
"beta_dpo/beta_used": 0.008657879196107388,
"beta_dpo/beta_used_raw": 0.0010744923492893577,
"beta_dpo/gap_mean": 11.151147842407227,
"beta_dpo/gap_std": 20.73192024230957,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.31204188481675393,
"grad_norm": 28.912668228149414,
"learning_rate": 4.3590865862851263e-07,
"logits/chosen": 2.108185291290283,
"logits/rejected": 1.9332281351089478,
"loss": 5.2272,
"step": 149
},
{
"beta_dpo/beta_used": 0.01538037694990635,
"beta_dpo/beta_used_raw": 0.010280387476086617,
"beta_dpo/gap_mean": 11.221325874328613,
"beta_dpo/gap_std": 20.35310173034668,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.31413612565445026,
"grad_norm": 42.4512939453125,
"learning_rate": 4.346796604970912e-07,
"logits/chosen": 1.8120979070663452,
"logits/rejected": 1.7387409210205078,
"loss": 4.8116,
"step": 150
},
{
"beta_dpo/beta_used": 0.028699517250061035,
"beta_dpo/beta_used_raw": 0.02786320261657238,
"beta_dpo/gap_mean": 12.776216506958008,
"beta_dpo/gap_std": 21.87693977355957,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3162303664921466,
"grad_norm": 46.40315628051758,
"learning_rate": 4.3344075855595097e-07,
"logits/chosen": 1.5828508138656616,
"logits/rejected": 1.6035374402999878,
"loss": 4.197,
"step": 151
},
{
"beta_dpo/beta_used": 0.014542932622134686,
"beta_dpo/beta_used_raw": 9.965314529836178e-05,
"beta_dpo/gap_mean": 13.169672966003418,
"beta_dpo/gap_std": 21.826007843017578,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.3183246073298429,
"grad_norm": 33.306884765625,
"learning_rate": 4.3219201924364323e-07,
"logits/chosen": 1.3182780742645264,
"logits/rejected": 1.7138738632202148,
"loss": 4.8325,
"step": 152
},
{
"beta_dpo/beta_used": 0.02487529069185257,
"beta_dpo/beta_used_raw": 0.022432954981923103,
"beta_dpo/gap_mean": 15.099176406860352,
"beta_dpo/gap_std": 21.7235050201416,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.3204188481675393,
"grad_norm": 43.83867263793945,
"learning_rate": 4.309335095262675e-07,
"logits/chosen": 1.5923478603363037,
"logits/rejected": 1.5436244010925293,
"loss": 4.2459,
"step": 153
},
{
"beta_dpo/beta_used": 0.019335608929395676,
"beta_dpo/beta_used_raw": 0.007598421536386013,
"beta_dpo/gap_mean": 15.192681312561035,
"beta_dpo/gap_std": 23.77366828918457,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3225130890052356,
"grad_norm": 33.363792419433594,
"learning_rate": 4.2966529689388064e-07,
"logits/chosen": 1.4466509819030762,
"logits/rejected": 1.4517470598220825,
"loss": 4.5467,
"step": 154
},
{
"beta_dpo/beta_used": 0.018129050731658936,
"beta_dpo/beta_used_raw": 0.009811091236770153,
"beta_dpo/gap_mean": 13.158918380737305,
"beta_dpo/gap_std": 22.92918586730957,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.32460732984293195,
"grad_norm": 34.454673767089844,
"learning_rate": 4.2838744935687716e-07,
"logits/chosen": 1.3940773010253906,
"logits/rejected": 1.3722490072250366,
"loss": 4.5952,
"step": 155
},
{
"beta_dpo/beta_used": 0.022653408348560333,
"beta_dpo/beta_used_raw": 0.017730802297592163,
"beta_dpo/gap_mean": 13.508572578430176,
"beta_dpo/gap_std": 24.86406135559082,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3267015706806283,
"grad_norm": 49.66926574707031,
"learning_rate": 4.271000354423425e-07,
"logits/chosen": 1.7816330194473267,
"logits/rejected": 1.626936912536621,
"loss": 4.4733,
"step": 156
},
{
"beta_dpo/beta_used": 0.00911460816860199,
"beta_dpo/beta_used_raw": -0.00761047936975956,
"beta_dpo/gap_mean": 13.641767501831055,
"beta_dpo/gap_std": 25.110754013061523,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3287958115183246,
"grad_norm": 28.902727127075195,
"learning_rate": 4.258031241903777e-07,
"logits/chosen": 1.8037209510803223,
"logits/rejected": 1.9432283639907837,
"loss": 5.0592,
"step": 157
},
{
"beta_dpo/beta_used": 0.022043395787477493,
"beta_dpo/beta_used_raw": 0.013519931584596634,
"beta_dpo/gap_mean": 12.658366203308105,
"beta_dpo/gap_std": 24.050304412841797,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.3308900523560209,
"grad_norm": 49.30216979980469,
"learning_rate": 4.2449678515039743e-07,
"logits/chosen": 1.9826464653015137,
"logits/rejected": 2.0838711261749268,
"loss": 4.5538,
"step": 158
},
{
"beta_dpo/beta_used": 0.013070004992187023,
"beta_dpo/beta_used_raw": 0.00028916902374476194,
"beta_dpo/gap_mean": 11.834725379943848,
"beta_dpo/gap_std": 25.340810775756836,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.33298429319371725,
"grad_norm": 54.22214126586914,
"learning_rate": 4.2318108837739986e-07,
"logits/chosen": 1.4999477863311768,
"logits/rejected": 1.369155764579773,
"loss": 5.1162,
"step": 159
},
{
"beta_dpo/beta_used": 0.024159716442227364,
"beta_dpo/beta_used_raw": 0.009181090630590916,
"beta_dpo/gap_mean": 13.555554389953613,
"beta_dpo/gap_std": 24.396202087402344,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.33507853403141363,
"grad_norm": 49.655757904052734,
"learning_rate": 4.218561044282098e-07,
"logits/chosen": 1.9025671482086182,
"logits/rejected": 1.5475167036056519,
"loss": 4.3937,
"step": 160
},
{
"beta_dpo/beta_used": 0.026369977742433548,
"beta_dpo/beta_used_raw": 0.018488148227334023,
"beta_dpo/gap_mean": 14.321226119995117,
"beta_dpo/gap_std": 25.79440689086914,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.33717277486910996,
"grad_norm": 56.462730407714844,
"learning_rate": 4.2052190435769554e-07,
"logits/chosen": 1.4102540016174316,
"logits/rejected": 1.2628462314605713,
"loss": 4.4445,
"step": 161
},
{
"beta_dpo/beta_used": 0.026266392320394516,
"beta_dpo/beta_used_raw": 0.02344740927219391,
"beta_dpo/gap_mean": 15.67480182647705,
"beta_dpo/gap_std": 26.169410705566406,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.3392670157068063,
"grad_norm": 64.92961120605469,
"learning_rate": 4.1917855971495763e-07,
"logits/chosen": 1.5759161710739136,
"logits/rejected": 1.4259589910507202,
"loss": 4.3731,
"step": 162
},
{
"beta_dpo/beta_used": 0.010873702354729176,
"beta_dpo/beta_used_raw": -0.004741042852401733,
"beta_dpo/gap_mean": 15.373876571655273,
"beta_dpo/gap_std": 24.578004837036133,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.3413612565445026,
"grad_norm": 35.616493225097656,
"learning_rate": 4.1782614253949255e-07,
"logits/chosen": 1.7332031726837158,
"logits/rejected": 1.7425578832626343,
"loss": 4.9473,
"step": 163
},
{
"beta_dpo/beta_used": 0.025458887219429016,
"beta_dpo/beta_used_raw": 0.015234654769301414,
"beta_dpo/gap_mean": 14.888280868530273,
"beta_dpo/gap_std": 24.105310440063477,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.34345549738219894,
"grad_norm": 43.64400863647461,
"learning_rate": 4.164647253573289e-07,
"logits/chosen": 1.4580204486846924,
"logits/rejected": 1.6339696645736694,
"loss": 4.1504,
"step": 164
},
{
"beta_dpo/beta_used": 0.010639484040439129,
"beta_dpo/beta_used_raw": -0.005685774143785238,
"beta_dpo/gap_mean": 14.408177375793457,
"beta_dpo/gap_std": 23.938827514648438,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.34554973821989526,
"grad_norm": 29.951501846313477,
"learning_rate": 4.1509438117713863e-07,
"logits/chosen": 2.0571203231811523,
"logits/rejected": 2.0520873069763184,
"loss": 4.9575,
"step": 165
},
{
"beta_dpo/beta_used": 0.013327265158295631,
"beta_dpo/beta_used_raw": 0.0013559209182858467,
"beta_dpo/gap_mean": 12.96614933013916,
"beta_dpo/gap_std": 25.120412826538086,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.34764397905759165,
"grad_norm": 36.66180419921875,
"learning_rate": 4.137151834863213e-07,
"logits/chosen": 1.6311808824539185,
"logits/rejected": 1.59664785861969,
"loss": 4.864,
"step": 166
},
{
"beta_dpo/beta_used": 0.03245996683835983,
"beta_dpo/beta_used_raw": 0.031837042421102524,
"beta_dpo/gap_mean": 12.544686317443848,
"beta_dpo/gap_std": 25.848405838012695,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.34973821989528797,
"grad_norm": 92.97169494628906,
"learning_rate": 4.123272062470633e-07,
"logits/chosen": 1.7561887502670288,
"logits/rejected": 1.5244758129119873,
"loss": 4.5144,
"step": 167
},
{
"beta_dpo/beta_used": 0.029823748394846916,
"beta_dpo/beta_used_raw": 0.022257408127188683,
"beta_dpo/gap_mean": 15.493486404418945,
"beta_dpo/gap_std": 25.659543991088867,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3518324607329843,
"grad_norm": 77.38569641113281,
"learning_rate": 4.1093052389237174e-07,
"logits/chosen": 1.3179136514663696,
"logits/rejected": 1.1715956926345825,
"loss": 4.0093,
"step": 168
},
{
"beta_dpo/beta_used": 0.01944730058312416,
"beta_dpo/beta_used_raw": 0.01372382789850235,
"beta_dpo/gap_mean": 16.43326187133789,
"beta_dpo/gap_std": 25.575986862182617,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.3539267015706806,
"grad_norm": 51.683170318603516,
"learning_rate": 4.0952521132208267e-07,
"logits/chosen": 1.7002696990966797,
"logits/rejected": 1.8345009088516235,
"loss": 4.4362,
"step": 169
},
{
"beta_dpo/beta_used": 0.003313018474727869,
"beta_dpo/beta_used_raw": -0.008983142673969269,
"beta_dpo/gap_mean": 18.35196304321289,
"beta_dpo/gap_std": 25.07719612121582,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.35602094240837695,
"grad_norm": 15.952840805053711,
"learning_rate": 4.081113438988443e-07,
"logits/chosen": 1.7776952981948853,
"logits/rejected": 1.684997797012329,
"loss": 5.3302,
"step": 170
},
{
"beta_dpo/beta_used": 0.015446186996996403,
"beta_dpo/beta_used_raw": -0.0005397915374487638,
"beta_dpo/gap_mean": 17.90646743774414,
"beta_dpo/gap_std": 25.070568084716797,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.3581151832460733,
"grad_norm": 43.073421478271484,
"learning_rate": 4.0668899744407567e-07,
"logits/chosen": 1.6446658372879028,
"logits/rejected": 1.5069741010665894,
"loss": 4.6088,
"step": 171
},
{
"beta_dpo/beta_used": 0.009129172191023827,
"beta_dpo/beta_used_raw": -0.007493500132113695,
"beta_dpo/gap_mean": 15.301614761352539,
"beta_dpo/gap_std": 25.80316925048828,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.36020942408376966,
"grad_norm": 37.880577087402344,
"learning_rate": 4.0525824823390043e-07,
"logits/chosen": 1.5476915836334229,
"logits/rejected": 1.720083236694336,
"loss": 5.1515,
"step": 172
},
{
"beta_dpo/beta_used": 0.022744204849004745,
"beta_dpo/beta_used_raw": 0.012280027382075787,
"beta_dpo/gap_mean": 14.178143501281738,
"beta_dpo/gap_std": 26.050079345703125,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.362303664921466,
"grad_norm": 45.3817024230957,
"learning_rate": 4.0381917299505686e-07,
"logits/chosen": 1.6695926189422607,
"logits/rejected": 1.337355136871338,
"loss": 4.4431,
"step": 173
},
{
"beta_dpo/beta_used": 0.0274057500064373,
"beta_dpo/beta_used_raw": 0.015672199428081512,
"beta_dpo/gap_mean": 16.109161376953125,
"beta_dpo/gap_std": 25.606597900390625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3643979057591623,
"grad_norm": 50.70249557495117,
"learning_rate": 4.0237184890078243e-07,
"logits/chosen": 2.1374263763427734,
"logits/rejected": 1.9051423072814941,
"loss": 4.1047,
"step": 174
},
{
"beta_dpo/beta_used": 0.022395484149456024,
"beta_dpo/beta_used_raw": 0.020498108118772507,
"beta_dpo/gap_mean": 16.25571632385254,
"beta_dpo/gap_std": 25.667404174804688,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.36649214659685864,
"grad_norm": 41.90084457397461,
"learning_rate": 4.00916353566676e-07,
"logits/chosen": 1.5944123268127441,
"logits/rejected": 1.6246697902679443,
"loss": 4.3686,
"step": 175
},
{
"beta_dpo/beta_used": 0.0224157627671957,
"beta_dpo/beta_used_raw": 0.011731607839465141,
"beta_dpo/gap_mean": 13.99099349975586,
"beta_dpo/gap_std": 27.471248626708984,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.36858638743455496,
"grad_norm": 63.70330047607422,
"learning_rate": 3.994527650465352e-07,
"logits/chosen": 1.1375683546066284,
"logits/rejected": 1.2096847295761108,
"loss": 4.6342,
"step": 176
},
{
"beta_dpo/beta_used": 0.01675129495561123,
"beta_dpo/beta_used_raw": 0.0006211861036717892,
"beta_dpo/gap_mean": 11.935150146484375,
"beta_dpo/gap_std": 28.26276397705078,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.3706806282722513,
"grad_norm": 45.043968200683594,
"learning_rate": 3.979811618281705e-07,
"logits/chosen": 1.7941234111785889,
"logits/rejected": 1.5880272388458252,
"loss": 4.8643,
"step": 177
},
{
"beta_dpo/beta_used": 0.025348788127303123,
"beta_dpo/beta_used_raw": 0.017674200236797333,
"beta_dpo/gap_mean": 14.811019897460938,
"beta_dpo/gap_std": 28.847448348999023,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.37277486910994767,
"grad_norm": 72.1207046508789,
"learning_rate": 3.9650162282919654e-07,
"logits/chosen": 1.5051298141479492,
"logits/rejected": 1.527164101600647,
"loss": 4.3474,
"step": 178
},
{
"beta_dpo/beta_used": 0.024870071560144424,
"beta_dpo/beta_used_raw": 0.0016013816930353642,
"beta_dpo/gap_mean": 15.476740837097168,
"beta_dpo/gap_std": 27.874025344848633,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.374869109947644,
"grad_norm": 51.20316696166992,
"learning_rate": 3.9501422739279953e-07,
"logits/chosen": 1.2467797994613647,
"logits/rejected": 1.2580769062042236,
"loss": 4.336,
"step": 179
},
{
"beta_dpo/beta_used": 0.03025144338607788,
"beta_dpo/beta_used_raw": 0.0288880355656147,
"beta_dpo/gap_mean": 15.403278350830078,
"beta_dpo/gap_std": 27.956090927124023,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.3769633507853403,
"grad_norm": 47.122596740722656,
"learning_rate": 3.935190552834828e-07,
"logits/chosen": 1.592002034187317,
"logits/rejected": 1.4925694465637207,
"loss": 4.0441,
"step": 180
},
{
"beta_dpo/beta_used": 0.020815353840589523,
"beta_dpo/beta_used_raw": 0.016023779287934303,
"beta_dpo/gap_mean": 16.58497428894043,
"beta_dpo/gap_std": 27.86528205871582,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.37905759162303665,
"grad_norm": 44.79503631591797,
"learning_rate": 3.920161866827889e-07,
"logits/chosen": 1.3529762029647827,
"logits/rejected": 1.3037437200546265,
"loss": 4.3222,
"step": 181
},
{
"beta_dpo/beta_used": 0.023737944662570953,
"beta_dpo/beta_used_raw": 0.017001213505864143,
"beta_dpo/gap_mean": 17.020750045776367,
"beta_dpo/gap_std": 27.084413528442383,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.381151832460733,
"grad_norm": 51.928287506103516,
"learning_rate": 3.90505702185e-07,
"logits/chosen": 1.4569286108016968,
"logits/rejected": 1.4212331771850586,
"loss": 4.1784,
"step": 182
},
{
"beta_dpo/beta_used": 0.01689002849161625,
"beta_dpo/beta_used_raw": 0.011375264264643192,
"beta_dpo/gap_mean": 18.06576919555664,
"beta_dpo/gap_std": 28.06887435913086,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.3832460732984293,
"grad_norm": 32.73753356933594,
"learning_rate": 3.889876827928156e-07,
"logits/chosen": 1.1345239877700806,
"logits/rejected": 1.2237826585769653,
"loss": 4.4976,
"step": 183
},
{
"beta_dpo/beta_used": 0.02527700364589691,
"beta_dpo/beta_used_raw": 0.02064402773976326,
"beta_dpo/gap_mean": 20.417850494384766,
"beta_dpo/gap_std": 29.51577377319336,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.38534031413612563,
"grad_norm": 52.284393310546875,
"learning_rate": 3.874622099130087e-07,
"logits/chosen": 1.6561375856399536,
"logits/rejected": 1.639233946800232,
"loss": 4.2447,
"step": 184
},
{
"beta_dpo/beta_used": 0.006265235599130392,
"beta_dpo/beta_used_raw": -0.01659151166677475,
"beta_dpo/gap_mean": 20.119701385498047,
"beta_dpo/gap_std": 30.129091262817383,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.387434554973822,
"grad_norm": 34.30731201171875,
"learning_rate": 3.859293653520604e-07,
"logits/chosen": 1.819935917854309,
"logits/rejected": 1.873971939086914,
"loss": 5.1,
"step": 185
},
{
"beta_dpo/beta_used": 0.016711510717868805,
"beta_dpo/beta_used_raw": 0.00014704966451972723,
"beta_dpo/gap_mean": 17.954086303710938,
"beta_dpo/gap_std": 29.141178131103516,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.38952879581151834,
"grad_norm": 43.99291229248047,
"learning_rate": 3.8438923131177237e-07,
"logits/chosen": 1.7304484844207764,
"logits/rejected": 1.6357572078704834,
"loss": 4.5823,
"step": 186
},
{
"beta_dpo/beta_used": 0.010872665792703629,
"beta_dpo/beta_used_raw": -0.0031126337125897408,
"beta_dpo/gap_mean": 16.949188232421875,
"beta_dpo/gap_std": 30.313583374023438,
"beta_dpo/mask_keep_frac": 0.59375,
"epoch": 0.39162303664921466,
"grad_norm": 29.05012321472168,
"learning_rate": 3.828418903848593e-07,
"logits/chosen": 1.5062894821166992,
"logits/rejected": 1.626598834991455,
"loss": 4.9126,
"step": 187
},
{
"beta_dpo/beta_used": 0.021171947941184044,
"beta_dpo/beta_used_raw": 0.010028712451457977,
"beta_dpo/gap_mean": 16.50074577331543,
"beta_dpo/gap_std": 30.938051223754883,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.393717277486911,
"grad_norm": 45.898658752441406,
"learning_rate": 3.812874255505191e-07,
"logits/chosen": 1.5269906520843506,
"logits/rejected": 1.3458209037780762,
"loss": 4.5338,
"step": 188
},
{
"beta_dpo/beta_used": 0.03484039008617401,
"beta_dpo/beta_used_raw": 0.022049371153116226,
"beta_dpo/gap_mean": 17.477540969848633,
"beta_dpo/gap_std": 29.527908325195312,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.3958115183246073,
"grad_norm": 70.90847778320312,
"learning_rate": 3.797259201699833e-07,
"logits/chosen": 1.5551743507385254,
"logits/rejected": 1.6014527082443237,
"loss": 3.8358,
"step": 189
},
{
"beta_dpo/beta_used": 0.0200703926384449,
"beta_dpo/beta_used_raw": 0.012458120472729206,
"beta_dpo/gap_mean": 18.339256286621094,
"beta_dpo/gap_std": 28.938512802124023,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.39790575916230364,
"grad_norm": 56.81261444091797,
"learning_rate": 3.781574579820464e-07,
"logits/chosen": 0.9362454414367676,
"logits/rejected": 0.9899096488952637,
"loss": 4.414,
"step": 190
},
{
"beta_dpo/beta_used": 0.01700519025325775,
"beta_dpo/beta_used_raw": -0.005064443219453096,
"beta_dpo/gap_mean": 18.290935516357422,
"beta_dpo/gap_std": 30.99585723876953,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.4,
"grad_norm": 56.54753494262695,
"learning_rate": 3.765821230985757e-07,
"logits/chosen": 1.404714822769165,
"logits/rejected": 1.5215625762939453,
"loss": 4.8064,
"step": 191
},
{
"beta_dpo/beta_used": 0.031894296407699585,
"beta_dpo/beta_used_raw": 0.020558489486575127,
"beta_dpo/gap_mean": 16.527379989624023,
"beta_dpo/gap_std": 31.373319625854492,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.40209424083769635,
"grad_norm": 70.80916595458984,
"learning_rate": 3.75e-07,
"logits/chosen": 1.5337512493133545,
"logits/rejected": 1.7164283990859985,
"loss": 4.1842,
"step": 192
},
{
"beta_dpo/beta_used": 0.01636136882007122,
"beta_dpo/beta_used_raw": 0.005046369507908821,
"beta_dpo/gap_mean": 15.377167701721191,
"beta_dpo/gap_std": 31.938879013061523,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.4041884816753927,
"grad_norm": 55.80176544189453,
"learning_rate": 3.734111735307796e-07,
"logits/chosen": 1.7271709442138672,
"logits/rejected": 1.558451533317566,
"loss": 4.6877,
"step": 193
},
{
"beta_dpo/beta_used": 0.007611277513206005,
"beta_dpo/beta_used_raw": -0.01890621893107891,
"beta_dpo/gap_mean": 14.705482482910156,
"beta_dpo/gap_std": 30.904098510742188,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.406282722513089,
"grad_norm": 22.856273651123047,
"learning_rate": 3.7181572889485623e-07,
"logits/chosen": 1.3973853588104248,
"logits/rejected": 1.4764728546142578,
"loss": 5.1599,
"step": 194
},
{
"beta_dpo/beta_used": 0.031484756618738174,
"beta_dpo/beta_used_raw": 0.0071922894567251205,
"beta_dpo/gap_mean": 13.331430435180664,
"beta_dpo/gap_std": 30.900182723999023,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.4083769633507853,
"grad_norm": 66.02396392822266,
"learning_rate": 3.7021375165108377e-07,
"logits/chosen": 1.272679328918457,
"logits/rejected": 1.2566474676132202,
"loss": 4.3933,
"step": 195
},
{
"beta_dpo/beta_used": 0.03322502225637436,
"beta_dpo/beta_used_raw": 0.02791755273938179,
"beta_dpo/gap_mean": 14.905118942260742,
"beta_dpo/gap_std": 30.485837936401367,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.41047120418848165,
"grad_norm": 79.28156280517578,
"learning_rate": 3.6860532770864005e-07,
"logits/chosen": 1.275534749031067,
"logits/rejected": 1.4435292482376099,
"loss": 4.0546,
"step": 196
},
{
"beta_dpo/beta_used": 0.04939180985093117,
"beta_dpo/beta_used_raw": 0.043553970754146576,
"beta_dpo/gap_mean": 18.400535583496094,
"beta_dpo/gap_std": 30.686927795410156,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.41256544502617803,
"grad_norm": 67.8173828125,
"learning_rate": 3.6699054332241985e-07,
"logits/chosen": 1.38494873046875,
"logits/rejected": 1.254716157913208,
"loss": 3.338,
"step": 197
},
{
"beta_dpo/beta_used": 0.022166196256875992,
"beta_dpo/beta_used_raw": 0.007883399724960327,
"beta_dpo/gap_mean": 20.444957733154297,
"beta_dpo/gap_std": 32.35297393798828,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.41465968586387436,
"grad_norm": 60.04436492919922,
"learning_rate": 3.653694850884091e-07,
"logits/chosen": 1.9333720207214355,
"logits/rejected": 2.020900011062622,
"loss": 4.4855,
"step": 198
},
{
"beta_dpo/beta_used": 0.02409629337489605,
"beta_dpo/beta_used_raw": 0.021340614184737206,
"beta_dpo/gap_mean": 19.7289981842041,
"beta_dpo/gap_std": 33.021812438964844,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.4167539267015707,
"grad_norm": 47.563514709472656,
"learning_rate": 3.6374223993904124e-07,
"logits/chosen": 0.8853669762611389,
"logits/rejected": 0.8789573907852173,
"loss": 4.2058,
"step": 199
},
{
"beta_dpo/beta_used": 0.019850242882966995,
"beta_dpo/beta_used_raw": 0.0021575437858700752,
"beta_dpo/gap_mean": 18.26460075378418,
"beta_dpo/gap_std": 35.18665313720703,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.418848167539267,
"grad_norm": 75.98006439208984,
"learning_rate": 3.621088951385353e-07,
"logits/chosen": 1.4998607635498047,
"logits/rejected": 1.4999333620071411,
"loss": 4.6811,
"step": 200
},
{
"epoch": 0.418848167539267,
"eval_beta_dpo/beta_used": 0.027261212468147278,
"eval_beta_dpo/beta_used_raw": 0.011497409082949162,
"eval_beta_dpo/gap_mean": 17.349489212036133,
"eval_beta_dpo/gap_std": 36.29584884643555,
"eval_beta_dpo/mask_keep_frac": 1.0,
"eval_logits/chosen": 1.4600857496261597,
"eval_logits/rejected": 1.4735403060913086,
"eval_loss": 0.582222044467926,
"eval_runtime": 93.942,
"eval_samples_per_second": 21.29,
"eval_steps_per_second": 1.331,
"step": 200
},
{
"beta_dpo/beta_used": 0.015480000525712967,
"beta_dpo/beta_used_raw": 0.0017268508672714233,
"beta_dpo/gap_mean": 16.916603088378906,
"beta_dpo/gap_std": 34.051475524902344,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.42094240837696334,
"grad_norm": 41.899105072021484,
"learning_rate": 3.604695382782159e-07,
"logits/chosen": 1.3517783880233765,
"logits/rejected": 1.4856456518173218,
"loss": 4.8087,
"step": 201
},
{
"beta_dpo/beta_used": 0.03667040914297104,
"beta_dpo/beta_used_raw": 0.02494371309876442,
"beta_dpo/gap_mean": 18.696678161621094,
"beta_dpo/gap_std": 34.44628143310547,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.42303664921465967,
"grad_norm": 94.09333038330078,
"learning_rate": 3.588242572718162e-07,
"logits/chosen": 1.9142837524414062,
"logits/rejected": 1.8261678218841553,
"loss": 4.2233,
"step": 202
},
{
"beta_dpo/beta_used": 0.017151907086372375,
"beta_dpo/beta_used_raw": 0.00911116972565651,
"beta_dpo/gap_mean": 16.54568862915039,
"beta_dpo/gap_std": 32.38970184326172,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.42513089005235605,
"grad_norm": 44.56381607055664,
"learning_rate": 3.571731403507635e-07,
"logits/chosen": 1.4302637577056885,
"logits/rejected": 1.2982755899429321,
"loss": 4.5763,
"step": 203
},
{
"beta_dpo/beta_used": 0.034039054065942764,
"beta_dpo/beta_used_raw": 0.0323847234249115,
"beta_dpo/gap_mean": 18.076196670532227,
"beta_dpo/gap_std": 31.370433807373047,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.4272251308900524,
"grad_norm": 71.95513153076172,
"learning_rate": 3.5551627605944746e-07,
"logits/chosen": 2.1505026817321777,
"logits/rejected": 2.025639772415161,
"loss": 3.8071,
"step": 204
},
{
"beta_dpo/beta_used": 0.027348071336746216,
"beta_dpo/beta_used_raw": 0.006836746819317341,
"beta_dpo/gap_mean": 18.946754455566406,
"beta_dpo/gap_std": 31.32244110107422,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4293193717277487,
"grad_norm": 45.75480651855469,
"learning_rate": 3.5385375325047163e-07,
"logits/chosen": 1.419930100440979,
"logits/rejected": 1.7142930030822754,
"loss": 4.1724,
"step": 205
},
{
"beta_dpo/beta_used": 0.016552381217479706,
"beta_dpo/beta_used_raw": -0.0049156793393194675,
"beta_dpo/gap_mean": 19.863826751708984,
"beta_dpo/gap_std": 30.71218490600586,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.431413612565445,
"grad_norm": 41.705875396728516,
"learning_rate": 3.5218566107988867e-07,
"logits/chosen": 1.124336838722229,
"logits/rejected": 1.3756214380264282,
"loss": 4.77,
"step": 206
},
{
"beta_dpo/beta_used": 0.015663469210267067,
"beta_dpo/beta_used_raw": 0.0052419002167880535,
"beta_dpo/gap_mean": 17.88925552368164,
"beta_dpo/gap_std": 31.518335342407227,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.43350785340314135,
"grad_norm": 69.29541015625,
"learning_rate": 3.505120890024195e-07,
"logits/chosen": 1.4753804206848145,
"logits/rejected": 1.621216058731079,
"loss": 4.8643,
"step": 207
},
{
"beta_dpo/beta_used": 0.02139691449701786,
"beta_dpo/beta_used_raw": 0.005481313914060593,
"beta_dpo/gap_mean": 16.749000549316406,
"beta_dpo/gap_std": 32.0452880859375,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.4356020942408377,
"grad_norm": 42.13248825073242,
"learning_rate": 3.4883312676665534e-07,
"logits/chosen": 1.683328628540039,
"logits/rejected": 1.6666276454925537,
"loss": 4.4627,
"step": 208
},
{
"beta_dpo/beta_used": 0.022991986945271492,
"beta_dpo/beta_used_raw": 0.0012511502718552947,
"beta_dpo/gap_mean": 16.64447784423828,
"beta_dpo/gap_std": 31.43779945373535,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.437696335078534,
"grad_norm": 47.651954650878906,
"learning_rate": 3.4714886441024573e-07,
"logits/chosen": 1.4982630014419556,
"logits/rejected": 1.2422916889190674,
"loss": 4.65,
"step": 209
},
{
"beta_dpo/beta_used": 0.023505035787820816,
"beta_dpo/beta_used_raw": 0.01351526565849781,
"beta_dpo/gap_mean": 16.755630493164062,
"beta_dpo/gap_std": 30.364093780517578,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4397905759162304,
"grad_norm": 40.224185943603516,
"learning_rate": 3.454593922550693e-07,
"logits/chosen": 1.622258186340332,
"logits/rejected": 1.7734078168869019,
"loss": 4.4717,
"step": 210
},
{
"beta_dpo/beta_used": 0.017476221546530724,
"beta_dpo/beta_used_raw": 0.009140146896243095,
"beta_dpo/gap_mean": 18.972339630126953,
"beta_dpo/gap_std": 29.722959518432617,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4418848167539267,
"grad_norm": 31.73094940185547,
"learning_rate": 3.4376480090239047e-07,
"logits/chosen": 1.476683497428894,
"logits/rejected": 1.5253487825393677,
"loss": 4.7,
"step": 211
},
{
"beta_dpo/beta_used": 0.019394179806113243,
"beta_dpo/beta_used_raw": 0.015454288572072983,
"beta_dpo/gap_mean": 19.75035858154297,
"beta_dpo/gap_std": 29.714906692504883,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.44397905759162304,
"grad_norm": 41.0455322265625,
"learning_rate": 3.4206518122800055e-07,
"logits/chosen": 1.2970361709594727,
"logits/rejected": 1.37529456615448,
"loss": 4.3472,
"step": 212
},
{
"beta_dpo/beta_used": 0.011834348551928997,
"beta_dpo/beta_used_raw": -0.017926108092069626,
"beta_dpo/gap_mean": 17.426942825317383,
"beta_dpo/gap_std": 29.695297241210938,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.44607329842931936,
"grad_norm": 33.91777038574219,
"learning_rate": 3.403606243773448e-07,
"logits/chosen": 1.5579262971878052,
"logits/rejected": 1.68187415599823,
"loss": 4.9313,
"step": 213
},
{
"beta_dpo/beta_used": 0.01894894242286682,
"beta_dpo/beta_used_raw": 0.013838745653629303,
"beta_dpo/gap_mean": 15.725707054138184,
"beta_dpo/gap_std": 30.105939865112305,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.4481675392670157,
"grad_norm": 39.40108871459961,
"learning_rate": 3.3865122176063385e-07,
"logits/chosen": 1.7685400247573853,
"logits/rejected": 1.8661746978759766,
"loss": 4.5791,
"step": 214
},
{
"beta_dpo/beta_used": 0.010954681783914566,
"beta_dpo/beta_used_raw": -0.014796811155974865,
"beta_dpo/gap_mean": 16.314573287963867,
"beta_dpo/gap_std": 32.43828201293945,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.450261780104712,
"grad_norm": 35.527313232421875,
"learning_rate": 3.3693706504794243e-07,
"logits/chosen": 2.244570732116699,
"logits/rejected": 2.2803215980529785,
"loss": 4.9752,
"step": 215
},
{
"beta_dpo/beta_used": 0.03374152258038521,
"beta_dpo/beta_used_raw": 0.027103085070848465,
"beta_dpo/gap_mean": 17.088348388671875,
"beta_dpo/gap_std": 31.838451385498047,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.4523560209424084,
"grad_norm": 109.9549331665039,
"learning_rate": 3.3521824616429284e-07,
"logits/chosen": 1.6181087493896484,
"logits/rejected": 1.51048743724823,
"loss": 4.0202,
"step": 216
},
{
"beta_dpo/beta_used": 0.02223392203450203,
"beta_dpo/beta_used_raw": 0.01797131821513176,
"beta_dpo/gap_mean": 18.80224609375,
"beta_dpo/gap_std": 33.52192306518555,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.4544502617801047,
"grad_norm": 46.53620910644531,
"learning_rate": 3.334948572847253e-07,
"logits/chosen": 1.5461317300796509,
"logits/rejected": 1.6692814826965332,
"loss": 4.1595,
"step": 217
},
{
"beta_dpo/beta_used": 0.0231946911662817,
"beta_dpo/beta_used_raw": 0.0011494825594127178,
"beta_dpo/gap_mean": 20.61969757080078,
"beta_dpo/gap_std": 33.30976486206055,
"beta_dpo/mask_keep_frac": 0.59375,
"epoch": 0.45654450261780105,
"grad_norm": 93.0323715209961,
"learning_rate": 3.317669908293554e-07,
"logits/chosen": 1.7362779378890991,
"logits/rejected": 1.9851727485656738,
"loss": 4.5681,
"step": 218
},
{
"beta_dpo/beta_used": 0.01022251509130001,
"beta_dpo/beta_used_raw": -0.0047258916310966015,
"beta_dpo/gap_mean": 20.800567626953125,
"beta_dpo/gap_std": 31.367717742919922,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4586387434554974,
"grad_norm": 25.090171813964844,
"learning_rate": 3.300347394584172e-07,
"logits/chosen": 1.3783564567565918,
"logits/rejected": 1.4508250951766968,
"loss": 4.8685,
"step": 219
},
{
"beta_dpo/beta_used": 0.010799276642501354,
"beta_dpo/beta_used_raw": -0.0034070992842316628,
"beta_dpo/gap_mean": 21.666975021362305,
"beta_dpo/gap_std": 31.608016967773438,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.4607329842931937,
"grad_norm": 27.500185012817383,
"learning_rate": 3.2829819606729477e-07,
"logits/chosen": 2.0254147052764893,
"logits/rejected": 1.8281564712524414,
"loss": 4.886,
"step": 220
},
{
"beta_dpo/beta_used": 0.014519060961902142,
"beta_dpo/beta_used_raw": -0.009499384090304375,
"beta_dpo/gap_mean": 19.58493423461914,
"beta_dpo/gap_std": 32.41563415527344,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.46282722513089003,
"grad_norm": 48.36823272705078,
"learning_rate": 3.265574537815398e-07,
"logits/chosen": 1.2991694211959839,
"logits/rejected": 1.4876360893249512,
"loss": 4.8269,
"step": 221
},
{
"beta_dpo/beta_used": 0.015203127637505531,
"beta_dpo/beta_used_raw": -0.0034404161851853132,
"beta_dpo/gap_mean": 19.56608009338379,
"beta_dpo/gap_std": 32.176658630371094,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.4649214659685864,
"grad_norm": 48.740413665771484,
"learning_rate": 3.248126059518784e-07,
"logits/chosen": 1.5680516958236694,
"logits/rejected": 1.4788140058517456,
"loss": 4.7036,
"step": 222
},
{
"beta_dpo/beta_used": 0.04362927004694939,
"beta_dpo/beta_used_raw": 0.04143592342734337,
"beta_dpo/gap_mean": 20.544513702392578,
"beta_dpo/gap_std": 32.305206298828125,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.46701570680628274,
"grad_norm": 91.45673370361328,
"learning_rate": 3.230637461492043e-07,
"logits/chosen": 1.3730167150497437,
"logits/rejected": 1.3536475896835327,
"loss": 3.6045,
"step": 223
},
{
"beta_dpo/beta_used": 0.027421563863754272,
"beta_dpo/beta_used_raw": 0.016309306025505066,
"beta_dpo/gap_mean": 20.78533935546875,
"beta_dpo/gap_std": 32.98493957519531,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.46910994764397906,
"grad_norm": 50.12267303466797,
"learning_rate": 3.213109681595612e-07,
"logits/chosen": 1.4133144617080688,
"logits/rejected": 1.5317778587341309,
"loss": 4.1259,
"step": 224
},
{
"beta_dpo/beta_used": 0.01402560155838728,
"beta_dpo/beta_used_raw": -0.016284221783280373,
"beta_dpo/gap_mean": 21.371601104736328,
"beta_dpo/gap_std": 34.09131622314453,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.4712041884816754,
"grad_norm": 37.89866256713867,
"learning_rate": 3.1955436597911315e-07,
"logits/chosen": 1.8815144300460815,
"logits/rejected": 1.992702603340149,
"loss": 4.6298,
"step": 225
},
{
"beta_dpo/beta_used": 0.022889500483870506,
"beta_dpo/beta_used_raw": 0.017042387276887894,
"beta_dpo/gap_mean": 17.320327758789062,
"beta_dpo/gap_std": 35.05849075317383,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.4732984293193717,
"grad_norm": 60.14391326904297,
"learning_rate": 3.1779403380910425e-07,
"logits/chosen": 1.0302306413650513,
"logits/rejected": 1.2303485870361328,
"loss": 4.3797,
"step": 226
},
{
"beta_dpo/beta_used": 0.04840033873915672,
"beta_dpo/beta_used_raw": 0.047016169875860214,
"beta_dpo/gap_mean": 18.82254409790039,
"beta_dpo/gap_std": 34.905059814453125,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.47539267015706804,
"grad_norm": 90.10204315185547,
"learning_rate": 3.160300660508064e-07,
"logits/chosen": 1.6820147037506104,
"logits/rejected": 1.8873445987701416,
"loss": 3.4083,
"step": 227
},
{
"beta_dpo/beta_used": 0.02424338273704052,
"beta_dpo/beta_used_raw": 0.009696955792605877,
"beta_dpo/gap_mean": 21.77010726928711,
"beta_dpo/gap_std": 34.2744140625,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.4774869109947644,
"grad_norm": 56.25212860107422,
"learning_rate": 3.1426255730045695e-07,
"logits/chosen": 1.5530939102172852,
"logits/rejected": 1.6357148885726929,
"loss": 4.4004,
"step": 228
},
{
"beta_dpo/beta_used": 0.03371588513255119,
"beta_dpo/beta_used_raw": 0.027584807947278023,
"beta_dpo/gap_mean": 25.558032989501953,
"beta_dpo/gap_std": 33.908870697021484,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.47958115183246075,
"grad_norm": 96.84803771972656,
"learning_rate": 3.1249160234418644e-07,
"logits/chosen": 1.348872184753418,
"logits/rejected": 1.2927398681640625,
"loss": 3.7788,
"step": 229
},
{
"beta_dpo/beta_used": 0.008082384243607521,
"beta_dpo/beta_used_raw": -0.00950661115348339,
"beta_dpo/gap_mean": 25.10620880126953,
"beta_dpo/gap_std": 34.92431640625,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.4816753926701571,
"grad_norm": 20.94957160949707,
"learning_rate": 3.1071729615293424e-07,
"logits/chosen": 1.1827516555786133,
"logits/rejected": 1.1730360984802246,
"loss": 4.9364,
"step": 230
},
{
"beta_dpo/beta_used": 0.005764795932918787,
"beta_dpo/beta_used_raw": -0.024570820853114128,
"beta_dpo/gap_mean": 22.6708927154541,
"beta_dpo/gap_std": 34.03562927246094,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.4837696335078534,
"grad_norm": 38.76413345336914,
"learning_rate": 3.0893973387735683e-07,
"logits/chosen": 1.2684296369552612,
"logits/rejected": 1.329715609550476,
"loss": 5.1608,
"step": 231
},
{
"beta_dpo/beta_used": 0.021905038505792618,
"beta_dpo/beta_used_raw": -0.017752759158611298,
"beta_dpo/gap_mean": 20.692659378051758,
"beta_dpo/gap_std": 33.874855041503906,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.48586387434554973,
"grad_norm": 112.87725067138672,
"learning_rate": 3.071590108427243e-07,
"logits/chosen": 1.426222562789917,
"logits/rejected": 1.5956566333770752,
"loss": 4.6128,
"step": 232
},
{
"beta_dpo/beta_used": 0.039701350033283234,
"beta_dpo/beta_used_raw": 0.035932619124650955,
"beta_dpo/gap_mean": 21.13761329650879,
"beta_dpo/gap_std": 34.44068908691406,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.48795811518324606,
"grad_norm": 89.65320587158203,
"learning_rate": 3.05375222543809e-07,
"logits/chosen": 1.137376070022583,
"logits/rejected": 1.239527940750122,
"loss": 3.6241,
"step": 233
},
{
"beta_dpo/beta_used": 0.020853759720921516,
"beta_dpo/beta_used_raw": 0.006785106845200062,
"beta_dpo/gap_mean": 22.310590744018555,
"beta_dpo/gap_std": 36.559181213378906,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4900523560209424,
"grad_norm": 152.07374572753906,
"learning_rate": 3.035884646397637e-07,
"logits/chosen": 1.3747183084487915,
"logits/rejected": 1.4081201553344727,
"loss": 4.5591,
"step": 234
},
{
"beta_dpo/beta_used": 0.022356968373060226,
"beta_dpo/beta_used_raw": 0.017687149345874786,
"beta_dpo/gap_mean": 21.469078063964844,
"beta_dpo/gap_std": 38.99213790893555,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.49214659685863876,
"grad_norm": 49.92569351196289,
"learning_rate": 3.017988329489923e-07,
"logits/chosen": 1.6978657245635986,
"logits/rejected": 1.6188864707946777,
"loss": 4.497,
"step": 235
},
{
"beta_dpo/beta_used": 0.027202440425753593,
"beta_dpo/beta_used_raw": 0.013716357760131359,
"beta_dpo/gap_mean": 21.86897087097168,
"beta_dpo/gap_std": 38.970787048339844,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.4942408376963351,
"grad_norm": 51.87431335449219,
"learning_rate": 3.000064234440111e-07,
"logits/chosen": 1.4140355587005615,
"logits/rejected": 1.421186923980713,
"loss": 4.3147,
"step": 236
},
{
"beta_dpo/beta_used": 0.026827599853277206,
"beta_dpo/beta_used_raw": 0.002097531221807003,
"beta_dpo/gap_mean": 21.94005584716797,
"beta_dpo/gap_std": 36.81498718261719,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4963350785340314,
"grad_norm": 54.699974060058594,
"learning_rate": 2.9821133224630223e-07,
"logits/chosen": 1.4084728956222534,
"logits/rejected": 1.6357187032699585,
"loss": 4.0251,
"step": 237
},
{
"beta_dpo/beta_used": 0.013866505585610867,
"beta_dpo/beta_used_raw": -0.01890200935304165,
"beta_dpo/gap_mean": 23.559459686279297,
"beta_dpo/gap_std": 35.92485427856445,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.49842931937172774,
"grad_norm": 52.4506950378418,
"learning_rate": 2.964136556211588e-07,
"logits/chosen": 1.2949869632720947,
"logits/rejected": 1.249887228012085,
"loss": 4.7275,
"step": 238
},
{
"beta_dpo/beta_used": 0.02205376699566841,
"beta_dpo/beta_used_raw": 0.0070870416238904,
"beta_dpo/gap_mean": 21.201807022094727,
"beta_dpo/gap_std": 37.64961624145508,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5005235602094241,
"grad_norm": 65.6231918334961,
"learning_rate": 2.946134899725226e-07,
"logits/chosen": 1.542831540107727,
"logits/rejected": 1.6906412839889526,
"loss": 4.8225,
"step": 239
},
{
"beta_dpo/beta_used": 0.016875216737389565,
"beta_dpo/beta_used_raw": 0.013624901883304119,
"beta_dpo/gap_mean": 22.050025939941406,
"beta_dpo/gap_std": 35.68221664428711,
"beta_dpo/mask_keep_frac": 0.9375,
"epoch": 0.5026178010471204,
"grad_norm": 71.22918701171875,
"learning_rate": 2.9281093183781403e-07,
"logits/chosen": 1.3054808378219604,
"logits/rejected": 1.2251484394073486,
"loss": 4.3994,
"step": 240
},
{
"beta_dpo/beta_used": 0.010428352281451225,
"beta_dpo/beta_used_raw": -0.009664381854236126,
"beta_dpo/gap_mean": 20.70039176940918,
"beta_dpo/gap_std": 36.04539108276367,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.5047120418848168,
"grad_norm": 35.78901290893555,
"learning_rate": 2.910060778827554e-07,
"logits/chosen": 1.4216902256011963,
"logits/rejected": 1.5455743074417114,
"loss": 5.0779,
"step": 241
},
{
"beta_dpo/beta_used": 0.03037761151790619,
"beta_dpo/beta_used_raw": 0.01391815859824419,
"beta_dpo/gap_mean": 21.673847198486328,
"beta_dpo/gap_std": 35.858516693115234,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.506806282722513,
"grad_norm": 76.41270446777344,
"learning_rate": 2.891990248961871e-07,
"logits/chosen": 1.8587148189544678,
"logits/rejected": 1.6864495277404785,
"loss": 4.2734,
"step": 242
},
{
"beta_dpo/beta_used": 0.035171203315258026,
"beta_dpo/beta_used_raw": 0.02459963783621788,
"beta_dpo/gap_mean": 23.178098678588867,
"beta_dpo/gap_std": 35.096439361572266,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.5089005235602094,
"grad_norm": 77.48847198486328,
"learning_rate": 2.873898697848762e-07,
"logits/chosen": 1.6573126316070557,
"logits/rejected": 1.6771302223205566,
"loss": 3.6758,
"step": 243
},
{
"beta_dpo/beta_used": 0.02781713753938675,
"beta_dpo/beta_used_raw": 0.016341306269168854,
"beta_dpo/gap_mean": 26.562307357788086,
"beta_dpo/gap_std": 36.088531494140625,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.5109947643979058,
"grad_norm": 90.8469009399414,
"learning_rate": 2.8557870956832133e-07,
"logits/chosen": 1.3104796409606934,
"logits/rejected": 1.1022838354110718,
"loss": 4.1908,
"step": 244
},
{
"beta_dpo/beta_used": 0.023681480437517166,
"beta_dpo/beta_used_raw": 0.007075564004480839,
"beta_dpo/gap_mean": 26.648090362548828,
"beta_dpo/gap_std": 35.5743522644043,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.5130890052356021,
"grad_norm": 49.45195007324219,
"learning_rate": 2.837656413735479e-07,
"logits/chosen": 1.8954524993896484,
"logits/rejected": 1.5859884023666382,
"loss": 4.2197,
"step": 245
},
{
"beta_dpo/beta_used": 0.007673209998756647,
"beta_dpo/beta_used_raw": -0.03442414849996567,
"beta_dpo/gap_mean": 23.275249481201172,
"beta_dpo/gap_std": 37.69624328613281,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.5151832460732985,
"grad_norm": 34.83370590209961,
"learning_rate": 2.8195076242990116e-07,
"logits/chosen": 1.075661301612854,
"logits/rejected": 0.9320878982543945,
"loss": 5.0958,
"step": 246
},
{
"beta_dpo/beta_used": 0.02569355070590973,
"beta_dpo/beta_used_raw": 0.011104905046522617,
"beta_dpo/gap_mean": 21.302507400512695,
"beta_dpo/gap_std": 37.52021789550781,
"beta_dpo/mask_keep_frac": 0.59375,
"epoch": 0.5172774869109947,
"grad_norm": 60.37564468383789,
"learning_rate": 2.801341700638307e-07,
"logits/chosen": 1.1491472721099854,
"logits/rejected": 0.9154660105705261,
"loss": 4.1681,
"step": 247
},
{
"beta_dpo/beta_used": 0.0077388836070895195,
"beta_dpo/beta_used_raw": -0.01996331661939621,
"beta_dpo/gap_mean": 21.50804328918457,
"beta_dpo/gap_std": 37.68701934814453,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.5193717277486911,
"grad_norm": 42.47214126586914,
"learning_rate": 2.7831596169367227e-07,
"logits/chosen": 1.0854613780975342,
"logits/rejected": 1.1457273960113525,
"loss": 5.0609,
"step": 248
},
{
"beta_dpo/beta_used": 0.00828784704208374,
"beta_dpo/beta_used_raw": -0.007833743467926979,
"beta_dpo/gap_mean": 20.011716842651367,
"beta_dpo/gap_std": 37.14725875854492,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5214659685863874,
"grad_norm": 41.61552810668945,
"learning_rate": 2.7649623482442274e-07,
"logits/chosen": 1.2434636354446411,
"logits/rejected": 1.2950477600097656,
"loss": 5.0897,
"step": 249
},
{
"beta_dpo/beta_used": 0.028967518359422684,
"beta_dpo/beta_used_raw": 0.017602279782295227,
"beta_dpo/gap_mean": 21.15532112121582,
"beta_dpo/gap_std": 36.99894714355469,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5235602094240838,
"grad_norm": 111.2298583984375,
"learning_rate": 2.7467508704251135e-07,
"logits/chosen": 1.5354533195495605,
"logits/rejected": 1.6301560401916504,
"loss": 4.2794,
"step": 250
},
{
"beta_dpo/beta_used": 0.02736206352710724,
"beta_dpo/beta_used_raw": 0.006099463440477848,
"beta_dpo/gap_mean": 20.01749038696289,
"beta_dpo/gap_std": 37.12480926513672,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.5256544502617801,
"grad_norm": 79.339599609375,
"learning_rate": 2.7285261601056697e-07,
"logits/chosen": 1.3763610124588013,
"logits/rejected": 1.155696988105774,
"loss": 4.4696,
"step": 251
},
{
"beta_dpo/beta_used": 0.03472306579351425,
"beta_dpo/beta_used_raw": 0.02851836569607258,
"beta_dpo/gap_mean": 22.56066131591797,
"beta_dpo/gap_std": 38.38005065917969,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.5277486910994764,
"grad_norm": 57.71303939819336,
"learning_rate": 2.7102891946217994e-07,
"logits/chosen": 1.829942226409912,
"logits/rejected": 1.845513105392456,
"loss": 3.7725,
"step": 252
},
{
"beta_dpo/beta_used": 0.030697450041770935,
"beta_dpo/beta_used_raw": 0.01660301722586155,
"beta_dpo/gap_mean": 19.772396087646484,
"beta_dpo/gap_std": 39.422203063964844,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.5298429319371728,
"grad_norm": 70.57060241699219,
"learning_rate": 2.692040951966617e-07,
"logits/chosen": 1.419633388519287,
"logits/rejected": 1.3010826110839844,
"loss": 4.158,
"step": 253
},
{
"beta_dpo/beta_used": 0.03239889442920685,
"beta_dpo/beta_used_raw": 0.021261408925056458,
"beta_dpo/gap_mean": 19.49216079711914,
"beta_dpo/gap_std": 36.011436462402344,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.5319371727748691,
"grad_norm": 85.16039276123047,
"learning_rate": 2.6737824107379947e-07,
"logits/chosen": 1.652917504310608,
"logits/rejected": 1.5930885076522827,
"loss": 4.1323,
"step": 254
},
{
"beta_dpo/beta_used": 0.07058847695589066,
"beta_dpo/beta_used_raw": 0.0682307779788971,
"beta_dpo/gap_mean": 22.544225692749023,
"beta_dpo/gap_std": 38.23542022705078,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.5340314136125655,
"grad_norm": 126.1849365234375,
"learning_rate": 2.655514550086086e-07,
"logits/chosen": 1.4259027242660522,
"logits/rejected": 1.4180747270584106,
"loss": 2.8543,
"step": 255
},
{
"beta_dpo/beta_used": 0.035115234553813934,
"beta_dpo/beta_used_raw": 0.01770986244082451,
"beta_dpo/gap_mean": 25.101337432861328,
"beta_dpo/gap_std": 40.27662658691406,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.5361256544502618,
"grad_norm": 74.76777648925781,
"learning_rate": 2.6372383496608186e-07,
"logits/chosen": 1.584543228149414,
"logits/rejected": 1.6146832704544067,
"loss": 4.0922,
"step": 256
},
{
"beta_dpo/beta_used": 0.02713741734623909,
"beta_dpo/beta_used_raw": 0.0023514775093644857,
"beta_dpo/gap_mean": 26.48859977722168,
"beta_dpo/gap_std": 40.16349792480469,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.5382198952879581,
"grad_norm": 87.41287231445312,
"learning_rate": 2.618954789559356e-07,
"logits/chosen": 1.334143042564392,
"logits/rejected": 1.4390063285827637,
"loss": 4.1405,
"step": 257
},
{
"beta_dpo/beta_used": 0.024670587852597237,
"beta_dpo/beta_used_raw": 0.006600758992135525,
"beta_dpo/gap_mean": 24.859146118164062,
"beta_dpo/gap_std": 38.38996505737305,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.5403141361256545,
"grad_norm": 78.93328094482422,
"learning_rate": 2.600664850273538e-07,
"logits/chosen": 1.2462736368179321,
"logits/rejected": 1.4119253158569336,
"loss": 4.1682,
"step": 258
},
{
"beta_dpo/beta_used": 0.026468459516763687,
"beta_dpo/beta_used_raw": 0.009973703883588314,
"beta_dpo/gap_mean": 22.97103500366211,
"beta_dpo/gap_std": 37.827335357666016,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.5424083769633508,
"grad_norm": 62.47282409667969,
"learning_rate": 2.582369512637302e-07,
"logits/chosen": 1.400333285331726,
"logits/rejected": 1.3363168239593506,
"loss": 4.2019,
"step": 259
},
{
"beta_dpo/beta_used": 0.0057443841360509396,
"beta_dpo/beta_used_raw": -0.03509850427508354,
"beta_dpo/gap_mean": 19.301353454589844,
"beta_dpo/gap_std": 37.98316192626953,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.5445026178010471,
"grad_norm": 29.450904846191406,
"learning_rate": 2.5640697577740815e-07,
"logits/chosen": 1.2627638578414917,
"logits/rejected": 1.3713899850845337,
"loss": 5.2443,
"step": 260
},
{
"beta_dpo/beta_used": 0.02370859682559967,
"beta_dpo/beta_used_raw": 0.009769135154783726,
"beta_dpo/gap_mean": 17.073835372924805,
"beta_dpo/gap_std": 38.706729888916016,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.5465968586387434,
"grad_norm": 119.15771484375,
"learning_rate": 2.5457665670441937e-07,
"logits/chosen": 0.9551135301589966,
"logits/rejected": 0.7918010354042053,
"loss": 4.8051,
"step": 261
},
{
"beta_dpo/beta_used": 0.01725778356194496,
"beta_dpo/beta_used_raw": 0.007985102012753487,
"beta_dpo/gap_mean": 19.15559959411621,
"beta_dpo/gap_std": 37.25046920776367,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.5486910994764398,
"grad_norm": 42.16154479980469,
"learning_rate": 2.527460921992209e-07,
"logits/chosen": 1.7428507804870605,
"logits/rejected": 1.745199203491211,
"loss": 4.7292,
"step": 262
},
{
"beta_dpo/beta_used": 0.027581116184592247,
"beta_dpo/beta_used_raw": 0.0017390409484505653,
"beta_dpo/gap_mean": 21.374671936035156,
"beta_dpo/gap_std": 36.47187805175781,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.5507853403141362,
"grad_norm": 72.0134506225586,
"learning_rate": 2.509153804294318e-07,
"logits/chosen": 1.3248748779296875,
"logits/rejected": 1.480365514755249,
"loss": 4.2062,
"step": 263
},
{
"beta_dpo/beta_used": 0.015040460973978043,
"beta_dpo/beta_used_raw": -0.002720870077610016,
"beta_dpo/gap_mean": 22.537841796875,
"beta_dpo/gap_std": 36.9581298828125,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.5528795811518324,
"grad_norm": 53.91576385498047,
"learning_rate": 2.4908461957056825e-07,
"logits/chosen": 1.3922407627105713,
"logits/rejected": 1.1616618633270264,
"loss": 4.7735,
"step": 264
},
{
"beta_dpo/beta_used": 0.04024341329932213,
"beta_dpo/beta_used_raw": 0.02337898127734661,
"beta_dpo/gap_mean": 23.94507598876953,
"beta_dpo/gap_std": 36.818138122558594,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.5549738219895288,
"grad_norm": 190.59609985351562,
"learning_rate": 2.4725390780077905e-07,
"logits/chosen": 1.6322290897369385,
"logits/rejected": 1.6508582830429077,
"loss": 4.2363,
"step": 265
},
{
"beta_dpo/beta_used": 0.026812460273504257,
"beta_dpo/beta_used_raw": 0.015981679782271385,
"beta_dpo/gap_mean": 23.17593002319336,
"beta_dpo/gap_std": 35.23807907104492,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.5570680628272251,
"grad_norm": 115.75420379638672,
"learning_rate": 2.454233432955807e-07,
"logits/chosen": 1.3934905529022217,
"logits/rejected": 1.4551239013671875,
"loss": 4.156,
"step": 266
},
{
"beta_dpo/beta_used": 0.014945639297366142,
"beta_dpo/beta_used_raw": -0.003206442343071103,
"beta_dpo/gap_mean": 22.777759552001953,
"beta_dpo/gap_std": 35.72869873046875,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.5591623036649215,
"grad_norm": 42.64310073852539,
"learning_rate": 2.435930242225919e-07,
"logits/chosen": 1.5525813102722168,
"logits/rejected": 1.673789143562317,
"loss": 4.8052,
"step": 267
},
{
"beta_dpo/beta_used": 0.030046723783016205,
"beta_dpo/beta_used_raw": 0.024244606494903564,
"beta_dpo/gap_mean": 21.284276962280273,
"beta_dpo/gap_std": 36.792415618896484,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.5612565445026177,
"grad_norm": 85.80408477783203,
"learning_rate": 2.4176304873626984e-07,
"logits/chosen": 1.1172372102737427,
"logits/rejected": 1.1572062969207764,
"loss": 4.0405,
"step": 268
},
{
"beta_dpo/beta_used": 0.016361307352781296,
"beta_dpo/beta_used_raw": -0.008380460552871227,
"beta_dpo/gap_mean": 21.142919540405273,
"beta_dpo/gap_std": 36.69437789916992,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5633507853403141,
"grad_norm": 30.00682258605957,
"learning_rate": 2.399335149726463e-07,
"logits/chosen": 1.3953180313110352,
"logits/rejected": 1.582595944404602,
"loss": 4.8939,
"step": 269
},
{
"beta_dpo/beta_used": 0.024136360734701157,
"beta_dpo/beta_used_raw": 0.01455269567668438,
"beta_dpo/gap_mean": 20.730382919311523,
"beta_dpo/gap_std": 38.18457794189453,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.5654450261780105,
"grad_norm": 104.796630859375,
"learning_rate": 2.381045210440644e-07,
"logits/chosen": 1.706362009048462,
"logits/rejected": 1.9905970096588135,
"loss": 4.8619,
"step": 270
},
{
"beta_dpo/beta_used": 0.015366212464869022,
"beta_dpo/beta_used_raw": -0.010098990984261036,
"beta_dpo/gap_mean": 20.525156021118164,
"beta_dpo/gap_std": 36.195465087890625,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.5675392670157068,
"grad_norm": 27.481109619140625,
"learning_rate": 2.3627616503391812e-07,
"logits/chosen": 1.2522549629211426,
"logits/rejected": 1.3000314235687256,
"loss": 4.6612,
"step": 271
},
{
"beta_dpo/beta_used": 0.02246049977838993,
"beta_dpo/beta_used_raw": 0.002398681826889515,
"beta_dpo/gap_mean": 20.4349365234375,
"beta_dpo/gap_std": 35.98146438598633,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.5696335078534032,
"grad_norm": 223.59896850585938,
"learning_rate": 2.344485449913914e-07,
"logits/chosen": 1.606691837310791,
"logits/rejected": 1.451743483543396,
"loss": 4.7041,
"step": 272
},
{
"beta_dpo/beta_used": 0.025656994432210922,
"beta_dpo/beta_used_raw": 0.001691313460469246,
"beta_dpo/gap_mean": 21.252532958984375,
"beta_dpo/gap_std": 34.84130096435547,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.5717277486910994,
"grad_norm": 60.19879913330078,
"learning_rate": 2.3262175892620062e-07,
"logits/chosen": 1.5752846002578735,
"logits/rejected": 1.6109840869903564,
"loss": 4.3398,
"step": 273
},
{
"beta_dpo/beta_used": 0.024387702345848083,
"beta_dpo/beta_used_raw": 0.01869470439851284,
"beta_dpo/gap_mean": 22.542556762695312,
"beta_dpo/gap_std": 35.69194030761719,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.5738219895287958,
"grad_norm": 37.60686492919922,
"learning_rate": 2.3079590480333827e-07,
"logits/chosen": 1.6102871894836426,
"logits/rejected": 1.7174773216247559,
"loss": 4.1491,
"step": 274
},
{
"beta_dpo/beta_used": 0.043057817965745926,
"beta_dpo/beta_used_raw": 0.04157021641731262,
"beta_dpo/gap_mean": 24.984006881713867,
"beta_dpo/gap_std": 35.83733367919922,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5759162303664922,
"grad_norm": 96.29705810546875,
"learning_rate": 2.2897108053782e-07,
"logits/chosen": 1.1287708282470703,
"logits/rejected": 1.208784818649292,
"loss": 3.0424,
"step": 275
},
{
"beta_dpo/beta_used": 0.0038480497896671295,
"beta_dpo/beta_used_raw": -0.015348054468631744,
"beta_dpo/gap_mean": 25.66550064086914,
"beta_dpo/gap_std": 33.74402618408203,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.5780104712041885,
"grad_norm": 24.424198150634766,
"learning_rate": 2.2714738398943308e-07,
"logits/chosen": 1.8258295059204102,
"logits/rejected": 1.6733819246292114,
"loss": 5.2051,
"step": 276
},
{
"beta_dpo/beta_used": 0.017688903957605362,
"beta_dpo/beta_used_raw": -0.011028681881725788,
"beta_dpo/gap_mean": 22.55120086669922,
"beta_dpo/gap_std": 35.05712890625,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.5801047120418849,
"grad_norm": 33.83370590209961,
"learning_rate": 2.2532491295748865e-07,
"logits/chosen": 1.1561347246170044,
"logits/rejected": 1.3503713607788086,
"loss": 4.532,
"step": 277
},
{
"beta_dpo/beta_used": 0.019777359440922737,
"beta_dpo/beta_used_raw": -0.004533551167696714,
"beta_dpo/gap_mean": 19.028533935546875,
"beta_dpo/gap_std": 36.112735748291016,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.5821989528795811,
"grad_norm": 42.212650299072266,
"learning_rate": 2.2350376517557726e-07,
"logits/chosen": 1.0686261653900146,
"logits/rejected": 1.0221307277679443,
"loss": 4.6354,
"step": 278
},
{
"beta_dpo/beta_used": 0.02981048822402954,
"beta_dpo/beta_used_raw": 0.028192678466439247,
"beta_dpo/gap_mean": 19.808574676513672,
"beta_dpo/gap_std": 35.35283660888672,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.5842931937172775,
"grad_norm": 53.312747955322266,
"learning_rate": 2.2168403830632769e-07,
"logits/chosen": 1.2553820610046387,
"logits/rejected": 1.2719086408615112,
"loss": 3.9651,
"step": 279
},
{
"beta_dpo/beta_used": 0.0026543322019279003,
"beta_dpo/beta_used_raw": -0.015082788653671741,
"beta_dpo/gap_mean": 21.008886337280273,
"beta_dpo/gap_std": 34.17639923095703,
"beta_dpo/mask_keep_frac": 0.59375,
"epoch": 0.5863874345549738,
"grad_norm": 11.17526912689209,
"learning_rate": 2.1986582993616925e-07,
"logits/chosen": 1.5121065378189087,
"logits/rejected": 1.5147109031677246,
"loss": 5.2115,
"step": 280
},
{
"beta_dpo/beta_used": 0.015546365641057491,
"beta_dpo/beta_used_raw": -0.014291130006313324,
"beta_dpo/gap_mean": 20.403629302978516,
"beta_dpo/gap_std": 34.77376174926758,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.5884816753926702,
"grad_norm": 57.08203125,
"learning_rate": 2.1804923757009882e-07,
"logits/chosen": 1.4907077550888062,
"logits/rejected": 1.448096513748169,
"loss": 4.8509,
"step": 281
},
{
"beta_dpo/beta_used": 0.013758410699665546,
"beta_dpo/beta_used_raw": -0.0017688155639916658,
"beta_dpo/gap_mean": 20.669015884399414,
"beta_dpo/gap_std": 35.69584274291992,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.5905759162303665,
"grad_norm": 28.517318725585938,
"learning_rate": 2.1623435862645205e-07,
"logits/chosen": 1.7699363231658936,
"logits/rejected": 1.8309452533721924,
"loss": 5.0077,
"step": 282
},
{
"beta_dpo/beta_used": 0.028719400987029076,
"beta_dpo/beta_used_raw": 0.018162164837121964,
"beta_dpo/gap_mean": 20.43427276611328,
"beta_dpo/gap_std": 35.05901336669922,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.5926701570680628,
"grad_norm": 74.51838684082031,
"learning_rate": 2.1442129043167873e-07,
"logits/chosen": 1.243952751159668,
"logits/rejected": 1.4681645631790161,
"loss": 4.1383,
"step": 283
},
{
"beta_dpo/beta_used": 0.022418132051825523,
"beta_dpo/beta_used_raw": -0.00897371955215931,
"beta_dpo/gap_mean": 20.829967498779297,
"beta_dpo/gap_std": 37.05330276489258,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.5947643979057592,
"grad_norm": 53.890785217285156,
"learning_rate": 2.1261013021512378e-07,
"logits/chosen": 1.3836698532104492,
"logits/rejected": 1.3280866146087646,
"loss": 4.7208,
"step": 284
},
{
"beta_dpo/beta_used": 0.0186537504196167,
"beta_dpo/beta_used_raw": -0.003015751950442791,
"beta_dpo/gap_mean": 18.022796630859375,
"beta_dpo/gap_std": 36.89912414550781,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.5968586387434555,
"grad_norm": 28.00040626525879,
"learning_rate": 2.1080097510381294e-07,
"logits/chosen": 1.706050157546997,
"logits/rejected": 1.584727168083191,
"loss": 4.632,
"step": 285
},
{
"beta_dpo/beta_used": 0.013129707425832748,
"beta_dpo/beta_used_raw": -0.00048280227929353714,
"beta_dpo/gap_mean": 19.448501586914062,
"beta_dpo/gap_std": 36.36820983886719,
"beta_dpo/mask_keep_frac": 0.90625,
"epoch": 0.5989528795811518,
"grad_norm": 51.00930404663086,
"learning_rate": 2.089939221172446e-07,
"logits/chosen": 1.2181655168533325,
"logits/rejected": 1.2918510437011719,
"loss": 4.8983,
"step": 286
},
{
"beta_dpo/beta_used": 0.0334862619638443,
"beta_dpo/beta_used_raw": 0.031023263931274414,
"beta_dpo/gap_mean": 20.484294891357422,
"beta_dpo/gap_std": 38.072418212890625,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.6010471204188481,
"grad_norm": 68.44963073730469,
"learning_rate": 2.0718906816218595e-07,
"logits/chosen": 1.4797168970108032,
"logits/rejected": 1.5804214477539062,
"loss": 4.3089,
"step": 287
},
{
"beta_dpo/beta_used": 0.031299516558647156,
"beta_dpo/beta_used_raw": 0.020958131179213524,
"beta_dpo/gap_mean": 19.536659240722656,
"beta_dpo/gap_std": 37.194252014160156,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.6031413612565445,
"grad_norm": 125.42591857910156,
"learning_rate": 2.053865100274774e-07,
"logits/chosen": 1.6277129650115967,
"logits/rejected": 1.4404486417770386,
"loss": 4.2485,
"step": 288
},
{
"beta_dpo/beta_used": 0.013463410548865795,
"beta_dpo/beta_used_raw": -0.002038992242887616,
"beta_dpo/gap_mean": 18.123918533325195,
"beta_dpo/gap_std": 37.70576477050781,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.6052356020942409,
"grad_norm": 50.54543685913086,
"learning_rate": 2.035863443788411e-07,
"logits/chosen": 1.6278074979782104,
"logits/rejected": 1.5724064111709595,
"loss": 4.813,
"step": 289
},
{
"beta_dpo/beta_used": 0.013063677586615086,
"beta_dpo/beta_used_raw": -0.02197786420583725,
"beta_dpo/gap_mean": 19.04131317138672,
"beta_dpo/gap_std": 35.90309524536133,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.6073298429319371,
"grad_norm": 41.749141693115234,
"learning_rate": 2.0178866775369774e-07,
"logits/chosen": 1.3940989971160889,
"logits/rejected": 1.3121880292892456,
"loss": 4.8478,
"step": 290
},
{
"beta_dpo/beta_used": 0.03433792293071747,
"beta_dpo/beta_used_raw": 0.0014921380206942558,
"beta_dpo/gap_mean": 20.723804473876953,
"beta_dpo/gap_std": 36.17911148071289,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.6094240837696335,
"grad_norm": 95.32479095458984,
"learning_rate": 1.9999357655598891e-07,
"logits/chosen": 1.084555983543396,
"logits/rejected": 1.1702072620391846,
"loss": 4.7487,
"step": 291
},
{
"beta_dpo/beta_used": 0.03228276968002319,
"beta_dpo/beta_used_raw": 0.018787425011396408,
"beta_dpo/gap_mean": 20.76034164428711,
"beta_dpo/gap_std": 37.097103118896484,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.6115183246073298,
"grad_norm": 67.72441864013672,
"learning_rate": 1.9820116705100775e-07,
"logits/chosen": 1.160035252571106,
"logits/rejected": 1.1472792625427246,
"loss": 3.9976,
"step": 292
},
{
"beta_dpo/beta_used": 0.02482818439602852,
"beta_dpo/beta_used_raw": -0.0007117787608876824,
"beta_dpo/gap_mean": 20.157255172729492,
"beta_dpo/gap_std": 39.040748596191406,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.6136125654450262,
"grad_norm": 244.3824462890625,
"learning_rate": 1.9641153536023642e-07,
"logits/chosen": 2.0036768913269043,
"logits/rejected": 1.8342108726501465,
"loss": 4.5759,
"step": 293
},
{
"beta_dpo/beta_used": 0.02537180297076702,
"beta_dpo/beta_used_raw": 0.0016407333314418793,
"beta_dpo/gap_mean": 21.209617614746094,
"beta_dpo/gap_std": 38.50959777832031,
"beta_dpo/mask_keep_frac": 0.90625,
"epoch": 0.6157068062827226,
"grad_norm": 76.85967254638672,
"learning_rate": 1.9462477745619106e-07,
"logits/chosen": 1.4297269582748413,
"logits/rejected": 1.5640549659729004,
"loss": 4.6346,
"step": 294
},
{
"beta_dpo/beta_used": 0.03157725930213928,
"beta_dpo/beta_used_raw": 0.02505682222545147,
"beta_dpo/gap_mean": 21.574724197387695,
"beta_dpo/gap_std": 39.374446868896484,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.6178010471204188,
"grad_norm": 86.37284851074219,
"learning_rate": 1.928409891572757e-07,
"logits/chosen": 1.1579641103744507,
"logits/rejected": 1.1256705522537231,
"loss": 4.4772,
"step": 295
},
{
"beta_dpo/beta_used": 0.03921440243721008,
"beta_dpo/beta_used_raw": 0.030128249898552895,
"beta_dpo/gap_mean": 26.082651138305664,
"beta_dpo/gap_std": 39.295570373535156,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.6198952879581152,
"grad_norm": 129.71774291992188,
"learning_rate": 1.9106026612264315e-07,
"logits/chosen": 1.5179616212844849,
"logits/rejected": 1.6978120803833008,
"loss": 3.8258,
"step": 296
},
{
"beta_dpo/beta_used": 0.029375022277235985,
"beta_dpo/beta_used_raw": 0.011093353852629662,
"beta_dpo/gap_mean": 27.48119354248047,
"beta_dpo/gap_std": 39.495452880859375,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.6219895287958115,
"grad_norm": 118.33712768554688,
"learning_rate": 1.8928270384706582e-07,
"logits/chosen": 1.495194435119629,
"logits/rejected": 1.649183988571167,
"loss": 4.2477,
"step": 297
},
{
"beta_dpo/beta_used": 0.03636423125863075,
"beta_dpo/beta_used_raw": 0.028217561542987823,
"beta_dpo/gap_mean": 26.37271499633789,
"beta_dpo/gap_std": 39.67487335205078,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.6240837696335079,
"grad_norm": 85.49053955078125,
"learning_rate": 1.875083976558136e-07,
"logits/chosen": 1.4574960470199585,
"logits/rejected": 1.3186194896697998,
"loss": 3.8972,
"step": 298
},
{
"beta_dpo/beta_used": 0.02849549427628517,
"beta_dpo/beta_used_raw": -0.0016860419418662786,
"beta_dpo/gap_mean": 24.45018196105957,
"beta_dpo/gap_std": 39.10914993286133,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.6261780104712041,
"grad_norm": 41.191104888916016,
"learning_rate": 1.8573744269954297e-07,
"logits/chosen": 1.6376529932022095,
"logits/rejected": 1.6397225856781006,
"loss": 3.844,
"step": 299
},
{
"beta_dpo/beta_used": 0.02068179100751877,
"beta_dpo/beta_used_raw": 0.005061999429017305,
"beta_dpo/gap_mean": 22.947368621826172,
"beta_dpo/gap_std": 38.15463638305664,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.6282722513089005,
"grad_norm": 54.7095947265625,
"learning_rate": 1.839699339491937e-07,
"logits/chosen": 1.2076692581176758,
"logits/rejected": 1.2860641479492188,
"loss": 4.3858,
"step": 300
},
{
"beta_dpo/beta_used": 0.03375673294067383,
"beta_dpo/beta_used_raw": 0.011599482968449593,
"beta_dpo/gap_mean": 21.817138671875,
"beta_dpo/gap_std": 40.71202850341797,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6303664921465969,
"grad_norm": 67.1680908203125,
"learning_rate": 1.8220596619089573e-07,
"logits/chosen": 1.5903642177581787,
"logits/rejected": 1.5883557796478271,
"loss": 3.9801,
"step": 301
},
{
"beta_dpo/beta_used": 0.031289342790842056,
"beta_dpo/beta_used_raw": 0.02020403742790222,
"beta_dpo/gap_mean": 22.630334854125977,
"beta_dpo/gap_std": 39.44662094116211,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.6324607329842932,
"grad_norm": 73.67294311523438,
"learning_rate": 1.8044563402088682e-07,
"logits/chosen": 1.4647196531295776,
"logits/rejected": 1.6538636684417725,
"loss": 3.8922,
"step": 302
},
{
"beta_dpo/beta_used": 0.026227440685033798,
"beta_dpo/beta_used_raw": 0.01093749888241291,
"beta_dpo/gap_mean": 21.381053924560547,
"beta_dpo/gap_std": 40.288665771484375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6345549738219896,
"grad_norm": 75.99285888671875,
"learning_rate": 1.7868903184043885e-07,
"logits/chosen": 1.345954179763794,
"logits/rejected": 1.4914484024047852,
"loss": 4.3761,
"step": 303
},
{
"beta_dpo/beta_used": 0.024651650339365005,
"beta_dpo/beta_used_raw": 0.010574829764664173,
"beta_dpo/gap_mean": 21.974733352661133,
"beta_dpo/gap_std": 38.83090591430664,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.6366492146596858,
"grad_norm": 230.4051513671875,
"learning_rate": 1.7693625385079574e-07,
"logits/chosen": 1.2385737895965576,
"logits/rejected": 1.2572718858718872,
"loss": 4.7737,
"step": 304
},
{
"beta_dpo/beta_used": 0.023414814844727516,
"beta_dpo/beta_used_raw": 0.013659648597240448,
"beta_dpo/gap_mean": 24.257299423217773,
"beta_dpo/gap_std": 38.524078369140625,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.6387434554973822,
"grad_norm": 46.621604919433594,
"learning_rate": 1.7518739404812155e-07,
"logits/chosen": 1.235711932182312,
"logits/rejected": 1.2289034128189087,
"loss": 4.3571,
"step": 305
},
{
"beta_dpo/beta_used": 0.0353800505399704,
"beta_dpo/beta_used_raw": -0.00411562342196703,
"beta_dpo/gap_mean": 26.567459106445312,
"beta_dpo/gap_std": 40.30250549316406,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.6408376963350786,
"grad_norm": 185.1968536376953,
"learning_rate": 1.7344254621846017e-07,
"logits/chosen": 1.48641836643219,
"logits/rejected": 1.3792299032211304,
"loss": 4.523,
"step": 306
},
{
"beta_dpo/beta_used": 0.057250961661338806,
"beta_dpo/beta_used_raw": 0.05049164220690727,
"beta_dpo/gap_mean": 26.73577117919922,
"beta_dpo/gap_std": 40.15787124633789,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.6429319371727749,
"grad_norm": 112.26713562011719,
"learning_rate": 1.717018039327053e-07,
"logits/chosen": 1.2322039604187012,
"logits/rejected": 1.3177506923675537,
"loss": 2.6335,
"step": 307
},
{
"beta_dpo/beta_used": 0.012129316106438637,
"beta_dpo/beta_used_raw": -0.021789539605379105,
"beta_dpo/gap_mean": 25.66850471496582,
"beta_dpo/gap_std": 39.91798400878906,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6450261780104712,
"grad_norm": 49.086910247802734,
"learning_rate": 1.699652605415828e-07,
"logits/chosen": 1.3670289516448975,
"logits/rejected": 1.3430283069610596,
"loss": 4.7575,
"step": 308
},
{
"beta_dpo/beta_used": 0.04727376997470856,
"beta_dpo/beta_used_raw": 0.04426693171262741,
"beta_dpo/gap_mean": 24.053421020507812,
"beta_dpo/gap_std": 41.2784309387207,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.6471204188481675,
"grad_norm": 212.80130004882812,
"learning_rate": 1.6823300917064458e-07,
"logits/chosen": 1.8778178691864014,
"logits/rejected": 1.6358754634857178,
"loss": 4.2193,
"step": 309
},
{
"beta_dpo/beta_used": 0.019439999014139175,
"beta_dpo/beta_used_raw": -0.004033832810819149,
"beta_dpo/gap_mean": 24.703720092773438,
"beta_dpo/gap_std": 41.20947265625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6492146596858639,
"grad_norm": 48.24752426147461,
"learning_rate": 1.6650514271527465e-07,
"logits/chosen": 1.368004560470581,
"logits/rejected": 1.6040199995040894,
"loss": 4.5291,
"step": 310
},
{
"beta_dpo/beta_used": 0.015120752155780792,
"beta_dpo/beta_used_raw": -0.0021106062922626734,
"beta_dpo/gap_mean": 23.902956008911133,
"beta_dpo/gap_std": 41.10802459716797,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6513089005235602,
"grad_norm": 83.40555572509766,
"learning_rate": 1.647817538357072e-07,
"logits/chosen": 1.4084839820861816,
"logits/rejected": 1.5573794841766357,
"loss": 5.0402,
"step": 311
},
{
"beta_dpo/beta_used": 0.03297141566872597,
"beta_dpo/beta_used_raw": 0.015015541575849056,
"beta_dpo/gap_mean": 25.408002853393555,
"beta_dpo/gap_std": 40.86416244506836,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.6534031413612565,
"grad_norm": 73.86492156982422,
"learning_rate": 1.6306293495205755e-07,
"logits/chosen": 1.538864016532898,
"logits/rejected": 1.5750356912612915,
"loss": 4.1733,
"step": 312
},
{
"beta_dpo/beta_used": 0.026556478813290596,
"beta_dpo/beta_used_raw": 0.014360915869474411,
"beta_dpo/gap_mean": 22.71212387084961,
"beta_dpo/gap_std": 41.899532318115234,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.6554973821989529,
"grad_norm": 43.307254791259766,
"learning_rate": 1.6134877823936607e-07,
"logits/chosen": 1.4833365678787231,
"logits/rejected": 1.5087875127792358,
"loss": 4.3745,
"step": 313
},
{
"beta_dpo/beta_used": 0.04993228241801262,
"beta_dpo/beta_used_raw": 0.04400447756052017,
"beta_dpo/gap_mean": 23.01084327697754,
"beta_dpo/gap_std": 41.7484245300293,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.6575916230366492,
"grad_norm": 185.30311584472656,
"learning_rate": 1.5963937562265522e-07,
"logits/chosen": 1.5994868278503418,
"logits/rejected": 1.6039897203445435,
"loss": 3.9626,
"step": 314
},
{
"beta_dpo/beta_used": 0.019904792308807373,
"beta_dpo/beta_used_raw": 0.0038104329723864794,
"beta_dpo/gap_mean": 24.50067710876465,
"beta_dpo/gap_std": 41.975162506103516,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.6596858638743456,
"grad_norm": 60.188743591308594,
"learning_rate": 1.5793481877199943e-07,
"logits/chosen": 1.8757685422897339,
"logits/rejected": 1.802669644355774,
"loss": 4.3242,
"step": 315
},
{
"beta_dpo/beta_used": 0.011747484095394611,
"beta_dpo/beta_used_raw": -0.011036318726837635,
"beta_dpo/gap_mean": 25.946598052978516,
"beta_dpo/gap_std": 41.94285583496094,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6617801047120419,
"grad_norm": 96.64191436767578,
"learning_rate": 1.562351990976095e-07,
"logits/chosen": 1.2265623807907104,
"logits/rejected": 1.3494703769683838,
"loss": 4.9269,
"step": 316
},
{
"beta_dpo/beta_used": 0.01594529114663601,
"beta_dpo/beta_used_raw": -0.011434204876422882,
"beta_dpo/gap_mean": 25.075801849365234,
"beta_dpo/gap_std": 42.253684997558594,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.6638743455497382,
"grad_norm": 66.5694580078125,
"learning_rate": 1.5454060774493065e-07,
"logits/chosen": 1.4082281589508057,
"logits/rejected": 1.4196900129318237,
"loss": 4.7001,
"step": 317
},
{
"beta_dpo/beta_used": 0.04018227756023407,
"beta_dpo/beta_used_raw": 0.030732491984963417,
"beta_dpo/gap_mean": 24.28862953186035,
"beta_dpo/gap_std": 38.98953628540039,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.6659685863874345,
"grad_norm": 67.83879852294922,
"learning_rate": 1.5285113558975427e-07,
"logits/chosen": 1.5352228879928589,
"logits/rejected": 1.7299730777740479,
"loss": 3.609,
"step": 318
},
{
"beta_dpo/beta_used": 0.021217646077275276,
"beta_dpo/beta_used_raw": 0.006635315250605345,
"beta_dpo/gap_mean": 25.60199737548828,
"beta_dpo/gap_std": 38.8849983215332,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.6680628272251309,
"grad_norm": 51.74640655517578,
"learning_rate": 1.5116687323334464e-07,
"logits/chosen": 1.2286893129348755,
"logits/rejected": 1.462414026260376,
"loss": 4.2736,
"step": 319
},
{
"beta_dpo/beta_used": 0.015490580350160599,
"beta_dpo/beta_used_raw": -0.004652615636587143,
"beta_dpo/gap_mean": 25.7495059967041,
"beta_dpo/gap_std": 39.36385726928711,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6701570680628273,
"grad_norm": 66.98085021972656,
"learning_rate": 1.4948791099758052e-07,
"logits/chosen": 1.9294114112854004,
"logits/rejected": 1.8916367292404175,
"loss": 4.5611,
"step": 320
},
{
"beta_dpo/beta_used": 0.02924424409866333,
"beta_dpo/beta_used_raw": 0.012194283306598663,
"beta_dpo/gap_mean": 23.17910385131836,
"beta_dpo/gap_std": 40.0921745300293,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.6722513089005235,
"grad_norm": 75.77815246582031,
"learning_rate": 1.478143389201113e-07,
"logits/chosen": 1.6986711025238037,
"logits/rejected": 1.4788739681243896,
"loss": 3.9257,
"step": 321
},
{
"beta_dpo/beta_used": 0.01892891526222229,
"beta_dpo/beta_used_raw": 0.0015440168790519238,
"beta_dpo/gap_mean": 23.837888717651367,
"beta_dpo/gap_std": 39.51669692993164,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.6743455497382199,
"grad_norm": 61.61996841430664,
"learning_rate": 1.461462467495284e-07,
"logits/chosen": 1.2796248197555542,
"logits/rejected": 1.2974272966384888,
"loss": 4.6315,
"step": 322
},
{
"beta_dpo/beta_used": 0.02444988675415516,
"beta_dpo/beta_used_raw": -0.004915682598948479,
"beta_dpo/gap_mean": 22.961061477661133,
"beta_dpo/gap_std": 40.85033416748047,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6764397905759162,
"grad_norm": 61.971153259277344,
"learning_rate": 1.4448372394055246e-07,
"logits/chosen": 1.2066650390625,
"logits/rejected": 0.9574912190437317,
"loss": 4.1271,
"step": 323
},
{
"beta_dpo/beta_used": 0.04017874598503113,
"beta_dpo/beta_used_raw": 0.02891341596841812,
"beta_dpo/gap_mean": 23.883920669555664,
"beta_dpo/gap_std": 40.295066833496094,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.6785340314136126,
"grad_norm": 67.87089538574219,
"learning_rate": 1.428268596492364e-07,
"logits/chosen": 1.6108598709106445,
"logits/rejected": 1.5994318723678589,
"loss": 3.8856,
"step": 324
},
{
"beta_dpo/beta_used": 0.04682011157274246,
"beta_dpo/beta_used_raw": 0.020984284579753876,
"beta_dpo/gap_mean": 25.911354064941406,
"beta_dpo/gap_std": 41.97956085205078,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.680628272251309,
"grad_norm": 139.75146484375,
"learning_rate": 1.4117574272818386e-07,
"logits/chosen": 1.6725175380706787,
"logits/rejected": 1.797964096069336,
"loss": 4.4611,
"step": 325
},
{
"beta_dpo/beta_used": 0.01575140468776226,
"beta_dpo/beta_used_raw": -0.0014644484035670757,
"beta_dpo/gap_mean": 23.560775756835938,
"beta_dpo/gap_std": 44.54059982299805,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.6827225130890052,
"grad_norm": 56.50615310668945,
"learning_rate": 1.3953046172178413e-07,
"logits/chosen": 1.166620135307312,
"logits/rejected": 1.4378832578659058,
"loss": 4.8138,
"step": 326
},
{
"beta_dpo/beta_used": 0.060598503798246384,
"beta_dpo/beta_used_raw": 0.054446715861558914,
"beta_dpo/gap_mean": 25.692852020263672,
"beta_dpo/gap_std": 43.64955520629883,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6848167539267016,
"grad_norm": 121.72166442871094,
"learning_rate": 1.3789110486146468e-07,
"logits/chosen": 1.5548646450042725,
"logits/rejected": 1.4554078578948975,
"loss": 3.1471,
"step": 327
},
{
"beta_dpo/beta_used": 0.015800345689058304,
"beta_dpo/beta_used_raw": -0.010136552155017853,
"beta_dpo/gap_mean": 27.02881622314453,
"beta_dpo/gap_std": 41.867454528808594,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.6869109947643979,
"grad_norm": 41.46779251098633,
"learning_rate": 1.362577600609588e-07,
"logits/chosen": 1.3131914138793945,
"logits/rejected": 1.3917593955993652,
"loss": 4.4447,
"step": 328
},
{
"beta_dpo/beta_used": 0.013338714838027954,
"beta_dpo/beta_used_raw": -0.004739915020763874,
"beta_dpo/gap_mean": 25.284814834594727,
"beta_dpo/gap_std": 41.969566345214844,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.6890052356020943,
"grad_norm": 60.99818420410156,
"learning_rate": 1.3463051491159093e-07,
"logits/chosen": 1.4903924465179443,
"logits/rejected": 1.814817190170288,
"loss": 5.1013,
"step": 329
},
{
"beta_dpo/beta_used": 0.028788069263100624,
"beta_dpo/beta_used_raw": 0.005851927679032087,
"beta_dpo/gap_mean": 22.452590942382812,
"beta_dpo/gap_std": 44.61354064941406,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.6910994764397905,
"grad_norm": 179.97225952148438,
"learning_rate": 1.3300945667758012e-07,
"logits/chosen": 1.6997681856155396,
"logits/rejected": 1.6331228017807007,
"loss": 4.3589,
"step": 330
},
{
"beta_dpo/beta_used": 0.028657177463173866,
"beta_dpo/beta_used_raw": 0.019459933042526245,
"beta_dpo/gap_mean": 23.764484405517578,
"beta_dpo/gap_std": 42.601539611816406,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.6931937172774869,
"grad_norm": 48.68358612060547,
"learning_rate": 1.3139467229135998e-07,
"logits/chosen": 1.441627860069275,
"logits/rejected": 1.3355118036270142,
"loss": 4.303,
"step": 331
},
{
"beta_dpo/beta_used": 0.04341350123286247,
"beta_dpo/beta_used_raw": 0.038200560957193375,
"beta_dpo/gap_mean": 26.584733963012695,
"beta_dpo/gap_std": 41.82080078125,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.6952879581151833,
"grad_norm": 73.14544677734375,
"learning_rate": 1.2978624834891626e-07,
"logits/chosen": 1.2019636631011963,
"logits/rejected": 1.203635334968567,
"loss": 3.8045,
"step": 332
},
{
"beta_dpo/beta_used": 0.011696412228047848,
"beta_dpo/beta_used_raw": -0.014751153066754341,
"beta_dpo/gap_mean": 23.98305892944336,
"beta_dpo/gap_std": 42.328861236572266,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.6973821989528796,
"grad_norm": 47.275943756103516,
"learning_rate": 1.281842711051438e-07,
"logits/chosen": 1.2524588108062744,
"logits/rejected": 1.1359145641326904,
"loss": 4.9502,
"step": 333
},
{
"beta_dpo/beta_used": 0.034421779215335846,
"beta_dpo/beta_used_raw": 0.018691357225179672,
"beta_dpo/gap_mean": 22.934709548950195,
"beta_dpo/gap_std": 41.71361541748047,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.6994764397905759,
"grad_norm": 63.18965530395508,
"learning_rate": 1.2658882646922033e-07,
"logits/chosen": 1.3189448118209839,
"logits/rejected": 1.3639788627624512,
"loss": 3.9628,
"step": 334
},
{
"beta_dpo/beta_used": 0.02628299593925476,
"beta_dpo/beta_used_raw": -0.008556408807635307,
"beta_dpo/gap_mean": 23.939117431640625,
"beta_dpo/gap_std": 43.04575729370117,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7015706806282722,
"grad_norm": 158.49334716796875,
"learning_rate": 1.2500000000000005e-07,
"logits/chosen": 1.460978627204895,
"logits/rejected": 1.5252642631530762,
"loss": 4.7371,
"step": 335
},
{
"beta_dpo/beta_used": 0.013360177166759968,
"beta_dpo/beta_used_raw": -0.006929943338036537,
"beta_dpo/gap_mean": 21.377792358398438,
"beta_dpo/gap_std": 43.017784118652344,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.7036649214659686,
"grad_norm": 49.986663818359375,
"learning_rate": 1.2341787690142435e-07,
"logits/chosen": 1.5372939109802246,
"logits/rejected": 1.7963600158691406,
"loss": 4.9661,
"step": 336
},
{
"beta_dpo/beta_used": 0.03531493619084358,
"beta_dpo/beta_used_raw": 0.009031134657561779,
"beta_dpo/gap_mean": 21.560890197753906,
"beta_dpo/gap_std": 42.4267578125,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7057591623036649,
"grad_norm": 93.49922943115234,
"learning_rate": 1.2184254201795363e-07,
"logits/chosen": 1.0734624862670898,
"logits/rejected": 0.9902403950691223,
"loss": 4.5839,
"step": 337
},
{
"beta_dpo/beta_used": 0.026785733178257942,
"beta_dpo/beta_used_raw": 0.008016789332032204,
"beta_dpo/gap_mean": 24.554834365844727,
"beta_dpo/gap_std": 42.207237243652344,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7078534031413612,
"grad_norm": 270.1446533203125,
"learning_rate": 1.202740798300168e-07,
"logits/chosen": 1.5387308597564697,
"logits/rejected": 1.5395488739013672,
"loss": 4.6984,
"step": 338
},
{
"beta_dpo/beta_used": 0.03279449790716171,
"beta_dpo/beta_used_raw": 0.020053986459970474,
"beta_dpo/gap_mean": 27.445066452026367,
"beta_dpo/gap_std": 43.14484405517578,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.7099476439790576,
"grad_norm": 70.26140594482422,
"learning_rate": 1.1871257444948096e-07,
"logits/chosen": 1.5849591493606567,
"logits/rejected": 1.5081734657287598,
"loss": 4.0688,
"step": 339
},
{
"beta_dpo/beta_used": 0.013446008786559105,
"beta_dpo/beta_used_raw": -0.02429656684398651,
"beta_dpo/gap_mean": 26.41143226623535,
"beta_dpo/gap_std": 44.58018493652344,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.7120418848167539,
"grad_norm": 44.72693634033203,
"learning_rate": 1.1715810961514072e-07,
"logits/chosen": 0.8878348469734192,
"logits/rejected": 1.03843355178833,
"loss": 4.9074,
"step": 340
},
{
"beta_dpo/beta_used": 0.02605244144797325,
"beta_dpo/beta_used_raw": -0.017769023776054382,
"beta_dpo/gap_mean": 21.7451114654541,
"beta_dpo/gap_std": 44.111759185791016,
"beta_dpo/mask_keep_frac": 0.90625,
"epoch": 0.7141361256544503,
"grad_norm": 71.31874084472656,
"learning_rate": 1.1561076868822755e-07,
"logits/chosen": 1.4821139574050903,
"logits/rejected": 1.688697338104248,
"loss": 4.741,
"step": 341
},
{
"beta_dpo/beta_used": 0.039557162672281265,
"beta_dpo/beta_used_raw": 0.024851929396390915,
"beta_dpo/gap_mean": 22.442163467407227,
"beta_dpo/gap_std": 42.288307189941406,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.7162303664921466,
"grad_norm": 90.50724029541016,
"learning_rate": 1.1407063464793965e-07,
"logits/chosen": 1.515696406364441,
"logits/rejected": 1.6636167764663696,
"loss": 3.8821,
"step": 342
},
{
"beta_dpo/beta_used": 0.028740962967276573,
"beta_dpo/beta_used_raw": 0.023837603628635406,
"beta_dpo/gap_mean": 22.93502426147461,
"beta_dpo/gap_std": 41.14816665649414,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.7183246073298429,
"grad_norm": 84.98859405517578,
"learning_rate": 1.125377900869913e-07,
"logits/chosen": 1.6616275310516357,
"logits/rejected": 1.49526846408844,
"loss": 4.1559,
"step": 343
},
{
"beta_dpo/beta_used": 0.03510721027851105,
"beta_dpo/beta_used_raw": 0.019166965037584305,
"beta_dpo/gap_mean": 22.779037475585938,
"beta_dpo/gap_std": 41.92900085449219,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.7204188481675393,
"grad_norm": 148.84140014648438,
"learning_rate": 1.110123172071844e-07,
"logits/chosen": 1.341618537902832,
"logits/rejected": 1.4202890396118164,
"loss": 4.5051,
"step": 344
},
{
"beta_dpo/beta_used": 0.02456255815923214,
"beta_dpo/beta_used_raw": -0.002841557841747999,
"beta_dpo/gap_mean": 23.927555084228516,
"beta_dpo/gap_std": 41.32786560058594,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.7225130890052356,
"grad_norm": 71.29635620117188,
"learning_rate": 1.09494297815e-07,
"logits/chosen": 1.6482702493667603,
"logits/rejected": 1.768045425415039,
"loss": 4.6483,
"step": 345
},
{
"beta_dpo/beta_used": 0.03388482332229614,
"beta_dpo/beta_used_raw": 0.01795162260532379,
"beta_dpo/gap_mean": 24.46042251586914,
"beta_dpo/gap_std": 38.79722595214844,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.724607329842932,
"grad_norm": 70.42410278320312,
"learning_rate": 1.0798381331721107e-07,
"logits/chosen": 1.0491037368774414,
"logits/rejected": 1.1440801620483398,
"loss": 4.0215,
"step": 346
},
{
"beta_dpo/beta_used": 0.028078395873308182,
"beta_dpo/beta_used_raw": 0.014503560960292816,
"beta_dpo/gap_mean": 25.07908058166504,
"beta_dpo/gap_std": 40.29609680175781,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.7267015706806282,
"grad_norm": 71.0637435913086,
"learning_rate": 1.0648094471651722e-07,
"logits/chosen": 1.4748269319534302,
"logits/rejected": 1.4847553968429565,
"loss": 4.2448,
"step": 347
},
{
"beta_dpo/beta_used": 0.014106756076216698,
"beta_dpo/beta_used_raw": -0.01745045930147171,
"beta_dpo/gap_mean": 20.426612854003906,
"beta_dpo/gap_std": 37.750858306884766,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.7287958115183246,
"grad_norm": 41.9898681640625,
"learning_rate": 1.0498577260720048e-07,
"logits/chosen": 1.4606678485870361,
"logits/rejected": 1.539605736732483,
"loss": 4.7306,
"step": 348
},
{
"beta_dpo/beta_used": 0.03836182504892349,
"beta_dpo/beta_used_raw": 0.027038609609007835,
"beta_dpo/gap_mean": 23.00733757019043,
"beta_dpo/gap_std": 40.6578369140625,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.7308900523560209,
"grad_norm": 116.7526626586914,
"learning_rate": 1.0349837717080347e-07,
"logits/chosen": 1.5413777828216553,
"logits/rejected": 1.6035332679748535,
"loss": 4.5157,
"step": 349
},
{
"beta_dpo/beta_used": 0.03894190117716789,
"beta_dpo/beta_used_raw": 0.026062268763780594,
"beta_dpo/gap_mean": 24.171770095825195,
"beta_dpo/gap_std": 41.29063415527344,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.7329842931937173,
"grad_norm": 110.67535400390625,
"learning_rate": 1.0201883817182949e-07,
"logits/chosen": 1.7762742042541504,
"logits/rejected": 1.5685731172561646,
"loss": 3.9019,
"step": 350
},
{
"beta_dpo/beta_used": 0.005233833100646734,
"beta_dpo/beta_used_raw": -0.016874097287654877,
"beta_dpo/gap_mean": 21.94039535522461,
"beta_dpo/gap_std": 42.503211975097656,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.7350785340314137,
"grad_norm": 17.592376708984375,
"learning_rate": 1.0054723495346482e-07,
"logits/chosen": 1.4498162269592285,
"logits/rejected": 1.4771305322647095,
"loss": 5.2076,
"step": 351
},
{
"beta_dpo/beta_used": 0.05153050646185875,
"beta_dpo/beta_used_raw": 0.0483248271048069,
"beta_dpo/gap_mean": 23.78329086303711,
"beta_dpo/gap_std": 43.25350570678711,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.7371727748691099,
"grad_norm": 260.2582092285156,
"learning_rate": 9.908364643332398e-08,
"logits/chosen": 1.537024974822998,
"logits/rejected": 1.781685471534729,
"loss": 3.9455,
"step": 352
},
{
"beta_dpo/beta_used": 0.030592329800128937,
"beta_dpo/beta_used_raw": 0.014342766255140305,
"beta_dpo/gap_mean": 25.760425567626953,
"beta_dpo/gap_std": 40.68629455566406,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.7392670157068063,
"grad_norm": 90.93749237060547,
"learning_rate": 9.76281510992176e-08,
"logits/chosen": 1.2568163871765137,
"logits/rejected": 1.252407193183899,
"loss": 4.1275,
"step": 353
},
{
"beta_dpo/beta_used": 0.012342535890638828,
"beta_dpo/beta_used_raw": -0.01871517114341259,
"beta_dpo/gap_mean": 23.74026870727539,
"beta_dpo/gap_std": 42.1845703125,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.7413612565445026,
"grad_norm": 94.49176025390625,
"learning_rate": 9.618082700494318e-08,
"logits/chosen": 1.3868615627288818,
"logits/rejected": 1.4805989265441895,
"loss": 6.0723,
"step": 354
},
{
"beta_dpo/beta_used": 0.04293268173933029,
"beta_dpo/beta_used_raw": 0.03582005202770233,
"beta_dpo/gap_mean": 23.41856575012207,
"beta_dpo/gap_std": 43.963043212890625,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.743455497382199,
"grad_norm": 109.3790054321289,
"learning_rate": 9.474175176609956e-08,
"logits/chosen": 1.5852292776107788,
"logits/rejected": 1.7418677806854248,
"loss": 4.0902,
"step": 355
},
{
"beta_dpo/beta_used": 0.017568301409482956,
"beta_dpo/beta_used_raw": -0.009104796685278416,
"beta_dpo/gap_mean": 22.803916931152344,
"beta_dpo/gap_std": 39.86484909057617,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.7455497382198953,
"grad_norm": 80.7624282836914,
"learning_rate": 9.331100255592436e-08,
"logits/chosen": 1.3812074661254883,
"logits/rejected": 1.4987109899520874,
"loss": 4.7965,
"step": 356
},
{
"beta_dpo/beta_used": 0.028205767273902893,
"beta_dpo/beta_used_raw": 0.009551008231937885,
"beta_dpo/gap_mean": 21.426677703857422,
"beta_dpo/gap_std": 41.5255012512207,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.7476439790575916,
"grad_norm": 198.40061950683594,
"learning_rate": 9.18886561011557e-08,
"logits/chosen": 1.535756230354309,
"logits/rejected": 1.5348542928695679,
"loss": 4.8634,
"step": 357
},
{
"beta_dpo/beta_used": 0.03555550426244736,
"beta_dpo/beta_used_raw": 0.02786700241267681,
"beta_dpo/gap_mean": 24.894935607910156,
"beta_dpo/gap_std": 42.7304801940918,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.749738219895288,
"grad_norm": 100.15424346923828,
"learning_rate": 9.047478867791731e-08,
"logits/chosen": 1.3941529989242554,
"logits/rejected": 1.3515270948410034,
"loss": 4.5553,
"step": 358
},
{
"beta_dpo/beta_used": 0.022278830409049988,
"beta_dpo/beta_used_raw": 0.012576328590512276,
"beta_dpo/gap_mean": 25.73493194580078,
"beta_dpo/gap_std": 42.311771392822266,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.7518324607329843,
"grad_norm": 63.86215591430664,
"learning_rate": 8.906947610762825e-08,
"logits/chosen": 1.4539521932601929,
"logits/rejected": 1.5561376810073853,
"loss": 4.4114,
"step": 359
},
{
"beta_dpo/beta_used": 0.011337094008922577,
"beta_dpo/beta_used_raw": -0.001419117208570242,
"beta_dpo/gap_mean": 25.067241668701172,
"beta_dpo/gap_std": 41.38372802734375,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.7539267015706806,
"grad_norm": 33.68746566772461,
"learning_rate": 8.76727937529367e-08,
"logits/chosen": 1.602333664894104,
"logits/rejected": 1.5335873365402222,
"loss": 4.7231,
"step": 360
},
{
"beta_dpo/beta_used": 0.05738076567649841,
"beta_dpo/beta_used_raw": 0.053437668830156326,
"beta_dpo/gap_mean": 26.05853271484375,
"beta_dpo/gap_std": 42.002994537353516,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.7560209424083769,
"grad_norm": 172.0302734375,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": 1.2185293436050415,
"logits/rejected": 1.4148153066635132,
"loss": 3.4371,
"step": 361
},
{
"beta_dpo/beta_used": 0.04557962343096733,
"beta_dpo/beta_used_raw": 0.027200574055314064,
"beta_dpo/gap_mean": 26.162132263183594,
"beta_dpo/gap_std": 42.437416076660156,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.7581151832460733,
"grad_norm": 90.24806213378906,
"learning_rate": 8.490561882286135e-08,
"logits/chosen": 1.3487976789474487,
"logits/rejected": 1.3411986827850342,
"loss": 3.4565,
"step": 362
},
{
"beta_dpo/beta_used": 0.03907949849963188,
"beta_dpo/beta_used_raw": 0.0354890413582325,
"beta_dpo/gap_mean": 25.548728942871094,
"beta_dpo/gap_std": 42.264503479003906,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7602094240837697,
"grad_norm": 101.23867797851562,
"learning_rate": 8.353527464267104e-08,
"logits/chosen": 1.5559055805206299,
"logits/rejected": 1.4353469610214233,
"loss": 3.6541,
"step": 363
},
{
"beta_dpo/beta_used": 0.019362712278962135,
"beta_dpo/beta_used_raw": -0.005188856739550829,
"beta_dpo/gap_mean": 24.893081665039062,
"beta_dpo/gap_std": 41.87436294555664,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.762303664921466,
"grad_norm": 84.14205932617188,
"learning_rate": 8.217385746050742e-08,
"logits/chosen": 1.8355655670166016,
"logits/rejected": 1.5974853038787842,
"loss": 4.7009,
"step": 364
},
{
"beta_dpo/beta_used": 0.02898905798792839,
"beta_dpo/beta_used_raw": 0.02243414893746376,
"beta_dpo/gap_mean": 23.674781799316406,
"beta_dpo/gap_std": 41.810665130615234,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7643979057591623,
"grad_norm": 77.57154083251953,
"learning_rate": 8.082144028504231e-08,
"logits/chosen": 1.512800693511963,
"logits/rejected": 1.7196999788284302,
"loss": 4.3814,
"step": 365
},
{
"beta_dpo/beta_used": 0.030239790678024292,
"beta_dpo/beta_used_raw": 0.0004999339580535889,
"beta_dpo/gap_mean": 25.40928840637207,
"beta_dpo/gap_std": 41.03025817871094,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.7664921465968586,
"grad_norm": 41.87646484375,
"learning_rate": 7.947809564230445e-08,
"logits/chosen": 1.4762005805969238,
"logits/rejected": 1.3744585514068604,
"loss": 4.3359,
"step": 366
},
{
"beta_dpo/beta_used": 0.028317891061306,
"beta_dpo/beta_used_raw": 0.008798494935035706,
"beta_dpo/gap_mean": 25.357412338256836,
"beta_dpo/gap_std": 39.42461013793945,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.768586387434555,
"grad_norm": 70.21991729736328,
"learning_rate": 7.814389557179016e-08,
"logits/chosen": 1.8320472240447998,
"logits/rejected": 1.5733611583709717,
"loss": 3.8554,
"step": 367
},
{
"beta_dpo/beta_used": 0.040644265711307526,
"beta_dpo/beta_used_raw": 0.029841335490345955,
"beta_dpo/gap_mean": 27.69914436340332,
"beta_dpo/gap_std": 39.52192687988281,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.7706806282722513,
"grad_norm": 51.4883918762207,
"learning_rate": 7.681891162260015e-08,
"logits/chosen": 1.7997376918792725,
"logits/rejected": 1.644882321357727,
"loss": 3.7779,
"step": 368
},
{
"beta_dpo/beta_used": 0.007025650702416897,
"beta_dpo/beta_used_raw": -0.022717807441949844,
"beta_dpo/gap_mean": 26.59383201599121,
"beta_dpo/gap_std": 39.74239730834961,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.7727748691099476,
"grad_norm": 31.33010482788086,
"learning_rate": 7.550321484960251e-08,
"logits/chosen": 1.567758560180664,
"logits/rejected": 1.5652072429656982,
"loss": 5.0706,
"step": 369
},
{
"beta_dpo/beta_used": 0.03189126402139664,
"beta_dpo/beta_used_raw": -0.0016455072909593582,
"beta_dpo/gap_mean": 25.960403442382812,
"beta_dpo/gap_std": 41.779354095458984,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.774869109947644,
"grad_norm": 61.498207092285156,
"learning_rate": 7.419687580962222e-08,
"logits/chosen": 1.4514704942703247,
"logits/rejected": 1.6543275117874146,
"loss": 4.0113,
"step": 370
},
{
"beta_dpo/beta_used": 0.02217245101928711,
"beta_dpo/beta_used_raw": 0.0033044693991541862,
"beta_dpo/gap_mean": 22.760690689086914,
"beta_dpo/gap_std": 41.05923080444336,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.7769633507853403,
"grad_norm": 52.41913604736328,
"learning_rate": 7.289996455765748e-08,
"logits/chosen": 0.8454320430755615,
"logits/rejected": 1.0241940021514893,
"loss": 4.3701,
"step": 371
},
{
"beta_dpo/beta_used": 0.06150563433766365,
"beta_dpo/beta_used_raw": 0.060376305133104324,
"beta_dpo/gap_mean": 26.305227279663086,
"beta_dpo/gap_std": 40.897579193115234,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.7790575916230367,
"grad_norm": 95.63087463378906,
"learning_rate": 7.161255064312283e-08,
"logits/chosen": 1.3337714672088623,
"logits/rejected": 1.200531244277954,
"loss": 3.4199,
"step": 372
},
{
"beta_dpo/beta_used": 0.017467252910137177,
"beta_dpo/beta_used_raw": -0.0006841365247964859,
"beta_dpo/gap_mean": 27.40023422241211,
"beta_dpo/gap_std": 41.40983963012695,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.7811518324607329,
"grad_norm": 65.07406616210938,
"learning_rate": 7.033470310611945e-08,
"logits/chosen": 1.5559697151184082,
"logits/rejected": 1.267425537109375,
"loss": 4.8366,
"step": 373
},
{
"beta_dpo/beta_used": 0.020001672208309174,
"beta_dpo/beta_used_raw": -0.006743720732629299,
"beta_dpo/gap_mean": 25.95492172241211,
"beta_dpo/gap_std": 42.976318359375,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.7832460732984293,
"grad_norm": 43.096229553222656,
"learning_rate": 6.906649047373245e-08,
"logits/chosen": 1.5863916873931885,
"logits/rejected": 1.7011443376541138,
"loss": 4.5082,
"step": 374
},
{
"beta_dpo/beta_used": 0.012499826960265636,
"beta_dpo/beta_used_raw": -0.00970209576189518,
"beta_dpo/gap_mean": 23.713022232055664,
"beta_dpo/gap_std": 42.88922119140625,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.7853403141361257,
"grad_norm": 35.47541427612305,
"learning_rate": 6.780798075635675e-08,
"logits/chosen": 1.4474728107452393,
"logits/rejected": 1.3061145544052124,
"loss": 4.878,
"step": 375
},
{
"beta_dpo/beta_used": 0.032169777899980545,
"beta_dpo/beta_used_raw": 0.023137152194976807,
"beta_dpo/gap_mean": 23.426164627075195,
"beta_dpo/gap_std": 42.51594924926758,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.787434554973822,
"grad_norm": 95.04769897460938,
"learning_rate": 6.655924144404906e-08,
"logits/chosen": 1.573278546333313,
"logits/rejected": 1.815221905708313,
"loss": 4.1144,
"step": 376
},
{
"beta_dpo/beta_used": 0.030707208439707756,
"beta_dpo/beta_used_raw": 0.005986468866467476,
"beta_dpo/gap_mean": 23.08704376220703,
"beta_dpo/gap_std": 41.96858596801758,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7895287958115184,
"grad_norm": 90.52848052978516,
"learning_rate": 6.532033950290885e-08,
"logits/chosen": 1.5606698989868164,
"logits/rejected": 1.6266758441925049,
"loss": 4.5857,
"step": 377
},
{
"beta_dpo/beta_used": 0.0293353870511055,
"beta_dpo/beta_used_raw": 0.019241416826844215,
"beta_dpo/gap_mean": 21.17989730834961,
"beta_dpo/gap_std": 42.731689453125,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.7916230366492146,
"grad_norm": 168.0338897705078,
"learning_rate": 6.409134137148736e-08,
"logits/chosen": 1.567497968673706,
"logits/rejected": 1.6306406259536743,
"loss": 4.6972,
"step": 378
},
{
"beta_dpo/beta_used": 0.021104762330651283,
"beta_dpo/beta_used_raw": -0.0026983979623764753,
"beta_dpo/gap_mean": 22.86931610107422,
"beta_dpo/gap_std": 42.299137115478516,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.793717277486911,
"grad_norm": 53.448760986328125,
"learning_rate": 6.28723129572247e-08,
"logits/chosen": 1.6663786172866821,
"logits/rejected": 1.593047022819519,
"loss": 4.8597,
"step": 379
},
{
"beta_dpo/beta_used": 0.014543892815709114,
"beta_dpo/beta_used_raw": 0.004879960790276527,
"beta_dpo/gap_mean": 23.742534637451172,
"beta_dpo/gap_std": 42.56512451171875,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.7958115183246073,
"grad_norm": 51.2754020690918,
"learning_rate": 6.166331963291519e-08,
"logits/chosen": 1.9557546377182007,
"logits/rejected": 1.7796638011932373,
"loss": 4.7633,
"step": 380
},
{
"beta_dpo/beta_used": 0.0125275244936347,
"beta_dpo/beta_used_raw": -0.0011020167730748653,
"beta_dpo/gap_mean": 24.683391571044922,
"beta_dpo/gap_std": 41.60409927368164,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.7979057591623037,
"grad_norm": 35.46774673461914,
"learning_rate": 6.046442623320145e-08,
"logits/chosen": 1.191896677017212,
"logits/rejected": 1.2276725769042969,
"loss": 5.1082,
"step": 381
},
{
"beta_dpo/beta_used": 0.03925769403576851,
"beta_dpo/beta_used_raw": 0.03246406838297844,
"beta_dpo/gap_mean": 26.57273292541504,
"beta_dpo/gap_std": 40.347042083740234,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.8,
"grad_norm": 91.7632064819336,
"learning_rate": 5.9275697051098275e-08,
"logits/chosen": 1.5332963466644287,
"logits/rejected": 1.5386418104171753,
"loss": 3.9613,
"step": 382
},
{
"beta_dpo/beta_used": 0.026574671268463135,
"beta_dpo/beta_used_raw": 0.0041369106620550156,
"beta_dpo/gap_mean": 27.20392608642578,
"beta_dpo/gap_std": 41.187217712402344,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.8020942408376963,
"grad_norm": 90.72322082519531,
"learning_rate": 5.809719583454414e-08,
"logits/chosen": 1.213146448135376,
"logits/rejected": 1.4346027374267578,
"loss": 4.2591,
"step": 383
},
{
"beta_dpo/beta_used": 0.018568674102425575,
"beta_dpo/beta_used_raw": -0.005661527160555124,
"beta_dpo/gap_mean": 23.266735076904297,
"beta_dpo/gap_std": 40.896419525146484,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.8041884816753927,
"grad_norm": 97.89303588867188,
"learning_rate": 5.6928985782982524e-08,
"logits/chosen": 1.4912177324295044,
"logits/rejected": 1.8480693101882935,
"loss": 4.8446,
"step": 384
},
{
"beta_dpo/beta_used": 0.025455057621002197,
"beta_dpo/beta_used_raw": 0.020301831886172295,
"beta_dpo/gap_mean": 22.282352447509766,
"beta_dpo/gap_std": 40.13404846191406,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.806282722513089,
"grad_norm": 87.30133056640625,
"learning_rate": 5.57711295439732e-08,
"logits/chosen": 1.6445767879486084,
"logits/rejected": 1.6937466859817505,
"loss": 4.6559,
"step": 385
},
{
"beta_dpo/beta_used": 0.07003487646579742,
"beta_dpo/beta_used_raw": 0.05945579335093498,
"beta_dpo/gap_mean": 25.810016632080078,
"beta_dpo/gap_std": 40.25865936279297,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.8083769633507853,
"grad_norm": 74.50102233886719,
"learning_rate": 5.4623689209832484e-08,
"logits/chosen": 1.644815444946289,
"logits/rejected": 1.745370864868164,
"loss": 3.0306,
"step": 386
},
{
"beta_dpo/beta_used": 0.02559298276901245,
"beta_dpo/beta_used_raw": -0.001606471836566925,
"beta_dpo/gap_mean": 25.212953567504883,
"beta_dpo/gap_std": 42.34771728515625,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.8104712041884817,
"grad_norm": 97.23846435546875,
"learning_rate": 5.3486726314303175e-08,
"logits/chosen": 1.5523253679275513,
"logits/rejected": 1.617262363433838,
"loss": 4.3258,
"step": 387
},
{
"beta_dpo/beta_used": 0.019050609320402145,
"beta_dpo/beta_used_raw": -0.013357133604586124,
"beta_dpo/gap_mean": 24.462581634521484,
"beta_dpo/gap_std": 42.33854675292969,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.812565445026178,
"grad_norm": 115.40874481201172,
"learning_rate": 5.2360301829254745e-08,
"logits/chosen": 1.898555040359497,
"logits/rejected": 1.8352364301681519,
"loss": 4.8619,
"step": 388
},
{
"beta_dpo/beta_used": 0.031168397516012192,
"beta_dpo/beta_used_raw": 0.01580439880490303,
"beta_dpo/gap_mean": 24.110021591186523,
"beta_dpo/gap_std": 41.419647216796875,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.8146596858638744,
"grad_norm": 114.02845001220703,
"learning_rate": 5.1244476161413806e-08,
"logits/chosen": 1.7501044273376465,
"logits/rejected": 1.5219378471374512,
"loss": 4.4305,
"step": 389
},
{
"beta_dpo/beta_used": 0.037911996245384216,
"beta_dpo/beta_used_raw": 0.03245529904961586,
"beta_dpo/gap_mean": 24.70856475830078,
"beta_dpo/gap_std": 42.322147369384766,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8167539267015707,
"grad_norm": 107.86334228515625,
"learning_rate": 5.013930914912476e-08,
"logits/chosen": 1.4109928607940674,
"logits/rejected": 1.5585747957229614,
"loss": 3.9697,
"step": 390
},
{
"beta_dpo/beta_used": 0.01184625644236803,
"beta_dpo/beta_used_raw": -0.0196970384567976,
"beta_dpo/gap_mean": 25.497241973876953,
"beta_dpo/gap_std": 39.925994873046875,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.818848167539267,
"grad_norm": 36.966331481933594,
"learning_rate": 4.904486005914027e-08,
"logits/chosen": 1.4992268085479736,
"logits/rejected": 1.4016600847244263,
"loss": 4.8753,
"step": 391
},
{
"beta_dpo/beta_used": 0.031304676085710526,
"beta_dpo/beta_used_raw": 0.019566738978028297,
"beta_dpo/gap_mean": 29.37858772277832,
"beta_dpo/gap_std": 39.760597229003906,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.8209424083769633,
"grad_norm": 57.252769470214844,
"learning_rate": 4.796118758344353e-08,
"logits/chosen": 1.1666127443313599,
"logits/rejected": 1.1494946479797363,
"loss": 3.3712,
"step": 392
},
{
"beta_dpo/beta_used": 0.02446107193827629,
"beta_dpo/beta_used_raw": 0.00717612449079752,
"beta_dpo/gap_mean": 27.458255767822266,
"beta_dpo/gap_std": 40.529483795166016,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.8230366492146597,
"grad_norm": 41.9975700378418,
"learning_rate": 4.688834983610082e-08,
"logits/chosen": 1.3543047904968262,
"logits/rejected": 1.1334538459777832,
"loss": 4.502,
"step": 393
},
{
"beta_dpo/beta_used": 0.013751739636063576,
"beta_dpo/beta_used_raw": -0.013827711343765259,
"beta_dpo/gap_mean": 25.792306900024414,
"beta_dpo/gap_std": 41.532981872558594,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.8251308900523561,
"grad_norm": 38.37825012207031,
"learning_rate": 4.582640435014459e-08,
"logits/chosen": 1.755271077156067,
"logits/rejected": 1.836128830909729,
"loss": 4.8139,
"step": 394
},
{
"beta_dpo/beta_used": 0.03642860800027847,
"beta_dpo/beta_used_raw": 0.02762317843735218,
"beta_dpo/gap_mean": 22.787147521972656,
"beta_dpo/gap_std": 39.04203414916992,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8272251308900523,
"grad_norm": 76.76990509033203,
"learning_rate": 4.477540807448832e-08,
"logits/chosen": 1.3757838010787964,
"logits/rejected": 1.4005060195922852,
"loss": 3.6736,
"step": 395
},
{
"beta_dpo/beta_used": 0.016622822731733322,
"beta_dpo/beta_used_raw": -0.0027820090763270855,
"beta_dpo/gap_mean": 23.37274932861328,
"beta_dpo/gap_std": 39.84015655517578,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.8293193717277487,
"grad_norm": 105.81222534179688,
"learning_rate": 4.373541737087263e-08,
"logits/chosen": 1.650363802909851,
"logits/rejected": 1.6201927661895752,
"loss": 5.2625,
"step": 396
},
{
"beta_dpo/beta_used": 0.022990621626377106,
"beta_dpo/beta_used_raw": -0.0033985301852226257,
"beta_dpo/gap_mean": 23.020658493041992,
"beta_dpo/gap_std": 39.6679573059082,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.831413612565445,
"grad_norm": 91.26580047607422,
"learning_rate": 4.270648801084295e-08,
"logits/chosen": 1.4977787733078003,
"logits/rejected": 1.5780669450759888,
"loss": 4.5482,
"step": 397
},
{
"beta_dpo/beta_used": 0.02246342971920967,
"beta_dpo/beta_used_raw": 0.007876865565776825,
"beta_dpo/gap_mean": 21.515539169311523,
"beta_dpo/gap_std": 42.26047134399414,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.8335078534031414,
"grad_norm": 80.77655029296875,
"learning_rate": 4.168867517275806e-08,
"logits/chosen": 1.3882070779800415,
"logits/rejected": 1.648177146911621,
"loss": 4.6146,
"step": 398
},
{
"beta_dpo/beta_used": 0.030784644186496735,
"beta_dpo/beta_used_raw": 0.016542304307222366,
"beta_dpo/gap_mean": 22.006698608398438,
"beta_dpo/gap_std": 42.646385192871094,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.8356020942408376,
"grad_norm": 157.0540313720703,
"learning_rate": 4.0682033438831584e-08,
"logits/chosen": 1.6338375806808472,
"logits/rejected": 1.731345772743225,
"loss": 4.4317,
"step": 399
},
{
"beta_dpo/beta_used": 0.04295587167143822,
"beta_dpo/beta_used_raw": 0.029314618557691574,
"beta_dpo/gap_mean": 21.83963394165039,
"beta_dpo/gap_std": 39.70830154418945,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.837696335078534,
"grad_norm": 134.598388671875,
"learning_rate": 3.968661679220467e-08,
"logits/chosen": 1.497736930847168,
"logits/rejected": 1.427824854850769,
"loss": 4.2926,
"step": 400
},
{
"epoch": 0.837696335078534,
"eval_beta_dpo/beta_used": 0.03352755680680275,
"eval_beta_dpo/beta_used_raw": 0.014615737833082676,
"eval_beta_dpo/gap_mean": 23.013574600219727,
"eval_beta_dpo/gap_std": 39.912696838378906,
"eval_beta_dpo/mask_keep_frac": 1.0,
"eval_logits/chosen": 1.5097905397415161,
"eval_logits/rejected": 1.546280860900879,
"eval_loss": 0.5896762609481812,
"eval_runtime": 92.7086,
"eval_samples_per_second": 21.573,
"eval_steps_per_second": 1.348,
"step": 400
},
{
"beta_dpo/beta_used": 0.031112950295209885,
"beta_dpo/beta_used_raw": 0.018789593130350113,
"beta_dpo/gap_mean": 25.120380401611328,
"beta_dpo/gap_std": 39.081172943115234,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.8397905759162304,
"grad_norm": 94.05326843261719,
"learning_rate": 3.8702478614051345e-08,
"logits/chosen": 1.4719927310943604,
"logits/rejected": 1.6373367309570312,
"loss": 4.188,
"step": 401
},
{
"beta_dpo/beta_used": 0.02975967340171337,
"beta_dpo/beta_used_raw": 0.020481513813138008,
"beta_dpo/gap_mean": 25.850921630859375,
"beta_dpo/gap_std": 40.83582305908203,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.8418848167539267,
"grad_norm": 68.26434326171875,
"learning_rate": 3.772967168071517e-08,
"logits/chosen": 1.4517847299575806,
"logits/rejected": 1.3798197507858276,
"loss": 4.0377,
"step": 402
},
{
"beta_dpo/beta_used": 0.033130984753370285,
"beta_dpo/beta_used_raw": 0.026949459686875343,
"beta_dpo/gap_mean": 27.959623336791992,
"beta_dpo/gap_std": 38.593902587890625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8439790575916231,
"grad_norm": 56.32769012451172,
"learning_rate": 3.676824816087978e-08,
"logits/chosen": 1.6041405200958252,
"logits/rejected": 1.634192705154419,
"loss": 3.6404,
"step": 403
},
{
"beta_dpo/beta_used": 0.014831377193331718,
"beta_dpo/beta_used_raw": -0.013218341395258904,
"beta_dpo/gap_mean": 29.18805694580078,
"beta_dpo/gap_std": 39.73085021972656,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.8460732984293193,
"grad_norm": 27.067461013793945,
"learning_rate": 3.581825961277074e-08,
"logits/chosen": 1.493395209312439,
"logits/rejected": 1.3758317232131958,
"loss": 4.6703,
"step": 404
},
{
"beta_dpo/beta_used": 0.03309793025255203,
"beta_dpo/beta_used_raw": 0.011897753924131393,
"beta_dpo/gap_mean": 26.401506423950195,
"beta_dpo/gap_std": 40.610694885253906,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.8481675392670157,
"grad_norm": 67.01002502441406,
"learning_rate": 3.487975698139084e-08,
"logits/chosen": 1.5461680889129639,
"logits/rejected": 1.6689039468765259,
"loss": 3.8802,
"step": 405
},
{
"beta_dpo/beta_used": 0.007973221130669117,
"beta_dpo/beta_used_raw": -0.02517438679933548,
"beta_dpo/gap_mean": 23.499588012695312,
"beta_dpo/gap_std": 41.003013610839844,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.8502617801047121,
"grad_norm": 30.062997817993164,
"learning_rate": 3.3952790595787986e-08,
"logits/chosen": 1.3487330675125122,
"logits/rejected": 1.2552706003189087,
"loss": 5.0999,
"step": 406
},
{
"beta_dpo/beta_used": 0.018556706607341766,
"beta_dpo/beta_used_raw": 0.006646966561675072,
"beta_dpo/gap_mean": 23.741344451904297,
"beta_dpo/gap_std": 42.31064987182617,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.8523560209424084,
"grad_norm": 77.14202880859375,
"learning_rate": 3.303741016635614e-08,
"logits/chosen": 1.38568913936615,
"logits/rejected": 1.1631001234054565,
"loss": 4.6002,
"step": 407
},
{
"beta_dpo/beta_used": 0.04947693645954132,
"beta_dpo/beta_used_raw": 0.024193253368139267,
"beta_dpo/gap_mean": 23.99530029296875,
"beta_dpo/gap_std": 40.86692810058594,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.8544502617801047,
"grad_norm": 144.13487243652344,
"learning_rate": 3.2133664782169944e-08,
"logits/chosen": 1.0143358707427979,
"logits/rejected": 1.08698308467865,
"loss": 4.4916,
"step": 408
},
{
"beta_dpo/beta_used": 0.01759941130876541,
"beta_dpo/beta_used_raw": -0.006128270179033279,
"beta_dpo/gap_mean": 25.6751708984375,
"beta_dpo/gap_std": 40.675594329833984,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.856544502617801,
"grad_norm": 66.57832336425781,
"learning_rate": 3.12416029083514e-08,
"logits/chosen": 1.6948835849761963,
"logits/rejected": 1.8402390480041504,
"loss": 4.5883,
"step": 409
},
{
"beta_dpo/beta_used": 0.032623328268527985,
"beta_dpo/beta_used_raw": 0.020593255758285522,
"beta_dpo/gap_mean": 23.831777572631836,
"beta_dpo/gap_std": 41.50251770019531,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.8586387434554974,
"grad_norm": 108.39352416992188,
"learning_rate": 3.036127238347164e-08,
"logits/chosen": 1.7509747743606567,
"logits/rejected": 1.7223472595214844,
"loss": 4.1702,
"step": 410
},
{
"beta_dpo/beta_used": 0.058568619191646576,
"beta_dpo/beta_used_raw": 0.03209678828716278,
"beta_dpo/gap_mean": 26.16048812866211,
"beta_dpo/gap_std": 41.54467010498047,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.8607329842931937,
"grad_norm": 156.947265625,
"learning_rate": 2.9492720416985e-08,
"logits/chosen": 1.5110323429107666,
"logits/rejected": 1.5965254306793213,
"loss": 3.4559,
"step": 411
},
{
"beta_dpo/beta_used": 0.023946017026901245,
"beta_dpo/beta_used_raw": 0.0037475526332855225,
"beta_dpo/gap_mean": 25.7176456451416,
"beta_dpo/gap_std": 42.220760345458984,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.86282722513089,
"grad_norm": 45.27512741088867,
"learning_rate": 2.863599358669755e-08,
"logits/chosen": 1.275376796722412,
"logits/rejected": 1.481441855430603,
"loss": 4.4762,
"step": 412
},
{
"beta_dpo/beta_used": 0.034958455711603165,
"beta_dpo/beta_used_raw": 0.017024677246809006,
"beta_dpo/gap_mean": 23.186616897583008,
"beta_dpo/gap_std": 41.46014404296875,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.8649214659685864,
"grad_norm": 124.6803970336914,
"learning_rate": 2.7791137836269158e-08,
"logits/chosen": 1.6735713481903076,
"logits/rejected": 1.6593836545944214,
"loss": 4.0813,
"step": 413
},
{
"beta_dpo/beta_used": 0.026967719197273254,
"beta_dpo/beta_used_raw": -0.0016478030011057854,
"beta_dpo/gap_mean": 23.66002655029297,
"beta_dpo/gap_std": 41.970882415771484,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.8670157068062827,
"grad_norm": 153.2272491455078,
"learning_rate": 2.6958198472749717e-08,
"logits/chosen": 1.6639155149459839,
"logits/rejected": 1.536154866218567,
"loss": 4.332,
"step": 414
},
{
"beta_dpo/beta_used": 0.04223136603832245,
"beta_dpo/beta_used_raw": 0.038000062108039856,
"beta_dpo/gap_mean": 25.557510375976562,
"beta_dpo/gap_std": 42.886444091796875,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.8691099476439791,
"grad_norm": 165.83090209960938,
"learning_rate": 2.613722016414943e-08,
"logits/chosen": 1.1066584587097168,
"logits/rejected": 1.1601117849349976,
"loss": 4.1273,
"step": 415
},
{
"beta_dpo/beta_used": 0.028374191373586655,
"beta_dpo/beta_used_raw": 0.01894223876297474,
"beta_dpo/gap_mean": 28.670167922973633,
"beta_dpo/gap_std": 42.47052001953125,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.8712041884816754,
"grad_norm": 66.93905639648438,
"learning_rate": 2.5328246937043525e-08,
"logits/chosen": 1.5560580492019653,
"logits/rejected": 1.6145976781845093,
"loss": 3.9753,
"step": 416
},
{
"beta_dpo/beta_used": 0.043879032135009766,
"beta_dpo/beta_used_raw": 0.02182396501302719,
"beta_dpo/gap_mean": 26.690717697143555,
"beta_dpo/gap_std": 41.90580368041992,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.8732984293193717,
"grad_norm": 92.42415618896484,
"learning_rate": 2.4531322174210973e-08,
"logits/chosen": 1.2475701570510864,
"logits/rejected": 1.3210117816925049,
"loss": 4.1788,
"step": 417
},
{
"beta_dpo/beta_used": 0.03364454209804535,
"beta_dpo/beta_used_raw": 0.005448690615594387,
"beta_dpo/gap_mean": 25.629501342773438,
"beta_dpo/gap_std": 40.84889602661133,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.875392670157068,
"grad_norm": 60.8049430847168,
"learning_rate": 2.3746488612308295e-08,
"logits/chosen": 1.3086042404174805,
"logits/rejected": 1.1799873113632202,
"loss": 3.8843,
"step": 418
},
{
"beta_dpo/beta_used": 0.040316130965948105,
"beta_dpo/beta_used_raw": 0.024059785529971123,
"beta_dpo/gap_mean": 29.127347946166992,
"beta_dpo/gap_std": 42.379608154296875,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.8774869109947644,
"grad_norm": 65.74553680419922,
"learning_rate": 2.297378833957761e-08,
"logits/chosen": 1.9729444980621338,
"logits/rejected": 1.894222617149353,
"loss": 3.9346,
"step": 419
},
{
"beta_dpo/beta_used": 0.030392050743103027,
"beta_dpo/beta_used_raw": 0.015165509656071663,
"beta_dpo/gap_mean": 29.2987060546875,
"beta_dpo/gap_std": 43.514549255371094,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.8795811518324608,
"grad_norm": 112.77594757080078,
"learning_rate": 2.2213262793589482e-08,
"logits/chosen": 1.2061651945114136,
"logits/rejected": 1.2414170503616333,
"loss": 4.1674,
"step": 420
},
{
"beta_dpo/beta_used": 0.037078239023685455,
"beta_dpo/beta_used_raw": 0.006579352542757988,
"beta_dpo/gap_mean": 30.2874698638916,
"beta_dpo/gap_std": 41.12751007080078,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.881675392670157,
"grad_norm": 50.55178451538086,
"learning_rate": 2.1464952759020856e-08,
"logits/chosen": 1.381372332572937,
"logits/rejected": 1.1805065870285034,
"loss": 3.5599,
"step": 421
},
{
"beta_dpo/beta_used": 0.027763448655605316,
"beta_dpo/beta_used_raw": 0.0037402785383164883,
"beta_dpo/gap_mean": 26.626432418823242,
"beta_dpo/gap_std": 42.52971649169922,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8837696335078534,
"grad_norm": 80.29391479492188,
"learning_rate": 2.07288983654679e-08,
"logits/chosen": 1.6077336072921753,
"logits/rejected": 1.651180624961853,
"loss": 4.4944,
"step": 422
},
{
"beta_dpo/beta_used": 0.03539786487817764,
"beta_dpo/beta_used_raw": 0.004768058191984892,
"beta_dpo/gap_mean": 26.751209259033203,
"beta_dpo/gap_std": 42.32147979736328,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.8858638743455497,
"grad_norm": 90.14205169677734,
"learning_rate": 2.0005139085293942e-08,
"logits/chosen": 1.4197824001312256,
"logits/rejected": 1.5385533571243286,
"loss": 4.5795,
"step": 423
},
{
"beta_dpo/beta_used": 0.01704780012369156,
"beta_dpo/beta_used_raw": 0.012394540943205357,
"beta_dpo/gap_mean": 27.506437301635742,
"beta_dpo/gap_std": 42.84564208984375,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.8879581151832461,
"grad_norm": 52.7910041809082,
"learning_rate": 1.9293713731512673e-08,
"logits/chosen": 1.3633639812469482,
"logits/rejected": 1.1960315704345703,
"loss": 4.4306,
"step": 424
},
{
"beta_dpo/beta_used": 0.0063092270866036415,
"beta_dpo/beta_used_raw": -0.041274845600128174,
"beta_dpo/gap_mean": 27.02210807800293,
"beta_dpo/gap_std": 40.46715545654297,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.8900523560209425,
"grad_norm": 22.821779251098633,
"learning_rate": 1.8594660455706763e-08,
"logits/chosen": 1.476675033569336,
"logits/rejected": 1.6865489482879639,
"loss": 4.8895,
"step": 425
},
{
"beta_dpo/beta_used": 0.033457279205322266,
"beta_dpo/beta_used_raw": 0.02916746772825718,
"beta_dpo/gap_mean": 24.161306381225586,
"beta_dpo/gap_std": 39.77753448486328,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.8921465968586387,
"grad_norm": 88.06718444824219,
"learning_rate": 1.7908016745981856e-08,
"logits/chosen": 1.2509461641311646,
"logits/rejected": 1.4100229740142822,
"loss": 3.9195,
"step": 426
},
{
"beta_dpo/beta_used": 0.04828907176852226,
"beta_dpo/beta_used_raw": 0.03954368457198143,
"beta_dpo/gap_mean": 27.65555191040039,
"beta_dpo/gap_std": 40.21341323852539,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8942408376963351,
"grad_norm": 90.2916030883789,
"learning_rate": 1.7233819424956247e-08,
"logits/chosen": 1.3937939405441284,
"logits/rejected": 1.3810914754867554,
"loss": 3.5748,
"step": 427
},
{
"beta_dpo/beta_used": 0.04655870795249939,
"beta_dpo/beta_used_raw": 0.024588048458099365,
"beta_dpo/gap_mean": 32.9439582824707,
"beta_dpo/gap_std": 39.263301849365234,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.8963350785340314,
"grad_norm": 68.10398864746094,
"learning_rate": 1.6572104647786245e-08,
"logits/chosen": 1.752288818359375,
"logits/rejected": 1.9130034446716309,
"loss": 3.5059,
"step": 428
},
{
"beta_dpo/beta_used": 0.02215776965022087,
"beta_dpo/beta_used_raw": -0.016678031533956528,
"beta_dpo/gap_mean": 31.625703811645508,
"beta_dpo/gap_std": 43.56167984008789,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.8984293193717278,
"grad_norm": 60.726661682128906,
"learning_rate": 1.5922907900227017e-08,
"logits/chosen": 1.458854079246521,
"logits/rejected": 1.4256439208984375,
"loss": 4.6525,
"step": 429
},
{
"beta_dpo/beta_used": 0.02091900259256363,
"beta_dpo/beta_used_raw": 0.0070409020408988,
"beta_dpo/gap_mean": 27.687143325805664,
"beta_dpo/gap_std": 44.989070892333984,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.900523560209424,
"grad_norm": 140.8614959716797,
"learning_rate": 1.5286263996730026e-08,
"logits/chosen": 1.4701473712921143,
"logits/rejected": 1.5857133865356445,
"loss": 4.5919,
"step": 430
},
{
"beta_dpo/beta_used": 0.014125513844192028,
"beta_dpo/beta_used_raw": -0.018212314695119858,
"beta_dpo/gap_mean": 24.097793579101562,
"beta_dpo/gap_std": 43.06412124633789,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9026178010471204,
"grad_norm": 114.83171081542969,
"learning_rate": 1.4662207078575684e-08,
"logits/chosen": 1.7383248805999756,
"logits/rejected": 1.805346965789795,
"loss": 4.965,
"step": 431
},
{
"beta_dpo/beta_used": 0.032197486609220505,
"beta_dpo/beta_used_raw": 0.023590974509716034,
"beta_dpo/gap_mean": 26.495365142822266,
"beta_dpo/gap_std": 43.16999435424805,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9047120418848168,
"grad_norm": 93.71367645263672,
"learning_rate": 1.40507706120426e-08,
"logits/chosen": 1.4706007242202759,
"logits/rejected": 1.6791198253631592,
"loss": 4.1943,
"step": 432
},
{
"beta_dpo/beta_used": 0.029206298291683197,
"beta_dpo/beta_used_raw": 0.02880963124334812,
"beta_dpo/gap_mean": 24.425756454467773,
"beta_dpo/gap_std": 42.32783889770508,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.9068062827225131,
"grad_norm": 79.65400695800781,
"learning_rate": 1.345198738661285e-08,
"logits/chosen": 1.5126326084136963,
"logits/rejected": 1.4506518840789795,
"loss": 4.3461,
"step": 433
},
{
"beta_dpo/beta_used": 0.04482489451766014,
"beta_dpo/beta_used_raw": 0.019631531089544296,
"beta_dpo/gap_mean": 23.61885643005371,
"beta_dpo/gap_std": 41.121665954589844,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.9089005235602095,
"grad_norm": 76.20455169677734,
"learning_rate": 1.2865889513213628e-08,
"logits/chosen": 1.9426430463790894,
"logits/rejected": 1.9414358139038086,
"loss": 3.626,
"step": 434
},
{
"beta_dpo/beta_used": 0.024887006729841232,
"beta_dpo/beta_used_raw": 0.01590941660106182,
"beta_dpo/gap_mean": 23.983257293701172,
"beta_dpo/gap_std": 40.91677474975586,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.9109947643979057,
"grad_norm": 117.16897583007812,
"learning_rate": 1.2292508422495157e-08,
"logits/chosen": 1.6585721969604492,
"logits/rejected": 1.773654580116272,
"loss": 4.7233,
"step": 435
},
{
"beta_dpo/beta_used": 0.022081829607486725,
"beta_dpo/beta_used_raw": -0.0030337003991007805,
"beta_dpo/gap_mean": 21.94788932800293,
"beta_dpo/gap_std": 40.543338775634766,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9130890052356021,
"grad_norm": 41.44011688232422,
"learning_rate": 1.1731874863145142e-08,
"logits/chosen": 1.3716554641723633,
"logits/rejected": 1.4048748016357422,
"loss": 4.5878,
"step": 436
},
{
"beta_dpo/beta_used": 0.03096182271838188,
"beta_dpo/beta_used_raw": 0.029562827199697495,
"beta_dpo/gap_mean": 23.157291412353516,
"beta_dpo/gap_std": 40.46465301513672,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9151832460732985,
"grad_norm": 81.40292358398438,
"learning_rate": 1.118401890024001e-08,
"logits/chosen": 1.6667184829711914,
"logits/rejected": 1.8092567920684814,
"loss": 4.1753,
"step": 437
},
{
"beta_dpo/beta_used": 0.00928124412894249,
"beta_dpo/beta_used_raw": -0.024261336773633957,
"beta_dpo/gap_mean": 20.033138275146484,
"beta_dpo/gap_std": 41.23052215576172,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9172774869109948,
"grad_norm": 50.23611068725586,
"learning_rate": 1.06489699136324e-08,
"logits/chosen": 1.3478763103485107,
"logits/rejected": 1.4908018112182617,
"loss": 5.1895,
"step": 438
},
{
"beta_dpo/beta_used": 0.04591372609138489,
"beta_dpo/beta_used_raw": 0.04151216149330139,
"beta_dpo/gap_mean": 19.75481414794922,
"beta_dpo/gap_std": 41.36615753173828,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9193717277486911,
"grad_norm": 145.0548095703125,
"learning_rate": 1.0126756596375685e-08,
"logits/chosen": 1.5163558721542358,
"logits/rejected": 1.5085352659225464,
"loss": 3.9243,
"step": 439
},
{
"beta_dpo/beta_used": 0.014184126630425453,
"beta_dpo/beta_used_raw": -0.008172026835381985,
"beta_dpo/gap_mean": 20.215518951416016,
"beta_dpo/gap_std": 39.6240119934082,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9214659685863874,
"grad_norm": 50.933837890625,
"learning_rate": 9.617406953185136e-09,
"logits/chosen": 1.4577587842941284,
"logits/rejected": 1.234389305114746,
"loss": 4.9376,
"step": 440
},
{
"beta_dpo/beta_used": 0.048469383269548416,
"beta_dpo/beta_used_raw": 0.0424063466489315,
"beta_dpo/gap_mean": 22.741992950439453,
"beta_dpo/gap_std": 39.93981170654297,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.9235602094240838,
"grad_norm": 101.18359375,
"learning_rate": 9.12094829893642e-09,
"logits/chosen": 1.7504223585128784,
"logits/rejected": 1.9641519784927368,
"loss": 4.1214,
"step": 441
},
{
"beta_dpo/beta_used": 0.03708556294441223,
"beta_dpo/beta_used_raw": 0.02750963345170021,
"beta_dpo/gap_mean": 24.97802734375,
"beta_dpo/gap_std": 41.040199279785156,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.9256544502617801,
"grad_norm": 100.34196472167969,
"learning_rate": 8.637407257200496e-09,
"logits/chosen": 1.3330552577972412,
"logits/rejected": 1.4373996257781982,
"loss": 4.1536,
"step": 442
},
{
"beta_dpo/beta_used": 0.04446953535079956,
"beta_dpo/beta_used_raw": 0.022015634924173355,
"beta_dpo/gap_mean": 22.627042770385742,
"beta_dpo/gap_std": 41.79437255859375,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.9277486910994764,
"grad_norm": 65.15979766845703,
"learning_rate": 8.166809758815895e-09,
"logits/chosen": 1.2715387344360352,
"logits/rejected": 1.2342997789382935,
"loss": 3.7321,
"step": 443
},
{
"beta_dpo/beta_used": 0.014596132561564445,
"beta_dpo/beta_used_raw": -0.007604743354022503,
"beta_dpo/gap_mean": 24.320253372192383,
"beta_dpo/gap_std": 41.13831329345703,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9298429319371728,
"grad_norm": 47.09414291381836,
"learning_rate": 7.709181040498253e-09,
"logits/chosen": 1.0621271133422852,
"logits/rejected": 1.241407871246338,
"loss": 4.861,
"step": 444
},
{
"beta_dpo/beta_used": 0.02850104495882988,
"beta_dpo/beta_used_raw": 0.016521329060196877,
"beta_dpo/gap_mean": 22.053783416748047,
"beta_dpo/gap_std": 42.03921890258789,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.9319371727748691,
"grad_norm": 111.25325775146484,
"learning_rate": 7.2645456434869965e-09,
"logits/chosen": 1.5844391584396362,
"logits/rejected": 1.637407898902893,
"loss": 4.4202,
"step": 445
},
{
"beta_dpo/beta_used": 0.029594026505947113,
"beta_dpo/beta_used_raw": 0.0185114536434412,
"beta_dpo/gap_mean": 24.767749786376953,
"beta_dpo/gap_std": 41.35893249511719,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9340314136125655,
"grad_norm": 41.6215705871582,
"learning_rate": 6.832927412229017e-09,
"logits/chosen": 1.4550718069076538,
"logits/rejected": 1.433241367340088,
"loss": 4.1939,
"step": 446
},
{
"beta_dpo/beta_used": 0.03615984693169594,
"beta_dpo/beta_used_raw": 0.03083086758852005,
"beta_dpo/gap_mean": 28.0212345123291,
"beta_dpo/gap_std": 39.88979721069336,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9361256544502617,
"grad_norm": 43.32276153564453,
"learning_rate": 6.414349493100129e-09,
"logits/chosen": 1.5409138202667236,
"logits/rejected": 1.6101213693618774,
"loss": 3.7742,
"step": 447
},
{
"beta_dpo/beta_used": 0.031161731109023094,
"beta_dpo/beta_used_raw": 0.012630118057131767,
"beta_dpo/gap_mean": 27.05018424987793,
"beta_dpo/gap_std": 40.15449905395508,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9382198952879581,
"grad_norm": 235.60301208496094,
"learning_rate": 6.0088343331638756e-09,
"logits/chosen": 1.854709506034851,
"logits/rejected": 1.8700783252716064,
"loss": 4.4011,
"step": 448
},
{
"beta_dpo/beta_used": 0.035951972007751465,
"beta_dpo/beta_used_raw": 0.021076416596770287,
"beta_dpo/gap_mean": 26.136516571044922,
"beta_dpo/gap_std": 39.963043212890625,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9403141361256544,
"grad_norm": 100.77395629882812,
"learning_rate": 5.616403678967624e-09,
"logits/chosen": 2.0368571281433105,
"logits/rejected": 1.7351016998291016,
"loss": 3.8561,
"step": 449
},
{
"beta_dpo/beta_used": 0.01658363826572895,
"beta_dpo/beta_used_raw": -0.019273536279797554,
"beta_dpo/gap_mean": 25.731136322021484,
"beta_dpo/gap_std": 40.702030181884766,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.9424083769633508,
"grad_norm": 41.71562957763672,
"learning_rate": 5.2370785753763356e-09,
"logits/chosen": 1.7945507764816284,
"logits/rejected": 1.5377925634384155,
"loss": 4.7532,
"step": 450
},
{
"beta_dpo/beta_used": 0.030548732727766037,
"beta_dpo/beta_used_raw": 0.022728927433490753,
"beta_dpo/gap_mean": 24.457050323486328,
"beta_dpo/gap_std": 39.438201904296875,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.9445026178010472,
"grad_norm": 84.55509948730469,
"learning_rate": 4.8708793644441086e-09,
"logits/chosen": 1.5343233346939087,
"logits/rejected": 1.6422300338745117,
"loss": 4.0291,
"step": 451
},
{
"beta_dpo/beta_used": 0.022664647549390793,
"beta_dpo/beta_used_raw": -0.009222008287906647,
"beta_dpo/gap_mean": 25.828996658325195,
"beta_dpo/gap_std": 41.49300003051758,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.9465968586387434,
"grad_norm": 116.09254455566406,
"learning_rate": 4.517825684323323e-09,
"logits/chosen": 1.4695273637771606,
"logits/rejected": 1.6382958889007568,
"loss": 4.6374,
"step": 452
},
{
"beta_dpo/beta_used": 0.026332221925258636,
"beta_dpo/beta_used_raw": 0.0258626826107502,
"beta_dpo/gap_mean": 24.971637725830078,
"beta_dpo/gap_std": 39.16703414916992,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9486910994764398,
"grad_norm": 85.11585998535156,
"learning_rate": 4.1779364682113794e-09,
"logits/chosen": 1.7189387083053589,
"logits/rejected": 1.8478630781173706,
"loss": 4.0201,
"step": 453
},
{
"beta_dpo/beta_used": 0.02039419114589691,
"beta_dpo/beta_used_raw": 0.003133818507194519,
"beta_dpo/gap_mean": 25.356918334960938,
"beta_dpo/gap_std": 39.97523498535156,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.9507853403141361,
"grad_norm": 47.996421813964844,
"learning_rate": 3.851229943335393e-09,
"logits/chosen": 2.0254852771759033,
"logits/rejected": 1.9557225704193115,
"loss": 4.2785,
"step": 454
},
{
"beta_dpo/beta_used": 0.014043524861335754,
"beta_dpo/beta_used_raw": -0.01924164779484272,
"beta_dpo/gap_mean": 24.39451789855957,
"beta_dpo/gap_std": 40.95219039916992,
"beta_dpo/mask_keep_frac": 0.59375,
"epoch": 0.9528795811518325,
"grad_norm": 79.08866882324219,
"learning_rate": 3.5377236299748147e-09,
"logits/chosen": 1.5097756385803223,
"logits/rejected": 1.603163242340088,
"loss": 4.8423,
"step": 455
},
{
"beta_dpo/beta_used": 0.05385340750217438,
"beta_dpo/beta_used_raw": 0.029043981805443764,
"beta_dpo/gap_mean": 25.243539810180664,
"beta_dpo/gap_std": 42.33509063720703,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.9549738219895288,
"grad_norm": 108.52057647705078,
"learning_rate": 3.2374343405217884e-09,
"logits/chosen": 1.6896770000457764,
"logits/rejected": 1.829254150390625,
"loss": 4.0501,
"step": 456
},
{
"beta_dpo/beta_used": 0.04148964211344719,
"beta_dpo/beta_used_raw": 0.018075397238135338,
"beta_dpo/gap_mean": 27.367046356201172,
"beta_dpo/gap_std": 43.94456100463867,
"beta_dpo/mask_keep_frac": 0.59375,
"epoch": 0.9570680628272251,
"grad_norm": 287.26763916015625,
"learning_rate": 2.9503781785795713e-09,
"logits/chosen": 1.5245857238769531,
"logits/rejected": 1.4000697135925293,
"loss": 4.0654,
"step": 457
},
{
"beta_dpo/beta_used": 0.011880859732627869,
"beta_dpo/beta_used_raw": -0.01639743149280548,
"beta_dpo/gap_mean": 25.620864868164062,
"beta_dpo/gap_std": 41.843963623046875,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.9591623036649215,
"grad_norm": 45.80873107910156,
"learning_rate": 2.6765705380989432e-09,
"logits/chosen": 1.518320918083191,
"logits/rejected": 1.3533384799957275,
"loss": 4.969,
"step": 458
},
{
"beta_dpo/beta_used": 0.052308086305856705,
"beta_dpo/beta_used_raw": 0.024883100762963295,
"beta_dpo/gap_mean": 23.730758666992188,
"beta_dpo/gap_std": 41.868125915527344,
"beta_dpo/mask_keep_frac": 0.90625,
"epoch": 0.9612565445026178,
"grad_norm": 121.35041809082031,
"learning_rate": 2.416026102552732e-09,
"logits/chosen": 1.4219530820846558,
"logits/rejected": 1.2508901357650757,
"loss": 3.466,
"step": 459
},
{
"beta_dpo/beta_used": 0.017205236479640007,
"beta_dpo/beta_used_raw": -0.0033456708770245314,
"beta_dpo/gap_mean": 22.880821228027344,
"beta_dpo/gap_std": 45.12669372558594,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9633507853403142,
"grad_norm": 73.71929931640625,
"learning_rate": 2.168758844148272e-09,
"logits/chosen": 1.3608553409576416,
"logits/rejected": 1.3055371046066284,
"loss": 5.0311,
"step": 460
},
{
"beta_dpo/beta_used": 0.04041147232055664,
"beta_dpo/beta_used_raw": 0.029882332310080528,
"beta_dpo/gap_mean": 22.926301956176758,
"beta_dpo/gap_std": 44.20081329345703,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.9654450261780104,
"grad_norm": 86.9037094116211,
"learning_rate": 1.9347820230782295e-09,
"logits/chosen": 1.735243797302246,
"logits/rejected": 1.66280996799469,
"loss": 3.8386,
"step": 461
},
{
"beta_dpo/beta_used": 0.03556675463914871,
"beta_dpo/beta_used_raw": 0.027323313057422638,
"beta_dpo/gap_mean": 25.144573211669922,
"beta_dpo/gap_std": 43.731327056884766,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.9675392670157068,
"grad_norm": 159.07362365722656,
"learning_rate": 1.7141081868094209e-09,
"logits/chosen": 1.5209287405014038,
"logits/rejected": 1.4356799125671387,
"loss": 4.4081,
"step": 462
},
{
"beta_dpo/beta_used": 0.03522716090083122,
"beta_dpo/beta_used_raw": 0.009717161767184734,
"beta_dpo/gap_mean": 25.50450325012207,
"beta_dpo/gap_std": 42.545188903808594,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9696335078534032,
"grad_norm": 87.98490142822266,
"learning_rate": 1.5067491694100153e-09,
"logits/chosen": 1.5676113367080688,
"logits/rejected": 1.6250090599060059,
"loss": 3.8654,
"step": 463
},
{
"beta_dpo/beta_used": 0.021527249366044998,
"beta_dpo/beta_used_raw": 0.011957229115068913,
"beta_dpo/gap_mean": 24.842899322509766,
"beta_dpo/gap_std": 42.1388053894043,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.9717277486910995,
"grad_norm": 79.93045043945312,
"learning_rate": 1.3127160909147672e-09,
"logits/chosen": 1.8131260871887207,
"logits/rejected": 1.744214653968811,
"loss": 4.3541,
"step": 464
},
{
"beta_dpo/beta_used": 0.03722041845321655,
"beta_dpo/beta_used_raw": 0.016623277217149734,
"beta_dpo/gap_mean": 26.415016174316406,
"beta_dpo/gap_std": 41.290672302246094,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9738219895287958,
"grad_norm": 76.404541015625,
"learning_rate": 1.1320193567288527e-09,
"logits/chosen": 1.4614487886428833,
"logits/rejected": 1.4553896188735962,
"loss": 4.0587,
"step": 465
},
{
"beta_dpo/beta_used": 0.0460047721862793,
"beta_dpo/beta_used_raw": 0.03773031011223793,
"beta_dpo/gap_mean": 28.092792510986328,
"beta_dpo/gap_std": 40.66791534423828,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.9759162303664921,
"grad_norm": 89.3587417602539,
"learning_rate": 9.64668657069706e-10,
"logits/chosen": 1.3052603006362915,
"logits/rejected": 1.347874641418457,
"loss": 3.221,
"step": 466
},
{
"beta_dpo/beta_used": 0.02018456533551216,
"beta_dpo/beta_used_raw": -0.0011910395696759224,
"beta_dpo/gap_mean": 25.53974151611328,
"beta_dpo/gap_std": 40.64295196533203,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.9780104712041885,
"grad_norm": 181.21160888671875,
"learning_rate": 8.106729664475176e-10,
"logits/chosen": 0.9222959876060486,
"logits/rejected": 1.1561161279678345,
"loss": 5.0134,
"step": 467
},
{
"beta_dpo/beta_used": 0.015501348301768303,
"beta_dpo/beta_used_raw": -0.027839092537760735,
"beta_dpo/gap_mean": 24.239940643310547,
"beta_dpo/gap_std": 40.417659759521484,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.9801047120418848,
"grad_norm": 32.802974700927734,
"learning_rate": 6.700405431837585e-10,
"logits/chosen": 1.68427312374115,
"logits/rejected": 1.4638608694076538,
"loss": 4.8736,
"step": 468
},
{
"beta_dpo/beta_used": 0.043932512402534485,
"beta_dpo/beta_used_raw": 0.02907262183725834,
"beta_dpo/gap_mean": 23.42894744873047,
"beta_dpo/gap_std": 40.17053985595703,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9821989528795811,
"grad_norm": 74.9969482421875,
"learning_rate": 5.427789289685347e-10,
"logits/chosen": 1.6762428283691406,
"logits/rejected": 1.6395068168640137,
"loss": 3.908,
"step": 469
},
{
"beta_dpo/beta_used": 0.05348680168390274,
"beta_dpo/beta_used_raw": 0.047200098633766174,
"beta_dpo/gap_mean": 26.360820770263672,
"beta_dpo/gap_std": 41.91456985473633,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.9842931937172775,
"grad_norm": 92.29603576660156,
"learning_rate": 4.288949484559934e-10,
"logits/chosen": 0.9740282297134399,
"logits/rejected": 0.9412952065467834,
"loss": 3.2812,
"step": 470
},
{
"beta_dpo/beta_used": 0.011362526565790176,
"beta_dpo/beta_used_raw": -0.002826599171385169,
"beta_dpo/gap_mean": 26.84084701538086,
"beta_dpo/gap_std": 42.06930160522461,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9863874345549738,
"grad_norm": 39.85667419433594,
"learning_rate": 3.2839470889836627e-10,
"logits/chosen": 1.6476500034332275,
"logits/rejected": 1.6063101291656494,
"loss": 4.6968,
"step": 471
},
{
"beta_dpo/beta_used": 0.04099735617637634,
"beta_dpo/beta_used_raw": 0.03528433293104172,
"beta_dpo/gap_mean": 27.509807586669922,
"beta_dpo/gap_std": 42.573822021484375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9884816753926702,
"grad_norm": 135.5849609375,
"learning_rate": 2.412835998185092e-10,
"logits/chosen": 1.3469210863113403,
"logits/rejected": 1.4127790927886963,
"loss": 3.8637,
"step": 472
},
{
"beta_dpo/beta_used": 0.020598269999027252,
"beta_dpo/beta_used_raw": 0.011284598149359226,
"beta_dpo/gap_mean": 26.61202621459961,
"beta_dpo/gap_std": 42.61575698852539,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.9905759162303664,
"grad_norm": 39.22035598754883,
"learning_rate": 1.6756629272085544e-10,
"logits/chosen": 1.4856796264648438,
"logits/rejected": 1.2598925828933716,
"loss": 4.3459,
"step": 473
},
{
"beta_dpo/beta_used": 0.015516398474574089,
"beta_dpo/beta_used_raw": -0.019677024334669113,
"beta_dpo/gap_mean": 27.40287971496582,
"beta_dpo/gap_std": 42.025856018066406,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9926701570680628,
"grad_norm": 34.569698333740234,
"learning_rate": 1.072467408408384e-10,
"logits/chosen": 1.5088553428649902,
"logits/rejected": 1.615687370300293,
"loss": 4.6458,
"step": 474
},
{
"beta_dpo/beta_used": 0.01646936498582363,
"beta_dpo/beta_used_raw": -0.006308557000011206,
"beta_dpo/gap_mean": 22.261816024780273,
"beta_dpo/gap_std": 39.92071533203125,
"beta_dpo/mask_keep_frac": 0.5,
"epoch": 0.9947643979057592,
"grad_norm": 33.08564758300781,
"learning_rate": 6.032817893297793e-11,
"logits/chosen": 1.1749279499053955,
"logits/rejected": 1.2055437564849854,
"loss": 4.2627,
"step": 475
},
{
"beta_dpo/beta_used": 0.027492396533489227,
"beta_dpo/beta_used_raw": 0.008521707728505135,
"beta_dpo/gap_mean": 23.523109436035156,
"beta_dpo/gap_std": 40.176387786865234,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.9968586387434555,
"grad_norm": 60.566287994384766,
"learning_rate": 2.6813123097352287e-11,
"logits/chosen": 1.3323711156845093,
"logits/rejected": 1.4667065143585205,
"loss": 4.2655,
"step": 476
},
{
"beta_dpo/beta_used": 0.032702527940273285,
"beta_dpo/beta_used_raw": 0.020156463608145714,
"beta_dpo/gap_mean": 24.190080642700195,
"beta_dpo/gap_std": 42.31235885620117,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.9989528795811519,
"grad_norm": 115.37435150146484,
"learning_rate": 6.7033706447061635e-12,
"logits/chosen": 1.080468773841858,
"logits/rejected": 1.1553194522857666,
"loss": 4.2411,
"step": 477
},
{
"epoch": 0.9989528795811519,
"step": 477,
"total_flos": 0.0,
"train_loss": 4.692083022879355,
"train_runtime": 7712.5154,
"train_samples_per_second": 7.927,
"train_steps_per_second": 0.062
}
],
"logging_steps": 1,
"max_steps": 477,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}