1127 lines
40 KiB
JSON
1127 lines
40 KiB
JSON
|
|
{
|
||
|
|
"best_global_step": null,
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 1.0,
|
||
|
|
"eval_steps": 100,
|
||
|
|
"global_step": 340,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.0029411764705882353,
|
||
|
|
"grad_norm": 23.717201232910156,
|
||
|
|
"learning_rate": 0.0,
|
||
|
|
"logits/chosen": -0.4739703834056854,
|
||
|
|
"logits/rejected": -0.44689586758613586,
|
||
|
|
"logps/chosen": -72.44038391113281,
|
||
|
|
"logps/ref_chosen": -72.42105865478516,
|
||
|
|
"logps/ref_rejected": -71.02362823486328,
|
||
|
|
"logps/rejected": -70.95858764648438,
|
||
|
|
"loss": 0.6938,
|
||
|
|
"margin_dpo/margin_mean": -0.0843656063079834,
|
||
|
|
"margin_dpo/margin_std": 0.20181308686733246,
|
||
|
|
"step": 1
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.014705882352941176,
|
||
|
|
"grad_norm": 24.15522003173828,
|
||
|
|
"learning_rate": 5.88235294117647e-08,
|
||
|
|
"logits/chosen": -0.5054930448532104,
|
||
|
|
"logits/rejected": -0.4999650716781616,
|
||
|
|
"logps/chosen": -76.55665588378906,
|
||
|
|
"logps/ref_chosen": -76.4837875366211,
|
||
|
|
"logps/ref_rejected": -71.7144775390625,
|
||
|
|
"logps/rejected": -71.69610595703125,
|
||
|
|
"loss": 0.6943,
|
||
|
|
"margin_dpo/margin_mean": -0.0912436842918396,
|
||
|
|
"margin_dpo/margin_std": 0.36911237239837646,
|
||
|
|
"step": 5
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.029411764705882353,
|
||
|
|
"grad_norm": 23.068735122680664,
|
||
|
|
"learning_rate": 1.3235294117647057e-07,
|
||
|
|
"logits/chosen": -0.5124594569206238,
|
||
|
|
"logits/rejected": -0.49317699670791626,
|
||
|
|
"logps/chosen": -76.17481994628906,
|
||
|
|
"logps/ref_chosen": -76.15269470214844,
|
||
|
|
"logps/ref_rejected": -73.87877655029297,
|
||
|
|
"logps/rejected": -73.90404510498047,
|
||
|
|
"loss": 0.6933,
|
||
|
|
"margin_dpo/margin_mean": 0.0031534195877611637,
|
||
|
|
"margin_dpo/margin_std": 0.3234597444534302,
|
||
|
|
"step": 10
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04411764705882353,
|
||
|
|
"grad_norm": 28.796030044555664,
|
||
|
|
"learning_rate": 2.0588235294117645e-07,
|
||
|
|
"logits/chosen": -0.5413268208503723,
|
||
|
|
"logits/rejected": -0.5226410031318665,
|
||
|
|
"logps/chosen": -67.05145263671875,
|
||
|
|
"logps/ref_chosen": -67.0902099609375,
|
||
|
|
"logps/ref_rejected": -73.005859375,
|
||
|
|
"logps/rejected": -73.06277465820312,
|
||
|
|
"loss": 0.6898,
|
||
|
|
"margin_dpo/margin_mean": 0.09566803276538849,
|
||
|
|
"margin_dpo/margin_std": 0.3500857353210449,
|
||
|
|
"step": 15
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.058823529411764705,
|
||
|
|
"grad_norm": 20.94307518005371,
|
||
|
|
"learning_rate": 2.7941176470588235e-07,
|
||
|
|
"logits/chosen": -0.5276651382446289,
|
||
|
|
"logits/rejected": -0.5001177787780762,
|
||
|
|
"logps/chosen": -73.87080383300781,
|
||
|
|
"logps/ref_chosen": -73.9133071899414,
|
||
|
|
"logps/ref_rejected": -80.46495056152344,
|
||
|
|
"logps/rejected": -80.62101745605469,
|
||
|
|
"loss": 0.6824,
|
||
|
|
"margin_dpo/margin_mean": 0.19857604801654816,
|
||
|
|
"margin_dpo/margin_std": 0.378338098526001,
|
||
|
|
"step": 20
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07352941176470588,
|
||
|
|
"grad_norm": 24.610126495361328,
|
||
|
|
"learning_rate": 3.529411764705882e-07,
|
||
|
|
"logits/chosen": -0.5061219930648804,
|
||
|
|
"logits/rejected": -0.5009726285934448,
|
||
|
|
"logps/chosen": -60.977256774902344,
|
||
|
|
"logps/ref_chosen": -61.014869689941406,
|
||
|
|
"logps/ref_rejected": -74.33148193359375,
|
||
|
|
"logps/rejected": -74.73905181884766,
|
||
|
|
"loss": 0.6642,
|
||
|
|
"margin_dpo/margin_mean": 0.44518008828163147,
|
||
|
|
"margin_dpo/margin_std": 0.6063351631164551,
|
||
|
|
"step": 25
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08823529411764706,
|
||
|
|
"grad_norm": 21.515533447265625,
|
||
|
|
"learning_rate": 4.264705882352941e-07,
|
||
|
|
"logits/chosen": -0.5904145240783691,
|
||
|
|
"logits/rejected": -0.5685775279998779,
|
||
|
|
"logps/chosen": -78.83164978027344,
|
||
|
|
"logps/ref_chosen": -78.80770111083984,
|
||
|
|
"logps/ref_rejected": -81.50379943847656,
|
||
|
|
"logps/rejected": -83.10078430175781,
|
||
|
|
"loss": 0.6294,
|
||
|
|
"margin_dpo/margin_mean": 1.5730347633361816,
|
||
|
|
"margin_dpo/margin_std": 1.7553781270980835,
|
||
|
|
"step": 30
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10294117647058823,
|
||
|
|
"grad_norm": 19.351747512817383,
|
||
|
|
"learning_rate": 5e-07,
|
||
|
|
"logits/chosen": -0.5566071271896362,
|
||
|
|
"logits/rejected": -0.5428273677825928,
|
||
|
|
"logps/chosen": -86.93069458007812,
|
||
|
|
"logps/ref_chosen": -86.67269134521484,
|
||
|
|
"logps/ref_rejected": -86.13935852050781,
|
||
|
|
"logps/rejected": -88.55570220947266,
|
||
|
|
"loss": 0.6028,
|
||
|
|
"margin_dpo/margin_mean": 2.158336877822876,
|
||
|
|
"margin_dpo/margin_std": 2.8764147758483887,
|
||
|
|
"step": 35
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11764705882352941,
|
||
|
|
"grad_norm": 18.829681396484375,
|
||
|
|
"learning_rate": 4.996706849759452e-07,
|
||
|
|
"logits/chosen": -0.6493271589279175,
|
||
|
|
"logits/rejected": -0.6133594512939453,
|
||
|
|
"logps/chosen": -71.7585220336914,
|
||
|
|
"logps/ref_chosen": -69.31690216064453,
|
||
|
|
"logps/ref_rejected": -83.9319076538086,
|
||
|
|
"logps/rejected": -91.31529235839844,
|
||
|
|
"loss": 0.5446,
|
||
|
|
"margin_dpo/margin_mean": 4.941764831542969,
|
||
|
|
"margin_dpo/margin_std": 8.191742897033691,
|
||
|
|
"step": 40
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1323529411764706,
|
||
|
|
"grad_norm": 23.498613357543945,
|
||
|
|
"learning_rate": 4.986836074908615e-07,
|
||
|
|
"logits/chosen": -0.6821354627609253,
|
||
|
|
"logits/rejected": -0.6494560837745667,
|
||
|
|
"logps/chosen": -73.5013427734375,
|
||
|
|
"logps/ref_chosen": -69.97550964355469,
|
||
|
|
"logps/ref_rejected": -100.10908508300781,
|
||
|
|
"logps/rejected": -108.92988586425781,
|
||
|
|
"loss": 0.553,
|
||
|
|
"margin_dpo/margin_mean": 5.294968128204346,
|
||
|
|
"margin_dpo/margin_std": 6.769883632659912,
|
||
|
|
"step": 45
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14705882352941177,
|
||
|
|
"grad_norm": 30.29952621459961,
|
||
|
|
"learning_rate": 4.970413680203148e-07,
|
||
|
|
"logits/chosen": -0.6595835089683533,
|
||
|
|
"logits/rejected": -0.6233135461807251,
|
||
|
|
"logps/chosen": -78.32559967041016,
|
||
|
|
"logps/ref_chosen": -72.90187072753906,
|
||
|
|
"logps/ref_rejected": -85.52653503417969,
|
||
|
|
"logps/rejected": -95.23252868652344,
|
||
|
|
"loss": 0.5518,
|
||
|
|
"margin_dpo/margin_mean": 4.282275199890137,
|
||
|
|
"margin_dpo/margin_std": 7.439302921295166,
|
||
|
|
"step": 50
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16176470588235295,
|
||
|
|
"grad_norm": 23.780656814575195,
|
||
|
|
"learning_rate": 4.947482930773511e-07,
|
||
|
|
"logits/chosen": -0.7151781916618347,
|
||
|
|
"logits/rejected": -0.6897321939468384,
|
||
|
|
"logps/chosen": -91.6336898803711,
|
||
|
|
"logps/ref_chosen": -87.45826721191406,
|
||
|
|
"logps/ref_rejected": -97.73722076416016,
|
||
|
|
"logps/rejected": -109.0378646850586,
|
||
|
|
"loss": 0.5112,
|
||
|
|
"margin_dpo/margin_mean": 7.125207424163818,
|
||
|
|
"margin_dpo/margin_std": 9.734245300292969,
|
||
|
|
"step": 55
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17647058823529413,
|
||
|
|
"grad_norm": 20.72915267944336,
|
||
|
|
"learning_rate": 4.918104238142103e-07,
|
||
|
|
"logits/chosen": -0.6631725430488586,
|
||
|
|
"logits/rejected": -0.6214786767959595,
|
||
|
|
"logps/chosen": -110.2301254272461,
|
||
|
|
"logps/ref_chosen": -106.60343933105469,
|
||
|
|
"logps/ref_rejected": -89.84490203857422,
|
||
|
|
"logps/rejected": -99.53703308105469,
|
||
|
|
"loss": 0.5286,
|
||
|
|
"margin_dpo/margin_mean": 6.065438747406006,
|
||
|
|
"margin_dpo/margin_std": 10.341069221496582,
|
||
|
|
"step": 60
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19117647058823528,
|
||
|
|
"grad_norm": 16.05661392211914,
|
||
|
|
"learning_rate": 4.882355001067891e-07,
|
||
|
|
"logits/chosen": -0.6507592797279358,
|
||
|
|
"logits/rejected": -0.6253207921981812,
|
||
|
|
"logps/chosen": -79.79920959472656,
|
||
|
|
"logps/ref_chosen": -76.7091064453125,
|
||
|
|
"logps/ref_rejected": -84.54231262207031,
|
||
|
|
"logps/rejected": -93.5802001953125,
|
||
|
|
"loss": 0.4746,
|
||
|
|
"margin_dpo/margin_mean": 5.947785377502441,
|
||
|
|
"margin_dpo/margin_std": 7.2523908615112305,
|
||
|
|
"step": 65
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20588235294117646,
|
||
|
|
"grad_norm": 16.453359603881836,
|
||
|
|
"learning_rate": 4.840329401637809e-07,
|
||
|
|
"logits/chosen": -0.698811411857605,
|
||
|
|
"logits/rejected": -0.6621960401535034,
|
||
|
|
"logps/chosen": -74.00252532958984,
|
||
|
|
"logps/ref_chosen": -70.0877914428711,
|
||
|
|
"logps/ref_rejected": -91.75868225097656,
|
||
|
|
"logps/rejected": -103.95845031738281,
|
||
|
|
"loss": 0.4662,
|
||
|
|
"margin_dpo/margin_mean": 8.28502082824707,
|
||
|
|
"margin_dpo/margin_std": 8.248537063598633,
|
||
|
|
"step": 70
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22058823529411764,
|
||
|
|
"grad_norm": 17.00535011291504,
|
||
|
|
"learning_rate": 4.792138157142157e-07,
|
||
|
|
"logits/chosen": -0.6827956438064575,
|
||
|
|
"logits/rejected": -0.6566829681396484,
|
||
|
|
"logps/chosen": -78.68012237548828,
|
||
|
|
"logps/ref_chosen": -74.91792297363281,
|
||
|
|
"logps/ref_rejected": -85.64566802978516,
|
||
|
|
"logps/rejected": -97.5809555053711,
|
||
|
|
"loss": 0.4863,
|
||
|
|
"margin_dpo/margin_mean": 8.173115730285645,
|
||
|
|
"margin_dpo/margin_std": 8.817681312561035,
|
||
|
|
"step": 75
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23529411764705882,
|
||
|
|
"grad_norm": 21.13958168029785,
|
||
|
|
"learning_rate": 4.737908228387656e-07,
|
||
|
|
"logits/chosen": -0.7372442483901978,
|
||
|
|
"logits/rejected": -0.689995288848877,
|
||
|
|
"logps/chosen": -102.5855941772461,
|
||
|
|
"logps/ref_chosen": -97.75636291503906,
|
||
|
|
"logps/ref_rejected": -92.88613891601562,
|
||
|
|
"logps/rejected": -105.6670150756836,
|
||
|
|
"loss": 0.451,
|
||
|
|
"margin_dpo/margin_mean": 7.951646327972412,
|
||
|
|
"margin_dpo/margin_std": 8.248537063598633,
|
||
|
|
"step": 80
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.25,
|
||
|
|
"grad_norm": 18.165218353271484,
|
||
|
|
"learning_rate": 4.6777824852166437e-07,
|
||
|
|
"logits/chosen": -0.6671745777130127,
|
||
|
|
"logits/rejected": -0.6385531425476074,
|
||
|
|
"logps/chosen": -85.70280456542969,
|
||
|
|
"logps/ref_chosen": -78.9326171875,
|
||
|
|
"logps/ref_rejected": -88.00363159179688,
|
||
|
|
"logps/rejected": -101.9955825805664,
|
||
|
|
"loss": 0.4569,
|
||
|
|
"margin_dpo/margin_mean": 7.221736907958984,
|
||
|
|
"margin_dpo/margin_std": 8.439001083374023,
|
||
|
|
"step": 85
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2647058823529412,
|
||
|
|
"grad_norm": 20.739215850830078,
|
||
|
|
"learning_rate": 4.611919330113591e-07,
|
||
|
|
"logits/chosen": -0.6510001420974731,
|
||
|
|
"logits/rejected": -0.629525899887085,
|
||
|
|
"logps/chosen": -84.86643981933594,
|
||
|
|
"logps/ref_chosen": -78.78388214111328,
|
||
|
|
"logps/ref_rejected": -90.2783203125,
|
||
|
|
"logps/rejected": -105.78071594238281,
|
||
|
|
"loss": 0.4419,
|
||
|
|
"margin_dpo/margin_mean": 9.419827461242676,
|
||
|
|
"margin_dpo/margin_std": 9.238184928894043,
|
||
|
|
"step": 90
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.27941176470588236,
|
||
|
|
"grad_norm": 17.511486053466797,
|
||
|
|
"learning_rate": 4.5404922808905543e-07,
|
||
|
|
"logits/chosen": -0.6517031788825989,
|
||
|
|
"logits/rejected": -0.6104840040206909,
|
||
|
|
"logps/chosen": -74.32402038574219,
|
||
|
|
"logps/ref_chosen": -65.91403198242188,
|
||
|
|
"logps/ref_rejected": -62.45396041870117,
|
||
|
|
"logps/rejected": -78.22425842285156,
|
||
|
|
"loss": 0.4514,
|
||
|
|
"margin_dpo/margin_mean": 7.360299587249756,
|
||
|
|
"margin_dpo/margin_std": 11.319549560546875,
|
||
|
|
"step": 95
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.29411764705882354,
|
||
|
|
"grad_norm": 18.769145965576172,
|
||
|
|
"learning_rate": 4.4636895135509966e-07,
|
||
|
|
"logits/chosen": -0.6338332295417786,
|
||
|
|
"logits/rejected": -0.6123248338699341,
|
||
|
|
"logps/chosen": -84.81422424316406,
|
||
|
|
"logps/ref_chosen": -77.24075317382812,
|
||
|
|
"logps/ref_rejected": -93.24552917480469,
|
||
|
|
"logps/rejected": -110.46153259277344,
|
||
|
|
"loss": 0.4265,
|
||
|
|
"margin_dpo/margin_mean": 9.642545700073242,
|
||
|
|
"margin_dpo/margin_std": 11.237717628479004,
|
||
|
|
"step": 100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.29411764705882354,
|
||
|
|
"eval_logits/chosen": -0.6361338496208191,
|
||
|
|
"eval_logits/rejected": -0.6085699200630188,
|
||
|
|
"eval_logps/chosen": -107.19888305664062,
|
||
|
|
"eval_logps/ref_chosen": -97.0617446899414,
|
||
|
|
"eval_logps/ref_rejected": -80.18183135986328,
|
||
|
|
"eval_logps/rejected": -95.6607437133789,
|
||
|
|
"eval_loss": 0.5427329540252686,
|
||
|
|
"eval_margin_dpo/margin_mean": 5.341787338256836,
|
||
|
|
"eval_margin_dpo/margin_std": 10.061349868774414,
|
||
|
|
"eval_runtime": 20.4041,
|
||
|
|
"eval_samples_per_second": 114.634,
|
||
|
|
"eval_steps_per_second": 0.931,
|
||
|
|
"step": 100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3088235294117647,
|
||
|
|
"grad_norm": 17.255924224853516,
|
||
|
|
"learning_rate": 4.381713366536311e-07,
|
||
|
|
"logits/chosen": -0.6774856448173523,
|
||
|
|
"logits/rejected": -0.6355584263801575,
|
||
|
|
"logps/chosen": -76.29129791259766,
|
||
|
|
"logps/ref_chosen": -70.76807403564453,
|
||
|
|
"logps/ref_rejected": -74.71427917480469,
|
||
|
|
"logps/rejected": -92.5668716430664,
|
||
|
|
"loss": 0.427,
|
||
|
|
"margin_dpo/margin_mean": 12.32937240600586,
|
||
|
|
"margin_dpo/margin_std": 11.30049991607666,
|
||
|
|
"step": 105
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3235294117647059,
|
||
|
|
"grad_norm": 18.394851684570312,
|
||
|
|
"learning_rate": 4.2947798076611047e-07,
|
||
|
|
"logits/chosen": -0.6861704587936401,
|
||
|
|
"logits/rejected": -0.6574342846870422,
|
||
|
|
"logps/chosen": -89.50286102294922,
|
||
|
|
"logps/ref_chosen": -81.14533996582031,
|
||
|
|
"logps/ref_rejected": -89.10765838623047,
|
||
|
|
"logps/rejected": -109.15755462646484,
|
||
|
|
"loss": 0.4145,
|
||
|
|
"margin_dpo/margin_mean": 11.69237995147705,
|
||
|
|
"margin_dpo/margin_std": 12.493224143981934,
|
||
|
|
"step": 110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3382352941176471,
|
||
|
|
"grad_norm": 18.24220848083496,
|
||
|
|
"learning_rate": 4.203117865141635e-07,
|
||
|
|
"logits/chosen": -0.6698350310325623,
|
||
|
|
"logits/rejected": -0.6532580256462097,
|
||
|
|
"logps/chosen": -76.1394271850586,
|
||
|
|
"logps/ref_chosen": -64.77717590332031,
|
||
|
|
"logps/ref_rejected": -99.79936218261719,
|
||
|
|
"logps/rejected": -122.98934173583984,
|
||
|
|
"loss": 0.4011,
|
||
|
|
"margin_dpo/margin_mean": 11.827718734741211,
|
||
|
|
"margin_dpo/margin_std": 12.786788940429688,
|
||
|
|
"step": 115
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.35294117647058826,
|
||
|
|
"grad_norm": 22.541603088378906,
|
||
|
|
"learning_rate": 4.106969024216348e-07,
|
||
|
|
"logits/chosen": -0.6674679517745972,
|
||
|
|
"logits/rejected": -0.6289718151092529,
|
||
|
|
"logps/chosen": -86.44108581542969,
|
||
|
|
"logps/ref_chosen": -77.35191345214844,
|
||
|
|
"logps/ref_rejected": -82.3753433227539,
|
||
|
|
"logps/rejected": -104.9658203125,
|
||
|
|
"loss": 0.4154,
|
||
|
|
"margin_dpo/margin_mean": 13.501307487487793,
|
||
|
|
"margin_dpo/margin_std": 12.104052543640137,
|
||
|
|
"step": 120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.36764705882352944,
|
||
|
|
"grad_norm": 18.34996223449707,
|
||
|
|
"learning_rate": 4.006586590948141e-07,
|
||
|
|
"logits/chosen": -0.6953171491622925,
|
||
|
|
"logits/rejected": -0.6653636693954468,
|
||
|
|
"logps/chosen": -84.34068298339844,
|
||
|
|
"logps/ref_chosen": -74.56766510009766,
|
||
|
|
"logps/ref_rejected": -87.71104431152344,
|
||
|
|
"logps/rejected": -109.58891296386719,
|
||
|
|
"loss": 0.3871,
|
||
|
|
"margin_dpo/margin_mean": 12.104842185974121,
|
||
|
|
"margin_dpo/margin_std": 12.706830978393555,
|
||
|
|
"step": 125
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.38235294117647056,
|
||
|
|
"grad_norm": 19.588794708251953,
|
||
|
|
"learning_rate": 3.9022350248844246e-07,
|
||
|
|
"logits/chosen": -0.6832663416862488,
|
||
|
|
"logits/rejected": -0.6475099325180054,
|
||
|
|
"logps/chosen": -92.37910461425781,
|
||
|
|
"logps/ref_chosen": -79.86932373046875,
|
||
|
|
"logps/ref_rejected": -92.48243713378906,
|
||
|
|
"logps/rejected": -118.1786117553711,
|
||
|
|
"loss": 0.4128,
|
||
|
|
"margin_dpo/margin_mean": 13.186391830444336,
|
||
|
|
"margin_dpo/margin_std": 16.62637710571289,
|
||
|
|
"step": 130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.39705882352941174,
|
||
|
|
"grad_norm": 19.383163452148438,
|
||
|
|
"learning_rate": 3.794189242333106e-07,
|
||
|
|
"logits/chosen": -0.6862474679946899,
|
||
|
|
"logits/rejected": -0.6508306264877319,
|
||
|
|
"logps/chosen": -93.94104766845703,
|
||
|
|
"logps/ref_chosen": -82.55046081542969,
|
||
|
|
"logps/ref_rejected": -91.73478698730469,
|
||
|
|
"logps/rejected": -115.92558288574219,
|
||
|
|
"loss": 0.3855,
|
||
|
|
"margin_dpo/margin_mean": 12.800195693969727,
|
||
|
|
"margin_dpo/margin_std": 15.272809028625488,
|
||
|
|
"step": 135
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4117647058823529,
|
||
|
|
"grad_norm": 15.547196388244629,
|
||
|
|
"learning_rate": 3.6827338920900253e-07,
|
||
|
|
"logits/chosen": -0.6315192580223083,
|
||
|
|
"logits/rejected": -0.5951318740844727,
|
||
|
|
"logps/chosen": -86.93388366699219,
|
||
|
|
"logps/ref_chosen": -76.40785217285156,
|
||
|
|
"logps/ref_rejected": -88.25675964355469,
|
||
|
|
"logps/rejected": -111.50956726074219,
|
||
|
|
"loss": 0.3782,
|
||
|
|
"margin_dpo/margin_mean": 12.726763725280762,
|
||
|
|
"margin_dpo/margin_std": 12.062446594238281,
|
||
|
|
"step": 140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4264705882352941,
|
||
|
|
"grad_norm": 16.376129150390625,
|
||
|
|
"learning_rate": 3.568162605525952e-07,
|
||
|
|
"logits/chosen": -0.694092869758606,
|
||
|
|
"logits/rejected": -0.6596013307571411,
|
||
|
|
"logps/chosen": -90.28238677978516,
|
||
|
|
"logps/ref_chosen": -79.43595123291016,
|
||
|
|
"logps/ref_rejected": -80.57792663574219,
|
||
|
|
"logps/rejected": -105.7525863647461,
|
||
|
|
"loss": 0.3722,
|
||
|
|
"margin_dpo/margin_mean": 14.328218460083008,
|
||
|
|
"margin_dpo/margin_std": 13.251609802246094,
|
||
|
|
"step": 145
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4411764705882353,
|
||
|
|
"grad_norm": 19.48674201965332,
|
||
|
|
"learning_rate": 3.4507772230088147e-07,
|
||
|
|
"logits/chosen": -0.6205201745033264,
|
||
|
|
"logits/rejected": -0.5989262461662292,
|
||
|
|
"logps/chosen": -82.92797088623047,
|
||
|
|
"logps/ref_chosen": -69.55223846435547,
|
||
|
|
"logps/ref_rejected": -76.5206298828125,
|
||
|
|
"logps/rejected": -99.82804870605469,
|
||
|
|
"loss": 0.4063,
|
||
|
|
"margin_dpo/margin_mean": 9.931692123413086,
|
||
|
|
"margin_dpo/margin_std": 11.23712158203125,
|
||
|
|
"step": 150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.45588235294117646,
|
||
|
|
"grad_norm": 18.904706954956055,
|
||
|
|
"learning_rate": 3.3308869986991487e-07,
|
||
|
|
"logits/chosen": -0.6716780662536621,
|
||
|
|
"logits/rejected": -0.6312578320503235,
|
||
|
|
"logps/chosen": -98.33650207519531,
|
||
|
|
"logps/ref_chosen": -83.78580474853516,
|
||
|
|
"logps/ref_rejected": -79.48396301269531,
|
||
|
|
"logps/rejected": -106.65342712402344,
|
||
|
|
"loss": 0.3818,
|
||
|
|
"margin_dpo/margin_mean": 12.618766784667969,
|
||
|
|
"margin_dpo/margin_std": 14.547628402709961,
|
||
|
|
"step": 155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.47058823529411764,
|
||
|
|
"grad_norm": 16.047494888305664,
|
||
|
|
"learning_rate": 3.208807785813777e-07,
|
||
|
|
"logits/chosen": -0.6216621994972229,
|
||
|
|
"logits/rejected": -0.5977298617362976,
|
||
|
|
"logps/chosen": -85.62313079833984,
|
||
|
|
"logps/ref_chosen": -71.89569091796875,
|
||
|
|
"logps/ref_rejected": -95.74468231201172,
|
||
|
|
"logps/rejected": -123.3752212524414,
|
||
|
|
"loss": 0.3508,
|
||
|
|
"margin_dpo/margin_mean": 13.903097152709961,
|
||
|
|
"margin_dpo/margin_std": 10.593317031860352,
|
||
|
|
"step": 160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4852941176470588,
|
||
|
|
"grad_norm": 17.789417266845703,
|
||
|
|
"learning_rate": 3.084861204504122e-07,
|
||
|
|
"logits/chosen": -0.6328192949295044,
|
||
|
|
"logits/rejected": -0.5899003148078918,
|
||
|
|
"logps/chosen": -91.7447738647461,
|
||
|
|
"logps/ref_chosen": -77.03978729248047,
|
||
|
|
"logps/ref_rejected": -88.47887420654297,
|
||
|
|
"logps/rejected": -120.28157806396484,
|
||
|
|
"loss": 0.3544,
|
||
|
|
"margin_dpo/margin_mean": 17.097713470458984,
|
||
|
|
"margin_dpo/margin_std": 14.805742263793945,
|
||
|
|
"step": 165
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5,
|
||
|
|
"grad_norm": 19.674264907836914,
|
||
|
|
"learning_rate": 2.959373794541426e-07,
|
||
|
|
"logits/chosen": -0.6691595911979675,
|
||
|
|
"logits/rejected": -0.6374617218971252,
|
||
|
|
"logps/chosen": -88.34684753417969,
|
||
|
|
"logps/ref_chosen": -71.93138122558594,
|
||
|
|
"logps/ref_rejected": -88.34697723388672,
|
||
|
|
"logps/rejected": -119.37635803222656,
|
||
|
|
"loss": 0.3454,
|
||
|
|
"margin_dpo/margin_mean": 14.613912582397461,
|
||
|
|
"margin_dpo/margin_std": 12.491094589233398,
|
||
|
|
"step": 170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5147058823529411,
|
||
|
|
"grad_norm": 20.303539276123047,
|
||
|
|
"learning_rate": 2.8326761550411346e-07,
|
||
|
|
"logits/chosen": -0.6473700404167175,
|
||
|
|
"logits/rejected": -0.6196728944778442,
|
||
|
|
"logps/chosen": -86.78947448730469,
|
||
|
|
"logps/ref_chosen": -68.0127182006836,
|
||
|
|
"logps/ref_rejected": -92.58775329589844,
|
||
|
|
"logps/rejected": -123.58447265625,
|
||
|
|
"loss": 0.3713,
|
||
|
|
"margin_dpo/margin_mean": 12.219950675964355,
|
||
|
|
"margin_dpo/margin_std": 13.678237915039062,
|
||
|
|
"step": 175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5294117647058824,
|
||
|
|
"grad_norm": 17.388011932373047,
|
||
|
|
"learning_rate": 2.7051020734928443e-07,
|
||
|
|
"logits/chosen": -0.611466646194458,
|
||
|
|
"logits/rejected": -0.587906002998352,
|
||
|
|
"logps/chosen": -80.26910400390625,
|
||
|
|
"logps/ref_chosen": -61.942466735839844,
|
||
|
|
"logps/ref_rejected": -87.44703674316406,
|
||
|
|
"logps/rejected": -122.92547607421875,
|
||
|
|
"loss": 0.3585,
|
||
|
|
"margin_dpo/margin_mean": 17.15180778503418,
|
||
|
|
"margin_dpo/margin_std": 14.575396537780762,
|
||
|
|
"step": 180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5441176470588235,
|
||
|
|
"grad_norm": 19.291353225708008,
|
||
|
|
"learning_rate": 2.5769876463904263e-07,
|
||
|
|
"logits/chosen": -0.6199885606765747,
|
||
|
|
"logits/rejected": -0.5656689405441284,
|
||
|
|
"logps/chosen": -87.93196105957031,
|
||
|
|
"logps/ref_chosen": -72.35160064697266,
|
||
|
|
"logps/ref_rejected": -69.03958129882812,
|
||
|
|
"logps/rejected": -99.19012451171875,
|
||
|
|
"loss": 0.3501,
|
||
|
|
"margin_dpo/margin_mean": 14.570175170898438,
|
||
|
|
"margin_dpo/margin_std": 14.043818473815918,
|
||
|
|
"step": 185
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5588235294117647,
|
||
|
|
"grad_norm": 18.855066299438477,
|
||
|
|
"learning_rate": 2.4486703937790243e-07,
|
||
|
|
"logits/chosen": -0.6612949967384338,
|
||
|
|
"logits/rejected": -0.6132839322090149,
|
||
|
|
"logps/chosen": -100.40862274169922,
|
||
|
|
"logps/ref_chosen": -79.45222473144531,
|
||
|
|
"logps/ref_rejected": -71.31239318847656,
|
||
|
|
"logps/rejected": -106.55586242675781,
|
||
|
|
"loss": 0.3605,
|
||
|
|
"margin_dpo/margin_mean": 14.287073135375977,
|
||
|
|
"margin_dpo/margin_std": 14.959236145019531,
|
||
|
|
"step": 190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5735294117647058,
|
||
|
|
"grad_norm": 20.077083587646484,
|
||
|
|
"learning_rate": 2.320488370051681e-07,
|
||
|
|
"logits/chosen": -0.667130172252655,
|
||
|
|
"logits/rejected": -0.6179927587509155,
|
||
|
|
"logps/chosen": -89.4631118774414,
|
||
|
|
"logps/ref_chosen": -71.20511627197266,
|
||
|
|
"logps/ref_rejected": -84.8467025756836,
|
||
|
|
"logps/rejected": -121.50825500488281,
|
||
|
|
"loss": 0.3429,
|
||
|
|
"margin_dpo/margin_mean": 18.4035587310791,
|
||
|
|
"margin_dpo/margin_std": 15.252446174621582,
|
||
|
|
"step": 195
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5882352941176471,
|
||
|
|
"grad_norm": 17.699968338012695,
|
||
|
|
"learning_rate": 2.192779273338215e-07,
|
||
|
|
"logits/chosen": -0.6087943911552429,
|
||
|
|
"logits/rejected": -0.5693117380142212,
|
||
|
|
"logps/chosen": -89.16279602050781,
|
||
|
|
"logps/ref_chosen": -71.31782531738281,
|
||
|
|
"logps/ref_rejected": -70.8514404296875,
|
||
|
|
"logps/rejected": -104.97953796386719,
|
||
|
|
"loss": 0.3411,
|
||
|
|
"margin_dpo/margin_mean": 16.283123016357422,
|
||
|
|
"margin_dpo/margin_std": 15.363842964172363,
|
||
|
|
"step": 200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5882352941176471,
|
||
|
|
"eval_logits/chosen": -0.602095365524292,
|
||
|
|
"eval_logits/rejected": -0.5640405416488647,
|
||
|
|
"eval_logps/chosen": -119.31637573242188,
|
||
|
|
"eval_logps/ref_chosen": -97.0617446899414,
|
||
|
|
"eval_logps/ref_rejected": -80.18183135986328,
|
||
|
|
"eval_logps/rejected": -112.73600769042969,
|
||
|
|
"eval_loss": 0.4754122495651245,
|
||
|
|
"eval_margin_dpo/margin_mean": 10.299551010131836,
|
||
|
|
"eval_margin_dpo/margin_std": 14.652626991271973,
|
||
|
|
"eval_runtime": 20.3073,
|
||
|
|
"eval_samples_per_second": 115.18,
|
||
|
|
"eval_steps_per_second": 0.936,
|
||
|
|
"step": 200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6029411764705882,
|
||
|
|
"grad_norm": 19.535417556762695,
|
||
|
|
"learning_rate": 2.065879555832674e-07,
|
||
|
|
"logits/chosen": -0.5760528445243835,
|
||
|
|
"logits/rejected": -0.5279114842414856,
|
||
|
|
"logps/chosen": -104.2248764038086,
|
||
|
|
"logps/ref_chosen": -84.44103240966797,
|
||
|
|
"logps/ref_rejected": -71.78230285644531,
|
||
|
|
"logps/rejected": -104.95343017578125,
|
||
|
|
"loss": 0.3792,
|
||
|
|
"margin_dpo/margin_mean": 13.387273788452148,
|
||
|
|
"margin_dpo/margin_std": 14.807754516601562,
|
||
|
|
"step": 205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6176470588235294,
|
||
|
|
"grad_norm": 17.17575454711914,
|
||
|
|
"learning_rate": 1.9401235374032425e-07,
|
||
|
|
"logits/chosen": -0.6245664358139038,
|
||
|
|
"logits/rejected": -0.5699684619903564,
|
||
|
|
"logps/chosen": -101.36656188964844,
|
||
|
|
"logps/ref_chosen": -83.94493103027344,
|
||
|
|
"logps/ref_rejected": -76.44892120361328,
|
||
|
|
"logps/rejected": -108.5728988647461,
|
||
|
|
"loss": 0.3251,
|
||
|
|
"margin_dpo/margin_mean": 14.702362060546875,
|
||
|
|
"margin_dpo/margin_std": 16.377933502197266,
|
||
|
|
"step": 210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6323529411764706,
|
||
|
|
"grad_norm": 20.044084548950195,
|
||
|
|
"learning_rate": 1.8158425248197928e-07,
|
||
|
|
"logits/chosen": -0.5605936050415039,
|
||
|
|
"logits/rejected": -0.5190353393554688,
|
||
|
|
"logps/chosen": -102.8707275390625,
|
||
|
|
"logps/ref_chosen": -82.23881530761719,
|
||
|
|
"logps/ref_rejected": -85.1430892944336,
|
||
|
|
"logps/rejected": -122.053955078125,
|
||
|
|
"loss": 0.3633,
|
||
|
|
"margin_dpo/margin_mean": 16.278963088989258,
|
||
|
|
"margin_dpo/margin_std": 19.206457138061523,
|
||
|
|
"step": 215
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6470588235294118,
|
||
|
|
"grad_norm": 21.036956787109375,
|
||
|
|
"learning_rate": 1.6933639389195134e-07,
|
||
|
|
"logits/chosen": -0.621160626411438,
|
||
|
|
"logits/rejected": -0.585429310798645,
|
||
|
|
"logps/chosen": -97.38944244384766,
|
||
|
|
"logps/ref_chosen": -76.5594482421875,
|
||
|
|
"logps/ref_rejected": -84.79225158691406,
|
||
|
|
"logps/rejected": -117.23432922363281,
|
||
|
|
"loss": 0.3587,
|
||
|
|
"margin_dpo/margin_mean": 11.612079620361328,
|
||
|
|
"margin_dpo/margin_std": 14.565820693969727,
|
||
|
|
"step": 220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6617647058823529,
|
||
|
|
"grad_norm": 21.023571014404297,
|
||
|
|
"learning_rate": 1.573010452010098e-07,
|
||
|
|
"logits/chosen": -0.6097210049629211,
|
||
|
|
"logits/rejected": -0.6041680574417114,
|
||
|
|
"logps/chosen": -87.20682525634766,
|
||
|
|
"logps/ref_chosen": -68.70957946777344,
|
||
|
|
"logps/ref_rejected": -95.65819549560547,
|
||
|
|
"logps/rejected": -132.78231811523438,
|
||
|
|
"loss": 0.3385,
|
||
|
|
"margin_dpo/margin_mean": 18.626880645751953,
|
||
|
|
"margin_dpo/margin_std": 18.950374603271484,
|
||
|
|
"step": 225
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6764705882352942,
|
||
|
|
"grad_norm": 19.34729766845703,
|
||
|
|
"learning_rate": 1.4550991377830423e-07,
|
||
|
|
"logits/chosen": -0.6367233395576477,
|
||
|
|
"logits/rejected": -0.5984948873519897,
|
||
|
|
"logps/chosen": -92.71955871582031,
|
||
|
|
"logps/ref_chosen": -76.04148864746094,
|
||
|
|
"logps/ref_rejected": -98.15973663330078,
|
||
|
|
"logps/rejected": -129.41712951660156,
|
||
|
|
"loss": 0.3269,
|
||
|
|
"margin_dpo/margin_mean": 14.579324722290039,
|
||
|
|
"margin_dpo/margin_std": 14.860456466674805,
|
||
|
|
"step": 230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6911764705882353,
|
||
|
|
"grad_norm": 18.263099670410156,
|
||
|
|
"learning_rate": 1.339940635976592e-07,
|
||
|
|
"logits/chosen": -0.6155376434326172,
|
||
|
|
"logits/rejected": -0.5955866575241089,
|
||
|
|
"logps/chosen": -88.53390502929688,
|
||
|
|
"logps/ref_chosen": -70.64253997802734,
|
||
|
|
"logps/ref_rejected": -90.60277557373047,
|
||
|
|
"logps/rejected": -127.80912780761719,
|
||
|
|
"loss": 0.3347,
|
||
|
|
"margin_dpo/margin_mean": 19.314985275268555,
|
||
|
|
"margin_dpo/margin_std": 15.413273811340332,
|
||
|
|
"step": 235
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7058823529411765,
|
||
|
|
"grad_norm": 21.18890380859375,
|
||
|
|
"learning_rate": 1.227838333989088e-07,
|
||
|
|
"logits/chosen": -0.5532498955726624,
|
||
|
|
"logits/rejected": -0.5167180299758911,
|
||
|
|
"logps/chosen": -94.69210052490234,
|
||
|
|
"logps/ref_chosen": -75.90282440185547,
|
||
|
|
"logps/ref_rejected": -70.22077178955078,
|
||
|
|
"logps/rejected": -106.57359313964844,
|
||
|
|
"loss": 0.3433,
|
||
|
|
"margin_dpo/margin_mean": 17.56354331970215,
|
||
|
|
"margin_dpo/margin_std": 16.671550750732422,
|
||
|
|
"step": 240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7205882352941176,
|
||
|
|
"grad_norm": 19.42283058166504,
|
||
|
|
"learning_rate": 1.1190875675987355e-07,
|
||
|
|
"logits/chosen": -0.5711519122123718,
|
||
|
|
"logits/rejected": -0.5506427884101868,
|
||
|
|
"logps/chosen": -87.87870788574219,
|
||
|
|
"logps/ref_chosen": -68.88108825683594,
|
||
|
|
"logps/ref_rejected": -102.547119140625,
|
||
|
|
"logps/rejected": -142.7686767578125,
|
||
|
|
"loss": 0.3073,
|
||
|
|
"margin_dpo/margin_mean": 21.223926544189453,
|
||
|
|
"margin_dpo/margin_std": 16.53793716430664,
|
||
|
|
"step": 245
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7352941176470589,
|
||
|
|
"grad_norm": 21.975610733032227,
|
||
|
|
"learning_rate": 1.0139748428955333e-07,
|
||
|
|
"logits/chosen": -0.63815838098526,
|
||
|
|
"logits/rejected": -0.5797184705734253,
|
||
|
|
"logps/chosen": -104.53717041015625,
|
||
|
|
"logps/ref_chosen": -88.11860656738281,
|
||
|
|
"logps/ref_rejected": -85.85978698730469,
|
||
|
|
"logps/rejected": -118.47982025146484,
|
||
|
|
"loss": 0.4138,
|
||
|
|
"margin_dpo/margin_mean": 16.201473236083984,
|
||
|
|
"margin_dpo/margin_std": 15.055798530578613,
|
||
|
|
"step": 250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.75,
|
||
|
|
"grad_norm": 21.86973762512207,
|
||
|
|
"learning_rate": 9.127770814751932e-08,
|
||
|
|
"logits/chosen": -0.5965814590454102,
|
||
|
|
"logits/rejected": -0.5407648682594299,
|
||
|
|
"logps/chosen": -113.81512451171875,
|
||
|
|
"logps/ref_chosen": -93.02457427978516,
|
||
|
|
"logps/ref_rejected": -86.20562744140625,
|
||
|
|
"logps/rejected": -123.86918640136719,
|
||
|
|
"loss": 0.3314,
|
||
|
|
"margin_dpo/margin_mean": 16.87302017211914,
|
||
|
|
"margin_dpo/margin_std": 16.191524505615234,
|
||
|
|
"step": 255
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7647058823529411,
|
||
|
|
"grad_norm": 20.748577117919922,
|
||
|
|
"learning_rate": 8.15760890883607e-08,
|
||
|
|
"logits/chosen": -0.5860427618026733,
|
||
|
|
"logits/rejected": -0.5433794856071472,
|
||
|
|
"logps/chosen": -98.30900573730469,
|
||
|
|
"logps/ref_chosen": -79.27108001708984,
|
||
|
|
"logps/ref_rejected": -94.08381652832031,
|
||
|
|
"logps/rejected": -133.5509796142578,
|
||
|
|
"loss": 0.3414,
|
||
|
|
"margin_dpo/margin_mean": 20.42922592163086,
|
||
|
|
"margin_dpo/margin_std": 16.98196029663086,
|
||
|
|
"step": 260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7794117647058824,
|
||
|
|
"grad_norm": 20.377286911010742,
|
||
|
|
"learning_rate": 7.231818622338822e-08,
|
||
|
|
"logits/chosen": -0.5678300857543945,
|
||
|
|
"logits/rejected": -0.5425071120262146,
|
||
|
|
"logps/chosen": -99.11347198486328,
|
||
|
|
"logps/ref_chosen": -79.24869537353516,
|
||
|
|
"logps/ref_rejected": -92.03797912597656,
|
||
|
|
"logps/rejected": -126.92435455322266,
|
||
|
|
"loss": 0.3493,
|
||
|
|
"margin_dpo/margin_mean": 15.021594047546387,
|
||
|
|
"margin_dpo/margin_std": 12.837465286254883,
|
||
|
|
"step": 265
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7941176470588235,
|
||
|
|
"grad_norm": 17.822444915771484,
|
||
|
|
"learning_rate": 6.352838968463919e-08,
|
||
|
|
"logits/chosen": -0.606745719909668,
|
||
|
|
"logits/rejected": -0.5473134517669678,
|
||
|
|
"logps/chosen": -97.48078918457031,
|
||
|
|
"logps/ref_chosen": -80.15914154052734,
|
||
|
|
"logps/ref_rejected": -82.13599395751953,
|
||
|
|
"logps/rejected": -116.37190246582031,
|
||
|
|
"loss": 0.332,
|
||
|
|
"margin_dpo/margin_mean": 16.91426658630371,
|
||
|
|
"margin_dpo/margin_std": 14.53496265411377,
|
||
|
|
"step": 270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8088235294117647,
|
||
|
|
"grad_norm": 20.570648193359375,
|
||
|
|
"learning_rate": 5.5229856368582376e-08,
|
||
|
|
"logits/chosen": -0.6010477542877197,
|
||
|
|
"logits/rejected": -0.5661951899528503,
|
||
|
|
"logps/chosen": -99.41848754882812,
|
||
|
|
"logps/ref_chosen": -78.87225341796875,
|
||
|
|
"logps/ref_rejected": -84.97318267822266,
|
||
|
|
"logps/rejected": -122.4229965209961,
|
||
|
|
"loss": 0.3348,
|
||
|
|
"margin_dpo/margin_mean": 16.90357780456543,
|
||
|
|
"margin_dpo/margin_std": 20.21615219116211,
|
||
|
|
"step": 275
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8235294117647058,
|
||
|
|
"grad_norm": 18.737754821777344,
|
||
|
|
"learning_rate": 4.7444448928806615e-08,
|
||
|
|
"logits/chosen": -0.5662145018577576,
|
||
|
|
"logits/rejected": -0.525722324848175,
|
||
|
|
"logps/chosen": -117.15876770019531,
|
||
|
|
"logps/ref_chosen": -96.47113800048828,
|
||
|
|
"logps/ref_rejected": -113.1217041015625,
|
||
|
|
"logps/rejected": -154.00479125976562,
|
||
|
|
"loss": 0.3329,
|
||
|
|
"margin_dpo/margin_mean": 20.195457458496094,
|
||
|
|
"margin_dpo/margin_std": 19.39859390258789,
|
||
|
|
"step": 280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8382352941176471,
|
||
|
|
"grad_norm": 21.463726043701172,
|
||
|
|
"learning_rate": 4.019267817841834e-08,
|
||
|
|
"logits/chosen": -0.630197286605835,
|
||
|
|
"logits/rejected": -0.5674210786819458,
|
||
|
|
"logps/chosen": -111.90663146972656,
|
||
|
|
"logps/ref_chosen": -91.53522491455078,
|
||
|
|
"logps/ref_rejected": -76.2660140991211,
|
||
|
|
"logps/rejected": -114.01655578613281,
|
||
|
|
"loss": 0.3382,
|
||
|
|
"margin_dpo/margin_mean": 17.379127502441406,
|
||
|
|
"margin_dpo/margin_std": 17.829914093017578,
|
||
|
|
"step": 285
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8529411764705882,
|
||
|
|
"grad_norm": 18.62375831604004,
|
||
|
|
"learning_rate": 3.349364905389032e-08,
|
||
|
|
"logits/chosen": -0.5863774418830872,
|
||
|
|
"logits/rejected": -0.5456980466842651,
|
||
|
|
"logps/chosen": -98.92496490478516,
|
||
|
|
"logps/ref_chosen": -78.96186828613281,
|
||
|
|
"logps/ref_rejected": -78.63177490234375,
|
||
|
|
"logps/rejected": -117.43675231933594,
|
||
|
|
"loss": 0.3409,
|
||
|
|
"margin_dpo/margin_mean": 18.841894149780273,
|
||
|
|
"margin_dpo/margin_std": 18.295745849609375,
|
||
|
|
"step": 290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8676470588235294,
|
||
|
|
"grad_norm": 16.586910247802734,
|
||
|
|
"learning_rate": 2.736501028272095e-08,
|
||
|
|
"logits/chosen": -0.5259509086608887,
|
||
|
|
"logits/rejected": -0.5359938144683838,
|
||
|
|
"logps/chosen": -85.10719299316406,
|
||
|
|
"logps/ref_chosen": -64.14302825927734,
|
||
|
|
"logps/ref_rejected": -98.70811462402344,
|
||
|
|
"logps/rejected": -135.39389038085938,
|
||
|
|
"loss": 0.3351,
|
||
|
|
"margin_dpo/margin_mean": 15.721613883972168,
|
||
|
|
"margin_dpo/margin_std": 16.5610294342041,
|
||
|
|
"step": 295
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8823529411764706,
|
||
|
|
"grad_norm": 19.39561653137207,
|
||
|
|
"learning_rate": 2.1822907887504932e-08,
|
||
|
|
"logits/chosen": -0.5196036696434021,
|
||
|
|
"logits/rejected": -0.5250274538993835,
|
||
|
|
"logps/chosen": -80.19596099853516,
|
||
|
|
"logps/ref_chosen": -59.2784423828125,
|
||
|
|
"logps/ref_rejected": -91.62141418457031,
|
||
|
|
"logps/rejected": -130.80763244628906,
|
||
|
|
"loss": 0.3552,
|
||
|
|
"margin_dpo/margin_mean": 18.2686824798584,
|
||
|
|
"margin_dpo/margin_std": 16.341278076171875,
|
||
|
|
"step": 300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8823529411764706,
|
||
|
|
"eval_logits/chosen": -0.5876314640045166,
|
||
|
|
"eval_logits/rejected": -0.5494834184646606,
|
||
|
|
"eval_logps/chosen": -119.7147216796875,
|
||
|
|
"eval_logps/ref_chosen": -97.0617446899414,
|
||
|
|
"eval_logps/ref_rejected": -80.18183135986328,
|
||
|
|
"eval_logps/rejected": -113.95352935791016,
|
||
|
|
"eval_loss": 0.4588142931461334,
|
||
|
|
"eval_margin_dpo/margin_mean": 11.118718147277832,
|
||
|
|
"eval_margin_dpo/margin_std": 15.069600105285645,
|
||
|
|
"eval_runtime": 20.3107,
|
||
|
|
"eval_samples_per_second": 115.161,
|
||
|
|
"eval_steps_per_second": 0.935,
|
||
|
|
"step": 300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8970588235294118,
|
||
|
|
"grad_norm": 20.72559356689453,
|
||
|
|
"learning_rate": 1.6881942648911074e-08,
|
||
|
|
"logits/chosen": -0.6059945821762085,
|
||
|
|
"logits/rejected": -0.5594589710235596,
|
||
|
|
"logps/chosen": -110.14324951171875,
|
||
|
|
"logps/ref_chosen": -90.05252838134766,
|
||
|
|
"logps/ref_rejected": -93.02938842773438,
|
||
|
|
"logps/rejected": -134.03268432617188,
|
||
|
|
"loss": 0.3241,
|
||
|
|
"margin_dpo/margin_mean": 20.912582397460938,
|
||
|
|
"margin_dpo/margin_std": 15.790578842163086,
|
||
|
|
"step": 305
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9117647058823529,
|
||
|
|
"grad_norm": 18.592208862304688,
|
||
|
|
"learning_rate": 1.2555131639630567e-08,
|
||
|
|
"logits/chosen": -0.5199320316314697,
|
||
|
|
"logits/rejected": -0.48348456621170044,
|
||
|
|
"logps/chosen": -99.32337951660156,
|
||
|
|
"logps/ref_chosen": -76.26285552978516,
|
||
|
|
"logps/ref_rejected": -81.56607055664062,
|
||
|
|
"logps/rejected": -121.98432922363281,
|
||
|
|
"loss": 0.3336,
|
||
|
|
"margin_dpo/margin_mean": 17.357715606689453,
|
||
|
|
"margin_dpo/margin_std": 17.407108306884766,
|
||
|
|
"step": 310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9264705882352942,
|
||
|
|
"grad_norm": 19.586881637573242,
|
||
|
|
"learning_rate": 8.85387393063622e-09,
|
||
|
|
"logits/chosen": -0.5956140160560608,
|
||
|
|
"logits/rejected": -0.5609453916549683,
|
||
|
|
"logps/chosen": -108.92083740234375,
|
||
|
|
"logps/ref_chosen": -89.47105407714844,
|
||
|
|
"logps/ref_rejected": -92.69927215576172,
|
||
|
|
"logps/rejected": -129.36099243164062,
|
||
|
|
"loss": 0.3444,
|
||
|
|
"margin_dpo/margin_mean": 17.211929321289062,
|
||
|
|
"margin_dpo/margin_std": 18.306108474731445,
|
||
|
|
"step": 315
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9411764705882353,
|
||
|
|
"grad_norm": 21.697298049926758,
|
||
|
|
"learning_rate": 5.7879205600998296e-09,
|
||
|
|
"logits/chosen": -0.6173444986343384,
|
||
|
|
"logits/rejected": -0.5614223480224609,
|
||
|
|
"logps/chosen": -98.2002182006836,
|
||
|
|
"logps/ref_chosen": -76.45301818847656,
|
||
|
|
"logps/ref_rejected": -65.2257308959961,
|
||
|
|
"logps/rejected": -102.35930633544922,
|
||
|
|
"loss": 0.3732,
|
||
|
|
"margin_dpo/margin_mean": 15.386384963989258,
|
||
|
|
"margin_dpo/margin_std": 15.031097412109375,
|
||
|
|
"step": 320
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9558823529411765,
|
||
|
|
"grad_norm": 22.468570709228516,
|
||
|
|
"learning_rate": 3.3653488440851253e-09,
|
||
|
|
"logits/chosen": -0.5936331152915955,
|
||
|
|
"logits/rejected": -0.5392800569534302,
|
||
|
|
"logps/chosen": -89.80387878417969,
|
||
|
|
"logps/ref_chosen": -71.98212432861328,
|
||
|
|
"logps/ref_rejected": -68.71195983886719,
|
||
|
|
"logps/rejected": -102.19793701171875,
|
||
|
|
"loss": 0.3374,
|
||
|
|
"margin_dpo/margin_mean": 15.664227485656738,
|
||
|
|
"margin_dpo/margin_std": 12.905950546264648,
|
||
|
|
"step": 325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9705882352941176,
|
||
|
|
"grad_norm": 18.491226196289062,
|
||
|
|
"learning_rate": 1.592541096695571e-09,
|
||
|
|
"logits/chosen": -0.5897213816642761,
|
||
|
|
"logits/rejected": -0.5493496656417847,
|
||
|
|
"logps/chosen": -95.72080993652344,
|
||
|
|
"logps/ref_chosen": -77.13968658447266,
|
||
|
|
"logps/ref_rejected": -93.0115737915039,
|
||
|
|
"logps/rejected": -132.95394897460938,
|
||
|
|
"loss": 0.3212,
|
||
|
|
"margin_dpo/margin_mean": 21.361230850219727,
|
||
|
|
"margin_dpo/margin_std": 19.999116897583008,
|
||
|
|
"step": 330
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9852941176470589,
|
||
|
|
"grad_norm": 17.843168258666992,
|
||
|
|
"learning_rate": 4.741678157389739e-10,
|
||
|
|
"logits/chosen": -0.5449101328849792,
|
||
|
|
"logits/rejected": -0.506639301776886,
|
||
|
|
"logps/chosen": -97.75109100341797,
|
||
|
|
"logps/ref_chosen": -78.12508392333984,
|
||
|
|
"logps/ref_rejected": -73.1583480834961,
|
||
|
|
"logps/rejected": -106.00955963134766,
|
||
|
|
"loss": 0.3225,
|
||
|
|
"margin_dpo/margin_mean": 13.225196838378906,
|
||
|
|
"margin_dpo/margin_std": 12.341458320617676,
|
||
|
|
"step": 335
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0,
|
||
|
|
"grad_norm": 19.59518051147461,
|
||
|
|
"learning_rate": 1.31753782067201e-11,
|
||
|
|
"logits/chosen": -0.5786937475204468,
|
||
|
|
"logits/rejected": -0.544124186038971,
|
||
|
|
"logps/chosen": -85.4710922241211,
|
||
|
|
"logps/ref_chosen": -64.36441802978516,
|
||
|
|
"logps/ref_rejected": -73.83573913574219,
|
||
|
|
"logps/rejected": -113.6316146850586,
|
||
|
|
"loss": 0.3138,
|
||
|
|
"margin_dpo/margin_mean": 18.689212799072266,
|
||
|
|
"margin_dpo/margin_std": 18.127058029174805,
|
||
|
|
"step": 340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0,
|
||
|
|
"step": 340,
|
||
|
|
"total_flos": 0.0,
|
||
|
|
"train_loss": 0.4133688477908864,
|
||
|
|
"train_runtime": 1436.8705,
|
||
|
|
"train_samples_per_second": 30.342,
|
||
|
|
"train_steps_per_second": 0.237
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 5,
|
||
|
|
"max_steps": 340,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 1,
|
||
|
|
"save_steps": 200,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 0.0,
|
||
|
|
"train_batch_size": 16,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|