Files
llama-3-8b-base-beta-dpo-hh…/trainer_state.json

1055 lines
37 KiB
JSON
Raw Permalink Normal View History

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 340,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"beta_dpo/beta_used": 0.10121209919452667,
"beta_dpo/beta_used_raw": 0.10121209919452667,
"beta_dpo/gap_mean": -0.0009442940354347229,
"beta_dpo/gap_std": 0.03691839799284935,
"beta_dpo/mask_keep_frac": 0.9375,
"epoch": 0.0029411764705882353,
"grad_norm": 23.302410125732422,
"learning_rate": 0.0,
"logits/chosen": -0.4739703834056854,
"logits/rejected": -0.44689586758613586,
"loss": 0.6919,
"step": 1
},
{
"beta_dpo/beta_used": 0.10032124072313309,
"beta_dpo/beta_used_raw": 0.10032124072313309,
"beta_dpo/gap_mean": -0.0016960372449830174,
"beta_dpo/gap_std": 0.1151522547006607,
"beta_dpo/mask_keep_frac": 0.765625,
"epoch": 0.014705882352941176,
"grad_norm": 24.834075927734375,
"learning_rate": 5.88235294117647e-08,
"logits/chosen": -0.49943581223487854,
"logits/rejected": -0.4934660494327545,
"loss": 0.693,
"step": 5
},
{
"beta_dpo/beta_used": 0.101251520216465,
"beta_dpo/beta_used_raw": 0.101251520216465,
"beta_dpo/gap_mean": 0.0030363830737769604,
"beta_dpo/gap_std": 0.2163175642490387,
"beta_dpo/mask_keep_frac": 0.824999988079071,
"epoch": 0.029411764705882353,
"grad_norm": 21.942047119140625,
"learning_rate": 1.3235294117647057e-07,
"logits/chosen": -0.5174359083175659,
"logits/rejected": -0.5005401968955994,
"loss": 0.692,
"step": 10
},
{
"beta_dpo/beta_used": 0.10108586400747299,
"beta_dpo/beta_used_raw": 0.10108586400747299,
"beta_dpo/gap_mean": 0.024518460035324097,
"beta_dpo/gap_std": 0.2784799039363861,
"beta_dpo/mask_keep_frac": 0.824999988079071,
"epoch": 0.04411764705882353,
"grad_norm": 28.207460403442383,
"learning_rate": 2.0588235294117645e-07,
"logits/chosen": -0.5348216891288757,
"logits/rejected": -0.5156930088996887,
"loss": 0.6911,
"step": 15
},
{
"beta_dpo/beta_used": 0.10244777053594589,
"beta_dpo/beta_used_raw": 0.10244777053594589,
"beta_dpo/gap_mean": 0.0749056339263916,
"beta_dpo/gap_std": 0.33879655599594116,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.058823529411764705,
"grad_norm": 19.902040481567383,
"learning_rate": 2.7941176470588235e-07,
"logits/chosen": -0.5660465955734253,
"logits/rejected": -0.5419166088104248,
"loss": 0.6874,
"step": 20
},
{
"beta_dpo/beta_used": 0.10622622072696686,
"beta_dpo/beta_used_raw": 0.10622622072696686,
"beta_dpo/gap_mean": 0.20916345715522766,
"beta_dpo/gap_std": 0.456662118434906,
"beta_dpo/mask_keep_frac": 0.887499988079071,
"epoch": 0.07352941176470588,
"grad_norm": 22.522640228271484,
"learning_rate": 3.529411764705882e-07,
"logits/chosen": -0.5110368132591248,
"logits/rejected": -0.5050845146179199,
"loss": 0.6769,
"step": 25
},
{
"beta_dpo/beta_used": 0.10997174680233002,
"beta_dpo/beta_used_raw": 0.10997174680233002,
"beta_dpo/gap_mean": 0.5209842920303345,
"beta_dpo/gap_std": 0.7702666521072388,
"beta_dpo/mask_keep_frac": 0.762499988079071,
"epoch": 0.08823529411764706,
"grad_norm": 19.37394142150879,
"learning_rate": 4.264705882352941e-07,
"logits/chosen": -0.5535926222801208,
"logits/rejected": -0.5316442251205444,
"loss": 0.6574,
"step": 30
},
{
"beta_dpo/beta_used": 0.11611036211252213,
"beta_dpo/beta_used_raw": 0.11611036211252213,
"beta_dpo/gap_mean": 0.9489548802375793,
"beta_dpo/gap_std": 1.3326656818389893,
"beta_dpo/mask_keep_frac": 0.6625000238418579,
"epoch": 0.10294117647058823,
"grad_norm": 23.465280532836914,
"learning_rate": 5e-07,
"logits/chosen": -0.5605362057685852,
"logits/rejected": -0.5497816801071167,
"loss": 0.6265,
"step": 35
},
{
"beta_dpo/beta_used": 0.1326821744441986,
"beta_dpo/beta_used_raw": 0.1326821744441986,
"beta_dpo/gap_mean": 1.789758324623108,
"beta_dpo/gap_std": 2.447655200958252,
"beta_dpo/mask_keep_frac": 0.7749999761581421,
"epoch": 0.11764705882352941,
"grad_norm": 20.867738723754883,
"learning_rate": 4.996706849759452e-07,
"logits/chosen": -0.6393685340881348,
"logits/rejected": -0.6073721051216125,
"loss": 0.5663,
"step": 40
},
{
"beta_dpo/beta_used": 0.12091531604528427,
"beta_dpo/beta_used_raw": 0.12091531604528427,
"beta_dpo/gap_mean": 2.8082375526428223,
"beta_dpo/gap_std": 4.084892272949219,
"beta_dpo/mask_keep_frac": 0.7875000238418579,
"epoch": 0.1323529411764706,
"grad_norm": 19.109943389892578,
"learning_rate": 4.986836074908615e-07,
"logits/chosen": -0.6684064865112305,
"logits/rejected": -0.6361075639724731,
"loss": 0.5411,
"step": 45
},
{
"beta_dpo/beta_used": 0.11514081805944443,
"beta_dpo/beta_used_raw": 0.11514081805944443,
"beta_dpo/gap_mean": 3.5657267570495605,
"beta_dpo/gap_std": 5.662721633911133,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.14705882352941177,
"grad_norm": 22.54947853088379,
"learning_rate": 4.970413680203148e-07,
"logits/chosen": -0.6647295951843262,
"logits/rejected": -0.6276803612709045,
"loss": 0.536,
"step": 50
},
{
"beta_dpo/beta_used": 0.13137957453727722,
"beta_dpo/beta_used_raw": 0.13137957453727722,
"beta_dpo/gap_mean": 4.35926628112793,
"beta_dpo/gap_std": 7.092940330505371,
"beta_dpo/mask_keep_frac": 0.7250000238418579,
"epoch": 0.16176470588235295,
"grad_norm": 38.691123962402344,
"learning_rate": 4.947482930773511e-07,
"logits/chosen": -0.7010586261749268,
"logits/rejected": -0.675391435623169,
"loss": 0.493,
"step": 55
},
{
"beta_dpo/beta_used": 0.094205841422081,
"beta_dpo/beta_used_raw": 0.094205841422081,
"beta_dpo/gap_mean": 5.047989845275879,
"beta_dpo/gap_std": 8.23731803894043,
"beta_dpo/mask_keep_frac": 0.7250000238418579,
"epoch": 0.17647058823529413,
"grad_norm": 31.127901077270508,
"learning_rate": 4.918104238142103e-07,
"logits/chosen": -0.724422812461853,
"logits/rejected": -0.6809322237968445,
"loss": 0.5315,
"step": 60
},
{
"beta_dpo/beta_used": 0.11677428334951401,
"beta_dpo/beta_used_raw": 0.11677428334951401,
"beta_dpo/gap_mean": 5.827352523803711,
"beta_dpo/gap_std": 8.861337661743164,
"beta_dpo/mask_keep_frac": 0.8500000238418579,
"epoch": 0.19117647058823528,
"grad_norm": 20.432043075561523,
"learning_rate": 4.882355001067891e-07,
"logits/chosen": -0.6648474931716919,
"logits/rejected": -0.637535572052002,
"loss": 0.4741,
"step": 65
},
{
"beta_dpo/beta_used": 0.09036926180124283,
"beta_dpo/beta_used_raw": 0.09036926180124283,
"beta_dpo/gap_mean": 6.462141990661621,
"beta_dpo/gap_std": 9.157753944396973,
"beta_dpo/mask_keep_frac": 0.7875000238418579,
"epoch": 0.20588235294117646,
"grad_norm": 24.550621032714844,
"learning_rate": 4.840329401637809e-07,
"logits/chosen": -0.6986874341964722,
"logits/rejected": -0.6637295484542847,
"loss": 0.5026,
"step": 70
},
{
"beta_dpo/beta_used": 0.07552285492420197,
"beta_dpo/beta_used_raw": 0.0741354450583458,
"beta_dpo/gap_mean": 6.905457496643066,
"beta_dpo/gap_std": 9.706171035766602,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.22058823529411764,
"grad_norm": 21.727449417114258,
"learning_rate": 4.792138157142157e-07,
"logits/chosen": -0.7469242215156555,
"logits/rejected": -0.7223338484764099,
"loss": 0.5172,
"step": 75
},
{
"beta_dpo/beta_used": 0.09862758219242096,
"beta_dpo/beta_used_raw": 0.09059171378612518,
"beta_dpo/gap_mean": 7.501389980316162,
"beta_dpo/gap_std": 10.180580139160156,
"beta_dpo/mask_keep_frac": 0.8374999761581421,
"epoch": 0.23529411764705882,
"grad_norm": 0.3042762279510498,
"learning_rate": 4.737908228387656e-07,
"logits/chosen": -0.7543509602546692,
"logits/rejected": -0.7017374038696289,
"loss": 0.4756,
"step": 80
},
{
"beta_dpo/beta_used": 0.06848205626010895,
"beta_dpo/beta_used_raw": 0.05231575295329094,
"beta_dpo/gap_mean": 7.9440507888793945,
"beta_dpo/gap_std": 10.780364990234375,
"beta_dpo/mask_keep_frac": 0.800000011920929,
"epoch": 0.25,
"grad_norm": 0.3057352602481842,
"learning_rate": 4.6777824852166437e-07,
"logits/chosen": -0.7203555107116699,
"logits/rejected": -0.6912198066711426,
"loss": 0.5345,
"step": 85
},
{
"beta_dpo/beta_used": 0.08253253251314163,
"beta_dpo/beta_used_raw": 0.08253253251314163,
"beta_dpo/gap_mean": 8.4508638381958,
"beta_dpo/gap_std": 11.448507308959961,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.2647058823529412,
"grad_norm": 21.29926300048828,
"learning_rate": 4.611919330113591e-07,
"logits/chosen": -0.6781951189041138,
"logits/rejected": -0.6568866968154907,
"loss": 0.4944,
"step": 90
},
{
"beta_dpo/beta_used": 0.05684714391827583,
"beta_dpo/beta_used_raw": 0.05684714391827583,
"beta_dpo/gap_mean": 8.875980377197266,
"beta_dpo/gap_std": 12.020231246948242,
"beta_dpo/mask_keep_frac": 0.7875000238418579,
"epoch": 0.27941176470588236,
"grad_norm": 15.502776145935059,
"learning_rate": 4.5404922808905543e-07,
"logits/chosen": -0.7086650729179382,
"logits/rejected": -0.6651682257652283,
"loss": 0.5335,
"step": 95
},
{
"beta_dpo/beta_used": 0.1179933100938797,
"beta_dpo/beta_used_raw": 0.1179933100938797,
"beta_dpo/gap_mean": 9.62360954284668,
"beta_dpo/gap_std": 12.684171676635742,
"beta_dpo/mask_keep_frac": 0.762499988079071,
"epoch": 0.29411764705882354,
"grad_norm": 34.129478454589844,
"learning_rate": 4.4636895135509966e-07,
"logits/chosen": -0.6843082904815674,
"logits/rejected": -0.660758912563324,
"loss": 0.4098,
"step": 100
},
{
"epoch": 0.29411764705882354,
"eval_beta_dpo/beta_used": 0.04330332204699516,
"eval_beta_dpo/beta_used_raw": 0.01594320312142372,
"eval_beta_dpo/gap_mean": 7.997772216796875,
"eval_beta_dpo/gap_std": 13.260690689086914,
"eval_beta_dpo/mask_keep_frac": 1.0,
"eval_logits/chosen": -0.6977978944778442,
"eval_logits/rejected": -0.6668843626976013,
"eval_loss": 0.6251118183135986,
"eval_runtime": 20.4115,
"eval_samples_per_second": 114.592,
"eval_steps_per_second": 0.931,
"step": 100
},
{
"beta_dpo/beta_used": 0.15054886043071747,
"beta_dpo/beta_used_raw": 0.15054886043071747,
"beta_dpo/gap_mean": 7.8849334716796875,
"beta_dpo/gap_std": 13.30543041229248,
"beta_dpo/mask_keep_frac": 0.762499988079071,
"epoch": 0.3088235294117647,
"grad_norm": 15.665854454040527,
"learning_rate": 4.381713366536311e-07,
"logits/chosen": -0.7553393244743347,
"logits/rejected": -0.710943341255188,
"loss": 0.4061,
"step": 105
},
{
"beta_dpo/beta_used": 0.09847154468297958,
"beta_dpo/beta_used_raw": 0.09847154468297958,
"beta_dpo/gap_mean": 9.127924919128418,
"beta_dpo/gap_std": 13.331835746765137,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.3235294117647059,
"grad_norm": 7.982070446014404,
"learning_rate": 4.2947798076611047e-07,
"logits/chosen": -0.7284727692604065,
"logits/rejected": -0.696673572063446,
"loss": 0.4595,
"step": 110
},
{
"beta_dpo/beta_used": 0.12598751485347748,
"beta_dpo/beta_used_raw": 0.12598751485347748,
"beta_dpo/gap_mean": 9.942410469055176,
"beta_dpo/gap_std": 13.166864395141602,
"beta_dpo/mask_keep_frac": 0.762499988079071,
"epoch": 0.3382352941176471,
"grad_norm": 36.213523864746094,
"learning_rate": 4.203117865141635e-07,
"logits/chosen": -0.7145182490348816,
"logits/rejected": -0.6985291242599487,
"loss": 0.3778,
"step": 115
},
{
"beta_dpo/beta_used": 0.05509430170059204,
"beta_dpo/beta_used_raw": 0.048566654324531555,
"beta_dpo/gap_mean": 10.542096138000488,
"beta_dpo/gap_std": 13.39216136932373,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.35294117647058826,
"grad_norm": 14.08332347869873,
"learning_rate": 4.106969024216348e-07,
"logits/chosen": -0.7127692103385925,
"logits/rejected": -0.6740670204162598,
"loss": 0.5271,
"step": 120
},
{
"beta_dpo/beta_used": 0.05550508573651314,
"beta_dpo/beta_used_raw": 0.05064947530627251,
"beta_dpo/gap_mean": 11.009790420532227,
"beta_dpo/gap_std": 13.461648941040039,
"beta_dpo/mask_keep_frac": 0.824999988079071,
"epoch": 0.36764705882352944,
"grad_norm": 1.2771987915039062,
"learning_rate": 4.006586590948141e-07,
"logits/chosen": -0.7180671691894531,
"logits/rejected": -0.6869726777076721,
"loss": 0.513,
"step": 125
},
{
"beta_dpo/beta_used": 0.05528440326452255,
"beta_dpo/beta_used_raw": 0.05528440326452255,
"beta_dpo/gap_mean": 11.50378704071045,
"beta_dpo/gap_std": 14.039319038391113,
"beta_dpo/mask_keep_frac": 0.7749999761581421,
"epoch": 0.38235294117647056,
"grad_norm": 5.609388828277588,
"learning_rate": 3.9022350248844246e-07,
"logits/chosen": -0.7099085450172424,
"logits/rejected": -0.6715607643127441,
"loss": 0.5068,
"step": 130
},
{
"beta_dpo/beta_used": 0.08324670791625977,
"beta_dpo/beta_used_raw": 0.08324670791625977,
"beta_dpo/gap_mean": 12.224153518676758,
"beta_dpo/gap_std": 15.014795303344727,
"beta_dpo/mask_keep_frac": 0.7749999761581421,
"epoch": 0.39705882352941174,
"grad_norm": 22.845937728881836,
"learning_rate": 3.794189242333106e-07,
"logits/chosen": -0.7034512758255005,
"logits/rejected": -0.6600346565246582,
"loss": 0.4231,
"step": 135
},
{
"beta_dpo/beta_used": 0.10875506699085236,
"beta_dpo/beta_used_raw": 0.10875506699085236,
"beta_dpo/gap_mean": 13.073277473449707,
"beta_dpo/gap_std": 15.834657669067383,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.4117647058823529,
"grad_norm": 31.461519241333008,
"learning_rate": 3.6827338920900253e-07,
"logits/chosen": -0.6739553213119507,
"logits/rejected": -0.6366498470306396,
"loss": 0.3902,
"step": 140
},
{
"beta_dpo/beta_used": 0.10356837511062622,
"beta_dpo/beta_used_raw": 0.10356837511062622,
"beta_dpo/gap_mean": 13.881492614746094,
"beta_dpo/gap_std": 16.42840003967285,
"beta_dpo/mask_keep_frac": 0.7250000238418579,
"epoch": 0.4264705882352941,
"grad_norm": 29.94761085510254,
"learning_rate": 3.568162605525952e-07,
"logits/chosen": -0.7220578193664551,
"logits/rejected": -0.6809359788894653,
"loss": 0.3819,
"step": 145
},
{
"beta_dpo/beta_used": 0.019714761525392532,
"beta_dpo/beta_used_raw": -0.023198971524834633,
"beta_dpo/gap_mean": 14.227258682250977,
"beta_dpo/gap_std": 17.448183059692383,
"beta_dpo/mask_keep_frac": 0.862500011920929,
"epoch": 0.4411764705882353,
"grad_norm": 5.527612209320068,
"learning_rate": 3.4507772230088147e-07,
"logits/chosen": -0.6326473355293274,
"logits/rejected": -0.6032054424285889,
"loss": 0.607,
"step": 150
},
{
"beta_dpo/beta_used": 0.07979521155357361,
"beta_dpo/beta_used_raw": 0.06505511701107025,
"beta_dpo/gap_mean": 14.670598983764648,
"beta_dpo/gap_std": 18.554828643798828,
"beta_dpo/mask_keep_frac": 0.699999988079071,
"epoch": 0.45588235294117646,
"grad_norm": 23.10860824584961,
"learning_rate": 3.3308869986991487e-07,
"logits/chosen": -0.7037164568901062,
"logits/rejected": -0.6613154411315918,
"loss": 0.4478,
"step": 155
},
{
"beta_dpo/beta_used": 0.06584476679563522,
"beta_dpo/beta_used_raw": 0.040607184171676636,
"beta_dpo/gap_mean": 15.389450073242188,
"beta_dpo/gap_std": 19.081418991088867,
"beta_dpo/mask_keep_frac": 0.8374999761581421,
"epoch": 0.47058823529411764,
"grad_norm": 31.426233291625977,
"learning_rate": 3.208807785813777e-07,
"logits/chosen": -0.6574662923812866,
"logits/rejected": -0.630233883857727,
"loss": 0.4758,
"step": 160
},
{
"beta_dpo/beta_used": 0.08810704201459885,
"beta_dpo/beta_used_raw": 0.07671914994716644,
"beta_dpo/gap_mean": 16.22821617126465,
"beta_dpo/gap_std": 19.637792587280273,
"beta_dpo/mask_keep_frac": 0.800000011920929,
"epoch": 0.4852941176470588,
"grad_norm": 73.11759948730469,
"learning_rate": 3.084861204504122e-07,
"logits/chosen": -0.6618590354919434,
"logits/rejected": -0.6243924498558044,
"loss": 0.4768,
"step": 165
},
{
"beta_dpo/beta_used": 0.02722100354731083,
"beta_dpo/beta_used_raw": 0.02408101223409176,
"beta_dpo/gap_mean": 17.07744598388672,
"beta_dpo/gap_std": 20.277606964111328,
"beta_dpo/mask_keep_frac": 0.7875000238418579,
"epoch": 0.5,
"grad_norm": 0.5254238247871399,
"learning_rate": 2.959373794541426e-07,
"logits/chosen": -0.6966148614883423,
"logits/rejected": -0.666491687297821,
"loss": 0.5686,
"step": 170
},
{
"beta_dpo/beta_used": 0.0664793998003006,
"beta_dpo/beta_used_raw": 0.03722615912556648,
"beta_dpo/gap_mean": 17.654155731201172,
"beta_dpo/gap_std": 21.08226776123047,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.5147058823529411,
"grad_norm": 0.5429490804672241,
"learning_rate": 2.8326761550411346e-07,
"logits/chosen": -0.6848665475845337,
"logits/rejected": -0.6613831520080566,
"loss": 0.4753,
"step": 175
},
{
"beta_dpo/beta_used": 0.024588093161582947,
"beta_dpo/beta_used_raw": -0.008070843294262886,
"beta_dpo/gap_mean": 17.7331485748291,
"beta_dpo/gap_std": 22.08762550354004,
"beta_dpo/mask_keep_frac": 0.824999988079071,
"epoch": 0.5294117647058824,
"grad_norm": 15.737401962280273,
"learning_rate": 2.7051020734928443e-07,
"logits/chosen": -0.6688377261161804,
"logits/rejected": -0.6460214853286743,
"loss": 0.5579,
"step": 180
},
{
"beta_dpo/beta_used": 0.08162590861320496,
"beta_dpo/beta_used_raw": 0.05483890324831009,
"beta_dpo/gap_mean": 18.408456802368164,
"beta_dpo/gap_std": 22.61962890625,
"beta_dpo/mask_keep_frac": 0.862500011920929,
"epoch": 0.5441176470588235,
"grad_norm": 0.506800651550293,
"learning_rate": 2.5769876463904263e-07,
"logits/chosen": -0.7215656042098999,
"logits/rejected": -0.6699239611625671,
"loss": 0.4855,
"step": 185
},
{
"beta_dpo/beta_used": 0.05661209672689438,
"beta_dpo/beta_used_raw": 0.027242619544267654,
"beta_dpo/gap_mean": 18.814666748046875,
"beta_dpo/gap_std": 22.990680694580078,
"beta_dpo/mask_keep_frac": 0.824999988079071,
"epoch": 0.5588235294117647,
"grad_norm": 26.899166107177734,
"learning_rate": 2.4486703937790243e-07,
"logits/chosen": -0.7019311785697937,
"logits/rejected": -0.6498968005180359,
"loss": 0.4949,
"step": 190
},
{
"beta_dpo/beta_used": 0.02816765382885933,
"beta_dpo/beta_used_raw": 0.001962479902431369,
"beta_dpo/gap_mean": 19.533567428588867,
"beta_dpo/gap_std": 23.629451751708984,
"beta_dpo/mask_keep_frac": 0.824999988079071,
"epoch": 0.5735294117647058,
"grad_norm": 11.008431434631348,
"learning_rate": 2.320488370051681e-07,
"logits/chosen": -0.7254117727279663,
"logits/rejected": -0.6765154004096985,
"loss": 0.5286,
"step": 195
},
{
"beta_dpo/beta_used": 0.0643467828631401,
"beta_dpo/beta_used_raw": -0.0015767127042636275,
"beta_dpo/gap_mean": 20.06936264038086,
"beta_dpo/gap_std": 24.53436851501465,
"beta_dpo/mask_keep_frac": 0.862500011920929,
"epoch": 0.5882352941176471,
"grad_norm": 78.41595458984375,
"learning_rate": 2.192779273338215e-07,
"logits/chosen": -0.6875912547111511,
"logits/rejected": -0.6458339095115662,
"loss": 0.5527,
"step": 200
},
{
"epoch": 0.5882352941176471,
"eval_beta_dpo/beta_used": 0.03733323514461517,
"eval_beta_dpo/beta_used_raw": -0.0800742357969284,
"eval_beta_dpo/gap_mean": 17.105911254882812,
"eval_beta_dpo/gap_std": 25.945871353149414,
"eval_beta_dpo/mask_keep_frac": 1.0,
"eval_logits/chosen": -0.6982784271240234,
"eval_logits/rejected": -0.6586927771568298,
"eval_loss": 0.6420564651489258,
"eval_runtime": 20.3208,
"eval_samples_per_second": 115.104,
"eval_steps_per_second": 0.935,
"step": 200
},
{
"beta_dpo/beta_used": 0.18078216910362244,
"beta_dpo/beta_used_raw": 0.18078216910362244,
"beta_dpo/gap_mean": 16.670331954956055,
"beta_dpo/gap_std": 27.034832000732422,
"beta_dpo/mask_keep_frac": 0.887499988079071,
"epoch": 0.6029411764705882,
"grad_norm": 76.40715789794922,
"learning_rate": 2.065879555832674e-07,
"logits/chosen": -0.6358317136764526,
"logits/rejected": -0.5890509486198425,
"loss": 0.4449,
"step": 205
},
{
"beta_dpo/beta_used": 0.187847301363945,
"beta_dpo/beta_used_raw": 0.187847301363945,
"beta_dpo/gap_mean": 18.74222183227539,
"beta_dpo/gap_std": 27.3233642578125,
"beta_dpo/mask_keep_frac": 0.7875000238418579,
"epoch": 0.6176470588235294,
"grad_norm": 39.985557556152344,
"learning_rate": 1.9401235374032425e-07,
"logits/chosen": -0.6837745308876038,
"logits/rejected": -0.6255474090576172,
"loss": 0.4006,
"step": 210
},
{
"beta_dpo/beta_used": 0.0615837462246418,
"beta_dpo/beta_used_raw": 0.053963758051395416,
"beta_dpo/gap_mean": 20.168214797973633,
"beta_dpo/gap_std": 27.281606674194336,
"beta_dpo/mask_keep_frac": 0.862500011920929,
"epoch": 0.6323529411764706,
"grad_norm": 56.95214080810547,
"learning_rate": 1.8158425248197928e-07,
"logits/chosen": -0.5969057083129883,
"logits/rejected": -0.5545859336853027,
"loss": 0.5414,
"step": 215
},
{
"beta_dpo/beta_used": 0.031995899975299835,
"beta_dpo/beta_used_raw": 0.008795802481472492,
"beta_dpo/gap_mean": 20.327245712280273,
"beta_dpo/gap_std": 26.49213218688965,
"beta_dpo/mask_keep_frac": 0.8374999761581421,
"epoch": 0.6470588235294118,
"grad_norm": 0.543950080871582,
"learning_rate": 1.6933639389195134e-07,
"logits/chosen": -0.6841639280319214,
"logits/rejected": -0.6511374711990356,
"loss": 0.5464,
"step": 220
},
{
"beta_dpo/beta_used": 0.04347361996769905,
"beta_dpo/beta_used_raw": 0.016606144607067108,
"beta_dpo/gap_mean": 20.5634765625,
"beta_dpo/gap_std": 25.834671020507812,
"beta_dpo/mask_keep_frac": 0.7875000238418579,
"epoch": 0.6617647058823529,
"grad_norm": 29.658409118652344,
"learning_rate": 1.573010452010098e-07,
"logits/chosen": -0.6632441282272339,
"logits/rejected": -0.6577039957046509,
"loss": 0.4873,
"step": 225
},
{
"beta_dpo/beta_used": 0.06577815115451813,
"beta_dpo/beta_used_raw": 0.05692853406071663,
"beta_dpo/gap_mean": 21.10856819152832,
"beta_dpo/gap_std": 25.58962059020996,
"beta_dpo/mask_keep_frac": 0.737500011920929,
"epoch": 0.6764705882352942,
"grad_norm": 0.6241604685783386,
"learning_rate": 1.4550991377830423e-07,
"logits/chosen": -0.7060235738754272,
"logits/rejected": -0.669354259967804,
"loss": 0.4797,
"step": 230
},
{
"beta_dpo/beta_used": 0.06475953757762909,
"beta_dpo/beta_used_raw": 0.023799167945981026,
"beta_dpo/gap_mean": 21.435104370117188,
"beta_dpo/gap_std": 25.414148330688477,
"beta_dpo/mask_keep_frac": 0.800000011920929,
"epoch": 0.6911764705882353,
"grad_norm": 14.550406455993652,
"learning_rate": 1.339940635976592e-07,
"logits/chosen": -0.6889506578445435,
"logits/rejected": -0.6716668009757996,
"loss": 0.4357,
"step": 235
},
{
"beta_dpo/beta_used": 0.022588472813367844,
"beta_dpo/beta_used_raw": 0.0025838587898761034,
"beta_dpo/gap_mean": 21.869482040405273,
"beta_dpo/gap_std": 25.504459381103516,
"beta_dpo/mask_keep_frac": 0.737500011920929,
"epoch": 0.7058823529411765,
"grad_norm": 11.02522087097168,
"learning_rate": 1.227838333989088e-07,
"logits/chosen": -0.6110752820968628,
"logits/rejected": -0.5741311311721802,
"loss": 0.5265,
"step": 240
},
{
"beta_dpo/beta_used": 0.03154964745044708,
"beta_dpo/beta_used_raw": -0.007365362253040075,
"beta_dpo/gap_mean": 22.568851470947266,
"beta_dpo/gap_std": 25.90200424194336,
"beta_dpo/mask_keep_frac": 0.7875000238418579,
"epoch": 0.7205882352941176,
"grad_norm": 0.5983785390853882,
"learning_rate": 1.1190875675987355e-07,
"logits/chosen": -0.6300492286682129,
"logits/rejected": -0.6109535098075867,
"loss": 0.5448,
"step": 245
},
{
"beta_dpo/beta_used": 0.010828005149960518,
"beta_dpo/beta_used_raw": -0.058729518204927444,
"beta_dpo/gap_mean": 22.215688705444336,
"beta_dpo/gap_std": 27.019912719726562,
"beta_dpo/mask_keep_frac": 0.8374999761581421,
"epoch": 0.7352941176470589,
"grad_norm": 17.40310287475586,
"learning_rate": 1.0139748428955333e-07,
"logits/chosen": -0.6890392303466797,
"logits/rejected": -0.629682183265686,
"loss": 0.629,
"step": 250
},
{
"beta_dpo/beta_used": 0.09269052743911743,
"beta_dpo/beta_used_raw": 0.08087030053138733,
"beta_dpo/gap_mean": 22.695995330810547,
"beta_dpo/gap_std": 27.621633529663086,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.75,
"grad_norm": 53.0207405090332,
"learning_rate": 9.127770814751932e-08,
"logits/chosen": -0.6670210361480713,
"logits/rejected": -0.6118627786636353,
"loss": 0.483,
"step": 255
},
{
"beta_dpo/beta_used": 0.06755250692367554,
"beta_dpo/beta_used_raw": 0.03205912187695503,
"beta_dpo/gap_mean": 23.055011749267578,
"beta_dpo/gap_std": 28.25390625,
"beta_dpo/mask_keep_frac": 0.762499988079071,
"epoch": 0.7647058823529411,
"grad_norm": 0.6237814426422119,
"learning_rate": 8.15760890883607e-08,
"logits/chosen": -0.6667768359184265,
"logits/rejected": -0.6239995956420898,
"loss": 0.4968,
"step": 260
},
{
"beta_dpo/beta_used": 0.11417696624994278,
"beta_dpo/beta_used_raw": 0.1062905341386795,
"beta_dpo/gap_mean": 22.97963523864746,
"beta_dpo/gap_std": 28.165149688720703,
"beta_dpo/mask_keep_frac": 0.762499988079071,
"epoch": 0.7794117647058824,
"grad_norm": 93.24835205078125,
"learning_rate": 7.231818622338822e-08,
"logits/chosen": -0.6425198316574097,
"logits/rejected": -0.6141684651374817,
"loss": 0.4401,
"step": 265
},
{
"beta_dpo/beta_used": 0.07874588668346405,
"beta_dpo/beta_used_raw": -0.0038310796953737736,
"beta_dpo/gap_mean": 23.209665298461914,
"beta_dpo/gap_std": 28.643651962280273,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.7941176470588235,
"grad_norm": 12.020166397094727,
"learning_rate": 6.352838968463919e-08,
"logits/chosen": -0.6789681911468506,
"logits/rejected": -0.6184022426605225,
"loss": 0.4832,
"step": 270
},
{
"beta_dpo/beta_used": 0.05330665037035942,
"beta_dpo/beta_used_raw": -0.023246586322784424,
"beta_dpo/gap_mean": 24.013660430908203,
"beta_dpo/gap_std": 29.33469009399414,
"beta_dpo/mask_keep_frac": 0.7250000238418579,
"epoch": 0.8088235294117647,
"grad_norm": 0.6346384882926941,
"learning_rate": 5.5229856368582376e-08,
"logits/chosen": -0.6784375905990601,
"logits/rejected": -0.6448493599891663,
"loss": 0.5253,
"step": 275
},
{
"beta_dpo/beta_used": 0.05292302370071411,
"beta_dpo/beta_used_raw": -0.010663707740604877,
"beta_dpo/gap_mean": 24.447540283203125,
"beta_dpo/gap_std": 29.648815155029297,
"beta_dpo/mask_keep_frac": 0.762499988079071,
"epoch": 0.8235294117647058,
"grad_norm": 0.6082450151443481,
"learning_rate": 4.7444448928806615e-08,
"logits/chosen": -0.6179937720298767,
"logits/rejected": -0.5764154195785522,
"loss": 0.53,
"step": 280
},
{
"beta_dpo/beta_used": 0.007934780791401863,
"beta_dpo/beta_used_raw": -0.07739663124084473,
"beta_dpo/gap_mean": 24.31735610961914,
"beta_dpo/gap_std": 29.43593406677246,
"beta_dpo/mask_keep_frac": 0.762499988079071,
"epoch": 0.8382352941176471,
"grad_norm": 0.6881201863288879,
"learning_rate": 4.019267817841834e-08,
"logits/chosen": -0.6771946549415588,
"logits/rejected": -0.6086295247077942,
"loss": 0.6357,
"step": 285
},
{
"beta_dpo/beta_used": 0.06074627488851547,
"beta_dpo/beta_used_raw": 0.009315362200140953,
"beta_dpo/gap_mean": 24.635099411010742,
"beta_dpo/gap_std": 30.013864517211914,
"beta_dpo/mask_keep_frac": 0.862500011920929,
"epoch": 0.8529411764705882,
"grad_norm": 2.793721914291382,
"learning_rate": 3.349364905389032e-08,
"logits/chosen": -0.6379111409187317,
"logits/rejected": -0.5973175764083862,
"loss": 0.5345,
"step": 290
},
{
"beta_dpo/beta_used": 0.03756168484687805,
"beta_dpo/beta_used_raw": -0.01273317076265812,
"beta_dpo/gap_mean": 24.830781936645508,
"beta_dpo/gap_std": 30.81571388244629,
"beta_dpo/mask_keep_frac": 0.7875000238418579,
"epoch": 0.8676470588235294,
"grad_norm": 50.0855598449707,
"learning_rate": 2.736501028272095e-08,
"logits/chosen": -0.617714524269104,
"logits/rejected": -0.6301193237304688,
"loss": 0.5441,
"step": 295
},
{
"beta_dpo/beta_used": 0.044209837913513184,
"beta_dpo/beta_used_raw": 0.004354533273726702,
"beta_dpo/gap_mean": 24.904342651367188,
"beta_dpo/gap_std": 31.082351684570312,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.8823529411764706,
"grad_norm": 0.6376844048500061,
"learning_rate": 2.1822907887504932e-08,
"logits/chosen": -0.5964897274971008,
"logits/rejected": -0.6028931736946106,
"loss": 0.5831,
"step": 300
},
{
"epoch": 0.8823529411764706,
"eval_beta_dpo/beta_used": 0.031669970601797104,
"eval_beta_dpo/beta_used_raw": -0.11691396683454514,
"eval_beta_dpo/gap_mean": 20.08871841430664,
"eval_beta_dpo/gap_std": 30.078739166259766,
"eval_beta_dpo/mask_keep_frac": 1.0,
"eval_logits/chosen": -0.6625580191612244,
"eval_logits/rejected": -0.6206780672073364,
"eval_loss": 0.6427361965179443,
"eval_runtime": 20.3459,
"eval_samples_per_second": 114.962,
"eval_steps_per_second": 0.934,
"step": 300
},
{
"beta_dpo/beta_used": 0.20903603732585907,
"beta_dpo/beta_used_raw": 0.20903603732585907,
"beta_dpo/gap_mean": 19.461414337158203,
"beta_dpo/gap_std": 29.93111801147461,
"beta_dpo/mask_keep_frac": 0.824999988079071,
"epoch": 0.8970588235294118,
"grad_norm": 89.4169692993164,
"learning_rate": 1.6881942648911074e-08,
"logits/chosen": -0.6793561577796936,
"logits/rejected": -0.6282657384872437,
"loss": 0.5221,
"step": 305
},
{
"beta_dpo/beta_used": 0.1465708315372467,
"beta_dpo/beta_used_raw": 0.13552138209342957,
"beta_dpo/gap_mean": 21.82315444946289,
"beta_dpo/gap_std": 30.26885414123535,
"beta_dpo/mask_keep_frac": 0.800000011920929,
"epoch": 0.9117647058823529,
"grad_norm": 0.6243640780448914,
"learning_rate": 1.2555131639630567e-08,
"logits/chosen": -0.5958537459373474,
"logits/rejected": -0.5621305704116821,
"loss": 0.4927,
"step": 310
},
{
"beta_dpo/beta_used": 0.06637457758188248,
"beta_dpo/beta_used_raw": -0.010266167111694813,
"beta_dpo/gap_mean": 23.013385772705078,
"beta_dpo/gap_std": 30.935138702392578,
"beta_dpo/mask_keep_frac": 0.7875000238418579,
"epoch": 0.9264705882352942,
"grad_norm": 0.5633993148803711,
"learning_rate": 8.85387393063622e-09,
"logits/chosen": -0.6413298845291138,
"logits/rejected": -0.6052228808403015,
"loss": 0.5593,
"step": 315
},
{
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.06694652885198593,
"beta_dpo/gap_mean": 22.677587509155273,
"beta_dpo/gap_std": 31.181507110595703,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.9411764705882353,
"grad_norm": 0.6485550999641418,
"learning_rate": 5.7879205600998296e-09,
"logits/chosen": -0.6589199304580688,
"logits/rejected": -0.6012631058692932,
"loss": 0.6832,
"step": 320
},
{
"beta_dpo/beta_used": 0.03462111949920654,
"beta_dpo/beta_used_raw": -0.054052434861660004,
"beta_dpo/gap_mean": 23.140369415283203,
"beta_dpo/gap_std": 31.43625259399414,
"beta_dpo/mask_keep_frac": 0.8500000238418579,
"epoch": 0.9558823529411765,
"grad_norm": 0.5991944670677185,
"learning_rate": 3.3653488440851253e-09,
"logits/chosen": -0.6647250652313232,
"logits/rejected": -0.6088197231292725,
"loss": 0.6068,
"step": 325
},
{
"beta_dpo/beta_used": 0.03790256381034851,
"beta_dpo/beta_used_raw": 0.029860854148864746,
"beta_dpo/gap_mean": 24.296361923217773,
"beta_dpo/gap_std": 31.577083587646484,
"beta_dpo/mask_keep_frac": 0.7124999761581421,
"epoch": 0.9705882352941176,
"grad_norm": 0.614765465259552,
"learning_rate": 1.592541096695571e-09,
"logits/chosen": -0.6624591946601868,
"logits/rejected": -0.62751704454422,
"loss": 0.4828,
"step": 330
},
{
"beta_dpo/beta_used": 0.02071220614016056,
"beta_dpo/beta_used_raw": -0.03980039432644844,
"beta_dpo/gap_mean": 24.669193267822266,
"beta_dpo/gap_std": 30.901264190673828,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.9852941176470589,
"grad_norm": 25.446868896484375,
"learning_rate": 4.741678157389739e-10,
"logits/chosen": -0.6353505849838257,
"logits/rejected": -0.590802788734436,
"loss": 0.5653,
"step": 335
},
{
"beta_dpo/beta_used": 0.04662991315126419,
"beta_dpo/beta_used_raw": -0.04880411922931671,
"beta_dpo/gap_mean": 25.268396377563477,
"beta_dpo/gap_std": 30.97623062133789,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 1.0,
"grad_norm": 0.6792064309120178,
"learning_rate": 1.31753782067201e-11,
"logits/chosen": -0.6614812016487122,
"logits/rejected": -0.6312215924263,
"loss": 0.6244,
"step": 340
},
{
"epoch": 1.0,
"step": 340,
"total_flos": 0.0,
"train_loss": 0.5267414394546958,
"train_runtime": 1440.2657,
"train_samples_per_second": 30.271,
"train_steps_per_second": 0.236
}
],
"logging_steps": 5,
"max_steps": 340,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}