1055 lines
37 KiB
JSON
1055 lines
37 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 100,
|
|
"global_step": 340,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"beta_dpo/beta_used": 0.10121209919452667,
|
|
"beta_dpo/beta_used_raw": 0.10121209919452667,
|
|
"beta_dpo/gap_mean": -0.0009442940354347229,
|
|
"beta_dpo/gap_std": 0.03691839799284935,
|
|
"beta_dpo/mask_keep_frac": 0.9375,
|
|
"epoch": 0.0029411764705882353,
|
|
"grad_norm": 23.302410125732422,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": -0.4739703834056854,
|
|
"logits/rejected": -0.44689586758613586,
|
|
"loss": 0.6919,
|
|
"step": 1
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10032124072313309,
|
|
"beta_dpo/beta_used_raw": 0.10032124072313309,
|
|
"beta_dpo/gap_mean": -0.0016960372449830174,
|
|
"beta_dpo/gap_std": 0.1151522547006607,
|
|
"beta_dpo/mask_keep_frac": 0.765625,
|
|
"epoch": 0.014705882352941176,
|
|
"grad_norm": 24.834075927734375,
|
|
"learning_rate": 5.88235294117647e-08,
|
|
"logits/chosen": -0.49943581223487854,
|
|
"logits/rejected": -0.4934660494327545,
|
|
"loss": 0.693,
|
|
"step": 5
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.101251520216465,
|
|
"beta_dpo/beta_used_raw": 0.101251520216465,
|
|
"beta_dpo/gap_mean": 0.0030363830737769604,
|
|
"beta_dpo/gap_std": 0.2163175642490387,
|
|
"beta_dpo/mask_keep_frac": 0.824999988079071,
|
|
"epoch": 0.029411764705882353,
|
|
"grad_norm": 21.942047119140625,
|
|
"learning_rate": 1.3235294117647057e-07,
|
|
"logits/chosen": -0.5174359083175659,
|
|
"logits/rejected": -0.5005401968955994,
|
|
"loss": 0.692,
|
|
"step": 10
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10108586400747299,
|
|
"beta_dpo/beta_used_raw": 0.10108586400747299,
|
|
"beta_dpo/gap_mean": 0.024518460035324097,
|
|
"beta_dpo/gap_std": 0.2784799039363861,
|
|
"beta_dpo/mask_keep_frac": 0.824999988079071,
|
|
"epoch": 0.04411764705882353,
|
|
"grad_norm": 28.207460403442383,
|
|
"learning_rate": 2.0588235294117645e-07,
|
|
"logits/chosen": -0.5348216891288757,
|
|
"logits/rejected": -0.5156930088996887,
|
|
"loss": 0.6911,
|
|
"step": 15
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10244777053594589,
|
|
"beta_dpo/beta_used_raw": 0.10244777053594589,
|
|
"beta_dpo/gap_mean": 0.0749056339263916,
|
|
"beta_dpo/gap_std": 0.33879655599594116,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.058823529411764705,
|
|
"grad_norm": 19.902040481567383,
|
|
"learning_rate": 2.7941176470588235e-07,
|
|
"logits/chosen": -0.5660465955734253,
|
|
"logits/rejected": -0.5419166088104248,
|
|
"loss": 0.6874,
|
|
"step": 20
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10622622072696686,
|
|
"beta_dpo/beta_used_raw": 0.10622622072696686,
|
|
"beta_dpo/gap_mean": 0.20916345715522766,
|
|
"beta_dpo/gap_std": 0.456662118434906,
|
|
"beta_dpo/mask_keep_frac": 0.887499988079071,
|
|
"epoch": 0.07352941176470588,
|
|
"grad_norm": 22.522640228271484,
|
|
"learning_rate": 3.529411764705882e-07,
|
|
"logits/chosen": -0.5110368132591248,
|
|
"logits/rejected": -0.5050845146179199,
|
|
"loss": 0.6769,
|
|
"step": 25
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10997174680233002,
|
|
"beta_dpo/beta_used_raw": 0.10997174680233002,
|
|
"beta_dpo/gap_mean": 0.5209842920303345,
|
|
"beta_dpo/gap_std": 0.7702666521072388,
|
|
"beta_dpo/mask_keep_frac": 0.762499988079071,
|
|
"epoch": 0.08823529411764706,
|
|
"grad_norm": 19.37394142150879,
|
|
"learning_rate": 4.264705882352941e-07,
|
|
"logits/chosen": -0.5535926222801208,
|
|
"logits/rejected": -0.5316442251205444,
|
|
"loss": 0.6574,
|
|
"step": 30
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.11611036211252213,
|
|
"beta_dpo/beta_used_raw": 0.11611036211252213,
|
|
"beta_dpo/gap_mean": 0.9489548802375793,
|
|
"beta_dpo/gap_std": 1.3326656818389893,
|
|
"beta_dpo/mask_keep_frac": 0.6625000238418579,
|
|
"epoch": 0.10294117647058823,
|
|
"grad_norm": 23.465280532836914,
|
|
"learning_rate": 5e-07,
|
|
"logits/chosen": -0.5605362057685852,
|
|
"logits/rejected": -0.5497816801071167,
|
|
"loss": 0.6265,
|
|
"step": 35
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.1326821744441986,
|
|
"beta_dpo/beta_used_raw": 0.1326821744441986,
|
|
"beta_dpo/gap_mean": 1.789758324623108,
|
|
"beta_dpo/gap_std": 2.447655200958252,
|
|
"beta_dpo/mask_keep_frac": 0.7749999761581421,
|
|
"epoch": 0.11764705882352941,
|
|
"grad_norm": 20.867738723754883,
|
|
"learning_rate": 4.996706849759452e-07,
|
|
"logits/chosen": -0.6393685340881348,
|
|
"logits/rejected": -0.6073721051216125,
|
|
"loss": 0.5663,
|
|
"step": 40
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.12091531604528427,
|
|
"beta_dpo/beta_used_raw": 0.12091531604528427,
|
|
"beta_dpo/gap_mean": 2.8082375526428223,
|
|
"beta_dpo/gap_std": 4.084892272949219,
|
|
"beta_dpo/mask_keep_frac": 0.7875000238418579,
|
|
"epoch": 0.1323529411764706,
|
|
"grad_norm": 19.109943389892578,
|
|
"learning_rate": 4.986836074908615e-07,
|
|
"logits/chosen": -0.6684064865112305,
|
|
"logits/rejected": -0.6361075639724731,
|
|
"loss": 0.5411,
|
|
"step": 45
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.11514081805944443,
|
|
"beta_dpo/beta_used_raw": 0.11514081805944443,
|
|
"beta_dpo/gap_mean": 3.5657267570495605,
|
|
"beta_dpo/gap_std": 5.662721633911133,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.14705882352941177,
|
|
"grad_norm": 22.54947853088379,
|
|
"learning_rate": 4.970413680203148e-07,
|
|
"logits/chosen": -0.6647295951843262,
|
|
"logits/rejected": -0.6276803612709045,
|
|
"loss": 0.536,
|
|
"step": 50
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.13137957453727722,
|
|
"beta_dpo/beta_used_raw": 0.13137957453727722,
|
|
"beta_dpo/gap_mean": 4.35926628112793,
|
|
"beta_dpo/gap_std": 7.092940330505371,
|
|
"beta_dpo/mask_keep_frac": 0.7250000238418579,
|
|
"epoch": 0.16176470588235295,
|
|
"grad_norm": 38.691123962402344,
|
|
"learning_rate": 4.947482930773511e-07,
|
|
"logits/chosen": -0.7010586261749268,
|
|
"logits/rejected": -0.675391435623169,
|
|
"loss": 0.493,
|
|
"step": 55
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.094205841422081,
|
|
"beta_dpo/beta_used_raw": 0.094205841422081,
|
|
"beta_dpo/gap_mean": 5.047989845275879,
|
|
"beta_dpo/gap_std": 8.23731803894043,
|
|
"beta_dpo/mask_keep_frac": 0.7250000238418579,
|
|
"epoch": 0.17647058823529413,
|
|
"grad_norm": 31.127901077270508,
|
|
"learning_rate": 4.918104238142103e-07,
|
|
"logits/chosen": -0.724422812461853,
|
|
"logits/rejected": -0.6809322237968445,
|
|
"loss": 0.5315,
|
|
"step": 60
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.11677428334951401,
|
|
"beta_dpo/beta_used_raw": 0.11677428334951401,
|
|
"beta_dpo/gap_mean": 5.827352523803711,
|
|
"beta_dpo/gap_std": 8.861337661743164,
|
|
"beta_dpo/mask_keep_frac": 0.8500000238418579,
|
|
"epoch": 0.19117647058823528,
|
|
"grad_norm": 20.432043075561523,
|
|
"learning_rate": 4.882355001067891e-07,
|
|
"logits/chosen": -0.6648474931716919,
|
|
"logits/rejected": -0.637535572052002,
|
|
"loss": 0.4741,
|
|
"step": 65
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.09036926180124283,
|
|
"beta_dpo/beta_used_raw": 0.09036926180124283,
|
|
"beta_dpo/gap_mean": 6.462141990661621,
|
|
"beta_dpo/gap_std": 9.157753944396973,
|
|
"beta_dpo/mask_keep_frac": 0.7875000238418579,
|
|
"epoch": 0.20588235294117646,
|
|
"grad_norm": 24.550621032714844,
|
|
"learning_rate": 4.840329401637809e-07,
|
|
"logits/chosen": -0.6986874341964722,
|
|
"logits/rejected": -0.6637295484542847,
|
|
"loss": 0.5026,
|
|
"step": 70
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.07552285492420197,
|
|
"beta_dpo/beta_used_raw": 0.0741354450583458,
|
|
"beta_dpo/gap_mean": 6.905457496643066,
|
|
"beta_dpo/gap_std": 9.706171035766602,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.22058823529411764,
|
|
"grad_norm": 21.727449417114258,
|
|
"learning_rate": 4.792138157142157e-07,
|
|
"logits/chosen": -0.7469242215156555,
|
|
"logits/rejected": -0.7223338484764099,
|
|
"loss": 0.5172,
|
|
"step": 75
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.09862758219242096,
|
|
"beta_dpo/beta_used_raw": 0.09059171378612518,
|
|
"beta_dpo/gap_mean": 7.501389980316162,
|
|
"beta_dpo/gap_std": 10.180580139160156,
|
|
"beta_dpo/mask_keep_frac": 0.8374999761581421,
|
|
"epoch": 0.23529411764705882,
|
|
"grad_norm": 0.3042762279510498,
|
|
"learning_rate": 4.737908228387656e-07,
|
|
"logits/chosen": -0.7543509602546692,
|
|
"logits/rejected": -0.7017374038696289,
|
|
"loss": 0.4756,
|
|
"step": 80
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.06848205626010895,
|
|
"beta_dpo/beta_used_raw": 0.05231575295329094,
|
|
"beta_dpo/gap_mean": 7.9440507888793945,
|
|
"beta_dpo/gap_std": 10.780364990234375,
|
|
"beta_dpo/mask_keep_frac": 0.800000011920929,
|
|
"epoch": 0.25,
|
|
"grad_norm": 0.3057352602481842,
|
|
"learning_rate": 4.6777824852166437e-07,
|
|
"logits/chosen": -0.7203555107116699,
|
|
"logits/rejected": -0.6912198066711426,
|
|
"loss": 0.5345,
|
|
"step": 85
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.08253253251314163,
|
|
"beta_dpo/beta_used_raw": 0.08253253251314163,
|
|
"beta_dpo/gap_mean": 8.4508638381958,
|
|
"beta_dpo/gap_std": 11.448507308959961,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.2647058823529412,
|
|
"grad_norm": 21.29926300048828,
|
|
"learning_rate": 4.611919330113591e-07,
|
|
"logits/chosen": -0.6781951189041138,
|
|
"logits/rejected": -0.6568866968154907,
|
|
"loss": 0.4944,
|
|
"step": 90
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.05684714391827583,
|
|
"beta_dpo/beta_used_raw": 0.05684714391827583,
|
|
"beta_dpo/gap_mean": 8.875980377197266,
|
|
"beta_dpo/gap_std": 12.020231246948242,
|
|
"beta_dpo/mask_keep_frac": 0.7875000238418579,
|
|
"epoch": 0.27941176470588236,
|
|
"grad_norm": 15.502776145935059,
|
|
"learning_rate": 4.5404922808905543e-07,
|
|
"logits/chosen": -0.7086650729179382,
|
|
"logits/rejected": -0.6651682257652283,
|
|
"loss": 0.5335,
|
|
"step": 95
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.1179933100938797,
|
|
"beta_dpo/beta_used_raw": 0.1179933100938797,
|
|
"beta_dpo/gap_mean": 9.62360954284668,
|
|
"beta_dpo/gap_std": 12.684171676635742,
|
|
"beta_dpo/mask_keep_frac": 0.762499988079071,
|
|
"epoch": 0.29411764705882354,
|
|
"grad_norm": 34.129478454589844,
|
|
"learning_rate": 4.4636895135509966e-07,
|
|
"logits/chosen": -0.6843082904815674,
|
|
"logits/rejected": -0.660758912563324,
|
|
"loss": 0.4098,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.29411764705882354,
|
|
"eval_beta_dpo/beta_used": 0.04330332204699516,
|
|
"eval_beta_dpo/beta_used_raw": 0.01594320312142372,
|
|
"eval_beta_dpo/gap_mean": 7.997772216796875,
|
|
"eval_beta_dpo/gap_std": 13.260690689086914,
|
|
"eval_beta_dpo/mask_keep_frac": 1.0,
|
|
"eval_logits/chosen": -0.6977978944778442,
|
|
"eval_logits/rejected": -0.6668843626976013,
|
|
"eval_loss": 0.6251118183135986,
|
|
"eval_runtime": 20.4115,
|
|
"eval_samples_per_second": 114.592,
|
|
"eval_steps_per_second": 0.931,
|
|
"step": 100
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.15054886043071747,
|
|
"beta_dpo/beta_used_raw": 0.15054886043071747,
|
|
"beta_dpo/gap_mean": 7.8849334716796875,
|
|
"beta_dpo/gap_std": 13.30543041229248,
|
|
"beta_dpo/mask_keep_frac": 0.762499988079071,
|
|
"epoch": 0.3088235294117647,
|
|
"grad_norm": 15.665854454040527,
|
|
"learning_rate": 4.381713366536311e-07,
|
|
"logits/chosen": -0.7553393244743347,
|
|
"logits/rejected": -0.710943341255188,
|
|
"loss": 0.4061,
|
|
"step": 105
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.09847154468297958,
|
|
"beta_dpo/beta_used_raw": 0.09847154468297958,
|
|
"beta_dpo/gap_mean": 9.127924919128418,
|
|
"beta_dpo/gap_std": 13.331835746765137,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.3235294117647059,
|
|
"grad_norm": 7.982070446014404,
|
|
"learning_rate": 4.2947798076611047e-07,
|
|
"logits/chosen": -0.7284727692604065,
|
|
"logits/rejected": -0.696673572063446,
|
|
"loss": 0.4595,
|
|
"step": 110
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.12598751485347748,
|
|
"beta_dpo/beta_used_raw": 0.12598751485347748,
|
|
"beta_dpo/gap_mean": 9.942410469055176,
|
|
"beta_dpo/gap_std": 13.166864395141602,
|
|
"beta_dpo/mask_keep_frac": 0.762499988079071,
|
|
"epoch": 0.3382352941176471,
|
|
"grad_norm": 36.213523864746094,
|
|
"learning_rate": 4.203117865141635e-07,
|
|
"logits/chosen": -0.7145182490348816,
|
|
"logits/rejected": -0.6985291242599487,
|
|
"loss": 0.3778,
|
|
"step": 115
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.05509430170059204,
|
|
"beta_dpo/beta_used_raw": 0.048566654324531555,
|
|
"beta_dpo/gap_mean": 10.542096138000488,
|
|
"beta_dpo/gap_std": 13.39216136932373,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.35294117647058826,
|
|
"grad_norm": 14.08332347869873,
|
|
"learning_rate": 4.106969024216348e-07,
|
|
"logits/chosen": -0.7127692103385925,
|
|
"logits/rejected": -0.6740670204162598,
|
|
"loss": 0.5271,
|
|
"step": 120
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.05550508573651314,
|
|
"beta_dpo/beta_used_raw": 0.05064947530627251,
|
|
"beta_dpo/gap_mean": 11.009790420532227,
|
|
"beta_dpo/gap_std": 13.461648941040039,
|
|
"beta_dpo/mask_keep_frac": 0.824999988079071,
|
|
"epoch": 0.36764705882352944,
|
|
"grad_norm": 1.2771987915039062,
|
|
"learning_rate": 4.006586590948141e-07,
|
|
"logits/chosen": -0.7180671691894531,
|
|
"logits/rejected": -0.6869726777076721,
|
|
"loss": 0.513,
|
|
"step": 125
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.05528440326452255,
|
|
"beta_dpo/beta_used_raw": 0.05528440326452255,
|
|
"beta_dpo/gap_mean": 11.50378704071045,
|
|
"beta_dpo/gap_std": 14.039319038391113,
|
|
"beta_dpo/mask_keep_frac": 0.7749999761581421,
|
|
"epoch": 0.38235294117647056,
|
|
"grad_norm": 5.609388828277588,
|
|
"learning_rate": 3.9022350248844246e-07,
|
|
"logits/chosen": -0.7099085450172424,
|
|
"logits/rejected": -0.6715607643127441,
|
|
"loss": 0.5068,
|
|
"step": 130
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.08324670791625977,
|
|
"beta_dpo/beta_used_raw": 0.08324670791625977,
|
|
"beta_dpo/gap_mean": 12.224153518676758,
|
|
"beta_dpo/gap_std": 15.014795303344727,
|
|
"beta_dpo/mask_keep_frac": 0.7749999761581421,
|
|
"epoch": 0.39705882352941174,
|
|
"grad_norm": 22.845937728881836,
|
|
"learning_rate": 3.794189242333106e-07,
|
|
"logits/chosen": -0.7034512758255005,
|
|
"logits/rejected": -0.6600346565246582,
|
|
"loss": 0.4231,
|
|
"step": 135
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10875506699085236,
|
|
"beta_dpo/beta_used_raw": 0.10875506699085236,
|
|
"beta_dpo/gap_mean": 13.073277473449707,
|
|
"beta_dpo/gap_std": 15.834657669067383,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.4117647058823529,
|
|
"grad_norm": 31.461519241333008,
|
|
"learning_rate": 3.6827338920900253e-07,
|
|
"logits/chosen": -0.6739553213119507,
|
|
"logits/rejected": -0.6366498470306396,
|
|
"loss": 0.3902,
|
|
"step": 140
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10356837511062622,
|
|
"beta_dpo/beta_used_raw": 0.10356837511062622,
|
|
"beta_dpo/gap_mean": 13.881492614746094,
|
|
"beta_dpo/gap_std": 16.42840003967285,
|
|
"beta_dpo/mask_keep_frac": 0.7250000238418579,
|
|
"epoch": 0.4264705882352941,
|
|
"grad_norm": 29.94761085510254,
|
|
"learning_rate": 3.568162605525952e-07,
|
|
"logits/chosen": -0.7220578193664551,
|
|
"logits/rejected": -0.6809359788894653,
|
|
"loss": 0.3819,
|
|
"step": 145
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.019714761525392532,
|
|
"beta_dpo/beta_used_raw": -0.023198971524834633,
|
|
"beta_dpo/gap_mean": 14.227258682250977,
|
|
"beta_dpo/gap_std": 17.448183059692383,
|
|
"beta_dpo/mask_keep_frac": 0.862500011920929,
|
|
"epoch": 0.4411764705882353,
|
|
"grad_norm": 5.527612209320068,
|
|
"learning_rate": 3.4507772230088147e-07,
|
|
"logits/chosen": -0.6326473355293274,
|
|
"logits/rejected": -0.6032054424285889,
|
|
"loss": 0.607,
|
|
"step": 150
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.07979521155357361,
|
|
"beta_dpo/beta_used_raw": 0.06505511701107025,
|
|
"beta_dpo/gap_mean": 14.670598983764648,
|
|
"beta_dpo/gap_std": 18.554828643798828,
|
|
"beta_dpo/mask_keep_frac": 0.699999988079071,
|
|
"epoch": 0.45588235294117646,
|
|
"grad_norm": 23.10860824584961,
|
|
"learning_rate": 3.3308869986991487e-07,
|
|
"logits/chosen": -0.7037164568901062,
|
|
"logits/rejected": -0.6613154411315918,
|
|
"loss": 0.4478,
|
|
"step": 155
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.06584476679563522,
|
|
"beta_dpo/beta_used_raw": 0.040607184171676636,
|
|
"beta_dpo/gap_mean": 15.389450073242188,
|
|
"beta_dpo/gap_std": 19.081418991088867,
|
|
"beta_dpo/mask_keep_frac": 0.8374999761581421,
|
|
"epoch": 0.47058823529411764,
|
|
"grad_norm": 31.426233291625977,
|
|
"learning_rate": 3.208807785813777e-07,
|
|
"logits/chosen": -0.6574662923812866,
|
|
"logits/rejected": -0.630233883857727,
|
|
"loss": 0.4758,
|
|
"step": 160
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.08810704201459885,
|
|
"beta_dpo/beta_used_raw": 0.07671914994716644,
|
|
"beta_dpo/gap_mean": 16.22821617126465,
|
|
"beta_dpo/gap_std": 19.637792587280273,
|
|
"beta_dpo/mask_keep_frac": 0.800000011920929,
|
|
"epoch": 0.4852941176470588,
|
|
"grad_norm": 73.11759948730469,
|
|
"learning_rate": 3.084861204504122e-07,
|
|
"logits/chosen": -0.6618590354919434,
|
|
"logits/rejected": -0.6243924498558044,
|
|
"loss": 0.4768,
|
|
"step": 165
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02722100354731083,
|
|
"beta_dpo/beta_used_raw": 0.02408101223409176,
|
|
"beta_dpo/gap_mean": 17.07744598388672,
|
|
"beta_dpo/gap_std": 20.277606964111328,
|
|
"beta_dpo/mask_keep_frac": 0.7875000238418579,
|
|
"epoch": 0.5,
|
|
"grad_norm": 0.5254238247871399,
|
|
"learning_rate": 2.959373794541426e-07,
|
|
"logits/chosen": -0.6966148614883423,
|
|
"logits/rejected": -0.666491687297821,
|
|
"loss": 0.5686,
|
|
"step": 170
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0664793998003006,
|
|
"beta_dpo/beta_used_raw": 0.03722615912556648,
|
|
"beta_dpo/gap_mean": 17.654155731201172,
|
|
"beta_dpo/gap_std": 21.08226776123047,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.5147058823529411,
|
|
"grad_norm": 0.5429490804672241,
|
|
"learning_rate": 2.8326761550411346e-07,
|
|
"logits/chosen": -0.6848665475845337,
|
|
"logits/rejected": -0.6613831520080566,
|
|
"loss": 0.4753,
|
|
"step": 175
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.024588093161582947,
|
|
"beta_dpo/beta_used_raw": -0.008070843294262886,
|
|
"beta_dpo/gap_mean": 17.7331485748291,
|
|
"beta_dpo/gap_std": 22.08762550354004,
|
|
"beta_dpo/mask_keep_frac": 0.824999988079071,
|
|
"epoch": 0.5294117647058824,
|
|
"grad_norm": 15.737401962280273,
|
|
"learning_rate": 2.7051020734928443e-07,
|
|
"logits/chosen": -0.6688377261161804,
|
|
"logits/rejected": -0.6460214853286743,
|
|
"loss": 0.5579,
|
|
"step": 180
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.08162590861320496,
|
|
"beta_dpo/beta_used_raw": 0.05483890324831009,
|
|
"beta_dpo/gap_mean": 18.408456802368164,
|
|
"beta_dpo/gap_std": 22.61962890625,
|
|
"beta_dpo/mask_keep_frac": 0.862500011920929,
|
|
"epoch": 0.5441176470588235,
|
|
"grad_norm": 0.506800651550293,
|
|
"learning_rate": 2.5769876463904263e-07,
|
|
"logits/chosen": -0.7215656042098999,
|
|
"logits/rejected": -0.6699239611625671,
|
|
"loss": 0.4855,
|
|
"step": 185
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.05661209672689438,
|
|
"beta_dpo/beta_used_raw": 0.027242619544267654,
|
|
"beta_dpo/gap_mean": 18.814666748046875,
|
|
"beta_dpo/gap_std": 22.990680694580078,
|
|
"beta_dpo/mask_keep_frac": 0.824999988079071,
|
|
"epoch": 0.5588235294117647,
|
|
"grad_norm": 26.899166107177734,
|
|
"learning_rate": 2.4486703937790243e-07,
|
|
"logits/chosen": -0.7019311785697937,
|
|
"logits/rejected": -0.6498968005180359,
|
|
"loss": 0.4949,
|
|
"step": 190
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02816765382885933,
|
|
"beta_dpo/beta_used_raw": 0.001962479902431369,
|
|
"beta_dpo/gap_mean": 19.533567428588867,
|
|
"beta_dpo/gap_std": 23.629451751708984,
|
|
"beta_dpo/mask_keep_frac": 0.824999988079071,
|
|
"epoch": 0.5735294117647058,
|
|
"grad_norm": 11.008431434631348,
|
|
"learning_rate": 2.320488370051681e-07,
|
|
"logits/chosen": -0.7254117727279663,
|
|
"logits/rejected": -0.6765154004096985,
|
|
"loss": 0.5286,
|
|
"step": 195
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0643467828631401,
|
|
"beta_dpo/beta_used_raw": -0.0015767127042636275,
|
|
"beta_dpo/gap_mean": 20.06936264038086,
|
|
"beta_dpo/gap_std": 24.53436851501465,
|
|
"beta_dpo/mask_keep_frac": 0.862500011920929,
|
|
"epoch": 0.5882352941176471,
|
|
"grad_norm": 78.41595458984375,
|
|
"learning_rate": 2.192779273338215e-07,
|
|
"logits/chosen": -0.6875912547111511,
|
|
"logits/rejected": -0.6458339095115662,
|
|
"loss": 0.5527,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.5882352941176471,
|
|
"eval_beta_dpo/beta_used": 0.03733323514461517,
|
|
"eval_beta_dpo/beta_used_raw": -0.0800742357969284,
|
|
"eval_beta_dpo/gap_mean": 17.105911254882812,
|
|
"eval_beta_dpo/gap_std": 25.945871353149414,
|
|
"eval_beta_dpo/mask_keep_frac": 1.0,
|
|
"eval_logits/chosen": -0.6982784271240234,
|
|
"eval_logits/rejected": -0.6586927771568298,
|
|
"eval_loss": 0.6420564651489258,
|
|
"eval_runtime": 20.3208,
|
|
"eval_samples_per_second": 115.104,
|
|
"eval_steps_per_second": 0.935,
|
|
"step": 200
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.18078216910362244,
|
|
"beta_dpo/beta_used_raw": 0.18078216910362244,
|
|
"beta_dpo/gap_mean": 16.670331954956055,
|
|
"beta_dpo/gap_std": 27.034832000732422,
|
|
"beta_dpo/mask_keep_frac": 0.887499988079071,
|
|
"epoch": 0.6029411764705882,
|
|
"grad_norm": 76.40715789794922,
|
|
"learning_rate": 2.065879555832674e-07,
|
|
"logits/chosen": -0.6358317136764526,
|
|
"logits/rejected": -0.5890509486198425,
|
|
"loss": 0.4449,
|
|
"step": 205
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.187847301363945,
|
|
"beta_dpo/beta_used_raw": 0.187847301363945,
|
|
"beta_dpo/gap_mean": 18.74222183227539,
|
|
"beta_dpo/gap_std": 27.3233642578125,
|
|
"beta_dpo/mask_keep_frac": 0.7875000238418579,
|
|
"epoch": 0.6176470588235294,
|
|
"grad_norm": 39.985557556152344,
|
|
"learning_rate": 1.9401235374032425e-07,
|
|
"logits/chosen": -0.6837745308876038,
|
|
"logits/rejected": -0.6255474090576172,
|
|
"loss": 0.4006,
|
|
"step": 210
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0615837462246418,
|
|
"beta_dpo/beta_used_raw": 0.053963758051395416,
|
|
"beta_dpo/gap_mean": 20.168214797973633,
|
|
"beta_dpo/gap_std": 27.281606674194336,
|
|
"beta_dpo/mask_keep_frac": 0.862500011920929,
|
|
"epoch": 0.6323529411764706,
|
|
"grad_norm": 56.95214080810547,
|
|
"learning_rate": 1.8158425248197928e-07,
|
|
"logits/chosen": -0.5969057083129883,
|
|
"logits/rejected": -0.5545859336853027,
|
|
"loss": 0.5414,
|
|
"step": 215
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.031995899975299835,
|
|
"beta_dpo/beta_used_raw": 0.008795802481472492,
|
|
"beta_dpo/gap_mean": 20.327245712280273,
|
|
"beta_dpo/gap_std": 26.49213218688965,
|
|
"beta_dpo/mask_keep_frac": 0.8374999761581421,
|
|
"epoch": 0.6470588235294118,
|
|
"grad_norm": 0.543950080871582,
|
|
"learning_rate": 1.6933639389195134e-07,
|
|
"logits/chosen": -0.6841639280319214,
|
|
"logits/rejected": -0.6511374711990356,
|
|
"loss": 0.5464,
|
|
"step": 220
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04347361996769905,
|
|
"beta_dpo/beta_used_raw": 0.016606144607067108,
|
|
"beta_dpo/gap_mean": 20.5634765625,
|
|
"beta_dpo/gap_std": 25.834671020507812,
|
|
"beta_dpo/mask_keep_frac": 0.7875000238418579,
|
|
"epoch": 0.6617647058823529,
|
|
"grad_norm": 29.658409118652344,
|
|
"learning_rate": 1.573010452010098e-07,
|
|
"logits/chosen": -0.6632441282272339,
|
|
"logits/rejected": -0.6577039957046509,
|
|
"loss": 0.4873,
|
|
"step": 225
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.06577815115451813,
|
|
"beta_dpo/beta_used_raw": 0.05692853406071663,
|
|
"beta_dpo/gap_mean": 21.10856819152832,
|
|
"beta_dpo/gap_std": 25.58962059020996,
|
|
"beta_dpo/mask_keep_frac": 0.737500011920929,
|
|
"epoch": 0.6764705882352942,
|
|
"grad_norm": 0.6241604685783386,
|
|
"learning_rate": 1.4550991377830423e-07,
|
|
"logits/chosen": -0.7060235738754272,
|
|
"logits/rejected": -0.669354259967804,
|
|
"loss": 0.4797,
|
|
"step": 230
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.06475953757762909,
|
|
"beta_dpo/beta_used_raw": 0.023799167945981026,
|
|
"beta_dpo/gap_mean": 21.435104370117188,
|
|
"beta_dpo/gap_std": 25.414148330688477,
|
|
"beta_dpo/mask_keep_frac": 0.800000011920929,
|
|
"epoch": 0.6911764705882353,
|
|
"grad_norm": 14.550406455993652,
|
|
"learning_rate": 1.339940635976592e-07,
|
|
"logits/chosen": -0.6889506578445435,
|
|
"logits/rejected": -0.6716668009757996,
|
|
"loss": 0.4357,
|
|
"step": 235
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.022588472813367844,
|
|
"beta_dpo/beta_used_raw": 0.0025838587898761034,
|
|
"beta_dpo/gap_mean": 21.869482040405273,
|
|
"beta_dpo/gap_std": 25.504459381103516,
|
|
"beta_dpo/mask_keep_frac": 0.737500011920929,
|
|
"epoch": 0.7058823529411765,
|
|
"grad_norm": 11.02522087097168,
|
|
"learning_rate": 1.227838333989088e-07,
|
|
"logits/chosen": -0.6110752820968628,
|
|
"logits/rejected": -0.5741311311721802,
|
|
"loss": 0.5265,
|
|
"step": 240
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03154964745044708,
|
|
"beta_dpo/beta_used_raw": -0.007365362253040075,
|
|
"beta_dpo/gap_mean": 22.568851470947266,
|
|
"beta_dpo/gap_std": 25.90200424194336,
|
|
"beta_dpo/mask_keep_frac": 0.7875000238418579,
|
|
"epoch": 0.7205882352941176,
|
|
"grad_norm": 0.5983785390853882,
|
|
"learning_rate": 1.1190875675987355e-07,
|
|
"logits/chosen": -0.6300492286682129,
|
|
"logits/rejected": -0.6109535098075867,
|
|
"loss": 0.5448,
|
|
"step": 245
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.010828005149960518,
|
|
"beta_dpo/beta_used_raw": -0.058729518204927444,
|
|
"beta_dpo/gap_mean": 22.215688705444336,
|
|
"beta_dpo/gap_std": 27.019912719726562,
|
|
"beta_dpo/mask_keep_frac": 0.8374999761581421,
|
|
"epoch": 0.7352941176470589,
|
|
"grad_norm": 17.40310287475586,
|
|
"learning_rate": 1.0139748428955333e-07,
|
|
"logits/chosen": -0.6890392303466797,
|
|
"logits/rejected": -0.629682183265686,
|
|
"loss": 0.629,
|
|
"step": 250
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.09269052743911743,
|
|
"beta_dpo/beta_used_raw": 0.08087030053138733,
|
|
"beta_dpo/gap_mean": 22.695995330810547,
|
|
"beta_dpo/gap_std": 27.621633529663086,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.75,
|
|
"grad_norm": 53.0207405090332,
|
|
"learning_rate": 9.127770814751932e-08,
|
|
"logits/chosen": -0.6670210361480713,
|
|
"logits/rejected": -0.6118627786636353,
|
|
"loss": 0.483,
|
|
"step": 255
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.06755250692367554,
|
|
"beta_dpo/beta_used_raw": 0.03205912187695503,
|
|
"beta_dpo/gap_mean": 23.055011749267578,
|
|
"beta_dpo/gap_std": 28.25390625,
|
|
"beta_dpo/mask_keep_frac": 0.762499988079071,
|
|
"epoch": 0.7647058823529411,
|
|
"grad_norm": 0.6237814426422119,
|
|
"learning_rate": 8.15760890883607e-08,
|
|
"logits/chosen": -0.6667768359184265,
|
|
"logits/rejected": -0.6239995956420898,
|
|
"loss": 0.4968,
|
|
"step": 260
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.11417696624994278,
|
|
"beta_dpo/beta_used_raw": 0.1062905341386795,
|
|
"beta_dpo/gap_mean": 22.97963523864746,
|
|
"beta_dpo/gap_std": 28.165149688720703,
|
|
"beta_dpo/mask_keep_frac": 0.762499988079071,
|
|
"epoch": 0.7794117647058824,
|
|
"grad_norm": 93.24835205078125,
|
|
"learning_rate": 7.231818622338822e-08,
|
|
"logits/chosen": -0.6425198316574097,
|
|
"logits/rejected": -0.6141684651374817,
|
|
"loss": 0.4401,
|
|
"step": 265
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.07874588668346405,
|
|
"beta_dpo/beta_used_raw": -0.0038310796953737736,
|
|
"beta_dpo/gap_mean": 23.209665298461914,
|
|
"beta_dpo/gap_std": 28.643651962280273,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.7941176470588235,
|
|
"grad_norm": 12.020166397094727,
|
|
"learning_rate": 6.352838968463919e-08,
|
|
"logits/chosen": -0.6789681911468506,
|
|
"logits/rejected": -0.6184022426605225,
|
|
"loss": 0.4832,
|
|
"step": 270
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.05330665037035942,
|
|
"beta_dpo/beta_used_raw": -0.023246586322784424,
|
|
"beta_dpo/gap_mean": 24.013660430908203,
|
|
"beta_dpo/gap_std": 29.33469009399414,
|
|
"beta_dpo/mask_keep_frac": 0.7250000238418579,
|
|
"epoch": 0.8088235294117647,
|
|
"grad_norm": 0.6346384882926941,
|
|
"learning_rate": 5.5229856368582376e-08,
|
|
"logits/chosen": -0.6784375905990601,
|
|
"logits/rejected": -0.6448493599891663,
|
|
"loss": 0.5253,
|
|
"step": 275
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.05292302370071411,
|
|
"beta_dpo/beta_used_raw": -0.010663707740604877,
|
|
"beta_dpo/gap_mean": 24.447540283203125,
|
|
"beta_dpo/gap_std": 29.648815155029297,
|
|
"beta_dpo/mask_keep_frac": 0.762499988079071,
|
|
"epoch": 0.8235294117647058,
|
|
"grad_norm": 0.6082450151443481,
|
|
"learning_rate": 4.7444448928806615e-08,
|
|
"logits/chosen": -0.6179937720298767,
|
|
"logits/rejected": -0.5764154195785522,
|
|
"loss": 0.53,
|
|
"step": 280
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.007934780791401863,
|
|
"beta_dpo/beta_used_raw": -0.07739663124084473,
|
|
"beta_dpo/gap_mean": 24.31735610961914,
|
|
"beta_dpo/gap_std": 29.43593406677246,
|
|
"beta_dpo/mask_keep_frac": 0.762499988079071,
|
|
"epoch": 0.8382352941176471,
|
|
"grad_norm": 0.6881201863288879,
|
|
"learning_rate": 4.019267817841834e-08,
|
|
"logits/chosen": -0.6771946549415588,
|
|
"logits/rejected": -0.6086295247077942,
|
|
"loss": 0.6357,
|
|
"step": 285
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.06074627488851547,
|
|
"beta_dpo/beta_used_raw": 0.009315362200140953,
|
|
"beta_dpo/gap_mean": 24.635099411010742,
|
|
"beta_dpo/gap_std": 30.013864517211914,
|
|
"beta_dpo/mask_keep_frac": 0.862500011920929,
|
|
"epoch": 0.8529411764705882,
|
|
"grad_norm": 2.793721914291382,
|
|
"learning_rate": 3.349364905389032e-08,
|
|
"logits/chosen": -0.6379111409187317,
|
|
"logits/rejected": -0.5973175764083862,
|
|
"loss": 0.5345,
|
|
"step": 290
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03756168484687805,
|
|
"beta_dpo/beta_used_raw": -0.01273317076265812,
|
|
"beta_dpo/gap_mean": 24.830781936645508,
|
|
"beta_dpo/gap_std": 30.81571388244629,
|
|
"beta_dpo/mask_keep_frac": 0.7875000238418579,
|
|
"epoch": 0.8676470588235294,
|
|
"grad_norm": 50.0855598449707,
|
|
"learning_rate": 2.736501028272095e-08,
|
|
"logits/chosen": -0.617714524269104,
|
|
"logits/rejected": -0.6301193237304688,
|
|
"loss": 0.5441,
|
|
"step": 295
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.044209837913513184,
|
|
"beta_dpo/beta_used_raw": 0.004354533273726702,
|
|
"beta_dpo/gap_mean": 24.904342651367188,
|
|
"beta_dpo/gap_std": 31.082351684570312,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.8823529411764706,
|
|
"grad_norm": 0.6376844048500061,
|
|
"learning_rate": 2.1822907887504932e-08,
|
|
"logits/chosen": -0.5964897274971008,
|
|
"logits/rejected": -0.6028931736946106,
|
|
"loss": 0.5831,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.8823529411764706,
|
|
"eval_beta_dpo/beta_used": 0.031669970601797104,
|
|
"eval_beta_dpo/beta_used_raw": -0.11691396683454514,
|
|
"eval_beta_dpo/gap_mean": 20.08871841430664,
|
|
"eval_beta_dpo/gap_std": 30.078739166259766,
|
|
"eval_beta_dpo/mask_keep_frac": 1.0,
|
|
"eval_logits/chosen": -0.6625580191612244,
|
|
"eval_logits/rejected": -0.6206780672073364,
|
|
"eval_loss": 0.6427361965179443,
|
|
"eval_runtime": 20.3459,
|
|
"eval_samples_per_second": 114.962,
|
|
"eval_steps_per_second": 0.934,
|
|
"step": 300
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.20903603732585907,
|
|
"beta_dpo/beta_used_raw": 0.20903603732585907,
|
|
"beta_dpo/gap_mean": 19.461414337158203,
|
|
"beta_dpo/gap_std": 29.93111801147461,
|
|
"beta_dpo/mask_keep_frac": 0.824999988079071,
|
|
"epoch": 0.8970588235294118,
|
|
"grad_norm": 89.4169692993164,
|
|
"learning_rate": 1.6881942648911074e-08,
|
|
"logits/chosen": -0.6793561577796936,
|
|
"logits/rejected": -0.6282657384872437,
|
|
"loss": 0.5221,
|
|
"step": 305
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.1465708315372467,
|
|
"beta_dpo/beta_used_raw": 0.13552138209342957,
|
|
"beta_dpo/gap_mean": 21.82315444946289,
|
|
"beta_dpo/gap_std": 30.26885414123535,
|
|
"beta_dpo/mask_keep_frac": 0.800000011920929,
|
|
"epoch": 0.9117647058823529,
|
|
"grad_norm": 0.6243640780448914,
|
|
"learning_rate": 1.2555131639630567e-08,
|
|
"logits/chosen": -0.5958537459373474,
|
|
"logits/rejected": -0.5621305704116821,
|
|
"loss": 0.4927,
|
|
"step": 310
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.06637457758188248,
|
|
"beta_dpo/beta_used_raw": -0.010266167111694813,
|
|
"beta_dpo/gap_mean": 23.013385772705078,
|
|
"beta_dpo/gap_std": 30.935138702392578,
|
|
"beta_dpo/mask_keep_frac": 0.7875000238418579,
|
|
"epoch": 0.9264705882352942,
|
|
"grad_norm": 0.5633993148803711,
|
|
"learning_rate": 8.85387393063622e-09,
|
|
"logits/chosen": -0.6413298845291138,
|
|
"logits/rejected": -0.6052228808403015,
|
|
"loss": 0.5593,
|
|
"step": 315
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0010000000474974513,
|
|
"beta_dpo/beta_used_raw": -0.06694652885198593,
|
|
"beta_dpo/gap_mean": 22.677587509155273,
|
|
"beta_dpo/gap_std": 31.181507110595703,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.9411764705882353,
|
|
"grad_norm": 0.6485550999641418,
|
|
"learning_rate": 5.7879205600998296e-09,
|
|
"logits/chosen": -0.6589199304580688,
|
|
"logits/rejected": -0.6012631058692932,
|
|
"loss": 0.6832,
|
|
"step": 320
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03462111949920654,
|
|
"beta_dpo/beta_used_raw": -0.054052434861660004,
|
|
"beta_dpo/gap_mean": 23.140369415283203,
|
|
"beta_dpo/gap_std": 31.43625259399414,
|
|
"beta_dpo/mask_keep_frac": 0.8500000238418579,
|
|
"epoch": 0.9558823529411765,
|
|
"grad_norm": 0.5991944670677185,
|
|
"learning_rate": 3.3653488440851253e-09,
|
|
"logits/chosen": -0.6647250652313232,
|
|
"logits/rejected": -0.6088197231292725,
|
|
"loss": 0.6068,
|
|
"step": 325
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.03790256381034851,
|
|
"beta_dpo/beta_used_raw": 0.029860854148864746,
|
|
"beta_dpo/gap_mean": 24.296361923217773,
|
|
"beta_dpo/gap_std": 31.577083587646484,
|
|
"beta_dpo/mask_keep_frac": 0.7124999761581421,
|
|
"epoch": 0.9705882352941176,
|
|
"grad_norm": 0.614765465259552,
|
|
"learning_rate": 1.592541096695571e-09,
|
|
"logits/chosen": -0.6624591946601868,
|
|
"logits/rejected": -0.62751704454422,
|
|
"loss": 0.4828,
|
|
"step": 330
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.02071220614016056,
|
|
"beta_dpo/beta_used_raw": -0.03980039432644844,
|
|
"beta_dpo/gap_mean": 24.669193267822266,
|
|
"beta_dpo/gap_std": 30.901264190673828,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.9852941176470589,
|
|
"grad_norm": 25.446868896484375,
|
|
"learning_rate": 4.741678157389739e-10,
|
|
"logits/chosen": -0.6353505849838257,
|
|
"logits/rejected": -0.590802788734436,
|
|
"loss": 0.5653,
|
|
"step": 335
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.04662991315126419,
|
|
"beta_dpo/beta_used_raw": -0.04880411922931671,
|
|
"beta_dpo/gap_mean": 25.268396377563477,
|
|
"beta_dpo/gap_std": 30.97623062133789,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.6792064309120178,
|
|
"learning_rate": 1.31753782067201e-11,
|
|
"logits/chosen": -0.6614812016487122,
|
|
"logits/rejected": -0.6312215924263,
|
|
"loss": 0.6244,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"step": 340,
|
|
"total_flos": 0.0,
|
|
"train_loss": 0.5267414394546958,
|
|
"train_runtime": 1440.2657,
|
|
"train_samples_per_second": 30.271,
|
|
"train_steps_per_second": 0.236
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 340,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 200,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 16,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|