1027 lines
36 KiB
JSON
1027 lines
36 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 100,
|
|
"global_step": 330,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"beta_dpo/beta_used": 0.10009249299764633,
|
|
"beta_dpo/beta_used_raw": 0.10009249299764633,
|
|
"beta_dpo/gap_mean": 0.0012140885228291154,
|
|
"beta_dpo/gap_std": 0.029596734791994095,
|
|
"beta_dpo/mask_keep_frac": 0.9375,
|
|
"epoch": 0.0030303030303030303,
|
|
"grad_norm": 11.079418182373047,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": -0.818070113658905,
|
|
"logits/rejected": -0.7612971663475037,
|
|
"loss": 0.6929,
|
|
"step": 1
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10004878044128418,
|
|
"beta_dpo/beta_used_raw": 0.10004878044128418,
|
|
"beta_dpo/gap_mean": -0.003181760897859931,
|
|
"beta_dpo/gap_std": 0.09769059717655182,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.015151515151515152,
|
|
"grad_norm": 12.246779441833496,
|
|
"learning_rate": 6.060606060606061e-08,
|
|
"logits/chosen": -0.8416346907615662,
|
|
"logits/rejected": -0.8071619272232056,
|
|
"loss": 0.6934,
|
|
"step": 5
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10060784965753555,
|
|
"beta_dpo/beta_used_raw": 0.10060784965753555,
|
|
"beta_dpo/gap_mean": -0.0015905939508229494,
|
|
"beta_dpo/gap_std": 0.1881129890680313,
|
|
"beta_dpo/mask_keep_frac": 0.7749999761581421,
|
|
"epoch": 0.030303030303030304,
|
|
"grad_norm": 11.778424263000488,
|
|
"learning_rate": 1.3636363636363635e-07,
|
|
"logits/chosen": -0.7911893129348755,
|
|
"logits/rejected": -0.7587390542030334,
|
|
"loss": 0.6928,
|
|
"step": 10
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10040197521448135,
|
|
"beta_dpo/beta_used_raw": 0.10040197521448135,
|
|
"beta_dpo/gap_mean": 0.0006210329011082649,
|
|
"beta_dpo/gap_std": 0.24522730708122253,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.045454545454545456,
|
|
"grad_norm": 12.626185417175293,
|
|
"learning_rate": 2.121212121212121e-07,
|
|
"logits/chosen": -0.8082472085952759,
|
|
"logits/rejected": -0.8093615770339966,
|
|
"loss": 0.6928,
|
|
"step": 15
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10040859878063202,
|
|
"beta_dpo/beta_used_raw": 0.10040859878063202,
|
|
"beta_dpo/gap_mean": 0.008134648203849792,
|
|
"beta_dpo/gap_std": 0.2810249626636505,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.06060606060606061,
|
|
"grad_norm": 12.163843154907227,
|
|
"learning_rate": 2.878787878787879e-07,
|
|
"logits/chosen": -0.7914258241653442,
|
|
"logits/rejected": -0.7522870302200317,
|
|
"loss": 0.6925,
|
|
"step": 20
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10019676387310028,
|
|
"beta_dpo/beta_used_raw": 0.10019676387310028,
|
|
"beta_dpo/gap_mean": 0.007132118102163076,
|
|
"beta_dpo/gap_std": 0.3137893080711365,
|
|
"beta_dpo/mask_keep_frac": 0.800000011920929,
|
|
"epoch": 0.07575757575757576,
|
|
"grad_norm": 12.878430366516113,
|
|
"learning_rate": 3.636363636363636e-07,
|
|
"logits/chosen": -0.7768210172653198,
|
|
"logits/rejected": -0.771538496017456,
|
|
"loss": 0.6926,
|
|
"step": 25
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10199077427387238,
|
|
"beta_dpo/beta_used_raw": 0.10199077427387238,
|
|
"beta_dpo/gap_mean": 0.015979086980223656,
|
|
"beta_dpo/gap_std": 0.34232962131500244,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.09090909090909091,
|
|
"grad_norm": 11.947314262390137,
|
|
"learning_rate": 4.3939393939393937e-07,
|
|
"logits/chosen": -0.8367147445678711,
|
|
"logits/rejected": -0.8112382888793945,
|
|
"loss": 0.6907,
|
|
"step": 30
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10177697986364365,
|
|
"beta_dpo/beta_used_raw": 0.10177697986364365,
|
|
"beta_dpo/gap_mean": 0.0375533364713192,
|
|
"beta_dpo/gap_std": 0.3859425187110901,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.10606060606060606,
|
|
"grad_norm": 14.33592700958252,
|
|
"learning_rate": 4.999860140229787e-07,
|
|
"logits/chosen": -0.8096274137496948,
|
|
"logits/rejected": -0.7928019762039185,
|
|
"loss": 0.6898,
|
|
"step": 35
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10338791459798813,
|
|
"beta_dpo/beta_used_raw": 0.10338791459798813,
|
|
"beta_dpo/gap_mean": 0.06975066661834717,
|
|
"beta_dpo/gap_std": 0.45846351981163025,
|
|
"beta_dpo/mask_keep_frac": 0.824999988079071,
|
|
"epoch": 0.12121212121212122,
|
|
"grad_norm": 11.904743194580078,
|
|
"learning_rate": 4.994966691179711e-07,
|
|
"logits/chosen": -0.7240467667579651,
|
|
"logits/rejected": -0.6869294047355652,
|
|
"loss": 0.6868,
|
|
"step": 40
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.105168916285038,
|
|
"beta_dpo/beta_used_raw": 0.105168916285038,
|
|
"beta_dpo/gap_mean": 0.14308178424835205,
|
|
"beta_dpo/gap_std": 0.5644584894180298,
|
|
"beta_dpo/mask_keep_frac": 0.800000011920929,
|
|
"epoch": 0.13636363636363635,
|
|
"grad_norm": 13.17418098449707,
|
|
"learning_rate": 4.983095894354857e-07,
|
|
"logits/chosen": -0.7734057307243347,
|
|
"logits/rejected": -0.7477155923843384,
|
|
"loss": 0.6818,
|
|
"step": 45
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10223841667175293,
|
|
"beta_dpo/beta_used_raw": 0.10223841667175293,
|
|
"beta_dpo/gap_mean": 0.21264997124671936,
|
|
"beta_dpo/gap_std": 0.7354207038879395,
|
|
"beta_dpo/mask_keep_frac": 0.7749999761581421,
|
|
"epoch": 0.15151515151515152,
|
|
"grad_norm": 12.405279159545898,
|
|
"learning_rate": 4.964280947263676e-07,
|
|
"logits/chosen": -0.7339795827865601,
|
|
"logits/rejected": -0.7022608518600464,
|
|
"loss": 0.6815,
|
|
"step": 50
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10513879358768463,
|
|
"beta_dpo/beta_used_raw": 0.10513879358768463,
|
|
"beta_dpo/gap_mean": 0.27966898679733276,
|
|
"beta_dpo/gap_std": 1.0065762996673584,
|
|
"beta_dpo/mask_keep_frac": 0.875,
|
|
"epoch": 0.16666666666666666,
|
|
"grad_norm": 13.70584774017334,
|
|
"learning_rate": 4.938574467213517e-07,
|
|
"logits/chosen": -0.7537848949432373,
|
|
"logits/rejected": -0.7295504808425903,
|
|
"loss": 0.6752,
|
|
"step": 55
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10337547957897186,
|
|
"beta_dpo/beta_used_raw": 0.10337547957897186,
|
|
"beta_dpo/gap_mean": 0.3844713568687439,
|
|
"beta_dpo/gap_std": 1.2807694673538208,
|
|
"beta_dpo/mask_keep_frac": 0.762499988079071,
|
|
"epoch": 0.18181818181818182,
|
|
"grad_norm": 12.184106826782227,
|
|
"learning_rate": 4.906048344162676e-07,
|
|
"logits/chosen": -0.7029341459274292,
|
|
"logits/rejected": -0.6750706434249878,
|
|
"loss": 0.6718,
|
|
"step": 60
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10123707354068756,
|
|
"beta_dpo/beta_used_raw": 0.10123707354068756,
|
|
"beta_dpo/gap_mean": 0.5187833309173584,
|
|
"beta_dpo/gap_std": 1.5582863092422485,
|
|
"beta_dpo/mask_keep_frac": 0.800000011920929,
|
|
"epoch": 0.19696969696969696,
|
|
"grad_norm": 12.474862098693848,
|
|
"learning_rate": 4.866793539675126e-07,
|
|
"logits/chosen": -0.7182232737541199,
|
|
"logits/rejected": -0.6864453554153442,
|
|
"loss": 0.668,
|
|
"step": 65
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10362961143255234,
|
|
"beta_dpo/beta_used_raw": 0.10362961143255234,
|
|
"beta_dpo/gap_mean": 0.6425492763519287,
|
|
"beta_dpo/gap_std": 1.8649520874023438,
|
|
"beta_dpo/mask_keep_frac": 0.800000011920929,
|
|
"epoch": 0.21212121212121213,
|
|
"grad_norm": 13.411380767822266,
|
|
"learning_rate": 4.820919832540181e-07,
|
|
"logits/chosen": -0.6498057842254639,
|
|
"logits/rejected": -0.6468607783317566,
|
|
"loss": 0.6611,
|
|
"step": 70
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10772015154361725,
|
|
"beta_dpo/beta_used_raw": 0.10772015154361725,
|
|
"beta_dpo/gap_mean": 0.7031647562980652,
|
|
"beta_dpo/gap_std": 2.167182683944702,
|
|
"beta_dpo/mask_keep_frac": 0.862500011920929,
|
|
"epoch": 0.22727272727272727,
|
|
"grad_norm": 12.674415588378906,
|
|
"learning_rate": 4.768555511768486e-07,
|
|
"logits/chosen": -0.6153755187988281,
|
|
"logits/rejected": -0.606307327747345,
|
|
"loss": 0.653,
|
|
"step": 75
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10870923101902008,
|
|
"beta_dpo/beta_used_raw": 0.10870923101902008,
|
|
"beta_dpo/gap_mean": 0.8461316227912903,
|
|
"beta_dpo/gap_std": 2.5076112747192383,
|
|
"beta_dpo/mask_keep_frac": 0.8374999761581421,
|
|
"epoch": 0.24242424242424243,
|
|
"grad_norm": 13.425226211547852,
|
|
"learning_rate": 4.7098470178228755e-07,
|
|
"logits/chosen": -0.6497966647148132,
|
|
"logits/rejected": -0.6329380869865417,
|
|
"loss": 0.6466,
|
|
"step": 80
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.1060580238699913,
|
|
"beta_dpo/beta_used_raw": 0.1060580238699913,
|
|
"beta_dpo/gap_mean": 0.9982147216796875,
|
|
"beta_dpo/gap_std": 2.806090831756592,
|
|
"beta_dpo/mask_keep_frac": 0.800000011920929,
|
|
"epoch": 0.25757575757575757,
|
|
"grad_norm": 9.75727653503418,
|
|
"learning_rate": 4.6449585330874425e-07,
|
|
"logits/chosen": -0.6012470722198486,
|
|
"logits/rejected": -0.5752061605453491,
|
|
"loss": 0.6435,
|
|
"step": 85
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.11574982106685638,
|
|
"beta_dpo/beta_used_raw": 0.11574982106685638,
|
|
"beta_dpo/gap_mean": 1.2254174947738647,
|
|
"beta_dpo/gap_std": 3.2572083473205566,
|
|
"beta_dpo/mask_keep_frac": 0.800000011920929,
|
|
"epoch": 0.2727272727272727,
|
|
"grad_norm": 10.738388061523438,
|
|
"learning_rate": 4.5740715227200897e-07,
|
|
"logits/chosen": -0.650251567363739,
|
|
"logits/rejected": -0.6243180632591248,
|
|
"loss": 0.6219,
|
|
"step": 90
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.09826114773750305,
|
|
"beta_dpo/beta_used_raw": 0.09826114773750305,
|
|
"beta_dpo/gap_mean": 1.4264709949493408,
|
|
"beta_dpo/gap_std": 3.7166686058044434,
|
|
"beta_dpo/mask_keep_frac": 0.762499988079071,
|
|
"epoch": 0.2878787878787879,
|
|
"grad_norm": 13.121673583984375,
|
|
"learning_rate": 4.4973842271726024e-07,
|
|
"logits/chosen": -0.5675602555274963,
|
|
"logits/rejected": -0.5547417402267456,
|
|
"loss": 0.6362,
|
|
"step": 95
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10674748569726944,
|
|
"beta_dpo/beta_used_raw": 0.10674748569726944,
|
|
"beta_dpo/gap_mean": 1.5260875225067139,
|
|
"beta_dpo/gap_std": 4.1418657302856445,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.30303030303030304,
|
|
"grad_norm": 15.6002197265625,
|
|
"learning_rate": 4.415111107797445e-07,
|
|
"logits/chosen": -0.5712032914161682,
|
|
"logits/rejected": -0.5290790796279907,
|
|
"loss": 0.6231,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.30303030303030304,
|
|
"eval_beta_dpo/beta_used": 0.11167524755001068,
|
|
"eval_beta_dpo/beta_used_raw": 0.11167524755001068,
|
|
"eval_beta_dpo/gap_mean": 1.9525233507156372,
|
|
"eval_beta_dpo/gap_std": 4.847992897033691,
|
|
"eval_beta_dpo/mask_keep_frac": 1.0,
|
|
"eval_logits/chosen": -0.5574179887771606,
|
|
"eval_logits/rejected": -0.540048360824585,
|
|
"eval_loss": 0.6185675263404846,
|
|
"eval_runtime": 18.8608,
|
|
"eval_samples_per_second": 122.105,
|
|
"eval_steps_per_second": 0.954,
|
|
"step": 100
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.06386379897594452,
|
|
"beta_dpo/beta_used_raw": 0.06386379897594452,
|
|
"beta_dpo/gap_mean": 2.0449135303497314,
|
|
"beta_dpo/gap_std": 5.11466121673584,
|
|
"beta_dpo/mask_keep_frac": 0.887499988079071,
|
|
"epoch": 0.3181818181818182,
|
|
"grad_norm": 10.90100383758545,
|
|
"learning_rate": 4.327482247091679e-07,
|
|
"logits/chosen": -0.5555615425109863,
|
|
"logits/rejected": -0.528151273727417,
|
|
"loss": 0.6534,
|
|
"step": 105
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.08590348809957504,
|
|
"beta_dpo/beta_used_raw": 0.08590348809957504,
|
|
"beta_dpo/gap_mean": 2.1610352993011475,
|
|
"beta_dpo/gap_std": 5.504552364349365,
|
|
"beta_dpo/mask_keep_frac": 0.800000011920929,
|
|
"epoch": 0.3333333333333333,
|
|
"grad_norm": 7.672910690307617,
|
|
"learning_rate": 4.234742705255272e-07,
|
|
"logits/chosen": -0.4595974385738373,
|
|
"logits/rejected": -0.45340991020202637,
|
|
"loss": 0.6317,
|
|
"step": 110
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10557971149682999,
|
|
"beta_dpo/beta_used_raw": 0.10557971149682999,
|
|
"beta_dpo/gap_mean": 2.390939474105835,
|
|
"beta_dpo/gap_std": 5.818662166595459,
|
|
"beta_dpo/mask_keep_frac": 0.862500011920929,
|
|
"epoch": 0.3484848484848485,
|
|
"grad_norm": 8.269521713256836,
|
|
"learning_rate": 4.137151834863213e-07,
|
|
"logits/chosen": -0.5435389280319214,
|
|
"logits/rejected": -0.4987867474555969,
|
|
"loss": 0.5959,
|
|
"step": 115
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.08998899161815643,
|
|
"beta_dpo/beta_used_raw": 0.08998899161815643,
|
|
"beta_dpo/gap_mean": 2.3944687843322754,
|
|
"beta_dpo/gap_std": 6.05053186416626,
|
|
"beta_dpo/mask_keep_frac": 0.8374999761581421,
|
|
"epoch": 0.36363636363636365,
|
|
"grad_norm": 13.379582405090332,
|
|
"learning_rate": 4.0349825555680045e-07,
|
|
"logits/chosen": -0.5789726972579956,
|
|
"logits/rejected": -0.5432100296020508,
|
|
"loss": 0.6198,
|
|
"step": 120
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.08791515231132507,
|
|
"beta_dpo/beta_used_raw": 0.08791515231132507,
|
|
"beta_dpo/gap_mean": 2.5297319889068604,
|
|
"beta_dpo/gap_std": 6.210949897766113,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.3787878787878788,
|
|
"grad_norm": 7.562979221343994,
|
|
"learning_rate": 3.9285205908608934e-07,
|
|
"logits/chosen": -0.5596938729286194,
|
|
"logits/rejected": -0.5469728708267212,
|
|
"loss": 0.6146,
|
|
"step": 125
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.11058609187602997,
|
|
"beta_dpo/beta_used_raw": 0.11058609187602997,
|
|
"beta_dpo/gap_mean": 2.536633014678955,
|
|
"beta_dpo/gap_std": 6.392093181610107,
|
|
"beta_dpo/mask_keep_frac": 0.7875000238418579,
|
|
"epoch": 0.3939393939393939,
|
|
"grad_norm": 23.452016830444336,
|
|
"learning_rate": 3.818063669026256e-07,
|
|
"logits/chosen": -0.5439124703407288,
|
|
"logits/rejected": -0.5279029607772827,
|
|
"loss": 0.5928,
|
|
"step": 130
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10615509748458862,
|
|
"beta_dpo/beta_used_raw": 0.10615509748458862,
|
|
"beta_dpo/gap_mean": 2.8626952171325684,
|
|
"beta_dpo/gap_std": 6.557906150817871,
|
|
"beta_dpo/mask_keep_frac": 0.862500011920929,
|
|
"epoch": 0.4090909090909091,
|
|
"grad_norm": 16.79780387878418,
|
|
"learning_rate": 3.7039206905237656e-07,
|
|
"logits/chosen": -0.556363582611084,
|
|
"logits/rejected": -0.5632845163345337,
|
|
"loss": 0.5811,
|
|
"step": 135
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.1162651777267456,
|
|
"beta_dpo/beta_used_raw": 0.1162651777267456,
|
|
"beta_dpo/gap_mean": 3.088381290435791,
|
|
"beta_dpo/gap_std": 6.59566593170166,
|
|
"beta_dpo/mask_keep_frac": 0.7875000238418579,
|
|
"epoch": 0.42424242424242425,
|
|
"grad_norm": 14.226531982421875,
|
|
"learning_rate": 3.586410864126781e-07,
|
|
"logits/chosen": -0.5420447587966919,
|
|
"logits/rejected": -0.5301133990287781,
|
|
"loss": 0.5488,
|
|
"step": 140
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.11434066295623779,
|
|
"beta_dpo/beta_used_raw": 0.11434066295623779,
|
|
"beta_dpo/gap_mean": 3.461772918701172,
|
|
"beta_dpo/gap_std": 6.666165828704834,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.4393939393939394,
|
|
"grad_norm": 13.191394805908203,
|
|
"learning_rate": 3.465862814232821e-07,
|
|
"logits/chosen": -0.49957942962646484,
|
|
"logits/rejected": -0.4835745394229889,
|
|
"loss": 0.5499,
|
|
"step": 145
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.12056032568216324,
|
|
"beta_dpo/beta_used_raw": 0.12056032568216324,
|
|
"beta_dpo/gap_mean": 3.900587797164917,
|
|
"beta_dpo/gap_std": 6.922667026519775,
|
|
"beta_dpo/mask_keep_frac": 0.7749999761581421,
|
|
"epoch": 0.45454545454545453,
|
|
"grad_norm": 10.217402458190918,
|
|
"learning_rate": 3.3426136618426043e-07,
|
|
"logits/chosen": -0.5163663625717163,
|
|
"logits/rejected": -0.4923931062221527,
|
|
"loss": 0.5155,
|
|
"step": 150
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.08996663987636566,
|
|
"beta_dpo/beta_used_raw": 0.08996663987636566,
|
|
"beta_dpo/gap_mean": 4.022343635559082,
|
|
"beta_dpo/gap_std": 7.262037754058838,
|
|
"beta_dpo/mask_keep_frac": 0.762499988079071,
|
|
"epoch": 0.4696969696969697,
|
|
"grad_norm": 6.328583240509033,
|
|
"learning_rate": 3.2170080817777257e-07,
|
|
"logits/chosen": -0.47460970282554626,
|
|
"logits/rejected": -0.4646075665950775,
|
|
"loss": 0.5723,
|
|
"step": 155
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.09257197380065918,
|
|
"beta_dpo/beta_used_raw": 0.09257197380065918,
|
|
"beta_dpo/gap_mean": 4.135162353515625,
|
|
"beta_dpo/gap_std": 7.709047794342041,
|
|
"beta_dpo/mask_keep_frac": 0.8374999761581421,
|
|
"epoch": 0.48484848484848486,
|
|
"grad_norm": 2.340575933456421,
|
|
"learning_rate": 3.0893973387735683e-07,
|
|
"logits/chosen": -0.549339234828949,
|
|
"logits/rejected": -0.5254893898963928,
|
|
"loss": 0.5706,
|
|
"step": 160
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.1215561255812645,
|
|
"beta_dpo/beta_used_raw": 0.1215561255812645,
|
|
"beta_dpo/gap_mean": 4.385509490966797,
|
|
"beta_dpo/gap_std": 8.18330192565918,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.5,
|
|
"grad_norm": 27.537439346313477,
|
|
"learning_rate": 2.9601383051430505e-07,
|
|
"logits/chosen": -0.4928368926048279,
|
|
"logits/rejected": -0.46984148025512695,
|
|
"loss": 0.5273,
|
|
"step": 165
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.08485610783100128,
|
|
"beta_dpo/beta_used_raw": 0.08485610783100128,
|
|
"beta_dpo/gap_mean": 4.619694709777832,
|
|
"beta_dpo/gap_std": 8.622313499450684,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.5151515151515151,
|
|
"grad_norm": 10.716350555419922,
|
|
"learning_rate": 2.8295924627584004e-07,
|
|
"logits/chosen": -0.47423356771469116,
|
|
"logits/rejected": -0.43696826696395874,
|
|
"loss": 0.5656,
|
|
"step": 170
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10904519259929657,
|
|
"beta_dpo/beta_used_raw": 0.10904519259929657,
|
|
"beta_dpo/gap_mean": 4.983495712280273,
|
|
"beta_dpo/gap_std": 9.088811874389648,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.5303030303030303,
|
|
"grad_norm": 16.4443416595459,
|
|
"learning_rate": 2.698124892141971e-07,
|
|
"logits/chosen": -0.4739559590816498,
|
|
"logits/rejected": -0.452726274728775,
|
|
"loss": 0.5275,
|
|
"step": 175
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.09932375699281693,
|
|
"beta_dpo/beta_used_raw": 0.09932375699281693,
|
|
"beta_dpo/gap_mean": 5.506978511810303,
|
|
"beta_dpo/gap_std": 9.59619426727295,
|
|
"beta_dpo/mask_keep_frac": 0.887499988079071,
|
|
"epoch": 0.5454545454545454,
|
|
"grad_norm": 6.31719446182251,
|
|
"learning_rate": 2.5661032514931834e-07,
|
|
"logits/chosen": -0.5071254968643188,
|
|
"logits/rejected": -0.4881424307823181,
|
|
"loss": 0.5367,
|
|
"step": 180
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.08257903903722763,
|
|
"beta_dpo/beta_used_raw": 0.08257903903722763,
|
|
"beta_dpo/gap_mean": 5.807556629180908,
|
|
"beta_dpo/gap_std": 10.00381088256836,
|
|
"beta_dpo/mask_keep_frac": 0.9125000238418579,
|
|
"epoch": 0.5606060606060606,
|
|
"grad_norm": 16.983186721801758,
|
|
"learning_rate": 2.4338967485068164e-07,
|
|
"logits/chosen": -0.44962626695632935,
|
|
"logits/rejected": -0.4310552179813385,
|
|
"loss": 0.5442,
|
|
"step": 185
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.1385645568370819,
|
|
"beta_dpo/beta_used_raw": 0.1385645568370819,
|
|
"beta_dpo/gap_mean": 5.958134651184082,
|
|
"beta_dpo/gap_std": 10.562962532043457,
|
|
"beta_dpo/mask_keep_frac": 0.7749999761581421,
|
|
"epoch": 0.5757575757575758,
|
|
"grad_norm": 31.49508285522461,
|
|
"learning_rate": 2.3018751078580283e-07,
|
|
"logits/chosen": -0.4748384356498718,
|
|
"logits/rejected": -0.4529237151145935,
|
|
"loss": 0.4962,
|
|
"step": 190
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.10011277347803116,
|
|
"beta_dpo/beta_used_raw": 0.09850181639194489,
|
|
"beta_dpo/gap_mean": 6.100876808166504,
|
|
"beta_dpo/gap_std": 11.020359992980957,
|
|
"beta_dpo/mask_keep_frac": 0.862500011920929,
|
|
"epoch": 0.5909090909090909,
|
|
"grad_norm": 17.15842056274414,
|
|
"learning_rate": 2.170407537241599e-07,
|
|
"logits/chosen": -0.4534582495689392,
|
|
"logits/rejected": -0.42914143204689026,
|
|
"loss": 0.5502,
|
|
"step": 195
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.1180671900510788,
|
|
"beta_dpo/beta_used_raw": 0.1180671900510788,
|
|
"beta_dpo/gap_mean": 6.612210273742676,
|
|
"beta_dpo/gap_std": 11.322927474975586,
|
|
"beta_dpo/mask_keep_frac": 0.7875000238418579,
|
|
"epoch": 0.6060606060606061,
|
|
"grad_norm": 13.65029239654541,
|
|
"learning_rate": 2.0398616948569493e-07,
|
|
"logits/chosen": -0.4936196208000183,
|
|
"logits/rejected": -0.4612639546394348,
|
|
"loss": 0.498,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.6060606060606061,
|
|
"eval_beta_dpo/beta_used": 0.10561517626047134,
|
|
"eval_beta_dpo/beta_used_raw": 0.10561517626047134,
|
|
"eval_beta_dpo/gap_mean": 6.780107498168945,
|
|
"eval_beta_dpo/gap_std": 11.72070598602295,
|
|
"eval_beta_dpo/mask_keep_frac": 1.0,
|
|
"eval_logits/chosen": -0.4722588062286377,
|
|
"eval_logits/rejected": -0.45819586515426636,
|
|
"eval_loss": 0.5506138801574707,
|
|
"eval_runtime": 18.8213,
|
|
"eval_samples_per_second": 122.361,
|
|
"eval_steps_per_second": 0.956,
|
|
"step": 200
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.08741272985935211,
|
|
"beta_dpo/beta_used_raw": 0.08735300600528717,
|
|
"beta_dpo/gap_mean": 7.251504421234131,
|
|
"beta_dpo/gap_std": 11.868724822998047,
|
|
"beta_dpo/mask_keep_frac": 0.762499988079071,
|
|
"epoch": 0.6212121212121212,
|
|
"grad_norm": 0.15343494713306427,
|
|
"learning_rate": 1.9106026612264315e-07,
|
|
"logits/chosen": -0.4946843981742859,
|
|
"logits/rejected": -0.46265077590942383,
|
|
"loss": 0.5233,
|
|
"step": 205
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.08492619544267654,
|
|
"beta_dpo/beta_used_raw": 0.08492619544267654,
|
|
"beta_dpo/gap_mean": 7.168964385986328,
|
|
"beta_dpo/gap_std": 11.9141845703125,
|
|
"beta_dpo/mask_keep_frac": 0.800000011920929,
|
|
"epoch": 0.6363636363636364,
|
|
"grad_norm": 38.745361328125,
|
|
"learning_rate": 1.782991918222275e-07,
|
|
"logits/chosen": -0.42799100279808044,
|
|
"logits/rejected": -0.4196823239326477,
|
|
"loss": 0.5237,
|
|
"step": 210
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.08925200998783112,
|
|
"beta_dpo/beta_used_raw": 0.08484373241662979,
|
|
"beta_dpo/gap_mean": 7.09285831451416,
|
|
"beta_dpo/gap_std": 12.202669143676758,
|
|
"beta_dpo/mask_keep_frac": 0.862500011920929,
|
|
"epoch": 0.6515151515151515,
|
|
"grad_norm": 39.51192092895508,
|
|
"learning_rate": 1.6573863381573954e-07,
|
|
"logits/chosen": -0.43246760964393616,
|
|
"logits/rejected": -0.4298061430454254,
|
|
"loss": 0.5466,
|
|
"step": 215
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.1373816877603531,
|
|
"beta_dpo/beta_used_raw": 0.1373816877603531,
|
|
"beta_dpo/gap_mean": 7.408307075500488,
|
|
"beta_dpo/gap_std": 12.6698579788208,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.6666666666666666,
|
|
"grad_norm": 66.92206573486328,
|
|
"learning_rate": 1.534137185767178e-07,
|
|
"logits/chosen": -0.5049004554748535,
|
|
"logits/rejected": -0.4828864634037018,
|
|
"loss": 0.4731,
|
|
"step": 220
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.11999156326055527,
|
|
"beta_dpo/beta_used_raw": 0.11999156326055527,
|
|
"beta_dpo/gap_mean": 7.8069658279418945,
|
|
"beta_dpo/gap_std": 12.916173934936523,
|
|
"beta_dpo/mask_keep_frac": 0.7124999761581421,
|
|
"epoch": 0.6818181818181818,
|
|
"grad_norm": 5.55664587020874,
|
|
"learning_rate": 1.4135891358732205e-07,
|
|
"logits/chosen": -0.4607675075531006,
|
|
"logits/rejected": -0.429083913564682,
|
|
"loss": 0.4933,
|
|
"step": 225
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.11390962451696396,
|
|
"beta_dpo/beta_used_raw": 0.11390962451696396,
|
|
"beta_dpo/gap_mean": 7.83342981338501,
|
|
"beta_dpo/gap_std": 12.932693481445312,
|
|
"beta_dpo/mask_keep_frac": 0.7875000238418579,
|
|
"epoch": 0.696969696969697,
|
|
"grad_norm": 32.68361282348633,
|
|
"learning_rate": 1.2960793094762345e-07,
|
|
"logits/chosen": -0.41661542654037476,
|
|
"logits/rejected": -0.4079780578613281,
|
|
"loss": 0.4954,
|
|
"step": 230
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.09100167453289032,
|
|
"beta_dpo/beta_used_raw": 0.09100167453289032,
|
|
"beta_dpo/gap_mean": 8.167860984802246,
|
|
"beta_dpo/gap_std": 12.970059394836426,
|
|
"beta_dpo/mask_keep_frac": 0.862500011920929,
|
|
"epoch": 0.7121212121212122,
|
|
"grad_norm": 1.9182671308517456,
|
|
"learning_rate": 1.1819363309737438e-07,
|
|
"logits/chosen": -0.4386097490787506,
|
|
"logits/rejected": -0.42474693059921265,
|
|
"loss": 0.5136,
|
|
"step": 235
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.11001662909984589,
|
|
"beta_dpo/beta_used_raw": 0.11001662909984589,
|
|
"beta_dpo/gap_mean": 8.317561149597168,
|
|
"beta_dpo/gap_std": 13.424278259277344,
|
|
"beta_dpo/mask_keep_frac": 0.800000011920929,
|
|
"epoch": 0.7272727272727273,
|
|
"grad_norm": 17.994626998901367,
|
|
"learning_rate": 1.0714794091391072e-07,
|
|
"logits/chosen": -0.4545617997646332,
|
|
"logits/rejected": -0.4394044280052185,
|
|
"loss": 0.4769,
|
|
"step": 240
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.07068195939064026,
|
|
"beta_dpo/beta_used_raw": 0.07068195939064026,
|
|
"beta_dpo/gap_mean": 8.271533966064453,
|
|
"beta_dpo/gap_std": 13.785310745239258,
|
|
"beta_dpo/mask_keep_frac": 0.824999988079071,
|
|
"epoch": 0.7424242424242424,
|
|
"grad_norm": 9.725923538208008,
|
|
"learning_rate": 9.650174444319956e-08,
|
|
"logits/chosen": -0.45390695333480835,
|
|
"logits/rejected": -0.43619924783706665,
|
|
"loss": 0.5268,
|
|
"step": 245
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.08607280999422073,
|
|
"beta_dpo/beta_used_raw": 0.08015486598014832,
|
|
"beta_dpo/gap_mean": 8.123547554016113,
|
|
"beta_dpo/gap_std": 14.15746021270752,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.7575757575757576,
|
|
"grad_norm": 19.712242126464844,
|
|
"learning_rate": 8.628481651367875e-08,
|
|
"logits/chosen": -0.4595223069190979,
|
|
"logits/rejected": -0.4408304691314697,
|
|
"loss": 0.5287,
|
|
"step": 250
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0958368107676506,
|
|
"beta_dpo/beta_used_raw": 0.08722580969333649,
|
|
"beta_dpo/gap_mean": 8.267644882202148,
|
|
"beta_dpo/gap_std": 14.14880657196045,
|
|
"beta_dpo/mask_keep_frac": 0.8999999761581421,
|
|
"epoch": 0.7727272727272727,
|
|
"grad_norm": 61.9700927734375,
|
|
"learning_rate": 7.652572947447272e-08,
|
|
"logits/chosen": -0.44903382658958435,
|
|
"logits/rejected": -0.4424815773963928,
|
|
"loss": 0.5257,
|
|
"step": 255
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.07386674731969833,
|
|
"beta_dpo/beta_used_raw": 0.06767500936985016,
|
|
"beta_dpo/gap_mean": 8.649662017822266,
|
|
"beta_dpo/gap_std": 14.375146865844727,
|
|
"beta_dpo/mask_keep_frac": 0.7875000238418579,
|
|
"epoch": 0.7878787878787878,
|
|
"grad_norm": 20.901798248291016,
|
|
"learning_rate": 6.725177529083209e-08,
|
|
"logits/chosen": -0.46160441637039185,
|
|
"logits/rejected": -0.44480133056640625,
|
|
"loss": 0.5284,
|
|
"step": 260
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.08889990299940109,
|
|
"beta_dpo/beta_used_raw": 0.05368128418922424,
|
|
"beta_dpo/gap_mean": 8.253731727600098,
|
|
"beta_dpo/gap_std": 14.49620532989502,
|
|
"beta_dpo/mask_keep_frac": 0.75,
|
|
"epoch": 0.803030303030303,
|
|
"grad_norm": 36.13115692138672,
|
|
"learning_rate": 5.848888922025552e-08,
|
|
"logits/chosen": -0.4071124196052551,
|
|
"logits/rejected": -0.38313764333724976,
|
|
"loss": 0.5524,
|
|
"step": 265
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.05102431774139404,
|
|
"beta_dpo/beta_used_raw": 0.05102431774139404,
|
|
"beta_dpo/gap_mean": 8.481303215026855,
|
|
"beta_dpo/gap_std": 14.435537338256836,
|
|
"beta_dpo/mask_keep_frac": 0.7875000238418579,
|
|
"epoch": 0.8181818181818182,
|
|
"grad_norm": 4.406769275665283,
|
|
"learning_rate": 5.026157728273966e-08,
|
|
"logits/chosen": -0.43619123101234436,
|
|
"logits/rejected": -0.40814194083213806,
|
|
"loss": 0.5676,
|
|
"step": 270
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.08905264735221863,
|
|
"beta_dpo/beta_used_raw": 0.08905264735221863,
|
|
"beta_dpo/gap_mean": 8.75959587097168,
|
|
"beta_dpo/gap_std": 14.441301345825195,
|
|
"beta_dpo/mask_keep_frac": 0.7875000238418579,
|
|
"epoch": 0.8333333333333334,
|
|
"grad_norm": 13.085917472839355,
|
|
"learning_rate": 4.259284772799099e-08,
|
|
"logits/chosen": -0.43446803092956543,
|
|
"logits/rejected": -0.4283529818058014,
|
|
"loss": 0.5225,
|
|
"step": 275
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.1104244738817215,
|
|
"beta_dpo/beta_used_raw": 0.1104244738817215,
|
|
"beta_dpo/gap_mean": 8.6881103515625,
|
|
"beta_dpo/gap_std": 14.51659870147705,
|
|
"beta_dpo/mask_keep_frac": 0.7875000238418579,
|
|
"epoch": 0.8484848484848485,
|
|
"grad_norm": 47.124366760253906,
|
|
"learning_rate": 3.550414669125573e-08,
|
|
"logits/chosen": -0.4580152630805969,
|
|
"logits/rejected": -0.4392933249473572,
|
|
"loss": 0.4767,
|
|
"step": 280
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.14569848775863647,
|
|
"beta_dpo/beta_used_raw": 0.14569848775863647,
|
|
"beta_dpo/gap_mean": 9.179306030273438,
|
|
"beta_dpo/gap_std": 14.847735404968262,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.8636363636363636,
|
|
"grad_norm": 43.69351577758789,
|
|
"learning_rate": 2.9015298217712453e-08,
|
|
"logits/chosen": -0.42454952001571655,
|
|
"logits/rejected": -0.3965614438056946,
|
|
"loss": 0.4529,
|
|
"step": 285
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.06421518325805664,
|
|
"beta_dpo/beta_used_raw": 0.056242913007736206,
|
|
"beta_dpo/gap_mean": 9.178163528442383,
|
|
"beta_dpo/gap_std": 14.94957160949707,
|
|
"beta_dpo/mask_keep_frac": 0.7749999761581421,
|
|
"epoch": 0.8787878787878788,
|
|
"grad_norm": 19.567977905273438,
|
|
"learning_rate": 2.3144448823151392e-08,
|
|
"logits/chosen": -0.4124082624912262,
|
|
"logits/rejected": -0.38752835988998413,
|
|
"loss": 0.5666,
|
|
"step": 290
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.11043484508991241,
|
|
"beta_dpo/beta_used_raw": 0.11043484508991241,
|
|
"beta_dpo/gap_mean": 9.004778861999512,
|
|
"beta_dpo/gap_std": 15.063299179077148,
|
|
"beta_dpo/mask_keep_frac": 0.737500011920929,
|
|
"epoch": 0.8939393939393939,
|
|
"grad_norm": 45.88330841064453,
|
|
"learning_rate": 1.7908016745981856e-08,
|
|
"logits/chosen": -0.41249990463256836,
|
|
"logits/rejected": -0.41048282384872437,
|
|
"loss": 0.4783,
|
|
"step": 295
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.06652533262968063,
|
|
"beta_dpo/beta_used_raw": 0.05020095035433769,
|
|
"beta_dpo/gap_mean": 9.056544303894043,
|
|
"beta_dpo/gap_std": 15.056539535522461,
|
|
"beta_dpo/mask_keep_frac": 0.762499988079071,
|
|
"epoch": 0.9090909090909091,
|
|
"grad_norm": 0.25523823499679565,
|
|
"learning_rate": 1.3320646032487393e-08,
|
|
"logits/chosen": -0.4351003170013428,
|
|
"logits/rejected": -0.42235302925109863,
|
|
"loss": 0.5615,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.9090909090909091,
|
|
"eval_beta_dpo/beta_used": 0.10696752369403839,
|
|
"eval_beta_dpo/beta_used_raw": 0.10696752369403839,
|
|
"eval_beta_dpo/gap_mean": 8.805192947387695,
|
|
"eval_beta_dpo/gap_std": 15.178271293640137,
|
|
"eval_beta_dpo/mask_keep_frac": 1.0,
|
|
"eval_logits/chosen": -0.4217662513256073,
|
|
"eval_logits/rejected": -0.4089266359806061,
|
|
"eval_loss": 0.5633069276809692,
|
|
"eval_runtime": 18.8692,
|
|
"eval_samples_per_second": 122.051,
|
|
"eval_steps_per_second": 0.954,
|
|
"step": 300
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.0679563358426094,
|
|
"beta_dpo/beta_used_raw": 0.06361763179302216,
|
|
"beta_dpo/gap_mean": 9.039968490600586,
|
|
"beta_dpo/gap_std": 15.006390571594238,
|
|
"beta_dpo/mask_keep_frac": 0.7749999761581421,
|
|
"epoch": 0.9242424242424242,
|
|
"grad_norm": 26.64524269104004,
|
|
"learning_rate": 9.395165583732379e-09,
|
|
"logits/chosen": -0.40837812423706055,
|
|
"logits/rejected": -0.3757531940937042,
|
|
"loss": 0.5354,
|
|
"step": 305
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.09475517272949219,
|
|
"beta_dpo/beta_used_raw": 0.09475517272949219,
|
|
"beta_dpo/gap_mean": 9.129568099975586,
|
|
"beta_dpo/gap_std": 14.912490844726562,
|
|
"beta_dpo/mask_keep_frac": 0.8125,
|
|
"epoch": 0.9393939393939394,
|
|
"grad_norm": 17.02347755432129,
|
|
"learning_rate": 6.142553278648238e-09,
|
|
"logits/chosen": -0.4192012846469879,
|
|
"logits/rejected": -0.4020632803440094,
|
|
"loss": 0.4862,
|
|
"step": 310
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.09896779805421829,
|
|
"beta_dpo/beta_used_raw": 0.09896779805421829,
|
|
"beta_dpo/gap_mean": 9.311323165893555,
|
|
"beta_dpo/gap_std": 14.838136672973633,
|
|
"beta_dpo/mask_keep_frac": 0.7749999761581421,
|
|
"epoch": 0.9545454545454546,
|
|
"grad_norm": 13.178363800048828,
|
|
"learning_rate": 3.5719052736323806e-09,
|
|
"logits/chosen": -0.41689127683639526,
|
|
"logits/rejected": -0.41213899850845337,
|
|
"loss": 0.5065,
|
|
"step": 315
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.05972599983215332,
|
|
"beta_dpo/beta_used_raw": 0.048868484795093536,
|
|
"beta_dpo/gap_mean": 9.482072830200195,
|
|
"beta_dpo/gap_std": 15.056081771850586,
|
|
"beta_dpo/mask_keep_frac": 0.8999999761581421,
|
|
"epoch": 0.9696969696969697,
|
|
"grad_norm": 16.041927337646484,
|
|
"learning_rate": 1.690410564514244e-09,
|
|
"logits/chosen": -0.42210960388183594,
|
|
"logits/rejected": -0.38882067799568176,
|
|
"loss": 0.5702,
|
|
"step": 320
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.12381196022033691,
|
|
"beta_dpo/beta_used_raw": 0.12381196022033691,
|
|
"beta_dpo/gap_mean": 9.218812942504883,
|
|
"beta_dpo/gap_std": 15.04699993133545,
|
|
"beta_dpo/mask_keep_frac": 0.887499988079071,
|
|
"epoch": 0.9848484848484849,
|
|
"grad_norm": 30.680978775024414,
|
|
"learning_rate": 5.033308820289184e-10,
|
|
"logits/chosen": -0.4276047348976135,
|
|
"logits/rejected": -0.4020787179470062,
|
|
"loss": 0.4571,
|
|
"step": 325
|
|
},
|
|
{
|
|
"beta_dpo/beta_used": 0.08325864374637604,
|
|
"beta_dpo/beta_used_raw": 0.07991620153188705,
|
|
"beta_dpo/gap_mean": 9.292040824890137,
|
|
"beta_dpo/gap_std": 15.013906478881836,
|
|
"beta_dpo/mask_keep_frac": 0.862500011920929,
|
|
"epoch": 1.0,
|
|
"grad_norm": 12.934744834899902,
|
|
"learning_rate": 1.3985977021235829e-11,
|
|
"logits/chosen": -0.45221251249313354,
|
|
"logits/rejected": -0.42801961302757263,
|
|
"loss": 0.5248,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"step": 330,
|
|
"total_flos": 0.0,
|
|
"train_loss": 0.5772968926213005,
|
|
"train_runtime": 1407.4268,
|
|
"train_samples_per_second": 30.08,
|
|
"train_steps_per_second": 0.234
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 330,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 200,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 16,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|