2898 lines
97 KiB
JSON
2898 lines
97 KiB
JSON
|
|
{
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 1.0,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 2000,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.0,
|
||
|
|
"learning_rate": 3.3333333333333334e-09,
|
||
|
|
"logits/chosen": -1.9057868719100952,
|
||
|
|
"logits/rejected": -1.3045780658721924,
|
||
|
|
"logps/chosen": -0.3248765170574188,
|
||
|
|
"logps/rejected": -209.01206970214844,
|
||
|
|
"loss": 0.6931,
|
||
|
|
"rewards/accuracies": 0.0,
|
||
|
|
"rewards/chosen": 0.0,
|
||
|
|
"rewards/margins": 0.0,
|
||
|
|
"rewards/rejected": 0.0,
|
||
|
|
"step": 1
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.01,
|
||
|
|
"learning_rate": 3.3333333333333334e-08,
|
||
|
|
"logits/chosen": -0.6534053087234497,
|
||
|
|
"logits/rejected": -0.6231400370597839,
|
||
|
|
"logps/chosen": -1.4383400678634644,
|
||
|
|
"logps/rejected": -9.363475799560547,
|
||
|
|
"loss": 0.6957,
|
||
|
|
"rewards/accuracies": 0.2222222238779068,
|
||
|
|
"rewards/chosen": 0.0005377347115427256,
|
||
|
|
"rewards/margins": -0.004949397407472134,
|
||
|
|
"rewards/rejected": 0.005487131420522928,
|
||
|
|
"step": 10
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.01,
|
||
|
|
"learning_rate": 6.666666666666667e-08,
|
||
|
|
"logits/chosen": -0.7646675705909729,
|
||
|
|
"logits/rejected": -0.6648472547531128,
|
||
|
|
"logps/chosen": -1.4389005899429321,
|
||
|
|
"logps/rejected": -24.582592010498047,
|
||
|
|
"loss": 0.6987,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.0026078899390995502,
|
||
|
|
"rewards/margins": -0.010636803694069386,
|
||
|
|
"rewards/rejected": 0.008028914220631123,
|
||
|
|
"step": 20
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.01,
|
||
|
|
"learning_rate": 1e-07,
|
||
|
|
"logits/chosen": -1.079425573348999,
|
||
|
|
"logits/rejected": -0.8333357572555542,
|
||
|
|
"logps/chosen": -1.2763736248016357,
|
||
|
|
"logps/rejected": -20.987844467163086,
|
||
|
|
"loss": 0.6927,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.004916798323392868,
|
||
|
|
"rewards/margins": 0.0008920803666114807,
|
||
|
|
"rewards/rejected": -0.005808879155665636,
|
||
|
|
"step": 30
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.02,
|
||
|
|
"learning_rate": 1.3333333333333334e-07,
|
||
|
|
"logits/chosen": -0.7393895983695984,
|
||
|
|
"logits/rejected": -0.7093493342399597,
|
||
|
|
"logps/chosen": -1.157975196838379,
|
||
|
|
"logps/rejected": -20.361637115478516,
|
||
|
|
"loss": 0.6921,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.006825856864452362,
|
||
|
|
"rewards/margins": 0.002141424920409918,
|
||
|
|
"rewards/rejected": -0.008967281319200993,
|
||
|
|
"step": 40
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.03,
|
||
|
|
"learning_rate": 1.6666666666666665e-07,
|
||
|
|
"logits/chosen": -0.7778738141059875,
|
||
|
|
"logits/rejected": -0.6960525512695312,
|
||
|
|
"logps/chosen": -1.0992166996002197,
|
||
|
|
"logps/rejected": -8.134437561035156,
|
||
|
|
"loss": 0.6907,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.007767821196466684,
|
||
|
|
"rewards/margins": 0.005042984150350094,
|
||
|
|
"rewards/rejected": -0.012810803949832916,
|
||
|
|
"step": 50
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.03,
|
||
|
|
"learning_rate": 2e-07,
|
||
|
|
"logits/chosen": -1.124731421470642,
|
||
|
|
"logits/rejected": -1.046034574508667,
|
||
|
|
"logps/chosen": -1.1876769065856934,
|
||
|
|
"logps/rejected": -18.648529052734375,
|
||
|
|
"loss": 0.6264,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.01165817491710186,
|
||
|
|
"rewards/margins": 0.294198215007782,
|
||
|
|
"rewards/rejected": -0.3058564066886902,
|
||
|
|
"step": 60
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04,
|
||
|
|
"learning_rate": 2.3333333333333333e-07,
|
||
|
|
"logits/chosen": -0.9541412591934204,
|
||
|
|
"logits/rejected": -0.9094465374946594,
|
||
|
|
"logps/chosen": -1.2173497676849365,
|
||
|
|
"logps/rejected": -9.718514442443848,
|
||
|
|
"loss": 0.68,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -0.014510683715343475,
|
||
|
|
"rewards/margins": 0.02750353142619133,
|
||
|
|
"rewards/rejected": -0.042014218866825104,
|
||
|
|
"step": 70
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04,
|
||
|
|
"learning_rate": 2.6666666666666667e-07,
|
||
|
|
"logits/chosen": -0.8979532122612,
|
||
|
|
"logits/rejected": -0.771776556968689,
|
||
|
|
"logps/chosen": -1.3160572052001953,
|
||
|
|
"logps/rejected": -39.639564514160156,
|
||
|
|
"loss": 0.6714,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -0.001878797309473157,
|
||
|
|
"rewards/margins": 0.04782485216856003,
|
||
|
|
"rewards/rejected": -0.04970364645123482,
|
||
|
|
"step": 80
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04,
|
||
|
|
"learning_rate": 3e-07,
|
||
|
|
"logits/chosen": -0.8290653228759766,
|
||
|
|
"logits/rejected": -0.7559410333633423,
|
||
|
|
"logps/chosen": -1.5210120677947998,
|
||
|
|
"logps/rejected": -10.581136703491211,
|
||
|
|
"loss": 0.6876,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.004562065005302429,
|
||
|
|
"rewards/margins": 0.01130376011133194,
|
||
|
|
"rewards/rejected": -0.01586582511663437,
|
||
|
|
"step": 90
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.05,
|
||
|
|
"learning_rate": 3.333333333333333e-07,
|
||
|
|
"logits/chosen": -0.8234152793884277,
|
||
|
|
"logits/rejected": -0.597020149230957,
|
||
|
|
"logps/chosen": -1.6208550930023193,
|
||
|
|
"logps/rejected": -23.56104278564453,
|
||
|
|
"loss": 0.6735,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": 0.006368436850607395,
|
||
|
|
"rewards/margins": 0.04360882192850113,
|
||
|
|
"rewards/rejected": -0.03724038228392601,
|
||
|
|
"step": 100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06,
|
||
|
|
"learning_rate": 3.666666666666666e-07,
|
||
|
|
"logits/chosen": -0.7968894243240356,
|
||
|
|
"logits/rejected": -0.6932646036148071,
|
||
|
|
"logps/chosen": -1.2827363014221191,
|
||
|
|
"logps/rejected": -8.872964859008789,
|
||
|
|
"loss": 0.6971,
|
||
|
|
"rewards/accuracies": 0.20000000298023224,
|
||
|
|
"rewards/chosen": -0.010955500416457653,
|
||
|
|
"rewards/margins": -0.007888413965702057,
|
||
|
|
"rewards/rejected": -0.003067085286602378,
|
||
|
|
"step": 110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06,
|
||
|
|
"learning_rate": 4e-07,
|
||
|
|
"logits/chosen": -0.7554408311843872,
|
||
|
|
"logits/rejected": -0.7240918278694153,
|
||
|
|
"logps/chosen": -2.111884355545044,
|
||
|
|
"logps/rejected": -13.839550971984863,
|
||
|
|
"loss": 0.6801,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": 0.0014948326861485839,
|
||
|
|
"rewards/margins": 0.028023576363921165,
|
||
|
|
"rewards/rejected": -0.026528745889663696,
|
||
|
|
"step": 120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07,
|
||
|
|
"learning_rate": 4.3333333333333335e-07,
|
||
|
|
"logits/chosen": -0.785883903503418,
|
||
|
|
"logits/rejected": -0.7439943552017212,
|
||
|
|
"logps/chosen": -0.9020355343818665,
|
||
|
|
"logps/rejected": -6.26843786239624,
|
||
|
|
"loss": 0.6953,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.012480726465582848,
|
||
|
|
"rewards/margins": -0.00418872619047761,
|
||
|
|
"rewards/rejected": -0.00829199980944395,
|
||
|
|
"step": 130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07,
|
||
|
|
"learning_rate": 4.6666666666666666e-07,
|
||
|
|
"logits/chosen": -0.7403808832168579,
|
||
|
|
"logits/rejected": -0.7405279874801636,
|
||
|
|
"logps/chosen": -0.5109063386917114,
|
||
|
|
"logps/rejected": -0.5015324354171753,
|
||
|
|
"loss": 0.6947,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": 0.009772956371307373,
|
||
|
|
"rewards/margins": -0.0031128614209592342,
|
||
|
|
"rewards/rejected": 0.012885818257927895,
|
||
|
|
"step": 140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07,
|
||
|
|
"learning_rate": 5e-07,
|
||
|
|
"logits/chosen": -0.9348798990249634,
|
||
|
|
"logits/rejected": -0.7525736689567566,
|
||
|
|
"logps/chosen": -1.8914287090301514,
|
||
|
|
"logps/rejected": -51.92854690551758,
|
||
|
|
"loss": 0.6319,
|
||
|
|
"rewards/accuracies": 0.8999999761581421,
|
||
|
|
"rewards/chosen": 0.06059448793530464,
|
||
|
|
"rewards/margins": 0.13546349108219147,
|
||
|
|
"rewards/rejected": -0.07486900687217712,
|
||
|
|
"step": 150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08,
|
||
|
|
"learning_rate": 4.972972972972973e-07,
|
||
|
|
"logits/chosen": -1.309621810913086,
|
||
|
|
"logits/rejected": -0.8352526426315308,
|
||
|
|
"logps/chosen": -1.6440341472625732,
|
||
|
|
"logps/rejected": -77.83134460449219,
|
||
|
|
"loss": 0.4626,
|
||
|
|
"rewards/accuracies": 0.699999988079071,
|
||
|
|
"rewards/chosen": 0.07928337156772614,
|
||
|
|
"rewards/margins": 0.8833033442497253,
|
||
|
|
"rewards/rejected": -0.8040200471878052,
|
||
|
|
"step": 160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09,
|
||
|
|
"learning_rate": 4.945945945945945e-07,
|
||
|
|
"logits/chosen": -0.8511091470718384,
|
||
|
|
"logits/rejected": -0.8510860204696655,
|
||
|
|
"logps/chosen": -1.1958461999893188,
|
||
|
|
"logps/rejected": -1.1916860342025757,
|
||
|
|
"loss": 0.6933,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": 0.06808782368898392,
|
||
|
|
"rewards/margins": -0.00024626366212032735,
|
||
|
|
"rewards/rejected": 0.06833408772945404,
|
||
|
|
"step": 170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09,
|
||
|
|
"learning_rate": 4.918918918918919e-07,
|
||
|
|
"logits/chosen": -0.5700015425682068,
|
||
|
|
"logits/rejected": -0.5157918930053711,
|
||
|
|
"logps/chosen": -0.31525278091430664,
|
||
|
|
"logps/rejected": -9.227466583251953,
|
||
|
|
"loss": 0.6653,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": 0.042024269700050354,
|
||
|
|
"rewards/margins": 0.06360773742198944,
|
||
|
|
"rewards/rejected": -0.021583477035164833,
|
||
|
|
"step": 180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1,
|
||
|
|
"learning_rate": 4.891891891891891e-07,
|
||
|
|
"logits/chosen": -0.6965602040290833,
|
||
|
|
"logits/rejected": -0.639516294002533,
|
||
|
|
"logps/chosen": -0.856187641620636,
|
||
|
|
"logps/rejected": -11.494100570678711,
|
||
|
|
"loss": 0.6298,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": 0.06196604296565056,
|
||
|
|
"rewards/margins": 0.2772708833217621,
|
||
|
|
"rewards/rejected": -0.21530480682849884,
|
||
|
|
"step": 190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1,
|
||
|
|
"learning_rate": 4.864864864864865e-07,
|
||
|
|
"logits/chosen": -0.6798317432403564,
|
||
|
|
"logits/rejected": -0.5887733101844788,
|
||
|
|
"logps/chosen": -0.7568815350532532,
|
||
|
|
"logps/rejected": -19.734209060668945,
|
||
|
|
"loss": 0.6161,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": 0.07841428369283676,
|
||
|
|
"rewards/margins": 0.2028009593486786,
|
||
|
|
"rewards/rejected": -0.12438668310642242,
|
||
|
|
"step": 200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1,
|
||
|
|
"learning_rate": 4.837837837837838e-07,
|
||
|
|
"logits/chosen": -0.5506168007850647,
|
||
|
|
"logits/rejected": -0.5255534052848816,
|
||
|
|
"logps/chosen": -0.594875693321228,
|
||
|
|
"logps/rejected": -10.898618698120117,
|
||
|
|
"loss": 0.616,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": 0.08072157204151154,
|
||
|
|
"rewards/margins": 0.20680299401283264,
|
||
|
|
"rewards/rejected": -0.12608139216899872,
|
||
|
|
"step": 210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11,
|
||
|
|
"learning_rate": 4.810810810810811e-07,
|
||
|
|
"logits/chosen": -0.6420431137084961,
|
||
|
|
"logits/rejected": -0.4758230149745941,
|
||
|
|
"logps/chosen": -0.9624983072280884,
|
||
|
|
"logps/rejected": -15.536343574523926,
|
||
|
|
"loss": 0.6356,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": 0.15020258724689484,
|
||
|
|
"rewards/margins": 0.19687768816947937,
|
||
|
|
"rewards/rejected": -0.04667510837316513,
|
||
|
|
"step": 220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12,
|
||
|
|
"learning_rate": 4.783783783783784e-07,
|
||
|
|
"logits/chosen": -0.47034144401550293,
|
||
|
|
"logits/rejected": -0.4152582287788391,
|
||
|
|
"logps/chosen": -0.6653767824172974,
|
||
|
|
"logps/rejected": -26.184194564819336,
|
||
|
|
"loss": 0.591,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": 0.08964937925338745,
|
||
|
|
"rewards/margins": 0.5100451707839966,
|
||
|
|
"rewards/rejected": -0.42039579153060913,
|
||
|
|
"step": 230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12,
|
||
|
|
"learning_rate": 4.7567567567567566e-07,
|
||
|
|
"logits/chosen": -0.5280557870864868,
|
||
|
|
"logits/rejected": -0.5208911299705505,
|
||
|
|
"logps/chosen": -1.561173915863037,
|
||
|
|
"logps/rejected": -4.852164268493652,
|
||
|
|
"loss": 0.6755,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": 0.08790391683578491,
|
||
|
|
"rewards/margins": 0.03883642703294754,
|
||
|
|
"rewards/rejected": 0.04906748980283737,
|
||
|
|
"step": 240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12,
|
||
|
|
"learning_rate": 4.7297297297297294e-07,
|
||
|
|
"logits/chosen": -0.6650221347808838,
|
||
|
|
"logits/rejected": -0.4887104630470276,
|
||
|
|
"logps/chosen": -0.8397138714790344,
|
||
|
|
"logps/rejected": -24.225984573364258,
|
||
|
|
"loss": 0.581,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": 0.036707792431116104,
|
||
|
|
"rewards/margins": 0.3958829939365387,
|
||
|
|
"rewards/rejected": -0.3591752350330353,
|
||
|
|
"step": 250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13,
|
||
|
|
"learning_rate": 4.702702702702703e-07,
|
||
|
|
"logits/chosen": -0.6431881785392761,
|
||
|
|
"logits/rejected": -0.5451701879501343,
|
||
|
|
"logps/chosen": -0.8626500368118286,
|
||
|
|
"logps/rejected": -18.065229415893555,
|
||
|
|
"loss": 0.6263,
|
||
|
|
"rewards/accuracies": 0.20000000298023224,
|
||
|
|
"rewards/chosen": 0.05334913730621338,
|
||
|
|
"rewards/margins": 0.4245104193687439,
|
||
|
|
"rewards/rejected": -0.3711613118648529,
|
||
|
|
"step": 260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14,
|
||
|
|
"learning_rate": 4.6756756756756757e-07,
|
||
|
|
"logits/chosen": -0.9750626683235168,
|
||
|
|
"logits/rejected": -0.6229163408279419,
|
||
|
|
"logps/chosen": -1.2636909484863281,
|
||
|
|
"logps/rejected": -39.48912811279297,
|
||
|
|
"loss": 0.5592,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": 0.07849131524562836,
|
||
|
|
"rewards/margins": 0.8403557538986206,
|
||
|
|
"rewards/rejected": -0.7618645429611206,
|
||
|
|
"step": 270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14,
|
||
|
|
"learning_rate": 4.6486486486486485e-07,
|
||
|
|
"logits/chosen": -0.6725428700447083,
|
||
|
|
"logits/rejected": -0.5742911100387573,
|
||
|
|
"logps/chosen": -0.5612740516662598,
|
||
|
|
"logps/rejected": -11.10429573059082,
|
||
|
|
"loss": 0.6343,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": 0.05452188849449158,
|
||
|
|
"rewards/margins": 0.2184288203716278,
|
||
|
|
"rewards/rejected": -0.16390694677829742,
|
||
|
|
"step": 280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14,
|
||
|
|
"learning_rate": 4.6216216216216214e-07,
|
||
|
|
"logits/chosen": -0.5694271922111511,
|
||
|
|
"logits/rejected": -0.5494506359100342,
|
||
|
|
"logps/chosen": -0.36877548694610596,
|
||
|
|
"logps/rejected": -2.7846176624298096,
|
||
|
|
"loss": 0.6649,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": 0.057922400534152985,
|
||
|
|
"rewards/margins": 0.06819172948598862,
|
||
|
|
"rewards/rejected": -0.010269328020513058,
|
||
|
|
"step": 290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.15,
|
||
|
|
"learning_rate": 4.594594594594595e-07,
|
||
|
|
"logits/chosen": -0.43670493364334106,
|
||
|
|
"logits/rejected": -0.4366675019264221,
|
||
|
|
"logps/chosen": -0.8144774436950684,
|
||
|
|
"logps/rejected": -0.8205119967460632,
|
||
|
|
"loss": 0.6935,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": 0.049146492034196854,
|
||
|
|
"rewards/margins": -0.0007481955690309405,
|
||
|
|
"rewards/rejected": 0.04989469051361084,
|
||
|
|
"step": 300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.15,
|
||
|
|
"learning_rate": 4.567567567567567e-07,
|
||
|
|
"logits/chosen": -1.080672264099121,
|
||
|
|
"logits/rejected": -0.5540364980697632,
|
||
|
|
"logps/chosen": -1.4554121494293213,
|
||
|
|
"logps/rejected": -44.3469352722168,
|
||
|
|
"loss": 0.5069,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": 0.10377122461795807,
|
||
|
|
"rewards/margins": 1.340423583984375,
|
||
|
|
"rewards/rejected": -1.2366522550582886,
|
||
|
|
"step": 310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16,
|
||
|
|
"learning_rate": 4.5405405405405405e-07,
|
||
|
|
"logits/chosen": -0.7922333478927612,
|
||
|
|
"logits/rejected": -0.4999925494194031,
|
||
|
|
"logps/chosen": -1.0405428409576416,
|
||
|
|
"logps/rejected": -32.01726531982422,
|
||
|
|
"loss": 0.5538,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": 0.07706712186336517,
|
||
|
|
"rewards/margins": 1.2412185668945312,
|
||
|
|
"rewards/rejected": -1.1641514301300049,
|
||
|
|
"step": 320
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17,
|
||
|
|
"learning_rate": 4.5135135135135134e-07,
|
||
|
|
"logits/chosen": -0.5424150228500366,
|
||
|
|
"logits/rejected": -0.5425628423690796,
|
||
|
|
"logps/chosen": -1.4353896379470825,
|
||
|
|
"logps/rejected": -1.433706521987915,
|
||
|
|
"loss": 0.6926,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": 0.0135975182056427,
|
||
|
|
"rewards/margins": 0.0010272532235831022,
|
||
|
|
"rewards/rejected": 0.012570266611874104,
|
||
|
|
"step": 330
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17,
|
||
|
|
"learning_rate": 4.486486486486487e-07,
|
||
|
|
"logits/chosen": -0.6280439496040344,
|
||
|
|
"logits/rejected": -0.43740400671958923,
|
||
|
|
"logps/chosen": -1.5716569423675537,
|
||
|
|
"logps/rejected": -42.92607116699219,
|
||
|
|
"loss": 0.5532,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": 0.10536029189825058,
|
||
|
|
"rewards/margins": 1.679062843322754,
|
||
|
|
"rewards/rejected": -1.5737024545669556,
|
||
|
|
"step": 340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17,
|
||
|
|
"learning_rate": 4.459459459459459e-07,
|
||
|
|
"logits/chosen": -0.6212655305862427,
|
||
|
|
"logits/rejected": -0.5467087030410767,
|
||
|
|
"logps/chosen": -1.4269088506698608,
|
||
|
|
"logps/rejected": -15.58141040802002,
|
||
|
|
"loss": 0.5713,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": 0.02300642430782318,
|
||
|
|
"rewards/margins": 0.6028021574020386,
|
||
|
|
"rewards/rejected": -0.5797957181930542,
|
||
|
|
"step": 350
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.18,
|
||
|
|
"learning_rate": 4.4324324324324325e-07,
|
||
|
|
"logits/chosen": -0.5765206217765808,
|
||
|
|
"logits/rejected": -0.5639607310295105,
|
||
|
|
"logps/chosen": -1.3736517429351807,
|
||
|
|
"logps/rejected": -7.530215263366699,
|
||
|
|
"loss": 0.6255,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.014159053564071655,
|
||
|
|
"rewards/margins": 0.3620375394821167,
|
||
|
|
"rewards/rejected": -0.37619656324386597,
|
||
|
|
"step": 360
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.18,
|
||
|
|
"learning_rate": 4.4054054054054053e-07,
|
||
|
|
"logits/chosen": -0.774685263633728,
|
||
|
|
"logits/rejected": -0.48308247327804565,
|
||
|
|
"logps/chosen": -0.7153536677360535,
|
||
|
|
"logps/rejected": -31.57724952697754,
|
||
|
|
"loss": 0.5592,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": 0.09816861152648926,
|
||
|
|
"rewards/margins": 1.5147247314453125,
|
||
|
|
"rewards/rejected": -1.4165561199188232,
|
||
|
|
"step": 370
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19,
|
||
|
|
"learning_rate": 4.378378378378378e-07,
|
||
|
|
"logits/chosen": -0.5976985692977905,
|
||
|
|
"logits/rejected": -0.4839654862880707,
|
||
|
|
"logps/chosen": -0.9786246418952942,
|
||
|
|
"logps/rejected": -32.14298629760742,
|
||
|
|
"loss": 0.4226,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -0.043240711092948914,
|
||
|
|
"rewards/margins": 2.0043439865112305,
|
||
|
|
"rewards/rejected": -2.0475847721099854,
|
||
|
|
"step": 380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2,
|
||
|
|
"learning_rate": 4.351351351351351e-07,
|
||
|
|
"logits/chosen": -0.40646108984947205,
|
||
|
|
"logits/rejected": -0.40619462728500366,
|
||
|
|
"logps/chosen": -1.4551324844360352,
|
||
|
|
"logps/rejected": -1.436767339706421,
|
||
|
|
"loss": 0.6951,
|
||
|
|
"rewards/accuracies": 0.20000000298023224,
|
||
|
|
"rewards/chosen": -0.09133030474185944,
|
||
|
|
"rewards/margins": -0.0038097582291811705,
|
||
|
|
"rewards/rejected": -0.087520532310009,
|
||
|
|
"step": 390
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2,
|
||
|
|
"learning_rate": 4.3243243243243244e-07,
|
||
|
|
"logits/chosen": -0.8727982640266418,
|
||
|
|
"logits/rejected": -0.4593069553375244,
|
||
|
|
"logps/chosen": -1.0557596683502197,
|
||
|
|
"logps/rejected": -44.42220687866211,
|
||
|
|
"loss": 0.4219,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": 0.11892731487751007,
|
||
|
|
"rewards/margins": 2.8284239768981934,
|
||
|
|
"rewards/rejected": -2.70949649810791,
|
||
|
|
"step": 400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2,
|
||
|
|
"learning_rate": 4.2972972972972973e-07,
|
||
|
|
"logits/chosen": -0.7421806454658508,
|
||
|
|
"logits/rejected": -0.5064016580581665,
|
||
|
|
"logps/chosen": -1.2260805368423462,
|
||
|
|
"logps/rejected": -38.49885940551758,
|
||
|
|
"loss": 0.5559,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": 0.0014935005456209183,
|
||
|
|
"rewards/margins": 2.3735568523406982,
|
||
|
|
"rewards/rejected": -2.372063636779785,
|
||
|
|
"step": 410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.21,
|
||
|
|
"learning_rate": 4.27027027027027e-07,
|
||
|
|
"logits/chosen": -0.5848358869552612,
|
||
|
|
"logits/rejected": -0.37570345401763916,
|
||
|
|
"logps/chosen": -0.7797711491584778,
|
||
|
|
"logps/rejected": -33.012638092041016,
|
||
|
|
"loss": 0.5546,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": 0.05262111499905586,
|
||
|
|
"rewards/margins": 1.6432182788848877,
|
||
|
|
"rewards/rejected": -1.5905970335006714,
|
||
|
|
"step": 420
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.21,
|
||
|
|
"learning_rate": 4.243243243243243e-07,
|
||
|
|
"logits/chosen": -0.5585634708404541,
|
||
|
|
"logits/rejected": -0.5583919286727905,
|
||
|
|
"logps/chosen": -0.8610560297966003,
|
||
|
|
"logps/rejected": -0.8612043261528015,
|
||
|
|
"loss": 0.6937,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": 0.048935629427433014,
|
||
|
|
"rewards/margins": -0.0010301045840606093,
|
||
|
|
"rewards/rejected": 0.04996573179960251,
|
||
|
|
"step": 430
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22,
|
||
|
|
"learning_rate": 4.2162162162162164e-07,
|
||
|
|
"logits/chosen": -0.4769681394100189,
|
||
|
|
"logits/rejected": -0.45251044631004333,
|
||
|
|
"logps/chosen": -1.607731819152832,
|
||
|
|
"logps/rejected": -8.222475051879883,
|
||
|
|
"loss": 0.6243,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.004343940410763025,
|
||
|
|
"rewards/margins": 0.5505796670913696,
|
||
|
|
"rewards/rejected": -0.5549236536026001,
|
||
|
|
"step": 440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23,
|
||
|
|
"learning_rate": 4.189189189189189e-07,
|
||
|
|
"logits/chosen": -0.4478166997432709,
|
||
|
|
"logits/rejected": -0.42751985788345337,
|
||
|
|
"logps/chosen": -1.0469465255737305,
|
||
|
|
"logps/rejected": -5.765268325805664,
|
||
|
|
"loss": 0.626,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.046685151755809784,
|
||
|
|
"rewards/margins": 0.36600273847579956,
|
||
|
|
"rewards/rejected": -0.4126877784729004,
|
||
|
|
"step": 450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23,
|
||
|
|
"learning_rate": 4.162162162162162e-07,
|
||
|
|
"logits/chosen": -0.5552384257316589,
|
||
|
|
"logits/rejected": -0.4869106709957123,
|
||
|
|
"logps/chosen": -0.8934662938117981,
|
||
|
|
"logps/rejected": -15.432965278625488,
|
||
|
|
"loss": 0.6253,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": 0.07785534113645554,
|
||
|
|
"rewards/margins": 0.6028213500976562,
|
||
|
|
"rewards/rejected": -0.5249660611152649,
|
||
|
|
"step": 460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23,
|
||
|
|
"learning_rate": 4.135135135135135e-07,
|
||
|
|
"logits/chosen": -0.636600911617279,
|
||
|
|
"logits/rejected": -0.3652159571647644,
|
||
|
|
"logps/chosen": -0.6702336668968201,
|
||
|
|
"logps/rejected": -65.19319915771484,
|
||
|
|
"loss": 0.4183,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": 0.059565335512161255,
|
||
|
|
"rewards/margins": 4.7467474937438965,
|
||
|
|
"rewards/rejected": -4.6871819496154785,
|
||
|
|
"step": 470
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24,
|
||
|
|
"learning_rate": 4.1081081081081084e-07,
|
||
|
|
"logits/chosen": -0.27527302503585815,
|
||
|
|
"logits/rejected": -0.2753751873970032,
|
||
|
|
"logps/chosen": -0.7251207232475281,
|
||
|
|
"logps/rejected": -0.7327331900596619,
|
||
|
|
"loss": 0.6924,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": 0.09307748824357986,
|
||
|
|
"rewards/margins": 0.0015917860437184572,
|
||
|
|
"rewards/rejected": 0.09148569405078888,
|
||
|
|
"step": 480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24,
|
||
|
|
"learning_rate": 4.0810810810810807e-07,
|
||
|
|
"logits/chosen": -0.2706758379936218,
|
||
|
|
"logits/rejected": -0.2708207964897156,
|
||
|
|
"logps/chosen": -0.31038787961006165,
|
||
|
|
"logps/rejected": -0.30583518743515015,
|
||
|
|
"loss": 0.6945,
|
||
|
|
"rewards/accuracies": 0.0,
|
||
|
|
"rewards/chosen": 0.030269015580415726,
|
||
|
|
"rewards/margins": -0.002708534011617303,
|
||
|
|
"rewards/rejected": 0.032977551221847534,
|
||
|
|
"step": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.25,
|
||
|
|
"learning_rate": 4.054054054054054e-07,
|
||
|
|
"logits/chosen": -0.5894482135772705,
|
||
|
|
"logits/rejected": -0.48265695571899414,
|
||
|
|
"logps/chosen": -0.7016115188598633,
|
||
|
|
"logps/rejected": -17.208364486694336,
|
||
|
|
"loss": 0.5006,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": 0.13988427817821503,
|
||
|
|
"rewards/margins": 0.8726360201835632,
|
||
|
|
"rewards/rejected": -0.732751727104187,
|
||
|
|
"step": 500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.25,
|
||
|
|
"eval_logits/chosen": -0.48661941289901733,
|
||
|
|
"eval_logits/rejected": -0.3604813516139984,
|
||
|
|
"eval_logps/chosen": -1.2089859247207642,
|
||
|
|
"eval_logps/rejected": -21.482067108154297,
|
||
|
|
"eval_loss": 0.5730764269828796,
|
||
|
|
"eval_rewards/accuracies": 0.46000000834465027,
|
||
|
|
"eval_rewards/chosen": 0.09127917140722275,
|
||
|
|
"eval_rewards/margins": 1.2398022413253784,
|
||
|
|
"eval_rewards/rejected": -1.148523211479187,
|
||
|
|
"eval_runtime": 26.6494,
|
||
|
|
"eval_samples_per_second": 3.752,
|
||
|
|
"eval_steps_per_second": 3.752,
|
||
|
|
"step": 500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.26,
|
||
|
|
"learning_rate": 4.027027027027027e-07,
|
||
|
|
"logits/chosen": -0.5123504400253296,
|
||
|
|
"logits/rejected": -0.37626415491104126,
|
||
|
|
"logps/chosen": -0.7230352163314819,
|
||
|
|
"logps/rejected": -32.59217071533203,
|
||
|
|
"loss": 0.5533,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": 0.11992353200912476,
|
||
|
|
"rewards/margins": 2.84232497215271,
|
||
|
|
"rewards/rejected": -2.7224011421203613,
|
||
|
|
"step": 510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.26,
|
||
|
|
"learning_rate": 4e-07,
|
||
|
|
"logits/chosen": -0.5709089636802673,
|
||
|
|
"logits/rejected": -0.4344344735145569,
|
||
|
|
"logps/chosen": -1.6505239009857178,
|
||
|
|
"logps/rejected": -29.4504337310791,
|
||
|
|
"loss": 0.5534,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.012776079587638378,
|
||
|
|
"rewards/margins": 2.109302043914795,
|
||
|
|
"rewards/rejected": -2.1220779418945312,
|
||
|
|
"step": 520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.27,
|
||
|
|
"learning_rate": 3.9729729729729727e-07,
|
||
|
|
"logits/chosen": -0.7366935014724731,
|
||
|
|
"logits/rejected": -0.2669216990470886,
|
||
|
|
"logps/chosen": -1.2830209732055664,
|
||
|
|
"logps/rejected": -63.51800537109375,
|
||
|
|
"loss": 0.4195,
|
||
|
|
"rewards/accuracies": 0.699999988079071,
|
||
|
|
"rewards/chosen": 0.121388278901577,
|
||
|
|
"rewards/margins": 2.739515542984009,
|
||
|
|
"rewards/rejected": -2.6181273460388184,
|
||
|
|
"step": 530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.27,
|
||
|
|
"learning_rate": 3.945945945945946e-07,
|
||
|
|
"logits/chosen": -0.43380504846572876,
|
||
|
|
"logits/rejected": -0.3422037661075592,
|
||
|
|
"logps/chosen": -1.0721886157989502,
|
||
|
|
"logps/rejected": -18.663774490356445,
|
||
|
|
"loss": 0.5626,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": 0.05732693150639534,
|
||
|
|
"rewards/margins": 1.6029354333877563,
|
||
|
|
"rewards/rejected": -1.5456085205078125,
|
||
|
|
"step": 540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.28,
|
||
|
|
"learning_rate": 3.918918918918919e-07,
|
||
|
|
"logits/chosen": -0.5525709986686707,
|
||
|
|
"logits/rejected": -0.41818103194236755,
|
||
|
|
"logps/chosen": -1.293927788734436,
|
||
|
|
"logps/rejected": -8.750811576843262,
|
||
|
|
"loss": 0.5738,
|
||
|
|
"rewards/accuracies": 0.20000000298023224,
|
||
|
|
"rewards/chosen": -0.009553834795951843,
|
||
|
|
"rewards/margins": 0.4876217246055603,
|
||
|
|
"rewards/rejected": -0.4971756041049957,
|
||
|
|
"step": 550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.28,
|
||
|
|
"learning_rate": 3.891891891891892e-07,
|
||
|
|
"logits/chosen": -0.27140697836875916,
|
||
|
|
"logits/rejected": -0.27138635516166687,
|
||
|
|
"logps/chosen": -1.5451117753982544,
|
||
|
|
"logps/rejected": -1.539139986038208,
|
||
|
|
"loss": 0.6938,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.05115029215812683,
|
||
|
|
"rewards/margins": -0.0012699353974312544,
|
||
|
|
"rewards/rejected": -0.04988035187125206,
|
||
|
|
"step": 560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.28,
|
||
|
|
"learning_rate": 3.8648648648648646e-07,
|
||
|
|
"logits/chosen": -0.3724200129508972,
|
||
|
|
"logits/rejected": -0.37271934747695923,
|
||
|
|
"logps/chosen": -1.388579249382019,
|
||
|
|
"logps/rejected": -1.4015172719955444,
|
||
|
|
"loss": 0.6919,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.030899781733751297,
|
||
|
|
"rewards/margins": 0.0024078444112092257,
|
||
|
|
"rewards/rejected": -0.033307623118162155,
|
||
|
|
"step": 570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.29,
|
||
|
|
"learning_rate": 3.837837837837838e-07,
|
||
|
|
"logits/chosen": -0.48146286606788635,
|
||
|
|
"logits/rejected": -0.3344786763191223,
|
||
|
|
"logps/chosen": -1.6303869485855103,
|
||
|
|
"logps/rejected": -16.107934951782227,
|
||
|
|
"loss": 0.604,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.04100670665502548,
|
||
|
|
"rewards/margins": 1.0195186138153076,
|
||
|
|
"rewards/rejected": -1.0605252981185913,
|
||
|
|
"step": 580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.29,
|
||
|
|
"learning_rate": 3.8108108108108104e-07,
|
||
|
|
"logits/chosen": -0.7203977108001709,
|
||
|
|
"logits/rejected": -0.3984030783176422,
|
||
|
|
"logps/chosen": -1.4085242748260498,
|
||
|
|
"logps/rejected": -33.98643493652344,
|
||
|
|
"loss": 0.5603,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.022265303879976273,
|
||
|
|
"rewards/margins": 2.3924202919006348,
|
||
|
|
"rewards/rejected": -2.4146857261657715,
|
||
|
|
"step": 590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3,
|
||
|
|
"learning_rate": 3.783783783783784e-07,
|
||
|
|
"logits/chosen": -0.58781498670578,
|
||
|
|
"logits/rejected": -0.2599487900733948,
|
||
|
|
"logps/chosen": -0.7738112211227417,
|
||
|
|
"logps/rejected": -37.13945770263672,
|
||
|
|
"loss": 0.4929,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": 0.011140326038002968,
|
||
|
|
"rewards/margins": 1.930899977684021,
|
||
|
|
"rewards/rejected": -1.919759750366211,
|
||
|
|
"step": 600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3,
|
||
|
|
"learning_rate": 3.7567567567567566e-07,
|
||
|
|
"logits/chosen": -0.17637184262275696,
|
||
|
|
"logits/rejected": -0.17617428302764893,
|
||
|
|
"logps/chosen": -1.59823477268219,
|
||
|
|
"logps/rejected": -1.589928150177002,
|
||
|
|
"loss": 0.6933,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": 0.006097626406699419,
|
||
|
|
"rewards/margins": -0.0003702353569678962,
|
||
|
|
"rewards/rejected": 0.0064678615890443325,
|
||
|
|
"step": 610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.31,
|
||
|
|
"learning_rate": 3.72972972972973e-07,
|
||
|
|
"logits/chosen": -0.2997194230556488,
|
||
|
|
"logits/rejected": -0.22588582336902618,
|
||
|
|
"logps/chosen": -1.5014773607254028,
|
||
|
|
"logps/rejected": -19.09648323059082,
|
||
|
|
"loss": 0.6233,
|
||
|
|
"rewards/accuracies": 0.20000000298023224,
|
||
|
|
"rewards/chosen": -0.14116178452968597,
|
||
|
|
"rewards/margins": 1.9240531921386719,
|
||
|
|
"rewards/rejected": -2.0652148723602295,
|
||
|
|
"step": 620
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.32,
|
||
|
|
"learning_rate": 3.7027027027027023e-07,
|
||
|
|
"logits/chosen": -0.48020777106285095,
|
||
|
|
"logits/rejected": -0.2105187177658081,
|
||
|
|
"logps/chosen": -1.2748075723648071,
|
||
|
|
"logps/rejected": -30.414684295654297,
|
||
|
|
"loss": 0.6246,
|
||
|
|
"rewards/accuracies": 0.20000000298023224,
|
||
|
|
"rewards/chosen": -0.05854244902729988,
|
||
|
|
"rewards/margins": 1.8600146770477295,
|
||
|
|
"rewards/rejected": -1.9185569286346436,
|
||
|
|
"step": 630
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.32,
|
||
|
|
"learning_rate": 3.6756756756756757e-07,
|
||
|
|
"logits/chosen": -0.32584524154663086,
|
||
|
|
"logits/rejected": -0.325616717338562,
|
||
|
|
"logps/chosen": -1.2563741207122803,
|
||
|
|
"logps/rejected": -1.2441630363464355,
|
||
|
|
"loss": 0.6948,
|
||
|
|
"rewards/accuracies": 0.20000000298023224,
|
||
|
|
"rewards/chosen": -0.052650950849056244,
|
||
|
|
"rewards/margins": -0.003209482878446579,
|
||
|
|
"rewards/rejected": -0.04944147169589996,
|
||
|
|
"step": 640
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.33,
|
||
|
|
"learning_rate": 3.6486486486486486e-07,
|
||
|
|
"logits/chosen": -0.5669493675231934,
|
||
|
|
"logits/rejected": -0.4724608063697815,
|
||
|
|
"logps/chosen": -1.0622951984405518,
|
||
|
|
"logps/rejected": -32.84709930419922,
|
||
|
|
"loss": 0.5534,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": 0.011947070248425007,
|
||
|
|
"rewards/margins": 2.908785581588745,
|
||
|
|
"rewards/rejected": -2.896839141845703,
|
||
|
|
"step": 650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.33,
|
||
|
|
"learning_rate": 3.6216216216216214e-07,
|
||
|
|
"logits/chosen": -0.771874725818634,
|
||
|
|
"logits/rejected": -0.4653758108615875,
|
||
|
|
"logps/chosen": -2.2719671726226807,
|
||
|
|
"logps/rejected": -45.466407775878906,
|
||
|
|
"loss": 0.5563,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.012174086645245552,
|
||
|
|
"rewards/margins": 2.240072727203369,
|
||
|
|
"rewards/rejected": -2.252246856689453,
|
||
|
|
"step": 660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.34,
|
||
|
|
"learning_rate": 3.5945945945945943e-07,
|
||
|
|
"logits/chosen": -0.5078957080841064,
|
||
|
|
"logits/rejected": -0.30749645829200745,
|
||
|
|
"logps/chosen": -1.5645476579666138,
|
||
|
|
"logps/rejected": -26.516870498657227,
|
||
|
|
"loss": 0.5576,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.032517604529857635,
|
||
|
|
"rewards/margins": 2.3935961723327637,
|
||
|
|
"rewards/rejected": -2.426114082336426,
|
||
|
|
"step": 670
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.34,
|
||
|
|
"learning_rate": 3.5675675675675677e-07,
|
||
|
|
"logits/chosen": -0.4348447918891907,
|
||
|
|
"logits/rejected": -0.30584046244621277,
|
||
|
|
"logps/chosen": -1.2801238298416138,
|
||
|
|
"logps/rejected": -25.644485473632812,
|
||
|
|
"loss": 0.5972,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.03776795417070389,
|
||
|
|
"rewards/margins": 1.9652618169784546,
|
||
|
|
"rewards/rejected": -2.0030298233032227,
|
||
|
|
"step": 680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.34,
|
||
|
|
"learning_rate": 3.5405405405405406e-07,
|
||
|
|
"logits/chosen": -0.28920015692710876,
|
||
|
|
"logits/rejected": -0.28904658555984497,
|
||
|
|
"logps/chosen": -1.448441743850708,
|
||
|
|
"logps/rejected": -1.4275623559951782,
|
||
|
|
"loss": 0.6958,
|
||
|
|
"rewards/accuracies": 0.10000000149011612,
|
||
|
|
"rewards/chosen": -0.08941696584224701,
|
||
|
|
"rewards/margins": -0.005217382218688726,
|
||
|
|
"rewards/rejected": -0.08419958502054214,
|
||
|
|
"step": 690
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.35,
|
||
|
|
"learning_rate": 3.5135135135135134e-07,
|
||
|
|
"logits/chosen": -0.182106152176857,
|
||
|
|
"logits/rejected": -0.18192948400974274,
|
||
|
|
"logps/chosen": -1.501230239868164,
|
||
|
|
"logps/rejected": -1.5124905109405518,
|
||
|
|
"loss": 0.6925,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.03814355283975601,
|
||
|
|
"rewards/margins": 0.0013828824739903212,
|
||
|
|
"rewards/rejected": -0.03952643647789955,
|
||
|
|
"step": 700
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.35,
|
||
|
|
"learning_rate": 3.4864864864864863e-07,
|
||
|
|
"logits/chosen": -0.669059157371521,
|
||
|
|
"logits/rejected": -0.36879947781562805,
|
||
|
|
"logps/chosen": -1.125857949256897,
|
||
|
|
"logps/rejected": -40.65812301635742,
|
||
|
|
"loss": 0.5525,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": 0.04682355001568794,
|
||
|
|
"rewards/margins": 2.6082589626312256,
|
||
|
|
"rewards/rejected": -2.5614356994628906,
|
||
|
|
"step": 710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.36,
|
||
|
|
"learning_rate": 3.4594594594594597e-07,
|
||
|
|
"logits/chosen": -0.3450705409049988,
|
||
|
|
"logits/rejected": -0.3405342400074005,
|
||
|
|
"logps/chosen": -1.4364194869995117,
|
||
|
|
"logps/rejected": -5.2330427169799805,
|
||
|
|
"loss": 0.6319,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.12864340841770172,
|
||
|
|
"rewards/margins": 0.24519333243370056,
|
||
|
|
"rewards/rejected": -0.3738367259502411,
|
||
|
|
"step": 720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.36,
|
||
|
|
"learning_rate": 3.432432432432432e-07,
|
||
|
|
"logits/chosen": -0.19730427861213684,
|
||
|
|
"logits/rejected": -0.19721275568008423,
|
||
|
|
"logps/chosen": -1.455517053604126,
|
||
|
|
"logps/rejected": -1.468201756477356,
|
||
|
|
"loss": 0.6921,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.07263887673616409,
|
||
|
|
"rewards/margins": 0.0021636115852743387,
|
||
|
|
"rewards/rejected": -0.07480248063802719,
|
||
|
|
"step": 730
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.37,
|
||
|
|
"learning_rate": 3.4054054054054054e-07,
|
||
|
|
"logits/chosen": -0.6894451975822449,
|
||
|
|
"logits/rejected": -0.3585297465324402,
|
||
|
|
"logps/chosen": -2.602849245071411,
|
||
|
|
"logps/rejected": -59.2165641784668,
|
||
|
|
"loss": 0.4853,
|
||
|
|
"rewards/accuracies": 0.699999988079071,
|
||
|
|
"rewards/chosen": -0.09771852195262909,
|
||
|
|
"rewards/margins": 3.6492016315460205,
|
||
|
|
"rewards/rejected": -3.746920347213745,
|
||
|
|
"step": 740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.38,
|
||
|
|
"learning_rate": 3.378378378378378e-07,
|
||
|
|
"logits/chosen": -0.48982128500938416,
|
||
|
|
"logits/rejected": -0.19518586993217468,
|
||
|
|
"logps/chosen": -1.4529268741607666,
|
||
|
|
"logps/rejected": -40.79501724243164,
|
||
|
|
"loss": 0.6238,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.05402202159166336,
|
||
|
|
"rewards/margins": 2.7485110759735107,
|
||
|
|
"rewards/rejected": -2.802532911300659,
|
||
|
|
"step": 750
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.38,
|
||
|
|
"learning_rate": 3.3513513513513516e-07,
|
||
|
|
"logits/chosen": -0.3830093741416931,
|
||
|
|
"logits/rejected": -0.38310927152633667,
|
||
|
|
"logps/chosen": -1.994246482849121,
|
||
|
|
"logps/rejected": -1.9973366260528564,
|
||
|
|
"loss": 0.6927,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.09356460720300674,
|
||
|
|
"rewards/margins": 0.0009627247345633805,
|
||
|
|
"rewards/rejected": -0.09452733397483826,
|
||
|
|
"step": 760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.39,
|
||
|
|
"learning_rate": 3.324324324324324e-07,
|
||
|
|
"logits/chosen": -0.5786948204040527,
|
||
|
|
"logits/rejected": -0.29584842920303345,
|
||
|
|
"logps/chosen": -1.9216482639312744,
|
||
|
|
"logps/rejected": -53.21647262573242,
|
||
|
|
"loss": 0.5528,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -0.14687001705169678,
|
||
|
|
"rewards/margins": 3.7029500007629395,
|
||
|
|
"rewards/rejected": -3.8498198986053467,
|
||
|
|
"step": 770
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.39,
|
||
|
|
"learning_rate": 3.2972972972972973e-07,
|
||
|
|
"logits/chosen": -0.44832101464271545,
|
||
|
|
"logits/rejected": -0.3366854190826416,
|
||
|
|
"logps/chosen": -1.6166212558746338,
|
||
|
|
"logps/rejected": -20.03157615661621,
|
||
|
|
"loss": 0.5547,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.16137336194515228,
|
||
|
|
"rewards/margins": 1.7396749258041382,
|
||
|
|
"rewards/rejected": -1.9010480642318726,
|
||
|
|
"step": 780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4,
|
||
|
|
"learning_rate": 3.27027027027027e-07,
|
||
|
|
"logits/chosen": -0.5321040153503418,
|
||
|
|
"logits/rejected": -0.2713443636894226,
|
||
|
|
"logps/chosen": -2.4121830463409424,
|
||
|
|
"logps/rejected": -41.183345794677734,
|
||
|
|
"loss": 0.518,
|
||
|
|
"rewards/accuracies": 0.800000011920929,
|
||
|
|
"rewards/chosen": -0.1596633493900299,
|
||
|
|
"rewards/margins": 2.9636120796203613,
|
||
|
|
"rewards/rejected": -3.1232752799987793,
|
||
|
|
"step": 790
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4,
|
||
|
|
"learning_rate": 3.243243243243243e-07,
|
||
|
|
"logits/chosen": -0.23825044929981232,
|
||
|
|
"logits/rejected": -0.238026425242424,
|
||
|
|
"logps/chosen": -1.5677788257598877,
|
||
|
|
"logps/rejected": -1.5716334581375122,
|
||
|
|
"loss": 0.6941,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.07762937247753143,
|
||
|
|
"rewards/margins": -0.0019604426342993975,
|
||
|
|
"rewards/rejected": -0.07566893845796585,
|
||
|
|
"step": 800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41,
|
||
|
|
"learning_rate": 3.216216216216216e-07,
|
||
|
|
"logits/chosen": -0.22825762629508972,
|
||
|
|
"logits/rejected": -0.2077532261610031,
|
||
|
|
"logps/chosen": -2.0755527019500732,
|
||
|
|
"logps/rejected": -17.001359939575195,
|
||
|
|
"loss": 0.6234,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.23694956302642822,
|
||
|
|
"rewards/margins": 1.708189606666565,
|
||
|
|
"rewards/rejected": -1.9451383352279663,
|
||
|
|
"step": 810
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41,
|
||
|
|
"learning_rate": 3.1891891891891893e-07,
|
||
|
|
"logits/chosen": -0.12132171541452408,
|
||
|
|
"logits/rejected": -0.12116007506847382,
|
||
|
|
"logps/chosen": -2.348914623260498,
|
||
|
|
"logps/rejected": -2.347259044647217,
|
||
|
|
"loss": 0.6945,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.19036361575126648,
|
||
|
|
"rewards/margins": -0.0027313902974128723,
|
||
|
|
"rewards/rejected": -0.1876322329044342,
|
||
|
|
"step": 820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41,
|
||
|
|
"learning_rate": 3.162162162162162e-07,
|
||
|
|
"logits/chosen": -0.26363319158554077,
|
||
|
|
"logits/rejected": -0.16369813680648804,
|
||
|
|
"logps/chosen": -1.8039945363998413,
|
||
|
|
"logps/rejected": -29.438343048095703,
|
||
|
|
"loss": 0.5547,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.18878988921642303,
|
||
|
|
"rewards/margins": 2.6108524799346924,
|
||
|
|
"rewards/rejected": -2.799642562866211,
|
||
|
|
"step": 830
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.42,
|
||
|
|
"learning_rate": 3.135135135135135e-07,
|
||
|
|
"logits/chosen": -0.3847021460533142,
|
||
|
|
"logits/rejected": -0.19058382511138916,
|
||
|
|
"logps/chosen": -1.9469951391220093,
|
||
|
|
"logps/rejected": -23.151546478271484,
|
||
|
|
"loss": 0.5567,
|
||
|
|
"rewards/accuracies": 0.699999988079071,
|
||
|
|
"rewards/chosen": -0.12975777685642242,
|
||
|
|
"rewards/margins": 1.3818089962005615,
|
||
|
|
"rewards/rejected": -1.5115668773651123,
|
||
|
|
"step": 840
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.42,
|
||
|
|
"learning_rate": 3.108108108108108e-07,
|
||
|
|
"logits/chosen": -0.5285392999649048,
|
||
|
|
"logits/rejected": -0.3884163200855255,
|
||
|
|
"logps/chosen": -2.879500150680542,
|
||
|
|
"logps/rejected": -37.99626541137695,
|
||
|
|
"loss": 0.5552,
|
||
|
|
"rewards/accuracies": 0.20000000298023224,
|
||
|
|
"rewards/chosen": -0.22818878293037415,
|
||
|
|
"rewards/margins": 2.465597629547119,
|
||
|
|
"rewards/rejected": -2.693786144256592,
|
||
|
|
"step": 850
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.43,
|
||
|
|
"learning_rate": 3.0810810810810813e-07,
|
||
|
|
"logits/chosen": -0.4468708038330078,
|
||
|
|
"logits/rejected": -0.16887584328651428,
|
||
|
|
"logps/chosen": -2.016929864883423,
|
||
|
|
"logps/rejected": -29.296972274780273,
|
||
|
|
"loss": 0.5575,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.18932850658893585,
|
||
|
|
"rewards/margins": 1.512830138206482,
|
||
|
|
"rewards/rejected": -1.7021586894989014,
|
||
|
|
"step": 860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.43,
|
||
|
|
"learning_rate": 3.0540540540540536e-07,
|
||
|
|
"logits/chosen": -0.6263138055801392,
|
||
|
|
"logits/rejected": -0.1559181660413742,
|
||
|
|
"logps/chosen": -1.9883623123168945,
|
||
|
|
"logps/rejected": -97.72358703613281,
|
||
|
|
"loss": 0.3479,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -0.06004463508725166,
|
||
|
|
"rewards/margins": 7.97985315322876,
|
||
|
|
"rewards/rejected": -8.039897918701172,
|
||
|
|
"step": 870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.44,
|
||
|
|
"learning_rate": 3.027027027027027e-07,
|
||
|
|
"logits/chosen": -0.3427307605743408,
|
||
|
|
"logits/rejected": -0.11785911023616791,
|
||
|
|
"logps/chosen": -2.4055051803588867,
|
||
|
|
"logps/rejected": -50.41362380981445,
|
||
|
|
"loss": 0.4854,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -0.08261863887310028,
|
||
|
|
"rewards/margins": 5.198975563049316,
|
||
|
|
"rewards/rejected": -5.281594276428223,
|
||
|
|
"step": 880
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.45,
|
||
|
|
"learning_rate": 3e-07,
|
||
|
|
"logits/chosen": -0.3482494354248047,
|
||
|
|
"logits/rejected": -0.2711491584777832,
|
||
|
|
"logps/chosen": -2.2725043296813965,
|
||
|
|
"logps/rejected": -20.280826568603516,
|
||
|
|
"loss": 0.6235,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.32118305563926697,
|
||
|
|
"rewards/margins": 1.9148486852645874,
|
||
|
|
"rewards/rejected": -2.236032009124756,
|
||
|
|
"step": 890
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.45,
|
||
|
|
"learning_rate": 2.972972972972973e-07,
|
||
|
|
"logits/chosen": -0.4881555438041687,
|
||
|
|
"logits/rejected": -0.4544796049594879,
|
||
|
|
"logps/chosen": -2.1451969146728516,
|
||
|
|
"logps/rejected": -12.899765014648438,
|
||
|
|
"loss": 0.6245,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.20810385048389435,
|
||
|
|
"rewards/margins": 0.9362422227859497,
|
||
|
|
"rewards/rejected": -1.144345998764038,
|
||
|
|
"step": 900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.46,
|
||
|
|
"learning_rate": 2.9459459459459456e-07,
|
||
|
|
"logits/chosen": -0.5529161095619202,
|
||
|
|
"logits/rejected": -0.01812545582652092,
|
||
|
|
"logps/chosen": -2.3933329582214355,
|
||
|
|
"logps/rejected": -77.99223327636719,
|
||
|
|
"loss": 0.4831,
|
||
|
|
"rewards/accuracies": 0.699999988079071,
|
||
|
|
"rewards/chosen": -0.13273164629936218,
|
||
|
|
"rewards/margins": 5.853229999542236,
|
||
|
|
"rewards/rejected": -5.985960960388184,
|
||
|
|
"step": 910
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.46,
|
||
|
|
"learning_rate": 2.918918918918919e-07,
|
||
|
|
"logits/chosen": -0.6067525148391724,
|
||
|
|
"logits/rejected": -0.22178903222084045,
|
||
|
|
"logps/chosen": -2.893634080886841,
|
||
|
|
"logps/rejected": -89.12733459472656,
|
||
|
|
"loss": 0.4156,
|
||
|
|
"rewards/accuracies": 0.699999988079071,
|
||
|
|
"rewards/chosen": -0.3031526207923889,
|
||
|
|
"rewards/margins": 6.654090881347656,
|
||
|
|
"rewards/rejected": -6.9572434425354,
|
||
|
|
"step": 920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.47,
|
||
|
|
"learning_rate": 2.891891891891892e-07,
|
||
|
|
"logits/chosen": -0.3381947875022888,
|
||
|
|
"logits/rejected": -0.2047184258699417,
|
||
|
|
"logps/chosen": -2.5098400115966797,
|
||
|
|
"logps/rejected": -36.30571365356445,
|
||
|
|
"loss": 0.4865,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -0.22798296809196472,
|
||
|
|
"rewards/margins": 2.872692108154297,
|
||
|
|
"rewards/rejected": -3.100675106048584,
|
||
|
|
"step": 930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.47,
|
||
|
|
"learning_rate": 2.8648648648648647e-07,
|
||
|
|
"logits/chosen": -0.4152565002441406,
|
||
|
|
"logits/rejected": -0.24672865867614746,
|
||
|
|
"logps/chosen": -2.5835165977478027,
|
||
|
|
"logps/rejected": -41.009944915771484,
|
||
|
|
"loss": 0.4875,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.27744337916374207,
|
||
|
|
"rewards/margins": 2.9683868885040283,
|
||
|
|
"rewards/rejected": -3.2458300590515137,
|
||
|
|
"step": 940
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.47,
|
||
|
|
"learning_rate": 2.8378378378378376e-07,
|
||
|
|
"logits/chosen": -0.18132410943508148,
|
||
|
|
"logits/rejected": -0.08390505611896515,
|
||
|
|
"logps/chosen": -3.1348986625671387,
|
||
|
|
"logps/rejected": -34.38957977294922,
|
||
|
|
"loss": 0.623,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.3771377205848694,
|
||
|
|
"rewards/margins": 2.4244353771209717,
|
||
|
|
"rewards/rejected": -2.8015732765197754,
|
||
|
|
"step": 950
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48,
|
||
|
|
"learning_rate": 2.810810810810811e-07,
|
||
|
|
"logits/chosen": -0.31961002945899963,
|
||
|
|
"logits/rejected": -0.3197285532951355,
|
||
|
|
"logps/chosen": -3.2407329082489014,
|
||
|
|
"logps/rejected": -3.257624864578247,
|
||
|
|
"loss": 0.6924,
|
||
|
|
"rewards/accuracies": 0.20000000298023224,
|
||
|
|
"rewards/chosen": -0.42207542061805725,
|
||
|
|
"rewards/margins": 0.0015475511318072677,
|
||
|
|
"rewards/rejected": -0.4236229956150055,
|
||
|
|
"step": 960
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48,
|
||
|
|
"learning_rate": 2.7837837837837833e-07,
|
||
|
|
"logits/chosen": -0.1493210643529892,
|
||
|
|
"logits/rejected": -0.02894291840493679,
|
||
|
|
"logps/chosen": -2.7680492401123047,
|
||
|
|
"logps/rejected": -39.11075210571289,
|
||
|
|
"loss": 0.4872,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.3285685181617737,
|
||
|
|
"rewards/margins": 2.61509370803833,
|
||
|
|
"rewards/rejected": -2.943662166595459,
|
||
|
|
"step": 970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.49,
|
||
|
|
"learning_rate": 2.7567567567567567e-07,
|
||
|
|
"logits/chosen": -0.2658258378505707,
|
||
|
|
"logits/rejected": -0.14939609169960022,
|
||
|
|
"logps/chosen": -3.0067975521087646,
|
||
|
|
"logps/rejected": -9.233423233032227,
|
||
|
|
"loss": 0.6251,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.3324972093105316,
|
||
|
|
"rewards/margins": 0.6167054772377014,
|
||
|
|
"rewards/rejected": -0.9492026567459106,
|
||
|
|
"step": 980
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.49,
|
||
|
|
"learning_rate": 2.7297297297297295e-07,
|
||
|
|
"logits/chosen": 0.09181342273950577,
|
||
|
|
"logits/rejected": 0.0917346253991127,
|
||
|
|
"logps/chosen": -2.657616138458252,
|
||
|
|
"logps/rejected": -2.649034023284912,
|
||
|
|
"loss": 0.694,
|
||
|
|
"rewards/accuracies": 0.20000000298023224,
|
||
|
|
"rewards/chosen": -0.4064779281616211,
|
||
|
|
"rewards/margins": -0.0017143071163445711,
|
||
|
|
"rewards/rejected": -0.4047636091709137,
|
||
|
|
"step": 990
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5,
|
||
|
|
"learning_rate": 2.702702702702703e-07,
|
||
|
|
"logits/chosen": -0.5827860832214355,
|
||
|
|
"logits/rejected": -0.3403246998786926,
|
||
|
|
"logps/chosen": -3.277855396270752,
|
||
|
|
"logps/rejected": -52.3044548034668,
|
||
|
|
"loss": 0.4839,
|
||
|
|
"rewards/accuracies": 0.699999988079071,
|
||
|
|
"rewards/chosen": -0.37350228428840637,
|
||
|
|
"rewards/margins": 4.705965995788574,
|
||
|
|
"rewards/rejected": -5.079468727111816,
|
||
|
|
"step": 1000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5,
|
||
|
|
"eval_logits/chosen": -0.248033806681633,
|
||
|
|
"eval_logits/rejected": -0.10857230424880981,
|
||
|
|
"eval_logps/chosen": -3.3428871631622314,
|
||
|
|
"eval_logps/rejected": -28.17331314086914,
|
||
|
|
"eval_loss": 0.5582190752029419,
|
||
|
|
"eval_rewards/accuracies": 0.4000000059604645,
|
||
|
|
"eval_rewards/chosen": -0.335501104593277,
|
||
|
|
"eval_rewards/margins": 2.151271104812622,
|
||
|
|
"eval_rewards/rejected": -2.486772060394287,
|
||
|
|
"eval_runtime": 28.5461,
|
||
|
|
"eval_samples_per_second": 3.503,
|
||
|
|
"eval_steps_per_second": 3.503,
|
||
|
|
"step": 1000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.51,
|
||
|
|
"learning_rate": 2.675675675675675e-07,
|
||
|
|
"logits/chosen": -0.5932222604751587,
|
||
|
|
"logits/rejected": -0.03573286160826683,
|
||
|
|
"logps/chosen": -3.360830307006836,
|
||
|
|
"logps/rejected": -68.9535903930664,
|
||
|
|
"loss": 0.5535,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.29853901267051697,
|
||
|
|
"rewards/margins": 6.061123371124268,
|
||
|
|
"rewards/rejected": -6.3596625328063965,
|
||
|
|
"step": 1010
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.51,
|
||
|
|
"learning_rate": 2.6486486486486486e-07,
|
||
|
|
"logits/chosen": -0.10955256223678589,
|
||
|
|
"logits/rejected": -0.09704247862100601,
|
||
|
|
"logps/chosen": -2.8949010372161865,
|
||
|
|
"logps/rejected": -11.125778198242188,
|
||
|
|
"loss": 0.6242,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.321120023727417,
|
||
|
|
"rewards/margins": 0.8519529104232788,
|
||
|
|
"rewards/rejected": -1.1730728149414062,
|
||
|
|
"step": 1020
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.52,
|
||
|
|
"learning_rate": 2.6216216216216215e-07,
|
||
|
|
"logits/chosen": -0.2371162623167038,
|
||
|
|
"logits/rejected": -0.02502809837460518,
|
||
|
|
"logps/chosen": -3.312039613723755,
|
||
|
|
"logps/rejected": -56.23164749145508,
|
||
|
|
"loss": 0.4161,
|
||
|
|
"rewards/accuracies": 0.699999988079071,
|
||
|
|
"rewards/chosen": -0.3528529703617096,
|
||
|
|
"rewards/margins": 5.84485387802124,
|
||
|
|
"rewards/rejected": -6.197707176208496,
|
||
|
|
"step": 1030
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.52,
|
||
|
|
"learning_rate": 2.594594594594595e-07,
|
||
|
|
"logits/chosen": -0.3305162191390991,
|
||
|
|
"logits/rejected": -0.12264938652515411,
|
||
|
|
"logps/chosen": -3.0487217903137207,
|
||
|
|
"logps/rejected": -31.816059112548828,
|
||
|
|
"loss": 0.5539,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.4525887966156006,
|
||
|
|
"rewards/margins": 2.451446056365967,
|
||
|
|
"rewards/rejected": -2.9040348529815674,
|
||
|
|
"step": 1040
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.53,
|
||
|
|
"learning_rate": 2.567567567567567e-07,
|
||
|
|
"logits/chosen": -0.3112742304801941,
|
||
|
|
"logits/rejected": -0.14556877315044403,
|
||
|
|
"logps/chosen": -4.298488140106201,
|
||
|
|
"logps/rejected": -32.196067810058594,
|
||
|
|
"loss": 0.625,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.45854702591896057,
|
||
|
|
"rewards/margins": 1.6213452816009521,
|
||
|
|
"rewards/rejected": -2.079892635345459,
|
||
|
|
"step": 1050
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.53,
|
||
|
|
"learning_rate": 2.5405405405405406e-07,
|
||
|
|
"logits/chosen": -0.47445744276046753,
|
||
|
|
"logits/rejected": -0.17515473067760468,
|
||
|
|
"logps/chosen": -3.6740562915802,
|
||
|
|
"logps/rejected": -53.9892692565918,
|
||
|
|
"loss": 0.4853,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -0.4956344664096832,
|
||
|
|
"rewards/margins": 4.100377559661865,
|
||
|
|
"rewards/rejected": -4.596012115478516,
|
||
|
|
"step": 1060
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.54,
|
||
|
|
"learning_rate": 2.5135135135135135e-07,
|
||
|
|
"logits/chosen": -0.11676591634750366,
|
||
|
|
"logits/rejected": -0.05013752728700638,
|
||
|
|
"logps/chosen": -3.1980667114257812,
|
||
|
|
"logps/rejected": -23.358375549316406,
|
||
|
|
"loss": 0.5551,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.470308393239975,
|
||
|
|
"rewards/margins": 2.065474033355713,
|
||
|
|
"rewards/rejected": -2.5357823371887207,
|
||
|
|
"step": 1070
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.54,
|
||
|
|
"learning_rate": 2.4864864864864863e-07,
|
||
|
|
"logits/chosen": -0.08944498002529144,
|
||
|
|
"logits/rejected": -0.04073537513613701,
|
||
|
|
"logps/chosen": -3.459454298019409,
|
||
|
|
"logps/rejected": -17.295406341552734,
|
||
|
|
"loss": 0.6246,
|
||
|
|
"rewards/accuracies": 0.20000000298023224,
|
||
|
|
"rewards/chosen": -0.4144110083580017,
|
||
|
|
"rewards/margins": 1.442077875137329,
|
||
|
|
"rewards/rejected": -1.856488823890686,
|
||
|
|
"step": 1080
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.55,
|
||
|
|
"learning_rate": 2.4594594594594597e-07,
|
||
|
|
"logits/chosen": -0.4133351445198059,
|
||
|
|
"logits/rejected": -0.11843075603246689,
|
||
|
|
"logps/chosen": -3.5334014892578125,
|
||
|
|
"logps/rejected": -50.10197830200195,
|
||
|
|
"loss": 0.5545,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.49260371923446655,
|
||
|
|
"rewards/margins": 3.9945976734161377,
|
||
|
|
"rewards/rejected": -4.487200736999512,
|
||
|
|
"step": 1090
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.55,
|
||
|
|
"learning_rate": 2.4324324324324326e-07,
|
||
|
|
"logits/chosen": -0.017034702003002167,
|
||
|
|
"logits/rejected": 0.03910530358552933,
|
||
|
|
"logps/chosen": -3.597899913787842,
|
||
|
|
"logps/rejected": -19.891422271728516,
|
||
|
|
"loss": 0.554,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.47992175817489624,
|
||
|
|
"rewards/margins": 1.7824008464813232,
|
||
|
|
"rewards/rejected": -2.2623229026794434,
|
||
|
|
"step": 1100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.56,
|
||
|
|
"learning_rate": 2.4054054054054054e-07,
|
||
|
|
"logits/chosen": -0.16526077687740326,
|
||
|
|
"logits/rejected": -0.15733735263347626,
|
||
|
|
"logps/chosen": -3.504206895828247,
|
||
|
|
"logps/rejected": -6.611302852630615,
|
||
|
|
"loss": 0.6313,
|
||
|
|
"rewards/accuracies": 0.20000000298023224,
|
||
|
|
"rewards/chosen": -0.473609060049057,
|
||
|
|
"rewards/margins": 0.266178697347641,
|
||
|
|
"rewards/rejected": -0.7397876977920532,
|
||
|
|
"step": 1110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.56,
|
||
|
|
"learning_rate": 2.3783783783783783e-07,
|
||
|
|
"logits/chosen": -0.6425670385360718,
|
||
|
|
"logits/rejected": -0.21171347796916962,
|
||
|
|
"logps/chosen": -3.7131621837615967,
|
||
|
|
"logps/rejected": -65.28227233886719,
|
||
|
|
"loss": 0.4824,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -0.4607169032096863,
|
||
|
|
"rewards/margins": 6.263044834136963,
|
||
|
|
"rewards/rejected": -6.72376012802124,
|
||
|
|
"step": 1120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.56,
|
||
|
|
"learning_rate": 2.3513513513513514e-07,
|
||
|
|
"logits/chosen": -0.1325923502445221,
|
||
|
|
"logits/rejected": 0.008936069905757904,
|
||
|
|
"logps/chosen": -2.75840425491333,
|
||
|
|
"logps/rejected": -39.773658752441406,
|
||
|
|
"loss": 0.6259,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.38081198930740356,
|
||
|
|
"rewards/margins": 3.1747312545776367,
|
||
|
|
"rewards/rejected": -3.5555434226989746,
|
||
|
|
"step": 1130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.57,
|
||
|
|
"learning_rate": 2.3243243243243243e-07,
|
||
|
|
"logits/chosen": -0.15440817177295685,
|
||
|
|
"logits/rejected": -0.15450319647789001,
|
||
|
|
"logps/chosen": -3.1343817710876465,
|
||
|
|
"logps/rejected": -3.1313533782958984,
|
||
|
|
"loss": 0.6944,
|
||
|
|
"rewards/accuracies": 0.20000000298023224,
|
||
|
|
"rewards/chosen": -0.3842015564441681,
|
||
|
|
"rewards/margins": -0.002351009752601385,
|
||
|
|
"rewards/rejected": -0.38185054063796997,
|
||
|
|
"step": 1140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.57,
|
||
|
|
"learning_rate": 2.2972972972972974e-07,
|
||
|
|
"logits/chosen": -0.229770228266716,
|
||
|
|
"logits/rejected": -0.22977308928966522,
|
||
|
|
"logps/chosen": -3.290398359298706,
|
||
|
|
"logps/rejected": -3.267608165740967,
|
||
|
|
"loss": 0.6948,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.3774813413619995,
|
||
|
|
"rewards/margins": -0.003159981919452548,
|
||
|
|
"rewards/rejected": -0.3743213415145874,
|
||
|
|
"step": 1150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.58,
|
||
|
|
"learning_rate": 2.2702702702702703e-07,
|
||
|
|
"logits/chosen": -0.07762424647808075,
|
||
|
|
"logits/rejected": 0.020208783447742462,
|
||
|
|
"logps/chosen": -2.1210696697235107,
|
||
|
|
"logps/rejected": -36.04180908203125,
|
||
|
|
"loss": 0.4959,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.12996070086956024,
|
||
|
|
"rewards/margins": 3.2478432655334473,
|
||
|
|
"rewards/rejected": -3.3778038024902344,
|
||
|
|
"step": 1160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.58,
|
||
|
|
"learning_rate": 2.2432432432432434e-07,
|
||
|
|
"logits/chosen": -0.10635857284069061,
|
||
|
|
"logits/rejected": -0.10641410201787949,
|
||
|
|
"logps/chosen": -3.266584873199463,
|
||
|
|
"logps/rejected": -3.2667083740234375,
|
||
|
|
"loss": 0.6908,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -0.42059874534606934,
|
||
|
|
"rewards/margins": 0.004667977802455425,
|
||
|
|
"rewards/rejected": -0.42526674270629883,
|
||
|
|
"step": 1170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.59,
|
||
|
|
"learning_rate": 2.2162162162162162e-07,
|
||
|
|
"logits/chosen": 0.016759177669882774,
|
||
|
|
"logits/rejected": 0.04513305425643921,
|
||
|
|
"logps/chosen": -3.0024590492248535,
|
||
|
|
"logps/rejected": -15.668512344360352,
|
||
|
|
"loss": 0.6221,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.4371574819087982,
|
||
|
|
"rewards/margins": 0.9103776216506958,
|
||
|
|
"rewards/rejected": -1.347535252571106,
|
||
|
|
"step": 1180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.59,
|
||
|
|
"learning_rate": 2.189189189189189e-07,
|
||
|
|
"logits/chosen": -0.05008067935705185,
|
||
|
|
"logits/rejected": 0.06452060490846634,
|
||
|
|
"logps/chosen": -3.3980178833007812,
|
||
|
|
"logps/rejected": -43.369049072265625,
|
||
|
|
"loss": 0.4841,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.45819729566574097,
|
||
|
|
"rewards/margins": 4.862743854522705,
|
||
|
|
"rewards/rejected": -5.320940971374512,
|
||
|
|
"step": 1190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6,
|
||
|
|
"learning_rate": 2.1621621621621622e-07,
|
||
|
|
"logits/chosen": -0.24912318587303162,
|
||
|
|
"logits/rejected": -0.09523974359035492,
|
||
|
|
"logps/chosen": -4.080093860626221,
|
||
|
|
"logps/rejected": -51.317169189453125,
|
||
|
|
"loss": 0.4837,
|
||
|
|
"rewards/accuracies": 0.699999988079071,
|
||
|
|
"rewards/chosen": -0.44981464743614197,
|
||
|
|
"rewards/margins": 4.76999044418335,
|
||
|
|
"rewards/rejected": -5.2198052406311035,
|
||
|
|
"step": 1200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6,
|
||
|
|
"learning_rate": 2.135135135135135e-07,
|
||
|
|
"logits/chosen": -0.2081013172864914,
|
||
|
|
"logits/rejected": -0.05113474279642105,
|
||
|
|
"logps/chosen": -4.021970272064209,
|
||
|
|
"logps/rejected": -38.43590545654297,
|
||
|
|
"loss": 0.5536,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -0.3837403953075409,
|
||
|
|
"rewards/margins": 3.026031017303467,
|
||
|
|
"rewards/rejected": -3.40977144241333,
|
||
|
|
"step": 1210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.61,
|
||
|
|
"learning_rate": 2.1081081081081082e-07,
|
||
|
|
"logits/chosen": -0.2219875603914261,
|
||
|
|
"logits/rejected": -0.051030516624450684,
|
||
|
|
"logps/chosen": -3.1909682750701904,
|
||
|
|
"logps/rejected": -46.616905212402344,
|
||
|
|
"loss": 0.4852,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.50543612241745,
|
||
|
|
"rewards/margins": 3.5189208984375,
|
||
|
|
"rewards/rejected": -4.024357318878174,
|
||
|
|
"step": 1220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.61,
|
||
|
|
"learning_rate": 2.081081081081081e-07,
|
||
|
|
"logits/chosen": 0.03644023835659027,
|
||
|
|
"logits/rejected": 0.06066631153225899,
|
||
|
|
"logps/chosen": -3.725661516189575,
|
||
|
|
"logps/rejected": -23.01262092590332,
|
||
|
|
"loss": 0.5528,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.4582531452178955,
|
||
|
|
"rewards/margins": 2.343986988067627,
|
||
|
|
"rewards/rejected": -2.8022401332855225,
|
||
|
|
"step": 1230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.62,
|
||
|
|
"learning_rate": 2.0540540540540542e-07,
|
||
|
|
"logits/chosen": -0.15473979711532593,
|
||
|
|
"logits/rejected": 0.008312966674566269,
|
||
|
|
"logps/chosen": -2.7111215591430664,
|
||
|
|
"logps/rejected": -31.44228172302246,
|
||
|
|
"loss": 0.553,
|
||
|
|
"rewards/accuracies": 0.699999988079071,
|
||
|
|
"rewards/chosen": -0.30419665575027466,
|
||
|
|
"rewards/margins": 1.9790928363800049,
|
||
|
|
"rewards/rejected": -2.2832894325256348,
|
||
|
|
"step": 1240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.62,
|
||
|
|
"learning_rate": 2.027027027027027e-07,
|
||
|
|
"logits/chosen": -0.0361042395234108,
|
||
|
|
"logits/rejected": -0.035879164934158325,
|
||
|
|
"logps/chosen": -3.412536144256592,
|
||
|
|
"logps/rejected": -3.4127590656280518,
|
||
|
|
"loss": 0.6934,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.4676332473754883,
|
||
|
|
"rewards/margins": -0.0005405143019743264,
|
||
|
|
"rewards/rejected": -0.46709269285202026,
|
||
|
|
"step": 1250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.63,
|
||
|
|
"learning_rate": 2e-07,
|
||
|
|
"logits/chosen": -0.1826142817735672,
|
||
|
|
"logits/rejected": -0.11776237189769745,
|
||
|
|
"logps/chosen": -4.550057411193848,
|
||
|
|
"logps/rejected": -18.74826431274414,
|
||
|
|
"loss": 0.6218,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.7531288266181946,
|
||
|
|
"rewards/margins": 1.4324684143066406,
|
||
|
|
"rewards/rejected": -2.1855974197387695,
|
||
|
|
"step": 1260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.64,
|
||
|
|
"learning_rate": 1.972972972972973e-07,
|
||
|
|
"logits/chosen": -0.1055067628622055,
|
||
|
|
"logits/rejected": 0.11131813377141953,
|
||
|
|
"logps/chosen": -2.982210159301758,
|
||
|
|
"logps/rejected": -58.02173614501953,
|
||
|
|
"loss": 0.5565,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.366026908159256,
|
||
|
|
"rewards/margins": 6.08723258972168,
|
||
|
|
"rewards/rejected": -6.453258514404297,
|
||
|
|
"step": 1270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.64,
|
||
|
|
"learning_rate": 1.945945945945946e-07,
|
||
|
|
"logits/chosen": -0.038440294563770294,
|
||
|
|
"logits/rejected": 0.10673608630895615,
|
||
|
|
"logps/chosen": -3.7921531200408936,
|
||
|
|
"logps/rejected": -37.6851692199707,
|
||
|
|
"loss": 0.5568,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.5283007025718689,
|
||
|
|
"rewards/margins": 3.1157898902893066,
|
||
|
|
"rewards/rejected": -3.644090175628662,
|
||
|
|
"step": 1280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.65,
|
||
|
|
"learning_rate": 1.918918918918919e-07,
|
||
|
|
"logits/chosen": 0.040133845061063766,
|
||
|
|
"logits/rejected": 0.06518431752920151,
|
||
|
|
"logps/chosen": -3.61901593208313,
|
||
|
|
"logps/rejected": -14.156512260437012,
|
||
|
|
"loss": 0.6239,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.5208865404129028,
|
||
|
|
"rewards/margins": 1.1946496963500977,
|
||
|
|
"rewards/rejected": -1.7155358791351318,
|
||
|
|
"step": 1290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.65,
|
||
|
|
"learning_rate": 1.891891891891892e-07,
|
||
|
|
"logits/chosen": -0.41648340225219727,
|
||
|
|
"logits/rejected": -0.07645007967948914,
|
||
|
|
"logps/chosen": -2.7470812797546387,
|
||
|
|
"logps/rejected": -100.8885726928711,
|
||
|
|
"loss": 0.4846,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -0.22242751717567444,
|
||
|
|
"rewards/margins": 8.930724143981934,
|
||
|
|
"rewards/rejected": -9.153151512145996,
|
||
|
|
"step": 1300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.66,
|
||
|
|
"learning_rate": 1.864864864864865e-07,
|
||
|
|
"logits/chosen": -0.43833446502685547,
|
||
|
|
"logits/rejected": -0.2219434529542923,
|
||
|
|
"logps/chosen": -3.622807741165161,
|
||
|
|
"logps/rejected": -45.591163635253906,
|
||
|
|
"loss": 0.6216,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.5277474522590637,
|
||
|
|
"rewards/margins": 4.0409955978393555,
|
||
|
|
"rewards/rejected": -4.568743705749512,
|
||
|
|
"step": 1310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.66,
|
||
|
|
"learning_rate": 1.8378378378378379e-07,
|
||
|
|
"logits/chosen": -0.1983651965856552,
|
||
|
|
"logits/rejected": -0.062241751700639725,
|
||
|
|
"logps/chosen": -4.179381370544434,
|
||
|
|
"logps/rejected": -50.65184783935547,
|
||
|
|
"loss": 0.4826,
|
||
|
|
"rewards/accuracies": 0.800000011920929,
|
||
|
|
"rewards/chosen": -0.6547450423240662,
|
||
|
|
"rewards/margins": 4.6731157302856445,
|
||
|
|
"rewards/rejected": -5.327860355377197,
|
||
|
|
"step": 1320
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.67,
|
||
|
|
"learning_rate": 1.8108108108108107e-07,
|
||
|
|
"logits/chosen": -0.16832640767097473,
|
||
|
|
"logits/rejected": -0.14985333383083344,
|
||
|
|
"logps/chosen": -3.139037609100342,
|
||
|
|
"logps/rejected": -7.793992519378662,
|
||
|
|
"loss": 0.6251,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.3424663543701172,
|
||
|
|
"rewards/margins": 0.5691796541213989,
|
||
|
|
"rewards/rejected": -0.9116460680961609,
|
||
|
|
"step": 1330
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.67,
|
||
|
|
"learning_rate": 1.7837837837837838e-07,
|
||
|
|
"logits/chosen": -0.06780896335840225,
|
||
|
|
"logits/rejected": 0.03146328404545784,
|
||
|
|
"logps/chosen": -4.410508155822754,
|
||
|
|
"logps/rejected": -24.816577911376953,
|
||
|
|
"loss": 0.5552,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.5241163969039917,
|
||
|
|
"rewards/margins": 2.202816963195801,
|
||
|
|
"rewards/rejected": -2.726933002471924,
|
||
|
|
"step": 1340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.68,
|
||
|
|
"learning_rate": 1.7567567567567567e-07,
|
||
|
|
"logits/chosen": -0.355562299489975,
|
||
|
|
"logits/rejected": 0.05104954168200493,
|
||
|
|
"logps/chosen": -3.206317901611328,
|
||
|
|
"logps/rejected": -77.11846923828125,
|
||
|
|
"loss": 0.3602,
|
||
|
|
"rewards/accuracies": 0.699999988079071,
|
||
|
|
"rewards/chosen": -0.4161297380924225,
|
||
|
|
"rewards/margins": 8.3109130859375,
|
||
|
|
"rewards/rejected": -8.727044105529785,
|
||
|
|
"step": 1350
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.68,
|
||
|
|
"learning_rate": 1.7297297297297298e-07,
|
||
|
|
"logits/chosen": -0.04275091364979744,
|
||
|
|
"logits/rejected": 0.1841900795698166,
|
||
|
|
"logps/chosen": -4.1488471031188965,
|
||
|
|
"logps/rejected": -67.05726623535156,
|
||
|
|
"loss": 0.5551,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.43814602494239807,
|
||
|
|
"rewards/margins": 5.562375068664551,
|
||
|
|
"rewards/rejected": -6.000521659851074,
|
||
|
|
"step": 1360
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.69,
|
||
|
|
"learning_rate": 1.7027027027027027e-07,
|
||
|
|
"logits/chosen": -0.02052360214293003,
|
||
|
|
"logits/rejected": 0.09947922080755234,
|
||
|
|
"logps/chosen": -2.9535369873046875,
|
||
|
|
"logps/rejected": -24.08611297607422,
|
||
|
|
"loss": 0.5545,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.3186715245246887,
|
||
|
|
"rewards/margins": 2.140519142150879,
|
||
|
|
"rewards/rejected": -2.459190607070923,
|
||
|
|
"step": 1370
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.69,
|
||
|
|
"learning_rate": 1.6756756756756758e-07,
|
||
|
|
"logits/chosen": -0.12382978200912476,
|
||
|
|
"logits/rejected": -0.1093042716383934,
|
||
|
|
"logps/chosen": -4.469671249389648,
|
||
|
|
"logps/rejected": -20.117874145507812,
|
||
|
|
"loss": 0.6187,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.5799933671951294,
|
||
|
|
"rewards/margins": 1.2598206996917725,
|
||
|
|
"rewards/rejected": -1.8398144245147705,
|
||
|
|
"step": 1380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.69,
|
||
|
|
"learning_rate": 1.6486486486486487e-07,
|
||
|
|
"logits/chosen": 0.18149466812610626,
|
||
|
|
"logits/rejected": 0.18185070157051086,
|
||
|
|
"logps/chosen": -4.917318344116211,
|
||
|
|
"logps/rejected": -4.922450065612793,
|
||
|
|
"loss": 0.6934,
|
||
|
|
"rewards/accuracies": 0.20000000298023224,
|
||
|
|
"rewards/chosen": -0.7914124727249146,
|
||
|
|
"rewards/margins": -0.000482580071548,
|
||
|
|
"rewards/rejected": -0.790929913520813,
|
||
|
|
"step": 1390
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7,
|
||
|
|
"learning_rate": 1.6216216216216215e-07,
|
||
|
|
"logits/chosen": -0.1398981809616089,
|
||
|
|
"logits/rejected": 0.06895492970943451,
|
||
|
|
"logps/chosen": -2.947361469268799,
|
||
|
|
"logps/rejected": -44.058815002441406,
|
||
|
|
"loss": 0.4173,
|
||
|
|
"rewards/accuracies": 0.699999988079071,
|
||
|
|
"rewards/chosen": -0.3960328698158264,
|
||
|
|
"rewards/margins": 4.381956577301025,
|
||
|
|
"rewards/rejected": -4.777989387512207,
|
||
|
|
"step": 1400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7,
|
||
|
|
"learning_rate": 1.5945945945945947e-07,
|
||
|
|
"logits/chosen": 0.006692798342555761,
|
||
|
|
"logits/rejected": 0.0069901407696306705,
|
||
|
|
"logps/chosen": -3.508424758911133,
|
||
|
|
"logps/rejected": -3.4823012351989746,
|
||
|
|
"loss": 0.696,
|
||
|
|
"rewards/accuracies": 0.10000000149011612,
|
||
|
|
"rewards/chosen": -0.5846782922744751,
|
||
|
|
"rewards/margins": -0.005588895175606012,
|
||
|
|
"rewards/rejected": -0.5790894031524658,
|
||
|
|
"step": 1410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.71,
|
||
|
|
"learning_rate": 1.5675675675675675e-07,
|
||
|
|
"logits/chosen": -0.08773870766162872,
|
||
|
|
"logits/rejected": 0.2178010493516922,
|
||
|
|
"logps/chosen": -3.085310935974121,
|
||
|
|
"logps/rejected": -40.300601959228516,
|
||
|
|
"loss": 0.6219,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.3880263566970825,
|
||
|
|
"rewards/margins": 3.6542656421661377,
|
||
|
|
"rewards/rejected": -4.042291164398193,
|
||
|
|
"step": 1420
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.71,
|
||
|
|
"learning_rate": 1.5405405405405406e-07,
|
||
|
|
"logits/chosen": -0.20309165120124817,
|
||
|
|
"logits/rejected": 0.12440992891788483,
|
||
|
|
"logps/chosen": -3.6524136066436768,
|
||
|
|
"logps/rejected": -88.05582427978516,
|
||
|
|
"loss": 0.4843,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.37993237376213074,
|
||
|
|
"rewards/margins": 8.410959243774414,
|
||
|
|
"rewards/rejected": -8.790891647338867,
|
||
|
|
"step": 1430
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.72,
|
||
|
|
"learning_rate": 1.5135135135135135e-07,
|
||
|
|
"logits/chosen": -0.03451851010322571,
|
||
|
|
"logits/rejected": -0.03453352302312851,
|
||
|
|
"logps/chosen": -3.138916015625,
|
||
|
|
"logps/rejected": -3.1397087574005127,
|
||
|
|
"loss": 0.694,
|
||
|
|
"rewards/accuracies": 0.20000000298023224,
|
||
|
|
"rewards/chosen": -0.5204917788505554,
|
||
|
|
"rewards/margins": -0.001755397766828537,
|
||
|
|
"rewards/rejected": -0.5187363624572754,
|
||
|
|
"step": 1440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.72,
|
||
|
|
"learning_rate": 1.4864864864864866e-07,
|
||
|
|
"logits/chosen": 0.10227219760417938,
|
||
|
|
"logits/rejected": 0.10233476012945175,
|
||
|
|
"logps/chosen": -4.582298755645752,
|
||
|
|
"logps/rejected": -4.5724334716796875,
|
||
|
|
"loss": 0.694,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.6803295612335205,
|
||
|
|
"rewards/margins": -0.0017308980459347367,
|
||
|
|
"rewards/rejected": -0.6785987019538879,
|
||
|
|
"step": 1450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.73,
|
||
|
|
"learning_rate": 1.4594594594594595e-07,
|
||
|
|
"logits/chosen": -0.5597248077392578,
|
||
|
|
"logits/rejected": 0.06213284283876419,
|
||
|
|
"logps/chosen": -2.8770649433135986,
|
||
|
|
"logps/rejected": -96.86759185791016,
|
||
|
|
"loss": 0.4859,
|
||
|
|
"rewards/accuracies": 0.699999988079071,
|
||
|
|
"rewards/chosen": -0.2279277741909027,
|
||
|
|
"rewards/margins": 10.135581970214844,
|
||
|
|
"rewards/rejected": -10.363508224487305,
|
||
|
|
"step": 1460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.73,
|
||
|
|
"learning_rate": 1.4324324324324323e-07,
|
||
|
|
"logits/chosen": 0.09240862727165222,
|
||
|
|
"logits/rejected": 0.0921512097120285,
|
||
|
|
"logps/chosen": -3.724111557006836,
|
||
|
|
"logps/rejected": -3.7362544536590576,
|
||
|
|
"loss": 0.6918,
|
||
|
|
"rewards/accuracies": 0.20000000298023224,
|
||
|
|
"rewards/chosen": -0.5456451773643494,
|
||
|
|
"rewards/margins": 0.002606689929962158,
|
||
|
|
"rewards/rejected": -0.5482519268989563,
|
||
|
|
"step": 1470
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.74,
|
||
|
|
"learning_rate": 1.4054054054054055e-07,
|
||
|
|
"logits/chosen": -0.12598402798175812,
|
||
|
|
"logits/rejected": -0.015654001384973526,
|
||
|
|
"logps/chosen": -4.451899528503418,
|
||
|
|
"logps/rejected": -42.00288772583008,
|
||
|
|
"loss": 0.6218,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -0.48525696992874146,
|
||
|
|
"rewards/margins": 2.573787212371826,
|
||
|
|
"rewards/rejected": -3.059044361114502,
|
||
|
|
"step": 1480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.74,
|
||
|
|
"learning_rate": 1.3783783783783783e-07,
|
||
|
|
"logits/chosen": -0.4521062970161438,
|
||
|
|
"logits/rejected": -0.004660460166633129,
|
||
|
|
"logps/chosen": -2.9012811183929443,
|
||
|
|
"logps/rejected": -115.5091781616211,
|
||
|
|
"loss": 0.4862,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -0.19031697511672974,
|
||
|
|
"rewards/margins": 10.333425521850586,
|
||
|
|
"rewards/rejected": -10.523741722106934,
|
||
|
|
"step": 1490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.75,
|
||
|
|
"learning_rate": 1.3513513513513515e-07,
|
||
|
|
"logits/chosen": -0.0979565680027008,
|
||
|
|
"logits/rejected": 0.11449885368347168,
|
||
|
|
"logps/chosen": -3.5851001739501953,
|
||
|
|
"logps/rejected": -76.48931884765625,
|
||
|
|
"loss": 0.4849,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.4619313180446625,
|
||
|
|
"rewards/margins": 5.934266090393066,
|
||
|
|
"rewards/rejected": -6.396197319030762,
|
||
|
|
"step": 1500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.75,
|
||
|
|
"eval_logits/chosen": -0.11751853674650192,
|
||
|
|
"eval_logits/rejected": 0.025734370574355125,
|
||
|
|
"eval_logps/chosen": -3.4931278228759766,
|
||
|
|
"eval_logps/rejected": -30.210371017456055,
|
||
|
|
"eval_loss": 0.5550487637519836,
|
||
|
|
"eval_rewards/accuracies": 0.3799999952316284,
|
||
|
|
"eval_rewards/chosen": -0.3655491769313812,
|
||
|
|
"eval_rewards/margins": 2.528634548187256,
|
||
|
|
"eval_rewards/rejected": -2.89418363571167,
|
||
|
|
"eval_runtime": 28.6292,
|
||
|
|
"eval_samples_per_second": 3.493,
|
||
|
|
"eval_steps_per_second": 3.493,
|
||
|
|
"step": 1500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.76,
|
||
|
|
"learning_rate": 1.3243243243243243e-07,
|
||
|
|
"logits/chosen": -0.014925278723239899,
|
||
|
|
"logits/rejected": 0.0027809618040919304,
|
||
|
|
"logps/chosen": -3.423701524734497,
|
||
|
|
"logps/rejected": -14.077951431274414,
|
||
|
|
"loss": 0.6212,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.4755886495113373,
|
||
|
|
"rewards/margins": 1.5500478744506836,
|
||
|
|
"rewards/rejected": -2.025636672973633,
|
||
|
|
"step": 1510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.76,
|
||
|
|
"learning_rate": 1.2972972972972974e-07,
|
||
|
|
"logits/chosen": -0.10386158525943756,
|
||
|
|
"logits/rejected": 0.07022352516651154,
|
||
|
|
"logps/chosen": -4.077963829040527,
|
||
|
|
"logps/rejected": -35.390384674072266,
|
||
|
|
"loss": 0.6257,
|
||
|
|
"rewards/accuracies": 0.20000000298023224,
|
||
|
|
"rewards/chosen": -0.5015150308609009,
|
||
|
|
"rewards/margins": 2.290374755859375,
|
||
|
|
"rewards/rejected": -2.7918896675109863,
|
||
|
|
"step": 1520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.77,
|
||
|
|
"learning_rate": 1.2702702702702703e-07,
|
||
|
|
"logits/chosen": -0.5414426326751709,
|
||
|
|
"logits/rejected": 0.043320734053850174,
|
||
|
|
"logps/chosen": -2.5671865940093994,
|
||
|
|
"logps/rejected": -73.81044006347656,
|
||
|
|
"loss": 0.5565,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.255888968706131,
|
||
|
|
"rewards/margins": 6.815358638763428,
|
||
|
|
"rewards/rejected": -7.0712480545043945,
|
||
|
|
"step": 1530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.77,
|
||
|
|
"learning_rate": 1.2432432432432432e-07,
|
||
|
|
"logits/chosen": -0.08797252923250198,
|
||
|
|
"logits/rejected": -0.08808515965938568,
|
||
|
|
"logps/chosen": -4.043805122375488,
|
||
|
|
"logps/rejected": -4.037627696990967,
|
||
|
|
"loss": 0.6933,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.5562341213226318,
|
||
|
|
"rewards/margins": -0.00024047940678428859,
|
||
|
|
"rewards/rejected": -0.5559936761856079,
|
||
|
|
"step": 1540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.78,
|
||
|
|
"learning_rate": 1.2162162162162163e-07,
|
||
|
|
"logits/chosen": -0.04218659549951553,
|
||
|
|
"logits/rejected": -0.036192767322063446,
|
||
|
|
"logps/chosen": -2.878105401992798,
|
||
|
|
"logps/rejected": -9.950769424438477,
|
||
|
|
"loss": 0.6258,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.3870970606803894,
|
||
|
|
"rewards/margins": 1.0968773365020752,
|
||
|
|
"rewards/rejected": -1.4839744567871094,
|
||
|
|
"step": 1550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.78,
|
||
|
|
"learning_rate": 1.1891891891891891e-07,
|
||
|
|
"logits/chosen": -0.10892989486455917,
|
||
|
|
"logits/rejected": 0.07078132778406143,
|
||
|
|
"logps/chosen": -3.934946060180664,
|
||
|
|
"logps/rejected": -26.729068756103516,
|
||
|
|
"loss": 0.6248,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.47404026985168457,
|
||
|
|
"rewards/margins": 1.411412000656128,
|
||
|
|
"rewards/rejected": -1.8854520320892334,
|
||
|
|
"step": 1560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.79,
|
||
|
|
"learning_rate": 1.1621621621621621e-07,
|
||
|
|
"logits/chosen": -0.2891389727592468,
|
||
|
|
"logits/rejected": 0.12911613285541534,
|
||
|
|
"logps/chosen": -2.690746545791626,
|
||
|
|
"logps/rejected": -63.75246047973633,
|
||
|
|
"loss": 0.4878,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.25007423758506775,
|
||
|
|
"rewards/margins": 5.411334991455078,
|
||
|
|
"rewards/rejected": -5.6614089012146,
|
||
|
|
"step": 1570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.79,
|
||
|
|
"learning_rate": 1.1351351351351351e-07,
|
||
|
|
"logits/chosen": -0.5384889841079712,
|
||
|
|
"logits/rejected": 0.012323490343987942,
|
||
|
|
"logps/chosen": -4.0185675621032715,
|
||
|
|
"logps/rejected": -59.906455993652344,
|
||
|
|
"loss": 0.4854,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -0.40213894844055176,
|
||
|
|
"rewards/margins": 4.404304504394531,
|
||
|
|
"rewards/rejected": -4.806443691253662,
|
||
|
|
"step": 1580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8,
|
||
|
|
"learning_rate": 1.1081081081081081e-07,
|
||
|
|
"logits/chosen": -0.04935307055711746,
|
||
|
|
"logits/rejected": 0.07298599183559418,
|
||
|
|
"logps/chosen": -2.5241751670837402,
|
||
|
|
"logps/rejected": -33.777976989746094,
|
||
|
|
"loss": 0.4127,
|
||
|
|
"rewards/accuracies": 0.699999988079071,
|
||
|
|
"rewards/chosen": -0.2943554222583771,
|
||
|
|
"rewards/margins": 3.7618205547332764,
|
||
|
|
"rewards/rejected": -4.05617618560791,
|
||
|
|
"step": 1590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8,
|
||
|
|
"learning_rate": 1.0810810810810811e-07,
|
||
|
|
"logits/chosen": -0.17055651545524597,
|
||
|
|
"logits/rejected": 0.0044966209679841995,
|
||
|
|
"logps/chosen": -3.5189216136932373,
|
||
|
|
"logps/rejected": -49.50484848022461,
|
||
|
|
"loss": 0.4863,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.36419206857681274,
|
||
|
|
"rewards/margins": 4.198049068450928,
|
||
|
|
"rewards/rejected": -4.562241077423096,
|
||
|
|
"step": 1600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.81,
|
||
|
|
"learning_rate": 1.0540540540540541e-07,
|
||
|
|
"logits/chosen": -0.2850159704685211,
|
||
|
|
"logits/rejected": -0.21129021048545837,
|
||
|
|
"logps/chosen": -3.473625898361206,
|
||
|
|
"logps/rejected": -26.81081199645996,
|
||
|
|
"loss": 0.6218,
|
||
|
|
"rewards/accuracies": 0.699999988079071,
|
||
|
|
"rewards/chosen": -0.40565043687820435,
|
||
|
|
"rewards/margins": 2.5721168518066406,
|
||
|
|
"rewards/rejected": -2.9777674674987793,
|
||
|
|
"step": 1610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.81,
|
||
|
|
"learning_rate": 1.0270270270270271e-07,
|
||
|
|
"logits/chosen": -0.19239701330661774,
|
||
|
|
"logits/rejected": -0.003470667405053973,
|
||
|
|
"logps/chosen": -3.0670382976531982,
|
||
|
|
"logps/rejected": -51.9354362487793,
|
||
|
|
"loss": 0.5542,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.40356960892677307,
|
||
|
|
"rewards/margins": 4.389127731323242,
|
||
|
|
"rewards/rejected": -4.792697429656982,
|
||
|
|
"step": 1620
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.81,
|
||
|
|
"learning_rate": 1e-07,
|
||
|
|
"logits/chosen": 0.008682191371917725,
|
||
|
|
"logits/rejected": 0.008879792876541615,
|
||
|
|
"logps/chosen": -3.3635425567626953,
|
||
|
|
"logps/rejected": -3.363541841506958,
|
||
|
|
"loss": 0.6931,
|
||
|
|
"rewards/accuracies": 0.0,
|
||
|
|
"rewards/chosen": -0.4913901686668396,
|
||
|
|
"rewards/margins": -1.5556812513750629e-06,
|
||
|
|
"rewards/rejected": -0.4913886487483978,
|
||
|
|
"step": 1630
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.82,
|
||
|
|
"learning_rate": 9.72972972972973e-08,
|
||
|
|
"logits/chosen": -0.10675084590911865,
|
||
|
|
"logits/rejected": 0.005977548658847809,
|
||
|
|
"logps/chosen": -3.781205415725708,
|
||
|
|
"logps/rejected": -25.103981018066406,
|
||
|
|
"loss": 0.5558,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.5226456522941589,
|
||
|
|
"rewards/margins": 2.3052382469177246,
|
||
|
|
"rewards/rejected": -2.8278839588165283,
|
||
|
|
"step": 1640
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.82,
|
||
|
|
"learning_rate": 9.45945945945946e-08,
|
||
|
|
"logits/chosen": -0.03640662506222725,
|
||
|
|
"logits/rejected": 0.03828350454568863,
|
||
|
|
"logps/chosen": -3.3790955543518066,
|
||
|
|
"logps/rejected": -10.027456283569336,
|
||
|
|
"loss": 0.625,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.4088617265224457,
|
||
|
|
"rewards/margins": 0.5881537199020386,
|
||
|
|
"rewards/rejected": -0.9970153570175171,
|
||
|
|
"step": 1650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.83,
|
||
|
|
"learning_rate": 9.189189189189189e-08,
|
||
|
|
"logits/chosen": -0.22357909381389618,
|
||
|
|
"logits/rejected": -0.22356662154197693,
|
||
|
|
"logps/chosen": -4.314882755279541,
|
||
|
|
"logps/rejected": -4.330056190490723,
|
||
|
|
"loss": 0.6915,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.5269622802734375,
|
||
|
|
"rewards/margins": 0.0032821507193148136,
|
||
|
|
"rewards/rejected": -0.5302444100379944,
|
||
|
|
"step": 1660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.83,
|
||
|
|
"learning_rate": 8.918918918918919e-08,
|
||
|
|
"logits/chosen": 0.0022161633241921663,
|
||
|
|
"logits/rejected": 0.0022703767754137516,
|
||
|
|
"logps/chosen": -4.084799289703369,
|
||
|
|
"logps/rejected": -4.066622734069824,
|
||
|
|
"loss": 0.6944,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.5763648748397827,
|
||
|
|
"rewards/margins": -0.0025318176485598087,
|
||
|
|
"rewards/rejected": -0.5738331079483032,
|
||
|
|
"step": 1670
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.84,
|
||
|
|
"learning_rate": 8.648648648648649e-08,
|
||
|
|
"logits/chosen": 0.12270589172840118,
|
||
|
|
"logits/rejected": 0.1225275993347168,
|
||
|
|
"logps/chosen": -2.5021376609802246,
|
||
|
|
"logps/rejected": -2.52251935005188,
|
||
|
|
"loss": 0.6911,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -0.2609153687953949,
|
||
|
|
"rewards/margins": 0.004049187991768122,
|
||
|
|
"rewards/rejected": -0.26496458053588867,
|
||
|
|
"step": 1680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.84,
|
||
|
|
"learning_rate": 8.378378378378379e-08,
|
||
|
|
"logits/chosen": -0.11181571334600449,
|
||
|
|
"logits/rejected": 0.11308126151561737,
|
||
|
|
"logps/chosen": -2.6813411712646484,
|
||
|
|
"logps/rejected": -53.58498001098633,
|
||
|
|
"loss": 0.4154,
|
||
|
|
"rewards/accuracies": 0.699999988079071,
|
||
|
|
"rewards/chosen": -0.3474321663379669,
|
||
|
|
"rewards/margins": 4.637228965759277,
|
||
|
|
"rewards/rejected": -4.984661102294922,
|
||
|
|
"step": 1690
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.85,
|
||
|
|
"learning_rate": 8.108108108108108e-08,
|
||
|
|
"logits/chosen": -0.17449359595775604,
|
||
|
|
"logits/rejected": -0.03248979523777962,
|
||
|
|
"logps/chosen": -3.2400214672088623,
|
||
|
|
"logps/rejected": -48.60923767089844,
|
||
|
|
"loss": 0.5534,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.30690425634384155,
|
||
|
|
"rewards/margins": 4.303795337677002,
|
||
|
|
"rewards/rejected": -4.610699653625488,
|
||
|
|
"step": 1700
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.85,
|
||
|
|
"learning_rate": 7.837837837837838e-08,
|
||
|
|
"logits/chosen": -0.5135300755500793,
|
||
|
|
"logits/rejected": -0.15363462269306183,
|
||
|
|
"logps/chosen": -3.052196741104126,
|
||
|
|
"logps/rejected": -54.189735412597656,
|
||
|
|
"loss": 0.554,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.1840744912624359,
|
||
|
|
"rewards/margins": 4.594564914703369,
|
||
|
|
"rewards/rejected": -4.778639316558838,
|
||
|
|
"step": 1710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.86,
|
||
|
|
"learning_rate": 7.567567567567568e-08,
|
||
|
|
"logits/chosen": -0.30552589893341064,
|
||
|
|
"logits/rejected": -0.014799046330153942,
|
||
|
|
"logps/chosen": -2.9994537830352783,
|
||
|
|
"logps/rejected": -33.19937515258789,
|
||
|
|
"loss": 0.6267,
|
||
|
|
"rewards/accuracies": 0.20000000298023224,
|
||
|
|
"rewards/chosen": -0.33928194642066956,
|
||
|
|
"rewards/margins": 2.3723442554473877,
|
||
|
|
"rewards/rejected": -2.7116260528564453,
|
||
|
|
"step": 1720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.86,
|
||
|
|
"learning_rate": 7.297297297297297e-08,
|
||
|
|
"logits/chosen": -0.031065676361322403,
|
||
|
|
"logits/rejected": 0.12822124361991882,
|
||
|
|
"logps/chosen": -2.5956356525421143,
|
||
|
|
"logps/rejected": -44.217403411865234,
|
||
|
|
"loss": 0.557,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.38229799270629883,
|
||
|
|
"rewards/margins": 4.7165846824646,
|
||
|
|
"rewards/rejected": -5.098883152008057,
|
||
|
|
"step": 1730
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.87,
|
||
|
|
"learning_rate": 7.027027027027027e-08,
|
||
|
|
"logits/chosen": -0.07541604340076447,
|
||
|
|
"logits/rejected": -0.07454513013362885,
|
||
|
|
"logps/chosen": -3.0078632831573486,
|
||
|
|
"logps/rejected": -9.483304977416992,
|
||
|
|
"loss": 0.6243,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.3317088484764099,
|
||
|
|
"rewards/margins": 0.9091914892196655,
|
||
|
|
"rewards/rejected": -1.2409002780914307,
|
||
|
|
"step": 1740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.88,
|
||
|
|
"learning_rate": 6.756756756756757e-08,
|
||
|
|
"logits/chosen": -0.06383942067623138,
|
||
|
|
"logits/rejected": 0.10376764833927155,
|
||
|
|
"logps/chosen": -2.433790683746338,
|
||
|
|
"logps/rejected": -46.35570526123047,
|
||
|
|
"loss": 0.5548,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.22169184684753418,
|
||
|
|
"rewards/margins": 4.687034606933594,
|
||
|
|
"rewards/rejected": -4.908726692199707,
|
||
|
|
"step": 1750
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.88,
|
||
|
|
"learning_rate": 6.486486486486487e-08,
|
||
|
|
"logits/chosen": -0.1200224757194519,
|
||
|
|
"logits/rejected": -0.01750156842172146,
|
||
|
|
"logps/chosen": -2.9395155906677246,
|
||
|
|
"logps/rejected": -21.974435806274414,
|
||
|
|
"loss": 0.6243,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.30157679319381714,
|
||
|
|
"rewards/margins": 1.9352340698242188,
|
||
|
|
"rewards/rejected": -2.2368111610412598,
|
||
|
|
"step": 1760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.89,
|
||
|
|
"learning_rate": 6.216216216216216e-08,
|
||
|
|
"logits/chosen": -0.05863137170672417,
|
||
|
|
"logits/rejected": 0.04493208974599838,
|
||
|
|
"logps/chosen": -2.9632556438446045,
|
||
|
|
"logps/rejected": -37.81572723388672,
|
||
|
|
"loss": 0.4845,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.307827889919281,
|
||
|
|
"rewards/margins": 3.4735076427459717,
|
||
|
|
"rewards/rejected": -3.7813358306884766,
|
||
|
|
"step": 1770
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.89,
|
||
|
|
"learning_rate": 5.945945945945946e-08,
|
||
|
|
"logits/chosen": 0.08130965381860733,
|
||
|
|
"logits/rejected": 0.10798033326864243,
|
||
|
|
"logps/chosen": -3.6858971118927,
|
||
|
|
"logps/rejected": -11.559499740600586,
|
||
|
|
"loss": 0.6218,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -0.4870009422302246,
|
||
|
|
"rewards/margins": 0.6409335732460022,
|
||
|
|
"rewards/rejected": -1.127934455871582,
|
||
|
|
"step": 1780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9,
|
||
|
|
"learning_rate": 5.6756756756756756e-08,
|
||
|
|
"logits/chosen": -0.023443683981895447,
|
||
|
|
"logits/rejected": 0.08183663338422775,
|
||
|
|
"logps/chosen": -2.5851669311523438,
|
||
|
|
"logps/rejected": -28.7203369140625,
|
||
|
|
"loss": 0.6222,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.3686230778694153,
|
||
|
|
"rewards/margins": 3.186333417892456,
|
||
|
|
"rewards/rejected": -3.5549559593200684,
|
||
|
|
"step": 1790
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9,
|
||
|
|
"learning_rate": 5.4054054054054056e-08,
|
||
|
|
"logits/chosen": -0.03929876536130905,
|
||
|
|
"logits/rejected": -0.0392637625336647,
|
||
|
|
"logps/chosen": -3.2762999534606934,
|
||
|
|
"logps/rejected": -3.265766143798828,
|
||
|
|
"loss": 0.6936,
|
||
|
|
"rewards/accuracies": 0.10000000149011612,
|
||
|
|
"rewards/chosen": -0.44085612893104553,
|
||
|
|
"rewards/margins": -0.0008075117948465049,
|
||
|
|
"rewards/rejected": -0.44004860520362854,
|
||
|
|
"step": 1800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.91,
|
||
|
|
"learning_rate": 5.1351351351351355e-08,
|
||
|
|
"logits/chosen": 0.051630906760692596,
|
||
|
|
"logits/rejected": 0.051436759531497955,
|
||
|
|
"logps/chosen": -3.7616355419158936,
|
||
|
|
"logps/rejected": -3.779416561126709,
|
||
|
|
"loss": 0.6912,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.5269044041633606,
|
||
|
|
"rewards/margins": 0.003887352766469121,
|
||
|
|
"rewards/rejected": -0.530791699886322,
|
||
|
|
"step": 1810
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.91,
|
||
|
|
"learning_rate": 4.864864864864865e-08,
|
||
|
|
"logits/chosen": -0.1325063705444336,
|
||
|
|
"logits/rejected": -0.011743051931262016,
|
||
|
|
"logps/chosen": -3.144193649291992,
|
||
|
|
"logps/rejected": -31.121057510375977,
|
||
|
|
"loss": 0.5549,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.45744314789772034,
|
||
|
|
"rewards/margins": 2.0994107723236084,
|
||
|
|
"rewards/rejected": -2.556854009628296,
|
||
|
|
"step": 1820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.92,
|
||
|
|
"learning_rate": 4.5945945945945947e-08,
|
||
|
|
"logits/chosen": -0.032472122460603714,
|
||
|
|
"logits/rejected": 0.010701514780521393,
|
||
|
|
"logps/chosen": -3.4038867950439453,
|
||
|
|
"logps/rejected": -27.584125518798828,
|
||
|
|
"loss": 0.5532,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.43393245339393616,
|
||
|
|
"rewards/margins": 2.7115139961242676,
|
||
|
|
"rewards/rejected": -3.145446300506592,
|
||
|
|
"step": 1830
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.92,
|
||
|
|
"learning_rate": 4.3243243243243246e-08,
|
||
|
|
"logits/chosen": 0.15468727052211761,
|
||
|
|
"logits/rejected": 0.15615049004554749,
|
||
|
|
"logps/chosen": -3.311304807662964,
|
||
|
|
"logps/rejected": -9.414125442504883,
|
||
|
|
"loss": 0.6202,
|
||
|
|
"rewards/accuracies": 0.699999988079071,
|
||
|
|
"rewards/chosen": -0.4434788227081299,
|
||
|
|
"rewards/margins": 0.7733428478240967,
|
||
|
|
"rewards/rejected": -1.2168217897415161,
|
||
|
|
"step": 1840
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.93,
|
||
|
|
"learning_rate": 4.054054054054054e-08,
|
||
|
|
"logits/chosen": -0.24390192329883575,
|
||
|
|
"logits/rejected": -0.007583351340144873,
|
||
|
|
"logps/chosen": -3.870515823364258,
|
||
|
|
"logps/rejected": -39.793636322021484,
|
||
|
|
"loss": 0.5515,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -0.4439309239387512,
|
||
|
|
"rewards/margins": 3.1208810806274414,
|
||
|
|
"rewards/rejected": -3.5648117065429688,
|
||
|
|
"step": 1850
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.93,
|
||
|
|
"learning_rate": 3.783783783783784e-08,
|
||
|
|
"logits/chosen": -0.013895763084292412,
|
||
|
|
"logits/rejected": -0.014107026159763336,
|
||
|
|
"logps/chosen": -2.9954867362976074,
|
||
|
|
"logps/rejected": -2.9771194458007812,
|
||
|
|
"loss": 0.6946,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.3877726197242737,
|
||
|
|
"rewards/margins": -0.0028153404127806425,
|
||
|
|
"rewards/rejected": -0.3849572539329529,
|
||
|
|
"step": 1860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.94,
|
||
|
|
"learning_rate": 3.513513513513514e-08,
|
||
|
|
"logits/chosen": 0.02430087886750698,
|
||
|
|
"logits/rejected": 0.03372497111558914,
|
||
|
|
"logps/chosen": -2.9531288146972656,
|
||
|
|
"logps/rejected": -9.80555248260498,
|
||
|
|
"loss": 0.6233,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.42032957077026367,
|
||
|
|
"rewards/margins": 1.0188238620758057,
|
||
|
|
"rewards/rejected": -1.4391534328460693,
|
||
|
|
"step": 1870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.94,
|
||
|
|
"learning_rate": 3.2432432432432436e-08,
|
||
|
|
"logits/chosen": 0.07200212776660919,
|
||
|
|
"logits/rejected": 0.13575957715511322,
|
||
|
|
"logps/chosen": -2.400723934173584,
|
||
|
|
"logps/rejected": -35.662139892578125,
|
||
|
|
"loss": 0.495,
|
||
|
|
"rewards/accuracies": 0.8999999761581421,
|
||
|
|
"rewards/chosen": -0.2884984314441681,
|
||
|
|
"rewards/margins": 3.8999791145324707,
|
||
|
|
"rewards/rejected": -4.188477516174316,
|
||
|
|
"step": 1880
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.94,
|
||
|
|
"learning_rate": 2.972972972972973e-08,
|
||
|
|
"logits/chosen": -0.36185282468795776,
|
||
|
|
"logits/rejected": 0.017909474670886993,
|
||
|
|
"logps/chosen": -3.7324061393737793,
|
||
|
|
"logps/rejected": -76.72266387939453,
|
||
|
|
"loss": 0.484,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.46223974227905273,
|
||
|
|
"rewards/margins": 5.9966840744018555,
|
||
|
|
"rewards/rejected": -6.458924293518066,
|
||
|
|
"step": 1890
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.95,
|
||
|
|
"learning_rate": 2.7027027027027028e-08,
|
||
|
|
"logits/chosen": 0.10856851190328598,
|
||
|
|
"logits/rejected": 0.156903475522995,
|
||
|
|
"logps/chosen": -2.646015167236328,
|
||
|
|
"logps/rejected": -21.1688175201416,
|
||
|
|
"loss": 0.6246,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.32123029232025146,
|
||
|
|
"rewards/margins": 2.1456785202026367,
|
||
|
|
"rewards/rejected": -2.4669089317321777,
|
||
|
|
"step": 1900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.95,
|
||
|
|
"learning_rate": 2.4324324324324324e-08,
|
||
|
|
"logits/chosen": 0.06192172318696976,
|
||
|
|
"logits/rejected": 0.17663788795471191,
|
||
|
|
"logps/chosen": -2.737999677658081,
|
||
|
|
"logps/rejected": -26.416967391967773,
|
||
|
|
"loss": 0.5541,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.3163206875324249,
|
||
|
|
"rewards/margins": 2.440368175506592,
|
||
|
|
"rewards/rejected": -2.756688117980957,
|
||
|
|
"step": 1910
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.96,
|
||
|
|
"learning_rate": 2.1621621621621623e-08,
|
||
|
|
"logits/chosen": -0.042785413563251495,
|
||
|
|
"logits/rejected": 0.24969927966594696,
|
||
|
|
"logps/chosen": -2.8873772621154785,
|
||
|
|
"logps/rejected": -53.33967208862305,
|
||
|
|
"loss": 0.5587,
|
||
|
|
"rewards/accuracies": 0.20000000298023224,
|
||
|
|
"rewards/chosen": -0.28660848736763,
|
||
|
|
"rewards/margins": 4.235617160797119,
|
||
|
|
"rewards/rejected": -4.522225379943848,
|
||
|
|
"step": 1920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.96,
|
||
|
|
"learning_rate": 1.891891891891892e-08,
|
||
|
|
"logits/chosen": -0.05043508857488632,
|
||
|
|
"logits/rejected": -0.047407329082489014,
|
||
|
|
"logps/chosen": -2.8302414417266846,
|
||
|
|
"logps/rejected": -8.945648193359375,
|
||
|
|
"loss": 0.6245,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.37974387407302856,
|
||
|
|
"rewards/margins": 0.7309183478355408,
|
||
|
|
"rewards/rejected": -1.1106622219085693,
|
||
|
|
"step": 1930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.97,
|
||
|
|
"learning_rate": 1.6216216216216218e-08,
|
||
|
|
"logits/chosen": 0.0068864584900438786,
|
||
|
|
"logits/rejected": 0.09314581006765366,
|
||
|
|
"logps/chosen": -3.152959108352661,
|
||
|
|
"logps/rejected": -26.90890121459961,
|
||
|
|
"loss": 0.5543,
|
||
|
|
"rewards/accuracies": 0.800000011920929,
|
||
|
|
"rewards/chosen": -0.4128968119621277,
|
||
|
|
"rewards/margins": 2.2474188804626465,
|
||
|
|
"rewards/rejected": -2.660315990447998,
|
||
|
|
"step": 1940
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.97,
|
||
|
|
"learning_rate": 1.3513513513513514e-08,
|
||
|
|
"logits/chosen": -0.013911092653870583,
|
||
|
|
"logits/rejected": 0.13355985283851624,
|
||
|
|
"logps/chosen": -4.011211395263672,
|
||
|
|
"logps/rejected": -43.98572540283203,
|
||
|
|
"loss": 0.6181,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -0.4239436686038971,
|
||
|
|
"rewards/margins": 3.6849803924560547,
|
||
|
|
"rewards/rejected": -4.10892391204834,
|
||
|
|
"step": 1950
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.98,
|
||
|
|
"learning_rate": 1.0810810810810811e-08,
|
||
|
|
"logits/chosen": -0.06818331778049469,
|
||
|
|
"logits/rejected": -0.02771920897066593,
|
||
|
|
"logps/chosen": -2.947129964828491,
|
||
|
|
"logps/rejected": -16.071887969970703,
|
||
|
|
"loss": 0.6201,
|
||
|
|
"rewards/accuracies": 0.30000001192092896,
|
||
|
|
"rewards/chosen": -0.47470030188560486,
|
||
|
|
"rewards/margins": 1.8438775539398193,
|
||
|
|
"rewards/rejected": -2.318577766418457,
|
||
|
|
"step": 1960
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.98,
|
||
|
|
"learning_rate": 8.108108108108109e-09,
|
||
|
|
"logits/chosen": -0.07423537969589233,
|
||
|
|
"logits/rejected": -0.07442188262939453,
|
||
|
|
"logps/chosen": -3.448483943939209,
|
||
|
|
"logps/rejected": -3.465106964111328,
|
||
|
|
"loss": 0.6926,
|
||
|
|
"rewards/accuracies": 0.20000000298023224,
|
||
|
|
"rewards/chosen": -0.5680142641067505,
|
||
|
|
"rewards/margins": 0.0010964989196509123,
|
||
|
|
"rewards/rejected": -0.5691107511520386,
|
||
|
|
"step": 1970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.99,
|
||
|
|
"learning_rate": 5.405405405405406e-09,
|
||
|
|
"logits/chosen": -0.31879547238349915,
|
||
|
|
"logits/rejected": 0.06546586751937866,
|
||
|
|
"logps/chosen": -2.377040386199951,
|
||
|
|
"logps/rejected": -101.98162078857422,
|
||
|
|
"loss": 0.4165,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -0.21218986809253693,
|
||
|
|
"rewards/margins": 10.473356246948242,
|
||
|
|
"rewards/rejected": -10.685547828674316,
|
||
|
|
"step": 1980
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.99,
|
||
|
|
"learning_rate": 2.702702702702703e-09,
|
||
|
|
"logits/chosen": 0.08084534108638763,
|
||
|
|
"logits/rejected": 0.0808691754937172,
|
||
|
|
"logps/chosen": -4.6396708488464355,
|
||
|
|
"logps/rejected": -4.617379188537598,
|
||
|
|
"loss": 0.6956,
|
||
|
|
"rewards/accuracies": 0.20000000298023224,
|
||
|
|
"rewards/chosen": -0.7069979906082153,
|
||
|
|
"rewards/margins": -0.004753425717353821,
|
||
|
|
"rewards/rejected": -0.7022445797920227,
|
||
|
|
"step": 1990
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0,
|
||
|
|
"learning_rate": 0.0,
|
||
|
|
"logits/chosen": -0.14494428038597107,
|
||
|
|
"logits/rejected": 0.04603511840105057,
|
||
|
|
"logps/chosen": -2.9900779724121094,
|
||
|
|
"logps/rejected": -54.25315475463867,
|
||
|
|
"loss": 0.4851,
|
||
|
|
"rewards/accuracies": 0.4000000059604645,
|
||
|
|
"rewards/chosen": -0.3263259530067444,
|
||
|
|
"rewards/margins": 4.676065921783447,
|
||
|
|
"rewards/rejected": -5.002391815185547,
|
||
|
|
"step": 2000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0,
|
||
|
|
"eval_logits/chosen": -0.10488395392894745,
|
||
|
|
"eval_logits/rejected": 0.038251034915447235,
|
||
|
|
"eval_logps/chosen": -3.3913989067077637,
|
||
|
|
"eval_logps/rejected": -30.402114868164062,
|
||
|
|
"eval_loss": 0.5553861260414124,
|
||
|
|
"eval_rewards/accuracies": 0.4000000059604645,
|
||
|
|
"eval_rewards/chosen": -0.34520336985588074,
|
||
|
|
"eval_rewards/margins": 2.5873284339904785,
|
||
|
|
"eval_rewards/rejected": -2.9325320720672607,
|
||
|
|
"eval_runtime": 43.9337,
|
||
|
|
"eval_samples_per_second": 2.276,
|
||
|
|
"eval_steps_per_second": 2.276,
|
||
|
|
"step": 2000
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 10,
|
||
|
|
"max_steps": 2000,
|
||
|
|
"num_train_epochs": 1,
|
||
|
|
"save_steps": 500,
|
||
|
|
"total_flos": 0.0,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|