7022 lines
229 KiB
JSON
7022 lines
229 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.6543432030099787,
|
|
"eval_steps": 500,
|
|
"global_step": 500,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 5.000000000000001e-07,
|
|
"logits/chosen": -1.977054476737976,
|
|
"logits/rejected": -2.017892599105835,
|
|
"logps/chosen": -169.97320556640625,
|
|
"logps/rejected": -186.7821807861328,
|
|
"loss": 0.7082,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.016062308102846146,
|
|
"rewards/margins": -0.02699420601129532,
|
|
"rewards/rejected": 0.010931899771094322,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 1.0000000000000002e-06,
|
|
"logits/chosen": -1.8305251598358154,
|
|
"logits/rejected": -1.8582998514175415,
|
|
"logps/chosen": -155.8516082763672,
|
|
"logps/rejected": -165.23692321777344,
|
|
"loss": 0.6929,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": 0.0017804148374125361,
|
|
"rewards/margins": 0.004515504464507103,
|
|
"rewards/rejected": -0.0027350913733243942,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 1.5e-06,
|
|
"logits/chosen": -1.7455682754516602,
|
|
"logits/rejected": -1.7944730520248413,
|
|
"logps/chosen": -158.9869842529297,
|
|
"logps/rejected": -179.4861602783203,
|
|
"loss": 0.6886,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 0.00722665898501873,
|
|
"rewards/margins": 0.01148004550486803,
|
|
"rewards/rejected": -0.004253389313817024,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 2.0000000000000003e-06,
|
|
"logits/chosen": -1.8648240566253662,
|
|
"logits/rejected": -1.8583375215530396,
|
|
"logps/chosen": -186.27041625976562,
|
|
"logps/rejected": -174.98153686523438,
|
|
"loss": 0.6867,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": 0.013353967107832432,
|
|
"rewards/margins": 0.014140583574771881,
|
|
"rewards/rejected": -0.0007866150699555874,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 2.5e-06,
|
|
"logits/chosen": -1.5987751483917236,
|
|
"logits/rejected": -1.602742075920105,
|
|
"logps/chosen": -166.3009796142578,
|
|
"logps/rejected": -182.16493225097656,
|
|
"loss": 0.6973,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.01031036488711834,
|
|
"rewards/margins": -0.007737827021628618,
|
|
"rewards/rejected": -0.0025725378654897213,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 3e-06,
|
|
"logits/chosen": -1.4330253601074219,
|
|
"logits/rejected": -1.4852710962295532,
|
|
"logps/chosen": -170.77926635742188,
|
|
"logps/rejected": -203.38731384277344,
|
|
"loss": 0.6967,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.0022835731506347656,
|
|
"rewards/margins": -0.005300428252667189,
|
|
"rewards/rejected": 0.003016853705048561,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 3.5000000000000004e-06,
|
|
"logits/chosen": -1.5559855699539185,
|
|
"logits/rejected": -1.5930315256118774,
|
|
"logps/chosen": -200.5897979736328,
|
|
"logps/rejected": -216.58615112304688,
|
|
"loss": 0.6832,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": 0.015020323917269707,
|
|
"rewards/margins": 0.02334442362189293,
|
|
"rewards/rejected": -0.008324098773300648,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 4.000000000000001e-06,
|
|
"logits/chosen": -1.7643598318099976,
|
|
"logits/rejected": -1.7637088298797607,
|
|
"logps/chosen": -166.2308349609375,
|
|
"logps/rejected": -167.15399169921875,
|
|
"loss": 0.6973,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.03954277187585831,
|
|
"rewards/margins": -0.005299141630530357,
|
|
"rewards/rejected": -0.0342436321079731,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 4.5e-06,
|
|
"logits/chosen": -2.035048246383667,
|
|
"logits/rejected": -2.1090240478515625,
|
|
"logps/chosen": -192.5548095703125,
|
|
"logps/rejected": -188.7212371826172,
|
|
"loss": 0.6788,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": 0.019565440714359283,
|
|
"rewards/margins": 0.030419450253248215,
|
|
"rewards/rejected": -0.010854003950953484,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 5e-06,
|
|
"logits/chosen": -1.78806471824646,
|
|
"logits/rejected": -1.771653175354004,
|
|
"logps/chosen": -169.3682403564453,
|
|
"logps/rejected": -163.67990112304688,
|
|
"loss": 0.6861,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.0037752394564449787,
|
|
"rewards/margins": 0.01528622955083847,
|
|
"rewards/rejected": -0.01906147226691246,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 5.500000000000001e-06,
|
|
"logits/chosen": -1.6899704933166504,
|
|
"logits/rejected": -1.7716753482818604,
|
|
"logps/chosen": -197.17576599121094,
|
|
"logps/rejected": -232.6337127685547,
|
|
"loss": 0.7158,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.06386594474315643,
|
|
"rewards/margins": -0.04272947832942009,
|
|
"rewards/rejected": -0.02113647386431694,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 6e-06,
|
|
"logits/chosen": -1.8938390016555786,
|
|
"logits/rejected": -1.8334659337997437,
|
|
"logps/chosen": -150.29385375976562,
|
|
"logps/rejected": -159.55947875976562,
|
|
"loss": 0.6895,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": 0.0028306012973189354,
|
|
"rewards/margins": 0.01067290361970663,
|
|
"rewards/rejected": -0.00784230325371027,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 6.5000000000000004e-06,
|
|
"logits/chosen": -1.87659752368927,
|
|
"logits/rejected": -1.8801605701446533,
|
|
"logps/chosen": -205.460693359375,
|
|
"logps/rejected": -202.0462188720703,
|
|
"loss": 0.6984,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": 0.0009463531896471977,
|
|
"rewards/margins": -0.007184028625488281,
|
|
"rewards/rejected": 0.008130382746458054,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 7.000000000000001e-06,
|
|
"logits/chosen": -1.9238759279251099,
|
|
"logits/rejected": -2.0003201961517334,
|
|
"logps/chosen": -184.6726531982422,
|
|
"logps/rejected": -177.62509155273438,
|
|
"loss": 0.6972,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.01391973439604044,
|
|
"rewards/margins": -0.004122593905776739,
|
|
"rewards/rejected": -0.009797144681215286,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 7.5e-06,
|
|
"logits/chosen": -1.569742202758789,
|
|
"logits/rejected": -1.5442943572998047,
|
|
"logps/chosen": -171.07496643066406,
|
|
"logps/rejected": -170.74981689453125,
|
|
"loss": 0.6992,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.020387031137943268,
|
|
"rewards/margins": -0.008263109251856804,
|
|
"rewards/rejected": -0.012123920023441315,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 8.000000000000001e-06,
|
|
"logits/chosen": -1.7774536609649658,
|
|
"logits/rejected": -1.8797023296356201,
|
|
"logps/chosen": -173.0279541015625,
|
|
"logps/rejected": -187.43557739257812,
|
|
"loss": 0.6999,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.03834056854248047,
|
|
"rewards/margins": -0.01055521797388792,
|
|
"rewards/rejected": -0.027785349637269974,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 8.500000000000002e-06,
|
|
"logits/chosen": -1.7320338487625122,
|
|
"logits/rejected": -1.6945910453796387,
|
|
"logps/chosen": -185.27392578125,
|
|
"logps/rejected": -189.87738037109375,
|
|
"loss": 0.6808,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 0.007571219466626644,
|
|
"rewards/margins": 0.025459958240389824,
|
|
"rewards/rejected": -0.017888737842440605,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 9e-06,
|
|
"logits/chosen": -1.7604248523712158,
|
|
"logits/rejected": -1.7776434421539307,
|
|
"logps/chosen": -196.05404663085938,
|
|
"logps/rejected": -190.14569091796875,
|
|
"loss": 0.6914,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.02323136478662491,
|
|
"rewards/margins": 0.00760660320520401,
|
|
"rewards/rejected": -0.03083796612918377,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 9.5e-06,
|
|
"logits/chosen": -1.59357488155365,
|
|
"logits/rejected": -1.5590327978134155,
|
|
"logps/chosen": -213.9495391845703,
|
|
"logps/rejected": -218.56654357910156,
|
|
"loss": 0.7222,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.05001959949731827,
|
|
"rewards/margins": -0.05468587949872017,
|
|
"rewards/rejected": 0.004666280932724476,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1e-05,
|
|
"logits/chosen": -1.752557635307312,
|
|
"logits/rejected": -1.7027242183685303,
|
|
"logps/chosen": -213.31336975097656,
|
|
"logps/rejected": -204.09646606445312,
|
|
"loss": 0.6887,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.017501091584563255,
|
|
"rewards/margins": 0.010245682671666145,
|
|
"rewards/rejected": -0.027746770530939102,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.05e-05,
|
|
"logits/chosen": -1.9553875923156738,
|
|
"logits/rejected": -1.9184911251068115,
|
|
"logps/chosen": -175.35333251953125,
|
|
"logps/rejected": -180.54550170898438,
|
|
"loss": 0.655,
|
|
"rewards/accuracies": 0.9375,
|
|
"rewards/chosen": 0.021092725917696953,
|
|
"rewards/margins": 0.08075069636106491,
|
|
"rewards/rejected": -0.059657976031303406,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.1000000000000001e-05,
|
|
"logits/chosen": -1.8159900903701782,
|
|
"logits/rejected": -1.771599531173706,
|
|
"logps/chosen": -185.33059692382812,
|
|
"logps/rejected": -209.6474609375,
|
|
"loss": 0.6913,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.017464350908994675,
|
|
"rewards/margins": 0.006189251318573952,
|
|
"rewards/rejected": -0.023653600364923477,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.1500000000000002e-05,
|
|
"logits/chosen": -1.8995972871780396,
|
|
"logits/rejected": -1.9293156862258911,
|
|
"logps/chosen": -178.39755249023438,
|
|
"logps/rejected": -211.63937377929688,
|
|
"loss": 0.7191,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.05782761424779892,
|
|
"rewards/margins": -0.04498009383678436,
|
|
"rewards/rejected": -0.012847519479691982,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.2e-05,
|
|
"logits/chosen": -1.8924778699874878,
|
|
"logits/rejected": -1.8979182243347168,
|
|
"logps/chosen": -167.22109985351562,
|
|
"logps/rejected": -176.33663940429688,
|
|
"loss": 0.7028,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.02767309918999672,
|
|
"rewards/margins": -0.016704557463526726,
|
|
"rewards/rejected": -0.010968542657792568,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.25e-05,
|
|
"logits/chosen": -1.840967059135437,
|
|
"logits/rejected": -1.797666311264038,
|
|
"logps/chosen": -175.57766723632812,
|
|
"logps/rejected": -148.11917114257812,
|
|
"loss": 0.6967,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.01812169887125492,
|
|
"rewards/margins": -0.003334569279104471,
|
|
"rewards/rejected": -0.014787126332521439,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.3000000000000001e-05,
|
|
"logits/chosen": -1.8270690441131592,
|
|
"logits/rejected": -1.7811157703399658,
|
|
"logps/chosen": -165.993896484375,
|
|
"logps/rejected": -165.7574005126953,
|
|
"loss": 0.7151,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.06210968643426895,
|
|
"rewards/margins": -0.040364596992731094,
|
|
"rewards/rejected": -0.02174508571624756,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.3500000000000001e-05,
|
|
"logits/chosen": -1.877970576286316,
|
|
"logits/rejected": -1.9722176790237427,
|
|
"logps/chosen": -148.4381103515625,
|
|
"logps/rejected": -173.12579345703125,
|
|
"loss": 0.6835,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.022617867216467857,
|
|
"rewards/margins": 0.021304797381162643,
|
|
"rewards/rejected": -0.04392266273498535,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.4000000000000001e-05,
|
|
"logits/chosen": -1.922934889793396,
|
|
"logits/rejected": -1.9657589197158813,
|
|
"logps/chosen": -170.66554260253906,
|
|
"logps/rejected": -176.7419891357422,
|
|
"loss": 0.7316,
|
|
"rewards/accuracies": 0.1875,
|
|
"rewards/chosen": -0.10023985058069229,
|
|
"rewards/margins": -0.0725136250257492,
|
|
"rewards/rejected": -0.027726221829652786,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.45e-05,
|
|
"logits/chosen": -1.9069832563400269,
|
|
"logits/rejected": -1.8908956050872803,
|
|
"logps/chosen": -193.16102600097656,
|
|
"logps/rejected": -193.22003173828125,
|
|
"loss": 0.6847,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": 0.005608892068266869,
|
|
"rewards/margins": 0.020983649417757988,
|
|
"rewards/rejected": -0.01537475548684597,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.5e-05,
|
|
"logits/chosen": -1.7016417980194092,
|
|
"logits/rejected": -1.7221649885177612,
|
|
"logps/chosen": -188.40786743164062,
|
|
"logps/rejected": -175.92909240722656,
|
|
"loss": 0.7016,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.09070225059986115,
|
|
"rewards/margins": -0.013078359887003899,
|
|
"rewards/rejected": -0.0776238888502121,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.55e-05,
|
|
"logits/chosen": -1.9492610692977905,
|
|
"logits/rejected": -1.9104335308074951,
|
|
"logps/chosen": -180.67147827148438,
|
|
"logps/rejected": -184.8843994140625,
|
|
"loss": 0.6708,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": 0.019913675263524055,
|
|
"rewards/margins": 0.049428701400756836,
|
|
"rewards/rejected": -0.02951502799987793,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.6000000000000003e-05,
|
|
"logits/chosen": -1.8524138927459717,
|
|
"logits/rejected": -1.9383589029312134,
|
|
"logps/chosen": -159.52560424804688,
|
|
"logps/rejected": -170.27255249023438,
|
|
"loss": 0.7146,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.03338008001446724,
|
|
"rewards/margins": -0.03860168159008026,
|
|
"rewards/rejected": 0.00522160530090332,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.65e-05,
|
|
"logits/chosen": -1.7788478136062622,
|
|
"logits/rejected": -1.835086703300476,
|
|
"logps/chosen": -180.18177795410156,
|
|
"logps/rejected": -206.07110595703125,
|
|
"loss": 0.7059,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.05509199947118759,
|
|
"rewards/margins": -0.023075008764863014,
|
|
"rewards/rejected": -0.03201699256896973,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.7000000000000003e-05,
|
|
"logits/chosen": -1.8998254537582397,
|
|
"logits/rejected": -1.9138494729995728,
|
|
"logps/chosen": -194.03167724609375,
|
|
"logps/rejected": -203.6524658203125,
|
|
"loss": 0.6915,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.05050516128540039,
|
|
"rewards/margins": 0.007271335460245609,
|
|
"rewards/rejected": -0.057776499539613724,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.75e-05,
|
|
"logits/chosen": -1.9758315086364746,
|
|
"logits/rejected": -2.0455610752105713,
|
|
"logps/chosen": -153.28883361816406,
|
|
"logps/rejected": -162.89920043945312,
|
|
"loss": 0.7122,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.04079794883728027,
|
|
"rewards/margins": -0.034585997462272644,
|
|
"rewards/rejected": -0.006211946718394756,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.8e-05,
|
|
"logits/chosen": -1.597517967224121,
|
|
"logits/rejected": -1.631009817123413,
|
|
"logps/chosen": -160.9354248046875,
|
|
"logps/rejected": -168.85618591308594,
|
|
"loss": 0.6918,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.07598643749952316,
|
|
"rewards/margins": 0.005950784310698509,
|
|
"rewards/rejected": -0.08193722367286682,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.85e-05,
|
|
"logits/chosen": -1.7368295192718506,
|
|
"logits/rejected": -1.7420881986618042,
|
|
"logps/chosen": -169.4617462158203,
|
|
"logps/rejected": -184.20599365234375,
|
|
"loss": 0.6948,
|
|
"rewards/accuracies": 0.25,
|
|
"rewards/chosen": -0.0290069580078125,
|
|
"rewards/margins": -0.0007306085899472237,
|
|
"rewards/rejected": -0.0282763484865427,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9e-05,
|
|
"logits/chosen": -1.687361717224121,
|
|
"logits/rejected": -1.7176148891448975,
|
|
"logps/chosen": -174.06297302246094,
|
|
"logps/rejected": -192.69715881347656,
|
|
"loss": 0.6973,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.09517102688550949,
|
|
"rewards/margins": -0.0029970891773700714,
|
|
"rewards/rejected": -0.09217393398284912,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 1.9500000000000003e-05,
|
|
"logits/chosen": -1.8842296600341797,
|
|
"logits/rejected": -1.9039793014526367,
|
|
"logps/chosen": -162.37741088867188,
|
|
"logps/rejected": -176.11697387695312,
|
|
"loss": 0.7109,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.13322168588638306,
|
|
"rewards/margins": -0.03214216232299805,
|
|
"rewards/rejected": -0.10107951611280441,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 2e-05,
|
|
"logits/chosen": -1.6896815299987793,
|
|
"logits/rejected": -1.7052747011184692,
|
|
"logps/chosen": -155.33111572265625,
|
|
"logps/rejected": -159.064208984375,
|
|
"loss": 0.6846,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.04464542865753174,
|
|
"rewards/margins": 0.025189755484461784,
|
|
"rewards/rejected": -0.06983518600463867,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 2.05e-05,
|
|
"logits/chosen": -1.7584484815597534,
|
|
"logits/rejected": -1.6827234029769897,
|
|
"logps/chosen": -161.47369384765625,
|
|
"logps/rejected": -166.0779571533203,
|
|
"loss": 0.6856,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.07150936126708984,
|
|
"rewards/margins": 0.019259024411439896,
|
|
"rewards/rejected": -0.09076839685440063,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 2.1e-05,
|
|
"logits/chosen": -2.0230112075805664,
|
|
"logits/rejected": -1.9628382921218872,
|
|
"logps/chosen": -160.32073974609375,
|
|
"logps/rejected": -182.98248291015625,
|
|
"loss": 0.6857,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.09749487042427063,
|
|
"rewards/margins": 0.01751875691115856,
|
|
"rewards/rejected": -0.11501362919807434,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 2.15e-05,
|
|
"logits/chosen": -1.8469973802566528,
|
|
"logits/rejected": -1.814754843711853,
|
|
"logps/chosen": -194.247314453125,
|
|
"logps/rejected": -204.80491638183594,
|
|
"loss": 0.7142,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.15520897507667542,
|
|
"rewards/margins": -0.03922419250011444,
|
|
"rewards/rejected": -0.11598476767539978,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 2.2000000000000003e-05,
|
|
"logits/chosen": -1.9133589267730713,
|
|
"logits/rejected": -1.9711928367614746,
|
|
"logps/chosen": -186.54978942871094,
|
|
"logps/rejected": -189.23045349121094,
|
|
"loss": 0.7238,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.1358685940504074,
|
|
"rewards/margins": -0.05334620922803879,
|
|
"rewards/rejected": -0.08252239227294922,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 2.25e-05,
|
|
"logits/chosen": -1.6977447271347046,
|
|
"logits/rejected": -1.7883001565933228,
|
|
"logps/chosen": -178.20858764648438,
|
|
"logps/rejected": -201.03770446777344,
|
|
"loss": 0.6465,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": -0.053537700325250626,
|
|
"rewards/margins": 0.1018117368221283,
|
|
"rewards/rejected": -0.15534944832324982,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 2.3000000000000003e-05,
|
|
"logits/chosen": -1.9271280765533447,
|
|
"logits/rejected": -1.9463679790496826,
|
|
"logps/chosen": -176.5238800048828,
|
|
"logps/rejected": -166.8386993408203,
|
|
"loss": 0.7159,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.09149947762489319,
|
|
"rewards/margins": -0.04024248570203781,
|
|
"rewards/rejected": -0.05125699192285538,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 2.35e-05,
|
|
"logits/chosen": -1.7680522203445435,
|
|
"logits/rejected": -1.668277621269226,
|
|
"logps/chosen": -189.94366455078125,
|
|
"logps/rejected": -177.8812255859375,
|
|
"loss": 0.6647,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.08811293542385101,
|
|
"rewards/margins": 0.06799888610839844,
|
|
"rewards/rejected": -0.15611180663108826,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 2.4e-05,
|
|
"logits/chosen": -1.5737125873565674,
|
|
"logits/rejected": -1.626237154006958,
|
|
"logps/chosen": -177.7281036376953,
|
|
"logps/rejected": -185.34068298339844,
|
|
"loss": 0.7038,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.12158739566802979,
|
|
"rewards/margins": -0.004673934541642666,
|
|
"rewards/rejected": -0.11691347509622574,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 2.45e-05,
|
|
"logits/chosen": -1.487099528312683,
|
|
"logits/rejected": -1.5200517177581787,
|
|
"logps/chosen": -152.00973510742188,
|
|
"logps/rejected": -163.13150024414062,
|
|
"loss": 0.7211,
|
|
"rewards/accuracies": 0.25,
|
|
"rewards/chosen": -0.12497053295373917,
|
|
"rewards/margins": -0.05033881962299347,
|
|
"rewards/rejected": -0.0746317207813263,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 2.5e-05,
|
|
"logits/chosen": -1.751338243484497,
|
|
"logits/rejected": -1.7326526641845703,
|
|
"logps/chosen": -180.03758239746094,
|
|
"logps/rejected": -191.03634643554688,
|
|
"loss": 0.7029,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.12404017895460129,
|
|
"rewards/margins": -0.013475272804498672,
|
|
"rewards/rejected": -0.11056490242481232,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 2.5500000000000003e-05,
|
|
"logits/chosen": -1.9426243305206299,
|
|
"logits/rejected": -1.9601188898086548,
|
|
"logps/chosen": -158.4928741455078,
|
|
"logps/rejected": -164.06317138671875,
|
|
"loss": 0.7142,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.12027917802333832,
|
|
"rewards/margins": -0.028645988553762436,
|
|
"rewards/rejected": -0.09163318574428558,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 2.6000000000000002e-05,
|
|
"logits/chosen": -1.5965681076049805,
|
|
"logits/rejected": -1.597716212272644,
|
|
"logps/chosen": -195.06454467773438,
|
|
"logps/rejected": -193.9747314453125,
|
|
"loss": 0.6905,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.14499236643314362,
|
|
"rewards/margins": 0.02140347845852375,
|
|
"rewards/rejected": -0.16639582812786102,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 2.6500000000000004e-05,
|
|
"logits/chosen": -1.9194509983062744,
|
|
"logits/rejected": -1.883784294128418,
|
|
"logps/chosen": -161.39295959472656,
|
|
"logps/rejected": -167.61610412597656,
|
|
"loss": 0.6965,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.11441681534051895,
|
|
"rewards/margins": -0.00019459612667560577,
|
|
"rewards/rejected": -0.1142222210764885,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 2.7000000000000002e-05,
|
|
"logits/chosen": -1.7368502616882324,
|
|
"logits/rejected": -1.7216427326202393,
|
|
"logps/chosen": -205.18130493164062,
|
|
"logps/rejected": -191.90237426757812,
|
|
"loss": 0.66,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.1574774831533432,
|
|
"rewards/margins": 0.07581701874732971,
|
|
"rewards/rejected": -0.23329448699951172,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 2.7500000000000004e-05,
|
|
"logits/chosen": -2.0303847789764404,
|
|
"logits/rejected": -2.0456559658050537,
|
|
"logps/chosen": -169.33453369140625,
|
|
"logps/rejected": -166.40707397460938,
|
|
"loss": 0.6479,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.14774441719055176,
|
|
"rewards/margins": 0.1020236536860466,
|
|
"rewards/rejected": -0.24976806342601776,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 2.8000000000000003e-05,
|
|
"logits/chosen": -2.035883903503418,
|
|
"logits/rejected": -1.9933511018753052,
|
|
"logps/chosen": -190.82186889648438,
|
|
"logps/rejected": -190.29147338867188,
|
|
"loss": 0.7023,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.11226066946983337,
|
|
"rewards/margins": -0.01058507151901722,
|
|
"rewards/rejected": -0.1016756072640419,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 2.8499999999999998e-05,
|
|
"logits/chosen": -1.9773008823394775,
|
|
"logits/rejected": -1.935595989227295,
|
|
"logps/chosen": -180.8785400390625,
|
|
"logps/rejected": -193.48155212402344,
|
|
"loss": 0.6739,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.2835652828216553,
|
|
"rewards/margins": 0.048453718423843384,
|
|
"rewards/rejected": -0.33201897144317627,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 2.9e-05,
|
|
"logits/chosen": -1.681299090385437,
|
|
"logits/rejected": -1.6699227094650269,
|
|
"logps/chosen": -165.8355255126953,
|
|
"logps/rejected": -191.2743377685547,
|
|
"loss": 0.7356,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.17172232270240784,
|
|
"rewards/margins": -0.07655029743909836,
|
|
"rewards/rejected": -0.09517201036214828,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 2.95e-05,
|
|
"logits/chosen": -1.71488356590271,
|
|
"logits/rejected": -1.7937383651733398,
|
|
"logps/chosen": -177.51776123046875,
|
|
"logps/rejected": -188.60760498046875,
|
|
"loss": 0.6358,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.1568491905927658,
|
|
"rewards/margins": 0.12602630257606506,
|
|
"rewards/rejected": -0.2828754782676697,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 3e-05,
|
|
"logits/chosen": -2.0625619888305664,
|
|
"logits/rejected": -2.1217410564422607,
|
|
"logps/chosen": -190.26431274414062,
|
|
"logps/rejected": -215.28648376464844,
|
|
"loss": 0.7777,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.3512558341026306,
|
|
"rewards/margins": -0.13770633935928345,
|
|
"rewards/rejected": -0.21354950964450836,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 3.05e-05,
|
|
"logits/chosen": -1.5000535249710083,
|
|
"logits/rejected": -1.508121371269226,
|
|
"logps/chosen": -167.42588806152344,
|
|
"logps/rejected": -151.4751739501953,
|
|
"loss": 0.692,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.15655343234539032,
|
|
"rewards/margins": 0.01617264747619629,
|
|
"rewards/rejected": -0.1727260947227478,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 3.1e-05,
|
|
"logits/chosen": -1.628549337387085,
|
|
"logits/rejected": -1.7045596837997437,
|
|
"logps/chosen": -206.2198944091797,
|
|
"logps/rejected": -188.40350341796875,
|
|
"loss": 0.7688,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.2638259530067444,
|
|
"rewards/margins": -0.12546539306640625,
|
|
"rewards/rejected": -0.13836055994033813,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 3.15e-05,
|
|
"logits/chosen": -1.6634955406188965,
|
|
"logits/rejected": -1.641862154006958,
|
|
"logps/chosen": -173.11769104003906,
|
|
"logps/rejected": -179.14816284179688,
|
|
"loss": 0.6446,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.1516265571117401,
|
|
"rewards/margins": 0.10875654965639114,
|
|
"rewards/rejected": -0.26038309931755066,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 3.2000000000000005e-05,
|
|
"logits/chosen": -1.90239417552948,
|
|
"logits/rejected": -1.846576452255249,
|
|
"logps/chosen": -185.16522216796875,
|
|
"logps/rejected": -187.98654174804688,
|
|
"loss": 0.696,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.26641684770584106,
|
|
"rewards/margins": 0.004999059252440929,
|
|
"rewards/rejected": -0.27141591906547546,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 3.2500000000000004e-05,
|
|
"logits/chosen": -1.9015129804611206,
|
|
"logits/rejected": -1.9043163061141968,
|
|
"logps/chosen": -166.89393615722656,
|
|
"logps/rejected": -185.8280487060547,
|
|
"loss": 0.7096,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.3341715931892395,
|
|
"rewards/margins": -0.005350928753614426,
|
|
"rewards/rejected": -0.32882067561149597,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 3.3e-05,
|
|
"logits/chosen": -1.7935004234313965,
|
|
"logits/rejected": -1.8295319080352783,
|
|
"logps/chosen": -145.04159545898438,
|
|
"logps/rejected": -152.42388916015625,
|
|
"loss": 0.7515,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.3907526433467865,
|
|
"rewards/margins": -0.10265941917896271,
|
|
"rewards/rejected": -0.2880932092666626,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 3.35e-05,
|
|
"logits/chosen": -1.6959490776062012,
|
|
"logits/rejected": -1.680245280265808,
|
|
"logps/chosen": -165.76852416992188,
|
|
"logps/rejected": -161.328857421875,
|
|
"loss": 0.7212,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.3491905629634857,
|
|
"rewards/margins": -0.03104216605424881,
|
|
"rewards/rejected": -0.3181484043598175,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 3.4000000000000007e-05,
|
|
"logits/chosen": -1.5831743478775024,
|
|
"logits/rejected": -1.5631486177444458,
|
|
"logps/chosen": -182.02952575683594,
|
|
"logps/rejected": -183.06907653808594,
|
|
"loss": 0.6674,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.22372691333293915,
|
|
"rewards/margins": 0.07622986286878586,
|
|
"rewards/rejected": -0.2999567687511444,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 3.45e-05,
|
|
"logits/chosen": -1.7518221139907837,
|
|
"logits/rejected": -1.7015327215194702,
|
|
"logps/chosen": -181.39324951171875,
|
|
"logps/rejected": -181.88720703125,
|
|
"loss": 0.6668,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.3097344934940338,
|
|
"rewards/margins": 0.0728234276175499,
|
|
"rewards/rejected": -0.3825579285621643,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 3.5e-05,
|
|
"logits/chosen": -1.8494486808776855,
|
|
"logits/rejected": -1.80762779712677,
|
|
"logps/chosen": -181.36915588378906,
|
|
"logps/rejected": -186.561279296875,
|
|
"loss": 0.7449,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.47243672609329224,
|
|
"rewards/margins": -0.07020688056945801,
|
|
"rewards/rejected": -0.40222981572151184,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 3.55e-05,
|
|
"logits/chosen": -1.701026201248169,
|
|
"logits/rejected": -1.6742680072784424,
|
|
"logps/chosen": -157.6632843017578,
|
|
"logps/rejected": -161.87045288085938,
|
|
"loss": 0.6544,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.27613064646720886,
|
|
"rewards/margins": 0.10120917111635208,
|
|
"rewards/rejected": -0.37733981013298035,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 3.6e-05,
|
|
"logits/chosen": -1.7675864696502686,
|
|
"logits/rejected": -1.8016642332077026,
|
|
"logps/chosen": -185.2778778076172,
|
|
"logps/rejected": -212.45452880859375,
|
|
"loss": 0.6621,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.3811972141265869,
|
|
"rewards/margins": 0.07623375207185745,
|
|
"rewards/rejected": -0.45743098855018616,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 3.65e-05,
|
|
"logits/chosen": -1.8149230480194092,
|
|
"logits/rejected": -1.8410683870315552,
|
|
"logps/chosen": -167.20913696289062,
|
|
"logps/rejected": -197.37989807128906,
|
|
"loss": 0.7319,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.48692482709884644,
|
|
"rewards/margins": -0.050217654556035995,
|
|
"rewards/rejected": -0.43670713901519775,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 3.7e-05,
|
|
"logits/chosen": -1.8524658679962158,
|
|
"logits/rejected": -1.8538126945495605,
|
|
"logps/chosen": -164.4295654296875,
|
|
"logps/rejected": -180.02191162109375,
|
|
"loss": 0.7363,
|
|
"rewards/accuracies": 0.25,
|
|
"rewards/chosen": -0.4177883267402649,
|
|
"rewards/margins": -0.050662752240896225,
|
|
"rewards/rejected": -0.36712557077407837,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 3.7500000000000003e-05,
|
|
"logits/chosen": -1.2946817874908447,
|
|
"logits/rejected": -1.3125836849212646,
|
|
"logps/chosen": -225.81192016601562,
|
|
"logps/rejected": -252.005859375,
|
|
"loss": 0.7049,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.37055703997612,
|
|
"rewards/margins": 0.016286462545394897,
|
|
"rewards/rejected": -0.3868435025215149,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 3.8e-05,
|
|
"logits/chosen": -1.7864320278167725,
|
|
"logits/rejected": -1.8551700115203857,
|
|
"logps/chosen": -176.35296630859375,
|
|
"logps/rejected": -182.03231811523438,
|
|
"loss": 0.7029,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.49798864126205444,
|
|
"rewards/margins": 0.02277611568570137,
|
|
"rewards/rejected": -0.5207647681236267,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 3.85e-05,
|
|
"logits/chosen": -1.7414928674697876,
|
|
"logits/rejected": -1.7363300323486328,
|
|
"logps/chosen": -191.74154663085938,
|
|
"logps/rejected": -199.81724548339844,
|
|
"loss": 0.6928,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.4908958375453949,
|
|
"rewards/margins": 0.02561158686876297,
|
|
"rewards/rejected": -0.5165074467658997,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 3.9000000000000006e-05,
|
|
"logits/chosen": -1.835726261138916,
|
|
"logits/rejected": -1.840294599533081,
|
|
"logps/chosen": -191.46029663085938,
|
|
"logps/rejected": -168.4354248046875,
|
|
"loss": 0.7986,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.5988375544548035,
|
|
"rewards/margins": -0.1457357108592987,
|
|
"rewards/rejected": -0.45310184359550476,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 3.9500000000000005e-05,
|
|
"logits/chosen": -1.6384897232055664,
|
|
"logits/rejected": -1.5781760215759277,
|
|
"logps/chosen": -217.35772705078125,
|
|
"logps/rejected": -217.51126098632812,
|
|
"loss": 0.7354,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.6283512115478516,
|
|
"rewards/margins": -0.027955979108810425,
|
|
"rewards/rejected": -0.6003952026367188,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 4e-05,
|
|
"logits/chosen": -1.7660375833511353,
|
|
"logits/rejected": -1.7639034986495972,
|
|
"logps/chosen": -152.1668243408203,
|
|
"logps/rejected": -141.91021728515625,
|
|
"loss": 0.7167,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.5395210385322571,
|
|
"rewards/margins": -0.012110946699976921,
|
|
"rewards/rejected": -0.527410089969635,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 4.05e-05,
|
|
"logits/chosen": -1.5262843370437622,
|
|
"logits/rejected": -1.534919261932373,
|
|
"logps/chosen": -182.58859252929688,
|
|
"logps/rejected": -200.82281494140625,
|
|
"loss": 0.6912,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.6189998388290405,
|
|
"rewards/margins": 0.02953934296965599,
|
|
"rewards/rejected": -0.6485391855239868,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 4.1e-05,
|
|
"logits/chosen": -1.6617029905319214,
|
|
"logits/rejected": -1.6961578130722046,
|
|
"logps/chosen": -177.42286682128906,
|
|
"logps/rejected": -168.7061767578125,
|
|
"loss": 0.7955,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.7245805859565735,
|
|
"rewards/margins": -0.15311545133590698,
|
|
"rewards/rejected": -0.5714651942253113,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 4.15e-05,
|
|
"logits/chosen": -1.6676827669143677,
|
|
"logits/rejected": -1.5490450859069824,
|
|
"logps/chosen": -171.3192596435547,
|
|
"logps/rejected": -173.60470581054688,
|
|
"loss": 0.7403,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.5414891242980957,
|
|
"rewards/margins": -0.05256550386548042,
|
|
"rewards/rejected": -0.4889236092567444,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 4.2e-05,
|
|
"logits/chosen": -1.8843668699264526,
|
|
"logits/rejected": -1.9513225555419922,
|
|
"logps/chosen": -172.60548400878906,
|
|
"logps/rejected": -164.8987579345703,
|
|
"loss": 0.7633,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.5382730960845947,
|
|
"rewards/margins": -0.10875138640403748,
|
|
"rewards/rejected": -0.42952167987823486,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 4.25e-05,
|
|
"logits/chosen": -1.9940603971481323,
|
|
"logits/rejected": -1.9973390102386475,
|
|
"logps/chosen": -161.12863159179688,
|
|
"logps/rejected": -164.32958984375,
|
|
"loss": 0.7724,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.5231802463531494,
|
|
"rewards/margins": -0.11174039542675018,
|
|
"rewards/rejected": -0.41143983602523804,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 4.3e-05,
|
|
"logits/chosen": -1.8103983402252197,
|
|
"logits/rejected": -1.7585985660552979,
|
|
"logps/chosen": -195.51132202148438,
|
|
"logps/rejected": -189.4246826171875,
|
|
"loss": 0.747,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.7077435255050659,
|
|
"rewards/margins": -0.08058477193117142,
|
|
"rewards/rejected": -0.6271587610244751,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 4.35e-05,
|
|
"logits/chosen": -1.9289149045944214,
|
|
"logits/rejected": -1.982129693031311,
|
|
"logps/chosen": -170.78253173828125,
|
|
"logps/rejected": -172.57882690429688,
|
|
"loss": 0.8229,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.6636737585067749,
|
|
"rewards/margins": -0.2039366364479065,
|
|
"rewards/rejected": -0.4597371816635132,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 4.4000000000000006e-05,
|
|
"logits/chosen": -1.875556230545044,
|
|
"logits/rejected": -1.8859914541244507,
|
|
"logps/chosen": -160.60577392578125,
|
|
"logps/rejected": -171.32774353027344,
|
|
"loss": 0.6905,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.3266763389110565,
|
|
"rewards/margins": 0.03434550017118454,
|
|
"rewards/rejected": -0.36102184653282166,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 4.4500000000000004e-05,
|
|
"logits/chosen": -1.7824954986572266,
|
|
"logits/rejected": -1.7617722749710083,
|
|
"logps/chosen": -177.59042358398438,
|
|
"logps/rejected": -200.9052276611328,
|
|
"loss": 0.6799,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.4084916114807129,
|
|
"rewards/margins": 0.0701964944601059,
|
|
"rewards/rejected": -0.47868812084198,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 4.5e-05,
|
|
"logits/chosen": -1.7419114112854004,
|
|
"logits/rejected": -1.7621021270751953,
|
|
"logps/chosen": -185.32742309570312,
|
|
"logps/rejected": -171.69085693359375,
|
|
"loss": 0.8305,
|
|
"rewards/accuracies": 0.25,
|
|
"rewards/chosen": -0.3024221956729889,
|
|
"rewards/margins": -0.20739878714084625,
|
|
"rewards/rejected": -0.09502339363098145,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 4.55e-05,
|
|
"logits/chosen": -1.83467435836792,
|
|
"logits/rejected": -1.8328973054885864,
|
|
"logps/chosen": -207.63388061523438,
|
|
"logps/rejected": -210.3101043701172,
|
|
"loss": 0.6628,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.3415074646472931,
|
|
"rewards/margins": 0.07163538783788681,
|
|
"rewards/rejected": -0.4131428897380829,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 4.600000000000001e-05,
|
|
"logits/chosen": -1.6864506006240845,
|
|
"logits/rejected": -1.7266168594360352,
|
|
"logps/chosen": -180.49522399902344,
|
|
"logps/rejected": -198.40599060058594,
|
|
"loss": 0.6523,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.27834925055503845,
|
|
"rewards/margins": 0.10665541142225266,
|
|
"rewards/rejected": -0.3850046396255493,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 4.6500000000000005e-05,
|
|
"logits/chosen": -1.4600155353546143,
|
|
"logits/rejected": -1.4545408487319946,
|
|
"logps/chosen": -191.01947021484375,
|
|
"logps/rejected": -180.70298767089844,
|
|
"loss": 0.7165,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.1802830994129181,
|
|
"rewards/margins": -0.027431445196270943,
|
|
"rewards/rejected": -0.1528516411781311,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 4.7e-05,
|
|
"logits/chosen": -1.8134263753890991,
|
|
"logits/rejected": -1.8928412199020386,
|
|
"logps/chosen": -184.84503173828125,
|
|
"logps/rejected": -193.90377807617188,
|
|
"loss": 0.6644,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.3032827377319336,
|
|
"rewards/margins": 0.0916454941034317,
|
|
"rewards/rejected": -0.3949282467365265,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 4.75e-05,
|
|
"logits/chosen": -1.8253490924835205,
|
|
"logits/rejected": -1.810834527015686,
|
|
"logps/chosen": -208.2128448486328,
|
|
"logps/rejected": -175.4803009033203,
|
|
"loss": 0.7444,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.5496629476547241,
|
|
"rewards/margins": -0.08057989180088043,
|
|
"rewards/rejected": -0.4690830111503601,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 4.8e-05,
|
|
"logits/chosen": -1.908506989479065,
|
|
"logits/rejected": -1.9602031707763672,
|
|
"logps/chosen": -184.815185546875,
|
|
"logps/rejected": -176.07138061523438,
|
|
"loss": 0.6982,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.3538370728492737,
|
|
"rewards/margins": 0.013028910383582115,
|
|
"rewards/rejected": -0.36686599254608154,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 4.85e-05,
|
|
"logits/chosen": -1.7022223472595215,
|
|
"logits/rejected": -1.6424753665924072,
|
|
"logps/chosen": -198.0614776611328,
|
|
"logps/rejected": -215.3161163330078,
|
|
"loss": 0.7737,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.2290499359369278,
|
|
"rewards/margins": -0.12730106711387634,
|
|
"rewards/rejected": -0.10174884647130966,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 4.9e-05,
|
|
"logits/chosen": -2.018606662750244,
|
|
"logits/rejected": -2.027151584625244,
|
|
"logps/chosen": -167.92147827148438,
|
|
"logps/rejected": -166.90982055664062,
|
|
"loss": 0.6961,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.3297194838523865,
|
|
"rewards/margins": 0.02775608003139496,
|
|
"rewards/rejected": -0.35747551918029785,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 4.9500000000000004e-05,
|
|
"logits/chosen": -1.7240030765533447,
|
|
"logits/rejected": -1.7241712808609009,
|
|
"logps/chosen": -180.00389099121094,
|
|
"logps/rejected": -189.3558349609375,
|
|
"loss": 0.7948,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.4574447572231293,
|
|
"rewards/margins": -0.12154103070497513,
|
|
"rewards/rejected": -0.33590370416641235,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 5e-05,
|
|
"logits/chosen": -1.7809938192367554,
|
|
"logits/rejected": -1.9036985635757446,
|
|
"logps/chosen": -157.1652069091797,
|
|
"logps/rejected": -173.80288696289062,
|
|
"loss": 0.6839,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.2328178882598877,
|
|
"rewards/margins": 0.05165515094995499,
|
|
"rewards/rejected": -0.2844730615615845,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 4.999997432392803e-05,
|
|
"logits/chosen": -1.9480023384094238,
|
|
"logits/rejected": -1.9346106052398682,
|
|
"logps/chosen": -197.60128784179688,
|
|
"logps/rejected": -193.88124084472656,
|
|
"loss": 0.6427,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.26800671219825745,
|
|
"rewards/margins": 0.13769717514514923,
|
|
"rewards/rejected": -0.40570390224456787,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 4.9999897295764844e-05,
|
|
"logits/chosen": -2.048476457595825,
|
|
"logits/rejected": -2.0353472232818604,
|
|
"logps/chosen": -180.3016357421875,
|
|
"logps/rejected": -180.51600646972656,
|
|
"loss": 0.6366,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.35791218280792236,
|
|
"rewards/margins": 0.15290379524230957,
|
|
"rewards/rejected": -0.5108159780502319,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 4.9999768915668665e-05,
|
|
"logits/chosen": -1.9134316444396973,
|
|
"logits/rejected": -1.8900353908538818,
|
|
"logps/chosen": -152.71189880371094,
|
|
"logps/rejected": -153.96690368652344,
|
|
"loss": 0.6941,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.18337132036685944,
|
|
"rewards/margins": 0.03930587321519852,
|
|
"rewards/rejected": -0.22267718613147736,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 4.999958918390321e-05,
|
|
"logits/chosen": -1.8933653831481934,
|
|
"logits/rejected": -1.8493753671646118,
|
|
"logps/chosen": -188.6655731201172,
|
|
"logps/rejected": -183.1341552734375,
|
|
"loss": 0.7168,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.38441047072410583,
|
|
"rewards/margins": -0.010426240041851997,
|
|
"rewards/rejected": -0.3739842474460602,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 4.999935810083766e-05,
|
|
"logits/chosen": -1.7264684438705444,
|
|
"logits/rejected": -1.694319486618042,
|
|
"logps/chosen": -156.22084045410156,
|
|
"logps/rejected": -152.80894470214844,
|
|
"loss": 0.6763,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.28453487157821655,
|
|
"rewards/margins": 0.052657999098300934,
|
|
"rewards/rejected": -0.3371928334236145,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 4.999907566694667e-05,
|
|
"logits/chosen": -1.8885689973831177,
|
|
"logits/rejected": -1.9364815950393677,
|
|
"logps/chosen": -167.39117431640625,
|
|
"logps/rejected": -191.052001953125,
|
|
"loss": 0.6963,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.21066321432590485,
|
|
"rewards/margins": 0.03920959681272507,
|
|
"rewards/rejected": -0.2498728483915329,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 4.9998741882810384e-05,
|
|
"logits/chosen": -1.7698872089385986,
|
|
"logits/rejected": -1.7441281080245972,
|
|
"logps/chosen": -178.68572998046875,
|
|
"logps/rejected": -174.1964569091797,
|
|
"loss": 0.7459,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.24953651428222656,
|
|
"rewards/margins": -0.08057431131601334,
|
|
"rewards/rejected": -0.16896219551563263,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 4.999835674911443e-05,
|
|
"logits/chosen": -1.812888741493225,
|
|
"logits/rejected": -1.7789666652679443,
|
|
"logps/chosen": -228.23631286621094,
|
|
"logps/rejected": -203.85357666015625,
|
|
"loss": 0.6838,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.10872795432806015,
|
|
"rewards/margins": 0.05488254129886627,
|
|
"rewards/rejected": -0.16361048817634583,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 4.999792026664991e-05,
|
|
"logits/chosen": -1.6739952564239502,
|
|
"logits/rejected": -1.6701843738555908,
|
|
"logps/chosen": -203.80810546875,
|
|
"logps/rejected": -211.82334899902344,
|
|
"loss": 0.6853,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.3305152356624603,
|
|
"rewards/margins": 0.05953298509120941,
|
|
"rewards/rejected": -0.39004823565483093,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 4.9997432436313384e-05,
|
|
"logits/chosen": -1.6255407333374023,
|
|
"logits/rejected": -1.586310625076294,
|
|
"logps/chosen": -165.31040954589844,
|
|
"logps/rejected": -186.821044921875,
|
|
"loss": 0.646,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.3459918200969696,
|
|
"rewards/margins": 0.1395949125289917,
|
|
"rewards/rejected": -0.4855867028236389,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 4.99968932591069e-05,
|
|
"logits/chosen": -1.8984012603759766,
|
|
"logits/rejected": -1.8569310903549194,
|
|
"logps/chosen": -182.6356201171875,
|
|
"logps/rejected": -176.14752197265625,
|
|
"loss": 0.7585,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.3858329653739929,
|
|
"rewards/margins": -0.05486001819372177,
|
|
"rewards/rejected": -0.33097296953201294,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 4.999630273613799e-05,
|
|
"logits/chosen": -1.8298226594924927,
|
|
"logits/rejected": -1.812253475189209,
|
|
"logps/chosen": -163.0826416015625,
|
|
"logps/rejected": -200.62098693847656,
|
|
"loss": 0.7425,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.2680785357952118,
|
|
"rewards/margins": -0.04629645124077797,
|
|
"rewards/rejected": -0.22178205847740173,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 4.999566086861961e-05,
|
|
"logits/chosen": -1.6931625604629517,
|
|
"logits/rejected": -1.7089687585830688,
|
|
"logps/chosen": -147.44491577148438,
|
|
"logps/rejected": -150.4454345703125,
|
|
"loss": 0.7146,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.224314883351326,
|
|
"rewards/margins": 0.05584639310836792,
|
|
"rewards/rejected": -0.2801613211631775,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 4.999496765787024e-05,
|
|
"logits/chosen": -1.7311415672302246,
|
|
"logits/rejected": -1.612362027168274,
|
|
"logps/chosen": -195.65594482421875,
|
|
"logps/rejected": -194.56130981445312,
|
|
"loss": 0.7017,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.28545114398002625,
|
|
"rewards/margins": 0.020438771694898605,
|
|
"rewards/rejected": -0.30588990449905396,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 4.9994223105313774e-05,
|
|
"logits/chosen": -1.9310710430145264,
|
|
"logits/rejected": -1.9658077955245972,
|
|
"logps/chosen": -179.00779724121094,
|
|
"logps/rejected": -181.42135620117188,
|
|
"loss": 0.6605,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.2686367928981781,
|
|
"rewards/margins": 0.10176892578601837,
|
|
"rewards/rejected": -0.37040573358535767,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 4.9993427212479606e-05,
|
|
"logits/chosen": -1.7969621419906616,
|
|
"logits/rejected": -1.8055509328842163,
|
|
"logps/chosen": -176.26036071777344,
|
|
"logps/rejected": -171.2470703125,
|
|
"loss": 0.6656,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.33693569898605347,
|
|
"rewards/margins": 0.08709227293729782,
|
|
"rewards/rejected": -0.4240279793739319,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 4.999257998100254e-05,
|
|
"logits/chosen": -1.5152311325073242,
|
|
"logits/rejected": -1.553884506225586,
|
|
"logps/chosen": -179.60372924804688,
|
|
"logps/rejected": -164.34481811523438,
|
|
"loss": 0.7563,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.3611801564693451,
|
|
"rewards/margins": -0.08427368104457855,
|
|
"rewards/rejected": -0.27690649032592773,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 4.999168141262289e-05,
|
|
"logits/chosen": -1.7877562046051025,
|
|
"logits/rejected": -1.7855968475341797,
|
|
"logps/chosen": -165.04220581054688,
|
|
"logps/rejected": -179.1771240234375,
|
|
"loss": 0.7999,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.3364974856376648,
|
|
"rewards/margins": -0.1316596120595932,
|
|
"rewards/rejected": -0.20483790338039398,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 4.9990731509186376e-05,
|
|
"logits/chosen": -1.4499574899673462,
|
|
"logits/rejected": -1.4440600872039795,
|
|
"logps/chosen": -183.58778381347656,
|
|
"logps/rejected": -198.231689453125,
|
|
"loss": 0.7216,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.3034099340438843,
|
|
"rewards/margins": -0.00025239214301109314,
|
|
"rewards/rejected": -0.3031575381755829,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 4.998973027264419e-05,
|
|
"logits/chosen": -1.6583937406539917,
|
|
"logits/rejected": -1.689673900604248,
|
|
"logps/chosen": -187.60508728027344,
|
|
"logps/rejected": -212.7592315673828,
|
|
"loss": 0.6404,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.3053140342235565,
|
|
"rewards/margins": 0.17152619361877441,
|
|
"rewards/rejected": -0.47684019804000854,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 4.998867770505295e-05,
|
|
"logits/chosen": -1.6554609537124634,
|
|
"logits/rejected": -1.6350326538085938,
|
|
"logps/chosen": -181.46209716796875,
|
|
"logps/rejected": -173.83157348632812,
|
|
"loss": 0.726,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.24238371849060059,
|
|
"rewards/margins": 0.002255776897072792,
|
|
"rewards/rejected": -0.24463950097560883,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 4.9987573808574726e-05,
|
|
"logits/chosen": -1.8908902406692505,
|
|
"logits/rejected": -1.858853816986084,
|
|
"logps/chosen": -174.7786865234375,
|
|
"logps/rejected": -177.30545043945312,
|
|
"loss": 0.7269,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": 0.0712527185678482,
|
|
"rewards/margins": -0.02764507755637169,
|
|
"rewards/rejected": 0.09889779984951019,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 4.9986418585477016e-05,
|
|
"logits/chosen": -1.7081693410873413,
|
|
"logits/rejected": -1.7438371181488037,
|
|
"logps/chosen": -191.28123474121094,
|
|
"logps/rejected": -197.5807647705078,
|
|
"loss": 0.6859,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.2962351143360138,
|
|
"rewards/margins": 0.06765662133693695,
|
|
"rewards/rejected": -0.36389175057411194,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 4.998521203813274e-05,
|
|
"logits/chosen": -1.805833101272583,
|
|
"logits/rejected": -1.7511969804763794,
|
|
"logps/chosen": -166.08702087402344,
|
|
"logps/rejected": -159.9141082763672,
|
|
"loss": 0.6387,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.16753454506397247,
|
|
"rewards/margins": 0.16043387353420258,
|
|
"rewards/rejected": -0.32796838879585266,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 4.9983954169020256e-05,
|
|
"logits/chosen": -1.5241700410842896,
|
|
"logits/rejected": -1.6004612445831299,
|
|
"logps/chosen": -189.8046875,
|
|
"logps/rejected": -198.48001098632812,
|
|
"loss": 0.7803,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.35226038098335266,
|
|
"rewards/margins": -0.09872373938560486,
|
|
"rewards/rejected": -0.2535366714000702,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 4.9982644980723334e-05,
|
|
"logits/chosen": -1.3276153802871704,
|
|
"logits/rejected": -1.372768759727478,
|
|
"logps/chosen": -179.91940307617188,
|
|
"logps/rejected": -180.60751342773438,
|
|
"loss": 0.7245,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.4333783686161041,
|
|
"rewards/margins": -0.014091454446315765,
|
|
"rewards/rejected": -0.41928690671920776,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 4.998128447593117e-05,
|
|
"logits/chosen": -1.5195338726043701,
|
|
"logits/rejected": -1.4305707216262817,
|
|
"logps/chosen": -179.16712951660156,
|
|
"logps/rejected": -162.93356323242188,
|
|
"loss": 0.7394,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.3149658441543579,
|
|
"rewards/margins": -0.04299226403236389,
|
|
"rewards/rejected": -0.2719736099243164,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 4.997987265743834e-05,
|
|
"logits/chosen": -1.8512688875198364,
|
|
"logits/rejected": -1.791621446609497,
|
|
"logps/chosen": -171.71990966796875,
|
|
"logps/rejected": -177.26954650878906,
|
|
"loss": 0.7217,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.2529350221157074,
|
|
"rewards/margins": -0.008312180638313293,
|
|
"rewards/rejected": -0.2446228414773941,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 4.997840952814484e-05,
|
|
"logits/chosen": -1.8096928596496582,
|
|
"logits/rejected": -1.7790553569793701,
|
|
"logps/chosen": -174.4140167236328,
|
|
"logps/rejected": -176.01708984375,
|
|
"loss": 0.7997,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.24751965701580048,
|
|
"rewards/margins": -0.1494641900062561,
|
|
"rewards/rejected": -0.09805545210838318,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 4.9976895091056075e-05,
|
|
"logits/chosen": -1.7724186182022095,
|
|
"logits/rejected": -1.777117371559143,
|
|
"logps/chosen": -171.97897338867188,
|
|
"logps/rejected": -197.2012939453125,
|
|
"loss": 0.6544,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.3243841528892517,
|
|
"rewards/margins": 0.10846509784460068,
|
|
"rewards/rejected": -0.432849258184433,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 4.9975329349282826e-05,
|
|
"logits/chosen": -1.6474465131759644,
|
|
"logits/rejected": -1.632018804550171,
|
|
"logps/chosen": -176.29461669921875,
|
|
"logps/rejected": -191.7507781982422,
|
|
"loss": 0.7379,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.5088250637054443,
|
|
"rewards/margins": -0.030252262949943542,
|
|
"rewards/rejected": -0.4785728454589844,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 4.9973712306041256e-05,
|
|
"logits/chosen": -2.033618927001953,
|
|
"logits/rejected": -2.0342512130737305,
|
|
"logps/chosen": -198.94796752929688,
|
|
"logps/rejected": -192.28482055664062,
|
|
"loss": 0.826,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.34583836793899536,
|
|
"rewards/margins": -0.18120835721492767,
|
|
"rewards/rejected": -0.1646299660205841,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 4.997204396465292e-05,
|
|
"logits/chosen": -1.5918235778808594,
|
|
"logits/rejected": -1.5706799030303955,
|
|
"logps/chosen": -157.1723175048828,
|
|
"logps/rejected": -182.7075958251953,
|
|
"loss": 0.8134,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.5003759860992432,
|
|
"rewards/margins": -0.07385056465864182,
|
|
"rewards/rejected": -0.42652541399002075,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 4.997032432854472e-05,
|
|
"logits/chosen": -1.6958372592926025,
|
|
"logits/rejected": -1.7072336673736572,
|
|
"logps/chosen": -182.0128173828125,
|
|
"logps/rejected": -202.56802368164062,
|
|
"loss": 0.52,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.08301500976085663,
|
|
"rewards/margins": 0.4688724875450134,
|
|
"rewards/rejected": -0.5518875122070312,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 4.996855340124894e-05,
|
|
"logits/chosen": -1.7832672595977783,
|
|
"logits/rejected": -1.767440915107727,
|
|
"logps/chosen": -193.7747344970703,
|
|
"logps/rejected": -197.20289611816406,
|
|
"loss": 0.7392,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.39458268880844116,
|
|
"rewards/margins": -0.038821250200271606,
|
|
"rewards/rejected": -0.35576146841049194,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 4.996673118640323e-05,
|
|
"logits/chosen": -1.7633535861968994,
|
|
"logits/rejected": -1.7420529127120972,
|
|
"logps/chosen": -154.8060760498047,
|
|
"logps/rejected": -170.86996459960938,
|
|
"loss": 0.6108,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.15521948039531708,
|
|
"rewards/margins": 0.2389230877161026,
|
|
"rewards/rejected": -0.3941425681114197,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 4.996485768775055e-05,
|
|
"logits/chosen": -1.7529706954956055,
|
|
"logits/rejected": -1.7141683101654053,
|
|
"logps/chosen": -233.09747314453125,
|
|
"logps/rejected": -237.85906982421875,
|
|
"loss": 0.7094,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.4181414544582367,
|
|
"rewards/margins": 0.051600366830825806,
|
|
"rewards/rejected": -0.4697418212890625,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 4.996293290913926e-05,
|
|
"logits/chosen": -1.6701140403747559,
|
|
"logits/rejected": -1.684870719909668,
|
|
"logps/chosen": -192.6127166748047,
|
|
"logps/rejected": -188.32798767089844,
|
|
"loss": 0.5773,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.0851680114865303,
|
|
"rewards/margins": 0.31171905994415283,
|
|
"rewards/rejected": -0.39688706398010254,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 4.9960956854522986e-05,
|
|
"logits/chosen": -1.7777843475341797,
|
|
"logits/rejected": -1.73331618309021,
|
|
"logps/chosen": -199.59889221191406,
|
|
"logps/rejected": -174.27069091796875,
|
|
"loss": 0.785,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.5378831028938293,
|
|
"rewards/margins": -0.13611721992492676,
|
|
"rewards/rejected": -0.4017658531665802,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 4.995892952796074e-05,
|
|
"logits/chosen": -1.7307249307632446,
|
|
"logits/rejected": -1.7584043741226196,
|
|
"logps/chosen": -184.14364624023438,
|
|
"logps/rejected": -196.90878295898438,
|
|
"loss": 0.6489,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.4751961827278137,
|
|
"rewards/margins": 0.1301158368587494,
|
|
"rewards/rejected": -0.6053119897842407,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 4.995685093361682e-05,
|
|
"logits/chosen": -1.770018219947815,
|
|
"logits/rejected": -1.832364797592163,
|
|
"logps/chosen": -183.0137939453125,
|
|
"logps/rejected": -204.83804321289062,
|
|
"loss": 0.7497,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.4263853132724762,
|
|
"rewards/margins": -0.025808706879615784,
|
|
"rewards/rejected": -0.4005766212940216,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 4.9954721075760824e-05,
|
|
"logits/chosen": -1.7604196071624756,
|
|
"logits/rejected": -1.7073791027069092,
|
|
"logps/chosen": -191.46923828125,
|
|
"logps/rejected": -201.73545837402344,
|
|
"loss": 0.8181,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.5646533370018005,
|
|
"rewards/margins": -0.1740642488002777,
|
|
"rewards/rejected": -0.3905891180038452,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 4.995253995876767e-05,
|
|
"logits/chosen": -1.5798883438110352,
|
|
"logits/rejected": -1.5167430639266968,
|
|
"logps/chosen": -235.8765106201172,
|
|
"logps/rejected": -222.53204345703125,
|
|
"loss": 0.7115,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.5441933870315552,
|
|
"rewards/margins": 0.09400075674057007,
|
|
"rewards/rejected": -0.6381941437721252,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 4.995030758711756e-05,
|
|
"logits/chosen": -1.932177186012268,
|
|
"logits/rejected": -1.8748353719711304,
|
|
"logps/chosen": -182.81915283203125,
|
|
"logps/rejected": -170.09844970703125,
|
|
"loss": 0.6448,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.48413196206092834,
|
|
"rewards/margins": 0.19957152009010315,
|
|
"rewards/rejected": -0.6837034821510315,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 4.994802396539598e-05,
|
|
"logits/chosen": -1.8008129596710205,
|
|
"logits/rejected": -1.7928048372268677,
|
|
"logps/chosen": -202.68096923828125,
|
|
"logps/rejected": -199.8201904296875,
|
|
"loss": 0.8172,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -1.0157395601272583,
|
|
"rewards/margins": -0.10564298927783966,
|
|
"rewards/rejected": -0.9100965857505798,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 4.994568909829368e-05,
|
|
"logits/chosen": -1.7543656826019287,
|
|
"logits/rejected": -1.7212865352630615,
|
|
"logps/chosen": -201.3524932861328,
|
|
"logps/rejected": -218.5485382080078,
|
|
"loss": 0.9316,
|
|
"rewards/accuracies": 0.1875,
|
|
"rewards/chosen": -0.7368103265762329,
|
|
"rewards/margins": -0.3777569532394409,
|
|
"rewards/rejected": -0.359053373336792,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 4.9943302990606684e-05,
|
|
"logits/chosen": -1.7704360485076904,
|
|
"logits/rejected": -1.6632460355758667,
|
|
"logps/chosen": -187.3475341796875,
|
|
"logps/rejected": -180.01144409179688,
|
|
"loss": 0.751,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.4657554626464844,
|
|
"rewards/margins": 0.006175771355628967,
|
|
"rewards/rejected": -0.47193124890327454,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 4.994086564723626e-05,
|
|
"logits/chosen": -1.9261763095855713,
|
|
"logits/rejected": -1.9572409391403198,
|
|
"logps/chosen": -171.56101989746094,
|
|
"logps/rejected": -182.58717346191406,
|
|
"loss": 0.7349,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.7470525503158569,
|
|
"rewards/margins": -0.043389588594436646,
|
|
"rewards/rejected": -0.7036629915237427,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 4.9938377073188905e-05,
|
|
"logits/chosen": -1.9480628967285156,
|
|
"logits/rejected": -2.002164363861084,
|
|
"logps/chosen": -197.41912841796875,
|
|
"logps/rejected": -184.93325805664062,
|
|
"loss": 0.813,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.6927478909492493,
|
|
"rewards/margins": -0.12049313634634018,
|
|
"rewards/rejected": -0.5722547769546509,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 4.993583727357638e-05,
|
|
"logits/chosen": -1.6262449026107788,
|
|
"logits/rejected": -1.640842080116272,
|
|
"logps/chosen": -205.38461303710938,
|
|
"logps/rejected": -213.60650634765625,
|
|
"loss": 0.7821,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.7866306304931641,
|
|
"rewards/margins": -0.11763662099838257,
|
|
"rewards/rejected": -0.6689940094947815,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 4.993324625361565e-05,
|
|
"logits/chosen": -1.8480533361434937,
|
|
"logits/rejected": -1.8557144403457642,
|
|
"logps/chosen": -158.26290893554688,
|
|
"logps/rejected": -169.06105041503906,
|
|
"loss": 0.7438,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.6200124025344849,
|
|
"rewards/margins": -0.012443792074918747,
|
|
"rewards/rejected": -0.607568621635437,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 4.993060401862888e-05,
|
|
"logits/chosen": -1.8685041666030884,
|
|
"logits/rejected": -1.8648606538772583,
|
|
"logps/chosen": -176.7852020263672,
|
|
"logps/rejected": -183.40328979492188,
|
|
"loss": 0.6935,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.7892077565193176,
|
|
"rewards/margins": 0.08869240432977676,
|
|
"rewards/rejected": -0.877900242805481,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 4.9927910574043465e-05,
|
|
"logits/chosen": -1.9234154224395752,
|
|
"logits/rejected": -1.904573917388916,
|
|
"logps/chosen": -159.97625732421875,
|
|
"logps/rejected": -152.645263671875,
|
|
"loss": 0.7778,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.6230319142341614,
|
|
"rewards/margins": -0.06979034841060638,
|
|
"rewards/rejected": -0.5532415509223938,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 4.992516592539196e-05,
|
|
"logits/chosen": -1.6896902322769165,
|
|
"logits/rejected": -1.7036737203598022,
|
|
"logps/chosen": -148.6313018798828,
|
|
"logps/rejected": -164.7644500732422,
|
|
"loss": 0.5546,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.29331690073013306,
|
|
"rewards/margins": 0.44352981448173523,
|
|
"rewards/rejected": -0.7368468046188354,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 4.9922370078312105e-05,
|
|
"logits/chosen": -2.013890266418457,
|
|
"logits/rejected": -1.9424934387207031,
|
|
"logps/chosen": -215.90118408203125,
|
|
"logps/rejected": -209.59071350097656,
|
|
"loss": 0.4938,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.2655085027217865,
|
|
"rewards/margins": 0.5123203992843628,
|
|
"rewards/rejected": -0.7778289318084717,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 4.991952303854682e-05,
|
|
"logits/chosen": -1.8328962326049805,
|
|
"logits/rejected": -1.8138638734817505,
|
|
"logps/chosen": -170.13475036621094,
|
|
"logps/rejected": -176.11810302734375,
|
|
"loss": 0.6684,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.3248033821582794,
|
|
"rewards/margins": 0.12224595248699188,
|
|
"rewards/rejected": -0.4470493197441101,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 4.9916624811944175e-05,
|
|
"logits/chosen": -1.9051162004470825,
|
|
"logits/rejected": -1.9407715797424316,
|
|
"logps/chosen": -177.2139434814453,
|
|
"logps/rejected": -185.92947387695312,
|
|
"loss": 0.6297,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": -0.6396099328994751,
|
|
"rewards/margins": 0.1931779384613037,
|
|
"rewards/rejected": -0.832787811756134,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 4.991367540445735e-05,
|
|
"logits/chosen": -1.7430989742279053,
|
|
"logits/rejected": -1.7986749410629272,
|
|
"logps/chosen": -199.38021850585938,
|
|
"logps/rejected": -195.27647399902344,
|
|
"loss": 0.6881,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.8835601210594177,
|
|
"rewards/margins": 0.10807879269123077,
|
|
"rewards/rejected": -0.9916388392448425,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 4.991067482214471e-05,
|
|
"logits/chosen": -1.868577241897583,
|
|
"logits/rejected": -1.799201488494873,
|
|
"logps/chosen": -177.93130493164062,
|
|
"logps/rejected": -164.038330078125,
|
|
"loss": 0.7179,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.7006940245628357,
|
|
"rewards/margins": 0.020756253972649574,
|
|
"rewards/rejected": -0.7214502096176147,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 4.9907623071169686e-05,
|
|
"logits/chosen": -1.8050721883773804,
|
|
"logits/rejected": -1.7359880208969116,
|
|
"logps/chosen": -197.66583251953125,
|
|
"logps/rejected": -172.34146118164062,
|
|
"loss": 0.6379,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.7713190317153931,
|
|
"rewards/margins": 0.19513630867004395,
|
|
"rewards/rejected": -0.966455340385437,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 4.990452015780085e-05,
|
|
"logits/chosen": -1.74982750415802,
|
|
"logits/rejected": -1.719763159751892,
|
|
"logps/chosen": -204.21517944335938,
|
|
"logps/rejected": -196.62576293945312,
|
|
"loss": 0.7434,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.35949403047561646,
|
|
"rewards/margins": 0.03506145626306534,
|
|
"rewards/rejected": -0.3945554494857788,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 4.9901366088411846e-05,
|
|
"logits/chosen": -1.6477172374725342,
|
|
"logits/rejected": -1.622018814086914,
|
|
"logps/chosen": -207.30174255371094,
|
|
"logps/rejected": -229.1028289794922,
|
|
"loss": 0.9207,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -1.1269769668579102,
|
|
"rewards/margins": -0.2236841320991516,
|
|
"rewards/rejected": -0.9032928943634033,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 4.98981608694814e-05,
|
|
"logits/chosen": -1.8223265409469604,
|
|
"logits/rejected": -1.8162957429885864,
|
|
"logps/chosen": -171.70675659179688,
|
|
"logps/rejected": -171.83108520507812,
|
|
"loss": 0.9174,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.8722136616706848,
|
|
"rewards/margins": -0.21637174487113953,
|
|
"rewards/rejected": -0.6558419466018677,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 4.9894904507593316e-05,
|
|
"logits/chosen": -1.9424258470535278,
|
|
"logits/rejected": -1.828155279159546,
|
|
"logps/chosen": -196.1715545654297,
|
|
"logps/rejected": -192.08624267578125,
|
|
"loss": 0.7506,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.5839821696281433,
|
|
"rewards/margins": 0.0038854647427797318,
|
|
"rewards/rejected": -0.5878676772117615,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 4.989159700943643e-05,
|
|
"logits/chosen": -1.623518705368042,
|
|
"logits/rejected": -1.679386854171753,
|
|
"logps/chosen": -175.6849365234375,
|
|
"logps/rejected": -179.52059936523438,
|
|
"loss": 0.9491,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.7352603673934937,
|
|
"rewards/margins": -0.2515270411968231,
|
|
"rewards/rejected": -0.4837333559989929,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 4.988823838180464e-05,
|
|
"logits/chosen": -1.997894048690796,
|
|
"logits/rejected": -1.9666211605072021,
|
|
"logps/chosen": -167.2881622314453,
|
|
"logps/rejected": -183.72474670410156,
|
|
"loss": 0.6663,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.5555349588394165,
|
|
"rewards/margins": 0.17635540664196014,
|
|
"rewards/rejected": -0.7318904399871826,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 4.988482863159684e-05,
|
|
"logits/chosen": -1.8912848234176636,
|
|
"logits/rejected": -1.968542218208313,
|
|
"logps/chosen": -174.55911254882812,
|
|
"logps/rejected": -169.36610412597656,
|
|
"loss": 0.7905,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.227503702044487,
|
|
"rewards/margins": -0.08394889533519745,
|
|
"rewards/rejected": -0.14355483651161194,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 4.988136776581696e-05,
|
|
"logits/chosen": -2.151402711868286,
|
|
"logits/rejected": -2.186088800430298,
|
|
"logps/chosen": -157.3328094482422,
|
|
"logps/rejected": -150.9196319580078,
|
|
"loss": 0.6989,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.29456794261932373,
|
|
"rewards/margins": 0.04511295258998871,
|
|
"rewards/rejected": -0.33968091011047363,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 4.9877855791573915e-05,
|
|
"logits/chosen": -1.8164244890213013,
|
|
"logits/rejected": -1.8539032936096191,
|
|
"logps/chosen": -188.2481689453125,
|
|
"logps/rejected": -171.02090454101562,
|
|
"loss": 0.9341,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.5969239473342896,
|
|
"rewards/margins": -0.33455953001976013,
|
|
"rewards/rejected": -0.2623644471168518,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 4.9874292716081595e-05,
|
|
"logits/chosen": -1.7419726848602295,
|
|
"logits/rejected": -1.7444337606430054,
|
|
"logps/chosen": -169.46658325195312,
|
|
"logps/rejected": -173.33348083496094,
|
|
"loss": 0.6817,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.4356076121330261,
|
|
"rewards/margins": 0.14688673615455627,
|
|
"rewards/rejected": -0.5824943780899048,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 4.9870678546658865e-05,
|
|
"logits/chosen": -1.6884466409683228,
|
|
"logits/rejected": -1.7295485734939575,
|
|
"logps/chosen": -160.8187713623047,
|
|
"logps/rejected": -176.16746520996094,
|
|
"loss": 0.9101,
|
|
"rewards/accuracies": 0.125,
|
|
"rewards/chosen": -0.6096308827400208,
|
|
"rewards/margins": -0.28187263011932373,
|
|
"rewards/rejected": -0.3277583122253418,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 4.9867013290729535e-05,
|
|
"logits/chosen": -1.932777762413025,
|
|
"logits/rejected": -1.8993282318115234,
|
|
"logps/chosen": -198.2738037109375,
|
|
"logps/rejected": -197.27252197265625,
|
|
"loss": 0.7183,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.19178181886672974,
|
|
"rewards/margins": 0.006053738296031952,
|
|
"rewards/rejected": -0.1978355497121811,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 4.986329695582237e-05,
|
|
"logits/chosen": -2.03489351272583,
|
|
"logits/rejected": -2.076827049255371,
|
|
"logps/chosen": -179.49679565429688,
|
|
"logps/rejected": -177.2965545654297,
|
|
"loss": 0.7798,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.6112560629844666,
|
|
"rewards/margins": -0.09085651487112045,
|
|
"rewards/rejected": -0.5203995704650879,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 4.985952954957103e-05,
|
|
"logits/chosen": -1.8877630233764648,
|
|
"logits/rejected": -1.8290894031524658,
|
|
"logps/chosen": -233.42221069335938,
|
|
"logps/rejected": -228.90179443359375,
|
|
"loss": 0.7997,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.4796496629714966,
|
|
"rewards/margins": 0.0218522846698761,
|
|
"rewards/rejected": -0.5015019774436951,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 4.985571107971408e-05,
|
|
"logits/chosen": -1.8358758687973022,
|
|
"logits/rejected": -1.8217942714691162,
|
|
"logps/chosen": -173.35556030273438,
|
|
"logps/rejected": -176.17031860351562,
|
|
"loss": 0.6678,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": 0.03331407904624939,
|
|
"rewards/margins": 0.18146347999572754,
|
|
"rewards/rejected": -0.14814940094947815,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 4.9851841554095e-05,
|
|
"logits/chosen": -1.9195314645767212,
|
|
"logits/rejected": -1.9220951795578003,
|
|
"logps/chosen": -233.7957305908203,
|
|
"logps/rejected": -216.81581115722656,
|
|
"loss": 0.5707,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.0034320950508117676,
|
|
"rewards/margins": 0.3490698039531708,
|
|
"rewards/rejected": -0.35250189900398254,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 4.9847920980662134e-05,
|
|
"logits/chosen": -1.598222255706787,
|
|
"logits/rejected": -1.591965913772583,
|
|
"logps/chosen": -226.130615234375,
|
|
"logps/rejected": -236.63760375976562,
|
|
"loss": 0.6588,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": 0.06091824173927307,
|
|
"rewards/margins": 0.13529105484485626,
|
|
"rewards/rejected": -0.07437281310558319,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 4.984394936746865e-05,
|
|
"logits/chosen": -1.7949796915054321,
|
|
"logits/rejected": -1.805631399154663,
|
|
"logps/chosen": -232.09498596191406,
|
|
"logps/rejected": -235.5016632080078,
|
|
"loss": 0.709,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.5284535884857178,
|
|
"rewards/margins": 0.1780376434326172,
|
|
"rewards/rejected": -0.706491231918335,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 4.98399267226726e-05,
|
|
"logits/chosen": -2.0285134315490723,
|
|
"logits/rejected": -2.060884714126587,
|
|
"logps/chosen": -183.62449645996094,
|
|
"logps/rejected": -172.28872680664062,
|
|
"loss": 0.7795,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.42201852798461914,
|
|
"rewards/margins": -0.029273340478539467,
|
|
"rewards/rejected": -0.3927451968193054,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 4.9835853054536846e-05,
|
|
"logits/chosen": -1.7763793468475342,
|
|
"logits/rejected": -1.8531831502914429,
|
|
"logps/chosen": -182.07516479492188,
|
|
"logps/rejected": -205.60504150390625,
|
|
"loss": 0.9431,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.2773244380950928,
|
|
"rewards/margins": -0.36518311500549316,
|
|
"rewards/rejected": 0.08785867691040039,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 4.9831728371429046e-05,
|
|
"logits/chosen": -1.8723288774490356,
|
|
"logits/rejected": -1.9752717018127441,
|
|
"logps/chosen": -200.94346618652344,
|
|
"logps/rejected": -210.53933715820312,
|
|
"loss": 0.7466,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.15453863143920898,
|
|
"rewards/margins": -0.037006717175245285,
|
|
"rewards/rejected": -0.117531917989254,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 4.982755268182164e-05,
|
|
"logits/chosen": -1.7186784744262695,
|
|
"logits/rejected": -1.752870798110962,
|
|
"logps/chosen": -168.73394775390625,
|
|
"logps/rejected": -177.32054138183594,
|
|
"loss": 0.7175,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.1720157265663147,
|
|
"rewards/margins": -0.019572071731090546,
|
|
"rewards/rejected": -0.15244367718696594,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 4.982332599429187e-05,
|
|
"logits/chosen": -1.9437085390090942,
|
|
"logits/rejected": -1.9051276445388794,
|
|
"logps/chosen": -168.2744903564453,
|
|
"logps/rejected": -158.18289184570312,
|
|
"loss": 0.8465,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.40080204606056213,
|
|
"rewards/margins": -0.24280087649822235,
|
|
"rewards/rejected": -0.15800118446350098,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 4.981904831752171e-05,
|
|
"logits/chosen": -1.9985157251358032,
|
|
"logits/rejected": -1.9973095655441284,
|
|
"logps/chosen": -182.0140380859375,
|
|
"logps/rejected": -169.61883544921875,
|
|
"loss": 0.6244,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.13748487830162048,
|
|
"rewards/margins": 0.31921717524528503,
|
|
"rewards/rejected": -0.4567020535469055,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 4.981471966029787e-05,
|
|
"logits/chosen": -1.9101628065109253,
|
|
"logits/rejected": -1.9257937669754028,
|
|
"logps/chosen": -192.24288940429688,
|
|
"logps/rejected": -188.32144165039062,
|
|
"loss": 0.6148,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": 0.2866635024547577,
|
|
"rewards/margins": 0.2923651337623596,
|
|
"rewards/rejected": -0.00570157915353775,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 4.981034003151178e-05,
|
|
"logits/chosen": -1.8881627321243286,
|
|
"logits/rejected": -1.9550986289978027,
|
|
"logps/chosen": -195.71372985839844,
|
|
"logps/rejected": -210.1089324951172,
|
|
"loss": 0.8689,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.2077367603778839,
|
|
"rewards/margins": -0.24497191607952118,
|
|
"rewards/rejected": 0.03723515570163727,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 4.980590944015958e-05,
|
|
"logits/chosen": -1.7422230243682861,
|
|
"logits/rejected": -1.7998031377792358,
|
|
"logps/chosen": -216.39398193359375,
|
|
"logps/rejected": -222.33880615234375,
|
|
"loss": 0.7159,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": 0.11723195761442184,
|
|
"rewards/margins": 0.26700979471206665,
|
|
"rewards/rejected": -0.14977779984474182,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 4.98014278953421e-05,
|
|
"logits/chosen": -1.8071612119674683,
|
|
"logits/rejected": -1.8180828094482422,
|
|
"logps/chosen": -176.4374542236328,
|
|
"logps/rejected": -189.68438720703125,
|
|
"loss": 0.8578,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": 0.02567705512046814,
|
|
"rewards/margins": -0.21896688640117645,
|
|
"rewards/rejected": 0.2446439117193222,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 4.979689540626479e-05,
|
|
"logits/chosen": -1.7617331743240356,
|
|
"logits/rejected": -1.8094758987426758,
|
|
"logps/chosen": -158.13441467285156,
|
|
"logps/rejected": -177.64797973632812,
|
|
"loss": 0.5662,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 0.3027116656303406,
|
|
"rewards/margins": 0.35941970348358154,
|
|
"rewards/rejected": -0.05670810118317604,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 4.9792311982237774e-05,
|
|
"logits/chosen": -1.528577208518982,
|
|
"logits/rejected": -1.5104334354400635,
|
|
"logps/chosen": -164.3386993408203,
|
|
"logps/rejected": -167.84840393066406,
|
|
"loss": 0.7506,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.04946248233318329,
|
|
"rewards/margins": -0.02245260775089264,
|
|
"rewards/rejected": -0.027009889483451843,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 4.9787677632675825e-05,
|
|
"logits/chosen": -1.86935555934906,
|
|
"logits/rejected": -1.8780128955841064,
|
|
"logps/chosen": -167.26119995117188,
|
|
"logps/rejected": -187.39584350585938,
|
|
"loss": 0.6634,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 0.09376392513513565,
|
|
"rewards/margins": 0.16032886505126953,
|
|
"rewards/rejected": -0.06656493991613388,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 4.978299236709826e-05,
|
|
"logits/chosen": -1.79931640625,
|
|
"logits/rejected": -1.7840967178344727,
|
|
"logps/chosen": -173.83987426757812,
|
|
"logps/rejected": -174.4556427001953,
|
|
"loss": 0.8509,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.17128746211528778,
|
|
"rewards/margins": -0.21691852807998657,
|
|
"rewards/rejected": 0.04563106596469879,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 4.977825619512904e-05,
|
|
"logits/chosen": -1.9714162349700928,
|
|
"logits/rejected": -1.9255212545394897,
|
|
"logps/chosen": -216.35391235351562,
|
|
"logps/rejected": -213.25570678710938,
|
|
"loss": 0.787,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.07267741113901138,
|
|
"rewards/margins": -0.07426212728023529,
|
|
"rewards/rejected": 0.0015847217291593552,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 4.977346912649666e-05,
|
|
"logits/chosen": -1.8389030694961548,
|
|
"logits/rejected": -1.872612476348877,
|
|
"logps/chosen": -199.1173095703125,
|
|
"logps/rejected": -170.51205444335938,
|
|
"loss": 0.6682,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.054506294429302216,
|
|
"rewards/margins": 0.1410951465368271,
|
|
"rewards/rejected": -0.1956014782190323,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 4.9768631171034175e-05,
|
|
"logits/chosen": -1.6105570793151855,
|
|
"logits/rejected": -1.655145525932312,
|
|
"logps/chosen": -183.67723083496094,
|
|
"logps/rejected": -176.943115234375,
|
|
"loss": 0.8043,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.09822743386030197,
|
|
"rewards/margins": -0.16278451681137085,
|
|
"rewards/rejected": 0.06455708295106888,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 4.9763742338679145e-05,
|
|
"logits/chosen": -1.589104413986206,
|
|
"logits/rejected": -1.544286847114563,
|
|
"logps/chosen": -188.53167724609375,
|
|
"logps/rejected": -190.18521118164062,
|
|
"loss": 0.8959,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.08583441376686096,
|
|
"rewards/margins": -0.30193185806274414,
|
|
"rewards/rejected": 0.21609747409820557,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 4.975880263947367e-05,
|
|
"logits/chosen": -1.5240559577941895,
|
|
"logits/rejected": -1.5749508142471313,
|
|
"logps/chosen": -173.70506286621094,
|
|
"logps/rejected": -174.54681396484375,
|
|
"loss": 0.708,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": 0.030573375523090363,
|
|
"rewards/margins": 0.030332941561937332,
|
|
"rewards/rejected": 0.00024041905999183655,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 4.9753812083564304e-05,
|
|
"logits/chosen": -1.879758596420288,
|
|
"logits/rejected": -1.8182477951049805,
|
|
"logps/chosen": -186.97525024414062,
|
|
"logps/rejected": -159.67259216308594,
|
|
"loss": 0.7133,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": 0.18908704817295074,
|
|
"rewards/margins": 0.015400439500808716,
|
|
"rewards/rejected": 0.17368660867214203,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 4.974877068120208e-05,
|
|
"logits/chosen": -1.8003509044647217,
|
|
"logits/rejected": -1.8137016296386719,
|
|
"logps/chosen": -180.23757934570312,
|
|
"logps/rejected": -191.359375,
|
|
"loss": 0.7853,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": 0.0054572634398937225,
|
|
"rewards/margins": -0.046278372406959534,
|
|
"rewards/rejected": 0.05173564702272415,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 4.974367844274248e-05,
|
|
"logits/chosen": -1.7015259265899658,
|
|
"logits/rejected": -1.6682251691818237,
|
|
"logps/chosen": -155.98707580566406,
|
|
"logps/rejected": -146.8739776611328,
|
|
"loss": 0.7147,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": 0.09909596294164658,
|
|
"rewards/margins": 0.09801648557186127,
|
|
"rewards/rejected": 0.001079469919204712,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 4.973853537864538e-05,
|
|
"logits/chosen": -1.9078797101974487,
|
|
"logits/rejected": -1.8878577947616577,
|
|
"logps/chosen": -213.63034057617188,
|
|
"logps/rejected": -196.01133728027344,
|
|
"loss": 0.7176,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.07858332991600037,
|
|
"rewards/margins": 0.04805755987763405,
|
|
"rewards/rejected": -0.12664087116718292,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 4.973334149947508e-05,
|
|
"logits/chosen": -1.8100643157958984,
|
|
"logits/rejected": -1.8916385173797607,
|
|
"logps/chosen": -154.20033264160156,
|
|
"logps/rejected": -180.57833862304688,
|
|
"loss": 0.9816,
|
|
"rewards/accuracies": 0.25,
|
|
"rewards/chosen": -0.11480588465929031,
|
|
"rewards/margins": -0.3577505350112915,
|
|
"rewards/rejected": 0.24294468760490417,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 4.972809681590026e-05,
|
|
"logits/chosen": -1.6414841413497925,
|
|
"logits/rejected": -1.6490445137023926,
|
|
"logps/chosen": -186.58531188964844,
|
|
"logps/rejected": -188.83343505859375,
|
|
"loss": 0.7131,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": 0.0989757627248764,
|
|
"rewards/margins": 0.02306460589170456,
|
|
"rewards/rejected": 0.07591113448143005,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 4.972280133869396e-05,
|
|
"logits/chosen": -1.7980338335037231,
|
|
"logits/rejected": -1.8528972864151,
|
|
"logps/chosen": -194.47787475585938,
|
|
"logps/rejected": -186.64306640625,
|
|
"loss": 0.7408,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": 0.09773577749729156,
|
|
"rewards/margins": 0.03546319156885147,
|
|
"rewards/rejected": 0.06227259710431099,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 4.971745507873352e-05,
|
|
"logits/chosen": -1.8588396310806274,
|
|
"logits/rejected": -1.7567483186721802,
|
|
"logps/chosen": -181.69345092773438,
|
|
"logps/rejected": -187.5961151123047,
|
|
"loss": 0.8451,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.1902497261762619,
|
|
"rewards/margins": -0.21404078602790833,
|
|
"rewards/rejected": 0.023791024461388588,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 4.971205804700063e-05,
|
|
"logits/chosen": -1.8144092559814453,
|
|
"logits/rejected": -1.8621768951416016,
|
|
"logps/chosen": -143.86961364746094,
|
|
"logps/rejected": -168.99295043945312,
|
|
"loss": 0.8074,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.11274349689483643,
|
|
"rewards/margins": -0.13580113649368286,
|
|
"rewards/rejected": 0.023057660087943077,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 4.970661025458125e-05,
|
|
"logits/chosen": -1.573486089706421,
|
|
"logits/rejected": -1.5931901931762695,
|
|
"logps/chosen": -157.45480346679688,
|
|
"logps/rejected": -169.86550903320312,
|
|
"loss": 0.7474,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": 0.007056853733956814,
|
|
"rewards/margins": -0.06664810329675674,
|
|
"rewards/rejected": 0.07370495796203613,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 4.9701111712665625e-05,
|
|
"logits/chosen": -2.0106117725372314,
|
|
"logits/rejected": -1.9366306066513062,
|
|
"logps/chosen": -190.40353393554688,
|
|
"logps/rejected": -176.56820678710938,
|
|
"loss": 0.6494,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": 0.12490460276603699,
|
|
"rewards/margins": 0.2138100564479828,
|
|
"rewards/rejected": -0.08890549838542938,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 4.969556243254822e-05,
|
|
"logits/chosen": -1.7928516864776611,
|
|
"logits/rejected": -1.8632910251617432,
|
|
"logps/chosen": -232.3812255859375,
|
|
"logps/rejected": -237.18328857421875,
|
|
"loss": 0.725,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": 0.20639315247535706,
|
|
"rewards/margins": 0.010047540068626404,
|
|
"rewards/rejected": 0.19634561240673065,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 4.968996242562774e-05,
|
|
"logits/chosen": -1.8890585899353027,
|
|
"logits/rejected": -1.8688653707504272,
|
|
"logps/chosen": -191.57362365722656,
|
|
"logps/rejected": -199.00550842285156,
|
|
"loss": 0.7262,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.2404320389032364,
|
|
"rewards/margins": -0.03374467045068741,
|
|
"rewards/rejected": -0.2066873461008072,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 4.968431170340706e-05,
|
|
"logits/chosen": -1.6715140342712402,
|
|
"logits/rejected": -1.619262456893921,
|
|
"logps/chosen": -178.09326171875,
|
|
"logps/rejected": -181.5880126953125,
|
|
"loss": 0.7146,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": 0.08340275287628174,
|
|
"rewards/margins": 0.0807909369468689,
|
|
"rewards/rejected": 0.0026118261739611626,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 4.9678610277493275e-05,
|
|
"logits/chosen": -1.6335276365280151,
|
|
"logits/rejected": -1.6984977722167969,
|
|
"logps/chosen": -181.51470947265625,
|
|
"logps/rejected": -188.1268768310547,
|
|
"loss": 0.6818,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": 0.13411936163902283,
|
|
"rewards/margins": 0.05135034769773483,
|
|
"rewards/rejected": 0.0827689841389656,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 4.967285815959759e-05,
|
|
"logits/chosen": -1.5702835321426392,
|
|
"logits/rejected": -1.6215555667877197,
|
|
"logps/chosen": -177.0418701171875,
|
|
"logps/rejected": -186.50494384765625,
|
|
"loss": 0.6603,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.04579095542430878,
|
|
"rewards/margins": 0.188707172870636,
|
|
"rewards/rejected": -0.23449814319610596,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 4.9667055361535354e-05,
|
|
"logits/chosen": -1.7180269956588745,
|
|
"logits/rejected": -1.7135778665542603,
|
|
"logps/chosen": -195.20785522460938,
|
|
"logps/rejected": -210.96878051757812,
|
|
"loss": 0.9002,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": 0.03346429392695427,
|
|
"rewards/margins": -0.26186707615852356,
|
|
"rewards/rejected": 0.29533132910728455,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 4.9661201895226e-05,
|
|
"logits/chosen": -1.7542705535888672,
|
|
"logits/rejected": -1.7284282445907593,
|
|
"logps/chosen": -173.01751708984375,
|
|
"logps/rejected": -157.28419494628906,
|
|
"loss": 0.6185,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 0.02107839845120907,
|
|
"rewards/margins": 0.2878818213939667,
|
|
"rewards/rejected": -0.26680341362953186,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 4.965529777269306e-05,
|
|
"logits/chosen": -1.736549973487854,
|
|
"logits/rejected": -1.771423578262329,
|
|
"logps/chosen": -158.92160034179688,
|
|
"logps/rejected": -163.87283325195312,
|
|
"loss": 0.7139,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": 0.006686069071292877,
|
|
"rewards/margins": 0.04404951259493828,
|
|
"rewards/rejected": -0.0373634397983551,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 4.964934300606411e-05,
|
|
"logits/chosen": -1.511568307876587,
|
|
"logits/rejected": -1.5145093202590942,
|
|
"logps/chosen": -170.07809448242188,
|
|
"logps/rejected": -186.4696807861328,
|
|
"loss": 0.607,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": 0.2800918221473694,
|
|
"rewards/margins": 0.30444180965423584,
|
|
"rewards/rejected": -0.02434997633099556,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 4.964333760757074e-05,
|
|
"logits/chosen": -1.436962366104126,
|
|
"logits/rejected": -1.4119391441345215,
|
|
"logps/chosen": -309.4395446777344,
|
|
"logps/rejected": -291.1634521484375,
|
|
"loss": 0.6898,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": 0.0010376125574111938,
|
|
"rewards/margins": 0.03738358989357948,
|
|
"rewards/rejected": -0.036345988512039185,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 4.963728158954856e-05,
|
|
"logits/chosen": -1.891182541847229,
|
|
"logits/rejected": -1.8770077228546143,
|
|
"logps/chosen": -162.81988525390625,
|
|
"logps/rejected": -169.5299072265625,
|
|
"loss": 0.8258,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": 0.07328500598669052,
|
|
"rewards/margins": -0.13723579049110413,
|
|
"rewards/rejected": 0.21052080392837524,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 4.963117496443715e-05,
|
|
"logits/chosen": -1.8470525741577148,
|
|
"logits/rejected": -1.8625476360321045,
|
|
"logps/chosen": -166.7591552734375,
|
|
"logps/rejected": -194.91290283203125,
|
|
"loss": 0.9473,
|
|
"rewards/accuracies": 0.1875,
|
|
"rewards/chosen": -0.4374409317970276,
|
|
"rewards/margins": -0.35452863574028015,
|
|
"rewards/rejected": -0.08291231095790863,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 4.9625017744780045e-05,
|
|
"logits/chosen": -1.5161206722259521,
|
|
"logits/rejected": -1.4952480792999268,
|
|
"logps/chosen": -173.3487548828125,
|
|
"logps/rejected": -167.93292236328125,
|
|
"loss": 0.8109,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.0004083700478076935,
|
|
"rewards/margins": -0.171901136636734,
|
|
"rewards/rejected": 0.1714927703142166,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 4.96188099432247e-05,
|
|
"logits/chosen": -1.617663025856018,
|
|
"logits/rejected": -1.6117898225784302,
|
|
"logps/chosen": -194.1741180419922,
|
|
"logps/rejected": -188.07025146484375,
|
|
"loss": 0.7477,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.21349965035915375,
|
|
"rewards/margins": 0.016070939600467682,
|
|
"rewards/rejected": -0.22957059741020203,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 4.9612551572522464e-05,
|
|
"logits/chosen": -1.899290680885315,
|
|
"logits/rejected": -1.8964145183563232,
|
|
"logps/chosen": -192.67469787597656,
|
|
"logps/rejected": -179.21112060546875,
|
|
"loss": 1.0079,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.24318143725395203,
|
|
"rewards/margins": -0.3611811697483063,
|
|
"rewards/rejected": 0.11799970269203186,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 4.960624264552858e-05,
|
|
"logits/chosen": -1.6361061334609985,
|
|
"logits/rejected": -1.6821738481521606,
|
|
"logps/chosen": -194.6638641357422,
|
|
"logps/rejected": -179.90225219726562,
|
|
"loss": 0.6424,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": 0.19569191336631775,
|
|
"rewards/margins": 0.2166380137205124,
|
|
"rewards/rejected": -0.020946092903614044,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 4.9599883175202124e-05,
|
|
"logits/chosen": -1.522665023803711,
|
|
"logits/rejected": -1.4538843631744385,
|
|
"logps/chosen": -222.08251953125,
|
|
"logps/rejected": -200.12472534179688,
|
|
"loss": 0.8261,
|
|
"rewards/accuracies": 0.25,
|
|
"rewards/chosen": -0.26220959424972534,
|
|
"rewards/margins": -0.16530472040176392,
|
|
"rewards/rejected": -0.09690490365028381,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 4.9593473174605974e-05,
|
|
"logits/chosen": -1.4936704635620117,
|
|
"logits/rejected": -1.5721888542175293,
|
|
"logps/chosen": -182.029541015625,
|
|
"logps/rejected": -204.53567504882812,
|
|
"loss": 0.6881,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.3360684812068939,
|
|
"rewards/margins": 0.14660386741161346,
|
|
"rewards/rejected": -0.4826723635196686,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 4.958701265690685e-05,
|
|
"logits/chosen": -1.6544736623764038,
|
|
"logits/rejected": -1.672118902206421,
|
|
"logps/chosen": -174.0331268310547,
|
|
"logps/rejected": -203.7425079345703,
|
|
"loss": 0.7058,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.34017136693000793,
|
|
"rewards/margins": 0.01865684613585472,
|
|
"rewards/rejected": -0.3588281571865082,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 4.958050163537519e-05,
|
|
"logits/chosen": -1.8430697917938232,
|
|
"logits/rejected": -1.7734485864639282,
|
|
"logps/chosen": -208.96421813964844,
|
|
"logps/rejected": -212.52711486816406,
|
|
"loss": 0.8319,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.6053764820098877,
|
|
"rewards/margins": -0.1353476196527481,
|
|
"rewards/rejected": -0.4700288772583008,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 4.957394012338519e-05,
|
|
"logits/chosen": -1.9725829362869263,
|
|
"logits/rejected": -1.9301397800445557,
|
|
"logps/chosen": -229.4776153564453,
|
|
"logps/rejected": -215.60470581054688,
|
|
"loss": 0.7281,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": 0.046676263213157654,
|
|
"rewards/margins": 0.04501792788505554,
|
|
"rewards/rejected": 0.0016583409160375595,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 4.956732813441477e-05,
|
|
"logits/chosen": -1.733205795288086,
|
|
"logits/rejected": -1.617655873298645,
|
|
"logps/chosen": -174.42959594726562,
|
|
"logps/rejected": -154.43499755859375,
|
|
"loss": 0.9236,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.30461782217025757,
|
|
"rewards/margins": -0.30838024616241455,
|
|
"rewards/rejected": 0.0037624058313667774,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 4.956066568204552e-05,
|
|
"logits/chosen": -1.6661994457244873,
|
|
"logits/rejected": -1.7003294229507446,
|
|
"logps/chosen": -179.96853637695312,
|
|
"logps/rejected": -187.9959716796875,
|
|
"loss": 0.8541,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.47250720858573914,
|
|
"rewards/margins": -0.12853975594043732,
|
|
"rewards/rejected": -0.3439674377441406,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 4.955395277996268e-05,
|
|
"logits/chosen": -1.786563754081726,
|
|
"logits/rejected": -1.7648732662200928,
|
|
"logps/chosen": -193.4469757080078,
|
|
"logps/rejected": -202.775146484375,
|
|
"loss": 0.5666,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.182941734790802,
|
|
"rewards/margins": 0.3387344479560852,
|
|
"rewards/rejected": -0.5216761231422424,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 4.954718944195512e-05,
|
|
"logits/chosen": -1.7109639644622803,
|
|
"logits/rejected": -1.6762810945510864,
|
|
"logps/chosen": -154.25538635253906,
|
|
"logps/rejected": -158.89683532714844,
|
|
"loss": 0.7073,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.25806286931037903,
|
|
"rewards/margins": 0.007834136486053467,
|
|
"rewards/rejected": -0.2658970057964325,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 4.954037568191534e-05,
|
|
"logits/chosen": -1.7765631675720215,
|
|
"logits/rejected": -1.8208155632019043,
|
|
"logps/chosen": -189.78883361816406,
|
|
"logps/rejected": -214.85067749023438,
|
|
"loss": 0.7049,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.3182518482208252,
|
|
"rewards/margins": 0.08276516944169998,
|
|
"rewards/rejected": -0.4010169804096222,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 4.9533511513839384e-05,
|
|
"logits/chosen": -1.6697825193405151,
|
|
"logits/rejected": -1.688278079032898,
|
|
"logps/chosen": -174.92227172851562,
|
|
"logps/rejected": -173.88099670410156,
|
|
"loss": 0.579,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.08302205801010132,
|
|
"rewards/margins": 0.34601137042045593,
|
|
"rewards/rejected": -0.42903345823287964,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 4.9526596951826824e-05,
|
|
"logits/chosen": -1.9389506578445435,
|
|
"logits/rejected": -1.8745498657226562,
|
|
"logps/chosen": -193.29092407226562,
|
|
"logps/rejected": -187.18719482421875,
|
|
"loss": 0.8118,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.6870115995407104,
|
|
"rewards/margins": -0.025065027177333832,
|
|
"rewards/rejected": -0.6619465351104736,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 4.951963201008076e-05,
|
|
"logits/chosen": -1.9140545129776,
|
|
"logits/rejected": -1.8272314071655273,
|
|
"logps/chosen": -154.29287719726562,
|
|
"logps/rejected": -149.56021118164062,
|
|
"loss": 0.8096,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.10280251502990723,
|
|
"rewards/margins": -0.07113885879516602,
|
|
"rewards/rejected": -0.03166365623474121,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 4.951261670290781e-05,
|
|
"logits/chosen": -2.0082364082336426,
|
|
"logits/rejected": -2.00789737701416,
|
|
"logps/chosen": -186.12057495117188,
|
|
"logps/rejected": -193.4689483642578,
|
|
"loss": 0.7454,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.14050771296024323,
|
|
"rewards/margins": 0.02027921937406063,
|
|
"rewards/rejected": -0.16078691184520721,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 4.950555104471799e-05,
|
|
"logits/chosen": -1.851813554763794,
|
|
"logits/rejected": -1.8402018547058105,
|
|
"logps/chosen": -157.9239959716797,
|
|
"logps/rejected": -147.2781524658203,
|
|
"loss": 0.7481,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.21430522203445435,
|
|
"rewards/margins": -0.030568838119506836,
|
|
"rewards/rejected": -0.1837363839149475,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 4.949843505002477e-05,
|
|
"logits/chosen": -1.9467929601669312,
|
|
"logits/rejected": -1.976270079612732,
|
|
"logps/chosen": -170.30682373046875,
|
|
"logps/rejected": -167.61927795410156,
|
|
"loss": 0.7485,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.2596588134765625,
|
|
"rewards/margins": -0.021375911310315132,
|
|
"rewards/rejected": -0.23828287422657013,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 4.9491268733445034e-05,
|
|
"logits/chosen": -1.724785327911377,
|
|
"logits/rejected": -1.7340233325958252,
|
|
"logps/chosen": -204.80548095703125,
|
|
"logps/rejected": -209.44329833984375,
|
|
"loss": 0.7051,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.09321151673793793,
|
|
"rewards/margins": 0.0742495059967041,
|
|
"rewards/rejected": -0.16746100783348083,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 4.9484052109698984e-05,
|
|
"logits/chosen": -1.7430789470672607,
|
|
"logits/rejected": -1.7313511371612549,
|
|
"logps/chosen": -181.70632934570312,
|
|
"logps/rejected": -162.24334716796875,
|
|
"loss": 0.844,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.014489106833934784,
|
|
"rewards/margins": -0.029044844210147858,
|
|
"rewards/rejected": 0.014555716887116432,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 4.947678519361021e-05,
|
|
"logits/chosen": -1.9160277843475342,
|
|
"logits/rejected": -1.8753935098648071,
|
|
"logps/chosen": -175.2951202392578,
|
|
"logps/rejected": -161.4536590576172,
|
|
"loss": 0.6499,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.1920948028564453,
|
|
"rewards/margins": 0.18029190599918365,
|
|
"rewards/rejected": -0.3723866939544678,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 4.946946800010556e-05,
|
|
"logits/chosen": -1.788377046585083,
|
|
"logits/rejected": -1.804762601852417,
|
|
"logps/chosen": -190.5827178955078,
|
|
"logps/rejected": -207.48460388183594,
|
|
"loss": 0.7442,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.20643703639507294,
|
|
"rewards/margins": 0.0016913870349526405,
|
|
"rewards/rejected": -0.20812839269638062,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 4.946210054421518e-05,
|
|
"logits/chosen": -1.943693995475769,
|
|
"logits/rejected": -1.9860758781433105,
|
|
"logps/chosen": -162.47232055664062,
|
|
"logps/rejected": -187.59640502929688,
|
|
"loss": 0.5544,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": -0.04131259024143219,
|
|
"rewards/margins": 0.34646379947662354,
|
|
"rewards/rejected": -0.3877764344215393,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 4.945468284107246e-05,
|
|
"logits/chosen": -1.7154016494750977,
|
|
"logits/rejected": -1.729323387145996,
|
|
"logps/chosen": -151.67153930664062,
|
|
"logps/rejected": -175.7374725341797,
|
|
"loss": 0.7351,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.3676840364933014,
|
|
"rewards/margins": -0.05210195109248161,
|
|
"rewards/rejected": -0.3155820667743683,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 4.944721490591401e-05,
|
|
"logits/chosen": -1.5419683456420898,
|
|
"logits/rejected": -1.5722306966781616,
|
|
"logps/chosen": -158.3173065185547,
|
|
"logps/rejected": -168.21975708007812,
|
|
"loss": 0.7106,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": 0.040096037089824677,
|
|
"rewards/margins": 0.040343452244997025,
|
|
"rewards/rejected": -0.00024740397930145264,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 4.9439696754079595e-05,
|
|
"logits/chosen": -1.8851487636566162,
|
|
"logits/rejected": -1.927181601524353,
|
|
"logps/chosen": -163.97447204589844,
|
|
"logps/rejected": -171.12020874023438,
|
|
"loss": 0.6697,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.4691685140132904,
|
|
"rewards/margins": 0.14997676014900208,
|
|
"rewards/rejected": -0.6191452741622925,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 4.9432128401012144e-05,
|
|
"logits/chosen": -1.5929148197174072,
|
|
"logits/rejected": -1.5544054508209229,
|
|
"logps/chosen": -143.14022827148438,
|
|
"logps/rejected": -158.71368408203125,
|
|
"loss": 0.6763,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.0918157696723938,
|
|
"rewards/margins": 0.08131375163793564,
|
|
"rewards/rejected": -0.17312952876091003,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 4.9424509862257706e-05,
|
|
"logits/chosen": -1.599873423576355,
|
|
"logits/rejected": -1.5568993091583252,
|
|
"logps/chosen": -197.35276794433594,
|
|
"logps/rejected": -228.1996307373047,
|
|
"loss": 0.6008,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.22382640838623047,
|
|
"rewards/margins": 0.30180901288986206,
|
|
"rewards/rejected": -0.5256354808807373,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 4.941684115346541e-05,
|
|
"logits/chosen": -1.9682908058166504,
|
|
"logits/rejected": -1.9601702690124512,
|
|
"logps/chosen": -178.14833068847656,
|
|
"logps/rejected": -180.44769287109375,
|
|
"loss": 0.6096,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.2652210593223572,
|
|
"rewards/margins": 0.3119816184043884,
|
|
"rewards/rejected": -0.5772026181221008,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 4.940912229038745e-05,
|
|
"logits/chosen": -1.7443188428878784,
|
|
"logits/rejected": -1.720470666885376,
|
|
"logps/chosen": -176.2379150390625,
|
|
"logps/rejected": -166.10626220703125,
|
|
"loss": 0.8199,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.5284540057182312,
|
|
"rewards/margins": -0.06340186297893524,
|
|
"rewards/rejected": -0.46505218744277954,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 4.9401353288879024e-05,
|
|
"logits/chosen": -1.8005255460739136,
|
|
"logits/rejected": -1.814915657043457,
|
|
"logps/chosen": -173.22021484375,
|
|
"logps/rejected": -187.9818878173828,
|
|
"loss": 0.6487,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.36548957228660583,
|
|
"rewards/margins": 0.12811800837516785,
|
|
"rewards/rejected": -0.49360761046409607,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 4.9393534164898335e-05,
|
|
"logits/chosen": -1.8766534328460693,
|
|
"logits/rejected": -1.9305753707885742,
|
|
"logps/chosen": -166.86866760253906,
|
|
"logps/rejected": -195.80569458007812,
|
|
"loss": 0.6937,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.2575337588787079,
|
|
"rewards/margins": 0.2505089044570923,
|
|
"rewards/rejected": -0.5080426931381226,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 4.9385664934506526e-05,
|
|
"logits/chosen": -1.7149075269699097,
|
|
"logits/rejected": -1.7573699951171875,
|
|
"logps/chosen": -169.73626708984375,
|
|
"logps/rejected": -178.3174591064453,
|
|
"loss": 0.6638,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.4145450294017792,
|
|
"rewards/margins": 0.1643792688846588,
|
|
"rewards/rejected": -0.578924298286438,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 4.937774561386768e-05,
|
|
"logits/chosen": -1.8144739866256714,
|
|
"logits/rejected": -1.8054416179656982,
|
|
"logps/chosen": -201.68247985839844,
|
|
"logps/rejected": -208.70188903808594,
|
|
"loss": 0.7625,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.2262319028377533,
|
|
"rewards/margins": -0.053480371832847595,
|
|
"rewards/rejected": -0.1727515161037445,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 4.936977621924875e-05,
|
|
"logits/chosen": -1.721892237663269,
|
|
"logits/rejected": -1.7585711479187012,
|
|
"logps/chosen": -193.45179748535156,
|
|
"logps/rejected": -200.13726806640625,
|
|
"loss": 0.798,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.7297480702400208,
|
|
"rewards/margins": -0.1376451551914215,
|
|
"rewards/rejected": -0.5921030044555664,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 4.9361756767019564e-05,
|
|
"logits/chosen": -1.8132922649383545,
|
|
"logits/rejected": -1.8062866926193237,
|
|
"logps/chosen": -204.11619567871094,
|
|
"logps/rejected": -196.605224609375,
|
|
"loss": 0.802,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.10710492730140686,
|
|
"rewards/margins": -0.09782031178474426,
|
|
"rewards/rejected": -0.009284593164920807,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 4.935368727365276e-05,
|
|
"logits/chosen": -1.6550960540771484,
|
|
"logits/rejected": -1.6377525329589844,
|
|
"logps/chosen": -191.87579345703125,
|
|
"logps/rejected": -181.06930541992188,
|
|
"loss": 0.7402,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.8034918308258057,
|
|
"rewards/margins": -0.04004772752523422,
|
|
"rewards/rejected": -0.763444185256958,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 4.934556775572377e-05,
|
|
"logits/chosen": -1.9349067211151123,
|
|
"logits/rejected": -1.9205700159072876,
|
|
"logps/chosen": -173.06373596191406,
|
|
"logps/rejected": -173.32766723632812,
|
|
"loss": 0.6361,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.24924635887145996,
|
|
"rewards/margins": 0.23114144802093506,
|
|
"rewards/rejected": -0.480387806892395,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 4.9337398229910784e-05,
|
|
"logits/chosen": -1.8233386278152466,
|
|
"logits/rejected": -1.8753117322921753,
|
|
"logps/chosen": -189.73959350585938,
|
|
"logps/rejected": -197.85728454589844,
|
|
"loss": 0.7225,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.24001392722129822,
|
|
"rewards/margins": 0.08805333077907562,
|
|
"rewards/rejected": -0.32806724309921265,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 4.932917871299471e-05,
|
|
"logits/chosen": -1.540401816368103,
|
|
"logits/rejected": -1.5170302391052246,
|
|
"logps/chosen": -205.3408203125,
|
|
"logps/rejected": -206.5533905029297,
|
|
"loss": 0.8948,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.7126679420471191,
|
|
"rewards/margins": -0.22102710604667664,
|
|
"rewards/rejected": -0.4916408061981201,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 4.9320909221859134e-05,
|
|
"logits/chosen": -1.934309482574463,
|
|
"logits/rejected": -1.945433497428894,
|
|
"logps/chosen": -170.4419708251953,
|
|
"logps/rejected": -165.6936492919922,
|
|
"loss": 0.7461,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.23198306560516357,
|
|
"rewards/margins": -0.018536821007728577,
|
|
"rewards/rejected": -0.213446244597435,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 4.9312589773490304e-05,
|
|
"logits/chosen": -2.026982545852661,
|
|
"logits/rejected": -1.9359885454177856,
|
|
"logps/chosen": -185.02920532226562,
|
|
"logps/rejected": -173.5999298095703,
|
|
"loss": 0.6839,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.39162588119506836,
|
|
"rewards/margins": 0.09019112586975098,
|
|
"rewards/rejected": -0.48181700706481934,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 4.930422038497708e-05,
|
|
"logits/chosen": -1.9103275537490845,
|
|
"logits/rejected": -1.8527649641036987,
|
|
"logps/chosen": -167.06378173828125,
|
|
"logps/rejected": -153.1953125,
|
|
"loss": 0.6232,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.32273101806640625,
|
|
"rewards/margins": 0.2372804582118988,
|
|
"rewards/rejected": -0.5600115060806274,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 4.92958010735109e-05,
|
|
"logits/chosen": -1.9541754722595215,
|
|
"logits/rejected": -2.0632808208465576,
|
|
"logps/chosen": -181.56781005859375,
|
|
"logps/rejected": -199.48483276367188,
|
|
"loss": 0.5323,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.1671663373708725,
|
|
"rewards/margins": 0.5313636064529419,
|
|
"rewards/rejected": -0.6985299587249756,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 4.928733185638575e-05,
|
|
"logits/chosen": -1.7843657732009888,
|
|
"logits/rejected": -1.8627678155899048,
|
|
"logps/chosen": -167.1883087158203,
|
|
"logps/rejected": -172.48223876953125,
|
|
"loss": 0.7816,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.2200300097465515,
|
|
"rewards/margins": -0.0496581606566906,
|
|
"rewards/rejected": -0.1703718900680542,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 4.927881275099815e-05,
|
|
"logits/chosen": -1.713842511177063,
|
|
"logits/rejected": -1.805971622467041,
|
|
"logps/chosen": -192.92250061035156,
|
|
"logps/rejected": -212.14866638183594,
|
|
"loss": 0.6343,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.17002353072166443,
|
|
"rewards/margins": 0.28741562366485596,
|
|
"rewards/rejected": -0.4574391543865204,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 4.927024377484705e-05,
|
|
"logits/chosen": -1.682020664215088,
|
|
"logits/rejected": -1.7268104553222656,
|
|
"logps/chosen": -156.587158203125,
|
|
"logps/rejected": -159.53341674804688,
|
|
"loss": 0.6657,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.16453158855438232,
|
|
"rewards/margins": 0.15859198570251465,
|
|
"rewards/rejected": -0.323123574256897,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 4.9261624945533855e-05,
|
|
"logits/chosen": -1.8595139980316162,
|
|
"logits/rejected": -1.8612048625946045,
|
|
"logps/chosen": -163.1502685546875,
|
|
"logps/rejected": -193.11166381835938,
|
|
"loss": 0.6646,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.2516610622406006,
|
|
"rewards/margins": 0.18862426280975342,
|
|
"rewards/rejected": -0.440285325050354,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 4.925295628076241e-05,
|
|
"logits/chosen": -1.8986504077911377,
|
|
"logits/rejected": -1.951588749885559,
|
|
"logps/chosen": -161.01625061035156,
|
|
"logps/rejected": -171.39744567871094,
|
|
"loss": 0.7413,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.18460389971733093,
|
|
"rewards/margins": -0.004900887608528137,
|
|
"rewards/rejected": -0.1797029972076416,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 4.9244237798338866e-05,
|
|
"logits/chosen": -1.7942367792129517,
|
|
"logits/rejected": -1.8609907627105713,
|
|
"logps/chosen": -190.5836181640625,
|
|
"logps/rejected": -181.18942260742188,
|
|
"loss": 0.8213,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.7920368909835815,
|
|
"rewards/margins": -0.13006603717803955,
|
|
"rewards/rejected": -0.6619707942008972,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 4.923546951617175e-05,
|
|
"logits/chosen": -1.7586820125579834,
|
|
"logits/rejected": -1.8338139057159424,
|
|
"logps/chosen": -161.4608154296875,
|
|
"logps/rejected": -174.6810302734375,
|
|
"loss": 0.7838,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.15520796179771423,
|
|
"rewards/margins": 0.007751762866973877,
|
|
"rewards/rejected": -0.1629597246646881,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 4.922665145227187e-05,
|
|
"logits/chosen": -1.999558925628662,
|
|
"logits/rejected": -1.9667410850524902,
|
|
"logps/chosen": -177.38986206054688,
|
|
"logps/rejected": -182.5379638671875,
|
|
"loss": 0.8666,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.26553717255592346,
|
|
"rewards/margins": -0.1137295514345169,
|
|
"rewards/rejected": -0.15180760622024536,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 4.9217783624752266e-05,
|
|
"logits/chosen": -1.7799978256225586,
|
|
"logits/rejected": -1.800316572189331,
|
|
"logps/chosen": -189.352783203125,
|
|
"logps/rejected": -173.5353546142578,
|
|
"loss": 0.7974,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.6156832575798035,
|
|
"rewards/margins": -0.09660260379314423,
|
|
"rewards/rejected": -0.519080638885498,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 4.920886605182823e-05,
|
|
"logits/chosen": -1.7154024839401245,
|
|
"logits/rejected": -1.7451473474502563,
|
|
"logps/chosen": -162.1999053955078,
|
|
"logps/rejected": -177.0426483154297,
|
|
"loss": 0.6234,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.3276807367801666,
|
|
"rewards/margins": 0.274338036775589,
|
|
"rewards/rejected": -0.6020187139511108,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 4.919989875181722e-05,
|
|
"logits/chosen": -1.769112467765808,
|
|
"logits/rejected": -1.759423851966858,
|
|
"logps/chosen": -170.71876525878906,
|
|
"logps/rejected": -173.2998809814453,
|
|
"loss": 0.7953,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.2965158224105835,
|
|
"rewards/margins": 0.05386320501565933,
|
|
"rewards/rejected": -0.3503790497779846,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 4.919088174313884e-05,
|
|
"logits/chosen": -1.2533071041107178,
|
|
"logits/rejected": -1.3566581010818481,
|
|
"logps/chosen": -200.62548828125,
|
|
"logps/rejected": -190.6791534423828,
|
|
"loss": 0.7712,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.7463827729225159,
|
|
"rewards/margins": -0.011319484561681747,
|
|
"rewards/rejected": -0.7350633144378662,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 4.91818150443148e-05,
|
|
"logits/chosen": -1.958874225616455,
|
|
"logits/rejected": -1.8372151851654053,
|
|
"logps/chosen": -184.42295837402344,
|
|
"logps/rejected": -178.30995178222656,
|
|
"loss": 0.9,
|
|
"rewards/accuracies": 0.1875,
|
|
"rewards/chosen": -0.6464177370071411,
|
|
"rewards/margins": -0.2897469401359558,
|
|
"rewards/rejected": -0.3566707670688629,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 4.917269867396886e-05,
|
|
"logits/chosen": -1.731322169303894,
|
|
"logits/rejected": -1.8198059797286987,
|
|
"logps/chosen": -157.6179962158203,
|
|
"logps/rejected": -162.81597900390625,
|
|
"loss": 0.7364,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.6854066848754883,
|
|
"rewards/margins": 0.03314337879419327,
|
|
"rewards/rejected": -0.7185500860214233,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 4.916353265082686e-05,
|
|
"logits/chosen": -1.5918539762496948,
|
|
"logits/rejected": -1.5675849914550781,
|
|
"logps/chosen": -229.21499633789062,
|
|
"logps/rejected": -248.54562377929688,
|
|
"loss": 0.6943,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.4582862854003906,
|
|
"rewards/margins": 0.10020212829113007,
|
|
"rewards/rejected": -0.5584883689880371,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 4.9154316993716565e-05,
|
|
"logits/chosen": -1.9066779613494873,
|
|
"logits/rejected": -1.9495766162872314,
|
|
"logps/chosen": -149.90614318847656,
|
|
"logps/rejected": -146.74632263183594,
|
|
"loss": 0.7492,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.44106772541999817,
|
|
"rewards/margins": -0.07195230573415756,
|
|
"rewards/rejected": -0.36911541223526,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 4.9145051721567734e-05,
|
|
"logits/chosen": -2.0382936000823975,
|
|
"logits/rejected": -1.9569990634918213,
|
|
"logps/chosen": -166.32168579101562,
|
|
"logps/rejected": -162.79673767089844,
|
|
"loss": 0.6561,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.3699617087841034,
|
|
"rewards/margins": 0.14387893676757812,
|
|
"rewards/rejected": -0.5138406157493591,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 4.913573685341205e-05,
|
|
"logits/chosen": -1.474026083946228,
|
|
"logits/rejected": -1.5045154094696045,
|
|
"logps/chosen": -229.38619995117188,
|
|
"logps/rejected": -236.83164978027344,
|
|
"loss": 0.8663,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.9608985781669617,
|
|
"rewards/margins": -0.17912134528160095,
|
|
"rewards/rejected": -0.7817772626876831,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 4.9126372408383025e-05,
|
|
"logits/chosen": -1.8263182640075684,
|
|
"logits/rejected": -1.8128119707107544,
|
|
"logps/chosen": -190.57106018066406,
|
|
"logps/rejected": -223.3215789794922,
|
|
"loss": 0.7163,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.5443102717399597,
|
|
"rewards/margins": 0.06481970846652985,
|
|
"rewards/rejected": -0.6091300249099731,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 4.911695840571605e-05,
|
|
"logits/chosen": -1.7644751071929932,
|
|
"logits/rejected": -1.8120529651641846,
|
|
"logps/chosen": -205.31759643554688,
|
|
"logps/rejected": -199.3420867919922,
|
|
"loss": 0.8917,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.6110701560974121,
|
|
"rewards/margins": -0.2775843143463135,
|
|
"rewards/rejected": -0.33348581194877625,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 4.910749486474828e-05,
|
|
"logits/chosen": -1.6636385917663574,
|
|
"logits/rejected": -1.5808690786361694,
|
|
"logps/chosen": -176.58572387695312,
|
|
"logps/rejected": -195.53761291503906,
|
|
"loss": 0.7043,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.6308318972587585,
|
|
"rewards/margins": 0.1960841417312622,
|
|
"rewards/rejected": -0.8269160985946655,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 4.909798180491865e-05,
|
|
"logits/chosen": -1.946243166923523,
|
|
"logits/rejected": -2.0143542289733887,
|
|
"logps/chosen": -178.8447265625,
|
|
"logps/rejected": -188.88853454589844,
|
|
"loss": 0.796,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.4359901547431946,
|
|
"rewards/margins": -0.07123897969722748,
|
|
"rewards/rejected": -0.3647511601448059,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 4.9088419245767803e-05,
|
|
"logits/chosen": -2.0332159996032715,
|
|
"logits/rejected": -1.9962480068206787,
|
|
"logps/chosen": -181.74835205078125,
|
|
"logps/rejected": -198.86822509765625,
|
|
"loss": 0.8508,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.9367510676383972,
|
|
"rewards/margins": -0.1084718257188797,
|
|
"rewards/rejected": -0.8282791972160339,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 4.907880720693804e-05,
|
|
"logits/chosen": -2.006517171859741,
|
|
"logits/rejected": -1.8688626289367676,
|
|
"logps/chosen": -176.2784423828125,
|
|
"logps/rejected": -171.2430419921875,
|
|
"loss": 0.8853,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.8897907733917236,
|
|
"rewards/margins": -0.2713664770126343,
|
|
"rewards/rejected": -0.6184243559837341,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 4.9069145708173324e-05,
|
|
"logits/chosen": -1.914872169494629,
|
|
"logits/rejected": -1.9809991121292114,
|
|
"logps/chosen": -152.42398071289062,
|
|
"logps/rejected": -172.4163360595703,
|
|
"loss": 0.7878,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.8594350814819336,
|
|
"rewards/margins": -0.11893659830093384,
|
|
"rewards/rejected": -0.740498423576355,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 4.9059434769319205e-05,
|
|
"logits/chosen": -1.4920971393585205,
|
|
"logits/rejected": -1.479290246963501,
|
|
"logps/chosen": -221.7364501953125,
|
|
"logps/rejected": -217.07138061523438,
|
|
"loss": 0.8203,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.8598781824111938,
|
|
"rewards/margins": -0.09480879455804825,
|
|
"rewards/rejected": -0.7650693655014038,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 4.904967441032278e-05,
|
|
"logits/chosen": -1.6795321702957153,
|
|
"logits/rejected": -1.7278181314468384,
|
|
"logps/chosen": -176.05941772460938,
|
|
"logps/rejected": -176.9445037841797,
|
|
"loss": 0.6673,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.6722793579101562,
|
|
"rewards/margins": 0.1413556933403015,
|
|
"rewards/rejected": -0.8136351108551025,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 4.903986465123266e-05,
|
|
"logits/chosen": -1.8752367496490479,
|
|
"logits/rejected": -1.815263032913208,
|
|
"logps/chosen": -163.13931274414062,
|
|
"logps/rejected": -167.08154296875,
|
|
"loss": 0.6859,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.587963879108429,
|
|
"rewards/margins": 0.14399470388889313,
|
|
"rewards/rejected": -0.7319585084915161,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 4.903000551219894e-05,
|
|
"logits/chosen": -2.049884796142578,
|
|
"logits/rejected": -2.096831798553467,
|
|
"logps/chosen": -159.4933624267578,
|
|
"logps/rejected": -172.0717010498047,
|
|
"loss": 0.7986,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.7208172082901001,
|
|
"rewards/margins": -0.12478935718536377,
|
|
"rewards/rejected": -0.5960277915000916,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 4.902009701347313e-05,
|
|
"logits/chosen": -1.733120322227478,
|
|
"logits/rejected": -1.7238028049468994,
|
|
"logps/chosen": -207.2619171142578,
|
|
"logps/rejected": -196.6807098388672,
|
|
"loss": 0.7436,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.46741175651550293,
|
|
"rewards/margins": 0.01438647136092186,
|
|
"rewards/rejected": -0.4817982316017151,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 4.901013917540814e-05,
|
|
"logits/chosen": -2.027272939682007,
|
|
"logits/rejected": -1.9993281364440918,
|
|
"logps/chosen": -184.6562042236328,
|
|
"logps/rejected": -183.0182647705078,
|
|
"loss": 0.7772,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.6997889280319214,
|
|
"rewards/margins": 0.0013962779194116592,
|
|
"rewards/rejected": -0.7011851668357849,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 4.900013201845821e-05,
|
|
"logits/chosen": -1.5796035528182983,
|
|
"logits/rejected": -1.3949412107467651,
|
|
"logps/chosen": -175.8470458984375,
|
|
"logps/rejected": -212.80296325683594,
|
|
"loss": 0.7523,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.37455612421035767,
|
|
"rewards/margins": -0.005637466907501221,
|
|
"rewards/rejected": -0.36891865730285645,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 4.899007556317893e-05,
|
|
"logits/chosen": -1.9345701932907104,
|
|
"logits/rejected": -2.001033306121826,
|
|
"logps/chosen": -250.92816162109375,
|
|
"logps/rejected": -241.9100341796875,
|
|
"loss": 0.7686,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.4188327193260193,
|
|
"rewards/margins": -0.09854313731193542,
|
|
"rewards/rejected": -0.32028958201408386,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 4.8979969830227086e-05,
|
|
"logits/chosen": -1.9917688369750977,
|
|
"logits/rejected": -2.0171382427215576,
|
|
"logps/chosen": -177.34434509277344,
|
|
"logps/rejected": -170.58657836914062,
|
|
"loss": 0.8629,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.5179021954536438,
|
|
"rewards/margins": -0.09615220129489899,
|
|
"rewards/rejected": -0.4217500388622284,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 4.896981484036074e-05,
|
|
"logits/chosen": -2.010779619216919,
|
|
"logits/rejected": -2.0212411880493164,
|
|
"logps/chosen": -190.38487243652344,
|
|
"logps/rejected": -189.54226684570312,
|
|
"loss": 0.6184,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.45494896173477173,
|
|
"rewards/margins": 0.2873493432998657,
|
|
"rewards/rejected": -0.7422983050346375,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 4.895961061443911e-05,
|
|
"logits/chosen": -1.8409286737442017,
|
|
"logits/rejected": -1.7947205305099487,
|
|
"logps/chosen": -189.4046173095703,
|
|
"logps/rejected": -178.43617248535156,
|
|
"loss": 0.8209,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.7244110107421875,
|
|
"rewards/margins": -0.0501336008310318,
|
|
"rewards/rejected": -0.6742774248123169,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 4.894935717342255e-05,
|
|
"logits/chosen": -1.7337801456451416,
|
|
"logits/rejected": -1.6998471021652222,
|
|
"logps/chosen": -228.77264404296875,
|
|
"logps/rejected": -214.67515563964844,
|
|
"loss": 0.7953,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.7624901533126831,
|
|
"rewards/margins": -0.0503702238202095,
|
|
"rewards/rejected": -0.7121198773384094,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 4.8939054538372496e-05,
|
|
"logits/chosen": -1.7520679235458374,
|
|
"logits/rejected": -1.752877116203308,
|
|
"logps/chosen": -186.9480743408203,
|
|
"logps/rejected": -195.37429809570312,
|
|
"loss": 0.8588,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.6405542492866516,
|
|
"rewards/margins": -0.206780344247818,
|
|
"rewards/rejected": -0.4337739050388336,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 4.8928702730451456e-05,
|
|
"logits/chosen": -1.9656537771224976,
|
|
"logits/rejected": -2.027155876159668,
|
|
"logps/chosen": -199.00657653808594,
|
|
"logps/rejected": -212.56494140625,
|
|
"loss": 0.7799,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.6149024963378906,
|
|
"rewards/margins": -0.018680021166801453,
|
|
"rewards/rejected": -0.5962225198745728,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 4.891830177092294e-05,
|
|
"logits/chosen": -1.629424810409546,
|
|
"logits/rejected": -1.6878198385238647,
|
|
"logps/chosen": -170.93138122558594,
|
|
"logps/rejected": -171.99090576171875,
|
|
"loss": 0.6828,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.673701822757721,
|
|
"rewards/margins": 0.13800469040870667,
|
|
"rewards/rejected": -0.8117064237594604,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 4.8907851681151396e-05,
|
|
"logits/chosen": -1.7744640111923218,
|
|
"logits/rejected": -1.855400800704956,
|
|
"logps/chosen": -162.42169189453125,
|
|
"logps/rejected": -166.64620971679688,
|
|
"loss": 0.9846,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.8018124103546143,
|
|
"rewards/margins": -0.3769915699958801,
|
|
"rewards/rejected": -0.42482078075408936,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 4.889735248260221e-05,
|
|
"logits/chosen": -1.889973521232605,
|
|
"logits/rejected": -1.907044768333435,
|
|
"logps/chosen": -166.85736083984375,
|
|
"logps/rejected": -188.35101318359375,
|
|
"loss": 0.6128,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.3316620886325836,
|
|
"rewards/margins": 0.2465287297964096,
|
|
"rewards/rejected": -0.578190803527832,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 4.8886804196841626e-05,
|
|
"logits/chosen": -2.047497034072876,
|
|
"logits/rejected": -2.0145740509033203,
|
|
"logps/chosen": -201.215087890625,
|
|
"logps/rejected": -203.3246307373047,
|
|
"loss": 0.7635,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.49714764952659607,
|
|
"rewards/margins": 0.03132334351539612,
|
|
"rewards/rejected": -0.5284709930419922,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 4.887620684553674e-05,
|
|
"logits/chosen": -1.831432580947876,
|
|
"logits/rejected": -1.826611876487732,
|
|
"logps/chosen": -170.79603576660156,
|
|
"logps/rejected": -184.5007781982422,
|
|
"loss": 0.734,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.7777528166770935,
|
|
"rewards/margins": 0.054428160190582275,
|
|
"rewards/rejected": -0.832180917263031,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 4.886556045045542e-05,
|
|
"logits/chosen": -2.050309896469116,
|
|
"logits/rejected": -2.0222089290618896,
|
|
"logps/chosen": -186.81155395507812,
|
|
"logps/rejected": -177.9705810546875,
|
|
"loss": 0.7883,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.6639207601547241,
|
|
"rewards/margins": 0.03769933432340622,
|
|
"rewards/rejected": -0.7016200423240662,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 4.8854865033466275e-05,
|
|
"logits/chosen": -2.076099395751953,
|
|
"logits/rejected": -2.0824105739593506,
|
|
"logps/chosen": -179.39295959472656,
|
|
"logps/rejected": -174.53573608398438,
|
|
"loss": 0.7306,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.6415868401527405,
|
|
"rewards/margins": 0.04155872389674187,
|
|
"rewards/rejected": -0.6831455230712891,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 4.88441206165386e-05,
|
|
"logits/chosen": -1.6709840297698975,
|
|
"logits/rejected": -1.8081105947494507,
|
|
"logps/chosen": -166.87539672851562,
|
|
"logps/rejected": -183.02621459960938,
|
|
"loss": 0.7271,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.518951952457428,
|
|
"rewards/margins": 0.06794089823961258,
|
|
"rewards/rejected": -0.58689284324646,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 4.8833327221742356e-05,
|
|
"logits/chosen": -1.9388179779052734,
|
|
"logits/rejected": -1.9303326606750488,
|
|
"logps/chosen": -229.31259155273438,
|
|
"logps/rejected": -218.38491821289062,
|
|
"loss": 0.9369,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.7209200263023376,
|
|
"rewards/margins": -0.36736997961997986,
|
|
"rewards/rejected": -0.3535500466823578,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 4.88224848712481e-05,
|
|
"logits/chosen": -1.8626773357391357,
|
|
"logits/rejected": -1.9242452383041382,
|
|
"logps/chosen": -190.93325805664062,
|
|
"logps/rejected": -220.92437744140625,
|
|
"loss": 0.7958,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.9179306030273438,
|
|
"rewards/margins": -0.005557693541049957,
|
|
"rewards/rejected": -0.9123728275299072,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 4.881159358732694e-05,
|
|
"logits/chosen": -1.9244499206542969,
|
|
"logits/rejected": -1.9016033411026,
|
|
"logps/chosen": -208.19638061523438,
|
|
"logps/rejected": -194.61898803710938,
|
|
"loss": 0.6003,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.5419009923934937,
|
|
"rewards/margins": 0.4854595363140106,
|
|
"rewards/rejected": -1.0273605585098267,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 4.8800653392350526e-05,
|
|
"logits/chosen": -2.065157413482666,
|
|
"logits/rejected": -1.979295015335083,
|
|
"logps/chosen": -167.4358367919922,
|
|
"logps/rejected": -148.04000854492188,
|
|
"loss": 0.79,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.6589381098747253,
|
|
"rewards/margins": -0.009491220116615295,
|
|
"rewards/rejected": -0.6494468450546265,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 4.8789664308790936e-05,
|
|
"logits/chosen": -1.8643383979797363,
|
|
"logits/rejected": -1.801065444946289,
|
|
"logps/chosen": -163.1219482421875,
|
|
"logps/rejected": -162.10537719726562,
|
|
"loss": 0.6148,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.19017738103866577,
|
|
"rewards/margins": 0.2843154966831207,
|
|
"rewards/rejected": -0.4744928777217865,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 4.8778626359220715e-05,
|
|
"logits/chosen": -1.7437247037887573,
|
|
"logits/rejected": -1.704676628112793,
|
|
"logps/chosen": -152.5662078857422,
|
|
"logps/rejected": -159.8699493408203,
|
|
"loss": 0.7078,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.5272018909454346,
|
|
"rewards/margins": 0.054309070110321045,
|
|
"rewards/rejected": -0.5815109014511108,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 4.8767539566312734e-05,
|
|
"logits/chosen": -1.884958028793335,
|
|
"logits/rejected": -1.849034070968628,
|
|
"logps/chosen": -162.9790802001953,
|
|
"logps/rejected": -154.30078125,
|
|
"loss": 0.7916,
|
|
"rewards/accuracies": 0.25,
|
|
"rewards/chosen": -0.5811585187911987,
|
|
"rewards/margins": -0.11695164442062378,
|
|
"rewards/rejected": -0.46420690417289734,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 4.875640395284023e-05,
|
|
"logits/chosen": -1.9072563648223877,
|
|
"logits/rejected": -1.8811615705490112,
|
|
"logps/chosen": -173.24008178710938,
|
|
"logps/rejected": -179.58273315429688,
|
|
"loss": 0.7763,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.484822154045105,
|
|
"rewards/margins": -0.08394555747509003,
|
|
"rewards/rejected": -0.40087658166885376,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 4.874521954167671e-05,
|
|
"logits/chosen": -1.8919446468353271,
|
|
"logits/rejected": -1.9220166206359863,
|
|
"logps/chosen": -158.00631713867188,
|
|
"logps/rejected": -158.1962890625,
|
|
"loss": 0.6527,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.2469102144241333,
|
|
"rewards/margins": 0.14598041772842407,
|
|
"rewards/rejected": -0.39289066195487976,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 4.8733986355795905e-05,
|
|
"logits/chosen": -2.0576484203338623,
|
|
"logits/rejected": -1.957137107849121,
|
|
"logps/chosen": -183.29859924316406,
|
|
"logps/rejected": -164.59544372558594,
|
|
"loss": 0.6596,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.19953250885009766,
|
|
"rewards/margins": 0.1992800384759903,
|
|
"rewards/rejected": -0.39881253242492676,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 4.8722704418271745e-05,
|
|
"logits/chosen": -1.9749754667282104,
|
|
"logits/rejected": -1.918540120124817,
|
|
"logps/chosen": -167.0876922607422,
|
|
"logps/rejected": -155.88064575195312,
|
|
"loss": 0.8313,
|
|
"rewards/accuracies": 0.1875,
|
|
"rewards/chosen": -0.398946613073349,
|
|
"rewards/margins": -0.19024060666561127,
|
|
"rewards/rejected": -0.20870603621006012,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 4.871137375227829e-05,
|
|
"logits/chosen": -1.6178803443908691,
|
|
"logits/rejected": -1.73251211643219,
|
|
"logps/chosen": -267.42718505859375,
|
|
"logps/rejected": -300.83477783203125,
|
|
"loss": 0.8896,
|
|
"rewards/accuracies": 0.25,
|
|
"rewards/chosen": -0.6622705459594727,
|
|
"rewards/margins": -0.21010492742061615,
|
|
"rewards/rejected": -0.4521656334400177,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 4.869999438108971e-05,
|
|
"logits/chosen": -1.888526439666748,
|
|
"logits/rejected": -1.8802413940429688,
|
|
"logps/chosen": -190.6936798095703,
|
|
"logps/rejected": -198.58245849609375,
|
|
"loss": 0.8107,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.6914330124855042,
|
|
"rewards/margins": -0.09704277664422989,
|
|
"rewards/rejected": -0.5943902134895325,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 4.8688566328080215e-05,
|
|
"logits/chosen": -1.5124105215072632,
|
|
"logits/rejected": -1.5067615509033203,
|
|
"logps/chosen": -282.9752502441406,
|
|
"logps/rejected": -281.37933349609375,
|
|
"loss": 0.7879,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.5186710953712463,
|
|
"rewards/margins": -0.08422104269266129,
|
|
"rewards/rejected": -0.4344501197338104,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 4.867708961672399e-05,
|
|
"logits/chosen": -1.8901722431182861,
|
|
"logits/rejected": -1.8916290998458862,
|
|
"logps/chosen": -194.58010864257812,
|
|
"logps/rejected": -206.20706176757812,
|
|
"loss": 0.7058,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.2762334942817688,
|
|
"rewards/margins": 0.04776221513748169,
|
|
"rewards/rejected": -0.3239956796169281,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 4.866556427059519e-05,
|
|
"logits/chosen": -1.9781274795532227,
|
|
"logits/rejected": -1.991908073425293,
|
|
"logps/chosen": -190.02462768554688,
|
|
"logps/rejected": -172.36788940429688,
|
|
"loss": 0.6999,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.3898717761039734,
|
|
"rewards/margins": 0.08414015173912048,
|
|
"rewards/rejected": -0.47401195764541626,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 4.865399031336787e-05,
|
|
"logits/chosen": -1.7196893692016602,
|
|
"logits/rejected": -1.7024250030517578,
|
|
"logps/chosen": -183.65408325195312,
|
|
"logps/rejected": -183.18309020996094,
|
|
"loss": 0.6556,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.18018794059753418,
|
|
"rewards/margins": 0.1877795308828354,
|
|
"rewards/rejected": -0.3679674565792084,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 4.8642367768815936e-05,
|
|
"logits/chosen": -1.82463800907135,
|
|
"logits/rejected": -1.9342741966247559,
|
|
"logps/chosen": -136.54745483398438,
|
|
"logps/rejected": -149.12637329101562,
|
|
"loss": 0.7994,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.28317660093307495,
|
|
"rewards/margins": -0.111660435795784,
|
|
"rewards/rejected": -0.17151619493961334,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 4.863069666081307e-05,
|
|
"logits/chosen": -1.9666064977645874,
|
|
"logits/rejected": -1.9600831270217896,
|
|
"logps/chosen": -160.50009155273438,
|
|
"logps/rejected": -172.79641723632812,
|
|
"loss": 0.7938,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.3549930453300476,
|
|
"rewards/margins": -0.036753974854946136,
|
|
"rewards/rejected": -0.3182390332221985,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 4.861897701333274e-05,
|
|
"logits/chosen": -1.7229609489440918,
|
|
"logits/rejected": -1.7512197494506836,
|
|
"logps/chosen": -160.8597869873047,
|
|
"logps/rejected": -182.94717407226562,
|
|
"loss": 0.7614,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.5264466404914856,
|
|
"rewards/margins": -0.05138474702835083,
|
|
"rewards/rejected": -0.4750618636608124,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 4.86072088504481e-05,
|
|
"logits/chosen": -1.8726041316986084,
|
|
"logits/rejected": -1.8346372842788696,
|
|
"logps/chosen": -193.1851348876953,
|
|
"logps/rejected": -175.16256713867188,
|
|
"loss": 0.7167,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": 0.04130769148468971,
|
|
"rewards/margins": 0.13345830142498016,
|
|
"rewards/rejected": -0.09215061366558075,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 4.859539219633199e-05,
|
|
"logits/chosen": -1.6277759075164795,
|
|
"logits/rejected": -1.650618553161621,
|
|
"logps/chosen": -192.23538208007812,
|
|
"logps/rejected": -195.0891876220703,
|
|
"loss": 0.9001,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.2764393389225006,
|
|
"rewards/margins": -0.1653566211462021,
|
|
"rewards/rejected": -0.11108270287513733,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 4.8583527075256804e-05,
|
|
"logits/chosen": -1.9752824306488037,
|
|
"logits/rejected": -1.976406455039978,
|
|
"logps/chosen": -215.763916015625,
|
|
"logps/rejected": -222.56468200683594,
|
|
"loss": 0.6023,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.3105733394622803,
|
|
"rewards/margins": 0.3618737459182739,
|
|
"rewards/rejected": -0.6724470853805542,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 4.857161351159454e-05,
|
|
"logits/chosen": -1.7939121723175049,
|
|
"logits/rejected": -1.8076589107513428,
|
|
"logps/chosen": -175.3466033935547,
|
|
"logps/rejected": -176.5845947265625,
|
|
"loss": 0.6957,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.10015758872032166,
|
|
"rewards/margins": 0.10513995587825775,
|
|
"rewards/rejected": -0.2052975744009018,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 4.8559651529816664e-05,
|
|
"logits/chosen": -1.7438242435455322,
|
|
"logits/rejected": -1.7907154560089111,
|
|
"logps/chosen": -190.70948791503906,
|
|
"logps/rejected": -204.99143981933594,
|
|
"loss": 0.6727,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.06472301483154297,
|
|
"rewards/margins": 0.1332576423883438,
|
|
"rewards/rejected": -0.19798064231872559,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 4.854764115449411e-05,
|
|
"logits/chosen": -1.7970941066741943,
|
|
"logits/rejected": -1.8352383375167847,
|
|
"logps/chosen": -167.88230895996094,
|
|
"logps/rejected": -166.8907928466797,
|
|
"loss": 0.6771,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.2522534728050232,
|
|
"rewards/margins": 0.06870199739933014,
|
|
"rewards/rejected": -0.3209554851055145,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 4.853558241029723e-05,
|
|
"logits/chosen": -1.9054516553878784,
|
|
"logits/rejected": -1.9092512130737305,
|
|
"logps/chosen": -173.30734252929688,
|
|
"logps/rejected": -189.3043975830078,
|
|
"loss": 0.7071,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.2111372947692871,
|
|
"rewards/margins": 0.07802311331033707,
|
|
"rewards/rejected": -0.2891604006290436,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 4.8523475321995715e-05,
|
|
"logits/chosen": -1.6928297281265259,
|
|
"logits/rejected": -1.7166639566421509,
|
|
"logps/chosen": -163.96253967285156,
|
|
"logps/rejected": -157.01870727539062,
|
|
"loss": 0.6892,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": 0.023365147411823273,
|
|
"rewards/margins": 0.09150275588035583,
|
|
"rewards/rejected": -0.06813760101795197,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 4.8511319914458555e-05,
|
|
"logits/chosen": -1.5925623178482056,
|
|
"logits/rejected": -1.6342376470565796,
|
|
"logps/chosen": -169.8107452392578,
|
|
"logps/rejected": -174.4763641357422,
|
|
"loss": 0.7952,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.6270018815994263,
|
|
"rewards/margins": -0.09866765886545181,
|
|
"rewards/rejected": -0.5283341407775879,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 4.849911621265401e-05,
|
|
"logits/chosen": -1.6875545978546143,
|
|
"logits/rejected": -1.6878042221069336,
|
|
"logps/chosen": -160.625732421875,
|
|
"logps/rejected": -186.27923583984375,
|
|
"loss": 0.6322,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.33780720829963684,
|
|
"rewards/margins": 0.20755568146705627,
|
|
"rewards/rejected": -0.5453628301620483,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 4.848686424164953e-05,
|
|
"logits/chosen": -1.846010684967041,
|
|
"logits/rejected": -1.860701084136963,
|
|
"logps/chosen": -181.06878662109375,
|
|
"logps/rejected": -178.49310302734375,
|
|
"loss": 0.7739,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.3998520076274872,
|
|
"rewards/margins": -0.03460027277469635,
|
|
"rewards/rejected": -0.36525171995162964,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 4.84745640266117e-05,
|
|
"logits/chosen": -1.8941820859909058,
|
|
"logits/rejected": -1.8956482410430908,
|
|
"logps/chosen": -228.6375732421875,
|
|
"logps/rejected": -222.01824951171875,
|
|
"loss": 0.7173,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.3104371130466461,
|
|
"rewards/margins": 0.0530361533164978,
|
|
"rewards/rejected": -0.3634732663631439,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 4.846221559280624e-05,
|
|
"logits/chosen": -1.8459906578063965,
|
|
"logits/rejected": -1.9024940729141235,
|
|
"logps/chosen": -159.93936157226562,
|
|
"logps/rejected": -165.43307495117188,
|
|
"loss": 0.896,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.23201912641525269,
|
|
"rewards/margins": -0.12730032205581665,
|
|
"rewards/rejected": -0.10471877455711365,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 4.844981896559787e-05,
|
|
"logits/chosen": -2.048933506011963,
|
|
"logits/rejected": -2.061128616333008,
|
|
"logps/chosen": -181.51718139648438,
|
|
"logps/rejected": -185.25296020507812,
|
|
"loss": 0.8408,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.32012802362442017,
|
|
"rewards/margins": -0.20446370542049408,
|
|
"rewards/rejected": -0.1156642958521843,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 4.8437374170450344e-05,
|
|
"logits/chosen": -1.9143999814987183,
|
|
"logits/rejected": -1.9218837022781372,
|
|
"logps/chosen": -182.51480102539062,
|
|
"logps/rejected": -182.2652130126953,
|
|
"loss": 0.8078,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.6757728457450867,
|
|
"rewards/margins": -0.08970170468091965,
|
|
"rewards/rejected": -0.5860711932182312,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 4.842488123292632e-05,
|
|
"logits/chosen": -1.736176609992981,
|
|
"logits/rejected": -1.7229468822479248,
|
|
"logps/chosen": -159.5717315673828,
|
|
"logps/rejected": -189.70269775390625,
|
|
"loss": 0.6977,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.2442682683467865,
|
|
"rewards/margins": 0.0333622470498085,
|
|
"rewards/rejected": -0.2776305675506592,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 4.8412340178687374e-05,
|
|
"logits/chosen": -1.6423401832580566,
|
|
"logits/rejected": -1.689012050628662,
|
|
"logps/chosen": -151.13458251953125,
|
|
"logps/rejected": -175.60679626464844,
|
|
"loss": 0.8116,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.33638978004455566,
|
|
"rewards/margins": -0.15178707242012024,
|
|
"rewards/rejected": -0.18460272252559662,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 4.839975103349391e-05,
|
|
"logits/chosen": -1.8096094131469727,
|
|
"logits/rejected": -1.8605788946151733,
|
|
"logps/chosen": -158.09568786621094,
|
|
"logps/rejected": -172.2019805908203,
|
|
"loss": 0.8539,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.17084676027297974,
|
|
"rewards/margins": -0.16634216904640198,
|
|
"rewards/rejected": -0.004504583775997162,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 4.8387113823205096e-05,
|
|
"logits/chosen": -1.8915197849273682,
|
|
"logits/rejected": -1.9206541776657104,
|
|
"logps/chosen": -173.2298583984375,
|
|
"logps/rejected": -181.8549346923828,
|
|
"loss": 0.6712,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.22103241086006165,
|
|
"rewards/margins": 0.12150835990905762,
|
|
"rewards/rejected": -0.34254080057144165,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 4.8374428573778864e-05,
|
|
"logits/chosen": -1.9554250240325928,
|
|
"logits/rejected": -2.0212881565093994,
|
|
"logps/chosen": -188.2921142578125,
|
|
"logps/rejected": -199.60987854003906,
|
|
"loss": 0.7256,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": 0.0011289417743682861,
|
|
"rewards/margins": 0.15477600693702698,
|
|
"rewards/rejected": -0.1536470353603363,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 4.8361695311271795e-05,
|
|
"logits/chosen": -1.626081943511963,
|
|
"logits/rejected": -1.5311224460601807,
|
|
"logps/chosen": -185.79702758789062,
|
|
"logps/rejected": -203.40957641601562,
|
|
"loss": 0.8635,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.516851007938385,
|
|
"rewards/margins": -0.19476839900016785,
|
|
"rewards/rejected": -0.32208263874053955,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 4.83489140618391e-05,
|
|
"logits/chosen": -1.7895288467407227,
|
|
"logits/rejected": -1.753225564956665,
|
|
"logps/chosen": -217.21707153320312,
|
|
"logps/rejected": -191.97915649414062,
|
|
"loss": 0.7744,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.4739922285079956,
|
|
"rewards/margins": -0.06083906441926956,
|
|
"rewards/rejected": -0.41315317153930664,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 4.833608485173457e-05,
|
|
"logits/chosen": -1.9408211708068848,
|
|
"logits/rejected": -1.916908860206604,
|
|
"logps/chosen": -210.69586181640625,
|
|
"logps/rejected": -233.63662719726562,
|
|
"loss": 0.7485,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.3478991985321045,
|
|
"rewards/margins": -0.0014675185084342957,
|
|
"rewards/rejected": -0.3464316725730896,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 4.8323207707310496e-05,
|
|
"logits/chosen": -2.0299384593963623,
|
|
"logits/rejected": -2.0607504844665527,
|
|
"logps/chosen": -186.0093231201172,
|
|
"logps/rejected": -179.7775115966797,
|
|
"loss": 0.6698,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.22601480782032013,
|
|
"rewards/margins": 0.12358909845352173,
|
|
"rewards/rejected": -0.34960389137268066,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 4.831028265501764e-05,
|
|
"logits/chosen": -1.7695857286453247,
|
|
"logits/rejected": -1.7599815130233765,
|
|
"logps/chosen": -161.50375366210938,
|
|
"logps/rejected": -168.04541015625,
|
|
"loss": 0.6868,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.32897791266441345,
|
|
"rewards/margins": 0.10846008360385895,
|
|
"rewards/rejected": -0.437438040971756,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 4.829730972140517e-05,
|
|
"logits/chosen": -1.8524454832077026,
|
|
"logits/rejected": -1.9345438480377197,
|
|
"logps/chosen": -149.3410186767578,
|
|
"logps/rejected": -160.43539428710938,
|
|
"loss": 0.7965,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.08119592815637589,
|
|
"rewards/margins": -0.04433928430080414,
|
|
"rewards/rejected": -0.03685663640499115,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 4.8284288933120594e-05,
|
|
"logits/chosen": -1.8411493301391602,
|
|
"logits/rejected": -1.8524004220962524,
|
|
"logps/chosen": -175.48532104492188,
|
|
"logps/rejected": -182.57582092285156,
|
|
"loss": 0.5989,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.46560171246528625,
|
|
"rewards/margins": 0.3886818289756775,
|
|
"rewards/rejected": -0.8542835712432861,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 4.8271220316909735e-05,
|
|
"logits/chosen": -1.687551498413086,
|
|
"logits/rejected": -1.722497582435608,
|
|
"logps/chosen": -167.26939392089844,
|
|
"logps/rejected": -181.43814086914062,
|
|
"loss": 0.9314,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.5299715399742126,
|
|
"rewards/margins": -0.17927514016628265,
|
|
"rewards/rejected": -0.35069650411605835,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 4.825810389961666e-05,
|
|
"logits/chosen": -1.916797399520874,
|
|
"logits/rejected": -1.9123202562332153,
|
|
"logps/chosen": -198.55821228027344,
|
|
"logps/rejected": -225.2833709716797,
|
|
"loss": 0.8431,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.3632412254810333,
|
|
"rewards/margins": -0.22463062405586243,
|
|
"rewards/rejected": -0.1386105716228485,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 4.8244939708183596e-05,
|
|
"logits/chosen": -1.6189442873001099,
|
|
"logits/rejected": -1.6897720098495483,
|
|
"logps/chosen": -186.33004760742188,
|
|
"logps/rejected": -181.720947265625,
|
|
"loss": 0.8053,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": 0.27750128507614136,
|
|
"rewards/margins": -0.1327960044145584,
|
|
"rewards/rejected": 0.41029733419418335,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 4.823172776965094e-05,
|
|
"logits/chosen": -2.003798246383667,
|
|
"logits/rejected": -1.9069428443908691,
|
|
"logps/chosen": -206.22410583496094,
|
|
"logps/rejected": -202.70343017578125,
|
|
"loss": 0.7931,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.07843560725450516,
|
|
"rewards/margins": 0.052309781312942505,
|
|
"rewards/rejected": -0.13074536621570587,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 4.821846811115713e-05,
|
|
"logits/chosen": -1.4173839092254639,
|
|
"logits/rejected": -1.469193696975708,
|
|
"logps/chosen": -252.58900451660156,
|
|
"logps/rejected": -266.8291015625,
|
|
"loss": 0.7776,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.2866368591785431,
|
|
"rewards/margins": -0.01211586594581604,
|
|
"rewards/rejected": -0.27452099323272705,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 4.820516075993865e-05,
|
|
"logits/chosen": -1.9226216077804565,
|
|
"logits/rejected": -1.8626333475112915,
|
|
"logps/chosen": -189.990966796875,
|
|
"logps/rejected": -212.71446228027344,
|
|
"loss": 0.7835,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.2961033880710602,
|
|
"rewards/margins": -0.048593662679195404,
|
|
"rewards/rejected": -0.24750974774360657,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 4.819180574332994e-05,
|
|
"logits/chosen": -2.1082520484924316,
|
|
"logits/rejected": -2.0929677486419678,
|
|
"logps/chosen": -172.19317626953125,
|
|
"logps/rejected": -166.5411376953125,
|
|
"loss": 0.7558,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.3426204323768616,
|
|
"rewards/margins": 0.029606737196445465,
|
|
"rewards/rejected": -0.37222716212272644,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 4.8178403088763355e-05,
|
|
"logits/chosen": -1.8143612146377563,
|
|
"logits/rejected": -1.8444868326187134,
|
|
"logps/chosen": -219.51095581054688,
|
|
"logps/rejected": -223.00332641601562,
|
|
"loss": 0.7669,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.5216841697692871,
|
|
"rewards/margins": -0.06673192977905273,
|
|
"rewards/rejected": -0.454952210187912,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 4.8164952823769085e-05,
|
|
"logits/chosen": -2.0486927032470703,
|
|
"logits/rejected": -1.986092209815979,
|
|
"logps/chosen": -179.5719757080078,
|
|
"logps/rejected": -176.9426727294922,
|
|
"loss": 0.7705,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.12122049927711487,
|
|
"rewards/margins": -0.017412271350622177,
|
|
"rewards/rejected": -0.1038082093000412,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 4.815145497597514e-05,
|
|
"logits/chosen": -1.6093693971633911,
|
|
"logits/rejected": -1.6691244840621948,
|
|
"logps/chosen": -220.64230346679688,
|
|
"logps/rejected": -226.87054443359375,
|
|
"loss": 0.739,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.48053669929504395,
|
|
"rewards/margins": 0.01233639195561409,
|
|
"rewards/rejected": -0.4928731620311737,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 4.8137909573107246e-05,
|
|
"logits/chosen": -1.5354715585708618,
|
|
"logits/rejected": -1.4911106824874878,
|
|
"logps/chosen": -173.0819549560547,
|
|
"logps/rejected": -171.91053771972656,
|
|
"loss": 0.7285,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.09934857487678528,
|
|
"rewards/margins": 0.08546656370162964,
|
|
"rewards/rejected": -0.18481513857841492,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 4.812431664298883e-05,
|
|
"logits/chosen": -1.8645007610321045,
|
|
"logits/rejected": -1.8654489517211914,
|
|
"logps/chosen": -174.5074005126953,
|
|
"logps/rejected": -174.8561248779297,
|
|
"loss": 0.7322,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.34228798747062683,
|
|
"rewards/margins": 0.07575173676013947,
|
|
"rewards/rejected": -0.4180397093296051,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 4.811067621354094e-05,
|
|
"logits/chosen": -1.672195315361023,
|
|
"logits/rejected": -1.7209053039550781,
|
|
"logps/chosen": -179.9447784423828,
|
|
"logps/rejected": -163.33111572265625,
|
|
"loss": 0.9538,
|
|
"rewards/accuracies": 0.25,
|
|
"rewards/chosen": -0.3875757157802582,
|
|
"rewards/margins": -0.4043459892272949,
|
|
"rewards/rejected": 0.016770271584391594,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 4.8096988312782174e-05,
|
|
"logits/chosen": -2.031759023666382,
|
|
"logits/rejected": -2.071578025817871,
|
|
"logps/chosen": -179.43516540527344,
|
|
"logps/rejected": -177.50064086914062,
|
|
"loss": 0.8834,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.4397561550140381,
|
|
"rewards/margins": -0.1974533647298813,
|
|
"rewards/rejected": -0.2423027753829956,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 4.8083252968828665e-05,
|
|
"logits/chosen": -1.9421418905258179,
|
|
"logits/rejected": -1.922428846359253,
|
|
"logps/chosen": -142.18792724609375,
|
|
"logps/rejected": -137.76077270507812,
|
|
"loss": 0.5961,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.12917383015155792,
|
|
"rewards/margins": 0.23805946111679077,
|
|
"rewards/rejected": -0.3672332763671875,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 4.8069470209893974e-05,
|
|
"logits/chosen": -1.8579202890396118,
|
|
"logits/rejected": -1.7998918294906616,
|
|
"logps/chosen": -155.51712036132812,
|
|
"logps/rejected": -156.0476531982422,
|
|
"loss": 0.7183,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.02486548200249672,
|
|
"rewards/margins": 0.029943522065877914,
|
|
"rewards/rejected": -0.054809004068374634,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 4.8055640064289086e-05,
|
|
"logits/chosen": -1.9409297704696655,
|
|
"logits/rejected": -1.911987066268921,
|
|
"logps/chosen": -245.75701904296875,
|
|
"logps/rejected": -248.6239013671875,
|
|
"loss": 0.8102,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.4732462763786316,
|
|
"rewards/margins": -0.1539333313703537,
|
|
"rewards/rejected": -0.3193129599094391,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 4.80417625604223e-05,
|
|
"logits/chosen": -1.8907678127288818,
|
|
"logits/rejected": -1.8695294857025146,
|
|
"logps/chosen": -177.34124755859375,
|
|
"logps/rejected": -179.99476623535156,
|
|
"loss": 0.6441,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.10845950245857239,
|
|
"rewards/margins": 0.19328270852565765,
|
|
"rewards/rejected": -0.3017422556877136,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 4.8027837726799205e-05,
|
|
"logits/chosen": -1.8234997987747192,
|
|
"logits/rejected": -1.8477734327316284,
|
|
"logps/chosen": -152.91793823242188,
|
|
"logps/rejected": -166.2785186767578,
|
|
"loss": 0.7019,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.19078156352043152,
|
|
"rewards/margins": 0.07663966715335846,
|
|
"rewards/rejected": -0.26742124557495117,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 4.801386559202259e-05,
|
|
"logits/chosen": -1.9331165552139282,
|
|
"logits/rejected": -1.9343795776367188,
|
|
"logps/chosen": -197.23309326171875,
|
|
"logps/rejected": -216.14816284179688,
|
|
"loss": 0.6427,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.1457238346338272,
|
|
"rewards/margins": 0.22981032729148865,
|
|
"rewards/rejected": -0.37553414702415466,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 4.799984618479242e-05,
|
|
"logits/chosen": -1.7535991668701172,
|
|
"logits/rejected": -1.8327221870422363,
|
|
"logps/chosen": -170.40121459960938,
|
|
"logps/rejected": -194.4297332763672,
|
|
"loss": 0.7553,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.2612544000148773,
|
|
"rewards/margins": 0.023236550390720367,
|
|
"rewards/rejected": -0.2844909727573395,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 4.798577953390577e-05,
|
|
"logits/chosen": -1.9297330379486084,
|
|
"logits/rejected": -1.8824340105056763,
|
|
"logps/chosen": -192.4176788330078,
|
|
"logps/rejected": -204.23397827148438,
|
|
"loss": 0.7058,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.016188140958547592,
|
|
"rewards/margins": 0.1552063524723053,
|
|
"rewards/rejected": -0.1713944971561432,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 4.797166566825675e-05,
|
|
"logits/chosen": -1.983964204788208,
|
|
"logits/rejected": -2.0162336826324463,
|
|
"logps/chosen": -165.31370544433594,
|
|
"logps/rejected": -175.11459350585938,
|
|
"loss": 0.8134,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.43316060304641724,
|
|
"rewards/margins": -0.14317026734352112,
|
|
"rewards/rejected": -0.2899903357028961,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 4.795750461683644e-05,
|
|
"logits/chosen": -1.7382255792617798,
|
|
"logits/rejected": -1.7152175903320312,
|
|
"logps/chosen": -162.63970947265625,
|
|
"logps/rejected": -167.93568420410156,
|
|
"loss": 0.8503,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.3506108224391937,
|
|
"rewards/margins": -0.19645802676677704,
|
|
"rewards/rejected": -0.1541527807712555,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 4.794329640873285e-05,
|
|
"logits/chosen": -1.9835039377212524,
|
|
"logits/rejected": -1.9371455907821655,
|
|
"logps/chosen": -164.90518188476562,
|
|
"logps/rejected": -153.89732360839844,
|
|
"loss": 0.8365,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.10440421104431152,
|
|
"rewards/margins": -0.1423448920249939,
|
|
"rewards/rejected": 0.03794068843126297,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.7929041073130867e-05,
|
|
"logits/chosen": -1.6812117099761963,
|
|
"logits/rejected": -1.7789117097854614,
|
|
"logps/chosen": -171.525390625,
|
|
"logps/rejected": -194.28512573242188,
|
|
"loss": 0.6979,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": 0.21003414690494537,
|
|
"rewards/margins": 0.1351175457239151,
|
|
"rewards/rejected": 0.07491665333509445,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.7914738639312165e-05,
|
|
"logits/chosen": -1.9188036918640137,
|
|
"logits/rejected": -1.9109784364700317,
|
|
"logps/chosen": -189.7833251953125,
|
|
"logps/rejected": -164.029541015625,
|
|
"loss": 0.8337,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.3247818648815155,
|
|
"rewards/margins": -0.18027785420417786,
|
|
"rewards/rejected": -0.14450398087501526,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.790038913665519e-05,
|
|
"logits/chosen": -2.0011377334594727,
|
|
"logits/rejected": -2.060800552368164,
|
|
"logps/chosen": -176.84857177734375,
|
|
"logps/rejected": -189.64443969726562,
|
|
"loss": 0.7481,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.09154945611953735,
|
|
"rewards/margins": -0.03873196616768837,
|
|
"rewards/rejected": -0.052817486226558685,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.788599259463502e-05,
|
|
"logits/chosen": -1.8452333211898804,
|
|
"logits/rejected": -1.8431695699691772,
|
|
"logps/chosen": -154.12435913085938,
|
|
"logps/rejected": -155.32220458984375,
|
|
"loss": 0.7097,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.14383897185325623,
|
|
"rewards/margins": 0.18797016143798828,
|
|
"rewards/rejected": -0.3318091332912445,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.787154904282341e-05,
|
|
"logits/chosen": -1.3743209838867188,
|
|
"logits/rejected": -1.4174141883850098,
|
|
"logps/chosen": -177.5303192138672,
|
|
"logps/rejected": -211.40457153320312,
|
|
"loss": 0.5768,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 0.2827177047729492,
|
|
"rewards/margins": 0.42551764845848083,
|
|
"rewards/rejected": -0.1427999585866928,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.7857058510888645e-05,
|
|
"logits/chosen": -2.1841466426849365,
|
|
"logits/rejected": -2.124525547027588,
|
|
"logps/chosen": -246.4276885986328,
|
|
"logps/rejected": -244.47906494140625,
|
|
"loss": 0.6631,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.038991779088974,
|
|
"rewards/margins": 0.20248231291770935,
|
|
"rewards/rejected": -0.24147410690784454,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.7842521028595526e-05,
|
|
"logits/chosen": -1.8262689113616943,
|
|
"logits/rejected": -1.8402464389801025,
|
|
"logps/chosen": -158.2376708984375,
|
|
"logps/rejected": -177.70938110351562,
|
|
"loss": 0.7674,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.19041013717651367,
|
|
"rewards/margins": 0.02704358845949173,
|
|
"rewards/rejected": -0.2174537032842636,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.7827936625805284e-05,
|
|
"logits/chosen": -1.9320769309997559,
|
|
"logits/rejected": -1.9378974437713623,
|
|
"logps/chosen": -162.44107055664062,
|
|
"logps/rejected": -162.588623046875,
|
|
"loss": 0.702,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": 0.22483624517917633,
|
|
"rewards/margins": 0.04632706940174103,
|
|
"rewards/rejected": 0.1785091906785965,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.7813305332475535e-05,
|
|
"logits/chosen": -2.023815870285034,
|
|
"logits/rejected": -2.1101415157318115,
|
|
"logps/chosen": -161.48867797851562,
|
|
"logps/rejected": -177.5762939453125,
|
|
"loss": 0.7641,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.2686167359352112,
|
|
"rewards/margins": -0.034188684076070786,
|
|
"rewards/rejected": -0.2344280332326889,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.77986271786602e-05,
|
|
"logits/chosen": -1.9036113023757935,
|
|
"logits/rejected": -1.8756999969482422,
|
|
"logps/chosen": -200.5419464111328,
|
|
"logps/rejected": -212.63906860351562,
|
|
"loss": 0.6506,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": 0.06710982322692871,
|
|
"rewards/margins": 0.18568173050880432,
|
|
"rewards/rejected": -0.11857189238071442,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.778390219450949e-05,
|
|
"logits/chosen": -1.8086354732513428,
|
|
"logits/rejected": -1.8471179008483887,
|
|
"logps/chosen": -152.69277954101562,
|
|
"logps/rejected": -143.47000122070312,
|
|
"loss": 0.6566,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 0.007534712553024292,
|
|
"rewards/margins": 0.15669256448745728,
|
|
"rewards/rejected": -0.14915785193443298,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.776913041026976e-05,
|
|
"logits/chosen": -2.1575872898101807,
|
|
"logits/rejected": -2.189612627029419,
|
|
"logps/chosen": -178.6571044921875,
|
|
"logps/rejected": -187.46389770507812,
|
|
"loss": 0.8387,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.3838292360305786,
|
|
"rewards/margins": -0.1892194300889969,
|
|
"rewards/rejected": -0.19460979104042053,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.775431185628353e-05,
|
|
"logits/chosen": -2.0314245223999023,
|
|
"logits/rejected": -2.0670695304870605,
|
|
"logps/chosen": -139.46705627441406,
|
|
"logps/rejected": -137.02342224121094,
|
|
"loss": 0.799,
|
|
"rewards/accuracies": 0.25,
|
|
"rewards/chosen": -0.3406837582588196,
|
|
"rewards/margins": -0.09202456474304199,
|
|
"rewards/rejected": -0.24865922331809998,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.7739446562989384e-05,
|
|
"logits/chosen": -1.7543888092041016,
|
|
"logits/rejected": -1.8098934888839722,
|
|
"logps/chosen": -171.0781707763672,
|
|
"logps/rejected": -192.0042724609375,
|
|
"loss": 0.7873,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.4363712966442108,
|
|
"rewards/margins": 0.039258234202861786,
|
|
"rewards/rejected": -0.4756295382976532,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.772453456092191e-05,
|
|
"logits/chosen": -1.8036949634552002,
|
|
"logits/rejected": -1.8187798261642456,
|
|
"logps/chosen": -176.441650390625,
|
|
"logps/rejected": -187.6487274169922,
|
|
"loss": 0.7976,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.22391349077224731,
|
|
"rewards/margins": -0.052816301584243774,
|
|
"rewards/rejected": -0.17109718918800354,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 4.7709575880711634e-05,
|
|
"logits/chosen": -2.0721547603607178,
|
|
"logits/rejected": -2.05245041847229,
|
|
"logps/chosen": -181.74232482910156,
|
|
"logps/rejected": -187.65249633789062,
|
|
"loss": 0.5921,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": 0.2778853476047516,
|
|
"rewards/margins": 0.33293941617012024,
|
|
"rewards/rejected": -0.05505405738949776,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 4.769457055308497e-05,
|
|
"logits/chosen": -2.026765823364258,
|
|
"logits/rejected": -2.018843412399292,
|
|
"logps/chosen": -197.64260864257812,
|
|
"logps/rejected": -179.19317626953125,
|
|
"loss": 0.9551,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.30001845955848694,
|
|
"rewards/margins": -0.15433457493782043,
|
|
"rewards/rejected": -0.1456838697195053,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 4.767951860886415e-05,
|
|
"logits/chosen": -1.7545514106750488,
|
|
"logits/rejected": -1.7311463356018066,
|
|
"logps/chosen": -189.0639190673828,
|
|
"logps/rejected": -202.9300537109375,
|
|
"loss": 0.7032,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.16693828999996185,
|
|
"rewards/margins": 0.15034297108650208,
|
|
"rewards/rejected": -0.3172812759876251,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 4.766442007896715e-05,
|
|
"logits/chosen": -1.3602584600448608,
|
|
"logits/rejected": -1.3138482570648193,
|
|
"logps/chosen": -229.68812561035156,
|
|
"logps/rejected": -228.81280517578125,
|
|
"loss": 0.7063,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.30803146958351135,
|
|
"rewards/margins": 0.05981824919581413,
|
|
"rewards/rejected": -0.3678497076034546,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 4.764927499440767e-05,
|
|
"logits/chosen": -1.2695108652114868,
|
|
"logits/rejected": -1.2994788885116577,
|
|
"logps/chosen": -178.60507202148438,
|
|
"logps/rejected": -209.27049255371094,
|
|
"loss": 0.9922,
|
|
"rewards/accuracies": 0.25,
|
|
"rewards/chosen": -0.590323805809021,
|
|
"rewards/margins": -0.39020806550979614,
|
|
"rewards/rejected": -0.20011577010154724,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 4.763408338629498e-05,
|
|
"logits/chosen": -2.1045475006103516,
|
|
"logits/rejected": -2.1285929679870605,
|
|
"logps/chosen": -230.02374267578125,
|
|
"logps/rejected": -225.12106323242188,
|
|
"loss": 0.8737,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.33914846181869507,
|
|
"rewards/margins": -0.23797425627708435,
|
|
"rewards/rejected": -0.10117418318986893,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 4.761884528583396e-05,
|
|
"logits/chosen": -1.4888020753860474,
|
|
"logits/rejected": -1.4826213121414185,
|
|
"logps/chosen": -221.777587890625,
|
|
"logps/rejected": -241.30885314941406,
|
|
"loss": 0.7111,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.6717454791069031,
|
|
"rewards/margins": 0.279751718044281,
|
|
"rewards/rejected": -0.9514971375465393,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 4.760356072432498e-05,
|
|
"logits/chosen": -1.8832398653030396,
|
|
"logits/rejected": -2.041220188140869,
|
|
"logps/chosen": -290.4461364746094,
|
|
"logps/rejected": -303.868408203125,
|
|
"loss": 0.747,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": 0.1965964436531067,
|
|
"rewards/margins": 0.01566828042268753,
|
|
"rewards/rejected": 0.1809280961751938,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.7588229733163834e-05,
|
|
"logits/chosen": -1.9910494089126587,
|
|
"logits/rejected": -2.065354585647583,
|
|
"logps/chosen": -189.50331115722656,
|
|
"logps/rejected": -204.8780975341797,
|
|
"loss": 0.7243,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.7861883640289307,
|
|
"rewards/margins": 0.04858472943305969,
|
|
"rewards/rejected": -0.834773063659668,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.757285234384169e-05,
|
|
"logits/chosen": -1.94736909866333,
|
|
"logits/rejected": -2.0690090656280518,
|
|
"logps/chosen": -183.47666931152344,
|
|
"logps/rejected": -199.96681213378906,
|
|
"loss": 0.7047,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.5590149164199829,
|
|
"rewards/margins": 0.15124721825122833,
|
|
"rewards/rejected": -0.7102621793746948,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.755742858794503e-05,
|
|
"logits/chosen": -2.1221137046813965,
|
|
"logits/rejected": -2.069035291671753,
|
|
"logps/chosen": -203.06430053710938,
|
|
"logps/rejected": -187.47093200683594,
|
|
"loss": 0.7289,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.4738054871559143,
|
|
"rewards/margins": 0.03721272572875023,
|
|
"rewards/rejected": -0.5110181570053101,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.754195849715557e-05,
|
|
"logits/chosen": -1.9132802486419678,
|
|
"logits/rejected": -1.976714849472046,
|
|
"logps/chosen": -162.88668823242188,
|
|
"logps/rejected": -180.09291076660156,
|
|
"loss": 0.658,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.10689959675073624,
|
|
"rewards/margins": 0.1677176058292389,
|
|
"rewards/rejected": -0.2746172249317169,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.75264421032502e-05,
|
|
"logits/chosen": -1.8963744640350342,
|
|
"logits/rejected": -1.8926461935043335,
|
|
"logps/chosen": -199.32736206054688,
|
|
"logps/rejected": -198.4470672607422,
|
|
"loss": 0.6689,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.6300160884857178,
|
|
"rewards/margins": 0.09289233386516571,
|
|
"rewards/rejected": -0.7229084968566895,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.751087943810093e-05,
|
|
"logits/chosen": -1.8022470474243164,
|
|
"logits/rejected": -1.797208547592163,
|
|
"logps/chosen": -184.97244262695312,
|
|
"logps/rejected": -180.94895935058594,
|
|
"loss": 0.5437,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.35914158821105957,
|
|
"rewards/margins": 0.4712386727333069,
|
|
"rewards/rejected": -0.8303802609443665,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.749527053367481e-05,
|
|
"logits/chosen": -1.7871997356414795,
|
|
"logits/rejected": -1.7747814655303955,
|
|
"logps/chosen": -189.15310668945312,
|
|
"logps/rejected": -178.39805603027344,
|
|
"loss": 0.842,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.7546498775482178,
|
|
"rewards/margins": -0.17995727062225342,
|
|
"rewards/rejected": -0.5746926069259644,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 4.747961542203386e-05,
|
|
"logits/chosen": -1.872157335281372,
|
|
"logits/rejected": -1.9486424922943115,
|
|
"logps/chosen": -172.46145629882812,
|
|
"logps/rejected": -195.35421752929688,
|
|
"loss": 0.7544,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.42608222365379333,
|
|
"rewards/margins": -0.038050394505262375,
|
|
"rewards/rejected": -0.38803184032440186,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.746391413533503e-05,
|
|
"logits/chosen": -1.9934085607528687,
|
|
"logits/rejected": -2.0255632400512695,
|
|
"logps/chosen": -166.4789276123047,
|
|
"logps/rejected": -183.97271728515625,
|
|
"loss": 0.6867,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.414784699678421,
|
|
"rewards/margins": 0.18602606654167175,
|
|
"rewards/rejected": -0.600810706615448,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.74481667058301e-05,
|
|
"logits/chosen": -1.8942878246307373,
|
|
"logits/rejected": -1.87151038646698,
|
|
"logps/chosen": -173.86004638671875,
|
|
"logps/rejected": -163.0937957763672,
|
|
"loss": 0.8824,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.772480309009552,
|
|
"rewards/margins": -0.22370155155658722,
|
|
"rewards/rejected": -0.5487788319587708,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.743237316586564e-05,
|
|
"logits/chosen": -1.9378021955490112,
|
|
"logits/rejected": -1.9703481197357178,
|
|
"logps/chosen": -180.11892700195312,
|
|
"logps/rejected": -195.14578247070312,
|
|
"loss": 0.7388,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.3675965666770935,
|
|
"rewards/margins": -0.02655930444598198,
|
|
"rewards/rejected": -0.3410373032093048,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.741653354788295e-05,
|
|
"logits/chosen": -2.0154881477355957,
|
|
"logits/rejected": -2.0120866298675537,
|
|
"logps/chosen": -174.45498657226562,
|
|
"logps/rejected": -182.1272430419922,
|
|
"loss": 0.8029,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.7631839513778687,
|
|
"rewards/margins": -0.11559872329235077,
|
|
"rewards/rejected": -0.6475852727890015,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.7400647884417956e-05,
|
|
"logits/chosen": -1.8835885524749756,
|
|
"logits/rejected": -2.027597665786743,
|
|
"logps/chosen": -169.2918701171875,
|
|
"logps/rejected": -167.33514404296875,
|
|
"loss": 0.6719,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.33407047390937805,
|
|
"rewards/margins": 0.15689513087272644,
|
|
"rewards/rejected": -0.49096566438674927,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.7384716208101166e-05,
|
|
"logits/chosen": -2.028184413909912,
|
|
"logits/rejected": -2.0173041820526123,
|
|
"logps/chosen": -168.9990692138672,
|
|
"logps/rejected": -160.54428100585938,
|
|
"loss": 0.7799,
|
|
"rewards/accuracies": 0.1875,
|
|
"rewards/chosen": -0.5599774122238159,
|
|
"rewards/margins": -0.1413966715335846,
|
|
"rewards/rejected": -0.4185807704925537,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.736873855165762e-05,
|
|
"logits/chosen": -1.9846090078353882,
|
|
"logits/rejected": -1.9857451915740967,
|
|
"logps/chosen": -193.67715454101562,
|
|
"logps/rejected": -175.9185333251953,
|
|
"loss": 0.6656,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.5016548037528992,
|
|
"rewards/margins": 0.22657424211502075,
|
|
"rewards/rejected": -0.7282290458679199,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 4.735271494790678e-05,
|
|
"logits/chosen": -1.92975914478302,
|
|
"logits/rejected": -1.9406872987747192,
|
|
"logps/chosen": -181.31509399414062,
|
|
"logps/rejected": -167.101806640625,
|
|
"loss": 0.7386,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.18490439653396606,
|
|
"rewards/margins": 0.0683104544878006,
|
|
"rewards/rejected": -0.25321486592292786,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 4.733664542976253e-05,
|
|
"logits/chosen": -1.9516609907150269,
|
|
"logits/rejected": -2.005047559738159,
|
|
"logps/chosen": -167.55972290039062,
|
|
"logps/rejected": -168.99807739257812,
|
|
"loss": 0.9659,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.3683350086212158,
|
|
"rewards/margins": -0.24237681925296783,
|
|
"rewards/rejected": -0.12595820426940918,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 4.732053003023301e-05,
|
|
"logits/chosen": -1.934274435043335,
|
|
"logits/rejected": -1.9798094034194946,
|
|
"logps/chosen": -153.78518676757812,
|
|
"logps/rejected": -166.25979614257812,
|
|
"loss": 0.7058,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.5433434247970581,
|
|
"rewards/margins": 0.13497428596019745,
|
|
"rewards/rejected": -0.6783177256584167,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 4.730436878242064e-05,
|
|
"logits/chosen": -1.9575129747390747,
|
|
"logits/rejected": -1.9926663637161255,
|
|
"logps/chosen": -153.06951904296875,
|
|
"logps/rejected": -174.07875061035156,
|
|
"loss": 0.8359,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.4944266080856323,
|
|
"rewards/margins": -0.00036665797233581543,
|
|
"rewards/rejected": -0.4940599203109741,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 4.7288161719522016e-05,
|
|
"logits/chosen": -1.9566092491149902,
|
|
"logits/rejected": -1.9228875637054443,
|
|
"logps/chosen": -162.54771423339844,
|
|
"logps/rejected": -167.0357666015625,
|
|
"loss": 0.8701,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.5988922715187073,
|
|
"rewards/margins": -0.1607256382703781,
|
|
"rewards/rejected": -0.43816661834716797,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 4.727190887482783e-05,
|
|
"logits/chosen": -2.212228775024414,
|
|
"logits/rejected": -2.238290309906006,
|
|
"logps/chosen": -185.6492462158203,
|
|
"logps/rejected": -201.63949584960938,
|
|
"loss": 0.7027,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.45072147250175476,
|
|
"rewards/margins": 0.03795819729566574,
|
|
"rewards/rejected": -0.4886796772480011,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 4.725561028172282e-05,
|
|
"logits/chosen": -2.08243989944458,
|
|
"logits/rejected": -2.100586414337158,
|
|
"logps/chosen": -169.82723999023438,
|
|
"logps/rejected": -169.41476440429688,
|
|
"loss": 0.8011,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.5032753348350525,
|
|
"rewards/margins": -0.07662791758775711,
|
|
"rewards/rejected": -0.4266473352909088,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 4.7239265973685696e-05,
|
|
"logits/chosen": -1.7974039316177368,
|
|
"logits/rejected": -1.802499771118164,
|
|
"logps/chosen": -166.3297119140625,
|
|
"logps/rejected": -184.35450744628906,
|
|
"loss": 0.6115,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.3483116924762726,
|
|
"rewards/margins": 0.2958502173423767,
|
|
"rewards/rejected": -0.6441619396209717,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.722287598428907e-05,
|
|
"logits/chosen": -1.9482653141021729,
|
|
"logits/rejected": -1.9997018575668335,
|
|
"logps/chosen": -202.8570098876953,
|
|
"logps/rejected": -219.10565185546875,
|
|
"loss": 0.6303,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.0477115735411644,
|
|
"rewards/margins": 0.21878241002559662,
|
|
"rewards/rejected": -0.26649394631385803,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.720644034719938e-05,
|
|
"logits/chosen": -1.8863980770111084,
|
|
"logits/rejected": -1.8580697774887085,
|
|
"logps/chosen": -178.37112426757812,
|
|
"logps/rejected": -196.09762573242188,
|
|
"loss": 0.7045,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.359438419342041,
|
|
"rewards/margins": 0.0665070116519928,
|
|
"rewards/rejected": -0.4259454607963562,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.7189959096176825e-05,
|
|
"logits/chosen": -1.962789535522461,
|
|
"logits/rejected": -2.0059375762939453,
|
|
"logps/chosen": -168.6863250732422,
|
|
"logps/rejected": -207.6638946533203,
|
|
"loss": 0.7517,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.33223748207092285,
|
|
"rewards/margins": 0.007875222712755203,
|
|
"rewards/rejected": -0.3401126563549042,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.7173432265075334e-05,
|
|
"logits/chosen": -2.13173770904541,
|
|
"logits/rejected": -2.1997811794281006,
|
|
"logps/chosen": -171.5570526123047,
|
|
"logps/rejected": -173.00726318359375,
|
|
"loss": 0.7831,
|
|
"rewards/accuracies": 0.25,
|
|
"rewards/chosen": -0.5620677471160889,
|
|
"rewards/margins": -0.08540257066488266,
|
|
"rewards/rejected": -0.47666510939598083,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.7156859887842416e-05,
|
|
"logits/chosen": -1.9717164039611816,
|
|
"logits/rejected": -1.9659010171890259,
|
|
"logps/chosen": -163.4027099609375,
|
|
"logps/rejected": -171.51205444335938,
|
|
"loss": 0.8767,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.36586710810661316,
|
|
"rewards/margins": -0.22375579178333282,
|
|
"rewards/rejected": -0.14211128652095795,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.714024199851915e-05,
|
|
"logits/chosen": -1.9461572170257568,
|
|
"logits/rejected": -1.9711329936981201,
|
|
"logps/chosen": -173.81141662597656,
|
|
"logps/rejected": -172.41860961914062,
|
|
"loss": 0.7655,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.5734329223632812,
|
|
"rewards/margins": 0.0679081380367279,
|
|
"rewards/rejected": -0.6413410305976868,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.712357863124013e-05,
|
|
"logits/chosen": -2.0299744606018066,
|
|
"logits/rejected": -2.055668830871582,
|
|
"logps/chosen": -168.48684692382812,
|
|
"logps/rejected": -182.92257690429688,
|
|
"loss": 0.69,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.44026386737823486,
|
|
"rewards/margins": 0.11328400671482086,
|
|
"rewards/rejected": -0.5535478591918945,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 4.710686982023332e-05,
|
|
"logits/chosen": -2.0356907844543457,
|
|
"logits/rejected": -1.9616978168487549,
|
|
"logps/chosen": -147.8501739501953,
|
|
"logps/rejected": -154.8659210205078,
|
|
"loss": 0.7309,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.06124575436115265,
|
|
"rewards/margins": 0.03877441585063934,
|
|
"rewards/rejected": -0.10002017021179199,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.709011559982006e-05,
|
|
"logits/chosen": -2.043642997741699,
|
|
"logits/rejected": -1.9762914180755615,
|
|
"logps/chosen": -193.67340087890625,
|
|
"logps/rejected": -186.66543579101562,
|
|
"loss": 0.7127,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.34980466961860657,
|
|
"rewards/margins": 0.07452677190303802,
|
|
"rewards/rejected": -0.4243314862251282,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.707331600441495e-05,
|
|
"logits/chosen": -2.072479724884033,
|
|
"logits/rejected": -2.091381549835205,
|
|
"logps/chosen": -191.45059204101562,
|
|
"logps/rejected": -176.9473876953125,
|
|
"loss": 0.6801,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.029101327061653137,
|
|
"rewards/margins": 0.26256757974624634,
|
|
"rewards/rejected": -0.2916688919067383,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.705647106852581e-05,
|
|
"logits/chosen": -1.9268254041671753,
|
|
"logits/rejected": -1.8882079124450684,
|
|
"logps/chosen": -171.5392303466797,
|
|
"logps/rejected": -182.5844268798828,
|
|
"loss": 0.7921,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.2353130429983139,
|
|
"rewards/margins": -0.0611705407500267,
|
|
"rewards/rejected": -0.1741425096988678,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.7039580826753564e-05,
|
|
"logits/chosen": -2.029810667037964,
|
|
"logits/rejected": -2.026019334793091,
|
|
"logps/chosen": -169.64918518066406,
|
|
"logps/rejected": -186.5985107421875,
|
|
"loss": 0.6615,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.34466996788978577,
|
|
"rewards/margins": 0.14720463752746582,
|
|
"rewards/rejected": -0.491874635219574,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.7022645313792235e-05,
|
|
"logits/chosen": -1.5730178356170654,
|
|
"logits/rejected": -1.5958049297332764,
|
|
"logps/chosen": -160.8270263671875,
|
|
"logps/rejected": -176.29554748535156,
|
|
"loss": 0.7613,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.34809601306915283,
|
|
"rewards/margins": 0.23688597977161407,
|
|
"rewards/rejected": -0.5849819779396057,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.700566456442882e-05,
|
|
"logits/chosen": -2.009403705596924,
|
|
"logits/rejected": -1.9752486944198608,
|
|
"logps/chosen": -178.82701110839844,
|
|
"logps/rejected": -180.32125854492188,
|
|
"loss": 1.0244,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.6049767136573792,
|
|
"rewards/margins": -0.4276657700538635,
|
|
"rewards/rejected": -0.1773110032081604,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.6988638613543216e-05,
|
|
"logits/chosen": -1.7354819774627686,
|
|
"logits/rejected": -1.731933355331421,
|
|
"logps/chosen": -167.52633666992188,
|
|
"logps/rejected": -181.66583251953125,
|
|
"loss": 0.789,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.6287019848823547,
|
|
"rewards/margins": -0.054200708866119385,
|
|
"rewards/rejected": -0.5745012760162354,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 4.6971567496108206e-05,
|
|
"logits/chosen": -1.9981721639633179,
|
|
"logits/rejected": -2.026167154312134,
|
|
"logps/chosen": -212.54891967773438,
|
|
"logps/rejected": -215.69253540039062,
|
|
"loss": 0.7395,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.4362500011920929,
|
|
"rewards/margins": -0.020926453173160553,
|
|
"rewards/rejected": -0.4153235852718353,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 4.695445124718931e-05,
|
|
"logits/chosen": -2.239379405975342,
|
|
"logits/rejected": -2.186093807220459,
|
|
"logps/chosen": -189.42547607421875,
|
|
"logps/rejected": -181.0236358642578,
|
|
"loss": 0.6656,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.3167141079902649,
|
|
"rewards/margins": 0.10905791819095612,
|
|
"rewards/rejected": -0.4257720112800598,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 4.693728990194479e-05,
|
|
"logits/chosen": -2.132059097290039,
|
|
"logits/rejected": -2.1224942207336426,
|
|
"logps/chosen": -211.412841796875,
|
|
"logps/rejected": -197.23439025878906,
|
|
"loss": 0.6771,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.29473942518234253,
|
|
"rewards/margins": 0.20832209289073944,
|
|
"rewards/rejected": -0.5030615329742432,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 4.692008349562551e-05,
|
|
"logits/chosen": -2.2113986015319824,
|
|
"logits/rejected": -2.1679513454437256,
|
|
"logps/chosen": -174.953369140625,
|
|
"logps/rejected": -188.64576721191406,
|
|
"loss": 0.9728,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.5910927057266235,
|
|
"rewards/margins": -0.3634761869907379,
|
|
"rewards/rejected": -0.22761650383472443,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 4.690283206357491e-05,
|
|
"logits/chosen": -1.984092116355896,
|
|
"logits/rejected": -2.0135576725006104,
|
|
"logps/chosen": -187.25738525390625,
|
|
"logps/rejected": -189.52325439453125,
|
|
"loss": 0.672,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.4424164593219757,
|
|
"rewards/margins": 0.20770896971225739,
|
|
"rewards/rejected": -0.6501253843307495,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 4.6885535641228904e-05,
|
|
"logits/chosen": -2.0326945781707764,
|
|
"logits/rejected": -2.0409088134765625,
|
|
"logps/chosen": -189.7724609375,
|
|
"logps/rejected": -196.32164001464844,
|
|
"loss": 0.846,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.2984338402748108,
|
|
"rewards/margins": -0.14787545800209045,
|
|
"rewards/rejected": -0.15055838227272034,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 4.6868194264115833e-05,
|
|
"logits/chosen": -1.8356232643127441,
|
|
"logits/rejected": -1.860498309135437,
|
|
"logps/chosen": -190.26791381835938,
|
|
"logps/rejected": -192.42124938964844,
|
|
"loss": 0.6865,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.5506845712661743,
|
|
"rewards/margins": 0.12794733047485352,
|
|
"rewards/rejected": -0.6786318421363831,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 4.685080796785637e-05,
|
|
"logits/chosen": -2.0949289798736572,
|
|
"logits/rejected": -2.105635643005371,
|
|
"logps/chosen": -188.15576171875,
|
|
"logps/rejected": -177.27642822265625,
|
|
"loss": 0.7395,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.6156455874443054,
|
|
"rewards/margins": 0.00026201456785202026,
|
|
"rewards/rejected": -0.615907609462738,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 4.683337678816345e-05,
|
|
"logits/chosen": -2.062208414077759,
|
|
"logits/rejected": -1.9715068340301514,
|
|
"logps/chosen": -247.74729919433594,
|
|
"logps/rejected": -225.91094970703125,
|
|
"loss": 0.8454,
|
|
"rewards/accuracies": 0.25,
|
|
"rewards/chosen": -0.7724697589874268,
|
|
"rewards/margins": -0.1988295614719391,
|
|
"rewards/rejected": -0.5736401081085205,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 4.6815900760842236e-05,
|
|
"logits/chosen": -1.999656081199646,
|
|
"logits/rejected": -2.013665199279785,
|
|
"logps/chosen": -189.76425170898438,
|
|
"logps/rejected": -200.77801513671875,
|
|
"loss": 0.7929,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -1.1113797426223755,
|
|
"rewards/margins": -0.07439464330673218,
|
|
"rewards/rejected": -1.0369850397109985,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 4.679837992178996e-05,
|
|
"logits/chosen": -1.962624192237854,
|
|
"logits/rejected": -1.8894569873809814,
|
|
"logps/chosen": -163.32264709472656,
|
|
"logps/rejected": -176.6225128173828,
|
|
"loss": 0.6683,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.5831518769264221,
|
|
"rewards/margins": 0.18081454932689667,
|
|
"rewards/rejected": -0.76396644115448,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 4.678081430699594e-05,
|
|
"logits/chosen": -1.9277567863464355,
|
|
"logits/rejected": -1.9991313219070435,
|
|
"logps/chosen": -177.146484375,
|
|
"logps/rejected": -180.6768798828125,
|
|
"loss": 0.549,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.5843685865402222,
|
|
"rewards/margins": 0.5196143388748169,
|
|
"rewards/rejected": -1.103982925415039,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 4.676320395254146e-05,
|
|
"logits/chosen": -1.8018032312393188,
|
|
"logits/rejected": -1.7631927728652954,
|
|
"logps/chosen": -193.01077270507812,
|
|
"logps/rejected": -193.25103759765625,
|
|
"loss": 0.6489,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.5062118172645569,
|
|
"rewards/margins": 0.23078583180904388,
|
|
"rewards/rejected": -0.736997663974762,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 4.674554889459968e-05,
|
|
"logits/chosen": -1.7966090440750122,
|
|
"logits/rejected": -1.7850843667984009,
|
|
"logps/chosen": -182.4764404296875,
|
|
"logps/rejected": -184.0175018310547,
|
|
"loss": 0.7334,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.4785413146018982,
|
|
"rewards/margins": -0.006460566073656082,
|
|
"rewards/rejected": -0.4720807671546936,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 4.672784916943562e-05,
|
|
"logits/chosen": -1.6125917434692383,
|
|
"logits/rejected": -1.6394853591918945,
|
|
"logps/chosen": -182.64862060546875,
|
|
"logps/rejected": -196.8616943359375,
|
|
"loss": 0.5382,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.8303923010826111,
|
|
"rewards/margins": 0.4725598096847534,
|
|
"rewards/rejected": -1.3029520511627197,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 4.6710104813406034e-05,
|
|
"logits/chosen": -1.7587897777557373,
|
|
"logits/rejected": -1.7165967226028442,
|
|
"logps/chosen": -178.50250244140625,
|
|
"logps/rejected": -159.33375549316406,
|
|
"loss": 0.9045,
|
|
"rewards/accuracies": 0.25,
|
|
"rewards/chosen": -0.5282204151153564,
|
|
"rewards/margins": -0.22660110890865326,
|
|
"rewards/rejected": -0.30161935091018677,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 4.669231586295934e-05,
|
|
"logits/chosen": -1.8907124996185303,
|
|
"logits/rejected": -1.9228183031082153,
|
|
"logps/chosen": -169.16119384765625,
|
|
"logps/rejected": -180.715087890625,
|
|
"loss": 0.7763,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.8981459736824036,
|
|
"rewards/margins": -0.11685739457607269,
|
|
"rewards/rejected": -0.7812885046005249,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 4.667448235463557e-05,
|
|
"logits/chosen": -1.2660267353057861,
|
|
"logits/rejected": -1.2475149631500244,
|
|
"logps/chosen": -183.68353271484375,
|
|
"logps/rejected": -182.84422302246094,
|
|
"loss": 0.8929,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.7595192790031433,
|
|
"rewards/margins": -0.14992079138755798,
|
|
"rewards/rejected": -0.6095985174179077,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 4.665660432506629e-05,
|
|
"logits/chosen": -1.8095303773880005,
|
|
"logits/rejected": -1.8506840467453003,
|
|
"logps/chosen": -213.27145385742188,
|
|
"logps/rejected": -220.65640258789062,
|
|
"loss": 0.7946,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.6156535744667053,
|
|
"rewards/margins": -0.057905957102775574,
|
|
"rewards/rejected": -0.5577476024627686,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 4.6638681810974496e-05,
|
|
"logits/chosen": -1.758918285369873,
|
|
"logits/rejected": -1.7417278289794922,
|
|
"logps/chosen": -180.2194366455078,
|
|
"logps/rejected": -199.62014770507812,
|
|
"loss": 0.5957,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.17319843173027039,
|
|
"rewards/margins": 0.4147520363330841,
|
|
"rewards/rejected": -0.5879504680633545,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 4.6620714849174576e-05,
|
|
"logits/chosen": -1.5012279748916626,
|
|
"logits/rejected": -1.495218276977539,
|
|
"logps/chosen": -227.26577758789062,
|
|
"logps/rejected": -223.47470092773438,
|
|
"loss": 0.7009,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.7020196318626404,
|
|
"rewards/margins": 0.10670151561498642,
|
|
"rewards/rejected": -0.808721125125885,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 4.660270347657219e-05,
|
|
"logits/chosen": -1.4245662689208984,
|
|
"logits/rejected": -1.4722357988357544,
|
|
"logps/chosen": -219.63504028320312,
|
|
"logps/rejected": -246.0736846923828,
|
|
"loss": 0.6091,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.7042097449302673,
|
|
"rewards/margins": 0.5171206593513489,
|
|
"rewards/rejected": -1.2213302850723267,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 4.658464773016428e-05,
|
|
"logits/chosen": -1.7068259716033936,
|
|
"logits/rejected": -1.6351027488708496,
|
|
"logps/chosen": -199.54136657714844,
|
|
"logps/rejected": -181.50997924804688,
|
|
"loss": 0.8131,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -1.2041518688201904,
|
|
"rewards/margins": -0.023167330771684647,
|
|
"rewards/rejected": -1.180984616279602,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 4.6566547647038864e-05,
|
|
"logits/chosen": -1.7098909616470337,
|
|
"logits/rejected": -1.80801522731781,
|
|
"logps/chosen": -167.95101928710938,
|
|
"logps/rejected": -180.0511016845703,
|
|
"loss": 0.5463,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.2591591477394104,
|
|
"rewards/margins": 0.41101884841918945,
|
|
"rewards/rejected": -0.6701779961585999,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 4.6548403264375074e-05,
|
|
"logits/chosen": -2.014415979385376,
|
|
"logits/rejected": -2.0018627643585205,
|
|
"logps/chosen": -181.22947692871094,
|
|
"logps/rejected": -189.5872344970703,
|
|
"loss": 0.8377,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.9818540811538696,
|
|
"rewards/margins": -0.06248188391327858,
|
|
"rewards/rejected": -0.9193722009658813,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 4.6530214619443037e-05,
|
|
"logits/chosen": -1.902940034866333,
|
|
"logits/rejected": -1.940006971359253,
|
|
"logps/chosen": -156.01939392089844,
|
|
"logps/rejected": -156.20623779296875,
|
|
"loss": 0.8502,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.6196620464324951,
|
|
"rewards/margins": -0.18280625343322754,
|
|
"rewards/rejected": -0.4368557929992676,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 4.6511981749603775e-05,
|
|
"logits/chosen": -1.8872562646865845,
|
|
"logits/rejected": -1.9487504959106445,
|
|
"logps/chosen": -178.33872985839844,
|
|
"logps/rejected": -182.2080535888672,
|
|
"loss": 0.7332,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.6403207778930664,
|
|
"rewards/margins": 0.12355762720108032,
|
|
"rewards/rejected": -0.7638784050941467,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 4.6493704692309175e-05,
|
|
"logits/chosen": -1.8873028755187988,
|
|
"logits/rejected": -1.8430054187774658,
|
|
"logps/chosen": -248.9535675048828,
|
|
"logps/rejected": -238.04327392578125,
|
|
"loss": 1.062,
|
|
"rewards/accuracies": 0.25,
|
|
"rewards/chosen": -1.2121449708938599,
|
|
"rewards/margins": -0.3689318299293518,
|
|
"rewards/rejected": -0.8432131409645081,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 4.647538348510189e-05,
|
|
"logits/chosen": -1.8361527919769287,
|
|
"logits/rejected": -1.856339454650879,
|
|
"logps/chosen": -171.12091064453125,
|
|
"logps/rejected": -179.4962158203125,
|
|
"loss": 0.6846,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.7720082998275757,
|
|
"rewards/margins": 0.10657864063978195,
|
|
"rewards/rejected": -0.8785868883132935,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 4.645701816561523e-05,
|
|
"logits/chosen": -1.6982722282409668,
|
|
"logits/rejected": -1.7370768785476685,
|
|
"logps/chosen": -232.54293823242188,
|
|
"logps/rejected": -213.66964721679688,
|
|
"loss": 0.7178,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.8362730741500854,
|
|
"rewards/margins": 0.07781472057104111,
|
|
"rewards/rejected": -0.9140878319740295,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 4.643860877157314e-05,
|
|
"logits/chosen": -1.7802523374557495,
|
|
"logits/rejected": -1.7304799556732178,
|
|
"logps/chosen": -168.3419189453125,
|
|
"logps/rejected": -205.67333984375,
|
|
"loss": 0.8153,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.4274996221065521,
|
|
"rewards/margins": -0.046173423528671265,
|
|
"rewards/rejected": -0.38132619857788086,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 4.642015534079012e-05,
|
|
"logits/chosen": -1.9037768840789795,
|
|
"logits/rejected": -1.8988232612609863,
|
|
"logps/chosen": -173.9936981201172,
|
|
"logps/rejected": -197.27523803710938,
|
|
"loss": 0.6135,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.32596248388290405,
|
|
"rewards/margins": 0.23917633295059204,
|
|
"rewards/rejected": -0.5651388168334961,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 4.640165791117106e-05,
|
|
"logits/chosen": -1.9618606567382812,
|
|
"logits/rejected": -1.9455369710922241,
|
|
"logps/chosen": -190.39830017089844,
|
|
"logps/rejected": -175.3238067626953,
|
|
"loss": 0.9091,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -1.0257893800735474,
|
|
"rewards/margins": -0.2109348475933075,
|
|
"rewards/rejected": -0.8148545622825623,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 4.63831165207113e-05,
|
|
"logits/chosen": -1.864621877670288,
|
|
"logits/rejected": -1.889084815979004,
|
|
"logps/chosen": -205.9281005859375,
|
|
"logps/rejected": -230.0751953125,
|
|
"loss": 0.7761,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.6850899457931519,
|
|
"rewards/margins": -0.05720193684101105,
|
|
"rewards/rejected": -0.6278879046440125,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 4.6364531207496426e-05,
|
|
"logits/chosen": -1.737329363822937,
|
|
"logits/rejected": -1.745915412902832,
|
|
"logps/chosen": -171.1361846923828,
|
|
"logps/rejected": -175.88906860351562,
|
|
"loss": 0.6837,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.743391215801239,
|
|
"rewards/margins": 0.0780097097158432,
|
|
"rewards/rejected": -0.8214008808135986,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 4.634590200970227e-05,
|
|
"logits/chosen": -1.8280831575393677,
|
|
"logits/rejected": -1.8323631286621094,
|
|
"logps/chosen": -188.1634521484375,
|
|
"logps/rejected": -213.10360717773438,
|
|
"loss": 0.7336,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.7827669382095337,
|
|
"rewards/margins": 0.3351660966873169,
|
|
"rewards/rejected": -1.1179330348968506,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 4.632722896559481e-05,
|
|
"logits/chosen": -1.9295848608016968,
|
|
"logits/rejected": -1.9276199340820312,
|
|
"logps/chosen": -167.31385803222656,
|
|
"logps/rejected": -186.1995391845703,
|
|
"loss": 0.6013,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.16915923357009888,
|
|
"rewards/margins": 0.3335033357143402,
|
|
"rewards/rejected": -0.5026625394821167,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 4.630851211353007e-05,
|
|
"logits/chosen": -1.6712524890899658,
|
|
"logits/rejected": -1.788968801498413,
|
|
"logps/chosen": -164.82725524902344,
|
|
"logps/rejected": -182.7891387939453,
|
|
"loss": 0.7642,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.609403669834137,
|
|
"rewards/margins": 0.03477644547820091,
|
|
"rewards/rejected": -0.644180178642273,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 4.628975149195407e-05,
|
|
"logits/chosen": -1.2296477556228638,
|
|
"logits/rejected": -1.2632193565368652,
|
|
"logps/chosen": -204.8614044189453,
|
|
"logps/rejected": -224.30543518066406,
|
|
"loss": 0.7234,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.7619196176528931,
|
|
"rewards/margins": 0.12807466089725494,
|
|
"rewards/rejected": -0.8899943232536316,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 4.6270947139402744e-05,
|
|
"logits/chosen": -2.047361373901367,
|
|
"logits/rejected": -2.1051080226898193,
|
|
"logps/chosen": -169.24703979492188,
|
|
"logps/rejected": -184.35586547851562,
|
|
"loss": 0.66,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.6050511002540588,
|
|
"rewards/margins": 0.13156384229660034,
|
|
"rewards/rejected": -0.7366149425506592,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 4.6252099094501834e-05,
|
|
"logits/chosen": -1.9163178205490112,
|
|
"logits/rejected": -1.8832037448883057,
|
|
"logps/chosen": -202.50064086914062,
|
|
"logps/rejected": -206.1787109375,
|
|
"loss": 0.8078,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.8862631320953369,
|
|
"rewards/margins": -0.06633087992668152,
|
|
"rewards/rejected": -0.819932222366333,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 4.623320739596685e-05,
|
|
"logits/chosen": -1.943336009979248,
|
|
"logits/rejected": -1.9594800472259521,
|
|
"logps/chosen": -184.20272827148438,
|
|
"logps/rejected": -185.5780029296875,
|
|
"loss": 0.948,
|
|
"rewards/accuracies": 0.1875,
|
|
"rewards/chosen": -0.8648966550827026,
|
|
"rewards/margins": -0.32853201031684875,
|
|
"rewards/rejected": -0.5363646745681763,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 4.621427208260296e-05,
|
|
"logits/chosen": -2.0543949604034424,
|
|
"logits/rejected": -2.09141206741333,
|
|
"logps/chosen": -186.11021423339844,
|
|
"logps/rejected": -197.07164001464844,
|
|
"loss": 0.6593,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.6588226556777954,
|
|
"rewards/margins": 0.23558923602104187,
|
|
"rewards/rejected": -0.8944119811058044,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 4.6195293193304915e-05,
|
|
"logits/chosen": -2.2013731002807617,
|
|
"logits/rejected": -2.209264039993286,
|
|
"logps/chosen": -192.5195770263672,
|
|
"logps/rejected": -188.9171600341797,
|
|
"loss": 0.8303,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.8534584641456604,
|
|
"rewards/margins": -0.10596348345279694,
|
|
"rewards/rejected": -0.7474948763847351,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 4.6176270767056976e-05,
|
|
"logits/chosen": -1.8635625839233398,
|
|
"logits/rejected": -1.8899545669555664,
|
|
"logps/chosen": -193.61715698242188,
|
|
"logps/rejected": -196.3071746826172,
|
|
"loss": 0.5859,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.620331346988678,
|
|
"rewards/margins": 0.35552215576171875,
|
|
"rewards/rejected": -0.9758535027503967,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 4.615720484293286e-05,
|
|
"logits/chosen": -2.0970966815948486,
|
|
"logits/rejected": -2.0922045707702637,
|
|
"logps/chosen": -171.4237060546875,
|
|
"logps/rejected": -173.91969299316406,
|
|
"loss": 0.7777,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.7394740581512451,
|
|
"rewards/margins": 0.09215141832828522,
|
|
"rewards/rejected": -0.8316254615783691,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 4.613809546009558e-05,
|
|
"logits/chosen": -1.923639178276062,
|
|
"logits/rejected": -1.9087320566177368,
|
|
"logps/chosen": -210.64447021484375,
|
|
"logps/rejected": -202.98309326171875,
|
|
"loss": 0.7005,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.7617734670639038,
|
|
"rewards/margins": 0.28139957785606384,
|
|
"rewards/rejected": -1.04317307472229,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 4.611894265779748e-05,
|
|
"logits/chosen": -1.7692898511886597,
|
|
"logits/rejected": -1.8441616296768188,
|
|
"logps/chosen": -181.05316162109375,
|
|
"logps/rejected": -190.46311950683594,
|
|
"loss": 0.8492,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": -0.9923998117446899,
|
|
"rewards/margins": -0.16852112114429474,
|
|
"rewards/rejected": -0.8238787651062012,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 4.609974647538003e-05,
|
|
"logits/chosen": -2.242365837097168,
|
|
"logits/rejected": -2.2200753688812256,
|
|
"logps/chosen": -192.88491821289062,
|
|
"logps/rejected": -209.79190063476562,
|
|
"loss": 0.7824,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.7258377075195312,
|
|
"rewards/margins": 0.06173846498131752,
|
|
"rewards/rejected": -0.7875760793685913,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 4.608050695227385e-05,
|
|
"logits/chosen": -2.0390326976776123,
|
|
"logits/rejected": -2.0602505207061768,
|
|
"logps/chosen": -159.614013671875,
|
|
"logps/rejected": -159.90530395507812,
|
|
"loss": 0.6495,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -0.6496396660804749,
|
|
"rewards/margins": 0.15987172722816467,
|
|
"rewards/rejected": -0.8095113635063171,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 4.606122412799857e-05,
|
|
"logits/chosen": -1.8621050119400024,
|
|
"logits/rejected": -1.843872308731079,
|
|
"logps/chosen": -191.3387451171875,
|
|
"logps/rejected": -212.04867553710938,
|
|
"loss": 0.838,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.9131224751472473,
|
|
"rewards/margins": -0.06589814275503159,
|
|
"rewards/rejected": -0.847224235534668,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 4.6041898042162764e-05,
|
|
"logits/chosen": -1.9165095090866089,
|
|
"logits/rejected": -1.9768743515014648,
|
|
"logps/chosen": -179.1850128173828,
|
|
"logps/rejected": -197.76953125,
|
|
"loss": 0.7127,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.8291003704071045,
|
|
"rewards/margins": 0.08120033144950867,
|
|
"rewards/rejected": -0.9103007316589355,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 4.602252873446386e-05,
|
|
"logits/chosen": -1.71052086353302,
|
|
"logits/rejected": -1.7267752885818481,
|
|
"logps/chosen": -233.17083740234375,
|
|
"logps/rejected": -238.11651611328125,
|
|
"loss": 0.7183,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.6033438444137573,
|
|
"rewards/margins": 0.14365322887897491,
|
|
"rewards/rejected": -0.7469971179962158,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 4.60031162446881e-05,
|
|
"logits/chosen": -1.685623049736023,
|
|
"logits/rejected": -1.759178876876831,
|
|
"logps/chosen": -179.87600708007812,
|
|
"logps/rejected": -183.2005615234375,
|
|
"loss": 0.7049,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": -0.7123013734817505,
|
|
"rewards/margins": 0.11958488076925278,
|
|
"rewards/rejected": -0.8318862915039062,
|
|
"step": 500
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 2292,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 3,
|
|
"save_steps": 500,
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 2,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|