3184 lines
109 KiB
JSON
3184 lines
109 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.9886363636363638,
|
|
"eval_steps": 50.0,
|
|
"global_step": 210,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.00946969696969697,
|
|
"grad_norm": 29.0056470375542,
|
|
"learning_rate": 4.761904761904761e-09,
|
|
"logits/chosen": -1.2287073135375977,
|
|
"logits/rejected": -1.226149559020996,
|
|
"logps/chosen": -298.41802978515625,
|
|
"logps/rejected": -125.05058288574219,
|
|
"loss": 0.6931,
|
|
"rewards/accuracies": 0.0,
|
|
"rewards/chosen": 0.0,
|
|
"rewards/margins": 0.0,
|
|
"rewards/rejected": 0.0,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.01893939393939394,
|
|
"grad_norm": 29.598707271309717,
|
|
"learning_rate": 9.523809523809522e-09,
|
|
"logits/chosen": -1.1785930395126343,
|
|
"logits/rejected": -1.2503385543823242,
|
|
"logps/chosen": -327.542236328125,
|
|
"logps/rejected": -152.739990234375,
|
|
"loss": 0.6931,
|
|
"rewards/accuracies": 0.0,
|
|
"rewards/chosen": 0.0,
|
|
"rewards/margins": 0.0,
|
|
"rewards/rejected": 0.0,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.028409090909090908,
|
|
"grad_norm": 30.815773047267687,
|
|
"learning_rate": 1.4285714285714284e-08,
|
|
"logits/chosen": -1.2728688716888428,
|
|
"logits/rejected": -1.2958910465240479,
|
|
"logps/chosen": -162.8279266357422,
|
|
"logps/rejected": -92.91324615478516,
|
|
"loss": 0.6939,
|
|
"rewards/accuracies": 0.46666663885116577,
|
|
"rewards/chosen": 0.0023424469400197268,
|
|
"rewards/margins": 0.005515547469258308,
|
|
"rewards/rejected": -0.003173100296407938,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.03787878787878788,
|
|
"grad_norm": 30.410758788816498,
|
|
"learning_rate": 1.9047619047619045e-08,
|
|
"logits/chosen": -1.3308671712875366,
|
|
"logits/rejected": -1.4291285276412964,
|
|
"logps/chosen": -361.55377197265625,
|
|
"logps/rejected": -185.06399536132812,
|
|
"loss": 0.6944,
|
|
"rewards/accuracies": 0.2666666805744171,
|
|
"rewards/chosen": -0.0036378882359713316,
|
|
"rewards/margins": -0.005622108932584524,
|
|
"rewards/rejected": 0.001984220463782549,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.04734848484848485,
|
|
"grad_norm": 33.72650012517962,
|
|
"learning_rate": 2.3809523809523807e-08,
|
|
"logits/chosen": -1.2832980155944824,
|
|
"logits/rejected": -1.2578494548797607,
|
|
"logps/chosen": -311.66046142578125,
|
|
"logps/rejected": -113.93507385253906,
|
|
"loss": 0.6944,
|
|
"rewards/accuracies": 0.5333333611488342,
|
|
"rewards/chosen": 0.0004738619609270245,
|
|
"rewards/margins": 0.000339136749971658,
|
|
"rewards/rejected": 0.0001347253128187731,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.056818181818181816,
|
|
"grad_norm": 33.54755632608565,
|
|
"learning_rate": 2.857142857142857e-08,
|
|
"logits/chosen": -1.2438254356384277,
|
|
"logits/rejected": -1.3154207468032837,
|
|
"logps/chosen": -237.88046264648438,
|
|
"logps/rejected": -144.86170959472656,
|
|
"loss": 0.6927,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": 0.0015382947167381644,
|
|
"rewards/margins": 0.0072678267024457455,
|
|
"rewards/rejected": -0.005729531869292259,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.06628787878787878,
|
|
"grad_norm": 32.506024543201846,
|
|
"learning_rate": 3.333333333333333e-08,
|
|
"logits/chosen": -1.1219017505645752,
|
|
"logits/rejected": -1.3356969356536865,
|
|
"logps/chosen": -377.674072265625,
|
|
"logps/rejected": -208.0950164794922,
|
|
"loss": 0.6946,
|
|
"rewards/accuracies": 0.46666663885116577,
|
|
"rewards/chosen": 0.003615258727222681,
|
|
"rewards/margins": 0.004607487935572863,
|
|
"rewards/rejected": -0.0009922297904267907,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.07575757575757576,
|
|
"grad_norm": 26.869719248200415,
|
|
"learning_rate": 3.809523809523809e-08,
|
|
"logits/chosen": -1.2021812200546265,
|
|
"logits/rejected": -1.2994741201400757,
|
|
"logps/chosen": -223.2887420654297,
|
|
"logps/rejected": -210.76904296875,
|
|
"loss": 0.6941,
|
|
"rewards/accuracies": 0.46666663885116577,
|
|
"rewards/chosen": -0.0010813308181241155,
|
|
"rewards/margins": -0.0014500105753540993,
|
|
"rewards/rejected": 0.00036868025199510157,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.08522727272727272,
|
|
"grad_norm": 28.851260723123424,
|
|
"learning_rate": 4.285714285714285e-08,
|
|
"logits/chosen": -1.2686998844146729,
|
|
"logits/rejected": -1.2887214422225952,
|
|
"logps/chosen": -414.627685546875,
|
|
"logps/rejected": -157.76890563964844,
|
|
"loss": 0.6925,
|
|
"rewards/accuracies": 0.3333333432674408,
|
|
"rewards/chosen": -0.008543035015463829,
|
|
"rewards/margins": -0.008067367598414421,
|
|
"rewards/rejected": -0.00047566817374899983,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.0946969696969697,
|
|
"grad_norm": 29.908833040399635,
|
|
"learning_rate": 4.7619047619047613e-08,
|
|
"logits/chosen": -1.2252384424209595,
|
|
"logits/rejected": -1.4116103649139404,
|
|
"logps/chosen": -379.18719482421875,
|
|
"logps/rejected": -134.59957885742188,
|
|
"loss": 0.6913,
|
|
"rewards/accuracies": 0.5333333611488342,
|
|
"rewards/chosen": 0.003142343368381262,
|
|
"rewards/margins": 0.004382309503853321,
|
|
"rewards/rejected": -0.0012399652041494846,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.10416666666666667,
|
|
"grad_norm": 34.95508468311557,
|
|
"learning_rate": 5.238095238095238e-08,
|
|
"logits/chosen": -1.2304970026016235,
|
|
"logits/rejected": -1.4173061847686768,
|
|
"logps/chosen": -274.32415771484375,
|
|
"logps/rejected": -143.79139709472656,
|
|
"loss": 0.6934,
|
|
"rewards/accuracies": 0.5333333015441895,
|
|
"rewards/chosen": 0.001248523243702948,
|
|
"rewards/margins": 0.00037327370955608785,
|
|
"rewards/rejected": 0.0008752490393817425,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.11363636363636363,
|
|
"grad_norm": 33.36383008667387,
|
|
"learning_rate": 5.714285714285714e-08,
|
|
"logits/chosen": -1.2315393686294556,
|
|
"logits/rejected": -1.3125958442687988,
|
|
"logps/chosen": -414.57220458984375,
|
|
"logps/rejected": -175.58706665039062,
|
|
"loss": 0.6913,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": 0.009653424844145775,
|
|
"rewards/margins": 0.005845812149345875,
|
|
"rewards/rejected": 0.0038076122291386127,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.12310606060606061,
|
|
"grad_norm": 33.31390804979405,
|
|
"learning_rate": 6.19047619047619e-08,
|
|
"logits/chosen": -1.226459264755249,
|
|
"logits/rejected": -1.333916425704956,
|
|
"logps/chosen": -220.2379913330078,
|
|
"logps/rejected": -109.96634674072266,
|
|
"loss": 0.6914,
|
|
"rewards/accuracies": 0.5333333611488342,
|
|
"rewards/chosen": 0.003358738962560892,
|
|
"rewards/margins": 0.0006390871712937951,
|
|
"rewards/rejected": 0.002719651209190488,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.13257575757575757,
|
|
"grad_norm": 30.861328252470425,
|
|
"learning_rate": 6.666666666666665e-08,
|
|
"logits/chosen": -1.2634985446929932,
|
|
"logits/rejected": -1.4346576929092407,
|
|
"logps/chosen": -280.9840087890625,
|
|
"logps/rejected": -140.47079467773438,
|
|
"loss": 0.6921,
|
|
"rewards/accuracies": 0.5333333611488342,
|
|
"rewards/chosen": -0.0012296470813453197,
|
|
"rewards/margins": -0.0024472051300108433,
|
|
"rewards/rejected": 0.0012175582814961672,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.14204545454545456,
|
|
"grad_norm": 29.301187205022977,
|
|
"learning_rate": 7.142857142857142e-08,
|
|
"logits/chosen": -1.2319475412368774,
|
|
"logits/rejected": -1.3931156396865845,
|
|
"logps/chosen": -279.740234375,
|
|
"logps/rejected": -63.6292724609375,
|
|
"loss": 0.6916,
|
|
"rewards/accuracies": 0.6666666865348816,
|
|
"rewards/chosen": 0.005358916707336903,
|
|
"rewards/margins": 0.00306467292830348,
|
|
"rewards/rejected": 0.002294243313372135,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.15151515151515152,
|
|
"grad_norm": 33.40165234179044,
|
|
"learning_rate": 7.619047619047618e-08,
|
|
"logits/chosen": -1.2616360187530518,
|
|
"logits/rejected": -1.2523891925811768,
|
|
"logps/chosen": -248.74118041992188,
|
|
"logps/rejected": -139.3803253173828,
|
|
"loss": 0.6899,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": 0.00993367563933134,
|
|
"rewards/margins": 0.0073961131274700165,
|
|
"rewards/rejected": 0.0025375632103532553,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.16098484848484848,
|
|
"grad_norm": 31.850407532445676,
|
|
"learning_rate": 8.095238095238095e-08,
|
|
"logits/chosen": -1.300764560699463,
|
|
"logits/rejected": -1.4897656440734863,
|
|
"logps/chosen": -199.72067260742188,
|
|
"logps/rejected": -117.5413818359375,
|
|
"loss": 0.6876,
|
|
"rewards/accuracies": 0.5333333611488342,
|
|
"rewards/chosen": 0.005169999785721302,
|
|
"rewards/margins": 0.004817936569452286,
|
|
"rewards/rejected": 0.0003520629834383726,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.17045454545454544,
|
|
"grad_norm": 31.778736512108093,
|
|
"learning_rate": 8.57142857142857e-08,
|
|
"logits/chosen": -1.2460119724273682,
|
|
"logits/rejected": -1.3352010250091553,
|
|
"logps/chosen": -430.80096435546875,
|
|
"logps/rejected": -209.36929321289062,
|
|
"loss": 0.6886,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.022602636367082596,
|
|
"rewards/margins": 0.02024615928530693,
|
|
"rewards/rejected": 0.002356476616114378,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.17992424242424243,
|
|
"grad_norm": 31.888287525403957,
|
|
"learning_rate": 9.047619047619047e-08,
|
|
"logits/chosen": -1.2491956949234009,
|
|
"logits/rejected": -1.2977077960968018,
|
|
"logps/chosen": -226.297607421875,
|
|
"logps/rejected": -110.85136413574219,
|
|
"loss": 0.6849,
|
|
"rewards/accuracies": 0.5333333015441895,
|
|
"rewards/chosen": 0.0118248350918293,
|
|
"rewards/margins": 0.008896713145077229,
|
|
"rewards/rejected": 0.0029281210154294968,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.1893939393939394,
|
|
"grad_norm": 30.890781750471852,
|
|
"learning_rate": 9.523809523809523e-08,
|
|
"logits/chosen": -1.2209726572036743,
|
|
"logits/rejected": -1.4927175045013428,
|
|
"logps/chosen": -337.1795959472656,
|
|
"logps/rejected": -123.1536865234375,
|
|
"loss": 0.6865,
|
|
"rewards/accuracies": 0.6666666865348816,
|
|
"rewards/chosen": 0.019501259550452232,
|
|
"rewards/margins": 0.021095363423228264,
|
|
"rewards/rejected": -0.001594100845977664,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.19886363636363635,
|
|
"grad_norm": 29.49005689794317,
|
|
"learning_rate": 1e-07,
|
|
"logits/chosen": -1.2870718240737915,
|
|
"logits/rejected": -1.3624413013458252,
|
|
"logps/chosen": -226.2629852294922,
|
|
"logps/rejected": -94.34611511230469,
|
|
"loss": 0.6852,
|
|
"rewards/accuracies": 0.6666666269302368,
|
|
"rewards/chosen": 0.011245525442063808,
|
|
"rewards/margins": 0.0074931420385837555,
|
|
"rewards/rejected": 0.0037523836363106966,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.20833333333333334,
|
|
"grad_norm": 35.528523903361425,
|
|
"learning_rate": 9.947089947089947e-08,
|
|
"logits/chosen": -1.149263858795166,
|
|
"logits/rejected": -1.1778504848480225,
|
|
"logps/chosen": -522.42626953125,
|
|
"logps/rejected": -187.16226196289062,
|
|
"loss": 0.6755,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.05816841125488281,
|
|
"rewards/margins": 0.0636506900191307,
|
|
"rewards/rejected": -0.005482281092554331,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.2178030303030303,
|
|
"grad_norm": 30.463057051689077,
|
|
"learning_rate": 9.894179894179894e-08,
|
|
"logits/chosen": -1.2491704225540161,
|
|
"logits/rejected": -1.4007856845855713,
|
|
"logps/chosen": -221.8249053955078,
|
|
"logps/rejected": -94.48908233642578,
|
|
"loss": 0.6733,
|
|
"rewards/accuracies": 0.6666666865348816,
|
|
"rewards/chosen": 0.03221073001623154,
|
|
"rewards/margins": 0.02945198491215706,
|
|
"rewards/rejected": 0.0027587474323809147,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.22727272727272727,
|
|
"grad_norm": 32.66406919328301,
|
|
"learning_rate": 9.84126984126984e-08,
|
|
"logits/chosen": -1.213392734527588,
|
|
"logits/rejected": -1.2315367460250854,
|
|
"logps/chosen": -484.7833557128906,
|
|
"logps/rejected": -179.44985961914062,
|
|
"loss": 0.6688,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.07118237763643265,
|
|
"rewards/margins": 0.07119101285934448,
|
|
"rewards/rejected": -8.63661989569664e-06,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.23674242424242425,
|
|
"grad_norm": 27.949898051091324,
|
|
"learning_rate": 9.788359788359788e-08,
|
|
"logits/chosen": -1.2136789560317993,
|
|
"logits/rejected": -1.3596720695495605,
|
|
"logps/chosen": -315.8499755859375,
|
|
"logps/rejected": -146.63519287109375,
|
|
"loss": 0.6695,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.05525461584329605,
|
|
"rewards/margins": 0.05192388966679573,
|
|
"rewards/rejected": 0.003330723149701953,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.24621212121212122,
|
|
"grad_norm": 31.30737207115018,
|
|
"learning_rate": 9.735449735449735e-08,
|
|
"logits/chosen": -1.326267957687378,
|
|
"logits/rejected": -1.3485438823699951,
|
|
"logps/chosen": -318.2695007324219,
|
|
"logps/rejected": -93.18646240234375,
|
|
"loss": 0.6664,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.06859429180622101,
|
|
"rewards/margins": 0.06752636283636093,
|
|
"rewards/rejected": 0.0010679200058802962,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.2556818181818182,
|
|
"grad_norm": 27.163805337499927,
|
|
"learning_rate": 9.682539682539682e-08,
|
|
"logits/chosen": -1.3347467184066772,
|
|
"logits/rejected": -1.39474618434906,
|
|
"logps/chosen": -252.51211547851562,
|
|
"logps/rejected": -130.60934448242188,
|
|
"loss": 0.6685,
|
|
"rewards/accuracies": 0.6666666269302368,
|
|
"rewards/chosen": 0.04859574884176254,
|
|
"rewards/margins": 0.04069235175848007,
|
|
"rewards/rejected": 0.007903401739895344,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.26515151515151514,
|
|
"grad_norm": 26.747663753533093,
|
|
"learning_rate": 9.629629629629629e-08,
|
|
"logits/chosen": -1.1969085931777954,
|
|
"logits/rejected": -1.3106380701065063,
|
|
"logps/chosen": -282.17694091796875,
|
|
"logps/rejected": -131.32461547851562,
|
|
"loss": 0.6674,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.053814828395843506,
|
|
"rewards/margins": 0.05295072868466377,
|
|
"rewards/rejected": 0.0008641040185466409,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.2746212121212121,
|
|
"grad_norm": 28.110282404080507,
|
|
"learning_rate": 9.576719576719576e-08,
|
|
"logits/chosen": -1.205107569694519,
|
|
"logits/rejected": -1.3407138586044312,
|
|
"logps/chosen": -262.7644958496094,
|
|
"logps/rejected": -134.28187561035156,
|
|
"loss": 0.6658,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.05242891237139702,
|
|
"rewards/margins": 0.05327294394373894,
|
|
"rewards/rejected": -0.0008440311066806316,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.2840909090909091,
|
|
"grad_norm": 24.744190890187145,
|
|
"learning_rate": 9.523809523809523e-08,
|
|
"logits/chosen": -1.2627979516983032,
|
|
"logits/rejected": -1.3917124271392822,
|
|
"logps/chosen": -238.56619262695312,
|
|
"logps/rejected": -120.55537414550781,
|
|
"loss": 0.6677,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.04753679037094116,
|
|
"rewards/margins": 0.04130668193101883,
|
|
"rewards/rejected": 0.006230102851986885,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.2935606060606061,
|
|
"grad_norm": 27.968644068426777,
|
|
"learning_rate": 9.470899470899471e-08,
|
|
"logits/chosen": -1.3139160871505737,
|
|
"logits/rejected": -1.4469630718231201,
|
|
"logps/chosen": -368.27374267578125,
|
|
"logps/rejected": -130.02462768554688,
|
|
"loss": 0.6626,
|
|
"rewards/accuracies": 0.8666666150093079,
|
|
"rewards/chosen": 0.07182395458221436,
|
|
"rewards/margins": 0.07256630808115005,
|
|
"rewards/rejected": -0.000742347736377269,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.30303030303030304,
|
|
"grad_norm": 26.902166854256873,
|
|
"learning_rate": 9.417989417989417e-08,
|
|
"logits/chosen": -1.2979252338409424,
|
|
"logits/rejected": -1.2534756660461426,
|
|
"logps/chosen": -361.4090576171875,
|
|
"logps/rejected": -205.6992645263672,
|
|
"loss": 0.6487,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.10240572690963745,
|
|
"rewards/margins": 0.09547950327396393,
|
|
"rewards/rejected": 0.006926238536834717,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.3125,
|
|
"grad_norm": 24.41388186700755,
|
|
"learning_rate": 9.365079365079365e-08,
|
|
"logits/chosen": -1.2682493925094604,
|
|
"logits/rejected": -1.3996570110321045,
|
|
"logps/chosen": -301.144775390625,
|
|
"logps/rejected": -178.94155883789062,
|
|
"loss": 0.6366,
|
|
"rewards/accuracies": 0.6666666865348816,
|
|
"rewards/chosen": 0.1342059224843979,
|
|
"rewards/margins": 0.09688643366098404,
|
|
"rewards/rejected": 0.03731948137283325,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.32196969696969696,
|
|
"grad_norm": 23.025986498306636,
|
|
"learning_rate": 9.312169312169311e-08,
|
|
"logits/chosen": -1.2429295778274536,
|
|
"logits/rejected": -1.3105340003967285,
|
|
"logps/chosen": -333.86077880859375,
|
|
"logps/rejected": -116.7296371459961,
|
|
"loss": 0.6333,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.1280738264322281,
|
|
"rewards/margins": 0.13231658935546875,
|
|
"rewards/rejected": -0.004242760129272938,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.3314393939393939,
|
|
"grad_norm": 24.498334455805622,
|
|
"learning_rate": 9.259259259259259e-08,
|
|
"logits/chosen": -1.3265695571899414,
|
|
"logits/rejected": -1.3860602378845215,
|
|
"logps/chosen": -299.00750732421875,
|
|
"logps/rejected": -140.71592712402344,
|
|
"loss": 0.6286,
|
|
"rewards/accuracies": 0.6666666269302368,
|
|
"rewards/chosen": 0.11404500156641006,
|
|
"rewards/margins": 0.11178859323263168,
|
|
"rewards/rejected": 0.002256409265100956,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.3409090909090909,
|
|
"grad_norm": 24.884921186077833,
|
|
"learning_rate": 9.206349206349205e-08,
|
|
"logits/chosen": -1.2984504699707031,
|
|
"logits/rejected": -1.3744697570800781,
|
|
"logps/chosen": -285.7705078125,
|
|
"logps/rejected": -122.30088806152344,
|
|
"loss": 0.6168,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.15358424186706543,
|
|
"rewards/margins": 0.15173228085041046,
|
|
"rewards/rejected": 0.001851982669904828,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.3503787878787879,
|
|
"grad_norm": 23.82555156631703,
|
|
"learning_rate": 9.153439153439153e-08,
|
|
"logits/chosen": -1.2476222515106201,
|
|
"logits/rejected": -1.3255198001861572,
|
|
"logps/chosen": -366.74029541015625,
|
|
"logps/rejected": -130.18682861328125,
|
|
"loss": 0.6238,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.1664523035287857,
|
|
"rewards/margins": 0.15406028926372528,
|
|
"rewards/rejected": 0.012392010539770126,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.35984848484848486,
|
|
"grad_norm": 24.07477880888288,
|
|
"learning_rate": 9.1005291005291e-08,
|
|
"logits/chosen": -1.335256814956665,
|
|
"logits/rejected": -1.4410061836242676,
|
|
"logps/chosen": -219.09616088867188,
|
|
"logps/rejected": -99.69376373291016,
|
|
"loss": 0.6137,
|
|
"rewards/accuracies": 0.7333332896232605,
|
|
"rewards/chosen": 0.1590159386396408,
|
|
"rewards/margins": 0.1581113636493683,
|
|
"rewards/rejected": 0.0009045897168107331,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.3693181818181818,
|
|
"grad_norm": 23.16275958406235,
|
|
"learning_rate": 9.047619047619047e-08,
|
|
"logits/chosen": -1.2550255060195923,
|
|
"logits/rejected": -1.3113772869110107,
|
|
"logps/chosen": -370.3097839355469,
|
|
"logps/rejected": -170.9385986328125,
|
|
"loss": 0.6112,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.23174059391021729,
|
|
"rewards/margins": 0.23885612189769745,
|
|
"rewards/rejected": -0.00711551308631897,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.3787878787878788,
|
|
"grad_norm": 21.543309745960354,
|
|
"learning_rate": 8.994708994708994e-08,
|
|
"logits/chosen": -1.3221899271011353,
|
|
"logits/rejected": -1.4326074123382568,
|
|
"logps/chosen": -290.75933837890625,
|
|
"logps/rejected": -167.4901123046875,
|
|
"loss": 0.6159,
|
|
"rewards/accuracies": 0.6666666865348816,
|
|
"rewards/chosen": 0.13931059837341309,
|
|
"rewards/margins": 0.13773180544376373,
|
|
"rewards/rejected": 0.0015788152813911438,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.38825757575757575,
|
|
"grad_norm": 22.418812006239037,
|
|
"learning_rate": 8.941798941798941e-08,
|
|
"logits/chosen": -1.2849162817001343,
|
|
"logits/rejected": -1.3343697786331177,
|
|
"logps/chosen": -369.89764404296875,
|
|
"logps/rejected": -133.05355834960938,
|
|
"loss": 0.6093,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.19583266973495483,
|
|
"rewards/margins": 0.19024832546710968,
|
|
"rewards/rejected": 0.005584346130490303,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.3977272727272727,
|
|
"grad_norm": 25.71519469395891,
|
|
"learning_rate": 8.888888888888888e-08,
|
|
"logits/chosen": -1.286842703819275,
|
|
"logits/rejected": -1.3505761623382568,
|
|
"logps/chosen": -447.53778076171875,
|
|
"logps/rejected": -138.85006713867188,
|
|
"loss": 0.5864,
|
|
"rewards/accuracies": 0.9333332777023315,
|
|
"rewards/chosen": 0.31389278173446655,
|
|
"rewards/margins": 0.3263290822505951,
|
|
"rewards/rejected": -0.01243629027158022,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.4071969696969697,
|
|
"grad_norm": 19.24790527585849,
|
|
"learning_rate": 8.835978835978835e-08,
|
|
"logits/chosen": -1.3817297220230103,
|
|
"logits/rejected": -1.4520673751831055,
|
|
"logps/chosen": -279.92059326171875,
|
|
"logps/rejected": -121.86234283447266,
|
|
"loss": 0.6187,
|
|
"rewards/accuracies": 0.7333333492279053,
|
|
"rewards/chosen": 0.15583908557891846,
|
|
"rewards/margins": 0.15511645376682281,
|
|
"rewards/rejected": 0.0007226344314403832,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.4166666666666667,
|
|
"grad_norm": 22.13389198587296,
|
|
"learning_rate": 8.783068783068782e-08,
|
|
"logits/chosen": -1.3588807582855225,
|
|
"logits/rejected": -1.3591158390045166,
|
|
"logps/chosen": -430.5394592285156,
|
|
"logps/rejected": -167.34222412109375,
|
|
"loss": 0.5983,
|
|
"rewards/accuracies": 0.9333332777023315,
|
|
"rewards/chosen": 0.22864654660224915,
|
|
"rewards/margins": 0.19965213537216187,
|
|
"rewards/rejected": 0.028994422405958176,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.42613636363636365,
|
|
"grad_norm": 19.709066122231608,
|
|
"learning_rate": 8.730158730158729e-08,
|
|
"logits/chosen": -1.319620132446289,
|
|
"logits/rejected": -1.3754303455352783,
|
|
"logps/chosen": -301.44891357421875,
|
|
"logps/rejected": -139.29481506347656,
|
|
"loss": 0.6036,
|
|
"rewards/accuracies": 0.7333333492279053,
|
|
"rewards/chosen": 0.22895975410938263,
|
|
"rewards/margins": 0.213044673204422,
|
|
"rewards/rejected": 0.015915077179670334,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.4356060606060606,
|
|
"grad_norm": 20.432654588017233,
|
|
"learning_rate": 8.677248677248676e-08,
|
|
"logits/chosen": -1.314937710762024,
|
|
"logits/rejected": -1.3968394994735718,
|
|
"logps/chosen": -240.7213897705078,
|
|
"logps/rejected": -84.15922546386719,
|
|
"loss": 0.6013,
|
|
"rewards/accuracies": 0.7333333492279053,
|
|
"rewards/chosen": 0.15969710052013397,
|
|
"rewards/margins": 0.16978248953819275,
|
|
"rewards/rejected": -0.010085375979542732,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.44507575757575757,
|
|
"grad_norm": 17.302724108599822,
|
|
"learning_rate": 8.624338624338625e-08,
|
|
"logits/chosen": -1.3449150323867798,
|
|
"logits/rejected": -1.4744312763214111,
|
|
"logps/chosen": -345.28387451171875,
|
|
"logps/rejected": -119.1164321899414,
|
|
"loss": 0.6147,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.1876756250858307,
|
|
"rewards/margins": 0.18070700764656067,
|
|
"rewards/rejected": 0.006968595087528229,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.45454545454545453,
|
|
"grad_norm": 18.038050767324112,
|
|
"learning_rate": 8.57142857142857e-08,
|
|
"logits/chosen": -1.3185852766036987,
|
|
"logits/rejected": -1.4122602939605713,
|
|
"logps/chosen": -212.75405883789062,
|
|
"logps/rejected": -140.37655639648438,
|
|
"loss": 0.6088,
|
|
"rewards/accuracies": 0.7333333492279053,
|
|
"rewards/chosen": 0.1278039515018463,
|
|
"rewards/margins": 0.04179360717535019,
|
|
"rewards/rejected": 0.08601033687591553,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.4640151515151515,
|
|
"grad_norm": 22.986861694117593,
|
|
"learning_rate": 8.518518518518519e-08,
|
|
"logits/chosen": -1.2588977813720703,
|
|
"logits/rejected": -1.4479458332061768,
|
|
"logps/chosen": -338.35858154296875,
|
|
"logps/rejected": -192.6273956298828,
|
|
"loss": 0.5795,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.22073189914226532,
|
|
"rewards/margins": 0.24155020713806152,
|
|
"rewards/rejected": -0.02081829309463501,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.4734848484848485,
|
|
"grad_norm": 19.82460905470704,
|
|
"learning_rate": 8.465608465608464e-08,
|
|
"logits/chosen": -1.2549892663955688,
|
|
"logits/rejected": -1.4687315225601196,
|
|
"logps/chosen": -310.2621154785156,
|
|
"logps/rejected": -137.7465057373047,
|
|
"loss": 0.5936,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.2339044064283371,
|
|
"rewards/margins": 0.22999358177185059,
|
|
"rewards/rejected": 0.003910848870873451,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.48295454545454547,
|
|
"grad_norm": 19.652707095228752,
|
|
"learning_rate": 8.412698412698413e-08,
|
|
"logits/chosen": -1.397512674331665,
|
|
"logits/rejected": -1.4295556545257568,
|
|
"logps/chosen": -350.259033203125,
|
|
"logps/rejected": -154.9229278564453,
|
|
"loss": 0.5959,
|
|
"rewards/accuracies": 0.73333340883255,
|
|
"rewards/chosen": 0.2114483118057251,
|
|
"rewards/margins": 0.1936836689710617,
|
|
"rewards/rejected": 0.017764627933502197,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.49242424242424243,
|
|
"grad_norm": 18.932470131303603,
|
|
"learning_rate": 8.359788359788358e-08,
|
|
"logits/chosen": -1.3295793533325195,
|
|
"logits/rejected": -1.4178295135498047,
|
|
"logps/chosen": -228.3561248779297,
|
|
"logps/rejected": -152.6548309326172,
|
|
"loss": 0.5968,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.17612284421920776,
|
|
"rewards/margins": 0.1884087324142456,
|
|
"rewards/rejected": -0.012285866774618626,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.5018939393939394,
|
|
"grad_norm": 22.56023512380409,
|
|
"learning_rate": 8.306878306878307e-08,
|
|
"logits/chosen": -1.329941749572754,
|
|
"logits/rejected": -1.437350869178772,
|
|
"logps/chosen": -272.7892150878906,
|
|
"logps/rejected": -130.10751342773438,
|
|
"loss": 0.562,
|
|
"rewards/accuracies": 0.7333333492279053,
|
|
"rewards/chosen": 0.2890825569629669,
|
|
"rewards/margins": 0.3040001392364502,
|
|
"rewards/rejected": -0.014917601831257343,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.5113636363636364,
|
|
"grad_norm": 16.32960137441355,
|
|
"learning_rate": 8.253968253968254e-08,
|
|
"logits/chosen": -1.4222466945648193,
|
|
"logits/rejected": -1.6047979593276978,
|
|
"logps/chosen": -327.20196533203125,
|
|
"logps/rejected": -94.08998107910156,
|
|
"loss": 0.5619,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.3207431137561798,
|
|
"rewards/margins": 0.362914502620697,
|
|
"rewards/rejected": -0.0421714186668396,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.5208333333333334,
|
|
"grad_norm": 16.970648675586034,
|
|
"learning_rate": 8.201058201058201e-08,
|
|
"logits/chosen": -1.4205853939056396,
|
|
"logits/rejected": -1.4251158237457275,
|
|
"logps/chosen": -283.05108642578125,
|
|
"logps/rejected": -105.49992370605469,
|
|
"loss": 0.547,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": 0.31010201573371887,
|
|
"rewards/margins": 0.3413071930408478,
|
|
"rewards/rejected": -0.031205186620354652,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.5303030303030303,
|
|
"grad_norm": 13.112771276876163,
|
|
"learning_rate": 8.148148148148148e-08,
|
|
"logits/chosen": -1.3618199825286865,
|
|
"logits/rejected": -1.506471037864685,
|
|
"logps/chosen": -345.26434326171875,
|
|
"logps/rejected": -164.83676147460938,
|
|
"loss": 0.5754,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.41369715332984924,
|
|
"rewards/margins": 0.42479389905929565,
|
|
"rewards/rejected": -0.011096751317381859,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.5397727272727273,
|
|
"grad_norm": 15.00178283487482,
|
|
"learning_rate": 8.095238095238095e-08,
|
|
"logits/chosen": -1.3916102647781372,
|
|
"logits/rejected": -1.4226547479629517,
|
|
"logps/chosen": -266.93389892578125,
|
|
"logps/rejected": -123.35113525390625,
|
|
"loss": 0.5448,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.2940821647644043,
|
|
"rewards/margins": 0.32441002130508423,
|
|
"rewards/rejected": -0.030327826738357544,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.5492424242424242,
|
|
"grad_norm": 13.894167304668269,
|
|
"learning_rate": 8.042328042328042e-08,
|
|
"logits/chosen": -1.4024507999420166,
|
|
"logits/rejected": -1.499112844467163,
|
|
"logps/chosen": -299.7988586425781,
|
|
"logps/rejected": -212.5677032470703,
|
|
"loss": 0.5271,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.4066668450832367,
|
|
"rewards/margins": 0.4445282816886902,
|
|
"rewards/rejected": -0.03786151856184006,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.5587121212121212,
|
|
"grad_norm": 13.686152249765941,
|
|
"learning_rate": 7.989417989417989e-08,
|
|
"logits/chosen": -1.4456459283828735,
|
|
"logits/rejected": -1.4960581064224243,
|
|
"logps/chosen": -218.0140380859375,
|
|
"logps/rejected": -91.45022583007812,
|
|
"loss": 0.5359,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.23951879143714905,
|
|
"rewards/margins": 0.2641497552394867,
|
|
"rewards/rejected": -0.02463097684085369,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.5681818181818182,
|
|
"grad_norm": 13.294977223020641,
|
|
"learning_rate": 7.936507936507936e-08,
|
|
"logits/chosen": -1.446040391921997,
|
|
"logits/rejected": -1.4068689346313477,
|
|
"logps/chosen": -271.784912109375,
|
|
"logps/rejected": -104.23944091796875,
|
|
"loss": 0.5248,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.33832794427871704,
|
|
"rewards/margins": 0.3952670991420746,
|
|
"rewards/rejected": -0.05693921446800232,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.5776515151515151,
|
|
"grad_norm": 14.213258268574368,
|
|
"learning_rate": 7.883597883597883e-08,
|
|
"logits/chosen": -1.525424599647522,
|
|
"logits/rejected": -1.5090603828430176,
|
|
"logps/chosen": -307.25115966796875,
|
|
"logps/rejected": -123.07181549072266,
|
|
"loss": 0.5072,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.2689701318740845,
|
|
"rewards/margins": 0.3598218560218811,
|
|
"rewards/rejected": -0.09085171669721603,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.5871212121212122,
|
|
"grad_norm": 11.942720090111948,
|
|
"learning_rate": 7.83068783068783e-08,
|
|
"logits/chosen": -1.341205358505249,
|
|
"logits/rejected": -1.4334052801132202,
|
|
"logps/chosen": -235.2104034423828,
|
|
"logps/rejected": -95.51054382324219,
|
|
"loss": 0.512,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.2976926863193512,
|
|
"rewards/margins": 0.3563759922981262,
|
|
"rewards/rejected": -0.05868334323167801,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.5965909090909091,
|
|
"grad_norm": 14.098400355821441,
|
|
"learning_rate": 7.777777777777778e-08,
|
|
"logits/chosen": -1.5624217987060547,
|
|
"logits/rejected": -1.550048828125,
|
|
"logps/chosen": -395.7930603027344,
|
|
"logps/rejected": -119.40779113769531,
|
|
"loss": 0.487,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.40899619460105896,
|
|
"rewards/margins": 0.47930893301963806,
|
|
"rewards/rejected": -0.07031276077032089,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.6060606060606061,
|
|
"grad_norm": 11.1041564672598,
|
|
"learning_rate": 7.724867724867724e-08,
|
|
"logits/chosen": -1.4298704862594604,
|
|
"logits/rejected": -1.354206919670105,
|
|
"logps/chosen": -312.83697509765625,
|
|
"logps/rejected": -110.90681457519531,
|
|
"loss": 0.5224,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.32718759775161743,
|
|
"rewards/margins": 0.3963702321052551,
|
|
"rewards/rejected": -0.06918264925479889,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.615530303030303,
|
|
"grad_norm": 13.357200204611026,
|
|
"learning_rate": 7.671957671957672e-08,
|
|
"logits/chosen": -1.423771619796753,
|
|
"logits/rejected": -1.495290756225586,
|
|
"logps/chosen": -341.0536193847656,
|
|
"logps/rejected": -195.5428466796875,
|
|
"loss": 0.4798,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.47605428099632263,
|
|
"rewards/margins": 0.5310575366020203,
|
|
"rewards/rejected": -0.055003322660923004,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.625,
|
|
"grad_norm": 13.546486727992278,
|
|
"learning_rate": 7.619047619047618e-08,
|
|
"logits/chosen": -1.5028284788131714,
|
|
"logits/rejected": -1.4564129114151,
|
|
"logps/chosen": -285.58917236328125,
|
|
"logps/rejected": -131.5936279296875,
|
|
"loss": 0.4836,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.36600998044013977,
|
|
"rewards/margins": 0.4425935745239258,
|
|
"rewards/rejected": -0.0765836089849472,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.634469696969697,
|
|
"grad_norm": 12.596961700455271,
|
|
"learning_rate": 7.566137566137566e-08,
|
|
"logits/chosen": -1.4141170978546143,
|
|
"logits/rejected": -1.4340078830718994,
|
|
"logps/chosen": -261.4342041015625,
|
|
"logps/rejected": -95.14923858642578,
|
|
"loss": 0.4883,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.44839000701904297,
|
|
"rewards/margins": 0.5747641324996948,
|
|
"rewards/rejected": -0.12637418508529663,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.6439393939393939,
|
|
"grad_norm": 12.384595380406164,
|
|
"learning_rate": 7.513227513227512e-08,
|
|
"logits/chosen": -1.4482860565185547,
|
|
"logits/rejected": -1.5673211812973022,
|
|
"logps/chosen": -237.4990234375,
|
|
"logps/rejected": -84.68649291992188,
|
|
"loss": 0.5,
|
|
"rewards/accuracies": 0.7333333492279053,
|
|
"rewards/chosen": 0.37629473209381104,
|
|
"rewards/margins": 0.42733898758888245,
|
|
"rewards/rejected": -0.05104423686861992,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.6534090909090909,
|
|
"grad_norm": 11.573197865338908,
|
|
"learning_rate": 7.46031746031746e-08,
|
|
"logits/chosen": -1.4828598499298096,
|
|
"logits/rejected": -1.5483115911483765,
|
|
"logps/chosen": -294.4015197753906,
|
|
"logps/rejected": -105.67552185058594,
|
|
"loss": 0.4914,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.42700010538101196,
|
|
"rewards/margins": 0.5032913684844971,
|
|
"rewards/rejected": -0.07629130035638809,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.6628787878787878,
|
|
"grad_norm": 11.526501769285554,
|
|
"learning_rate": 7.407407407407407e-08,
|
|
"logits/chosen": -1.3778324127197266,
|
|
"logits/rejected": -1.3970375061035156,
|
|
"logps/chosen": -356.0700988769531,
|
|
"logps/rejected": -174.7995147705078,
|
|
"loss": 0.4855,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.5114725232124329,
|
|
"rewards/margins": 0.5073350071907043,
|
|
"rewards/rejected": 0.004137503914535046,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.6723484848484849,
|
|
"grad_norm": 11.786777529592367,
|
|
"learning_rate": 7.354497354497354e-08,
|
|
"logits/chosen": -1.4470304250717163,
|
|
"logits/rejected": -1.5260601043701172,
|
|
"logps/chosen": -257.9070739746094,
|
|
"logps/rejected": -82.23036193847656,
|
|
"loss": 0.5138,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.3575395941734314,
|
|
"rewards/margins": 0.4714414179325104,
|
|
"rewards/rejected": -0.11390187591314316,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.6818181818181818,
|
|
"grad_norm": 13.911478948655224,
|
|
"learning_rate": 7.301587301587301e-08,
|
|
"logits/chosen": -1.4967644214630127,
|
|
"logits/rejected": -1.379651427268982,
|
|
"logps/chosen": -456.3885803222656,
|
|
"logps/rejected": -181.92654418945312,
|
|
"loss": 0.4686,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.6715552806854248,
|
|
"rewards/margins": 0.8583688735961914,
|
|
"rewards/rejected": -0.18681350350379944,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.6912878787878788,
|
|
"grad_norm": 11.631616954008805,
|
|
"learning_rate": 7.248677248677248e-08,
|
|
"logits/chosen": -1.4026285409927368,
|
|
"logits/rejected": -1.4543156623840332,
|
|
"logps/chosen": -337.62078857421875,
|
|
"logps/rejected": -139.98934936523438,
|
|
"loss": 0.5001,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.5019295811653137,
|
|
"rewards/margins": 0.5479447245597839,
|
|
"rewards/rejected": -0.04601515084505081,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.7007575757575758,
|
|
"grad_norm": 9.936203845833878,
|
|
"learning_rate": 7.195767195767195e-08,
|
|
"logits/chosen": -1.5050134658813477,
|
|
"logits/rejected": -1.5326780080795288,
|
|
"logps/chosen": -387.0226135253906,
|
|
"logps/rejected": -181.61231994628906,
|
|
"loss": 0.5285,
|
|
"rewards/accuracies": 0.9333332777023315,
|
|
"rewards/chosen": 0.47982874512672424,
|
|
"rewards/margins": 0.5457210540771484,
|
|
"rewards/rejected": -0.0658923089504242,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.7102272727272727,
|
|
"grad_norm": 12.184941282416535,
|
|
"learning_rate": 7.142857142857142e-08,
|
|
"logits/chosen": -1.4602888822555542,
|
|
"logits/rejected": -1.4935940504074097,
|
|
"logps/chosen": -471.65643310546875,
|
|
"logps/rejected": -181.0192108154297,
|
|
"loss": 0.4831,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.696118175983429,
|
|
"rewards/margins": 0.8435744047164917,
|
|
"rewards/rejected": -0.1474563181400299,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.7196969696969697,
|
|
"grad_norm": 11.235163839695879,
|
|
"learning_rate": 7.08994708994709e-08,
|
|
"logits/chosen": -1.5082679986953735,
|
|
"logits/rejected": -1.5974117517471313,
|
|
"logps/chosen": -399.4970703125,
|
|
"logps/rejected": -201.58975219726562,
|
|
"loss": 0.4768,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.6427886486053467,
|
|
"rewards/margins": 0.7292692065238953,
|
|
"rewards/rejected": -0.08648059517145157,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.7291666666666666,
|
|
"grad_norm": 12.9574754516344,
|
|
"learning_rate": 7.037037037037036e-08,
|
|
"logits/chosen": -1.4920450448989868,
|
|
"logits/rejected": -1.4171216487884521,
|
|
"logps/chosen": -332.895263671875,
|
|
"logps/rejected": -176.00601196289062,
|
|
"loss": 0.4783,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.4683101773262024,
|
|
"rewards/margins": 0.5503407716751099,
|
|
"rewards/rejected": -0.08203061670064926,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.7386363636363636,
|
|
"grad_norm": 11.558597299124692,
|
|
"learning_rate": 6.984126984126983e-08,
|
|
"logits/chosen": -1.4621433019638062,
|
|
"logits/rejected": -1.5110585689544678,
|
|
"logps/chosen": -361.80889892578125,
|
|
"logps/rejected": -157.5341339111328,
|
|
"loss": 0.4715,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.555467963218689,
|
|
"rewards/margins": 0.682567834854126,
|
|
"rewards/rejected": -0.12709984183311462,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.7481060606060606,
|
|
"grad_norm": 13.548458950264191,
|
|
"learning_rate": 6.931216931216932e-08,
|
|
"logits/chosen": -1.523284673690796,
|
|
"logits/rejected": -1.4904415607452393,
|
|
"logps/chosen": -309.17816162109375,
|
|
"logps/rejected": -115.497802734375,
|
|
"loss": 0.493,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.4258233904838562,
|
|
"rewards/margins": 0.4799923896789551,
|
|
"rewards/rejected": -0.05416899174451828,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.7575757575757576,
|
|
"grad_norm": 10.624056172874939,
|
|
"learning_rate": 6.878306878306877e-08,
|
|
"logits/chosen": -1.4721736907958984,
|
|
"logits/rejected": -1.556168794631958,
|
|
"logps/chosen": -349.3233337402344,
|
|
"logps/rejected": -119.07454681396484,
|
|
"loss": 0.4971,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.4824630320072174,
|
|
"rewards/margins": 0.6270266771316528,
|
|
"rewards/rejected": -0.1445636749267578,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.7670454545454546,
|
|
"grad_norm": 10.877663952011163,
|
|
"learning_rate": 6.825396825396826e-08,
|
|
"logits/chosen": -1.448540210723877,
|
|
"logits/rejected": -1.5003968477249146,
|
|
"logps/chosen": -341.19549560546875,
|
|
"logps/rejected": -161.4232635498047,
|
|
"loss": 0.4997,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.512890636920929,
|
|
"rewards/margins": 0.5583428144454956,
|
|
"rewards/rejected": -0.04545217379927635,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.7765151515151515,
|
|
"grad_norm": 13.655792524642862,
|
|
"learning_rate": 6.772486772486771e-08,
|
|
"logits/chosen": -1.4932104349136353,
|
|
"logits/rejected": -1.4824309349060059,
|
|
"logps/chosen": -377.6290588378906,
|
|
"logps/rejected": -145.4966583251953,
|
|
"loss": 0.4506,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.5544673204421997,
|
|
"rewards/margins": 0.7561152577400208,
|
|
"rewards/rejected": -0.20164790749549866,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.7859848484848485,
|
|
"grad_norm": 13.094215069002113,
|
|
"learning_rate": 6.71957671957672e-08,
|
|
"logits/chosen": -1.5782554149627686,
|
|
"logits/rejected": -1.660269021987915,
|
|
"logps/chosen": -349.7228088378906,
|
|
"logps/rejected": -182.11880493164062,
|
|
"loss": 0.4818,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.6241387128829956,
|
|
"rewards/margins": 0.7656179666519165,
|
|
"rewards/rejected": -0.1414792239665985,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.7954545454545454,
|
|
"grad_norm": 12.779977757002404,
|
|
"learning_rate": 6.666666666666665e-08,
|
|
"logits/chosen": -1.5646089315414429,
|
|
"logits/rejected": -1.5739343166351318,
|
|
"logps/chosen": -311.35760498046875,
|
|
"logps/rejected": -131.74862670898438,
|
|
"loss": 0.4655,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.5058180093765259,
|
|
"rewards/margins": 0.6643168926239014,
|
|
"rewards/rejected": -0.1584988683462143,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.8049242424242424,
|
|
"grad_norm": 10.185073249389749,
|
|
"learning_rate": 6.613756613756614e-08,
|
|
"logits/chosen": -1.4740874767303467,
|
|
"logits/rejected": -1.4708284139633179,
|
|
"logps/chosen": -319.00286865234375,
|
|
"logps/rejected": -152.86892700195312,
|
|
"loss": 0.4822,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.5287100076675415,
|
|
"rewards/margins": 0.6535229682922363,
|
|
"rewards/rejected": -0.1248130053281784,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.8143939393939394,
|
|
"grad_norm": 11.203293253753769,
|
|
"learning_rate": 6.560846560846561e-08,
|
|
"logits/chosen": -1.5367662906646729,
|
|
"logits/rejected": -1.4478954076766968,
|
|
"logps/chosen": -201.14149475097656,
|
|
"logps/rejected": -86.86661529541016,
|
|
"loss": 0.4671,
|
|
"rewards/accuracies": 0.9333332777023315,
|
|
"rewards/chosen": 0.37096017599105835,
|
|
"rewards/margins": 0.45440879464149475,
|
|
"rewards/rejected": -0.0834486186504364,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.8238636363636364,
|
|
"grad_norm": 10.604116644121472,
|
|
"learning_rate": 6.507936507936508e-08,
|
|
"logits/chosen": -1.4273946285247803,
|
|
"logits/rejected": -1.4703240394592285,
|
|
"logps/chosen": -301.97882080078125,
|
|
"logps/rejected": -157.4232635498047,
|
|
"loss": 0.4514,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.624559223651886,
|
|
"rewards/margins": 0.7758625745773315,
|
|
"rewards/rejected": -0.15130344033241272,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.8333333333333334,
|
|
"grad_norm": 9.972392236357932,
|
|
"learning_rate": 6.455026455026455e-08,
|
|
"logits/chosen": -1.482578158378601,
|
|
"logits/rejected": -1.4413843154907227,
|
|
"logps/chosen": -254.91702270507812,
|
|
"logps/rejected": -123.97944641113281,
|
|
"loss": 0.4721,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.48685789108276367,
|
|
"rewards/margins": 0.6410495638847351,
|
|
"rewards/rejected": -0.1541917622089386,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.8428030303030303,
|
|
"grad_norm": 10.523325958099775,
|
|
"learning_rate": 6.402116402116402e-08,
|
|
"logits/chosen": -1.480163335800171,
|
|
"logits/rejected": -1.5355768203735352,
|
|
"logps/chosen": -304.09197998046875,
|
|
"logps/rejected": -142.2599639892578,
|
|
"loss": 0.4744,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.5564759969711304,
|
|
"rewards/margins": 0.6676918864250183,
|
|
"rewards/rejected": -0.11121580749750137,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.8522727272727273,
|
|
"grad_norm": 9.519106187436845,
|
|
"learning_rate": 6.349206349206349e-08,
|
|
"logits/chosen": -1.5268163681030273,
|
|
"logits/rejected": -1.5646402835845947,
|
|
"logps/chosen": -286.10540771484375,
|
|
"logps/rejected": -157.7232208251953,
|
|
"loss": 0.4714,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.4824487268924713,
|
|
"rewards/margins": 0.5735751986503601,
|
|
"rewards/rejected": -0.09112647920846939,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.8617424242424242,
|
|
"grad_norm": 9.51590024478788,
|
|
"learning_rate": 6.296296296296296e-08,
|
|
"logits/chosen": -1.5810682773590088,
|
|
"logits/rejected": -1.561959981918335,
|
|
"logps/chosen": -292.758056640625,
|
|
"logps/rejected": -114.890625,
|
|
"loss": 0.5022,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.47544798254966736,
|
|
"rewards/margins": 0.5433242321014404,
|
|
"rewards/rejected": -0.06787623465061188,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.8712121212121212,
|
|
"grad_norm": 9.702200423985945,
|
|
"learning_rate": 6.243386243386243e-08,
|
|
"logits/chosen": -1.4159653186798096,
|
|
"logits/rejected": -1.5273025035858154,
|
|
"logps/chosen": -310.64093017578125,
|
|
"logps/rejected": -160.01473999023438,
|
|
"loss": 0.4457,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.5551848411560059,
|
|
"rewards/margins": 0.6525019407272339,
|
|
"rewards/rejected": -0.097317174077034,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.8806818181818182,
|
|
"grad_norm": 12.532628428602317,
|
|
"learning_rate": 6.19047619047619e-08,
|
|
"logits/chosen": -1.4812260866165161,
|
|
"logits/rejected": -1.5698903799057007,
|
|
"logps/chosen": -313.69659423828125,
|
|
"logps/rejected": -125.74027252197266,
|
|
"loss": 0.4221,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.512529730796814,
|
|
"rewards/margins": 0.7795059084892273,
|
|
"rewards/rejected": -0.26697611808776855,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.8901515151515151,
|
|
"grad_norm": 9.937234934609606,
|
|
"learning_rate": 6.137566137566137e-08,
|
|
"logits/chosen": -1.4872493743896484,
|
|
"logits/rejected": -1.4849263429641724,
|
|
"logps/chosen": -306.2376708984375,
|
|
"logps/rejected": -151.22828674316406,
|
|
"loss": 0.4843,
|
|
"rewards/accuracies": 0.9333332777023315,
|
|
"rewards/chosen": 0.49157848954200745,
|
|
"rewards/margins": 0.6916278004646301,
|
|
"rewards/rejected": -0.2000492364168167,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.8996212121212122,
|
|
"grad_norm": 10.966223203578922,
|
|
"learning_rate": 6.084656084656084e-08,
|
|
"logits/chosen": -1.5097806453704834,
|
|
"logits/rejected": -1.548575520515442,
|
|
"logps/chosen": -248.2743682861328,
|
|
"logps/rejected": -143.56008911132812,
|
|
"loss": 0.4619,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.29616254568099976,
|
|
"rewards/margins": 0.44729262590408325,
|
|
"rewards/rejected": -0.1511300802230835,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.9090909090909091,
|
|
"grad_norm": 10.290146618878584,
|
|
"learning_rate": 6.031746031746031e-08,
|
|
"logits/chosen": -1.3948135375976562,
|
|
"logits/rejected": -1.4724781513214111,
|
|
"logps/chosen": -200.97560119628906,
|
|
"logps/rejected": -59.250823974609375,
|
|
"loss": 0.4559,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.31829187273979187,
|
|
"rewards/margins": 0.4826883375644684,
|
|
"rewards/rejected": -0.1643964797258377,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.9185606060606061,
|
|
"grad_norm": 10.023113393783985,
|
|
"learning_rate": 5.978835978835979e-08,
|
|
"logits/chosen": -1.4924052953720093,
|
|
"logits/rejected": -1.481069803237915,
|
|
"logps/chosen": -210.6460418701172,
|
|
"logps/rejected": -101.57307434082031,
|
|
"loss": 0.4809,
|
|
"rewards/accuracies": 0.7333333492279053,
|
|
"rewards/chosen": 0.3884206712245941,
|
|
"rewards/margins": 0.5861561298370361,
|
|
"rewards/rejected": -0.19773544371128082,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.928030303030303,
|
|
"grad_norm": 9.340197405706663,
|
|
"learning_rate": 5.925925925925925e-08,
|
|
"logits/chosen": -1.5180368423461914,
|
|
"logits/rejected": -1.4653794765472412,
|
|
"logps/chosen": -321.68426513671875,
|
|
"logps/rejected": -144.31082153320312,
|
|
"loss": 0.4381,
|
|
"rewards/accuracies": 0.9333332777023315,
|
|
"rewards/chosen": 0.4283772110939026,
|
|
"rewards/margins": 0.8070729970932007,
|
|
"rewards/rejected": -0.3786957859992981,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.9375,
|
|
"grad_norm": 9.85547123624337,
|
|
"learning_rate": 5.873015873015873e-08,
|
|
"logits/chosen": -1.5044419765472412,
|
|
"logits/rejected": -1.5305202007293701,
|
|
"logps/chosen": -289.38946533203125,
|
|
"logps/rejected": -109.14179992675781,
|
|
"loss": 0.4682,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.4926362931728363,
|
|
"rewards/margins": 0.6987640261650085,
|
|
"rewards/rejected": -0.20612768828868866,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.946969696969697,
|
|
"grad_norm": 10.445428542153785,
|
|
"learning_rate": 5.82010582010582e-08,
|
|
"logits/chosen": -1.4615473747253418,
|
|
"logits/rejected": -1.507441520690918,
|
|
"logps/chosen": -233.7719268798828,
|
|
"logps/rejected": -87.18647766113281,
|
|
"loss": 0.4687,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.3351677358150482,
|
|
"rewards/margins": 0.5392307043075562,
|
|
"rewards/rejected": -0.20406293869018555,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.9564393939393939,
|
|
"grad_norm": 10.068872305516752,
|
|
"learning_rate": 5.7671957671957674e-08,
|
|
"logits/chosen": -1.4435585737228394,
|
|
"logits/rejected": -1.592414379119873,
|
|
"logps/chosen": -288.0964660644531,
|
|
"logps/rejected": -245.06857299804688,
|
|
"loss": 0.4671,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.519100546836853,
|
|
"rewards/margins": 0.7276883721351624,
|
|
"rewards/rejected": -0.20858784019947052,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.9659090909090909,
|
|
"grad_norm": 9.94558673339714,
|
|
"learning_rate": 5.714285714285714e-08,
|
|
"logits/chosen": -1.5683315992355347,
|
|
"logits/rejected": -1.5737329721450806,
|
|
"logps/chosen": -380.98223876953125,
|
|
"logps/rejected": -148.6276092529297,
|
|
"loss": 0.4173,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.6376638412475586,
|
|
"rewards/margins": 0.9165285229682922,
|
|
"rewards/rejected": -0.2788645625114441,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.9753787878787878,
|
|
"grad_norm": 9.63450569739456,
|
|
"learning_rate": 5.6613756613756614e-08,
|
|
"logits/chosen": -1.5396722555160522,
|
|
"logits/rejected": -1.5394847393035889,
|
|
"logps/chosen": -313.8475341796875,
|
|
"logps/rejected": -103.21199035644531,
|
|
"loss": 0.4177,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.6744638681411743,
|
|
"rewards/margins": 0.8535541296005249,
|
|
"rewards/rejected": -0.17909026145935059,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.9848484848484849,
|
|
"grad_norm": 10.940866953801127,
|
|
"learning_rate": 5.608465608465608e-08,
|
|
"logits/chosen": -1.5241594314575195,
|
|
"logits/rejected": -1.637131690979004,
|
|
"logps/chosen": -340.30072021484375,
|
|
"logps/rejected": -126.37461853027344,
|
|
"loss": 0.4387,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.5279501080513,
|
|
"rewards/margins": 0.7279192805290222,
|
|
"rewards/rejected": -0.1999691277742386,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.9943181818181818,
|
|
"grad_norm": 9.396057853660293,
|
|
"learning_rate": 5.5555555555555555e-08,
|
|
"logits/chosen": -1.4265496730804443,
|
|
"logits/rejected": -1.5132142305374146,
|
|
"logps/chosen": -269.0768127441406,
|
|
"logps/rejected": -128.25860595703125,
|
|
"loss": 0.4426,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.511640727519989,
|
|
"rewards/margins": 0.7144440412521362,
|
|
"rewards/rejected": -0.20280325412750244,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 1.003787878787879,
|
|
"grad_norm": 7.831685341832891,
|
|
"learning_rate": 5.502645502645502e-08,
|
|
"logits/chosen": -1.5368983745574951,
|
|
"logits/rejected": -1.4743095636367798,
|
|
"logps/chosen": -280.42279052734375,
|
|
"logps/rejected": -121.66993713378906,
|
|
"loss": 0.4581,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.5917797684669495,
|
|
"rewards/margins": 0.7320090532302856,
|
|
"rewards/rejected": -0.14022931456565857,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 1.0132575757575757,
|
|
"grad_norm": 8.940391987752884,
|
|
"learning_rate": 5.4497354497354495e-08,
|
|
"logits/chosen": -1.491202712059021,
|
|
"logits/rejected": -1.5914320945739746,
|
|
"logps/chosen": -321.1029052734375,
|
|
"logps/rejected": -147.7200164794922,
|
|
"loss": 0.4276,
|
|
"rewards/accuracies": 0.7333333492279053,
|
|
"rewards/chosen": 0.5120298862457275,
|
|
"rewards/margins": 0.6171775460243225,
|
|
"rewards/rejected": -0.10514764487743378,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 1.0227272727272727,
|
|
"grad_norm": 7.697241759390277,
|
|
"learning_rate": 5.3968253968253965e-08,
|
|
"logits/chosen": -1.5066335201263428,
|
|
"logits/rejected": -1.528194546699524,
|
|
"logps/chosen": -292.2257385253906,
|
|
"logps/rejected": -174.4923553466797,
|
|
"loss": 0.4566,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.5518472790718079,
|
|
"rewards/margins": 0.8220493197441101,
|
|
"rewards/rejected": -0.270202100276947,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 1.0321969696969697,
|
|
"grad_norm": 7.9963617328841226,
|
|
"learning_rate": 5.343915343915344e-08,
|
|
"logits/chosen": -1.5413943529129028,
|
|
"logits/rejected": -1.6188589334487915,
|
|
"logps/chosen": -312.98065185546875,
|
|
"logps/rejected": -115.0594253540039,
|
|
"loss": 0.4259,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.5731326341629028,
|
|
"rewards/margins": 0.9009225964546204,
|
|
"rewards/rejected": -0.32778996229171753,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 1.0416666666666667,
|
|
"grad_norm": 9.995827902761533,
|
|
"learning_rate": 5.2910052910052905e-08,
|
|
"logits/chosen": -1.6093279123306274,
|
|
"logits/rejected": -1.713158369064331,
|
|
"logps/chosen": -373.02288818359375,
|
|
"logps/rejected": -202.49237060546875,
|
|
"loss": 0.4242,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.4924096167087555,
|
|
"rewards/margins": 0.7550744414329529,
|
|
"rewards/rejected": -0.2626648545265198,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 1.0511363636363635,
|
|
"grad_norm": 6.7346694181058,
|
|
"learning_rate": 5.238095238095238e-08,
|
|
"logits/chosen": -1.571112871170044,
|
|
"logits/rejected": -1.5796878337860107,
|
|
"logps/chosen": -312.8564453125,
|
|
"logps/rejected": -136.47171020507812,
|
|
"loss": 0.4814,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.5543137788772583,
|
|
"rewards/margins": 0.8626272082328796,
|
|
"rewards/rejected": -0.30831339955329895,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 1.0606060606060606,
|
|
"grad_norm": 6.9001053561660015,
|
|
"learning_rate": 5.1851851851851846e-08,
|
|
"logits/chosen": -1.5522701740264893,
|
|
"logits/rejected": -1.6721560955047607,
|
|
"logps/chosen": -323.85064697265625,
|
|
"logps/rejected": -118.36842346191406,
|
|
"loss": 0.4539,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.6884499192237854,
|
|
"rewards/margins": 0.9783046841621399,
|
|
"rewards/rejected": -0.28985467553138733,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 1.0700757575757576,
|
|
"grad_norm": 8.261082045277393,
|
|
"learning_rate": 5.132275132275132e-08,
|
|
"logits/chosen": -1.5419814586639404,
|
|
"logits/rejected": -1.6027164459228516,
|
|
"logps/chosen": -262.14154052734375,
|
|
"logps/rejected": -85.59474182128906,
|
|
"loss": 0.42,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.4974077641963959,
|
|
"rewards/margins": 0.7110608220100403,
|
|
"rewards/rejected": -0.21365304291248322,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 1.0795454545454546,
|
|
"grad_norm": 7.731297247151736,
|
|
"learning_rate": 5.0793650793650786e-08,
|
|
"logits/chosen": -1.5612366199493408,
|
|
"logits/rejected": -1.6464793682098389,
|
|
"logps/chosen": -212.521484375,
|
|
"logps/rejected": -63.86298751831055,
|
|
"loss": 0.408,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.43088382482528687,
|
|
"rewards/margins": 0.6654319167137146,
|
|
"rewards/rejected": -0.23454804718494415,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 1.0890151515151516,
|
|
"grad_norm": 8.268687229033572,
|
|
"learning_rate": 5.026455026455026e-08,
|
|
"logits/chosen": -1.4982279539108276,
|
|
"logits/rejected": -1.5205714702606201,
|
|
"logps/chosen": -284.63787841796875,
|
|
"logps/rejected": -165.73988342285156,
|
|
"loss": 0.4353,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.5018287301063538,
|
|
"rewards/margins": 0.7900735139846802,
|
|
"rewards/rejected": -0.2882448136806488,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 1.0984848484848484,
|
|
"grad_norm": 7.675182494137689,
|
|
"learning_rate": 4.973544973544973e-08,
|
|
"logits/chosen": -1.3966763019561768,
|
|
"logits/rejected": -1.5771713256835938,
|
|
"logps/chosen": -306.68499755859375,
|
|
"logps/rejected": -174.55003356933594,
|
|
"loss": 0.3851,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.6541851162910461,
|
|
"rewards/margins": 0.9976884722709656,
|
|
"rewards/rejected": -0.34350335597991943,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 1.1079545454545454,
|
|
"grad_norm": 7.288879950855222,
|
|
"learning_rate": 4.92063492063492e-08,
|
|
"logits/chosen": -1.6342532634735107,
|
|
"logits/rejected": -1.7281395196914673,
|
|
"logps/chosen": -312.36895751953125,
|
|
"logps/rejected": -198.22055053710938,
|
|
"loss": 0.4289,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.6851889491081238,
|
|
"rewards/margins": 0.873565673828125,
|
|
"rewards/rejected": -0.18837669491767883,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 1.1174242424242424,
|
|
"grad_norm": 7.471188637782996,
|
|
"learning_rate": 4.867724867724867e-08,
|
|
"logits/chosen": -1.5435848236083984,
|
|
"logits/rejected": -1.5624885559082031,
|
|
"logps/chosen": -276.4817810058594,
|
|
"logps/rejected": -140.15501403808594,
|
|
"loss": 0.4277,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.4910324513912201,
|
|
"rewards/margins": 0.6622015237808228,
|
|
"rewards/rejected": -0.17116901278495789,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 1.1268939393939394,
|
|
"grad_norm": 6.073468496072038,
|
|
"learning_rate": 4.814814814814814e-08,
|
|
"logits/chosen": -1.549534559249878,
|
|
"logits/rejected": -1.7240833044052124,
|
|
"logps/chosen": -237.54714965820312,
|
|
"logps/rejected": -110.4820327758789,
|
|
"loss": 0.4246,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.43428367376327515,
|
|
"rewards/margins": 0.675010085105896,
|
|
"rewards/rejected": -0.2407263219356537,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 1.1363636363636362,
|
|
"grad_norm": 6.229810704784674,
|
|
"learning_rate": 4.7619047619047613e-08,
|
|
"logits/chosen": -1.5288211107254028,
|
|
"logits/rejected": -1.6095609664916992,
|
|
"logps/chosen": -300.1777038574219,
|
|
"logps/rejected": -178.78512573242188,
|
|
"loss": 0.4332,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.45809292793273926,
|
|
"rewards/margins": 0.775213360786438,
|
|
"rewards/rejected": -0.31712037324905396,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 1.1458333333333333,
|
|
"grad_norm": 6.153778206401315,
|
|
"learning_rate": 4.7089947089947084e-08,
|
|
"logits/chosen": -1.446118950843811,
|
|
"logits/rejected": -1.6403175592422485,
|
|
"logps/chosen": -261.62823486328125,
|
|
"logps/rejected": -110.68792724609375,
|
|
"loss": 0.4185,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.5491897463798523,
|
|
"rewards/margins": 0.9256091117858887,
|
|
"rewards/rejected": -0.3764193654060364,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 1.1553030303030303,
|
|
"grad_norm": 8.460733071100409,
|
|
"learning_rate": 4.6560846560846554e-08,
|
|
"logits/chosen": -1.5496113300323486,
|
|
"logits/rejected": -1.6899559497833252,
|
|
"logps/chosen": -339.3661193847656,
|
|
"logps/rejected": -121.66202545166016,
|
|
"loss": 0.3873,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.7354398369789124,
|
|
"rewards/margins": 1.1846985816955566,
|
|
"rewards/rejected": -0.44925880432128906,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 1.1647727272727273,
|
|
"grad_norm": 7.315349675684202,
|
|
"learning_rate": 4.6031746031746024e-08,
|
|
"logits/chosen": -1.6141818761825562,
|
|
"logits/rejected": -1.6296923160552979,
|
|
"logps/chosen": -238.61257934570312,
|
|
"logps/rejected": -122.18446350097656,
|
|
"loss": 0.3669,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.37908780574798584,
|
|
"rewards/margins": 0.6342587471008301,
|
|
"rewards/rejected": -0.2551709711551666,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 1.1742424242424243,
|
|
"grad_norm": 7.280988098748099,
|
|
"learning_rate": 4.55026455026455e-08,
|
|
"logits/chosen": -1.5299112796783447,
|
|
"logits/rejected": -1.5701675415039062,
|
|
"logps/chosen": -316.45538330078125,
|
|
"logps/rejected": -162.38204956054688,
|
|
"loss": 0.3653,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.628962516784668,
|
|
"rewards/margins": 1.195112943649292,
|
|
"rewards/rejected": -0.566150426864624,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 1.183712121212121,
|
|
"grad_norm": 6.336179594301882,
|
|
"learning_rate": 4.497354497354497e-08,
|
|
"logits/chosen": -1.557908058166504,
|
|
"logits/rejected": -1.6704943180084229,
|
|
"logps/chosen": -325.92266845703125,
|
|
"logps/rejected": -167.68130493164062,
|
|
"loss": 0.3747,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.6076933741569519,
|
|
"rewards/margins": 0.9305548667907715,
|
|
"rewards/rejected": -0.3228614330291748,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 1.1931818181818181,
|
|
"grad_norm": 6.805130421455224,
|
|
"learning_rate": 4.444444444444444e-08,
|
|
"logits/chosen": -1.4893940687179565,
|
|
"logits/rejected": -1.5416975021362305,
|
|
"logps/chosen": -164.71548461914062,
|
|
"logps/rejected": -102.4400634765625,
|
|
"loss": 0.3971,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.2525160014629364,
|
|
"rewards/margins": 0.4948664605617523,
|
|
"rewards/rejected": -0.2423505038022995,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 1.2026515151515151,
|
|
"grad_norm": 6.005160247149641,
|
|
"learning_rate": 4.391534391534391e-08,
|
|
"logits/chosen": -1.6034702062606812,
|
|
"logits/rejected": -1.7156803607940674,
|
|
"logps/chosen": -327.7132568359375,
|
|
"logps/rejected": -122.96253967285156,
|
|
"loss": 0.4048,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.611197292804718,
|
|
"rewards/margins": 0.990843653678894,
|
|
"rewards/rejected": -0.3796464204788208,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 1.2121212121212122,
|
|
"grad_norm": 6.064066327086945,
|
|
"learning_rate": 4.338624338624338e-08,
|
|
"logits/chosen": -1.4845657348632812,
|
|
"logits/rejected": -1.6025197505950928,
|
|
"logps/chosen": -266.589599609375,
|
|
"logps/rejected": -185.93374633789062,
|
|
"loss": 0.3821,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.46795588731765747,
|
|
"rewards/margins": 0.7385295033454895,
|
|
"rewards/rejected": -0.27057361602783203,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 1.2215909090909092,
|
|
"grad_norm": 7.118625992094876,
|
|
"learning_rate": 4.285714285714285e-08,
|
|
"logits/chosen": -1.5417770147323608,
|
|
"logits/rejected": -1.6401869058609009,
|
|
"logps/chosen": -364.9320983886719,
|
|
"logps/rejected": -157.976806640625,
|
|
"loss": 0.3608,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.7918987274169922,
|
|
"rewards/margins": 1.300087571144104,
|
|
"rewards/rejected": -0.5081888437271118,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 1.231060606060606,
|
|
"grad_norm": 5.890825166861846,
|
|
"learning_rate": 4.232804232804232e-08,
|
|
"logits/chosen": -1.6265586614608765,
|
|
"logits/rejected": -1.7117183208465576,
|
|
"logps/chosen": -360.13519287109375,
|
|
"logps/rejected": -177.2240753173828,
|
|
"loss": 0.4508,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.7431641221046448,
|
|
"rewards/margins": 1.2213959693908691,
|
|
"rewards/rejected": -0.47823190689086914,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 1.240530303030303,
|
|
"grad_norm": 6.0694144195552315,
|
|
"learning_rate": 4.179894179894179e-08,
|
|
"logits/chosen": -1.5653178691864014,
|
|
"logits/rejected": -1.6441690921783447,
|
|
"logps/chosen": -263.4643859863281,
|
|
"logps/rejected": -122.57698059082031,
|
|
"loss": 0.4199,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.47927695512771606,
|
|
"rewards/margins": 0.9024354219436646,
|
|
"rewards/rejected": -0.4231584668159485,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"grad_norm": 5.640397757089755,
|
|
"learning_rate": 4.126984126984127e-08,
|
|
"logits/chosen": -1.5248239040374756,
|
|
"logits/rejected": -1.5807774066925049,
|
|
"logps/chosen": -232.2427520751953,
|
|
"logps/rejected": -182.7039794921875,
|
|
"loss": 0.4312,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.33463865518569946,
|
|
"rewards/margins": 0.5401297807693481,
|
|
"rewards/rejected": -0.20549115538597107,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 1.259469696969697,
|
|
"grad_norm": 6.188835951236042,
|
|
"learning_rate": 4.074074074074074e-08,
|
|
"logits/chosen": -1.5644571781158447,
|
|
"logits/rejected": -1.5982444286346436,
|
|
"logps/chosen": -316.8279724121094,
|
|
"logps/rejected": -114.7803726196289,
|
|
"loss": 0.3815,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.6954594254493713,
|
|
"rewards/margins": 1.1970874071121216,
|
|
"rewards/rejected": -0.5016279816627502,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 1.268939393939394,
|
|
"grad_norm": 5.913640581542438,
|
|
"learning_rate": 4.021164021164021e-08,
|
|
"logits/chosen": -1.5394070148468018,
|
|
"logits/rejected": -1.6787744760513306,
|
|
"logps/chosen": -210.8607940673828,
|
|
"logps/rejected": -104.78348541259766,
|
|
"loss": 0.4341,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.422776460647583,
|
|
"rewards/margins": 0.6558708548545837,
|
|
"rewards/rejected": -0.2330944538116455,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 1.2784090909090908,
|
|
"grad_norm": 5.979621420732666,
|
|
"learning_rate": 3.968253968253968e-08,
|
|
"logits/chosen": -1.5477142333984375,
|
|
"logits/rejected": -1.659967064857483,
|
|
"logps/chosen": -297.19622802734375,
|
|
"logps/rejected": -153.1532745361328,
|
|
"loss": 0.3554,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.6835674047470093,
|
|
"rewards/margins": 1.379019021987915,
|
|
"rewards/rejected": -0.695451557636261,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 1.2878787878787878,
|
|
"grad_norm": 5.932788102313539,
|
|
"learning_rate": 3.915343915343915e-08,
|
|
"logits/chosen": -1.4924932718276978,
|
|
"logits/rejected": -1.6393858194351196,
|
|
"logps/chosen": -246.09750366210938,
|
|
"logps/rejected": -120.7838363647461,
|
|
"loss": 0.3555,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.5313190221786499,
|
|
"rewards/margins": 0.9934673309326172,
|
|
"rewards/rejected": -0.4621482789516449,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 1.2973484848484849,
|
|
"grad_norm": 14.930220486136447,
|
|
"learning_rate": 3.862433862433862e-08,
|
|
"logits/chosen": -1.5214557647705078,
|
|
"logits/rejected": -1.6087700128555298,
|
|
"logps/chosen": -384.80938720703125,
|
|
"logps/rejected": -210.44235229492188,
|
|
"loss": 0.3889,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.7282799482345581,
|
|
"rewards/margins": 1.31002676486969,
|
|
"rewards/rejected": -0.5817468762397766,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 1.3068181818181819,
|
|
"grad_norm": 5.971012894528325,
|
|
"learning_rate": 3.809523809523809e-08,
|
|
"logits/chosen": -1.5881704092025757,
|
|
"logits/rejected": -1.569000005722046,
|
|
"logps/chosen": -297.71490478515625,
|
|
"logps/rejected": -158.21453857421875,
|
|
"loss": 0.3827,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.487607479095459,
|
|
"rewards/margins": 0.8952295184135437,
|
|
"rewards/rejected": -0.40762215852737427,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 1.316287878787879,
|
|
"grad_norm": 5.470550316559649,
|
|
"learning_rate": 3.756613756613756e-08,
|
|
"logits/chosen": -1.5592622756958008,
|
|
"logits/rejected": -1.6217191219329834,
|
|
"logps/chosen": -208.8889923095703,
|
|
"logps/rejected": -99.66307067871094,
|
|
"loss": 0.3742,
|
|
"rewards/accuracies": 0.9333332777023315,
|
|
"rewards/chosen": 0.47726184129714966,
|
|
"rewards/margins": 0.8086546063423157,
|
|
"rewards/rejected": -0.3313927948474884,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 1.3257575757575757,
|
|
"grad_norm": 5.53149417210968,
|
|
"learning_rate": 3.7037037037037036e-08,
|
|
"logits/chosen": -1.5759552717208862,
|
|
"logits/rejected": -1.6656872034072876,
|
|
"logps/chosen": -282.05584716796875,
|
|
"logps/rejected": -137.61387634277344,
|
|
"loss": 0.3673,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.5351212024688721,
|
|
"rewards/margins": 0.9544070363044739,
|
|
"rewards/rejected": -0.41928577423095703,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 1.3352272727272727,
|
|
"grad_norm": 5.589581006217635,
|
|
"learning_rate": 3.6507936507936506e-08,
|
|
"logits/chosen": -1.6098802089691162,
|
|
"logits/rejected": -1.6987054347991943,
|
|
"logps/chosen": -248.8202362060547,
|
|
"logps/rejected": -105.59635925292969,
|
|
"loss": 0.4226,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.5064533352851868,
|
|
"rewards/margins": 0.982274055480957,
|
|
"rewards/rejected": -0.47582077980041504,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 1.3446969696969697,
|
|
"grad_norm": 6.277665119781322,
|
|
"learning_rate": 3.5978835978835977e-08,
|
|
"logits/chosen": -1.5494630336761475,
|
|
"logits/rejected": -1.6693788766860962,
|
|
"logps/chosen": -351.03125,
|
|
"logps/rejected": -129.0759735107422,
|
|
"loss": 0.3673,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.7771791219711304,
|
|
"rewards/margins": 1.4887819290161133,
|
|
"rewards/rejected": -0.7116026878356934,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 1.3541666666666667,
|
|
"grad_norm": 5.8355671091327075,
|
|
"learning_rate": 3.544973544973545e-08,
|
|
"logits/chosen": -1.559160828590393,
|
|
"logits/rejected": -1.5491154193878174,
|
|
"logps/chosen": -335.740234375,
|
|
"logps/rejected": -170.25552368164062,
|
|
"loss": 0.4013,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.7102228999137878,
|
|
"rewards/margins": 1.2653253078460693,
|
|
"rewards/rejected": -0.555102527141571,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 1.3636363636363638,
|
|
"grad_norm": 5.768525584457291,
|
|
"learning_rate": 3.492063492063492e-08,
|
|
"logits/chosen": -1.6074180603027344,
|
|
"logits/rejected": -1.7357871532440186,
|
|
"logps/chosen": -264.52679443359375,
|
|
"logps/rejected": -100.31982421875,
|
|
"loss": 0.408,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.4864141345024109,
|
|
"rewards/margins": 0.7962800860404968,
|
|
"rewards/rejected": -0.30986595153808594,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 1.3731060606060606,
|
|
"grad_norm": 5.592283665576824,
|
|
"learning_rate": 3.439153439153439e-08,
|
|
"logits/chosen": -1.5794652700424194,
|
|
"logits/rejected": -1.6425234079360962,
|
|
"logps/chosen": -305.49005126953125,
|
|
"logps/rejected": -128.72915649414062,
|
|
"loss": 0.3636,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.6161352396011353,
|
|
"rewards/margins": 1.2325931787490845,
|
|
"rewards/rejected": -0.6164579391479492,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 1.3825757575757576,
|
|
"grad_norm": 5.2043920846765515,
|
|
"learning_rate": 3.386243386243386e-08,
|
|
"logits/chosen": -1.6179447174072266,
|
|
"logits/rejected": -1.7832599878311157,
|
|
"logps/chosen": -317.20703125,
|
|
"logps/rejected": -164.9311065673828,
|
|
"loss": 0.3351,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.6158957481384277,
|
|
"rewards/margins": 1.1830036640167236,
|
|
"rewards/rejected": -0.5671079158782959,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 1.3920454545454546,
|
|
"grad_norm": 5.389376859152257,
|
|
"learning_rate": 3.333333333333333e-08,
|
|
"logits/chosen": -1.5436028242111206,
|
|
"logits/rejected": -1.7525603771209717,
|
|
"logps/chosen": -217.86569213867188,
|
|
"logps/rejected": -93.09127044677734,
|
|
"loss": 0.3912,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.4181972146034241,
|
|
"rewards/margins": 0.8372113108634949,
|
|
"rewards/rejected": -0.4190141260623932,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 1.4015151515151514,
|
|
"grad_norm": 5.429011665107999,
|
|
"learning_rate": 3.2804232804232804e-08,
|
|
"logits/chosen": -1.5702577829360962,
|
|
"logits/rejected": -1.6486690044403076,
|
|
"logps/chosen": -319.3020324707031,
|
|
"logps/rejected": -182.95730590820312,
|
|
"loss": 0.3457,
|
|
"rewards/accuracies": 0.7333333492279053,
|
|
"rewards/chosen": 0.5197921395301819,
|
|
"rewards/margins": 0.8637401461601257,
|
|
"rewards/rejected": -0.34394803643226624,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 1.4109848484848486,
|
|
"grad_norm": 5.368022221479501,
|
|
"learning_rate": 3.2275132275132274e-08,
|
|
"logits/chosen": -1.4937713146209717,
|
|
"logits/rejected": -1.5651482343673706,
|
|
"logps/chosen": -234.36328125,
|
|
"logps/rejected": -148.08285522460938,
|
|
"loss": 0.3444,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.23662462830543518,
|
|
"rewards/margins": 0.6261420845985413,
|
|
"rewards/rejected": -0.3895173966884613,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 1.4204545454545454,
|
|
"grad_norm": 5.493890942440773,
|
|
"learning_rate": 3.1746031746031744e-08,
|
|
"logits/chosen": -1.6106767654418945,
|
|
"logits/rejected": -1.5224909782409668,
|
|
"logps/chosen": -302.6184997558594,
|
|
"logps/rejected": -146.08517456054688,
|
|
"loss": 0.3663,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.48458927869796753,
|
|
"rewards/margins": 0.9824308156967163,
|
|
"rewards/rejected": -0.49784159660339355,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 1.4299242424242424,
|
|
"grad_norm": 5.480765334687315,
|
|
"learning_rate": 3.1216931216931215e-08,
|
|
"logits/chosen": -1.5799682140350342,
|
|
"logits/rejected": -1.7092950344085693,
|
|
"logps/chosen": -245.9119110107422,
|
|
"logps/rejected": -150.98989868164062,
|
|
"loss": 0.4027,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.4121614396572113,
|
|
"rewards/margins": 0.8830633163452148,
|
|
"rewards/rejected": -0.47090187668800354,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 1.4393939393939394,
|
|
"grad_norm": 6.044424074410309,
|
|
"learning_rate": 3.0687830687830685e-08,
|
|
"logits/chosen": -1.5871868133544922,
|
|
"logits/rejected": -1.6946001052856445,
|
|
"logps/chosen": -346.11822509765625,
|
|
"logps/rejected": -244.7041778564453,
|
|
"loss": 0.4391,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.3353393077850342,
|
|
"rewards/margins": 0.7316913604736328,
|
|
"rewards/rejected": -0.39635199308395386,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 1.4488636363636362,
|
|
"grad_norm": 4.974140552150539,
|
|
"learning_rate": 3.0158730158730155e-08,
|
|
"logits/chosen": -1.5874955654144287,
|
|
"logits/rejected": -1.64119553565979,
|
|
"logps/chosen": -270.36883544921875,
|
|
"logps/rejected": -129.67535400390625,
|
|
"loss": 0.3404,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.7120252251625061,
|
|
"rewards/margins": 1.3487727642059326,
|
|
"rewards/rejected": -0.6367474794387817,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 1.4583333333333333,
|
|
"grad_norm": 5.350141771926167,
|
|
"learning_rate": 2.9629629629629625e-08,
|
|
"logits/chosen": -1.5650367736816406,
|
|
"logits/rejected": -1.6837724447250366,
|
|
"logps/chosen": -271.3443298339844,
|
|
"logps/rejected": -136.472412109375,
|
|
"loss": 0.3361,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.6383367776870728,
|
|
"rewards/margins": 1.251798391342163,
|
|
"rewards/rejected": -0.6134616136550903,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 1.4678030303030303,
|
|
"grad_norm": 5.429121594474915,
|
|
"learning_rate": 2.91005291005291e-08,
|
|
"logits/chosen": -1.568342924118042,
|
|
"logits/rejected": -1.6543916463851929,
|
|
"logps/chosen": -284.5910339355469,
|
|
"logps/rejected": -152.6836395263672,
|
|
"loss": 0.3293,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.5576558113098145,
|
|
"rewards/margins": 0.9742603302001953,
|
|
"rewards/rejected": -0.41660451889038086,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 1.4772727272727273,
|
|
"grad_norm": 5.748560761291252,
|
|
"learning_rate": 2.857142857142857e-08,
|
|
"logits/chosen": -1.4832626581192017,
|
|
"logits/rejected": -1.566612958908081,
|
|
"logps/chosen": -323.40142822265625,
|
|
"logps/rejected": -178.55923461914062,
|
|
"loss": 0.421,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.6060717105865479,
|
|
"rewards/margins": 1.2732772827148438,
|
|
"rewards/rejected": -0.6672054529190063,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 1.4867424242424243,
|
|
"grad_norm": 5.310060511796978,
|
|
"learning_rate": 2.804232804232804e-08,
|
|
"logits/chosen": -1.5736675262451172,
|
|
"logits/rejected": -1.6186530590057373,
|
|
"logps/chosen": -316.97698974609375,
|
|
"logps/rejected": -155.43638610839844,
|
|
"loss": 0.3521,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.6889209747314453,
|
|
"rewards/margins": 1.1352983713150024,
|
|
"rewards/rejected": -0.44637736678123474,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 1.496212121212121,
|
|
"grad_norm": 5.118308720530541,
|
|
"learning_rate": 2.751322751322751e-08,
|
|
"logits/chosen": -1.560807704925537,
|
|
"logits/rejected": -1.7643381357192993,
|
|
"logps/chosen": -402.16632080078125,
|
|
"logps/rejected": -132.56011962890625,
|
|
"loss": 0.3576,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.7692705392837524,
|
|
"rewards/margins": 1.3775382041931152,
|
|
"rewards/rejected": -0.6082676649093628,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 1.5056818181818183,
|
|
"grad_norm": 5.9001461393206505,
|
|
"learning_rate": 2.6984126984126982e-08,
|
|
"logits/chosen": -1.5756150484085083,
|
|
"logits/rejected": -1.6618843078613281,
|
|
"logps/chosen": -144.79986572265625,
|
|
"logps/rejected": -142.03944396972656,
|
|
"loss": 0.3529,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.27606311440467834,
|
|
"rewards/margins": 0.5494292378425598,
|
|
"rewards/rejected": -0.27336612343788147,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 1.5151515151515151,
|
|
"grad_norm": 5.919050512179857,
|
|
"learning_rate": 2.6455026455026453e-08,
|
|
"logits/chosen": -1.596430778503418,
|
|
"logits/rejected": -1.7081763744354248,
|
|
"logps/chosen": -381.9976806640625,
|
|
"logps/rejected": -168.22308349609375,
|
|
"loss": 0.3632,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.7063287496566772,
|
|
"rewards/margins": 1.4756296873092651,
|
|
"rewards/rejected": -0.7693011164665222,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 1.5246212121212122,
|
|
"grad_norm": 6.771452793232538,
|
|
"learning_rate": 2.5925925925925923e-08,
|
|
"logits/chosen": -1.6167709827423096,
|
|
"logits/rejected": -1.702599287033081,
|
|
"logps/chosen": -272.24017333984375,
|
|
"logps/rejected": -205.1815185546875,
|
|
"loss": 0.3699,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.6321600675582886,
|
|
"rewards/margins": 1.0702993869781494,
|
|
"rewards/rejected": -0.43813928961753845,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 1.5340909090909092,
|
|
"grad_norm": 5.844973840956046,
|
|
"learning_rate": 2.5396825396825393e-08,
|
|
"logits/chosen": -1.5797332525253296,
|
|
"logits/rejected": -1.6782172918319702,
|
|
"logps/chosen": -294.10540771484375,
|
|
"logps/rejected": -178.05677795410156,
|
|
"loss": 0.4349,
|
|
"rewards/accuracies": 0.8666666150093079,
|
|
"rewards/chosen": 0.4381368160247803,
|
|
"rewards/margins": 0.7244521975517273,
|
|
"rewards/rejected": -0.28631535172462463,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 1.543560606060606,
|
|
"grad_norm": 5.9799282018427355,
|
|
"learning_rate": 2.4867724867724866e-08,
|
|
"logits/chosen": -1.5741981267929077,
|
|
"logits/rejected": -1.7271267175674438,
|
|
"logps/chosen": -279.97894287109375,
|
|
"logps/rejected": -155.4124298095703,
|
|
"loss": 0.3788,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.5363057255744934,
|
|
"rewards/margins": 1.2445316314697266,
|
|
"rewards/rejected": -0.7082260251045227,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 1.553030303030303,
|
|
"grad_norm": 5.688137477051427,
|
|
"learning_rate": 2.4338624338624337e-08,
|
|
"logits/chosen": -1.5423920154571533,
|
|
"logits/rejected": -1.7703033685684204,
|
|
"logps/chosen": -318.68963623046875,
|
|
"logps/rejected": -139.80978393554688,
|
|
"loss": 0.3199,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.7159373760223389,
|
|
"rewards/margins": 1.3830876350402832,
|
|
"rewards/rejected": -0.6671503186225891,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 1.5625,
|
|
"grad_norm": 5.159199147076709,
|
|
"learning_rate": 2.3809523809523807e-08,
|
|
"logits/chosen": -1.6055580377578735,
|
|
"logits/rejected": -1.7235870361328125,
|
|
"logps/chosen": -283.23931884765625,
|
|
"logps/rejected": -170.2466278076172,
|
|
"loss": 0.3879,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.5190203785896301,
|
|
"rewards/margins": 0.8587003946304321,
|
|
"rewards/rejected": -0.3396799862384796,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 1.571969696969697,
|
|
"grad_norm": 5.51593434736744,
|
|
"learning_rate": 2.3280423280423277e-08,
|
|
"logits/chosen": -1.6276752948760986,
|
|
"logits/rejected": -1.705373764038086,
|
|
"logps/chosen": -361.263671875,
|
|
"logps/rejected": -157.3756866455078,
|
|
"loss": 0.3503,
|
|
"rewards/accuracies": 0.9333332777023315,
|
|
"rewards/chosen": 0.813951313495636,
|
|
"rewards/margins": 1.6092281341552734,
|
|
"rewards/rejected": -0.7952768206596375,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 1.581439393939394,
|
|
"grad_norm": 5.105458599961924,
|
|
"learning_rate": 2.275132275132275e-08,
|
|
"logits/chosen": -1.4909486770629883,
|
|
"logits/rejected": -1.6180715560913086,
|
|
"logps/chosen": -216.1019287109375,
|
|
"logps/rejected": -78.71076965332031,
|
|
"loss": 0.3386,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.488716185092926,
|
|
"rewards/margins": 1.0591917037963867,
|
|
"rewards/rejected": -0.5704755187034607,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 1.5909090909090908,
|
|
"grad_norm": 4.489310539882785,
|
|
"learning_rate": 2.222222222222222e-08,
|
|
"logits/chosen": -1.5696783065795898,
|
|
"logits/rejected": -1.6010246276855469,
|
|
"logps/chosen": -306.27294921875,
|
|
"logps/rejected": -123.8807373046875,
|
|
"loss": 0.3769,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.7523918151855469,
|
|
"rewards/margins": 1.3328092098236084,
|
|
"rewards/rejected": -0.5804173946380615,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 1.6003787878787878,
|
|
"grad_norm": 5.400154224053731,
|
|
"learning_rate": 2.169312169312169e-08,
|
|
"logits/chosen": -1.5251656770706177,
|
|
"logits/rejected": -1.6274874210357666,
|
|
"logps/chosen": -130.54373168945312,
|
|
"logps/rejected": -54.27812576293945,
|
|
"loss": 0.4268,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.27597469091415405,
|
|
"rewards/margins": 0.5285931825637817,
|
|
"rewards/rejected": -0.2526185214519501,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 1.6098484848484849,
|
|
"grad_norm": 4.692184736415839,
|
|
"learning_rate": 2.116402116402116e-08,
|
|
"logits/chosen": -1.5292972326278687,
|
|
"logits/rejected": -1.6729373931884766,
|
|
"logps/chosen": -360.6134948730469,
|
|
"logps/rejected": -151.4227294921875,
|
|
"loss": 0.3845,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.8804464340209961,
|
|
"rewards/margins": 1.6148141622543335,
|
|
"rewards/rejected": -0.7343679666519165,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 1.6193181818181817,
|
|
"grad_norm": 5.938380627671962,
|
|
"learning_rate": 2.0634920634920634e-08,
|
|
"logits/chosen": -1.5955592393875122,
|
|
"logits/rejected": -1.7036798000335693,
|
|
"logps/chosen": -384.96380615234375,
|
|
"logps/rejected": -170.1694793701172,
|
|
"loss": 0.3205,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.8083392977714539,
|
|
"rewards/margins": 1.5802361965179443,
|
|
"rewards/rejected": -0.7718968391418457,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 1.628787878787879,
|
|
"grad_norm": 5.0095603748386415,
|
|
"learning_rate": 2.0105820105820104e-08,
|
|
"logits/chosen": -1.559670329093933,
|
|
"logits/rejected": -1.6990169286727905,
|
|
"logps/chosen": -260.5107421875,
|
|
"logps/rejected": -138.97021484375,
|
|
"loss": 0.3619,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.6676410436630249,
|
|
"rewards/margins": 1.1652050018310547,
|
|
"rewards/rejected": -0.49756401777267456,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 1.6382575757575757,
|
|
"grad_norm": 5.046726473189086,
|
|
"learning_rate": 1.9576719576719575e-08,
|
|
"logits/chosen": -1.5894399881362915,
|
|
"logits/rejected": -1.711214303970337,
|
|
"logps/chosen": -308.7052307128906,
|
|
"logps/rejected": -193.82850646972656,
|
|
"loss": 0.3758,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.514888346195221,
|
|
"rewards/margins": 1.0377848148345947,
|
|
"rewards/rejected": -0.5228964686393738,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 1.6477272727272727,
|
|
"grad_norm": 10.53036659721855,
|
|
"learning_rate": 1.9047619047619045e-08,
|
|
"logits/chosen": -1.5739425420761108,
|
|
"logits/rejected": -1.5461231470108032,
|
|
"logps/chosen": -272.7663269042969,
|
|
"logps/rejected": -152.17575073242188,
|
|
"loss": 0.3848,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.457489013671875,
|
|
"rewards/margins": 1.1182512044906616,
|
|
"rewards/rejected": -0.6607621908187866,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 1.6571969696969697,
|
|
"grad_norm": 5.78981346354209,
|
|
"learning_rate": 1.8518518518518518e-08,
|
|
"logits/chosen": -1.5584112405776978,
|
|
"logits/rejected": -1.66403329372406,
|
|
"logps/chosen": -247.25527954101562,
|
|
"logps/rejected": -88.81005096435547,
|
|
"loss": 0.3545,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.38863998651504517,
|
|
"rewards/margins": 0.8395411372184753,
|
|
"rewards/rejected": -0.45090118050575256,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 5.544069471962977,
|
|
"learning_rate": 1.7989417989417988e-08,
|
|
"logits/chosen": -1.5310989618301392,
|
|
"logits/rejected": -1.6411716938018799,
|
|
"logps/chosen": -177.37261962890625,
|
|
"logps/rejected": -94.64937591552734,
|
|
"loss": 0.3993,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.40063905715942383,
|
|
"rewards/margins": 0.7527322769165039,
|
|
"rewards/rejected": -0.35209327936172485,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 1.6761363636363638,
|
|
"grad_norm": 6.870515621075784,
|
|
"learning_rate": 1.746031746031746e-08,
|
|
"logits/chosen": -1.6042630672454834,
|
|
"logits/rejected": -1.7060787677764893,
|
|
"logps/chosen": -329.9851989746094,
|
|
"logps/rejected": -170.91259765625,
|
|
"loss": 0.3258,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.6290345191955566,
|
|
"rewards/margins": 1.0913975238800049,
|
|
"rewards/rejected": -0.4623629152774811,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 1.6856060606060606,
|
|
"grad_norm": 5.283044248226327,
|
|
"learning_rate": 1.693121693121693e-08,
|
|
"logits/chosen": -1.5727237462997437,
|
|
"logits/rejected": -1.670090675354004,
|
|
"logps/chosen": -242.1526336669922,
|
|
"logps/rejected": -123.24870300292969,
|
|
"loss": 0.3156,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.5657342076301575,
|
|
"rewards/margins": 1.0815435647964478,
|
|
"rewards/rejected": -0.5158092379570007,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 1.6950757575757576,
|
|
"grad_norm": 4.714665015837958,
|
|
"learning_rate": 1.6402116402116402e-08,
|
|
"logits/chosen": -1.6179364919662476,
|
|
"logits/rejected": -1.6783673763275146,
|
|
"logps/chosen": -268.26666259765625,
|
|
"logps/rejected": -125.2500991821289,
|
|
"loss": 0.3028,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.6312516331672668,
|
|
"rewards/margins": 1.2757152318954468,
|
|
"rewards/rejected": -0.6444636583328247,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 1.7045454545454546,
|
|
"grad_norm": 5.48800386545785,
|
|
"learning_rate": 1.5873015873015872e-08,
|
|
"logits/chosen": -1.587045431137085,
|
|
"logits/rejected": -1.5843976736068726,
|
|
"logps/chosen": -314.2275085449219,
|
|
"logps/rejected": -202.48590087890625,
|
|
"loss": 0.3407,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.4960121214389801,
|
|
"rewards/margins": 1.1439507007598877,
|
|
"rewards/rejected": -0.64793860912323,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 1.7140151515151514,
|
|
"grad_norm": 4.904790456188903,
|
|
"learning_rate": 1.5343915343915342e-08,
|
|
"logits/chosen": -1.5327612161636353,
|
|
"logits/rejected": -1.6153557300567627,
|
|
"logps/chosen": -337.2438659667969,
|
|
"logps/rejected": -185.306396484375,
|
|
"loss": 0.3592,
|
|
"rewards/accuracies": 0.9333332777023315,
|
|
"rewards/chosen": 0.5627692937850952,
|
|
"rewards/margins": 1.312888741493225,
|
|
"rewards/rejected": -0.7501195669174194,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 1.7234848484848486,
|
|
"grad_norm": 4.656789025606906,
|
|
"learning_rate": 1.4814814814814813e-08,
|
|
"logits/chosen": -1.6096194982528687,
|
|
"logits/rejected": -1.7528839111328125,
|
|
"logps/chosen": -336.3609313964844,
|
|
"logps/rejected": -170.637939453125,
|
|
"loss": 0.3605,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.5875322818756104,
|
|
"rewards/margins": 1.3081997632980347,
|
|
"rewards/rejected": -0.7206674218177795,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 1.7329545454545454,
|
|
"grad_norm": 5.444360894971359,
|
|
"learning_rate": 1.4285714285714284e-08,
|
|
"logits/chosen": -1.5957181453704834,
|
|
"logits/rejected": -1.64398992061615,
|
|
"logps/chosen": -289.0057678222656,
|
|
"logps/rejected": -167.2021484375,
|
|
"loss": 0.3833,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.6649573445320129,
|
|
"rewards/margins": 1.3411014080047607,
|
|
"rewards/rejected": -0.6761440634727478,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 1.7424242424242424,
|
|
"grad_norm": 4.598365388289265,
|
|
"learning_rate": 1.3756613756613755e-08,
|
|
"logits/chosen": -1.6330028772354126,
|
|
"logits/rejected": -1.6998701095581055,
|
|
"logps/chosen": -245.7683868408203,
|
|
"logps/rejected": -142.4471435546875,
|
|
"loss": 0.392,
|
|
"rewards/accuracies": 0.73333340883255,
|
|
"rewards/chosen": 0.36274439096450806,
|
|
"rewards/margins": 0.7017933130264282,
|
|
"rewards/rejected": -0.3390488922595978,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 1.7518939393939394,
|
|
"grad_norm": 5.234791929984342,
|
|
"learning_rate": 1.3227513227513226e-08,
|
|
"logits/chosen": -1.5748271942138672,
|
|
"logits/rejected": -1.6001355648040771,
|
|
"logps/chosen": -276.45074462890625,
|
|
"logps/rejected": -114.4822006225586,
|
|
"loss": 0.3666,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.6184806227684021,
|
|
"rewards/margins": 1.1440364122390747,
|
|
"rewards/rejected": -0.5255557894706726,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 1.7613636363636362,
|
|
"grad_norm": 5.537848306393154,
|
|
"learning_rate": 1.2698412698412696e-08,
|
|
"logits/chosen": -1.5969727039337158,
|
|
"logits/rejected": -1.673282265663147,
|
|
"logps/chosen": -222.10464477539062,
|
|
"logps/rejected": -115.9425048828125,
|
|
"loss": 0.3751,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.4460936188697815,
|
|
"rewards/margins": 0.7085092663764954,
|
|
"rewards/rejected": -0.26241564750671387,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 1.7708333333333335,
|
|
"grad_norm": 4.835457356224698,
|
|
"learning_rate": 1.2169312169312168e-08,
|
|
"logits/chosen": -1.5568116903305054,
|
|
"logits/rejected": -1.554927110671997,
|
|
"logps/chosen": -256.4602966308594,
|
|
"logps/rejected": -167.30160522460938,
|
|
"loss": 0.3714,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.4909042418003082,
|
|
"rewards/margins": 1.2990154027938843,
|
|
"rewards/rejected": -0.8081111907958984,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 1.7803030303030303,
|
|
"grad_norm": 5.606816215751534,
|
|
"learning_rate": 1.1640211640211638e-08,
|
|
"logits/chosen": -1.578005313873291,
|
|
"logits/rejected": -1.5507481098175049,
|
|
"logps/chosen": -302.0408630371094,
|
|
"logps/rejected": -188.82948303222656,
|
|
"loss": 0.3905,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.4071738123893738,
|
|
"rewards/margins": 0.8548933267593384,
|
|
"rewards/rejected": -0.44771942496299744,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 1.7897727272727273,
|
|
"grad_norm": 5.104068105653885,
|
|
"learning_rate": 1.111111111111111e-08,
|
|
"logits/chosen": -1.5117594003677368,
|
|
"logits/rejected": -1.6626886129379272,
|
|
"logps/chosen": -316.0799255371094,
|
|
"logps/rejected": -152.69705200195312,
|
|
"loss": 0.3749,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.49852848052978516,
|
|
"rewards/margins": 0.9504121541976929,
|
|
"rewards/rejected": -0.45188361406326294,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 1.7992424242424243,
|
|
"grad_norm": 4.472316490828338,
|
|
"learning_rate": 1.058201058201058e-08,
|
|
"logits/chosen": -1.62430739402771,
|
|
"logits/rejected": -1.6985547542572021,
|
|
"logps/chosen": -448.1697692871094,
|
|
"logps/rejected": -224.6002655029297,
|
|
"loss": 0.3446,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.8359583616256714,
|
|
"rewards/margins": 1.992840051651001,
|
|
"rewards/rejected": -1.1568816900253296,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 1.808712121212121,
|
|
"grad_norm": 5.810880253949101,
|
|
"learning_rate": 1.0052910052910052e-08,
|
|
"logits/chosen": -1.5918437242507935,
|
|
"logits/rejected": -1.765323281288147,
|
|
"logps/chosen": -375.71429443359375,
|
|
"logps/rejected": -159.8873291015625,
|
|
"loss": 0.399,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.9120176434516907,
|
|
"rewards/margins": 1.7272794246673584,
|
|
"rewards/rejected": -0.8152618408203125,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 1.8181818181818183,
|
|
"grad_norm": 4.244845222552646,
|
|
"learning_rate": 9.523809523809522e-09,
|
|
"logits/chosen": -1.5941245555877686,
|
|
"logits/rejected": -1.6600911617279053,
|
|
"logps/chosen": -305.6556701660156,
|
|
"logps/rejected": -134.87252807617188,
|
|
"loss": 0.3493,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.5449376106262207,
|
|
"rewards/margins": 1.1537270545959473,
|
|
"rewards/rejected": -0.6087895631790161,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 1.8276515151515151,
|
|
"grad_norm": 4.738897289851384,
|
|
"learning_rate": 8.994708994708994e-09,
|
|
"logits/chosen": -1.5437757968902588,
|
|
"logits/rejected": -1.6903842687606812,
|
|
"logps/chosen": -321.27752685546875,
|
|
"logps/rejected": -126.69343566894531,
|
|
"loss": 0.3567,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.6183031797409058,
|
|
"rewards/margins": 1.4520056247711182,
|
|
"rewards/rejected": -0.8337022662162781,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 1.8371212121212122,
|
|
"grad_norm": 5.490859875590591,
|
|
"learning_rate": 8.465608465608464e-09,
|
|
"logits/chosen": -1.6326143741607666,
|
|
"logits/rejected": -1.7613766193389893,
|
|
"logps/chosen": -266.3793029785156,
|
|
"logps/rejected": -130.99026489257812,
|
|
"loss": 0.4226,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.5176450610160828,
|
|
"rewards/margins": 0.859043300151825,
|
|
"rewards/rejected": -0.3413982093334198,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 1.8465909090909092,
|
|
"grad_norm": 5.583102046304065,
|
|
"learning_rate": 7.936507936507936e-09,
|
|
"logits/chosen": -1.5793282985687256,
|
|
"logits/rejected": -1.789536714553833,
|
|
"logps/chosen": -386.55694580078125,
|
|
"logps/rejected": -170.34373474121094,
|
|
"loss": 0.3427,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.5163098573684692,
|
|
"rewards/margins": 0.9769344329833984,
|
|
"rewards/rejected": -0.4606245458126068,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 1.856060606060606,
|
|
"grad_norm": 6.799122206347572,
|
|
"learning_rate": 7.407407407407406e-09,
|
|
"logits/chosen": -1.5655455589294434,
|
|
"logits/rejected": -1.7494919300079346,
|
|
"logps/chosen": -340.42059326171875,
|
|
"logps/rejected": -156.17343139648438,
|
|
"loss": 0.3914,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.6122487783432007,
|
|
"rewards/margins": 1.0115610361099243,
|
|
"rewards/rejected": -0.39931216835975647,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 1.865530303030303,
|
|
"grad_norm": 6.33489638516624,
|
|
"learning_rate": 6.878306878306877e-09,
|
|
"logits/chosen": -1.6181929111480713,
|
|
"logits/rejected": -1.7337757349014282,
|
|
"logps/chosen": -297.11236572265625,
|
|
"logps/rejected": -133.5535430908203,
|
|
"loss": 0.3948,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.5498533844947815,
|
|
"rewards/margins": 1.0373098850250244,
|
|
"rewards/rejected": -0.48745641112327576,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 1.875,
|
|
"grad_norm": 4.468769998980834,
|
|
"learning_rate": 6.349206349206348e-09,
|
|
"logits/chosen": -1.4640628099441528,
|
|
"logits/rejected": -1.5838810205459595,
|
|
"logps/chosen": -238.00106811523438,
|
|
"logps/rejected": -104.63729095458984,
|
|
"loss": 0.3595,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.5341485142707825,
|
|
"rewards/margins": 1.1648151874542236,
|
|
"rewards/rejected": -0.6306666135787964,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 1.884469696969697,
|
|
"grad_norm": 5.080896081269075,
|
|
"learning_rate": 5.820105820105819e-09,
|
|
"logits/chosen": -1.5636723041534424,
|
|
"logits/rejected": -1.658831238746643,
|
|
"logps/chosen": -203.7349395751953,
|
|
"logps/rejected": -110.94892883300781,
|
|
"loss": 0.3676,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.3449416756629944,
|
|
"rewards/margins": 0.7937332987785339,
|
|
"rewards/rejected": -0.44879165291786194,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 1.893939393939394,
|
|
"grad_norm": 5.280192220580751,
|
|
"learning_rate": 5.29100529100529e-09,
|
|
"logits/chosen": -1.5569162368774414,
|
|
"logits/rejected": -1.6692053079605103,
|
|
"logps/chosen": -310.94293212890625,
|
|
"logps/rejected": -180.21963500976562,
|
|
"loss": 0.4127,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.6343954801559448,
|
|
"rewards/margins": 1.3330564498901367,
|
|
"rewards/rejected": -0.6986608505249023,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 1.9034090909090908,
|
|
"grad_norm": 4.3144901310959956,
|
|
"learning_rate": 4.761904761904761e-09,
|
|
"logits/chosen": -1.5700764656066895,
|
|
"logits/rejected": -1.5398668050765991,
|
|
"logps/chosen": -123.7179946899414,
|
|
"logps/rejected": -168.89608764648438,
|
|
"loss": 0.3673,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.18818268179893494,
|
|
"rewards/margins": 0.4049772620201111,
|
|
"rewards/rejected": -0.21679461002349854,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 1.9128787878787878,
|
|
"grad_norm": 4.809421227753802,
|
|
"learning_rate": 4.232804232804232e-09,
|
|
"logits/chosen": -1.5965473651885986,
|
|
"logits/rejected": -1.7248961925506592,
|
|
"logps/chosen": -306.676025390625,
|
|
"logps/rejected": -186.1800994873047,
|
|
"loss": 0.363,
|
|
"rewards/accuracies": 0.8666666746139526,
|
|
"rewards/chosen": 0.401868999004364,
|
|
"rewards/margins": 0.8128830194473267,
|
|
"rewards/rejected": -0.41101402044296265,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 1.9223484848484849,
|
|
"grad_norm": 4.639189256023351,
|
|
"learning_rate": 3.703703703703703e-09,
|
|
"logits/chosen": -1.5983052253723145,
|
|
"logits/rejected": -1.6595712900161743,
|
|
"logps/chosen": -307.19183349609375,
|
|
"logps/rejected": -195.84725952148438,
|
|
"loss": 0.3743,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.6213973760604858,
|
|
"rewards/margins": 1.057089924812317,
|
|
"rewards/rejected": -0.4356924891471863,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 1.9318181818181817,
|
|
"grad_norm": 5.728537848527359,
|
|
"learning_rate": 3.174603174603174e-09,
|
|
"logits/chosen": -1.5698726177215576,
|
|
"logits/rejected": -1.6646344661712646,
|
|
"logps/chosen": -269.0412292480469,
|
|
"logps/rejected": -137.50003051757812,
|
|
"loss": 0.3975,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.5319095849990845,
|
|
"rewards/margins": 1.2130988836288452,
|
|
"rewards/rejected": -0.6811891794204712,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 1.941287878787879,
|
|
"grad_norm": 5.416585887060536,
|
|
"learning_rate": 2.645502645502645e-09,
|
|
"logits/chosen": -1.6159827709197998,
|
|
"logits/rejected": -1.8529045581817627,
|
|
"logps/chosen": -324.2301025390625,
|
|
"logps/rejected": -169.9268035888672,
|
|
"loss": 0.4161,
|
|
"rewards/accuracies": 0.8666666150093079,
|
|
"rewards/chosen": 0.44274115562438965,
|
|
"rewards/margins": 0.865648090839386,
|
|
"rewards/rejected": -0.4229070544242859,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 1.9507575757575757,
|
|
"grad_norm": 5.021756713871844,
|
|
"learning_rate": 2.116402116402116e-09,
|
|
"logits/chosen": -1.6490414142608643,
|
|
"logits/rejected": -1.701080083847046,
|
|
"logps/chosen": -223.4647979736328,
|
|
"logps/rejected": -112.0922622680664,
|
|
"loss": 0.4041,
|
|
"rewards/accuracies": 0.9333332777023315,
|
|
"rewards/chosen": 0.25973638892173767,
|
|
"rewards/margins": 0.5601609349250793,
|
|
"rewards/rejected": -0.3004245162010193,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 1.9602272727272727,
|
|
"grad_norm": 5.226300428110887,
|
|
"learning_rate": 1.587301587301587e-09,
|
|
"logits/chosen": -1.6259441375732422,
|
|
"logits/rejected": -1.6718814373016357,
|
|
"logps/chosen": -314.91583251953125,
|
|
"logps/rejected": -182.3265838623047,
|
|
"loss": 0.3799,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.5016330480575562,
|
|
"rewards/margins": 1.0548944473266602,
|
|
"rewards/rejected": -0.553261399269104,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 1.9696969696969697,
|
|
"grad_norm": 4.5059277056393725,
|
|
"learning_rate": 1.058201058201058e-09,
|
|
"logits/chosen": -1.6556800603866577,
|
|
"logits/rejected": -1.7395381927490234,
|
|
"logps/chosen": -346.4697570800781,
|
|
"logps/rejected": -191.90931701660156,
|
|
"loss": 0.3702,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": 0.8226908445358276,
|
|
"rewards/margins": 1.620936393737793,
|
|
"rewards/rejected": -0.7982456684112549,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 1.9791666666666665,
|
|
"grad_norm": 4.466235222468724,
|
|
"learning_rate": 5.29100529100529e-10,
|
|
"logits/chosen": -1.5888211727142334,
|
|
"logits/rejected": -1.6132078170776367,
|
|
"logps/chosen": -215.74612426757812,
|
|
"logps/rejected": -148.16468811035156,
|
|
"loss": 0.3622,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.3967617154121399,
|
|
"rewards/margins": 0.8830636143684387,
|
|
"rewards/rejected": -0.48630183935165405,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 1.9886363636363638,
|
|
"grad_norm": 4.940010839228651,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": -1.6120548248291016,
|
|
"logits/rejected": -1.675747275352478,
|
|
"logps/chosen": -285.6683654785156,
|
|
"logps/rejected": -142.8597869873047,
|
|
"loss": 0.3871,
|
|
"rewards/accuracies": 0.9333333969116211,
|
|
"rewards/chosen": 0.4850710332393646,
|
|
"rewards/margins": 1.1287145614624023,
|
|
"rewards/rejected": -0.6436434984207153,
|
|
"step": 210
|
|
}
|
|
],
|
|
"logging_steps": 1.0,
|
|
"max_steps": 210,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 2,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 3,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|