2200 lines
86 KiB
JSON
2200 lines
86 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.5344378381989445,
|
|
"eval_steps": 500,
|
|
"global_step": 1000,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0005344378381989445,
|
|
"grad_norm": 31.37949562072754,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": -0.2252655029296875,
|
|
"logits/rejected": -0.192626953125,
|
|
"logps/chosen": -110.828125,
|
|
"logps/rejected": -115.515625,
|
|
"logps/weighted_chosen": -0.31903076171875,
|
|
"logps/weighted_rejected": -0.333709716796875,
|
|
"loss": 0.6914,
|
|
"rewards/accuracies": 0.0,
|
|
"rewards/chosen": 0.0,
|
|
"rewards/margins": 0.0,
|
|
"rewards/rejected": 0.0,
|
|
"rewards/weighted_accuracies": 0.0,
|
|
"rewards/weighted_chosen": 0.0,
|
|
"rewards/weighted_margins": 0.0,
|
|
"rewards/weighted_rejected": 0.0,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.005344378381989445,
|
|
"grad_norm": 75.76042938232422,
|
|
"learning_rate": 4.787234042553191e-08,
|
|
"logits/chosen": -0.3284708559513092,
|
|
"logits/rejected": -0.3214448392391205,
|
|
"logps/chosen": -134.46006774902344,
|
|
"logps/rejected": -138.06597900390625,
|
|
"logps/weighted_chosen": -0.3890923261642456,
|
|
"logps/weighted_rejected": -0.4067721962928772,
|
|
"loss": 0.6953,
|
|
"rewards/accuracies": 0.2951388955116272,
|
|
"rewards/chosen": -0.2072482705116272,
|
|
"rewards/margins": -0.1790364533662796,
|
|
"rewards/rejected": -0.0282118059694767,
|
|
"rewards/weighted_accuracies": 0.3472222089767456,
|
|
"rewards/weighted_chosen": -0.0032717387657612562,
|
|
"rewards/weighted_margins": -0.0047516291961073875,
|
|
"rewards/weighted_rejected": 0.0014813741436228156,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.01068875676397889,
|
|
"grad_norm": 24.94420623779297,
|
|
"learning_rate": 1.0106382978723404e-07,
|
|
"logits/chosen": -0.2780090272426605,
|
|
"logits/rejected": -0.2689048647880554,
|
|
"logps/chosen": -115.3070297241211,
|
|
"logps/rejected": -114.8101577758789,
|
|
"logps/weighted_chosen": -0.354583740234375,
|
|
"logps/weighted_rejected": -0.36929017305374146,
|
|
"loss": 0.6925,
|
|
"rewards/accuracies": 0.3687500059604645,
|
|
"rewards/chosen": 0.0087890625,
|
|
"rewards/margins": -0.02734375,
|
|
"rewards/rejected": 0.0361328125,
|
|
"rewards/weighted_accuracies": 0.4312500059604645,
|
|
"rewards/weighted_chosen": 0.0016719817649573088,
|
|
"rewards/weighted_margins": 0.0011638641590252519,
|
|
"rewards/weighted_rejected": 0.0005052566411904991,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.016033135145968335,
|
|
"grad_norm": 26.90618133544922,
|
|
"learning_rate": 1.5425531914893615e-07,
|
|
"logits/chosen": -0.26707762479782104,
|
|
"logits/rejected": -0.2697288393974304,
|
|
"logps/chosen": -122.49687194824219,
|
|
"logps/rejected": -128.2218780517578,
|
|
"logps/weighted_chosen": -0.364663690328598,
|
|
"logps/weighted_rejected": -0.40430909395217896,
|
|
"loss": 0.6919,
|
|
"rewards/accuracies": 0.3687500059604645,
|
|
"rewards/chosen": 0.099609375,
|
|
"rewards/margins": 0.2177734375,
|
|
"rewards/rejected": -0.1181640625,
|
|
"rewards/weighted_accuracies": 0.4468750059604645,
|
|
"rewards/weighted_chosen": 0.0011037830263376236,
|
|
"rewards/weighted_margins": 0.0029600143898278475,
|
|
"rewards/weighted_rejected": -0.0018524170154705644,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.02137751352795778,
|
|
"grad_norm": 19.056455612182617,
|
|
"learning_rate": 2.074468085106383e-07,
|
|
"logits/chosen": -0.31552428007125854,
|
|
"logits/rejected": -0.309671014547348,
|
|
"logps/chosen": -126.5132827758789,
|
|
"logps/rejected": -127.7515640258789,
|
|
"logps/weighted_chosen": -0.3717803955078125,
|
|
"logps/weighted_rejected": -0.36720579862594604,
|
|
"loss": 0.6927,
|
|
"rewards/accuracies": 0.4000000059604645,
|
|
"rewards/chosen": 0.041015625,
|
|
"rewards/margins": 0.150390625,
|
|
"rewards/rejected": -0.109375,
|
|
"rewards/weighted_accuracies": 0.44062501192092896,
|
|
"rewards/weighted_chosen": 0.0009314537164755166,
|
|
"rewards/weighted_margins": 0.0009433746454305947,
|
|
"rewards/weighted_rejected": -1.831054760259576e-05,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.026721891909947223,
|
|
"grad_norm": 94.1146469116211,
|
|
"learning_rate": 2.6063829787234044e-07,
|
|
"logits/chosen": -0.2799697816371918,
|
|
"logits/rejected": -0.2664199769496918,
|
|
"logps/chosen": -120.34375,
|
|
"logps/rejected": -120.19062805175781,
|
|
"logps/weighted_chosen": -0.367788702249527,
|
|
"logps/weighted_rejected": -0.37299805879592896,
|
|
"loss": 0.6934,
|
|
"rewards/accuracies": 0.3499999940395355,
|
|
"rewards/chosen": -0.01806640625,
|
|
"rewards/margins": -0.11865234375,
|
|
"rewards/rejected": 0.1005859375,
|
|
"rewards/weighted_accuracies": 0.421875,
|
|
"rewards/weighted_chosen": 0.0011091709602624178,
|
|
"rewards/weighted_margins": -0.0005058288807049394,
|
|
"rewards/weighted_rejected": 0.00161571498028934,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.03206627029193667,
|
|
"grad_norm": 47.161922454833984,
|
|
"learning_rate": 3.1382978723404253e-07,
|
|
"logits/chosen": -0.22172394394874573,
|
|
"logits/rejected": -0.2157600373029709,
|
|
"logps/chosen": -115.8382797241211,
|
|
"logps/rejected": -118.75859069824219,
|
|
"logps/weighted_chosen": -0.36602783203125,
|
|
"logps/weighted_rejected": -0.369253545999527,
|
|
"loss": 0.6908,
|
|
"rewards/accuracies": 0.3499999940395355,
|
|
"rewards/chosen": -0.10546875,
|
|
"rewards/margins": -0.1259765625,
|
|
"rewards/rejected": 0.0205078125,
|
|
"rewards/weighted_accuracies": 0.4000000059604645,
|
|
"rewards/weighted_chosen": 0.008862781338393688,
|
|
"rewards/weighted_margins": 0.00811080913990736,
|
|
"rewards/weighted_rejected": 0.0007405281066894531,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.037410648673926114,
|
|
"grad_norm": 42.38877868652344,
|
|
"learning_rate": 3.6702127659574467e-07,
|
|
"logits/chosen": -0.299722284078598,
|
|
"logits/rejected": -0.29665374755859375,
|
|
"logps/chosen": -114.1656265258789,
|
|
"logps/rejected": -118.0765609741211,
|
|
"logps/weighted_chosen": -0.35313719511032104,
|
|
"logps/weighted_rejected": -0.3739013671875,
|
|
"loss": 0.6949,
|
|
"rewards/accuracies": 0.375,
|
|
"rewards/chosen": -0.0693359375,
|
|
"rewards/margins": -0.0458984375,
|
|
"rewards/rejected": -0.0234375,
|
|
"rewards/weighted_accuracies": 0.390625,
|
|
"rewards/weighted_chosen": 0.0011390686267986894,
|
|
"rewards/weighted_margins": -0.0028884888160973787,
|
|
"rewards/weighted_rejected": 0.0040260315872728825,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.04275502705591556,
|
|
"grad_norm": 35.451927185058594,
|
|
"learning_rate": 4.202127659574468e-07,
|
|
"logits/chosen": -0.30262452363967896,
|
|
"logits/rejected": -0.24024733901023865,
|
|
"logps/chosen": -112.3812484741211,
|
|
"logps/rejected": -111.8375015258789,
|
|
"logps/weighted_chosen": -0.37481385469436646,
|
|
"logps/weighted_rejected": -0.38435667753219604,
|
|
"loss": 0.694,
|
|
"rewards/accuracies": 0.390625,
|
|
"rewards/chosen": -0.1552734375,
|
|
"rewards/margins": -0.0341796875,
|
|
"rewards/rejected": -0.12109375,
|
|
"rewards/weighted_accuracies": 0.48750001192092896,
|
|
"rewards/weighted_chosen": 0.01006317138671875,
|
|
"rewards/weighted_margins": 0.0036018372047692537,
|
|
"rewards/weighted_rejected": 0.006462156772613525,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.048099405437905,
|
|
"grad_norm": 42.264678955078125,
|
|
"learning_rate": 4.734042553191489e-07,
|
|
"logits/chosen": -0.2891853451728821,
|
|
"logits/rejected": -0.23835448920726776,
|
|
"logps/chosen": -118.0296859741211,
|
|
"logps/rejected": -116.484375,
|
|
"logps/weighted_chosen": -0.376077264547348,
|
|
"logps/weighted_rejected": -0.3848114013671875,
|
|
"loss": 0.6935,
|
|
"rewards/accuracies": 0.4625000059604645,
|
|
"rewards/chosen": 0.0458984375,
|
|
"rewards/margins": 0.267578125,
|
|
"rewards/rejected": -0.2216796875,
|
|
"rewards/weighted_accuracies": 0.46562498807907104,
|
|
"rewards/weighted_chosen": 0.016840171068906784,
|
|
"rewards/weighted_margins": 0.014923477545380592,
|
|
"rewards/weighted_rejected": 0.00187511439435184,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.053443783819894446,
|
|
"grad_norm": 36.87267303466797,
|
|
"learning_rate": 5.26595744680851e-07,
|
|
"logits/chosen": -0.3333755433559418,
|
|
"logits/rejected": -0.28821104764938354,
|
|
"logps/chosen": -124.0367202758789,
|
|
"logps/rejected": -124.33906555175781,
|
|
"logps/weighted_chosen": -0.39268797636032104,
|
|
"logps/weighted_rejected": -0.4093261659145355,
|
|
"loss": 0.6875,
|
|
"rewards/accuracies": 0.44062501192092896,
|
|
"rewards/chosen": -0.0302734375,
|
|
"rewards/margins": 0.36835938692092896,
|
|
"rewards/rejected": -0.39863282442092896,
|
|
"rewards/weighted_accuracies": 0.484375,
|
|
"rewards/weighted_chosen": 0.012582575902342796,
|
|
"rewards/weighted_margins": 0.026942063122987747,
|
|
"rewards/weighted_rejected": -0.014329910278320312,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.058788162201883896,
|
|
"grad_norm": 17.8848876953125,
|
|
"learning_rate": 5.797872340425531e-07,
|
|
"logits/chosen": -0.3335327208042145,
|
|
"logits/rejected": -0.32384032011032104,
|
|
"logps/chosen": -117.6968765258789,
|
|
"logps/rejected": -119.85859680175781,
|
|
"logps/weighted_chosen": -0.35866087675094604,
|
|
"logps/weighted_rejected": -0.37585145235061646,
|
|
"loss": 0.7015,
|
|
"rewards/accuracies": 0.3968749940395355,
|
|
"rewards/chosen": -0.524609386920929,
|
|
"rewards/margins": -0.01328125037252903,
|
|
"rewards/rejected": -0.511523425579071,
|
|
"rewards/weighted_accuracies": 0.4281249940395355,
|
|
"rewards/weighted_chosen": -0.0057319640181958675,
|
|
"rewards/weighted_margins": 0.0012493133544921875,
|
|
"rewards/weighted_rejected": -0.0070056915283203125,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.06413254058387334,
|
|
"grad_norm": 37.190059661865234,
|
|
"learning_rate": 6.329787234042553e-07,
|
|
"logits/chosen": -0.29607391357421875,
|
|
"logits/rejected": -0.2735137939453125,
|
|
"logps/chosen": -120.81718444824219,
|
|
"logps/rejected": -127.04219055175781,
|
|
"logps/weighted_chosen": -0.41831666231155396,
|
|
"logps/weighted_rejected": -0.42036741971969604,
|
|
"loss": 0.7443,
|
|
"rewards/accuracies": 0.42500001192092896,
|
|
"rewards/chosen": -0.43964844942092896,
|
|
"rewards/margins": 0.690234363079071,
|
|
"rewards/rejected": -1.1298828125,
|
|
"rewards/weighted_accuracies": 0.45625001192092896,
|
|
"rewards/weighted_chosen": -0.07387389987707138,
|
|
"rewards/weighted_margins": -0.04258232191205025,
|
|
"rewards/weighted_rejected": -0.031409453600645065,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.06947691896586278,
|
|
"grad_norm": 22.76742172241211,
|
|
"learning_rate": 6.861702127659574e-07,
|
|
"logits/chosen": -0.30709609389305115,
|
|
"logits/rejected": -0.29381561279296875,
|
|
"logps/chosen": -120.3140640258789,
|
|
"logps/rejected": -121.73515319824219,
|
|
"logps/weighted_chosen": -0.38916015625,
|
|
"logps/weighted_rejected": -0.38067322969436646,
|
|
"loss": 0.6906,
|
|
"rewards/accuracies": 0.4312500059604645,
|
|
"rewards/chosen": -0.8837890625,
|
|
"rewards/margins": 0.4273437559604645,
|
|
"rewards/rejected": -1.310937523841858,
|
|
"rewards/weighted_accuracies": 0.46875,
|
|
"rewards/weighted_chosen": -0.0019147873390465975,
|
|
"rewards/weighted_margins": 0.025261688977479935,
|
|
"rewards/weighted_rejected": -0.027143806219100952,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.07482129734785223,
|
|
"grad_norm": 25.72498321533203,
|
|
"learning_rate": 7.393617021276596e-07,
|
|
"logits/chosen": -0.2590804994106293,
|
|
"logits/rejected": -0.25146180391311646,
|
|
"logps/chosen": -114.2992172241211,
|
|
"logps/rejected": -118.26094055175781,
|
|
"logps/weighted_chosen": -0.376434326171875,
|
|
"logps/weighted_rejected": -0.39961546659469604,
|
|
"loss": 0.6968,
|
|
"rewards/accuracies": 0.4906249940395355,
|
|
"rewards/chosen": -1.7428710460662842,
|
|
"rewards/margins": 1.043554663658142,
|
|
"rewards/rejected": -2.7867188453674316,
|
|
"rewards/weighted_accuracies": 0.46562498807907104,
|
|
"rewards/weighted_chosen": -0.01796722412109375,
|
|
"rewards/weighted_margins": 0.044054411351680756,
|
|
"rewards/weighted_rejected": -0.062059782445430756,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.08016567572984168,
|
|
"grad_norm": 27.819217681884766,
|
|
"learning_rate": 7.925531914893616e-07,
|
|
"logits/chosen": -0.260824590921402,
|
|
"logits/rejected": -0.2456924468278885,
|
|
"logps/chosen": -118.3109359741211,
|
|
"logps/rejected": -116.1898422241211,
|
|
"logps/weighted_chosen": -0.3584175109863281,
|
|
"logps/weighted_rejected": -0.389230340719223,
|
|
"loss": 0.693,
|
|
"rewards/accuracies": 0.518750011920929,
|
|
"rewards/chosen": -2.796679735183716,
|
|
"rewards/margins": 1.470312476158142,
|
|
"rewards/rejected": -4.267773628234863,
|
|
"rewards/weighted_accuracies": 0.5406249761581421,
|
|
"rewards/weighted_chosen": 0.02875671349465847,
|
|
"rewards/weighted_margins": 0.08138389885425568,
|
|
"rewards/weighted_rejected": -0.05276889726519585,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.08551005411183112,
|
|
"grad_norm": 20.792280197143555,
|
|
"learning_rate": 8.457446808510637e-07,
|
|
"logits/chosen": -0.27181702852249146,
|
|
"logits/rejected": -0.26198044419288635,
|
|
"logps/chosen": -119.7906265258789,
|
|
"logps/rejected": -120.09687805175781,
|
|
"logps/weighted_chosen": -0.3836608827114105,
|
|
"logps/weighted_rejected": -0.40611571073532104,
|
|
"loss": 0.6795,
|
|
"rewards/accuracies": 0.503125011920929,
|
|
"rewards/chosen": -3.6748046875,
|
|
"rewards/margins": 1.8689453601837158,
|
|
"rewards/rejected": -5.542382717132568,
|
|
"rewards/weighted_accuracies": 0.543749988079071,
|
|
"rewards/weighted_chosen": -0.013747024349868298,
|
|
"rewards/weighted_margins": 0.10787200927734375,
|
|
"rewards/weighted_rejected": -0.12159118801355362,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.09085443249382057,
|
|
"grad_norm": 28.162086486816406,
|
|
"learning_rate": 8.989361702127659e-07,
|
|
"logits/chosen": -0.30354803800582886,
|
|
"logits/rejected": -0.28291016817092896,
|
|
"logps/chosen": -118.81172180175781,
|
|
"logps/rejected": -123.3851547241211,
|
|
"logps/weighted_chosen": -0.364785760641098,
|
|
"logps/weighted_rejected": -0.40638428926467896,
|
|
"loss": 0.7104,
|
|
"rewards/accuracies": 0.528124988079071,
|
|
"rewards/chosen": -4.450781345367432,
|
|
"rewards/margins": 1.540624976158142,
|
|
"rewards/rejected": -5.989843845367432,
|
|
"rewards/weighted_accuracies": 0.49687498807907104,
|
|
"rewards/weighted_chosen": -0.05272483825683594,
|
|
"rewards/weighted_margins": 0.03959999233484268,
|
|
"rewards/weighted_rejected": -0.0922950729727745,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.09619881087581,
|
|
"grad_norm": 62.7450065612793,
|
|
"learning_rate": 9.52127659574468e-07,
|
|
"logits/chosen": -0.3086685240268707,
|
|
"logits/rejected": -0.29756468534469604,
|
|
"logps/chosen": -120.9000015258789,
|
|
"logps/rejected": -121.6031265258789,
|
|
"logps/weighted_chosen": -0.4021057188510895,
|
|
"logps/weighted_rejected": -0.43016356229782104,
|
|
"loss": 0.6902,
|
|
"rewards/accuracies": 0.5062500238418579,
|
|
"rewards/chosen": -5.364843845367432,
|
|
"rewards/margins": 0.95654296875,
|
|
"rewards/rejected": -6.323437690734863,
|
|
"rewards/weighted_accuracies": 0.515625,
|
|
"rewards/weighted_chosen": -0.07762374728918076,
|
|
"rewards/weighted_margins": 0.042090605944395065,
|
|
"rewards/weighted_rejected": -0.11983337253332138,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.10154318925779945,
|
|
"grad_norm": 107.93684387207031,
|
|
"learning_rate": 9.99999129927477e-07,
|
|
"logits/chosen": -0.2998809814453125,
|
|
"logits/rejected": -0.2729034423828125,
|
|
"logps/chosen": -121.68046569824219,
|
|
"logps/rejected": -122.90547180175781,
|
|
"logps/weighted_chosen": -0.4000488221645355,
|
|
"logps/weighted_rejected": -0.49153441190719604,
|
|
"loss": 0.6846,
|
|
"rewards/accuracies": 0.4906249940395355,
|
|
"rewards/chosen": -7.084570407867432,
|
|
"rewards/margins": 1.763671875,
|
|
"rewards/rejected": -8.850390434265137,
|
|
"rewards/weighted_accuracies": 0.5625,
|
|
"rewards/weighted_chosen": -0.06708984076976776,
|
|
"rewards/weighted_margins": 0.19685058295726776,
|
|
"rewards/weighted_rejected": -0.2636512815952301,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.10688756763978889,
|
|
"grad_norm": 42.62810516357422,
|
|
"learning_rate": 9.99894724888679e-07,
|
|
"logits/chosen": -0.2994216978549957,
|
|
"logits/rejected": -0.2775813937187195,
|
|
"logps/chosen": -129.10311889648438,
|
|
"logps/rejected": -127.71015930175781,
|
|
"logps/weighted_chosen": -0.40317994356155396,
|
|
"logps/weighted_rejected": -0.4718689024448395,
|
|
"loss": 0.7052,
|
|
"rewards/accuracies": 0.4937500059604645,
|
|
"rewards/chosen": -8.422460556030273,
|
|
"rewards/margins": 1.215234398841858,
|
|
"rewards/rejected": -9.638280868530273,
|
|
"rewards/weighted_accuracies": 0.518750011920929,
|
|
"rewards/weighted_chosen": -0.10227356106042862,
|
|
"rewards/weighted_margins": 0.08837012946605682,
|
|
"rewards/weighted_rejected": -0.19074249267578125,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.11223194602177834,
|
|
"grad_norm": 56.36786651611328,
|
|
"learning_rate": 9.996163469793475e-07,
|
|
"logits/chosen": -0.3200393617153168,
|
|
"logits/rejected": -0.28080445528030396,
|
|
"logps/chosen": -127.65625,
|
|
"logps/rejected": -122.5406265258789,
|
|
"logps/weighted_chosen": -0.402487188577652,
|
|
"logps/weighted_rejected": -0.4784179627895355,
|
|
"loss": 0.6855,
|
|
"rewards/accuracies": 0.559374988079071,
|
|
"rewards/chosen": -9.696874618530273,
|
|
"rewards/margins": 3.138671875,
|
|
"rewards/rejected": -12.8359375,
|
|
"rewards/weighted_accuracies": 0.559374988079071,
|
|
"rewards/weighted_chosen": -0.07228164374828339,
|
|
"rewards/weighted_margins": 0.1660926789045334,
|
|
"rewards/weighted_rejected": -0.23847046494483948,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.11757632440376779,
|
|
"grad_norm": 24.45851707458496,
|
|
"learning_rate": 9.991640930802883e-07,
|
|
"logits/chosen": -0.30699461698532104,
|
|
"logits/rejected": -0.3066558837890625,
|
|
"logps/chosen": -125.8734359741211,
|
|
"logps/rejected": -129.6999969482422,
|
|
"logps/weighted_chosen": -0.41710203886032104,
|
|
"logps/weighted_rejected": -0.4827117919921875,
|
|
"loss": 0.6884,
|
|
"rewards/accuracies": 0.546875,
|
|
"rewards/chosen": -12.277539253234863,
|
|
"rewards/margins": 2.571484327316284,
|
|
"rewards/rejected": -14.851171493530273,
|
|
"rewards/weighted_accuracies": 0.5218750238418579,
|
|
"rewards/weighted_chosen": -0.12371826171875,
|
|
"rewards/weighted_margins": 0.1390731781721115,
|
|
"rewards/weighted_rejected": -0.26273268461227417,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.12292070278575723,
|
|
"grad_norm": 27.215944290161133,
|
|
"learning_rate": 9.98538120584459e-07,
|
|
"logits/chosen": -0.3107505738735199,
|
|
"logits/rejected": -0.283193975687027,
|
|
"logps/chosen": -134.8015594482422,
|
|
"logps/rejected": -138.5890655517578,
|
|
"logps/weighted_chosen": -0.4462524354457855,
|
|
"logps/weighted_rejected": -0.511853039264679,
|
|
"loss": 0.6939,
|
|
"rewards/accuracies": 0.5562499761581421,
|
|
"rewards/chosen": -15.346875190734863,
|
|
"rewards/margins": 2.830273389816284,
|
|
"rewards/rejected": -18.179492950439453,
|
|
"rewards/weighted_accuracies": 0.5562499761581421,
|
|
"rewards/weighted_chosen": -0.1530204713344574,
|
|
"rewards/weighted_margins": 0.1789344847202301,
|
|
"rewards/weighted_rejected": -0.3320491909980774,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.12826508116774668,
|
|
"grad_norm": 17.173702239990234,
|
|
"learning_rate": 9.977386473421917e-07,
|
|
"logits/chosen": -0.27986279129981995,
|
|
"logits/rejected": -0.2772073745727539,
|
|
"logps/chosen": -121.0453109741211,
|
|
"logps/rejected": -124.65312194824219,
|
|
"logps/weighted_chosen": -0.4528869688510895,
|
|
"logps/weighted_rejected": -0.501416027545929,
|
|
"loss": 0.7222,
|
|
"rewards/accuracies": 0.5687500238418579,
|
|
"rewards/chosen": -18.59375,
|
|
"rewards/margins": 2.5132813453674316,
|
|
"rewards/rejected": -21.106054306030273,
|
|
"rewards/weighted_accuracies": 0.578125,
|
|
"rewards/weighted_chosen": -0.2343955934047699,
|
|
"rewards/weighted_margins": 0.07334060966968536,
|
|
"rewards/weighted_rejected": -0.30777662992477417,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.13360945954973613,
|
|
"grad_norm": 24.67556381225586,
|
|
"learning_rate": 9.96765951585378e-07,
|
|
"logits/chosen": -0.262664794921875,
|
|
"logits/rejected": -0.24544373154640198,
|
|
"logps/chosen": -121.7421875,
|
|
"logps/rejected": -126.42500305175781,
|
|
"logps/weighted_chosen": -0.46705931425094604,
|
|
"logps/weighted_rejected": -0.547576904296875,
|
|
"loss": 0.6683,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -21.9228515625,
|
|
"rewards/margins": 2.5238280296325684,
|
|
"rewards/rejected": -24.447071075439453,
|
|
"rewards/weighted_accuracies": 0.5687500238418579,
|
|
"rewards/weighted_chosen": -0.2455238401889801,
|
|
"rewards/weighted_margins": 0.1756332367658615,
|
|
"rewards/weighted_rejected": -0.4210983216762543,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.13895383793172555,
|
|
"grad_norm": 33.55352783203125,
|
|
"learning_rate": 9.956203718306388e-07,
|
|
"logits/chosen": -0.18781813979148865,
|
|
"logits/rejected": -0.156982421875,
|
|
"logps/chosen": -127.8578109741211,
|
|
"logps/rejected": -132.35546875,
|
|
"logps/weighted_chosen": -0.4968322813510895,
|
|
"logps/weighted_rejected": -0.5148254632949829,
|
|
"loss": 0.7213,
|
|
"rewards/accuracies": 0.5531250238418579,
|
|
"rewards/chosen": -26.109960556030273,
|
|
"rewards/margins": 2.660937547683716,
|
|
"rewards/rejected": -28.761327743530273,
|
|
"rewards/weighted_accuracies": 0.534375011920929,
|
|
"rewards/weighted_chosen": -0.29717254638671875,
|
|
"rewards/weighted_margins": 0.04170074313879013,
|
|
"rewards/weighted_rejected": -0.3386779725551605,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.144298216313715,
|
|
"grad_norm": 242.56521606445312,
|
|
"learning_rate": 9.943023067615136e-07,
|
|
"logits/chosen": -0.17297974228858948,
|
|
"logits/rejected": -0.1584724485874176,
|
|
"logps/chosen": -139.23046875,
|
|
"logps/rejected": -142.3390655517578,
|
|
"logps/weighted_chosen": -0.48270875215530396,
|
|
"logps/weighted_rejected": -0.560772716999054,
|
|
"loss": 0.7059,
|
|
"rewards/accuracies": 0.5375000238418579,
|
|
"rewards/chosen": -33.55195236206055,
|
|
"rewards/margins": 3.075976610183716,
|
|
"rewards/rejected": -36.6328125,
|
|
"rewards/weighted_accuracies": 0.4937500059604645,
|
|
"rewards/weighted_chosen": -0.321615606546402,
|
|
"rewards/weighted_margins": 0.13364562392234802,
|
|
"rewards/weighted_rejected": -0.45517730712890625,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.14964259469570446,
|
|
"grad_norm": 16.8142147064209,
|
|
"learning_rate": 9.928122150897112e-07,
|
|
"logits/chosen": -0.21183013916015625,
|
|
"logits/rejected": -0.168986514210701,
|
|
"logps/chosen": -130.66250610351562,
|
|
"logps/rejected": -132.0812530517578,
|
|
"logps/weighted_chosen": -0.48672789335250854,
|
|
"logps/weighted_rejected": -0.578961193561554,
|
|
"loss": 0.6614,
|
|
"rewards/accuracies": 0.590624988079071,
|
|
"rewards/chosen": -34.4287109375,
|
|
"rewards/margins": 3.7144532203674316,
|
|
"rewards/rejected": -38.127342224121094,
|
|
"rewards/weighted_accuracies": 0.5687500238418579,
|
|
"rewards/weighted_chosen": -0.28246229887008667,
|
|
"rewards/weighted_margins": 0.23281364142894745,
|
|
"rewards/weighted_rejected": -0.5149310827255249,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.1549869730776939,
|
|
"grad_norm": 17.110448837280273,
|
|
"learning_rate": 9.9115061539547e-07,
|
|
"logits/chosen": -0.20588979125022888,
|
|
"logits/rejected": -0.18258285522460938,
|
|
"logps/chosen": -141.0203094482422,
|
|
"logps/rejected": -142.50625610351562,
|
|
"logps/weighted_chosen": -0.4897003173828125,
|
|
"logps/weighted_rejected": -0.5554351806640625,
|
|
"loss": 0.7051,
|
|
"rewards/accuracies": 0.574999988079071,
|
|
"rewards/chosen": -38.395896911621094,
|
|
"rewards/margins": 3.002734422683716,
|
|
"rewards/rejected": -41.394920349121094,
|
|
"rewards/weighted_accuracies": 0.5406249761581421,
|
|
"rewards/weighted_chosen": -0.3098343014717102,
|
|
"rewards/weighted_margins": 0.1397857666015625,
|
|
"rewards/weighted_rejected": -0.4498863220214844,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.16033135145968336,
|
|
"grad_norm": 21.960878372192383,
|
|
"learning_rate": 9.893180859470818e-07,
|
|
"logits/chosen": -0.1905662566423416,
|
|
"logits/rejected": -0.16956177353858948,
|
|
"logps/chosen": -131.609375,
|
|
"logps/rejected": -133.328125,
|
|
"logps/weighted_chosen": -0.5007995367050171,
|
|
"logps/weighted_rejected": -0.5999816656112671,
|
|
"loss": 0.6676,
|
|
"rewards/accuracies": 0.559374988079071,
|
|
"rewards/chosen": -38.5810546875,
|
|
"rewards/margins": 5.082226753234863,
|
|
"rewards/rejected": -43.662498474121094,
|
|
"rewards/weighted_accuracies": 0.590624988079071,
|
|
"rewards/weighted_chosen": -0.27146607637405396,
|
|
"rewards/weighted_margins": 0.2570602297782898,
|
|
"rewards/weighted_rejected": -0.5287536382675171,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.16567572984167278,
|
|
"grad_norm": 45.954952239990234,
|
|
"learning_rate": 9.873152644996424e-07,
|
|
"logits/chosen": -0.23566055297851562,
|
|
"logits/rejected": -0.23574523627758026,
|
|
"logps/chosen": -134.5734405517578,
|
|
"logps/rejected": -136.5500030517578,
|
|
"logps/weighted_chosen": -0.565338134765625,
|
|
"logps/weighted_rejected": -0.620849609375,
|
|
"loss": 0.7314,
|
|
"rewards/accuracies": 0.59375,
|
|
"rewards/chosen": -44.349021911621094,
|
|
"rewards/margins": 6.559765815734863,
|
|
"rewards/rejected": -50.908592224121094,
|
|
"rewards/weighted_accuracies": 0.606249988079071,
|
|
"rewards/weighted_chosen": -0.44298553466796875,
|
|
"rewards/weighted_margins": 0.1432647705078125,
|
|
"rewards/weighted_rejected": -0.5857940912246704,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.17102010822366223,
|
|
"grad_norm": 22.280086517333984,
|
|
"learning_rate": 9.85142848073103e-07,
|
|
"logits/chosen": -0.2385093718767166,
|
|
"logits/rejected": -0.21721191704273224,
|
|
"logps/chosen": -138.27499389648438,
|
|
"logps/rejected": -138.13516235351562,
|
|
"logps/weighted_chosen": -0.5242675542831421,
|
|
"logps/weighted_rejected": -0.591705322265625,
|
|
"loss": 0.6903,
|
|
"rewards/accuracies": 0.543749988079071,
|
|
"rewards/chosen": -48.820899963378906,
|
|
"rewards/margins": 1.562109351158142,
|
|
"rewards/rejected": -50.394920349121094,
|
|
"rewards/weighted_accuracies": 0.5718749761581421,
|
|
"rewards/weighted_chosen": -0.3802032470703125,
|
|
"rewards/weighted_margins": 0.14343567192554474,
|
|
"rewards/weighted_rejected": -0.523608386516571,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.17636448660565168,
|
|
"grad_norm": 19.125673294067383,
|
|
"learning_rate": 9.828015927096914e-07,
|
|
"logits/chosen": -0.2693939208984375,
|
|
"logits/rejected": -0.23780974745750427,
|
|
"logps/chosen": -148.56875610351562,
|
|
"logps/rejected": -147.24453735351562,
|
|
"logps/weighted_chosen": -0.521636962890625,
|
|
"logps/weighted_rejected": -0.5547729730606079,
|
|
"loss": 0.6703,
|
|
"rewards/accuracies": 0.528124988079071,
|
|
"rewards/chosen": -50.613868713378906,
|
|
"rewards/margins": 3.5126953125,
|
|
"rewards/rejected": -54.113670349121094,
|
|
"rewards/weighted_accuracies": 0.550000011920929,
|
|
"rewards/weighted_chosen": -0.321145623922348,
|
|
"rewards/weighted_margins": 0.18524780869483948,
|
|
"rewards/weighted_rejected": -0.5064395666122437,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.18170886498764113,
|
|
"grad_norm": 14.343570709228516,
|
|
"learning_rate": 9.802923132107968e-07,
|
|
"logits/chosen": -0.25108033418655396,
|
|
"logits/rejected": -0.2313240021467209,
|
|
"logps/chosen": -149.78671264648438,
|
|
"logps/rejected": -153.48046875,
|
|
"logps/weighted_chosen": -0.562329113483429,
|
|
"logps/weighted_rejected": -0.583233654499054,
|
|
"loss": 0.741,
|
|
"rewards/accuracies": 0.5562499761581421,
|
|
"rewards/chosen": -58.06855392456055,
|
|
"rewards/margins": 4.334374904632568,
|
|
"rewards/rejected": -62.40312576293945,
|
|
"rewards/weighted_accuracies": 0.512499988079071,
|
|
"rewards/weighted_chosen": -0.47095948457717896,
|
|
"rewards/weighted_margins": 0.02723388746380806,
|
|
"rewards/weighted_rejected": -0.498382568359375,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.18705324336963056,
|
|
"grad_norm": 66.20745849609375,
|
|
"learning_rate": 9.776158828534024e-07,
|
|
"logits/chosen": -0.2837265133857727,
|
|
"logits/rejected": -0.2537124752998352,
|
|
"logps/chosen": -149.5359344482422,
|
|
"logps/rejected": -194.4765625,
|
|
"logps/weighted_chosen": -0.540771484375,
|
|
"logps/weighted_rejected": -0.589801013469696,
|
|
"loss": 0.6779,
|
|
"rewards/accuracies": 0.612500011920929,
|
|
"rewards/chosen": -59.75273513793945,
|
|
"rewards/margins": 7.256640434265137,
|
|
"rewards/rejected": -66.99531555175781,
|
|
"rewards/weighted_accuracies": 0.5531250238418579,
|
|
"rewards/weighted_chosen": -0.4245468080043793,
|
|
"rewards/weighted_margins": 0.146717831492424,
|
|
"rewards/weighted_rejected": -0.571148693561554,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.19239762175162,
|
|
"grad_norm": 13.690327644348145,
|
|
"learning_rate": 9.747732330861695e-07,
|
|
"logits/chosen": -0.18397827446460724,
|
|
"logits/rejected": -0.15533828735351562,
|
|
"logps/chosen": -143.4562530517578,
|
|
"logps/rejected": -145.44686889648438,
|
|
"logps/weighted_chosen": -0.5499817132949829,
|
|
"logps/weighted_rejected": -0.6820312738418579,
|
|
"loss": 0.6644,
|
|
"rewards/accuracies": 0.581250011920929,
|
|
"rewards/chosen": -63.58203125,
|
|
"rewards/margins": 7.447851657867432,
|
|
"rewards/rejected": -71.0308609008789,
|
|
"rewards/weighted_accuracies": 0.6031249761581421,
|
|
"rewards/weighted_chosen": -0.43936461210250854,
|
|
"rewards/weighted_margins": 0.3239502012729645,
|
|
"rewards/weighted_rejected": -0.7634918093681335,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.19774200013360946,
|
|
"grad_norm": 20.364688873291016,
|
|
"learning_rate": 9.717653532052742e-07,
|
|
"logits/chosen": -0.16991272568702698,
|
|
"logits/rejected": -0.16076354682445526,
|
|
"logps/chosen": -137.11172485351562,
|
|
"logps/rejected": -146.09375,
|
|
"logps/weighted_chosen": -0.609234631061554,
|
|
"logps/weighted_rejected": -0.658831775188446,
|
|
"loss": 0.7108,
|
|
"rewards/accuracies": 0.606249988079071,
|
|
"rewards/chosen": -63.02363204956055,
|
|
"rewards/margins": 9.1494140625,
|
|
"rewards/rejected": -72.1488265991211,
|
|
"rewards/weighted_accuracies": 0.59375,
|
|
"rewards/weighted_chosen": -0.5135604739189148,
|
|
"rewards/weighted_margins": 0.1833236664533615,
|
|
"rewards/weighted_rejected": -0.6966766119003296,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.2030863785155989,
|
|
"grad_norm": 15.459892272949219,
|
|
"learning_rate": 9.685932900101146e-07,
|
|
"logits/chosen": -0.17396697402000427,
|
|
"logits/rejected": -0.1525276154279709,
|
|
"logps/chosen": -143.62655639648438,
|
|
"logps/rejected": -147.03515625,
|
|
"logps/weighted_chosen": -0.584460437297821,
|
|
"logps/weighted_rejected": -0.642120361328125,
|
|
"loss": 0.7159,
|
|
"rewards/accuracies": 0.590624988079071,
|
|
"rewards/chosen": -68.6537094116211,
|
|
"rewards/margins": 5.928124904632568,
|
|
"rewards/rejected": -74.57109069824219,
|
|
"rewards/weighted_accuracies": 0.59375,
|
|
"rewards/weighted_chosen": -0.528277575969696,
|
|
"rewards/weighted_margins": 0.11352996528148651,
|
|
"rewards/weighted_rejected": -0.6417190432548523,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.20843075689758836,
|
|
"grad_norm": 25.710723876953125,
|
|
"learning_rate": 9.652581474390043e-07,
|
|
"logits/chosen": -0.17167052626609802,
|
|
"logits/rejected": -0.14335784316062927,
|
|
"logps/chosen": -147.3156280517578,
|
|
"logps/rejected": -151.1484375,
|
|
"logps/weighted_chosen": -0.5953735113143921,
|
|
"logps/weighted_rejected": -0.649151623249054,
|
|
"loss": 0.7014,
|
|
"rewards/accuracies": 0.59375,
|
|
"rewards/chosen": -70.93769836425781,
|
|
"rewards/margins": 9.441015243530273,
|
|
"rewards/rejected": -80.35664367675781,
|
|
"rewards/weighted_accuracies": 0.612500011920929,
|
|
"rewards/weighted_chosen": -0.5777953863143921,
|
|
"rewards/weighted_margins": 0.10624237358570099,
|
|
"rewards/weighted_rejected": -0.6836212277412415,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.21377513527957778,
|
|
"grad_norm": 42.97126007080078,
|
|
"learning_rate": 9.61761086184981e-07,
|
|
"logits/chosen": -0.192851260304451,
|
|
"logits/rejected": -0.16070251166820526,
|
|
"logps/chosen": -148.39688110351562,
|
|
"logps/rejected": -149.6046905517578,
|
|
"logps/weighted_chosen": -0.6315368413925171,
|
|
"logps/weighted_rejected": -0.7087768316268921,
|
|
"loss": 0.7113,
|
|
"rewards/accuracies": 0.565625011920929,
|
|
"rewards/chosen": -73.91288757324219,
|
|
"rewards/margins": 6.164453029632568,
|
|
"rewards/rejected": -80.05976867675781,
|
|
"rewards/weighted_accuracies": 0.559374988079071,
|
|
"rewards/weighted_chosen": -0.6400848627090454,
|
|
"rewards/weighted_margins": 0.11194305121898651,
|
|
"rewards/weighted_rejected": -0.752105712890625,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.21911951366156723,
|
|
"grad_norm": 17.99481773376465,
|
|
"learning_rate": 9.581033232918629e-07,
|
|
"logits/chosen": -0.14135894179344177,
|
|
"logits/rejected": -0.11229457706212997,
|
|
"logps/chosen": -145.88827514648438,
|
|
"logps/rejected": -149.74063110351562,
|
|
"logps/weighted_chosen": -0.6018310785293579,
|
|
"logps/weighted_rejected": -0.7620849609375,
|
|
"loss": 0.6764,
|
|
"rewards/accuracies": 0.6031249761581421,
|
|
"rewards/chosen": -74.9345703125,
|
|
"rewards/margins": 8.443944931030273,
|
|
"rewards/rejected": -83.36601257324219,
|
|
"rewards/weighted_accuracies": 0.578125,
|
|
"rewards/weighted_chosen": -0.578625500202179,
|
|
"rewards/weighted_margins": 0.35536497831344604,
|
|
"rewards/weighted_rejected": -0.9342681765556335,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.22446389204355668,
|
|
"grad_norm": 19.236024856567383,
|
|
"learning_rate": 9.542861317306952e-07,
|
|
"logits/chosen": -0.1445457488298416,
|
|
"logits/rejected": -0.1324237883090973,
|
|
"logps/chosen": -148.76718139648438,
|
|
"logps/rejected": -150.97811889648438,
|
|
"logps/weighted_chosen": -0.5991576910018921,
|
|
"logps/weighted_rejected": -0.674072265625,
|
|
"loss": 0.6735,
|
|
"rewards/accuracies": 0.5562499761581421,
|
|
"rewards/chosen": -80.28242492675781,
|
|
"rewards/margins": 4.427929878234863,
|
|
"rewards/rejected": -84.7109375,
|
|
"rewards/weighted_accuracies": 0.559374988079071,
|
|
"rewards/weighted_chosen": -0.543652355670929,
|
|
"rewards/weighted_margins": 0.184315487742424,
|
|
"rewards/weighted_rejected": -0.727569580078125,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.22980827042554614,
|
|
"grad_norm": 14.300553321838379,
|
|
"learning_rate": 9.503108399567308e-07,
|
|
"logits/chosen": -0.14830398559570312,
|
|
"logits/rejected": -0.09484557807445526,
|
|
"logps/chosen": -162.40625,
|
|
"logps/rejected": -167.7195281982422,
|
|
"logps/weighted_chosen": -0.5840820074081421,
|
|
"logps/weighted_rejected": -0.647021472454071,
|
|
"loss": 0.672,
|
|
"rewards/accuracies": 0.5687500238418579,
|
|
"rewards/chosen": -89.32051086425781,
|
|
"rewards/margins": 8.4365234375,
|
|
"rewards/rejected": -97.79023742675781,
|
|
"rewards/weighted_accuracies": 0.596875011920929,
|
|
"rewards/weighted_chosen": -0.545318603515625,
|
|
"rewards/weighted_margins": 0.13585510849952698,
|
|
"rewards/weighted_rejected": -0.681243896484375,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.23515264880753559,
|
|
"grad_norm": 21.087541580200195,
|
|
"learning_rate": 9.461788314471034e-07,
|
|
"logits/chosen": -0.10236664116382599,
|
|
"logits/rejected": -0.05696678161621094,
|
|
"logps/chosen": -155.13827514648438,
|
|
"logps/rejected": -159.828125,
|
|
"logps/weighted_chosen": -0.6568237543106079,
|
|
"logps/weighted_rejected": -0.7309814691543579,
|
|
"loss": 0.6898,
|
|
"rewards/accuracies": 0.5562499761581421,
|
|
"rewards/chosen": -92.36328125,
|
|
"rewards/margins": 7.942968845367432,
|
|
"rewards/rejected": -100.3109359741211,
|
|
"rewards/weighted_accuracies": 0.559374988079071,
|
|
"rewards/weighted_chosen": -0.6760101318359375,
|
|
"rewards/weighted_margins": 0.16096191108226776,
|
|
"rewards/weighted_rejected": -0.836883544921875,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.240497027189525,
|
|
"grad_norm": 19.655607223510742,
|
|
"learning_rate": 9.418915442193509e-07,
|
|
"logits/chosen": -0.10150299221277237,
|
|
"logits/rejected": -0.05734825134277344,
|
|
"logps/chosen": -152.95858764648438,
|
|
"logps/rejected": -165.30078125,
|
|
"logps/weighted_chosen": -0.6426635980606079,
|
|
"logps/weighted_rejected": -0.69622802734375,
|
|
"loss": 0.7073,
|
|
"rewards/accuracies": 0.612500011920929,
|
|
"rewards/chosen": -89.14433288574219,
|
|
"rewards/margins": 14.668359756469727,
|
|
"rewards/rejected": -103.8412094116211,
|
|
"rewards/weighted_accuracies": 0.565625011920929,
|
|
"rewards/weighted_chosen": -0.694122314453125,
|
|
"rewards/weighted_margins": 0.11862488090991974,
|
|
"rewards/weighted_rejected": -0.8125030398368835,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.24584140557151446,
|
|
"grad_norm": 35.64816665649414,
|
|
"learning_rate": 9.374504703309579e-07,
|
|
"logits/chosen": -0.1730697602033615,
|
|
"logits/rejected": -0.1376514434814453,
|
|
"logps/chosen": -149.97811889648438,
|
|
"logps/rejected": -152.8171844482422,
|
|
"logps/weighted_chosen": -0.638531506061554,
|
|
"logps/weighted_rejected": -0.77386474609375,
|
|
"loss": 0.6563,
|
|
"rewards/accuracies": 0.5687500238418579,
|
|
"rewards/chosen": -86.3251953125,
|
|
"rewards/margins": 8.056055068969727,
|
|
"rewards/rejected": -94.4281234741211,
|
|
"rewards/weighted_accuracies": 0.5625,
|
|
"rewards/weighted_chosen": -0.6749175786972046,
|
|
"rewards/weighted_margins": 0.30900877714157104,
|
|
"rewards/weighted_rejected": -0.983563244342804,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.2511857839535039,
|
|
"grad_norm": 20.12373161315918,
|
|
"learning_rate": 9.328571553600915e-07,
|
|
"logits/chosen": -0.14519290626049042,
|
|
"logits/rejected": -0.11081619560718536,
|
|
"logps/chosen": -154.73828125,
|
|
"logps/rejected": -158.703125,
|
|
"logps/weighted_chosen": -0.656390368938446,
|
|
"logps/weighted_rejected": -0.7307983636856079,
|
|
"loss": 0.7214,
|
|
"rewards/accuracies": 0.5843750238418579,
|
|
"rewards/chosen": -92.1263656616211,
|
|
"rewards/margins": 7.731054782867432,
|
|
"rewards/rejected": -99.86836242675781,
|
|
"rewards/weighted_accuracies": 0.565625011920929,
|
|
"rewards/weighted_chosen": -0.6984283328056335,
|
|
"rewards/weighted_margins": 0.14166870713233948,
|
|
"rewards/weighted_rejected": -0.8403259515762329,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.25653016233549336,
|
|
"grad_norm": 19.16153907775879,
|
|
"learning_rate": 9.281131978677106e-07,
|
|
"logits/chosen": -0.1819503754377365,
|
|
"logits/rejected": -0.14701232314109802,
|
|
"logps/chosen": -160.57968139648438,
|
|
"logps/rejected": -164.2414093017578,
|
|
"logps/weighted_chosen": -0.609344482421875,
|
|
"logps/weighted_rejected": -0.750903308391571,
|
|
"loss": 0.6525,
|
|
"rewards/accuracies": 0.6156250238418579,
|
|
"rewards/chosen": -93.19140625,
|
|
"rewards/margins": 9.908788681030273,
|
|
"rewards/rejected": -103.0589828491211,
|
|
"rewards/weighted_accuracies": 0.590624988079071,
|
|
"rewards/weighted_chosen": -0.6482604742050171,
|
|
"rewards/weighted_margins": 0.2539626955986023,
|
|
"rewards/weighted_rejected": -0.9027160406112671,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.2618745407174828,
|
|
"grad_norm": 47.42090606689453,
|
|
"learning_rate": 9.232202488412361e-07,
|
|
"logits/chosen": -0.18560639023780823,
|
|
"logits/rejected": -0.152149960398674,
|
|
"logps/chosen": -150.15078735351562,
|
|
"logps/rejected": -156.22109985351562,
|
|
"logps/weighted_chosen": -0.6249145269393921,
|
|
"logps/weighted_rejected": -0.755505383014679,
|
|
"loss": 0.6793,
|
|
"rewards/accuracies": 0.612500011920929,
|
|
"rewards/chosen": -89.3853530883789,
|
|
"rewards/margins": 8.602734565734863,
|
|
"rewards/rejected": -97.9830093383789,
|
|
"rewards/weighted_accuracies": 0.5531250238418579,
|
|
"rewards/weighted_chosen": -0.6580413579940796,
|
|
"rewards/weighted_margins": 0.2508483827114105,
|
|
"rewards/weighted_rejected": -0.909197986125946,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.26721891909947226,
|
|
"grad_norm": 21.92582893371582,
|
|
"learning_rate": 9.181800111199766e-07,
|
|
"logits/chosen": -0.2139892578125,
|
|
"logits/rejected": -0.185211181640625,
|
|
"logps/chosen": -153.05624389648438,
|
|
"logps/rejected": -155.6671905517578,
|
|
"logps/weighted_chosen": -0.660186767578125,
|
|
"logps/weighted_rejected": -0.740765392780304,
|
|
"loss": 0.6631,
|
|
"rewards/accuracies": 0.596875011920929,
|
|
"rewards/chosen": -91.49101257324219,
|
|
"rewards/margins": 12.721094131469727,
|
|
"rewards/rejected": -104.1937484741211,
|
|
"rewards/weighted_accuracies": 0.6156250238418579,
|
|
"rewards/weighted_chosen": -0.679455578327179,
|
|
"rewards/weighted_margins": 0.2384185791015625,
|
|
"rewards/weighted_rejected": -0.9178100824356079,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.26721891909947226,
|
|
"eval_logits/chosen": -0.27163267135620117,
|
|
"eval_logits/rejected": -0.24348750710487366,
|
|
"eval_logps/chosen": -159.72760009765625,
|
|
"eval_logps/rejected": -165.05091857910156,
|
|
"eval_logps/weighted_chosen": -0.6352449059486389,
|
|
"eval_logps/weighted_rejected": -0.7405111789703369,
|
|
"eval_loss": 0.6863088607788086,
|
|
"eval_rewards/accuracies": 0.5992871522903442,
|
|
"eval_rewards/chosen": -97.6285629272461,
|
|
"eval_rewards/margins": 10.883528709411621,
|
|
"eval_rewards/rejected": -108.50712585449219,
|
|
"eval_rewards/weighted_accuracies": 0.5972505211830139,
|
|
"eval_rewards/weighted_chosen": -0.6794247031211853,
|
|
"eval_rewards/weighted_margins": 0.20837070047855377,
|
|
"eval_rewards/weighted_rejected": -0.8877954483032227,
|
|
"eval_runtime": 1137.126,
|
|
"eval_samples_per_second": 1.725,
|
|
"eval_steps_per_second": 0.432,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.2725632974814617,
|
|
"grad_norm": 13.315505027770996,
|
|
"learning_rate": 9.129942388025066e-07,
|
|
"logits/chosen": -0.16319426894187927,
|
|
"logits/rejected": -0.12026214599609375,
|
|
"logps/chosen": -149.140625,
|
|
"logps/rejected": -157.3195343017578,
|
|
"logps/weighted_chosen": -0.686871349811554,
|
|
"logps/weighted_rejected": -0.76031494140625,
|
|
"loss": 0.6974,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": -96.3252944946289,
|
|
"rewards/margins": 11.934374809265137,
|
|
"rewards/rejected": -108.2183609008789,
|
|
"rewards/weighted_accuracies": 0.5531250238418579,
|
|
"rewards/weighted_chosen": -0.7806671261787415,
|
|
"rewards/weighted_margins": 0.146159365773201,
|
|
"rewards/weighted_rejected": -0.926666259765625,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.2779076758634511,
|
|
"grad_norm": 32.33969497680664,
|
|
"learning_rate": 9.076647366362082e-07,
|
|
"logits/chosen": -0.12646484375,
|
|
"logits/rejected": -0.08115959167480469,
|
|
"logps/chosen": -154.6750030517578,
|
|
"logps/rejected": -163.578125,
|
|
"logps/weighted_chosen": -0.655651867389679,
|
|
"logps/weighted_rejected": -0.7443176507949829,
|
|
"loss": 0.7056,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -102.30839538574219,
|
|
"rewards/margins": 14.767578125,
|
|
"rewards/rejected": -117.12773132324219,
|
|
"rewards/weighted_accuracies": 0.5562499761581421,
|
|
"rewards/weighted_chosen": -0.716845691204071,
|
|
"rewards/weighted_margins": 0.19047698378562927,
|
|
"rewards/weighted_rejected": -0.9073394536972046,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.2832520542454406,
|
|
"grad_norm": 37.720340728759766,
|
|
"learning_rate": 9.021933593891841e-07,
|
|
"logits/chosen": -0.08947906643152237,
|
|
"logits/rejected": -0.060358427464962006,
|
|
"logps/chosen": -156.99063110351562,
|
|
"logps/rejected": -160.6953125,
|
|
"logps/weighted_chosen": -0.643658459186554,
|
|
"logps/weighted_rejected": -0.7408691644668579,
|
|
"loss": 0.6914,
|
|
"rewards/accuracies": 0.581250011920929,
|
|
"rewards/chosen": -111.7953109741211,
|
|
"rewards/margins": 9.351758003234863,
|
|
"rewards/rejected": -121.13594055175781,
|
|
"rewards/weighted_accuracies": 0.5687500238418579,
|
|
"rewards/weighted_chosen": -0.7329131960868835,
|
|
"rewards/weighted_margins": 0.17979125678539276,
|
|
"rewards/weighted_rejected": -0.9128783941268921,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.28859643262743,
|
|
"grad_norm": 14.224921226501465,
|
|
"learning_rate": 8.965820112047629e-07,
|
|
"logits/chosen": -0.09932632744312286,
|
|
"logits/rejected": -0.08042526245117188,
|
|
"logps/chosen": -167.265625,
|
|
"logps/rejected": -178.6164093017578,
|
|
"logps/weighted_chosen": -0.663098156452179,
|
|
"logps/weighted_rejected": -0.736401379108429,
|
|
"loss": 0.6642,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": -116.47245788574219,
|
|
"rewards/margins": 15.925390243530273,
|
|
"rewards/rejected": -132.3351593017578,
|
|
"rewards/weighted_accuracies": 0.643750011920929,
|
|
"rewards/weighted_chosen": -0.7144225835800171,
|
|
"rewards/weighted_margins": 0.17554016411304474,
|
|
"rewards/weighted_rejected": -0.889788806438446,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.2939408110094195,
|
|
"grad_norm": 25.44819450378418,
|
|
"learning_rate": 8.908326449388213e-07,
|
|
"logits/chosen": -0.09075965732336044,
|
|
"logits/rejected": -0.0493927001953125,
|
|
"logps/chosen": -160.14608764648438,
|
|
"logps/rejected": -166.7140655517578,
|
|
"logps/weighted_chosen": -0.66522216796875,
|
|
"logps/weighted_rejected": -0.734057605266571,
|
|
"loss": 0.6806,
|
|
"rewards/accuracies": 0.6343749761581421,
|
|
"rewards/chosen": -115.48554992675781,
|
|
"rewards/margins": 15.63916015625,
|
|
"rewards/rejected": -131.1242218017578,
|
|
"rewards/weighted_accuracies": 0.590624988079071,
|
|
"rewards/weighted_chosen": -0.7069762945175171,
|
|
"rewards/weighted_margins": 0.17700500786304474,
|
|
"rewards/weighted_rejected": -0.8841186761856079,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.2992851893914089,
|
|
"grad_norm": 16.921142578125,
|
|
"learning_rate": 8.849472614801527e-07,
|
|
"logits/chosen": -0.09975433349609375,
|
|
"logits/rejected": -0.07463760673999786,
|
|
"logps/chosen": -165.10311889648438,
|
|
"logps/rejected": -171.703125,
|
|
"logps/weighted_chosen": -0.670971691608429,
|
|
"logps/weighted_rejected": -0.7793823480606079,
|
|
"loss": 0.6625,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": -121.6898422241211,
|
|
"rewards/margins": 17.183399200439453,
|
|
"rewards/rejected": -138.83261108398438,
|
|
"rewards/weighted_accuracies": 0.59375,
|
|
"rewards/weighted_chosen": -0.7858215570449829,
|
|
"rewards/weighted_margins": 0.20911255478858948,
|
|
"rewards/weighted_rejected": -0.994738757610321,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.30462956777339834,
|
|
"grad_norm": 25.59569549560547,
|
|
"learning_rate": 8.789279090541208e-07,
|
|
"logits/chosen": -0.12498245388269424,
|
|
"logits/rejected": -0.07875537872314453,
|
|
"logps/chosen": -168.8429718017578,
|
|
"logps/rejected": -182.640625,
|
|
"logps/weighted_chosen": -0.729663074016571,
|
|
"logps/weighted_rejected": -0.8487914800643921,
|
|
"loss": 0.7176,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -128.54061889648438,
|
|
"rewards/margins": 19.065624237060547,
|
|
"rewards/rejected": -147.580078125,
|
|
"rewards/weighted_accuracies": 0.565625011920929,
|
|
"rewards/weighted_chosen": -0.860760509967804,
|
|
"rewards/weighted_margins": 0.2542465329170227,
|
|
"rewards/weighted_rejected": -1.1147247552871704,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.3099739461553878,
|
|
"grad_norm": 29.514760971069336,
|
|
"learning_rate": 8.72776682509837e-07,
|
|
"logits/chosen": -0.06974849849939346,
|
|
"logits/rejected": -0.010189438238739967,
|
|
"logps/chosen": -165.45547485351562,
|
|
"logps/rejected": -175.70938110351562,
|
|
"logps/weighted_chosen": -0.70281982421875,
|
|
"logps/weighted_rejected": -0.855236828327179,
|
|
"loss": 0.6413,
|
|
"rewards/accuracies": 0.612500011920929,
|
|
"rewards/chosen": -129.60879516601562,
|
|
"rewards/margins": 22.657421112060547,
|
|
"rewards/rejected": -152.33944702148438,
|
|
"rewards/weighted_accuracies": 0.6156250238418579,
|
|
"rewards/weighted_chosen": -0.8432861566543579,
|
|
"rewards/weighted_margins": 0.32232969999313354,
|
|
"rewards/weighted_rejected": -1.1654784679412842,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.31531832453737724,
|
|
"grad_norm": 30.710643768310547,
|
|
"learning_rate": 8.664957225911138e-07,
|
|
"logits/chosen": -0.06455497443675995,
|
|
"logits/rejected": -0.048407744616270065,
|
|
"logps/chosen": -178.4812469482422,
|
|
"logps/rejected": -201.52969360351562,
|
|
"logps/weighted_chosen": -0.7231200933456421,
|
|
"logps/weighted_rejected": -0.8162475824356079,
|
|
"loss": 0.6554,
|
|
"rewards/accuracies": 0.590624988079071,
|
|
"rewards/chosen": -144.26171875,
|
|
"rewards/margins": 14.5029296875,
|
|
"rewards/rejected": -158.80703735351562,
|
|
"rewards/weighted_accuracies": 0.6187499761581421,
|
|
"rewards/weighted_chosen": -0.8793884515762329,
|
|
"rewards/weighted_margins": 0.18951721489429474,
|
|
"rewards/weighted_rejected": -1.0688660144805908,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.3206627029193667,
|
|
"grad_norm": 12.866159439086914,
|
|
"learning_rate": 8.600872151914451e-07,
|
|
"logits/chosen": -0.011664772406220436,
|
|
"logits/rejected": 0.0007385254139080644,
|
|
"logps/chosen": -169.0890655517578,
|
|
"logps/rejected": -180.8078155517578,
|
|
"logps/weighted_chosen": -0.693652331829071,
|
|
"logps/weighted_rejected": -0.879650890827179,
|
|
"loss": 0.6483,
|
|
"rewards/accuracies": 0.590624988079071,
|
|
"rewards/chosen": -140.900390625,
|
|
"rewards/margins": 15.367383003234863,
|
|
"rewards/rejected": -156.2814483642578,
|
|
"rewards/weighted_accuracies": 0.612500011920929,
|
|
"rewards/weighted_chosen": -0.8526153564453125,
|
|
"rewards/weighted_margins": 0.34896546602249146,
|
|
"rewards/weighted_rejected": -1.201562523841858,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.32600708130135614,
|
|
"grad_norm": 19.085651397705078,
|
|
"learning_rate": 8.535533905932737e-07,
|
|
"logits/chosen": 0.02297821082174778,
|
|
"logits/rejected": 0.06475830078125,
|
|
"logps/chosen": -190.3406219482422,
|
|
"logps/rejected": -203.453125,
|
|
"logps/weighted_chosen": -0.7635498046875,
|
|
"logps/weighted_rejected": -0.876513659954071,
|
|
"loss": 0.6785,
|
|
"rewards/accuracies": 0.59375,
|
|
"rewards/chosen": -164.71640014648438,
|
|
"rewards/margins": 23.290233612060547,
|
|
"rewards/rejected": -187.9656219482422,
|
|
"rewards/weighted_accuracies": 0.5687500238418579,
|
|
"rewards/weighted_chosen": -0.9807373285293579,
|
|
"rewards/weighted_margins": 0.23782959580421448,
|
|
"rewards/weighted_rejected": -1.21783447265625,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.33135145968334556,
|
|
"grad_norm": 32.081443786621094,
|
|
"learning_rate": 8.468965226918105e-07,
|
|
"logits/chosen": -0.011585617437958717,
|
|
"logits/rejected": 0.016324615105986595,
|
|
"logps/chosen": -189.3132781982422,
|
|
"logps/rejected": -203.7937469482422,
|
|
"logps/weighted_chosen": -0.7615417242050171,
|
|
"logps/weighted_rejected": -0.82696533203125,
|
|
"loss": 0.7265,
|
|
"rewards/accuracies": 0.5874999761581421,
|
|
"rewards/chosen": -167.82070922851562,
|
|
"rewards/margins": 21.9140625,
|
|
"rewards/rejected": -189.74276733398438,
|
|
"rewards/weighted_accuracies": 0.581250011920929,
|
|
"rewards/weighted_chosen": -1.022314429283142,
|
|
"rewards/weighted_margins": 0.10553588718175888,
|
|
"rewards/weighted_rejected": -1.127905249595642,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.33669583806533504,
|
|
"grad_norm": 21.689685821533203,
|
|
"learning_rate": 8.40118928203676e-07,
|
|
"logits/chosen": 0.06978149712085724,
|
|
"logits/rejected": 0.07966003566980362,
|
|
"logps/chosen": -171.18905639648438,
|
|
"logps/rejected": -187.87344360351562,
|
|
"logps/weighted_chosen": -0.8035522699356079,
|
|
"logps/weighted_rejected": -0.938281238079071,
|
|
"loss": 0.6889,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -159.8556671142578,
|
|
"rewards/margins": 22.441015243530273,
|
|
"rewards/rejected": -182.2810516357422,
|
|
"rewards/weighted_accuracies": 0.6343749761581421,
|
|
"rewards/weighted_chosen": -1.125823974609375,
|
|
"rewards/weighted_margins": 0.22769927978515625,
|
|
"rewards/weighted_rejected": -1.353857398033142,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.34204021644732446,
|
|
"grad_norm": 15.847578048706055,
|
|
"learning_rate": 8.332229658606382e-07,
|
|
"logits/chosen": 0.009455871768295765,
|
|
"logits/rejected": 0.03966980054974556,
|
|
"logps/chosen": -200.93984985351562,
|
|
"logps/rejected": -211.3125,
|
|
"logps/weighted_chosen": -0.783465564250946,
|
|
"logps/weighted_rejected": -0.895214855670929,
|
|
"loss": 0.6535,
|
|
"rewards/accuracies": 0.621874988079071,
|
|
"rewards/chosen": -177.21133422851562,
|
|
"rewards/margins": 26.989063262939453,
|
|
"rewards/rejected": -204.1365203857422,
|
|
"rewards/weighted_accuracies": 0.637499988079071,
|
|
"rewards/weighted_chosen": -1.0060241222381592,
|
|
"rewards/weighted_margins": 0.2687225341796875,
|
|
"rewards/weighted_rejected": -1.274450659751892,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.34738459482931394,
|
|
"grad_norm": 22.713172912597656,
|
|
"learning_rate": 8.262110355887302e-07,
|
|
"logits/chosen": 0.028325652703642845,
|
|
"logits/rejected": 0.06662559509277344,
|
|
"logps/chosen": -189.8406219482422,
|
|
"logps/rejected": -196.30859375,
|
|
"logps/weighted_chosen": -0.7696533203125,
|
|
"logps/weighted_rejected": -0.8599487543106079,
|
|
"loss": 0.7008,
|
|
"rewards/accuracies": 0.5874999761581421,
|
|
"rewards/chosen": -176.921875,
|
|
"rewards/margins": 18.607227325439453,
|
|
"rewards/rejected": -195.5730438232422,
|
|
"rewards/weighted_accuracies": 0.6000000238418579,
|
|
"rewards/weighted_chosen": -1.030175805091858,
|
|
"rewards/weighted_margins": 0.17038574814796448,
|
|
"rewards/weighted_rejected": -1.2006652355194092,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.35272897321130336,
|
|
"grad_norm": 21.770971298217773,
|
|
"learning_rate": 8.190855776730293e-07,
|
|
"logits/chosen": -0.02209014818072319,
|
|
"logits/rejected": 0.02955322340130806,
|
|
"logps/chosen": -191.64297485351562,
|
|
"logps/rejected": -206.6015625,
|
|
"logps/weighted_chosen": -0.783374011516571,
|
|
"logps/weighted_rejected": -0.9095824956893921,
|
|
"loss": 0.6596,
|
|
"rewards/accuracies": 0.606249988079071,
|
|
"rewards/chosen": -177.8122100830078,
|
|
"rewards/margins": 24.329492568969727,
|
|
"rewards/rejected": -202.0851593017578,
|
|
"rewards/weighted_accuracies": 0.612500011920929,
|
|
"rewards/weighted_chosen": -1.0215880870819092,
|
|
"rewards/weighted_margins": 0.25640565156936646,
|
|
"rewards/weighted_rejected": -1.2780640125274658,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.3580733515932928,
|
|
"grad_norm": 27.83735466003418,
|
|
"learning_rate": 8.118490719083917e-07,
|
|
"logits/chosen": -0.04197654873132706,
|
|
"logits/rejected": -0.011433410458266735,
|
|
"logps/chosen": -217.96249389648438,
|
|
"logps/rejected": -221.2265625,
|
|
"logps/weighted_chosen": -0.8158324956893921,
|
|
"logps/weighted_rejected": -0.9188903570175171,
|
|
"loss": 0.6605,
|
|
"rewards/accuracies": 0.5562499761581421,
|
|
"rewards/chosen": -213.95703125,
|
|
"rewards/margins": 13.51513671875,
|
|
"rewards/rejected": -227.47109985351562,
|
|
"rewards/weighted_accuracies": 0.574999988079071,
|
|
"rewards/weighted_chosen": -1.0736420154571533,
|
|
"rewards/weighted_margins": 0.30453795194625854,
|
|
"rewards/weighted_rejected": -1.37799072265625,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.36341772997528227,
|
|
"grad_norm": 63.59722137451172,
|
|
"learning_rate": 8.045040367364368e-07,
|
|
"logits/chosen": 0.06599731743335724,
|
|
"logits/rejected": 0.10958633571863174,
|
|
"logps/chosen": -183.30899047851562,
|
|
"logps/rejected": -192.22030639648438,
|
|
"logps/weighted_chosen": -0.8281921148300171,
|
|
"logps/weighted_rejected": -0.978344738483429,
|
|
"loss": 0.6439,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -185.7732391357422,
|
|
"rewards/margins": 25.359766006469727,
|
|
"rewards/rejected": -211.1357421875,
|
|
"rewards/weighted_accuracies": 0.671875,
|
|
"rewards/weighted_chosen": -1.1530334949493408,
|
|
"rewards/weighted_margins": 0.34730225801467896,
|
|
"rewards/weighted_rejected": -1.500341773033142,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.3687621083572717,
|
|
"grad_norm": 11.727298736572266,
|
|
"learning_rate": 7.970530283690819e-07,
|
|
"logits/chosen": 0.10012397915124893,
|
|
"logits/rejected": 0.14632339775562286,
|
|
"logps/chosen": -193.36328125,
|
|
"logps/rejected": -209.76406860351562,
|
|
"logps/weighted_chosen": -0.856884777545929,
|
|
"logps/weighted_rejected": -0.98321533203125,
|
|
"loss": 0.6659,
|
|
"rewards/accuracies": 0.640625,
|
|
"rewards/chosen": -201.2302703857422,
|
|
"rewards/margins": 32.98652267456055,
|
|
"rewards/rejected": -234.34707641601562,
|
|
"rewards/weighted_accuracies": 0.6343749761581421,
|
|
"rewards/weighted_chosen": -1.2462646961212158,
|
|
"rewards/weighted_margins": 0.27206724882125854,
|
|
"rewards/weighted_rejected": -1.5183227062225342,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.3741064867392611,
|
|
"grad_norm": 16.15506362915039,
|
|
"learning_rate": 7.89498639898931e-07,
|
|
"logits/chosen": 0.17198029160499573,
|
|
"logits/rejected": 0.22177276015281677,
|
|
"logps/chosen": -201.234375,
|
|
"logps/rejected": -211.76718139648438,
|
|
"logps/weighted_chosen": -0.888415515422821,
|
|
"logps/weighted_rejected": -1.013879418373108,
|
|
"loss": 0.6659,
|
|
"rewards/accuracies": 0.581250011920929,
|
|
"rewards/chosen": -227.2578125,
|
|
"rewards/margins": 29.7451171875,
|
|
"rewards/rejected": -256.93048095703125,
|
|
"rewards/weighted_accuracies": 0.581250011920929,
|
|
"rewards/weighted_chosen": -1.3159301280975342,
|
|
"rewards/weighted_margins": 0.291015625,
|
|
"rewards/weighted_rejected": -1.6072266101837158,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.3794508651212506,
|
|
"grad_norm": 20.944732666015625,
|
|
"learning_rate": 7.818435003968305e-07,
|
|
"logits/chosen": 0.11227264255285263,
|
|
"logits/rejected": 0.1453804075717926,
|
|
"logps/chosen": -220.72891235351562,
|
|
"logps/rejected": -239.18594360351562,
|
|
"logps/weighted_chosen": -0.891796886920929,
|
|
"logps/weighted_rejected": -1.0748779773712158,
|
|
"loss": 0.6297,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -247.6140594482422,
|
|
"rewards/margins": 40.565818786621094,
|
|
"rewards/rejected": -288.025390625,
|
|
"rewards/weighted_accuracies": 0.6499999761581421,
|
|
"rewards/weighted_chosen": -1.3091919422149658,
|
|
"rewards/weighted_margins": 0.36735838651657104,
|
|
"rewards/weighted_rejected": -1.67669677734375,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.38479524350324,
|
|
"grad_norm": 16.061363220214844,
|
|
"learning_rate": 7.740902739969008e-07,
|
|
"logits/chosen": 0.08527755737304688,
|
|
"logits/rejected": 0.10508499294519424,
|
|
"logps/chosen": -219.64688110351562,
|
|
"logps/rejected": -228.9499969482422,
|
|
"logps/weighted_chosen": -0.8562988042831421,
|
|
"logps/weighted_rejected": -0.9676269292831421,
|
|
"loss": 0.6551,
|
|
"rewards/accuracies": 0.596875011920929,
|
|
"rewards/chosen": -247.7687530517578,
|
|
"rewards/margins": 23.225780487060547,
|
|
"rewards/rejected": -270.9765625,
|
|
"rewards/weighted_accuracies": 0.5874999761581421,
|
|
"rewards/weighted_chosen": -1.203149437904358,
|
|
"rewards/weighted_margins": 0.285797119140625,
|
|
"rewards/weighted_rejected": -1.489160180091858,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.3901396218852295,
|
|
"grad_norm": 31.3476505279541,
|
|
"learning_rate": 7.662416589693695e-07,
|
|
"logits/chosen": 0.127583310008049,
|
|
"logits/rejected": 0.17155151069164276,
|
|
"logps/chosen": -201.3367156982422,
|
|
"logps/rejected": -212.42422485351562,
|
|
"logps/weighted_chosen": -0.8604370355606079,
|
|
"logps/weighted_rejected": -1.0564453601837158,
|
|
"loss": 0.6553,
|
|
"rewards/accuracies": 0.671875,
|
|
"rewards/chosen": -219.3408203125,
|
|
"rewards/margins": 29.0703125,
|
|
"rewards/rejected": -248.494140625,
|
|
"rewards/weighted_accuracies": 0.684374988079071,
|
|
"rewards/weighted_chosen": -1.220678687095642,
|
|
"rewards/weighted_margins": 0.39063721895217896,
|
|
"rewards/weighted_rejected": -1.611718773841858,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.3954840002672189,
|
|
"grad_norm": 12.7977294921875,
|
|
"learning_rate": 7.583003867815192e-07,
|
|
"logits/chosen": 0.09622497856616974,
|
|
"logits/rejected": 0.12804412841796875,
|
|
"logps/chosen": -228.9718780517578,
|
|
"logps/rejected": -242.92031860351562,
|
|
"logps/weighted_chosen": -0.877368152141571,
|
|
"logps/weighted_rejected": -0.99322509765625,
|
|
"loss": 0.6549,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": -245.8718719482422,
|
|
"rewards/margins": 29.608789443969727,
|
|
"rewards/rejected": -275.35076904296875,
|
|
"rewards/weighted_accuracies": 0.621874988079071,
|
|
"rewards/weighted_chosen": -1.26458740234375,
|
|
"rewards/weighted_margins": 0.26112061738967896,
|
|
"rewards/weighted_rejected": -1.525964379310608,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.40082837864920834,
|
|
"grad_norm": 31.35419464111328,
|
|
"learning_rate": 7.502692211470869e-07,
|
|
"logits/chosen": 0.10113067924976349,
|
|
"logits/rejected": 0.14377517998218536,
|
|
"logps/chosen": -222.765625,
|
|
"logps/rejected": -238.73983764648438,
|
|
"logps/weighted_chosen": -0.8651977777481079,
|
|
"logps/weighted_rejected": -0.9940551519393921,
|
|
"loss": 0.6788,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": -250.9093780517578,
|
|
"rewards/margins": 38.716407775878906,
|
|
"rewards/rejected": -289.56915283203125,
|
|
"rewards/weighted_accuracies": 0.565625011920929,
|
|
"rewards/weighted_chosen": -1.2682373523712158,
|
|
"rewards/weighted_margins": 0.230926513671875,
|
|
"rewards/weighted_rejected": -1.499151587486267,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.4061727570311978,
|
|
"grad_norm": 17.88278579711914,
|
|
"learning_rate": 7.421509570644387e-07,
|
|
"logits/chosen": 0.13302917778491974,
|
|
"logits/rejected": 0.17120666801929474,
|
|
"logps/chosen": -220.7859344482422,
|
|
"logps/rejected": -228.9296875,
|
|
"logps/weighted_chosen": -0.8490234613418579,
|
|
"logps/weighted_rejected": -1.0133788585662842,
|
|
"loss": 0.6282,
|
|
"rewards/accuracies": 0.596875011920929,
|
|
"rewards/chosen": -258.48907470703125,
|
|
"rewards/margins": 20.644336700439453,
|
|
"rewards/rejected": -279.12579345703125,
|
|
"rewards/weighted_accuracies": 0.6468750238418579,
|
|
"rewards/weighted_chosen": -1.235162377357483,
|
|
"rewards/weighted_margins": 0.39671021699905396,
|
|
"rewards/weighted_rejected": -1.631689429283142,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.41151713541318724,
|
|
"grad_norm": 19.770469665527344,
|
|
"learning_rate": 7.339484198438566e-07,
|
|
"logits/chosen": 0.2122901976108551,
|
|
"logits/rejected": 0.268341064453125,
|
|
"logps/chosen": -206.8859405517578,
|
|
"logps/rejected": -226.79452514648438,
|
|
"logps/weighted_chosen": -0.9818481206893921,
|
|
"logps/weighted_rejected": -1.1372802257537842,
|
|
"loss": 0.6596,
|
|
"rewards/accuracies": 0.6343749761581421,
|
|
"rewards/chosen": -249.8464813232422,
|
|
"rewards/margins": 49.36640548706055,
|
|
"rewards/rejected": -299.32110595703125,
|
|
"rewards/weighted_accuracies": 0.640625,
|
|
"rewards/weighted_chosen": -1.4886353015899658,
|
|
"rewards/weighted_margins": 0.38934630155563354,
|
|
"rewards/weighted_rejected": -1.8777344226837158,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.4168615137951767,
|
|
"grad_norm": 17.136322021484375,
|
|
"learning_rate": 7.256644641242773e-07,
|
|
"logits/chosen": 0.2901626527309418,
|
|
"logits/rejected": 0.3380989134311676,
|
|
"logps/chosen": -231.08633422851562,
|
|
"logps/rejected": -247.6374969482422,
|
|
"logps/weighted_chosen": -1.022985816001892,
|
|
"logps/weighted_rejected": -1.143164038658142,
|
|
"loss": 0.7062,
|
|
"rewards/accuracies": 0.5874999761581421,
|
|
"rewards/chosen": -293.5406188964844,
|
|
"rewards/margins": 38.568748474121094,
|
|
"rewards/rejected": -331.9984436035156,
|
|
"rewards/weighted_accuracies": 0.59375,
|
|
"rewards/weighted_chosen": -1.6519286632537842,
|
|
"rewards/weighted_margins": 0.23250122368335724,
|
|
"rewards/weighted_rejected": -1.8852417469024658,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.42220589217716614,
|
|
"grad_norm": 16.78958511352539,
|
|
"learning_rate": 7.173019728798234e-07,
|
|
"logits/chosen": 0.22947922348976135,
|
|
"logits/rejected": 0.2762344479560852,
|
|
"logps/chosen": -225.95468139648438,
|
|
"logps/rejected": -246.8957061767578,
|
|
"logps/weighted_chosen": -0.9952758550643921,
|
|
"logps/weighted_rejected": -1.151769995689392,
|
|
"loss": 0.6317,
|
|
"rewards/accuracies": 0.628125011920929,
|
|
"rewards/chosen": -281.17462158203125,
|
|
"rewards/margins": 49.26288986206055,
|
|
"rewards/rejected": -330.38671875,
|
|
"rewards/weighted_accuracies": 0.628125011920929,
|
|
"rewards/weighted_chosen": -1.5531127452850342,
|
|
"rewards/weighted_margins": 0.3658691346645355,
|
|
"rewards/weighted_rejected": -1.918725609779358,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.42755027055915557,
|
|
"grad_norm": 26.674007415771484,
|
|
"learning_rate": 7.088638564164738e-07,
|
|
"logits/chosen": 0.2732749879360199,
|
|
"logits/rejected": 0.32427138090133667,
|
|
"logps/chosen": -263.5464782714844,
|
|
"logps/rejected": -295.19061279296875,
|
|
"logps/weighted_chosen": -1.066674828529358,
|
|
"logps/weighted_rejected": -1.1934082508087158,
|
|
"loss": 0.6437,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": -355.3199157714844,
|
|
"rewards/margins": 68.63945007324219,
|
|
"rewards/rejected": -424.07501220703125,
|
|
"rewards/weighted_accuracies": 0.6499999761581421,
|
|
"rewards/weighted_chosen": -1.735803246498108,
|
|
"rewards/weighted_margins": 0.3184448182582855,
|
|
"rewards/weighted_rejected": -2.0541014671325684,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.43289464894114504,
|
|
"grad_norm": 53.29134750366211,
|
|
"learning_rate": 7.003530513592215e-07,
|
|
"logits/chosen": 0.30975571274757385,
|
|
"logits/rejected": 0.36716994643211365,
|
|
"logps/chosen": -259.92498779296875,
|
|
"logps/rejected": -295.8374938964844,
|
|
"logps/weighted_chosen": -1.0916259288787842,
|
|
"logps/weighted_rejected": -1.302832007408142,
|
|
"loss": 0.6407,
|
|
"rewards/accuracies": 0.65625,
|
|
"rewards/chosen": -362.6820373535156,
|
|
"rewards/margins": 78.9546890258789,
|
|
"rewards/rejected": -441.49298095703125,
|
|
"rewards/weighted_accuracies": 0.6656249761581421,
|
|
"rewards/weighted_chosen": -1.836395263671875,
|
|
"rewards/weighted_margins": 0.42828065156936646,
|
|
"rewards/weighted_rejected": -2.264721632003784,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.43823902732313447,
|
|
"grad_norm": 21.21747589111328,
|
|
"learning_rate": 6.917725196300726e-07,
|
|
"logits/chosen": 0.24695205688476562,
|
|
"logits/rejected": 0.290322482585907,
|
|
"logps/chosen": -282.28790283203125,
|
|
"logps/rejected": -317.53045654296875,
|
|
"logps/weighted_chosen": -1.034826636314392,
|
|
"logps/weighted_rejected": -1.1845214366912842,
|
|
"loss": 0.6366,
|
|
"rewards/accuracies": 0.621874988079071,
|
|
"rewards/chosen": -389.09100341796875,
|
|
"rewards/margins": 74.9951171875,
|
|
"rewards/rejected": -464.04376220703125,
|
|
"rewards/weighted_accuracies": 0.643750011920929,
|
|
"rewards/weighted_chosen": -1.68133544921875,
|
|
"rewards/weighted_margins": 0.3324523866176605,
|
|
"rewards/weighted_rejected": -2.014007568359375,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.44358340570512395,
|
|
"grad_norm": 25.014272689819336,
|
|
"learning_rate": 6.831252474172411e-07,
|
|
"logits/chosen": 0.2618546485900879,
|
|
"logits/rejected": 0.29917725920677185,
|
|
"logps/chosen": -255.4765625,
|
|
"logps/rejected": -285.0953063964844,
|
|
"logps/weighted_chosen": -1.060217261314392,
|
|
"logps/weighted_rejected": -1.23773193359375,
|
|
"loss": 0.6233,
|
|
"rewards/accuracies": 0.590624988079071,
|
|
"rewards/chosen": -353.51092529296875,
|
|
"rewards/margins": 61.749610900878906,
|
|
"rewards/rejected": -415.51873779296875,
|
|
"rewards/weighted_accuracies": 0.628125011920929,
|
|
"rewards/weighted_chosen": -1.6349976062774658,
|
|
"rewards/weighted_margins": 0.495330810546875,
|
|
"rewards/weighted_rejected": -2.130786180496216,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.44892778408711337,
|
|
"grad_norm": 16.74439239501953,
|
|
"learning_rate": 6.74414244135898e-07,
|
|
"logits/chosen": 0.240010067820549,
|
|
"logits/rejected": 0.31660157442092896,
|
|
"logps/chosen": -265.046875,
|
|
"logps/rejected": -296.85626220703125,
|
|
"logps/weighted_chosen": -1.003625512123108,
|
|
"logps/weighted_rejected": -1.165197730064392,
|
|
"loss": 0.6336,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -371.21112060546875,
|
|
"rewards/margins": 74.8984375,
|
|
"rewards/rejected": -446.146484375,
|
|
"rewards/weighted_accuracies": 0.6312500238418579,
|
|
"rewards/weighted_chosen": -1.5787353515625,
|
|
"rewards/weighted_margins": 0.36824339628219604,
|
|
"rewards/weighted_rejected": -1.946313500404358,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.4542721624691028,
|
|
"grad_norm": 20.773204803466797,
|
|
"learning_rate": 6.656425413808388e-07,
|
|
"logits/chosen": 0.2946815490722656,
|
|
"logits/rejected": 0.31943321228027344,
|
|
"logps/chosen": -284.6898498535156,
|
|
"logps/rejected": -300.85858154296875,
|
|
"logps/weighted_chosen": -1.0427734851837158,
|
|
"logps/weighted_rejected": -1.2700927257537842,
|
|
"loss": 0.6239,
|
|
"rewards/accuracies": 0.5874999761581421,
|
|
"rewards/chosen": -416.8919982910156,
|
|
"rewards/margins": 38.0087890625,
|
|
"rewards/rejected": -454.7835998535156,
|
|
"rewards/weighted_accuracies": 0.668749988079071,
|
|
"rewards/weighted_chosen": -1.7086913585662842,
|
|
"rewards/weighted_margins": 0.4794067442417145,
|
|
"rewards/weighted_rejected": -2.187939405441284,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.45961654085109227,
|
|
"grad_norm": 14.845000267028809,
|
|
"learning_rate": 6.568131918714294e-07,
|
|
"logits/chosen": 0.3231925964355469,
|
|
"logits/rejected": 0.36410826444625854,
|
|
"logps/chosen": -282.16876220703125,
|
|
"logps/rejected": -317.7523498535156,
|
|
"logps/weighted_chosen": -1.077661156654358,
|
|
"logps/weighted_rejected": -1.253027319908142,
|
|
"loss": 0.6359,
|
|
"rewards/accuracies": 0.612500011920929,
|
|
"rewards/chosen": -413.7085876464844,
|
|
"rewards/margins": 79.2416000366211,
|
|
"rewards/rejected": -492.9193420410156,
|
|
"rewards/weighted_accuracies": 0.653124988079071,
|
|
"rewards/weighted_chosen": -1.803625464439392,
|
|
"rewards/weighted_margins": 0.37888795137405396,
|
|
"rewards/weighted_rejected": -2.1824707984924316,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.4649609192330817,
|
|
"grad_norm": 23.40928077697754,
|
|
"learning_rate": 6.47929268389203e-07,
|
|
"logits/chosen": 0.48909300565719604,
|
|
"logits/rejected": 0.5615142583847046,
|
|
"logps/chosen": -364.40155029296875,
|
|
"logps/rejected": -441.33123779296875,
|
|
"logps/weighted_chosen": -1.275048851966858,
|
|
"logps/weighted_rejected": -1.454126000404358,
|
|
"loss": 0.6207,
|
|
"rewards/accuracies": 0.640625,
|
|
"rewards/chosen": -599.5953369140625,
|
|
"rewards/margins": 173.45858764648438,
|
|
"rewards/rejected": -773.1968994140625,
|
|
"rewards/weighted_accuracies": 0.65625,
|
|
"rewards/weighted_chosen": -2.249072313308716,
|
|
"rewards/weighted_margins": 0.4446777403354645,
|
|
"rewards/weighted_rejected": -2.6942381858825684,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.47030529761507117,
|
|
"grad_norm": 18.79306411743164,
|
|
"learning_rate": 6.389938627084732e-07,
|
|
"logits/chosen": 0.577728271484375,
|
|
"logits/rejected": 0.6453384160995483,
|
|
"logps/chosen": -411.82342529296875,
|
|
"logps/rejected": -475.7250061035156,
|
|
"logps/weighted_chosen": -1.4719970226287842,
|
|
"logps/weighted_rejected": -1.6781737804412842,
|
|
"loss": 0.6194,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": -739.5625,
|
|
"rewards/margins": 146.572265625,
|
|
"rewards/rejected": -886.0562744140625,
|
|
"rewards/weighted_accuracies": 0.6781250238418579,
|
|
"rewards/weighted_chosen": -2.7164063453674316,
|
|
"rewards/weighted_margins": 0.531750500202179,
|
|
"rewards/weighted_rejected": -3.248828172683716,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.4756496759970606,
|
|
"grad_norm": 22.009668350219727,
|
|
"learning_rate": 6.300100845203373e-07,
|
|
"logits/chosen": 0.5401207208633423,
|
|
"logits/rejected": 0.5778656005859375,
|
|
"logps/chosen": -414.01953125,
|
|
"logps/rejected": -455.6171875,
|
|
"logps/weighted_chosen": -1.2978026866912842,
|
|
"logps/weighted_rejected": -1.476660132408142,
|
|
"loss": 0.6357,
|
|
"rewards/accuracies": 0.559374988079071,
|
|
"rewards/chosen": -709.7568359375,
|
|
"rewards/margins": 100.93086242675781,
|
|
"rewards/rejected": -810.8453369140625,
|
|
"rewards/weighted_accuracies": 0.6312500238418579,
|
|
"rewards/weighted_chosen": -2.32794189453125,
|
|
"rewards/weighted_margins": 0.41016846895217896,
|
|
"rewards/weighted_rejected": -2.738818407058716,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.48099405437905,
|
|
"grad_norm": 21.886869430541992,
|
|
"learning_rate": 6.20981060350445e-07,
|
|
"logits/chosen": 0.39915162324905396,
|
|
"logits/rejected": 0.4594573974609375,
|
|
"logps/chosen": -342.47186279296875,
|
|
"logps/rejected": -374.34063720703125,
|
|
"logps/weighted_chosen": -1.194909691810608,
|
|
"logps/weighted_rejected": -1.406274437904358,
|
|
"loss": 0.5957,
|
|
"rewards/accuracies": 0.6031249761581421,
|
|
"rewards/chosen": -555.0218505859375,
|
|
"rewards/margins": 85.990234375,
|
|
"rewards/rejected": -640.8515625,
|
|
"rewards/weighted_accuracies": 0.6781250238418579,
|
|
"rewards/weighted_chosen": -2.103381395339966,
|
|
"rewards/weighted_margins": 0.46428221464157104,
|
|
"rewards/weighted_rejected": -2.5683836936950684,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.4863384327610395,
|
|
"grad_norm": 16.282630920410156,
|
|
"learning_rate": 6.119099324709087e-07,
|
|
"logits/chosen": 0.3816207945346832,
|
|
"logits/rejected": 0.4475570619106293,
|
|
"logps/chosen": -316.72186279296875,
|
|
"logps/rejected": -365.28045654296875,
|
|
"logps/weighted_chosen": -1.1351439952850342,
|
|
"logps/weighted_rejected": -1.284033179283142,
|
|
"loss": 0.6351,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -497.158203125,
|
|
"rewards/margins": 119.31523132324219,
|
|
"rewards/rejected": -616.5943603515625,
|
|
"rewards/weighted_accuracies": 0.6312500238418579,
|
|
"rewards/weighted_chosen": -1.967187523841858,
|
|
"rewards/weighted_margins": 0.3160339295864105,
|
|
"rewards/weighted_rejected": -2.284106492996216,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.4916828111430289,
|
|
"grad_norm": 18.44144630432129,
|
|
"learning_rate": 6.027998578067316e-07,
|
|
"logits/chosen": 0.4347885251045227,
|
|
"logits/rejected": 0.48958054184913635,
|
|
"logps/chosen": -366.1976623535156,
|
|
"logps/rejected": -429.29998779296875,
|
|
"logps/weighted_chosen": -1.150183081626892,
|
|
"logps/weighted_rejected": -1.3400390148162842,
|
|
"loss": 0.6143,
|
|
"rewards/accuracies": 0.590624988079071,
|
|
"rewards/chosen": -597.3343505859375,
|
|
"rewards/margins": 151.3175811767578,
|
|
"rewards/rejected": -748.48046875,
|
|
"rewards/weighted_accuracies": 0.6625000238418579,
|
|
"rewards/weighted_chosen": -1.9908447265625,
|
|
"rewards/weighted_margins": 0.4314208924770355,
|
|
"rewards/weighted_rejected": -2.4218993186950684,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.4970271895250184,
|
|
"grad_norm": 25.489429473876953,
|
|
"learning_rate": 5.936540068371394e-07,
|
|
"logits/chosen": 0.45708006620407104,
|
|
"logits/rejected": 0.5000015497207642,
|
|
"logps/chosen": -349.76251220703125,
|
|
"logps/rejected": -390.9984436035156,
|
|
"logps/weighted_chosen": -1.220849633216858,
|
|
"logps/weighted_rejected": -1.4126708507537842,
|
|
"loss": 0.6245,
|
|
"rewards/accuracies": 0.5843750238418579,
|
|
"rewards/chosen": -577.7796630859375,
|
|
"rewards/margins": 97.06953430175781,
|
|
"rewards/rejected": -675.0695190429688,
|
|
"rewards/weighted_accuracies": 0.643750011920929,
|
|
"rewards/weighted_chosen": -2.1529297828674316,
|
|
"rewards/weighted_margins": 0.42708128690719604,
|
|
"rewards/weighted_rejected": -2.580029249191284,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.5023715679070078,
|
|
"grad_norm": 19.472640991210938,
|
|
"learning_rate": 5.844755624921918e-07,
|
|
"logits/chosen": 0.5162017941474915,
|
|
"logits/rejected": 0.609728991985321,
|
|
"logps/chosen": -363.6421813964844,
|
|
"logps/rejected": -415.4140625,
|
|
"logps/weighted_chosen": -1.33294677734375,
|
|
"logps/weighted_rejected": -1.4788939952850342,
|
|
"loss": 0.6754,
|
|
"rewards/accuracies": 0.628125011920929,
|
|
"rewards/chosen": -618.7273559570312,
|
|
"rewards/margins": 130.2453155517578,
|
|
"rewards/rejected": -749.0703125,
|
|
"rewards/weighted_accuracies": 0.6468750238418579,
|
|
"rewards/weighted_chosen": -2.408947706222534,
|
|
"rewards/weighted_margins": 0.34906005859375,
|
|
"rewards/weighted_rejected": -2.757458448410034,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.5077159462889973,
|
|
"grad_norm": 32.006065368652344,
|
|
"learning_rate": 5.752677190450634e-07,
|
|
"logits/chosen": 0.688891589641571,
|
|
"logits/rejected": 0.7397063970565796,
|
|
"logps/chosen": -482.10235595703125,
|
|
"logps/rejected": -568.703125,
|
|
"logps/weighted_chosen": -1.4970214366912842,
|
|
"logps/weighted_rejected": -1.689294457435608,
|
|
"loss": 0.6354,
|
|
"rewards/accuracies": 0.6156250238418579,
|
|
"rewards/chosen": -910.1663818359375,
|
|
"rewards/margins": 212.23184204101562,
|
|
"rewards/rejected": -1122.731201171875,
|
|
"rewards/weighted_accuracies": 0.6812499761581421,
|
|
"rewards/weighted_chosen": -2.8426513671875,
|
|
"rewards/weighted_margins": 0.47370606660842896,
|
|
"rewards/weighted_rejected": -3.3175048828125,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.5130603246709867,
|
|
"grad_norm": 24.5925235748291,
|
|
"learning_rate": 5.66033681000375e-07,
|
|
"logits/chosen": 0.743194580078125,
|
|
"logits/rejected": 0.8281310796737671,
|
|
"logps/chosen": -493.4140625,
|
|
"logps/rejected": -624.3343505859375,
|
|
"logps/weighted_chosen": -1.50146484375,
|
|
"logps/weighted_rejected": -1.747949242591858,
|
|
"loss": 0.5978,
|
|
"rewards/accuracies": 0.628125011920929,
|
|
"rewards/chosen": -935.1187744140625,
|
|
"rewards/margins": 319.94921875,
|
|
"rewards/rejected": -1255.2484130859375,
|
|
"rewards/weighted_accuracies": 0.6968749761581421,
|
|
"rewards/weighted_chosen": -2.852978467941284,
|
|
"rewards/weighted_margins": 0.568621814250946,
|
|
"rewards/weighted_rejected": -3.422070264816284,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.5184047030529761,
|
|
"grad_norm": 18.53244400024414,
|
|
"learning_rate": 5.567766619789658e-07,
|
|
"logits/chosen": 0.5608322024345398,
|
|
"logits/rejected": 0.6179229617118835,
|
|
"logps/chosen": -431.1343688964844,
|
|
"logps/rejected": -476.00701904296875,
|
|
"logps/weighted_chosen": -1.348486304283142,
|
|
"logps/weighted_rejected": -1.502197265625,
|
|
"loss": 0.6584,
|
|
"rewards/accuracies": 0.5874999761581421,
|
|
"rewards/chosen": -772.9921875,
|
|
"rewards/margins": 109.23124694824219,
|
|
"rewards/rejected": -882.5031127929688,
|
|
"rewards/weighted_accuracies": 0.6312500238418579,
|
|
"rewards/weighted_chosen": -2.4359130859375,
|
|
"rewards/weighted_margins": 0.3783813416957855,
|
|
"rewards/weighted_rejected": -2.814013719558716,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.5237490814349656,
|
|
"grad_norm": 32.86454772949219,
|
|
"learning_rate": 5.474998835994916e-07,
|
|
"logits/chosen": 0.539044201374054,
|
|
"logits/rejected": 0.6025703549385071,
|
|
"logps/chosen": -401.83905029296875,
|
|
"logps/rejected": -486.6234436035156,
|
|
"logps/weighted_chosen": -1.262475609779358,
|
|
"logps/weighted_rejected": -1.458227515220642,
|
|
"loss": 0.6286,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": -694.048828125,
|
|
"rewards/margins": 198.5105438232422,
|
|
"rewards/rejected": -892.8984375,
|
|
"rewards/weighted_accuracies": 0.6625000238418579,
|
|
"rewards/weighted_chosen": -2.239306688308716,
|
|
"rewards/weighted_margins": 0.41253662109375,
|
|
"rewards/weighted_rejected": -2.6525635719299316,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.529093459816955,
|
|
"grad_norm": 29.745466232299805,
|
|
"learning_rate": 5.382065743572411e-07,
|
|
"logits/chosen": 0.5060653686523438,
|
|
"logits/rejected": 0.6303970217704773,
|
|
"logps/chosen": -385.96405029296875,
|
|
"logps/rejected": -484.296875,
|
|
"logps/weighted_chosen": -1.272680640220642,
|
|
"logps/weighted_rejected": -1.508935570716858,
|
|
"loss": 0.5645,
|
|
"rewards/accuracies": 0.671875,
|
|
"rewards/chosen": -660.4656372070312,
|
|
"rewards/margins": 236.1494140625,
|
|
"rewards/rejected": -896.5562744140625,
|
|
"rewards/weighted_accuracies": 0.7250000238418579,
|
|
"rewards/weighted_chosen": -2.2663817405700684,
|
|
"rewards/weighted_margins": 0.583996593952179,
|
|
"rewards/weighted_rejected": -2.849902391433716,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.5344378381989445,
|
|
"grad_norm": 23.71160125732422,
|
|
"learning_rate": 5.288999685005571e-07,
|
|
"logits/chosen": 0.6983550786972046,
|
|
"logits/rejected": 0.7909576296806335,
|
|
"logps/chosen": -492.20623779296875,
|
|
"logps/rejected": -586.3453369140625,
|
|
"logps/weighted_chosen": -1.4745604991912842,
|
|
"logps/weighted_rejected": -1.7019531726837158,
|
|
"loss": 0.5934,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -929.5031127929688,
|
|
"rewards/margins": 230.67343139648438,
|
|
"rewards/rejected": -1159.8773193359375,
|
|
"rewards/weighted_accuracies": 0.671875,
|
|
"rewards/weighted_chosen": -2.7806153297424316,
|
|
"rewards/weighted_margins": 0.5472351312637329,
|
|
"rewards/weighted_rejected": -3.3272705078125,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.5344378381989445,
|
|
"eval_logits/chosen": 0.8178273439407349,
|
|
"eval_logits/rejected": 0.9030020833015442,
|
|
"eval_logps/chosen": -545.8350219726562,
|
|
"eval_logps/rejected": -631.2993774414062,
|
|
"eval_logps/weighted_chosen": -1.671688437461853,
|
|
"eval_logps/weighted_rejected": -1.9300999641418457,
|
|
"eval_loss": 0.6100515723228455,
|
|
"eval_rewards/accuracies": 0.612525463104248,
|
|
"eval_rewards/chosen": -1062.7760009765625,
|
|
"eval_rewards/margins": 211.23606872558594,
|
|
"eval_rewards/rejected": -1274.2708740234375,
|
|
"eval_rewards/weighted_accuracies": 0.6924643516540527,
|
|
"eval_rewards/weighted_chosen": -3.270533561706543,
|
|
"eval_rewards/weighted_margins": 0.5912336111068726,
|
|
"eval_rewards/weighted_rejected": -3.861767292022705,
|
|
"eval_runtime": 1069.2656,
|
|
"eval_samples_per_second": 1.834,
|
|
"eval_steps_per_second": 0.459,
|
|
"step": 1000
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 1872,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|