Model: W-61/llama-3-8b-base-new-dpo-hh-harmless-s_star1.0-4xh200-batch-64-20260421-213851 Source: Original Platform
2631 lines
96 KiB
JSON
2631 lines
96 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.999244142101285,
|
|
"eval_steps": 200,
|
|
"global_step": 661,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0015117157974300832,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.0013532638549804688,
|
|
"fcm_dpo/q_t": 0.5000336766242981,
|
|
"grad_norm": 28.219865798950195,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": 0.13337239623069763,
|
|
"logits/rejected": 0.12492948770523071,
|
|
"logps/chosen": -64.5841293334961,
|
|
"logps/ref_chosen": -64.61280822753906,
|
|
"logps/ref_rejected": -64.17195129394531,
|
|
"logps/rejected": -64.14192199707031,
|
|
"loss": 1.3866,
|
|
"margin_dpo/margin_mean": -0.0013527870178222656,
|
|
"margin_dpo/margin_std": 0.2561596930027008,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.007558578987150416,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.017816998064517975,
|
|
"fcm_dpo/q_t": 0.49955499172210693,
|
|
"grad_norm": 29.562381744384766,
|
|
"learning_rate": 2.9850746268656714e-08,
|
|
"logits/chosen": 0.09712683409452438,
|
|
"logits/rejected": 0.06875293701887131,
|
|
"logps/chosen": -65.33859252929688,
|
|
"logps/ref_chosen": -65.34695434570312,
|
|
"logps/ref_rejected": -79.315673828125,
|
|
"logps/rejected": -79.32512664794922,
|
|
"loss": 1.3848,
|
|
"margin_dpo/margin_mean": 0.01781691610813141,
|
|
"margin_dpo/margin_std": 0.3064817190170288,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.015117157974300832,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.005693185143172741,
|
|
"fcm_dpo/q_t": 0.4998575747013092,
|
|
"grad_norm": 29.699796676635742,
|
|
"learning_rate": 6.71641791044776e-08,
|
|
"logits/chosen": 0.11204878240823746,
|
|
"logits/rejected": 0.07268079370260239,
|
|
"logps/chosen": -56.658607482910156,
|
|
"logps/ref_chosen": -56.65692901611328,
|
|
"logps/ref_rejected": -80.12786865234375,
|
|
"logps/rejected": -80.13523864746094,
|
|
"loss": 1.386,
|
|
"margin_dpo/margin_mean": 0.005692988634109497,
|
|
"margin_dpo/margin_std": 0.2990571856498718,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.022675736961451247,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.03819512203335762,
|
|
"fcm_dpo/q_t": 0.49904537200927734,
|
|
"grad_norm": 33.20173263549805,
|
|
"learning_rate": 1.044776119402985e-07,
|
|
"logits/chosen": 0.08240987360477448,
|
|
"logits/rejected": 0.05311817675828934,
|
|
"logps/chosen": -60.08363723754883,
|
|
"logps/ref_chosen": -60.09392166137695,
|
|
"logps/ref_rejected": -78.99056243896484,
|
|
"logps/rejected": -79.01847839355469,
|
|
"loss": 1.3827,
|
|
"margin_dpo/margin_mean": 0.03819500282406807,
|
|
"margin_dpo/margin_std": 0.31105566024780273,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.030234315948601664,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.020285824313759804,
|
|
"fcm_dpo/q_t": 0.4994930624961853,
|
|
"grad_norm": 29.200054168701172,
|
|
"learning_rate": 1.4179104477611938e-07,
|
|
"logits/chosen": 0.09025263786315918,
|
|
"logits/rejected": 0.06288890540599823,
|
|
"logps/chosen": -55.444427490234375,
|
|
"logps/ref_chosen": -55.464561462402344,
|
|
"logps/ref_rejected": -77.40013122558594,
|
|
"logps/rejected": -77.40027618408203,
|
|
"loss": 1.3845,
|
|
"margin_dpo/margin_mean": 0.02028590813279152,
|
|
"margin_dpo/margin_std": 0.30799758434295654,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.03779289493575208,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.026140457019209862,
|
|
"fcm_dpo/q_t": 0.4993467926979065,
|
|
"grad_norm": 29.59698486328125,
|
|
"learning_rate": 1.7910447761194027e-07,
|
|
"logits/chosen": 0.10830628871917725,
|
|
"logits/rejected": 0.07654412090778351,
|
|
"logps/chosen": -60.71149826049805,
|
|
"logps/ref_chosen": -60.711814880371094,
|
|
"logps/ref_rejected": -82.71756744384766,
|
|
"logps/rejected": -82.7433853149414,
|
|
"loss": 1.3839,
|
|
"margin_dpo/margin_mean": 0.026140112429857254,
|
|
"margin_dpo/margin_std": 0.30088797211647034,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.045351473922902494,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.005647105164825916,
|
|
"fcm_dpo/q_t": 0.5001412034034729,
|
|
"grad_norm": 30.446046829223633,
|
|
"learning_rate": 2.1641791044776117e-07,
|
|
"logits/chosen": 0.10900239646434784,
|
|
"logits/rejected": 0.08392996340990067,
|
|
"logps/chosen": -60.911338806152344,
|
|
"logps/ref_chosen": -60.880210876464844,
|
|
"logps/ref_rejected": -78.44148254394531,
|
|
"logps/rejected": -78.46694946289062,
|
|
"loss": 1.3871,
|
|
"margin_dpo/margin_mean": -0.005647194571793079,
|
|
"margin_dpo/margin_std": 0.3177885413169861,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.05291005291005291,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.02348313294351101,
|
|
"fcm_dpo/q_t": 0.4994131922721863,
|
|
"grad_norm": 27.848325729370117,
|
|
"learning_rate": 2.537313432835821e-07,
|
|
"logits/chosen": 0.06021007150411606,
|
|
"logits/rejected": 0.03433113545179367,
|
|
"logps/chosen": -62.28969192504883,
|
|
"logps/ref_chosen": -62.248138427734375,
|
|
"logps/ref_rejected": -79.56475830078125,
|
|
"logps/rejected": -79.62977600097656,
|
|
"loss": 1.3843,
|
|
"margin_dpo/margin_mean": 0.023483287543058395,
|
|
"margin_dpo/margin_std": 0.3572625517845154,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.06046863189720333,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.05089018866419792,
|
|
"fcm_dpo/q_t": 0.4987284243106842,
|
|
"grad_norm": 31.253028869628906,
|
|
"learning_rate": 2.9104477611940296e-07,
|
|
"logits/chosen": 0.10225018113851547,
|
|
"logits/rejected": 0.05802680179476738,
|
|
"logps/chosen": -58.953025817871094,
|
|
"logps/ref_chosen": -58.87812423706055,
|
|
"logps/ref_rejected": -84.22982025146484,
|
|
"logps/rejected": -84.3556137084961,
|
|
"loss": 1.3816,
|
|
"margin_dpo/margin_mean": 0.0508904755115509,
|
|
"margin_dpo/margin_std": 0.358222097158432,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.06802721088435375,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.04126746580004692,
|
|
"fcm_dpo/q_t": 0.4989686906337738,
|
|
"grad_norm": 31.914793014526367,
|
|
"learning_rate": 3.2835820895522385e-07,
|
|
"logits/chosen": 0.07056603580713272,
|
|
"logits/rejected": 0.04329410195350647,
|
|
"logps/chosen": -66.03207397460938,
|
|
"logps/ref_chosen": -65.88298034667969,
|
|
"logps/ref_rejected": -83.87881469726562,
|
|
"logps/rejected": -84.06917572021484,
|
|
"loss": 1.3826,
|
|
"margin_dpo/margin_mean": 0.04126756638288498,
|
|
"margin_dpo/margin_std": 0.3951026499271393,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.07558578987150416,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.09929290413856506,
|
|
"fcm_dpo/q_t": 0.49751925468444824,
|
|
"grad_norm": 27.976877212524414,
|
|
"learning_rate": 3.6567164179104475e-07,
|
|
"logits/chosen": 0.07899191230535507,
|
|
"logits/rejected": 0.04546006768941879,
|
|
"logps/chosen": -55.375457763671875,
|
|
"logps/ref_chosen": -55.172386169433594,
|
|
"logps/ref_rejected": -69.63300323486328,
|
|
"logps/rejected": -69.93535614013672,
|
|
"loss": 1.3769,
|
|
"margin_dpo/margin_mean": 0.099293053150177,
|
|
"margin_dpo/margin_std": 0.43157047033309937,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.08314436885865457,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.20769624412059784,
|
|
"fcm_dpo/q_t": 0.494814395904541,
|
|
"grad_norm": 31.212663650512695,
|
|
"learning_rate": 4.0298507462686564e-07,
|
|
"logits/chosen": 0.07141193002462387,
|
|
"logits/rejected": 0.03525683656334877,
|
|
"logps/chosen": -57.53242874145508,
|
|
"logps/ref_chosen": -57.193580627441406,
|
|
"logps/ref_rejected": -79.69940948486328,
|
|
"logps/rejected": -80.24595642089844,
|
|
"loss": 1.3664,
|
|
"margin_dpo/margin_mean": 0.20769624412059784,
|
|
"margin_dpo/margin_std": 0.5274912118911743,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.09070294784580499,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.2171248197555542,
|
|
"fcm_dpo/q_t": 0.4945871829986572,
|
|
"grad_norm": 29.49753761291504,
|
|
"learning_rate": 4.4029850746268654e-07,
|
|
"logits/chosen": 0.11127179861068726,
|
|
"logits/rejected": 0.07682862877845764,
|
|
"logps/chosen": -60.594451904296875,
|
|
"logps/ref_chosen": -60.068870544433594,
|
|
"logps/ref_rejected": -74.41178894042969,
|
|
"logps/rejected": -75.15449523925781,
|
|
"loss": 1.3662,
|
|
"margin_dpo/margin_mean": 0.21712493896484375,
|
|
"margin_dpo/margin_std": 0.7363327741622925,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.0982615268329554,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.29596805572509766,
|
|
"fcm_dpo/q_t": 0.49262505769729614,
|
|
"grad_norm": 30.78485870361328,
|
|
"learning_rate": 4.776119402985074e-07,
|
|
"logits/chosen": 0.15267546474933624,
|
|
"logits/rejected": 0.12211690843105316,
|
|
"logps/chosen": -58.8918342590332,
|
|
"logps/ref_chosen": -58.1558952331543,
|
|
"logps/ref_rejected": -76.06512451171875,
|
|
"logps/rejected": -77.09703063964844,
|
|
"loss": 1.3594,
|
|
"margin_dpo/margin_mean": 0.29596781730651855,
|
|
"margin_dpo/margin_std": 0.9618409276008606,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.10582010582010581,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.4171249270439148,
|
|
"fcm_dpo/q_t": 0.489621102809906,
|
|
"grad_norm": 28.725130081176758,
|
|
"learning_rate": 4.999860140229787e-07,
|
|
"logits/chosen": 0.09666416794061661,
|
|
"logits/rejected": 0.06161420792341232,
|
|
"logps/chosen": -68.41984558105469,
|
|
"logps/ref_chosen": -67.35506439208984,
|
|
"logps/ref_rejected": -82.24962615966797,
|
|
"logps/rejected": -83.73153686523438,
|
|
"loss": 1.3485,
|
|
"margin_dpo/margin_mean": 0.41712498664855957,
|
|
"margin_dpo/margin_std": 1.1459109783172607,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.11337868480725624,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.5088232755661011,
|
|
"fcm_dpo/q_t": 0.48737573623657227,
|
|
"grad_norm": 26.100767135620117,
|
|
"learning_rate": 4.998286897523808e-07,
|
|
"logits/chosen": 0.1186274066567421,
|
|
"logits/rejected": 0.08292581140995026,
|
|
"logps/chosen": -58.2871208190918,
|
|
"logps/ref_chosen": -56.86763381958008,
|
|
"logps/ref_rejected": -72.56938934326172,
|
|
"logps/rejected": -74.4977035522461,
|
|
"loss": 1.3414,
|
|
"margin_dpo/margin_mean": 0.5088233947753906,
|
|
"margin_dpo/margin_std": 1.4009206295013428,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.12093726379440665,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.6476309895515442,
|
|
"fcm_dpo/q_t": 0.48400768637657166,
|
|
"grad_norm": 28.427127838134766,
|
|
"learning_rate": 4.994966691179711e-07,
|
|
"logits/chosen": 0.13891419768333435,
|
|
"logits/rejected": 0.09406773746013641,
|
|
"logps/chosen": -59.4609489440918,
|
|
"logps/ref_chosen": -57.687095642089844,
|
|
"logps/ref_rejected": -78.06813049316406,
|
|
"logps/rejected": -80.48960876464844,
|
|
"loss": 1.3303,
|
|
"margin_dpo/margin_mean": 0.6476308703422546,
|
|
"margin_dpo/margin_std": 1.698277235031128,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.12849584278155707,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.0366328954696655,
|
|
"fcm_dpo/q_t": 0.47437912225723267,
|
|
"grad_norm": 26.8502254486084,
|
|
"learning_rate": 4.989901842900325e-07,
|
|
"logits/chosen": 0.15370258688926697,
|
|
"logits/rejected": 0.10728434473276138,
|
|
"logps/chosen": -59.108184814453125,
|
|
"logps/ref_chosen": -56.96040725708008,
|
|
"logps/ref_rejected": -75.22166442871094,
|
|
"logps/rejected": -78.40606689453125,
|
|
"loss": 1.2938,
|
|
"margin_dpo/margin_mean": 1.036632776260376,
|
|
"margin_dpo/margin_std": 1.8020055294036865,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.1360544217687075,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.1005299091339111,
|
|
"fcm_dpo/q_t": 0.47299760580062866,
|
|
"grad_norm": 29.18059730529785,
|
|
"learning_rate": 4.983095894354857e-07,
|
|
"logits/chosen": 0.20202812552452087,
|
|
"logits/rejected": 0.15330952405929565,
|
|
"logps/chosen": -60.3327751159668,
|
|
"logps/ref_chosen": -57.41730499267578,
|
|
"logps/ref_rejected": -80.87986755371094,
|
|
"logps/rejected": -84.89588165283203,
|
|
"loss": 1.295,
|
|
"margin_dpo/margin_mean": 1.1005302667617798,
|
|
"margin_dpo/margin_std": 2.45450758934021,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.1436130007558579,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.4387786388397217,
|
|
"fcm_dpo/q_t": 0.46537190675735474,
|
|
"grad_norm": 29.217578887939453,
|
|
"learning_rate": 4.974553604702332e-07,
|
|
"logits/chosen": 0.2133176326751709,
|
|
"logits/rejected": 0.16870227456092834,
|
|
"logps/chosen": -57.59110641479492,
|
|
"logps/ref_chosen": -54.08087158203125,
|
|
"logps/ref_rejected": -76.15860748291016,
|
|
"logps/rejected": -81.10761260986328,
|
|
"loss": 1.2756,
|
|
"margin_dpo/margin_mean": 1.4387786388397217,
|
|
"margin_dpo/margin_std": 3.3348701000213623,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.15117157974300832,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.1994415521621704,
|
|
"fcm_dpo/q_t": 0.4720003604888916,
|
|
"grad_norm": 33.0871467590332,
|
|
"learning_rate": 4.964280947263676e-07,
|
|
"logits/chosen": 0.22379426658153534,
|
|
"logits/rejected": 0.19079172611236572,
|
|
"logps/chosen": -68.65206909179688,
|
|
"logps/ref_chosen": -63.875038146972656,
|
|
"logps/ref_rejected": -82.077880859375,
|
|
"logps/rejected": -88.05435180664062,
|
|
"loss": 1.325,
|
|
"margin_dpo/margin_mean": 1.1994414329528809,
|
|
"margin_dpo/margin_std": 4.622773170471191,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.15873015873015872,
|
|
"fcm_dpo/beta": 0.12098121643066406,
|
|
"fcm_dpo/delta": 0.44622865319252014,
|
|
"fcm_dpo/margin": 1.7318670749664307,
|
|
"fcm_dpo/q_t": 0.45575252175331116,
|
|
"grad_norm": 46.90412521362305,
|
|
"learning_rate": 4.952285105344791e-07,
|
|
"logits/chosen": 0.22520050406455994,
|
|
"logits/rejected": 0.1741827428340912,
|
|
"logps/chosen": -67.464599609375,
|
|
"logps/ref_chosen": -62.572479248046875,
|
|
"logps/ref_rejected": -80.93415069580078,
|
|
"logps/rejected": -87.5581283569336,
|
|
"loss": 1.2749,
|
|
"margin_dpo/margin_mean": 1.7318668365478516,
|
|
"margin_dpo/margin_std": 4.902867317199707,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.16628873771730915,
|
|
"fcm_dpo/beta": 0.21049818396568298,
|
|
"fcm_dpo/delta": 0.4119214117527008,
|
|
"fcm_dpo/margin": 1.4851243495941162,
|
|
"fcm_dpo/q_t": 0.4385649561882019,
|
|
"grad_norm": 121.93978881835938,
|
|
"learning_rate": 4.938574467213517e-07,
|
|
"logits/chosen": 0.1968976855278015,
|
|
"logits/rejected": 0.1738019436597824,
|
|
"logps/chosen": -74.165283203125,
|
|
"logps/ref_chosen": -68.67534637451172,
|
|
"logps/ref_rejected": -78.82028198242188,
|
|
"logps/rejected": -85.79533386230469,
|
|
"loss": 1.3615,
|
|
"margin_dpo/margin_mean": 1.4851243495941162,
|
|
"margin_dpo/margin_std": 5.345554351806641,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.17384731670445955,
|
|
"fcm_dpo/beta": 0.29616811871528625,
|
|
"fcm_dpo/delta": 0.38898569345474243,
|
|
"fcm_dpo/margin": 1.868272066116333,
|
|
"fcm_dpo/q_t": 0.40087947249412537,
|
|
"grad_norm": 101.64105987548828,
|
|
"learning_rate": 4.923158620234019e-07,
|
|
"logits/chosen": 0.23364102840423584,
|
|
"logits/rejected": 0.17991967499256134,
|
|
"logps/chosen": -63.528472900390625,
|
|
"logps/ref_chosen": -58.65370559692383,
|
|
"logps/ref_rejected": -81.89688873291016,
|
|
"logps/rejected": -88.63993835449219,
|
|
"loss": 1.2105,
|
|
"margin_dpo/margin_mean": 1.8682724237442017,
|
|
"margin_dpo/margin_std": 4.156613349914551,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.18140589569160998,
|
|
"fcm_dpo/beta": 0.3783304691314697,
|
|
"fcm_dpo/delta": 0.16895940899848938,
|
|
"fcm_dpo/margin": 2.0111002922058105,
|
|
"fcm_dpo/q_t": 0.36017656326293945,
|
|
"grad_norm": 91.80081939697266,
|
|
"learning_rate": 4.906048344162676e-07,
|
|
"logits/chosen": 0.2208259552717209,
|
|
"logits/rejected": 0.17146429419517517,
|
|
"logps/chosen": -60.4900016784668,
|
|
"logps/ref_chosen": -56.16423797607422,
|
|
"logps/ref_rejected": -75.87689971923828,
|
|
"logps/rejected": -82.2137680053711,
|
|
"loss": 1.0835,
|
|
"margin_dpo/margin_mean": 2.0111002922058105,
|
|
"margin_dpo/margin_std": 3.2962546348571777,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.1889644746787604,
|
|
"fcm_dpo/beta": 0.4266550540924072,
|
|
"fcm_dpo/delta": 0.04714610427618027,
|
|
"fcm_dpo/margin": 2.240276575088501,
|
|
"fcm_dpo/q_t": 0.3378121256828308,
|
|
"grad_norm": 101.21341705322266,
|
|
"learning_rate": 4.887255603610184e-07,
|
|
"logits/chosen": 0.22850540280342102,
|
|
"logits/rejected": 0.1721857488155365,
|
|
"logps/chosen": -64.10456085205078,
|
|
"logps/ref_chosen": -59.744285583496094,
|
|
"logps/ref_rejected": -86.77314758300781,
|
|
"logps/rejected": -93.37370300292969,
|
|
"loss": 0.9694,
|
|
"margin_dpo/margin_mean": 2.24027681350708,
|
|
"margin_dpo/margin_std": 3.2456538677215576,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.1965230536659108,
|
|
"fcm_dpo/beta": 0.46494919061660767,
|
|
"fcm_dpo/delta": 0.13100966811180115,
|
|
"fcm_dpo/margin": 1.8871605396270752,
|
|
"fcm_dpo/q_t": 0.352043092250824,
|
|
"grad_norm": 143.7098846435547,
|
|
"learning_rate": 4.866793539675126e-07,
|
|
"logits/chosen": 0.15394529700279236,
|
|
"logits/rejected": 0.12398996204137802,
|
|
"logps/chosen": -68.61878967285156,
|
|
"logps/ref_chosen": -64.15296936035156,
|
|
"logps/ref_rejected": -75.17271423339844,
|
|
"logps/rejected": -81.52568054199219,
|
|
"loss": 1.0946,
|
|
"margin_dpo/margin_mean": 1.8871605396270752,
|
|
"margin_dpo/margin_std": 3.1590871810913086,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.20408163265306123,
|
|
"fcm_dpo/beta": 0.4789491593837738,
|
|
"fcm_dpo/delta": -0.02141920104622841,
|
|
"fcm_dpo/margin": 2.124882221221924,
|
|
"fcm_dpo/q_t": 0.326783150434494,
|
|
"grad_norm": 103.89042663574219,
|
|
"learning_rate": 4.844676460754862e-07,
|
|
"logits/chosen": 0.200178861618042,
|
|
"logits/rejected": 0.16137003898620605,
|
|
"logps/chosen": -61.23235321044922,
|
|
"logps/ref_chosen": -57.006690979003906,
|
|
"logps/ref_rejected": -73.71768188476562,
|
|
"logps/rejected": -80.06822204589844,
|
|
"loss": 1.0191,
|
|
"margin_dpo/margin_mean": 2.1248817443847656,
|
|
"margin_dpo/margin_std": 3.1723520755767822,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.21164021164021163,
|
|
"fcm_dpo/beta": 0.48734474182128906,
|
|
"fcm_dpo/delta": -0.02495430037379265,
|
|
"fcm_dpo/margin": 2.0965754985809326,
|
|
"fcm_dpo/q_t": 0.3367246687412262,
|
|
"grad_norm": 139.94981384277344,
|
|
"learning_rate": 4.820919832540181e-07,
|
|
"logits/chosen": 0.16469994187355042,
|
|
"logits/rejected": 0.12346775829792023,
|
|
"logps/chosen": -67.96693420410156,
|
|
"logps/ref_chosen": -63.36246871948242,
|
|
"logps/ref_rejected": -79.62621307373047,
|
|
"logps/rejected": -86.32726287841797,
|
|
"loss": 1.0969,
|
|
"margin_dpo/margin_mean": 2.0965757369995117,
|
|
"margin_dpo/margin_std": 3.4793498516082764,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.21919879062736206,
|
|
"fcm_dpo/beta": 0.4783070683479309,
|
|
"fcm_dpo/delta": 0.03718746080994606,
|
|
"fcm_dpo/margin": 2.0187315940856934,
|
|
"fcm_dpo/q_t": 0.34199976921081543,
|
|
"grad_norm": 141.78863525390625,
|
|
"learning_rate": 4.795540267200686e-07,
|
|
"logits/chosen": 0.1696348637342453,
|
|
"logits/rejected": 0.15050214529037476,
|
|
"logps/chosen": -69.42279052734375,
|
|
"logps/ref_chosen": -65.01470184326172,
|
|
"logps/ref_rejected": -80.49073791503906,
|
|
"logps/rejected": -86.91755676269531,
|
|
"loss": 1.0997,
|
|
"margin_dpo/margin_mean": 2.0187315940856934,
|
|
"margin_dpo/margin_std": 3.3963191509246826,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.22675736961451248,
|
|
"fcm_dpo/beta": 0.4847794473171234,
|
|
"fcm_dpo/delta": -0.09536196291446686,
|
|
"fcm_dpo/margin": 2.245018243789673,
|
|
"fcm_dpo/q_t": 0.31795617938041687,
|
|
"grad_norm": 95.42865753173828,
|
|
"learning_rate": 4.768555511768486e-07,
|
|
"logits/chosen": 0.16661684215068817,
|
|
"logits/rejected": 0.12393312156200409,
|
|
"logps/chosen": -63.37464141845703,
|
|
"logps/ref_chosen": -59.19135284423828,
|
|
"logps/ref_rejected": -74.0339126586914,
|
|
"logps/rejected": -80.46221923828125,
|
|
"loss": 0.9733,
|
|
"margin_dpo/margin_mean": 2.2450177669525146,
|
|
"margin_dpo/margin_std": 3.1093335151672363,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.23431594860166288,
|
|
"fcm_dpo/beta": 0.4329656660556793,
|
|
"fcm_dpo/delta": 0.01940056122839451,
|
|
"fcm_dpo/margin": 2.265953302383423,
|
|
"fcm_dpo/q_t": 0.3330842852592468,
|
|
"grad_norm": 119.63710021972656,
|
|
"learning_rate": 4.7399844357283393e-07,
|
|
"logits/chosen": 0.17574964463710785,
|
|
"logits/rejected": 0.1448710560798645,
|
|
"logps/chosen": -65.07331848144531,
|
|
"logps/ref_chosen": -60.93949508666992,
|
|
"logps/ref_rejected": -74.51151275634766,
|
|
"logps/rejected": -80.91129302978516,
|
|
"loss": 1.022,
|
|
"margin_dpo/margin_mean": 2.2659528255462646,
|
|
"margin_dpo/margin_std": 3.3525185585021973,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.2418745275888133,
|
|
"fcm_dpo/beta": 0.4543333649635315,
|
|
"fcm_dpo/delta": 0.024721205234527588,
|
|
"fcm_dpo/margin": 2.1477513313293457,
|
|
"fcm_dpo/q_t": 0.3333725333213806,
|
|
"grad_norm": 114.04137420654297,
|
|
"learning_rate": 4.7098470178228755e-07,
|
|
"logits/chosen": 0.14524099230766296,
|
|
"logits/rejected": 0.11148606240749359,
|
|
"logps/chosen": -62.98958206176758,
|
|
"logps/ref_chosen": -58.763816833496094,
|
|
"logps/ref_rejected": -74.94743347167969,
|
|
"logps/rejected": -81.32093811035156,
|
|
"loss": 1.0069,
|
|
"margin_dpo/margin_mean": 2.147751569747925,
|
|
"margin_dpo/margin_std": 3.1879055500030518,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.2494331065759637,
|
|
"fcm_dpo/beta": 0.43008118867874146,
|
|
"fcm_dpo/delta": -0.13953472673892975,
|
|
"fcm_dpo/margin": 2.613295316696167,
|
|
"fcm_dpo/q_t": 0.3099823296070099,
|
|
"grad_norm": 84.81941986083984,
|
|
"learning_rate": 4.678164332082175e-07,
|
|
"logits/chosen": 0.17675867676734924,
|
|
"logits/rejected": 0.13011161983013153,
|
|
"logps/chosen": -60.25602340698242,
|
|
"logps/ref_chosen": -55.70417022705078,
|
|
"logps/ref_rejected": -76.59439849853516,
|
|
"logps/rejected": -83.7595443725586,
|
|
"loss": 0.9239,
|
|
"margin_dpo/margin_mean": 2.613295793533325,
|
|
"margin_dpo/margin_std": 3.334416151046753,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.25699168556311414,
|
|
"fcm_dpo/beta": 0.43538564443588257,
|
|
"fcm_dpo/delta": 0.10406246036291122,
|
|
"fcm_dpo/margin": 2.0714168548583984,
|
|
"fcm_dpo/q_t": 0.34412893652915955,
|
|
"grad_norm": 95.16687774658203,
|
|
"learning_rate": 4.6449585330874425e-07,
|
|
"logits/chosen": 0.1478302776813507,
|
|
"logits/rejected": 0.11266426742076874,
|
|
"logps/chosen": -65.52555847167969,
|
|
"logps/ref_chosen": -61.169105529785156,
|
|
"logps/ref_rejected": -77.21674346923828,
|
|
"logps/rejected": -83.64461517333984,
|
|
"loss": 1.0106,
|
|
"margin_dpo/margin_mean": 2.0714163780212402,
|
|
"margin_dpo/margin_std": 3.096672534942627,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.26455026455026454,
|
|
"fcm_dpo/beta": 0.4220406115055084,
|
|
"fcm_dpo/delta": -0.11823473125696182,
|
|
"fcm_dpo/margin": 2.6146881580352783,
|
|
"fcm_dpo/q_t": 0.3217321038246155,
|
|
"grad_norm": 106.60182189941406,
|
|
"learning_rate": 4.6102528404790965e-07,
|
|
"logits/chosen": 0.19509582221508026,
|
|
"logits/rejected": 0.1433831751346588,
|
|
"logps/chosen": -63.82081985473633,
|
|
"logps/ref_chosen": -59.24176788330078,
|
|
"logps/ref_rejected": -81.80384826660156,
|
|
"logps/rejected": -88.99759674072266,
|
|
"loss": 1.0154,
|
|
"margin_dpo/margin_mean": 2.6146881580352783,
|
|
"margin_dpo/margin_std": 3.812539577484131,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.272108843537415,
|
|
"fcm_dpo/beta": 0.39969393610954285,
|
|
"fcm_dpo/delta": 0.044442176818847656,
|
|
"fcm_dpo/margin": 2.3937482833862305,
|
|
"fcm_dpo/q_t": 0.34041857719421387,
|
|
"grad_norm": 114.9355697631836,
|
|
"learning_rate": 4.5740715227200897e-07,
|
|
"logits/chosen": 0.16883400082588196,
|
|
"logits/rejected": 0.1312471628189087,
|
|
"logps/chosen": -68.145751953125,
|
|
"logps/ref_chosen": -63.24883270263672,
|
|
"logps/ref_rejected": -79.00736236572266,
|
|
"logps/rejected": -86.29803466796875,
|
|
"loss": 1.0492,
|
|
"margin_dpo/margin_mean": 2.3937482833862305,
|
|
"margin_dpo/margin_std": 3.709543228149414,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.2796674225245654,
|
|
"fcm_dpo/beta": 0.4048680365085602,
|
|
"fcm_dpo/delta": 0.024971742182970047,
|
|
"fcm_dpo/margin": 2.408203601837158,
|
|
"fcm_dpo/q_t": 0.3345295786857605,
|
|
"grad_norm": 101.9957046508789,
|
|
"learning_rate": 4.5364398801258394e-07,
|
|
"logits/chosen": 0.19345621764659882,
|
|
"logits/rejected": 0.14936503767967224,
|
|
"logps/chosen": -61.29529571533203,
|
|
"logps/ref_chosen": -56.390625,
|
|
"logps/ref_rejected": -76.81001281738281,
|
|
"logps/rejected": -84.12287902832031,
|
|
"loss": 1.018,
|
|
"margin_dpo/margin_mean": 2.4082038402557373,
|
|
"margin_dpo/margin_std": 3.511875867843628,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.2872260015117158,
|
|
"fcm_dpo/beta": 0.41114169359207153,
|
|
"fcm_dpo/delta": -0.07483033090829849,
|
|
"fcm_dpo/margin": 2.591209888458252,
|
|
"fcm_dpo/q_t": 0.31742408871650696,
|
|
"grad_norm": 99.17886352539062,
|
|
"learning_rate": 4.4973842271726024e-07,
|
|
"logits/chosen": 0.1815711259841919,
|
|
"logits/rejected": 0.13424037396907806,
|
|
"logps/chosen": -72.9486312866211,
|
|
"logps/ref_chosen": -68.25389099121094,
|
|
"logps/ref_rejected": -86.461181640625,
|
|
"logps/rejected": -93.74713134765625,
|
|
"loss": 0.9814,
|
|
"margin_dpo/margin_mean": 2.591209888458252,
|
|
"margin_dpo/margin_std": 3.6213302612304688,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.2947845804988662,
|
|
"fcm_dpo/beta": 0.4059467911720276,
|
|
"fcm_dpo/delta": 0.04542668163776398,
|
|
"fcm_dpo/margin": 2.355367422103882,
|
|
"fcm_dpo/q_t": 0.33451682329177856,
|
|
"grad_norm": 103.90733337402344,
|
|
"learning_rate": 4.4569318740967043e-07,
|
|
"logits/chosen": 0.19510746002197266,
|
|
"logits/rejected": 0.17633949220180511,
|
|
"logps/chosen": -67.19489288330078,
|
|
"logps/ref_chosen": -62.1484260559082,
|
|
"logps/ref_rejected": -71.33458709716797,
|
|
"logps/rejected": -78.73640441894531,
|
|
"loss": 1.0004,
|
|
"margin_dpo/margin_mean": 2.3553671836853027,
|
|
"margin_dpo/margin_std": 3.4357311725616455,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.30234315948601664,
|
|
"fcm_dpo/beta": 0.40270957350730896,
|
|
"fcm_dpo/delta": -0.10094480216503143,
|
|
"fcm_dpo/margin": 2.705155611038208,
|
|
"fcm_dpo/q_t": 0.31455981731414795,
|
|
"grad_norm": 86.0372085571289,
|
|
"learning_rate": 4.415111107797445e-07,
|
|
"logits/chosen": 0.22762131690979004,
|
|
"logits/rejected": 0.17773905396461487,
|
|
"logps/chosen": -61.629058837890625,
|
|
"logps/ref_chosen": -56.950096130371094,
|
|
"logps/ref_rejected": -78.66989135742188,
|
|
"logps/rejected": -86.05402374267578,
|
|
"loss": 0.9878,
|
|
"margin_dpo/margin_mean": 2.705155611038208,
|
|
"margin_dpo/margin_std": 3.7367305755615234,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.30234315948601664,
|
|
"eval_fcm_dpo/beta": 0.44184160232543945,
|
|
"eval_fcm_dpo/delta": 0.019765857607126236,
|
|
"eval_fcm_dpo/margin": 2.217298984527588,
|
|
"eval_fcm_dpo/q_t": 0.34747642278671265,
|
|
"eval_logits/chosen": 0.23132538795471191,
|
|
"eval_logits/rejected": 0.19121667742729187,
|
|
"eval_logps/chosen": -79.51517486572266,
|
|
"eval_logps/ref_chosen": -74.85946655273438,
|
|
"eval_logps/ref_rejected": -79.54898834228516,
|
|
"eval_logps/rejected": -86.4219970703125,
|
|
"eval_loss": 0.5709094405174255,
|
|
"eval_margin_dpo/margin_mean": 2.217298984527588,
|
|
"eval_margin_dpo/margin_std": 3.880441665649414,
|
|
"eval_runtime": 38.3936,
|
|
"eval_samples_per_second": 59.984,
|
|
"eval_steps_per_second": 1.875,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.30990173847316704,
|
|
"fcm_dpo/beta": 0.4148198664188385,
|
|
"fcm_dpo/delta": -0.02567175403237343,
|
|
"fcm_dpo/margin": 2.4526686668395996,
|
|
"fcm_dpo/q_t": 0.3341201841831207,
|
|
"grad_norm": 142.11912536621094,
|
|
"learning_rate": 4.3719511720570814e-07,
|
|
"logits/chosen": 0.22209465503692627,
|
|
"logits/rejected": 0.16845160722732544,
|
|
"logps/chosen": -62.80231475830078,
|
|
"logps/ref_chosen": -57.99428176879883,
|
|
"logps/ref_rejected": -83.5367431640625,
|
|
"logps/rejected": -90.79744720458984,
|
|
"loss": 1.0393,
|
|
"margin_dpo/margin_mean": 2.4526686668395996,
|
|
"margin_dpo/margin_std": 3.7842280864715576,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.31746031746031744,
|
|
"fcm_dpo/beta": 0.42894989252090454,
|
|
"fcm_dpo/delta": -0.02596813440322876,
|
|
"fcm_dpo/margin": 2.375964879989624,
|
|
"fcm_dpo/q_t": 0.3406800329685211,
|
|
"grad_norm": 91.5486068725586,
|
|
"learning_rate": 4.327482247091679e-07,
|
|
"logits/chosen": 0.2209012508392334,
|
|
"logits/rejected": 0.1760309338569641,
|
|
"logps/chosen": -68.58443450927734,
|
|
"logps/ref_chosen": -63.77195358276367,
|
|
"logps/ref_rejected": -82.56491088867188,
|
|
"logps/rejected": -89.75337219238281,
|
|
"loss": 1.0863,
|
|
"margin_dpo/margin_mean": 2.375964641571045,
|
|
"margin_dpo/margin_std": 3.8247761726379395,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.3250188964474679,
|
|
"fcm_dpo/beta": 0.399940550327301,
|
|
"fcm_dpo/delta": -0.0751124769449234,
|
|
"fcm_dpo/margin": 2.6684412956237793,
|
|
"fcm_dpo/q_t": 0.3200802505016327,
|
|
"grad_norm": 96.38282012939453,
|
|
"learning_rate": 4.281735428447157e-07,
|
|
"logits/chosen": 0.19080176949501038,
|
|
"logits/rejected": 0.13846060633659363,
|
|
"logps/chosen": -64.53096008300781,
|
|
"logps/ref_chosen": -60.27800750732422,
|
|
"logps/ref_rejected": -83.91607666015625,
|
|
"logps/rejected": -90.83747863769531,
|
|
"loss": 0.9493,
|
|
"margin_dpo/margin_mean": 2.6684412956237793,
|
|
"margin_dpo/margin_std": 3.6150002479553223,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.3325774754346183,
|
|
"fcm_dpo/beta": 0.36988887190818787,
|
|
"fcm_dpo/delta": 0.039208363741636276,
|
|
"fcm_dpo/margin": 2.6028714179992676,
|
|
"fcm_dpo/q_t": 0.33981165289878845,
|
|
"grad_norm": 77.6009521484375,
|
|
"learning_rate": 4.234742705255272e-07,
|
|
"logits/chosen": 0.18831773102283478,
|
|
"logits/rejected": 0.13733841478824615,
|
|
"logps/chosen": -65.37849426269531,
|
|
"logps/ref_chosen": -60.88572311401367,
|
|
"logps/ref_rejected": -80.1805191040039,
|
|
"logps/rejected": -87.27616119384766,
|
|
"loss": 1.0103,
|
|
"margin_dpo/margin_mean": 2.6028714179992676,
|
|
"margin_dpo/margin_std": 3.930713653564453,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.3401360544217687,
|
|
"fcm_dpo/beta": 0.3809080719947815,
|
|
"fcm_dpo/delta": -0.03357607498764992,
|
|
"fcm_dpo/margin": 2.6959452629089355,
|
|
"fcm_dpo/q_t": 0.325172483921051,
|
|
"grad_norm": 119.093017578125,
|
|
"learning_rate": 4.186536937864752e-07,
|
|
"logits/chosen": 0.23317813873291016,
|
|
"logits/rejected": 0.17639882862567902,
|
|
"logps/chosen": -65.24988555908203,
|
|
"logps/ref_chosen": -61.02507781982422,
|
|
"logps/ref_rejected": -91.92439270019531,
|
|
"logps/rejected": -98.84513854980469,
|
|
"loss": 0.9308,
|
|
"margin_dpo/margin_mean": 2.6959452629089355,
|
|
"margin_dpo/margin_std": 3.5449512004852295,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.3476946334089191,
|
|
"fcm_dpo/beta": 0.3940550982952118,
|
|
"fcm_dpo/delta": 0.04864387959241867,
|
|
"fcm_dpo/margin": 2.42222261428833,
|
|
"fcm_dpo/q_t": 0.3377589285373688,
|
|
"grad_norm": 103.90534210205078,
|
|
"learning_rate": 4.137151834863213e-07,
|
|
"logits/chosen": 0.2553010582923889,
|
|
"logits/rejected": 0.21687361598014832,
|
|
"logps/chosen": -58.69348907470703,
|
|
"logps/ref_chosen": -54.49797821044922,
|
|
"logps/ref_rejected": -71.96363830566406,
|
|
"logps/rejected": -78.58135986328125,
|
|
"loss": 1.0312,
|
|
"margin_dpo/margin_mean": 2.422222375869751,
|
|
"margin_dpo/margin_std": 3.698529005050659,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.35525321239606955,
|
|
"fcm_dpo/beta": 0.38651371002197266,
|
|
"fcm_dpo/delta": -0.027495551854372025,
|
|
"fcm_dpo/margin": 2.6372780799865723,
|
|
"fcm_dpo/q_t": 0.3264053761959076,
|
|
"grad_norm": 89.52863311767578,
|
|
"learning_rate": 4.08662192950594e-07,
|
|
"logits/chosen": 0.17925067245960236,
|
|
"logits/rejected": 0.1555873155593872,
|
|
"logps/chosen": -67.6564712524414,
|
|
"logps/ref_chosen": -63.250282287597656,
|
|
"logps/ref_rejected": -73.09049987792969,
|
|
"logps/rejected": -80.13397216796875,
|
|
"loss": 0.967,
|
|
"margin_dpo/margin_mean": 2.637277603149414,
|
|
"margin_dpo/margin_std": 3.5861003398895264,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.36281179138321995,
|
|
"fcm_dpo/beta": 0.3813626766204834,
|
|
"fcm_dpo/delta": -0.09251350164413452,
|
|
"fcm_dpo/margin": 2.839731454849243,
|
|
"fcm_dpo/q_t": 0.3197898268699646,
|
|
"grad_norm": 71.10482025146484,
|
|
"learning_rate": 4.0349825555680045e-07,
|
|
"logits/chosen": 0.22568419575691223,
|
|
"logits/rejected": 0.18240895867347717,
|
|
"logps/chosen": -69.98919677734375,
|
|
"logps/ref_chosen": -65.26150512695312,
|
|
"logps/ref_rejected": -87.60311126708984,
|
|
"logps/rejected": -95.17052459716797,
|
|
"loss": 0.9725,
|
|
"margin_dpo/margin_mean": 2.839731454849243,
|
|
"margin_dpo/margin_std": 3.927607774734497,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.37037037037037035,
|
|
"fcm_dpo/beta": 0.3627152442932129,
|
|
"fcm_dpo/delta": -0.06611888110637665,
|
|
"fcm_dpo/margin": 2.9209442138671875,
|
|
"fcm_dpo/q_t": 0.3157356381416321,
|
|
"grad_norm": 111.84679412841797,
|
|
"learning_rate": 3.982269822636601e-07,
|
|
"logits/chosen": 0.2060960978269577,
|
|
"logits/rejected": 0.17647871375083923,
|
|
"logps/chosen": -70.17327117919922,
|
|
"logps/ref_chosen": -65.73170471191406,
|
|
"logps/ref_rejected": -75.19642639160156,
|
|
"logps/rejected": -82.55892944335938,
|
|
"loss": 0.9053,
|
|
"margin_dpo/margin_mean": 2.9209437370300293,
|
|
"margin_dpo/margin_std": 3.611414670944214,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.3779289493575208,
|
|
"fcm_dpo/beta": 0.3453001379966736,
|
|
"fcm_dpo/delta": 0.03979369252920151,
|
|
"fcm_dpo/margin": 2.5490431785583496,
|
|
"fcm_dpo/q_t": 0.34757569432258606,
|
|
"grad_norm": 78.27101135253906,
|
|
"learning_rate": 3.9285205908608934e-07,
|
|
"logits/chosen": 0.24320515990257263,
|
|
"logits/rejected": 0.23427622020244598,
|
|
"logps/chosen": -76.07374572753906,
|
|
"logps/ref_chosen": -70.71224212646484,
|
|
"logps/ref_rejected": -76.12723541259766,
|
|
"logps/rejected": -84.03779602050781,
|
|
"loss": 1.0729,
|
|
"margin_dpo/margin_mean": 2.5490429401397705,
|
|
"margin_dpo/margin_std": 4.084795951843262,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.3854875283446712,
|
|
"fcm_dpo/beta": 0.36765122413635254,
|
|
"fcm_dpo/delta": 0.06216100975871086,
|
|
"fcm_dpo/margin": 2.552335739135742,
|
|
"fcm_dpo/q_t": 0.3432448208332062,
|
|
"grad_norm": 86.85004425048828,
|
|
"learning_rate": 3.873772445177015e-07,
|
|
"logits/chosen": 0.22906799614429474,
|
|
"logits/rejected": 0.1917153000831604,
|
|
"logps/chosen": -66.92555236816406,
|
|
"logps/ref_chosen": -61.767662048339844,
|
|
"logps/ref_rejected": -77.38813018798828,
|
|
"logps/rejected": -85.09835052490234,
|
|
"loss": 1.0684,
|
|
"margin_dpo/margin_mean": 2.552335262298584,
|
|
"margin_dpo/margin_std": 4.104119300842285,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.3930461073318216,
|
|
"fcm_dpo/beta": 0.3392147421836853,
|
|
"fcm_dpo/delta": -0.16018646955490112,
|
|
"fcm_dpo/margin": 3.369103193283081,
|
|
"fcm_dpo/q_t": 0.308208167552948,
|
|
"grad_norm": 71.6130142211914,
|
|
"learning_rate": 3.818063669026256e-07,
|
|
"logits/chosen": 0.2075103521347046,
|
|
"logits/rejected": 0.14859376847743988,
|
|
"logps/chosen": -67.01765441894531,
|
|
"logps/ref_chosen": -61.57584762573242,
|
|
"logps/ref_rejected": -91.87513732910156,
|
|
"logps/rejected": -100.68604278564453,
|
|
"loss": 0.9038,
|
|
"margin_dpo/margin_mean": 3.369102954864502,
|
|
"margin_dpo/margin_std": 4.229114532470703,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.40060468631897206,
|
|
"fcm_dpo/beta": 0.3240419924259186,
|
|
"fcm_dpo/delta": 0.021506184712052345,
|
|
"fcm_dpo/margin": 3.0214786529541016,
|
|
"fcm_dpo/q_t": 0.33430081605911255,
|
|
"grad_norm": 99.64929962158203,
|
|
"learning_rate": 3.7614332175848027e-07,
|
|
"logits/chosen": 0.1896849423646927,
|
|
"logits/rejected": 0.1461310237646103,
|
|
"logps/chosen": -71.33131408691406,
|
|
"logps/ref_chosen": -65.75422668457031,
|
|
"logps/ref_rejected": -77.9569320678711,
|
|
"logps/rejected": -86.55549621582031,
|
|
"loss": 1.0053,
|
|
"margin_dpo/margin_mean": 3.0214788913726807,
|
|
"margin_dpo/margin_std": 4.428906440734863,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.40816326530612246,
|
|
"fcm_dpo/beta": 0.33263370394706726,
|
|
"fcm_dpo/delta": 0.0555957667529583,
|
|
"fcm_dpo/margin": 2.8429040908813477,
|
|
"fcm_dpo/q_t": 0.3359856605529785,
|
|
"grad_norm": 92.8035659790039,
|
|
"learning_rate": 3.7039206905237656e-07,
|
|
"logits/chosen": 0.22854265570640564,
|
|
"logits/rejected": 0.1897846907377243,
|
|
"logps/chosen": -67.77484893798828,
|
|
"logps/ref_chosen": -62.27649688720703,
|
|
"logps/ref_rejected": -76.56950378417969,
|
|
"logps/rejected": -84.91075134277344,
|
|
"loss": 1.01,
|
|
"margin_dpo/margin_mean": 2.8429043292999268,
|
|
"margin_dpo/margin_std": 4.193612098693848,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.41572184429327286,
|
|
"fcm_dpo/beta": 0.33189326524734497,
|
|
"fcm_dpo/delta": -0.07388236373662949,
|
|
"fcm_dpo/margin": 3.2117831707000732,
|
|
"fcm_dpo/q_t": 0.3266224265098572,
|
|
"grad_norm": 73.1767349243164,
|
|
"learning_rate": 3.645566304318526e-07,
|
|
"logits/chosen": 0.21344022452831268,
|
|
"logits/rejected": 0.1750694364309311,
|
|
"logps/chosen": -67.38113403320312,
|
|
"logps/ref_chosen": -61.854393005371094,
|
|
"logps/ref_rejected": -77.22246551513672,
|
|
"logps/rejected": -85.96098327636719,
|
|
"loss": 0.9878,
|
|
"margin_dpo/margin_mean": 3.211782455444336,
|
|
"margin_dpo/margin_std": 4.597712516784668,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.42328042328042326,
|
|
"fcm_dpo/beta": 0.30937570333480835,
|
|
"fcm_dpo/delta": -0.10143546760082245,
|
|
"fcm_dpo/margin": 3.528640031814575,
|
|
"fcm_dpo/q_t": 0.30518898367881775,
|
|
"grad_norm": 53.0545654296875,
|
|
"learning_rate": 3.586410864126781e-07,
|
|
"logits/chosen": 0.20327389240264893,
|
|
"logits/rejected": 0.17730286717414856,
|
|
"logps/chosen": -66.44621276855469,
|
|
"logps/ref_chosen": -61.29896926879883,
|
|
"logps/ref_rejected": -73.35762023925781,
|
|
"logps/rejected": -82.03350830078125,
|
|
"loss": 0.8753,
|
|
"margin_dpo/margin_mean": 3.528640031814575,
|
|
"margin_dpo/margin_std": 4.124866962432861,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.4308390022675737,
|
|
"fcm_dpo/beta": 0.2883167862892151,
|
|
"fcm_dpo/delta": 0.04716776683926582,
|
|
"fcm_dpo/margin": 3.311861038208008,
|
|
"fcm_dpo/q_t": 0.332479327917099,
|
|
"grad_norm": 77.8005142211914,
|
|
"learning_rate": 3.5264957352549375e-07,
|
|
"logits/chosen": 0.21987095475196838,
|
|
"logits/rejected": 0.18027493357658386,
|
|
"logps/chosen": -68.98224639892578,
|
|
"logps/ref_chosen": -63.435462951660156,
|
|
"logps/ref_rejected": -79.73661804199219,
|
|
"logps/rejected": -88.59527587890625,
|
|
"loss": 0.9495,
|
|
"margin_dpo/margin_mean": 3.311861038208008,
|
|
"margin_dpo/margin_std": 4.424164295196533,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.4383975812547241,
|
|
"fcm_dpo/beta": 0.27836745977401733,
|
|
"fcm_dpo/delta": -0.07125671207904816,
|
|
"fcm_dpo/margin": 3.8071327209472656,
|
|
"fcm_dpo/q_t": 0.31692713499069214,
|
|
"grad_norm": 69.05030059814453,
|
|
"learning_rate": 3.465862814232821e-07,
|
|
"logits/chosen": 0.20964176952838898,
|
|
"logits/rejected": 0.15969504415988922,
|
|
"logps/chosen": -63.75525665283203,
|
|
"logps/ref_chosen": -57.696876525878906,
|
|
"logps/ref_rejected": -79.78132629394531,
|
|
"logps/rejected": -89.64683532714844,
|
|
"loss": 0.9226,
|
|
"margin_dpo/margin_mean": 3.8071320056915283,
|
|
"margin_dpo/margin_std": 4.934117794036865,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.4459561602418745,
|
|
"fcm_dpo/beta": 0.270934522151947,
|
|
"fcm_dpo/delta": -0.03950778394937515,
|
|
"fcm_dpo/margin": 3.814711809158325,
|
|
"fcm_dpo/q_t": 0.3220524191856384,
|
|
"grad_norm": 65.58736419677734,
|
|
"learning_rate": 3.4045544995169125e-07,
|
|
"logits/chosen": 0.2670535147190094,
|
|
"logits/rejected": 0.2111613005399704,
|
|
"logps/chosen": -61.7304573059082,
|
|
"logps/ref_chosen": -55.430633544921875,
|
|
"logps/ref_rejected": -78.1390151977539,
|
|
"logps/rejected": -88.25354766845703,
|
|
"loss": 0.931,
|
|
"margin_dpo/margin_mean": 3.814711093902588,
|
|
"margin_dpo/margin_std": 5.020625114440918,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.45351473922902497,
|
|
"fcm_dpo/beta": 0.2575019299983978,
|
|
"fcm_dpo/delta": -0.013848213478922844,
|
|
"fcm_dpo/margin": 3.926861524581909,
|
|
"fcm_dpo/q_t": 0.3218505382537842,
|
|
"grad_norm": 71.43912506103516,
|
|
"learning_rate": 3.3426136618426043e-07,
|
|
"logits/chosen": 0.23339371383190155,
|
|
"logits/rejected": 0.189311683177948,
|
|
"logps/chosen": -68.25260925292969,
|
|
"logps/ref_chosen": -61.207069396972656,
|
|
"logps/ref_rejected": -75.23294067382812,
|
|
"logps/rejected": -86.2053451538086,
|
|
"loss": 0.9303,
|
|
"margin_dpo/margin_mean": 3.926861524581909,
|
|
"margin_dpo/margin_std": 5.063118934631348,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.46107331821617537,
|
|
"fcm_dpo/beta": 0.263566255569458,
|
|
"fcm_dpo/delta": -0.02661963179707527,
|
|
"fcm_dpo/margin": 3.8765950202941895,
|
|
"fcm_dpo/q_t": 0.32818618416786194,
|
|
"grad_norm": 68.17465209960938,
|
|
"learning_rate": 3.280083614246217e-07,
|
|
"logits/chosen": 0.2163856476545334,
|
|
"logits/rejected": 0.1833379715681076,
|
|
"logps/chosen": -70.30329895019531,
|
|
"logps/ref_chosen": -63.06663131713867,
|
|
"logps/ref_rejected": -78.45845031738281,
|
|
"logps/rejected": -89.57170104980469,
|
|
"loss": 0.9829,
|
|
"margin_dpo/margin_mean": 3.8765950202941895,
|
|
"margin_dpo/margin_std": 5.42116117477417,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.46863189720332576,
|
|
"fcm_dpo/beta": 0.273120254278183,
|
|
"fcm_dpo/delta": 0.1272657811641693,
|
|
"fcm_dpo/margin": 3.23095965385437,
|
|
"fcm_dpo/q_t": 0.35429221391677856,
|
|
"grad_norm": 88.56037139892578,
|
|
"learning_rate": 3.2170080817777257e-07,
|
|
"logits/chosen": 0.26219043135643005,
|
|
"logits/rejected": 0.22065551578998566,
|
|
"logps/chosen": -70.99202728271484,
|
|
"logps/ref_chosen": -63.60908889770508,
|
|
"logps/ref_rejected": -74.06394958496094,
|
|
"logps/rejected": -84.67784881591797,
|
|
"loss": 1.0785,
|
|
"margin_dpo/margin_mean": 3.23095965385437,
|
|
"margin_dpo/margin_std": 5.263034343719482,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.47619047619047616,
|
|
"fcm_dpo/beta": 0.2818896472454071,
|
|
"fcm_dpo/delta": -0.07698482275009155,
|
|
"fcm_dpo/margin": 3.7881081104278564,
|
|
"fcm_dpo/q_t": 0.319245308637619,
|
|
"grad_norm": 60.92436599731445,
|
|
"learning_rate": 3.1534311709253723e-07,
|
|
"logits/chosen": 0.23960921168327332,
|
|
"logits/rejected": 0.192758709192276,
|
|
"logps/chosen": -68.99839782714844,
|
|
"logps/ref_chosen": -62.31493377685547,
|
|
"logps/ref_rejected": -75.07472229003906,
|
|
"logps/rejected": -85.54630279541016,
|
|
"loss": 0.9575,
|
|
"margin_dpo/margin_mean": 3.7881076335906982,
|
|
"margin_dpo/margin_std": 5.181168556213379,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.4837490551776266,
|
|
"fcm_dpo/beta": 0.2548847794532776,
|
|
"fcm_dpo/delta": -0.0447889044880867,
|
|
"fcm_dpo/margin": 4.059728622436523,
|
|
"fcm_dpo/q_t": 0.32673633098602295,
|
|
"grad_norm": 64.22303009033203,
|
|
"learning_rate": 3.0893973387735683e-07,
|
|
"logits/chosen": 0.22829946875572205,
|
|
"logits/rejected": 0.17820891737937927,
|
|
"logps/chosen": -61.4847526550293,
|
|
"logps/ref_chosen": -55.336036682128906,
|
|
"logps/ref_rejected": -80.05536651611328,
|
|
"logps/rejected": -90.26380920410156,
|
|
"loss": 0.9724,
|
|
"margin_dpo/margin_mean": 4.059728145599365,
|
|
"margin_dpo/margin_std": 5.59163236618042,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.491307634164777,
|
|
"fcm_dpo/beta": 0.2541687786579132,
|
|
"fcm_dpo/delta": -0.005901790224015713,
|
|
"fcm_dpo/margin": 3.942878246307373,
|
|
"fcm_dpo/q_t": 0.3219433128833771,
|
|
"grad_norm": 60.860626220703125,
|
|
"learning_rate": 3.0249513619156206e-07,
|
|
"logits/chosen": 0.22886808216571808,
|
|
"logits/rejected": 0.18000295758247375,
|
|
"logps/chosen": -64.44058227539062,
|
|
"logps/ref_chosen": -57.90629959106445,
|
|
"logps/ref_rejected": -74.2243881225586,
|
|
"logps/rejected": -84.70155334472656,
|
|
"loss": 0.9461,
|
|
"margin_dpo/margin_mean": 3.942878007888794,
|
|
"margin_dpo/margin_std": 5.178929328918457,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.4988662131519274,
|
|
"fcm_dpo/beta": 0.27623167634010315,
|
|
"fcm_dpo/delta": 0.06018294021487236,
|
|
"fcm_dpo/margin": 3.404806137084961,
|
|
"fcm_dpo/q_t": 0.3431999981403351,
|
|
"grad_norm": 62.56698989868164,
|
|
"learning_rate": 2.9601383051430505e-07,
|
|
"logits/chosen": 0.21622386574745178,
|
|
"logits/rejected": 0.1816556602716446,
|
|
"logps/chosen": -71.98468780517578,
|
|
"logps/ref_chosen": -65.17555236816406,
|
|
"logps/ref_rejected": -78.53681182861328,
|
|
"logps/rejected": -88.7507553100586,
|
|
"loss": 1.0657,
|
|
"margin_dpo/margin_mean": 3.404806137084961,
|
|
"margin_dpo/margin_std": 5.439192771911621,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.5064247921390779,
|
|
"fcm_dpo/beta": 0.24731174111366272,
|
|
"fcm_dpo/delta": -0.11190152168273926,
|
|
"fcm_dpo/margin": 4.425177097320557,
|
|
"fcm_dpo/q_t": 0.3129493296146393,
|
|
"grad_norm": 56.091400146484375,
|
|
"learning_rate": 2.895003489933375e-07,
|
|
"logits/chosen": 0.250847190618515,
|
|
"logits/rejected": 0.2109779417514801,
|
|
"logps/chosen": -69.31832885742188,
|
|
"logps/ref_chosen": -62.62797927856445,
|
|
"logps/ref_rejected": -79.9095458984375,
|
|
"logps/rejected": -91.02506256103516,
|
|
"loss": 0.915,
|
|
"margin_dpo/margin_mean": 4.425177574157715,
|
|
"margin_dpo/margin_std": 5.686088562011719,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.5139833711262283,
|
|
"fcm_dpo/beta": 0.2548673748970032,
|
|
"fcm_dpo/delta": 0.03888889402151108,
|
|
"fcm_dpo/margin": 3.7756190299987793,
|
|
"fcm_dpo/q_t": 0.3394353687763214,
|
|
"grad_norm": 57.22810745239258,
|
|
"learning_rate": 2.8295924627584004e-07,
|
|
"logits/chosen": 0.20316573977470398,
|
|
"logits/rejected": 0.1698659211397171,
|
|
"logps/chosen": -68.96516418457031,
|
|
"logps/ref_chosen": -61.1064567565918,
|
|
"logps/ref_rejected": -76.71846008300781,
|
|
"logps/rejected": -88.35279846191406,
|
|
"loss": 1.0012,
|
|
"margin_dpo/margin_mean": 3.7756195068359375,
|
|
"margin_dpo/margin_std": 5.557743549346924,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.5215419501133787,
|
|
"fcm_dpo/beta": 0.24767637252807617,
|
|
"fcm_dpo/delta": 0.010343861766159534,
|
|
"fcm_dpo/margin": 3.979682445526123,
|
|
"fcm_dpo/q_t": 0.3323804438114166,
|
|
"grad_norm": 55.119606018066406,
|
|
"learning_rate": 2.7639509632351927e-07,
|
|
"logits/chosen": 0.24728891253471375,
|
|
"logits/rejected": 0.2121780663728714,
|
|
"logps/chosen": -67.48667907714844,
|
|
"logps/ref_chosen": -60.12370681762695,
|
|
"logps/ref_rejected": -78.58574676513672,
|
|
"logps/rejected": -89.92839813232422,
|
|
"loss": 0.9973,
|
|
"margin_dpo/margin_mean": 3.9796817302703857,
|
|
"margin_dpo/margin_std": 5.61331844329834,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.5291005291005291,
|
|
"fcm_dpo/beta": 0.2374342978000641,
|
|
"fcm_dpo/delta": -0.14152035117149353,
|
|
"fcm_dpo/margin": 4.743535041809082,
|
|
"fcm_dpo/q_t": 0.3163720965385437,
|
|
"grad_norm": 61.54487228393555,
|
|
"learning_rate": 2.698124892141971e-07,
|
|
"logits/chosen": 0.28813233971595764,
|
|
"logits/rejected": 0.2352372407913208,
|
|
"logps/chosen": -62.17229461669922,
|
|
"logps/ref_chosen": -55.104461669921875,
|
|
"logps/ref_rejected": -80.63292694091797,
|
|
"logps/rejected": -92.44429016113281,
|
|
"loss": 0.9174,
|
|
"margin_dpo/margin_mean": 4.743535041809082,
|
|
"margin_dpo/margin_std": 6.253825664520264,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.5366591080876795,
|
|
"fcm_dpo/beta": 0.22558502852916718,
|
|
"fcm_dpo/delta": 0.006811001803725958,
|
|
"fcm_dpo/margin": 4.399797439575195,
|
|
"fcm_dpo/q_t": 0.32226401567459106,
|
|
"grad_norm": 57.688541412353516,
|
|
"learning_rate": 2.632160279321328e-07,
|
|
"logits/chosen": 0.2438465654850006,
|
|
"logits/rejected": 0.1791260838508606,
|
|
"logps/chosen": -61.78578567504883,
|
|
"logps/ref_chosen": -54.87224197387695,
|
|
"logps/ref_rejected": -77.01316833496094,
|
|
"logps/rejected": -88.32652282714844,
|
|
"loss": 0.9512,
|
|
"margin_dpo/margin_mean": 4.399797439575195,
|
|
"margin_dpo/margin_std": 5.840807914733887,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.54421768707483,
|
|
"fcm_dpo/beta": 0.23347434401512146,
|
|
"fcm_dpo/delta": 0.06479227542877197,
|
|
"fcm_dpo/margin": 4.013222694396973,
|
|
"fcm_dpo/q_t": 0.34076085686683655,
|
|
"grad_norm": 62.883846282958984,
|
|
"learning_rate": 2.5661032514931834e-07,
|
|
"logits/chosen": 0.2215835154056549,
|
|
"logits/rejected": 0.1821989119052887,
|
|
"logps/chosen": -68.06861114501953,
|
|
"logps/ref_chosen": -60.75285720825195,
|
|
"logps/ref_rejected": -75.21507263183594,
|
|
"logps/rejected": -86.54405212402344,
|
|
"loss": 1.0389,
|
|
"margin_dpo/margin_mean": 4.013222694396973,
|
|
"margin_dpo/margin_std": 6.100465297698975,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.5517762660619804,
|
|
"fcm_dpo/beta": 0.2259870022535324,
|
|
"fcm_dpo/delta": -0.08469346910715103,
|
|
"fcm_dpo/margin": 4.753388404846191,
|
|
"fcm_dpo/q_t": 0.3118368983268738,
|
|
"grad_norm": 45.306915283203125,
|
|
"learning_rate": 2.5e-07,
|
|
"logits/chosen": 0.2682866156101227,
|
|
"logits/rejected": 0.20303437113761902,
|
|
"logps/chosen": -65.59626007080078,
|
|
"logps/ref_chosen": -58.56513595581055,
|
|
"logps/ref_rejected": -84.06403350830078,
|
|
"logps/rejected": -95.8485336303711,
|
|
"loss": 0.9033,
|
|
"margin_dpo/margin_mean": 4.753389358520508,
|
|
"margin_dpo/margin_std": 5.948471546173096,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.5593348450491308,
|
|
"fcm_dpo/beta": 0.2373490035533905,
|
|
"fcm_dpo/delta": 0.0985020250082016,
|
|
"fcm_dpo/margin": 3.820103406906128,
|
|
"fcm_dpo/q_t": 0.3401463031768799,
|
|
"grad_norm": 58.69527816772461,
|
|
"learning_rate": 2.4338967485068164e-07,
|
|
"logits/chosen": 0.21440906822681427,
|
|
"logits/rejected": 0.17156557738780975,
|
|
"logps/chosen": -66.21595764160156,
|
|
"logps/ref_chosen": -59.443138122558594,
|
|
"logps/ref_rejected": -75.80937194824219,
|
|
"logps/rejected": -86.40229797363281,
|
|
"loss": 1.042,
|
|
"margin_dpo/margin_mean": 3.820103406906128,
|
|
"margin_dpo/margin_std": 5.791916847229004,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.5668934240362812,
|
|
"fcm_dpo/beta": 0.2474198043346405,
|
|
"fcm_dpo/delta": 0.04625866562128067,
|
|
"fcm_dpo/margin": 3.8570492267608643,
|
|
"fcm_dpo/q_t": 0.3406515121459961,
|
|
"grad_norm": 73.3750228881836,
|
|
"learning_rate": 2.3678397206786715e-07,
|
|
"logits/chosen": 0.26100030541419983,
|
|
"logits/rejected": 0.2167011946439743,
|
|
"logps/chosen": -65.69026184082031,
|
|
"logps/ref_chosen": -58.59185028076172,
|
|
"logps/ref_rejected": -73.7529525756836,
|
|
"logps/rejected": -84.70841979980469,
|
|
"loss": 1.0779,
|
|
"margin_dpo/margin_mean": 3.8570494651794434,
|
|
"margin_dpo/margin_std": 6.083317279815674,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.5744520030234316,
|
|
"fcm_dpo/beta": 0.23849084973335266,
|
|
"fcm_dpo/delta": -0.10434339195489883,
|
|
"fcm_dpo/margin": 4.571185111999512,
|
|
"fcm_dpo/q_t": 0.31551235914230347,
|
|
"grad_norm": 53.46732711791992,
|
|
"learning_rate": 2.3018751078580283e-07,
|
|
"logits/chosen": 0.26094746589660645,
|
|
"logits/rejected": 0.21969576179981232,
|
|
"logps/chosen": -65.65458679199219,
|
|
"logps/ref_chosen": -58.93424606323242,
|
|
"logps/ref_rejected": -76.27055358886719,
|
|
"logps/rejected": -87.56207275390625,
|
|
"loss": 0.943,
|
|
"margin_dpo/margin_mean": 4.5711846351623535,
|
|
"margin_dpo/margin_std": 6.037328243255615,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.582010582010582,
|
|
"fcm_dpo/beta": 0.23582050204277039,
|
|
"fcm_dpo/delta": 0.02780415117740631,
|
|
"fcm_dpo/margin": 3.7493813037872314,
|
|
"fcm_dpo/q_t": 0.3406650125980377,
|
|
"grad_norm": 59.925880432128906,
|
|
"learning_rate": 2.2360490367648084e-07,
|
|
"logits/chosen": 0.21887190639972687,
|
|
"logits/rejected": 0.1941918432712555,
|
|
"logps/chosen": -73.59498596191406,
|
|
"logps/ref_chosen": -66.42684173583984,
|
|
"logps/ref_rejected": -76.96304321289062,
|
|
"logps/rejected": -87.88056945800781,
|
|
"loss": 0.9882,
|
|
"margin_dpo/margin_mean": 3.7493815422058105,
|
|
"margin_dpo/margin_std": 5.2429423332214355,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.5895691609977324,
|
|
"fcm_dpo/beta": 0.2368488758802414,
|
|
"fcm_dpo/delta": 0.016512060537934303,
|
|
"fcm_dpo/margin": 4.15440559387207,
|
|
"fcm_dpo/q_t": 0.3241703510284424,
|
|
"grad_norm": 48.153385162353516,
|
|
"learning_rate": 2.170407537241599e-07,
|
|
"logits/chosen": 0.24604515731334686,
|
|
"logits/rejected": 0.2002202570438385,
|
|
"logps/chosen": -67.92310333251953,
|
|
"logps/ref_chosen": -60.984214782714844,
|
|
"logps/ref_rejected": -79.54056549072266,
|
|
"logps/rejected": -90.63386535644531,
|
|
"loss": 0.9327,
|
|
"margin_dpo/margin_mean": 4.15440559387207,
|
|
"margin_dpo/margin_std": 5.388613224029541,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.5971277399848829,
|
|
"fcm_dpo/beta": 0.22452032566070557,
|
|
"fcm_dpo/delta": -0.045371972024440765,
|
|
"fcm_dpo/margin": 4.619940757751465,
|
|
"fcm_dpo/q_t": 0.32055288553237915,
|
|
"grad_norm": 57.57057571411133,
|
|
"learning_rate": 2.104996510066625e-07,
|
|
"logits/chosen": 0.2859688103199005,
|
|
"logits/rejected": 0.2365344762802124,
|
|
"logps/chosen": -64.88384246826172,
|
|
"logps/ref_chosen": -58.30937957763672,
|
|
"logps/ref_rejected": -80.09587097167969,
|
|
"logps/rejected": -91.290283203125,
|
|
"loss": 0.9273,
|
|
"margin_dpo/margin_mean": 4.619940757751465,
|
|
"margin_dpo/margin_std": 5.92281436920166,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.6046863189720333,
|
|
"fcm_dpo/beta": 0.22984282672405243,
|
|
"fcm_dpo/delta": 0.02407177910208702,
|
|
"fcm_dpo/margin": 4.2350172996521,
|
|
"fcm_dpo/q_t": 0.3303438723087311,
|
|
"grad_norm": 63.975730895996094,
|
|
"learning_rate": 2.0398616948569493e-07,
|
|
"logits/chosen": 0.22668063640594482,
|
|
"logits/rejected": 0.16040459275245667,
|
|
"logps/chosen": -68.25566101074219,
|
|
"logps/ref_chosen": -61.39867401123047,
|
|
"logps/ref_rejected": -89.0177993774414,
|
|
"logps/rejected": -100.10980224609375,
|
|
"loss": 0.966,
|
|
"margin_dpo/margin_mean": 4.2350172996521,
|
|
"margin_dpo/margin_std": 5.741770267486572,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.6046863189720333,
|
|
"eval_fcm_dpo/beta": 0.2746705710887909,
|
|
"eval_fcm_dpo/delta": 0.019361913204193115,
|
|
"eval_fcm_dpo/margin": 3.5686707496643066,
|
|
"eval_fcm_dpo/q_t": 0.3450649082660675,
|
|
"eval_logits/chosen": 0.2557302713394165,
|
|
"eval_logits/rejected": 0.21348130702972412,
|
|
"eval_logps/chosen": -81.68497467041016,
|
|
"eval_logps/ref_chosen": -74.85946655273438,
|
|
"eval_logps/ref_rejected": -79.54898834228516,
|
|
"eval_logps/rejected": -89.94316864013672,
|
|
"eval_loss": 0.5573223829269409,
|
|
"eval_margin_dpo/margin_mean": 3.5686707496643066,
|
|
"eval_margin_dpo/margin_std": 6.046079635620117,
|
|
"eval_runtime": 38.3156,
|
|
"eval_samples_per_second": 60.106,
|
|
"eval_steps_per_second": 1.879,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.6122448979591837,
|
|
"fcm_dpo/beta": 0.22812703251838684,
|
|
"fcm_dpo/delta": -0.1910632848739624,
|
|
"fcm_dpo/margin": 5.0697832107543945,
|
|
"fcm_dpo/q_t": 0.30286386609077454,
|
|
"grad_norm": 43.19565200805664,
|
|
"learning_rate": 1.975048638084379e-07,
|
|
"logits/chosen": 0.2721787691116333,
|
|
"logits/rejected": 0.2133924514055252,
|
|
"logps/chosen": -62.4058837890625,
|
|
"logps/ref_chosen": -55.953521728515625,
|
|
"logps/ref_rejected": -77.67539978027344,
|
|
"logps/rejected": -89.19754791259766,
|
|
"loss": 0.8714,
|
|
"margin_dpo/margin_mean": 5.0697832107543945,
|
|
"margin_dpo/margin_std": 5.984508514404297,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.6198034769463341,
|
|
"fcm_dpo/beta": 0.2223322093486786,
|
|
"fcm_dpo/delta": 0.04787999764084816,
|
|
"fcm_dpo/margin": 4.290686130523682,
|
|
"fcm_dpo/q_t": 0.3321172595024109,
|
|
"grad_norm": 56.05967712402344,
|
|
"learning_rate": 1.9106026612264315e-07,
|
|
"logits/chosen": 0.24456481635570526,
|
|
"logits/rejected": 0.19028040766716003,
|
|
"logps/chosen": -70.0745620727539,
|
|
"logps/ref_chosen": -63.40419387817383,
|
|
"logps/ref_rejected": -80.85710144042969,
|
|
"logps/rejected": -91.81815338134766,
|
|
"loss": 0.9542,
|
|
"margin_dpo/margin_mean": 4.29068660736084,
|
|
"margin_dpo/margin_std": 5.697317123413086,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.6273620559334845,
|
|
"fcm_dpo/beta": 0.22563305497169495,
|
|
"fcm_dpo/delta": -0.03537094593048096,
|
|
"fcm_dpo/margin": 4.56794548034668,
|
|
"fcm_dpo/q_t": 0.3295273780822754,
|
|
"grad_norm": 55.72998046875,
|
|
"learning_rate": 1.846568829074628e-07,
|
|
"logits/chosen": 0.2193383276462555,
|
|
"logits/rejected": 0.17733624577522278,
|
|
"logps/chosen": -65.1448745727539,
|
|
"logps/ref_chosen": -57.6942024230957,
|
|
"logps/ref_rejected": -71.74036407470703,
|
|
"logps/rejected": -83.75898742675781,
|
|
"loss": 0.988,
|
|
"margin_dpo/margin_mean": 4.56794548034668,
|
|
"margin_dpo/margin_std": 6.607818603515625,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.6349206349206349,
|
|
"fcm_dpo/beta": 0.23104313015937805,
|
|
"fcm_dpo/delta": 0.0747731551527977,
|
|
"fcm_dpo/margin": 4.003242492675781,
|
|
"fcm_dpo/q_t": 0.3458429276943207,
|
|
"grad_norm": 71.31780242919922,
|
|
"learning_rate": 1.782991918222275e-07,
|
|
"logits/chosen": 0.24291574954986572,
|
|
"logits/rejected": 0.20572228729724884,
|
|
"logps/chosen": -66.96260070800781,
|
|
"logps/ref_chosen": -59.169517517089844,
|
|
"logps/ref_rejected": -69.47721099853516,
|
|
"logps/rejected": -81.27352142333984,
|
|
"loss": 1.0849,
|
|
"margin_dpo/margin_mean": 4.0032429695129395,
|
|
"margin_dpo/margin_std": 6.466610908508301,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.6424792139077853,
|
|
"fcm_dpo/beta": 0.23313617706298828,
|
|
"fcm_dpo/delta": -0.0782787948846817,
|
|
"fcm_dpo/margin": 4.59422492980957,
|
|
"fcm_dpo/q_t": 0.32748058438301086,
|
|
"grad_norm": 48.63573455810547,
|
|
"learning_rate": 1.7199163857537824e-07,
|
|
"logits/chosen": 0.24288901686668396,
|
|
"logits/rejected": 0.2018076479434967,
|
|
"logps/chosen": -65.39869689941406,
|
|
"logps/ref_chosen": -58.09320831298828,
|
|
"logps/ref_rejected": -73.98226165771484,
|
|
"logps/rejected": -85.88197326660156,
|
|
"loss": 0.9959,
|
|
"margin_dpo/margin_mean": 4.594224452972412,
|
|
"margin_dpo/margin_std": 6.688135623931885,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.6500377928949358,
|
|
"fcm_dpo/beta": 0.23262247443199158,
|
|
"fcm_dpo/delta": -0.010167494416236877,
|
|
"fcm_dpo/margin": 4.329963684082031,
|
|
"fcm_dpo/q_t": 0.3332260251045227,
|
|
"grad_norm": 52.93145751953125,
|
|
"learning_rate": 1.6573863381573954e-07,
|
|
"logits/chosen": 0.2085982859134674,
|
|
"logits/rejected": 0.1877393275499344,
|
|
"logps/chosen": -70.56092834472656,
|
|
"logps/ref_chosen": -62.7039909362793,
|
|
"logps/ref_rejected": -74.52284240722656,
|
|
"logps/rejected": -86.70974731445312,
|
|
"loss": 1.0091,
|
|
"margin_dpo/margin_mean": 4.329963684082031,
|
|
"margin_dpo/margin_std": 6.388288974761963,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.6575963718820862,
|
|
"fcm_dpo/beta": 0.21396084129810333,
|
|
"fcm_dpo/delta": -0.05841977149248123,
|
|
"fcm_dpo/margin": 4.911672115325928,
|
|
"fcm_dpo/q_t": 0.3213272988796234,
|
|
"grad_norm": 51.059974670410156,
|
|
"learning_rate": 1.5954455004830878e-07,
|
|
"logits/chosen": 0.2870796024799347,
|
|
"logits/rejected": 0.249590203166008,
|
|
"logps/chosen": -63.383323669433594,
|
|
"logps/ref_chosen": -56.12516403198242,
|
|
"logps/ref_rejected": -74.36073303222656,
|
|
"logps/rejected": -86.53057861328125,
|
|
"loss": 0.9411,
|
|
"margin_dpo/margin_mean": 4.9116716384887695,
|
|
"margin_dpo/margin_std": 6.554174900054932,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.6651549508692366,
|
|
"fcm_dpo/beta": 0.20786520838737488,
|
|
"fcm_dpo/delta": -0.039162855595350266,
|
|
"fcm_dpo/margin": 4.9660820960998535,
|
|
"fcm_dpo/q_t": 0.3247433304786682,
|
|
"grad_norm": 46.13332748413086,
|
|
"learning_rate": 1.534137185767178e-07,
|
|
"logits/chosen": 0.21864204108715057,
|
|
"logits/rejected": 0.15293407440185547,
|
|
"logps/chosen": -63.088623046875,
|
|
"logps/ref_chosen": -55.67548751831055,
|
|
"logps/ref_rejected": -76.62055206298828,
|
|
"logps/rejected": -88.9997787475586,
|
|
"loss": 0.9627,
|
|
"margin_dpo/margin_mean": 4.966081619262695,
|
|
"margin_dpo/margin_std": 6.732314109802246,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.672713529856387,
|
|
"fcm_dpo/beta": 0.19659116864204407,
|
|
"fcm_dpo/delta": -0.011401364579796791,
|
|
"fcm_dpo/margin": 4.798353672027588,
|
|
"fcm_dpo/q_t": 0.33275923132896423,
|
|
"grad_norm": 54.457401275634766,
|
|
"learning_rate": 1.473504264745062e-07,
|
|
"logits/chosen": 0.23866701126098633,
|
|
"logits/rejected": 0.18620404601097107,
|
|
"logps/chosen": -67.8885269165039,
|
|
"logps/ref_chosen": -59.903411865234375,
|
|
"logps/ref_rejected": -82.02873229980469,
|
|
"logps/rejected": -94.81220245361328,
|
|
"loss": 0.9728,
|
|
"margin_dpo/margin_mean": 4.79835319519043,
|
|
"margin_dpo/margin_std": 6.5291314125061035,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.6802721088435374,
|
|
"fcm_dpo/beta": 0.18794873356819153,
|
|
"fcm_dpo/delta": 0.019849028438329697,
|
|
"fcm_dpo/margin": 5.214951038360596,
|
|
"fcm_dpo/q_t": 0.3214188814163208,
|
|
"grad_norm": 53.57640838623047,
|
|
"learning_rate": 1.4135891358732205e-07,
|
|
"logits/chosen": 0.2403557002544403,
|
|
"logits/rejected": 0.17988719046115875,
|
|
"logps/chosen": -63.68009567260742,
|
|
"logps/ref_chosen": -55.83526611328125,
|
|
"logps/ref_rejected": -79.63658142089844,
|
|
"logps/rejected": -92.69636535644531,
|
|
"loss": 0.9086,
|
|
"margin_dpo/margin_mean": 5.214951038360596,
|
|
"margin_dpo/margin_std": 6.322065830230713,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.6878306878306878,
|
|
"fcm_dpo/beta": 0.20542562007904053,
|
|
"fcm_dpo/delta": -0.014168155379593372,
|
|
"fcm_dpo/margin": 4.929433345794678,
|
|
"fcm_dpo/q_t": 0.3281521499156952,
|
|
"grad_norm": 49.470947265625,
|
|
"learning_rate": 1.354433695681474e-07,
|
|
"logits/chosen": 0.19945120811462402,
|
|
"logits/rejected": 0.1575821340084076,
|
|
"logps/chosen": -68.32371520996094,
|
|
"logps/ref_chosen": -60.59226608276367,
|
|
"logps/ref_rejected": -73.37936401367188,
|
|
"logps/rejected": -86.04025268554688,
|
|
"loss": 0.9693,
|
|
"margin_dpo/margin_mean": 4.929433345794678,
|
|
"margin_dpo/margin_std": 6.865820407867432,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.6953892668178382,
|
|
"fcm_dpo/beta": 0.19809255003929138,
|
|
"fcm_dpo/delta": -0.07750917971134186,
|
|
"fcm_dpo/margin": 5.395981788635254,
|
|
"fcm_dpo/q_t": 0.3125055730342865,
|
|
"grad_norm": 47.08208465576172,
|
|
"learning_rate": 1.2960793094762345e-07,
|
|
"logits/chosen": 0.2629553973674774,
|
|
"logits/rejected": 0.1926957219839096,
|
|
"logps/chosen": -64.09815216064453,
|
|
"logps/ref_chosen": -56.21283721923828,
|
|
"logps/ref_rejected": -83.02075958251953,
|
|
"logps/rejected": -96.30205535888672,
|
|
"loss": 0.8901,
|
|
"margin_dpo/margin_mean": 5.395981788635254,
|
|
"margin_dpo/margin_std": 6.470156669616699,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.7029478458049887,
|
|
"fcm_dpo/beta": 0.18275338411331177,
|
|
"fcm_dpo/delta": 0.028541725128889084,
|
|
"fcm_dpo/margin": 5.32494592666626,
|
|
"fcm_dpo/q_t": 0.3231423497200012,
|
|
"grad_norm": 48.6711540222168,
|
|
"learning_rate": 1.238566782415197e-07,
|
|
"logits/chosen": 0.27234551310539246,
|
|
"logits/rejected": 0.2267368733882904,
|
|
"logps/chosen": -67.08876037597656,
|
|
"logps/ref_chosen": -59.0674934387207,
|
|
"logps/ref_rejected": -74.53498840332031,
|
|
"logps/rejected": -87.88118743896484,
|
|
"loss": 0.9223,
|
|
"margin_dpo/margin_mean": 5.32494592666626,
|
|
"margin_dpo/margin_std": 6.607783317565918,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.7105064247921391,
|
|
"fcm_dpo/beta": 0.19131594896316528,
|
|
"fcm_dpo/delta": -0.0211162306368351,
|
|
"fcm_dpo/margin": 5.313529014587402,
|
|
"fcm_dpo/q_t": 0.31958022713661194,
|
|
"grad_norm": 38.842506408691406,
|
|
"learning_rate": 1.1819363309737438e-07,
|
|
"logits/chosen": 0.26374131441116333,
|
|
"logits/rejected": 0.2210657298564911,
|
|
"logps/chosen": -66.67548370361328,
|
|
"logps/ref_chosen": -58.3397331237793,
|
|
"logps/ref_rejected": -74.33660125732422,
|
|
"logps/rejected": -87.98587799072266,
|
|
"loss": 0.895,
|
|
"margin_dpo/margin_mean": 5.313528537750244,
|
|
"margin_dpo/margin_std": 6.436234474182129,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.7180650037792895,
|
|
"fcm_dpo/beta": 0.17899836599826813,
|
|
"fcm_dpo/delta": -0.07962033152580261,
|
|
"fcm_dpo/margin": 5.9777021408081055,
|
|
"fcm_dpo/q_t": 0.3140580356121063,
|
|
"grad_norm": 35.938438415527344,
|
|
"learning_rate": 1.126227554822985e-07,
|
|
"logits/chosen": 0.2762961983680725,
|
|
"logits/rejected": 0.21573925018310547,
|
|
"logps/chosen": -62.51293182373047,
|
|
"logps/ref_chosen": -54.60407638549805,
|
|
"logps/ref_rejected": -79.94635009765625,
|
|
"logps/rejected": -93.8329086303711,
|
|
"loss": 0.9163,
|
|
"margin_dpo/margin_mean": 5.9777021408081055,
|
|
"margin_dpo/margin_std": 7.518684387207031,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.7256235827664399,
|
|
"fcm_dpo/beta": 0.18707513809204102,
|
|
"fcm_dpo/delta": 0.11744797229766846,
|
|
"fcm_dpo/margin": 4.751503944396973,
|
|
"fcm_dpo/q_t": 0.3464050590991974,
|
|
"grad_norm": 58.852413177490234,
|
|
"learning_rate": 1.0714794091391072e-07,
|
|
"logits/chosen": 0.21019065380096436,
|
|
"logits/rejected": 0.1871183216571808,
|
|
"logps/chosen": -72.26695251464844,
|
|
"logps/ref_chosen": -63.0672492980957,
|
|
"logps/ref_rejected": -68.59602355957031,
|
|
"logps/rejected": -82.5472183227539,
|
|
"loss": 1.0813,
|
|
"margin_dpo/margin_mean": 4.751503944396973,
|
|
"margin_dpo/margin_std": 7.717199802398682,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.7331821617535903,
|
|
"fcm_dpo/beta": 0.18762502074241638,
|
|
"fcm_dpo/delta": -0.02065492607653141,
|
|
"fcm_dpo/margin": 5.407547950744629,
|
|
"fcm_dpo/q_t": 0.3266620337963104,
|
|
"grad_norm": 46.27238845825195,
|
|
"learning_rate": 1.0177301773633992e-07,
|
|
"logits/chosen": 0.26643306016921997,
|
|
"logits/rejected": 0.2090064287185669,
|
|
"logps/chosen": -67.2301254272461,
|
|
"logps/ref_chosen": -58.75799560546875,
|
|
"logps/ref_rejected": -79.72233581542969,
|
|
"logps/rejected": -93.60199737548828,
|
|
"loss": 0.9668,
|
|
"margin_dpo/margin_mean": 5.407547950744629,
|
|
"margin_dpo/margin_std": 7.433469295501709,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.7407407407407407,
|
|
"fcm_dpo/beta": 0.19302329421043396,
|
|
"fcm_dpo/delta": 0.07708380371332169,
|
|
"fcm_dpo/margin": 4.815155982971191,
|
|
"fcm_dpo/q_t": 0.34535330533981323,
|
|
"grad_norm": 64.28440856933594,
|
|
"learning_rate": 9.650174444319956e-08,
|
|
"logits/chosen": 0.2915256917476654,
|
|
"logits/rejected": 0.23610806465148926,
|
|
"logps/chosen": -70.24661254882812,
|
|
"logps/ref_chosen": -61.394195556640625,
|
|
"logps/ref_rejected": -81.1914291381836,
|
|
"logps/rejected": -94.8590087890625,
|
|
"loss": 1.0442,
|
|
"margin_dpo/margin_mean": 4.815155982971191,
|
|
"margin_dpo/margin_std": 7.485970497131348,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.7482993197278912,
|
|
"fcm_dpo/beta": 0.205574631690979,
|
|
"fcm_dpo/delta": 0.04338858649134636,
|
|
"fcm_dpo/margin": 4.661031246185303,
|
|
"fcm_dpo/q_t": 0.33606356382369995,
|
|
"grad_norm": 53.13848114013672,
|
|
"learning_rate": 9.133780704940594e-08,
|
|
"logits/chosen": 0.2322273999452591,
|
|
"logits/rejected": 0.1737196147441864,
|
|
"logps/chosen": -68.44369506835938,
|
|
"logps/ref_chosen": -59.85382843017578,
|
|
"logps/ref_rejected": -80.63748931884766,
|
|
"logps/rejected": -93.88838195800781,
|
|
"loss": 1.0377,
|
|
"margin_dpo/margin_mean": 4.661031723022461,
|
|
"margin_dpo/margin_std": 7.028813362121582,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.7558578987150416,
|
|
"fcm_dpo/beta": 0.1951872855424881,
|
|
"fcm_dpo/delta": -0.0991867184638977,
|
|
"fcm_dpo/margin": 5.557717323303223,
|
|
"fcm_dpo/q_t": 0.325251966714859,
|
|
"grad_norm": 56.068965911865234,
|
|
"learning_rate": 8.628481651367875e-08,
|
|
"logits/chosen": 0.2677212357521057,
|
|
"logits/rejected": 0.22101454436779022,
|
|
"logps/chosen": -74.45133209228516,
|
|
"logps/ref_chosen": -66.17753601074219,
|
|
"logps/ref_rejected": -83.75955200195312,
|
|
"logps/rejected": -97.59107971191406,
|
|
"loss": 0.9758,
|
|
"margin_dpo/margin_mean": 5.557717323303223,
|
|
"margin_dpo/margin_std": 7.881464958190918,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.763416477702192,
|
|
"fcm_dpo/beta": 0.1971052587032318,
|
|
"fcm_dpo/delta": 0.01763380505144596,
|
|
"fcm_dpo/margin": 4.979878902435303,
|
|
"fcm_dpo/q_t": 0.3262530267238617,
|
|
"grad_norm": 58.05271530151367,
|
|
"learning_rate": 8.134630621352483e-08,
|
|
"logits/chosen": 0.24825136363506317,
|
|
"logits/rejected": 0.2213972508907318,
|
|
"logps/chosen": -69.82575225830078,
|
|
"logps/ref_chosen": -62.11005401611328,
|
|
"logps/ref_rejected": -74.64705657958984,
|
|
"logps/rejected": -87.34264373779297,
|
|
"loss": 0.943,
|
|
"margin_dpo/margin_mean": 4.979878902435303,
|
|
"margin_dpo/margin_std": 6.590427398681641,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.7709750566893424,
|
|
"fcm_dpo/beta": 0.19753201305866241,
|
|
"fcm_dpo/delta": -0.0035357594024389982,
|
|
"fcm_dpo/margin": 5.0731520652771,
|
|
"fcm_dpo/q_t": 0.3300931751728058,
|
|
"grad_norm": 55.001102447509766,
|
|
"learning_rate": 7.652572947447272e-08,
|
|
"logits/chosen": 0.2655506432056427,
|
|
"logits/rejected": 0.20919163525104523,
|
|
"logps/chosen": -72.16453552246094,
|
|
"logps/ref_chosen": -64.42265319824219,
|
|
"logps/ref_rejected": -87.00096130371094,
|
|
"logps/rejected": -99.81599426269531,
|
|
"loss": 0.989,
|
|
"margin_dpo/margin_mean": 5.073152542114258,
|
|
"margin_dpo/margin_std": 7.320687294006348,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.7785336356764928,
|
|
"fcm_dpo/beta": 0.18178601562976837,
|
|
"fcm_dpo/delta": -0.08417822420597076,
|
|
"fcm_dpo/margin": 5.895651817321777,
|
|
"fcm_dpo/q_t": 0.3174353837966919,
|
|
"grad_norm": 43.048763275146484,
|
|
"learning_rate": 7.182645715528435e-08,
|
|
"logits/chosen": 0.262310266494751,
|
|
"logits/rejected": 0.2180895060300827,
|
|
"logps/chosen": -66.26689147949219,
|
|
"logps/ref_chosen": -58.284393310546875,
|
|
"logps/ref_rejected": -79.09356689453125,
|
|
"logps/rejected": -92.97171020507812,
|
|
"loss": 0.9102,
|
|
"margin_dpo/margin_mean": 5.895651817321777,
|
|
"margin_dpo/margin_std": 8.012345314025879,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.7860922146636432,
|
|
"fcm_dpo/beta": 0.19615530967712402,
|
|
"fcm_dpo/delta": 0.15623337030410767,
|
|
"fcm_dpo/margin": 4.374403953552246,
|
|
"fcm_dpo/q_t": 0.3525107502937317,
|
|
"grad_norm": 52.284854888916016,
|
|
"learning_rate": 6.725177529083209e-08,
|
|
"logits/chosen": 0.2730554938316345,
|
|
"logits/rejected": 0.23126861453056335,
|
|
"logps/chosen": -69.12674713134766,
|
|
"logps/ref_chosen": -61.03638458251953,
|
|
"logps/ref_rejected": -72.15824890136719,
|
|
"logps/rejected": -84.6230239868164,
|
|
"loss": 1.0682,
|
|
"margin_dpo/margin_mean": 4.374403953552246,
|
|
"margin_dpo/margin_std": 7.115353584289551,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.7936507936507936,
|
|
"fcm_dpo/beta": 0.20899620652198792,
|
|
"fcm_dpo/delta": 0.03430444374680519,
|
|
"fcm_dpo/margin": 4.636659145355225,
|
|
"fcm_dpo/q_t": 0.3303223252296448,
|
|
"grad_norm": 62.26331329345703,
|
|
"learning_rate": 6.280488279429185e-08,
|
|
"logits/chosen": 0.20217880606651306,
|
|
"logits/rejected": 0.1589132845401764,
|
|
"logps/chosen": -75.64788055419922,
|
|
"logps/ref_chosen": -68.02732849121094,
|
|
"logps/ref_rejected": -85.41429901123047,
|
|
"logps/rejected": -97.6715087890625,
|
|
"loss": 1.0154,
|
|
"margin_dpo/margin_mean": 4.636659145355225,
|
|
"margin_dpo/margin_std": 6.868948936462402,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.8012093726379441,
|
|
"fcm_dpo/beta": 0.22276242077350616,
|
|
"fcm_dpo/delta": -0.033804379403591156,
|
|
"fcm_dpo/margin": 4.592731475830078,
|
|
"fcm_dpo/q_t": 0.3295581638813019,
|
|
"grad_norm": 47.57779312133789,
|
|
"learning_rate": 5.848888922025552e-08,
|
|
"logits/chosen": 0.23308193683624268,
|
|
"logits/rejected": 0.17806780338287354,
|
|
"logps/chosen": -66.49713134765625,
|
|
"logps/ref_chosen": -58.67436599731445,
|
|
"logps/ref_rejected": -79.38807678222656,
|
|
"logps/rejected": -91.80358123779297,
|
|
"loss": 1.0217,
|
|
"margin_dpo/margin_mean": 4.5927324295043945,
|
|
"margin_dpo/margin_std": 6.516105651855469,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.8087679516250945,
|
|
"fcm_dpo/beta": 0.21365702152252197,
|
|
"fcm_dpo/delta": 0.007706022821366787,
|
|
"fcm_dpo/margin": 4.637831687927246,
|
|
"fcm_dpo/q_t": 0.33375898003578186,
|
|
"grad_norm": 52.6577262878418,
|
|
"learning_rate": 5.430681259032957e-08,
|
|
"logits/chosen": 0.23046866059303284,
|
|
"logits/rejected": 0.17907290160655975,
|
|
"logps/chosen": -65.31705474853516,
|
|
"logps/ref_chosen": -57.640098571777344,
|
|
"logps/ref_rejected": -77.25399780273438,
|
|
"logps/rejected": -89.56879425048828,
|
|
"loss": 1.0116,
|
|
"margin_dpo/margin_mean": 4.637831211090088,
|
|
"margin_dpo/margin_std": 6.8540802001953125,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.8163265306122449,
|
|
"fcm_dpo/beta": 0.19856581091880798,
|
|
"fcm_dpo/delta": -0.12930835783481598,
|
|
"fcm_dpo/margin": 5.611011028289795,
|
|
"fcm_dpo/q_t": 0.30457136034965515,
|
|
"grad_norm": 49.485713958740234,
|
|
"learning_rate": 5.026157728273966e-08,
|
|
"logits/chosen": 0.2640102505683899,
|
|
"logits/rejected": 0.199564129114151,
|
|
"logps/chosen": -67.96708679199219,
|
|
"logps/ref_chosen": -60.17341995239258,
|
|
"logps/ref_rejected": -85.50316619873047,
|
|
"logps/rejected": -98.90785217285156,
|
|
"loss": 0.8669,
|
|
"margin_dpo/margin_mean": 5.611011981964111,
|
|
"margin_dpo/margin_std": 6.6042799949646,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.8238851095993953,
|
|
"fcm_dpo/beta": 0.17933054268360138,
|
|
"fcm_dpo/delta": -0.039366770535707474,
|
|
"fcm_dpo/margin": 5.763962268829346,
|
|
"fcm_dpo/q_t": 0.3183125853538513,
|
|
"grad_norm": 49.12068557739258,
|
|
"learning_rate": 4.635601198741607e-08,
|
|
"logits/chosen": 0.2411998063325882,
|
|
"logits/rejected": 0.19200275838375092,
|
|
"logps/chosen": -64.20506286621094,
|
|
"logps/ref_chosen": -56.985809326171875,
|
|
"logps/ref_rejected": -73.21353912353516,
|
|
"logps/rejected": -86.19674682617188,
|
|
"loss": 0.9215,
|
|
"margin_dpo/margin_mean": 5.7639617919921875,
|
|
"margin_dpo/margin_std": 7.375536918640137,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.8314436885865457,
|
|
"fcm_dpo/beta": 0.18896816670894623,
|
|
"fcm_dpo/delta": 0.1245899647474289,
|
|
"fcm_dpo/margin": 4.69591760635376,
|
|
"fcm_dpo/q_t": 0.33968135714530945,
|
|
"grad_norm": 46.302913665771484,
|
|
"learning_rate": 4.259284772799099e-08,
|
|
"logits/chosen": 0.2536870241165161,
|
|
"logits/rejected": 0.20810946822166443,
|
|
"logps/chosen": -67.05742645263672,
|
|
"logps/ref_chosen": -59.600929260253906,
|
|
"logps/ref_rejected": -75.24870300292969,
|
|
"logps/rejected": -87.401123046875,
|
|
"loss": 0.9894,
|
|
"margin_dpo/margin_mean": 4.695918083190918,
|
|
"margin_dpo/margin_std": 6.6248931884765625,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.8390022675736961,
|
|
"fcm_dpo/beta": 0.20476670563220978,
|
|
"fcm_dpo/delta": 0.011952433735132217,
|
|
"fcm_dpo/margin": 4.818479537963867,
|
|
"fcm_dpo/q_t": 0.33133333921432495,
|
|
"grad_norm": 60.33503341674805,
|
|
"learning_rate": 3.89747159520904e-08,
|
|
"logits/chosen": 0.2548312544822693,
|
|
"logits/rejected": 0.2083214819431305,
|
|
"logps/chosen": -71.74968719482422,
|
|
"logps/ref_chosen": -63.578895568847656,
|
|
"logps/ref_rejected": -78.87867736816406,
|
|
"logps/rejected": -91.8679428100586,
|
|
"loss": 1.0108,
|
|
"margin_dpo/margin_mean": 4.818479537963867,
|
|
"margin_dpo/margin_std": 7.097817897796631,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.8465608465608465,
|
|
"fcm_dpo/beta": 0.19495727121829987,
|
|
"fcm_dpo/delta": -0.06875023245811462,
|
|
"fcm_dpo/margin": 5.4297003746032715,
|
|
"fcm_dpo/q_t": 0.32562586665153503,
|
|
"grad_norm": 50.54167556762695,
|
|
"learning_rate": 3.550414669125573e-08,
|
|
"logits/chosen": 0.26778554916381836,
|
|
"logits/rejected": 0.2139805108308792,
|
|
"logps/chosen": -66.76048278808594,
|
|
"logps/ref_chosen": -58.651512145996094,
|
|
"logps/ref_rejected": -78.67181396484375,
|
|
"logps/rejected": -92.21048736572266,
|
|
"loss": 0.9439,
|
|
"margin_dpo/margin_mean": 5.42970085144043,
|
|
"margin_dpo/margin_std": 7.295182704925537,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.854119425547997,
|
|
"fcm_dpo/beta": 0.18790480494499207,
|
|
"fcm_dpo/delta": -0.006370419170707464,
|
|
"fcm_dpo/margin": 5.345377445220947,
|
|
"fcm_dpo/q_t": 0.32594844698905945,
|
|
"grad_norm": 53.373992919921875,
|
|
"learning_rate": 3.218356679178252e-08,
|
|
"logits/chosen": 0.2542131841182709,
|
|
"logits/rejected": 0.2154400646686554,
|
|
"logps/chosen": -68.05675506591797,
|
|
"logps/ref_chosen": -60.3114128112793,
|
|
"logps/ref_rejected": -78.25270080566406,
|
|
"logps/rejected": -91.34342956542969,
|
|
"loss": 0.951,
|
|
"margin_dpo/margin_mean": 5.345377445220947,
|
|
"margin_dpo/margin_std": 7.17926549911499,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.8616780045351474,
|
|
"fcm_dpo/beta": 0.1877862513065338,
|
|
"fcm_dpo/delta": -0.07747501134872437,
|
|
"fcm_dpo/margin": 5.681626796722412,
|
|
"fcm_dpo/q_t": 0.31316810846328735,
|
|
"grad_norm": 37.85681915283203,
|
|
"learning_rate": 2.9015298217712453e-08,
|
|
"logits/chosen": 0.24570398032665253,
|
|
"logits/rejected": 0.18830426037311554,
|
|
"logps/chosen": -65.35658264160156,
|
|
"logps/ref_chosen": -57.752410888671875,
|
|
"logps/ref_rejected": -76.99858093261719,
|
|
"logps/rejected": -90.28437805175781,
|
|
"loss": 0.9025,
|
|
"margin_dpo/margin_mean": 5.681626319885254,
|
|
"margin_dpo/margin_std": 6.906379699707031,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.8692365835222978,
|
|
"fcm_dpo/beta": 0.18777219951152802,
|
|
"fcm_dpo/delta": 0.09439138323068619,
|
|
"fcm_dpo/margin": 4.848970890045166,
|
|
"fcm_dpo/q_t": 0.3400737941265106,
|
|
"grad_norm": 51.46808624267578,
|
|
"learning_rate": 2.600155642716606e-08,
|
|
"logits/chosen": 0.27044713497161865,
|
|
"logits/rejected": 0.23382814228534698,
|
|
"logps/chosen": -71.76069641113281,
|
|
"logps/ref_chosen": -63.61958694458008,
|
|
"logps/ref_rejected": -79.51353454589844,
|
|
"logps/rejected": -92.50362396240234,
|
|
"loss": 1.0032,
|
|
"margin_dpo/margin_mean": 4.848970890045166,
|
|
"margin_dpo/margin_std": 7.03188943862915,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.8767951625094482,
|
|
"fcm_dpo/beta": 0.19913935661315918,
|
|
"fcm_dpo/delta": 0.025188129395246506,
|
|
"fcm_dpo/margin": 4.890405178070068,
|
|
"fcm_dpo/q_t": 0.3381398320198059,
|
|
"grad_norm": 47.50596618652344,
|
|
"learning_rate": 2.3144448823151392e-08,
|
|
"logits/chosen": 0.25481683015823364,
|
|
"logits/rejected": 0.21231190860271454,
|
|
"logps/chosen": -64.9234390258789,
|
|
"logps/ref_chosen": -57.3541145324707,
|
|
"logps/ref_rejected": -73.14434051513672,
|
|
"logps/rejected": -85.60404968261719,
|
|
"loss": 1.0075,
|
|
"margin_dpo/margin_mean": 4.89040470123291,
|
|
"margin_dpo/margin_std": 7.159416198730469,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.8843537414965986,
|
|
"fcm_dpo/beta": 0.2018500119447708,
|
|
"fcm_dpo/delta": 0.039844244718551636,
|
|
"fcm_dpo/margin": 4.77426815032959,
|
|
"fcm_dpo/q_t": 0.3346126675605774,
|
|
"grad_norm": 52.080108642578125,
|
|
"learning_rate": 2.044597327993153e-08,
|
|
"logits/chosen": 0.302755206823349,
|
|
"logits/rejected": 0.2517249584197998,
|
|
"logps/chosen": -63.728919982910156,
|
|
"logps/ref_chosen": -56.0127067565918,
|
|
"logps/ref_rejected": -77.16522216796875,
|
|
"logps/rejected": -89.65570068359375,
|
|
"loss": 1.0162,
|
|
"margin_dpo/margin_mean": 4.774267673492432,
|
|
"margin_dpo/margin_std": 7.161828517913818,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.891912320483749,
|
|
"fcm_dpo/beta": 0.1958218663930893,
|
|
"fcm_dpo/delta": -0.1253487765789032,
|
|
"fcm_dpo/margin": 5.685278415679932,
|
|
"fcm_dpo/q_t": 0.3043304681777954,
|
|
"grad_norm": 44.7454833984375,
|
|
"learning_rate": 1.7908016745981856e-08,
|
|
"logits/chosen": 0.27173739671707153,
|
|
"logits/rejected": 0.23606888949871063,
|
|
"logps/chosen": -68.17321014404297,
|
|
"logps/ref_chosen": -60.5894660949707,
|
|
"logps/ref_rejected": -74.34771728515625,
|
|
"logps/rejected": -87.61674499511719,
|
|
"loss": 0.9031,
|
|
"margin_dpo/margin_mean": 5.685278415679932,
|
|
"margin_dpo/margin_std": 7.15541934967041,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.8994708994708994,
|
|
"fcm_dpo/beta": 0.18476364016532898,
|
|
"fcm_dpo/delta": 0.004565200302749872,
|
|
"fcm_dpo/margin": 5.378830909729004,
|
|
"fcm_dpo/q_t": 0.3307590186595917,
|
|
"grad_norm": 45.189571380615234,
|
|
"learning_rate": 1.553235392451377e-08,
|
|
"logits/chosen": 0.2524837553501129,
|
|
"logits/rejected": 0.1859164535999298,
|
|
"logps/chosen": -62.0955924987793,
|
|
"logps/ref_chosen": -54.77838897705078,
|
|
"logps/ref_rejected": -78.102783203125,
|
|
"logps/rejected": -90.79881286621094,
|
|
"loss": 0.9849,
|
|
"margin_dpo/margin_mean": 5.378830909729004,
|
|
"margin_dpo/margin_std": 7.575669765472412,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.9070294784580499,
|
|
"fcm_dpo/beta": 0.20738473534584045,
|
|
"fcm_dpo/delta": 0.18134655058383942,
|
|
"fcm_dpo/margin": 3.9983153343200684,
|
|
"fcm_dpo/q_t": 0.3663412928581238,
|
|
"grad_norm": 58.46778869628906,
|
|
"learning_rate": 1.3320646032487393e-08,
|
|
"logits/chosen": 0.27015766501426697,
|
|
"logits/rejected": 0.23463740944862366,
|
|
"logps/chosen": -66.70384216308594,
|
|
"logps/ref_chosen": -58.45500564575195,
|
|
"logps/ref_rejected": -70.7367172241211,
|
|
"logps/rejected": -82.9838638305664,
|
|
"loss": 1.122,
|
|
"margin_dpo/margin_mean": 3.9983153343200684,
|
|
"margin_dpo/margin_std": 7.113587856292725,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.9070294784580499,
|
|
"eval_fcm_dpo/beta": 0.2267770618200302,
|
|
"eval_fcm_dpo/delta": -0.0016693489160388708,
|
|
"eval_fcm_dpo/margin": 4.408850193023682,
|
|
"eval_fcm_dpo/q_t": 0.34116730093955994,
|
|
"eval_logits/chosen": 0.27240264415740967,
|
|
"eval_logits/rejected": 0.2290111631155014,
|
|
"eval_logps/chosen": -82.5570068359375,
|
|
"eval_logps/ref_chosen": -74.85946655273438,
|
|
"eval_logps/ref_rejected": -79.54898834228516,
|
|
"eval_logps/rejected": -91.6553726196289,
|
|
"eval_loss": 0.5467005968093872,
|
|
"eval_margin_dpo/margin_mean": 4.40885066986084,
|
|
"eval_margin_dpo/margin_std": 7.250354290008545,
|
|
"eval_runtime": 38.3062,
|
|
"eval_samples_per_second": 60.121,
|
|
"eval_steps_per_second": 1.88,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.9145880574452003,
|
|
"fcm_dpo/beta": 0.20211096107959747,
|
|
"fcm_dpo/delta": -0.07903344184160233,
|
|
"fcm_dpo/margin": 5.284127235412598,
|
|
"fcm_dpo/q_t": 0.31968027353286743,
|
|
"grad_norm": 52.800880432128906,
|
|
"learning_rate": 1.1274439638981532e-08,
|
|
"logits/chosen": 0.2609863877296448,
|
|
"logits/rejected": 0.21765032410621643,
|
|
"logps/chosen": -66.56036376953125,
|
|
"logps/ref_chosen": -59.87483596801758,
|
|
"logps/ref_rejected": -75.75318908691406,
|
|
"logps/rejected": -87.72285461425781,
|
|
"loss": 0.9413,
|
|
"margin_dpo/margin_mean": 5.2841267585754395,
|
|
"margin_dpo/margin_std": 7.117745399475098,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.9221466364323507,
|
|
"fcm_dpo/beta": 0.19957733154296875,
|
|
"fcm_dpo/delta": -0.02773882821202278,
|
|
"fcm_dpo/margin": 5.118582248687744,
|
|
"fcm_dpo/q_t": 0.3286603093147278,
|
|
"grad_norm": 64.0735092163086,
|
|
"learning_rate": 9.395165583732379e-09,
|
|
"logits/chosen": 0.2576659619808197,
|
|
"logits/rejected": 0.22138860821723938,
|
|
"logps/chosen": -67.77408599853516,
|
|
"logps/ref_chosen": -60.35883712768555,
|
|
"logps/ref_rejected": -81.3543930053711,
|
|
"logps/rejected": -93.88822937011719,
|
|
"loss": 0.964,
|
|
"margin_dpo/margin_mean": 5.118581771850586,
|
|
"margin_dpo/margin_std": 7.064711093902588,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.9297052154195011,
|
|
"fcm_dpo/beta": 0.19206738471984863,
|
|
"fcm_dpo/delta": -0.05985846370458603,
|
|
"fcm_dpo/margin": 5.479418754577637,
|
|
"fcm_dpo/q_t": 0.31544992327690125,
|
|
"grad_norm": 52.961814880371094,
|
|
"learning_rate": 7.684137976598088e-09,
|
|
"logits/chosen": 0.22922465205192566,
|
|
"logits/rejected": 0.19491711258888245,
|
|
"logps/chosen": -66.57718658447266,
|
|
"logps/ref_chosen": -59.17219161987305,
|
|
"logps/ref_rejected": -79.92167663574219,
|
|
"logps/rejected": -92.80609130859375,
|
|
"loss": 0.9192,
|
|
"margin_dpo/margin_mean": 5.4794182777404785,
|
|
"margin_dpo/margin_std": 6.942657470703125,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.9372637944066515,
|
|
"fcm_dpo/beta": 0.19354508817195892,
|
|
"fcm_dpo/delta": 0.08557742834091187,
|
|
"fcm_dpo/margin": 4.7475481033325195,
|
|
"fcm_dpo/q_t": 0.3413916826248169,
|
|
"grad_norm": 58.06659698486328,
|
|
"learning_rate": 6.142553278648238e-09,
|
|
"logits/chosen": 0.2654898166656494,
|
|
"logits/rejected": 0.21224336326122284,
|
|
"logps/chosen": -65.464599609375,
|
|
"logps/ref_chosen": -58.052696228027344,
|
|
"logps/ref_rejected": -78.37252807617188,
|
|
"logps/rejected": -90.53197479248047,
|
|
"loss": 0.9722,
|
|
"margin_dpo/margin_mean": 4.7475481033325195,
|
|
"margin_dpo/margin_std": 6.606284141540527,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.9448223733938019,
|
|
"fcm_dpo/beta": 0.20503361523151398,
|
|
"fcm_dpo/delta": 0.02592085301876068,
|
|
"fcm_dpo/margin": 4.755415916442871,
|
|
"fcm_dpo/q_t": 0.3336712718009949,
|
|
"grad_norm": 52.6633186340332,
|
|
"learning_rate": 4.7714894655209174e-09,
|
|
"logits/chosen": 0.26381465792655945,
|
|
"logits/rejected": 0.19538459181785583,
|
|
"logps/chosen": -64.57536315917969,
|
|
"logps/ref_chosen": -56.957862854003906,
|
|
"logps/ref_rejected": -82.68255615234375,
|
|
"logps/rejected": -95.05546569824219,
|
|
"loss": 0.9651,
|
|
"margin_dpo/margin_mean": 4.755415916442871,
|
|
"margin_dpo/margin_std": 6.575987815856934,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.9523809523809523,
|
|
"fcm_dpo/beta": 0.18615694344043732,
|
|
"fcm_dpo/delta": -0.13678616285324097,
|
|
"fcm_dpo/margin": 6.003110408782959,
|
|
"fcm_dpo/q_t": 0.30930259823799133,
|
|
"grad_norm": 38.42726135253906,
|
|
"learning_rate": 3.5719052736323806e-09,
|
|
"logits/chosen": 0.26059260964393616,
|
|
"logits/rejected": 0.20223116874694824,
|
|
"logps/chosen": -63.660064697265625,
|
|
"logps/ref_chosen": -56.71510696411133,
|
|
"logps/ref_rejected": -82.94544219970703,
|
|
"logps/rejected": -95.89350128173828,
|
|
"loss": 0.8913,
|
|
"margin_dpo/margin_mean": 6.003109931945801,
|
|
"margin_dpo/margin_std": 7.36514949798584,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.9599395313681028,
|
|
"fcm_dpo/beta": 0.17870579659938812,
|
|
"fcm_dpo/delta": 0.08864951133728027,
|
|
"fcm_dpo/margin": 5.159267425537109,
|
|
"fcm_dpo/q_t": 0.34538301825523376,
|
|
"grad_norm": 59.73903274536133,
|
|
"learning_rate": 2.5446395297668287e-09,
|
|
"logits/chosen": 0.25905171036720276,
|
|
"logits/rejected": 0.21522016823291779,
|
|
"logps/chosen": -66.6440658569336,
|
|
"logps/ref_chosen": -59.33793258666992,
|
|
"logps/ref_rejected": -75.01703643798828,
|
|
"logps/rejected": -87.4824447631836,
|
|
"loss": 1.0145,
|
|
"margin_dpo/margin_mean": 5.159267425537109,
|
|
"margin_dpo/margin_std": 7.501204013824463,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.9674981103552532,
|
|
"fcm_dpo/beta": 0.18961693346500397,
|
|
"fcm_dpo/delta": -0.015742216259241104,
|
|
"fcm_dpo/margin": 5.340132713317871,
|
|
"fcm_dpo/q_t": 0.32362303137779236,
|
|
"grad_norm": 49.96174240112305,
|
|
"learning_rate": 1.690410564514244e-09,
|
|
"logits/chosen": 0.2457093894481659,
|
|
"logits/rejected": 0.1768445074558258,
|
|
"logps/chosen": -66.05986022949219,
|
|
"logps/ref_chosen": -58.1605339050293,
|
|
"logps/ref_rejected": -79.85365295410156,
|
|
"logps/rejected": -93.09309387207031,
|
|
"loss": 0.9413,
|
|
"margin_dpo/margin_mean": 5.340132713317871,
|
|
"margin_dpo/margin_std": 7.175882816314697,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.9750566893424036,
|
|
"fcm_dpo/beta": 0.20850110054016113,
|
|
"fcm_dpo/delta": 0.11010245233774185,
|
|
"fcm_dpo/margin": 4.303619384765625,
|
|
"fcm_dpo/q_t": 0.3440888524055481,
|
|
"grad_norm": 55.303855895996094,
|
|
"learning_rate": 1.0098157099674987e-09,
|
|
"logits/chosen": 0.235604926943779,
|
|
"logits/rejected": 0.20262756943702698,
|
|
"logps/chosen": -71.58079528808594,
|
|
"logps/ref_chosen": -63.45180130004883,
|
|
"logps/ref_rejected": -74.18285369873047,
|
|
"logps/rejected": -86.61546325683594,
|
|
"loss": 1.0835,
|
|
"margin_dpo/margin_mean": 4.303619384765625,
|
|
"margin_dpo/margin_std": 7.141520023345947,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.982615268329554,
|
|
"fcm_dpo/beta": 0.19852934777736664,
|
|
"fcm_dpo/delta": -0.0924367681145668,
|
|
"fcm_dpo/margin": 5.4358906745910645,
|
|
"fcm_dpo/q_t": 0.31767454743385315,
|
|
"grad_norm": 64.30652618408203,
|
|
"learning_rate": 5.033308820289184e-10,
|
|
"logits/chosen": 0.2805251479148865,
|
|
"logits/rejected": 0.22169442474842072,
|
|
"logps/chosen": -67.74933624267578,
|
|
"logps/ref_chosen": -59.75496292114258,
|
|
"logps/ref_rejected": -84.31481170654297,
|
|
"logps/rejected": -97.74507141113281,
|
|
"loss": 0.9544,
|
|
"margin_dpo/margin_mean": 5.4358906745910645,
|
|
"margin_dpo/margin_std": 7.351178169250488,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.9901738473167044,
|
|
"fcm_dpo/beta": 0.19725769758224487,
|
|
"fcm_dpo/delta": 0.0005639016744680703,
|
|
"fcm_dpo/margin": 5.0511932373046875,
|
|
"fcm_dpo/q_t": 0.32906073331832886,
|
|
"grad_norm": 39.39985656738281,
|
|
"learning_rate": 1.7131024761923852e-10,
|
|
"logits/chosen": 0.2633139193058014,
|
|
"logits/rejected": 0.1995994746685028,
|
|
"logps/chosen": -65.23994445800781,
|
|
"logps/ref_chosen": -57.817848205566406,
|
|
"logps/ref_rejected": -79.81755065917969,
|
|
"logps/rejected": -92.29084014892578,
|
|
"loss": 0.9413,
|
|
"margin_dpo/margin_mean": 5.0511932373046875,
|
|
"margin_dpo/margin_std": 6.741064548492432,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.9977324263038548,
|
|
"fcm_dpo/beta": 0.18813875317573547,
|
|
"fcm_dpo/delta": -0.03273053467273712,
|
|
"fcm_dpo/margin": 5.455845832824707,
|
|
"fcm_dpo/q_t": 0.3237132430076599,
|
|
"grad_norm": 46.501102447509766,
|
|
"learning_rate": 1.3985977021235829e-11,
|
|
"logits/chosen": 0.3373282551765442,
|
|
"logits/rejected": 0.2789747714996338,
|
|
"logps/chosen": -67.12478637695312,
|
|
"logps/ref_chosen": -59.12651443481445,
|
|
"logps/ref_rejected": -79.42085266113281,
|
|
"logps/rejected": -92.8749771118164,
|
|
"loss": 0.9505,
|
|
"margin_dpo/margin_mean": 5.455845832824707,
|
|
"margin_dpo/margin_std": 7.354535102844238,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.999244142101285,
|
|
"step": 661,
|
|
"total_flos": 0.0,
|
|
"train_loss": 1.0460260132617922,
|
|
"train_runtime": 1747.5645,
|
|
"train_samples_per_second": 24.226,
|
|
"train_steps_per_second": 0.378
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 661,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 50,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|