Model: jackf857/llama-3-8b-base-new-dpo-harmless-s_star0.4-q_t0.4 Source: Original Platform
2622 lines
95 KiB
JSON
2622 lines
95 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.999244142101285,
|
|
"eval_steps": 200,
|
|
"global_step": 661,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0015117157974300832,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.0013532638549804688,
|
|
"fcm_dpo/q_t": 0.5000336766242981,
|
|
"grad_norm": 28.21938133239746,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": 0.13337239623069763,
|
|
"logits/rejected": 0.12492948770523071,
|
|
"logps/chosen": -64.5841293334961,
|
|
"logps/ref_chosen": -64.61280822753906,
|
|
"logps/ref_rejected": -64.17195129394531,
|
|
"logps/rejected": -64.14192199707031,
|
|
"loss": 1.3866,
|
|
"margin_dpo/margin_mean": -0.0013527870178222656,
|
|
"margin_dpo/margin_std": 0.2561596930027008,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.007558578987150416,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.012731105089187622,
|
|
"fcm_dpo/q_t": 0.49968191981315613,
|
|
"grad_norm": 29.578828811645508,
|
|
"learning_rate": 2.9850746268656714e-08,
|
|
"logits/chosen": 0.09623775631189346,
|
|
"logits/rejected": 0.06788332760334015,
|
|
"logps/chosen": -65.3349380493164,
|
|
"logps/ref_chosen": -65.34695434570312,
|
|
"logps/ref_rejected": -79.315673828125,
|
|
"logps/rejected": -79.31640625,
|
|
"loss": 1.3853,
|
|
"margin_dpo/margin_mean": 0.012730807065963745,
|
|
"margin_dpo/margin_std": 0.3051193654537201,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.015117157974300832,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.0006597042083740234,
|
|
"fcm_dpo/q_t": 0.5000167489051819,
|
|
"grad_norm": 29.6705322265625,
|
|
"learning_rate": 6.71641791044776e-08,
|
|
"logits/chosen": 0.10582169145345688,
|
|
"logits/rejected": 0.06683535873889923,
|
|
"logps/chosen": -56.6657829284668,
|
|
"logps/ref_chosen": -56.65692901611328,
|
|
"logps/ref_rejected": -80.12786865234375,
|
|
"logps/rejected": -80.13607025146484,
|
|
"loss": 1.3866,
|
|
"margin_dpo/margin_mean": -0.00065990089206025,
|
|
"margin_dpo/margin_std": 0.3203383684158325,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.022675736961451247,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.01577478088438511,
|
|
"fcm_dpo/q_t": 0.49960583448410034,
|
|
"grad_norm": 32.982215881347656,
|
|
"learning_rate": 1.044776119402985e-07,
|
|
"logits/chosen": 0.08618224412202835,
|
|
"logits/rejected": 0.0566771999001503,
|
|
"logps/chosen": -60.09851837158203,
|
|
"logps/ref_chosen": -60.09392166137695,
|
|
"logps/ref_rejected": -78.99056243896484,
|
|
"logps/rejected": -79.01094818115234,
|
|
"loss": 1.385,
|
|
"margin_dpo/margin_mean": 0.01577501930296421,
|
|
"margin_dpo/margin_std": 0.3348791301250458,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.030234315948601664,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.004742377903312445,
|
|
"fcm_dpo/q_t": 0.5001183748245239,
|
|
"grad_norm": 29.284229278564453,
|
|
"learning_rate": 1.4179104477611938e-07,
|
|
"logits/chosen": 0.09735535085201263,
|
|
"logits/rejected": 0.06951850652694702,
|
|
"logps/chosen": -55.4586296081543,
|
|
"logps/ref_chosen": -55.464561462402344,
|
|
"logps/ref_rejected": -77.40013122558594,
|
|
"logps/rejected": -77.38945007324219,
|
|
"loss": 1.387,
|
|
"margin_dpo/margin_mean": -0.004742181394249201,
|
|
"margin_dpo/margin_std": 0.29244670271873474,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.03779289493575208,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.00360795552842319,
|
|
"fcm_dpo/q_t": 0.4999099671840668,
|
|
"grad_norm": 29.778076171875,
|
|
"learning_rate": 1.7910447761194027e-07,
|
|
"logits/chosen": 0.10357926040887833,
|
|
"logits/rejected": 0.07201702892780304,
|
|
"logps/chosen": -60.72992706298828,
|
|
"logps/ref_chosen": -60.711814880371094,
|
|
"logps/ref_rejected": -82.71756744384766,
|
|
"logps/rejected": -82.7392807006836,
|
|
"loss": 1.3862,
|
|
"margin_dpo/margin_mean": 0.0036078630946576595,
|
|
"margin_dpo/margin_std": 0.30398499965667725,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.045351473922902494,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.010024601593613625,
|
|
"fcm_dpo/q_t": 0.5002505779266357,
|
|
"grad_norm": 30.504179000854492,
|
|
"learning_rate": 2.1641791044776117e-07,
|
|
"logits/chosen": 0.1048557385802269,
|
|
"logits/rejected": 0.0802190899848938,
|
|
"logps/chosen": -60.91057205200195,
|
|
"logps/ref_chosen": -60.880210876464844,
|
|
"logps/ref_rejected": -78.44148254394531,
|
|
"logps/rejected": -78.4618148803711,
|
|
"loss": 1.3875,
|
|
"margin_dpo/margin_mean": -0.0100246611982584,
|
|
"margin_dpo/margin_std": 0.30975908041000366,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.05291005291005291,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.013202684931457043,
|
|
"fcm_dpo/q_t": 0.49967002868652344,
|
|
"grad_norm": 27.877357482910156,
|
|
"learning_rate": 2.537313432835821e-07,
|
|
"logits/chosen": 0.08469098061323166,
|
|
"logits/rejected": 0.057735610753297806,
|
|
"logps/chosen": -62.28917694091797,
|
|
"logps/ref_chosen": -62.248138427734375,
|
|
"logps/ref_rejected": -79.56475830078125,
|
|
"logps/rejected": -79.61898803710938,
|
|
"loss": 1.3853,
|
|
"margin_dpo/margin_mean": 0.013202887959778309,
|
|
"margin_dpo/margin_std": 0.32241854071617126,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.06046863189720333,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.05649406835436821,
|
|
"fcm_dpo/q_t": 0.4985879957675934,
|
|
"grad_norm": 31.444271087646484,
|
|
"learning_rate": 2.9104477611940296e-07,
|
|
"logits/chosen": 0.11410780251026154,
|
|
"logits/rejected": 0.06925268471240997,
|
|
"logps/chosen": -58.951148986816406,
|
|
"logps/ref_chosen": -58.87812423706055,
|
|
"logps/ref_rejected": -84.22982025146484,
|
|
"logps/rejected": -84.35932922363281,
|
|
"loss": 1.381,
|
|
"margin_dpo/margin_mean": 0.056494224816560745,
|
|
"margin_dpo/margin_std": 0.3627670109272003,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.06802721088435375,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.06682233512401581,
|
|
"fcm_dpo/q_t": 0.498330295085907,
|
|
"grad_norm": 31.839420318603516,
|
|
"learning_rate": 3.2835820895522385e-07,
|
|
"logits/chosen": 0.05706251785159111,
|
|
"logits/rejected": 0.03058524802327156,
|
|
"logps/chosen": -66.00699615478516,
|
|
"logps/ref_chosen": -65.88298034667969,
|
|
"logps/ref_rejected": -83.87881469726562,
|
|
"logps/rejected": -84.06964111328125,
|
|
"loss": 1.3801,
|
|
"margin_dpo/margin_mean": 0.06682238727807999,
|
|
"margin_dpo/margin_std": 0.4172392785549164,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.07558578987150416,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.09804753959178925,
|
|
"fcm_dpo/q_t": 0.4975499212741852,
|
|
"grad_norm": 27.911935806274414,
|
|
"learning_rate": 3.6567164179104475e-07,
|
|
"logits/chosen": 0.09519219398498535,
|
|
"logits/rejected": 0.06103789061307907,
|
|
"logps/chosen": -55.37559127807617,
|
|
"logps/ref_chosen": -55.172386169433594,
|
|
"logps/ref_rejected": -69.63300323486328,
|
|
"logps/rejected": -69.93424224853516,
|
|
"loss": 1.377,
|
|
"margin_dpo/margin_mean": 0.09804768860340118,
|
|
"margin_dpo/margin_std": 0.4135734438896179,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.08314436885865457,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.18540987372398376,
|
|
"fcm_dpo/q_t": 0.4953702390193939,
|
|
"grad_norm": 31.428316116333008,
|
|
"learning_rate": 4.0298507462686564e-07,
|
|
"logits/chosen": 0.0665198341012001,
|
|
"logits/rejected": 0.030963122844696045,
|
|
"logps/chosen": -57.539833068847656,
|
|
"logps/ref_chosen": -57.193580627441406,
|
|
"logps/ref_rejected": -79.69940948486328,
|
|
"logps/rejected": -80.23106384277344,
|
|
"loss": 1.3686,
|
|
"margin_dpo/margin_mean": 0.18540982902050018,
|
|
"margin_dpo/margin_std": 0.5479583144187927,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.09070294784580499,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.20901694893836975,
|
|
"fcm_dpo/q_t": 0.4947921633720398,
|
|
"grad_norm": 29.466659545898438,
|
|
"learning_rate": 4.4029850746268654e-07,
|
|
"logits/chosen": 0.11891994625329971,
|
|
"logits/rejected": 0.0848398357629776,
|
|
"logps/chosen": -60.59447479248047,
|
|
"logps/ref_chosen": -60.068870544433594,
|
|
"logps/ref_rejected": -74.41178894042969,
|
|
"logps/rejected": -75.14640808105469,
|
|
"loss": 1.3671,
|
|
"margin_dpo/margin_mean": 0.20901694893836975,
|
|
"margin_dpo/margin_std": 0.7362244129180908,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.0982615268329554,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.3001774847507477,
|
|
"fcm_dpo/q_t": 0.4925141930580139,
|
|
"grad_norm": 30.807130813598633,
|
|
"learning_rate": 4.776119402985074e-07,
|
|
"logits/chosen": 0.13861653208732605,
|
|
"logits/rejected": 0.10865757614374161,
|
|
"logps/chosen": -58.88816452026367,
|
|
"logps/ref_chosen": -58.1558952331543,
|
|
"logps/ref_rejected": -76.06512451171875,
|
|
"logps/rejected": -77.09757995605469,
|
|
"loss": 1.3589,
|
|
"margin_dpo/margin_mean": 0.3001771569252014,
|
|
"margin_dpo/margin_std": 0.9452616572380066,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.10582010582010581,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.39394986629486084,
|
|
"fcm_dpo/q_t": 0.49018916487693787,
|
|
"grad_norm": 28.67983627319336,
|
|
"learning_rate": 4.999860140229787e-07,
|
|
"logits/chosen": 0.10006751120090485,
|
|
"logits/rejected": 0.06497758626937866,
|
|
"logps/chosen": -68.39962005615234,
|
|
"logps/ref_chosen": -67.35506439208984,
|
|
"logps/ref_rejected": -82.24962615966797,
|
|
"logps/rejected": -83.6881332397461,
|
|
"loss": 1.3507,
|
|
"margin_dpo/margin_mean": 0.39395004510879517,
|
|
"margin_dpo/margin_std": 1.1363012790679932,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.11337868480725624,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.5148524045944214,
|
|
"fcm_dpo/q_t": 0.48723092675209045,
|
|
"grad_norm": 26.078073501586914,
|
|
"learning_rate": 4.998286897523808e-07,
|
|
"logits/chosen": 0.10058212280273438,
|
|
"logits/rejected": 0.06556755304336548,
|
|
"logps/chosen": -58.295387268066406,
|
|
"logps/ref_chosen": -56.86763381958008,
|
|
"logps/ref_rejected": -72.56938934326172,
|
|
"logps/rejected": -74.51200103759766,
|
|
"loss": 1.3408,
|
|
"margin_dpo/margin_mean": 0.5148526430130005,
|
|
"margin_dpo/margin_std": 1.398964762687683,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.12093726379440665,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.6505656242370605,
|
|
"fcm_dpo/q_t": 0.48392024636268616,
|
|
"grad_norm": 28.3653621673584,
|
|
"learning_rate": 4.994966691179711e-07,
|
|
"logits/chosen": 0.13842633366584778,
|
|
"logits/rejected": 0.09329269081354141,
|
|
"logps/chosen": -59.446983337402344,
|
|
"logps/ref_chosen": -57.687095642089844,
|
|
"logps/ref_rejected": -78.06813049316406,
|
|
"logps/rejected": -80.47857666015625,
|
|
"loss": 1.3297,
|
|
"margin_dpo/margin_mean": 0.6505654454231262,
|
|
"margin_dpo/margin_std": 1.6704308986663818,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.12849584278155707,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.0345122814178467,
|
|
"fcm_dpo/q_t": 0.47443389892578125,
|
|
"grad_norm": 26.892316818237305,
|
|
"learning_rate": 4.989901842900325e-07,
|
|
"logits/chosen": 0.1578936129808426,
|
|
"logits/rejected": 0.11112338304519653,
|
|
"logps/chosen": -59.1240234375,
|
|
"logps/ref_chosen": -56.96040725708008,
|
|
"logps/ref_rejected": -75.22166442871094,
|
|
"logps/rejected": -78.41979217529297,
|
|
"loss": 1.2938,
|
|
"margin_dpo/margin_mean": 1.0345120429992676,
|
|
"margin_dpo/margin_std": 1.7866588830947876,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.1360544217687075,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.1209580898284912,
|
|
"fcm_dpo/q_t": 0.47252073884010315,
|
|
"grad_norm": 29.175752639770508,
|
|
"learning_rate": 4.983095894354857e-07,
|
|
"logits/chosen": 0.20981314778327942,
|
|
"logits/rejected": 0.16077354550361633,
|
|
"logps/chosen": -60.326324462890625,
|
|
"logps/ref_chosen": -57.41730499267578,
|
|
"logps/ref_rejected": -80.87986755371094,
|
|
"logps/rejected": -84.90985870361328,
|
|
"loss": 1.2935,
|
|
"margin_dpo/margin_mean": 1.1209580898284912,
|
|
"margin_dpo/margin_std": 2.4879467487335205,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.1436130007558579,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.4390569925308228,
|
|
"fcm_dpo/q_t": 0.46531516313552856,
|
|
"grad_norm": 29.310806274414062,
|
|
"learning_rate": 4.974553604702332e-07,
|
|
"logits/chosen": 0.1912127137184143,
|
|
"logits/rejected": 0.1479342132806778,
|
|
"logps/chosen": -57.60295867919922,
|
|
"logps/ref_chosen": -54.08087158203125,
|
|
"logps/ref_rejected": -76.15860748291016,
|
|
"logps/rejected": -81.1197509765625,
|
|
"loss": 1.2749,
|
|
"margin_dpo/margin_mean": 1.4390567541122437,
|
|
"margin_dpo/margin_std": 3.292581558227539,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.15117157974300832,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.211411714553833,
|
|
"fcm_dpo/q_t": 0.47177332639694214,
|
|
"grad_norm": 33.16886520385742,
|
|
"learning_rate": 4.964280947263676e-07,
|
|
"logits/chosen": 0.2135041207075119,
|
|
"logits/rejected": 0.18092623353004456,
|
|
"logps/chosen": -68.66203308105469,
|
|
"logps/ref_chosen": -63.875038146972656,
|
|
"logps/ref_rejected": -82.077880859375,
|
|
"logps/rejected": -88.07627868652344,
|
|
"loss": 1.3247,
|
|
"margin_dpo/margin_mean": 1.211411714553833,
|
|
"margin_dpo/margin_std": 4.652392387390137,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.15873015873015872,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.7167237997055054,
|
|
"fcm_dpo/q_t": 0.460963636636734,
|
|
"grad_norm": 30.796314239501953,
|
|
"learning_rate": 4.952285105344791e-07,
|
|
"logits/chosen": 0.2201795130968094,
|
|
"logits/rejected": 0.16930809617042542,
|
|
"logps/chosen": -67.5002670288086,
|
|
"logps/ref_chosen": -62.572479248046875,
|
|
"logps/ref_rejected": -80.93415069580078,
|
|
"logps/rejected": -87.57865905761719,
|
|
"loss": 1.283,
|
|
"margin_dpo/margin_mean": 1.7167232036590576,
|
|
"margin_dpo/margin_std": 4.936122894287109,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.16628873771730915,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.5425608158111572,
|
|
"fcm_dpo/q_t": 0.4638938009738922,
|
|
"grad_norm": 44.27668380737305,
|
|
"learning_rate": 4.938574467213517e-07,
|
|
"logits/chosen": 0.1879446804523468,
|
|
"logits/rejected": 0.16481170058250427,
|
|
"logps/chosen": -74.3304672241211,
|
|
"logps/ref_chosen": -68.67534637451172,
|
|
"logps/ref_rejected": -78.82028198242188,
|
|
"logps/rejected": -86.01795959472656,
|
|
"loss": 1.3142,
|
|
"margin_dpo/margin_mean": 1.5425606966018677,
|
|
"margin_dpo/margin_std": 5.584108829498291,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.17384731670445955,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 2.017230272293091,
|
|
"fcm_dpo/q_t": 0.45290979743003845,
|
|
"grad_norm": 28.293357849121094,
|
|
"learning_rate": 4.923158620234019e-07,
|
|
"logits/chosen": 0.23582999408245087,
|
|
"logits/rejected": 0.18265566229820251,
|
|
"logps/chosen": -64.07593536376953,
|
|
"logps/ref_chosen": -58.65370559692383,
|
|
"logps/ref_rejected": -81.89688873291016,
|
|
"logps/rejected": -89.33635711669922,
|
|
"loss": 1.2493,
|
|
"margin_dpo/margin_mean": 2.0172300338745117,
|
|
"margin_dpo/margin_std": 4.730603218078613,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.18140589569160998,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 2.405763864517212,
|
|
"fcm_dpo/q_t": 0.4429899752140045,
|
|
"grad_norm": 28.859172821044922,
|
|
"learning_rate": 4.906048344162676e-07,
|
|
"logits/chosen": 0.24149473011493683,
|
|
"logits/rejected": 0.19213181734085083,
|
|
"logps/chosen": -61.583091735839844,
|
|
"logps/ref_chosen": -56.16423797607422,
|
|
"logps/ref_rejected": -75.87689971923828,
|
|
"logps/rejected": -83.70152282714844,
|
|
"loss": 1.208,
|
|
"margin_dpo/margin_mean": 2.405764102935791,
|
|
"margin_dpo/margin_std": 4.32560396194458,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.1889644746787604,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 3.015474319458008,
|
|
"fcm_dpo/q_t": 0.4313550889492035,
|
|
"grad_norm": 29.275339126586914,
|
|
"learning_rate": 4.887255603610184e-07,
|
|
"logits/chosen": 0.26413899660110474,
|
|
"logits/rejected": 0.20717649161815643,
|
|
"logps/chosen": -66.0591049194336,
|
|
"logps/ref_chosen": -59.744285583496094,
|
|
"logps/ref_rejected": -86.77314758300781,
|
|
"logps/rejected": -96.10343933105469,
|
|
"loss": 1.1669,
|
|
"margin_dpo/margin_mean": 3.015474319458008,
|
|
"margin_dpo/margin_std": 5.040165901184082,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.1965230536659108,
|
|
"fcm_dpo/beta": 0.09993546456098557,
|
|
"fcm_dpo/delta": -0.0032323698978871107,
|
|
"fcm_dpo/margin": 2.8389410972595215,
|
|
"fcm_dpo/q_t": 0.4344428479671478,
|
|
"grad_norm": 30.352113723754883,
|
|
"learning_rate": 4.866793539675126e-07,
|
|
"logits/chosen": 0.1967930942773819,
|
|
"logits/rejected": 0.1665017306804657,
|
|
"logps/chosen": -71.52397918701172,
|
|
"logps/ref_chosen": -64.15296936035156,
|
|
"logps/ref_rejected": -75.17271423339844,
|
|
"logps/rejected": -85.38265991210938,
|
|
"loss": 1.1919,
|
|
"margin_dpo/margin_mean": 2.8389410972595215,
|
|
"margin_dpo/margin_std": 5.271792411804199,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.20408163265306123,
|
|
"fcm_dpo/beta": 0.09838803857564926,
|
|
"fcm_dpo/delta": -0.024441083893179893,
|
|
"fcm_dpo/margin": 3.772266387939453,
|
|
"fcm_dpo/q_t": 0.4152294099330902,
|
|
"grad_norm": 24.730987548828125,
|
|
"learning_rate": 4.844676460754862e-07,
|
|
"logits/chosen": 0.26162657141685486,
|
|
"logits/rejected": 0.22124962508678436,
|
|
"logps/chosen": -65.22847747802734,
|
|
"logps/ref_chosen": -57.006690979003906,
|
|
"logps/ref_rejected": -73.71768188476562,
|
|
"logps/rejected": -85.71173095703125,
|
|
"loss": 1.1324,
|
|
"margin_dpo/margin_mean": 3.772266387939453,
|
|
"margin_dpo/margin_std": 5.984399318695068,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.21164021164021163,
|
|
"fcm_dpo/beta": 0.09531065821647644,
|
|
"fcm_dpo/delta": -0.05013541504740715,
|
|
"fcm_dpo/margin": 4.44573450088501,
|
|
"fcm_dpo/q_t": 0.4060499668121338,
|
|
"grad_norm": 29.4134464263916,
|
|
"learning_rate": 4.820919832540181e-07,
|
|
"logits/chosen": 0.25046736001968384,
|
|
"logits/rejected": 0.2064397782087326,
|
|
"logps/chosen": -74.15589904785156,
|
|
"logps/ref_chosen": -63.36246871948242,
|
|
"logps/ref_rejected": -79.62621307373047,
|
|
"logps/rejected": -94.8653793334961,
|
|
"loss": 1.1281,
|
|
"margin_dpo/margin_mean": 4.445734977722168,
|
|
"margin_dpo/margin_std": 7.5196404457092285,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.21919879062736206,
|
|
"fcm_dpo/beta": 0.08971674740314484,
|
|
"fcm_dpo/delta": -0.04721946269273758,
|
|
"fcm_dpo/margin": 4.761581897735596,
|
|
"fcm_dpo/q_t": 0.40657633543014526,
|
|
"grad_norm": 30.589385986328125,
|
|
"learning_rate": 4.795540267200686e-07,
|
|
"logits/chosen": 0.2786180078983307,
|
|
"logits/rejected": 0.2569752633571625,
|
|
"logps/chosen": -77.37989044189453,
|
|
"logps/ref_chosen": -65.01470184326172,
|
|
"logps/ref_rejected": -80.49073791503906,
|
|
"logps/rejected": -97.61750793457031,
|
|
"loss": 1.1506,
|
|
"margin_dpo/margin_mean": 4.761581897735596,
|
|
"margin_dpo/margin_std": 8.919352531433105,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.22675736961451248,
|
|
"fcm_dpo/beta": 0.08510036766529083,
|
|
"fcm_dpo/delta": -0.11195192486047745,
|
|
"fcm_dpo/margin": 5.9401068687438965,
|
|
"fcm_dpo/q_t": 0.39034393429756165,
|
|
"grad_norm": 22.236682891845703,
|
|
"learning_rate": 4.768555511768486e-07,
|
|
"logits/chosen": 0.2792455554008484,
|
|
"logits/rejected": 0.23401157557964325,
|
|
"logps/chosen": -72.20897674560547,
|
|
"logps/ref_chosen": -59.19135284423828,
|
|
"logps/ref_rejected": -74.0339126586914,
|
|
"logps/rejected": -92.99165344238281,
|
|
"loss": 1.0828,
|
|
"margin_dpo/margin_mean": 5.9401068687438965,
|
|
"margin_dpo/margin_std": 9.036436080932617,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.23431594860166288,
|
|
"fcm_dpo/beta": 0.07190684974193573,
|
|
"fcm_dpo/delta": -0.13100460171699524,
|
|
"fcm_dpo/margin": 7.103701591491699,
|
|
"fcm_dpo/q_t": 0.38805317878723145,
|
|
"grad_norm": 21.95441246032715,
|
|
"learning_rate": 4.7399844357283393e-07,
|
|
"logits/chosen": 0.3055272698402405,
|
|
"logits/rejected": 0.2715781033039093,
|
|
"logps/chosen": -74.09068298339844,
|
|
"logps/ref_chosen": -60.93949508666992,
|
|
"logps/ref_rejected": -74.51151275634766,
|
|
"logps/rejected": -94.76640319824219,
|
|
"loss": 1.0779,
|
|
"margin_dpo/margin_mean": 7.103701114654541,
|
|
"margin_dpo/margin_std": 10.82844352722168,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.2418745275888133,
|
|
"fcm_dpo/beta": 0.06413034349679947,
|
|
"fcm_dpo/delta": -0.11762239784002304,
|
|
"fcm_dpo/margin": 7.9449782371521,
|
|
"fcm_dpo/q_t": 0.38926568627357483,
|
|
"grad_norm": 20.171205520629883,
|
|
"learning_rate": 4.7098470178228755e-07,
|
|
"logits/chosen": 0.2932426929473877,
|
|
"logits/rejected": 0.25532081723213196,
|
|
"logps/chosen": -73.47798919677734,
|
|
"logps/ref_chosen": -58.763816833496094,
|
|
"logps/ref_rejected": -74.94743347167969,
|
|
"logps/rejected": -97.60658264160156,
|
|
"loss": 1.0755,
|
|
"margin_dpo/margin_mean": 7.9449782371521,
|
|
"margin_dpo/margin_std": 12.001934051513672,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.2494331065759637,
|
|
"fcm_dpo/beta": 0.05613988637924194,
|
|
"fcm_dpo/delta": -0.1726417988538742,
|
|
"fcm_dpo/margin": 10.18576431274414,
|
|
"fcm_dpo/q_t": 0.3745308518409729,
|
|
"grad_norm": 17.679018020629883,
|
|
"learning_rate": 4.678164332082175e-07,
|
|
"logits/chosen": 0.3483496308326721,
|
|
"logits/rejected": 0.2956962287425995,
|
|
"logps/chosen": -72.38264465332031,
|
|
"logps/ref_chosen": -55.70417022705078,
|
|
"logps/ref_rejected": -76.59439849853516,
|
|
"logps/rejected": -103.45863342285156,
|
|
"loss": 1.0359,
|
|
"margin_dpo/margin_mean": 10.18576431274414,
|
|
"margin_dpo/margin_std": 13.736506462097168,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.25699168556311414,
|
|
"fcm_dpo/beta": 0.04928728565573692,
|
|
"fcm_dpo/delta": -0.09952159970998764,
|
|
"fcm_dpo/margin": 10.129049301147461,
|
|
"fcm_dpo/q_t": 0.3913528621196747,
|
|
"grad_norm": 16.242919921875,
|
|
"learning_rate": 4.6449585330874425e-07,
|
|
"logits/chosen": 0.3343687057495117,
|
|
"logits/rejected": 0.2925954759120941,
|
|
"logps/chosen": -76.33804321289062,
|
|
"logps/ref_chosen": -61.169105529785156,
|
|
"logps/ref_rejected": -77.21674346923828,
|
|
"logps/rejected": -102.5147476196289,
|
|
"loss": 1.0787,
|
|
"margin_dpo/margin_mean": 10.129049301147461,
|
|
"margin_dpo/margin_std": 15.241543769836426,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.26455026455026454,
|
|
"fcm_dpo/beta": 0.04289032891392708,
|
|
"fcm_dpo/delta": -0.16773784160614014,
|
|
"fcm_dpo/margin": 12.669050216674805,
|
|
"fcm_dpo/q_t": 0.38352128863334656,
|
|
"grad_norm": 16.280622482299805,
|
|
"learning_rate": 4.6102528404790965e-07,
|
|
"logits/chosen": 0.4107338786125183,
|
|
"logits/rejected": 0.3484509587287903,
|
|
"logps/chosen": -75.43938446044922,
|
|
"logps/ref_chosen": -59.24176788330078,
|
|
"logps/ref_rejected": -81.80384826660156,
|
|
"logps/rejected": -110.67051696777344,
|
|
"loss": 1.075,
|
|
"margin_dpo/margin_mean": 12.669050216674805,
|
|
"margin_dpo/margin_std": 19.41705322265625,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.272108843537415,
|
|
"fcm_dpo/beta": 0.03678743541240692,
|
|
"fcm_dpo/delta": -0.11334402859210968,
|
|
"fcm_dpo/margin": 13.086156845092773,
|
|
"fcm_dpo/q_t": 0.3945137560367584,
|
|
"grad_norm": 14.899900436401367,
|
|
"learning_rate": 4.5740715227200897e-07,
|
|
"logits/chosen": 0.3705318868160248,
|
|
"logits/rejected": 0.3279619812965393,
|
|
"logps/chosen": -80.0919418334961,
|
|
"logps/ref_chosen": -63.24883270263672,
|
|
"logps/ref_rejected": -79.00736236572266,
|
|
"logps/rejected": -108.93663024902344,
|
|
"loss": 1.0966,
|
|
"margin_dpo/margin_mean": 13.086158752441406,
|
|
"margin_dpo/margin_std": 20.56978988647461,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.2796674225245654,
|
|
"fcm_dpo/beta": 0.03294721618294716,
|
|
"fcm_dpo/delta": -0.11023982614278793,
|
|
"fcm_dpo/margin": 13.870523452758789,
|
|
"fcm_dpo/q_t": 0.39916402101516724,
|
|
"grad_norm": 14.874372482299805,
|
|
"learning_rate": 4.5364398801258394e-07,
|
|
"logits/chosen": 0.43493133783340454,
|
|
"logits/rejected": 0.38104137778282166,
|
|
"logps/chosen": -73.21731567382812,
|
|
"logps/ref_chosen": -56.390625,
|
|
"logps/ref_rejected": -76.81001281738281,
|
|
"logps/rejected": -107.5072250366211,
|
|
"loss": 1.1048,
|
|
"margin_dpo/margin_mean": 13.870523452758789,
|
|
"margin_dpo/margin_std": 21.951461791992188,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.2872260015117158,
|
|
"fcm_dpo/beta": 0.02976861596107483,
|
|
"fcm_dpo/delta": -0.130234032869339,
|
|
"fcm_dpo/margin": 17.27200698852539,
|
|
"fcm_dpo/q_t": 0.3894996643066406,
|
|
"grad_norm": 21.018056869506836,
|
|
"learning_rate": 4.4973842271726024e-07,
|
|
"logits/chosen": 0.42057228088378906,
|
|
"logits/rejected": 0.36500033736228943,
|
|
"logps/chosen": -86.78327178955078,
|
|
"logps/ref_chosen": -68.25389099121094,
|
|
"logps/ref_rejected": -86.461181640625,
|
|
"logps/rejected": -122.26255798339844,
|
|
"loss": 1.0765,
|
|
"margin_dpo/margin_mean": 17.27200698852539,
|
|
"margin_dpo/margin_std": 26.312463760375977,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.2947845804988662,
|
|
"fcm_dpo/beta": 0.026562869548797607,
|
|
"fcm_dpo/delta": -0.1006811112165451,
|
|
"fcm_dpo/margin": 17.410314559936523,
|
|
"fcm_dpo/q_t": 0.3985130786895752,
|
|
"grad_norm": 17.732221603393555,
|
|
"learning_rate": 4.4569318740967043e-07,
|
|
"logits/chosen": 0.45999032258987427,
|
|
"logits/rejected": 0.4397885799407959,
|
|
"logps/chosen": -90.21788024902344,
|
|
"logps/ref_chosen": -62.1484260559082,
|
|
"logps/ref_rejected": -71.33458709716797,
|
|
"logps/rejected": -116.81434631347656,
|
|
"loss": 1.1108,
|
|
"margin_dpo/margin_mean": 17.41031265258789,
|
|
"margin_dpo/margin_std": 28.3375301361084,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.30234315948601664,
|
|
"fcm_dpo/beta": 0.02375042252242565,
|
|
"fcm_dpo/delta": -0.12756529450416565,
|
|
"fcm_dpo/margin": 21.052818298339844,
|
|
"fcm_dpo/q_t": 0.3899378478527069,
|
|
"grad_norm": 14.28109073638916,
|
|
"learning_rate": 4.415111107797445e-07,
|
|
"logits/chosen": 0.5520531535148621,
|
|
"logits/rejected": 0.4885140061378479,
|
|
"logps/chosen": -89.1515121459961,
|
|
"logps/ref_chosen": -56.950096130371094,
|
|
"logps/ref_rejected": -78.66989135742188,
|
|
"logps/rejected": -131.92413330078125,
|
|
"loss": 1.0917,
|
|
"margin_dpo/margin_mean": 21.052818298339844,
|
|
"margin_dpo/margin_std": 32.75715637207031,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.30234315948601664,
|
|
"eval_fcm_dpo/beta": 0.022144686430692673,
|
|
"eval_logits/chosen": 0.5289739966392517,
|
|
"eval_logits/rejected": 0.47875019907951355,
|
|
"eval_logps/chosen": -112.04022979736328,
|
|
"eval_logps/ref_chosen": -74.85946655273438,
|
|
"eval_logps/ref_rejected": -79.54898834228516,
|
|
"eval_logps/rejected": -137.68849182128906,
|
|
"eval_loss": 0.5616376996040344,
|
|
"eval_margin_dpo/margin_mean": 20.95873260498047,
|
|
"eval_margin_dpo/margin_std": 36.02712631225586,
|
|
"eval_runtime": 39.0532,
|
|
"eval_samples_per_second": 58.971,
|
|
"eval_steps_per_second": 1.844,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.30990173847316704,
|
|
"fcm_dpo/beta": 0.020863929763436317,
|
|
"fcm_dpo/delta": -0.10121381282806396,
|
|
"fcm_dpo/margin": 23.369535446166992,
|
|
"fcm_dpo/q_t": 0.3925306797027588,
|
|
"grad_norm": 18.312232971191406,
|
|
"learning_rate": 4.3719511720570814e-07,
|
|
"logits/chosen": 0.5444221496582031,
|
|
"logits/rejected": 0.47210827469825745,
|
|
"logps/chosen": -100.59618377685547,
|
|
"logps/ref_chosen": -57.99428176879883,
|
|
"logps/ref_rejected": -83.5367431640625,
|
|
"logps/rejected": -149.5081787109375,
|
|
"loss": 1.0877,
|
|
"margin_dpo/margin_mean": 23.369535446166992,
|
|
"margin_dpo/margin_std": 35.94400405883789,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.31746031746031744,
|
|
"fcm_dpo/beta": 0.019625190645456314,
|
|
"fcm_dpo/delta": -0.06917699426412582,
|
|
"fcm_dpo/margin": 21.765804290771484,
|
|
"fcm_dpo/q_t": 0.4085807204246521,
|
|
"grad_norm": 16.501283645629883,
|
|
"learning_rate": 4.327482247091679e-07,
|
|
"logits/chosen": 0.5628946423530579,
|
|
"logits/rejected": 0.5068370699882507,
|
|
"logps/chosen": -115.35340881347656,
|
|
"logps/ref_chosen": -63.77195358276367,
|
|
"logps/ref_rejected": -82.56491088867188,
|
|
"logps/rejected": -155.9121856689453,
|
|
"loss": 1.1576,
|
|
"margin_dpo/margin_mean": 21.765806198120117,
|
|
"margin_dpo/margin_std": 41.01616668701172,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.3250188964474679,
|
|
"fcm_dpo/beta": 0.01737585850059986,
|
|
"fcm_dpo/delta": -0.14525336027145386,
|
|
"fcm_dpo/margin": 31.006107330322266,
|
|
"fcm_dpo/q_t": 0.3825533390045166,
|
|
"grad_norm": 14.236336708068848,
|
|
"learning_rate": 4.281735428447157e-07,
|
|
"logits/chosen": 0.55656498670578,
|
|
"logits/rejected": 0.48811864852905273,
|
|
"logps/chosen": -107.94661712646484,
|
|
"logps/ref_chosen": -60.27800750732422,
|
|
"logps/ref_rejected": -83.91607666015625,
|
|
"logps/rejected": -162.59080505371094,
|
|
"loss": 1.0518,
|
|
"margin_dpo/margin_mean": 31.006107330322266,
|
|
"margin_dpo/margin_std": 43.699501037597656,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.3325774754346183,
|
|
"fcm_dpo/beta": 0.01522024255245924,
|
|
"fcm_dpo/delta": -0.08151903748512268,
|
|
"fcm_dpo/margin": 28.928936004638672,
|
|
"fcm_dpo/q_t": 0.4007510244846344,
|
|
"grad_norm": 13.642237663269043,
|
|
"learning_rate": 4.234742705255272e-07,
|
|
"logits/chosen": 0.5403038263320923,
|
|
"logits/rejected": 0.47905245423316956,
|
|
"logps/chosen": -108.02642822265625,
|
|
"logps/ref_chosen": -60.88572311401367,
|
|
"logps/ref_rejected": -80.1805191040039,
|
|
"logps/rejected": -156.2501678466797,
|
|
"loss": 1.103,
|
|
"margin_dpo/margin_mean": 28.92893409729004,
|
|
"margin_dpo/margin_std": 44.418190002441406,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.3401360544217687,
|
|
"fcm_dpo/beta": 0.014115704223513603,
|
|
"fcm_dpo/delta": -0.08850517123937607,
|
|
"fcm_dpo/margin": 32.46453857421875,
|
|
"fcm_dpo/q_t": 0.3981640040874481,
|
|
"grad_norm": 12.134458541870117,
|
|
"learning_rate": 4.186536937864752e-07,
|
|
"logits/chosen": 0.6014004945755005,
|
|
"logits/rejected": 0.5260181427001953,
|
|
"logps/chosen": -108.61814880371094,
|
|
"logps/ref_chosen": -61.02507781982422,
|
|
"logps/ref_rejected": -91.92439270019531,
|
|
"logps/rejected": -171.98199462890625,
|
|
"loss": 1.0861,
|
|
"margin_dpo/margin_mean": 32.464542388916016,
|
|
"margin_dpo/margin_std": 47.451393127441406,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.3476946334089191,
|
|
"fcm_dpo/beta": 0.013391288928687572,
|
|
"fcm_dpo/delta": -0.04506213217973709,
|
|
"fcm_dpo/margin": 31.46584701538086,
|
|
"fcm_dpo/q_t": 0.4058153033256531,
|
|
"grad_norm": 13.718859672546387,
|
|
"learning_rate": 4.137151834863213e-07,
|
|
"logits/chosen": 0.6671124696731567,
|
|
"logits/rejected": 0.6181380748748779,
|
|
"logps/chosen": -106.39383697509766,
|
|
"logps/ref_chosen": -54.49797821044922,
|
|
"logps/ref_rejected": -71.96363830566406,
|
|
"logps/rejected": -155.32534790039062,
|
|
"loss": 1.1278,
|
|
"margin_dpo/margin_mean": 31.46584701538086,
|
|
"margin_dpo/margin_std": 53.070556640625,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.35525321239606955,
|
|
"fcm_dpo/beta": 0.012218359857797623,
|
|
"fcm_dpo/delta": -0.10176394134759903,
|
|
"fcm_dpo/margin": 35.79335403442383,
|
|
"fcm_dpo/q_t": 0.4017709791660309,
|
|
"grad_norm": 15.619464874267578,
|
|
"learning_rate": 4.08662192950594e-07,
|
|
"logits/chosen": 0.6200426816940308,
|
|
"logits/rejected": 0.5975286364555359,
|
|
"logps/chosen": -124.5421142578125,
|
|
"logps/ref_chosen": -63.250282287597656,
|
|
"logps/ref_rejected": -73.09049987792969,
|
|
"logps/rejected": -170.17568969726562,
|
|
"loss": 1.1063,
|
|
"margin_dpo/margin_mean": 35.79335403442383,
|
|
"margin_dpo/margin_std": 54.02311325073242,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.36281179138321995,
|
|
"fcm_dpo/beta": 0.011470427736639977,
|
|
"fcm_dpo/delta": -0.07215714454650879,
|
|
"fcm_dpo/margin": 40.14154815673828,
|
|
"fcm_dpo/q_t": 0.3966708779335022,
|
|
"grad_norm": 12.957676887512207,
|
|
"learning_rate": 4.0349825555680045e-07,
|
|
"logits/chosen": 0.6564071774482727,
|
|
"logits/rejected": 0.601380467414856,
|
|
"logps/chosen": -138.31361389160156,
|
|
"logps/ref_chosen": -65.26150512695312,
|
|
"logps/ref_rejected": -87.60311126708984,
|
|
"logps/rejected": -200.7967529296875,
|
|
"loss": 1.0907,
|
|
"margin_dpo/margin_mean": 40.14154815673828,
|
|
"margin_dpo/margin_std": 59.86652755737305,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.37037037037037035,
|
|
"fcm_dpo/beta": 0.010648809373378754,
|
|
"fcm_dpo/delta": -0.08149583637714386,
|
|
"fcm_dpo/margin": 43.929100036621094,
|
|
"fcm_dpo/q_t": 0.39428311586380005,
|
|
"grad_norm": 14.245344161987305,
|
|
"learning_rate": 3.982269822636601e-07,
|
|
"logits/chosen": 0.6480621099472046,
|
|
"logits/rejected": 0.6164118051528931,
|
|
"logps/chosen": -135.7429656982422,
|
|
"logps/ref_chosen": -65.73170471191406,
|
|
"logps/ref_rejected": -75.19642639160156,
|
|
"logps/rejected": -189.1367645263672,
|
|
"loss": 1.0693,
|
|
"margin_dpo/margin_mean": 43.92909622192383,
|
|
"margin_dpo/margin_std": 59.444854736328125,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.3779289493575208,
|
|
"fcm_dpo/beta": 0.009915231727063656,
|
|
"fcm_dpo/delta": -0.041826874017715454,
|
|
"fcm_dpo/margin": 35.88407897949219,
|
|
"fcm_dpo/q_t": 0.4197370409965515,
|
|
"grad_norm": 14.5069580078125,
|
|
"learning_rate": 3.9285205908608934e-07,
|
|
"logits/chosen": 0.7002509832382202,
|
|
"logits/rejected": 0.6972779035568237,
|
|
"logps/chosen": -155.23341369628906,
|
|
"logps/ref_chosen": -70.71224212646484,
|
|
"logps/ref_rejected": -76.12723541259766,
|
|
"logps/rejected": -196.532470703125,
|
|
"loss": 1.1785,
|
|
"margin_dpo/margin_mean": 35.88407516479492,
|
|
"margin_dpo/margin_std": 69.27667999267578,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.3854875283446712,
|
|
"fcm_dpo/beta": 0.009629678912460804,
|
|
"fcm_dpo/delta": -0.012941457331180573,
|
|
"fcm_dpo/margin": 34.634307861328125,
|
|
"fcm_dpo/q_t": 0.4239214360713959,
|
|
"grad_norm": 15.92194938659668,
|
|
"learning_rate": 3.873772445177015e-07,
|
|
"logits/chosen": 0.6967302560806274,
|
|
"logits/rejected": 0.6524414420127869,
|
|
"logps/chosen": -145.23605346679688,
|
|
"logps/ref_chosen": -61.767662048339844,
|
|
"logps/ref_rejected": -77.38813018798828,
|
|
"logps/rejected": -195.49081420898438,
|
|
"loss": 1.1757,
|
|
"margin_dpo/margin_mean": 34.63430404663086,
|
|
"margin_dpo/margin_std": 64.64119720458984,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.3930461073318216,
|
|
"fcm_dpo/beta": 0.009221619926393032,
|
|
"fcm_dpo/delta": -0.06601964682340622,
|
|
"fcm_dpo/margin": 48.99907684326172,
|
|
"fcm_dpo/q_t": 0.39814695715904236,
|
|
"grad_norm": 19.110883712768555,
|
|
"learning_rate": 3.818063669026256e-07,
|
|
"logits/chosen": 0.6849242448806763,
|
|
"logits/rejected": 0.603645384311676,
|
|
"logps/chosen": -148.38861083984375,
|
|
"logps/ref_chosen": -61.57584762573242,
|
|
"logps/ref_rejected": -91.87513732910156,
|
|
"logps/rejected": -227.6869659423828,
|
|
"loss": 1.0902,
|
|
"margin_dpo/margin_mean": 48.99907684326172,
|
|
"margin_dpo/margin_std": 72.31168365478516,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.40060468631897206,
|
|
"fcm_dpo/beta": 0.008941135369241238,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 41.382564544677734,
|
|
"fcm_dpo/q_t": 0.41540035605430603,
|
|
"grad_norm": 11.394349098205566,
|
|
"learning_rate": 3.7614332175848027e-07,
|
|
"logits/chosen": 0.643638551235199,
|
|
"logits/rejected": 0.5954387187957764,
|
|
"logps/chosen": -147.72586059570312,
|
|
"logps/ref_chosen": -65.75422668457031,
|
|
"logps/ref_rejected": -77.9569320678711,
|
|
"logps/rejected": -201.31112670898438,
|
|
"loss": 1.1392,
|
|
"margin_dpo/margin_mean": 41.38256072998047,
|
|
"margin_dpo/margin_std": 68.18827819824219,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.40816326530612246,
|
|
"fcm_dpo/beta": 0.008837602101266384,
|
|
"fcm_dpo/delta": -0.02355731837451458,
|
|
"fcm_dpo/margin": 42.396751403808594,
|
|
"fcm_dpo/q_t": 0.41459059715270996,
|
|
"grad_norm": 15.444929122924805,
|
|
"learning_rate": 3.7039206905237656e-07,
|
|
"logits/chosen": 0.6901830434799194,
|
|
"logits/rejected": 0.6426895260810852,
|
|
"logps/chosen": -138.05111694335938,
|
|
"logps/ref_chosen": -62.27649688720703,
|
|
"logps/ref_rejected": -76.56950378417969,
|
|
"logps/rejected": -194.7408905029297,
|
|
"loss": 1.1371,
|
|
"margin_dpo/margin_mean": 42.396751403808594,
|
|
"margin_dpo/margin_std": 68.82071685791016,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.41572184429327286,
|
|
"fcm_dpo/beta": 0.008553928695619106,
|
|
"fcm_dpo/delta": -0.03487258031964302,
|
|
"fcm_dpo/margin": 46.58140182495117,
|
|
"fcm_dpo/q_t": 0.40970954298973083,
|
|
"grad_norm": 20.471668243408203,
|
|
"learning_rate": 3.645566304318526e-07,
|
|
"logits/chosen": 0.6841222047805786,
|
|
"logits/rejected": 0.6400257349014282,
|
|
"logps/chosen": -143.00296020507812,
|
|
"logps/ref_chosen": -61.854393005371094,
|
|
"logps/ref_rejected": -77.22246551513672,
|
|
"logps/rejected": -204.95242309570312,
|
|
"loss": 1.1265,
|
|
"margin_dpo/margin_mean": 46.58140182495117,
|
|
"margin_dpo/margin_std": 74.56303405761719,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.42328042328042326,
|
|
"fcm_dpo/beta": 0.008292925544083118,
|
|
"fcm_dpo/delta": -0.060983072966337204,
|
|
"fcm_dpo/margin": 49.10211944580078,
|
|
"fcm_dpo/q_t": 0.4059298038482666,
|
|
"grad_norm": 13.393473625183105,
|
|
"learning_rate": 3.586410864126781e-07,
|
|
"logits/chosen": 0.6645344495773315,
|
|
"logits/rejected": 0.6346439719200134,
|
|
"logps/chosen": -146.82679748535156,
|
|
"logps/ref_chosen": -61.29896926879883,
|
|
"logps/ref_rejected": -73.35762023925781,
|
|
"logps/rejected": -207.98757934570312,
|
|
"loss": 1.1041,
|
|
"margin_dpo/margin_mean": 49.10211944580078,
|
|
"margin_dpo/margin_std": 69.17396545410156,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.4308390022675737,
|
|
"fcm_dpo/beta": 0.007784596644341946,
|
|
"fcm_dpo/delta": -0.027562415227293968,
|
|
"fcm_dpo/margin": 48.093055725097656,
|
|
"fcm_dpo/q_t": 0.41425347328186035,
|
|
"grad_norm": 16.781484603881836,
|
|
"learning_rate": 3.5264957352549375e-07,
|
|
"logits/chosen": 0.6967326402664185,
|
|
"logits/rejected": 0.6534906625747681,
|
|
"logps/chosen": -159.16049194335938,
|
|
"logps/ref_chosen": -63.435462951660156,
|
|
"logps/ref_rejected": -79.73661804199219,
|
|
"logps/rejected": -223.55471801757812,
|
|
"loss": 1.1319,
|
|
"margin_dpo/margin_mean": 48.093055725097656,
|
|
"margin_dpo/margin_std": 75.91288757324219,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.4383975812547241,
|
|
"fcm_dpo/beta": 0.007469588425010443,
|
|
"fcm_dpo/delta": -0.042763665318489075,
|
|
"fcm_dpo/margin": 55.20690155029297,
|
|
"fcm_dpo/q_t": 0.40540584921836853,
|
|
"grad_norm": 15.624042510986328,
|
|
"learning_rate": 3.465862814232821e-07,
|
|
"logits/chosen": 0.711457371711731,
|
|
"logits/rejected": 0.650222659111023,
|
|
"logps/chosen": -162.69920349121094,
|
|
"logps/ref_chosen": -57.696876525878906,
|
|
"logps/ref_rejected": -79.78132629394531,
|
|
"logps/rejected": -239.9905548095703,
|
|
"loss": 1.0921,
|
|
"margin_dpo/margin_mean": 55.20690155029297,
|
|
"margin_dpo/margin_std": 75.91992950439453,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.4459561602418745,
|
|
"fcm_dpo/beta": 0.00719553604722023,
|
|
"fcm_dpo/delta": -0.038509003818035126,
|
|
"fcm_dpo/margin": 52.451446533203125,
|
|
"fcm_dpo/q_t": 0.4142046570777893,
|
|
"grad_norm": 17.099523544311523,
|
|
"learning_rate": 3.4045544995169125e-07,
|
|
"logits/chosen": 0.7705697417259216,
|
|
"logits/rejected": 0.6970144510269165,
|
|
"logps/chosen": -168.26991271972656,
|
|
"logps/ref_chosen": -55.430633544921875,
|
|
"logps/ref_rejected": -78.1390151977539,
|
|
"logps/rejected": -243.4297332763672,
|
|
"loss": 1.1278,
|
|
"margin_dpo/margin_mean": 52.451446533203125,
|
|
"margin_dpo/margin_std": 81.40741729736328,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.45351473922902497,
|
|
"fcm_dpo/beta": 0.007098735310137272,
|
|
"fcm_dpo/delta": -0.0035702171735465527,
|
|
"fcm_dpo/margin": 45.36979293823242,
|
|
"fcm_dpo/q_t": 0.4261544346809387,
|
|
"grad_norm": 12.7843656539917,
|
|
"learning_rate": 3.3426136618426043e-07,
|
|
"logits/chosen": 0.707840621471405,
|
|
"logits/rejected": 0.6562869548797607,
|
|
"logps/chosen": -185.88197326660156,
|
|
"logps/ref_chosen": -61.207069396972656,
|
|
"logps/ref_rejected": -75.23294067382812,
|
|
"logps/rejected": -245.2776336669922,
|
|
"loss": 1.1763,
|
|
"margin_dpo/margin_mean": 45.36979293823242,
|
|
"margin_dpo/margin_std": 83.63997650146484,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.46107331821617537,
|
|
"fcm_dpo/beta": 0.007054163608700037,
|
|
"fcm_dpo/delta": -0.011994509026408195,
|
|
"fcm_dpo/margin": 47.528289794921875,
|
|
"fcm_dpo/q_t": 0.423266738653183,
|
|
"grad_norm": 14.771221160888672,
|
|
"learning_rate": 3.280083614246217e-07,
|
|
"logits/chosen": 0.6945724487304688,
|
|
"logits/rejected": 0.6594172716140747,
|
|
"logps/chosen": -184.41525268554688,
|
|
"logps/ref_chosen": -63.06663131713867,
|
|
"logps/ref_rejected": -78.45845031738281,
|
|
"logps/rejected": -247.3353729248047,
|
|
"loss": 1.1687,
|
|
"margin_dpo/margin_mean": 47.52829360961914,
|
|
"margin_dpo/margin_std": 85.50675201416016,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.46863189720332576,
|
|
"fcm_dpo/beta": 0.006967984139919281,
|
|
"fcm_dpo/delta": -0.005523760803043842,
|
|
"fcm_dpo/margin": 38.326297760009766,
|
|
"fcm_dpo/q_t": 0.43881019949913025,
|
|
"grad_norm": 12.633563995361328,
|
|
"learning_rate": 3.2170080817777257e-07,
|
|
"logits/chosen": 0.7481607794761658,
|
|
"logits/rejected": 0.7011669874191284,
|
|
"logps/chosen": -181.23231506347656,
|
|
"logps/ref_chosen": -63.60908889770508,
|
|
"logps/ref_rejected": -74.06394958496094,
|
|
"logps/rejected": -230.0135040283203,
|
|
"loss": 1.2204,
|
|
"margin_dpo/margin_mean": 38.3262939453125,
|
|
"margin_dpo/margin_std": 83.02953338623047,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.47619047619047616,
|
|
"fcm_dpo/beta": 0.006899350322782993,
|
|
"fcm_dpo/delta": -0.015122666954994202,
|
|
"fcm_dpo/margin": 49.54688262939453,
|
|
"fcm_dpo/q_t": 0.4215819239616394,
|
|
"grad_norm": 12.57418441772461,
|
|
"learning_rate": 3.1534311709253723e-07,
|
|
"logits/chosen": 0.6818271279335022,
|
|
"logits/rejected": 0.6326395869255066,
|
|
"logps/chosen": -168.5291748046875,
|
|
"logps/ref_chosen": -62.31493377685547,
|
|
"logps/ref_rejected": -75.07472229003906,
|
|
"logps/rejected": -230.8358612060547,
|
|
"loss": 1.1467,
|
|
"margin_dpo/margin_mean": 49.54688262939453,
|
|
"margin_dpo/margin_std": 79.58061981201172,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.4837490551776266,
|
|
"fcm_dpo/beta": 0.00665700901299715,
|
|
"fcm_dpo/delta": -0.0466286763548851,
|
|
"fcm_dpo/margin": 58.286048889160156,
|
|
"fcm_dpo/q_t": 0.41121044754981995,
|
|
"grad_norm": 11.143263816833496,
|
|
"learning_rate": 3.0893973387735683e-07,
|
|
"logits/chosen": 0.6711269617080688,
|
|
"logits/rejected": 0.6105794310569763,
|
|
"logps/chosen": -152.8118133544922,
|
|
"logps/ref_chosen": -55.336036682128906,
|
|
"logps/ref_rejected": -80.05536651611328,
|
|
"logps/rejected": -235.81716918945312,
|
|
"loss": 1.1163,
|
|
"margin_dpo/margin_mean": 58.286048889160156,
|
|
"margin_dpo/margin_std": 85.82135009765625,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.491307634164777,
|
|
"fcm_dpo/beta": 0.006425836123526096,
|
|
"fcm_dpo/delta": -0.025686081498861313,
|
|
"fcm_dpo/margin": 57.461631774902344,
|
|
"fcm_dpo/q_t": 0.4144781231880188,
|
|
"grad_norm": 10.588150978088379,
|
|
"learning_rate": 3.0249513619156206e-07,
|
|
"logits/chosen": 0.6904675364494324,
|
|
"logits/rejected": 0.63264399766922,
|
|
"logps/chosen": -163.8726043701172,
|
|
"logps/ref_chosen": -57.90629959106445,
|
|
"logps/ref_rejected": -74.2243881225586,
|
|
"logps/rejected": -237.65234375,
|
|
"loss": 1.1236,
|
|
"margin_dpo/margin_mean": 57.461631774902344,
|
|
"margin_dpo/margin_std": 84.99284362792969,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.4988662131519274,
|
|
"fcm_dpo/beta": 0.006382169667631388,
|
|
"fcm_dpo/delta": -0.0038101542741060257,
|
|
"fcm_dpo/margin": 48.41600799560547,
|
|
"fcm_dpo/q_t": 0.42902547121047974,
|
|
"grad_norm": 11.327309608459473,
|
|
"learning_rate": 2.9601383051430505e-07,
|
|
"logits/chosen": 0.6518532037734985,
|
|
"logits/rejected": 0.615720272064209,
|
|
"logps/chosen": -183.9138641357422,
|
|
"logps/ref_chosen": -65.17555236816406,
|
|
"logps/ref_rejected": -78.53681182861328,
|
|
"logps/rejected": -245.69113159179688,
|
|
"loss": 1.1791,
|
|
"margin_dpo/margin_mean": 48.41600799560547,
|
|
"margin_dpo/margin_std": 88.16541290283203,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.5064247921390779,
|
|
"fcm_dpo/beta": 0.0062421816401183605,
|
|
"fcm_dpo/delta": -0.024326926097273827,
|
|
"fcm_dpo/margin": 60.58441162109375,
|
|
"fcm_dpo/q_t": 0.4129098057746887,
|
|
"grad_norm": 14.136266708374023,
|
|
"learning_rate": 2.895003489933375e-07,
|
|
"logits/chosen": 0.7140273451805115,
|
|
"logits/rejected": 0.6678114533424377,
|
|
"logps/chosen": -181.29025268554688,
|
|
"logps/ref_chosen": -62.62797927856445,
|
|
"logps/ref_rejected": -79.9095458984375,
|
|
"logps/rejected": -259.15618896484375,
|
|
"loss": 1.116,
|
|
"margin_dpo/margin_mean": 60.58441162109375,
|
|
"margin_dpo/margin_std": 88.1111831665039,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.5139833711262283,
|
|
"fcm_dpo/beta": 0.006153796333819628,
|
|
"fcm_dpo/delta": -0.008682211861014366,
|
|
"fcm_dpo/margin": 52.959136962890625,
|
|
"fcm_dpo/q_t": 0.4254566729068756,
|
|
"grad_norm": 11.051826477050781,
|
|
"learning_rate": 2.8295924627584004e-07,
|
|
"logits/chosen": 0.6959365010261536,
|
|
"logits/rejected": 0.6593111753463745,
|
|
"logps/chosen": -193.18997192382812,
|
|
"logps/ref_chosen": -61.1064567565918,
|
|
"logps/ref_rejected": -76.71846008300781,
|
|
"logps/rejected": -261.7611083984375,
|
|
"loss": 1.1653,
|
|
"margin_dpo/margin_mean": 52.959144592285156,
|
|
"margin_dpo/margin_std": 92.80432891845703,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.5215419501133787,
|
|
"fcm_dpo/beta": 0.00603325804695487,
|
|
"fcm_dpo/delta": -0.01978192664682865,
|
|
"fcm_dpo/margin": 59.29703903198242,
|
|
"fcm_dpo/q_t": 0.41810742020606995,
|
|
"grad_norm": 11.418231010437012,
|
|
"learning_rate": 2.7639509632351927e-07,
|
|
"logits/chosen": 0.6984354853630066,
|
|
"logits/rejected": 0.6573707461357117,
|
|
"logps/chosen": -188.5522003173828,
|
|
"logps/ref_chosen": -60.12370681762695,
|
|
"logps/ref_rejected": -78.58574676513672,
|
|
"logps/rejected": -266.3112487792969,
|
|
"loss": 1.1408,
|
|
"margin_dpo/margin_mean": 59.29703903198242,
|
|
"margin_dpo/margin_std": 95.01673889160156,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.5291005291005291,
|
|
"fcm_dpo/beta": 0.005945051088929176,
|
|
"fcm_dpo/delta": -0.04099785163998604,
|
|
"fcm_dpo/margin": 68.73493957519531,
|
|
"fcm_dpo/q_t": 0.40639615058898926,
|
|
"grad_norm": 13.632902145385742,
|
|
"learning_rate": 2.698124892141971e-07,
|
|
"logits/chosen": 0.7776240706443787,
|
|
"logits/rejected": 0.7138788104057312,
|
|
"logps/chosen": -179.670166015625,
|
|
"logps/ref_chosen": -55.104461669921875,
|
|
"logps/ref_rejected": -80.63292694091797,
|
|
"logps/rejected": -273.9335632324219,
|
|
"loss": 1.0955,
|
|
"margin_dpo/margin_mean": 68.73492431640625,
|
|
"margin_dpo/margin_std": 94.61377716064453,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.5366591080876795,
|
|
"fcm_dpo/beta": 0.005738373845815659,
|
|
"fcm_dpo/delta": -0.02628186345100403,
|
|
"fcm_dpo/margin": 70.14204406738281,
|
|
"fcm_dpo/q_t": 0.4074961543083191,
|
|
"grad_norm": 11.920957565307617,
|
|
"learning_rate": 2.632160279321328e-07,
|
|
"logits/chosen": 0.7566885352134705,
|
|
"logits/rejected": 0.6853598356246948,
|
|
"logps/chosen": -179.594970703125,
|
|
"logps/ref_chosen": -54.87224197387695,
|
|
"logps/ref_rejected": -77.01316833496094,
|
|
"logps/rejected": -271.8779602050781,
|
|
"loss": 1.1034,
|
|
"margin_dpo/margin_mean": 70.14204406738281,
|
|
"margin_dpo/margin_std": 100.20321655273438,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.54421768707483,
|
|
"fcm_dpo/beta": 0.005551532376557589,
|
|
"fcm_dpo/delta": -0.027073120698332787,
|
|
"fcm_dpo/margin": 58.166900634765625,
|
|
"fcm_dpo/q_t": 0.42612725496292114,
|
|
"grad_norm": 11.87866497039795,
|
|
"learning_rate": 2.5661032514931834e-07,
|
|
"logits/chosen": 0.7266971468925476,
|
|
"logits/rejected": 0.6876090168952942,
|
|
"logps/chosen": -207.0313262939453,
|
|
"logps/ref_chosen": -60.75285720825195,
|
|
"logps/ref_rejected": -75.21507263183594,
|
|
"logps/rejected": -279.6604309082031,
|
|
"loss": 1.1756,
|
|
"margin_dpo/margin_mean": 58.166900634765625,
|
|
"margin_dpo/margin_std": 105.21885681152344,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.5517762660619804,
|
|
"fcm_dpo/beta": 0.0053816549479961395,
|
|
"fcm_dpo/delta": -0.03808388113975525,
|
|
"fcm_dpo/margin": 80.42271423339844,
|
|
"fcm_dpo/q_t": 0.4026545584201813,
|
|
"grad_norm": 10.223109245300293,
|
|
"learning_rate": 2.5e-07,
|
|
"logits/chosen": 0.7709358930587769,
|
|
"logits/rejected": 0.7040495872497559,
|
|
"logps/chosen": -198.53555297851562,
|
|
"logps/ref_chosen": -58.56513595581055,
|
|
"logps/ref_rejected": -84.06403350830078,
|
|
"logps/rejected": -304.4571533203125,
|
|
"loss": 1.0842,
|
|
"margin_dpo/margin_mean": 80.42271423339844,
|
|
"margin_dpo/margin_std": 112.6312026977539,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.5593348450491308,
|
|
"fcm_dpo/beta": 0.005276652052998543,
|
|
"fcm_dpo/delta": -0.007817991077899933,
|
|
"fcm_dpo/margin": 64.3998031616211,
|
|
"fcm_dpo/q_t": 0.4214501976966858,
|
|
"grad_norm": 12.37844181060791,
|
|
"learning_rate": 2.4338967485068164e-07,
|
|
"logits/chosen": 0.7387269735336304,
|
|
"logits/rejected": 0.6979778409004211,
|
|
"logps/chosen": -199.47242736816406,
|
|
"logps/ref_chosen": -59.443138122558594,
|
|
"logps/ref_rejected": -75.80937194824219,
|
|
"logps/rejected": -280.23846435546875,
|
|
"loss": 1.1548,
|
|
"margin_dpo/margin_mean": 64.3998031616211,
|
|
"margin_dpo/margin_std": 108.75566101074219,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.5668934240362812,
|
|
"fcm_dpo/beta": 0.005195076577365398,
|
|
"fcm_dpo/delta": -0.016707830131053925,
|
|
"fcm_dpo/margin": 63.93414306640625,
|
|
"fcm_dpo/q_t": 0.4242404103279114,
|
|
"grad_norm": 14.827937126159668,
|
|
"learning_rate": 2.3678397206786715e-07,
|
|
"logits/chosen": 0.7876321077346802,
|
|
"logits/rejected": 0.7421884536743164,
|
|
"logps/chosen": -198.78579711914062,
|
|
"logps/ref_chosen": -58.59185028076172,
|
|
"logps/ref_rejected": -73.7529525756836,
|
|
"logps/rejected": -277.88104248046875,
|
|
"loss": 1.1759,
|
|
"margin_dpo/margin_mean": 63.93414306640625,
|
|
"margin_dpo/margin_std": 119.758544921875,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.5744520030234316,
|
|
"fcm_dpo/beta": 0.005114838946610689,
|
|
"fcm_dpo/delta": -0.013521865010261536,
|
|
"fcm_dpo/margin": 71.66112518310547,
|
|
"fcm_dpo/q_t": 0.41665583848953247,
|
|
"grad_norm": 12.766351699829102,
|
|
"learning_rate": 2.3018751078580283e-07,
|
|
"logits/chosen": 0.7841566801071167,
|
|
"logits/rejected": 0.7393085956573486,
|
|
"logps/chosen": -195.7206573486328,
|
|
"logps/ref_chosen": -58.93424606323242,
|
|
"logps/ref_rejected": -76.27055358886719,
|
|
"logps/rejected": -284.71807861328125,
|
|
"loss": 1.1354,
|
|
"margin_dpo/margin_mean": 71.66112518310547,
|
|
"margin_dpo/margin_std": 114.41337585449219,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.582010582010582,
|
|
"fcm_dpo/beta": 0.0050823139026761055,
|
|
"fcm_dpo/delta": -0.010413008742034435,
|
|
"fcm_dpo/margin": 60.48808670043945,
|
|
"fcm_dpo/q_t": 0.4290579855442047,
|
|
"grad_norm": 9.910415649414062,
|
|
"learning_rate": 2.2360490367648084e-07,
|
|
"logits/chosen": 0.7422327399253845,
|
|
"logits/rejected": 0.723129153251648,
|
|
"logps/chosen": -211.396728515625,
|
|
"logps/ref_chosen": -66.42684173583984,
|
|
"logps/ref_rejected": -76.96304321289062,
|
|
"logps/rejected": -282.4209899902344,
|
|
"loss": 1.1822,
|
|
"margin_dpo/margin_mean": 60.48808670043945,
|
|
"margin_dpo/margin_std": 111.88542175292969,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.5895691609977324,
|
|
"fcm_dpo/beta": 0.005034881643950939,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 65.50511169433594,
|
|
"fcm_dpo/q_t": 0.42413240671157837,
|
|
"grad_norm": 11.989714622497559,
|
|
"learning_rate": 2.170407537241599e-07,
|
|
"logits/chosen": 0.7661797404289246,
|
|
"logits/rejected": 0.7171558141708374,
|
|
"logps/chosen": -205.19943237304688,
|
|
"logps/ref_chosen": -60.984214782714844,
|
|
"logps/ref_rejected": -79.54056549072266,
|
|
"logps/rejected": -289.26092529296875,
|
|
"loss": 1.1583,
|
|
"margin_dpo/margin_mean": 65.50511169433594,
|
|
"margin_dpo/margin_std": 110.4152603149414,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.5971277399848829,
|
|
"fcm_dpo/beta": 0.0049595460295677185,
|
|
"fcm_dpo/delta": -0.023463377729058266,
|
|
"fcm_dpo/margin": 75.90830993652344,
|
|
"fcm_dpo/q_t": 0.4133135676383972,
|
|
"grad_norm": 11.980950355529785,
|
|
"learning_rate": 2.104996510066625e-07,
|
|
"logits/chosen": 0.7978567481040955,
|
|
"logits/rejected": 0.7469469308853149,
|
|
"logps/chosen": -197.7019805908203,
|
|
"logps/ref_chosen": -58.30937957763672,
|
|
"logps/ref_rejected": -80.09587097167969,
|
|
"logps/rejected": -295.39678955078125,
|
|
"loss": 1.1188,
|
|
"margin_dpo/margin_mean": 75.90830993652344,
|
|
"margin_dpo/margin_std": 111.34185791015625,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.6046863189720333,
|
|
"fcm_dpo/beta": 0.004786391276866198,
|
|
"fcm_dpo/delta": -0.049943797290325165,
|
|
"fcm_dpo/margin": 75.52302551269531,
|
|
"fcm_dpo/q_t": 0.41768354177474976,
|
|
"grad_norm": 14.266228675842285,
|
|
"learning_rate": 2.0398616948569493e-07,
|
|
"logits/chosen": 0.7723232507705688,
|
|
"logits/rejected": 0.6996694207191467,
|
|
"logps/chosen": -212.7456512451172,
|
|
"logps/ref_chosen": -61.39867401123047,
|
|
"logps/ref_rejected": -89.0177993774414,
|
|
"logps/rejected": -315.8878173828125,
|
|
"loss": 1.134,
|
|
"margin_dpo/margin_mean": 75.52302551269531,
|
|
"margin_dpo/margin_std": 114.94172668457031,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.6046863189720333,
|
|
"eval_fcm_dpo/beta": 0.004678524564951658,
|
|
"eval_logits/chosen": 0.7730867862701416,
|
|
"eval_logits/rejected": 0.7383347749710083,
|
|
"eval_logps/chosen": -231.1050567626953,
|
|
"eval_logps/ref_chosen": -74.85946655273438,
|
|
"eval_logps/ref_rejected": -79.54898834228516,
|
|
"eval_logps/rejected": -297.61297607421875,
|
|
"eval_loss": 0.5988211035728455,
|
|
"eval_margin_dpo/margin_mean": 61.81840133666992,
|
|
"eval_margin_dpo/margin_std": 122.34429931640625,
|
|
"eval_runtime": 39.0005,
|
|
"eval_samples_per_second": 59.051,
|
|
"eval_steps_per_second": 1.846,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.6122448979591837,
|
|
"fcm_dpo/beta": 0.004617620259523392,
|
|
"fcm_dpo/delta": -0.021043911576271057,
|
|
"fcm_dpo/margin": 78.88172149658203,
|
|
"fcm_dpo/q_t": 0.41677242517471313,
|
|
"grad_norm": 9.603548049926758,
|
|
"learning_rate": 1.975048638084379e-07,
|
|
"logits/chosen": 0.8381478190422058,
|
|
"logits/rejected": 0.7813048958778381,
|
|
"logps/chosen": -199.7872772216797,
|
|
"logps/ref_chosen": -55.953521728515625,
|
|
"logps/ref_rejected": -77.67539978027344,
|
|
"logps/rejected": -300.390869140625,
|
|
"loss": 1.1288,
|
|
"margin_dpo/margin_mean": 78.88172149658203,
|
|
"margin_dpo/margin_std": 118.5303726196289,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.6198034769463341,
|
|
"fcm_dpo/beta": 0.0044863419607281685,
|
|
"fcm_dpo/delta": -0.03810073807835579,
|
|
"fcm_dpo/margin": 80.86897277832031,
|
|
"fcm_dpo/q_t": 0.4153751730918884,
|
|
"grad_norm": 10.528109550476074,
|
|
"learning_rate": 1.9106026612264315e-07,
|
|
"logits/chosen": 0.800028920173645,
|
|
"logits/rejected": 0.7490849494934082,
|
|
"logps/chosen": -218.7476043701172,
|
|
"logps/ref_chosen": -63.40419387817383,
|
|
"logps/ref_rejected": -80.85710144042969,
|
|
"logps/rejected": -317.06951904296875,
|
|
"loss": 1.1232,
|
|
"margin_dpo/margin_mean": 80.86897277832031,
|
|
"margin_dpo/margin_std": 114.73890686035156,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.6273620559334845,
|
|
"fcm_dpo/beta": 0.004409838933497667,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 74.53548431396484,
|
|
"fcm_dpo/q_t": 0.42364954948425293,
|
|
"grad_norm": 9.875075340270996,
|
|
"learning_rate": 1.846568829074628e-07,
|
|
"logits/chosen": 0.8113610148429871,
|
|
"logits/rejected": 0.7791782021522522,
|
|
"logps/chosen": -228.55026245117188,
|
|
"logps/ref_chosen": -57.6942024230957,
|
|
"logps/ref_rejected": -71.74036407470703,
|
|
"logps/rejected": -317.13189697265625,
|
|
"loss": 1.1495,
|
|
"margin_dpo/margin_mean": 74.53548431396484,
|
|
"margin_dpo/margin_std": 117.6064682006836,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.6349206349206349,
|
|
"fcm_dpo/beta": 0.004353252239525318,
|
|
"fcm_dpo/delta": -0.016652632504701614,
|
|
"fcm_dpo/margin": 68.00981140136719,
|
|
"fcm_dpo/q_t": 0.4319809377193451,
|
|
"grad_norm": 12.201674461364746,
|
|
"learning_rate": 1.782991918222275e-07,
|
|
"logits/chosen": 0.8484581708908081,
|
|
"logits/rejected": 0.819144606590271,
|
|
"logps/chosen": -241.6039581298828,
|
|
"logps/ref_chosen": -59.169517517089844,
|
|
"logps/ref_rejected": -69.47721099853516,
|
|
"logps/rejected": -319.92144775390625,
|
|
"loss": 1.1926,
|
|
"margin_dpo/margin_mean": 68.00981140136719,
|
|
"margin_dpo/margin_std": 129.67337036132812,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.6424792139077853,
|
|
"fcm_dpo/beta": 0.00428891833871603,
|
|
"fcm_dpo/delta": -0.021387049928307533,
|
|
"fcm_dpo/margin": 82.73738861083984,
|
|
"fcm_dpo/q_t": 0.4183295667171478,
|
|
"grad_norm": 9.915822982788086,
|
|
"learning_rate": 1.7199163857537824e-07,
|
|
"logits/chosen": 0.8384604454040527,
|
|
"logits/rejected": 0.809127151966095,
|
|
"logps/chosen": -226.8582763671875,
|
|
"logps/ref_chosen": -58.09320831298828,
|
|
"logps/ref_rejected": -73.98226165771484,
|
|
"logps/rejected": -325.4847412109375,
|
|
"loss": 1.1363,
|
|
"margin_dpo/margin_mean": 82.73738861083984,
|
|
"margin_dpo/margin_std": 128.06600952148438,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.6500377928949358,
|
|
"fcm_dpo/beta": 0.004245240706950426,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 70.30382537841797,
|
|
"fcm_dpo/q_t": 0.4308013319969177,
|
|
"grad_norm": 11.620451927185059,
|
|
"learning_rate": 1.6573863381573954e-07,
|
|
"logits/chosen": 0.8153077363967896,
|
|
"logits/rejected": 0.803315281867981,
|
|
"logps/chosen": -252.0271759033203,
|
|
"logps/ref_chosen": -62.7039909362793,
|
|
"logps/ref_rejected": -74.52284240722656,
|
|
"logps/rejected": -334.14984130859375,
|
|
"loss": 1.1889,
|
|
"margin_dpo/margin_mean": 70.30382537841797,
|
|
"margin_dpo/margin_std": 133.47201538085938,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.6575963718820862,
|
|
"fcm_dpo/beta": 0.004219419322907925,
|
|
"fcm_dpo/delta": -0.015322555787861347,
|
|
"fcm_dpo/margin": 82.38020324707031,
|
|
"fcm_dpo/q_t": 0.41900143027305603,
|
|
"grad_norm": 11.40176773071289,
|
|
"learning_rate": 1.5954455004830878e-07,
|
|
"logits/chosen": 0.8734161257743835,
|
|
"logits/rejected": 0.8401018977165222,
|
|
"logps/chosen": -224.8971710205078,
|
|
"logps/ref_chosen": -56.12516403198242,
|
|
"logps/ref_rejected": -74.36073303222656,
|
|
"logps/rejected": -325.5129089355469,
|
|
"loss": 1.1412,
|
|
"margin_dpo/margin_mean": 82.38020324707031,
|
|
"margin_dpo/margin_std": 129.3118133544922,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.6651549508692366,
|
|
"fcm_dpo/beta": 0.0041299303993582726,
|
|
"fcm_dpo/delta": -0.023912524804472923,
|
|
"fcm_dpo/margin": 82.6595230102539,
|
|
"fcm_dpo/q_t": 0.4219323992729187,
|
|
"grad_norm": 13.837409019470215,
|
|
"learning_rate": 1.534137185767178e-07,
|
|
"logits/chosen": 0.835965633392334,
|
|
"logits/rejected": 0.7754732966423035,
|
|
"logps/chosen": -231.00851440429688,
|
|
"logps/ref_chosen": -55.67548751831055,
|
|
"logps/ref_rejected": -76.62055206298828,
|
|
"logps/rejected": -334.6131286621094,
|
|
"loss": 1.1507,
|
|
"margin_dpo/margin_mean": 82.65951538085938,
|
|
"margin_dpo/margin_std": 135.90968322753906,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.672713529856387,
|
|
"fcm_dpo/beta": 0.004010509233921766,
|
|
"fcm_dpo/delta": -0.024360598996281624,
|
|
"fcm_dpo/margin": 94.17513275146484,
|
|
"fcm_dpo/q_t": 0.41288742423057556,
|
|
"grad_norm": 9.381706237792969,
|
|
"learning_rate": 1.473504264745062e-07,
|
|
"logits/chosen": 0.8353230357170105,
|
|
"logits/rejected": 0.7917869091033936,
|
|
"logps/chosen": -241.6427001953125,
|
|
"logps/ref_chosen": -59.903411865234375,
|
|
"logps/ref_rejected": -82.02873229980469,
|
|
"logps/rejected": -357.94317626953125,
|
|
"loss": 1.111,
|
|
"margin_dpo/margin_mean": 94.17513275146484,
|
|
"margin_dpo/margin_std": 131.37210083007812,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.6802721088435374,
|
|
"fcm_dpo/beta": 0.003899561706930399,
|
|
"fcm_dpo/delta": -0.024923671036958694,
|
|
"fcm_dpo/margin": 86.66099548339844,
|
|
"fcm_dpo/q_t": 0.4215773642063141,
|
|
"grad_norm": 10.855829238891602,
|
|
"learning_rate": 1.4135891358732205e-07,
|
|
"logits/chosen": 0.8515201807022095,
|
|
"logits/rejected": 0.7979717254638672,
|
|
"logps/chosen": -237.58364868164062,
|
|
"logps/ref_chosen": -55.83526611328125,
|
|
"logps/ref_rejected": -79.63658142089844,
|
|
"logps/rejected": -348.04595947265625,
|
|
"loss": 1.1448,
|
|
"margin_dpo/margin_mean": 86.66099548339844,
|
|
"margin_dpo/margin_std": 133.32977294921875,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.6878306878306878,
|
|
"fcm_dpo/beta": 0.0038856077007949352,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 78.07716369628906,
|
|
"fcm_dpo/q_t": 0.4294430613517761,
|
|
"grad_norm": 11.119799613952637,
|
|
"learning_rate": 1.354433695681474e-07,
|
|
"logits/chosen": 0.8199604153633118,
|
|
"logits/rejected": 0.7901821136474609,
|
|
"logps/chosen": -240.78634643554688,
|
|
"logps/ref_chosen": -60.59226608276367,
|
|
"logps/ref_rejected": -73.37936401367188,
|
|
"logps/rejected": -331.6506042480469,
|
|
"loss": 1.1685,
|
|
"margin_dpo/margin_mean": 78.07716369628906,
|
|
"margin_dpo/margin_std": 132.1455535888672,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.6953892668178382,
|
|
"fcm_dpo/beta": 0.0038638408295810223,
|
|
"fcm_dpo/delta": -0.020884912461042404,
|
|
"fcm_dpo/margin": 93.4942855834961,
|
|
"fcm_dpo/q_t": 0.41626229882240295,
|
|
"grad_norm": 9.670624732971191,
|
|
"learning_rate": 1.2960793094762345e-07,
|
|
"logits/chosen": 0.8717799186706543,
|
|
"logits/rejected": 0.808576226234436,
|
|
"logps/chosen": -236.9020538330078,
|
|
"logps/ref_chosen": -56.21283721923828,
|
|
"logps/ref_rejected": -83.02075958251953,
|
|
"logps/rejected": -357.20428466796875,
|
|
"loss": 1.1265,
|
|
"margin_dpo/margin_mean": 93.4942855834961,
|
|
"margin_dpo/margin_std": 139.18475341796875,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.7029478458049887,
|
|
"fcm_dpo/beta": 0.0037416815757751465,
|
|
"fcm_dpo/delta": -0.01730378530919552,
|
|
"fcm_dpo/margin": 90.57196044921875,
|
|
"fcm_dpo/q_t": 0.4205476641654968,
|
|
"grad_norm": 11.144495964050293,
|
|
"learning_rate": 1.238566782415197e-07,
|
|
"logits/chosen": 0.8878629803657532,
|
|
"logits/rejected": 0.8561855554580688,
|
|
"logps/chosen": -241.06063842773438,
|
|
"logps/ref_chosen": -59.0674934387207,
|
|
"logps/ref_rejected": -74.53498840332031,
|
|
"logps/rejected": -347.10009765625,
|
|
"loss": 1.1409,
|
|
"margin_dpo/margin_mean": 90.57195281982422,
|
|
"margin_dpo/margin_std": 135.90689086914062,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.7105064247921391,
|
|
"fcm_dpo/beta": 0.0037400186993181705,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 81.56043243408203,
|
|
"fcm_dpo/q_t": 0.4288889467716217,
|
|
"grad_norm": 10.959892272949219,
|
|
"learning_rate": 1.1819363309737438e-07,
|
|
"logits/chosen": 0.8900365829467773,
|
|
"logits/rejected": 0.856399655342102,
|
|
"logps/chosen": -243.0541229248047,
|
|
"logps/ref_chosen": -58.3397331237793,
|
|
"logps/ref_rejected": -74.33660125732422,
|
|
"logps/rejected": -340.6114501953125,
|
|
"loss": 1.1733,
|
|
"margin_dpo/margin_mean": 81.56043243408203,
|
|
"margin_dpo/margin_std": 142.09490966796875,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.7180650037792895,
|
|
"fcm_dpo/beta": 0.0037331648636609316,
|
|
"fcm_dpo/delta": -0.00920518022030592,
|
|
"fcm_dpo/margin": 99.83604431152344,
|
|
"fcm_dpo/q_t": 0.4153580069541931,
|
|
"grad_norm": 7.998687267303467,
|
|
"learning_rate": 1.126227554822985e-07,
|
|
"logits/chosen": 0.8998042345046997,
|
|
"logits/rejected": 0.8499566912651062,
|
|
"logps/chosen": -225.0286102294922,
|
|
"logps/ref_chosen": -54.60407638549805,
|
|
"logps/ref_rejected": -79.94635009765625,
|
|
"logps/rejected": -350.2068786621094,
|
|
"loss": 1.1275,
|
|
"margin_dpo/margin_mean": 99.83604431152344,
|
|
"margin_dpo/margin_std": 155.26611328125,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.7256235827664399,
|
|
"fcm_dpo/beta": 0.003702650312334299,
|
|
"fcm_dpo/delta": -0.008397220633924007,
|
|
"fcm_dpo/margin": 73.63870239257812,
|
|
"fcm_dpo/q_t": 0.43734756112098694,
|
|
"grad_norm": 14.331131935119629,
|
|
"learning_rate": 1.0714794091391072e-07,
|
|
"logits/chosen": 0.853714108467102,
|
|
"logits/rejected": 0.8454058766365051,
|
|
"logps/chosen": -257.2554016113281,
|
|
"logps/ref_chosen": -63.0672492980957,
|
|
"logps/ref_rejected": -68.59602355957031,
|
|
"logps/rejected": -336.4228820800781,
|
|
"loss": 1.2098,
|
|
"margin_dpo/margin_mean": 73.63871002197266,
|
|
"margin_dpo/margin_std": 151.20285034179688,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.7331821617535903,
|
|
"fcm_dpo/beta": 0.0036670055706053972,
|
|
"fcm_dpo/delta": -0.004230070393532515,
|
|
"fcm_dpo/margin": 85.2213363647461,
|
|
"fcm_dpo/q_t": 0.4283173978328705,
|
|
"grad_norm": 11.36840534210205,
|
|
"learning_rate": 1.0177301773633992e-07,
|
|
"logits/chosen": 0.8761838674545288,
|
|
"logits/rejected": 0.8311912417411804,
|
|
"logps/chosen": -251.0254669189453,
|
|
"logps/ref_chosen": -58.75799560546875,
|
|
"logps/ref_rejected": -79.72233581542969,
|
|
"logps/rejected": -357.21112060546875,
|
|
"loss": 1.1635,
|
|
"margin_dpo/margin_mean": 85.2213363647461,
|
|
"margin_dpo/margin_std": 141.01046752929688,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.7407407407407407,
|
|
"fcm_dpo/beta": 0.003649166552349925,
|
|
"fcm_dpo/delta": -0.005526586435735226,
|
|
"fcm_dpo/margin": 79.44273376464844,
|
|
"fcm_dpo/q_t": 0.4346071183681488,
|
|
"grad_norm": 9.696874618530273,
|
|
"learning_rate": 9.650174444319956e-08,
|
|
"logits/chosen": 0.9166892170906067,
|
|
"logits/rejected": 0.8767145872116089,
|
|
"logps/chosen": -262.38348388671875,
|
|
"logps/ref_chosen": -61.394195556640625,
|
|
"logps/ref_rejected": -81.1914291381836,
|
|
"logps/rejected": -361.6234436035156,
|
|
"loss": 1.1984,
|
|
"margin_dpo/margin_mean": 79.44273376464844,
|
|
"margin_dpo/margin_std": 160.96810913085938,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.7482993197278912,
|
|
"fcm_dpo/beta": 0.0036221942864358425,
|
|
"fcm_dpo/delta": -0.009324881248176098,
|
|
"fcm_dpo/margin": 80.0656967163086,
|
|
"fcm_dpo/q_t": 0.43500107526779175,
|
|
"grad_norm": 14.121245384216309,
|
|
"learning_rate": 9.133780704940594e-08,
|
|
"logits/chosen": 0.8733257055282593,
|
|
"logits/rejected": 0.8286750912666321,
|
|
"logps/chosen": -253.50320434570312,
|
|
"logps/ref_chosen": -59.85382843017578,
|
|
"logps/ref_rejected": -80.63748931884766,
|
|
"logps/rejected": -354.3525695800781,
|
|
"loss": 1.1983,
|
|
"margin_dpo/margin_mean": 80.06568908691406,
|
|
"margin_dpo/margin_std": 160.94277954101562,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.7558578987150416,
|
|
"fcm_dpo/beta": 0.0036053061485290527,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 94.49732971191406,
|
|
"fcm_dpo/q_t": 0.4224638342857361,
|
|
"grad_norm": 11.681142807006836,
|
|
"learning_rate": 8.628481651367875e-08,
|
|
"logits/chosen": 0.8845356106758118,
|
|
"logits/rejected": 0.8533564805984497,
|
|
"logps/chosen": -260.6972351074219,
|
|
"logps/ref_chosen": -66.17753601074219,
|
|
"logps/ref_rejected": -83.75955200195312,
|
|
"logps/rejected": -372.7765808105469,
|
|
"loss": 1.1603,
|
|
"margin_dpo/margin_mean": 94.49732971191406,
|
|
"margin_dpo/margin_std": 167.73483276367188,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.763416477702192,
|
|
"fcm_dpo/beta": 0.0035957619547843933,
|
|
"fcm_dpo/delta": -0.008863715454936028,
|
|
"fcm_dpo/margin": 81.07406616210938,
|
|
"fcm_dpo/q_t": 0.431431382894516,
|
|
"grad_norm": 11.598374366760254,
|
|
"learning_rate": 8.134630621352483e-08,
|
|
"logits/chosen": 0.8734153509140015,
|
|
"logits/rejected": 0.8628484010696411,
|
|
"logps/chosen": -250.24264526367188,
|
|
"logps/ref_chosen": -62.11005401611328,
|
|
"logps/ref_rejected": -74.64705657958984,
|
|
"logps/rejected": -343.85369873046875,
|
|
"loss": 1.1724,
|
|
"margin_dpo/margin_mean": 81.07406616210938,
|
|
"margin_dpo/margin_std": 132.01162719726562,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.7709750566893424,
|
|
"fcm_dpo/beta": 0.0035542245022952557,
|
|
"fcm_dpo/delta": -0.010841513983905315,
|
|
"fcm_dpo/margin": 93.92044067382812,
|
|
"fcm_dpo/q_t": 0.42539000511169434,
|
|
"grad_norm": 10.781188011169434,
|
|
"learning_rate": 7.652572947447272e-08,
|
|
"logits/chosen": 0.8776324391365051,
|
|
"logits/rejected": 0.8372354507446289,
|
|
"logps/chosen": -260.9776916503906,
|
|
"logps/ref_chosen": -64.42265319824219,
|
|
"logps/ref_rejected": -87.00096130371094,
|
|
"logps/rejected": -377.47650146484375,
|
|
"loss": 1.1734,
|
|
"margin_dpo/margin_mean": 93.92044830322266,
|
|
"margin_dpo/margin_std": 176.3047637939453,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.7785336356764928,
|
|
"fcm_dpo/beta": 0.0035349582321941853,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 90.1122055053711,
|
|
"fcm_dpo/q_t": 0.42532816529273987,
|
|
"grad_norm": 9.599004745483398,
|
|
"learning_rate": 7.182645715528435e-08,
|
|
"logits/chosen": 0.9000862240791321,
|
|
"logits/rejected": 0.8689319491386414,
|
|
"logps/chosen": -247.7484893798828,
|
|
"logps/ref_chosen": -58.284393310546875,
|
|
"logps/ref_rejected": -79.09356689453125,
|
|
"logps/rejected": -358.6697998046875,
|
|
"loss": 1.1575,
|
|
"margin_dpo/margin_mean": 90.11221313476562,
|
|
"margin_dpo/margin_std": 144.56326293945312,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.7860922146636432,
|
|
"fcm_dpo/beta": 0.0035349582321941853,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 71.46282958984375,
|
|
"fcm_dpo/q_t": 0.4412451684474945,
|
|
"grad_norm": 13.013392448425293,
|
|
"learning_rate": 6.725177529083209e-08,
|
|
"logits/chosen": 0.9094135165214539,
|
|
"logits/rejected": 0.8865984678268433,
|
|
"logps/chosen": -254.99520874023438,
|
|
"logps/ref_chosen": -61.03638458251953,
|
|
"logps/ref_rejected": -72.15824890136719,
|
|
"logps/rejected": -337.57989501953125,
|
|
"loss": 1.2161,
|
|
"margin_dpo/margin_mean": 71.46283721923828,
|
|
"margin_dpo/margin_std": 148.10073852539062,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.7936507936507936,
|
|
"fcm_dpo/beta": 0.0035349582321941853,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 81.25996398925781,
|
|
"fcm_dpo/q_t": 0.43352437019348145,
|
|
"grad_norm": 13.817595481872559,
|
|
"learning_rate": 6.280488279429185e-08,
|
|
"logits/chosen": 0.8353425860404968,
|
|
"logits/rejected": 0.804462730884552,
|
|
"logps/chosen": -264.1961669921875,
|
|
"logps/ref_chosen": -68.02732849121094,
|
|
"logps/ref_rejected": -85.41429901123047,
|
|
"logps/rejected": -362.8431396484375,
|
|
"loss": 1.1911,
|
|
"margin_dpo/margin_mean": 81.25996398925781,
|
|
"margin_dpo/margin_std": 154.6202850341797,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.8012093726379441,
|
|
"fcm_dpo/beta": 0.0035310834646224976,
|
|
"fcm_dpo/delta": -0.003660431830212474,
|
|
"fcm_dpo/margin": 77.5444564819336,
|
|
"fcm_dpo/q_t": 0.4368818402290344,
|
|
"grad_norm": 12.4998779296875,
|
|
"learning_rate": 5.848888922025552e-08,
|
|
"logits/chosen": 0.8905463218688965,
|
|
"logits/rejected": 0.8446179628372192,
|
|
"logps/chosen": -255.26919555664062,
|
|
"logps/ref_chosen": -58.67436599731445,
|
|
"logps/ref_rejected": -79.38807678222656,
|
|
"logps/rejected": -353.52734375,
|
|
"loss": 1.1992,
|
|
"margin_dpo/margin_mean": 77.5444564819336,
|
|
"margin_dpo/margin_std": 147.32528686523438,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.8087679516250945,
|
|
"fcm_dpo/beta": 0.003522042650729418,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 82.69160461425781,
|
|
"fcm_dpo/q_t": 0.4328169822692871,
|
|
"grad_norm": 11.253222465515137,
|
|
"learning_rate": 5.430681259032957e-08,
|
|
"logits/chosen": 0.8829957842826843,
|
|
"logits/rejected": 0.842154324054718,
|
|
"logps/chosen": -248.08706665039062,
|
|
"logps/ref_chosen": -57.640098571777344,
|
|
"logps/ref_rejected": -77.25399780273438,
|
|
"logps/rejected": -350.39251708984375,
|
|
"loss": 1.1841,
|
|
"margin_dpo/margin_mean": 82.69161224365234,
|
|
"margin_dpo/margin_std": 150.93914794921875,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.8163265306122449,
|
|
"fcm_dpo/beta": 0.0035197685938328505,
|
|
"fcm_dpo/delta": -0.006476428359746933,
|
|
"fcm_dpo/margin": 101.35395050048828,
|
|
"fcm_dpo/q_t": 0.4183521270751953,
|
|
"grad_norm": 9.733626365661621,
|
|
"learning_rate": 5.026157728273966e-08,
|
|
"logits/chosen": 0.9086763262748718,
|
|
"logits/rejected": 0.8577451705932617,
|
|
"logps/chosen": -256.18011474609375,
|
|
"logps/ref_chosen": -60.17341995239258,
|
|
"logps/ref_rejected": -85.50316619873047,
|
|
"logps/rejected": -382.8638000488281,
|
|
"loss": 1.1288,
|
|
"margin_dpo/margin_mean": 101.35395050048828,
|
|
"margin_dpo/margin_std": 152.10256958007812,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.8238851095993953,
|
|
"fcm_dpo/beta": 0.00349930627271533,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 94.39086151123047,
|
|
"fcm_dpo/q_t": 0.4234938621520996,
|
|
"grad_norm": 12.908324241638184,
|
|
"learning_rate": 4.635601198741607e-08,
|
|
"logits/chosen": 0.917340874671936,
|
|
"logits/rejected": 0.8850774765014648,
|
|
"logps/chosen": -242.275390625,
|
|
"logps/ref_chosen": -56.985809326171875,
|
|
"logps/ref_rejected": -73.21353912353516,
|
|
"logps/rejected": -352.8939514160156,
|
|
"loss": 1.1519,
|
|
"margin_dpo/margin_mean": 94.39086151123047,
|
|
"margin_dpo/margin_std": 151.55343627929688,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.8314436885865457,
|
|
"fcm_dpo/beta": 0.0034850898664444685,
|
|
"fcm_dpo/delta": -0.00509117916226387,
|
|
"fcm_dpo/margin": 76.22270202636719,
|
|
"fcm_dpo/q_t": 0.4382683336734772,
|
|
"grad_norm": 11.022492408752441,
|
|
"learning_rate": 4.259284772799099e-08,
|
|
"logits/chosen": 0.912948489189148,
|
|
"logits/rejected": 0.8859111666679382,
|
|
"logps/chosen": -255.4915008544922,
|
|
"logps/ref_chosen": -59.600929260253906,
|
|
"logps/ref_rejected": -75.24870300292969,
|
|
"logps/rejected": -347.3619689941406,
|
|
"loss": 1.2114,
|
|
"margin_dpo/margin_mean": 76.22270202636719,
|
|
"margin_dpo/margin_std": 156.75267028808594,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.8390022675736961,
|
|
"fcm_dpo/beta": 0.003481535706669092,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 83.40065002441406,
|
|
"fcm_dpo/q_t": 0.43253573775291443,
|
|
"grad_norm": 11.873709678649902,
|
|
"learning_rate": 3.89747159520904e-08,
|
|
"logits/chosen": 0.9214051365852356,
|
|
"logits/rejected": 0.8947726488113403,
|
|
"logps/chosen": -266.4524841308594,
|
|
"logps/ref_chosen": -63.578895568847656,
|
|
"logps/ref_rejected": -78.87867736816406,
|
|
"logps/rejected": -365.1529235839844,
|
|
"loss": 1.1841,
|
|
"margin_dpo/margin_mean": 83.4006576538086,
|
|
"margin_dpo/margin_std": 151.18142700195312,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.8465608465608465,
|
|
"fcm_dpo/beta": 0.003460574196651578,
|
|
"fcm_dpo/delta": -0.009147383272647858,
|
|
"fcm_dpo/margin": 102.399658203125,
|
|
"fcm_dpo/q_t": 0.4190526604652405,
|
|
"grad_norm": 12.284520149230957,
|
|
"learning_rate": 3.550414669125573e-08,
|
|
"logits/chosen": 0.9413592219352722,
|
|
"logits/rejected": 0.9070916175842285,
|
|
"logps/chosen": -256.690673828125,
|
|
"logps/ref_chosen": -58.651512145996094,
|
|
"logps/ref_rejected": -78.67181396484375,
|
|
"logps/rejected": -379.1106262207031,
|
|
"loss": 1.1302,
|
|
"margin_dpo/margin_mean": 102.399658203125,
|
|
"margin_dpo/margin_std": 151.68862915039062,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.854119425547997,
|
|
"fcm_dpo/beta": 0.0034138336777687073,
|
|
"fcm_dpo/delta": -0.013129929080605507,
|
|
"fcm_dpo/margin": 100.43526458740234,
|
|
"fcm_dpo/q_t": 0.4204806387424469,
|
|
"grad_norm": 12.020137786865234,
|
|
"learning_rate": 3.218356679178252e-08,
|
|
"logits/chosen": 0.9278671145439148,
|
|
"logits/rejected": 0.9034161567687988,
|
|
"logps/chosen": -253.25778198242188,
|
|
"logps/ref_chosen": -60.3114128112793,
|
|
"logps/ref_rejected": -78.25270080566406,
|
|
"logps/rejected": -371.63433837890625,
|
|
"loss": 1.1388,
|
|
"margin_dpo/margin_mean": 100.43526458740234,
|
|
"margin_dpo/margin_std": 153.38381958007812,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.8616780045351474,
|
|
"fcm_dpo/beta": 0.0033486653119325638,
|
|
"fcm_dpo/delta": -0.02772103250026703,
|
|
"fcm_dpo/margin": 99.43392944335938,
|
|
"fcm_dpo/q_t": 0.42399096488952637,
|
|
"grad_norm": 12.34563159942627,
|
|
"learning_rate": 2.9015298217712453e-08,
|
|
"logits/chosen": 0.9331613779067993,
|
|
"logits/rejected": 0.8943105936050415,
|
|
"logps/chosen": -249.2666473388672,
|
|
"logps/ref_chosen": -57.752410888671875,
|
|
"logps/ref_rejected": -76.99858093261719,
|
|
"logps/rejected": -367.94671630859375,
|
|
"loss": 1.1578,
|
|
"margin_dpo/margin_mean": 99.43392944335938,
|
|
"margin_dpo/margin_std": 166.1993865966797,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.8692365835222978,
|
|
"fcm_dpo/beta": 0.0033004791475832462,
|
|
"fcm_dpo/delta": -0.011403532698750496,
|
|
"fcm_dpo/margin": 86.78084564208984,
|
|
"fcm_dpo/q_t": 0.4353984296321869,
|
|
"grad_norm": 11.008131980895996,
|
|
"learning_rate": 2.600155642716606e-08,
|
|
"logits/chosen": 0.9364798665046692,
|
|
"logits/rejected": 0.9186896085739136,
|
|
"logps/chosen": -265.30657958984375,
|
|
"logps/ref_chosen": -63.61958694458008,
|
|
"logps/ref_rejected": -79.51353454589844,
|
|
"logps/rejected": -367.98138427734375,
|
|
"loss": 1.1951,
|
|
"margin_dpo/margin_mean": 86.78085327148438,
|
|
"margin_dpo/margin_std": 166.95652770996094,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.8767951625094482,
|
|
"fcm_dpo/beta": 0.0032326322980225086,
|
|
"fcm_dpo/delta": -0.012774638831615448,
|
|
"fcm_dpo/margin": 98.78877258300781,
|
|
"fcm_dpo/q_t": 0.42647188901901245,
|
|
"grad_norm": 12.614327430725098,
|
|
"learning_rate": 2.3144448823151392e-08,
|
|
"logits/chosen": 0.9242687225341797,
|
|
"logits/rejected": 0.9064335823059082,
|
|
"logps/chosen": -247.09750366210938,
|
|
"logps/ref_chosen": -57.3541145324707,
|
|
"logps/ref_rejected": -73.14434051513672,
|
|
"logps/rejected": -361.676513671875,
|
|
"loss": 1.1637,
|
|
"margin_dpo/margin_mean": 98.78877258300781,
|
|
"margin_dpo/margin_std": 166.97918701171875,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.8843537414965986,
|
|
"fcm_dpo/beta": 0.0032326322980225086,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 91.58312225341797,
|
|
"fcm_dpo/q_t": 0.43279165029525757,
|
|
"grad_norm": 11.919511795043945,
|
|
"learning_rate": 2.044597327993153e-08,
|
|
"logits/chosen": 0.9634637832641602,
|
|
"logits/rejected": 0.9281566739082336,
|
|
"logps/chosen": -253.19546508789062,
|
|
"logps/ref_chosen": -56.0127067565918,
|
|
"logps/ref_rejected": -77.16522216796875,
|
|
"logps/rejected": -365.93109130859375,
|
|
"loss": 1.1847,
|
|
"margin_dpo/margin_mean": 91.5831298828125,
|
|
"margin_dpo/margin_std": 169.7579803466797,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.891912320483749,
|
|
"fcm_dpo/beta": 0.0032326322980225086,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 87.02014923095703,
|
|
"fcm_dpo/q_t": 0.43297773599624634,
|
|
"grad_norm": 13.126020431518555,
|
|
"learning_rate": 1.7908016745981856e-08,
|
|
"logits/chosen": 0.9554249048233032,
|
|
"logits/rejected": 0.9359169006347656,
|
|
"logps/chosen": -263.68682861328125,
|
|
"logps/ref_chosen": -60.5894660949707,
|
|
"logps/ref_rejected": -74.34771728515625,
|
|
"logps/rejected": -364.4652404785156,
|
|
"loss": 1.1894,
|
|
"margin_dpo/margin_mean": 87.02014923095703,
|
|
"margin_dpo/margin_std": 158.75521850585938,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.8994708994708994,
|
|
"fcm_dpo/beta": 0.00319870188832283,
|
|
"fcm_dpo/delta": -0.010551819577813148,
|
|
"fcm_dpo/margin": 100.10346984863281,
|
|
"fcm_dpo/q_t": 0.4260531961917877,
|
|
"grad_norm": 10.075697898864746,
|
|
"learning_rate": 1.553235392451377e-08,
|
|
"logits/chosen": 0.9538490176200867,
|
|
"logits/rejected": 0.9052039384841919,
|
|
"logps/chosen": -240.1796875,
|
|
"logps/ref_chosen": -54.77838897705078,
|
|
"logps/ref_rejected": -78.102783203125,
|
|
"logps/rejected": -363.6075744628906,
|
|
"loss": 1.1566,
|
|
"margin_dpo/margin_mean": 100.10346221923828,
|
|
"margin_dpo/margin_std": 161.6309356689453,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.9070294784580499,
|
|
"fcm_dpo/beta": 0.00319870188832283,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 65.01690673828125,
|
|
"fcm_dpo/q_t": 0.45148009061813354,
|
|
"grad_norm": 12.22407341003418,
|
|
"learning_rate": 1.3320646032487393e-08,
|
|
"logits/chosen": 0.9378520846366882,
|
|
"logits/rejected": 0.9170206189155579,
|
|
"logps/chosen": -263.9869384765625,
|
|
"logps/ref_chosen": -58.45500564575195,
|
|
"logps/ref_rejected": -70.7367172241211,
|
|
"logps/rejected": -341.2855529785156,
|
|
"loss": 1.242,
|
|
"margin_dpo/margin_mean": 65.01690673828125,
|
|
"margin_dpo/margin_std": 143.92819213867188,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.9070294784580499,
|
|
"eval_fcm_dpo/beta": 0.00319870188832283,
|
|
"eval_logits/chosen": 0.8913569450378418,
|
|
"eval_logits/rejected": 0.8742244243621826,
|
|
"eval_logps/chosen": -278.61993408203125,
|
|
"eval_logps/ref_chosen": -74.85946655273438,
|
|
"eval_logps/ref_rejected": -79.54898834228516,
|
|
"eval_logps/rejected": -364.2364501953125,
|
|
"eval_loss": 0.6074615716934204,
|
|
"eval_margin_dpo/margin_mean": 80.9269790649414,
|
|
"eval_margin_dpo/margin_std": 168.3527069091797,
|
|
"eval_runtime": 39.0585,
|
|
"eval_samples_per_second": 58.963,
|
|
"eval_steps_per_second": 1.843,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.9145880574452003,
|
|
"fcm_dpo/beta": 0.00319870188832283,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 91.87323760986328,
|
|
"fcm_dpo/q_t": 0.4317256510257721,
|
|
"grad_norm": 10.033231735229492,
|
|
"learning_rate": 1.1274439638981532e-08,
|
|
"logits/chosen": 0.9322928190231323,
|
|
"logits/rejected": 0.9066828489303589,
|
|
"logps/chosen": -246.4706573486328,
|
|
"logps/ref_chosen": -59.87483596801758,
|
|
"logps/ref_rejected": -75.75318908691406,
|
|
"logps/rejected": -354.2222595214844,
|
|
"loss": 1.1779,
|
|
"margin_dpo/margin_mean": 91.87324523925781,
|
|
"margin_dpo/margin_std": 161.2457733154297,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.9221466364323507,
|
|
"fcm_dpo/beta": 0.003174326615408063,
|
|
"fcm_dpo/delta": -0.009571181610226631,
|
|
"fcm_dpo/margin": 99.8558120727539,
|
|
"fcm_dpo/q_t": 0.42549604177474976,
|
|
"grad_norm": 13.667858123779297,
|
|
"learning_rate": 9.395165583732379e-09,
|
|
"logits/chosen": 0.9424371719360352,
|
|
"logits/rejected": 0.9166472554206848,
|
|
"logps/chosen": -255.5699462890625,
|
|
"logps/ref_chosen": -60.35883712768555,
|
|
"logps/ref_rejected": -81.3543930053711,
|
|
"logps/rejected": -376.42132568359375,
|
|
"loss": 1.1567,
|
|
"margin_dpo/margin_mean": 99.85580444335938,
|
|
"margin_dpo/margin_std": 156.640869140625,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.9297052154195011,
|
|
"fcm_dpo/beta": 0.0031617539934813976,
|
|
"fcm_dpo/delta": -0.010276397690176964,
|
|
"fcm_dpo/margin": 101.62857818603516,
|
|
"fcm_dpo/q_t": 0.4245510995388031,
|
|
"grad_norm": 9.765281677246094,
|
|
"learning_rate": 7.684137976598088e-09,
|
|
"logits/chosen": 0.9229475855827332,
|
|
"logits/rejected": 0.8982959985733032,
|
|
"logps/chosen": -253.18075561523438,
|
|
"logps/ref_chosen": -59.17219161987305,
|
|
"logps/ref_rejected": -79.92167663574219,
|
|
"logps/rejected": -375.55877685546875,
|
|
"loss": 1.1485,
|
|
"margin_dpo/margin_mean": 101.62857818603516,
|
|
"margin_dpo/margin_std": 152.48818969726562,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.9372637944066515,
|
|
"fcm_dpo/beta": 0.0031358408741652966,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 89.38777923583984,
|
|
"fcm_dpo/q_t": 0.4349850118160248,
|
|
"grad_norm": 10.843006134033203,
|
|
"learning_rate": 6.142553278648238e-09,
|
|
"logits/chosen": 0.9537204504013062,
|
|
"logits/rejected": 0.9183050394058228,
|
|
"logps/chosen": -253.4351043701172,
|
|
"logps/ref_chosen": -58.052696228027344,
|
|
"logps/ref_rejected": -78.37252807617188,
|
|
"logps/rejected": -363.1427307128906,
|
|
"loss": 1.1848,
|
|
"margin_dpo/margin_mean": 89.38777160644531,
|
|
"margin_dpo/margin_std": 156.2245635986328,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.9448223733938019,
|
|
"fcm_dpo/beta": 0.0031358408741652966,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 96.6719970703125,
|
|
"fcm_dpo/q_t": 0.4313376545906067,
|
|
"grad_norm": 10.53563404083252,
|
|
"learning_rate": 4.7714894655209174e-09,
|
|
"logits/chosen": 0.9534462094306946,
|
|
"logits/rejected": 0.900356113910675,
|
|
"logps/chosen": -253.8286895751953,
|
|
"logps/ref_chosen": -56.957862854003906,
|
|
"logps/ref_rejected": -82.68255615234375,
|
|
"logps/rejected": -376.225341796875,
|
|
"loss": 1.1779,
|
|
"margin_dpo/margin_mean": 96.6719970703125,
|
|
"margin_dpo/margin_std": 174.75888061523438,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.9523809523809523,
|
|
"fcm_dpo/beta": 0.0031358408741652966,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 105.3574447631836,
|
|
"fcm_dpo/q_t": 0.42352789640426636,
|
|
"grad_norm": 10.356474876403809,
|
|
"learning_rate": 3.5719052736323806e-09,
|
|
"logits/chosen": 0.9468274116516113,
|
|
"logits/rejected": 0.9034187197685242,
|
|
"logps/chosen": -247.63818359375,
|
|
"logps/ref_chosen": -56.71510696411133,
|
|
"logps/ref_rejected": -82.94544219970703,
|
|
"logps/rejected": -379.2259521484375,
|
|
"loss": 1.1485,
|
|
"margin_dpo/margin_mean": 105.35743713378906,
|
|
"margin_dpo/margin_std": 166.2731475830078,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.9599395313681028,
|
|
"fcm_dpo/beta": 0.0030982757452875376,
|
|
"fcm_dpo/delta": -0.012051684781908989,
|
|
"fcm_dpo/margin": 89.59659576416016,
|
|
"fcm_dpo/q_t": 0.4353605806827545,
|
|
"grad_norm": 10.570072174072266,
|
|
"learning_rate": 2.5446395297668287e-09,
|
|
"logits/chosen": 0.9560381174087524,
|
|
"logits/rejected": 0.9319430589675903,
|
|
"logps/chosen": -254.63623046875,
|
|
"logps/ref_chosen": -59.33793258666992,
|
|
"logps/ref_rejected": -75.01703643798828,
|
|
"logps/rejected": -359.91192626953125,
|
|
"loss": 1.1902,
|
|
"margin_dpo/margin_mean": 89.59659576416016,
|
|
"margin_dpo/margin_std": 159.2709197998047,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.9674981103552532,
|
|
"fcm_dpo/beta": 0.0030982757452875376,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 97.72769927978516,
|
|
"fcm_dpo/q_t": 0.42967167496681213,
|
|
"grad_norm": 10.967521667480469,
|
|
"learning_rate": 1.690410564514244e-09,
|
|
"logits/chosen": 0.9475260972976685,
|
|
"logits/rejected": 0.903438925743103,
|
|
"logps/chosen": -259.0896301269531,
|
|
"logps/ref_chosen": -58.1605339050293,
|
|
"logps/ref_rejected": -79.85365295410156,
|
|
"logps/rejected": -378.5104064941406,
|
|
"loss": 1.1668,
|
|
"margin_dpo/margin_mean": 97.72769165039062,
|
|
"margin_dpo/margin_std": 162.35629272460938,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.9750566893424036,
|
|
"fcm_dpo/beta": 0.0030982757452875376,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 77.14000701904297,
|
|
"fcm_dpo/q_t": 0.44398292899131775,
|
|
"grad_norm": 15.546424865722656,
|
|
"learning_rate": 1.0098157099674987e-09,
|
|
"logits/chosen": 0.9349179267883301,
|
|
"logits/rejected": 0.922415554523468,
|
|
"logps/chosen": -264.6257629394531,
|
|
"logps/ref_chosen": -63.45180130004883,
|
|
"logps/ref_rejected": -74.18285369873047,
|
|
"logps/rejected": -352.49676513671875,
|
|
"loss": 1.2142,
|
|
"margin_dpo/margin_mean": 77.14000701904297,
|
|
"margin_dpo/margin_std": 147.8017120361328,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.982615268329554,
|
|
"fcm_dpo/beta": 0.0030982757452875376,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 95.89753723144531,
|
|
"fcm_dpo/q_t": 0.4306652545928955,
|
|
"grad_norm": 9.394474029541016,
|
|
"learning_rate": 5.033308820289184e-10,
|
|
"logits/chosen": 0.9502288103103638,
|
|
"logits/rejected": 0.9044377207756042,
|
|
"logps/chosen": -268.394775390625,
|
|
"logps/ref_chosen": -59.75496292114258,
|
|
"logps/ref_rejected": -84.31481170654297,
|
|
"logps/rejected": -388.8521423339844,
|
|
"loss": 1.1769,
|
|
"margin_dpo/margin_mean": 95.89753723144531,
|
|
"margin_dpo/margin_std": 167.29580688476562,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.9901738473167044,
|
|
"fcm_dpo/beta": 0.0030882812570780516,
|
|
"fcm_dpo/delta": -0.016260221600532532,
|
|
"fcm_dpo/margin": 95.9172592163086,
|
|
"fcm_dpo/q_t": 0.43137580156326294,
|
|
"grad_norm": 10.780631065368652,
|
|
"learning_rate": 1.7131024761923852e-10,
|
|
"logits/chosen": 0.9268029928207397,
|
|
"logits/rejected": 0.8841239809989929,
|
|
"logps/chosen": -252.7764892578125,
|
|
"logps/ref_chosen": -57.817848205566406,
|
|
"logps/ref_rejected": -79.81755065917969,
|
|
"logps/rejected": -370.6934814453125,
|
|
"loss": 1.1725,
|
|
"margin_dpo/margin_mean": 95.9172592163086,
|
|
"margin_dpo/margin_std": 159.18435668945312,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.9977324263038548,
|
|
"fcm_dpo/beta": 0.003048304468393326,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 98.31352996826172,
|
|
"fcm_dpo/q_t": 0.43072566390037537,
|
|
"grad_norm": 11.600972175598145,
|
|
"learning_rate": 1.3985977021235829e-11,
|
|
"logits/chosen": 0.9769344329833984,
|
|
"logits/rejected": 0.9424291849136353,
|
|
"logps/chosen": -260.94500732421875,
|
|
"logps/ref_chosen": -59.12651443481445,
|
|
"logps/ref_rejected": -79.42085266113281,
|
|
"logps/rejected": -379.5528869628906,
|
|
"loss": 1.1768,
|
|
"margin_dpo/margin_mean": 98.31353759765625,
|
|
"margin_dpo/margin_std": 174.72268676757812,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.999244142101285,
|
|
"step": 661,
|
|
"total_flos": 0.0,
|
|
"train_loss": 1.1812975648311552,
|
|
"train_runtime": 1809.2515,
|
|
"train_samples_per_second": 23.4,
|
|
"train_steps_per_second": 0.365
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 661,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 50,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|