Files
llama-3-8b-base-new-dpo-har…/trainer_state.json
ModelHub XC d5bfea8476 初始化项目,由ModelHub XC社区提供模型
Model: jackf857/llama-3-8b-base-new-dpo-harmless-s_star0.4-q_t0.4
Source: Original Platform
2026-05-14 11:41:38 +08:00

2622 lines
95 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.999244142101285,
"eval_steps": 200,
"global_step": 661,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0015117157974300832,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.0013532638549804688,
"fcm_dpo/q_t": 0.5000336766242981,
"grad_norm": 28.21938133239746,
"learning_rate": 0.0,
"logits/chosen": 0.13337239623069763,
"logits/rejected": 0.12492948770523071,
"logps/chosen": -64.5841293334961,
"logps/ref_chosen": -64.61280822753906,
"logps/ref_rejected": -64.17195129394531,
"logps/rejected": -64.14192199707031,
"loss": 1.3866,
"margin_dpo/margin_mean": -0.0013527870178222656,
"margin_dpo/margin_std": 0.2561596930027008,
"step": 1
},
{
"epoch": 0.007558578987150416,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.012731105089187622,
"fcm_dpo/q_t": 0.49968191981315613,
"grad_norm": 29.578828811645508,
"learning_rate": 2.9850746268656714e-08,
"logits/chosen": 0.09623775631189346,
"logits/rejected": 0.06788332760334015,
"logps/chosen": -65.3349380493164,
"logps/ref_chosen": -65.34695434570312,
"logps/ref_rejected": -79.315673828125,
"logps/rejected": -79.31640625,
"loss": 1.3853,
"margin_dpo/margin_mean": 0.012730807065963745,
"margin_dpo/margin_std": 0.3051193654537201,
"step": 5
},
{
"epoch": 0.015117157974300832,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.0006597042083740234,
"fcm_dpo/q_t": 0.5000167489051819,
"grad_norm": 29.6705322265625,
"learning_rate": 6.71641791044776e-08,
"logits/chosen": 0.10582169145345688,
"logits/rejected": 0.06683535873889923,
"logps/chosen": -56.6657829284668,
"logps/ref_chosen": -56.65692901611328,
"logps/ref_rejected": -80.12786865234375,
"logps/rejected": -80.13607025146484,
"loss": 1.3866,
"margin_dpo/margin_mean": -0.00065990089206025,
"margin_dpo/margin_std": 0.3203383684158325,
"step": 10
},
{
"epoch": 0.022675736961451247,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.01577478088438511,
"fcm_dpo/q_t": 0.49960583448410034,
"grad_norm": 32.982215881347656,
"learning_rate": 1.044776119402985e-07,
"logits/chosen": 0.08618224412202835,
"logits/rejected": 0.0566771999001503,
"logps/chosen": -60.09851837158203,
"logps/ref_chosen": -60.09392166137695,
"logps/ref_rejected": -78.99056243896484,
"logps/rejected": -79.01094818115234,
"loss": 1.385,
"margin_dpo/margin_mean": 0.01577501930296421,
"margin_dpo/margin_std": 0.3348791301250458,
"step": 15
},
{
"epoch": 0.030234315948601664,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.004742377903312445,
"fcm_dpo/q_t": 0.5001183748245239,
"grad_norm": 29.284229278564453,
"learning_rate": 1.4179104477611938e-07,
"logits/chosen": 0.09735535085201263,
"logits/rejected": 0.06951850652694702,
"logps/chosen": -55.4586296081543,
"logps/ref_chosen": -55.464561462402344,
"logps/ref_rejected": -77.40013122558594,
"logps/rejected": -77.38945007324219,
"loss": 1.387,
"margin_dpo/margin_mean": -0.004742181394249201,
"margin_dpo/margin_std": 0.29244670271873474,
"step": 20
},
{
"epoch": 0.03779289493575208,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.00360795552842319,
"fcm_dpo/q_t": 0.4999099671840668,
"grad_norm": 29.778076171875,
"learning_rate": 1.7910447761194027e-07,
"logits/chosen": 0.10357926040887833,
"logits/rejected": 0.07201702892780304,
"logps/chosen": -60.72992706298828,
"logps/ref_chosen": -60.711814880371094,
"logps/ref_rejected": -82.71756744384766,
"logps/rejected": -82.7392807006836,
"loss": 1.3862,
"margin_dpo/margin_mean": 0.0036078630946576595,
"margin_dpo/margin_std": 0.30398499965667725,
"step": 25
},
{
"epoch": 0.045351473922902494,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.010024601593613625,
"fcm_dpo/q_t": 0.5002505779266357,
"grad_norm": 30.504179000854492,
"learning_rate": 2.1641791044776117e-07,
"logits/chosen": 0.1048557385802269,
"logits/rejected": 0.0802190899848938,
"logps/chosen": -60.91057205200195,
"logps/ref_chosen": -60.880210876464844,
"logps/ref_rejected": -78.44148254394531,
"logps/rejected": -78.4618148803711,
"loss": 1.3875,
"margin_dpo/margin_mean": -0.0100246611982584,
"margin_dpo/margin_std": 0.30975908041000366,
"step": 30
},
{
"epoch": 0.05291005291005291,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.013202684931457043,
"fcm_dpo/q_t": 0.49967002868652344,
"grad_norm": 27.877357482910156,
"learning_rate": 2.537313432835821e-07,
"logits/chosen": 0.08469098061323166,
"logits/rejected": 0.057735610753297806,
"logps/chosen": -62.28917694091797,
"logps/ref_chosen": -62.248138427734375,
"logps/ref_rejected": -79.56475830078125,
"logps/rejected": -79.61898803710938,
"loss": 1.3853,
"margin_dpo/margin_mean": 0.013202887959778309,
"margin_dpo/margin_std": 0.32241854071617126,
"step": 35
},
{
"epoch": 0.06046863189720333,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.05649406835436821,
"fcm_dpo/q_t": 0.4985879957675934,
"grad_norm": 31.444271087646484,
"learning_rate": 2.9104477611940296e-07,
"logits/chosen": 0.11410780251026154,
"logits/rejected": 0.06925268471240997,
"logps/chosen": -58.951148986816406,
"logps/ref_chosen": -58.87812423706055,
"logps/ref_rejected": -84.22982025146484,
"logps/rejected": -84.35932922363281,
"loss": 1.381,
"margin_dpo/margin_mean": 0.056494224816560745,
"margin_dpo/margin_std": 0.3627670109272003,
"step": 40
},
{
"epoch": 0.06802721088435375,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.06682233512401581,
"fcm_dpo/q_t": 0.498330295085907,
"grad_norm": 31.839420318603516,
"learning_rate": 3.2835820895522385e-07,
"logits/chosen": 0.05706251785159111,
"logits/rejected": 0.03058524802327156,
"logps/chosen": -66.00699615478516,
"logps/ref_chosen": -65.88298034667969,
"logps/ref_rejected": -83.87881469726562,
"logps/rejected": -84.06964111328125,
"loss": 1.3801,
"margin_dpo/margin_mean": 0.06682238727807999,
"margin_dpo/margin_std": 0.4172392785549164,
"step": 45
},
{
"epoch": 0.07558578987150416,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.09804753959178925,
"fcm_dpo/q_t": 0.4975499212741852,
"grad_norm": 27.911935806274414,
"learning_rate": 3.6567164179104475e-07,
"logits/chosen": 0.09519219398498535,
"logits/rejected": 0.06103789061307907,
"logps/chosen": -55.37559127807617,
"logps/ref_chosen": -55.172386169433594,
"logps/ref_rejected": -69.63300323486328,
"logps/rejected": -69.93424224853516,
"loss": 1.377,
"margin_dpo/margin_mean": 0.09804768860340118,
"margin_dpo/margin_std": 0.4135734438896179,
"step": 50
},
{
"epoch": 0.08314436885865457,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.18540987372398376,
"fcm_dpo/q_t": 0.4953702390193939,
"grad_norm": 31.428316116333008,
"learning_rate": 4.0298507462686564e-07,
"logits/chosen": 0.0665198341012001,
"logits/rejected": 0.030963122844696045,
"logps/chosen": -57.539833068847656,
"logps/ref_chosen": -57.193580627441406,
"logps/ref_rejected": -79.69940948486328,
"logps/rejected": -80.23106384277344,
"loss": 1.3686,
"margin_dpo/margin_mean": 0.18540982902050018,
"margin_dpo/margin_std": 0.5479583144187927,
"step": 55
},
{
"epoch": 0.09070294784580499,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.20901694893836975,
"fcm_dpo/q_t": 0.4947921633720398,
"grad_norm": 29.466659545898438,
"learning_rate": 4.4029850746268654e-07,
"logits/chosen": 0.11891994625329971,
"logits/rejected": 0.0848398357629776,
"logps/chosen": -60.59447479248047,
"logps/ref_chosen": -60.068870544433594,
"logps/ref_rejected": -74.41178894042969,
"logps/rejected": -75.14640808105469,
"loss": 1.3671,
"margin_dpo/margin_mean": 0.20901694893836975,
"margin_dpo/margin_std": 0.7362244129180908,
"step": 60
},
{
"epoch": 0.0982615268329554,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.3001774847507477,
"fcm_dpo/q_t": 0.4925141930580139,
"grad_norm": 30.807130813598633,
"learning_rate": 4.776119402985074e-07,
"logits/chosen": 0.13861653208732605,
"logits/rejected": 0.10865757614374161,
"logps/chosen": -58.88816452026367,
"logps/ref_chosen": -58.1558952331543,
"logps/ref_rejected": -76.06512451171875,
"logps/rejected": -77.09757995605469,
"loss": 1.3589,
"margin_dpo/margin_mean": 0.3001771569252014,
"margin_dpo/margin_std": 0.9452616572380066,
"step": 65
},
{
"epoch": 0.10582010582010581,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.39394986629486084,
"fcm_dpo/q_t": 0.49018916487693787,
"grad_norm": 28.67983627319336,
"learning_rate": 4.999860140229787e-07,
"logits/chosen": 0.10006751120090485,
"logits/rejected": 0.06497758626937866,
"logps/chosen": -68.39962005615234,
"logps/ref_chosen": -67.35506439208984,
"logps/ref_rejected": -82.24962615966797,
"logps/rejected": -83.6881332397461,
"loss": 1.3507,
"margin_dpo/margin_mean": 0.39395004510879517,
"margin_dpo/margin_std": 1.1363012790679932,
"step": 70
},
{
"epoch": 0.11337868480725624,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.5148524045944214,
"fcm_dpo/q_t": 0.48723092675209045,
"grad_norm": 26.078073501586914,
"learning_rate": 4.998286897523808e-07,
"logits/chosen": 0.10058212280273438,
"logits/rejected": 0.06556755304336548,
"logps/chosen": -58.295387268066406,
"logps/ref_chosen": -56.86763381958008,
"logps/ref_rejected": -72.56938934326172,
"logps/rejected": -74.51200103759766,
"loss": 1.3408,
"margin_dpo/margin_mean": 0.5148526430130005,
"margin_dpo/margin_std": 1.398964762687683,
"step": 75
},
{
"epoch": 0.12093726379440665,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.6505656242370605,
"fcm_dpo/q_t": 0.48392024636268616,
"grad_norm": 28.3653621673584,
"learning_rate": 4.994966691179711e-07,
"logits/chosen": 0.13842633366584778,
"logits/rejected": 0.09329269081354141,
"logps/chosen": -59.446983337402344,
"logps/ref_chosen": -57.687095642089844,
"logps/ref_rejected": -78.06813049316406,
"logps/rejected": -80.47857666015625,
"loss": 1.3297,
"margin_dpo/margin_mean": 0.6505654454231262,
"margin_dpo/margin_std": 1.6704308986663818,
"step": 80
},
{
"epoch": 0.12849584278155707,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.0345122814178467,
"fcm_dpo/q_t": 0.47443389892578125,
"grad_norm": 26.892316818237305,
"learning_rate": 4.989901842900325e-07,
"logits/chosen": 0.1578936129808426,
"logits/rejected": 0.11112338304519653,
"logps/chosen": -59.1240234375,
"logps/ref_chosen": -56.96040725708008,
"logps/ref_rejected": -75.22166442871094,
"logps/rejected": -78.41979217529297,
"loss": 1.2938,
"margin_dpo/margin_mean": 1.0345120429992676,
"margin_dpo/margin_std": 1.7866588830947876,
"step": 85
},
{
"epoch": 0.1360544217687075,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.1209580898284912,
"fcm_dpo/q_t": 0.47252073884010315,
"grad_norm": 29.175752639770508,
"learning_rate": 4.983095894354857e-07,
"logits/chosen": 0.20981314778327942,
"logits/rejected": 0.16077354550361633,
"logps/chosen": -60.326324462890625,
"logps/ref_chosen": -57.41730499267578,
"logps/ref_rejected": -80.87986755371094,
"logps/rejected": -84.90985870361328,
"loss": 1.2935,
"margin_dpo/margin_mean": 1.1209580898284912,
"margin_dpo/margin_std": 2.4879467487335205,
"step": 90
},
{
"epoch": 0.1436130007558579,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.4390569925308228,
"fcm_dpo/q_t": 0.46531516313552856,
"grad_norm": 29.310806274414062,
"learning_rate": 4.974553604702332e-07,
"logits/chosen": 0.1912127137184143,
"logits/rejected": 0.1479342132806778,
"logps/chosen": -57.60295867919922,
"logps/ref_chosen": -54.08087158203125,
"logps/ref_rejected": -76.15860748291016,
"logps/rejected": -81.1197509765625,
"loss": 1.2749,
"margin_dpo/margin_mean": 1.4390567541122437,
"margin_dpo/margin_std": 3.292581558227539,
"step": 95
},
{
"epoch": 0.15117157974300832,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.211411714553833,
"fcm_dpo/q_t": 0.47177332639694214,
"grad_norm": 33.16886520385742,
"learning_rate": 4.964280947263676e-07,
"logits/chosen": 0.2135041207075119,
"logits/rejected": 0.18092623353004456,
"logps/chosen": -68.66203308105469,
"logps/ref_chosen": -63.875038146972656,
"logps/ref_rejected": -82.077880859375,
"logps/rejected": -88.07627868652344,
"loss": 1.3247,
"margin_dpo/margin_mean": 1.211411714553833,
"margin_dpo/margin_std": 4.652392387390137,
"step": 100
},
{
"epoch": 0.15873015873015872,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.7167237997055054,
"fcm_dpo/q_t": 0.460963636636734,
"grad_norm": 30.796314239501953,
"learning_rate": 4.952285105344791e-07,
"logits/chosen": 0.2201795130968094,
"logits/rejected": 0.16930809617042542,
"logps/chosen": -67.5002670288086,
"logps/ref_chosen": -62.572479248046875,
"logps/ref_rejected": -80.93415069580078,
"logps/rejected": -87.57865905761719,
"loss": 1.283,
"margin_dpo/margin_mean": 1.7167232036590576,
"margin_dpo/margin_std": 4.936122894287109,
"step": 105
},
{
"epoch": 0.16628873771730915,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.5425608158111572,
"fcm_dpo/q_t": 0.4638938009738922,
"grad_norm": 44.27668380737305,
"learning_rate": 4.938574467213517e-07,
"logits/chosen": 0.1879446804523468,
"logits/rejected": 0.16481170058250427,
"logps/chosen": -74.3304672241211,
"logps/ref_chosen": -68.67534637451172,
"logps/ref_rejected": -78.82028198242188,
"logps/rejected": -86.01795959472656,
"loss": 1.3142,
"margin_dpo/margin_mean": 1.5425606966018677,
"margin_dpo/margin_std": 5.584108829498291,
"step": 110
},
{
"epoch": 0.17384731670445955,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 2.017230272293091,
"fcm_dpo/q_t": 0.45290979743003845,
"grad_norm": 28.293357849121094,
"learning_rate": 4.923158620234019e-07,
"logits/chosen": 0.23582999408245087,
"logits/rejected": 0.18265566229820251,
"logps/chosen": -64.07593536376953,
"logps/ref_chosen": -58.65370559692383,
"logps/ref_rejected": -81.89688873291016,
"logps/rejected": -89.33635711669922,
"loss": 1.2493,
"margin_dpo/margin_mean": 2.0172300338745117,
"margin_dpo/margin_std": 4.730603218078613,
"step": 115
},
{
"epoch": 0.18140589569160998,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 2.405763864517212,
"fcm_dpo/q_t": 0.4429899752140045,
"grad_norm": 28.859172821044922,
"learning_rate": 4.906048344162676e-07,
"logits/chosen": 0.24149473011493683,
"logits/rejected": 0.19213181734085083,
"logps/chosen": -61.583091735839844,
"logps/ref_chosen": -56.16423797607422,
"logps/ref_rejected": -75.87689971923828,
"logps/rejected": -83.70152282714844,
"loss": 1.208,
"margin_dpo/margin_mean": 2.405764102935791,
"margin_dpo/margin_std": 4.32560396194458,
"step": 120
},
{
"epoch": 0.1889644746787604,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.015474319458008,
"fcm_dpo/q_t": 0.4313550889492035,
"grad_norm": 29.275339126586914,
"learning_rate": 4.887255603610184e-07,
"logits/chosen": 0.26413899660110474,
"logits/rejected": 0.20717649161815643,
"logps/chosen": -66.0591049194336,
"logps/ref_chosen": -59.744285583496094,
"logps/ref_rejected": -86.77314758300781,
"logps/rejected": -96.10343933105469,
"loss": 1.1669,
"margin_dpo/margin_mean": 3.015474319458008,
"margin_dpo/margin_std": 5.040165901184082,
"step": 125
},
{
"epoch": 0.1965230536659108,
"fcm_dpo/beta": 0.09993546456098557,
"fcm_dpo/delta": -0.0032323698978871107,
"fcm_dpo/margin": 2.8389410972595215,
"fcm_dpo/q_t": 0.4344428479671478,
"grad_norm": 30.352113723754883,
"learning_rate": 4.866793539675126e-07,
"logits/chosen": 0.1967930942773819,
"logits/rejected": 0.1665017306804657,
"logps/chosen": -71.52397918701172,
"logps/ref_chosen": -64.15296936035156,
"logps/ref_rejected": -75.17271423339844,
"logps/rejected": -85.38265991210938,
"loss": 1.1919,
"margin_dpo/margin_mean": 2.8389410972595215,
"margin_dpo/margin_std": 5.271792411804199,
"step": 130
},
{
"epoch": 0.20408163265306123,
"fcm_dpo/beta": 0.09838803857564926,
"fcm_dpo/delta": -0.024441083893179893,
"fcm_dpo/margin": 3.772266387939453,
"fcm_dpo/q_t": 0.4152294099330902,
"grad_norm": 24.730987548828125,
"learning_rate": 4.844676460754862e-07,
"logits/chosen": 0.26162657141685486,
"logits/rejected": 0.22124962508678436,
"logps/chosen": -65.22847747802734,
"logps/ref_chosen": -57.006690979003906,
"logps/ref_rejected": -73.71768188476562,
"logps/rejected": -85.71173095703125,
"loss": 1.1324,
"margin_dpo/margin_mean": 3.772266387939453,
"margin_dpo/margin_std": 5.984399318695068,
"step": 135
},
{
"epoch": 0.21164021164021163,
"fcm_dpo/beta": 0.09531065821647644,
"fcm_dpo/delta": -0.05013541504740715,
"fcm_dpo/margin": 4.44573450088501,
"fcm_dpo/q_t": 0.4060499668121338,
"grad_norm": 29.4134464263916,
"learning_rate": 4.820919832540181e-07,
"logits/chosen": 0.25046736001968384,
"logits/rejected": 0.2064397782087326,
"logps/chosen": -74.15589904785156,
"logps/ref_chosen": -63.36246871948242,
"logps/ref_rejected": -79.62621307373047,
"logps/rejected": -94.8653793334961,
"loss": 1.1281,
"margin_dpo/margin_mean": 4.445734977722168,
"margin_dpo/margin_std": 7.5196404457092285,
"step": 140
},
{
"epoch": 0.21919879062736206,
"fcm_dpo/beta": 0.08971674740314484,
"fcm_dpo/delta": -0.04721946269273758,
"fcm_dpo/margin": 4.761581897735596,
"fcm_dpo/q_t": 0.40657633543014526,
"grad_norm": 30.589385986328125,
"learning_rate": 4.795540267200686e-07,
"logits/chosen": 0.2786180078983307,
"logits/rejected": 0.2569752633571625,
"logps/chosen": -77.37989044189453,
"logps/ref_chosen": -65.01470184326172,
"logps/ref_rejected": -80.49073791503906,
"logps/rejected": -97.61750793457031,
"loss": 1.1506,
"margin_dpo/margin_mean": 4.761581897735596,
"margin_dpo/margin_std": 8.919352531433105,
"step": 145
},
{
"epoch": 0.22675736961451248,
"fcm_dpo/beta": 0.08510036766529083,
"fcm_dpo/delta": -0.11195192486047745,
"fcm_dpo/margin": 5.9401068687438965,
"fcm_dpo/q_t": 0.39034393429756165,
"grad_norm": 22.236682891845703,
"learning_rate": 4.768555511768486e-07,
"logits/chosen": 0.2792455554008484,
"logits/rejected": 0.23401157557964325,
"logps/chosen": -72.20897674560547,
"logps/ref_chosen": -59.19135284423828,
"logps/ref_rejected": -74.0339126586914,
"logps/rejected": -92.99165344238281,
"loss": 1.0828,
"margin_dpo/margin_mean": 5.9401068687438965,
"margin_dpo/margin_std": 9.036436080932617,
"step": 150
},
{
"epoch": 0.23431594860166288,
"fcm_dpo/beta": 0.07190684974193573,
"fcm_dpo/delta": -0.13100460171699524,
"fcm_dpo/margin": 7.103701591491699,
"fcm_dpo/q_t": 0.38805317878723145,
"grad_norm": 21.95441246032715,
"learning_rate": 4.7399844357283393e-07,
"logits/chosen": 0.3055272698402405,
"logits/rejected": 0.2715781033039093,
"logps/chosen": -74.09068298339844,
"logps/ref_chosen": -60.93949508666992,
"logps/ref_rejected": -74.51151275634766,
"logps/rejected": -94.76640319824219,
"loss": 1.0779,
"margin_dpo/margin_mean": 7.103701114654541,
"margin_dpo/margin_std": 10.82844352722168,
"step": 155
},
{
"epoch": 0.2418745275888133,
"fcm_dpo/beta": 0.06413034349679947,
"fcm_dpo/delta": -0.11762239784002304,
"fcm_dpo/margin": 7.9449782371521,
"fcm_dpo/q_t": 0.38926568627357483,
"grad_norm": 20.171205520629883,
"learning_rate": 4.7098470178228755e-07,
"logits/chosen": 0.2932426929473877,
"logits/rejected": 0.25532081723213196,
"logps/chosen": -73.47798919677734,
"logps/ref_chosen": -58.763816833496094,
"logps/ref_rejected": -74.94743347167969,
"logps/rejected": -97.60658264160156,
"loss": 1.0755,
"margin_dpo/margin_mean": 7.9449782371521,
"margin_dpo/margin_std": 12.001934051513672,
"step": 160
},
{
"epoch": 0.2494331065759637,
"fcm_dpo/beta": 0.05613988637924194,
"fcm_dpo/delta": -0.1726417988538742,
"fcm_dpo/margin": 10.18576431274414,
"fcm_dpo/q_t": 0.3745308518409729,
"grad_norm": 17.679018020629883,
"learning_rate": 4.678164332082175e-07,
"logits/chosen": 0.3483496308326721,
"logits/rejected": 0.2956962287425995,
"logps/chosen": -72.38264465332031,
"logps/ref_chosen": -55.70417022705078,
"logps/ref_rejected": -76.59439849853516,
"logps/rejected": -103.45863342285156,
"loss": 1.0359,
"margin_dpo/margin_mean": 10.18576431274414,
"margin_dpo/margin_std": 13.736506462097168,
"step": 165
},
{
"epoch": 0.25699168556311414,
"fcm_dpo/beta": 0.04928728565573692,
"fcm_dpo/delta": -0.09952159970998764,
"fcm_dpo/margin": 10.129049301147461,
"fcm_dpo/q_t": 0.3913528621196747,
"grad_norm": 16.242919921875,
"learning_rate": 4.6449585330874425e-07,
"logits/chosen": 0.3343687057495117,
"logits/rejected": 0.2925954759120941,
"logps/chosen": -76.33804321289062,
"logps/ref_chosen": -61.169105529785156,
"logps/ref_rejected": -77.21674346923828,
"logps/rejected": -102.5147476196289,
"loss": 1.0787,
"margin_dpo/margin_mean": 10.129049301147461,
"margin_dpo/margin_std": 15.241543769836426,
"step": 170
},
{
"epoch": 0.26455026455026454,
"fcm_dpo/beta": 0.04289032891392708,
"fcm_dpo/delta": -0.16773784160614014,
"fcm_dpo/margin": 12.669050216674805,
"fcm_dpo/q_t": 0.38352128863334656,
"grad_norm": 16.280622482299805,
"learning_rate": 4.6102528404790965e-07,
"logits/chosen": 0.4107338786125183,
"logits/rejected": 0.3484509587287903,
"logps/chosen": -75.43938446044922,
"logps/ref_chosen": -59.24176788330078,
"logps/ref_rejected": -81.80384826660156,
"logps/rejected": -110.67051696777344,
"loss": 1.075,
"margin_dpo/margin_mean": 12.669050216674805,
"margin_dpo/margin_std": 19.41705322265625,
"step": 175
},
{
"epoch": 0.272108843537415,
"fcm_dpo/beta": 0.03678743541240692,
"fcm_dpo/delta": -0.11334402859210968,
"fcm_dpo/margin": 13.086156845092773,
"fcm_dpo/q_t": 0.3945137560367584,
"grad_norm": 14.899900436401367,
"learning_rate": 4.5740715227200897e-07,
"logits/chosen": 0.3705318868160248,
"logits/rejected": 0.3279619812965393,
"logps/chosen": -80.0919418334961,
"logps/ref_chosen": -63.24883270263672,
"logps/ref_rejected": -79.00736236572266,
"logps/rejected": -108.93663024902344,
"loss": 1.0966,
"margin_dpo/margin_mean": 13.086158752441406,
"margin_dpo/margin_std": 20.56978988647461,
"step": 180
},
{
"epoch": 0.2796674225245654,
"fcm_dpo/beta": 0.03294721618294716,
"fcm_dpo/delta": -0.11023982614278793,
"fcm_dpo/margin": 13.870523452758789,
"fcm_dpo/q_t": 0.39916402101516724,
"grad_norm": 14.874372482299805,
"learning_rate": 4.5364398801258394e-07,
"logits/chosen": 0.43493133783340454,
"logits/rejected": 0.38104137778282166,
"logps/chosen": -73.21731567382812,
"logps/ref_chosen": -56.390625,
"logps/ref_rejected": -76.81001281738281,
"logps/rejected": -107.5072250366211,
"loss": 1.1048,
"margin_dpo/margin_mean": 13.870523452758789,
"margin_dpo/margin_std": 21.951461791992188,
"step": 185
},
{
"epoch": 0.2872260015117158,
"fcm_dpo/beta": 0.02976861596107483,
"fcm_dpo/delta": -0.130234032869339,
"fcm_dpo/margin": 17.27200698852539,
"fcm_dpo/q_t": 0.3894996643066406,
"grad_norm": 21.018056869506836,
"learning_rate": 4.4973842271726024e-07,
"logits/chosen": 0.42057228088378906,
"logits/rejected": 0.36500033736228943,
"logps/chosen": -86.78327178955078,
"logps/ref_chosen": -68.25389099121094,
"logps/ref_rejected": -86.461181640625,
"logps/rejected": -122.26255798339844,
"loss": 1.0765,
"margin_dpo/margin_mean": 17.27200698852539,
"margin_dpo/margin_std": 26.312463760375977,
"step": 190
},
{
"epoch": 0.2947845804988662,
"fcm_dpo/beta": 0.026562869548797607,
"fcm_dpo/delta": -0.1006811112165451,
"fcm_dpo/margin": 17.410314559936523,
"fcm_dpo/q_t": 0.3985130786895752,
"grad_norm": 17.732221603393555,
"learning_rate": 4.4569318740967043e-07,
"logits/chosen": 0.45999032258987427,
"logits/rejected": 0.4397885799407959,
"logps/chosen": -90.21788024902344,
"logps/ref_chosen": -62.1484260559082,
"logps/ref_rejected": -71.33458709716797,
"logps/rejected": -116.81434631347656,
"loss": 1.1108,
"margin_dpo/margin_mean": 17.41031265258789,
"margin_dpo/margin_std": 28.3375301361084,
"step": 195
},
{
"epoch": 0.30234315948601664,
"fcm_dpo/beta": 0.02375042252242565,
"fcm_dpo/delta": -0.12756529450416565,
"fcm_dpo/margin": 21.052818298339844,
"fcm_dpo/q_t": 0.3899378478527069,
"grad_norm": 14.28109073638916,
"learning_rate": 4.415111107797445e-07,
"logits/chosen": 0.5520531535148621,
"logits/rejected": 0.4885140061378479,
"logps/chosen": -89.1515121459961,
"logps/ref_chosen": -56.950096130371094,
"logps/ref_rejected": -78.66989135742188,
"logps/rejected": -131.92413330078125,
"loss": 1.0917,
"margin_dpo/margin_mean": 21.052818298339844,
"margin_dpo/margin_std": 32.75715637207031,
"step": 200
},
{
"epoch": 0.30234315948601664,
"eval_fcm_dpo/beta": 0.022144686430692673,
"eval_logits/chosen": 0.5289739966392517,
"eval_logits/rejected": 0.47875019907951355,
"eval_logps/chosen": -112.04022979736328,
"eval_logps/ref_chosen": -74.85946655273438,
"eval_logps/ref_rejected": -79.54898834228516,
"eval_logps/rejected": -137.68849182128906,
"eval_loss": 0.5616376996040344,
"eval_margin_dpo/margin_mean": 20.95873260498047,
"eval_margin_dpo/margin_std": 36.02712631225586,
"eval_runtime": 39.0532,
"eval_samples_per_second": 58.971,
"eval_steps_per_second": 1.844,
"step": 200
},
{
"epoch": 0.30990173847316704,
"fcm_dpo/beta": 0.020863929763436317,
"fcm_dpo/delta": -0.10121381282806396,
"fcm_dpo/margin": 23.369535446166992,
"fcm_dpo/q_t": 0.3925306797027588,
"grad_norm": 18.312232971191406,
"learning_rate": 4.3719511720570814e-07,
"logits/chosen": 0.5444221496582031,
"logits/rejected": 0.47210827469825745,
"logps/chosen": -100.59618377685547,
"logps/ref_chosen": -57.99428176879883,
"logps/ref_rejected": -83.5367431640625,
"logps/rejected": -149.5081787109375,
"loss": 1.0877,
"margin_dpo/margin_mean": 23.369535446166992,
"margin_dpo/margin_std": 35.94400405883789,
"step": 205
},
{
"epoch": 0.31746031746031744,
"fcm_dpo/beta": 0.019625190645456314,
"fcm_dpo/delta": -0.06917699426412582,
"fcm_dpo/margin": 21.765804290771484,
"fcm_dpo/q_t": 0.4085807204246521,
"grad_norm": 16.501283645629883,
"learning_rate": 4.327482247091679e-07,
"logits/chosen": 0.5628946423530579,
"logits/rejected": 0.5068370699882507,
"logps/chosen": -115.35340881347656,
"logps/ref_chosen": -63.77195358276367,
"logps/ref_rejected": -82.56491088867188,
"logps/rejected": -155.9121856689453,
"loss": 1.1576,
"margin_dpo/margin_mean": 21.765806198120117,
"margin_dpo/margin_std": 41.01616668701172,
"step": 210
},
{
"epoch": 0.3250188964474679,
"fcm_dpo/beta": 0.01737585850059986,
"fcm_dpo/delta": -0.14525336027145386,
"fcm_dpo/margin": 31.006107330322266,
"fcm_dpo/q_t": 0.3825533390045166,
"grad_norm": 14.236336708068848,
"learning_rate": 4.281735428447157e-07,
"logits/chosen": 0.55656498670578,
"logits/rejected": 0.48811864852905273,
"logps/chosen": -107.94661712646484,
"logps/ref_chosen": -60.27800750732422,
"logps/ref_rejected": -83.91607666015625,
"logps/rejected": -162.59080505371094,
"loss": 1.0518,
"margin_dpo/margin_mean": 31.006107330322266,
"margin_dpo/margin_std": 43.699501037597656,
"step": 215
},
{
"epoch": 0.3325774754346183,
"fcm_dpo/beta": 0.01522024255245924,
"fcm_dpo/delta": -0.08151903748512268,
"fcm_dpo/margin": 28.928936004638672,
"fcm_dpo/q_t": 0.4007510244846344,
"grad_norm": 13.642237663269043,
"learning_rate": 4.234742705255272e-07,
"logits/chosen": 0.5403038263320923,
"logits/rejected": 0.47905245423316956,
"logps/chosen": -108.02642822265625,
"logps/ref_chosen": -60.88572311401367,
"logps/ref_rejected": -80.1805191040039,
"logps/rejected": -156.2501678466797,
"loss": 1.103,
"margin_dpo/margin_mean": 28.92893409729004,
"margin_dpo/margin_std": 44.418190002441406,
"step": 220
},
{
"epoch": 0.3401360544217687,
"fcm_dpo/beta": 0.014115704223513603,
"fcm_dpo/delta": -0.08850517123937607,
"fcm_dpo/margin": 32.46453857421875,
"fcm_dpo/q_t": 0.3981640040874481,
"grad_norm": 12.134458541870117,
"learning_rate": 4.186536937864752e-07,
"logits/chosen": 0.6014004945755005,
"logits/rejected": 0.5260181427001953,
"logps/chosen": -108.61814880371094,
"logps/ref_chosen": -61.02507781982422,
"logps/ref_rejected": -91.92439270019531,
"logps/rejected": -171.98199462890625,
"loss": 1.0861,
"margin_dpo/margin_mean": 32.464542388916016,
"margin_dpo/margin_std": 47.451393127441406,
"step": 225
},
{
"epoch": 0.3476946334089191,
"fcm_dpo/beta": 0.013391288928687572,
"fcm_dpo/delta": -0.04506213217973709,
"fcm_dpo/margin": 31.46584701538086,
"fcm_dpo/q_t": 0.4058153033256531,
"grad_norm": 13.718859672546387,
"learning_rate": 4.137151834863213e-07,
"logits/chosen": 0.6671124696731567,
"logits/rejected": 0.6181380748748779,
"logps/chosen": -106.39383697509766,
"logps/ref_chosen": -54.49797821044922,
"logps/ref_rejected": -71.96363830566406,
"logps/rejected": -155.32534790039062,
"loss": 1.1278,
"margin_dpo/margin_mean": 31.46584701538086,
"margin_dpo/margin_std": 53.070556640625,
"step": 230
},
{
"epoch": 0.35525321239606955,
"fcm_dpo/beta": 0.012218359857797623,
"fcm_dpo/delta": -0.10176394134759903,
"fcm_dpo/margin": 35.79335403442383,
"fcm_dpo/q_t": 0.4017709791660309,
"grad_norm": 15.619464874267578,
"learning_rate": 4.08662192950594e-07,
"logits/chosen": 0.6200426816940308,
"logits/rejected": 0.5975286364555359,
"logps/chosen": -124.5421142578125,
"logps/ref_chosen": -63.250282287597656,
"logps/ref_rejected": -73.09049987792969,
"logps/rejected": -170.17568969726562,
"loss": 1.1063,
"margin_dpo/margin_mean": 35.79335403442383,
"margin_dpo/margin_std": 54.02311325073242,
"step": 235
},
{
"epoch": 0.36281179138321995,
"fcm_dpo/beta": 0.011470427736639977,
"fcm_dpo/delta": -0.07215714454650879,
"fcm_dpo/margin": 40.14154815673828,
"fcm_dpo/q_t": 0.3966708779335022,
"grad_norm": 12.957676887512207,
"learning_rate": 4.0349825555680045e-07,
"logits/chosen": 0.6564071774482727,
"logits/rejected": 0.601380467414856,
"logps/chosen": -138.31361389160156,
"logps/ref_chosen": -65.26150512695312,
"logps/ref_rejected": -87.60311126708984,
"logps/rejected": -200.7967529296875,
"loss": 1.0907,
"margin_dpo/margin_mean": 40.14154815673828,
"margin_dpo/margin_std": 59.86652755737305,
"step": 240
},
{
"epoch": 0.37037037037037035,
"fcm_dpo/beta": 0.010648809373378754,
"fcm_dpo/delta": -0.08149583637714386,
"fcm_dpo/margin": 43.929100036621094,
"fcm_dpo/q_t": 0.39428311586380005,
"grad_norm": 14.245344161987305,
"learning_rate": 3.982269822636601e-07,
"logits/chosen": 0.6480621099472046,
"logits/rejected": 0.6164118051528931,
"logps/chosen": -135.7429656982422,
"logps/ref_chosen": -65.73170471191406,
"logps/ref_rejected": -75.19642639160156,
"logps/rejected": -189.1367645263672,
"loss": 1.0693,
"margin_dpo/margin_mean": 43.92909622192383,
"margin_dpo/margin_std": 59.444854736328125,
"step": 245
},
{
"epoch": 0.3779289493575208,
"fcm_dpo/beta": 0.009915231727063656,
"fcm_dpo/delta": -0.041826874017715454,
"fcm_dpo/margin": 35.88407897949219,
"fcm_dpo/q_t": 0.4197370409965515,
"grad_norm": 14.5069580078125,
"learning_rate": 3.9285205908608934e-07,
"logits/chosen": 0.7002509832382202,
"logits/rejected": 0.6972779035568237,
"logps/chosen": -155.23341369628906,
"logps/ref_chosen": -70.71224212646484,
"logps/ref_rejected": -76.12723541259766,
"logps/rejected": -196.532470703125,
"loss": 1.1785,
"margin_dpo/margin_mean": 35.88407516479492,
"margin_dpo/margin_std": 69.27667999267578,
"step": 250
},
{
"epoch": 0.3854875283446712,
"fcm_dpo/beta": 0.009629678912460804,
"fcm_dpo/delta": -0.012941457331180573,
"fcm_dpo/margin": 34.634307861328125,
"fcm_dpo/q_t": 0.4239214360713959,
"grad_norm": 15.92194938659668,
"learning_rate": 3.873772445177015e-07,
"logits/chosen": 0.6967302560806274,
"logits/rejected": 0.6524414420127869,
"logps/chosen": -145.23605346679688,
"logps/ref_chosen": -61.767662048339844,
"logps/ref_rejected": -77.38813018798828,
"logps/rejected": -195.49081420898438,
"loss": 1.1757,
"margin_dpo/margin_mean": 34.63430404663086,
"margin_dpo/margin_std": 64.64119720458984,
"step": 255
},
{
"epoch": 0.3930461073318216,
"fcm_dpo/beta": 0.009221619926393032,
"fcm_dpo/delta": -0.06601964682340622,
"fcm_dpo/margin": 48.99907684326172,
"fcm_dpo/q_t": 0.39814695715904236,
"grad_norm": 19.110883712768555,
"learning_rate": 3.818063669026256e-07,
"logits/chosen": 0.6849242448806763,
"logits/rejected": 0.603645384311676,
"logps/chosen": -148.38861083984375,
"logps/ref_chosen": -61.57584762573242,
"logps/ref_rejected": -91.87513732910156,
"logps/rejected": -227.6869659423828,
"loss": 1.0902,
"margin_dpo/margin_mean": 48.99907684326172,
"margin_dpo/margin_std": 72.31168365478516,
"step": 260
},
{
"epoch": 0.40060468631897206,
"fcm_dpo/beta": 0.008941135369241238,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 41.382564544677734,
"fcm_dpo/q_t": 0.41540035605430603,
"grad_norm": 11.394349098205566,
"learning_rate": 3.7614332175848027e-07,
"logits/chosen": 0.643638551235199,
"logits/rejected": 0.5954387187957764,
"logps/chosen": -147.72586059570312,
"logps/ref_chosen": -65.75422668457031,
"logps/ref_rejected": -77.9569320678711,
"logps/rejected": -201.31112670898438,
"loss": 1.1392,
"margin_dpo/margin_mean": 41.38256072998047,
"margin_dpo/margin_std": 68.18827819824219,
"step": 265
},
{
"epoch": 0.40816326530612246,
"fcm_dpo/beta": 0.008837602101266384,
"fcm_dpo/delta": -0.02355731837451458,
"fcm_dpo/margin": 42.396751403808594,
"fcm_dpo/q_t": 0.41459059715270996,
"grad_norm": 15.444929122924805,
"learning_rate": 3.7039206905237656e-07,
"logits/chosen": 0.6901830434799194,
"logits/rejected": 0.6426895260810852,
"logps/chosen": -138.05111694335938,
"logps/ref_chosen": -62.27649688720703,
"logps/ref_rejected": -76.56950378417969,
"logps/rejected": -194.7408905029297,
"loss": 1.1371,
"margin_dpo/margin_mean": 42.396751403808594,
"margin_dpo/margin_std": 68.82071685791016,
"step": 270
},
{
"epoch": 0.41572184429327286,
"fcm_dpo/beta": 0.008553928695619106,
"fcm_dpo/delta": -0.03487258031964302,
"fcm_dpo/margin": 46.58140182495117,
"fcm_dpo/q_t": 0.40970954298973083,
"grad_norm": 20.471668243408203,
"learning_rate": 3.645566304318526e-07,
"logits/chosen": 0.6841222047805786,
"logits/rejected": 0.6400257349014282,
"logps/chosen": -143.00296020507812,
"logps/ref_chosen": -61.854393005371094,
"logps/ref_rejected": -77.22246551513672,
"logps/rejected": -204.95242309570312,
"loss": 1.1265,
"margin_dpo/margin_mean": 46.58140182495117,
"margin_dpo/margin_std": 74.56303405761719,
"step": 275
},
{
"epoch": 0.42328042328042326,
"fcm_dpo/beta": 0.008292925544083118,
"fcm_dpo/delta": -0.060983072966337204,
"fcm_dpo/margin": 49.10211944580078,
"fcm_dpo/q_t": 0.4059298038482666,
"grad_norm": 13.393473625183105,
"learning_rate": 3.586410864126781e-07,
"logits/chosen": 0.6645344495773315,
"logits/rejected": 0.6346439719200134,
"logps/chosen": -146.82679748535156,
"logps/ref_chosen": -61.29896926879883,
"logps/ref_rejected": -73.35762023925781,
"logps/rejected": -207.98757934570312,
"loss": 1.1041,
"margin_dpo/margin_mean": 49.10211944580078,
"margin_dpo/margin_std": 69.17396545410156,
"step": 280
},
{
"epoch": 0.4308390022675737,
"fcm_dpo/beta": 0.007784596644341946,
"fcm_dpo/delta": -0.027562415227293968,
"fcm_dpo/margin": 48.093055725097656,
"fcm_dpo/q_t": 0.41425347328186035,
"grad_norm": 16.781484603881836,
"learning_rate": 3.5264957352549375e-07,
"logits/chosen": 0.6967326402664185,
"logits/rejected": 0.6534906625747681,
"logps/chosen": -159.16049194335938,
"logps/ref_chosen": -63.435462951660156,
"logps/ref_rejected": -79.73661804199219,
"logps/rejected": -223.55471801757812,
"loss": 1.1319,
"margin_dpo/margin_mean": 48.093055725097656,
"margin_dpo/margin_std": 75.91288757324219,
"step": 285
},
{
"epoch": 0.4383975812547241,
"fcm_dpo/beta": 0.007469588425010443,
"fcm_dpo/delta": -0.042763665318489075,
"fcm_dpo/margin": 55.20690155029297,
"fcm_dpo/q_t": 0.40540584921836853,
"grad_norm": 15.624042510986328,
"learning_rate": 3.465862814232821e-07,
"logits/chosen": 0.711457371711731,
"logits/rejected": 0.650222659111023,
"logps/chosen": -162.69920349121094,
"logps/ref_chosen": -57.696876525878906,
"logps/ref_rejected": -79.78132629394531,
"logps/rejected": -239.9905548095703,
"loss": 1.0921,
"margin_dpo/margin_mean": 55.20690155029297,
"margin_dpo/margin_std": 75.91992950439453,
"step": 290
},
{
"epoch": 0.4459561602418745,
"fcm_dpo/beta": 0.00719553604722023,
"fcm_dpo/delta": -0.038509003818035126,
"fcm_dpo/margin": 52.451446533203125,
"fcm_dpo/q_t": 0.4142046570777893,
"grad_norm": 17.099523544311523,
"learning_rate": 3.4045544995169125e-07,
"logits/chosen": 0.7705697417259216,
"logits/rejected": 0.6970144510269165,
"logps/chosen": -168.26991271972656,
"logps/ref_chosen": -55.430633544921875,
"logps/ref_rejected": -78.1390151977539,
"logps/rejected": -243.4297332763672,
"loss": 1.1278,
"margin_dpo/margin_mean": 52.451446533203125,
"margin_dpo/margin_std": 81.40741729736328,
"step": 295
},
{
"epoch": 0.45351473922902497,
"fcm_dpo/beta": 0.007098735310137272,
"fcm_dpo/delta": -0.0035702171735465527,
"fcm_dpo/margin": 45.36979293823242,
"fcm_dpo/q_t": 0.4261544346809387,
"grad_norm": 12.7843656539917,
"learning_rate": 3.3426136618426043e-07,
"logits/chosen": 0.707840621471405,
"logits/rejected": 0.6562869548797607,
"logps/chosen": -185.88197326660156,
"logps/ref_chosen": -61.207069396972656,
"logps/ref_rejected": -75.23294067382812,
"logps/rejected": -245.2776336669922,
"loss": 1.1763,
"margin_dpo/margin_mean": 45.36979293823242,
"margin_dpo/margin_std": 83.63997650146484,
"step": 300
},
{
"epoch": 0.46107331821617537,
"fcm_dpo/beta": 0.007054163608700037,
"fcm_dpo/delta": -0.011994509026408195,
"fcm_dpo/margin": 47.528289794921875,
"fcm_dpo/q_t": 0.423266738653183,
"grad_norm": 14.771221160888672,
"learning_rate": 3.280083614246217e-07,
"logits/chosen": 0.6945724487304688,
"logits/rejected": 0.6594172716140747,
"logps/chosen": -184.41525268554688,
"logps/ref_chosen": -63.06663131713867,
"logps/ref_rejected": -78.45845031738281,
"logps/rejected": -247.3353729248047,
"loss": 1.1687,
"margin_dpo/margin_mean": 47.52829360961914,
"margin_dpo/margin_std": 85.50675201416016,
"step": 305
},
{
"epoch": 0.46863189720332576,
"fcm_dpo/beta": 0.006967984139919281,
"fcm_dpo/delta": -0.005523760803043842,
"fcm_dpo/margin": 38.326297760009766,
"fcm_dpo/q_t": 0.43881019949913025,
"grad_norm": 12.633563995361328,
"learning_rate": 3.2170080817777257e-07,
"logits/chosen": 0.7481607794761658,
"logits/rejected": 0.7011669874191284,
"logps/chosen": -181.23231506347656,
"logps/ref_chosen": -63.60908889770508,
"logps/ref_rejected": -74.06394958496094,
"logps/rejected": -230.0135040283203,
"loss": 1.2204,
"margin_dpo/margin_mean": 38.3262939453125,
"margin_dpo/margin_std": 83.02953338623047,
"step": 310
},
{
"epoch": 0.47619047619047616,
"fcm_dpo/beta": 0.006899350322782993,
"fcm_dpo/delta": -0.015122666954994202,
"fcm_dpo/margin": 49.54688262939453,
"fcm_dpo/q_t": 0.4215819239616394,
"grad_norm": 12.57418441772461,
"learning_rate": 3.1534311709253723e-07,
"logits/chosen": 0.6818271279335022,
"logits/rejected": 0.6326395869255066,
"logps/chosen": -168.5291748046875,
"logps/ref_chosen": -62.31493377685547,
"logps/ref_rejected": -75.07472229003906,
"logps/rejected": -230.8358612060547,
"loss": 1.1467,
"margin_dpo/margin_mean": 49.54688262939453,
"margin_dpo/margin_std": 79.58061981201172,
"step": 315
},
{
"epoch": 0.4837490551776266,
"fcm_dpo/beta": 0.00665700901299715,
"fcm_dpo/delta": -0.0466286763548851,
"fcm_dpo/margin": 58.286048889160156,
"fcm_dpo/q_t": 0.41121044754981995,
"grad_norm": 11.143263816833496,
"learning_rate": 3.0893973387735683e-07,
"logits/chosen": 0.6711269617080688,
"logits/rejected": 0.6105794310569763,
"logps/chosen": -152.8118133544922,
"logps/ref_chosen": -55.336036682128906,
"logps/ref_rejected": -80.05536651611328,
"logps/rejected": -235.81716918945312,
"loss": 1.1163,
"margin_dpo/margin_mean": 58.286048889160156,
"margin_dpo/margin_std": 85.82135009765625,
"step": 320
},
{
"epoch": 0.491307634164777,
"fcm_dpo/beta": 0.006425836123526096,
"fcm_dpo/delta": -0.025686081498861313,
"fcm_dpo/margin": 57.461631774902344,
"fcm_dpo/q_t": 0.4144781231880188,
"grad_norm": 10.588150978088379,
"learning_rate": 3.0249513619156206e-07,
"logits/chosen": 0.6904675364494324,
"logits/rejected": 0.63264399766922,
"logps/chosen": -163.8726043701172,
"logps/ref_chosen": -57.90629959106445,
"logps/ref_rejected": -74.2243881225586,
"logps/rejected": -237.65234375,
"loss": 1.1236,
"margin_dpo/margin_mean": 57.461631774902344,
"margin_dpo/margin_std": 84.99284362792969,
"step": 325
},
{
"epoch": 0.4988662131519274,
"fcm_dpo/beta": 0.006382169667631388,
"fcm_dpo/delta": -0.0038101542741060257,
"fcm_dpo/margin": 48.41600799560547,
"fcm_dpo/q_t": 0.42902547121047974,
"grad_norm": 11.327309608459473,
"learning_rate": 2.9601383051430505e-07,
"logits/chosen": 0.6518532037734985,
"logits/rejected": 0.615720272064209,
"logps/chosen": -183.9138641357422,
"logps/ref_chosen": -65.17555236816406,
"logps/ref_rejected": -78.53681182861328,
"logps/rejected": -245.69113159179688,
"loss": 1.1791,
"margin_dpo/margin_mean": 48.41600799560547,
"margin_dpo/margin_std": 88.16541290283203,
"step": 330
},
{
"epoch": 0.5064247921390779,
"fcm_dpo/beta": 0.0062421816401183605,
"fcm_dpo/delta": -0.024326926097273827,
"fcm_dpo/margin": 60.58441162109375,
"fcm_dpo/q_t": 0.4129098057746887,
"grad_norm": 14.136266708374023,
"learning_rate": 2.895003489933375e-07,
"logits/chosen": 0.7140273451805115,
"logits/rejected": 0.6678114533424377,
"logps/chosen": -181.29025268554688,
"logps/ref_chosen": -62.62797927856445,
"logps/ref_rejected": -79.9095458984375,
"logps/rejected": -259.15618896484375,
"loss": 1.116,
"margin_dpo/margin_mean": 60.58441162109375,
"margin_dpo/margin_std": 88.1111831665039,
"step": 335
},
{
"epoch": 0.5139833711262283,
"fcm_dpo/beta": 0.006153796333819628,
"fcm_dpo/delta": -0.008682211861014366,
"fcm_dpo/margin": 52.959136962890625,
"fcm_dpo/q_t": 0.4254566729068756,
"grad_norm": 11.051826477050781,
"learning_rate": 2.8295924627584004e-07,
"logits/chosen": 0.6959365010261536,
"logits/rejected": 0.6593111753463745,
"logps/chosen": -193.18997192382812,
"logps/ref_chosen": -61.1064567565918,
"logps/ref_rejected": -76.71846008300781,
"logps/rejected": -261.7611083984375,
"loss": 1.1653,
"margin_dpo/margin_mean": 52.959144592285156,
"margin_dpo/margin_std": 92.80432891845703,
"step": 340
},
{
"epoch": 0.5215419501133787,
"fcm_dpo/beta": 0.00603325804695487,
"fcm_dpo/delta": -0.01978192664682865,
"fcm_dpo/margin": 59.29703903198242,
"fcm_dpo/q_t": 0.41810742020606995,
"grad_norm": 11.418231010437012,
"learning_rate": 2.7639509632351927e-07,
"logits/chosen": 0.6984354853630066,
"logits/rejected": 0.6573707461357117,
"logps/chosen": -188.5522003173828,
"logps/ref_chosen": -60.12370681762695,
"logps/ref_rejected": -78.58574676513672,
"logps/rejected": -266.3112487792969,
"loss": 1.1408,
"margin_dpo/margin_mean": 59.29703903198242,
"margin_dpo/margin_std": 95.01673889160156,
"step": 345
},
{
"epoch": 0.5291005291005291,
"fcm_dpo/beta": 0.005945051088929176,
"fcm_dpo/delta": -0.04099785163998604,
"fcm_dpo/margin": 68.73493957519531,
"fcm_dpo/q_t": 0.40639615058898926,
"grad_norm": 13.632902145385742,
"learning_rate": 2.698124892141971e-07,
"logits/chosen": 0.7776240706443787,
"logits/rejected": 0.7138788104057312,
"logps/chosen": -179.670166015625,
"logps/ref_chosen": -55.104461669921875,
"logps/ref_rejected": -80.63292694091797,
"logps/rejected": -273.9335632324219,
"loss": 1.0955,
"margin_dpo/margin_mean": 68.73492431640625,
"margin_dpo/margin_std": 94.61377716064453,
"step": 350
},
{
"epoch": 0.5366591080876795,
"fcm_dpo/beta": 0.005738373845815659,
"fcm_dpo/delta": -0.02628186345100403,
"fcm_dpo/margin": 70.14204406738281,
"fcm_dpo/q_t": 0.4074961543083191,
"grad_norm": 11.920957565307617,
"learning_rate": 2.632160279321328e-07,
"logits/chosen": 0.7566885352134705,
"logits/rejected": 0.6853598356246948,
"logps/chosen": -179.594970703125,
"logps/ref_chosen": -54.87224197387695,
"logps/ref_rejected": -77.01316833496094,
"logps/rejected": -271.8779602050781,
"loss": 1.1034,
"margin_dpo/margin_mean": 70.14204406738281,
"margin_dpo/margin_std": 100.20321655273438,
"step": 355
},
{
"epoch": 0.54421768707483,
"fcm_dpo/beta": 0.005551532376557589,
"fcm_dpo/delta": -0.027073120698332787,
"fcm_dpo/margin": 58.166900634765625,
"fcm_dpo/q_t": 0.42612725496292114,
"grad_norm": 11.87866497039795,
"learning_rate": 2.5661032514931834e-07,
"logits/chosen": 0.7266971468925476,
"logits/rejected": 0.6876090168952942,
"logps/chosen": -207.0313262939453,
"logps/ref_chosen": -60.75285720825195,
"logps/ref_rejected": -75.21507263183594,
"logps/rejected": -279.6604309082031,
"loss": 1.1756,
"margin_dpo/margin_mean": 58.166900634765625,
"margin_dpo/margin_std": 105.21885681152344,
"step": 360
},
{
"epoch": 0.5517762660619804,
"fcm_dpo/beta": 0.0053816549479961395,
"fcm_dpo/delta": -0.03808388113975525,
"fcm_dpo/margin": 80.42271423339844,
"fcm_dpo/q_t": 0.4026545584201813,
"grad_norm": 10.223109245300293,
"learning_rate": 2.5e-07,
"logits/chosen": 0.7709358930587769,
"logits/rejected": 0.7040495872497559,
"logps/chosen": -198.53555297851562,
"logps/ref_chosen": -58.56513595581055,
"logps/ref_rejected": -84.06403350830078,
"logps/rejected": -304.4571533203125,
"loss": 1.0842,
"margin_dpo/margin_mean": 80.42271423339844,
"margin_dpo/margin_std": 112.6312026977539,
"step": 365
},
{
"epoch": 0.5593348450491308,
"fcm_dpo/beta": 0.005276652052998543,
"fcm_dpo/delta": -0.007817991077899933,
"fcm_dpo/margin": 64.3998031616211,
"fcm_dpo/q_t": 0.4214501976966858,
"grad_norm": 12.37844181060791,
"learning_rate": 2.4338967485068164e-07,
"logits/chosen": 0.7387269735336304,
"logits/rejected": 0.6979778409004211,
"logps/chosen": -199.47242736816406,
"logps/ref_chosen": -59.443138122558594,
"logps/ref_rejected": -75.80937194824219,
"logps/rejected": -280.23846435546875,
"loss": 1.1548,
"margin_dpo/margin_mean": 64.3998031616211,
"margin_dpo/margin_std": 108.75566101074219,
"step": 370
},
{
"epoch": 0.5668934240362812,
"fcm_dpo/beta": 0.005195076577365398,
"fcm_dpo/delta": -0.016707830131053925,
"fcm_dpo/margin": 63.93414306640625,
"fcm_dpo/q_t": 0.4242404103279114,
"grad_norm": 14.827937126159668,
"learning_rate": 2.3678397206786715e-07,
"logits/chosen": 0.7876321077346802,
"logits/rejected": 0.7421884536743164,
"logps/chosen": -198.78579711914062,
"logps/ref_chosen": -58.59185028076172,
"logps/ref_rejected": -73.7529525756836,
"logps/rejected": -277.88104248046875,
"loss": 1.1759,
"margin_dpo/margin_mean": 63.93414306640625,
"margin_dpo/margin_std": 119.758544921875,
"step": 375
},
{
"epoch": 0.5744520030234316,
"fcm_dpo/beta": 0.005114838946610689,
"fcm_dpo/delta": -0.013521865010261536,
"fcm_dpo/margin": 71.66112518310547,
"fcm_dpo/q_t": 0.41665583848953247,
"grad_norm": 12.766351699829102,
"learning_rate": 2.3018751078580283e-07,
"logits/chosen": 0.7841566801071167,
"logits/rejected": 0.7393085956573486,
"logps/chosen": -195.7206573486328,
"logps/ref_chosen": -58.93424606323242,
"logps/ref_rejected": -76.27055358886719,
"logps/rejected": -284.71807861328125,
"loss": 1.1354,
"margin_dpo/margin_mean": 71.66112518310547,
"margin_dpo/margin_std": 114.41337585449219,
"step": 380
},
{
"epoch": 0.582010582010582,
"fcm_dpo/beta": 0.0050823139026761055,
"fcm_dpo/delta": -0.010413008742034435,
"fcm_dpo/margin": 60.48808670043945,
"fcm_dpo/q_t": 0.4290579855442047,
"grad_norm": 9.910415649414062,
"learning_rate": 2.2360490367648084e-07,
"logits/chosen": 0.7422327399253845,
"logits/rejected": 0.723129153251648,
"logps/chosen": -211.396728515625,
"logps/ref_chosen": -66.42684173583984,
"logps/ref_rejected": -76.96304321289062,
"logps/rejected": -282.4209899902344,
"loss": 1.1822,
"margin_dpo/margin_mean": 60.48808670043945,
"margin_dpo/margin_std": 111.88542175292969,
"step": 385
},
{
"epoch": 0.5895691609977324,
"fcm_dpo/beta": 0.005034881643950939,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 65.50511169433594,
"fcm_dpo/q_t": 0.42413240671157837,
"grad_norm": 11.989714622497559,
"learning_rate": 2.170407537241599e-07,
"logits/chosen": 0.7661797404289246,
"logits/rejected": 0.7171558141708374,
"logps/chosen": -205.19943237304688,
"logps/ref_chosen": -60.984214782714844,
"logps/ref_rejected": -79.54056549072266,
"logps/rejected": -289.26092529296875,
"loss": 1.1583,
"margin_dpo/margin_mean": 65.50511169433594,
"margin_dpo/margin_std": 110.4152603149414,
"step": 390
},
{
"epoch": 0.5971277399848829,
"fcm_dpo/beta": 0.0049595460295677185,
"fcm_dpo/delta": -0.023463377729058266,
"fcm_dpo/margin": 75.90830993652344,
"fcm_dpo/q_t": 0.4133135676383972,
"grad_norm": 11.980950355529785,
"learning_rate": 2.104996510066625e-07,
"logits/chosen": 0.7978567481040955,
"logits/rejected": 0.7469469308853149,
"logps/chosen": -197.7019805908203,
"logps/ref_chosen": -58.30937957763672,
"logps/ref_rejected": -80.09587097167969,
"logps/rejected": -295.39678955078125,
"loss": 1.1188,
"margin_dpo/margin_mean": 75.90830993652344,
"margin_dpo/margin_std": 111.34185791015625,
"step": 395
},
{
"epoch": 0.6046863189720333,
"fcm_dpo/beta": 0.004786391276866198,
"fcm_dpo/delta": -0.049943797290325165,
"fcm_dpo/margin": 75.52302551269531,
"fcm_dpo/q_t": 0.41768354177474976,
"grad_norm": 14.266228675842285,
"learning_rate": 2.0398616948569493e-07,
"logits/chosen": 0.7723232507705688,
"logits/rejected": 0.6996694207191467,
"logps/chosen": -212.7456512451172,
"logps/ref_chosen": -61.39867401123047,
"logps/ref_rejected": -89.0177993774414,
"logps/rejected": -315.8878173828125,
"loss": 1.134,
"margin_dpo/margin_mean": 75.52302551269531,
"margin_dpo/margin_std": 114.94172668457031,
"step": 400
},
{
"epoch": 0.6046863189720333,
"eval_fcm_dpo/beta": 0.004678524564951658,
"eval_logits/chosen": 0.7730867862701416,
"eval_logits/rejected": 0.7383347749710083,
"eval_logps/chosen": -231.1050567626953,
"eval_logps/ref_chosen": -74.85946655273438,
"eval_logps/ref_rejected": -79.54898834228516,
"eval_logps/rejected": -297.61297607421875,
"eval_loss": 0.5988211035728455,
"eval_margin_dpo/margin_mean": 61.81840133666992,
"eval_margin_dpo/margin_std": 122.34429931640625,
"eval_runtime": 39.0005,
"eval_samples_per_second": 59.051,
"eval_steps_per_second": 1.846,
"step": 400
},
{
"epoch": 0.6122448979591837,
"fcm_dpo/beta": 0.004617620259523392,
"fcm_dpo/delta": -0.021043911576271057,
"fcm_dpo/margin": 78.88172149658203,
"fcm_dpo/q_t": 0.41677242517471313,
"grad_norm": 9.603548049926758,
"learning_rate": 1.975048638084379e-07,
"logits/chosen": 0.8381478190422058,
"logits/rejected": 0.7813048958778381,
"logps/chosen": -199.7872772216797,
"logps/ref_chosen": -55.953521728515625,
"logps/ref_rejected": -77.67539978027344,
"logps/rejected": -300.390869140625,
"loss": 1.1288,
"margin_dpo/margin_mean": 78.88172149658203,
"margin_dpo/margin_std": 118.5303726196289,
"step": 405
},
{
"epoch": 0.6198034769463341,
"fcm_dpo/beta": 0.0044863419607281685,
"fcm_dpo/delta": -0.03810073807835579,
"fcm_dpo/margin": 80.86897277832031,
"fcm_dpo/q_t": 0.4153751730918884,
"grad_norm": 10.528109550476074,
"learning_rate": 1.9106026612264315e-07,
"logits/chosen": 0.800028920173645,
"logits/rejected": 0.7490849494934082,
"logps/chosen": -218.7476043701172,
"logps/ref_chosen": -63.40419387817383,
"logps/ref_rejected": -80.85710144042969,
"logps/rejected": -317.06951904296875,
"loss": 1.1232,
"margin_dpo/margin_mean": 80.86897277832031,
"margin_dpo/margin_std": 114.73890686035156,
"step": 410
},
{
"epoch": 0.6273620559334845,
"fcm_dpo/beta": 0.004409838933497667,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 74.53548431396484,
"fcm_dpo/q_t": 0.42364954948425293,
"grad_norm": 9.875075340270996,
"learning_rate": 1.846568829074628e-07,
"logits/chosen": 0.8113610148429871,
"logits/rejected": 0.7791782021522522,
"logps/chosen": -228.55026245117188,
"logps/ref_chosen": -57.6942024230957,
"logps/ref_rejected": -71.74036407470703,
"logps/rejected": -317.13189697265625,
"loss": 1.1495,
"margin_dpo/margin_mean": 74.53548431396484,
"margin_dpo/margin_std": 117.6064682006836,
"step": 415
},
{
"epoch": 0.6349206349206349,
"fcm_dpo/beta": 0.004353252239525318,
"fcm_dpo/delta": -0.016652632504701614,
"fcm_dpo/margin": 68.00981140136719,
"fcm_dpo/q_t": 0.4319809377193451,
"grad_norm": 12.201674461364746,
"learning_rate": 1.782991918222275e-07,
"logits/chosen": 0.8484581708908081,
"logits/rejected": 0.819144606590271,
"logps/chosen": -241.6039581298828,
"logps/ref_chosen": -59.169517517089844,
"logps/ref_rejected": -69.47721099853516,
"logps/rejected": -319.92144775390625,
"loss": 1.1926,
"margin_dpo/margin_mean": 68.00981140136719,
"margin_dpo/margin_std": 129.67337036132812,
"step": 420
},
{
"epoch": 0.6424792139077853,
"fcm_dpo/beta": 0.00428891833871603,
"fcm_dpo/delta": -0.021387049928307533,
"fcm_dpo/margin": 82.73738861083984,
"fcm_dpo/q_t": 0.4183295667171478,
"grad_norm": 9.915822982788086,
"learning_rate": 1.7199163857537824e-07,
"logits/chosen": 0.8384604454040527,
"logits/rejected": 0.809127151966095,
"logps/chosen": -226.8582763671875,
"logps/ref_chosen": -58.09320831298828,
"logps/ref_rejected": -73.98226165771484,
"logps/rejected": -325.4847412109375,
"loss": 1.1363,
"margin_dpo/margin_mean": 82.73738861083984,
"margin_dpo/margin_std": 128.06600952148438,
"step": 425
},
{
"epoch": 0.6500377928949358,
"fcm_dpo/beta": 0.004245240706950426,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 70.30382537841797,
"fcm_dpo/q_t": 0.4308013319969177,
"grad_norm": 11.620451927185059,
"learning_rate": 1.6573863381573954e-07,
"logits/chosen": 0.8153077363967896,
"logits/rejected": 0.803315281867981,
"logps/chosen": -252.0271759033203,
"logps/ref_chosen": -62.7039909362793,
"logps/ref_rejected": -74.52284240722656,
"logps/rejected": -334.14984130859375,
"loss": 1.1889,
"margin_dpo/margin_mean": 70.30382537841797,
"margin_dpo/margin_std": 133.47201538085938,
"step": 430
},
{
"epoch": 0.6575963718820862,
"fcm_dpo/beta": 0.004219419322907925,
"fcm_dpo/delta": -0.015322555787861347,
"fcm_dpo/margin": 82.38020324707031,
"fcm_dpo/q_t": 0.41900143027305603,
"grad_norm": 11.40176773071289,
"learning_rate": 1.5954455004830878e-07,
"logits/chosen": 0.8734161257743835,
"logits/rejected": 0.8401018977165222,
"logps/chosen": -224.8971710205078,
"logps/ref_chosen": -56.12516403198242,
"logps/ref_rejected": -74.36073303222656,
"logps/rejected": -325.5129089355469,
"loss": 1.1412,
"margin_dpo/margin_mean": 82.38020324707031,
"margin_dpo/margin_std": 129.3118133544922,
"step": 435
},
{
"epoch": 0.6651549508692366,
"fcm_dpo/beta": 0.0041299303993582726,
"fcm_dpo/delta": -0.023912524804472923,
"fcm_dpo/margin": 82.6595230102539,
"fcm_dpo/q_t": 0.4219323992729187,
"grad_norm": 13.837409019470215,
"learning_rate": 1.534137185767178e-07,
"logits/chosen": 0.835965633392334,
"logits/rejected": 0.7754732966423035,
"logps/chosen": -231.00851440429688,
"logps/ref_chosen": -55.67548751831055,
"logps/ref_rejected": -76.62055206298828,
"logps/rejected": -334.6131286621094,
"loss": 1.1507,
"margin_dpo/margin_mean": 82.65951538085938,
"margin_dpo/margin_std": 135.90968322753906,
"step": 440
},
{
"epoch": 0.672713529856387,
"fcm_dpo/beta": 0.004010509233921766,
"fcm_dpo/delta": -0.024360598996281624,
"fcm_dpo/margin": 94.17513275146484,
"fcm_dpo/q_t": 0.41288742423057556,
"grad_norm": 9.381706237792969,
"learning_rate": 1.473504264745062e-07,
"logits/chosen": 0.8353230357170105,
"logits/rejected": 0.7917869091033936,
"logps/chosen": -241.6427001953125,
"logps/ref_chosen": -59.903411865234375,
"logps/ref_rejected": -82.02873229980469,
"logps/rejected": -357.94317626953125,
"loss": 1.111,
"margin_dpo/margin_mean": 94.17513275146484,
"margin_dpo/margin_std": 131.37210083007812,
"step": 445
},
{
"epoch": 0.6802721088435374,
"fcm_dpo/beta": 0.003899561706930399,
"fcm_dpo/delta": -0.024923671036958694,
"fcm_dpo/margin": 86.66099548339844,
"fcm_dpo/q_t": 0.4215773642063141,
"grad_norm": 10.855829238891602,
"learning_rate": 1.4135891358732205e-07,
"logits/chosen": 0.8515201807022095,
"logits/rejected": 0.7979717254638672,
"logps/chosen": -237.58364868164062,
"logps/ref_chosen": -55.83526611328125,
"logps/ref_rejected": -79.63658142089844,
"logps/rejected": -348.04595947265625,
"loss": 1.1448,
"margin_dpo/margin_mean": 86.66099548339844,
"margin_dpo/margin_std": 133.32977294921875,
"step": 450
},
{
"epoch": 0.6878306878306878,
"fcm_dpo/beta": 0.0038856077007949352,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 78.07716369628906,
"fcm_dpo/q_t": 0.4294430613517761,
"grad_norm": 11.119799613952637,
"learning_rate": 1.354433695681474e-07,
"logits/chosen": 0.8199604153633118,
"logits/rejected": 0.7901821136474609,
"logps/chosen": -240.78634643554688,
"logps/ref_chosen": -60.59226608276367,
"logps/ref_rejected": -73.37936401367188,
"logps/rejected": -331.6506042480469,
"loss": 1.1685,
"margin_dpo/margin_mean": 78.07716369628906,
"margin_dpo/margin_std": 132.1455535888672,
"step": 455
},
{
"epoch": 0.6953892668178382,
"fcm_dpo/beta": 0.0038638408295810223,
"fcm_dpo/delta": -0.020884912461042404,
"fcm_dpo/margin": 93.4942855834961,
"fcm_dpo/q_t": 0.41626229882240295,
"grad_norm": 9.670624732971191,
"learning_rate": 1.2960793094762345e-07,
"logits/chosen": 0.8717799186706543,
"logits/rejected": 0.808576226234436,
"logps/chosen": -236.9020538330078,
"logps/ref_chosen": -56.21283721923828,
"logps/ref_rejected": -83.02075958251953,
"logps/rejected": -357.20428466796875,
"loss": 1.1265,
"margin_dpo/margin_mean": 93.4942855834961,
"margin_dpo/margin_std": 139.18475341796875,
"step": 460
},
{
"epoch": 0.7029478458049887,
"fcm_dpo/beta": 0.0037416815757751465,
"fcm_dpo/delta": -0.01730378530919552,
"fcm_dpo/margin": 90.57196044921875,
"fcm_dpo/q_t": 0.4205476641654968,
"grad_norm": 11.144495964050293,
"learning_rate": 1.238566782415197e-07,
"logits/chosen": 0.8878629803657532,
"logits/rejected": 0.8561855554580688,
"logps/chosen": -241.06063842773438,
"logps/ref_chosen": -59.0674934387207,
"logps/ref_rejected": -74.53498840332031,
"logps/rejected": -347.10009765625,
"loss": 1.1409,
"margin_dpo/margin_mean": 90.57195281982422,
"margin_dpo/margin_std": 135.90689086914062,
"step": 465
},
{
"epoch": 0.7105064247921391,
"fcm_dpo/beta": 0.0037400186993181705,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 81.56043243408203,
"fcm_dpo/q_t": 0.4288889467716217,
"grad_norm": 10.959892272949219,
"learning_rate": 1.1819363309737438e-07,
"logits/chosen": 0.8900365829467773,
"logits/rejected": 0.856399655342102,
"logps/chosen": -243.0541229248047,
"logps/ref_chosen": -58.3397331237793,
"logps/ref_rejected": -74.33660125732422,
"logps/rejected": -340.6114501953125,
"loss": 1.1733,
"margin_dpo/margin_mean": 81.56043243408203,
"margin_dpo/margin_std": 142.09490966796875,
"step": 470
},
{
"epoch": 0.7180650037792895,
"fcm_dpo/beta": 0.0037331648636609316,
"fcm_dpo/delta": -0.00920518022030592,
"fcm_dpo/margin": 99.83604431152344,
"fcm_dpo/q_t": 0.4153580069541931,
"grad_norm": 7.998687267303467,
"learning_rate": 1.126227554822985e-07,
"logits/chosen": 0.8998042345046997,
"logits/rejected": 0.8499566912651062,
"logps/chosen": -225.0286102294922,
"logps/ref_chosen": -54.60407638549805,
"logps/ref_rejected": -79.94635009765625,
"logps/rejected": -350.2068786621094,
"loss": 1.1275,
"margin_dpo/margin_mean": 99.83604431152344,
"margin_dpo/margin_std": 155.26611328125,
"step": 475
},
{
"epoch": 0.7256235827664399,
"fcm_dpo/beta": 0.003702650312334299,
"fcm_dpo/delta": -0.008397220633924007,
"fcm_dpo/margin": 73.63870239257812,
"fcm_dpo/q_t": 0.43734756112098694,
"grad_norm": 14.331131935119629,
"learning_rate": 1.0714794091391072e-07,
"logits/chosen": 0.853714108467102,
"logits/rejected": 0.8454058766365051,
"logps/chosen": -257.2554016113281,
"logps/ref_chosen": -63.0672492980957,
"logps/ref_rejected": -68.59602355957031,
"logps/rejected": -336.4228820800781,
"loss": 1.2098,
"margin_dpo/margin_mean": 73.63871002197266,
"margin_dpo/margin_std": 151.20285034179688,
"step": 480
},
{
"epoch": 0.7331821617535903,
"fcm_dpo/beta": 0.0036670055706053972,
"fcm_dpo/delta": -0.004230070393532515,
"fcm_dpo/margin": 85.2213363647461,
"fcm_dpo/q_t": 0.4283173978328705,
"grad_norm": 11.36840534210205,
"learning_rate": 1.0177301773633992e-07,
"logits/chosen": 0.8761838674545288,
"logits/rejected": 0.8311912417411804,
"logps/chosen": -251.0254669189453,
"logps/ref_chosen": -58.75799560546875,
"logps/ref_rejected": -79.72233581542969,
"logps/rejected": -357.21112060546875,
"loss": 1.1635,
"margin_dpo/margin_mean": 85.2213363647461,
"margin_dpo/margin_std": 141.01046752929688,
"step": 485
},
{
"epoch": 0.7407407407407407,
"fcm_dpo/beta": 0.003649166552349925,
"fcm_dpo/delta": -0.005526586435735226,
"fcm_dpo/margin": 79.44273376464844,
"fcm_dpo/q_t": 0.4346071183681488,
"grad_norm": 9.696874618530273,
"learning_rate": 9.650174444319956e-08,
"logits/chosen": 0.9166892170906067,
"logits/rejected": 0.8767145872116089,
"logps/chosen": -262.38348388671875,
"logps/ref_chosen": -61.394195556640625,
"logps/ref_rejected": -81.1914291381836,
"logps/rejected": -361.6234436035156,
"loss": 1.1984,
"margin_dpo/margin_mean": 79.44273376464844,
"margin_dpo/margin_std": 160.96810913085938,
"step": 490
},
{
"epoch": 0.7482993197278912,
"fcm_dpo/beta": 0.0036221942864358425,
"fcm_dpo/delta": -0.009324881248176098,
"fcm_dpo/margin": 80.0656967163086,
"fcm_dpo/q_t": 0.43500107526779175,
"grad_norm": 14.121245384216309,
"learning_rate": 9.133780704940594e-08,
"logits/chosen": 0.8733257055282593,
"logits/rejected": 0.8286750912666321,
"logps/chosen": -253.50320434570312,
"logps/ref_chosen": -59.85382843017578,
"logps/ref_rejected": -80.63748931884766,
"logps/rejected": -354.3525695800781,
"loss": 1.1983,
"margin_dpo/margin_mean": 80.06568908691406,
"margin_dpo/margin_std": 160.94277954101562,
"step": 495
},
{
"epoch": 0.7558578987150416,
"fcm_dpo/beta": 0.0036053061485290527,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 94.49732971191406,
"fcm_dpo/q_t": 0.4224638342857361,
"grad_norm": 11.681142807006836,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": 0.8845356106758118,
"logits/rejected": 0.8533564805984497,
"logps/chosen": -260.6972351074219,
"logps/ref_chosen": -66.17753601074219,
"logps/ref_rejected": -83.75955200195312,
"logps/rejected": -372.7765808105469,
"loss": 1.1603,
"margin_dpo/margin_mean": 94.49732971191406,
"margin_dpo/margin_std": 167.73483276367188,
"step": 500
},
{
"epoch": 0.763416477702192,
"fcm_dpo/beta": 0.0035957619547843933,
"fcm_dpo/delta": -0.008863715454936028,
"fcm_dpo/margin": 81.07406616210938,
"fcm_dpo/q_t": 0.431431382894516,
"grad_norm": 11.598374366760254,
"learning_rate": 8.134630621352483e-08,
"logits/chosen": 0.8734153509140015,
"logits/rejected": 0.8628484010696411,
"logps/chosen": -250.24264526367188,
"logps/ref_chosen": -62.11005401611328,
"logps/ref_rejected": -74.64705657958984,
"logps/rejected": -343.85369873046875,
"loss": 1.1724,
"margin_dpo/margin_mean": 81.07406616210938,
"margin_dpo/margin_std": 132.01162719726562,
"step": 505
},
{
"epoch": 0.7709750566893424,
"fcm_dpo/beta": 0.0035542245022952557,
"fcm_dpo/delta": -0.010841513983905315,
"fcm_dpo/margin": 93.92044067382812,
"fcm_dpo/q_t": 0.42539000511169434,
"grad_norm": 10.781188011169434,
"learning_rate": 7.652572947447272e-08,
"logits/chosen": 0.8776324391365051,
"logits/rejected": 0.8372354507446289,
"logps/chosen": -260.9776916503906,
"logps/ref_chosen": -64.42265319824219,
"logps/ref_rejected": -87.00096130371094,
"logps/rejected": -377.47650146484375,
"loss": 1.1734,
"margin_dpo/margin_mean": 93.92044830322266,
"margin_dpo/margin_std": 176.3047637939453,
"step": 510
},
{
"epoch": 0.7785336356764928,
"fcm_dpo/beta": 0.0035349582321941853,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 90.1122055053711,
"fcm_dpo/q_t": 0.42532816529273987,
"grad_norm": 9.599004745483398,
"learning_rate": 7.182645715528435e-08,
"logits/chosen": 0.9000862240791321,
"logits/rejected": 0.8689319491386414,
"logps/chosen": -247.7484893798828,
"logps/ref_chosen": -58.284393310546875,
"logps/ref_rejected": -79.09356689453125,
"logps/rejected": -358.6697998046875,
"loss": 1.1575,
"margin_dpo/margin_mean": 90.11221313476562,
"margin_dpo/margin_std": 144.56326293945312,
"step": 515
},
{
"epoch": 0.7860922146636432,
"fcm_dpo/beta": 0.0035349582321941853,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 71.46282958984375,
"fcm_dpo/q_t": 0.4412451684474945,
"grad_norm": 13.013392448425293,
"learning_rate": 6.725177529083209e-08,
"logits/chosen": 0.9094135165214539,
"logits/rejected": 0.8865984678268433,
"logps/chosen": -254.99520874023438,
"logps/ref_chosen": -61.03638458251953,
"logps/ref_rejected": -72.15824890136719,
"logps/rejected": -337.57989501953125,
"loss": 1.2161,
"margin_dpo/margin_mean": 71.46283721923828,
"margin_dpo/margin_std": 148.10073852539062,
"step": 520
},
{
"epoch": 0.7936507936507936,
"fcm_dpo/beta": 0.0035349582321941853,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 81.25996398925781,
"fcm_dpo/q_t": 0.43352437019348145,
"grad_norm": 13.817595481872559,
"learning_rate": 6.280488279429185e-08,
"logits/chosen": 0.8353425860404968,
"logits/rejected": 0.804462730884552,
"logps/chosen": -264.1961669921875,
"logps/ref_chosen": -68.02732849121094,
"logps/ref_rejected": -85.41429901123047,
"logps/rejected": -362.8431396484375,
"loss": 1.1911,
"margin_dpo/margin_mean": 81.25996398925781,
"margin_dpo/margin_std": 154.6202850341797,
"step": 525
},
{
"epoch": 0.8012093726379441,
"fcm_dpo/beta": 0.0035310834646224976,
"fcm_dpo/delta": -0.003660431830212474,
"fcm_dpo/margin": 77.5444564819336,
"fcm_dpo/q_t": 0.4368818402290344,
"grad_norm": 12.4998779296875,
"learning_rate": 5.848888922025552e-08,
"logits/chosen": 0.8905463218688965,
"logits/rejected": 0.8446179628372192,
"logps/chosen": -255.26919555664062,
"logps/ref_chosen": -58.67436599731445,
"logps/ref_rejected": -79.38807678222656,
"logps/rejected": -353.52734375,
"loss": 1.1992,
"margin_dpo/margin_mean": 77.5444564819336,
"margin_dpo/margin_std": 147.32528686523438,
"step": 530
},
{
"epoch": 0.8087679516250945,
"fcm_dpo/beta": 0.003522042650729418,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 82.69160461425781,
"fcm_dpo/q_t": 0.4328169822692871,
"grad_norm": 11.253222465515137,
"learning_rate": 5.430681259032957e-08,
"logits/chosen": 0.8829957842826843,
"logits/rejected": 0.842154324054718,
"logps/chosen": -248.08706665039062,
"logps/ref_chosen": -57.640098571777344,
"logps/ref_rejected": -77.25399780273438,
"logps/rejected": -350.39251708984375,
"loss": 1.1841,
"margin_dpo/margin_mean": 82.69161224365234,
"margin_dpo/margin_std": 150.93914794921875,
"step": 535
},
{
"epoch": 0.8163265306122449,
"fcm_dpo/beta": 0.0035197685938328505,
"fcm_dpo/delta": -0.006476428359746933,
"fcm_dpo/margin": 101.35395050048828,
"fcm_dpo/q_t": 0.4183521270751953,
"grad_norm": 9.733626365661621,
"learning_rate": 5.026157728273966e-08,
"logits/chosen": 0.9086763262748718,
"logits/rejected": 0.8577451705932617,
"logps/chosen": -256.18011474609375,
"logps/ref_chosen": -60.17341995239258,
"logps/ref_rejected": -85.50316619873047,
"logps/rejected": -382.8638000488281,
"loss": 1.1288,
"margin_dpo/margin_mean": 101.35395050048828,
"margin_dpo/margin_std": 152.10256958007812,
"step": 540
},
{
"epoch": 0.8238851095993953,
"fcm_dpo/beta": 0.00349930627271533,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 94.39086151123047,
"fcm_dpo/q_t": 0.4234938621520996,
"grad_norm": 12.908324241638184,
"learning_rate": 4.635601198741607e-08,
"logits/chosen": 0.917340874671936,
"logits/rejected": 0.8850774765014648,
"logps/chosen": -242.275390625,
"logps/ref_chosen": -56.985809326171875,
"logps/ref_rejected": -73.21353912353516,
"logps/rejected": -352.8939514160156,
"loss": 1.1519,
"margin_dpo/margin_mean": 94.39086151123047,
"margin_dpo/margin_std": 151.55343627929688,
"step": 545
},
{
"epoch": 0.8314436885865457,
"fcm_dpo/beta": 0.0034850898664444685,
"fcm_dpo/delta": -0.00509117916226387,
"fcm_dpo/margin": 76.22270202636719,
"fcm_dpo/q_t": 0.4382683336734772,
"grad_norm": 11.022492408752441,
"learning_rate": 4.259284772799099e-08,
"logits/chosen": 0.912948489189148,
"logits/rejected": 0.8859111666679382,
"logps/chosen": -255.4915008544922,
"logps/ref_chosen": -59.600929260253906,
"logps/ref_rejected": -75.24870300292969,
"logps/rejected": -347.3619689941406,
"loss": 1.2114,
"margin_dpo/margin_mean": 76.22270202636719,
"margin_dpo/margin_std": 156.75267028808594,
"step": 550
},
{
"epoch": 0.8390022675736961,
"fcm_dpo/beta": 0.003481535706669092,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 83.40065002441406,
"fcm_dpo/q_t": 0.43253573775291443,
"grad_norm": 11.873709678649902,
"learning_rate": 3.89747159520904e-08,
"logits/chosen": 0.9214051365852356,
"logits/rejected": 0.8947726488113403,
"logps/chosen": -266.4524841308594,
"logps/ref_chosen": -63.578895568847656,
"logps/ref_rejected": -78.87867736816406,
"logps/rejected": -365.1529235839844,
"loss": 1.1841,
"margin_dpo/margin_mean": 83.4006576538086,
"margin_dpo/margin_std": 151.18142700195312,
"step": 555
},
{
"epoch": 0.8465608465608465,
"fcm_dpo/beta": 0.003460574196651578,
"fcm_dpo/delta": -0.009147383272647858,
"fcm_dpo/margin": 102.399658203125,
"fcm_dpo/q_t": 0.4190526604652405,
"grad_norm": 12.284520149230957,
"learning_rate": 3.550414669125573e-08,
"logits/chosen": 0.9413592219352722,
"logits/rejected": 0.9070916175842285,
"logps/chosen": -256.690673828125,
"logps/ref_chosen": -58.651512145996094,
"logps/ref_rejected": -78.67181396484375,
"logps/rejected": -379.1106262207031,
"loss": 1.1302,
"margin_dpo/margin_mean": 102.399658203125,
"margin_dpo/margin_std": 151.68862915039062,
"step": 560
},
{
"epoch": 0.854119425547997,
"fcm_dpo/beta": 0.0034138336777687073,
"fcm_dpo/delta": -0.013129929080605507,
"fcm_dpo/margin": 100.43526458740234,
"fcm_dpo/q_t": 0.4204806387424469,
"grad_norm": 12.020137786865234,
"learning_rate": 3.218356679178252e-08,
"logits/chosen": 0.9278671145439148,
"logits/rejected": 0.9034161567687988,
"logps/chosen": -253.25778198242188,
"logps/ref_chosen": -60.3114128112793,
"logps/ref_rejected": -78.25270080566406,
"logps/rejected": -371.63433837890625,
"loss": 1.1388,
"margin_dpo/margin_mean": 100.43526458740234,
"margin_dpo/margin_std": 153.38381958007812,
"step": 565
},
{
"epoch": 0.8616780045351474,
"fcm_dpo/beta": 0.0033486653119325638,
"fcm_dpo/delta": -0.02772103250026703,
"fcm_dpo/margin": 99.43392944335938,
"fcm_dpo/q_t": 0.42399096488952637,
"grad_norm": 12.34563159942627,
"learning_rate": 2.9015298217712453e-08,
"logits/chosen": 0.9331613779067993,
"logits/rejected": 0.8943105936050415,
"logps/chosen": -249.2666473388672,
"logps/ref_chosen": -57.752410888671875,
"logps/ref_rejected": -76.99858093261719,
"logps/rejected": -367.94671630859375,
"loss": 1.1578,
"margin_dpo/margin_mean": 99.43392944335938,
"margin_dpo/margin_std": 166.1993865966797,
"step": 570
},
{
"epoch": 0.8692365835222978,
"fcm_dpo/beta": 0.0033004791475832462,
"fcm_dpo/delta": -0.011403532698750496,
"fcm_dpo/margin": 86.78084564208984,
"fcm_dpo/q_t": 0.4353984296321869,
"grad_norm": 11.008131980895996,
"learning_rate": 2.600155642716606e-08,
"logits/chosen": 0.9364798665046692,
"logits/rejected": 0.9186896085739136,
"logps/chosen": -265.30657958984375,
"logps/ref_chosen": -63.61958694458008,
"logps/ref_rejected": -79.51353454589844,
"logps/rejected": -367.98138427734375,
"loss": 1.1951,
"margin_dpo/margin_mean": 86.78085327148438,
"margin_dpo/margin_std": 166.95652770996094,
"step": 575
},
{
"epoch": 0.8767951625094482,
"fcm_dpo/beta": 0.0032326322980225086,
"fcm_dpo/delta": -0.012774638831615448,
"fcm_dpo/margin": 98.78877258300781,
"fcm_dpo/q_t": 0.42647188901901245,
"grad_norm": 12.614327430725098,
"learning_rate": 2.3144448823151392e-08,
"logits/chosen": 0.9242687225341797,
"logits/rejected": 0.9064335823059082,
"logps/chosen": -247.09750366210938,
"logps/ref_chosen": -57.3541145324707,
"logps/ref_rejected": -73.14434051513672,
"logps/rejected": -361.676513671875,
"loss": 1.1637,
"margin_dpo/margin_mean": 98.78877258300781,
"margin_dpo/margin_std": 166.97918701171875,
"step": 580
},
{
"epoch": 0.8843537414965986,
"fcm_dpo/beta": 0.0032326322980225086,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 91.58312225341797,
"fcm_dpo/q_t": 0.43279165029525757,
"grad_norm": 11.919511795043945,
"learning_rate": 2.044597327993153e-08,
"logits/chosen": 0.9634637832641602,
"logits/rejected": 0.9281566739082336,
"logps/chosen": -253.19546508789062,
"logps/ref_chosen": -56.0127067565918,
"logps/ref_rejected": -77.16522216796875,
"logps/rejected": -365.93109130859375,
"loss": 1.1847,
"margin_dpo/margin_mean": 91.5831298828125,
"margin_dpo/margin_std": 169.7579803466797,
"step": 585
},
{
"epoch": 0.891912320483749,
"fcm_dpo/beta": 0.0032326322980225086,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 87.02014923095703,
"fcm_dpo/q_t": 0.43297773599624634,
"grad_norm": 13.126020431518555,
"learning_rate": 1.7908016745981856e-08,
"logits/chosen": 0.9554249048233032,
"logits/rejected": 0.9359169006347656,
"logps/chosen": -263.68682861328125,
"logps/ref_chosen": -60.5894660949707,
"logps/ref_rejected": -74.34771728515625,
"logps/rejected": -364.4652404785156,
"loss": 1.1894,
"margin_dpo/margin_mean": 87.02014923095703,
"margin_dpo/margin_std": 158.75521850585938,
"step": 590
},
{
"epoch": 0.8994708994708994,
"fcm_dpo/beta": 0.00319870188832283,
"fcm_dpo/delta": -0.010551819577813148,
"fcm_dpo/margin": 100.10346984863281,
"fcm_dpo/q_t": 0.4260531961917877,
"grad_norm": 10.075697898864746,
"learning_rate": 1.553235392451377e-08,
"logits/chosen": 0.9538490176200867,
"logits/rejected": 0.9052039384841919,
"logps/chosen": -240.1796875,
"logps/ref_chosen": -54.77838897705078,
"logps/ref_rejected": -78.102783203125,
"logps/rejected": -363.6075744628906,
"loss": 1.1566,
"margin_dpo/margin_mean": 100.10346221923828,
"margin_dpo/margin_std": 161.6309356689453,
"step": 595
},
{
"epoch": 0.9070294784580499,
"fcm_dpo/beta": 0.00319870188832283,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 65.01690673828125,
"fcm_dpo/q_t": 0.45148009061813354,
"grad_norm": 12.22407341003418,
"learning_rate": 1.3320646032487393e-08,
"logits/chosen": 0.9378520846366882,
"logits/rejected": 0.9170206189155579,
"logps/chosen": -263.9869384765625,
"logps/ref_chosen": -58.45500564575195,
"logps/ref_rejected": -70.7367172241211,
"logps/rejected": -341.2855529785156,
"loss": 1.242,
"margin_dpo/margin_mean": 65.01690673828125,
"margin_dpo/margin_std": 143.92819213867188,
"step": 600
},
{
"epoch": 0.9070294784580499,
"eval_fcm_dpo/beta": 0.00319870188832283,
"eval_logits/chosen": 0.8913569450378418,
"eval_logits/rejected": 0.8742244243621826,
"eval_logps/chosen": -278.61993408203125,
"eval_logps/ref_chosen": -74.85946655273438,
"eval_logps/ref_rejected": -79.54898834228516,
"eval_logps/rejected": -364.2364501953125,
"eval_loss": 0.6074615716934204,
"eval_margin_dpo/margin_mean": 80.9269790649414,
"eval_margin_dpo/margin_std": 168.3527069091797,
"eval_runtime": 39.0585,
"eval_samples_per_second": 58.963,
"eval_steps_per_second": 1.843,
"step": 600
},
{
"epoch": 0.9145880574452003,
"fcm_dpo/beta": 0.00319870188832283,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 91.87323760986328,
"fcm_dpo/q_t": 0.4317256510257721,
"grad_norm": 10.033231735229492,
"learning_rate": 1.1274439638981532e-08,
"logits/chosen": 0.9322928190231323,
"logits/rejected": 0.9066828489303589,
"logps/chosen": -246.4706573486328,
"logps/ref_chosen": -59.87483596801758,
"logps/ref_rejected": -75.75318908691406,
"logps/rejected": -354.2222595214844,
"loss": 1.1779,
"margin_dpo/margin_mean": 91.87324523925781,
"margin_dpo/margin_std": 161.2457733154297,
"step": 605
},
{
"epoch": 0.9221466364323507,
"fcm_dpo/beta": 0.003174326615408063,
"fcm_dpo/delta": -0.009571181610226631,
"fcm_dpo/margin": 99.8558120727539,
"fcm_dpo/q_t": 0.42549604177474976,
"grad_norm": 13.667858123779297,
"learning_rate": 9.395165583732379e-09,
"logits/chosen": 0.9424371719360352,
"logits/rejected": 0.9166472554206848,
"logps/chosen": -255.5699462890625,
"logps/ref_chosen": -60.35883712768555,
"logps/ref_rejected": -81.3543930053711,
"logps/rejected": -376.42132568359375,
"loss": 1.1567,
"margin_dpo/margin_mean": 99.85580444335938,
"margin_dpo/margin_std": 156.640869140625,
"step": 610
},
{
"epoch": 0.9297052154195011,
"fcm_dpo/beta": 0.0031617539934813976,
"fcm_dpo/delta": -0.010276397690176964,
"fcm_dpo/margin": 101.62857818603516,
"fcm_dpo/q_t": 0.4245510995388031,
"grad_norm": 9.765281677246094,
"learning_rate": 7.684137976598088e-09,
"logits/chosen": 0.9229475855827332,
"logits/rejected": 0.8982959985733032,
"logps/chosen": -253.18075561523438,
"logps/ref_chosen": -59.17219161987305,
"logps/ref_rejected": -79.92167663574219,
"logps/rejected": -375.55877685546875,
"loss": 1.1485,
"margin_dpo/margin_mean": 101.62857818603516,
"margin_dpo/margin_std": 152.48818969726562,
"step": 615
},
{
"epoch": 0.9372637944066515,
"fcm_dpo/beta": 0.0031358408741652966,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 89.38777923583984,
"fcm_dpo/q_t": 0.4349850118160248,
"grad_norm": 10.843006134033203,
"learning_rate": 6.142553278648238e-09,
"logits/chosen": 0.9537204504013062,
"logits/rejected": 0.9183050394058228,
"logps/chosen": -253.4351043701172,
"logps/ref_chosen": -58.052696228027344,
"logps/ref_rejected": -78.37252807617188,
"logps/rejected": -363.1427307128906,
"loss": 1.1848,
"margin_dpo/margin_mean": 89.38777160644531,
"margin_dpo/margin_std": 156.2245635986328,
"step": 620
},
{
"epoch": 0.9448223733938019,
"fcm_dpo/beta": 0.0031358408741652966,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 96.6719970703125,
"fcm_dpo/q_t": 0.4313376545906067,
"grad_norm": 10.53563404083252,
"learning_rate": 4.7714894655209174e-09,
"logits/chosen": 0.9534462094306946,
"logits/rejected": 0.900356113910675,
"logps/chosen": -253.8286895751953,
"logps/ref_chosen": -56.957862854003906,
"logps/ref_rejected": -82.68255615234375,
"logps/rejected": -376.225341796875,
"loss": 1.1779,
"margin_dpo/margin_mean": 96.6719970703125,
"margin_dpo/margin_std": 174.75888061523438,
"step": 625
},
{
"epoch": 0.9523809523809523,
"fcm_dpo/beta": 0.0031358408741652966,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 105.3574447631836,
"fcm_dpo/q_t": 0.42352789640426636,
"grad_norm": 10.356474876403809,
"learning_rate": 3.5719052736323806e-09,
"logits/chosen": 0.9468274116516113,
"logits/rejected": 0.9034187197685242,
"logps/chosen": -247.63818359375,
"logps/ref_chosen": -56.71510696411133,
"logps/ref_rejected": -82.94544219970703,
"logps/rejected": -379.2259521484375,
"loss": 1.1485,
"margin_dpo/margin_mean": 105.35743713378906,
"margin_dpo/margin_std": 166.2731475830078,
"step": 630
},
{
"epoch": 0.9599395313681028,
"fcm_dpo/beta": 0.0030982757452875376,
"fcm_dpo/delta": -0.012051684781908989,
"fcm_dpo/margin": 89.59659576416016,
"fcm_dpo/q_t": 0.4353605806827545,
"grad_norm": 10.570072174072266,
"learning_rate": 2.5446395297668287e-09,
"logits/chosen": 0.9560381174087524,
"logits/rejected": 0.9319430589675903,
"logps/chosen": -254.63623046875,
"logps/ref_chosen": -59.33793258666992,
"logps/ref_rejected": -75.01703643798828,
"logps/rejected": -359.91192626953125,
"loss": 1.1902,
"margin_dpo/margin_mean": 89.59659576416016,
"margin_dpo/margin_std": 159.2709197998047,
"step": 635
},
{
"epoch": 0.9674981103552532,
"fcm_dpo/beta": 0.0030982757452875376,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 97.72769927978516,
"fcm_dpo/q_t": 0.42967167496681213,
"grad_norm": 10.967521667480469,
"learning_rate": 1.690410564514244e-09,
"logits/chosen": 0.9475260972976685,
"logits/rejected": 0.903438925743103,
"logps/chosen": -259.0896301269531,
"logps/ref_chosen": -58.1605339050293,
"logps/ref_rejected": -79.85365295410156,
"logps/rejected": -378.5104064941406,
"loss": 1.1668,
"margin_dpo/margin_mean": 97.72769165039062,
"margin_dpo/margin_std": 162.35629272460938,
"step": 640
},
{
"epoch": 0.9750566893424036,
"fcm_dpo/beta": 0.0030982757452875376,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 77.14000701904297,
"fcm_dpo/q_t": 0.44398292899131775,
"grad_norm": 15.546424865722656,
"learning_rate": 1.0098157099674987e-09,
"logits/chosen": 0.9349179267883301,
"logits/rejected": 0.922415554523468,
"logps/chosen": -264.6257629394531,
"logps/ref_chosen": -63.45180130004883,
"logps/ref_rejected": -74.18285369873047,
"logps/rejected": -352.49676513671875,
"loss": 1.2142,
"margin_dpo/margin_mean": 77.14000701904297,
"margin_dpo/margin_std": 147.8017120361328,
"step": 645
},
{
"epoch": 0.982615268329554,
"fcm_dpo/beta": 0.0030982757452875376,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 95.89753723144531,
"fcm_dpo/q_t": 0.4306652545928955,
"grad_norm": 9.394474029541016,
"learning_rate": 5.033308820289184e-10,
"logits/chosen": 0.9502288103103638,
"logits/rejected": 0.9044377207756042,
"logps/chosen": -268.394775390625,
"logps/ref_chosen": -59.75496292114258,
"logps/ref_rejected": -84.31481170654297,
"logps/rejected": -388.8521423339844,
"loss": 1.1769,
"margin_dpo/margin_mean": 95.89753723144531,
"margin_dpo/margin_std": 167.29580688476562,
"step": 650
},
{
"epoch": 0.9901738473167044,
"fcm_dpo/beta": 0.0030882812570780516,
"fcm_dpo/delta": -0.016260221600532532,
"fcm_dpo/margin": 95.9172592163086,
"fcm_dpo/q_t": 0.43137580156326294,
"grad_norm": 10.780631065368652,
"learning_rate": 1.7131024761923852e-10,
"logits/chosen": 0.9268029928207397,
"logits/rejected": 0.8841239809989929,
"logps/chosen": -252.7764892578125,
"logps/ref_chosen": -57.817848205566406,
"logps/ref_rejected": -79.81755065917969,
"logps/rejected": -370.6934814453125,
"loss": 1.1725,
"margin_dpo/margin_mean": 95.9172592163086,
"margin_dpo/margin_std": 159.18435668945312,
"step": 655
},
{
"epoch": 0.9977324263038548,
"fcm_dpo/beta": 0.003048304468393326,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 98.31352996826172,
"fcm_dpo/q_t": 0.43072566390037537,
"grad_norm": 11.600972175598145,
"learning_rate": 1.3985977021235829e-11,
"logits/chosen": 0.9769344329833984,
"logits/rejected": 0.9424291849136353,
"logps/chosen": -260.94500732421875,
"logps/ref_chosen": -59.12651443481445,
"logps/ref_rejected": -79.42085266113281,
"logps/rejected": -379.5528869628906,
"loss": 1.1768,
"margin_dpo/margin_mean": 98.31353759765625,
"margin_dpo/margin_std": 174.72268676757812,
"step": 660
},
{
"epoch": 0.999244142101285,
"step": 661,
"total_flos": 0.0,
"train_loss": 1.1812975648311552,
"train_runtime": 1809.2515,
"train_samples_per_second": 23.4,
"train_steps_per_second": 0.365
}
],
"logging_steps": 5,
"max_steps": 661,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}