Model: W-61/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.45-s_star-0.4-eta-0.05 Source: Original Platform
15247 lines
572 KiB
JSON
15247 lines
572 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.999244142101285,
|
|
"eval_steps": 200,
|
|
"global_step": 661,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"KL/chosen_KL_mean": 0.02867889404296875,
|
|
"KL/mean": 0.029354453086853027,
|
|
"KL/rejected_KL_mean": 0.030029296875,
|
|
"KL/std": 0.2071000635623932,
|
|
"epoch": 0.0015117157974300832,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.0013532638549804688,
|
|
"fcm_dpo/q_t": 0.5000336766242981,
|
|
"grad_norm": 28.220561981201172,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": 0.13337239623069763,
|
|
"logits/rejected": 0.12492949515581131,
|
|
"logps/chosen": -64.5841293334961,
|
|
"logps/ref_chosen": -64.61280822753906,
|
|
"logps/ref_rejected": -64.17195129394531,
|
|
"logps/rejected": -64.14192199707031,
|
|
"loss": 1.3866,
|
|
"margin_dpo/margin_mean": -0.0013527870178222656,
|
|
"margin_dpo/margin_std": 0.2561596930027008,
|
|
"step": 1
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.00289154052734375,
|
|
"KL/mean": -0.021616414189338684,
|
|
"KL/rejected_KL_mean": -0.04033660888671875,
|
|
"KL/std": 0.19624735414981842,
|
|
"epoch": 0.0030234315948601664,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.037450045347213745,
|
|
"fcm_dpo/q_t": 0.4990639090538025,
|
|
"grad_norm": 27.829313278198242,
|
|
"learning_rate": 7.462686567164179e-09,
|
|
"logits/chosen": 0.09414851665496826,
|
|
"logits/rejected": 0.07363267242908478,
|
|
"logps/chosen": -56.101890563964844,
|
|
"logps/ref_chosen": -56.0989990234375,
|
|
"logps/ref_rejected": -66.59971618652344,
|
|
"logps/rejected": -66.64006042480469,
|
|
"loss": 1.3828,
|
|
"margin_dpo/margin_mean": 0.03744968771934509,
|
|
"margin_dpo/margin_std": 0.27811938524246216,
|
|
"step": 2
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.03314208984375,
|
|
"KL/mean": 0.03694796562194824,
|
|
"KL/rejected_KL_mean": 0.040752410888671875,
|
|
"KL/std": 0.28543820977211,
|
|
"epoch": 0.0045351473922902496,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.007608771324157715,
|
|
"fcm_dpo/q_t": 0.5001896619796753,
|
|
"grad_norm": 31.200183868408203,
|
|
"learning_rate": 1.4925373134328357e-08,
|
|
"logits/chosen": 0.0995122492313385,
|
|
"logits/rejected": 0.06140997260808945,
|
|
"logps/chosen": -65.42411804199219,
|
|
"logps/ref_chosen": -65.45726013183594,
|
|
"logps/ref_rejected": -90.82853698730469,
|
|
"logps/rejected": -90.78778076171875,
|
|
"loss": 1.3874,
|
|
"margin_dpo/margin_mean": -0.007609039545059204,
|
|
"margin_dpo/margin_std": 0.3628672957420349,
|
|
"step": 3
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.008510589599609375,
|
|
"KL/mean": 0.01718522608280182,
|
|
"KL/rejected_KL_mean": 0.042877197265625,
|
|
"KL/std": 0.2634391784667969,
|
|
"epoch": 0.006046863189720333,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.05138656497001648,
|
|
"fcm_dpo/q_t": 0.5012847185134888,
|
|
"grad_norm": 34.36368179321289,
|
|
"learning_rate": 2.2388059701492534e-08,
|
|
"logits/chosen": 0.11296099424362183,
|
|
"logits/rejected": 0.09673739969730377,
|
|
"logps/chosen": -76.86869812011719,
|
|
"logps/ref_chosen": -76.86018371582031,
|
|
"logps/ref_rejected": -79.91523742675781,
|
|
"logps/rejected": -79.87236022949219,
|
|
"loss": 1.3918,
|
|
"margin_dpo/margin_mean": -0.051386892795562744,
|
|
"margin_dpo/margin_std": 0.3760377764701843,
|
|
"step": 4
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.006275177001953125,
|
|
"KL/mean": 0.00796547532081604,
|
|
"KL/rejected_KL_mean": 0.009662628173828125,
|
|
"KL/std": 0.22592966258525848,
|
|
"epoch": 0.007558578987150416,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.0033860206604003906,
|
|
"fcm_dpo/q_t": 0.5000848770141602,
|
|
"grad_norm": 29.5997314453125,
|
|
"learning_rate": 2.9850746268656714e-08,
|
|
"logits/chosen": 0.0820743665099144,
|
|
"logits/rejected": 0.04312673956155777,
|
|
"logps/chosen": -62.965065002441406,
|
|
"logps/ref_chosen": -62.97134017944336,
|
|
"logps/ref_rejected": -79.9192123413086,
|
|
"logps/rejected": -79.9095458984375,
|
|
"loss": 1.3869,
|
|
"margin_dpo/margin_mean": -0.003385692834854126,
|
|
"margin_dpo/margin_std": 0.311041921377182,
|
|
"step": 5
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.07618141174316406,
|
|
"KL/mean": -0.04137462377548218,
|
|
"KL/rejected_KL_mean": -0.0065631866455078125,
|
|
"KL/std": 0.18795457482337952,
|
|
"epoch": 0.009070294784580499,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.06961393356323242,
|
|
"fcm_dpo/q_t": 0.5017400979995728,
|
|
"grad_norm": 29.731212615966797,
|
|
"learning_rate": 3.731343283582089e-08,
|
|
"logits/chosen": 0.14878220856189728,
|
|
"logits/rejected": 0.10909736901521683,
|
|
"logps/chosen": -51.383544921875,
|
|
"logps/ref_chosen": -51.30736541748047,
|
|
"logps/ref_rejected": -82.77239227294922,
|
|
"logps/rejected": -82.77896118164062,
|
|
"loss": 1.3934,
|
|
"margin_dpo/margin_mean": -0.06961464881896973,
|
|
"margin_dpo/margin_std": 0.2415209412574768,
|
|
"step": 6
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.029163360595703125,
|
|
"KL/mean": 0.015947937965393066,
|
|
"KL/rejected_KL_mean": 0.0027313232421875,
|
|
"KL/std": 0.19804246723651886,
|
|
"epoch": 0.010582010582010581,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.026432573795318604,
|
|
"fcm_dpo/q_t": 0.49933910369873047,
|
|
"grad_norm": 27.144798278808594,
|
|
"learning_rate": 4.477611940298507e-08,
|
|
"logits/chosen": 0.022467611357569695,
|
|
"logits/rejected": -0.02164413407444954,
|
|
"logps/chosen": -51.43024826049805,
|
|
"logps/ref_chosen": -51.45941162109375,
|
|
"logps/ref_rejected": -66.3828125,
|
|
"logps/rejected": -66.38008117675781,
|
|
"loss": 1.3838,
|
|
"margin_dpo/margin_mean": 0.02643263339996338,
|
|
"margin_dpo/margin_std": 0.23562361299991608,
|
|
"step": 7
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.0071506500244140625,
|
|
"KL/mean": 0.009569257497787476,
|
|
"KL/rejected_KL_mean": 0.011989593505859375,
|
|
"KL/std": 0.2280169427394867,
|
|
"epoch": 0.012093726379440665,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.004838764667510986,
|
|
"fcm_dpo/q_t": 0.5001217126846313,
|
|
"grad_norm": 28.669523239135742,
|
|
"learning_rate": 5.223880597014925e-08,
|
|
"logits/chosen": 0.07114364206790924,
|
|
"logits/rejected": 0.049073293805122375,
|
|
"logps/chosen": -62.19039535522461,
|
|
"logps/ref_chosen": -62.197547912597656,
|
|
"logps/ref_rejected": -74.66180419921875,
|
|
"logps/rejected": -74.64981842041016,
|
|
"loss": 1.387,
|
|
"margin_dpo/margin_mean": -0.004839152097702026,
|
|
"margin_dpo/margin_std": 0.3230676054954529,
|
|
"step": 8
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.041790008544921875,
|
|
"KL/mean": -0.02482415735721588,
|
|
"KL/rejected_KL_mean": -0.007862091064453125,
|
|
"KL/std": 0.2600027918815613,
|
|
"epoch": 0.013605442176870748,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.033925265073776245,
|
|
"fcm_dpo/q_t": 0.5008475184440613,
|
|
"grad_norm": 31.706531524658203,
|
|
"learning_rate": 5.970149253731343e-08,
|
|
"logits/chosen": 0.1778247058391571,
|
|
"logits/rejected": 0.11781768500804901,
|
|
"logps/chosen": -55.6715087890625,
|
|
"logps/ref_chosen": -55.629722595214844,
|
|
"logps/ref_rejected": -86.21221923828125,
|
|
"logps/rejected": -86.22008514404297,
|
|
"loss": 1.39,
|
|
"margin_dpo/margin_mean": -0.03392493724822998,
|
|
"margin_dpo/margin_std": 0.37033605575561523,
|
|
"step": 9
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.006801605224609375,
|
|
"KL/mean": 0.0033866912126541138,
|
|
"KL/rejected_KL_mean": -3.4332275390625e-05,
|
|
"KL/std": 0.22908765077590942,
|
|
"epoch": 0.015117157974300832,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.0068267881870269775,
|
|
"fcm_dpo/q_t": 0.49982950091362,
|
|
"grad_norm": 29.79790687561035,
|
|
"learning_rate": 6.71641791044776e-08,
|
|
"logits/chosen": 0.12383204698562622,
|
|
"logits/rejected": 0.0932975709438324,
|
|
"logps/chosen": -62.683799743652344,
|
|
"logps/ref_chosen": -62.69060134887695,
|
|
"logps/ref_rejected": -90.610107421875,
|
|
"logps/rejected": -90.61014556884766,
|
|
"loss": 1.3859,
|
|
"margin_dpo/margin_mean": 0.006826251745223999,
|
|
"margin_dpo/margin_std": 0.3240160644054413,
|
|
"step": 10
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.021305084228515625,
|
|
"KL/mean": -0.009378388524055481,
|
|
"KL/rejected_KL_mean": 0.002544403076171875,
|
|
"KL/std": 0.21124267578125,
|
|
"epoch": 0.016628873771730914,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.023848801851272583,
|
|
"fcm_dpo/q_t": 0.500595211982727,
|
|
"grad_norm": 29.450275421142578,
|
|
"learning_rate": 7.462686567164178e-08,
|
|
"logits/chosen": 0.1082124412059784,
|
|
"logits/rejected": 0.1013953760266304,
|
|
"logps/chosen": -65.78842163085938,
|
|
"logps/ref_chosen": -65.76712036132812,
|
|
"logps/ref_rejected": -72.4764633178711,
|
|
"logps/rejected": -72.47392272949219,
|
|
"loss": 1.389,
|
|
"margin_dpo/margin_mean": -0.023848414421081543,
|
|
"margin_dpo/margin_std": 0.31496310234069824,
|
|
"step": 11
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.00022125244140625,
|
|
"KL/mean": -0.020917758345603943,
|
|
"KL/rejected_KL_mean": -0.0416107177734375,
|
|
"KL/std": 0.2180933803319931,
|
|
"epoch": 0.018140589569160998,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.04139283299446106,
|
|
"fcm_dpo/q_t": 0.498965322971344,
|
|
"grad_norm": 28.157424926757812,
|
|
"learning_rate": 8.208955223880596e-08,
|
|
"logits/chosen": 0.02831427939236164,
|
|
"logits/rejected": 0.012133650481700897,
|
|
"logps/chosen": -60.70511245727539,
|
|
"logps/ref_chosen": -60.704891204833984,
|
|
"logps/ref_rejected": -69.41564178466797,
|
|
"logps/rejected": -69.4572525024414,
|
|
"loss": 1.3824,
|
|
"margin_dpo/margin_mean": 0.041393160820007324,
|
|
"margin_dpo/margin_std": 0.2782641649246216,
|
|
"step": 12
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.0318450927734375,
|
|
"KL/mean": 0.05899037420749664,
|
|
"KL/rejected_KL_mean": 0.08614349365234375,
|
|
"KL/std": 0.23153722286224365,
|
|
"epoch": 0.019652305366591082,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.054293423891067505,
|
|
"fcm_dpo/q_t": 0.5013566613197327,
|
|
"grad_norm": 29.11477279663086,
|
|
"learning_rate": 8.955223880597014e-08,
|
|
"logits/chosen": 0.06593827903270721,
|
|
"logits/rejected": 0.007763490080833435,
|
|
"logps/chosen": -49.877410888671875,
|
|
"logps/ref_chosen": -49.90925598144531,
|
|
"logps/ref_rejected": -92.37818145751953,
|
|
"logps/rejected": -92.29203796386719,
|
|
"loss": 1.392,
|
|
"margin_dpo/margin_mean": -0.05429357290267944,
|
|
"margin_dpo/margin_std": 0.3109407424926758,
|
|
"step": 13
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.027740478515625,
|
|
"KL/mean": 0.0046707987785339355,
|
|
"KL/rejected_KL_mean": -0.01839447021484375,
|
|
"KL/std": 0.20197857916355133,
|
|
"epoch": 0.021164021164021163,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.046134233474731445,
|
|
"fcm_dpo/q_t": 0.49884724617004395,
|
|
"grad_norm": 29.57986068725586,
|
|
"learning_rate": 9.701492537313432e-08,
|
|
"logits/chosen": 0.08484087139368057,
|
|
"logits/rejected": 0.0672261118888855,
|
|
"logps/chosen": -60.5910530090332,
|
|
"logps/ref_chosen": -60.61879348754883,
|
|
"logps/ref_rejected": -71.79306030273438,
|
|
"logps/rejected": -71.81145477294922,
|
|
"loss": 1.3819,
|
|
"margin_dpo/margin_mean": 0.04613363742828369,
|
|
"margin_dpo/margin_std": 0.2983606457710266,
|
|
"step": 14
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.03234100341796875,
|
|
"KL/mean": -0.025052621960639954,
|
|
"KL/rejected_KL_mean": -0.017765045166015625,
|
|
"KL/std": 0.23586109280586243,
|
|
"epoch": 0.022675736961451247,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.01457950472831726,
|
|
"fcm_dpo/q_t": 0.5003635287284851,
|
|
"grad_norm": 33.33202362060547,
|
|
"learning_rate": 1.044776119402985e-07,
|
|
"logits/chosen": 0.05998123437166214,
|
|
"logits/rejected": 0.017427468672394753,
|
|
"logps/chosen": -63.50187683105469,
|
|
"logps/ref_chosen": -63.46953582763672,
|
|
"logps/ref_rejected": -88.88951110839844,
|
|
"logps/rejected": -88.90727996826172,
|
|
"loss": 1.388,
|
|
"margin_dpo/margin_mean": -0.014578819274902344,
|
|
"margin_dpo/margin_std": 0.312533974647522,
|
|
"step": 15
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.011747360229492188,
|
|
"KL/mean": -0.016225792467594147,
|
|
"KL/rejected_KL_mean": -0.020709991455078125,
|
|
"KL/std": 0.21998129785060883,
|
|
"epoch": 0.02418745275888133,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.008957013487815857,
|
|
"fcm_dpo/q_t": 0.49977654218673706,
|
|
"grad_norm": 26.868680953979492,
|
|
"learning_rate": 1.1194029850746268e-07,
|
|
"logits/chosen": 0.10955867171287537,
|
|
"logits/rejected": 0.07288958132266998,
|
|
"logps/chosen": -46.544044494628906,
|
|
"logps/ref_chosen": -46.53229904174805,
|
|
"logps/ref_rejected": -74.27533721923828,
|
|
"logps/rejected": -74.29605102539062,
|
|
"loss": 1.3856,
|
|
"margin_dpo/margin_mean": 0.008957013487815857,
|
|
"margin_dpo/margin_std": 0.29685065150260925,
|
|
"step": 16
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.008335113525390625,
|
|
"KL/mean": 0.011452361941337585,
|
|
"KL/rejected_KL_mean": 0.0145721435546875,
|
|
"KL/std": 0.2385525107383728,
|
|
"epoch": 0.025699168556311415,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.006234914064407349,
|
|
"fcm_dpo/q_t": 0.5001556873321533,
|
|
"grad_norm": 33.03755569458008,
|
|
"learning_rate": 1.1940298507462686e-07,
|
|
"logits/chosen": 0.07020756602287292,
|
|
"logits/rejected": 0.05094723403453827,
|
|
"logps/chosen": -64.06949615478516,
|
|
"logps/ref_chosen": -64.07783508300781,
|
|
"logps/ref_rejected": -86.40876770019531,
|
|
"logps/rejected": -86.39419555664062,
|
|
"loss": 1.3872,
|
|
"margin_dpo/margin_mean": -0.006234914064407349,
|
|
"margin_dpo/margin_std": 0.3088860511779785,
|
|
"step": 17
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.016176223754882812,
|
|
"KL/mean": -0.023748770356178284,
|
|
"KL/rejected_KL_mean": -0.03132057189941406,
|
|
"KL/std": 0.214952290058136,
|
|
"epoch": 0.027210884353741496,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.015142887830734253,
|
|
"fcm_dpo/q_t": 0.4996216893196106,
|
|
"grad_norm": 27.715377807617188,
|
|
"learning_rate": 1.2686567164179106e-07,
|
|
"logits/chosen": 0.07380260527133942,
|
|
"logits/rejected": 0.029822219163179398,
|
|
"logps/chosen": -44.89051055908203,
|
|
"logps/ref_chosen": -44.87433624267578,
|
|
"logps/ref_rejected": -70.97604370117188,
|
|
"logps/rejected": -71.00736999511719,
|
|
"loss": 1.385,
|
|
"margin_dpo/margin_mean": 0.015142649412155151,
|
|
"margin_dpo/margin_std": 0.3129231929779053,
|
|
"step": 18
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.027788162231445312,
|
|
"KL/mean": -0.006608419120311737,
|
|
"KL/rejected_KL_mean": -0.04100799560546875,
|
|
"KL/std": 0.2719251215457916,
|
|
"epoch": 0.02872260015117158,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.06879554688930511,
|
|
"fcm_dpo/q_t": 0.49828028678894043,
|
|
"grad_norm": 30.783519744873047,
|
|
"learning_rate": 1.343283582089552e-07,
|
|
"logits/chosen": 0.05657193809747696,
|
|
"logits/rejected": 0.043277401477098465,
|
|
"logps/chosen": -68.13201904296875,
|
|
"logps/ref_chosen": -68.1598129272461,
|
|
"logps/ref_rejected": -81.17138671875,
|
|
"logps/rejected": -81.21240234375,
|
|
"loss": 1.3798,
|
|
"margin_dpo/margin_mean": 0.06879596412181854,
|
|
"margin_dpo/margin_std": 0.3576847314834595,
|
|
"step": 19
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.0046100616455078125,
|
|
"KL/mean": 0.013587579131126404,
|
|
"KL/rejected_KL_mean": 0.022571563720703125,
|
|
"KL/std": 0.19717076420783997,
|
|
"epoch": 0.030234315948601664,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.01796087622642517,
|
|
"fcm_dpo/q_t": 0.5004487633705139,
|
|
"grad_norm": 29.42017364501953,
|
|
"learning_rate": 1.4179104477611938e-07,
|
|
"logits/chosen": 0.150754913687706,
|
|
"logits/rejected": 0.12703603506088257,
|
|
"logps/chosen": -53.6739501953125,
|
|
"logps/ref_chosen": -53.67856216430664,
|
|
"logps/ref_rejected": -74.16911315917969,
|
|
"logps/rejected": -74.14654541015625,
|
|
"loss": 1.3883,
|
|
"margin_dpo/margin_mean": -0.017960846424102783,
|
|
"margin_dpo/margin_std": 0.295392781496048,
|
|
"step": 20
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.017730712890625,
|
|
"KL/mean": -0.01410750299692154,
|
|
"KL/rejected_KL_mean": -0.0104827880859375,
|
|
"KL/std": 0.2105174958705902,
|
|
"epoch": 0.031746031746031744,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.0072454363107681274,
|
|
"fcm_dpo/q_t": 0.5001811385154724,
|
|
"grad_norm": 29.44789695739746,
|
|
"learning_rate": 1.4925373134328355e-07,
|
|
"logits/chosen": 0.0918562263250351,
|
|
"logits/rejected": 0.0668744146823883,
|
|
"logps/chosen": -64.71928405761719,
|
|
"logps/ref_chosen": -64.70155334472656,
|
|
"logps/ref_rejected": -81.02095031738281,
|
|
"logps/rejected": -81.03143310546875,
|
|
"loss": 1.3872,
|
|
"margin_dpo/margin_mean": -0.007246255874633789,
|
|
"margin_dpo/margin_std": 0.2946098744869232,
|
|
"step": 21
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.0073108673095703125,
|
|
"KL/mean": -0.011000528931617737,
|
|
"KL/rejected_KL_mean": -0.014690399169921875,
|
|
"KL/std": 0.22859472036361694,
|
|
"epoch": 0.03325774754346183,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.007384508848190308,
|
|
"fcm_dpo/q_t": 0.4998154044151306,
|
|
"grad_norm": 28.996417999267578,
|
|
"learning_rate": 1.5671641791044775e-07,
|
|
"logits/chosen": 0.001006007776595652,
|
|
"logits/rejected": -0.01976313814520836,
|
|
"logps/chosen": -58.043304443359375,
|
|
"logps/ref_chosen": -58.03599166870117,
|
|
"logps/ref_rejected": -80.72721862792969,
|
|
"logps/rejected": -80.74191284179688,
|
|
"loss": 1.3858,
|
|
"margin_dpo/margin_mean": 0.007384538650512695,
|
|
"margin_dpo/margin_std": 0.2887970209121704,
|
|
"step": 22
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.0045013427734375,
|
|
"KL/mean": -0.020362719893455505,
|
|
"KL/rejected_KL_mean": -0.04522705078125,
|
|
"KL/std": 0.22603841125965118,
|
|
"epoch": 0.03476946334089191,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.04972890019416809,
|
|
"fcm_dpo/q_t": 0.4987572729587555,
|
|
"grad_norm": 32.71550750732422,
|
|
"learning_rate": 1.6417910447761193e-07,
|
|
"logits/chosen": 0.11910876631736755,
|
|
"logits/rejected": 0.09463324397802353,
|
|
"logps/chosen": -66.3515853881836,
|
|
"logps/ref_chosen": -66.35608673095703,
|
|
"logps/ref_rejected": -93.02769470214844,
|
|
"logps/rejected": -93.07292175292969,
|
|
"loss": 1.3815,
|
|
"margin_dpo/margin_mean": 0.04972848296165466,
|
|
"margin_dpo/margin_std": 0.27672749757766724,
|
|
"step": 23
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.0032978057861328125,
|
|
"KL/mean": 0.004052832722663879,
|
|
"KL/rejected_KL_mean": 0.0048084259033203125,
|
|
"KL/std": 0.19972658157348633,
|
|
"epoch": 0.036281179138321996,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.001508086919784546,
|
|
"fcm_dpo/q_t": 0.500037431716919,
|
|
"grad_norm": 26.32447624206543,
|
|
"learning_rate": 1.716417910447761e-07,
|
|
"logits/chosen": 0.11018004268407822,
|
|
"logits/rejected": 0.07803471386432648,
|
|
"logps/chosen": -54.45793914794922,
|
|
"logps/ref_chosen": -54.461238861083984,
|
|
"logps/ref_rejected": -68.33817291259766,
|
|
"logps/rejected": -68.33336639404297,
|
|
"loss": 1.3866,
|
|
"margin_dpo/margin_mean": -0.0015083253383636475,
|
|
"margin_dpo/margin_std": 0.26809483766555786,
|
|
"step": 24
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.0533905029296875,
|
|
"KL/mean": -0.0579182505607605,
|
|
"KL/rejected_KL_mean": -0.062442779541015625,
|
|
"KL/std": 0.22538413107395172,
|
|
"epoch": 0.03779289493575208,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.009047925472259521,
|
|
"fcm_dpo/q_t": 0.49977388978004456,
|
|
"grad_norm": 29.704883575439453,
|
|
"learning_rate": 1.7910447761194027e-07,
|
|
"logits/chosen": 0.10709188878536224,
|
|
"logits/rejected": 0.05541721731424332,
|
|
"logps/chosen": -60.057594299316406,
|
|
"logps/ref_chosen": -60.00420379638672,
|
|
"logps/ref_rejected": -90.47376251220703,
|
|
"logps/rejected": -90.53620910644531,
|
|
"loss": 1.3856,
|
|
"margin_dpo/margin_mean": 0.009047985076904297,
|
|
"margin_dpo/margin_std": 0.2810651659965515,
|
|
"step": 25
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.01007843017578125,
|
|
"KL/mean": -0.01913144439458847,
|
|
"KL/rejected_KL_mean": -0.0281829833984375,
|
|
"KL/std": 0.191893070936203,
|
|
"epoch": 0.039304610733182165,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.018103912472724915,
|
|
"fcm_dpo/q_t": 0.49954742193222046,
|
|
"grad_norm": 29.8209228515625,
|
|
"learning_rate": 1.8656716417910447e-07,
|
|
"logits/chosen": 0.11784784495830536,
|
|
"logits/rejected": 0.09917742758989334,
|
|
"logps/chosen": -56.82923126220703,
|
|
"logps/ref_chosen": -56.81915283203125,
|
|
"logps/ref_rejected": -77.84333038330078,
|
|
"logps/rejected": -77.87151336669922,
|
|
"loss": 1.3847,
|
|
"margin_dpo/margin_mean": 0.018104106187820435,
|
|
"margin_dpo/margin_std": 0.2743600606918335,
|
|
"step": 26
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.011585235595703125,
|
|
"KL/mean": -0.015466853976249695,
|
|
"KL/rejected_KL_mean": -0.01934814453125,
|
|
"KL/std": 0.2070031762123108,
|
|
"epoch": 0.04081632653061224,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.00776335597038269,
|
|
"fcm_dpo/q_t": 0.49980592727661133,
|
|
"grad_norm": 28.947845458984375,
|
|
"learning_rate": 1.9402985074626865e-07,
|
|
"logits/chosen": 0.10872644186019897,
|
|
"logits/rejected": 0.08362270146608353,
|
|
"logps/chosen": -62.88861083984375,
|
|
"logps/ref_chosen": -62.87702560424805,
|
|
"logps/ref_rejected": -71.34437561035156,
|
|
"logps/rejected": -71.36372375488281,
|
|
"loss": 1.3857,
|
|
"margin_dpo/margin_mean": 0.007763028144836426,
|
|
"margin_dpo/margin_std": 0.2453039586544037,
|
|
"step": 27
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.031110763549804688,
|
|
"KL/mean": -0.007741585373878479,
|
|
"KL/rejected_KL_mean": 0.015625,
|
|
"KL/std": 0.21955916285514832,
|
|
"epoch": 0.042328042328042326,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.046735793352127075,
|
|
"fcm_dpo/q_t": 0.5011680126190186,
|
|
"grad_norm": 27.712875366210938,
|
|
"learning_rate": 2.0149253731343282e-07,
|
|
"logits/chosen": 0.06208399683237076,
|
|
"logits/rejected": 0.05314202606678009,
|
|
"logps/chosen": -59.86448669433594,
|
|
"logps/ref_chosen": -59.8333740234375,
|
|
"logps/ref_rejected": -70.39804077148438,
|
|
"logps/rejected": -70.38241577148438,
|
|
"loss": 1.3912,
|
|
"margin_dpo/margin_mean": -0.0467359721660614,
|
|
"margin_dpo/margin_std": 0.26044827699661255,
|
|
"step": 28
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.037715911865234375,
|
|
"KL/mean": -0.025590412318706512,
|
|
"KL/rejected_KL_mean": -0.013462066650390625,
|
|
"KL/std": 0.24481341242790222,
|
|
"epoch": 0.04383975812547241,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.024254724383354187,
|
|
"fcm_dpo/q_t": 0.5006059408187866,
|
|
"grad_norm": 32.759521484375,
|
|
"learning_rate": 2.08955223880597e-07,
|
|
"logits/chosen": 0.12057951092720032,
|
|
"logits/rejected": 0.10345769673585892,
|
|
"logps/chosen": -74.15791320800781,
|
|
"logps/ref_chosen": -74.12020111083984,
|
|
"logps/ref_rejected": -83.33099365234375,
|
|
"logps/rejected": -83.34445190429688,
|
|
"loss": 1.389,
|
|
"margin_dpo/margin_mean": -0.02425485849380493,
|
|
"margin_dpo/margin_std": 0.3270670175552368,
|
|
"step": 29
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.05994606018066406,
|
|
"KL/mean": -0.05441446602344513,
|
|
"KL/rejected_KL_mean": -0.04888916015625,
|
|
"KL/std": 0.21107196807861328,
|
|
"epoch": 0.045351473922902494,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.011056751012802124,
|
|
"fcm_dpo/q_t": 0.5002766847610474,
|
|
"grad_norm": 30.45713233947754,
|
|
"learning_rate": 2.1641791044776117e-07,
|
|
"logits/chosen": 0.15005697309970856,
|
|
"logits/rejected": 0.09357762336730957,
|
|
"logps/chosen": -50.81123352050781,
|
|
"logps/ref_chosen": -50.75128936767578,
|
|
"logps/ref_rejected": -89.29063415527344,
|
|
"logps/rejected": -89.33952331542969,
|
|
"loss": 1.3876,
|
|
"margin_dpo/margin_mean": -0.01105719804763794,
|
|
"margin_dpo/margin_std": 0.30437785387039185,
|
|
"step": 30
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.05399894714355469,
|
|
"KL/mean": -0.05780626833438873,
|
|
"KL/rejected_KL_mean": -0.061614990234375,
|
|
"KL/std": 0.2318582683801651,
|
|
"epoch": 0.04686318972033258,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.007615476846694946,
|
|
"fcm_dpo/q_t": 0.4998096227645874,
|
|
"grad_norm": 34.348426818847656,
|
|
"learning_rate": 2.2388059701492537e-07,
|
|
"logits/chosen": 0.12432706356048584,
|
|
"logits/rejected": 0.07688245177268982,
|
|
"logps/chosen": -65.39075469970703,
|
|
"logps/ref_chosen": -65.33675384521484,
|
|
"logps/ref_rejected": -100.76666259765625,
|
|
"logps/rejected": -100.82827758789062,
|
|
"loss": 1.3858,
|
|
"margin_dpo/margin_mean": 0.007616162300109863,
|
|
"margin_dpo/margin_std": 0.329498827457428,
|
|
"step": 31
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.01386260986328125,
|
|
"KL/mean": -0.014202281832695007,
|
|
"KL/rejected_KL_mean": -0.014545440673828125,
|
|
"KL/std": 0.20463216304779053,
|
|
"epoch": 0.04837490551776266,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.0006882846355438232,
|
|
"fcm_dpo/q_t": 0.49998271465301514,
|
|
"grad_norm": 30.011537551879883,
|
|
"learning_rate": 2.3134328358208954e-07,
|
|
"logits/chosen": 0.09332571923732758,
|
|
"logits/rejected": 0.08537882566452026,
|
|
"logps/chosen": -67.19718933105469,
|
|
"logps/ref_chosen": -67.18333435058594,
|
|
"logps/ref_rejected": -82.80763244628906,
|
|
"logps/rejected": -82.82218170166016,
|
|
"loss": 1.3864,
|
|
"margin_dpo/margin_mean": 0.0006878972053527832,
|
|
"margin_dpo/margin_std": 0.26308125257492065,
|
|
"step": 32
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.01883697509765625,
|
|
"KL/mean": -0.038420893251895905,
|
|
"KL/rejected_KL_mean": -0.05800628662109375,
|
|
"KL/std": 0.20567819476127625,
|
|
"epoch": 0.049886621315192746,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.039161697030067444,
|
|
"fcm_dpo/q_t": 0.4990212917327881,
|
|
"grad_norm": 30.979841232299805,
|
|
"learning_rate": 2.388059701492537e-07,
|
|
"logits/chosen": 0.01221918873488903,
|
|
"logits/rejected": -0.01288791373372078,
|
|
"logps/chosen": -64.05831909179688,
|
|
"logps/ref_chosen": -64.03948211669922,
|
|
"logps/ref_rejected": -75.68357849121094,
|
|
"logps/rejected": -75.74159240722656,
|
|
"loss": 1.3826,
|
|
"margin_dpo/margin_mean": 0.03916238248348236,
|
|
"margin_dpo/margin_std": 0.31601831316947937,
|
|
"step": 33
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.06904792785644531,
|
|
"KL/mean": -0.07404738664627075,
|
|
"KL/rejected_KL_mean": -0.07904815673828125,
|
|
"KL/std": 0.21560761332511902,
|
|
"epoch": 0.05139833711262283,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.010003805160522461,
|
|
"fcm_dpo/q_t": 0.4997497797012329,
|
|
"grad_norm": 28.545251846313477,
|
|
"learning_rate": 2.4626865671641786e-07,
|
|
"logits/chosen": 0.08115334808826447,
|
|
"logits/rejected": 0.052157625555992126,
|
|
"logps/chosen": -53.73334503173828,
|
|
"logps/ref_chosen": -53.6642951965332,
|
|
"logps/ref_rejected": -65.77989959716797,
|
|
"logps/rejected": -65.85894775390625,
|
|
"loss": 1.3855,
|
|
"margin_dpo/margin_mean": 0.010003626346588135,
|
|
"margin_dpo/margin_std": 0.2855505347251892,
|
|
"step": 34
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.10087203979492188,
|
|
"KL/mean": -0.09881065785884857,
|
|
"KL/rejected_KL_mean": -0.09675216674804688,
|
|
"KL/std": 0.22443602979183197,
|
|
"epoch": 0.05291005291005291,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.004124492406845093,
|
|
"fcm_dpo/q_t": 0.5001026391983032,
|
|
"grad_norm": 27.967744827270508,
|
|
"learning_rate": 2.537313432835821e-07,
|
|
"logits/chosen": 0.06974397599697113,
|
|
"logits/rejected": 0.04678075760602951,
|
|
"logps/chosen": -61.11773681640625,
|
|
"logps/ref_chosen": -61.01686096191406,
|
|
"logps/ref_rejected": -72.78598022460938,
|
|
"logps/rejected": -72.88273620605469,
|
|
"loss": 1.387,
|
|
"margin_dpo/margin_mean": -0.004124671220779419,
|
|
"margin_dpo/margin_std": 0.36277151107788086,
|
|
"step": 35
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.0939483642578125,
|
|
"KL/mean": -0.09617127478122711,
|
|
"KL/rejected_KL_mean": -0.09839248657226562,
|
|
"KL/std": 0.2507082223892212,
|
|
"epoch": 0.05442176870748299,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.004447370767593384,
|
|
"fcm_dpo/q_t": 0.4998890459537506,
|
|
"grad_norm": 28.77927017211914,
|
|
"learning_rate": 2.611940298507462e-07,
|
|
"logits/chosen": 0.10440988838672638,
|
|
"logits/rejected": 0.051250994205474854,
|
|
"logps/chosen": -50.631309509277344,
|
|
"logps/ref_chosen": -50.53736114501953,
|
|
"logps/ref_rejected": -78.11678314208984,
|
|
"logps/rejected": -78.21517944335938,
|
|
"loss": 1.3862,
|
|
"margin_dpo/margin_mean": 0.0044477880001068115,
|
|
"margin_dpo/margin_std": 0.34456297755241394,
|
|
"step": 36
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.031030654907226562,
|
|
"KL/mean": -0.08082780241966248,
|
|
"KL/rejected_KL_mean": -0.13062667846679688,
|
|
"KL/std": 0.2832157015800476,
|
|
"epoch": 0.055933484504913075,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.09959191083908081,
|
|
"fcm_dpo/q_t": 0.49751192331314087,
|
|
"grad_norm": 37.906978607177734,
|
|
"learning_rate": 2.686567164179104e-07,
|
|
"logits/chosen": 0.09839829057455063,
|
|
"logits/rejected": 0.01927458494901657,
|
|
"logps/chosen": -59.58497619628906,
|
|
"logps/ref_chosen": -59.55394744873047,
|
|
"logps/ref_rejected": -108.27702331542969,
|
|
"logps/rejected": -108.40765380859375,
|
|
"loss": 1.3768,
|
|
"margin_dpo/margin_mean": 0.09959128499031067,
|
|
"margin_dpo/margin_std": 0.417254775762558,
|
|
"step": 37
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.07107925415039062,
|
|
"KL/mean": -0.0987686961889267,
|
|
"KL/rejected_KL_mean": -0.1264629364013672,
|
|
"KL/std": 0.2710877060890198,
|
|
"epoch": 0.05744520030234316,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.055382877588272095,
|
|
"fcm_dpo/q_t": 0.4986163079738617,
|
|
"grad_norm": 29.613418579101562,
|
|
"learning_rate": 2.761194029850746e-07,
|
|
"logits/chosen": 0.0700533539056778,
|
|
"logits/rejected": 0.056070465594530106,
|
|
"logps/chosen": -65.85943603515625,
|
|
"logps/ref_chosen": -65.78836059570312,
|
|
"logps/ref_rejected": -76.1619873046875,
|
|
"logps/rejected": -76.2884521484375,
|
|
"loss": 1.3811,
|
|
"margin_dpo/margin_mean": 0.05538263916969299,
|
|
"margin_dpo/margin_std": 0.3556036353111267,
|
|
"step": 38
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.09274101257324219,
|
|
"KL/mean": -0.10645350813865662,
|
|
"KL/rejected_KL_mean": -0.1201629638671875,
|
|
"KL/std": 0.2401476353406906,
|
|
"epoch": 0.05895691609977324,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.02742624282836914,
|
|
"fcm_dpo/q_t": 0.4993141293525696,
|
|
"grad_norm": 29.30498695373535,
|
|
"learning_rate": 2.8358208955223876e-07,
|
|
"logits/chosen": 0.16070523858070374,
|
|
"logits/rejected": 0.13387100398540497,
|
|
"logps/chosen": -57.26955032348633,
|
|
"logps/ref_chosen": -57.17681121826172,
|
|
"logps/ref_rejected": -79.486328125,
|
|
"logps/rejected": -79.60649108886719,
|
|
"loss": 1.3839,
|
|
"margin_dpo/margin_mean": 0.027426093816757202,
|
|
"margin_dpo/margin_std": 0.3451729714870453,
|
|
"step": 39
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.10124588012695312,
|
|
"KL/mean": -0.12174936383962631,
|
|
"KL/rejected_KL_mean": -0.14225387573242188,
|
|
"KL/std": 0.23551294207572937,
|
|
"epoch": 0.06046863189720333,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.04101024568080902,
|
|
"fcm_dpo/q_t": 0.49897509813308716,
|
|
"grad_norm": 31.32738494873047,
|
|
"learning_rate": 2.9104477611940296e-07,
|
|
"logits/chosen": 0.09927191585302353,
|
|
"logits/rejected": 0.05032794922590256,
|
|
"logps/chosen": -61.4354133605957,
|
|
"logps/ref_chosen": -61.33416748046875,
|
|
"logps/ref_rejected": -79.10697174072266,
|
|
"logps/rejected": -79.24922180175781,
|
|
"loss": 1.3825,
|
|
"margin_dpo/margin_mean": 0.041010692715644836,
|
|
"margin_dpo/margin_std": 0.34118902683258057,
|
|
"step": 40
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.12536239624023438,
|
|
"KL/mean": -0.17089422047138214,
|
|
"KL/rejected_KL_mean": -0.2164154052734375,
|
|
"KL/std": 0.2776281237602234,
|
|
"epoch": 0.06198034769463341,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.09104952216148376,
|
|
"fcm_dpo/q_t": 0.4977242350578308,
|
|
"grad_norm": 30.495880126953125,
|
|
"learning_rate": 2.985074626865671e-07,
|
|
"logits/chosen": 0.05292656272649765,
|
|
"logits/rejected": 0.03277691453695297,
|
|
"logps/chosen": -67.67208862304688,
|
|
"logps/ref_chosen": -67.5467300415039,
|
|
"logps/ref_rejected": -83.87788391113281,
|
|
"logps/rejected": -84.09429931640625,
|
|
"loss": 1.3775,
|
|
"margin_dpo/margin_mean": 0.09105029702186584,
|
|
"margin_dpo/margin_std": 0.3468291759490967,
|
|
"step": 41
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.15981101989746094,
|
|
"KL/mean": -0.16435763239860535,
|
|
"KL/rejected_KL_mean": -0.16890716552734375,
|
|
"KL/std": 0.27211910486221313,
|
|
"epoch": 0.06349206349206349,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.00909416377544403,
|
|
"fcm_dpo/q_t": 0.4997718334197998,
|
|
"grad_norm": 29.411195755004883,
|
|
"learning_rate": 3.059701492537313e-07,
|
|
"logits/chosen": 0.05215911194682121,
|
|
"logits/rejected": 0.030555259436368942,
|
|
"logps/chosen": -61.42466735839844,
|
|
"logps/ref_chosen": -61.26485824584961,
|
|
"logps/ref_rejected": -76.3629150390625,
|
|
"logps/rejected": -76.53182220458984,
|
|
"loss": 1.3857,
|
|
"margin_dpo/margin_mean": 0.009094223380088806,
|
|
"margin_dpo/margin_std": 0.36873728036880493,
|
|
"step": 42
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.09221267700195312,
|
|
"KL/mean": -0.10451853275299072,
|
|
"KL/rejected_KL_mean": -0.1168212890625,
|
|
"KL/std": 0.2634015381336212,
|
|
"epoch": 0.06500377928949358,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.02461099624633789,
|
|
"fcm_dpo/q_t": 0.49938422441482544,
|
|
"grad_norm": 34.57097244262695,
|
|
"learning_rate": 3.134328358208955e-07,
|
|
"logits/chosen": 0.07498491555452347,
|
|
"logits/rejected": 0.06423506140708923,
|
|
"logps/chosen": -71.9012451171875,
|
|
"logps/ref_chosen": -71.80902862548828,
|
|
"logps/ref_rejected": -81.12464141845703,
|
|
"logps/rejected": -81.24146270751953,
|
|
"loss": 1.3842,
|
|
"margin_dpo/margin_mean": 0.02461165189743042,
|
|
"margin_dpo/margin_std": 0.36098232865333557,
|
|
"step": 43
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.18452072143554688,
|
|
"KL/mean": -0.16400179266929626,
|
|
"KL/rejected_KL_mean": -0.14348220825195312,
|
|
"KL/std": 0.2962299883365631,
|
|
"epoch": 0.06651549508692366,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.041034042835235596,
|
|
"fcm_dpo/q_t": 0.5010261535644531,
|
|
"grad_norm": 32.7348747253418,
|
|
"learning_rate": 3.2089552238805965e-07,
|
|
"logits/chosen": 0.07599400728940964,
|
|
"logits/rejected": 0.04434208199381828,
|
|
"logps/chosen": -66.73495483398438,
|
|
"logps/ref_chosen": -66.55043029785156,
|
|
"logps/ref_rejected": -85.06198120117188,
|
|
"logps/rejected": -85.20545959472656,
|
|
"loss": 1.3909,
|
|
"margin_dpo/margin_mean": -0.04103401303291321,
|
|
"margin_dpo/margin_std": 0.43350720405578613,
|
|
"step": 44
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.16771316528320312,
|
|
"KL/mean": -0.18838873505592346,
|
|
"KL/rejected_KL_mean": -0.20906448364257812,
|
|
"KL/std": 0.28000783920288086,
|
|
"epoch": 0.06802721088435375,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.04135562479496002,
|
|
"fcm_dpo/q_t": 0.4989660382270813,
|
|
"grad_norm": 31.9616756439209,
|
|
"learning_rate": 3.2835820895522385e-07,
|
|
"logits/chosen": 0.12065313011407852,
|
|
"logits/rejected": 0.0678417906165123,
|
|
"logps/chosen": -62.41156768798828,
|
|
"logps/ref_chosen": -62.24385452270508,
|
|
"logps/ref_rejected": -92.96665954589844,
|
|
"logps/rejected": -93.17572784423828,
|
|
"loss": 1.3826,
|
|
"margin_dpo/margin_mean": 0.04135587811470032,
|
|
"margin_dpo/margin_std": 0.41056525707244873,
|
|
"step": 45
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.11720657348632812,
|
|
"KL/mean": -0.17983010411262512,
|
|
"KL/rejected_KL_mean": -0.24245071411132812,
|
|
"KL/std": 0.3021848797798157,
|
|
"epoch": 0.06953892668178382,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.12523764371871948,
|
|
"fcm_dpo/q_t": 0.496870219707489,
|
|
"grad_norm": 31.18047332763672,
|
|
"learning_rate": 3.3582089552238805e-07,
|
|
"logits/chosen": 0.13696330785751343,
|
|
"logits/rejected": 0.09033239632844925,
|
|
"logps/chosen": -61.616111755371094,
|
|
"logps/ref_chosen": -61.498905181884766,
|
|
"logps/ref_rejected": -78.91172790527344,
|
|
"logps/rejected": -79.1541748046875,
|
|
"loss": 1.3742,
|
|
"margin_dpo/margin_mean": 0.12523746490478516,
|
|
"margin_dpo/margin_std": 0.4021596908569336,
|
|
"step": 46
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.11099815368652344,
|
|
"KL/mean": -0.1947942078113556,
|
|
"KL/rejected_KL_mean": -0.2785911560058594,
|
|
"KL/std": 0.28510791063308716,
|
|
"epoch": 0.0710506424792139,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.16759371757507324,
|
|
"fcm_dpo/q_t": 0.4958115220069885,
|
|
"grad_norm": 28.516759872436523,
|
|
"learning_rate": 3.432835820895522e-07,
|
|
"logits/chosen": 0.017579158768057823,
|
|
"logits/rejected": -0.024237021803855896,
|
|
"logps/chosen": -51.68934631347656,
|
|
"logps/ref_chosen": -51.578346252441406,
|
|
"logps/ref_rejected": -68.2215576171875,
|
|
"logps/rejected": -68.50015258789062,
|
|
"loss": 1.3699,
|
|
"margin_dpo/margin_mean": 0.16759389638900757,
|
|
"margin_dpo/margin_std": 0.3437148332595825,
|
|
"step": 47
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.20029067993164062,
|
|
"KL/mean": -0.24191534519195557,
|
|
"KL/rejected_KL_mean": -0.2835407257080078,
|
|
"KL/std": 0.3233688771724701,
|
|
"epoch": 0.07256235827664399,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.08324891328811646,
|
|
"fcm_dpo/q_t": 0.4979211091995239,
|
|
"grad_norm": 26.766143798828125,
|
|
"learning_rate": 3.507462686567164e-07,
|
|
"logits/chosen": 0.1553555279970169,
|
|
"logits/rejected": 0.12541115283966064,
|
|
"logps/chosen": -51.99394226074219,
|
|
"logps/ref_chosen": -51.79365158081055,
|
|
"logps/ref_rejected": -64.22503662109375,
|
|
"logps/rejected": -64.50857543945312,
|
|
"loss": 1.3785,
|
|
"margin_dpo/margin_mean": 0.08324909210205078,
|
|
"margin_dpo/margin_std": 0.45652705430984497,
|
|
"step": 48
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.1995697021484375,
|
|
"KL/mean": -0.2532599866390228,
|
|
"KL/rejected_KL_mean": -0.30695152282714844,
|
|
"KL/std": 0.35652607679367065,
|
|
"epoch": 0.07407407407407407,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.10738348960876465,
|
|
"fcm_dpo/q_t": 0.49731600284576416,
|
|
"grad_norm": 27.212228775024414,
|
|
"learning_rate": 3.5820895522388055e-07,
|
|
"logits/chosen": 0.025986451655626297,
|
|
"logits/rejected": 0.00463508814573288,
|
|
"logps/chosen": -58.334171295166016,
|
|
"logps/ref_chosen": -58.13460159301758,
|
|
"logps/ref_rejected": -64.63206481933594,
|
|
"logps/rejected": -64.93901824951172,
|
|
"loss": 1.3761,
|
|
"margin_dpo/margin_mean": 0.10738390684127808,
|
|
"margin_dpo/margin_std": 0.4513172507286072,
|
|
"step": 49
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.28635406494140625,
|
|
"KL/mean": -0.3434775471687317,
|
|
"KL/rejected_KL_mean": -0.4006004333496094,
|
|
"KL/std": 0.33685302734375,
|
|
"epoch": 0.07558578987150416,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.11424615979194641,
|
|
"fcm_dpo/q_t": 0.4971450865268707,
|
|
"grad_norm": 27.976703643798828,
|
|
"learning_rate": 3.6567164179104475e-07,
|
|
"logits/chosen": 0.12224933505058289,
|
|
"logits/rejected": 0.09207739681005478,
|
|
"logps/chosen": -53.142791748046875,
|
|
"logps/ref_chosen": -52.85643768310547,
|
|
"logps/ref_rejected": -72.17460632324219,
|
|
"logps/rejected": -72.57520294189453,
|
|
"loss": 1.3752,
|
|
"margin_dpo/margin_mean": 0.1142461895942688,
|
|
"margin_dpo/margin_std": 0.35645347833633423,
|
|
"step": 50
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.28582000732421875,
|
|
"KL/mean": -0.37373238801956177,
|
|
"KL/rejected_KL_mean": -0.4616508483886719,
|
|
"KL/std": 0.37711915373802185,
|
|
"epoch": 0.07709750566893424,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.175826758146286,
|
|
"fcm_dpo/q_t": 0.4956084191799164,
|
|
"grad_norm": 30.997961044311523,
|
|
"learning_rate": 3.7313432835820895e-07,
|
|
"logits/chosen": 0.07733315229415894,
|
|
"logits/rejected": 0.04983144998550415,
|
|
"logps/chosen": -63.9422607421875,
|
|
"logps/ref_chosen": -63.65644073486328,
|
|
"logps/ref_rejected": -86.13229370117188,
|
|
"logps/rejected": -86.59394836425781,
|
|
"loss": 1.3695,
|
|
"margin_dpo/margin_mean": 0.17582657933235168,
|
|
"margin_dpo/margin_std": 0.519061267375946,
|
|
"step": 51
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.30011749267578125,
|
|
"KL/mean": -0.41180944442749023,
|
|
"KL/rejected_KL_mean": -0.5235023498535156,
|
|
"KL/std": 0.42145514488220215,
|
|
"epoch": 0.07860922146636433,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.22337864339351654,
|
|
"fcm_dpo/q_t": 0.4944203495979309,
|
|
"grad_norm": 32.6346321105957,
|
|
"learning_rate": 3.805970149253731e-07,
|
|
"logits/chosen": 0.08316340297460556,
|
|
"logits/rejected": 0.033959146589040756,
|
|
"logps/chosen": -68.14033508300781,
|
|
"logps/ref_chosen": -67.8402099609375,
|
|
"logps/ref_rejected": -96.97090911865234,
|
|
"logps/rejected": -97.49441528320312,
|
|
"loss": 1.3647,
|
|
"margin_dpo/margin_mean": 0.223378524184227,
|
|
"margin_dpo/margin_std": 0.49499744176864624,
|
|
"step": 52
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.3559112548828125,
|
|
"KL/mean": -0.43811851739883423,
|
|
"KL/rejected_KL_mean": -0.52032470703125,
|
|
"KL/std": 0.37847423553466797,
|
|
"epoch": 0.0801209372637944,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.16441354155540466,
|
|
"fcm_dpo/q_t": 0.4958914518356323,
|
|
"grad_norm": 27.523468017578125,
|
|
"learning_rate": 3.880597014925373e-07,
|
|
"logits/chosen": 0.09345749765634537,
|
|
"logits/rejected": 0.08262845873832703,
|
|
"logps/chosen": -57.23405075073242,
|
|
"logps/ref_chosen": -56.87813949584961,
|
|
"logps/ref_rejected": -60.75569152832031,
|
|
"logps/rejected": -61.27601623535156,
|
|
"loss": 1.3704,
|
|
"margin_dpo/margin_mean": 0.16441425681114197,
|
|
"margin_dpo/margin_std": 0.4513196349143982,
|
|
"step": 53
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.3927898406982422,
|
|
"KL/mean": -0.4750269055366516,
|
|
"KL/rejected_KL_mean": -0.5572643280029297,
|
|
"KL/std": 0.4068900942802429,
|
|
"epoch": 0.08163265306122448,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.16447404026985168,
|
|
"fcm_dpo/q_t": 0.49589112401008606,
|
|
"grad_norm": 26.510074615478516,
|
|
"learning_rate": 3.9552238805970144e-07,
|
|
"logits/chosen": 0.05423099547624588,
|
|
"logits/rejected": 0.03896608203649521,
|
|
"logps/chosen": -47.65970993041992,
|
|
"logps/ref_chosen": -47.26692199707031,
|
|
"logps/ref_rejected": -62.19426727294922,
|
|
"logps/rejected": -62.751529693603516,
|
|
"loss": 1.3706,
|
|
"margin_dpo/margin_mean": 0.16447380185127258,
|
|
"margin_dpo/margin_std": 0.5178990364074707,
|
|
"step": 54
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.3732719421386719,
|
|
"KL/mean": -0.5044463276863098,
|
|
"KL/rejected_KL_mean": -0.6356239318847656,
|
|
"KL/std": 0.49312472343444824,
|
|
"epoch": 0.08314436885865457,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.2623523473739624,
|
|
"fcm_dpo/q_t": 0.49345502257347107,
|
|
"grad_norm": 31.297718048095703,
|
|
"learning_rate": 4.0298507462686564e-07,
|
|
"logits/chosen": 0.038510311394929886,
|
|
"logits/rejected": -0.03799784556031227,
|
|
"logps/chosen": -50.699462890625,
|
|
"logps/ref_chosen": -50.32619094848633,
|
|
"logps/ref_rejected": -92.44389343261719,
|
|
"logps/rejected": -93.07951354980469,
|
|
"loss": 1.3614,
|
|
"margin_dpo/margin_mean": 0.26235252618789673,
|
|
"margin_dpo/margin_std": 0.6503203511238098,
|
|
"step": 55
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.3966865539550781,
|
|
"KL/mean": -0.4692612886428833,
|
|
"KL/rejected_KL_mean": -0.5418262481689453,
|
|
"KL/std": 0.48004990816116333,
|
|
"epoch": 0.08465608465608465,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.14514300227165222,
|
|
"fcm_dpo/q_t": 0.496371328830719,
|
|
"grad_norm": 27.65566062927246,
|
|
"learning_rate": 4.1044776119402984e-07,
|
|
"logits/chosen": 0.1349899023771286,
|
|
"logits/rejected": 0.11251580715179443,
|
|
"logps/chosen": -57.163658142089844,
|
|
"logps/ref_chosen": -56.766971588134766,
|
|
"logps/ref_rejected": -66.30504608154297,
|
|
"logps/rejected": -66.84687042236328,
|
|
"loss": 1.3729,
|
|
"margin_dpo/margin_mean": 0.14514318108558655,
|
|
"margin_dpo/margin_std": 0.6373658180236816,
|
|
"step": 56
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.48480796813964844,
|
|
"KL/mean": -0.6359806060791016,
|
|
"KL/rejected_KL_mean": -0.7871551513671875,
|
|
"KL/std": 0.5985706448554993,
|
|
"epoch": 0.08616780045351474,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.30234667658805847,
|
|
"fcm_dpo/q_t": 0.49245190620422363,
|
|
"grad_norm": 30.535959243774414,
|
|
"learning_rate": 4.17910447761194e-07,
|
|
"logits/chosen": 0.11555229127407074,
|
|
"logits/rejected": 0.05037260800600052,
|
|
"logps/chosen": -58.2525520324707,
|
|
"logps/ref_chosen": -57.76774597167969,
|
|
"logps/ref_rejected": -82.75698852539062,
|
|
"logps/rejected": -83.54414367675781,
|
|
"loss": 1.3575,
|
|
"margin_dpo/margin_mean": 0.3023465573787689,
|
|
"margin_dpo/margin_std": 0.6800855994224548,
|
|
"step": 57
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.585968017578125,
|
|
"KL/mean": -0.7032837867736816,
|
|
"KL/rejected_KL_mean": -0.8205986022949219,
|
|
"KL/std": 0.6994506120681763,
|
|
"epoch": 0.08767951625094482,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.23463168740272522,
|
|
"fcm_dpo/q_t": 0.4942084550857544,
|
|
"grad_norm": 30.1091365814209,
|
|
"learning_rate": 4.253731343283582e-07,
|
|
"logits/chosen": 0.055696651339530945,
|
|
"logits/rejected": 0.040357884019613266,
|
|
"logps/chosen": -73.35005187988281,
|
|
"logps/ref_chosen": -72.76408386230469,
|
|
"logps/ref_rejected": -84.49275207519531,
|
|
"logps/rejected": -85.3133544921875,
|
|
"loss": 1.3658,
|
|
"margin_dpo/margin_mean": 0.23463141918182373,
|
|
"margin_dpo/margin_std": 0.992514967918396,
|
|
"step": 58
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.5627365112304688,
|
|
"KL/mean": -0.6970531940460205,
|
|
"KL/rejected_KL_mean": -0.8313713073730469,
|
|
"KL/std": 0.6456717848777771,
|
|
"epoch": 0.08919123204837491,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.2686322033405304,
|
|
"fcm_dpo/q_t": 0.49330949783325195,
|
|
"grad_norm": 26.610946655273438,
|
|
"learning_rate": 4.3283582089552234e-07,
|
|
"logits/chosen": 0.13447514176368713,
|
|
"logits/rejected": 0.06816666573286057,
|
|
"logps/chosen": -50.383514404296875,
|
|
"logps/ref_chosen": -49.820777893066406,
|
|
"logps/ref_rejected": -77.14368438720703,
|
|
"logps/rejected": -77.97505187988281,
|
|
"loss": 1.3616,
|
|
"margin_dpo/margin_mean": 0.26863253116607666,
|
|
"margin_dpo/margin_std": 0.7996192574501038,
|
|
"step": 59
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.5796718597412109,
|
|
"KL/mean": -0.638730525970459,
|
|
"KL/rejected_KL_mean": -0.6977901458740234,
|
|
"KL/std": 0.59131920337677,
|
|
"epoch": 0.09070294784580499,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.11811408400535583,
|
|
"fcm_dpo/q_t": 0.49705010652542114,
|
|
"grad_norm": 29.352375030517578,
|
|
"learning_rate": 4.4029850746268654e-07,
|
|
"logits/chosen": 0.11834181100130081,
|
|
"logits/rejected": 0.11683766543865204,
|
|
"logps/chosen": -63.804443359375,
|
|
"logps/ref_chosen": -63.22477340698242,
|
|
"logps/ref_rejected": -61.360477447509766,
|
|
"logps/rejected": -62.058265686035156,
|
|
"loss": 1.3759,
|
|
"margin_dpo/margin_mean": 0.11811378598213196,
|
|
"margin_dpo/margin_std": 0.7242846488952637,
|
|
"step": 60
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.7418594360351562,
|
|
"KL/mean": -0.7584520578384399,
|
|
"KL/rejected_KL_mean": -0.7750473022460938,
|
|
"KL/std": 0.7113430500030518,
|
|
"epoch": 0.09221466364323508,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.03318530321121216,
|
|
"fcm_dpo/q_t": 0.4991641640663147,
|
|
"grad_norm": 28.153261184692383,
|
|
"learning_rate": 4.4776119402985074e-07,
|
|
"logits/chosen": 0.13738226890563965,
|
|
"logits/rejected": 0.10483655333518982,
|
|
"logps/chosen": -49.75865936279297,
|
|
"logps/ref_chosen": -49.01679992675781,
|
|
"logps/ref_rejected": -74.90817260742188,
|
|
"logps/rejected": -75.68321228027344,
|
|
"loss": 1.3853,
|
|
"margin_dpo/margin_mean": 0.03318554162979126,
|
|
"margin_dpo/margin_std": 0.9568428993225098,
|
|
"step": 61
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.7497825622558594,
|
|
"KL/mean": -0.8697815537452698,
|
|
"KL/rejected_KL_mean": -0.9897804260253906,
|
|
"KL/std": 0.646126389503479,
|
|
"epoch": 0.09372637944066516,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.23999658226966858,
|
|
"fcm_dpo/q_t": 0.49401551485061646,
|
|
"grad_norm": 28.551755905151367,
|
|
"learning_rate": 4.552238805970149e-07,
|
|
"logits/chosen": 0.12108489871025085,
|
|
"logits/rejected": 0.08170643448829651,
|
|
"logps/chosen": -63.501651763916016,
|
|
"logps/ref_chosen": -62.751869201660156,
|
|
"logps/ref_rejected": -78.93360900878906,
|
|
"logps/rejected": -79.92338562011719,
|
|
"loss": 1.3645,
|
|
"margin_dpo/margin_mean": 0.239996075630188,
|
|
"margin_dpo/margin_std": 0.9071613550186157,
|
|
"step": 62
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.5743579864501953,
|
|
"KL/mean": -0.8481771945953369,
|
|
"KL/rejected_KL_mean": -1.1219978332519531,
|
|
"KL/std": 0.7021645903587341,
|
|
"epoch": 0.09523809523809523,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.5476406812667847,
|
|
"fcm_dpo/q_t": 0.48634016513824463,
|
|
"grad_norm": 31.33037757873535,
|
|
"learning_rate": 4.626865671641791e-07,
|
|
"logits/chosen": 0.19000200927257538,
|
|
"logits/rejected": 0.16499674320220947,
|
|
"logps/chosen": -61.08960723876953,
|
|
"logps/ref_chosen": -60.51525115966797,
|
|
"logps/ref_rejected": -85.11021423339844,
|
|
"logps/rejected": -86.23220825195312,
|
|
"loss": 1.3342,
|
|
"margin_dpo/margin_mean": 0.5476408004760742,
|
|
"margin_dpo/margin_std": 0.8797933459281921,
|
|
"step": 63
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.8081417083740234,
|
|
"KL/mean": -0.8544296026229858,
|
|
"KL/rejected_KL_mean": -0.9007225036621094,
|
|
"KL/std": 0.7138861417770386,
|
|
"epoch": 0.09674981103552532,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.0925779640674591,
|
|
"fcm_dpo/q_t": 0.4976833462715149,
|
|
"grad_norm": 26.15093231201172,
|
|
"learning_rate": 4.701492537313433e-07,
|
|
"logits/chosen": 0.1063627153635025,
|
|
"logits/rejected": 0.0809309184551239,
|
|
"logps/chosen": -52.01498794555664,
|
|
"logps/ref_chosen": -51.20684814453125,
|
|
"logps/ref_rejected": -66.93081665039062,
|
|
"logps/rejected": -67.83154296875,
|
|
"loss": 1.3788,
|
|
"margin_dpo/margin_mean": 0.09257814288139343,
|
|
"margin_dpo/margin_std": 0.8439943790435791,
|
|
"step": 64
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.8604145050048828,
|
|
"KL/mean": -1.1185753345489502,
|
|
"KL/rejected_KL_mean": -1.376739501953125,
|
|
"KL/std": 0.8980135917663574,
|
|
"epoch": 0.0982615268329554,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.516322135925293,
|
|
"fcm_dpo/q_t": 0.4871528744697571,
|
|
"grad_norm": 30.78108787536621,
|
|
"learning_rate": 4.776119402985074e-07,
|
|
"logits/chosen": 0.19524145126342773,
|
|
"logits/rejected": 0.16560634970664978,
|
|
"logps/chosen": -68.14910888671875,
|
|
"logps/ref_chosen": -67.2886962890625,
|
|
"logps/ref_rejected": -74.44281005859375,
|
|
"logps/rejected": -75.81954956054688,
|
|
"loss": 1.3388,
|
|
"margin_dpo/margin_mean": 0.5163211226463318,
|
|
"margin_dpo/margin_std": 1.1658228635787964,
|
|
"step": 65
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.9204158782958984,
|
|
"KL/mean": -1.0870476961135864,
|
|
"KL/rejected_KL_mean": -1.2536849975585938,
|
|
"KL/std": 0.8283898830413818,
|
|
"epoch": 0.09977324263038549,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.33327189087867737,
|
|
"fcm_dpo/q_t": 0.4916980564594269,
|
|
"grad_norm": 29.318849563598633,
|
|
"learning_rate": 4.850746268656717e-07,
|
|
"logits/chosen": 0.09849925339221954,
|
|
"logits/rejected": 0.07456282526254654,
|
|
"logps/chosen": -71.66382598876953,
|
|
"logps/ref_chosen": -70.743408203125,
|
|
"logps/ref_rejected": -77.26499938964844,
|
|
"logps/rejected": -78.51869201660156,
|
|
"loss": 1.3565,
|
|
"margin_dpo/margin_mean": 0.33327144384384155,
|
|
"margin_dpo/margin_std": 1.1157681941986084,
|
|
"step": 66
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.7900562286376953,
|
|
"KL/mean": -0.9416995048522949,
|
|
"KL/rejected_KL_mean": -1.0933418273925781,
|
|
"KL/std": 0.7619195580482483,
|
|
"epoch": 0.10128495842781557,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.3032826781272888,
|
|
"fcm_dpo/q_t": 0.4924405813217163,
|
|
"grad_norm": 29.13656997680664,
|
|
"learning_rate": 4.925373134328357e-07,
|
|
"logits/chosen": 0.09606395661830902,
|
|
"logits/rejected": 0.040146730840206146,
|
|
"logps/chosen": -61.39265823364258,
|
|
"logps/ref_chosen": -60.60260009765625,
|
|
"logps/ref_rejected": -75.22235870361328,
|
|
"logps/rejected": -76.31570434570312,
|
|
"loss": 1.3584,
|
|
"margin_dpo/margin_mean": 0.3032824993133545,
|
|
"margin_dpo/margin_std": 0.9291085004806519,
|
|
"step": 67
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -1.2312240600585938,
|
|
"KL/mean": -1.395501732826233,
|
|
"KL/rejected_KL_mean": -1.559783935546875,
|
|
"KL/std": 0.9134629964828491,
|
|
"epoch": 0.10279667422524566,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.32855933904647827,
|
|
"fcm_dpo/q_t": 0.49181535840034485,
|
|
"grad_norm": 31.360973358154297,
|
|
"learning_rate": 5e-07,
|
|
"logits/chosen": 0.04667443037033081,
|
|
"logits/rejected": 0.017545577138662338,
|
|
"logps/chosen": -78.75959014892578,
|
|
"logps/ref_chosen": -77.52836608886719,
|
|
"logps/ref_rejected": -93.17778015136719,
|
|
"logps/rejected": -94.73756408691406,
|
|
"loss": 1.3573,
|
|
"margin_dpo/margin_mean": 0.3285592198371887,
|
|
"margin_dpo/margin_std": 1.1761231422424316,
|
|
"step": 68
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -1.152059555053711,
|
|
"KL/mean": -1.4604136943817139,
|
|
"KL/rejected_KL_mean": -1.7687644958496094,
|
|
"KL/std": 1.0009979009628296,
|
|
"epoch": 0.10430839002267574,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.6167032718658447,
|
|
"fcm_dpo/q_t": 0.4846718907356262,
|
|
"grad_norm": 30.895992279052734,
|
|
"learning_rate": 4.999965034812934e-07,
|
|
"logits/chosen": 0.09503303468227386,
|
|
"logits/rejected": 0.05234440043568611,
|
|
"logps/chosen": -67.09510803222656,
|
|
"logps/ref_chosen": -65.94305419921875,
|
|
"logps/ref_rejected": -89.7735595703125,
|
|
"logps/rejected": -91.54232788085938,
|
|
"loss": 1.3293,
|
|
"margin_dpo/margin_mean": 0.6167031526565552,
|
|
"margin_dpo/margin_std": 1.1830682754516602,
|
|
"step": 69
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -1.2276573181152344,
|
|
"KL/mean": -1.3978652954101562,
|
|
"KL/rejected_KL_mean": -1.5680675506591797,
|
|
"KL/std": 0.8889704942703247,
|
|
"epoch": 0.10582010582010581,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.3404098153114319,
|
|
"fcm_dpo/q_t": 0.4915243089199066,
|
|
"grad_norm": 28.746898651123047,
|
|
"learning_rate": 4.999860140229787e-07,
|
|
"logits/chosen": 0.15330246090888977,
|
|
"logits/rejected": 0.130038321018219,
|
|
"logps/chosen": -63.185569763183594,
|
|
"logps/ref_chosen": -61.95791244506836,
|
|
"logps/ref_rejected": -75.80945587158203,
|
|
"logps/rejected": -77.37752532958984,
|
|
"loss": 1.356,
|
|
"margin_dpo/margin_mean": 0.3404100239276886,
|
|
"margin_dpo/margin_std": 1.1732439994812012,
|
|
"step": 70
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -1.4630184173583984,
|
|
"KL/mean": -1.5811960697174072,
|
|
"KL/rejected_KL_mean": -1.6993789672851562,
|
|
"KL/std": 1.024835228919983,
|
|
"epoch": 0.1073318216175359,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.23635895550251007,
|
|
"fcm_dpo/q_t": 0.4940851926803589,
|
|
"grad_norm": 28.53828239440918,
|
|
"learning_rate": 4.999685319184688e-07,
|
|
"logits/chosen": 0.113439179956913,
|
|
"logits/rejected": 0.097498819231987,
|
|
"logps/chosen": -64.81059265136719,
|
|
"logps/ref_chosen": -63.34757995605469,
|
|
"logps/ref_rejected": -67.49658203125,
|
|
"logps/rejected": -69.19596099853516,
|
|
"loss": 1.3682,
|
|
"margin_dpo/margin_mean": 0.23635955154895782,
|
|
"margin_dpo/margin_std": 1.4618197679519653,
|
|
"step": 71
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -1.2217864990234375,
|
|
"KL/mean": -1.5561046600341797,
|
|
"KL/rejected_KL_mean": -1.8904190063476562,
|
|
"KL/std": 1.0268670320510864,
|
|
"epoch": 0.10884353741496598,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.6686317920684814,
|
|
"fcm_dpo/q_t": 0.48336172103881836,
|
|
"grad_norm": 30.480880737304688,
|
|
"learning_rate": 4.999440576567755e-07,
|
|
"logits/chosen": 0.12785610556602478,
|
|
"logits/rejected": 0.06399575620889664,
|
|
"logps/chosen": -57.081085205078125,
|
|
"logps/ref_chosen": -55.85929870605469,
|
|
"logps/ref_rejected": -68.45423889160156,
|
|
"logps/rejected": -70.34466552734375,
|
|
"loss": 1.3246,
|
|
"margin_dpo/margin_mean": 0.6686320304870605,
|
|
"margin_dpo/margin_std": 1.2384132146835327,
|
|
"step": 72
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -1.8382225036621094,
|
|
"KL/mean": -1.8982160091400146,
|
|
"KL/rejected_KL_mean": -1.9582157135009766,
|
|
"KL/std": 1.252902865409851,
|
|
"epoch": 0.11035525321239607,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.11999374628067017,
|
|
"fcm_dpo/q_t": 0.4971500039100647,
|
|
"grad_norm": 32.18018341064453,
|
|
"learning_rate": 4.999125919224965e-07,
|
|
"logits/chosen": 0.11486872285604477,
|
|
"logits/rejected": 0.10065251588821411,
|
|
"logps/chosen": -70.9770278930664,
|
|
"logps/ref_chosen": -69.13880920410156,
|
|
"logps/ref_rejected": -79.04586791992188,
|
|
"logps/rejected": -81.00408935546875,
|
|
"loss": 1.3809,
|
|
"margin_dpo/margin_mean": 0.11999452114105225,
|
|
"margin_dpo/margin_std": 1.542948603630066,
|
|
"step": 73
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -1.2243270874023438,
|
|
"KL/mean": -1.5955650806427002,
|
|
"KL/rejected_KL_mean": -1.9668006896972656,
|
|
"KL/std": 1.0309767723083496,
|
|
"epoch": 0.11186696900982615,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.7424741983413696,
|
|
"fcm_dpo/q_t": 0.4815741777420044,
|
|
"grad_norm": 27.766450881958008,
|
|
"learning_rate": 4.998741355957963e-07,
|
|
"logits/chosen": 0.13750191032886505,
|
|
"logits/rejected": 0.08446033298969269,
|
|
"logps/chosen": -51.14806365966797,
|
|
"logps/ref_chosen": -49.923736572265625,
|
|
"logps/ref_rejected": -81.73213958740234,
|
|
"logps/rejected": -83.69894409179688,
|
|
"loss": 1.3183,
|
|
"margin_dpo/margin_mean": 0.7424739599227905,
|
|
"margin_dpo/margin_std": 1.3656647205352783,
|
|
"step": 74
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -1.4066581726074219,
|
|
"KL/mean": -1.7639660835266113,
|
|
"KL/rejected_KL_mean": -2.1212711334228516,
|
|
"KL/std": 1.1110789775848389,
|
|
"epoch": 0.11337868480725624,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.714612603187561,
|
|
"fcm_dpo/q_t": 0.48227035999298096,
|
|
"grad_norm": 26.187969207763672,
|
|
"learning_rate": 4.998286897523808e-07,
|
|
"logits/chosen": 0.0853753462433815,
|
|
"logits/rejected": 0.055323123931884766,
|
|
"logps/chosen": -47.47541046142578,
|
|
"logps/ref_chosen": -46.06875228881836,
|
|
"logps/ref_rejected": -66.1181411743164,
|
|
"logps/rejected": -68.23941040039062,
|
|
"loss": 1.3208,
|
|
"margin_dpo/margin_mean": 0.7146125435829163,
|
|
"margin_dpo/margin_std": 1.3723992109298706,
|
|
"step": 75
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -1.5841503143310547,
|
|
"KL/mean": -1.7442498207092285,
|
|
"KL/rejected_KL_mean": -1.9043540954589844,
|
|
"KL/std": 1.209139108657837,
|
|
"epoch": 0.11489040060468632,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.3202054500579834,
|
|
"fcm_dpo/q_t": 0.49202924966812134,
|
|
"grad_norm": 29.322471618652344,
|
|
"learning_rate": 4.997762556634679e-07,
|
|
"logits/chosen": 0.1321137547492981,
|
|
"logits/rejected": 0.0878082737326622,
|
|
"logps/chosen": -55.64690017700195,
|
|
"logps/ref_chosen": -54.06275177001953,
|
|
"logps/ref_rejected": -74.87464141845703,
|
|
"logps/rejected": -76.77899932861328,
|
|
"loss": 1.3608,
|
|
"margin_dpo/margin_mean": 0.32020577788352966,
|
|
"margin_dpo/margin_std": 1.5879979133605957,
|
|
"step": 76
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -1.5995197296142578,
|
|
"KL/mean": -1.999023675918579,
|
|
"KL/rejected_KL_mean": -2.398529052734375,
|
|
"KL/std": 1.2187130451202393,
|
|
"epoch": 0.1164021164021164,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.7990062236785889,
|
|
"fcm_dpo/q_t": 0.48018041253089905,
|
|
"grad_norm": 29.149084091186523,
|
|
"learning_rate": 4.99716834795752e-07,
|
|
"logits/chosen": 0.18161866068840027,
|
|
"logits/rejected": 0.1386856734752655,
|
|
"logps/chosen": -54.67561340332031,
|
|
"logps/ref_chosen": -53.07609176635742,
|
|
"logps/ref_rejected": -74.45601654052734,
|
|
"logps/rejected": -76.85454559326172,
|
|
"loss": 1.3134,
|
|
"margin_dpo/margin_mean": 0.7990065813064575,
|
|
"margin_dpo/margin_std": 1.432692289352417,
|
|
"step": 77
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -1.8092498779296875,
|
|
"KL/mean": -2.028080463409424,
|
|
"KL/rejected_KL_mean": -2.2469139099121094,
|
|
"KL/std": 1.2629542350769043,
|
|
"epoch": 0.11791383219954649,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.437660276889801,
|
|
"fcm_dpo/q_t": 0.48911595344543457,
|
|
"grad_norm": 29.325321197509766,
|
|
"learning_rate": 4.996504288113623e-07,
|
|
"logits/chosen": 0.11230715364217758,
|
|
"logits/rejected": 0.09202325344085693,
|
|
"logps/chosen": -69.53466796875,
|
|
"logps/ref_chosen": -67.72541809082031,
|
|
"logps/ref_rejected": -79.03926849365234,
|
|
"logps/rejected": -81.28617858886719,
|
|
"loss": 1.348,
|
|
"margin_dpo/margin_mean": 0.437660276889801,
|
|
"margin_dpo/margin_std": 1.4120639562606812,
|
|
"step": 78
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -1.7188167572021484,
|
|
"KL/mean": -2.289360523223877,
|
|
"KL/rejected_KL_mean": -2.859905242919922,
|
|
"KL/std": 1.5313067436218262,
|
|
"epoch": 0.11942554799697656,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.1410882472991943,
|
|
"fcm_dpo/q_t": 0.471996545791626,
|
|
"grad_norm": 30.843666076660156,
|
|
"learning_rate": 4.995770395678171e-07,
|
|
"logits/chosen": 0.18221601843833923,
|
|
"logits/rejected": 0.12115125358104706,
|
|
"logps/chosen": -53.87946319580078,
|
|
"logps/ref_chosen": -52.16064453125,
|
|
"logps/ref_rejected": -83.31062316894531,
|
|
"logps/rejected": -86.17052459716797,
|
|
"loss": 1.2853,
|
|
"margin_dpo/margin_mean": 1.1410883665084839,
|
|
"margin_dpo/margin_std": 1.9968395233154297,
|
|
"step": 79
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -2.0572662353515625,
|
|
"KL/mean": -2.3389010429382324,
|
|
"KL/rejected_KL_mean": -2.620532989501953,
|
|
"KL/std": 1.4025987386703491,
|
|
"epoch": 0.12093726379440665,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.5632685422897339,
|
|
"fcm_dpo/q_t": 0.4860914945602417,
|
|
"grad_norm": 28.519983291625977,
|
|
"learning_rate": 4.994966691179711e-07,
|
|
"logits/chosen": 0.18296973407268524,
|
|
"logits/rejected": 0.12012198567390442,
|
|
"logps/chosen": -63.46782684326172,
|
|
"logps/ref_chosen": -61.410560607910156,
|
|
"logps/ref_rejected": -78.66004943847656,
|
|
"logps/rejected": -81.28057861328125,
|
|
"loss": 1.3412,
|
|
"margin_dpo/margin_mean": 0.5632679462432861,
|
|
"margin_dpo/margin_std": 1.9810881614685059,
|
|
"step": 80
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -2.139486312866211,
|
|
"KL/mean": -2.57236909866333,
|
|
"KL/rejected_KL_mean": -3.0052452087402344,
|
|
"KL/std": 1.448852300643921,
|
|
"epoch": 0.12244897959183673,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.8657598495483398,
|
|
"fcm_dpo/q_t": 0.4786525368690491,
|
|
"grad_norm": 29.090269088745117,
|
|
"learning_rate": 4.994093197099587e-07,
|
|
"logits/chosen": 0.13764840364456177,
|
|
"logits/rejected": 0.10329818725585938,
|
|
"logps/chosen": -65.94385528564453,
|
|
"logps/ref_chosen": -63.80437088012695,
|
|
"logps/ref_rejected": -79.3484115600586,
|
|
"logps/rejected": -82.35365295410156,
|
|
"loss": 1.3095,
|
|
"margin_dpo/margin_mean": 0.8657599091529846,
|
|
"margin_dpo/margin_std": 1.7277480363845825,
|
|
"step": 81
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -1.7470703125,
|
|
"KL/mean": -2.2802882194519043,
|
|
"KL/rejected_KL_mean": -2.813508987426758,
|
|
"KL/std": 1.3716259002685547,
|
|
"epoch": 0.12396069538926682,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.066437005996704,
|
|
"fcm_dpo/q_t": 0.47353169322013855,
|
|
"grad_norm": 26.587879180908203,
|
|
"learning_rate": 4.993149937871306e-07,
|
|
"logits/chosen": 0.1174364686012268,
|
|
"logits/rejected": 0.05398055911064148,
|
|
"logps/chosen": -50.564964294433594,
|
|
"logps/ref_chosen": -48.817893981933594,
|
|
"logps/ref_rejected": -70.31497955322266,
|
|
"logps/rejected": -73.12848663330078,
|
|
"loss": 1.2885,
|
|
"margin_dpo/margin_mean": 1.066436767578125,
|
|
"margin_dpo/margin_std": 1.5313122272491455,
|
|
"step": 82
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -2.0619564056396484,
|
|
"KL/mean": -2.641958713531494,
|
|
"KL/rejected_KL_mean": -3.2219581604003906,
|
|
"KL/std": 1.4681512117385864,
|
|
"epoch": 0.1254724111866969,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.1600065231323242,
|
|
"fcm_dpo/q_t": 0.47136521339416504,
|
|
"grad_norm": 29.170665740966797,
|
|
"learning_rate": 4.992136939879856e-07,
|
|
"logits/chosen": 0.20790967345237732,
|
|
"logits/rejected": 0.15718865394592285,
|
|
"logps/chosen": -59.212730407714844,
|
|
"logps/ref_chosen": -57.15077209472656,
|
|
"logps/ref_rejected": -75.1710205078125,
|
|
"logps/rejected": -78.39298248291016,
|
|
"loss": 1.282,
|
|
"margin_dpo/margin_mean": 1.1600065231323242,
|
|
"margin_dpo/margin_std": 1.8252286911010742,
|
|
"step": 83
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -2.4618263244628906,
|
|
"KL/mean": -2.9306540489196777,
|
|
"KL/rejected_KL_mean": -3.3994903564453125,
|
|
"KL/std": 1.7998237609863281,
|
|
"epoch": 0.12698412698412698,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.9376668930053711,
|
|
"fcm_dpo/q_t": 0.47680941224098206,
|
|
"grad_norm": 30.444793701171875,
|
|
"learning_rate": 4.991054231460969e-07,
|
|
"logits/chosen": 0.16930170357227325,
|
|
"logits/rejected": 0.12721401453018188,
|
|
"logps/chosen": -67.23912811279297,
|
|
"logps/ref_chosen": -64.77729797363281,
|
|
"logps/ref_rejected": -84.71949768066406,
|
|
"logps/rejected": -88.11898803710938,
|
|
"loss": 1.3041,
|
|
"margin_dpo/margin_mean": 0.9376665353775024,
|
|
"margin_dpo/margin_std": 1.9332594871520996,
|
|
"step": 84
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -2.2590370178222656,
|
|
"KL/mean": -2.8957643508911133,
|
|
"KL/rejected_KL_mean": -3.532489776611328,
|
|
"KL/std": 1.7578613758087158,
|
|
"epoch": 0.12849584278155707,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.273453712463379,
|
|
"fcm_dpo/q_t": 0.4685523509979248,
|
|
"grad_norm": 26.91790199279785,
|
|
"learning_rate": 4.989901842900325e-07,
|
|
"logits/chosen": 0.1867402195930481,
|
|
"logits/rejected": 0.14158368110656738,
|
|
"logps/chosen": -52.51073455810547,
|
|
"logps/ref_chosen": -50.25169372558594,
|
|
"logps/ref_rejected": -66.55439758300781,
|
|
"logps/rejected": -70.08688354492188,
|
|
"loss": 1.273,
|
|
"margin_dpo/margin_mean": 1.273453712463379,
|
|
"margin_dpo/margin_std": 1.9371047019958496,
|
|
"step": 85
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -2.8414573669433594,
|
|
"KL/mean": -3.319411516189575,
|
|
"KL/rejected_KL_mean": -3.79736328125,
|
|
"KL/std": 1.924058437347412,
|
|
"epoch": 0.13000755857898716,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.9559066295623779,
|
|
"fcm_dpo/q_t": 0.4763745963573456,
|
|
"grad_norm": 27.2183895111084,
|
|
"learning_rate": 4.988679806432711e-07,
|
|
"logits/chosen": 0.18826769292354584,
|
|
"logits/rejected": 0.1696854829788208,
|
|
"logps/chosen": -63.57063674926758,
|
|
"logps/ref_chosen": -60.72917938232422,
|
|
"logps/ref_rejected": -72.30961608886719,
|
|
"logps/rejected": -76.10697937011719,
|
|
"loss": 1.3038,
|
|
"margin_dpo/margin_mean": 0.9559069871902466,
|
|
"margin_dpo/margin_std": 2.079893112182617,
|
|
"step": 86
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -2.985912322998047,
|
|
"KL/mean": -3.500776767730713,
|
|
"KL/rejected_KL_mean": -4.0156402587890625,
|
|
"KL/std": 2.0505805015563965,
|
|
"epoch": 0.13151927437641722,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.02973473072052,
|
|
"fcm_dpo/q_t": 0.47460830211639404,
|
|
"grad_norm": 30.657825469970703,
|
|
"learning_rate": 4.987388156241114e-07,
|
|
"logits/chosen": 0.15459910035133362,
|
|
"logits/rejected": 0.09668188542127609,
|
|
"logps/chosen": -68.74388122558594,
|
|
"logps/ref_chosen": -65.75796508789062,
|
|
"logps/ref_rejected": -84.81159973144531,
|
|
"logps/rejected": -88.82723999023438,
|
|
"loss": 1.3014,
|
|
"margin_dpo/margin_mean": 1.0297355651855469,
|
|
"margin_dpo/margin_std": 2.4715027809143066,
|
|
"step": 87
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -3.0392837524414062,
|
|
"KL/mean": -3.451742649078369,
|
|
"KL/rejected_KL_mean": -3.86419677734375,
|
|
"KL/std": 2.3190979957580566,
|
|
"epoch": 0.1330309901738473,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.8249186873435974,
|
|
"fcm_dpo/q_t": 0.4798330068588257,
|
|
"grad_norm": 30.353662490844727,
|
|
"learning_rate": 4.986026928455767e-07,
|
|
"logits/chosen": 0.22335346043109894,
|
|
"logits/rejected": 0.19652315974235535,
|
|
"logps/chosen": -65.86331176757812,
|
|
"logps/ref_chosen": -62.82402801513672,
|
|
"logps/ref_rejected": -74.9607162475586,
|
|
"logps/rejected": -78.82491302490234,
|
|
"loss": 1.3279,
|
|
"margin_dpo/margin_mean": 0.8249189853668213,
|
|
"margin_dpo/margin_std": 2.912872314453125,
|
|
"step": 88
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -2.5309410095214844,
|
|
"KL/mean": -3.233988046646118,
|
|
"KL/rejected_KL_mean": -3.9370384216308594,
|
|
"KL/std": 2.136443853378296,
|
|
"epoch": 0.1345427059712774,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.4060938358306885,
|
|
"fcm_dpo/q_t": 0.4656294584274292,
|
|
"grad_norm": 29.17704200744629,
|
|
"learning_rate": 4.984596161153135e-07,
|
|
"logits/chosen": 0.26556918025016785,
|
|
"logits/rejected": 0.18124108016490936,
|
|
"logps/chosen": -43.722373962402344,
|
|
"logps/ref_chosen": -41.191436767578125,
|
|
"logps/ref_rejected": -85.44769287109375,
|
|
"logps/rejected": -89.38473510742188,
|
|
"loss": 1.2662,
|
|
"margin_dpo/margin_mean": 1.4060943126678467,
|
|
"margin_dpo/margin_std": 2.436030864715576,
|
|
"step": 89
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -3.0982589721679688,
|
|
"KL/mean": -3.7379865646362305,
|
|
"KL/rejected_KL_mean": -4.377716064453125,
|
|
"KL/std": 2.4204049110412598,
|
|
"epoch": 0.1360544217687075,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.2794535160064697,
|
|
"fcm_dpo/q_t": 0.46857839822769165,
|
|
"grad_norm": 29.27582359313965,
|
|
"learning_rate": 4.983095894354857e-07,
|
|
"logits/chosen": 0.14284776151180267,
|
|
"logits/rejected": 0.08878590166568756,
|
|
"logps/chosen": -59.682167053222656,
|
|
"logps/ref_chosen": -56.58390808105469,
|
|
"logps/ref_rejected": -86.86978149414062,
|
|
"logps/rejected": -91.24749755859375,
|
|
"loss": 1.2781,
|
|
"margin_dpo/margin_mean": 1.2794532775878906,
|
|
"margin_dpo/margin_std": 2.4565696716308594,
|
|
"step": 90
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -3.1206626892089844,
|
|
"KL/mean": -3.703566074371338,
|
|
"KL/rejected_KL_mean": -4.286468505859375,
|
|
"KL/std": 2.3108620643615723,
|
|
"epoch": 0.13756613756613756,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.1658039093017578,
|
|
"fcm_dpo/q_t": 0.4718097448348999,
|
|
"grad_norm": 25.670793533325195,
|
|
"learning_rate": 4.98152617002662e-07,
|
|
"logits/chosen": 0.183273583650589,
|
|
"logits/rejected": 0.1379939168691635,
|
|
"logps/chosen": -55.50300598144531,
|
|
"logps/ref_chosen": -52.38234329223633,
|
|
"logps/ref_rejected": -72.17642211914062,
|
|
"logps/rejected": -76.462890625,
|
|
"loss": 1.2931,
|
|
"margin_dpo/margin_mean": 1.1658039093017578,
|
|
"margin_dpo/margin_std": 2.827420711517334,
|
|
"step": 91
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -3.2123165130615234,
|
|
"KL/mean": -4.000484466552734,
|
|
"KL/rejected_KL_mean": -4.788658142089844,
|
|
"KL/std": 2.644622802734375,
|
|
"epoch": 0.13907785336356765,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.5763365030288696,
|
|
"fcm_dpo/q_t": 0.46193015575408936,
|
|
"grad_norm": 26.96859359741211,
|
|
"learning_rate": 4.979887032076988e-07,
|
|
"logits/chosen": 0.2296270728111267,
|
|
"logits/rejected": 0.1883561909198761,
|
|
"logps/chosen": -56.22101593017578,
|
|
"logps/ref_chosen": -53.00870132446289,
|
|
"logps/ref_rejected": -79.77812957763672,
|
|
"logps/rejected": -84.56678771972656,
|
|
"loss": 1.2593,
|
|
"margin_dpo/margin_mean": 1.576336145401001,
|
|
"margin_dpo/margin_std": 3.1369757652282715,
|
|
"step": 92
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -3.5621109008789062,
|
|
"KL/mean": -4.066263675689697,
|
|
"KL/rejected_KL_mean": -4.570411682128906,
|
|
"KL/std": 2.607365608215332,
|
|
"epoch": 0.14058956916099774,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.0083057880401611,
|
|
"fcm_dpo/q_t": 0.47590065002441406,
|
|
"grad_norm": 24.573469161987305,
|
|
"learning_rate": 4.978178526356172e-07,
|
|
"logits/chosen": 0.19418981671333313,
|
|
"logits/rejected": 0.16540709137916565,
|
|
"logps/chosen": -48.46916198730469,
|
|
"logps/ref_chosen": -44.90705108642578,
|
|
"logps/ref_rejected": -58.7879524230957,
|
|
"logps/rejected": -63.35836410522461,
|
|
"loss": 1.3131,
|
|
"margin_dpo/margin_mean": 1.008305549621582,
|
|
"margin_dpo/margin_std": 3.2077713012695312,
|
|
"step": 93
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -3.274076461791992,
|
|
"KL/mean": -4.20676851272583,
|
|
"KL/rejected_KL_mean": -5.13946533203125,
|
|
"KL/std": 3.2196216583251953,
|
|
"epoch": 0.1421012849584278,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.865389347076416,
|
|
"fcm_dpo/q_t": 0.4552207291126251,
|
|
"grad_norm": 27.44774055480957,
|
|
"learning_rate": 4.976400700654751e-07,
|
|
"logits/chosen": 0.253595232963562,
|
|
"logits/rejected": 0.2133285403251648,
|
|
"logps/chosen": -63.21184539794922,
|
|
"logps/ref_chosen": -59.93777084350586,
|
|
"logps/ref_rejected": -79.3138427734375,
|
|
"logps/rejected": -84.45330810546875,
|
|
"loss": 1.2472,
|
|
"margin_dpo/margin_mean": 1.8653895854949951,
|
|
"margin_dpo/margin_std": 3.972233295440674,
|
|
"step": 94
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -4.243429183959961,
|
|
"KL/mean": -5.0223283767700195,
|
|
"KL/rejected_KL_mean": -5.8012237548828125,
|
|
"KL/std": 2.6840476989746094,
|
|
"epoch": 0.1436130007558579,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.5577924251556396,
|
|
"fcm_dpo/q_t": 0.4623205065727234,
|
|
"grad_norm": 29.382665634155273,
|
|
"learning_rate": 4.974553604702332e-07,
|
|
"logits/chosen": 0.14584560692310333,
|
|
"logits/rejected": 0.08205562084913254,
|
|
"logps/chosen": -64.41191864013672,
|
|
"logps/ref_chosen": -60.168487548828125,
|
|
"logps/ref_rejected": -90.73665618896484,
|
|
"logps/rejected": -96.53787994384766,
|
|
"loss": 1.2645,
|
|
"margin_dpo/margin_mean": 1.5577917098999023,
|
|
"margin_dpo/margin_std": 3.3648838996887207,
|
|
"step": 95
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -4.170978546142578,
|
|
"KL/mean": -5.111754417419434,
|
|
"KL/rejected_KL_mean": -6.0525360107421875,
|
|
"KL/std": 3.405519962310791,
|
|
"epoch": 0.14512471655328799,
|
|
"fcm_dpo/beta": 0.1004667580127716,
|
|
"fcm_dpo/delta": 0.0929190143942833,
|
|
"fcm_dpo/margin": 1.8815617561340332,
|
|
"fcm_dpo/q_t": 0.4562252461910248,
|
|
"grad_norm": 27.50398063659668,
|
|
"learning_rate": 4.972637290166157e-07,
|
|
"logits/chosen": 0.19873782992362976,
|
|
"logits/rejected": 0.1555817723274231,
|
|
"logps/chosen": -64.83975219726562,
|
|
"logps/ref_chosen": -60.66877746582031,
|
|
"logps/ref_rejected": -88.30673217773438,
|
|
"logps/rejected": -94.35926818847656,
|
|
"loss": 1.2552,
|
|
"margin_dpo/margin_mean": 1.8815608024597168,
|
|
"margin_dpo/margin_std": 4.454163551330566,
|
|
"step": 96
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -4.948160171508789,
|
|
"KL/mean": -5.476700782775879,
|
|
"KL/rejected_KL_mean": -6.005241394042969,
|
|
"KL/std": 3.6406145095825195,
|
|
"epoch": 0.14663643235071808,
|
|
"fcm_dpo/beta": 0.10093352198600769,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.057085633277893,
|
|
"fcm_dpo/q_t": 0.474324107170105,
|
|
"grad_norm": 36.619651794433594,
|
|
"learning_rate": 4.970651810649666e-07,
|
|
"logits/chosen": 0.14290565252304077,
|
|
"logits/rejected": 0.09900049865245819,
|
|
"logps/chosen": -69.99227905273438,
|
|
"logps/ref_chosen": -65.04412078857422,
|
|
"logps/ref_rejected": -78.42092895507812,
|
|
"logps/rejected": -84.4261703491211,
|
|
"loss": 1.3432,
|
|
"margin_dpo/margin_mean": 1.0570858716964722,
|
|
"margin_dpo/margin_std": 4.968430995941162,
|
|
"step": 97
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -4.596563339233398,
|
|
"KL/mean": -4.943195343017578,
|
|
"KL/rejected_KL_mean": -5.289825439453125,
|
|
"KL/std": 3.2688210010528564,
|
|
"epoch": 0.14814814814814814,
|
|
"fcm_dpo/beta": 0.10093352198600769,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.6932592391967773,
|
|
"fcm_dpo/q_t": 0.48246508836746216,
|
|
"grad_norm": 30.675567626953125,
|
|
"learning_rate": 4.968597221690985e-07,
|
|
"logits/chosen": 0.25428539514541626,
|
|
"logits/rejected": 0.22620899975299835,
|
|
"logps/chosen": -60.09979248046875,
|
|
"logps/ref_chosen": -55.503231048583984,
|
|
"logps/ref_rejected": -72.81553649902344,
|
|
"logps/rejected": -78.10536193847656,
|
|
"loss": 1.3448,
|
|
"margin_dpo/margin_mean": 0.6932586431503296,
|
|
"margin_dpo/margin_std": 3.2555806636810303,
|
|
"step": 98
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -4.628164291381836,
|
|
"KL/mean": -5.147042751312256,
|
|
"KL/rejected_KL_mean": -5.665924072265625,
|
|
"KL/std": 3.482905626296997,
|
|
"epoch": 0.14965986394557823,
|
|
"fcm_dpo/beta": 0.10093352198600769,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.0377589464187622,
|
|
"fcm_dpo/q_t": 0.47516801953315735,
|
|
"grad_norm": 32.826751708984375,
|
|
"learning_rate": 4.966473580761389e-07,
|
|
"logits/chosen": 0.24727635085582733,
|
|
"logits/rejected": 0.2075592279434204,
|
|
"logps/chosen": -63.20380401611328,
|
|
"logps/ref_chosen": -58.57563781738281,
|
|
"logps/ref_rejected": -78.693603515625,
|
|
"logps/rejected": -84.35952758789062,
|
|
"loss": 1.3357,
|
|
"margin_dpo/margin_mean": 1.037758469581604,
|
|
"margin_dpo/margin_std": 4.538744926452637,
|
|
"step": 99
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -5.31182861328125,
|
|
"KL/mean": -6.015585899353027,
|
|
"KL/rejected_KL_mean": -6.719341278076172,
|
|
"KL/std": 4.146139144897461,
|
|
"epoch": 0.15117157974300832,
|
|
"fcm_dpo/beta": 0.10093352198600769,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.407517433166504,
|
|
"fcm_dpo/q_t": 0.46836134791374207,
|
|
"grad_norm": 33.68187713623047,
|
|
"learning_rate": 4.964280947263676e-07,
|
|
"logits/chosen": 0.20970787107944489,
|
|
"logits/rejected": 0.20217393338680267,
|
|
"logps/chosen": -84.895263671875,
|
|
"logps/ref_chosen": -79.58343505859375,
|
|
"logps/ref_rejected": -92.152587890625,
|
|
"logps/rejected": -98.8719253540039,
|
|
"loss": 1.3362,
|
|
"margin_dpo/margin_mean": 1.407517910003662,
|
|
"margin_dpo/margin_std": 5.832108497619629,
|
|
"step": 100
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -4.249458312988281,
|
|
"KL/mean": -5.285594940185547,
|
|
"KL/rejected_KL_mean": -6.321733474731445,
|
|
"KL/std": 3.651992082595825,
|
|
"epoch": 0.15268329554043839,
|
|
"fcm_dpo/beta": 0.1013248860836029,
|
|
"fcm_dpo/delta": 0.07724865525960922,
|
|
"fcm_dpo/margin": 2.0722780227661133,
|
|
"fcm_dpo/q_t": 0.4495403468608856,
|
|
"grad_norm": 26.05370330810547,
|
|
"learning_rate": 4.96201938253052e-07,
|
|
"logits/chosen": 0.24619242548942566,
|
|
"logits/rejected": 0.20622767508029938,
|
|
"logps/chosen": -56.582244873046875,
|
|
"logps/ref_chosen": -52.332786560058594,
|
|
"logps/ref_rejected": -69.55589294433594,
|
|
"logps/rejected": -75.87763214111328,
|
|
"loss": 1.2313,
|
|
"margin_dpo/margin_mean": 2.072277545928955,
|
|
"margin_dpo/margin_std": 4.133984565734863,
|
|
"step": 101
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -5.286844253540039,
|
|
"KL/mean": -5.713034629821777,
|
|
"KL/rejected_KL_mean": -6.139228820800781,
|
|
"KL/std": 3.665177822113037,
|
|
"epoch": 0.15419501133786848,
|
|
"fcm_dpo/beta": 0.10171624273061752,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.8523828983306885,
|
|
"fcm_dpo/q_t": 0.48202332854270935,
|
|
"grad_norm": 33.209964752197266,
|
|
"learning_rate": 4.959688949822748e-07,
|
|
"logits/chosen": 0.16377325356006622,
|
|
"logits/rejected": 0.12484108656644821,
|
|
"logps/chosen": -70.03032684326172,
|
|
"logps/ref_chosen": -64.74348449707031,
|
|
"logps/ref_rejected": -69.06132507324219,
|
|
"logps/rejected": -75.2005615234375,
|
|
"loss": 1.3722,
|
|
"margin_dpo/margin_mean": 0.8523826599121094,
|
|
"margin_dpo/margin_std": 5.390408039093018,
|
|
"step": 102
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -4.834880828857422,
|
|
"KL/mean": -5.6018266677856445,
|
|
"KL/rejected_KL_mean": -6.368770599365234,
|
|
"KL/std": 3.9508004188537598,
|
|
"epoch": 0.15570672713529857,
|
|
"fcm_dpo/beta": 0.10171624273061752,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.533888816833496,
|
|
"fcm_dpo/q_t": 0.462660551071167,
|
|
"grad_norm": 30.056943893432617,
|
|
"learning_rate": 4.957289714327572e-07,
|
|
"logits/chosen": 0.26205867528915405,
|
|
"logits/rejected": 0.22815784811973572,
|
|
"logps/chosen": -68.67152404785156,
|
|
"logps/ref_chosen": -63.83664321899414,
|
|
"logps/ref_rejected": -79.32362365722656,
|
|
"logps/rejected": -85.69239807128906,
|
|
"loss": 1.2721,
|
|
"margin_dpo/margin_mean": 1.533888578414917,
|
|
"margin_dpo/margin_std": 3.7162890434265137,
|
|
"step": 103
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -4.70960807800293,
|
|
"KL/mean": -5.844485282897949,
|
|
"KL/rejected_KL_mean": -6.9793701171875,
|
|
"KL/std": 4.977723121643066,
|
|
"epoch": 0.15721844293272866,
|
|
"fcm_dpo/beta": 0.10203637182712555,
|
|
"fcm_dpo/delta": 0.06274870783090591,
|
|
"fcm_dpo/margin": 2.269758462905884,
|
|
"fcm_dpo/q_t": 0.4507865607738495,
|
|
"grad_norm": 31.535688400268555,
|
|
"learning_rate": 4.954821743156767e-07,
|
|
"logits/chosen": 0.2682804465293884,
|
|
"logits/rejected": 0.1793670356273651,
|
|
"logps/chosen": -65.70881652832031,
|
|
"logps/ref_chosen": -60.99920654296875,
|
|
"logps/ref_rejected": -98.84645080566406,
|
|
"logps/rejected": -105.82582092285156,
|
|
"loss": 1.2646,
|
|
"margin_dpo/margin_mean": 2.2697577476501465,
|
|
"margin_dpo/margin_std": 6.315805912017822,
|
|
"step": 104
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -5.341209411621094,
|
|
"KL/mean": -6.223649978637695,
|
|
"KL/rejected_KL_mean": -7.106090545654297,
|
|
"KL/std": 3.9617457389831543,
|
|
"epoch": 0.15873015873015872,
|
|
"fcm_dpo/beta": 0.10235650837421417,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.7648828029632568,
|
|
"fcm_dpo/q_t": 0.4587235152721405,
|
|
"grad_norm": 31.592397689819336,
|
|
"learning_rate": 4.952285105344791e-07,
|
|
"logits/chosen": 0.18700921535491943,
|
|
"logits/rejected": 0.1328585147857666,
|
|
"logps/chosen": -76.29147338867188,
|
|
"logps/ref_chosen": -70.95027160644531,
|
|
"logps/ref_rejected": -87.88340759277344,
|
|
"logps/rejected": -94.989501953125,
|
|
"loss": 1.2747,
|
|
"margin_dpo/margin_mean": 1.7648820877075195,
|
|
"margin_dpo/margin_std": 4.891418933868408,
|
|
"step": 105
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -5.016611099243164,
|
|
"KL/mean": -5.786171913146973,
|
|
"KL/rejected_KL_mean": -6.555728912353516,
|
|
"KL/std": 3.899449348449707,
|
|
"epoch": 0.1602418745275888,
|
|
"fcm_dpo/beta": 0.10235650837421417,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.5391156673431396,
|
|
"fcm_dpo/q_t": 0.4623813033103943,
|
|
"grad_norm": 31.88960838317871,
|
|
"learning_rate": 4.949679871846857e-07,
|
|
"logits/chosen": 0.24787446856498718,
|
|
"logits/rejected": 0.23365378379821777,
|
|
"logps/chosen": -67.47594451904297,
|
|
"logps/ref_chosen": -62.45933151245117,
|
|
"logps/ref_rejected": -67.00595092773438,
|
|
"logps/rejected": -73.56167602539062,
|
|
"loss": 1.3064,
|
|
"margin_dpo/margin_mean": 1.5391154289245605,
|
|
"margin_dpo/margin_std": 5.316427230834961,
|
|
"step": 106
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -6.450595855712891,
|
|
"KL/mean": -6.885671138763428,
|
|
"KL/rejected_KL_mean": -7.320747375488281,
|
|
"KL/std": 4.911082744598389,
|
|
"epoch": 0.1617535903250189,
|
|
"fcm_dpo/beta": 0.10235650837421417,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.8701525926589966,
|
|
"fcm_dpo/q_t": 0.4771697223186493,
|
|
"grad_norm": 43.05903244018555,
|
|
"learning_rate": 4.947006115536947e-07,
|
|
"logits/chosen": 0.18717175722122192,
|
|
"logits/rejected": 0.16289639472961426,
|
|
"logps/chosen": -82.28855895996094,
|
|
"logps/ref_chosen": -75.83796691894531,
|
|
"logps/ref_rejected": -87.74038696289062,
|
|
"logps/rejected": -95.06112670898438,
|
|
"loss": 1.3957,
|
|
"margin_dpo/margin_mean": 0.8701522946357727,
|
|
"margin_dpo/margin_std": 6.188943862915039,
|
|
"step": 107
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -5.454765319824219,
|
|
"KL/mean": -6.393495559692383,
|
|
"KL/rejected_KL_mean": -7.332218170166016,
|
|
"KL/std": 4.075876235961914,
|
|
"epoch": 0.16326530612244897,
|
|
"fcm_dpo/beta": 0.10235650837421417,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.8774504661560059,
|
|
"fcm_dpo/q_t": 0.45611995458602905,
|
|
"grad_norm": 29.322237014770508,
|
|
"learning_rate": 4.944263911205772e-07,
|
|
"logits/chosen": 0.1697622388601303,
|
|
"logits/rejected": 0.1390148103237152,
|
|
"logps/chosen": -73.84800720214844,
|
|
"logps/ref_chosen": -68.39323425292969,
|
|
"logps/ref_rejected": -83.24267578125,
|
|
"logps/rejected": -90.57489013671875,
|
|
"loss": 1.2765,
|
|
"margin_dpo/margin_mean": 1.877450704574585,
|
|
"margin_dpo/margin_std": 5.375058650970459,
|
|
"step": 108
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -4.853111267089844,
|
|
"KL/mean": -6.346570014953613,
|
|
"KL/rejected_KL_mean": -7.840030670166016,
|
|
"KL/std": 4.951888084411621,
|
|
"epoch": 0.16477702191987906,
|
|
"fcm_dpo/beta": 0.103099524974823,
|
|
"fcm_dpo/delta": 0.09346499294042587,
|
|
"fcm_dpo/margin": 2.9869184494018555,
|
|
"fcm_dpo/q_t": 0.43255865573883057,
|
|
"grad_norm": 28.842710494995117,
|
|
"learning_rate": 4.941453335558681e-07,
|
|
"logits/chosen": 0.17274720966815948,
|
|
"logits/rejected": 0.11982676386833191,
|
|
"logps/chosen": -60.380592346191406,
|
|
"logps/ref_chosen": -55.52748107910156,
|
|
"logps/ref_rejected": -83.55218505859375,
|
|
"logps/rejected": -91.39221954345703,
|
|
"loss": 1.1694,
|
|
"margin_dpo/margin_mean": 2.9869184494018555,
|
|
"margin_dpo/margin_std": 5.36320686340332,
|
|
"step": 109
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -6.247074127197266,
|
|
"KL/mean": -6.370278835296631,
|
|
"KL/rejected_KL_mean": -6.493480682373047,
|
|
"KL/std": 4.0468854904174805,
|
|
"epoch": 0.16628873771730915,
|
|
"fcm_dpo/beta": 0.10331767052412033,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.24640044569969177,
|
|
"fcm_dpo/q_t": 0.4917522668838501,
|
|
"grad_norm": 45.9508056640625,
|
|
"learning_rate": 4.938574467213517e-07,
|
|
"logits/chosen": 0.17314262688159943,
|
|
"logits/rejected": 0.18239563703536987,
|
|
"logps/chosen": -87.40582275390625,
|
|
"logps/ref_chosen": -81.15874481201172,
|
|
"logps/ref_rejected": -72.56021118164062,
|
|
"logps/rejected": -79.05369567871094,
|
|
"loss": 1.4345,
|
|
"margin_dpo/margin_mean": 0.2464001178741455,
|
|
"margin_dpo/margin_std": 5.418988227844238,
|
|
"step": 110
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -5.624446868896484,
|
|
"KL/mean": -6.6341352462768555,
|
|
"KL/rejected_KL_mean": -7.643825531005859,
|
|
"KL/std": 4.291510105133057,
|
|
"epoch": 0.16780045351473924,
|
|
"fcm_dpo/beta": 0.10375148057937622,
|
|
"fcm_dpo/delta": 0.08362551033496857,
|
|
"fcm_dpo/margin": 2.019381284713745,
|
|
"fcm_dpo/q_t": 0.4531843066215515,
|
|
"grad_norm": 26.552736282348633,
|
|
"learning_rate": 4.935627386698418e-07,
|
|
"logits/chosen": 0.2836073338985443,
|
|
"logits/rejected": 0.24599884450435638,
|
|
"logps/chosen": -57.983428955078125,
|
|
"logps/ref_chosen": -52.358985900878906,
|
|
"logps/ref_rejected": -77.06150817871094,
|
|
"logps/rejected": -84.70532989501953,
|
|
"loss": 1.2563,
|
|
"margin_dpo/margin_mean": 2.019381523132324,
|
|
"margin_dpo/margin_std": 5.18630313873291,
|
|
"step": 111
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -5.328195571899414,
|
|
"KL/mean": -6.536831855773926,
|
|
"KL/rejected_KL_mean": -7.7454681396484375,
|
|
"KL/std": 4.255629539489746,
|
|
"epoch": 0.1693121693121693,
|
|
"fcm_dpo/beta": 0.1053951308131218,
|
|
"fcm_dpo/delta": 0.14701415598392487,
|
|
"fcm_dpo/margin": 2.4172747135162354,
|
|
"fcm_dpo/q_t": 0.4403117597103119,
|
|
"grad_norm": 33.79671859741211,
|
|
"learning_rate": 4.932612176449559e-07,
|
|
"logits/chosen": 0.18986698985099792,
|
|
"logits/rejected": 0.12487274408340454,
|
|
"logps/chosen": -68.34825897216797,
|
|
"logps/ref_chosen": -63.02006530761719,
|
|
"logps/ref_rejected": -111.36941528320312,
|
|
"logps/rejected": -119.11488342285156,
|
|
"loss": 1.2066,
|
|
"margin_dpo/margin_mean": 2.417274236679077,
|
|
"margin_dpo/margin_std": 4.706630229949951,
|
|
"step": 112
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -5.327106475830078,
|
|
"KL/mean": -5.9943695068359375,
|
|
"KL/rejected_KL_mean": -6.661628723144531,
|
|
"KL/std": 3.567197799682617,
|
|
"epoch": 0.1708238851095994,
|
|
"fcm_dpo/beta": 0.10572827607393265,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.3345184326171875,
|
|
"fcm_dpo/q_t": 0.4660409092903137,
|
|
"grad_norm": 38.02436828613281,
|
|
"learning_rate": 4.929528920808854e-07,
|
|
"logits/chosen": 0.22416824102401733,
|
|
"logits/rejected": 0.18370281159877777,
|
|
"logps/chosen": -61.134769439697266,
|
|
"logps/ref_chosen": -55.80766296386719,
|
|
"logps/ref_rejected": -69.84014129638672,
|
|
"logps/rejected": -76.50177001953125,
|
|
"loss": 1.3078,
|
|
"margin_dpo/margin_mean": 1.334518551826477,
|
|
"margin_dpo/margin_std": 4.543076515197754,
|
|
"step": 113
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -5.011161804199219,
|
|
"KL/mean": -6.181464195251465,
|
|
"KL/rejected_KL_mean": -7.351772308349609,
|
|
"KL/std": 3.8884191513061523,
|
|
"epoch": 0.17233560090702948,
|
|
"fcm_dpo/beta": 0.10687725245952606,
|
|
"fcm_dpo/delta": 0.15181350708007812,
|
|
"fcm_dpo/margin": 2.340602397918701,
|
|
"fcm_dpo/q_t": 0.44304314255714417,
|
|
"grad_norm": 31.40993881225586,
|
|
"learning_rate": 4.92637770602159e-07,
|
|
"logits/chosen": 0.23694977164268494,
|
|
"logits/rejected": 0.17697958648204803,
|
|
"logps/chosen": -71.34393310546875,
|
|
"logps/ref_chosen": -66.33277130126953,
|
|
"logps/ref_rejected": -71.61489868164062,
|
|
"logps/rejected": -78.96666717529297,
|
|
"loss": 1.211,
|
|
"margin_dpo/margin_mean": 2.3406028747558594,
|
|
"margin_dpo/margin_std": 4.686898231506348,
|
|
"step": 114
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -5.412651062011719,
|
|
"KL/mean": -6.370328903198242,
|
|
"KL/rejected_KL_mean": -7.328008651733398,
|
|
"KL/std": 3.5519421100616455,
|
|
"epoch": 0.17384731670445955,
|
|
"fcm_dpo/beta": 0.10825435817241669,
|
|
"fcm_dpo/delta": 0.08429938554763794,
|
|
"fcm_dpo/margin": 1.9153550863265991,
|
|
"fcm_dpo/q_t": 0.45180806517601013,
|
|
"grad_norm": 30.687053680419922,
|
|
"learning_rate": 4.923158620234019e-07,
|
|
"logits/chosen": 0.25733867287635803,
|
|
"logits/rejected": 0.19262482225894928,
|
|
"logps/chosen": -61.16168975830078,
|
|
"logps/ref_chosen": -55.74903869628906,
|
|
"logps/ref_rejected": -79.59849548339844,
|
|
"logps/rejected": -86.92650604248047,
|
|
"loss": 1.2394,
|
|
"margin_dpo/margin_mean": 1.9153550863265991,
|
|
"margin_dpo/margin_std": 4.179965019226074,
|
|
"step": 115
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -5.132673263549805,
|
|
"KL/mean": -6.544622421264648,
|
|
"KL/rejected_KL_mean": -7.956573486328125,
|
|
"KL/std": 3.6682581901550293,
|
|
"epoch": 0.17535903250188964,
|
|
"fcm_dpo/beta": 0.10918746143579483,
|
|
"fcm_dpo/delta": 0.09284006059169769,
|
|
"fcm_dpo/margin": 2.8239059448242188,
|
|
"fcm_dpo/q_t": 0.4272872805595398,
|
|
"grad_norm": 27.67125701904297,
|
|
"learning_rate": 4.91987175349089e-07,
|
|
"logits/chosen": 0.21911393105983734,
|
|
"logits/rejected": 0.1517239660024643,
|
|
"logps/chosen": -54.497840881347656,
|
|
"logps/ref_chosen": -49.36516571044922,
|
|
"logps/ref_rejected": -72.84671020507812,
|
|
"logps/rejected": -80.80328369140625,
|
|
"loss": 1.1479,
|
|
"margin_dpo/margin_mean": 2.823906421661377,
|
|
"margin_dpo/margin_std": 3.975419044494629,
|
|
"step": 116
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -5.114622116088867,
|
|
"KL/mean": -6.045646667480469,
|
|
"KL/rejected_KL_mean": -6.976673126220703,
|
|
"KL/std": 3.4384045600891113,
|
|
"epoch": 0.17687074829931973,
|
|
"fcm_dpo/beta": 0.10940033197402954,
|
|
"fcm_dpo/delta": 0.012463591992855072,
|
|
"fcm_dpo/margin": 1.8620429039001465,
|
|
"fcm_dpo/q_t": 0.4515898823738098,
|
|
"grad_norm": 30.863542556762695,
|
|
"learning_rate": 4.916517197732933e-07,
|
|
"logits/chosen": 0.23854109644889832,
|
|
"logits/rejected": 0.20172467827796936,
|
|
"logps/chosen": -62.82551956176758,
|
|
"logps/ref_chosen": -57.710899353027344,
|
|
"logps/ref_rejected": -69.77253723144531,
|
|
"logps/rejected": -76.74920654296875,
|
|
"loss": 1.2529,
|
|
"margin_dpo/margin_mean": 1.8620433807373047,
|
|
"margin_dpo/margin_std": 4.300942897796631,
|
|
"step": 117
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -4.922035217285156,
|
|
"KL/mean": -6.207369804382324,
|
|
"KL/rejected_KL_mean": -7.492698669433594,
|
|
"KL/std": 3.757565498352051,
|
|
"epoch": 0.17838246409674982,
|
|
"fcm_dpo/beta": 0.11024913191795349,
|
|
"fcm_dpo/delta": 0.1183442771434784,
|
|
"fcm_dpo/margin": 2.5706686973571777,
|
|
"fcm_dpo/q_t": 0.43307480216026306,
|
|
"grad_norm": 30.435558319091797,
|
|
"learning_rate": 4.913095046794281e-07,
|
|
"logits/chosen": 0.3092609643936157,
|
|
"logits/rejected": 0.26714813709259033,
|
|
"logps/chosen": -57.40193176269531,
|
|
"logps/ref_chosen": -52.479896545410156,
|
|
"logps/ref_rejected": -81.359130859375,
|
|
"logps/rejected": -88.85182189941406,
|
|
"loss": 1.1672,
|
|
"margin_dpo/margin_mean": 2.5706682205200195,
|
|
"margin_dpo/margin_std": 3.851738929748535,
|
|
"step": 118
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -6.317985534667969,
|
|
"KL/mean": -7.205575942993164,
|
|
"KL/rejected_KL_mean": -8.09316635131836,
|
|
"KL/std": 4.350160121917725,
|
|
"epoch": 0.17989417989417988,
|
|
"fcm_dpo/beta": 0.11111591756343842,
|
|
"fcm_dpo/delta": 0.07437408715486526,
|
|
"fcm_dpo/margin": 1.775181531906128,
|
|
"fcm_dpo/q_t": 0.45529645681381226,
|
|
"grad_norm": 32.99928665161133,
|
|
"learning_rate": 4.909605396399855e-07,
|
|
"logits/chosen": 0.22920635342597961,
|
|
"logits/rejected": 0.18934877216815948,
|
|
"logps/chosen": -67.6756591796875,
|
|
"logps/ref_chosen": -61.35767364501953,
|
|
"logps/ref_rejected": -75.71510314941406,
|
|
"logps/rejected": -83.80826568603516,
|
|
"loss": 1.283,
|
|
"margin_dpo/margin_mean": 1.7751821279525757,
|
|
"margin_dpo/margin_std": 5.408249855041504,
|
|
"step": 119
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -5.1925811767578125,
|
|
"KL/mean": -6.578503608703613,
|
|
"KL/rejected_KL_mean": -7.964427947998047,
|
|
"KL/std": 3.708730936050415,
|
|
"epoch": 0.18140589569160998,
|
|
"fcm_dpo/beta": 0.1123550608754158,
|
|
"fcm_dpo/delta": 0.08989394456148148,
|
|
"fcm_dpo/margin": 2.771848440170288,
|
|
"fcm_dpo/q_t": 0.4271494150161743,
|
|
"grad_norm": 31.620399475097656,
|
|
"learning_rate": 4.906048344162676e-07,
|
|
"logits/chosen": 0.19706860184669495,
|
|
"logits/rejected": 0.1373499035835266,
|
|
"logps/chosen": -65.10015106201172,
|
|
"logps/ref_chosen": -59.907569885253906,
|
|
"logps/ref_rejected": -79.6910629272461,
|
|
"logps/rejected": -87.65548706054688,
|
|
"loss": 1.1431,
|
|
"margin_dpo/margin_mean": 2.771848440170288,
|
|
"margin_dpo/margin_std": 3.778219223022461,
|
|
"step": 120
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -5.717071533203125,
|
|
"KL/mean": -7.17664098739624,
|
|
"KL/rejected_KL_mean": -8.636207580566406,
|
|
"KL/std": 4.332400321960449,
|
|
"epoch": 0.18291761148904007,
|
|
"fcm_dpo/beta": 0.11306065320968628,
|
|
"fcm_dpo/delta": 0.07116404175758362,
|
|
"fcm_dpo/margin": 2.9191367626190186,
|
|
"fcm_dpo/q_t": 0.4271777272224426,
|
|
"grad_norm": 29.542659759521484,
|
|
"learning_rate": 4.902423989581143e-07,
|
|
"logits/chosen": 0.32054561376571655,
|
|
"logits/rejected": 0.23324054479599,
|
|
"logps/chosen": -61.383113861083984,
|
|
"logps/ref_chosen": -55.66604232788086,
|
|
"logps/ref_rejected": -101.56233978271484,
|
|
"logps/rejected": -110.19854736328125,
|
|
"loss": 1.1498,
|
|
"margin_dpo/margin_mean": 2.9191365242004395,
|
|
"margin_dpo/margin_std": 4.834861755371094,
|
|
"step": 121
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -6.173484802246094,
|
|
"KL/mean": -7.516587734222412,
|
|
"KL/rejected_KL_mean": -8.859687805175781,
|
|
"KL/std": 4.608508586883545,
|
|
"epoch": 0.18442932728647016,
|
|
"fcm_dpo/beta": 0.11428318917751312,
|
|
"fcm_dpo/delta": 0.09428457915782928,
|
|
"fcm_dpo/margin": 2.6862008571624756,
|
|
"fcm_dpo/q_t": 0.43481603264808655,
|
|
"grad_norm": 34.92070007324219,
|
|
"learning_rate": 4.898732434036243e-07,
|
|
"logits/chosen": 0.2274283468723297,
|
|
"logits/rejected": 0.1917957067489624,
|
|
"logps/chosen": -69.50785827636719,
|
|
"logps/ref_chosen": -63.334373474121094,
|
|
"logps/ref_rejected": -73.67523193359375,
|
|
"logps/rejected": -82.53492736816406,
|
|
"loss": 1.1942,
|
|
"margin_dpo/margin_mean": 2.6862008571624756,
|
|
"margin_dpo/margin_std": 5.642106056213379,
|
|
"step": 122
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -5.9217681884765625,
|
|
"KL/mean": -7.191183567047119,
|
|
"KL/rejected_KL_mean": -8.460601806640625,
|
|
"KL/std": 4.03624153137207,
|
|
"epoch": 0.18594104308390022,
|
|
"fcm_dpo/beta": 0.11543236672878265,
|
|
"fcm_dpo/delta": 0.1084226667881012,
|
|
"fcm_dpo/margin": 2.538832187652588,
|
|
"fcm_dpo/q_t": 0.43287205696105957,
|
|
"grad_norm": 30.712724685668945,
|
|
"learning_rate": 4.894973780788722e-07,
|
|
"logits/chosen": 0.24583575129508972,
|
|
"logits/rejected": 0.20430734753608704,
|
|
"logps/chosen": -62.82051086425781,
|
|
"logps/ref_chosen": -56.89874267578125,
|
|
"logps/ref_rejected": -78.97028350830078,
|
|
"logps/rejected": -87.4308853149414,
|
|
"loss": 1.1743,
|
|
"margin_dpo/margin_mean": 2.538831949234009,
|
|
"margin_dpo/margin_std": 4.3621063232421875,
|
|
"step": 123
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -6.242483139038086,
|
|
"KL/mean": -8.041402816772461,
|
|
"KL/rejected_KL_mean": -9.840328216552734,
|
|
"KL/std": 4.452130317687988,
|
|
"epoch": 0.1874527588813303,
|
|
"fcm_dpo/beta": 0.11543804407119751,
|
|
"fcm_dpo/delta": -0.015687942504882812,
|
|
"fcm_dpo/margin": 3.5978426933288574,
|
|
"fcm_dpo/q_t": 0.4053274989128113,
|
|
"grad_norm": 30.56203269958496,
|
|
"learning_rate": 4.89114813497619e-07,
|
|
"logits/chosen": 0.26211291551589966,
|
|
"logits/rejected": 0.2005375623703003,
|
|
"logps/chosen": -63.35856628417969,
|
|
"logps/ref_chosen": -57.116085052490234,
|
|
"logps/ref_rejected": -87.93074035644531,
|
|
"logps/rejected": -97.77107238769531,
|
|
"loss": 1.0827,
|
|
"margin_dpo/margin_mean": 3.597841739654541,
|
|
"margin_dpo/margin_std": 4.737724781036377,
|
|
"step": 124
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -6.713808059692383,
|
|
"KL/mean": -8.287500381469727,
|
|
"KL/rejected_KL_mean": -9.861190795898438,
|
|
"KL/std": 4.656261920928955,
|
|
"epoch": 0.1889644746787604,
|
|
"fcm_dpo/beta": 0.115517258644104,
|
|
"fcm_dpo/delta": 0.03694707155227661,
|
|
"fcm_dpo/margin": 3.147381067276001,
|
|
"fcm_dpo/q_t": 0.4181087613105774,
|
|
"grad_norm": 32.7068977355957,
|
|
"learning_rate": 4.887255603610184e-07,
|
|
"logits/chosen": 0.290385901927948,
|
|
"logits/rejected": 0.22929246723651886,
|
|
"logps/chosen": -72.41998291015625,
|
|
"logps/ref_chosen": -65.7061767578125,
|
|
"logps/ref_rejected": -91.72711944580078,
|
|
"logps/rejected": -101.58831024169922,
|
|
"loss": 1.1283,
|
|
"margin_dpo/margin_mean": 3.147380828857422,
|
|
"margin_dpo/margin_std": 4.8141021728515625,
|
|
"step": 125
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -6.38752555847168,
|
|
"KL/mean": -7.263304710388184,
|
|
"KL/rejected_KL_mean": -8.139087677001953,
|
|
"KL/std": 4.334300994873047,
|
|
"epoch": 0.19047619047619047,
|
|
"fcm_dpo/beta": 0.11643783003091812,
|
|
"fcm_dpo/delta": 0.09117639064788818,
|
|
"fcm_dpo/margin": 1.751561164855957,
|
|
"fcm_dpo/q_t": 0.45305609703063965,
|
|
"grad_norm": 33.06489944458008,
|
|
"learning_rate": 4.883296295573176e-07,
|
|
"logits/chosen": 0.09684337675571442,
|
|
"logits/rejected": 0.0911843329668045,
|
|
"logps/chosen": -74.56361389160156,
|
|
"logps/ref_chosen": -68.17608642578125,
|
|
"logps/ref_rejected": -65.1175537109375,
|
|
"logps/rejected": -73.25663757324219,
|
|
"loss": 1.2614,
|
|
"margin_dpo/margin_mean": 1.751561164855957,
|
|
"margin_dpo/margin_std": 4.63372802734375,
|
|
"step": 126
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -6.5929718017578125,
|
|
"KL/mean": -8.221467971801758,
|
|
"KL/rejected_KL_mean": -9.849967956542969,
|
|
"KL/std": 3.88958740234375,
|
|
"epoch": 0.19198790627362056,
|
|
"fcm_dpo/beta": 0.11684253811836243,
|
|
"fcm_dpo/delta": 0.01960890367627144,
|
|
"fcm_dpo/margin": 3.2569966316223145,
|
|
"fcm_dpo/q_t": 0.4116781949996948,
|
|
"grad_norm": 30.322776794433594,
|
|
"learning_rate": 4.87927032161552e-07,
|
|
"logits/chosen": 0.19757479429244995,
|
|
"logits/rejected": 0.16764190793037415,
|
|
"logps/chosen": -68.47320556640625,
|
|
"logps/ref_chosen": -61.88023376464844,
|
|
"logps/ref_rejected": -68.46012878417969,
|
|
"logps/rejected": -78.31008911132812,
|
|
"loss": 1.0994,
|
|
"margin_dpo/margin_mean": 3.2569963932037354,
|
|
"margin_dpo/margin_std": 4.116855144500732,
|
|
"step": 127
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -7.130456924438477,
|
|
"KL/mean": -8.872931480407715,
|
|
"KL/rejected_KL_mean": -10.615409851074219,
|
|
"KL/std": 5.264225959777832,
|
|
"epoch": 0.19349962207105065,
|
|
"fcm_dpo/beta": 0.11692208051681519,
|
|
"fcm_dpo/delta": -0.007765013724565506,
|
|
"fcm_dpo/margin": 3.484951972961426,
|
|
"fcm_dpo/q_t": 0.41211092472076416,
|
|
"grad_norm": 31.57915687561035,
|
|
"learning_rate": 4.875177794352363e-07,
|
|
"logits/chosen": 0.2635500133037567,
|
|
"logits/rejected": 0.20409663021564484,
|
|
"logps/chosen": -73.83943939208984,
|
|
"logps/ref_chosen": -66.708984375,
|
|
"logps/ref_rejected": -94.97969055175781,
|
|
"logps/rejected": -105.59510040283203,
|
|
"loss": 1.1447,
|
|
"margin_dpo/margin_mean": 3.484952211380005,
|
|
"margin_dpo/margin_std": 6.370296478271484,
|
|
"step": 128
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -8.001535415649414,
|
|
"KL/mean": -9.024829864501953,
|
|
"KL/rejected_KL_mean": -10.04812240600586,
|
|
"KL/std": 4.746467590332031,
|
|
"epoch": 0.19501133786848074,
|
|
"fcm_dpo/beta": 0.11855511367321014,
|
|
"fcm_dpo/delta": 0.15923556685447693,
|
|
"fcm_dpo/margin": 2.0465922355651855,
|
|
"fcm_dpo/q_t": 0.4452970027923584,
|
|
"grad_norm": 41.03274154663086,
|
|
"learning_rate": 4.871018828260491e-07,
|
|
"logits/chosen": 0.23353171348571777,
|
|
"logits/rejected": 0.22759993374347687,
|
|
"logps/chosen": -73.34036254882812,
|
|
"logps/ref_chosen": -65.33882904052734,
|
|
"logps/ref_rejected": -68.06109619140625,
|
|
"logps/rejected": -78.10921478271484,
|
|
"loss": 1.2634,
|
|
"margin_dpo/margin_mean": 2.0465919971466064,
|
|
"margin_dpo/margin_std": 5.672805309295654,
|
|
"step": 129
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -7.560056686401367,
|
|
"KL/mean": -9.167448043823242,
|
|
"KL/rejected_KL_mean": -10.774837493896484,
|
|
"KL/std": 4.7286200523376465,
|
|
"epoch": 0.1965230536659108,
|
|
"fcm_dpo/beta": 0.11883430927991867,
|
|
"fcm_dpo/delta": 0.018089991062879562,
|
|
"fcm_dpo/margin": 3.2147793769836426,
|
|
"fcm_dpo/q_t": 0.4126940369606018,
|
|
"grad_norm": 35.749507904052734,
|
|
"learning_rate": 4.866793539675126e-07,
|
|
"logits/chosen": 0.19802308082580566,
|
|
"logits/rejected": 0.1460653394460678,
|
|
"logps/chosen": -66.2208023071289,
|
|
"logps/ref_chosen": -58.660743713378906,
|
|
"logps/ref_rejected": -79.24510192871094,
|
|
"logps/rejected": -90.01994323730469,
|
|
"loss": 1.1175,
|
|
"margin_dpo/margin_mean": 3.214779853820801,
|
|
"margin_dpo/margin_std": 4.750698089599609,
|
|
"step": 130
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -7.219947814941406,
|
|
"KL/mean": -9.312915802001953,
|
|
"KL/rejected_KL_mean": -11.405887603759766,
|
|
"KL/std": 5.4851226806640625,
|
|
"epoch": 0.1980347694633409,
|
|
"fcm_dpo/beta": 0.11819542944431305,
|
|
"fcm_dpo/delta": -0.09734071046113968,
|
|
"fcm_dpo/margin": 4.185941696166992,
|
|
"fcm_dpo/q_t": 0.3932657837867737,
|
|
"grad_norm": 29.757938385009766,
|
|
"learning_rate": 4.86250204678667e-07,
|
|
"logits/chosen": 0.21579259634017944,
|
|
"logits/rejected": 0.14932771027088165,
|
|
"logps/chosen": -59.73448181152344,
|
|
"logps/ref_chosen": -52.51453399658203,
|
|
"logps/ref_rejected": -85.18299865722656,
|
|
"logps/rejected": -96.58888244628906,
|
|
"loss": 1.0717,
|
|
"margin_dpo/margin_mean": 4.185941696166992,
|
|
"margin_dpo/margin_std": 6.140892028808594,
|
|
"step": 131
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -7.943172454833984,
|
|
"KL/mean": -9.690788269042969,
|
|
"KL/rejected_KL_mean": -11.43840217590332,
|
|
"KL/std": 5.216340065002441,
|
|
"epoch": 0.19954648526077098,
|
|
"fcm_dpo/beta": 0.11792843043804169,
|
|
"fcm_dpo/delta": -0.012444393709301949,
|
|
"fcm_dpo/margin": 3.495227336883545,
|
|
"fcm_dpo/q_t": 0.4039040803909302,
|
|
"grad_norm": 31.720706939697266,
|
|
"learning_rate": 4.858144469637408e-07,
|
|
"logits/chosen": 0.279098778963089,
|
|
"logits/rejected": 0.24664485454559326,
|
|
"logps/chosen": -73.62830352783203,
|
|
"logps/ref_chosen": -65.68513488769531,
|
|
"logps/ref_rejected": -69.54120635986328,
|
|
"logps/rejected": -80.9796142578125,
|
|
"loss": 1.0998,
|
|
"margin_dpo/margin_mean": 3.495227813720703,
|
|
"margin_dpo/margin_std": 5.031069278717041,
|
|
"step": 132
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -8.388786315917969,
|
|
"KL/mean": -9.769898414611816,
|
|
"KL/rejected_KL_mean": -11.151012420654297,
|
|
"KL/std": 5.134222030639648,
|
|
"epoch": 0.20105820105820105,
|
|
"fcm_dpo/beta": 0.11857283115386963,
|
|
"fcm_dpo/delta": 0.07366795837879181,
|
|
"fcm_dpo/margin": 2.7622251510620117,
|
|
"fcm_dpo/q_t": 0.4260021448135376,
|
|
"grad_norm": 37.483551025390625,
|
|
"learning_rate": 4.853720930118138e-07,
|
|
"logits/chosen": 0.19250085949897766,
|
|
"logits/rejected": 0.18288807570934296,
|
|
"logps/chosen": -71.98690032958984,
|
|
"logps/ref_chosen": -63.598114013671875,
|
|
"logps/ref_rejected": -73.72798156738281,
|
|
"logps/rejected": -84.87899780273438,
|
|
"loss": 1.1877,
|
|
"margin_dpo/margin_mean": 2.7622251510620117,
|
|
"margin_dpo/margin_std": 5.574526309967041,
|
|
"step": 133
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -7.781076431274414,
|
|
"KL/mean": -10.149148941040039,
|
|
"KL/rejected_KL_mean": -12.517219543457031,
|
|
"KL/std": 5.914173126220703,
|
|
"epoch": 0.20256991685563114,
|
|
"fcm_dpo/beta": 0.11740978062152863,
|
|
"fcm_dpo/delta": -0.16047140955924988,
|
|
"fcm_dpo/margin": 4.736141204833984,
|
|
"fcm_dpo/q_t": 0.3786366581916809,
|
|
"grad_norm": 29.212146759033203,
|
|
"learning_rate": 4.849231551964771e-07,
|
|
"logits/chosen": 0.30975794792175293,
|
|
"logits/rejected": 0.2520185708999634,
|
|
"logps/chosen": -61.575645446777344,
|
|
"logps/ref_chosen": -53.79457092285156,
|
|
"logps/ref_rejected": -74.16741943359375,
|
|
"logps/rejected": -86.68463897705078,
|
|
"loss": 1.015,
|
|
"margin_dpo/margin_mean": 4.736141681671143,
|
|
"margin_dpo/margin_std": 6.00445556640625,
|
|
"step": 134
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -7.842548370361328,
|
|
"KL/mean": -9.333450317382812,
|
|
"KL/rejected_KL_mean": -10.824359893798828,
|
|
"KL/std": 5.478485107421875,
|
|
"epoch": 0.20408163265306123,
|
|
"fcm_dpo/beta": 0.11740288138389587,
|
|
"fcm_dpo/delta": 0.050766605883836746,
|
|
"fcm_dpo/margin": 2.981807231903076,
|
|
"fcm_dpo/q_t": 0.4210662245750427,
|
|
"grad_norm": 29.386869430541992,
|
|
"learning_rate": 4.844676460754862e-07,
|
|
"logits/chosen": 0.2893332242965698,
|
|
"logits/rejected": 0.2552085518836975,
|
|
"logps/chosen": -57.28362274169922,
|
|
"logps/ref_chosen": -49.441078186035156,
|
|
"logps/ref_rejected": -65.96878051757812,
|
|
"logps/rejected": -76.79313659667969,
|
|
"loss": 1.1678,
|
|
"margin_dpo/margin_mean": 2.981806993484497,
|
|
"margin_dpo/margin_std": 5.6487040519714355,
|
|
"step": 135
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -9.717037200927734,
|
|
"KL/mean": -11.659026145935059,
|
|
"KL/rejected_KL_mean": -13.601020812988281,
|
|
"KL/std": 6.137080192565918,
|
|
"epoch": 0.20559334845049132,
|
|
"fcm_dpo/beta": 0.11668679118156433,
|
|
"fcm_dpo/delta": -0.054873041808605194,
|
|
"fcm_dpo/margin": 3.883988618850708,
|
|
"fcm_dpo/q_t": 0.40512531995773315,
|
|
"grad_norm": 36.174842834472656,
|
|
"learning_rate": 4.840055783904106e-07,
|
|
"logits/chosen": 0.24782195687294006,
|
|
"logits/rejected": 0.17235609889030457,
|
|
"logps/chosen": -76.47630310058594,
|
|
"logps/ref_chosen": -66.75926208496094,
|
|
"logps/ref_rejected": -94.61787414550781,
|
|
"logps/rejected": -108.2188949584961,
|
|
"loss": 1.1633,
|
|
"margin_dpo/margin_mean": 3.8839893341064453,
|
|
"margin_dpo/margin_std": 7.67537784576416,
|
|
"step": 136
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -9.004161834716797,
|
|
"KL/mean": -11.004858016967773,
|
|
"KL/rejected_KL_mean": -13.00555419921875,
|
|
"KL/std": 5.812673091888428,
|
|
"epoch": 0.20710506424792138,
|
|
"fcm_dpo/beta": 0.11650143563747406,
|
|
"fcm_dpo/delta": -0.0677945464849472,
|
|
"fcm_dpo/margin": 4.001392364501953,
|
|
"fcm_dpo/q_t": 0.398629367351532,
|
|
"grad_norm": 29.692665100097656,
|
|
"learning_rate": 4.835369650662767e-07,
|
|
"logits/chosen": 0.2612660527229309,
|
|
"logits/rejected": 0.23181885480880737,
|
|
"logps/chosen": -65.7879638671875,
|
|
"logps/ref_chosen": -56.78379821777344,
|
|
"logps/ref_rejected": -69.89952087402344,
|
|
"logps/rejected": -82.90507507324219,
|
|
"loss": 1.0898,
|
|
"margin_dpo/margin_mean": 4.001392364501953,
|
|
"margin_dpo/margin_std": 6.17383337020874,
|
|
"step": 137
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -10.656959533691406,
|
|
"KL/mean": -12.375408172607422,
|
|
"KL/rejected_KL_mean": -14.093864440917969,
|
|
"KL/std": 6.01499080657959,
|
|
"epoch": 0.20861678004535147,
|
|
"fcm_dpo/beta": 0.11605371534824371,
|
|
"fcm_dpo/delta": 0.0011580323334783316,
|
|
"fcm_dpo/margin": 3.4369006156921387,
|
|
"fcm_dpo/q_t": 0.4111366868019104,
|
|
"grad_norm": 32.066978454589844,
|
|
"learning_rate": 4.830618192112065e-07,
|
|
"logits/chosen": 0.2572246193885803,
|
|
"logits/rejected": 0.2206147015094757,
|
|
"logps/chosen": -69.4229736328125,
|
|
"logps/ref_chosen": -58.766014099121094,
|
|
"logps/ref_rejected": -68.12371826171875,
|
|
"logps/rejected": -82.21758270263672,
|
|
"loss": 1.1716,
|
|
"margin_dpo/margin_mean": 3.4369001388549805,
|
|
"margin_dpo/margin_std": 6.844020843505859,
|
|
"step": 138
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -11.175987243652344,
|
|
"KL/mean": -13.195121765136719,
|
|
"KL/rejected_KL_mean": -15.214263916015625,
|
|
"KL/std": 5.84468412399292,
|
|
"epoch": 0.21012849584278157,
|
|
"fcm_dpo/beta": 0.11551456153392792,
|
|
"fcm_dpo/delta": -0.06804588437080383,
|
|
"fcm_dpo/margin": 4.038267135620117,
|
|
"fcm_dpo/q_t": 0.3957091271877289,
|
|
"grad_norm": 34.502567291259766,
|
|
"learning_rate": 4.825801541160509e-07,
|
|
"logits/chosen": 0.21503251791000366,
|
|
"logits/rejected": 0.18497824668884277,
|
|
"logps/chosen": -82.40158081054688,
|
|
"logps/ref_chosen": -71.2255859375,
|
|
"logps/ref_rejected": -82.1834716796875,
|
|
"logps/rejected": -97.39773559570312,
|
|
"loss": 1.0802,
|
|
"margin_dpo/margin_mean": 4.038267135620117,
|
|
"margin_dpo/margin_std": 5.930106163024902,
|
|
"step": 139
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -10.20223617553711,
|
|
"KL/mean": -13.006385803222656,
|
|
"KL/rejected_KL_mean": -15.810531616210938,
|
|
"KL/std": 7.805213928222656,
|
|
"epoch": 0.21164021164021163,
|
|
"fcm_dpo/beta": 0.1130751371383667,
|
|
"fcm_dpo/delta": -0.24216951429843903,
|
|
"fcm_dpo/margin": 5.608301162719727,
|
|
"fcm_dpo/q_t": 0.3683100938796997,
|
|
"grad_norm": 35.70717239379883,
|
|
"learning_rate": 4.820919832540181e-07,
|
|
"logits/chosen": 0.23811732232570648,
|
|
"logits/rejected": 0.18922963738441467,
|
|
"logps/chosen": -73.47989654541016,
|
|
"logps/ref_chosen": -63.27766418457031,
|
|
"logps/ref_rejected": -83.30647277832031,
|
|
"logps/rejected": -99.11700439453125,
|
|
"loss": 1.0622,
|
|
"margin_dpo/margin_mean": 5.608301162719727,
|
|
"margin_dpo/margin_std": 8.777132034301758,
|
|
"step": 140
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -11.293815612792969,
|
|
"KL/mean": -13.853639602661133,
|
|
"KL/rejected_KL_mean": -16.413467407226562,
|
|
"KL/std": 6.5127787590026855,
|
|
"epoch": 0.21315192743764172,
|
|
"fcm_dpo/beta": 0.110617995262146,
|
|
"fcm_dpo/delta": -0.17256447672843933,
|
|
"fcm_dpo/margin": 5.1196489334106445,
|
|
"fcm_dpo/q_t": 0.3834548890590668,
|
|
"grad_norm": 32.43185806274414,
|
|
"learning_rate": 4.815973202802966e-07,
|
|
"logits/chosen": 0.283972829580307,
|
|
"logits/rejected": 0.23658016324043274,
|
|
"logps/chosen": -73.06058502197266,
|
|
"logps/ref_chosen": -61.76676940917969,
|
|
"logps/ref_rejected": -88.60601806640625,
|
|
"logps/rejected": -105.01948547363281,
|
|
"loss": 1.0822,
|
|
"margin_dpo/margin_mean": 5.1196489334106445,
|
|
"margin_dpo/margin_std": 8.20843505859375,
|
|
"step": 141
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -10.983600616455078,
|
|
"KL/mean": -13.09145736694336,
|
|
"KL/rejected_KL_mean": -15.199317932128906,
|
|
"KL/std": 6.306560516357422,
|
|
"epoch": 0.2146636432350718,
|
|
"fcm_dpo/beta": 0.10995540022850037,
|
|
"fcm_dpo/delta": -0.06513672322034836,
|
|
"fcm_dpo/margin": 4.215723991394043,
|
|
"fcm_dpo/q_t": 0.3978484570980072,
|
|
"grad_norm": 30.0311336517334,
|
|
"learning_rate": 4.810961790316729e-07,
|
|
"logits/chosen": 0.26272276043891907,
|
|
"logits/rejected": 0.23739004135131836,
|
|
"logps/chosen": -76.25837707519531,
|
|
"logps/ref_chosen": -65.2747802734375,
|
|
"logps/ref_rejected": -81.1378173828125,
|
|
"logps/rejected": -96.3371353149414,
|
|
"loss": 1.0905,
|
|
"margin_dpo/margin_mean": 4.215723991394043,
|
|
"margin_dpo/margin_std": 6.492724418640137,
|
|
"step": 142
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -11.772371292114258,
|
|
"KL/mean": -13.561727523803711,
|
|
"KL/rejected_KL_mean": -15.351081848144531,
|
|
"KL/std": 7.431132793426514,
|
|
"epoch": 0.2161753590325019,
|
|
"fcm_dpo/beta": 0.10999909043312073,
|
|
"fcm_dpo/delta": 0.0064488742500543594,
|
|
"fcm_dpo/margin": 3.5787086486816406,
|
|
"fcm_dpo/q_t": 0.41158154606819153,
|
|
"grad_norm": 42.05965805053711,
|
|
"learning_rate": 4.805885735261454e-07,
|
|
"logits/chosen": 0.28689733147621155,
|
|
"logits/rejected": 0.26905614137649536,
|
|
"logps/chosen": -74.39019775390625,
|
|
"logps/ref_chosen": -62.617828369140625,
|
|
"logps/ref_rejected": -70.39239501953125,
|
|
"logps/rejected": -85.74346923828125,
|
|
"loss": 1.2344,
|
|
"margin_dpo/margin_mean": 3.5787086486816406,
|
|
"margin_dpo/margin_std": 8.725080490112305,
|
|
"step": 143
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -11.96786117553711,
|
|
"KL/mean": -14.297059059143066,
|
|
"KL/rejected_KL_mean": -16.626251220703125,
|
|
"KL/std": 7.43912410736084,
|
|
"epoch": 0.21768707482993196,
|
|
"fcm_dpo/beta": 0.1087704747915268,
|
|
"fcm_dpo/delta": -0.10999403148889542,
|
|
"fcm_dpo/margin": 4.658395767211914,
|
|
"fcm_dpo/q_t": 0.3969062268733978,
|
|
"grad_norm": 32.87115478515625,
|
|
"learning_rate": 4.800745179625307e-07,
|
|
"logits/chosen": 0.27016690373420715,
|
|
"logits/rejected": 0.23740969598293304,
|
|
"logps/chosen": -72.77055358886719,
|
|
"logps/ref_chosen": -60.80268859863281,
|
|
"logps/ref_rejected": -79.07284545898438,
|
|
"logps/rejected": -95.6990966796875,
|
|
"loss": 1.1308,
|
|
"margin_dpo/margin_mean": 4.658395767211914,
|
|
"margin_dpo/margin_std": 8.569746971130371,
|
|
"step": 144
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -11.111255645751953,
|
|
"KL/mean": -13.578890800476074,
|
|
"KL/rejected_KL_mean": -16.046524047851562,
|
|
"KL/std": 7.551879405975342,
|
|
"epoch": 0.21919879062736206,
|
|
"fcm_dpo/beta": 0.10756328701972961,
|
|
"fcm_dpo/delta": -0.13456054031848907,
|
|
"fcm_dpo/margin": 4.93527889251709,
|
|
"fcm_dpo/q_t": 0.3875006139278412,
|
|
"grad_norm": 36.411136627197266,
|
|
"learning_rate": 4.795540267200686e-07,
|
|
"logits/chosen": 0.2210046797990799,
|
|
"logits/rejected": 0.23701998591423035,
|
|
"logps/chosen": -85.72271728515625,
|
|
"logps/ref_chosen": -74.61146545410156,
|
|
"logps/ref_rejected": -83.24461364746094,
|
|
"logps/rejected": -99.2911376953125,
|
|
"loss": 1.1271,
|
|
"margin_dpo/margin_mean": 4.93527889251709,
|
|
"margin_dpo/margin_std": 9.065813064575195,
|
|
"step": 145
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -10.803302764892578,
|
|
"KL/mean": -13.2725191116333,
|
|
"KL/rejected_KL_mean": -15.74173355102539,
|
|
"KL/std": 7.354823112487793,
|
|
"epoch": 0.22071050642479215,
|
|
"fcm_dpo/beta": 0.10643485188484192,
|
|
"fcm_dpo/delta": -0.12890943884849548,
|
|
"fcm_dpo/margin": 4.9384307861328125,
|
|
"fcm_dpo/q_t": 0.38945895433425903,
|
|
"grad_norm": 31.73560905456543,
|
|
"learning_rate": 4.790271143580173e-07,
|
|
"logits/chosen": 0.207697793841362,
|
|
"logits/rejected": 0.19365434348583221,
|
|
"logps/chosen": -68.644287109375,
|
|
"logps/ref_chosen": -57.84098434448242,
|
|
"logps/ref_rejected": -67.47422790527344,
|
|
"logps/rejected": -83.21595764160156,
|
|
"loss": 1.0949,
|
|
"margin_dpo/margin_mean": 4.9384307861328125,
|
|
"margin_dpo/margin_std": 8.135544776916504,
|
|
"step": 146
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -12.445867538452148,
|
|
"KL/mean": -14.43240737915039,
|
|
"KL/rejected_KL_mean": -16.41895294189453,
|
|
"KL/std": 7.903876781463623,
|
|
"epoch": 0.2222222222222222,
|
|
"fcm_dpo/beta": 0.10576187074184418,
|
|
"fcm_dpo/delta": -0.020632311701774597,
|
|
"fcm_dpo/margin": 3.9730844497680664,
|
|
"fcm_dpo/q_t": 0.40969616174697876,
|
|
"grad_norm": 38.59626388549805,
|
|
"learning_rate": 4.784937956152489e-07,
|
|
"logits/chosen": 0.25783491134643555,
|
|
"logits/rejected": 0.21288403868675232,
|
|
"logps/chosen": -79.25933837890625,
|
|
"logps/ref_chosen": -66.81346893310547,
|
|
"logps/ref_rejected": -81.1796875,
|
|
"logps/rejected": -97.59864807128906,
|
|
"loss": 1.1888,
|
|
"margin_dpo/margin_mean": 3.9730851650238037,
|
|
"margin_dpo/margin_std": 8.535297393798828,
|
|
"step": 147
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -11.264463424682617,
|
|
"KL/mean": -13.848915100097656,
|
|
"KL/rejected_KL_mean": -16.433364868164062,
|
|
"KL/std": 7.326770782470703,
|
|
"epoch": 0.2237339380196523,
|
|
"fcm_dpo/beta": 0.1047644168138504,
|
|
"fcm_dpo/delta": -0.14532725512981415,
|
|
"fcm_dpo/margin": 5.168903350830078,
|
|
"fcm_dpo/q_t": 0.383495569229126,
|
|
"grad_norm": 25.93487548828125,
|
|
"learning_rate": 4.779540854098347e-07,
|
|
"logits/chosen": 0.37720394134521484,
|
|
"logits/rejected": 0.3021549582481384,
|
|
"logps/chosen": -59.95221710205078,
|
|
"logps/ref_chosen": -48.6877555847168,
|
|
"logps/ref_rejected": -67.50503540039062,
|
|
"logps/rejected": -83.93840026855469,
|
|
"loss": 1.0811,
|
|
"margin_dpo/margin_mean": 5.1689043045043945,
|
|
"margin_dpo/margin_std": 8.112524032592773,
|
|
"step": 148
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -13.620237350463867,
|
|
"KL/mean": -16.485435485839844,
|
|
"KL/rejected_KL_mean": -19.350631713867188,
|
|
"KL/std": 7.7701005935668945,
|
|
"epoch": 0.2252456538170824,
|
|
"fcm_dpo/beta": 0.10273667424917221,
|
|
"fcm_dpo/delta": -0.19433575868606567,
|
|
"fcm_dpo/margin": 5.730397701263428,
|
|
"fcm_dpo/q_t": 0.3712954819202423,
|
|
"grad_norm": 27.19110870361328,
|
|
"learning_rate": 4.774079988386296e-07,
|
|
"logits/chosen": 0.239324152469635,
|
|
"logits/rejected": 0.1915998011827469,
|
|
"logps/chosen": -68.7640151977539,
|
|
"logps/ref_chosen": -55.143775939941406,
|
|
"logps/ref_rejected": -64.79888916015625,
|
|
"logps/rejected": -84.14952087402344,
|
|
"loss": 1.0065,
|
|
"margin_dpo/margin_mean": 5.7303972244262695,
|
|
"margin_dpo/margin_std": 7.3015971183776855,
|
|
"step": 149
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -11.457605361938477,
|
|
"KL/mean": -15.221206665039062,
|
|
"KL/rejected_KL_mean": -18.98480224609375,
|
|
"KL/std": 8.49504280090332,
|
|
"epoch": 0.22675736961451248,
|
|
"fcm_dpo/beta": 0.0993858277797699,
|
|
"fcm_dpo/delta": -0.3618656396865845,
|
|
"fcm_dpo/margin": 7.527195930480957,
|
|
"fcm_dpo/q_t": 0.3462602496147156,
|
|
"grad_norm": 26.44401741027832,
|
|
"learning_rate": 4.768555511768486e-07,
|
|
"logits/chosen": 0.2652711868286133,
|
|
"logits/rejected": 0.21960538625717163,
|
|
"logps/chosen": -78.92835235595703,
|
|
"logps/ref_chosen": -67.47074890136719,
|
|
"logps/ref_rejected": -89.21170806884766,
|
|
"logps/rejected": -108.1965103149414,
|
|
"loss": 0.9367,
|
|
"margin_dpo/margin_mean": 7.527196407318115,
|
|
"margin_dpo/margin_std": 8.762171745300293,
|
|
"step": 150
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -9.854202270507812,
|
|
"KL/mean": -14.096034049987793,
|
|
"KL/rejected_KL_mean": -18.337867736816406,
|
|
"KL/std": 9.082784652709961,
|
|
"epoch": 0.22826908541194255,
|
|
"fcm_dpo/beta": 0.09548157453536987,
|
|
"fcm_dpo/delta": -0.4272121787071228,
|
|
"fcm_dpo/margin": 8.483661651611328,
|
|
"fcm_dpo/q_t": 0.3325921893119812,
|
|
"grad_norm": 23.73583984375,
|
|
"learning_rate": 4.762967578776406e-07,
|
|
"logits/chosen": 0.2599151134490967,
|
|
"logits/rejected": 0.20441243052482605,
|
|
"logps/chosen": -62.313743591308594,
|
|
"logps/ref_chosen": -52.45954132080078,
|
|
"logps/ref_rejected": -79.0630111694336,
|
|
"logps/rejected": -97.40087890625,
|
|
"loss": 0.8842,
|
|
"margin_dpo/margin_mean": 8.483660697937012,
|
|
"margin_dpo/margin_std": 9.148792266845703,
|
|
"step": 151
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -12.627342224121094,
|
|
"KL/mean": -15.364206314086914,
|
|
"KL/rejected_KL_mean": -18.101070404052734,
|
|
"KL/std": 8.539461135864258,
|
|
"epoch": 0.22978080120937264,
|
|
"fcm_dpo/beta": 0.09393717348575592,
|
|
"fcm_dpo/delta": -0.11732172966003418,
|
|
"fcm_dpo/margin": 5.473731994628906,
|
|
"fcm_dpo/q_t": 0.3932981789112091,
|
|
"grad_norm": 27.975107192993164,
|
|
"learning_rate": 4.757316345716553e-07,
|
|
"logits/chosen": 0.33348941802978516,
|
|
"logits/rejected": 0.2787918150424957,
|
|
"logps/chosen": -69.1811752319336,
|
|
"logps/ref_chosen": -56.5538330078125,
|
|
"logps/ref_rejected": -76.55074310302734,
|
|
"logps/rejected": -94.65180969238281,
|
|
"loss": 1.1259,
|
|
"margin_dpo/margin_mean": 5.473731994628906,
|
|
"margin_dpo/margin_std": 9.714651107788086,
|
|
"step": 152
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -11.781326293945312,
|
|
"KL/mean": -14.979900360107422,
|
|
"KL/rejected_KL_mean": -18.17847442626953,
|
|
"KL/std": 8.806741714477539,
|
|
"epoch": 0.23129251700680273,
|
|
"fcm_dpo/beta": 0.091956228017807,
|
|
"fcm_dpo/delta": -0.19399213790893555,
|
|
"fcm_dpo/margin": 6.397145748138428,
|
|
"fcm_dpo/q_t": 0.37217292189598083,
|
|
"grad_norm": 25.705537796020508,
|
|
"learning_rate": 4.751601970666064e-07,
|
|
"logits/chosen": 0.21460071206092834,
|
|
"logits/rejected": 0.17887841165065765,
|
|
"logps/chosen": -79.78822326660156,
|
|
"logps/ref_chosen": -68.00689697265625,
|
|
"logps/ref_rejected": -74.83482360839844,
|
|
"logps/rejected": -93.01329803466797,
|
|
"loss": 1.0114,
|
|
"margin_dpo/margin_mean": 6.397146224975586,
|
|
"margin_dpo/margin_std": 8.284008026123047,
|
|
"step": 153
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -12.869159698486328,
|
|
"KL/mean": -15.23004150390625,
|
|
"KL/rejected_KL_mean": -17.590923309326172,
|
|
"KL/std": 9.147069931030273,
|
|
"epoch": 0.2328042328042328,
|
|
"fcm_dpo/beta": 0.0912463515996933,
|
|
"fcm_dpo/delta": -0.031556978821754456,
|
|
"fcm_dpo/margin": 4.721765518188477,
|
|
"fcm_dpo/q_t": 0.40602871775627136,
|
|
"grad_norm": 27.32335090637207,
|
|
"learning_rate": 4.745824613468292e-07,
|
|
"logits/chosen": 0.31948935985565186,
|
|
"logits/rejected": 0.3167087435722351,
|
|
"logps/chosen": -72.0916976928711,
|
|
"logps/ref_chosen": -59.222537994384766,
|
|
"logps/ref_rejected": -64.19131469726562,
|
|
"logps/rejected": -81.78224182128906,
|
|
"loss": 1.1706,
|
|
"margin_dpo/margin_mean": 4.721765518188477,
|
|
"margin_dpo/margin_std": 9.456390380859375,
|
|
"step": 154
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -13.604690551757812,
|
|
"KL/mean": -16.90361976623535,
|
|
"KL/rejected_KL_mean": -20.20254898071289,
|
|
"KL/std": 9.015103340148926,
|
|
"epoch": 0.23431594860166288,
|
|
"fcm_dpo/beta": 0.08960270881652832,
|
|
"fcm_dpo/delta": -0.19806252419948578,
|
|
"fcm_dpo/margin": 6.5978569984436035,
|
|
"fcm_dpo/q_t": 0.3803700804710388,
|
|
"grad_norm": 27.178985595703125,
|
|
"learning_rate": 4.7399844357283393e-07,
|
|
"logits/chosen": 0.31627488136291504,
|
|
"logits/rejected": 0.29584386944770813,
|
|
"logps/chosen": -82.05938720703125,
|
|
"logps/ref_chosen": -68.45469665527344,
|
|
"logps/ref_rejected": -77.91763305664062,
|
|
"logps/rejected": -98.12018585205078,
|
|
"loss": 1.1063,
|
|
"margin_dpo/margin_mean": 6.597856521606445,
|
|
"margin_dpo/margin_std": 11.45124626159668,
|
|
"step": 155
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -13.478513717651367,
|
|
"KL/mean": -17.361495971679688,
|
|
"KL/rejected_KL_mean": -21.244476318359375,
|
|
"KL/std": 10.097107887268066,
|
|
"epoch": 0.23582766439909297,
|
|
"fcm_dpo/beta": 0.08758161216974258,
|
|
"fcm_dpo/delta": -0.2898622155189514,
|
|
"fcm_dpo/margin": 7.765964984893799,
|
|
"fcm_dpo/q_t": 0.35733336210250854,
|
|
"grad_norm": 25.112524032592773,
|
|
"learning_rate": 4.7340816008085305e-07,
|
|
"logits/chosen": 0.2658316195011139,
|
|
"logits/rejected": 0.2201889455318451,
|
|
"logps/chosen": -80.74810791015625,
|
|
"logps/ref_chosen": -67.26959991455078,
|
|
"logps/ref_rejected": -86.95914459228516,
|
|
"logps/rejected": -108.20362091064453,
|
|
"loss": 0.9651,
|
|
"margin_dpo/margin_mean": 7.765966415405273,
|
|
"margin_dpo/margin_std": 9.467267990112305,
|
|
"step": 156
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -12.03335189819336,
|
|
"KL/mean": -15.147748947143555,
|
|
"KL/rejected_KL_mean": -18.26213836669922,
|
|
"KL/std": 9.688674926757812,
|
|
"epoch": 0.23733938019652306,
|
|
"fcm_dpo/beta": 0.08544515073299408,
|
|
"fcm_dpo/delta": -0.13836640119552612,
|
|
"fcm_dpo/margin": 6.228787422180176,
|
|
"fcm_dpo/q_t": 0.3858155310153961,
|
|
"grad_norm": 25.20146369934082,
|
|
"learning_rate": 4.728116273823847e-07,
|
|
"logits/chosen": 0.286736398935318,
|
|
"logits/rejected": 0.26793140172958374,
|
|
"logps/chosen": -66.80622863769531,
|
|
"logps/ref_chosen": -54.77287292480469,
|
|
"logps/ref_rejected": -63.87866973876953,
|
|
"logps/rejected": -82.14080810546875,
|
|
"loss": 1.0821,
|
|
"margin_dpo/margin_mean": 6.228787422180176,
|
|
"margin_dpo/margin_std": 9.449630737304688,
|
|
"step": 157
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -13.391231536865234,
|
|
"KL/mean": -16.675254821777344,
|
|
"KL/rejected_KL_mean": -19.959278106689453,
|
|
"KL/std": 9.875959396362305,
|
|
"epoch": 0.23885109599395313,
|
|
"fcm_dpo/beta": 0.08471238613128662,
|
|
"fcm_dpo/delta": -0.1607985496520996,
|
|
"fcm_dpo/margin": 6.568045616149902,
|
|
"fcm_dpo/q_t": 0.3842330574989319,
|
|
"grad_norm": 25.875486373901367,
|
|
"learning_rate": 4.7220886216373085e-07,
|
|
"logits/chosen": 0.30243319272994995,
|
|
"logits/rejected": 0.26424646377563477,
|
|
"logps/chosen": -78.31394958496094,
|
|
"logps/ref_chosen": -64.92271423339844,
|
|
"logps/ref_rejected": -82.23789978027344,
|
|
"logps/rejected": -102.19717407226562,
|
|
"loss": 1.0783,
|
|
"margin_dpo/margin_mean": 6.568045139312744,
|
|
"margin_dpo/margin_std": 10.496780395507812,
|
|
"step": 158
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -13.602977752685547,
|
|
"KL/mean": -17.37712860107422,
|
|
"KL/rejected_KL_mean": -21.151283264160156,
|
|
"KL/std": 10.568428993225098,
|
|
"epoch": 0.24036281179138322,
|
|
"fcm_dpo/beta": 0.0831126868724823,
|
|
"fcm_dpo/delta": -0.2343847155570984,
|
|
"fcm_dpo/margin": 7.548300743103027,
|
|
"fcm_dpo/q_t": 0.3679664134979248,
|
|
"grad_norm": 27.259275436401367,
|
|
"learning_rate": 4.715998812855304e-07,
|
|
"logits/chosen": 0.31240391731262207,
|
|
"logits/rejected": 0.27262261509895325,
|
|
"logps/chosen": -70.64997100830078,
|
|
"logps/ref_chosen": -57.046993255615234,
|
|
"logps/ref_rejected": -73.32441711425781,
|
|
"logps/rejected": -94.47570037841797,
|
|
"loss": 1.0399,
|
|
"margin_dpo/margin_mean": 7.548300743103027,
|
|
"margin_dpo/margin_std": 11.128408432006836,
|
|
"step": 159
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -15.248756408691406,
|
|
"KL/mean": -18.459033966064453,
|
|
"KL/rejected_KL_mean": -21.669315338134766,
|
|
"KL/std": 9.847359657287598,
|
|
"epoch": 0.2418745275888133,
|
|
"fcm_dpo/beta": 0.08162681758403778,
|
|
"fcm_dpo/delta": -0.12738092243671417,
|
|
"fcm_dpo/margin": 6.420557498931885,
|
|
"fcm_dpo/q_t": 0.3946857750415802,
|
|
"grad_norm": 24.286088943481445,
|
|
"learning_rate": 4.7098470178228755e-07,
|
|
"logits/chosen": 0.19038847088813782,
|
|
"logits/rejected": 0.1437274068593979,
|
|
"logps/chosen": -65.05567169189453,
|
|
"logps/ref_chosen": -49.806915283203125,
|
|
"logps/ref_rejected": -68.3370132446289,
|
|
"logps/rejected": -90.00633239746094,
|
|
"loss": 1.1159,
|
|
"margin_dpo/margin_mean": 6.420557498931885,
|
|
"margin_dpo/margin_std": 11.484304428100586,
|
|
"step": 160
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -14.994487762451172,
|
|
"KL/mean": -18.58053207397461,
|
|
"KL/rejected_KL_mean": -22.166580200195312,
|
|
"KL/std": 9.70354175567627,
|
|
"epoch": 0.24338624338624337,
|
|
"fcm_dpo/beta": 0.08026713132858276,
|
|
"fcm_dpo/delta": -0.1808307021856308,
|
|
"fcm_dpo/margin": 7.172094345092773,
|
|
"fcm_dpo/q_t": 0.3774510622024536,
|
|
"grad_norm": 22.900474548339844,
|
|
"learning_rate": 4.703633408618955e-07,
|
|
"logits/chosen": 0.33459678292274475,
|
|
"logits/rejected": 0.2944002151489258,
|
|
"logps/chosen": -67.49497985839844,
|
|
"logps/ref_chosen": -52.50048828125,
|
|
"logps/ref_rejected": -66.04540252685547,
|
|
"logps/rejected": -88.21198272705078,
|
|
"loss": 1.0611,
|
|
"margin_dpo/margin_mean": 7.172094345092773,
|
|
"margin_dpo/margin_std": 10.89350700378418,
|
|
"step": 161
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -15.946052551269531,
|
|
"KL/mean": -20.630077362060547,
|
|
"KL/rejected_KL_mean": -25.314098358154297,
|
|
"KL/std": 10.967508316040039,
|
|
"epoch": 0.24489795918367346,
|
|
"fcm_dpo/beta": 0.0778420940041542,
|
|
"fcm_dpo/delta": -0.34178629517555237,
|
|
"fcm_dpo/margin": 9.368051528930664,
|
|
"fcm_dpo/q_t": 0.3451705574989319,
|
|
"grad_norm": 24.434083938598633,
|
|
"learning_rate": 4.697358159051549e-07,
|
|
"logits/chosen": 0.3445582687854767,
|
|
"logits/rejected": 0.29412025213241577,
|
|
"logps/chosen": -85.41524505615234,
|
|
"logps/ref_chosen": -69.46919250488281,
|
|
"logps/ref_rejected": -92.00952911376953,
|
|
"logps/rejected": -117.32362365722656,
|
|
"loss": 0.9482,
|
|
"margin_dpo/margin_mean": 9.368051528930664,
|
|
"margin_dpo/margin_std": 11.26630687713623,
|
|
"step": 162
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -13.82813835144043,
|
|
"KL/mean": -18.537067413330078,
|
|
"KL/rejected_KL_mean": -23.245990753173828,
|
|
"KL/std": 10.732690811157227,
|
|
"epoch": 0.24640967498110355,
|
|
"fcm_dpo/beta": 0.07583911716938019,
|
|
"fcm_dpo/delta": -0.3254932761192322,
|
|
"fcm_dpo/margin": 9.417854309082031,
|
|
"fcm_dpo/q_t": 0.35043394565582275,
|
|
"grad_norm": 22.02515411376953,
|
|
"learning_rate": 4.691021444652876e-07,
|
|
"logits/chosen": 0.28051143884658813,
|
|
"logits/rejected": 0.23206675052642822,
|
|
"logps/chosen": -64.44197082519531,
|
|
"logps/ref_chosen": -50.613834381103516,
|
|
"logps/ref_rejected": -74.62033081054688,
|
|
"logps/rejected": -97.86631774902344,
|
|
"loss": 0.9704,
|
|
"margin_dpo/margin_mean": 9.417855262756348,
|
|
"margin_dpo/margin_std": 11.235950469970703,
|
|
"step": 163
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -14.978483200073242,
|
|
"KL/mean": -19.53133773803711,
|
|
"KL/rejected_KL_mean": -24.084190368652344,
|
|
"KL/std": 11.330362319946289,
|
|
"epoch": 0.24792139077853365,
|
|
"fcm_dpo/beta": 0.07343673706054688,
|
|
"fcm_dpo/delta": -0.27751466631889343,
|
|
"fcm_dpo/margin": 9.10571002960205,
|
|
"fcm_dpo/q_t": 0.36070793867111206,
|
|
"grad_norm": 20.95972442626953,
|
|
"learning_rate": 4.6846234426744624e-07,
|
|
"logits/chosen": 0.2815134525299072,
|
|
"logits/rejected": 0.2163783758878708,
|
|
"logps/chosen": -69.82659912109375,
|
|
"logps/ref_chosen": -54.848114013671875,
|
|
"logps/ref_rejected": -79.0630111694336,
|
|
"logps/rejected": -103.14720153808594,
|
|
"loss": 1.0092,
|
|
"margin_dpo/margin_mean": 9.105710983276367,
|
|
"margin_dpo/margin_std": 12.207426071166992,
|
|
"step": 164
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -16.717864990234375,
|
|
"KL/mean": -20.885393142700195,
|
|
"KL/rejected_KL_mean": -25.052921295166016,
|
|
"KL/std": 11.485513687133789,
|
|
"epoch": 0.2494331065759637,
|
|
"fcm_dpo/beta": 0.0718020647764206,
|
|
"fcm_dpo/delta": -0.20435869693756104,
|
|
"fcm_dpo/margin": 8.335058212280273,
|
|
"fcm_dpo/q_t": 0.3723691999912262,
|
|
"grad_norm": 22.1568660736084,
|
|
"learning_rate": 4.678164332082175e-07,
|
|
"logits/chosen": 0.34408366680145264,
|
|
"logits/rejected": 0.28766632080078125,
|
|
"logps/chosen": -67.80707550048828,
|
|
"logps/ref_chosen": -51.089210510253906,
|
|
"logps/ref_rejected": -71.23370361328125,
|
|
"logps/rejected": -96.28662109375,
|
|
"loss": 1.0599,
|
|
"margin_dpo/margin_mean": 8.335057258605957,
|
|
"margin_dpo/margin_std": 12.37674617767334,
|
|
"step": 165
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -15.999536514282227,
|
|
"KL/mean": -19.23455047607422,
|
|
"KL/rejected_KL_mean": -22.469566345214844,
|
|
"KL/std": 11.364204406738281,
|
|
"epoch": 0.2509448223733938,
|
|
"fcm_dpo/beta": 0.07079511880874634,
|
|
"fcm_dpo/delta": -0.05954471975564957,
|
|
"fcm_dpo/margin": 6.470028877258301,
|
|
"fcm_dpo/q_t": 0.4028851389884949,
|
|
"grad_norm": 25.06746482849121,
|
|
"learning_rate": 4.6716442935512214e-07,
|
|
"logits/chosen": 0.29567545652389526,
|
|
"logits/rejected": 0.20208358764648438,
|
|
"logps/chosen": -79.19034576416016,
|
|
"logps/ref_chosen": -63.19081115722656,
|
|
"logps/ref_rejected": -93.8402099609375,
|
|
"logps/rejected": -116.30977630615234,
|
|
"loss": 1.1123,
|
|
"margin_dpo/margin_mean": 6.470028877258301,
|
|
"margin_dpo/margin_std": 10.8497314453125,
|
|
"step": 166
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -13.521272659301758,
|
|
"KL/mean": -18.514968872070312,
|
|
"KL/rejected_KL_mean": -23.5086669921875,
|
|
"KL/std": 11.532522201538086,
|
|
"epoch": 0.25245653817082386,
|
|
"fcm_dpo/beta": 0.06897695362567902,
|
|
"fcm_dpo/delta": -0.30067089200019836,
|
|
"fcm_dpo/margin": 9.987388610839844,
|
|
"fcm_dpo/q_t": 0.3529023826122284,
|
|
"grad_norm": 20.71422004699707,
|
|
"learning_rate": 4.6650635094610966e-07,
|
|
"logits/chosen": 0.2599755823612213,
|
|
"logits/rejected": 0.22453096508979797,
|
|
"logps/chosen": -72.44554138183594,
|
|
"logps/ref_chosen": -58.92427062988281,
|
|
"logps/ref_rejected": -72.97377014160156,
|
|
"logps/rejected": -96.48243713378906,
|
|
"loss": 0.9425,
|
|
"margin_dpo/margin_mean": 9.98738956451416,
|
|
"margin_dpo/margin_std": 11.209365844726562,
|
|
"step": 167
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -15.894006729125977,
|
|
"KL/mean": -19.80014991760254,
|
|
"KL/rejected_KL_mean": -23.70629119873047,
|
|
"KL/std": 12.325262069702148,
|
|
"epoch": 0.25396825396825395,
|
|
"fcm_dpo/beta": 0.06793458014726639,
|
|
"fcm_dpo/delta": -0.13443899154663086,
|
|
"fcm_dpo/margin": 7.812285423278809,
|
|
"fcm_dpo/q_t": 0.3883206844329834,
|
|
"grad_norm": 22.438262939453125,
|
|
"learning_rate": 4.6584221638904767e-07,
|
|
"logits/chosen": 0.23517316579818726,
|
|
"logits/rejected": 0.19996830821037292,
|
|
"logps/chosen": -81.5453872680664,
|
|
"logps/ref_chosen": -65.65138244628906,
|
|
"logps/ref_rejected": -79.71418762207031,
|
|
"logps/rejected": -103.42047882080078,
|
|
"loss": 1.0602,
|
|
"margin_dpo/margin_mean": 7.81228494644165,
|
|
"margin_dpo/margin_std": 11.497498512268066,
|
|
"step": 168
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -12.940778732299805,
|
|
"KL/mean": -17.389175415039062,
|
|
"KL/rejected_KL_mean": -21.837570190429688,
|
|
"KL/std": 12.186710357666016,
|
|
"epoch": 0.25547996976568405,
|
|
"fcm_dpo/beta": 0.06676998734474182,
|
|
"fcm_dpo/delta": -0.199951633810997,
|
|
"fcm_dpo/margin": 8.896793365478516,
|
|
"fcm_dpo/q_t": 0.37868428230285645,
|
|
"grad_norm": 24.159345626831055,
|
|
"learning_rate": 4.651720442612075e-07,
|
|
"logits/chosen": 0.3393987715244293,
|
|
"logits/rejected": 0.30449235439300537,
|
|
"logps/chosen": -74.36664581298828,
|
|
"logps/ref_chosen": -61.425865173339844,
|
|
"logps/ref_rejected": -76.09590148925781,
|
|
"logps/rejected": -97.9334716796875,
|
|
"loss": 1.0728,
|
|
"margin_dpo/margin_mean": 8.896793365478516,
|
|
"margin_dpo/margin_std": 14.509865760803223,
|
|
"step": 169
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -12.241409301757812,
|
|
"KL/mean": -16.749591827392578,
|
|
"KL/rejected_KL_mean": -21.25777244567871,
|
|
"KL/std": 11.816784858703613,
|
|
"epoch": 0.25699168556311414,
|
|
"fcm_dpo/beta": 0.06554418802261353,
|
|
"fcm_dpo/delta": -0.19659365713596344,
|
|
"fcm_dpo/margin": 9.016363143920898,
|
|
"fcm_dpo/q_t": 0.37576770782470703,
|
|
"grad_norm": 19.540283203125,
|
|
"learning_rate": 4.6449585330874425e-07,
|
|
"logits/chosen": 0.28061461448669434,
|
|
"logits/rejected": 0.2783413231372833,
|
|
"logps/chosen": -68.89459991455078,
|
|
"logps/ref_chosen": -56.65319061279297,
|
|
"logps/ref_rejected": -63.45965576171875,
|
|
"logps/rejected": -84.71742248535156,
|
|
"loss": 1.0852,
|
|
"margin_dpo/margin_mean": 9.016363143920898,
|
|
"margin_dpo/margin_std": 14.522989273071289,
|
|
"step": 170
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -13.957141876220703,
|
|
"KL/mean": -18.882259368896484,
|
|
"KL/rejected_KL_mean": -23.80738067626953,
|
|
"KL/std": 13.55274772644043,
|
|
"epoch": 0.2585034013605442,
|
|
"fcm_dpo/beta": 0.06364390254020691,
|
|
"fcm_dpo/delta": -0.23671725392341614,
|
|
"fcm_dpo/margin": 9.850237846374512,
|
|
"fcm_dpo/q_t": 0.3690917491912842,
|
|
"grad_norm": 20.97186279296875,
|
|
"learning_rate": 4.6381366244617224e-07,
|
|
"logits/chosen": 0.3738405704498291,
|
|
"logits/rejected": 0.32105594873428345,
|
|
"logps/chosen": -77.69190216064453,
|
|
"logps/ref_chosen": -63.73476028442383,
|
|
"logps/ref_rejected": -78.50328063964844,
|
|
"logps/rejected": -102.31065368652344,
|
|
"loss": 1.068,
|
|
"margin_dpo/margin_mean": 9.850237846374512,
|
|
"margin_dpo/margin_std": 15.312220573425293,
|
|
"step": 171
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -15.449905395507812,
|
|
"KL/mean": -20.06391143798828,
|
|
"KL/rejected_KL_mean": -24.67791748046875,
|
|
"KL/std": 13.651723861694336,
|
|
"epoch": 0.2600151171579743,
|
|
"fcm_dpo/beta": 0.06302116811275482,
|
|
"fcm_dpo/delta": -0.18701273202896118,
|
|
"fcm_dpo/margin": 9.228013038635254,
|
|
"fcm_dpo/q_t": 0.37867966294288635,
|
|
"grad_norm": 21.416004180908203,
|
|
"learning_rate": 4.631254907558365e-07,
|
|
"logits/chosen": 0.3488103747367859,
|
|
"logits/rejected": 0.28891897201538086,
|
|
"logps/chosen": -67.65166473388672,
|
|
"logps/ref_chosen": -52.201759338378906,
|
|
"logps/ref_rejected": -82.85285949707031,
|
|
"logps/rejected": -107.53077697753906,
|
|
"loss": 1.0621,
|
|
"margin_dpo/margin_mean": 9.228013038635254,
|
|
"margin_dpo/margin_std": 13.995540618896484,
|
|
"step": 172
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -14.324943542480469,
|
|
"KL/mean": -19.28638458251953,
|
|
"KL/rejected_KL_mean": -24.247833251953125,
|
|
"KL/std": 13.903328895568848,
|
|
"epoch": 0.2615268329554044,
|
|
"fcm_dpo/beta": 0.06105411797761917,
|
|
"fcm_dpo/delta": -0.21514512598514557,
|
|
"fcm_dpo/margin": 9.922887802124023,
|
|
"fcm_dpo/q_t": 0.38025251030921936,
|
|
"grad_norm": 21.202089309692383,
|
|
"learning_rate": 4.624313574873786e-07,
|
|
"logits/chosen": 0.34909164905548096,
|
|
"logits/rejected": 0.2589060068130493,
|
|
"logps/chosen": -69.7596664428711,
|
|
"logps/ref_chosen": -55.434722900390625,
|
|
"logps/ref_rejected": -77.81967163085938,
|
|
"logps/rejected": -102.0675048828125,
|
|
"loss": 1.1187,
|
|
"margin_dpo/margin_mean": 9.92288875579834,
|
|
"margin_dpo/margin_std": 17.306182861328125,
|
|
"step": 173
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -15.99144172668457,
|
|
"KL/mean": -21.33533477783203,
|
|
"KL/rejected_KL_mean": -26.679229736328125,
|
|
"KL/std": 13.950437545776367,
|
|
"epoch": 0.26303854875283444,
|
|
"fcm_dpo/beta": 0.05990336090326309,
|
|
"fcm_dpo/delta": -0.2485434114933014,
|
|
"fcm_dpo/margin": 10.687789916992188,
|
|
"fcm_dpo/q_t": 0.3658965826034546,
|
|
"grad_norm": 21.50412368774414,
|
|
"learning_rate": 4.61731282057198e-07,
|
|
"logits/chosen": 0.3327246308326721,
|
|
"logits/rejected": 0.2640421688556671,
|
|
"logps/chosen": -73.16339111328125,
|
|
"logps/ref_chosen": -57.17195129394531,
|
|
"logps/ref_rejected": -85.47578430175781,
|
|
"logps/rejected": -112.15501403808594,
|
|
"loss": 1.0224,
|
|
"margin_dpo/margin_mean": 10.687789916992188,
|
|
"margin_dpo/margin_std": 15.164789199829102,
|
|
"step": 174
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -14.038763046264648,
|
|
"KL/mean": -20.00885772705078,
|
|
"KL/rejected_KL_mean": -25.978954315185547,
|
|
"KL/std": 14.190459251403809,
|
|
"epoch": 0.26455026455026454,
|
|
"fcm_dpo/beta": 0.05832141265273094,
|
|
"fcm_dpo/delta": -0.3067561388015747,
|
|
"fcm_dpo/margin": 11.940191268920898,
|
|
"fcm_dpo/q_t": 0.3609797954559326,
|
|
"grad_norm": 19.909940719604492,
|
|
"learning_rate": 4.6102528404790965e-07,
|
|
"logits/chosen": 0.3773840069770813,
|
|
"logits/rejected": 0.34598737955093384,
|
|
"logps/chosen": -81.70439147949219,
|
|
"logps/ref_chosen": -67.6656265258789,
|
|
"logps/ref_rejected": -84.36766815185547,
|
|
"logps/rejected": -110.34661865234375,
|
|
"loss": 1.0039,
|
|
"margin_dpo/margin_mean": 11.940191268920898,
|
|
"margin_dpo/margin_std": 16.549762725830078,
|
|
"step": 175
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -17.38861083984375,
|
|
"KL/mean": -21.252628326416016,
|
|
"KL/rejected_KL_mean": -25.116657257080078,
|
|
"KL/std": 15.09546184539795,
|
|
"epoch": 0.2660619803476946,
|
|
"fcm_dpo/beta": 0.05742862448096275,
|
|
"fcm_dpo/delta": -0.04531227424740791,
|
|
"fcm_dpo/margin": 7.728045463562012,
|
|
"fcm_dpo/q_t": 0.4092855155467987,
|
|
"grad_norm": 25.49968147277832,
|
|
"learning_rate": 4.603133832077953e-07,
|
|
"logits/chosen": 0.2886342704296112,
|
|
"logits/rejected": 0.2623506188392639,
|
|
"logps/chosen": -95.24737548828125,
|
|
"logps/ref_chosen": -77.8587646484375,
|
|
"logps/ref_rejected": -81.08732604980469,
|
|
"logps/rejected": -106.2039794921875,
|
|
"loss": 1.199,
|
|
"margin_dpo/margin_mean": 7.7280449867248535,
|
|
"margin_dpo/margin_std": 16.84091567993164,
|
|
"step": 176
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -13.261825561523438,
|
|
"KL/mean": -20.738998413085938,
|
|
"KL/rejected_KL_mean": -28.216175079345703,
|
|
"KL/std": 15.247730255126953,
|
|
"epoch": 0.2675736961451247,
|
|
"fcm_dpo/beta": 0.05552824214100838,
|
|
"fcm_dpo/delta": -0.45012667775154114,
|
|
"fcm_dpo/margin": 14.954345703125,
|
|
"fcm_dpo/q_t": 0.3269483745098114,
|
|
"grad_norm": 23.578044891357422,
|
|
"learning_rate": 4.5959559945025183e-07,
|
|
"logits/chosen": 0.4196733236312866,
|
|
"logits/rejected": 0.3236994743347168,
|
|
"logps/chosen": -68.48222351074219,
|
|
"logps/ref_chosen": -55.22039794921875,
|
|
"logps/ref_rejected": -92.54973602294922,
|
|
"logps/rejected": -120.76591491699219,
|
|
"loss": 0.8742,
|
|
"margin_dpo/margin_mean": 14.954344749450684,
|
|
"margin_dpo/margin_std": 15.604692459106445,
|
|
"step": 177
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -15.70490837097168,
|
|
"KL/mean": -20.143939971923828,
|
|
"KL/rejected_KL_mean": -24.582965850830078,
|
|
"KL/std": 14.290495872497559,
|
|
"epoch": 0.2690854119425548,
|
|
"fcm_dpo/beta": 0.05439098924398422,
|
|
"fcm_dpo/delta": -0.08626912534236908,
|
|
"fcm_dpo/margin": 8.878059387207031,
|
|
"fcm_dpo/q_t": 0.39554363489151,
|
|
"grad_norm": 22.767946243286133,
|
|
"learning_rate": 4.588719528532341e-07,
|
|
"logits/chosen": 0.33091747760772705,
|
|
"logits/rejected": 0.27923867106437683,
|
|
"logps/chosen": -76.51539611816406,
|
|
"logps/ref_chosen": -60.81049346923828,
|
|
"logps/ref_rejected": -81.12973022460938,
|
|
"logps/rejected": -105.71269226074219,
|
|
"loss": 1.1037,
|
|
"margin_dpo/margin_mean": 8.878059387207031,
|
|
"margin_dpo/margin_std": 14.287181854248047,
|
|
"step": 178
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -16.183090209960938,
|
|
"KL/mean": -21.770275115966797,
|
|
"KL/rejected_KL_mean": -27.357460021972656,
|
|
"KL/std": 15.00064468383789,
|
|
"epoch": 0.2705971277399849,
|
|
"fcm_dpo/beta": 0.05368083715438843,
|
|
"fcm_dpo/delta": -0.20610719919204712,
|
|
"fcm_dpo/margin": 11.174365997314453,
|
|
"fcm_dpo/q_t": 0.3749498724937439,
|
|
"grad_norm": 21.92656135559082,
|
|
"learning_rate": 4.581424636586928e-07,
|
|
"logits/chosen": 0.35283780097961426,
|
|
"logits/rejected": 0.33666449785232544,
|
|
"logps/chosen": -81.85480499267578,
|
|
"logps/ref_chosen": -65.67171478271484,
|
|
"logps/ref_rejected": -75.32586669921875,
|
|
"logps/rejected": -102.6833267211914,
|
|
"loss": 1.0661,
|
|
"margin_dpo/margin_mean": 11.174365997314453,
|
|
"margin_dpo/margin_std": 17.65848159790039,
|
|
"step": 179
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -12.384201049804688,
|
|
"KL/mean": -16.967748641967773,
|
|
"KL/rejected_KL_mean": -21.551294326782227,
|
|
"KL/std": 14.509037017822266,
|
|
"epoch": 0.272108843537415,
|
|
"fcm_dpo/beta": 0.05312761291861534,
|
|
"fcm_dpo/delta": -0.08914913237094879,
|
|
"fcm_dpo/margin": 9.167095184326172,
|
|
"fcm_dpo/q_t": 0.3985925614833832,
|
|
"grad_norm": 18.639013290405273,
|
|
"learning_rate": 4.5740715227200897e-07,
|
|
"logits/chosen": 0.19387683272361755,
|
|
"logits/rejected": 0.17462685704231262,
|
|
"logps/chosen": -69.06700897216797,
|
|
"logps/ref_chosen": -56.68280792236328,
|
|
"logps/ref_rejected": -64.94414520263672,
|
|
"logps/rejected": -86.49543762207031,
|
|
"loss": 1.1328,
|
|
"margin_dpo/margin_mean": 9.167095184326172,
|
|
"margin_dpo/margin_std": 16.858341217041016,
|
|
"step": 180
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -10.723928451538086,
|
|
"KL/mean": -17.369586944580078,
|
|
"KL/rejected_KL_mean": -24.015239715576172,
|
|
"KL/std": 15.4943265914917,
|
|
"epoch": 0.273620559334845,
|
|
"fcm_dpo/beta": 0.05172853171825409,
|
|
"fcm_dpo/delta": -0.29812926054000854,
|
|
"fcm_dpo/margin": 13.29130744934082,
|
|
"fcm_dpo/q_t": 0.35378360748291016,
|
|
"grad_norm": 19.595779418945312,
|
|
"learning_rate": 4.566660392614228e-07,
|
|
"logits/chosen": 0.35855722427368164,
|
|
"logits/rejected": 0.3173817992210388,
|
|
"logps/chosen": -71.4999771118164,
|
|
"logps/ref_chosen": -60.77604675292969,
|
|
"logps/ref_rejected": -83.98361206054688,
|
|
"logps/rejected": -107.99885559082031,
|
|
"loss": 0.9392,
|
|
"margin_dpo/margin_mean": 13.29130744934082,
|
|
"margin_dpo/margin_std": 14.981464385986328,
|
|
"step": 181
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -15.057918548583984,
|
|
"KL/mean": -21.97003746032715,
|
|
"KL/rejected_KL_mean": -28.882156372070312,
|
|
"KL/std": 16.515424728393555,
|
|
"epoch": 0.2751322751322751,
|
|
"fcm_dpo/beta": 0.05023058131337166,
|
|
"fcm_dpo/delta": -0.30502113699913025,
|
|
"fcm_dpo/margin": 13.824239730834961,
|
|
"fcm_dpo/q_t": 0.3568967580795288,
|
|
"grad_norm": 20.854793548583984,
|
|
"learning_rate": 4.5591914535745817e-07,
|
|
"logits/chosen": 0.3515995144844055,
|
|
"logits/rejected": 0.26597678661346436,
|
|
"logps/chosen": -75.31170654296875,
|
|
"logps/ref_chosen": -60.2537841796875,
|
|
"logps/ref_rejected": -89.7706298828125,
|
|
"logps/rejected": -118.65278625488281,
|
|
"loss": 1.0006,
|
|
"margin_dpo/margin_mean": 13.824240684509277,
|
|
"margin_dpo/margin_std": 18.800689697265625,
|
|
"step": 182
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -18.68174171447754,
|
|
"KL/mean": -21.533245086669922,
|
|
"KL/rejected_KL_mean": -24.384742736816406,
|
|
"KL/std": 15.86686897277832,
|
|
"epoch": 0.2766439909297052,
|
|
"fcm_dpo/beta": 0.05020998418331146,
|
|
"fcm_dpo/delta": 0.11538577824831009,
|
|
"fcm_dpo/margin": 5.702999114990234,
|
|
"fcm_dpo/q_t": 0.43668049573898315,
|
|
"grad_norm": 20.45594024658203,
|
|
"learning_rate": 4.551664914523433e-07,
|
|
"logits/chosen": 0.34212377667427063,
|
|
"logits/rejected": 0.3193387985229492,
|
|
"logps/chosen": -80.44316101074219,
|
|
"logps/ref_chosen": -61.76142120361328,
|
|
"logps/ref_rejected": -72.54627990722656,
|
|
"logps/rejected": -96.9310302734375,
|
|
"loss": 1.269,
|
|
"margin_dpo/margin_mean": 5.702999114990234,
|
|
"margin_dpo/margin_std": 16.033796310424805,
|
|
"step": 183
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -11.346687316894531,
|
|
"KL/mean": -16.45113182067871,
|
|
"KL/rejected_KL_mean": -21.555572509765625,
|
|
"KL/std": 13.646240234375,
|
|
"epoch": 0.2781557067271353,
|
|
"fcm_dpo/beta": 0.04984622076153755,
|
|
"fcm_dpo/delta": -0.11246835440397263,
|
|
"fcm_dpo/margin": 10.208887100219727,
|
|
"fcm_dpo/q_t": 0.3908190131187439,
|
|
"grad_norm": 18.455541610717773,
|
|
"learning_rate": 4.544080985994258e-07,
|
|
"logits/chosen": 0.45561403036117554,
|
|
"logits/rejected": 0.38757556676864624,
|
|
"logps/chosen": -58.187408447265625,
|
|
"logps/ref_chosen": -46.840721130371094,
|
|
"logps/ref_rejected": -69.3609390258789,
|
|
"logps/rejected": -90.91651153564453,
|
|
"loss": 1.0558,
|
|
"margin_dpo/margin_mean": 10.208887100219727,
|
|
"margin_dpo/margin_std": 14.173319816589355,
|
|
"step": 184
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -14.60038948059082,
|
|
"KL/mean": -20.471843719482422,
|
|
"KL/rejected_KL_mean": -26.34329605102539,
|
|
"KL/std": 16.403898239135742,
|
|
"epoch": 0.2796674225245654,
|
|
"fcm_dpo/beta": 0.048992227762937546,
|
|
"fcm_dpo/delta": -0.18187808990478516,
|
|
"fcm_dpo/margin": 11.742910385131836,
|
|
"fcm_dpo/q_t": 0.3814876079559326,
|
|
"grad_norm": 17.726289749145508,
|
|
"learning_rate": 4.5364398801258394e-07,
|
|
"logits/chosen": 0.3845561444759369,
|
|
"logits/rejected": 0.33581483364105225,
|
|
"logps/chosen": -66.9215316772461,
|
|
"logps/ref_chosen": -52.32114028930664,
|
|
"logps/ref_rejected": -68.3885726928711,
|
|
"logps/rejected": -94.73187255859375,
|
|
"loss": 1.1078,
|
|
"margin_dpo/margin_mean": 11.742910385131836,
|
|
"margin_dpo/margin_std": 20.043315887451172,
|
|
"step": 185
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -12.772802352905273,
|
|
"KL/mean": -19.23457908630371,
|
|
"KL/rejected_KL_mean": -25.69635772705078,
|
|
"KL/std": 16.36713981628418,
|
|
"epoch": 0.2811791383219955,
|
|
"fcm_dpo/beta": 0.048097483813762665,
|
|
"fcm_dpo/delta": -0.22889259457588196,
|
|
"fcm_dpo/margin": 12.923553466796875,
|
|
"fcm_dpo/q_t": 0.37144482135772705,
|
|
"grad_norm": 22.234642028808594,
|
|
"learning_rate": 4.5287418106563354e-07,
|
|
"logits/chosen": 0.32052579522132874,
|
|
"logits/rejected": 0.2757483124732971,
|
|
"logps/chosen": -80.19293212890625,
|
|
"logps/ref_chosen": -67.42012786865234,
|
|
"logps/ref_rejected": -82.50968933105469,
|
|
"logps/rejected": -108.20604705810547,
|
|
"loss": 1.0525,
|
|
"margin_dpo/margin_mean": 12.923553466796875,
|
|
"margin_dpo/margin_std": 19.863771438598633,
|
|
"step": 186
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -16.360855102539062,
|
|
"KL/mean": -22.07530403137207,
|
|
"KL/rejected_KL_mean": -27.789756774902344,
|
|
"KL/std": 17.251888275146484,
|
|
"epoch": 0.28269085411942557,
|
|
"fcm_dpo/beta": 0.04727368801832199,
|
|
"fcm_dpo/delta": -0.14446723461151123,
|
|
"fcm_dpo/margin": 11.4288969039917,
|
|
"fcm_dpo/q_t": 0.3902089297771454,
|
|
"grad_norm": 21.387920379638672,
|
|
"learning_rate": 4.520986992917297e-07,
|
|
"logits/chosen": 0.3397790789604187,
|
|
"logits/rejected": 0.28441479802131653,
|
|
"logps/chosen": -91.8863525390625,
|
|
"logps/ref_chosen": -75.52549743652344,
|
|
"logps/ref_rejected": -94.76289367675781,
|
|
"logps/rejected": -122.55265045166016,
|
|
"loss": 1.1136,
|
|
"margin_dpo/margin_mean": 11.428895950317383,
|
|
"margin_dpo/margin_std": 20.1195068359375,
|
|
"step": 187
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -14.269599914550781,
|
|
"KL/mean": -20.715669631958008,
|
|
"KL/rejected_KL_mean": -27.161739349365234,
|
|
"KL/std": 17.475027084350586,
|
|
"epoch": 0.2842025699168556,
|
|
"fcm_dpo/beta": 0.0464702770113945,
|
|
"fcm_dpo/delta": -0.20517385005950928,
|
|
"fcm_dpo/margin": 12.89213752746582,
|
|
"fcm_dpo/q_t": 0.37629660964012146,
|
|
"grad_norm": 18.12533950805664,
|
|
"learning_rate": 4.5131756438276466e-07,
|
|
"logits/chosen": 0.3968737721443176,
|
|
"logits/rejected": 0.35108232498168945,
|
|
"logps/chosen": -85.79293060302734,
|
|
"logps/ref_chosen": -71.52333068847656,
|
|
"logps/ref_rejected": -78.29949951171875,
|
|
"logps/rejected": -105.46124267578125,
|
|
"loss": 1.0386,
|
|
"margin_dpo/margin_mean": 12.89213752746582,
|
|
"margin_dpo/margin_std": 18.89546775817871,
|
|
"step": 188
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -13.979364395141602,
|
|
"KL/mean": -19.860326766967773,
|
|
"KL/rejected_KL_mean": -25.741283416748047,
|
|
"KL/std": 17.464611053466797,
|
|
"epoch": 0.2857142857142857,
|
|
"fcm_dpo/beta": 0.04549255967140198,
|
|
"fcm_dpo/delta": -0.1405901312828064,
|
|
"fcm_dpo/margin": 11.761919021606445,
|
|
"fcm_dpo/q_t": 0.3859013319015503,
|
|
"grad_norm": 20.145153045654297,
|
|
"learning_rate": 4.5053079818876096e-07,
|
|
"logits/chosen": 0.3909458816051483,
|
|
"logits/rejected": 0.40543025732040405,
|
|
"logps/chosen": -86.15563201904297,
|
|
"logps/ref_chosen": -72.17626953125,
|
|
"logps/ref_rejected": -75.26313781738281,
|
|
"logps/rejected": -101.0044174194336,
|
|
"loss": 1.0799,
|
|
"margin_dpo/margin_mean": 11.761919021606445,
|
|
"margin_dpo/margin_std": 18.16063690185547,
|
|
"step": 189
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -13.273983001708984,
|
|
"KL/mean": -21.27578353881836,
|
|
"KL/rejected_KL_mean": -29.277587890625,
|
|
"KL/std": 17.967754364013672,
|
|
"epoch": 0.2872260015117158,
|
|
"fcm_dpo/beta": 0.0444646030664444,
|
|
"fcm_dpo/delta": -0.3232154846191406,
|
|
"fcm_dpo/margin": 16.003604888916016,
|
|
"fcm_dpo/q_t": 0.3522093892097473,
|
|
"grad_norm": 24.078123092651367,
|
|
"learning_rate": 4.4973842271726024e-07,
|
|
"logits/chosen": 0.4594009816646576,
|
|
"logits/rejected": 0.3064201772212982,
|
|
"logps/chosen": -67.89825439453125,
|
|
"logps/ref_chosen": -54.624271392822266,
|
|
"logps/ref_rejected": -101.47068786621094,
|
|
"logps/rejected": -130.74827575683594,
|
|
"loss": 0.9506,
|
|
"margin_dpo/margin_mean": 16.003604888916016,
|
|
"margin_dpo/margin_std": 19.2811279296875,
|
|
"step": 190
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -19.140254974365234,
|
|
"KL/mean": -25.439712524414062,
|
|
"KL/rejected_KL_mean": -31.739166259765625,
|
|
"KL/std": 18.872146606445312,
|
|
"epoch": 0.2887377173091459,
|
|
"fcm_dpo/beta": 0.043599970638751984,
|
|
"fcm_dpo/delta": -0.1535775065422058,
|
|
"fcm_dpo/margin": 12.598907470703125,
|
|
"fcm_dpo/q_t": 0.38200899958610535,
|
|
"grad_norm": 21.121891021728516,
|
|
"learning_rate": 4.48940460132708e-07,
|
|
"logits/chosen": 0.4362499415874481,
|
|
"logits/rejected": 0.40572458505630493,
|
|
"logps/chosen": -92.07276916503906,
|
|
"logps/ref_chosen": -72.93251037597656,
|
|
"logps/ref_rejected": -89.95103454589844,
|
|
"logps/rejected": -121.69020080566406,
|
|
"loss": 1.0856,
|
|
"margin_dpo/margin_mean": 12.598907470703125,
|
|
"margin_dpo/margin_std": 20.377561569213867,
|
|
"step": 191
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -20.72743034362793,
|
|
"KL/mean": -24.720291137695312,
|
|
"KL/rejected_KL_mean": -28.713146209716797,
|
|
"KL/std": 19.529537200927734,
|
|
"epoch": 0.29024943310657597,
|
|
"fcm_dpo/beta": 0.04367196932435036,
|
|
"fcm_dpo/delta": 0.05201123654842377,
|
|
"fcm_dpo/margin": 7.985716819763184,
|
|
"fcm_dpo/q_t": 0.42336931824684143,
|
|
"grad_norm": 17.46987533569336,
|
|
"learning_rate": 4.481369327558329e-07,
|
|
"logits/chosen": 0.4228086471557617,
|
|
"logits/rejected": 0.3950343728065491,
|
|
"logps/chosen": -74.72854614257812,
|
|
"logps/ref_chosen": -54.001121520996094,
|
|
"logps/ref_rejected": -63.531551361083984,
|
|
"logps/rejected": -92.24469757080078,
|
|
"loss": 1.1928,
|
|
"margin_dpo/margin_mean": 7.985716819763184,
|
|
"margin_dpo/margin_std": 16.939682006835938,
|
|
"step": 192
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -16.589162826538086,
|
|
"KL/mean": -24.004480361938477,
|
|
"KL/rejected_KL_mean": -31.419795989990234,
|
|
"KL/std": 18.17107582092285,
|
|
"epoch": 0.29176114890400606,
|
|
"fcm_dpo/beta": 0.04276707395911217,
|
|
"fcm_dpo/delta": -0.24291932582855225,
|
|
"fcm_dpo/margin": 14.830631256103516,
|
|
"fcm_dpo/q_t": 0.3653998374938965,
|
|
"grad_norm": 16.093433380126953,
|
|
"learning_rate": 4.47327863063023e-07,
|
|
"logits/chosen": 0.3720834255218506,
|
|
"logits/rejected": 0.34900301694869995,
|
|
"logps/chosen": -73.33843994140625,
|
|
"logps/ref_chosen": -56.74927520751953,
|
|
"logps/ref_rejected": -58.80629348754883,
|
|
"logps/rejected": -90.22608947753906,
|
|
"loss": 0.9903,
|
|
"margin_dpo/margin_mean": 14.830631256103516,
|
|
"margin_dpo/margin_std": 18.81682586669922,
|
|
"step": 193
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -17.805133819580078,
|
|
"KL/mean": -23.88231658935547,
|
|
"KL/rejected_KL_mean": -29.959503173828125,
|
|
"KL/std": 18.674776077270508,
|
|
"epoch": 0.29327286470143615,
|
|
"fcm_dpo/beta": 0.042039357125759125,
|
|
"fcm_dpo/delta": -0.11552996933460236,
|
|
"fcm_dpo/margin": 12.154373168945312,
|
|
"fcm_dpo/q_t": 0.3923408091068268,
|
|
"grad_norm": 20.05245590209961,
|
|
"learning_rate": 4.4651327368569684e-07,
|
|
"logits/chosen": 0.4321303963661194,
|
|
"logits/rejected": 0.4012456238269806,
|
|
"logps/chosen": -74.45457458496094,
|
|
"logps/ref_chosen": -56.64944076538086,
|
|
"logps/ref_rejected": -69.98954772949219,
|
|
"logps/rejected": -99.94905090332031,
|
|
"loss": 1.1381,
|
|
"margin_dpo/margin_mean": 12.154373168945312,
|
|
"margin_dpo/margin_std": 22.00497817993164,
|
|
"step": 194
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -21.650157928466797,
|
|
"KL/mean": -29.36663818359375,
|
|
"KL/rejected_KL_mean": -37.0831184387207,
|
|
"KL/std": 21.187183380126953,
|
|
"epoch": 0.2947845804988662,
|
|
"fcm_dpo/beta": 0.041272297501564026,
|
|
"fcm_dpo/delta": -0.24553193151950836,
|
|
"fcm_dpo/margin": 15.432966232299805,
|
|
"fcm_dpo/q_t": 0.36648187041282654,
|
|
"grad_norm": 20.21274757385254,
|
|
"learning_rate": 4.4569318740967043e-07,
|
|
"logits/chosen": 0.34546446800231934,
|
|
"logits/rejected": 0.3514538109302521,
|
|
"logps/chosen": -92.0599365234375,
|
|
"logps/ref_chosen": -70.40977478027344,
|
|
"logps/ref_rejected": -74.39448547363281,
|
|
"logps/rejected": -111.47760009765625,
|
|
"loss": 1.0178,
|
|
"margin_dpo/margin_mean": 15.432966232299805,
|
|
"margin_dpo/margin_std": 21.322336196899414,
|
|
"step": 195
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -20.97208595275879,
|
|
"KL/mean": -27.23445701599121,
|
|
"KL/rejected_KL_mean": -33.496829986572266,
|
|
"KL/std": 21.009262084960938,
|
|
"epoch": 0.2962962962962963,
|
|
"fcm_dpo/beta": 0.04086890444159508,
|
|
"fcm_dpo/delta": -0.11479339748620987,
|
|
"fcm_dpo/margin": 12.524740219116211,
|
|
"fcm_dpo/q_t": 0.38693487644195557,
|
|
"grad_norm": 19.50908851623535,
|
|
"learning_rate": 4.448676271745197e-07,
|
|
"logits/chosen": 0.42419660091400146,
|
|
"logits/rejected": 0.37993836402893066,
|
|
"logps/chosen": -80.19966125488281,
|
|
"logps/ref_chosen": -59.227577209472656,
|
|
"logps/ref_rejected": -83.54757690429688,
|
|
"logps/rejected": -117.04440307617188,
|
|
"loss": 1.0686,
|
|
"margin_dpo/margin_mean": 12.524740219116211,
|
|
"margin_dpo/margin_std": 18.34944725036621,
|
|
"step": 196
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -18.3264102935791,
|
|
"KL/mean": -25.974586486816406,
|
|
"KL/rejected_KL_mean": -33.62276077270508,
|
|
"KL/std": 20.36353874206543,
|
|
"epoch": 0.29780801209372637,
|
|
"fcm_dpo/beta": 0.0401434525847435,
|
|
"fcm_dpo/delta": -0.22060605883598328,
|
|
"fcm_dpo/margin": 15.296344757080078,
|
|
"fcm_dpo/q_t": 0.3728818893432617,
|
|
"grad_norm": 19.591636657714844,
|
|
"learning_rate": 4.440366160729392e-07,
|
|
"logits/chosen": 0.5295861959457397,
|
|
"logits/rejected": 0.47452324628829956,
|
|
"logps/chosen": -69.85554504394531,
|
|
"logps/ref_chosen": -51.52912902832031,
|
|
"logps/ref_rejected": -73.70631408691406,
|
|
"logps/rejected": -107.32907104492188,
|
|
"loss": 1.0938,
|
|
"margin_dpo/margin_mean": 15.296344757080078,
|
|
"margin_dpo/margin_std": 25.17190170288086,
|
|
"step": 197
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -20.066226959228516,
|
|
"KL/mean": -28.18023109436035,
|
|
"KL/rejected_KL_mean": -36.29423904418945,
|
|
"KL/std": 20.17511558532715,
|
|
"epoch": 0.29931972789115646,
|
|
"fcm_dpo/beta": 0.0391586534678936,
|
|
"fcm_dpo/delta": -0.24283115565776825,
|
|
"fcm_dpo/margin": 16.22800636291504,
|
|
"fcm_dpo/q_t": 0.36237627267837524,
|
|
"grad_norm": 18.336078643798828,
|
|
"learning_rate": 4.432001773500957e-07,
|
|
"logits/chosen": 0.4924355745315552,
|
|
"logits/rejected": 0.4487847089767456,
|
|
"logps/chosen": -79.84890747070312,
|
|
"logps/ref_chosen": -59.78268051147461,
|
|
"logps/ref_rejected": -72.24533081054688,
|
|
"logps/rejected": -108.53956604003906,
|
|
"loss": 0.9875,
|
|
"margin_dpo/margin_mean": 16.22800636291504,
|
|
"margin_dpo/margin_std": 20.010257720947266,
|
|
"step": 198
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -24.407445907592773,
|
|
"KL/mean": -30.827865600585938,
|
|
"KL/rejected_KL_mean": -37.24828338623047,
|
|
"KL/std": 20.915069580078125,
|
|
"epoch": 0.30083144368858655,
|
|
"fcm_dpo/beta": 0.03871579468250275,
|
|
"fcm_dpo/delta": -0.1002165675163269,
|
|
"fcm_dpo/margin": 12.840841293334961,
|
|
"fcm_dpo/q_t": 0.39358729124069214,
|
|
"grad_norm": 17.74066734313965,
|
|
"learning_rate": 4.4235833440297856e-07,
|
|
"logits/chosen": 0.454384982585907,
|
|
"logits/rejected": 0.3614826798439026,
|
|
"logps/chosen": -80.79421997070312,
|
|
"logps/ref_chosen": -56.38677215576172,
|
|
"logps/ref_rejected": -74.56779479980469,
|
|
"logps/rejected": -111.81608581542969,
|
|
"loss": 1.1464,
|
|
"margin_dpo/margin_mean": 12.840841293334961,
|
|
"margin_dpo/margin_std": 23.53649139404297,
|
|
"step": 199
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -19.27438735961914,
|
|
"KL/mean": -28.462909698486328,
|
|
"KL/rejected_KL_mean": -37.65142822265625,
|
|
"KL/std": 22.011890411376953,
|
|
"epoch": 0.30234315948601664,
|
|
"fcm_dpo/beta": 0.03747054934501648,
|
|
"fcm_dpo/delta": -0.29989588260650635,
|
|
"fcm_dpo/margin": 18.377042770385742,
|
|
"fcm_dpo/q_t": 0.3584999442100525,
|
|
"grad_norm": 16.770082473754883,
|
|
"learning_rate": 4.415111107797445e-07,
|
|
"logits/chosen": 0.46499067544937134,
|
|
"logits/rejected": 0.3886083960533142,
|
|
"logps/chosen": -77.09870910644531,
|
|
"logps/ref_chosen": -57.82432556152344,
|
|
"logps/ref_rejected": -89.28246307373047,
|
|
"logps/rejected": -126.93389129638672,
|
|
"loss": 1.0135,
|
|
"margin_dpo/margin_mean": 18.377044677734375,
|
|
"margin_dpo/margin_std": 25.930667877197266,
|
|
"step": 200
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -27.206161499023438,
|
|
"KL/mean": -34.41475296020508,
|
|
"KL/rejected_KL_mean": -41.62334442138672,
|
|
"KL/std": 23.186702728271484,
|
|
"epoch": 0.30385487528344673,
|
|
"fcm_dpo/beta": 0.036784444004297256,
|
|
"fcm_dpo/delta": -0.13475748896598816,
|
|
"fcm_dpo/margin": 14.417179107666016,
|
|
"fcm_dpo/q_t": 0.38926324248313904,
|
|
"grad_norm": 19.149494171142578,
|
|
"learning_rate": 4.4065853017905953e-07,
|
|
"logits/chosen": 0.5019490122795105,
|
|
"logits/rejected": 0.45298606157302856,
|
|
"logps/chosen": -86.20591735839844,
|
|
"logps/ref_chosen": -58.999759674072266,
|
|
"logps/ref_rejected": -84.67575073242188,
|
|
"logps/rejected": -126.29908752441406,
|
|
"loss": 1.0988,
|
|
"margin_dpo/margin_mean": 14.417179107666016,
|
|
"margin_dpo/margin_std": 24.065019607543945,
|
|
"step": 201
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -22.737823486328125,
|
|
"KL/mean": -31.539012908935547,
|
|
"KL/rejected_KL_mean": -40.3401985168457,
|
|
"KL/std": 21.906368255615234,
|
|
"epoch": 0.30536659108087677,
|
|
"fcm_dpo/beta": 0.03618372231721878,
|
|
"fcm_dpo/delta": -0.244398295879364,
|
|
"fcm_dpo/margin": 17.602378845214844,
|
|
"fcm_dpo/q_t": 0.3618626594543457,
|
|
"grad_norm": 19.34604263305664,
|
|
"learning_rate": 4.3980061644943575e-07,
|
|
"logits/chosen": 0.4105333387851715,
|
|
"logits/rejected": 0.33329737186431885,
|
|
"logps/chosen": -70.39846801757812,
|
|
"logps/ref_chosen": -47.660648345947266,
|
|
"logps/ref_rejected": -73.63249969482422,
|
|
"logps/rejected": -113.97270202636719,
|
|
"loss": 1.0141,
|
|
"margin_dpo/margin_mean": 17.602378845214844,
|
|
"margin_dpo/margin_std": 23.802017211914062,
|
|
"step": 202
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -27.778629302978516,
|
|
"KL/mean": -35.67098617553711,
|
|
"KL/rejected_KL_mean": -43.56334686279297,
|
|
"KL/std": 22.51268196105957,
|
|
"epoch": 0.30687830687830686,
|
|
"fcm_dpo/beta": 0.0354929156601429,
|
|
"fcm_dpo/delta": -0.16473078727722168,
|
|
"fcm_dpo/margin": 15.784713745117188,
|
|
"fcm_dpo/q_t": 0.3824731707572937,
|
|
"grad_norm": 20.284854888916016,
|
|
"learning_rate": 4.3893739358856455e-07,
|
|
"logits/chosen": 0.4810147285461426,
|
|
"logits/rejected": 0.39881980419158936,
|
|
"logps/chosen": -90.10416412353516,
|
|
"logps/ref_chosen": -62.32553482055664,
|
|
"logps/ref_rejected": -99.37226104736328,
|
|
"logps/rejected": -142.93560791015625,
|
|
"loss": 1.0676,
|
|
"margin_dpo/margin_mean": 15.784713745117188,
|
|
"margin_dpo/margin_std": 24.52391815185547,
|
|
"step": 203
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -25.31327247619629,
|
|
"KL/mean": -33.930267333984375,
|
|
"KL/rejected_KL_mean": -42.5472526550293,
|
|
"KL/std": 23.459644317626953,
|
|
"epoch": 0.30839002267573695,
|
|
"fcm_dpo/beta": 0.03455708920955658,
|
|
"fcm_dpo/delta": -0.20436403155326843,
|
|
"fcm_dpo/margin": 17.23397445678711,
|
|
"fcm_dpo/q_t": 0.37407928705215454,
|
|
"grad_norm": 18.417537689208984,
|
|
"learning_rate": 4.380688857426449e-07,
|
|
"logits/chosen": 0.4438938498497009,
|
|
"logits/rejected": 0.3712119460105896,
|
|
"logps/chosen": -75.94258880615234,
|
|
"logps/ref_chosen": -50.62931823730469,
|
|
"logps/ref_rejected": -66.60475158691406,
|
|
"logps/rejected": -109.15200805664062,
|
|
"loss": 1.0322,
|
|
"margin_dpo/margin_mean": 17.23397445678711,
|
|
"margin_dpo/margin_std": 23.524925231933594,
|
|
"step": 204
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -29.739261627197266,
|
|
"KL/mean": -37.9832649230957,
|
|
"KL/rejected_KL_mean": -46.22726821899414,
|
|
"KL/std": 24.711761474609375,
|
|
"epoch": 0.30990173847316704,
|
|
"fcm_dpo/beta": 0.03411586210131645,
|
|
"fcm_dpo/delta": -0.16750092804431915,
|
|
"fcm_dpo/margin": 16.488008499145508,
|
|
"fcm_dpo/q_t": 0.3833587169647217,
|
|
"grad_norm": 22.228839874267578,
|
|
"learning_rate": 4.3719511720570814e-07,
|
|
"logits/chosen": 0.49487367272377014,
|
|
"logits/rejected": 0.4229486584663391,
|
|
"logps/chosen": -100.09544372558594,
|
|
"logps/ref_chosen": -70.3561782836914,
|
|
"logps/ref_rejected": -93.39848327636719,
|
|
"logps/rejected": -139.62574768066406,
|
|
"loss": 1.092,
|
|
"margin_dpo/margin_mean": 16.48801040649414,
|
|
"margin_dpo/margin_std": 27.579925537109375,
|
|
"step": 205
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -31.69842529296875,
|
|
"KL/mean": -37.65776443481445,
|
|
"KL/rejected_KL_mean": -43.61709976196289,
|
|
"KL/std": 24.08857536315918,
|
|
"epoch": 0.31141345427059713,
|
|
"fcm_dpo/beta": 0.034101758152246475,
|
|
"fcm_dpo/delta": -0.0069284625351428986,
|
|
"fcm_dpo/margin": 11.918676376342773,
|
|
"fcm_dpo/q_t": 0.41744327545166016,
|
|
"grad_norm": 19.496170043945312,
|
|
"learning_rate": 4.363161124189387e-07,
|
|
"logits/chosen": 0.5401093363761902,
|
|
"logits/rejected": 0.5227783918380737,
|
|
"logps/chosen": -99.34390258789062,
|
|
"logps/ref_chosen": -67.64547729492188,
|
|
"logps/ref_rejected": -79.89584350585938,
|
|
"logps/rejected": -123.512939453125,
|
|
"loss": 1.2359,
|
|
"margin_dpo/margin_mean": 11.918676376342773,
|
|
"margin_dpo/margin_std": 28.636157989501953,
|
|
"step": 206
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -37.1961669921875,
|
|
"KL/mean": -45.79343795776367,
|
|
"KL/rejected_KL_mean": -54.39070129394531,
|
|
"KL/std": 25.222087860107422,
|
|
"epoch": 0.3129251700680272,
|
|
"fcm_dpo/beta": 0.03351462259888649,
|
|
"fcm_dpo/delta": -0.1816268265247345,
|
|
"fcm_dpo/margin": 17.194538116455078,
|
|
"fcm_dpo/q_t": 0.38177117705345154,
|
|
"grad_norm": 17.590639114379883,
|
|
"learning_rate": 4.3543189596998986e-07,
|
|
"logits/chosen": 0.4621313214302063,
|
|
"logits/rejected": 0.3907325565814972,
|
|
"logps/chosen": -104.86035919189453,
|
|
"logps/ref_chosen": -67.66419219970703,
|
|
"logps/ref_rejected": -85.10249328613281,
|
|
"logps/rejected": -139.49319458007812,
|
|
"loss": 1.0548,
|
|
"margin_dpo/margin_mean": 17.194538116455078,
|
|
"margin_dpo/margin_std": 26.133255004882812,
|
|
"step": 207
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -30.363033294677734,
|
|
"KL/mean": -34.847679138183594,
|
|
"KL/rejected_KL_mean": -39.33232116699219,
|
|
"KL/std": 23.218036651611328,
|
|
"epoch": 0.3144368858654573,
|
|
"fcm_dpo/beta": 0.033614836633205414,
|
|
"fcm_dpo/delta": 0.1000470221042633,
|
|
"fcm_dpo/margin": 8.969287872314453,
|
|
"fcm_dpo/q_t": 0.437006413936615,
|
|
"grad_norm": 24.3613338470459,
|
|
"learning_rate": 4.3454249259229664e-07,
|
|
"logits/chosen": 0.46849048137664795,
|
|
"logits/rejected": 0.4409424066543579,
|
|
"logps/chosen": -88.09474182128906,
|
|
"logps/ref_chosen": -57.731712341308594,
|
|
"logps/ref_rejected": -74.19276428222656,
|
|
"logps/rejected": -113.52508544921875,
|
|
"loss": 1.2876,
|
|
"margin_dpo/margin_mean": 8.96928882598877,
|
|
"margin_dpo/margin_std": 26.18239974975586,
|
|
"step": 208
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -31.021589279174805,
|
|
"KL/mean": -41.59681701660156,
|
|
"KL/rejected_KL_mean": -52.17204284667969,
|
|
"KL/std": 26.97199249267578,
|
|
"epoch": 0.31594860166288735,
|
|
"fcm_dpo/beta": 0.03296378627419472,
|
|
"fcm_dpo/delta": -0.30783510208129883,
|
|
"fcm_dpo/margin": 21.150447845458984,
|
|
"fcm_dpo/q_t": 0.3560887277126312,
|
|
"grad_norm": 20.026927947998047,
|
|
"learning_rate": 4.336479271643833e-07,
|
|
"logits/chosen": 0.4072892665863037,
|
|
"logits/rejected": 0.35210084915161133,
|
|
"logps/chosen": -99.57167053222656,
|
|
"logps/ref_chosen": -68.55007934570312,
|
|
"logps/ref_rejected": -87.90541076660156,
|
|
"logps/rejected": -140.07745361328125,
|
|
"loss": 1.0303,
|
|
"margin_dpo/margin_mean": 21.150447845458984,
|
|
"margin_dpo/margin_std": 31.4323673248291,
|
|
"step": 209
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -30.996309280395508,
|
|
"KL/mean": -41.185943603515625,
|
|
"KL/rejected_KL_mean": -51.37557601928711,
|
|
"KL/std": 28.448989868164062,
|
|
"epoch": 0.31746031746031744,
|
|
"fcm_dpo/beta": 0.03215660899877548,
|
|
"fcm_dpo/delta": -0.2635525166988373,
|
|
"fcm_dpo/margin": 20.379261016845703,
|
|
"fcm_dpo/q_t": 0.3656451404094696,
|
|
"grad_norm": 18.18603515625,
|
|
"learning_rate": 4.327482247091679e-07,
|
|
"logits/chosen": 0.5226341485977173,
|
|
"logits/rejected": 0.4185715317726135,
|
|
"logps/chosen": -88.26458740234375,
|
|
"logps/ref_chosen": -57.268272399902344,
|
|
"logps/ref_rejected": -85.72807312011719,
|
|
"logps/rejected": -137.10365295410156,
|
|
"loss": 1.0144,
|
|
"margin_dpo/margin_mean": 20.379261016845703,
|
|
"margin_dpo/margin_std": 28.25695037841797,
|
|
"step": 210
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -28.313474655151367,
|
|
"KL/mean": -38.20074462890625,
|
|
"KL/rejected_KL_mean": -48.08802032470703,
|
|
"KL/std": 25.332550048828125,
|
|
"epoch": 0.31897203325774753,
|
|
"fcm_dpo/beta": 0.03140837699174881,
|
|
"fcm_dpo/delta": -0.22785824537277222,
|
|
"fcm_dpo/margin": 19.7745418548584,
|
|
"fcm_dpo/q_t": 0.3721453845500946,
|
|
"grad_norm": 20.554920196533203,
|
|
"learning_rate": 4.3184341039326217e-07,
|
|
"logits/chosen": 0.5314611792564392,
|
|
"logits/rejected": 0.42976200580596924,
|
|
"logps/chosen": -81.95418548583984,
|
|
"logps/ref_chosen": -53.640708923339844,
|
|
"logps/ref_rejected": -93.0387954711914,
|
|
"logps/rejected": -141.12681579589844,
|
|
"loss": 1.0074,
|
|
"margin_dpo/margin_mean": 19.774539947509766,
|
|
"margin_dpo/margin_std": 26.591251373291016,
|
|
"step": 211
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -32.53465270996094,
|
|
"KL/mean": -42.73491287231445,
|
|
"KL/rejected_KL_mean": -52.935184478759766,
|
|
"KL/std": 28.05126953125,
|
|
"epoch": 0.3204837490551776,
|
|
"fcm_dpo/beta": 0.030590374022722244,
|
|
"fcm_dpo/delta": -0.23131054639816284,
|
|
"fcm_dpo/margin": 20.40053367614746,
|
|
"fcm_dpo/q_t": 0.36764803528785706,
|
|
"grad_norm": 15.261198043823242,
|
|
"learning_rate": 4.309335095262675e-07,
|
|
"logits/chosen": 0.5394423007965088,
|
|
"logits/rejected": 0.4615115523338318,
|
|
"logps/chosen": -89.90139770507812,
|
|
"logps/ref_chosen": -57.36674499511719,
|
|
"logps/ref_rejected": -79.89643096923828,
|
|
"logps/rejected": -132.8316192626953,
|
|
"loss": 1.035,
|
|
"margin_dpo/margin_mean": 20.40053367614746,
|
|
"margin_dpo/margin_std": 29.40656280517578,
|
|
"step": 212
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -25.58594512939453,
|
|
"KL/mean": -37.33778381347656,
|
|
"KL/rejected_KL_mean": -49.08962631225586,
|
|
"KL/std": 28.459300994873047,
|
|
"epoch": 0.3219954648526077,
|
|
"fcm_dpo/beta": 0.02963617816567421,
|
|
"fcm_dpo/delta": -0.3085259199142456,
|
|
"fcm_dpo/margin": 23.503684997558594,
|
|
"fcm_dpo/q_t": 0.3557588458061218,
|
|
"grad_norm": 14.419411659240723,
|
|
"learning_rate": 4.3001854756006724e-07,
|
|
"logits/chosen": 0.4899771511554718,
|
|
"logits/rejected": 0.46523317694664,
|
|
"logps/chosen": -90.80706024169922,
|
|
"logps/ref_chosen": -65.22111511230469,
|
|
"logps/ref_rejected": -80.1810302734375,
|
|
"logps/rejected": -129.27066040039062,
|
|
"loss": 0.9921,
|
|
"margin_dpo/margin_mean": 23.503684997558594,
|
|
"margin_dpo/margin_std": 31.640892028808594,
|
|
"step": 213
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -30.26761817932129,
|
|
"KL/mean": -42.18492889404297,
|
|
"KL/rejected_KL_mean": -54.10224151611328,
|
|
"KL/std": 28.68476104736328,
|
|
"epoch": 0.3235071806500378,
|
|
"fcm_dpo/beta": 0.02895916998386383,
|
|
"fcm_dpo/delta": -0.30005916953086853,
|
|
"fcm_dpo/margin": 23.834623336791992,
|
|
"fcm_dpo/q_t": 0.3596445918083191,
|
|
"grad_norm": 18.361404418945312,
|
|
"learning_rate": 4.290985500881143e-07,
|
|
"logits/chosen": 0.37772849202156067,
|
|
"logits/rejected": 0.3589029312133789,
|
|
"logps/chosen": -91.55995178222656,
|
|
"logps/ref_chosen": -61.292327880859375,
|
|
"logps/ref_rejected": -67.69841003417969,
|
|
"logps/rejected": -121.80065155029297,
|
|
"loss": 1.0059,
|
|
"margin_dpo/margin_mean": 23.834623336791992,
|
|
"margin_dpo/margin_std": 32.449485778808594,
|
|
"step": 214
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -35.39847946166992,
|
|
"KL/mean": -47.391883850097656,
|
|
"KL/rejected_KL_mean": -59.385284423828125,
|
|
"KL/std": 30.048267364501953,
|
|
"epoch": 0.3250188964474679,
|
|
"fcm_dpo/beta": 0.028154313564300537,
|
|
"fcm_dpo/delta": -0.28452175855636597,
|
|
"fcm_dpo/margin": 23.986801147460938,
|
|
"fcm_dpo/q_t": 0.3634524345397949,
|
|
"grad_norm": 16.6279239654541,
|
|
"learning_rate": 4.281735428447157e-07,
|
|
"logits/chosen": 0.42392605543136597,
|
|
"logits/rejected": 0.30763229727745056,
|
|
"logps/chosen": -99.26761627197266,
|
|
"logps/ref_chosen": -63.869136810302734,
|
|
"logps/ref_rejected": -98.7657241821289,
|
|
"logps/rejected": -158.1510009765625,
|
|
"loss": 1.0254,
|
|
"margin_dpo/margin_mean": 23.986801147460938,
|
|
"margin_dpo/margin_std": 33.91448211669922,
|
|
"step": 215
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -30.46402359008789,
|
|
"KL/mean": -43.171348571777344,
|
|
"KL/rejected_KL_mean": -55.8786735534668,
|
|
"KL/std": 29.37746238708496,
|
|
"epoch": 0.32653061224489793,
|
|
"fcm_dpo/beta": 0.02718636393547058,
|
|
"fcm_dpo/delta": -0.3014890253543854,
|
|
"fcm_dpo/margin": 25.414648056030273,
|
|
"fcm_dpo/q_t": 0.3515187203884125,
|
|
"grad_norm": 23.37505531311035,
|
|
"learning_rate": 4.2724355170431247e-07,
|
|
"logits/chosen": 0.5613178014755249,
|
|
"logits/rejected": 0.46850571036338806,
|
|
"logps/chosen": -98.28897857666016,
|
|
"logps/ref_chosen": -67.824951171875,
|
|
"logps/ref_rejected": -96.40231323242188,
|
|
"logps/rejected": -152.28099060058594,
|
|
"loss": 0.9509,
|
|
"margin_dpo/margin_mean": 25.414648056030273,
|
|
"margin_dpo/margin_std": 29.654399871826172,
|
|
"step": 216
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -34.42728805541992,
|
|
"KL/mean": -47.74620819091797,
|
|
"KL/rejected_KL_mean": -61.06512451171875,
|
|
"KL/std": 31.195194244384766,
|
|
"epoch": 0.328042328042328,
|
|
"fcm_dpo/beta": 0.0263175331056118,
|
|
"fcm_dpo/delta": -0.3127400279045105,
|
|
"fcm_dpo/margin": 26.637840270996094,
|
|
"fcm_dpo/q_t": 0.3537965714931488,
|
|
"grad_norm": 14.712310791015625,
|
|
"learning_rate": 4.26308602680756e-07,
|
|
"logits/chosen": 0.5112285614013672,
|
|
"logits/rejected": 0.40257206559181213,
|
|
"logps/chosen": -94.93228149414062,
|
|
"logps/ref_chosen": -60.5049934387207,
|
|
"logps/ref_rejected": -84.26618194580078,
|
|
"logps/rejected": -145.331298828125,
|
|
"loss": 0.9607,
|
|
"margin_dpo/margin_mean": 26.637840270996094,
|
|
"margin_dpo/margin_std": 32.72651672363281,
|
|
"step": 217
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -37.614295959472656,
|
|
"KL/mean": -45.84656524658203,
|
|
"KL/rejected_KL_mean": -54.078826904296875,
|
|
"KL/std": 30.16741180419922,
|
|
"epoch": 0.3295540438397581,
|
|
"fcm_dpo/beta": 0.026213286444544792,
|
|
"fcm_dpo/delta": -0.03296023607254028,
|
|
"fcm_dpo/margin": 16.464534759521484,
|
|
"fcm_dpo/q_t": 0.4088500738143921,
|
|
"grad_norm": 17.38701057434082,
|
|
"learning_rate": 4.253687219265803e-07,
|
|
"logits/chosen": 0.36019447445869446,
|
|
"logits/rejected": 0.35970914363861084,
|
|
"logps/chosen": -108.20861053466797,
|
|
"logps/ref_chosen": -70.59431457519531,
|
|
"logps/ref_rejected": -73.89038848876953,
|
|
"logps/rejected": -127.9692153930664,
|
|
"loss": 1.2084,
|
|
"margin_dpo/margin_mean": 16.464534759521484,
|
|
"margin_dpo/margin_std": 35.91667175292969,
|
|
"step": 218
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -35.46495819091797,
|
|
"KL/mean": -44.257240295410156,
|
|
"KL/rejected_KL_mean": -53.049522399902344,
|
|
"KL/std": 30.532825469970703,
|
|
"epoch": 0.3310657596371882,
|
|
"fcm_dpo/beta": 0.025908175855875015,
|
|
"fcm_dpo/delta": -0.05717352032661438,
|
|
"fcm_dpo/margin": 17.58456802368164,
|
|
"fcm_dpo/q_t": 0.40124112367630005,
|
|
"grad_norm": 16.024593353271484,
|
|
"learning_rate": 4.2442393573227043e-07,
|
|
"logits/chosen": 0.4679608643054962,
|
|
"logits/rejected": 0.42435723543167114,
|
|
"logps/chosen": -95.95590209960938,
|
|
"logps/ref_chosen": -60.490943908691406,
|
|
"logps/ref_rejected": -75.85001373291016,
|
|
"logps/rejected": -128.8995361328125,
|
|
"loss": 1.1147,
|
|
"margin_dpo/margin_mean": 17.58456802368164,
|
|
"margin_dpo/margin_std": 29.443004608154297,
|
|
"step": 219
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -29.79060173034668,
|
|
"KL/mean": -40.15729522705078,
|
|
"KL/rejected_KL_mean": -50.523990631103516,
|
|
"KL/std": 31.198163986206055,
|
|
"epoch": 0.3325774754346183,
|
|
"fcm_dpo/beta": 0.025561682879924774,
|
|
"fcm_dpo/delta": -0.1347234696149826,
|
|
"fcm_dpo/margin": 20.733386993408203,
|
|
"fcm_dpo/q_t": 0.39098191261291504,
|
|
"grad_norm": 16.73904800415039,
|
|
"learning_rate": 4.234742705255272e-07,
|
|
"logits/chosen": 0.5676250457763672,
|
|
"logits/rejected": 0.4984397888183594,
|
|
"logps/chosen": -74.80400085449219,
|
|
"logps/ref_chosen": -45.013397216796875,
|
|
"logps/ref_rejected": -70.49369812011719,
|
|
"logps/rejected": -121.01769256591797,
|
|
"loss": 1.1093,
|
|
"margin_dpo/margin_mean": 20.733386993408203,
|
|
"margin_dpo/margin_std": 35.2826042175293,
|
|
"step": 220
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -29.598405838012695,
|
|
"KL/mean": -41.12885284423828,
|
|
"KL/rejected_KL_mean": -52.6593017578125,
|
|
"KL/std": 30.94803810119629,
|
|
"epoch": 0.3340891912320484,
|
|
"fcm_dpo/beta": 0.02524815872311592,
|
|
"fcm_dpo/delta": -0.1875613033771515,
|
|
"fcm_dpo/margin": 23.06089973449707,
|
|
"fcm_dpo/q_t": 0.38034531474113464,
|
|
"grad_norm": 18.37561798095703,
|
|
"learning_rate": 4.22519752870528e-07,
|
|
"logits/chosen": 0.5222690105438232,
|
|
"logits/rejected": 0.44172531366348267,
|
|
"logps/chosen": -88.69424438476562,
|
|
"logps/ref_chosen": -59.09584045410156,
|
|
"logps/ref_rejected": -88.64388275146484,
|
|
"logps/rejected": -141.30319213867188,
|
|
"loss": 1.0541,
|
|
"margin_dpo/margin_mean": 23.06089973449707,
|
|
"margin_dpo/margin_std": 35.15159606933594,
|
|
"step": 221
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -30.148216247558594,
|
|
"KL/mean": -45.70598220825195,
|
|
"KL/rejected_KL_mean": -61.26374053955078,
|
|
"KL/std": 35.202728271484375,
|
|
"epoch": 0.3356009070294785,
|
|
"fcm_dpo/beta": 0.02441229112446308,
|
|
"fcm_dpo/delta": -0.3738223910331726,
|
|
"fcm_dpo/margin": 31.115528106689453,
|
|
"fcm_dpo/q_t": 0.3424602150917053,
|
|
"grad_norm": 15.55783462524414,
|
|
"learning_rate": 4.2156040946718343e-07,
|
|
"logits/chosen": 0.5593434572219849,
|
|
"logits/rejected": 0.466124027967453,
|
|
"logps/chosen": -86.14590454101562,
|
|
"logps/ref_chosen": -55.9976921081543,
|
|
"logps/ref_rejected": -111.94727325439453,
|
|
"logps/rejected": -173.2110137939453,
|
|
"loss": 0.934,
|
|
"margin_dpo/margin_mean": 31.115528106689453,
|
|
"margin_dpo/margin_std": 36.53013610839844,
|
|
"step": 222
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -35.494258880615234,
|
|
"KL/mean": -47.961822509765625,
|
|
"KL/rejected_KL_mean": -60.42938995361328,
|
|
"KL/std": 33.96479797363281,
|
|
"epoch": 0.3371126228269085,
|
|
"fcm_dpo/beta": 0.023781482130289078,
|
|
"fcm_dpo/delta": -0.19938182830810547,
|
|
"fcm_dpo/margin": 24.93512725830078,
|
|
"fcm_dpo/q_t": 0.3747786283493042,
|
|
"grad_norm": 16.416410446166992,
|
|
"learning_rate": 4.2059626715039065e-07,
|
|
"logits/chosen": 0.5625898838043213,
|
|
"logits/rejected": 0.5028936862945557,
|
|
"logps/chosen": -95.38568115234375,
|
|
"logps/ref_chosen": -59.891422271728516,
|
|
"logps/ref_rejected": -86.28954315185547,
|
|
"logps/rejected": -146.71893310546875,
|
|
"loss": 1.0215,
|
|
"margin_dpo/margin_mean": 24.93512725830078,
|
|
"margin_dpo/margin_std": 33.791534423828125,
|
|
"step": 223
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -40.67298126220703,
|
|
"KL/mean": -47.8661994934082,
|
|
"KL/rejected_KL_mean": -55.059410095214844,
|
|
"KL/std": 33.604251861572266,
|
|
"epoch": 0.3386243386243386,
|
|
"fcm_dpo/beta": 0.023779569193720818,
|
|
"fcm_dpo/delta": 0.05891609564423561,
|
|
"fcm_dpo/margin": 14.386428833007812,
|
|
"fcm_dpo/q_t": 0.42780160903930664,
|
|
"grad_norm": 20.349035263061523,
|
|
"learning_rate": 4.1962735288928304e-07,
|
|
"logits/chosen": 0.5375025272369385,
|
|
"logits/rejected": 0.5148888230323792,
|
|
"logps/chosen": -104.71762084960938,
|
|
"logps/ref_chosen": -64.04463195800781,
|
|
"logps/ref_rejected": -75.05450439453125,
|
|
"logps/rejected": -130.11390686035156,
|
|
"loss": 1.2289,
|
|
"margin_dpo/margin_mean": 14.386428833007812,
|
|
"margin_dpo/margin_std": 34.92803192138672,
|
|
"step": 224
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -38.989234924316406,
|
|
"KL/mean": -52.760276794433594,
|
|
"KL/rejected_KL_mean": -66.53131103515625,
|
|
"KL/std": 37.69361114501953,
|
|
"epoch": 0.3401360544217687,
|
|
"fcm_dpo/beta": 0.023313988000154495,
|
|
"fcm_dpo/delta": -0.25164204835891724,
|
|
"fcm_dpo/margin": 27.542081832885742,
|
|
"fcm_dpo/q_t": 0.3650243282318115,
|
|
"grad_norm": 15.8230562210083,
|
|
"learning_rate": 4.186536937864752e-07,
|
|
"logits/chosen": 0.5628567934036255,
|
|
"logits/rejected": 0.443564236164093,
|
|
"logps/chosen": -105.08505249023438,
|
|
"logps/ref_chosen": -66.0958251953125,
|
|
"logps/ref_rejected": -97.68675231933594,
|
|
"logps/rejected": -164.2180633544922,
|
|
"loss": 0.9918,
|
|
"margin_dpo/margin_mean": 27.542081832885742,
|
|
"margin_dpo/margin_std": 35.07389831542969,
|
|
"step": 225
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -35.82355499267578,
|
|
"KL/mean": -46.651546478271484,
|
|
"KL/rejected_KL_mean": -57.47953796386719,
|
|
"KL/std": 33.90411376953125,
|
|
"epoch": 0.3416477702191988,
|
|
"fcm_dpo/beta": 0.02311975695192814,
|
|
"fcm_dpo/delta": -0.10322263836860657,
|
|
"fcm_dpo/margin": 21.655977249145508,
|
|
"fcm_dpo/q_t": 0.393219918012619,
|
|
"grad_norm": 15.993927955627441,
|
|
"learning_rate": 4.176753170773052e-07,
|
|
"logits/chosen": 0.5577676296234131,
|
|
"logits/rejected": 0.5109608173370361,
|
|
"logps/chosen": -87.24043273925781,
|
|
"logps/ref_chosen": -51.4168701171875,
|
|
"logps/ref_rejected": -66.30068969726562,
|
|
"logps/rejected": -123.78022766113281,
|
|
"loss": 1.136,
|
|
"margin_dpo/margin_mean": 21.655975341796875,
|
|
"margin_dpo/margin_std": 39.516639709472656,
|
|
"step": 226
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -38.44837951660156,
|
|
"KL/mean": -49.849884033203125,
|
|
"KL/rejected_KL_mean": -61.25138473510742,
|
|
"KL/std": 37.103668212890625,
|
|
"epoch": 0.3431594860166289,
|
|
"fcm_dpo/beta": 0.022783003747463226,
|
|
"fcm_dpo/delta": -0.1228446364402771,
|
|
"fcm_dpo/margin": 22.803009033203125,
|
|
"fcm_dpo/q_t": 0.3919198513031006,
|
|
"grad_norm": 17.734506607055664,
|
|
"learning_rate": 4.166922501290729e-07,
|
|
"logits/chosen": 0.6010725498199463,
|
|
"logits/rejected": 0.5616019368171692,
|
|
"logps/chosen": -96.43815612792969,
|
|
"logps/ref_chosen": -57.989776611328125,
|
|
"logps/ref_rejected": -75.05464172363281,
|
|
"logps/rejected": -136.3060302734375,
|
|
"loss": 1.1224,
|
|
"margin_dpo/margin_mean": 22.803007125854492,
|
|
"margin_dpo/margin_std": 40.790489196777344,
|
|
"step": 227
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -40.48957061767578,
|
|
"KL/mean": -53.27311706542969,
|
|
"KL/rejected_KL_mean": -66.05665588378906,
|
|
"KL/std": 35.14244842529297,
|
|
"epoch": 0.34467120181405897,
|
|
"fcm_dpo/beta": 0.022472485899925232,
|
|
"fcm_dpo/delta": -0.1795240342617035,
|
|
"fcm_dpo/margin": 25.567081451416016,
|
|
"fcm_dpo/q_t": 0.3769599199295044,
|
|
"grad_norm": 15.979297637939453,
|
|
"learning_rate": 4.1570452044027405e-07,
|
|
"logits/chosen": 0.5634814500808716,
|
|
"logits/rejected": 0.4821271300315857,
|
|
"logps/chosen": -96.04893493652344,
|
|
"logps/ref_chosen": -55.55936813354492,
|
|
"logps/ref_rejected": -77.02364349365234,
|
|
"logps/rejected": -143.08030700683594,
|
|
"loss": 1.0413,
|
|
"margin_dpo/margin_mean": 25.567081451416016,
|
|
"margin_dpo/margin_std": 36.318824768066406,
|
|
"step": 228
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -34.7919921875,
|
|
"KL/mean": -45.6795654296875,
|
|
"KL/rejected_KL_mean": -56.567138671875,
|
|
"KL/std": 34.212554931640625,
|
|
"epoch": 0.34618291761148906,
|
|
"fcm_dpo/beta": 0.022183677181601524,
|
|
"fcm_dpo/delta": -0.08511705696582794,
|
|
"fcm_dpo/margin": 21.7751407623291,
|
|
"fcm_dpo/q_t": 0.39761051535606384,
|
|
"grad_norm": 25.149538040161133,
|
|
"learning_rate": 4.147121556398312e-07,
|
|
"logits/chosen": 0.6579074859619141,
|
|
"logits/rejected": 0.5816039443016052,
|
|
"logps/chosen": -85.58665466308594,
|
|
"logps/ref_chosen": -50.79466247558594,
|
|
"logps/ref_rejected": -78.4474105834961,
|
|
"logps/rejected": -135.01455688476562,
|
|
"loss": 1.1482,
|
|
"margin_dpo/margin_mean": 21.77513885498047,
|
|
"margin_dpo/margin_std": 41.5511474609375,
|
|
"step": 229
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -38.13761901855469,
|
|
"KL/mean": -50.8995361328125,
|
|
"KL/rejected_KL_mean": -63.661460876464844,
|
|
"KL/std": 38.15025329589844,
|
|
"epoch": 0.3476946334089191,
|
|
"fcm_dpo/beta": 0.02197270840406418,
|
|
"fcm_dpo/delta": -0.16561289131641388,
|
|
"fcm_dpo/margin": 25.523839950561523,
|
|
"fcm_dpo/q_t": 0.3808749318122864,
|
|
"grad_norm": 15.51723575592041,
|
|
"learning_rate": 4.137151834863213e-07,
|
|
"logits/chosen": 0.5493189096450806,
|
|
"logits/rejected": 0.5497379302978516,
|
|
"logps/chosen": -94.8668441772461,
|
|
"logps/ref_chosen": -56.729225158691406,
|
|
"logps/ref_rejected": -62.99180603027344,
|
|
"logps/rejected": -126.65326690673828,
|
|
"loss": 1.0569,
|
|
"margin_dpo/margin_mean": 25.523839950561523,
|
|
"margin_dpo/margin_std": 37.60498046875,
|
|
"step": 230
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -42.99673080444336,
|
|
"KL/mean": -62.38915252685547,
|
|
"KL/rejected_KL_mean": -81.78157043457031,
|
|
"KL/std": 37.13996887207031,
|
|
"epoch": 0.3492063492063492,
|
|
"fcm_dpo/beta": 0.021028637886047363,
|
|
"fcm_dpo/delta": -0.4346139132976532,
|
|
"fcm_dpo/margin": 38.78483581542969,
|
|
"fcm_dpo/q_t": 0.32425105571746826,
|
|
"grad_norm": 15.981878280639648,
|
|
"learning_rate": 4.1271363186719835e-07,
|
|
"logits/chosen": 0.5063939094543457,
|
|
"logits/rejected": 0.5007983446121216,
|
|
"logps/chosen": -115.59382629394531,
|
|
"logps/ref_chosen": -72.59709930419922,
|
|
"logps/ref_rejected": -86.2322998046875,
|
|
"logps/rejected": -168.0138702392578,
|
|
"loss": 0.8772,
|
|
"margin_dpo/margin_mean": 38.78483963012695,
|
|
"margin_dpo/margin_std": 38.89888000488281,
|
|
"step": 231
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -43.787479400634766,
|
|
"KL/mean": -56.7198371887207,
|
|
"KL/rejected_KL_mean": -69.65219116210938,
|
|
"KL/std": 41.07295227050781,
|
|
"epoch": 0.3507180650037793,
|
|
"fcm_dpo/beta": 0.02062969096004963,
|
|
"fcm_dpo/delta": -0.13728736340999603,
|
|
"fcm_dpo/margin": 25.86471176147461,
|
|
"fcm_dpo/q_t": 0.39167624711990356,
|
|
"grad_norm": 16.32636833190918,
|
|
"learning_rate": 4.1170752879801436e-07,
|
|
"logits/chosen": 0.4878928065299988,
|
|
"logits/rejected": 0.45853301882743835,
|
|
"logps/chosen": -111.906005859375,
|
|
"logps/ref_chosen": -68.1185302734375,
|
|
"logps/ref_rejected": -83.79415893554688,
|
|
"logps/rejected": -153.44635009765625,
|
|
"loss": 1.1217,
|
|
"margin_dpo/margin_mean": 25.86471176147461,
|
|
"margin_dpo/margin_std": 46.780723571777344,
|
|
"step": 232
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -50.1059684753418,
|
|
"KL/mean": -59.9688720703125,
|
|
"KL/rejected_KL_mean": -69.83177185058594,
|
|
"KL/std": 38.36199188232422,
|
|
"epoch": 0.35222978080120937,
|
|
"fcm_dpo/beta": 0.020316744223237038,
|
|
"fcm_dpo/delta": -0.12102457880973816,
|
|
"fcm_dpo/margin": 19.72581672668457,
|
|
"fcm_dpo/q_t": 0.4155174791812897,
|
|
"grad_norm": 16.09383773803711,
|
|
"learning_rate": 4.106969024216348e-07,
|
|
"logits/chosen": 0.5895452499389648,
|
|
"logits/rejected": 0.5286322832107544,
|
|
"logps/chosen": -105.17611694335938,
|
|
"logps/ref_chosen": -55.070152282714844,
|
|
"logps/ref_rejected": -66.61845397949219,
|
|
"logps/rejected": -136.45022583007812,
|
|
"loss": 1.1703,
|
|
"margin_dpo/margin_mean": 19.72581672668457,
|
|
"margin_dpo/margin_std": 37.7828369140625,
|
|
"step": 233
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -44.98041915893555,
|
|
"KL/mean": -54.64990997314453,
|
|
"KL/rejected_KL_mean": -64.31939697265625,
|
|
"KL/std": 37.067466735839844,
|
|
"epoch": 0.35374149659863946,
|
|
"fcm_dpo/beta": 0.020170819014310837,
|
|
"fcm_dpo/delta": -0.1446922868490219,
|
|
"fcm_dpo/margin": 19.33897590637207,
|
|
"fcm_dpo/q_t": 0.4179201126098633,
|
|
"grad_norm": 22.129478454589844,
|
|
"learning_rate": 4.09681781007452e-07,
|
|
"logits/chosen": 0.49770036339759827,
|
|
"logits/rejected": 0.4915820062160492,
|
|
"logps/chosen": -100.90631103515625,
|
|
"logps/ref_chosen": -55.92589569091797,
|
|
"logps/ref_rejected": -51.11608123779297,
|
|
"logps/rejected": -115.43547821044922,
|
|
"loss": 1.2043,
|
|
"margin_dpo/margin_mean": 19.33897590637207,
|
|
"margin_dpo/margin_std": 40.08860778808594,
|
|
"step": 234
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -39.90374755859375,
|
|
"KL/mean": -57.16230773925781,
|
|
"KL/rejected_KL_mean": -74.42086791992188,
|
|
"KL/std": 40.59101104736328,
|
|
"epoch": 0.35525321239606955,
|
|
"fcm_dpo/beta": 0.019624676555395126,
|
|
"fcm_dpo/delta": -0.2868325710296631,
|
|
"fcm_dpo/margin": 34.517120361328125,
|
|
"fcm_dpo/q_t": 0.3528672456741333,
|
|
"grad_norm": 15.841700553894043,
|
|
"learning_rate": 4.08662192950594e-07,
|
|
"logits/chosen": 0.564241886138916,
|
|
"logits/rejected": 0.5506614446640015,
|
|
"logps/chosen": -104.44346618652344,
|
|
"logps/ref_chosen": -64.53972625732422,
|
|
"logps/ref_rejected": -77.69151306152344,
|
|
"logps/rejected": -152.1123809814453,
|
|
"loss": 0.9444,
|
|
"margin_dpo/margin_mean": 34.517120361328125,
|
|
"margin_dpo/margin_std": 38.370826721191406,
|
|
"step": 235
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -60.181358337402344,
|
|
"KL/mean": -73.05296325683594,
|
|
"KL/rejected_KL_mean": -85.924560546875,
|
|
"KL/std": 39.53045654296875,
|
|
"epoch": 0.35676492819349964,
|
|
"fcm_dpo/beta": 0.019282015040516853,
|
|
"fcm_dpo/delta": -0.09912340342998505,
|
|
"fcm_dpo/margin": 25.74319839477539,
|
|
"fcm_dpo/q_t": 0.39322900772094727,
|
|
"grad_norm": 16.594552993774414,
|
|
"learning_rate": 4.076381667711306e-07,
|
|
"logits/chosen": 0.534003734588623,
|
|
"logits/rejected": 0.5209078788757324,
|
|
"logps/chosen": -131.33609008789062,
|
|
"logps/ref_chosen": -71.15473937988281,
|
|
"logps/ref_rejected": -84.88541412353516,
|
|
"logps/rejected": -170.80996704101562,
|
|
"loss": 1.1156,
|
|
"margin_dpo/margin_mean": 25.74319839477539,
|
|
"margin_dpo/margin_std": 44.24085235595703,
|
|
"step": 236
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -49.93571472167969,
|
|
"KL/mean": -65.59040832519531,
|
|
"KL/rejected_KL_mean": -81.24510192871094,
|
|
"KL/std": 39.86061096191406,
|
|
"epoch": 0.35827664399092973,
|
|
"fcm_dpo/beta": 0.019004706293344498,
|
|
"fcm_dpo/delta": -0.2008163332939148,
|
|
"fcm_dpo/margin": 31.30939292907715,
|
|
"fcm_dpo/q_t": 0.37146705389022827,
|
|
"grad_norm": 18.890247344970703,
|
|
"learning_rate": 4.066097311132753e-07,
|
|
"logits/chosen": 0.5791411399841309,
|
|
"logits/rejected": 0.5694358348846436,
|
|
"logps/chosen": -126.07772827148438,
|
|
"logps/ref_chosen": -76.14201354980469,
|
|
"logps/ref_rejected": -80.88479614257812,
|
|
"logps/rejected": -162.12989807128906,
|
|
"loss": 1.0454,
|
|
"margin_dpo/margin_mean": 31.30939483642578,
|
|
"margin_dpo/margin_std": 44.860679626464844,
|
|
"step": 237
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -45.610618591308594,
|
|
"KL/mean": -61.096900939941406,
|
|
"KL/rejected_KL_mean": -76.58319091796875,
|
|
"KL/std": 41.32268524169922,
|
|
"epoch": 0.35978835978835977,
|
|
"fcm_dpo/beta": 0.018570806831121445,
|
|
"fcm_dpo/delta": -0.18135708570480347,
|
|
"fcm_dpo/margin": 30.97256088256836,
|
|
"fcm_dpo/q_t": 0.3766130208969116,
|
|
"grad_norm": 20.00119400024414,
|
|
"learning_rate": 4.0557691474458414e-07,
|
|
"logits/chosen": 0.5349264740943909,
|
|
"logits/rejected": 0.5261259078979492,
|
|
"logps/chosen": -114.49546813964844,
|
|
"logps/ref_chosen": -68.88484954833984,
|
|
"logps/ref_rejected": -75.8946304321289,
|
|
"logps/rejected": -152.47781372070312,
|
|
"loss": 1.0394,
|
|
"margin_dpo/margin_mean": 30.972562789916992,
|
|
"margin_dpo/margin_std": 43.140052795410156,
|
|
"step": 238
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -55.99228286743164,
|
|
"KL/mean": -71.22900390625,
|
|
"KL/rejected_KL_mean": -86.4657211303711,
|
|
"KL/std": 44.9942626953125,
|
|
"epoch": 0.36130007558578986,
|
|
"fcm_dpo/beta": 0.018381305038928986,
|
|
"fcm_dpo/delta": -0.16466887295246124,
|
|
"fcm_dpo/margin": 30.47344398498535,
|
|
"fcm_dpo/q_t": 0.3811022639274597,
|
|
"grad_norm": 18.84499740600586,
|
|
"learning_rate": 4.045397465551513e-07,
|
|
"logits/chosen": 0.6969434022903442,
|
|
"logits/rejected": 0.5538977980613708,
|
|
"logps/chosen": -112.76411437988281,
|
|
"logps/ref_chosen": -56.771827697753906,
|
|
"logps/ref_rejected": -116.23050689697266,
|
|
"logps/rejected": -202.69622802734375,
|
|
"loss": 1.0672,
|
|
"margin_dpo/margin_mean": 30.47344207763672,
|
|
"margin_dpo/margin_std": 46.13434600830078,
|
|
"step": 239
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -53.32081604003906,
|
|
"KL/mean": -72.77437591552734,
|
|
"KL/rejected_KL_mean": -92.22793579101562,
|
|
"KL/std": 45.232093811035156,
|
|
"epoch": 0.36281179138321995,
|
|
"fcm_dpo/beta": 0.017842736095190048,
|
|
"fcm_dpo/delta": -0.30462610721588135,
|
|
"fcm_dpo/margin": 38.9071159362793,
|
|
"fcm_dpo/q_t": 0.3519837558269501,
|
|
"grad_norm": 13.985093116760254,
|
|
"learning_rate": 4.0349825555680045e-07,
|
|
"logits/chosen": 0.5877687931060791,
|
|
"logits/rejected": 0.4893539845943451,
|
|
"logps/chosen": -106.6749267578125,
|
|
"logps/ref_chosen": -53.35411071777344,
|
|
"logps/ref_rejected": -80.12019348144531,
|
|
"logps/rejected": -172.34812927246094,
|
|
"loss": 0.9579,
|
|
"margin_dpo/margin_mean": 38.90711212158203,
|
|
"margin_dpo/margin_std": 46.387420654296875,
|
|
"step": 240
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -50.62466812133789,
|
|
"KL/mean": -63.334598541259766,
|
|
"KL/rejected_KL_mean": -76.04451751708984,
|
|
"KL/std": 41.78208923339844,
|
|
"epoch": 0.36432350718065004,
|
|
"fcm_dpo/beta": 0.01765056699514389,
|
|
"fcm_dpo/delta": -0.04979285970330238,
|
|
"fcm_dpo/margin": 25.41985321044922,
|
|
"fcm_dpo/q_t": 0.4046594500541687,
|
|
"grad_norm": 19.16240119934082,
|
|
"learning_rate": 4.0245247088227377e-07,
|
|
"logits/chosen": 0.5626074075698853,
|
|
"logits/rejected": 0.5247374773025513,
|
|
"logps/chosen": -122.52008056640625,
|
|
"logps/ref_chosen": -71.89541625976562,
|
|
"logps/ref_rejected": -83.03492736816406,
|
|
"logps/rejected": -159.07943725585938,
|
|
"loss": 1.1369,
|
|
"margin_dpo/margin_mean": 25.41985321044922,
|
|
"margin_dpo/margin_std": 46.09049606323242,
|
|
"step": 241
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -52.369239807128906,
|
|
"KL/mean": -68.80877685546875,
|
|
"KL/rejected_KL_mean": -85.24830627441406,
|
|
"KL/std": 44.740806579589844,
|
|
"epoch": 0.36583522297808013,
|
|
"fcm_dpo/beta": 0.017314117401838303,
|
|
"fcm_dpo/delta": -0.1754074990749359,
|
|
"fcm_dpo/margin": 32.879066467285156,
|
|
"fcm_dpo/q_t": 0.37907618284225464,
|
|
"grad_norm": 14.597370147705078,
|
|
"learning_rate": 4.0140242178441665e-07,
|
|
"logits/chosen": 0.5742692947387695,
|
|
"logits/rejected": 0.5533444285392761,
|
|
"logps/chosen": -110.29667663574219,
|
|
"logps/ref_chosen": -57.927433013916016,
|
|
"logps/ref_rejected": -67.838623046875,
|
|
"logps/rejected": -153.08692932128906,
|
|
"loss": 1.0464,
|
|
"margin_dpo/margin_mean": 32.879066467285156,
|
|
"margin_dpo/margin_std": 47.375030517578125,
|
|
"step": 242
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -50.97338104248047,
|
|
"KL/mean": -66.1138916015625,
|
|
"KL/rejected_KL_mean": -81.25440216064453,
|
|
"KL/std": 44.024742126464844,
|
|
"epoch": 0.3673469387755102,
|
|
"fcm_dpo/beta": 0.017144039273262024,
|
|
"fcm_dpo/delta": -0.12230625748634338,
|
|
"fcm_dpo/margin": 30.28102684020996,
|
|
"fcm_dpo/q_t": 0.3867276906967163,
|
|
"grad_norm": 17.810152053833008,
|
|
"learning_rate": 4.003481376353596e-07,
|
|
"logits/chosen": 0.5383376479148865,
|
|
"logits/rejected": 0.5490964651107788,
|
|
"logps/chosen": -125.25006103515625,
|
|
"logps/ref_chosen": -74.27667236328125,
|
|
"logps/ref_rejected": -73.24340057373047,
|
|
"logps/rejected": -154.497802734375,
|
|
"loss": 1.0671,
|
|
"margin_dpo/margin_mean": 30.281028747558594,
|
|
"margin_dpo/margin_std": 44.882232666015625,
|
|
"step": 243
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -48.637115478515625,
|
|
"KL/mean": -68.560302734375,
|
|
"KL/rejected_KL_mean": -88.48348999023438,
|
|
"KL/std": 43.82518768310547,
|
|
"epoch": 0.3688586545729403,
|
|
"fcm_dpo/beta": 0.016777563840150833,
|
|
"fcm_dpo/delta": -0.2773972749710083,
|
|
"fcm_dpo/margin": 39.84636688232422,
|
|
"fcm_dpo/q_t": 0.3547373414039612,
|
|
"grad_norm": 14.01524829864502,
|
|
"learning_rate": 3.9928964792569654e-07,
|
|
"logits/chosen": 0.5952280759811401,
|
|
"logits/rejected": 0.5085329413414001,
|
|
"logps/chosen": -102.00102233886719,
|
|
"logps/ref_chosen": -53.36390686035156,
|
|
"logps/ref_rejected": -71.10276794433594,
|
|
"logps/rejected": -159.5862579345703,
|
|
"loss": 0.9497,
|
|
"margin_dpo/margin_mean": 39.84636688232422,
|
|
"margin_dpo/margin_std": 44.31079864501953,
|
|
"step": 244
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -53.07337188720703,
|
|
"KL/mean": -76.02486419677734,
|
|
"KL/rejected_KL_mean": -98.97635650634766,
|
|
"KL/std": 45.43760681152344,
|
|
"epoch": 0.37037037037037035,
|
|
"fcm_dpo/beta": 0.016174674034118652,
|
|
"fcm_dpo/delta": -0.35598015785217285,
|
|
"fcm_dpo/margin": 45.902984619140625,
|
|
"fcm_dpo/q_t": 0.33660876750946045,
|
|
"grad_norm": 22.613052368164062,
|
|
"learning_rate": 3.982269822636601e-07,
|
|
"logits/chosen": 0.6260331869125366,
|
|
"logits/rejected": 0.5986565351486206,
|
|
"logps/chosen": -124.26847839355469,
|
|
"logps/ref_chosen": -71.19510650634766,
|
|
"logps/ref_rejected": -80.76235961914062,
|
|
"logps/rejected": -179.73870849609375,
|
|
"loss": 0.8944,
|
|
"margin_dpo/margin_mean": 45.902984619140625,
|
|
"margin_dpo/margin_std": 45.24086380004883,
|
|
"step": 245
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -63.764801025390625,
|
|
"KL/mean": -80.96756744384766,
|
|
"KL/rejected_KL_mean": -98.17033386230469,
|
|
"KL/std": 45.43177032470703,
|
|
"epoch": 0.37188208616780044,
|
|
"fcm_dpo/beta": 0.015904948115348816,
|
|
"fcm_dpo/delta": -0.1512506753206253,
|
|
"fcm_dpo/margin": 34.40553283691406,
|
|
"fcm_dpo/q_t": 0.38361304998397827,
|
|
"grad_norm": 15.275558471679688,
|
|
"learning_rate": 3.971601703742932e-07,
|
|
"logits/chosen": 0.6260999441146851,
|
|
"logits/rejected": 0.5634744167327881,
|
|
"logps/chosen": -135.38584899902344,
|
|
"logps/ref_chosen": -71.62104797363281,
|
|
"logps/ref_rejected": -94.03392028808594,
|
|
"logps/rejected": -192.20425415039062,
|
|
"loss": 1.097,
|
|
"margin_dpo/margin_mean": 34.40553283691406,
|
|
"margin_dpo/margin_std": 56.88691329956055,
|
|
"step": 246
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -66.78804779052734,
|
|
"KL/mean": -77.9229736328125,
|
|
"KL/rejected_KL_mean": -89.05789184570312,
|
|
"KL/std": 44.57904052734375,
|
|
"epoch": 0.37339380196523053,
|
|
"fcm_dpo/beta": 0.015894196927547455,
|
|
"fcm_dpo/delta": 0.04651079326868057,
|
|
"fcm_dpo/margin": 22.26984214782715,
|
|
"fcm_dpo/q_t": 0.4219495952129364,
|
|
"grad_norm": 19.00952911376953,
|
|
"learning_rate": 3.960892420986177e-07,
|
|
"logits/chosen": 0.631608784198761,
|
|
"logits/rejected": 0.6215128898620605,
|
|
"logps/chosen": -146.8105926513672,
|
|
"logps/ref_chosen": -80.02254486083984,
|
|
"logps/ref_rejected": -89.22705841064453,
|
|
"logps/rejected": -178.28494262695312,
|
|
"loss": 1.208,
|
|
"margin_dpo/margin_mean": 22.269840240478516,
|
|
"margin_dpo/margin_std": 48.957366943359375,
|
|
"step": 247
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -57.070762634277344,
|
|
"KL/mean": -75.29137420654297,
|
|
"KL/rejected_KL_mean": -93.51199340820312,
|
|
"KL/std": 48.84654998779297,
|
|
"epoch": 0.3749055177626606,
|
|
"fcm_dpo/beta": 0.015692584216594696,
|
|
"fcm_dpo/delta": -0.17673294246196747,
|
|
"fcm_dpo/margin": 36.44123840332031,
|
|
"fcm_dpo/q_t": 0.38018378615379333,
|
|
"grad_norm": 17.344486236572266,
|
|
"learning_rate": 3.9501422739279953e-07,
|
|
"logits/chosen": 0.587178111076355,
|
|
"logits/rejected": 0.643383264541626,
|
|
"logps/chosen": -122.44871520996094,
|
|
"logps/ref_chosen": -65.37796020507812,
|
|
"logps/ref_rejected": -61.365787506103516,
|
|
"logps/rejected": -154.87777709960938,
|
|
"loss": 1.0606,
|
|
"margin_dpo/margin_mean": 36.44123840332031,
|
|
"margin_dpo/margin_std": 54.921714782714844,
|
|
"step": 248
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -71.58647155761719,
|
|
"KL/mean": -78.58194732666016,
|
|
"KL/rejected_KL_mean": -85.57742309570312,
|
|
"KL/std": 45.291297912597656,
|
|
"epoch": 0.3764172335600907,
|
|
"fcm_dpo/beta": 0.015605769120156765,
|
|
"fcm_dpo/delta": 0.012846356257796288,
|
|
"fcm_dpo/margin": 13.990938186645508,
|
|
"fcm_dpo/q_t": 0.4508556127548218,
|
|
"grad_norm": 20.001558303833008,
|
|
"learning_rate": 3.9393515632731094e-07,
|
|
"logits/chosen": 0.5763317942619324,
|
|
"logits/rejected": 0.6159162521362305,
|
|
"logps/chosen": -146.18792724609375,
|
|
"logps/ref_chosen": -74.60145568847656,
|
|
"logps/ref_rejected": -63.79338455200195,
|
|
"logps/rejected": -149.3708038330078,
|
|
"loss": 1.3406,
|
|
"margin_dpo/margin_mean": 13.99094009399414,
|
|
"margin_dpo/margin_std": 51.653045654296875,
|
|
"step": 249
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -62.29826354980469,
|
|
"KL/mean": -81.17459106445312,
|
|
"KL/rejected_KL_mean": -100.0509262084961,
|
|
"KL/std": 49.769805908203125,
|
|
"epoch": 0.3779289493575208,
|
|
"fcm_dpo/beta": 0.015448343940079212,
|
|
"fcm_dpo/delta": -0.18853572010993958,
|
|
"fcm_dpo/margin": 37.752655029296875,
|
|
"fcm_dpo/q_t": 0.3732265830039978,
|
|
"grad_norm": 15.649453163146973,
|
|
"learning_rate": 3.9285205908608934e-07,
|
|
"logits/chosen": 0.7136039137840271,
|
|
"logits/rejected": 0.6699525713920593,
|
|
"logps/chosen": -124.2364730834961,
|
|
"logps/ref_chosen": -61.938209533691406,
|
|
"logps/ref_rejected": -72.21602630615234,
|
|
"logps/rejected": -172.26695251464844,
|
|
"loss": 1.0276,
|
|
"margin_dpo/margin_mean": 37.752655029296875,
|
|
"margin_dpo/margin_std": 51.16533660888672,
|
|
"step": 250
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -67.25079345703125,
|
|
"KL/mean": -78.99317169189453,
|
|
"KL/rejected_KL_mean": -90.73553466796875,
|
|
"KL/std": 45.22268295288086,
|
|
"epoch": 0.3794406651549509,
|
|
"fcm_dpo/beta": 0.015353530645370483,
|
|
"fcm_dpo/delta": 0.04014592990279198,
|
|
"fcm_dpo/margin": 23.484750747680664,
|
|
"fcm_dpo/q_t": 0.4205470681190491,
|
|
"grad_norm": 24.55247688293457,
|
|
"learning_rate": 3.9176496596569265e-07,
|
|
"logits/chosen": 0.6722888350486755,
|
|
"logits/rejected": 0.6293883323669434,
|
|
"logps/chosen": -134.1077423095703,
|
|
"logps/ref_chosen": -66.85694885253906,
|
|
"logps/ref_rejected": -84.83396911621094,
|
|
"logps/rejected": -175.56951904296875,
|
|
"loss": 1.1997,
|
|
"margin_dpo/margin_mean": 23.48474884033203,
|
|
"margin_dpo/margin_std": 50.94117736816406,
|
|
"step": 251
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -63.607887268066406,
|
|
"KL/mean": -73.45584106445312,
|
|
"KL/rejected_KL_mean": -83.30378723144531,
|
|
"KL/std": 47.53770065307617,
|
|
"epoch": 0.38095238095238093,
|
|
"fcm_dpo/beta": 0.015337368473410606,
|
|
"fcm_dpo/delta": -0.06313152611255646,
|
|
"fcm_dpo/margin": 19.69589614868164,
|
|
"fcm_dpo/q_t": 0.43303439021110535,
|
|
"grad_norm": 21.98233413696289,
|
|
"learning_rate": 3.9067390737445254e-07,
|
|
"logits/chosen": 0.6177150011062622,
|
|
"logits/rejected": 0.5613386631011963,
|
|
"logps/chosen": -119.83181762695312,
|
|
"logps/ref_chosen": -56.22393035888672,
|
|
"logps/ref_rejected": -77.1136245727539,
|
|
"logps/rejected": -160.41741943359375,
|
|
"loss": 1.269,
|
|
"margin_dpo/margin_mean": 19.69589614868164,
|
|
"margin_dpo/margin_std": 52.63837432861328,
|
|
"step": 252
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -61.829811096191406,
|
|
"KL/mean": -75.81027221679688,
|
|
"KL/rejected_KL_mean": -89.79072570800781,
|
|
"KL/std": 45.785125732421875,
|
|
"epoch": 0.382464096749811,
|
|
"fcm_dpo/beta": 0.015203773975372314,
|
|
"fcm_dpo/delta": -0.02628418803215027,
|
|
"fcm_dpo/margin": 27.96092414855957,
|
|
"fcm_dpo/q_t": 0.4063448905944824,
|
|
"grad_norm": 14.743314743041992,
|
|
"learning_rate": 3.8957891383162304e-07,
|
|
"logits/chosen": 0.6809608936309814,
|
|
"logits/rejected": 0.6407158374786377,
|
|
"logps/chosen": -114.03982543945312,
|
|
"logps/ref_chosen": -52.21001434326172,
|
|
"logps/ref_rejected": -58.75764846801758,
|
|
"logps/rejected": -148.54837036132812,
|
|
"loss": 1.1254,
|
|
"margin_dpo/margin_mean": 27.96092414855957,
|
|
"margin_dpo/margin_std": 46.356727600097656,
|
|
"step": 253
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -66.46315002441406,
|
|
"KL/mean": -81.40289306640625,
|
|
"KL/rejected_KL_mean": -96.34263610839844,
|
|
"KL/std": 48.92360305786133,
|
|
"epoch": 0.3839758125472411,
|
|
"fcm_dpo/beta": 0.015153815969824791,
|
|
"fcm_dpo/delta": -0.05432434752583504,
|
|
"fcm_dpo/margin": 29.879486083984375,
|
|
"fcm_dpo/q_t": 0.40253984928131104,
|
|
"grad_norm": 15.075303077697754,
|
|
"learning_rate": 3.884800159665276e-07,
|
|
"logits/chosen": 0.6358954906463623,
|
|
"logits/rejected": 0.5810351967811584,
|
|
"logps/chosen": -132.09947204589844,
|
|
"logps/ref_chosen": -65.63632202148438,
|
|
"logps/ref_rejected": -82.34425354003906,
|
|
"logps/rejected": -178.6868896484375,
|
|
"loss": 1.1096,
|
|
"margin_dpo/margin_mean": 29.879486083984375,
|
|
"margin_dpo/margin_std": 48.819847106933594,
|
|
"step": 254
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -58.688385009765625,
|
|
"KL/mean": -77.56295776367188,
|
|
"KL/rejected_KL_mean": -96.43754577636719,
|
|
"KL/std": 49.136959075927734,
|
|
"epoch": 0.3854875283446712,
|
|
"fcm_dpo/beta": 0.014958618208765984,
|
|
"fcm_dpo/delta": -0.16970089077949524,
|
|
"fcm_dpo/margin": 37.74916458129883,
|
|
"fcm_dpo/q_t": 0.3781914710998535,
|
|
"grad_norm": 17.472856521606445,
|
|
"learning_rate": 3.873772445177015e-07,
|
|
"logits/chosen": 0.5938354730606079,
|
|
"logits/rejected": 0.5634579658508301,
|
|
"logps/chosen": -126.59947204589844,
|
|
"logps/ref_chosen": -67.91108703613281,
|
|
"logps/ref_rejected": -83.89114379882812,
|
|
"logps/rejected": -180.32870483398438,
|
|
"loss": 1.0402,
|
|
"margin_dpo/margin_mean": 37.74916458129883,
|
|
"margin_dpo/margin_std": 53.198333740234375,
|
|
"step": 255
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -71.89956665039062,
|
|
"KL/mean": -88.86312103271484,
|
|
"KL/rejected_KL_mean": -105.8266830444336,
|
|
"KL/std": 46.04535675048828,
|
|
"epoch": 0.3869992441421013,
|
|
"fcm_dpo/beta": 0.014758838340640068,
|
|
"fcm_dpo/delta": -0.1038253977894783,
|
|
"fcm_dpo/margin": 33.927127838134766,
|
|
"fcm_dpo/q_t": 0.3933441638946533,
|
|
"grad_norm": 17.44881248474121,
|
|
"learning_rate": 3.862706303320329e-07,
|
|
"logits/chosen": 0.6069653630256653,
|
|
"logits/rejected": 0.5400733351707458,
|
|
"logps/chosen": -135.39955139160156,
|
|
"logps/ref_chosen": -63.49998474121094,
|
|
"logps/ref_rejected": -90.77104187011719,
|
|
"logps/rejected": -196.59771728515625,
|
|
"loss": 1.1172,
|
|
"margin_dpo/margin_mean": 33.927127838134766,
|
|
"margin_dpo/margin_std": 59.00623321533203,
|
|
"step": 256
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -69.59188842773438,
|
|
"KL/mean": -90.31320190429688,
|
|
"KL/rejected_KL_mean": -111.03451538085938,
|
|
"KL/std": 51.5991096496582,
|
|
"epoch": 0.3885109599395314,
|
|
"fcm_dpo/beta": 0.014510784298181534,
|
|
"fcm_dpo/delta": -0.20795145630836487,
|
|
"fcm_dpo/margin": 41.44263458251953,
|
|
"fcm_dpo/q_t": 0.3730431795120239,
|
|
"grad_norm": 16.238489151000977,
|
|
"learning_rate": 3.851602043638994e-07,
|
|
"logits/chosen": 0.6382098197937012,
|
|
"logits/rejected": 0.5668275952339172,
|
|
"logps/chosen": -140.19253540039062,
|
|
"logps/ref_chosen": -70.60064697265625,
|
|
"logps/ref_rejected": -108.58313751220703,
|
|
"logps/rejected": -219.61764526367188,
|
|
"loss": 1.0249,
|
|
"margin_dpo/margin_mean": 41.44263458251953,
|
|
"margin_dpo/margin_std": 57.874202728271484,
|
|
"step": 257
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -68.49305725097656,
|
|
"KL/mean": -84.60340881347656,
|
|
"KL/rejected_KL_mean": -100.71376037597656,
|
|
"KL/std": 45.056861877441406,
|
|
"epoch": 0.3900226757369615,
|
|
"fcm_dpo/beta": 0.01438925601541996,
|
|
"fcm_dpo/delta": -0.06512182205915451,
|
|
"fcm_dpo/margin": 32.22071075439453,
|
|
"fcm_dpo/q_t": 0.39385807514190674,
|
|
"grad_norm": 17.002336502075195,
|
|
"learning_rate": 3.840459976743023e-07,
|
|
"logits/chosen": 0.6762516498565674,
|
|
"logits/rejected": 0.6209896802902222,
|
|
"logps/chosen": -127.74722290039062,
|
|
"logps/ref_chosen": -59.25416564941406,
|
|
"logps/ref_rejected": -85.58709716796875,
|
|
"logps/rejected": -186.30084228515625,
|
|
"loss": 1.0619,
|
|
"margin_dpo/margin_mean": 32.220706939697266,
|
|
"margin_dpo/margin_std": 42.65643310546875,
|
|
"step": 258
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -58.80857849121094,
|
|
"KL/mean": -82.843017578125,
|
|
"KL/rejected_KL_mean": -106.87745666503906,
|
|
"KL/std": 50.94042205810547,
|
|
"epoch": 0.3915343915343915,
|
|
"fcm_dpo/beta": 0.014012206345796585,
|
|
"fcm_dpo/delta": -0.28445327281951904,
|
|
"fcm_dpo/margin": 48.06887435913086,
|
|
"fcm_dpo/q_t": 0.35398849844932556,
|
|
"grad_norm": 15.033981323242188,
|
|
"learning_rate": 3.8292804142999796e-07,
|
|
"logits/chosen": 0.5819064378738403,
|
|
"logits/rejected": 0.4743342995643616,
|
|
"logps/chosen": -124.24345397949219,
|
|
"logps/ref_chosen": -65.43487548828125,
|
|
"logps/ref_rejected": -95.41731262207031,
|
|
"logps/rejected": -202.29476928710938,
|
|
"loss": 0.9712,
|
|
"margin_dpo/margin_mean": 48.06887435913086,
|
|
"margin_dpo/margin_std": 57.69731903076172,
|
|
"step": 259
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -58.18017578125,
|
|
"KL/mean": -77.72256469726562,
|
|
"KL/rejected_KL_mean": -97.26494598388672,
|
|
"KL/std": 47.47976303100586,
|
|
"epoch": 0.3930461073318216,
|
|
"fcm_dpo/beta": 0.01377411000430584,
|
|
"fcm_dpo/delta": -0.1428002119064331,
|
|
"fcm_dpo/margin": 39.08477020263672,
|
|
"fcm_dpo/q_t": 0.3838440775871277,
|
|
"grad_norm": 17.183258056640625,
|
|
"learning_rate": 3.818063669026256e-07,
|
|
"logits/chosen": 0.6456409096717834,
|
|
"logits/rejected": 0.5488805770874023,
|
|
"logps/chosen": -107.26976013183594,
|
|
"logps/ref_chosen": -49.08958435058594,
|
|
"logps/ref_rejected": -79.01708221435547,
|
|
"logps/rejected": -176.2820281982422,
|
|
"loss": 1.0657,
|
|
"margin_dpo/margin_mean": 39.08477020263672,
|
|
"margin_dpo/margin_std": 58.11090087890625,
|
|
"step": 260
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -57.641117095947266,
|
|
"KL/mean": -74.09243774414062,
|
|
"KL/rejected_KL_mean": -90.54375457763672,
|
|
"KL/std": 46.2230110168457,
|
|
"epoch": 0.3945578231292517,
|
|
"fcm_dpo/beta": 0.013722619041800499,
|
|
"fcm_dpo/delta": -0.05269971117377281,
|
|
"fcm_dpo/margin": 32.90263748168945,
|
|
"fcm_dpo/q_t": 0.40154245495796204,
|
|
"grad_norm": 16.702274322509766,
|
|
"learning_rate": 3.806810054678331e-07,
|
|
"logits/chosen": 0.5115213394165039,
|
|
"logits/rejected": 0.5498528480529785,
|
|
"logps/chosen": -128.51351928710938,
|
|
"logps/ref_chosen": -70.87239074707031,
|
|
"logps/ref_rejected": -65.01522064208984,
|
|
"logps/rejected": -155.55897521972656,
|
|
"loss": 1.1024,
|
|
"margin_dpo/margin_mean": 32.90263748168945,
|
|
"margin_dpo/margin_std": 52.139732360839844,
|
|
"step": 261
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -62.82524108886719,
|
|
"KL/mean": -78.07699584960938,
|
|
"KL/rejected_KL_mean": -93.3287582397461,
|
|
"KL/std": 46.10828399658203,
|
|
"epoch": 0.3960695389266818,
|
|
"fcm_dpo/beta": 0.01368524506688118,
|
|
"fcm_dpo/delta": -0.01785985752940178,
|
|
"fcm_dpo/margin": 30.50351905822754,
|
|
"fcm_dpo/q_t": 0.40719324350357056,
|
|
"grad_norm": 16.82356834411621,
|
|
"learning_rate": 3.7955198860439887e-07,
|
|
"logits/chosen": 0.660781741142273,
|
|
"logits/rejected": 0.5931707620620728,
|
|
"logps/chosen": -130.69586181640625,
|
|
"logps/ref_chosen": -67.8706283569336,
|
|
"logps/ref_rejected": -88.7205810546875,
|
|
"logps/rejected": -182.04934692382812,
|
|
"loss": 1.1145,
|
|
"margin_dpo/margin_mean": 30.503520965576172,
|
|
"margin_dpo/margin_std": 48.53475570678711,
|
|
"step": 262
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -58.691795349121094,
|
|
"KL/mean": -73.73480224609375,
|
|
"KL/rejected_KL_mean": -88.77780151367188,
|
|
"KL/std": 46.25077819824219,
|
|
"epoch": 0.3975812547241119,
|
|
"fcm_dpo/beta": 0.013658740557730198,
|
|
"fcm_dpo/delta": -0.01117362268269062,
|
|
"fcm_dpo/margin": 30.08599853515625,
|
|
"fcm_dpo/q_t": 0.407392293214798,
|
|
"grad_norm": 14.87549114227295,
|
|
"learning_rate": 3.784193478933516e-07,
|
|
"logits/chosen": 0.590676486492157,
|
|
"logits/rejected": 0.478776752948761,
|
|
"logps/chosen": -113.88638305664062,
|
|
"logps/ref_chosen": -55.194583892822266,
|
|
"logps/ref_rejected": -80.54048156738281,
|
|
"logps/rejected": -169.3182830810547,
|
|
"loss": 1.1243,
|
|
"margin_dpo/margin_mean": 30.08599853515625,
|
|
"margin_dpo/margin_std": 49.763710021972656,
|
|
"step": 263
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -62.72454071044922,
|
|
"KL/mean": -79.83084106445312,
|
|
"KL/rejected_KL_mean": -96.93714904785156,
|
|
"KL/std": 50.30043029785156,
|
|
"epoch": 0.39909297052154197,
|
|
"fcm_dpo/beta": 0.013571709394454956,
|
|
"fcm_dpo/delta": -0.06590654700994492,
|
|
"fcm_dpo/margin": 34.21260452270508,
|
|
"fcm_dpo/q_t": 0.3982086777687073,
|
|
"grad_norm": 15.11206340789795,
|
|
"learning_rate": 3.7728311501708674e-07,
|
|
"logits/chosen": 0.5183438062667847,
|
|
"logits/rejected": 0.4760022759437561,
|
|
"logps/chosen": -145.89523315429688,
|
|
"logps/ref_chosen": -83.17068481445312,
|
|
"logps/ref_rejected": -88.33625793457031,
|
|
"logps/rejected": -185.27340698242188,
|
|
"loss": 1.1024,
|
|
"margin_dpo/margin_mean": 34.21260452270508,
|
|
"margin_dpo/margin_std": 54.977115631103516,
|
|
"step": 264
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -59.88679504394531,
|
|
"KL/mean": -77.98982238769531,
|
|
"KL/rejected_KL_mean": -96.09284973144531,
|
|
"KL/std": 50.352210998535156,
|
|
"epoch": 0.40060468631897206,
|
|
"fcm_dpo/beta": 0.013487452641129494,
|
|
"fcm_dpo/delta": -0.09049642086029053,
|
|
"fcm_dpo/margin": 36.2060432434082,
|
|
"fcm_dpo/q_t": 0.3939788043498993,
|
|
"grad_norm": 13.420147895812988,
|
|
"learning_rate": 3.7614332175848027e-07,
|
|
"logits/chosen": 0.6994237899780273,
|
|
"logits/rejected": 0.6313549280166626,
|
|
"logps/chosen": -111.54965209960938,
|
|
"logps/ref_chosen": -51.66284942626953,
|
|
"logps/ref_rejected": -67.1720962524414,
|
|
"logps/rejected": -163.26495361328125,
|
|
"loss": 1.1168,
|
|
"margin_dpo/margin_mean": 36.20604705810547,
|
|
"margin_dpo/margin_std": 61.83930206298828,
|
|
"step": 265
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -57.699310302734375,
|
|
"KL/mean": -75.489501953125,
|
|
"KL/rejected_KL_mean": -93.27967834472656,
|
|
"KL/std": 50.65924835205078,
|
|
"epoch": 0.4021164021164021,
|
|
"fcm_dpo/beta": 0.013348875567317009,
|
|
"fcm_dpo/delta": -0.07685475051403046,
|
|
"fcm_dpo/margin": 35.58037185668945,
|
|
"fcm_dpo/q_t": 0.39615702629089355,
|
|
"grad_norm": 19.877452850341797,
|
|
"learning_rate": 3.75e-07,
|
|
"logits/chosen": 0.6154479384422302,
|
|
"logits/rejected": 0.5375107526779175,
|
|
"logps/chosen": -115.14981079101562,
|
|
"logps/ref_chosen": -57.45049285888672,
|
|
"logps/ref_rejected": -77.60826110839844,
|
|
"logps/rejected": -170.887939453125,
|
|
"loss": 1.0923,
|
|
"margin_dpo/margin_mean": 35.58037185668945,
|
|
"margin_dpo/margin_std": 55.90092086791992,
|
|
"step": 266
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -51.893741607666016,
|
|
"KL/mean": -66.06210327148438,
|
|
"KL/rejected_KL_mean": -80.23046875,
|
|
"KL/std": 47.46539306640625,
|
|
"epoch": 0.4036281179138322,
|
|
"fcm_dpo/beta": 0.013270881026983261,
|
|
"fcm_dpo/delta": -0.09247631579637527,
|
|
"fcm_dpo/margin": 28.33672332763672,
|
|
"fcm_dpo/q_t": 0.41640275716781616,
|
|
"grad_norm": 15.783865928649902,
|
|
"learning_rate": 3.738531817228131e-07,
|
|
"logits/chosen": 0.6694222688674927,
|
|
"logits/rejected": 0.6498180627822876,
|
|
"logps/chosen": -106.92909240722656,
|
|
"logps/ref_chosen": -55.03535079956055,
|
|
"logps/ref_rejected": -66.0953369140625,
|
|
"logps/rejected": -146.3258056640625,
|
|
"loss": 1.1874,
|
|
"margin_dpo/margin_mean": 28.33672332763672,
|
|
"margin_dpo/margin_std": 56.38341522216797,
|
|
"step": 267
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -52.81679916381836,
|
|
"KL/mean": -68.95881652832031,
|
|
"KL/rejected_KL_mean": -85.10086059570312,
|
|
"KL/std": 50.63154602050781,
|
|
"epoch": 0.4051398337112623,
|
|
"fcm_dpo/beta": 0.013201778754591942,
|
|
"fcm_dpo/delta": -0.02682078629732132,
|
|
"fcm_dpo/margin": 32.284061431884766,
|
|
"fcm_dpo/q_t": 0.40620461106300354,
|
|
"grad_norm": 14.012471199035645,
|
|
"learning_rate": 3.7270289900589204e-07,
|
|
"logits/chosen": 0.4926779270172119,
|
|
"logits/rejected": 0.4790382981300354,
|
|
"logps/chosen": -117.88854217529297,
|
|
"logps/ref_chosen": -65.07174682617188,
|
|
"logps/ref_rejected": -71.42485809326172,
|
|
"logps/rejected": -156.52572631835938,
|
|
"loss": 1.1006,
|
|
"margin_dpo/margin_mean": 32.284061431884766,
|
|
"margin_dpo/margin_std": 48.67503356933594,
|
|
"step": 268
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -59.36262512207031,
|
|
"KL/mean": -79.47508239746094,
|
|
"KL/rejected_KL_mean": -99.5875244140625,
|
|
"KL/std": 53.324642181396484,
|
|
"epoch": 0.40665154950869237,
|
|
"fcm_dpo/beta": 0.013089429587125778,
|
|
"fcm_dpo/delta": -0.12994520366191864,
|
|
"fcm_dpo/margin": 40.22490692138672,
|
|
"fcm_dpo/q_t": 0.38403090834617615,
|
|
"grad_norm": 12.414010047912598,
|
|
"learning_rate": 3.7154918402511714e-07,
|
|
"logits/chosen": 0.7272143363952637,
|
|
"logits/rejected": 0.6779624819755554,
|
|
"logps/chosen": -126.49883270263672,
|
|
"logps/ref_chosen": -67.1362075805664,
|
|
"logps/ref_rejected": -82.55778503417969,
|
|
"logps/rejected": -182.14532470703125,
|
|
"loss": 1.0409,
|
|
"margin_dpo/margin_mean": 40.22490692138672,
|
|
"margin_dpo/margin_std": 53.013492584228516,
|
|
"step": 269
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -60.410587310791016,
|
|
"KL/mean": -76.7166748046875,
|
|
"KL/rejected_KL_mean": -93.02275085449219,
|
|
"KL/std": 51.691871643066406,
|
|
"epoch": 0.40816326530612246,
|
|
"fcm_dpo/beta": 0.01296667568385601,
|
|
"fcm_dpo/delta": -0.023443248122930527,
|
|
"fcm_dpo/margin": 32.61217498779297,
|
|
"fcm_dpo/q_t": 0.40514498949050903,
|
|
"grad_norm": 14.43160343170166,
|
|
"learning_rate": 3.7039206905237656e-07,
|
|
"logits/chosen": 0.6568794250488281,
|
|
"logits/rejected": 0.57200026512146,
|
|
"logps/chosen": -127.09928894042969,
|
|
"logps/ref_chosen": -66.6886978149414,
|
|
"logps/ref_rejected": -85.16129302978516,
|
|
"logps/rejected": -178.18405151367188,
|
|
"loss": 1.1198,
|
|
"margin_dpo/margin_mean": 32.6121711730957,
|
|
"margin_dpo/margin_std": 53.93025207519531,
|
|
"step": 270
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -61.24378967285156,
|
|
"KL/mean": -74.28935241699219,
|
|
"KL/rejected_KL_mean": -87.33493041992188,
|
|
"KL/std": 54.709877014160156,
|
|
"epoch": 0.40967498110355255,
|
|
"fcm_dpo/beta": 0.012963888235390186,
|
|
"fcm_dpo/delta": -0.007490839809179306,
|
|
"fcm_dpo/margin": 26.09113883972168,
|
|
"fcm_dpo/q_t": 0.4311845004558563,
|
|
"grad_norm": 17.340864181518555,
|
|
"learning_rate": 3.692315864546635e-07,
|
|
"logits/chosen": 0.6431140303611755,
|
|
"logits/rejected": 0.5778870582580566,
|
|
"logps/chosen": -133.65133666992188,
|
|
"logps/ref_chosen": -72.40754699707031,
|
|
"logps/ref_rejected": -92.06311798095703,
|
|
"logps/rejected": -179.39804077148438,
|
|
"loss": 1.2315,
|
|
"margin_dpo/margin_mean": 26.09113883972168,
|
|
"margin_dpo/margin_std": 63.65034484863281,
|
|
"step": 271
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -52.58313751220703,
|
|
"KL/mean": -76.9792251586914,
|
|
"KL/rejected_KL_mean": -101.37531280517578,
|
|
"KL/std": 49.94551086425781,
|
|
"epoch": 0.41118669690098264,
|
|
"fcm_dpo/beta": 0.01276671327650547,
|
|
"fcm_dpo/delta": -0.2298007756471634,
|
|
"fcm_dpo/margin": 48.79218292236328,
|
|
"fcm_dpo/q_t": 0.35941970348358154,
|
|
"grad_norm": 14.38946533203125,
|
|
"learning_rate": 3.6806776869317067e-07,
|
|
"logits/chosen": 0.662402331829071,
|
|
"logits/rejected": 0.6833938360214233,
|
|
"logps/chosen": -119.18453979492188,
|
|
"logps/ref_chosen": -66.60140228271484,
|
|
"logps/ref_rejected": -67.74340057373047,
|
|
"logps/rejected": -169.11871337890625,
|
|
"loss": 0.9453,
|
|
"margin_dpo/margin_mean": 48.79218292236328,
|
|
"margin_dpo/margin_std": 48.469764709472656,
|
|
"step": 272
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -70.37814331054688,
|
|
"KL/mean": -88.6273193359375,
|
|
"KL/rejected_KL_mean": -106.87650299072266,
|
|
"KL/std": 53.626365661621094,
|
|
"epoch": 0.4126984126984127,
|
|
"fcm_dpo/beta": 0.012595186941325665,
|
|
"fcm_dpo/delta": -0.06125575676560402,
|
|
"fcm_dpo/margin": 36.49836730957031,
|
|
"fcm_dpo/q_t": 0.4004499614238739,
|
|
"grad_norm": 18.40157127380371,
|
|
"learning_rate": 3.669006483223828e-07,
|
|
"logits/chosen": 0.6722092628479004,
|
|
"logits/rejected": 0.5972082018852234,
|
|
"logps/chosen": -127.73301696777344,
|
|
"logps/ref_chosen": -57.35487747192383,
|
|
"logps/ref_rejected": -84.17168426513672,
|
|
"logps/rejected": -191.04818725585938,
|
|
"loss": 1.1404,
|
|
"margin_dpo/margin_mean": 36.49836349487305,
|
|
"margin_dpo/margin_std": 66.88555908203125,
|
|
"step": 273
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -68.04246520996094,
|
|
"KL/mean": -87.64198303222656,
|
|
"KL/rejected_KL_mean": -107.24151611328125,
|
|
"KL/std": 53.54254913330078,
|
|
"epoch": 0.41421012849584277,
|
|
"fcm_dpo/beta": 0.012515604496002197,
|
|
"fcm_dpo/delta": -0.09282846003770828,
|
|
"fcm_dpo/margin": 39.19903564453125,
|
|
"fcm_dpo/q_t": 0.39243191480636597,
|
|
"grad_norm": 14.0313720703125,
|
|
"learning_rate": 3.657302579891656e-07,
|
|
"logits/chosen": 0.557138204574585,
|
|
"logits/rejected": 0.5391717553138733,
|
|
"logps/chosen": -127.6839599609375,
|
|
"logps/ref_chosen": -59.64149475097656,
|
|
"logps/ref_rejected": -68.29348754882812,
|
|
"logps/rejected": -175.53500366210938,
|
|
"loss": 1.0973,
|
|
"margin_dpo/margin_mean": 39.19903564453125,
|
|
"margin_dpo/margin_std": 63.1242790222168,
|
|
"step": 274
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -67.34871673583984,
|
|
"KL/mean": -86.99173736572266,
|
|
"KL/rejected_KL_mean": -106.63475799560547,
|
|
"KL/std": 52.00896453857422,
|
|
"epoch": 0.41572184429327286,
|
|
"fcm_dpo/beta": 0.012410417199134827,
|
|
"fcm_dpo/delta": -0.08969901502132416,
|
|
"fcm_dpo/margin": 39.286041259765625,
|
|
"fcm_dpo/q_t": 0.39085084199905396,
|
|
"grad_norm": 14.771478652954102,
|
|
"learning_rate": 3.645566304318526e-07,
|
|
"logits/chosen": 0.6109151840209961,
|
|
"logits/rejected": 0.527836799621582,
|
|
"logps/chosen": -120.6153564453125,
|
|
"logps/ref_chosen": -53.26664352416992,
|
|
"logps/ref_rejected": -73.84062194824219,
|
|
"logps/rejected": -180.47537231445312,
|
|
"loss": 1.06,
|
|
"margin_dpo/margin_mean": 39.286041259765625,
|
|
"margin_dpo/margin_std": 53.76435089111328,
|
|
"step": 275
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -64.30890655517578,
|
|
"KL/mean": -83.74125671386719,
|
|
"KL/rejected_KL_mean": -103.17359924316406,
|
|
"KL/std": 49.38056182861328,
|
|
"epoch": 0.41723356009070295,
|
|
"fcm_dpo/beta": 0.012298415414988995,
|
|
"fcm_dpo/delta": -0.07984931766986847,
|
|
"fcm_dpo/margin": 38.86470031738281,
|
|
"fcm_dpo/q_t": 0.39253324270248413,
|
|
"grad_norm": 15.210654258728027,
|
|
"learning_rate": 3.633797984793294e-07,
|
|
"logits/chosen": 0.5939422845840454,
|
|
"logits/rejected": 0.5611696243286133,
|
|
"logps/chosen": -117.32969665527344,
|
|
"logps/ref_chosen": -53.02079772949219,
|
|
"logps/ref_rejected": -61.56678771972656,
|
|
"logps/rejected": -164.74038696289062,
|
|
"loss": 1.0627,
|
|
"margin_dpo/margin_mean": 38.86470031738281,
|
|
"margin_dpo/margin_std": 53.050018310546875,
|
|
"step": 276
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -74.35980987548828,
|
|
"KL/mean": -84.62734985351562,
|
|
"KL/rejected_KL_mean": -94.89488220214844,
|
|
"KL/std": 52.500431060791016,
|
|
"epoch": 0.41874527588813304,
|
|
"fcm_dpo/beta": 0.012323346920311451,
|
|
"fcm_dpo/delta": 0.04671410098671913,
|
|
"fcm_dpo/margin": 20.535076141357422,
|
|
"fcm_dpo/q_t": 0.4437078833580017,
|
|
"grad_norm": 19.859909057617188,
|
|
"learning_rate": 3.6219979505011555e-07,
|
|
"logits/chosen": 0.6789242625236511,
|
|
"logits/rejected": 0.7065331935882568,
|
|
"logps/chosen": -145.7928009033203,
|
|
"logps/ref_chosen": -71.43299102783203,
|
|
"logps/ref_rejected": -67.65852355957031,
|
|
"logps/rejected": -162.55340576171875,
|
|
"loss": 1.2771,
|
|
"margin_dpo/margin_mean": 20.535076141357422,
|
|
"margin_dpo/margin_std": 59.605224609375,
|
|
"step": 277
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -76.2100830078125,
|
|
"KL/mean": -95.86701965332031,
|
|
"KL/rejected_KL_mean": -115.52394104003906,
|
|
"KL/std": 56.24003219604492,
|
|
"epoch": 0.42025699168556313,
|
|
"fcm_dpo/beta": 0.012290513142943382,
|
|
"fcm_dpo/delta": -0.08548504114151001,
|
|
"fcm_dpo/margin": 39.31385803222656,
|
|
"fcm_dpo/q_t": 0.3921630382537842,
|
|
"grad_norm": 21.52166748046875,
|
|
"learning_rate": 3.6101665315144353e-07,
|
|
"logits/chosen": 0.5738312005996704,
|
|
"logits/rejected": 0.5154014825820923,
|
|
"logps/chosen": -143.32086181640625,
|
|
"logps/ref_chosen": -67.11076354980469,
|
|
"logps/ref_rejected": -88.74851989746094,
|
|
"logps/rejected": -204.2724609375,
|
|
"loss": 1.0808,
|
|
"margin_dpo/margin_mean": 39.31385803222656,
|
|
"margin_dpo/margin_std": 57.77264404296875,
|
|
"step": 278
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -59.24208068847656,
|
|
"KL/mean": -85.44246673583984,
|
|
"KL/rejected_KL_mean": -111.64285278320312,
|
|
"KL/std": 54.95539093017578,
|
|
"epoch": 0.4217687074829932,
|
|
"fcm_dpo/beta": 0.012021970003843307,
|
|
"fcm_dpo/delta": -0.2371801733970642,
|
|
"fcm_dpo/margin": 52.40076446533203,
|
|
"fcm_dpo/q_t": 0.35811275243759155,
|
|
"grad_norm": 15.961465835571289,
|
|
"learning_rate": 3.5983040587833563e-07,
|
|
"logits/chosen": 0.6181567907333374,
|
|
"logits/rejected": 0.579509973526001,
|
|
"logps/chosen": -113.73956298828125,
|
|
"logps/ref_chosen": -54.49748611450195,
|
|
"logps/ref_rejected": -70.42373657226562,
|
|
"logps/rejected": -182.06658935546875,
|
|
"loss": 0.9443,
|
|
"margin_dpo/margin_mean": 52.40076446533203,
|
|
"margin_dpo/margin_std": 52.254241943359375,
|
|
"step": 279
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -64.63801574707031,
|
|
"KL/mean": -91.52815246582031,
|
|
"KL/rejected_KL_mean": -118.41828918457031,
|
|
"KL/std": 57.3443603515625,
|
|
"epoch": 0.42328042328042326,
|
|
"fcm_dpo/beta": 0.011722654104232788,
|
|
"fcm_dpo/delta": -0.23794186115264893,
|
|
"fcm_dpo/margin": 53.78026580810547,
|
|
"fcm_dpo/q_t": 0.35931044816970825,
|
|
"grad_norm": 14.257292747497559,
|
|
"learning_rate": 3.586410864126781e-07,
|
|
"logits/chosen": 0.682168185710907,
|
|
"logits/rejected": 0.6354759931564331,
|
|
"logps/chosen": -125.07083129882812,
|
|
"logps/ref_chosen": -60.43281173706055,
|
|
"logps/ref_rejected": -78.39051818847656,
|
|
"logps/rejected": -196.80880737304688,
|
|
"loss": 0.9355,
|
|
"margin_dpo/margin_mean": 53.780269622802734,
|
|
"margin_dpo/margin_std": 52.49597930908203,
|
|
"step": 280
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -68.76528930664062,
|
|
"KL/mean": -91.57899475097656,
|
|
"KL/rejected_KL_mean": -114.3927001953125,
|
|
"KL/std": 56.311317443847656,
|
|
"epoch": 0.42479213907785335,
|
|
"fcm_dpo/beta": 0.011553199961781502,
|
|
"fcm_dpo/delta": -0.13048428297042847,
|
|
"fcm_dpo/margin": 45.627403259277344,
|
|
"fcm_dpo/q_t": 0.38298100233078003,
|
|
"grad_norm": 13.643623352050781,
|
|
"learning_rate": 3.574487280222929e-07,
|
|
"logits/chosen": 0.6527827978134155,
|
|
"logits/rejected": 0.6796063780784607,
|
|
"logps/chosen": -129.04737854003906,
|
|
"logps/ref_chosen": -60.2820930480957,
|
|
"logps/ref_rejected": -62.04009246826172,
|
|
"logps/rejected": -176.43280029296875,
|
|
"loss": 1.0424,
|
|
"margin_dpo/margin_mean": 45.62740707397461,
|
|
"margin_dpo/margin_std": 61.110443115234375,
|
|
"step": 281
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -77.0960693359375,
|
|
"KL/mean": -98.357421875,
|
|
"KL/rejected_KL_mean": -119.61876678466797,
|
|
"KL/std": 56.58844757080078,
|
|
"epoch": 0.42630385487528344,
|
|
"fcm_dpo/beta": 0.011478802189230919,
|
|
"fcm_dpo/delta": -0.09083320200443268,
|
|
"fcm_dpo/margin": 42.5226936340332,
|
|
"fcm_dpo/q_t": 0.39406535029411316,
|
|
"grad_norm": 17.099605560302734,
|
|
"learning_rate": 3.562533640600075e-07,
|
|
"logits/chosen": 0.5803452730178833,
|
|
"logits/rejected": 0.5337824821472168,
|
|
"logps/chosen": -137.72000122070312,
|
|
"logps/ref_chosen": -60.623924255371094,
|
|
"logps/ref_rejected": -68.67400360107422,
|
|
"logps/rejected": -188.2927703857422,
|
|
"loss": 1.0886,
|
|
"margin_dpo/margin_mean": 42.52268981933594,
|
|
"margin_dpo/margin_std": 63.944007873535156,
|
|
"step": 282
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -76.77944946289062,
|
|
"KL/mean": -96.90300750732422,
|
|
"KL/rejected_KL_mean": -117.02656555175781,
|
|
"KL/std": 58.02313995361328,
|
|
"epoch": 0.42781557067271353,
|
|
"fcm_dpo/beta": 0.011353500187397003,
|
|
"fcm_dpo/delta": -0.05825714394450188,
|
|
"fcm_dpo/margin": 40.247127532958984,
|
|
"fcm_dpo/q_t": 0.3990103006362915,
|
|
"grad_norm": 15.58221435546875,
|
|
"learning_rate": 3.550550279627215e-07,
|
|
"logits/chosen": 0.6266081929206848,
|
|
"logits/rejected": 0.5201466679573059,
|
|
"logps/chosen": -144.42718505859375,
|
|
"logps/ref_chosen": -67.64775085449219,
|
|
"logps/ref_rejected": -99.96835327148438,
|
|
"logps/rejected": -216.99493408203125,
|
|
"loss": 1.105,
|
|
"margin_dpo/margin_mean": 40.247127532958984,
|
|
"margin_dpo/margin_std": 64.6943588256836,
|
|
"step": 283
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -73.270263671875,
|
|
"KL/mean": -94.13587951660156,
|
|
"KL/rejected_KL_mean": -115.00149536132812,
|
|
"KL/std": 59.82183074951172,
|
|
"epoch": 0.4293272864701436,
|
|
"fcm_dpo/beta": 0.011242199689149857,
|
|
"fcm_dpo/delta": -0.07123777270317078,
|
|
"fcm_dpo/margin": 41.73122024536133,
|
|
"fcm_dpo/q_t": 0.39458784461021423,
|
|
"grad_norm": 13.611059188842773,
|
|
"learning_rate": 3.5385375325047163e-07,
|
|
"logits/chosen": 0.68406742811203,
|
|
"logits/rejected": 0.6178258657455444,
|
|
"logps/chosen": -130.23770141601562,
|
|
"logps/ref_chosen": -56.96742630004883,
|
|
"logps/ref_rejected": -86.36236572265625,
|
|
"logps/rejected": -201.36386108398438,
|
|
"loss": 1.0655,
|
|
"margin_dpo/margin_mean": 41.73121643066406,
|
|
"margin_dpo/margin_std": 56.291465759277344,
|
|
"step": 284
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -86.26132202148438,
|
|
"KL/mean": -103.96051025390625,
|
|
"KL/rejected_KL_mean": -121.65971374511719,
|
|
"KL/std": 54.115394592285156,
|
|
"epoch": 0.4308390022675737,
|
|
"fcm_dpo/beta": 0.011271166615188122,
|
|
"fcm_dpo/delta": 0.0009032115340232849,
|
|
"fcm_dpo/margin": 35.398399353027344,
|
|
"fcm_dpo/q_t": 0.41161608695983887,
|
|
"grad_norm": 17.662099838256836,
|
|
"learning_rate": 3.5264957352549375e-07,
|
|
"logits/chosen": 0.6657054424285889,
|
|
"logits/rejected": 0.6382877230644226,
|
|
"logps/chosen": -157.91741943359375,
|
|
"logps/ref_chosen": -71.65611267089844,
|
|
"logps/ref_rejected": -81.63829803466797,
|
|
"logps/rejected": -203.29800415039062,
|
|
"loss": 1.1329,
|
|
"margin_dpo/margin_mean": 35.39840316772461,
|
|
"margin_dpo/margin_std": 59.63393783569336,
|
|
"step": 285
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -82.79715728759766,
|
|
"KL/mean": -110.67222595214844,
|
|
"KL/rejected_KL_mean": -138.54730224609375,
|
|
"KL/std": 58.081626892089844,
|
|
"epoch": 0.4323507180650038,
|
|
"fcm_dpo/beta": 0.011061318218708038,
|
|
"fcm_dpo/delta": -0.2236386388540268,
|
|
"fcm_dpo/margin": 55.75014114379883,
|
|
"fcm_dpo/q_t": 0.36405736207962036,
|
|
"grad_norm": 14.213504791259766,
|
|
"learning_rate": 3.514425224712835e-07,
|
|
"logits/chosen": 0.5927638411521912,
|
|
"logits/rejected": 0.4982491135597229,
|
|
"logps/chosen": -143.87667846679688,
|
|
"logps/ref_chosen": -61.07952117919922,
|
|
"logps/ref_rejected": -91.28128051757812,
|
|
"logps/rejected": -229.82858276367188,
|
|
"loss": 0.9617,
|
|
"margin_dpo/margin_mean": 55.750144958496094,
|
|
"margin_dpo/margin_std": 60.99748229980469,
|
|
"step": 286
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -67.14971160888672,
|
|
"KL/mean": -94.5885009765625,
|
|
"KL/rejected_KL_mean": -122.02730560302734,
|
|
"KL/std": 59.83270263671875,
|
|
"epoch": 0.43386243386243384,
|
|
"fcm_dpo/beta": 0.010845725424587727,
|
|
"fcm_dpo/delta": -0.20105648040771484,
|
|
"fcm_dpo/margin": 54.877593994140625,
|
|
"fcm_dpo/q_t": 0.3678228259086609,
|
|
"grad_norm": 10.925166130065918,
|
|
"learning_rate": 3.502326338516534e-07,
|
|
"logits/chosen": 0.6806952953338623,
|
|
"logits/rejected": 0.6402262449264526,
|
|
"logps/chosen": -113.18550109863281,
|
|
"logps/ref_chosen": -46.035789489746094,
|
|
"logps/ref_rejected": -59.95293426513672,
|
|
"logps/rejected": -181.98023986816406,
|
|
"loss": 0.9812,
|
|
"margin_dpo/margin_mean": 54.877593994140625,
|
|
"margin_dpo/margin_std": 62.61833190917969,
|
|
"step": 287
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -85.2081069946289,
|
|
"KL/mean": -105.62269592285156,
|
|
"KL/rejected_KL_mean": -126.03730010986328,
|
|
"KL/std": 57.28703308105469,
|
|
"epoch": 0.43537414965986393,
|
|
"fcm_dpo/beta": 0.01076011173427105,
|
|
"fcm_dpo/delta": -0.040205713361501694,
|
|
"fcm_dpo/margin": 40.829193115234375,
|
|
"fcm_dpo/q_t": 0.4016938805580139,
|
|
"grad_norm": 15.938902854919434,
|
|
"learning_rate": 3.490199415097892e-07,
|
|
"logits/chosen": 0.5452536344528198,
|
|
"logits/rejected": 0.4863455295562744,
|
|
"logps/chosen": -150.5989532470703,
|
|
"logps/ref_chosen": -65.3908462524414,
|
|
"logps/ref_rejected": -88.53607940673828,
|
|
"logps/rejected": -214.57337951660156,
|
|
"loss": 1.0964,
|
|
"margin_dpo/margin_mean": 40.829193115234375,
|
|
"margin_dpo/margin_std": 61.51420593261719,
|
|
"step": 288
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -87.21430969238281,
|
|
"KL/mean": -106.61245727539062,
|
|
"KL/rejected_KL_mean": -126.0106201171875,
|
|
"KL/std": 57.80829620361328,
|
|
"epoch": 0.436885865457294,
|
|
"fcm_dpo/beta": 0.01074419915676117,
|
|
"fcm_dpo/delta": -0.017281273379921913,
|
|
"fcm_dpo/margin": 38.79631042480469,
|
|
"fcm_dpo/q_t": 0.40947067737579346,
|
|
"grad_norm": 15.400930404663086,
|
|
"learning_rate": 3.4780447936730247e-07,
|
|
"logits/chosen": 0.7100911140441895,
|
|
"logits/rejected": 0.6719862222671509,
|
|
"logps/chosen": -141.8079376220703,
|
|
"logps/ref_chosen": -54.5936279296875,
|
|
"logps/ref_rejected": -67.20855712890625,
|
|
"logps/rejected": -193.21917724609375,
|
|
"loss": 1.1323,
|
|
"margin_dpo/margin_mean": 38.79631042480469,
|
|
"margin_dpo/margin_std": 66.661376953125,
|
|
"step": 289
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -99.97805786132812,
|
|
"KL/mean": -121.62673950195312,
|
|
"KL/rejected_KL_mean": -143.27542114257812,
|
|
"KL/std": 56.35100555419922,
|
|
"epoch": 0.4383975812547241,
|
|
"fcm_dpo/beta": 0.01064335647970438,
|
|
"fcm_dpo/delta": -0.0627022534608841,
|
|
"fcm_dpo/margin": 43.29737091064453,
|
|
"fcm_dpo/q_t": 0.3976319432258606,
|
|
"grad_norm": 16.711933135986328,
|
|
"learning_rate": 3.465862814232821e-07,
|
|
"logits/chosen": 0.7268327474594116,
|
|
"logits/rejected": 0.6507706046104431,
|
|
"logps/chosen": -161.36264038085938,
|
|
"logps/ref_chosen": -61.38457489013672,
|
|
"logps/ref_rejected": -91.92778015136719,
|
|
"logps/rejected": -235.20321655273438,
|
|
"loss": 1.0892,
|
|
"margin_dpo/margin_mean": 43.29737091064453,
|
|
"margin_dpo/margin_std": 65.03199768066406,
|
|
"step": 290
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -92.73938751220703,
|
|
"KL/mean": -118.1191177368164,
|
|
"KL/rejected_KL_mean": -143.4988555908203,
|
|
"KL/std": 57.43284606933594,
|
|
"epoch": 0.4399092970521542,
|
|
"fcm_dpo/beta": 0.010591672733426094,
|
|
"fcm_dpo/delta": -0.1416485458612442,
|
|
"fcm_dpo/margin": 50.75947189331055,
|
|
"fcm_dpo/q_t": 0.38360595703125,
|
|
"grad_norm": 14.12414836883545,
|
|
"learning_rate": 3.4536538175334343e-07,
|
|
"logits/chosen": 0.8080837726593018,
|
|
"logits/rejected": 0.7307025194168091,
|
|
"logps/chosen": -143.6024169921875,
|
|
"logps/ref_chosen": -50.863037109375,
|
|
"logps/ref_rejected": -82.20868682861328,
|
|
"logps/rejected": -225.70755004882812,
|
|
"loss": 1.0365,
|
|
"margin_dpo/margin_mean": 50.75947189331055,
|
|
"margin_dpo/margin_std": 66.58517456054688,
|
|
"step": 291
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -92.92182922363281,
|
|
"KL/mean": -111.64067077636719,
|
|
"KL/rejected_KL_mean": -130.3594970703125,
|
|
"KL/std": 60.35356140136719,
|
|
"epoch": 0.4414210128495843,
|
|
"fcm_dpo/beta": 0.010505965910851955,
|
|
"fcm_dpo/delta": 0.0068125235848128796,
|
|
"fcm_dpo/margin": 37.437660217285156,
|
|
"fcm_dpo/q_t": 0.41355568170547485,
|
|
"grad_norm": 17.29844856262207,
|
|
"learning_rate": 3.4414181450867465e-07,
|
|
"logits/chosen": 0.6663789749145508,
|
|
"logits/rejected": 0.6157400608062744,
|
|
"logps/chosen": -157.27072143554688,
|
|
"logps/ref_chosen": -64.34888458251953,
|
|
"logps/ref_rejected": -72.86434173583984,
|
|
"logps/rejected": -203.22384643554688,
|
|
"loss": 1.1624,
|
|
"margin_dpo/margin_mean": 37.43766784667969,
|
|
"margin_dpo/margin_std": 71.77565002441406,
|
|
"step": 292
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -90.34585571289062,
|
|
"KL/mean": -118.80757904052734,
|
|
"KL/rejected_KL_mean": -147.26931762695312,
|
|
"KL/std": 61.175262451171875,
|
|
"epoch": 0.4429327286470144,
|
|
"fcm_dpo/beta": 0.010385457426309586,
|
|
"fcm_dpo/delta": -0.19676190614700317,
|
|
"fcm_dpo/margin": 56.92345428466797,
|
|
"fcm_dpo/q_t": 0.37089771032333374,
|
|
"grad_norm": 12.192429542541504,
|
|
"learning_rate": 3.4291561391508185e-07,
|
|
"logits/chosen": 0.8078802824020386,
|
|
"logits/rejected": 0.7143409252166748,
|
|
"logps/chosen": -145.21531677246094,
|
|
"logps/ref_chosen": -54.869468688964844,
|
|
"logps/ref_rejected": -81.858642578125,
|
|
"logps/rejected": -229.12796020507812,
|
|
"loss": 1.0109,
|
|
"margin_dpo/margin_mean": 56.9234619140625,
|
|
"margin_dpo/margin_std": 72.48579406738281,
|
|
"step": 293
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -94.7984619140625,
|
|
"KL/mean": -114.17256164550781,
|
|
"KL/rejected_KL_mean": -133.54666137695312,
|
|
"KL/std": 63.56127166748047,
|
|
"epoch": 0.4444444444444444,
|
|
"fcm_dpo/beta": 0.0102860601618886,
|
|
"fcm_dpo/delta": 0.001286383718252182,
|
|
"fcm_dpo/margin": 38.74821853637695,
|
|
"fcm_dpo/q_t": 0.41131994128227234,
|
|
"grad_norm": 12.293243408203125,
|
|
"learning_rate": 3.4168681427203153e-07,
|
|
"logits/chosen": 0.6845268607139587,
|
|
"logits/rejected": 0.637122392654419,
|
|
"logps/chosen": -151.4693603515625,
|
|
"logps/ref_chosen": -56.670902252197266,
|
|
"logps/ref_rejected": -70.32819366455078,
|
|
"logps/rejected": -203.87486267089844,
|
|
"loss": 1.1091,
|
|
"margin_dpo/margin_mean": 38.74821853637695,
|
|
"margin_dpo/margin_std": 57.99913024902344,
|
|
"step": 294
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -100.91421508789062,
|
|
"KL/mean": -118.94341278076172,
|
|
"KL/rejected_KL_mean": -136.97262573242188,
|
|
"KL/std": 59.839324951171875,
|
|
"epoch": 0.4459561602418745,
|
|
"fcm_dpo/beta": 0.010322971269488335,
|
|
"fcm_dpo/delta": 0.0282976645976305,
|
|
"fcm_dpo/margin": 36.05839920043945,
|
|
"fcm_dpo/q_t": 0.4175993800163269,
|
|
"grad_norm": 17.256160736083984,
|
|
"learning_rate": 3.4045544995169125e-07,
|
|
"logits/chosen": 0.6784563064575195,
|
|
"logits/rejected": 0.5708075761795044,
|
|
"logps/chosen": -151.31509399414062,
|
|
"logps/ref_chosen": -50.40088653564453,
|
|
"logps/ref_rejected": -83.43521881103516,
|
|
"logps/rejected": -220.4078369140625,
|
|
"loss": 1.1509,
|
|
"margin_dpo/margin_mean": 36.05839920043945,
|
|
"margin_dpo/margin_std": 64.55729675292969,
|
|
"step": 295
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -102.95724487304688,
|
|
"KL/mean": -125.48973083496094,
|
|
"KL/rejected_KL_mean": -148.02223205566406,
|
|
"KL/std": 63.723541259765625,
|
|
"epoch": 0.4474678760393046,
|
|
"fcm_dpo/beta": 0.01026562787592411,
|
|
"fcm_dpo/delta": -0.06442629545927048,
|
|
"fcm_dpo/margin": 45.064979553222656,
|
|
"fcm_dpo/q_t": 0.39792966842651367,
|
|
"grad_norm": 14.03458309173584,
|
|
"learning_rate": 3.392215553979679e-07,
|
|
"logits/chosen": 0.6150853633880615,
|
|
"logits/rejected": 0.566129744052887,
|
|
"logps/chosen": -172.10760498046875,
|
|
"logps/ref_chosen": -69.15034484863281,
|
|
"logps/ref_rejected": -89.60166931152344,
|
|
"logps/rejected": -237.6239013671875,
|
|
"loss": 1.1001,
|
|
"margin_dpo/margin_mean": 45.064979553222656,
|
|
"margin_dpo/margin_std": 71.2538833618164,
|
|
"step": 296
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -107.66227722167969,
|
|
"KL/mean": -130.36534118652344,
|
|
"KL/rejected_KL_mean": -153.0684051513672,
|
|
"KL/std": 55.826515197753906,
|
|
"epoch": 0.4489795918367347,
|
|
"fcm_dpo/beta": 0.010240158066153526,
|
|
"fcm_dpo/delta": -0.06653815507888794,
|
|
"fcm_dpo/margin": 45.406124114990234,
|
|
"fcm_dpo/q_t": 0.3945203721523285,
|
|
"grad_norm": 15.88345718383789,
|
|
"learning_rate": 3.3798516512554485e-07,
|
|
"logits/chosen": 0.6679160594940186,
|
|
"logits/rejected": 0.6121644973754883,
|
|
"logps/chosen": -165.6785888671875,
|
|
"logps/ref_chosen": -58.01630401611328,
|
|
"logps/ref_rejected": -69.95780944824219,
|
|
"logps/rejected": -223.02621459960938,
|
|
"loss": 1.062,
|
|
"margin_dpo/margin_mean": 45.40612030029297,
|
|
"margin_dpo/margin_std": 59.44774627685547,
|
|
"step": 297
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -104.00851440429688,
|
|
"KL/mean": -121.8464584350586,
|
|
"KL/rejected_KL_mean": -139.6844024658203,
|
|
"KL/std": 59.794219970703125,
|
|
"epoch": 0.4504913076341648,
|
|
"fcm_dpo/beta": 0.010224830359220505,
|
|
"fcm_dpo/delta": 0.03588207811117172,
|
|
"fcm_dpo/margin": 35.67589569091797,
|
|
"fcm_dpo/q_t": 0.4192472994327545,
|
|
"grad_norm": 12.60729694366455,
|
|
"learning_rate": 3.367463137189156e-07,
|
|
"logits/chosen": 0.7827329635620117,
|
|
"logits/rejected": 0.7249705791473389,
|
|
"logps/chosen": -160.17782592773438,
|
|
"logps/ref_chosen": -56.1693115234375,
|
|
"logps/ref_rejected": -68.55052185058594,
|
|
"logps/rejected": -208.23492431640625,
|
|
"loss": 1.1721,
|
|
"margin_dpo/margin_mean": 35.67589569091797,
|
|
"margin_dpo/margin_std": 69.20674896240234,
|
|
"step": 298
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -109.27811431884766,
|
|
"KL/mean": -123.76858520507812,
|
|
"KL/rejected_KL_mean": -138.25906372070312,
|
|
"KL/std": 57.69758605957031,
|
|
"epoch": 0.4520030234315949,
|
|
"fcm_dpo/beta": 0.010252503678202629,
|
|
"fcm_dpo/delta": 0.012759607285261154,
|
|
"fcm_dpo/margin": 28.980953216552734,
|
|
"fcm_dpo/q_t": 0.4349084198474884,
|
|
"grad_norm": 17.99590492248535,
|
|
"learning_rate": 3.355050358314172e-07,
|
|
"logits/chosen": 0.5697805285453796,
|
|
"logits/rejected": 0.5381168723106384,
|
|
"logps/chosen": -171.59591674804688,
|
|
"logps/ref_chosen": -62.31780242919922,
|
|
"logps/ref_rejected": -72.60028839111328,
|
|
"logps/rejected": -210.85934448242188,
|
|
"loss": 1.2419,
|
|
"margin_dpo/margin_mean": 28.980953216552734,
|
|
"margin_dpo/margin_std": 71.6827392578125,
|
|
"step": 299
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -107.31396484375,
|
|
"KL/mean": -127.20628356933594,
|
|
"KL/rejected_KL_mean": -147.09860229492188,
|
|
"KL/std": 58.61286926269531,
|
|
"epoch": 0.45351473922902497,
|
|
"fcm_dpo/beta": 0.010257832705974579,
|
|
"fcm_dpo/delta": -0.00831594131886959,
|
|
"fcm_dpo/margin": 39.78462600708008,
|
|
"fcm_dpo/q_t": 0.4097885489463806,
|
|
"grad_norm": 14.620009422302246,
|
|
"learning_rate": 3.3426136618426043e-07,
|
|
"logits/chosen": 0.7101852893829346,
|
|
"logits/rejected": 0.6391198039054871,
|
|
"logps/chosen": -167.695556640625,
|
|
"logps/ref_chosen": -60.38157653808594,
|
|
"logps/ref_rejected": -75.45442199707031,
|
|
"logps/rejected": -222.5530242919922,
|
|
"loss": 1.135,
|
|
"margin_dpo/margin_mean": 39.784629821777344,
|
|
"margin_dpo/margin_std": 68.18647766113281,
|
|
"step": 300
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -107.89134216308594,
|
|
"KL/mean": -124.49720764160156,
|
|
"KL/rejected_KL_mean": -141.10305786132812,
|
|
"KL/std": 58.22515869140625,
|
|
"epoch": 0.455026455026455,
|
|
"fcm_dpo/beta": 0.010269934311509132,
|
|
"fcm_dpo/delta": 0.05990290641784668,
|
|
"fcm_dpo/margin": 33.21171569824219,
|
|
"fcm_dpo/q_t": 0.4246818423271179,
|
|
"grad_norm": 13.31199836730957,
|
|
"learning_rate": 3.3301533956555885e-07,
|
|
"logits/chosen": 0.7324000597000122,
|
|
"logits/rejected": 0.7023336291313171,
|
|
"logps/chosen": -160.7422332763672,
|
|
"logps/ref_chosen": -52.85089111328125,
|
|
"logps/ref_rejected": -69.97584533691406,
|
|
"logps/rejected": -211.07891845703125,
|
|
"loss": 1.187,
|
|
"margin_dpo/margin_mean": 33.21171569824219,
|
|
"margin_dpo/margin_std": 67.82133483886719,
|
|
"step": 301
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -114.09980773925781,
|
|
"KL/mean": -127.11161804199219,
|
|
"KL/rejected_KL_mean": -140.1234588623047,
|
|
"KL/std": 60.43231201171875,
|
|
"epoch": 0.4565381708238851,
|
|
"fcm_dpo/beta": 0.01040370762348175,
|
|
"fcm_dpo/delta": 0.1310705542564392,
|
|
"fcm_dpo/margin": 26.02364730834961,
|
|
"fcm_dpo/q_t": 0.43995052576065063,
|
|
"grad_norm": 17.703641891479492,
|
|
"learning_rate": 3.317669908293554e-07,
|
|
"logits/chosen": 0.5384005308151245,
|
|
"logits/rejected": 0.48114463686943054,
|
|
"logps/chosen": -181.06631469726562,
|
|
"logps/ref_chosen": -66.96650695800781,
|
|
"logps/ref_rejected": -88.09510803222656,
|
|
"logps/rejected": -228.21856689453125,
|
|
"loss": 1.2448,
|
|
"margin_dpo/margin_mean": 26.02364730834961,
|
|
"margin_dpo/margin_std": 66.42599487304688,
|
|
"step": 302
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -99.53472900390625,
|
|
"KL/mean": -124.82545471191406,
|
|
"KL/rejected_KL_mean": -150.11618041992188,
|
|
"KL/std": 61.703609466552734,
|
|
"epoch": 0.4580498866213152,
|
|
"fcm_dpo/beta": 0.010347174480557442,
|
|
"fcm_dpo/delta": -0.12664856016635895,
|
|
"fcm_dpo/margin": 50.581451416015625,
|
|
"fcm_dpo/q_t": 0.3852653205394745,
|
|
"grad_norm": 11.763897895812988,
|
|
"learning_rate": 3.3051635489464793e-07,
|
|
"logits/chosen": 0.6962045431137085,
|
|
"logits/rejected": 0.6208950281143188,
|
|
"logps/chosen": -161.65625,
|
|
"logps/ref_chosen": -62.12152862548828,
|
|
"logps/ref_rejected": -90.31204223632812,
|
|
"logps/rejected": -240.42822265625,
|
|
"loss": 1.0709,
|
|
"margin_dpo/margin_mean": 50.581451416015625,
|
|
"margin_dpo/margin_std": 75.7410888671875,
|
|
"step": 303
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -91.32601928710938,
|
|
"KL/mean": -117.4713134765625,
|
|
"KL/rejected_KL_mean": -143.61660766601562,
|
|
"KL/std": 65.41432189941406,
|
|
"epoch": 0.4595616024187453,
|
|
"fcm_dpo/beta": 0.010198384523391724,
|
|
"fcm_dpo/delta": -0.1370396912097931,
|
|
"fcm_dpo/margin": 52.29057312011719,
|
|
"fcm_dpo/q_t": 0.37835174798965454,
|
|
"grad_norm": 13.234185218811035,
|
|
"learning_rate": 3.292634667444117e-07,
|
|
"logits/chosen": 0.6562365293502808,
|
|
"logits/rejected": 0.6004523038864136,
|
|
"logps/chosen": -152.0211181640625,
|
|
"logps/ref_chosen": -60.695091247558594,
|
|
"logps/ref_rejected": -78.2525405883789,
|
|
"logps/rejected": -221.869140625,
|
|
"loss": 1.0088,
|
|
"margin_dpo/margin_mean": 52.29057312011719,
|
|
"margin_dpo/margin_std": 59.6365966796875,
|
|
"step": 304
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -102.27919006347656,
|
|
"KL/mean": -121.75218200683594,
|
|
"KL/rejected_KL_mean": -141.22515869140625,
|
|
"KL/std": 59.09837341308594,
|
|
"epoch": 0.46107331821617537,
|
|
"fcm_dpo/beta": 0.010143334977328777,
|
|
"fcm_dpo/delta": 0.004717826843261719,
|
|
"fcm_dpo/margin": 38.945987701416016,
|
|
"fcm_dpo/q_t": 0.4113967716693878,
|
|
"grad_norm": 14.273884773254395,
|
|
"learning_rate": 3.280083614246217e-07,
|
|
"logits/chosen": 0.5788962244987488,
|
|
"logits/rejected": 0.6131304502487183,
|
|
"logps/chosen": -174.97833251953125,
|
|
"logps/ref_chosen": -72.69914245605469,
|
|
"logps/ref_rejected": -65.65670776367188,
|
|
"logps/rejected": -206.8818817138672,
|
|
"loss": 1.1558,
|
|
"margin_dpo/margin_mean": 38.94599151611328,
|
|
"margin_dpo/margin_std": 71.23085021972656,
|
|
"step": 305
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -97.44586181640625,
|
|
"KL/mean": -116.39836120605469,
|
|
"KL/rejected_KL_mean": -135.3508758544922,
|
|
"KL/std": 57.433387756347656,
|
|
"epoch": 0.46258503401360546,
|
|
"fcm_dpo/beta": 0.010152928531169891,
|
|
"fcm_dpo/delta": 0.015069954097270966,
|
|
"fcm_dpo/margin": 37.905006408691406,
|
|
"fcm_dpo/q_t": 0.41264447569847107,
|
|
"grad_norm": 14.028386116027832,
|
|
"learning_rate": 3.267510740432719e-07,
|
|
"logits/chosen": 0.7493832111358643,
|
|
"logits/rejected": 0.635926365852356,
|
|
"logps/chosen": -151.4163818359375,
|
|
"logps/ref_chosen": -53.97052764892578,
|
|
"logps/ref_rejected": -71.02423095703125,
|
|
"logps/rejected": -206.37510681152344,
|
|
"loss": 1.1205,
|
|
"margin_dpo/margin_mean": 37.905006408691406,
|
|
"margin_dpo/margin_std": 57.04331970214844,
|
|
"step": 306
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -90.1369857788086,
|
|
"KL/mean": -101.20155334472656,
|
|
"KL/rejected_KL_mean": -112.26611328125,
|
|
"KL/std": 59.156707763671875,
|
|
"epoch": 0.46409674981103555,
|
|
"fcm_dpo/beta": 0.010222095064818859,
|
|
"fcm_dpo/delta": 0.06298152357339859,
|
|
"fcm_dpo/margin": 22.129131317138672,
|
|
"fcm_dpo/q_t": 0.4512529969215393,
|
|
"grad_norm": 17.113676071166992,
|
|
"learning_rate": 3.2549163976939285e-07,
|
|
"logits/chosen": 0.734958291053772,
|
|
"logits/rejected": 0.6855578422546387,
|
|
"logps/chosen": -147.5500946044922,
|
|
"logps/ref_chosen": -57.413108825683594,
|
|
"logps/ref_rejected": -68.68010711669922,
|
|
"logps/rejected": -180.94622802734375,
|
|
"loss": 1.3041,
|
|
"margin_dpo/margin_mean": 22.129127502441406,
|
|
"margin_dpo/margin_std": 72.85504913330078,
|
|
"step": 307
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -91.11901092529297,
|
|
"KL/mean": -110.48268127441406,
|
|
"KL/rejected_KL_mean": -129.84634399414062,
|
|
"KL/std": 60.69014358520508,
|
|
"epoch": 0.4656084656084656,
|
|
"fcm_dpo/beta": 0.01022748276591301,
|
|
"fcm_dpo/delta": 0.00366780161857605,
|
|
"fcm_dpo/margin": 38.727333068847656,
|
|
"fcm_dpo/q_t": 0.4115917682647705,
|
|
"grad_norm": 11.219232559204102,
|
|
"learning_rate": 3.2423009383206874e-07,
|
|
"logits/chosen": 0.6557912826538086,
|
|
"logits/rejected": 0.6440641283988953,
|
|
"logps/chosen": -157.71780395507812,
|
|
"logps/ref_chosen": -66.59879302978516,
|
|
"logps/ref_rejected": -74.337158203125,
|
|
"logps/rejected": -204.18350219726562,
|
|
"loss": 1.1477,
|
|
"margin_dpo/margin_mean": 38.727333068847656,
|
|
"margin_dpo/margin_std": 69.07156372070312,
|
|
"step": 308
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -104.48262023925781,
|
|
"KL/mean": -123.4713134765625,
|
|
"KL/rejected_KL_mean": -142.45999145507812,
|
|
"KL/std": 57.12620544433594,
|
|
"epoch": 0.4671201814058957,
|
|
"fcm_dpo/beta": 0.010236620903015137,
|
|
"fcm_dpo/delta": 0.01114867627620697,
|
|
"fcm_dpo/margin": 37.977386474609375,
|
|
"fcm_dpo/q_t": 0.41322100162506104,
|
|
"grad_norm": 12.856152534484863,
|
|
"learning_rate": 3.229664715194511e-07,
|
|
"logits/chosen": 0.6905493140220642,
|
|
"logits/rejected": 0.6322106719017029,
|
|
"logps/chosen": -169.87734985351562,
|
|
"logps/ref_chosen": -65.39474487304688,
|
|
"logps/ref_rejected": -75.70930480957031,
|
|
"logps/rejected": -218.16929626464844,
|
|
"loss": 1.1406,
|
|
"margin_dpo/margin_mean": 37.977386474609375,
|
|
"margin_dpo/margin_std": 64.66454315185547,
|
|
"step": 309
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -106.30168151855469,
|
|
"KL/mean": -117.9731216430664,
|
|
"KL/rejected_KL_mean": -129.64456176757812,
|
|
"KL/std": 53.239688873291016,
|
|
"epoch": 0.46863189720332576,
|
|
"fcm_dpo/beta": 0.010323047637939453,
|
|
"fcm_dpo/delta": 0.05201638489961624,
|
|
"fcm_dpo/margin": 23.342880249023438,
|
|
"fcm_dpo/q_t": 0.44772541522979736,
|
|
"grad_norm": 14.3025541305542,
|
|
"learning_rate": 3.2170080817777257e-07,
|
|
"logits/chosen": 0.6922097206115723,
|
|
"logits/rejected": 0.6753602623939514,
|
|
"logps/chosen": -180.96995544433594,
|
|
"logps/ref_chosen": -74.66827392578125,
|
|
"logps/ref_rejected": -80.5689697265625,
|
|
"logps/rejected": -210.21353149414062,
|
|
"loss": 1.2613,
|
|
"margin_dpo/margin_mean": 23.342884063720703,
|
|
"margin_dpo/margin_std": 63.40098571777344,
|
|
"step": 310
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -83.98698425292969,
|
|
"KL/mean": -105.22430419921875,
|
|
"KL/rejected_KL_mean": -126.46160888671875,
|
|
"KL/std": 66.99015808105469,
|
|
"epoch": 0.47014361300075586,
|
|
"fcm_dpo/beta": 0.010266819037497044,
|
|
"fcm_dpo/delta": -0.037273284047842026,
|
|
"fcm_dpo/margin": 42.47461700439453,
|
|
"fcm_dpo/q_t": 0.4061315357685089,
|
|
"grad_norm": 13.74625301361084,
|
|
"learning_rate": 3.204331392103574e-07,
|
|
"logits/chosen": 0.5769006013870239,
|
|
"logits/rejected": 0.4292186200618744,
|
|
"logps/chosen": -143.7250213623047,
|
|
"logps/ref_chosen": -59.738033294677734,
|
|
"logps/ref_rejected": -93.60757446289062,
|
|
"logps/rejected": -220.06918334960938,
|
|
"loss": 1.111,
|
|
"margin_dpo/margin_mean": 42.47461700439453,
|
|
"margin_dpo/margin_std": 69.2762451171875,
|
|
"step": 311
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -86.10234069824219,
|
|
"KL/mean": -111.2046890258789,
|
|
"KL/rejected_KL_mean": -136.30703735351562,
|
|
"KL/std": 59.88706970214844,
|
|
"epoch": 0.47165532879818595,
|
|
"fcm_dpo/beta": 0.010209228843450546,
|
|
"fcm_dpo/delta": -0.11543036252260208,
|
|
"fcm_dpo/margin": 50.20468521118164,
|
|
"fcm_dpo/q_t": 0.38333389163017273,
|
|
"grad_norm": 11.453956604003906,
|
|
"learning_rate": 3.1916350007663176e-07,
|
|
"logits/chosen": 0.7100861072540283,
|
|
"logits/rejected": 0.6133370995521545,
|
|
"logps/chosen": -139.9187774658203,
|
|
"logps/ref_chosen": -53.816436767578125,
|
|
"logps/ref_rejected": -68.6575698852539,
|
|
"logps/rejected": -204.964599609375,
|
|
"loss": 1.0167,
|
|
"margin_dpo/margin_mean": 50.204689025878906,
|
|
"margin_dpo/margin_std": 57.12451171875,
|
|
"step": 312
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -89.08069610595703,
|
|
"KL/mean": -102.43197631835938,
|
|
"KL/rejected_KL_mean": -115.78325653076172,
|
|
"KL/std": 58.436729431152344,
|
|
"epoch": 0.47316704459561604,
|
|
"fcm_dpo/beta": 0.01026826910674572,
|
|
"fcm_dpo/delta": 0.12750136852264404,
|
|
"fcm_dpo/margin": 26.702564239501953,
|
|
"fcm_dpo/q_t": 0.43933600187301636,
|
|
"grad_norm": 13.30626106262207,
|
|
"learning_rate": 3.178919262911314e-07,
|
|
"logits/chosen": 0.7160056829452515,
|
|
"logits/rejected": 0.6943833827972412,
|
|
"logps/chosen": -149.03805541992188,
|
|
"logps/ref_chosen": -59.957359313964844,
|
|
"logps/ref_rejected": -69.31729888916016,
|
|
"logps/rejected": -185.10055541992188,
|
|
"loss": 1.228,
|
|
"margin_dpo/margin_mean": 26.70256233215332,
|
|
"margin_dpo/margin_std": 62.89814758300781,
|
|
"step": 313
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -81.21215057373047,
|
|
"KL/mean": -106.61221313476562,
|
|
"KL/rejected_KL_mean": -132.01226806640625,
|
|
"KL/std": 57.23918533325195,
|
|
"epoch": 0.47467876039304613,
|
|
"fcm_dpo/beta": 0.010188684798777103,
|
|
"fcm_dpo/delta": -0.12099461257457733,
|
|
"fcm_dpo/margin": 50.80012512207031,
|
|
"fcm_dpo/q_t": 0.3848544955253601,
|
|
"grad_norm": 12.311768531799316,
|
|
"learning_rate": 3.166184534225087e-07,
|
|
"logits/chosen": 0.6671676635742188,
|
|
"logits/rejected": 0.7002524137496948,
|
|
"logps/chosen": -151.48031616210938,
|
|
"logps/ref_chosen": -70.26815795898438,
|
|
"logps/ref_rejected": -69.23971557617188,
|
|
"logps/rejected": -201.25198364257812,
|
|
"loss": 1.0282,
|
|
"margin_dpo/margin_mean": 50.80012512207031,
|
|
"margin_dpo/margin_std": 62.269187927246094,
|
|
"step": 314
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -85.4464340209961,
|
|
"KL/mean": -104.84877014160156,
|
|
"KL/rejected_KL_mean": -124.2510986328125,
|
|
"KL/std": 57.08869171142578,
|
|
"epoch": 0.47619047619047616,
|
|
"fcm_dpo/beta": 0.010202569887042046,
|
|
"fcm_dpo/delta": 0.0038949474692344666,
|
|
"fcm_dpo/margin": 38.8046760559082,
|
|
"fcm_dpo/q_t": 0.4093957543373108,
|
|
"grad_norm": 12.350065231323242,
|
|
"learning_rate": 3.1534311709253723e-07,
|
|
"logits/chosen": 0.6125457286834717,
|
|
"logits/rejected": 0.576646089553833,
|
|
"logps/chosen": -153.24111938476562,
|
|
"logps/ref_chosen": -67.79469299316406,
|
|
"logps/ref_rejected": -74.55148315429688,
|
|
"logps/rejected": -198.80258178710938,
|
|
"loss": 1.1164,
|
|
"margin_dpo/margin_mean": 38.8046760559082,
|
|
"margin_dpo/margin_std": 58.82640075683594,
|
|
"step": 315
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -80.5737533569336,
|
|
"KL/mean": -107.49533081054688,
|
|
"KL/rejected_KL_mean": -134.41690063476562,
|
|
"KL/std": 60.39004135131836,
|
|
"epoch": 0.47770219198790626,
|
|
"fcm_dpo/beta": 0.010121572762727737,
|
|
"fcm_dpo/delta": -0.14948400855064392,
|
|
"fcm_dpo/margin": 53.8431396484375,
|
|
"fcm_dpo/q_t": 0.37797385454177856,
|
|
"grad_norm": 12.834312438964844,
|
|
"learning_rate": 3.1406595297511564e-07,
|
|
"logits/chosen": 0.5562376976013184,
|
|
"logits/rejected": 0.4149426817893982,
|
|
"logps/chosen": -135.86224365234375,
|
|
"logps/ref_chosen": -55.288482666015625,
|
|
"logps/ref_rejected": -96.15723419189453,
|
|
"logps/rejected": -230.5741424560547,
|
|
"loss": 1.0071,
|
|
"margin_dpo/margin_mean": 53.843143463134766,
|
|
"margin_dpo/margin_std": 58.867286682128906,
|
|
"step": 316
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -72.8826904296875,
|
|
"KL/mean": -99.0898666381836,
|
|
"KL/rejected_KL_mean": -125.29704284667969,
|
|
"KL/std": 56.391448974609375,
|
|
"epoch": 0.47921390778533635,
|
|
"fcm_dpo/beta": 0.009914349764585495,
|
|
"fcm_dpo/delta": -0.1232588142156601,
|
|
"fcm_dpo/margin": 52.41436004638672,
|
|
"fcm_dpo/q_t": 0.3815646767616272,
|
|
"grad_norm": 14.458547592163086,
|
|
"learning_rate": 3.1278699679526975e-07,
|
|
"logits/chosen": 0.6879777908325195,
|
|
"logits/rejected": 0.6428084373474121,
|
|
"logps/chosen": -127.46406555175781,
|
|
"logps/ref_chosen": -54.58137512207031,
|
|
"logps/ref_rejected": -72.77232360839844,
|
|
"logps/rejected": -198.06936645507812,
|
|
"loss": 1.0189,
|
|
"margin_dpo/margin_mean": 52.41436004638672,
|
|
"margin_dpo/margin_std": 61.21650695800781,
|
|
"step": 317
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -82.05154418945312,
|
|
"KL/mean": -100.57650756835938,
|
|
"KL/rejected_KL_mean": -119.10148620605469,
|
|
"KL/std": 59.87347412109375,
|
|
"epoch": 0.48072562358276644,
|
|
"fcm_dpo/beta": 0.009916391223669052,
|
|
"fcm_dpo/delta": 0.03319194167852402,
|
|
"fcm_dpo/margin": 37.04994201660156,
|
|
"fcm_dpo/q_t": 0.4215635657310486,
|
|
"grad_norm": 12.727980613708496,
|
|
"learning_rate": 3.1150628432815336e-07,
|
|
"logits/chosen": 0.666507363319397,
|
|
"logits/rejected": 0.594628095626831,
|
|
"logps/chosen": -134.9397735595703,
|
|
"logps/ref_chosen": -52.88822937011719,
|
|
"logps/ref_rejected": -80.63988494873047,
|
|
"logps/rejected": -199.74136352539062,
|
|
"loss": 1.1952,
|
|
"margin_dpo/margin_mean": 37.0499382019043,
|
|
"margin_dpo/margin_std": 79.71516418457031,
|
|
"step": 318
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -79.12725830078125,
|
|
"KL/mean": -105.85105895996094,
|
|
"KL/rejected_KL_mean": -132.57485961914062,
|
|
"KL/std": 62.8668212890625,
|
|
"epoch": 0.48223733938019653,
|
|
"fcm_dpo/beta": 0.009839287959039211,
|
|
"fcm_dpo/delta": -0.12930774688720703,
|
|
"fcm_dpo/margin": 53.44761657714844,
|
|
"fcm_dpo/q_t": 0.3846117854118347,
|
|
"grad_norm": 12.548164367675781,
|
|
"learning_rate": 3.1022385139804707e-07,
|
|
"logits/chosen": 0.6063874959945679,
|
|
"logits/rejected": 0.5900925993919373,
|
|
"logps/chosen": -143.49058532714844,
|
|
"logps/ref_chosen": -64.36333465576172,
|
|
"logps/ref_rejected": -79.47296142578125,
|
|
"logps/rejected": -212.04782104492188,
|
|
"loss": 1.0392,
|
|
"margin_dpo/margin_mean": 53.44761657714844,
|
|
"margin_dpo/margin_std": 71.73039245605469,
|
|
"step": 319
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -73.84049987792969,
|
|
"KL/mean": -95.15985107421875,
|
|
"KL/rejected_KL_mean": -116.47920227050781,
|
|
"KL/std": 63.761451721191406,
|
|
"epoch": 0.4837490551776266,
|
|
"fcm_dpo/beta": 0.009745562449097633,
|
|
"fcm_dpo/delta": -0.12943899631500244,
|
|
"fcm_dpo/margin": 42.63869857788086,
|
|
"fcm_dpo/q_t": 0.4095991253852844,
|
|
"grad_norm": 14.181422233581543,
|
|
"learning_rate": 3.0893973387735683e-07,
|
|
"logits/chosen": 0.5515351295471191,
|
|
"logits/rejected": 0.510681688785553,
|
|
"logps/chosen": -123.39924621582031,
|
|
"logps/ref_chosen": -49.558746337890625,
|
|
"logps/ref_rejected": -71.23444366455078,
|
|
"logps/rejected": -187.71365356445312,
|
|
"loss": 1.1252,
|
|
"margin_dpo/margin_mean": 42.63869857788086,
|
|
"margin_dpo/margin_std": 68.31529998779297,
|
|
"step": 320
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -78.46990966796875,
|
|
"KL/mean": -100.66580200195312,
|
|
"KL/rejected_KL_mean": -122.86168670654297,
|
|
"KL/std": 56.711334228515625,
|
|
"epoch": 0.4852607709750567,
|
|
"fcm_dpo/beta": 0.009625335223972797,
|
|
"fcm_dpo/delta": -0.02857048809528351,
|
|
"fcm_dpo/margin": 44.39177322387695,
|
|
"fcm_dpo/q_t": 0.40270182490348816,
|
|
"grad_norm": 18.4453182220459,
|
|
"learning_rate": 3.0765396768561004e-07,
|
|
"logits/chosen": 0.6360567212104797,
|
|
"logits/rejected": 0.6163891553878784,
|
|
"logps/chosen": -130.55517578125,
|
|
"logps/ref_chosen": -52.08526611328125,
|
|
"logps/ref_rejected": -55.58674621582031,
|
|
"logps/rejected": -178.44842529296875,
|
|
"loss": 1.1064,
|
|
"margin_dpo/margin_mean": 44.39176940917969,
|
|
"margin_dpo/margin_std": 66.52297973632812,
|
|
"step": 321
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -88.74971008300781,
|
|
"KL/mean": -117.3365707397461,
|
|
"KL/rejected_KL_mean": -145.92344665527344,
|
|
"KL/std": 63.385902404785156,
|
|
"epoch": 0.48677248677248675,
|
|
"fcm_dpo/beta": 0.009563559666275978,
|
|
"fcm_dpo/delta": -0.15077075362205505,
|
|
"fcm_dpo/margin": 57.173744201660156,
|
|
"fcm_dpo/q_t": 0.37540823221206665,
|
|
"grad_norm": 11.574180603027344,
|
|
"learning_rate": 3.063665887884511e-07,
|
|
"logits/chosen": 0.6899732947349548,
|
|
"logits/rejected": 0.6027319431304932,
|
|
"logps/chosen": -136.15380859375,
|
|
"logps/ref_chosen": -47.404109954833984,
|
|
"logps/ref_rejected": -73.4260025024414,
|
|
"logps/rejected": -219.34945678710938,
|
|
"loss": 0.9962,
|
|
"margin_dpo/margin_mean": 57.173744201660156,
|
|
"margin_dpo/margin_std": 62.43910217285156,
|
|
"step": 322
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -89.77249145507812,
|
|
"KL/mean": -109.56617736816406,
|
|
"KL/rejected_KL_mean": -129.35984802246094,
|
|
"KL/std": 61.87095260620117,
|
|
"epoch": 0.48828420256991684,
|
|
"fcm_dpo/beta": 0.009531868621706963,
|
|
"fcm_dpo/delta": 0.023070599883794785,
|
|
"fcm_dpo/margin": 39.58735656738281,
|
|
"fcm_dpo/q_t": 0.4186198115348816,
|
|
"grad_norm": 14.259188652038574,
|
|
"learning_rate": 3.0507763319663517e-07,
|
|
"logits/chosen": 0.5941547751426697,
|
|
"logits/rejected": 0.5161798596382141,
|
|
"logps/chosen": -159.77879333496094,
|
|
"logps/ref_chosen": -70.00630187988281,
|
|
"logps/ref_rejected": -86.96690368652344,
|
|
"logps/rejected": -216.32675170898438,
|
|
"loss": 1.1713,
|
|
"margin_dpo/margin_mean": 39.58735656738281,
|
|
"margin_dpo/margin_std": 78.26737976074219,
|
|
"step": 323
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -80.48503112792969,
|
|
"KL/mean": -105.15479278564453,
|
|
"KL/rejected_KL_mean": -129.8245849609375,
|
|
"KL/std": 68.54472351074219,
|
|
"epoch": 0.4897959183673469,
|
|
"fcm_dpo/beta": 0.009465748444199562,
|
|
"fcm_dpo/delta": -0.06890767812728882,
|
|
"fcm_dpo/margin": 49.33953857421875,
|
|
"fcm_dpo/q_t": 0.39398831129074097,
|
|
"grad_norm": 17.981904983520508,
|
|
"learning_rate": 3.0378713696502097e-07,
|
|
"logits/chosen": 0.6834410429000854,
|
|
"logits/rejected": 0.6231163144111633,
|
|
"logps/chosen": -136.37384033203125,
|
|
"logps/ref_chosen": -55.88882064819336,
|
|
"logps/ref_rejected": -75.23088073730469,
|
|
"logps/rejected": -205.05545043945312,
|
|
"loss": 1.0572,
|
|
"margin_dpo/margin_mean": 49.33953857421875,
|
|
"margin_dpo/margin_std": 63.26485061645508,
|
|
"step": 324
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -97.74079895019531,
|
|
"KL/mean": -122.80034637451172,
|
|
"KL/rejected_KL_mean": -147.85989379882812,
|
|
"KL/std": 67.08414459228516,
|
|
"epoch": 0.491307634164777,
|
|
"fcm_dpo/beta": 0.009393356740474701,
|
|
"fcm_dpo/delta": -0.07288876920938492,
|
|
"fcm_dpo/margin": 50.11908721923828,
|
|
"fcm_dpo/q_t": 0.3959406614303589,
|
|
"grad_norm": 12.675724983215332,
|
|
"learning_rate": 3.0249513619156206e-07,
|
|
"logits/chosen": 0.6396251916885376,
|
|
"logits/rejected": 0.5719287395477295,
|
|
"logps/chosen": -161.8878173828125,
|
|
"logps/ref_chosen": -64.14701843261719,
|
|
"logps/ref_rejected": -79.91143798828125,
|
|
"logps/rejected": -227.77133178710938,
|
|
"loss": 1.094,
|
|
"margin_dpo/margin_mean": 50.11908721923828,
|
|
"margin_dpo/margin_std": 79.13504791259766,
|
|
"step": 325
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -116.0241928100586,
|
|
"KL/mean": -126.10319519042969,
|
|
"KL/rejected_KL_mean": -136.18218994140625,
|
|
"KL/std": 63.22649002075195,
|
|
"epoch": 0.4928193499622071,
|
|
"fcm_dpo/beta": 0.009398145601153374,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 20.15801429748535,
|
|
"fcm_dpo/q_t": 0.457425981760025,
|
|
"grad_norm": 15.125726699829102,
|
|
"learning_rate": 3.012016670162977e-07,
|
|
"logits/chosen": 0.6008260250091553,
|
|
"logits/rejected": 0.6089076995849609,
|
|
"logps/chosen": -191.55551147460938,
|
|
"logps/ref_chosen": -75.53131103515625,
|
|
"logps/ref_rejected": -76.5898666381836,
|
|
"logps/rejected": -212.77206420898438,
|
|
"loss": 1.3098,
|
|
"margin_dpo/margin_mean": 20.15801429748535,
|
|
"margin_dpo/margin_std": 70.23220825195312,
|
|
"step": 326
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -103.82672119140625,
|
|
"KL/mean": -123.67758178710938,
|
|
"KL/rejected_KL_mean": -143.5284423828125,
|
|
"KL/std": 67.15496826171875,
|
|
"epoch": 0.4943310657596372,
|
|
"fcm_dpo/beta": 0.009429624304175377,
|
|
"fcm_dpo/delta": 0.026001080870628357,
|
|
"fcm_dpo/margin": 39.701717376708984,
|
|
"fcm_dpo/q_t": 0.41706597805023193,
|
|
"grad_norm": 13.899744987487793,
|
|
"learning_rate": 2.99906765620341e-07,
|
|
"logits/chosen": 0.569645881652832,
|
|
"logits/rejected": 0.5422235727310181,
|
|
"logps/chosen": -173.16387939453125,
|
|
"logps/ref_chosen": -69.33717346191406,
|
|
"logps/ref_rejected": -73.37751770019531,
|
|
"logps/rejected": -216.9059600830078,
|
|
"loss": 1.1669,
|
|
"margin_dpo/margin_mean": 39.70172119140625,
|
|
"margin_dpo/margin_std": 76.1077880859375,
|
|
"step": 327
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -90.67269897460938,
|
|
"KL/mean": -115.12163543701172,
|
|
"KL/rejected_KL_mean": -139.57057189941406,
|
|
"KL/std": 67.25538635253906,
|
|
"epoch": 0.4958427815570673,
|
|
"fcm_dpo/beta": 0.009389924816787243,
|
|
"fcm_dpo/delta": -0.06051798164844513,
|
|
"fcm_dpo/margin": 48.89786911010742,
|
|
"fcm_dpo/q_t": 0.39682865142822266,
|
|
"grad_norm": 12.907678604125977,
|
|
"learning_rate": 2.9861046822486766e-07,
|
|
"logits/chosen": 0.5505099892616272,
|
|
"logits/rejected": 0.5118117332458496,
|
|
"logps/chosen": -152.37893676757812,
|
|
"logps/ref_chosen": -61.70623016357422,
|
|
"logps/ref_rejected": -83.73808288574219,
|
|
"logps/rejected": -223.30865478515625,
|
|
"loss": 1.0644,
|
|
"margin_dpo/margin_mean": 48.897865295410156,
|
|
"margin_dpo/margin_std": 64.38197326660156,
|
|
"step": 328
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -102.58954620361328,
|
|
"KL/mean": -126.92652893066406,
|
|
"KL/rejected_KL_mean": -151.2635040283203,
|
|
"KL/std": 70.39198303222656,
|
|
"epoch": 0.4973544973544973,
|
|
"fcm_dpo/beta": 0.009322911500930786,
|
|
"fcm_dpo/delta": -0.05506677180528641,
|
|
"fcm_dpo/margin": 48.673973083496094,
|
|
"fcm_dpo/q_t": 0.40022924542427063,
|
|
"grad_norm": 14.821163177490234,
|
|
"learning_rate": 2.9731281109010253e-07,
|
|
"logits/chosen": 0.6908204555511475,
|
|
"logits/rejected": 0.6287527680397034,
|
|
"logps/chosen": -167.08795166015625,
|
|
"logps/ref_chosen": -64.4984130859375,
|
|
"logps/ref_rejected": -83.6591796875,
|
|
"logps/rejected": -234.9226837158203,
|
|
"loss": 1.0857,
|
|
"margin_dpo/margin_mean": 48.673973083496094,
|
|
"margin_dpo/margin_std": 72.0045166015625,
|
|
"step": 329
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -88.2018814086914,
|
|
"KL/mean": -113.23458862304688,
|
|
"KL/rejected_KL_mean": -138.26730346679688,
|
|
"KL/std": 66.01676940917969,
|
|
"epoch": 0.4988662131519274,
|
|
"fcm_dpo/beta": 0.009286103770136833,
|
|
"fcm_dpo/delta": -0.06647955626249313,
|
|
"fcm_dpo/margin": 50.06542205810547,
|
|
"fcm_dpo/q_t": 0.3974093794822693,
|
|
"grad_norm": 13.082165718078613,
|
|
"learning_rate": 2.9601383051430505e-07,
|
|
"logits/chosen": 0.6412705183029175,
|
|
"logits/rejected": 0.5723448395729065,
|
|
"logps/chosen": -143.00653076171875,
|
|
"logps/ref_chosen": -54.80464172363281,
|
|
"logps/ref_rejected": -75.3194351196289,
|
|
"logps/rejected": -213.58673095703125,
|
|
"loss": 1.1144,
|
|
"margin_dpo/margin_mean": 50.06542205810547,
|
|
"margin_dpo/margin_std": 83.27445983886719,
|
|
"step": 330
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -95.866455078125,
|
|
"KL/mean": -127.72227478027344,
|
|
"KL/rejected_KL_mean": -159.57809448242188,
|
|
"KL/std": 70.19152069091797,
|
|
"epoch": 0.5003779289493575,
|
|
"fcm_dpo/beta": 0.00912795402109623,
|
|
"fcm_dpo/delta": -0.18693023920059204,
|
|
"fcm_dpo/margin": 63.711647033691406,
|
|
"fcm_dpo/q_t": 0.3695805072784424,
|
|
"grad_norm": 12.054786682128906,
|
|
"learning_rate": 2.947135628327544e-07,
|
|
"logits/chosen": 0.7579110860824585,
|
|
"logits/rejected": 0.7310981750488281,
|
|
"logps/chosen": -155.10903930664062,
|
|
"logps/ref_chosen": -59.242584228515625,
|
|
"logps/ref_rejected": -69.87483215332031,
|
|
"logps/rejected": -229.45294189453125,
|
|
"loss": 0.9972,
|
|
"margin_dpo/margin_mean": 63.711647033691406,
|
|
"margin_dpo/margin_std": 76.30567169189453,
|
|
"step": 331
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -98.1175537109375,
|
|
"KL/mean": -124.7101058959961,
|
|
"KL/rejected_KL_mean": -151.30264282226562,
|
|
"KL/std": 67.22337341308594,
|
|
"epoch": 0.5018896447467877,
|
|
"fcm_dpo/beta": 0.00906030461192131,
|
|
"fcm_dpo/delta": -0.08419599384069443,
|
|
"fcm_dpo/margin": 53.185089111328125,
|
|
"fcm_dpo/q_t": 0.39281436800956726,
|
|
"grad_norm": 12.46126651763916,
|
|
"learning_rate": 2.934120444167326e-07,
|
|
"logits/chosen": 0.5806922912597656,
|
|
"logits/rejected": 0.5372457504272461,
|
|
"logps/chosen": -165.22731018066406,
|
|
"logps/ref_chosen": -67.10975646972656,
|
|
"logps/ref_rejected": -77.11839294433594,
|
|
"logps/rejected": -228.42103576660156,
|
|
"loss": 1.0549,
|
|
"margin_dpo/margin_mean": 53.18508529663086,
|
|
"margin_dpo/margin_std": 68.5789794921875,
|
|
"step": 332
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -105.52397155761719,
|
|
"KL/mean": -134.80032348632812,
|
|
"KL/rejected_KL_mean": -164.07667541503906,
|
|
"KL/std": 67.71150970458984,
|
|
"epoch": 0.5034013605442177,
|
|
"fcm_dpo/beta": 0.008926951326429844,
|
|
"fcm_dpo/delta": -0.12592321634292603,
|
|
"fcm_dpo/margin": 58.552703857421875,
|
|
"fcm_dpo/q_t": 0.38305091857910156,
|
|
"grad_norm": 11.675630569458008,
|
|
"learning_rate": 2.921093116725076e-07,
|
|
"logits/chosen": 0.6542540788650513,
|
|
"logits/rejected": 0.5743177533149719,
|
|
"logps/chosen": -163.9051055908203,
|
|
"logps/ref_chosen": -58.381134033203125,
|
|
"logps/ref_rejected": -85.02839660644531,
|
|
"logps/rejected": -249.10507202148438,
|
|
"loss": 1.0203,
|
|
"margin_dpo/margin_mean": 58.552703857421875,
|
|
"margin_dpo/margin_std": 70.5644760131836,
|
|
"step": 333
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -100.73344421386719,
|
|
"KL/mean": -121.85630798339844,
|
|
"KL/rejected_KL_mean": -142.97915649414062,
|
|
"KL/std": 69.27299499511719,
|
|
"epoch": 0.5049130763416477,
|
|
"fcm_dpo/beta": 0.008913639932870865,
|
|
"fcm_dpo/delta": 0.02385806478559971,
|
|
"fcm_dpo/margin": 42.245704650878906,
|
|
"fcm_dpo/q_t": 0.41680508852005005,
|
|
"grad_norm": 13.630656242370605,
|
|
"learning_rate": 2.9080540104031484e-07,
|
|
"logits/chosen": 0.6709837913513184,
|
|
"logits/rejected": 0.6222789287567139,
|
|
"logps/chosen": -167.62545776367188,
|
|
"logps/ref_chosen": -66.89199829101562,
|
|
"logps/ref_rejected": -91.83695220947266,
|
|
"logps/rejected": -234.81610107421875,
|
|
"loss": 1.1611,
|
|
"margin_dpo/margin_mean": 42.245704650878906,
|
|
"margin_dpo/margin_std": 79.60578918457031,
|
|
"step": 334
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -101.89778137207031,
|
|
"KL/mean": -125.25917053222656,
|
|
"KL/rejected_KL_mean": -148.6205596923828,
|
|
"KL/std": 69.05400085449219,
|
|
"epoch": 0.5064247921390779,
|
|
"fcm_dpo/beta": 0.008921606466174126,
|
|
"fcm_dpo/delta": -0.017355509102344513,
|
|
"fcm_dpo/margin": 46.72278594970703,
|
|
"fcm_dpo/q_t": 0.40833404660224915,
|
|
"grad_norm": 17.24571418762207,
|
|
"learning_rate": 2.895003489933375e-07,
|
|
"logits/chosen": 0.6567748188972473,
|
|
"logits/rejected": 0.6178112626075745,
|
|
"logps/chosen": -163.4122314453125,
|
|
"logps/ref_chosen": -61.51445770263672,
|
|
"logps/ref_rejected": -75.68916320800781,
|
|
"logps/rejected": -224.30972290039062,
|
|
"loss": 1.1266,
|
|
"margin_dpo/margin_mean": 46.72278594970703,
|
|
"margin_dpo/margin_std": 78.48033905029297,
|
|
"step": 335
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -114.77719116210938,
|
|
"KL/mean": -139.40399169921875,
|
|
"KL/rejected_KL_mean": -164.03079223632812,
|
|
"KL/std": 72.44754791259766,
|
|
"epoch": 0.5079365079365079,
|
|
"fcm_dpo/beta": 0.008839685469865799,
|
|
"fcm_dpo/delta": -0.036914002150297165,
|
|
"fcm_dpo/margin": 49.253597259521484,
|
|
"fcm_dpo/q_t": 0.407079815864563,
|
|
"grad_norm": 13.143045425415039,
|
|
"learning_rate": 2.8819419203668675e-07,
|
|
"logits/chosen": 0.5766974687576294,
|
|
"logits/rejected": 0.5520354509353638,
|
|
"logps/chosen": -183.62725830078125,
|
|
"logps/ref_chosen": -68.85006713867188,
|
|
"logps/ref_rejected": -92.99603271484375,
|
|
"logps/rejected": -257.02679443359375,
|
|
"loss": 1.1166,
|
|
"margin_dpo/margin_mean": 49.25359344482422,
|
|
"margin_dpo/margin_std": 80.84031677246094,
|
|
"step": 336
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -119.58826446533203,
|
|
"KL/mean": -138.95216369628906,
|
|
"KL/rejected_KL_mean": -158.31607055664062,
|
|
"KL/std": 71.384521484375,
|
|
"epoch": 0.509448223733938,
|
|
"fcm_dpo/beta": 0.008906678296625614,
|
|
"fcm_dpo/delta": 0.05599668622016907,
|
|
"fcm_dpo/margin": 38.72779083251953,
|
|
"fcm_dpo/q_t": 0.42252814769744873,
|
|
"grad_norm": 12.240598678588867,
|
|
"learning_rate": 2.8688696670638053e-07,
|
|
"logits/chosen": 0.5554500818252563,
|
|
"logits/rejected": 0.5237593650817871,
|
|
"logps/chosen": -192.77609252929688,
|
|
"logps/ref_chosen": -73.18783569335938,
|
|
"logps/ref_rejected": -86.89118957519531,
|
|
"logps/rejected": -245.20724487304688,
|
|
"loss": 1.1728,
|
|
"margin_dpo/margin_mean": 38.7277946472168,
|
|
"margin_dpo/margin_std": 74.26626586914062,
|
|
"step": 337
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -117.08216857910156,
|
|
"KL/mean": -137.17984008789062,
|
|
"KL/rejected_KL_mean": -157.27749633789062,
|
|
"KL/std": 67.41790008544922,
|
|
"epoch": 0.5109599395313681,
|
|
"fcm_dpo/beta": 0.008936947211623192,
|
|
"fcm_dpo/delta": 0.041531890630722046,
|
|
"fcm_dpo/margin": 40.19532775878906,
|
|
"fcm_dpo/q_t": 0.4194255471229553,
|
|
"grad_norm": 12.413491249084473,
|
|
"learning_rate": 2.8557870956832133e-07,
|
|
"logits/chosen": 0.5770224332809448,
|
|
"logits/rejected": 0.5512077808380127,
|
|
"logps/chosen": -181.02178955078125,
|
|
"logps/ref_chosen": -63.939613342285156,
|
|
"logps/ref_rejected": -75.34243774414062,
|
|
"logps/rejected": -232.61993408203125,
|
|
"loss": 1.1652,
|
|
"margin_dpo/margin_mean": 40.19532775878906,
|
|
"margin_dpo/margin_std": 75.50584411621094,
|
|
"step": 338
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -95.16416931152344,
|
|
"KL/mean": -118.84825134277344,
|
|
"KL/rejected_KL_mean": -142.53231811523438,
|
|
"KL/std": 67.74819946289062,
|
|
"epoch": 0.5124716553287982,
|
|
"fcm_dpo/beta": 0.00892747100442648,
|
|
"fcm_dpo/delta": -0.023438837379217148,
|
|
"fcm_dpo/margin": 47.36815643310547,
|
|
"fcm_dpo/q_t": 0.40453624725341797,
|
|
"grad_norm": 15.334397315979004,
|
|
"learning_rate": 2.842694572172736e-07,
|
|
"logits/chosen": 0.7413595914840698,
|
|
"logits/rejected": 0.6550474166870117,
|
|
"logps/chosen": -140.7133026123047,
|
|
"logps/ref_chosen": -45.54913330078125,
|
|
"logps/ref_rejected": -67.0482177734375,
|
|
"logps/rejected": -209.58053588867188,
|
|
"loss": 1.1057,
|
|
"margin_dpo/margin_mean": 47.36815643310547,
|
|
"margin_dpo/margin_std": 71.91868591308594,
|
|
"step": 339
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -108.51216888427734,
|
|
"KL/mean": -131.76951599121094,
|
|
"KL/rejected_KL_mean": -155.02687072753906,
|
|
"KL/std": 73.69456481933594,
|
|
"epoch": 0.5139833711262283,
|
|
"fcm_dpo/beta": 0.008935904130339622,
|
|
"fcm_dpo/delta": -0.016073474660515785,
|
|
"fcm_dpo/margin": 46.51470184326172,
|
|
"fcm_dpo/q_t": 0.4097430109977722,
|
|
"grad_norm": 13.037310600280762,
|
|
"learning_rate": 2.8295924627584004e-07,
|
|
"logits/chosen": 0.6497082114219666,
|
|
"logits/rejected": 0.6278408765792847,
|
|
"logps/chosen": -162.517822265625,
|
|
"logps/ref_chosen": -54.00564956665039,
|
|
"logps/ref_rejected": -61.314430236816406,
|
|
"logps/rejected": -216.34130859375,
|
|
"loss": 1.1483,
|
|
"margin_dpo/margin_mean": 46.51470947265625,
|
|
"margin_dpo/margin_std": 85.32322692871094,
|
|
"step": 340
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -108.59716796875,
|
|
"KL/mean": -135.66192626953125,
|
|
"KL/rejected_KL_mean": -162.72669982910156,
|
|
"KL/std": 72.96824645996094,
|
|
"epoch": 0.5154950869236583,
|
|
"fcm_dpo/beta": 0.008768146857619286,
|
|
"fcm_dpo/delta": -0.16836212575435638,
|
|
"fcm_dpo/margin": 54.129554748535156,
|
|
"fcm_dpo/q_t": 0.39465534687042236,
|
|
"grad_norm": 14.708179473876953,
|
|
"learning_rate": 2.816481133934373e-07,
|
|
"logits/chosen": 0.6575794219970703,
|
|
"logits/rejected": 0.6086676120758057,
|
|
"logps/chosen": -171.9922637939453,
|
|
"logps/ref_chosen": -63.39509582519531,
|
|
"logps/ref_rejected": -76.20973205566406,
|
|
"logps/rejected": -238.93643188476562,
|
|
"loss": 1.0871,
|
|
"margin_dpo/margin_mean": 54.129554748535156,
|
|
"margin_dpo/margin_std": 77.37159729003906,
|
|
"step": 341
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -107.54547119140625,
|
|
"KL/mean": -134.28675842285156,
|
|
"KL/rejected_KL_mean": -161.02804565429688,
|
|
"KL/std": 77.28424072265625,
|
|
"epoch": 0.5170068027210885,
|
|
"fcm_dpo/beta": 0.008711115457117558,
|
|
"fcm_dpo/delta": -0.06765161454677582,
|
|
"fcm_dpo/margin": 53.482566833496094,
|
|
"fcm_dpo/q_t": 0.3980483412742615,
|
|
"grad_norm": 11.901773452758789,
|
|
"learning_rate": 2.8033609524527046e-07,
|
|
"logits/chosen": 0.7269519567489624,
|
|
"logits/rejected": 0.6833984851837158,
|
|
"logps/chosen": -160.59329223632812,
|
|
"logps/ref_chosen": -53.047813415527344,
|
|
"logps/ref_rejected": -68.2854232788086,
|
|
"logps/rejected": -229.31346130371094,
|
|
"loss": 1.0848,
|
|
"margin_dpo/margin_mean": 53.482566833496094,
|
|
"margin_dpo/margin_std": 79.55198669433594,
|
|
"step": 342
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -101.60369873046875,
|
|
"KL/mean": -120.99630737304688,
|
|
"KL/rejected_KL_mean": -140.388916015625,
|
|
"KL/std": 71.24321746826172,
|
|
"epoch": 0.5185185185185185,
|
|
"fcm_dpo/beta": 0.00866604596376419,
|
|
"fcm_dpo/delta": -0.04947742447257042,
|
|
"fcm_dpo/margin": 38.78521728515625,
|
|
"fcm_dpo/q_t": 0.423829585313797,
|
|
"grad_norm": 11.599870681762695,
|
|
"learning_rate": 2.7902322853130753e-07,
|
|
"logits/chosen": 0.5325268507003784,
|
|
"logits/rejected": 0.5232107639312744,
|
|
"logps/chosen": -172.18222045898438,
|
|
"logps/ref_chosen": -70.57852935791016,
|
|
"logps/ref_rejected": -84.73873901367188,
|
|
"logps/rejected": -225.12765502929688,
|
|
"loss": 1.1746,
|
|
"margin_dpo/margin_mean": 38.78521728515625,
|
|
"margin_dpo/margin_std": 72.67005157470703,
|
|
"step": 343
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -112.94359588623047,
|
|
"KL/mean": -141.10887145996094,
|
|
"KL/rejected_KL_mean": -169.27413940429688,
|
|
"KL/std": 72.50189208984375,
|
|
"epoch": 0.5200302343159486,
|
|
"fcm_dpo/beta": 0.008603915572166443,
|
|
"fcm_dpo/delta": -0.08684791624546051,
|
|
"fcm_dpo/margin": 56.3305549621582,
|
|
"fcm_dpo/q_t": 0.3930312991142273,
|
|
"grad_norm": 14.812845230102539,
|
|
"learning_rate": 2.7770954997525274e-07,
|
|
"logits/chosen": 0.6950997114181519,
|
|
"logits/rejected": 0.6217761039733887,
|
|
"logps/chosen": -168.75460815429688,
|
|
"logps/ref_chosen": -55.811004638671875,
|
|
"logps/ref_rejected": -84.77637481689453,
|
|
"logps/rejected": -254.05052185058594,
|
|
"loss": 1.0695,
|
|
"margin_dpo/margin_mean": 56.3305549621582,
|
|
"margin_dpo/margin_std": 81.02676391601562,
|
|
"step": 344
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -90.91252136230469,
|
|
"KL/mean": -115.78691101074219,
|
|
"KL/rejected_KL_mean": -140.6613006591797,
|
|
"KL/std": 66.29633331298828,
|
|
"epoch": 0.5215419501133787,
|
|
"fcm_dpo/beta": 0.008582234382629395,
|
|
"fcm_dpo/delta": -0.027557937428355217,
|
|
"fcm_dpo/margin": 49.748783111572266,
|
|
"fcm_dpo/q_t": 0.40453413128852844,
|
|
"grad_norm": 13.830284118652344,
|
|
"learning_rate": 2.7639509632351927e-07,
|
|
"logits/chosen": 0.6835196614265442,
|
|
"logits/rejected": 0.6392360925674438,
|
|
"logps/chosen": -148.6986083984375,
|
|
"logps/ref_chosen": -57.78609848022461,
|
|
"logps/ref_rejected": -78.91847229003906,
|
|
"logps/rejected": -219.57977294921875,
|
|
"loss": 1.1093,
|
|
"margin_dpo/margin_mean": 49.748783111572266,
|
|
"margin_dpo/margin_std": 78.42387390136719,
|
|
"step": 345
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -97.63043975830078,
|
|
"KL/mean": -127.51663208007812,
|
|
"KL/rejected_KL_mean": -157.40283203125,
|
|
"KL/std": 77.06617736816406,
|
|
"epoch": 0.5230536659108088,
|
|
"fcm_dpo/beta": 0.008512051776051521,
|
|
"fcm_dpo/delta": -0.11155696958303452,
|
|
"fcm_dpo/margin": 59.77239227294922,
|
|
"fcm_dpo/q_t": 0.3856911063194275,
|
|
"grad_norm": 13.432490348815918,
|
|
"learning_rate": 2.7507990434420123e-07,
|
|
"logits/chosen": 0.6890122890472412,
|
|
"logits/rejected": 0.6013680100440979,
|
|
"logps/chosen": -153.91555786132812,
|
|
"logps/ref_chosen": -56.285125732421875,
|
|
"logps/ref_rejected": -91.15303039550781,
|
|
"logps/rejected": -248.5558624267578,
|
|
"loss": 1.0516,
|
|
"margin_dpo/margin_mean": 59.77239227294922,
|
|
"margin_dpo/margin_std": 79.76498413085938,
|
|
"step": 346
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -107.83124542236328,
|
|
"KL/mean": -130.78091430664062,
|
|
"KL/rejected_KL_mean": -153.7305908203125,
|
|
"KL/std": 73.54902648925781,
|
|
"epoch": 0.5245653817082389,
|
|
"fcm_dpo/beta": 0.008475182577967644,
|
|
"fcm_dpo/delta": 0.011207839474081993,
|
|
"fcm_dpo/margin": 45.89935302734375,
|
|
"fcm_dpo/q_t": 0.41313329339027405,
|
|
"grad_norm": 14.230241775512695,
|
|
"learning_rate": 2.737640108260456e-07,
|
|
"logits/chosen": 0.7623085975646973,
|
|
"logits/rejected": 0.7118766903877258,
|
|
"logps/chosen": -161.33078002929688,
|
|
"logps/ref_chosen": -53.499542236328125,
|
|
"logps/ref_rejected": -72.52565002441406,
|
|
"logps/rejected": -226.25625610351562,
|
|
"loss": 1.1268,
|
|
"margin_dpo/margin_mean": 45.89935302734375,
|
|
"margin_dpo/margin_std": 74.375244140625,
|
|
"step": 347
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -96.4140625,
|
|
"KL/mean": -123.11373901367188,
|
|
"KL/rejected_KL_mean": -149.8134002685547,
|
|
"KL/std": 69.68951416015625,
|
|
"epoch": 0.5260770975056689,
|
|
"fcm_dpo/beta": 0.008433183655142784,
|
|
"fcm_dpo/delta": -0.05172973871231079,
|
|
"fcm_dpo/margin": 53.399356842041016,
|
|
"fcm_dpo/q_t": 0.40203267335891724,
|
|
"grad_norm": 13.573116302490234,
|
|
"learning_rate": 2.724474525774229e-07,
|
|
"logits/chosen": 0.7553528547286987,
|
|
"logits/rejected": 0.7240778207778931,
|
|
"logps/chosen": -147.20091247558594,
|
|
"logps/ref_chosen": -50.78684997558594,
|
|
"logps/ref_rejected": -68.63732147216797,
|
|
"logps/rejected": -218.4507293701172,
|
|
"loss": 1.0993,
|
|
"margin_dpo/margin_mean": 53.399356842041016,
|
|
"margin_dpo/margin_std": 83.59718322753906,
|
|
"step": 348
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -97.35595703125,
|
|
"KL/mean": -124.8929443359375,
|
|
"KL/rejected_KL_mean": -152.429931640625,
|
|
"KL/std": 76.0298843383789,
|
|
"epoch": 0.527588813303099,
|
|
"fcm_dpo/beta": 0.008418774232268333,
|
|
"fcm_dpo/delta": -0.06526511907577515,
|
|
"fcm_dpo/margin": 55.07396697998047,
|
|
"fcm_dpo/q_t": 0.3980802297592163,
|
|
"grad_norm": 12.64448070526123,
|
|
"learning_rate": 2.711302664252973e-07,
|
|
"logits/chosen": 0.7135356664657593,
|
|
"logits/rejected": 0.6159645318984985,
|
|
"logps/chosen": -150.68096923828125,
|
|
"logps/ref_chosen": -53.325008392333984,
|
|
"logps/ref_rejected": -83.21236419677734,
|
|
"logps/rejected": -235.6422882080078,
|
|
"loss": 1.082,
|
|
"margin_dpo/margin_mean": 55.07396697998047,
|
|
"margin_dpo/margin_std": 80.17333984375,
|
|
"step": 349
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -102.40263366699219,
|
|
"KL/mean": -138.00588989257812,
|
|
"KL/rejected_KL_mean": -173.60916137695312,
|
|
"KL/std": 80.74720001220703,
|
|
"epoch": 0.5291005291005291,
|
|
"fcm_dpo/beta": 0.008256148546934128,
|
|
"fcm_dpo/delta": -0.19376999139785767,
|
|
"fcm_dpo/margin": 71.20652770996094,
|
|
"fcm_dpo/q_t": 0.3678281307220459,
|
|
"grad_norm": 15.81769847869873,
|
|
"learning_rate": 2.698124892141971e-07,
|
|
"logits/chosen": 0.672303318977356,
|
|
"logits/rejected": 0.5934668779373169,
|
|
"logps/chosen": -164.02841186523438,
|
|
"logps/ref_chosen": -61.625770568847656,
|
|
"logps/ref_rejected": -87.63627624511719,
|
|
"logps/rejected": -261.24542236328125,
|
|
"loss": 0.9739,
|
|
"margin_dpo/margin_mean": 71.20652770996094,
|
|
"margin_dpo/margin_std": 76.9776382446289,
|
|
"step": 350
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -97.55595397949219,
|
|
"KL/mean": -122.96000671386719,
|
|
"KL/rejected_KL_mean": -148.36407470703125,
|
|
"KL/std": 69.04269409179688,
|
|
"epoch": 0.5306122448979592,
|
|
"fcm_dpo/beta": 0.00820033997297287,
|
|
"fcm_dpo/delta": -0.017102137207984924,
|
|
"fcm_dpo/margin": 50.80812454223633,
|
|
"fcm_dpo/q_t": 0.40488386154174805,
|
|
"grad_norm": 13.743908882141113,
|
|
"learning_rate": 2.6849415780518357e-07,
|
|
"logits/chosen": 0.6163613796234131,
|
|
"logits/rejected": 0.5413371324539185,
|
|
"logps/chosen": -153.81228637695312,
|
|
"logps/ref_chosen": -56.2563362121582,
|
|
"logps/ref_rejected": -79.11589813232422,
|
|
"logps/rejected": -227.47998046875,
|
|
"loss": 1.1327,
|
|
"margin_dpo/margin_mean": 50.808128356933594,
|
|
"margin_dpo/margin_std": 86.70310974121094,
|
|
"step": 351
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -99.45494079589844,
|
|
"KL/mean": -127.73379516601562,
|
|
"KL/rejected_KL_mean": -156.0126495361328,
|
|
"KL/std": 76.89591979980469,
|
|
"epoch": 0.5321239606953893,
|
|
"fcm_dpo/beta": 0.008157326839864254,
|
|
"fcm_dpo/delta": -0.06299930065870285,
|
|
"fcm_dpo/margin": 56.557716369628906,
|
|
"fcm_dpo/q_t": 0.39789849519729614,
|
|
"grad_norm": 12.438852310180664,
|
|
"learning_rate": 2.6717530907482024e-07,
|
|
"logits/chosen": 0.6851711273193359,
|
|
"logits/rejected": 0.6315196752548218,
|
|
"logps/chosen": -162.50689697265625,
|
|
"logps/ref_chosen": -63.05195236206055,
|
|
"logps/ref_rejected": -85.52035522460938,
|
|
"logps/rejected": -241.5330047607422,
|
|
"loss": 1.0801,
|
|
"margin_dpo/margin_mean": 56.55772018432617,
|
|
"margin_dpo/margin_std": 82.60224914550781,
|
|
"step": 352
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -96.09440612792969,
|
|
"KL/mean": -123.33383178710938,
|
|
"KL/rejected_KL_mean": -150.57325744628906,
|
|
"KL/std": 71.5403823852539,
|
|
"epoch": 0.5336356764928194,
|
|
"fcm_dpo/beta": 0.008136594668030739,
|
|
"fcm_dpo/delta": -0.04424406588077545,
|
|
"fcm_dpo/margin": 54.47883987426758,
|
|
"fcm_dpo/q_t": 0.3993375301361084,
|
|
"grad_norm": 12.729469299316406,
|
|
"learning_rate": 2.658559799141411e-07,
|
|
"logits/chosen": 0.6415415406227112,
|
|
"logits/rejected": 0.6472923755645752,
|
|
"logps/chosen": -165.10360717773438,
|
|
"logps/ref_chosen": -69.00918579101562,
|
|
"logps/ref_rejected": -72.65840148925781,
|
|
"logps/rejected": -223.23165893554688,
|
|
"loss": 1.0819,
|
|
"margin_dpo/margin_mean": 54.47883987426758,
|
|
"margin_dpo/margin_std": 76.14570617675781,
|
|
"step": 353
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -96.82170104980469,
|
|
"KL/mean": -128.14501953125,
|
|
"KL/rejected_KL_mean": -159.46832275390625,
|
|
"KL/std": 68.76026916503906,
|
|
"epoch": 0.5351473922902494,
|
|
"fcm_dpo/beta": 0.008040174841880798,
|
|
"fcm_dpo/delta": -0.10674531012773514,
|
|
"fcm_dpo/margin": 62.646644592285156,
|
|
"fcm_dpo/q_t": 0.3866727948188782,
|
|
"grad_norm": 14.666440963745117,
|
|
"learning_rate": 2.6453620722761895e-07,
|
|
"logits/chosen": 0.7752627730369568,
|
|
"logits/rejected": 0.6414341926574707,
|
|
"logps/chosen": -136.61004638671875,
|
|
"logps/ref_chosen": -39.78833770751953,
|
|
"logps/ref_rejected": -69.56885528564453,
|
|
"logps/rejected": -229.03720092773438,
|
|
"loss": 1.0481,
|
|
"margin_dpo/margin_mean": 62.646636962890625,
|
|
"margin_dpo/margin_std": 82.4053955078125,
|
|
"step": 354
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -102.98497009277344,
|
|
"KL/mean": -136.47068786621094,
|
|
"KL/rejected_KL_mean": -169.95639038085938,
|
|
"KL/std": 79.25082397460938,
|
|
"epoch": 0.5366591080876795,
|
|
"fcm_dpo/beta": 0.007963665761053562,
|
|
"fcm_dpo/delta": -0.1368802785873413,
|
|
"fcm_dpo/margin": 66.9714126586914,
|
|
"fcm_dpo/q_t": 0.3815860152244568,
|
|
"grad_norm": 14.87078857421875,
|
|
"learning_rate": 2.632160279321328e-07,
|
|
"logits/chosen": 0.7125252485275269,
|
|
"logits/rejected": 0.5806901454925537,
|
|
"logps/chosen": -149.2403564453125,
|
|
"logps/ref_chosen": -46.25537872314453,
|
|
"logps/ref_rejected": -78.20236206054688,
|
|
"logps/rejected": -248.15875244140625,
|
|
"loss": 1.039,
|
|
"margin_dpo/margin_mean": 66.9714126586914,
|
|
"margin_dpo/margin_std": 88.78955078125,
|
|
"step": 355
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -96.94461822509766,
|
|
"KL/mean": -123.7825698852539,
|
|
"KL/rejected_KL_mean": -150.6205291748047,
|
|
"KL/std": 74.64903259277344,
|
|
"epoch": 0.5381708238851096,
|
|
"fcm_dpo/beta": 0.007890915498137474,
|
|
"fcm_dpo/delta": -0.02438228577375412,
|
|
"fcm_dpo/margin": 53.6759033203125,
|
|
"fcm_dpo/q_t": 0.4065490663051605,
|
|
"grad_norm": 12.321948051452637,
|
|
"learning_rate": 2.618954789559356e-07,
|
|
"logits/chosen": 0.7034469842910767,
|
|
"logits/rejected": 0.6208847761154175,
|
|
"logps/chosen": -144.85076904296875,
|
|
"logps/ref_chosen": -47.906158447265625,
|
|
"logps/ref_rejected": -74.29397583007812,
|
|
"logps/rejected": -224.91448974609375,
|
|
"loss": 1.1385,
|
|
"margin_dpo/margin_mean": 53.6759033203125,
|
|
"margin_dpo/margin_std": 94.7049560546875,
|
|
"step": 356
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -116.19082641601562,
|
|
"KL/mean": -138.58761596679688,
|
|
"KL/rejected_KL_mean": -160.98440551757812,
|
|
"KL/std": 74.61604309082031,
|
|
"epoch": 0.5396825396825397,
|
|
"fcm_dpo/beta": 0.007838654331862926,
|
|
"fcm_dpo/delta": -0.08558088541030884,
|
|
"fcm_dpo/margin": 44.7935791015625,
|
|
"fcm_dpo/q_t": 0.41946035623550415,
|
|
"grad_norm": 13.116528511047363,
|
|
"learning_rate": 2.6057459723762076e-07,
|
|
"logits/chosen": 0.6802359819412231,
|
|
"logits/rejected": 0.6578832864761353,
|
|
"logps/chosen": -178.82583618164062,
|
|
"logps/ref_chosen": -62.63500213623047,
|
|
"logps/ref_rejected": -65.11399841308594,
|
|
"logps/rejected": -226.09841918945312,
|
|
"loss": 1.1689,
|
|
"margin_dpo/margin_mean": 44.793582916259766,
|
|
"margin_dpo/margin_std": 80.24801635742188,
|
|
"step": 357
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -113.53591918945312,
|
|
"KL/mean": -143.63429260253906,
|
|
"KL/rejected_KL_mean": -173.732666015625,
|
|
"KL/std": 77.68006896972656,
|
|
"epoch": 0.5411942554799698,
|
|
"fcm_dpo/beta": 0.007832800969481468,
|
|
"fcm_dpo/delta": -0.07379653304815292,
|
|
"fcm_dpo/margin": 60.196739196777344,
|
|
"fcm_dpo/q_t": 0.39626243710517883,
|
|
"grad_norm": 18.77865219116211,
|
|
"learning_rate": 2.5925341972508954e-07,
|
|
"logits/chosen": 0.6306195259094238,
|
|
"logits/rejected": 0.644746720790863,
|
|
"logps/chosen": -180.74554443359375,
|
|
"logps/ref_chosen": -67.20960998535156,
|
|
"logps/ref_rejected": -69.34715270996094,
|
|
"logps/rejected": -243.07980346679688,
|
|
"loss": 1.0817,
|
|
"margin_dpo/margin_mean": 60.196739196777344,
|
|
"margin_dpo/margin_std": 86.37281799316406,
|
|
"step": 358
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -126.44595336914062,
|
|
"KL/mean": -142.26309204101562,
|
|
"KL/rejected_KL_mean": -158.0802459716797,
|
|
"KL/std": 76.30311584472656,
|
|
"epoch": 0.5427059712773998,
|
|
"fcm_dpo/beta": 0.007785119116306305,
|
|
"fcm_dpo/delta": 0.010527527891099453,
|
|
"fcm_dpo/margin": 31.634292602539062,
|
|
"fcm_dpo/q_t": 0.44452568888664246,
|
|
"grad_norm": 14.612931251525879,
|
|
"learning_rate": 2.579319833745169e-07,
|
|
"logits/chosen": 0.6535632014274597,
|
|
"logits/rejected": 0.6203292012214661,
|
|
"logps/chosen": -188.97174072265625,
|
|
"logps/ref_chosen": -62.52578353881836,
|
|
"logps/ref_rejected": -76.63114929199219,
|
|
"logps/rejected": -234.71139526367188,
|
|
"loss": 1.2527,
|
|
"margin_dpo/margin_mean": 31.634294509887695,
|
|
"margin_dpo/margin_std": 80.08071899414062,
|
|
"step": 359
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -122.09922790527344,
|
|
"KL/mean": -149.19642639160156,
|
|
"KL/rejected_KL_mean": -176.2936248779297,
|
|
"KL/std": 80.96435546875,
|
|
"epoch": 0.54421768707483,
|
|
"fcm_dpo/beta": 0.007769769057631493,
|
|
"fcm_dpo/delta": -0.02158222533762455,
|
|
"fcm_dpo/margin": 54.19439697265625,
|
|
"fcm_dpo/q_t": 0.40599554777145386,
|
|
"grad_norm": 12.877197265625,
|
|
"learning_rate": 2.5661032514931834e-07,
|
|
"logits/chosen": 0.6050270795822144,
|
|
"logits/rejected": 0.5135411024093628,
|
|
"logps/chosen": -185.58694458007812,
|
|
"logps/ref_chosen": -63.48772048950195,
|
|
"logps/ref_rejected": -90.6891098022461,
|
|
"logps/rejected": -266.98272705078125,
|
|
"loss": 1.0997,
|
|
"margin_dpo/margin_mean": 54.19439697265625,
|
|
"margin_dpo/margin_std": 80.44186401367188,
|
|
"step": 360
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -116.56092834472656,
|
|
"KL/mean": -147.8263702392578,
|
|
"KL/rejected_KL_mean": -179.09181213378906,
|
|
"KL/std": 77.82601165771484,
|
|
"epoch": 0.54572940287226,
|
|
"fcm_dpo/beta": 0.007730739191174507,
|
|
"fcm_dpo/delta": -0.08543084561824799,
|
|
"fcm_dpo/margin": 62.530860900878906,
|
|
"fcm_dpo/q_t": 0.3902705907821655,
|
|
"grad_norm": 12.314515113830566,
|
|
"learning_rate": 2.552884820191154e-07,
|
|
"logits/chosen": 0.731009840965271,
|
|
"logits/rejected": 0.685173749923706,
|
|
"logps/chosen": -174.4780731201172,
|
|
"logps/ref_chosen": -57.917144775390625,
|
|
"logps/ref_rejected": -72.39089965820312,
|
|
"logps/rejected": -251.4827117919922,
|
|
"loss": 1.0479,
|
|
"margin_dpo/margin_mean": 62.530860900878906,
|
|
"margin_dpo/margin_std": 79.190673828125,
|
|
"step": 361
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -118.0023193359375,
|
|
"KL/mean": -152.08425903320312,
|
|
"KL/rejected_KL_mean": -186.16619873046875,
|
|
"KL/std": 85.76898956298828,
|
|
"epoch": 0.54724111866969,
|
|
"fcm_dpo/beta": 0.007655893452465534,
|
|
"fcm_dpo/delta": -0.12508359551429749,
|
|
"fcm_dpo/margin": 68.16388702392578,
|
|
"fcm_dpo/q_t": 0.38804155588150024,
|
|
"grad_norm": 13.495170593261719,
|
|
"learning_rate": 2.53966490958702e-07,
|
|
"logits/chosen": 0.7136771082878113,
|
|
"logits/rejected": 0.6079861521720886,
|
|
"logps/chosen": -181.44578552246094,
|
|
"logps/ref_chosen": -63.4434700012207,
|
|
"logps/ref_rejected": -103.45516967773438,
|
|
"logps/rejected": -289.6213684082031,
|
|
"loss": 1.0586,
|
|
"margin_dpo/margin_mean": 68.16387939453125,
|
|
"margin_dpo/margin_std": 99.61306762695312,
|
|
"step": 362
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -122.56401062011719,
|
|
"KL/mean": -154.55455017089844,
|
|
"KL/rejected_KL_mean": -186.54507446289062,
|
|
"KL/std": 76.2325668334961,
|
|
"epoch": 0.5487528344671202,
|
|
"fcm_dpo/beta": 0.007558226585388184,
|
|
"fcm_dpo/delta": -0.08570672571659088,
|
|
"fcm_dpo/margin": 63.9810791015625,
|
|
"fcm_dpo/q_t": 0.39227616786956787,
|
|
"grad_norm": 14.701594352722168,
|
|
"learning_rate": 2.526443889470099e-07,
|
|
"logits/chosen": 0.7462435364723206,
|
|
"logits/rejected": 0.6098591685295105,
|
|
"logps/chosen": -171.21585083007812,
|
|
"logps/ref_chosen": -48.65182876586914,
|
|
"logps/ref_rejected": -88.65904235839844,
|
|
"logps/rejected": -275.2041320800781,
|
|
"loss": 1.0595,
|
|
"margin_dpo/margin_mean": 63.98107147216797,
|
|
"margin_dpo/margin_std": 87.90388488769531,
|
|
"step": 363
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -108.49606323242188,
|
|
"KL/mean": -144.57577514648438,
|
|
"KL/rejected_KL_mean": -180.6554718017578,
|
|
"KL/std": 84.4358901977539,
|
|
"epoch": 0.5502645502645502,
|
|
"fcm_dpo/beta": 0.007462693378329277,
|
|
"fcm_dpo/delta": -0.14240244030952454,
|
|
"fcm_dpo/margin": 72.15941619873047,
|
|
"fcm_dpo/q_t": 0.38352420926094055,
|
|
"grad_norm": 11.132577896118164,
|
|
"learning_rate": 2.513222129660744e-07,
|
|
"logits/chosen": 0.5967110395431519,
|
|
"logits/rejected": 0.5130397081375122,
|
|
"logps/chosen": -166.36712646484375,
|
|
"logps/ref_chosen": -57.87107467651367,
|
|
"logps/ref_rejected": -80.95503234863281,
|
|
"logps/rejected": -261.6105041503906,
|
|
"loss": 1.0536,
|
|
"margin_dpo/margin_mean": 72.15941619873047,
|
|
"margin_dpo/margin_std": 104.17182922363281,
|
|
"step": 364
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -100.76558685302734,
|
|
"KL/mean": -134.24905395507812,
|
|
"KL/rejected_KL_mean": -167.73248291015625,
|
|
"KL/std": 89.16426086425781,
|
|
"epoch": 0.5517762660619804,
|
|
"fcm_dpo/beta": 0.007374256383627653,
|
|
"fcm_dpo/delta": -0.09644719213247299,
|
|
"fcm_dpo/margin": 66.96690368652344,
|
|
"fcm_dpo/q_t": 0.388121098279953,
|
|
"grad_norm": 10.792167663574219,
|
|
"learning_rate": 2.5e-07,
|
|
"logits/chosen": 0.6934993267059326,
|
|
"logits/rejected": 0.6882491111755371,
|
|
"logps/chosen": -165.707763671875,
|
|
"logps/ref_chosen": -64.94217681884766,
|
|
"logps/ref_rejected": -74.8599853515625,
|
|
"logps/rejected": -242.59246826171875,
|
|
"loss": 1.0271,
|
|
"margin_dpo/margin_mean": 66.96691131591797,
|
|
"margin_dpo/margin_std": 78.49931335449219,
|
|
"step": 365
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -100.29678344726562,
|
|
"KL/mean": -127.97156524658203,
|
|
"KL/rejected_KL_mean": -155.64633178710938,
|
|
"KL/std": 77.52647399902344,
|
|
"epoch": 0.5532879818594104,
|
|
"fcm_dpo/beta": 0.007377326022833586,
|
|
"fcm_dpo/delta": -0.008646734058856964,
|
|
"fcm_dpo/margin": 55.34955978393555,
|
|
"fcm_dpo/q_t": 0.4100736975669861,
|
|
"grad_norm": 14.334081649780273,
|
|
"learning_rate": 2.486777870339255e-07,
|
|
"logits/chosen": 0.6378070712089539,
|
|
"logits/rejected": 0.6290265321731567,
|
|
"logps/chosen": -155.4627685546875,
|
|
"logps/ref_chosen": -55.16598129272461,
|
|
"logps/ref_rejected": -65.26121520996094,
|
|
"logps/rejected": -220.90756225585938,
|
|
"loss": 1.1431,
|
|
"margin_dpo/margin_mean": 55.34956359863281,
|
|
"margin_dpo/margin_std": 98.8299560546875,
|
|
"step": 366
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -116.16023254394531,
|
|
"KL/mean": -143.62210083007812,
|
|
"KL/rejected_KL_mean": -171.083984375,
|
|
"KL/std": 78.83016204833984,
|
|
"epoch": 0.5547996976568406,
|
|
"fcm_dpo/beta": 0.007352732587605715,
|
|
"fcm_dpo/delta": -0.003950958140194416,
|
|
"fcm_dpo/margin": 54.92376708984375,
|
|
"fcm_dpo/q_t": 0.406520277261734,
|
|
"grad_norm": 12.853515625,
|
|
"learning_rate": 2.4735561105299014e-07,
|
|
"logits/chosen": 0.6784968376159668,
|
|
"logits/rejected": 0.5745057463645935,
|
|
"logps/chosen": -172.17068481445312,
|
|
"logps/ref_chosen": -56.01046371459961,
|
|
"logps/ref_rejected": -77.31010437011719,
|
|
"logps/rejected": -248.3940887451172,
|
|
"loss": 1.1112,
|
|
"margin_dpo/margin_mean": 54.923763275146484,
|
|
"margin_dpo/margin_std": 83.17230987548828,
|
|
"step": 367
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -127.41226196289062,
|
|
"KL/mean": -152.2227020263672,
|
|
"KL/rejected_KL_mean": -177.03314208984375,
|
|
"KL/std": 77.63436889648438,
|
|
"epoch": 0.5563114134542706,
|
|
"fcm_dpo/beta": 0.007370097562670708,
|
|
"fcm_dpo/delta": 0.03492274507880211,
|
|
"fcm_dpo/margin": 49.62086486816406,
|
|
"fcm_dpo/q_t": 0.4161420464515686,
|
|
"grad_norm": 14.774358749389648,
|
|
"learning_rate": 2.46033509041298e-07,
|
|
"logits/chosen": 0.529477059841156,
|
|
"logits/rejected": 0.531141996383667,
|
|
"logps/chosen": -202.24154663085938,
|
|
"logps/ref_chosen": -74.82927703857422,
|
|
"logps/ref_rejected": -76.11680603027344,
|
|
"logps/rejected": -253.1499481201172,
|
|
"loss": 1.141,
|
|
"margin_dpo/margin_mean": 49.62086486816406,
|
|
"margin_dpo/margin_std": 82.22639465332031,
|
|
"step": 368
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -118.47069549560547,
|
|
"KL/mean": -140.7674560546875,
|
|
"KL/rejected_KL_mean": -163.064208984375,
|
|
"KL/std": 78.53901672363281,
|
|
"epoch": 0.5578231292517006,
|
|
"fcm_dpo/beta": 0.007440326269716024,
|
|
"fcm_dpo/delta": 0.0689493790268898,
|
|
"fcm_dpo/margin": 44.593502044677734,
|
|
"fcm_dpo/q_t": 0.42516326904296875,
|
|
"grad_norm": 12.66993236541748,
|
|
"learning_rate": 2.447115179808846e-07,
|
|
"logits/chosen": 0.737145185470581,
|
|
"logits/rejected": 0.6832119226455688,
|
|
"logps/chosen": -176.79690551757812,
|
|
"logps/ref_chosen": -58.32621765136719,
|
|
"logps/ref_rejected": -80.92183685302734,
|
|
"logps/rejected": -243.98605346679688,
|
|
"loss": 1.1731,
|
|
"margin_dpo/margin_mean": 44.593502044677734,
|
|
"margin_dpo/margin_std": 83.1320571899414,
|
|
"step": 369
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -112.98982238769531,
|
|
"KL/mean": -145.85186767578125,
|
|
"KL/rejected_KL_mean": -178.7139129638672,
|
|
"KL/std": 81.59536743164062,
|
|
"epoch": 0.5593348450491308,
|
|
"fcm_dpo/beta": 0.007377557922154665,
|
|
"fcm_dpo/delta": -0.08716459572315216,
|
|
"fcm_dpo/margin": 65.72410583496094,
|
|
"fcm_dpo/q_t": 0.3903299570083618,
|
|
"grad_norm": 13.048238754272461,
|
|
"learning_rate": 2.4338967485068164e-07,
|
|
"logits/chosen": 0.8076372146606445,
|
|
"logits/rejected": 0.7371571063995361,
|
|
"logps/chosen": -165.87353515625,
|
|
"logps/ref_chosen": -52.88372039794922,
|
|
"logps/ref_rejected": -79.43692016601562,
|
|
"logps/rejected": -258.15081787109375,
|
|
"loss": 1.072,
|
|
"margin_dpo/margin_mean": 65.72410583496094,
|
|
"margin_dpo/margin_std": 94.8158950805664,
|
|
"step": 370
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -112.11306762695312,
|
|
"KL/mean": -140.52859497070312,
|
|
"KL/rejected_KL_mean": -168.94415283203125,
|
|
"KL/std": 78.1666259765625,
|
|
"epoch": 0.5608465608465608,
|
|
"fcm_dpo/beta": 0.007379734888672829,
|
|
"fcm_dpo/delta": -0.020063556730747223,
|
|
"fcm_dpo/margin": 56.831085205078125,
|
|
"fcm_dpo/q_t": 0.40541699528694153,
|
|
"grad_norm": 16.693967819213867,
|
|
"learning_rate": 2.420680166254831e-07,
|
|
"logits/chosen": 0.809400737285614,
|
|
"logits/rejected": 0.7771478891372681,
|
|
"logps/chosen": -161.3372802734375,
|
|
"logps/ref_chosen": -49.224212646484375,
|
|
"logps/ref_rejected": -63.348472595214844,
|
|
"logps/rejected": -232.29263305664062,
|
|
"loss": 1.1031,
|
|
"margin_dpo/margin_mean": 56.831085205078125,
|
|
"margin_dpo/margin_std": 84.16738891601562,
|
|
"step": 371
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -119.46139526367188,
|
|
"KL/mean": -136.53561401367188,
|
|
"KL/rejected_KL_mean": -153.6098175048828,
|
|
"KL/std": 77.68089294433594,
|
|
"epoch": 0.562358276643991,
|
|
"fcm_dpo/beta": 0.007359931245446205,
|
|
"fcm_dpo/delta": 0.01141467783600092,
|
|
"fcm_dpo/margin": 34.14841842651367,
|
|
"fcm_dpo/q_t": 0.4443041682243347,
|
|
"grad_norm": 15.302005767822266,
|
|
"learning_rate": 2.4074658027491044e-07,
|
|
"logits/chosen": 0.7413223385810852,
|
|
"logits/rejected": 0.6426433324813843,
|
|
"logps/chosen": -171.73094177246094,
|
|
"logps/ref_chosen": -52.269554138183594,
|
|
"logps/ref_rejected": -72.99522399902344,
|
|
"logps/rejected": -226.60504150390625,
|
|
"loss": 1.2838,
|
|
"margin_dpo/margin_mean": 34.14841842651367,
|
|
"margin_dpo/margin_std": 100.55604553222656,
|
|
"step": 372
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -134.50927734375,
|
|
"KL/mean": -157.69007873535156,
|
|
"KL/rejected_KL_mean": -180.87088012695312,
|
|
"KL/std": 81.10966491699219,
|
|
"epoch": 0.563869992441421,
|
|
"fcm_dpo/beta": 0.007399425841867924,
|
|
"fcm_dpo/delta": 0.05791056156158447,
|
|
"fcm_dpo/margin": 46.3615837097168,
|
|
"fcm_dpo/q_t": 0.4238479733467102,
|
|
"grad_norm": 13.056395530700684,
|
|
"learning_rate": 2.394254027623792e-07,
|
|
"logits/chosen": 0.7051236629486084,
|
|
"logits/rejected": 0.6355280876159668,
|
|
"logps/chosen": -195.62228393554688,
|
|
"logps/ref_chosen": -61.112998962402344,
|
|
"logps/ref_rejected": -76.24851989746094,
|
|
"logps/rejected": -257.119384765625,
|
|
"loss": 1.2026,
|
|
"margin_dpo/margin_mean": 46.36157989501953,
|
|
"margin_dpo/margin_std": 100.72816467285156,
|
|
"step": 373
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -113.98735809326172,
|
|
"KL/mean": -151.8142547607422,
|
|
"KL/rejected_KL_mean": -189.64114379882812,
|
|
"KL/std": 83.27899169921875,
|
|
"epoch": 0.5653817082388511,
|
|
"fcm_dpo/beta": 0.007316051051020622,
|
|
"fcm_dpo/delta": -0.15795591473579407,
|
|
"fcm_dpo/margin": 75.65379333496094,
|
|
"fcm_dpo/q_t": 0.3763822019100189,
|
|
"grad_norm": 14.877225875854492,
|
|
"learning_rate": 2.381045210440644e-07,
|
|
"logits/chosen": 0.6398267149925232,
|
|
"logits/rejected": 0.6415112018585205,
|
|
"logps/chosen": -186.65655517578125,
|
|
"logps/ref_chosen": -72.66920471191406,
|
|
"logps/ref_rejected": -76.83158874511719,
|
|
"logps/rejected": -266.47271728515625,
|
|
"loss": 1.0242,
|
|
"margin_dpo/margin_mean": 75.65379333496094,
|
|
"margin_dpo/margin_std": 98.41178131103516,
|
|
"step": 374
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -112.32624816894531,
|
|
"KL/mean": -140.14105224609375,
|
|
"KL/rejected_KL_mean": -167.95584106445312,
|
|
"KL/std": 89.7429428100586,
|
|
"epoch": 0.5668934240362812,
|
|
"fcm_dpo/beta": 0.007276525720953941,
|
|
"fcm_dpo/delta": -0.00499936006963253,
|
|
"fcm_dpo/margin": 55.629600524902344,
|
|
"fcm_dpo/q_t": 0.4101766347885132,
|
|
"grad_norm": 14.62483024597168,
|
|
"learning_rate": 2.3678397206786715e-07,
|
|
"logits/chosen": 0.7120260000228882,
|
|
"logits/rejected": 0.6535938382148743,
|
|
"logps/chosen": -170.00955200195312,
|
|
"logps/ref_chosen": -57.68330383300781,
|
|
"logps/ref_rejected": -79.34097290039062,
|
|
"logps/rejected": -247.29681396484375,
|
|
"loss": 1.1405,
|
|
"margin_dpo/margin_mean": 55.629600524902344,
|
|
"margin_dpo/margin_std": 97.83927917480469,
|
|
"step": 375
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -119.90031433105469,
|
|
"KL/mean": -154.85186767578125,
|
|
"KL/rejected_KL_mean": -189.80343627929688,
|
|
"KL/std": 90.06429290771484,
|
|
"epoch": 0.5684051398337112,
|
|
"fcm_dpo/beta": 0.007219870574772358,
|
|
"fcm_dpo/delta": -0.10759859532117844,
|
|
"fcm_dpo/margin": 69.90312957763672,
|
|
"fcm_dpo/q_t": 0.39126765727996826,
|
|
"grad_norm": 12.410775184631348,
|
|
"learning_rate": 2.3546379277238103e-07,
|
|
"logits/chosen": 0.7589130997657776,
|
|
"logits/rejected": 0.6876646280288696,
|
|
"logps/chosen": -171.57437133789062,
|
|
"logps/ref_chosen": -51.674072265625,
|
|
"logps/ref_rejected": -75.69713592529297,
|
|
"logps/rejected": -265.5005798339844,
|
|
"loss": 1.0719,
|
|
"margin_dpo/margin_mean": 69.90312957763672,
|
|
"margin_dpo/margin_std": 104.9350814819336,
|
|
"step": 376
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -122.13226318359375,
|
|
"KL/mean": -146.1297607421875,
|
|
"KL/rejected_KL_mean": -170.12725830078125,
|
|
"KL/std": 76.14776611328125,
|
|
"epoch": 0.5699168556311414,
|
|
"fcm_dpo/beta": 0.007252752780914307,
|
|
"fcm_dpo/delta": 0.05253633111715317,
|
|
"fcm_dpo/margin": 47.99498748779297,
|
|
"fcm_dpo/q_t": 0.42017650604248047,
|
|
"grad_norm": 12.712069511413574,
|
|
"learning_rate": 2.3414402008585886e-07,
|
|
"logits/chosen": 0.7002503871917725,
|
|
"logits/rejected": 0.6761239767074585,
|
|
"logps/chosen": -168.310791015625,
|
|
"logps/ref_chosen": -46.17853546142578,
|
|
"logps/ref_rejected": -57.756500244140625,
|
|
"logps/rejected": -227.88375854492188,
|
|
"loss": 1.1613,
|
|
"margin_dpo/margin_mean": 47.99498748779297,
|
|
"margin_dpo/margin_std": 85.907470703125,
|
|
"step": 377
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -119.85548400878906,
|
|
"KL/mean": -143.11758422851562,
|
|
"KL/rejected_KL_mean": -166.37969970703125,
|
|
"KL/std": 84.02218627929688,
|
|
"epoch": 0.5714285714285714,
|
|
"fcm_dpo/beta": 0.0072968401946127415,
|
|
"fcm_dpo/delta": 0.061222780495882034,
|
|
"fcm_dpo/margin": 46.52420425415039,
|
|
"fcm_dpo/q_t": 0.42288243770599365,
|
|
"grad_norm": 12.689841270446777,
|
|
"learning_rate": 2.3282469092517977e-07,
|
|
"logits/chosen": 0.7788221836090088,
|
|
"logits/rejected": 0.7302362322807312,
|
|
"logps/chosen": -179.07435607910156,
|
|
"logps/ref_chosen": -59.21887969970703,
|
|
"logps/ref_rejected": -71.24818420410156,
|
|
"logps/rejected": -237.62786865234375,
|
|
"loss": 1.1658,
|
|
"margin_dpo/margin_mean": 46.524200439453125,
|
|
"margin_dpo/margin_std": 83.06361389160156,
|
|
"step": 378
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -118.43516540527344,
|
|
"KL/mean": -151.5200958251953,
|
|
"KL/rejected_KL_mean": -184.60504150390625,
|
|
"KL/std": 86.96133422851562,
|
|
"epoch": 0.5729402872260015,
|
|
"fcm_dpo/beta": 0.007248941343277693,
|
|
"fcm_dpo/delta": -0.08160345256328583,
|
|
"fcm_dpo/margin": 66.16989135742188,
|
|
"fcm_dpo/q_t": 0.3943200707435608,
|
|
"grad_norm": 12.96621036529541,
|
|
"learning_rate": 2.3150584219481643e-07,
|
|
"logits/chosen": 0.6654449105262756,
|
|
"logits/rejected": 0.5925348997116089,
|
|
"logps/chosen": -194.75173950195312,
|
|
"logps/ref_chosen": -76.31658935546875,
|
|
"logps/ref_rejected": -104.26200103759766,
|
|
"logps/rejected": -288.8670349121094,
|
|
"loss": 1.0691,
|
|
"margin_dpo/margin_mean": 66.16989135742188,
|
|
"margin_dpo/margin_std": 94.88582611083984,
|
|
"step": 379
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -102.01139831542969,
|
|
"KL/mean": -139.78485107421875,
|
|
"KL/rejected_KL_mean": -177.5583038330078,
|
|
"KL/std": 79.55252838134766,
|
|
"epoch": 0.5744520030234316,
|
|
"fcm_dpo/beta": 0.007158986292779446,
|
|
"fcm_dpo/delta": -0.14468258619308472,
|
|
"fcm_dpo/margin": 75.54691314697266,
|
|
"fcm_dpo/q_t": 0.37787097692489624,
|
|
"grad_norm": 11.858986854553223,
|
|
"learning_rate": 2.3018751078580283e-07,
|
|
"logits/chosen": 0.7284643650054932,
|
|
"logits/rejected": 0.6896545886993408,
|
|
"logps/chosen": -163.2945556640625,
|
|
"logps/ref_chosen": -61.283164978027344,
|
|
"logps/ref_rejected": -72.38892364501953,
|
|
"logps/rejected": -249.94723510742188,
|
|
"loss": 1.0226,
|
|
"margin_dpo/margin_mean": 75.54691314697266,
|
|
"margin_dpo/margin_std": 94.35493469238281,
|
|
"step": 380
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -125.65938568115234,
|
|
"KL/mean": -140.38873291015625,
|
|
"KL/rejected_KL_mean": -155.11807250976562,
|
|
"KL/std": 82.22688293457031,
|
|
"epoch": 0.5759637188208617,
|
|
"fcm_dpo/beta": 0.0071467505767941475,
|
|
"fcm_dpo/delta": 0.051012542098760605,
|
|
"fcm_dpo/margin": 29.45867347717285,
|
|
"fcm_dpo/q_t": 0.45320022106170654,
|
|
"grad_norm": 12.560222625732422,
|
|
"learning_rate": 2.288697335747027e-07,
|
|
"logits/chosen": 0.6835178732872009,
|
|
"logits/rejected": 0.6654014587402344,
|
|
"logps/chosen": -183.87338256835938,
|
|
"logps/ref_chosen": -58.2139892578125,
|
|
"logps/ref_rejected": -60.78669357299805,
|
|
"logps/rejected": -215.90475463867188,
|
|
"loss": 1.3002,
|
|
"margin_dpo/margin_mean": 29.45867156982422,
|
|
"margin_dpo/margin_std": 97.09398651123047,
|
|
"step": 381
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -126.28196716308594,
|
|
"KL/mean": -152.9098358154297,
|
|
"KL/rejected_KL_mean": -179.53768920898438,
|
|
"KL/std": 82.63713073730469,
|
|
"epoch": 0.5774754346182918,
|
|
"fcm_dpo/beta": 0.007190399337559938,
|
|
"fcm_dpo/delta": 0.01715882495045662,
|
|
"fcm_dpo/margin": 53.25571823120117,
|
|
"fcm_dpo/q_t": 0.41300198435783386,
|
|
"grad_norm": 13.798529624938965,
|
|
"learning_rate": 2.2755254742257706e-07,
|
|
"logits/chosen": 0.7263970971107483,
|
|
"logits/rejected": 0.6708396673202515,
|
|
"logps/chosen": -188.1072998046875,
|
|
"logps/ref_chosen": -61.82532501220703,
|
|
"logps/ref_rejected": -83.0452880859375,
|
|
"logps/rejected": -262.5829772949219,
|
|
"loss": 1.1193,
|
|
"margin_dpo/margin_mean": 53.25571823120117,
|
|
"margin_dpo/margin_std": 80.22282409667969,
|
|
"step": 382
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -126.58073425292969,
|
|
"KL/mean": -151.06576538085938,
|
|
"KL/rejected_KL_mean": -175.55081176757812,
|
|
"KL/std": 82.11665344238281,
|
|
"epoch": 0.5789871504157218,
|
|
"fcm_dpo/beta": 0.007188569754362106,
|
|
"fcm_dpo/delta": 0.048761457204818726,
|
|
"fcm_dpo/margin": 48.9700813293457,
|
|
"fcm_dpo/q_t": 0.4225116968154907,
|
|
"grad_norm": 14.41041374206543,
|
|
"learning_rate": 2.2623598917395436e-07,
|
|
"logits/chosen": 0.6197212338447571,
|
|
"logits/rejected": 0.6536753177642822,
|
|
"logps/chosen": -207.1439971923828,
|
|
"logps/ref_chosen": -80.56326293945312,
|
|
"logps/ref_rejected": -74.62922668457031,
|
|
"logps/rejected": -250.1800537109375,
|
|
"loss": 1.1915,
|
|
"margin_dpo/margin_mean": 48.97008514404297,
|
|
"margin_dpo/margin_std": 102.07487487792969,
|
|
"step": 383
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -119.900390625,
|
|
"KL/mean": -147.88455200195312,
|
|
"KL/rejected_KL_mean": -175.86871337890625,
|
|
"KL/std": 80.97911071777344,
|
|
"epoch": 0.5804988662131519,
|
|
"fcm_dpo/beta": 0.007212701253592968,
|
|
"fcm_dpo/delta": -0.0037618507631123066,
|
|
"fcm_dpo/margin": 55.96833801269531,
|
|
"fcm_dpo/q_t": 0.4062194526195526,
|
|
"grad_norm": 15.638869285583496,
|
|
"learning_rate": 2.2492009565579875e-07,
|
|
"logits/chosen": 0.7447338104248047,
|
|
"logits/rejected": 0.7039715647697449,
|
|
"logps/chosen": -185.37554931640625,
|
|
"logps/ref_chosen": -65.47514343261719,
|
|
"logps/ref_rejected": -79.67378234863281,
|
|
"logps/rejected": -255.54251098632812,
|
|
"loss": 1.1084,
|
|
"margin_dpo/margin_mean": 55.96833419799805,
|
|
"margin_dpo/margin_std": 84.08903503417969,
|
|
"step": 384
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -117.82969665527344,
|
|
"KL/mean": -153.86634826660156,
|
|
"KL/rejected_KL_mean": -189.9029998779297,
|
|
"KL/std": 82.87105560302734,
|
|
"epoch": 0.582010582010582,
|
|
"fcm_dpo/beta": 0.007158665452152491,
|
|
"fcm_dpo/delta": -0.11893562972545624,
|
|
"fcm_dpo/margin": 72.07331085205078,
|
|
"fcm_dpo/q_t": 0.38393640518188477,
|
|
"grad_norm": 12.668081283569336,
|
|
"learning_rate": 2.2360490367648084e-07,
|
|
"logits/chosen": 0.6462754011154175,
|
|
"logits/rejected": 0.6033315658569336,
|
|
"logps/chosen": -183.88621520996094,
|
|
"logps/ref_chosen": -66.0565185546875,
|
|
"logps/ref_rejected": -86.68023681640625,
|
|
"logps/rejected": -276.583251953125,
|
|
"loss": 1.0246,
|
|
"margin_dpo/margin_mean": 72.07331085205078,
|
|
"margin_dpo/margin_std": 86.06195068359375,
|
|
"step": 385
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -138.56265258789062,
|
|
"KL/mean": -161.7833709716797,
|
|
"KL/rejected_KL_mean": -185.00405883789062,
|
|
"KL/std": 87.20138549804688,
|
|
"epoch": 0.5835222978080121,
|
|
"fcm_dpo/beta": 0.007153231650590897,
|
|
"fcm_dpo/delta": 0.06896546483039856,
|
|
"fcm_dpo/margin": 46.44140625,
|
|
"fcm_dpo/q_t": 0.42301318049430847,
|
|
"grad_norm": 12.726027488708496,
|
|
"learning_rate": 2.2229045002474724e-07,
|
|
"logits/chosen": 0.6074446439743042,
|
|
"logits/rejected": 0.5527122020721436,
|
|
"logps/chosen": -214.1863250732422,
|
|
"logps/ref_chosen": -75.6236572265625,
|
|
"logps/ref_rejected": -92.62330627441406,
|
|
"logps/rejected": -277.62738037109375,
|
|
"loss": 1.1726,
|
|
"margin_dpo/margin_mean": 46.44140625,
|
|
"margin_dpo/margin_std": 86.5411376953125,
|
|
"step": 386
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -120.71371459960938,
|
|
"KL/mean": -154.63528442382812,
|
|
"KL/rejected_KL_mean": -188.55685424804688,
|
|
"KL/std": 79.68009185791016,
|
|
"epoch": 0.5850340136054422,
|
|
"fcm_dpo/beta": 0.007125685922801495,
|
|
"fcm_dpo/delta": -0.08552815020084381,
|
|
"fcm_dpo/margin": 67.84313201904297,
|
|
"fcm_dpo/q_t": 0.39083755016326904,
|
|
"grad_norm": 14.345316886901855,
|
|
"learning_rate": 2.209767714686924e-07,
|
|
"logits/chosen": 0.6987097263336182,
|
|
"logits/rejected": 0.5895580053329468,
|
|
"logps/chosen": -167.9354248046875,
|
|
"logps/ref_chosen": -47.22170639038086,
|
|
"logps/ref_rejected": -87.338134765625,
|
|
"logps/rejected": -275.8949890136719,
|
|
"loss": 1.0392,
|
|
"margin_dpo/margin_mean": 67.84313201904297,
|
|
"margin_dpo/margin_std": 82.55656433105469,
|
|
"step": 387
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -123.98579406738281,
|
|
"KL/mean": -145.66050720214844,
|
|
"KL/rejected_KL_mean": -167.33523559570312,
|
|
"KL/std": 85.07298278808594,
|
|
"epoch": 0.5865457294028723,
|
|
"fcm_dpo/beta": 0.00710913585498929,
|
|
"fcm_dpo/delta": -0.004904305562376976,
|
|
"fcm_dpo/margin": 43.34943771362305,
|
|
"fcm_dpo/q_t": 0.4326748251914978,
|
|
"grad_norm": 13.500115394592285,
|
|
"learning_rate": 2.1966390475472954e-07,
|
|
"logits/chosen": 0.6811926364898682,
|
|
"logits/rejected": 0.6773085594177246,
|
|
"logps/chosen": -198.56527709960938,
|
|
"logps/ref_chosen": -74.5794677734375,
|
|
"logps/ref_rejected": -79.92558288574219,
|
|
"logps/rejected": -247.2608184814453,
|
|
"loss": 1.2213,
|
|
"margin_dpo/margin_mean": 43.34943389892578,
|
|
"margin_dpo/margin_std": 100.55591583251953,
|
|
"step": 388
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -118.2269287109375,
|
|
"KL/mean": -151.8536376953125,
|
|
"KL/rejected_KL_mean": -185.4803466796875,
|
|
"KL/std": 81.9114761352539,
|
|
"epoch": 0.5880574452003023,
|
|
"fcm_dpo/beta": 0.007074140477925539,
|
|
"fcm_dpo/delta": -0.07756941765546799,
|
|
"fcm_dpo/margin": 67.25341033935547,
|
|
"fcm_dpo/q_t": 0.3934253454208374,
|
|
"grad_norm": 24.65688133239746,
|
|
"learning_rate": 2.1835188660656265e-07,
|
|
"logits/chosen": 0.6841369271278381,
|
|
"logits/rejected": 0.6484010219573975,
|
|
"logps/chosen": -179.85128784179688,
|
|
"logps/ref_chosen": -61.624366760253906,
|
|
"logps/ref_rejected": -76.50978088378906,
|
|
"logps/rejected": -261.9901123046875,
|
|
"loss": 1.0612,
|
|
"margin_dpo/margin_mean": 67.25341796875,
|
|
"margin_dpo/margin_std": 90.9244384765625,
|
|
"step": 389
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -105.85232543945312,
|
|
"KL/mean": -132.65867614746094,
|
|
"KL/rejected_KL_mean": -159.46502685546875,
|
|
"KL/std": 78.86771392822266,
|
|
"epoch": 0.5895691609977324,
|
|
"fcm_dpo/beta": 0.007057240232825279,
|
|
"fcm_dpo/delta": 0.022026551887392998,
|
|
"fcm_dpo/margin": 53.612693786621094,
|
|
"fcm_dpo/q_t": 0.413882851600647,
|
|
"grad_norm": 11.843779563903809,
|
|
"learning_rate": 2.170407537241599e-07,
|
|
"logits/chosen": 0.7847983837127686,
|
|
"logits/rejected": 0.710970401763916,
|
|
"logps/chosen": -151.72418212890625,
|
|
"logps/ref_chosen": -45.871864318847656,
|
|
"logps/ref_rejected": -61.305999755859375,
|
|
"logps/rejected": -220.77102661132812,
|
|
"loss": 1.1247,
|
|
"margin_dpo/margin_mean": 53.61269760131836,
|
|
"margin_dpo/margin_std": 83.22119140625,
|
|
"step": 390
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -118.16459655761719,
|
|
"KL/mean": -150.44976806640625,
|
|
"KL/rejected_KL_mean": -182.73495483398438,
|
|
"KL/std": 80.91856384277344,
|
|
"epoch": 0.5910808767951625,
|
|
"fcm_dpo/beta": 0.007031668908894062,
|
|
"fcm_dpo/delta": -0.05542689561843872,
|
|
"fcm_dpo/margin": 64.57035827636719,
|
|
"fcm_dpo/q_t": 0.39774399995803833,
|
|
"grad_norm": 11.76223087310791,
|
|
"learning_rate": 2.1573054278272636e-07,
|
|
"logits/chosen": 0.7060242891311646,
|
|
"logits/rejected": 0.6382230520248413,
|
|
"logps/chosen": -176.35159301757812,
|
|
"logps/ref_chosen": -58.18701171875,
|
|
"logps/ref_rejected": -83.63442993164062,
|
|
"logps/rejected": -266.369384765625,
|
|
"loss": 1.0927,
|
|
"margin_dpo/margin_mean": 64.57035827636719,
|
|
"margin_dpo/margin_std": 97.62187194824219,
|
|
"step": 391
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -107.81562042236328,
|
|
"KL/mean": -141.46731567382812,
|
|
"KL/rejected_KL_mean": -175.1190185546875,
|
|
"KL/std": 87.15658569335938,
|
|
"epoch": 0.5925925925925926,
|
|
"fcm_dpo/beta": 0.0070152729749679565,
|
|
"fcm_dpo/delta": -0.07409797608852386,
|
|
"fcm_dpo/margin": 67.30340576171875,
|
|
"fcm_dpo/q_t": 0.39608487486839294,
|
|
"grad_norm": 12.50259780883789,
|
|
"learning_rate": 2.1442129043167873e-07,
|
|
"logits/chosen": 0.7775279879570007,
|
|
"logits/rejected": 0.7178206443786621,
|
|
"logps/chosen": -177.56015014648438,
|
|
"logps/ref_chosen": -69.7445297241211,
|
|
"logps/ref_rejected": -94.05877685546875,
|
|
"logps/rejected": -269.17779541015625,
|
|
"loss": 1.0882,
|
|
"margin_dpo/margin_mean": 67.30340576171875,
|
|
"margin_dpo/margin_std": 102.33578491210938,
|
|
"step": 392
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -121.32159423828125,
|
|
"KL/mean": -156.99478149414062,
|
|
"KL/rejected_KL_mean": -192.66796875,
|
|
"KL/std": 85.10556030273438,
|
|
"epoch": 0.5941043083900227,
|
|
"fcm_dpo/beta": 0.006909149698913097,
|
|
"fcm_dpo/delta": -0.0958985909819603,
|
|
"fcm_dpo/margin": 71.34637451171875,
|
|
"fcm_dpo/q_t": 0.3879711627960205,
|
|
"grad_norm": 11.010285377502441,
|
|
"learning_rate": 2.131130332936195e-07,
|
|
"logits/chosen": 0.739720344543457,
|
|
"logits/rejected": 0.6996071934700012,
|
|
"logps/chosen": -173.656494140625,
|
|
"logps/ref_chosen": -52.33489990234375,
|
|
"logps/ref_rejected": -74.33809661865234,
|
|
"logps/rejected": -267.00604248046875,
|
|
"loss": 1.0358,
|
|
"margin_dpo/margin_mean": 71.34637451171875,
|
|
"margin_dpo/margin_std": 85.20559692382812,
|
|
"step": 393
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -116.47808837890625,
|
|
"KL/mean": -147.32754516601562,
|
|
"KL/rejected_KL_mean": -178.177001953125,
|
|
"KL/std": 78.3963623046875,
|
|
"epoch": 0.5956160241874527,
|
|
"fcm_dpo/beta": 0.006909521296620369,
|
|
"fcm_dpo/delta": -0.026946131139993668,
|
|
"fcm_dpo/margin": 61.69890594482422,
|
|
"fcm_dpo/q_t": 0.40109002590179443,
|
|
"grad_norm": 12.337517738342285,
|
|
"learning_rate": 2.1180580796331323e-07,
|
|
"logits/chosen": 0.7602550983428955,
|
|
"logits/rejected": 0.7296338081359863,
|
|
"logps/chosen": -177.15423583984375,
|
|
"logps/ref_chosen": -60.6761360168457,
|
|
"logps/ref_rejected": -71.36074829101562,
|
|
"logps/rejected": -249.53775024414062,
|
|
"loss": 1.0763,
|
|
"margin_dpo/margin_mean": 61.69890594482422,
|
|
"margin_dpo/margin_std": 78.14915466308594,
|
|
"step": 394
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -120.11184692382812,
|
|
"KL/mean": -148.55426025390625,
|
|
"KL/rejected_KL_mean": -176.99667358398438,
|
|
"KL/std": 81.75111389160156,
|
|
"epoch": 0.5971277399848829,
|
|
"fcm_dpo/beta": 0.006908024661242962,
|
|
"fcm_dpo/delta": 0.00706930086016655,
|
|
"fcm_dpo/margin": 56.88482666015625,
|
|
"fcm_dpo/q_t": 0.41212499141693115,
|
|
"grad_norm": 13.815701484680176,
|
|
"learning_rate": 2.104996510066625e-07,
|
|
"logits/chosen": 0.7410081624984741,
|
|
"logits/rejected": 0.6432370543479919,
|
|
"logps/chosen": -170.71617126464844,
|
|
"logps/ref_chosen": -50.60432434082031,
|
|
"logps/ref_rejected": -77.08731079101562,
|
|
"logps/rejected": -254.083984375,
|
|
"loss": 1.1141,
|
|
"margin_dpo/margin_mean": 56.88482666015625,
|
|
"margin_dpo/margin_std": 86.48599243164062,
|
|
"step": 395
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -115.30360412597656,
|
|
"KL/mean": -145.23068237304688,
|
|
"KL/rejected_KL_mean": -175.1577606201172,
|
|
"KL/std": 90.39159393310547,
|
|
"epoch": 0.5986394557823129,
|
|
"fcm_dpo/beta": 0.006861069705337286,
|
|
"fcm_dpo/delta": -0.011781930923461914,
|
|
"fcm_dpo/margin": 59.854164123535156,
|
|
"fcm_dpo/q_t": 0.406708300113678,
|
|
"grad_norm": 11.736763954162598,
|
|
"learning_rate": 2.0919459895968517e-07,
|
|
"logits/chosen": 0.7553054094314575,
|
|
"logits/rejected": 0.6594061255455017,
|
|
"logps/chosen": -166.6632080078125,
|
|
"logps/ref_chosen": -51.35961151123047,
|
|
"logps/ref_rejected": -79.89360046386719,
|
|
"logps/rejected": -255.05136108398438,
|
|
"loss": 1.0922,
|
|
"margin_dpo/margin_mean": 59.854164123535156,
|
|
"margin_dpo/margin_std": 78.47380828857422,
|
|
"step": 396
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -132.54153442382812,
|
|
"KL/mean": -149.40411376953125,
|
|
"KL/rejected_KL_mean": -166.2666778564453,
|
|
"KL/std": 82.85358428955078,
|
|
"epoch": 0.600151171579743,
|
|
"fcm_dpo/beta": 0.006979919038712978,
|
|
"fcm_dpo/delta": 0.16647832095623016,
|
|
"fcm_dpo/margin": 33.72515106201172,
|
|
"fcm_dpo/q_t": 0.44676756858825684,
|
|
"grad_norm": 12.708525657653809,
|
|
"learning_rate": 2.078906883274924e-07,
|
|
"logits/chosen": 0.6481236219406128,
|
|
"logits/rejected": 0.597358226776123,
|
|
"logps/chosen": -198.99777221679688,
|
|
"logps/ref_chosen": -66.45622253417969,
|
|
"logps/ref_rejected": -85.74736785888672,
|
|
"logps/rejected": -252.01405334472656,
|
|
"loss": 1.2722,
|
|
"margin_dpo/margin_mean": 33.72515106201172,
|
|
"margin_dpo/margin_std": 95.81825256347656,
|
|
"step": 397
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -112.0426025390625,
|
|
"KL/mean": -150.51390075683594,
|
|
"KL/rejected_KL_mean": -188.98519897460938,
|
|
"KL/std": 86.69169616699219,
|
|
"epoch": 0.6016628873771731,
|
|
"fcm_dpo/beta": 0.006919104605913162,
|
|
"fcm_dpo/delta": -0.13651816546916962,
|
|
"fcm_dpo/margin": 76.94258117675781,
|
|
"fcm_dpo/q_t": 0.3824811577796936,
|
|
"grad_norm": 10.686772346496582,
|
|
"learning_rate": 2.065879555832674e-07,
|
|
"logits/chosen": 0.7003393173217773,
|
|
"logits/rejected": 0.6357911229133606,
|
|
"logps/chosen": -161.28684997558594,
|
|
"logps/ref_chosen": -49.244239807128906,
|
|
"logps/ref_rejected": -75.18949127197266,
|
|
"logps/rejected": -264.1746826171875,
|
|
"loss": 1.013,
|
|
"margin_dpo/margin_mean": 76.94258117675781,
|
|
"margin_dpo/margin_std": 92.05025482177734,
|
|
"step": 398
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -132.21946716308594,
|
|
"KL/mean": -175.92172241210938,
|
|
"KL/rejected_KL_mean": -219.62399291992188,
|
|
"KL/std": 96.214599609375,
|
|
"epoch": 0.6031746031746031,
|
|
"fcm_dpo/beta": 0.006793228909373283,
|
|
"fcm_dpo/delta": -0.2003403753042221,
|
|
"fcm_dpo/margin": 87.40452575683594,
|
|
"fcm_dpo/q_t": 0.37209832668304443,
|
|
"grad_norm": 12.391461372375488,
|
|
"learning_rate": 2.052864371672457e-07,
|
|
"logits/chosen": 0.6501820087432861,
|
|
"logits/rejected": 0.5070763826370239,
|
|
"logps/chosen": -200.5262451171875,
|
|
"logps/ref_chosen": -68.30679321289062,
|
|
"logps/ref_rejected": -113.2708511352539,
|
|
"logps/rejected": -332.89483642578125,
|
|
"loss": 0.9922,
|
|
"margin_dpo/margin_mean": 87.40452575683594,
|
|
"margin_dpo/margin_std": 108.04206085205078,
|
|
"step": 399
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -146.68338012695312,
|
|
"KL/mean": -171.9394073486328,
|
|
"KL/rejected_KL_mean": -197.19541931152344,
|
|
"KL/std": 87.3747329711914,
|
|
"epoch": 0.6046863189720333,
|
|
"fcm_dpo/beta": 0.006736475508660078,
|
|
"fcm_dpo/delta": -0.05543239042162895,
|
|
"fcm_dpo/margin": 50.512027740478516,
|
|
"fcm_dpo/q_t": 0.4210391640663147,
|
|
"grad_norm": 16.501981735229492,
|
|
"learning_rate": 2.0398616948569493e-07,
|
|
"logits/chosen": 0.6782976388931274,
|
|
"logits/rejected": 0.6201345920562744,
|
|
"logps/chosen": -218.30987548828125,
|
|
"logps/ref_chosen": -71.62649536132812,
|
|
"logps/ref_rejected": -90.98765563964844,
|
|
"logps/rejected": -288.1830749511719,
|
|
"loss": 1.1563,
|
|
"margin_dpo/margin_mean": 50.512027740478516,
|
|
"margin_dpo/margin_std": 83.42546844482422,
|
|
"step": 400
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -111.67019653320312,
|
|
"KL/mean": -147.51351928710938,
|
|
"KL/rejected_KL_mean": -183.35682678222656,
|
|
"KL/std": 94.3001708984375,
|
|
"epoch": 0.6061980347694633,
|
|
"fcm_dpo/beta": 0.006682539824396372,
|
|
"fcm_dpo/delta": -0.0813048779964447,
|
|
"fcm_dpo/margin": 71.68663024902344,
|
|
"fcm_dpo/q_t": 0.3925744295120239,
|
|
"grad_norm": 9.24927043914795,
|
|
"learning_rate": 2.0268718890989752e-07,
|
|
"logits/chosen": 0.7080041170120239,
|
|
"logits/rejected": 0.6210964918136597,
|
|
"logps/chosen": -165.39515686035156,
|
|
"logps/ref_chosen": -53.72495651245117,
|
|
"logps/ref_rejected": -75.06304931640625,
|
|
"logps/rejected": -258.41986083984375,
|
|
"loss": 1.0478,
|
|
"margin_dpo/margin_mean": 71.68663024902344,
|
|
"margin_dpo/margin_std": 90.2989501953125,
|
|
"step": 401
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -121.87428283691406,
|
|
"KL/mean": -149.92950439453125,
|
|
"KL/rejected_KL_mean": -177.98472595214844,
|
|
"KL/std": 80.09333801269531,
|
|
"epoch": 0.6077097505668935,
|
|
"fcm_dpo/beta": 0.006674374919384718,
|
|
"fcm_dpo/delta": 0.02568529173731804,
|
|
"fcm_dpo/margin": 56.110450744628906,
|
|
"fcm_dpo/q_t": 0.41535648703575134,
|
|
"grad_norm": 14.178557395935059,
|
|
"learning_rate": 2.013895317751323e-07,
|
|
"logits/chosen": 0.6826661825180054,
|
|
"logits/rejected": 0.6661210656166077,
|
|
"logps/chosen": -183.74819946289062,
|
|
"logps/ref_chosen": -61.873931884765625,
|
|
"logps/ref_rejected": -66.15198516845703,
|
|
"logps/rejected": -244.13671875,
|
|
"loss": 1.1469,
|
|
"margin_dpo/margin_mean": 56.11044692993164,
|
|
"margin_dpo/margin_std": 95.95289611816406,
|
|
"step": 402
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -129.19552612304688,
|
|
"KL/mean": -164.98533630371094,
|
|
"KL/rejected_KL_mean": -200.775146484375,
|
|
"KL/std": 92.66357421875,
|
|
"epoch": 0.6092214663643235,
|
|
"fcm_dpo/beta": 0.006653132848441601,
|
|
"fcm_dpo/delta": -0.07821422815322876,
|
|
"fcm_dpo/margin": 71.57965087890625,
|
|
"fcm_dpo/q_t": 0.39432263374328613,
|
|
"grad_norm": 10.321096420288086,
|
|
"learning_rate": 2.0009323437965898e-07,
|
|
"logits/chosen": 0.8121441602706909,
|
|
"logits/rejected": 0.7254369258880615,
|
|
"logps/chosen": -180.51702880859375,
|
|
"logps/ref_chosen": -51.321502685546875,
|
|
"logps/ref_rejected": -86.54010772705078,
|
|
"logps/rejected": -287.31524658203125,
|
|
"loss": 1.0629,
|
|
"margin_dpo/margin_mean": 71.57965087890625,
|
|
"margin_dpo/margin_std": 97.02379608154297,
|
|
"step": 403
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -121.81800842285156,
|
|
"KL/mean": -158.99508666992188,
|
|
"KL/rejected_KL_mean": -196.1721649169922,
|
|
"KL/std": 94.6757583618164,
|
|
"epoch": 0.6107331821617535,
|
|
"fcm_dpo/beta": 0.006578211672604084,
|
|
"fcm_dpo/delta": -0.09217271953821182,
|
|
"fcm_dpo/margin": 74.35415649414062,
|
|
"fcm_dpo/q_t": 0.3923729658126831,
|
|
"grad_norm": 14.204198837280273,
|
|
"learning_rate": 1.9879833298370237e-07,
|
|
"logits/chosen": 0.714877188205719,
|
|
"logits/rejected": 0.6183456182479858,
|
|
"logps/chosen": -184.08090209960938,
|
|
"logps/ref_chosen": -62.26288604736328,
|
|
"logps/ref_rejected": -95.19029998779297,
|
|
"logps/rejected": -291.3624572753906,
|
|
"loss": 1.0655,
|
|
"margin_dpo/margin_mean": 74.3541488647461,
|
|
"margin_dpo/margin_std": 103.49223327636719,
|
|
"step": 404
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -125.59844207763672,
|
|
"KL/mean": -154.3402557373047,
|
|
"KL/rejected_KL_mean": -183.0820770263672,
|
|
"KL/std": 84.47518920898438,
|
|
"epoch": 0.6122448979591837,
|
|
"fcm_dpo/beta": 0.00657174177467823,
|
|
"fcm_dpo/delta": 0.022154785692691803,
|
|
"fcm_dpo/margin": 57.48363494873047,
|
|
"fcm_dpo/q_t": 0.4147895276546478,
|
|
"grad_norm": 11.96500301361084,
|
|
"learning_rate": 1.975048638084379e-07,
|
|
"logits/chosen": 0.7631029486656189,
|
|
"logits/rejected": 0.7178771495819092,
|
|
"logps/chosen": -176.1827850341797,
|
|
"logps/ref_chosen": -50.5843391418457,
|
|
"logps/ref_rejected": -65.43156433105469,
|
|
"logps/rejected": -248.51364135742188,
|
|
"loss": 1.1224,
|
|
"margin_dpo/margin_mean": 57.48363494873047,
|
|
"margin_dpo/margin_std": 84.4254150390625,
|
|
"step": 405
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -118.9271469116211,
|
|
"KL/mean": -157.67666625976562,
|
|
"KL/rejected_KL_mean": -196.42616271972656,
|
|
"KL/std": 91.14208984375,
|
|
"epoch": 0.6137566137566137,
|
|
"fcm_dpo/beta": 0.006554439663887024,
|
|
"fcm_dpo/delta": -0.1107010692358017,
|
|
"fcm_dpo/margin": 77.4990234375,
|
|
"fcm_dpo/q_t": 0.38545718789100647,
|
|
"grad_norm": 12.365456581115723,
|
|
"learning_rate": 1.9621286303497914e-07,
|
|
"logits/chosen": 0.7842617034912109,
|
|
"logits/rejected": 0.6225013732910156,
|
|
"logps/chosen": -167.92276000976562,
|
|
"logps/ref_chosen": -48.99560546875,
|
|
"logps/ref_rejected": -92.47774505615234,
|
|
"logps/rejected": -288.9039001464844,
|
|
"loss": 1.0444,
|
|
"margin_dpo/margin_mean": 77.4990234375,
|
|
"margin_dpo/margin_std": 101.26646423339844,
|
|
"step": 406
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -153.33389282226562,
|
|
"KL/mean": -183.8157501220703,
|
|
"KL/rejected_KL_mean": -214.297607421875,
|
|
"KL/std": 104.63140869140625,
|
|
"epoch": 0.6152683295540439,
|
|
"fcm_dpo/beta": 0.006535977590829134,
|
|
"fcm_dpo/delta": 0.0015209503471851349,
|
|
"fcm_dpo/margin": 60.963714599609375,
|
|
"fcm_dpo/q_t": 0.41023004055023193,
|
|
"grad_norm": 13.808930397033691,
|
|
"learning_rate": 1.9492236680336483e-07,
|
|
"logits/chosen": 0.6006584167480469,
|
|
"logits/rejected": 0.539162278175354,
|
|
"logps/chosen": -242.7344512939453,
|
|
"logps/ref_chosen": -89.40056610107422,
|
|
"logps/ref_rejected": -99.28775024414062,
|
|
"logps/rejected": -313.5853576660156,
|
|
"loss": 1.1362,
|
|
"margin_dpo/margin_mean": 60.963714599609375,
|
|
"margin_dpo/margin_std": 104.1991195678711,
|
|
"step": 407
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -113.87435913085938,
|
|
"KL/mean": -156.99960327148438,
|
|
"KL/rejected_KL_mean": -200.12484741210938,
|
|
"KL/std": 85.77928161621094,
|
|
"epoch": 0.6167800453514739,
|
|
"fcm_dpo/beta": 0.006468884646892548,
|
|
"fcm_dpo/delta": -0.16234487295150757,
|
|
"fcm_dpo/margin": 86.25048828125,
|
|
"fcm_dpo/q_t": 0.3727743327617645,
|
|
"grad_norm": 10.917062759399414,
|
|
"learning_rate": 1.9363341121154895e-07,
|
|
"logits/chosen": 0.7527220249176025,
|
|
"logits/rejected": 0.6760420799255371,
|
|
"logps/chosen": -168.57827758789062,
|
|
"logps/ref_chosen": -54.70391845703125,
|
|
"logps/ref_rejected": -73.98648834228516,
|
|
"logps/rejected": -274.111328125,
|
|
"loss": 0.9854,
|
|
"margin_dpo/margin_mean": 86.25048065185547,
|
|
"margin_dpo/margin_std": 89.98963928222656,
|
|
"step": 408
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -141.41400146484375,
|
|
"KL/mean": -161.61331176757812,
|
|
"KL/rejected_KL_mean": -181.8126220703125,
|
|
"KL/std": 74.4041748046875,
|
|
"epoch": 0.618291761148904,
|
|
"fcm_dpo/beta": 0.006487199570983648,
|
|
"fcm_dpo/delta": 0.13983039557933807,
|
|
"fcm_dpo/margin": 40.39863967895508,
|
|
"fcm_dpo/q_t": 0.44003820419311523,
|
|
"grad_norm": 12.600934982299805,
|
|
"learning_rate": 1.9234603231438994e-07,
|
|
"logits/chosen": 0.7069829702377319,
|
|
"logits/rejected": 0.7162041664123535,
|
|
"logps/chosen": -203.5322265625,
|
|
"logps/ref_chosen": -62.11822509765625,
|
|
"logps/ref_rejected": -61.933509826660156,
|
|
"logps/rejected": -243.7461395263672,
|
|
"loss": 1.2127,
|
|
"margin_dpo/margin_mean": 40.39863967895508,
|
|
"margin_dpo/margin_std": 83.7572021484375,
|
|
"step": 409
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -132.57125854492188,
|
|
"KL/mean": -167.6019287109375,
|
|
"KL/rejected_KL_mean": -202.632568359375,
|
|
"KL/std": 84.42596435546875,
|
|
"epoch": 0.6198034769463341,
|
|
"fcm_dpo/beta": 0.006462418474256992,
|
|
"fcm_dpo/delta": -0.054811958223581314,
|
|
"fcm_dpo/margin": 70.06130981445312,
|
|
"fcm_dpo/q_t": 0.3960353136062622,
|
|
"grad_norm": 10.96718978881836,
|
|
"learning_rate": 1.9106026612264315e-07,
|
|
"logits/chosen": 0.7206809520721436,
|
|
"logits/rejected": 0.6975326538085938,
|
|
"logps/chosen": -194.37391662597656,
|
|
"logps/ref_chosen": -61.80266189575195,
|
|
"logps/ref_rejected": -76.60002136230469,
|
|
"logps/rejected": -279.23260498046875,
|
|
"loss": 1.0556,
|
|
"margin_dpo/margin_mean": 70.06130981445312,
|
|
"margin_dpo/margin_std": 83.01362609863281,
|
|
"step": 410
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -138.17918395996094,
|
|
"KL/mean": -172.2271728515625,
|
|
"KL/rejected_KL_mean": -206.27520751953125,
|
|
"KL/std": 91.54496765136719,
|
|
"epoch": 0.6213151927437641,
|
|
"fcm_dpo/beta": 0.006460669916123152,
|
|
"fcm_dpo/delta": -0.040836043655872345,
|
|
"fcm_dpo/margin": 68.09601593017578,
|
|
"fcm_dpo/q_t": 0.4005202054977417,
|
|
"grad_norm": 10.051225662231445,
|
|
"learning_rate": 1.8977614860195296e-07,
|
|
"logits/chosen": 0.7776986360549927,
|
|
"logits/rejected": 0.721227765083313,
|
|
"logps/chosen": -192.62457275390625,
|
|
"logps/ref_chosen": -54.44539260864258,
|
|
"logps/ref_rejected": -74.5650863647461,
|
|
"logps/rejected": -280.84027099609375,
|
|
"loss": 1.0824,
|
|
"margin_dpo/margin_mean": 68.09602355957031,
|
|
"margin_dpo/margin_std": 96.12615966796875,
|
|
"step": 411
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -140.81988525390625,
|
|
"KL/mean": -172.00961303710938,
|
|
"KL/rejected_KL_mean": -203.19931030273438,
|
|
"KL/std": 78.07281494140625,
|
|
"epoch": 0.6228269085411943,
|
|
"fcm_dpo/beta": 0.006440272089093924,
|
|
"fcm_dpo/delta": -0.0018970891833305359,
|
|
"fcm_dpo/margin": 62.379432678222656,
|
|
"fcm_dpo/q_t": 0.4074127674102783,
|
|
"grad_norm": 11.758828163146973,
|
|
"learning_rate": 1.8849371567184662e-07,
|
|
"logits/chosen": 0.7582228183746338,
|
|
"logits/rejected": 0.697446346282959,
|
|
"logps/chosen": -196.06796264648438,
|
|
"logps/ref_chosen": -55.248085021972656,
|
|
"logps/ref_rejected": -68.96623229980469,
|
|
"logps/rejected": -272.16552734375,
|
|
"loss": 1.0914,
|
|
"margin_dpo/margin_mean": 62.379432678222656,
|
|
"margin_dpo/margin_std": 82.22096252441406,
|
|
"step": 412
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -157.92532348632812,
|
|
"KL/mean": -187.0555877685547,
|
|
"KL/rejected_KL_mean": -216.18585205078125,
|
|
"KL/std": 91.01189422607422,
|
|
"epoch": 0.6243386243386243,
|
|
"fcm_dpo/beta": 0.006467553786933422,
|
|
"fcm_dpo/delta": 0.023597121238708496,
|
|
"fcm_dpo/margin": 58.260520935058594,
|
|
"fcm_dpo/q_t": 0.4164498448371887,
|
|
"grad_norm": 14.318431854248047,
|
|
"learning_rate": 1.872130032047302e-07,
|
|
"logits/chosen": 0.5566600561141968,
|
|
"logits/rejected": 0.5286005735397339,
|
|
"logps/chosen": -226.6460723876953,
|
|
"logps/ref_chosen": -68.72074890136719,
|
|
"logps/ref_rejected": -78.76539611816406,
|
|
"logps/rejected": -294.9512634277344,
|
|
"loss": 1.162,
|
|
"margin_dpo/margin_mean": 58.260520935058594,
|
|
"margin_dpo/margin_std": 109.60464477539062,
|
|
"step": 413
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -139.60523986816406,
|
|
"KL/mean": -174.1144256591797,
|
|
"KL/rejected_KL_mean": -208.62362670898438,
|
|
"KL/std": 98.31627655029297,
|
|
"epoch": 0.6258503401360545,
|
|
"fcm_dpo/beta": 0.006445493549108505,
|
|
"fcm_dpo/delta": -0.0458698645234108,
|
|
"fcm_dpo/margin": 69.01838684082031,
|
|
"fcm_dpo/q_t": 0.3977741599082947,
|
|
"grad_norm": 11.090164184570312,
|
|
"learning_rate": 1.8593404702488436e-07,
|
|
"logits/chosen": 0.7615224123001099,
|
|
"logits/rejected": 0.7027798891067505,
|
|
"logps/chosen": -193.7434539794922,
|
|
"logps/ref_chosen": -54.138214111328125,
|
|
"logps/ref_rejected": -74.65741729736328,
|
|
"logps/rejected": -283.2810363769531,
|
|
"loss": 1.0724,
|
|
"margin_dpo/margin_mean": 69.01838684082031,
|
|
"margin_dpo/margin_std": 90.85995483398438,
|
|
"step": 414
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -137.65797424316406,
|
|
"KL/mean": -166.07562255859375,
|
|
"KL/rejected_KL_mean": -194.49325561523438,
|
|
"KL/std": 90.88127899169922,
|
|
"epoch": 0.6273620559334845,
|
|
"fcm_dpo/beta": 0.006447440013289452,
|
|
"fcm_dpo/delta": 0.03418339416384697,
|
|
"fcm_dpo/margin": 56.83525848388672,
|
|
"fcm_dpo/q_t": 0.41667789220809937,
|
|
"grad_norm": 11.855904579162598,
|
|
"learning_rate": 1.846568829074628e-07,
|
|
"logits/chosen": 0.7516089081764221,
|
|
"logits/rejected": 0.7356454133987427,
|
|
"logps/chosen": -193.5765380859375,
|
|
"logps/ref_chosen": -55.91856002807617,
|
|
"logps/ref_rejected": -61.747703552246094,
|
|
"logps/rejected": -256.240966796875,
|
|
"loss": 1.1419,
|
|
"margin_dpo/margin_mean": 56.83525848388672,
|
|
"margin_dpo/margin_std": 95.65105438232422,
|
|
"step": 415
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -145.9019775390625,
|
|
"KL/mean": -171.45094299316406,
|
|
"KL/rejected_KL_mean": -196.99990844726562,
|
|
"KL/std": 95.56219482421875,
|
|
"epoch": 0.6288737717309146,
|
|
"fcm_dpo/beta": 0.006426485255360603,
|
|
"fcm_dpo/delta": -0.10007449239492416,
|
|
"fcm_dpo/margin": 51.097923278808594,
|
|
"fcm_dpo/q_t": 0.4265289902687073,
|
|
"grad_norm": 12.941224098205566,
|
|
"learning_rate": 1.8338154657749128e-07,
|
|
"logits/chosen": 0.7113362550735474,
|
|
"logits/rejected": 0.665785551071167,
|
|
"logps/chosen": -200.62506103515625,
|
|
"logps/ref_chosen": -54.72308349609375,
|
|
"logps/ref_rejected": -69.17388916015625,
|
|
"logps/rejected": -266.1737976074219,
|
|
"loss": 1.1818,
|
|
"margin_dpo/margin_mean": 51.097923278808594,
|
|
"margin_dpo/margin_std": 91.49386596679688,
|
|
"step": 416
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -151.55128479003906,
|
|
"KL/mean": -183.77899169921875,
|
|
"KL/rejected_KL_mean": -216.0067138671875,
|
|
"KL/std": 89.0639877319336,
|
|
"epoch": 0.6303854875283447,
|
|
"fcm_dpo/beta": 0.006405606400221586,
|
|
"fcm_dpo/delta": -0.013418477028608322,
|
|
"fcm_dpo/margin": 64.45542907714844,
|
|
"fcm_dpo/q_t": 0.4045155644416809,
|
|
"grad_norm": 12.611370086669922,
|
|
"learning_rate": 1.8210807370886849e-07,
|
|
"logits/chosen": 0.7983624935150146,
|
|
"logits/rejected": 0.7391947507858276,
|
|
"logps/chosen": -208.34254455566406,
|
|
"logps/ref_chosen": -56.791259765625,
|
|
"logps/ref_rejected": -68.7791748046875,
|
|
"logps/rejected": -284.785888671875,
|
|
"loss": 1.1282,
|
|
"margin_dpo/margin_mean": 64.45542907714844,
|
|
"margin_dpo/margin_std": 108.26139068603516,
|
|
"step": 417
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -161.94886779785156,
|
|
"KL/mean": -188.7616424560547,
|
|
"KL/rejected_KL_mean": -215.57443237304688,
|
|
"KL/std": 100.53971862792969,
|
|
"epoch": 0.6318972033257747,
|
|
"fcm_dpo/beta": 0.006337217055261135,
|
|
"fcm_dpo/delta": -0.0762988030910492,
|
|
"fcm_dpo/margin": 53.62554931640625,
|
|
"fcm_dpo/q_t": 0.4238419234752655,
|
|
"grad_norm": 13.483736991882324,
|
|
"learning_rate": 1.8083649992336825e-07,
|
|
"logits/chosen": 0.761638343334198,
|
|
"logits/rejected": 0.7650953531265259,
|
|
"logps/chosen": -231.05685424804688,
|
|
"logps/ref_chosen": -69.10798645019531,
|
|
"logps/ref_rejected": -75.09132385253906,
|
|
"logps/rejected": -290.665771484375,
|
|
"loss": 1.1796,
|
|
"margin_dpo/margin_mean": 53.62554931640625,
|
|
"margin_dpo/margin_std": 101.3090591430664,
|
|
"step": 418
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -128.22523498535156,
|
|
"KL/mean": -167.106689453125,
|
|
"KL/rejected_KL_mean": -205.9881591796875,
|
|
"KL/std": 90.63583374023438,
|
|
"epoch": 0.6334089191232048,
|
|
"fcm_dpo/beta": 0.00629136199131608,
|
|
"fcm_dpo/delta": -0.09151894599199295,
|
|
"fcm_dpo/margin": 77.76293182373047,
|
|
"fcm_dpo/q_t": 0.391714870929718,
|
|
"grad_norm": 11.661109924316406,
|
|
"learning_rate": 1.7956686078964255e-07,
|
|
"logits/chosen": 0.678460955619812,
|
|
"logits/rejected": 0.6326619386672974,
|
|
"logps/chosen": -186.39700317382812,
|
|
"logps/ref_chosen": -58.1717643737793,
|
|
"logps/ref_rejected": -71.67066955566406,
|
|
"logps/rejected": -277.6588134765625,
|
|
"loss": 1.0509,
|
|
"margin_dpo/margin_mean": 77.76293182373047,
|
|
"margin_dpo/margin_std": 103.44277954101562,
|
|
"step": 419
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -160.03488159179688,
|
|
"KL/mean": -179.75146484375,
|
|
"KL/rejected_KL_mean": -199.46807861328125,
|
|
"KL/std": 94.88818359375,
|
|
"epoch": 0.6349206349206349,
|
|
"fcm_dpo/beta": 0.006308958865702152,
|
|
"fcm_dpo/delta": 0.04682771489024162,
|
|
"fcm_dpo/margin": 39.43321228027344,
|
|
"fcm_dpo/q_t": 0.445299357175827,
|
|
"grad_norm": 12.114187240600586,
|
|
"learning_rate": 1.782991918222275e-07,
|
|
"logits/chosen": 0.7428210377693176,
|
|
"logits/rejected": 0.7005817294120789,
|
|
"logps/chosen": -217.08837890625,
|
|
"logps/ref_chosen": -57.05351257324219,
|
|
"logps/ref_rejected": -62.670982360839844,
|
|
"logps/rejected": -262.1390380859375,
|
|
"loss": 1.264,
|
|
"margin_dpo/margin_mean": 39.43321228027344,
|
|
"margin_dpo/margin_std": 108.58183288574219,
|
|
"step": 420
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -147.22166442871094,
|
|
"KL/mean": -176.59242248535156,
|
|
"KL/rejected_KL_mean": -205.96316528320312,
|
|
"KL/std": 93.75239562988281,
|
|
"epoch": 0.636432350718065,
|
|
"fcm_dpo/beta": 0.0063071902841329575,
|
|
"fcm_dpo/delta": 0.029881957918405533,
|
|
"fcm_dpo/margin": 58.74150466918945,
|
|
"fcm_dpo/q_t": 0.41815799474716187,
|
|
"grad_norm": 13.981078147888184,
|
|
"learning_rate": 1.7703352848054887e-07,
|
|
"logits/chosen": 0.6982603073120117,
|
|
"logits/rejected": 0.6414200067520142,
|
|
"logps/chosen": -204.544921875,
|
|
"logps/ref_chosen": -57.32324981689453,
|
|
"logps/ref_rejected": -75.33782958984375,
|
|
"logps/rejected": -281.3009948730469,
|
|
"loss": 1.1903,
|
|
"margin_dpo/margin_mean": 58.74150466918945,
|
|
"margin_dpo/margin_std": 122.801513671875,
|
|
"step": 421
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -129.5014190673828,
|
|
"KL/mean": -167.12002563476562,
|
|
"KL/rejected_KL_mean": -204.73861694335938,
|
|
"KL/std": 92.84248352050781,
|
|
"epoch": 0.6379440665154951,
|
|
"fcm_dpo/beta": 0.006302628666162491,
|
|
"fcm_dpo/delta": -0.07598047703504562,
|
|
"fcm_dpo/margin": 75.23722076416016,
|
|
"fcm_dpo/q_t": 0.39243677258491516,
|
|
"grad_norm": 13.011792182922363,
|
|
"learning_rate": 1.7576990616793137e-07,
|
|
"logits/chosen": 0.722919762134552,
|
|
"logits/rejected": 0.718876838684082,
|
|
"logps/chosen": -196.55899047851562,
|
|
"logps/ref_chosen": -67.05757141113281,
|
|
"logps/ref_rejected": -72.12803649902344,
|
|
"logps/rejected": -276.86663818359375,
|
|
"loss": 1.0479,
|
|
"margin_dpo/margin_mean": 75.23722839355469,
|
|
"margin_dpo/margin_std": 93.00556945800781,
|
|
"step": 422
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -131.59103393554688,
|
|
"KL/mean": -171.2489013671875,
|
|
"KL/rejected_KL_mean": -210.9067840576172,
|
|
"KL/std": 94.39566040039062,
|
|
"epoch": 0.6394557823129252,
|
|
"fcm_dpo/beta": 0.0062285978347063065,
|
|
"fcm_dpo/delta": -0.09654450416564941,
|
|
"fcm_dpo/margin": 79.31578063964844,
|
|
"fcm_dpo/q_t": 0.3895735442638397,
|
|
"grad_norm": 11.53082275390625,
|
|
"learning_rate": 1.745083602306071e-07,
|
|
"logits/chosen": 0.8044902086257935,
|
|
"logits/rejected": 0.7378097772598267,
|
|
"logps/chosen": -185.65269470214844,
|
|
"logps/ref_chosen": -54.06167221069336,
|
|
"logps/ref_rejected": -76.64092254638672,
|
|
"logps/rejected": -287.5477294921875,
|
|
"loss": 1.0413,
|
|
"margin_dpo/margin_mean": 79.31578063964844,
|
|
"margin_dpo/margin_std": 100.5966796875,
|
|
"step": 423
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -139.37454223632812,
|
|
"KL/mean": -178.97323608398438,
|
|
"KL/rejected_KL_mean": -218.57192993164062,
|
|
"KL/std": 88.43770599365234,
|
|
"epoch": 0.6409674981103552,
|
|
"fcm_dpo/beta": 0.006160058081150055,
|
|
"fcm_dpo/delta": -0.09064456075429916,
|
|
"fcm_dpo/margin": 79.19739532470703,
|
|
"fcm_dpo/q_t": 0.3903810977935791,
|
|
"grad_norm": 15.572758674621582,
|
|
"learning_rate": 1.7324892595672804e-07,
|
|
"logits/chosen": 0.642951250076294,
|
|
"logits/rejected": 0.6025166511535645,
|
|
"logps/chosen": -192.9833984375,
|
|
"logps/ref_chosen": -53.60887145996094,
|
|
"logps/ref_rejected": -79.2139892578125,
|
|
"logps/rejected": -297.78594970703125,
|
|
"loss": 1.046,
|
|
"margin_dpo/margin_mean": 79.19740295410156,
|
|
"margin_dpo/margin_std": 100.458251953125,
|
|
"step": 424
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -140.4604034423828,
|
|
"KL/mean": -169.88015747070312,
|
|
"KL/rejected_KL_mean": -199.29989624023438,
|
|
"KL/std": 86.0601806640625,
|
|
"epoch": 0.6424792139077853,
|
|
"fcm_dpo/beta": 0.006180041469633579,
|
|
"fcm_dpo/delta": 0.037045918405056,
|
|
"fcm_dpo/margin": 58.83949661254883,
|
|
"fcm_dpo/q_t": 0.4177452325820923,
|
|
"grad_norm": 12.2982177734375,
|
|
"learning_rate": 1.7199163857537824e-07,
|
|
"logits/chosen": 0.7776466012001038,
|
|
"logits/rejected": 0.7510822415351868,
|
|
"logps/chosen": -198.87509155273438,
|
|
"logps/ref_chosen": -58.41468048095703,
|
|
"logps/ref_rejected": -66.59054565429688,
|
|
"logps/rejected": -265.89044189453125,
|
|
"loss": 1.141,
|
|
"margin_dpo/margin_mean": 58.83949279785156,
|
|
"margin_dpo/margin_std": 97.37757110595703,
|
|
"step": 425
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -167.95077514648438,
|
|
"KL/mean": -186.11044311523438,
|
|
"KL/rejected_KL_mean": -204.27015686035156,
|
|
"KL/std": 92.40206909179688,
|
|
"epoch": 0.6439909297052154,
|
|
"fcm_dpo/beta": 0.006268607452511787,
|
|
"fcm_dpo/delta": 0.17428483068943024,
|
|
"fcm_dpo/margin": 36.31938171386719,
|
|
"fcm_dpo/q_t": 0.44831526279449463,
|
|
"grad_norm": 16.91359519958496,
|
|
"learning_rate": 1.7073653325558828e-07,
|
|
"logits/chosen": 0.7123624086380005,
|
|
"logits/rejected": 0.7156420946121216,
|
|
"logps/chosen": -239.65899658203125,
|
|
"logps/ref_chosen": -71.70822143554688,
|
|
"logps/ref_rejected": -73.57725524902344,
|
|
"logps/rejected": -277.847412109375,
|
|
"loss": 1.2915,
|
|
"margin_dpo/margin_mean": 36.319374084472656,
|
|
"margin_dpo/margin_std": 115.06658935546875,
|
|
"step": 426
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -158.71859741210938,
|
|
"KL/mean": -190.8339080810547,
|
|
"KL/rejected_KL_mean": -222.94923400878906,
|
|
"KL/std": 101.37161254882812,
|
|
"epoch": 0.6455026455026455,
|
|
"fcm_dpo/beta": 0.006289360579103231,
|
|
"fcm_dpo/delta": -0.004068998619914055,
|
|
"fcm_dpo/margin": 64.23062133789062,
|
|
"fcm_dpo/q_t": 0.4114408493041992,
|
|
"grad_norm": 13.480173110961914,
|
|
"learning_rate": 1.6948364510535218e-07,
|
|
"logits/chosen": 0.7616878747940063,
|
|
"logits/rejected": 0.6968189477920532,
|
|
"logps/chosen": -217.3613739013672,
|
|
"logps/ref_chosen": -58.64276885986328,
|
|
"logps/ref_rejected": -86.25437927246094,
|
|
"logps/rejected": -309.20361328125,
|
|
"loss": 1.1332,
|
|
"margin_dpo/margin_mean": 64.23062133789062,
|
|
"margin_dpo/margin_std": 111.18358612060547,
|
|
"step": 427
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -153.2641143798828,
|
|
"KL/mean": -186.94076538085938,
|
|
"KL/rejected_KL_mean": -220.61741638183594,
|
|
"KL/std": 103.556640625,
|
|
"epoch": 0.6470143613000756,
|
|
"fcm_dpo/beta": 0.006294772028923035,
|
|
"fcm_dpo/delta": -0.024632573127746582,
|
|
"fcm_dpo/margin": 67.3532943725586,
|
|
"fcm_dpo/q_t": 0.4058830142021179,
|
|
"grad_norm": 11.533279418945312,
|
|
"learning_rate": 1.6823300917064458e-07,
|
|
"logits/chosen": 0.705619752407074,
|
|
"logits/rejected": 0.6661095023155212,
|
|
"logps/chosen": -219.86016845703125,
|
|
"logps/ref_chosen": -66.5960464477539,
|
|
"logps/ref_rejected": -82.3941650390625,
|
|
"logps/rejected": -303.0115661621094,
|
|
"loss": 1.1203,
|
|
"margin_dpo/margin_mean": 67.3532943725586,
|
|
"margin_dpo/margin_std": 110.29072570800781,
|
|
"step": 428
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -155.98072814941406,
|
|
"KL/mean": -182.2216796875,
|
|
"KL/rejected_KL_mean": -208.462646484375,
|
|
"KL/std": 86.56990814208984,
|
|
"epoch": 0.6485260770975056,
|
|
"fcm_dpo/beta": 0.006322845816612244,
|
|
"fcm_dpo/delta": 0.06899110972881317,
|
|
"fcm_dpo/margin": 52.481910705566406,
|
|
"fcm_dpo/q_t": 0.42302316427230835,
|
|
"grad_norm": 13.077984809875488,
|
|
"learning_rate": 1.669846604344412e-07,
|
|
"logits/chosen": 0.6891961097717285,
|
|
"logits/rejected": 0.7085661888122559,
|
|
"logps/chosen": -212.99041748046875,
|
|
"logps/ref_chosen": -57.00970458984375,
|
|
"logps/ref_rejected": -59.86549377441406,
|
|
"logps/rejected": -268.328125,
|
|
"loss": 1.1728,
|
|
"margin_dpo/margin_mean": 52.48191833496094,
|
|
"margin_dpo/margin_std": 96.186279296875,
|
|
"step": 429
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -138.53695678710938,
|
|
"KL/mean": -180.16635131835938,
|
|
"KL/rejected_KL_mean": -221.79574584960938,
|
|
"KL/std": 88.29185485839844,
|
|
"epoch": 0.6500377928949358,
|
|
"fcm_dpo/beta": 0.006260054185986519,
|
|
"fcm_dpo/delta": -0.12449215352535248,
|
|
"fcm_dpo/margin": 83.25879669189453,
|
|
"fcm_dpo/q_t": 0.3800179064273834,
|
|
"grad_norm": 12.902923583984375,
|
|
"learning_rate": 1.6573863381573954e-07,
|
|
"logits/chosen": 0.6401696801185608,
|
|
"logits/rejected": 0.6379525661468506,
|
|
"logps/chosen": -198.10015869140625,
|
|
"logps/ref_chosen": -59.563194274902344,
|
|
"logps/ref_rejected": -70.52289581298828,
|
|
"logps/rejected": -292.3186340332031,
|
|
"loss": 1.0256,
|
|
"margin_dpo/margin_mean": 83.2587890625,
|
|
"margin_dpo/margin_std": 100.79164123535156,
|
|
"step": 430
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -136.57052612304688,
|
|
"KL/mean": -167.95053100585938,
|
|
"KL/rejected_KL_mean": -199.33050537109375,
|
|
"KL/std": 90.65411376953125,
|
|
"epoch": 0.6515495086923658,
|
|
"fcm_dpo/beta": 0.006242701783776283,
|
|
"fcm_dpo/delta": 0.008363962173461914,
|
|
"fcm_dpo/margin": 62.759971618652344,
|
|
"fcm_dpo/q_t": 0.411226361989975,
|
|
"grad_norm": 12.752163887023926,
|
|
"learning_rate": 1.6449496416858282e-07,
|
|
"logits/chosen": 0.7298303842544556,
|
|
"logits/rejected": 0.6745837926864624,
|
|
"logps/chosen": -186.77085876464844,
|
|
"logps/ref_chosen": -50.20032501220703,
|
|
"logps/ref_rejected": -77.81680297851562,
|
|
"logps/rejected": -277.1473083496094,
|
|
"loss": 1.1228,
|
|
"margin_dpo/margin_mean": 62.759971618652344,
|
|
"margin_dpo/margin_std": 99.06048583984375,
|
|
"step": 431
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -142.88516235351562,
|
|
"KL/mean": -175.96714782714844,
|
|
"KL/rejected_KL_mean": -209.04913330078125,
|
|
"KL/std": 88.95008850097656,
|
|
"epoch": 0.6530612244897959,
|
|
"fcm_dpo/beta": 0.006238770205527544,
|
|
"fcm_dpo/delta": -0.013064262457191944,
|
|
"fcm_dpo/margin": 66.16395568847656,
|
|
"fcm_dpo/q_t": 0.4058328866958618,
|
|
"grad_norm": 13.137709617614746,
|
|
"learning_rate": 1.632536862810844e-07,
|
|
"logits/chosen": 0.773892879486084,
|
|
"logits/rejected": 0.720773458480835,
|
|
"logps/chosen": -204.54791259765625,
|
|
"logps/ref_chosen": -61.662757873535156,
|
|
"logps/ref_rejected": -83.94496154785156,
|
|
"logps/rejected": -292.99407958984375,
|
|
"loss": 1.1118,
|
|
"margin_dpo/margin_mean": 66.16395568847656,
|
|
"margin_dpo/margin_std": 102.72517395019531,
|
|
"step": 432
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -142.64552307128906,
|
|
"KL/mean": -180.88632202148438,
|
|
"KL/rejected_KL_mean": -219.1271209716797,
|
|
"KL/std": 85.44865417480469,
|
|
"epoch": 0.654572940287226,
|
|
"fcm_dpo/beta": 0.006212181411683559,
|
|
"fcm_dpo/delta": -0.0769159272313118,
|
|
"fcm_dpo/margin": 76.48158264160156,
|
|
"fcm_dpo/q_t": 0.3909345269203186,
|
|
"grad_norm": 12.641650199890137,
|
|
"learning_rate": 1.6201483487445515e-07,
|
|
"logits/chosen": 0.7978846430778503,
|
|
"logits/rejected": 0.7998515367507935,
|
|
"logps/chosen": -206.37469482421875,
|
|
"logps/ref_chosen": -63.72917938232422,
|
|
"logps/ref_rejected": -65.8391342163086,
|
|
"logps/rejected": -284.96624755859375,
|
|
"loss": 1.0538,
|
|
"margin_dpo/margin_mean": 76.48158264160156,
|
|
"margin_dpo/margin_std": 97.22679138183594,
|
|
"step": 433
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -121.046142578125,
|
|
"KL/mean": -162.82235717773438,
|
|
"KL/rejected_KL_mean": -204.59854125976562,
|
|
"KL/std": 93.16885375976562,
|
|
"epoch": 0.656084656084656,
|
|
"fcm_dpo/beta": 0.006117374636232853,
|
|
"fcm_dpo/delta": -0.11492545157670975,
|
|
"fcm_dpo/margin": 83.55239868164062,
|
|
"fcm_dpo/q_t": 0.38394761085510254,
|
|
"grad_norm": 11.63713550567627,
|
|
"learning_rate": 1.6077844460203204e-07,
|
|
"logits/chosen": 0.8505921363830566,
|
|
"logits/rejected": 0.7877355813980103,
|
|
"logps/chosen": -169.0194549560547,
|
|
"logps/ref_chosen": -47.97331619262695,
|
|
"logps/ref_rejected": -72.51132202148438,
|
|
"logps/rejected": -277.10986328125,
|
|
"loss": 1.052,
|
|
"margin_dpo/margin_mean": 83.55240631103516,
|
|
"margin_dpo/margin_std": 112.16519165039062,
|
|
"step": 434
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -146.3050537109375,
|
|
"KL/mean": -178.33323669433594,
|
|
"KL/rejected_KL_mean": -210.36143493652344,
|
|
"KL/std": 90.318115234375,
|
|
"epoch": 0.6575963718820862,
|
|
"fcm_dpo/beta": 0.006136502139270306,
|
|
"fcm_dpo/delta": 0.006874606013298035,
|
|
"fcm_dpo/margin": 64.05638122558594,
|
|
"fcm_dpo/q_t": 0.41139477491378784,
|
|
"grad_norm": 14.192190170288086,
|
|
"learning_rate": 1.5954455004830878e-07,
|
|
"logits/chosen": 0.8371284008026123,
|
|
"logits/rejected": 0.7992656230926514,
|
|
"logps/chosen": -203.36529541015625,
|
|
"logps/ref_chosen": -57.06024932861328,
|
|
"logps/ref_rejected": -71.69146728515625,
|
|
"logps/rejected": -282.05291748046875,
|
|
"loss": 1.1284,
|
|
"margin_dpo/margin_mean": 64.05638122558594,
|
|
"margin_dpo/margin_std": 103.9969253540039,
|
|
"step": 435
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -147.34535217285156,
|
|
"KL/mean": -173.91993713378906,
|
|
"KL/rejected_KL_mean": -200.49453735351562,
|
|
"KL/std": 88.72954559326172,
|
|
"epoch": 0.6591080876795162,
|
|
"fcm_dpo/beta": 0.006153300404548645,
|
|
"fcm_dpo/delta": 0.0742039680480957,
|
|
"fcm_dpo/margin": 53.149192810058594,
|
|
"fcm_dpo/q_t": 0.42647284269332886,
|
|
"grad_norm": 14.044795989990234,
|
|
"learning_rate": 1.5831318572796847e-07,
|
|
"logits/chosen": 0.7753983736038208,
|
|
"logits/rejected": 0.7198779582977295,
|
|
"logps/chosen": -203.50340270996094,
|
|
"logps/ref_chosen": -56.158050537109375,
|
|
"logps/ref_rejected": -67.63787841796875,
|
|
"logps/rejected": -268.1324157714844,
|
|
"loss": 1.1919,
|
|
"margin_dpo/margin_mean": 53.149192810058594,
|
|
"margin_dpo/margin_std": 109.05805969238281,
|
|
"step": 436
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -152.93136596679688,
|
|
"KL/mean": -185.1273651123047,
|
|
"KL/rejected_KL_mean": -217.3233642578125,
|
|
"KL/std": 97.64086151123047,
|
|
"epoch": 0.6606198034769464,
|
|
"fcm_dpo/beta": 0.00611593434587121,
|
|
"fcm_dpo/delta": -0.08910680562257767,
|
|
"fcm_dpo/margin": 64.3919677734375,
|
|
"fcm_dpo/q_t": 0.4129069447517395,
|
|
"grad_norm": 15.56796932220459,
|
|
"learning_rate": 1.5708438608491815e-07,
|
|
"logits/chosen": 0.7241424918174744,
|
|
"logits/rejected": 0.6046632528305054,
|
|
"logps/chosen": -209.9171600341797,
|
|
"logps/ref_chosen": -56.98578643798828,
|
|
"logps/ref_rejected": -85.61524963378906,
|
|
"logps/rejected": -302.9385986328125,
|
|
"loss": 1.17,
|
|
"margin_dpo/margin_mean": 64.3919677734375,
|
|
"margin_dpo/margin_std": 123.81708526611328,
|
|
"step": 437
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -132.17140197753906,
|
|
"KL/mean": -174.42945861816406,
|
|
"KL/rejected_KL_mean": -216.6875,
|
|
"KL/std": 100.91641235351562,
|
|
"epoch": 0.6621315192743764,
|
|
"fcm_dpo/beta": 0.00607282854616642,
|
|
"fcm_dpo/delta": -0.11615258455276489,
|
|
"fcm_dpo/margin": 84.51612091064453,
|
|
"fcm_dpo/q_t": 0.38812965154647827,
|
|
"grad_norm": 12.231389999389648,
|
|
"learning_rate": 1.558581854913253e-07,
|
|
"logits/chosen": 0.7996244430541992,
|
|
"logits/rejected": 0.7366063594818115,
|
|
"logps/chosen": -173.44918823242188,
|
|
"logps/ref_chosen": -41.27777862548828,
|
|
"logps/ref_rejected": -65.33840942382812,
|
|
"logps/rejected": -282.02593994140625,
|
|
"loss": 1.0367,
|
|
"margin_dpo/margin_mean": 84.51612854003906,
|
|
"margin_dpo/margin_std": 111.33171081542969,
|
|
"step": 438
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -151.48358154296875,
|
|
"KL/mean": -188.70355224609375,
|
|
"KL/rejected_KL_mean": -225.92356872558594,
|
|
"KL/std": 102.09961700439453,
|
|
"epoch": 0.6636432350718064,
|
|
"fcm_dpo/beta": 0.006045527756214142,
|
|
"fcm_dpo/delta": -0.05158894509077072,
|
|
"fcm_dpo/margin": 74.43999481201172,
|
|
"fcm_dpo/q_t": 0.39816814661026,
|
|
"grad_norm": 12.475773811340332,
|
|
"learning_rate": 1.5463461824665658e-07,
|
|
"logits/chosen": 0.6343196630477905,
|
|
"logits/rejected": 0.6036988496780396,
|
|
"logps/chosen": -232.90121459960938,
|
|
"logps/ref_chosen": -81.41764831542969,
|
|
"logps/ref_rejected": -94.72309875488281,
|
|
"logps/rejected": -320.64666748046875,
|
|
"loss": 1.0801,
|
|
"margin_dpo/margin_mean": 74.43999481201172,
|
|
"margin_dpo/margin_std": 102.896240234375,
|
|
"step": 439
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -130.29151916503906,
|
|
"KL/mean": -167.2786102294922,
|
|
"KL/rejected_KL_mean": -204.2656707763672,
|
|
"KL/std": 90.60092163085938,
|
|
"epoch": 0.6651549508692366,
|
|
"fcm_dpo/beta": 0.0060059186071157455,
|
|
"fcm_dpo/delta": -0.04536076635122299,
|
|
"fcm_dpo/margin": 73.97416687011719,
|
|
"fcm_dpo/q_t": 0.4001381993293762,
|
|
"grad_norm": 23.53094482421875,
|
|
"learning_rate": 1.534137185767178e-07,
|
|
"logits/chosen": 0.7301117181777954,
|
|
"logits/rejected": 0.6352328062057495,
|
|
"logps/chosen": -172.8297119140625,
|
|
"logps/ref_chosen": -42.538185119628906,
|
|
"logps/ref_rejected": -69.78813934326172,
|
|
"logps/rejected": -274.0538330078125,
|
|
"loss": 1.0864,
|
|
"margin_dpo/margin_mean": 73.97416687011719,
|
|
"margin_dpo/margin_std": 105.63287353515625,
|
|
"step": 440
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -132.41702270507812,
|
|
"KL/mean": -172.68722534179688,
|
|
"KL/rejected_KL_mean": -212.95742797851562,
|
|
"KL/std": 98.60311126708984,
|
|
"epoch": 0.6666666666666666,
|
|
"fcm_dpo/beta": 0.005939549300819635,
|
|
"fcm_dpo/delta": -0.08060160279273987,
|
|
"fcm_dpo/margin": 80.54043579101562,
|
|
"fcm_dpo/q_t": 0.3895171284675598,
|
|
"grad_norm": 16.08013343811035,
|
|
"learning_rate": 1.521955206326976e-07,
|
|
"logits/chosen": 0.6959245204925537,
|
|
"logits/rejected": 0.6078984141349792,
|
|
"logps/chosen": -190.01023864746094,
|
|
"logps/ref_chosen": -57.593223571777344,
|
|
"logps/ref_rejected": -84.82878875732422,
|
|
"logps/rejected": -297.7862243652344,
|
|
"loss": 1.0311,
|
|
"margin_dpo/margin_mean": 80.54043579101562,
|
|
"margin_dpo/margin_std": 88.84800720214844,
|
|
"step": 441
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -158.59190368652344,
|
|
"KL/mean": -200.01634216308594,
|
|
"KL/rejected_KL_mean": -241.4407958984375,
|
|
"KL/std": 98.7254638671875,
|
|
"epoch": 0.6681783824640968,
|
|
"fcm_dpo/beta": 0.0058901673182845116,
|
|
"fcm_dpo/delta": -0.09041900932788849,
|
|
"fcm_dpo/margin": 82.848876953125,
|
|
"fcm_dpo/q_t": 0.38918882608413696,
|
|
"grad_norm": 13.906518936157227,
|
|
"learning_rate": 1.5098005849021078e-07,
|
|
"logits/chosen": 0.7144099473953247,
|
|
"logits/rejected": 0.6716795563697815,
|
|
"logps/chosen": -226.05313110351562,
|
|
"logps/ref_chosen": -67.46121978759766,
|
|
"logps/ref_rejected": -89.0693588256836,
|
|
"logps/rejected": -330.5101318359375,
|
|
"loss": 1.0402,
|
|
"margin_dpo/margin_mean": 82.848876953125,
|
|
"margin_dpo/margin_std": 101.45462036132812,
|
|
"step": 442
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -131.2276611328125,
|
|
"KL/mean": -181.3895263671875,
|
|
"KL/rejected_KL_mean": -231.55137634277344,
|
|
"KL/std": 104.52983093261719,
|
|
"epoch": 0.6696900982615268,
|
|
"fcm_dpo/beta": 0.005800019949674606,
|
|
"fcm_dpo/delta": -0.18746662139892578,
|
|
"fcm_dpo/margin": 100.3237075805664,
|
|
"fcm_dpo/q_t": 0.3702532649040222,
|
|
"grad_norm": 12.695876121520996,
|
|
"learning_rate": 1.4976736614834662e-07,
|
|
"logits/chosen": 0.7216641902923584,
|
|
"logits/rejected": 0.6618741154670715,
|
|
"logps/chosen": -186.02377319335938,
|
|
"logps/ref_chosen": -54.79610061645508,
|
|
"logps/ref_rejected": -77.80781555175781,
|
|
"logps/rejected": -309.35919189453125,
|
|
"loss": 0.9857,
|
|
"margin_dpo/margin_mean": 100.32371520996094,
|
|
"margin_dpo/margin_std": 116.24922180175781,
|
|
"step": 443
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -162.1514434814453,
|
|
"KL/mean": -184.8280029296875,
|
|
"KL/rejected_KL_mean": -207.50457763671875,
|
|
"KL/std": 103.80731201171875,
|
|
"epoch": 0.671201814058957,
|
|
"fcm_dpo/beta": 0.005789580289274454,
|
|
"fcm_dpo/delta": 0.023211942985653877,
|
|
"fcm_dpo/margin": 45.353126525878906,
|
|
"fcm_dpo/q_t": 0.44268032908439636,
|
|
"grad_norm": 16.172504425048828,
|
|
"learning_rate": 1.4855747752871654e-07,
|
|
"logits/chosen": 0.7398580312728882,
|
|
"logits/rejected": 0.654506266117096,
|
|
"logps/chosen": -220.90049743652344,
|
|
"logps/ref_chosen": -58.749061584472656,
|
|
"logps/ref_rejected": -86.87396240234375,
|
|
"logps/rejected": -294.3785400390625,
|
|
"loss": 1.2457,
|
|
"margin_dpo/margin_mean": 45.353126525878906,
|
|
"margin_dpo/margin_std": 114.57366943359375,
|
|
"step": 444
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -152.96978759765625,
|
|
"KL/mean": -194.79391479492188,
|
|
"KL/rejected_KL_mean": -236.61801147460938,
|
|
"KL/std": 95.42649841308594,
|
|
"epoch": 0.672713529856387,
|
|
"fcm_dpo/beta": 0.005754541605710983,
|
|
"fcm_dpo/delta": -0.08335185050964355,
|
|
"fcm_dpo/margin": 83.64823913574219,
|
|
"fcm_dpo/q_t": 0.3891189396381378,
|
|
"grad_norm": 11.959833145141602,
|
|
"learning_rate": 1.473504264745062e-07,
|
|
"logits/chosen": 0.7360788583755493,
|
|
"logits/rejected": 0.7303141951560974,
|
|
"logps/chosen": -213.88722229003906,
|
|
"logps/ref_chosen": -60.91743850708008,
|
|
"logps/ref_rejected": -71.5637435913086,
|
|
"logps/rejected": -308.1817626953125,
|
|
"loss": 1.0388,
|
|
"margin_dpo/margin_mean": 83.64823913574219,
|
|
"margin_dpo/margin_std": 99.53976440429688,
|
|
"step": 445
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -138.58480834960938,
|
|
"KL/mean": -181.18853759765625,
|
|
"KL/rejected_KL_mean": -223.792236328125,
|
|
"KL/std": 97.45524597167969,
|
|
"epoch": 0.674225245653817,
|
|
"fcm_dpo/beta": 0.005728841293603182,
|
|
"fcm_dpo/delta": -0.09076888859272003,
|
|
"fcm_dpo/margin": 85.20742797851562,
|
|
"fcm_dpo/q_t": 0.38860780000686646,
|
|
"grad_norm": 11.939724922180176,
|
|
"learning_rate": 1.461462467495284e-07,
|
|
"logits/chosen": 0.7924954891204834,
|
|
"logits/rejected": 0.7156757116317749,
|
|
"logps/chosen": -187.38406372070312,
|
|
"logps/ref_chosen": -48.79924774169922,
|
|
"logps/ref_rejected": -71.8719482421875,
|
|
"logps/rejected": -295.6641845703125,
|
|
"loss": 1.0319,
|
|
"margin_dpo/margin_mean": 85.20742797851562,
|
|
"margin_dpo/margin_std": 93.93531799316406,
|
|
"step": 446
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -134.3602294921875,
|
|
"KL/mean": -183.44198608398438,
|
|
"KL/rejected_KL_mean": -232.52374267578125,
|
|
"KL/std": 96.26948547363281,
|
|
"epoch": 0.6757369614512472,
|
|
"fcm_dpo/beta": 0.005613087676465511,
|
|
"fcm_dpo/delta": -0.15572789311408997,
|
|
"fcm_dpo/margin": 98.16349792480469,
|
|
"fcm_dpo/q_t": 0.3735390305519104,
|
|
"grad_norm": 12.98915958404541,
|
|
"learning_rate": 1.4494497203727843e-07,
|
|
"logits/chosen": 0.7023895382881165,
|
|
"logits/rejected": 0.6039215326309204,
|
|
"logps/chosen": -188.04293823242188,
|
|
"logps/ref_chosen": -53.682716369628906,
|
|
"logps/ref_rejected": -88.17315673828125,
|
|
"logps/rejected": -320.6968994140625,
|
|
"loss": 1.0001,
|
|
"margin_dpo/margin_mean": 98.16350555419922,
|
|
"margin_dpo/margin_std": 109.70346069335938,
|
|
"step": 447
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -147.90728759765625,
|
|
"KL/mean": -186.858642578125,
|
|
"KL/rejected_KL_mean": -225.80999755859375,
|
|
"KL/std": 99.62005615234375,
|
|
"epoch": 0.6772486772486772,
|
|
"fcm_dpo/beta": 0.0055874912068247795,
|
|
"fcm_dpo/delta": -0.036061156541109085,
|
|
"fcm_dpo/margin": 77.9027099609375,
|
|
"fcm_dpo/q_t": 0.4004305899143219,
|
|
"grad_norm": 9.615347862243652,
|
|
"learning_rate": 1.4374663593999256e-07,
|
|
"logits/chosen": 0.7915422916412354,
|
|
"logits/rejected": 0.7420874834060669,
|
|
"logps/chosen": -201.65853881835938,
|
|
"logps/ref_chosen": -53.75125503540039,
|
|
"logps/ref_rejected": -77.17623901367188,
|
|
"logps/rejected": -302.9862365722656,
|
|
"loss": 1.0799,
|
|
"margin_dpo/margin_mean": 77.9027099609375,
|
|
"margin_dpo/margin_std": 106.86851501464844,
|
|
"step": 448
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -176.3856964111328,
|
|
"KL/mean": -196.4332275390625,
|
|
"KL/rejected_KL_mean": -216.48077392578125,
|
|
"KL/std": 103.67825317382812,
|
|
"epoch": 0.6787603930461074,
|
|
"fcm_dpo/beta": 0.005611460190266371,
|
|
"fcm_dpo/delta": 0.05342148244380951,
|
|
"fcm_dpo/margin": 40.09507751464844,
|
|
"fcm_dpo/q_t": 0.4481010437011719,
|
|
"grad_norm": 18.21133804321289,
|
|
"learning_rate": 1.4255127197770707e-07,
|
|
"logits/chosen": 0.6275852918624878,
|
|
"logits/rejected": 0.6276884078979492,
|
|
"logps/chosen": -252.21307373046875,
|
|
"logps/ref_chosen": -75.82737731933594,
|
|
"logps/ref_rejected": -82.20687866210938,
|
|
"logps/rejected": -298.6876220703125,
|
|
"loss": 1.2512,
|
|
"margin_dpo/margin_mean": 40.09507369995117,
|
|
"margin_dpo/margin_std": 100.46414184570312,
|
|
"step": 449
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -144.65740966796875,
|
|
"KL/mean": -176.92686462402344,
|
|
"KL/rejected_KL_mean": -209.19630432128906,
|
|
"KL/std": 102.94686126708984,
|
|
"epoch": 0.6802721088435374,
|
|
"fcm_dpo/beta": 0.0056182220578193665,
|
|
"fcm_dpo/delta": 0.03805023059248924,
|
|
"fcm_dpo/margin": 64.53890228271484,
|
|
"fcm_dpo/q_t": 0.4211123585700989,
|
|
"grad_norm": 12.111907005310059,
|
|
"learning_rate": 1.4135891358732205e-07,
|
|
"logits/chosen": 0.8361331224441528,
|
|
"logits/rejected": 0.7297120094299316,
|
|
"logps/chosen": -191.77313232421875,
|
|
"logps/ref_chosen": -47.11572265625,
|
|
"logps/ref_rejected": -78.7546615600586,
|
|
"logps/rejected": -287.95098876953125,
|
|
"loss": 1.1592,
|
|
"margin_dpo/margin_mean": 64.53890228271484,
|
|
"margin_dpo/margin_std": 118.03001403808594,
|
|
"step": 450
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -146.1675567626953,
|
|
"KL/mean": -175.3653564453125,
|
|
"KL/rejected_KL_mean": -204.5631561279297,
|
|
"KL/std": 97.54582214355469,
|
|
"epoch": 0.6817838246409675,
|
|
"fcm_dpo/beta": 0.005667516030371189,
|
|
"fcm_dpo/delta": 0.07010924071073532,
|
|
"fcm_dpo/margin": 58.395599365234375,
|
|
"fcm_dpo/q_t": 0.4240720868110657,
|
|
"grad_norm": 12.86605167388916,
|
|
"learning_rate": 1.4016959412166437e-07,
|
|
"logits/chosen": 0.6757749319076538,
|
|
"logits/rejected": 0.6317286491394043,
|
|
"logps/chosen": -209.51800537109375,
|
|
"logps/ref_chosen": -63.350440979003906,
|
|
"logps/ref_rejected": -76.28530883789062,
|
|
"logps/rejected": -280.84844970703125,
|
|
"loss": 1.1577,
|
|
"margin_dpo/margin_mean": 58.395599365234375,
|
|
"margin_dpo/margin_std": 99.56322479248047,
|
|
"step": 451
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -147.09828186035156,
|
|
"KL/mean": -180.1845245361328,
|
|
"KL/rejected_KL_mean": -213.27078247070312,
|
|
"KL/std": 94.0128402709961,
|
|
"epoch": 0.6832955404383976,
|
|
"fcm_dpo/beta": 0.005681175272911787,
|
|
"fcm_dpo/delta": 0.02452705055475235,
|
|
"fcm_dpo/margin": 66.1725082397461,
|
|
"fcm_dpo/q_t": 0.4149036109447479,
|
|
"grad_norm": 14.72726058959961,
|
|
"learning_rate": 1.3898334684855645e-07,
|
|
"logits/chosen": 0.7004408836364746,
|
|
"logits/rejected": 0.6229137182235718,
|
|
"logps/chosen": -202.68411254882812,
|
|
"logps/ref_chosen": -55.58583450317383,
|
|
"logps/ref_rejected": -77.68738555908203,
|
|
"logps/rejected": -290.95819091796875,
|
|
"loss": 1.1448,
|
|
"margin_dpo/margin_mean": 66.1725082397461,
|
|
"margin_dpo/margin_std": 114.46353149414062,
|
|
"step": 452
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -147.33566284179688,
|
|
"KL/mean": -179.667724609375,
|
|
"KL/rejected_KL_mean": -211.99978637695312,
|
|
"KL/std": 97.49998474121094,
|
|
"epoch": 0.6848072562358276,
|
|
"fcm_dpo/beta": 0.0056960792280733585,
|
|
"fcm_dpo/delta": 0.03226463496685028,
|
|
"fcm_dpo/margin": 64.66412353515625,
|
|
"fcm_dpo/q_t": 0.41683125495910645,
|
|
"grad_norm": 14.382550239562988,
|
|
"learning_rate": 1.3780020494988445e-07,
|
|
"logits/chosen": 0.6994547247886658,
|
|
"logits/rejected": 0.6791571378707886,
|
|
"logps/chosen": -209.11386108398438,
|
|
"logps/ref_chosen": -61.778202056884766,
|
|
"logps/ref_rejected": -71.51403045654297,
|
|
"logps/rejected": -283.5137939453125,
|
|
"loss": 1.1448,
|
|
"margin_dpo/margin_mean": 64.66412353515625,
|
|
"margin_dpo/margin_std": 110.0992431640625,
|
|
"step": 453
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -134.96542358398438,
|
|
"KL/mean": -172.3758544921875,
|
|
"KL/rejected_KL_mean": -209.78628540039062,
|
|
"KL/std": 98.027099609375,
|
|
"epoch": 0.6863189720332578,
|
|
"fcm_dpo/beta": 0.005690881051123142,
|
|
"fcm_dpo/delta": -0.026379181072115898,
|
|
"fcm_dpo/margin": 74.82087707519531,
|
|
"fcm_dpo/q_t": 0.4026657044887543,
|
|
"grad_norm": 12.05843448638916,
|
|
"learning_rate": 1.366202015206706e-07,
|
|
"logits/chosen": 0.7408164143562317,
|
|
"logits/rejected": 0.7055944204330444,
|
|
"logps/chosen": -186.56057739257812,
|
|
"logps/ref_chosen": -51.59515380859375,
|
|
"logps/ref_rejected": -63.96732711791992,
|
|
"logps/rejected": -273.75360107421875,
|
|
"loss": 1.0964,
|
|
"margin_dpo/margin_mean": 74.82087707519531,
|
|
"margin_dpo/margin_std": 111.03755187988281,
|
|
"step": 454
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -158.0542449951172,
|
|
"KL/mean": -192.82235717773438,
|
|
"KL/rejected_KL_mean": -227.5904998779297,
|
|
"KL/std": 98.14766693115234,
|
|
"epoch": 0.6878306878306878,
|
|
"fcm_dpo/beta": 0.005689314566552639,
|
|
"fcm_dpo/delta": 0.004469834268093109,
|
|
"fcm_dpo/margin": 69.5362548828125,
|
|
"fcm_dpo/q_t": 0.4116792678833008,
|
|
"grad_norm": 12.594999313354492,
|
|
"learning_rate": 1.354433695681474e-07,
|
|
"logits/chosen": 0.6422542333602905,
|
|
"logits/rejected": 0.616525411605835,
|
|
"logps/chosen": -228.70596313476562,
|
|
"logps/ref_chosen": -70.65170288085938,
|
|
"logps/ref_rejected": -77.44276428222656,
|
|
"logps/rejected": -305.03326416015625,
|
|
"loss": 1.1118,
|
|
"margin_dpo/margin_mean": 69.5362548828125,
|
|
"margin_dpo/margin_std": 105.42153930664062,
|
|
"step": 455
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -152.19696044921875,
|
|
"KL/mean": -186.0067138671875,
|
|
"KL/rejected_KL_mean": -219.81646728515625,
|
|
"KL/std": 97.37277221679688,
|
|
"epoch": 0.6893424036281179,
|
|
"fcm_dpo/beta": 0.005693695042282343,
|
|
"fcm_dpo/delta": 0.01527065597474575,
|
|
"fcm_dpo/margin": 67.61949920654297,
|
|
"fcm_dpo/q_t": 0.4144898056983948,
|
|
"grad_norm": 15.261795043945312,
|
|
"learning_rate": 1.3426974201083439e-07,
|
|
"logits/chosen": 0.6495950818061829,
|
|
"logits/rejected": 0.5909262895584106,
|
|
"logps/chosen": -208.59524536132812,
|
|
"logps/ref_chosen": -56.398284912109375,
|
|
"logps/ref_rejected": -82.61642456054688,
|
|
"logps/rejected": -302.4328918457031,
|
|
"loss": 1.1281,
|
|
"margin_dpo/margin_mean": 67.61949920654297,
|
|
"margin_dpo/margin_std": 110.9076156616211,
|
|
"step": 456
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -150.51907348632812,
|
|
"KL/mean": -185.42654418945312,
|
|
"KL/rejected_KL_mean": -220.33399963378906,
|
|
"KL/std": 99.75857543945312,
|
|
"epoch": 0.690854119425548,
|
|
"fcm_dpo/beta": 0.005693856626749039,
|
|
"fcm_dpo/delta": 0.002454077824950218,
|
|
"fcm_dpo/margin": 69.8149185180664,
|
|
"fcm_dpo/q_t": 0.40801647305488586,
|
|
"grad_norm": 12.972249031066895,
|
|
"learning_rate": 1.3309935167761717e-07,
|
|
"logits/chosen": 0.8584772944450378,
|
|
"logits/rejected": 0.7793265581130981,
|
|
"logps/chosen": -195.23965454101562,
|
|
"logps/ref_chosen": -44.72057342529297,
|
|
"logps/ref_rejected": -68.1158676147461,
|
|
"logps/rejected": -288.4498596191406,
|
|
"loss": 1.0968,
|
|
"margin_dpo/margin_mean": 69.8149185180664,
|
|
"margin_dpo/margin_std": 94.71832275390625,
|
|
"step": 457
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -144.84140014648438,
|
|
"KL/mean": -182.7362518310547,
|
|
"KL/rejected_KL_mean": -220.63108825683594,
|
|
"KL/std": 102.23555755615234,
|
|
"epoch": 0.6923658352229781,
|
|
"fcm_dpo/beta": 0.005692486185580492,
|
|
"fcm_dpo/delta": -0.03212110325694084,
|
|
"fcm_dpo/margin": 75.78968811035156,
|
|
"fcm_dpo/q_t": 0.40190398693084717,
|
|
"grad_norm": 12.632534980773926,
|
|
"learning_rate": 1.3193223130682936e-07,
|
|
"logits/chosen": 0.7434733510017395,
|
|
"logits/rejected": 0.6328174471855164,
|
|
"logps/chosen": -194.84710693359375,
|
|
"logps/ref_chosen": -50.00569152832031,
|
|
"logps/ref_rejected": -87.50015258789062,
|
|
"logps/rejected": -308.1312255859375,
|
|
"loss": 1.0962,
|
|
"margin_dpo/margin_mean": 75.78968811035156,
|
|
"margin_dpo/margin_std": 113.03216552734375,
|
|
"step": 458
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -140.63232421875,
|
|
"KL/mean": -186.453857421875,
|
|
"KL/rejected_KL_mean": -232.27536010742188,
|
|
"KL/std": 117.89798736572266,
|
|
"epoch": 0.6938775510204082,
|
|
"fcm_dpo/beta": 0.0056568896397948265,
|
|
"fcm_dpo/delta": -0.12178346514701843,
|
|
"fcm_dpo/margin": 91.64302825927734,
|
|
"fcm_dpo/q_t": 0.38132259249687195,
|
|
"grad_norm": 11.232112884521484,
|
|
"learning_rate": 1.3076841354533658e-07,
|
|
"logits/chosen": 0.7520014643669128,
|
|
"logits/rejected": 0.7225250005722046,
|
|
"logps/chosen": -206.01028442382812,
|
|
"logps/ref_chosen": -65.37794494628906,
|
|
"logps/ref_rejected": -88.19244384765625,
|
|
"logps/rejected": -320.4678039550781,
|
|
"loss": 1.0308,
|
|
"margin_dpo/margin_mean": 91.64302825927734,
|
|
"margin_dpo/margin_std": 110.58131408691406,
|
|
"step": 459
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -152.98495483398438,
|
|
"KL/mean": -197.4256591796875,
|
|
"KL/rejected_KL_mean": -241.8663330078125,
|
|
"KL/std": 103.51060485839844,
|
|
"epoch": 0.6953892668178382,
|
|
"fcm_dpo/beta": 0.0055643608793616295,
|
|
"fcm_dpo/delta": -0.09728430211544037,
|
|
"fcm_dpo/margin": 88.88137817382812,
|
|
"fcm_dpo/q_t": 0.38935142755508423,
|
|
"grad_norm": 12.16611385345459,
|
|
"learning_rate": 1.2960793094762345e-07,
|
|
"logits/chosen": 0.7717406749725342,
|
|
"logits/rejected": 0.6576837301254272,
|
|
"logps/chosen": -217.546630859375,
|
|
"logps/ref_chosen": -64.5616683959961,
|
|
"logps/ref_rejected": -88.67890167236328,
|
|
"logps/rejected": -330.54522705078125,
|
|
"loss": 1.0388,
|
|
"margin_dpo/margin_mean": 88.88137817382812,
|
|
"margin_dpo/margin_std": 110.88707733154297,
|
|
"step": 460
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -127.3624267578125,
|
|
"KL/mean": -172.24356079101562,
|
|
"KL/rejected_KL_mean": -217.12472534179688,
|
|
"KL/std": 96.47776794433594,
|
|
"epoch": 0.6969009826152683,
|
|
"fcm_dpo/beta": 0.0054948460310697556,
|
|
"fcm_dpo/delta": -0.09694714099168777,
|
|
"fcm_dpo/margin": 89.76228332519531,
|
|
"fcm_dpo/q_t": 0.3879685401916504,
|
|
"grad_norm": 11.350987434387207,
|
|
"learning_rate": 1.2845081597488286e-07,
|
|
"logits/chosen": 0.870736300945282,
|
|
"logits/rejected": 0.7916768193244934,
|
|
"logps/chosen": -176.84033203125,
|
|
"logps/ref_chosen": -49.4779167175293,
|
|
"logps/ref_rejected": -72.65262603759766,
|
|
"logps/rejected": -289.77734375,
|
|
"loss": 1.036,
|
|
"margin_dpo/margin_mean": 89.76228332519531,
|
|
"margin_dpo/margin_std": 104.13487243652344,
|
|
"step": 461
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -140.73956298828125,
|
|
"KL/mean": -185.01748657226562,
|
|
"KL/rejected_KL_mean": -229.29541015625,
|
|
"KL/std": 93.14476013183594,
|
|
"epoch": 0.6984126984126984,
|
|
"fcm_dpo/beta": 0.005464589223265648,
|
|
"fcm_dpo/delta": -0.0861920714378357,
|
|
"fcm_dpo/margin": 88.55587768554688,
|
|
"fcm_dpo/q_t": 0.3879750967025757,
|
|
"grad_norm": 12.345738410949707,
|
|
"learning_rate": 1.27297100994108e-07,
|
|
"logits/chosen": 0.7432578802108765,
|
|
"logits/rejected": 0.6969763040542603,
|
|
"logps/chosen": -201.2346649169922,
|
|
"logps/ref_chosen": -60.4951171875,
|
|
"logps/ref_rejected": -74.82136535644531,
|
|
"logps/rejected": -304.1167907714844,
|
|
"loss": 1.036,
|
|
"margin_dpo/margin_mean": 88.55587768554688,
|
|
"margin_dpo/margin_std": 103.93488311767578,
|
|
"step": 462
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -169.8530731201172,
|
|
"KL/mean": -196.46295166015625,
|
|
"KL/rejected_KL_mean": -223.07284545898438,
|
|
"KL/std": 93.60169982910156,
|
|
"epoch": 0.6999244142101285,
|
|
"fcm_dpo/beta": 0.005465439520776272,
|
|
"fcm_dpo/delta": 0.009897367097437382,
|
|
"fcm_dpo/margin": 53.21976852416992,
|
|
"fcm_dpo/q_t": 0.43197718262672424,
|
|
"grad_norm": 17.456735610961914,
|
|
"learning_rate": 1.2614681827718695e-07,
|
|
"logits/chosen": 0.7289791703224182,
|
|
"logits/rejected": 0.7264447808265686,
|
|
"logps/chosen": -237.53819274902344,
|
|
"logps/ref_chosen": -67.68511962890625,
|
|
"logps/ref_rejected": -71.32196044921875,
|
|
"logps/rejected": -294.394775390625,
|
|
"loss": 1.1932,
|
|
"margin_dpo/margin_mean": 53.21976852416992,
|
|
"margin_dpo/margin_std": 102.3716049194336,
|
|
"step": 463
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -153.48385620117188,
|
|
"KL/mean": -195.25,
|
|
"KL/rejected_KL_mean": -237.01614379882812,
|
|
"KL/std": 106.62663269042969,
|
|
"epoch": 0.7014361300075586,
|
|
"fcm_dpo/beta": 0.005451948381960392,
|
|
"fcm_dpo/delta": -0.05673052370548248,
|
|
"fcm_dpo/margin": 83.53227233886719,
|
|
"fcm_dpo/q_t": 0.3966714143753052,
|
|
"grad_norm": 11.42442512512207,
|
|
"learning_rate": 1.2500000000000005e-07,
|
|
"logits/chosen": 0.7221876978874207,
|
|
"logits/rejected": 0.6961289644241333,
|
|
"logps/chosen": -212.64950561523438,
|
|
"logps/ref_chosen": -59.16564178466797,
|
|
"logps/ref_rejected": -69.56146240234375,
|
|
"logps/rejected": -306.5776062011719,
|
|
"loss": 1.0838,
|
|
"margin_dpo/margin_mean": 83.53227233886719,
|
|
"margin_dpo/margin_std": 119.04618835449219,
|
|
"step": 464
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -161.208251953125,
|
|
"KL/mean": -196.86251831054688,
|
|
"KL/rejected_KL_mean": -232.5167999267578,
|
|
"KL/std": 100.1377182006836,
|
|
"epoch": 0.7029478458049887,
|
|
"fcm_dpo/beta": 0.005430829711258411,
|
|
"fcm_dpo/delta": 0.012898722663521767,
|
|
"fcm_dpo/margin": 71.30854797363281,
|
|
"fcm_dpo/q_t": 0.41208869218826294,
|
|
"grad_norm": 12.347009658813477,
|
|
"learning_rate": 1.238566782415197e-07,
|
|
"logits/chosen": 0.8417786955833435,
|
|
"logits/rejected": 0.7821944355964661,
|
|
"logps/chosen": -219.721923828125,
|
|
"logps/ref_chosen": -58.513671875,
|
|
"logps/ref_rejected": -84.31745910644531,
|
|
"logps/rejected": -316.8342590332031,
|
|
"loss": 1.127,
|
|
"margin_dpo/margin_mean": 71.30854797363281,
|
|
"margin_dpo/margin_std": 113.30348205566406,
|
|
"step": 465
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -174.59625244140625,
|
|
"KL/mean": -194.4929656982422,
|
|
"KL/rejected_KL_mean": -214.3896942138672,
|
|
"KL/std": 108.3690185546875,
|
|
"epoch": 0.7044595616024187,
|
|
"fcm_dpo/beta": 0.00547228567302227,
|
|
"fcm_dpo/delta": 0.05634971708059311,
|
|
"fcm_dpo/margin": 39.793460845947266,
|
|
"fcm_dpo/q_t": 0.4485054612159729,
|
|
"grad_norm": 18.98539924621582,
|
|
"learning_rate": 1.2271688498291334e-07,
|
|
"logits/chosen": 0.7350534200668335,
|
|
"logits/rejected": 0.7408978343009949,
|
|
"logps/chosen": -247.862060546875,
|
|
"logps/ref_chosen": -73.26580810546875,
|
|
"logps/ref_rejected": -74.83621215820312,
|
|
"logps/rejected": -289.22589111328125,
|
|
"loss": 1.2686,
|
|
"margin_dpo/margin_mean": 39.793460845947266,
|
|
"margin_dpo/margin_std": 110.71612548828125,
|
|
"step": 466
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -153.86773681640625,
|
|
"KL/mean": -187.89874267578125,
|
|
"KL/rejected_KL_mean": -221.92974853515625,
|
|
"KL/std": 105.77481079101562,
|
|
"epoch": 0.7059712773998488,
|
|
"fcm_dpo/beta": 0.005480615422129631,
|
|
"fcm_dpo/delta": 0.0274942759424448,
|
|
"fcm_dpo/margin": 68.06199645996094,
|
|
"fcm_dpo/q_t": 0.4171332120895386,
|
|
"grad_norm": 11.073009490966797,
|
|
"learning_rate": 1.2158065210664848e-07,
|
|
"logits/chosen": 0.8141295909881592,
|
|
"logits/rejected": 0.6771500110626221,
|
|
"logps/chosen": -201.4472198486328,
|
|
"logps/ref_chosen": -47.57947540283203,
|
|
"logps/ref_rejected": -78.68522644042969,
|
|
"logps/rejected": -300.614990234375,
|
|
"loss": 1.1314,
|
|
"margin_dpo/margin_mean": 68.06199645996094,
|
|
"margin_dpo/margin_std": 111.87946319580078,
|
|
"step": 467
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -145.59478759765625,
|
|
"KL/mean": -189.73138427734375,
|
|
"KL/rejected_KL_mean": -233.86801147460938,
|
|
"KL/std": 103.32711791992188,
|
|
"epoch": 0.7074829931972789,
|
|
"fcm_dpo/beta": 0.005452793091535568,
|
|
"fcm_dpo/delta": -0.08334958553314209,
|
|
"fcm_dpo/margin": 88.27322387695312,
|
|
"fcm_dpo/q_t": 0.38989126682281494,
|
|
"grad_norm": 17.591140747070312,
|
|
"learning_rate": 1.204480113956011e-07,
|
|
"logits/chosen": 0.7198989391326904,
|
|
"logits/rejected": 0.7071471810340881,
|
|
"logps/chosen": -209.52256774902344,
|
|
"logps/ref_chosen": -63.92778778076172,
|
|
"logps/ref_rejected": -76.51626586914062,
|
|
"logps/rejected": -310.38427734375,
|
|
"loss": 1.0574,
|
|
"margin_dpo/margin_mean": 88.27322387695312,
|
|
"margin_dpo/margin_std": 117.32644653320312,
|
|
"step": 468
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -148.2090606689453,
|
|
"KL/mean": -187.73593139648438,
|
|
"KL/rejected_KL_mean": -227.2628173828125,
|
|
"KL/std": 100.49610900878906,
|
|
"epoch": 0.708994708994709,
|
|
"fcm_dpo/beta": 0.005415789783000946,
|
|
"fcm_dpo/delta": -0.029107674956321716,
|
|
"fcm_dpo/margin": 79.05377197265625,
|
|
"fcm_dpo/q_t": 0.4013463854789734,
|
|
"grad_norm": 13.335221290588379,
|
|
"learning_rate": 1.1931899453216697e-07,
|
|
"logits/chosen": 0.8280462026596069,
|
|
"logits/rejected": 0.8123753070831299,
|
|
"logps/chosen": -207.26724243164062,
|
|
"logps/ref_chosen": -59.05818176269531,
|
|
"logps/ref_rejected": -75.67672729492188,
|
|
"logps/rejected": -302.9395446777344,
|
|
"loss": 1.0668,
|
|
"margin_dpo/margin_mean": 79.05377197265625,
|
|
"margin_dpo/margin_std": 94.64749145507812,
|
|
"step": 469
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -142.2071990966797,
|
|
"KL/mean": -181.10409545898438,
|
|
"KL/rejected_KL_mean": -220.00100708007812,
|
|
"KL/std": 97.92410278320312,
|
|
"epoch": 0.7105064247921391,
|
|
"fcm_dpo/beta": 0.005424699746072292,
|
|
"fcm_dpo/delta": -0.02253422886133194,
|
|
"fcm_dpo/margin": 77.7938003540039,
|
|
"fcm_dpo/q_t": 0.4034566283226013,
|
|
"grad_norm": 11.77826976776123,
|
|
"learning_rate": 1.1819363309737438e-07,
|
|
"logits/chosen": 0.7707306146621704,
|
|
"logits/rejected": 0.7097501754760742,
|
|
"logps/chosen": -190.07464599609375,
|
|
"logps/ref_chosen": -47.86743927001953,
|
|
"logps/ref_rejected": -65.96859741210938,
|
|
"logps/rejected": -285.9696044921875,
|
|
"loss": 1.0921,
|
|
"margin_dpo/margin_mean": 77.7938003540039,
|
|
"margin_dpo/margin_std": 109.74154663085938,
|
|
"step": 470
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -138.9017333984375,
|
|
"KL/mean": -181.6110382080078,
|
|
"KL/rejected_KL_mean": -224.3203582763672,
|
|
"KL/std": 97.48130798339844,
|
|
"epoch": 0.7120181405895691,
|
|
"fcm_dpo/beta": 0.0053864410147070885,
|
|
"fcm_dpo/delta": -0.06155485659837723,
|
|
"fcm_dpo/margin": 85.41862487792969,
|
|
"fcm_dpo/q_t": 0.39364123344421387,
|
|
"grad_norm": 12.505250930786133,
|
|
"learning_rate": 1.1707195857000215e-07,
|
|
"logits/chosen": 0.7397251129150391,
|
|
"logits/rejected": 0.690066933631897,
|
|
"logps/chosen": -196.67958068847656,
|
|
"logps/ref_chosen": -57.777854919433594,
|
|
"logps/ref_rejected": -73.81172180175781,
|
|
"logps/rejected": -298.132080078125,
|
|
"loss": 1.0588,
|
|
"margin_dpo/margin_mean": 85.41863250732422,
|
|
"margin_dpo/margin_std": 107.98445129394531,
|
|
"step": 471
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -138.8323211669922,
|
|
"KL/mean": -173.5557861328125,
|
|
"KL/rejected_KL_mean": -208.27926635742188,
|
|
"KL/std": 101.28384399414062,
|
|
"epoch": 0.7135298563869993,
|
|
"fcm_dpo/beta": 0.005398962181061506,
|
|
"fcm_dpo/delta": 0.02540610171854496,
|
|
"fcm_dpo/margin": 69.44692993164062,
|
|
"fcm_dpo/q_t": 0.4162787199020386,
|
|
"grad_norm": 12.438529968261719,
|
|
"learning_rate": 1.1595400232569768e-07,
|
|
"logits/chosen": 0.7366932034492493,
|
|
"logits/rejected": 0.6941611766815186,
|
|
"logps/chosen": -194.74099731445312,
|
|
"logps/ref_chosen": -55.908668518066406,
|
|
"logps/ref_rejected": -74.70294189453125,
|
|
"logps/rejected": -282.9822082519531,
|
|
"loss": 1.154,
|
|
"margin_dpo/margin_mean": 69.44692993164062,
|
|
"margin_dpo/margin_std": 126.89105224609375,
|
|
"step": 472
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -142.76242065429688,
|
|
"KL/mean": -186.11215209960938,
|
|
"KL/rejected_KL_mean": -229.46189880371094,
|
|
"KL/std": 118.70932006835938,
|
|
"epoch": 0.7150415721844293,
|
|
"fcm_dpo/beta": 0.0053661237470805645,
|
|
"fcm_dpo/delta": -0.06681197136640549,
|
|
"fcm_dpo/margin": 86.69949340820312,
|
|
"fcm_dpo/q_t": 0.3992576003074646,
|
|
"grad_norm": 14.272549629211426,
|
|
"learning_rate": 1.1483979563610069e-07,
|
|
"logits/chosen": 0.8617863655090332,
|
|
"logits/rejected": 0.7521858215332031,
|
|
"logps/chosen": -196.92330932617188,
|
|
"logps/ref_chosen": -54.16088104248047,
|
|
"logps/ref_rejected": -92.76789855957031,
|
|
"logps/rejected": -322.22979736328125,
|
|
"loss": 1.0996,
|
|
"margin_dpo/margin_mean": 86.69949340820312,
|
|
"margin_dpo/margin_std": 139.1651611328125,
|
|
"step": 473
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -147.14871215820312,
|
|
"KL/mean": -182.34521484375,
|
|
"KL/rejected_KL_mean": -217.54168701171875,
|
|
"KL/std": 104.5096435546875,
|
|
"epoch": 0.7165532879818595,
|
|
"fcm_dpo/beta": 0.0053691859357059,
|
|
"fcm_dpo/delta": 0.02245914936065674,
|
|
"fcm_dpo/margin": 70.39295959472656,
|
|
"fcm_dpo/q_t": 0.4160653352737427,
|
|
"grad_norm": 17.718891143798828,
|
|
"learning_rate": 1.1372936966796709e-07,
|
|
"logits/chosen": 0.8245396614074707,
|
|
"logits/rejected": 0.7552185654640198,
|
|
"logps/chosen": -193.8344268798828,
|
|
"logps/ref_chosen": -46.685707092285156,
|
|
"logps/ref_rejected": -71.44731903076172,
|
|
"logps/rejected": -288.989013671875,
|
|
"loss": 1.1455,
|
|
"margin_dpo/margin_mean": 70.39295959472656,
|
|
"margin_dpo/margin_std": 123.71578216552734,
|
|
"step": 474
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -141.00540161132812,
|
|
"KL/mean": -191.953857421875,
|
|
"KL/rejected_KL_mean": -242.9022979736328,
|
|
"KL/std": 105.191650390625,
|
|
"epoch": 0.7180650037792895,
|
|
"fcm_dpo/beta": 0.005304010584950447,
|
|
"fcm_dpo/delta": -0.14470763504505157,
|
|
"fcm_dpo/margin": 101.89692687988281,
|
|
"fcm_dpo/q_t": 0.3790287971496582,
|
|
"grad_norm": 9.486334800720215,
|
|
"learning_rate": 1.126227554822985e-07,
|
|
"logits/chosen": 0.7848609089851379,
|
|
"logits/rejected": 0.7348465323448181,
|
|
"logps/chosen": -199.49270629882812,
|
|
"logps/ref_chosen": -58.4873046875,
|
|
"logps/ref_rejected": -87.00187683105469,
|
|
"logps/rejected": -329.9041748046875,
|
|
"loss": 1.0063,
|
|
"margin_dpo/margin_mean": 101.89692687988281,
|
|
"margin_dpo/margin_std": 117.62330627441406,
|
|
"step": 475
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -170.7285919189453,
|
|
"KL/mean": -202.77792358398438,
|
|
"KL/rejected_KL_mean": -234.8272247314453,
|
|
"KL/std": 111.24575805664062,
|
|
"epoch": 0.7195767195767195,
|
|
"fcm_dpo/beta": 0.005327947437763214,
|
|
"fcm_dpo/delta": 0.05919649824500084,
|
|
"fcm_dpo/margin": 64.0986557006836,
|
|
"fcm_dpo/q_t": 0.42360225319862366,
|
|
"grad_norm": 13.499788284301758,
|
|
"learning_rate": 1.1151998403347243e-07,
|
|
"logits/chosen": 0.6565501689910889,
|
|
"logits/rejected": 0.6592780351638794,
|
|
"logps/chosen": -246.11021423339844,
|
|
"logps/ref_chosen": -75.38162231445312,
|
|
"logps/ref_rejected": -76.99822235107422,
|
|
"logps/rejected": -311.825439453125,
|
|
"loss": 1.1685,
|
|
"margin_dpo/margin_mean": 64.0986557006836,
|
|
"margin_dpo/margin_std": 118.65982818603516,
|
|
"step": 476
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -168.5745391845703,
|
|
"KL/mean": -204.99005126953125,
|
|
"KL/rejected_KL_mean": -241.40553283691406,
|
|
"KL/std": 111.12599182128906,
|
|
"epoch": 0.7210884353741497,
|
|
"fcm_dpo/beta": 0.005328074097633362,
|
|
"fcm_dpo/delta": 0.012188016436994076,
|
|
"fcm_dpo/margin": 72.83097839355469,
|
|
"fcm_dpo/q_t": 0.4138525724411011,
|
|
"grad_norm": 14.185981750488281,
|
|
"learning_rate": 1.1042108616837692e-07,
|
|
"logits/chosen": 0.742246687412262,
|
|
"logits/rejected": 0.696631908416748,
|
|
"logps/chosen": -229.64793395996094,
|
|
"logps/ref_chosen": -61.073387145996094,
|
|
"logps/ref_rejected": -81.34375,
|
|
"logps/rejected": -322.749267578125,
|
|
"loss": 1.1683,
|
|
"margin_dpo/margin_mean": 72.83097839355469,
|
|
"margin_dpo/margin_std": 141.9138946533203,
|
|
"step": 477
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -149.35894775390625,
|
|
"KL/mean": -177.10594177246094,
|
|
"KL/rejected_KL_mean": -204.8529510498047,
|
|
"KL/std": 93.83651733398438,
|
|
"epoch": 0.7226001511715797,
|
|
"fcm_dpo/beta": 0.005365452729165554,
|
|
"fcm_dpo/delta": 0.10386156290769577,
|
|
"fcm_dpo/margin": 55.493988037109375,
|
|
"fcm_dpo/q_t": 0.4315721392631531,
|
|
"grad_norm": 14.666532516479492,
|
|
"learning_rate": 1.0932609262554746e-07,
|
|
"logits/chosen": 0.7067092061042786,
|
|
"logits/rejected": 0.7186316251754761,
|
|
"logps/chosen": -206.52626037597656,
|
|
"logps/ref_chosen": -57.16731643676758,
|
|
"logps/ref_rejected": -53.30917739868164,
|
|
"logps/rejected": -258.162109375,
|
|
"loss": 1.2021,
|
|
"margin_dpo/margin_mean": 55.493988037109375,
|
|
"margin_dpo/margin_std": 114.86959838867188,
|
|
"step": 478
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -154.97164916992188,
|
|
"KL/mean": -179.97824096679688,
|
|
"KL/rejected_KL_mean": -204.98483276367188,
|
|
"KL/std": 95.27410125732422,
|
|
"epoch": 0.7241118669690099,
|
|
"fcm_dpo/beta": 0.0053960708901286125,
|
|
"fcm_dpo/delta": 0.017206639051437378,
|
|
"fcm_dpo/margin": 50.0131950378418,
|
|
"fcm_dpo/q_t": 0.4387489855289459,
|
|
"grad_norm": 14.426673889160156,
|
|
"learning_rate": 1.0823503403430734e-07,
|
|
"logits/chosen": 0.7068608999252319,
|
|
"logits/rejected": 0.6723740100860596,
|
|
"logps/chosen": -213.8849639892578,
|
|
"logps/ref_chosen": -58.91331481933594,
|
|
"logps/ref_rejected": -63.7403450012207,
|
|
"logps/rejected": -268.7251892089844,
|
|
"loss": 1.2248,
|
|
"margin_dpo/margin_mean": 50.01319885253906,
|
|
"margin_dpo/margin_std": 113.27484893798828,
|
|
"step": 479
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -158.38790893554688,
|
|
"KL/mean": -197.4619140625,
|
|
"KL/rejected_KL_mean": -236.5359344482422,
|
|
"KL/std": 98.72262573242188,
|
|
"epoch": 0.7256235827664399,
|
|
"fcm_dpo/beta": 0.005407529883086681,
|
|
"fcm_dpo/delta": -0.023590780794620514,
|
|
"fcm_dpo/margin": 78.14801788330078,
|
|
"fcm_dpo/q_t": 0.40428832173347473,
|
|
"grad_norm": 15.613828659057617,
|
|
"learning_rate": 1.0714794091391072e-07,
|
|
"logits/chosen": 0.7216229438781738,
|
|
"logits/rejected": 0.7144607305526733,
|
|
"logps/chosen": -221.18850708007812,
|
|
"logps/ref_chosen": -62.80061340332031,
|
|
"logps/ref_rejected": -67.58859252929688,
|
|
"logps/rejected": -304.12451171875,
|
|
"loss": 1.102,
|
|
"margin_dpo/margin_mean": 78.14801788330078,
|
|
"margin_dpo/margin_std": 114.8359375,
|
|
"step": 480
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -157.2689971923828,
|
|
"KL/mean": -192.56350708007812,
|
|
"KL/rejected_KL_mean": -227.85804748535156,
|
|
"KL/std": 103.66803741455078,
|
|
"epoch": 0.72713529856387,
|
|
"fcm_dpo/beta": 0.005385189317166805,
|
|
"fcm_dpo/delta": 0.020217716693878174,
|
|
"fcm_dpo/margin": 70.58904266357422,
|
|
"fcm_dpo/q_t": 0.4145625829696655,
|
|
"grad_norm": 14.026666641235352,
|
|
"learning_rate": 1.0606484367268906e-07,
|
|
"logits/chosen": 0.7155318260192871,
|
|
"logits/rejected": 0.7176682353019714,
|
|
"logps/chosen": -222.55548095703125,
|
|
"logps/ref_chosen": -65.28649139404297,
|
|
"logps/ref_rejected": -70.78668212890625,
|
|
"logps/rejected": -298.64471435546875,
|
|
"loss": 1.1296,
|
|
"margin_dpo/margin_mean": 70.58905029296875,
|
|
"margin_dpo/margin_std": 114.90113830566406,
|
|
"step": 481
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -174.841064453125,
|
|
"KL/mean": -212.74134826660156,
|
|
"KL/rejected_KL_mean": -250.64163208007812,
|
|
"KL/std": 112.12718200683594,
|
|
"epoch": 0.7286470143613001,
|
|
"fcm_dpo/beta": 0.005397781729698181,
|
|
"fcm_dpo/delta": -0.009407659992575645,
|
|
"fcm_dpo/margin": 75.80056762695312,
|
|
"fcm_dpo/q_t": 0.41211438179016113,
|
|
"grad_norm": 14.997920989990234,
|
|
"learning_rate": 1.0498577260720048e-07,
|
|
"logits/chosen": 0.6870341300964355,
|
|
"logits/rejected": 0.5407265424728394,
|
|
"logps/chosen": -235.74725341796875,
|
|
"logps/ref_chosen": -60.906185150146484,
|
|
"logps/ref_rejected": -103.44656372070312,
|
|
"logps/rejected": -354.08819580078125,
|
|
"loss": 1.1473,
|
|
"margin_dpo/margin_mean": 75.80055236816406,
|
|
"margin_dpo/margin_std": 140.5430145263672,
|
|
"step": 482
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -146.6907958984375,
|
|
"KL/mean": -191.40762329101562,
|
|
"KL/rejected_KL_mean": -236.1244354248047,
|
|
"KL/std": 100.60107421875,
|
|
"epoch": 0.7301587301587301,
|
|
"fcm_dpo/beta": 0.005361596588045359,
|
|
"fcm_dpo/delta": -0.08141931146383286,
|
|
"fcm_dpo/margin": 89.43363952636719,
|
|
"fcm_dpo/q_t": 0.39102429151535034,
|
|
"grad_norm": 11.981908798217773,
|
|
"learning_rate": 1.0391075790138232e-07,
|
|
"logits/chosen": 0.7962086200714111,
|
|
"logits/rejected": 0.694132924079895,
|
|
"logps/chosen": -199.8828125,
|
|
"logps/ref_chosen": -53.192012786865234,
|
|
"logps/ref_rejected": -81.83927154541016,
|
|
"logps/rejected": -317.96368408203125,
|
|
"loss": 1.0508,
|
|
"margin_dpo/margin_mean": 89.43363952636719,
|
|
"margin_dpo/margin_std": 114.74483489990234,
|
|
"step": 483
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -153.88995361328125,
|
|
"KL/mean": -181.50051879882812,
|
|
"KL/rejected_KL_mean": -209.11105346679688,
|
|
"KL/std": 98.21662902832031,
|
|
"epoch": 0.7316704459561603,
|
|
"fcm_dpo/beta": 0.005396378692239523,
|
|
"fcm_dpo/delta": 0.1032441109418869,
|
|
"fcm_dpo/margin": 55.221099853515625,
|
|
"fcm_dpo/q_t": 0.4302479326725006,
|
|
"grad_norm": 16.038970947265625,
|
|
"learning_rate": 1.0283982962570681e-07,
|
|
"logits/chosen": 0.8445461988449097,
|
|
"logits/rejected": 0.8143681287765503,
|
|
"logps/chosen": -211.659423828125,
|
|
"logps/ref_chosen": -57.76945877075195,
|
|
"logps/ref_rejected": -71.6829833984375,
|
|
"logps/rejected": -280.7940368652344,
|
|
"loss": 1.1635,
|
|
"margin_dpo/margin_mean": 55.22110366821289,
|
|
"margin_dpo/margin_std": 86.54141235351562,
|
|
"step": 484
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -156.120849609375,
|
|
"KL/mean": -188.37078857421875,
|
|
"KL/rejected_KL_mean": -220.62071228027344,
|
|
"KL/std": 99.75823211669922,
|
|
"epoch": 0.7331821617535903,
|
|
"fcm_dpo/beta": 0.005373854655772448,
|
|
"fcm_dpo/delta": -0.05030012130737305,
|
|
"fcm_dpo/margin": 64.49986267089844,
|
|
"fcm_dpo/q_t": 0.4205852150917053,
|
|
"grad_norm": 12.939190864562988,
|
|
"learning_rate": 1.0177301773633992e-07,
|
|
"logits/chosen": 0.7804263830184937,
|
|
"logits/rejected": 0.7611652612686157,
|
|
"logps/chosen": -212.75669860839844,
|
|
"logps/ref_chosen": -56.63584899902344,
|
|
"logps/ref_rejected": -70.85614013671875,
|
|
"logps/rejected": -291.47686767578125,
|
|
"loss": 1.139,
|
|
"margin_dpo/margin_mean": 64.49986267089844,
|
|
"margin_dpo/margin_std": 98.01535034179688,
|
|
"step": 485
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -180.25726318359375,
|
|
"KL/mean": -211.1187744140625,
|
|
"KL/rejected_KL_mean": -241.98031616210938,
|
|
"KL/std": 115.53843688964844,
|
|
"epoch": 0.7346938775510204,
|
|
"fcm_dpo/beta": 0.005400184541940689,
|
|
"fcm_dpo/delta": 0.06782576441764832,
|
|
"fcm_dpo/margin": 61.7230224609375,
|
|
"fcm_dpo/q_t": 0.42767518758773804,
|
|
"grad_norm": 11.404672622680664,
|
|
"learning_rate": 1.007103520743035e-07,
|
|
"logits/chosen": 0.786423921585083,
|
|
"logits/rejected": 0.6762491464614868,
|
|
"logps/chosen": -236.60427856445312,
|
|
"logps/ref_chosen": -56.347023010253906,
|
|
"logps/ref_rejected": -85.97221374511719,
|
|
"logps/rejected": -327.9525146484375,
|
|
"loss": 1.2,
|
|
"margin_dpo/margin_mean": 61.7230224609375,
|
|
"margin_dpo/margin_std": 134.1376495361328,
|
|
"step": 486
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -161.36866760253906,
|
|
"KL/mean": -198.8399658203125,
|
|
"KL/rejected_KL_mean": -236.311279296875,
|
|
"KL/std": 106.22632598876953,
|
|
"epoch": 0.7362055933484505,
|
|
"fcm_dpo/beta": 0.005406576208770275,
|
|
"fcm_dpo/delta": -0.005295786075294018,
|
|
"fcm_dpo/margin": 74.94261169433594,
|
|
"fcm_dpo/q_t": 0.41207367181777954,
|
|
"grad_norm": 12.90602970123291,
|
|
"learning_rate": 9.965186236464046e-08,
|
|
"logits/chosen": 0.8448601961135864,
|
|
"logits/rejected": 0.7895363569259644,
|
|
"logps/chosen": -221.9858856201172,
|
|
"logps/ref_chosen": -60.617218017578125,
|
|
"logps/ref_rejected": -82.50975036621094,
|
|
"logps/rejected": -318.821044921875,
|
|
"loss": 1.122,
|
|
"margin_dpo/margin_mean": 74.94259643554688,
|
|
"margin_dpo/margin_std": 125.87879180908203,
|
|
"step": 487
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -149.65948486328125,
|
|
"KL/mean": -190.60897827148438,
|
|
"KL/rejected_KL_mean": -231.5584716796875,
|
|
"KL/std": 100.84095001220703,
|
|
"epoch": 0.7377173091458806,
|
|
"fcm_dpo/beta": 0.005403395742177963,
|
|
"fcm_dpo/delta": -0.043636057525873184,
|
|
"fcm_dpo/margin": 81.89897918701172,
|
|
"fcm_dpo/q_t": 0.40081119537353516,
|
|
"grad_norm": 15.800978660583496,
|
|
"learning_rate": 9.859757821558337e-08,
|
|
"logits/chosen": 0.7783271074295044,
|
|
"logits/rejected": 0.7260488271713257,
|
|
"logps/chosen": -212.76853942871094,
|
|
"logps/ref_chosen": -63.10905075073242,
|
|
"logps/ref_rejected": -82.49348449707031,
|
|
"logps/rejected": -314.05194091796875,
|
|
"loss": 1.076,
|
|
"margin_dpo/margin_mean": 81.89897918701172,
|
|
"margin_dpo/margin_std": 110.99021911621094,
|
|
"step": 488
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -176.54498291015625,
|
|
"KL/mean": -201.12728881835938,
|
|
"KL/rejected_KL_mean": -225.70957946777344,
|
|
"KL/std": 111.06015014648438,
|
|
"epoch": 0.7392290249433107,
|
|
"fcm_dpo/beta": 0.005437152460217476,
|
|
"fcm_dpo/delta": 0.13451340794563293,
|
|
"fcm_dpo/margin": 49.16458511352539,
|
|
"fcm_dpo/q_t": 0.4393724799156189,
|
|
"grad_norm": 12.729429244995117,
|
|
"learning_rate": 9.754752911772615e-08,
|
|
"logits/chosen": 0.7313129901885986,
|
|
"logits/rejected": 0.6881732940673828,
|
|
"logps/chosen": -241.53395080566406,
|
|
"logps/ref_chosen": -64.98896026611328,
|
|
"logps/ref_rejected": -84.39607238769531,
|
|
"logps/rejected": -310.10565185546875,
|
|
"loss": 1.2368,
|
|
"margin_dpo/margin_mean": 49.164581298828125,
|
|
"margin_dpo/margin_std": 119.40127563476562,
|
|
"step": 489
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -158.84475708007812,
|
|
"KL/mean": -188.68807983398438,
|
|
"KL/rejected_KL_mean": -218.53143310546875,
|
|
"KL/std": 107.776123046875,
|
|
"epoch": 0.7407407407407407,
|
|
"fcm_dpo/beta": 0.005476398393511772,
|
|
"fcm_dpo/delta": 0.07433385401964188,
|
|
"fcm_dpo/margin": 59.686683654785156,
|
|
"fcm_dpo/q_t": 0.4273667633533478,
|
|
"grad_norm": 11.690750122070312,
|
|
"learning_rate": 9.650174444319956e-08,
|
|
"logits/chosen": 0.8151066303253174,
|
|
"logits/rejected": 0.7985807657241821,
|
|
"logps/chosen": -220.7534942626953,
|
|
"logps/ref_chosen": -61.90874481201172,
|
|
"logps/ref_rejected": -70.58566284179688,
|
|
"logps/rejected": -289.1170959472656,
|
|
"loss": 1.2126,
|
|
"margin_dpo/margin_mean": 59.686683654785156,
|
|
"margin_dpo/margin_std": 134.43087768554688,
|
|
"step": 490
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -154.75782775878906,
|
|
"KL/mean": -185.53125,
|
|
"KL/rejected_KL_mean": -216.30465698242188,
|
|
"KL/std": 97.72976684570312,
|
|
"epoch": 0.7422524565381708,
|
|
"fcm_dpo/beta": 0.0054778759367764,
|
|
"fcm_dpo/delta": -0.03602200374007225,
|
|
"fcm_dpo/margin": 61.54682922363281,
|
|
"fcm_dpo/q_t": 0.4233064651489258,
|
|
"grad_norm": 12.059008598327637,
|
|
"learning_rate": 9.546025344484868e-08,
|
|
"logits/chosen": 0.690648078918457,
|
|
"logits/rejected": 0.6332237720489502,
|
|
"logps/chosen": -210.2335205078125,
|
|
"logps/ref_chosen": -55.47570037841797,
|
|
"logps/ref_rejected": -78.70318603515625,
|
|
"logps/rejected": -295.0078430175781,
|
|
"loss": 1.1563,
|
|
"margin_dpo/margin_mean": 61.54682922363281,
|
|
"margin_dpo/margin_std": 104.0001220703125,
|
|
"step": 491
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -181.66058349609375,
|
|
"KL/mean": -209.5015411376953,
|
|
"KL/rejected_KL_mean": -237.34249877929688,
|
|
"KL/std": 111.32963562011719,
|
|
"epoch": 0.7437641723356009,
|
|
"fcm_dpo/beta": 0.005480607505887747,
|
|
"fcm_dpo/delta": 0.00996854156255722,
|
|
"fcm_dpo/margin": 55.681915283203125,
|
|
"fcm_dpo/q_t": 0.4316216707229614,
|
|
"grad_norm": 14.345245361328125,
|
|
"learning_rate": 9.442308525541589e-08,
|
|
"logits/chosen": 0.7753247022628784,
|
|
"logits/rejected": 0.7084952592849731,
|
|
"logps/chosen": -248.94696044921875,
|
|
"logps/ref_chosen": -67.28638458251953,
|
|
"logps/ref_rejected": -82.78628540039062,
|
|
"logps/rejected": -320.1287841796875,
|
|
"loss": 1.2187,
|
|
"margin_dpo/margin_mean": 55.681915283203125,
|
|
"margin_dpo/margin_std": 124.83898162841797,
|
|
"step": 492
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -149.962890625,
|
|
"KL/mean": -191.0338592529297,
|
|
"KL/rejected_KL_mean": -232.1048126220703,
|
|
"KL/std": 108.8577880859375,
|
|
"epoch": 0.745275888133031,
|
|
"fcm_dpo/beta": 0.005483937915414572,
|
|
"fcm_dpo/delta": -0.05205898731946945,
|
|
"fcm_dpo/margin": 82.14193725585938,
|
|
"fcm_dpo/q_t": 0.3994411826133728,
|
|
"grad_norm": 13.144427299499512,
|
|
"learning_rate": 9.339026888672468e-08,
|
|
"logits/chosen": 0.7183511257171631,
|
|
"logits/rejected": 0.6497530937194824,
|
|
"logps/chosen": -205.89039611816406,
|
|
"logps/ref_chosen": -55.92750549316406,
|
|
"logps/ref_rejected": -79.12149810791016,
|
|
"logps/rejected": -311.226318359375,
|
|
"loss": 1.0843,
|
|
"margin_dpo/margin_mean": 82.14193725585938,
|
|
"margin_dpo/margin_std": 116.04005432128906,
|
|
"step": 493
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -155.21142578125,
|
|
"KL/mean": -191.24118041992188,
|
|
"KL/rejected_KL_mean": -227.27098083496094,
|
|
"KL/std": 108.94725036621094,
|
|
"epoch": 0.7467876039304611,
|
|
"fcm_dpo/beta": 0.005453408230096102,
|
|
"fcm_dpo/delta": 0.007150499150156975,
|
|
"fcm_dpo/margin": 72.05953979492188,
|
|
"fcm_dpo/q_t": 0.4127471446990967,
|
|
"grad_norm": 13.744332313537598,
|
|
"learning_rate": 9.236183322886945e-08,
|
|
"logits/chosen": 0.655427098274231,
|
|
"logits/rejected": 0.6113977432250977,
|
|
"logps/chosen": -223.16552734375,
|
|
"logps/ref_chosen": -67.95410919189453,
|
|
"logps/ref_rejected": -90.50865173339844,
|
|
"logps/rejected": -317.7796325683594,
|
|
"loss": 1.1538,
|
|
"margin_dpo/margin_mean": 72.0595474243164,
|
|
"margin_dpo/margin_std": 134.70046997070312,
|
|
"step": 494
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -151.46163940429688,
|
|
"KL/mean": -180.19345092773438,
|
|
"KL/rejected_KL_mean": -208.92526245117188,
|
|
"KL/std": 107.10484313964844,
|
|
"epoch": 0.7482993197278912,
|
|
"fcm_dpo/beta": 0.00545845553278923,
|
|
"fcm_dpo/delta": -0.0011510624317452312,
|
|
"fcm_dpo/margin": 57.46360778808594,
|
|
"fcm_dpo/q_t": 0.43396657705307007,
|
|
"grad_norm": 15.65528678894043,
|
|
"learning_rate": 9.133780704940594e-08,
|
|
"logits/chosen": 0.8183537721633911,
|
|
"logits/rejected": 0.760042130947113,
|
|
"logps/chosen": -204.08709716796875,
|
|
"logps/ref_chosen": -52.62546157836914,
|
|
"logps/ref_rejected": -72.06781005859375,
|
|
"logps/rejected": -280.9930725097656,
|
|
"loss": 1.2129,
|
|
"margin_dpo/margin_mean": 57.46360778808594,
|
|
"margin_dpo/margin_std": 132.02793884277344,
|
|
"step": 495
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -165.80612182617188,
|
|
"KL/mean": -205.53250122070312,
|
|
"KL/rejected_KL_mean": -245.25885009765625,
|
|
"KL/std": 115.95829772949219,
|
|
"epoch": 0.7498110355253212,
|
|
"fcm_dpo/beta": 0.005426845978945494,
|
|
"fcm_dpo/delta": -0.03242133557796478,
|
|
"fcm_dpo/margin": 79.45274353027344,
|
|
"fcm_dpo/q_t": 0.41209036111831665,
|
|
"grad_norm": 12.084851264953613,
|
|
"learning_rate": 9.031821899254797e-08,
|
|
"logits/chosen": 0.7423931360244751,
|
|
"logits/rejected": 0.6340707540512085,
|
|
"logps/chosen": -223.4034423828125,
|
|
"logps/ref_chosen": -57.597320556640625,
|
|
"logps/ref_rejected": -94.36127471923828,
|
|
"logps/rejected": -339.6201171875,
|
|
"loss": 1.1356,
|
|
"margin_dpo/margin_mean": 79.45274353027344,
|
|
"margin_dpo/margin_std": 147.74464416503906,
|
|
"step": 496
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -169.126220703125,
|
|
"KL/mean": -207.857421875,
|
|
"KL/rejected_KL_mean": -246.58860778808594,
|
|
"KL/std": 106.638427734375,
|
|
"epoch": 0.7513227513227513,
|
|
"fcm_dpo/beta": 0.005428179167211056,
|
|
"fcm_dpo/delta": -0.02095372974872589,
|
|
"fcm_dpo/margin": 77.46238708496094,
|
|
"fcm_dpo/q_t": 0.40322697162628174,
|
|
"grad_norm": 12.732601165771484,
|
|
"learning_rate": 8.930309757836516e-08,
|
|
"logits/chosen": 0.7879632711410522,
|
|
"logits/rejected": 0.7538702487945557,
|
|
"logps/chosen": -241.91616821289062,
|
|
"logps/ref_chosen": -72.78994750976562,
|
|
"logps/ref_rejected": -89.48483276367188,
|
|
"logps/rejected": -336.07342529296875,
|
|
"loss": 1.1031,
|
|
"margin_dpo/margin_mean": 77.46238708496094,
|
|
"margin_dpo/margin_std": 116.40192413330078,
|
|
"step": 497
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -149.73135375976562,
|
|
"KL/mean": -190.19110107421875,
|
|
"KL/rejected_KL_mean": -230.65081787109375,
|
|
"KL/std": 101.40240478515625,
|
|
"epoch": 0.7528344671201814,
|
|
"fcm_dpo/beta": 0.00541150476783514,
|
|
"fcm_dpo/delta": -0.038762595504522324,
|
|
"fcm_dpo/margin": 80.91944885253906,
|
|
"fcm_dpo/q_t": 0.3986179828643799,
|
|
"grad_norm": 14.68930435180664,
|
|
"learning_rate": 8.829247120198563e-08,
|
|
"logits/chosen": 0.7435435652732849,
|
|
"logits/rejected": 0.7241132259368896,
|
|
"logps/chosen": -218.09707641601562,
|
|
"logps/ref_chosen": -68.36572265625,
|
|
"logps/ref_rejected": -71.28846740722656,
|
|
"logps/rejected": -301.93927001953125,
|
|
"loss": 1.0767,
|
|
"margin_dpo/margin_mean": 80.91944885253906,
|
|
"margin_dpo/margin_std": 108.93598175048828,
|
|
"step": 498
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -151.90406799316406,
|
|
"KL/mean": -193.71255493164062,
|
|
"KL/rejected_KL_mean": -235.52105712890625,
|
|
"KL/std": 111.45284271240234,
|
|
"epoch": 0.7543461829176115,
|
|
"fcm_dpo/beta": 0.0053865425288677216,
|
|
"fcm_dpo/delta": -0.05157562717795372,
|
|
"fcm_dpo/margin": 83.61698150634766,
|
|
"fcm_dpo/q_t": 0.4019428491592407,
|
|
"grad_norm": 16.794891357421875,
|
|
"learning_rate": 8.728636813280163e-08,
|
|
"logits/chosen": 0.770295262336731,
|
|
"logits/rejected": 0.7065696716308594,
|
|
"logps/chosen": -213.81289672851562,
|
|
"logps/ref_chosen": -61.90882873535156,
|
|
"logps/ref_rejected": -91.9411392211914,
|
|
"logps/rejected": -327.4621887207031,
|
|
"loss": 1.1271,
|
|
"margin_dpo/margin_mean": 83.61698150634766,
|
|
"margin_dpo/margin_std": 145.55764770507812,
|
|
"step": 499
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -158.18289184570312,
|
|
"KL/mean": -192.304931640625,
|
|
"KL/rejected_KL_mean": -226.4269561767578,
|
|
"KL/std": 99.20738220214844,
|
|
"epoch": 0.7558578987150416,
|
|
"fcm_dpo/beta": 0.005387833341956139,
|
|
"fcm_dpo/delta": 0.032907333225011826,
|
|
"fcm_dpo/margin": 68.24404907226562,
|
|
"fcm_dpo/q_t": 0.41398870944976807,
|
|
"grad_norm": 14.322124481201172,
|
|
"learning_rate": 8.628481651367875e-08,
|
|
"logits/chosen": 0.685723602771759,
|
|
"logits/rejected": 0.693622350692749,
|
|
"logps/chosen": -228.40872192382812,
|
|
"logps/ref_chosen": -70.225830078125,
|
|
"logps/ref_rejected": -71.72203063964844,
|
|
"logps/rejected": -298.14898681640625,
|
|
"loss": 1.1633,
|
|
"margin_dpo/margin_mean": 68.24405670166016,
|
|
"margin_dpo/margin_std": 127.1922607421875,
|
|
"step": 500
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -159.03128051757812,
|
|
"KL/mean": -190.15521240234375,
|
|
"KL/rejected_KL_mean": -221.27915954589844,
|
|
"KL/std": 105.76667022705078,
|
|
"epoch": 0.7573696145124716,
|
|
"fcm_dpo/beta": 0.005429171957075596,
|
|
"fcm_dpo/delta": 0.06300182640552521,
|
|
"fcm_dpo/margin": 62.24788284301758,
|
|
"fcm_dpo/q_t": 0.42096561193466187,
|
|
"grad_norm": 10.816944122314453,
|
|
"learning_rate": 8.528784436016878e-08,
|
|
"logits/chosen": 0.7591949701309204,
|
|
"logits/rejected": 0.763592004776001,
|
|
"logps/chosen": -223.63009643554688,
|
|
"logps/ref_chosen": -64.59880828857422,
|
|
"logps/ref_rejected": -70.59329223632812,
|
|
"logps/rejected": -291.8724365234375,
|
|
"loss": 1.1287,
|
|
"margin_dpo/margin_mean": 62.247886657714844,
|
|
"margin_dpo/margin_std": 85.21642303466797,
|
|
"step": 501
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -158.91415405273438,
|
|
"KL/mean": -192.72329711914062,
|
|
"KL/rejected_KL_mean": -226.5324249267578,
|
|
"KL/std": 112.70011138916016,
|
|
"epoch": 0.7588813303099018,
|
|
"fcm_dpo/beta": 0.005454091355204582,
|
|
"fcm_dpo/delta": 0.03162407875061035,
|
|
"fcm_dpo/margin": 67.61825561523438,
|
|
"fcm_dpo/q_t": 0.4157477021217346,
|
|
"grad_norm": 14.987340927124023,
|
|
"learning_rate": 8.4295479559726e-08,
|
|
"logits/chosen": 0.7527762651443481,
|
|
"logits/rejected": 0.7075684070587158,
|
|
"logps/chosen": -224.38079833984375,
|
|
"logps/ref_chosen": -65.46662902832031,
|
|
"logps/ref_rejected": -90.22233581542969,
|
|
"logps/rejected": -316.7547607421875,
|
|
"loss": 1.1283,
|
|
"margin_dpo/margin_mean": 67.6182632446289,
|
|
"margin_dpo/margin_std": 104.54127502441406,
|
|
"step": 502
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -142.51441955566406,
|
|
"KL/mean": -176.8465576171875,
|
|
"KL/rejected_KL_mean": -211.17868041992188,
|
|
"KL/std": 97.20864868164062,
|
|
"epoch": 0.7603930461073318,
|
|
"fcm_dpo/beta": 0.0054566278122365475,
|
|
"fcm_dpo/delta": 0.02580934390425682,
|
|
"fcm_dpo/margin": 68.66426086425781,
|
|
"fcm_dpo/q_t": 0.4131636917591095,
|
|
"grad_norm": 11.592777252197266,
|
|
"learning_rate": 8.330774987092712e-08,
|
|
"logits/chosen": 0.7066045999526978,
|
|
"logits/rejected": 0.7125393152236938,
|
|
"logps/chosen": -194.34918212890625,
|
|
"logps/ref_chosen": -51.83476257324219,
|
|
"logps/ref_rejected": -57.62522506713867,
|
|
"logps/rejected": -268.8039245605469,
|
|
"loss": 1.135,
|
|
"margin_dpo/margin_mean": 68.66426086425781,
|
|
"margin_dpo/margin_std": 111.32344055175781,
|
|
"step": 503
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -147.58689880371094,
|
|
"KL/mean": -192.6900634765625,
|
|
"KL/rejected_KL_mean": -237.7931671142578,
|
|
"KL/std": 97.58802795410156,
|
|
"epoch": 0.7619047619047619,
|
|
"fcm_dpo/beta": 0.005443494766950607,
|
|
"fcm_dpo/delta": -0.09346402436494827,
|
|
"fcm_dpo/margin": 90.20628356933594,
|
|
"fcm_dpo/q_t": 0.3868753910064697,
|
|
"grad_norm": 12.498225212097168,
|
|
"learning_rate": 8.232468292269479e-08,
|
|
"logits/chosen": 0.7445521354675293,
|
|
"logits/rejected": 0.7288735508918762,
|
|
"logps/chosen": -216.23809814453125,
|
|
"logps/ref_chosen": -68.65119934082031,
|
|
"logps/ref_rejected": -77.91394805908203,
|
|
"logps/rejected": -315.7071228027344,
|
|
"loss": 1.0278,
|
|
"margin_dpo/margin_mean": 90.20629119873047,
|
|
"margin_dpo/margin_std": 100.27572631835938,
|
|
"step": 504
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -156.47079467773438,
|
|
"KL/mean": -186.9658660888672,
|
|
"KL/rejected_KL_mean": -217.4609375,
|
|
"KL/std": 113.70817565917969,
|
|
"epoch": 0.763416477702192,
|
|
"fcm_dpo/beta": 0.005394799634814262,
|
|
"fcm_dpo/delta": -0.03289571776986122,
|
|
"fcm_dpo/margin": 60.99015808105469,
|
|
"fcm_dpo/q_t": 0.4272536635398865,
|
|
"grad_norm": 13.567100524902344,
|
|
"learning_rate": 8.134630621352483e-08,
|
|
"logits/chosen": 0.7401154041290283,
|
|
"logits/rejected": 0.7042238712310791,
|
|
"logps/chosen": -216.46963500976562,
|
|
"logps/ref_chosen": -59.99884796142578,
|
|
"logps/ref_rejected": -76.88048553466797,
|
|
"logps/rejected": -294.3414306640625,
|
|
"loss": 1.1941,
|
|
"margin_dpo/margin_mean": 60.990150451660156,
|
|
"margin_dpo/margin_std": 125.24246215820312,
|
|
"step": 505
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -155.918212890625,
|
|
"KL/mean": -188.87939453125,
|
|
"KL/rejected_KL_mean": -221.840576171875,
|
|
"KL/std": 100.90999603271484,
|
|
"epoch": 0.764928193499622,
|
|
"fcm_dpo/beta": 0.005406984128057957,
|
|
"fcm_dpo/delta": 0.044350821524858475,
|
|
"fcm_dpo/margin": 65.92234802246094,
|
|
"fcm_dpo/q_t": 0.418279767036438,
|
|
"grad_norm": 13.672428131103516,
|
|
"learning_rate": 8.037264711071698e-08,
|
|
"logits/chosen": 0.7638306021690369,
|
|
"logits/rejected": 0.7502192854881287,
|
|
"logps/chosen": -225.98951721191406,
|
|
"logps/ref_chosen": -70.07130432128906,
|
|
"logps/ref_rejected": -82.03775024414062,
|
|
"logps/rejected": -303.8783264160156,
|
|
"loss": 1.1701,
|
|
"margin_dpo/margin_mean": 65.92234802246094,
|
|
"margin_dpo/margin_std": 125.46661376953125,
|
|
"step": 506
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -166.49728393554688,
|
|
"KL/mean": -204.08596801757812,
|
|
"KL/rejected_KL_mean": -241.6746368408203,
|
|
"KL/std": 113.40718078613281,
|
|
"epoch": 0.7664399092970522,
|
|
"fcm_dpo/beta": 0.005402985028922558,
|
|
"fcm_dpo/delta": -0.006565045565366745,
|
|
"fcm_dpo/margin": 75.17735290527344,
|
|
"fcm_dpo/q_t": 0.4134998321533203,
|
|
"grad_norm": 13.669761657714844,
|
|
"learning_rate": 7.940373284960933e-08,
|
|
"logits/chosen": 0.7151587009429932,
|
|
"logits/rejected": 0.6711717844009399,
|
|
"logps/chosen": -238.50433349609375,
|
|
"logps/ref_chosen": -72.00703430175781,
|
|
"logps/ref_rejected": -93.94987487792969,
|
|
"logps/rejected": -335.62451171875,
|
|
"loss": 1.1418,
|
|
"margin_dpo/margin_mean": 75.17735290527344,
|
|
"margin_dpo/margin_std": 133.1409149169922,
|
|
"step": 507
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -153.20932006835938,
|
|
"KL/mean": -197.40313720703125,
|
|
"KL/rejected_KL_mean": -241.5969696044922,
|
|
"KL/std": 119.44464874267578,
|
|
"epoch": 0.7679516250944822,
|
|
"fcm_dpo/beta": 0.005401117727160454,
|
|
"fcm_dpo/delta": -0.07946581393480301,
|
|
"fcm_dpo/margin": 88.38764953613281,
|
|
"fcm_dpo/q_t": 0.3947511613368988,
|
|
"grad_norm": 15.133732795715332,
|
|
"learning_rate": 7.843959053281663e-08,
|
|
"logits/chosen": 0.6844520568847656,
|
|
"logits/rejected": 0.5736185908317566,
|
|
"logps/chosen": -213.42926025390625,
|
|
"logps/ref_chosen": -60.21992492675781,
|
|
"logps/ref_rejected": -95.9200668334961,
|
|
"logps/rejected": -337.51702880859375,
|
|
"loss": 1.0794,
|
|
"margin_dpo/margin_mean": 88.38764190673828,
|
|
"margin_dpo/margin_std": 128.1157684326172,
|
|
"step": 508
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -163.38623046875,
|
|
"KL/mean": -195.60104370117188,
|
|
"KL/rejected_KL_mean": -227.81588745117188,
|
|
"KL/std": 105.54531860351562,
|
|
"epoch": 0.7694633408919124,
|
|
"fcm_dpo/beta": 0.005387607030570507,
|
|
"fcm_dpo/delta": 0.053819116204977036,
|
|
"fcm_dpo/margin": 64.42964172363281,
|
|
"fcm_dpo/q_t": 0.4206730127334595,
|
|
"grad_norm": 15.97049331665039,
|
|
"learning_rate": 7.748024712947204e-08,
|
|
"logits/chosen": 0.6781574487686157,
|
|
"logits/rejected": 0.6578694581985474,
|
|
"logps/chosen": -229.65640258789062,
|
|
"logps/ref_chosen": -66.27017211914062,
|
|
"logps/ref_rejected": -71.73065185546875,
|
|
"logps/rejected": -299.5465087890625,
|
|
"loss": 1.1735,
|
|
"margin_dpo/margin_mean": 64.42964935302734,
|
|
"margin_dpo/margin_std": 123.07508087158203,
|
|
"step": 509
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -161.87185668945312,
|
|
"KL/mean": -203.8086395263672,
|
|
"KL/rejected_KL_mean": -245.74542236328125,
|
|
"KL/std": 116.02742004394531,
|
|
"epoch": 0.7709750566893424,
|
|
"fcm_dpo/beta": 0.005369152873754501,
|
|
"fcm_dpo/delta": -0.05177878588438034,
|
|
"fcm_dpo/margin": 83.87354278564453,
|
|
"fcm_dpo/q_t": 0.4047764539718628,
|
|
"grad_norm": 13.38278865814209,
|
|
"learning_rate": 7.652572947447272e-08,
|
|
"logits/chosen": 0.841407060623169,
|
|
"logits/rejected": 0.7504467964172363,
|
|
"logps/chosen": -215.416748046875,
|
|
"logps/ref_chosen": -53.54487609863281,
|
|
"logps/ref_rejected": -91.36648559570312,
|
|
"logps/rejected": -337.1119079589844,
|
|
"loss": 1.1246,
|
|
"margin_dpo/margin_mean": 83.87353515625,
|
|
"margin_dpo/margin_std": 147.5673065185547,
|
|
"step": 510
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -152.28256225585938,
|
|
"KL/mean": -199.58544921875,
|
|
"KL/rejected_KL_mean": -246.8883514404297,
|
|
"KL/std": 104.28471374511719,
|
|
"epoch": 0.7724867724867724,
|
|
"fcm_dpo/beta": 0.005333633162081242,
|
|
"fcm_dpo/delta": -0.10725206136703491,
|
|
"fcm_dpo/margin": 94.60578918457031,
|
|
"fcm_dpo/q_t": 0.38472574949264526,
|
|
"grad_norm": 15.535444259643555,
|
|
"learning_rate": 7.557606426772961e-08,
|
|
"logits/chosen": 0.7721422910690308,
|
|
"logits/rejected": 0.7171282768249512,
|
|
"logps/chosen": -208.126953125,
|
|
"logps/ref_chosen": -55.844383239746094,
|
|
"logps/ref_rejected": -86.49819946289062,
|
|
"logps/rejected": -333.38653564453125,
|
|
"loss": 1.0296,
|
|
"margin_dpo/margin_mean": 94.60578918457031,
|
|
"margin_dpo/margin_std": 111.76858520507812,
|
|
"step": 511
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -158.52825927734375,
|
|
"KL/mean": -187.4003143310547,
|
|
"KL/rejected_KL_mean": -216.2723388671875,
|
|
"KL/std": 97.47703552246094,
|
|
"epoch": 0.7739984882842026,
|
|
"fcm_dpo/beta": 0.005345332436263561,
|
|
"fcm_dpo/delta": 0.09281959384679794,
|
|
"fcm_dpo/margin": 57.74409484863281,
|
|
"fcm_dpo/q_t": 0.4283400774002075,
|
|
"grad_norm": 19.753469467163086,
|
|
"learning_rate": 7.463127807341966e-08,
|
|
"logits/chosen": 0.6650197505950928,
|
|
"logits/rejected": 0.6646835207939148,
|
|
"logps/chosen": -220.18130493164062,
|
|
"logps/ref_chosen": -61.653038024902344,
|
|
"logps/ref_rejected": -72.83148193359375,
|
|
"logps/rejected": -289.10382080078125,
|
|
"loss": 1.1975,
|
|
"margin_dpo/margin_mean": 57.74409484863281,
|
|
"margin_dpo/margin_std": 117.67747497558594,
|
|
"step": 512
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -140.71241760253906,
|
|
"KL/mean": -178.47698974609375,
|
|
"KL/rejected_KL_mean": -216.24154663085938,
|
|
"KL/std": 99.27334594726562,
|
|
"epoch": 0.7755102040816326,
|
|
"fcm_dpo/beta": 0.00535899493843317,
|
|
"fcm_dpo/delta": -0.004894678480923176,
|
|
"fcm_dpo/margin": 75.52914428710938,
|
|
"fcm_dpo/q_t": 0.40808457136154175,
|
|
"grad_norm": 10.680781364440918,
|
|
"learning_rate": 7.369139731924401e-08,
|
|
"logits/chosen": 0.8681324124336243,
|
|
"logits/rejected": 0.8150092363357544,
|
|
"logps/chosen": -191.56497192382812,
|
|
"logps/ref_chosen": -50.85256576538086,
|
|
"logps/ref_rejected": -69.21754455566406,
|
|
"logps/rejected": -285.4591064453125,
|
|
"loss": 1.0961,
|
|
"margin_dpo/margin_mean": 75.52914428710938,
|
|
"margin_dpo/margin_std": 105.1405258178711,
|
|
"step": 513
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -157.7886962890625,
|
|
"KL/mean": -201.05538940429688,
|
|
"KL/rejected_KL_mean": -244.3220672607422,
|
|
"KL/std": 108.67066192626953,
|
|
"epoch": 0.7770219198790628,
|
|
"fcm_dpo/beta": 0.005348518490791321,
|
|
"fcm_dpo/delta": -0.0643550381064415,
|
|
"fcm_dpo/margin": 86.53337097167969,
|
|
"fcm_dpo/q_t": 0.39564377069473267,
|
|
"grad_norm": 15.53198528289795,
|
|
"learning_rate": 7.275644829568747e-08,
|
|
"logits/chosen": 0.7602769136428833,
|
|
"logits/rejected": 0.7313047647476196,
|
|
"logps/chosen": -227.1736297607422,
|
|
"logps/ref_chosen": -69.38493347167969,
|
|
"logps/ref_rejected": -83.32447814941406,
|
|
"logps/rejected": -327.64654541015625,
|
|
"loss": 1.0763,
|
|
"margin_dpo/margin_mean": 86.53337097167969,
|
|
"margin_dpo/margin_std": 122.80735778808594,
|
|
"step": 514
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -166.77413940429688,
|
|
"KL/mean": -200.5863037109375,
|
|
"KL/rejected_KL_mean": -234.39846801757812,
|
|
"KL/std": 101.7635498046875,
|
|
"epoch": 0.7785336356764928,
|
|
"fcm_dpo/beta": 0.005340108182281256,
|
|
"fcm_dpo/delta": 0.03959900885820389,
|
|
"fcm_dpo/margin": 67.62432861328125,
|
|
"fcm_dpo/q_t": 0.41682863235473633,
|
|
"grad_norm": 13.542834281921387,
|
|
"learning_rate": 7.182645715528435e-08,
|
|
"logits/chosen": 0.776336669921875,
|
|
"logits/rejected": 0.6978193521499634,
|
|
"logps/chosen": -220.46116638183594,
|
|
"logps/ref_chosen": -53.687034606933594,
|
|
"logps/ref_rejected": -83.59614562988281,
|
|
"logps/rejected": -317.9945983886719,
|
|
"loss": 1.1484,
|
|
"margin_dpo/margin_mean": 67.62432861328125,
|
|
"margin_dpo/margin_std": 116.02633666992188,
|
|
"step": 515
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -144.1265411376953,
|
|
"KL/mean": -176.46029663085938,
|
|
"KL/rejected_KL_mean": -208.79405212402344,
|
|
"KL/std": 100.71145629882812,
|
|
"epoch": 0.780045351473923,
|
|
"fcm_dpo/beta": 0.005379277281463146,
|
|
"fcm_dpo/delta": 0.05288197100162506,
|
|
"fcm_dpo/margin": 64.66749572753906,
|
|
"fcm_dpo/q_t": 0.41916587948799133,
|
|
"grad_norm": 15.515447616577148,
|
|
"learning_rate": 7.090144991188568e-08,
|
|
"logits/chosen": 0.741989254951477,
|
|
"logits/rejected": 0.7075991034507751,
|
|
"logps/chosen": -201.02825927734375,
|
|
"logps/ref_chosen": -56.9017219543457,
|
|
"logps/ref_rejected": -67.83477783203125,
|
|
"logps/rejected": -276.62884521484375,
|
|
"loss": 1.1656,
|
|
"margin_dpo/margin_mean": 64.66749572753906,
|
|
"margin_dpo/margin_std": 118.56062316894531,
|
|
"step": 516
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -172.152099609375,
|
|
"KL/mean": -197.2422332763672,
|
|
"KL/rejected_KL_mean": -222.33236694335938,
|
|
"KL/std": 106.96368408203125,
|
|
"epoch": 0.781557067271353,
|
|
"fcm_dpo/beta": 0.005388031248003244,
|
|
"fcm_dpo/delta": 0.03654252737760544,
|
|
"fcm_dpo/margin": 50.18029022216797,
|
|
"fcm_dpo/q_t": 0.43927669525146484,
|
|
"grad_norm": 14.144505500793457,
|
|
"learning_rate": 6.998145243993284e-08,
|
|
"logits/chosen": 0.7997580766677856,
|
|
"logits/rejected": 0.8024989366531372,
|
|
"logps/chosen": -233.92723083496094,
|
|
"logps/ref_chosen": -61.775142669677734,
|
|
"logps/ref_rejected": -62.88270950317383,
|
|
"logps/rejected": -285.215087890625,
|
|
"loss": 1.2188,
|
|
"margin_dpo/margin_mean": 50.18029022216797,
|
|
"margin_dpo/margin_std": 111.07633972167969,
|
|
"step": 517
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -146.79779052734375,
|
|
"KL/mean": -182.67062377929688,
|
|
"KL/rejected_KL_mean": -218.54345703125,
|
|
"KL/std": 104.68883514404297,
|
|
"epoch": 0.783068783068783,
|
|
"fcm_dpo/beta": 0.005403092131018639,
|
|
"fcm_dpo/delta": 0.012595370411872864,
|
|
"fcm_dpo/margin": 71.74566650390625,
|
|
"fcm_dpo/q_t": 0.41405540704727173,
|
|
"grad_norm": 12.186388969421387,
|
|
"learning_rate": 6.906649047373245e-08,
|
|
"logits/chosen": 0.7632776498794556,
|
|
"logits/rejected": 0.7231289148330688,
|
|
"logps/chosen": -208.82302856445312,
|
|
"logps/ref_chosen": -62.02523422241211,
|
|
"logps/ref_rejected": -79.06085205078125,
|
|
"logps/rejected": -297.60430908203125,
|
|
"loss": 1.1293,
|
|
"margin_dpo/margin_mean": 71.74566650390625,
|
|
"margin_dpo/margin_std": 118.25212097167969,
|
|
"step": 518
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -175.76966857910156,
|
|
"KL/mean": -196.76345825195312,
|
|
"KL/rejected_KL_mean": -217.75721740722656,
|
|
"KL/std": 106.1201171875,
|
|
"epoch": 0.7845804988662132,
|
|
"fcm_dpo/beta": 0.005423480644822121,
|
|
"fcm_dpo/delta": 0.0693359300494194,
|
|
"fcm_dpo/margin": 41.98755645751953,
|
|
"fcm_dpo/q_t": 0.44785207509994507,
|
|
"grad_norm": 18.86286735534668,
|
|
"learning_rate": 6.815658960673781e-08,
|
|
"logits/chosen": 0.7679111957550049,
|
|
"logits/rejected": 0.7231424450874329,
|
|
"logps/chosen": -237.37603759765625,
|
|
"logps/ref_chosen": -61.60636901855469,
|
|
"logps/ref_rejected": -74.50727844238281,
|
|
"logps/rejected": -292.2644958496094,
|
|
"loss": 1.3065,
|
|
"margin_dpo/margin_mean": 41.98755645751953,
|
|
"margin_dpo/margin_std": 139.41989135742188,
|
|
"step": 519
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -161.29348754882812,
|
|
"KL/mean": -193.04122924804688,
|
|
"KL/rejected_KL_mean": -224.78900146484375,
|
|
"KL/std": 106.16980743408203,
|
|
"epoch": 0.7860922146636432,
|
|
"fcm_dpo/beta": 0.005449650809168816,
|
|
"fcm_dpo/delta": 0.05476709082722664,
|
|
"fcm_dpo/margin": 63.495513916015625,
|
|
"fcm_dpo/q_t": 0.4222913384437561,
|
|
"grad_norm": 14.424744606018066,
|
|
"learning_rate": 6.725177529083209e-08,
|
|
"logits/chosen": 0.8182486295700073,
|
|
"logits/rejected": 0.7657685279846191,
|
|
"logps/chosen": -224.16690063476562,
|
|
"logps/ref_chosen": -62.87343215942383,
|
|
"logps/ref_rejected": -76.505615234375,
|
|
"logps/rejected": -301.29461669921875,
|
|
"loss": 1.1541,
|
|
"margin_dpo/margin_mean": 63.495513916015625,
|
|
"margin_dpo/margin_std": 108.85906982421875,
|
|
"step": 520
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -159.10711669921875,
|
|
"KL/mean": -204.9796142578125,
|
|
"KL/rejected_KL_mean": -250.85211181640625,
|
|
"KL/std": 106.46446990966797,
|
|
"epoch": 0.7876039304610734,
|
|
"fcm_dpo/beta": 0.0054365498945117,
|
|
"fcm_dpo/delta": -0.1012413501739502,
|
|
"fcm_dpo/margin": 91.74497985839844,
|
|
"fcm_dpo/q_t": 0.3887580633163452,
|
|
"grad_norm": 12.211526870727539,
|
|
"learning_rate": 6.63520728356167e-08,
|
|
"logits/chosen": 0.6779965162277222,
|
|
"logits/rejected": 0.6061267256736755,
|
|
"logps/chosen": -223.3137969970703,
|
|
"logps/ref_chosen": -64.20668029785156,
|
|
"logps/ref_rejected": -92.28083038330078,
|
|
"logps/rejected": -343.1329345703125,
|
|
"loss": 1.0433,
|
|
"margin_dpo/margin_mean": 91.74498748779297,
|
|
"margin_dpo/margin_std": 119.9051513671875,
|
|
"step": 521
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -161.3887939453125,
|
|
"KL/mean": -188.32659912109375,
|
|
"KL/rejected_KL_mean": -215.26438903808594,
|
|
"KL/std": 105.62191772460938,
|
|
"epoch": 0.7891156462585034,
|
|
"fcm_dpo/beta": 0.005454981699585915,
|
|
"fcm_dpo/delta": 0.10776165127754211,
|
|
"fcm_dpo/margin": 53.87560272216797,
|
|
"fcm_dpo/q_t": 0.43405523896217346,
|
|
"grad_norm": 14.450811386108398,
|
|
"learning_rate": 6.545750740770336e-08,
|
|
"logits/chosen": 0.7721718549728394,
|
|
"logits/rejected": 0.7647619247436523,
|
|
"logps/chosen": -219.75851440429688,
|
|
"logps/ref_chosen": -58.369720458984375,
|
|
"logps/ref_rejected": -68.79248046875,
|
|
"logps/rejected": -284.056884765625,
|
|
"loss": 1.2465,
|
|
"margin_dpo/margin_mean": 53.87559509277344,
|
|
"margin_dpo/margin_std": 137.96524047851562,
|
|
"step": 522
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -165.62039184570312,
|
|
"KL/mean": -198.57485961914062,
|
|
"KL/rejected_KL_mean": -231.52932739257812,
|
|
"KL/std": 107.88241577148438,
|
|
"epoch": 0.7906273620559335,
|
|
"fcm_dpo/beta": 0.005490235518664122,
|
|
"fcm_dpo/delta": 0.03885466977953911,
|
|
"fcm_dpo/margin": 65.908935546875,
|
|
"fcm_dpo/q_t": 0.4159238934516907,
|
|
"grad_norm": 14.94658088684082,
|
|
"learning_rate": 6.456810403001012e-08,
|
|
"logits/chosen": 0.7408667802810669,
|
|
"logits/rejected": 0.6249905824661255,
|
|
"logps/chosen": -231.3336181640625,
|
|
"logps/ref_chosen": -65.71324157714844,
|
|
"logps/ref_rejected": -91.98896789550781,
|
|
"logps/rejected": -323.518310546875,
|
|
"loss": 1.1608,
|
|
"margin_dpo/margin_mean": 65.90894317626953,
|
|
"margin_dpo/margin_std": 121.20352172851562,
|
|
"step": 523
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -144.1225128173828,
|
|
"KL/mean": -178.475830078125,
|
|
"KL/rejected_KL_mean": -212.82916259765625,
|
|
"KL/std": 102.94498443603516,
|
|
"epoch": 0.7921390778533636,
|
|
"fcm_dpo/beta": 0.0055147213861346245,
|
|
"fcm_dpo/delta": 0.021370386704802513,
|
|
"fcm_dpo/margin": 68.7066421508789,
|
|
"fcm_dpo/q_t": 0.4128548502922058,
|
|
"grad_norm": 13.837875366210938,
|
|
"learning_rate": 6.368388758106134e-08,
|
|
"logits/chosen": 0.6644772291183472,
|
|
"logits/rejected": 0.6428880095481873,
|
|
"logps/chosen": -220.4737548828125,
|
|
"logps/ref_chosen": -76.35124969482422,
|
|
"logps/ref_rejected": -89.96072387695312,
|
|
"logps/rejected": -302.7898864746094,
|
|
"loss": 1.1207,
|
|
"margin_dpo/margin_mean": 68.70664978027344,
|
|
"margin_dpo/margin_std": 104.10082244873047,
|
|
"step": 524
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -164.4867706298828,
|
|
"KL/mean": -191.9500732421875,
|
|
"KL/rejected_KL_mean": -219.41336059570312,
|
|
"KL/std": 101.97250366210938,
|
|
"epoch": 0.7936507936507936,
|
|
"fcm_dpo/beta": 0.00554524315521121,
|
|
"fcm_dpo/delta": 0.0969410240650177,
|
|
"fcm_dpo/margin": 54.92658233642578,
|
|
"fcm_dpo/q_t": 0.4324960708618164,
|
|
"grad_norm": 18.944414138793945,
|
|
"learning_rate": 6.280488279429185e-08,
|
|
"logits/chosen": 0.5748239755630493,
|
|
"logits/rejected": 0.5724896788597107,
|
|
"logps/chosen": -239.98257446289062,
|
|
"logps/ref_chosen": -75.49578857421875,
|
|
"logps/ref_rejected": -84.04852294921875,
|
|
"logps/rejected": -303.4618835449219,
|
|
"loss": 1.2089,
|
|
"margin_dpo/margin_mean": 54.92658233642578,
|
|
"margin_dpo/margin_std": 121.0759506225586,
|
|
"step": 525
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -170.76470947265625,
|
|
"KL/mean": -196.333740234375,
|
|
"KL/rejected_KL_mean": -221.9027862548828,
|
|
"KL/std": 104.82849884033203,
|
|
"epoch": 0.7951625094482238,
|
|
"fcm_dpo/beta": 0.005551319103688002,
|
|
"fcm_dpo/delta": -0.020082008093595505,
|
|
"fcm_dpo/margin": 51.138092041015625,
|
|
"fcm_dpo/q_t": 0.4350159764289856,
|
|
"grad_norm": 14.094649314880371,
|
|
"learning_rate": 6.193111425735515e-08,
|
|
"logits/chosen": 0.7870807647705078,
|
|
"logits/rejected": 0.7190835475921631,
|
|
"logps/chosen": -232.05712890625,
|
|
"logps/ref_chosen": -61.29241943359375,
|
|
"logps/ref_rejected": -82.47763061523438,
|
|
"logps/rejected": -304.38043212890625,
|
|
"loss": 1.2111,
|
|
"margin_dpo/margin_mean": 51.138092041015625,
|
|
"margin_dpo/margin_std": 107.18817138671875,
|
|
"step": 526
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -178.09033203125,
|
|
"KL/mean": -200.3950958251953,
|
|
"KL/rejected_KL_mean": -222.69985961914062,
|
|
"KL/std": 103.8236312866211,
|
|
"epoch": 0.7966742252456538,
|
|
"fcm_dpo/beta": 0.0055601890198886395,
|
|
"fcm_dpo/delta": 0.03190518543124199,
|
|
"fcm_dpo/margin": 44.60955047607422,
|
|
"fcm_dpo/q_t": 0.4445539712905884,
|
|
"grad_norm": 15.526667594909668,
|
|
"learning_rate": 6.106260641143546e-08,
|
|
"logits/chosen": 0.7976419925689697,
|
|
"logits/rejected": 0.7184303998947144,
|
|
"logps/chosen": -239.56295776367188,
|
|
"logps/ref_chosen": -61.472625732421875,
|
|
"logps/ref_rejected": -90.52831268310547,
|
|
"logps/rejected": -313.2281799316406,
|
|
"loss": 1.254,
|
|
"margin_dpo/margin_mean": 44.60955047607422,
|
|
"margin_dpo/margin_std": 116.43598937988281,
|
|
"step": 527
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -157.5458984375,
|
|
"KL/mean": -183.33978271484375,
|
|
"KL/rejected_KL_mean": -209.1336669921875,
|
|
"KL/std": 99.59080505371094,
|
|
"epoch": 0.7981859410430839,
|
|
"fcm_dpo/beta": 0.005608296953141689,
|
|
"fcm_dpo/delta": 0.1123834028840065,
|
|
"fcm_dpo/margin": 51.58777618408203,
|
|
"fcm_dpo/q_t": 0.43541765213012695,
|
|
"grad_norm": 16.270727157592773,
|
|
"learning_rate": 6.019938355056422e-08,
|
|
"logits/chosen": 0.7032827734947205,
|
|
"logits/rejected": 0.6334577798843384,
|
|
"logps/chosen": -216.33790588378906,
|
|
"logps/ref_chosen": -58.792015075683594,
|
|
"logps/ref_rejected": -71.82516479492188,
|
|
"logps/rejected": -280.9588317871094,
|
|
"loss": 1.241,
|
|
"margin_dpo/margin_mean": 51.58777618408203,
|
|
"margin_dpo/margin_std": 127.14857482910156,
|
|
"step": 528
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -151.56332397460938,
|
|
"KL/mean": -201.77618408203125,
|
|
"KL/rejected_KL_mean": -251.989013671875,
|
|
"KL/std": 98.80380249023438,
|
|
"epoch": 0.799697656840514,
|
|
"fcm_dpo/beta": 0.005563225597143173,
|
|
"fcm_dpo/delta": -0.1632690727710724,
|
|
"fcm_dpo/margin": 100.42568969726562,
|
|
"fcm_dpo/q_t": 0.37246793508529663,
|
|
"grad_norm": 13.96048641204834,
|
|
"learning_rate": 5.934146982094049e-08,
|
|
"logits/chosen": 0.7107451558113098,
|
|
"logits/rejected": 0.660121738910675,
|
|
"logps/chosen": -206.63429260253906,
|
|
"logps/ref_chosen": -55.070960998535156,
|
|
"logps/ref_rejected": -75.44007873535156,
|
|
"logps/rejected": -327.4290771484375,
|
|
"loss": 0.9865,
|
|
"margin_dpo/margin_mean": 100.42569732666016,
|
|
"margin_dpo/margin_std": 107.61436462402344,
|
|
"step": 529
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -152.33047485351562,
|
|
"KL/mean": -185.0406494140625,
|
|
"KL/rejected_KL_mean": -217.75082397460938,
|
|
"KL/std": 105.76217651367188,
|
|
"epoch": 0.8012093726379441,
|
|
"fcm_dpo/beta": 0.005553663708269596,
|
|
"fcm_dpo/delta": 0.037364620715379715,
|
|
"fcm_dpo/margin": 65.42034912109375,
|
|
"fcm_dpo/q_t": 0.4195675253868103,
|
|
"grad_norm": 17.086462020874023,
|
|
"learning_rate": 5.848888922025552e-08,
|
|
"logits/chosen": 0.7974028587341309,
|
|
"logits/rejected": 0.7539876699447632,
|
|
"logps/chosen": -209.07427978515625,
|
|
"logps/ref_chosen": -56.743812561035156,
|
|
"logps/ref_rejected": -76.6692123413086,
|
|
"logps/rejected": -294.4200439453125,
|
|
"loss": 1.1455,
|
|
"margin_dpo/margin_mean": 65.42034912109375,
|
|
"margin_dpo/margin_std": 110.92483520507812,
|
|
"step": 530
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -151.74856567382812,
|
|
"KL/mean": -185.8570098876953,
|
|
"KL/rejected_KL_mean": -219.96542358398438,
|
|
"KL/std": 104.23086547851562,
|
|
"epoch": 0.8027210884353742,
|
|
"fcm_dpo/beta": 0.00557300029322505,
|
|
"fcm_dpo/delta": 0.0201882291585207,
|
|
"fcm_dpo/margin": 68.21687316894531,
|
|
"fcm_dpo/q_t": 0.41433852910995483,
|
|
"grad_norm": 13.243087768554688,
|
|
"learning_rate": 5.7641665597021435e-08,
|
|
"logits/chosen": 0.7171991467475891,
|
|
"logits/rejected": 0.6416677236557007,
|
|
"logps/chosen": -202.86502075195312,
|
|
"logps/ref_chosen": -51.116455078125,
|
|
"logps/ref_rejected": -79.52884674072266,
|
|
"logps/rejected": -299.4942626953125,
|
|
"loss": 1.1333,
|
|
"margin_dpo/margin_mean": 68.21687316894531,
|
|
"margin_dpo/margin_std": 112.31360626220703,
|
|
"step": 531
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -176.72532653808594,
|
|
"KL/mean": -212.27444458007812,
|
|
"KL/rejected_KL_mean": -247.8235626220703,
|
|
"KL/std": 102.87384033203125,
|
|
"epoch": 0.8042328042328042,
|
|
"fcm_dpo/beta": 0.005574529990553856,
|
|
"fcm_dpo/delta": 0.0037346151657402515,
|
|
"fcm_dpo/margin": 71.09822845458984,
|
|
"fcm_dpo/q_t": 0.4103427529335022,
|
|
"grad_norm": 13.557002067565918,
|
|
"learning_rate": 5.679982264990424e-08,
|
|
"logits/chosen": 0.692879319190979,
|
|
"logits/rejected": 0.645154595375061,
|
|
"logps/chosen": -235.00527954101562,
|
|
"logps/ref_chosen": -58.279945373535156,
|
|
"logps/ref_rejected": -78.05426788330078,
|
|
"logps/rejected": -325.8778076171875,
|
|
"loss": 1.127,
|
|
"margin_dpo/margin_mean": 71.09822082519531,
|
|
"margin_dpo/margin_std": 116.00345611572266,
|
|
"step": 532
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -126.48112487792969,
|
|
"KL/mean": -166.22779846191406,
|
|
"KL/rejected_KL_mean": -205.97445678710938,
|
|
"KL/std": 109.5059814453125,
|
|
"epoch": 0.8057445200302343,
|
|
"fcm_dpo/beta": 0.0055624693632125854,
|
|
"fcm_dpo/delta": -0.0431300550699234,
|
|
"fcm_dpo/margin": 79.49333190917969,
|
|
"fcm_dpo/q_t": 0.40088561177253723,
|
|
"grad_norm": 13.765048027038574,
|
|
"learning_rate": 5.596338392706076e-08,
|
|
"logits/chosen": 0.8455485105514526,
|
|
"logits/rejected": 0.7852663993835449,
|
|
"logps/chosen": -182.89913940429688,
|
|
"logps/ref_chosen": -56.41801071166992,
|
|
"logps/ref_rejected": -73.89324951171875,
|
|
"logps/rejected": -279.8677062988281,
|
|
"loss": 1.0884,
|
|
"margin_dpo/margin_mean": 79.49333190917969,
|
|
"margin_dpo/margin_std": 115.32478332519531,
|
|
"step": 533
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -157.75350952148438,
|
|
"KL/mean": -190.13699340820312,
|
|
"KL/rejected_KL_mean": -222.52047729492188,
|
|
"KL/std": 101.15007019042969,
|
|
"epoch": 0.8072562358276644,
|
|
"fcm_dpo/beta": 0.005559004843235016,
|
|
"fcm_dpo/delta": 0.04065629094839096,
|
|
"fcm_dpo/margin": 64.76697540283203,
|
|
"fcm_dpo/q_t": 0.41991138458251953,
|
|
"grad_norm": 13.056085586547852,
|
|
"learning_rate": 5.513237282548033e-08,
|
|
"logits/chosen": 0.7214757204055786,
|
|
"logits/rejected": 0.6887925863265991,
|
|
"logps/chosen": -218.502197265625,
|
|
"logps/ref_chosen": -60.748687744140625,
|
|
"logps/ref_rejected": -73.8623046875,
|
|
"logps/rejected": -296.3827819824219,
|
|
"loss": 1.1673,
|
|
"margin_dpo/margin_mean": 64.76697540283203,
|
|
"margin_dpo/margin_std": 122.95958709716797,
|
|
"step": 534
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -168.16741943359375,
|
|
"KL/mean": -198.52764892578125,
|
|
"KL/rejected_KL_mean": -228.88787841796875,
|
|
"KL/std": 108.06431579589844,
|
|
"epoch": 0.8087679516250945,
|
|
"fcm_dpo/beta": 0.005603575613349676,
|
|
"fcm_dpo/delta": 0.060702014714479446,
|
|
"fcm_dpo/margin": 60.720481872558594,
|
|
"fcm_dpo/q_t": 0.424109548330307,
|
|
"grad_norm": 15.344324111938477,
|
|
"learning_rate": 5.430681259032957e-08,
|
|
"logits/chosen": 0.6816630959510803,
|
|
"logits/rejected": 0.6240689754486084,
|
|
"logps/chosen": -229.80482482910156,
|
|
"logps/ref_chosen": -61.637413024902344,
|
|
"logps/ref_rejected": -80.93138885498047,
|
|
"logps/rejected": -309.81927490234375,
|
|
"loss": 1.1799,
|
|
"margin_dpo/margin_mean": 60.720481872558594,
|
|
"margin_dpo/margin_std": 118.55729675292969,
|
|
"step": 535
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -146.31011962890625,
|
|
"KL/mean": -192.822998046875,
|
|
"KL/rejected_KL_mean": -239.33587646484375,
|
|
"KL/std": 107.46395874023438,
|
|
"epoch": 0.8102796674225246,
|
|
"fcm_dpo/beta": 0.00555273424834013,
|
|
"fcm_dpo/delta": -0.11976976692676544,
|
|
"fcm_dpo/margin": 93.02576446533203,
|
|
"fcm_dpo/q_t": 0.38322311639785767,
|
|
"grad_norm": 10.78260326385498,
|
|
"learning_rate": 5.3486726314303175e-08,
|
|
"logits/chosen": 0.7982736825942993,
|
|
"logits/rejected": 0.7178894281387329,
|
|
"logps/chosen": -198.1990966796875,
|
|
"logps/ref_chosen": -51.88897705078125,
|
|
"logps/ref_rejected": -73.34864044189453,
|
|
"logps/rejected": -312.68450927734375,
|
|
"loss": 1.0115,
|
|
"margin_dpo/margin_mean": 93.02577209472656,
|
|
"margin_dpo/margin_std": 104.45921325683594,
|
|
"step": 536
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -165.74179077148438,
|
|
"KL/mean": -205.87986755371094,
|
|
"KL/rejected_KL_mean": -246.01795959472656,
|
|
"KL/std": 115.47622680664062,
|
|
"epoch": 0.8117913832199547,
|
|
"fcm_dpo/beta": 0.005515134893357754,
|
|
"fcm_dpo/delta": -0.04383276030421257,
|
|
"fcm_dpo/margin": 80.27616119384766,
|
|
"fcm_dpo/q_t": 0.40226906538009644,
|
|
"grad_norm": 13.362234115600586,
|
|
"learning_rate": 5.267213693697695e-08,
|
|
"logits/chosen": 0.8222005367279053,
|
|
"logits/rejected": 0.7345283627510071,
|
|
"logps/chosen": -219.9904022216797,
|
|
"logps/ref_chosen": -54.248619079589844,
|
|
"logps/ref_rejected": -94.94343566894531,
|
|
"logps/rejected": -340.9613952636719,
|
|
"loss": 1.102,
|
|
"margin_dpo/margin_mean": 80.27616882324219,
|
|
"margin_dpo/margin_std": 125.26017761230469,
|
|
"step": 537
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -165.9024658203125,
|
|
"KL/mean": -205.28594970703125,
|
|
"KL/rejected_KL_mean": -244.66944885253906,
|
|
"KL/std": 112.10774993896484,
|
|
"epoch": 0.8133030990173847,
|
|
"fcm_dpo/beta": 0.005506400018930435,
|
|
"fcm_dpo/delta": -0.0344671905040741,
|
|
"fcm_dpo/margin": 78.76699829101562,
|
|
"fcm_dpo/q_t": 0.4018894135951996,
|
|
"grad_norm": 13.904546737670898,
|
|
"learning_rate": 5.1863067244167144e-08,
|
|
"logits/chosen": 0.7492853403091431,
|
|
"logits/rejected": 0.7276612520217896,
|
|
"logps/chosen": -235.99600219726562,
|
|
"logps/ref_chosen": -70.09353637695312,
|
|
"logps/ref_rejected": -79.49833679199219,
|
|
"logps/rejected": -324.16778564453125,
|
|
"loss": 1.0865,
|
|
"margin_dpo/margin_mean": 78.76699829101562,
|
|
"margin_dpo/margin_std": 112.43721008300781,
|
|
"step": 538
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -171.10504150390625,
|
|
"KL/mean": -201.38827514648438,
|
|
"KL/rejected_KL_mean": -231.67153930664062,
|
|
"KL/std": 105.21803283691406,
|
|
"epoch": 0.8148148148148148,
|
|
"fcm_dpo/beta": 0.005520418286323547,
|
|
"fcm_dpo/delta": 0.06679143756628036,
|
|
"fcm_dpo/margin": 60.566490173339844,
|
|
"fcm_dpo/q_t": 0.4265446960926056,
|
|
"grad_norm": 14.400826454162598,
|
|
"learning_rate": 5.105953986729195e-08,
|
|
"logits/chosen": 0.7303096652030945,
|
|
"logits/rejected": 0.649269700050354,
|
|
"logps/chosen": -233.0367431640625,
|
|
"logps/ref_chosen": -61.93169403076172,
|
|
"logps/ref_rejected": -84.08946228027344,
|
|
"logps/rejected": -315.760986328125,
|
|
"loss": 1.1659,
|
|
"margin_dpo/margin_mean": 60.566490173339844,
|
|
"margin_dpo/margin_std": 111.43452453613281,
|
|
"step": 539
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -157.5883026123047,
|
|
"KL/mean": -204.3798828125,
|
|
"KL/rejected_KL_mean": -251.17144775390625,
|
|
"KL/std": 119.66874694824219,
|
|
"epoch": 0.8163265306122449,
|
|
"fcm_dpo/beta": 0.0054971762001514435,
|
|
"fcm_dpo/delta": -0.11739315837621689,
|
|
"fcm_dpo/margin": 93.58316040039062,
|
|
"fcm_dpo/q_t": 0.383919358253479,
|
|
"grad_norm": 12.504426956176758,
|
|
"learning_rate": 5.026157728273966e-08,
|
|
"logits/chosen": 0.7808132171630859,
|
|
"logits/rejected": 0.6928094625473022,
|
|
"logps/chosen": -220.29254150390625,
|
|
"logps/ref_chosen": -62.704254150390625,
|
|
"logps/ref_rejected": -95.63597106933594,
|
|
"logps/rejected": -346.80743408203125,
|
|
"loss": 1.0269,
|
|
"margin_dpo/margin_mean": 93.58316040039062,
|
|
"margin_dpo/margin_std": 113.10142517089844,
|
|
"step": 540
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -153.55435180664062,
|
|
"KL/mean": -192.2169189453125,
|
|
"KL/rejected_KL_mean": -230.87948608398438,
|
|
"KL/std": 105.19171905517578,
|
|
"epoch": 0.817838246409675,
|
|
"fcm_dpo/beta": 0.0054497793316841125,
|
|
"fcm_dpo/delta": -0.0220711100846529,
|
|
"fcm_dpo/margin": 77.32511901855469,
|
|
"fcm_dpo/q_t": 0.4023526608943939,
|
|
"grad_norm": 12.606244087219238,
|
|
"learning_rate": 4.9469201811239035e-08,
|
|
"logits/chosen": 0.7935340404510498,
|
|
"logits/rejected": 0.8222974538803101,
|
|
"logps/chosen": -216.03518676757812,
|
|
"logps/ref_chosen": -62.48084259033203,
|
|
"logps/ref_rejected": -57.55541229248047,
|
|
"logps/rejected": -288.43487548828125,
|
|
"loss": 1.0832,
|
|
"margin_dpo/margin_mean": 77.32511138916016,
|
|
"margin_dpo/margin_std": 101.81745910644531,
|
|
"step": 541
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -133.76834106445312,
|
|
"KL/mean": -176.443115234375,
|
|
"KL/rejected_KL_mean": -219.11790466308594,
|
|
"KL/std": 100.40478515625,
|
|
"epoch": 0.8193499622071051,
|
|
"fcm_dpo/beta": 0.005427202209830284,
|
|
"fcm_dpo/delta": -0.06477969884872437,
|
|
"fcm_dpo/margin": 85.34957885742188,
|
|
"fcm_dpo/q_t": 0.39474669098854065,
|
|
"grad_norm": 12.707962989807129,
|
|
"learning_rate": 4.868243561723534e-08,
|
|
"logits/chosen": 0.8016009330749512,
|
|
"logits/rejected": 0.7575170993804932,
|
|
"logps/chosen": -183.22323608398438,
|
|
"logps/ref_chosen": -49.454891204833984,
|
|
"logps/ref_rejected": -65.33275604248047,
|
|
"logps/rejected": -284.45068359375,
|
|
"loss": 1.078,
|
|
"margin_dpo/margin_mean": 85.34957885742188,
|
|
"margin_dpo/margin_std": 122.28993225097656,
|
|
"step": 542
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -145.21658325195312,
|
|
"KL/mean": -187.30999755859375,
|
|
"KL/rejected_KL_mean": -229.40341186523438,
|
|
"KL/std": 103.92768859863281,
|
|
"epoch": 0.8208616780045351,
|
|
"fcm_dpo/beta": 0.005407546181231737,
|
|
"fcm_dpo/delta": -0.056535687297582626,
|
|
"fcm_dpo/margin": 84.18681335449219,
|
|
"fcm_dpo/q_t": 0.39622536301612854,
|
|
"grad_norm": 10.859258651733398,
|
|
"learning_rate": 4.790130070827028e-08,
|
|
"logits/chosen": 0.8015737533569336,
|
|
"logits/rejected": 0.7174701690673828,
|
|
"logps/chosen": -196.31744384765625,
|
|
"logps/ref_chosen": -51.100860595703125,
|
|
"logps/ref_rejected": -76.06130981445312,
|
|
"logps/rejected": -305.4647216796875,
|
|
"loss": 1.0724,
|
|
"margin_dpo/margin_mean": 84.18682098388672,
|
|
"margin_dpo/margin_std": 113.26998138427734,
|
|
"step": 543
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -156.00685119628906,
|
|
"KL/mean": -204.41116333007812,
|
|
"KL/rejected_KL_mean": -252.81546020507812,
|
|
"KL/std": 114.34260559082031,
|
|
"epoch": 0.8223733938019653,
|
|
"fcm_dpo/beta": 0.005343287251889706,
|
|
"fcm_dpo/delta": -0.12039691209793091,
|
|
"fcm_dpo/margin": 96.80862426757812,
|
|
"fcm_dpo/q_t": 0.3851155638694763,
|
|
"grad_norm": 15.20737075805664,
|
|
"learning_rate": 4.7125818934366454e-08,
|
|
"logits/chosen": 0.7543131113052368,
|
|
"logits/rejected": 0.6815561056137085,
|
|
"logps/chosen": -216.28408813476562,
|
|
"logps/ref_chosen": -60.2772331237793,
|
|
"logps/ref_rejected": -88.40553283691406,
|
|
"logps/rejected": -341.22100830078125,
|
|
"loss": 1.0475,
|
|
"margin_dpo/margin_mean": 96.80862426757812,
|
|
"margin_dpo/margin_std": 132.04238891601562,
|
|
"step": 544
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -170.58294677734375,
|
|
"KL/mean": -197.3397979736328,
|
|
"KL/rejected_KL_mean": -224.09664916992188,
|
|
"KL/std": 106.12724304199219,
|
|
"epoch": 0.8238851095993953,
|
|
"fcm_dpo/beta": 0.0053751831874251366,
|
|
"fcm_dpo/delta": 0.11393832415342331,
|
|
"fcm_dpo/margin": 53.513710021972656,
|
|
"fcm_dpo/q_t": 0.4343859553337097,
|
|
"grad_norm": 14.545745849609375,
|
|
"learning_rate": 4.635601198741607e-08,
|
|
"logits/chosen": 0.745832085609436,
|
|
"logits/rejected": 0.6867306232452393,
|
|
"logps/chosen": -232.19818115234375,
|
|
"logps/ref_chosen": -61.61524963378906,
|
|
"logps/ref_rejected": -78.71266174316406,
|
|
"logps/rejected": -302.8092956542969,
|
|
"loss": 1.2101,
|
|
"margin_dpo/margin_mean": 53.513710021972656,
|
|
"margin_dpo/margin_std": 114.78982543945312,
|
|
"step": 545
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -161.4920654296875,
|
|
"KL/mean": -191.81800842285156,
|
|
"KL/rejected_KL_mean": -222.1439208984375,
|
|
"KL/std": 100.21609497070312,
|
|
"epoch": 0.8253968253968254,
|
|
"fcm_dpo/beta": 0.005415499676018953,
|
|
"fcm_dpo/delta": 0.07275774329900742,
|
|
"fcm_dpo/margin": 60.65185546875,
|
|
"fcm_dpo/q_t": 0.4240911602973938,
|
|
"grad_norm": 17.701772689819336,
|
|
"learning_rate": 4.559190140057428e-08,
|
|
"logits/chosen": 0.8552579879760742,
|
|
"logits/rejected": 0.8509665131568909,
|
|
"logps/chosen": -220.80532836914062,
|
|
"logps/ref_chosen": -59.313262939453125,
|
|
"logps/ref_rejected": -64.73631286621094,
|
|
"logps/rejected": -286.8802490234375,
|
|
"loss": 1.1886,
|
|
"margin_dpo/margin_mean": 60.651851654052734,
|
|
"margin_dpo/margin_std": 122.34352111816406,
|
|
"step": 546
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -140.5301055908203,
|
|
"KL/mean": -185.026611328125,
|
|
"KL/rejected_KL_mean": -229.5231475830078,
|
|
"KL/std": 105.99732971191406,
|
|
"epoch": 0.8269085411942555,
|
|
"fcm_dpo/beta": 0.005385642871260643,
|
|
"fcm_dpo/delta": -0.08148474991321564,
|
|
"fcm_dpo/margin": 88.9930419921875,
|
|
"fcm_dpo/q_t": 0.3910977840423584,
|
|
"grad_norm": 12.845486640930176,
|
|
"learning_rate": 4.483350854765672e-08,
|
|
"logits/chosen": 0.6854957938194275,
|
|
"logits/rejected": 0.6286203861236572,
|
|
"logps/chosen": -195.50685119628906,
|
|
"logps/ref_chosen": -54.97674560546875,
|
|
"logps/ref_rejected": -75.35922241210938,
|
|
"logps/rejected": -304.88238525390625,
|
|
"loss": 1.0626,
|
|
"margin_dpo/margin_mean": 88.9930419921875,
|
|
"margin_dpo/margin_std": 121.67992401123047,
|
|
"step": 547
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -166.811279296875,
|
|
"KL/mean": -194.1539306640625,
|
|
"KL/rejected_KL_mean": -221.49659729003906,
|
|
"KL/std": 105.83686828613281,
|
|
"epoch": 0.8284202569916855,
|
|
"fcm_dpo/beta": 0.005432730540633202,
|
|
"fcm_dpo/delta": 0.10426247864961624,
|
|
"fcm_dpo/margin": 54.68531799316406,
|
|
"fcm_dpo/q_t": 0.43284255266189575,
|
|
"grad_norm": 16.044816970825195,
|
|
"learning_rate": 4.4080854642541826e-08,
|
|
"logits/chosen": 0.7021690607070923,
|
|
"logits/rejected": 0.6480152606964111,
|
|
"logps/chosen": -230.02194213867188,
|
|
"logps/ref_chosen": -63.21067428588867,
|
|
"logps/ref_rejected": -81.23347473144531,
|
|
"logps/rejected": -302.7300720214844,
|
|
"loss": 1.2023,
|
|
"margin_dpo/margin_mean": 54.685325622558594,
|
|
"margin_dpo/margin_std": 114.0304946899414,
|
|
"step": 548
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -164.0994110107422,
|
|
"KL/mean": -197.63067626953125,
|
|
"KL/rejected_KL_mean": -231.16192626953125,
|
|
"KL/std": 115.47174835205078,
|
|
"epoch": 0.8299319727891157,
|
|
"fcm_dpo/beta": 0.005443481728434563,
|
|
"fcm_dpo/delta": 0.03550173342227936,
|
|
"fcm_dpo/margin": 67.06251525878906,
|
|
"fcm_dpo/q_t": 0.41986796259880066,
|
|
"grad_norm": 16.801313400268555,
|
|
"learning_rate": 4.333396073857723e-08,
|
|
"logits/chosen": 0.8281651735305786,
|
|
"logits/rejected": 0.7629342079162598,
|
|
"logps/chosen": -228.3729248046875,
|
|
"logps/ref_chosen": -64.27351379394531,
|
|
"logps/ref_rejected": -92.31663513183594,
|
|
"logps/rejected": -323.47857666015625,
|
|
"loss": 1.181,
|
|
"margin_dpo/margin_mean": 67.06251525878906,
|
|
"margin_dpo/margin_std": 136.05055236816406,
|
|
"step": 549
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -167.90005493164062,
|
|
"KL/mean": -191.40850830078125,
|
|
"KL/rejected_KL_mean": -214.9169921875,
|
|
"KL/std": 102.388427734375,
|
|
"epoch": 0.8314436885865457,
|
|
"fcm_dpo/beta": 0.005469627678394318,
|
|
"fcm_dpo/delta": 0.018692484125494957,
|
|
"fcm_dpo/margin": 47.01694107055664,
|
|
"fcm_dpo/q_t": 0.4415434002876282,
|
|
"grad_norm": 15.141081809997559,
|
|
"learning_rate": 4.259284772799099e-08,
|
|
"logits/chosen": 0.7710243463516235,
|
|
"logits/rejected": 0.7470707893371582,
|
|
"logps/chosen": -224.1304931640625,
|
|
"logps/ref_chosen": -56.230438232421875,
|
|
"logps/ref_rejected": -62.59788513183594,
|
|
"logps/rejected": -277.5148620605469,
|
|
"loss": 1.2349,
|
|
"margin_dpo/margin_mean": 47.01694107055664,
|
|
"margin_dpo/margin_std": 110.23169708251953,
|
|
"step": 550
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -172.0569305419922,
|
|
"KL/mean": -202.14736938476562,
|
|
"KL/rejected_KL_mean": -232.23782348632812,
|
|
"KL/std": 111.20361328125,
|
|
"epoch": 0.8329554043839759,
|
|
"fcm_dpo/beta": 0.005502756219357252,
|
|
"fcm_dpo/delta": 0.06991526484489441,
|
|
"fcm_dpo/margin": 60.1808967590332,
|
|
"fcm_dpo/q_t": 0.42772114276885986,
|
|
"grad_norm": 14.024113655090332,
|
|
"learning_rate": 4.1857536341307176e-08,
|
|
"logits/chosen": 0.7946928143501282,
|
|
"logits/rejected": 0.7632230520248413,
|
|
"logps/chosen": -239.80413818359375,
|
|
"logps/ref_chosen": -67.74720764160156,
|
|
"logps/ref_rejected": -87.04285430908203,
|
|
"logps/rejected": -319.28070068359375,
|
|
"loss": 1.1639,
|
|
"margin_dpo/margin_mean": 60.18090057373047,
|
|
"margin_dpo/margin_std": 110.46688842773438,
|
|
"step": 551
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -159.5831298828125,
|
|
"KL/mean": -193.7672119140625,
|
|
"KL/rejected_KL_mean": -227.95132446289062,
|
|
"KL/std": 111.20988464355469,
|
|
"epoch": 0.8344671201814059,
|
|
"fcm_dpo/beta": 0.005529999267309904,
|
|
"fcm_dpo/delta": 0.022074926644563675,
|
|
"fcm_dpo/margin": 68.36819458007812,
|
|
"fcm_dpo/q_t": 0.41193437576293945,
|
|
"grad_norm": 17.451871871948242,
|
|
"learning_rate": 4.112804714676593e-08,
|
|
"logits/chosen": 0.7318317890167236,
|
|
"logits/rejected": 0.681510329246521,
|
|
"logps/chosen": -222.50936889648438,
|
|
"logps/ref_chosen": -62.92625427246094,
|
|
"logps/ref_rejected": -82.98365783691406,
|
|
"logps/rejected": -310.9349670410156,
|
|
"loss": 1.1228,
|
|
"margin_dpo/margin_mean": 68.3681869506836,
|
|
"margin_dpo/margin_std": 103.78141021728516,
|
|
"step": 552
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -172.37855529785156,
|
|
"KL/mean": -206.55819702148438,
|
|
"KL/rejected_KL_mean": -240.73782348632812,
|
|
"KL/std": 109.01435852050781,
|
|
"epoch": 0.8359788359788359,
|
|
"fcm_dpo/beta": 0.005528800189495087,
|
|
"fcm_dpo/delta": 0.02248218283057213,
|
|
"fcm_dpo/margin": 68.35926818847656,
|
|
"fcm_dpo/q_t": 0.4174407124519348,
|
|
"grad_norm": 14.270644187927246,
|
|
"learning_rate": 4.0404400549748144e-08,
|
|
"logits/chosen": 0.7643309831619263,
|
|
"logits/rejected": 0.6657716035842896,
|
|
"logps/chosen": -228.41705322265625,
|
|
"logps/ref_chosen": -56.038490295410156,
|
|
"logps/ref_rejected": -84.48454284667969,
|
|
"logps/rejected": -325.22235107421875,
|
|
"loss": 1.1615,
|
|
"margin_dpo/margin_mean": 68.35926818847656,
|
|
"margin_dpo/margin_std": 130.02491760253906,
|
|
"step": 553
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -155.05990600585938,
|
|
"KL/mean": -195.98202514648438,
|
|
"KL/rejected_KL_mean": -236.9041748046875,
|
|
"KL/std": 104.36518859863281,
|
|
"epoch": 0.8374905517762661,
|
|
"fcm_dpo/beta": 0.005510974675416946,
|
|
"fcm_dpo/delta": -0.05222010612487793,
|
|
"fcm_dpo/margin": 81.84425354003906,
|
|
"fcm_dpo/q_t": 0.3959125876426697,
|
|
"grad_norm": 13.632112503051758,
|
|
"learning_rate": 3.968661679220467e-08,
|
|
"logits/chosen": 0.7195841073989868,
|
|
"logits/rejected": 0.7111239433288574,
|
|
"logps/chosen": -219.59048461914062,
|
|
"logps/ref_chosen": -64.53059387207031,
|
|
"logps/ref_rejected": -71.2155990600586,
|
|
"logps/rejected": -308.1197814941406,
|
|
"loss": 1.0765,
|
|
"margin_dpo/margin_mean": 81.8442611694336,
|
|
"margin_dpo/margin_std": 112.41148376464844,
|
|
"step": 554
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -169.70193481445312,
|
|
"KL/mean": -200.88150024414062,
|
|
"KL/rejected_KL_mean": -232.06106567382812,
|
|
"KL/std": 104.07061767578125,
|
|
"epoch": 0.8390022675736961,
|
|
"fcm_dpo/beta": 0.0054922583512961864,
|
|
"fcm_dpo/delta": -0.04197073355317116,
|
|
"fcm_dpo/margin": 62.35913848876953,
|
|
"fcm_dpo/q_t": 0.41978704929351807,
|
|
"grad_norm": 14.049398422241211,
|
|
"learning_rate": 3.89747159520904e-08,
|
|
"logits/chosen": 0.721834659576416,
|
|
"logits/rejected": 0.7036848664283752,
|
|
"logps/chosen": -236.35385131835938,
|
|
"logps/ref_chosen": -66.65191650390625,
|
|
"logps/ref_rejected": -68.6667251586914,
|
|
"logps/rejected": -300.727783203125,
|
|
"loss": 1.1892,
|
|
"margin_dpo/margin_mean": 62.359134674072266,
|
|
"margin_dpo/margin_std": 123.86202239990234,
|
|
"step": 555
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -167.73110961914062,
|
|
"KL/mean": -198.33973693847656,
|
|
"KL/rejected_KL_mean": -228.94833374023438,
|
|
"KL/std": 108.29742431640625,
|
|
"epoch": 0.8405139833711263,
|
|
"fcm_dpo/beta": 0.005501300562173128,
|
|
"fcm_dpo/delta": 0.06433117389678955,
|
|
"fcm_dpo/margin": 61.21722412109375,
|
|
"fcm_dpo/q_t": 0.4249955713748932,
|
|
"grad_norm": 13.985133171081543,
|
|
"learning_rate": 3.826871794280192e-08,
|
|
"logits/chosen": 0.8011499643325806,
|
|
"logits/rejected": 0.7572546005249023,
|
|
"logps/chosen": -220.5634765625,
|
|
"logps/ref_chosen": -52.832366943359375,
|
|
"logps/ref_rejected": -64.49044036865234,
|
|
"logps/rejected": -293.43878173828125,
|
|
"loss": 1.1834,
|
|
"margin_dpo/margin_mean": 61.21723175048828,
|
|
"margin_dpo/margin_std": 121.02392578125,
|
|
"step": 556
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -165.6449737548828,
|
|
"KL/mean": -209.26373291015625,
|
|
"KL/rejected_KL_mean": -252.88246154785156,
|
|
"KL/std": 106.13115692138672,
|
|
"epoch": 0.8420256991685563,
|
|
"fcm_dpo/beta": 0.005473933648318052,
|
|
"fcm_dpo/delta": -0.07968606054782867,
|
|
"fcm_dpo/margin": 87.23750305175781,
|
|
"fcm_dpo/q_t": 0.3914426565170288,
|
|
"grad_norm": 11.581623077392578,
|
|
"learning_rate": 3.756864251262143e-08,
|
|
"logits/chosen": 0.8539372086524963,
|
|
"logits/rejected": 0.782925009727478,
|
|
"logps/chosen": -220.68096923828125,
|
|
"logps/ref_chosen": -55.03598403930664,
|
|
"logps/ref_rejected": -75.80644989013672,
|
|
"logps/rejected": -328.68890380859375,
|
|
"loss": 1.0399,
|
|
"margin_dpo/margin_mean": 87.23750305175781,
|
|
"margin_dpo/margin_std": 102.06510162353516,
|
|
"step": 557
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -153.54786682128906,
|
|
"KL/mean": -201.2423858642578,
|
|
"KL/rejected_KL_mean": -248.93690490722656,
|
|
"KL/std": 113.41873931884766,
|
|
"epoch": 0.8435374149659864,
|
|
"fcm_dpo/beta": 0.005413743667304516,
|
|
"fcm_dpo/delta": -0.11984787881374359,
|
|
"fcm_dpo/margin": 95.38902282714844,
|
|
"fcm_dpo/q_t": 0.38484978675842285,
|
|
"grad_norm": 11.094016075134277,
|
|
"learning_rate": 3.687450924416341e-08,
|
|
"logits/chosen": 0.7985022664070129,
|
|
"logits/rejected": 0.7477705478668213,
|
|
"logps/chosen": -216.77423095703125,
|
|
"logps/ref_chosen": -63.226348876953125,
|
|
"logps/ref_rejected": -91.46881866455078,
|
|
"logps/rejected": -340.40570068359375,
|
|
"loss": 1.0284,
|
|
"margin_dpo/margin_mean": 95.38902282714844,
|
|
"margin_dpo/margin_std": 117.99440002441406,
|
|
"step": 558
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -157.86526489257812,
|
|
"KL/mean": -200.24497985839844,
|
|
"KL/rejected_KL_mean": -242.6247100830078,
|
|
"KL/std": 112.23207092285156,
|
|
"epoch": 0.8450491307634165,
|
|
"fcm_dpo/beta": 0.005368214100599289,
|
|
"fcm_dpo/delta": -0.05681581795215607,
|
|
"fcm_dpo/margin": 84.75942993164062,
|
|
"fcm_dpo/q_t": 0.4020443260669708,
|
|
"grad_norm": 11.214516639709473,
|
|
"learning_rate": 3.6186337553827743e-08,
|
|
"logits/chosen": 0.7290089726448059,
|
|
"logits/rejected": 0.6718306541442871,
|
|
"logps/chosen": -219.38690185546875,
|
|
"logps/ref_chosen": -61.521644592285156,
|
|
"logps/ref_rejected": -82.83859252929688,
|
|
"logps/rejected": -325.46331787109375,
|
|
"loss": 1.1014,
|
|
"margin_dpo/margin_mean": 84.75942993164062,
|
|
"margin_dpo/margin_std": 134.75906372070312,
|
|
"step": 559
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -172.03170776367188,
|
|
"KL/mean": -211.0216064453125,
|
|
"KL/rejected_KL_mean": -250.0115203857422,
|
|
"KL/std": 107.847412109375,
|
|
"epoch": 0.8465608465608465,
|
|
"fcm_dpo/beta": 0.005380082409828901,
|
|
"fcm_dpo/delta": -0.020102493464946747,
|
|
"fcm_dpo/margin": 77.97981262207031,
|
|
"fcm_dpo/q_t": 0.40382474660873413,
|
|
"grad_norm": 16.135025024414062,
|
|
"learning_rate": 3.550414669125573e-08,
|
|
"logits/chosen": 0.755692720413208,
|
|
"logits/rejected": 0.7201418876647949,
|
|
"logps/chosen": -232.6729278564453,
|
|
"logps/ref_chosen": -60.64122009277344,
|
|
"logps/ref_rejected": -78.75474548339844,
|
|
"logps/rejected": -328.7662658691406,
|
|
"loss": 1.0824,
|
|
"margin_dpo/margin_mean": 77.97980499267578,
|
|
"margin_dpo/margin_std": 101.29356384277344,
|
|
"step": 560
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -156.94430541992188,
|
|
"KL/mean": -192.54425048828125,
|
|
"KL/rejected_KL_mean": -228.14418029785156,
|
|
"KL/std": 105.1910629272461,
|
|
"epoch": 0.8480725623582767,
|
|
"fcm_dpo/beta": 0.0053678411059081554,
|
|
"fcm_dpo/delta": 0.018137481063604355,
|
|
"fcm_dpo/margin": 71.19991302490234,
|
|
"fcm_dpo/q_t": 0.4145040512084961,
|
|
"grad_norm": 14.00090503692627,
|
|
"learning_rate": 3.482795573879241e-08,
|
|
"logits/chosen": 0.7281751036643982,
|
|
"logits/rejected": 0.6966254711151123,
|
|
"logps/chosen": -219.44290161132812,
|
|
"logps/ref_chosen": -62.49859619140625,
|
|
"logps/ref_rejected": -78.72064208984375,
|
|
"logps/rejected": -306.86480712890625,
|
|
"loss": 1.1295,
|
|
"margin_dpo/margin_mean": 71.19990539550781,
|
|
"margin_dpo/margin_std": 114.6388931274414,
|
|
"step": 561
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -168.30091857910156,
|
|
"KL/mean": -214.32786560058594,
|
|
"KL/rejected_KL_mean": -260.35479736328125,
|
|
"KL/std": 123.86079406738281,
|
|
"epoch": 0.8495842781557067,
|
|
"fcm_dpo/beta": 0.005321085918694735,
|
|
"fcm_dpo/delta": -0.09280488640069962,
|
|
"fcm_dpo/margin": 92.05390930175781,
|
|
"fcm_dpo/q_t": 0.39151978492736816,
|
|
"grad_norm": 17.36707878112793,
|
|
"learning_rate": 3.415778361095226e-08,
|
|
"logits/chosen": 0.7379502058029175,
|
|
"logits/rejected": 0.7081115245819092,
|
|
"logps/chosen": -243.08265686035156,
|
|
"logps/ref_chosen": -74.78173828125,
|
|
"logps/ref_rejected": -92.63499450683594,
|
|
"logps/rejected": -352.98980712890625,
|
|
"loss": 1.0494,
|
|
"margin_dpo/margin_mean": 92.05390167236328,
|
|
"margin_dpo/margin_std": 119.15115356445312,
|
|
"step": 562
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -139.54893493652344,
|
|
"KL/mean": -178.90478515625,
|
|
"KL/rejected_KL_mean": -218.26065063476562,
|
|
"KL/std": 94.62449645996094,
|
|
"epoch": 0.8510959939531368,
|
|
"fcm_dpo/beta": 0.005318550858646631,
|
|
"fcm_dpo/delta": -0.01902971789240837,
|
|
"fcm_dpo/margin": 78.71174621582031,
|
|
"fcm_dpo/q_t": 0.40383607149124146,
|
|
"grad_norm": 19.003753662109375,
|
|
"learning_rate": 3.349364905389032e-08,
|
|
"logits/chosen": 0.8296000957489014,
|
|
"logits/rejected": 0.7802062034606934,
|
|
"logps/chosen": -189.7474365234375,
|
|
"logps/ref_chosen": -50.19850158691406,
|
|
"logps/ref_rejected": -66.76687622070312,
|
|
"logps/rejected": -285.02752685546875,
|
|
"loss": 1.107,
|
|
"margin_dpo/margin_mean": 78.71174621582031,
|
|
"margin_dpo/margin_std": 120.09756469726562,
|
|
"step": 563
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -141.2943115234375,
|
|
"KL/mean": -190.6590576171875,
|
|
"KL/rejected_KL_mean": -240.0238037109375,
|
|
"KL/std": 102.90581512451172,
|
|
"epoch": 0.8526077097505669,
|
|
"fcm_dpo/beta": 0.005265322048217058,
|
|
"fcm_dpo/delta": -0.12308531999588013,
|
|
"fcm_dpo/margin": 98.7294921875,
|
|
"fcm_dpo/q_t": 0.38137996196746826,
|
|
"grad_norm": 12.049213409423828,
|
|
"learning_rate": 3.283557064487785e-08,
|
|
"logits/chosen": 0.7457233667373657,
|
|
"logits/rejected": 0.7183442115783691,
|
|
"logps/chosen": -197.03515625,
|
|
"logps/ref_chosen": -55.7408447265625,
|
|
"logps/ref_rejected": -74.82323455810547,
|
|
"logps/rejected": -314.8470458984375,
|
|
"loss": 1.0297,
|
|
"margin_dpo/margin_mean": 98.7294921875,
|
|
"margin_dpo/margin_std": 122.86710357666016,
|
|
"step": 564
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -172.18325805664062,
|
|
"KL/mean": -208.79727172851562,
|
|
"KL/rejected_KL_mean": -245.41128540039062,
|
|
"KL/std": 107.77508544921875,
|
|
"epoch": 0.854119425547997,
|
|
"fcm_dpo/beta": 0.005266151856631041,
|
|
"fcm_dpo/delta": 0.014484787359833717,
|
|
"fcm_dpo/margin": 73.22801208496094,
|
|
"fcm_dpo/q_t": 0.4109097421169281,
|
|
"grad_norm": 14.099479675292969,
|
|
"learning_rate": 3.218356679178252e-08,
|
|
"logits/chosen": 0.8104864358901978,
|
|
"logits/rejected": 0.7604826092720032,
|
|
"logps/chosen": -230.52066040039062,
|
|
"logps/ref_chosen": -58.33738327026367,
|
|
"logps/ref_rejected": -78.31776428222656,
|
|
"logps/rejected": -323.72906494140625,
|
|
"loss": 1.1122,
|
|
"margin_dpo/margin_mean": 73.22801208496094,
|
|
"margin_dpo/margin_std": 106.36097717285156,
|
|
"step": 565
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -162.88836669921875,
|
|
"KL/mean": -196.37548828125,
|
|
"KL/rejected_KL_mean": -229.86260986328125,
|
|
"KL/std": 111.643310546875,
|
|
"epoch": 0.8556311413454271,
|
|
"fcm_dpo/beta": 0.005288278684020042,
|
|
"fcm_dpo/delta": 0.046343594789505005,
|
|
"fcm_dpo/margin": 66.9742431640625,
|
|
"fcm_dpo/q_t": 0.42172494530677795,
|
|
"grad_norm": 13.346035957336426,
|
|
"learning_rate": 3.1537655732553764e-08,
|
|
"logits/chosen": 0.7527545690536499,
|
|
"logits/rejected": 0.7446944713592529,
|
|
"logps/chosen": -234.1121063232422,
|
|
"logps/ref_chosen": -71.22373962402344,
|
|
"logps/ref_rejected": -71.11601257324219,
|
|
"logps/rejected": -300.9786376953125,
|
|
"loss": 1.1904,
|
|
"margin_dpo/margin_mean": 66.9742431640625,
|
|
"margin_dpo/margin_std": 139.96917724609375,
|
|
"step": 566
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -154.48211669921875,
|
|
"KL/mean": -196.6006622314453,
|
|
"KL/rejected_KL_mean": -238.71920776367188,
|
|
"KL/std": 105.12157440185547,
|
|
"epoch": 0.8571428571428571,
|
|
"fcm_dpo/beta": 0.005248716101050377,
|
|
"fcm_dpo/delta": -0.04363976791501045,
|
|
"fcm_dpo/margin": 84.23709106445312,
|
|
"fcm_dpo/q_t": 0.39844948053359985,
|
|
"grad_norm": 11.927016258239746,
|
|
"learning_rate": 3.089785553471233e-08,
|
|
"logits/chosen": 0.7675319314002991,
|
|
"logits/rejected": 0.6852154731750488,
|
|
"logps/chosen": -207.15139770507812,
|
|
"logps/ref_chosen": -52.669273376464844,
|
|
"logps/ref_rejected": -74.34785461425781,
|
|
"logps/rejected": -313.06707763671875,
|
|
"loss": 1.0777,
|
|
"margin_dpo/margin_mean": 84.23709106445312,
|
|
"margin_dpo/margin_std": 112.46174621582031,
|
|
"step": 567
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -142.3792724609375,
|
|
"KL/mean": -193.59828186035156,
|
|
"KL/rejected_KL_mean": -244.8173065185547,
|
|
"KL/std": 112.16947937011719,
|
|
"epoch": 0.8586545729402872,
|
|
"fcm_dpo/beta": 0.005223691463470459,
|
|
"fcm_dpo/delta": -0.13882620632648468,
|
|
"fcm_dpo/margin": 102.43801879882812,
|
|
"fcm_dpo/q_t": 0.3804175853729248,
|
|
"grad_norm": 15.164554595947266,
|
|
"learning_rate": 3.026418409484513e-08,
|
|
"logits/chosen": 0.7956768274307251,
|
|
"logits/rejected": 0.7187240719795227,
|
|
"logps/chosen": -194.55728149414062,
|
|
"logps/ref_chosen": -52.178001403808594,
|
|
"logps/ref_rejected": -85.8277587890625,
|
|
"logps/rejected": -330.64508056640625,
|
|
"loss": 1.0066,
|
|
"margin_dpo/margin_mean": 102.4380111694336,
|
|
"margin_dpo/margin_std": 113.28675842285156,
|
|
"step": 568
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -168.2152862548828,
|
|
"KL/mean": -193.2032928466797,
|
|
"KL/rejected_KL_mean": -218.1912841796875,
|
|
"KL/std": 112.74418640136719,
|
|
"epoch": 0.8601662887377173,
|
|
"fcm_dpo/beta": 0.005182644352316856,
|
|
"fcm_dpo/delta": -0.008694237098097801,
|
|
"fcm_dpo/margin": 49.97599792480469,
|
|
"fcm_dpo/q_t": 0.44116294384002686,
|
|
"grad_norm": 15.202239036560059,
|
|
"learning_rate": 2.963665913810451e-08,
|
|
"logits/chosen": 0.6939299702644348,
|
|
"logits/rejected": 0.6641807556152344,
|
|
"logps/chosen": -230.8645477294922,
|
|
"logps/ref_chosen": -62.649261474609375,
|
|
"logps/ref_rejected": -75.4298324584961,
|
|
"logps/rejected": -293.6211242675781,
|
|
"loss": 1.2298,
|
|
"margin_dpo/margin_mean": 49.97599792480469,
|
|
"margin_dpo/margin_std": 112.70172119140625,
|
|
"step": 569
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -147.83944702148438,
|
|
"KL/mean": -196.29156494140625,
|
|
"KL/rejected_KL_mean": -244.74365234375,
|
|
"KL/std": 104.0134048461914,
|
|
"epoch": 0.8616780045351474,
|
|
"fcm_dpo/beta": 0.00515684112906456,
|
|
"fcm_dpo/delta": -0.10228677093982697,
|
|
"fcm_dpo/margin": 96.90422058105469,
|
|
"fcm_dpo/q_t": 0.38731202483177185,
|
|
"grad_norm": 13.407984733581543,
|
|
"learning_rate": 2.9015298217712453e-08,
|
|
"logits/chosen": 0.7353050708770752,
|
|
"logits/rejected": 0.6555818319320679,
|
|
"logps/chosen": -197.88124084472656,
|
|
"logps/ref_chosen": -50.04179382324219,
|
|
"logps/ref_rejected": -78.27146911621094,
|
|
"logps/rejected": -323.01513671875,
|
|
"loss": 1.0354,
|
|
"margin_dpo/margin_mean": 96.90422058105469,
|
|
"margin_dpo/margin_std": 119.29371643066406,
|
|
"step": 570
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -159.38134765625,
|
|
"KL/mean": -187.38658142089844,
|
|
"KL/rejected_KL_mean": -215.3917999267578,
|
|
"KL/std": 101.22817993164062,
|
|
"epoch": 0.8631897203325775,
|
|
"fcm_dpo/beta": 0.0051817260682582855,
|
|
"fcm_dpo/delta": 0.11107006669044495,
|
|
"fcm_dpo/margin": 56.01045227050781,
|
|
"fcm_dpo/q_t": 0.4332231879234314,
|
|
"grad_norm": 13.501057624816895,
|
|
"learning_rate": 2.840011871446962e-08,
|
|
"logits/chosen": 0.7619874477386475,
|
|
"logits/rejected": 0.7413293123245239,
|
|
"logps/chosen": -213.03817749023438,
|
|
"logps/ref_chosen": -53.65681457519531,
|
|
"logps/ref_rejected": -66.13298034667969,
|
|
"logps/rejected": -281.5247802734375,
|
|
"loss": 1.2066,
|
|
"margin_dpo/margin_mean": 56.01044845581055,
|
|
"margin_dpo/margin_std": 116.42642211914062,
|
|
"step": 571
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -163.20970153808594,
|
|
"KL/mean": -193.975341796875,
|
|
"KL/rejected_KL_mean": -224.740966796875,
|
|
"KL/std": 106.86662292480469,
|
|
"epoch": 0.8647014361300076,
|
|
"fcm_dpo/beta": 0.005229267291724682,
|
|
"fcm_dpo/delta": 0.07915746420621872,
|
|
"fcm_dpo/margin": 61.531272888183594,
|
|
"fcm_dpo/q_t": 0.42566853761672974,
|
|
"grad_norm": 13.843246459960938,
|
|
"learning_rate": 2.7791137836269158e-08,
|
|
"logits/chosen": 0.7318317890167236,
|
|
"logits/rejected": 0.7722631692886353,
|
|
"logps/chosen": -238.02761840820312,
|
|
"logps/ref_chosen": -74.81792449951172,
|
|
"logps/ref_rejected": -65.88681030273438,
|
|
"logps/rejected": -290.6277770996094,
|
|
"loss": 1.168,
|
|
"margin_dpo/margin_mean": 61.531272888183594,
|
|
"margin_dpo/margin_std": 107.99958801269531,
|
|
"step": 572
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -175.80645751953125,
|
|
"KL/mean": -212.58859252929688,
|
|
"KL/rejected_KL_mean": -249.37071228027344,
|
|
"KL/std": 118.34883880615234,
|
|
"epoch": 0.8662131519274376,
|
|
"fcm_dpo/beta": 0.005235068500041962,
|
|
"fcm_dpo/delta": 0.015175528824329376,
|
|
"fcm_dpo/margin": 73.56425476074219,
|
|
"fcm_dpo/q_t": 0.416350781917572,
|
|
"grad_norm": 14.092700958251953,
|
|
"learning_rate": 2.718837261761528e-08,
|
|
"logits/chosen": 0.7830581665039062,
|
|
"logits/rejected": 0.7414644360542297,
|
|
"logps/chosen": -244.5321044921875,
|
|
"logps/ref_chosen": -68.72564697265625,
|
|
"logps/ref_rejected": -88.16201782226562,
|
|
"logps/rejected": -337.53271484375,
|
|
"loss": 1.1624,
|
|
"margin_dpo/margin_mean": 73.56425476074219,
|
|
"margin_dpo/margin_std": 141.61434936523438,
|
|
"step": 573
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -155.66375732421875,
|
|
"KL/mean": -202.1156463623047,
|
|
"KL/rejected_KL_mean": -248.56753540039062,
|
|
"KL/std": 109.053955078125,
|
|
"epoch": 0.8677248677248677,
|
|
"fcm_dpo/beta": 0.005218617618083954,
|
|
"fcm_dpo/delta": -0.08705511689186096,
|
|
"fcm_dpo/margin": 92.90379333496094,
|
|
"fcm_dpo/q_t": 0.3897053003311157,
|
|
"grad_norm": 10.808903694152832,
|
|
"learning_rate": 2.659183991914696e-08,
|
|
"logits/chosen": 0.8427752256393433,
|
|
"logits/rejected": 0.782508134841919,
|
|
"logps/chosen": -211.97714233398438,
|
|
"logps/ref_chosen": -56.31340026855469,
|
|
"logps/ref_rejected": -83.91553497314453,
|
|
"logps/rejected": -332.48309326171875,
|
|
"loss": 1.028,
|
|
"margin_dpo/margin_mean": 92.90379333496094,
|
|
"margin_dpo/margin_std": 103.26882934570312,
|
|
"step": 574
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -161.21359252929688,
|
|
"KL/mean": -194.38609313964844,
|
|
"KL/rejected_KL_mean": -227.55859375,
|
|
"KL/std": 109.57356262207031,
|
|
"epoch": 0.8692365835222978,
|
|
"fcm_dpo/beta": 0.005164761561900377,
|
|
"fcm_dpo/delta": -0.050626128911972046,
|
|
"fcm_dpo/margin": 66.34501647949219,
|
|
"fcm_dpo/q_t": 0.42494362592697144,
|
|
"grad_norm": 13.409728050231934,
|
|
"learning_rate": 2.600155642716606e-08,
|
|
"logits/chosen": 0.7642154097557068,
|
|
"logits/rejected": 0.6914390325546265,
|
|
"logps/chosen": -225.79771423339844,
|
|
"logps/ref_chosen": -64.5841293334961,
|
|
"logps/ref_rejected": -93.47034454345703,
|
|
"logps/rejected": -321.0289306640625,
|
|
"loss": 1.1851,
|
|
"margin_dpo/margin_mean": 66.34501647949219,
|
|
"margin_dpo/margin_std": 130.5989532470703,
|
|
"step": 575
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -142.51329040527344,
|
|
"KL/mean": -191.24363708496094,
|
|
"KL/rejected_KL_mean": -239.97398376464844,
|
|
"KL/std": 110.81013488769531,
|
|
"epoch": 0.8707482993197279,
|
|
"fcm_dpo/beta": 0.005107846576720476,
|
|
"fcm_dpo/delta": -0.10117129236459732,
|
|
"fcm_dpo/margin": 97.46070861816406,
|
|
"fcm_dpo/q_t": 0.3875857889652252,
|
|
"grad_norm": 12.749735832214355,
|
|
"learning_rate": 2.5417538653170754e-08,
|
|
"logits/chosen": 0.7456009387969971,
|
|
"logits/rejected": 0.6536252498626709,
|
|
"logps/chosen": -195.7938232421875,
|
|
"logps/ref_chosen": -53.28052520751953,
|
|
"logps/ref_rejected": -84.2000503540039,
|
|
"logps/rejected": -324.1740417480469,
|
|
"loss": 1.0436,
|
|
"margin_dpo/margin_mean": 97.46070861816406,
|
|
"margin_dpo/margin_std": 122.39414978027344,
|
|
"step": 576
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -159.835693359375,
|
|
"KL/mean": -191.1136474609375,
|
|
"KL/rejected_KL_mean": -222.3916015625,
|
|
"KL/std": 107.13118743896484,
|
|
"epoch": 0.872260015117158,
|
|
"fcm_dpo/beta": 0.005140656605362892,
|
|
"fcm_dpo/delta": 0.07973390072584152,
|
|
"fcm_dpo/margin": 62.55592346191406,
|
|
"fcm_dpo/q_t": 0.4253769516944885,
|
|
"grad_norm": 12.675858497619629,
|
|
"learning_rate": 2.4839802933393607e-08,
|
|
"logits/chosen": 0.7293660640716553,
|
|
"logits/rejected": 0.7244733572006226,
|
|
"logps/chosen": -222.16036987304688,
|
|
"logps/ref_chosen": -62.32468795776367,
|
|
"logps/ref_rejected": -67.300537109375,
|
|
"logps/rejected": -289.692138671875,
|
|
"loss": 1.1782,
|
|
"margin_dpo/margin_mean": 62.55592346191406,
|
|
"margin_dpo/margin_std": 115.12950134277344,
|
|
"step": 577
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -151.3883819580078,
|
|
"KL/mean": -182.31329345703125,
|
|
"KL/rejected_KL_mean": -213.2382354736328,
|
|
"KL/std": 112.3051528930664,
|
|
"epoch": 0.873771730914588,
|
|
"fcm_dpo/beta": 0.005183388479053974,
|
|
"fcm_dpo/delta": 0.0807221531867981,
|
|
"fcm_dpo/margin": 61.8498649597168,
|
|
"fcm_dpo/q_t": 0.42893123626708984,
|
|
"grad_norm": 12.679496765136719,
|
|
"learning_rate": 2.4268365428344733e-08,
|
|
"logits/chosen": 0.8184738159179688,
|
|
"logits/rejected": 0.8057058453559875,
|
|
"logps/chosen": -208.0439453125,
|
|
"logps/ref_chosen": -56.65557861328125,
|
|
"logps/ref_rejected": -68.21835327148438,
|
|
"logps/rejected": -281.45660400390625,
|
|
"loss": 1.1928,
|
|
"margin_dpo/margin_mean": 61.84986877441406,
|
|
"margin_dpo/margin_std": 127.25001525878906,
|
|
"step": 578
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -157.0225067138672,
|
|
"KL/mean": -202.35211181640625,
|
|
"KL/rejected_KL_mean": -247.6816864013672,
|
|
"KL/std": 103.64462280273438,
|
|
"epoch": 0.8752834467120182,
|
|
"fcm_dpo/beta": 0.005164108704775572,
|
|
"fcm_dpo/delta": -0.06989452242851257,
|
|
"fcm_dpo/margin": 90.65919494628906,
|
|
"fcm_dpo/q_t": 0.391162633895874,
|
|
"grad_norm": 12.282883644104004,
|
|
"learning_rate": 2.3703242122359357e-08,
|
|
"logits/chosen": 0.7404800653457642,
|
|
"logits/rejected": 0.7219932079315186,
|
|
"logps/chosen": -213.8321533203125,
|
|
"logps/ref_chosen": -56.809661865234375,
|
|
"logps/ref_rejected": -68.09613037109375,
|
|
"logps/rejected": -315.77783203125,
|
|
"loss": 1.0424,
|
|
"margin_dpo/margin_mean": 90.6592025756836,
|
|
"margin_dpo/margin_std": 105.98770141601562,
|
|
"step": 579
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -158.99053955078125,
|
|
"KL/mean": -198.50558471679688,
|
|
"KL/rejected_KL_mean": -238.02059936523438,
|
|
"KL/std": 122.10806274414062,
|
|
"epoch": 0.8767951625094482,
|
|
"fcm_dpo/beta": 0.0051682982593774796,
|
|
"fcm_dpo/delta": -0.008839219808578491,
|
|
"fcm_dpo/margin": 79.03005981445312,
|
|
"fcm_dpo/q_t": 0.4109044373035431,
|
|
"grad_norm": 13.690019607543945,
|
|
"learning_rate": 2.3144448823151392e-08,
|
|
"logits/chosen": 0.719865083694458,
|
|
"logits/rejected": 0.6755998134613037,
|
|
"logps/chosen": -216.690673828125,
|
|
"logps/ref_chosen": -57.70011520385742,
|
|
"logps/ref_rejected": -77.90664672851562,
|
|
"logps/rejected": -315.92724609375,
|
|
"loss": 1.1314,
|
|
"margin_dpo/margin_mean": 79.03005981445312,
|
|
"margin_dpo/margin_std": 135.28524780273438,
|
|
"step": 580
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -173.80230712890625,
|
|
"KL/mean": -209.59906005859375,
|
|
"KL/rejected_KL_mean": -245.3957977294922,
|
|
"KL/std": 108.29855346679688,
|
|
"epoch": 0.8783068783068783,
|
|
"fcm_dpo/beta": 0.005157398991286755,
|
|
"fcm_dpo/delta": 0.0312652587890625,
|
|
"fcm_dpo/margin": 71.59349822998047,
|
|
"fcm_dpo/q_t": 0.4178553521633148,
|
|
"grad_norm": 12.915555953979492,
|
|
"learning_rate": 2.259200116137039e-08,
|
|
"logits/chosen": 0.7903176546096802,
|
|
"logits/rejected": 0.7315240502357483,
|
|
"logps/chosen": -233.13467407226562,
|
|
"logps/ref_chosen": -59.332359313964844,
|
|
"logps/ref_rejected": -83.64482116699219,
|
|
"logps/rejected": -329.0406188964844,
|
|
"loss": 1.1547,
|
|
"margin_dpo/margin_mean": 71.59349822998047,
|
|
"margin_dpo/margin_std": 129.0301055908203,
|
|
"step": 581
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -158.9951934814453,
|
|
"KL/mean": -193.22772216796875,
|
|
"KL/rejected_KL_mean": -227.46023559570312,
|
|
"KL/std": 102.33547973632812,
|
|
"epoch": 0.8798185941043084,
|
|
"fcm_dpo/beta": 0.005193857476115227,
|
|
"fcm_dpo/delta": 0.04511256143450737,
|
|
"fcm_dpo/margin": 68.46503448486328,
|
|
"fcm_dpo/q_t": 0.419838547706604,
|
|
"grad_norm": 10.930115699768066,
|
|
"learning_rate": 2.204591459016525e-08,
|
|
"logits/chosen": 0.764413595199585,
|
|
"logits/rejected": 0.793784499168396,
|
|
"logps/chosen": -223.15805053710938,
|
|
"logps/ref_chosen": -64.16285705566406,
|
|
"logps/ref_rejected": -58.632896423339844,
|
|
"logps/rejected": -286.0931396484375,
|
|
"loss": 1.15,
|
|
"margin_dpo/margin_mean": 68.46504211425781,
|
|
"margin_dpo/margin_std": 116.98333740234375,
|
|
"step": 582
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -157.42164611816406,
|
|
"KL/mean": -201.77542114257812,
|
|
"KL/rejected_KL_mean": -246.1291961669922,
|
|
"KL/std": 118.03565979003906,
|
|
"epoch": 0.8813303099017384,
|
|
"fcm_dpo/beta": 0.005167889408767223,
|
|
"fcm_dpo/delta": -0.059853777289390564,
|
|
"fcm_dpo/margin": 88.70755004882812,
|
|
"fcm_dpo/q_t": 0.397440105676651,
|
|
"grad_norm": 14.307217597961426,
|
|
"learning_rate": 2.1506204384751064e-08,
|
|
"logits/chosen": 0.8450102806091309,
|
|
"logits/rejected": 0.752760648727417,
|
|
"logps/chosen": -209.29403686523438,
|
|
"logps/ref_chosen": -51.87239456176758,
|
|
"logps/ref_rejected": -83.86331176757812,
|
|
"logps/rejected": -329.99249267578125,
|
|
"loss": 1.0903,
|
|
"margin_dpo/margin_mean": 88.7075424194336,
|
|
"margin_dpo/margin_std": 133.6029815673828,
|
|
"step": 583
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -143.89703369140625,
|
|
"KL/mean": -181.06478881835938,
|
|
"KL/rejected_KL_mean": -218.2325439453125,
|
|
"KL/std": 105.04563903808594,
|
|
"epoch": 0.8828420256991686,
|
|
"fcm_dpo/beta": 0.0051615494303405285,
|
|
"fcm_dpo/delta": 0.016559286043047905,
|
|
"fcm_dpo/margin": 74.33548736572266,
|
|
"fcm_dpo/q_t": 0.4158337712287903,
|
|
"grad_norm": 14.493661880493164,
|
|
"learning_rate": 2.09728856419826e-08,
|
|
"logits/chosen": 0.8573096990585327,
|
|
"logits/rejected": 0.7604676485061646,
|
|
"logps/chosen": -190.46844482421875,
|
|
"logps/ref_chosen": -46.571388244628906,
|
|
"logps/ref_rejected": -80.67969512939453,
|
|
"logps/rejected": -298.9122314453125,
|
|
"loss": 1.1513,
|
|
"margin_dpo/margin_mean": 74.33548736572266,
|
|
"margin_dpo/margin_std": 134.90829467773438,
|
|
"step": 584
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -169.13851928710938,
|
|
"KL/mean": -196.63491821289062,
|
|
"KL/rejected_KL_mean": -224.13131713867188,
|
|
"KL/std": 116.8939208984375,
|
|
"epoch": 0.8843537414965986,
|
|
"fcm_dpo/beta": 0.0051758429035544395,
|
|
"fcm_dpo/delta": 0.015592229552567005,
|
|
"fcm_dpo/margin": 54.99279022216797,
|
|
"fcm_dpo/q_t": 0.43469154834747314,
|
|
"grad_norm": 12.688776016235352,
|
|
"learning_rate": 2.044597327993153e-08,
|
|
"logits/chosen": 0.729500412940979,
|
|
"logits/rejected": 0.6794741153717041,
|
|
"logps/chosen": -227.2630615234375,
|
|
"logps/ref_chosen": -58.124534606933594,
|
|
"logps/ref_rejected": -79.00538635253906,
|
|
"logps/rejected": -303.13671875,
|
|
"loss": 1.2076,
|
|
"margin_dpo/margin_mean": 54.992794036865234,
|
|
"margin_dpo/margin_std": 115.35411071777344,
|
|
"step": 585
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -155.092041015625,
|
|
"KL/mean": -190.19879150390625,
|
|
"KL/rejected_KL_mean": -225.3055419921875,
|
|
"KL/std": 96.13870239257812,
|
|
"epoch": 0.8858654572940288,
|
|
"fcm_dpo/beta": 0.0051888106390833855,
|
|
"fcm_dpo/delta": 0.036333490163087845,
|
|
"fcm_dpo/margin": 70.2135009765625,
|
|
"fcm_dpo/q_t": 0.4154217541217804,
|
|
"grad_norm": 12.625263214111328,
|
|
"learning_rate": 1.9925482037469187e-08,
|
|
"logits/chosen": 0.8116664886474609,
|
|
"logits/rejected": 0.7716276049613953,
|
|
"logps/chosen": -209.1936798095703,
|
|
"logps/ref_chosen": -54.10163879394531,
|
|
"logps/ref_rejected": -63.72113037109375,
|
|
"logps/rejected": -289.02667236328125,
|
|
"loss": 1.1055,
|
|
"margin_dpo/margin_mean": 70.2135009765625,
|
|
"margin_dpo/margin_std": 89.32926177978516,
|
|
"step": 586
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -164.7338104248047,
|
|
"KL/mean": -199.86599731445312,
|
|
"KL/rejected_KL_mean": -234.99815368652344,
|
|
"KL/std": 114.43092346191406,
|
|
"epoch": 0.8873771730914588,
|
|
"fcm_dpo/beta": 0.005209009163081646,
|
|
"fcm_dpo/delta": 0.03463202714920044,
|
|
"fcm_dpo/margin": 70.26435089111328,
|
|
"fcm_dpo/q_t": 0.41193169355392456,
|
|
"grad_norm": 14.828489303588867,
|
|
"learning_rate": 1.9411426473854687e-08,
|
|
"logits/chosen": 0.8051067590713501,
|
|
"logits/rejected": 0.803104043006897,
|
|
"logps/chosen": -228.1510009765625,
|
|
"logps/ref_chosen": -63.41719436645508,
|
|
"logps/ref_rejected": -63.47003936767578,
|
|
"logps/rejected": -298.46820068359375,
|
|
"loss": 1.1928,
|
|
"margin_dpo/margin_mean": 70.26435852050781,
|
|
"margin_dpo/margin_std": 153.72738647460938,
|
|
"step": 587
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -166.94920349121094,
|
|
"KL/mean": -206.59706115722656,
|
|
"KL/rejected_KL_mean": -246.2449188232422,
|
|
"KL/std": 113.91057586669922,
|
|
"epoch": 0.8888888888888888,
|
|
"fcm_dpo/beta": 0.005201970227062702,
|
|
"fcm_dpo/delta": -0.0129234679043293,
|
|
"fcm_dpo/margin": 79.29570007324219,
|
|
"fcm_dpo/q_t": 0.4085354804992676,
|
|
"grad_norm": 16.49046516418457,
|
|
"learning_rate": 1.890382096832699e-08,
|
|
"logits/chosen": 0.8124452829360962,
|
|
"logits/rejected": 0.773036003112793,
|
|
"logps/chosen": -229.15023803710938,
|
|
"logps/ref_chosen": -62.20103454589844,
|
|
"logps/ref_rejected": -82.10249328613281,
|
|
"logps/rejected": -328.347412109375,
|
|
"loss": 1.137,
|
|
"margin_dpo/margin_mean": 79.29570770263672,
|
|
"margin_dpo/margin_std": 139.8895263671875,
|
|
"step": 588
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -156.48114013671875,
|
|
"KL/mean": -196.67636108398438,
|
|
"KL/rejected_KL_mean": -236.87161254882812,
|
|
"KL/std": 108.80821990966797,
|
|
"epoch": 0.890400604686319,
|
|
"fcm_dpo/beta": 0.005208967719227076,
|
|
"fcm_dpo/delta": -0.01918977126479149,
|
|
"fcm_dpo/margin": 80.39045715332031,
|
|
"fcm_dpo/q_t": 0.40265777707099915,
|
|
"grad_norm": 11.467910766601562,
|
|
"learning_rate": 1.840267971970344e-08,
|
|
"logits/chosen": 0.7243702411651611,
|
|
"logits/rejected": 0.6968478560447693,
|
|
"logps/chosen": -213.19476318359375,
|
|
"logps/ref_chosen": -56.71361541748047,
|
|
"logps/ref_rejected": -76.7366943359375,
|
|
"logps/rejected": -313.6082763671875,
|
|
"loss": 1.0773,
|
|
"margin_dpo/margin_mean": 80.39045715332031,
|
|
"margin_dpo/margin_std": 101.80084228515625,
|
|
"step": 589
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -170.92239379882812,
|
|
"KL/mean": -208.17005920410156,
|
|
"KL/rejected_KL_mean": -245.417724609375,
|
|
"KL/std": 100.08826446533203,
|
|
"epoch": 0.891912320483749,
|
|
"fcm_dpo/beta": 0.005213206633925438,
|
|
"fcm_dpo/delta": 0.011741623282432556,
|
|
"fcm_dpo/margin": 74.49533081054688,
|
|
"fcm_dpo/q_t": 0.4106898307800293,
|
|
"grad_norm": 15.664216995239258,
|
|
"learning_rate": 1.7908016745981856e-08,
|
|
"logits/chosen": 0.7344226837158203,
|
|
"logits/rejected": 0.6997847557067871,
|
|
"logps/chosen": -237.4362030029297,
|
|
"logps/ref_chosen": -66.5138168334961,
|
|
"logps/ref_rejected": -85.70820617675781,
|
|
"logps/rejected": -331.12591552734375,
|
|
"loss": 1.1136,
|
|
"margin_dpo/margin_mean": 74.49533081054688,
|
|
"margin_dpo/margin_std": 109.51068115234375,
|
|
"step": 590
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -144.41981506347656,
|
|
"KL/mean": -192.6387176513672,
|
|
"KL/rejected_KL_mean": -240.8576202392578,
|
|
"KL/std": 118.31570434570312,
|
|
"epoch": 0.8934240362811792,
|
|
"fcm_dpo/beta": 0.005147356539964676,
|
|
"fcm_dpo/delta": -0.09985911846160889,
|
|
"fcm_dpo/margin": 96.43781280517578,
|
|
"fcm_dpo/q_t": 0.3918688893318176,
|
|
"grad_norm": 13.487171173095703,
|
|
"learning_rate": 1.7419845883949098e-08,
|
|
"logits/chosen": 0.8497673869132996,
|
|
"logits/rejected": 0.7953537702560425,
|
|
"logps/chosen": -205.11700439453125,
|
|
"logps/ref_chosen": -60.697181701660156,
|
|
"logps/ref_rejected": -86.12278747558594,
|
|
"logps/rejected": -326.98040771484375,
|
|
"loss": 1.0782,
|
|
"margin_dpo/margin_mean": 96.43781280517578,
|
|
"margin_dpo/margin_std": 144.5989990234375,
|
|
"step": 591
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -157.78662109375,
|
|
"KL/mean": -194.60498046875,
|
|
"KL/rejected_KL_mean": -231.42337036132812,
|
|
"KL/std": 100.8841552734375,
|
|
"epoch": 0.8949357520786092,
|
|
"fcm_dpo/beta": 0.005148878321051598,
|
|
"fcm_dpo/delta": 0.0210278183221817,
|
|
"fcm_dpo/margin": 73.6367416381836,
|
|
"fcm_dpo/q_t": 0.4155803620815277,
|
|
"grad_norm": 13.96022891998291,
|
|
"learning_rate": 1.6938180788793556e-08,
|
|
"logits/chosen": 0.8256150484085083,
|
|
"logits/rejected": 0.7192568778991699,
|
|
"logps/chosen": -209.02394104003906,
|
|
"logps/ref_chosen": -51.237327575683594,
|
|
"logps/ref_rejected": -81.60242462158203,
|
|
"logps/rejected": -313.0257873535156,
|
|
"loss": 1.1246,
|
|
"margin_dpo/margin_mean": 73.63674926757812,
|
|
"margin_dpo/margin_std": 113.75395202636719,
|
|
"step": 592
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -131.33885192871094,
|
|
"KL/mean": -171.43441772460938,
|
|
"KL/rejected_KL_mean": -211.5299530029297,
|
|
"KL/std": 102.96261596679688,
|
|
"epoch": 0.8964474678760394,
|
|
"fcm_dpo/beta": 0.005155351012945175,
|
|
"fcm_dpo/delta": -0.013741271570324898,
|
|
"fcm_dpo/margin": 80.19113159179688,
|
|
"fcm_dpo/q_t": 0.404448926448822,
|
|
"grad_norm": 15.287273406982422,
|
|
"learning_rate": 1.6463034933723336e-08,
|
|
"logits/chosen": 0.7751158475875854,
|
|
"logits/rejected": 0.688532292842865,
|
|
"logps/chosen": -173.41885375976562,
|
|
"logps/ref_chosen": -42.08000183105469,
|
|
"logps/ref_rejected": -68.47499084472656,
|
|
"logps/rejected": -280.00494384765625,
|
|
"loss": 1.1012,
|
|
"margin_dpo/margin_mean": 80.19113159179688,
|
|
"margin_dpo/margin_std": 117.18716430664062,
|
|
"step": 593
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -166.78387451171875,
|
|
"KL/mean": -200.7757568359375,
|
|
"KL/rejected_KL_mean": -234.76763916015625,
|
|
"KL/std": 103.14260864257812,
|
|
"epoch": 0.8979591836734694,
|
|
"fcm_dpo/beta": 0.0051770322024822235,
|
|
"fcm_dpo/delta": 0.048906899988651276,
|
|
"fcm_dpo/margin": 67.98377990722656,
|
|
"fcm_dpo/q_t": 0.41802340745925903,
|
|
"grad_norm": 13.367891311645508,
|
|
"learning_rate": 1.5994421609589385e-08,
|
|
"logits/chosen": 0.6833805441856384,
|
|
"logits/rejected": 0.6724764704704285,
|
|
"logps/chosen": -230.44253540039062,
|
|
"logps/ref_chosen": -63.658668518066406,
|
|
"logps/ref_rejected": -70.35597229003906,
|
|
"logps/rejected": -305.1236267089844,
|
|
"loss": 1.1351,
|
|
"margin_dpo/margin_mean": 67.98377990722656,
|
|
"margin_dpo/margin_std": 104.35983276367188,
|
|
"step": 594
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -154.01788330078125,
|
|
"KL/mean": -198.89547729492188,
|
|
"KL/rejected_KL_mean": -243.77310180664062,
|
|
"KL/std": 110.08161926269531,
|
|
"epoch": 0.8994708994708994,
|
|
"fcm_dpo/beta": 0.0051672267727553844,
|
|
"fcm_dpo/delta": -0.06530828773975372,
|
|
"fcm_dpo/margin": 89.75520324707031,
|
|
"fcm_dpo/q_t": 0.39671388268470764,
|
|
"grad_norm": 10.75599479675293,
|
|
"learning_rate": 1.553235392451377e-08,
|
|
"logits/chosen": 0.7898309230804443,
|
|
"logits/rejected": 0.7085649967193604,
|
|
"logps/chosen": -210.2366485595703,
|
|
"logps/ref_chosen": -56.21875762939453,
|
|
"logps/ref_rejected": -83.95773315429688,
|
|
"logps/rejected": -327.7308349609375,
|
|
"loss": 1.0865,
|
|
"margin_dpo/margin_mean": 89.75520324707031,
|
|
"margin_dpo/margin_std": 135.01220703125,
|
|
"step": 595
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -178.6473846435547,
|
|
"KL/mean": -196.4628143310547,
|
|
"KL/rejected_KL_mean": -214.27825927734375,
|
|
"KL/std": 100.60887145996094,
|
|
"epoch": 0.9009826152683296,
|
|
"fcm_dpo/beta": 0.005187712609767914,
|
|
"fcm_dpo/delta": 0.07349441200494766,
|
|
"fcm_dpo/margin": 35.6308708190918,
|
|
"fcm_dpo/q_t": 0.45823240280151367,
|
|
"grad_norm": 14.705337524414062,
|
|
"learning_rate": 1.507684480352292e-08,
|
|
"logits/chosen": 0.6860886812210083,
|
|
"logits/rejected": 0.7082855701446533,
|
|
"logps/chosen": -247.12826538085938,
|
|
"logps/ref_chosen": -68.48088073730469,
|
|
"logps/ref_rejected": -61.732967376708984,
|
|
"logps/rejected": -276.01123046875,
|
|
"loss": 1.2819,
|
|
"margin_dpo/margin_mean": 35.63086700439453,
|
|
"margin_dpo/margin_std": 104.82177734375,
|
|
"step": 596
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -135.70375061035156,
|
|
"KL/mean": -170.39703369140625,
|
|
"KL/rejected_KL_mean": -205.09033203125,
|
|
"KL/std": 89.3361587524414,
|
|
"epoch": 0.9024943310657596,
|
|
"fcm_dpo/beta": 0.005208521615713835,
|
|
"fcm_dpo/delta": 0.03922061622142792,
|
|
"fcm_dpo/margin": 69.38656616210938,
|
|
"fcm_dpo/q_t": 0.41688859462738037,
|
|
"grad_norm": 10.742737770080566,
|
|
"learning_rate": 1.4627906988186111e-08,
|
|
"logits/chosen": 0.7661982774734497,
|
|
"logits/rejected": 0.7519518136978149,
|
|
"logps/chosen": -184.56126403808594,
|
|
"logps/ref_chosen": -48.85750961303711,
|
|
"logps/ref_rejected": -55.068084716796875,
|
|
"logps/rejected": -260.1584167480469,
|
|
"loss": 1.1263,
|
|
"margin_dpo/margin_mean": 69.3865737915039,
|
|
"margin_dpo/margin_std": 103.54592895507812,
|
|
"step": 597
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -179.37509155273438,
|
|
"KL/mean": -202.16049194335938,
|
|
"KL/rejected_KL_mean": -224.9458770751953,
|
|
"KL/std": 106.90162658691406,
|
|
"epoch": 0.9040060468631897,
|
|
"fcm_dpo/beta": 0.005223256070166826,
|
|
"fcm_dpo/delta": 0.05803574621677399,
|
|
"fcm_dpo/margin": 45.570777893066406,
|
|
"fcm_dpo/q_t": 0.4463616609573364,
|
|
"grad_norm": 12.302933692932129,
|
|
"learning_rate": 1.4185553036259095e-08,
|
|
"logits/chosen": 0.7624181509017944,
|
|
"logits/rejected": 0.6901006698608398,
|
|
"logps/chosen": -238.26223754882812,
|
|
"logps/ref_chosen": -58.88715362548828,
|
|
"logps/ref_rejected": -81.43145751953125,
|
|
"logps/rejected": -306.3773193359375,
|
|
"loss": 1.2461,
|
|
"margin_dpo/margin_mean": 45.570777893066406,
|
|
"margin_dpo/margin_std": 113.48300170898438,
|
|
"step": 598
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -179.419921875,
|
|
"KL/mean": -206.19313049316406,
|
|
"KL/rejected_KL_mean": -232.96633911132812,
|
|
"KL/std": 102.81201934814453,
|
|
"epoch": 0.9055177626606198,
|
|
"fcm_dpo/beta": 0.005292929708957672,
|
|
"fcm_dpo/delta": 0.11798475682735443,
|
|
"fcm_dpo/margin": 53.54644012451172,
|
|
"fcm_dpo/q_t": 0.43629956245422363,
|
|
"grad_norm": 14.819068908691406,
|
|
"learning_rate": 1.3749795321332885e-08,
|
|
"logits/chosen": 0.814558744430542,
|
|
"logits/rejected": 0.7772165536880493,
|
|
"logps/chosen": -237.027099609375,
|
|
"logps/ref_chosen": -57.60719299316406,
|
|
"logps/ref_rejected": -71.80469512939453,
|
|
"logps/rejected": -304.77105712890625,
|
|
"loss": 1.2149,
|
|
"margin_dpo/margin_mean": 53.54644012451172,
|
|
"margin_dpo/margin_std": 117.68329620361328,
|
|
"step": 599
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -163.1828155517578,
|
|
"KL/mean": -195.33013916015625,
|
|
"KL/rejected_KL_mean": -227.47747802734375,
|
|
"KL/std": 112.71634674072266,
|
|
"epoch": 0.9070294784580499,
|
|
"fcm_dpo/beta": 0.005290608387440443,
|
|
"fcm_dpo/delta": -0.03771144151687622,
|
|
"fcm_dpo/margin": 64.29467010498047,
|
|
"fcm_dpo/q_t": 0.4249119162559509,
|
|
"grad_norm": 16.649999618530273,
|
|
"learning_rate": 1.3320646032487393e-08,
|
|
"logits/chosen": 0.7844213247299194,
|
|
"logits/rejected": 0.7326794862747192,
|
|
"logps/chosen": -221.6251220703125,
|
|
"logps/ref_chosen": -58.44231414794922,
|
|
"logps/ref_rejected": -83.64639282226562,
|
|
"logps/rejected": -311.1238708496094,
|
|
"loss": 1.172,
|
|
"margin_dpo/margin_mean": 64.29466247558594,
|
|
"margin_dpo/margin_std": 120.23384094238281,
|
|
"step": 600
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -146.95947265625,
|
|
"KL/mean": -190.47390747070312,
|
|
"KL/rejected_KL_mean": -233.98834228515625,
|
|
"KL/std": 118.11160278320312,
|
|
"epoch": 0.90854119425548,
|
|
"fcm_dpo/beta": 0.00524750305339694,
|
|
"fcm_dpo/delta": -0.05827419087290764,
|
|
"fcm_dpo/margin": 87.02886962890625,
|
|
"fcm_dpo/q_t": 0.3984156548976898,
|
|
"grad_norm": 11.094379425048828,
|
|
"learning_rate": 1.2898117173950868e-08,
|
|
"logits/chosen": 0.7712780833244324,
|
|
"logits/rejected": 0.6982487440109253,
|
|
"logps/chosen": -202.55380249023438,
|
|
"logps/ref_chosen": -55.59432601928711,
|
|
"logps/ref_rejected": -83.68630981445312,
|
|
"logps/rejected": -317.6746520996094,
|
|
"loss": 1.0952,
|
|
"margin_dpo/margin_mean": 87.02887725830078,
|
|
"margin_dpo/margin_std": 134.14871215820312,
|
|
"step": 601
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -136.39028930664062,
|
|
"KL/mean": -176.0529327392578,
|
|
"KL/rejected_KL_mean": -215.71560668945312,
|
|
"KL/std": 101.70611572265625,
|
|
"epoch": 0.91005291005291,
|
|
"fcm_dpo/beta": 0.005252411589026451,
|
|
"fcm_dpo/delta": -0.01709642820060253,
|
|
"fcm_dpo/margin": 79.32528686523438,
|
|
"fcm_dpo/q_t": 0.4042466878890991,
|
|
"grad_norm": 12.754347801208496,
|
|
"learning_rate": 1.2482220564763667e-08,
|
|
"logits/chosen": 0.7693321704864502,
|
|
"logits/rejected": 0.7426538467407227,
|
|
"logps/chosen": -192.73947143554688,
|
|
"logps/ref_chosen": -56.349185943603516,
|
|
"logps/ref_rejected": -71.9959716796875,
|
|
"logps/rejected": -287.7115478515625,
|
|
"loss": 1.0882,
|
|
"margin_dpo/margin_mean": 79.32528686523438,
|
|
"margin_dpo/margin_std": 108.13163757324219,
|
|
"step": 602
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -150.73326110839844,
|
|
"KL/mean": -190.11868286132812,
|
|
"KL/rejected_KL_mean": -229.50411987304688,
|
|
"KL/std": 101.74800109863281,
|
|
"epoch": 0.9115646258503401,
|
|
"fcm_dpo/beta": 0.005231490824371576,
|
|
"fcm_dpo/delta": -0.012386243790388107,
|
|
"fcm_dpo/margin": 78.7708740234375,
|
|
"fcm_dpo/q_t": 0.4063763916492462,
|
|
"grad_norm": 12.99497127532959,
|
|
"learning_rate": 1.2072967838448051e-08,
|
|
"logits/chosen": 0.7219693660736084,
|
|
"logits/rejected": 0.6771643161773682,
|
|
"logps/chosen": -203.90164184570312,
|
|
"logps/ref_chosen": -53.16838836669922,
|
|
"logps/ref_rejected": -73.8604736328125,
|
|
"logps/rejected": -303.3646240234375,
|
|
"loss": 1.1051,
|
|
"margin_dpo/margin_mean": 78.7708740234375,
|
|
"margin_dpo/margin_std": 118.08514404296875,
|
|
"step": 603
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -147.13201904296875,
|
|
"KL/mean": -180.1353759765625,
|
|
"KL/rejected_KL_mean": -213.1387176513672,
|
|
"KL/std": 96.78003692626953,
|
|
"epoch": 0.9130763416477702,
|
|
"fcm_dpo/beta": 0.005260279402136803,
|
|
"fcm_dpo/delta": 0.053637612611055374,
|
|
"fcm_dpo/margin": 66.00669860839844,
|
|
"fcm_dpo/q_t": 0.42211106419563293,
|
|
"grad_norm": 15.003375053405762,
|
|
"learning_rate": 1.1670370442682459e-08,
|
|
"logits/chosen": 0.7370727062225342,
|
|
"logits/rejected": 0.7455264329910278,
|
|
"logps/chosen": -219.78143310546875,
|
|
"logps/ref_chosen": -72.64942169189453,
|
|
"logps/ref_rejected": -69.8792724609375,
|
|
"logps/rejected": -283.01800537109375,
|
|
"loss": 1.1722,
|
|
"margin_dpo/margin_mean": 66.00669860839844,
|
|
"margin_dpo/margin_std": 126.84770202636719,
|
|
"step": 604
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -165.53436279296875,
|
|
"KL/mean": -202.14730834960938,
|
|
"KL/rejected_KL_mean": -238.76028442382812,
|
|
"KL/std": 101.69642639160156,
|
|
"epoch": 0.9145880574452003,
|
|
"fcm_dpo/beta": 0.005261688493192196,
|
|
"fcm_dpo/delta": 0.014948038384318352,
|
|
"fcm_dpo/margin": 73.22592163085938,
|
|
"fcm_dpo/q_t": 0.4120534062385559,
|
|
"grad_norm": 14.244430541992188,
|
|
"learning_rate": 1.1274439638981532e-08,
|
|
"logits/chosen": 0.8310182690620422,
|
|
"logits/rejected": 0.7815611362457275,
|
|
"logps/chosen": -227.14720153808594,
|
|
"logps/ref_chosen": -61.61284637451172,
|
|
"logps/ref_rejected": -79.34398651123047,
|
|
"logps/rejected": -318.104248046875,
|
|
"loss": 1.1339,
|
|
"margin_dpo/margin_mean": 73.22592163085938,
|
|
"margin_dpo/margin_std": 120.64686584472656,
|
|
"step": 605
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -145.78570556640625,
|
|
"KL/mean": -190.61325073242188,
|
|
"KL/rejected_KL_mean": -235.4407958984375,
|
|
"KL/std": 106.72883605957031,
|
|
"epoch": 0.9160997732426304,
|
|
"fcm_dpo/beta": 0.005251706577837467,
|
|
"fcm_dpo/delta": -0.07255351543426514,
|
|
"fcm_dpo/margin": 89.65508270263672,
|
|
"fcm_dpo/q_t": 0.394401878118515,
|
|
"grad_norm": 14.536449432373047,
|
|
"learning_rate": 1.0885186502381016e-08,
|
|
"logits/chosen": 0.7300942540168762,
|
|
"logits/rejected": 0.6719257235527039,
|
|
"logps/chosen": -200.24993896484375,
|
|
"logps/ref_chosen": -54.46424102783203,
|
|
"logps/ref_rejected": -79.62708282470703,
|
|
"logps/rejected": -315.06787109375,
|
|
"loss": 1.0643,
|
|
"margin_dpo/margin_mean": 89.65508270263672,
|
|
"margin_dpo/margin_std": 121.5821533203125,
|
|
"step": 606
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -161.5489501953125,
|
|
"KL/mean": -199.4913787841797,
|
|
"KL/rejected_KL_mean": -237.4337921142578,
|
|
"KL/std": 103.01091766357422,
|
|
"epoch": 0.9176114890400605,
|
|
"fcm_dpo/beta": 0.005182595923542976,
|
|
"fcm_dpo/delta": -0.09567366540431976,
|
|
"fcm_dpo/margin": 75.88485717773438,
|
|
"fcm_dpo/q_t": 0.4100458323955536,
|
|
"grad_norm": 11.888671875,
|
|
"learning_rate": 1.0502621921127774e-08,
|
|
"logits/chosen": 0.7242947816848755,
|
|
"logits/rejected": 0.7043846249580383,
|
|
"logps/chosen": -224.40982055664062,
|
|
"logps/ref_chosen": -62.86086654663086,
|
|
"logps/ref_rejected": -72.5501937866211,
|
|
"logps/rejected": -309.9840087890625,
|
|
"loss": 1.1222,
|
|
"margin_dpo/margin_mean": 75.88485717773438,
|
|
"margin_dpo/margin_std": 114.86539459228516,
|
|
"step": 607
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -165.748779296875,
|
|
"KL/mean": -206.59561157226562,
|
|
"KL/rejected_KL_mean": -247.44241333007812,
|
|
"KL/std": 113.71495056152344,
|
|
"epoch": 0.9191232048374905,
|
|
"fcm_dpo/beta": 0.005177418701350689,
|
|
"fcm_dpo/delta": -0.023463426157832146,
|
|
"fcm_dpo/margin": 81.69361877441406,
|
|
"fcm_dpo/q_t": 0.4044950306415558,
|
|
"grad_norm": 12.438267707824707,
|
|
"learning_rate": 1.0126756596375685e-08,
|
|
"logits/chosen": 0.7202458381652832,
|
|
"logits/rejected": 0.64822918176651,
|
|
"logps/chosen": -228.92950439453125,
|
|
"logps/ref_chosen": -63.18071746826172,
|
|
"logps/ref_rejected": -99.15888214111328,
|
|
"logps/rejected": -346.6012878417969,
|
|
"loss": 1.0914,
|
|
"margin_dpo/margin_mean": 81.69361877441406,
|
|
"margin_dpo/margin_std": 116.19631958007812,
|
|
"step": 608
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -146.61346435546875,
|
|
"KL/mean": -189.2342987060547,
|
|
"KL/rejected_KL_mean": -231.85513305664062,
|
|
"KL/std": 100.7978515625,
|
|
"epoch": 0.9206349206349206,
|
|
"fcm_dpo/beta": 0.005143946968019009,
|
|
"fcm_dpo/delta": -0.03961944580078125,
|
|
"fcm_dpo/margin": 85.24164581298828,
|
|
"fcm_dpo/q_t": 0.39760255813598633,
|
|
"grad_norm": 12.297566413879395,
|
|
"learning_rate": 9.757601041885694e-09,
|
|
"logits/chosen": 0.8377814888954163,
|
|
"logits/rejected": 0.8020462989807129,
|
|
"logps/chosen": -195.2366943359375,
|
|
"logps/ref_chosen": -48.62322235107422,
|
|
"logps/ref_rejected": -68.28271484375,
|
|
"logps/rejected": -300.1378479003906,
|
|
"loss": 1.0615,
|
|
"margin_dpo/margin_mean": 85.24164581298828,
|
|
"margin_dpo/margin_std": 99.11607360839844,
|
|
"step": 609
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -161.833251953125,
|
|
"KL/mean": -201.87298583984375,
|
|
"KL/rejected_KL_mean": -241.91273498535156,
|
|
"KL/std": 112.21017456054688,
|
|
"epoch": 0.9221466364323507,
|
|
"fcm_dpo/beta": 0.005136369727551937,
|
|
"fcm_dpo/delta": -0.011704063042998314,
|
|
"fcm_dpo/margin": 80.0794906616211,
|
|
"fcm_dpo/q_t": 0.40747424960136414,
|
|
"grad_norm": 13.656766891479492,
|
|
"learning_rate": 9.395165583732379e-09,
|
|
"logits/chosen": 0.734131932258606,
|
|
"logits/rejected": 0.7291549444198608,
|
|
"logps/chosen": -234.49838256835938,
|
|
"logps/ref_chosen": -72.66513061523438,
|
|
"logps/ref_rejected": -87.15310668945312,
|
|
"logps/rejected": -329.06585693359375,
|
|
"loss": 1.1206,
|
|
"margin_dpo/margin_mean": 80.07948303222656,
|
|
"margin_dpo/margin_std": 129.4326629638672,
|
|
"step": 610
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -146.4734649658203,
|
|
"KL/mean": -179.7707977294922,
|
|
"KL/rejected_KL_mean": -213.068115234375,
|
|
"KL/std": 98.2099609375,
|
|
"epoch": 0.9236583522297808,
|
|
"fcm_dpo/beta": 0.005159948021173477,
|
|
"fcm_dpo/delta": 0.05737149715423584,
|
|
"fcm_dpo/margin": 66.59465026855469,
|
|
"fcm_dpo/q_t": 0.4199606776237488,
|
|
"grad_norm": 13.197135925292969,
|
|
"learning_rate": 9.03946036001449e-09,
|
|
"logits/chosen": 0.8454437255859375,
|
|
"logits/rejected": 0.7986049652099609,
|
|
"logps/chosen": -194.78204345703125,
|
|
"logps/ref_chosen": -48.30857849121094,
|
|
"logps/ref_rejected": -70.6141128540039,
|
|
"logps/rejected": -283.6822509765625,
|
|
"loss": 1.1361,
|
|
"margin_dpo/margin_mean": 66.59465026855469,
|
|
"margin_dpo/margin_std": 100.08576202392578,
|
|
"step": 611
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -162.59796142578125,
|
|
"KL/mean": -211.33056640625,
|
|
"KL/rejected_KL_mean": -260.06317138671875,
|
|
"KL/std": 105.462158203125,
|
|
"epoch": 0.9251700680272109,
|
|
"fcm_dpo/beta": 0.005135006736963987,
|
|
"fcm_dpo/delta": -0.10304394364356995,
|
|
"fcm_dpo/margin": 97.46522521972656,
|
|
"fcm_dpo/q_t": 0.38530468940734863,
|
|
"grad_norm": 11.957515716552734,
|
|
"learning_rate": 8.690495320571839e-09,
|
|
"logits/chosen": 0.6936167478561401,
|
|
"logits/rejected": 0.6285920143127441,
|
|
"logps/chosen": -223.82952880859375,
|
|
"logps/ref_chosen": -61.23155975341797,
|
|
"logps/ref_rejected": -94.37979888916016,
|
|
"logps/rejected": -354.4429626464844,
|
|
"loss": 1.0283,
|
|
"margin_dpo/margin_mean": 97.4652099609375,
|
|
"margin_dpo/margin_std": 115.6092529296875,
|
|
"step": 612
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -141.11468505859375,
|
|
"KL/mean": -183.64779663085938,
|
|
"KL/rejected_KL_mean": -226.18089294433594,
|
|
"KL/std": 113.89877319335938,
|
|
"epoch": 0.926681783824641,
|
|
"fcm_dpo/beta": 0.005114906467497349,
|
|
"fcm_dpo/delta": -0.03593066334724426,
|
|
"fcm_dpo/margin": 85.06620025634766,
|
|
"fcm_dpo/q_t": 0.399356484413147,
|
|
"grad_norm": 11.872891426086426,
|
|
"learning_rate": 8.348280226706722e-09,
|
|
"logits/chosen": 0.7217748165130615,
|
|
"logits/rejected": 0.7181686162948608,
|
|
"logps/chosen": -195.09780883789062,
|
|
"logps/ref_chosen": -53.98310852050781,
|
|
"logps/ref_rejected": -58.32208251953125,
|
|
"logps/rejected": -284.50299072265625,
|
|
"loss": 1.0792,
|
|
"margin_dpo/margin_mean": 85.06620025634766,
|
|
"margin_dpo/margin_std": 114.50570678710938,
|
|
"step": 613
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -161.42440795898438,
|
|
"KL/mean": -200.96511840820312,
|
|
"KL/rejected_KL_mean": -240.5058135986328,
|
|
"KL/std": 97.52909851074219,
|
|
"epoch": 0.9281934996220711,
|
|
"fcm_dpo/beta": 0.0051159486174583435,
|
|
"fcm_dpo/delta": -0.0050355978310108185,
|
|
"fcm_dpo/margin": 79.0813980102539,
|
|
"fcm_dpo/q_t": 0.40616530179977417,
|
|
"grad_norm": 14.680092811584473,
|
|
"learning_rate": 8.012824650910937e-09,
|
|
"logits/chosen": 0.7758277654647827,
|
|
"logits/rejected": 0.768898606300354,
|
|
"logps/chosen": -221.66744995117188,
|
|
"logps/ref_chosen": -60.24303436279297,
|
|
"logps/ref_rejected": -72.26258850097656,
|
|
"logps/rejected": -312.7684020996094,
|
|
"loss": 1.1005,
|
|
"margin_dpo/margin_mean": 79.08139038085938,
|
|
"margin_dpo/margin_std": 110.44924926757812,
|
|
"step": 614
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -169.56674194335938,
|
|
"KL/mean": -214.61654663085938,
|
|
"KL/rejected_KL_mean": -259.6663513183594,
|
|
"KL/std": 127.01201629638672,
|
|
"epoch": 0.9297052154195011,
|
|
"fcm_dpo/beta": 0.0050588808953762054,
|
|
"fcm_dpo/delta": -0.057871684432029724,
|
|
"fcm_dpo/margin": 90.0996322631836,
|
|
"fcm_dpo/q_t": 0.3983679413795471,
|
|
"grad_norm": 12.894699096679688,
|
|
"learning_rate": 7.684137976598088e-09,
|
|
"logits/chosen": 0.7372691035270691,
|
|
"logits/rejected": 0.6819196343421936,
|
|
"logps/chosen": -241.66140747070312,
|
|
"logps/ref_chosen": -72.09467315673828,
|
|
"logps/ref_rejected": -104.02980041503906,
|
|
"logps/rejected": -363.6961669921875,
|
|
"loss": 1.0955,
|
|
"margin_dpo/margin_mean": 90.0996322631836,
|
|
"margin_dpo/margin_std": 137.28213500976562,
|
|
"step": 615
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -161.02145385742188,
|
|
"KL/mean": -195.76901245117188,
|
|
"KL/rejected_KL_mean": -230.51657104492188,
|
|
"KL/std": 117.80242156982422,
|
|
"epoch": 0.9312169312169312,
|
|
"fcm_dpo/beta": 0.00508509948849678,
|
|
"fcm_dpo/delta": 0.04746280610561371,
|
|
"fcm_dpo/margin": 69.49510192871094,
|
|
"fcm_dpo/q_t": 0.4191455543041229,
|
|
"grad_norm": 11.971718788146973,
|
|
"learning_rate": 7.36222939784098e-09,
|
|
"logits/chosen": 0.8236125707626343,
|
|
"logits/rejected": 0.7546209096908569,
|
|
"logps/chosen": -219.55218505859375,
|
|
"logps/ref_chosen": -58.530723571777344,
|
|
"logps/ref_rejected": -75.48025512695312,
|
|
"logps/rejected": -305.996826171875,
|
|
"loss": 1.1406,
|
|
"margin_dpo/margin_mean": 69.49510192871094,
|
|
"margin_dpo/margin_std": 111.99686431884766,
|
|
"step": 616
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -169.080322265625,
|
|
"KL/mean": -201.710693359375,
|
|
"KL/rejected_KL_mean": -234.341064453125,
|
|
"KL/std": 106.47898864746094,
|
|
"epoch": 0.9327286470143613,
|
|
"fcm_dpo/beta": 0.005073944106698036,
|
|
"fcm_dpo/delta": -0.04003974422812462,
|
|
"fcm_dpo/margin": 65.26075744628906,
|
|
"fcm_dpo/q_t": 0.4251210391521454,
|
|
"grad_norm": 15.623849868774414,
|
|
"learning_rate": 7.047107919114586e-09,
|
|
"logits/chosen": 0.7990385293960571,
|
|
"logits/rejected": 0.7504894733428955,
|
|
"logps/chosen": -226.68899536132812,
|
|
"logps/ref_chosen": -57.608673095703125,
|
|
"logps/ref_rejected": -81.22109985351562,
|
|
"logps/rejected": -315.5621643066406,
|
|
"loss": 1.1665,
|
|
"margin_dpo/margin_mean": 65.26075744628906,
|
|
"margin_dpo/margin_std": 113.83659362792969,
|
|
"step": 617
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -155.96665954589844,
|
|
"KL/mean": -195.5559539794922,
|
|
"KL/rejected_KL_mean": -235.14523315429688,
|
|
"KL/std": 117.75038146972656,
|
|
"epoch": 0.9342403628117913,
|
|
"fcm_dpo/beta": 0.005066881887614727,
|
|
"fcm_dpo/delta": -0.001282472163438797,
|
|
"fcm_dpo/margin": 79.1785659790039,
|
|
"fcm_dpo/q_t": 0.41073286533355713,
|
|
"grad_norm": 14.753360748291016,
|
|
"learning_rate": 6.738782355044048e-09,
|
|
"logits/chosen": 0.7793570756912231,
|
|
"logits/rejected": 0.6889488697052002,
|
|
"logps/chosen": -212.66259765625,
|
|
"logps/ref_chosen": -56.69594192504883,
|
|
"logps/ref_rejected": -85.92362976074219,
|
|
"logps/rejected": -321.06884765625,
|
|
"loss": 1.1064,
|
|
"margin_dpo/margin_mean": 79.17855834960938,
|
|
"margin_dpo/margin_std": 118.40252685546875,
|
|
"step": 618
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -149.95797729492188,
|
|
"KL/mean": -191.42184448242188,
|
|
"KL/rejected_KL_mean": -232.88571166992188,
|
|
"KL/std": 110.40597534179688,
|
|
"epoch": 0.9357520786092215,
|
|
"fcm_dpo/beta": 0.005061000119894743,
|
|
"fcm_dpo/delta": -0.020172085613012314,
|
|
"fcm_dpo/margin": 82.92772674560547,
|
|
"fcm_dpo/q_t": 0.40420350432395935,
|
|
"grad_norm": 12.547645568847656,
|
|
"learning_rate": 6.437261330158206e-09,
|
|
"logits/chosen": 0.8313688039779663,
|
|
"logits/rejected": 0.76116943359375,
|
|
"logps/chosen": -204.01638793945312,
|
|
"logps/ref_chosen": -54.05841827392578,
|
|
"logps/ref_rejected": -83.55493927001953,
|
|
"logps/rejected": -316.440673828125,
|
|
"loss": 1.0984,
|
|
"margin_dpo/margin_mean": 82.927734375,
|
|
"margin_dpo/margin_std": 122.13333129882812,
|
|
"step": 619
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -153.41476440429688,
|
|
"KL/mean": -185.45521545410156,
|
|
"KL/rejected_KL_mean": -217.49563598632812,
|
|
"KL/std": 100.77690887451172,
|
|
"epoch": 0.9372637944066515,
|
|
"fcm_dpo/beta": 0.0050468724220991135,
|
|
"fcm_dpo/delta": -0.06418817490339279,
|
|
"fcm_dpo/margin": 64.08085632324219,
|
|
"fcm_dpo/q_t": 0.4251076579093933,
|
|
"grad_norm": 13.02280330657959,
|
|
"learning_rate": 6.142553278648238e-09,
|
|
"logits/chosen": 0.7924202680587769,
|
|
"logits/rejected": 0.7943447232246399,
|
|
"logps/chosen": -216.78448486328125,
|
|
"logps/ref_chosen": -63.36971664428711,
|
|
"logps/ref_rejected": -65.68269348144531,
|
|
"logps/rejected": -283.1783142089844,
|
|
"loss": 1.1655,
|
|
"margin_dpo/margin_mean": 64.08085632324219,
|
|
"margin_dpo/margin_std": 104.76075744628906,
|
|
"step": 620
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -151.3845977783203,
|
|
"KL/mean": -189.0948944091797,
|
|
"KL/rejected_KL_mean": -226.80520629882812,
|
|
"KL/std": 110.36802673339844,
|
|
"epoch": 0.9387755102040817,
|
|
"fcm_dpo/beta": 0.005047002341598272,
|
|
"fcm_dpo/delta": 0.01956525817513466,
|
|
"fcm_dpo/margin": 75.42060089111328,
|
|
"fcm_dpo/q_t": 0.4157317876815796,
|
|
"grad_norm": 12.515904426574707,
|
|
"learning_rate": 5.854666444131934e-09,
|
|
"logits/chosen": 0.7756680250167847,
|
|
"logits/rejected": 0.6757792234420776,
|
|
"logps/chosen": -203.705810546875,
|
|
"logps/ref_chosen": -52.321224212646484,
|
|
"logps/ref_rejected": -88.09001159667969,
|
|
"logps/rejected": -314.89520263671875,
|
|
"loss": 1.1423,
|
|
"margin_dpo/margin_mean": 75.42060089111328,
|
|
"margin_dpo/margin_std": 129.843994140625,
|
|
"step": 621
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -167.65750122070312,
|
|
"KL/mean": -204.69784545898438,
|
|
"KL/rejected_KL_mean": -241.73818969726562,
|
|
"KL/std": 114.98136901855469,
|
|
"epoch": 0.9402872260015117,
|
|
"fcm_dpo/beta": 0.0050589400343596935,
|
|
"fcm_dpo/delta": 0.025546085089445114,
|
|
"fcm_dpo/margin": 74.08068084716797,
|
|
"fcm_dpo/q_t": 0.41506049036979675,
|
|
"grad_norm": 14.55111026763916,
|
|
"learning_rate": 5.573608879422875e-09,
|
|
"logits/chosen": 0.7685604095458984,
|
|
"logits/rejected": 0.7235514521598816,
|
|
"logps/chosen": -227.52296447753906,
|
|
"logps/ref_chosen": -59.86545944213867,
|
|
"logps/ref_rejected": -81.86668395996094,
|
|
"logps/rejected": -323.6048583984375,
|
|
"loss": 1.1324,
|
|
"margin_dpo/margin_mean": 74.08068084716797,
|
|
"margin_dpo/margin_std": 118.68621826171875,
|
|
"step": 622
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -177.27322387695312,
|
|
"KL/mean": -216.47161865234375,
|
|
"KL/rejected_KL_mean": -255.66998291015625,
|
|
"KL/std": 120.90714263916016,
|
|
"epoch": 0.9417989417989417,
|
|
"fcm_dpo/beta": 0.0050424449145793915,
|
|
"fcm_dpo/delta": 0.0045386552810668945,
|
|
"fcm_dpo/margin": 78.39675903320312,
|
|
"fcm_dpo/q_t": 0.4104178547859192,
|
|
"grad_norm": 13.705345153808594,
|
|
"learning_rate": 5.299388446305342e-09,
|
|
"logits/chosen": 0.7614437341690063,
|
|
"logits/rejected": 0.7155400514602661,
|
|
"logps/chosen": -244.64169311523438,
|
|
"logps/ref_chosen": -67.36846160888672,
|
|
"logps/ref_rejected": -82.02733612060547,
|
|
"logps/rejected": -337.69732666015625,
|
|
"loss": 1.1231,
|
|
"margin_dpo/margin_mean": 78.39675903320312,
|
|
"margin_dpo/margin_std": 124.67327117919922,
|
|
"step": 623
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -149.1544647216797,
|
|
"KL/mean": -190.79296875,
|
|
"KL/rejected_KL_mean": -232.4315185546875,
|
|
"KL/std": 112.71261596679688,
|
|
"epoch": 0.9433106575963719,
|
|
"fcm_dpo/beta": 0.005042126402258873,
|
|
"fcm_dpo/delta": -0.0203988179564476,
|
|
"fcm_dpo/margin": 83.27703857421875,
|
|
"fcm_dpo/q_t": 0.4059855341911316,
|
|
"grad_norm": 13.627564430236816,
|
|
"learning_rate": 5.03201281531429e-09,
|
|
"logits/chosen": 0.7556251287460327,
|
|
"logits/rejected": 0.6709643602371216,
|
|
"logps/chosen": -200.18101501464844,
|
|
"logps/ref_chosen": -51.02655029296875,
|
|
"logps/ref_rejected": -76.49203491210938,
|
|
"logps/rejected": -308.92352294921875,
|
|
"loss": 1.1,
|
|
"margin_dpo/margin_mean": 83.27703857421875,
|
|
"margin_dpo/margin_std": 123.53499603271484,
|
|
"step": 624
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -152.11605834960938,
|
|
"KL/mean": -184.39889526367188,
|
|
"KL/rejected_KL_mean": -216.68174743652344,
|
|
"KL/std": 107.09259033203125,
|
|
"epoch": 0.9448223733938019,
|
|
"fcm_dpo/beta": 0.005073658190667629,
|
|
"fcm_dpo/delta": 0.0736159235239029,
|
|
"fcm_dpo/margin": 64.565673828125,
|
|
"fcm_dpo/q_t": 0.42789918184280396,
|
|
"grad_norm": 12.193577766418457,
|
|
"learning_rate": 4.7714894655209174e-09,
|
|
"logits/chosen": 0.8765029907226562,
|
|
"logits/rejected": 0.789482593536377,
|
|
"logps/chosen": -206.32366943359375,
|
|
"logps/ref_chosen": -54.20761489868164,
|
|
"logps/ref_rejected": -84.93669128417969,
|
|
"logps/rejected": -301.6184387207031,
|
|
"loss": 1.1845,
|
|
"margin_dpo/margin_mean": 64.565673828125,
|
|
"margin_dpo/margin_std": 129.017333984375,
|
|
"step": 625
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -141.93064880371094,
|
|
"KL/mean": -186.54937744140625,
|
|
"KL/rejected_KL_mean": -231.1681365966797,
|
|
"KL/std": 114.87901306152344,
|
|
"epoch": 0.9463340891912321,
|
|
"fcm_dpo/beta": 0.005059602204710245,
|
|
"fcm_dpo/delta": -0.05274330824613571,
|
|
"fcm_dpo/margin": 89.23748779296875,
|
|
"fcm_dpo/q_t": 0.4016817808151245,
|
|
"grad_norm": 13.155999183654785,
|
|
"learning_rate": 4.517825684323323e-09,
|
|
"logits/chosen": 0.8622666597366333,
|
|
"logits/rejected": 0.7380850315093994,
|
|
"logps/chosen": -186.99266052246094,
|
|
"logps/ref_chosen": -45.06201934814453,
|
|
"logps/ref_rejected": -89.66368103027344,
|
|
"logps/rejected": -320.8318176269531,
|
|
"loss": 1.0934,
|
|
"margin_dpo/margin_mean": 89.23748779296875,
|
|
"margin_dpo/margin_std": 139.2135009765625,
|
|
"step": 626
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -163.630859375,
|
|
"KL/mean": -211.3690948486328,
|
|
"KL/rejected_KL_mean": -259.1073303222656,
|
|
"KL/std": 119.14567565917969,
|
|
"epoch": 0.9478458049886621,
|
|
"fcm_dpo/beta": 0.005019536241889,
|
|
"fcm_dpo/delta": -0.08134086430072784,
|
|
"fcm_dpo/margin": 95.4764633178711,
|
|
"fcm_dpo/q_t": 0.38964006304740906,
|
|
"grad_norm": 13.641153335571289,
|
|
"learning_rate": 4.271028567242818e-09,
|
|
"logits/chosen": 0.7114182710647583,
|
|
"logits/rejected": 0.6020195484161377,
|
|
"logps/chosen": -222.42190551757812,
|
|
"logps/ref_chosen": -58.791053771972656,
|
|
"logps/ref_rejected": -94.90802001953125,
|
|
"logps/rejected": -354.0153503417969,
|
|
"loss": 1.0454,
|
|
"margin_dpo/margin_mean": 95.47645568847656,
|
|
"margin_dpo/margin_std": 117.75228118896484,
|
|
"step": 627
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -151.30911254882812,
|
|
"KL/mean": -195.6680908203125,
|
|
"KL/rejected_KL_mean": -240.02703857421875,
|
|
"KL/std": 112.40299987792969,
|
|
"epoch": 0.9493575207860923,
|
|
"fcm_dpo/beta": 0.005014676600694656,
|
|
"fcm_dpo/delta": -0.046218838542699814,
|
|
"fcm_dpo/margin": 88.71791076660156,
|
|
"fcm_dpo/q_t": 0.39759713411331177,
|
|
"grad_norm": 13.16943073272705,
|
|
"learning_rate": 4.0311050177251895e-09,
|
|
"logits/chosen": 0.7571415901184082,
|
|
"logits/rejected": 0.7210949659347534,
|
|
"logps/chosen": -204.11270141601562,
|
|
"logps/ref_chosen": -52.80357360839844,
|
|
"logps/ref_rejected": -76.49468994140625,
|
|
"logps/rejected": -316.521728515625,
|
|
"loss": 1.099,
|
|
"margin_dpo/margin_mean": 88.71791076660156,
|
|
"margin_dpo/margin_std": 127.91849517822266,
|
|
"step": 628
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -161.62564086914062,
|
|
"KL/mean": -194.30014038085938,
|
|
"KL/rejected_KL_mean": -226.97463989257812,
|
|
"KL/std": 103.2464599609375,
|
|
"epoch": 0.9508692365835223,
|
|
"fcm_dpo/beta": 0.005023307632654905,
|
|
"fcm_dpo/delta": 0.07284069806337357,
|
|
"fcm_dpo/margin": 65.3489990234375,
|
|
"fcm_dpo/q_t": 0.42592015862464905,
|
|
"grad_norm": 13.31646728515625,
|
|
"learning_rate": 3.798061746947995e-09,
|
|
"logits/chosen": 0.7634239196777344,
|
|
"logits/rejected": 0.7633325457572937,
|
|
"logps/chosen": -232.3431396484375,
|
|
"logps/ref_chosen": -70.71749877929688,
|
|
"logps/ref_rejected": -78.96273803710938,
|
|
"logps/rejected": -305.9373779296875,
|
|
"loss": 1.1568,
|
|
"margin_dpo/margin_mean": 65.3489990234375,
|
|
"margin_dpo/margin_std": 110.16796875,
|
|
"step": 629
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -150.17520141601562,
|
|
"KL/mean": -195.0472412109375,
|
|
"KL/rejected_KL_mean": -239.91928100585938,
|
|
"KL/std": 111.29176330566406,
|
|
"epoch": 0.9523809523809523,
|
|
"fcm_dpo/beta": 0.005015358328819275,
|
|
"fcm_dpo/delta": -0.05125061422586441,
|
|
"fcm_dpo/margin": 89.74407958984375,
|
|
"fcm_dpo/q_t": 0.3971395492553711,
|
|
"grad_norm": 10.784340858459473,
|
|
"learning_rate": 3.5719052736323806e-09,
|
|
"logits/chosen": 0.772534191608429,
|
|
"logits/rejected": 0.7334715127944946,
|
|
"logps/chosen": -206.37661743164062,
|
|
"logps/ref_chosen": -56.201412200927734,
|
|
"logps/ref_rejected": -74.69807434082031,
|
|
"logps/rejected": -314.61737060546875,
|
|
"loss": 1.0704,
|
|
"margin_dpo/margin_mean": 89.74407958984375,
|
|
"margin_dpo/margin_std": 119.89186096191406,
|
|
"step": 630
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -143.826171875,
|
|
"KL/mean": -196.12371826171875,
|
|
"KL/rejected_KL_mean": -248.4212646484375,
|
|
"KL/std": 119.42754364013672,
|
|
"epoch": 0.9538926681783825,
|
|
"fcm_dpo/beta": 0.0049383072182536125,
|
|
"fcm_dpo/delta": -0.12071359902620316,
|
|
"fcm_dpo/margin": 104.59510803222656,
|
|
"fcm_dpo/q_t": 0.38493913412094116,
|
|
"grad_norm": 13.567523002624512,
|
|
"learning_rate": 3.352641923861144e-09,
|
|
"logits/chosen": 0.8590891361236572,
|
|
"logits/rejected": 0.7551760077476501,
|
|
"logps/chosen": -202.64678955078125,
|
|
"logps/ref_chosen": -58.82059860229492,
|
|
"logps/ref_rejected": -96.51437377929688,
|
|
"logps/rejected": -344.9356689453125,
|
|
"loss": 1.0359,
|
|
"margin_dpo/margin_mean": 104.59510040283203,
|
|
"margin_dpo/margin_std": 131.27734375,
|
|
"step": 631
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -146.25799560546875,
|
|
"KL/mean": -193.65936279296875,
|
|
"KL/rejected_KL_mean": -241.06069946289062,
|
|
"KL/std": 104.68757629394531,
|
|
"epoch": 0.9554043839758125,
|
|
"fcm_dpo/beta": 0.00492379255592823,
|
|
"fcm_dpo/delta": -0.06836389005184174,
|
|
"fcm_dpo/margin": 94.80271911621094,
|
|
"fcm_dpo/q_t": 0.39218297600746155,
|
|
"grad_norm": 12.88455581665039,
|
|
"learning_rate": 3.140277830901428e-09,
|
|
"logits/chosen": 0.78548264503479,
|
|
"logits/rejected": 0.7705127000808716,
|
|
"logps/chosen": -205.04405212402344,
|
|
"logps/ref_chosen": -58.786048889160156,
|
|
"logps/ref_rejected": -67.21923828125,
|
|
"logps/rejected": -308.27996826171875,
|
|
"loss": 1.0471,
|
|
"margin_dpo/margin_mean": 94.80271911621094,
|
|
"margin_dpo/margin_std": 113.64961242675781,
|
|
"step": 632
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -141.02102661132812,
|
|
"KL/mean": -178.62750244140625,
|
|
"KL/rejected_KL_mean": -216.2340087890625,
|
|
"KL/std": 100.91416931152344,
|
|
"epoch": 0.9569160997732427,
|
|
"fcm_dpo/beta": 0.004929271526634693,
|
|
"fcm_dpo/delta": 0.029602598398923874,
|
|
"fcm_dpo/margin": 75.21295166015625,
|
|
"fcm_dpo/q_t": 0.4161266088485718,
|
|
"grad_norm": 13.078737258911133,
|
|
"learning_rate": 2.9348189350335007e-09,
|
|
"logits/chosen": 0.7822316288948059,
|
|
"logits/rejected": 0.731358528137207,
|
|
"logps/chosen": -193.15122985839844,
|
|
"logps/ref_chosen": -52.13019561767578,
|
|
"logps/ref_rejected": -67.23016357421875,
|
|
"logps/rejected": -283.46417236328125,
|
|
"loss": 1.1259,
|
|
"margin_dpo/margin_mean": 75.21295166015625,
|
|
"margin_dpo/margin_std": 114.31226348876953,
|
|
"step": 633
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -170.3883056640625,
|
|
"KL/mean": -184.89678955078125,
|
|
"KL/rejected_KL_mean": -199.4052734375,
|
|
"KL/std": 105.41249084472656,
|
|
"epoch": 0.9584278155706727,
|
|
"fcm_dpo/beta": 0.004923511762171984,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 29.016963958740234,
|
|
"fcm_dpo/q_t": 0.4682360887527466,
|
|
"grad_norm": 15.32580280303955,
|
|
"learning_rate": 2.736270983384276e-09,
|
|
"logits/chosen": 0.8242474794387817,
|
|
"logits/rejected": 0.8397818803787231,
|
|
"logps/chosen": -231.36810302734375,
|
|
"logps/ref_chosen": -60.97979736328125,
|
|
"logps/ref_rejected": -58.50825119018555,
|
|
"logps/rejected": -257.91351318359375,
|
|
"loss": 1.3327,
|
|
"margin_dpo/margin_mean": 29.016963958740234,
|
|
"margin_dpo/margin_std": 120.33830261230469,
|
|
"step": 634
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -186.2093048095703,
|
|
"KL/mean": -217.1748046875,
|
|
"KL/rejected_KL_mean": -248.14031982421875,
|
|
"KL/std": 103.67413330078125,
|
|
"epoch": 0.9599395313681028,
|
|
"fcm_dpo/beta": 0.004965448752045631,
|
|
"fcm_dpo/delta": 0.0937037542462349,
|
|
"fcm_dpo/margin": 61.931007385253906,
|
|
"fcm_dpo/q_t": 0.42995530366897583,
|
|
"grad_norm": 11.797026634216309,
|
|
"learning_rate": 2.5446395297668287e-09,
|
|
"logits/chosen": 0.6636896133422852,
|
|
"logits/rejected": 0.615709662437439,
|
|
"logps/chosen": -252.18231201171875,
|
|
"logps/ref_chosen": -65.9730224609375,
|
|
"logps/ref_rejected": -85.61317443847656,
|
|
"logps/rejected": -333.75347900390625,
|
|
"loss": 1.1945,
|
|
"margin_dpo/margin_mean": 61.931007385253906,
|
|
"margin_dpo/margin_std": 124.16761779785156,
|
|
"step": 635
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -144.98782348632812,
|
|
"KL/mean": -186.74229431152344,
|
|
"KL/rejected_KL_mean": -228.49676513671875,
|
|
"KL/std": 105.39955139160156,
|
|
"epoch": 0.9614512471655329,
|
|
"fcm_dpo/beta": 0.0049590300768613815,
|
|
"fcm_dpo/delta": -0.014486796222627163,
|
|
"fcm_dpo/margin": 83.50894165039062,
|
|
"fcm_dpo/q_t": 0.4041042625904083,
|
|
"grad_norm": 10.30007266998291,
|
|
"learning_rate": 2.359929934524829e-09,
|
|
"logits/chosen": 0.7785642147064209,
|
|
"logits/rejected": 0.6857019066810608,
|
|
"logps/chosen": -194.12799072265625,
|
|
"logps/ref_chosen": -49.140167236328125,
|
|
"logps/ref_rejected": -81.26971435546875,
|
|
"logps/rejected": -309.7664794921875,
|
|
"loss": 1.0874,
|
|
"margin_dpo/margin_mean": 83.50894165039062,
|
|
"margin_dpo/margin_std": 111.7535171508789,
|
|
"step": 636
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -195.08912658691406,
|
|
"KL/mean": -226.00186157226562,
|
|
"KL/rejected_KL_mean": -256.9145812988281,
|
|
"KL/std": 106.48178100585938,
|
|
"epoch": 0.9629629629629629,
|
|
"fcm_dpo/beta": 0.005004129372537136,
|
|
"fcm_dpo/delta": 0.09182324260473251,
|
|
"fcm_dpo/margin": 61.82545471191406,
|
|
"fcm_dpo/q_t": 0.43030744791030884,
|
|
"grad_norm": 13.099990844726562,
|
|
"learning_rate": 2.1821473643827137e-09,
|
|
"logits/chosen": 0.7206562757492065,
|
|
"logits/rejected": 0.6680300235748291,
|
|
"logps/chosen": -268.78570556640625,
|
|
"logps/ref_chosen": -73.69658660888672,
|
|
"logps/ref_rejected": -83.01487731933594,
|
|
"logps/rejected": -339.929443359375,
|
|
"loss": 1.2015,
|
|
"margin_dpo/margin_mean": 61.82545471191406,
|
|
"margin_dpo/margin_std": 131.51806640625,
|
|
"step": 637
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -168.87652587890625,
|
|
"KL/mean": -206.572265625,
|
|
"KL/rejected_KL_mean": -244.26800537109375,
|
|
"KL/std": 105.70046997070312,
|
|
"epoch": 0.9644746787603931,
|
|
"fcm_dpo/beta": 0.005014636553823948,
|
|
"fcm_dpo/delta": 0.022363822907209396,
|
|
"fcm_dpo/margin": 75.39149475097656,
|
|
"fcm_dpo/q_t": 0.4142439365386963,
|
|
"grad_norm": 12.821139335632324,
|
|
"learning_rate": 2.0112967923011646e-09,
|
|
"logits/chosen": 0.8064244985580444,
|
|
"logits/rejected": 0.7650469541549683,
|
|
"logps/chosen": -231.65809631347656,
|
|
"logps/ref_chosen": -62.78158187866211,
|
|
"logps/ref_rejected": -85.40478515625,
|
|
"logps/rejected": -329.67279052734375,
|
|
"loss": 1.123,
|
|
"margin_dpo/margin_mean": 75.39149475097656,
|
|
"margin_dpo/margin_std": 116.29246520996094,
|
|
"step": 638
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -150.9545135498047,
|
|
"KL/mean": -195.05892944335938,
|
|
"KL/rejected_KL_mean": -239.16334533691406,
|
|
"KL/std": 113.81045532226562,
|
|
"epoch": 0.9659863945578231,
|
|
"fcm_dpo/beta": 0.005007440224289894,
|
|
"fcm_dpo/delta": -0.04263288155198097,
|
|
"fcm_dpo/margin": 88.20883178710938,
|
|
"fcm_dpo/q_t": 0.4009074568748474,
|
|
"grad_norm": 15.49028491973877,
|
|
"learning_rate": 1.847382997337943e-09,
|
|
"logits/chosen": 0.7628029584884644,
|
|
"logits/rejected": 0.6811566352844238,
|
|
"logps/chosen": -204.72109985351562,
|
|
"logps/ref_chosen": -53.76658630371094,
|
|
"logps/ref_rejected": -72.30009460449219,
|
|
"logps/rejected": -311.46343994140625,
|
|
"loss": 1.08,
|
|
"margin_dpo/margin_mean": 88.20882415771484,
|
|
"margin_dpo/margin_std": 123.04683685302734,
|
|
"step": 639
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -157.29843139648438,
|
|
"KL/mean": -195.86241149902344,
|
|
"KL/rejected_KL_mean": -234.42637634277344,
|
|
"KL/std": 107.74363708496094,
|
|
"epoch": 0.9674981103552532,
|
|
"fcm_dpo/beta": 0.0049981530755758286,
|
|
"fcm_dpo/delta": 0.014752366580069065,
|
|
"fcm_dpo/margin": 77.12796020507812,
|
|
"fcm_dpo/q_t": 0.41179871559143066,
|
|
"grad_norm": 12.726821899414062,
|
|
"learning_rate": 1.690410564514244e-09,
|
|
"logits/chosen": 0.8481752872467041,
|
|
"logits/rejected": 0.7886199951171875,
|
|
"logps/chosen": -208.71621704101562,
|
|
"logps/ref_chosen": -51.41777801513672,
|
|
"logps/ref_rejected": -77.27879333496094,
|
|
"logps/rejected": -311.7051696777344,
|
|
"loss": 1.1192,
|
|
"margin_dpo/margin_mean": 77.12796020507812,
|
|
"margin_dpo/margin_std": 117.64265441894531,
|
|
"step": 640
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -165.22206115722656,
|
|
"KL/mean": -200.39663696289062,
|
|
"KL/rejected_KL_mean": -235.57122802734375,
|
|
"KL/std": 99.89443969726562,
|
|
"epoch": 0.9690098261526833,
|
|
"fcm_dpo/beta": 0.0050179436802864075,
|
|
"fcm_dpo/delta": 0.04783842712640762,
|
|
"fcm_dpo/margin": 70.34918212890625,
|
|
"fcm_dpo/q_t": 0.41749656200408936,
|
|
"grad_norm": 13.244073867797852,
|
|
"learning_rate": 1.5403838846864692e-09,
|
|
"logits/chosen": 0.7351692914962769,
|
|
"logits/rejected": 0.7242539525032043,
|
|
"logps/chosen": -236.2767333984375,
|
|
"logps/ref_chosen": -71.0546646118164,
|
|
"logps/ref_rejected": -82.2440185546875,
|
|
"logps/rejected": -317.81524658203125,
|
|
"loss": 1.1184,
|
|
"margin_dpo/margin_mean": 70.34918212890625,
|
|
"margin_dpo/margin_std": 95.07601165771484,
|
|
"step": 641
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -172.56155395507812,
|
|
"KL/mean": -199.61013793945312,
|
|
"KL/rejected_KL_mean": -226.65869140625,
|
|
"KL/std": 98.68553924560547,
|
|
"epoch": 0.9705215419501134,
|
|
"fcm_dpo/beta": 0.0050382474437355995,
|
|
"fcm_dpo/delta": 0.01700037717819214,
|
|
"fcm_dpo/margin": 54.09712600708008,
|
|
"fcm_dpo/q_t": 0.43819987773895264,
|
|
"grad_norm": 13.722558975219727,
|
|
"learning_rate": 1.3973071544233218e-09,
|
|
"logits/chosen": 0.7409595251083374,
|
|
"logits/rejected": 0.7626011371612549,
|
|
"logps/chosen": -241.4908447265625,
|
|
"logps/ref_chosen": -68.92927551269531,
|
|
"logps/ref_rejected": -70.85682678222656,
|
|
"logps/rejected": -297.5155029296875,
|
|
"loss": 1.2189,
|
|
"margin_dpo/margin_mean": 54.097129821777344,
|
|
"margin_dpo/margin_std": 118.69155883789062,
|
|
"step": 642
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -157.39663696289062,
|
|
"KL/mean": -198.70083618164062,
|
|
"KL/rejected_KL_mean": -240.00503540039062,
|
|
"KL/std": 107.27330017089844,
|
|
"epoch": 0.9720332577475435,
|
|
"fcm_dpo/beta": 0.005031741224229336,
|
|
"fcm_dpo/delta": -0.01599844917654991,
|
|
"fcm_dpo/margin": 82.6083984375,
|
|
"fcm_dpo/q_t": 0.4059451222419739,
|
|
"grad_norm": 17.722583770751953,
|
|
"learning_rate": 1.261184375888541e-09,
|
|
"logits/chosen": 0.740868091583252,
|
|
"logits/rejected": 0.6648463010787964,
|
|
"logps/chosen": -222.70567321777344,
|
|
"logps/ref_chosen": -65.30903625488281,
|
|
"logps/ref_rejected": -83.61613464355469,
|
|
"logps/rejected": -323.62115478515625,
|
|
"loss": 1.1057,
|
|
"margin_dpo/margin_mean": 82.60839080810547,
|
|
"margin_dpo/margin_std": 124.01104736328125,
|
|
"step": 643
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -148.37164306640625,
|
|
"KL/mean": -175.71395874023438,
|
|
"KL/rejected_KL_mean": -203.05625915527344,
|
|
"KL/std": 94.66099548339844,
|
|
"epoch": 0.9735449735449735,
|
|
"fcm_dpo/beta": 0.005031779408454895,
|
|
"fcm_dpo/delta": 0.006305241491645575,
|
|
"fcm_dpo/margin": 54.684608459472656,
|
|
"fcm_dpo/q_t": 0.4357111155986786,
|
|
"grad_norm": 10.841629981994629,
|
|
"learning_rate": 1.1320193567288527e-09,
|
|
"logits/chosen": 0.8413490056991577,
|
|
"logits/rejected": 0.8117701411247253,
|
|
"logps/chosen": -199.37425231933594,
|
|
"logps/ref_chosen": -51.002601623535156,
|
|
"logps/ref_rejected": -64.46372985839844,
|
|
"logps/rejected": -267.5199890136719,
|
|
"loss": 1.2123,
|
|
"margin_dpo/margin_mean": 54.68461608886719,
|
|
"margin_dpo/margin_std": 114.31196594238281,
|
|
"step": 644
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -162.00721740722656,
|
|
"KL/mean": -196.057373046875,
|
|
"KL/rejected_KL_mean": -230.1074981689453,
|
|
"KL/std": 103.26606750488281,
|
|
"epoch": 0.9750566893424036,
|
|
"fcm_dpo/beta": 0.005054825451225042,
|
|
"fcm_dpo/delta": 0.056740857660770416,
|
|
"fcm_dpo/margin": 68.10031127929688,
|
|
"fcm_dpo/q_t": 0.41977882385253906,
|
|
"grad_norm": 14.985542297363281,
|
|
"learning_rate": 1.0098157099674987e-09,
|
|
"logits/chosen": 0.7427242994308472,
|
|
"logits/rejected": 0.7293637990951538,
|
|
"logps/chosen": -222.97061157226562,
|
|
"logps/ref_chosen": -60.963409423828125,
|
|
"logps/ref_rejected": -69.73353576660156,
|
|
"logps/rejected": -299.8410339355469,
|
|
"loss": 1.1382,
|
|
"margin_dpo/margin_mean": 68.10031127929688,
|
|
"margin_dpo/margin_std": 105.08343505859375,
|
|
"step": 645
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -178.91403198242188,
|
|
"KL/mean": -211.22756958007812,
|
|
"KL/rejected_KL_mean": -243.54112243652344,
|
|
"KL/std": 121.20231628417969,
|
|
"epoch": 0.9765684051398337,
|
|
"fcm_dpo/beta": 0.005090632475912571,
|
|
"fcm_dpo/delta": 0.07217521965503693,
|
|
"fcm_dpo/margin": 64.62708282470703,
|
|
"fcm_dpo/q_t": 0.42554330825805664,
|
|
"grad_norm": 11.895480155944824,
|
|
"learning_rate": 8.945768539031783e-10,
|
|
"logits/chosen": 0.7835663557052612,
|
|
"logits/rejected": 0.7295738458633423,
|
|
"logps/chosen": -241.2041015625,
|
|
"logps/ref_chosen": -62.290069580078125,
|
|
"logps/ref_rejected": -85.54812622070312,
|
|
"logps/rejected": -329.0892333984375,
|
|
"loss": 1.1734,
|
|
"margin_dpo/margin_mean": 64.62708282470703,
|
|
"margin_dpo/margin_std": 121.17414855957031,
|
|
"step": 646
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -175.54522705078125,
|
|
"KL/mean": -226.7615509033203,
|
|
"KL/rejected_KL_mean": -277.97784423828125,
|
|
"KL/std": 120.04508209228516,
|
|
"epoch": 0.9780801209372638,
|
|
"fcm_dpo/beta": 0.005056916270405054,
|
|
"fcm_dpo/delta": -0.12105900794267654,
|
|
"fcm_dpo/margin": 102.4326171875,
|
|
"fcm_dpo/q_t": 0.38193100690841675,
|
|
"grad_norm": 14.29843521118164,
|
|
"learning_rate": 7.863060120144316e-10,
|
|
"logits/chosen": 0.7624701261520386,
|
|
"logits/rejected": 0.676109790802002,
|
|
"logps/chosen": -243.0611114501953,
|
|
"logps/ref_chosen": -67.515869140625,
|
|
"logps/ref_rejected": -101.50871276855469,
|
|
"logps/rejected": -379.486572265625,
|
|
"loss": 1.016,
|
|
"margin_dpo/margin_mean": 102.4326171875,
|
|
"margin_dpo/margin_std": 117.96634674072266,
|
|
"step": 647
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -175.37025451660156,
|
|
"KL/mean": -211.1006622314453,
|
|
"KL/rejected_KL_mean": -246.8310546875,
|
|
"KL/std": 103.89393615722656,
|
|
"epoch": 0.9795918367346939,
|
|
"fcm_dpo/beta": 0.005042518023401499,
|
|
"fcm_dpo/delta": 0.04030587896704674,
|
|
"fcm_dpo/margin": 71.46080780029297,
|
|
"fcm_dpo/q_t": 0.41920343041419983,
|
|
"grad_norm": 12.912647247314453,
|
|
"learning_rate": 6.850062128694045e-10,
|
|
"logits/chosen": 0.7104675769805908,
|
|
"logits/rejected": 0.6561470031738281,
|
|
"logps/chosen": -239.9661865234375,
|
|
"logps/ref_chosen": -64.59593963623047,
|
|
"logps/ref_rejected": -83.384033203125,
|
|
"logps/rejected": -330.215087890625,
|
|
"loss": 1.1702,
|
|
"margin_dpo/margin_mean": 71.4608154296875,
|
|
"margin_dpo/margin_std": 133.63043212890625,
|
|
"step": 648
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -157.33462524414062,
|
|
"KL/mean": -194.87408447265625,
|
|
"KL/rejected_KL_mean": -232.41355895996094,
|
|
"KL/std": 102.01509094238281,
|
|
"epoch": 0.981103552532124,
|
|
"fcm_dpo/beta": 0.0050583453848958015,
|
|
"fcm_dpo/delta": 0.02056538499891758,
|
|
"fcm_dpo/margin": 75.07894897460938,
|
|
"fcm_dpo/q_t": 0.4127805829048157,
|
|
"grad_norm": 16.18140983581543,
|
|
"learning_rate": 5.906802900412788e-10,
|
|
"logits/chosen": 0.7951866388320923,
|
|
"logits/rejected": 0.7381685376167297,
|
|
"logps/chosen": -206.64427185058594,
|
|
"logps/ref_chosen": -49.30964660644531,
|
|
"logps/ref_rejected": -73.73710632324219,
|
|
"logps/rejected": -306.1506652832031,
|
|
"loss": 1.1309,
|
|
"margin_dpo/margin_mean": 75.07894897460938,
|
|
"margin_dpo/margin_std": 120.8050537109375,
|
|
"step": 649
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -153.83245849609375,
|
|
"KL/mean": -192.3116455078125,
|
|
"KL/rejected_KL_mean": -230.7908172607422,
|
|
"KL/std": 104.44856262207031,
|
|
"epoch": 0.982615268329554,
|
|
"fcm_dpo/beta": 0.005066189914941788,
|
|
"fcm_dpo/delta": 0.010274587199091911,
|
|
"fcm_dpo/margin": 76.95834350585938,
|
|
"fcm_dpo/q_t": 0.4116915762424469,
|
|
"grad_norm": 12.397159576416016,
|
|
"learning_rate": 5.033308820289184e-10,
|
|
"logits/chosen": 0.8175115585327148,
|
|
"logits/rejected": 0.7612703442573547,
|
|
"logps/chosen": -208.89572143554688,
|
|
"logps/ref_chosen": -55.06325912475586,
|
|
"logps/ref_rejected": -77.39610290527344,
|
|
"logps/rejected": -308.1869201660156,
|
|
"loss": 1.1303,
|
|
"margin_dpo/margin_mean": 76.9583511352539,
|
|
"margin_dpo/margin_std": 126.22981262207031,
|
|
"step": 650
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -152.21575927734375,
|
|
"KL/mean": -185.7320556640625,
|
|
"KL/rejected_KL_mean": -219.2483673095703,
|
|
"KL/std": 103.46519470214844,
|
|
"epoch": 0.9841269841269841,
|
|
"fcm_dpo/beta": 0.0051023634150624275,
|
|
"fcm_dpo/delta": 0.058836936950683594,
|
|
"fcm_dpo/margin": 67.03260803222656,
|
|
"fcm_dpo/q_t": 0.42291390895843506,
|
|
"grad_norm": 12.655989646911621,
|
|
"learning_rate": 4.2296043218295606e-10,
|
|
"logits/chosen": 0.8118767142295837,
|
|
"logits/rejected": 0.7455794811248779,
|
|
"logps/chosen": -206.28091430664062,
|
|
"logps/ref_chosen": -54.065162658691406,
|
|
"logps/ref_rejected": -77.79080200195312,
|
|
"logps/rejected": -297.0391845703125,
|
|
"loss": 1.1455,
|
|
"margin_dpo/margin_mean": 67.03260803222656,
|
|
"margin_dpo/margin_std": 108.17694091796875,
|
|
"step": 651
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -180.15231323242188,
|
|
"KL/mean": -211.26609802246094,
|
|
"KL/rejected_KL_mean": -242.3798828125,
|
|
"KL/std": 114.86628723144531,
|
|
"epoch": 0.9856386999244142,
|
|
"fcm_dpo/beta": 0.0050994837656617165,
|
|
"fcm_dpo/delta": -0.00733643863350153,
|
|
"fcm_dpo/margin": 62.22758865356445,
|
|
"fcm_dpo/q_t": 0.43050581216812134,
|
|
"grad_norm": 14.892316818237305,
|
|
"learning_rate": 3.4957118863768176e-10,
|
|
"logits/chosen": 0.7670651078224182,
|
|
"logits/rejected": 0.7252798080444336,
|
|
"logps/chosen": -243.7926025390625,
|
|
"logps/ref_chosen": -63.64030456542969,
|
|
"logps/ref_rejected": -78.86882019042969,
|
|
"logps/rejected": -321.24871826171875,
|
|
"loss": 1.2071,
|
|
"margin_dpo/margin_mean": 62.22758483886719,
|
|
"margin_dpo/margin_std": 137.31927490234375,
|
|
"step": 652
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -158.90919494628906,
|
|
"KL/mean": -196.4755859375,
|
|
"KL/rejected_KL_mean": -234.0419921875,
|
|
"KL/std": 101.4590835571289,
|
|
"epoch": 0.9871504157218443,
|
|
"fcm_dpo/beta": 0.005113469902426004,
|
|
"fcm_dpo/delta": 0.015999607741832733,
|
|
"fcm_dpo/margin": 75.13280487060547,
|
|
"fcm_dpo/q_t": 0.4124922752380371,
|
|
"grad_norm": 14.617870330810547,
|
|
"learning_rate": 2.831652042480093e-10,
|
|
"logits/chosen": 0.7422964572906494,
|
|
"logits/rejected": 0.702532947063446,
|
|
"logps/chosen": -220.5775604248047,
|
|
"logps/ref_chosen": -61.668373107910156,
|
|
"logps/ref_rejected": -73.83012390136719,
|
|
"logps/rejected": -307.87213134765625,
|
|
"loss": 1.1238,
|
|
"margin_dpo/margin_mean": 75.13280487060547,
|
|
"margin_dpo/margin_std": 116.88356018066406,
|
|
"step": 653
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -167.12106323242188,
|
|
"KL/mean": -202.30426025390625,
|
|
"KL/rejected_KL_mean": -237.4874725341797,
|
|
"KL/std": 120.61741638183594,
|
|
"epoch": 0.9886621315192744,
|
|
"fcm_dpo/beta": 0.0050742849707603455,
|
|
"fcm_dpo/delta": -0.06553708761930466,
|
|
"fcm_dpo/margin": 70.36641693115234,
|
|
"fcm_dpo/q_t": 0.41872087121009827,
|
|
"grad_norm": 13.03736686706543,
|
|
"learning_rate": 2.2374433653205016e-10,
|
|
"logits/chosen": 0.7251932621002197,
|
|
"logits/rejected": 0.6387395858764648,
|
|
"logps/chosen": -224.6893310546875,
|
|
"logps/ref_chosen": -57.568267822265625,
|
|
"logps/ref_rejected": -87.74789428710938,
|
|
"logps/rejected": -325.2353515625,
|
|
"loss": 1.1438,
|
|
"margin_dpo/margin_mean": 70.36641693115234,
|
|
"margin_dpo/margin_std": 111.77685546875,
|
|
"step": 654
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -130.86376953125,
|
|
"KL/mean": -182.77474975585938,
|
|
"KL/rejected_KL_mean": -234.68572998046875,
|
|
"KL/std": 105.52117919921875,
|
|
"epoch": 0.9901738473167044,
|
|
"fcm_dpo/beta": 0.005005775019526482,
|
|
"fcm_dpo/delta": -0.12419614940881729,
|
|
"fcm_dpo/margin": 103.82197570800781,
|
|
"fcm_dpo/q_t": 0.38056305050849915,
|
|
"grad_norm": 11.715929985046387,
|
|
"learning_rate": 1.7131024761923852e-10,
|
|
"logits/chosen": 0.7341259717941284,
|
|
"logits/rejected": 0.6585381031036377,
|
|
"logps/chosen": -183.0109100341797,
|
|
"logps/ref_chosen": -52.14714813232422,
|
|
"logps/ref_rejected": -80.85014343261719,
|
|
"logps/rejected": -315.535888671875,
|
|
"loss": 1.0018,
|
|
"margin_dpo/margin_mean": 103.82197570800781,
|
|
"margin_dpo/margin_std": 104.50448608398438,
|
|
"step": 655
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -173.95578002929688,
|
|
"KL/mean": -211.63235473632812,
|
|
"KL/rejected_KL_mean": -249.3089599609375,
|
|
"KL/std": 107.24411010742188,
|
|
"epoch": 0.9916855631141346,
|
|
"fcm_dpo/beta": 0.005020740441977978,
|
|
"fcm_dpo/delta": 0.02208440564572811,
|
|
"fcm_dpo/margin": 75.35320281982422,
|
|
"fcm_dpo/q_t": 0.41347581148147583,
|
|
"grad_norm": 10.929963111877441,
|
|
"learning_rate": 1.2586440420372934e-10,
|
|
"logits/chosen": 0.7052562236785889,
|
|
"logits/rejected": 0.6669734120368958,
|
|
"logps/chosen": -247.21249389648438,
|
|
"logps/ref_chosen": -73.25672912597656,
|
|
"logps/ref_rejected": -85.35127258300781,
|
|
"logps/rejected": -334.66021728515625,
|
|
"loss": 1.1259,
|
|
"margin_dpo/margin_mean": 75.35320281982422,
|
|
"margin_dpo/margin_std": 118.24618530273438,
|
|
"step": 656
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -152.77743530273438,
|
|
"KL/mean": -199.23867797851562,
|
|
"KL/rejected_KL_mean": -245.69992065429688,
|
|
"KL/std": 118.88941955566406,
|
|
"epoch": 0.9931972789115646,
|
|
"fcm_dpo/beta": 0.00500155333429575,
|
|
"fcm_dpo/delta": -0.0662706047296524,
|
|
"fcm_dpo/margin": 92.9224853515625,
|
|
"fcm_dpo/q_t": 0.39535459876060486,
|
|
"grad_norm": 11.326783180236816,
|
|
"learning_rate": 8.740807750345913e-11,
|
|
"logits/chosen": 0.8647462129592896,
|
|
"logits/rejected": 0.7894011735916138,
|
|
"logps/chosen": -202.50082397460938,
|
|
"logps/ref_chosen": -49.72339630126953,
|
|
"logps/ref_rejected": -75.1568603515625,
|
|
"logps/rejected": -320.8567810058594,
|
|
"loss": 1.072,
|
|
"margin_dpo/margin_mean": 92.9224853515625,
|
|
"margin_dpo/margin_std": 130.06622314453125,
|
|
"step": 657
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -163.71214294433594,
|
|
"KL/mean": -201.5113525390625,
|
|
"KL/rejected_KL_mean": -239.31056213378906,
|
|
"KL/std": 122.80596923828125,
|
|
"epoch": 0.9947089947089947,
|
|
"fcm_dpo/beta": 0.005012688226997852,
|
|
"fcm_dpo/delta": 0.021015215665102005,
|
|
"fcm_dpo/margin": 75.59841918945312,
|
|
"fcm_dpo/q_t": 0.4151880145072937,
|
|
"grad_norm": 11.15853500366211,
|
|
"learning_rate": 5.594234322453539e-11,
|
|
"logits/chosen": 0.7942638397216797,
|
|
"logits/rejected": 0.7527487874031067,
|
|
"logps/chosen": -226.75848388671875,
|
|
"logps/ref_chosen": -63.04634094238281,
|
|
"logps/ref_rejected": -83.44963073730469,
|
|
"logps/rejected": -322.76019287109375,
|
|
"loss": 1.1631,
|
|
"margin_dpo/margin_mean": 75.59841918945312,
|
|
"margin_dpo/margin_std": 142.60012817382812,
|
|
"step": 658
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -166.32443237304688,
|
|
"KL/mean": -195.42449951171875,
|
|
"KL/rejected_KL_mean": -224.52456665039062,
|
|
"KL/std": 106.95308685302734,
|
|
"epoch": 0.9962207105064248,
|
|
"fcm_dpo/beta": 0.005004746373742819,
|
|
"fcm_dpo/delta": 0.009378593415021896,
|
|
"fcm_dpo/margin": 58.20014953613281,
|
|
"fcm_dpo/q_t": 0.4330612123012543,
|
|
"grad_norm": 15.631381034851074,
|
|
"learning_rate": 3.146808153123293e-11,
|
|
"logits/chosen": 0.8345963954925537,
|
|
"logits/rejected": 0.7758294343948364,
|
|
"logps/chosen": -221.40463256835938,
|
|
"logps/ref_chosen": -55.0802001953125,
|
|
"logps/ref_rejected": -71.91049194335938,
|
|
"logps/rejected": -296.43505859375,
|
|
"loss": 1.2061,
|
|
"margin_dpo/margin_mean": 58.20014953613281,
|
|
"margin_dpo/margin_std": 121.64048767089844,
|
|
"step": 659
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -156.56204223632812,
|
|
"KL/mean": -204.19027709960938,
|
|
"KL/rejected_KL_mean": -251.81854248046875,
|
|
"KL/std": 115.70323181152344,
|
|
"epoch": 0.9977324263038548,
|
|
"fcm_dpo/beta": 0.004977273289114237,
|
|
"fcm_dpo/delta": -0.07590591907501221,
|
|
"fcm_dpo/margin": 95.25648498535156,
|
|
"fcm_dpo/q_t": 0.393707811832428,
|
|
"grad_norm": 13.175061225891113,
|
|
"learning_rate": 1.3985977021235829e-11,
|
|
"logits/chosen": 0.8665298223495483,
|
|
"logits/rejected": 0.807388424873352,
|
|
"logps/chosen": -211.08795166015625,
|
|
"logps/ref_chosen": -54.525917053222656,
|
|
"logps/ref_rejected": -81.23604583740234,
|
|
"logps/rejected": -333.0545654296875,
|
|
"loss": 1.0503,
|
|
"margin_dpo/margin_mean": 95.25648498535156,
|
|
"margin_dpo/margin_std": 123.85679626464844,
|
|
"step": 660
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -181.41653442382812,
|
|
"KL/mean": -211.52963256835938,
|
|
"KL/rejected_KL_mean": -241.64276123046875,
|
|
"KL/std": 114.91862487792969,
|
|
"epoch": 0.999244142101285,
|
|
"fcm_dpo/beta": 0.005009702406823635,
|
|
"fcm_dpo/delta": 0.09962660819292068,
|
|
"fcm_dpo/margin": 60.22621536254883,
|
|
"fcm_dpo/q_t": 0.431339293718338,
|
|
"grad_norm": 10.713399887084961,
|
|
"learning_rate": 3.4965187065971735e-12,
|
|
"logits/chosen": 0.747988760471344,
|
|
"logits/rejected": 0.6799809336662292,
|
|
"logps/chosen": -241.78916931152344,
|
|
"logps/ref_chosen": -60.37263870239258,
|
|
"logps/ref_rejected": -77.42874145507812,
|
|
"logps/rejected": -319.071533203125,
|
|
"loss": 1.2091,
|
|
"margin_dpo/margin_mean": 60.226219177246094,
|
|
"margin_dpo/margin_std": 131.71713256835938,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.999244142101285,
|
|
"step": 661,
|
|
"total_flos": 0.0,
|
|
"train_loss": 1.150458091776959,
|
|
"train_runtime": 1648.8624,
|
|
"train_samples_per_second": 25.676,
|
|
"train_steps_per_second": 0.401
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 661,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 50,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|