Model: jackf857/qwen3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.45-s_star-0.85 Source: Original Platform
12705 lines
464 KiB
JSON
12705 lines
464 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.999244142101285,
|
|
"eval_steps": 100,
|
|
"global_step": 661,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0015117157974300832,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.002980858087539673,
|
|
"fcm_dpo/q_t": 0.5000747442245483,
|
|
"grad_norm": 17.898353576660156,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": 1.702779769897461,
|
|
"logits/rejected": 1.6965749263763428,
|
|
"logps/chosen": -80.20932006835938,
|
|
"logps/ref_chosen": -80.27740478515625,
|
|
"logps/ref_rejected": -83.5943374633789,
|
|
"logps/rejected": -83.52326965332031,
|
|
"loss": 1.387,
|
|
"margin_dpo/margin_mean": -0.0029816031455993652,
|
|
"margin_dpo/margin_std": 0.3835117816925049,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0030234315948601664,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.029325395822525024,
|
|
"fcm_dpo/q_t": 0.4992692470550537,
|
|
"grad_norm": 21.484628677368164,
|
|
"learning_rate": 7.462686567164179e-09,
|
|
"logits/chosen": 1.7006168365478516,
|
|
"logits/rejected": 1.6698178052902222,
|
|
"logps/chosen": -74.51097869873047,
|
|
"logps/ref_chosen": -74.56095886230469,
|
|
"logps/ref_rejected": -83.53636169433594,
|
|
"logps/rejected": -83.51570892333984,
|
|
"loss": 1.3839,
|
|
"margin_dpo/margin_mean": 0.029325813055038452,
|
|
"margin_dpo/margin_std": 0.4646317958831787,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.0045351473922902496,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.003523111343383789,
|
|
"fcm_dpo/q_t": 0.5000885128974915,
|
|
"grad_norm": 19.935203552246094,
|
|
"learning_rate": 1.4925373134328357e-08,
|
|
"logits/chosen": 1.6261146068572998,
|
|
"logits/rejected": 1.535043716430664,
|
|
"logps/chosen": -82.14225006103516,
|
|
"logps/ref_chosen": -82.1510009765625,
|
|
"logps/ref_rejected": -109.82986450195312,
|
|
"logps/rejected": -109.81758117675781,
|
|
"loss": 1.3871,
|
|
"margin_dpo/margin_mean": -0.0035227537155151367,
|
|
"margin_dpo/margin_std": 0.4260812997817993,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.006046863189720333,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.006996512413024902,
|
|
"fcm_dpo/q_t": 0.5001745223999023,
|
|
"grad_norm": 19.782325744628906,
|
|
"learning_rate": 2.2388059701492534e-08,
|
|
"logits/chosen": 1.766474723815918,
|
|
"logits/rejected": 1.7546875476837158,
|
|
"logps/chosen": -92.36776733398438,
|
|
"logps/ref_chosen": -92.37549591064453,
|
|
"logps/ref_rejected": -99.59553527832031,
|
|
"logps/rejected": -99.580810546875,
|
|
"loss": 1.3875,
|
|
"margin_dpo/margin_mean": -0.00699692964553833,
|
|
"margin_dpo/margin_std": 0.4406859278678894,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.007558578987150416,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.03886502981185913,
|
|
"fcm_dpo/q_t": 0.500970721244812,
|
|
"grad_norm": 18.87920570373535,
|
|
"learning_rate": 2.9850746268656714e-08,
|
|
"logits/chosen": 1.5482947826385498,
|
|
"logits/rejected": 1.4964426755905151,
|
|
"logps/chosen": -78.91131591796875,
|
|
"logps/ref_chosen": -78.84872436523438,
|
|
"logps/ref_rejected": -97.88040161132812,
|
|
"logps/rejected": -97.90412902832031,
|
|
"loss": 1.3906,
|
|
"margin_dpo/margin_mean": -0.03886544704437256,
|
|
"margin_dpo/margin_std": 0.4082863926887512,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.009070294784580499,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.07824429869651794,
|
|
"fcm_dpo/q_t": 0.4980442523956299,
|
|
"grad_norm": 18.059982299804688,
|
|
"learning_rate": 3.731343283582089e-08,
|
|
"logits/chosen": 1.5881304740905762,
|
|
"logits/rejected": 1.4806277751922607,
|
|
"logps/chosen": -68.29608917236328,
|
|
"logps/ref_chosen": -68.34607696533203,
|
|
"logps/ref_rejected": -99.24614715576172,
|
|
"logps/rejected": -99.27439880371094,
|
|
"loss": 1.3789,
|
|
"margin_dpo/margin_mean": 0.07824432849884033,
|
|
"margin_dpo/margin_std": 0.4144379794597626,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.010582010582010581,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.07302752137184143,
|
|
"fcm_dpo/q_t": 0.5018250942230225,
|
|
"grad_norm": 17.438974380493164,
|
|
"learning_rate": 4.477611940298507e-08,
|
|
"logits/chosen": 1.4593021869659424,
|
|
"logits/rejected": 1.3967918157577515,
|
|
"logps/chosen": -69.18865966796875,
|
|
"logps/ref_chosen": -69.11282348632812,
|
|
"logps/ref_rejected": -84.01641845703125,
|
|
"logps/rejected": -84.01922607421875,
|
|
"loss": 1.3939,
|
|
"margin_dpo/margin_mean": -0.0730276107788086,
|
|
"margin_dpo/margin_std": 0.36465680599212646,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.012093726379440665,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.0639738142490387,
|
|
"fcm_dpo/q_t": 0.5015987157821655,
|
|
"grad_norm": 18.393983840942383,
|
|
"learning_rate": 5.223880597014925e-08,
|
|
"logits/chosen": 1.647634506225586,
|
|
"logits/rejected": 1.6342148780822754,
|
|
"logps/chosen": -78.386474609375,
|
|
"logps/ref_chosen": -78.3912353515625,
|
|
"logps/ref_rejected": -91.06254577636719,
|
|
"logps/rejected": -90.99380493164062,
|
|
"loss": 1.3931,
|
|
"margin_dpo/margin_mean": -0.06397378444671631,
|
|
"margin_dpo/margin_std": 0.3821854591369629,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.013605442176870748,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.03132617473602295,
|
|
"fcm_dpo/q_t": 0.4992170035839081,
|
|
"grad_norm": 19.418487548828125,
|
|
"learning_rate": 5.970149253731343e-08,
|
|
"logits/chosen": 2.1118154525756836,
|
|
"logits/rejected": 1.8952994346618652,
|
|
"logps/chosen": -69.66719818115234,
|
|
"logps/ref_chosen": -69.67422485351562,
|
|
"logps/ref_rejected": -105.00473022460938,
|
|
"logps/rejected": -105.0290298461914,
|
|
"loss": 1.3836,
|
|
"margin_dpo/margin_mean": 0.03132587671279907,
|
|
"margin_dpo/margin_std": 0.3932754695415497,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.015117157974300832,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.031807154417037964,
|
|
"fcm_dpo/q_t": 0.4992050230503082,
|
|
"grad_norm": 19.023218154907227,
|
|
"learning_rate": 6.71641791044776e-08,
|
|
"logits/chosen": 1.7425557374954224,
|
|
"logits/rejected": 1.6554481983184814,
|
|
"logps/chosen": -79.6943359375,
|
|
"logps/ref_chosen": -79.730712890625,
|
|
"logps/ref_rejected": -105.50645446777344,
|
|
"logps/rejected": -105.50188446044922,
|
|
"loss": 1.3836,
|
|
"margin_dpo/margin_mean": 0.031807392835617065,
|
|
"margin_dpo/margin_std": 0.41680943965911865,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.016628873771730914,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.00566980242729187,
|
|
"fcm_dpo/q_t": 0.4998597502708435,
|
|
"grad_norm": 17.280485153198242,
|
|
"learning_rate": 7.462686567164178e-08,
|
|
"logits/chosen": 1.610607385635376,
|
|
"logits/rejected": 1.566218614578247,
|
|
"logps/chosen": -85.4349365234375,
|
|
"logps/ref_chosen": -85.41248321533203,
|
|
"logps/ref_rejected": -86.50241088867188,
|
|
"logps/rejected": -86.53053283691406,
|
|
"loss": 1.3862,
|
|
"margin_dpo/margin_mean": 0.005669832229614258,
|
|
"margin_dpo/margin_std": 0.40662485361099243,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.018140589569160998,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.045232102274894714,
|
|
"fcm_dpo/q_t": 0.498870313167572,
|
|
"grad_norm": 17.30267906188965,
|
|
"learning_rate": 8.208955223880596e-08,
|
|
"logits/chosen": 1.5188937187194824,
|
|
"logits/rejected": 1.4821337461471558,
|
|
"logps/chosen": -81.39826965332031,
|
|
"logps/ref_chosen": -81.38086700439453,
|
|
"logps/ref_rejected": -89.88151550292969,
|
|
"logps/rejected": -89.94414520263672,
|
|
"loss": 1.3821,
|
|
"margin_dpo/margin_mean": 0.045232415199279785,
|
|
"margin_dpo/margin_std": 0.33078908920288086,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.019652305366591082,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.0010968297719955444,
|
|
"fcm_dpo/q_t": 0.49997279047966003,
|
|
"grad_norm": 17.860565185546875,
|
|
"learning_rate": 8.955223880597014e-08,
|
|
"logits/chosen": 1.5909333229064941,
|
|
"logits/rejected": 1.4107434749603271,
|
|
"logps/chosen": -63.136024475097656,
|
|
"logps/ref_chosen": -63.17030715942383,
|
|
"logps/ref_rejected": -105.61166381835938,
|
|
"logps/rejected": -105.57847595214844,
|
|
"loss": 1.3864,
|
|
"margin_dpo/margin_mean": 0.0010965168476104736,
|
|
"margin_dpo/margin_std": 0.3132143020629883,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.021164021164021163,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.025872915983200073,
|
|
"fcm_dpo/q_t": 0.500646710395813,
|
|
"grad_norm": 19.512710571289062,
|
|
"learning_rate": 9.701492537313432e-08,
|
|
"logits/chosen": 1.6522200107574463,
|
|
"logits/rejected": 1.6186612844467163,
|
|
"logps/chosen": -80.70231628417969,
|
|
"logps/ref_chosen": -80.71014404296875,
|
|
"logps/ref_rejected": -89.86041259765625,
|
|
"logps/rejected": -89.82671356201172,
|
|
"loss": 1.3891,
|
|
"margin_dpo/margin_mean": -0.025872111320495605,
|
|
"margin_dpo/margin_std": 0.296722948551178,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.022675736961451247,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.07544693350791931,
|
|
"fcm_dpo/q_t": 0.5018854141235352,
|
|
"grad_norm": 20.62965965270996,
|
|
"learning_rate": 1.044776119402985e-07,
|
|
"logits/chosen": 1.3531144857406616,
|
|
"logits/rejected": 1.2740521430969238,
|
|
"logps/chosen": -82.05131530761719,
|
|
"logps/ref_chosen": -82.00294494628906,
|
|
"logps/ref_rejected": -106.43550109863281,
|
|
"logps/rejected": -106.40841674804688,
|
|
"loss": 1.3941,
|
|
"margin_dpo/margin_mean": -0.07544746994972229,
|
|
"margin_dpo/margin_std": 0.3172140121459961,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.02418745275888133,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.014335334300994873,
|
|
"fcm_dpo/q_t": 0.49964210391044617,
|
|
"grad_norm": 17.199363708496094,
|
|
"learning_rate": 1.1194029850746268e-07,
|
|
"logits/chosen": 1.7679736614227295,
|
|
"logits/rejected": 1.6486386060714722,
|
|
"logps/chosen": -62.32301330566406,
|
|
"logps/ref_chosen": -62.308345794677734,
|
|
"logps/ref_rejected": -89.6508560180664,
|
|
"logps/rejected": -89.67985534667969,
|
|
"loss": 1.3851,
|
|
"margin_dpo/margin_mean": 0.01433536410331726,
|
|
"margin_dpo/margin_std": 0.3363468050956726,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.025699168556311415,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.0437452495098114,
|
|
"fcm_dpo/q_t": 0.501093327999115,
|
|
"grad_norm": 18.453763961791992,
|
|
"learning_rate": 1.1940298507462686e-07,
|
|
"logits/chosen": 1.6343588829040527,
|
|
"logits/rejected": 1.5999202728271484,
|
|
"logps/chosen": -85.20316314697266,
|
|
"logps/ref_chosen": -85.16903686523438,
|
|
"logps/ref_rejected": -102.57087707519531,
|
|
"logps/rejected": -102.56124877929688,
|
|
"loss": 1.391,
|
|
"margin_dpo/margin_mean": -0.04374605417251587,
|
|
"margin_dpo/margin_std": 0.3627570867538452,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.027210884353741496,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.13690681755542755,
|
|
"fcm_dpo/q_t": 0.496579110622406,
|
|
"grad_norm": 17.045387268066406,
|
|
"learning_rate": 1.2686567164179106e-07,
|
|
"logits/chosen": 1.6750521659851074,
|
|
"logits/rejected": 1.5454905033111572,
|
|
"logps/chosen": -63.13652801513672,
|
|
"logps/ref_chosen": -63.17793273925781,
|
|
"logps/ref_rejected": -86.06461334228516,
|
|
"logps/rejected": -86.16011047363281,
|
|
"loss": 1.373,
|
|
"margin_dpo/margin_mean": 0.13690713047981262,
|
|
"margin_dpo/margin_std": 0.3616068363189697,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.02872260015117158,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.039892733097076416,
|
|
"fcm_dpo/q_t": 0.50099778175354,
|
|
"grad_norm": 19.78537940979004,
|
|
"learning_rate": 1.343283582089552e-07,
|
|
"logits/chosen": 1.9965240955352783,
|
|
"logits/rejected": 1.98760986328125,
|
|
"logps/chosen": -85.86576080322266,
|
|
"logps/ref_chosen": -85.82405853271484,
|
|
"logps/ref_rejected": -100.07136535644531,
|
|
"logps/rejected": -100.07318115234375,
|
|
"loss": 1.3907,
|
|
"margin_dpo/margin_mean": -0.03989291191101074,
|
|
"margin_dpo/margin_std": 0.39928844571113586,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.030234315948601664,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.046687304973602295,
|
|
"fcm_dpo/q_t": 0.5011659860610962,
|
|
"grad_norm": 18.191457748413086,
|
|
"learning_rate": 1.4179104477611938e-07,
|
|
"logits/chosen": 1.9364959001541138,
|
|
"logits/rejected": 1.8560017347335815,
|
|
"logps/chosen": -73.64295959472656,
|
|
"logps/ref_chosen": -73.58621215820312,
|
|
"logps/ref_rejected": -91.21690368652344,
|
|
"logps/rejected": -91.22695922851562,
|
|
"loss": 1.3913,
|
|
"margin_dpo/margin_mean": -0.046687573194503784,
|
|
"margin_dpo/margin_std": 0.36182230710983276,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.031746031746031744,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.08522829413414001,
|
|
"fcm_dpo/q_t": 0.4978693723678589,
|
|
"grad_norm": 18.132219314575195,
|
|
"learning_rate": 1.4925373134328355e-07,
|
|
"logits/chosen": 1.9595677852630615,
|
|
"logits/rejected": 1.8412469625473022,
|
|
"logps/chosen": -81.89152526855469,
|
|
"logps/ref_chosen": -81.97251892089844,
|
|
"logps/ref_rejected": -98.05976867675781,
|
|
"logps/rejected": -98.06401062011719,
|
|
"loss": 1.3783,
|
|
"margin_dpo/margin_mean": 0.08522748947143555,
|
|
"margin_dpo/margin_std": 0.4540433883666992,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.03325774754346183,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.024463504552841187,
|
|
"fcm_dpo/q_t": 0.49939069151878357,
|
|
"grad_norm": 18.27927017211914,
|
|
"learning_rate": 1.5671641791044775e-07,
|
|
"logits/chosen": 1.7110607624053955,
|
|
"logits/rejected": 1.6684741973876953,
|
|
"logps/chosen": -76.95679473876953,
|
|
"logps/ref_chosen": -76.99579620361328,
|
|
"logps/ref_rejected": -95.76089477539062,
|
|
"logps/rejected": -95.74636840820312,
|
|
"loss": 1.3844,
|
|
"margin_dpo/margin_mean": 0.024462968111038208,
|
|
"margin_dpo/margin_std": 0.4479817748069763,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.03476946334089191,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.0017972886562347412,
|
|
"fcm_dpo/q_t": 0.5000447630882263,
|
|
"grad_norm": 19.142587661743164,
|
|
"learning_rate": 1.6417910447761193e-07,
|
|
"logits/chosen": 2.1720407009124756,
|
|
"logits/rejected": 2.068018913269043,
|
|
"logps/chosen": -84.75054168701172,
|
|
"logps/ref_chosen": -84.76856994628906,
|
|
"logps/ref_rejected": -107.28266906738281,
|
|
"logps/rejected": -107.2628402709961,
|
|
"loss": 1.3869,
|
|
"margin_dpo/margin_mean": -0.0017971396446228027,
|
|
"margin_dpo/margin_std": 0.3923270106315613,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.036281179138321996,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.053469717502593994,
|
|
"fcm_dpo/q_t": 0.49866342544555664,
|
|
"grad_norm": 17.15791893005371,
|
|
"learning_rate": 1.716417910447761e-07,
|
|
"logits/chosen": 1.7385156154632568,
|
|
"logits/rejected": 1.6794748306274414,
|
|
"logps/chosen": -69.82743835449219,
|
|
"logps/ref_chosen": -69.87112426757812,
|
|
"logps/ref_rejected": -84.02084350585938,
|
|
"logps/rejected": -84.03063201904297,
|
|
"loss": 1.3813,
|
|
"margin_dpo/margin_mean": 0.053469330072402954,
|
|
"margin_dpo/margin_std": 0.37825846672058105,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.03779289493575208,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.009903967380523682,
|
|
"fcm_dpo/q_t": 0.4997522532939911,
|
|
"grad_norm": 19.575454711914062,
|
|
"learning_rate": 1.7910447761194027e-07,
|
|
"logits/chosen": 2.062734603881836,
|
|
"logits/rejected": 1.9006829261779785,
|
|
"logps/chosen": -78.26441192626953,
|
|
"logps/ref_chosen": -78.22694396972656,
|
|
"logps/ref_rejected": -106.65234375,
|
|
"logps/rejected": -106.69970703125,
|
|
"loss": 1.3857,
|
|
"margin_dpo/margin_mean": 0.009904235601425171,
|
|
"margin_dpo/margin_std": 0.4116858243942261,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.039304610733182165,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.07625684142112732,
|
|
"fcm_dpo/q_t": 0.4980948865413666,
|
|
"grad_norm": 17.888681411743164,
|
|
"learning_rate": 1.8656716417910447e-07,
|
|
"logits/chosen": 2.0140490531921387,
|
|
"logits/rejected": 1.9844526052474976,
|
|
"logps/chosen": -74.57620239257812,
|
|
"logps/ref_chosen": -74.59750366210938,
|
|
"logps/ref_rejected": -93.57858276367188,
|
|
"logps/rejected": -93.63352966308594,
|
|
"loss": 1.3791,
|
|
"margin_dpo/margin_mean": 0.07625627517700195,
|
|
"margin_dpo/margin_std": 0.4107317328453064,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.04081632653061224,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.09230369329452515,
|
|
"fcm_dpo/q_t": 0.4976937770843506,
|
|
"grad_norm": 18.47425079345703,
|
|
"learning_rate": 1.9402985074626865e-07,
|
|
"logits/chosen": 1.786578893661499,
|
|
"logits/rejected": 1.7295043468475342,
|
|
"logps/chosen": -78.6236801147461,
|
|
"logps/ref_chosen": -78.64625549316406,
|
|
"logps/ref_rejected": -92.33645629882812,
|
|
"logps/rejected": -92.40618896484375,
|
|
"loss": 1.3774,
|
|
"margin_dpo/margin_mean": 0.09230378270149231,
|
|
"margin_dpo/margin_std": 0.32660043239593506,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.042328042328042326,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.06432390213012695,
|
|
"fcm_dpo/q_t": 0.5016065835952759,
|
|
"grad_norm": 18.158647537231445,
|
|
"learning_rate": 2.0149253731343282e-07,
|
|
"logits/chosen": 1.6356275081634521,
|
|
"logits/rejected": 1.585396409034729,
|
|
"logps/chosen": -76.95513153076172,
|
|
"logps/ref_chosen": -76.91271209716797,
|
|
"logps/ref_rejected": -88.48194885253906,
|
|
"logps/rejected": -88.46004486083984,
|
|
"loss": 1.3931,
|
|
"margin_dpo/margin_mean": -0.06432461738586426,
|
|
"margin_dpo/margin_std": 0.35822808742523193,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.04383975812547241,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.002993851900100708,
|
|
"fcm_dpo/q_t": 0.49992460012435913,
|
|
"grad_norm": 21.283586502075195,
|
|
"learning_rate": 2.08955223880597e-07,
|
|
"logits/chosen": 1.913273811340332,
|
|
"logits/rejected": 1.850356936454773,
|
|
"logps/chosen": -89.58018493652344,
|
|
"logps/ref_chosen": -89.62060546875,
|
|
"logps/ref_rejected": -100.57090759277344,
|
|
"logps/rejected": -100.53347778320312,
|
|
"loss": 1.3864,
|
|
"margin_dpo/margin_mean": 0.002994030714035034,
|
|
"margin_dpo/margin_std": 0.4085092842578888,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.045351473922902494,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.029388487339019775,
|
|
"fcm_dpo/q_t": 0.4992656707763672,
|
|
"grad_norm": 18.88498306274414,
|
|
"learning_rate": 2.1641791044776117e-07,
|
|
"logits/chosen": 1.961219072341919,
|
|
"logits/rejected": 1.7812705039978027,
|
|
"logps/chosen": -68.8134536743164,
|
|
"logps/ref_chosen": -68.82381439208984,
|
|
"logps/ref_rejected": -104.7047119140625,
|
|
"logps/rejected": -104.72373962402344,
|
|
"loss": 1.3838,
|
|
"margin_dpo/margin_mean": 0.029387563467025757,
|
|
"margin_dpo/margin_std": 0.44134002923965454,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.04686318972033258,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.04079049825668335,
|
|
"fcm_dpo/q_t": 0.5010193586349487,
|
|
"grad_norm": 21.307546615600586,
|
|
"learning_rate": 2.2388059701492537e-07,
|
|
"logits/chosen": 1.6620802879333496,
|
|
"logits/rejected": 1.5458331108093262,
|
|
"logps/chosen": -86.08538818359375,
|
|
"logps/ref_chosen": -86.06916809082031,
|
|
"logps/ref_rejected": -116.66394805908203,
|
|
"logps/rejected": -116.63937377929688,
|
|
"loss": 1.3908,
|
|
"margin_dpo/margin_mean": -0.04079073667526245,
|
|
"margin_dpo/margin_std": 0.4178071618080139,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.04837490551776266,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.04694744944572449,
|
|
"fcm_dpo/q_t": 0.4988267421722412,
|
|
"grad_norm": 18.534250259399414,
|
|
"learning_rate": 2.3134328358208954e-07,
|
|
"logits/chosen": 1.7644070386886597,
|
|
"logits/rejected": 1.810452938079834,
|
|
"logps/chosen": -87.56648254394531,
|
|
"logps/ref_chosen": -87.59808349609375,
|
|
"logps/ref_rejected": -100.26905822753906,
|
|
"logps/rejected": -100.28439331054688,
|
|
"loss": 1.3821,
|
|
"margin_dpo/margin_mean": 0.04694738984107971,
|
|
"margin_dpo/margin_std": 0.4298707842826843,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.049886621315192746,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.006792932748794556,
|
|
"fcm_dpo/q_t": 0.4998300075531006,
|
|
"grad_norm": 19.715877532958984,
|
|
"learning_rate": 2.388059701492537e-07,
|
|
"logits/chosen": 1.1110432147979736,
|
|
"logits/rejected": 1.0335161685943604,
|
|
"logps/chosen": -83.32142639160156,
|
|
"logps/ref_chosen": -83.29850769042969,
|
|
"logps/ref_rejected": -94.60990142822266,
|
|
"logps/rejected": -94.63961791992188,
|
|
"loss": 1.386,
|
|
"margin_dpo/margin_mean": 0.006792932748794556,
|
|
"margin_dpo/margin_std": 0.4145265221595764,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.05139833711262283,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.016542896628379822,
|
|
"fcm_dpo/q_t": 0.5004134178161621,
|
|
"grad_norm": 18.04196548461914,
|
|
"learning_rate": 2.4626865671641786e-07,
|
|
"logits/chosen": 2.0257890224456787,
|
|
"logits/rejected": 1.9314777851104736,
|
|
"logps/chosen": -70.14801788330078,
|
|
"logps/ref_chosen": -70.15069580078125,
|
|
"logps/ref_rejected": -84.4693832397461,
|
|
"logps/rejected": -84.45014953613281,
|
|
"loss": 1.3883,
|
|
"margin_dpo/margin_mean": -0.016543224453926086,
|
|
"margin_dpo/margin_std": 0.35318687558174133,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.05291005291005291,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.14254692196846008,
|
|
"fcm_dpo/q_t": 0.4964386820793152,
|
|
"grad_norm": 17.937305450439453,
|
|
"learning_rate": 2.537313432835821e-07,
|
|
"logits/chosen": 1.6588869094848633,
|
|
"logits/rejected": 1.600965142250061,
|
|
"logps/chosen": -78.1865005493164,
|
|
"logps/ref_chosen": -78.25238037109375,
|
|
"logps/ref_rejected": -91.06356811523438,
|
|
"logps/rejected": -91.14024353027344,
|
|
"loss": 1.3725,
|
|
"margin_dpo/margin_mean": 0.14254716038703918,
|
|
"margin_dpo/margin_std": 0.4224141240119934,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.05442176870748299,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.004450559616088867,
|
|
"fcm_dpo/q_t": 0.5001120567321777,
|
|
"grad_norm": 18.370346069335938,
|
|
"learning_rate": 2.611940298507462e-07,
|
|
"logits/chosen": 1.8511872291564941,
|
|
"logits/rejected": 1.7305129766464233,
|
|
"logps/chosen": -67.08213806152344,
|
|
"logps/ref_chosen": -67.06676483154297,
|
|
"logps/ref_rejected": -99.34661865234375,
|
|
"logps/rejected": -99.35753631591797,
|
|
"loss": 1.387,
|
|
"margin_dpo/margin_mean": -0.004450619220733643,
|
|
"margin_dpo/margin_std": 0.3298412561416626,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.055933484504913075,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.03487496078014374,
|
|
"fcm_dpo/q_t": 0.500869870185852,
|
|
"grad_norm": 23.71449851989746,
|
|
"learning_rate": 2.686567164179104e-07,
|
|
"logits/chosen": 1.983677864074707,
|
|
"logits/rejected": 1.6817830801010132,
|
|
"logps/chosen": -75.88996124267578,
|
|
"logps/ref_chosen": -75.9269790649414,
|
|
"logps/ref_rejected": -130.34371948242188,
|
|
"logps/rejected": -130.27182006835938,
|
|
"loss": 1.3903,
|
|
"margin_dpo/margin_mean": -0.03487536311149597,
|
|
"margin_dpo/margin_std": 0.440315306186676,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.05744520030234316,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.008413180708885193,
|
|
"fcm_dpo/q_t": 0.5002103447914124,
|
|
"grad_norm": 18.42804718017578,
|
|
"learning_rate": 2.761194029850746e-07,
|
|
"logits/chosen": 1.5153725147247314,
|
|
"logits/rejected": 1.4806842803955078,
|
|
"logps/chosen": -83.677978515625,
|
|
"logps/ref_chosen": -83.65460205078125,
|
|
"logps/ref_rejected": -89.15221405029297,
|
|
"logps/rejected": -89.16717529296875,
|
|
"loss": 1.3877,
|
|
"margin_dpo/margin_mean": -0.008413195610046387,
|
|
"margin_dpo/margin_std": 0.4714970588684082,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.05895691609977324,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.06950554251670837,
|
|
"fcm_dpo/q_t": 0.4982631206512451,
|
|
"grad_norm": 19.082138061523438,
|
|
"learning_rate": 2.8358208955223876e-07,
|
|
"logits/chosen": 1.8353855609893799,
|
|
"logits/rejected": 1.793013572692871,
|
|
"logps/chosen": -76.090087890625,
|
|
"logps/ref_chosen": -76.18706512451172,
|
|
"logps/ref_rejected": -94.39262390136719,
|
|
"logps/rejected": -94.36514282226562,
|
|
"loss": 1.3797,
|
|
"margin_dpo/margin_mean": 0.06950537860393524,
|
|
"margin_dpo/margin_std": 0.35339751839637756,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.06046863189720333,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.007292389869689941,
|
|
"fcm_dpo/q_t": 0.5001822710037231,
|
|
"grad_norm": 18.086139678955078,
|
|
"learning_rate": 2.9104477611940296e-07,
|
|
"logits/chosen": 1.9194378852844238,
|
|
"logits/rejected": 1.8017828464508057,
|
|
"logps/chosen": -77.4515609741211,
|
|
"logps/ref_chosen": -77.43476867675781,
|
|
"logps/ref_rejected": -98.58720397949219,
|
|
"logps/rejected": -98.5967025756836,
|
|
"loss": 1.3874,
|
|
"margin_dpo/margin_mean": -0.00729215145111084,
|
|
"margin_dpo/margin_std": 0.35634469985961914,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.06198034769463341,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.09080618619918823,
|
|
"fcm_dpo/q_t": 0.4977305829524994,
|
|
"grad_norm": 18.268693923950195,
|
|
"learning_rate": 2.985074626865671e-07,
|
|
"logits/chosen": 1.5746355056762695,
|
|
"logits/rejected": 1.5183868408203125,
|
|
"logps/chosen": -86.79508972167969,
|
|
"logps/ref_chosen": -86.87640380859375,
|
|
"logps/ref_rejected": -101.0856704711914,
|
|
"logps/rejected": -101.09515380859375,
|
|
"loss": 1.3777,
|
|
"margin_dpo/margin_mean": 0.09080681204795837,
|
|
"margin_dpo/margin_std": 0.4117854833602905,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.06349206349206349,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.038176313042640686,
|
|
"fcm_dpo/q_t": 0.49904680252075195,
|
|
"grad_norm": 18.118419647216797,
|
|
"learning_rate": 3.059701492537313e-07,
|
|
"logits/chosen": 1.7109103202819824,
|
|
"logits/rejected": 1.6621713638305664,
|
|
"logps/chosen": -79.33087158203125,
|
|
"logps/ref_chosen": -79.35625457763672,
|
|
"logps/ref_rejected": -91.54881286621094,
|
|
"logps/rejected": -91.56159973144531,
|
|
"loss": 1.3829,
|
|
"margin_dpo/margin_mean": 0.03817671537399292,
|
|
"margin_dpo/margin_std": 0.4199693202972412,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.06500377928949358,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.049236446619033813,
|
|
"fcm_dpo/q_t": 0.5012304782867432,
|
|
"grad_norm": 19.544836044311523,
|
|
"learning_rate": 3.134328358208955e-07,
|
|
"logits/chosen": 1.7505764961242676,
|
|
"logits/rejected": 1.6548776626586914,
|
|
"logps/chosen": -90.8389892578125,
|
|
"logps/ref_chosen": -90.81220245361328,
|
|
"logps/ref_rejected": -94.16316986083984,
|
|
"logps/rejected": -94.1407241821289,
|
|
"loss": 1.3918,
|
|
"margin_dpo/margin_mean": -0.0492367148399353,
|
|
"margin_dpo/margin_std": 0.46574994921684265,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.06651549508692366,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.14597035944461823,
|
|
"fcm_dpo/q_t": 0.4963557720184326,
|
|
"grad_norm": 18.76498794555664,
|
|
"learning_rate": 3.2089552238805965e-07,
|
|
"logits/chosen": 1.420806884765625,
|
|
"logits/rejected": 1.3459522724151611,
|
|
"logps/chosen": -88.20413208007812,
|
|
"logps/ref_chosen": -88.27932739257812,
|
|
"logps/ref_rejected": -101.14324951171875,
|
|
"logps/rejected": -101.21401977539062,
|
|
"loss": 1.3724,
|
|
"margin_dpo/margin_mean": 0.14597000181674957,
|
|
"margin_dpo/margin_std": 0.48067185282707214,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.06802721088435375,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.08005937933921814,
|
|
"fcm_dpo/q_t": 0.49800071120262146,
|
|
"grad_norm": 19.359880447387695,
|
|
"learning_rate": 3.2835820895522385e-07,
|
|
"logits/chosen": 1.6777125597000122,
|
|
"logits/rejected": 1.5594508647918701,
|
|
"logps/chosen": -78.37330627441406,
|
|
"logps/ref_chosen": -78.40264892578125,
|
|
"logps/ref_rejected": -109.39339447021484,
|
|
"logps/rejected": -109.4441146850586,
|
|
"loss": 1.3788,
|
|
"margin_dpo/margin_mean": 0.08005967736244202,
|
|
"margin_dpo/margin_std": 0.43401455879211426,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.06953892668178382,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.09679755568504333,
|
|
"fcm_dpo/q_t": 0.497580885887146,
|
|
"grad_norm": 18.390920639038086,
|
|
"learning_rate": 3.3582089552238805e-07,
|
|
"logits/chosen": 1.4970589876174927,
|
|
"logits/rejected": 1.38013756275177,
|
|
"logps/chosen": -77.99214172363281,
|
|
"logps/ref_chosen": -78.08491516113281,
|
|
"logps/ref_rejected": -97.42544555664062,
|
|
"logps/rejected": -97.42948150634766,
|
|
"loss": 1.3769,
|
|
"margin_dpo/margin_mean": 0.09679737687110901,
|
|
"margin_dpo/margin_std": 0.3261662721633911,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.0710506424792139,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.010644227266311646,
|
|
"fcm_dpo/q_t": 0.499734491109848,
|
|
"grad_norm": 19.139677047729492,
|
|
"learning_rate": 3.432835820895522e-07,
|
|
"logits/chosen": 1.5834190845489502,
|
|
"logits/rejected": 1.5048835277557373,
|
|
"logps/chosen": -70.7773208618164,
|
|
"logps/ref_chosen": -70.78988647460938,
|
|
"logps/ref_rejected": -91.17266845703125,
|
|
"logps/rejected": -91.17074584960938,
|
|
"loss": 1.3856,
|
|
"margin_dpo/margin_mean": 0.010644763708114624,
|
|
"margin_dpo/margin_std": 0.3661983013153076,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.07256235827664399,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.08006027340888977,
|
|
"fcm_dpo/q_t": 0.4979988932609558,
|
|
"grad_norm": 17.098857879638672,
|
|
"learning_rate": 3.507462686567164e-07,
|
|
"logits/chosen": 1.9507906436920166,
|
|
"logits/rejected": 1.8869541883468628,
|
|
"logps/chosen": -66.59869384765625,
|
|
"logps/ref_chosen": -66.67327880859375,
|
|
"logps/ref_rejected": -79.28543853759766,
|
|
"logps/rejected": -79.29090881347656,
|
|
"loss": 1.3786,
|
|
"margin_dpo/margin_mean": 0.08006066083908081,
|
|
"margin_dpo/margin_std": 0.3583065867424011,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.07407407407407407,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.04423801600933075,
|
|
"fcm_dpo/q_t": 0.49889448285102844,
|
|
"grad_norm": 17.491674423217773,
|
|
"learning_rate": 3.5820895522388055e-07,
|
|
"logits/chosen": 1.35086190700531,
|
|
"logits/rejected": 1.3094708919525146,
|
|
"logps/chosen": -75.12703704833984,
|
|
"logps/ref_chosen": -75.17504119873047,
|
|
"logps/ref_rejected": -80.5369873046875,
|
|
"logps/rejected": -80.5332260131836,
|
|
"loss": 1.3822,
|
|
"margin_dpo/margin_mean": 0.04423774778842926,
|
|
"margin_dpo/margin_std": 0.36896711587905884,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.07558578987150416,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.03308814764022827,
|
|
"fcm_dpo/q_t": 0.5008265972137451,
|
|
"grad_norm": 17.986473083496094,
|
|
"learning_rate": 3.6567164179104475e-07,
|
|
"logits/chosen": 1.7103403806686401,
|
|
"logits/rejected": 1.6472971439361572,
|
|
"logps/chosen": -71.20738220214844,
|
|
"logps/ref_chosen": -71.2314224243164,
|
|
"logps/ref_rejected": -87.59088134765625,
|
|
"logps/rejected": -87.53375244140625,
|
|
"loss": 1.3899,
|
|
"margin_dpo/margin_mean": -0.03308817744255066,
|
|
"margin_dpo/margin_std": 0.33914172649383545,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.07709750566893424,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.07528911530971527,
|
|
"fcm_dpo/q_t": 0.5018813610076904,
|
|
"grad_norm": 18.881925582885742,
|
|
"learning_rate": 3.7313432835820895e-07,
|
|
"logits/chosen": 1.7805354595184326,
|
|
"logits/rejected": 1.7294878959655762,
|
|
"logps/chosen": -78.7321548461914,
|
|
"logps/ref_chosen": -78.69171142578125,
|
|
"logps/ref_rejected": -100.78950500488281,
|
|
"logps/rejected": -100.75465393066406,
|
|
"loss": 1.3941,
|
|
"margin_dpo/margin_mean": -0.07528868317604065,
|
|
"margin_dpo/margin_std": 0.34039679169654846,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.07860922146636433,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.036653727293014526,
|
|
"fcm_dpo/q_t": 0.4990845322608948,
|
|
"grad_norm": 20.350269317626953,
|
|
"learning_rate": 3.805970149253731e-07,
|
|
"logits/chosen": 1.852992296218872,
|
|
"logits/rejected": 1.678769588470459,
|
|
"logps/chosen": -89.09419250488281,
|
|
"logps/ref_chosen": -89.09419250488281,
|
|
"logps/ref_rejected": -116.87469482421875,
|
|
"logps/rejected": -116.91134643554688,
|
|
"loss": 1.3831,
|
|
"margin_dpo/margin_mean": 0.03665390610694885,
|
|
"margin_dpo/margin_std": 0.44389206171035767,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.0801209372637944,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.07223968207836151,
|
|
"fcm_dpo/q_t": 0.498193621635437,
|
|
"grad_norm": 17.331947326660156,
|
|
"learning_rate": 3.880597014925373e-07,
|
|
"logits/chosen": 1.4440419673919678,
|
|
"logits/rejected": 1.405485987663269,
|
|
"logps/chosen": -74.11995697021484,
|
|
"logps/ref_chosen": -74.21418762207031,
|
|
"logps/ref_rejected": -75.71168518066406,
|
|
"logps/rejected": -75.68968200683594,
|
|
"loss": 1.3796,
|
|
"margin_dpo/margin_mean": 0.07224002480506897,
|
|
"margin_dpo/margin_std": 0.4341847598552704,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.08163265306122448,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.0331706702709198,
|
|
"fcm_dpo/q_t": 0.49917101860046387,
|
|
"grad_norm": 16.379093170166016,
|
|
"learning_rate": 3.9552238805970144e-07,
|
|
"logits/chosen": 1.6438074111938477,
|
|
"logits/rejected": 1.6267582178115845,
|
|
"logps/chosen": -65.57046508789062,
|
|
"logps/ref_chosen": -65.63475799560547,
|
|
"logps/ref_rejected": -76.4462890625,
|
|
"logps/rejected": -76.41516876220703,
|
|
"loss": 1.3834,
|
|
"margin_dpo/margin_mean": 0.03317078948020935,
|
|
"margin_dpo/margin_std": 0.4223175644874573,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.08314436885865457,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.19592681527137756,
|
|
"fcm_dpo/q_t": 0.49510544538497925,
|
|
"grad_norm": 19.32520294189453,
|
|
"learning_rate": 4.0298507462686564e-07,
|
|
"logits/chosen": 1.6696865558624268,
|
|
"logits/rejected": 1.4560625553131104,
|
|
"logps/chosen": -68.66702270507812,
|
|
"logps/ref_chosen": -68.7640380859375,
|
|
"logps/ref_rejected": -108.80074310302734,
|
|
"logps/rejected": -108.899658203125,
|
|
"loss": 1.3673,
|
|
"margin_dpo/margin_mean": 0.1959269940853119,
|
|
"margin_dpo/margin_std": 0.4270592927932739,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.08465608465608465,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.09722763299942017,
|
|
"fcm_dpo/q_t": 0.49757248163223267,
|
|
"grad_norm": 16.941253662109375,
|
|
"learning_rate": 4.1044776119402984e-07,
|
|
"logits/chosen": 1.7606887817382812,
|
|
"logits/rejected": 1.7339457273483276,
|
|
"logps/chosen": -74.73141479492188,
|
|
"logps/ref_chosen": -74.7939453125,
|
|
"logps/ref_rejected": -81.83535766601562,
|
|
"logps/rejected": -81.87005615234375,
|
|
"loss": 1.3771,
|
|
"margin_dpo/margin_mean": 0.09722745418548584,
|
|
"margin_dpo/margin_std": 0.46504199504852295,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.08616780045351474,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.18711082637310028,
|
|
"fcm_dpo/q_t": 0.49532490968704224,
|
|
"grad_norm": 18.977163314819336,
|
|
"learning_rate": 4.17910447761194e-07,
|
|
"logits/chosen": 1.6965608596801758,
|
|
"logits/rejected": 1.528353214263916,
|
|
"logps/chosen": -74.48811340332031,
|
|
"logps/ref_chosen": -74.5794677734375,
|
|
"logps/ref_rejected": -105.61981964111328,
|
|
"logps/rejected": -105.715576171875,
|
|
"loss": 1.3684,
|
|
"margin_dpo/margin_mean": 0.18711179494857788,
|
|
"margin_dpo/margin_std": 0.5311607122421265,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.08767951625094482,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.040792107582092285,
|
|
"fcm_dpo/q_t": 0.49898386001586914,
|
|
"grad_norm": 19.24970054626465,
|
|
"learning_rate": 4.253731343283582e-07,
|
|
"logits/chosen": 1.5790772438049316,
|
|
"logits/rejected": 1.50129234790802,
|
|
"logps/chosen": -92.24163055419922,
|
|
"logps/ref_chosen": -92.24464416503906,
|
|
"logps/ref_rejected": -103.18975830078125,
|
|
"logps/rejected": -103.22753143310547,
|
|
"loss": 1.383,
|
|
"margin_dpo/margin_mean": 0.040792256593704224,
|
|
"margin_dpo/margin_std": 0.5367269515991211,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.08919123204837491,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.12307733297348022,
|
|
"fcm_dpo/q_t": 0.4969256520271301,
|
|
"grad_norm": 16.87409782409668,
|
|
"learning_rate": 4.3283582089552234e-07,
|
|
"logits/chosen": 1.8218092918395996,
|
|
"logits/rejected": 1.5633070468902588,
|
|
"logps/chosen": -67.01643371582031,
|
|
"logps/ref_chosen": -67.12688446044922,
|
|
"logps/ref_rejected": -91.69569396972656,
|
|
"logps/rejected": -91.70832824707031,
|
|
"loss": 1.3747,
|
|
"margin_dpo/margin_mean": 0.12307757139205933,
|
|
"margin_dpo/margin_std": 0.4987383782863617,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.09070294784580499,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.026643604040145874,
|
|
"fcm_dpo/q_t": 0.4993351995944977,
|
|
"grad_norm": 18.580293655395508,
|
|
"learning_rate": 4.4029850746268654e-07,
|
|
"logits/chosen": 1.6838085651397705,
|
|
"logits/rejected": 1.705696940422058,
|
|
"logps/chosen": -79.6468505859375,
|
|
"logps/ref_chosen": -79.74327087402344,
|
|
"logps/ref_rejected": -77.89244079589844,
|
|
"logps/rejected": -77.82266235351562,
|
|
"loss": 1.3845,
|
|
"margin_dpo/margin_mean": 0.02664312720298767,
|
|
"margin_dpo/margin_std": 0.5903670787811279,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.09221466364323508,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.057510122656822205,
|
|
"fcm_dpo/q_t": 0.49856314063072205,
|
|
"grad_norm": 16.548521041870117,
|
|
"learning_rate": 4.4776119402985074e-07,
|
|
"logits/chosen": 1.6789517402648926,
|
|
"logits/rejected": 1.6414930820465088,
|
|
"logps/chosen": -65.96928405761719,
|
|
"logps/ref_chosen": -66.08685302734375,
|
|
"logps/ref_rejected": -88.1458740234375,
|
|
"logps/rejected": -88.0858154296875,
|
|
"loss": 1.3813,
|
|
"margin_dpo/margin_mean": 0.05751065909862518,
|
|
"margin_dpo/margin_std": 0.5422056913375854,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.09372637944066516,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.08958558738231659,
|
|
"fcm_dpo/q_t": 0.49775969982147217,
|
|
"grad_norm": 18.009490966796875,
|
|
"learning_rate": 4.552238805970149e-07,
|
|
"logits/chosen": 2.040876865386963,
|
|
"logits/rejected": 1.9636160135269165,
|
|
"logps/chosen": -80.92247009277344,
|
|
"logps/ref_chosen": -81.0108871459961,
|
|
"logps/ref_rejected": -95.50444793701172,
|
|
"logps/rejected": -95.50562286376953,
|
|
"loss": 1.378,
|
|
"margin_dpo/margin_mean": 0.08958582580089569,
|
|
"margin_dpo/margin_std": 0.5040308833122253,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.09523809523809523,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.16455836594104767,
|
|
"fcm_dpo/q_t": 0.49589046835899353,
|
|
"grad_norm": 19.401832580566406,
|
|
"learning_rate": 4.626865671641791e-07,
|
|
"logits/chosen": 2.1442782878875732,
|
|
"logits/rejected": 2.062937021255493,
|
|
"logps/chosen": -78.39234924316406,
|
|
"logps/ref_chosen": -78.57593536376953,
|
|
"logps/ref_rejected": -99.71000671386719,
|
|
"logps/rejected": -99.69097900390625,
|
|
"loss": 1.3706,
|
|
"margin_dpo/margin_mean": 0.16455818712711334,
|
|
"margin_dpo/margin_std": 0.5183212757110596,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.09674981103552532,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.019572198390960693,
|
|
"fcm_dpo/q_t": 0.4995095133781433,
|
|
"grad_norm": 16.533632278442383,
|
|
"learning_rate": 4.701492537313433e-07,
|
|
"logits/chosen": 1.6818785667419434,
|
|
"logits/rejected": 1.6190211772918701,
|
|
"logps/chosen": -69.181396484375,
|
|
"logps/ref_chosen": -69.24063110351562,
|
|
"logps/ref_rejected": -84.14842987060547,
|
|
"logps/rejected": -84.1087646484375,
|
|
"loss": 1.3849,
|
|
"margin_dpo/margin_mean": 0.019572317600250244,
|
|
"margin_dpo/margin_std": 0.45630577206611633,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.0982615268329554,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.09427054226398468,
|
|
"fcm_dpo/q_t": 0.4976460933685303,
|
|
"grad_norm": 18.83917236328125,
|
|
"learning_rate": 4.776119402985074e-07,
|
|
"logits/chosen": 1.6213884353637695,
|
|
"logits/rejected": 1.5737123489379883,
|
|
"logps/chosen": -83.97723388671875,
|
|
"logps/ref_chosen": -84.0351333618164,
|
|
"logps/ref_rejected": -96.42926788330078,
|
|
"logps/rejected": -96.46564483642578,
|
|
"loss": 1.3777,
|
|
"margin_dpo/margin_mean": 0.09427036345005035,
|
|
"margin_dpo/margin_std": 0.5586059093475342,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.09977324263038549,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.17929774522781372,
|
|
"fcm_dpo/q_t": 0.49552321434020996,
|
|
"grad_norm": 18.57543182373047,
|
|
"learning_rate": 4.850746268656717e-07,
|
|
"logits/chosen": 1.5354599952697754,
|
|
"logits/rejected": 1.4523582458496094,
|
|
"logps/chosen": -87.74684143066406,
|
|
"logps/ref_chosen": -87.79238891601562,
|
|
"logps/ref_rejected": -95.26547241210938,
|
|
"logps/rejected": -95.39921569824219,
|
|
"loss": 1.3693,
|
|
"margin_dpo/margin_mean": 0.17929738759994507,
|
|
"margin_dpo/margin_std": 0.5706717371940613,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.10128495842781557,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.16508522629737854,
|
|
"fcm_dpo/q_t": 0.4958776831626892,
|
|
"grad_norm": 19.095687866210938,
|
|
"learning_rate": 4.925373134328357e-07,
|
|
"logits/chosen": 1.5590996742248535,
|
|
"logits/rejected": 1.4503483772277832,
|
|
"logps/chosen": -77.86262512207031,
|
|
"logps/ref_chosen": -78.00114440917969,
|
|
"logps/ref_rejected": -96.03421020507812,
|
|
"logps/rejected": -96.06077575683594,
|
|
"loss": 1.3704,
|
|
"margin_dpo/margin_mean": 0.16508588194847107,
|
|
"margin_dpo/margin_std": 0.4702576994895935,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.10279667422524566,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.09334829449653625,
|
|
"fcm_dpo/q_t": 0.4976690411567688,
|
|
"grad_norm": 19.808475494384766,
|
|
"learning_rate": 5e-07,
|
|
"logits/chosen": 1.5709608793258667,
|
|
"logits/rejected": 1.4888055324554443,
|
|
"logps/chosen": -96.04895782470703,
|
|
"logps/ref_chosen": -96.04267883300781,
|
|
"logps/ref_rejected": -110.91169738769531,
|
|
"logps/rejected": -111.01132202148438,
|
|
"loss": 1.3779,
|
|
"margin_dpo/margin_mean": 0.09334835410118103,
|
|
"margin_dpo/margin_std": 0.6071600914001465,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.10430839002267574,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.22511988878250122,
|
|
"fcm_dpo/q_t": 0.4943769872188568,
|
|
"grad_norm": 19.941877365112305,
|
|
"learning_rate": 4.999965034812934e-07,
|
|
"logits/chosen": 1.3999309539794922,
|
|
"logits/rejected": 1.2983310222625732,
|
|
"logps/chosen": -84.92649841308594,
|
|
"logps/ref_chosen": -85.11124420166016,
|
|
"logps/ref_rejected": -107.57357025146484,
|
|
"logps/rejected": -107.61393737792969,
|
|
"loss": 1.3647,
|
|
"margin_dpo/margin_mean": 0.2251199185848236,
|
|
"margin_dpo/margin_std": 0.542759895324707,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.10582010582010581,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.1886361539363861,
|
|
"fcm_dpo/q_t": 0.4952925741672516,
|
|
"grad_norm": 18.396041870117188,
|
|
"learning_rate": 4.999860140229787e-07,
|
|
"logits/chosen": 1.8381619453430176,
|
|
"logits/rejected": 1.7834123373031616,
|
|
"logps/chosen": -81.65312194824219,
|
|
"logps/ref_chosen": -81.87960815429688,
|
|
"logps/ref_rejected": -92.63243103027344,
|
|
"logps/rejected": -92.59457397460938,
|
|
"loss": 1.3687,
|
|
"margin_dpo/margin_mean": 0.1886359453201294,
|
|
"margin_dpo/margin_std": 0.6872633695602417,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.1073318216175359,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.1606692373752594,
|
|
"fcm_dpo/q_t": 0.49598369002342224,
|
|
"grad_norm": 17.651378631591797,
|
|
"learning_rate": 4.999685319184688e-07,
|
|
"logits/chosen": 1.5981061458587646,
|
|
"logits/rejected": 1.5926434993743896,
|
|
"logps/chosen": -79.60566711425781,
|
|
"logps/ref_chosen": -79.74766540527344,
|
|
"logps/ref_rejected": -83.39110565185547,
|
|
"logps/rejected": -83.4097671508789,
|
|
"loss": 1.3712,
|
|
"margin_dpo/margin_mean": 0.16066959500312805,
|
|
"margin_dpo/margin_std": 0.60643470287323,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.10884353741496598,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.23458097875118256,
|
|
"fcm_dpo/q_t": 0.4941413402557373,
|
|
"grad_norm": 19.199542999267578,
|
|
"learning_rate": 4.999440576567755e-07,
|
|
"logits/chosen": 1.6563818454742432,
|
|
"logits/rejected": 1.4758176803588867,
|
|
"logps/chosen": -72.76530456542969,
|
|
"logps/ref_chosen": -73.04458618164062,
|
|
"logps/ref_rejected": -92.64720153808594,
|
|
"logps/rejected": -92.60250854492188,
|
|
"loss": 1.3639,
|
|
"margin_dpo/margin_mean": 0.23458027839660645,
|
|
"margin_dpo/margin_std": 0.6084821820259094,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.11035525321239607,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.018752366304397583,
|
|
"fcm_dpo/q_t": 0.49953436851501465,
|
|
"grad_norm": 19.4569091796875,
|
|
"learning_rate": 4.999125919224965e-07,
|
|
"logits/chosen": 1.4742746353149414,
|
|
"logits/rejected": 1.4132012128829956,
|
|
"logps/chosen": -87.65899658203125,
|
|
"logps/ref_chosen": -87.71681213378906,
|
|
"logps/ref_rejected": -96.93572998046875,
|
|
"logps/rejected": -96.89665985107422,
|
|
"loss": 1.3858,
|
|
"margin_dpo/margin_mean": 0.018752455711364746,
|
|
"margin_dpo/margin_std": 0.7262225151062012,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.11186696900982615,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.27362507581710815,
|
|
"fcm_dpo/q_t": 0.4931687116622925,
|
|
"grad_norm": 18.025976181030273,
|
|
"learning_rate": 4.998741355957963e-07,
|
|
"logits/chosen": 1.7035603523254395,
|
|
"logits/rejected": 1.5352582931518555,
|
|
"logps/chosen": -66.72885131835938,
|
|
"logps/ref_chosen": -67.07321166992188,
|
|
"logps/ref_rejected": -96.5340347290039,
|
|
"logps/rejected": -96.46330261230469,
|
|
"loss": 1.3603,
|
|
"margin_dpo/margin_mean": 0.27362462878227234,
|
|
"margin_dpo/margin_std": 0.6678668260574341,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.11337868480725624,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.263182669878006,
|
|
"fcm_dpo/q_t": 0.4934360384941101,
|
|
"grad_norm": 16.99013900756836,
|
|
"learning_rate": 4.998286897523808e-07,
|
|
"logits/chosen": 1.5969147682189941,
|
|
"logits/rejected": 1.4522219896316528,
|
|
"logps/chosen": -61.555511474609375,
|
|
"logps/ref_chosen": -61.80186462402344,
|
|
"logps/ref_rejected": -82.37368774414062,
|
|
"logps/rejected": -82.39051818847656,
|
|
"loss": 1.3616,
|
|
"margin_dpo/margin_mean": 0.26318252086639404,
|
|
"margin_dpo/margin_std": 0.751126766204834,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.11489040060468632,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.21271714568138123,
|
|
"fcm_dpo/q_t": 0.4946902394294739,
|
|
"grad_norm": 24.02982521057129,
|
|
"learning_rate": 4.997762556634679e-07,
|
|
"logits/chosen": 1.528346300125122,
|
|
"logits/rejected": 1.4039630889892578,
|
|
"logps/chosen": -69.59974670410156,
|
|
"logps/ref_chosen": -69.92233276367188,
|
|
"logps/ref_rejected": -97.08378601074219,
|
|
"logps/rejected": -96.97392272949219,
|
|
"loss": 1.3667,
|
|
"margin_dpo/margin_mean": 0.21271675825119019,
|
|
"margin_dpo/margin_std": 0.7994598150253296,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.1164021164021164,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.4026382267475128,
|
|
"fcm_dpo/q_t": 0.4899574816226959,
|
|
"grad_norm": 18.348121643066406,
|
|
"learning_rate": 4.99716834795752e-07,
|
|
"logits/chosen": 1.432910680770874,
|
|
"logits/rejected": 1.342555284500122,
|
|
"logps/chosen": -70.83218383789062,
|
|
"logps/ref_chosen": -71.206298828125,
|
|
"logps/ref_rejected": -95.22071075439453,
|
|
"logps/rejected": -95.24923706054688,
|
|
"loss": 1.348,
|
|
"margin_dpo/margin_mean": 0.40263840556144714,
|
|
"margin_dpo/margin_std": 0.729675829410553,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.11791383219954649,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.37810221314430237,
|
|
"fcm_dpo/q_t": 0.4905719757080078,
|
|
"grad_norm": 17.756649017333984,
|
|
"learning_rate": 4.996504288113623e-07,
|
|
"logits/chosen": 1.6024014949798584,
|
|
"logits/rejected": 1.5885231494903564,
|
|
"logps/chosen": -83.94834899902344,
|
|
"logps/ref_chosen": -84.40055847167969,
|
|
"logps/ref_rejected": -95.41949462890625,
|
|
"logps/rejected": -95.34538269042969,
|
|
"loss": 1.3506,
|
|
"margin_dpo/margin_mean": 0.37810176610946655,
|
|
"margin_dpo/margin_std": 0.819503903388977,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.11942554799697656,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.39458632469177246,
|
|
"fcm_dpo/q_t": 0.4901793599128723,
|
|
"grad_norm": 19.339645385742188,
|
|
"learning_rate": 4.995770395678171e-07,
|
|
"logits/chosen": 1.9015599489212036,
|
|
"logits/rejected": 1.694523572921753,
|
|
"logps/chosen": -65.59817504882812,
|
|
"logps/ref_chosen": -65.93923950195312,
|
|
"logps/ref_rejected": -102.92240905761719,
|
|
"logps/rejected": -102.97592163085938,
|
|
"loss": 1.3497,
|
|
"margin_dpo/margin_mean": 0.39458605647087097,
|
|
"margin_dpo/margin_std": 0.9920768737792969,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.12093726379440665,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.31022706627845764,
|
|
"fcm_dpo/q_t": 0.4922669529914856,
|
|
"grad_norm": 17.57803726196289,
|
|
"learning_rate": 4.994966691179711e-07,
|
|
"logits/chosen": 1.6756740808486938,
|
|
"logits/rejected": 1.4945653676986694,
|
|
"logps/chosen": -78.33760070800781,
|
|
"logps/ref_chosen": -78.61624908447266,
|
|
"logps/ref_rejected": -99.9122314453125,
|
|
"logps/rejected": -99.94380950927734,
|
|
"loss": 1.3576,
|
|
"margin_dpo/margin_mean": 0.3102267384529114,
|
|
"margin_dpo/margin_std": 0.9209951758384705,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.12244897959183673,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.4144776165485382,
|
|
"fcm_dpo/q_t": 0.48966920375823975,
|
|
"grad_norm": 17.837419509887695,
|
|
"learning_rate": 4.994093197099587e-07,
|
|
"logits/chosen": 1.5351800918579102,
|
|
"logits/rejected": 1.438685655593872,
|
|
"logps/chosen": -79.15130615234375,
|
|
"logps/ref_chosen": -79.49641418457031,
|
|
"logps/ref_rejected": -94.52413940429688,
|
|
"logps/rejected": -94.593505859375,
|
|
"loss": 1.3474,
|
|
"margin_dpo/margin_mean": 0.4144783914089203,
|
|
"margin_dpo/margin_std": 0.8849209547042847,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.12396069538926682,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.5314297676086426,
|
|
"fcm_dpo/q_t": 0.486750990152359,
|
|
"grad_norm": 17.93260383605957,
|
|
"learning_rate": 4.993149937871306e-07,
|
|
"logits/chosen": 2.1229565143585205,
|
|
"logits/rejected": 1.9273746013641357,
|
|
"logps/chosen": -64.33843231201172,
|
|
"logps/ref_chosen": -64.97168731689453,
|
|
"logps/ref_rejected": -86.69085693359375,
|
|
"logps/rejected": -86.58903503417969,
|
|
"loss": 1.3359,
|
|
"margin_dpo/margin_mean": 0.531429648399353,
|
|
"margin_dpo/margin_std": 0.8953331112861633,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.1254724111866969,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.4476925730705261,
|
|
"fcm_dpo/q_t": 0.4888409674167633,
|
|
"grad_norm": 18.511592864990234,
|
|
"learning_rate": 4.992136939879856e-07,
|
|
"logits/chosen": 1.7243876457214355,
|
|
"logits/rejected": 1.5867960453033447,
|
|
"logps/chosen": -72.40562438964844,
|
|
"logps/ref_chosen": -72.92498779296875,
|
|
"logps/ref_rejected": -92.27165222167969,
|
|
"logps/rejected": -92.19998168945312,
|
|
"loss": 1.3441,
|
|
"margin_dpo/margin_mean": 0.44769296050071716,
|
|
"margin_dpo/margin_std": 0.8938767910003662,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.12698412698412698,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.45546847581863403,
|
|
"fcm_dpo/q_t": 0.48863863945007324,
|
|
"grad_norm": 19.406330108642578,
|
|
"learning_rate": 4.991054231460969e-07,
|
|
"logits/chosen": 1.9599827527999878,
|
|
"logits/rejected": 1.7785536050796509,
|
|
"logps/chosen": -81.36518859863281,
|
|
"logps/ref_chosen": -81.79109191894531,
|
|
"logps/ref_rejected": -99.20896911621094,
|
|
"logps/rejected": -99.23854064941406,
|
|
"loss": 1.344,
|
|
"margin_dpo/margin_mean": 0.45546823740005493,
|
|
"margin_dpo/margin_std": 1.0371967554092407,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.12849584278155707,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.48611921072006226,
|
|
"fcm_dpo/q_t": 0.4878706634044647,
|
|
"grad_norm": 17.551944732666016,
|
|
"learning_rate": 4.989901842900325e-07,
|
|
"logits/chosen": 1.5689201354980469,
|
|
"logits/rejected": 1.4499050378799438,
|
|
"logps/chosen": -67.32066345214844,
|
|
"logps/ref_chosen": -67.94147491455078,
|
|
"logps/ref_rejected": -85.76875305175781,
|
|
"logps/rejected": -85.63406372070312,
|
|
"loss": 1.3406,
|
|
"margin_dpo/margin_mean": 0.4861195683479309,
|
|
"margin_dpo/margin_std": 0.9498151540756226,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.13000755857898716,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.3929840326309204,
|
|
"fcm_dpo/q_t": 0.4902006983757019,
|
|
"grad_norm": 17.350364685058594,
|
|
"learning_rate": 4.988679806432711e-07,
|
|
"logits/chosen": 1.9112555980682373,
|
|
"logits/rejected": 1.8482015132904053,
|
|
"logps/chosen": -78.8712158203125,
|
|
"logps/ref_chosen": -79.21485900878906,
|
|
"logps/ref_rejected": -88.69877624511719,
|
|
"logps/rejected": -88.74812316894531,
|
|
"loss": 1.3492,
|
|
"margin_dpo/margin_mean": 0.392984002828598,
|
|
"margin_dpo/margin_std": 0.840862512588501,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.13151927437641722,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.7411639094352722,
|
|
"fcm_dpo/q_t": 0.4815508723258972,
|
|
"grad_norm": 18.811569213867188,
|
|
"learning_rate": 4.987388156241114e-07,
|
|
"logits/chosen": 1.541797161102295,
|
|
"logits/rejected": 1.3224825859069824,
|
|
"logps/chosen": -83.91616821289062,
|
|
"logps/ref_chosen": -84.45362854003906,
|
|
"logps/ref_rejected": -103.43824005126953,
|
|
"logps/rejected": -103.6419448852539,
|
|
"loss": 1.3171,
|
|
"margin_dpo/margin_mean": 0.7411632537841797,
|
|
"margin_dpo/margin_std": 1.1999635696411133,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.1330309901738473,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.36682209372520447,
|
|
"fcm_dpo/q_t": 0.4908638894557953,
|
|
"grad_norm": 18.169139862060547,
|
|
"learning_rate": 4.986026928455767e-07,
|
|
"logits/chosen": 1.8407284021377563,
|
|
"logits/rejected": 1.814268708229065,
|
|
"logps/chosen": -80.88067626953125,
|
|
"logps/ref_chosen": -81.27230834960938,
|
|
"logps/ref_rejected": -89.51646423339844,
|
|
"logps/rejected": -89.49165344238281,
|
|
"loss": 1.3531,
|
|
"margin_dpo/margin_mean": 0.3668217658996582,
|
|
"margin_dpo/margin_std": 1.042789101600647,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.1345427059712774,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.7424743175506592,
|
|
"fcm_dpo/q_t": 0.4815485179424286,
|
|
"grad_norm": 18.281597137451172,
|
|
"learning_rate": 4.984596161153135e-07,
|
|
"logits/chosen": 2.108861207962036,
|
|
"logits/rejected": 1.8158016204833984,
|
|
"logps/chosen": -57.46028137207031,
|
|
"logps/ref_chosen": -58.142333984375,
|
|
"logps/ref_rejected": -102.53756713867188,
|
|
"logps/rejected": -102.59799194335938,
|
|
"loss": 1.3176,
|
|
"margin_dpo/margin_mean": 0.7424756288528442,
|
|
"margin_dpo/margin_std": 1.2756874561309814,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.1360544217687075,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.5880427360534668,
|
|
"fcm_dpo/q_t": 0.4853513538837433,
|
|
"grad_norm": 19.718008041381836,
|
|
"learning_rate": 4.983095894354857e-07,
|
|
"logits/chosen": 1.734527587890625,
|
|
"logits/rejected": 1.509333610534668,
|
|
"logps/chosen": -74.75138854980469,
|
|
"logps/ref_chosen": -75.26505279541016,
|
|
"logps/ref_rejected": -104.32842254638672,
|
|
"logps/rejected": -104.40279388427734,
|
|
"loss": 1.3319,
|
|
"margin_dpo/margin_mean": 0.588042676448822,
|
|
"margin_dpo/margin_std": 1.1916567087173462,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.13756613756613756,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.5270799398422241,
|
|
"fcm_dpo/q_t": 0.4869080185890198,
|
|
"grad_norm": 19.484155654907227,
|
|
"learning_rate": 4.98152617002662e-07,
|
|
"logits/chosen": 2.009382486343384,
|
|
"logits/rejected": 1.8327490091323853,
|
|
"logps/chosen": -68.78492736816406,
|
|
"logps/ref_chosen": -69.33901977539062,
|
|
"logps/ref_rejected": -90.31411743164062,
|
|
"logps/rejected": -90.28709411621094,
|
|
"loss": 1.3388,
|
|
"margin_dpo/margin_mean": 0.5270801782608032,
|
|
"margin_dpo/margin_std": 1.3382683992385864,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.13907785336356765,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.6017959713935852,
|
|
"fcm_dpo/q_t": 0.4850356876850128,
|
|
"grad_norm": 18.92400360107422,
|
|
"learning_rate": 4.979887032076988e-07,
|
|
"logits/chosen": 1.8455489873886108,
|
|
"logits/rejected": 1.6811567544937134,
|
|
"logps/chosen": -71.865478515625,
|
|
"logps/ref_chosen": -72.4566650390625,
|
|
"logps/ref_rejected": -91.6706771850586,
|
|
"logps/rejected": -91.68128204345703,
|
|
"loss": 1.3306,
|
|
"margin_dpo/margin_mean": 0.6017957925796509,
|
|
"margin_dpo/margin_std": 1.1899182796478271,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.14058956916099774,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.4494805932044983,
|
|
"fcm_dpo/q_t": 0.48889386653900146,
|
|
"grad_norm": 16.139991760253906,
|
|
"learning_rate": 4.978178526356172e-07,
|
|
"logits/chosen": 1.5873305797576904,
|
|
"logits/rejected": 1.5008435249328613,
|
|
"logps/chosen": -63.4215202331543,
|
|
"logps/ref_chosen": -64.08897399902344,
|
|
"logps/ref_rejected": -75.09095764160156,
|
|
"logps/rejected": -74.87298583984375,
|
|
"loss": 1.3479,
|
|
"margin_dpo/margin_mean": 0.4494805335998535,
|
|
"margin_dpo/margin_std": 1.543592929840088,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.1421012849584278,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.0324513912200928,
|
|
"fcm_dpo/q_t": 0.474477618932724,
|
|
"grad_norm": 20.35590171813965,
|
|
"learning_rate": 4.976400700654751e-07,
|
|
"logits/chosen": 1.6213706731796265,
|
|
"logits/rejected": 1.4608677625656128,
|
|
"logps/chosen": -78.84420776367188,
|
|
"logps/ref_chosen": -79.67372131347656,
|
|
"logps/ref_rejected": -94.64076232910156,
|
|
"logps/rejected": -94.84370422363281,
|
|
"loss": 1.2933,
|
|
"margin_dpo/margin_mean": 1.0324519872665405,
|
|
"margin_dpo/margin_std": 1.7397394180297852,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.1436130007558579,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.6408168077468872,
|
|
"fcm_dpo/q_t": 0.4840792715549469,
|
|
"grad_norm": 18.675636291503906,
|
|
"learning_rate": 4.974553604702332e-07,
|
|
"logits/chosen": 1.4493110179901123,
|
|
"logits/rejected": 1.2806124687194824,
|
|
"logps/chosen": -78.254638671875,
|
|
"logps/ref_chosen": -78.65760803222656,
|
|
"logps/ref_rejected": -109.4048080444336,
|
|
"logps/rejected": -109.64266204833984,
|
|
"loss": 1.3286,
|
|
"margin_dpo/margin_mean": 0.6408175230026245,
|
|
"margin_dpo/margin_std": 1.4486722946166992,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.14512471655328799,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.812680721282959,
|
|
"fcm_dpo/q_t": 0.47984737157821655,
|
|
"grad_norm": 18.943740844726562,
|
|
"learning_rate": 4.972637290166157e-07,
|
|
"logits/chosen": 1.8304685354232788,
|
|
"logits/rejected": 1.6877751350402832,
|
|
"logps/chosen": -77.20866394042969,
|
|
"logps/ref_chosen": -77.708251953125,
|
|
"logps/ref_rejected": -104.36044311523438,
|
|
"logps/rejected": -104.67352294921875,
|
|
"loss": 1.3126,
|
|
"margin_dpo/margin_mean": 0.8126805424690247,
|
|
"margin_dpo/margin_std": 1.518812656402588,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.14663643235071808,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.3321409225463867,
|
|
"fcm_dpo/q_t": 0.4917249083518982,
|
|
"grad_norm": 19.33458709716797,
|
|
"learning_rate": 4.970651810649666e-07,
|
|
"logits/chosen": 1.5826592445373535,
|
|
"logits/rejected": 1.481719732284546,
|
|
"logps/chosen": -84.24017333984375,
|
|
"logps/ref_chosen": -84.58917999267578,
|
|
"logps/ref_rejected": -99.25704956054688,
|
|
"logps/rejected": -99.24018096923828,
|
|
"loss": 1.3605,
|
|
"margin_dpo/margin_mean": 0.3321412205696106,
|
|
"margin_dpo/margin_std": 1.6863982677459717,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.14814814814814814,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.4240918755531311,
|
|
"fcm_dpo/q_t": 0.4894029498100281,
|
|
"grad_norm": 17.551227569580078,
|
|
"learning_rate": 4.968597221690985e-07,
|
|
"logits/chosen": 1.6251521110534668,
|
|
"logits/rejected": 1.5772819519042969,
|
|
"logps/chosen": -74.0291519165039,
|
|
"logps/ref_chosen": -74.42477416992188,
|
|
"logps/ref_rejected": -88.93840026855469,
|
|
"logps/rejected": -88.96687316894531,
|
|
"loss": 1.3493,
|
|
"margin_dpo/margin_mean": 0.42409175634384155,
|
|
"margin_dpo/margin_std": 1.4047505855560303,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.14965986394557823,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.6838282346725464,
|
|
"fcm_dpo/q_t": 0.4832128882408142,
|
|
"grad_norm": 17.991985321044922,
|
|
"learning_rate": 4.966473580761389e-07,
|
|
"logits/chosen": 1.8413865566253662,
|
|
"logits/rejected": 1.7409846782684326,
|
|
"logps/chosen": -75.00747680664062,
|
|
"logps/ref_chosen": -75.59742736816406,
|
|
"logps/ref_rejected": -98.2310791015625,
|
|
"logps/rejected": -98.324951171875,
|
|
"loss": 1.3295,
|
|
"margin_dpo/margin_mean": 0.6838279962539673,
|
|
"margin_dpo/margin_std": 2.0383212566375732,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.15117157974300832,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.7622036337852478,
|
|
"fcm_dpo/q_t": 0.4814043343067169,
|
|
"grad_norm": 19.61545181274414,
|
|
"learning_rate": 4.964280947263676e-07,
|
|
"logits/chosen": 1.954576849937439,
|
|
"logits/rejected": 1.9274578094482422,
|
|
"logps/chosen": -98.07032775878906,
|
|
"logps/ref_chosen": -98.55859375,
|
|
"logps/ref_rejected": -106.01295471191406,
|
|
"logps/rejected": -106.2868881225586,
|
|
"loss": 1.325,
|
|
"margin_dpo/margin_mean": 0.7622038125991821,
|
|
"margin_dpo/margin_std": 2.1858882904052734,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.15117157974300832,
|
|
"eval_fcm_dpo/beta": 0.10000000894069672,
|
|
"eval_logits/chosen": 1.6834615468978882,
|
|
"eval_logits/rejected": 1.5697993040084839,
|
|
"eval_logps/chosen": -86.1994857788086,
|
|
"eval_logps/ref_chosen": -86.90177917480469,
|
|
"eval_logps/ref_rejected": -96.69639587402344,
|
|
"eval_logps/rejected": -96.89765167236328,
|
|
"eval_loss": 0.6541018486022949,
|
|
"eval_margin_dpo/margin_mean": 0.9035704731941223,
|
|
"eval_margin_dpo/margin_std": 1.9803118705749512,
|
|
"eval_runtime": 42.2787,
|
|
"eval_samples_per_second": 54.472,
|
|
"eval_steps_per_second": 1.703,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.15268329554043839,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.9653818607330322,
|
|
"fcm_dpo/q_t": 0.4760565757751465,
|
|
"grad_norm": 16.312408447265625,
|
|
"learning_rate": 4.96201938253052e-07,
|
|
"logits/chosen": 1.4704093933105469,
|
|
"logits/rejected": 1.4150559902191162,
|
|
"logps/chosen": -68.62451171875,
|
|
"logps/ref_chosen": -69.45216369628906,
|
|
"logps/ref_rejected": -88.0458755493164,
|
|
"logps/rejected": -88.18360137939453,
|
|
"loss": 1.3003,
|
|
"margin_dpo/margin_mean": 0.9653820991516113,
|
|
"margin_dpo/margin_std": 1.8038573265075684,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.15419501133786848,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.6511964797973633,
|
|
"fcm_dpo/q_t": 0.48394304513931274,
|
|
"grad_norm": 17.326839447021484,
|
|
"learning_rate": 4.959688949822748e-07,
|
|
"logits/chosen": 1.6613342761993408,
|
|
"logits/rejected": 1.582979679107666,
|
|
"logps/chosen": -79.78057861328125,
|
|
"logps/ref_chosen": -80.35308837890625,
|
|
"logps/ref_rejected": -90.61380004882812,
|
|
"logps/rejected": -90.69248962402344,
|
|
"loss": 1.3318,
|
|
"margin_dpo/margin_mean": 0.651196300983429,
|
|
"margin_dpo/margin_std": 1.949533462524414,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.15570672713529857,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.2242300510406494,
|
|
"fcm_dpo/q_t": 0.4698329269886017,
|
|
"grad_norm": 17.35326385498047,
|
|
"learning_rate": 4.957289714327572e-07,
|
|
"logits/chosen": 1.6548218727111816,
|
|
"logits/rejected": 1.5961244106292725,
|
|
"logps/chosen": -78.47612762451172,
|
|
"logps/ref_chosen": -79.30392456054688,
|
|
"logps/ref_rejected": -93.745361328125,
|
|
"logps/rejected": -94.14179992675781,
|
|
"loss": 1.2785,
|
|
"margin_dpo/margin_mean": 1.224229097366333,
|
|
"margin_dpo/margin_std": 2.101567029953003,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.15721844293272866,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.1804628372192383,
|
|
"fcm_dpo/q_t": 0.4710468351840973,
|
|
"grad_norm": 18.6791934967041,
|
|
"learning_rate": 4.954821743156767e-07,
|
|
"logits/chosen": 1.7360849380493164,
|
|
"logits/rejected": 1.5076425075531006,
|
|
"logps/chosen": -73.59494018554688,
|
|
"logps/ref_chosen": -74.50674438476562,
|
|
"logps/ref_rejected": -116.09912872314453,
|
|
"logps/rejected": -116.3677978515625,
|
|
"loss": 1.2851,
|
|
"margin_dpo/margin_mean": 1.18046236038208,
|
|
"margin_dpo/margin_std": 2.282648801803589,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.15873015873015872,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.9783838987350464,
|
|
"fcm_dpo/q_t": 0.4760921001434326,
|
|
"grad_norm": 18.822858810424805,
|
|
"learning_rate": 4.952285105344791e-07,
|
|
"logits/chosen": 1.7781691551208496,
|
|
"logits/rejected": 1.611711025238037,
|
|
"logps/chosen": -87.190673828125,
|
|
"logps/ref_chosen": -87.76654815673828,
|
|
"logps/ref_rejected": -108.07927703857422,
|
|
"logps/rejected": -108.48179626464844,
|
|
"loss": 1.3109,
|
|
"margin_dpo/margin_mean": 0.9783839583396912,
|
|
"margin_dpo/margin_std": 2.846864700317383,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.1602418745275888,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.9594533443450928,
|
|
"fcm_dpo/q_t": 0.4763692617416382,
|
|
"grad_norm": 16.99517250061035,
|
|
"learning_rate": 4.949679871846857e-07,
|
|
"logits/chosen": 1.7635796070098877,
|
|
"logits/rejected": 1.7058625221252441,
|
|
"logps/chosen": -75.43994140625,
|
|
"logps/ref_chosen": -76.38548278808594,
|
|
"logps/ref_rejected": -81.63407897949219,
|
|
"logps/rejected": -81.64799499511719,
|
|
"loss": 1.3078,
|
|
"margin_dpo/margin_mean": 0.9594534039497375,
|
|
"margin_dpo/margin_std": 2.473605155944824,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.1617535903250189,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.620085597038269,
|
|
"fcm_dpo/q_t": 0.4845724403858185,
|
|
"grad_norm": 19.18521499633789,
|
|
"learning_rate": 4.947006115536947e-07,
|
|
"logits/chosen": 1.33339262008667,
|
|
"logits/rejected": 1.2754939794540405,
|
|
"logps/chosen": -95.81202697753906,
|
|
"logps/ref_chosen": -96.14849853515625,
|
|
"logps/ref_rejected": -107.0481185913086,
|
|
"logps/rejected": -107.33172607421875,
|
|
"loss": 1.3381,
|
|
"margin_dpo/margin_mean": 0.6200859546661377,
|
|
"margin_dpo/margin_std": 2.2718071937561035,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.16326530612244897,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.8618567585945129,
|
|
"fcm_dpo/q_t": 0.4787394404411316,
|
|
"grad_norm": 17.177370071411133,
|
|
"learning_rate": 4.944263911205772e-07,
|
|
"logits/chosen": 1.4922263622283936,
|
|
"logits/rejected": 1.3662118911743164,
|
|
"logps/chosen": -84.60310363769531,
|
|
"logps/ref_chosen": -85.39241027832031,
|
|
"logps/ref_rejected": -97.79592895507812,
|
|
"logps/rejected": -97.86846923828125,
|
|
"loss": 1.3151,
|
|
"margin_dpo/margin_mean": 0.8618567585945129,
|
|
"margin_dpo/margin_std": 2.300678253173828,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.16477702191987906,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.3147889375686646,
|
|
"fcm_dpo/q_t": 0.46812164783477783,
|
|
"grad_norm": 18.14927101135254,
|
|
"learning_rate": 4.941453335558681e-07,
|
|
"logits/chosen": 1.3905439376831055,
|
|
"logits/rejected": 1.174678087234497,
|
|
"logps/chosen": -77.99679565429688,
|
|
"logps/ref_chosen": -78.99874877929688,
|
|
"logps/ref_rejected": -100.79278564453125,
|
|
"logps/rejected": -101.10562133789062,
|
|
"loss": 1.2781,
|
|
"margin_dpo/margin_mean": 1.3147889375686646,
|
|
"margin_dpo/margin_std": 2.780221939086914,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.16628873771730915,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.2178063839673996,
|
|
"fcm_dpo/q_t": 0.49449440836906433,
|
|
"grad_norm": 20.665647506713867,
|
|
"learning_rate": 4.938574467213517e-07,
|
|
"logits/chosen": 1.4688796997070312,
|
|
"logits/rejected": 1.5312542915344238,
|
|
"logps/chosen": -96.5684814453125,
|
|
"logps/ref_chosen": -96.95277404785156,
|
|
"logps/ref_rejected": -91.44450378417969,
|
|
"logps/rejected": -91.27799987792969,
|
|
"loss": 1.381,
|
|
"margin_dpo/margin_mean": 0.2178059071302414,
|
|
"margin_dpo/margin_std": 2.5462493896484375,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.16780045351473924,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.0503623485565186,
|
|
"fcm_dpo/q_t": 0.4742129445075989,
|
|
"grad_norm": 16.398740768432617,
|
|
"learning_rate": 4.935627386698418e-07,
|
|
"logits/chosen": 1.9407978057861328,
|
|
"logits/rejected": 1.774618148803711,
|
|
"logps/chosen": -69.15205383300781,
|
|
"logps/ref_chosen": -70.01641845703125,
|
|
"logps/ref_rejected": -92.87696838378906,
|
|
"logps/rejected": -93.06297302246094,
|
|
"loss": 1.3,
|
|
"margin_dpo/margin_mean": 1.0503621101379395,
|
|
"margin_dpo/margin_std": 2.530200958251953,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.1693121693121693,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.3121293783187866,
|
|
"fcm_dpo/q_t": 0.46776682138442993,
|
|
"grad_norm": 19.021568298339844,
|
|
"learning_rate": 4.932612176449559e-07,
|
|
"logits/chosen": 1.7154479026794434,
|
|
"logits/rejected": 1.5044281482696533,
|
|
"logps/chosen": -76.7746810913086,
|
|
"logps/ref_chosen": -77.80027770996094,
|
|
"logps/ref_rejected": -123.10624694824219,
|
|
"logps/rejected": -123.39279174804688,
|
|
"loss": 1.276,
|
|
"margin_dpo/margin_mean": 1.3121283054351807,
|
|
"margin_dpo/margin_std": 2.5639796257019043,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.1708238851095994,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.8528228402137756,
|
|
"fcm_dpo/q_t": 0.47901567816734314,
|
|
"grad_norm": 16.568147659301758,
|
|
"learning_rate": 4.929528920808854e-07,
|
|
"logits/chosen": 1.7319364547729492,
|
|
"logits/rejected": 1.641928791999817,
|
|
"logps/chosen": -69.28436279296875,
|
|
"logps/ref_chosen": -70.54346466064453,
|
|
"logps/ref_rejected": -88.79286193847656,
|
|
"logps/rejected": -88.38658142089844,
|
|
"loss": 1.3191,
|
|
"margin_dpo/margin_mean": 0.8528228998184204,
|
|
"margin_dpo/margin_std": 2.5240535736083984,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.17233560090702948,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.332871913909912,
|
|
"fcm_dpo/q_t": 0.46783509850502014,
|
|
"grad_norm": 21.800825119018555,
|
|
"learning_rate": 4.92637770602159e-07,
|
|
"logits/chosen": 1.7463035583496094,
|
|
"logits/rejected": 1.6003742218017578,
|
|
"logps/chosen": -82.77552032470703,
|
|
"logps/ref_chosen": -83.9239501953125,
|
|
"logps/ref_rejected": -92.85765838623047,
|
|
"logps/rejected": -93.04209899902344,
|
|
"loss": 1.282,
|
|
"margin_dpo/margin_mean": 1.3328726291656494,
|
|
"margin_dpo/margin_std": 3.1606192588806152,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.17384731670445955,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.1995645761489868,
|
|
"fcm_dpo/q_t": 0.4707218408584595,
|
|
"grad_norm": 23.079219818115234,
|
|
"learning_rate": 4.923158620234019e-07,
|
|
"logits/chosen": 1.6879757642745972,
|
|
"logits/rejected": 1.517378568649292,
|
|
"logps/chosen": -68.5137939453125,
|
|
"logps/ref_chosen": -69.82767486572266,
|
|
"logps/ref_rejected": -96.51564025878906,
|
|
"logps/rejected": -96.40132141113281,
|
|
"loss": 1.287,
|
|
"margin_dpo/margin_mean": 1.199564814567566,
|
|
"margin_dpo/margin_std": 2.603288173675537,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.17535903250188964,
|
|
"fcm_dpo/beta": 0.10327555239200592,
|
|
"fcm_dpo/delta": 0.3172721266746521,
|
|
"fcm_dpo/margin": 1.6490867137908936,
|
|
"fcm_dpo/q_t": 0.45969825983047485,
|
|
"grad_norm": 18.574960708618164,
|
|
"learning_rate": 4.91987175349089e-07,
|
|
"logits/chosen": 1.7174615859985352,
|
|
"logits/rejected": 1.561848521232605,
|
|
"logps/chosen": -64.7716064453125,
|
|
"logps/ref_chosen": -66.19773864746094,
|
|
"logps/ref_rejected": -90.88304138183594,
|
|
"logps/rejected": -91.10599517822266,
|
|
"loss": 1.2404,
|
|
"margin_dpo/margin_mean": 1.6490864753723145,
|
|
"margin_dpo/margin_std": 2.610063314437866,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.17687074829931973,
|
|
"fcm_dpo/beta": 0.11360542476177216,
|
|
"fcm_dpo/delta": 0.32053306698799133,
|
|
"fcm_dpo/margin": 1.6154546737670898,
|
|
"fcm_dpo/q_t": 0.45687851309776306,
|
|
"grad_norm": 18.478755950927734,
|
|
"learning_rate": 4.916517197732933e-07,
|
|
"logits/chosen": 1.6380093097686768,
|
|
"logits/rejected": 1.536433458328247,
|
|
"logps/chosen": -70.42033386230469,
|
|
"logps/ref_chosen": -72.15988159179688,
|
|
"logps/ref_rejected": -85.30296325683594,
|
|
"logps/rejected": -85.17887115478516,
|
|
"loss": 1.2359,
|
|
"margin_dpo/margin_mean": 1.6154546737670898,
|
|
"margin_dpo/margin_std": 2.7676258087158203,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.17838246409674982,
|
|
"fcm_dpo/beta": 0.11360542476177216,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.179227590560913,
|
|
"fcm_dpo/q_t": 0.4674232006072998,
|
|
"grad_norm": 18.794208526611328,
|
|
"learning_rate": 4.913095046794281e-07,
|
|
"logits/chosen": 1.7830784320831299,
|
|
"logits/rejected": 1.6586743593215942,
|
|
"logps/chosen": -70.03721618652344,
|
|
"logps/ref_chosen": -71.47773742675781,
|
|
"logps/ref_rejected": -96.95051574707031,
|
|
"logps/rejected": -96.6892318725586,
|
|
"loss": 1.2843,
|
|
"margin_dpo/margin_mean": 1.1792272329330444,
|
|
"margin_dpo/margin_std": 2.940877676010132,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.17989417989417988,
|
|
"fcm_dpo/beta": 0.11360542476177216,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.357081413269043,
|
|
"fcm_dpo/q_t": 0.46345484256744385,
|
|
"grad_norm": 19.133548736572266,
|
|
"learning_rate": 4.909605396399855e-07,
|
|
"logits/chosen": 1.7609422206878662,
|
|
"logits/rejected": 1.6618965864181519,
|
|
"logps/chosen": -76.75531005859375,
|
|
"logps/ref_chosen": -78.2727279663086,
|
|
"logps/ref_rejected": -94.71317291259766,
|
|
"logps/rejected": -94.5528335571289,
|
|
"loss": 1.2794,
|
|
"margin_dpo/margin_mean": 1.3570810556411743,
|
|
"margin_dpo/margin_std": 3.6027512550354004,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.18140589569160998,
|
|
"fcm_dpo/beta": 0.12034539878368378,
|
|
"fcm_dpo/delta": 0.28817349672317505,
|
|
"fcm_dpo/margin": 2.0527327060699463,
|
|
"fcm_dpo/q_t": 0.4423573911190033,
|
|
"grad_norm": 21.03508758544922,
|
|
"learning_rate": 4.906048344162676e-07,
|
|
"logits/chosen": 2.0089728832244873,
|
|
"logits/rejected": 1.8490796089172363,
|
|
"logps/chosen": -76.51966094970703,
|
|
"logps/ref_chosen": -78.43109130859375,
|
|
"logps/ref_rejected": -100.2771987915039,
|
|
"logps/rejected": -100.41851043701172,
|
|
"loss": 1.1849,
|
|
"margin_dpo/margin_mean": 2.052732467651367,
|
|
"margin_dpo/margin_std": 2.924802303314209,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.18291761148904007,
|
|
"fcm_dpo/beta": 0.12824112176895142,
|
|
"fcm_dpo/delta": 0.317731648683548,
|
|
"fcm_dpo/margin": 1.4499316215515137,
|
|
"fcm_dpo/q_t": 0.4577900767326355,
|
|
"grad_norm": 22.99397850036621,
|
|
"learning_rate": 4.902423989581143e-07,
|
|
"logits/chosen": 2.067244052886963,
|
|
"logits/rejected": 1.7988061904907227,
|
|
"logps/chosen": -72.40137481689453,
|
|
"logps/ref_chosen": -74.08768463134766,
|
|
"logps/ref_rejected": -118.6731948852539,
|
|
"logps/rejected": -118.43681335449219,
|
|
"loss": 1.25,
|
|
"margin_dpo/margin_mean": 1.4499316215515137,
|
|
"margin_dpo/margin_std": 3.1986875534057617,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.18442932728647016,
|
|
"fcm_dpo/beta": 0.13225537538528442,
|
|
"fcm_dpo/delta": 0.30361536145210266,
|
|
"fcm_dpo/margin": 1.4434542655944824,
|
|
"fcm_dpo/q_t": 0.45735594630241394,
|
|
"grad_norm": 22.976837158203125,
|
|
"learning_rate": 4.898732434036243e-07,
|
|
"logits/chosen": 1.6426172256469727,
|
|
"logits/rejected": 1.5161330699920654,
|
|
"logps/chosen": -77.5340576171875,
|
|
"logps/ref_chosen": -79.36762237548828,
|
|
"logps/ref_rejected": -92.42371368408203,
|
|
"logps/rejected": -92.03360748291016,
|
|
"loss": 1.2557,
|
|
"margin_dpo/margin_mean": 1.4434537887573242,
|
|
"margin_dpo/margin_std": 3.4955062866210938,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.18594104308390022,
|
|
"fcm_dpo/beta": 0.140414759516716,
|
|
"fcm_dpo/delta": 0.29529285430908203,
|
|
"fcm_dpo/margin": 1.6619194746017456,
|
|
"fcm_dpo/q_t": 0.4464726448059082,
|
|
"grad_norm": 22.928123474121094,
|
|
"learning_rate": 4.894973780788722e-07,
|
|
"logits/chosen": 1.538325309753418,
|
|
"logits/rejected": 1.4405975341796875,
|
|
"logps/chosen": -69.86561584472656,
|
|
"logps/ref_chosen": -71.91705322265625,
|
|
"logps/ref_rejected": -96.36418151855469,
|
|
"logps/rejected": -95.97465515136719,
|
|
"loss": 1.2102,
|
|
"margin_dpo/margin_mean": 1.661919355392456,
|
|
"margin_dpo/margin_std": 3.0403366088867188,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.1874527588813303,
|
|
"fcm_dpo/beta": 0.15786468982696533,
|
|
"fcm_dpo/delta": 0.5678054690361023,
|
|
"fcm_dpo/margin": 1.8830089569091797,
|
|
"fcm_dpo/q_t": 0.43587011098861694,
|
|
"grad_norm": 27.404041290283203,
|
|
"learning_rate": 4.89114813497619e-07,
|
|
"logits/chosen": 1.656646966934204,
|
|
"logits/rejected": 1.5460621118545532,
|
|
"logps/chosen": -69.46039581298828,
|
|
"logps/ref_chosen": -71.72529602050781,
|
|
"logps/ref_rejected": -111.17984771728516,
|
|
"logps/rejected": -110.79795837402344,
|
|
"loss": 1.1936,
|
|
"margin_dpo/margin_mean": 1.883009433746338,
|
|
"margin_dpo/margin_std": 3.7926838397979736,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.1889644746787604,
|
|
"fcm_dpo/beta": 0.1739986538887024,
|
|
"fcm_dpo/delta": 0.509579062461853,
|
|
"fcm_dpo/margin": 2.0727288722991943,
|
|
"fcm_dpo/q_t": 0.42179858684539795,
|
|
"grad_norm": 29.48147201538086,
|
|
"learning_rate": 4.887255603610184e-07,
|
|
"logits/chosen": 1.688849687576294,
|
|
"logits/rejected": 1.507893443107605,
|
|
"logps/chosen": -79.08648681640625,
|
|
"logps/ref_chosen": -81.55532836914062,
|
|
"logps/ref_rejected": -110.9144287109375,
|
|
"logps/rejected": -110.51831817626953,
|
|
"loss": 1.1477,
|
|
"margin_dpo/margin_mean": 2.072727680206299,
|
|
"margin_dpo/margin_std": 3.4490890502929688,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.19047619047619047,
|
|
"fcm_dpo/beta": 0.1837829053401947,
|
|
"fcm_dpo/delta": 0.24289314448833466,
|
|
"fcm_dpo/margin": 1.6221380233764648,
|
|
"fcm_dpo/q_t": 0.4397786855697632,
|
|
"grad_norm": 33.398712158203125,
|
|
"learning_rate": 4.883296295573176e-07,
|
|
"logits/chosen": 1.1400885581970215,
|
|
"logits/rejected": 1.1677029132843018,
|
|
"logps/chosen": -83.61918640136719,
|
|
"logps/ref_chosen": -87.07349395751953,
|
|
"logps/ref_rejected": -85.05271911621094,
|
|
"logps/rejected": -83.22055053710938,
|
|
"loss": 1.2483,
|
|
"margin_dpo/margin_mean": 1.6221377849578857,
|
|
"margin_dpo/margin_std": 4.112092018127441,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.19198790627362056,
|
|
"fcm_dpo/beta": 0.19954904913902283,
|
|
"fcm_dpo/delta": 0.4273369312286377,
|
|
"fcm_dpo/margin": 2.2214713096618652,
|
|
"fcm_dpo/q_t": 0.4040955603122711,
|
|
"grad_norm": 31.264402389526367,
|
|
"learning_rate": 4.87927032161552e-07,
|
|
"logits/chosen": 1.6167099475860596,
|
|
"logits/rejected": 1.5610288381576538,
|
|
"logps/chosen": -77.12249755859375,
|
|
"logps/ref_chosen": -80.4578857421875,
|
|
"logps/ref_rejected": -90.50740051269531,
|
|
"logps/rejected": -89.39348602294922,
|
|
"loss": 1.0822,
|
|
"margin_dpo/margin_mean": 2.221470832824707,
|
|
"margin_dpo/margin_std": 3.047856330871582,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.19349962207105065,
|
|
"fcm_dpo/beta": 0.21528372168540955,
|
|
"fcm_dpo/delta": 0.24369965493679047,
|
|
"fcm_dpo/margin": 1.4183709621429443,
|
|
"fcm_dpo/q_t": 0.43787145614624023,
|
|
"grad_norm": 39.298763275146484,
|
|
"learning_rate": 4.875177794352363e-07,
|
|
"logits/chosen": 1.5626955032348633,
|
|
"logits/rejected": 1.3739066123962402,
|
|
"logps/chosen": -82.65255737304688,
|
|
"logps/ref_chosen": -85.77519226074219,
|
|
"logps/ref_rejected": -112.63516998291016,
|
|
"logps/rejected": -110.930908203125,
|
|
"loss": 1.3259,
|
|
"margin_dpo/margin_mean": 1.4183712005615234,
|
|
"margin_dpo/margin_std": 4.649229526519775,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.19501133786848074,
|
|
"fcm_dpo/beta": 0.2325761914253235,
|
|
"fcm_dpo/delta": 0.497514545917511,
|
|
"fcm_dpo/margin": 1.5880060195922852,
|
|
"fcm_dpo/q_t": 0.4272322654724121,
|
|
"grad_norm": 48.150299072265625,
|
|
"learning_rate": 4.871018828260491e-07,
|
|
"logits/chosen": 1.585038423538208,
|
|
"logits/rejected": 1.5908199548721313,
|
|
"logps/chosen": -81.96442413330078,
|
|
"logps/ref_chosen": -84.94615173339844,
|
|
"logps/ref_rejected": -85.36473846435547,
|
|
"logps/rejected": -83.97102355957031,
|
|
"loss": 1.2814,
|
|
"margin_dpo/margin_mean": 1.5880064964294434,
|
|
"margin_dpo/margin_std": 4.411214828491211,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.1965230536659108,
|
|
"fcm_dpo/beta": 0.25136274099349976,
|
|
"fcm_dpo/delta": 0.4040859341621399,
|
|
"fcm_dpo/margin": 1.8546559810638428,
|
|
"fcm_dpo/q_t": 0.40786296129226685,
|
|
"grad_norm": 44.689945220947266,
|
|
"learning_rate": 4.866793539675126e-07,
|
|
"logits/chosen": 1.576695442199707,
|
|
"logits/rejected": 1.4571049213409424,
|
|
"logps/chosen": -75.59423828125,
|
|
"logps/ref_chosen": -79.0184555053711,
|
|
"logps/ref_rejected": -97.63998413085938,
|
|
"logps/rejected": -96.07042694091797,
|
|
"loss": 1.1687,
|
|
"margin_dpo/margin_mean": 1.8546559810638428,
|
|
"margin_dpo/margin_std": 3.65340518951416,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.1980347694633409,
|
|
"fcm_dpo/beta": 0.26572394371032715,
|
|
"fcm_dpo/delta": 0.2274240255355835,
|
|
"fcm_dpo/margin": 2.399470090866089,
|
|
"fcm_dpo/q_t": 0.3838863968849182,
|
|
"grad_norm": 40.63927459716797,
|
|
"learning_rate": 4.86250204678667e-07,
|
|
"logits/chosen": 1.4264767169952393,
|
|
"logits/rejected": 1.1921199560165405,
|
|
"logps/chosen": -64.54088592529297,
|
|
"logps/ref_chosen": -68.24565887451172,
|
|
"logps/ref_rejected": -97.99555969238281,
|
|
"logps/rejected": -96.69026184082031,
|
|
"loss": 1.1124,
|
|
"margin_dpo/margin_mean": 2.399470329284668,
|
|
"margin_dpo/margin_std": 4.310845851898193,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.19954648526077098,
|
|
"fcm_dpo/beta": 0.28637751936912537,
|
|
"fcm_dpo/delta": 0.3753895163536072,
|
|
"fcm_dpo/margin": 1.716357707977295,
|
|
"fcm_dpo/q_t": 0.40129733085632324,
|
|
"grad_norm": 48.86708450317383,
|
|
"learning_rate": 4.858144469637408e-07,
|
|
"logits/chosen": 1.8425252437591553,
|
|
"logits/rejected": 1.721367359161377,
|
|
"logps/chosen": -78.26350402832031,
|
|
"logps/ref_chosen": -82.06532287597656,
|
|
"logps/ref_rejected": -89.47691345214844,
|
|
"logps/rejected": -87.39144134521484,
|
|
"loss": 1.187,
|
|
"margin_dpo/margin_mean": 1.716357707977295,
|
|
"margin_dpo/margin_std": 3.696367025375366,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.20105820105820105,
|
|
"fcm_dpo/beta": 0.3037889003753662,
|
|
"fcm_dpo/delta": 0.30762773752212524,
|
|
"fcm_dpo/margin": 1.8439881801605225,
|
|
"fcm_dpo/q_t": 0.39642536640167236,
|
|
"grad_norm": 55.50751876831055,
|
|
"learning_rate": 4.853720930118138e-07,
|
|
"logits/chosen": 1.4474172592163086,
|
|
"logits/rejected": 1.4812402725219727,
|
|
"logps/chosen": -79.32565307617188,
|
|
"logps/ref_chosen": -83.70661163330078,
|
|
"logps/ref_rejected": -89.3868179321289,
|
|
"logps/rejected": -86.84983825683594,
|
|
"loss": 1.1999,
|
|
"margin_dpo/margin_mean": 1.8439884185791016,
|
|
"margin_dpo/margin_std": 3.8783974647521973,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.20256991685563114,
|
|
"fcm_dpo/beta": 0.31286799907684326,
|
|
"fcm_dpo/delta": 0.05367041379213333,
|
|
"fcm_dpo/margin": 2.5576937198638916,
|
|
"fcm_dpo/q_t": 0.3593463897705078,
|
|
"grad_norm": 47.953609466552734,
|
|
"learning_rate": 4.849231551964771e-07,
|
|
"logits/chosen": 1.8493995666503906,
|
|
"logits/rejected": 1.7130919694900513,
|
|
"logps/chosen": -66.60760498046875,
|
|
"logps/ref_chosen": -71.57601928710938,
|
|
"logps/ref_rejected": -92.34259033203125,
|
|
"logps/rejected": -89.931884765625,
|
|
"loss": 1.0617,
|
|
"margin_dpo/margin_mean": 2.5576934814453125,
|
|
"margin_dpo/margin_std": 4.291810989379883,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.20408163265306123,
|
|
"fcm_dpo/beta": 0.32738637924194336,
|
|
"fcm_dpo/delta": 0.2678135931491852,
|
|
"fcm_dpo/margin": 1.8198938369750977,
|
|
"fcm_dpo/q_t": 0.38489198684692383,
|
|
"grad_norm": 49.348323822021484,
|
|
"learning_rate": 4.844676460754862e-07,
|
|
"logits/chosen": 1.499782919883728,
|
|
"logits/rejected": 1.4428374767303467,
|
|
"logps/chosen": -61.08345031738281,
|
|
"logps/ref_chosen": -66.39884948730469,
|
|
"logps/ref_rejected": -81.38636779785156,
|
|
"logps/rejected": -77.89085388183594,
|
|
"loss": 1.1684,
|
|
"margin_dpo/margin_mean": 1.8198933601379395,
|
|
"margin_dpo/margin_std": 3.714056968688965,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.20559334845049132,
|
|
"fcm_dpo/beta": 0.32900992035865784,
|
|
"fcm_dpo/delta": 0.10048435628414154,
|
|
"fcm_dpo/margin": 2.2877230644226074,
|
|
"fcm_dpo/q_t": 0.3859502673149109,
|
|
"grad_norm": 65.70194244384766,
|
|
"learning_rate": 4.840055783904106e-07,
|
|
"logits/chosen": 1.6483759880065918,
|
|
"logits/rejected": 1.3668988943099976,
|
|
"logps/chosen": -82.15748596191406,
|
|
"logps/ref_chosen": -86.75381469726562,
|
|
"logps/ref_rejected": -113.35548400878906,
|
|
"logps/rejected": -111.04689025878906,
|
|
"loss": 1.2694,
|
|
"margin_dpo/margin_mean": 2.2877230644226074,
|
|
"margin_dpo/margin_std": 4.9393510818481445,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.20710506424792138,
|
|
"fcm_dpo/beta": 0.3471960425376892,
|
|
"fcm_dpo/delta": 0.0843411386013031,
|
|
"fcm_dpo/margin": 2.200601816177368,
|
|
"fcm_dpo/q_t": 0.36530107259750366,
|
|
"grad_norm": 49.037841796875,
|
|
"learning_rate": 4.835369650662767e-07,
|
|
"logits/chosen": 1.7660987377166748,
|
|
"logits/rejected": 1.6533045768737793,
|
|
"logps/chosen": -66.63494110107422,
|
|
"logps/ref_chosen": -72.21119689941406,
|
|
"logps/ref_rejected": -88.30802917480469,
|
|
"logps/rejected": -84.93238067626953,
|
|
"loss": 1.0984,
|
|
"margin_dpo/margin_mean": 2.2006025314331055,
|
|
"margin_dpo/margin_std": 3.669823169708252,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.20861678004535147,
|
|
"fcm_dpo/beta": 0.3590894043445587,
|
|
"fcm_dpo/delta": 0.30840471386909485,
|
|
"fcm_dpo/margin": 1.5543081760406494,
|
|
"fcm_dpo/q_t": 0.4112345576286316,
|
|
"grad_norm": 63.31071472167969,
|
|
"learning_rate": 4.830618192112065e-07,
|
|
"logits/chosen": 1.435781717300415,
|
|
"logits/rejected": 1.3367691040039062,
|
|
"logps/chosen": -69.54479217529297,
|
|
"logps/ref_chosen": -74.54273223876953,
|
|
"logps/ref_rejected": -84.63615417480469,
|
|
"logps/rejected": -81.19251251220703,
|
|
"loss": 1.3065,
|
|
"margin_dpo/margin_mean": 1.5543079376220703,
|
|
"margin_dpo/margin_std": 4.103353977203369,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.21012849584278157,
|
|
"fcm_dpo/beta": 0.3809944987297058,
|
|
"fcm_dpo/delta": 0.2578265070915222,
|
|
"fcm_dpo/margin": 1.588025689125061,
|
|
"fcm_dpo/q_t": 0.38131409883499146,
|
|
"grad_norm": 84.61750030517578,
|
|
"learning_rate": 4.825801541160509e-07,
|
|
"logits/chosen": 1.6165993213653564,
|
|
"logits/rejected": 1.5509192943572998,
|
|
"logps/chosen": -82.63727569580078,
|
|
"logps/ref_chosen": -87.63740539550781,
|
|
"logps/ref_rejected": -101.3896484375,
|
|
"logps/rejected": -97.97754669189453,
|
|
"loss": 1.2994,
|
|
"margin_dpo/margin_mean": 1.5880258083343506,
|
|
"margin_dpo/margin_std": 3.8282291889190674,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.21164021164021163,
|
|
"fcm_dpo/beta": 0.376004695892334,
|
|
"fcm_dpo/delta": -0.06728397309780121,
|
|
"fcm_dpo/margin": 2.417325019836426,
|
|
"fcm_dpo/q_t": 0.35431066155433655,
|
|
"grad_norm": 72.31173706054688,
|
|
"learning_rate": 4.820919832540181e-07,
|
|
"logits/chosen": 1.5636279582977295,
|
|
"logits/rejected": 1.4595727920532227,
|
|
"logps/chosen": -75.60233306884766,
|
|
"logps/ref_chosen": -81.32339477539062,
|
|
"logps/ref_rejected": -99.7275619506836,
|
|
"logps/rejected": -96.423828125,
|
|
"loss": 1.1302,
|
|
"margin_dpo/margin_mean": 2.417325973510742,
|
|
"margin_dpo/margin_std": 4.188722133636475,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.21315192743764172,
|
|
"fcm_dpo/beta": 0.3740063011646271,
|
|
"fcm_dpo/delta": -0.018594570457935333,
|
|
"fcm_dpo/margin": 2.3140902519226074,
|
|
"fcm_dpo/q_t": 0.35507285594940186,
|
|
"grad_norm": 57.69200134277344,
|
|
"learning_rate": 4.815973202802966e-07,
|
|
"logits/chosen": 1.8846083879470825,
|
|
"logits/rejected": 1.7731068134307861,
|
|
"logps/chosen": -72.21094512939453,
|
|
"logps/ref_chosen": -78.08534240722656,
|
|
"logps/ref_rejected": -101.70516967773438,
|
|
"logps/rejected": -98.14485931396484,
|
|
"loss": 1.0983,
|
|
"margin_dpo/margin_mean": 2.3140902519226074,
|
|
"margin_dpo/margin_std": 3.8522796630859375,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.2146636432350718,
|
|
"fcm_dpo/beta": 0.39571413397789,
|
|
"fcm_dpo/delta": 0.27962976694107056,
|
|
"fcm_dpo/margin": 1.4749341011047363,
|
|
"fcm_dpo/q_t": 0.4017455577850342,
|
|
"grad_norm": 64.10457611083984,
|
|
"learning_rate": 4.810961790316729e-07,
|
|
"logits/chosen": 1.6766084432601929,
|
|
"logits/rejected": 1.6162614822387695,
|
|
"logps/chosen": -76.84469604492188,
|
|
"logps/ref_chosen": -82.84616088867188,
|
|
"logps/ref_rejected": -95.14714050292969,
|
|
"logps/rejected": -90.62060546875,
|
|
"loss": 1.3081,
|
|
"margin_dpo/margin_mean": 1.4749343395233154,
|
|
"margin_dpo/margin_std": 3.789971351623535,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.2161753590325019,
|
|
"fcm_dpo/beta": 0.41883236169815063,
|
|
"fcm_dpo/delta": 0.32501593232154846,
|
|
"fcm_dpo/margin": 1.2943665981292725,
|
|
"fcm_dpo/q_t": 0.4153968393802643,
|
|
"grad_norm": 101.07804870605469,
|
|
"learning_rate": 4.805885735261454e-07,
|
|
"logits/chosen": 1.5617542266845703,
|
|
"logits/rejected": 1.5315983295440674,
|
|
"logps/chosen": -74.02728271484375,
|
|
"logps/ref_chosen": -80.29791259765625,
|
|
"logps/ref_rejected": -87.44291687011719,
|
|
"logps/rejected": -82.46665954589844,
|
|
"loss": 1.4461,
|
|
"margin_dpo/margin_mean": 1.2943671941757202,
|
|
"margin_dpo/margin_std": 4.0470781326293945,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.21768707482993196,
|
|
"fcm_dpo/beta": 0.4348004460334778,
|
|
"fcm_dpo/delta": 0.11130322515964508,
|
|
"fcm_dpo/margin": 0.8273683190345764,
|
|
"fcm_dpo/q_t": 0.46184492111206055,
|
|
"grad_norm": 111.18672943115234,
|
|
"learning_rate": 4.800745179625307e-07,
|
|
"logits/chosen": 1.8222707509994507,
|
|
"logits/rejected": 1.7600898742675781,
|
|
"logps/chosen": -73.16317749023438,
|
|
"logps/ref_chosen": -79.09429168701172,
|
|
"logps/ref_rejected": -92.42912292480469,
|
|
"logps/rejected": -87.32537841796875,
|
|
"loss": 1.7283,
|
|
"margin_dpo/margin_mean": 0.8273676633834839,
|
|
"margin_dpo/margin_std": 4.292266368865967,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.21919879062736206,
|
|
"fcm_dpo/beta": 0.4399738609790802,
|
|
"fcm_dpo/delta": 0.10660990327596664,
|
|
"fcm_dpo/margin": 1.7080283164978027,
|
|
"fcm_dpo/q_t": 0.36994537711143494,
|
|
"grad_norm": 100.66554260253906,
|
|
"learning_rate": 4.795540267200686e-07,
|
|
"logits/chosen": 1.6031596660614014,
|
|
"logits/rejected": 1.6387650966644287,
|
|
"logps/chosen": -90.66630554199219,
|
|
"logps/ref_chosen": -97.7087173461914,
|
|
"logps/ref_rejected": -97.63011169433594,
|
|
"logps/rejected": -92.29573059082031,
|
|
"loss": 1.3454,
|
|
"margin_dpo/margin_mean": 1.7080283164978027,
|
|
"margin_dpo/margin_std": 4.123508453369141,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.22071050642479215,
|
|
"fcm_dpo/beta": 0.46773919463157654,
|
|
"fcm_dpo/delta": 0.29919642210006714,
|
|
"fcm_dpo/margin": 1.2071739435195923,
|
|
"fcm_dpo/q_t": 0.40284016728401184,
|
|
"grad_norm": 89.68445587158203,
|
|
"learning_rate": 4.790271143580173e-07,
|
|
"logits/chosen": 1.3380377292633057,
|
|
"logits/rejected": 1.3341362476348877,
|
|
"logps/chosen": -69.06131744384766,
|
|
"logps/ref_chosen": -76.56294250488281,
|
|
"logps/ref_rejected": -83.78160095214844,
|
|
"logps/rejected": -77.48715209960938,
|
|
"loss": 1.3059,
|
|
"margin_dpo/margin_mean": 1.2071746587753296,
|
|
"margin_dpo/margin_std": 3.1625607013702393,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.2222222222222222,
|
|
"fcm_dpo/beta": 0.4885963797569275,
|
|
"fcm_dpo/delta": 0.22966217994689941,
|
|
"fcm_dpo/margin": 1.298787236213684,
|
|
"fcm_dpo/q_t": 0.4034077525138855,
|
|
"grad_norm": 102.2337417602539,
|
|
"learning_rate": 4.784937956152489e-07,
|
|
"logits/chosen": 1.6136701107025146,
|
|
"logits/rejected": 1.5254911184310913,
|
|
"logps/chosen": -76.01017761230469,
|
|
"logps/ref_chosen": -83.24113464355469,
|
|
"logps/ref_rejected": -97.50960540771484,
|
|
"logps/rejected": -91.57743835449219,
|
|
"loss": 1.3886,
|
|
"margin_dpo/margin_mean": 1.298788070678711,
|
|
"margin_dpo/margin_std": 3.5394697189331055,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.2237339380196523,
|
|
"fcm_dpo/beta": 0.4971775710582733,
|
|
"fcm_dpo/delta": -0.038856156170368195,
|
|
"fcm_dpo/margin": 1.7757008075714111,
|
|
"fcm_dpo/q_t": 0.3661366105079651,
|
|
"grad_norm": 69.0617446899414,
|
|
"learning_rate": 4.779540854098347e-07,
|
|
"logits/chosen": 1.8557909727096558,
|
|
"logits/rejected": 1.6362807750701904,
|
|
"logps/chosen": -58.756004333496094,
|
|
"logps/ref_chosen": -66.36277770996094,
|
|
"logps/ref_rejected": -87.66487121582031,
|
|
"logps/rejected": -81.83380126953125,
|
|
"loss": 1.24,
|
|
"margin_dpo/margin_mean": 1.7757010459899902,
|
|
"margin_dpo/margin_std": 3.699665069580078,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.2252456538170824,
|
|
"fcm_dpo/beta": 0.5092729330062866,
|
|
"fcm_dpo/delta": 0.26832953095436096,
|
|
"fcm_dpo/margin": 1.1741929054260254,
|
|
"fcm_dpo/q_t": 0.39438316226005554,
|
|
"grad_norm": 84.74821472167969,
|
|
"learning_rate": 4.774079988386296e-07,
|
|
"logits/chosen": 1.424088716506958,
|
|
"logits/rejected": 1.3155875205993652,
|
|
"logps/chosen": -64.8631591796875,
|
|
"logps/ref_chosen": -72.0576171875,
|
|
"logps/ref_rejected": -83.94097900390625,
|
|
"logps/rejected": -77.92071533203125,
|
|
"loss": 1.2921,
|
|
"margin_dpo/margin_mean": 1.1741926670074463,
|
|
"margin_dpo/margin_std": 2.8617429733276367,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.22675736961451248,
|
|
"fcm_dpo/beta": 0.511961042881012,
|
|
"fcm_dpo/delta": -0.03353915736079216,
|
|
"fcm_dpo/margin": 1.7189321517944336,
|
|
"fcm_dpo/q_t": 0.36745959520339966,
|
|
"grad_norm": 103.64623260498047,
|
|
"learning_rate": 4.768555511768486e-07,
|
|
"logits/chosen": 1.6222187280654907,
|
|
"logits/rejected": 1.558018445968628,
|
|
"logps/chosen": -77.80455017089844,
|
|
"logps/ref_chosen": -85.52684783935547,
|
|
"logps/ref_rejected": -108.37449645996094,
|
|
"logps/rejected": -102.37114715576172,
|
|
"loss": 1.2533,
|
|
"margin_dpo/margin_mean": 1.7189325094223022,
|
|
"margin_dpo/margin_std": 3.4795143604278564,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.22826908541194255,
|
|
"fcm_dpo/beta": 0.4981721043586731,
|
|
"fcm_dpo/delta": -0.1791907250881195,
|
|
"fcm_dpo/margin": 2.0277981758117676,
|
|
"fcm_dpo/q_t": 0.332998663187027,
|
|
"grad_norm": 69.01924896240234,
|
|
"learning_rate": 4.762967578776406e-07,
|
|
"logits/chosen": 1.5413520336151123,
|
|
"logits/rejected": 1.4086077213287354,
|
|
"logps/chosen": -60.528953552246094,
|
|
"logps/ref_chosen": -69.160888671875,
|
|
"logps/ref_rejected": -91.42207336425781,
|
|
"logps/rejected": -84.81794738769531,
|
|
"loss": 1.0109,
|
|
"margin_dpo/margin_mean": 2.0277981758117676,
|
|
"margin_dpo/margin_std": 3.165318489074707,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.22978080120937264,
|
|
"fcm_dpo/beta": 0.5071883201599121,
|
|
"fcm_dpo/delta": 0.05071830749511719,
|
|
"fcm_dpo/margin": 1.5727683305740356,
|
|
"fcm_dpo/q_t": 0.3731786906719208,
|
|
"grad_norm": 83.03250122070312,
|
|
"learning_rate": 4.757316345716553e-07,
|
|
"logits/chosen": 2.0041542053222656,
|
|
"logits/rejected": 1.8374791145324707,
|
|
"logps/chosen": -64.47390747070312,
|
|
"logps/ref_chosen": -72.48135375976562,
|
|
"logps/ref_rejected": -94.44818878173828,
|
|
"logps/rejected": -88.01351165771484,
|
|
"loss": 1.1366,
|
|
"margin_dpo/margin_mean": 1.5727685689926147,
|
|
"margin_dpo/margin_std": 3.0940496921539307,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.23129251700680273,
|
|
"fcm_dpo/beta": 0.5022574067115784,
|
|
"fcm_dpo/delta": -0.014510933309793472,
|
|
"fcm_dpo/margin": 1.7176454067230225,
|
|
"fcm_dpo/q_t": 0.3635936677455902,
|
|
"grad_norm": 77.17406463623047,
|
|
"learning_rate": 4.751601970666064e-07,
|
|
"logits/chosen": 1.5174614191055298,
|
|
"logits/rejected": 1.4577882289886475,
|
|
"logps/chosen": -81.51570129394531,
|
|
"logps/ref_chosen": -89.6655044555664,
|
|
"logps/ref_rejected": -90.67737579345703,
|
|
"logps/rejected": -84.24522399902344,
|
|
"loss": 1.0794,
|
|
"margin_dpo/margin_mean": 1.7176458835601807,
|
|
"margin_dpo/margin_std": 2.961656332015991,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.2328042328042328,
|
|
"fcm_dpo/beta": 0.5269556045532227,
|
|
"fcm_dpo/delta": 0.3518673777580261,
|
|
"fcm_dpo/margin": 0.978224515914917,
|
|
"fcm_dpo/q_t": 0.4100213944911957,
|
|
"grad_norm": 96.40692138671875,
|
|
"learning_rate": 4.745824613468292e-07,
|
|
"logits/chosen": 1.7381703853607178,
|
|
"logits/rejected": 1.6854023933410645,
|
|
"logps/chosen": -68.4336929321289,
|
|
"logps/ref_chosen": -76.58096313476562,
|
|
"logps/ref_rejected": -78.18669891357422,
|
|
"logps/rejected": -71.01765441894531,
|
|
"loss": 1.3664,
|
|
"margin_dpo/margin_mean": 0.9782246351242065,
|
|
"margin_dpo/margin_std": 2.7987735271453857,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.23431594860166288,
|
|
"fcm_dpo/beta": 0.5390084981918335,
|
|
"fcm_dpo/delta": 0.08307254314422607,
|
|
"fcm_dpo/margin": 1.4344533681869507,
|
|
"fcm_dpo/q_t": 0.3690027892589569,
|
|
"grad_norm": 95.5949478149414,
|
|
"learning_rate": 4.7399844357283393e-07,
|
|
"logits/chosen": 1.6641685962677002,
|
|
"logits/rejected": 1.6441385746002197,
|
|
"logps/chosen": -74.2553482055664,
|
|
"logps/ref_chosen": -82.65617370605469,
|
|
"logps/ref_rejected": -95.52484130859375,
|
|
"logps/rejected": -88.5584716796875,
|
|
"loss": 1.2341,
|
|
"margin_dpo/margin_mean": 1.4344533681869507,
|
|
"margin_dpo/margin_std": 2.9911344051361084,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.23582766439909297,
|
|
"fcm_dpo/beta": 0.5347750186920166,
|
|
"fcm_dpo/delta": -0.04582615941762924,
|
|
"fcm_dpo/margin": 1.6623612642288208,
|
|
"fcm_dpo/q_t": 0.3499874472618103,
|
|
"grad_norm": 91.02811431884766,
|
|
"learning_rate": 4.7340816008085305e-07,
|
|
"logits/chosen": 1.663049340248108,
|
|
"logits/rejected": 1.5744928121566772,
|
|
"logps/chosen": -79.29481506347656,
|
|
"logps/ref_chosen": -87.66494750976562,
|
|
"logps/ref_rejected": -108.2437744140625,
|
|
"logps/rejected": -101.53599548339844,
|
|
"loss": 1.0597,
|
|
"margin_dpo/margin_mean": 1.6623611450195312,
|
|
"margin_dpo/margin_std": 2.6712918281555176,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.23733938019652306,
|
|
"fcm_dpo/beta": 0.5272543430328369,
|
|
"fcm_dpo/delta": -0.11355408281087875,
|
|
"fcm_dpo/margin": 1.0453565120697021,
|
|
"fcm_dpo/q_t": 0.41212987899780273,
|
|
"grad_norm": 83.02538299560547,
|
|
"learning_rate": 4.728116273823847e-07,
|
|
"logits/chosen": 1.5571646690368652,
|
|
"logits/rejected": 1.562293529510498,
|
|
"logps/chosen": -61.72172927856445,
|
|
"logps/ref_chosen": -70.77095794677734,
|
|
"logps/ref_rejected": -78.78271484375,
|
|
"logps/rejected": -70.77883911132812,
|
|
"loss": 1.2828,
|
|
"margin_dpo/margin_mean": 1.0453565120697021,
|
|
"margin_dpo/margin_std": 2.5130724906921387,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.23885109599395313,
|
|
"fcm_dpo/beta": 0.5388778448104858,
|
|
"fcm_dpo/delta": 0.19029124081134796,
|
|
"fcm_dpo/margin": 1.2486048936843872,
|
|
"fcm_dpo/q_t": 0.3905091881752014,
|
|
"grad_norm": 89.57019805908203,
|
|
"learning_rate": 4.7220886216373085e-07,
|
|
"logits/chosen": 1.6847059726715088,
|
|
"logits/rejected": 1.6245031356811523,
|
|
"logps/chosen": -72.43364715576172,
|
|
"logps/ref_chosen": -81.21516418457031,
|
|
"logps/ref_rejected": -97.8381118774414,
|
|
"logps/rejected": -90.30520629882812,
|
|
"loss": 1.2569,
|
|
"margin_dpo/margin_mean": 1.24860417842865,
|
|
"margin_dpo/margin_std": 2.810882091522217,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.24036281179138322,
|
|
"fcm_dpo/beta": 0.5474465489387512,
|
|
"fcm_dpo/delta": 0.02943047508597374,
|
|
"fcm_dpo/margin": 1.5027416944503784,
|
|
"fcm_dpo/q_t": 0.35206806659698486,
|
|
"grad_norm": 77.63984680175781,
|
|
"learning_rate": 4.715998812855304e-07,
|
|
"logits/chosen": 1.7496761083602905,
|
|
"logits/rejected": 1.6762511730194092,
|
|
"logps/chosen": -63.55103302001953,
|
|
"logps/ref_chosen": -72.33412170410156,
|
|
"logps/ref_rejected": -89.49591064453125,
|
|
"logps/rejected": -82.21556091308594,
|
|
"loss": 1.0543,
|
|
"margin_dpo/margin_mean": 1.5027427673339844,
|
|
"margin_dpo/margin_std": 2.681060791015625,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.2418745275888133,
|
|
"fcm_dpo/beta": 0.5775221586227417,
|
|
"fcm_dpo/delta": 0.32942667603492737,
|
|
"fcm_dpo/margin": 0.9330779314041138,
|
|
"fcm_dpo/q_t": 0.3996606469154358,
|
|
"grad_norm": 84.65573120117188,
|
|
"learning_rate": 4.7098470178228755e-07,
|
|
"logits/chosen": 1.4114882946014404,
|
|
"logits/rejected": 1.2656062841415405,
|
|
"logps/chosen": -54.79229736328125,
|
|
"logps/ref_chosen": -63.26386260986328,
|
|
"logps/ref_rejected": -82.27867126464844,
|
|
"logps/rejected": -74.74017333984375,
|
|
"loss": 1.2682,
|
|
"margin_dpo/margin_mean": 0.9330783486366272,
|
|
"margin_dpo/margin_std": 2.315305233001709,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.24338624338624337,
|
|
"fcm_dpo/beta": 0.6198341846466064,
|
|
"fcm_dpo/delta": 0.29712557792663574,
|
|
"fcm_dpo/margin": 0.9137060642242432,
|
|
"fcm_dpo/q_t": 0.3977496027946472,
|
|
"grad_norm": 97.38703918457031,
|
|
"learning_rate": 4.703633408618955e-07,
|
|
"logits/chosen": 1.8469001054763794,
|
|
"logits/rejected": 1.7587244510650635,
|
|
"logps/chosen": -61.889503479003906,
|
|
"logps/ref_chosen": -70.69304656982422,
|
|
"logps/ref_rejected": -82.73606872558594,
|
|
"logps/rejected": -74.84622955322266,
|
|
"loss": 1.3213,
|
|
"margin_dpo/margin_mean": 0.9137062430381775,
|
|
"margin_dpo/margin_std": 2.3940858840942383,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.24489795918367346,
|
|
"fcm_dpo/beta": 0.6205596327781677,
|
|
"fcm_dpo/delta": -0.1046181172132492,
|
|
"fcm_dpo/margin": 1.5212860107421875,
|
|
"fcm_dpo/q_t": 0.3311256170272827,
|
|
"grad_norm": 94.5212173461914,
|
|
"learning_rate": 4.697358159051549e-07,
|
|
"logits/chosen": 1.73167085647583,
|
|
"logits/rejected": 1.638787031173706,
|
|
"logps/chosen": -80.61572265625,
|
|
"logps/ref_chosen": -89.3046646118164,
|
|
"logps/ref_rejected": -114.05778503417969,
|
|
"logps/rejected": -106.89012908935547,
|
|
"loss": 0.9664,
|
|
"margin_dpo/margin_mean": 1.5212857723236084,
|
|
"margin_dpo/margin_std": 2.2130351066589355,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.24640967498110355,
|
|
"fcm_dpo/beta": 0.6169939041137695,
|
|
"fcm_dpo/delta": 0.020175732672214508,
|
|
"fcm_dpo/margin": 1.3464248180389404,
|
|
"fcm_dpo/q_t": 0.3558712303638458,
|
|
"grad_norm": 86.37958526611328,
|
|
"learning_rate": 4.691021444652876e-07,
|
|
"logits/chosen": 1.7835041284561157,
|
|
"logits/rejected": 1.6861741542816162,
|
|
"logps/chosen": -59.539772033691406,
|
|
"logps/ref_chosen": -68.61222076416016,
|
|
"logps/ref_rejected": -89.03155517578125,
|
|
"logps/rejected": -81.3055419921875,
|
|
"loss": 1.05,
|
|
"margin_dpo/margin_mean": 1.3464242219924927,
|
|
"margin_dpo/margin_std": 2.1600122451782227,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.24792139077853365,
|
|
"fcm_dpo/beta": 0.6004210710525513,
|
|
"fcm_dpo/delta": -0.14600637555122375,
|
|
"fcm_dpo/margin": 1.6347507238388062,
|
|
"fcm_dpo/q_t": 0.33791494369506836,
|
|
"grad_norm": 91.74009704589844,
|
|
"learning_rate": 4.6846234426744624e-07,
|
|
"logits/chosen": 1.3953123092651367,
|
|
"logits/rejected": 1.2430917024612427,
|
|
"logps/chosen": -64.46735382080078,
|
|
"logps/ref_chosen": -73.55902862548828,
|
|
"logps/ref_rejected": -94.16201782226562,
|
|
"logps/rejected": -86.705078125,
|
|
"loss": 1.1363,
|
|
"margin_dpo/margin_mean": 1.6347506046295166,
|
|
"margin_dpo/margin_std": 2.7181103229522705,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.2494331065759637,
|
|
"fcm_dpo/beta": 0.5928429365158081,
|
|
"fcm_dpo/delta": -0.08283931761980057,
|
|
"fcm_dpo/margin": 1.5601141452789307,
|
|
"fcm_dpo/q_t": 0.3488255739212036,
|
|
"grad_norm": 81.30715942382812,
|
|
"learning_rate": 4.678164332082175e-07,
|
|
"logits/chosen": 1.9156373739242554,
|
|
"logits/rejected": 1.759425401687622,
|
|
"logps/chosen": -59.70091247558594,
|
|
"logps/ref_chosen": -68.67132568359375,
|
|
"logps/ref_rejected": -85.95689392089844,
|
|
"logps/rejected": -78.54659271240234,
|
|
"loss": 0.9951,
|
|
"margin_dpo/margin_mean": 1.5601141452789307,
|
|
"margin_dpo/margin_std": 2.4698634147644043,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.2509448223733938,
|
|
"fcm_dpo/beta": 0.6078928112983704,
|
|
"fcm_dpo/delta": 0.17859025299549103,
|
|
"fcm_dpo/margin": 0.6258453130722046,
|
|
"fcm_dpo/q_t": 0.4346492290496826,
|
|
"grad_norm": 114.55907440185547,
|
|
"learning_rate": 4.6716442935512214e-07,
|
|
"logits/chosen": 1.5327785015106201,
|
|
"logits/rejected": 1.3427965641021729,
|
|
"logps/chosen": -72.38050079345703,
|
|
"logps/ref_chosen": -80.89755249023438,
|
|
"logps/ref_rejected": -111.91075134277344,
|
|
"logps/rejected": -104.01954650878906,
|
|
"loss": 1.3994,
|
|
"margin_dpo/margin_mean": 0.6258450746536255,
|
|
"margin_dpo/margin_std": 2.1673123836517334,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.25245653817082386,
|
|
"fcm_dpo/beta": 0.6160829663276672,
|
|
"fcm_dpo/delta": 0.1423880010843277,
|
|
"fcm_dpo/margin": 1.1649137735366821,
|
|
"fcm_dpo/q_t": 0.36513078212738037,
|
|
"grad_norm": 88.68524932861328,
|
|
"learning_rate": 4.6650635094610966e-07,
|
|
"logits/chosen": 1.1840271949768066,
|
|
"logits/rejected": 1.1251733303070068,
|
|
"logps/chosen": -67.54503631591797,
|
|
"logps/ref_chosen": -76.73136138916016,
|
|
"logps/ref_rejected": -92.57389068603516,
|
|
"logps/rejected": -84.55247497558594,
|
|
"loss": 1.0499,
|
|
"margin_dpo/margin_mean": 1.1649138927459717,
|
|
"margin_dpo/margin_std": 1.8795359134674072,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.25396825396825395,
|
|
"fcm_dpo/beta": 0.6469905376434326,
|
|
"fcm_dpo/delta": 0.22754508256912231,
|
|
"fcm_dpo/margin": 0.9844677448272705,
|
|
"fcm_dpo/q_t": 0.380914568901062,
|
|
"grad_norm": 100.6047592163086,
|
|
"learning_rate": 4.6584221638904767e-07,
|
|
"logits/chosen": 1.5291261672973633,
|
|
"logits/rejected": 1.4169994592666626,
|
|
"logps/chosen": -74.01771545410156,
|
|
"logps/ref_chosen": -82.63671112060547,
|
|
"logps/ref_rejected": -96.72691345214844,
|
|
"logps/rejected": -89.09239196777344,
|
|
"loss": 1.1236,
|
|
"margin_dpo/margin_mean": 0.9844681024551392,
|
|
"margin_dpo/margin_std": 1.8431049585342407,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.25547996976568405,
|
|
"fcm_dpo/beta": 0.6636664867401123,
|
|
"fcm_dpo/delta": 0.0465950183570385,
|
|
"fcm_dpo/margin": 1.2143943309783936,
|
|
"fcm_dpo/q_t": 0.372569739818573,
|
|
"grad_norm": 111.19847106933594,
|
|
"learning_rate": 4.651720442612075e-07,
|
|
"logits/chosen": 1.8221468925476074,
|
|
"logits/rejected": 1.8048608303070068,
|
|
"logps/chosen": -69.81880950927734,
|
|
"logps/ref_chosen": -78.87673950195312,
|
|
"logps/ref_rejected": -94.18919372558594,
|
|
"logps/rejected": -86.34565734863281,
|
|
"loss": 1.126,
|
|
"margin_dpo/margin_mean": 1.2143940925598145,
|
|
"margin_dpo/margin_std": 2.2234106063842773,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.25699168556311414,
|
|
"fcm_dpo/beta": 0.6836942434310913,
|
|
"fcm_dpo/delta": 0.17387819290161133,
|
|
"fcm_dpo/margin": 1.0023448467254639,
|
|
"fcm_dpo/q_t": 0.385806679725647,
|
|
"grad_norm": 104.86164093017578,
|
|
"learning_rate": 4.6449585330874425e-07,
|
|
"logits/chosen": 1.6126837730407715,
|
|
"logits/rejected": 1.6387670040130615,
|
|
"logps/chosen": -64.28702545166016,
|
|
"logps/ref_chosen": -73.35820007324219,
|
|
"logps/ref_rejected": -76.85077667236328,
|
|
"logps/rejected": -68.78193664550781,
|
|
"loss": 1.2824,
|
|
"margin_dpo/margin_mean": 1.0023449659347534,
|
|
"margin_dpo/margin_std": 2.2842979431152344,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.2585034013605442,
|
|
"fcm_dpo/beta": 0.6622629165649414,
|
|
"fcm_dpo/delta": -0.1891041398048401,
|
|
"fcm_dpo/margin": 1.5381622314453125,
|
|
"fcm_dpo/q_t": 0.3209994435310364,
|
|
"grad_norm": 93.2053451538086,
|
|
"learning_rate": 4.6381366244617224e-07,
|
|
"logits/chosen": 1.8994969129562378,
|
|
"logits/rejected": 1.752985954284668,
|
|
"logps/chosen": -71.6744384765625,
|
|
"logps/ref_chosen": -80.4322738647461,
|
|
"logps/ref_rejected": -96.99999237060547,
|
|
"logps/rejected": -89.78031921386719,
|
|
"loss": 1.0006,
|
|
"margin_dpo/margin_mean": 1.5381627082824707,
|
|
"margin_dpo/margin_std": 2.266875743865967,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.2600151171579743,
|
|
"fcm_dpo/beta": 0.6701521873474121,
|
|
"fcm_dpo/delta": 0.06850168108940125,
|
|
"fcm_dpo/margin": 1.172062635421753,
|
|
"fcm_dpo/q_t": 0.3691914975643158,
|
|
"grad_norm": 105.6916275024414,
|
|
"learning_rate": 4.631254907558365e-07,
|
|
"logits/chosen": 1.7724663019180298,
|
|
"logits/rejected": 1.6741005182266235,
|
|
"logps/chosen": -61.7747802734375,
|
|
"logps/ref_chosen": -70.45406341552734,
|
|
"logps/ref_rejected": -99.85603332519531,
|
|
"logps/rejected": -92.34881591796875,
|
|
"loss": 1.169,
|
|
"margin_dpo/margin_mean": 1.1720627546310425,
|
|
"margin_dpo/margin_std": 2.2597615718841553,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.2615268329554044,
|
|
"fcm_dpo/beta": 0.6584789752960205,
|
|
"fcm_dpo/delta": 0.04426664113998413,
|
|
"fcm_dpo/margin": 1.2176408767700195,
|
|
"fcm_dpo/q_t": 0.37218981981277466,
|
|
"grad_norm": 106.22607421875,
|
|
"learning_rate": 4.624313574873786e-07,
|
|
"logits/chosen": 1.7763798236846924,
|
|
"logits/rejected": 1.5477124452590942,
|
|
"logps/chosen": -63.321632385253906,
|
|
"logps/ref_chosen": -72.15026092529297,
|
|
"logps/ref_rejected": -94.10212707519531,
|
|
"logps/rejected": -86.49114990234375,
|
|
"loss": 1.1903,
|
|
"margin_dpo/margin_mean": 1.2176411151885986,
|
|
"margin_dpo/margin_std": 2.281262159347534,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.26303854875283444,
|
|
"fcm_dpo/beta": 0.674136757850647,
|
|
"fcm_dpo/delta": 0.03188333287835121,
|
|
"fcm_dpo/margin": 1.2171550989151,
|
|
"fcm_dpo/q_t": 0.3766389787197113,
|
|
"grad_norm": 116.89978790283203,
|
|
"learning_rate": 4.61731282057198e-07,
|
|
"logits/chosen": 1.798130750656128,
|
|
"logits/rejected": 1.6105579137802124,
|
|
"logps/chosen": -67.50865936279297,
|
|
"logps/ref_chosen": -75.99629211425781,
|
|
"logps/ref_rejected": -106.2359619140625,
|
|
"logps/rejected": -98.96548461914062,
|
|
"loss": 1.2473,
|
|
"margin_dpo/margin_mean": 1.2171552181243896,
|
|
"margin_dpo/margin_std": 2.535811424255371,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.26455026455026454,
|
|
"fcm_dpo/beta": 0.6743377447128296,
|
|
"fcm_dpo/delta": -0.04016567021608353,
|
|
"fcm_dpo/margin": 1.3147974014282227,
|
|
"fcm_dpo/q_t": 0.3643365800380707,
|
|
"grad_norm": 117.52193450927734,
|
|
"learning_rate": 4.6102528404790965e-07,
|
|
"logits/chosen": 1.733808159828186,
|
|
"logits/rejected": 1.673501968383789,
|
|
"logps/chosen": -75.97535705566406,
|
|
"logps/ref_chosen": -84.51177978515625,
|
|
"logps/ref_rejected": -104.46299743652344,
|
|
"logps/rejected": -97.24137115478516,
|
|
"loss": 1.2056,
|
|
"margin_dpo/margin_mean": 1.3147969245910645,
|
|
"margin_dpo/margin_std": 2.6120011806488037,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.2660619803476946,
|
|
"fcm_dpo/beta": 0.665306031703949,
|
|
"fcm_dpo/delta": -0.054544560611248016,
|
|
"fcm_dpo/margin": 0.9671778678894043,
|
|
"fcm_dpo/q_t": 0.404443621635437,
|
|
"grad_norm": 131.4767608642578,
|
|
"learning_rate": 4.603133832077953e-07,
|
|
"logits/chosen": 1.8389785289764404,
|
|
"logits/rejected": 1.755552053451538,
|
|
"logps/chosen": -90.02412414550781,
|
|
"logps/ref_chosen": -98.2034912109375,
|
|
"logps/ref_rejected": -103.2023696899414,
|
|
"logps/rejected": -95.99018859863281,
|
|
"loss": 1.3566,
|
|
"margin_dpo/margin_mean": 0.9671777486801147,
|
|
"margin_dpo/margin_std": 2.518389940261841,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.2675736961451247,
|
|
"fcm_dpo/beta": 0.6090140342712402,
|
|
"fcm_dpo/delta": -0.5656509399414062,
|
|
"fcm_dpo/margin": 2.1946027278900146,
|
|
"fcm_dpo/q_t": 0.2761075496673584,
|
|
"grad_norm": 115.04895782470703,
|
|
"learning_rate": 4.5959559945025183e-07,
|
|
"logits/chosen": 1.9416172504425049,
|
|
"logits/rejected": 1.738627314567566,
|
|
"logps/chosen": -69.18362426757812,
|
|
"logps/ref_chosen": -78.029541015625,
|
|
"logps/ref_rejected": -112.57099914550781,
|
|
"logps/rejected": -105.91969299316406,
|
|
"loss": 0.8653,
|
|
"margin_dpo/margin_mean": 2.19460391998291,
|
|
"margin_dpo/margin_std": 2.5792174339294434,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.2690854119425548,
|
|
"fcm_dpo/beta": 0.5866925716400146,
|
|
"fcm_dpo/delta": -0.008861862123012543,
|
|
"fcm_dpo/margin": 1.458240032196045,
|
|
"fcm_dpo/q_t": 0.3417511284351349,
|
|
"grad_norm": 75.9957275390625,
|
|
"learning_rate": 4.588719528532341e-07,
|
|
"logits/chosen": 1.3384625911712646,
|
|
"logits/rejected": 1.2324693202972412,
|
|
"logps/chosen": -70.76741027832031,
|
|
"logps/ref_chosen": -79.48869323730469,
|
|
"logps/ref_rejected": -96.62449645996094,
|
|
"logps/rejected": -89.3614501953125,
|
|
"loss": 0.9873,
|
|
"margin_dpo/margin_mean": 1.458240270614624,
|
|
"margin_dpo/margin_std": 2.050736904144287,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.2705971277399849,
|
|
"fcm_dpo/beta": 0.6052649617195129,
|
|
"fcm_dpo/delta": 0.13288062810897827,
|
|
"fcm_dpo/margin": 1.2006174325942993,
|
|
"fcm_dpo/q_t": 0.3806132972240448,
|
|
"grad_norm": 95.76106262207031,
|
|
"learning_rate": 4.581424636586928e-07,
|
|
"logits/chosen": 1.5645568370819092,
|
|
"logits/rejected": 1.5139702558517456,
|
|
"logps/chosen": -75.68244934082031,
|
|
"logps/ref_chosen": -84.5088119506836,
|
|
"logps/ref_rejected": -93.07945251464844,
|
|
"logps/rejected": -85.45369720458984,
|
|
"loss": 1.209,
|
|
"margin_dpo/margin_mean": 1.2006173133850098,
|
|
"margin_dpo/margin_std": 2.5477375984191895,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.272108843537415,
|
|
"fcm_dpo/beta": 0.6288666725158691,
|
|
"fcm_dpo/delta": 0.18441423773765564,
|
|
"fcm_dpo/margin": 1.0763027667999268,
|
|
"fcm_dpo/q_t": 0.38645124435424805,
|
|
"grad_norm": 93.25940704345703,
|
|
"learning_rate": 4.5740715227200897e-07,
|
|
"logits/chosen": 1.276105284690857,
|
|
"logits/rejected": 1.212243914604187,
|
|
"logps/chosen": -65.58257293701172,
|
|
"logps/ref_chosen": -74.5645523071289,
|
|
"logps/ref_rejected": -81.02266693115234,
|
|
"logps/rejected": -73.11698913574219,
|
|
"loss": 1.1389,
|
|
"margin_dpo/margin_mean": 1.0763027667999268,
|
|
"margin_dpo/margin_std": 2.0827672481536865,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.273620559334845,
|
|
"fcm_dpo/beta": 0.6410256624221802,
|
|
"fcm_dpo/delta": 0.06620515137910843,
|
|
"fcm_dpo/margin": 1.2298375368118286,
|
|
"fcm_dpo/q_t": 0.36326679587364197,
|
|
"grad_norm": 93.19351196289062,
|
|
"learning_rate": 4.566660392614228e-07,
|
|
"logits/chosen": 1.4095215797424316,
|
|
"logits/rejected": 1.3104197978973389,
|
|
"logps/chosen": -69.90322875976562,
|
|
"logps/ref_chosen": -78.77166748046875,
|
|
"logps/ref_rejected": -98.29750061035156,
|
|
"logps/rejected": -90.65890502929688,
|
|
"loss": 1.0257,
|
|
"margin_dpo/margin_mean": 1.2298375368118286,
|
|
"margin_dpo/margin_std": 1.9370605945587158,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.2751322751322751,
|
|
"fcm_dpo/beta": 0.6139971613883972,
|
|
"fcm_dpo/delta": -0.28054773807525635,
|
|
"fcm_dpo/margin": 1.7906737327575684,
|
|
"fcm_dpo/q_t": 0.32836031913757324,
|
|
"grad_norm": 92.22091674804688,
|
|
"learning_rate": 4.5591914535745817e-07,
|
|
"logits/chosen": 1.8836357593536377,
|
|
"logits/rejected": 1.7016699314117432,
|
|
"logps/chosen": -66.96229553222656,
|
|
"logps/ref_chosen": -75.67765045166016,
|
|
"logps/ref_rejected": -107.47894287109375,
|
|
"logps/rejected": -100.55426025390625,
|
|
"loss": 0.9697,
|
|
"margin_dpo/margin_mean": 1.7906737327575684,
|
|
"margin_dpo/margin_std": 2.7334959506988525,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.2766439909297052,
|
|
"fcm_dpo/beta": 0.6289302706718445,
|
|
"fcm_dpo/delta": 0.19042739272117615,
|
|
"fcm_dpo/margin": 0.4961353540420532,
|
|
"fcm_dpo/q_t": 0.4469048082828522,
|
|
"grad_norm": 121.42922973632812,
|
|
"learning_rate": 4.551664914523433e-07,
|
|
"logits/chosen": 1.6563621759414673,
|
|
"logits/rejected": 1.5986146926879883,
|
|
"logps/chosen": -72.16058349609375,
|
|
"logps/ref_chosen": -79.99969482421875,
|
|
"logps/ref_rejected": -89.35220336914062,
|
|
"logps/rejected": -82.00922393798828,
|
|
"loss": 1.453,
|
|
"margin_dpo/margin_mean": 0.4961353540420532,
|
|
"margin_dpo/margin_std": 2.0346364974975586,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.2781557067271353,
|
|
"fcm_dpo/beta": 0.6417108178138733,
|
|
"fcm_dpo/delta": 0.15971535444259644,
|
|
"fcm_dpo/margin": 1.0936038494110107,
|
|
"fcm_dpo/q_t": 0.36856314539909363,
|
|
"grad_norm": 82.0419921875,
|
|
"learning_rate": 4.544080985994258e-07,
|
|
"logits/chosen": 2.0075020790100098,
|
|
"logits/rejected": 1.8546810150146484,
|
|
"logps/chosen": -53.30805206298828,
|
|
"logps/ref_chosen": -62.133941650390625,
|
|
"logps/ref_rejected": -84.44404602050781,
|
|
"logps/rejected": -76.71176147460938,
|
|
"loss": 1.0538,
|
|
"margin_dpo/margin_mean": 1.0936038494110107,
|
|
"margin_dpo/margin_std": 1.735282301902771,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.2796674225245654,
|
|
"fcm_dpo/beta": 0.6385919451713562,
|
|
"fcm_dpo/delta": -0.0348266065120697,
|
|
"fcm_dpo/margin": 1.3761906623840332,
|
|
"fcm_dpo/q_t": 0.3491626977920532,
|
|
"grad_norm": 88.21831512451172,
|
|
"learning_rate": 4.5364398801258394e-07,
|
|
"logits/chosen": 1.896065592765808,
|
|
"logits/rejected": 1.775359869003296,
|
|
"logps/chosen": -59.2283821105957,
|
|
"logps/ref_chosen": -67.93174743652344,
|
|
"logps/ref_rejected": -83.76744079589844,
|
|
"logps/rejected": -76.44026184082031,
|
|
"loss": 1.1267,
|
|
"margin_dpo/margin_mean": 1.3761909008026123,
|
|
"margin_dpo/margin_std": 2.3595733642578125,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.2811791383219955,
|
|
"fcm_dpo/beta": 0.6363253593444824,
|
|
"fcm_dpo/delta": -0.0157480388879776,
|
|
"fcm_dpo/margin": 1.3545132875442505,
|
|
"fcm_dpo/q_t": 0.3593972623348236,
|
|
"grad_norm": 102.75836181640625,
|
|
"learning_rate": 4.5287418106563354e-07,
|
|
"logits/chosen": 1.4891412258148193,
|
|
"logits/rejected": 1.3690303564071655,
|
|
"logps/chosen": -77.59123229980469,
|
|
"logps/ref_chosen": -86.22174072265625,
|
|
"logps/ref_rejected": -100.42019653320312,
|
|
"logps/rejected": -93.14421081542969,
|
|
"loss": 1.1483,
|
|
"margin_dpo/margin_mean": 1.3545129299163818,
|
|
"margin_dpo/margin_std": 2.477220058441162,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.28269085411942557,
|
|
"fcm_dpo/beta": 0.6449699401855469,
|
|
"fcm_dpo/delta": 0.02151723951101303,
|
|
"fcm_dpo/margin": 1.2872929573059082,
|
|
"fcm_dpo/q_t": 0.3625199496746063,
|
|
"grad_norm": 116.49866485595703,
|
|
"learning_rate": 4.520986992917297e-07,
|
|
"logits/chosen": 1.7953424453735352,
|
|
"logits/rejected": 1.6858184337615967,
|
|
"logps/chosen": -84.72871398925781,
|
|
"logps/ref_chosen": -92.81202697753906,
|
|
"logps/ref_rejected": -117.28926086425781,
|
|
"logps/rejected": -110.49324035644531,
|
|
"loss": 1.1079,
|
|
"margin_dpo/margin_mean": 1.2872931957244873,
|
|
"margin_dpo/margin_std": 2.2549121379852295,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.2842025699168556,
|
|
"fcm_dpo/beta": 0.6408007740974426,
|
|
"fcm_dpo/delta": -0.14380419254302979,
|
|
"fcm_dpo/margin": 1.5262317657470703,
|
|
"fcm_dpo/q_t": 0.34180963039398193,
|
|
"grad_norm": 95.1890869140625,
|
|
"learning_rate": 4.5131756438276466e-07,
|
|
"logits/chosen": 1.8667633533477783,
|
|
"logits/rejected": 1.7648987770080566,
|
|
"logps/chosen": -79.36767578125,
|
|
"logps/ref_chosen": -87.85247802734375,
|
|
"logps/ref_rejected": -94.58252716064453,
|
|
"logps/rejected": -87.62394714355469,
|
|
"loss": 0.9844,
|
|
"margin_dpo/margin_mean": 1.5262320041656494,
|
|
"margin_dpo/margin_std": 2.5105552673339844,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.2857142857142857,
|
|
"fcm_dpo/beta": 0.6031299829483032,
|
|
"fcm_dpo/delta": -0.19629237055778503,
|
|
"fcm_dpo/margin": 1.142228364944458,
|
|
"fcm_dpo/q_t": 0.3792150020599365,
|
|
"grad_norm": 118.79749298095703,
|
|
"learning_rate": 4.5053079818876096e-07,
|
|
"logits/chosen": 1.6882644891738892,
|
|
"logits/rejected": 1.6798309087753296,
|
|
"logps/chosen": -86.59615325927734,
|
|
"logps/ref_chosen": -95.00414276123047,
|
|
"logps/ref_rejected": -90.50090789794922,
|
|
"logps/rejected": -83.23514556884766,
|
|
"loss": 1.2151,
|
|
"margin_dpo/margin_mean": 1.1422284841537476,
|
|
"margin_dpo/margin_std": 2.214970588684082,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.2872260015117158,
|
|
"fcm_dpo/beta": 0.5990357398986816,
|
|
"fcm_dpo/delta": -0.0020574182271957397,
|
|
"fcm_dpo/margin": 1.4207065105438232,
|
|
"fcm_dpo/q_t": 0.3320964574813843,
|
|
"grad_norm": 89.09442138671875,
|
|
"learning_rate": 4.4973842271726024e-07,
|
|
"logits/chosen": 1.8460028171539307,
|
|
"logits/rejected": 1.50532865524292,
|
|
"logps/chosen": -62.2581787109375,
|
|
"logps/ref_chosen": -70.79264831542969,
|
|
"logps/ref_rejected": -122.56155395507812,
|
|
"logps/rejected": -115.44779968261719,
|
|
"loss": 0.9537,
|
|
"margin_dpo/margin_mean": 1.4207061529159546,
|
|
"margin_dpo/margin_std": 1.7832438945770264,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.2887377173091459,
|
|
"fcm_dpo/beta": 0.6087629795074463,
|
|
"fcm_dpo/delta": 0.08328632265329361,
|
|
"fcm_dpo/margin": 1.270153284072876,
|
|
"fcm_dpo/q_t": 0.3589320778846741,
|
|
"grad_norm": 111.36334991455078,
|
|
"learning_rate": 4.48940460132708e-07,
|
|
"logits/chosen": 1.5673696994781494,
|
|
"logits/rejected": 1.4889506101608276,
|
|
"logps/chosen": -84.18894958496094,
|
|
"logps/ref_chosen": -92.15048217773438,
|
|
"logps/ref_rejected": -106.4153060913086,
|
|
"logps/rejected": -99.72392272949219,
|
|
"loss": 1.0703,
|
|
"margin_dpo/margin_mean": 1.2701534032821655,
|
|
"margin_dpo/margin_std": 2.1253159046173096,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.29024943310657597,
|
|
"fcm_dpo/beta": 0.6490231156349182,
|
|
"fcm_dpo/delta": 0.3352760076522827,
|
|
"fcm_dpo/margin": 0.8153266906738281,
|
|
"fcm_dpo/q_t": 0.40081295371055603,
|
|
"grad_norm": 86.10425567626953,
|
|
"learning_rate": 4.481369327558329e-07,
|
|
"logits/chosen": 1.7814728021621704,
|
|
"logits/rejected": 1.7212591171264648,
|
|
"logps/chosen": -61.26172637939453,
|
|
"logps/ref_chosen": -69.51527404785156,
|
|
"logps/ref_rejected": -80.15898132324219,
|
|
"logps/rejected": -72.72076416015625,
|
|
"loss": 1.2172,
|
|
"margin_dpo/margin_mean": 0.8153265714645386,
|
|
"margin_dpo/margin_std": 1.8970856666564941,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.29176114890400606,
|
|
"fcm_dpo/beta": 0.6535841226577759,
|
|
"fcm_dpo/delta": -0.020041286945343018,
|
|
"fcm_dpo/margin": 1.3285222053527832,
|
|
"fcm_dpo/q_t": 0.3438006043434143,
|
|
"grad_norm": 87.5340805053711,
|
|
"learning_rate": 4.47327863063023e-07,
|
|
"logits/chosen": 1.6366324424743652,
|
|
"logits/rejected": 1.6494388580322266,
|
|
"logps/chosen": -64.92332458496094,
|
|
"logps/ref_chosen": -73.43276977539062,
|
|
"logps/ref_rejected": -77.81238555908203,
|
|
"logps/rejected": -70.63145446777344,
|
|
"loss": 0.9885,
|
|
"margin_dpo/margin_mean": 1.3285223245620728,
|
|
"margin_dpo/margin_std": 1.8894892930984497,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.29327286470143615,
|
|
"fcm_dpo/beta": 0.6577266454696655,
|
|
"fcm_dpo/delta": 0.12170780450105667,
|
|
"fcm_dpo/margin": 1.1193406581878662,
|
|
"fcm_dpo/q_t": 0.3734307885169983,
|
|
"grad_norm": 89.93775177001953,
|
|
"learning_rate": 4.4651327368569684e-07,
|
|
"logits/chosen": 1.660496711730957,
|
|
"logits/rejected": 1.588250994682312,
|
|
"logps/chosen": -68.1932144165039,
|
|
"logps/ref_chosen": -76.63236999511719,
|
|
"logps/ref_rejected": -85.67449188232422,
|
|
"logps/rejected": -78.35467529296875,
|
|
"loss": 1.14,
|
|
"margin_dpo/margin_mean": 1.119341254234314,
|
|
"margin_dpo/margin_std": 2.060373306274414,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.2947845804988662,
|
|
"fcm_dpo/beta": 0.6720187067985535,
|
|
"fcm_dpo/delta": 0.09920601546764374,
|
|
"fcm_dpo/margin": 1.1267728805541992,
|
|
"fcm_dpo/q_t": 0.35634636878967285,
|
|
"grad_norm": 107.51087188720703,
|
|
"learning_rate": 4.4569318740967043e-07,
|
|
"logits/chosen": 1.5862284898757935,
|
|
"logits/rejected": 1.6139237880706787,
|
|
"logps/chosen": -81.3615493774414,
|
|
"logps/ref_chosen": -89.43354797363281,
|
|
"logps/ref_rejected": -91.25908660888672,
|
|
"logps/rejected": -84.31385803222656,
|
|
"loss": 1.1097,
|
|
"margin_dpo/margin_mean": 1.126772403717041,
|
|
"margin_dpo/margin_std": 1.9978525638580322,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.2962962962962963,
|
|
"fcm_dpo/beta": 0.726308286190033,
|
|
"fcm_dpo/delta": 0.357510507106781,
|
|
"fcm_dpo/margin": 0.6964332461357117,
|
|
"fcm_dpo/q_t": 0.4117005467414856,
|
|
"grad_norm": 120.37006378173828,
|
|
"learning_rate": 4.448676271745197e-07,
|
|
"logits/chosen": 1.6787617206573486,
|
|
"logits/rejected": 1.5786409378051758,
|
|
"logps/chosen": -67.36844635009766,
|
|
"logps/ref_chosen": -75.47528839111328,
|
|
"logps/ref_rejected": -99.37582397460938,
|
|
"logps/rejected": -91.96542358398438,
|
|
"loss": 1.4766,
|
|
"margin_dpo/margin_mean": 0.6964335441589355,
|
|
"margin_dpo/margin_std": 2.46044921875,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.29780801209372637,
|
|
"fcm_dpo/beta": 0.736380398273468,
|
|
"fcm_dpo/delta": -0.10233466327190399,
|
|
"fcm_dpo/margin": 1.2729198932647705,
|
|
"fcm_dpo/q_t": 0.3577408790588379,
|
|
"grad_norm": 125.2252426147461,
|
|
"learning_rate": 4.440366160729392e-07,
|
|
"logits/chosen": 1.6729737520217896,
|
|
"logits/rejected": 1.55661940574646,
|
|
"logps/chosen": -59.2158088684082,
|
|
"logps/ref_chosen": -67.57392883300781,
|
|
"logps/ref_rejected": -89.97993469238281,
|
|
"logps/rejected": -82.89472961425781,
|
|
"loss": 1.3448,
|
|
"margin_dpo/margin_mean": 1.2729198932647705,
|
|
"margin_dpo/margin_std": 2.7377114295959473,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.29931972789115646,
|
|
"fcm_dpo/beta": 0.7087437510490417,
|
|
"fcm_dpo/delta": -0.21124190092086792,
|
|
"fcm_dpo/margin": 1.4629135131835938,
|
|
"fcm_dpo/q_t": 0.31773099303245544,
|
|
"grad_norm": 93.81770324707031,
|
|
"learning_rate": 4.432001773500957e-07,
|
|
"logits/chosen": 1.6051056385040283,
|
|
"logits/rejected": 1.5143699645996094,
|
|
"logps/chosen": -68.83720397949219,
|
|
"logps/ref_chosen": -77.36013793945312,
|
|
"logps/ref_rejected": -90.55670166015625,
|
|
"logps/rejected": -83.49667358398438,
|
|
"loss": 0.9198,
|
|
"margin_dpo/margin_mean": 1.462914228439331,
|
|
"margin_dpo/margin_std": 1.8346251249313354,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.30083144368858655,
|
|
"fcm_dpo/beta": 0.7020251750946045,
|
|
"fcm_dpo/delta": 0.13893431425094604,
|
|
"fcm_dpo/margin": 1.0273115634918213,
|
|
"fcm_dpo/q_t": 0.3853752613067627,
|
|
"grad_norm": 124.17066192626953,
|
|
"learning_rate": 4.4235833440297856e-07,
|
|
"logits/chosen": 1.7206742763519287,
|
|
"logits/rejected": 1.4951976537704468,
|
|
"logps/chosen": -65.01742553710938,
|
|
"logps/ref_chosen": -73.05004119873047,
|
|
"logps/ref_rejected": -95.21923065185547,
|
|
"logps/rejected": -88.21392059326172,
|
|
"loss": 1.3002,
|
|
"margin_dpo/margin_mean": 1.0273126363754272,
|
|
"margin_dpo/margin_std": 2.409886598587036,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.30234315948601664,
|
|
"fcm_dpo/beta": 0.6905279755592346,
|
|
"fcm_dpo/delta": -0.07728119194507599,
|
|
"fcm_dpo/margin": 1.3263179063796997,
|
|
"fcm_dpo/q_t": 0.34874215722084045,
|
|
"grad_norm": 118.3066177368164,
|
|
"learning_rate": 4.415111107797445e-07,
|
|
"logits/chosen": 1.7629958391189575,
|
|
"logits/rejected": 1.5483076572418213,
|
|
"logps/chosen": -65.42704010009766,
|
|
"logps/ref_chosen": -73.75833129882812,
|
|
"logps/ref_rejected": -105.00157165527344,
|
|
"logps/rejected": -97.99659729003906,
|
|
"loss": 1.1881,
|
|
"margin_dpo/margin_mean": 1.3263182640075684,
|
|
"margin_dpo/margin_std": 2.497131824493408,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.30234315948601664,
|
|
"eval_fcm_dpo/beta": 0.6969584226608276,
|
|
"eval_logits/chosen": 1.8626530170440674,
|
|
"eval_logits/rejected": 1.7409390211105347,
|
|
"eval_logps/chosen": -78.61611938476562,
|
|
"eval_logps/ref_chosen": -86.90177917480469,
|
|
"eval_logps/ref_rejected": -96.69639587402344,
|
|
"eval_logps/rejected": -89.6644287109375,
|
|
"eval_loss": 0.5625263452529907,
|
|
"eval_margin_dpo/margin_mean": 1.2537044286727905,
|
|
"eval_margin_dpo/margin_std": 2.2270843982696533,
|
|
"eval_runtime": 42.2527,
|
|
"eval_samples_per_second": 54.505,
|
|
"eval_steps_per_second": 1.704,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.30385487528344673,
|
|
"fcm_dpo/beta": 0.6900404691696167,
|
|
"fcm_dpo/delta": -0.05886637419462204,
|
|
"fcm_dpo/margin": 1.3092676401138306,
|
|
"fcm_dpo/q_t": 0.34856978058815,
|
|
"grad_norm": 118.63353729248047,
|
|
"learning_rate": 4.4065853017905953e-07,
|
|
"logits/chosen": 1.997534155845642,
|
|
"logits/rejected": 1.8514721393585205,
|
|
"logps/chosen": -71.470458984375,
|
|
"logps/ref_chosen": -79.4841079711914,
|
|
"logps/ref_rejected": -100.94435119628906,
|
|
"logps/rejected": -94.2399673461914,
|
|
"loss": 1.0364,
|
|
"margin_dpo/margin_mean": 1.3092677593231201,
|
|
"margin_dpo/margin_std": 2.063784122467041,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.30536659108087677,
|
|
"fcm_dpo/beta": 0.7053598165512085,
|
|
"fcm_dpo/delta": 0.023715481162071228,
|
|
"fcm_dpo/margin": 1.1626203060150146,
|
|
"fcm_dpo/q_t": 0.358773797750473,
|
|
"grad_norm": 105.97468566894531,
|
|
"learning_rate": 4.3980061644943575e-07,
|
|
"logits/chosen": 1.4268461465835571,
|
|
"logits/rejected": 1.2712658643722534,
|
|
"logps/chosen": -58.487152099609375,
|
|
"logps/ref_chosen": -66.83952331542969,
|
|
"logps/ref_rejected": -93.05116271972656,
|
|
"logps/rejected": -85.86141204833984,
|
|
"loss": 1.1519,
|
|
"margin_dpo/margin_mean": 1.1626203060150146,
|
|
"margin_dpo/margin_std": 2.15989351272583,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.30687830687830686,
|
|
"fcm_dpo/beta": 0.6896209120750427,
|
|
"fcm_dpo/delta": -0.03713443875312805,
|
|
"fcm_dpo/margin": 1.2816414833068848,
|
|
"fcm_dpo/q_t": 0.3703385293483734,
|
|
"grad_norm": 114.06891632080078,
|
|
"learning_rate": 4.3893739358856455e-07,
|
|
"logits/chosen": 1.7179381847381592,
|
|
"logits/rejected": 1.4678292274475098,
|
|
"logps/chosen": -72.00523376464844,
|
|
"logps/ref_chosen": -80.32998657226562,
|
|
"logps/ref_rejected": -113.52803039550781,
|
|
"logps/rejected": -106.48490905761719,
|
|
"loss": 1.1194,
|
|
"margin_dpo/margin_mean": 1.2816411256790161,
|
|
"margin_dpo/margin_std": 2.475268840789795,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.30839002267573695,
|
|
"fcm_dpo/beta": 0.6696390509605408,
|
|
"fcm_dpo/delta": -0.03372015058994293,
|
|
"fcm_dpo/margin": 1.3020836114883423,
|
|
"fcm_dpo/q_t": 0.34320205450057983,
|
|
"grad_norm": 91.20820617675781,
|
|
"learning_rate": 4.380688857426449e-07,
|
|
"logits/chosen": 1.6615278720855713,
|
|
"logits/rejected": 1.502671241760254,
|
|
"logps/chosen": -58.270694732666016,
|
|
"logps/ref_chosen": -66.68875885009766,
|
|
"logps/ref_rejected": -85.07585906982422,
|
|
"logps/rejected": -77.95987701416016,
|
|
"loss": 1.0448,
|
|
"margin_dpo/margin_mean": 1.302083134651184,
|
|
"margin_dpo/margin_std": 1.9596548080444336,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.30990173847316704,
|
|
"fcm_dpo/beta": 0.6933684945106506,
|
|
"fcm_dpo/delta": 0.11131396889686584,
|
|
"fcm_dpo/margin": 1.0774143934249878,
|
|
"fcm_dpo/q_t": 0.3806332051753998,
|
|
"grad_norm": 119.31201934814453,
|
|
"learning_rate": 4.3719511720570814e-07,
|
|
"logits/chosen": 1.9803879261016846,
|
|
"logits/rejected": 1.8551146984100342,
|
|
"logps/chosen": -78.51766204833984,
|
|
"logps/ref_chosen": -86.51950073242188,
|
|
"logps/ref_rejected": -112.55376434326172,
|
|
"logps/rejected": -105.62934112548828,
|
|
"loss": 1.2119,
|
|
"margin_dpo/margin_mean": 1.0774142742156982,
|
|
"margin_dpo/margin_std": 2.233020067214966,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.31141345427059713,
|
|
"fcm_dpo/beta": 0.688675045967102,
|
|
"fcm_dpo/delta": -0.13103625178337097,
|
|
"fcm_dpo/margin": 0.9063512086868286,
|
|
"fcm_dpo/q_t": 0.42519694566726685,
|
|
"grad_norm": 138.00732421875,
|
|
"learning_rate": 4.363161124189387e-07,
|
|
"logits/chosen": 2.0987722873687744,
|
|
"logits/rejected": 2.042625665664673,
|
|
"logps/chosen": -80.69242095947266,
|
|
"logps/ref_chosen": -88.68557739257812,
|
|
"logps/ref_rejected": -97.75945281982422,
|
|
"logps/rejected": -90.67264556884766,
|
|
"loss": 1.4227,
|
|
"margin_dpo/margin_mean": 0.9063505530357361,
|
|
"margin_dpo/margin_std": 2.7547712326049805,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.3129251700680272,
|
|
"fcm_dpo/beta": 0.6832928657531738,
|
|
"fcm_dpo/delta": 0.0868806540966034,
|
|
"fcm_dpo/margin": 1.1258772611618042,
|
|
"fcm_dpo/q_t": 0.3708181083202362,
|
|
"grad_norm": 113.93563842773438,
|
|
"learning_rate": 4.3543189596998986e-07,
|
|
"logits/chosen": 1.40671968460083,
|
|
"logits/rejected": 1.2092251777648926,
|
|
"logps/chosen": -77.26618957519531,
|
|
"logps/ref_chosen": -85.12134552001953,
|
|
"logps/ref_rejected": -103.34955596923828,
|
|
"logps/rejected": -96.62027740478516,
|
|
"loss": 1.1403,
|
|
"margin_dpo/margin_mean": 1.1258769035339355,
|
|
"margin_dpo/margin_std": 2.300265073776245,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.3144368858654573,
|
|
"fcm_dpo/beta": 0.7332829833030701,
|
|
"fcm_dpo/delta": 0.37100207805633545,
|
|
"fcm_dpo/margin": 0.6764590740203857,
|
|
"fcm_dpo/q_t": 0.42520248889923096,
|
|
"grad_norm": 134.1845245361328,
|
|
"learning_rate": 4.3454249259229664e-07,
|
|
"logits/chosen": 1.4971542358398438,
|
|
"logits/rejected": 1.4829304218292236,
|
|
"logps/chosen": -70.43374633789062,
|
|
"logps/ref_chosen": -78.84121704101562,
|
|
"logps/ref_rejected": -89.82504272460938,
|
|
"logps/rejected": -82.09403991699219,
|
|
"loss": 1.4158,
|
|
"margin_dpo/margin_mean": 0.6764594316482544,
|
|
"margin_dpo/margin_std": 2.1062352657318115,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.31594860166288735,
|
|
"fcm_dpo/beta": 0.7302354574203491,
|
|
"fcm_dpo/delta": -0.23841455578804016,
|
|
"fcm_dpo/margin": 1.4544109106063843,
|
|
"fcm_dpo/q_t": 0.33910322189331055,
|
|
"grad_norm": 112.4314193725586,
|
|
"learning_rate": 4.336479271643833e-07,
|
|
"logits/chosen": 1.6004903316497803,
|
|
"logits/rejected": 1.5286908149719238,
|
|
"logps/chosen": -77.70037841796875,
|
|
"logps/ref_chosen": -85.98588562011719,
|
|
"logps/ref_rejected": -107.1638412475586,
|
|
"logps/rejected": -100.33273315429688,
|
|
"loss": 1.0837,
|
|
"margin_dpo/margin_mean": 1.4544110298156738,
|
|
"margin_dpo/margin_std": 2.3539958000183105,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.31746031746031744,
|
|
"fcm_dpo/beta": 0.6787157654762268,
|
|
"fcm_dpo/delta": -0.3772972822189331,
|
|
"fcm_dpo/margin": 1.7446019649505615,
|
|
"fcm_dpo/q_t": 0.3218177258968353,
|
|
"grad_norm": 100.42723083496094,
|
|
"learning_rate": 4.327482247091679e-07,
|
|
"logits/chosen": 1.6652277708053589,
|
|
"logits/rejected": 1.4687567949295044,
|
|
"logps/chosen": -63.37964630126953,
|
|
"logps/ref_chosen": -71.75653076171875,
|
|
"logps/ref_rejected": -102.47966003417969,
|
|
"logps/rejected": -95.84736633300781,
|
|
"loss": 1.0083,
|
|
"margin_dpo/margin_mean": 1.7446017265319824,
|
|
"margin_dpo/margin_std": 2.5480265617370605,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.31897203325774753,
|
|
"fcm_dpo/beta": 0.6792653799057007,
|
|
"fcm_dpo/delta": 0.14284920692443848,
|
|
"fcm_dpo/margin": 1.0514057874679565,
|
|
"fcm_dpo/q_t": 0.3711149990558624,
|
|
"grad_norm": 112.68789672851562,
|
|
"learning_rate": 4.3184341039326217e-07,
|
|
"logits/chosen": 1.7981252670288086,
|
|
"logits/rejected": 1.5915374755859375,
|
|
"logps/chosen": -62.572628021240234,
|
|
"logps/ref_chosen": -70.95170593261719,
|
|
"logps/ref_rejected": -108.51902770996094,
|
|
"logps/rejected": -101.19136047363281,
|
|
"loss": 1.1428,
|
|
"margin_dpo/margin_mean": 1.0514049530029297,
|
|
"margin_dpo/margin_std": 2.041271209716797,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.3204837490551776,
|
|
"fcm_dpo/beta": 0.660285472869873,
|
|
"fcm_dpo/delta": -0.15960374474525452,
|
|
"fcm_dpo/margin": 1.5043416023254395,
|
|
"fcm_dpo/q_t": 0.32801175117492676,
|
|
"grad_norm": 99.5758056640625,
|
|
"learning_rate": 4.309335095262675e-07,
|
|
"logits/chosen": 1.6569452285766602,
|
|
"logits/rejected": 1.533679723739624,
|
|
"logps/chosen": -65.77145385742188,
|
|
"logps/ref_chosen": -74.34010314941406,
|
|
"logps/ref_rejected": -97.58259582519531,
|
|
"logps/rejected": -90.5182876586914,
|
|
"loss": 0.9946,
|
|
"margin_dpo/margin_mean": 1.5043418407440186,
|
|
"margin_dpo/margin_std": 2.18064022064209,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.3219954648526077,
|
|
"fcm_dpo/beta": 0.6624789237976074,
|
|
"fcm_dpo/delta": 0.05524115264415741,
|
|
"fcm_dpo/margin": 1.2062104940414429,
|
|
"fcm_dpo/q_t": 0.3639560341835022,
|
|
"grad_norm": 110.04399108886719,
|
|
"learning_rate": 4.3001854756006724e-07,
|
|
"logits/chosen": 1.7112276554107666,
|
|
"logits/rejected": 1.722957968711853,
|
|
"logps/chosen": -71.81007385253906,
|
|
"logps/ref_chosen": -80.2526626586914,
|
|
"logps/ref_rejected": -94.76947021484375,
|
|
"logps/rejected": -87.5330810546875,
|
|
"loss": 1.1933,
|
|
"margin_dpo/margin_mean": 1.2062103748321533,
|
|
"margin_dpo/margin_std": 2.391385555267334,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.3235071806500378,
|
|
"fcm_dpo/beta": 0.6735548973083496,
|
|
"fcm_dpo/delta": 0.07444822043180466,
|
|
"fcm_dpo/margin": 1.1592483520507812,
|
|
"fcm_dpo/q_t": 0.3672788441181183,
|
|
"grad_norm": 105.3218765258789,
|
|
"learning_rate": 4.290985500881143e-07,
|
|
"logits/chosen": 1.4524340629577637,
|
|
"logits/rejected": 1.394425392150879,
|
|
"logps/chosen": -69.37520599365234,
|
|
"logps/ref_chosen": -77.9675064086914,
|
|
"logps/ref_rejected": -84.0354232788086,
|
|
"logps/rejected": -76.60237121582031,
|
|
"loss": 1.0677,
|
|
"margin_dpo/margin_mean": 1.1592485904693604,
|
|
"margin_dpo/margin_std": 2.0362842082977295,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.3250188964474679,
|
|
"fcm_dpo/beta": 0.6623063087463379,
|
|
"fcm_dpo/delta": -0.1859441101551056,
|
|
"fcm_dpo/margin": 1.535156011581421,
|
|
"fcm_dpo/q_t": 0.3248975872993469,
|
|
"grad_norm": 98.75223541259766,
|
|
"learning_rate": 4.281735428447157e-07,
|
|
"logits/chosen": 1.5025854110717773,
|
|
"logits/rejected": 1.2978618144989014,
|
|
"logps/chosen": -72.98377990722656,
|
|
"logps/ref_chosen": -81.2047348022461,
|
|
"logps/ref_rejected": -116.18414306640625,
|
|
"logps/rejected": -109.49835205078125,
|
|
"loss": 0.9512,
|
|
"margin_dpo/margin_mean": 1.53515625,
|
|
"margin_dpo/margin_std": 2.0457441806793213,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.32653061224489793,
|
|
"fcm_dpo/beta": 0.6392388343811035,
|
|
"fcm_dpo/delta": -0.187744140625,
|
|
"fcm_dpo/margin": 1.5922799110412598,
|
|
"fcm_dpo/q_t": 0.34065473079681396,
|
|
"grad_norm": 88.64974975585938,
|
|
"learning_rate": 4.2724355170431247e-07,
|
|
"logits/chosen": 2.229978084564209,
|
|
"logits/rejected": 1.9987678527832031,
|
|
"logps/chosen": -75.49063873291016,
|
|
"logps/ref_chosen": -83.57113647460938,
|
|
"logps/ref_rejected": -112.51902770996094,
|
|
"logps/rejected": -106.03080749511719,
|
|
"loss": 1.0216,
|
|
"margin_dpo/margin_mean": 1.5922796726226807,
|
|
"margin_dpo/margin_std": 2.530792713165283,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.328042328042328,
|
|
"fcm_dpo/beta": 0.618314266204834,
|
|
"fcm_dpo/delta": -0.05883919447660446,
|
|
"fcm_dpo/margin": 1.4609345197677612,
|
|
"fcm_dpo/q_t": 0.36006930470466614,
|
|
"grad_norm": 106.62305450439453,
|
|
"learning_rate": 4.26308602680756e-07,
|
|
"logits/chosen": 1.7996132373809814,
|
|
"logits/rejected": 1.5775671005249023,
|
|
"logps/chosen": -69.00894165039062,
|
|
"logps/ref_chosen": -77.01390075683594,
|
|
"logps/ref_rejected": -105.28099822998047,
|
|
"logps/rejected": -98.73696899414062,
|
|
"loss": 1.0933,
|
|
"margin_dpo/margin_mean": 1.460935115814209,
|
|
"margin_dpo/margin_std": 2.555617332458496,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.3295540438397581,
|
|
"fcm_dpo/beta": 0.6417362093925476,
|
|
"fcm_dpo/delta": 0.20055294036865234,
|
|
"fcm_dpo/margin": 1.028103232383728,
|
|
"fcm_dpo/q_t": 0.3811229467391968,
|
|
"grad_norm": 99.2293472290039,
|
|
"learning_rate": 4.253687219265803e-07,
|
|
"logits/chosen": 1.507783055305481,
|
|
"logits/rejected": 1.506219744682312,
|
|
"logps/chosen": -84.46420288085938,
|
|
"logps/ref_chosen": -92.47299194335938,
|
|
"logps/ref_rejected": -92.80751037597656,
|
|
"logps/rejected": -85.82682800292969,
|
|
"loss": 1.1278,
|
|
"margin_dpo/margin_mean": 1.0281034708023071,
|
|
"margin_dpo/margin_std": 1.8990416526794434,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.3310657596371882,
|
|
"fcm_dpo/beta": 0.6548258066177368,
|
|
"fcm_dpo/delta": 0.08293704688549042,
|
|
"fcm_dpo/margin": 1.1794757843017578,
|
|
"fcm_dpo/q_t": 0.3652268648147583,
|
|
"grad_norm": 90.89139556884766,
|
|
"learning_rate": 4.2442393573227043e-07,
|
|
"logits/chosen": 1.5002977848052979,
|
|
"logits/rejected": 1.4157586097717285,
|
|
"logps/chosen": -68.95957946777344,
|
|
"logps/ref_chosen": -77.10382080078125,
|
|
"logps/ref_rejected": -92.3438949584961,
|
|
"logps/rejected": -85.37913513183594,
|
|
"loss": 1.1377,
|
|
"margin_dpo/margin_mean": 1.1794754266738892,
|
|
"margin_dpo/margin_std": 2.164668083190918,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.3325774754346183,
|
|
"fcm_dpo/beta": 0.6796347498893738,
|
|
"fcm_dpo/delta": 0.1952458620071411,
|
|
"fcm_dpo/margin": 0.9770975708961487,
|
|
"fcm_dpo/q_t": 0.39049315452575684,
|
|
"grad_norm": 91.66635131835938,
|
|
"learning_rate": 4.234742705255272e-07,
|
|
"logits/chosen": 2.009399890899658,
|
|
"logits/rejected": 1.8451021909713745,
|
|
"logps/chosen": -54.451942443847656,
|
|
"logps/ref_chosen": -62.48021697998047,
|
|
"logps/ref_rejected": -86.93276977539062,
|
|
"logps/rejected": -79.881591796875,
|
|
"loss": 1.2308,
|
|
"margin_dpo/margin_mean": 0.9770973920822144,
|
|
"margin_dpo/margin_std": 2.1490917205810547,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.3340891912320484,
|
|
"fcm_dpo/beta": 0.6868494153022766,
|
|
"fcm_dpo/delta": 0.022192861884832382,
|
|
"fcm_dpo/margin": 1.206336259841919,
|
|
"fcm_dpo/q_t": 0.36380764842033386,
|
|
"grad_norm": 109.6985092163086,
|
|
"learning_rate": 4.22519752870528e-07,
|
|
"logits/chosen": 1.804550051689148,
|
|
"logits/rejected": 1.6219594478607178,
|
|
"logps/chosen": -70.11363220214844,
|
|
"logps/ref_chosen": -78.35491943359375,
|
|
"logps/ref_rejected": -108.17631530761719,
|
|
"logps/rejected": -101.141357421875,
|
|
"loss": 1.1374,
|
|
"margin_dpo/margin_mean": 1.2063356637954712,
|
|
"margin_dpo/margin_std": 2.2121567726135254,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.3356009070294785,
|
|
"fcm_dpo/beta": 0.6856993436813354,
|
|
"fcm_dpo/delta": -0.011833667755126953,
|
|
"fcm_dpo/margin": 1.254509687423706,
|
|
"fcm_dpo/q_t": 0.3495626151561737,
|
|
"grad_norm": 115.02777862548828,
|
|
"learning_rate": 4.2156040946718343e-07,
|
|
"logits/chosen": 2.041644334793091,
|
|
"logits/rejected": 1.7993850708007812,
|
|
"logps/chosen": -69.54031372070312,
|
|
"logps/ref_chosen": -77.2734375,
|
|
"logps/ref_rejected": -126.41007995605469,
|
|
"logps/rejected": -119.93147277832031,
|
|
"loss": 1.0305,
|
|
"margin_dpo/margin_mean": 1.2545100450515747,
|
|
"margin_dpo/margin_std": 1.9318989515304565,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.3371126228269085,
|
|
"fcm_dpo/beta": 0.6608577966690063,
|
|
"fcm_dpo/delta": -0.19959528744220734,
|
|
"fcm_dpo/margin": 1.5568513870239258,
|
|
"fcm_dpo/q_t": 0.31522971391677856,
|
|
"grad_norm": 93.16920471191406,
|
|
"learning_rate": 4.2059626715039065e-07,
|
|
"logits/chosen": 1.7795042991638184,
|
|
"logits/rejected": 1.6519936323165894,
|
|
"logps/chosen": -70.11776733398438,
|
|
"logps/ref_chosen": -78.4210205078125,
|
|
"logps/ref_rejected": -101.38420867919922,
|
|
"logps/rejected": -94.63780975341797,
|
|
"loss": 0.8706,
|
|
"margin_dpo/margin_mean": 1.5568515062332153,
|
|
"margin_dpo/margin_std": 1.8883434534072876,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.3386243386243386,
|
|
"fcm_dpo/beta": 0.6652363538742065,
|
|
"fcm_dpo/delta": 0.10752552002668381,
|
|
"fcm_dpo/margin": 1.128204107284546,
|
|
"fcm_dpo/q_t": 0.362488716840744,
|
|
"grad_norm": 107.10758972167969,
|
|
"learning_rate": 4.1962735288928304e-07,
|
|
"logits/chosen": 2.056093454360962,
|
|
"logits/rejected": 1.9930920600891113,
|
|
"logps/chosen": -71.17828369140625,
|
|
"logps/ref_chosen": -79.36337280273438,
|
|
"logps/ref_rejected": -89.99789428710938,
|
|
"logps/rejected": -82.94100952148438,
|
|
"loss": 1.0344,
|
|
"margin_dpo/margin_mean": 1.128204584121704,
|
|
"margin_dpo/margin_std": 1.7746167182922363,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.3401360544217687,
|
|
"fcm_dpo/beta": 0.662153422832489,
|
|
"fcm_dpo/delta": 0.008250989019870758,
|
|
"fcm_dpo/margin": 1.2687159776687622,
|
|
"fcm_dpo/q_t": 0.35935088992118835,
|
|
"grad_norm": 137.93043518066406,
|
|
"learning_rate": 4.186536937864752e-07,
|
|
"logits/chosen": 1.6968061923980713,
|
|
"logits/rejected": 1.4539750814437866,
|
|
"logps/chosen": -81.02485656738281,
|
|
"logps/ref_chosen": -88.99606323242188,
|
|
"logps/ref_rejected": -127.55032348632812,
|
|
"logps/rejected": -120.84783935546875,
|
|
"loss": 1.1876,
|
|
"margin_dpo/margin_mean": 1.2687162160873413,
|
|
"margin_dpo/margin_std": 2.3828163146972656,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.3416477702191988,
|
|
"fcm_dpo/beta": 0.6740131378173828,
|
|
"fcm_dpo/delta": 0.04441865161061287,
|
|
"fcm_dpo/margin": 1.2005562782287598,
|
|
"fcm_dpo/q_t": 0.3622625172138214,
|
|
"grad_norm": 88.77747344970703,
|
|
"learning_rate": 4.176753170773052e-07,
|
|
"logits/chosen": 1.8422725200653076,
|
|
"logits/rejected": 1.749298095703125,
|
|
"logps/chosen": -60.43705368041992,
|
|
"logps/ref_chosen": -68.68444061279297,
|
|
"logps/ref_rejected": -85.81898498535156,
|
|
"logps/rejected": -78.77215576171875,
|
|
"loss": 1.1248,
|
|
"margin_dpo/margin_mean": 1.2005561590194702,
|
|
"margin_dpo/margin_std": 2.140181541442871,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.3431594860166289,
|
|
"fcm_dpo/beta": 0.6707133054733276,
|
|
"fcm_dpo/delta": 0.00018239766359329224,
|
|
"fcm_dpo/margin": 1.2653456926345825,
|
|
"fcm_dpo/q_t": 0.35821062326431274,
|
|
"grad_norm": 108.87771606445312,
|
|
"learning_rate": 4.166922501290729e-07,
|
|
"logits/chosen": 1.8412506580352783,
|
|
"logits/rejected": 1.7449533939361572,
|
|
"logps/chosen": -64.33416748046875,
|
|
"logps/ref_chosen": -72.52029418945312,
|
|
"logps/ref_rejected": -90.7720718383789,
|
|
"logps/rejected": -83.85128784179688,
|
|
"loss": 1.1363,
|
|
"margin_dpo/margin_mean": 1.265345811843872,
|
|
"margin_dpo/margin_std": 2.2258872985839844,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.34467120181405897,
|
|
"fcm_dpo/beta": 0.6848806142807007,
|
|
"fcm_dpo/delta": 0.057742100208997726,
|
|
"fcm_dpo/margin": 1.162534475326538,
|
|
"fcm_dpo/q_t": 0.36188948154449463,
|
|
"grad_norm": 101.69074249267578,
|
|
"learning_rate": 4.1570452044027405e-07,
|
|
"logits/chosen": 1.739363431930542,
|
|
"logits/rejected": 1.585823893547058,
|
|
"logps/chosen": -64.13141632080078,
|
|
"logps/ref_chosen": -72.23167419433594,
|
|
"logps/ref_rejected": -95.45873260498047,
|
|
"logps/rejected": -88.52101135253906,
|
|
"loss": 1.1563,
|
|
"margin_dpo/margin_mean": 1.1625350713729858,
|
|
"margin_dpo/margin_std": 2.186185598373413,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.34618291761148906,
|
|
"fcm_dpo/beta": 0.6733275651931763,
|
|
"fcm_dpo/delta": -0.10372693836688995,
|
|
"fcm_dpo/margin": 1.4018828868865967,
|
|
"fcm_dpo/q_t": 0.3341498374938965,
|
|
"grad_norm": 94.96331787109375,
|
|
"learning_rate": 4.147121556398312e-07,
|
|
"logits/chosen": 1.7647275924682617,
|
|
"logits/rejected": 1.5876259803771973,
|
|
"logps/chosen": -58.482269287109375,
|
|
"logps/ref_chosen": -66.88822174072266,
|
|
"logps/ref_rejected": -92.27890014648438,
|
|
"logps/rejected": -85.27483367919922,
|
|
"loss": 1.0192,
|
|
"margin_dpo/margin_mean": 1.4018831253051758,
|
|
"margin_dpo/margin_std": 2.1348557472229004,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.3476946334089191,
|
|
"fcm_dpo/beta": 0.6939650774002075,
|
|
"fcm_dpo/delta": 0.12067630141973495,
|
|
"fcm_dpo/margin": 1.052196741104126,
|
|
"fcm_dpo/q_t": 0.3544687330722809,
|
|
"grad_norm": 114.52188110351562,
|
|
"learning_rate": 4.137151834863213e-07,
|
|
"logits/chosen": 1.8524203300476074,
|
|
"logits/rejected": 1.8436353206634521,
|
|
"logps/chosen": -67.94467163085938,
|
|
"logps/ref_chosen": -76.12332153320312,
|
|
"logps/ref_rejected": -78.19171905517578,
|
|
"logps/rejected": -71.06526947021484,
|
|
"loss": 1.1746,
|
|
"margin_dpo/margin_mean": 1.0521972179412842,
|
|
"margin_dpo/margin_std": 2.023137092590332,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.3492063492063492,
|
|
"fcm_dpo/beta": 0.6795934438705444,
|
|
"fcm_dpo/delta": -0.047244250774383545,
|
|
"fcm_dpo/margin": 1.3137178421020508,
|
|
"fcm_dpo/q_t": 0.3336409330368042,
|
|
"grad_norm": 117.06100463867188,
|
|
"learning_rate": 4.1271363186719835e-07,
|
|
"logits/chosen": 1.5964305400848389,
|
|
"logits/rejected": 1.5918617248535156,
|
|
"logps/chosen": -84.60713195800781,
|
|
"logps/ref_chosen": -92.45181274414062,
|
|
"logps/ref_rejected": -100.89735412597656,
|
|
"logps/rejected": -94.36639404296875,
|
|
"loss": 1.0741,
|
|
"margin_dpo/margin_mean": 1.3137177228927612,
|
|
"margin_dpo/margin_std": 2.144465923309326,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.3507180650037793,
|
|
"fcm_dpo/beta": 0.7037328481674194,
|
|
"fcm_dpo/delta": 0.2652207016944885,
|
|
"fcm_dpo/margin": 0.8546900749206543,
|
|
"fcm_dpo/q_t": 0.4035346508026123,
|
|
"grad_norm": 234.2224884033203,
|
|
"learning_rate": 4.1170752879801436e-07,
|
|
"logits/chosen": 1.4689631462097168,
|
|
"logits/rejected": 1.4187445640563965,
|
|
"logps/chosen": -78.85874938964844,
|
|
"logps/ref_chosen": -86.75383758544922,
|
|
"logps/ref_rejected": -98.16909790039062,
|
|
"logps/rejected": -91.12869262695312,
|
|
"loss": 1.4459,
|
|
"margin_dpo/margin_mean": 0.854690432548523,
|
|
"margin_dpo/margin_std": 2.5719518661499023,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.35222978080120937,
|
|
"fcm_dpo/beta": 0.6840606331825256,
|
|
"fcm_dpo/delta": -0.23278965055942535,
|
|
"fcm_dpo/margin": 1.099263310432434,
|
|
"fcm_dpo/q_t": 0.37205249071121216,
|
|
"grad_norm": 110.37529754638672,
|
|
"learning_rate": 4.106969024216348e-07,
|
|
"logits/chosen": 1.5777666568756104,
|
|
"logits/rejected": 1.4945855140686035,
|
|
"logps/chosen": -64.928955078125,
|
|
"logps/ref_chosen": -72.87556457519531,
|
|
"logps/ref_rejected": -85.22943115234375,
|
|
"logps/rejected": -78.38209533691406,
|
|
"loss": 1.197,
|
|
"margin_dpo/margin_mean": 1.0992629528045654,
|
|
"margin_dpo/margin_std": 2.0313029289245605,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.35374149659863946,
|
|
"fcm_dpo/beta": 0.701070249080658,
|
|
"fcm_dpo/delta": 0.08123414218425751,
|
|
"fcm_dpo/margin": 1.099867820739746,
|
|
"fcm_dpo/q_t": 0.36371809244155884,
|
|
"grad_norm": 95.63811492919922,
|
|
"learning_rate": 4.09681781007452e-07,
|
|
"logits/chosen": 1.4570426940917969,
|
|
"logits/rejected": 1.408102035522461,
|
|
"logps/chosen": -61.800174713134766,
|
|
"logps/ref_chosen": -70.05477905273438,
|
|
"logps/ref_rejected": -68.7240982055664,
|
|
"logps/rejected": -61.569358825683594,
|
|
"loss": 1.1211,
|
|
"margin_dpo/margin_mean": 1.099867820739746,
|
|
"margin_dpo/margin_std": 1.8799715042114258,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.35525321239606955,
|
|
"fcm_dpo/beta": 0.6795494556427002,
|
|
"fcm_dpo/delta": -0.1432761698961258,
|
|
"fcm_dpo/margin": 1.440608024597168,
|
|
"fcm_dpo/q_t": 0.316908061504364,
|
|
"grad_norm": 107.87815856933594,
|
|
"learning_rate": 4.08662192950594e-07,
|
|
"logits/chosen": 1.609418511390686,
|
|
"logits/rejected": 1.5853018760681152,
|
|
"logps/chosen": -77.45685577392578,
|
|
"logps/ref_chosen": -85.86051940917969,
|
|
"logps/ref_rejected": -96.14663696289062,
|
|
"logps/rejected": -89.18359375,
|
|
"loss": 0.9073,
|
|
"margin_dpo/margin_mean": 1.4406075477600098,
|
|
"margin_dpo/margin_std": 1.7774099111557007,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.35676492819349964,
|
|
"fcm_dpo/beta": 0.6827093362808228,
|
|
"fcm_dpo/delta": 0.08276916295289993,
|
|
"fcm_dpo/margin": 1.133293628692627,
|
|
"fcm_dpo/q_t": 0.36910420656204224,
|
|
"grad_norm": 112.19277954101562,
|
|
"learning_rate": 4.076381667711306e-07,
|
|
"logits/chosen": 1.8993335962295532,
|
|
"logits/rejected": 1.8813176155090332,
|
|
"logps/chosen": -81.87387084960938,
|
|
"logps/ref_chosen": -89.75252532958984,
|
|
"logps/ref_rejected": -99.28534698486328,
|
|
"logps/rejected": -92.53997802734375,
|
|
"loss": 1.1588,
|
|
"margin_dpo/margin_mean": 1.133293867111206,
|
|
"margin_dpo/margin_std": 2.1710267066955566,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.35827664399092973,
|
|
"fcm_dpo/beta": 0.7253872156143188,
|
|
"fcm_dpo/delta": 0.3865929841995239,
|
|
"fcm_dpo/margin": 0.6659508943557739,
|
|
"fcm_dpo/q_t": 0.415424644947052,
|
|
"grad_norm": 137.00572204589844,
|
|
"learning_rate": 4.066097311132753e-07,
|
|
"logits/chosen": 1.6266775131225586,
|
|
"logits/rejected": 1.6260457038879395,
|
|
"logps/chosen": -84.624267578125,
|
|
"logps/ref_chosen": -92.59001922607422,
|
|
"logps/ref_rejected": -101.45584869384766,
|
|
"logps/rejected": -94.15604400634766,
|
|
"loss": 1.3267,
|
|
"margin_dpo/margin_mean": 0.6659514904022217,
|
|
"margin_dpo/margin_std": 1.8473809957504272,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.35978835978835977,
|
|
"fcm_dpo/beta": 0.7275218963623047,
|
|
"fcm_dpo/delta": -0.08721604943275452,
|
|
"fcm_dpo/margin": 1.275065302848816,
|
|
"fcm_dpo/q_t": 0.3357663154602051,
|
|
"grad_norm": 118.53727722167969,
|
|
"learning_rate": 4.0557691474458414e-07,
|
|
"logits/chosen": 1.514404058456421,
|
|
"logits/rejected": 1.5060009956359863,
|
|
"logps/chosen": -73.95974731445312,
|
|
"logps/ref_chosen": -82.2470474243164,
|
|
"logps/ref_rejected": -92.59944152832031,
|
|
"logps/rejected": -85.58721923828125,
|
|
"loss": 1.0485,
|
|
"margin_dpo/margin_mean": 1.2750656604766846,
|
|
"margin_dpo/margin_std": 1.9951952695846558,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.36130007558578986,
|
|
"fcm_dpo/beta": 0.7322613596916199,
|
|
"fcm_dpo/delta": -0.01715192198753357,
|
|
"fcm_dpo/margin": 1.181195616722107,
|
|
"fcm_dpo/q_t": 0.35318899154663086,
|
|
"grad_norm": 118.10260009765625,
|
|
"learning_rate": 4.045397465551513e-07,
|
|
"logits/chosen": 2.0101892948150635,
|
|
"logits/rejected": 1.724487543106079,
|
|
"logps/chosen": -67.31920623779297,
|
|
"logps/ref_chosen": -75.30878448486328,
|
|
"logps/ref_rejected": -131.2318115234375,
|
|
"logps/rejected": -124.42342376708984,
|
|
"loss": 1.087,
|
|
"margin_dpo/margin_mean": 1.1811952590942383,
|
|
"margin_dpo/margin_std": 1.9706592559814453,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.36281179138321995,
|
|
"fcm_dpo/beta": 0.714773416519165,
|
|
"fcm_dpo/delta": -0.23679864406585693,
|
|
"fcm_dpo/margin": 1.4823143482208252,
|
|
"fcm_dpo/q_t": 0.31994450092315674,
|
|
"grad_norm": 102.3418197631836,
|
|
"learning_rate": 4.0349825555680045e-07,
|
|
"logits/chosen": 1.5584038496017456,
|
|
"logits/rejected": 1.364628553390503,
|
|
"logps/chosen": -62.70786666870117,
|
|
"logps/ref_chosen": -70.81785583496094,
|
|
"logps/ref_rejected": -98.53778076171875,
|
|
"logps/rejected": -91.91011047363281,
|
|
"loss": 0.9887,
|
|
"margin_dpo/margin_mean": 1.4823133945465088,
|
|
"margin_dpo/margin_std": 2.0326457023620605,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.36432350718065004,
|
|
"fcm_dpo/beta": 0.7306882739067078,
|
|
"fcm_dpo/delta": 0.3302416205406189,
|
|
"fcm_dpo/margin": 0.7337081432342529,
|
|
"fcm_dpo/q_t": 0.41419199109077454,
|
|
"grad_norm": 125.35552978515625,
|
|
"learning_rate": 4.0245247088227377e-07,
|
|
"logits/chosen": 1.4376188516616821,
|
|
"logits/rejected": 1.402127981185913,
|
|
"logps/chosen": -80.48194885253906,
|
|
"logps/ref_chosen": -88.60260772705078,
|
|
"logps/ref_rejected": -101.42214965820312,
|
|
"logps/rejected": -94.03518676757812,
|
|
"loss": 1.2775,
|
|
"margin_dpo/margin_mean": 0.7337080240249634,
|
|
"margin_dpo/margin_std": 1.8403754234313965,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.36583522297808013,
|
|
"fcm_dpo/beta": 0.6982107162475586,
|
|
"fcm_dpo/delta": -0.33571815490722656,
|
|
"fcm_dpo/margin": 1.6368021965026855,
|
|
"fcm_dpo/q_t": 0.30604660511016846,
|
|
"grad_norm": 85.99467468261719,
|
|
"learning_rate": 4.0140242178441665e-07,
|
|
"logits/chosen": 1.5356554985046387,
|
|
"logits/rejected": 1.451372742652893,
|
|
"logps/chosen": -68.53840637207031,
|
|
"logps/ref_chosen": -77.34110260009766,
|
|
"logps/ref_rejected": -84.76332092285156,
|
|
"logps/rejected": -77.59742736816406,
|
|
"loss": 0.8799,
|
|
"margin_dpo/margin_mean": 1.636801838874817,
|
|
"margin_dpo/margin_std": 2.0876946449279785,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.3673469387755102,
|
|
"fcm_dpo/beta": 0.6918625831604004,
|
|
"fcm_dpo/delta": 0.05498592555522919,
|
|
"fcm_dpo/margin": 1.153847336769104,
|
|
"fcm_dpo/q_t": 0.3613772988319397,
|
|
"grad_norm": 119.00396728515625,
|
|
"learning_rate": 4.003481376353596e-07,
|
|
"logits/chosen": 1.7110121250152588,
|
|
"logits/rejected": 1.7173479795455933,
|
|
"logps/chosen": -85.46659088134766,
|
|
"logps/ref_chosen": -93.55897521972656,
|
|
"logps/ref_rejected": -89.33551025390625,
|
|
"logps/rejected": -82.39696502685547,
|
|
"loss": 1.0625,
|
|
"margin_dpo/margin_mean": 1.1538474559783936,
|
|
"margin_dpo/margin_std": 1.8430607318878174,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.3688586545729403,
|
|
"fcm_dpo/beta": 0.665020227432251,
|
|
"fcm_dpo/delta": -0.3315753936767578,
|
|
"fcm_dpo/margin": 1.7183572053909302,
|
|
"fcm_dpo/q_t": 0.27718719840049744,
|
|
"grad_norm": 68.53643798828125,
|
|
"learning_rate": 3.9928964792569654e-07,
|
|
"logits/chosen": 1.6703214645385742,
|
|
"logits/rejected": 1.5628724098205566,
|
|
"logps/chosen": -61.40729904174805,
|
|
"logps/ref_chosen": -69.82603454589844,
|
|
"logps/ref_rejected": -92.4764175415039,
|
|
"logps/rejected": -85.77603149414062,
|
|
"loss": 0.7236,
|
|
"margin_dpo/margin_mean": 1.7183579206466675,
|
|
"margin_dpo/margin_std": 1.5396392345428467,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.37037037037037035,
|
|
"fcm_dpo/beta": 0.6507720947265625,
|
|
"fcm_dpo/delta": -0.028134608641266823,
|
|
"fcm_dpo/margin": 1.34529709815979,
|
|
"fcm_dpo/q_t": 0.34099605679512024,
|
|
"grad_norm": 118.12181854248047,
|
|
"learning_rate": 3.982269822636601e-07,
|
|
"logits/chosen": 1.7217857837677002,
|
|
"logits/rejected": 1.6632800102233887,
|
|
"logps/chosen": -77.65235900878906,
|
|
"logps/ref_chosen": -85.68216705322266,
|
|
"logps/ref_rejected": -93.8754653930664,
|
|
"logps/rejected": -87.19094848632812,
|
|
"loss": 1.0423,
|
|
"margin_dpo/margin_mean": 1.3452973365783691,
|
|
"margin_dpo/margin_std": 2.0852465629577637,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.37188208616780044,
|
|
"fcm_dpo/beta": 0.6409244537353516,
|
|
"fcm_dpo/delta": -0.06382192671298981,
|
|
"fcm_dpo/margin": 1.4147106409072876,
|
|
"fcm_dpo/q_t": 0.3571142554283142,
|
|
"grad_norm": 108.5011978149414,
|
|
"learning_rate": 3.971601703742932e-07,
|
|
"logits/chosen": 1.762617826461792,
|
|
"logits/rejected": 1.6464555263519287,
|
|
"logps/chosen": -82.2135238647461,
|
|
"logps/ref_chosen": -90.05093383789062,
|
|
"logps/ref_rejected": -112.77645874023438,
|
|
"logps/rejected": -106.353759765625,
|
|
"loss": 1.0714,
|
|
"margin_dpo/margin_mean": 1.4147106409072876,
|
|
"margin_dpo/margin_std": 2.4063146114349365,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.37339380196523053,
|
|
"fcm_dpo/beta": 0.6898531913757324,
|
|
"fcm_dpo/delta": 0.45279592275619507,
|
|
"fcm_dpo/margin": 0.6013703346252441,
|
|
"fcm_dpo/q_t": 0.4185771942138672,
|
|
"grad_norm": 155.9046630859375,
|
|
"learning_rate": 3.960892420986177e-07,
|
|
"logits/chosen": 1.8204069137573242,
|
|
"logits/rejected": 1.7708896398544312,
|
|
"logps/chosen": -95.5366439819336,
|
|
"logps/ref_chosen": -103.23979187011719,
|
|
"logps/ref_rejected": -105.26278686523438,
|
|
"logps/rejected": -98.1610107421875,
|
|
"loss": 1.3927,
|
|
"margin_dpo/margin_mean": 0.6013697385787964,
|
|
"margin_dpo/margin_std": 2.031219244003296,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.3749055177626606,
|
|
"fcm_dpo/beta": 0.7205560207366943,
|
|
"fcm_dpo/delta": 0.1306939274072647,
|
|
"fcm_dpo/margin": 1.0105092525482178,
|
|
"fcm_dpo/q_t": 0.3771660327911377,
|
|
"grad_norm": 125.46910095214844,
|
|
"learning_rate": 3.9501422739279953e-07,
|
|
"logits/chosen": 1.6252491474151611,
|
|
"logits/rejected": 1.7246313095092773,
|
|
"logps/chosen": -80.30120849609375,
|
|
"logps/ref_chosen": -88.16007995605469,
|
|
"logps/ref_rejected": -75.11514282226562,
|
|
"logps/rejected": -68.26677703857422,
|
|
"loss": 1.224,
|
|
"margin_dpo/margin_mean": 1.0105094909667969,
|
|
"margin_dpo/margin_std": 2.206491470336914,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.3764172335600907,
|
|
"fcm_dpo/beta": 0.7585580348968506,
|
|
"fcm_dpo/delta": 0.26705750823020935,
|
|
"fcm_dpo/margin": 0.7847131490707397,
|
|
"fcm_dpo/q_t": 0.3981843888759613,
|
|
"grad_norm": 278.2481994628906,
|
|
"learning_rate": 3.9393515632731094e-07,
|
|
"logits/chosen": 1.854946255683899,
|
|
"logits/rejected": 1.8992762565612793,
|
|
"logps/chosen": -83.19174194335938,
|
|
"logps/ref_chosen": -91.01773071289062,
|
|
"logps/ref_rejected": -80.51113891601562,
|
|
"logps/rejected": -73.46986389160156,
|
|
"loss": 1.427,
|
|
"margin_dpo/margin_mean": 0.7847132682800293,
|
|
"margin_dpo/margin_std": 2.277224063873291,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.3779289493575208,
|
|
"fcm_dpo/beta": 0.7253923416137695,
|
|
"fcm_dpo/delta": -0.4017148017883301,
|
|
"fcm_dpo/margin": 1.6611292362213135,
|
|
"fcm_dpo/q_t": 0.2990487813949585,
|
|
"grad_norm": 98.78463745117188,
|
|
"learning_rate": 3.9285205908608934e-07,
|
|
"logits/chosen": 2.0098440647125244,
|
|
"logits/rejected": 1.9583611488342285,
|
|
"logps/chosen": -72.56451416015625,
|
|
"logps/ref_chosen": -80.5888671875,
|
|
"logps/ref_rejected": -90.15093994140625,
|
|
"logps/rejected": -83.7877197265625,
|
|
"loss": 0.9505,
|
|
"margin_dpo/margin_mean": 1.6611298322677612,
|
|
"margin_dpo/margin_std": 2.270531177520752,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.3794406651549509,
|
|
"fcm_dpo/beta": 0.731696605682373,
|
|
"fcm_dpo/delta": 0.27014490962028503,
|
|
"fcm_dpo/margin": 0.8148388862609863,
|
|
"fcm_dpo/q_t": 0.39993759989738464,
|
|
"grad_norm": 125.67576599121094,
|
|
"learning_rate": 3.9176496596569265e-07,
|
|
"logits/chosen": 2.2030210494995117,
|
|
"logits/rejected": 2.085310459136963,
|
|
"logps/chosen": -74.93942260742188,
|
|
"logps/ref_chosen": -82.70405578613281,
|
|
"logps/ref_rejected": -98.94266510009766,
|
|
"logps/rejected": -91.99287414550781,
|
|
"loss": 1.3519,
|
|
"margin_dpo/margin_mean": 0.8148387670516968,
|
|
"margin_dpo/margin_std": 2.184969186782837,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.38095238095238093,
|
|
"fcm_dpo/beta": 0.7701340913772583,
|
|
"fcm_dpo/delta": 0.16608819365501404,
|
|
"fcm_dpo/margin": 0.8971800804138184,
|
|
"fcm_dpo/q_t": 0.37846630811691284,
|
|
"grad_norm": 123.63345336914062,
|
|
"learning_rate": 3.9067390737445254e-07,
|
|
"logits/chosen": 1.8227040767669678,
|
|
"logits/rejected": 1.7111635208129883,
|
|
"logps/chosen": -65.16363525390625,
|
|
"logps/ref_chosen": -73.10369110107422,
|
|
"logps/ref_rejected": -94.90235900878906,
|
|
"logps/rejected": -87.85948181152344,
|
|
"loss": 1.2088,
|
|
"margin_dpo/margin_mean": 0.8971810936927795,
|
|
"margin_dpo/margin_std": 1.9166100025177002,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.382464096749811,
|
|
"fcm_dpo/beta": 0.7829554080963135,
|
|
"fcm_dpo/delta": 0.22496165335178375,
|
|
"fcm_dpo/margin": 0.8155179619789124,
|
|
"fcm_dpo/q_t": 0.3876587152481079,
|
|
"grad_norm": 109.97724151611328,
|
|
"learning_rate": 3.8957891383162304e-07,
|
|
"logits/chosen": 2.1262078285217285,
|
|
"logits/rejected": 2.010744094848633,
|
|
"logps/chosen": -60.83722686767578,
|
|
"logps/ref_chosen": -68.7789535522461,
|
|
"logps/ref_rejected": -75.98162078857422,
|
|
"logps/rejected": -68.85540771484375,
|
|
"loss": 1.1644,
|
|
"margin_dpo/margin_mean": 0.8155180215835571,
|
|
"margin_dpo/margin_std": 1.587095022201538,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.3839758125472411,
|
|
"fcm_dpo/beta": 0.7989780902862549,
|
|
"fcm_dpo/delta": -0.00629083439707756,
|
|
"fcm_dpo/margin": 1.070601463317871,
|
|
"fcm_dpo/q_t": 0.35906800627708435,
|
|
"grad_norm": 130.26258850097656,
|
|
"learning_rate": 3.884800159665276e-07,
|
|
"logits/chosen": 1.5529742240905762,
|
|
"logits/rejected": 1.442957878112793,
|
|
"logps/chosen": -73.70938873291016,
|
|
"logps/ref_chosen": -81.49362182617188,
|
|
"logps/ref_rejected": -101.43672943115234,
|
|
"logps/rejected": -94.72309875488281,
|
|
"loss": 1.1745,
|
|
"margin_dpo/margin_mean": 1.0706019401550293,
|
|
"margin_dpo/margin_std": 2.0320396423339844,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.3854875283446712,
|
|
"fcm_dpo/beta": 0.7874916791915894,
|
|
"fcm_dpo/delta": -0.00892484188079834,
|
|
"fcm_dpo/margin": 1.082383394241333,
|
|
"fcm_dpo/q_t": 0.351526141166687,
|
|
"grad_norm": 126.74822235107422,
|
|
"learning_rate": 3.873772445177015e-07,
|
|
"logits/chosen": 1.9369912147521973,
|
|
"logits/rejected": 1.8948047161102295,
|
|
"logps/chosen": -82.60552215576172,
|
|
"logps/ref_chosen": -90.46351623535156,
|
|
"logps/ref_rejected": -105.32445526123047,
|
|
"logps/rejected": -98.54884338378906,
|
|
"loss": 1.0957,
|
|
"margin_dpo/margin_mean": 1.0823832750320435,
|
|
"margin_dpo/margin_std": 1.7699964046478271,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.3869992441421013,
|
|
"fcm_dpo/beta": 0.7961438894271851,
|
|
"fcm_dpo/delta": -0.011890236288309097,
|
|
"fcm_dpo/margin": 1.0808762311935425,
|
|
"fcm_dpo/q_t": 0.3597353994846344,
|
|
"grad_norm": 130.22622680664062,
|
|
"learning_rate": 3.862706303320329e-07,
|
|
"logits/chosen": 1.7444734573364258,
|
|
"logits/rejected": 1.5635294914245605,
|
|
"logps/chosen": -73.82586669921875,
|
|
"logps/ref_chosen": -81.56578063964844,
|
|
"logps/ref_rejected": -108.58460998535156,
|
|
"logps/rejected": -101.92556762695312,
|
|
"loss": 1.1265,
|
|
"margin_dpo/margin_mean": 1.0808756351470947,
|
|
"margin_dpo/margin_std": 1.953932523727417,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.3885109599395314,
|
|
"fcm_dpo/beta": 0.7655524015426636,
|
|
"fcm_dpo/delta": -0.21847796440124512,
|
|
"fcm_dpo/margin": 1.3607196807861328,
|
|
"fcm_dpo/q_t": 0.34392106533050537,
|
|
"grad_norm": 142.00601196289062,
|
|
"learning_rate": 3.851602043638994e-07,
|
|
"logits/chosen": 1.687360167503357,
|
|
"logits/rejected": 1.6134676933288574,
|
|
"logps/chosen": -81.99454498291016,
|
|
"logps/ref_chosen": -89.57557678222656,
|
|
"logps/ref_rejected": -123.74462127685547,
|
|
"logps/rejected": -117.52430725097656,
|
|
"loss": 1.1313,
|
|
"margin_dpo/margin_mean": 1.360719919204712,
|
|
"margin_dpo/margin_std": 2.331404209136963,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.3900226757369615,
|
|
"fcm_dpo/beta": 0.7688815593719482,
|
|
"fcm_dpo/delta": -0.041162073612213135,
|
|
"fcm_dpo/margin": 1.1515392065048218,
|
|
"fcm_dpo/q_t": 0.3347882628440857,
|
|
"grad_norm": 122.0123062133789,
|
|
"learning_rate": 3.840459976743023e-07,
|
|
"logits/chosen": 1.8406729698181152,
|
|
"logits/rejected": 1.712424635887146,
|
|
"logps/chosen": -69.8455581665039,
|
|
"logps/ref_chosen": -77.34173583984375,
|
|
"logps/ref_rejected": -99.5709228515625,
|
|
"logps/rejected": -93.22627258300781,
|
|
"loss": 0.9454,
|
|
"margin_dpo/margin_mean": 1.1515395641326904,
|
|
"margin_dpo/margin_std": 1.4946105480194092,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.3915343915343915,
|
|
"fcm_dpo/beta": 0.7144708037376404,
|
|
"fcm_dpo/delta": -0.3142710328102112,
|
|
"fcm_dpo/margin": 1.5702617168426514,
|
|
"fcm_dpo/q_t": 0.30293339490890503,
|
|
"grad_norm": 104.40306854248047,
|
|
"learning_rate": 3.8292804142999796e-07,
|
|
"logits/chosen": 1.6947746276855469,
|
|
"logits/rejected": 1.5361230373382568,
|
|
"logps/chosen": -74.45024108886719,
|
|
"logps/ref_chosen": -82.39556121826172,
|
|
"logps/ref_rejected": -113.73309326171875,
|
|
"logps/rejected": -107.3580322265625,
|
|
"loss": 0.9803,
|
|
"margin_dpo/margin_mean": 1.5702614784240723,
|
|
"margin_dpo/margin_std": 2.119558334350586,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.3930461073318216,
|
|
"fcm_dpo/beta": 0.70084547996521,
|
|
"fcm_dpo/delta": -0.02580847591161728,
|
|
"fcm_dpo/margin": 1.2419445514678955,
|
|
"fcm_dpo/q_t": 0.3515279293060303,
|
|
"grad_norm": 115.76726531982422,
|
|
"learning_rate": 3.818063669026256e-07,
|
|
"logits/chosen": 1.5052006244659424,
|
|
"logits/rejected": 1.3275035619735718,
|
|
"logps/chosen": -58.38023376464844,
|
|
"logps/ref_chosen": -65.98947143554688,
|
|
"logps/ref_rejected": -94.59706115722656,
|
|
"logps/rejected": -88.22976684570312,
|
|
"loss": 1.0882,
|
|
"margin_dpo/margin_mean": 1.241944432258606,
|
|
"margin_dpo/margin_std": 2.076496124267578,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.3945578231292517,
|
|
"fcm_dpo/beta": 0.7224990129470825,
|
|
"fcm_dpo/delta": 0.16039912402629852,
|
|
"fcm_dpo/margin": 0.9704261422157288,
|
|
"fcm_dpo/q_t": 0.3779695928096771,
|
|
"grad_norm": 124.87211608886719,
|
|
"learning_rate": 3.806810054678331e-07,
|
|
"logits/chosen": 1.6255950927734375,
|
|
"logits/rejected": 1.6686420440673828,
|
|
"logps/chosen": -81.06610870361328,
|
|
"logps/ref_chosen": -88.87684631347656,
|
|
"logps/ref_rejected": -82.34838104248047,
|
|
"logps/rejected": -75.50807189941406,
|
|
"loss": 1.1139,
|
|
"margin_dpo/margin_mean": 0.9704260230064392,
|
|
"margin_dpo/margin_std": 1.7810626029968262,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.3960695389266818,
|
|
"fcm_dpo/beta": 0.7318278551101685,
|
|
"fcm_dpo/delta": -0.09685448557138443,
|
|
"fcm_dpo/margin": 1.2777411937713623,
|
|
"fcm_dpo/q_t": 0.33473628759384155,
|
|
"grad_norm": 107.7884521484375,
|
|
"learning_rate": 3.7955198860439887e-07,
|
|
"logits/chosen": 2.0021393299102783,
|
|
"logits/rejected": 1.834110975265503,
|
|
"logps/chosen": -78.01080322265625,
|
|
"logps/ref_chosen": -85.81719970703125,
|
|
"logps/ref_rejected": -105.49027252197266,
|
|
"logps/rejected": -98.96160888671875,
|
|
"loss": 0.9994,
|
|
"margin_dpo/margin_mean": 1.277741551399231,
|
|
"margin_dpo/margin_std": 1.8485413789749146,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.3975812547241119,
|
|
"fcm_dpo/beta": 0.7293317317962646,
|
|
"fcm_dpo/delta": 0.11338646709918976,
|
|
"fcm_dpo/margin": 1.0216336250305176,
|
|
"fcm_dpo/q_t": 0.37511640787124634,
|
|
"grad_norm": 124.12923431396484,
|
|
"learning_rate": 3.784193478933516e-07,
|
|
"logits/chosen": 1.688468098640442,
|
|
"logits/rejected": 1.4333298206329346,
|
|
"logps/chosen": -65.73736572265625,
|
|
"logps/ref_chosen": -73.61693572998047,
|
|
"logps/ref_rejected": -102.39161682128906,
|
|
"logps/rejected": -95.53368377685547,
|
|
"loss": 1.1517,
|
|
"margin_dpo/margin_mean": 1.0216336250305176,
|
|
"margin_dpo/margin_std": 1.9469211101531982,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.39909297052154197,
|
|
"fcm_dpo/beta": 0.7261339426040649,
|
|
"fcm_dpo/delta": -0.09379763156175613,
|
|
"fcm_dpo/margin": 1.287791132926941,
|
|
"fcm_dpo/q_t": 0.3396008610725403,
|
|
"grad_norm": 115.59385681152344,
|
|
"learning_rate": 3.7728311501708674e-07,
|
|
"logits/chosen": 1.5454028844833374,
|
|
"logits/rejected": 1.414917230606079,
|
|
"logps/chosen": -93.84513854980469,
|
|
"logps/ref_chosen": -101.57856750488281,
|
|
"logps/ref_rejected": -111.65735626220703,
|
|
"logps/rejected": -105.21172332763672,
|
|
"loss": 0.9606,
|
|
"margin_dpo/margin_mean": 1.2877914905548096,
|
|
"margin_dpo/margin_std": 1.829077959060669,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.40060468631897206,
|
|
"fcm_dpo/beta": 0.693960428237915,
|
|
"fcm_dpo/delta": -0.3431906998157501,
|
|
"fcm_dpo/margin": 1.6635128259658813,
|
|
"fcm_dpo/q_t": 0.31836625933647156,
|
|
"grad_norm": 101.43350982666016,
|
|
"learning_rate": 3.7614332175848027e-07,
|
|
"logits/chosen": 1.7897309064865112,
|
|
"logits/rejected": 1.6861083507537842,
|
|
"logps/chosen": -57.975059509277344,
|
|
"logps/ref_chosen": -65.76426696777344,
|
|
"logps/ref_rejected": -85.19627380371094,
|
|
"logps/rejected": -79.0705795288086,
|
|
"loss": 0.9912,
|
|
"margin_dpo/margin_mean": 1.6635124683380127,
|
|
"margin_dpo/margin_std": 2.3382043838500977,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.4021164021164021,
|
|
"fcm_dpo/beta": 0.6654868125915527,
|
|
"fcm_dpo/delta": -0.08284502476453781,
|
|
"fcm_dpo/margin": 1.3903380632400513,
|
|
"fcm_dpo/q_t": 0.3421247601509094,
|
|
"grad_norm": 109.36337280273438,
|
|
"learning_rate": 3.75e-07,
|
|
"logits/chosen": 1.7660210132598877,
|
|
"logits/rejected": 1.606216549873352,
|
|
"logps/chosen": -67.14865112304688,
|
|
"logps/ref_chosen": -75.05682373046875,
|
|
"logps/ref_rejected": -97.52758026123047,
|
|
"logps/rejected": -91.00975036621094,
|
|
"loss": 1.0966,
|
|
"margin_dpo/margin_mean": 1.3903379440307617,
|
|
"margin_dpo/margin_std": 2.380831718444824,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.4036281179138322,
|
|
"fcm_dpo/beta": 0.6768280267715454,
|
|
"fcm_dpo/delta": 0.007773265242576599,
|
|
"fcm_dpo/margin": 1.2320008277893066,
|
|
"fcm_dpo/q_t": 0.34474682807922363,
|
|
"grad_norm": 93.51432800292969,
|
|
"learning_rate": 3.738531817228131e-07,
|
|
"logits/chosen": 1.7341187000274658,
|
|
"logits/rejected": 1.6710472106933594,
|
|
"logps/chosen": -63.215660095214844,
|
|
"logps/ref_chosen": -71.13494110107422,
|
|
"logps/ref_rejected": -81.14566040039062,
|
|
"logps/rejected": -74.45838928222656,
|
|
"loss": 1.0138,
|
|
"margin_dpo/margin_mean": 1.2320003509521484,
|
|
"margin_dpo/margin_std": 1.68747878074646,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.4051398337112623,
|
|
"fcm_dpo/beta": 0.6612510085105896,
|
|
"fcm_dpo/delta": -0.019557401537895203,
|
|
"fcm_dpo/margin": 0.8605862855911255,
|
|
"fcm_dpo/q_t": 0.40874695777893066,
|
|
"grad_norm": 110.64611053466797,
|
|
"learning_rate": 3.7270289900589204e-07,
|
|
"logits/chosen": 1.6089922189712524,
|
|
"logits/rejected": 1.5754320621490479,
|
|
"logps/chosen": -72.44566345214844,
|
|
"logps/ref_chosen": -80.06082153320312,
|
|
"logps/ref_rejected": -87.43035888671875,
|
|
"logps/rejected": -80.67579650878906,
|
|
"loss": 1.2458,
|
|
"margin_dpo/margin_mean": 0.860586404800415,
|
|
"margin_dpo/margin_std": 2.068237066268921,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.40665154950869237,
|
|
"fcm_dpo/beta": 0.6837191581726074,
|
|
"fcm_dpo/delta": 0.07626542448997498,
|
|
"fcm_dpo/margin": 1.1217018365859985,
|
|
"fcm_dpo/q_t": 0.3589403033256531,
|
|
"grad_norm": 116.86734008789062,
|
|
"learning_rate": 3.7154918402511714e-07,
|
|
"logits/chosen": 2.1879143714904785,
|
|
"logits/rejected": 2.1236653327941895,
|
|
"logps/chosen": -75.88362121582031,
|
|
"logps/ref_chosen": -83.36944580078125,
|
|
"logps/ref_rejected": -100.66839599609375,
|
|
"logps/rejected": -94.30427551269531,
|
|
"loss": 1.0928,
|
|
"margin_dpo/margin_mean": 1.121701955795288,
|
|
"margin_dpo/margin_std": 1.7563908100128174,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.40816326530612246,
|
|
"fcm_dpo/beta": 0.6945409774780273,
|
|
"fcm_dpo/delta": 0.22258900105953217,
|
|
"fcm_dpo/margin": 0.9228448867797852,
|
|
"fcm_dpo/q_t": 0.38596194982528687,
|
|
"grad_norm": 128.61280822753906,
|
|
"learning_rate": 3.7039206905237656e-07,
|
|
"logits/chosen": 1.8595054149627686,
|
|
"logits/rejected": 1.6868481636047363,
|
|
"logps/chosen": -77.72801208496094,
|
|
"logps/ref_chosen": -85.35945129394531,
|
|
"logps/ref_rejected": -104.47489929199219,
|
|
"logps/rejected": -97.76631164550781,
|
|
"loss": 1.1717,
|
|
"margin_dpo/margin_mean": 0.9228445291519165,
|
|
"margin_dpo/margin_std": 1.8657047748565674,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.40967498110355255,
|
|
"fcm_dpo/beta": 0.7304049134254456,
|
|
"fcm_dpo/delta": 0.2556050419807434,
|
|
"fcm_dpo/margin": 0.833530068397522,
|
|
"fcm_dpo/q_t": 0.41634997725486755,
|
|
"grad_norm": 134.74444580078125,
|
|
"learning_rate": 3.692315864546635e-07,
|
|
"logits/chosen": 1.6439170837402344,
|
|
"logits/rejected": 1.5446662902832031,
|
|
"logps/chosen": -78.65917205810547,
|
|
"logps/ref_chosen": -86.01373291015625,
|
|
"logps/ref_rejected": -109.99561309814453,
|
|
"logps/rejected": -103.47459411621094,
|
|
"loss": 1.3205,
|
|
"margin_dpo/margin_mean": 0.8335303068161011,
|
|
"margin_dpo/margin_std": 2.265486717224121,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.41118669690098264,
|
|
"fcm_dpo/beta": 0.7071244716644287,
|
|
"fcm_dpo/delta": -0.19628196954727173,
|
|
"fcm_dpo/margin": 1.4435720443725586,
|
|
"fcm_dpo/q_t": 0.311567485332489,
|
|
"grad_norm": 101.71968841552734,
|
|
"learning_rate": 3.6806776869317067e-07,
|
|
"logits/chosen": 1.8980367183685303,
|
|
"logits/rejected": 1.907454252243042,
|
|
"logps/chosen": -78.45289611816406,
|
|
"logps/ref_chosen": -86.37013244628906,
|
|
"logps/ref_rejected": -85.74638366699219,
|
|
"logps/rejected": -79.27271270751953,
|
|
"loss": 0.9784,
|
|
"margin_dpo/margin_mean": 1.4435718059539795,
|
|
"margin_dpo/margin_std": 2.2550201416015625,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.4126984126984127,
|
|
"fcm_dpo/beta": 0.6779999136924744,
|
|
"fcm_dpo/delta": -0.2062806934118271,
|
|
"fcm_dpo/margin": 1.5170402526855469,
|
|
"fcm_dpo/q_t": 0.31864964962005615,
|
|
"grad_norm": 97.80946350097656,
|
|
"learning_rate": 3.669006483223828e-07,
|
|
"logits/chosen": 2.017239809036255,
|
|
"logits/rejected": 1.89370858669281,
|
|
"logps/chosen": -67.67605590820312,
|
|
"logps/ref_chosen": -75.51087951660156,
|
|
"logps/ref_rejected": -101.60345458984375,
|
|
"logps/rejected": -95.28567504882812,
|
|
"loss": 0.9779,
|
|
"margin_dpo/margin_mean": 1.5170400142669678,
|
|
"margin_dpo/margin_std": 2.1456356048583984,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.41421012849584277,
|
|
"fcm_dpo/beta": 0.6887916326522827,
|
|
"fcm_dpo/delta": 0.07680375128984451,
|
|
"fcm_dpo/margin": 1.1310944557189941,
|
|
"fcm_dpo/q_t": 0.3631494641304016,
|
|
"grad_norm": 110.92794036865234,
|
|
"learning_rate": 3.657302579891656e-07,
|
|
"logits/chosen": 1.6533360481262207,
|
|
"logits/rejected": 1.6071021556854248,
|
|
"logps/chosen": -71.356201171875,
|
|
"logps/ref_chosen": -79.040283203125,
|
|
"logps/ref_rejected": -86.31329345703125,
|
|
"logps/rejected": -79.76029968261719,
|
|
"loss": 1.0728,
|
|
"margin_dpo/margin_mean": 1.1310945749282837,
|
|
"margin_dpo/margin_std": 1.8437747955322266,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.41572184429327286,
|
|
"fcm_dpo/beta": 0.6772985458374023,
|
|
"fcm_dpo/delta": -0.1687670648097992,
|
|
"fcm_dpo/margin": 1.4796142578125,
|
|
"fcm_dpo/q_t": 0.32771944999694824,
|
|
"grad_norm": 96.30763244628906,
|
|
"learning_rate": 3.645566304318526e-07,
|
|
"logits/chosen": 1.5781314373016357,
|
|
"logits/rejected": 1.3936455249786377,
|
|
"logps/chosen": -63.84782409667969,
|
|
"logps/ref_chosen": -71.82034301757812,
|
|
"logps/ref_rejected": -94.29946899414062,
|
|
"logps/rejected": -87.80656433105469,
|
|
"loss": 0.9312,
|
|
"margin_dpo/margin_mean": 1.4796141386032104,
|
|
"margin_dpo/margin_std": 1.9710824489593506,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.41723356009070295,
|
|
"fcm_dpo/beta": 0.6629969477653503,
|
|
"fcm_dpo/delta": 0.0017523542046546936,
|
|
"fcm_dpo/margin": 1.2779521942138672,
|
|
"fcm_dpo/q_t": 0.35035306215286255,
|
|
"grad_norm": 104.47041320800781,
|
|
"learning_rate": 3.633797984793294e-07,
|
|
"logits/chosen": 1.33461594581604,
|
|
"logits/rejected": 1.284557580947876,
|
|
"logps/chosen": -61.51626205444336,
|
|
"logps/ref_chosen": -69.54020690917969,
|
|
"logps/ref_rejected": -78.59674072265625,
|
|
"logps/rejected": -71.85074615478516,
|
|
"loss": 1.0768,
|
|
"margin_dpo/margin_mean": 1.277951717376709,
|
|
"margin_dpo/margin_std": 2.062228202819824,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.41874527588813304,
|
|
"fcm_dpo/beta": 0.7123348116874695,
|
|
"fcm_dpo/delta": 0.4012848138809204,
|
|
"fcm_dpo/margin": 0.6534094214439392,
|
|
"fcm_dpo/q_t": 0.4210251569747925,
|
|
"grad_norm": 130.19923400878906,
|
|
"learning_rate": 3.6219979505011555e-07,
|
|
"logits/chosen": 1.9294466972351074,
|
|
"logits/rejected": 1.9969127178192139,
|
|
"logps/chosen": -87.2117919921875,
|
|
"logps/ref_chosen": -94.4896240234375,
|
|
"logps/ref_rejected": -85.45901489257812,
|
|
"logps/rejected": -78.8345947265625,
|
|
"loss": 1.4113,
|
|
"margin_dpo/margin_mean": 0.6534090638160706,
|
|
"margin_dpo/margin_std": 2.0752005577087402,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.42025699168556313,
|
|
"fcm_dpo/beta": 0.7564055323600769,
|
|
"fcm_dpo/delta": 0.16849283874034882,
|
|
"fcm_dpo/margin": 0.9032930135726929,
|
|
"fcm_dpo/q_t": 0.3842105269432068,
|
|
"grad_norm": 154.10960388183594,
|
|
"learning_rate": 3.6101665315144353e-07,
|
|
"logits/chosen": 1.4928083419799805,
|
|
"logits/rejected": 1.3751368522644043,
|
|
"logps/chosen": -79.97828674316406,
|
|
"logps/ref_chosen": -87.42613220214844,
|
|
"logps/ref_rejected": -105.44854736328125,
|
|
"logps/rejected": -98.90399932861328,
|
|
"loss": 1.2411,
|
|
"margin_dpo/margin_mean": 0.9032933712005615,
|
|
"margin_dpo/margin_std": 1.8733234405517578,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.4217687074829932,
|
|
"fcm_dpo/beta": 0.7383002042770386,
|
|
"fcm_dpo/delta": -0.1532890498638153,
|
|
"fcm_dpo/margin": 1.3381755352020264,
|
|
"fcm_dpo/q_t": 0.31081023812294006,
|
|
"grad_norm": 95.42361450195312,
|
|
"learning_rate": 3.5983040587833563e-07,
|
|
"logits/chosen": 1.6094672679901123,
|
|
"logits/rejected": 1.5519022941589355,
|
|
"logps/chosen": -62.492191314697266,
|
|
"logps/ref_chosen": -70.516845703125,
|
|
"logps/ref_rejected": -86.04249572753906,
|
|
"logps/rejected": -79.35601806640625,
|
|
"loss": 0.922,
|
|
"margin_dpo/margin_mean": 1.3381754159927368,
|
|
"margin_dpo/margin_std": 1.6217188835144043,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.42328042328042326,
|
|
"fcm_dpo/beta": 0.6995072364807129,
|
|
"fcm_dpo/delta": -0.2576003074645996,
|
|
"fcm_dpo/margin": 1.5437543392181396,
|
|
"fcm_dpo/q_t": 0.2961677312850952,
|
|
"grad_norm": 82.45001983642578,
|
|
"learning_rate": 3.586410864126781e-07,
|
|
"logits/chosen": 1.6309643983840942,
|
|
"logits/rejected": 1.534651517868042,
|
|
"logps/chosen": -68.54518127441406,
|
|
"logps/ref_chosen": -76.5021743774414,
|
|
"logps/ref_rejected": -94.2752685546875,
|
|
"logps/rejected": -87.86203002929688,
|
|
"loss": 0.792,
|
|
"margin_dpo/margin_mean": 1.5437543392181396,
|
|
"margin_dpo/margin_std": 1.59554123878479,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.42479213907785335,
|
|
"fcm_dpo/beta": 0.6891137361526489,
|
|
"fcm_dpo/delta": -0.01607239432632923,
|
|
"fcm_dpo/margin": 1.2547452449798584,
|
|
"fcm_dpo/q_t": 0.34183090925216675,
|
|
"grad_norm": 86.30965423583984,
|
|
"learning_rate": 3.574487280222929e-07,
|
|
"logits/chosen": 1.7331175804138184,
|
|
"logits/rejected": 1.7544758319854736,
|
|
"logps/chosen": -69.59275817871094,
|
|
"logps/ref_chosen": -77.50468444824219,
|
|
"logps/ref_rejected": -79.05717468261719,
|
|
"logps/rejected": -72.39997863769531,
|
|
"loss": 0.9887,
|
|
"margin_dpo/margin_mean": 1.2547452449798584,
|
|
"margin_dpo/margin_std": 1.7732079029083252,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.42630385487528344,
|
|
"fcm_dpo/beta": 0.6955768465995789,
|
|
"fcm_dpo/delta": -0.03829964995384216,
|
|
"fcm_dpo/margin": 1.2650123834609985,
|
|
"fcm_dpo/q_t": 0.35389894247055054,
|
|
"grad_norm": 114.90829467773438,
|
|
"learning_rate": 3.562533640600075e-07,
|
|
"logits/chosen": 1.4182642698287964,
|
|
"logits/rejected": 1.3314048051834106,
|
|
"logps/chosen": -72.59228515625,
|
|
"logps/ref_chosen": -80.31298065185547,
|
|
"logps/ref_rejected": -83.72120666503906,
|
|
"logps/rejected": -77.26553344726562,
|
|
"loss": 1.0497,
|
|
"margin_dpo/margin_mean": 1.2650126218795776,
|
|
"margin_dpo/margin_std": 2.0284345149993896,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.42781557067271353,
|
|
"fcm_dpo/beta": 0.6725457906723022,
|
|
"fcm_dpo/delta": -0.064914271235466,
|
|
"fcm_dpo/margin": 1.3505455255508423,
|
|
"fcm_dpo/q_t": 0.35023432970046997,
|
|
"grad_norm": 110.76354217529297,
|
|
"learning_rate": 3.550550279627215e-07,
|
|
"logits/chosen": 1.724194884300232,
|
|
"logits/rejected": 1.4388937950134277,
|
|
"logps/chosen": -73.36625671386719,
|
|
"logps/ref_chosen": -80.72602844238281,
|
|
"logps/ref_rejected": -115.68379211425781,
|
|
"logps/rejected": -109.67456817626953,
|
|
"loss": 1.0482,
|
|
"margin_dpo/margin_mean": 1.3505456447601318,
|
|
"margin_dpo/margin_std": 2.190821647644043,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.4293272864701436,
|
|
"fcm_dpo/beta": 0.6819826364517212,
|
|
"fcm_dpo/delta": 0.028838299214839935,
|
|
"fcm_dpo/margin": 1.2053046226501465,
|
|
"fcm_dpo/q_t": 0.3472345471382141,
|
|
"grad_norm": 118.19925689697266,
|
|
"learning_rate": 3.5385375325047163e-07,
|
|
"logits/chosen": 1.6477620601654053,
|
|
"logits/rejected": 1.527233600616455,
|
|
"logps/chosen": -69.83183288574219,
|
|
"logps/ref_chosen": -77.5223388671875,
|
|
"logps/ref_rejected": -104.1847152709961,
|
|
"logps/rejected": -97.69950866699219,
|
|
"loss": 1.0878,
|
|
"margin_dpo/margin_mean": 1.2053046226501465,
|
|
"margin_dpo/margin_std": 1.9999240636825562,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.4308390022675737,
|
|
"fcm_dpo/beta": 0.7069652080535889,
|
|
"fcm_dpo/delta": 0.25054311752319336,
|
|
"fcm_dpo/margin": 0.866946816444397,
|
|
"fcm_dpo/q_t": 0.3933573365211487,
|
|
"grad_norm": 136.90176391601562,
|
|
"learning_rate": 3.5264957352549375e-07,
|
|
"logits/chosen": 1.8853328227996826,
|
|
"logits/rejected": 1.8092838525772095,
|
|
"logps/chosen": -78.52452087402344,
|
|
"logps/ref_chosen": -85.79348754882812,
|
|
"logps/ref_rejected": -96.46463775634766,
|
|
"logps/rejected": -90.06261444091797,
|
|
"loss": 1.2171,
|
|
"margin_dpo/margin_mean": 0.8669461011886597,
|
|
"margin_dpo/margin_std": 1.8605599403381348,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.4323507180650038,
|
|
"fcm_dpo/beta": 0.6897353529930115,
|
|
"fcm_dpo/delta": -0.19799461960792542,
|
|
"fcm_dpo/margin": 1.48897123336792,
|
|
"fcm_dpo/q_t": 0.3047965168952942,
|
|
"grad_norm": 103.36872863769531,
|
|
"learning_rate": 3.514425224712835e-07,
|
|
"logits/chosen": 1.4609103202819824,
|
|
"logits/rejected": 1.2690503597259521,
|
|
"logps/chosen": -70.50439453125,
|
|
"logps/ref_chosen": -77.86268615722656,
|
|
"logps/ref_rejected": -110.77134704589844,
|
|
"logps/rejected": -104.90202331542969,
|
|
"loss": 0.9198,
|
|
"margin_dpo/margin_mean": 1.4889705181121826,
|
|
"margin_dpo/margin_std": 1.9027836322784424,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.43386243386243384,
|
|
"fcm_dpo/beta": 0.6805661916732788,
|
|
"fcm_dpo/delta": -0.16245746612548828,
|
|
"fcm_dpo/margin": 1.4589457511901855,
|
|
"fcm_dpo/q_t": 0.32934582233428955,
|
|
"grad_norm": 129.42222595214844,
|
|
"learning_rate": 3.502326338516534e-07,
|
|
"logits/chosen": 1.5750706195831299,
|
|
"logits/rejected": 1.5692813396453857,
|
|
"logps/chosen": -54.83763885498047,
|
|
"logps/ref_chosen": -62.552825927734375,
|
|
"logps/ref_rejected": -77.7650146484375,
|
|
"logps/rejected": -71.5087661743164,
|
|
"loss": 1.0502,
|
|
"margin_dpo/margin_mean": 1.4589459896087646,
|
|
"margin_dpo/margin_std": 2.2289929389953613,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.43537414965986393,
|
|
"fcm_dpo/beta": 0.6700143218040466,
|
|
"fcm_dpo/delta": 0.08774229884147644,
|
|
"fcm_dpo/margin": 1.147871732711792,
|
|
"fcm_dpo/q_t": 0.367706835269928,
|
|
"grad_norm": 119.22252655029297,
|
|
"learning_rate": 3.490199415097892e-07,
|
|
"logits/chosen": 1.2430505752563477,
|
|
"logits/rejected": 1.1871659755706787,
|
|
"logps/chosen": -76.31163024902344,
|
|
"logps/ref_chosen": -83.74117279052734,
|
|
"logps/ref_rejected": -106.93913269042969,
|
|
"logps/rejected": -100.65745544433594,
|
|
"loss": 1.1886,
|
|
"margin_dpo/margin_mean": 1.1478712558746338,
|
|
"margin_dpo/margin_std": 2.267000675201416,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.436885865457294,
|
|
"fcm_dpo/beta": 0.6890686750411987,
|
|
"fcm_dpo/delta": 0.14361050724983215,
|
|
"fcm_dpo/margin": 1.0400171279907227,
|
|
"fcm_dpo/q_t": 0.37594807147979736,
|
|
"grad_norm": 107.06436157226562,
|
|
"learning_rate": 3.4780447936730247e-07,
|
|
"logits/chosen": 2.0020227432250977,
|
|
"logits/rejected": 1.9443631172180176,
|
|
"logps/chosen": -65.77583312988281,
|
|
"logps/ref_chosen": -73.04204559326172,
|
|
"logps/ref_rejected": -88.07904052734375,
|
|
"logps/rejected": -81.85284423828125,
|
|
"loss": 1.0901,
|
|
"margin_dpo/margin_mean": 1.0400168895721436,
|
|
"margin_dpo/margin_std": 1.791898250579834,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.4383975812547241,
|
|
"fcm_dpo/beta": 0.6793229579925537,
|
|
"fcm_dpo/delta": -0.10350015759468079,
|
|
"fcm_dpo/margin": 1.387347936630249,
|
|
"fcm_dpo/q_t": 0.3246391713619232,
|
|
"grad_norm": 89.62129211425781,
|
|
"learning_rate": 3.465862814232821e-07,
|
|
"logits/chosen": 1.905479073524475,
|
|
"logits/rejected": 1.8604974746704102,
|
|
"logps/chosen": -71.6802978515625,
|
|
"logps/ref_chosen": -78.60614013671875,
|
|
"logps/ref_rejected": -108.50082397460938,
|
|
"logps/rejected": -102.96233367919922,
|
|
"loss": 0.9096,
|
|
"margin_dpo/margin_mean": 1.3873467445373535,
|
|
"margin_dpo/margin_std": 1.714540958404541,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.4399092970521542,
|
|
"fcm_dpo/beta": 0.6729252338409424,
|
|
"fcm_dpo/delta": -0.13280794024467468,
|
|
"fcm_dpo/margin": 1.4405841827392578,
|
|
"fcm_dpo/q_t": 0.33513006567955017,
|
|
"grad_norm": 92.02787017822266,
|
|
"learning_rate": 3.4536538175334343e-07,
|
|
"logits/chosen": 1.7266473770141602,
|
|
"logits/rejected": 1.5194730758666992,
|
|
"logps/chosen": -59.418121337890625,
|
|
"logps/ref_chosen": -66.71226501464844,
|
|
"logps/ref_rejected": -96.14029693603516,
|
|
"logps/rejected": -90.28672790527344,
|
|
"loss": 1.0444,
|
|
"margin_dpo/margin_mean": 1.4405841827392578,
|
|
"margin_dpo/margin_std": 2.1988980770111084,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.4414210128495843,
|
|
"fcm_dpo/beta": 0.6711477041244507,
|
|
"fcm_dpo/delta": 0.08469577133655548,
|
|
"fcm_dpo/margin": 1.1498453617095947,
|
|
"fcm_dpo/q_t": 0.37209683656692505,
|
|
"grad_norm": 128.0401611328125,
|
|
"learning_rate": 3.4414181450867465e-07,
|
|
"logits/chosen": 1.6419053077697754,
|
|
"logits/rejected": 1.5817103385925293,
|
|
"logps/chosen": -72.85503387451172,
|
|
"logps/ref_chosen": -80.3355484008789,
|
|
"logps/ref_rejected": -90.44906616210938,
|
|
"logps/rejected": -84.1183853149414,
|
|
"loss": 1.1254,
|
|
"margin_dpo/margin_mean": 1.1498451232910156,
|
|
"margin_dpo/margin_std": 2.085261583328247,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.4429327286470144,
|
|
"fcm_dpo/beta": 0.6578654050827026,
|
|
"fcm_dpo/delta": -0.17292258143424988,
|
|
"fcm_dpo/margin": 1.5288608074188232,
|
|
"fcm_dpo/q_t": 0.3235200047492981,
|
|
"grad_norm": 98.03875732421875,
|
|
"learning_rate": 3.4291561391508185e-07,
|
|
"logits/chosen": 1.8567837476730347,
|
|
"logits/rejected": 1.749990701675415,
|
|
"logps/chosen": -64.44340515136719,
|
|
"logps/ref_chosen": -71.69970703125,
|
|
"logps/ref_rejected": -102.13948059082031,
|
|
"logps/rejected": -96.41204833984375,
|
|
"loss": 1.0279,
|
|
"margin_dpo/margin_mean": 1.5288608074188232,
|
|
"margin_dpo/margin_std": 2.306509017944336,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.4444444444444444,
|
|
"fcm_dpo/beta": 0.6519556045532227,
|
|
"fcm_dpo/delta": 0.06831908226013184,
|
|
"fcm_dpo/margin": 1.2061865329742432,
|
|
"fcm_dpo/q_t": 0.3630805015563965,
|
|
"grad_norm": 106.47350311279297,
|
|
"learning_rate": 3.4168681427203153e-07,
|
|
"logits/chosen": 1.8985698223114014,
|
|
"logits/rejected": 1.8086557388305664,
|
|
"logps/chosen": -63.361061096191406,
|
|
"logps/ref_chosen": -70.73458862304688,
|
|
"logps/ref_rejected": -86.68821716308594,
|
|
"logps/rejected": -80.5208740234375,
|
|
"loss": 1.0708,
|
|
"margin_dpo/margin_mean": 1.206186056137085,
|
|
"margin_dpo/margin_std": 2.0081372261047363,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.4459561602418745,
|
|
"fcm_dpo/beta": 0.6680145263671875,
|
|
"fcm_dpo/delta": 0.033205777406692505,
|
|
"fcm_dpo/margin": 1.2245148420333862,
|
|
"fcm_dpo/q_t": 0.36217230558395386,
|
|
"grad_norm": 95.26288604736328,
|
|
"learning_rate": 3.4045544995169125e-07,
|
|
"logits/chosen": 1.6314678192138672,
|
|
"logits/rejected": 1.4151748418807983,
|
|
"logps/chosen": -59.078224182128906,
|
|
"logps/ref_chosen": -66.42644500732422,
|
|
"logps/ref_rejected": -99.58766174316406,
|
|
"logps/rejected": -93.46395874023438,
|
|
"loss": 1.073,
|
|
"margin_dpo/margin_mean": 1.2245147228240967,
|
|
"margin_dpo/margin_std": 2.132988929748535,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.4474678760393046,
|
|
"fcm_dpo/beta": 0.6513910293579102,
|
|
"fcm_dpo/delta": -0.0948001965880394,
|
|
"fcm_dpo/margin": 1.4355227947235107,
|
|
"fcm_dpo/q_t": 0.34222400188446045,
|
|
"grad_norm": 112.76392364501953,
|
|
"learning_rate": 3.392215553979679e-07,
|
|
"logits/chosen": 1.5478891134262085,
|
|
"logits/rejected": 1.4268944263458252,
|
|
"logps/chosen": -80.12300872802734,
|
|
"logps/ref_chosen": -87.47459411621094,
|
|
"logps/ref_rejected": -103.96894836425781,
|
|
"logps/rejected": -98.05288696289062,
|
|
"loss": 1.0099,
|
|
"margin_dpo/margin_mean": 1.4355229139328003,
|
|
"margin_dpo/margin_std": 2.1124844551086426,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.4489795918367347,
|
|
"fcm_dpo/beta": 0.6308517456054688,
|
|
"fcm_dpo/delta": -0.2799900472164154,
|
|
"fcm_dpo/margin": 1.7435264587402344,
|
|
"fcm_dpo/q_t": 0.2898668050765991,
|
|
"grad_norm": 77.33614349365234,
|
|
"learning_rate": 3.3798516512554485e-07,
|
|
"logits/chosen": 1.6062670946121216,
|
|
"logits/rejected": 1.498281717300415,
|
|
"logps/chosen": -66.13429260253906,
|
|
"logps/ref_chosen": -73.46731567382812,
|
|
"logps/ref_rejected": -88.22674560546875,
|
|
"logps/rejected": -82.63723754882812,
|
|
"loss": 0.7987,
|
|
"margin_dpo/margin_mean": 1.7435266971588135,
|
|
"margin_dpo/margin_std": 1.7232532501220703,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.4504913076341648,
|
|
"fcm_dpo/beta": 0.6258028745651245,
|
|
"fcm_dpo/delta": 0.04764336347579956,
|
|
"fcm_dpo/margin": 1.2851078510284424,
|
|
"fcm_dpo/q_t": 0.35264813899993896,
|
|
"grad_norm": 84.5466079711914,
|
|
"learning_rate": 3.367463137189156e-07,
|
|
"logits/chosen": 2.073479175567627,
|
|
"logits/rejected": 1.9958412647247314,
|
|
"logps/chosen": -65.99359130859375,
|
|
"logps/ref_chosen": -73.21676635742188,
|
|
"logps/ref_rejected": -84.9563217163086,
|
|
"logps/rejected": -79.01826477050781,
|
|
"loss": 1.0548,
|
|
"margin_dpo/margin_mean": 1.2851074934005737,
|
|
"margin_dpo/margin_std": 2.02394437789917,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.4520030234315949,
|
|
"fcm_dpo/beta": 0.6498622894287109,
|
|
"fcm_dpo/delta": 0.2780328392982483,
|
|
"fcm_dpo/margin": 0.9038135409355164,
|
|
"fcm_dpo/q_t": 0.39124494791030884,
|
|
"grad_norm": 99.46502685546875,
|
|
"learning_rate": 3.355050358314172e-07,
|
|
"logits/chosen": 1.5133554935455322,
|
|
"logits/rejected": 1.4368096590042114,
|
|
"logps/chosen": -69.8514633178711,
|
|
"logps/ref_chosen": -76.9534912109375,
|
|
"logps/ref_rejected": -87.53433227539062,
|
|
"logps/rejected": -81.33611297607422,
|
|
"loss": 1.1256,
|
|
"margin_dpo/margin_mean": 0.9038130044937134,
|
|
"margin_dpo/margin_std": 1.717150330543518,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.45351473922902497,
|
|
"fcm_dpo/beta": 0.6565319299697876,
|
|
"fcm_dpo/delta": 0.0751301571726799,
|
|
"fcm_dpo/margin": 1.1859982013702393,
|
|
"fcm_dpo/q_t": 0.3662998080253601,
|
|
"grad_norm": 105.666015625,
|
|
"learning_rate": 3.3426136618426043e-07,
|
|
"logits/chosen": 1.7022504806518555,
|
|
"logits/rejected": 1.5639350414276123,
|
|
"logps/chosen": -71.36064147949219,
|
|
"logps/ref_chosen": -78.36398315429688,
|
|
"logps/ref_rejected": -97.03912353515625,
|
|
"logps/rejected": -91.22178649902344,
|
|
"loss": 1.1394,
|
|
"margin_dpo/margin_mean": 1.1859978437423706,
|
|
"margin_dpo/margin_std": 2.2330620288848877,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.45351473922902497,
|
|
"eval_fcm_dpo/beta": 0.6672008633613586,
|
|
"eval_logits/chosen": 1.875981330871582,
|
|
"eval_logits/rejected": 1.7587411403656006,
|
|
"eval_logps/chosen": -79.80497741699219,
|
|
"eval_logps/ref_chosen": -86.90177917480469,
|
|
"eval_logps/ref_rejected": -96.69639587402344,
|
|
"eval_logps/rejected": -90.79612731933594,
|
|
"eval_loss": 0.5554325580596924,
|
|
"eval_margin_dpo/margin_mean": 1.1965415477752686,
|
|
"eval_margin_dpo/margin_std": 2.101234197616577,
|
|
"eval_runtime": 42.2923,
|
|
"eval_samples_per_second": 54.454,
|
|
"eval_steps_per_second": 1.702,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.455026455026455,
|
|
"fcm_dpo/beta": 0.6764302253723145,
|
|
"fcm_dpo/delta": 0.14282676577568054,
|
|
"fcm_dpo/margin": 1.0604530572891235,
|
|
"fcm_dpo/q_t": 0.3812572956085205,
|
|
"grad_norm": 111.0069351196289,
|
|
"learning_rate": 3.3301533956555885e-07,
|
|
"logits/chosen": 1.8055815696716309,
|
|
"logits/rejected": 1.753129482269287,
|
|
"logps/chosen": -63.5869255065918,
|
|
"logps/ref_chosen": -70.6719741821289,
|
|
"logps/ref_rejected": -87.11650085449219,
|
|
"logps/rejected": -81.09191131591797,
|
|
"loss": 1.1743,
|
|
"margin_dpo/margin_mean": 1.060452938079834,
|
|
"margin_dpo/margin_std": 2.0663914680480957,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.4565381708238851,
|
|
"fcm_dpo/beta": 0.7274559140205383,
|
|
"fcm_dpo/delta": 0.33254778385162354,
|
|
"fcm_dpo/margin": 0.7300405502319336,
|
|
"fcm_dpo/q_t": 0.40241554379463196,
|
|
"grad_norm": 123.19378662109375,
|
|
"learning_rate": 3.317669908293554e-07,
|
|
"logits/chosen": 1.402217149734497,
|
|
"logits/rejected": 1.3271420001983643,
|
|
"logps/chosen": -78.32188415527344,
|
|
"logps/ref_chosen": -85.29096221923828,
|
|
"logps/ref_rejected": -106.22589874267578,
|
|
"logps/rejected": -99.98685455322266,
|
|
"loss": 1.2018,
|
|
"margin_dpo/margin_mean": 0.7300394773483276,
|
|
"margin_dpo/margin_std": 1.5843493938446045,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.4580498866213152,
|
|
"fcm_dpo/beta": 0.6903226375579834,
|
|
"fcm_dpo/delta": -0.33917659521102905,
|
|
"fcm_dpo/margin": 1.6581306457519531,
|
|
"fcm_dpo/q_t": 0.30730390548706055,
|
|
"grad_norm": 94.3570556640625,
|
|
"learning_rate": 3.3051635489464793e-07,
|
|
"logits/chosen": 1.7029955387115479,
|
|
"logits/rejected": 1.6008670330047607,
|
|
"logps/chosen": -76.84014129638672,
|
|
"logps/ref_chosen": -83.90059661865234,
|
|
"logps/ref_rejected": -104.7340087890625,
|
|
"logps/rejected": -99.33168029785156,
|
|
"loss": 0.8927,
|
|
"margin_dpo/margin_mean": 1.6581302881240845,
|
|
"margin_dpo/margin_std": 2.1296894550323486,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.4595616024187453,
|
|
"fcm_dpo/beta": 0.6939245462417603,
|
|
"fcm_dpo/delta": 0.02791178971529007,
|
|
"fcm_dpo/margin": 1.1859949827194214,
|
|
"fcm_dpo/q_t": 0.3410758972167969,
|
|
"grad_norm": 110.60444641113281,
|
|
"learning_rate": 3.292634667444117e-07,
|
|
"logits/chosen": 1.6923638582229614,
|
|
"logits/rejected": 1.5971921682357788,
|
|
"logps/chosen": -70.20957946777344,
|
|
"logps/ref_chosen": -77.39997100830078,
|
|
"logps/ref_rejected": -94.21647644042969,
|
|
"logps/rejected": -88.21208190917969,
|
|
"loss": 0.9704,
|
|
"margin_dpo/margin_mean": 1.1859947443008423,
|
|
"margin_dpo/margin_std": 1.6047704219818115,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.46107331821617537,
|
|
"fcm_dpo/beta": 0.6839466094970703,
|
|
"fcm_dpo/delta": 0.019364356994628906,
|
|
"fcm_dpo/margin": 1.2137377262115479,
|
|
"fcm_dpo/q_t": 0.3603229522705078,
|
|
"grad_norm": 109.2071762084961,
|
|
"learning_rate": 3.280083614246217e-07,
|
|
"logits/chosen": 1.5578687191009521,
|
|
"logits/rejected": 1.6177153587341309,
|
|
"logps/chosen": -83.97364807128906,
|
|
"logps/ref_chosen": -90.90805053710938,
|
|
"logps/ref_rejected": -85.84992980957031,
|
|
"logps/rejected": -80.1292724609375,
|
|
"loss": 1.1384,
|
|
"margin_dpo/margin_mean": 1.213738203048706,
|
|
"margin_dpo/margin_std": 2.1745810508728027,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.46258503401360546,
|
|
"fcm_dpo/beta": 0.6984622478485107,
|
|
"fcm_dpo/delta": 0.1099298745393753,
|
|
"fcm_dpo/margin": 1.0709459781646729,
|
|
"fcm_dpo/q_t": 0.37685760855674744,
|
|
"grad_norm": 103.61567687988281,
|
|
"learning_rate": 3.267510740432719e-07,
|
|
"logits/chosen": 1.6553785800933838,
|
|
"logits/rejected": 1.441781997680664,
|
|
"logps/chosen": -64.66431427001953,
|
|
"logps/ref_chosen": -71.7261962890625,
|
|
"logps/ref_rejected": -97.70491027832031,
|
|
"logps/rejected": -91.71397399902344,
|
|
"loss": 1.103,
|
|
"margin_dpo/margin_mean": 1.0709459781646729,
|
|
"margin_dpo/margin_std": 2.0044429302215576,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.46409674981103555,
|
|
"fcm_dpo/beta": 0.725088357925415,
|
|
"fcm_dpo/delta": 0.142677903175354,
|
|
"fcm_dpo/margin": 0.9886835813522339,
|
|
"fcm_dpo/q_t": 0.383963018655777,
|
|
"grad_norm": 117.87000274658203,
|
|
"learning_rate": 3.2549163976939285e-07,
|
|
"logits/chosen": 1.6312376260757446,
|
|
"logits/rejected": 1.5077619552612305,
|
|
"logps/chosen": -67.0914306640625,
|
|
"logps/ref_chosen": -74.38668823242188,
|
|
"logps/ref_rejected": -84.16001892089844,
|
|
"logps/rejected": -77.85345458984375,
|
|
"loss": 1.2788,
|
|
"margin_dpo/margin_mean": 0.9886834621429443,
|
|
"margin_dpo/margin_std": 2.2475128173828125,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.4656084656084656,
|
|
"fcm_dpo/beta": 0.7155085802078247,
|
|
"fcm_dpo/delta": -0.11654899269342422,
|
|
"fcm_dpo/margin": 1.3352723121643066,
|
|
"fcm_dpo/q_t": 0.3403710424900055,
|
|
"grad_norm": 109.76605987548828,
|
|
"learning_rate": 3.2423009383206874e-07,
|
|
"logits/chosen": 1.6301313638687134,
|
|
"logits/rejected": 1.6842124462127686,
|
|
"logps/chosen": -80.45036315917969,
|
|
"logps/ref_chosen": -87.50894165039062,
|
|
"logps/ref_rejected": -94.80848693847656,
|
|
"logps/rejected": -89.08517456054688,
|
|
"loss": 1.0804,
|
|
"margin_dpo/margin_mean": 1.3352723121643066,
|
|
"margin_dpo/margin_std": 2.199124813079834,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.4671201814058957,
|
|
"fcm_dpo/beta": 0.6909149885177612,
|
|
"fcm_dpo/delta": -0.06711931526660919,
|
|
"fcm_dpo/margin": 1.3067526817321777,
|
|
"fcm_dpo/q_t": 0.3539488911628723,
|
|
"grad_norm": 115.49034881591797,
|
|
"learning_rate": 3.229664715194511e-07,
|
|
"logits/chosen": 2.0674304962158203,
|
|
"logits/rejected": 1.9489227533340454,
|
|
"logps/chosen": -75.26066589355469,
|
|
"logps/ref_chosen": -82.15191650390625,
|
|
"logps/ref_rejected": -95.03496551513672,
|
|
"logps/rejected": -89.45046997070312,
|
|
"loss": 1.0461,
|
|
"margin_dpo/margin_mean": 1.3067526817321777,
|
|
"margin_dpo/margin_std": 2.162260055541992,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.46863189720332576,
|
|
"fcm_dpo/beta": 0.7321085929870605,
|
|
"fcm_dpo/delta": 0.2117757946252823,
|
|
"fcm_dpo/margin": 0.48994821310043335,
|
|
"fcm_dpo/q_t": 0.4401339292526245,
|
|
"grad_norm": 151.37319946289062,
|
|
"learning_rate": 3.2170080817777257e-07,
|
|
"logits/chosen": 1.7694876194000244,
|
|
"logits/rejected": 1.7599756717681885,
|
|
"logps/chosen": -87.14959716796875,
|
|
"logps/ref_chosen": -93.7555160522461,
|
|
"logps/ref_rejected": -96.93236541748047,
|
|
"logps/rejected": -90.81639099121094,
|
|
"loss": 1.5129,
|
|
"margin_dpo/margin_mean": 0.4899486005306244,
|
|
"margin_dpo/margin_std": 2.0309510231018066,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.47014361300075586,
|
|
"fcm_dpo/beta": 0.7175389528274536,
|
|
"fcm_dpo/delta": -0.07844534516334534,
|
|
"fcm_dpo/margin": 1.2810978889465332,
|
|
"fcm_dpo/q_t": 0.35001736879348755,
|
|
"grad_norm": 114.28585052490234,
|
|
"learning_rate": 3.204331392103574e-07,
|
|
"logits/chosen": 1.8003596067428589,
|
|
"logits/rejected": 1.5091215372085571,
|
|
"logps/chosen": -68.82054138183594,
|
|
"logps/ref_chosen": -76.20762634277344,
|
|
"logps/ref_rejected": -110.48141479492188,
|
|
"logps/rejected": -104.37541961669922,
|
|
"loss": 1.0974,
|
|
"margin_dpo/margin_mean": 1.281097650527954,
|
|
"margin_dpo/margin_std": 2.2102816104888916,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.47165532879818595,
|
|
"fcm_dpo/beta": 0.6969722509384155,
|
|
"fcm_dpo/delta": -0.2547772526741028,
|
|
"fcm_dpo/margin": 1.5465623140335083,
|
|
"fcm_dpo/q_t": 0.32185274362564087,
|
|
"grad_norm": 94.9644546508789,
|
|
"learning_rate": 3.1916350007663176e-07,
|
|
"logits/chosen": 1.5592865943908691,
|
|
"logits/rejected": 1.4448232650756836,
|
|
"logps/chosen": -62.12285614013672,
|
|
"logps/ref_chosen": -69.08878326416016,
|
|
"logps/ref_rejected": -91.84494018554688,
|
|
"logps/rejected": -86.42558288574219,
|
|
"loss": 0.9426,
|
|
"margin_dpo/margin_mean": 1.546562910079956,
|
|
"margin_dpo/margin_std": 2.1454343795776367,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.47316704459561604,
|
|
"fcm_dpo/beta": 0.7049161195755005,
|
|
"fcm_dpo/delta": 0.18395394086837769,
|
|
"fcm_dpo/margin": 0.9621329307556152,
|
|
"fcm_dpo/q_t": 0.3967057466506958,
|
|
"grad_norm": 119.09286499023438,
|
|
"learning_rate": 3.178919262911314e-07,
|
|
"logits/chosen": 1.6497703790664673,
|
|
"logits/rejected": 1.6208115816116333,
|
|
"logps/chosen": -70.82321166992188,
|
|
"logps/ref_chosen": -78.20826721191406,
|
|
"logps/ref_rejected": -86.90351867675781,
|
|
"logps/rejected": -80.48060607910156,
|
|
"loss": 1.2842,
|
|
"margin_dpo/margin_mean": 0.9621328711509705,
|
|
"margin_dpo/margin_std": 2.283693313598633,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.47467876039304613,
|
|
"fcm_dpo/beta": 0.7063366174697876,
|
|
"fcm_dpo/delta": 0.015392206609249115,
|
|
"fcm_dpo/margin": 1.1817160844802856,
|
|
"fcm_dpo/q_t": 0.3654022812843323,
|
|
"grad_norm": 121.3820571899414,
|
|
"learning_rate": 3.166184534225087e-07,
|
|
"logits/chosen": 1.6584588289260864,
|
|
"logits/rejected": 1.6634647846221924,
|
|
"logps/chosen": -83.33063507080078,
|
|
"logps/ref_chosen": -90.41890716552734,
|
|
"logps/ref_rejected": -84.33525848388672,
|
|
"logps/rejected": -78.4287109375,
|
|
"loss": 1.1208,
|
|
"margin_dpo/margin_mean": 1.181715965270996,
|
|
"margin_dpo/margin_std": 2.112767219543457,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.47619047619047616,
|
|
"fcm_dpo/beta": 0.7153505682945251,
|
|
"fcm_dpo/delta": -0.07875702530145645,
|
|
"fcm_dpo/margin": 1.2836734056472778,
|
|
"fcm_dpo/q_t": 0.33902478218078613,
|
|
"grad_norm": 103.74569702148438,
|
|
"learning_rate": 3.1534311709253723e-07,
|
|
"logits/chosen": 1.672802448272705,
|
|
"logits/rejected": 1.6053755283355713,
|
|
"logps/chosen": -80.0546646118164,
|
|
"logps/ref_chosen": -87.32842254638672,
|
|
"logps/ref_rejected": -93.71661376953125,
|
|
"logps/rejected": -87.72652435302734,
|
|
"loss": 1.001,
|
|
"margin_dpo/margin_mean": 1.2836732864379883,
|
|
"margin_dpo/margin_std": 1.82138192653656,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.47770219198790626,
|
|
"fcm_dpo/beta": 0.7006301879882812,
|
|
"fcm_dpo/delta": -0.14333555102348328,
|
|
"fcm_dpo/margin": 1.3912466764450073,
|
|
"fcm_dpo/q_t": 0.32626068592071533,
|
|
"grad_norm": 95.67511749267578,
|
|
"learning_rate": 3.1406595297511564e-07,
|
|
"logits/chosen": 1.7230030298233032,
|
|
"logits/rejected": 1.4919373989105225,
|
|
"logps/chosen": -66.60594177246094,
|
|
"logps/ref_chosen": -73.898681640625,
|
|
"logps/ref_rejected": -115.42668151855469,
|
|
"logps/rejected": -109.52519226074219,
|
|
"loss": 0.9455,
|
|
"margin_dpo/margin_mean": 1.391247034072876,
|
|
"margin_dpo/margin_std": 1.7532538175582886,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.47921390778533635,
|
|
"fcm_dpo/beta": 0.6516163349151611,
|
|
"fcm_dpo/delta": -0.26026636362075806,
|
|
"fcm_dpo/margin": 1.6568280458450317,
|
|
"fcm_dpo/q_t": 0.3177596926689148,
|
|
"grad_norm": 97.86814880371094,
|
|
"learning_rate": 3.1278699679526975e-07,
|
|
"logits/chosen": 1.7669193744659424,
|
|
"logits/rejected": 1.682518720626831,
|
|
"logps/chosen": -68.1146240234375,
|
|
"logps/ref_chosen": -75.42947387695312,
|
|
"logps/ref_rejected": -90.60166931152344,
|
|
"logps/rejected": -84.94364929199219,
|
|
"loss": 0.9728,
|
|
"margin_dpo/margin_mean": 1.6568281650543213,
|
|
"margin_dpo/margin_std": 2.361431837081909,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.48072562358276644,
|
|
"fcm_dpo/beta": 0.6571391820907593,
|
|
"fcm_dpo/delta": 0.16736406087875366,
|
|
"fcm_dpo/margin": 1.055985689163208,
|
|
"fcm_dpo/q_t": 0.37599390745162964,
|
|
"grad_norm": 92.90299224853516,
|
|
"learning_rate": 3.1150628432815336e-07,
|
|
"logits/chosen": 1.9868645668029785,
|
|
"logits/rejected": 1.8562074899673462,
|
|
"logps/chosen": -63.284942626953125,
|
|
"logps/ref_chosen": -70.38318634033203,
|
|
"logps/ref_rejected": -98.19901275634766,
|
|
"logps/rejected": -92.15675354003906,
|
|
"loss": 1.2687,
|
|
"margin_dpo/margin_mean": 1.055985450744629,
|
|
"margin_dpo/margin_std": 2.3484559059143066,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.48223733938019653,
|
|
"fcm_dpo/beta": 0.6785935163497925,
|
|
"fcm_dpo/delta": 0.06372390687465668,
|
|
"fcm_dpo/margin": 1.1651678085327148,
|
|
"fcm_dpo/q_t": 0.3581188917160034,
|
|
"grad_norm": 106.19795227050781,
|
|
"learning_rate": 3.1022385139804707e-07,
|
|
"logits/chosen": 1.6127734184265137,
|
|
"logits/rejected": 1.5631192922592163,
|
|
"logps/chosen": -76.34298706054688,
|
|
"logps/ref_chosen": -83.40225982666016,
|
|
"logps/ref_rejected": -95.40069580078125,
|
|
"logps/rejected": -89.50658416748047,
|
|
"loss": 1.1087,
|
|
"margin_dpo/margin_mean": 1.1651681661605835,
|
|
"margin_dpo/margin_std": 2.0549566745758057,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.4837490551776266,
|
|
"fcm_dpo/beta": 0.7149513363838196,
|
|
"fcm_dpo/delta": 0.28710103034973145,
|
|
"fcm_dpo/margin": 0.8032557964324951,
|
|
"fcm_dpo/q_t": 0.42163750529289246,
|
|
"grad_norm": 119.99693298339844,
|
|
"learning_rate": 3.0893973387735683e-07,
|
|
"logits/chosen": 1.4589346647262573,
|
|
"logits/rejected": 1.3581949472427368,
|
|
"logps/chosen": -61.47698211669922,
|
|
"logps/ref_chosen": -68.70979309082031,
|
|
"logps/ref_rejected": -87.00540924072266,
|
|
"logps/rejected": -80.57585144042969,
|
|
"loss": 1.3991,
|
|
"margin_dpo/margin_mean": 0.8032557964324951,
|
|
"margin_dpo/margin_std": 2.3650741577148438,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.4852607709750567,
|
|
"fcm_dpo/beta": 0.7074248194694519,
|
|
"fcm_dpo/delta": -0.008499190211296082,
|
|
"fcm_dpo/margin": 1.2074357271194458,
|
|
"fcm_dpo/q_t": 0.34586799144744873,
|
|
"grad_norm": 95.66856384277344,
|
|
"learning_rate": 3.0765396768561004e-07,
|
|
"logits/chosen": 1.56925630569458,
|
|
"logits/rejected": 1.516863226890564,
|
|
"logps/chosen": -59.29505157470703,
|
|
"logps/ref_chosen": -66.48135375976562,
|
|
"logps/ref_rejected": -71.84545135498047,
|
|
"logps/rejected": -65.8665771484375,
|
|
"loss": 1.0626,
|
|
"margin_dpo/margin_mean": 1.2074360847473145,
|
|
"margin_dpo/margin_std": 1.8859655857086182,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.48677248677248675,
|
|
"fcm_dpo/beta": 0.7035623788833618,
|
|
"fcm_dpo/delta": -0.19782285392284393,
|
|
"fcm_dpo/margin": 1.4601200819015503,
|
|
"fcm_dpo/q_t": 0.3138246536254883,
|
|
"grad_norm": 95.54669189453125,
|
|
"learning_rate": 3.063665887884511e-07,
|
|
"logits/chosen": 1.760352373123169,
|
|
"logits/rejected": 1.6214886903762817,
|
|
"logps/chosen": -58.70317077636719,
|
|
"logps/ref_chosen": -65.94654846191406,
|
|
"logps/ref_rejected": -94.26603698730469,
|
|
"logps/rejected": -88.4827880859375,
|
|
"loss": 0.9047,
|
|
"margin_dpo/margin_mean": 1.4601197242736816,
|
|
"margin_dpo/margin_std": 1.8270645141601562,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.48828420256991684,
|
|
"fcm_dpo/beta": 0.6910836696624756,
|
|
"fcm_dpo/delta": -0.08377201855182648,
|
|
"fcm_dpo/margin": 1.335155963897705,
|
|
"fcm_dpo/q_t": 0.352658212184906,
|
|
"grad_norm": 112.16151428222656,
|
|
"learning_rate": 3.0507763319663517e-07,
|
|
"logits/chosen": 1.7015190124511719,
|
|
"logits/rejected": 1.6088063716888428,
|
|
"logps/chosen": -79.46102142333984,
|
|
"logps/ref_chosen": -86.5498046875,
|
|
"logps/ref_rejected": -110.39498901367188,
|
|
"logps/rejected": -104.641357421875,
|
|
"loss": 1.0917,
|
|
"margin_dpo/margin_mean": 1.3351564407348633,
|
|
"margin_dpo/margin_std": 2.2610673904418945,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.4897959183673469,
|
|
"fcm_dpo/beta": 0.6660194396972656,
|
|
"fcm_dpo/delta": -0.08605515211820602,
|
|
"fcm_dpo/margin": 1.39243745803833,
|
|
"fcm_dpo/q_t": 0.32699155807495117,
|
|
"grad_norm": 88.87966918945312,
|
|
"learning_rate": 3.0378713696502097e-07,
|
|
"logits/chosen": 1.7513608932495117,
|
|
"logits/rejected": 1.61769700050354,
|
|
"logps/chosen": -66.96436309814453,
|
|
"logps/ref_chosen": -74.44218444824219,
|
|
"logps/ref_rejected": -85.7646484375,
|
|
"logps/rejected": -79.67926025390625,
|
|
"loss": 0.9591,
|
|
"margin_dpo/margin_mean": 1.3924373388290405,
|
|
"margin_dpo/margin_std": 1.9160056114196777,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.491307634164777,
|
|
"fcm_dpo/beta": 0.6700199842453003,
|
|
"fcm_dpo/delta": 0.12378650903701782,
|
|
"fcm_dpo/margin": 1.0951839685440063,
|
|
"fcm_dpo/q_t": 0.3673388957977295,
|
|
"grad_norm": 106.75263214111328,
|
|
"learning_rate": 3.0249513619156206e-07,
|
|
"logits/chosen": 1.737715482711792,
|
|
"logits/rejected": 1.617845058441162,
|
|
"logps/chosen": -74.48870849609375,
|
|
"logps/ref_chosen": -81.43812561035156,
|
|
"logps/ref_rejected": -97.04302978515625,
|
|
"logps/rejected": -91.18878936767578,
|
|
"loss": 1.0978,
|
|
"margin_dpo/margin_mean": 1.0951833724975586,
|
|
"margin_dpo/margin_std": 1.8893883228302002,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.4928193499622071,
|
|
"fcm_dpo/beta": 0.7143986225128174,
|
|
"fcm_dpo/delta": 0.3045271635055542,
|
|
"fcm_dpo/margin": 0.7874218225479126,
|
|
"fcm_dpo/q_t": 0.3986685872077942,
|
|
"grad_norm": 119.16207885742188,
|
|
"learning_rate": 3.012016670162977e-07,
|
|
"logits/chosen": 1.532388687133789,
|
|
"logits/rejected": 1.5331604480743408,
|
|
"logps/chosen": -85.00900268554688,
|
|
"logps/ref_chosen": -91.65318298339844,
|
|
"logps/ref_rejected": -90.64222717285156,
|
|
"logps/rejected": -84.78547668457031,
|
|
"loss": 1.2558,
|
|
"margin_dpo/margin_mean": 0.787421464920044,
|
|
"margin_dpo/margin_std": 1.8421351909637451,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.4943310657596372,
|
|
"fcm_dpo/beta": 0.7667665481567383,
|
|
"fcm_dpo/delta": 0.30395039916038513,
|
|
"fcm_dpo/margin": 0.7277549505233765,
|
|
"fcm_dpo/q_t": 0.4066656231880188,
|
|
"grad_norm": 140.64083862304688,
|
|
"learning_rate": 2.99906765620341e-07,
|
|
"logits/chosen": 1.3383753299713135,
|
|
"logits/rejected": 1.2867255210876465,
|
|
"logps/chosen": -83.17657470703125,
|
|
"logps/ref_chosen": -89.97216796875,
|
|
"logps/ref_rejected": -97.54869079589844,
|
|
"logps/rejected": -91.48085021972656,
|
|
"loss": 1.3205,
|
|
"margin_dpo/margin_mean": 0.7277547717094421,
|
|
"margin_dpo/margin_std": 1.9155395030975342,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.4958427815570673,
|
|
"fcm_dpo/beta": 0.7802586555480957,
|
|
"fcm_dpo/delta": 0.15494143962860107,
|
|
"fcm_dpo/margin": 0.9038246870040894,
|
|
"fcm_dpo/q_t": 0.3814663887023926,
|
|
"grad_norm": 132.4653778076172,
|
|
"learning_rate": 2.9861046822486766e-07,
|
|
"logits/chosen": 1.6037625074386597,
|
|
"logits/rejected": 1.5314295291900635,
|
|
"logps/chosen": -73.24417877197266,
|
|
"logps/ref_chosen": -80.27335357666016,
|
|
"logps/ref_rejected": -99.04093170166016,
|
|
"logps/rejected": -92.91558837890625,
|
|
"loss": 1.2057,
|
|
"margin_dpo/margin_mean": 0.9038242697715759,
|
|
"margin_dpo/margin_std": 1.8299709558486938,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.4973544973544973,
|
|
"fcm_dpo/beta": 0.7895931005477905,
|
|
"fcm_dpo/delta": 0.01884014904499054,
|
|
"fcm_dpo/margin": 1.0524828433990479,
|
|
"fcm_dpo/q_t": 0.38687556982040405,
|
|
"grad_norm": 145.9662322998047,
|
|
"learning_rate": 2.9731281109010253e-07,
|
|
"logits/chosen": 1.860405683517456,
|
|
"logits/rejected": 1.7466496229171753,
|
|
"logps/chosen": -72.85738372802734,
|
|
"logps/ref_chosen": -79.75892639160156,
|
|
"logps/ref_rejected": -102.06265258789062,
|
|
"logps/rejected": -96.21360778808594,
|
|
"loss": 1.2091,
|
|
"margin_dpo/margin_mean": 1.0524829626083374,
|
|
"margin_dpo/margin_std": 2.168503522872925,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.4988662131519274,
|
|
"fcm_dpo/beta": 0.7690603137016296,
|
|
"fcm_dpo/delta": -0.29933562874794006,
|
|
"fcm_dpo/margin": 1.4522162675857544,
|
|
"fcm_dpo/q_t": 0.3026127517223358,
|
|
"grad_norm": 94.12672424316406,
|
|
"learning_rate": 2.9601383051430505e-07,
|
|
"logits/chosen": 1.6887285709381104,
|
|
"logits/rejected": 1.5656009912490845,
|
|
"logps/chosen": -63.10783767700195,
|
|
"logps/ref_chosen": -70.55734252929688,
|
|
"logps/ref_rejected": -94.53077697753906,
|
|
"logps/rejected": -88.53349304199219,
|
|
"loss": 0.9435,
|
|
"margin_dpo/margin_mean": 1.4522165060043335,
|
|
"margin_dpo/margin_std": 1.8758301734924316,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.5003779289493575,
|
|
"fcm_dpo/beta": 0.722969651222229,
|
|
"fcm_dpo/delta": -0.2056279480457306,
|
|
"fcm_dpo/margin": 1.4265342950820923,
|
|
"fcm_dpo/q_t": 0.326630175113678,
|
|
"grad_norm": 110.82780456542969,
|
|
"learning_rate": 2.947135628327544e-07,
|
|
"logits/chosen": 1.7697137594223022,
|
|
"logits/rejected": 1.7361394166946411,
|
|
"logps/chosen": -68.56468963623047,
|
|
"logps/ref_chosen": -75.46063232421875,
|
|
"logps/ref_rejected": -84.78495788574219,
|
|
"logps/rejected": -79.31553649902344,
|
|
"loss": 1.0925,
|
|
"margin_dpo/margin_mean": 1.4265344142913818,
|
|
"margin_dpo/margin_std": 2.3631396293640137,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.5018896447467877,
|
|
"fcm_dpo/beta": 0.7259687185287476,
|
|
"fcm_dpo/delta": -0.03762027621269226,
|
|
"fcm_dpo/margin": 1.2150681018829346,
|
|
"fcm_dpo/q_t": 0.3482212424278259,
|
|
"grad_norm": 115.17147064208984,
|
|
"learning_rate": 2.934120444167326e-07,
|
|
"logits/chosen": 1.4272816181182861,
|
|
"logits/rejected": 1.3553186655044556,
|
|
"logps/chosen": -77.3675537109375,
|
|
"logps/ref_chosen": -84.32807922363281,
|
|
"logps/ref_rejected": -95.63302612304688,
|
|
"logps/rejected": -89.88756561279297,
|
|
"loss": 1.0542,
|
|
"margin_dpo/margin_mean": 1.2150681018829346,
|
|
"margin_dpo/margin_std": 1.9501895904541016,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.5034013605442177,
|
|
"fcm_dpo/beta": 0.674473226070404,
|
|
"fcm_dpo/delta": -0.3686579167842865,
|
|
"fcm_dpo/margin": 1.738995909690857,
|
|
"fcm_dpo/q_t": 0.2906306982040405,
|
|
"grad_norm": 80.1180648803711,
|
|
"learning_rate": 2.921093116725076e-07,
|
|
"logits/chosen": 1.7883816957473755,
|
|
"logits/rejected": 1.6795051097869873,
|
|
"logps/chosen": -71.42243194580078,
|
|
"logps/ref_chosen": -78.2132339477539,
|
|
"logps/ref_rejected": -103.82716369628906,
|
|
"logps/rejected": -98.77536010742188,
|
|
"loss": 0.7898,
|
|
"margin_dpo/margin_mean": 1.7389962673187256,
|
|
"margin_dpo/margin_std": 1.895709753036499,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.5049130763416477,
|
|
"fcm_dpo/beta": 0.6675664186477661,
|
|
"fcm_dpo/delta": 0.06639145314693451,
|
|
"fcm_dpo/margin": 1.1811097860336304,
|
|
"fcm_dpo/q_t": 0.3664048910140991,
|
|
"grad_norm": 105.71363830566406,
|
|
"learning_rate": 2.9080540104031484e-07,
|
|
"logits/chosen": 1.6449368000030518,
|
|
"logits/rejected": 1.5412189960479736,
|
|
"logps/chosen": -78.09677124023438,
|
|
"logps/ref_chosen": -85.0171127319336,
|
|
"logps/ref_rejected": -106.79039764404297,
|
|
"logps/rejected": -101.05116271972656,
|
|
"loss": 1.1462,
|
|
"margin_dpo/margin_mean": 1.1811105012893677,
|
|
"margin_dpo/margin_std": 2.216235876083374,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.5064247921390779,
|
|
"fcm_dpo/beta": 0.6799655556678772,
|
|
"fcm_dpo/delta": 0.005240932106971741,
|
|
"fcm_dpo/margin": 1.2411878108978271,
|
|
"fcm_dpo/q_t": 0.3700757622718811,
|
|
"grad_norm": 103.19578552246094,
|
|
"learning_rate": 2.895003489933375e-07,
|
|
"logits/chosen": 1.7492305040359497,
|
|
"logits/rejected": 1.6704251766204834,
|
|
"logps/chosen": -71.7774429321289,
|
|
"logps/ref_chosen": -78.56513214111328,
|
|
"logps/ref_rejected": -92.68515014648438,
|
|
"logps/rejected": -87.13864135742188,
|
|
"loss": 1.166,
|
|
"margin_dpo/margin_mean": 1.241187572479248,
|
|
"margin_dpo/margin_std": 2.3274662494659424,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.5079365079365079,
|
|
"fcm_dpo/beta": 0.6593726873397827,
|
|
"fcm_dpo/delta": -0.09310643374919891,
|
|
"fcm_dpo/margin": 1.4134182929992676,
|
|
"fcm_dpo/q_t": 0.3384147882461548,
|
|
"grad_norm": 100.4131088256836,
|
|
"learning_rate": 2.8819419203668675e-07,
|
|
"logits/chosen": 1.7871836423873901,
|
|
"logits/rejected": 1.7552770376205444,
|
|
"logps/chosen": -82.16726684570312,
|
|
"logps/ref_chosen": -88.63243103027344,
|
|
"logps/ref_rejected": -107.89385986328125,
|
|
"logps/rejected": -102.84210968017578,
|
|
"loss": 0.9968,
|
|
"margin_dpo/margin_mean": 1.4134178161621094,
|
|
"margin_dpo/margin_std": 2.0912837982177734,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.509448223733938,
|
|
"fcm_dpo/beta": 0.6767586469650269,
|
|
"fcm_dpo/delta": 0.2001107931137085,
|
|
"fcm_dpo/margin": 0.9802356958389282,
|
|
"fcm_dpo/q_t": 0.3863935172557831,
|
|
"grad_norm": 113.31838989257812,
|
|
"learning_rate": 2.8688696670638053e-07,
|
|
"logits/chosen": 1.8030924797058105,
|
|
"logits/rejected": 1.699881911277771,
|
|
"logps/chosen": -86.61439514160156,
|
|
"logps/ref_chosen": -93.25018310546875,
|
|
"logps/ref_rejected": -103.8592529296875,
|
|
"logps/rejected": -98.20370483398438,
|
|
"loss": 1.2108,
|
|
"margin_dpo/margin_mean": 0.98023521900177,
|
|
"margin_dpo/margin_std": 2.0510101318359375,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.5109599395313681,
|
|
"fcm_dpo/beta": 0.7058612108230591,
|
|
"fcm_dpo/delta": 0.07689429819583893,
|
|
"fcm_dpo/margin": 1.0984101295471191,
|
|
"fcm_dpo/q_t": 0.3622785210609436,
|
|
"grad_norm": 104.60888671875,
|
|
"learning_rate": 2.8557870956832133e-07,
|
|
"logits/chosen": 1.6752837896347046,
|
|
"logits/rejected": 1.6237120628356934,
|
|
"logps/chosen": -75.27117156982422,
|
|
"logps/ref_chosen": -81.79462432861328,
|
|
"logps/ref_rejected": -90.98942565917969,
|
|
"logps/rejected": -85.56438446044922,
|
|
"loss": 1.041,
|
|
"margin_dpo/margin_mean": 1.0984106063842773,
|
|
"margin_dpo/margin_std": 1.7123408317565918,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.5124716553287982,
|
|
"fcm_dpo/beta": 0.6881164312362671,
|
|
"fcm_dpo/delta": -0.08667253702878952,
|
|
"fcm_dpo/margin": 1.348482370376587,
|
|
"fcm_dpo/q_t": 0.322110652923584,
|
|
"grad_norm": 89.46073913574219,
|
|
"learning_rate": 2.842694572172736e-07,
|
|
"logits/chosen": 1.827816128730774,
|
|
"logits/rejected": 1.6548161506652832,
|
|
"logps/chosen": -54.872840881347656,
|
|
"logps/ref_chosen": -61.80355453491211,
|
|
"logps/ref_rejected": -85.16979217529297,
|
|
"logps/rejected": -79.58755493164062,
|
|
"loss": 0.9793,
|
|
"margin_dpo/margin_mean": 1.3484827280044556,
|
|
"margin_dpo/margin_std": 1.903770923614502,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.5139833711262283,
|
|
"fcm_dpo/beta": 0.703204870223999,
|
|
"fcm_dpo/delta": 0.03393559157848358,
|
|
"fcm_dpo/margin": 1.1560697555541992,
|
|
"fcm_dpo/q_t": 0.3668256103992462,
|
|
"grad_norm": 101.28387451171875,
|
|
"learning_rate": 2.8295924627584004e-07,
|
|
"logits/chosen": 1.4966051578521729,
|
|
"logits/rejected": 1.4827940464019775,
|
|
"logps/chosen": -66.0333251953125,
|
|
"logps/ref_chosen": -72.486083984375,
|
|
"logps/ref_rejected": -79.86129760742188,
|
|
"logps/rejected": -74.56460571289062,
|
|
"loss": 1.1705,
|
|
"margin_dpo/margin_mean": 1.1560699939727783,
|
|
"margin_dpo/margin_std": 2.1286869049072266,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.5154950869236583,
|
|
"fcm_dpo/beta": 0.677609920501709,
|
|
"fcm_dpo/delta": -0.01477833092212677,
|
|
"fcm_dpo/margin": 1.2626094818115234,
|
|
"fcm_dpo/q_t": 0.3455454111099243,
|
|
"grad_norm": 104.65491485595703,
|
|
"learning_rate": 2.816481133934373e-07,
|
|
"logits/chosen": 1.6853771209716797,
|
|
"logits/rejected": 1.5724800825119019,
|
|
"logps/chosen": -70.659912109375,
|
|
"logps/ref_chosen": -77.36830139160156,
|
|
"logps/ref_rejected": -94.64933013916016,
|
|
"logps/rejected": -89.20354461669922,
|
|
"loss": 1.0474,
|
|
"margin_dpo/margin_mean": 1.2626094818115234,
|
|
"margin_dpo/margin_std": 1.8441420793533325,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.5170068027210885,
|
|
"fcm_dpo/beta": 0.6741093397140503,
|
|
"fcm_dpo/delta": -0.15685820579528809,
|
|
"fcm_dpo/margin": 1.4702677726745605,
|
|
"fcm_dpo/q_t": 0.3300013840198517,
|
|
"grad_norm": 86.32221984863281,
|
|
"learning_rate": 2.8033609524527046e-07,
|
|
"logits/chosen": 1.7303967475891113,
|
|
"logits/rejected": 1.6494003534317017,
|
|
"logps/chosen": -64.28861999511719,
|
|
"logps/ref_chosen": -71.00831604003906,
|
|
"logps/ref_rejected": -84.22953796386719,
|
|
"logps/rejected": -78.98011779785156,
|
|
"loss": 0.9393,
|
|
"margin_dpo/margin_mean": 1.4702682495117188,
|
|
"margin_dpo/margin_std": 1.9783341884613037,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.5185185185185185,
|
|
"fcm_dpo/beta": 0.6742951273918152,
|
|
"fcm_dpo/delta": 0.03692948818206787,
|
|
"fcm_dpo/margin": 0.7514773607254028,
|
|
"fcm_dpo/q_t": 0.4021931290626526,
|
|
"grad_norm": 112.97252655029297,
|
|
"learning_rate": 2.7902322853130753e-07,
|
|
"logits/chosen": 1.315962553024292,
|
|
"logits/rejected": 1.2896361351013184,
|
|
"logps/chosen": -84.8480224609375,
|
|
"logps/ref_chosen": -91.44624328613281,
|
|
"logps/ref_rejected": -99.06044006347656,
|
|
"logps/rejected": -93.21369934082031,
|
|
"loss": 1.2966,
|
|
"margin_dpo/margin_mean": 0.7514776587486267,
|
|
"margin_dpo/margin_std": 1.8802409172058105,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.5200302343159486,
|
|
"fcm_dpo/beta": 0.6519577503204346,
|
|
"fcm_dpo/delta": -0.22193169593811035,
|
|
"fcm_dpo/margin": 1.6081267595291138,
|
|
"fcm_dpo/q_t": 0.3195720613002777,
|
|
"grad_norm": 91.64571380615234,
|
|
"learning_rate": 2.7770954997525274e-07,
|
|
"logits/chosen": 1.6671485900878906,
|
|
"logits/rejected": 1.5444729328155518,
|
|
"logps/chosen": -66.93972778320312,
|
|
"logps/ref_chosen": -73.43608093261719,
|
|
"logps/ref_rejected": -100.76569366455078,
|
|
"logps/rejected": -95.8774642944336,
|
|
"loss": 0.9195,
|
|
"margin_dpo/margin_mean": 1.608127474784851,
|
|
"margin_dpo/margin_std": 2.2376999855041504,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.5215419501133787,
|
|
"fcm_dpo/beta": 0.6448332667350769,
|
|
"fcm_dpo/delta": 0.06164184957742691,
|
|
"fcm_dpo/margin": 1.2283003330230713,
|
|
"fcm_dpo/q_t": 0.35730862617492676,
|
|
"grad_norm": 89.81734466552734,
|
|
"learning_rate": 2.7639509632351927e-07,
|
|
"logits/chosen": 2.0488429069519043,
|
|
"logits/rejected": 1.9527053833007812,
|
|
"logps/chosen": -68.85000610351562,
|
|
"logps/ref_chosen": -75.79296875,
|
|
"logps/ref_rejected": -94.34156799316406,
|
|
"logps/rejected": -88.62691497802734,
|
|
"loss": 1.0656,
|
|
"margin_dpo/margin_mean": 1.2283005714416504,
|
|
"margin_dpo/margin_std": 1.9429552555084229,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.5230536659108088,
|
|
"fcm_dpo/beta": 0.6434746980667114,
|
|
"fcm_dpo/delta": -0.126961350440979,
|
|
"fcm_dpo/margin": 1.4994654655456543,
|
|
"fcm_dpo/q_t": 0.3333126902580261,
|
|
"grad_norm": 92.5584487915039,
|
|
"learning_rate": 2.7507990434420123e-07,
|
|
"logits/chosen": 1.7417409420013428,
|
|
"logits/rejected": 1.6066086292266846,
|
|
"logps/chosen": -65.25099182128906,
|
|
"logps/ref_chosen": -72.26289367675781,
|
|
"logps/ref_rejected": -106.36925506591797,
|
|
"logps/rejected": -100.85682678222656,
|
|
"loss": 1.0344,
|
|
"margin_dpo/margin_mean": 1.499464750289917,
|
|
"margin_dpo/margin_std": 2.23475980758667,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.5245653817082389,
|
|
"fcm_dpo/beta": 0.6435239911079407,
|
|
"fcm_dpo/delta": 0.08430784940719604,
|
|
"fcm_dpo/margin": 1.2000800371170044,
|
|
"fcm_dpo/q_t": 0.37312763929367065,
|
|
"grad_norm": 102.34098815917969,
|
|
"learning_rate": 2.737640108260456e-07,
|
|
"logits/chosen": 1.687086582183838,
|
|
"logits/rejected": 1.6137919425964355,
|
|
"logps/chosen": -64.65863037109375,
|
|
"logps/ref_chosen": -71.19871520996094,
|
|
"logps/ref_rejected": -91.543212890625,
|
|
"logps/rejected": -86.20320892333984,
|
|
"loss": 1.181,
|
|
"margin_dpo/margin_mean": 1.200080394744873,
|
|
"margin_dpo/margin_std": 2.4104766845703125,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.5260770975056689,
|
|
"fcm_dpo/beta": 0.6327615976333618,
|
|
"fcm_dpo/delta": -0.1322903335094452,
|
|
"fcm_dpo/margin": 1.5311083793640137,
|
|
"fcm_dpo/q_t": 0.3531337380409241,
|
|
"grad_norm": 89.33076477050781,
|
|
"learning_rate": 2.724474525774229e-07,
|
|
"logits/chosen": 2.2387795448303223,
|
|
"logits/rejected": 2.1621313095092773,
|
|
"logps/chosen": -63.21007537841797,
|
|
"logps/ref_chosen": -69.95603942871094,
|
|
"logps/ref_rejected": -83.64309692382812,
|
|
"logps/rejected": -78.42823791503906,
|
|
"loss": 1.0263,
|
|
"margin_dpo/margin_mean": 1.5311079025268555,
|
|
"margin_dpo/margin_std": 2.4763777256011963,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.527588813303099,
|
|
"fcm_dpo/beta": 0.638213038444519,
|
|
"fcm_dpo/delta": -0.043823257088661194,
|
|
"fcm_dpo/margin": 1.385543704032898,
|
|
"fcm_dpo/q_t": 0.34594935178756714,
|
|
"grad_norm": 93.91361999511719,
|
|
"learning_rate": 2.711302664252973e-07,
|
|
"logits/chosen": 1.8765864372253418,
|
|
"logits/rejected": 1.7512259483337402,
|
|
"logps/chosen": -63.903682708740234,
|
|
"logps/ref_chosen": -70.71857452392578,
|
|
"logps/ref_rejected": -99.93263244628906,
|
|
"logps/rejected": -94.50328826904297,
|
|
"loss": 1.0296,
|
|
"margin_dpo/margin_mean": 1.3855433464050293,
|
|
"margin_dpo/margin_std": 2.1671652793884277,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.5291005291005291,
|
|
"fcm_dpo/beta": 0.5975005030632019,
|
|
"fcm_dpo/delta": -0.23265165090560913,
|
|
"fcm_dpo/margin": 1.7634344100952148,
|
|
"fcm_dpo/q_t": 0.31782710552215576,
|
|
"grad_norm": 78.32647705078125,
|
|
"learning_rate": 2.698124892141971e-07,
|
|
"logits/chosen": 1.5105071067810059,
|
|
"logits/rejected": 1.427919626235962,
|
|
"logps/chosen": -71.44319152832031,
|
|
"logps/ref_chosen": -78.16873168945312,
|
|
"logps/ref_rejected": -104.84308624267578,
|
|
"logps/rejected": -99.8809814453125,
|
|
"loss": 0.8778,
|
|
"margin_dpo/margin_mean": 1.7634345293045044,
|
|
"margin_dpo/margin_std": 2.1569459438323975,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.5306122448979592,
|
|
"fcm_dpo/beta": 0.5988894104957581,
|
|
"fcm_dpo/delta": 0.056822769343853,
|
|
"fcm_dpo/margin": 1.3316857814788818,
|
|
"fcm_dpo/q_t": 0.34192731976509094,
|
|
"grad_norm": 91.16613006591797,
|
|
"learning_rate": 2.6849415780518357e-07,
|
|
"logits/chosen": 1.873887300491333,
|
|
"logits/rejected": 1.7134041786193848,
|
|
"logps/chosen": -65.16175842285156,
|
|
"logps/ref_chosen": -71.79151916503906,
|
|
"logps/ref_rejected": -97.04634094238281,
|
|
"logps/rejected": -91.74826049804688,
|
|
"loss": 1.0744,
|
|
"margin_dpo/margin_mean": 1.3316857814788818,
|
|
"margin_dpo/margin_std": 2.146547317504883,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.5321239606953893,
|
|
"fcm_dpo/beta": 0.622305691242218,
|
|
"fcm_dpo/delta": 0.2838347256183624,
|
|
"fcm_dpo/margin": 0.9371163845062256,
|
|
"fcm_dpo/q_t": 0.3857288956642151,
|
|
"grad_norm": 102.97727966308594,
|
|
"learning_rate": 2.6717530907482024e-07,
|
|
"logits/chosen": 1.7103533744812012,
|
|
"logits/rejected": 1.6334961652755737,
|
|
"logps/chosen": -74.54743194580078,
|
|
"logps/ref_chosen": -80.86544799804688,
|
|
"logps/ref_rejected": -102.02129364013672,
|
|
"logps/rejected": -96.64038848876953,
|
|
"loss": 1.1919,
|
|
"margin_dpo/margin_mean": 0.9371156096458435,
|
|
"margin_dpo/margin_std": 1.9518498182296753,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.5336356764928194,
|
|
"fcm_dpo/beta": 0.6423227787017822,
|
|
"fcm_dpo/delta": 0.03207054361701012,
|
|
"fcm_dpo/margin": 1.2774255275726318,
|
|
"fcm_dpo/q_t": 0.34687069058418274,
|
|
"grad_norm": 98.20146179199219,
|
|
"learning_rate": 2.658559799141411e-07,
|
|
"logits/chosen": 1.8999056816101074,
|
|
"logits/rejected": 1.9004359245300293,
|
|
"logps/chosen": -78.00933837890625,
|
|
"logps/ref_chosen": -84.77235412597656,
|
|
"logps/ref_rejected": -86.77130889892578,
|
|
"logps/rejected": -81.28572082519531,
|
|
"loss": 1.0978,
|
|
"margin_dpo/margin_mean": 1.2774256467819214,
|
|
"margin_dpo/margin_std": 2.1611897945404053,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.5351473922902494,
|
|
"fcm_dpo/beta": 0.6360931396484375,
|
|
"fcm_dpo/delta": -0.04566780477762222,
|
|
"fcm_dpo/margin": 1.401000738143921,
|
|
"fcm_dpo/q_t": 0.34180209040641785,
|
|
"grad_norm": 89.1875228881836,
|
|
"learning_rate": 2.6453620722761895e-07,
|
|
"logits/chosen": 1.8383228778839111,
|
|
"logits/rejected": 1.5652598142623901,
|
|
"logps/chosen": -47.40654754638672,
|
|
"logps/ref_chosen": -54.33562088012695,
|
|
"logps/ref_rejected": -92.4120101928711,
|
|
"logps/rejected": -86.88394165039062,
|
|
"loss": 1.0269,
|
|
"margin_dpo/margin_mean": 1.4010006189346313,
|
|
"margin_dpo/margin_std": 2.115865707397461,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.5366591080876795,
|
|
"fcm_dpo/beta": 0.6303632259368896,
|
|
"fcm_dpo/delta": -0.04144010692834854,
|
|
"fcm_dpo/margin": 1.4075102806091309,
|
|
"fcm_dpo/q_t": 0.3529096841812134,
|
|
"grad_norm": 99.93326568603516,
|
|
"learning_rate": 2.632160279321328e-07,
|
|
"logits/chosen": 1.9949486255645752,
|
|
"logits/rejected": 1.7414538860321045,
|
|
"logps/chosen": -55.229888916015625,
|
|
"logps/ref_chosen": -61.8388671875,
|
|
"logps/ref_rejected": -98.65571594238281,
|
|
"logps/rejected": -93.45425415039062,
|
|
"loss": 1.0339,
|
|
"margin_dpo/margin_mean": 1.4075103998184204,
|
|
"margin_dpo/margin_std": 2.2196106910705566,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.5381708238851096,
|
|
"fcm_dpo/beta": 0.6423078179359436,
|
|
"fcm_dpo/delta": 0.17999966442584991,
|
|
"fcm_dpo/margin": 1.061737060546875,
|
|
"fcm_dpo/q_t": 0.383014053106308,
|
|
"grad_norm": 106.97945404052734,
|
|
"learning_rate": 2.618954789559356e-07,
|
|
"logits/chosen": 1.916182279586792,
|
|
"logits/rejected": 1.7761037349700928,
|
|
"logps/chosen": -57.06999206542969,
|
|
"logps/ref_chosen": -63.92546463012695,
|
|
"logps/ref_rejected": -89.682861328125,
|
|
"logps/rejected": -83.88912963867188,
|
|
"loss": 1.3227,
|
|
"margin_dpo/margin_mean": 1.0617367029190063,
|
|
"margin_dpo/margin_std": 2.4881436824798584,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.5396825396825397,
|
|
"fcm_dpo/beta": 0.6452049016952515,
|
|
"fcm_dpo/delta": 0.00560779869556427,
|
|
"fcm_dpo/margin": 1.3021241426467896,
|
|
"fcm_dpo/q_t": 0.35388654470443726,
|
|
"grad_norm": 103.93507385253906,
|
|
"learning_rate": 2.6057459723762076e-07,
|
|
"logits/chosen": 1.7489900588989258,
|
|
"logits/rejected": 1.6417170763015747,
|
|
"logps/chosen": -74.31562805175781,
|
|
"logps/ref_chosen": -81.07588958740234,
|
|
"logps/ref_rejected": -85.06967163085938,
|
|
"logps/rejected": -79.61152648925781,
|
|
"loss": 1.0675,
|
|
"margin_dpo/margin_mean": 1.3021240234375,
|
|
"margin_dpo/margin_std": 2.0928993225097656,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.5411942554799698,
|
|
"fcm_dpo/beta": 0.6459277868270874,
|
|
"fcm_dpo/delta": -0.15767623484134674,
|
|
"fcm_dpo/margin": 1.535652756690979,
|
|
"fcm_dpo/q_t": 0.323412150144577,
|
|
"grad_norm": 88.0017318725586,
|
|
"learning_rate": 2.5925341972508954e-07,
|
|
"logits/chosen": 1.7714424133300781,
|
|
"logits/rejected": 1.7848539352416992,
|
|
"logps/chosen": -77.4293441772461,
|
|
"logps/ref_chosen": -84.09109497070312,
|
|
"logps/ref_rejected": -85.07244873046875,
|
|
"logps/rejected": -79.94635772705078,
|
|
"loss": 0.9144,
|
|
"margin_dpo/margin_mean": 1.5356526374816895,
|
|
"margin_dpo/margin_std": 1.961099624633789,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.5427059712773998,
|
|
"fcm_dpo/beta": 0.676872968673706,
|
|
"fcm_dpo/delta": 0.3618844151496887,
|
|
"fcm_dpo/margin": 0.7404053211212158,
|
|
"fcm_dpo/q_t": 0.4080279767513275,
|
|
"grad_norm": 138.78829956054688,
|
|
"learning_rate": 2.579319833745169e-07,
|
|
"logits/chosen": 1.537717342376709,
|
|
"logits/rejected": 1.5032204389572144,
|
|
"logps/chosen": -74.22945404052734,
|
|
"logps/ref_chosen": -80.7490234375,
|
|
"logps/ref_rejected": -94.92911529541016,
|
|
"logps/rejected": -89.14994812011719,
|
|
"loss": 1.4657,
|
|
"margin_dpo/margin_mean": 0.7404047250747681,
|
|
"margin_dpo/margin_std": 2.4702301025390625,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.54421768707483,
|
|
"fcm_dpo/beta": 0.6807535886764526,
|
|
"fcm_dpo/delta": 0.005708474665880203,
|
|
"fcm_dpo/margin": 1.2405047416687012,
|
|
"fcm_dpo/q_t": 0.36353182792663574,
|
|
"grad_norm": 104.39427947998047,
|
|
"learning_rate": 2.5661032514931834e-07,
|
|
"logits/chosen": 1.6437644958496094,
|
|
"logits/rejected": 1.4671106338500977,
|
|
"logps/chosen": -71.83963012695312,
|
|
"logps/ref_chosen": -78.38681030273438,
|
|
"logps/ref_rejected": -109.68933868408203,
|
|
"logps/rejected": -104.3826675415039,
|
|
"loss": 1.0574,
|
|
"margin_dpo/margin_mean": 1.2405047416687012,
|
|
"margin_dpo/margin_std": 2.0635769367218018,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.54572940287226,
|
|
"fcm_dpo/beta": 0.6654163599014282,
|
|
"fcm_dpo/delta": -0.14612089097499847,
|
|
"fcm_dpo/margin": 1.4734528064727783,
|
|
"fcm_dpo/q_t": 0.33461007475852966,
|
|
"grad_norm": 99.80976867675781,
|
|
"learning_rate": 2.552884820191154e-07,
|
|
"logits/chosen": 1.853990077972412,
|
|
"logits/rejected": 1.7232751846313477,
|
|
"logps/chosen": -67.30521392822266,
|
|
"logps/ref_chosen": -73.9055404663086,
|
|
"logps/ref_rejected": -89.8489990234375,
|
|
"logps/rejected": -84.72212219238281,
|
|
"loss": 0.9743,
|
|
"margin_dpo/margin_mean": 1.4734525680541992,
|
|
"margin_dpo/margin_std": 2.1431329250335693,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.54724111866969,
|
|
"fcm_dpo/beta": 0.6757440567016602,
|
|
"fcm_dpo/delta": -0.07346963882446289,
|
|
"fcm_dpo/margin": 1.3376474380493164,
|
|
"fcm_dpo/q_t": 0.3617614507675171,
|
|
"grad_norm": 116.218017578125,
|
|
"learning_rate": 2.53966490958702e-07,
|
|
"logits/chosen": 1.9418387413024902,
|
|
"logits/rejected": 1.6752915382385254,
|
|
"logps/chosen": -75.97872924804688,
|
|
"logps/ref_chosen": -82.32565307617188,
|
|
"logps/ref_rejected": -123.14100646972656,
|
|
"logps/rejected": -118.13172912597656,
|
|
"loss": 1.1054,
|
|
"margin_dpo/margin_mean": 1.337647557258606,
|
|
"margin_dpo/margin_std": 2.2377192974090576,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.5487528344671202,
|
|
"fcm_dpo/beta": 0.6594882011413574,
|
|
"fcm_dpo/delta": 0.006209194660186768,
|
|
"fcm_dpo/margin": 1.2788195610046387,
|
|
"fcm_dpo/q_t": 0.3492434322834015,
|
|
"grad_norm": 92.99041748046875,
|
|
"learning_rate": 2.526443889470099e-07,
|
|
"logits/chosen": 2.0503106117248535,
|
|
"logits/rejected": 1.757457971572876,
|
|
"logps/chosen": -59.85978698730469,
|
|
"logps/ref_chosen": -66.05493927001953,
|
|
"logps/ref_rejected": -106.79598999023438,
|
|
"logps/rejected": -101.87965393066406,
|
|
"loss": 0.9826,
|
|
"margin_dpo/margin_mean": 1.2788193225860596,
|
|
"margin_dpo/margin_std": 1.83968186378479,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.5502645502645502,
|
|
"fcm_dpo/beta": 0.624494194984436,
|
|
"fcm_dpo/delta": -0.2905758023262024,
|
|
"fcm_dpo/margin": 1.7719086408615112,
|
|
"fcm_dpo/q_t": 0.32380062341690063,
|
|
"grad_norm": 83.77694702148438,
|
|
"learning_rate": 2.513222129660744e-07,
|
|
"logits/chosen": 1.5185627937316895,
|
|
"logits/rejected": 1.4087271690368652,
|
|
"logps/chosen": -69.42403411865234,
|
|
"logps/ref_chosen": -76.38365173339844,
|
|
"logps/ref_rejected": -100.22221374511719,
|
|
"logps/rejected": -95.03450012207031,
|
|
"loss": 0.9612,
|
|
"margin_dpo/margin_mean": 1.7719082832336426,
|
|
"margin_dpo/margin_std": 2.5487070083618164,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.5517762660619804,
|
|
"fcm_dpo/beta": 0.6193841695785522,
|
|
"fcm_dpo/delta": -0.022001437842845917,
|
|
"fcm_dpo/margin": 1.4040987491607666,
|
|
"fcm_dpo/q_t": 0.3279988467693329,
|
|
"grad_norm": 77.93173217773438,
|
|
"learning_rate": 2.5e-07,
|
|
"logits/chosen": 1.806230902671814,
|
|
"logits/rejected": 1.8085339069366455,
|
|
"logps/chosen": -74.8114013671875,
|
|
"logps/ref_chosen": -81.83399963378906,
|
|
"logps/ref_rejected": -89.06932830810547,
|
|
"logps/rejected": -83.45082092285156,
|
|
"loss": 0.8875,
|
|
"margin_dpo/margin_mean": 1.4040985107421875,
|
|
"margin_dpo/margin_std": 1.5749378204345703,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.5532879818594104,
|
|
"fcm_dpo/beta": 0.6239637732505798,
|
|
"fcm_dpo/delta": 0.04806492105126381,
|
|
"fcm_dpo/margin": 1.2896121740341187,
|
|
"fcm_dpo/q_t": 0.3578612804412842,
|
|
"grad_norm": 85.66990661621094,
|
|
"learning_rate": 2.486777870339255e-07,
|
|
"logits/chosen": 1.577677607536316,
|
|
"logits/rejected": 1.554199457168579,
|
|
"logps/chosen": -65.12374877929688,
|
|
"logps/ref_chosen": -72.03398895263672,
|
|
"logps/ref_rejected": -83.65354919433594,
|
|
"logps/rejected": -78.03291320800781,
|
|
"loss": 1.0881,
|
|
"margin_dpo/margin_mean": 1.2896113395690918,
|
|
"margin_dpo/margin_std": 2.166577100753784,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.5547996976568406,
|
|
"fcm_dpo/beta": 0.6451700329780579,
|
|
"fcm_dpo/delta": 0.27520129084587097,
|
|
"fcm_dpo/margin": 0.9171974062919617,
|
|
"fcm_dpo/q_t": 0.38411933183670044,
|
|
"grad_norm": 103.54834747314453,
|
|
"learning_rate": 2.4735561105299014e-07,
|
|
"logits/chosen": 1.33754301071167,
|
|
"logits/rejected": 1.1491222381591797,
|
|
"logps/chosen": -66.0779037475586,
|
|
"logps/ref_chosen": -72.39828491210938,
|
|
"logps/ref_rejected": -95.58364868164062,
|
|
"logps/rejected": -90.18046569824219,
|
|
"loss": 1.1462,
|
|
"margin_dpo/margin_mean": 0.917197585105896,
|
|
"margin_dpo/margin_std": 1.7585558891296387,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.5563114134542706,
|
|
"fcm_dpo/beta": 0.666650652885437,
|
|
"fcm_dpo/delta": 0.1275821030139923,
|
|
"fcm_dpo/margin": 1.097920536994934,
|
|
"fcm_dpo/q_t": 0.3611149191856384,
|
|
"grad_norm": 99.10115814208984,
|
|
"learning_rate": 2.46033509041298e-07,
|
|
"logits/chosen": 1.3809869289398193,
|
|
"logits/rejected": 1.3760086297988892,
|
|
"logps/chosen": -83.81150817871094,
|
|
"logps/ref_chosen": -90.12812042236328,
|
|
"logps/ref_rejected": -91.6636962890625,
|
|
"logps/rejected": -86.44500732421875,
|
|
"loss": 1.0882,
|
|
"margin_dpo/margin_mean": 1.0979206562042236,
|
|
"margin_dpo/margin_std": 1.7993700504302979,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.5578231292517006,
|
|
"fcm_dpo/beta": 0.6904096007347107,
|
|
"fcm_dpo/delta": 0.1361449509859085,
|
|
"fcm_dpo/margin": 1.0467851161956787,
|
|
"fcm_dpo/q_t": 0.3757448196411133,
|
|
"grad_norm": 100.53764343261719,
|
|
"learning_rate": 2.447115179808846e-07,
|
|
"logits/chosen": 1.497382402420044,
|
|
"logits/rejected": 1.4508342742919922,
|
|
"logps/chosen": -64.85260009765625,
|
|
"logps/ref_chosen": -71.29417419433594,
|
|
"logps/ref_rejected": -99.03875732421875,
|
|
"logps/rejected": -93.64397430419922,
|
|
"loss": 1.2263,
|
|
"margin_dpo/margin_mean": 1.0467851161956787,
|
|
"margin_dpo/margin_std": 2.2027394771575928,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.5593348450491308,
|
|
"fcm_dpo/beta": 0.6650384664535522,
|
|
"fcm_dpo/delta": -0.23074662685394287,
|
|
"fcm_dpo/margin": 1.5856378078460693,
|
|
"fcm_dpo/q_t": 0.3312636613845825,
|
|
"grad_norm": 104.1839599609375,
|
|
"learning_rate": 2.4338967485068164e-07,
|
|
"logits/chosen": 1.8746541738510132,
|
|
"logits/rejected": 1.7757220268249512,
|
|
"logps/chosen": -62.68914031982422,
|
|
"logps/ref_chosen": -69.14627075195312,
|
|
"logps/ref_rejected": -93.58651733398438,
|
|
"logps/rejected": -88.71501159667969,
|
|
"loss": 1.0246,
|
|
"margin_dpo/margin_mean": 1.5856380462646484,
|
|
"margin_dpo/margin_std": 2.374504566192627,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.5608465608465608,
|
|
"fcm_dpo/beta": 0.6482232809066772,
|
|
"fcm_dpo/delta": -0.19568899273872375,
|
|
"fcm_dpo/margin": 1.1544885635375977,
|
|
"fcm_dpo/q_t": 0.38237959146499634,
|
|
"grad_norm": 106.52591705322266,
|
|
"learning_rate": 2.420680166254831e-07,
|
|
"logits/chosen": 1.9493294954299927,
|
|
"logits/rejected": 1.9273741245269775,
|
|
"logps/chosen": -59.593040466308594,
|
|
"logps/ref_chosen": -65.76728820800781,
|
|
"logps/ref_rejected": -79.9320068359375,
|
|
"logps/rejected": -74.91224670410156,
|
|
"loss": 1.264,
|
|
"margin_dpo/margin_mean": 1.1544888019561768,
|
|
"margin_dpo/margin_std": 2.445178508758545,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.562358276643991,
|
|
"fcm_dpo/beta": 0.6568230390548706,
|
|
"fcm_dpo/delta": 0.09663718938827515,
|
|
"fcm_dpo/margin": 1.1454503536224365,
|
|
"fcm_dpo/q_t": 0.38104021549224854,
|
|
"grad_norm": 106.87626647949219,
|
|
"learning_rate": 2.4074658027491044e-07,
|
|
"logits/chosen": 1.9118516445159912,
|
|
"logits/rejected": 1.7353770732879639,
|
|
"logps/chosen": -63.38538360595703,
|
|
"logps/ref_chosen": -69.97252655029297,
|
|
"logps/ref_rejected": -92.38316345214844,
|
|
"logps/rejected": -86.94148254394531,
|
|
"loss": 1.2561,
|
|
"margin_dpo/margin_mean": 1.1454503536224365,
|
|
"margin_dpo/margin_std": 2.4081778526306152,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.563869992441421,
|
|
"fcm_dpo/beta": 0.6400260925292969,
|
|
"fcm_dpo/delta": -0.09241245687007904,
|
|
"fcm_dpo/margin": 1.4590966701507568,
|
|
"fcm_dpo/q_t": 0.34233933687210083,
|
|
"grad_norm": 100.00511932373047,
|
|
"learning_rate": 2.394254027623792e-07,
|
|
"logits/chosen": 1.9285614490509033,
|
|
"logits/rejected": 1.7315351963043213,
|
|
"logps/chosen": -73.30908203125,
|
|
"logps/ref_chosen": -79.34700012207031,
|
|
"logps/ref_rejected": -95.69737243652344,
|
|
"logps/rejected": -91.1185531616211,
|
|
"loss": 1.0186,
|
|
"margin_dpo/margin_mean": 1.4590967893600464,
|
|
"margin_dpo/margin_std": 2.2739272117614746,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.5653817082388511,
|
|
"fcm_dpo/beta": 0.6214843988418579,
|
|
"fcm_dpo/delta": -0.17304188013076782,
|
|
"fcm_dpo/margin": 1.6185259819030762,
|
|
"fcm_dpo/q_t": 0.31608933210372925,
|
|
"grad_norm": 96.7579116821289,
|
|
"learning_rate": 2.381045210440644e-07,
|
|
"logits/chosen": 1.5188226699829102,
|
|
"logits/rejected": 1.4934524297714233,
|
|
"logps/chosen": -87.31782531738281,
|
|
"logps/ref_chosen": -93.45108032226562,
|
|
"logps/ref_rejected": -93.575927734375,
|
|
"logps/rejected": -89.06118774414062,
|
|
"loss": 0.9094,
|
|
"margin_dpo/margin_mean": 1.6185256242752075,
|
|
"margin_dpo/margin_std": 1.9877994060516357,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.5668934240362812,
|
|
"fcm_dpo/beta": 0.6275245547294617,
|
|
"fcm_dpo/delta": 0.1873345673084259,
|
|
"fcm_dpo/margin": 1.0766301155090332,
|
|
"fcm_dpo/q_t": 0.3831120431423187,
|
|
"grad_norm": 96.12359619140625,
|
|
"learning_rate": 2.3678397206786715e-07,
|
|
"logits/chosen": 1.9656658172607422,
|
|
"logits/rejected": 1.900733232498169,
|
|
"logps/chosen": -70.84635925292969,
|
|
"logps/ref_chosen": -77.37177276611328,
|
|
"logps/ref_rejected": -98.59054565429688,
|
|
"logps/rejected": -93.14176177978516,
|
|
"loss": 1.161,
|
|
"margin_dpo/margin_mean": 1.0766297578811646,
|
|
"margin_dpo/margin_std": 2.177039623260498,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.5684051398337112,
|
|
"fcm_dpo/beta": 0.6014668941497803,
|
|
"fcm_dpo/delta": -0.30763763189315796,
|
|
"fcm_dpo/margin": 1.8556671142578125,
|
|
"fcm_dpo/q_t": 0.3150825798511505,
|
|
"grad_norm": 77.44905090332031,
|
|
"learning_rate": 2.3546379277238103e-07,
|
|
"logits/chosen": 2.103670597076416,
|
|
"logits/rejected": 1.9150159358978271,
|
|
"logps/chosen": -62.69090270996094,
|
|
"logps/ref_chosen": -68.99790954589844,
|
|
"logps/ref_rejected": -90.37117004394531,
|
|
"logps/rejected": -85.91983032226562,
|
|
"loss": 0.9165,
|
|
"margin_dpo/margin_mean": 1.8556674718856812,
|
|
"margin_dpo/margin_std": 2.43137264251709,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.5699168556311414,
|
|
"fcm_dpo/beta": 0.6137855052947998,
|
|
"fcm_dpo/delta": 0.129922553896904,
|
|
"fcm_dpo/margin": 1.187425136566162,
|
|
"fcm_dpo/q_t": 0.3663738965988159,
|
|
"grad_norm": 80.50379943847656,
|
|
"learning_rate": 2.3414402008585886e-07,
|
|
"logits/chosen": 1.9742162227630615,
|
|
"logits/rejected": 1.9277293682098389,
|
|
"logps/chosen": -58.0892333984375,
|
|
"logps/ref_chosen": -64.22705841064453,
|
|
"logps/ref_rejected": -73.10292053222656,
|
|
"logps/rejected": -68.15251159667969,
|
|
"loss": 1.1213,
|
|
"margin_dpo/margin_mean": 1.187424898147583,
|
|
"margin_dpo/margin_std": 2.1330418586730957,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.5714285714285714,
|
|
"fcm_dpo/beta": 0.6374775767326355,
|
|
"fcm_dpo/delta": 0.13885389268398285,
|
|
"fcm_dpo/margin": 1.1220755577087402,
|
|
"fcm_dpo/q_t": 0.38182294368743896,
|
|
"grad_norm": 104.7765884399414,
|
|
"learning_rate": 2.3282469092517977e-07,
|
|
"logits/chosen": 1.991929292678833,
|
|
"logits/rejected": 1.9022181034088135,
|
|
"logps/chosen": -70.78045654296875,
|
|
"logps/ref_chosen": -76.90864562988281,
|
|
"logps/ref_rejected": -90.53460693359375,
|
|
"logps/rejected": -85.52848815917969,
|
|
"loss": 1.1762,
|
|
"margin_dpo/margin_mean": 1.1220749616622925,
|
|
"margin_dpo/margin_std": 2.2846970558166504,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.5729402872260015,
|
|
"fcm_dpo/beta": 0.6344826817512512,
|
|
"fcm_dpo/delta": -0.007661148905754089,
|
|
"fcm_dpo/margin": 1.3503775596618652,
|
|
"fcm_dpo/q_t": 0.3466755151748657,
|
|
"grad_norm": 118.14442443847656,
|
|
"learning_rate": 2.3150584219481643e-07,
|
|
"logits/chosen": 2.016268730163574,
|
|
"logits/rejected": 1.8680901527404785,
|
|
"logps/chosen": -85.14906311035156,
|
|
"logps/ref_chosen": -91.2371597290039,
|
|
"logps/ref_rejected": -120.1969985961914,
|
|
"logps/rejected": -115.45927429199219,
|
|
"loss": 1.0249,
|
|
"margin_dpo/margin_mean": 1.3503767251968384,
|
|
"margin_dpo/margin_std": 2.0785036087036133,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.5744520030234316,
|
|
"fcm_dpo/beta": 0.6113913059234619,
|
|
"fcm_dpo/delta": -0.24338015913963318,
|
|
"fcm_dpo/margin": 1.7465507984161377,
|
|
"fcm_dpo/q_t": 0.3018234372138977,
|
|
"grad_norm": 84.41504669189453,
|
|
"learning_rate": 2.3018751078580283e-07,
|
|
"logits/chosen": 1.567660927772522,
|
|
"logits/rejected": 1.5065686702728271,
|
|
"logps/chosen": -71.13554382324219,
|
|
"logps/ref_chosen": -77.78315734863281,
|
|
"logps/ref_rejected": -92.56083679199219,
|
|
"logps/rejected": -87.65977478027344,
|
|
"loss": 0.965,
|
|
"margin_dpo/margin_mean": 1.7465509176254272,
|
|
"margin_dpo/margin_std": 2.397176504135132,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.5759637188208617,
|
|
"fcm_dpo/beta": 0.6341466903686523,
|
|
"fcm_dpo/delta": 0.20742377638816833,
|
|
"fcm_dpo/margin": 1.0140228271484375,
|
|
"fcm_dpo/q_t": 0.38262784481048584,
|
|
"grad_norm": 104.05586242675781,
|
|
"learning_rate": 2.288697335747027e-07,
|
|
"logits/chosen": 1.4323937892913818,
|
|
"logits/rejected": 1.3942238092422485,
|
|
"logps/chosen": -69.36187744140625,
|
|
"logps/ref_chosen": -75.28189086914062,
|
|
"logps/ref_rejected": -81.1995849609375,
|
|
"logps/rejected": -76.29359436035156,
|
|
"loss": 1.2221,
|
|
"margin_dpo/margin_mean": 1.0140235424041748,
|
|
"margin_dpo/margin_std": 2.141716480255127,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.5774754346182918,
|
|
"fcm_dpo/beta": 0.6523127555847168,
|
|
"fcm_dpo/delta": 0.13858559727668762,
|
|
"fcm_dpo/margin": 1.0922915935516357,
|
|
"fcm_dpo/q_t": 0.38505297899246216,
|
|
"grad_norm": 98.1589584350586,
|
|
"learning_rate": 2.2755254742257706e-07,
|
|
"logits/chosen": 1.8270576000213623,
|
|
"logits/rejected": 1.7259621620178223,
|
|
"logps/chosen": -72.89222717285156,
|
|
"logps/ref_chosen": -78.74870300292969,
|
|
"logps/ref_rejected": -99.77484130859375,
|
|
"logps/rejected": -95.01066589355469,
|
|
"loss": 1.1694,
|
|
"margin_dpo/margin_mean": 1.0922926664352417,
|
|
"margin_dpo/margin_std": 2.1799964904785156,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.5789871504157218,
|
|
"fcm_dpo/beta": 0.634296178817749,
|
|
"fcm_dpo/delta": -0.08330284804105759,
|
|
"fcm_dpo/margin": 1.458072543144226,
|
|
"fcm_dpo/q_t": 0.3481723964214325,
|
|
"grad_norm": 116.07969665527344,
|
|
"learning_rate": 2.2623598917395436e-07,
|
|
"logits/chosen": 1.3643097877502441,
|
|
"logits/rejected": 1.4560441970825195,
|
|
"logps/chosen": -89.80696105957031,
|
|
"logps/ref_chosen": -95.92772674560547,
|
|
"logps/ref_rejected": -92.13604736328125,
|
|
"logps/rejected": -87.47335815429688,
|
|
"loss": 1.0809,
|
|
"margin_dpo/margin_mean": 1.4580726623535156,
|
|
"margin_dpo/margin_std": 2.3793869018554688,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.5804988662131519,
|
|
"fcm_dpo/beta": 0.637192964553833,
|
|
"fcm_dpo/delta": 0.027054572477936745,
|
|
"fcm_dpo/margin": 1.29500412940979,
|
|
"fcm_dpo/q_t": 0.34487384557724,
|
|
"grad_norm": 106.3746566772461,
|
|
"learning_rate": 2.2492009565579875e-07,
|
|
"logits/chosen": 2.0678038597106934,
|
|
"logits/rejected": 1.9981340169906616,
|
|
"logps/chosen": -74.25601959228516,
|
|
"logps/ref_chosen": -80.208984375,
|
|
"logps/ref_rejected": -94.39380645751953,
|
|
"logps/rejected": -89.73583984375,
|
|
"loss": 1.0629,
|
|
"margin_dpo/margin_mean": 1.29500412940979,
|
|
"margin_dpo/margin_std": 2.108096122741699,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.582010582010582,
|
|
"fcm_dpo/beta": 0.6329531669616699,
|
|
"fcm_dpo/delta": -0.12863296270370483,
|
|
"fcm_dpo/margin": 1.5248796939849854,
|
|
"fcm_dpo/q_t": 0.3282264471054077,
|
|
"grad_norm": 97.46591186523438,
|
|
"learning_rate": 2.2360490367648084e-07,
|
|
"logits/chosen": 1.816765308380127,
|
|
"logits/rejected": 1.7105906009674072,
|
|
"logps/chosen": -79.43816375732422,
|
|
"logps/ref_chosen": -85.26632690429688,
|
|
"logps/ref_rejected": -102.1983413696289,
|
|
"logps/rejected": -97.89505767822266,
|
|
"loss": 0.9315,
|
|
"margin_dpo/margin_mean": 1.524879813194275,
|
|
"margin_dpo/margin_std": 1.9778671264648438,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.5835222978080121,
|
|
"fcm_dpo/beta": 0.6364574432373047,
|
|
"fcm_dpo/delta": 0.14686298370361328,
|
|
"fcm_dpo/margin": 1.1207685470581055,
|
|
"fcm_dpo/q_t": 0.37132441997528076,
|
|
"grad_norm": 108.806396484375,
|
|
"learning_rate": 2.2229045002474724e-07,
|
|
"logits/chosen": 1.6947441101074219,
|
|
"logits/rejected": 1.5647040605545044,
|
|
"logps/chosen": -87.65086364746094,
|
|
"logps/ref_chosen": -93.19975280761719,
|
|
"logps/ref_rejected": -112.98831176757812,
|
|
"logps/rejected": -108.5601806640625,
|
|
"loss": 1.1979,
|
|
"margin_dpo/margin_mean": 1.1207683086395264,
|
|
"margin_dpo/margin_std": 2.267601490020752,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.5850340136054422,
|
|
"fcm_dpo/beta": 0.6233910918235779,
|
|
"fcm_dpo/delta": -0.21825829148292542,
|
|
"fcm_dpo/margin": 1.677814245223999,
|
|
"fcm_dpo/q_t": 0.3103215992450714,
|
|
"grad_norm": 85.16645812988281,
|
|
"learning_rate": 2.209767714686924e-07,
|
|
"logits/chosen": 1.881676435470581,
|
|
"logits/rejected": 1.70719575881958,
|
|
"logps/chosen": -60.38775634765625,
|
|
"logps/ref_chosen": -66.32861328125,
|
|
"logps/ref_rejected": -100.56486511230469,
|
|
"logps/rejected": -96.30183410644531,
|
|
"loss": 0.8773,
|
|
"margin_dpo/margin_mean": 1.6778154373168945,
|
|
"margin_dpo/margin_std": 1.9726049900054932,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.5865457294028723,
|
|
"fcm_dpo/beta": 0.6180027723312378,
|
|
"fcm_dpo/delta": 0.12911082804203033,
|
|
"fcm_dpo/margin": 1.1801085472106934,
|
|
"fcm_dpo/q_t": 0.3718109726905823,
|
|
"grad_norm": 95.51586151123047,
|
|
"learning_rate": 2.1966390475472954e-07,
|
|
"logits/chosen": 1.846620798110962,
|
|
"logits/rejected": 1.8400707244873047,
|
|
"logps/chosen": -87.35832214355469,
|
|
"logps/ref_chosen": -92.95967864990234,
|
|
"logps/ref_rejected": -97.9437255859375,
|
|
"logps/rejected": -93.52247619628906,
|
|
"loss": 1.1316,
|
|
"margin_dpo/margin_mean": 1.1801083087921143,
|
|
"margin_dpo/margin_std": 2.105041027069092,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.5880574452003023,
|
|
"fcm_dpo/beta": 0.6405338048934937,
|
|
"fcm_dpo/delta": 0.08333232998847961,
|
|
"fcm_dpo/margin": 1.2051608562469482,
|
|
"fcm_dpo/q_t": 0.3516117334365845,
|
|
"grad_norm": 103.3174057006836,
|
|
"learning_rate": 2.1835188660656265e-07,
|
|
"logits/chosen": 1.9260954856872559,
|
|
"logits/rejected": 1.8456639051437378,
|
|
"logps/chosen": -71.16734313964844,
|
|
"logps/ref_chosen": -76.89031982421875,
|
|
"logps/ref_rejected": -93.79212951660156,
|
|
"logps/rejected": -89.27430725097656,
|
|
"loss": 1.1927,
|
|
"margin_dpo/margin_mean": 1.205160140991211,
|
|
"margin_dpo/margin_std": 2.2965409755706787,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.5895691609977324,
|
|
"fcm_dpo/beta": 0.645778238773346,
|
|
"fcm_dpo/delta": 0.037246476858854294,
|
|
"fcm_dpo/margin": 1.2624857425689697,
|
|
"fcm_dpo/q_t": 0.3491223454475403,
|
|
"grad_norm": 86.41116333007812,
|
|
"learning_rate": 2.170407537241599e-07,
|
|
"logits/chosen": 2.1113734245300293,
|
|
"logits/rejected": 2.000617027282715,
|
|
"logps/chosen": -54.835906982421875,
|
|
"logps/ref_chosen": -61.058815002441406,
|
|
"logps/ref_rejected": -79.55152893066406,
|
|
"logps/rejected": -74.59110260009766,
|
|
"loss": 1.0129,
|
|
"margin_dpo/margin_mean": 1.2624856233596802,
|
|
"margin_dpo/margin_std": 1.911329984664917,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.5910808767951625,
|
|
"fcm_dpo/beta": 0.6336863040924072,
|
|
"fcm_dpo/delta": -0.15196503698825836,
|
|
"fcm_dpo/margin": 1.5577452182769775,
|
|
"fcm_dpo/q_t": 0.34712958335876465,
|
|
"grad_norm": 101.08207702636719,
|
|
"learning_rate": 2.1573054278272636e-07,
|
|
"logits/chosen": 1.7356573343276978,
|
|
"logits/rejected": 1.647139310836792,
|
|
"logps/chosen": -72.69304656982422,
|
|
"logps/ref_chosen": -78.60820770263672,
|
|
"logps/ref_rejected": -103.3367691040039,
|
|
"logps/rejected": -98.97935485839844,
|
|
"loss": 1.1506,
|
|
"margin_dpo/margin_mean": 1.5577449798583984,
|
|
"margin_dpo/margin_std": 2.82521390914917,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.5925925925925926,
|
|
"fcm_dpo/beta": 0.600347638130188,
|
|
"fcm_dpo/delta": -0.32079529762268066,
|
|
"fcm_dpo/margin": 1.891295313835144,
|
|
"fcm_dpo/q_t": 0.3097449541091919,
|
|
"grad_norm": 92.39323425292969,
|
|
"learning_rate": 2.1442129043167873e-07,
|
|
"logits/chosen": 2.090341806411743,
|
|
"logits/rejected": 2.024127721786499,
|
|
"logps/chosen": -80.92561340332031,
|
|
"logps/ref_chosen": -86.99468994140625,
|
|
"logps/ref_rejected": -112.73616027832031,
|
|
"logps/rejected": -108.55838012695312,
|
|
"loss": 0.9083,
|
|
"margin_dpo/margin_mean": 1.8912949562072754,
|
|
"margin_dpo/margin_std": 2.522023916244507,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.5941043083900227,
|
|
"fcm_dpo/beta": 0.5811384916305542,
|
|
"fcm_dpo/delta": 0.017529495060443878,
|
|
"fcm_dpo/margin": 1.431158423423767,
|
|
"fcm_dpo/q_t": 0.35059964656829834,
|
|
"grad_norm": 86.41921997070312,
|
|
"learning_rate": 2.131130332936195e-07,
|
|
"logits/chosen": 1.6732672452926636,
|
|
"logits/rejected": 1.5687006711959839,
|
|
"logps/chosen": -65.68231964111328,
|
|
"logps/ref_chosen": -71.26398468017578,
|
|
"logps/ref_rejected": -88.99722290039062,
|
|
"logps/rejected": -84.84672546386719,
|
|
"loss": 0.9829,
|
|
"margin_dpo/margin_mean": 1.4311583042144775,
|
|
"margin_dpo/margin_std": 1.997040033340454,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.5956160241874527,
|
|
"fcm_dpo/beta": 0.5913628339767456,
|
|
"fcm_dpo/delta": 0.04651292413473129,
|
|
"fcm_dpo/margin": 1.3650890588760376,
|
|
"fcm_dpo/q_t": 0.34188681840896606,
|
|
"grad_norm": 94.80079650878906,
|
|
"learning_rate": 2.1180580796331323e-07,
|
|
"logits/chosen": 2.1540815830230713,
|
|
"logits/rejected": 2.0499520301818848,
|
|
"logps/chosen": -72.911865234375,
|
|
"logps/ref_chosen": -78.70564270019531,
|
|
"logps/ref_rejected": -87.01431274414062,
|
|
"logps/rejected": -82.58561706542969,
|
|
"loss": 0.9604,
|
|
"margin_dpo/margin_mean": 1.3650896549224854,
|
|
"margin_dpo/margin_std": 1.7961560487747192,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.5971277399848829,
|
|
"fcm_dpo/beta": 0.5942326784133911,
|
|
"fcm_dpo/delta": -0.09017691761255264,
|
|
"fcm_dpo/margin": 1.5636615753173828,
|
|
"fcm_dpo/q_t": 0.33331990242004395,
|
|
"grad_norm": 74.20651245117188,
|
|
"learning_rate": 2.104996510066625e-07,
|
|
"logits/chosen": 1.5473686456680298,
|
|
"logits/rejected": 1.3798058032989502,
|
|
"logps/chosen": -59.18064498901367,
|
|
"logps/ref_chosen": -65.30274963378906,
|
|
"logps/ref_rejected": -93.22492218017578,
|
|
"logps/rejected": -88.6664810180664,
|
|
"loss": 0.9713,
|
|
"margin_dpo/margin_mean": 1.563661813735962,
|
|
"margin_dpo/margin_std": 2.150235891342163,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.5986394557823129,
|
|
"fcm_dpo/beta": 0.5756855607032776,
|
|
"fcm_dpo/delta": 0.042295172810554504,
|
|
"fcm_dpo/margin": 1.3979148864746094,
|
|
"fcm_dpo/q_t": 0.3453894853591919,
|
|
"grad_norm": 85.51688385009766,
|
|
"learning_rate": 2.0919459895968517e-07,
|
|
"logits/chosen": 1.5687694549560547,
|
|
"logits/rejected": 1.4305109977722168,
|
|
"logps/chosen": -61.45856475830078,
|
|
"logps/ref_chosen": -67.33502197265625,
|
|
"logps/ref_rejected": -98.8193359375,
|
|
"logps/rejected": -94.3407974243164,
|
|
"loss": 1.0108,
|
|
"margin_dpo/margin_mean": 1.397914171218872,
|
|
"margin_dpo/margin_std": 1.9760756492614746,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.600151171579743,
|
|
"fcm_dpo/beta": 0.6103275418281555,
|
|
"fcm_dpo/delta": 0.24452278017997742,
|
|
"fcm_dpo/margin": 1.0165547132492065,
|
|
"fcm_dpo/q_t": 0.37763512134552,
|
|
"grad_norm": 118.63185119628906,
|
|
"learning_rate": 2.078906883274924e-07,
|
|
"logits/chosen": 1.754250168800354,
|
|
"logits/rejected": 1.6709903478622437,
|
|
"logps/chosen": -83.72361755371094,
|
|
"logps/ref_chosen": -89.6042251586914,
|
|
"logps/ref_rejected": -104.9779052734375,
|
|
"logps/rejected": -100.11386108398438,
|
|
"loss": 1.2895,
|
|
"margin_dpo/margin_mean": 1.0165547132492065,
|
|
"margin_dpo/margin_std": 2.3554749488830566,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.6016628873771731,
|
|
"fcm_dpo/beta": 0.599021315574646,
|
|
"fcm_dpo/delta": -0.14220577478408813,
|
|
"fcm_dpo/margin": 1.62839937210083,
|
|
"fcm_dpo/q_t": 0.32787105441093445,
|
|
"grad_norm": 79.75584411621094,
|
|
"learning_rate": 2.065879555832674e-07,
|
|
"logits/chosen": 1.8086758852005005,
|
|
"logits/rejected": 1.6731503009796143,
|
|
"logps/chosen": -60.36062240600586,
|
|
"logps/ref_chosen": -66.43465423583984,
|
|
"logps/ref_rejected": -90.90376281738281,
|
|
"logps/rejected": -86.4581298828125,
|
|
"loss": 0.9496,
|
|
"margin_dpo/margin_mean": 1.6283986568450928,
|
|
"margin_dpo/margin_std": 2.1961557865142822,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.6031746031746031,
|
|
"fcm_dpo/beta": 0.599816083908081,
|
|
"fcm_dpo/delta": 0.027060478925704956,
|
|
"fcm_dpo/margin": 1.375216007232666,
|
|
"fcm_dpo/q_t": 0.35677099227905273,
|
|
"grad_norm": 100.44293975830078,
|
|
"learning_rate": 2.052864371672457e-07,
|
|
"logits/chosen": 1.86820650100708,
|
|
"logits/rejected": 1.6108736991882324,
|
|
"logps/chosen": -81.86981964111328,
|
|
"logps/ref_chosen": -87.22315979003906,
|
|
"logps/ref_rejected": -136.32411193847656,
|
|
"logps/rejected": -132.34597778320312,
|
|
"loss": 1.0919,
|
|
"margin_dpo/margin_mean": 1.3752162456512451,
|
|
"margin_dpo/margin_std": 2.36777663230896,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.6046863189720333,
|
|
"fcm_dpo/beta": 0.6271291971206665,
|
|
"fcm_dpo/delta": 0.27985769510269165,
|
|
"fcm_dpo/margin": 0.9363595247268677,
|
|
"fcm_dpo/q_t": 0.39870405197143555,
|
|
"grad_norm": 111.8140869140625,
|
|
"learning_rate": 2.0398616948569493e-07,
|
|
"logits/chosen": 2.029981851577759,
|
|
"logits/rejected": 1.8537551164627075,
|
|
"logps/chosen": -85.73567199707031,
|
|
"logps/ref_chosen": -91.1212158203125,
|
|
"logps/ref_rejected": -108.19235229492188,
|
|
"logps/rejected": -103.7431640625,
|
|
"loss": 1.1945,
|
|
"margin_dpo/margin_mean": 0.9363600015640259,
|
|
"margin_dpo/margin_std": 1.9859216213226318,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.6046863189720333,
|
|
"eval_fcm_dpo/beta": 0.6381731033325195,
|
|
"eval_logits/chosen": 1.927345633506775,
|
|
"eval_logits/rejected": 1.811177372932434,
|
|
"eval_logps/chosen": -81.09713745117188,
|
|
"eval_logps/ref_chosen": -86.90177917480469,
|
|
"eval_logps/ref_rejected": -96.69639587402344,
|
|
"eval_logps/rejected": -92.2294692993164,
|
|
"eval_loss": 0.5504243969917297,
|
|
"eval_margin_dpo/margin_mean": 1.337730884552002,
|
|
"eval_margin_dpo/margin_std": 2.288548707962036,
|
|
"eval_runtime": 42.2981,
|
|
"eval_samples_per_second": 54.447,
|
|
"eval_steps_per_second": 1.702,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.6061980347694633,
|
|
"fcm_dpo/beta": 0.6117605566978455,
|
|
"fcm_dpo/delta": -0.28475117683410645,
|
|
"fcm_dpo/margin": 1.8033357858657837,
|
|
"fcm_dpo/q_t": 0.30270397663116455,
|
|
"grad_norm": 74.6077651977539,
|
|
"learning_rate": 2.0268718890989752e-07,
|
|
"logits/chosen": 1.9857186079025269,
|
|
"logits/rejected": 1.8438901901245117,
|
|
"logps/chosen": -61.3988037109375,
|
|
"logps/ref_chosen": -67.54151153564453,
|
|
"logps/ref_rejected": -98.06488800048828,
|
|
"logps/rejected": -93.72550964355469,
|
|
"loss": 0.8707,
|
|
"margin_dpo/margin_mean": 1.803335428237915,
|
|
"margin_dpo/margin_std": 2.163586378097534,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.6077097505668935,
|
|
"fcm_dpo/beta": 0.5940630435943604,
|
|
"fcm_dpo/delta": -0.0942755714058876,
|
|
"fcm_dpo/margin": 1.5745465755462646,
|
|
"fcm_dpo/q_t": 0.33752086758613586,
|
|
"grad_norm": 84.7685317993164,
|
|
"learning_rate": 2.013895317751323e-07,
|
|
"logits/chosen": 1.5190571546554565,
|
|
"logits/rejected": 1.489319086074829,
|
|
"logps/chosen": -71.37261962890625,
|
|
"logps/ref_chosen": -77.44487762451172,
|
|
"logps/ref_rejected": -83.1333236694336,
|
|
"logps/rejected": -78.63561248779297,
|
|
"loss": 0.9789,
|
|
"margin_dpo/margin_mean": 1.5745457410812378,
|
|
"margin_dpo/margin_std": 2.261594533920288,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.6092214663643235,
|
|
"fcm_dpo/beta": 0.5662052035331726,
|
|
"fcm_dpo/delta": -0.3262058198451996,
|
|
"fcm_dpo/margin": 2.0131633281707764,
|
|
"fcm_dpo/q_t": 0.3089754581451416,
|
|
"grad_norm": 75.9161148071289,
|
|
"learning_rate": 2.0009323437965898e-07,
|
|
"logits/chosen": 2.0743629932403564,
|
|
"logits/rejected": 1.8951172828674316,
|
|
"logps/chosen": -62.74315643310547,
|
|
"logps/ref_chosen": -68.8230972290039,
|
|
"logps/ref_rejected": -99.82356262207031,
|
|
"logps/rejected": -95.75678253173828,
|
|
"loss": 0.9734,
|
|
"margin_dpo/margin_mean": 2.013162851333618,
|
|
"margin_dpo/margin_std": 2.808767795562744,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.6107331821617535,
|
|
"fcm_dpo/beta": 0.5434421300888062,
|
|
"fcm_dpo/delta": -0.018316656351089478,
|
|
"fcm_dpo/margin": 1.5841295719146729,
|
|
"fcm_dpo/q_t": 0.3353651165962219,
|
|
"grad_norm": 85.1689224243164,
|
|
"learning_rate": 1.9879833298370237e-07,
|
|
"logits/chosen": 1.5624295473098755,
|
|
"logits/rejected": 1.443617582321167,
|
|
"logps/chosen": -74.19883728027344,
|
|
"logps/ref_chosen": -80.26783752441406,
|
|
"logps/ref_rejected": -111.60258483886719,
|
|
"logps/rejected": -107.11771392822266,
|
|
"loss": 0.957,
|
|
"margin_dpo/margin_mean": 1.5841295719146729,
|
|
"margin_dpo/margin_std": 2.007789134979248,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.6122448979591837,
|
|
"fcm_dpo/beta": 0.5555033683776855,
|
|
"fcm_dpo/delta": 0.15998849272727966,
|
|
"fcm_dpo/margin": 1.2557036876678467,
|
|
"fcm_dpo/q_t": 0.37533363699913025,
|
|
"grad_norm": 75.46453094482422,
|
|
"learning_rate": 1.975048638084379e-07,
|
|
"logits/chosen": 1.792256236076355,
|
|
"logits/rejected": 1.6907178163528442,
|
|
"logps/chosen": -62.33729934692383,
|
|
"logps/ref_chosen": -68.31065368652344,
|
|
"logps/ref_rejected": -81.56044006347656,
|
|
"logps/rejected": -76.84278106689453,
|
|
"loss": 1.0891,
|
|
"margin_dpo/margin_mean": 1.2557039260864258,
|
|
"margin_dpo/margin_std": 2.046755790710449,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.6137566137566137,
|
|
"fcm_dpo/beta": 0.567691445350647,
|
|
"fcm_dpo/delta": -0.02151249535381794,
|
|
"fcm_dpo/margin": 1.5318082571029663,
|
|
"fcm_dpo/q_t": 0.3437275290489197,
|
|
"grad_norm": 85.55471801757812,
|
|
"learning_rate": 1.9621286303497914e-07,
|
|
"logits/chosen": 1.7501626014709473,
|
|
"logits/rejected": 1.4838974475860596,
|
|
"logps/chosen": -58.76299285888672,
|
|
"logps/ref_chosen": -64.86714935302734,
|
|
"logps/ref_rejected": -110.06051635742188,
|
|
"logps/rejected": -105.48816680908203,
|
|
"loss": 0.9992,
|
|
"margin_dpo/margin_mean": 1.531808853149414,
|
|
"margin_dpo/margin_std": 2.274860382080078,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.6152683295540439,
|
|
"fcm_dpo/beta": 0.5869194269180298,
|
|
"fcm_dpo/delta": 0.14187076687812805,
|
|
"fcm_dpo/margin": 1.2160757780075073,
|
|
"fcm_dpo/q_t": 0.3661472797393799,
|
|
"grad_norm": 120.85926055908203,
|
|
"learning_rate": 1.9492236680336483e-07,
|
|
"logits/chosen": 1.4653615951538086,
|
|
"logits/rejected": 1.2881108522415161,
|
|
"logps/chosen": -96.7445068359375,
|
|
"logps/ref_chosen": -102.01712799072266,
|
|
"logps/ref_rejected": -121.53548431396484,
|
|
"logps/rejected": -117.47894287109375,
|
|
"loss": 1.057,
|
|
"margin_dpo/margin_mean": 1.2160767316818237,
|
|
"margin_dpo/margin_std": 1.9763216972351074,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.6167800453514739,
|
|
"fcm_dpo/beta": 0.5652141571044922,
|
|
"fcm_dpo/delta": -0.28196677565574646,
|
|
"fcm_dpo/margin": 1.94913649559021,
|
|
"fcm_dpo/q_t": 0.2972300052642822,
|
|
"grad_norm": 65.2387466430664,
|
|
"learning_rate": 1.9363341121154895e-07,
|
|
"logits/chosen": 1.9260717630386353,
|
|
"logits/rejected": 1.76041579246521,
|
|
"logps/chosen": -66.63929748535156,
|
|
"logps/ref_chosen": -72.77989959716797,
|
|
"logps/ref_rejected": -92.01815795898438,
|
|
"logps/rejected": -87.82669830322266,
|
|
"loss": 0.8207,
|
|
"margin_dpo/margin_mean": 1.949136734008789,
|
|
"margin_dpo/margin_std": 2.074869155883789,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.618291761148904,
|
|
"fcm_dpo/beta": 0.5699707865715027,
|
|
"fcm_dpo/delta": 0.2298847734928131,
|
|
"fcm_dpo/margin": 1.1142563819885254,
|
|
"fcm_dpo/q_t": 0.3908570408821106,
|
|
"grad_norm": 83.33867645263672,
|
|
"learning_rate": 1.9234603231438994e-07,
|
|
"logits/chosen": 1.7916343212127686,
|
|
"logits/rejected": 1.7911202907562256,
|
|
"logps/chosen": -72.04692077636719,
|
|
"logps/ref_chosen": -77.7901611328125,
|
|
"logps/ref_rejected": -79.2997055053711,
|
|
"logps/rejected": -74.67072296142578,
|
|
"loss": 1.1559,
|
|
"margin_dpo/margin_mean": 1.1142561435699463,
|
|
"margin_dpo/margin_std": 2.188920259475708,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.6198034769463341,
|
|
"fcm_dpo/beta": 0.576758861541748,
|
|
"fcm_dpo/delta": 0.06280102580785751,
|
|
"fcm_dpo/margin": 1.3701996803283691,
|
|
"fcm_dpo/q_t": 0.35670357942581177,
|
|
"grad_norm": 94.09947967529297,
|
|
"learning_rate": 1.9106026612264315e-07,
|
|
"logits/chosen": 1.590678095817566,
|
|
"logits/rejected": 1.514933705329895,
|
|
"logps/chosen": -74.49111938476562,
|
|
"logps/ref_chosen": -80.35844421386719,
|
|
"logps/ref_rejected": -92.19056701660156,
|
|
"logps/rejected": -87.69343566894531,
|
|
"loss": 1.0963,
|
|
"margin_dpo/margin_mean": 1.3701996803283691,
|
|
"margin_dpo/margin_std": 2.2878055572509766,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.6213151927437641,
|
|
"fcm_dpo/beta": 0.5804015398025513,
|
|
"fcm_dpo/delta": -0.0807589739561081,
|
|
"fcm_dpo/margin": 1.5908392667770386,
|
|
"fcm_dpo/q_t": 0.3370782732963562,
|
|
"grad_norm": 97.5421371459961,
|
|
"learning_rate": 1.8977614860195296e-07,
|
|
"logits/chosen": 1.5710428953170776,
|
|
"logits/rejected": 1.4811426401138306,
|
|
"logps/chosen": -65.04608917236328,
|
|
"logps/ref_chosen": -70.72857666015625,
|
|
"logps/ref_rejected": -93.19204711914062,
|
|
"logps/rejected": -89.10040283203125,
|
|
"loss": 1.0582,
|
|
"margin_dpo/margin_mean": 1.5908393859863281,
|
|
"margin_dpo/margin_std": 2.534249782562256,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.6228269085411943,
|
|
"fcm_dpo/beta": 0.5793402194976807,
|
|
"fcm_dpo/delta": 0.09534160792827606,
|
|
"fcm_dpo/margin": 1.3141306638717651,
|
|
"fcm_dpo/q_t": 0.36529815196990967,
|
|
"grad_norm": 94.25563049316406,
|
|
"learning_rate": 1.8849371567184662e-07,
|
|
"logits/chosen": 1.7595200538635254,
|
|
"logits/rejected": 1.668968915939331,
|
|
"logps/chosen": -67.35857391357422,
|
|
"logps/ref_chosen": -72.87568664550781,
|
|
"logps/ref_rejected": -88.21068572998047,
|
|
"logps/rejected": -84.00770568847656,
|
|
"loss": 1.0512,
|
|
"margin_dpo/margin_mean": 1.3141303062438965,
|
|
"margin_dpo/margin_std": 2.1212642192840576,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.6243386243386243,
|
|
"fcm_dpo/beta": 0.587976336479187,
|
|
"fcm_dpo/delta": 0.01775454543530941,
|
|
"fcm_dpo/margin": 1.4179542064666748,
|
|
"fcm_dpo/q_t": 0.3521287739276886,
|
|
"grad_norm": 90.87653350830078,
|
|
"learning_rate": 1.872130032047302e-07,
|
|
"logits/chosen": 1.2563691139221191,
|
|
"logits/rejected": 1.169920802116394,
|
|
"logps/chosen": -79.08854675292969,
|
|
"logps/ref_chosen": -84.70051574707031,
|
|
"logps/ref_rejected": -92.06742095947266,
|
|
"logps/rejected": -87.8734130859375,
|
|
"loss": 1.063,
|
|
"margin_dpo/margin_mean": 1.4179542064666748,
|
|
"margin_dpo/margin_std": 2.338256359100342,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.6258503401360545,
|
|
"fcm_dpo/beta": 0.5731327533721924,
|
|
"fcm_dpo/delta": -0.11875976622104645,
|
|
"fcm_dpo/margin": 1.6653555631637573,
|
|
"fcm_dpo/q_t": 0.32663214206695557,
|
|
"grad_norm": 78.96257781982422,
|
|
"learning_rate": 1.8593404702488436e-07,
|
|
"logits/chosen": 1.8244965076446533,
|
|
"logits/rejected": 1.717824935913086,
|
|
"logps/chosen": -65.36703491210938,
|
|
"logps/ref_chosen": -70.97660827636719,
|
|
"logps/ref_rejected": -92.90523529052734,
|
|
"logps/rejected": -88.96101379394531,
|
|
"loss": 0.9509,
|
|
"margin_dpo/margin_mean": 1.6653554439544678,
|
|
"margin_dpo/margin_std": 2.187335252761841,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.6273620559334845,
|
|
"fcm_dpo/beta": 0.5758957862854004,
|
|
"fcm_dpo/delta": 0.007150387391448021,
|
|
"fcm_dpo/margin": 1.4645922183990479,
|
|
"fcm_dpo/q_t": 0.3530283570289612,
|
|
"grad_norm": 84.59825134277344,
|
|
"learning_rate": 1.846568829074628e-07,
|
|
"logits/chosen": 1.6455614566802979,
|
|
"logits/rejected": 1.566821575164795,
|
|
"logps/chosen": -66.1544189453125,
|
|
"logps/ref_chosen": -71.7189712524414,
|
|
"logps/ref_rejected": -74.54219818115234,
|
|
"logps/rejected": -70.44224548339844,
|
|
"loss": 1.1014,
|
|
"margin_dpo/margin_mean": 1.4645925760269165,
|
|
"margin_dpo/margin_std": 2.4894518852233887,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.6288737717309146,
|
|
"fcm_dpo/beta": 0.5732775926589966,
|
|
"fcm_dpo/delta": -0.049004100263118744,
|
|
"fcm_dpo/margin": 1.0549395084381104,
|
|
"fcm_dpo/q_t": 0.3943997621536255,
|
|
"grad_norm": 120.06104278564453,
|
|
"learning_rate": 1.8338154657749128e-07,
|
|
"logits/chosen": 1.7915780544281006,
|
|
"logits/rejected": 1.6690804958343506,
|
|
"logps/chosen": -67.31856536865234,
|
|
"logps/ref_chosen": -72.88249206542969,
|
|
"logps/ref_rejected": -85.30693054199219,
|
|
"logps/rejected": -80.79793548583984,
|
|
"loss": 1.2072,
|
|
"margin_dpo/margin_mean": 1.0549399852752686,
|
|
"margin_dpo/margin_std": 2.200162410736084,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.6303854875283447,
|
|
"fcm_dpo/beta": 0.5584331750869751,
|
|
"fcm_dpo/delta": -0.2724847197532654,
|
|
"fcm_dpo/margin": 1.9535236358642578,
|
|
"fcm_dpo/q_t": 0.3041985034942627,
|
|
"grad_norm": 77.82061004638672,
|
|
"learning_rate": 1.8210807370886849e-07,
|
|
"logits/chosen": 1.860659122467041,
|
|
"logits/rejected": 1.7060253620147705,
|
|
"logps/chosen": -66.84624481201172,
|
|
"logps/ref_chosen": -72.49703216552734,
|
|
"logps/ref_rejected": -89.38966369628906,
|
|
"logps/rejected": -85.69239807128906,
|
|
"loss": 0.8895,
|
|
"margin_dpo/margin_mean": 1.9535231590270996,
|
|
"margin_dpo/margin_std": 2.344741106033325,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.6318972033257747,
|
|
"fcm_dpo/beta": 0.5670143961906433,
|
|
"fcm_dpo/delta": 0.3630419969558716,
|
|
"fcm_dpo/margin": 0.8936295509338379,
|
|
"fcm_dpo/q_t": 0.4141634702682495,
|
|
"grad_norm": 104.79427337646484,
|
|
"learning_rate": 1.8083649992336825e-07,
|
|
"logits/chosen": 1.8443524837493896,
|
|
"logits/rejected": 1.8251988887786865,
|
|
"logps/chosen": -84.56147766113281,
|
|
"logps/ref_chosen": -89.70926666259766,
|
|
"logps/ref_rejected": -90.98756408691406,
|
|
"logps/rejected": -86.73341369628906,
|
|
"loss": 1.243,
|
|
"margin_dpo/margin_mean": 0.8936293125152588,
|
|
"margin_dpo/margin_std": 2.1351263523101807,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.6334089191232048,
|
|
"fcm_dpo/beta": 0.5673000812530518,
|
|
"fcm_dpo/delta": -0.13866420090198517,
|
|
"fcm_dpo/margin": 1.717864751815796,
|
|
"fcm_dpo/q_t": 0.31805652379989624,
|
|
"grad_norm": 73.9402847290039,
|
|
"learning_rate": 1.7956686078964255e-07,
|
|
"logits/chosen": 1.4547133445739746,
|
|
"logits/rejected": 1.3302205801010132,
|
|
"logps/chosen": -69.59623718261719,
|
|
"logps/ref_chosen": -75.652099609375,
|
|
"logps/ref_rejected": -91.0013427734375,
|
|
"logps/rejected": -86.6633529663086,
|
|
"loss": 0.8888,
|
|
"margin_dpo/margin_mean": 1.717864751815796,
|
|
"margin_dpo/margin_std": 2.056272029876709,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.6349206349206349,
|
|
"fcm_dpo/beta": 0.570350706577301,
|
|
"fcm_dpo/delta": 0.046944353729486465,
|
|
"fcm_dpo/margin": 1.4142495393753052,
|
|
"fcm_dpo/q_t": 0.3754135072231293,
|
|
"grad_norm": 88.30995178222656,
|
|
"learning_rate": 1.782991918222275e-07,
|
|
"logits/chosen": 1.5207473039627075,
|
|
"logits/rejected": 1.4120867252349854,
|
|
"logps/chosen": -67.18255615234375,
|
|
"logps/ref_chosen": -72.58027648925781,
|
|
"logps/ref_rejected": -79.90303802490234,
|
|
"logps/rejected": -75.91956329345703,
|
|
"loss": 1.1411,
|
|
"margin_dpo/margin_mean": 1.414249300956726,
|
|
"margin_dpo/margin_std": 2.669312000274658,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.636432350718065,
|
|
"fcm_dpo/beta": 0.5930507779121399,
|
|
"fcm_dpo/delta": 0.228386789560318,
|
|
"fcm_dpo/margin": 1.0698721408843994,
|
|
"fcm_dpo/q_t": 0.3951471447944641,
|
|
"grad_norm": 98.86088562011719,
|
|
"learning_rate": 1.7703352848054887e-07,
|
|
"logits/chosen": 1.5929409265518188,
|
|
"logits/rejected": 1.374795913696289,
|
|
"logps/chosen": -73.23966979980469,
|
|
"logps/ref_chosen": -78.71546936035156,
|
|
"logps/ref_rejected": -90.82321166992188,
|
|
"logps/rejected": -86.41729736328125,
|
|
"loss": 1.3057,
|
|
"margin_dpo/margin_mean": 1.0698716640472412,
|
|
"margin_dpo/margin_std": 2.6327433586120605,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.6379440665154951,
|
|
"fcm_dpo/beta": 0.5989946126937866,
|
|
"fcm_dpo/delta": -0.007140956819057465,
|
|
"fcm_dpo/margin": 1.429376244544983,
|
|
"fcm_dpo/q_t": 0.3605658710002899,
|
|
"grad_norm": 107.47647094726562,
|
|
"learning_rate": 1.7576990616793137e-07,
|
|
"logits/chosen": 1.7196696996688843,
|
|
"logits/rejected": 1.6793361902236938,
|
|
"logps/chosen": -80.95000457763672,
|
|
"logps/ref_chosen": -86.74519348144531,
|
|
"logps/ref_rejected": -94.02015686035156,
|
|
"logps/rejected": -89.65435028076172,
|
|
"loss": 1.1255,
|
|
"margin_dpo/margin_mean": 1.4293758869171143,
|
|
"margin_dpo/margin_std": 2.6032376289367676,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.6394557823129252,
|
|
"fcm_dpo/beta": 0.5808249711990356,
|
|
"fcm_dpo/delta": -0.1427406370639801,
|
|
"fcm_dpo/margin": 1.683246374130249,
|
|
"fcm_dpo/q_t": 0.3259110450744629,
|
|
"grad_norm": 79.15151977539062,
|
|
"learning_rate": 1.745083602306071e-07,
|
|
"logits/chosen": 1.875573992729187,
|
|
"logits/rejected": 1.7012746334075928,
|
|
"logps/chosen": -66.27205657958984,
|
|
"logps/ref_chosen": -72.02232360839844,
|
|
"logps/ref_rejected": -93.26976776123047,
|
|
"logps/rejected": -89.2027587890625,
|
|
"loss": 0.922,
|
|
"margin_dpo/margin_mean": 1.6832462549209595,
|
|
"margin_dpo/margin_std": 2.1930503845214844,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.6409674981103552,
|
|
"fcm_dpo/beta": 0.572269856929779,
|
|
"fcm_dpo/delta": -0.10694173723459244,
|
|
"fcm_dpo/margin": 1.6546802520751953,
|
|
"fcm_dpo/q_t": 0.32539302110671997,
|
|
"grad_norm": 88.41535949707031,
|
|
"learning_rate": 1.7324892595672804e-07,
|
|
"logits/chosen": 1.6597294807434082,
|
|
"logits/rejected": 1.6074860095977783,
|
|
"logps/chosen": -62.50016784667969,
|
|
"logps/ref_chosen": -68.22148132324219,
|
|
"logps/ref_rejected": -94.12411499023438,
|
|
"logps/rejected": -90.05748748779297,
|
|
"loss": 0.9463,
|
|
"margin_dpo/margin_mean": 1.6546807289123535,
|
|
"margin_dpo/margin_std": 2.1842234134674072,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.6424792139077853,
|
|
"fcm_dpo/beta": 0.5608553886413574,
|
|
"fcm_dpo/delta": -0.1236172616481781,
|
|
"fcm_dpo/margin": 1.7144718170166016,
|
|
"fcm_dpo/q_t": 0.3162755072116852,
|
|
"grad_norm": 73.8619384765625,
|
|
"learning_rate": 1.7199163857537824e-07,
|
|
"logits/chosen": 1.8607254028320312,
|
|
"logits/rejected": 1.7949458360671997,
|
|
"logps/chosen": -70.13557434082031,
|
|
"logps/ref_chosen": -75.90104675292969,
|
|
"logps/ref_rejected": -86.08673095703125,
|
|
"logps/rejected": -82.03573608398438,
|
|
"loss": 0.9,
|
|
"margin_dpo/margin_mean": 1.7144721746444702,
|
|
"margin_dpo/margin_std": 2.046931743621826,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.6439909297052154,
|
|
"fcm_dpo/beta": 0.5839896202087402,
|
|
"fcm_dpo/delta": 0.410540372133255,
|
|
"fcm_dpo/margin": 0.7879926562309265,
|
|
"fcm_dpo/q_t": 0.4247323274612427,
|
|
"grad_norm": 108.58043670654297,
|
|
"learning_rate": 1.7073653325558828e-07,
|
|
"logits/chosen": 1.8453181982040405,
|
|
"logits/rejected": 1.8402283191680908,
|
|
"logps/chosen": -84.62348937988281,
|
|
"logps/ref_chosen": -89.93118286132812,
|
|
"logps/ref_rejected": -91.04658508300781,
|
|
"logps/rejected": -86.52688598632812,
|
|
"loss": 1.4298,
|
|
"margin_dpo/margin_mean": 0.787992000579834,
|
|
"margin_dpo/margin_std": 2.581155300140381,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.6455026455026455,
|
|
"fcm_dpo/beta": 0.600010871887207,
|
|
"fcm_dpo/delta": -0.029273666441440582,
|
|
"fcm_dpo/margin": 1.4609463214874268,
|
|
"fcm_dpo/q_t": 0.3494294285774231,
|
|
"grad_norm": 93.7463607788086,
|
|
"learning_rate": 1.6948364510535218e-07,
|
|
"logits/chosen": 2.0117008686065674,
|
|
"logits/rejected": 1.8596858978271484,
|
|
"logps/chosen": -72.434814453125,
|
|
"logps/ref_chosen": -77.83393859863281,
|
|
"logps/ref_rejected": -98.69864654541016,
|
|
"logps/rejected": -94.76046752929688,
|
|
"loss": 1.023,
|
|
"margin_dpo/margin_mean": 1.4609463214874268,
|
|
"margin_dpo/margin_std": 2.3029470443725586,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.6470143613000756,
|
|
"fcm_dpo/beta": 0.5907303094863892,
|
|
"fcm_dpo/delta": -0.07789819687604904,
|
|
"fcm_dpo/margin": 1.5586960315704346,
|
|
"fcm_dpo/q_t": 0.35224780440330505,
|
|
"grad_norm": 110.9155502319336,
|
|
"learning_rate": 1.6823300917064458e-07,
|
|
"logits/chosen": 1.7061760425567627,
|
|
"logits/rejected": 1.6600472927093506,
|
|
"logps/chosen": -84.93183898925781,
|
|
"logps/ref_chosen": -90.3450927734375,
|
|
"logps/ref_rejected": -100.24185180664062,
|
|
"logps/rejected": -96.38729858398438,
|
|
"loss": 1.0751,
|
|
"margin_dpo/margin_mean": 1.5586960315704346,
|
|
"margin_dpo/margin_std": 2.5471014976501465,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.6485260770975056,
|
|
"fcm_dpo/beta": 0.5782663226127625,
|
|
"fcm_dpo/delta": -0.12282080948352814,
|
|
"fcm_dpo/margin": 1.6621170043945312,
|
|
"fcm_dpo/q_t": 0.32879000902175903,
|
|
"grad_norm": 88.22840118408203,
|
|
"learning_rate": 1.669846604344412e-07,
|
|
"logits/chosen": 1.6005504131317139,
|
|
"logits/rejected": 1.6149730682373047,
|
|
"logps/chosen": -72.61544799804688,
|
|
"logps/ref_chosen": -78.24811553955078,
|
|
"logps/ref_rejected": -75.24495697021484,
|
|
"logps/rejected": -71.27439880371094,
|
|
"loss": 1.0433,
|
|
"margin_dpo/margin_mean": 1.6621167659759521,
|
|
"margin_dpo/margin_std": 2.5759923458099365,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.6500377928949358,
|
|
"fcm_dpo/beta": 0.5593866109848022,
|
|
"fcm_dpo/delta": -0.13159701228141785,
|
|
"fcm_dpo/margin": 1.7297087907791138,
|
|
"fcm_dpo/q_t": 0.31945735216140747,
|
|
"grad_norm": 73.9142837524414,
|
|
"learning_rate": 1.6573863381573954e-07,
|
|
"logits/chosen": 1.5620594024658203,
|
|
"logits/rejected": 1.5809931755065918,
|
|
"logps/chosen": -70.3321533203125,
|
|
"logps/ref_chosen": -76.08027648925781,
|
|
"logps/ref_rejected": -84.09554290771484,
|
|
"logps/rejected": -80.0771255493164,
|
|
"loss": 0.9161,
|
|
"margin_dpo/margin_mean": 1.7297089099884033,
|
|
"margin_dpo/margin_std": 2.1894445419311523,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.6515495086923658,
|
|
"fcm_dpo/beta": 0.560405969619751,
|
|
"fcm_dpo/delta": 0.0203985795378685,
|
|
"fcm_dpo/margin": 1.4833824634552002,
|
|
"fcm_dpo/q_t": 0.34995564818382263,
|
|
"grad_norm": 82.28486633300781,
|
|
"learning_rate": 1.6449496416858282e-07,
|
|
"logits/chosen": 1.4608607292175293,
|
|
"logits/rejected": 1.3733350038528442,
|
|
"logps/chosen": -61.16302490234375,
|
|
"logps/ref_chosen": -66.88581085205078,
|
|
"logps/ref_rejected": -89.56040954589844,
|
|
"logps/rejected": -85.32099914550781,
|
|
"loss": 1.0695,
|
|
"margin_dpo/margin_mean": 1.483382225036621,
|
|
"margin_dpo/margin_std": 2.421769142150879,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.6530612244897959,
|
|
"fcm_dpo/beta": 0.5456819534301758,
|
|
"fcm_dpo/delta": -0.1097467765212059,
|
|
"fcm_dpo/margin": 1.7330988645553589,
|
|
"fcm_dpo/q_t": 0.3340963125228882,
|
|
"grad_norm": 82.00106048583984,
|
|
"learning_rate": 1.632536862810844e-07,
|
|
"logits/chosen": 1.8679120540618896,
|
|
"logits/rejected": 1.7690483331680298,
|
|
"logps/chosen": -73.94156646728516,
|
|
"logps/ref_chosen": -79.65066528320312,
|
|
"logps/ref_rejected": -103.92634582519531,
|
|
"logps/rejected": -99.95034790039062,
|
|
"loss": 0.9746,
|
|
"margin_dpo/margin_mean": 1.7330987453460693,
|
|
"margin_dpo/margin_std": 2.4346117973327637,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.654572940287226,
|
|
"fcm_dpo/beta": 0.5389462113380432,
|
|
"fcm_dpo/delta": -0.1568116843700409,
|
|
"fcm_dpo/margin": 1.8397012948989868,
|
|
"fcm_dpo/q_t": 0.3335553705692291,
|
|
"grad_norm": 70.97694396972656,
|
|
"learning_rate": 1.6201483487445515e-07,
|
|
"logits/chosen": 2.0188956260681152,
|
|
"logits/rejected": 1.9827733039855957,
|
|
"logps/chosen": -71.76106262207031,
|
|
"logps/ref_chosen": -77.30774688720703,
|
|
"logps/ref_rejected": -81.65180206298828,
|
|
"logps/rejected": -77.94482421875,
|
|
"loss": 1.0222,
|
|
"margin_dpo/margin_mean": 1.8397008180618286,
|
|
"margin_dpo/margin_std": 2.890751361846924,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.656084656084656,
|
|
"fcm_dpo/beta": 0.5215494632720947,
|
|
"fcm_dpo/delta": -0.03774160146713257,
|
|
"fcm_dpo/margin": 1.6875739097595215,
|
|
"fcm_dpo/q_t": 0.33912625908851624,
|
|
"grad_norm": 71.490966796875,
|
|
"learning_rate": 1.6077844460203204e-07,
|
|
"logits/chosen": 1.885265827178955,
|
|
"logits/rejected": 1.756982445716858,
|
|
"logps/chosen": -57.4691047668457,
|
|
"logps/ref_chosen": -63.31850051879883,
|
|
"logps/ref_rejected": -89.15093994140625,
|
|
"logps/rejected": -84.98912048339844,
|
|
"loss": 1.0276,
|
|
"margin_dpo/margin_mean": 1.6875743865966797,
|
|
"margin_dpo/margin_std": 2.495243549346924,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.6575963718820862,
|
|
"fcm_dpo/beta": 0.5468607544898987,
|
|
"fcm_dpo/delta": 0.1618640273809433,
|
|
"fcm_dpo/margin": 1.271209478378296,
|
|
"fcm_dpo/q_t": 0.3671290874481201,
|
|
"grad_norm": 78.38941192626953,
|
|
"learning_rate": 1.5954455004830878e-07,
|
|
"logits/chosen": 2.278850555419922,
|
|
"logits/rejected": 2.2084856033325195,
|
|
"logps/chosen": -65.71345520019531,
|
|
"logps/ref_chosen": -71.1719741821289,
|
|
"logps/ref_rejected": -86.42095184326172,
|
|
"logps/rejected": -82.233642578125,
|
|
"loss": 1.0575,
|
|
"margin_dpo/margin_mean": 1.271209955215454,
|
|
"margin_dpo/margin_std": 1.9971004724502563,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.6591080876795162,
|
|
"fcm_dpo/beta": 0.5605753064155579,
|
|
"fcm_dpo/delta": 0.1037134975194931,
|
|
"fcm_dpo/margin": 1.3377704620361328,
|
|
"fcm_dpo/q_t": 0.35941988229751587,
|
|
"grad_norm": 81.34796905517578,
|
|
"learning_rate": 1.5831318572796847e-07,
|
|
"logits/chosen": 1.5238187313079834,
|
|
"logits/rejected": 1.4155125617980957,
|
|
"logps/chosen": -68.72625732421875,
|
|
"logps/ref_chosen": -74.45087432861328,
|
|
"logps/ref_rejected": -86.01708984375,
|
|
"logps/rejected": -81.63023376464844,
|
|
"loss": 1.0929,
|
|
"margin_dpo/margin_mean": 1.3377702236175537,
|
|
"margin_dpo/margin_std": 2.242361545562744,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.6606198034769464,
|
|
"fcm_dpo/beta": 0.544946014881134,
|
|
"fcm_dpo/delta": -0.01443202793598175,
|
|
"fcm_dpo/margin": 1.570465087890625,
|
|
"fcm_dpo/q_t": 0.3623065948486328,
|
|
"grad_norm": 87.133544921875,
|
|
"learning_rate": 1.5708438608491815e-07,
|
|
"logits/chosen": 1.8089007139205933,
|
|
"logits/rejected": 1.5546307563781738,
|
|
"logps/chosen": -66.98934936523438,
|
|
"logps/ref_chosen": -72.38907623291016,
|
|
"logps/ref_rejected": -111.03279876708984,
|
|
"logps/rejected": -107.20354461669922,
|
|
"loss": 1.115,
|
|
"margin_dpo/margin_mean": 1.5704649686813354,
|
|
"margin_dpo/margin_std": 2.739298105239868,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.6621315192743764,
|
|
"fcm_dpo/beta": 0.5507192015647888,
|
|
"fcm_dpo/delta": -0.16311952471733093,
|
|
"fcm_dpo/margin": 1.8056895732879639,
|
|
"fcm_dpo/q_t": 0.3250262141227722,
|
|
"grad_norm": 66.01669311523438,
|
|
"learning_rate": 1.558581854913253e-07,
|
|
"logits/chosen": 1.564880609512329,
|
|
"logits/rejected": 1.4786386489868164,
|
|
"logps/chosen": -51.56304168701172,
|
|
"logps/ref_chosen": -57.27682876586914,
|
|
"logps/ref_rejected": -83.07940673828125,
|
|
"logps/rejected": -79.17129516601562,
|
|
"loss": 0.9502,
|
|
"margin_dpo/margin_mean": 1.8056901693344116,
|
|
"margin_dpo/margin_std": 2.3092832565307617,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.6636432350718064,
|
|
"fcm_dpo/beta": 0.5346947908401489,
|
|
"fcm_dpo/delta": -0.10351482778787613,
|
|
"fcm_dpo/margin": 1.763001799583435,
|
|
"fcm_dpo/q_t": 0.32669079303741455,
|
|
"grad_norm": 84.674072265625,
|
|
"learning_rate": 1.5463461824665658e-07,
|
|
"logits/chosen": 1.6811615228652954,
|
|
"logits/rejected": 1.588653564453125,
|
|
"logps/chosen": -92.66934967041016,
|
|
"logps/ref_chosen": -98.35890197753906,
|
|
"logps/ref_rejected": -112.69817352294922,
|
|
"logps/rejected": -108.77161407470703,
|
|
"loss": 0.9337,
|
|
"margin_dpo/margin_mean": 1.7630020380020142,
|
|
"margin_dpo/margin_std": 2.2514312267303467,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.6651549508692366,
|
|
"fcm_dpo/beta": 0.5049252510070801,
|
|
"fcm_dpo/delta": -0.28788667917251587,
|
|
"fcm_dpo/margin": 2.1909289360046387,
|
|
"fcm_dpo/q_t": 0.2954619824886322,
|
|
"grad_norm": 64.66181945800781,
|
|
"learning_rate": 1.534137185767178e-07,
|
|
"logits/chosen": 1.518364667892456,
|
|
"logits/rejected": 1.2891108989715576,
|
|
"logps/chosen": -55.59807205200195,
|
|
"logps/ref_chosen": -61.662452697753906,
|
|
"logps/ref_rejected": -86.81646728515625,
|
|
"logps/rejected": -82.9430160522461,
|
|
"loss": 0.8081,
|
|
"margin_dpo/margin_mean": 2.1909286975860596,
|
|
"margin_dpo/margin_std": 2.2650413513183594,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.6666666666666666,
|
|
"fcm_dpo/beta": 0.5017116069793701,
|
|
"fcm_dpo/delta": 0.10611478239297867,
|
|
"fcm_dpo/margin": 1.4984135627746582,
|
|
"fcm_dpo/q_t": 0.35643941164016724,
|
|
"grad_norm": 76.86129760742188,
|
|
"learning_rate": 1.521955206326976e-07,
|
|
"logits/chosen": 1.6022846698760986,
|
|
"logits/rejected": 1.3970829248428345,
|
|
"logps/chosen": -68.37437438964844,
|
|
"logps/ref_chosen": -74.33235168457031,
|
|
"logps/ref_rejected": -99.654541015625,
|
|
"logps/rejected": -95.19499206542969,
|
|
"loss": 1.0114,
|
|
"margin_dpo/margin_mean": 1.4984139204025269,
|
|
"margin_dpo/margin_std": 2.176797389984131,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.6681783824640968,
|
|
"fcm_dpo/beta": 0.5159778594970703,
|
|
"fcm_dpo/delta": 0.11021871864795685,
|
|
"fcm_dpo/margin": 1.4496480226516724,
|
|
"fcm_dpo/q_t": 0.35975363850593567,
|
|
"grad_norm": 85.56470489501953,
|
|
"learning_rate": 1.5098005849021078e-07,
|
|
"logits/chosen": 1.7873557806015015,
|
|
"logits/rejected": 1.7346773147583008,
|
|
"logps/chosen": -77.30924987792969,
|
|
"logps/ref_chosen": -82.42591857910156,
|
|
"logps/ref_rejected": -106.71090698242188,
|
|
"logps/rejected": -103.04387664794922,
|
|
"loss": 1.0487,
|
|
"margin_dpo/margin_mean": 1.4496479034423828,
|
|
"margin_dpo/margin_std": 2.3134610652923584,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.6696900982615268,
|
|
"fcm_dpo/beta": 0.5047956109046936,
|
|
"fcm_dpo/delta": -0.1580228954553604,
|
|
"fcm_dpo/margin": 1.9641375541687012,
|
|
"fcm_dpo/q_t": 0.3337998390197754,
|
|
"grad_norm": 69.60123443603516,
|
|
"learning_rate": 1.4976736614834662e-07,
|
|
"logits/chosen": 2.1173884868621826,
|
|
"logits/rejected": 1.9571037292480469,
|
|
"logps/chosen": -67.29206848144531,
|
|
"logps/ref_chosen": -72.87019348144531,
|
|
"logps/ref_rejected": -94.48143005371094,
|
|
"logps/rejected": -90.86744689941406,
|
|
"loss": 0.9343,
|
|
"margin_dpo/margin_mean": 1.9641380310058594,
|
|
"margin_dpo/margin_std": 2.791342258453369,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.671201814058957,
|
|
"fcm_dpo/beta": 0.5266600847244263,
|
|
"fcm_dpo/delta": 0.35409846901893616,
|
|
"fcm_dpo/margin": 0.9787815809249878,
|
|
"fcm_dpo/q_t": 0.4062029719352722,
|
|
"grad_norm": 88.73951721191406,
|
|
"learning_rate": 1.4855747752871654e-07,
|
|
"logits/chosen": 1.7228628396987915,
|
|
"logits/rejected": 1.5662447214126587,
|
|
"logps/chosen": -69.3934097290039,
|
|
"logps/ref_chosen": -74.650390625,
|
|
"logps/ref_rejected": -106.89204406738281,
|
|
"logps/rejected": -102.61383819580078,
|
|
"loss": 1.248,
|
|
"margin_dpo/margin_mean": 0.9787817001342773,
|
|
"margin_dpo/margin_std": 2.3466291427612305,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.672713529856387,
|
|
"fcm_dpo/beta": 0.5298882126808167,
|
|
"fcm_dpo/delta": -0.08345725387334824,
|
|
"fcm_dpo/margin": 1.7450919151306152,
|
|
"fcm_dpo/q_t": 0.3345106840133667,
|
|
"grad_norm": 86.28073120117188,
|
|
"learning_rate": 1.473504264745062e-07,
|
|
"logits/chosen": 1.8607685565948486,
|
|
"logits/rejected": 1.8312535285949707,
|
|
"logps/chosen": -71.2159423828125,
|
|
"logps/ref_chosen": -76.26957702636719,
|
|
"logps/ref_rejected": -89.84994506835938,
|
|
"logps/rejected": -86.5414047241211,
|
|
"loss": 1.0261,
|
|
"margin_dpo/margin_mean": 1.7450923919677734,
|
|
"margin_dpo/margin_std": 2.598353862762451,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.674225245653817,
|
|
"fcm_dpo/beta": 0.5256571769714355,
|
|
"fcm_dpo/delta": -0.2002544403076172,
|
|
"fcm_dpo/margin": 1.9515938758850098,
|
|
"fcm_dpo/q_t": 0.3043816387653351,
|
|
"grad_norm": 57.9477424621582,
|
|
"learning_rate": 1.461462467495284e-07,
|
|
"logits/chosen": 1.8240482807159424,
|
|
"logits/rejected": 1.6946065425872803,
|
|
"logps/chosen": -57.154510498046875,
|
|
"logps/ref_chosen": -62.74647903442383,
|
|
"logps/ref_rejected": -86.395751953125,
|
|
"logps/rejected": -82.75537872314453,
|
|
"loss": 0.84,
|
|
"margin_dpo/margin_mean": 1.9515937566757202,
|
|
"margin_dpo/margin_std": 1.970376968383789,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.6757369614512472,
|
|
"fcm_dpo/beta": 0.5028195977210999,
|
|
"fcm_dpo/delta": -0.09459690004587173,
|
|
"fcm_dpo/margin": 1.8610410690307617,
|
|
"fcm_dpo/q_t": 0.32163363695144653,
|
|
"grad_norm": 68.67730712890625,
|
|
"learning_rate": 1.4494497203727843e-07,
|
|
"logits/chosen": 1.4977566003799438,
|
|
"logits/rejected": 1.265622615814209,
|
|
"logps/chosen": -65.15301513671875,
|
|
"logps/ref_chosen": -71.06666564941406,
|
|
"logps/ref_rejected": -103.57111358642578,
|
|
"logps/rejected": -99.51850891113281,
|
|
"loss": 0.9279,
|
|
"margin_dpo/margin_mean": 1.8610403537750244,
|
|
"margin_dpo/margin_std": 2.3173511028289795,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.6772486772486772,
|
|
"fcm_dpo/beta": 0.5102126598358154,
|
|
"fcm_dpo/delta": 0.08946660906076431,
|
|
"fcm_dpo/margin": 1.501331090927124,
|
|
"fcm_dpo/q_t": 0.36332303285598755,
|
|
"grad_norm": 72.64019775390625,
|
|
"learning_rate": 1.4374663593999256e-07,
|
|
"logits/chosen": 1.6274654865264893,
|
|
"logits/rejected": 1.5620205402374268,
|
|
"logps/chosen": -68.0380859375,
|
|
"logps/ref_chosen": -73.400146484375,
|
|
"logps/ref_rejected": -96.34330749511719,
|
|
"logps/rejected": -92.48257446289062,
|
|
"loss": 1.0835,
|
|
"margin_dpo/margin_mean": 1.501330852508545,
|
|
"margin_dpo/margin_std": 2.5533509254455566,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.6787603930461074,
|
|
"fcm_dpo/beta": 0.5148006677627563,
|
|
"fcm_dpo/delta": 0.05458805337548256,
|
|
"fcm_dpo/margin": 0.9342146515846252,
|
|
"fcm_dpo/q_t": 0.4083643853664398,
|
|
"grad_norm": 91.0919189453125,
|
|
"learning_rate": 1.4255127197770707e-07,
|
|
"logits/chosen": 1.5526158809661865,
|
|
"logits/rejected": 1.547848105430603,
|
|
"logps/chosen": -88.7008285522461,
|
|
"logps/ref_chosen": -93.66099548339844,
|
|
"logps/ref_rejected": -102.53019714355469,
|
|
"logps/rejected": -98.5042495727539,
|
|
"loss": 1.2504,
|
|
"margin_dpo/margin_mean": 0.9342143535614014,
|
|
"margin_dpo/margin_std": 2.240139961242676,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.6802721088435374,
|
|
"fcm_dpo/beta": 0.5282891988754272,
|
|
"fcm_dpo/delta": 0.14706285297870636,
|
|
"fcm_dpo/margin": 1.348769187927246,
|
|
"fcm_dpo/q_t": 0.36513522267341614,
|
|
"grad_norm": 70.94473266601562,
|
|
"learning_rate": 1.4135891358732205e-07,
|
|
"logits/chosen": 1.606363296508789,
|
|
"logits/rejected": 1.3925542831420898,
|
|
"logps/chosen": -56.854942321777344,
|
|
"logps/ref_chosen": -62.52460479736328,
|
|
"logps/ref_rejected": -94.04986572265625,
|
|
"logps/rejected": -89.72897338867188,
|
|
"loss": 1.0421,
|
|
"margin_dpo/margin_mean": 1.348769187927246,
|
|
"margin_dpo/margin_std": 2.1072897911071777,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.6817838246409675,
|
|
"fcm_dpo/beta": 0.5469115376472473,
|
|
"fcm_dpo/delta": 0.17872020602226257,
|
|
"fcm_dpo/margin": 1.247553825378418,
|
|
"fcm_dpo/q_t": 0.3751906156539917,
|
|
"grad_norm": 88.25247955322266,
|
|
"learning_rate": 1.4016959412166437e-07,
|
|
"logits/chosen": 1.7541656494140625,
|
|
"logits/rejected": 1.6891770362854004,
|
|
"logps/chosen": -73.66560363769531,
|
|
"logps/ref_chosen": -79.14009094238281,
|
|
"logps/ref_rejected": -93.23919677734375,
|
|
"logps/rejected": -89.01226043701172,
|
|
"loss": 1.1194,
|
|
"margin_dpo/margin_mean": 1.247553825378418,
|
|
"margin_dpo/margin_std": 2.247030258178711,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.6832955404383976,
|
|
"fcm_dpo/beta": 0.5521150827407837,
|
|
"fcm_dpo/delta": 0.04130814969539642,
|
|
"fcm_dpo/margin": 1.4708223342895508,
|
|
"fcm_dpo/q_t": 0.35841596126556396,
|
|
"grad_norm": 84.68819427490234,
|
|
"learning_rate": 1.3898334684855645e-07,
|
|
"logits/chosen": 1.6760461330413818,
|
|
"logits/rejected": 1.521647572517395,
|
|
"logps/chosen": -65.01879119873047,
|
|
"logps/ref_chosen": -70.38827514648438,
|
|
"logps/ref_rejected": -95.47691345214844,
|
|
"logps/rejected": -91.57825469970703,
|
|
"loss": 1.0956,
|
|
"margin_dpo/margin_mean": 1.4708220958709717,
|
|
"margin_dpo/margin_std": 2.534980535507202,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.6848072562358276,
|
|
"fcm_dpo/beta": 0.5732132196426392,
|
|
"fcm_dpo/delta": 0.1644117832183838,
|
|
"fcm_dpo/margin": 1.2101879119873047,
|
|
"fcm_dpo/q_t": 0.3788529336452484,
|
|
"grad_norm": 90.92646026611328,
|
|
"learning_rate": 1.3780020494988445e-07,
|
|
"logits/chosen": 1.7101848125457764,
|
|
"logits/rejected": 1.611031413078308,
|
|
"logps/chosen": -74.53843688964844,
|
|
"logps/ref_chosen": -79.9207763671875,
|
|
"logps/ref_rejected": -90.20779418945312,
|
|
"logps/rejected": -86.03564453125,
|
|
"loss": 1.1578,
|
|
"margin_dpo/margin_mean": 1.2101881504058838,
|
|
"margin_dpo/margin_std": 2.333146095275879,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.6863189720332578,
|
|
"fcm_dpo/beta": 0.5651696920394897,
|
|
"fcm_dpo/delta": -0.05718090757727623,
|
|
"fcm_dpo/margin": 1.5949244499206543,
|
|
"fcm_dpo/q_t": 0.34462159872055054,
|
|
"grad_norm": 76.58309936523438,
|
|
"learning_rate": 1.366202015206706e-07,
|
|
"logits/chosen": 1.6377469301223755,
|
|
"logits/rejected": 1.551138997077942,
|
|
"logps/chosen": -63.92818069458008,
|
|
"logps/ref_chosen": -69.71887969970703,
|
|
"logps/ref_rejected": -82.86952209472656,
|
|
"logps/rejected": -78.67375183105469,
|
|
"loss": 1.0832,
|
|
"margin_dpo/margin_mean": 1.5949238538742065,
|
|
"margin_dpo/margin_std": 2.597616672515869,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.6878306878306878,
|
|
"fcm_dpo/beta": 0.5443323850631714,
|
|
"fcm_dpo/delta": -0.21190257370471954,
|
|
"fcm_dpo/margin": 1.9055442810058594,
|
|
"fcm_dpo/q_t": 0.31488168239593506,
|
|
"grad_norm": 82.54447174072266,
|
|
"learning_rate": 1.354433695681474e-07,
|
|
"logits/chosen": 1.338344931602478,
|
|
"logits/rejected": 1.2727608680725098,
|
|
"logps/chosen": -84.03068542480469,
|
|
"logps/ref_chosen": -89.51481628417969,
|
|
"logps/ref_rejected": -97.93235778808594,
|
|
"logps/rejected": -94.35377502441406,
|
|
"loss": 0.9113,
|
|
"margin_dpo/margin_mean": 1.9055445194244385,
|
|
"margin_dpo/margin_std": 2.519261121749878,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.6893424036281179,
|
|
"fcm_dpo/beta": 0.539644181728363,
|
|
"fcm_dpo/delta": 0.01813328266143799,
|
|
"fcm_dpo/margin": 1.5422388315200806,
|
|
"fcm_dpo/q_t": 0.34008723497390747,
|
|
"grad_norm": 80.02922058105469,
|
|
"learning_rate": 1.3426974201083439e-07,
|
|
"logits/chosen": 1.7968485355377197,
|
|
"logits/rejected": 1.6787824630737305,
|
|
"logps/chosen": -69.20973205566406,
|
|
"logps/ref_chosen": -74.60527038574219,
|
|
"logps/ref_rejected": -97.98377227783203,
|
|
"logps/rejected": -94.13047790527344,
|
|
"loss": 0.962,
|
|
"margin_dpo/margin_mean": 1.5422389507293701,
|
|
"margin_dpo/margin_std": 2.0328426361083984,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.690854119425548,
|
|
"fcm_dpo/beta": 0.5443383455276489,
|
|
"fcm_dpo/delta": 0.07683409005403519,
|
|
"fcm_dpo/margin": 1.4279565811157227,
|
|
"fcm_dpo/q_t": 0.3532698154449463,
|
|
"grad_norm": 71.82817077636719,
|
|
"learning_rate": 1.3309935167761717e-07,
|
|
"logits/chosen": 1.6997053623199463,
|
|
"logits/rejected": 1.5273003578186035,
|
|
"logps/chosen": -58.893951416015625,
|
|
"logps/ref_chosen": -63.927032470703125,
|
|
"logps/ref_rejected": -83.15243530273438,
|
|
"logps/rejected": -79.54731750488281,
|
|
"loss": 0.9919,
|
|
"margin_dpo/margin_mean": 1.4279568195343018,
|
|
"margin_dpo/margin_std": 1.9557170867919922,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.6923658352229781,
|
|
"fcm_dpo/beta": 0.5384401082992554,
|
|
"fcm_dpo/delta": -0.12141396105289459,
|
|
"fcm_dpo/margin": 1.77810800075531,
|
|
"fcm_dpo/q_t": 0.3204982280731201,
|
|
"grad_norm": 71.2940444946289,
|
|
"learning_rate": 1.3193223130682936e-07,
|
|
"logits/chosen": 1.7315986156463623,
|
|
"logits/rejected": 1.5027096271514893,
|
|
"logps/chosen": -62.00345993041992,
|
|
"logps/ref_chosen": -67.68869018554688,
|
|
"logps/ref_rejected": -104.40899658203125,
|
|
"logps/rejected": -100.50187683105469,
|
|
"loss": 0.9192,
|
|
"margin_dpo/margin_mean": 1.77810800075531,
|
|
"margin_dpo/margin_std": 2.1941637992858887,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.6938775510204082,
|
|
"fcm_dpo/beta": 0.5484590530395508,
|
|
"fcm_dpo/delta": -0.1108066737651825,
|
|
"fcm_dpo/margin": 1.703685998916626,
|
|
"fcm_dpo/q_t": 0.33259010314941406,
|
|
"grad_norm": 87.72430419921875,
|
|
"learning_rate": 1.3076841354533658e-07,
|
|
"logits/chosen": 1.8845622539520264,
|
|
"logits/rejected": 1.785888433456421,
|
|
"logps/chosen": -78.22889709472656,
|
|
"logps/ref_chosen": -83.82363891601562,
|
|
"logps/ref_rejected": -103.75938415527344,
|
|
"logps/rejected": -99.86831665039062,
|
|
"loss": 0.9903,
|
|
"margin_dpo/margin_mean": 1.7036855220794678,
|
|
"margin_dpo/margin_std": 2.135005474090576,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.6953892668178382,
|
|
"fcm_dpo/beta": 0.5242894887924194,
|
|
"fcm_dpo/delta": -0.02234676666557789,
|
|
"fcm_dpo/margin": 1.6595816612243652,
|
|
"fcm_dpo/q_t": 0.35441914200782776,
|
|
"grad_norm": 88.8072280883789,
|
|
"learning_rate": 1.2960793094762345e-07,
|
|
"logits/chosen": 1.696329951286316,
|
|
"logits/rejected": 1.4185569286346436,
|
|
"logps/chosen": -74.41940307617188,
|
|
"logps/ref_chosen": -79.4836654663086,
|
|
"logps/ref_rejected": -112.31745910644531,
|
|
"logps/rejected": -108.91278076171875,
|
|
"loss": 1.0182,
|
|
"margin_dpo/margin_mean": 1.6595821380615234,
|
|
"margin_dpo/margin_std": 2.5861330032348633,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.6969009826152683,
|
|
"fcm_dpo/beta": 0.5151861906051636,
|
|
"fcm_dpo/delta": -0.010441526770591736,
|
|
"fcm_dpo/margin": 1.6554614305496216,
|
|
"fcm_dpo/q_t": 0.3441765606403351,
|
|
"grad_norm": 74.9832534790039,
|
|
"learning_rate": 1.2845081597488286e-07,
|
|
"logits/chosen": 2.076706647872925,
|
|
"logits/rejected": 1.9077098369598389,
|
|
"logps/chosen": -58.701873779296875,
|
|
"logps/ref_chosen": -64.28482055664062,
|
|
"logps/ref_rejected": -93.73818969726562,
|
|
"logps/rejected": -89.81069946289062,
|
|
"loss": 1.0235,
|
|
"margin_dpo/margin_mean": 1.655461311340332,
|
|
"margin_dpo/margin_std": 2.43689227104187,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.6984126984126984,
|
|
"fcm_dpo/beta": 0.5152299404144287,
|
|
"fcm_dpo/delta": -0.07396998256444931,
|
|
"fcm_dpo/margin": 1.7776132822036743,
|
|
"fcm_dpo/q_t": 0.3375456929206848,
|
|
"grad_norm": 76.16434478759766,
|
|
"learning_rate": 1.27297100994108e-07,
|
|
"logits/chosen": 1.8015167713165283,
|
|
"logits/rejected": 1.6600192785263062,
|
|
"logps/chosen": -71.72679901123047,
|
|
"logps/ref_chosen": -77.15335083007812,
|
|
"logps/ref_rejected": -91.12923431396484,
|
|
"logps/rejected": -87.48030090332031,
|
|
"loss": 0.9983,
|
|
"margin_dpo/margin_mean": 1.7776132822036743,
|
|
"margin_dpo/margin_std": 2.624232769012451,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.6999244142101285,
|
|
"fcm_dpo/beta": 0.5137749910354614,
|
|
"fcm_dpo/delta": 0.03142701834440231,
|
|
"fcm_dpo/margin": 1.5948131084442139,
|
|
"fcm_dpo/q_t": 0.3569672405719757,
|
|
"grad_norm": 85.6458511352539,
|
|
"learning_rate": 1.2614681827718695e-07,
|
|
"logits/chosen": 1.7675786018371582,
|
|
"logits/rejected": 1.7552111148834229,
|
|
"logps/chosen": -82.32722473144531,
|
|
"logps/ref_chosen": -87.58760070800781,
|
|
"logps/ref_rejected": -87.97022247314453,
|
|
"logps/rejected": -84.30465698242188,
|
|
"loss": 1.0237,
|
|
"margin_dpo/margin_mean": 1.594813585281372,
|
|
"margin_dpo/margin_std": 2.4094183444976807,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.7014361300075586,
|
|
"fcm_dpo/beta": 0.5252482891082764,
|
|
"fcm_dpo/delta": -0.01944570243358612,
|
|
"fcm_dpo/margin": 1.6464765071868896,
|
|
"fcm_dpo/q_t": 0.35834911465644836,
|
|
"grad_norm": 83.27401733398438,
|
|
"learning_rate": 1.2500000000000005e-07,
|
|
"logits/chosen": 1.5235177278518677,
|
|
"logits/rejected": 1.4369018077850342,
|
|
"logps/chosen": -70.7602310180664,
|
|
"logps/ref_chosen": -75.83175659179688,
|
|
"logps/ref_rejected": -84.4811019897461,
|
|
"logps/rejected": -81.05604553222656,
|
|
"loss": 1.1129,
|
|
"margin_dpo/margin_mean": 1.6464769840240479,
|
|
"margin_dpo/margin_std": 2.8207643032073975,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.7029478458049887,
|
|
"fcm_dpo/beta": 0.5253424644470215,
|
|
"fcm_dpo/delta": 0.08905892819166183,
|
|
"fcm_dpo/margin": 1.4612863063812256,
|
|
"fcm_dpo/q_t": 0.3681473135948181,
|
|
"grad_norm": 88.10529327392578,
|
|
"learning_rate": 1.238566782415197e-07,
|
|
"logits/chosen": 1.891004204750061,
|
|
"logits/rejected": 1.7700917720794678,
|
|
"logps/chosen": -71.99024963378906,
|
|
"logps/ref_chosen": -77.057861328125,
|
|
"logps/ref_rejected": -102.75727844238281,
|
|
"logps/rejected": -99.15094757080078,
|
|
"loss": 1.0896,
|
|
"margin_dpo/margin_mean": 1.461285948753357,
|
|
"margin_dpo/margin_std": 2.4996771812438965,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.7044595616024187,
|
|
"fcm_dpo/beta": 0.5524252653121948,
|
|
"fcm_dpo/delta": 0.3179141581058502,
|
|
"fcm_dpo/margin": 0.9951980710029602,
|
|
"fcm_dpo/q_t": 0.398731529712677,
|
|
"grad_norm": 108.6539535522461,
|
|
"learning_rate": 1.2271688498291334e-07,
|
|
"logits/chosen": 1.800580620765686,
|
|
"logits/rejected": 1.824805736541748,
|
|
"logps/chosen": -86.94599914550781,
|
|
"logps/ref_chosen": -91.7751693725586,
|
|
"logps/ref_rejected": -90.2679443359375,
|
|
"logps/rejected": -86.43397521972656,
|
|
"loss": 1.1914,
|
|
"margin_dpo/margin_mean": 0.9951978921890259,
|
|
"margin_dpo/margin_std": 2.0894367694854736,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.7059712773998488,
|
|
"fcm_dpo/beta": 0.5498294234275818,
|
|
"fcm_dpo/delta": -0.11944600939750671,
|
|
"fcm_dpo/margin": 1.7410266399383545,
|
|
"fcm_dpo/q_t": 0.3283127546310425,
|
|
"grad_norm": 68.84928131103516,
|
|
"learning_rate": 1.2158065210664848e-07,
|
|
"logits/chosen": 1.9185365438461304,
|
|
"logits/rejected": 1.6363033056259155,
|
|
"logps/chosen": -59.54905319213867,
|
|
"logps/ref_chosen": -64.77557373046875,
|
|
"logps/ref_rejected": -102.58863830566406,
|
|
"logps/rejected": -99.10314178466797,
|
|
"loss": 0.9153,
|
|
"margin_dpo/margin_mean": 1.741027593612671,
|
|
"margin_dpo/margin_std": 2.2309818267822266,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.7074829931972789,
|
|
"fcm_dpo/beta": 0.5375959277153015,
|
|
"fcm_dpo/delta": -0.19219672679901123,
|
|
"fcm_dpo/margin": 1.9023988246917725,
|
|
"fcm_dpo/q_t": 0.31552615761756897,
|
|
"grad_norm": 79.92118072509766,
|
|
"learning_rate": 1.204480113956011e-07,
|
|
"logits/chosen": 1.9349050521850586,
|
|
"logits/rejected": 1.9412510395050049,
|
|
"logps/chosen": -77.01482391357422,
|
|
"logps/ref_chosen": -82.22445678710938,
|
|
"logps/ref_rejected": -92.99041748046875,
|
|
"logps/rejected": -89.68318176269531,
|
|
"loss": 0.8997,
|
|
"margin_dpo/margin_mean": 1.9023983478546143,
|
|
"margin_dpo/margin_std": 2.337515354156494,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.708994708994709,
|
|
"fcm_dpo/beta": 0.5282810926437378,
|
|
"fcm_dpo/delta": 0.1127682775259018,
|
|
"fcm_dpo/margin": 1.4049665927886963,
|
|
"fcm_dpo/q_t": 0.3656489849090576,
|
|
"grad_norm": 82.08759307861328,
|
|
"learning_rate": 1.1931899453216697e-07,
|
|
"logits/chosen": 1.9689496755599976,
|
|
"logits/rejected": 1.948734998703003,
|
|
"logps/chosen": -70.78817749023438,
|
|
"logps/ref_chosen": -75.93031311035156,
|
|
"logps/ref_rejected": -92.26559448242188,
|
|
"logps/rejected": -88.52842712402344,
|
|
"loss": 1.0365,
|
|
"margin_dpo/margin_mean": 1.4049668312072754,
|
|
"margin_dpo/margin_std": 2.103820562362671,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.7105064247921391,
|
|
"fcm_dpo/beta": 0.5462840795516968,
|
|
"fcm_dpo/delta": 0.07731571793556213,
|
|
"fcm_dpo/margin": 1.425428032875061,
|
|
"fcm_dpo/q_t": 0.3547430634498596,
|
|
"grad_norm": 66.45403289794922,
|
|
"learning_rate": 1.1819363309737438e-07,
|
|
"logits/chosen": 1.4768280982971191,
|
|
"logits/rejected": 1.3471271991729736,
|
|
"logps/chosen": -60.886566162109375,
|
|
"logps/ref_chosen": -65.86345672607422,
|
|
"logps/ref_rejected": -85.89832305908203,
|
|
"logps/rejected": -82.34687042236328,
|
|
"loss": 1.0604,
|
|
"margin_dpo/margin_mean": 1.4254283905029297,
|
|
"margin_dpo/margin_std": 2.2816340923309326,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.7120181405895691,
|
|
"fcm_dpo/beta": 0.53891521692276,
|
|
"fcm_dpo/delta": -0.16177864372730255,
|
|
"fcm_dpo/margin": 1.847838044166565,
|
|
"fcm_dpo/q_t": 0.32141464948654175,
|
|
"grad_norm": 71.49030303955078,
|
|
"learning_rate": 1.1707195857000215e-07,
|
|
"logits/chosen": 1.760124921798706,
|
|
"logits/rejected": 1.6479649543762207,
|
|
"logps/chosen": -68.9094467163086,
|
|
"logps/ref_chosen": -74.3460922241211,
|
|
"logps/ref_rejected": -93.43672943115234,
|
|
"logps/rejected": -89.84793090820312,
|
|
"loss": 0.9497,
|
|
"margin_dpo/margin_mean": 1.8478378057479858,
|
|
"margin_dpo/margin_std": 2.4519433975219727,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.7135298563869993,
|
|
"fcm_dpo/beta": 0.5203032493591309,
|
|
"fcm_dpo/delta": -0.18647995591163635,
|
|
"fcm_dpo/margin": 1.9560319185256958,
|
|
"fcm_dpo/q_t": 0.33989793062210083,
|
|
"grad_norm": 79.76250457763672,
|
|
"learning_rate": 1.1595400232569768e-07,
|
|
"logits/chosen": 2.001483201980591,
|
|
"logits/rejected": 1.8946865797042847,
|
|
"logps/chosen": -69.1002426147461,
|
|
"logps/ref_chosen": -74.75674438476562,
|
|
"logps/ref_rejected": -95.18183135986328,
|
|
"logps/rejected": -91.48135375976562,
|
|
"loss": 1.0331,
|
|
"margin_dpo/margin_mean": 1.9560320377349854,
|
|
"margin_dpo/margin_std": 3.0834403038024902,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.7150415721844293,
|
|
"fcm_dpo/beta": 0.5078408718109131,
|
|
"fcm_dpo/delta": -0.03109436295926571,
|
|
"fcm_dpo/margin": 1.7292225360870361,
|
|
"fcm_dpo/q_t": 0.34417200088500977,
|
|
"grad_norm": 69.86459350585938,
|
|
"learning_rate": 1.1483979563610069e-07,
|
|
"logits/chosen": 1.8151731491088867,
|
|
"logits/rejected": 1.6192166805267334,
|
|
"logps/chosen": -65.9654312133789,
|
|
"logps/ref_chosen": -71.65933227539062,
|
|
"logps/ref_rejected": -109.99200439453125,
|
|
"logps/rejected": -106.02731323242188,
|
|
"loss": 1.0484,
|
|
"margin_dpo/margin_mean": 1.729222059249878,
|
|
"margin_dpo/margin_std": 2.8078012466430664,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.7165532879818595,
|
|
"fcm_dpo/beta": 0.5117301940917969,
|
|
"fcm_dpo/delta": 0.06498853117227554,
|
|
"fcm_dpo/margin": 1.5440913438796997,
|
|
"fcm_dpo/q_t": 0.35763585567474365,
|
|
"grad_norm": 70.37702941894531,
|
|
"learning_rate": 1.1372936966796709e-07,
|
|
"logits/chosen": 2.007793426513672,
|
|
"logits/rejected": 1.824831485748291,
|
|
"logps/chosen": -61.00871276855469,
|
|
"logps/ref_chosen": -65.91990661621094,
|
|
"logps/ref_rejected": -89.09432983398438,
|
|
"logps/rejected": -85.72722625732422,
|
|
"loss": 1.0608,
|
|
"margin_dpo/margin_mean": 1.5440911054611206,
|
|
"margin_dpo/margin_std": 2.4802653789520264,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.7180650037792895,
|
|
"fcm_dpo/beta": 0.4982389807701111,
|
|
"fcm_dpo/delta": -0.17701946198940277,
|
|
"fcm_dpo/margin": 2.021491765975952,
|
|
"fcm_dpo/q_t": 0.3174302577972412,
|
|
"grad_norm": 67.81841278076172,
|
|
"learning_rate": 1.126227554822985e-07,
|
|
"logits/chosen": 1.543381690979004,
|
|
"logits/rejected": 1.5033472776412964,
|
|
"logps/chosen": -74.09674835205078,
|
|
"logps/ref_chosen": -79.02459716796875,
|
|
"logps/ref_rejected": -107.33058166503906,
|
|
"logps/rejected": -104.42422485351562,
|
|
"loss": 0.9131,
|
|
"margin_dpo/margin_mean": 2.021491050720215,
|
|
"margin_dpo/margin_std": 2.585183620452881,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.7195767195767195,
|
|
"fcm_dpo/beta": 0.5055756568908691,
|
|
"fcm_dpo/delta": 0.14022918045520782,
|
|
"fcm_dpo/margin": 1.424285888671875,
|
|
"fcm_dpo/q_t": 0.367862343788147,
|
|
"grad_norm": 89.28751373291016,
|
|
"learning_rate": 1.1151998403347243e-07,
|
|
"logits/chosen": 1.6474313735961914,
|
|
"logits/rejected": 1.612290859222412,
|
|
"logps/chosen": -88.98843383789062,
|
|
"logps/ref_chosen": -93.72602844238281,
|
|
"logps/ref_rejected": -94.390625,
|
|
"logps/rejected": -91.07731628417969,
|
|
"loss": 1.0766,
|
|
"margin_dpo/margin_mean": 1.4242854118347168,
|
|
"margin_dpo/margin_std": 2.3705697059631348,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.7210884353741497,
|
|
"fcm_dpo/beta": 0.5270170569419861,
|
|
"fcm_dpo/delta": 0.21027953922748566,
|
|
"fcm_dpo/margin": 1.2401819229125977,
|
|
"fcm_dpo/q_t": 0.3814718723297119,
|
|
"grad_norm": 92.97918701171875,
|
|
"learning_rate": 1.1042108616837692e-07,
|
|
"logits/chosen": 2.0096664428710938,
|
|
"logits/rejected": 1.9469687938690186,
|
|
"logps/chosen": -71.95021057128906,
|
|
"logps/ref_chosen": -76.51399993896484,
|
|
"logps/ref_rejected": -99.14356231689453,
|
|
"logps/rejected": -95.81996154785156,
|
|
"loss": 1.2076,
|
|
"margin_dpo/margin_mean": 1.240182638168335,
|
|
"margin_dpo/margin_std": 2.572378158569336,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.7226001511715797,
|
|
"fcm_dpo/beta": 0.5363619327545166,
|
|
"fcm_dpo/delta": 0.07953216135501862,
|
|
"fcm_dpo/margin": 1.4475769996643066,
|
|
"fcm_dpo/q_t": 0.37544333934783936,
|
|
"grad_norm": 82.0169677734375,
|
|
"learning_rate": 1.0932609262554746e-07,
|
|
"logits/chosen": 1.8754984140396118,
|
|
"logits/rejected": 1.8362398147583008,
|
|
"logps/chosen": -73.00425720214844,
|
|
"logps/ref_chosen": -77.95186614990234,
|
|
"logps/ref_rejected": -69.77754211425781,
|
|
"logps/rejected": -66.27751159667969,
|
|
"loss": 1.2311,
|
|
"margin_dpo/margin_mean": 1.4475772380828857,
|
|
"margin_dpo/margin_std": 3.051959991455078,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.7241118669690099,
|
|
"fcm_dpo/beta": 0.5497398376464844,
|
|
"fcm_dpo/delta": 0.15630201995372772,
|
|
"fcm_dpo/margin": 1.2815978527069092,
|
|
"fcm_dpo/q_t": 0.3775695264339447,
|
|
"grad_norm": 88.32197570800781,
|
|
"learning_rate": 1.0823503403430734e-07,
|
|
"logits/chosen": 1.392703652381897,
|
|
"logits/rejected": 1.299776315689087,
|
|
"logps/chosen": -71.73004150390625,
|
|
"logps/ref_chosen": -76.56551361083984,
|
|
"logps/ref_rejected": -84.33758544921875,
|
|
"logps/rejected": -80.7837142944336,
|
|
"loss": 1.2199,
|
|
"margin_dpo/margin_mean": 1.2815985679626465,
|
|
"margin_dpo/margin_std": 2.66666841506958,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.7256235827664399,
|
|
"fcm_dpo/beta": 0.5562114715576172,
|
|
"fcm_dpo/delta": -0.12454620003700256,
|
|
"fcm_dpo/margin": 1.72718346118927,
|
|
"fcm_dpo/q_t": 0.32873159646987915,
|
|
"grad_norm": 84.74827575683594,
|
|
"learning_rate": 1.0714794091391072e-07,
|
|
"logits/chosen": 1.7120177745819092,
|
|
"logits/rejected": 1.7063748836517334,
|
|
"logps/chosen": -74.99505615234375,
|
|
"logps/ref_chosen": -80.15884399414062,
|
|
"logps/ref_rejected": -84.88697814941406,
|
|
"logps/rejected": -81.45037841796875,
|
|
"loss": 1.0862,
|
|
"margin_dpo/margin_mean": 1.7271829843521118,
|
|
"margin_dpo/margin_std": 2.7646212577819824,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.72713529856387,
|
|
"fcm_dpo/beta": 0.5491403341293335,
|
|
"fcm_dpo/delta": 0.01012316346168518,
|
|
"fcm_dpo/margin": 1.5297075510025024,
|
|
"fcm_dpo/q_t": 0.3679242432117462,
|
|
"grad_norm": 86.62976837158203,
|
|
"learning_rate": 1.0606484367268906e-07,
|
|
"logits/chosen": 1.5805827379226685,
|
|
"logits/rejected": 1.5588011741638184,
|
|
"logps/chosen": -79.52948760986328,
|
|
"logps/ref_chosen": -84.56254577636719,
|
|
"logps/ref_rejected": -90.06451416015625,
|
|
"logps/rejected": -86.56116485595703,
|
|
"loss": 1.1362,
|
|
"margin_dpo/margin_mean": 1.5297071933746338,
|
|
"margin_dpo/margin_std": 2.896221399307251,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.7286470143613001,
|
|
"fcm_dpo/beta": 0.5414802432060242,
|
|
"fcm_dpo/delta": -0.08684663474559784,
|
|
"fcm_dpo/margin": 1.715275526046753,
|
|
"fcm_dpo/q_t": 0.3422348201274872,
|
|
"grad_norm": 91.90193939208984,
|
|
"learning_rate": 1.0498577260720048e-07,
|
|
"logits/chosen": 1.6824023723602295,
|
|
"logits/rejected": 1.4254873991012573,
|
|
"logps/chosen": -74.20223999023438,
|
|
"logps/ref_chosen": -78.88141632080078,
|
|
"logps/ref_rejected": -125.41990661621094,
|
|
"logps/rejected": -122.45600128173828,
|
|
"loss": 1.0844,
|
|
"margin_dpo/margin_mean": 1.7152750492095947,
|
|
"margin_dpo/margin_std": 2.783423900604248,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.7301587301587301,
|
|
"fcm_dpo/beta": 0.5211665630340576,
|
|
"fcm_dpo/delta": -0.18208253383636475,
|
|
"fcm_dpo/margin": 1.9437726736068726,
|
|
"fcm_dpo/q_t": 0.3126748204231262,
|
|
"grad_norm": 71.99545288085938,
|
|
"learning_rate": 1.0391075790138232e-07,
|
|
"logits/chosen": 1.7354581356048584,
|
|
"logits/rejected": 1.5467109680175781,
|
|
"logps/chosen": -67.4843521118164,
|
|
"logps/ref_chosen": -72.690185546875,
|
|
"logps/ref_rejected": -98.37237548828125,
|
|
"logps/rejected": -95.11031341552734,
|
|
"loss": 0.8906,
|
|
"margin_dpo/margin_mean": 1.9437729120254517,
|
|
"margin_dpo/margin_std": 2.387289524078369,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.7316704459561603,
|
|
"fcm_dpo/beta": 0.5343146920204163,
|
|
"fcm_dpo/delta": 0.16501466929912567,
|
|
"fcm_dpo/margin": 1.2994334697723389,
|
|
"fcm_dpo/q_t": 0.36794763803482056,
|
|
"grad_norm": 87.20106506347656,
|
|
"learning_rate": 1.0283982962570681e-07,
|
|
"logits/chosen": 1.7466881275177002,
|
|
"logits/rejected": 1.7247745990753174,
|
|
"logps/chosen": -68.83059692382812,
|
|
"logps/ref_chosen": -73.98435974121094,
|
|
"logps/ref_rejected": -89.99178314208984,
|
|
"logps/rejected": -86.137451171875,
|
|
"loss": 1.0497,
|
|
"margin_dpo/margin_mean": 1.299433946609497,
|
|
"margin_dpo/margin_std": 2.0427210330963135,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.7331821617535903,
|
|
"fcm_dpo/beta": 0.5269999504089355,
|
|
"fcm_dpo/delta": -0.00029647350311279297,
|
|
"fcm_dpo/margin": 1.6050857305526733,
|
|
"fcm_dpo/q_t": 0.3547622263431549,
|
|
"grad_norm": 83.7960205078125,
|
|
"learning_rate": 1.0177301773633992e-07,
|
|
"logits/chosen": 1.9939520359039307,
|
|
"logits/rejected": 1.9123587608337402,
|
|
"logps/chosen": -72.948486328125,
|
|
"logps/ref_chosen": -78.0927963256836,
|
|
"logps/ref_rejected": -89.14010620117188,
|
|
"logps/rejected": -85.60088348388672,
|
|
"loss": 1.0126,
|
|
"margin_dpo/margin_mean": 1.605086326599121,
|
|
"margin_dpo/margin_std": 2.3685264587402344,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.7346938775510204,
|
|
"fcm_dpo/beta": 0.5255827903747559,
|
|
"fcm_dpo/delta": -0.17571258544921875,
|
|
"fcm_dpo/margin": 1.917954444885254,
|
|
"fcm_dpo/q_t": 0.3390832543373108,
|
|
"grad_norm": 74.30439758300781,
|
|
"learning_rate": 1.007103520743035e-07,
|
|
"logits/chosen": 1.6516636610031128,
|
|
"logits/rejected": 1.4582644701004028,
|
|
"logps/chosen": -69.16941833496094,
|
|
"logps/ref_chosen": -73.74685668945312,
|
|
"logps/ref_rejected": -107.752685546875,
|
|
"logps/rejected": -105.09320068359375,
|
|
"loss": 1.0416,
|
|
"margin_dpo/margin_mean": 1.917954921722412,
|
|
"margin_dpo/margin_std": 3.016083240509033,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.7362055933484505,
|
|
"fcm_dpo/beta": 0.5252140760421753,
|
|
"fcm_dpo/delta": 0.09578922390937805,
|
|
"fcm_dpo/margin": 1.4492162466049194,
|
|
"fcm_dpo/q_t": 0.3582766056060791,
|
|
"grad_norm": 81.06444549560547,
|
|
"learning_rate": 9.965186236464046e-08,
|
|
"logits/chosen": 1.869274377822876,
|
|
"logits/rejected": 1.7187423706054688,
|
|
"logps/chosen": -74.88436889648438,
|
|
"logps/ref_chosen": -79.57780456542969,
|
|
"logps/ref_rejected": -102.2916259765625,
|
|
"logps/rejected": -99.04740905761719,
|
|
"loss": 1.058,
|
|
"margin_dpo/margin_mean": 1.4492161273956299,
|
|
"margin_dpo/margin_std": 2.371695041656494,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.7377173091458806,
|
|
"fcm_dpo/beta": 0.5243767499923706,
|
|
"fcm_dpo/delta": -0.1707746684551239,
|
|
"fcm_dpo/margin": 1.904654622077942,
|
|
"fcm_dpo/q_t": 0.3486997187137604,
|
|
"grad_norm": 81.35235595703125,
|
|
"learning_rate": 9.859757821558337e-08,
|
|
"logits/chosen": 1.7296903133392334,
|
|
"logits/rejected": 1.5843555927276611,
|
|
"logps/chosen": -75.58885955810547,
|
|
"logps/ref_chosen": -80.62767791748047,
|
|
"logps/ref_rejected": -100.4541015625,
|
|
"logps/rejected": -97.3199462890625,
|
|
"loss": 1.0677,
|
|
"margin_dpo/margin_mean": 1.9046547412872314,
|
|
"margin_dpo/margin_std": 3.0004210472106934,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.7392290249433107,
|
|
"fcm_dpo/beta": 0.5423703789710999,
|
|
"fcm_dpo/delta": 0.43078815937042236,
|
|
"fcm_dpo/margin": 0.8081073760986328,
|
|
"fcm_dpo/q_t": 0.4194217324256897,
|
|
"grad_norm": 92.3609390258789,
|
|
"learning_rate": 9.754752911772615e-08,
|
|
"logits/chosen": 1.6849393844604492,
|
|
"logits/rejected": 1.6025831699371338,
|
|
"logps/chosen": -80.61341857910156,
|
|
"logps/ref_chosen": -85.39521026611328,
|
|
"logps/ref_rejected": -101.97309875488281,
|
|
"logps/rejected": -97.9994125366211,
|
|
"loss": 1.2433,
|
|
"margin_dpo/margin_mean": 0.8081076741218567,
|
|
"margin_dpo/margin_std": 1.996825933456421,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.7407407407407407,
|
|
"fcm_dpo/beta": 0.5469303131103516,
|
|
"fcm_dpo/delta": -0.05695779621601105,
|
|
"fcm_dpo/margin": 1.6473114490509033,
|
|
"fcm_dpo/q_t": 0.3572388291358948,
|
|
"grad_norm": 90.47389221191406,
|
|
"learning_rate": 9.650174444319956e-08,
|
|
"logits/chosen": 2.277765989303589,
|
|
"logits/rejected": 2.227505683898926,
|
|
"logps/chosen": -72.671875,
|
|
"logps/ref_chosen": -77.75590515136719,
|
|
"logps/ref_rejected": -88.98885345458984,
|
|
"logps/rejected": -85.55213928222656,
|
|
"loss": 1.1484,
|
|
"margin_dpo/margin_mean": 1.6473113298416138,
|
|
"margin_dpo/margin_std": 2.9487578868865967,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.7422524565381708,
|
|
"fcm_dpo/beta": 0.5318253040313721,
|
|
"fcm_dpo/delta": -0.10165757685899734,
|
|
"fcm_dpo/margin": 1.7601425647735596,
|
|
"fcm_dpo/q_t": 0.33336108922958374,
|
|
"grad_norm": 78.02272033691406,
|
|
"learning_rate": 9.546025344484868e-08,
|
|
"logits/chosen": 1.6132152080535889,
|
|
"logits/rejected": 1.4740610122680664,
|
|
"logps/chosen": -69.28722381591797,
|
|
"logps/ref_chosen": -74.33360290527344,
|
|
"logps/ref_rejected": -91.4105224609375,
|
|
"logps/rejected": -88.12428283691406,
|
|
"loss": 0.9392,
|
|
"margin_dpo/margin_mean": 1.7601426839828491,
|
|
"margin_dpo/margin_std": 2.289900302886963,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.7437641723356009,
|
|
"fcm_dpo/beta": 0.5352230072021484,
|
|
"fcm_dpo/delta": -0.038240764290094376,
|
|
"fcm_dpo/margin": 1.1772292852401733,
|
|
"fcm_dpo/q_t": 0.39304035902023315,
|
|
"grad_norm": 98.42277526855469,
|
|
"learning_rate": 9.442308525541589e-08,
|
|
"logits/chosen": 1.5843994617462158,
|
|
"logits/rejected": 1.4141943454742432,
|
|
"logps/chosen": -80.89947509765625,
|
|
"logps/ref_chosen": -85.14178466796875,
|
|
"logps/ref_rejected": -103.44204711914062,
|
|
"logps/rejected": -100.37696838378906,
|
|
"loss": 1.264,
|
|
"margin_dpo/margin_mean": 1.177229642868042,
|
|
"margin_dpo/margin_std": 2.6634840965270996,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.745275888133031,
|
|
"fcm_dpo/beta": 0.5297720432281494,
|
|
"fcm_dpo/delta": -0.07003698498010635,
|
|
"fcm_dpo/margin": 1.7245081663131714,
|
|
"fcm_dpo/q_t": 0.3485276997089386,
|
|
"grad_norm": 82.64747619628906,
|
|
"learning_rate": 9.339026888672468e-08,
|
|
"logits/chosen": 2.035825490951538,
|
|
"logits/rejected": 1.8640735149383545,
|
|
"logps/chosen": -70.65989685058594,
|
|
"logps/ref_chosen": -75.81439208984375,
|
|
"logps/ref_rejected": -95.30766296386719,
|
|
"logps/rejected": -91.87767028808594,
|
|
"loss": 1.042,
|
|
"margin_dpo/margin_mean": 1.7245078086853027,
|
|
"margin_dpo/margin_std": 2.7354183197021484,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.7467876039304611,
|
|
"fcm_dpo/beta": 0.5313657522201538,
|
|
"fcm_dpo/delta": 0.0808698982000351,
|
|
"fcm_dpo/margin": 1.459385633468628,
|
|
"fcm_dpo/q_t": 0.3662447929382324,
|
|
"grad_norm": 103.77287292480469,
|
|
"learning_rate": 9.236183322886945e-08,
|
|
"logits/chosen": 1.5601379871368408,
|
|
"logits/rejected": 1.5064457654953003,
|
|
"logps/chosen": -88.89629364013672,
|
|
"logps/ref_chosen": -93.83562469482422,
|
|
"logps/ref_rejected": -112.21142578125,
|
|
"logps/rejected": -108.73147583007812,
|
|
"loss": 1.1484,
|
|
"margin_dpo/margin_mean": 1.459385633468628,
|
|
"margin_dpo/margin_std": 2.74635648727417,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.7482993197278912,
|
|
"fcm_dpo/beta": 0.5466286540031433,
|
|
"fcm_dpo/delta": 0.10114302486181259,
|
|
"fcm_dpo/margin": 1.3804593086242676,
|
|
"fcm_dpo/q_t": 0.36845171451568604,
|
|
"grad_norm": 77.80597686767578,
|
|
"learning_rate": 9.133780704940594e-08,
|
|
"logits/chosen": 1.9278578758239746,
|
|
"logits/rejected": 1.7768769264221191,
|
|
"logps/chosen": -63.57561492919922,
|
|
"logps/ref_chosen": -68.52467346191406,
|
|
"logps/ref_rejected": -89.65379333496094,
|
|
"logps/rejected": -86.08518981933594,
|
|
"loss": 1.1418,
|
|
"margin_dpo/margin_mean": 1.3804597854614258,
|
|
"margin_dpo/margin_std": 2.606942653656006,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.7498110355253212,
|
|
"fcm_dpo/beta": 0.5298216938972473,
|
|
"fcm_dpo/delta": -0.11912409216165543,
|
|
"fcm_dpo/margin": 1.800965428352356,
|
|
"fcm_dpo/q_t": 0.34737032651901245,
|
|
"grad_norm": 77.42396545410156,
|
|
"learning_rate": 9.031821899254797e-08,
|
|
"logits/chosen": 1.789339542388916,
|
|
"logits/rejected": 1.6037667989730835,
|
|
"logps/chosen": -68.10794830322266,
|
|
"logps/ref_chosen": -73.13618469238281,
|
|
"logps/ref_rejected": -111.50930786132812,
|
|
"logps/rejected": -108.28204345703125,
|
|
"loss": 1.0363,
|
|
"margin_dpo/margin_mean": 1.8009655475616455,
|
|
"margin_dpo/margin_std": 2.837244987487793,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.7513227513227513,
|
|
"fcm_dpo/beta": 0.51277756690979,
|
|
"fcm_dpo/delta": -0.21961292624473572,
|
|
"fcm_dpo/margin": 2.0381200313568115,
|
|
"fcm_dpo/q_t": 0.32603585720062256,
|
|
"grad_norm": 73.66954040527344,
|
|
"learning_rate": 8.930309757836516e-08,
|
|
"logits/chosen": 2.010392427444458,
|
|
"logits/rejected": 1.8880023956298828,
|
|
"logps/chosen": -84.01908874511719,
|
|
"logps/ref_chosen": -88.71475219726562,
|
|
"logps/ref_rejected": -105.74935913085938,
|
|
"logps/rejected": -103.09181213378906,
|
|
"loss": 0.9522,
|
|
"margin_dpo/margin_mean": 2.0381202697753906,
|
|
"margin_dpo/margin_std": 2.9248361587524414,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.7528344671201814,
|
|
"fcm_dpo/beta": 0.5155347585678101,
|
|
"fcm_dpo/delta": 0.06701716780662537,
|
|
"fcm_dpo/margin": 1.5286895036697388,
|
|
"fcm_dpo/q_t": 0.3693251311779022,
|
|
"grad_norm": 89.92591857910156,
|
|
"learning_rate": 8.829247120198563e-08,
|
|
"logits/chosen": 1.8527765274047852,
|
|
"logits/rejected": 1.798376441001892,
|
|
"logps/chosen": -78.18376922607422,
|
|
"logps/ref_chosen": -83.3353271484375,
|
|
"logps/ref_rejected": -89.34941864013672,
|
|
"logps/rejected": -85.72654724121094,
|
|
"loss": 1.0684,
|
|
"margin_dpo/margin_mean": 1.5286893844604492,
|
|
"margin_dpo/margin_std": 2.6188063621520996,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.7543461829176115,
|
|
"fcm_dpo/beta": 0.5106238722801208,
|
|
"fcm_dpo/delta": -0.10215874761343002,
|
|
"fcm_dpo/margin": 1.8459928035736084,
|
|
"fcm_dpo/q_t": 0.34619784355163574,
|
|
"grad_norm": 78.80824279785156,
|
|
"learning_rate": 8.728636813280163e-08,
|
|
"logits/chosen": 1.7513785362243652,
|
|
"logits/rejected": 1.5904102325439453,
|
|
"logps/chosen": -74.64079284667969,
|
|
"logps/ref_chosen": -79.373779296875,
|
|
"logps/ref_rejected": -104.62533569335938,
|
|
"logps/rejected": -101.73834228515625,
|
|
"loss": 1.0767,
|
|
"margin_dpo/margin_mean": 1.8459930419921875,
|
|
"margin_dpo/margin_std": 2.95662522315979,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.7558578987150416,
|
|
"fcm_dpo/beta": 0.5103375911712646,
|
|
"fcm_dpo/delta": 0.05852968245744705,
|
|
"fcm_dpo/margin": 1.5600131750106812,
|
|
"fcm_dpo/q_t": 0.3537482023239136,
|
|
"grad_norm": 77.21739959716797,
|
|
"learning_rate": 8.628481651367875e-08,
|
|
"logits/chosen": 1.3777530193328857,
|
|
"logits/rejected": 1.3496954441070557,
|
|
"logps/chosen": -81.1361083984375,
|
|
"logps/ref_chosen": -85.953857421875,
|
|
"logps/ref_rejected": -90.40995788574219,
|
|
"logps/rejected": -87.1522216796875,
|
|
"loss": 1.0818,
|
|
"margin_dpo/margin_mean": 1.5600130558013916,
|
|
"margin_dpo/margin_std": 2.59567928314209,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.7558578987150416,
|
|
"eval_fcm_dpo/beta": 0.5119248032569885,
|
|
"eval_logits/chosen": 1.8607048988342285,
|
|
"eval_logits/rejected": 1.7467539310455322,
|
|
"eval_logps/chosen": -81.96269989013672,
|
|
"eval_logps/ref_chosen": -86.90177917480469,
|
|
"eval_logps/ref_rejected": -96.69639587402344,
|
|
"eval_logps/rejected": -93.33091735839844,
|
|
"eval_loss": 0.5445140600204468,
|
|
"eval_margin_dpo/margin_mean": 1.573616623878479,
|
|
"eval_margin_dpo/margin_std": 2.6447198390960693,
|
|
"eval_runtime": 42.2639,
|
|
"eval_samples_per_second": 54.491,
|
|
"eval_steps_per_second": 1.704,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.7573696145124716,
|
|
"fcm_dpo/beta": 0.5188782215118408,
|
|
"fcm_dpo/delta": 0.004477545619010925,
|
|
"fcm_dpo/margin": 1.6241436004638672,
|
|
"fcm_dpo/q_t": 0.3532504439353943,
|
|
"grad_norm": 71.30140686035156,
|
|
"learning_rate": 8.528784436016878e-08,
|
|
"logits/chosen": 1.705322027206421,
|
|
"logits/rejected": 1.6886937618255615,
|
|
"logps/chosen": -76.24929809570312,
|
|
"logps/ref_chosen": -81.22268676757812,
|
|
"logps/ref_rejected": -86.97892761230469,
|
|
"logps/rejected": -83.62968444824219,
|
|
"loss": 0.9835,
|
|
"margin_dpo/margin_mean": 1.6241438388824463,
|
|
"margin_dpo/margin_std": 2.3686084747314453,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.7588813303099018,
|
|
"fcm_dpo/beta": 0.5274479985237122,
|
|
"fcm_dpo/delta": 0.22003662586212158,
|
|
"fcm_dpo/margin": 1.222203016281128,
|
|
"fcm_dpo/q_t": 0.3778621554374695,
|
|
"grad_norm": 86.91670227050781,
|
|
"learning_rate": 8.4295479559726e-08,
|
|
"logits/chosen": 1.8385815620422363,
|
|
"logits/rejected": 1.7697994709014893,
|
|
"logps/chosen": -78.10995483398438,
|
|
"logps/ref_chosen": -83.1567611694336,
|
|
"logps/ref_rejected": -106.74440002441406,
|
|
"logps/rejected": -102.91980743408203,
|
|
"loss": 1.0534,
|
|
"margin_dpo/margin_mean": 1.2222027778625488,
|
|
"margin_dpo/margin_std": 1.9343492984771729,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.7603930461073318,
|
|
"fcm_dpo/beta": 0.5325933694839478,
|
|
"fcm_dpo/delta": -0.007229819893836975,
|
|
"fcm_dpo/margin": 1.6076710224151611,
|
|
"fcm_dpo/q_t": 0.34382903575897217,
|
|
"grad_norm": 78.07044982910156,
|
|
"learning_rate": 8.330774987092712e-08,
|
|
"logits/chosen": 1.5651829242706299,
|
|
"logits/rejected": 1.5705194473266602,
|
|
"logps/chosen": -63.34757995605469,
|
|
"logps/ref_chosen": -68.51583862304688,
|
|
"logps/ref_rejected": -75.02178955078125,
|
|
"logps/rejected": -71.46119689941406,
|
|
"loss": 1.0491,
|
|
"margin_dpo/margin_mean": 1.6076714992523193,
|
|
"margin_dpo/margin_std": 2.520142078399658,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.7619047619047619,
|
|
"fcm_dpo/beta": 0.5068508386611938,
|
|
"fcm_dpo/delta": -0.38821882009506226,
|
|
"fcm_dpo/margin": 2.3534343242645264,
|
|
"fcm_dpo/q_t": 0.2800072133541107,
|
|
"grad_norm": 62.00618362426758,
|
|
"learning_rate": 8.232468292269479e-08,
|
|
"logits/chosen": 1.7697081565856934,
|
|
"logits/rejected": 1.7505295276641846,
|
|
"logps/chosen": -80.1093521118164,
|
|
"logps/ref_chosen": -85.15829467773438,
|
|
"logps/ref_rejected": -96.16879272460938,
|
|
"logps/rejected": -93.47328186035156,
|
|
"loss": 0.7507,
|
|
"margin_dpo/margin_mean": 2.3534343242645264,
|
|
"margin_dpo/margin_std": 2.318760395050049,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.763416477702192,
|
|
"fcm_dpo/beta": 0.5089028477668762,
|
|
"fcm_dpo/delta": 0.22177591919898987,
|
|
"fcm_dpo/margin": 1.2635679244995117,
|
|
"fcm_dpo/q_t": 0.39394375681877136,
|
|
"grad_norm": 92.9467544555664,
|
|
"learning_rate": 8.134630621352483e-08,
|
|
"logits/chosen": 1.7371578216552734,
|
|
"logits/rejected": 1.60854172706604,
|
|
"logps/chosen": -74.40583801269531,
|
|
"logps/ref_chosen": -79.26185607910156,
|
|
"logps/ref_rejected": -96.34947967529297,
|
|
"logps/rejected": -92.75701904296875,
|
|
"loss": 1.2414,
|
|
"margin_dpo/margin_mean": 1.2635676860809326,
|
|
"margin_dpo/margin_std": 2.7791409492492676,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.764928193499622,
|
|
"fcm_dpo/beta": 0.5233539342880249,
|
|
"fcm_dpo/delta": 0.15490612387657166,
|
|
"fcm_dpo/margin": 1.3474644422531128,
|
|
"fcm_dpo/q_t": 0.38606366515159607,
|
|
"grad_norm": 109.93718719482422,
|
|
"learning_rate": 8.037264711071698e-08,
|
|
"logits/chosen": 1.879191279411316,
|
|
"logits/rejected": 1.866205096244812,
|
|
"logps/chosen": -83.36624908447266,
|
|
"logps/ref_chosen": -88.192626953125,
|
|
"logps/ref_rejected": -100.86880493164062,
|
|
"logps/rejected": -97.38990020751953,
|
|
"loss": 1.3173,
|
|
"margin_dpo/margin_mean": 1.347464919090271,
|
|
"margin_dpo/margin_std": 3.231222629547119,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.7664399092970522,
|
|
"fcm_dpo/beta": 0.5418146252632141,
|
|
"fcm_dpo/delta": 0.09071876108646393,
|
|
"fcm_dpo/margin": 1.4136903285980225,
|
|
"fcm_dpo/q_t": 0.36703741550445557,
|
|
"grad_norm": 96.97240447998047,
|
|
"learning_rate": 7.940373284960933e-08,
|
|
"logits/chosen": 1.6270873546600342,
|
|
"logits/rejected": 1.5263984203338623,
|
|
"logps/chosen": -81.430908203125,
|
|
"logps/ref_chosen": -86.04632568359375,
|
|
"logps/ref_rejected": -111.44412994384766,
|
|
"logps/rejected": -108.24239349365234,
|
|
"loss": 1.1249,
|
|
"margin_dpo/margin_mean": 1.4136903285980225,
|
|
"margin_dpo/margin_std": 2.5658071041107178,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.7679516250944822,
|
|
"fcm_dpo/beta": 0.5383831262588501,
|
|
"fcm_dpo/delta": -0.1335231363773346,
|
|
"fcm_dpo/margin": 1.8011484146118164,
|
|
"fcm_dpo/q_t": 0.3420735001564026,
|
|
"grad_norm": 74.95172882080078,
|
|
"learning_rate": 7.843959053281663e-08,
|
|
"logits/chosen": 1.6859643459320068,
|
|
"logits/rejected": 1.438178539276123,
|
|
"logps/chosen": -74.3707504272461,
|
|
"logps/ref_chosen": -79.25038146972656,
|
|
"logps/ref_rejected": -118.49089813232422,
|
|
"logps/rejected": -115.41241455078125,
|
|
"loss": 0.9989,
|
|
"margin_dpo/margin_mean": 1.8011486530303955,
|
|
"margin_dpo/margin_std": 2.758845806121826,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.7694633408919124,
|
|
"fcm_dpo/beta": 0.5251543521881104,
|
|
"fcm_dpo/delta": -0.097801074385643,
|
|
"fcm_dpo/margin": 1.786637783050537,
|
|
"fcm_dpo/q_t": 0.3379754424095154,
|
|
"grad_norm": 73.10486602783203,
|
|
"learning_rate": 7.748024712947204e-08,
|
|
"logits/chosen": 1.5579968690872192,
|
|
"logits/rejected": 1.4831342697143555,
|
|
"logps/chosen": -75.50135803222656,
|
|
"logps/ref_chosen": -80.7039566040039,
|
|
"logps/ref_rejected": -90.50444793701172,
|
|
"logps/rejected": -87.08848571777344,
|
|
"loss": 0.9619,
|
|
"margin_dpo/margin_mean": 1.7866381406784058,
|
|
"margin_dpo/margin_std": 2.4936304092407227,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.7709750566893424,
|
|
"fcm_dpo/beta": 0.5186738967895508,
|
|
"fcm_dpo/delta": 0.03997340798377991,
|
|
"fcm_dpo/margin": 1.5671298503875732,
|
|
"fcm_dpo/q_t": 0.35538774728775024,
|
|
"grad_norm": 91.2940673828125,
|
|
"learning_rate": 7.652572947447272e-08,
|
|
"logits/chosen": 1.920919418334961,
|
|
"logits/rejected": 1.748721957206726,
|
|
"logps/chosen": -62.64276885986328,
|
|
"logps/ref_chosen": -67.64491271972656,
|
|
"logps/ref_rejected": -108.92274475097656,
|
|
"logps/rejected": -105.48773193359375,
|
|
"loss": 1.1015,
|
|
"margin_dpo/margin_mean": 1.5671300888061523,
|
|
"margin_dpo/margin_std": 2.6930947303771973,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.7724867724867724,
|
|
"fcm_dpo/beta": 0.5019285678863525,
|
|
"fcm_dpo/delta": -0.2909063696861267,
|
|
"fcm_dpo/margin": 2.2096104621887207,
|
|
"fcm_dpo/q_t": 0.3058483600616455,
|
|
"grad_norm": 66.41272735595703,
|
|
"learning_rate": 7.557606426772961e-08,
|
|
"logits/chosen": 1.8397446870803833,
|
|
"logits/rejected": 1.7726900577545166,
|
|
"logps/chosen": -70.74977111816406,
|
|
"logps/ref_chosen": -75.66263580322266,
|
|
"logps/ref_rejected": -104.26296997070312,
|
|
"logps/rejected": -101.5597152709961,
|
|
"loss": 0.8574,
|
|
"margin_dpo/margin_mean": 2.209610939025879,
|
|
"margin_dpo/margin_std": 2.602205991744995,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.7739984882842026,
|
|
"fcm_dpo/beta": 0.4980090856552124,
|
|
"fcm_dpo/delta": 0.08836930245161057,
|
|
"fcm_dpo/margin": 1.5430858135223389,
|
|
"fcm_dpo/q_t": 0.36593347787857056,
|
|
"grad_norm": 79.47098541259766,
|
|
"learning_rate": 7.463127807341966e-08,
|
|
"logits/chosen": 1.4316006898880005,
|
|
"logits/rejected": 1.3491363525390625,
|
|
"logps/chosen": -74.39913940429688,
|
|
"logps/ref_chosen": -79.31925964355469,
|
|
"logps/ref_rejected": -82.22052001953125,
|
|
"logps/rejected": -78.8434829711914,
|
|
"loss": 1.036,
|
|
"margin_dpo/margin_mean": 1.543086051940918,
|
|
"margin_dpo/margin_std": 2.4387998580932617,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.7755102040816326,
|
|
"fcm_dpo/beta": 0.485543429851532,
|
|
"fcm_dpo/delta": -0.11648038774728775,
|
|
"fcm_dpo/margin": 1.9538068771362305,
|
|
"fcm_dpo/q_t": 0.3228296935558319,
|
|
"grad_norm": 57.1087760925293,
|
|
"learning_rate": 7.369139731924401e-08,
|
|
"logits/chosen": 2.3472414016723633,
|
|
"logits/rejected": 2.1964426040649414,
|
|
"logps/chosen": -66.7118148803711,
|
|
"logps/ref_chosen": -72.02534484863281,
|
|
"logps/ref_rejected": -86.56224060058594,
|
|
"logps/rejected": -83.2025146484375,
|
|
"loss": 0.8578,
|
|
"margin_dpo/margin_mean": 1.9538071155548096,
|
|
"margin_dpo/margin_std": 2.2184200286865234,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.7770219198790628,
|
|
"fcm_dpo/beta": 0.4895557463169098,
|
|
"fcm_dpo/delta": -0.029002681374549866,
|
|
"fcm_dpo/margin": 1.7902264595031738,
|
|
"fcm_dpo/q_t": 0.34171241521835327,
|
|
"grad_norm": 71.4445571899414,
|
|
"learning_rate": 7.275644829568747e-08,
|
|
"logits/chosen": 1.7180615663528442,
|
|
"logits/rejected": 1.6842756271362305,
|
|
"logps/chosen": -80.10594177246094,
|
|
"logps/ref_chosen": -84.94093322753906,
|
|
"logps/ref_rejected": -102.44367980957031,
|
|
"logps/rejected": -99.39891052246094,
|
|
"loss": 0.9853,
|
|
"margin_dpo/margin_mean": 1.7902262210845947,
|
|
"margin_dpo/margin_std": 2.5876619815826416,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.7785336356764928,
|
|
"fcm_dpo/beta": 0.4983447194099426,
|
|
"fcm_dpo/delta": 0.10146874189376831,
|
|
"fcm_dpo/margin": 1.51442551612854,
|
|
"fcm_dpo/q_t": 0.3634001910686493,
|
|
"grad_norm": 77.20426177978516,
|
|
"learning_rate": 7.182645715528435e-08,
|
|
"logits/chosen": 1.7342910766601562,
|
|
"logits/rejected": 1.601979374885559,
|
|
"logps/chosen": -68.45315551757812,
|
|
"logps/ref_chosen": -72.9662094116211,
|
|
"logps/ref_rejected": -102.53651428222656,
|
|
"logps/rejected": -99.53788757324219,
|
|
"loss": 1.0669,
|
|
"margin_dpo/margin_mean": 1.514425277709961,
|
|
"margin_dpo/margin_std": 2.46881103515625,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.780045351473923,
|
|
"fcm_dpo/beta": 0.511983335018158,
|
|
"fcm_dpo/delta": 0.16067957878112793,
|
|
"fcm_dpo/margin": 1.3661940097808838,
|
|
"fcm_dpo/q_t": 0.37095892429351807,
|
|
"grad_norm": 82.00129699707031,
|
|
"learning_rate": 7.090144991188568e-08,
|
|
"logits/chosen": 1.8110804557800293,
|
|
"logits/rejected": 1.678971529006958,
|
|
"logps/chosen": -71.64615631103516,
|
|
"logps/ref_chosen": -76.63414001464844,
|
|
"logps/ref_rejected": -91.01750183105469,
|
|
"logps/rejected": -87.39571380615234,
|
|
"loss": 1.1056,
|
|
"margin_dpo/margin_mean": 1.366194486618042,
|
|
"margin_dpo/margin_std": 2.439441204071045,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.781557067271353,
|
|
"fcm_dpo/beta": 0.5124858617782593,
|
|
"fcm_dpo/delta": -0.02756066992878914,
|
|
"fcm_dpo/margin": 1.7076544761657715,
|
|
"fcm_dpo/q_t": 0.3710241913795471,
|
|
"grad_norm": 76.47826385498047,
|
|
"learning_rate": 6.998145243993284e-08,
|
|
"logits/chosen": 1.9861897230148315,
|
|
"logits/rejected": 1.9750926494598389,
|
|
"logps/chosen": -72.4866714477539,
|
|
"logps/ref_chosen": -77.06817626953125,
|
|
"logps/ref_rejected": -80.048583984375,
|
|
"logps/rejected": -77.17472839355469,
|
|
"loss": 1.1053,
|
|
"margin_dpo/margin_mean": 1.7076544761657715,
|
|
"margin_dpo/margin_std": 3.1321678161621094,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.783068783068783,
|
|
"fcm_dpo/beta": 0.526599109172821,
|
|
"fcm_dpo/delta": 0.13619472086429596,
|
|
"fcm_dpo/margin": 1.3683173656463623,
|
|
"fcm_dpo/q_t": 0.3868216872215271,
|
|
"grad_norm": 79.98131561279297,
|
|
"learning_rate": 6.906649047373245e-08,
|
|
"logits/chosen": 1.6740235090255737,
|
|
"logits/rejected": 1.5478146076202393,
|
|
"logps/chosen": -73.65196228027344,
|
|
"logps/ref_chosen": -78.69026184082031,
|
|
"logps/ref_rejected": -97.58124542236328,
|
|
"logps/rejected": -93.9112548828125,
|
|
"loss": 1.191,
|
|
"margin_dpo/margin_mean": 1.3683173656463623,
|
|
"margin_dpo/margin_std": 2.781571388244629,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.7845804988662132,
|
|
"fcm_dpo/beta": 0.5283833742141724,
|
|
"fcm_dpo/delta": 0.06050370633602142,
|
|
"fcm_dpo/margin": 0.9821269512176514,
|
|
"fcm_dpo/q_t": 0.4151061177253723,
|
|
"grad_norm": 106.65807342529297,
|
|
"learning_rate": 6.815658960673781e-08,
|
|
"logits/chosen": 2.014273166656494,
|
|
"logits/rejected": 1.9104008674621582,
|
|
"logps/chosen": -73.96952819824219,
|
|
"logps/ref_chosen": -78.35087585449219,
|
|
"logps/ref_rejected": -95.79212188720703,
|
|
"logps/rejected": -92.39290618896484,
|
|
"loss": 1.4895,
|
|
"margin_dpo/margin_mean": 0.9821275472640991,
|
|
"margin_dpo/margin_std": 3.163806915283203,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.7860922146636432,
|
|
"fcm_dpo/beta": 0.5247069597244263,
|
|
"fcm_dpo/delta": 0.002389952540397644,
|
|
"fcm_dpo/margin": 1.6092243194580078,
|
|
"fcm_dpo/q_t": 0.345553994178772,
|
|
"grad_norm": 80.86093139648438,
|
|
"learning_rate": 6.725177529083209e-08,
|
|
"logits/chosen": 1.7643589973449707,
|
|
"logits/rejected": 1.607407808303833,
|
|
"logps/chosen": -75.44636535644531,
|
|
"logps/ref_chosen": -80.40513610839844,
|
|
"logps/ref_rejected": -93.02791595458984,
|
|
"logps/rejected": -89.67837524414062,
|
|
"loss": 0.9769,
|
|
"margin_dpo/margin_mean": 1.6092244386672974,
|
|
"margin_dpo/margin_std": 2.20902681350708,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.7876039304610734,
|
|
"fcm_dpo/beta": 0.5398433208465576,
|
|
"fcm_dpo/delta": 0.026781171560287476,
|
|
"fcm_dpo/margin": 1.5242526531219482,
|
|
"fcm_dpo/q_t": 0.3553396463394165,
|
|
"grad_norm": 88.6592025756836,
|
|
"learning_rate": 6.63520728356167e-08,
|
|
"logits/chosen": 1.613523006439209,
|
|
"logits/rejected": 1.4470891952514648,
|
|
"logps/chosen": -81.82543182373047,
|
|
"logps/ref_chosen": -86.5218276977539,
|
|
"logps/ref_rejected": -109.20257568359375,
|
|
"logps/rejected": -106.03044128417969,
|
|
"loss": 1.0274,
|
|
"margin_dpo/margin_mean": 1.5242516994476318,
|
|
"margin_dpo/margin_std": 2.3445534706115723,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.7891156462585034,
|
|
"fcm_dpo/beta": 0.5545932650566101,
|
|
"fcm_dpo/delta": 0.17674265801906586,
|
|
"fcm_dpo/margin": 1.2288824319839478,
|
|
"fcm_dpo/q_t": 0.3987041711807251,
|
|
"grad_norm": 93.76902770996094,
|
|
"learning_rate": 6.545750740770336e-08,
|
|
"logits/chosen": 1.6111791133880615,
|
|
"logits/rejected": 1.6222134828567505,
|
|
"logps/chosen": -73.34974670410156,
|
|
"logps/ref_chosen": -78.24254608154297,
|
|
"logps/ref_rejected": -85.23554992675781,
|
|
"logps/rejected": -81.57162475585938,
|
|
"loss": 1.3201,
|
|
"margin_dpo/margin_mean": 1.2288823127746582,
|
|
"margin_dpo/margin_std": 2.988895893096924,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.7906273620559335,
|
|
"fcm_dpo/beta": 0.5467442870140076,
|
|
"fcm_dpo/delta": -0.03936249762773514,
|
|
"fcm_dpo/margin": 1.6179664134979248,
|
|
"fcm_dpo/q_t": 0.33749115467071533,
|
|
"grad_norm": 95.7950439453125,
|
|
"learning_rate": 6.456810403001012e-08,
|
|
"logits/chosen": 2.068002223968506,
|
|
"logits/rejected": 1.8295702934265137,
|
|
"logps/chosen": -78.77156066894531,
|
|
"logps/ref_chosen": -83.50096893310547,
|
|
"logps/ref_rejected": -117.45217895507812,
|
|
"logps/rejected": -114.34073638916016,
|
|
"loss": 1.0423,
|
|
"margin_dpo/margin_mean": 1.6179664134979248,
|
|
"margin_dpo/margin_std": 2.464113712310791,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.7921390778533636,
|
|
"fcm_dpo/beta": 0.5638613104820251,
|
|
"fcm_dpo/delta": 0.1269315630197525,
|
|
"fcm_dpo/margin": 1.2957689762115479,
|
|
"fcm_dpo/q_t": 0.36413639783859253,
|
|
"grad_norm": 103.0126953125,
|
|
"learning_rate": 6.368388758106134e-08,
|
|
"logits/chosen": 1.544802188873291,
|
|
"logits/rejected": 1.5172946453094482,
|
|
"logps/chosen": -87.98350524902344,
|
|
"logps/ref_chosen": -93.22590637207031,
|
|
"logps/ref_rejected": -108.17863464355469,
|
|
"logps/rejected": -104.23199462890625,
|
|
"loss": 1.123,
|
|
"margin_dpo/margin_mean": 1.2957689762115479,
|
|
"margin_dpo/margin_std": 2.3095810413360596,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.7936507936507936,
|
|
"fcm_dpo/beta": 0.5804117918014526,
|
|
"fcm_dpo/delta": 0.23566076159477234,
|
|
"fcm_dpo/margin": 1.0851058959960938,
|
|
"fcm_dpo/q_t": 0.40458396077156067,
|
|
"grad_norm": 105.79865264892578,
|
|
"learning_rate": 6.280488279429185e-08,
|
|
"logits/chosen": 1.4469276666641235,
|
|
"logits/rejected": 1.4357231855392456,
|
|
"logps/chosen": -89.34066772460938,
|
|
"logps/ref_chosen": -94.08831787109375,
|
|
"logps/ref_rejected": -100.682373046875,
|
|
"logps/rejected": -97.01982116699219,
|
|
"loss": 1.3254,
|
|
"margin_dpo/margin_mean": 1.085106372833252,
|
|
"margin_dpo/margin_std": 2.825222969055176,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.7951625094482238,
|
|
"fcm_dpo/beta": 0.5839090347290039,
|
|
"fcm_dpo/delta": 0.07317899167537689,
|
|
"fcm_dpo/margin": 1.3251323699951172,
|
|
"fcm_dpo/q_t": 0.36826279759407043,
|
|
"grad_norm": 86.31532287597656,
|
|
"learning_rate": 6.193111425735515e-08,
|
|
"logits/chosen": 1.864978551864624,
|
|
"logits/rejected": 1.7237721681594849,
|
|
"logps/chosen": -73.06343841552734,
|
|
"logps/ref_chosen": -77.78373718261719,
|
|
"logps/ref_rejected": -100.29583740234375,
|
|
"logps/rejected": -96.90068054199219,
|
|
"loss": 1.1024,
|
|
"margin_dpo/margin_mean": 1.3251322507858276,
|
|
"margin_dpo/margin_std": 2.194312810897827,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.7966742252456538,
|
|
"fcm_dpo/beta": 0.627768874168396,
|
|
"fcm_dpo/delta": 0.26169681549072266,
|
|
"fcm_dpo/margin": 0.957957923412323,
|
|
"fcm_dpo/q_t": 0.3964363932609558,
|
|
"grad_norm": 133.1971435546875,
|
|
"learning_rate": 6.106260641143546e-08,
|
|
"logits/chosen": 2.207190990447998,
|
|
"logits/rejected": 2.003704786300659,
|
|
"logps/chosen": -72.29412841796875,
|
|
"logps/ref_chosen": -76.695068359375,
|
|
"logps/ref_rejected": -107.68281555175781,
|
|
"logps/rejected": -104.23982238769531,
|
|
"loss": 1.3457,
|
|
"margin_dpo/margin_mean": 0.9579578638076782,
|
|
"margin_dpo/margin_std": 2.4768576622009277,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.7981859410430839,
|
|
"fcm_dpo/beta": 0.6325068473815918,
|
|
"fcm_dpo/delta": 0.1113191694021225,
|
|
"fcm_dpo/margin": 1.176019310951233,
|
|
"fcm_dpo/q_t": 0.37600845098495483,
|
|
"grad_norm": 96.25592803955078,
|
|
"learning_rate": 6.019938355056422e-08,
|
|
"logits/chosen": 1.7700958251953125,
|
|
"logits/rejected": 1.6433663368225098,
|
|
"logps/chosen": -70.225341796875,
|
|
"logps/ref_chosen": -75.0361328125,
|
|
"logps/ref_rejected": -94.67579650878906,
|
|
"logps/rejected": -91.04102325439453,
|
|
"loss": 1.3686,
|
|
"margin_dpo/margin_mean": 1.1760194301605225,
|
|
"margin_dpo/margin_std": 2.789802074432373,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.799697656840514,
|
|
"fcm_dpo/beta": 0.5813958644866943,
|
|
"fcm_dpo/delta": -0.5735065340995789,
|
|
"fcm_dpo/margin": 2.284200668334961,
|
|
"fcm_dpo/q_t": 0.28090566396713257,
|
|
"grad_norm": 72.14741516113281,
|
|
"learning_rate": 5.934146982094049e-08,
|
|
"logits/chosen": 1.7006018161773682,
|
|
"logits/rejected": 1.5770983695983887,
|
|
"logps/chosen": -67.83549499511719,
|
|
"logps/ref_chosen": -72.84869384765625,
|
|
"logps/ref_rejected": -93.25855255126953,
|
|
"logps/rejected": -90.52955627441406,
|
|
"loss": 0.7807,
|
|
"margin_dpo/margin_mean": 2.284201145172119,
|
|
"margin_dpo/margin_std": 2.4762625694274902,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.8012093726379441,
|
|
"fcm_dpo/beta": 0.5802004337310791,
|
|
"fcm_dpo/delta": 0.04107225686311722,
|
|
"fcm_dpo/margin": 1.399862289428711,
|
|
"fcm_dpo/q_t": 0.36792880296707153,
|
|
"grad_norm": 95.07054138183594,
|
|
"learning_rate": 5.848888922025552e-08,
|
|
"logits/chosen": 1.9237902164459229,
|
|
"logits/rejected": 1.8377227783203125,
|
|
"logps/chosen": -74.80259704589844,
|
|
"logps/ref_chosen": -79.4971694946289,
|
|
"logps/ref_rejected": -93.59564208984375,
|
|
"logps/rejected": -90.3009262084961,
|
|
"loss": 1.1697,
|
|
"margin_dpo/margin_mean": 1.3998620510101318,
|
|
"margin_dpo/margin_std": 2.7202823162078857,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.8027210884353742,
|
|
"fcm_dpo/beta": 0.6038322448730469,
|
|
"fcm_dpo/delta": 0.12020966410636902,
|
|
"fcm_dpo/margin": 1.2052103281021118,
|
|
"fcm_dpo/q_t": 0.36855727434158325,
|
|
"grad_norm": 97.9964599609375,
|
|
"learning_rate": 5.7641665597021435e-08,
|
|
"logits/chosen": 1.6562883853912354,
|
|
"logits/rejected": 1.561152458190918,
|
|
"logps/chosen": -64.65231323242188,
|
|
"logps/ref_chosen": -69.45396423339844,
|
|
"logps/ref_rejected": -96.30017852783203,
|
|
"logps/rejected": -92.70374298095703,
|
|
"loss": 1.1081,
|
|
"margin_dpo/margin_mean": 1.2052104473114014,
|
|
"margin_dpo/margin_std": 2.0545098781585693,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.8042328042328042,
|
|
"fcm_dpo/beta": 0.5897899866104126,
|
|
"fcm_dpo/delta": -0.0036588534712791443,
|
|
"fcm_dpo/margin": 1.4446355104446411,
|
|
"fcm_dpo/q_t": 0.35917529463768005,
|
|
"grad_norm": 97.351806640625,
|
|
"learning_rate": 5.679982264990424e-08,
|
|
"logits/chosen": 1.3556745052337646,
|
|
"logits/rejected": 1.2717360258102417,
|
|
"logps/chosen": -72.01132202148438,
|
|
"logps/ref_chosen": -76.52011108398438,
|
|
"logps/ref_rejected": -94.79593658447266,
|
|
"logps/rejected": -91.73179626464844,
|
|
"loss": 1.0801,
|
|
"margin_dpo/margin_mean": 1.4446359872817993,
|
|
"margin_dpo/margin_std": 2.4376420974731445,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.8057445200302343,
|
|
"fcm_dpo/beta": 0.5956892967224121,
|
|
"fcm_dpo/delta": -0.05127408355474472,
|
|
"fcm_dpo/margin": 1.5031523704528809,
|
|
"fcm_dpo/q_t": 0.35674595832824707,
|
|
"grad_norm": 99.48766326904297,
|
|
"learning_rate": 5.596338392706076e-08,
|
|
"logits/chosen": 2.067509412765503,
|
|
"logits/rejected": 1.904585838317871,
|
|
"logps/chosen": -67.12313842773438,
|
|
"logps/ref_chosen": -72.31800842285156,
|
|
"logps/ref_rejected": -89.26652526855469,
|
|
"logps/rejected": -85.57481384277344,
|
|
"loss": 1.1153,
|
|
"margin_dpo/margin_mean": 1.5031521320343018,
|
|
"margin_dpo/margin_std": 2.5594966411590576,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.8072562358276644,
|
|
"fcm_dpo/beta": 0.5751690864562988,
|
|
"fcm_dpo/delta": -0.005829840898513794,
|
|
"fcm_dpo/margin": 1.4704824686050415,
|
|
"fcm_dpo/q_t": 0.34591174125671387,
|
|
"grad_norm": 92.95126342773438,
|
|
"learning_rate": 5.513237282548033e-08,
|
|
"logits/chosen": 1.5100412368774414,
|
|
"logits/rejected": 1.4817965030670166,
|
|
"logps/chosen": -72.63851928710938,
|
|
"logps/ref_chosen": -77.87559509277344,
|
|
"logps/ref_rejected": -92.21171569824219,
|
|
"logps/rejected": -88.44511413574219,
|
|
"loss": 1.1029,
|
|
"margin_dpo/margin_mean": 1.4704830646514893,
|
|
"margin_dpo/margin_std": 2.3595573902130127,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.8087679516250945,
|
|
"fcm_dpo/beta": 0.5866076946258545,
|
|
"fcm_dpo/delta": -0.038706980645656586,
|
|
"fcm_dpo/margin": 1.5089240074157715,
|
|
"fcm_dpo/q_t": 0.35734421014785767,
|
|
"grad_norm": 90.3484115600586,
|
|
"learning_rate": 5.430681259032957e-08,
|
|
"logits/chosen": 1.4031028747558594,
|
|
"logits/rejected": 1.2734661102294922,
|
|
"logps/chosen": -73.26467895507812,
|
|
"logps/ref_chosen": -78.16358184814453,
|
|
"logps/ref_rejected": -97.78164672851562,
|
|
"logps/rejected": -94.39166259765625,
|
|
"loss": 1.1293,
|
|
"margin_dpo/margin_mean": 1.5089242458343506,
|
|
"margin_dpo/margin_std": 2.7176802158355713,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.8102796674225246,
|
|
"fcm_dpo/beta": 0.5691394805908203,
|
|
"fcm_dpo/delta": -0.13241420686244965,
|
|
"fcm_dpo/margin": 1.7017556428909302,
|
|
"fcm_dpo/q_t": 0.3234456777572632,
|
|
"grad_norm": 75.58480834960938,
|
|
"learning_rate": 5.3486726314303175e-08,
|
|
"logits/chosen": 1.7192052602767944,
|
|
"logits/rejected": 1.5519870519638062,
|
|
"logps/chosen": -61.61140823364258,
|
|
"logps/ref_chosen": -66.65623474121094,
|
|
"logps/ref_rejected": -89.49085998535156,
|
|
"logps/rejected": -86.14779663085938,
|
|
"loss": 0.9111,
|
|
"margin_dpo/margin_mean": 1.7017555236816406,
|
|
"margin_dpo/margin_std": 2.1126914024353027,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.8117913832199547,
|
|
"fcm_dpo/beta": 0.563645601272583,
|
|
"fcm_dpo/delta": -0.042136892676353455,
|
|
"fcm_dpo/margin": 1.5759865045547485,
|
|
"fcm_dpo/q_t": 0.346375048160553,
|
|
"grad_norm": 74.55496215820312,
|
|
"learning_rate": 5.267213693697695e-08,
|
|
"logits/chosen": 1.7074635028839111,
|
|
"logits/rejected": 1.5484142303466797,
|
|
"logps/chosen": -70.44686126708984,
|
|
"logps/ref_chosen": -74.99390411376953,
|
|
"logps/ref_rejected": -110.6627197265625,
|
|
"logps/rejected": -107.69166564941406,
|
|
"loss": 1.0427,
|
|
"margin_dpo/margin_mean": 1.5759867429733276,
|
|
"margin_dpo/margin_std": 2.441058397293091,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.8133030990173847,
|
|
"fcm_dpo/beta": 0.5549603700637817,
|
|
"fcm_dpo/delta": -0.16347691416740417,
|
|
"fcm_dpo/margin": 1.796407699584961,
|
|
"fcm_dpo/q_t": 0.32237929105758667,
|
|
"grad_norm": 88.10001373291016,
|
|
"learning_rate": 5.1863067244167144e-08,
|
|
"logits/chosen": 1.4939422607421875,
|
|
"logits/rejected": 1.4379546642303467,
|
|
"logps/chosen": -82.83648681640625,
|
|
"logps/ref_chosen": -87.61151123046875,
|
|
"logps/ref_rejected": -98.1150131225586,
|
|
"logps/rejected": -95.13639068603516,
|
|
"loss": 0.9309,
|
|
"margin_dpo/margin_mean": 1.79640793800354,
|
|
"margin_dpo/margin_std": 2.3823800086975098,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.8148148148148148,
|
|
"fcm_dpo/beta": 0.5492007732391357,
|
|
"fcm_dpo/delta": 0.0604596883058548,
|
|
"fcm_dpo/margin": 1.4464352130889893,
|
|
"fcm_dpo/q_t": 0.3573772609233856,
|
|
"grad_norm": 86.81356811523438,
|
|
"learning_rate": 5.105953986729195e-08,
|
|
"logits/chosen": 1.5843884944915771,
|
|
"logits/rejected": 1.3659675121307373,
|
|
"logps/chosen": -74.08658599853516,
|
|
"logps/ref_chosen": -78.86482238769531,
|
|
"logps/ref_rejected": -100.84349822998047,
|
|
"logps/rejected": -97.5116958618164,
|
|
"loss": 1.0038,
|
|
"margin_dpo/margin_mean": 1.446435809135437,
|
|
"margin_dpo/margin_std": 2.100681781768799,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.8163265306122449,
|
|
"fcm_dpo/beta": 0.5411156415939331,
|
|
"fcm_dpo/delta": -0.14517144858837128,
|
|
"fcm_dpo/margin": 1.8131245374679565,
|
|
"fcm_dpo/q_t": 0.322839617729187,
|
|
"grad_norm": 92.86735534667969,
|
|
"learning_rate": 5.026157728273966e-08,
|
|
"logits/chosen": 1.8254791498184204,
|
|
"logits/rejected": 1.632624626159668,
|
|
"logps/chosen": -78.81214141845703,
|
|
"logps/ref_chosen": -83.66409301757812,
|
|
"logps/ref_rejected": -114.8860092163086,
|
|
"logps/rejected": -111.84718322753906,
|
|
"loss": 1.0435,
|
|
"margin_dpo/margin_mean": 1.813124656677246,
|
|
"margin_dpo/margin_std": 2.7530481815338135,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.817838246409675,
|
|
"fcm_dpo/beta": 0.5307464003562927,
|
|
"fcm_dpo/delta": -0.04240588843822479,
|
|
"fcm_dpo/margin": 1.6736271381378174,
|
|
"fcm_dpo/q_t": 0.3363361060619354,
|
|
"grad_norm": 88.91351318359375,
|
|
"learning_rate": 4.9469201811239035e-08,
|
|
"logits/chosen": 1.5942778587341309,
|
|
"logits/rejected": 1.6136151552200317,
|
|
"logps/chosen": -78.06910705566406,
|
|
"logps/ref_chosen": -83.12225341796875,
|
|
"logps/ref_rejected": -74.80526733398438,
|
|
"logps/rejected": -71.42574310302734,
|
|
"loss": 1.0115,
|
|
"margin_dpo/margin_mean": 1.6736273765563965,
|
|
"margin_dpo/margin_std": 2.4181206226348877,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.8193499622071051,
|
|
"fcm_dpo/beta": 0.5184097290039062,
|
|
"fcm_dpo/delta": -0.11768058687448502,
|
|
"fcm_dpo/margin": 1.8414095640182495,
|
|
"fcm_dpo/q_t": 0.3354414105415344,
|
|
"grad_norm": 68.62602233886719,
|
|
"learning_rate": 4.868243561723534e-08,
|
|
"logits/chosen": 1.882128119468689,
|
|
"logits/rejected": 1.7743515968322754,
|
|
"logps/chosen": -60.99085235595703,
|
|
"logps/ref_chosen": -66.3132553100586,
|
|
"logps/ref_rejected": -83.24588012695312,
|
|
"logps/rejected": -79.764892578125,
|
|
"loss": 0.9949,
|
|
"margin_dpo/margin_mean": 1.8414098024368286,
|
|
"margin_dpo/margin_std": 2.648585557937622,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.8208616780045351,
|
|
"fcm_dpo/beta": 0.5239220857620239,
|
|
"fcm_dpo/delta": 0.005960091948509216,
|
|
"fcm_dpo/margin": 1.6092872619628906,
|
|
"fcm_dpo/q_t": 0.3501929044723511,
|
|
"grad_norm": 77.4872817993164,
|
|
"learning_rate": 4.790130070827028e-08,
|
|
"logits/chosen": 1.893631100654602,
|
|
"logits/rejected": 1.6607489585876465,
|
|
"logps/chosen": -63.158504486083984,
|
|
"logps/ref_chosen": -68.11429595947266,
|
|
"logps/ref_rejected": -94.62380981445312,
|
|
"logps/rejected": -91.27730560302734,
|
|
"loss": 1.0308,
|
|
"margin_dpo/margin_mean": 1.6092875003814697,
|
|
"margin_dpo/margin_std": 2.471250057220459,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.8223733938019653,
|
|
"fcm_dpo/beta": 0.49690988659858704,
|
|
"fcm_dpo/delta": -0.31122803688049316,
|
|
"fcm_dpo/margin": 2.2667787075042725,
|
|
"fcm_dpo/q_t": 0.3194485902786255,
|
|
"grad_norm": 66.967529296875,
|
|
"learning_rate": 4.7125818934366454e-08,
|
|
"logits/chosen": 1.8328646421432495,
|
|
"logits/rejected": 1.6826552152633667,
|
|
"logps/chosen": -76.01614379882812,
|
|
"logps/ref_chosen": -81.187255859375,
|
|
"logps/ref_rejected": -105.84722900390625,
|
|
"logps/rejected": -102.9428939819336,
|
|
"loss": 0.9294,
|
|
"margin_dpo/margin_mean": 2.2667789459228516,
|
|
"margin_dpo/margin_std": 3.020232677459717,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.8238851095993953,
|
|
"fcm_dpo/beta": 0.4952470362186432,
|
|
"fcm_dpo/delta": 0.13346421718597412,
|
|
"fcm_dpo/margin": 1.4665462970733643,
|
|
"fcm_dpo/q_t": 0.37403714656829834,
|
|
"grad_norm": 81.59075927734375,
|
|
"learning_rate": 4.635601198741607e-08,
|
|
"logits/chosen": 1.6682891845703125,
|
|
"logits/rejected": 1.5271377563476562,
|
|
"logps/chosen": -74.03522491455078,
|
|
"logps/ref_chosen": -78.81717681884766,
|
|
"logps/ref_rejected": -98.65876770019531,
|
|
"logps/rejected": -95.3433609008789,
|
|
"loss": 1.0963,
|
|
"margin_dpo/margin_mean": 1.4665460586547852,
|
|
"margin_dpo/margin_std": 2.5844061374664307,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.8253968253968254,
|
|
"fcm_dpo/beta": 0.5028541088104248,
|
|
"fcm_dpo/delta": -0.008033901453018188,
|
|
"fcm_dpo/margin": 1.704458236694336,
|
|
"fcm_dpo/q_t": 0.3371015191078186,
|
|
"grad_norm": 72.56092834472656,
|
|
"learning_rate": 4.559190140057428e-08,
|
|
"logits/chosen": 1.9643224477767944,
|
|
"logits/rejected": 1.9538801908493042,
|
|
"logps/chosen": -68.92066955566406,
|
|
"logps/ref_chosen": -74.2529296875,
|
|
"logps/ref_rejected": -80.32308959960938,
|
|
"logps/rejected": -76.6952896118164,
|
|
"loss": 0.9537,
|
|
"margin_dpo/margin_mean": 1.704458236694336,
|
|
"margin_dpo/margin_std": 2.263866901397705,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.8269085411942555,
|
|
"fcm_dpo/beta": 0.49540919065475464,
|
|
"fcm_dpo/delta": -0.06381530314683914,
|
|
"fcm_dpo/margin": 1.8326653242111206,
|
|
"fcm_dpo/q_t": 0.32726162672042847,
|
|
"grad_norm": 64.46299743652344,
|
|
"learning_rate": 4.483350854765672e-08,
|
|
"logits/chosen": 1.5813419818878174,
|
|
"logits/rejected": 1.4572780132293701,
|
|
"logps/chosen": -64.79362487792969,
|
|
"logps/ref_chosen": -69.9368896484375,
|
|
"logps/ref_rejected": -90.25672912597656,
|
|
"logps/rejected": -86.94613647460938,
|
|
"loss": 0.9145,
|
|
"margin_dpo/margin_mean": 1.8326648473739624,
|
|
"margin_dpo/margin_std": 2.2874417304992676,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.8284202569916855,
|
|
"fcm_dpo/beta": 0.5134807825088501,
|
|
"fcm_dpo/delta": 0.1882130205631256,
|
|
"fcm_dpo/margin": 1.306201696395874,
|
|
"fcm_dpo/q_t": 0.3803349733352661,
|
|
"grad_norm": 88.97979736328125,
|
|
"learning_rate": 4.4080854642541826e-08,
|
|
"logits/chosen": 1.4894611835479736,
|
|
"logits/rejected": 1.3644602298736572,
|
|
"logps/chosen": -76.54277801513672,
|
|
"logps/ref_chosen": -81.1605224609375,
|
|
"logps/ref_rejected": -99.7246322631836,
|
|
"logps/rejected": -96.4130859375,
|
|
"loss": 1.1116,
|
|
"margin_dpo/margin_mean": 1.306201457977295,
|
|
"margin_dpo/margin_std": 2.309016704559326,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.8299319727891157,
|
|
"fcm_dpo/beta": 0.5256662964820862,
|
|
"fcm_dpo/delta": 0.1741892248392105,
|
|
"fcm_dpo/margin": 1.3089189529418945,
|
|
"fcm_dpo/q_t": 0.3829908072948456,
|
|
"grad_norm": 93.77330780029297,
|
|
"learning_rate": 4.333396073857723e-08,
|
|
"logits/chosen": 2.2063562870025635,
|
|
"logits/rejected": 2.0459249019622803,
|
|
"logps/chosen": -75.51959991455078,
|
|
"logps/ref_chosen": -80.49800872802734,
|
|
"logps/ref_rejected": -113.20750427246094,
|
|
"logps/rejected": -109.53801727294922,
|
|
"loss": 1.2299,
|
|
"margin_dpo/margin_mean": 1.3089196681976318,
|
|
"margin_dpo/margin_std": 2.8981709480285645,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.8314436885865457,
|
|
"fcm_dpo/beta": 0.5375339984893799,
|
|
"fcm_dpo/delta": 0.15981407463550568,
|
|
"fcm_dpo/margin": 1.30253267288208,
|
|
"fcm_dpo/q_t": 0.37385594844818115,
|
|
"grad_norm": 77.2861328125,
|
|
"learning_rate": 4.259284772799099e-08,
|
|
"logits/chosen": 1.5224547386169434,
|
|
"logits/rejected": 1.4622929096221924,
|
|
"logps/chosen": -70.20506286621094,
|
|
"logps/ref_chosen": -75.13760375976562,
|
|
"logps/ref_rejected": -79.04876708984375,
|
|
"logps/rejected": -75.41875457763672,
|
|
"loss": 1.1472,
|
|
"margin_dpo/margin_mean": 1.30253267288208,
|
|
"margin_dpo/margin_std": 2.410634994506836,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.8329554043839759,
|
|
"fcm_dpo/beta": 0.5626946091651917,
|
|
"fcm_dpo/delta": 0.08639901876449585,
|
|
"fcm_dpo/margin": 1.362003207206726,
|
|
"fcm_dpo/q_t": 0.35773536562919617,
|
|
"grad_norm": 93.26870727539062,
|
|
"learning_rate": 4.1857536341307176e-08,
|
|
"logits/chosen": 1.9698476791381836,
|
|
"logits/rejected": 1.842726230621338,
|
|
"logps/chosen": -80.63394927978516,
|
|
"logps/ref_chosen": -85.4496078491211,
|
|
"logps/ref_rejected": -103.48530578613281,
|
|
"logps/rejected": -100.03164672851562,
|
|
"loss": 1.0683,
|
|
"margin_dpo/margin_mean": 1.3620030879974365,
|
|
"margin_dpo/margin_std": 2.265345573425293,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.8344671201814059,
|
|
"fcm_dpo/beta": 0.5713049173355103,
|
|
"fcm_dpo/delta": 0.01876942813396454,
|
|
"fcm_dpo/margin": 1.443709135055542,
|
|
"fcm_dpo/q_t": 0.3453782796859741,
|
|
"grad_norm": 87.71670532226562,
|
|
"learning_rate": 4.112804714676593e-08,
|
|
"logits/chosen": 1.630192518234253,
|
|
"logits/rejected": 1.5265223979949951,
|
|
"logps/chosen": -76.890380859375,
|
|
"logps/ref_chosen": -82.01036071777344,
|
|
"logps/ref_rejected": -101.61884307861328,
|
|
"logps/rejected": -97.94257354736328,
|
|
"loss": 1.0856,
|
|
"margin_dpo/margin_mean": 1.4437094926834106,
|
|
"margin_dpo/margin_std": 2.2685065269470215,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.8359788359788359,
|
|
"fcm_dpo/beta": 0.5514621138572693,
|
|
"fcm_dpo/delta": -0.06148216500878334,
|
|
"fcm_dpo/margin": 1.640283465385437,
|
|
"fcm_dpo/q_t": 0.3572534918785095,
|
|
"grad_norm": 92.91000366210938,
|
|
"learning_rate": 4.0404400549748144e-08,
|
|
"logits/chosen": 1.8313771486282349,
|
|
"logits/rejected": 1.6508121490478516,
|
|
"logps/chosen": -69.30296325683594,
|
|
"logps/ref_chosen": -73.81416320800781,
|
|
"logps/ref_rejected": -104.27050018310547,
|
|
"logps/rejected": -101.39958953857422,
|
|
"loss": 1.1162,
|
|
"margin_dpo/margin_mean": 1.6402831077575684,
|
|
"margin_dpo/margin_std": 2.8277735710144043,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.8374905517762661,
|
|
"fcm_dpo/beta": 0.5503508448600769,
|
|
"fcm_dpo/delta": -0.00613846629858017,
|
|
"fcm_dpo/margin": 1.5537428855895996,
|
|
"fcm_dpo/q_t": 0.3472326695919037,
|
|
"grad_norm": 82.89059448242188,
|
|
"learning_rate": 3.968661679220467e-08,
|
|
"logits/chosen": 1.3481284379959106,
|
|
"logits/rejected": 1.3186860084533691,
|
|
"logps/chosen": -76.67272186279297,
|
|
"logps/ref_chosen": -81.43980407714844,
|
|
"logps/ref_rejected": -89.32518005371094,
|
|
"logps/rejected": -86.11184692382812,
|
|
"loss": 1.0697,
|
|
"margin_dpo/margin_mean": 1.55374276638031,
|
|
"margin_dpo/margin_std": 2.5019257068634033,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.8390022675736961,
|
|
"fcm_dpo/beta": 0.5594339370727539,
|
|
"fcm_dpo/delta": -0.010179772973060608,
|
|
"fcm_dpo/margin": 1.5303804874420166,
|
|
"fcm_dpo/q_t": 0.3368152379989624,
|
|
"grad_norm": 85.1632308959961,
|
|
"learning_rate": 3.89747159520904e-08,
|
|
"logits/chosen": 1.6793913841247559,
|
|
"logits/rejected": 1.6269229650497437,
|
|
"logps/chosen": -76.93789672851562,
|
|
"logps/ref_chosen": -81.66071319580078,
|
|
"logps/ref_rejected": -87.20857238769531,
|
|
"logps/rejected": -84.01614379882812,
|
|
"loss": 1.0866,
|
|
"margin_dpo/margin_mean": 1.5303804874420166,
|
|
"margin_dpo/margin_std": 2.4207754135131836,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.8405139833711263,
|
|
"fcm_dpo/beta": 0.5653465986251831,
|
|
"fcm_dpo/delta": 0.14113594591617584,
|
|
"fcm_dpo/margin": 1.2706456184387207,
|
|
"fcm_dpo/q_t": 0.38003456592559814,
|
|
"grad_norm": 89.62040710449219,
|
|
"learning_rate": 3.826871794280192e-08,
|
|
"logits/chosen": 1.7433538436889648,
|
|
"logits/rejected": 1.6561615467071533,
|
|
"logps/chosen": -61.646141052246094,
|
|
"logps/ref_chosen": -66.02448272705078,
|
|
"logps/ref_rejected": -82.74746704101562,
|
|
"logps/rejected": -79.6397705078125,
|
|
"loss": 1.2086,
|
|
"margin_dpo/margin_mean": 1.2706454992294312,
|
|
"margin_dpo/margin_std": 2.578793525695801,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.8420256991685563,
|
|
"fcm_dpo/beta": 0.5589163303375244,
|
|
"fcm_dpo/delta": -0.01051899790763855,
|
|
"fcm_dpo/margin": 1.53108811378479,
|
|
"fcm_dpo/q_t": 0.3562447428703308,
|
|
"grad_norm": 100.4636459350586,
|
|
"learning_rate": 3.756864251262143e-08,
|
|
"logits/chosen": 1.7672959566116333,
|
|
"logits/rejected": 1.5255687236785889,
|
|
"logps/chosen": -68.58070373535156,
|
|
"logps/ref_chosen": -73.08985900878906,
|
|
"logps/ref_rejected": -97.43034362792969,
|
|
"logps/rejected": -94.45227813720703,
|
|
"loss": 1.1511,
|
|
"margin_dpo/margin_mean": 1.5310877561569214,
|
|
"margin_dpo/margin_std": 2.7770981788635254,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.8435374149659864,
|
|
"fcm_dpo/beta": 0.5567211508750916,
|
|
"fcm_dpo/delta": -0.06094827130436897,
|
|
"fcm_dpo/margin": 1.623206615447998,
|
|
"fcm_dpo/q_t": 0.33581194281578064,
|
|
"grad_norm": 91.5750732421875,
|
|
"learning_rate": 3.687450924416341e-08,
|
|
"logits/chosen": 1.9485085010528564,
|
|
"logits/rejected": 1.8674168586730957,
|
|
"logps/chosen": -75.47303771972656,
|
|
"logps/ref_chosen": -80.1357192993164,
|
|
"logps/ref_rejected": -106.65797424316406,
|
|
"logps/rejected": -103.61849975585938,
|
|
"loss": 1.0374,
|
|
"margin_dpo/margin_mean": 1.6232068538665771,
|
|
"margin_dpo/margin_std": 2.4886608123779297,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.8450491307634165,
|
|
"fcm_dpo/beta": 0.5594509840011597,
|
|
"fcm_dpo/delta": 0.08693183213472366,
|
|
"fcm_dpo/margin": 1.3717814683914185,
|
|
"fcm_dpo/q_t": 0.37605616450309753,
|
|
"grad_norm": 99.626953125,
|
|
"learning_rate": 3.6186337553827743e-08,
|
|
"logits/chosen": 1.522691011428833,
|
|
"logits/rejected": 1.3501641750335693,
|
|
"logps/chosen": -74.56044006347656,
|
|
"logps/ref_chosen": -79.42267608642578,
|
|
"logps/ref_rejected": -98.59402465820312,
|
|
"logps/rejected": -95.10356140136719,
|
|
"loss": 1.2814,
|
|
"margin_dpo/margin_mean": 1.371781349182129,
|
|
"margin_dpo/margin_std": 2.9634933471679688,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.8465608465608465,
|
|
"fcm_dpo/beta": 0.5630248785018921,
|
|
"fcm_dpo/delta": -0.13770724833011627,
|
|
"fcm_dpo/margin": 1.7297303676605225,
|
|
"fcm_dpo/q_t": 0.3417326807975769,
|
|
"grad_norm": 83.30538177490234,
|
|
"learning_rate": 3.550414669125573e-08,
|
|
"logits/chosen": 1.4917066097259521,
|
|
"logits/rejected": 1.4372332096099854,
|
|
"logps/chosen": -72.65574645996094,
|
|
"logps/ref_chosen": -77.49559020996094,
|
|
"logps/ref_rejected": -92.61347961425781,
|
|
"logps/rejected": -89.50337219238281,
|
|
"loss": 1.0605,
|
|
"margin_dpo/margin_mean": 1.7297307252883911,
|
|
"margin_dpo/margin_std": 2.701158046722412,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.8480725623582767,
|
|
"fcm_dpo/beta": 0.5409590005874634,
|
|
"fcm_dpo/delta": -0.1466280072927475,
|
|
"fcm_dpo/margin": 1.8150770664215088,
|
|
"fcm_dpo/q_t": 0.349008172750473,
|
|
"grad_norm": 78.01751708984375,
|
|
"learning_rate": 3.482795573879241e-08,
|
|
"logits/chosen": 1.7881979942321777,
|
|
"logits/rejected": 1.7428940534591675,
|
|
"logps/chosen": -74.37679290771484,
|
|
"logps/ref_chosen": -79.20771789550781,
|
|
"logps/ref_rejected": -93.46514892578125,
|
|
"logps/rejected": -90.44929504394531,
|
|
"loss": 1.0416,
|
|
"margin_dpo/margin_mean": 1.8150770664215088,
|
|
"margin_dpo/margin_std": 2.915646553039551,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.8495842781557067,
|
|
"fcm_dpo/beta": 0.5316770076751709,
|
|
"fcm_dpo/delta": -0.06161924824118614,
|
|
"fcm_dpo/margin": 1.7036185264587402,
|
|
"fcm_dpo/q_t": 0.3406429886817932,
|
|
"grad_norm": 77.7786865234375,
|
|
"learning_rate": 3.415778361095226e-08,
|
|
"logits/chosen": 1.9883166551589966,
|
|
"logits/rejected": 1.8916831016540527,
|
|
"logps/chosen": -90.15193176269531,
|
|
"logps/ref_chosen": -94.88652801513672,
|
|
"logps/ref_rejected": -109.33815002441406,
|
|
"logps/rejected": -106.30716705322266,
|
|
"loss": 1.0043,
|
|
"margin_dpo/margin_mean": 1.7036182880401611,
|
|
"margin_dpo/margin_std": 2.4467196464538574,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.8510959939531368,
|
|
"fcm_dpo/beta": 0.5239354372024536,
|
|
"fcm_dpo/delta": -0.08977065980434418,
|
|
"fcm_dpo/margin": 1.7775870561599731,
|
|
"fcm_dpo/q_t": 0.34091219305992126,
|
|
"grad_norm": 75.40060424804688,
|
|
"learning_rate": 3.349364905389032e-08,
|
|
"logits/chosen": 1.6115353107452393,
|
|
"logits/rejected": 1.4812626838684082,
|
|
"logps/chosen": -60.68045425415039,
|
|
"logps/ref_chosen": -65.90719604492188,
|
|
"logps/ref_rejected": -84.07121276855469,
|
|
"logps/rejected": -80.62205505371094,
|
|
"loss": 1.0734,
|
|
"margin_dpo/margin_mean": 1.7775870561599731,
|
|
"margin_dpo/margin_std": 2.815526247024536,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.8526077097505669,
|
|
"fcm_dpo/beta": 0.5255653858184814,
|
|
"fcm_dpo/delta": 0.024834435433149338,
|
|
"fcm_dpo/margin": 1.5733851194381714,
|
|
"fcm_dpo/q_t": 0.35912322998046875,
|
|
"grad_norm": 86.95561218261719,
|
|
"learning_rate": 3.283557064487785e-08,
|
|
"logits/chosen": 1.7449924945831299,
|
|
"logits/rejected": 1.6827529668807983,
|
|
"logps/chosen": -67.22463989257812,
|
|
"logps/ref_chosen": -72.32071685791016,
|
|
"logps/ref_rejected": -88.05014038085938,
|
|
"logps/rejected": -84.52745056152344,
|
|
"loss": 1.1226,
|
|
"margin_dpo/margin_mean": 1.5733850002288818,
|
|
"margin_dpo/margin_std": 2.7897634506225586,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.854119425547997,
|
|
"fcm_dpo/beta": 0.528499960899353,
|
|
"fcm_dpo/delta": 0.007532309740781784,
|
|
"fcm_dpo/margin": 1.5936261415481567,
|
|
"fcm_dpo/q_t": 0.34886032342910767,
|
|
"grad_norm": 86.20529174804688,
|
|
"learning_rate": 3.218356679178252e-08,
|
|
"logits/chosen": 2.053807258605957,
|
|
"logits/rejected": 1.9824556112289429,
|
|
"logps/chosen": -75.60353088378906,
|
|
"logps/ref_chosen": -80.18453979492188,
|
|
"logps/ref_rejected": -99.55126953125,
|
|
"logps/rejected": -96.56388854980469,
|
|
"loss": 1.0384,
|
|
"margin_dpo/margin_mean": 1.5936262607574463,
|
|
"margin_dpo/margin_std": 2.4494309425354004,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.8556311413454271,
|
|
"fcm_dpo/beta": 0.5436303615570068,
|
|
"fcm_dpo/delta": 0.09894217550754547,
|
|
"fcm_dpo/margin": 1.3781930208206177,
|
|
"fcm_dpo/q_t": 0.3776889443397522,
|
|
"grad_norm": 96.017578125,
|
|
"learning_rate": 3.1537655732553764e-08,
|
|
"logits/chosen": 1.9359018802642822,
|
|
"logits/rejected": 1.872054100036621,
|
|
"logps/chosen": -83.3899154663086,
|
|
"logps/ref_chosen": -88.0877914428711,
|
|
"logps/ref_rejected": -87.7589111328125,
|
|
"logps/rejected": -84.43922424316406,
|
|
"loss": 1.2347,
|
|
"margin_dpo/margin_mean": 1.3781930208206177,
|
|
"margin_dpo/margin_std": 2.8941593170166016,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.8571428571428571,
|
|
"fcm_dpo/beta": 0.530328631401062,
|
|
"fcm_dpo/delta": -0.04391162469983101,
|
|
"fcm_dpo/margin": 1.6774555444717407,
|
|
"fcm_dpo/q_t": 0.33421647548675537,
|
|
"grad_norm": 78.94988250732422,
|
|
"learning_rate": 3.089785553471233e-08,
|
|
"logits/chosen": 1.9912657737731934,
|
|
"logits/rejected": 1.7774717807769775,
|
|
"logps/chosen": -65.12935638427734,
|
|
"logps/ref_chosen": -69.93267822265625,
|
|
"logps/ref_rejected": -95.71786499023438,
|
|
"logps/rejected": -92.59199523925781,
|
|
"loss": 0.9518,
|
|
"margin_dpo/margin_mean": 1.6774554252624512,
|
|
"margin_dpo/margin_std": 2.2273342609405518,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.8586545729402872,
|
|
"fcm_dpo/beta": 0.5215575695037842,
|
|
"fcm_dpo/delta": -0.2450091391801834,
|
|
"fcm_dpo/margin": 2.045576572418213,
|
|
"fcm_dpo/q_t": 0.3052712678909302,
|
|
"grad_norm": 66.90857696533203,
|
|
"learning_rate": 3.026418409484513e-08,
|
|
"logits/chosen": 1.7025278806686401,
|
|
"logits/rejected": 1.5524958372116089,
|
|
"logps/chosen": -65.14755249023438,
|
|
"logps/ref_chosen": -70.33343505859375,
|
|
"logps/ref_rejected": -108.86271667480469,
|
|
"logps/rejected": -105.722412109375,
|
|
"loss": 0.8475,
|
|
"margin_dpo/margin_mean": 2.045576572418213,
|
|
"margin_dpo/margin_std": 2.2456419467926025,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.8601662887377173,
|
|
"fcm_dpo/beta": 0.5055083632469177,
|
|
"fcm_dpo/delta": 0.0923711508512497,
|
|
"fcm_dpo/margin": 1.5074416399002075,
|
|
"fcm_dpo/q_t": 0.35584020614624023,
|
|
"grad_norm": 73.7436752319336,
|
|
"learning_rate": 2.963665913810451e-08,
|
|
"logits/chosen": 1.5560593605041504,
|
|
"logits/rejected": 1.5503093004226685,
|
|
"logps/chosen": -76.06471252441406,
|
|
"logps/ref_chosen": -80.85043334960938,
|
|
"logps/ref_rejected": -92.77810668945312,
|
|
"logps/rejected": -89.49983215332031,
|
|
"loss": 1.0816,
|
|
"margin_dpo/margin_mean": 1.5074411630630493,
|
|
"margin_dpo/margin_std": 2.4058260917663574,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.8616780045351474,
|
|
"fcm_dpo/beta": 0.5042208433151245,
|
|
"fcm_dpo/delta": -0.2551186978816986,
|
|
"fcm_dpo/margin": 2.1350512504577637,
|
|
"fcm_dpo/q_t": 0.3052746653556824,
|
|
"grad_norm": 66.28195190429688,
|
|
"learning_rate": 2.9015298217712453e-08,
|
|
"logits/chosen": 1.4883897304534912,
|
|
"logits/rejected": 1.3627548217773438,
|
|
"logps/chosen": -64.59855651855469,
|
|
"logps/ref_chosen": -69.94769287109375,
|
|
"logps/ref_rejected": -97.37059020996094,
|
|
"logps/rejected": -94.15650939941406,
|
|
"loss": 0.8679,
|
|
"margin_dpo/margin_mean": 2.1350512504577637,
|
|
"margin_dpo/margin_std": 2.4027867317199707,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.8631897203325775,
|
|
"fcm_dpo/beta": 0.5050674080848694,
|
|
"fcm_dpo/delta": 0.22430172562599182,
|
|
"fcm_dpo/margin": 1.2679533958435059,
|
|
"fcm_dpo/q_t": 0.3824647068977356,
|
|
"grad_norm": 78.28890991210938,
|
|
"learning_rate": 2.840011871446962e-08,
|
|
"logits/chosen": 1.7317427396774292,
|
|
"logits/rejected": 1.6764953136444092,
|
|
"logps/chosen": -67.2318115234375,
|
|
"logps/ref_chosen": -72.28555297851562,
|
|
"logps/ref_rejected": -84.57748413085938,
|
|
"logps/rejected": -80.79170227050781,
|
|
"loss": 1.1642,
|
|
"margin_dpo/margin_mean": 1.2679531574249268,
|
|
"margin_dpo/margin_std": 2.5002126693725586,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.8647014361300076,
|
|
"fcm_dpo/beta": 0.5231929421424866,
|
|
"fcm_dpo/delta": 0.15702974796295166,
|
|
"fcm_dpo/margin": 1.3459784984588623,
|
|
"fcm_dpo/q_t": 0.37420764565467834,
|
|
"grad_norm": 91.95054626464844,
|
|
"learning_rate": 2.7791137836269158e-08,
|
|
"logits/chosen": 1.8538180589675903,
|
|
"logits/rejected": 1.9062275886535645,
|
|
"logps/chosen": -86.89059448242188,
|
|
"logps/ref_chosen": -91.4906997680664,
|
|
"logps/ref_rejected": -80.44602966308594,
|
|
"logps/rejected": -77.19190216064453,
|
|
"loss": 1.0802,
|
|
"margin_dpo/margin_mean": 1.3459784984588623,
|
|
"margin_dpo/margin_std": 2.314822196960449,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.8662131519274376,
|
|
"fcm_dpo/beta": 0.5159151554107666,
|
|
"fcm_dpo/delta": -0.12967026233673096,
|
|
"fcm_dpo/margin": 1.872837781906128,
|
|
"fcm_dpo/q_t": 0.3495451807975769,
|
|
"grad_norm": 82.7747802734375,
|
|
"learning_rate": 2.718837261761528e-08,
|
|
"logits/chosen": 1.7055302858352661,
|
|
"logits/rejected": 1.623398780822754,
|
|
"logps/chosen": -82.83917236328125,
|
|
"logps/ref_chosen": -87.54232788085938,
|
|
"logps/ref_rejected": -104.32984924316406,
|
|
"logps/rejected": -101.49952697753906,
|
|
"loss": 1.0981,
|
|
"margin_dpo/margin_mean": 1.8728383779525757,
|
|
"margin_dpo/margin_std": 3.2378549575805664,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.8677248677248677,
|
|
"fcm_dpo/beta": 0.5023356080055237,
|
|
"fcm_dpo/delta": -0.21158885955810547,
|
|
"fcm_dpo/margin": 2.0703470706939697,
|
|
"fcm_dpo/q_t": 0.31453508138656616,
|
|
"grad_norm": 68.75479888916016,
|
|
"learning_rate": 2.659183991914696e-08,
|
|
"logits/chosen": 2.0316388607025146,
|
|
"logits/rejected": 1.9578845500946045,
|
|
"logps/chosen": -70.46785736083984,
|
|
"logps/ref_chosen": -75.36632537841797,
|
|
"logps/ref_rejected": -103.27328491210938,
|
|
"logps/rejected": -100.4451675415039,
|
|
"loss": 0.8919,
|
|
"margin_dpo/margin_mean": 2.0703463554382324,
|
|
"margin_dpo/margin_std": 2.517101764678955,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.8692365835222978,
|
|
"fcm_dpo/beta": 0.4883292615413666,
|
|
"fcm_dpo/delta": -0.051660750061273575,
|
|
"fcm_dpo/margin": 1.2661917209625244,
|
|
"fcm_dpo/q_t": 0.3973788917064667,
|
|
"grad_norm": 85.65643310546875,
|
|
"learning_rate": 2.600155642716606e-08,
|
|
"logits/chosen": 1.819608211517334,
|
|
"logits/rejected": 1.6805698871612549,
|
|
"logps/chosen": -76.97919464111328,
|
|
"logps/ref_chosen": -81.678466796875,
|
|
"logps/ref_rejected": -112.84233093261719,
|
|
"logps/rejected": -109.40924835205078,
|
|
"loss": 1.2291,
|
|
"margin_dpo/margin_mean": 1.266192078590393,
|
|
"margin_dpo/margin_std": 2.7936654090881348,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.8707482993197279,
|
|
"fcm_dpo/beta": 0.46955758333206177,
|
|
"fcm_dpo/delta": -0.15395161509513855,
|
|
"fcm_dpo/margin": 2.08994722366333,
|
|
"fcm_dpo/q_t": 0.3229488730430603,
|
|
"grad_norm": 68.0239486694336,
|
|
"learning_rate": 2.5417538653170754e-08,
|
|
"logits/chosen": 2.0921566486358643,
|
|
"logits/rejected": 1.8919597864151,
|
|
"logps/chosen": -63.571937561035156,
|
|
"logps/ref_chosen": -68.78944396972656,
|
|
"logps/ref_rejected": -102.79037475585938,
|
|
"logps/rejected": -99.66281127929688,
|
|
"loss": 0.9436,
|
|
"margin_dpo/margin_mean": 2.089946746826172,
|
|
"margin_dpo/margin_std": 2.686835765838623,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.872260015117158,
|
|
"fcm_dpo/beta": 0.4906901717185974,
|
|
"fcm_dpo/delta": 0.2550051510334015,
|
|
"fcm_dpo/margin": 1.2452499866485596,
|
|
"fcm_dpo/q_t": 0.39396393299102783,
|
|
"grad_norm": 75.48487091064453,
|
|
"learning_rate": 2.4839802933393607e-08,
|
|
"logits/chosen": 1.5746073722839355,
|
|
"logits/rejected": 1.5519602298736572,
|
|
"logps/chosen": -74.94413757324219,
|
|
"logps/ref_chosen": -79.84675598144531,
|
|
"logps/ref_rejected": -84.08309936523438,
|
|
"logps/rejected": -80.42573547363281,
|
|
"loss": 1.1763,
|
|
"margin_dpo/margin_mean": 1.2452495098114014,
|
|
"margin_dpo/margin_std": 2.5598554611206055,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.873771730914588,
|
|
"fcm_dpo/beta": 0.5132976770401001,
|
|
"fcm_dpo/delta": 0.22512920200824738,
|
|
"fcm_dpo/margin": 1.2464478015899658,
|
|
"fcm_dpo/q_t": 0.3836420774459839,
|
|
"grad_norm": 75.34688568115234,
|
|
"learning_rate": 2.4268365428344733e-08,
|
|
"logits/chosen": 1.906113862991333,
|
|
"logits/rejected": 1.8371565341949463,
|
|
"logps/chosen": -69.94638061523438,
|
|
"logps/ref_chosen": -74.91357421875,
|
|
"logps/ref_rejected": -83.64881896972656,
|
|
"logps/rejected": -79.92807006835938,
|
|
"loss": 1.1037,
|
|
"margin_dpo/margin_mean": 1.2464478015899658,
|
|
"margin_dpo/margin_std": 2.216444492340088,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.8752834467120182,
|
|
"fcm_dpo/beta": 0.5136229395866394,
|
|
"fcm_dpo/delta": -0.053459469228982925,
|
|
"fcm_dpo/margin": 1.747258186340332,
|
|
"fcm_dpo/q_t": 0.3330836296081543,
|
|
"grad_norm": 84.27195739746094,
|
|
"learning_rate": 2.3703242122359357e-08,
|
|
"logits/chosen": 1.5232186317443848,
|
|
"logits/rejected": 1.4639792442321777,
|
|
"logps/chosen": -70.65562438964844,
|
|
"logps/ref_chosen": -75.51022338867188,
|
|
"logps/ref_rejected": -84.83192443847656,
|
|
"logps/rejected": -81.72457885742188,
|
|
"loss": 1.0588,
|
|
"margin_dpo/margin_mean": 1.7472577095031738,
|
|
"margin_dpo/margin_std": 2.7469921112060547,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.8767951625094482,
|
|
"fcm_dpo/beta": 0.5151132941246033,
|
|
"fcm_dpo/delta": -0.06574690341949463,
|
|
"fcm_dpo/margin": 1.7644736766815186,
|
|
"fcm_dpo/q_t": 0.33593645691871643,
|
|
"grad_norm": 64.35208892822266,
|
|
"learning_rate": 2.3144448823151392e-08,
|
|
"logits/chosen": 1.6839550733566284,
|
|
"logits/rejected": 1.5548913478851318,
|
|
"logps/chosen": -71.26051330566406,
|
|
"logps/ref_chosen": -76.61564636230469,
|
|
"logps/ref_rejected": -97.09959411621094,
|
|
"logps/rejected": -93.50894165039062,
|
|
"loss": 1.0099,
|
|
"margin_dpo/margin_mean": 1.7644741535186768,
|
|
"margin_dpo/margin_std": 2.5570900440216064,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.8783068783068783,
|
|
"fcm_dpo/beta": 0.505305290222168,
|
|
"fcm_dpo/delta": 0.011192187666893005,
|
|
"fcm_dpo/margin": 1.6590213775634766,
|
|
"fcm_dpo/q_t": 0.35038408637046814,
|
|
"grad_norm": 80.1307601928711,
|
|
"learning_rate": 2.259200116137039e-08,
|
|
"logits/chosen": 1.7930132150650024,
|
|
"logits/rejected": 1.6991674900054932,
|
|
"logps/chosen": -70.05377197265625,
|
|
"logps/ref_chosen": -74.8531265258789,
|
|
"logps/ref_rejected": -101.5344009399414,
|
|
"logps/rejected": -98.3940658569336,
|
|
"loss": 1.0409,
|
|
"margin_dpo/margin_mean": 1.6590216159820557,
|
|
"margin_dpo/margin_std": 2.571774959564209,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.8798185941043084,
|
|
"fcm_dpo/beta": 0.5117372274398804,
|
|
"fcm_dpo/delta": 0.04091556370258331,
|
|
"fcm_dpo/margin": 1.5874507427215576,
|
|
"fcm_dpo/q_t": 0.35794246196746826,
|
|
"grad_norm": 78.47570037841797,
|
|
"learning_rate": 2.204591459016525e-08,
|
|
"logits/chosen": 1.5402312278747559,
|
|
"logits/rejected": 1.5932610034942627,
|
|
"logps/chosen": -76.41178894042969,
|
|
"logps/ref_chosen": -81.07638549804688,
|
|
"logps/ref_rejected": -72.83570861816406,
|
|
"logps/rejected": -69.75856018066406,
|
|
"loss": 1.0968,
|
|
"margin_dpo/margin_mean": 1.5874508619308472,
|
|
"margin_dpo/margin_std": 2.7462587356567383,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.8813303099017384,
|
|
"fcm_dpo/beta": 0.5294514298439026,
|
|
"fcm_dpo/delta": 0.18893937766551971,
|
|
"fcm_dpo/margin": 1.2722506523132324,
|
|
"fcm_dpo/q_t": 0.393571674823761,
|
|
"grad_norm": 94.36971282958984,
|
|
"learning_rate": 2.1506204384751064e-08,
|
|
"logits/chosen": 1.9707103967666626,
|
|
"logits/rejected": 1.7553215026855469,
|
|
"logps/chosen": -61.93994903564453,
|
|
"logps/ref_chosen": -66.78465270996094,
|
|
"logps/ref_rejected": -106.45825958251953,
|
|
"logps/rejected": -102.88580322265625,
|
|
"loss": 1.2479,
|
|
"margin_dpo/margin_mean": 1.2722513675689697,
|
|
"margin_dpo/margin_std": 2.820009708404541,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.8828420256991686,
|
|
"fcm_dpo/beta": 0.534497857093811,
|
|
"fcm_dpo/delta": 0.05336347967386246,
|
|
"fcm_dpo/margin": 1.497223138809204,
|
|
"fcm_dpo/q_t": 0.3743545413017273,
|
|
"grad_norm": 78.58843231201172,
|
|
"learning_rate": 2.09728856419826e-08,
|
|
"logits/chosen": 1.9033856391906738,
|
|
"logits/rejected": 1.7374662160873413,
|
|
"logps/chosen": -55.71133041381836,
|
|
"logps/ref_chosen": -60.802913665771484,
|
|
"logps/ref_rejected": -99.45012664794922,
|
|
"logps/rejected": -95.85576629638672,
|
|
"loss": 1.1555,
|
|
"margin_dpo/margin_mean": 1.4972236156463623,
|
|
"margin_dpo/margin_std": 2.8621163368225098,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.8843537414965986,
|
|
"fcm_dpo/beta": 0.5567061305046082,
|
|
"fcm_dpo/delta": 0.22613489627838135,
|
|
"fcm_dpo/margin": 1.1473885774612427,
|
|
"fcm_dpo/q_t": 0.3761305809020996,
|
|
"grad_norm": 84.38145446777344,
|
|
"learning_rate": 2.044597327993153e-08,
|
|
"logits/chosen": 1.534332513809204,
|
|
"logits/rejected": 1.4831292629241943,
|
|
"logps/chosen": -71.04457092285156,
|
|
"logps/ref_chosen": -75.92616271972656,
|
|
"logps/ref_rejected": -94.47601318359375,
|
|
"logps/rejected": -90.74182891845703,
|
|
"loss": 1.1613,
|
|
"margin_dpo/margin_mean": 1.1473884582519531,
|
|
"margin_dpo/margin_std": 2.2100892066955566,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.8858654572940288,
|
|
"fcm_dpo/beta": 0.552130937576294,
|
|
"fcm_dpo/delta": -0.13436739146709442,
|
|
"fcm_dpo/margin": 1.7582968473434448,
|
|
"fcm_dpo/q_t": 0.3284263610839844,
|
|
"grad_norm": 361.6727600097656,
|
|
"learning_rate": 1.9925482037469187e-08,
|
|
"logits/chosen": 1.6084355115890503,
|
|
"logits/rejected": 1.513660192489624,
|
|
"logps/chosen": -63.508636474609375,
|
|
"logps/ref_chosen": -68.62062072753906,
|
|
"logps/ref_rejected": -81.98324584960938,
|
|
"logps/rejected": -78.62955474853516,
|
|
"loss": 0.9626,
|
|
"margin_dpo/margin_mean": 1.7582967281341553,
|
|
"margin_dpo/margin_std": 2.437290906906128,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.8873771730914588,
|
|
"fcm_dpo/beta": 0.5419132709503174,
|
|
"fcm_dpo/delta": -0.14227743446826935,
|
|
"fcm_dpo/margin": 1.8054625988006592,
|
|
"fcm_dpo/q_t": 0.33169883489608765,
|
|
"grad_norm": 92.720947265625,
|
|
"learning_rate": 1.9411426473854687e-08,
|
|
"logits/chosen": 1.8379313945770264,
|
|
"logits/rejected": 1.8169505596160889,
|
|
"logps/chosen": -72.4877700805664,
|
|
"logps/ref_chosen": -77.67031860351562,
|
|
"logps/ref_rejected": -79.35327911376953,
|
|
"logps/rejected": -75.9761962890625,
|
|
"loss": 1.0136,
|
|
"margin_dpo/margin_mean": 1.8054628372192383,
|
|
"margin_dpo/margin_std": 2.6331286430358887,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.8888888888888888,
|
|
"fcm_dpo/beta": 0.5268039703369141,
|
|
"fcm_dpo/delta": -0.051131971180438995,
|
|
"fcm_dpo/margin": 1.699599027633667,
|
|
"fcm_dpo/q_t": 0.32383257150650024,
|
|
"grad_norm": 76.1657485961914,
|
|
"learning_rate": 1.890382096832699e-08,
|
|
"logits/chosen": 1.8298721313476562,
|
|
"logits/rejected": 1.7479240894317627,
|
|
"logps/chosen": -73.06620788574219,
|
|
"logps/ref_chosen": -77.94320678710938,
|
|
"logps/ref_rejected": -98.41210174560547,
|
|
"logps/rejected": -95.23469543457031,
|
|
"loss": 0.9219,
|
|
"margin_dpo/margin_mean": 1.699598789215088,
|
|
"margin_dpo/margin_std": 2.1158838272094727,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.890400604686319,
|
|
"fcm_dpo/beta": 0.5218112468719482,
|
|
"fcm_dpo/delta": -0.0979146659374237,
|
|
"fcm_dpo/margin": 1.7990820407867432,
|
|
"fcm_dpo/q_t": 0.32140815258026123,
|
|
"grad_norm": 90.76171112060547,
|
|
"learning_rate": 1.840267971970344e-08,
|
|
"logits/chosen": 1.8079521656036377,
|
|
"logits/rejected": 1.7648406028747559,
|
|
"logps/chosen": -70.08314514160156,
|
|
"logps/ref_chosen": -75.18646240234375,
|
|
"logps/ref_rejected": -93.35910034179688,
|
|
"logps/rejected": -90.05486297607422,
|
|
"loss": 0.9824,
|
|
"margin_dpo/margin_mean": 1.799081563949585,
|
|
"margin_dpo/margin_std": 2.512829303741455,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.891912320483749,
|
|
"fcm_dpo/beta": 0.5154800415039062,
|
|
"fcm_dpo/delta": -0.06581678241491318,
|
|
"fcm_dpo/margin": 1.764974594116211,
|
|
"fcm_dpo/q_t": 0.3345610499382019,
|
|
"grad_norm": 78.90821075439453,
|
|
"learning_rate": 1.7908016745981856e-08,
|
|
"logits/chosen": 1.7630910873413086,
|
|
"logits/rejected": 1.7047700881958008,
|
|
"logps/chosen": -82.21047973632812,
|
|
"logps/ref_chosen": -86.9908447265625,
|
|
"logps/ref_rejected": -100.61723327636719,
|
|
"logps/rejected": -97.60183715820312,
|
|
"loss": 0.9671,
|
|
"margin_dpo/margin_mean": 1.7649750709533691,
|
|
"margin_dpo/margin_std": 2.4630494117736816,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.8934240362811792,
|
|
"fcm_dpo/beta": 0.49655839800834656,
|
|
"fcm_dpo/delta": -0.0999019667506218,
|
|
"fcm_dpo/margin": 1.8817025423049927,
|
|
"fcm_dpo/q_t": 0.32333502173423767,
|
|
"grad_norm": 71.44941711425781,
|
|
"learning_rate": 1.7419845883949098e-08,
|
|
"logits/chosen": 1.6200222969055176,
|
|
"logits/rejected": 1.5246787071228027,
|
|
"logps/chosen": -69.49836730957031,
|
|
"logps/ref_chosen": -74.85809326171875,
|
|
"logps/ref_rejected": -102.75840759277344,
|
|
"logps/rejected": -99.28038024902344,
|
|
"loss": 0.9459,
|
|
"margin_dpo/margin_mean": 1.8817024230957031,
|
|
"margin_dpo/margin_std": 2.3763575553894043,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.8949357520786092,
|
|
"fcm_dpo/beta": 0.4994744658470154,
|
|
"fcm_dpo/delta": -0.023017864674329758,
|
|
"fcm_dpo/margin": 1.743746042251587,
|
|
"fcm_dpo/q_t": 0.3495708703994751,
|
|
"grad_norm": 70.25218963623047,
|
|
"learning_rate": 1.6938180788793556e-08,
|
|
"logits/chosen": 1.5799570083618164,
|
|
"logits/rejected": 1.410193681716919,
|
|
"logps/chosen": -62.72476577758789,
|
|
"logps/ref_chosen": -67.90579223632812,
|
|
"logps/ref_rejected": -100.35234069824219,
|
|
"logps/rejected": -96.91506958007812,
|
|
"loss": 1.0063,
|
|
"margin_dpo/margin_mean": 1.7437461614608765,
|
|
"margin_dpo/margin_std": 2.631432056427002,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.8964474678760394,
|
|
"fcm_dpo/beta": 0.5060767531394958,
|
|
"fcm_dpo/delta": 0.08997678756713867,
|
|
"fcm_dpo/margin": 1.5154685974121094,
|
|
"fcm_dpo/q_t": 0.36925971508026123,
|
|
"grad_norm": 77.16971588134766,
|
|
"learning_rate": 1.6463034933723336e-08,
|
|
"logits/chosen": 1.7553977966308594,
|
|
"logits/rejected": 1.5694162845611572,
|
|
"logps/chosen": -54.12544250488281,
|
|
"logps/ref_chosen": -59.29489517211914,
|
|
"logps/ref_rejected": -85.31307983398438,
|
|
"logps/rejected": -81.65910339355469,
|
|
"loss": 1.1547,
|
|
"margin_dpo/margin_mean": 1.5154688358306885,
|
|
"margin_dpo/margin_std": 2.83261775970459,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.8979591836734694,
|
|
"fcm_dpo/beta": 0.5137929916381836,
|
|
"fcm_dpo/delta": 0.13700157403945923,
|
|
"fcm_dpo/margin": 1.4055750370025635,
|
|
"fcm_dpo/q_t": 0.37197840213775635,
|
|
"grad_norm": 74.884765625,
|
|
"learning_rate": 1.5994421609589385e-08,
|
|
"logits/chosen": 1.6110825538635254,
|
|
"logits/rejected": 1.5490773916244507,
|
|
"logps/chosen": -78.18447875976562,
|
|
"logps/ref_chosen": -83.14643859863281,
|
|
"logps/ref_rejected": -88.201904296875,
|
|
"logps/rejected": -84.64552307128906,
|
|
"loss": 1.0794,
|
|
"margin_dpo/margin_mean": 1.4055747985839844,
|
|
"margin_dpo/margin_std": 2.3255198001861572,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.8994708994708994,
|
|
"fcm_dpo/beta": 0.5185683965682983,
|
|
"fcm_dpo/delta": -0.08683624863624573,
|
|
"fcm_dpo/margin": 1.790905237197876,
|
|
"fcm_dpo/q_t": 0.334526926279068,
|
|
"grad_norm": 88.72738647460938,
|
|
"learning_rate": 1.553235392451377e-08,
|
|
"logits/chosen": 2.002946615219116,
|
|
"logits/rejected": 1.8320910930633545,
|
|
"logps/chosen": -65.44224548339844,
|
|
"logps/ref_chosen": -70.40016174316406,
|
|
"logps/ref_rejected": -103.95550537109375,
|
|
"logps/rejected": -100.78850555419922,
|
|
"loss": 1.111,
|
|
"margin_dpo/margin_mean": 1.7909047603607178,
|
|
"margin_dpo/margin_std": 3.0485243797302246,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.9009826152683296,
|
|
"fcm_dpo/beta": 0.5402435064315796,
|
|
"fcm_dpo/delta": 0.34699779748916626,
|
|
"fcm_dpo/margin": 0.9653339982032776,
|
|
"fcm_dpo/q_t": 0.42396220564842224,
|
|
"grad_norm": 86.49633026123047,
|
|
"learning_rate": 1.507684480352292e-08,
|
|
"logits/chosen": 1.4556769132614136,
|
|
"logits/rejected": 1.4827091693878174,
|
|
"logps/chosen": -81.25218963623047,
|
|
"logps/ref_chosen": -86.083740234375,
|
|
"logps/ref_rejected": -78.41991424560547,
|
|
"logps/rejected": -74.55369567871094,
|
|
"loss": 1.2796,
|
|
"margin_dpo/margin_mean": 0.965334415435791,
|
|
"margin_dpo/margin_std": 2.5436482429504395,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.9024943310657596,
|
|
"fcm_dpo/beta": 0.5435788631439209,
|
|
"fcm_dpo/delta": -0.0711166262626648,
|
|
"fcm_dpo/margin": 1.682100534439087,
|
|
"fcm_dpo/q_t": 0.3363920748233795,
|
|
"grad_norm": 70.85747528076172,
|
|
"learning_rate": 1.4627906988186111e-08,
|
|
"logits/chosen": 1.7241406440734863,
|
|
"logits/rejected": 1.6851555109024048,
|
|
"logps/chosen": -62.60491943359375,
|
|
"logps/ref_chosen": -67.8086166381836,
|
|
"logps/ref_rejected": -71.09245300292969,
|
|
"logps/rejected": -67.57085418701172,
|
|
"loss": 0.9945,
|
|
"margin_dpo/margin_mean": 1.6821008920669556,
|
|
"margin_dpo/margin_std": 2.4386579990386963,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.9040060468631897,
|
|
"fcm_dpo/beta": 0.5551949739456177,
|
|
"fcm_dpo/delta": 0.10281100124120712,
|
|
"fcm_dpo/margin": 1.3566747903823853,
|
|
"fcm_dpo/q_t": 0.3706873059272766,
|
|
"grad_norm": 86.94221496582031,
|
|
"learning_rate": 1.4185553036259095e-08,
|
|
"logits/chosen": 1.848606824874878,
|
|
"logits/rejected": 1.704300880432129,
|
|
"logps/chosen": -69.71743774414062,
|
|
"logps/ref_chosen": -74.31095886230469,
|
|
"logps/ref_rejected": -98.08122253417969,
|
|
"logps/rejected": -94.84437561035156,
|
|
"loss": 1.1446,
|
|
"margin_dpo/margin_mean": 1.3566749095916748,
|
|
"margin_dpo/margin_std": 2.557732343673706,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.9055177626606198,
|
|
"fcm_dpo/beta": 0.555027961730957,
|
|
"fcm_dpo/delta": 0.007491939701139927,
|
|
"fcm_dpo/margin": 1.5190777778625488,
|
|
"fcm_dpo/q_t": 0.3532414436340332,
|
|
"grad_norm": 84.7350082397461,
|
|
"learning_rate": 1.3749795321332885e-08,
|
|
"logits/chosen": 1.6712158918380737,
|
|
"logits/rejected": 1.6012566089630127,
|
|
"logps/chosen": -69.50666809082031,
|
|
"logps/ref_chosen": -74.21861267089844,
|
|
"logps/ref_rejected": -90.1492919921875,
|
|
"logps/rejected": -86.9564208984375,
|
|
"loss": 1.0617,
|
|
"margin_dpo/margin_mean": 1.5190777778625488,
|
|
"margin_dpo/margin_std": 2.5156443119049072,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.9070294784580499,
|
|
"fcm_dpo/beta": 0.5707370042800903,
|
|
"fcm_dpo/delta": 0.1365622580051422,
|
|
"fcm_dpo/margin": 1.2634763717651367,
|
|
"fcm_dpo/q_t": 0.3676835894584656,
|
|
"grad_norm": 82.2341079711914,
|
|
"learning_rate": 1.3320646032487393e-08,
|
|
"logits/chosen": 1.9393136501312256,
|
|
"logits/rejected": 1.793006181716919,
|
|
"logps/chosen": -74.63346099853516,
|
|
"logps/ref_chosen": -79.34190368652344,
|
|
"logps/ref_rejected": -97.0519790649414,
|
|
"logps/rejected": -93.60700988769531,
|
|
"loss": 1.1017,
|
|
"margin_dpo/margin_mean": 1.2634763717651367,
|
|
"margin_dpo/margin_std": 2.209320068359375,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.9070294784580499,
|
|
"eval_fcm_dpo/beta": 0.5703136920928955,
|
|
"eval_logits/chosen": 1.713449239730835,
|
|
"eval_logits/rejected": 1.6065305471420288,
|
|
"eval_logps/chosen": -82.01586151123047,
|
|
"eval_logps/ref_chosen": -86.90177917480469,
|
|
"eval_logps/ref_rejected": -96.69639587402344,
|
|
"eval_logps/rejected": -93.35726928710938,
|
|
"eval_loss": 0.5468015074729919,
|
|
"eval_margin_dpo/margin_mean": 1.546818733215332,
|
|
"eval_margin_dpo/margin_std": 2.597526788711548,
|
|
"eval_runtime": 42.2813,
|
|
"eval_samples_per_second": 54.469,
|
|
"eval_steps_per_second": 1.703,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.90854119425548,
|
|
"fcm_dpo/beta": 0.5453665256500244,
|
|
"fcm_dpo/delta": -0.22268065810203552,
|
|
"fcm_dpo/margin": 1.9149441719055176,
|
|
"fcm_dpo/q_t": 0.3091183304786682,
|
|
"grad_norm": 69.08990478515625,
|
|
"learning_rate": 1.2898117173950868e-08,
|
|
"logits/chosen": 1.5800609588623047,
|
|
"logits/rejected": 1.451780080795288,
|
|
"logps/chosen": -66.61608123779297,
|
|
"logps/ref_chosen": -72.06497192382812,
|
|
"logps/ref_rejected": -97.60928344726562,
|
|
"logps/rejected": -94.0753402709961,
|
|
"loss": 0.8982,
|
|
"margin_dpo/margin_mean": 1.9149435758590698,
|
|
"margin_dpo/margin_std": 2.311171531677246,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.91005291005291,
|
|
"fcm_dpo/beta": 0.5353580713272095,
|
|
"fcm_dpo/delta": -0.16216537356376648,
|
|
"fcm_dpo/margin": 1.8609248399734497,
|
|
"fcm_dpo/q_t": 0.3207598924636841,
|
|
"grad_norm": 69.55952453613281,
|
|
"learning_rate": 1.2482220564763667e-08,
|
|
"logits/chosen": 1.7727607488632202,
|
|
"logits/rejected": 1.6953372955322266,
|
|
"logps/chosen": -72.2029800415039,
|
|
"logps/ref_chosen": -77.80416870117188,
|
|
"logps/ref_rejected": -89.05026245117188,
|
|
"logps/rejected": -85.30999755859375,
|
|
"loss": 0.9179,
|
|
"margin_dpo/margin_mean": 1.8609247207641602,
|
|
"margin_dpo/margin_std": 2.3164451122283936,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.9115646258503401,
|
|
"fcm_dpo/beta": 0.5281209945678711,
|
|
"fcm_dpo/delta": 0.02333132177591324,
|
|
"fcm_dpo/margin": 1.5686895847320557,
|
|
"fcm_dpo/q_t": 0.34824633598327637,
|
|
"grad_norm": 69.28165435791016,
|
|
"learning_rate": 1.2072967838448051e-08,
|
|
"logits/chosen": 1.318486213684082,
|
|
"logits/rejected": 1.2564785480499268,
|
|
"logps/chosen": -63.34666442871094,
|
|
"logps/ref_chosen": -68.30155944824219,
|
|
"logps/ref_rejected": -90.542724609375,
|
|
"logps/rejected": -87.15652465820312,
|
|
"loss": 1.0188,
|
|
"margin_dpo/margin_mean": 1.5686898231506348,
|
|
"margin_dpo/margin_std": 2.38751220703125,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.9130763416477702,
|
|
"fcm_dpo/beta": 0.5328730344772339,
|
|
"fcm_dpo/delta": 0.055717065930366516,
|
|
"fcm_dpo/margin": 1.4987510442733765,
|
|
"fcm_dpo/q_t": 0.3579341769218445,
|
|
"grad_norm": 76.82192993164062,
|
|
"learning_rate": 1.1670370442682459e-08,
|
|
"logits/chosen": 1.7707507610321045,
|
|
"logits/rejected": 1.7423770427703857,
|
|
"logps/chosen": -85.27188110351562,
|
|
"logps/ref_chosen": -90.55952453613281,
|
|
"logps/ref_rejected": -84.6327133178711,
|
|
"logps/rejected": -80.84382629394531,
|
|
"loss": 1.1278,
|
|
"margin_dpo/margin_mean": 1.4987506866455078,
|
|
"margin_dpo/margin_std": 2.708588123321533,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.9145880574452003,
|
|
"fcm_dpo/beta": 0.5490473508834839,
|
|
"fcm_dpo/delta": 0.14943906664848328,
|
|
"fcm_dpo/margin": 1.2950233221054077,
|
|
"fcm_dpo/q_t": 0.3697357177734375,
|
|
"grad_norm": 135.77040100097656,
|
|
"learning_rate": 1.1274439638981532e-08,
|
|
"logits/chosen": 1.8290135860443115,
|
|
"logits/rejected": 1.7359825372695923,
|
|
"logps/chosen": -75.59449768066406,
|
|
"logps/ref_chosen": -80.26661682128906,
|
|
"logps/ref_rejected": -100.26485443115234,
|
|
"logps/rejected": -96.88775634765625,
|
|
"loss": 1.1026,
|
|
"margin_dpo/margin_mean": 1.295022964477539,
|
|
"margin_dpo/margin_std": 2.2529892921447754,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.9160997732426304,
|
|
"fcm_dpo/beta": 0.5741589069366455,
|
|
"fcm_dpo/delta": 0.18281231820583344,
|
|
"fcm_dpo/margin": 1.1763060092926025,
|
|
"fcm_dpo/q_t": 0.385870099067688,
|
|
"grad_norm": 90.35079956054688,
|
|
"learning_rate": 1.0885186502381016e-08,
|
|
"logits/chosen": 1.9423694610595703,
|
|
"logits/rejected": 1.8221031427383423,
|
|
"logps/chosen": -65.81397247314453,
|
|
"logps/ref_chosen": -70.73554229736328,
|
|
"logps/ref_rejected": -95.9410400390625,
|
|
"logps/rejected": -92.19577026367188,
|
|
"loss": 1.1632,
|
|
"margin_dpo/margin_mean": 1.1763060092926025,
|
|
"margin_dpo/margin_std": 2.2583560943603516,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.9176114890400605,
|
|
"fcm_dpo/beta": 0.564205527305603,
|
|
"fcm_dpo/delta": -0.014900833368301392,
|
|
"fcm_dpo/margin": 1.5237514972686768,
|
|
"fcm_dpo/q_t": 0.34097421169281006,
|
|
"grad_norm": 91.87316131591797,
|
|
"learning_rate": 1.0502621921127774e-08,
|
|
"logits/chosen": 1.543046474456787,
|
|
"logits/rejected": 1.481963038444519,
|
|
"logps/chosen": -76.32693481445312,
|
|
"logps/ref_chosen": -81.26203918457031,
|
|
"logps/ref_rejected": -92.71575927734375,
|
|
"logps/rejected": -89.30439758300781,
|
|
"loss": 1.0182,
|
|
"margin_dpo/margin_mean": 1.5237513780593872,
|
|
"margin_dpo/margin_std": 2.2329049110412598,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.9191232048374905,
|
|
"fcm_dpo/beta": 0.580283522605896,
|
|
"fcm_dpo/delta": 0.084572434425354,
|
|
"fcm_dpo/margin": 1.3297598361968994,
|
|
"fcm_dpo/q_t": 0.3788830041885376,
|
|
"grad_norm": 99.93155670166016,
|
|
"learning_rate": 1.0126756596375685e-08,
|
|
"logits/chosen": 1.7759461402893066,
|
|
"logits/rejected": 1.5685594081878662,
|
|
"logps/chosen": -78.00862121582031,
|
|
"logps/ref_chosen": -82.6530990600586,
|
|
"logps/ref_rejected": -110.64334106445312,
|
|
"logps/rejected": -107.32861328125,
|
|
"loss": 1.148,
|
|
"margin_dpo/margin_mean": 1.3297593593597412,
|
|
"margin_dpo/margin_std": 2.623138904571533,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.9206349206349206,
|
|
"fcm_dpo/beta": 0.5636272430419922,
|
|
"fcm_dpo/delta": -0.10882381349802017,
|
|
"fcm_dpo/margin": 1.6727293729782104,
|
|
"fcm_dpo/q_t": 0.3256949484348297,
|
|
"grad_norm": 68.13907623291016,
|
|
"learning_rate": 9.757601041885694e-09,
|
|
"logits/chosen": 2.040724039077759,
|
|
"logits/rejected": 1.932198405265808,
|
|
"logps/chosen": -62.85011291503906,
|
|
"logps/ref_chosen": -68.20232391357422,
|
|
"logps/ref_rejected": -81.90515899658203,
|
|
"logps/rejected": -78.22567749023438,
|
|
"loss": 0.9724,
|
|
"margin_dpo/margin_mean": 1.6727293729782104,
|
|
"margin_dpo/margin_std": 2.256746292114258,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.9221466364323507,
|
|
"fcm_dpo/beta": 0.5605812668800354,
|
|
"fcm_dpo/delta": -0.08716221898794174,
|
|
"fcm_dpo/margin": 1.6569585800170898,
|
|
"fcm_dpo/q_t": 0.348406046628952,
|
|
"grad_norm": 108.67435455322266,
|
|
"learning_rate": 9.395165583732379e-09,
|
|
"logits/chosen": 1.8048813343048096,
|
|
"logits/rejected": 1.7305305004119873,
|
|
"logps/chosen": -93.95142364501953,
|
|
"logps/ref_chosen": -99.01324462890625,
|
|
"logps/ref_rejected": -102.26054382324219,
|
|
"logps/rejected": -98.85566711425781,
|
|
"loss": 1.101,
|
|
"margin_dpo/margin_mean": 1.656959056854248,
|
|
"margin_dpo/margin_std": 2.800694704055786,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.9236583522297808,
|
|
"fcm_dpo/beta": 0.5618192553520203,
|
|
"fcm_dpo/delta": 0.09524710476398468,
|
|
"fcm_dpo/margin": 1.3548386096954346,
|
|
"fcm_dpo/q_t": 0.3647194802761078,
|
|
"grad_norm": 79.89419555664062,
|
|
"learning_rate": 9.03946036001449e-09,
|
|
"logits/chosen": 1.958254098892212,
|
|
"logits/rejected": 1.8665025234222412,
|
|
"logps/chosen": -61.16302490234375,
|
|
"logps/ref_chosen": -66.36254119873047,
|
|
"logps/ref_rejected": -88.74557495117188,
|
|
"logps/rejected": -84.90089416503906,
|
|
"loss": 1.1055,
|
|
"margin_dpo/margin_mean": 1.3548383712768555,
|
|
"margin_dpo/margin_std": 2.3446433544158936,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.9251700680272109,
|
|
"fcm_dpo/beta": 0.5615559816360474,
|
|
"fcm_dpo/delta": -0.13578204810619354,
|
|
"fcm_dpo/margin": 1.7321219444274902,
|
|
"fcm_dpo/q_t": 0.3264394700527191,
|
|
"grad_norm": 77.33811950683594,
|
|
"learning_rate": 8.690495320571839e-09,
|
|
"logits/chosen": 1.6673080921173096,
|
|
"logits/rejected": 1.5355725288391113,
|
|
"logps/chosen": -73.73802185058594,
|
|
"logps/ref_chosen": -78.6339111328125,
|
|
"logps/ref_rejected": -108.34969329833984,
|
|
"logps/rejected": -105.18592834472656,
|
|
"loss": 1.0014,
|
|
"margin_dpo/margin_mean": 1.7321220636367798,
|
|
"margin_dpo/margin_std": 2.5373339653015137,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.926681783824641,
|
|
"fcm_dpo/beta": 0.5403913259506226,
|
|
"fcm_dpo/delta": -0.22849300503730774,
|
|
"fcm_dpo/margin": 1.9520108699798584,
|
|
"fcm_dpo/q_t": 0.31533053517341614,
|
|
"grad_norm": 84.33403778076172,
|
|
"learning_rate": 8.348280226706722e-09,
|
|
"logits/chosen": 1.7047350406646729,
|
|
"logits/rejected": 1.6978881359100342,
|
|
"logps/chosen": -68.17232513427734,
|
|
"logps/ref_chosen": -73.3539047241211,
|
|
"logps/ref_rejected": -76.91837310791016,
|
|
"logps/rejected": -73.68879699707031,
|
|
"loss": 0.9682,
|
|
"margin_dpo/margin_mean": 1.9520103931427002,
|
|
"margin_dpo/margin_std": 2.7025699615478516,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.9281934996220711,
|
|
"fcm_dpo/beta": 0.5397230386734009,
|
|
"fcm_dpo/delta": 0.055488236248493195,
|
|
"fcm_dpo/margin": 1.4754180908203125,
|
|
"fcm_dpo/q_t": 0.3497070074081421,
|
|
"grad_norm": 78.92454528808594,
|
|
"learning_rate": 8.012824650910937e-09,
|
|
"logits/chosen": 1.951556921005249,
|
|
"logits/rejected": 1.9331920146942139,
|
|
"logps/chosen": -73.11341857910156,
|
|
"logps/ref_chosen": -77.80007934570312,
|
|
"logps/ref_rejected": -89.05572509765625,
|
|
"logps/rejected": -85.84449005126953,
|
|
"loss": 1.008,
|
|
"margin_dpo/margin_mean": 1.475417971611023,
|
|
"margin_dpo/margin_std": 2.1356678009033203,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.9297052154195011,
|
|
"fcm_dpo/beta": 0.5249578952789307,
|
|
"fcm_dpo/delta": -0.0973423644900322,
|
|
"fcm_dpo/margin": 1.7850191593170166,
|
|
"fcm_dpo/q_t": 0.3419983386993408,
|
|
"grad_norm": 94.88264465332031,
|
|
"learning_rate": 7.684137976598088e-09,
|
|
"logits/chosen": 1.7977044582366943,
|
|
"logits/rejected": 1.711987018585205,
|
|
"logps/chosen": -85.27580261230469,
|
|
"logps/ref_chosen": -90.06971740722656,
|
|
"logps/ref_rejected": -118.7764892578125,
|
|
"logps/rejected": -115.76759338378906,
|
|
"loss": 1.0349,
|
|
"margin_dpo/margin_mean": 1.785017967224121,
|
|
"margin_dpo/margin_std": 2.7509849071502686,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.9312169312169312,
|
|
"fcm_dpo/beta": 0.5319191813468933,
|
|
"fcm_dpo/delta": 0.14790388941764832,
|
|
"fcm_dpo/margin": 1.3388638496398926,
|
|
"fcm_dpo/q_t": 0.3731805682182312,
|
|
"grad_norm": 75.28299713134766,
|
|
"learning_rate": 7.36222939784098e-09,
|
|
"logits/chosen": 1.7395646572113037,
|
|
"logits/rejected": 1.5876948833465576,
|
|
"logps/chosen": -69.80252838134766,
|
|
"logps/ref_chosen": -74.62954711914062,
|
|
"logps/ref_rejected": -93.655029296875,
|
|
"logps/rejected": -90.1668701171875,
|
|
"loss": 1.1017,
|
|
"margin_dpo/margin_mean": 1.3388640880584717,
|
|
"margin_dpo/margin_std": 2.321733236312866,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.9327286470143613,
|
|
"fcm_dpo/beta": 0.5505938529968262,
|
|
"fcm_dpo/delta": 0.20431050658226013,
|
|
"fcm_dpo/margin": 1.1955896615982056,
|
|
"fcm_dpo/q_t": 0.3828273117542267,
|
|
"grad_norm": 90.21337890625,
|
|
"learning_rate": 7.047107919114586e-09,
|
|
"logits/chosen": 1.8987562656402588,
|
|
"logits/rejected": 1.8017901182174683,
|
|
"logps/chosen": -71.38261413574219,
|
|
"logps/ref_chosen": -75.98182678222656,
|
|
"logps/ref_rejected": -97.1640625,
|
|
"logps/rejected": -93.76043701171875,
|
|
"loss": 1.1641,
|
|
"margin_dpo/margin_mean": 1.1955888271331787,
|
|
"margin_dpo/margin_std": 2.333718776702881,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.9342403628117913,
|
|
"fcm_dpo/beta": 0.5518548488616943,
|
|
"fcm_dpo/delta": -0.06153812259435654,
|
|
"fcm_dpo/margin": 1.6353678703308105,
|
|
"fcm_dpo/q_t": 0.3478153347969055,
|
|
"grad_norm": 88.34520721435547,
|
|
"learning_rate": 6.738782355044048e-09,
|
|
"logits/chosen": 1.519637107849121,
|
|
"logits/rejected": 1.3564965724945068,
|
|
"logps/chosen": -69.31785583496094,
|
|
"logps/ref_chosen": -74.47208404541016,
|
|
"logps/ref_rejected": -107.09980773925781,
|
|
"logps/rejected": -103.5809555053711,
|
|
"loss": 1.0393,
|
|
"margin_dpo/margin_mean": 1.6353681087493896,
|
|
"margin_dpo/margin_std": 2.6017632484436035,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.9357520786092215,
|
|
"fcm_dpo/beta": 0.5464029312133789,
|
|
"fcm_dpo/delta": -0.044458553194999695,
|
|
"fcm_dpo/margin": 1.623572826385498,
|
|
"fcm_dpo/q_t": 0.3364899456501007,
|
|
"grad_norm": 84.14434814453125,
|
|
"learning_rate": 6.437261330158206e-09,
|
|
"logits/chosen": 1.9933700561523438,
|
|
"logits/rejected": 1.82912278175354,
|
|
"logps/chosen": -65.78286743164062,
|
|
"logps/ref_chosen": -70.84220886230469,
|
|
"logps/ref_rejected": -98.07801818847656,
|
|
"logps/rejected": -94.64225006103516,
|
|
"loss": 0.9841,
|
|
"margin_dpo/margin_mean": 1.6235718727111816,
|
|
"margin_dpo/margin_std": 2.256570339202881,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.9372637944066515,
|
|
"fcm_dpo/beta": 0.5447486042976379,
|
|
"fcm_dpo/delta": -0.13042320311069489,
|
|
"fcm_dpo/margin": 1.1954697370529175,
|
|
"fcm_dpo/q_t": 0.3794524669647217,
|
|
"grad_norm": 106.2513198852539,
|
|
"learning_rate": 6.142553278648238e-09,
|
|
"logits/chosen": 1.6421568393707275,
|
|
"logits/rejected": 1.6440951824188232,
|
|
"logps/chosen": -71.89732360839844,
|
|
"logps/ref_chosen": -76.93606567382812,
|
|
"logps/ref_rejected": -81.28453063964844,
|
|
"logps/rejected": -77.44125366210938,
|
|
"loss": 1.2403,
|
|
"margin_dpo/margin_mean": 1.1954690217971802,
|
|
"margin_dpo/margin_std": 2.530783176422119,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.9387755102040817,
|
|
"fcm_dpo/beta": 0.5438255667686462,
|
|
"fcm_dpo/delta": -0.037219464778900146,
|
|
"fcm_dpo/margin": 1.616603136062622,
|
|
"fcm_dpo/q_t": 0.36209145188331604,
|
|
"grad_norm": 80.9903335571289,
|
|
"learning_rate": 5.854666444131934e-09,
|
|
"logits/chosen": 1.8847250938415527,
|
|
"logits/rejected": 1.6826179027557373,
|
|
"logps/chosen": -64.96229553222656,
|
|
"logps/ref_chosen": -69.87464904785156,
|
|
"logps/ref_rejected": -105.61328887939453,
|
|
"logps/rejected": -102.31753540039062,
|
|
"loss": 1.134,
|
|
"margin_dpo/margin_mean": 1.616602897644043,
|
|
"margin_dpo/margin_std": 2.893141269683838,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.9402872260015117,
|
|
"fcm_dpo/beta": 0.5321084260940552,
|
|
"fcm_dpo/delta": -0.040384843945503235,
|
|
"fcm_dpo/margin": 1.6664624214172363,
|
|
"fcm_dpo/q_t": 0.3371589183807373,
|
|
"grad_norm": 73.74857330322266,
|
|
"learning_rate": 5.573608879422875e-09,
|
|
"logits/chosen": 1.545579433441162,
|
|
"logits/rejected": 1.4461474418640137,
|
|
"logps/chosen": -73.89500427246094,
|
|
"logps/ref_chosen": -78.9598388671875,
|
|
"logps/ref_rejected": -97.90648651123047,
|
|
"logps/rejected": -94.50811767578125,
|
|
"loss": 0.9599,
|
|
"margin_dpo/margin_mean": 1.6664619445800781,
|
|
"margin_dpo/margin_std": 2.2627735137939453,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.9417989417989417,
|
|
"fcm_dpo/beta": 0.5304499864578247,
|
|
"fcm_dpo/delta": -0.01973407343029976,
|
|
"fcm_dpo/margin": 1.63567316532135,
|
|
"fcm_dpo/q_t": 0.34609299898147583,
|
|
"grad_norm": 77.88653564453125,
|
|
"learning_rate": 5.299388446305342e-09,
|
|
"logits/chosen": 1.8903741836547852,
|
|
"logits/rejected": 1.7817987203598022,
|
|
"logps/chosen": -78.66651916503906,
|
|
"logps/ref_chosen": -83.22647094726562,
|
|
"logps/ref_rejected": -105.1362533569336,
|
|
"logps/rejected": -102.21197509765625,
|
|
"loss": 1.0052,
|
|
"margin_dpo/margin_mean": 1.6356725692749023,
|
|
"margin_dpo/margin_std": 2.4788856506347656,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.9433106575963719,
|
|
"fcm_dpo/beta": 0.5237078070640564,
|
|
"fcm_dpo/delta": -0.0306610856205225,
|
|
"fcm_dpo/margin": 1.6761407852172852,
|
|
"fcm_dpo/q_t": 0.3443066477775574,
|
|
"grad_norm": 80.54615783691406,
|
|
"learning_rate": 5.03201281531429e-09,
|
|
"logits/chosen": 1.5723378658294678,
|
|
"logits/rejected": 1.3790596723556519,
|
|
"logps/chosen": -60.83345031738281,
|
|
"logps/ref_chosen": -66.10560607910156,
|
|
"logps/ref_rejected": -91.66778564453125,
|
|
"logps/rejected": -88.07176208496094,
|
|
"loss": 1.0358,
|
|
"margin_dpo/margin_mean": 1.6761407852172852,
|
|
"margin_dpo/margin_std": 2.5644569396972656,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.9448223733938019,
|
|
"fcm_dpo/beta": 0.5492905378341675,
|
|
"fcm_dpo/delta": 0.27562734484672546,
|
|
"fcm_dpo/margin": 1.0710108280181885,
|
|
"fcm_dpo/q_t": 0.3919370174407959,
|
|
"grad_norm": 82.9334945678711,
|
|
"learning_rate": 4.7714894655209174e-09,
|
|
"logits/chosen": 1.9585500955581665,
|
|
"logits/rejected": 1.806025743484497,
|
|
"logps/chosen": -68.38748168945312,
|
|
"logps/ref_chosen": -73.20295715332031,
|
|
"logps/ref_rejected": -105.31025695800781,
|
|
"logps/rejected": -101.5657958984375,
|
|
"loss": 1.2049,
|
|
"margin_dpo/margin_mean": 1.0710105895996094,
|
|
"margin_dpo/margin_std": 2.273772716522217,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.9463340891912321,
|
|
"fcm_dpo/beta": 0.5458341836929321,
|
|
"fcm_dpo/delta": -0.1310361623764038,
|
|
"fcm_dpo/margin": 1.7742984294891357,
|
|
"fcm_dpo/q_t": 0.3442504405975342,
|
|
"grad_norm": 82.92687225341797,
|
|
"learning_rate": 4.517825684323323e-09,
|
|
"logits/chosen": 1.956076979637146,
|
|
"logits/rejected": 1.7062575817108154,
|
|
"logps/chosen": -57.01714324951172,
|
|
"logps/ref_chosen": -62.181278228759766,
|
|
"logps/ref_rejected": -108.17747497558594,
|
|
"logps/rejected": -104.78764343261719,
|
|
"loss": 1.0583,
|
|
"margin_dpo/margin_mean": 1.7742981910705566,
|
|
"margin_dpo/margin_std": 2.8389077186584473,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.9478458049886621,
|
|
"fcm_dpo/beta": 0.5245725512504578,
|
|
"fcm_dpo/delta": -0.16692830622196198,
|
|
"fcm_dpo/margin": 1.9054150581359863,
|
|
"fcm_dpo/q_t": 0.3315790891647339,
|
|
"grad_norm": 84.73611450195312,
|
|
"learning_rate": 4.271028567242818e-09,
|
|
"logits/chosen": 1.9087677001953125,
|
|
"logits/rejected": 1.6444659233093262,
|
|
"logps/chosen": -72.71656799316406,
|
|
"logps/ref_chosen": -77.72123718261719,
|
|
"logps/ref_rejected": -114.40547180175781,
|
|
"logps/rejected": -111.30622100830078,
|
|
"loss": 0.926,
|
|
"margin_dpo/margin_mean": 1.9054151773452759,
|
|
"margin_dpo/margin_std": 2.5662083625793457,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.9493575207860923,
|
|
"fcm_dpo/beta": 0.5171005129814148,
|
|
"fcm_dpo/delta": -0.15388283133506775,
|
|
"fcm_dpo/margin": 1.9088798761367798,
|
|
"fcm_dpo/q_t": 0.32542526721954346,
|
|
"grad_norm": 82.72279357910156,
|
|
"learning_rate": 4.0311050177251895e-09,
|
|
"logits/chosen": 1.9638164043426514,
|
|
"logits/rejected": 1.9315705299377441,
|
|
"logps/chosen": -65.36286926269531,
|
|
"logps/ref_chosen": -70.71195983886719,
|
|
"logps/ref_rejected": -93.85909271240234,
|
|
"logps/rejected": -90.41887664794922,
|
|
"loss": 1.0614,
|
|
"margin_dpo/margin_mean": 1.9088804721832275,
|
|
"margin_dpo/margin_std": 2.9062271118164062,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.9508692365835223,
|
|
"fcm_dpo/beta": 0.508413553237915,
|
|
"fcm_dpo/delta": -0.01258166879415512,
|
|
"fcm_dpo/margin": 1.6929104328155518,
|
|
"fcm_dpo/q_t": 0.34355059266090393,
|
|
"grad_norm": 75.12218475341797,
|
|
"learning_rate": 3.798061746947995e-09,
|
|
"logits/chosen": 1.9230015277862549,
|
|
"logits/rejected": 1.86118745803833,
|
|
"logps/chosen": -83.1578369140625,
|
|
"logps/ref_chosen": -88.66283416748047,
|
|
"logps/ref_rejected": -94.67845153808594,
|
|
"logps/rejected": -90.86636352539062,
|
|
"loss": 0.9851,
|
|
"margin_dpo/margin_mean": 1.692910075187683,
|
|
"margin_dpo/margin_std": 2.403411865234375,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.9523809523809523,
|
|
"fcm_dpo/beta": 0.49728578329086304,
|
|
"fcm_dpo/delta": -0.13409729301929474,
|
|
"fcm_dpo/margin": 1.9530057907104492,
|
|
"fcm_dpo/q_t": 0.32737135887145996,
|
|
"grad_norm": 62.6065559387207,
|
|
"learning_rate": 3.5719052736323806e-09,
|
|
"logits/chosen": 1.497613787651062,
|
|
"logits/rejected": 1.3795734643936157,
|
|
"logps/chosen": -67.59822845458984,
|
|
"logps/ref_chosen": -72.94979858398438,
|
|
"logps/ref_rejected": -92.7632827758789,
|
|
"logps/rejected": -89.36473083496094,
|
|
"loss": 0.9097,
|
|
"margin_dpo/margin_mean": 1.953005313873291,
|
|
"margin_dpo/margin_std": 2.455533742904663,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.9538926681783825,
|
|
"fcm_dpo/beta": 0.4697021245956421,
|
|
"fcm_dpo/delta": -0.1924063116312027,
|
|
"fcm_dpo/margin": 2.1605029106140137,
|
|
"fcm_dpo/q_t": 0.3222210109233856,
|
|
"grad_norm": 64.52576446533203,
|
|
"learning_rate": 3.352641923861144e-09,
|
|
"logits/chosen": 1.821082353591919,
|
|
"logits/rejected": 1.5979212522506714,
|
|
"logps/chosen": -73.60709381103516,
|
|
"logps/ref_chosen": -78.58656311035156,
|
|
"logps/ref_rejected": -115.38685607910156,
|
|
"logps/rejected": -112.56788635253906,
|
|
"loss": 0.9299,
|
|
"margin_dpo/margin_mean": 2.160503387451172,
|
|
"margin_dpo/margin_std": 2.728344440460205,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.9554043839758125,
|
|
"fcm_dpo/beta": 0.4686143100261688,
|
|
"fcm_dpo/delta": -0.03186669945716858,
|
|
"fcm_dpo/margin": 1.8749642372131348,
|
|
"fcm_dpo/q_t": 0.328419029712677,
|
|
"grad_norm": 65.45675659179688,
|
|
"learning_rate": 3.140277830901428e-09,
|
|
"logits/chosen": 1.9738693237304688,
|
|
"logits/rejected": 1.889809250831604,
|
|
"logps/chosen": -70.00298309326172,
|
|
"logps/ref_chosen": -75.24861907958984,
|
|
"logps/ref_rejected": -82.98665618896484,
|
|
"logps/rejected": -79.6159896850586,
|
|
"loss": 0.9563,
|
|
"margin_dpo/margin_mean": 1.8749642372131348,
|
|
"margin_dpo/margin_std": 2.4336998462677,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.9569160997732427,
|
|
"fcm_dpo/beta": 0.46875280141830444,
|
|
"fcm_dpo/delta": -0.08242163062095642,
|
|
"fcm_dpo/margin": 1.9704583883285522,
|
|
"fcm_dpo/q_t": 0.34461504220962524,
|
|
"grad_norm": 64.5451889038086,
|
|
"learning_rate": 2.9348189350335007e-09,
|
|
"logits/chosen": 1.4927654266357422,
|
|
"logits/rejected": 1.4023582935333252,
|
|
"logps/chosen": -63.2589111328125,
|
|
"logps/ref_chosen": -68.8402099609375,
|
|
"logps/ref_rejected": -84.64610290527344,
|
|
"logps/rejected": -81.0352554321289,
|
|
"loss": 1.0005,
|
|
"margin_dpo/margin_mean": 1.9704585075378418,
|
|
"margin_dpo/margin_std": 2.9617648124694824,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.9584278155706727,
|
|
"fcm_dpo/beta": 0.49346354603767395,
|
|
"fcm_dpo/delta": 0.4376094341278076,
|
|
"fcm_dpo/margin": 0.873611569404602,
|
|
"fcm_dpo/q_t": 0.43060654401779175,
|
|
"grad_norm": 88.31072998046875,
|
|
"learning_rate": 2.736270983384276e-09,
|
|
"logits/chosen": 1.7620604038238525,
|
|
"logits/rejected": 1.7831530570983887,
|
|
"logps/chosen": -72.12706756591797,
|
|
"logps/ref_chosen": -77.0589599609375,
|
|
"logps/ref_rejected": -74.37579345703125,
|
|
"logps/rejected": -70.31752014160156,
|
|
"loss": 1.2866,
|
|
"margin_dpo/margin_mean": 0.8736119270324707,
|
|
"margin_dpo/margin_std": 2.4669814109802246,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.9599395313681028,
|
|
"fcm_dpo/beta": 0.523192822933197,
|
|
"fcm_dpo/delta": 0.22281187772750854,
|
|
"fcm_dpo/margin": 1.2240848541259766,
|
|
"fcm_dpo/q_t": 0.3974232077598572,
|
|
"grad_norm": 89.01469421386719,
|
|
"learning_rate": 2.5446395297668287e-09,
|
|
"logits/chosen": 1.5564526319503784,
|
|
"logits/rejected": 1.446262001991272,
|
|
"logps/chosen": -81.07947540283203,
|
|
"logps/ref_chosen": -85.60243225097656,
|
|
"logps/ref_rejected": -104.29497528076172,
|
|
"logps/rejected": -100.99610900878906,
|
|
"loss": 1.1869,
|
|
"margin_dpo/margin_mean": 1.2240850925445557,
|
|
"margin_dpo/margin_std": 2.678163766860962,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.9614512471655329,
|
|
"fcm_dpo/beta": 0.5146247148513794,
|
|
"fcm_dpo/delta": -0.1677190065383911,
|
|
"fcm_dpo/margin": 1.945070743560791,
|
|
"fcm_dpo/q_t": 0.31559455394744873,
|
|
"grad_norm": 67.87616729736328,
|
|
"learning_rate": 2.359929934524829e-09,
|
|
"logits/chosen": 1.7444641590118408,
|
|
"logits/rejected": 1.53926682472229,
|
|
"logps/chosen": -63.456119537353516,
|
|
"logps/ref_chosen": -68.72154235839844,
|
|
"logps/ref_rejected": -97.44863891601562,
|
|
"logps/rejected": -94.12828063964844,
|
|
"loss": 0.8636,
|
|
"margin_dpo/margin_mean": 1.9450714588165283,
|
|
"margin_dpo/margin_std": 2.2955727577209473,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.9629629629629629,
|
|
"fcm_dpo/beta": 0.5062402486801147,
|
|
"fcm_dpo/delta": -0.03895487263798714,
|
|
"fcm_dpo/margin": 1.7489988803863525,
|
|
"fcm_dpo/q_t": 0.3463430404663086,
|
|
"grad_norm": 75.6401596069336,
|
|
"learning_rate": 2.1821473643827137e-09,
|
|
"logits/chosen": 1.8284952640533447,
|
|
"logits/rejected": 1.7029112577438354,
|
|
"logps/chosen": -87.67061614990234,
|
|
"logps/ref_chosen": -92.38919067382812,
|
|
"logps/ref_rejected": -103.70460510253906,
|
|
"logps/rejected": -100.73503875732422,
|
|
"loss": 0.9843,
|
|
"margin_dpo/margin_mean": 1.748998761177063,
|
|
"margin_dpo/margin_std": 2.5600695610046387,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.9644746787603931,
|
|
"fcm_dpo/beta": 0.49277734756469727,
|
|
"fcm_dpo/delta": -0.14965899288654327,
|
|
"fcm_dpo/margin": 1.9968385696411133,
|
|
"fcm_dpo/q_t": 0.31863152980804443,
|
|
"grad_norm": 71.01853942871094,
|
|
"learning_rate": 2.0112967923011646e-09,
|
|
"logits/chosen": 1.5376474857330322,
|
|
"logits/rejected": 1.4166994094848633,
|
|
"logps/chosen": -78.29296875,
|
|
"logps/ref_chosen": -83.36921691894531,
|
|
"logps/ref_rejected": -103.04508209228516,
|
|
"logps/rejected": -99.96566772460938,
|
|
"loss": 0.9037,
|
|
"margin_dpo/margin_mean": 1.9968383312225342,
|
|
"margin_dpo/margin_std": 2.4473977088928223,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.9659863945578231,
|
|
"fcm_dpo/beta": 0.5017915964126587,
|
|
"fcm_dpo/delta": 0.15579509735107422,
|
|
"fcm_dpo/margin": 1.405687928199768,
|
|
"fcm_dpo/q_t": 0.3671724200248718,
|
|
"grad_norm": 75.0201644897461,
|
|
"learning_rate": 1.847382997337943e-09,
|
|
"logits/chosen": 1.6120787858963013,
|
|
"logits/rejected": 1.4171923398971558,
|
|
"logps/chosen": -65.34456634521484,
|
|
"logps/ref_chosen": -70.45247650146484,
|
|
"logps/ref_rejected": -93.77748107910156,
|
|
"logps/rejected": -90.07525634765625,
|
|
"loss": 1.0572,
|
|
"margin_dpo/margin_mean": 1.4056884050369263,
|
|
"margin_dpo/margin_std": 2.241938591003418,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.9674981103552532,
|
|
"fcm_dpo/beta": 0.5281813144683838,
|
|
"fcm_dpo/delta": 0.26455286145210266,
|
|
"fcm_dpo/margin": 1.137129306793213,
|
|
"fcm_dpo/q_t": 0.3849751949310303,
|
|
"grad_norm": 80.01968383789062,
|
|
"learning_rate": 1.690410564514244e-09,
|
|
"logits/chosen": 1.9318921566009521,
|
|
"logits/rejected": 1.7825345993041992,
|
|
"logps/chosen": -63.573158264160156,
|
|
"logps/ref_chosen": -68.51570129394531,
|
|
"logps/ref_rejected": -92.35081481933594,
|
|
"logps/rejected": -88.54540252685547,
|
|
"loss": 1.221,
|
|
"margin_dpo/margin_mean": 1.1371290683746338,
|
|
"margin_dpo/margin_std": 2.42228364944458,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.9690098261526833,
|
|
"fcm_dpo/beta": 0.5420957803726196,
|
|
"fcm_dpo/delta": 0.11781658977270126,
|
|
"fcm_dpo/margin": 1.3670419454574585,
|
|
"fcm_dpo/q_t": 0.3666565716266632,
|
|
"grad_norm": 112.83721160888672,
|
|
"learning_rate": 1.5403838846864692e-09,
|
|
"logits/chosen": 1.751739501953125,
|
|
"logits/rejected": 1.7309410572052002,
|
|
"logps/chosen": -87.5758056640625,
|
|
"logps/ref_chosen": -92.35102844238281,
|
|
"logps/ref_rejected": -102.4269790649414,
|
|
"logps/rejected": -99.018798828125,
|
|
"loss": 1.0715,
|
|
"margin_dpo/margin_mean": 1.3670417070388794,
|
|
"margin_dpo/margin_std": 2.2877607345581055,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.9705215419501134,
|
|
"fcm_dpo/beta": 0.5414741039276123,
|
|
"fcm_dpo/delta": 0.020895883440971375,
|
|
"fcm_dpo/margin": 1.5288487672805786,
|
|
"fcm_dpo/q_t": 0.3546501100063324,
|
|
"grad_norm": 91.0616683959961,
|
|
"learning_rate": 1.3973071544233218e-09,
|
|
"logits/chosen": 1.7637138366699219,
|
|
"logits/rejected": 1.767164945602417,
|
|
"logps/chosen": -83.28409576416016,
|
|
"logps/ref_chosen": -88.39617919921875,
|
|
"logps/ref_rejected": -88.73035430908203,
|
|
"logps/rejected": -85.14712524414062,
|
|
"loss": 1.0806,
|
|
"margin_dpo/margin_mean": 1.5288479328155518,
|
|
"margin_dpo/margin_std": 2.504338264465332,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.9720332577475435,
|
|
"fcm_dpo/beta": 0.5468122363090515,
|
|
"fcm_dpo/delta": 0.0597110316157341,
|
|
"fcm_dpo/margin": 1.4491500854492188,
|
|
"fcm_dpo/q_t": 0.3618500232696533,
|
|
"grad_norm": 105.30182647705078,
|
|
"learning_rate": 1.261184375888541e-09,
|
|
"logits/chosen": 1.5826127529144287,
|
|
"logits/rejected": 1.3515585660934448,
|
|
"logps/chosen": -79.6438217163086,
|
|
"logps/ref_chosen": -84.83087158203125,
|
|
"logps/ref_rejected": -105.31499481201172,
|
|
"logps/rejected": -101.57708740234375,
|
|
"loss": 1.1106,
|
|
"margin_dpo/margin_mean": 1.4491502046585083,
|
|
"margin_dpo/margin_std": 2.5398941040039062,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.9735449735449735,
|
|
"fcm_dpo/beta": 0.5619449615478516,
|
|
"fcm_dpo/delta": 0.0273895226418972,
|
|
"fcm_dpo/margin": 1.465846300125122,
|
|
"fcm_dpo/q_t": 0.351859986782074,
|
|
"grad_norm": 76.38080596923828,
|
|
"learning_rate": 1.1320193567288527e-09,
|
|
"logits/chosen": 1.936923861503601,
|
|
"logits/rejected": 1.8447864055633545,
|
|
"logps/chosen": -60.06108856201172,
|
|
"logps/ref_chosen": -65.11122131347656,
|
|
"logps/ref_rejected": -80.4027328491211,
|
|
"logps/rejected": -76.81845092773438,
|
|
"loss": 1.0602,
|
|
"margin_dpo/margin_mean": 1.4658467769622803,
|
|
"margin_dpo/margin_std": 2.313875675201416,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.9750566893424036,
|
|
"fcm_dpo/beta": 0.5310744047164917,
|
|
"fcm_dpo/delta": -0.26899102330207825,
|
|
"fcm_dpo/margin": 2.0425100326538086,
|
|
"fcm_dpo/q_t": 0.3078554570674896,
|
|
"grad_norm": 77.42607879638672,
|
|
"learning_rate": 1.0098157099674987e-09,
|
|
"logits/chosen": 1.8113110065460205,
|
|
"logits/rejected": 1.7860450744628906,
|
|
"logps/chosen": -71.65998840332031,
|
|
"logps/ref_chosen": -76.93634033203125,
|
|
"logps/ref_rejected": -89.14311981201172,
|
|
"logps/rejected": -85.9092788696289,
|
|
"loss": 0.8496,
|
|
"margin_dpo/margin_mean": 2.0425095558166504,
|
|
"margin_dpo/margin_std": 2.3543620109558105,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.9765684051398337,
|
|
"fcm_dpo/beta": 0.526309609413147,
|
|
"fcm_dpo/delta": -0.043940138071775436,
|
|
"fcm_dpo/margin": 1.6911022663116455,
|
|
"fcm_dpo/q_t": 0.33978480100631714,
|
|
"grad_norm": 81.16790008544922,
|
|
"learning_rate": 8.945768539031783e-10,
|
|
"logits/chosen": 1.8826818466186523,
|
|
"logits/rejected": 1.8223220109939575,
|
|
"logps/chosen": -73.10558319091797,
|
|
"logps/ref_chosen": -77.69122314453125,
|
|
"logps/ref_rejected": -98.14374542236328,
|
|
"logps/rejected": -95.24920654296875,
|
|
"loss": 1.0431,
|
|
"margin_dpo/margin_mean": 1.691102147102356,
|
|
"margin_dpo/margin_std": 2.6934101581573486,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.9780801209372638,
|
|
"fcm_dpo/beta": 0.5049861669540405,
|
|
"fcm_dpo/delta": -0.2970149517059326,
|
|
"fcm_dpo/margin": 2.2074761390686035,
|
|
"fcm_dpo/q_t": 0.30400532484054565,
|
|
"grad_norm": 72.28752899169922,
|
|
"learning_rate": 7.863060120144316e-10,
|
|
"logits/chosen": 1.770219326019287,
|
|
"logits/rejected": 1.6623930931091309,
|
|
"logps/chosen": -78.85108947753906,
|
|
"logps/ref_chosen": -83.79997253417969,
|
|
"logps/ref_rejected": -116.81965637207031,
|
|
"logps/rejected": -114.0782470703125,
|
|
"loss": 0.8414,
|
|
"margin_dpo/margin_mean": 2.2074756622314453,
|
|
"margin_dpo/margin_std": 2.673130989074707,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.9795918367346939,
|
|
"fcm_dpo/beta": 0.4951819181442261,
|
|
"fcm_dpo/delta": 0.001391381025314331,
|
|
"fcm_dpo/margin": 1.7139298915863037,
|
|
"fcm_dpo/q_t": 0.3343745470046997,
|
|
"grad_norm": 75.23444366455078,
|
|
"learning_rate": 6.850062128694045e-10,
|
|
"logits/chosen": 1.7372221946716309,
|
|
"logits/rejected": 1.5879833698272705,
|
|
"logps/chosen": -81.30325317382812,
|
|
"logps/ref_chosen": -85.9629898071289,
|
|
"logps/ref_rejected": -101.36552429199219,
|
|
"logps/rejected": -98.41972351074219,
|
|
"loss": 0.9327,
|
|
"margin_dpo/margin_mean": 1.713930606842041,
|
|
"margin_dpo/margin_std": 2.1953155994415283,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.981103552532124,
|
|
"fcm_dpo/beta": 0.4961739182472229,
|
|
"fcm_dpo/delta": 0.011451632715761662,
|
|
"fcm_dpo/margin": 1.6918838024139404,
|
|
"fcm_dpo/q_t": 0.34832143783569336,
|
|
"grad_norm": 75.87056732177734,
|
|
"learning_rate": 5.906802900412788e-10,
|
|
"logits/chosen": 1.8142719268798828,
|
|
"logits/rejected": 1.7031172513961792,
|
|
"logps/chosen": -63.719032287597656,
|
|
"logps/ref_chosen": -68.64892578125,
|
|
"logps/ref_rejected": -89.84898376464844,
|
|
"logps/rejected": -86.61097717285156,
|
|
"loss": 1.0822,
|
|
"margin_dpo/margin_mean": 1.6918833255767822,
|
|
"margin_dpo/margin_std": 2.7808849811553955,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.982615268329554,
|
|
"fcm_dpo/beta": 0.49525290727615356,
|
|
"fcm_dpo/delta": -0.05411721393465996,
|
|
"fcm_dpo/margin": 1.8146793842315674,
|
|
"fcm_dpo/q_t": 0.32556748390197754,
|
|
"grad_norm": 70.36974334716797,
|
|
"learning_rate": 5.033308820289184e-10,
|
|
"logits/chosen": 2.1288902759552,
|
|
"logits/rejected": 1.9611902236938477,
|
|
"logps/chosen": -67.82957458496094,
|
|
"logps/ref_chosen": -72.97265625,
|
|
"logps/ref_rejected": -93.04617309570312,
|
|
"logps/rejected": -89.7177734375,
|
|
"loss": 0.9211,
|
|
"margin_dpo/margin_mean": 1.8146789073944092,
|
|
"margin_dpo/margin_std": 2.226635694503784,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.9841269841269841,
|
|
"fcm_dpo/beta": 0.5010315179824829,
|
|
"fcm_dpo/delta": 0.09419667720794678,
|
|
"fcm_dpo/margin": 1.5195380449295044,
|
|
"fcm_dpo/q_t": 0.3595953583717346,
|
|
"grad_norm": 71.67884826660156,
|
|
"learning_rate": 4.2296043218295606e-10,
|
|
"logits/chosen": 1.8622299432754517,
|
|
"logits/rejected": 1.676352858543396,
|
|
"logps/chosen": -65.97269439697266,
|
|
"logps/ref_chosen": -71.05281066894531,
|
|
"logps/ref_rejected": -94.23469543457031,
|
|
"logps/rejected": -90.67411804199219,
|
|
"loss": 1.0445,
|
|
"margin_dpo/margin_mean": 1.5195378065109253,
|
|
"margin_dpo/margin_std": 2.327319622039795,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 0.9856386999244142,
|
|
"fcm_dpo/beta": 0.5064749121665955,
|
|
"fcm_dpo/delta": 0.09547622501850128,
|
|
"fcm_dpo/margin": 1.5042533874511719,
|
|
"fcm_dpo/q_t": 0.3629649877548218,
|
|
"grad_norm": 83.33802795410156,
|
|
"learning_rate": 3.4957118863768176e-10,
|
|
"logits/chosen": 1.598526954650879,
|
|
"logits/rejected": 1.5740216970443726,
|
|
"logps/chosen": -75.08648681640625,
|
|
"logps/ref_chosen": -80.06941223144531,
|
|
"logps/ref_rejected": -99.22327423095703,
|
|
"logps/rejected": -95.74461364746094,
|
|
"loss": 1.1239,
|
|
"margin_dpo/margin_mean": 1.5042537450790405,
|
|
"margin_dpo/margin_std": 2.696470260620117,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 0.9871504157218443,
|
|
"fcm_dpo/beta": 0.5104721784591675,
|
|
"fcm_dpo/delta": 0.024954237043857574,
|
|
"fcm_dpo/margin": 1.6202447414398193,
|
|
"fcm_dpo/q_t": 0.3395509421825409,
|
|
"grad_norm": 75.06707763671875,
|
|
"learning_rate": 2.831652042480093e-10,
|
|
"logits/chosen": 1.9889471530914307,
|
|
"logits/rejected": 1.885004997253418,
|
|
"logps/chosen": -75.12460327148438,
|
|
"logps/ref_chosen": -80.35701751708984,
|
|
"logps/ref_rejected": -92.1295394897461,
|
|
"logps/rejected": -88.51736450195312,
|
|
"loss": 0.9896,
|
|
"margin_dpo/margin_mean": 1.6202449798583984,
|
|
"margin_dpo/margin_std": 2.2250888347625732,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 0.9886621315192744,
|
|
"fcm_dpo/beta": 0.5066887140274048,
|
|
"fcm_dpo/delta": 0.04392646253108978,
|
|
"fcm_dpo/margin": 1.5880036354064941,
|
|
"fcm_dpo/q_t": 0.362444132566452,
|
|
"grad_norm": 82.49794006347656,
|
|
"learning_rate": 2.2374433653205016e-10,
|
|
"logits/chosen": 1.6642907857894897,
|
|
"logits/rejected": 1.4560799598693848,
|
|
"logps/chosen": -73.12020874023438,
|
|
"logps/ref_chosen": -78.06475830078125,
|
|
"logps/ref_rejected": -106.05763244628906,
|
|
"logps/rejected": -102.70108032226562,
|
|
"loss": 1.0555,
|
|
"margin_dpo/margin_mean": 1.588003158569336,
|
|
"margin_dpo/margin_std": 2.5010390281677246,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 0.9901738473167044,
|
|
"fcm_dpo/beta": 0.5086958408355713,
|
|
"fcm_dpo/delta": -0.013962805271148682,
|
|
"fcm_dpo/margin": 1.6889777183532715,
|
|
"fcm_dpo/q_t": 0.3477616310119629,
|
|
"grad_norm": 68.81745147705078,
|
|
"learning_rate": 1.7131024761923852e-10,
|
|
"logits/chosen": 1.5516088008880615,
|
|
"logits/rejected": 1.3263245820999146,
|
|
"logps/chosen": -61.7364501953125,
|
|
"logps/ref_chosen": -67.03407287597656,
|
|
"logps/ref_rejected": -97.57197570800781,
|
|
"logps/rejected": -93.96332550048828,
|
|
"loss": 0.9777,
|
|
"margin_dpo/margin_mean": 1.6889780759811401,
|
|
"margin_dpo/margin_std": 2.4260029792785645,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.9916855631141346,
|
|
"fcm_dpo/beta": 0.507418155670166,
|
|
"fcm_dpo/delta": -0.06031068414449692,
|
|
"fcm_dpo/margin": 1.7811647653579712,
|
|
"fcm_dpo/q_t": 0.3425629734992981,
|
|
"grad_norm": 117.67913055419922,
|
|
"learning_rate": 1.2586440420372934e-10,
|
|
"logits/chosen": 1.5796771049499512,
|
|
"logits/rejected": 1.482344388961792,
|
|
"logps/chosen": -84.55900573730469,
|
|
"logps/ref_chosen": -89.31463623046875,
|
|
"logps/ref_rejected": -105.14315795898438,
|
|
"logps/rejected": -102.168701171875,
|
|
"loss": 1.0279,
|
|
"margin_dpo/margin_mean": 1.7811646461486816,
|
|
"margin_dpo/margin_std": 2.7895307540893555,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 0.9931972789115646,
|
|
"fcm_dpo/beta": 0.4933139383792877,
|
|
"fcm_dpo/delta": -0.18509797751903534,
|
|
"fcm_dpo/margin": 2.0582315921783447,
|
|
"fcm_dpo/q_t": 0.3128923177719116,
|
|
"grad_norm": 69.68338012695312,
|
|
"learning_rate": 8.740807750345913e-11,
|
|
"logits/chosen": 1.9346446990966797,
|
|
"logits/rejected": 1.777329921722412,
|
|
"logps/chosen": -59.56598663330078,
|
|
"logps/ref_chosen": -64.89747619628906,
|
|
"logps/ref_rejected": -94.21998596191406,
|
|
"logps/rejected": -90.94673156738281,
|
|
"loss": 0.8776,
|
|
"margin_dpo/margin_mean": 2.058232545852661,
|
|
"margin_dpo/margin_std": 2.4367268085479736,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 0.9947089947089947,
|
|
"fcm_dpo/beta": 0.5024198293685913,
|
|
"fcm_dpo/delta": 0.06533484160900116,
|
|
"fcm_dpo/margin": 1.5623362064361572,
|
|
"fcm_dpo/q_t": 0.36561936140060425,
|
|
"grad_norm": 81.33644104003906,
|
|
"learning_rate": 5.594234322453539e-11,
|
|
"logits/chosen": 1.726067304611206,
|
|
"logits/rejected": 1.6502690315246582,
|
|
"logps/chosen": -76.22871398925781,
|
|
"logps/ref_chosen": -81.16606140136719,
|
|
"logps/ref_rejected": -97.72825622558594,
|
|
"logps/rejected": -94.35324096679688,
|
|
"loss": 1.1139,
|
|
"margin_dpo/margin_mean": 1.5623366832733154,
|
|
"margin_dpo/margin_std": 2.743770122528076,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 0.9962207105064248,
|
|
"fcm_dpo/beta": 0.5083540678024292,
|
|
"fcm_dpo/delta": 0.24330151081085205,
|
|
"fcm_dpo/margin": 1.2222051620483398,
|
|
"fcm_dpo/q_t": 0.3878973424434662,
|
|
"grad_norm": 78.94991302490234,
|
|
"learning_rate": 3.146808153123293e-11,
|
|
"logits/chosen": 1.8459105491638184,
|
|
"logits/rejected": 1.6787996292114258,
|
|
"logps/chosen": -69.77076721191406,
|
|
"logps/ref_chosen": -74.42193603515625,
|
|
"logps/ref_rejected": -87.81561279296875,
|
|
"logps/rejected": -84.38665008544922,
|
|
"loss": 1.2148,
|
|
"margin_dpo/margin_mean": 1.222205400466919,
|
|
"margin_dpo/margin_std": 2.527038097381592,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 0.9977324263038548,
|
|
"fcm_dpo/beta": 0.5075035095214844,
|
|
"fcm_dpo/delta": -0.19134506583213806,
|
|
"fcm_dpo/margin": 2.0135555267333984,
|
|
"fcm_dpo/q_t": 0.3037455677986145,
|
|
"grad_norm": 70.64783477783203,
|
|
"learning_rate": 1.3985977021235829e-11,
|
|
"logits/chosen": 1.8135673999786377,
|
|
"logits/rejected": 1.689598560333252,
|
|
"logps/chosen": -66.68452453613281,
|
|
"logps/ref_chosen": -71.68511962890625,
|
|
"logps/ref_rejected": -98.01472473144531,
|
|
"logps/rejected": -95.0276870727539,
|
|
"loss": 0.8403,
|
|
"margin_dpo/margin_mean": 2.0135550498962402,
|
|
"margin_dpo/margin_std": 2.1681275367736816,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.999244142101285,
|
|
"fcm_dpo/beta": 0.5117430686950684,
|
|
"fcm_dpo/delta": 0.07624995708465576,
|
|
"fcm_dpo/margin": 1.5202569961547852,
|
|
"fcm_dpo/q_t": 0.3583983778953552,
|
|
"grad_norm": 78.20012664794922,
|
|
"learning_rate": 3.4965187065971735e-12,
|
|
"logits/chosen": 1.612052083015442,
|
|
"logits/rejected": 1.4620475769042969,
|
|
"logps/chosen": -73.8533935546875,
|
|
"logps/ref_chosen": -78.35111999511719,
|
|
"logps/ref_rejected": -99.47113037109375,
|
|
"logps/rejected": -96.49366760253906,
|
|
"loss": 1.103,
|
|
"margin_dpo/margin_mean": 1.5202577114105225,
|
|
"margin_dpo/margin_std": 2.647984027862549,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.999244142101285,
|
|
"step": 661,
|
|
"total_flos": 0.0,
|
|
"train_loss": 1.1363916052632181,
|
|
"train_runtime": 2119.1053,
|
|
"train_samples_per_second": 19.978,
|
|
"train_steps_per_second": 0.312
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 661,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 50,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|